buildlog 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
buildlog/confidence.py ADDED
@@ -0,0 +1,311 @@
1
+ """Confidence scoring for rules and patterns.
2
+
3
+ Confidence represents structural inertia - how hard it would be for the system
4
+ to stop believing a rule. It reflects accumulated mass from reinforcement,
5
+ not objective correctness.
6
+
7
+ A rule gains mass when:
8
+ - It shows up again (frequency)
9
+ - It shows up recently (recency)
10
+ - It survives contradictions
11
+
12
+ A rule loses mass when:
13
+ - It's unused (time decay)
14
+ - It's contradicted
15
+ - It's contextually bypassed
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import math
21
+ from dataclasses import dataclass, field
22
+ from datetime import datetime, timezone
23
+ from enum import Enum
24
+ from typing import TypedDict
25
+
26
+ __all__ = [
27
+ "ConfidenceTier",
28
+ "ConfidenceConfig",
29
+ "ConfidenceMetrics",
30
+ "ConfidenceMetricsDict",
31
+ "calculate_confidence",
32
+ "get_confidence_tier",
33
+ "merge_confidence_metrics",
34
+ "add_contradiction",
35
+ ]
36
+
37
+
38
class ConfidenceTier(str, Enum):
    """Human-readable labels describing how much confidence a rule carries.

    The tiers are descriptive only: they exist for display and
    interpretation, and this class attaches no behavior to them. Because
    the enum mixes in ``str``, every member compares equal to its
    lowercase string value.
    """

    # Low mass, recently introduced
    SPECULATIVE = "speculative"
    # Growing mass, some reinforcement
    PROVISIONAL = "provisional"
    # Consistent reinforcement, moderate mass
    STABLE = "stable"
    # High mass, sustained over time
    ENTRENCHED = "entrenched"
49
+
50
+
51
@dataclass(frozen=True)
class ConfidenceConfig:
    """Configuration parameters for confidence calculation.

    Attributes:
        tau: Time constant for recency decay, in days. The recency weight
            is ``exp(-elapsed_days / tau)``, so it drops to 1/e (about 37%)
            after ``tau`` days; the equivalent half-life is ``tau * ln(2)``.
            Smaller = twitchier system.
        k: Saturation constant for frequency. Larger = slower saturation.
        lambda_: Decay constant for contradiction penalty.
        tier_thresholds: Confidence score thresholds ``(low, mid, high)``
            separating the four descriptive tiers.

    Raises:
        ValueError: If ``tau``, ``k`` or ``lambda_`` is not a positive
            number (NaN included), or if ``tier_thresholds`` is not a
            non-decreasing triple inside ``[0, 1]``.
    """

    tau: float = 30.0  # 30-day decay time constant by default
    k: float = 5.0  # Frequency saturation constant
    lambda_: float = 2.0  # Contradiction decay constant
    tier_thresholds: tuple[float, float, float] = (0.2, 0.4, 0.7)

    def __post_init__(self) -> None:
        """Validate all parameters once, at construction time."""
        # ``not (value > 0)`` instead of ``value <= 0``: every comparison
        # with NaN is False, so the latter would silently accept NaN and
        # poison every score computed from this configuration.
        for name in ("tau", "k", "lambda_"):
            if not getattr(self, name) > 0:
                raise ValueError(f"{name} must be positive")

        low, mid, high = self.tier_thresholds
        # Chained comparison also rejects NaN thresholds.
        if not (0 <= low <= mid <= high <= 1):
            raise ValueError(
                "tier_thresholds must be monotonically increasing in [0, 1]"
            )
79
+
80
+
81
class ConfidenceMetricsDict(TypedDict):
    """Plain-dict shape used when confidence metrics are serialized.

    Counters are stored as integers; timestamps are stored as ISO format
    strings.
    """

    reinforcement_count: int
    # ISO format timestamp
    last_reinforced: str
    contradiction_count: int
    # ISO format timestamp
    first_seen: str
88
+
89
+
90
def _assume_utc(moment: datetime) -> datetime:
    """Return *moment* unchanged if timezone-aware, otherwise tagged as UTC."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment


@dataclass
class ConfidenceMetrics:
    """Tracked metrics for confidence calculation.

    These are the raw inputs that feed into the confidence formula.

    Attributes:
        reinforcement_count: Number of times the rule has been seen.
        last_reinforced: Timestamp of the most recent reinforcement.
        contradiction_count: Number of recorded contradictions.
        first_seen: Timestamp at which the rule was first recorded.

    Timezone-naive timestamps are assumed to be UTC and are made
    timezone-aware on construction, so later arithmetic against aware
    datetimes (such as ``datetime.now(timezone.utc)``) cannot fail with a
    naive/aware ``TypeError``.

    Raises:
        ValueError: If either counter is negative.
    """

    reinforcement_count: int = 1
    last_reinforced: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    contradiction_count: int = 0
    first_seen: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def __post_init__(self) -> None:
        """Validate the counters and normalize naive timestamps to UTC."""
        if self.reinforcement_count < 0:
            raise ValueError("reinforcement_count must be non-negative")
        if self.contradiction_count < 0:
            raise ValueError("contradiction_count must be non-negative")

        # Apply the "naive means UTC" convention here rather than only in
        # from_dict, so directly constructed instances follow it as well.
        self.last_reinforced = _assume_utc(self.last_reinforced)
        self.first_seen = _assume_utc(self.first_seen)

    def to_dict(self) -> ConfidenceMetricsDict:
        """Convert to serializable dictionary (timestamps as ISO strings)."""
        return {
            "reinforcement_count": self.reinforcement_count,
            "last_reinforced": self.last_reinforced.isoformat(),
            "contradiction_count": self.contradiction_count,
            "first_seen": self.first_seen.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: ConfidenceMetricsDict) -> ConfidenceMetrics:
        """Reconstruct from serialized dictionary.

        Note: Timezone-naive datetimes are assumed to be UTC.

        Args:
            data: Mapping with the keys produced by ``to_dict``.

        Returns:
            A new instance with timezone-aware timestamps.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If a timestamp is not valid ISO format or a
                counter is negative.
        """
        # Naive timestamps are tagged as UTC by __post_init__.
        return cls(
            reinforcement_count=data["reinforcement_count"],
            last_reinforced=datetime.fromisoformat(data["last_reinforced"]),
            contradiction_count=data["contradiction_count"],
            first_seen=datetime.fromisoformat(data["first_seen"]),
        )
140
+
141
+
142
def calculate_frequency_weight(n: int, k: float) -> float:
    """Calculate frequency weight with saturation.

    Uses bounded exponential: 1 - exp(-n/k)
    This makes early reinforcement matter more than late spam.

    Args:
        n: Reinforcement count
        k: Saturation constant (larger = slower saturation)

    Returns:
        Weight in range [0, 1]: exactly 0 when ``n`` is 0, approaching 1
        as ``n`` grows.
    """
    # expm1(x) computes exp(x) - 1 without the catastrophic cancellation
    # that makes ``1.0 - math.exp(-n / k)`` collapse to 0.0 when n/k is
    # tiny. Subtracting from 0.0 (rather than negating) keeps the n == 0
    # result a positive zero.
    return 0.0 - math.expm1(-n / k)
156
+
157
+
158
def calculate_recency_weight(
    t_last: datetime,
    t_now: datetime,
    tau: float,
) -> float:
    """Calculate recency weight with exponential decay.

    Uses: exp(-(t_now - t_last) / tau)

    Args:
        t_last: Timestamp of last reinforcement
        t_now: Current timestamp
        tau: Decay time constant in days. The weight falls to 1/e after
            ``tau`` days (the half-life is ``tau * ln(2)``).

    Returns:
        Weight that is 1.0 at zero elapsed time and decays toward 0 as
        time passes. If t_last is in the future, clamps to 1.0.

    Note:
        Timezone-naive datetimes are assumed to be UTC, so a naive
        timestamp can be compared with an aware one without raising
        ``TypeError``.
    """
    # Subtracting a naive datetime from an aware one raises TypeError;
    # normalize both sides using the "naive means UTC" convention.
    if t_last.tzinfo is None:
        t_last = t_last.replace(tzinfo=timezone.utc)
    if t_now.tzinfo is None:
        t_now = t_now.replace(tzinfo=timezone.utc)

    days_elapsed = (t_now - t_last).total_seconds() / (24 * 60 * 60)
    if days_elapsed < 0:
        return 1.0  # Future timestamps treated as "just now"
    return math.exp(-days_elapsed / tau)
180
+
181
+
182
def calculate_contradiction_penalty(c: int, lambda_: float) -> float:
    """Compute the multiplicative drag caused by contradictions.

    A contradiction never removes a rule outright; each one just scales
    the score down further. The multiplier is ``exp(-c / lambda_)``.

    Args:
        c: Contradiction count
        lambda_: Decay constant

    Returns:
        Penalty multiplier: 1.0 when ``c`` is 0, shrinking toward 0 as
        ``c`` grows (for positive ``lambda_``).
    """
    drag_exponent = -c / lambda_
    return math.exp(drag_exponent)
196
+
197
+
198
def calculate_confidence(
    metrics: ConfidenceMetrics,
    config: ConfidenceConfig | None = None,
    t_now: datetime | None = None,
) -> float:
    """Compute the confidence score of a rule from its tracked metrics.

    The score is the product of three independent factors:

        frequency_weight * recency_weight * contradiction_penalty

    Args:
        metrics: Tracked metrics for the rule
        config: Scoring configuration; a default ``ConfidenceConfig`` is
            created when None
        t_now: Reference "current" time; the current UTC time is used
            when None

    Returns:
        The product of the three factors as a float.
    """
    settings = ConfidenceConfig() if config is None else config
    reference_time = datetime.now(timezone.utc) if t_now is None else t_now

    # Multiply the factors in the same order as the formula above.
    score = calculate_frequency_weight(metrics.reinforcement_count, settings.k)
    score *= calculate_recency_weight(
        metrics.last_reinforced, reference_time, settings.tau
    )
    score *= calculate_contradiction_penalty(
        metrics.contradiction_count, settings.lambda_
    )
    return score
233
+
234
+
235
def get_confidence_tier(
    score: float,
    config: ConfidenceConfig | None = None,
) -> ConfidenceTier:
    """Translate a numeric confidence score into its descriptive tier.

    A score strictly below a threshold falls into the tier beneath it; a
    score at or above the highest threshold is ``ENTRENCHED``.

    Args:
        score: Confidence score in range [0, 1]
        config: Configuration supplying ``tier_thresholds``; a default
            ``ConfidenceConfig`` is created when None

    Returns:
        Descriptive tier label

    Raises:
        ValueError: If score is outside [0, 1] range
    """
    # Keep the chained-comparison form: it is also False for NaN, so NaN
    # scores are rejected here.
    if not (0.0 <= score <= 1.0):
        raise ValueError(f"score must be in [0, 1], got {score}")

    settings = ConfidenceConfig() if config is None else config
    low, mid, high = settings.tier_thresholds

    # Walk the thresholds from lowest to highest; the first ceiling the
    # score sits strictly below decides the tier.
    ladder = (
        (low, ConfidenceTier.SPECULATIVE),
        (mid, ConfidenceTier.PROVISIONAL),
        (high, ConfidenceTier.STABLE),
    )
    for ceiling, tier in ladder:
        if score < ceiling:
            return tier
    return ConfidenceTier.ENTRENCHED
267
+
268
+
269
def merge_confidence_metrics(
    existing: ConfidenceMetrics,
    new_occurrence: datetime | None = None,
) -> ConfidenceMetrics:
    """Fold one more sighting of a rule into its metrics.

    Called when a rule is reinforced (seen again). The input is not
    modified; a new ``ConfidenceMetrics`` is returned.

    Args:
        existing: Current metrics for the rule
        new_occurrence: Timestamp of new occurrence; the current UTC time
            is used when None

    Returns:
        Metrics with ``reinforcement_count`` increased by one and
        ``last_reinforced`` set to the occurrence time;
        ``contradiction_count`` and ``first_seen`` are carried over.
    """
    seen_at = (
        datetime.now(timezone.utc) if new_occurrence is None else new_occurrence
    )
    bumped_count = existing.reinforcement_count + 1

    return ConfidenceMetrics(
        reinforcement_count=bumped_count,
        last_reinforced=seen_at,
        contradiction_count=existing.contradiction_count,
        first_seen=existing.first_seen,
    )
293
+
294
+
295
def add_contradiction(metrics: ConfidenceMetrics) -> ConfidenceMetrics:
    """Register one contradiction against a rule.

    Contradictions add drag but don't invalidate rules. The input is not
    modified; a new ``ConfidenceMetrics`` is returned.

    Args:
        metrics: Current metrics for the rule

    Returns:
        Metrics with ``contradiction_count`` increased by one and every
        other field carried over unchanged.
    """
    bumped_contradictions = metrics.contradiction_count + 1

    return ConfidenceMetrics(
        reinforcement_count=metrics.reinforcement_count,
        last_reinforced=metrics.last_reinforced,
        contradiction_count=bumped_contradictions,
        first_seen=metrics.first_seen,
    )
buildlog/core/__init__.py CHANGED
@@ -2,11 +2,15 @@
2
2
 
3
3
  from buildlog.core.operations import (
4
4
  DiffResult,
5
+ LearnFromReviewResult,
5
6
  PromoteResult,
6
7
  RejectResult,
8
+ ReviewIssue,
9
+ ReviewLearning,
7
10
  StatusResult,
8
11
  diff,
9
12
  find_skills_by_ids,
13
+ learn_from_review,
10
14
  promote,
11
15
  reject,
12
16
  status,
@@ -17,9 +21,13 @@ __all__ = [
17
21
  "PromoteResult",
18
22
  "RejectResult",
19
23
  "DiffResult",
24
+ "ReviewIssue",
25
+ "ReviewLearning",
26
+ "LearnFromReviewResult",
20
27
  "status",
21
28
  "promote",
22
29
  "reject",
23
30
  "diff",
24
31
  "find_skills_by_ids",
32
+ "learn_from_review",
25
33
  ]