buildlog 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- buildlog/confidence.py +311 -0
- buildlog/core/__init__.py +8 -0
- buildlog/core/operations.py +343 -2
- buildlog/mcp/__init__.py +2 -0
- buildlog/mcp/server.py +2 -0
- buildlog/mcp/tools.py +46 -1
- buildlog/skills.py +233 -11
- {buildlog-0.2.0.data → buildlog-0.4.0.data}/data/share/buildlog/post_gen.py +11 -7
- {buildlog-0.2.0.dist-info → buildlog-0.4.0.dist-info}/METADATA +134 -2
- buildlog-0.4.0.dist-info/RECORD +30 -0
- buildlog-0.2.0.dist-info/RECORD +0 -29
- {buildlog-0.2.0.data → buildlog-0.4.0.data}/data/share/buildlog/copier.yml +0 -0
- {buildlog-0.2.0.data → buildlog-0.4.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
- {buildlog-0.2.0.data → buildlog-0.4.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
- {buildlog-0.2.0.data → buildlog-0.4.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
- {buildlog-0.2.0.data → buildlog-0.4.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
- {buildlog-0.2.0.data → buildlog-0.4.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
- {buildlog-0.2.0.dist-info → buildlog-0.4.0.dist-info}/WHEEL +0 -0
- {buildlog-0.2.0.dist-info → buildlog-0.4.0.dist-info}/entry_points.txt +0 -0
- {buildlog-0.2.0.dist-info → buildlog-0.4.0.dist-info}/licenses/LICENSE +0 -0
buildlog/confidence.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""Confidence scoring for rules and patterns.
|
|
2
|
+
|
|
3
|
+
Confidence represents structural inertia - how hard it would be for the system
|
|
4
|
+
to stop believing a rule. It reflects accumulated mass from reinforcement,
|
|
5
|
+
not objective correctness.
|
|
6
|
+
|
|
7
|
+
A rule gains mass when:
|
|
8
|
+
- It shows up again (frequency)
|
|
9
|
+
- It shows up recently (recency)
|
|
10
|
+
- It survives contradictions
|
|
11
|
+
|
|
12
|
+
A rule loses mass when:
|
|
13
|
+
- It's unused (time decay)
|
|
14
|
+
- It's contradicted
|
|
15
|
+
- It's contextually bypassed
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import math
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from enum import Enum
|
|
24
|
+
from typing import TypedDict
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"ConfidenceTier",
|
|
28
|
+
"ConfidenceConfig",
|
|
29
|
+
"ConfidenceMetrics",
|
|
30
|
+
"ConfidenceMetricsDict",
|
|
31
|
+
"calculate_confidence",
|
|
32
|
+
"get_confidence_tier",
|
|
33
|
+
"merge_confidence_metrics",
|
|
34
|
+
"add_contradiction",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ConfidenceTier(str, Enum):
    """Human-readable labels for how established a rule is.

    Members are listed from least to most accumulated confidence. They are
    descriptive only: this enum carries no thresholds and gates no logic.
    """

    # Low mass, recently introduced.
    SPECULATIVE = "speculative"

    # Growing mass, some reinforcement.
    PROVISIONAL = "provisional"

    # Consistent reinforcement, moderate mass.
    STABLE = "stable"

    # High mass, sustained over time.
    ENTRENCHED = "entrenched"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class ConfidenceConfig:
    """Configuration parameters for confidence calculation.

    Attributes:
        tau: Time constant of the recency decay, in days. Recency is computed
            as ``exp(-days / tau)``, so the weight drops to 1/e (about 37%)
            after ``tau`` days; the equivalent half-life is ``tau * ln(2)``.
            Smaller = twitchier system.
        k: Saturation constant for frequency, used as ``1 - exp(-n / k)``.
            Larger = slower saturation.
        lambda_: Decay constant for the contradiction penalty, used as
            ``exp(-c / lambda_)``.
        tier_thresholds: ``(low, mid, high)`` score boundaries separating the
            four tiers. Must be non-decreasing and lie within [0, 1].

    Raises:
        ValueError: If ``tau``, ``k`` or ``lambda_`` is not strictly positive
            (NaN included), or if ``tier_thresholds`` is not a non-decreasing
            triple inside [0, 1].
    """

    tau: float = 30.0  # 30-day decay time constant by default
    k: float = 5.0  # Frequency saturation constant
    lambda_: float = 2.0  # Contradiction decay constant
    tier_thresholds: tuple[float, float, float] = (0.2, 0.4, 0.7)

    def __post_init__(self) -> None:
        """Validate the parameters once the frozen instance is built."""
        # ``not (value > 0)`` instead of ``value <= 0``: every comparison with
        # NaN is False, so the latter would silently accept NaN and poison
        # every score computed with this config.
        for name in ("tau", "k", "lambda_"):
            if not getattr(self, name) > 0:
                raise ValueError(f"{name} must be positive")

        low, mid, high = self.tier_thresholds
        # The chained comparison is already False for NaN thresholds.
        if not (0 <= low <= mid <= high <= 1):
            raise ValueError(
                "tier_thresholds must be monotonically increasing in [0, 1]"
            )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ConfidenceMetricsDict(TypedDict):
    """Plain-data (serializable) representation of confidence metrics.

    Counters are kept as integers; both timestamps are carried as ISO format
    strings.
    """

    reinforcement_count: int
    # ISO format timestamp
    last_reinforced: str
    contradiction_count: int
    # ISO format timestamp
    first_seen: str
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class ConfidenceMetrics:
    """Raw counters and timestamps consumed by the confidence formula.

    Attributes:
        reinforcement_count: Number of reinforcements recorded; starts at 1.
        last_reinforced: Timestamp of the latest reinforcement; defaults to
            the current UTC time.
        contradiction_count: Number of contradictions recorded; starts at 0.
        first_seen: Timestamp of the first sighting; defaults to the current
            UTC time.

    Raises:
        ValueError: If either counter is negative.
    """

    reinforcement_count: int = 1
    last_reinforced: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    contradiction_count: int = 0
    first_seen: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    def __post_init__(self) -> None:
        """Reject negative counters."""
        for counter in ("reinforcement_count", "contradiction_count"):
            if getattr(self, counter) < 0:
                raise ValueError(f"{counter} must be non-negative")

    def to_dict(self) -> ConfidenceMetricsDict:
        """Return the metrics as a dict with ISO-formatted timestamps."""
        reinforced_iso = self.last_reinforced.isoformat()
        first_seen_iso = self.first_seen.isoformat()
        return {
            "reinforcement_count": self.reinforcement_count,
            "last_reinforced": reinforced_iso,
            "contradiction_count": self.contradiction_count,
            "first_seen": first_seen_iso,
        }

    @classmethod
    def from_dict(cls, data: ConfidenceMetricsDict) -> ConfidenceMetrics:
        """Rebuild metrics from the dictionary form produced by ``to_dict``.

        Timestamps that parse as timezone-naive are assumed to be UTC.
        """

        def parse_as_utc(raw: str) -> datetime:
            # Attach UTC only when the parsed value carries no tzinfo.
            stamp = datetime.fromisoformat(raw)
            if stamp.tzinfo is not None:
                return stamp
            return stamp.replace(tzinfo=timezone.utc)

        reinforced_at = parse_as_utc(data["last_reinforced"])
        seen_at = parse_as_utc(data["first_seen"])

        return cls(
            reinforcement_count=data["reinforcement_count"],
            last_reinforced=reinforced_at,
            contradiction_count=data["contradiction_count"],
            first_seen=seen_at,
        )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def calculate_frequency_weight(n: int, k: float) -> float:
    """Return the saturating frequency weight ``1 - exp(-n / k)``.

    The curve is steepest near zero, so early reinforcements move the weight
    more than later ones.

    Args:
        n: Reinforcement count
        k: Saturation constant (larger = slower saturation)

    Returns:
        The weight: exactly 0.0 for ``n == 0`` and approaching 1 as ``n``
        grows.
    """
    remaining = math.exp(-n / k)
    return 1.0 - remaining
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def calculate_recency_weight(
    t_last: datetime,
    t_now: datetime,
    tau: float,
) -> float:
    """Calculate recency weight with exponential decay.

    Uses: exp(-(t_now - t_last) / tau), with the elapsed time measured in
    days.

    Timezone-naive datetimes are interpreted as UTC, so a naive timestamp can
    be compared with an aware one instead of raising ``TypeError``.

    Args:
        t_last: Timestamp of last reinforcement
        t_now: Current timestamp
        tau: Decay time constant in days. The weight drops to 1/e after
            ``tau`` days (the equivalent half-life is ``tau * ln(2)``).

    Returns:
        1.0 when no time has elapsed, decaying towards 0 as time passes.
        If t_last is in the future, clamps to 1.0.
    """
    # Subtracting a naive datetime from an aware one raises TypeError; treat
    # naive values as UTC. When both inputs are naive the difference is
    # unchanged by this normalization.
    if t_last.tzinfo is None:
        t_last = t_last.replace(tzinfo=timezone.utc)
    if t_now.tzinfo is None:
        t_now = t_now.replace(tzinfo=timezone.utc)

    days_elapsed = (t_now - t_last).total_seconds() / (24 * 60 * 60)
    if days_elapsed < 0:
        return 1.0  # Future timestamps treated as "just now"
    return math.exp(-days_elapsed / tau)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def calculate_contradiction_penalty(c: int, lambda_: float) -> float:
    """Return the drag multiplier ``exp(-c / lambda_)`` for contradictions.

    Each contradiction shrinks the multiplier rather than invalidating the
    rule outright.

    Args:
        c: Contradiction count
        lambda_: Decay constant

    Returns:
        Penalty multiplier: 1.0 with no contradictions, shrinking as ``c``
        grows.
    """
    drag_exponent = -c / lambda_
    return math.exp(drag_exponent)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def calculate_confidence(
    metrics: ConfidenceMetrics,
    config: ConfidenceConfig | None = None,
    t_now: datetime | None = None,
) -> float:
    """Combine frequency, recency and contradiction drag into one score.

    The score is the product
    ``frequency_weight * recency_weight * contradiction_penalty``. Each
    factor is bounded by 1, so the result stays between 0 and 1: it grows
    with reinforcement, decays as time passes since the last reinforcement,
    and is dragged down by contradictions.

    Args:
        metrics: Tracked metrics for the rule
        config: Scoring configuration (a default ``ConfidenceConfig`` is
            used if None)
        t_now: Reference time for the recency decay (current UTC time if
            None)

    Returns:
        Confidence score between 0 and 1.
    """
    cfg = ConfidenceConfig() if config is None else config
    reference_time = datetime.now(timezone.utc) if t_now is None else t_now

    frequency = calculate_frequency_weight(metrics.reinforcement_count, cfg.k)
    recency = calculate_recency_weight(
        metrics.last_reinforced, reference_time, cfg.tau
    )
    drag = calculate_contradiction_penalty(
        metrics.contradiction_count, cfg.lambda_
    )

    score = frequency * recency * drag
    return score
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def get_confidence_tier(
    score: float,
    config: ConfidenceConfig | None = None,
) -> ConfidenceTier:
    """Translate a confidence score into its descriptive tier.

    Each threshold is an exclusive upper bound: a score equal to a threshold
    falls into the tier above it.

    Args:
        score: Confidence score in range [0, 1]
        config: Configuration supplying ``tier_thresholds`` (a default
            ``ConfidenceConfig`` is used if None)

    Returns:
        Descriptive tier label

    Raises:
        ValueError: If score is outside [0, 1] range
    """
    in_range = 0.0 <= score <= 1.0
    if not in_range:
        raise ValueError(f"score must be in [0, 1], got {score}")

    cfg = ConfidenceConfig() if config is None else config
    low, mid, high = cfg.tier_thresholds

    # Walk the boundaries from lowest to highest; the first one the score
    # falls under decides the tier.
    ladder = (
        (low, ConfidenceTier.SPECULATIVE),
        (mid, ConfidenceTier.PROVISIONAL),
        (high, ConfidenceTier.STABLE),
    )
    for upper_bound, tier in ladder:
        if score < upper_bound:
            return tier
    return ConfidenceTier.ENTRENCHED
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def merge_confidence_metrics(
    existing: ConfidenceMetrics,
    new_occurrence: datetime | None = None,
) -> ConfidenceMetrics:
    """Fold one more sighting of a rule into its metrics.

    A new ``ConfidenceMetrics`` is returned; ``existing`` is not modified.

    Args:
        existing: Current metrics for the rule
        new_occurrence: Timestamp of the new occurrence (current UTC time if
            None)

    Returns:
        Metrics with ``reinforcement_count`` incremented by one and
        ``last_reinforced`` set to the occurrence time; the contradiction
        count and ``first_seen`` are carried over unchanged.
    """
    reinforced_at = (
        datetime.now(timezone.utc) if new_occurrence is None else new_occurrence
    )
    bumped_count = existing.reinforcement_count + 1

    return ConfidenceMetrics(
        reinforcement_count=bumped_count,
        last_reinforced=reinforced_at,
        contradiction_count=existing.contradiction_count,
        first_seen=existing.first_seen,
    )
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def add_contradiction(metrics: ConfidenceMetrics) -> ConfidenceMetrics:
    """Return a copy of the metrics with one more contradiction recorded.

    Only ``contradiction_count`` changes; the reinforcement count and both
    timestamps are carried over, and ``metrics`` itself is not modified.

    Args:
        metrics: Current metrics for the rule

    Returns:
        Metrics with ``contradiction_count`` incremented by one
    """
    contradictions = metrics.contradiction_count + 1
    updated = ConfidenceMetrics(
        reinforcement_count=metrics.reinforcement_count,
        last_reinforced=metrics.last_reinforced,
        contradiction_count=contradictions,
        first_seen=metrics.first_seen,
    )
    return updated
|
buildlog/core/__init__.py
CHANGED
|
@@ -2,11 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
from buildlog.core.operations import (
|
|
4
4
|
DiffResult,
|
|
5
|
+
LearnFromReviewResult,
|
|
5
6
|
PromoteResult,
|
|
6
7
|
RejectResult,
|
|
8
|
+
ReviewIssue,
|
|
9
|
+
ReviewLearning,
|
|
7
10
|
StatusResult,
|
|
8
11
|
diff,
|
|
9
12
|
find_skills_by_ids,
|
|
13
|
+
learn_from_review,
|
|
10
14
|
promote,
|
|
11
15
|
reject,
|
|
12
16
|
status,
|
|
@@ -17,9 +21,13 @@ __all__ = [
|
|
|
17
21
|
"PromoteResult",
|
|
18
22
|
"RejectResult",
|
|
19
23
|
"DiffResult",
|
|
24
|
+
"ReviewIssue",
|
|
25
|
+
"ReviewLearning",
|
|
26
|
+
"LearnFromReviewResult",
|
|
20
27
|
"status",
|
|
21
28
|
"promote",
|
|
22
29
|
"reject",
|
|
23
30
|
"diff",
|
|
24
31
|
"find_skills_by_ids",
|
|
32
|
+
"learn_from_review",
|
|
25
33
|
]
|