buildlog-0.1.0-py3-none-any.whl → buildlog-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
- buildlog/cli.py +46 -23
- buildlog/confidence.py +311 -0
- buildlog/core/operations.py +11 -15
- buildlog/distill.py +3 -3
- buildlog/embeddings.py +108 -16
- buildlog/mcp/tools.py +4 -4
- buildlog/render/__init__.py +34 -11
- buildlog/render/claude_md.py +3 -24
- buildlog/render/settings_json.py +3 -23
- buildlog/render/skill.py +175 -0
- buildlog/render/tracking.py +43 -0
- buildlog/skills.py +229 -47
- buildlog/stats.py +7 -5
- {buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/post_gen.py +11 -7
- buildlog-0.3.0.dist-info/METADATA +763 -0
- buildlog-0.3.0.dist-info/RECORD +30 -0
- buildlog-0.1.0.dist-info/METADATA +0 -664
- buildlog-0.1.0.dist-info/RECORD +0 -27
- {buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/copier.yml +0 -0
- {buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
- {buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
- {buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
- {buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
- {buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
- {buildlog-0.1.0.dist-info → buildlog-0.3.0.dist-info}/WHEEL +0 -0
- {buildlog-0.1.0.dist-info → buildlog-0.3.0.dist-info}/entry_points.txt +0 -0
- {buildlog-0.1.0.dist-info → buildlog-0.3.0.dist-info}/licenses/LICENSE +0 -0
buildlog/skills.py
CHANGED
@@ -5,11 +5,13 @@ from __future__ import annotations
 __all__ = [
     "Skill",
     "SkillSet",
+    "ConfidenceConfig",  # Re-exported for convenience
     "_deduplicate_insights",
     "_calculate_confidence",
     "_extract_tags",
     "_generate_skill_id",
     "_to_imperative",
+    "_build_confidence_metrics",
     "generate_skills",
     "format_skills",
 ]
@@ -19,10 +21,13 @@ import json
 import logging
 import re
 from dataclasses import dataclass, field
-from datetime import
+from datetime import date, datetime, timezone
 from pathlib import Path
 from typing import Final, Literal, TypedDict
 
+from buildlog.confidence import ConfidenceConfig, ConfidenceMetrics
+from buildlog.confidence import calculate_confidence as calculate_continuous_confidence
+from buildlog.confidence import get_confidence_tier
 from buildlog.distill import CATEGORIES, PatternDict, distill_all
 from buildlog.embeddings import EmbeddingBackend, get_backend, get_default_backend
 
@@ -39,8 +44,8 @@ OutputFormat = Literal["yaml", "json", "markdown", "rules", "settings"]
 ConfidenceLevel = Literal["high", "medium", "low"]
 
 
-class
-    """
+class _SkillDictRequired(TypedDict):
+    """Required fields for skill dictionary (base class)."""
 
     id: str
     category: str
@@ -51,6 +56,17 @@ class SkillDict(TypedDict):
     tags: list[str]
 
 
+class SkillDict(_SkillDictRequired, total=False):
+    """Type for skill dictionary representation.
+
+    Inherits required fields from _SkillDictRequired.
+    Optional fields are only present when continuous confidence is enabled.
+    """
+
+    confidence_score: float
+    confidence_tier: str
+
+
 class SkillSetDict(TypedDict):
     """Type for full skill set dictionary."""
 
@@ -66,6 +82,17 @@ class Skill:
 
     Represents a single actionable rule derived from one or more
     similar insights across buildlog entries.
+
+    Attributes:
+        id: Stable identifier for the skill.
+        category: Category (architectural, workflow, etc.).
+        rule: The actionable rule text.
+        frequency: How many times this pattern was seen.
+        confidence: Discrete confidence level (high/medium/low).
+        sources: List of source files where this pattern appeared.
+        tags: Extracted technology/concept tags.
+        confidence_score: Continuous confidence score (0-1), if calculated.
+        confidence_tier: Descriptive tier (speculative/provisional/stable/entrenched).
     """
 
     id: str
@@ -75,10 +102,16 @@ class Skill:
     confidence: ConfidenceLevel
     sources: list[str] = field(default_factory=list)
     tags: list[str] = field(default_factory=list)
+    confidence_score: float | None = None
+    confidence_tier: str | None = None
 
     def to_dict(self) -> SkillDict:
-        """Convert to dictionary for serialization.
-
+        """Convert to dictionary for serialization.
+
+        Only includes optional fields (confidence_score, confidence_tier)
+        when they are set.
+        """
+        result = SkillDict(
             id=self.id,
             category=self.category,
             rule=self.rule,
@@ -87,6 +120,11 @@ class Skill:
             sources=self.sources,
             tags=self.tags,
         )
+        if self.confidence_score is not None:
+            result["confidence_score"] = self.confidence_score
+        if self.confidence_tier is not None:
+            result["confidence_tier"] = self.confidence_tier
+        return result
 
 
 @dataclass
@@ -134,8 +172,6 @@ def _generate_skill_id(category: str, rule: str) -> str:
     return f"{prefix}-{rule_hash}"
 
 
-
-
 def _calculate_confidence(
     frequency: int,
     most_recent_date: date | None,
@@ -164,7 +200,10 @@ def _calculate_confidence(
     if most_recent_date:
         recency_days = (reference_date - most_recent_date).days
 
-        if
+        if (
+            frequency >= HIGH_CONFIDENCE_FREQUENCY
+            and recency_days < HIGH_CONFIDENCE_RECENCY_DAYS
+        ):
             return "high"
         elif frequency >= MEDIUM_CONFIDENCE_FREQUENCY:
             return "medium"
@@ -179,12 +218,44 @@ def _extract_tags(rule: str) -> list[str]:
     """
     # Common tech/concept terms to extract as tags
     known_tags = {
-        "api",
-        "
-        "
-        "
-        "
-        "
+        "api",
+        "http",
+        "json",
+        "yaml",
+        "sql",
+        "database",
+        "cache",
+        "redis",
+        "supabase",
+        "postgres",
+        "mongodb",
+        "git",
+        "docker",
+        "kubernetes",
+        "aws",
+        "gcp",
+        "azure",
+        "react",
+        "python",
+        "typescript",
+        "javascript",
+        "rust",
+        "go",
+        "test",
+        "testing",
+        "ci",
+        "cd",
+        "deploy",
+        "error",
+        "retry",
+        "timeout",
+        "auth",
+        "jwt",
+        "oauth",
+        "plugin",
+        "middleware",
+        "async",
+        "sync",
     }
 
     # Word variants that map to canonical tags
@@ -220,7 +291,7 @@ def _deduplicate_insights(
     patterns: list[PatternDict],
     threshold: float = MIN_SIMILARITY_THRESHOLD,
     backend: EmbeddingBackend | None = None,
-) -> list[tuple[str, int, list[str], date | None]]:
+) -> list[tuple[str, int, list[str], date | None, date | None]]:
     """Deduplicate similar insights into merged rules.
 
     Args:
@@ -229,7 +300,8 @@ def _deduplicate_insights(
         backend: Embedding backend for similarity computation.
 
     Returns:
-        List of (rule, frequency, sources, most_recent_date) tuples.
+        List of (rule, frequency, sources, most_recent_date, earliest_date) tuples.
+        Both dates can be None if no valid dates are found in the patterns.
     """
     if not patterns:
         return []
@@ -256,7 +328,7 @@ def _deduplicate_insights(
             groups.append([pattern])
 
     # Convert groups to deduplicated rules
-    results: list[tuple[str, int, list[str], date | None]] = []
+    results: list[tuple[str, int, list[str], date | None, date | None]] = []
 
     for group in groups:
         # Use the shortest insight as the canonical rule (often cleaner)
@@ -265,7 +337,7 @@ def _deduplicate_insights(
         frequency = len(group)
         sources = sorted(set(p["source"] for p in group))
 
-        # Find most recent
+        # Find most recent and earliest dates
         dates: list[date] = []
         for p in group:
             try:
@@ -274,16 +346,58 @@ def _deduplicate_insights(
                 pass
 
         most_recent = max(dates) if dates else None
-
+        earliest = min(dates) if dates else None
+        results.append((rule, frequency, sources, most_recent, earliest))
 
     return results
 
 
+def _build_confidence_metrics(
+    frequency: int,
+    most_recent: date | None,
+    earliest: date | None,
+) -> ConfidenceMetrics:
+    """Build ConfidenceMetrics from deduplication results.
+
+    Args:
+        frequency: Number of times the pattern was seen.
+        most_recent: Most recent occurrence date.
+        earliest: Earliest occurrence date.
+
+    Returns:
+        ConfidenceMetrics for continuous confidence calculation.
+    """
+    # Use midnight UTC for date-based timestamps
+    now = datetime.now(timezone.utc)
+
+    if most_recent is not None:
+        last_reinforced = datetime(
+            most_recent.year, most_recent.month, most_recent.day, tzinfo=timezone.utc
+        )
+    else:
+        last_reinforced = now
+
+    if earliest is not None:
+        first_seen = datetime(
+            earliest.year, earliest.month, earliest.day, tzinfo=timezone.utc
+        )
+    else:
+        first_seen = last_reinforced
+
+    return ConfidenceMetrics(
+        reinforcement_count=frequency,
+        last_reinforced=last_reinforced,
+        contradiction_count=0,  # Deferred: no contradiction tracking yet
+        first_seen=first_seen,
+    )
+
+
 def generate_skills(
     buildlog_dir: Path,
     min_frequency: int = 1,
     since_date: date | None = None,
     embedding_backend: str | None = None,
+    confidence_config: ConfidenceConfig | None = None,
 ) -> SkillSet:
     """Generate skills from buildlog patterns.
 
@@ -293,6 +407,9 @@ def generate_skills(
         since_date: Only include patterns from this date onward.
         embedding_backend: Name of embedding backend for deduplication.
             Options: "token" (default), "sentence-transformers", "openai".
+        confidence_config: Configuration for continuous confidence scoring.
+            If provided, skills will include confidence_score and confidence_tier.
+            If None, only discrete confidence levels (high/medium/low) are computed.
 
     Returns:
         SkillSet with generated skills.
@@ -301,9 +418,16 @@ def generate_skills(
     result = distill_all(buildlog_dir, since=since_date)
 
     # Get embedding backend
-    backend =
+    backend = (
+        get_backend(embedding_backend)  # type: ignore[arg-type]
+        if embedding_backend
+        else get_default_backend()
+    )
     logger.info("Using embedding backend: %s", backend.name)
 
+    # Capture reference time for confidence calculations
+    t_now = datetime.now(timezone.utc) if confidence_config else None
+
     skills_by_category: dict[str, list[Skill]] = {}
 
     for category in CATEGORIES:
@@ -311,10 +435,22 @@ def generate_skills(
         deduplicated = _deduplicate_insights(patterns, backend=backend)
 
         skills: list[Skill] = []
-        for rule, frequency, sources, most_recent in deduplicated:
+        for rule, frequency, sources, most_recent, earliest in deduplicated:
            if frequency < min_frequency:
                 continue
 
+            # Calculate continuous confidence if config provided
+            confidence_score: float | None = None
+            confidence_tier: str | None = None
+            if confidence_config is not None and t_now is not None:
+                metrics = _build_confidence_metrics(frequency, most_recent, earliest)
+                confidence_score = calculate_continuous_confidence(
+                    metrics, confidence_config, t_now
+                )
+                confidence_tier = get_confidence_tier(
+                    confidence_score, confidence_config
+                ).value
+
             skill = Skill(
                 id=_generate_skill_id(category, rule),
                 category=category,
@@ -323,6 +459,8 @@ def generate_skills(
                 confidence=_calculate_confidence(frequency, most_recent),
                 sources=sources,
                 tags=_extract_tags(rule),
+                confidence_score=confidence_score,
+                confidence_tier=confidence_tier,
             )
             skills.append(skill)
 
@@ -331,7 +469,7 @@ def generate_skills(
         skills_by_category[category] = skills
 
     return SkillSet(
-        generated_at=datetime.now(
+        generated_at=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
         source_entries=result.entry_count,
         skills=skills_by_category,
     )
@@ -347,7 +485,9 @@ def _format_yaml(skill_set: SkillSet) -> str:
     ) from e
 
     data = skill_set.to_dict()
-    return yaml.dump(
+    return yaml.dump(
+        data, default_flow_style=False, allow_unicode=True, sort_keys=False
+    )
 
 
 def _format_json(skill_set: SkillSet) -> str:
@@ -361,8 +501,10 @@ def _format_markdown(skill_set: SkillSet) -> str:
 
     lines.append("## Learned Skills")
     lines.append("")
-    lines.append(
-
+    lines.append(
+        f"Based on {skill_set.source_entries} buildlog entries, "
+        f"{skill_set.total_skills} actionable skills have emerged:"
+    )
     lines.append("")
 
     category_titles = {
@@ -384,7 +526,9 @@ def _format_markdown(skill_set: SkillSet) -> str:
             confidence_badge = {"high": "🟢", "medium": "🟡", "low": "⚪"}.get(
                 skill.confidence, ""
             )
-            freq_text =
+            freq_text = (
+                f"seen {skill.frequency}x" if skill.frequency > 1 else "seen once"
+            )
             lines.append(f"- {confidence_badge} **{skill.rule}** ({freq_text})")
 
         lines.append("")
@@ -397,16 +541,23 @@ def _format_markdown(skill_set: SkillSet) -> str:
 
 # Pre-compiled patterns for _to_imperative (module-level for efficiency)
 _NEGATIVE_PATTERNS = tuple(
-    re.compile(p)
-
-    r"\
+    re.compile(p)
+    for p in (
+        r"\bdon't\b",
+        r"\bdo not\b",
+        r"\bnever\b",
+        r"\bavoid\b",
+        r"\bstop\b",
+        r"\bshouldn't\b",
+        r"\bshould not\b",
     )
 )
 
 # Comparison patterns - intentionally narrow to avoid false positives
 # "over" alone matches "all over", "game over" etc. so we require context
 _COMPARISON_PATTERNS = tuple(
-    re.compile(p)
+    re.compile(p)
+    for p in (
         r"\binstead of\b",
         r"\brather than\b",
         r"\bbetter than\b",
@@ -416,10 +567,22 @@ _COMPARISON_PATTERNS = tuple(
 
 # Verbs that need -ing form when following "Avoid" or bare "Prefer"
 _VERB_TO_GERUND: Final[dict[str, str]] = {
-    "use": "using",
-    "
-    "
-    "
+    "use": "using",
+    "run": "running",
+    "make": "making",
+    "write": "writing",
+    "read": "reading",
+    "put": "putting",
+    "get": "getting",
+    "set": "setting",
+    "add": "adding",
+    "create": "creating",
+    "delete": "deleting",
+    "call": "calling",
+    "pass": "passing",
+    "send": "sending",
+    "store": "storing",
+    "cache": "caching",
 }
 
 
@@ -456,8 +619,14 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
 
     # Already has a confidence modifier - just capitalize and return
    confidence_modifiers = (
-        "always",
-        "
+        "always",
+        "never",
+        "prefer",
+        "avoid",
+        "consider",
+        "remember",
+        "don't",
+        "do not",
     )
     if any(rule_lower.startswith(word) for word in confidence_modifiers):
         return rule[0].upper() + rule[1:]
@@ -485,16 +654,23 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
     # Clean up the rule for prefixing
     # Remove leading "should" type words (order matters - longer first)
     cleaners = [
-        "you shouldn't ",
-        "
-        "
-        "
+        "you shouldn't ",
+        "we shouldn't ",
+        "shouldn't ",
+        "you should not ",
+        "we should not ",
+        "should not ",
+        "you should ",
+        "we should ",
+        "should ",
+        "it's better to ",
+        "it is better to ",
     ]
     cleaned = rule
     cleaned_lower = rule_lower
     for cleaner in cleaners:
         if cleaned_lower.startswith(cleaner):
-            cleaned = cleaned[len(cleaner):]
+            cleaned = cleaned[len(cleaner) :]
             cleaned_lower = cleaned.lower()
             break
 
@@ -505,10 +681,12 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
 
     # Avoid double words: "Avoid avoid using..." -> "Avoid using..."
     prefix_lower = prefix.lower()
-    if cleaned_lower.startswith(prefix_lower + " ") or cleaned_lower.startswith(
+    if cleaned_lower.startswith(prefix_lower + " ") or cleaned_lower.startswith(
+        prefix_lower + "ing "
+    ):
         first_space = cleaned.find(" ")
         if first_space > 0:
-            cleaned = cleaned[first_space + 1:]
+            cleaned = cleaned[first_space + 1 :]
             cleaned_lower = cleaned.lower()
 
     # For "Avoid" and bare "Prefer", convert leading verbs to gerund form
@@ -518,7 +696,7 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
         first_word = cleaned_lower.split()[0] if cleaned_lower else ""
         if first_word in _VERB_TO_GERUND:
             gerund = _VERB_TO_GERUND[first_word]
-            cleaned = gerund + cleaned[len(first_word):]
+            cleaned = gerund + cleaned[len(first_word) :]
             cleaned_lower = cleaned.lower()
 
     # Lowercase first char if we're adding a prefix (but not for gerunds which are already lower)
@@ -538,8 +716,10 @@ def _format_rules(skill_set: SkillSet) -> str:
 
     lines.append("# Project Rules")
     lines.append("")
-    lines.append(
-
+    lines.append(
+        f"*Auto-generated from {skill_set.source_entries} buildlog entries. "
+        f"{skill_set.total_skills} rules extracted.*"
+    )
     lines.append("")
 
     # Collect all skills, sort by confidence then frequency
@@ -625,6 +805,8 @@ def format_skills(skill_set: SkillSet, fmt: OutputFormat = "yaml") -> str:
 
     formatter = formatters.get(fmt)
     if formatter is None:
-        raise ValueError(
+        raise ValueError(
+            f"Unknown format: {fmt}. Must be one of: {list(formatters.keys())}"
+        )
 
     return formatter(skill_set)
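Taken together, the skills.py changes let generate_skills() attach a continuous confidence score and tier to each skill when a ConfidenceConfig is supplied, and Skill.to_dict() emits those keys only when they are set. A minimal usage sketch, assuming ConfidenceConfig() can be constructed with its defaults (its actual fields live in the new buildlog/confidence.py, which this diff does not show):

    from pathlib import Path

    from buildlog.confidence import ConfidenceConfig
    from buildlog.skills import generate_skills

    # Assumption: ConfidenceConfig() is constructible with defaults; its fields
    # are defined in the new buildlog/confidence.py (not shown in this diff).
    skill_set = generate_skills(Path("buildlog"), confidence_config=ConfidenceConfig())

    for category, skills in skill_set.skills.items():
        for skill in skills:
            data = skill.to_dict()
            # confidence_score / confidence_tier are present only because a
            # ConfidenceConfig was supplied; otherwise to_dict() omits them.
            print(category, data["rule"],
                  data.get("confidence_score"), data.get("confidence_tier"))

Without a confidence_config, behaviour falls back to the existing discrete high/medium/low levels from _calculate_confidence.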
buildlog/stats.py
CHANGED
@@ -12,7 +12,7 @@ __all__ = [
 import json
 import logging
 from dataclasses import dataclass, field
-from datetime import
+from datetime import date, datetime, timedelta, timezone
 from itertools import takewhile
 from pathlib import Path
 from typing import Final, NamedTuple, TypedDict
@@ -315,7 +315,9 @@ def calculate_stats(
     # Parse all entries using functional map/filter pattern
     parsed_or_none = [
         _parse_entry(entry_path, date_str)
-        for entry_path, date_str in iter_buildlog_entries(
+        for entry_path, date_str in iter_buildlog_entries(
+            buildlog_dir, since=since_date
+        )
     ]
     entries = [e for e in parsed_or_none if e is not None]
 
@@ -326,8 +328,8 @@ def calculate_stats(
 
     entry_dates = [e.entry_date for e in entries if e.entry_date]
 
-    this_week = sum(1 for d in entry_dates if d and d >= week_ago)
-    this_month = sum(1 for d in entry_dates if d and d >= month_start)
+    this_week = sum(1 for d in entry_dates if d and d >= week_ago)  # type: ignore[misc]
+    this_month = sum(1 for d in entry_dates if d and d >= month_start)  # type: ignore[misc]
 
     with_improvements = sum(1 for e in entries if e.has_improvements)
     coverage_percent = int((with_improvements / len(entries) * 100) if entries else 0)
@@ -351,7 +353,7 @@ def calculate_stats(
         warnings.insert(0, "No buildlog entries found")
 
     return BuildlogStats(
-        generated_at=datetime.now(
+        generated_at=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
         entries=EntryStats(
             total=len(entries),
             this_week=this_week,
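Both skills.py and stats.py now stamp generated_at with the same timezone-aware expression. A quick illustration of the value it produces:

    from datetime import datetime, timezone

    # ISO-8601 UTC timestamp with a trailing "Z" instead of "+00:00",
    # as used for generated_at in both modules.
    generated_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    print(generated_at)  # e.g. 2026-01-01T12:34:56.789012Z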
{buildlog-0.1.0.data → buildlog-0.3.0.data}/data/share/buildlog/post_gen.py
CHANGED
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 """Post-generation script to update CLAUDE.md with buildlog instructions."""
 
-import os
 from pathlib import Path
 
-CLAUDE_MD_SECTION =
+CLAUDE_MD_SECTION = """
 ## Build Journal
 
-After completing significant work (features, debugging sessions, deployments,
+After completing significant work (features, debugging sessions, deployments,
+2+ hour focused sessions), write a build journal entry.
 
 **Location:** `buildlog/YYYY-MM-DD-{slug}.md`
 **Template:** `buildlog/_TEMPLATE.md`
@@ -15,18 +15,21 @@ After completing significant work (features, debugging sessions, deployments, 2+
 ### Required Sections
 1. **The Goal** - What we built and why
 2. **What We Built** - Architecture diagram, components table
-3. **The Journey** - Chronological INCLUDING mistakes, wrong turns, actual
+3. **The Journey** - Chronological INCLUDING mistakes, wrong turns, actual errors
 4. **Test Results** - Actual commands run, actual outputs received
 5. **Code Samples** - Key snippets with context (not full files)
 6. **AI Experience Reflection** - Meta-commentary on the collaboration
-7. **Improvements** - Actionable learnings: architectural, workflow, tool usage
+7. **Improvements** - Actionable learnings: architectural, workflow, tool usage
 
-The **Improvements** section is critical - capture concrete insights like
+The **Improvements** section is critical - capture concrete insights like
+"Should have defined the API contract before implementing the client"
+not vague observations like "Should have planned better."
 
 **Quality bar:** Publishable as a $500+ Envato Tuts+/Manning tutorial.
 
 After significant work, ask: "Should I write a build journal entry for this?"
-
+"""
+
 
 def main():
     claude_md = Path("CLAUDE.md")
@@ -47,5 +50,6 @@ def main():
 
     print("Added Build Journal section to CLAUDE.md")
 
+
 if __name__ == "__main__":
     main()
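The body of main() between these hunks is unchanged and therefore not shown. As a rough sketch of the kind of idempotent update the script performs (the skip-if-present check is an assumption, not visible in this diff):

    def main():
        claude_md = Path("CLAUDE.md")
        # Assumed behaviour: append the Build Journal section only if it is
        # not already present, so re-running the post-gen hook is harmless.
        existing = claude_md.read_text() if claude_md.exists() else ""
        if "## Build Journal" in existing:
            return
        claude_md.write_text(existing + CLAUDE_MD_SECTION)
        print("Added Build Journal section to CLAUDE.md")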
|