repr-cli 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repr/__init__.py +1 -1
- repr/api.py +363 -62
- repr/auth.py +47 -38
- repr/change_synthesis.py +478 -0
- repr/cli.py +4103 -267
- repr/config.py +119 -11
- repr/configure.py +889 -0
- repr/cron.py +419 -0
- repr/dashboard/__init__.py +9 -0
- repr/dashboard/build.py +126 -0
- repr/dashboard/dist/assets/index-BYFVbEev.css +1 -0
- repr/dashboard/dist/assets/index-BrrhyJFO.css +1 -0
- repr/dashboard/dist/assets/index-CcEg74ts.js +270 -0
- repr/dashboard/dist/assets/index-Cerc-iA_.js +377 -0
- repr/dashboard/dist/assets/index-CjVcBW2L.css +1 -0
- repr/dashboard/dist/assets/index-Dfl3mR5E.js +377 -0
- repr/dashboard/dist/favicon.svg +4 -0
- repr/dashboard/dist/index.html +14 -0
- repr/dashboard/manager.py +234 -0
- repr/dashboard/server.py +1298 -0
- repr/db.py +980 -0
- repr/hooks.py +3 -2
- repr/loaders/__init__.py +22 -0
- repr/loaders/base.py +156 -0
- repr/loaders/claude_code.py +287 -0
- repr/loaders/clawdbot.py +313 -0
- repr/loaders/gemini_antigravity.py +381 -0
- repr/mcp_server.py +1196 -0
- repr/models.py +503 -0
- repr/openai_analysis.py +25 -0
- repr/session_extractor.py +481 -0
- repr/storage.py +360 -0
- repr/story_synthesis.py +1296 -0
- repr/templates.py +68 -4
- repr/timeline.py +710 -0
- repr/tools.py +17 -8
- {repr_cli-0.2.15.dist-info → repr_cli-0.2.17.dist-info}/METADATA +50 -10
- repr_cli-0.2.17.dist-info/RECORD +52 -0
- {repr_cli-0.2.15.dist-info → repr_cli-0.2.17.dist-info}/WHEEL +1 -1
- {repr_cli-0.2.15.dist-info → repr_cli-0.2.17.dist-info}/entry_points.txt +1 -0
- repr_cli-0.2.15.dist-info/RECORD +0 -26
- {repr_cli-0.2.15.dist-info → repr_cli-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {repr_cli-0.2.15.dist-info → repr_cli-0.2.17.dist-info}/top_level.txt +0 -0
repr/story_synthesis.py
ADDED
|
@@ -0,0 +1,1296 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Story synthesis from commits and sessions.
|
|
3
|
+
|
|
4
|
+
Creates coherent Story objects by:
|
|
5
|
+
1. Batching commits (time-ordered)
|
|
6
|
+
2. Using LLM to decide story boundaries and extract context
|
|
7
|
+
3. Linking sessions to stories based on commit overlap
|
|
8
|
+
4. Building content index per batch
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import uuid
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from typing import Callable
|
|
15
|
+
|
|
16
|
+
from openai import OpenAI
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
18
|
+
|
|
19
|
+
from .config import get_or_generate_username
|
|
20
|
+
from .models import (
|
|
21
|
+
CodeSnippet,
|
|
22
|
+
CommitData,
|
|
23
|
+
ContentIndex,
|
|
24
|
+
FileChange,
|
|
25
|
+
SessionContext,
|
|
26
|
+
Story,
|
|
27
|
+
StoryDigest,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# =============================================================================
|
|
32
|
+
# LLM Schemas
|
|
33
|
+
# =============================================================================
|
|
34
|
+
|
|
35
|
+
class StoryBoundary(BaseModel):
    """LLM-identified story boundary within a batch of commits.

    One instance describes a single coherent unit of work extracted from a
    commit batch. The same commit SHA may appear in several boundaries when
    the LLM splits a "packed" commit into multiple stories (see the
    STORY_SYNTHESIS_SYSTEM prompt rules).
    """
    # SHAs may come back from the LLM as prefixes; callers match by prefix.
    commit_shas: list[str] = Field(description="SHAs of commits that form this story")
    title: str = Field(description="One-line title for this story")
    problem: str = Field(default="", description="What problem was being solved")
    approach: str = Field(default="", description="Technical approach used")
    implementation_details: list[str] = Field(default_factory=list, description="Specific code changes and patterns")
    decisions: list[str] = Field(default_factory=list, description="Key decisions")
    tradeoffs: str = Field(default="")
    outcome: str = Field(default="")
    lessons: list[str] = Field(default_factory=list)
    # Expected values (per prompt): feature, bugfix, refactor, perf, infra, docs, test, chore.
    category: str = Field(default="feature")
    diagram: str | None = Field(default=None, description="ASCII diagram explaining the change")
    technologies: list[str] = Field(default_factory=list, description="Resume-worthy skills demonstrated")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BatchAnalysis(BaseModel):
    """LLM output for analyzing a batch of commits.

    Top-level JSON shape requested from the story-synthesis prompt; parsed
    with ``model_validate_json`` and used as the fallback container when the
    LLM call fails (one StoryBoundary per commit).
    """
    stories: list[StoryBoundary] = Field(
        description="List of coherent stories found in the commits"
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# =============================================================================
|
|
59
|
+
# Prompts
|
|
60
|
+
# =============================================================================
|
|
61
|
+
PUBLIC_STORY_SYSTEM = """You're creating structured content for a developer's "build in public" feed.
|
|
62
|
+
|
|
63
|
+
Write like a real developer logging progress, not a blog or marketing post.
|
|
64
|
+
|
|
65
|
+
Tone rules (VERY IMPORTANT):
|
|
66
|
+
- Prefer first-person when natural ("I")
|
|
67
|
+
- Be specific and concrete
|
|
68
|
+
- Sound like a Slack message to teammates
|
|
69
|
+
- Avoid grand metaphors and philosophical language
|
|
70
|
+
- Avoid generic lessons or "universal truths"
|
|
71
|
+
- Slight messiness is OK — polish is not the goal
|
|
72
|
+
- No hype, no thought-leader tone
|
|
73
|
+
|
|
74
|
+
Given a technical story, compose:
|
|
75
|
+
|
|
76
|
+
1. HOOK (<60 chars):
|
|
77
|
+
A short first-person dev-log opener.
|
|
78
|
+
Focus on a problem, realization, or change.
|
|
79
|
+
Examples:
|
|
80
|
+
- "I got tired of doing timezone math."
|
|
81
|
+
- "This kept crashing until I found why."
|
|
82
|
+
- "I finally fixed the story engine docs."
|
|
83
|
+
Avoid clickbait and drama.
|
|
84
|
+
|
|
85
|
+
2. WHAT (1 sentence):
|
|
86
|
+
What you actually changed.
|
|
87
|
+
Concrete, observable behavior only.
|
|
88
|
+
|
|
89
|
+
3. VALUE (1 sentence):
|
|
90
|
+
Why this matters to users or teammates.
|
|
91
|
+
Practical impact > abstract value.
|
|
92
|
+
|
|
93
|
+
4. INSIGHT (1 sentence):
|
|
94
|
+
A grounded takeaway from THIS change.
|
|
95
|
+
Not universal wisdom — just what you learned.
|
|
96
|
+
|
|
97
|
+
5. SHOW (optional):
|
|
98
|
+
Code or before/after only if it adds clarity.
|
|
99
|
+
|
|
100
|
+
6. POST_BODY (2–5 sentences):
|
|
101
|
+
Write the final post in a natural voice.
|
|
102
|
+
- First person
|
|
103
|
+
- Mention what changed and why
|
|
104
|
+
- Include one small detail (a file/function/user pain) for authenticity
|
|
105
|
+
- You MAY include the insight, but do NOT label it "Insight:"
|
|
106
|
+
- Should not feel templated or like a changelog
|
|
107
|
+
|
|
108
|
+
Output JSON with these exact fields:
|
|
109
|
+
- "hook": string (<60 chars)
|
|
110
|
+
- "what": string (1 sentence)
|
|
111
|
+
- "value": string (1 sentence)
|
|
112
|
+
- "insight": string (1 sentence)
|
|
113
|
+
- "show": string or null
|
|
114
|
+
- "post_body": string
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
PUBLIC_STORY_USER = """Turn this into a first-person build-in-public dev log:
|
|
119
|
+
|
|
120
|
+
Title: {title}
|
|
121
|
+
Category: {category}
|
|
122
|
+
Problem: {problem}
|
|
123
|
+
Approach: {approach}
|
|
124
|
+
Outcome: {outcome}
|
|
125
|
+
Implementation Details: {implementation_details}
|
|
126
|
+
|
|
127
|
+
Write like a developer explaining their own work.
|
|
128
|
+
|
|
129
|
+
Output valid JSON with "hook", "what", "value", "insight", "show", and "post_body" fields."""
|
|
130
|
+
|
|
131
|
+
INTERNAL_STORY_SYSTEM = """You're creating structured content for a developer's internal feed.
|
|
132
|
+
|
|
133
|
+
Write like an engineer documenting work for teammates.
|
|
134
|
+
|
|
135
|
+
Tone rules:
|
|
136
|
+
- First-person preferred
|
|
137
|
+
- Direct and practical
|
|
138
|
+
- No marketing or philosophical tone
|
|
139
|
+
- Say what happened, why, and what changed
|
|
140
|
+
- Avoid abstract language
|
|
141
|
+
|
|
142
|
+
Given a technical story:
|
|
143
|
+
|
|
144
|
+
1. HOOK (<60 chars):
|
|
145
|
+
A short first-person dev-log line.
|
|
146
|
+
|
|
147
|
+
2. WHAT (1 sentence):
|
|
148
|
+
Observable change made.
|
|
149
|
+
|
|
150
|
+
3. VALUE (1 sentence):
|
|
151
|
+
Why this helps users or the team.
|
|
152
|
+
|
|
153
|
+
4. PROBLEM (1 sentence):
|
|
154
|
+
What was broken or missing.
|
|
155
|
+
|
|
156
|
+
5. HOW (list):
|
|
157
|
+
Concrete technical actions taken (files/functions/patterns).
|
|
158
|
+
|
|
159
|
+
6. INSIGHT (1 sentence):
|
|
160
|
+
Practical lesson from this change.
|
|
161
|
+
|
|
162
|
+
7. SHOW (optional):
|
|
163
|
+
Only include if useful.
|
|
164
|
+
|
|
165
|
+
8. POST_BODY (3–6 sentences):
|
|
166
|
+
A natural internal update for teammates.
|
|
167
|
+
- First person
|
|
168
|
+
- Mention the problem briefly, what you changed, and any gotchas
|
|
169
|
+
- Reference 1–2 concrete details (file/function/config)
|
|
170
|
+
- No templated structure, no headings
|
|
171
|
+
|
|
172
|
+
Output JSON with:
|
|
173
|
+
- "hook"
|
|
174
|
+
- "what"
|
|
175
|
+
- "value"
|
|
176
|
+
- "problem"
|
|
177
|
+
- "how"
|
|
178
|
+
- "insight"
|
|
179
|
+
- "show"
|
|
180
|
+
- "post_body"
|
|
181
|
+
"""
|
|
182
|
+
|
|
183
|
+
INTERNAL_STORY_USER = """Extract this as a first-person internal dev log:
|
|
184
|
+
|
|
185
|
+
Title: {title}
|
|
186
|
+
Category: {category}
|
|
187
|
+
Problem: {problem}
|
|
188
|
+
Approach: {approach}
|
|
189
|
+
Outcome: {outcome}
|
|
190
|
+
Implementation Details: {implementation_details}
|
|
191
|
+
Decisions: {decisions}
|
|
192
|
+
Files: {files}
|
|
193
|
+
|
|
194
|
+
Write like a developer explaining their own work.
|
|
195
|
+
|
|
196
|
+
Output valid JSON with "hook", "what", "value", "problem", "how", "insight", "show", and "post_body" fields."""
|
|
197
|
+
|
|
198
|
+
STORY_SYNTHESIS_SYSTEM = """You analyze git commits and group them into coherent "stories" - logical units of work.
|
|
199
|
+
|
|
200
|
+
Your job:
|
|
201
|
+
1. Read the batch of commits
|
|
202
|
+
2. Group related commits into meaningful stories (features, fixes, refactors)
|
|
203
|
+
3. For each group, extract the WHY/WHAT/HOW context
|
|
204
|
+
|
|
205
|
+
IMPORTANT: A story should represent ONE coherent unit of value. Apply these rules:
|
|
206
|
+
|
|
207
|
+
GROUPING (consolidate):
|
|
208
|
+
- Multiple commits for the same feature → GROUP into one story
|
|
209
|
+
- A commit + its follow-up fix → GROUP together
|
|
210
|
+
|
|
211
|
+
SPLITTING (unpack):
|
|
212
|
+
- A single commit with MULTIPLE UNRELATED changes → SPLIT into separate stories
|
|
213
|
+
- Look for signs of a "packed" commit:
|
|
214
|
+
* Commit message lists multiple things ("Add X, fix Y, update Z")
|
|
215
|
+
* Files changed span unrelated areas (e.g., auth + UI + docs)
|
|
216
|
+
* Insertions/deletions suggest multiple distinct changes
|
|
217
|
+
- When splitting, the same commit SHA can appear in multiple stories
|
|
218
|
+
- Each split story should have its own distinct title, problem, and approach
|
|
219
|
+
|
|
220
|
+
Output JSON with a "stories" array. EVERY field must be filled in - do not leave any empty:
|
|
221
|
+
|
|
222
|
+
Required fields for each story:
|
|
223
|
+
- commit_shas: List of commit SHAs that form this story (REQUIRED)
|
|
224
|
+
- title: One-line title describing the work (REQUIRED)
|
|
225
|
+
- problem: What was lacking/broken/needed? WHY was this change made? (REQUIRED)
|
|
226
|
+
- approach: HOW was it solved? What strategy or pattern? (REQUIRED)
|
|
227
|
+
- implementation_details: List of SPECIFIC code changes made (REQUIRED - at least 2 items):
|
|
228
|
+
* What files were modified and how
|
|
229
|
+
* What functions/classes were added or changed
|
|
230
|
+
* What patterns or techniques were used
|
|
231
|
+
* Any APIs, libraries, or frameworks involved
|
|
232
|
+
- decisions: List of key choices made, format: ["Chose X because Y", ...]
|
|
233
|
+
- category: One of: feature, bugfix, refactor, perf, infra, docs, test, chore
|
|
234
|
+
- technologies: Resume-worthy skills demonstrated (REQUIRED - be specific):
|
|
235
|
+
* Frameworks/libraries: React, FastAPI, scikit-learn, PyTorch, Next.js, Prisma, SQLAlchemy
|
|
236
|
+
* Infrastructure: Kubernetes, Docker, Terraform, AWS Lambda, GCP, Nginx
|
|
237
|
+
* Tools/practices: CI/CD, GitHub Actions, Redis, PostgreSQL, GraphQL, REST API
|
|
238
|
+
* Patterns: WebSockets, OAuth, JWT, Event-driven, CQRS
|
|
239
|
+
* NOT just languages - those are inferred from files
|
|
240
|
+
|
|
241
|
+
Optional field:
|
|
242
|
+
- diagram: ASCII diagram explaining the change visually (null if not helpful)
|
|
243
|
+
Include a diagram ONLY when it adds clarity for:
|
|
244
|
+
* Architecture changes (component relationships, data flow)
|
|
245
|
+
* State machine or flow changes (before/after)
|
|
246
|
+
* API/interface changes (request/response flow)
|
|
247
|
+
* Refactoring (module structure changes)
|
|
248
|
+
|
|
249
|
+
Diagram style: Use simple box-and-arrow ASCII art. Max 15 lines.
|
|
250
|
+
Example:
|
|
251
|
+
```
|
|
252
|
+
Before: After:
|
|
253
|
+
[Client] [Client]
|
|
254
|
+
| |
|
|
255
|
+
v v
|
|
256
|
+
[Server] [Cache] --> [Server]
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
Rules:
|
|
260
|
+
- Every commit must appear in at least one story
|
|
261
|
+
- A packed commit may appear in MULTIPLE stories if it contains distinct changes
|
|
262
|
+
- NEVER leave problem, approach, or implementation_details empty
|
|
263
|
+
- Be specific: "Added `UserAuth` class with JWT validation" not "Added auth"
|
|
264
|
+
- Use plain engineering language in titles and summaries
|
|
265
|
+
- Avoid dramatic or philosophical phrasing
|
|
266
|
+
- Prefer literal descriptions over metaphors
|
|
267
|
+
"""
|
|
268
|
+
|
|
269
|
+
STORY_SYNTHESIS_USER = """Analyze these commits and group them into stories with FULL context:
|
|
270
|
+
|
|
271
|
+
{commits_text}
|
|
272
|
+
|
|
273
|
+
Output valid JSON with a "stories" array. Fill in ALL fields with specific details.
|
|
274
|
+
|
|
275
|
+
Example 1 - Normal story:
|
|
276
|
+
{{
|
|
277
|
+
"stories": [
|
|
278
|
+
{{
|
|
279
|
+
"commit_shas": ["abc1234"],
|
|
280
|
+
"title": "Add user authentication",
|
|
281
|
+
"problem": "Users could not log in to the application",
|
|
282
|
+
"approach": "Implemented JWT-based auth with refresh tokens",
|
|
283
|
+
"implementation_details": [
|
|
284
|
+
"Added UserAuth class in auth/user_auth.py with login() and verify_token() methods",
|
|
285
|
+
"Created JWT middleware in middleware/jwt.py using PyJWT library"
|
|
286
|
+
],
|
|
287
|
+
"decisions": ["Chose JWT over sessions for stateless scaling"],
|
|
288
|
+
"category": "feature",
|
|
289
|
+
"technologies": ["JWT", "bcrypt", "FastAPI"],
|
|
290
|
+
"diagram": null
|
|
291
|
+
}}
|
|
292
|
+
]
|
|
293
|
+
}}
|
|
294
|
+
|
|
295
|
+
Example 2 - Splitting a packed commit (same SHA in multiple stories):
|
|
296
|
+
If commit "def5678" has message "Add auth, fix navbar, update docs" and touches unrelated files:
|
|
297
|
+
{{
|
|
298
|
+
"stories": [
|
|
299
|
+
{{
|
|
300
|
+
"commit_shas": ["def5678"],
|
|
301
|
+
"title": "Add user authentication",
|
|
302
|
+
"problem": "...",
|
|
303
|
+
"approach": "...",
|
|
304
|
+
...
|
|
305
|
+
}},
|
|
306
|
+
{{
|
|
307
|
+
"commit_shas": ["def5678"],
|
|
308
|
+
"title": "Fix navbar alignment",
|
|
309
|
+
"problem": "...",
|
|
310
|
+
"approach": "...",
|
|
311
|
+
...
|
|
312
|
+
}},
|
|
313
|
+
{{
|
|
314
|
+
"commit_shas": ["def5678"],
|
|
315
|
+
"title": "Update API documentation",
|
|
316
|
+
"problem": "...",
|
|
317
|
+
"approach": "...",
|
|
318
|
+
...
|
|
319
|
+
}}
|
|
320
|
+
]
|
|
321
|
+
}}
|
|
322
|
+
|
|
323
|
+
Note: "diagram" is optional. Only include when it clarifies architecture/flow changes."""
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# =============================================================================
|
|
327
|
+
# File Change Extraction
|
|
328
|
+
# =============================================================================
|
|
329
|
+
|
|
330
|
+
def extract_file_changes_from_commits(
    commit_shas: list[str],
    project_path: str | None = None,
) -> tuple[list[FileChange], int, int]:
    """
    Extract detailed file changes from git commits.

    Per-file insertion/deletion counts are aggregated across all given
    commits. Commits that cannot be resolved are skipped silently; if no
    repository can be opened the function degrades to an empty result.

    Args:
        commit_shas: List of commit SHAs to analyze
        project_path: Path to git repo (optional, uses cwd)

    Returns:
        Tuple of (file_changes, total_insertions, total_deletions)
    """
    try:
        from pathlib import Path
        from git import Repo

        root = Path(project_path) if project_path else Path.cwd()
        repo = Repo(root, search_parent_directories=True)
    except Exception:
        # GitPython unavailable or not inside a repository: nothing to report.
        return [], 0, 0

    # path -> {"insertions": int, "deletions": int, "change_type": str}
    aggregated: dict[str, dict] = {}
    grand_ins = 0
    grand_del = 0

    for sha in commit_shas:
        try:
            commit = repo.commit(sha)

            for path, stats in commit.stats.files.items():
                ins = stats.get("insertions", 0)
                dels = stats.get("deletions", 0)

                entry = aggregated.setdefault(
                    path,
                    {"insertions": 0, "deletions": 0, "change_type": "modified"},
                )
                entry["insertions"] += ins
                entry["deletions"] += dels
                grand_ins += ins
                grand_del += dels

                # Heuristic: a file only ever touched by pure additions is
                # "added"; only ever pure deletions is "deleted"; otherwise
                # it keeps the default "modified".
                if ins > 0 and dels == 0 and entry["deletions"] == 0:
                    entry["change_type"] = "added"
                elif dels > 0 and ins == 0 and entry["insertions"] == 0:
                    entry["change_type"] = "deleted"

        except Exception:
            # Unknown SHA or unreadable commit: skip it.
            continue

    changes = [
        FileChange(
            file_path=path,
            change_type=info["change_type"],
            insertions=info["insertions"],
            deletions=info["deletions"],
        )
        for path, info in sorted(aggregated.items())
    ]

    return changes, grand_ins, grand_del
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def extract_key_snippets_from_commits(
    commit_shas: list[str],
    project_path: str | None = None,
    max_snippets: int = 3,
    max_lines: int = 15,
) -> list[CodeSnippet]:
    """
    Extract representative code snippets from commit diffs.

    Walks each commit's diff against its first parent and keeps the first
    few "meaningful" added lines per file (blank lines, imports and comment
    lines are skipped). Each file contributes at most one snippet.

    Args:
        commit_shas: List of commit SHAs
        project_path: Path to git repo
        max_snippets: Maximum number of snippets to return
        max_lines: Maximum lines per snippet

    Returns:
        List of CodeSnippet objects
    """
    try:
        from pathlib import Path
        from git import Repo

        root = Path(project_path) if project_path else Path.cwd()
        repo = Repo(root, search_parent_directories=True)
    except Exception:
        return []

    # File extension -> syntax-highlighting language tag; anything not
    # listed here is treated as binary/non-code and skipped.
    ext_to_lang = {
        ".py": "python", ".js": "javascript", ".ts": "typescript",
        ".tsx": "tsx", ".jsx": "jsx", ".go": "go", ".rs": "rust",
        ".java": "java", ".rb": "ruby", ".php": "php", ".c": "c",
        ".cpp": "cpp", ".h": "c", ".hpp": "cpp", ".cs": "csharp",
        ".swift": "swift", ".kt": "kotlin", ".sql": "sql",
        ".sh": "bash", ".yaml": "yaml", ".yml": "yaml", ".json": "json",
        ".md": "markdown", ".html": "html", ".css": "css", ".scss": "scss",
    }

    collected: list[CodeSnippet] = []
    visited_files: set[str] = set()

    for sha in commit_shas:
        if len(collected) >= max_snippets:
            break

        try:
            commit = repo.commit(sha)

            # Root commits have no parent to diff against.
            if not commit.parents:
                continue

            for patch in commit.parents[0].diff(commit, create_patch=True):
                if len(collected) >= max_snippets:
                    break

                path = patch.b_path or patch.a_path
                if not path or path in visited_files:
                    continue

                suffix = Path(path).suffix.lower()
                if suffix not in ext_to_lang:
                    continue

                visited_files.add(path)

                try:
                    raw = patch.diff.decode("utf-8", errors="ignore")
                except Exception:
                    continue

                # Added lines start with a single '+' ('+++' marks the header).
                additions = [
                    ln[1:]
                    for ln in raw.split("\n")
                    if ln.startswith("+") and not ln.startswith("+++")
                ]
                if not additions:
                    continue

                # Keep up to max_lines of meaningful additions, dropping
                # blanks, imports and comment-looking lines.
                kept: list[str] = []
                for ln in additions:
                    core = ln.strip()
                    if core and not core.startswith(("import ", "from ", "#", "//", "/*", "*")):
                        kept.append(ln)
                        if len(kept) >= max_lines:
                            break

                if len(kept) < 2:  # Need at least 2 meaningful lines
                    continue

                collected.append(CodeSnippet(
                    file_path=path,
                    language=ext_to_lang.get(suffix, ""),
                    content="\n".join(kept),
                    line_count=len(kept),
                    context=f"Changes in {Path(path).name}",
                ))

        except Exception:
            continue

    return collected
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# =============================================================================
|
|
513
|
+
# Synthesis Engine
|
|
514
|
+
# =============================================================================
|
|
515
|
+
|
|
516
|
+
class StorySynthesizer:
|
|
517
|
+
"""Synthesizes stories from commits using LLM."""
|
|
518
|
+
|
|
519
|
+
def __init__(
|
|
520
|
+
self,
|
|
521
|
+
api_key: str | None = None,
|
|
522
|
+
base_url: str | None = None,
|
|
523
|
+
model: str | None = None,
|
|
524
|
+
):
|
|
525
|
+
self.api_key = api_key
|
|
526
|
+
self.base_url = base_url
|
|
527
|
+
self._model_override = model # Explicit override
|
|
528
|
+
self._model: str | None = None # Resolved model (lazy)
|
|
529
|
+
self._client: OpenAI | None = None
|
|
530
|
+
|
|
531
|
+
@property
|
|
532
|
+
def model(self) -> str:
|
|
533
|
+
"""Get the model to use, reading from config if not set."""
|
|
534
|
+
if self._model is None:
|
|
535
|
+
self._model = self._resolve_model()
|
|
536
|
+
return self._model
|
|
537
|
+
|
|
538
|
+
@model.setter
|
|
539
|
+
def model(self, value: str):
|
|
540
|
+
"""Allow setting model directly."""
|
|
541
|
+
self._model = value
|
|
542
|
+
|
|
543
|
+
def _resolve_model(self) -> str:
|
|
544
|
+
"""Resolve model from override, config, or default."""
|
|
545
|
+
if self._model_override:
|
|
546
|
+
return self._model_override
|
|
547
|
+
|
|
548
|
+
try:
|
|
549
|
+
from .config import get_llm_config
|
|
550
|
+
llm_config = get_llm_config()
|
|
551
|
+
default_mode = llm_config.get("default", "local")
|
|
552
|
+
|
|
553
|
+
# Priority: synthesis_model > mode-specific model > default
|
|
554
|
+
if llm_config.get("synthesis_model"):
|
|
555
|
+
return llm_config["synthesis_model"]
|
|
556
|
+
|
|
557
|
+
# Use model based on configured default mode
|
|
558
|
+
if default_mode == "local" and llm_config.get("local_model"):
|
|
559
|
+
return llm_config["local_model"]
|
|
560
|
+
elif default_mode == "cloud" and llm_config.get("cloud_model"):
|
|
561
|
+
return llm_config["cloud_model"]
|
|
562
|
+
|
|
563
|
+
# Fallback to any configured model
|
|
564
|
+
if llm_config.get("local_model"):
|
|
565
|
+
return llm_config["local_model"]
|
|
566
|
+
if llm_config.get("cloud_model"):
|
|
567
|
+
return llm_config["cloud_model"]
|
|
568
|
+
except Exception:
|
|
569
|
+
pass
|
|
570
|
+
|
|
571
|
+
return "gpt-4o-mini" # Final fallback
|
|
572
|
+
|
|
573
|
+
    def _get_client(self) -> OpenAI:
        """Get or create OpenAI client.

        Credentials are resolved lazily, in priority order: explicit
        constructor args, BYOK config, local LLM config, LiteLLM config,
        then the OPENAI_API_KEY environment variable. The first source that
        yields a key wins; its base URL is used unless one was already set.
        The client is cached after the first successful construction.

        Raises:
            ValueError: If no API key can be found in any source.
        """
        if self._client is None:
            import os

            api_key = self.api_key
            base_url = self.base_url

            if not api_key:
                # Config lookups are best-effort: any failure here simply
                # falls through to the environment-variable check below.
                try:
                    from .config import get_byok_config, get_llm_config, get_litellm_config

                    # Check BYOK first
                    byok = get_byok_config("openai")
                    if byok and byok.get("api_key"):
                        api_key = byok["api_key"]
                        base_url = base_url or byok.get("base_url")

                    # Check local LLM config
                    if not api_key:
                        llm_config = get_llm_config()
                        if llm_config.get("local_api_key"):
                            api_key = llm_config["local_api_key"]
                            base_url = base_url or llm_config.get("local_api_url")

                    # Check LiteLLM
                    if not api_key:
                        litellm_url, litellm_key = get_litellm_config()
                        if litellm_key:
                            api_key = litellm_key
                            base_url = base_url or litellm_url
                except Exception:
                    pass

            # Last resort: plain environment variable.
            if not api_key:
                api_key = os.getenv("OPENAI_API_KEY")

            if not api_key:
                raise ValueError("No API key found. Configure via 'repr llm byok openai <key>'")

            self._client = OpenAI(api_key=api_key, base_url=base_url)

        return self._client
|
|
616
|
+
|
|
617
|
+
def _format_commits_for_prompt(self, commits: list[CommitData]) -> str:
|
|
618
|
+
"""Format commits for LLM prompt."""
|
|
619
|
+
lines = []
|
|
620
|
+
for c in commits:
|
|
621
|
+
lines.append(f"SHA: {c.sha[:8]}")
|
|
622
|
+
lines.append(f"Message: {c.message}")
|
|
623
|
+
lines.append(f"Files: {', '.join(c.files[:10])}")
|
|
624
|
+
if c.insertions or c.deletions:
|
|
625
|
+
lines.append(f"Changes: +{c.insertions}/-{c.deletions}")
|
|
626
|
+
lines.append("")
|
|
627
|
+
return "\n".join(lines)
|
|
628
|
+
|
|
629
|
+
async def synthesize_batch(
|
|
630
|
+
self,
|
|
631
|
+
commits: list[CommitData],
|
|
632
|
+
sessions: list[SessionContext] | None = None,
|
|
633
|
+
) -> tuple[list[Story], ContentIndex]:
|
|
634
|
+
"""
|
|
635
|
+
Synthesize stories from a batch of commits.
|
|
636
|
+
|
|
637
|
+
Args:
|
|
638
|
+
commits: Commits to analyze (ordered by time)
|
|
639
|
+
sessions: Optional sessions to link
|
|
640
|
+
|
|
641
|
+
Returns:
|
|
642
|
+
Tuple of (stories, updated_index)
|
|
643
|
+
"""
|
|
644
|
+
if not commits:
|
|
645
|
+
return [], ContentIndex()
|
|
646
|
+
|
|
647
|
+
# Get LLM analysis (using sync client to avoid event loop cleanup issues)
|
|
648
|
+
client = self._get_client()
|
|
649
|
+
commits_text = self._format_commits_for_prompt(commits)
|
|
650
|
+
|
|
651
|
+
try:
|
|
652
|
+
response = client.chat.completions.create(
|
|
653
|
+
model=self.model.split("/")[-1] if "/" in self.model else self.model,
|
|
654
|
+
messages=[
|
|
655
|
+
{"role": "system", "content": STORY_SYNTHESIS_SYSTEM},
|
|
656
|
+
{"role": "user", "content": STORY_SYNTHESIS_USER.format(
|
|
657
|
+
commits_text=commits_text
|
|
658
|
+
)},
|
|
659
|
+
],
|
|
660
|
+
response_format={"type": "json_object"},
|
|
661
|
+
temperature=0.3,
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
content = response.choices[0].message.content
|
|
665
|
+
|
|
666
|
+
# Strip markdown code fences if present (many models wrap JSON in ```json blocks)
|
|
667
|
+
content = content.strip()
|
|
668
|
+
if content.startswith("```"):
|
|
669
|
+
# Remove opening fence
|
|
670
|
+
first_newline = content.find("\n")
|
|
671
|
+
if first_newline > 0:
|
|
672
|
+
content = content[first_newline + 1:]
|
|
673
|
+
# Remove closing fence
|
|
674
|
+
if content.endswith("```"):
|
|
675
|
+
content = content[:-3].rstrip()
|
|
676
|
+
|
|
677
|
+
# Debug: show raw response if REPR_DEBUG is set
|
|
678
|
+
import os
|
|
679
|
+
if os.environ.get("REPR_DEBUG"):
|
|
680
|
+
print(f"DEBUG: Raw LLM response ({len(content)} chars):")
|
|
681
|
+
print(content[:1000])
|
|
682
|
+
|
|
683
|
+
analysis = BatchAnalysis.model_validate_json(content)
|
|
684
|
+
|
|
685
|
+
except Exception as e:
|
|
686
|
+
# Log the error for debugging
|
|
687
|
+
import os
|
|
688
|
+
if os.environ.get("REPR_DEBUG"):
|
|
689
|
+
print(f"DEBUG: Exception in LLM call: {type(e).__name__}: {e}")
|
|
690
|
+
|
|
691
|
+
# Fallback: each commit is its own story
|
|
692
|
+
analysis = BatchAnalysis(stories=[
|
|
693
|
+
StoryBoundary(
|
|
694
|
+
commit_shas=[c.sha],
|
|
695
|
+
title=c.message.split("\n")[0][:80],
|
|
696
|
+
category="chore",
|
|
697
|
+
)
|
|
698
|
+
for c in commits
|
|
699
|
+
])
|
|
700
|
+
|
|
701
|
+
# Build commit lookup - support both full and prefix matching
|
|
702
|
+
commit_map = {c.sha: c for c in commits}
|
|
703
|
+
|
|
704
|
+
def find_commit_by_sha(sha: str) -> CommitData | None:
|
|
705
|
+
"""Find commit by full or prefix SHA."""
|
|
706
|
+
if sha in commit_map:
|
|
707
|
+
return commit_map[sha]
|
|
708
|
+
# Try prefix matching
|
|
709
|
+
for full_sha, commit in commit_map.items():
|
|
710
|
+
if full_sha.startswith(sha):
|
|
711
|
+
return commit
|
|
712
|
+
return None
|
|
713
|
+
|
|
714
|
+
# Create stories
|
|
715
|
+
now = datetime.now(timezone.utc)
|
|
716
|
+
stories = []
|
|
717
|
+
|
|
718
|
+
for boundary in analysis.stories:
|
|
719
|
+
# Get commits for this story (with prefix matching)
|
|
720
|
+
story_commits = []
|
|
721
|
+
matched_shas = []
|
|
722
|
+
for sha in boundary.commit_shas:
|
|
723
|
+
commit = find_commit_by_sha(sha)
|
|
724
|
+
if commit:
|
|
725
|
+
story_commits.append(commit)
|
|
726
|
+
matched_shas.append(commit.sha) # Use full SHA
|
|
727
|
+
|
|
728
|
+
if not story_commits:
|
|
729
|
+
continue
|
|
730
|
+
|
|
731
|
+
# Aggregate files
|
|
732
|
+
all_files = set()
|
|
733
|
+
for c in story_commits:
|
|
734
|
+
all_files.update(c.files)
|
|
735
|
+
|
|
736
|
+
# Calculate timespan
|
|
737
|
+
timestamps = [c.timestamp for c in story_commits]
|
|
738
|
+
started_at = min(timestamps)
|
|
739
|
+
ended_at = max(timestamps)
|
|
740
|
+
|
|
741
|
+
# Find linked sessions (by commit overlap)
|
|
742
|
+
linked_sessions = []
|
|
743
|
+
if sessions:
|
|
744
|
+
for session in sessions:
|
|
745
|
+
if any(sha in session.linked_commits for sha in boundary.commit_shas):
|
|
746
|
+
linked_sessions.append(session.session_id)
|
|
747
|
+
|
|
748
|
+
# Use LLM-extracted technologies, fall back to file-based detection
|
|
749
|
+
files_list = sorted(all_files)[:50] # Cap at 50 files
|
|
750
|
+
technologies = boundary.technologies if boundary.technologies else self._detect_tech_stack(files_list)
|
|
751
|
+
|
|
752
|
+
# Extract detailed file changes and snippets for recall
|
|
753
|
+
file_changes, total_ins, total_del = extract_file_changes_from_commits(matched_shas)
|
|
754
|
+
key_snippets = extract_key_snippets_from_commits(matched_shas, max_snippets=3)
|
|
755
|
+
|
|
756
|
+
# Deterministic ID based on sorted commit SHAs + title
|
|
757
|
+
# Include title to differentiate stories split from the same packed commit
|
|
758
|
+
sorted_shas = sorted(matched_shas)
|
|
759
|
+
id_input = f"repr-story-{'-'.join(sorted_shas)}-{boundary.title}"
|
|
760
|
+
story_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, id_input))
|
|
761
|
+
|
|
762
|
+
# Get author: profile username > GPG-derived mnemonic > git author > unknown
|
|
763
|
+
if identity := get_or_generate_username():
|
|
764
|
+
author_name = identity
|
|
765
|
+
elif story_commits and story_commits[0].author:
|
|
766
|
+
author_name = story_commits[0].author
|
|
767
|
+
else:
|
|
768
|
+
author_name = "unknown"
|
|
769
|
+
|
|
770
|
+
# Get email from first commit for Gravatar
|
|
771
|
+
author_email = story_commits[0].author_email if story_commits else ""
|
|
772
|
+
|
|
773
|
+
story = Story(
|
|
774
|
+
id=story_id,
|
|
775
|
+
created_at=now,
|
|
776
|
+
updated_at=now,
|
|
777
|
+
author_name=author_name,
|
|
778
|
+
author_email=author_email,
|
|
779
|
+
commit_shas=matched_shas, # Use full matched SHAs
|
|
780
|
+
session_ids=linked_sessions,
|
|
781
|
+
title=boundary.title,
|
|
782
|
+
problem=boundary.problem,
|
|
783
|
+
approach=boundary.approach,
|
|
784
|
+
implementation_details=boundary.implementation_details,
|
|
785
|
+
decisions=boundary.decisions,
|
|
786
|
+
tradeoffs=boundary.tradeoffs,
|
|
787
|
+
outcome=boundary.outcome,
|
|
788
|
+
lessons=boundary.lessons,
|
|
789
|
+
category=boundary.category,
|
|
790
|
+
technologies=technologies,
|
|
791
|
+
files=files_list,
|
|
792
|
+
started_at=started_at,
|
|
793
|
+
ended_at=ended_at,
|
|
794
|
+
diagram=boundary.diagram,
|
|
795
|
+
# Recall data
|
|
796
|
+
file_changes=file_changes,
|
|
797
|
+
key_snippets=key_snippets,
|
|
798
|
+
total_insertions=total_ins,
|
|
799
|
+
total_deletions=total_del,
|
|
800
|
+
)
|
|
801
|
+
stories.append(story)
|
|
802
|
+
|
|
803
|
+
# Build index for this batch
|
|
804
|
+
index = self._build_index(stories)
|
|
805
|
+
|
|
806
|
+
return stories, index
|
|
807
|
+
|
|
808
|
+
def _build_index(self, stories: list[Story]) -> ContentIndex:
    """Construct a ContentIndex mapping files, keywords, and weeks to story IDs."""
    index = ContentIndex(
        last_updated=datetime.now(timezone.utc),
        story_count=len(stories),
    )

    for story in stories:
        # Map each touched file back to this story.
        for path in story.files:
            index.files_to_stories.setdefault(path, []).append(story.id)

        # Lightweight keyword index built from title + problem text.
        keywords = self._extract_keywords(story.title + " " + story.problem)
        for word in keywords:
            index.keywords_to_stories.setdefault(word, []).append(story.id)

        # Bucket stories by week label (e.g. "2024-W07").
        if story.started_at:
            week_key = story.started_at.strftime("%Y-W%W")
            index.by_week.setdefault(week_key, []).append(story.id)

        # Compact digest for quick scanning without loading full stories.
        index.story_digests.append(StoryDigest(
            story_id=story.id,
            title=story.title,
            problem_keywords=keywords[:10],
            files=story.files[:5],
            tech_stack=self._detect_tech_stack(story.files),
            category=story.category,
            timestamp=story.started_at or story.created_at,
        ))

    return index
|
|
848
|
+
|
|
849
|
+
def _extract_keywords(self, text: str) -> list[str]:
|
|
850
|
+
"""Extract keywords from text (simple approach)."""
|
|
851
|
+
import re
|
|
852
|
+
|
|
853
|
+
# Split on non-word chars, lowercase, filter short words
|
|
854
|
+
words = re.findall(r'\b[a-z]+\b', text.lower())
|
|
855
|
+
stopwords = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'for', 'to', 'in', 'on', 'of', 'and', 'or', 'with', 'from'}
|
|
856
|
+
keywords = [w for w in words if len(w) > 2 and w not in stopwords]
|
|
857
|
+
|
|
858
|
+
# Dedupe while preserving order
|
|
859
|
+
seen = set()
|
|
860
|
+
result = []
|
|
861
|
+
for kw in keywords:
|
|
862
|
+
if kw not in seen:
|
|
863
|
+
seen.add(kw)
|
|
864
|
+
result.append(kw)
|
|
865
|
+
|
|
866
|
+
return result
|
|
867
|
+
|
|
868
|
+
def _detect_tech_stack(self, files: list[str]) -> list[str]:
|
|
869
|
+
"""Detect technologies from file extensions/names."""
|
|
870
|
+
tech = set()
|
|
871
|
+
|
|
872
|
+
ext_map = {
|
|
873
|
+
'.py': 'Python',
|
|
874
|
+
'.ts': 'TypeScript',
|
|
875
|
+
'.tsx': 'React',
|
|
876
|
+
'.js': 'JavaScript',
|
|
877
|
+
'.jsx': 'React',
|
|
878
|
+
'.go': 'Go',
|
|
879
|
+
'.rs': 'Rust',
|
|
880
|
+
'.vue': 'Vue',
|
|
881
|
+
'.sql': 'SQL',
|
|
882
|
+
'.prisma': 'Prisma',
|
|
883
|
+
'.graphql': 'GraphQL',
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
file_map = {
|
|
887
|
+
'Dockerfile': 'Docker',
|
|
888
|
+
'docker-compose': 'Docker',
|
|
889
|
+
'package.json': 'Node.js',
|
|
890
|
+
'pyproject.toml': 'Python',
|
|
891
|
+
'Cargo.toml': 'Rust',
|
|
892
|
+
'go.mod': 'Go',
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
for f in files:
|
|
896
|
+
# Check extensions
|
|
897
|
+
for ext, name in ext_map.items():
|
|
898
|
+
if f.endswith(ext):
|
|
899
|
+
tech.add(name)
|
|
900
|
+
|
|
901
|
+
# Check filenames
|
|
902
|
+
for fname, name in file_map.items():
|
|
903
|
+
if fname in f:
|
|
904
|
+
tech.add(name)
|
|
905
|
+
|
|
906
|
+
return sorted(tech)
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
# =============================================================================
|
|
910
|
+
# Convenience Functions
|
|
911
|
+
# =============================================================================
|
|
912
|
+
|
|
913
|
+
async def synthesize_stories(
    commits: list[CommitData],
    sessions: list[SessionContext] | None = None,
    api_key: str | None = None,
    model: str = "gpt-4o-mini",
    batch_size: int = 25,
    progress_callback: Callable[[int, int], None] | None = None,
) -> tuple[list[Story], ContentIndex]:
    """
    Synthesize stories from commits with batching.

    Args:
        commits: All commits to process
        sessions: Optional sessions for enrichment
        api_key: API key for LLM
        model: Model to use
        batch_size: Commits per batch
        progress_callback: Optional progress callback(current, total)

    Returns:
        Tuple of (all_stories, merged_index)
    """
    synthesizer = StorySynthesizer(api_key=api_key, model=model)

    all_stories: list[Story] = []
    merged_index = ContentIndex(last_updated=datetime.now(timezone.utc))

    # Ceiling division: how many batches cover all commits.
    total_batches = (len(commits) + batch_size - 1) // batch_size

    for start in range(0, len(commits), batch_size):
        batch = commits[start:start + batch_size]
        batch_num = start // batch_size + 1

        if progress_callback:
            progress_callback(batch_num, total_batches)

        stories, index = await synthesizer.synthesize_batch(batch, sessions)
        all_stories.extend(stories)

        # Merge the per-batch index into the accumulated index.
        # setdefault collapses the previous "if key not in ...: ... = []" pattern.
        for f, story_ids in index.files_to_stories.items():
            merged_index.files_to_stories.setdefault(f, []).extend(story_ids)

        for kw, story_ids in index.keywords_to_stories.items():
            merged_index.keywords_to_stories.setdefault(kw, []).extend(story_ids)

        for week, story_ids in index.by_week.items():
            merged_index.by_week.setdefault(week, []).extend(story_ids)

        merged_index.story_digests.extend(index.story_digests)

    merged_index.story_count = len(all_stories)

    return all_stories, merged_index
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
def synthesize_stories_sync(
    commits: list[CommitData],
    sessions: list[SessionContext] | None = None,
    **kwargs,
) -> tuple[list[Story], ContentIndex]:
    """Blocking wrapper: run synthesize_stories to completion on a fresh event loop."""
    coroutine = synthesize_stories(commits, sessions, **kwargs)
    return asyncio.run(coroutine)
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
# =============================================================================
|
|
986
|
+
# Public/Internal Story Transformation
|
|
987
|
+
# =============================================================================
|
|
988
|
+
|
|
989
|
+
class PublicStory(BaseModel):
    """LLM output for public-facing story (Tripartite Codex).

    Schema for structured JSON returned by the LLM in "public" mode:
    impact-focused fields with no internal technical detail. Used as the
    ``response_model`` in transform-for-feed calls and also constructed
    directly by the deterministic fallback path.
    """
    # Short attention-grabbing opener for the post.
    hook: str = Field(description="Engagement hook, <60 chars")
    # The observable change that shipped.
    what: str = Field(description="Behavioral primitive - observable change")
    # Why it matters to users/stakeholders (the external "why").
    value: str = Field(description="External why - user/stakeholder value")
    # The transferable takeaway for other engineers.
    insight: str = Field(description="Transferable engineering lesson")
    # Optional demo material (code block, diagram, etc.); None when absent.
    show: str | None = Field(default=None, description="Optional visual/code block")
    # The assembled, ready-to-publish post text.
    post_body: str = Field(description="Final natural post text (2–5 sentences)")
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
class InternalStory(BaseModel):
    """LLM output for internal story with full technical context.

    Superset of PublicStory: adds the internal "why" (``problem``) and
    implementation detail bullets (``how``) for team-facing updates.
    Used as the ``response_model`` in "internal" mode.
    """
    # Short attention-grabbing opener for the post.
    hook: str = Field(description="Engagement hook, <60 chars")
    # The observable change that shipped.
    what: str = Field(description="Behavioral primitive - observable change")
    # Why it matters to users/stakeholders (the external "why").
    value: str = Field(description="External why - user/stakeholder value")
    # Internal motivation; defaults empty since the LLM may omit it.
    problem: str = Field(default="", description="Internal why - what was broken/missing")
    # Implementation bullets; default_factory avoids a shared mutable default.
    how: list[str] = Field(default_factory=list, description="Implementation details")
    # The transferable takeaway for other engineers.
    insight: str = Field(description="Transferable engineering lesson")
    # Optional demo material (code block, diagram, etc.); None when absent.
    show: str | None = Field(default=None, description="Optional visual/code block")
    # The assembled, ready-to-publish internal update text.
    post_body: str = Field(description="Final natural internal update (3–6 sentences)")
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
async def transform_story_for_feed(
    story: Story,
    mode: str = "public",
    api_key: str | None = None,
    base_url: str | None = None,
    model: str | None = None,
) -> PublicStory | InternalStory:
    """
    Transform a technical story into a build-in-public feed post.

    Args:
        story: The Story to transform
        mode: "public" (impact only) or "internal" (with technical details)
        api_key: Optional API key
        base_url: Optional base URL for API
        model: Optional model name

    Returns:
        PublicStory or InternalStory depending on mode

    Any failure in the LLM call or response parsing falls back to
    deterministic template content via _build_fallback_codex.
    """
    synthesizer = StorySynthesizer(api_key=api_key, base_url=base_url, model=model)
    client = synthesizer._get_client()
    model_name = synthesizer.model

    # Format story fields for prompt interpolation; "None" keeps the
    # template readable when a field is absent.
    impl_details = "\n".join(f"- {d}" for d in story.implementation_details) if story.implementation_details else "None"
    decisions = "\n".join(f"- {d}" for d in story.decisions) if story.decisions else "None"
    files = ", ".join(story.files[:10]) if story.files else "None"

    if mode == "public":
        system_prompt = PUBLIC_STORY_SYSTEM
        user_prompt = PUBLIC_STORY_USER.format(
            title=story.title,
            category=story.category,
            problem=story.problem or "Not specified",
            approach=story.approach or "Not specified",
            outcome=story.outcome or "Not specified",
            implementation_details=impl_details,
        )
        response_model = PublicStory
    else:
        system_prompt = INTERNAL_STORY_SYSTEM
        user_prompt = INTERNAL_STORY_USER.format(
            title=story.title,
            category=story.category,
            problem=story.problem or "Not specified",
            approach=story.approach or "Not specified",
            outcome=story.outcome or "Not specified",
            implementation_details=impl_details,
            decisions=decisions,
            files=files,
        )
        response_model = InternalStory

    try:
        # Use sync client to avoid event loop cleanup issues
        response = client.chat.completions.create(
            # Strip any provider prefix (e.g. "openai/gpt-4o" -> "gpt-4o").
            model=model_name.split("/")[-1] if "/" in model_name else model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.7,
        )

        content = response.choices[0].message.content.strip()

        # Strip markdown code fences if present (many models wrap JSON in ```json blocks)
        if content.startswith("```"):
            first_newline = content.find("\n")
            if first_newline > 0:
                content = content[first_newline + 1:]
        if content.endswith("```"):
            content = content[:-3].rstrip()

        result = response_model.model_validate_json(content)

        # Quality check: if hook is empty or too generic, or post_body is empty/short, regenerate
        if not result.hook or len(result.hook) < 10:
            result = _enhance_with_fallback(result, story, mode)
        elif not result.post_body or len(result.post_body.strip()) < 40:
            result = _enhance_with_fallback(result, story, mode)

        return result

    except Exception as e:
        # Surface the swallowed error when debugging (same pattern the
        # synthesizer uses elsewhere) instead of silently discarding it.
        import os
        if os.environ.get("REPR_DEBUG"):
            print(f"DEBUG: Exception in LLM call: {type(e).__name__}: {e}")
        # Fallback: construct structured content from available data
        return _build_fallback_codex(story, mode)
|
|
1100
|
+
|
|
1101
|
+
|
|
1102
|
+
def _build_post_body_public(hook: str, what: str, value: str, insight: str) -> str:
|
|
1103
|
+
"""Build natural post body for public mode."""
|
|
1104
|
+
what_clean = what.rstrip(".").rstrip()
|
|
1105
|
+
value_clean = value.lstrip(".").lstrip()
|
|
1106
|
+
return (
|
|
1107
|
+
f"{hook}\n\n"
|
|
1108
|
+
f"{what_clean}. {value_clean}\n"
|
|
1109
|
+
f"{insight}"
|
|
1110
|
+
).strip()
|
|
1111
|
+
|
|
1112
|
+
|
|
1113
|
+
def _build_post_body_internal(hook: str, problem: str, what: str, how: list[str], insight: str) -> str:
|
|
1114
|
+
"""Build natural post body for internal mode."""
|
|
1115
|
+
detail = how[0] if how else ""
|
|
1116
|
+
what_clean = what.rstrip(".").rstrip()
|
|
1117
|
+
body = f"{hook}\n\n{problem}\n\n{what_clean}."
|
|
1118
|
+
if detail:
|
|
1119
|
+
detail_clean = detail.rstrip(".").rstrip()
|
|
1120
|
+
body += f" First change: {detail_clean}."
|
|
1121
|
+
body += f" {insight}"
|
|
1122
|
+
return body.strip()
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def _build_fallback_codex(story: Story, mode: str) -> PublicStory | InternalStory:
    """Build structured Tripartite Codex content when LLM fails.

    Deterministic except for the hook, which is chosen at random from a
    per-category pool. Unknown categories fall back to the "chore" hooks
    and a generic insight.

    Args:
        story: Source story whose title/outcome/problem/details seed the post.
        mode: "public" returns a PublicStory; anything else an InternalStory.
    """
    import random

    # Hook variations by category — picked randomly so repeated fallbacks
    # don't all read identically.
    category_hooks = {
        "feature": [
            "Finally built the thing.",
            "New capability unlocked.",
            "This changes everything. (Well, something.)",
            "Shipped it.",
        ],
        "bugfix": [
            "One less thing to worry about.",
            "The bug is dead. Long live the code.",
            "Found it. Fixed it. Done.",
            "That crash? Gone.",
        ],
        "refactor": [
            "Same behavior. Better code.",
            "Future me will thank present me.",
            "Cleaned up the mess.",
            "Technical debt: paid.",
        ],
        "perf": [
            "Faster now.",
            "Speed boost shipped.",
            "Shaved off the milliseconds.",
            "Performance win.",
        ],
        "infra": [
            "Infrastructure that just works.",
            "Set it up. Forgot about it.",
            "The plumbing nobody sees.",
            "Foundation laid.",
        ],
        "docs": [
            "Wrote it down so I won't forget.",
            "Documentation: the async communication.",
            "Now it's not just in my head.",
            "Future onboarding: simplified.",
        ],
        "test": [
            "Now I can refactor with confidence.",
            "Tests: the safety net.",
            "Covered.",
            "One more thing that won't break silently.",
        ],
        "chore": [
            "Housekeeping done.",
            "Small fix. Big relief.",
            "Maintenance mode.",
            "Keeping things tidy.",
        ],
    }

    # One canned insight per category; unknown categories use a generic line.
    category_insights = {
        "feature": "New capabilities unlock new possibilities.",
        "bugfix": "Fewer edge cases mean more reliable software.",
        "refactor": "Cleaner code is easier to extend.",
        "perf": "Performance gains compound over time.",
        "infra": "Good infrastructure is invisible until it's missing.",
        "docs": "Documentation is a gift to your future self.",
        "test": "Tests are the safety net that enables bold changes.",
        "chore": "Small maintenance prevents big problems.",
    }

    hooks = category_hooks.get(story.category, category_hooks["chore"])
    hook = random.choice(hooks)

    # Build what from title (trailing period stripped for clean joining later)
    what = story.title.rstrip(".")

    # Build value from outcome or generate a generic one
    value = story.outcome if story.outcome else f"Improves the {story.category} workflow."

    # Build insight
    insight = category_insights.get(story.category, "Incremental progress adds up.")

    if mode == "public":
        post_body = _build_post_body_public(hook, what, value, insight)
        return PublicStory(
            hook=hook,
            what=what,
            value=value,
            insight=insight,
            show=None,
            post_body=post_body,
        )
    else:
        # Internal mode adds problem/how pulled straight from the story.
        problem = story.problem or "Needed improvement."
        how = story.implementation_details or []
        post_body = _build_post_body_internal(hook, problem, what, how, insight)
        return InternalStory(
            hook=hook,
            what=what,
            value=value,
            problem=problem,
            how=how,
            insight=insight,
            show=None,
            post_body=post_body,
        )
|
|
1228
|
+
|
|
1229
|
+
|
|
1230
|
+
def _enhance_with_fallback(result: PublicStory | InternalStory, story: Story, mode: str) -> PublicStory | InternalStory:
    """Enhance a weak LLM result with fallback data.

    Called when the LLM returned an empty/too-short hook or post_body.
    Swaps the hook for a canned category-appropriate one, fills empty
    fields from the story itself, and rebuilds post_body when missing.

    Args:
        result: The weak LLM output (either model type).
        story: Source story used to backfill empty fields.
        mode: "public" returns a PublicStory; anything else an InternalStory.
    """
    import random

    # Condensed per-category hook pool (smaller than _build_fallback_codex's).
    category_hooks = {
        "feature": ["Finally built the thing.", "New capability unlocked.", "Shipped it."],
        "bugfix": ["One less thing to worry about.", "Found it. Fixed it.", "That crash? Gone."],
        "refactor": ["Same behavior. Better code.", "Technical debt: paid.", "Cleaned up."],
        "perf": ["Faster now.", "Speed boost shipped.", "Performance win."],
        "infra": ["Infrastructure that works.", "Foundation laid.", "Set up and running."],
        "docs": ["Wrote it down.", "Now it's documented.", "Future-proofed the knowledge."],
        "test": ["Now I can refactor safely.", "Covered.", "Tests added."],
        "chore": ["Housekeeping done.", "Small fix. Big relief.", "Tidied up."],
    }

    hooks = category_hooks.get(story.category, ["Done."])
    new_hook = random.choice(hooks)

    if mode == "public":
        what = result.what or story.title
        value = result.value or story.outcome or "Improvement shipped."
        insight = result.insight or "Progress is progress."
        post_body = getattr(result, "post_body", "") or _build_post_body_public(
            new_hook, what, value, insight
        )
        return PublicStory(
            hook=new_hook,
            what=what,
            value=value,
            insight=insight,
            show=result.show,
            post_body=post_body,
        )
    else:
        what = result.what or story.title
        # getattr covers the case where a PublicStory reached internal mode
        # (no `problem`/`how` attributes); falsy values fall through to the
        # story's own data, matching the old hasattr-chain behavior.
        problem = getattr(result, "problem", "") or story.problem or ""
        how = getattr(result, "how", None) or story.implementation_details or []
        insight = result.insight or "Progress is progress."
        post_body = getattr(result, "post_body", "") or _build_post_body_internal(
            new_hook, problem, what, how, insight
        )
        return InternalStory(
            hook=new_hook,
            what=what,
            value=result.value or story.outcome or "Improvement shipped.",
            problem=problem,
            how=how,
            insight=insight,
            show=result.show,
            post_body=post_body,
        )
|
|
1281
|
+
|
|
1282
|
+
|
|
1283
|
+
# Legacy function for backward compatibility
|
|
1284
|
+
def _build_fallback_post(story: Story) -> str:
    """Return only the post body of a public fallback codex (legacy API shape)."""
    return _build_fallback_codex(story, "public").post_body
|
|
1288
|
+
|
|
1289
|
+
|
|
1290
|
+
def transform_story_for_feed_sync(
    story: Story,
    mode: str = "public",
    **kwargs,
) -> PublicStory | InternalStory:
    """Blocking wrapper: drive transform_story_for_feed on a fresh event loop."""
    coroutine = transform_story_for_feed(story, mode, **kwargs)
    return asyncio.run(coroutine)
|