repr-cli 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repr/__init__.py +1 -1
- repr/api.py +363 -62
- repr/auth.py +47 -38
- repr/change_synthesis.py +478 -0
- repr/cli.py +4099 -280
- repr/config.py +119 -11
- repr/configure.py +889 -0
- repr/cron.py +419 -0
- repr/dashboard/__init__.py +9 -0
- repr/dashboard/build.py +126 -0
- repr/dashboard/dist/assets/index-BYFVbEev.css +1 -0
- repr/dashboard/dist/assets/index-BrrhyJFO.css +1 -0
- repr/dashboard/dist/assets/index-CcEg74ts.js +270 -0
- repr/dashboard/dist/assets/index-Cerc-iA_.js +377 -0
- repr/dashboard/dist/assets/index-CjVcBW2L.css +1 -0
- repr/dashboard/dist/assets/index-Dfl3mR5E.js +377 -0
- repr/dashboard/dist/favicon.svg +4 -0
- repr/dashboard/dist/index.html +14 -0
- repr/dashboard/manager.py +234 -0
- repr/dashboard/server.py +1298 -0
- repr/db.py +980 -0
- repr/hooks.py +3 -2
- repr/loaders/__init__.py +22 -0
- repr/loaders/base.py +156 -0
- repr/loaders/claude_code.py +287 -0
- repr/loaders/clawdbot.py +313 -0
- repr/loaders/gemini_antigravity.py +381 -0
- repr/mcp_server.py +1196 -0
- repr/models.py +503 -0
- repr/openai_analysis.py +25 -0
- repr/session_extractor.py +481 -0
- repr/storage.py +328 -0
- repr/story_synthesis.py +1296 -0
- repr/templates.py +68 -4
- repr/timeline.py +710 -0
- repr/tools.py +17 -8
- {repr_cli-0.2.16.dist-info → repr_cli-0.2.17.dist-info}/METADATA +48 -10
- repr_cli-0.2.17.dist-info/RECORD +52 -0
- {repr_cli-0.2.16.dist-info → repr_cli-0.2.17.dist-info}/WHEEL +1 -1
- {repr_cli-0.2.16.dist-info → repr_cli-0.2.17.dist-info}/entry_points.txt +1 -0
- repr_cli-0.2.16.dist-info/RECORD +0 -26
- {repr_cli-0.2.16.dist-info → repr_cli-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {repr_cli-0.2.16.dist-info → repr_cli-0.2.17.dist-info}/top_level.txt +0 -0
repr/models.py
ADDED
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data models for session integration.
|
|
3
|
+
|
|
4
|
+
These models support multi-source context extraction from:
|
|
5
|
+
- Git commits (existing)
|
|
6
|
+
- AI session logs (Claude Code, Clawdbot)
|
|
7
|
+
- Unified timeline (merged view)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# =============================================================================
|
|
19
|
+
# Session Models
|
|
20
|
+
# =============================================================================
|
|
21
|
+
|
|
22
|
+
class ContentBlockType(str, Enum):
    """Type of content block within a message."""
    # Members also subclass ``str``, so each one compares equal to its raw
    # string value (e.g. ``ContentBlockType.TEXT == "text"``).
    TEXT = "text"
    TOOL_CALL = "toolCall"  # blocks of this type carry ContentBlock.name / ContentBlock.input
    TOOL_RESULT = "toolResult"
    THINKING = "thinking"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class ContentBlock:
    """One piece of content inside a session message.

    ``type`` may be supplied as a raw string (for example ``"toolCall"``);
    it is coerced to a ``ContentBlockType`` member right after construction.
    An unknown string raises ``ValueError`` from the enum lookup.
    """
    type: ContentBlockType
    text: str | None = None
    name: str | None = None  # tool name if toolCall
    input: dict | None = None  # tool input if toolCall

    def __post_init__(self):
        # Enum members are themselves ``str`` instances; passing one through
        # the enum constructor simply returns the same member.
        raw_type = self.type
        if not isinstance(raw_type, str):
            return
        self.type = ContentBlockType(raw_type)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class MessageRole(str, Enum):
    """Role of a message sender."""
    # Members also subclass ``str``, so each one compares equal to its raw
    # string value (e.g. ``MessageRole.USER == "user"``).
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
    TOOL_RESULT = "toolResult"  # same raw value as ContentBlockType.TOOL_RESULT
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class SessionMessage:
    """A single message exchanged during an AI session.

    ``role`` and ``timestamp`` may be passed as strings; both are converted
    (to ``MessageRole`` and ``datetime`` respectively) after construction.
    """
    timestamp: datetime
    role: MessageRole
    content: list[ContentBlock]
    uuid: str | None = None

    def __post_init__(self):
        role = self.role
        if isinstance(role, str):
            self.role = MessageRole(role)

        stamp = self.timestamp
        if isinstance(stamp, str):
            # "Z" is rewritten to an explicit "+00:00" offset before parsing.
            iso_text = stamp.replace("Z", "+00:00")
            self.timestamp = datetime.fromisoformat(iso_text)

    @property
    def text_content(self) -> str:
        """Return the text of every non-empty text block, joined by newlines."""
        return "\n".join(
            block.text
            for block in self.content
            if block.type == ContentBlockType.TEXT and block.text
        )

    @property
    def tool_calls(self) -> list[ContentBlock]:
        """Return the blocks of this message that are tool calls, in order."""
        calls = []
        for block in self.content:
            if block.type == ContentBlockType.TOOL_CALL:
                calls.append(block)
        return calls
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class Session:
    """A complete AI session.

    ``started_at`` / ``ended_at`` may be passed as ISO-8601 strings and are
    converted to ``datetime`` after construction. ``tools_used`` and
    ``files_touched`` are derived from ``messages`` on first access and then
    cached on the instance, so later changes to ``messages`` are not reflected.
    """
    id: str
    started_at: datetime
    ended_at: datetime | None
    channel: str  # "cli", "slack", "web", "telegram", etc.
    messages: list[SessionMessage]
    cwd: str | None = None
    git_branch: str | None = None
    model: str | None = None

    # Derived (computed lazily)
    _tools_used: list[str] | None = field(default=None, repr=False)
    _files_touched: list[str] | None = field(default=None, repr=False)

    def __post_init__(self):
        if isinstance(self.started_at, str):
            # "Z" is rewritten to an explicit "+00:00" offset before parsing.
            self.started_at = datetime.fromisoformat(self.started_at.replace("Z", "+00:00"))
        if self.ended_at and isinstance(self.ended_at, str):
            self.ended_at = datetime.fromisoformat(self.ended_at.replace("Z", "+00:00"))

    @property
    def tools_used(self) -> list[str]:
        """Get the sorted list of unique tool names called in this session."""
        if self._tools_used is None:
            names = {
                block.name
                for msg in self.messages
                for block in msg.content
                if block.type == ContentBlockType.TOOL_CALL and block.name
            }
            self._tools_used = sorted(names)
        return self._tools_used

    @property
    def files_touched(self) -> list[str]:
        """Get the sorted list of files touched in this session (from tool calls).

        Only tool calls whose name is one of the known file tools are
        inspected. Tool inputs come from session logs, so anything that is
        not a dict, and any path value that is not a non-empty string, is
        ignored instead of raising ``TypeError`` while building or sorting
        the result.
        """
        if self._files_touched is None:
            file_tools = {"Read", "Write", "Edit", "read_file", "write_file", "edit_file"}
            # Common parameter names for file paths
            path_keys = ("path", "file_path", "filePath", "filename")
            files: set[str] = set()
            for msg in self.messages:
                for block in msg.content:
                    if block.type != ContentBlockType.TOOL_CALL or block.name not in file_tools:
                        continue
                    tool_input = block.input
                    if not isinstance(tool_input, dict):
                        continue
                    for key in path_keys:
                        value = tool_input.get(key)
                        if isinstance(value, str) and value:
                            files.add(value)
            self._files_touched = sorted(files)
        return self._files_touched

    @property
    def duration_seconds(self) -> float | None:
        """Get session duration in seconds, or None when the session has no end time."""
        if self.ended_at:
            return (self.ended_at - self.started_at).total_seconds()
        return None

    @property
    def message_count(self) -> int:
        """Get total message count."""
        return len(self.messages)

    @property
    def user_message_count(self) -> int:
        """Get the number of messages whose role is ``MessageRole.USER``."""
        return sum(1 for m in self.messages if m.role == MessageRole.USER)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# =============================================================================
|
|
150
|
+
# Context Models (LLM-extracted)
|
|
151
|
+
# =============================================================================
|
|
152
|
+
|
|
153
|
+
class SessionContext(BaseModel):
    """Extracted context from a session (LLM-generated)."""
    # Identifies the source session; `timestamp` is a datetime associated
    # with it (the exact reference point is not specified in this module).
    session_id: str
    timestamp: datetime

    # Core context
    # `problem`, `approach` and `outcome` have no default and are therefore
    # required; the list fields default to empty lists.
    problem: str = Field(description="What was the user trying to solve?")
    approach: str = Field(description="What strategy/pattern was used?")
    decisions: list[str] = Field(default_factory=list, description="Key decisions made")
    files_modified: list[str] = Field(default_factory=list, description="Files that were modified")
    tools_used: list[str] = Field(default_factory=list, description="Tools that were used")
    outcome: str = Field(description="Did it work? What was the result?")
    lessons: list[str] = Field(default_factory=list, description="Gotchas, learnings")

    # Linking
    linked_commits: list[str] = Field(default_factory=list, description="Commit SHAs matched by time/files")
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# =============================================================================
|
|
172
|
+
# Timeline Models
|
|
173
|
+
# =============================================================================
|
|
174
|
+
|
|
175
|
+
class TimelineEntryType(str, Enum):
    """Type of timeline entry."""
    # Members also subclass ``str``, so each one compares equal to its raw
    # string value (e.g. ``TimelineEntryType.COMMIT == "commit"``).
    COMMIT = "commit"
    SESSION = "session"
    MERGED = "merged"
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@dataclass
class CommitData:
    """Git commit details attached to timeline entries.

    ``timestamp`` may be passed as an ISO-8601 string; it is parsed into a
    ``datetime`` after construction.
    """
    sha: str
    message: str
    author: str
    timestamp: datetime
    files: list[str]
    insertions: int = 0
    deletions: int = 0
    author_email: str = ""

    def __post_init__(self):
        stamp = self.timestamp
        if not isinstance(stamp, str):
            return
        # "Z" is rewritten to an explicit "+00:00" offset before parsing.
        iso_text = stamp.replace("Z", "+00:00")
        self.timestamp = datetime.fromisoformat(iso_text)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
@dataclass
class TimelineEntry:
    """One item in the unified timeline.

    ``type`` and ``timestamp`` may be passed as strings; they are converted
    to ``TimelineEntryType`` and ``datetime`` after construction.
    """
    timestamp: datetime
    type: TimelineEntryType

    # Source data (one or both present)
    commit: CommitData | None = None
    session_context: SessionContext | None = None

    # Unified story (generated)
    story: dict | None = None  # StoryOutput as dict for flexibility

    def __post_init__(self):
        entry_type = self.type
        if isinstance(entry_type, str):
            self.type = TimelineEntryType(entry_type)

        stamp = self.timestamp
        if isinstance(stamp, str):
            # "Z" is rewritten to an explicit "+00:00" offset before parsing.
            iso_text = stamp.replace("Z", "+00:00")
            self.timestamp = datetime.fromisoformat(iso_text)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@dataclass
class ReprTimeline:
    """Unified view stored in .repr/timeline.json

    ``initialized_at`` and ``last_updated`` may be passed as ISO-8601
    strings; they are parsed into ``datetime`` objects after construction.
    """
    project_path: str
    initialized_at: datetime
    entries: list[TimelineEntry] = field(default_factory=list)

    # Metadata
    session_sources: list[str] = field(default_factory=list)  # ["claude_code", "clawdbot"]
    last_updated: datetime | None = None

    def __post_init__(self):
        created = self.initialized_at
        if isinstance(created, str):
            # "Z" is rewritten to an explicit "+00:00" offset before parsing.
            self.initialized_at = datetime.fromisoformat(created.replace("Z", "+00:00"))

        updated = self.last_updated
        # Empty strings are left untouched, exactly like None.
        if isinstance(updated, str) and updated:
            self.last_updated = datetime.fromisoformat(updated.replace("Z", "+00:00"))

    def add_entry(self, entry: TimelineEntry) -> None:
        """Append ``entry``, re-sort the timeline by timestamp, and stamp ``last_updated``."""
        self.entries.append(entry)
        # The sort is stable, so an entry whose timestamp equals existing ones
        # stays after them.
        self.entries.sort(key=lambda item: item.timestamp)
        # NOTE(review): datetime.now() is naive local time, whereas timestamps
        # parsed from "Z"-suffixed strings are timezone-aware - confirm the
        # two are never compared or mixed within one timeline.
        self.last_updated = datetime.now()

    def get_entries_in_range(
        self,
        start: datetime,
        end: datetime,
        entry_type: TimelineEntryType | None = None,
    ) -> list[TimelineEntry]:
        """Return entries with ``start <= timestamp <= end`` (both inclusive).

        When ``entry_type`` is given, only entries of that type are returned.
        Entries are returned in the order they are stored.
        """
        return [
            entry
            for entry in self.entries
            if start <= entry.timestamp <= end
            and (entry_type is None or entry.type == entry_type)
        ]
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# =============================================================================
|
|
258
|
+
# Story Models (1:M commits, 1:M sessions)
|
|
259
|
+
# =============================================================================
|
|
260
|
+
|
|
261
|
+
class CodeSnippet(BaseModel):
    """A representative code snippet from a story's changes."""
    # Only `file_path` and `content` are required; every other field has a default.
    file_path: str = Field(description="Path to the file")
    language: str = Field(default="", description="Language for syntax highlighting")
    content: str = Field(description="The code snippet")
    # NOTE(review): a default of 0 presumably means "unknown" for the two
    # line fields - confirm against the code that produces snippets.
    line_start: int = Field(default=0, description="Starting line number")
    line_count: int = Field(default=0, description="Number of lines in snippet")
    context: str = Field(default="", description="Brief description of what this snippet shows")
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class FileChange(BaseModel):
    """Summary of changes to a single file."""
    file_path: str = Field(description="Path to the file")
    # Plain ``str``: the values listed in the description are not enforced by the type.
    change_type: str = Field(default="modified", description="added, modified, deleted, renamed")
    insertions: int = Field(default=0, description="Lines added")
    deletions: int = Field(default=0, description="Lines removed")
    old_path: str | None = Field(default=None, description="Original path if renamed")
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class Story(BaseModel):
    """
    Coherent unit of work with WHY/WHAT context.

    Stories are the intelligent layer that synthesizes:
    - Multiple commits (1:M) - raw data about WHAT changed
    - Multiple sessions (1:M) - enrichment about WHY
    """
    # Required fields (no default): id, created_at, updated_at, title.
    # Every other field has a default, so a minimal Story needs only those four.
    id: str  # UUID
    created_at: datetime
    updated_at: datetime
    project_id: str = Field(default="", description="ID of the project this story belongs to")
    author_name: str = Field(default="unknown", description="Claimed username, or git author from first commit")
    author_email: str = Field(default="", description="Git author email for Gravatar")

    # Ownership and visibility
    user_id: str | None = Field(default=None, description="User ID if connected to repr.dev")
    # Plain ``str``: the allowed values in the description are not enforced by the type.
    visibility: str = Field(default="private", description="public, private, or connections")

    # 1:M relationships (SHAs/IDs, not full objects)
    commit_shas: list[str] = Field(default_factory=list)
    session_ids: list[str] = Field(default_factory=list)

    # Context (LLM-extracted or synthesized)
    title: str = Field(description="One-line title, e.g. 'Add OAuth popup flow'")
    problem: str = Field(default="", description="What was broken/missing")
    approach: str = Field(default="", description="Technical strategy used")
    implementation_details: list[str] = Field(default_factory=list, description="Specific code changes made")
    decisions: list[str] = Field(default_factory=list, description="'Chose X over Y because Z'")
    tradeoffs: str = Field(default="", description="What was gained/lost")
    outcome: str = Field(default="", description="Observable result, metrics")
    lessons: list[str] = Field(default_factory=list, description="Gotchas, learnings")

    # Metadata for filtering/display
    # `category` and `scope` are plain ``str``; the listed values are not enforced by the type.
    category: str = Field(default="feature", description="feature, bugfix, refactor, perf, infra, docs, test, chore")
    scope: str = Field(default="internal", description="user-facing, internal, platform, ops")
    technologies: list[str] = Field(default_factory=list, description="Resume-worthy skills: frameworks, libraries, tools, patterns, cloud services, APIs")
    files: list[str] = Field(default_factory=list, description="Aggregated from commits")

    # Timespan
    started_at: datetime | None = None  # First commit timestamp
    ended_at: datetime | None = None  # Last commit timestamp

    # Structured story content (Tripartite Codex)
    # The "<60 chars" limit on `hook` is stated only in the description;
    # no length constraint is declared on the field.
    hook: str = Field(default="", description="Engagement hook - story opener, <60 chars")
    what: str = Field(default="", description="Behavioral primitive - the observable change")
    value: str = Field(default="", description="External why - user/stakeholder value")
    # problem already exists above - internal why / what was broken
    # implementation_details already exists above - the how
    insight: str = Field(default="", description="Engineering lesson - transferable principle")
    show: str | None = Field(default=None, description="Visual - code block, diagram, before/after")
    diagram: str | None = Field(default=None, description="ASCII diagram explaining the change visually (architecture, flow, before/after)")
    post_body: str = Field(default="", description="LLM-generated natural post text")

    # Legacy fields (for backward compatibility during migration)
    public_post: str = Field(default="", description="[Legacy] Build-in-public post text")
    public_show: str | None = Field(default=None, description="[Legacy] Optional code block for public post")
    internal_post: str = Field(default="", description="[Legacy] Internal post with tech context")
    internal_show: str | None = Field(default=None, description="[Legacy] Optional code block for internal post")
    internal_details: list[str] = Field(default_factory=list, description="[Legacy] Technical implementation details")

    # Recall/diff data (for internal developer view)
    file_changes: list[FileChange] = Field(default_factory=list, description="Per-file change summary")
    key_snippets: list[CodeSnippet] = Field(default_factory=list, description="Representative code snippets")
    total_insertions: int = Field(default=0, description="Total lines added across all files")
    total_deletions: int = Field(default=0, description="Total lines removed across all files")
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class StoryDigest(BaseModel):
    """Compact summary for content index."""
    # Required fields (no default): story_id, title, timestamp.
    story_id: str
    title: str
    problem_keywords: list[str] = Field(default_factory=list)
    # The "top 5" cap is a convention for whoever fills this in; the field
    # itself declares no length limit.
    files: list[str] = Field(default_factory=list)  # Top 5 files
    tech_stack: list[str] = Field(default_factory=list)
    category: str = "feature"  # same default as Story.category
    timestamp: datetime
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class ContentIndex(BaseModel):
    """Search index optimized for LLM retrieval."""
    # Every field has a default, so ``ContentIndex()`` yields an empty index.
    # NOTE(review): the list values in the three mappings are presumably
    # story IDs, and `by_week` keys some week identifier - confirm against
    # the code that builds the index.
    files_to_stories: dict[str, list[str]] = Field(default_factory=dict)
    keywords_to_stories: dict[str, list[str]] = Field(default_factory=dict)
    story_digests: list[StoryDigest] = Field(default_factory=list)
    by_week: dict[str, list[str]] = Field(default_factory=dict)
    last_updated: datetime | None = None
    # Stored as its own field; nothing in this class keeps it in sync with
    # `story_digests`.
    story_count: int = 0
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
class ReprStore(BaseModel):
    """
    Complete repr storage model for .repr/ directory.

    Replaces the old ReprTimeline with a more structured approach.
    """
    # Required fields (no default): project_path, initialized_at.
    project_path: str
    initialized_at: datetime
    last_updated: datetime | None = None

    # Raw data
    # CommitData is a standard-library dataclass rather than a BaseModel;
    # SessionContext is a BaseModel.
    commits: list[CommitData] = Field(default_factory=list)
    sessions: list[SessionContext] = Field(default_factory=list)

    # Synthesized
    stories: list[Story] = Field(default_factory=list)

    # Index
    # default_factory gives each store its own fresh, empty ContentIndex.
    index: ContentIndex = Field(default_factory=ContentIndex)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
# =============================================================================
|
|
391
|
+
# Commit-Session Matching
|
|
392
|
+
# =============================================================================
|
|
393
|
+
|
|
394
|
+
@dataclass
class CommitSessionMatch:
    """A match between a commit and a session."""
    commit_sha: str
    session_id: str
    confidence: float  # 0.0 to 1.0
    match_reasons: list[str]  # e.g., ["timestamp_overlap", "files_match"]

    # Timestamps for reference
    # match_commits_to_sessions stores timezone-normalized values in
    # `commit_time` and `session_start`, but the session's original
    # `ended_at` (possibly None or naive) in `session_end`.
    commit_time: datetime
    session_start: datetime
    session_end: datetime | None

    # Overlap details
    overlapping_files: list[str] = field(default_factory=list)
    # Computed by match_commits_to_sessions as commit time minus session end,
    # so it is negative for a commit made before the session ended.
    time_delta_seconds: float = 0.0  # Seconds between session end and commit
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def match_commits_to_sessions(
    commits: list[CommitData],
    sessions: list[Session],
    time_window_hours: float = 4.0,
    min_confidence: float = 0.3,
) -> list[CommitSessionMatch]:
    """
    Match commits to sessions based on timestamp and file overlap.

    Every (commit, session) pair is scored independently:

    - Pairs where the commit falls before the session start, or later than
      ``time_window_hours`` after the session end, are skipped entirely.
    - A commit made during the session adds 0.5 ("commit_during_session").
    - A commit made after the session end adds up to 0.4, decaying linearly
      to 0 at the edge of the time window ("timestamp_proximity").
    - Each file path present in both the commit and the session's
      ``files_touched`` adds 0.15, capped at 0.5 ("files_match:<count>").

    Naive datetimes are treated as UTC. A session without an end time is
    treated as ending at its start time.

    Args:
        commits: List of commit data
        sessions: List of sessions
        time_window_hours: Max hours between session and commit
        min_confidence: Minimum confidence threshold for matches

    Returns:
        List of commit-session matches above confidence threshold, sorted by
        confidence (highest first)
    """
    from datetime import timedelta, timezone

    def normalize_datetime(dt: datetime) -> datetime:
        """Ensure datetime is timezone-aware (UTC if naive)."""
        if dt.tzinfo is None:
            return dt.replace(tzinfo=timezone.utc)
        return dt

    time_window = timedelta(hours=time_window_hours)

    # Session bounds do not depend on the commit, so normalize them once
    # instead of once per commit/session pair.
    session_bounds = [
        (
            session,
            normalize_datetime(session.started_at),
            normalize_datetime(session.ended_at or session.started_at),
        )
        for session in sessions
    ]
    # File sets are built on first use (keyed by position in `sessions`) so
    # sessions that never fall inside a time window are never inspected.
    session_file_sets: dict[int, set] = {}

    matches = []
    for commit in commits:
        commit_time = normalize_datetime(commit.timestamp)
        commit_files = None  # built lazily, at most once per commit

        for index, (session, session_start, session_end) in enumerate(session_bounds):
            # Check timestamp: commit should be after or during session
            if not (session_start <= commit_time <= session_end + time_window):
                # Outside time window
                continue

            reasons = []
            confidence = 0.0
            time_delta = (commit_time - session_end).total_seconds()

            if commit_time <= session_end:
                # Commit during session (start bound already checked above)
                confidence += 0.5
                reasons.append("commit_during_session")
            else:
                # Decay confidence based on time after session. The clamp
                # guards against a slightly negative value caused by
                # timedelta rounding at the very edge of the window.
                hours_after = time_delta / 3600
                time_confidence = max(0, 0.4 * (1 - hours_after / time_window_hours))
                confidence += time_confidence
                if time_confidence > 0:
                    reasons.append("timestamp_proximity")

            # Check file overlap (exact path-string equality)
            # NOTE(review): session paths come from tool-call inputs and
            # commit paths from the commit record; if one side is absolute
            # and the other repo-relative they will never intersect - confirm
            # both are expressed in the same form by the callers.
            if commit_files is None:
                commit_files = set(commit.files)
            session_files = session_file_sets.get(index)
            if session_files is None:
                session_files = session_file_sets[index] = set(session.files_touched)
            overlapping = session_files & commit_files

            if overlapping:
                # Higher confidence for more file matches
                file_confidence = min(0.5, len(overlapping) * 0.15)
                confidence += file_confidence
                reasons.append(f"files_match:{len(overlapping)}")

            # Create match if above threshold
            if confidence >= min_confidence:
                matches.append(CommitSessionMatch(
                    commit_sha=commit.sha,
                    session_id=session.id,
                    confidence=confidence,
                    match_reasons=reasons,
                    commit_time=commit_time,
                    session_start=session_start,
                    session_end=session.ended_at,  # Keep original for display
                    overlapping_files=sorted(overlapping),
                    time_delta_seconds=time_delta,
                ))

    # Sort by confidence descending
    matches.sort(key=lambda m: m.confidence, reverse=True)
    return matches
|
repr/openai_analysis.py
CHANGED
|
@@ -28,6 +28,13 @@ class ExtractedStory(BaseModel):
|
|
|
28
28
|
stack: str = Field(description="Stack layer. One of: frontend, backend, database, infra, mobile, fullstack")
|
|
29
29
|
initiative: str = Field(description="Initiative type. One of: greenfield, migration, integration, scaling, incident-response, tech-debt")
|
|
30
30
|
complexity: str = Field(description="Complexity/effort. One of: quick-win, project, epic, architecture")
|
|
31
|
+
# Context fields for AI agents
|
|
32
|
+
problem: str | None = Field(default=None, description="What problem was being solved? What was broken/slow/missing?")
|
|
33
|
+
approach: str | None = Field(default=None, description="What pattern/strategy was used? Technical approach.")
|
|
34
|
+
decisions: list[str] | None = Field(default=None, description="Key decisions. Format: 'Chose X over Y because Z'")
|
|
35
|
+
tradeoffs: str | None = Field(default=None, description="What was gained/lost with this approach?")
|
|
36
|
+
outcome: str | None = Field(default=None, description="Observable result, metrics if available")
|
|
37
|
+
lessons: list[str] | None = Field(default=None, description="Gotchas, learnings, things to remember")
|
|
31
38
|
|
|
32
39
|
|
|
33
40
|
class ExtractedCommitBatch(BaseModel):
|
|
@@ -216,6 +223,16 @@ Per story:
|
|
|
216
223
|
- initiative: Why this work - greenfield, migration, integration, scaling, incident-response, or tech-debt
|
|
217
224
|
- complexity: Effort level - quick-win, project, epic, or architecture
|
|
218
225
|
|
|
226
|
+
ALSO capture the context (when evident from commits/diffs):
|
|
227
|
+
- problem: What problem was being solved? What was broken/slow/missing?
|
|
228
|
+
- approach: What pattern/strategy was used? How does the solution work?
|
|
229
|
+
- decisions: Key decisions (1-3 items). Format: "Chose X over Y because Z"
|
|
230
|
+
- tradeoffs: What was gained/lost with this approach?
|
|
231
|
+
- outcome: Observable result (if clear from commits)
|
|
232
|
+
- lessons: Gotchas visible in code comments, error handling patterns, etc.
|
|
233
|
+
|
|
234
|
+
If context isn't clear from the commits, leave those fields null rather than guessing.
|
|
235
|
+
|
|
219
236
|
No corporate fluff. No "enhanced", "improved", "robust". Just say what happened."""
|
|
220
237
|
|
|
221
238
|
if not user_prompt:
|
|
@@ -249,6 +266,14 @@ No corporate fluff. No "enhanced", "improved", "robust". Just say what happened.
|
|
|
249
266
|
stack=story.stack,
|
|
250
267
|
initiative=story.initiative,
|
|
251
268
|
complexity=story.complexity,
|
|
269
|
+
# Context fields for AI agents
|
|
270
|
+
problem=story.problem,
|
|
271
|
+
approach=story.approach,
|
|
272
|
+
decisions=story.decisions,
|
|
273
|
+
tradeoffs=story.tradeoffs,
|
|
274
|
+
outcome=story.outcome,
|
|
275
|
+
lessons=story.lessons,
|
|
276
|
+
source_type="commit",
|
|
252
277
|
)
|
|
253
278
|
for story in parsed.stories
|
|
254
279
|
]
|