codealmanac 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. codealmanac/__init__.py +13 -0
  2. codealmanac/app.py +175 -0
  3. codealmanac/cli/__init__.py +1 -0
  4. codealmanac/cli/dispatch/__init__.py +0 -0
  5. codealmanac/cli/dispatch/admin.py +124 -0
  6. codealmanac/cli/dispatch/config.py +50 -0
  7. codealmanac/cli/dispatch/root.py +328 -0
  8. codealmanac/cli/main.py +28 -0
  9. codealmanac/cli/parser/__init__.py +0 -0
  10. codealmanac/cli/parser/admin.py +81 -0
  11. codealmanac/cli/parser/lifecycle.py +57 -0
  12. codealmanac/cli/parser/root.py +19 -0
  13. codealmanac/cli/parser/wiki.py +87 -0
  14. codealmanac/cli/render/__init__.py +0 -0
  15. codealmanac/cli/render/admin.py +191 -0
  16. codealmanac/cli/render/root.py +290 -0
  17. codealmanac/core/__init__.py +1 -0
  18. codealmanac/core/errors.py +45 -0
  19. codealmanac/core/models.py +14 -0
  20. codealmanac/core/paths.py +25 -0
  21. codealmanac/core/slug.py +7 -0
  22. codealmanac/core/text.py +5 -0
  23. codealmanac/database/__init__.py +15 -0
  24. codealmanac/database/sqlite.py +54 -0
  25. codealmanac/integrations/__init__.py +1 -0
  26. codealmanac/integrations/automation/__init__.py +3 -0
  27. codealmanac/integrations/automation/scheduler/__init__.py +5 -0
  28. codealmanac/integrations/automation/scheduler/launchd.py +163 -0
  29. codealmanac/integrations/command.py +56 -0
  30. codealmanac/integrations/harnesses/__init__.py +7 -0
  31. codealmanac/integrations/harnesses/claude/__init__.py +1 -0
  32. codealmanac/integrations/harnesses/claude/adapter.py +217 -0
  33. codealmanac/integrations/harnesses/codex/__init__.py +3 -0
  34. codealmanac/integrations/harnesses/codex/adapter.py +221 -0
  35. codealmanac/integrations/harnesses/git_status.py +49 -0
  36. codealmanac/integrations/sources/__init__.py +29 -0
  37. codealmanac/integrations/sources/filesystem/__init__.py +5 -0
  38. codealmanac/integrations/sources/filesystem/adapter.py +685 -0
  39. codealmanac/integrations/sources/filesystem/selection.py +209 -0
  40. codealmanac/integrations/sources/git/__init__.py +3 -0
  41. codealmanac/integrations/sources/git/adapter.py +132 -0
  42. codealmanac/integrations/sources/github/__init__.py +3 -0
  43. codealmanac/integrations/sources/github/adapter.py +413 -0
  44. codealmanac/integrations/sources/runtime.py +22 -0
  45. codealmanac/integrations/sources/transcripts/__init__.py +33 -0
  46. codealmanac/integrations/sources/transcripts/claude.py +61 -0
  47. codealmanac/integrations/sources/transcripts/codex.py +69 -0
  48. codealmanac/integrations/sources/transcripts/jsonl.py +84 -0
  49. codealmanac/integrations/sources/transcripts/runtime.py +387 -0
  50. codealmanac/integrations/sources/web/__init__.py +3 -0
  51. codealmanac/integrations/sources/web/adapter.py +303 -0
  52. codealmanac/integrations/updates/__init__.py +7 -0
  53. codealmanac/integrations/updates/package.py +85 -0
  54. codealmanac/integrations/workspaces/__init__.py +1 -0
  55. codealmanac/integrations/workspaces/git/__init__.py +3 -0
  56. codealmanac/integrations/workspaces/git/probe.py +128 -0
  57. codealmanac/manual/README.md +24 -0
  58. codealmanac/manual/__init__.py +19 -0
  59. codealmanac/manual/build.md +20 -0
  60. codealmanac/manual/evidence.md +23 -0
  61. codealmanac/manual/garden.md +20 -0
  62. codealmanac/manual/ingest.md +17 -0
  63. codealmanac/manual/library.py +84 -0
  64. codealmanac/manual/models.py +83 -0
  65. codealmanac/manual/pages.md +28 -0
  66. codealmanac/manual/requests.py +6 -0
  67. codealmanac/manual/sources.md +18 -0
  68. codealmanac/manual/style.md +19 -0
  69. codealmanac/prompts/__init__.py +5 -0
  70. codealmanac/prompts/base/notability.md +14 -0
  71. codealmanac/prompts/base/purpose.md +23 -0
  72. codealmanac/prompts/base/syntax.md +19 -0
  73. codealmanac/prompts/models.py +9 -0
  74. codealmanac/prompts/operations/garden.md +26 -0
  75. codealmanac/prompts/operations/ingest.md +18 -0
  76. codealmanac/prompts/renderer.py +24 -0
  77. codealmanac/prompts/requests.py +22 -0
  78. codealmanac/server/__init__.py +1 -0
  79. codealmanac/server/app.py +202 -0
  80. codealmanac/server/assets/__init__.py +1 -0
  81. codealmanac/server/assets/app.css +865 -0
  82. codealmanac/server/assets/app.js +3 -0
  83. codealmanac/server/assets/index.html +80 -0
  84. codealmanac/server/assets/viewer/api.js +30 -0
  85. codealmanac/server/assets/viewer/components.js +197 -0
  86. codealmanac/server/assets/viewer/main.js +126 -0
  87. codealmanac/server/assets/viewer/renderers.js +122 -0
  88. codealmanac/server/assets/viewer/routes.js +36 -0
  89. codealmanac/services/__init__.py +1 -0
  90. codealmanac/services/automation/__init__.py +3 -0
  91. codealmanac/services/automation/models.py +83 -0
  92. codealmanac/services/automation/ports.py +14 -0
  93. codealmanac/services/automation/requests.py +40 -0
  94. codealmanac/services/automation/service.py +294 -0
  95. codealmanac/services/config/__init__.py +17 -0
  96. codealmanac/services/config/models.py +61 -0
  97. codealmanac/services/config/requests.py +21 -0
  98. codealmanac/services/config/service.py +55 -0
  99. codealmanac/services/config/store.py +26 -0
  100. codealmanac/services/diagnostics/__init__.py +1 -0
  101. codealmanac/services/diagnostics/models.py +22 -0
  102. codealmanac/services/diagnostics/requests.py +8 -0
  103. codealmanac/services/diagnostics/service.py +283 -0
  104. codealmanac/services/harnesses/__init__.py +1 -0
  105. codealmanac/services/harnesses/models.py +104 -0
  106. codealmanac/services/harnesses/ports.py +18 -0
  107. codealmanac/services/harnesses/requests.py +19 -0
  108. codealmanac/services/harnesses/service.py +38 -0
  109. codealmanac/services/health/__init__.py +1 -0
  110. codealmanac/services/health/requests.py +8 -0
  111. codealmanac/services/health/service.py +20 -0
  112. codealmanac/services/index/__init__.py +1 -0
  113. codealmanac/services/index/models.py +135 -0
  114. codealmanac/services/index/requests.py +26 -0
  115. codealmanac/services/index/service.py +86 -0
  116. codealmanac/services/index/store.py +411 -0
  117. codealmanac/services/index/views.py +524 -0
  118. codealmanac/services/pages/__init__.py +1 -0
  119. codealmanac/services/pages/requests.py +17 -0
  120. codealmanac/services/pages/service.py +26 -0
  121. codealmanac/services/runs/__init__.py +1 -0
  122. codealmanac/services/runs/models.py +91 -0
  123. codealmanac/services/runs/requests.py +76 -0
  124. codealmanac/services/runs/service.py +86 -0
  125. codealmanac/services/runs/store.py +256 -0
  126. codealmanac/services/search/__init__.py +1 -0
  127. codealmanac/services/search/requests.py +23 -0
  128. codealmanac/services/search/service.py +31 -0
  129. codealmanac/services/sources/__init__.py +1 -0
  130. codealmanac/services/sources/models.py +126 -0
  131. codealmanac/services/sources/ports.py +30 -0
  132. codealmanac/services/sources/requests.py +76 -0
  133. codealmanac/services/sources/service.py +351 -0
  134. codealmanac/services/tagging/__init__.py +1 -0
  135. codealmanac/services/tagging/models.py +9 -0
  136. codealmanac/services/tagging/requests.py +35 -0
  137. codealmanac/services/tagging/service.py +43 -0
  138. codealmanac/services/topics/__init__.py +1 -0
  139. codealmanac/services/topics/models.py +36 -0
  140. codealmanac/services/topics/requests.py +115 -0
  141. codealmanac/services/topics/service.py +297 -0
  142. codealmanac/services/updates/__init__.py +4 -0
  143. codealmanac/services/updates/models.py +83 -0
  144. codealmanac/services/updates/ports.py +17 -0
  145. codealmanac/services/updates/requests.py +10 -0
  146. codealmanac/services/updates/service.py +113 -0
  147. codealmanac/services/viewer/__init__.py +1 -0
  148. codealmanac/services/viewer/models.py +80 -0
  149. codealmanac/services/viewer/renderer.py +89 -0
  150. codealmanac/services/viewer/requests.py +86 -0
  151. codealmanac/services/viewer/service.py +211 -0
  152. codealmanac/services/wiki/__init__.py +1 -0
  153. codealmanac/services/wiki/documents.py +83 -0
  154. codealmanac/services/wiki/frontmatter.py +94 -0
  155. codealmanac/services/wiki/frontmatter_rewrite.py +142 -0
  156. codealmanac/services/wiki/models.py +69 -0
  157. codealmanac/services/wiki/paths.py +42 -0
  158. codealmanac/services/wiki/service.py +57 -0
  159. codealmanac/services/wiki/templates.py +73 -0
  160. codealmanac/services/wiki/topics.py +266 -0
  161. codealmanac/services/wiki/wikilinks.py +58 -0
  162. codealmanac/services/workspaces/__init__.py +1 -0
  163. codealmanac/services/workspaces/models.py +124 -0
  164. codealmanac/services/workspaces/ports.py +9 -0
  165. codealmanac/services/workspaces/requests.py +82 -0
  166. codealmanac/services/workspaces/roots.py +74 -0
  167. codealmanac/services/workspaces/service.py +303 -0
  168. codealmanac/services/workspaces/store.py +127 -0
  169. codealmanac/workflows/__init__.py +1 -0
  170. codealmanac/workflows/build/__init__.py +1 -0
  171. codealmanac/workflows/build/models.py +8 -0
  172. codealmanac/workflows/build/service.py +45 -0
  173. codealmanac/workflows/garden/__init__.py +3 -0
  174. codealmanac/workflows/garden/models.py +30 -0
  175. codealmanac/workflows/garden/requests.py +22 -0
  176. codealmanac/workflows/garden/service.py +239 -0
  177. codealmanac/workflows/ingest/__init__.py +1 -0
  178. codealmanac/workflows/ingest/models.py +26 -0
  179. codealmanac/workflows/ingest/requests.py +39 -0
  180. codealmanac/workflows/ingest/service.py +302 -0
  181. codealmanac/workflows/lifecycle.py +197 -0
  182. codealmanac/workflows/sync/__init__.py +3 -0
  183. codealmanac/workflows/sync/models.py +157 -0
  184. codealmanac/workflows/sync/requests.py +63 -0
  185. codealmanac/workflows/sync/service.py +651 -0
  186. codealmanac/workflows/sync/store.py +51 -0
  187. codealmanac-0.1.0.dev0.dist-info/METADATA +248 -0
  188. codealmanac-0.1.0.dev0.dist-info/RECORD +192 -0
  189. codealmanac-0.1.0.dev0.dist-info/WHEEL +5 -0
  190. codealmanac-0.1.0.dev0.dist-info/entry_points.txt +2 -0
  191. codealmanac-0.1.0.dev0.dist-info/licenses/LICENSE.md +201 -0
  192. codealmanac-0.1.0.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,651 @@
1
+ from datetime import UTC, datetime, timedelta
2
+ from hashlib import sha256
3
+ from pathlib import Path
4
+ from uuid import uuid4
5
+
6
+ from codealmanac.core.paths import normalize_path
7
+ from codealmanac.services.runs.models import RunRecord, RunStatus
8
+ from codealmanac.services.runs.requests import ListRunsRequest
9
+ from codealmanac.services.runs.service import RunsService
10
+ from codealmanac.services.sources.models import TranscriptCandidate
11
+ from codealmanac.services.sources.requests import DiscoverTranscriptsRequest
12
+ from codealmanac.services.sources.service import SourcesService
13
+ from codealmanac.services.workspaces.models import Workspace
14
+ from codealmanac.services.workspaces.requests import SelectWorkspaceRequest
15
+ from codealmanac.services.workspaces.service import WorkspacesService
16
+ from codealmanac.workflows.ingest.requests import (
17
+ RunIngestRequest,
18
+ RunIngestWithRunRequest,
19
+ )
20
+ from codealmanac.workflows.ingest.service import IngestWorkflow
21
+ from codealmanac.workflows.sync.models import (
22
+ SyncCursorDecision,
23
+ SyncDecisionKind,
24
+ SyncEvaluation,
25
+ SyncLedger,
26
+ SyncLedgerEntry,
27
+ SyncLedgerStatus,
28
+ SyncMode,
29
+ SyncReady,
30
+ SyncSkipped,
31
+ SyncStarted,
32
+ SyncSummary,
33
+ SyncWorkItem,
34
+ TranscriptSnapshot,
35
+ )
36
+ from codealmanac.workflows.sync.requests import (
37
+ RunSyncRequest,
38
+ RunSyncStatusRequest,
39
+ SyncSelectionRequest,
40
+ )
41
+ from codealmanac.workflows.sync.store import SyncLedgerStore
42
+
43
# "sha256:"-prefixed digest of zero bytes; fresh_ledger_entry() stores it as the
# prefix hash of an entry whose absorbed size is 0.
EMPTY_SHA256 = "sha256:" + sha256(b"").hexdigest()
44
+
45
+
46
class SyncWorkflow:
    """Evaluate discovered transcripts against sync ledgers and ingest new content.

    The workflow discovers transcript candidates through ``sources``, keeps one
    ledger per repository root via ``ledger_store``, consults ``runs`` for run
    records, and delegates the actual ingestion to ``ingest``.
    """

    def __init__(
        self,
        workspaces: WorkspacesService,
        sources: SourcesService,
        runs: RunsService,
        ingest: IngestWorkflow,
        ledger_store: SyncLedgerStore,
    ):
        self.workspaces = workspaces
        self.sources = sources
        self.runs = runs
        self.ingest = ingest
        self.ledger_store = ledger_store

    def status(self, request: RunSyncStatusRequest) -> SyncSummary:
        """Return the summary of a ``SyncMode.STATUS`` evaluation."""
        return self.evaluate(request, SyncMode.STATUS).summary

    def run(self, request: RunSyncRequest) -> SyncSummary:
        """Ingest every work item produced by a ``SyncMode.SYNC`` evaluation.

        For each work item the ledger entry is first saved as pending, then the
        ingest is executed. An exception raised by the ingest is recorded as a
        failed entry and reported under ``needs_attention`` with the reason
        ``"ingest-failed"``; a successful ingest is recorded as absorbed and
        reported under ``started``.

        Returns:
            A copy of the evaluation summary whose ``started`` and
            ``needs_attention`` fields reflect the outcome of this run.
        """
        now = request.now or datetime.now(UTC)
        evaluation = self.evaluate(request, SyncMode.SYNC, now=now)
        claim_owner = request.claim_owner or sync_claim_owner(now)
        started: list[SyncStarted] = []
        needs_attention = list(evaluation.summary.needs_attention)
        ledgers = dict(evaluation.ledgers)
        for item in evaluation.work_items:
            ledger = ledgers[item.candidate.repo_root]
            ingest_request = RunIngestRequest(
                cwd=item.candidate.repo_root,
                inputs=(f"transcript:{item.candidate.transcript_path}",),
                harness=request.harness,
                wiki=request.wiki,
                title=sync_ingest_title(item.candidate),
                guidance=sync_ingest_guidance(item),
            )
            run = self.ingest.start(ingest_request)
            # Persist the pending claim before the ingest executes, so the
            # ledger on disk already names the run while it is in flight.
            pending = pending_entry(item.entry, item, now, claim_owner, run.run_id)
            ledger.sessions[item.ledger_key] = pending
            ledger = self.ledger_store.save(
                item.candidate.almanac_path,
                ledger,
                now,
            )
            ledgers[item.candidate.repo_root] = ledger
            item = item.model_copy(update={"entry": pending})
            try:
                result = self.ingest.run_with_run(
                    RunIngestWithRunRequest(
                        cwd=ingest_request.cwd,
                        inputs=ingest_request.inputs,
                        harness=ingest_request.harness,
                        wiki=ingest_request.wiki,
                        title=ingest_request.title,
                        guidance=ingest_request.guidance,
                        run_id=run.run_id,
                    )
                )
            except Exception as error:
                # One failing transcript must not abort the remaining items:
                # record the failure in the ledger and move on.
                ledger.sessions[item.ledger_key] = failed_entry(
                    item.entry,
                    error,
                    run.run_id,
                )
                self.ledger_store.save(
                    item.candidate.almanac_path,
                    ledger,
                    now,
                )
                needs_attention.append(skip(item.candidate, "ingest-failed"))
                continue
            ledger.sessions[item.ledger_key] = absorbed_entry(
                item.entry,
                item.snapshot,
                result.run.run_id,
                now,
            )
            ledgers[item.candidate.repo_root] = self.ledger_store.save(
                item.candidate.almanac_path,
                ledger,
                now,
            )
            started.append(
                SyncStarted(
                    app=item.candidate.app,
                    session_id=item.candidate.session_id,
                    transcript_path=item.candidate.transcript_path,
                    repo_root=item.candidate.repo_root,
                    run_id=result.run.run_id,
                    from_line=item.from_line,
                    to_line=item.to_line,
                )
            )
        return evaluation.summary.model_copy(
            update={
                "started": tuple(started),
                "needs_attention": tuple(needs_attention),
            }
        )

    def evaluate(
        self,
        request: SyncSelectionRequest,
        mode: SyncMode,
        now: datetime | None = None,
    ) -> SyncEvaluation:
        """Classify every in-scope transcript candidate.

        Each candidate ends up skipped, flagged as needing attention, or turned
        into a work item. In ``SyncMode.SYNC`` a pending ledger entry is first
        reconciled against the run records and, if that changed it, the ledger
        is saved. In ``SyncMode.STATUS`` ready candidates are also listed in
        the summary's ``ready`` field.

        Args:
            request: Selection parameters (home, apps, wiki, quiet window,
                pending timeout, retry budget, optional ``now``).
            mode: Whether this is a read-only status check or a sync.
            now: Overrides ``request.now`` and the wall clock when given.

        Returns:
            The summary, the work items, and the ledgers keyed by repo root.
        """
        current_time = now or request.now or datetime.now(UTC)
        candidates = self.sources.discover_transcripts(
            DiscoverTranscriptsRequest(
                home=normalize_path(request.home or Path.home()),
                apps=request.apps,
                almanac_roots=self.workspaces.discoverable_almanac_roots(),
            )
        )
        scoped_candidates = self.scope_candidates(request, candidates)
        ready: list[SyncReady] = []
        skipped: list[SyncSkipped] = []
        needs_attention: list[SyncSkipped] = []
        ledgers: dict[Path, SyncLedger] = {}
        work_items: list[SyncWorkItem] = []
        run_records: dict[Path, tuple[RunRecord, ...]] = {}
        for candidate in scoped_candidates:
            quiet_skip = quiet_window_skip(candidate, request, current_time)
            if quiet_skip is not None:
                skipped.append(quiet_skip)
                continue
            # Look up before loading: dict.setdefault() would evaluate its
            # default argument eagerly and list the runs again for every
            # candidate of an already-cached repository.
            records = run_records.get(candidate.repo_root)
            if records is None:
                records = self.runs.list(ListRunsRequest(cwd=candidate.repo_root))
                run_records[candidate.repo_root] = records
            if is_internal_transcript(candidate, records):
                skipped.append(skip(candidate, "internal-lifecycle-transcript"))
                continue
            # Same reasoning as above: read the ledger file once per repository.
            ledger = ledgers.get(candidate.repo_root)
            if ledger is None:
                ledger = self.ledger_store.load(candidate.almanac_path)
                ledgers[candidate.repo_root] = ledger
            snapshot = read_transcript(candidate)
            if snapshot is None:
                needs_attention.append(skip(candidate, "read-failed"))
                continue
            key = ledger_key(candidate)
            entry = ledger_entry(ledger, candidate, key)
            if mode == SyncMode.SYNC:
                reconciled = reconcile_pending_entry(entry, records, current_time)
                if reconciled != entry:
                    ledger.sessions[key] = reconciled
                    ledger = self.ledger_store.save(
                        candidate.almanac_path,
                        ledger,
                        current_time,
                    )
                    ledgers[candidate.repo_root] = ledger
                    entry = reconciled
            # NOTE(review): this check is placed outside the SYNC-only branch,
            # i.e. it runs in both modes — confirm against the original layout.
            pending_run_decision = evaluate_pending_run(entry, records)
            if pending_run_decision is not None:
                if pending_run_decision.kind == SyncDecisionKind.SKIP:
                    skipped.append(skip(candidate, pending_run_decision.reason))
                else:
                    needs_attention.append(
                        skip(candidate, pending_run_decision.reason)
                    )
                continue
            decision = evaluate_cursor(
                entry,
                snapshot,
                current_time,
                request.pending_timeout,
                request.max_failed_attempts,
            )
            if decision.kind == SyncDecisionKind.SKIP:
                skipped.append(skip(candidate, decision.reason))
            elif decision.kind == SyncDecisionKind.NEEDS_ATTENTION:
                needs_attention.append(skip(candidate, decision.reason))
            else:
                if mode == SyncMode.STATUS:
                    ready.append(
                        SyncReady(
                            app=candidate.app,
                            session_id=candidate.session_id,
                            transcript_path=candidate.transcript_path,
                            repo_root=candidate.repo_root,
                            from_line=decision.from_line,
                            to_line=decision.to_line,
                        )
                    )
                work_items.append(
                    SyncWorkItem(
                        candidate=candidate,
                        ledger_key=key,
                        entry=entry,
                        snapshot=snapshot,
                        from_line=decision.from_line,
                        to_line=decision.to_line,
                    )
                )
        summary = SyncSummary(
            mode=mode,
            # "scanned" counts every discovered candidate, not only scoped ones.
            scanned=len(candidates),
            eligible=len(work_items),
            ready=tuple(ready),
            skipped=tuple(skipped),
            needs_attention=tuple(needs_attention),
        )
        return SyncEvaluation(
            summary=summary,
            work_items=tuple(work_items),
            ledgers=ledgers,
        )

    def scope_candidates(
        self,
        request: SyncSelectionRequest,
        candidates: tuple[TranscriptCandidate, ...],
    ) -> tuple[TranscriptCandidate, ...]:
        """Restrict candidates to the workspace selected by ``request.wiki``.

        When ``request.wiki`` is ``None`` the candidates are returned unchanged.
        """
        if request.wiki is None:
            return candidates
        workspace = self.workspaces.select(
            SelectWorkspaceRequest(selector=request.wiki, base_path=request.cwd)
        )
        return tuple(
            candidate
            for candidate in candidates
            if same_workspace(candidate.repo_root, workspace)
        )
270
+
271
+
272
def same_workspace(repo_root: Path, workspace: Workspace) -> bool:
    """Report whether ``repo_root`` and the workspace root normalize to the same path."""
    candidate_root = normalize_path(repo_root)
    workspace_root = normalize_path(workspace.root_path)
    return candidate_root == workspace_root
274
+
275
+
276
def quiet_window_skip(
    candidate: TranscriptCandidate,
    request: SyncSelectionRequest,
    now: datetime,
) -> SyncSkipped | None:
    """Return a "quiet-window" skip when the transcript was modified too recently.

    The transcript is skipped while the time since ``candidate.modified_at`` is
    still shorter than ``request.quiet``; otherwise ``None`` is returned.
    """
    age = now - candidate.modified_at
    return skip(candidate, "quiet-window") if age < request.quiet else None
284
+
285
+
286
def is_internal_transcript(
    candidate: TranscriptCandidate,
    records: tuple[RunRecord, ...],
) -> bool:
    """Report whether any run record's harness transcript refers to ``candidate``.

    A record matches when its transcript reference is of the same app kind and
    either the session id or the normalized transcript path is equal.
    """
    target_path = normalize_path(candidate.transcript_path)

    def refers_to_candidate(ref) -> bool:
        # References of another harness kind (or missing ones) never match.
        if ref is None or ref.kind.value != candidate.app.value:
            return False
        if ref.session_id == candidate.session_id:
            return True
        return (
            ref.transcript_path is not None
            and normalize_path(ref.transcript_path) == target_path
        )

    return any(refers_to_candidate(record.harness_transcript) for record in records)
303
+
304
+
305
def read_transcript(candidate: TranscriptCandidate) -> TranscriptSnapshot | None:
    """Read the transcript file into a snapshot, or return ``None`` on ``OSError``.

    The snapshot carries the raw bytes, their length, and the line count of the
    content decoded as UTF-8 (undecodable bytes are replaced, not fatal).
    """
    try:
        raw = candidate.transcript_path.read_bytes()
    except OSError:
        return None
    text = raw.decode("utf-8", errors="replace")
    return TranscriptSnapshot(
        content=raw,
        current_size=len(raw),
        current_line=count_lines(text),
    )
315
+
316
+
317
def fresh_ledger_entry(candidate: TranscriptCandidate) -> SyncLedgerEntry:
    """Build the ledger entry for a transcript of which nothing has been absorbed."""
    identity = {
        "app": candidate.app,
        "session_id": candidate.session_id,
        "transcript_path": candidate.transcript_path,
    }
    # Zero-length cursor: size 0, line 0, and the hash of empty content.
    cursor = {
        "last_absorbed_size": 0,
        "last_absorbed_line": 0,
        "last_absorbed_prefix_hash": EMPTY_SHA256,
    }
    return SyncLedgerEntry(status=SyncLedgerStatus.DONE, **identity, **cursor)
327
+
328
+
329
def absorbed_entry(
    entry: SyncLedgerEntry,
    snapshot: TranscriptSnapshot,
    run_id: str,
    now: datetime,
) -> SyncLedgerEntry:
    """Return a copy of ``entry`` marking the whole snapshot as absorbed by ``run_id``.

    The cursor moves to the snapshot's size, line count, and content hash; the
    error, the failed-attempt counter, and all pending fields are reset.
    """
    changes = {
        "status": SyncLedgerStatus.DONE,
        "last_absorbed_size": snapshot.current_size,
        "last_absorbed_line": snapshot.current_line,
        "last_absorbed_prefix_hash": sha256_bytes(snapshot.content),
        "last_absorbed_at": now,
        "last_job_id": run_id,
        "last_error": None,
        "failed_attempts": 0,
    }
    changes.update(cleared_pending_fields())
    return entry.model_copy(update=changes)
354
+
355
+
356
def failed_entry(
    entry: SyncLedgerEntry,
    error: Exception,
    run_id: str | None = None,
) -> SyncLedgerEntry:
    """Return a copy of ``entry`` recording one more failed attempt.

    The job id falls back from ``run_id`` to the entry's pending run id and
    then to its previous job id. All pending fields are reset.
    """
    job_id = run_id or entry.pending_run_id or entry.last_job_id
    changes = {
        "status": SyncLedgerStatus.FAILED,
        "last_error": first_error_line(error),
        "last_job_id": job_id,
        "failed_attempts": entry.failed_attempts + 1,
    }
    changes.update(cleared_pending_fields())
    return entry.model_copy(update=changes)
376
+
377
+
378
def pending_entry(
    entry: SyncLedgerEntry,
    item: SyncWorkItem,
    now: datetime,
    owner: str,
    run_id: str,
) -> SyncLedgerEntry:
    """Return a copy of ``entry`` claimed as pending for ``run_id`` by ``owner``.

    The pending fields capture the snapshot size and content hash plus the
    work item's line range; any previous error is cleared.
    """
    snapshot = item.snapshot
    claim = {
        "status": SyncLedgerStatus.PENDING,
        "last_error": None,
        "pending_started_at": now,
        "pending_owner": owner,
        "pending_run_id": run_id,
    }
    target = {
        "pending_to_size": snapshot.current_size,
        "pending_prefix_hash": sha256_bytes(snapshot.content),
        "pending_from_line": item.from_line,
        "pending_to_line": item.to_line,
    }
    return entry.model_copy(update={**claim, **target})
398
+
399
+
400
def first_error_line(error: Exception) -> str:
    """Return the first line of the stripped error message.

    Falls back to the exception's class name when the message is blank.
    """
    text = str(error).strip()
    if not text:
        return type(error).__name__
    first, *_ = text.splitlines()
    return first
405
+
406
+
407
def evaluate_cursor(
    entry: SyncLedgerEntry,
    snapshot: TranscriptSnapshot,
    now: datetime,
    pending_timeout: timedelta,
    max_failed_attempts: int,
) -> SyncCursorDecision:
    """Decide whether a transcript is ready, skipped, or needs attention.

    The ledger status is checked first (needs-attention, exhausted retry
    budget, pending), then the snapshot is compared with the absorbed cursor:
    no growth is a skip, a changed prefix needs attention, anything else is
    ready starting at the line after the last absorbed one.
    """

    def attention(reason: str) -> SyncCursorDecision:
        return SyncCursorDecision(kind=SyncDecisionKind.NEEDS_ATTENTION, reason=reason)

    def skipped(reason: str) -> SyncCursorDecision:
        return SyncCursorDecision(kind=SyncDecisionKind.SKIP, reason=reason)

    status = entry.status
    if status == SyncLedgerStatus.NEEDS_ATTENTION:
        return attention(entry.last_error or "sync-needs-attention")
    if (
        status == SyncLedgerStatus.FAILED
        and entry.failed_attempts >= max_failed_attempts
    ):
        return attention("sync-retry-budget-exhausted")
    if status == SyncLedgerStatus.PENDING:
        if pending_is_stale(entry, now, pending_timeout):
            return attention("sync-pending-stale")
        return skipped("sync-already-pending")

    absorbed_size = entry.last_absorbed_size
    # NOTE(review): a transcript that shrank below the absorbed size is also
    # reported as "unchanged" here — confirm that is intended.
    if snapshot.current_size <= absorbed_size:
        return skipped("unchanged")
    # The already-absorbed prefix must be byte-identical to what was hashed.
    if sha256_bytes(snapshot.content[:absorbed_size]) != entry.last_absorbed_prefix_hash:
        return attention("prefix-mismatch")
    return SyncCursorDecision(
        kind=SyncDecisionKind.READY,
        from_line=entry.last_absorbed_line + 1,
        to_line=snapshot.current_line,
    )
450
+
451
+
452
def pending_is_stale(
    entry: SyncLedgerEntry,
    now: datetime,
    pending_timeout: timedelta,
) -> bool:
    """Report whether a pending claim has outlived ``pending_timeout``.

    A claim without a start time is always considered stale.
    """
    started_at = entry.pending_started_at
    return started_at is None or now - started_at > pending_timeout
460
+
461
+
462
def evaluate_pending_run(
    entry: SyncLedgerEntry,
    records: tuple[RunRecord, ...],
) -> SyncCursorDecision | None:
    """Judge a pending entry by the status of the run it points at.

    Returns ``None`` when the entry is not pending, names no run, or the run
    has no record. A queued or running run is a skip; a finished run (done or
    otherwise) needs attention.
    """
    run_id = entry.pending_run_id
    if entry.status != SyncLedgerStatus.PENDING or run_id is None:
        return None
    record = run_record(records, run_id)
    if record is None:
        return None
    if record.status in {RunStatus.QUEUED, RunStatus.RUNNING}:
        kind, reason = SyncDecisionKind.SKIP, "sync-pending-run-active"
    elif record.status == RunStatus.DONE:
        kind, reason = SyncDecisionKind.NEEDS_ATTENTION, "sync-pending-run-done"
    else:
        kind, reason = SyncDecisionKind.NEEDS_ATTENTION, "sync-pending-run-failed"
    return SyncCursorDecision(kind=kind, reason=reason)
485
+
486
+
487
def reconcile_pending_entry(
    entry: SyncLedgerEntry,
    records: tuple[RunRecord, ...],
    now: datetime,
) -> SyncLedgerEntry:
    """Resolve a pending entry whose run has finished.

    The entry is returned untouched when it is not pending, names no run, the
    run has no record, or the run is still queued/running. A run that finished
    in any state other than done becomes a failed entry. A done run promotes
    the pending cursor to the absorbed cursor, unless the pending cursor is
    incomplete, in which case the entry is flagged as needing attention.
    """
    if entry.status != SyncLedgerStatus.PENDING or entry.pending_run_id is None:
        return entry
    record = run_record(records, entry.pending_run_id)
    if record is None:
        return entry
    if record.status in {RunStatus.QUEUED, RunStatus.RUNNING}:
        return entry

    if record.status != RunStatus.DONE:
        failure = {
            "status": SyncLedgerStatus.FAILED,
            "last_job_id": record.run_id,
            "last_error": record.error or f"sync-pending-run-{record.status.value}",
            "failed_attempts": entry.failed_attempts + 1,
        }
        return entry.model_copy(update={**failure, **cleared_pending_fields()})

    if not pending_cursor_complete(entry):
        return needs_attention_entry(
            entry,
            "sync-pending-missing-cursor",
            record.run_id,
        )

    promoted = {
        "status": SyncLedgerStatus.DONE,
        "last_absorbed_size": entry.pending_to_size,
        "last_absorbed_line": entry.pending_to_line,
        "last_absorbed_prefix_hash": entry.pending_prefix_hash,
        # Prefer the run's own finish time; fall back to the evaluation time.
        "last_absorbed_at": record.finished_at or now,
        "last_job_id": record.run_id,
        "last_error": None,
    }
    return entry.model_copy(update={**promoted, **cleared_pending_fields()})
525
+
526
+
527
def pending_cursor_complete(entry: SyncLedgerEntry) -> bool:
    """Report whether the pending size, line, and prefix hash are all set."""
    required = (
        entry.pending_to_size,
        entry.pending_to_line,
        entry.pending_prefix_hash,
    )
    return all(value is not None for value in required)
533
+
534
+
535
def needs_attention_entry(
    entry: SyncLedgerEntry,
    reason: str,
    run_id: str,
) -> SyncLedgerEntry:
    """Return a copy of ``entry`` flagged as needing attention.

    ``reason`` is stored as the last error, ``run_id`` as the last job id, and
    all pending fields are reset.
    """
    changes = dict(cleared_pending_fields())
    changes["status"] = SyncLedgerStatus.NEEDS_ATTENTION
    changes["last_job_id"] = run_id
    changes["last_error"] = reason
    return entry.model_copy(update=changes)
548
+
549
+
550
def cleared_pending_fields() -> dict[str, None]:
    """Return a new mapping that sets every pending-claim field to ``None``."""
    pending_field_names = (
        "pending_started_at",
        "pending_owner",
        "pending_run_id",
        "pending_to_size",
        "pending_prefix_hash",
        "pending_from_line",
        "pending_to_line",
    )
    return dict.fromkeys(pending_field_names, None)
560
+
561
+
562
def run_record(records: tuple[RunRecord, ...], run_id: str) -> RunRecord | None:
    """Return the first record whose ``run_id`` matches, or ``None`` if none does."""
    matches = (candidate for candidate in records if candidate.run_id == run_id)
    return next(matches, None)
567
+
568
+
569
def sync_claim_owner(now: datetime) -> str:
    """Build a claim-owner label from a second-resolution timestamp and random hex.

    The result has the form ``sync-<YYYYmmddHHMMSS>-<8 hex characters>``.
    """
    timestamp = now.strftime("%Y%m%d%H%M%S")
    random_suffix = uuid4().hex[:8]
    return "-".join(("sync", timestamp, random_suffix))
572
+
573
+
574
def ledger_key(candidate: TranscriptCandidate) -> str:
    """Return the ledger key: the app value and the normalized transcript path."""
    normalized = normalize_path(candidate.transcript_path)
    return "{}:{}".format(candidate.app.value, normalized)
576
+
577
+
578
def ledger_entry(
    ledger: SyncLedger,
    candidate: TranscriptCandidate,
    key: str,
) -> SyncLedgerEntry:
    """Find the ledger entry for ``candidate``, or build a fresh one.

    Lookup order: the given key, then the key built from the un-normalized
    path, then any stored entry with the same identity.
    """
    sessions = ledger.sessions
    found = sessions.get(key)
    if found is None:
        unnormalized_key = raw_ledger_key(candidate)
        if unnormalized_key != key:
            found = sessions.get(unnormalized_key)
    if found is not None:
        return found
    # Last resort: scan all entries for one describing the same transcript.
    same_identity = (
        stored for stored in sessions.values() if same_ledger_identity(stored, candidate)
    )
    found = next(same_identity, None)
    if found is not None:
        return found
    return fresh_ledger_entry(candidate)
595
+
596
+
597
def raw_ledger_key(candidate: TranscriptCandidate) -> str:
    """Return the ledger key built from the transcript path exactly as given."""
    return "{}:{}".format(candidate.app.value, candidate.transcript_path)
599
+
600
+
601
def same_ledger_identity(
    entry: SyncLedgerEntry,
    candidate: TranscriptCandidate,
) -> bool:
    """Report whether ``entry`` describes the same transcript as ``candidate``.

    App, session id, and normalized transcript path must all be equal.
    """
    if not entry.app == candidate.app:
        return False
    if not entry.session_id == candidate.session_id:
        return False
    stored_path = normalize_path(entry.transcript_path)
    return stored_path == normalize_path(candidate.transcript_path)
611
+
612
+
613
def sha256_bytes(content: bytes) -> str:
    """Return the SHA-256 hex digest of ``content`` prefixed with ``sha256:``."""
    digest = sha256(content).hexdigest()
    return "sha256:" + digest
615
+
616
+
617
def count_lines(content: str) -> int:
    """Count lines, treating a final line without a trailing newline as a line."""
    if not content:
        return 0
    newline_count = content.count("\n")
    if content.endswith("\n"):
        return newline_count
    # The unterminated last line is not covered by the newline count.
    return newline_count + 1
621
+
622
+
623
def skip(candidate: TranscriptCandidate, reason: str) -> SyncSkipped:
    """Build a ``SyncSkipped`` record for ``candidate`` carrying ``reason``."""
    identity = {
        "app": candidate.app,
        "session_id": candidate.session_id,
        "transcript_path": candidate.transcript_path,
        "repo_root": candidate.repo_root,
    }
    return SyncSkipped(reason=reason, **identity)
631
+
632
+
633
def sync_ingest_title(candidate: TranscriptCandidate) -> str:
    """Return the ingest title naming the candidate's app and session id."""
    return "Sync {} transcript {}".format(candidate.app.value, candidate.session_id)
635
+
636
+
637
def sync_ingest_guidance(item: SyncWorkItem) -> str:
    """Render the newline-joined guidance text describing the sync cursor.

    The text names the app, session, and transcript, states how far the
    transcript was previously absorbed, and points at the first new line.
    """
    candidate = item.candidate
    absorbed = item.entry
    lines = [
        "Scheduled sync cursor:",
        f"- App: {candidate.app.value}",
        f"- Session id: {candidate.session_id}",
        f"- Transcript: {candidate.transcript_path}",
        f"- Previously absorbed through line: {absorbed.last_absorbed_line}",
        f"- Previously absorbed through byte: {absorbed.last_absorbed_size}",
        f"- Focus on line {item.from_line} onward.",
        "- You may inspect earlier lines only for context.",
    ]
    lines.append(
        "- Do not re-document decisions already absorbed unless newer lines "
        "amend, invalidate, or add important nuance to them."
    )
    return "\n".join(lines)
@@ -0,0 +1,51 @@
1
+ from datetime import UTC, datetime
2
+ from pathlib import Path
3
+ from uuid import uuid4
4
+
5
+ from pydantic import ValidationError
6
+
7
+ from codealmanac.workflows.sync.models import SyncLedger
8
+
9
# Version number written into every empty ledger created by empty_ledger().
SYNC_LEDGER_VERSION: int = 1
10
+
11
+
12
class SyncLedgerStore:
    """Load and save the sync ledger JSON file that lives under an almanac path."""

    def load(self, almanac_path: Path) -> SyncLedger:
        """Read the ledger, falling back to an empty one.

        An unreadable file (``OSError``) or unparsable/invalid content
        (``ValidationError``/``ValueError``) yields ``empty_ledger()``.
        """
        ledger_file = sync_ledger_path(almanac_path)
        try:
            raw = ledger_file.read_text(encoding="utf-8")
            return SyncLedger.model_validate_json(raw)
        except (OSError, ValidationError, ValueError):
            # NOTE(review): a corrupt ledger is silently treated as empty, so
            # the next save() replaces it — confirm this is intended.
            return empty_ledger()

    def save(
        self,
        almanac_path: Path,
        ledger: SyncLedger,
        now: datetime,
    ) -> SyncLedger:
        """Write the ledger stamped with ``now`` and return the stamped copy.

        The JSON is written to a uniquely named temporary sibling file that
        then replaces the ledger file; a leftover temporary file is removed.
        """
        stamped = ledger.model_copy(update={"updated_at": now})
        target = sync_ledger_path(almanac_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        scratch_name = f".{target.name}.{uuid4().hex}.tmp"
        scratch = target.with_name(scratch_name)
        try:
            payload = stamped.model_dump_json(indent=2)
            scratch.write_text(payload, encoding="utf-8")
            scratch.replace(target)
        finally:
            # After a successful replace the scratch file no longer exists;
            # it only lingers when writing or replacing failed.
            if scratch.exists():
                scratch.unlink()
        return stamped
40
+
41
+
42
def sync_ledger_path(almanac_path: Path) -> Path:
    """Return the ledger file location: ``<almanac_path>/jobs/sync-ledger.json``."""
    return almanac_path.joinpath("jobs", "sync-ledger.json")
44
+
45
+
46
def empty_ledger() -> SyncLedger:
    """Return a ledger with no sessions, stamped with the Unix epoch in UTC."""
    epoch = datetime.fromtimestamp(0, tz=UTC)
    return SyncLedger(version=SYNC_LEDGER_VERSION, updated_at=epoch, sessions={})