codealmanac 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codealmanac/__init__.py +13 -0
- codealmanac/app.py +175 -0
- codealmanac/cli/__init__.py +1 -0
- codealmanac/cli/dispatch/__init__.py +0 -0
- codealmanac/cli/dispatch/admin.py +124 -0
- codealmanac/cli/dispatch/config.py +50 -0
- codealmanac/cli/dispatch/root.py +328 -0
- codealmanac/cli/main.py +28 -0
- codealmanac/cli/parser/__init__.py +0 -0
- codealmanac/cli/parser/admin.py +81 -0
- codealmanac/cli/parser/lifecycle.py +57 -0
- codealmanac/cli/parser/root.py +19 -0
- codealmanac/cli/parser/wiki.py +87 -0
- codealmanac/cli/render/__init__.py +0 -0
- codealmanac/cli/render/admin.py +191 -0
- codealmanac/cli/render/root.py +290 -0
- codealmanac/core/__init__.py +1 -0
- codealmanac/core/errors.py +45 -0
- codealmanac/core/models.py +14 -0
- codealmanac/core/paths.py +25 -0
- codealmanac/core/slug.py +7 -0
- codealmanac/core/text.py +5 -0
- codealmanac/database/__init__.py +15 -0
- codealmanac/database/sqlite.py +54 -0
- codealmanac/integrations/__init__.py +1 -0
- codealmanac/integrations/automation/__init__.py +3 -0
- codealmanac/integrations/automation/scheduler/__init__.py +5 -0
- codealmanac/integrations/automation/scheduler/launchd.py +163 -0
- codealmanac/integrations/command.py +56 -0
- codealmanac/integrations/harnesses/__init__.py +7 -0
- codealmanac/integrations/harnesses/claude/__init__.py +1 -0
- codealmanac/integrations/harnesses/claude/adapter.py +217 -0
- codealmanac/integrations/harnesses/codex/__init__.py +3 -0
- codealmanac/integrations/harnesses/codex/adapter.py +221 -0
- codealmanac/integrations/harnesses/git_status.py +49 -0
- codealmanac/integrations/sources/__init__.py +29 -0
- codealmanac/integrations/sources/filesystem/__init__.py +5 -0
- codealmanac/integrations/sources/filesystem/adapter.py +685 -0
- codealmanac/integrations/sources/filesystem/selection.py +209 -0
- codealmanac/integrations/sources/git/__init__.py +3 -0
- codealmanac/integrations/sources/git/adapter.py +132 -0
- codealmanac/integrations/sources/github/__init__.py +3 -0
- codealmanac/integrations/sources/github/adapter.py +413 -0
- codealmanac/integrations/sources/runtime.py +22 -0
- codealmanac/integrations/sources/transcripts/__init__.py +33 -0
- codealmanac/integrations/sources/transcripts/claude.py +61 -0
- codealmanac/integrations/sources/transcripts/codex.py +69 -0
- codealmanac/integrations/sources/transcripts/jsonl.py +84 -0
- codealmanac/integrations/sources/transcripts/runtime.py +387 -0
- codealmanac/integrations/sources/web/__init__.py +3 -0
- codealmanac/integrations/sources/web/adapter.py +303 -0
- codealmanac/integrations/updates/__init__.py +7 -0
- codealmanac/integrations/updates/package.py +85 -0
- codealmanac/integrations/workspaces/__init__.py +1 -0
- codealmanac/integrations/workspaces/git/__init__.py +3 -0
- codealmanac/integrations/workspaces/git/probe.py +128 -0
- codealmanac/manual/README.md +24 -0
- codealmanac/manual/__init__.py +19 -0
- codealmanac/manual/build.md +20 -0
- codealmanac/manual/evidence.md +23 -0
- codealmanac/manual/garden.md +20 -0
- codealmanac/manual/ingest.md +17 -0
- codealmanac/manual/library.py +84 -0
- codealmanac/manual/models.py +83 -0
- codealmanac/manual/pages.md +28 -0
- codealmanac/manual/requests.py +6 -0
- codealmanac/manual/sources.md +18 -0
- codealmanac/manual/style.md +19 -0
- codealmanac/prompts/__init__.py +5 -0
- codealmanac/prompts/base/notability.md +14 -0
- codealmanac/prompts/base/purpose.md +23 -0
- codealmanac/prompts/base/syntax.md +19 -0
- codealmanac/prompts/models.py +9 -0
- codealmanac/prompts/operations/garden.md +26 -0
- codealmanac/prompts/operations/ingest.md +18 -0
- codealmanac/prompts/renderer.py +24 -0
- codealmanac/prompts/requests.py +22 -0
- codealmanac/server/__init__.py +1 -0
- codealmanac/server/app.py +202 -0
- codealmanac/server/assets/__init__.py +1 -0
- codealmanac/server/assets/app.css +865 -0
- codealmanac/server/assets/app.js +3 -0
- codealmanac/server/assets/index.html +80 -0
- codealmanac/server/assets/viewer/api.js +30 -0
- codealmanac/server/assets/viewer/components.js +197 -0
- codealmanac/server/assets/viewer/main.js +126 -0
- codealmanac/server/assets/viewer/renderers.js +122 -0
- codealmanac/server/assets/viewer/routes.js +36 -0
- codealmanac/services/__init__.py +1 -0
- codealmanac/services/automation/__init__.py +3 -0
- codealmanac/services/automation/models.py +83 -0
- codealmanac/services/automation/ports.py +14 -0
- codealmanac/services/automation/requests.py +40 -0
- codealmanac/services/automation/service.py +294 -0
- codealmanac/services/config/__init__.py +17 -0
- codealmanac/services/config/models.py +61 -0
- codealmanac/services/config/requests.py +21 -0
- codealmanac/services/config/service.py +55 -0
- codealmanac/services/config/store.py +26 -0
- codealmanac/services/diagnostics/__init__.py +1 -0
- codealmanac/services/diagnostics/models.py +22 -0
- codealmanac/services/diagnostics/requests.py +8 -0
- codealmanac/services/diagnostics/service.py +283 -0
- codealmanac/services/harnesses/__init__.py +1 -0
- codealmanac/services/harnesses/models.py +104 -0
- codealmanac/services/harnesses/ports.py +18 -0
- codealmanac/services/harnesses/requests.py +19 -0
- codealmanac/services/harnesses/service.py +38 -0
- codealmanac/services/health/__init__.py +1 -0
- codealmanac/services/health/requests.py +8 -0
- codealmanac/services/health/service.py +20 -0
- codealmanac/services/index/__init__.py +1 -0
- codealmanac/services/index/models.py +135 -0
- codealmanac/services/index/requests.py +26 -0
- codealmanac/services/index/service.py +86 -0
- codealmanac/services/index/store.py +411 -0
- codealmanac/services/index/views.py +524 -0
- codealmanac/services/pages/__init__.py +1 -0
- codealmanac/services/pages/requests.py +17 -0
- codealmanac/services/pages/service.py +26 -0
- codealmanac/services/runs/__init__.py +1 -0
- codealmanac/services/runs/models.py +91 -0
- codealmanac/services/runs/requests.py +76 -0
- codealmanac/services/runs/service.py +86 -0
- codealmanac/services/runs/store.py +256 -0
- codealmanac/services/search/__init__.py +1 -0
- codealmanac/services/search/requests.py +23 -0
- codealmanac/services/search/service.py +31 -0
- codealmanac/services/sources/__init__.py +1 -0
- codealmanac/services/sources/models.py +126 -0
- codealmanac/services/sources/ports.py +30 -0
- codealmanac/services/sources/requests.py +76 -0
- codealmanac/services/sources/service.py +351 -0
- codealmanac/services/tagging/__init__.py +1 -0
- codealmanac/services/tagging/models.py +9 -0
- codealmanac/services/tagging/requests.py +35 -0
- codealmanac/services/tagging/service.py +43 -0
- codealmanac/services/topics/__init__.py +1 -0
- codealmanac/services/topics/models.py +36 -0
- codealmanac/services/topics/requests.py +115 -0
- codealmanac/services/topics/service.py +297 -0
- codealmanac/services/updates/__init__.py +4 -0
- codealmanac/services/updates/models.py +83 -0
- codealmanac/services/updates/ports.py +17 -0
- codealmanac/services/updates/requests.py +10 -0
- codealmanac/services/updates/service.py +113 -0
- codealmanac/services/viewer/__init__.py +1 -0
- codealmanac/services/viewer/models.py +80 -0
- codealmanac/services/viewer/renderer.py +89 -0
- codealmanac/services/viewer/requests.py +86 -0
- codealmanac/services/viewer/service.py +211 -0
- codealmanac/services/wiki/__init__.py +1 -0
- codealmanac/services/wiki/documents.py +83 -0
- codealmanac/services/wiki/frontmatter.py +94 -0
- codealmanac/services/wiki/frontmatter_rewrite.py +142 -0
- codealmanac/services/wiki/models.py +69 -0
- codealmanac/services/wiki/paths.py +42 -0
- codealmanac/services/wiki/service.py +57 -0
- codealmanac/services/wiki/templates.py +73 -0
- codealmanac/services/wiki/topics.py +266 -0
- codealmanac/services/wiki/wikilinks.py +58 -0
- codealmanac/services/workspaces/__init__.py +1 -0
- codealmanac/services/workspaces/models.py +124 -0
- codealmanac/services/workspaces/ports.py +9 -0
- codealmanac/services/workspaces/requests.py +82 -0
- codealmanac/services/workspaces/roots.py +74 -0
- codealmanac/services/workspaces/service.py +303 -0
- codealmanac/services/workspaces/store.py +127 -0
- codealmanac/workflows/__init__.py +1 -0
- codealmanac/workflows/build/__init__.py +1 -0
- codealmanac/workflows/build/models.py +8 -0
- codealmanac/workflows/build/service.py +45 -0
- codealmanac/workflows/garden/__init__.py +3 -0
- codealmanac/workflows/garden/models.py +30 -0
- codealmanac/workflows/garden/requests.py +22 -0
- codealmanac/workflows/garden/service.py +239 -0
- codealmanac/workflows/ingest/__init__.py +1 -0
- codealmanac/workflows/ingest/models.py +26 -0
- codealmanac/workflows/ingest/requests.py +39 -0
- codealmanac/workflows/ingest/service.py +302 -0
- codealmanac/workflows/lifecycle.py +197 -0
- codealmanac/workflows/sync/__init__.py +3 -0
- codealmanac/workflows/sync/models.py +157 -0
- codealmanac/workflows/sync/requests.py +63 -0
- codealmanac/workflows/sync/service.py +651 -0
- codealmanac/workflows/sync/store.py +51 -0
- codealmanac-0.1.0.dev0.dist-info/METADATA +248 -0
- codealmanac-0.1.0.dev0.dist-info/RECORD +192 -0
- codealmanac-0.1.0.dev0.dist-info/WHEEL +5 -0
- codealmanac-0.1.0.dev0.dist-info/entry_points.txt +2 -0
- codealmanac-0.1.0.dev0.dist-info/licenses/LICENSE.md +201 -0
- codealmanac-0.1.0.dev0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
from enum import StrEnum
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import jsonlines
|
|
7
|
+
from pydantic import Field, JsonValue, ValidationError, field_validator
|
|
8
|
+
|
|
9
|
+
from codealmanac.core.models import CodeAlmanacModel
|
|
10
|
+
from codealmanac.core.paths import normalize_path
|
|
11
|
+
from codealmanac.core.text import required_text
|
|
12
|
+
from codealmanac.integrations.sources.runtime import source_runtime_section
|
|
13
|
+
from codealmanac.services.sources.models import (
|
|
14
|
+
SourceKind,
|
|
15
|
+
SourceRef,
|
|
16
|
+
SourceRuntime,
|
|
17
|
+
SourceRuntimeStatus,
|
|
18
|
+
)
|
|
19
|
+
from codealmanac.services.sources.requests import InspectSourceRuntimeRequest
|
|
20
|
+
|
|
21
|
+
# Default character budget for rendered transcript content; used as the
# default ``max_chars`` of TranscriptSourceRuntimeAdapter.
DEFAULT_MAX_CHARS = 60_000
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TranscriptRuntimeLineKind(StrEnum):
    """Category attached to each rendered transcript entry."""

    META = "meta"  # identifiers / working directory of a line or payload
    MESSAGE = "message"  # message or item content
    TOOL_CALL = "tool_call"  # item recognised as a tool/function call
    TOOL_RESULT = "tool_result"  # item recognised as a tool/function output
    EVENT = "event"  # payload that carries a plain ``message`` string
    RAW = "raw"  # anything unrecognised, rendered as compact JSON
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TranscriptRuntimeEntry(CodeAlmanacModel):
    """One rendered transcript line: where it came from, what it is, its text."""

    line_number: int
    kind: TranscriptRuntimeLineKind
    label: str
    text: str

    @field_validator("line_number")
    @classmethod
    def positive_line_number(cls, value: int) -> int:
        """Reject line numbers below 1 (file lines are counted from 1)."""
        if value >= 1:
            return value
        raise ValueError("line number must be positive")

    @field_validator("label", "text")
    @classmethod
    def require_text(cls, value: str) -> str:
        """Delegate the label/text check to the shared ``required_text`` helper."""
        checked = required_text(value, "transcript runtime entry")
        return checked
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class TranscriptJsonLine(CodeAlmanacModel):
    """Top-level shape of one parsed transcript JSONL object; every field is optional."""

    # External provider JSON is intentionally kept as JsonValue at this first
    # boundary; helpers below validate known sub-shapes before reading fields.
    type: str | None = None
    timestamp: str | None = None
    # Declared with the camelCase alias "sessionId".
    session_id: str | None = Field(default=None, alias="sessionId")
    cwd: str | None = None
    payload: JsonValue | None = None  # narrowed by parse_payload
    message: JsonValue | None = None  # narrowed by parse_message
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class TranscriptPayload(CodeAlmanacModel):
    """Known sub-shape of a line's ``payload`` object (validated by parse_payload)."""

    id: str | None = None
    cwd: str | None = None
    thread_source: str | None = None
    message: str | None = None  # rendered as an EVENT entry when present
    item: JsonValue | None = None  # narrowed by parse_item
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TranscriptMessage(CodeAlmanacModel):
    """Known sub-shape of a line's ``message`` object (validated by parse_message)."""

    role: str | None = None
    content: JsonValue | None = None  # rendered to text via render_json_text
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class TranscriptItem(CodeAlmanacModel):
    """Known sub-shape of a payload's ``item`` object (validated by parse_item)."""

    type: str | None = None
    role: str | None = None
    name: str | None = None  # a non-None name makes entry_from_item treat this as a tool call
    call_id: str | None = None
    content: JsonValue | None = None
    arguments: JsonValue | None = None  # rendered as the tool-call text
    output: JsonValue | None = None  # rendered as the tool-result text
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class TranscriptSourceRuntimeAdapter:
    """Source runtime adapter that renders a JSONL transcript file as text.

    ``max_chars`` bounds the rendered content; longer renderings are cut by
    ``bounded_tail_text`` and the resulting runtime is flagged as truncated.
    """

    def __init__(self, max_chars: int = DEFAULT_MAX_CHARS):
        self.max_chars = max_chars

    def supports(self, ref: SourceRef) -> bool:
        """Return True when ``ref`` is a transcript source."""
        return ref.kind == SourceKind.TRANSCRIPT

    def inspect(self, request: InspectSourceRuntimeRequest) -> SourceRuntime:
        """Read and render the transcript referenced by ``request.ref``.

        Returns a SKIPPED runtime for non-transcript refs, an UNAVAILABLE
        runtime carrying one diagnostic when the path is missing, is not a
        file, cannot be read, or yields no JSON objects, and otherwise an
        AVAILABLE runtime with a metadata section followed by the transcript.
        """
        ref = request.ref
        if ref.kind != SourceKind.TRANSCRIPT:
            return SourceRuntime(
                ref=ref,
                status=SourceRuntimeStatus.SKIPPED,
                title=f"Unsupported transcript source {ref.identity}",
            )
        path = transcript_path(request.cwd, ref)
        if path is None:
            return unavailable_runtime(
                ref,
                "Transcript unavailable",
                "transcript source requires a path",
            )
        if not path.is_file():
            return unavailable_runtime(
                ref,
                "Transcript unavailable",
                f"transcript file not found: {path}",
            )
        try:
            entries = tuple(read_transcript_entries(path))
        except (OSError, UnicodeDecodeError) as error:
            # The file can disappear, be unreadable, or hold undecodable bytes
            # even after the is_file() check; report it instead of crashing.
            return unavailable_runtime(
                ref,
                "Transcript unavailable",
                f"transcript file could not be read: {path} ({type(error).__name__})",
            )
        if not entries:
            return unavailable_runtime(
                ref,
                f"Transcript {path}",
                "no readable JSONL objects found",
            )
        body = "\n".join(render_entry(entry) for entry in entries)
        sections = (
            source_runtime_section(
                "metadata",
                f"path: {path}\nreadable_entries: {len(entries)}",
            ),
            source_runtime_section("transcript", body),
        )
        content, truncated = bounded_tail_text("\n\n".join(sections), self.max_chars)
        return SourceRuntime(
            ref=ref,
            status=SourceRuntimeStatus.AVAILABLE,
            title=f"Transcript {path}",
            content=content,
            truncated=truncated,
        )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def transcript_path(cwd: Path, ref: SourceRef) -> Path | None:
    """Resolve the transcript location of ``ref``, or None when it has none.

    ``~`` is expanded and a relative path is joined onto ``cwd`` before the
    result is passed through ``normalize_path``.
    """
    raw = ref.transcript
    if raw is None or not raw.strip():
        return None
    candidate = Path(raw).expanduser()
    resolved = candidate if candidate.is_absolute() else cwd / candidate
    return normalize_path(resolved)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def unavailable_runtime(ref: SourceRef, title: str, diagnostic: str) -> SourceRuntime:
    """Build an UNAVAILABLE runtime for ``ref`` carrying a single diagnostic."""
    diagnostics = (diagnostic,)
    return SourceRuntime(
        ref=ref,
        status=SourceRuntimeStatus.UNAVAILABLE,
        title=title,
        diagnostics=diagnostics,
    )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def read_transcript_entries(path: Path) -> Iterator[TranscriptRuntimeEntry]:
    """Yield one entry per line of ``path`` that parses as a JSON object.

    Line numbers are 1-based positions in the file, so skipped lines leave
    gaps. Lines for which ``read_jsonl_object`` returns None are skipped.
    """
    # errors="replace": a stray invalid byte should degrade one line, not
    # abort the whole transcript with UnicodeDecodeError.
    with path.open("r", encoding="utf-8", errors="replace") as file:
        for line_number, line in enumerate(file, start=1):
            parsed = read_jsonl_object(line)
            if parsed is not None:
                yield transcript_entry(line_number, parsed)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def read_jsonl_object(line: str) -> dict[str, object] | None:
    """Parse a single JSONL line into a dict, or return None.

    The reader is asked for ``dict`` values only, with empty and invalid
    lines skipped, so None means "nothing usable on this line".
    """
    objects = jsonlines.Reader([line]).iter(
        type=dict,
        skip_empty=True,
        skip_invalid=True,
    )
    for parsed in objects:
        return parsed
    return None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def transcript_entry(
    line_number: int,
    parsed: dict[str, object],
) -> TranscriptRuntimeEntry:
    """Classify one parsed JSONL object and turn it into a runtime entry.

    Tried in order: payload-based entry, message entry, metadata entry
    (session id / cwd), and finally a RAW entry holding the compact JSON.
    An object that fails ``TranscriptJsonLine`` validation is RAW as well.
    """
    try:
        line = TranscriptJsonLine.model_validate(parsed)
    except ValidationError:
        return runtime_entry(
            line_number,
            TranscriptRuntimeLineKind.RAW,
            "raw",
            compact_json(parsed),
        )

    payload = parse_payload(line.payload)
    if payload is not None:
        from_payload = entry_from_payload(line_number, line, payload)
        if from_payload is not None:
            return from_payload

    message = parse_message(line.message)
    if message is not None:
        speaker = message.role or line.type or "message"
        return runtime_entry(
            line_number,
            TranscriptRuntimeLineKind.MESSAGE,
            label_with_timestamp(speaker, line.timestamp),
            line_message_text(line, message),
        )

    if line.session_id is None and line.cwd is None:
        # Nothing recognisable: keep the original object as compact JSON.
        return runtime_entry(
            line_number,
            TranscriptRuntimeLineKind.RAW,
            label_with_timestamp(line.type or "raw", line.timestamp),
            compact_json(parsed),
        )

    details: list[str] = []
    if line.session_id:
        details.append(f"session_id: {line.session_id}")
    if line.cwd:
        details.append(f"cwd: {line.cwd}")
    return runtime_entry(
        line_number,
        TranscriptRuntimeLineKind.META,
        label_with_timestamp(line.type or "meta", line.timestamp),
        "\n".join(details),
    )
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def entry_from_payload(
    line_number: int,
    line: TranscriptJsonLine,
    payload: TranscriptPayload,
) -> TranscriptRuntimeEntry | None:
    """Build an entry from a validated payload, or None if nothing matches.

    A payload with an id or cwd becomes a META entry, one with a ``message``
    string becomes an EVENT entry, and otherwise its ``item`` (when it
    validates) is handed to ``entry_from_item``.
    """
    if payload.id is not None or payload.cwd is not None:
        fields = (
            ("id", payload.id),
            ("cwd", payload.cwd),
            ("thread_source", payload.thread_source),
        )
        # Empty/None values are left out of the rendered text.
        text = "\n".join(f"{name}: {value}" for name, value in fields if value)
        return runtime_entry(
            line_number,
            TranscriptRuntimeLineKind.META,
            label_with_timestamp(line.type or "payload", line.timestamp),
            text,
        )
    if payload.message is not None:
        return runtime_entry(
            line_number,
            TranscriptRuntimeLineKind.EVENT,
            label_with_timestamp(line.type or "event", line.timestamp),
            payload.message,
        )
    item = parse_item(payload.item)
    return None if item is None else entry_from_item(line_number, line, item)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def entry_from_item(
    line_number: int,
    line: TranscriptJsonLine,
    item: TranscriptItem,
) -> TranscriptRuntimeEntry:
    """Render a payload item as a tool call, a tool result, or a message.

    The tool-call check runs first, so an item that has a ``name`` is always
    a TOOL_CALL even if it also carries an ``output``.
    """
    stamp = line.timestamp

    looks_like_call = item.name is not None or item.type in {"function_call", "tool_call"}
    if looks_like_call:
        tool_name = item.name or "unknown"
        return runtime_entry(
            line_number,
            TranscriptRuntimeLineKind.TOOL_CALL,
            label_with_timestamp(f"tool_call {tool_name}", stamp),
            render_json_text(item.arguments),
        )

    looks_like_result = item.output is not None or item.type in {
        "function_call_output",
        "tool_result",
    }
    if looks_like_result:
        # A falsy output falls back to the item's content.
        result = item.output or item.content
        return runtime_entry(
            line_number,
            TranscriptRuntimeLineKind.TOOL_RESULT,
            label_with_timestamp("tool_result", stamp),
            render_json_text(result),
        )

    speaker = item.role or item.type or "item"
    return runtime_entry(
        line_number,
        TranscriptRuntimeLineKind.MESSAGE,
        label_with_timestamp(speaker, stamp),
        render_json_text(item.content),
    )
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def parse_payload(value: JsonValue | None) -> TranscriptPayload | None:
    """Validate ``value`` as a TranscriptPayload; None if not a dict or invalid."""
    if isinstance(value, dict):
        try:
            return TranscriptPayload.model_validate(value)
        except ValidationError:
            # Unknown payload shape: the caller falls through to other renderings.
            pass
    return None
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def parse_message(value: JsonValue | None) -> TranscriptMessage | None:
    """Validate ``value`` as a TranscriptMessage; None if not a dict or invalid."""
    if isinstance(value, dict):
        try:
            return TranscriptMessage.model_validate(value)
        except ValidationError:
            # Unknown message shape: the caller falls through to other renderings.
            pass
    return None
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def parse_item(value: JsonValue | None) -> TranscriptItem | None:
    """Validate ``value`` as a TranscriptItem; None if not a dict or invalid."""
    if isinstance(value, dict):
        try:
            return TranscriptItem.model_validate(value)
        except ValidationError:
            # Unknown item shape: the caller treats the payload as unmatched.
            pass
    return None
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def line_message_text(line: TranscriptJsonLine, message: TranscriptMessage) -> str:
    """Join the line's session id, cwd and rendered message content.

    Each piece goes on its own line; empty pieces are omitted.
    """
    pieces: list[str] = []
    if line.session_id:
        pieces.append(f"session_id: {line.session_id}")
    if line.cwd:
        pieces.append(f"cwd: {line.cwd}")
    body = render_json_text(message.content)
    if body:
        pieces.append(body)
    return "\n".join(pieces)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def runtime_entry(
    line_number: int,
    kind: TranscriptRuntimeLineKind,
    label: str,
    text: str,
) -> TranscriptRuntimeEntry:
    """Create an entry, substituting "(empty)" for blank text.

    The text is stripped first, so whitespace-only text counts as blank.
    """
    rendered = text.strip() or "(empty)"
    return TranscriptRuntimeEntry(
        line_number=line_number,
        kind=kind,
        label=label,
        text=rendered,
    )
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def label_with_timestamp(label: str, timestamp: str | None) -> str:
|
|
346
|
+
if timestamp is None or timestamp.strip() == "":
|
|
347
|
+
return label
|
|
348
|
+
return f"{label} {timestamp}"
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def render_entry(entry: TranscriptRuntimeEntry) -> str:
    """Format an entry as ``L<line> [<kind>] <label>: <text>``.

    The line number is zero-padded to at least four digits.
    """
    prefix = f"L{entry.line_number:04d}"
    return f"{prefix} [{entry.kind.value}] {entry.label}: {entry.text}"
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def render_json_text(value: JsonValue | None) -> str:
|
|
356
|
+
if value is None:
|
|
357
|
+
return ""
|
|
358
|
+
if isinstance(value, str):
|
|
359
|
+
return value
|
|
360
|
+
if isinstance(value, list):
|
|
361
|
+
return "\n".join(
|
|
362
|
+
part for part in (render_json_text(item) for item in value) if part
|
|
363
|
+
)
|
|
364
|
+
if isinstance(value, dict):
|
|
365
|
+
text = value.get("text")
|
|
366
|
+
if isinstance(text, str):
|
|
367
|
+
return text
|
|
368
|
+
content = value.get("content")
|
|
369
|
+
if content is not None:
|
|
370
|
+
return render_json_text(content)
|
|
371
|
+
return compact_json(value)
|
|
372
|
+
return compact_json(value)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def compact_json(value: JsonValue) -> str:
    """Serialize ``value`` as JSON with sorted keys and no extra whitespace."""
    item_separator, key_separator = ",", ":"
    return json.dumps(
        value,
        separators=(item_separator, key_separator),
        sort_keys=True,
    )
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def bounded_tail_text(value: str, max_chars: int) -> tuple[str, bool]:
    """Keep the tail of ``value`` so the result never exceeds ``max_chars``.

    Returns ``(text, truncated)``. Text that already fits is returned
    unchanged with ``truncated=False``. Otherwise the result is a truncation
    marker followed by the last lines of ``value``; the partial first line of
    the kept tail is dropped so the output starts on a line boundary.

    When ``max_chars`` is too small to hold the marker plus any content, the
    bare tail (at most ``max_chars`` characters, possibly empty) is returned
    without the marker.
    """
    if len(value) <= max_chars:
        return value, False
    marker = "[truncated earlier transcript lines]\n\n"
    # Reserve room for the marker so marker + tail stays within the bound.
    budget = max_chars - len(marker)
    if budget <= 0:
        # value[-0:] would be the whole string, so handle 0 explicitly.
        return (value[-max_chars:] if max_chars > 0 else ""), True
    tail = value[-budget:]
    first_newline = tail.find("\n")
    if first_newline != -1:
        tail = tail[first_newline + 1 :]
    return marker + tail, True
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
from bs4 import BeautifulSoup
|
|
5
|
+
from pydantic import field_validator
|
|
6
|
+
|
|
7
|
+
from codealmanac.core.models import CodeAlmanacModel
|
|
8
|
+
from codealmanac.core.text import required_text
|
|
9
|
+
from codealmanac.integrations.sources.runtime import (
|
|
10
|
+
bounded_text,
|
|
11
|
+
source_runtime_section,
|
|
12
|
+
)
|
|
13
|
+
from codealmanac.services.sources.models import (
|
|
14
|
+
SourceKind,
|
|
15
|
+
SourceRef,
|
|
16
|
+
SourceRuntime,
|
|
17
|
+
SourceRuntimeStatus,
|
|
18
|
+
)
|
|
19
|
+
from codealmanac.services.sources.requests import InspectSourceRuntimeRequest
|
|
20
|
+
|
|
21
|
+
# Defaults for WebSourceRuntimeAdapter: request timeout in seconds, cap on
# response body bytes read, and cap on characters of rendered content.
WEB_RUNTIME_TIMEOUT_SECONDS = 20
DEFAULT_MAX_BYTES = 2_000_000
DEFAULT_MAX_CHARS = 60_000
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class WebContentKind(StrEnum):
    """How a fetched body is processed, as chosen by content_kind_for."""

    HTML = "html"  # parsed with BeautifulSoup and reduced to page text
    TEXT = "text"  # used as text after whitespace normalization
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FetchedWebResponse(CodeAlmanacModel):
    """Raw result of one HTTP fetch, before any decoding or parsing."""

    final_url: str
    status_code: int
    content_type: str
    body: bytes
    response_truncated: bool = False

    @field_validator("final_url", "content_type")
    @classmethod
    def require_text_fields(cls, value: str) -> str:
        """Delegate the URL/content-type check to the shared ``required_text`` helper."""
        checked = required_text(value, "web response")
        return checked

    @field_validator("status_code")
    @classmethod
    def validate_status_code(cls, value: int) -> int:
        """Accept only status codes in the inclusive range 100-599."""
        if not 100 <= value <= 599:
            raise ValueError("HTTP status code must be between 100 and 599")
        return value
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class WebRuntimeDocument(CodeAlmanacModel):
    """Decoded web response: metadata plus the text body that gets rendered."""

    final_url: str
    status_code: int
    content_type: str
    content_kind: WebContentKind
    body: str
    title: str | None = None
    response_truncated: bool = False

    @field_validator("final_url", "content_type", "body")
    @classmethod
    def require_text_fields(cls, value: str) -> str:
        """Delegate the URL/content-type/body check to ``required_text``."""
        checked = required_text(value, "web runtime document")
        return checked
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class UnsupportedWebContentError(Exception):
    """Raised when a response's media type is neither HTML nor a supported text type."""
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class WebSourceRuntimeAdapter:
    """Source runtime adapter that fetches a web URL and renders it as text.

    An injected ``client`` is reused for every fetch; without one, a
    short-lived ``httpx.Client`` is created per fetch. ``max_bytes`` bounds
    the bytes read from the response and ``max_chars`` the rendered content.
    """

    def __init__(
        self,
        client: httpx.Client | None = None,
        max_bytes: int = DEFAULT_MAX_BYTES,
        max_chars: int = DEFAULT_MAX_CHARS,
        timeout_seconds: int = WEB_RUNTIME_TIMEOUT_SECONDS,
    ):
        self.client = client
        self.max_bytes = max_bytes
        self.max_chars = max_chars
        self.timeout_seconds = timeout_seconds

    def supports(self, ref: SourceRef) -> bool:
        """Return True when ``ref`` is a web URL source."""
        return ref.kind == SourceKind.WEB_URL

    def inspect(self, request: InspectSourceRuntimeRequest) -> SourceRuntime:
        """Fetch and render the URL referenced by ``request.ref``.

        Returns a SKIPPED runtime for non-web refs, an UNAVAILABLE runtime
        carrying one diagnostic when the URL is missing, malformed, cannot be
        fetched, or has an unsupported content type, and otherwise an
        AVAILABLE runtime with a metadata section followed by the content.
        """
        ref = request.ref
        if ref.kind != SourceKind.WEB_URL:
            return SourceRuntime(
                ref=ref,
                status=SourceRuntimeStatus.SKIPPED,
                title=f"Unsupported web source {ref.identity}",
            )
        if ref.url is None:
            return unavailable_runtime(
                ref,
                "Web URL unavailable",
                "web source requires a URL",
            )
        try:
            response = self._fetch(ref.url)
            document = parse_web_response(response)
        except (
            httpx.HTTPError,
            # InvalidURL derives from Exception, not HTTPError, so it must be
            # listed explicitly or a malformed URL escapes as a crash.
            httpx.InvalidURL,
            UnsupportedWebContentError,
            ValueError,
        ) as error:
            return unavailable_runtime(
                ref,
                "Web URL unavailable",
                first_error_line(error),
            )

        sections = (
            source_runtime_section("metadata", render_metadata(document)),
            source_runtime_section("content", document.body),
        )
        content, truncated = bounded_text("\n\n".join(sections), self.max_chars)
        title_suffix = f": {document.title}" if document.title is not None else ""
        return SourceRuntime(
            ref=ref,
            status=SourceRuntimeStatus.AVAILABLE,
            title=f"Web URL {document.final_url}{title_suffix}",
            content=content,
            # Truncated if either the rendered text or the raw body was cut.
            truncated=truncated or document.response_truncated,
        )

    def _fetch(self, url: str) -> FetchedWebResponse:
        """Fetch ``url`` with the injected client, or a temporary one."""
        if self.client is not None:
            return fetch_with_client(
                self.client,
                url,
                self.max_bytes,
                self.timeout_seconds,
            )
        with httpx.Client(
            follow_redirects=True,
            timeout=self.timeout_seconds,
        ) as client:
            return fetch_with_client(
                client,
                url,
                self.max_bytes,
                self.timeout_seconds,
            )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def fetch_with_client(
    client: httpx.Client,
    url: str,
    max_bytes: int,
    timeout_seconds: int,
) -> FetchedWebResponse:
    """Stream a GET of ``url`` and capture at most ``max_bytes`` of its body.

    Redirects are followed. ``raise_for_status`` is called before the body is
    read. A missing or blank Content-Type header is recorded as "(none)".
    """
    stream = client.stream(
        "GET",
        url,
        follow_redirects=True,
        timeout=timeout_seconds,
    )
    with stream as response:
        response.raise_for_status()
        body, truncated = read_bounded_response(response, max_bytes)
        header = response.headers.get("content-type", "").strip()
        return FetchedWebResponse(
            final_url=str(response.url),
            status_code=response.status_code,
            content_type=header if header else "(none)",
            body=body,
            response_truncated=truncated,
        )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def read_bounded_response(
    response: httpx.Response,
    max_bytes: int,
) -> tuple[bytes, bool]:
    """Read up to ``max_bytes`` from the response's byte stream.

    Returns ``(body, truncated)``. ``truncated`` is True only when another
    chunk arrived after the limit was reached, or a chunk had to be cut.
    """
    collected = bytearray()
    for chunk in response.iter_bytes():
        room = max_bytes - len(collected)
        if room <= 0:
            # Limit already reached and more data arrived.
            return bytes(collected), True
        if len(chunk) > room:
            collected += chunk[:room]
            return bytes(collected), True
        collected += chunk
    return bytes(collected), False
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def parse_web_response(response: FetchedWebResponse) -> WebRuntimeDocument:
    """Decode a fetched response and turn it into a runtime document.

    The content kind is resolved first, so an unsupported media type raises
    ``UnsupportedWebContentError`` before any decoding. The body is decoded
    with the charset declared in Content-Type when there is one, and as UTF-8
    otherwise. HTML goes through ``parse_html_document``; other supported
    types are used as whitespace-normalized text.
    """
    content_kind = content_kind_for(response.content_type)
    text = _decode_with_declared_charset(response.body, response.content_type)
    if content_kind == WebContentKind.HTML:
        return parse_html_document(response, text)
    return WebRuntimeDocument(
        final_url=response.final_url,
        status_code=response.status_code,
        content_type=response.content_type,
        content_kind=content_kind,
        body=normalized_text(text),
        response_truncated=response.response_truncated,
    )


def _declared_charset(content_type: str) -> str | None:
    """Return the ``charset`` parameter of a Content-Type value, if present."""
    for parameter in content_type.split(";")[1:]:
        name, _, value = parameter.partition("=")
        if name.strip().casefold() == "charset":
            return value.strip().strip("\"'") or None
    return None


def _decode_with_declared_charset(body: bytes, content_type: str) -> str:
    """Decode ``body`` with the declared charset, falling back to UTF-8."""
    charset = _declared_charset(content_type)
    if charset is not None:
        try:
            return body.decode(charset, errors="replace")
        except (LookupError, ValueError):
            # Unknown or malformed codec name: use the UTF-8 default below.
            pass
    return decode_body(body)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def content_kind_for(content_type: str) -> WebContentKind:
    """Map a Content-Type value to HTML or TEXT.

    Parameters after ``;`` are ignored and matching is case-insensitive. The
    "(none)" placeholder is treated as HTML.

    Raises:
        UnsupportedWebContentError: for any other media type.
    """
    media_type, _, _ = content_type.partition(";")
    media_type = media_type.strip().casefold()
    html_types = {"text/html", "application/xhtml+xml", "(none)"}
    structured_text_types = {
        "application/json",
        "application/ld+json",
        "application/xml",
        "application/rss+xml",
        "application/atom+xml",
    }
    if media_type in html_types:
        return WebContentKind.HTML
    if media_type.startswith("text/") or media_type in structured_text_types:
        return WebContentKind.TEXT
    raise UnsupportedWebContentError(f"unsupported web content type: {media_type}")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def parse_html_document(response: FetchedWebResponse, html: str) -> WebRuntimeDocument:
    """Extract the title and visible text of an HTML page.

    script, style, noscript, template and svg elements are removed before
    text extraction. A page with no text gets the "(empty page text)" body.
    """
    soup = BeautifulSoup(html, "html.parser")
    removable = ("script", "style", "noscript", "template", "svg")
    for element in soup.find_all(removable):
        element.decompose()
    title = title_text(soup)
    body = normalized_text(soup.get_text("\n")) or "(empty page text)"
    return WebRuntimeDocument(
        final_url=response.final_url,
        status_code=response.status_code,
        content_type=response.content_type,
        content_kind=WebContentKind.HTML,
        title=title,
        body=body,
        response_truncated=response.response_truncated,
    )
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def decode_body(body: bytes) -> str:
    """Decode ``body`` as UTF-8, replacing undecodable bytes with U+FFFD."""
    return str(body, encoding="utf-8", errors="replace")
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def title_text(soup: BeautifulSoup) -> str | None:
    """Return the page's normalized ``<title>`` text, or None if absent or blank."""
    tag = soup.title
    if tag is None:
        return None
    return normalized_text(tag.get_text(" ")) or None
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def normalized_text(value: str) -> str:
    """Collapse whitespace in ``value`` line by line.

    Runs of whitespace inside a line become one space. Consecutive blank
    lines become one blank line. Leading and trailing blank lines are dropped.
    """
    collapsed: list[str] = []
    for raw_line in value.splitlines():
        line = " ".join(raw_line.split())
        if line:
            collapsed.append(line)
        elif collapsed and collapsed[-1] != "":
            # Keep a single blank separator, never at the very start.
            collapsed.append("")
    if collapsed and collapsed[-1] == "":
        collapsed.pop()
    return "\n".join(collapsed)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def render_metadata(document: WebRuntimeDocument) -> str:
    """Render the document's metadata as ``key: value`` lines.

    The title line is appended only when the document has a title.
    """
    truncated_flag = str(document.response_truncated).lower()
    base_lines = [
        f"final_url: {document.final_url}",
        f"status_code: {document.status_code}",
        f"content_type: {document.content_type}",
        f"content_kind: {document.content_kind.value}",
        f"response_truncated: {truncated_flag}",
    ]
    title_lines = [] if document.title is None else [f"title: {document.title}"]
    return "\n".join([*base_lines, *title_lines])
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def unavailable_runtime(
    ref: SourceRef,
    title: str,
    diagnostic: str,
) -> SourceRuntime:
    """Build an UNAVAILABLE runtime for ``ref`` carrying a single diagnostic."""
    diagnostics = (diagnostic,)
    return SourceRuntime(
        ref=ref,
        status=SourceRuntimeStatus.UNAVAILABLE,
        title=title,
        diagnostics=diagnostics,
    )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def first_error_line(error: Exception) -> str:
    """Return the first non-blank line of the error message, stripped.

    Falls back to the exception's class name when the message has no text.
    """
    for line in str(error).splitlines():
        stripped = line.strip()
        if stripped:
            return stripped
    return error.__class__.__name__
|