shellbrain 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -0
- app/__main__.py +7 -0
- app/boot/__init__.py +1 -0
- app/boot/admin_db.py +88 -0
- app/boot/config.py +14 -0
- app/boot/create_policy.py +52 -0
- app/boot/db.py +70 -0
- app/boot/embeddings.py +55 -0
- app/boot/home.py +45 -0
- app/boot/migrations.py +61 -0
- app/boot/read_policy.py +179 -0
- app/boot/repos.py +15 -0
- app/boot/retrieval.py +3 -0
- app/boot/thresholds.py +19 -0
- app/boot/update_policy.py +34 -0
- app/boot/use_cases.py +22 -0
- app/config/__init__.py +1 -0
- app/config/defaults/create_policy.yaml +7 -0
- app/config/defaults/read_policy.yaml +25 -0
- app/config/defaults/runtime.yaml +10 -0
- app/config/defaults/thresholds.yaml +3 -0
- app/config/defaults/update_policy.yaml +5 -0
- app/config/loader.py +58 -0
- app/core/__init__.py +1 -0
- app/core/contracts/__init__.py +1 -0
- app/core/contracts/errors.py +29 -0
- app/core/contracts/requests.py +211 -0
- app/core/contracts/responses.py +15 -0
- app/core/entities/__init__.py +1 -0
- app/core/entities/associations.py +58 -0
- app/core/entities/episodes.py +66 -0
- app/core/entities/evidence.py +29 -0
- app/core/entities/facts.py +30 -0
- app/core/entities/guidance.py +47 -0
- app/core/entities/identity.py +48 -0
- app/core/entities/memory.py +34 -0
- app/core/entities/runtime_context.py +19 -0
- app/core/entities/session_state.py +31 -0
- app/core/entities/telemetry.py +152 -0
- app/core/entities/utility.py +14 -0
- app/core/interfaces/__init__.py +1 -0
- app/core/interfaces/clock.py +12 -0
- app/core/interfaces/config.py +28 -0
- app/core/interfaces/embeddings.py +12 -0
- app/core/interfaces/idgen.py +11 -0
- app/core/interfaces/repos.py +279 -0
- app/core/interfaces/retrieval.py +20 -0
- app/core/interfaces/session_state_store.py +33 -0
- app/core/interfaces/unit_of_work.py +50 -0
- app/core/policies/__init__.py +1 -0
- app/core/policies/_shared/__init__.py +1 -0
- app/core/policies/_shared/executor.py +132 -0
- app/core/policies/_shared/side_effects.py +9 -0
- app/core/policies/create_policy/__init__.py +1 -0
- app/core/policies/create_policy/pipeline.py +96 -0
- app/core/policies/read_policy/__init__.py +1 -0
- app/core/policies/read_policy/bm25.py +114 -0
- app/core/policies/read_policy/context_pack_builder.py +140 -0
- app/core/policies/read_policy/expansion.py +132 -0
- app/core/policies/read_policy/fusion_rrf.py +34 -0
- app/core/policies/read_policy/lexical_query.py +101 -0
- app/core/policies/read_policy/pipeline.py +93 -0
- app/core/policies/read_policy/scenario_lift.py +11 -0
- app/core/policies/read_policy/scoring.py +61 -0
- app/core/policies/read_policy/seed_retrieval.py +54 -0
- app/core/policies/read_policy/utility_prior.py +11 -0
- app/core/policies/update_policy/__init__.py +1 -0
- app/core/policies/update_policy/pipeline.py +80 -0
- app/core/use_cases/__init__.py +1 -0
- app/core/use_cases/build_guidance.py +85 -0
- app/core/use_cases/create_memory.py +26 -0
- app/core/use_cases/manage_session_state.py +159 -0
- app/core/use_cases/read_memory.py +21 -0
- app/core/use_cases/record_episode_sync_telemetry.py +19 -0
- app/core/use_cases/record_operation_telemetry.py +32 -0
- app/core/use_cases/sync_episode.py +162 -0
- app/core/use_cases/update_memory.py +40 -0
- app/migrations/__init__.py +1 -0
- app/migrations/env.py +65 -0
- app/migrations/versions/20260226_0001_initial_schema.py +232 -0
- app/migrations/versions/20260312_0002_add_hard_invariants.py +60 -0
- app/migrations/versions/20260312_0003_drop_create_confidence.py +40 -0
- app/migrations/versions/20260313_0004_episode_sync_hardening.py +71 -0
- app/migrations/versions/20260313_0005_evidence_episode_event_refs.py +45 -0
- app/migrations/versions/20260318_0006_usage_telemetry_schema.py +175 -0
- app/migrations/versions/20260319_0007_identity_session_guidance.py +49 -0
- app/migrations/versions/20260320_0008_instance_metadata_and_backup_safety.py +31 -0
- app/migrations/versions/__init__.py +1 -0
- app/periphery/__init__.py +1 -0
- app/periphery/admin/__init__.py +1 -0
- app/periphery/admin/backup.py +360 -0
- app/periphery/admin/destructive_guard.py +32 -0
- app/periphery/admin/doctor.py +192 -0
- app/periphery/admin/init.py +996 -0
- app/periphery/admin/instance_guard.py +211 -0
- app/periphery/admin/machine_state.py +354 -0
- app/periphery/admin/privileges.py +42 -0
- app/periphery/admin/repo_state.py +266 -0
- app/periphery/admin/restore.py +30 -0
- app/periphery/cli/__init__.py +1 -0
- app/periphery/cli/handlers.py +830 -0
- app/periphery/cli/hydration.py +119 -0
- app/periphery/cli/main.py +710 -0
- app/periphery/cli/presenter_json.py +10 -0
- app/periphery/cli/schema_validation.py +201 -0
- app/periphery/db/__init__.py +1 -0
- app/periphery/db/engine.py +10 -0
- app/periphery/db/models/__init__.py +1 -0
- app/periphery/db/models/associations.py +55 -0
- app/periphery/db/models/episodes.py +55 -0
- app/periphery/db/models/evidence.py +19 -0
- app/periphery/db/models/experiences.py +33 -0
- app/periphery/db/models/instance_metadata.py +17 -0
- app/periphery/db/models/memories.py +39 -0
- app/periphery/db/models/metadata.py +6 -0
- app/periphery/db/models/registry.py +18 -0
- app/periphery/db/models/telemetry.py +174 -0
- app/periphery/db/models/utility.py +19 -0
- app/periphery/db/models/views.py +154 -0
- app/periphery/db/repos/__init__.py +1 -0
- app/periphery/db/repos/relational/__init__.py +1 -0
- app/periphery/db/repos/relational/associations_repo.py +117 -0
- app/periphery/db/repos/relational/episodes_repo.py +188 -0
- app/periphery/db/repos/relational/evidence_repo.py +82 -0
- app/periphery/db/repos/relational/experiences_repo.py +41 -0
- app/periphery/db/repos/relational/memories_repo.py +99 -0
- app/periphery/db/repos/relational/read_policy_repo.py +202 -0
- app/periphery/db/repos/relational/telemetry_repo.py +161 -0
- app/periphery/db/repos/relational/utility_repo.py +30 -0
- app/periphery/db/repos/semantic/__init__.py +1 -0
- app/periphery/db/repos/semantic/keyword_retrieval_repo.py +63 -0
- app/periphery/db/repos/semantic/semantic_retrieval_repo.py +111 -0
- app/periphery/db/session.py +10 -0
- app/periphery/db/uow.py +75 -0
- app/periphery/embeddings/__init__.py +1 -0
- app/periphery/embeddings/local_provider.py +35 -0
- app/periphery/embeddings/query_vector_search.py +18 -0
- app/periphery/episodes/__init__.py +1 -0
- app/periphery/episodes/claude_code.py +387 -0
- app/periphery/episodes/codex.py +423 -0
- app/periphery/episodes/launcher.py +66 -0
- app/periphery/episodes/normalization.py +31 -0
- app/periphery/episodes/poller.py +299 -0
- app/periphery/episodes/source_discovery.py +66 -0
- app/periphery/episodes/tool_filter.py +165 -0
- app/periphery/identity/__init__.py +1 -0
- app/periphery/identity/claude_hook_install.py +67 -0
- app/periphery/identity/claude_runtime.py +83 -0
- app/periphery/identity/codex_runtime.py +32 -0
- app/periphery/identity/compatibility.py +38 -0
- app/periphery/identity/resolver.py +163 -0
- app/periphery/session_state/__init__.py +1 -0
- app/periphery/session_state/file_store.py +100 -0
- app/periphery/telemetry/__init__.py +33 -0
- app/periphery/telemetry/operation_summary.py +299 -0
- app/periphery/telemetry/session_selection.py +156 -0
- app/periphery/telemetry/sync_summary.py +65 -0
- app/periphery/validation/__init__.py +1 -0
- app/periphery/validation/integrity_validation.py +253 -0
- app/periphery/validation/semantic_validation.py +94 -0
- shellbrain-0.1.0.dist-info/METADATA +130 -0
- shellbrain-0.1.0.dist-info/RECORD +165 -0
- shellbrain-0.1.0.dist-info/WHEEL +5 -0
- shellbrain-0.1.0.dist-info/entry_points.txt +2 -0
- shellbrain-0.1.0.dist-info/top_level.txt +1 -0
app/periphery/db/uow.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""This module defines a PostgreSQL-backed unit-of-work implementation with repo bindings."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
from app.core.interfaces.retrieval import IVectorSearch
|
|
6
|
+
from app.core.interfaces.unit_of_work import IUnitOfWork
|
|
7
|
+
from app.periphery.db.repos.relational.associations_repo import AssociationsRepo
|
|
8
|
+
from app.periphery.db.repos.relational.episodes_repo import EpisodesRepo
|
|
9
|
+
from app.periphery.db.repos.relational.evidence_repo import EvidenceRepo
|
|
10
|
+
from app.periphery.db.repos.relational.experiences_repo import ExperiencesRepo
|
|
11
|
+
from app.periphery.db.repos.relational.memories_repo import MemoriesRepo
|
|
12
|
+
from app.periphery.db.repos.relational.read_policy_repo import ReadPolicyRepo
|
|
13
|
+
from app.periphery.db.repos.relational.telemetry_repo import TelemetryRepo
|
|
14
|
+
from app.periphery.db.repos.relational.utility_repo import UtilityRepo
|
|
15
|
+
from app.periphery.db.repos.semantic.keyword_retrieval_repo import KeywordRetrievalRepo
|
|
16
|
+
from app.periphery.db.repos.semantic.semantic_retrieval_repo import SemanticRetrievalRepo
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PostgresUnitOfWork(IUnitOfWork):
    """Coordinate one transaction scope and the repositories bound to it.

    Intended to be used as a context manager: entering opens a session from the
    session factory and binds every repository to that session; leaving commits
    when the body finished cleanly, rolls back when it raised, and always closes
    the session.
    """

    def __init__(
        self,
        session_factory,
        *,
        vector_search_factory: Callable[[], IVectorSearch] | None = None,
    ) -> None:
        """Store factories used to create one transaction scope and its read dependencies.

        Args:
            session_factory: Zero-argument callable returning a new session.
            vector_search_factory: Optional zero-argument callable returning the
                vector-search adapter exposed as ``vector_search`` while the
                scope is open. When omitted, ``vector_search`` stays ``None``.
        """

        self._session_factory = session_factory
        self._vector_search_factory = vector_search_factory
        self._session = None
        self.vector_search = None

    def __enter__(self):
        """Open a DB session and bind all repositories to it.

        Returns:
            This unit of work, with repository attributes ready to use.
        """

        self._session = self._session_factory()
        self.vector_search = (
            self._vector_search_factory() if self._vector_search_factory is not None else None
        )
        self.memories = MemoriesRepo(self._session)
        self.experiences = ExperiencesRepo(self._session)
        self.associations = AssociationsRepo(self._session)
        self.utility = UtilityRepo(self._session)
        self.episodes = EpisodesRepo(self._session)
        self.evidence = EvidenceRepo(self._session)
        self.semantic_retrieval = SemanticRetrievalRepo(self._session)
        self.keyword_retrieval = KeywordRetrievalRepo(self._session)
        self.read_policy = ReadPolicyRepo(self._session)
        self.telemetry = TelemetryRepo(self._session)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Commit on success, roll back on failure, and always close the session.

        Returns ``None``, so an exception raised inside the ``with`` body is
        never suppressed.
        """

        try:
            if exc_type is None:
                self.commit()
            else:
                self.rollback()
        finally:
            # Cleanup must run even when commit()/rollback() itself raises;
            # otherwise a failed commit would leave the session open.
            if self._session is not None:
                self._session.close()
            self.vector_search = None

    def commit(self) -> None:
        """Commit the active session; a no-op when no session has been opened."""

        if self._session is not None:
            self._session.commit()

    def rollback(self) -> None:
        """Roll back the active session; a no-op when no session has been opened."""

        if self._session is not None:
            self._session.rollback()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""This package defines embedding-provider adapters used by write-path orchestration."""
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""This module defines a sentence-transformers-backed local embedding provider."""
|
|
2
|
+
|
|
3
|
+
from typing import Sequence
|
|
4
|
+
|
|
5
|
+
from app.core.interfaces.embeddings import IEmbeddingProvider
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SentenceTransformersEmbeddingProvider(IEmbeddingProvider):
    """Embedding provider backed by a locally loaded sentence-transformers model."""

    def __init__(self, *, model: str, cache_folder: str | None = None) -> None:
        """Remember the model configuration; the model itself is loaded on first use.

        Args:
            model: Model name passed to ``SentenceTransformer``.
            cache_folder: Optional cache folder passed to ``SentenceTransformer``.
        """

        self._model = None
        self._model_name = model
        self._cache_folder = cache_folder

    def _get_model(self):
        """Return the sentence-transformers model, loading and caching it on first call.

        Raises:
            RuntimeError: If importing the library or constructing the model fails;
                the original exception is chained as the cause.
        """

        if self._model is None:
            try:
                # Imported lazily so the dependency is only needed when embeddings are used.
                from sentence_transformers import SentenceTransformer

                self._model = SentenceTransformer(self._model_name, cache_folder=self._cache_folder)
            except Exception as exc:
                raise RuntimeError("sentence-transformers is unavailable for local embedding generation") from exc
        return self._model

    def embed(self, text: str) -> Sequence[float]:
        """Encode ``text`` with the local model and return the vector as a list of floats."""

        encoded = self._get_model().encode(text, convert_to_numpy=False, normalize_embeddings=False)
        return list(map(float, encoded))
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Adapters that expose query-vector generation through the read retrieval interface."""
|
|
2
|
+
|
|
3
|
+
from app.core.interfaces.embeddings import IEmbeddingProvider
|
|
4
|
+
from app.core.interfaces.retrieval import IVectorSearch
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class EmbeddingBackedVectorSearch(IVectorSearch):
    """Serve read-path query vectors by delegating to the configured embedding provider."""

    def __init__(self, embedding_provider: IEmbeddingProvider) -> None:
        """Keep a reference to the provider that produces query vectors."""

        self._embedding_provider = embedding_provider

    def embed_query(self, text: str) -> list[float]:
        """Embed ``text`` with the provider and return the result as a list of floats."""

        raw_vector = self._embedding_provider.embed(text)
        return list(map(float, raw_vector))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Episode-ingestion adapters for host transcript discovery and normalization."""
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
"""Claude Code transcript discovery and normalization helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterable, Sequence
|
|
6
|
+
import hashlib
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from app.periphery.episodes.tool_filter import should_keep_tool_result, summarize_tool_result
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def resolve_claude_code_transcript_path(
    *,
    host_session_key: str,
    search_roots: Sequence[Path],
    last_known_path: Path | None = None,
) -> Path:
    """Locate the transcript file for one Claude Code CLI session id.

    Lookup order:
      1. ``last_known_path`` when it still exists.
      2. Session metadata files under each existing search root whose
         ``cliSessionId`` equals ``host_session_key`` and whose derived
         transcript path exists.
      3. A recursive search for ``<host_session_key>.jsonl`` under each existing
         search root; the most recently modified match in the first root that
         has any match wins.

    Raises:
        FileNotFoundError: If none of the strategies finds a transcript.
    """

    if last_known_path is not None and last_known_path.exists():
        return last_known_path

    # Pass 1: metadata-driven lookup.
    for search_root in search_roots:
        if search_root.exists():
            for metadata_file in _iter_metadata_files(search_root):
                session_metadata = _read_metadata(metadata_file)
                if session_metadata.get("cliSessionId") == host_session_key:
                    candidate = _transcript_path_for_metadata(root=search_root, metadata=session_metadata)
                    if candidate.exists():
                        return candidate

    # Pass 2: fall back to a filename search.
    for search_root in search_roots:
        if search_root.exists():
            found = sorted(search_root.rglob(f"{host_session_key}.jsonl"))
            if found:
                return max(found, key=lambda transcript: transcript.stat().st_mtime)

    raise FileNotFoundError(
        f"Claude Code transcript source for session '{host_session_key}' could not be found."
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def find_latest_claude_code_session_for_repo(
    *,
    repo_root: Path,
    search_roots: Sequence[Path],
) -> dict[str, Any] | None:
    """Pick the repo-matching Claude Code session with the greatest ``updated_at``.

    Returns:
        The session dictionary with the largest ``updated_at`` value, or ``None``
        when no session matches the repo root.
    """

    sessions = list_claude_code_sessions_for_repo(repo_root=repo_root, search_roots=search_roots)
    return max(sessions, key=lambda session: session["updated_at"]) if sessions else None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def list_claude_code_sessions_for_repo(
    *,
    repo_root: Path,
    search_roots: Sequence[Path],
) -> list[dict[str, Any]]:
    """Collect every Claude Code session whose recorded cwd resolves to ``repo_root``.

    Search roots that do not exist are skipped. A session is included only when
    its metadata carries string ``cwd`` and ``cliSessionId`` values, the cwd
    resolves to the same path as ``repo_root``, and the derived transcript file
    exists.

    Returns:
        One dictionary per session with ``host_app``, ``host_session_key``,
        ``transcript_path`` and ``updated_at`` (the transcript's mtime).
    """

    target_root = repo_root.resolve()
    sessions: list[dict[str, Any]] = []
    for search_root in search_roots:
        if not search_root.exists():
            continue
        for metadata_file in _iter_metadata_files(search_root):
            session_metadata = _read_metadata(metadata_file)
            session_cwd = session_metadata.get("cwd")
            session_id = session_metadata.get("cliSessionId")
            if not (isinstance(session_cwd, str) and isinstance(session_id, str)):
                continue
            try:
                same_repo = Path(session_cwd).resolve() == target_root
            except FileNotFoundError:
                continue
            if not same_repo:
                continue
            transcript = _transcript_path_for_metadata(root=search_root, metadata=session_metadata)
            if transcript.exists():
                sessions.append(
                    {
                        "host_app": "claude_code",
                        "host_session_key": session_id,
                        "transcript_path": transcript,
                        "updated_at": transcript.stat().st_mtime,
                    }
                )
    return sessions
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def normalize_claude_code_transcript(
    *,
    host_session_key: str,
    transcript_path: Path,
) -> list[dict[str, Any]]:
    """Normalize one Claude Code transcript into shared compact event dictionaries.

    The transcript is read as UTF-8 JSON Lines. Assistant records contribute one
    message event when they carry visible text, and their tool-use items are
    remembered so later tool results can be matched to them. User records
    contribute one message event when they carry user-authored text, plus one
    event per kept tool result.

    Args:
        host_session_key: Session id stamped onto every produced event.
        transcript_path: Path of the ``.jsonl`` transcript to read.

    Returns:
        Events in transcript order.

    Raises:
        json.JSONDecodeError: If a non-empty line is not valid JSON.
        OSError: If the transcript cannot be opened.
    """

    events: list[dict[str, Any]] = []
    tool_uses: dict[str, dict[str, Any]] = {}

    with transcript_path.open(encoding="utf-8") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            raw_line = raw_line.rstrip("\n")
            if not raw_line:
                continue
            payload = json.loads(raw_line)
            # Records that are not JSON objects, or whose "message" is missing,
            # null, or not an object, can never yield an event; skip them
            # instead of failing on `.get`.
            if not isinstance(payload, dict):
                continue
            message = payload.get("message")
            if not isinstance(message, dict):
                continue
            line_type = payload.get("type")
            content = message.get("content")

            if line_type == "assistant":
                # Remember tool invocations so later tool results can be matched by id.
                tool_uses.update(_collect_tool_uses(content))
                text = _extract_claude_text(content)
                if text:
                    events.append(
                        _build_event(
                            host_session_key=host_session_key,
                            host_event_key=_fallback_key(payload, raw_line, line_number),
                            source="assistant",
                            occurred_at=str(payload.get("timestamp") or ""),
                            content_kind="message",
                            content_text=text,
                        )
                    )
                continue

            if line_type == "user":
                text = _extract_user_text_message(payload)
                if text:
                    events.append(
                        _build_event(
                            host_session_key=host_session_key,
                            host_event_key=_fallback_key(payload, raw_line, line_number),
                            source="user",
                            occurred_at=str(payload.get("timestamp") or ""),
                            content_kind="message",
                            content_text=text,
                        )
                    )
                events.extend(
                    _normalize_tool_results(
                        payload,
                        host_session_key=host_session_key,
                        raw_line=raw_line,
                        line_number=line_number,
                        tool_uses=tool_uses,
                    )
                )

    return events
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _read_metadata(metadata_path: Path) -> dict[str, Any]:
    """Read one Claude Code local session metadata file.

    Metadata files are found by globbing, so a match may be unreadable, not
    valid JSON, or valid JSON that is not an object. All of those yield an empty
    dictionary so that a single bad file cannot abort a discovery scan and the
    declared return type always holds.

    Returns:
        The parsed JSON object, or ``{}`` when the file cannot be read, cannot
        be decoded, or does not contain a JSON object.
    """

    try:
        parsed = json.loads(metadata_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        return {}
    return parsed if isinstance(parsed, dict) else {}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _iter_metadata_files(root: Path) -> Iterable[Path]:
    """Yield ``local_*.json`` metadata files found recursively under ``root``.

    When ``root`` contains ``Library/Application Support/Claude/claude-code-sessions``,
    only that directory is scanned; otherwise the whole of ``root`` is scanned.
    """

    sessions_dir = root.joinpath("Library", "Application Support", "Claude", "claude-code-sessions")
    scan_root = sessions_dir if sessions_dir.exists() else root
    yield from scan_root.rglob("local_*.json")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _transcript_path_for_metadata(*, root: Path, metadata: dict[str, Any]) -> Path:
    """Derive the transcript path that one metadata record points at.

    Returns:
        ``root/.claude/projects/<encoded cwd>/<cliSessionId>.jsonl`` when both
        ``cwd`` and ``cliSessionId`` are strings; otherwise the placeholder
        ``root/__missing__``.
    """

    working_dir = metadata.get("cwd")
    session_id = metadata.get("cliSessionId")
    if isinstance(working_dir, str) and isinstance(session_id, str):
        project_dir = root / ".claude" / "projects" / _encode_cwd(working_dir)
        return project_dir / f"{session_id}.jsonl"
    return root / "__missing__"
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _encode_cwd(cwd: str) -> str:
    """Turn a working-directory path into a project-folder name.

    Every ``/`` is replaced with ``-``; all other characters are kept as-is.
    """

    # NOTE(review): only "/" is rewritten here — confirm whether Claude Code's
    # own folder naming also rewrites other characters.
    return "-".join(cwd.split("/"))
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _collect_tool_uses(content: Any) -> dict[str, dict[str, Any]]:
    """Index the ``tool_use`` items of one assistant content block by their id.

    Returns:
        A mapping from tool-use id to the item itself. Empty when ``content`` is
        a string, bytes, or not iterable. Items that are not dictionaries, are
        not of type ``tool_use``, or lack a string ``id`` are ignored.
    """

    if isinstance(content, (str, bytes)) or not isinstance(content, Iterable):
        return {}
    return {
        entry["id"]: entry
        for entry in content
        if isinstance(entry, dict)
        and entry.get("type") == "tool_use"
        and isinstance(entry.get("id"), str)
    }
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _extract_claude_text(content: Any) -> str:
    """Pull the visible text out of one Claude Code message content value.

    A string is returned stripped. For an iterable, the stripped, non-empty
    ``text`` values of its ``text``-typed dictionary items are joined with
    newlines. Anything else yields an empty string.
    """

    if isinstance(content, str):
        return content.strip()
    if isinstance(content, bytes) or not isinstance(content, Iterable):
        return ""

    fragments: list[str] = []
    for entry in content:
        if isinstance(entry, dict) and entry.get("type") == "text":
            value = entry.get("text")
            if isinstance(value, str):
                stripped = value.strip()
                if stripped:
                    fragments.append(stripped)
    return "\n".join(fragments).strip()
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _extract_user_text_message(payload: dict[str, Any]) -> str:
    """Extract a user-authored Claude Code message while skipping tool-result wrappers.

    Args:
        payload: One parsed transcript record.

    Returns:
        The stripped message text, or ``""`` when the record has no usable
        ``message`` object, its content is neither a string nor an iterable, or
        the content carries any ``tool_result`` item.
    """

    message = payload.get("message")
    # An explicit null (or any non-object) message has no content to read;
    # `.get` with a default would not protect against a present-but-null value.
    if not isinstance(message, dict):
        return ""
    content = message.get("content")
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, Iterable) or isinstance(content, bytes):
        return ""
    # A record carrying tool results is a tool-result wrapper, not user-authored text.
    if any(isinstance(item, dict) and item.get("type") == "tool_result" for item in content):
        return ""
    return _extract_claude_text(content)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _normalize_tool_results(
    payload: dict[str, Any],
    *,
    host_session_key: str,
    raw_line: str,
    line_number: int,
    tool_uses: dict[str, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Normalize all meaningful tool results carried in one Claude Code user record.

    Args:
        payload: One parsed transcript record.
        host_session_key: Session id stamped onto every produced event.
        raw_line: The raw transcript line, used when a fallback event key is needed.
        line_number: 1-based line number, used when a fallback event key is needed.
        tool_uses: Previously seen tool-use items keyed by id, used to recover
            the tool name and shell command for each result.

    Returns:
        One event per ``tool_result`` item accepted by ``should_keep_tool_result``.
        Empty when the record has no usable ``message`` object or its content is
        not a non-string iterable.
    """

    message = payload.get("message")
    # An explicit null (or any non-object) message carries no tool results.
    if not isinstance(message, dict):
        return []
    content = message.get("content")
    if not isinstance(content, Iterable) or isinstance(content, (str, bytes)):
        return []

    events: list[dict[str, Any]] = []
    for item in content:
        if not isinstance(item, dict) or item.get("type") != "tool_result":
            continue
        tool_use_id = item.get("tool_use_id")
        tool_use = tool_uses.get(tool_use_id) if isinstance(tool_use_id, str) else None
        tool_name = tool_use.get("name") if isinstance(tool_use, dict) else None
        command = _tool_command(tool_use)
        is_error = bool(item.get("is_error"))
        # Status as reported by the transcript itself; the inferred status
        # stored on the event is computed separately below.
        reported_status = "error" if is_error else "ok"
        text = _tool_result_text(item)
        if not should_keep_tool_result(
            tool_name=tool_name if isinstance(tool_name, str) else None,
            status=reported_status,
            text=text,
            command=command,
            is_error=is_error,
        ):
            continue

        # Prefer the tool-use id as the event key; otherwise fall back to the
        # record's uuid or a line-derived key.
        if isinstance(tool_use_id, str):
            host_event_key = tool_use_id
        else:
            host_event_key = str(payload.get("uuid") or _fallback_key(payload, raw_line, line_number))

        normalized_name = _normalized_tool_name(tool_name=tool_name, command=command)
        events.append(
            _build_event(
                host_session_key=host_session_key,
                host_event_key=host_event_key,
                source="tool",
                occurred_at=str(payload.get("timestamp") or ""),
                content_kind="tool_result",
                content_text=summarize_tool_result(
                    tool_name=normalized_name,
                    status=reported_status,
                    text=text,
                    command=command,
                    is_error=is_error,
                ),
                extra_fields={
                    "tool_name": normalized_name,
                    "status": "error" if is_error else _normalized_tool_status(text=text),
                    "is_error": is_error,
                },
            )
        )
    return events
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _tool_command(tool_use: dict[str, Any] | None) -> str | None:
    """Return the shell command of a ``Bash`` tool use, if there is one.

    Returns:
        The string stored at ``input.command`` when ``tool_use`` is a dictionary
        named ``Bash`` with a dictionary ``input``; ``None`` in every other case.
    """

    if not isinstance(tool_use, dict) or tool_use.get("name") != "Bash":
        return None
    arguments = tool_use.get("input", {})
    shell_command = arguments.get("command") if isinstance(arguments, dict) else None
    return shell_command if isinstance(shell_command, str) else None
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _tool_result_text(item: dict[str, Any]) -> str | None:
    """Extract tool-result text from Claude Code's multiple result layouts.

    Layouts are tried in order:
      1. a string ``text`` field on the item;
      2. a string ``content`` field;
      3. a list ``content`` field, whose dictionary pieces contribute their
         non-empty string ``text`` values, joined with newlines.

    Returns:
        The extracted text, or ``None`` when no layout applies. A list of pieces
        that contains dictionaries but no usable text yields ``""``.
    """

    text = item.get("text")
    if isinstance(text, str):
        return text
    content = item.get("content")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = [piece for piece in content if isinstance(piece, dict)]
        if pieces:
            fragments = [piece.get("text") for piece in pieces]
            # Keep only real strings: a non-string "text" value would make join() raise.
            return "\n".join(
                fragment for fragment in fragments if isinstance(fragment, str) and fragment
            )
    return None
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _build_event(
    *,
    host_session_key: str,
    host_event_key: str,
    source: str,
    occurred_at: str,
    content_kind: str,
    content_text: str,
    extra_fields: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble one normalized Claude Code event dictionary.

    The base fields are filled from the arguments, ``host_app`` is always
    ``"claude_code"``, and ``raw_ref`` is built from the session and event keys.
    When ``extra_fields`` is non-empty its entries are merged on top of the
    base fields, overriding any base field with the same key.
    """

    base_event: dict[str, Any] = {
        "host_app": "claude_code",
        "host_session_key": host_session_key,
        "host_event_key": host_event_key,
        "source": source,
        "occurred_at": occurred_at,
        "content_kind": content_kind,
        "content_text": content_text,
        "raw_ref": f"claude_code://sessions/{host_session_key}#event={host_event_key}",
    }
    return {**base_event, **extra_fields} if extra_fields else base_event
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _normalized_tool_name(*, tool_name: object, command: str | None) -> str:
    """Normalize Claude Code tool identifiers into stable analytics-friendly names.

    Args:
        tool_name: Tool name as recorded on the tool use; may be missing or not a string.
        command: Shell command of the tool use, if any. Accepted for interface
            compatibility; the result does not depend on it.

    Returns:
        ``tool_name`` unchanged when it is a non-empty string other than
        ``"Bash"``; ``"exec_command"`` for ``"Bash"`` and for any missing,
        empty, or non-string name.
    """

    if isinstance(tool_name, str) and tool_name and tool_name != "Bash":
        return tool_name
    # "Bash" and every unnamed tool share one name; the former separate
    # `command is not None` branch returned this same value.
    return "exec_command"
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _normalized_tool_status(*, text: str | None) -> str:
    """Guess ``"ok"`` or ``"error"`` for a tool result from its text alone.

    The result is ``"error"`` when the text contains ``failed``, ``error`` or
    ``exception`` in any letter case, or reports
    ``process exited with code <n>`` with a non-zero ``n``. Otherwise, including
    for missing or empty text, it is ``"ok"``.
    """

    body = text or ""
    lowered = body.lower()
    for marker in ("failed", "error", "exception"):
        if marker in lowered:
            return "error"
    exit_match = re.search(r"process exited with code (\d+)", body, re.IGNORECASE)
    if exit_match is None:
        return "ok"
    return "error" if int(exit_match.group(1)) != 0 else "ok"
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _fallback_key(payload: dict[str, Any], raw_line: str, line_number: int) -> str:
    """Return an event key for one transcript record.

    The record's own ``uuid`` is used when it is a non-empty string. Otherwise
    the key is ``claude-line-<line_number>-<digest>``, where the digest is the
    first 16 hex characters of the SHA-1 of the raw line.
    """

    upstream_id = payload.get("uuid")
    if not (isinstance(upstream_id, str) and upstream_id):
        # SHA-1 is used only as a content fingerprint here, not for security.
        line_hash = hashlib.sha1(raw_line.encode("utf-8"), usedforsecurity=False).hexdigest()
        return f"claude-line-{line_number}-{line_hash[:16]}"
    return upstream_id
|