contextbase-plugin-codex-local 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_plugin_codex_local-0.2.9.dist-info/METADATA +12 -0
- contextbase_plugin_codex_local-0.2.9.dist-info/RECORD +16 -0
- contextbase_plugin_codex_local-0.2.9.dist-info/WHEEL +4 -0
- plugin_codex_local/__init__.py +0 -0
- plugin_codex_local/binding_config.py +13 -0
- plugin_codex_local/component.py +110 -0
- plugin_codex_local/defs/__init__.py +0 -0
- plugin_codex_local/defs/defs.yaml +1 -0
- plugin_codex_local/models/__init__.py +0 -0
- plugin_codex_local/models/ctx.py +21 -0
- plugin_codex_local/models/translators.py +70 -0
- plugin_codex_local/plugin.json +7 -0
- plugin_codex_local/sources/__init__.py +0 -0
- plugin_codex_local/sources/snapshot.py +72 -0
- plugin_codex_local/utils/__init__.py +0 -0
- plugin_codex_local/utils/parse.py +177 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: contextbase-plugin-codex-local
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: Codex local plugin for ContextBase
|
|
5
|
+
Author: Alizain Feerasta
|
|
6
|
+
Author-email: Alizain Feerasta <alizain.feerasta@gmail.com>
|
|
7
|
+
Requires-Dist: contextbase-shared-plugins==0.2.9
|
|
8
|
+
Requires-Dist: dagster==1.12.14
|
|
9
|
+
Requires-Dist: dagster-dlt==0.28.14
|
|
10
|
+
Requires-Dist: dlt>=1.26.0
|
|
11
|
+
Requires-Dist: pydantic>=2.12.0
|
|
12
|
+
Requires-Python: >=3.14, <3.15
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
plugin_codex_local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
plugin_codex_local/binding_config.py,sha256=O0XCVcuYAgPL0ETBUkFHbWgWn-NPH1cMupLemxnzjGw,319
|
|
3
|
+
plugin_codex_local/component.py,sha256=DfrjukSu9wu062gk5pAFRatFfmUT20T29JvIkadTyQs,3730
|
|
4
|
+
plugin_codex_local/defs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
plugin_codex_local/defs/defs.yaml,sha256=v4A5kPSPPZEEI5QfIaavAo6_1CnKGm7io9mlI8ZcpSU,59
|
|
6
|
+
plugin_codex_local/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
plugin_codex_local/models/ctx.py,sha256=UWab_pHMFEyTBDmyqekphE-UQP87b7Vi8O5p0sMtjQs,483
|
|
8
|
+
plugin_codex_local/models/translators.py,sha256=2Ejj_WrPhoFAZI6uQ_PUkzUg0j9ITIZVyORdoMeTZoA,2108
|
|
9
|
+
plugin_codex_local/plugin.json,sha256=ghXsEmoqR3tdHjTxlPBqUpQ4MhaMahxrTXo960-HHaI,84
|
|
10
|
+
plugin_codex_local/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
plugin_codex_local/sources/snapshot.py,sha256=VTHP20xkVQYAtsRtmK1PHruFIOeHvh9eI7DclQUMHIY,2111
|
|
12
|
+
plugin_codex_local/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
plugin_codex_local/utils/parse.py,sha256=h8TxTH_hJmE1vSgGJbu5aWjqeHT1LgrhNHAFj53NufU,5309
|
|
14
|
+
contextbase_plugin_codex_local-0.2.9.dist-info/WHEEL,sha256=i9aSRDivn5iP9LaR1BLQX2GNAuriQWPsFwbbWygTX2k,81
|
|
15
|
+
contextbase_plugin_codex_local-0.2.9.dist-info/METADATA,sha256=xUFnrzSCJads87VUEdPTar4hwMJAf4kadpmtDOLnPoE,410
|
|
16
|
+
contextbase_plugin_codex_local-0.2.9.dist-info/RECORD,,
|
|
File without changes
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
|
|
7
|
+
from shared_plugins.bindings import BaseBindingConfigModel, ResolvedPath
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _default_codex_dir() -> Path:
    """Return the default Codex directory: ``.codex`` inside the user's home."""
    return Path.home() / ".codex"


# Binding configuration for the Codex local plugin.
class CodexLocalBindingConfig(BaseBindingConfigModel):
    # Root directory of the local Codex data; defaults to ``~/.codex``.
    # NOTE(review): ResolvedPath presumably normalizes the configured path —
    # confirm against shared_plugins.bindings.
    codex_dir: ResolvedPath = Field(default_factory=_default_codex_dir)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import dagster as dg
|
|
2
|
+
from dagster import AssetExecutionContext
|
|
3
|
+
from dagster_dlt import DagsterDltResource
|
|
4
|
+
from shared_plugins.automation import non_overlapping_automation_condition
|
|
5
|
+
from shared_plugins.bindings import parse_binding_config
|
|
6
|
+
from shared_plugins.control_plane import ControlPlaneClient
|
|
7
|
+
from shared_plugins.dlt import resolve_partition_binding, run_dlt_pipeline
|
|
8
|
+
from shared_plugins.naming import (
|
|
9
|
+
dagster_asset_group_name,
|
|
10
|
+
dagster_asset_tags,
|
|
11
|
+
dagster_dlt_asset_key,
|
|
12
|
+
dagster_partition_def_name,
|
|
13
|
+
dagster_pool_name,
|
|
14
|
+
dlt_source_name,
|
|
15
|
+
plugin_id_from_module,
|
|
16
|
+
)
|
|
17
|
+
from shared_plugins.resources import DLT_RESOURCE
|
|
18
|
+
|
|
19
|
+
from .binding_config import CodexLocalBindingConfig
|
|
20
|
+
from .sources.snapshot import codex_local_snapshot_source
|
|
21
|
+
|
|
22
|
+
# Plugin identifier computed from this module's file path.
PLUGIN_ID = plugin_id_from_module(__file__)
# Name of the single job exposed by this plugin.
SNAPSHOT_JOB = "snapshot"
# dlt source name for the snapshot job; also used to build the Dagster asset keys.
SNAPSHOT_SOURCE_NAME = dlt_source_name(PLUGIN_ID, SNAPSHOT_JOB)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _build_snapshot_specs(
    partitions_def: dg.PartitionsDefinition,
    automation_condition: dg.AutomationCondition,
) -> list[dg.AssetSpec]:
    """Build the asset specs for the snapshot job's ``session`` and ``record`` assets.

    Both specs share the plugin's group name and tags plus the supplied
    automation condition and partitions definition. The ``record`` spec also
    declares a dependency on the ``session`` asset.

    Args:
        partitions_def: Partitions definition applied to both assets.
        automation_condition: Automation condition applied to both assets.

    Returns:
        ``[session_spec, record_spec]`` in that order.
    """
    common_kwargs = {
        "group_name": dagster_asset_group_name(PLUGIN_ID),
        "tags": dagster_asset_tags(PLUGIN_ID),
        "automation_condition": automation_condition,
        "partitions_def": partitions_def,
    }

    session_asset_key = dagster_dlt_asset_key(SNAPSHOT_SOURCE_NAME, "session")
    record_asset_key = dagster_dlt_asset_key(SNAPSHOT_SOURCE_NAME, "record")

    session_spec = dg.AssetSpec(key=session_asset_key, **common_kwargs)
    record_spec = dg.AssetSpec(
        key=record_asset_key,
        deps=[session_asset_key],
        **common_kwargs,
    )
    return [session_spec, record_spec]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class CodexLocalSyncComponent(dg.Component):
    """Dagster component that wires up the Codex local snapshot pipeline."""

    def build_defs(self, context: dg.ComponentLoadContext) -> dg.Definitions:
        """Return the definitions for the snapshot multi-asset and its sensor.

        The result contains the ``codex_local_snapshot`` multi-asset, an
        automation-condition sensor targeting it, and the ``dlt_resource``
        resource.
        """
        binding_partitions = dg.DynamicPartitionsDefinition(
            name=dagster_partition_def_name(PLUGIN_ID)
        )

        # Request a run when a partition is missing or on the 15-minute cron;
        # the combined condition is wrapped by non_overlapping_automation_condition.
        trigger = dg.AutomationCondition.on_missing() | dg.AutomationCondition.on_cron(
            "*/15 * * * *"
        )
        asset_specs = _build_snapshot_specs(
            partitions_def=binding_partitions,
            automation_condition=non_overlapping_automation_condition(trigger),
        )

        @dg.multi_asset(
            specs=asset_specs,
            can_subset=True,
            name="codex_local_snapshot",
            pool=dagster_pool_name(PLUGIN_ID),
        )
        def codex_local_snapshot_assets(
            context: AssetExecutionContext,
            dlt_resource: DagsterDltResource,
            control_plane: dg.ResourceParam[ControlPlaneClient],
        ):
            # Look up the binding for this run via the control plane, parse its
            # config, then run the dlt snapshot source for that binding.
            resolved = resolve_partition_binding(
                context=context,
                control_plane=control_plane,
                plugin_id=PLUGIN_ID,
            )
            resolved_id = str(resolved.binding_id)
            config = parse_binding_config(resolved, CodexLocalBindingConfig)

            yield from run_dlt_pipeline(
                context=context,
                dlt_resource=dlt_resource,
                source=codex_local_snapshot_source(resolved_id, config),
                plugin_id=PLUGIN_ID,
                binding_id=resolved_id,
                job_name=SNAPSHOT_JOB,
            )

        # Sensor that evaluates the assets' automation conditions; it starts
        # in the RUNNING state with a 30-second minimum interval.
        sensor = dg.AutomationConditionSensorDefinition(
            name="codex_local_automation_sensor",
            target=dg.AssetSelection.assets(codex_local_snapshot_assets),
            default_status=dg.DefaultSensorStatus.RUNNING,
            minimum_interval_seconds=30,
        )

        return dg.Definitions(
            assets=[codex_local_snapshot_assets],
            sensors=[sensor],
            resources={"dlt_resource": DLT_RESOURCE},
        )
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
type: plugin_codex_local.component.CodexLocalSyncComponent
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pydantic import AwareDatetime
|
|
6
|
+
from shared_plugins.models import CtxModel, IdStr, NonNegativeInt
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# One row per discovered Codex session file.
class SessionRow(CtxModel):
    # Path of the session JSONL file as discovered on disk.
    file_path: IdStr
    # True when the file was found under the archived sessions directory.
    is_archived: bool
    # Modification time of the file (timezone-aware), if known.
    file_mtime: AwareDatetime | None = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# One row per JSONL line that parsed to an object with a string "type".
class RecordRow(CtxModel):
    # Path of the session JSONL file the line came from.
    file_path: IdStr
    # Zero-based index of the line within its file (blank lines still count).
    line_index: NonNegativeInt
    # Value of the record's top-level "type" field.
    record_type: str
    # Value of the "type" field inside the payload, when the payload has one.
    payload_type: str | None = None
    # Timestamp parsed from the record's "timestamp" field, when present.
    timestamp: AwareDatetime | None = None
    # The record's "payload" value; stored in a JSON column by the record resource.
    payload: Any = None
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable, Iterator
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from shared_plugins.values import parse_utc_datetime_from_str
|
|
7
|
+
|
|
8
|
+
from ..utils.parse import ParsedSession
|
|
9
|
+
from .ctx import RecordRow, SessionRow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _strip_null_bytes(value: Any) -> Any:
    """Recursively strip NUL (``\\x00``) characters from strings in a structure.

    Postgres rejects ``\\u0000`` in both text and jsonb columns. In jsonb the
    restriction applies to object keys as well as values, so string dict keys
    are cleaned too.

    Args:
        value: Any value; strings, dicts and lists are processed recursively,
            everything else is returned unchanged.

    Returns:
        A cleaned copy for strings, dicts and lists; the original object for
        any other type.

    Note:
        If two keys of the same dict differ only by NUL characters they
        collapse into one key and the later entry wins.
    """
    if isinstance(value, str):
        return value.replace("\x00", "")
    if isinstance(value, dict):
        return {
            # Non-string keys cannot contain NUL characters; keep them as-is.
            (key.replace("\x00", "") if isinstance(key, str) else key): (
                _strip_null_bytes(item)
            )
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [_strip_null_bytes(item) for item in value]
    return value
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def sessions_to_ctx_models(
    *,
    binding_id: str,
    snapshots: Iterable[ParsedSession],
) -> Iterator[SessionRow]:
    """Lazily convert parsed session files into ``SessionRow`` models.

    Args:
        binding_id: Binding identifier stamped on every row.
        snapshots: Parsed session files to convert.

    Yields:
        One ``SessionRow`` per parsed session, in input order. The file's
        modification time is used both as ``file_mtime`` and as the row's
        source-updated timestamp.
    """
    yield from (
        SessionRow(
            ctx_binding_id=binding_id,
            ctx_source_updated_at=session.file_mtime,
            file_path=session.file_path,
            is_archived=session.is_archived,
            file_mtime=session.file_mtime,
        )
        for session in snapshots
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def records_to_ctx_models(
    *,
    binding_id: str,
    snapshots: Iterable[ParsedSession],
) -> Iterator[RecordRow]:
    """Lazily convert parsed JSONL lines into ``RecordRow`` models.

    Lines whose ``record_type`` is ``None`` (they did not parse into a typed
    record) are skipped.

    Args:
        binding_id: Binding identifier stamped on every row.
        snapshots: Parsed session files whose lines are converted.

    Yields:
        One ``RecordRow`` per remaining line, in file order. The parsed
        timestamp is used both as ``timestamp`` and as the row's
        source-updated timestamp; the payload has NUL characters stripped.
    """
    for session in snapshots:
        for parsed in session.lines:
            if parsed.record_type is None:
                continue

            # An empty or missing raw timestamp yields no timestamp at all.
            if parsed.timestamp_raw:
                record_ts = parse_utc_datetime_from_str(parsed.timestamp_raw)
            else:
                record_ts = None

            clean_payload = _strip_null_bytes(parsed.payload)

            yield RecordRow(
                ctx_binding_id=binding_id,
                ctx_source_updated_at=record_ts,
                file_path=session.file_path,
                line_index=parsed.line_index,
                record_type=parsed.record_type,
                payload_type=parsed.payload_type,
                timestamp=record_ts,
                payload=clean_payload,
            )
|
|
File without changes
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterator
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import dlt
|
|
7
|
+
from shared_plugins.naming import (
|
|
8
|
+
dlt_resource_name,
|
|
9
|
+
dlt_source_name,
|
|
10
|
+
plugin_id_from_module,
|
|
11
|
+
)
|
|
12
|
+
from shared_plugins.resources import ctx_dlt_resource
|
|
13
|
+
|
|
14
|
+
from ..binding_config import CodexLocalBindingConfig
|
|
15
|
+
from ..models.ctx import RecordRow, SessionRow
|
|
16
|
+
from ..models.translators import (
|
|
17
|
+
records_to_ctx_models,
|
|
18
|
+
sessions_to_ctx_models,
|
|
19
|
+
)
|
|
20
|
+
from ..utils.parse import parse_session_snapshots
|
|
21
|
+
|
|
22
|
+
# Plugin identifier computed from this module's file path.
PLUGIN_ID = plugin_id_from_module(__file__)
# Job name used to build the dlt source name.
JOB = "snapshot"
# Both resources are written with dlt's merge disposition, delete-insert strategy.
MERGE_WRITE_DISPOSITION = {"disposition": "merge", "strategy": "delete-insert"}
# NOTE(review): merging on the binding id alone presumably replaces a binding's
# whole previous snapshot on each load — confirm against dlt merge semantics.
MERGE_KEY = ("_ctx_binding_id",)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dlt.source(name=dlt_source_name(PLUGIN_ID, JOB))
def codex_local_snapshot_source(
    binding_id: str,
    cfg: CodexLocalBindingConfig,
) -> tuple[Any, ...]:
    """Build the dlt source holding the ``session`` and ``record`` resources.

    Session files are read from the ``sessions`` and ``archived_sessions``
    subdirectories of ``cfg.codex_dir``. Each resource scans and parses the
    files itself when it is iterated.

    Args:
        binding_id: Binding identifier stamped on every emitted row.
        cfg: Binding configuration providing ``codex_dir``.

    Returns:
        ``(session_resource, record_resource)``.
    """
    codex_root = cfg.codex_dir
    live_dir = codex_root / "sessions"
    archive_dir = codex_root / "archived_sessions"

    def _load_snapshots():
        # Called lazily by each resource, so the files are parsed once per resource.
        return parse_session_snapshots(
            sessions_dir=live_dir,
            archived_dir=archive_dir,
        )

    # Session rows are keyed by binding and file path.
    @ctx_dlt_resource(
        name=dlt_resource_name("session"),
        write_disposition=MERGE_WRITE_DISPOSITION,
        merge_key=MERGE_KEY,
        primary_key=("_ctx_binding_id", "file_path"),
    )
    def session_resource() -> Iterator[SessionRow]:
        yield from sessions_to_ctx_models(
            binding_id=binding_id,
            snapshots=_load_snapshots(),
        )

    # Record rows are keyed by binding, file path and line index; the payload
    # column is declared as JSON.
    @ctx_dlt_resource(
        name=dlt_resource_name("record"),
        write_disposition=MERGE_WRITE_DISPOSITION,
        merge_key=MERGE_KEY,
        primary_key=("_ctx_binding_id", "file_path", "line_index"),
        columns={"payload": {"data_type": "json"}},
    )
    def record_resource() -> Iterator[RecordRow]:
        yield from records_to_ctx_models(
            binding_id=binding_id,
            snapshots=_load_snapshots(),
        )

    return (session_resource, record_resource)
|
|
File without changes
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from shared_plugins.values import as_mapping, as_string
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class ParsedLine:
    """A single parsed JSONL line."""

    # Zero-based index of the line within its file (blank lines still count).
    line_index: int
    # Top-level "type" of the record; None when the line could not be parsed
    # into a typed record.
    record_type: str | None
    # "type" field found inside the payload, if any.
    payload_type: str | None
    # Unparsed value of the record's "timestamp" field, if any.
    timestamp_raw: str | None
    # The record's "payload" value exactly as decoded from JSON.
    payload: Any
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class SessionFileRef:
    """Filesystem reference to a JSONL file before parsing."""

    # Path of the JSONL file as returned by the directory scan.
    file_path: str
    # True when the file was found under the archived sessions directory.
    is_archived: bool
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class ParsedSession:
    """A fully parsed session file."""

    # Path of the JSONL file that was parsed.
    file_path: str
    # True when the file was found under the archived sessions directory.
    is_archived: bool
    # Modification time of the file as a UTC datetime.
    file_mtime: datetime | None
    # One entry per non-blank line, including lines that failed to parse.
    lines: list[ParsedLine]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_json_line(
    line: str,
) -> tuple[str | None, str | None, str | None, Any]:
    """Decode one JSONL line into its record fields.

    Args:
        line: The raw JSON text of a single line.

    Returns:
        ``(record_type, payload_type, timestamp_raw, payload)``. All four are
        ``None`` when the line is not valid JSON, when ``as_mapping`` rejects
        the decoded value, or when ``as_string`` yields no top-level ``type``.
    """
    unusable = (None, None, None, None)

    try:
        decoded = json.loads(line)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return unusable

    record = as_mapping(decoded)
    if record is None:
        return unusable

    record_type = as_string(record.get("type"))
    if record_type is None:
        return unusable

    payload = record.get("payload")

    # Only a mapping payload can carry a nested "type".
    payload_fields = as_mapping(payload)
    nested_type = (
        as_string(payload_fields.get("type")) if payload_fields is not None else None
    )

    raw_timestamp = as_string(record.get("timestamp"))
    return record_type, nested_type, raw_timestamp, payload
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _scan_jsonl_files_recursive(root_dir: Path) -> list[str]:
    """Recursively list all .jsonl files under a directory.

    Symlinks are not followed: a symlinked directory is not descended into and
    a symlinked file is not reported. The ``.jsonl`` suffix match is
    case-insensitive.

    Args:
        root_dir: Directory to scan.

    Returns:
        Paths of the matching files, in no particular order. Empty when
        ``root_dir`` does not exist.

    Raises:
        RuntimeError: If ``root_dir`` exists but is not a directory, or if a
            directory or entry cannot be scanned or inspected.
    """
    if not root_dir.exists():
        return []
    if not root_dir.is_dir():
        raise RuntimeError(f"Codex session path is not a directory: '{root_dir}'.")

    files: list[str] = []
    pending: list[Path] = [root_dir]
    while pending:
        current = pending.pop()
        try:
            # The context manager closes the directory handle even when
            # listing fails part-way through.
            with os.scandir(current) as scan:
                entries = list(scan)
        except OSError as exc:
            raise RuntimeError(
                f"Failed to scan Codex session directory '{current}'."
            ) from exc

        for entry in entries:
            try:
                is_dir = entry.is_dir(follow_symlinks=False)
                is_file = entry.is_file(follow_symlinks=False)
            except OSError as exc:
                raise RuntimeError(
                    f"Failed to inspect Codex session path '{entry.path}'."
                ) from exc

            if is_dir:
                pending.append(Path(entry.path))
            elif is_file and entry.name.lower().endswith(".jsonl"):
                files.append(entry.path)
    return files
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _list_jsonl_files(
    sessions_dir: Path,
    archived_dir: Path,
) -> list[SessionFileRef]:
    """Collect the JSONL files of both directories, one reference per path.

    The sessions directory is scanned first, so a path found by both scans
    keeps its non-archived reference.

    Args:
        sessions_dir: Directory holding live session files.
        archived_dir: Directory holding archived session files.

    Returns:
        File references sorted by path.
    """
    refs_by_path: dict[str, SessionFileRef] = {}
    for scan_root, archived in ((sessions_dir, False), (archived_dir, True)):
        for path in _scan_jsonl_files_recursive(scan_root):
            if path not in refs_by_path:
                refs_by_path[path] = SessionFileRef(
                    file_path=path,
                    is_archived=archived,
                )
    # Each ref's file_path equals its dict key, so this sorts by path.
    return sorted(refs_by_path.values(), key=lambda ref: ref.file_path)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _get_file_mtime(file_path: str) -> datetime | None:
|
|
121
|
+
"""Return the file's modification time as UTC datetime."""
|
|
122
|
+
try:
|
|
123
|
+
stat_result = Path(file_path).stat()
|
|
124
|
+
except OSError as exc:
|
|
125
|
+
raise RuntimeError(f"Failed to stat Codex session file '{file_path}'.") from exc
|
|
126
|
+
return datetime.fromtimestamp(stat_result.st_mtime, tz=timezone.utc)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _parse_session_file(file_ref: SessionFileRef) -> ParsedSession:
    """Read and parse one JSONL session file.

    Blank lines are skipped but still count towards ``line_index``. Lines that
    fail to parse are kept with all fields set to ``None``.

    Args:
        file_ref: Reference to the file to parse.

    Returns:
        The parsed session, including the file's modification time.

    Raises:
        RuntimeError: If the file cannot be read or decoded as UTF-8, or if
            its modification time cannot be determined.
    """
    try:
        with open(file_ref.file_path, "r", encoding="utf-8") as handle:
            raw_lines = handle.readlines()
    except (OSError, UnicodeError) as exc:
        raise RuntimeError(
            f"Failed to read Codex session file '{file_ref.file_path}'."
        ) from exc

    # ParsedLine's fields after line_index follow the tuple order returned by
    # _parse_json_line, so the tuple can be unpacked positionally.
    parsed_lines: list[ParsedLine] = [
        ParsedLine(position, *_parse_json_line(text))
        for position, text in enumerate(raw.strip() for raw in raw_lines)
        if text
    ]

    return ParsedSession(
        file_path=file_ref.file_path,
        is_archived=file_ref.is_archived,
        file_mtime=_get_file_mtime(file_ref.file_path),
        lines=parsed_lines,
    )
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def parse_session_snapshots(
    sessions_dir: Path,
    archived_dir: Path,
) -> list[ParsedSession]:
    """Discover and parse every Codex session file in both directories.

    Args:
        sessions_dir: Directory holding live session files.
        archived_dir: Directory holding archived session files.

    Returns:
        One ``ParsedSession`` per discovered file, in the order the files
        are listed.
    """
    parsed_sessions: list[ParsedSession] = []
    for ref in _list_jsonl_files(sessions_dir, archived_dir):
        parsed_sessions.append(_parse_session_file(ref))
    return parsed_sessions
|