contextbase-plugin-codex-local 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.3
2
+ Name: contextbase-plugin-codex-local
3
+ Version: 0.2.9
4
+ Summary: Codex local plugin for ContextBase
5
+ Author: Alizain Feerasta
6
+ Author-email: Alizain Feerasta <alizain.feerasta@gmail.com>
7
+ Requires-Dist: contextbase-shared-plugins==0.2.9
8
+ Requires-Dist: dagster==1.12.14
9
+ Requires-Dist: dagster-dlt==0.28.14
10
+ Requires-Dist: dlt>=1.26.0
11
+ Requires-Dist: pydantic>=2.12.0
12
+ Requires-Python: >=3.14, <3.15
@@ -0,0 +1,16 @@
1
+ plugin_codex_local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ plugin_codex_local/binding_config.py,sha256=O0XCVcuYAgPL0ETBUkFHbWgWn-NPH1cMupLemxnzjGw,319
3
+ plugin_codex_local/component.py,sha256=DfrjukSu9wu062gk5pAFRatFfmUT20T29JvIkadTyQs,3730
4
+ plugin_codex_local/defs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ plugin_codex_local/defs/defs.yaml,sha256=v4A5kPSPPZEEI5QfIaavAo6_1CnKGm7io9mlI8ZcpSU,59
6
+ plugin_codex_local/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ plugin_codex_local/models/ctx.py,sha256=UWab_pHMFEyTBDmyqekphE-UQP87b7Vi8O5p0sMtjQs,483
8
+ plugin_codex_local/models/translators.py,sha256=2Ejj_WrPhoFAZI6uQ_PUkzUg0j9ITIZVyORdoMeTZoA,2108
9
+ plugin_codex_local/plugin.json,sha256=ghXsEmoqR3tdHjTxlPBqUpQ4MhaMahxrTXo960-HHaI,84
10
+ plugin_codex_local/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ plugin_codex_local/sources/snapshot.py,sha256=VTHP20xkVQYAtsRtmK1PHruFIOeHvh9eI7DclQUMHIY,2111
12
+ plugin_codex_local/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ plugin_codex_local/utils/parse.py,sha256=h8TxTH_hJmE1vSgGJbu5aWjqeHT1LgrhNHAFj53NufU,5309
14
+ contextbase_plugin_codex_local-0.2.9.dist-info/WHEEL,sha256=i9aSRDivn5iP9LaR1BLQX2GNAuriQWPsFwbbWygTX2k,81
15
+ contextbase_plugin_codex_local-0.2.9.dist-info/METADATA,sha256=xUFnrzSCJads87VUEdPTar4hwMJAf4kadpmtDOLnPoE,410
16
+ contextbase_plugin_codex_local-0.2.9.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.11.15
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
File without changes
@@ -0,0 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from pydantic import Field
6
+
7
+ from shared_plugins.bindings import BaseBindingConfigModel, ResolvedPath
8
+
9
+
10
class CodexLocalBindingConfig(BaseBindingConfigModel):
    # Per-binding configuration for the Codex local plugin.

    # Root of the Codex data directory. When the binding does not supply a
    # value, the default factory resolves to ``.codex`` inside the current
    # user's home directory at model-construction time.
    codex_dir: ResolvedPath = Field(default_factory=lambda: Path.home() / ".codex")
@@ -0,0 +1,110 @@
1
+ import dagster as dg
2
+ from dagster import AssetExecutionContext
3
+ from dagster_dlt import DagsterDltResource
4
+ from shared_plugins.automation import non_overlapping_automation_condition
5
+ from shared_plugins.bindings import parse_binding_config
6
+ from shared_plugins.control_plane import ControlPlaneClient
7
+ from shared_plugins.dlt import resolve_partition_binding, run_dlt_pipeline
8
+ from shared_plugins.naming import (
9
+ dagster_asset_group_name,
10
+ dagster_asset_tags,
11
+ dagster_dlt_asset_key,
12
+ dagster_partition_def_name,
13
+ dagster_pool_name,
14
+ dlt_source_name,
15
+ plugin_id_from_module,
16
+ )
17
+ from shared_plugins.resources import DLT_RESOURCE
18
+
19
+ from .binding_config import CodexLocalBindingConfig
20
+ from .sources.snapshot import codex_local_snapshot_source
21
+
22
# Name of the single job this component exposes.
SNAPSHOT_JOB = "snapshot"

# Plugin identifier, resolved by the shared naming helper from this module's path.
PLUGIN_ID = plugin_id_from_module(__file__)

# dlt source name for the snapshot job; also used to derive the asset keys.
SNAPSHOT_SOURCE_NAME = dlt_source_name(PLUGIN_ID, SNAPSHOT_JOB)
25
+
26
+
27
def _build_snapshot_specs(
    partitions_def: dg.PartitionsDefinition,
    automation_condition: dg.AutomationCondition,
) -> list[dg.AssetSpec]:
    """Build the asset specs for the snapshot job's ``session`` and ``record`` assets.

    Both specs share the same group name, tags, automation condition and
    partitions definition. The ``record`` spec additionally declares a
    dependency on the ``session`` asset.

    Args:
        partitions_def: Partitions definition applied to both assets.
        automation_condition: Automation condition applied to both assets.

    Returns:
        A two-element list: the ``session`` spec followed by the ``record`` spec.
    """
    common_kwargs = {
        "group_name": dagster_asset_group_name(PLUGIN_ID),
        "tags": dagster_asset_tags(PLUGIN_ID),
        "automation_condition": automation_condition,
        "partitions_def": partitions_def,
    }

    session_key = dagster_dlt_asset_key(SNAPSHOT_SOURCE_NAME, "session")
    record_key = dagster_dlt_asset_key(SNAPSHOT_SOURCE_NAME, "record")

    session_spec = dg.AssetSpec(key=session_key, **common_kwargs)
    # Records are declared downstream of the session asset.
    record_spec = dg.AssetSpec(key=record_key, deps=[session_key], **common_kwargs)
    return [session_spec, record_spec]
52
+
53
+
54
class CodexLocalSyncComponent(dg.Component):
    # Component that contributes the Codex local snapshot multi-asset, its
    # automation sensor and the dlt resource to the Dagster definitions.

    def build_defs(self, context: dg.ComponentLoadContext) -> dg.Definitions:
        """Build the Dagster definitions for the Codex local snapshot job.

        Returns:
            Definitions holding the ``codex_local_snapshot`` multi-asset, the
            ``codex_local_automation_sensor`` sensor and the ``dlt_resource``
            resource.
        """
        binding_partitions = dg.DynamicPartitionsDefinition(
            name=dagster_partition_def_name(PLUGIN_ID)
        )

        # Combine Dagster's on_missing condition with a cron tick every 15 minutes.
        # NOTE(review): the shared wrapper presumably keeps runs from
        # overlapping -- confirm in shared_plugins.automation.
        refresh_condition = non_overlapping_automation_condition(
            dg.AutomationCondition.on_missing()
            | dg.AutomationCondition.on_cron("*/15 * * * *")
        )
        asset_specs = _build_snapshot_specs(
            partitions_def=binding_partitions,
            automation_condition=refresh_condition,
        )

        @dg.multi_asset(
            name="codex_local_snapshot",
            specs=asset_specs,
            can_subset=True,
            pool=dagster_pool_name(PLUGIN_ID),
        )
        def codex_local_snapshot_assets(
            context: AssetExecutionContext,
            dlt_resource: DagsterDltResource,
            control_plane: dg.ResourceParam[ControlPlaneClient],
        ):
            # Look up the binding for this run, then parse its configuration.
            partition_binding = resolve_partition_binding(
                context=context,
                control_plane=control_plane,
                plugin_id=PLUGIN_ID,
            )
            binding_id = str(partition_binding.binding_id)
            binding_cfg = parse_binding_config(
                partition_binding, CodexLocalBindingConfig
            )

            snapshot_source = codex_local_snapshot_source(binding_id, binding_cfg)
            yield from run_dlt_pipeline(
                context=context,
                dlt_resource=dlt_resource,
                source=snapshot_source,
                plugin_id=PLUGIN_ID,
                binding_id=binding_id,
                job_name=SNAPSHOT_JOB,
            )

        # Sensor that evaluates the automation conditions of the snapshot
        # assets; it is running by default with a 30 second minimum interval.
        sensor = dg.AutomationConditionSensorDefinition(
            name="codex_local_automation_sensor",
            target=dg.AssetSelection.assets(codex_local_snapshot_assets),
            default_status=dg.DefaultSensorStatus.RUNNING,
            minimum_interval_seconds=30,
        )

        return dg.Definitions(
            assets=[codex_local_snapshot_assets],
            sensors=[sensor],
            resources={"dlt_resource": DLT_RESOURCE},
        )
File without changes
@@ -0,0 +1 @@
1
+ type: plugin_codex_local.component.CodexLocalSyncComponent
File without changes
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from pydantic import AwareDatetime
6
+ from shared_plugins.models import CtxModel, IdStr, NonNegativeInt
7
+
8
+
9
class SessionRow(CtxModel):
    # One row per discovered Codex session file.

    # Path of the session file.
    file_path: IdStr
    # Whether the file is flagged as archived.
    is_archived: bool
    # Timezone-aware modification time of the file, when available.
    file_mtime: AwareDatetime | None = None
13
+
14
+
15
class RecordRow(CtxModel):
    # One row per typed JSONL record found in a session file.

    # Path of the session file the record belongs to.
    file_path: IdStr
    # Non-negative index of the record's line within that file.
    line_index: NonNegativeInt
    # Type tag of the record.
    record_type: str
    # Type tag of the record's payload, when one is present.
    payload_type: str | None = None
    # Timezone-aware timestamp of the record, when one is present.
    timestamp: AwareDatetime | None = None
    # Raw payload value of the record; may be any JSON-compatible value.
    payload: Any = None
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable, Iterator
4
+ from typing import Any
5
+
6
+ from shared_plugins.values import parse_utc_datetime_from_str
7
+
8
+ from ..utils.parse import ParsedSession
9
+ from .ctx import RecordRow, SessionRow
10
+
11
+
12
+ def _strip_null_bytes(value: Any) -> Any:
13
+ """Recursively strip \\x00 null bytes from strings in a structure.
14
+
15
+ Postgres rejects \\u0000 in both text and jsonb columns.
16
+ """
17
+ if isinstance(value, str):
18
+ return value.replace("\x00", "")
19
+ if isinstance(value, dict):
20
+ return {k: _strip_null_bytes(v) for k, v in value.items()}
21
+ if isinstance(value, list):
22
+ return [_strip_null_bytes(item) for item in value]
23
+ return value
24
+
25
+
26
def sessions_to_ctx_models(
    *,
    binding_id: str,
    snapshots: Iterable[ParsedSession],
) -> Iterator[SessionRow]:
    """Translate parsed session files into ``SessionRow`` models, one per file.

    Args:
        binding_id: Binding identifier stamped on every row.
        snapshots: Parsed session files to translate.

    Yields:
        A ``SessionRow`` for each snapshot, in input order. The file's
        modification time is used both as the row's source-updated timestamp
        and as its ``file_mtime``.
    """
    for session in snapshots:
        modified_at = session.file_mtime
        row = SessionRow(
            ctx_binding_id=binding_id,
            ctx_source_updated_at=modified_at,
            file_path=session.file_path,
            is_archived=session.is_archived,
            file_mtime=modified_at,
        )
        yield row
40
+
41
+
42
def records_to_ctx_models(
    *,
    binding_id: str,
    snapshots: Iterable[ParsedSession],
) -> Iterator[RecordRow]:
    """Translate parsed JSONL lines into ``RecordRow`` models.

    Lines whose ``record_type`` is ``None`` are skipped. For every other line
    the raw timestamp (when non-empty) is parsed and used both as the row's
    source-updated timestamp and as its ``timestamp``; null bytes are stripped
    from the payload before the row is built.

    Args:
        binding_id: Binding identifier stamped on every row.
        snapshots: Parsed session files whose lines are translated.

    Yields:
        One ``RecordRow`` per line that has a record type, in file and line order.
    """
    for session in snapshots:
        for entry in session.lines:
            record_type = entry.record_type
            if record_type is None:
                continue

            raw_timestamp = entry.timestamp_raw
            if raw_timestamp:
                parsed_timestamp = parse_utc_datetime_from_str(raw_timestamp)
            else:
                parsed_timestamp = None

            clean_payload = _strip_null_bytes(entry.payload)

            yield RecordRow(
                ctx_binding_id=binding_id,
                ctx_source_updated_at=parsed_timestamp,
                file_path=session.file_path,
                line_index=entry.line_index,
                record_type=record_type,
                payload_type=entry.payload_type,
                timestamp=parsed_timestamp,
                payload=clean_payload,
            )
@@ -0,0 +1,7 @@
1
+ {
2
+ "auth": {
3
+ "type": "none"
4
+ },
5
+ "mode": "dagster",
6
+ "plugin_id": "codex_local"
7
+ }
File without changes
@@ -0,0 +1,72 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator
4
+ from typing import Any
5
+
6
+ import dlt
7
+ from shared_plugins.naming import (
8
+ dlt_resource_name,
9
+ dlt_source_name,
10
+ plugin_id_from_module,
11
+ )
12
+ from shared_plugins.resources import ctx_dlt_resource
13
+
14
+ from ..binding_config import CodexLocalBindingConfig
15
+ from ..models.ctx import RecordRow, SessionRow
16
+ from ..models.translators import (
17
+ records_to_ctx_models,
18
+ sessions_to_ctx_models,
19
+ )
20
+ from ..utils.parse import parse_session_snapshots
21
+
22
# Name of the job this source belongs to.
JOB = "snapshot"

# Plugin identifier, resolved by the shared naming helper from this module's path.
PLUGIN_ID = plugin_id_from_module(__file__)

# Rows are merged on the binding id using dlt's delete-insert strategy.
# NOTE(review): presumably each run replaces all rows of the binding --
# confirm against dlt's merge semantics.
MERGE_KEY = ("_ctx_binding_id",)
MERGE_WRITE_DISPOSITION = {"disposition": "merge", "strategy": "delete-insert"}
26
+
27
+
28
@dlt.source(name=dlt_source_name(PLUGIN_ID, JOB))
def codex_local_snapshot_source(
    binding_id: str,
    cfg: CodexLocalBindingConfig,
) -> tuple[Any, ...]:
    """Build the dlt source exposing the ``session`` and ``record`` resources.

    Session files are read from the ``sessions`` and ``archived_sessions``
    sub-directories of ``cfg.codex_dir``. Each resource scans and parses those
    directories on its own when it is iterated.

    Args:
        binding_id: Binding identifier stamped on every produced row.
        cfg: Binding configuration providing the Codex directory.

    Returns:
        A tuple of the session resource followed by the record resource.
    """
    live_dir = cfg.codex_dir / "sessions"
    archive_dir = cfg.codex_dir / "archived_sessions"

    def _load_snapshots():
        # Called once per resource, so each resource parses independently.
        return parse_session_snapshots(
            sessions_dir=live_dir,
            archived_dir=archive_dir,
        )

    @ctx_dlt_resource(
        name=dlt_resource_name("session"),
        write_disposition=MERGE_WRITE_DISPOSITION,
        merge_key=MERGE_KEY,
        primary_key=("_ctx_binding_id", "file_path"),
    )
    def session_resource() -> Iterator[SessionRow]:
        parsed = _load_snapshots()
        yield from sessions_to_ctx_models(binding_id=binding_id, snapshots=parsed)

    @ctx_dlt_resource(
        name=dlt_resource_name("record"),
        write_disposition=MERGE_WRITE_DISPOSITION,
        merge_key=MERGE_KEY,
        primary_key=("_ctx_binding_id", "file_path", "line_index"),
        # The payload column is declared with the json data type.
        columns={"payload": {"data_type": "json"}},
    )
    def record_resource() -> Iterator[RecordRow]:
        parsed = _load_snapshots()
        yield from records_to_ctx_models(binding_id=binding_id, snapshots=parsed)

    return (session_resource, record_resource)
File without changes
@@ -0,0 +1,177 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from dataclasses import dataclass
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from shared_plugins.values import as_mapping, as_string
11
+
12
+
13
@dataclass(frozen=True)
class ParsedLine:
    """One non-blank JSONL line of a session file, after parsing."""

    # Zero-based index of the line within the file.
    line_index: int
    # Record type tag, or None when the line could not be parsed or has no type.
    record_type: str | None
    # Type tag of the payload, when one is present.
    payload_type: str | None
    # Unparsed timestamp string, when one is present.
    timestamp_raw: str | None
    # Payload value as decoded from the line.
    payload: Any
22
+
23
+
24
@dataclass(frozen=True)
class SessionFileRef:
    """Reference to a JSONL file on disk that has not been parsed yet."""

    # Path of the file.
    file_path: str
    # Whether the file is flagged as archived.
    is_archived: bool
30
+
31
+
32
@dataclass(frozen=True)
class ParsedSession:
    """A session file together with all of its parsed lines."""

    # Path of the session file.
    file_path: str
    # Whether the file is flagged as archived.
    is_archived: bool
    # Modification time of the file.
    file_mtime: datetime | None
    # Parsed lines of the file.
    lines: list[ParsedLine]
40
+
41
+
42
+ def _parse_json_line(
43
+ line: str,
44
+ ) -> tuple[str | None, str | None, str | None, Any]:
45
+ """Parse a single JSON line into (record_type, payload_type, timestamp_raw, payload).
46
+
47
+ Returns (None, None, None, None) if the line cannot be parsed or lacks a type field.
48
+ """
49
+ try:
50
+ parsed = json.loads(line)
51
+ except (json.JSONDecodeError, ValueError):
52
+ return None, None, None, None
53
+
54
+ obj = as_mapping(parsed)
55
+ if obj is None:
56
+ return None, None, None, None
57
+
58
+ record_type = as_string(obj.get("type"))
59
+ if record_type is None:
60
+ return None, None, None, None
61
+
62
+ payload = obj.get("payload")
63
+
64
+ payload_type: str | None = None
65
+ payload_mapping = as_mapping(payload)
66
+ if payload_mapping is not None:
67
+ payload_type = as_string(payload_mapping.get("type"))
68
+
69
+ timestamp_raw = as_string(obj.get("timestamp"))
70
+
71
+ return record_type, payload_type, timestamp_raw, payload
72
+
73
+
74
+ def _scan_jsonl_files_recursive(root_dir: Path) -> list[str]:
75
+ """Recursively list all .jsonl files under a directory."""
76
+ if not root_dir.exists():
77
+ return []
78
+ if not root_dir.is_dir():
79
+ raise RuntimeError(f"Codex session path is not a directory: '{root_dir}'.")
80
+
81
+ files: list[str] = []
82
+ stack: list[Path] = [root_dir]
83
+ while stack:
84
+ current = stack.pop()
85
+ try:
86
+ entries = list(os.scandir(current))
87
+ except OSError as exc:
88
+ raise RuntimeError(
89
+ f"Failed to scan Codex session directory '{current}'."
90
+ ) from exc
91
+ for entry in entries:
92
+ try:
93
+ is_dir = entry.is_dir(follow_symlinks=False)
94
+ is_file = entry.is_file(follow_symlinks=False)
95
+ except OSError as exc:
96
+ raise RuntimeError(
97
+ f"Failed to inspect Codex session path '{entry.path}'."
98
+ ) from exc
99
+
100
+ if is_dir:
101
+ stack.append(Path(entry.path))
102
+ elif is_file and entry.name.lower().endswith(".jsonl"):
103
+ files.append(entry.path)
104
+ return files
105
+
106
+
107
def _list_jsonl_files(
    sessions_dir: Path,
    archived_dir: Path,
) -> list[SessionFileRef]:
    """Collect JSONL file references from the sessions and archived directories.

    The sessions directory is scanned first, so a path found in both scans
    keeps ``is_archived=False``.

    Returns:
        One reference per distinct path, sorted by path.
    """
    by_path: dict[str, SessionFileRef] = {}
    for directory, archived in ((sessions_dir, False), (archived_dir, True)):
        for path in _scan_jsonl_files_recursive(directory):
            # The first occurrence of a path wins.
            if path not in by_path:
                by_path[path] = SessionFileRef(file_path=path, is_archived=archived)
    return [by_path[path] for path in sorted(by_path)]
118
+
119
+
120
+ def _get_file_mtime(file_path: str) -> datetime | None:
121
+ """Return the file's modification time as UTC datetime."""
122
+ try:
123
+ stat_result = Path(file_path).stat()
124
+ except OSError as exc:
125
+ raise RuntimeError(f"Failed to stat Codex session file '{file_path}'.") from exc
126
+ return datetime.fromtimestamp(stat_result.st_mtime, tz=timezone.utc)
127
+
128
+
129
def _parse_session_file(file_ref: SessionFileRef) -> ParsedSession:
    """Read and parse one JSONL session file.

    Blank lines are skipped but still count towards ``line_index``. Lines that
    cannot be parsed are kept, with ``record_type`` set to ``None``.

    Raises:
        RuntimeError: If the file cannot be read or decoded as UTF-8, or if
            its modification time cannot be determined.
    """
    path = file_ref.file_path
    try:
        with open(path, "r", encoding="utf-8") as handle:
            raw_lines = list(handle)
    except (OSError, UnicodeError) as exc:
        raise RuntimeError(
            f"Failed to read Codex session file '{file_ref.file_path}'."
        ) from exc

    parsed_lines: list[ParsedLine] = []
    for line_index, raw_line in enumerate(raw_lines):
        content = raw_line.strip()
        if content:
            record_type, payload_type, timestamp_raw, payload = _parse_json_line(
                content
            )
            parsed_lines.append(
                ParsedLine(
                    line_index=line_index,
                    record_type=record_type,
                    payload_type=payload_type,
                    timestamp_raw=timestamp_raw,
                    payload=payload,
                )
            )

    return ParsedSession(
        file_path=path,
        is_archived=file_ref.is_archived,
        file_mtime=_get_file_mtime(path),
        lines=parsed_lines,
    )
167
+
168
+
169
def parse_session_snapshots(
    sessions_dir: Path,
    archived_dir: Path,
) -> list[ParsedSession]:
    """Discover and parse every Codex session file in both directories.

    Args:
        sessions_dir: Directory holding the non-archived session files.
        archived_dir: Directory holding the archived session files.

    Returns:
        One ``ParsedSession`` per discovered file, in the order the files are
        listed (sorted by path).
    """
    file_refs = _list_jsonl_files(sessions_dir, archived_dir)
    return list(map(_parse_session_file, file_refs))