cjm-context-graph-layer 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ __version__ = "0.0.2"
@@ -0,0 +1,63 @@
1
+ # Autogenerated by nbdev
2
+
3
+ d = { 'settings': { 'branch': 'main',
4
+ 'doc_baseurl': '/cjm-context-graph-layer',
5
+ 'doc_host': 'https://cj-mills.github.io',
6
+ 'git_url': 'https://github.com/cj-mills/cjm-context-graph-layer',
7
+ 'lib_path': 'cjm_context_graph_layer'},
8
+ 'syms': { 'cjm_context_graph_layer.declare': { 'cjm_context_graph_layer.declare.Derivation': ( 'declare.html#derivation',
9
+ 'cjm_context_graph_layer/declare.py'),
10
+ 'cjm_context_graph_layer.declare.derivation_to_graph': ( 'declare.html#derivation_to_graph',
11
+ 'cjm_context_graph_layer/declare.py')},
12
+ 'cjm_context_graph_layer.edits': { 'cjm_context_graph_layer.edits.SpineEdit': ( 'edits.html#spineedit',
13
+ 'cjm_context_graph_layer/edits.py'),
14
+ 'cjm_context_graph_layer.edits.SpineEdit.__post_init__': ( 'edits.html#spineedit.__post_init__',
15
+ 'cjm_context_graph_layer/edits.py'),
16
+ 'cjm_context_graph_layer.edits.SpineEdit.from_dict': ( 'edits.html#spineedit.from_dict',
17
+ 'cjm_context_graph_layer/edits.py'),
18
+ 'cjm_context_graph_layer.edits.SpineEdit.to_dict': ( 'edits.html#spineedit.to_dict',
19
+ 'cjm_context_graph_layer/edits.py'),
20
+ 'cjm_context_graph_layer.edits.SpineEditError': ( 'edits.html#spineediterror',
21
+ 'cjm_context_graph_layer/edits.py'),
22
+ 'cjm_context_graph_layer.edits.SpineUnit': ( 'edits.html#spineunit',
23
+ 'cjm_context_graph_layer/edits.py'),
24
+ 'cjm_context_graph_layer.edits.project_effective_spine': ( 'edits.html#project_effective_spine',
25
+ 'cjm_context_graph_layer/edits.py'),
26
+ 'cjm_context_graph_layer.edits.resolve_active': ( 'edits.html#resolve_active',
27
+ 'cjm_context_graph_layer/edits.py')},
28
+ 'cjm_context_graph_layer.grammar': { 'cjm_context_graph_layer.grammar.OverlayRelations': ( 'grammar.html#overlayrelations',
29
+ 'cjm_context_graph_layer/grammar.py'),
30
+ 'cjm_context_graph_layer.grammar.OverlayRelations.all': ( 'grammar.html#overlayrelations.all',
31
+ 'cjm_context_graph_layer/grammar.py'),
32
+ 'cjm_context_graph_layer.grammar.SpineRelations': ( 'grammar.html#spinerelations',
33
+ 'cjm_context_graph_layer/grammar.py'),
34
+ 'cjm_context_graph_layer.grammar.SpineRelations.all': ( 'grammar.html#spinerelations.all',
35
+ 'cjm_context_graph_layer/grammar.py'),
36
+ 'cjm_context_graph_layer.grammar.attribution': ( 'grammar.html#attribution',
37
+ 'cjm_context_graph_layer/grammar.py'),
38
+ 'cjm_context_graph_layer.grammar.grouped_spine_edges': ( 'grammar.html#grouped_spine_edges',
39
+ 'cjm_context_graph_layer/grammar.py'),
40
+ 'cjm_context_graph_layer.grammar.make_edge': ( 'grammar.html#make_edge',
41
+ 'cjm_context_graph_layer/grammar.py'),
42
+ 'cjm_context_graph_layer.grammar.spine_edges': ( 'grammar.html#spine_edges',
43
+ 'cjm_context_graph_layer/grammar.py')},
44
+ 'cjm_context_graph_layer.identity': { 'cjm_context_graph_layer.identity.canonical_part': ( 'identity.html#canonical_part',
45
+ 'cjm_context_graph_layer/identity.py'),
46
+ 'cjm_context_graph_layer.identity.derive_edge_id': ( 'identity.html#derive_edge_id',
47
+ 'cjm_context_graph_layer/identity.py'),
48
+ 'cjm_context_graph_layer.identity.derive_node_id': ( 'identity.html#derive_node_id',
49
+ 'cjm_context_graph_layer/identity.py')},
50
+ 'cjm_context_graph_layer.ops': { 'cjm_context_graph_layer.ops.ExtendResult': ( 'ops.html#extendresult',
51
+ 'cjm_context_graph_layer/ops.py'),
52
+ 'cjm_context_graph_layer.ops.GraphIntegrityError': ( 'ops.html#graphintegrityerror',
53
+ 'cjm_context_graph_layer/ops.py'),
54
+ 'cjm_context_graph_layer.ops._source_hashes': ( 'ops.html#_source_hashes',
55
+ 'cjm_context_graph_layer/ops.py'),
56
+ 'cjm_context_graph_layer.ops.extend_graph': ( 'ops.html#extend_graph',
57
+ 'cjm_context_graph_layer/ops.py'),
58
+ 'cjm_context_graph_layer.ops.graph_task': ( 'ops.html#graph_task',
59
+ 'cjm_context_graph_layer/ops.py'),
60
+ 'cjm_context_graph_layer.ops.node_identity_mismatch': ( 'ops.html#node_identity_mismatch',
61
+ 'cjm_context_graph_layer/ops.py'),
62
+ 'cjm_context_graph_layer.ops.partition_by_presence': ( 'ops.html#partition_by_presence',
63
+ 'cjm_context_graph_layer/ops.py')}}}
@@ -0,0 +1,60 @@
1
+ """Provenance-by-declaration: host logic stays readable Python in the workflow core and DECLARES its provenance contributions as a `Derivation` event node (+ DERIVED_FROM input edges, PRODUCED output edges). This recovers audit completeness without the substrate executing host logic (pass-2 Thread 4's false-dichotomy resolution). The substrate stays untouched: declarations read composition/job ids from the outside.
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-layer/declare.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/declare.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['Derivation', 'derivation_to_graph']
9
+
10
+ # %% ../nbs/declare.ipynb #97664d2b
11
+ import time
12
+ from uuid import uuid4
13
+ from dataclasses import dataclass, field
14
+ from typing import Any, Dict, List, Optional, Tuple
15
+
16
+ from .grammar import OverlayRelations, attribution, make_edge
17
+
18
+ # %% ../nbs/declare.ipynb #c11abbce
19
@dataclass
class Derivation:
    """A single host-logic transformation event, declared for the audit trail.

    The record is deliberately coarse. Callers list only the ids that anchor
    the event (for example the Transcript nodes that were consumed and the
    Source whose spine was produced) rather than every fine-grained output:
    per-node provenance already travels on each node's SourceRefs, and
    repeating it here would re-create topology (the Thread-2
    no-derived_from rule).
    """
    actor: str  # Identity of whoever ran the transformation (e.g. "host:cjm-transcript-decomp-core")
    method: str  # Name of the transformation that ran (e.g. "alignment-fold/v1")
    input_ids: List[str] = field(default_factory=list)  # Ids of the graph nodes that were consumed
    output_ids: List[str] = field(default_factory=list)  # Ids of the graph nodes that were produced (coarse anchors only)
    asserted_at: Optional[float] = None  # Unix timestamp of the assertion; None defers to "now" when materialized
    composition_id: Optional[str] = None  # Id of the substrate composition run, when there is one
    job_ids: List[str] = field(default_factory=list)  # Ids of the member jobs, when there are any
    properties: Dict[str, Any] = field(default_factory=dict)  # Additional event properties supplied by the caller
37
+
38
+ # %% ../nbs/declare.ipynb #7c77965e
39
def derivation_to_graph(
    d: Derivation,  # The declared event to materialize
    derivation_id: Optional[str] = None,  # Explicit node id; a falsy value means "generate a fresh one"
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:  # (event node wire dict, edge wire dicts)
    """Turn a declaration into one event node plus DERIVED_FROM / PRODUCED edges.

    Unless `derivation_id` is supplied the event node receives a freshly
    GENERATED uuid4 id (asserted/decision class, the FLIP-TRIGGER-protected
    kind). Edge ids come from `make_edge`'s default, so they are
    deterministic per (event, anchor, relation).
    """
    event_id = derivation_id if derivation_id else str(uuid4())

    event_props: Dict[str, Any] = attribution(d.actor, method=d.method, asserted_at=d.asserted_at)
    if d.composition_id:
        event_props["composition_id"] = d.composition_id
    if d.job_ids:
        event_props["job_ids"] = list(d.job_ids)
    # Caller-supplied extras are merged last, so they win on any key clash.
    event_props.update(d.properties)

    event_node = {"id": event_id, "label": "Derivation", "properties": event_props, "sources": []}

    # Input edges first, then output edges; both point away from the event node.
    event_edges: List[Dict[str, Any]] = []
    for consumed_id in d.input_ids:
        event_edges.append(make_edge(event_id, consumed_id, OverlayRelations.DERIVED_FROM, {"role": "input"}))
    for produced_id in d.output_ids:
        event_edges.append(make_edge(event_id, produced_id, OverlayRelations.PRODUCED, {"role": "output"}))

    return event_node, event_edges
@@ -0,0 +1,132 @@
1
+ """The spine-edit operation vocabulary (`prune` / `replace_text` / `boundary_shift`) + supersession resolution + the effective-view projection. These are generic operations on any NEXT-chained text spine; correction workflows carry them in overlay-node payloads, and the projection interprets them at read time (migrates correction-core C11/C16 onto the layer).
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-layer/edits.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/edits.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['EDIT_OPS', 'SpineEditError', 'SpineUnit', 'SpineEdit', 'resolve_active', 'project_effective_spine']
9
+
10
+ # %% ../nbs/edits.ipynb #68a6f6d3
11
+ from dataclasses import dataclass, field
12
+ from typing import Any, Dict, Iterable, List, Set, Tuple
13
+
14
+ # %% ../nbs/edits.ipynb #376d25b0
15
# Reserved spine-edit operation vocabulary (reserve-enum-values-up-front).
# `boundary_shift` is reserved NOW, per the where-graph-begins resolution, even
# though no driver produces it yet: persisting the decision keeps the
# alignment-error-vs-transcription-error distinction intact.
# `SpineEdit.__post_init__` rejects any op that is not listed here.
EDIT_OPS = ("prune", "replace_text", "boundary_shift")
20
+
21
+ # %% ../nbs/edits.ipynb #d771e0ec
22
class SpineEditError(ValueError):
    """Raised when a spine edit cannot be validated or applied.

    Such failures are reported loudly; they are never swallowed silently.
    """
25
+
26
+ # %% ../nbs/edits.ipynb #95ad3798
27
@dataclass
class SpineUnit:
    """The smallest projection unit: a spine position paired with its effective text."""
    id: str  # Id of the layer-0 segment node occupying this position
    text: str  # Text that is in effect at this position
32
+
33
+ # %% ../nbs/edits.ipynb #e6875de1
34
@dataclass
class SpineEdit:
    """A single spine-edit decision, in the form carried by an overlay node's payload.

    Meaning of each `op`:
    - `prune`: remove `targets` from the effective view; the payload is ignored.
    - `replace_text`: payload `{"text": ...}` becomes the text of every target.
    - `boundary_shift`: payload `{"boundary_after": <left segment id>,
      "text": <moved text>, "direction": "push"|"pull"}` moves text across the
      boundary between two adjacent FIXED positions. `push` takes it from the
      end of the left unit and puts it at the start of the right unit; `pull`
      is the mirror image. 1:1 alignment is maintained continuously: the
      number of positions and their order never change.
    """
    edit_id: str  # Id of the overlay node carrying the edit (supersession anchor)
    op: str  # Must be one of EDIT_OPS
    targets: List[str] = field(default_factory=list)  # Layer-0 segment node ids the edit applies to
    payload: Dict[str, Any] = field(default_factory=dict)  # Op-specific payload (described above)
    created_at: float = 0.0  # Decision timestamp (drives application order and the latest-wins tiebreak)

    def __post_init__(self):
        """Reject any operation outside the reserved vocabulary."""
        if self.op in EDIT_OPS:
            return
        raise SpineEditError(f"unknown spine-edit op: {self.op!r} (known: {EDIT_OPS})")

    def to_dict(self) -> Dict[str, Any]:  # Payload-ready dict
        """Serialize the edit so it can travel inside an overlay node payload.

        `targets` and `payload` are shallow-copied, so the result does not
        share those containers with this instance.
        """
        return dict(
            edit_id=self.edit_id,
            op=self.op,
            targets=list(self.targets),
            payload=dict(self.payload),
            created_at=self.created_at,
        )

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "SpineEdit":  # Reconstructed edit
        """Rebuild an edit from a payload dict.

        `edit_id` and `op` are required keys. Missing or falsy `targets`,
        `payload` and `created_at` fall back to an empty list, an empty dict
        and 0.0 respectively.
        """
        return cls(
            edit_id=d["edit_id"],
            op=d["op"],
            targets=list(d.get("targets") or []),
            payload=dict(d.get("payload") or {}),
            created_at=float(d.get("created_at") or 0.0),
        )
67
+
68
+ # %% ../nbs/edits.ipynb #d0b1d211
69
def resolve_active(
    edit_ids: Iterable[str],  # Candidate overlay node ids
    supersedes_pairs: Iterable[Tuple[str, str]],  # SUPERSEDES edges as (superseder_id, superseded_id)
) -> Set[str]:  # Ids that remain active (not superseded)
    """Compute the active set under append-only supersession.

    An id counts as superseded exactly when it appears as the TARGET of some
    SUPERSEDES edge. Chains therefore resolve without special handling (C
    supersedes B supersedes A leaves only C active), and no input is mutated
    (the C16 semantics, now owned by the layer).
    """
    replaced: Set[str] = set()
    for _superseder, superseded_id in supersedes_pairs:
        replaced.add(superseded_id)

    active: Set[str] = set()
    for candidate in edit_ids:
        if candidate not in replaced:
            active.add(candidate)
    return active
81
+
82
+ # %% ../nbs/edits.ipynb #0ced23b9
83
def project_effective_spine(
    units: List[SpineUnit],  # Ordered layer-0 spine (treated as immutable input)
    edits: List[SpineEdit],  # ACTIVE edits to apply (resolve supersession beforehand)
) -> List[SpineUnit]:  # Freshly built effective spine (the input is never mutated)
    """Project the effective view: layer-0 plus the active edits, resolved at read time.

    Edits are applied in (created_at, edit_id) order against the evolving text
    state, so a later decision sees the effect of earlier ones and
    replace_text latest-wins falls out of the ordering. Pruned positions are
    only dropped when the result is built, which means a boundary_shift or
    replace recorded before a later prune still applies cleanly.
    boundary_shift is STRICT: when the current text no longer carries the
    moved text verbatim at the boundary, the projection raises SpineEditError
    instead of guessing.
    """
    position = {unit.id: idx for idx, unit in enumerate(units)}
    current = {unit.id: unit.text for unit in units}
    dropped: Set[str] = set()

    for edit in sorted(edits, key=lambda item: (item.created_at, item.edit_id)):
        if edit.op == "prune":
            dropped.update(edit.targets)
            continue

        if edit.op == "replace_text":
            replacement = edit.payload.get("text", "")
            for target in edit.targets:
                # Targets that are not on this spine are skipped.
                if target in current:
                    current[target] = replacement
            continue

        if edit.op != "boundary_shift":
            continue

        left_id = edit.payload.get("boundary_after")
        chunk = edit.payload.get("text", "")
        direction = edit.payload.get("direction", "push")

        if left_id not in position:
            raise SpineEditError(f"boundary_shift: unknown boundary_after {left_id!r}")
        right_pos = position[left_id] + 1
        if right_pos >= len(units):
            raise SpineEditError("boundary_shift: no unit after the boundary")
        # The right neighbour is taken from the ORIGINAL ordering of `units`.
        right_id = units[right_pos].id

        if direction == "push":
            if not current[left_id].endswith(chunk):
                raise SpineEditError(f"boundary_shift push: left text does not end with the moved text ({edit.edit_id})")
            # Slice by length rather than with a negative index so an empty chunk is a no-op.
            current[left_id] = current[left_id][: len(current[left_id]) - len(chunk)]
            current[right_id] = chunk + current[right_id]
        elif direction == "pull":
            if not current[right_id].startswith(chunk):
                raise SpineEditError(f"boundary_shift pull: right text does not start with the moved text ({edit.edit_id})")
            current[right_id] = current[right_id][len(chunk):]
            current[left_id] = current[left_id] + chunk
        else:
            raise SpineEditError(f"boundary_shift: unknown direction {direction!r}")

    projected: List[SpineUnit] = []
    for unit in units:
        if unit.id not in dropped:
            projected.append(SpineUnit(unit.id, current[unit.id]))
    return projected
@@ -0,0 +1,117 @@
1
+ """The domain-neutral context-graph grammar: spine relations (NEXT / PART_OF / STARTS_WITH, recurring fractally at every layer), overlay relations (SUPERSEDES / DERIVED_FROM / PRODUCED), root kinds, and the standardized attribution fields.
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-layer/grammar.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/grammar.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['ROOT_KINDS', 'SpineRelations', 'OverlayRelations', 'attribution', 'make_edge', 'spine_edges', 'grouped_spine_edges']
9
+
10
+ # %% ../nbs/grammar.ipynb #f66243ba
11
+ import time
12
+ from typing import Any, Dict, List, Optional, Tuple
13
+
14
+ from .identity import derive_edge_id
15
+
16
+ # %% ../nbs/grammar.ipynb #b4d7b6c7
17
class SpineRelations:
    """Structural spine relations, applied the same way at every layer
    (Source -> AudioSegment -> Segment today; series -> episode tomorrow)."""
    NEXT = "NEXT"  # Sibling-to-sibling sequential order
    PART_OF = "PART_OF"  # Containment, pointing child -> parent
    STARTS_WITH = "STARTS_WITH"  # Entry point, pointing parent -> first child

    @classmethod
    def all(cls) -> list:  # All spine relation types
        """Return every spine relation type as a new list."""
        relation_types = (cls.NEXT, cls.PART_OF, cls.STARTS_WITH)
        return list(relation_types)
28
+
29
+ # %% ../nbs/grammar.ipynb #8945de1c
30
class OverlayRelations:
    """Overlay/derivation relations: the trust grammar that every workflow's
    graph extensions share."""
    SUPERSEDES = "SUPERSEDES"  # Newer overlay node -> the earlier one it replaces (append-only undo)
    DERIVED_FROM = "DERIVED_FROM"  # Derived/overlay node -> the node(s) it was derived from / consumed
    PRODUCED = "PRODUCED"  # Derivation event -> the node(s) it produced

    @classmethod
    def all(cls) -> list:  # All overlay relation types
        """Return every overlay relation type as a new list."""
        relation_types = (cls.SUPERSEDES, cls.DERIVED_FROM, cls.PRODUCED)
        return list(relation_types)
41
+
42
+ # %% ../nbs/grammar.ipynb #8eab5d13
43
# The three provenance-root kinds (where-graph-begins resolution): every piece
# of knowledge enters the graph anchored in one of these ways.
ROOT_KINDS = ("ingested", "asserted", "derived")
46
+
47
+ # %% ../nbs/grammar.ipynb #87da6c7e
48
def attribution(
    actor: str,  # Who asserted/produced this (e.g. "human", "agent:claude", "capability:whisper")
    method: Optional[str] = None,  # How it was produced (e.g. "transcribe", "alignment-fold/v1")
    asserted_at: Optional[float] = None,  # Unix timestamp; None means "use the current time"
) -> Dict[str, Any]:  # Standardized attribution property dict
    """Build the standardized attribution fields for derived/asserted nodes.

    The result always holds `actor` and `asserted_at`, and additionally
    `method` when one is given. Using the same field names everywhere keeps
    audit reads uniform across workflows (P13's hand-rolled Connection
    attribution graduated into the grammar).
    """
    timestamp = time.time() if asserted_at is None else asserted_at
    fields: Dict[str, Any] = {"actor": actor, "asserted_at": timestamp}
    if method is None:
        return fields
    fields["method"] = method
    return fields
63
+
64
+ # %% ../nbs/grammar.ipynb #43ed17f3
65
def make_edge(
    source_id: str,  # Edge source node id
    target_id: str,  # Edge target node id
    relation_type: str,  # Relation type (SpineRelations / OverlayRelations / domain)
    properties: Optional[Dict[str, Any]] = None,  # Optional edge properties (e.g. {"role": "foreshadow"})
    edge_id: Optional[str] = None,  # Explicit id; a falsy value means "derive it from the triple"
) -> Dict[str, Any]:  # Edge wire dict
    """Build an edge wire dict, with a deterministic id by default.

    When `edge_id` is not supplied, the id is derived from
    (source_id, target_id, relation_type) via `derive_edge_id`.

    The returned `properties` value is always a new dict: a shallow copy of
    the caller's mapping, or an empty dict when none is given. Editing the
    wire dict afterwards therefore cannot leak back into the caller's dict,
    and one properties dict can safely be reused for several edges.
    """
    return {
        "id": edge_id or derive_edge_id(source_id, target_id, relation_type),
        "source_id": source_id,
        "target_id": target_id,
        "relation_type": relation_type,
        # Copy instead of aliasing the argument (same convention as SpineEdit.to_dict).
        "properties": dict(properties) if properties else {},
    }
80
+
81
+ # %% ../nbs/grammar.ipynb #047d3e7e
82
def spine_edges(
    parent_id: str,  # Parent node id
    child_ids: List[str],  # Child node ids, in spine order
) -> List[Dict[str, Any]]:  # Edge wire dicts
    """Emit the uniform spine pattern used at any layer.

    Output order: one STARTS_WITH edge parent -> first child (only when there
    are children), then for each child its PART_OF edge child -> parent,
    immediately followed by its NEXT edge to the following sibling (every
    child except the last).
    """
    result: List[Dict[str, Any]] = (
        [make_edge(parent_id, child_ids[0], SpineRelations.STARTS_WITH)] if child_ids else []
    )
    final_pos = len(child_ids) - 1
    for pos, child in enumerate(child_ids):
        result.append(make_edge(child, parent_id, SpineRelations.PART_OF))
        if pos != final_pos:
            result.append(make_edge(child, child_ids[pos + 1], SpineRelations.NEXT))
    return result
96
+
97
+ # %% ../nbs/grammar.ipynb #71f01b78
98
def grouped_spine_edges(
    groups: List[Tuple[str, List[str]]],  # (parent id, ordered child ids) per group, groups in spine order
) -> List[Dict[str, Any]]:  # Edge wire dicts
    """Build spine edges for a fine layer that is grouped under coarse parents.

    Each child gets a PART_OF edge to its OWNING parent, and each non-empty
    group gets a STARTS_WITH edge parent -> first child (the coarse-seam jump
    anchor). The NEXT chain is GLOBAL: it is built over all children in
    order, so it crosses group boundaries (fine continuity crosses coarse
    boundaries, for agent span reads). All NEXT edges come after the
    per-group edges in the returned list.
    """
    result: List[Dict[str, Any]] = []
    ordered_children: List[str] = []

    for owner_id, members in groups:
        if members:
            result.append(make_edge(owner_id, members[0], SpineRelations.STARTS_WITH))
        for member in members:
            result.append(make_edge(member, owner_id, SpineRelations.PART_OF))
            ordered_children.append(member)

    # Pair each child with its successor across the flattened sequence.
    for current, following in zip(ordered_children, ordered_children[1:]):
        result.append(make_edge(current, following, SpineRelations.NEXT))
    return result
@@ -0,0 +1,73 @@
1
+ """Deterministic node/edge identity: UUIDv5 over canonical identity tuples (stage-5 ratified rule: a node's id derives from what makes it THE same node across re-derivation, never from its correctable content).
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-layer/identity.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/identity.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['LAYER_ID_NAMESPACE', 'IDENTITY_SEPARATOR', 'canonical_part', 'derive_node_id', 'derive_edge_id']
9
+
10
+ # %% ../nbs/identity.ipynb #48c3ba87
11
+ import uuid
12
+ from typing import Union
13
+
14
+ # %% ../nbs/identity.ipynb #8ae437f7
15
# Fixed namespace for all context-graph layer ids. It is a UUIDv5 derived from
# the DNS namespace and a constant name, so the value is stable and
# reproducible; every deterministic id in the ecosystem hangs off this constant.
LAYER_ID_NAMESPACE = uuid.uuid5(uuid.NAMESPACE_DNS, "context-graph.cj-mills.com")

# ASCII unit separator (0x1F), placed between identity parts before hashing.
# Intent: it does not appear in paths/hashes/relation names, so joined identity
# parts cannot collide across part boundaries.
# NOTE(review): parts are joined without escaping or validation, so this
# guarantee assumes no part ever contains the separator — confirm with callers.
IDENTITY_SEPARATOR = "\x1f"
23
+
24
+ # %% ../nbs/identity.ipynb #2f674553
25
def canonical_part(
    value: Union[str, int, float],  # One part of an identity tuple
) -> str:  # Canonical string form used inside the UUIDv5 name
    """Render a single identity-tuple part in its canonical string form.

    Strings are returned unchanged, ints are rendered with `str`, and floats
    with `repr` (shortest round-trip form, so identical floats coming out of
    the same deterministic computation render identically). Every other
    type raises TypeError, and so does bool, whose int-ness is ambiguous:
    identity inputs must be deliberate.
    """
    # bool has to be screened first because it is a subclass of int.
    if isinstance(value, bool):
        raise TypeError("bool is not a valid identity part (ambiguous int)")
    if isinstance(value, str):
        return value
    if isinstance(value, (int, float)):
        return repr(value) if isinstance(value, float) else str(value)
    raise TypeError(f"unsupported identity part type: {type(value).__name__}")
44
+
45
+ # %% ../nbs/identity.ipynb #cc1d398c
46
def derive_node_id(
    kind: str,  # Node kind discriminator (e.g. "source", "audio-segment")
    *parts: Union[str, int, float],  # The identity tuple (positional; order matters)
) -> str:  # Deterministic UUID string (UUIDv5)
    """Derive a deterministic node id from a kind plus an identity tuple.

    The kind and the canonical form of each part are joined with
    IDENTITY_SEPARATOR and hashed as a UUIDv5 under LAYER_ID_NAMESPACE. The
    same kind and parts therefore always give the same id, across processes
    and re-derivations: re-derived graphs reproduce their node ids, so
    cross-graph references survive a rebuild (the G3a fix made structural).
    Content hashes belong in SourceRefs, NOT here: identity is
    position/provenance, never the correctable content.
    """
    rendered = [kind]
    rendered.extend(canonical_part(part) for part in parts)
    identity_name = IDENTITY_SEPARATOR.join(rendered)
    return str(uuid.uuid5(LAYER_ID_NAMESPACE, identity_name))
60
+
61
+ # %% ../nbs/identity.ipynb #b603404f
62
def derive_edge_id(
    source_id: str,  # Edge source node id
    target_id: str,  # Edge target node id
    relation_type: str,  # Relation type (e.g. "NEXT")
) -> str:  # Deterministic UUID string
    """Derive a deterministic edge id from (source, target, relation).

    Layer-0 structural edges are unique per (source, target, relation), so
    that triple IS the identity. The id is produced by `derive_node_id` with
    the kind "edge", which means re-derivation reproduces edge ids exactly
    as it reproduces node ids.
    """
    identity_tuple = (source_id, target_id, relation_type)
    return derive_node_id("edge", *identity_tuple)
@@ -0,0 +1,141 @@
1
+ """Queue-touching layer operations: the shared `graph_task` helper (task channel), idempotent emission (emit-if-absent + verify-if-present), and `extend_graph` — the one primitive every graph-extending workflow commits through. Deterministic ids (see `identity`) make idempotency a presence check instead of a search.
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-layer/ops.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/ops.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['GRAPH_TASK', 'graph_task', 'GraphIntegrityError', 'node_identity_mismatch', 'partition_by_presence', 'ExtendResult',
9
+ 'extend_graph']
10
+
11
+ # %% ../nbs/ops.ipynb #189a5452
12
+ from dataclasses import dataclass, field
13
+ from typing import Any, Dict, List, Optional, Set, Tuple
14
+
15
+ from cjm_plugin_system.core.queue import JobQueue, JobStatus
16
+ # Importing the typed query/result classes IS the host-side wire registration (F8).
17
+ from cjm_context_graph_primitives.query import NodeQuery, EdgeQuery, NodeQueryResult, EdgeQueryResult
18
+
19
+ # %% ../nbs/ops.ipynb #5756d771
20
+ GRAPH_TASK = "graph-storage" # The graph-storage adapter task (explicit task channel, stage 4)
21
+
22
+
23
async def graph_task(
    queue: JobQueue,  # Started job queue
    graph_id: str,  # Graph-storage capability instance id
    method: str,  # Adapter method (e.g. "query_nodes", "add_nodes")
    **kwargs,  # Typed-method kwargs (wire dicts ok; the in-worker adapter normalizes)
) -> Any:  # Typed task result (wire-decoded host-side)
    """Call a graph-storage adapter method through the queue's task channel.

    The call is submitted as a GRAPH_TASK job, awaited, and its result
    returned. A job that finishes in any status other than completed raises
    RuntimeError carrying the status and the job's error.

    THE shared copy: decomp-core and correction-core's per-core helpers
    migrate onto this one (graph ops stay on the queue path for
    telemetry/cancellation per D7/Thread-5 lock 5).
    """
    job_id = await queue.submit(graph_id, task=GRAPH_TASK, method=method, **kwargs)
    finished = await queue.wait_for_job(job_id)
    if finished.status == JobStatus.completed:
        return finished.result
    raise RuntimeError(f"{graph_id} {method} {finished.status}: {finished.error}")
40
+
41
+ # %% ../nbs/ops.ipynb #77646074
42
class GraphIntegrityError(RuntimeError):
    """Raised when an emitted node collides with an existing node of different identity content.

    Produced by verify-if-present: the deterministic id is the same but the
    label or the provenance content hashes differ, meaning the identity tuple
    and the content have diverged. The existing node is never overwritten
    silently.
    """
49
+
50
+ # %% ../nbs/ops.ipynb #def7e7bb
51
def _source_hashes(sources: Optional[List[Any]]) -> Set[str]:
    """Collect the set of content hashes from a node's sources.

    Each source may be a wire dict (read through its "content_hash" key) or
    an object (read through its `content_hash` attribute). Missing or falsy
    hashes are skipped, and a `None` source list yields an empty set.
    """
    hashes: Set[str] = set()
    for ref in sources or []:
        if isinstance(ref, dict):
            digest = ref.get("content_hash")
        else:
            digest = getattr(ref, "content_hash", None)
        if digest:
            hashes.add(digest)
    return hashes


def node_identity_mismatch(
    existing: Any,  # Existing node (typed GraphNode or wire dict)
    new: Dict[str, Any],  # New node wire dict being emitted
) -> Optional[str]:  # Mismatch description, or None when compatible
    """Verify-if-present check: the label and the sources' content-hash set must match.

    The label is compared first. Content hashes are compared as sets, so
    neither the order of sources nor sources without a hash matter.
    """
    def _read(field_name: str) -> Any:
        # `existing` is either a wire dict or an attribute-bearing object.
        if isinstance(existing, dict):
            return existing.get(field_name)
        return getattr(existing, field_name, None)

    old_label = _read("label")
    new_label = new.get("label")
    if old_label != new_label:
        return f"label mismatch: existing {old_label!r} != new {new_label!r}"

    old_hashes = _source_hashes(_read("sources"))
    fresh_hashes = _source_hashes(new.get("sources"))
    if old_hashes == fresh_hashes:
        return None
    return f"sources content-hash mismatch: existing {sorted(old_hashes)} != new {sorted(fresh_hashes)}"
74
+
75
+ # %% ../nbs/ops.ipynb #96cab756
76
def partition_by_presence(
    items: List[Dict[str, Any]],  # Wire dicts, each carrying an "id"
    existing_ids: Set[str],  # Ids that are already present in the graph
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:  # (absent, present)
    """Split wire dicts into those absent from the graph (to add) and those present (to verify).

    The relative order of `items` is kept inside each of the two lists.
    """
    missing: List[Dict[str, Any]] = []
    found: List[Dict[str, Any]] = []
    for item in items:
        bucket = found if item["id"] in existing_ids else missing
        bucket.append(item)
    return missing, found
84
+
85
+ # %% ../nbs/ops.ipynb #c3d8d7da
86
@dataclass
class ExtendResult:
    """Counts and created ids reported by one idempotent extend_graph commit."""
    nodes_added: int = 0  # How many nodes were newly created
    nodes_verified: int = 0  # How many nodes were already present and passed identity verification
    edges_added: int = 0  # How many edges were newly created
    edges_existing: int = 0  # How many edges were already present (and therefore skipped)
    added_node_ids: List[str] = field(default_factory=list)  # Ids of the nodes that were created
    added_edge_ids: List[str] = field(default_factory=list)  # Ids of the edges that were created
95
+
96
+ # %% ../nbs/ops.ipynb #bcda5566
97
async def extend_graph(
    queue: JobQueue, # Started job queue
    graph_id: str, # Graph-storage capability id
    nodes: List[Dict[str, Any]], # Node wire dicts (deterministic ids for layer-0; generated for decisions)
    edges: List[Dict[str, Any]], # Edge wire dicts
) -> ExtendResult: # Counts + created ids
    """Idempotently extend the graph: emit-if-absent + verify-if-present.

    Deterministic ids make idempotency a batched presence check (2 reads + at
    most 2 writes per call — the C17 lesson applied to the write path): nodes
    already present are verified against the new emission (label + provenance
    content hashes) and a mismatch raises `GraphIntegrityError` LOUDLY; absent
    nodes/edges are added. Cache-hit re-emission therefore collides into a
    verified no-op (stress item 4), and a re-derived spine reproduces — never
    duplicates — its layer-0 (stress item 1).

    Nodes are processed before edges. A node mismatch raises before any
    write is issued by this call. Edges that already exist are only counted;
    unlike nodes, their content is not compared.
    """
    result = ExtendResult()

    if nodes:
        # Read 1: fetch whichever of the requested node ids already exist.
        res = await graph_task(queue, graph_id, "query_nodes",
                               query=NodeQuery(ids=[n["id"] for n in nodes]).to_dict())
        # assumes each returned node exposes an `.id` attribute — TODO confirm against the adapter
        existing = {gn.id: gn for gn in (res.nodes or [])}
        absent, present = partition_by_presence(nodes, set(existing))
        # Verify every already-present node BEFORE writing anything.
        for n in present:
            msg = node_identity_mismatch(existing[n["id"]], n)
            if msg:
                raise GraphIntegrityError(f"node {n['id']}: {msg}")
        result.nodes_verified = len(present)
        if absent:
            # Write 1: only the nodes that were not found.
            added = await graph_task(queue, graph_id, "add_nodes", nodes=absent)
            # presumably the adapter returns the created ids; a falsy result is treated as "none"
            result.added_node_ids = list(added or [])
            result.nodes_added = len(result.added_node_ids)

    if edges:
        # Read 2: presence check only, so just the "id" field is projected.
        eres = await graph_task(queue, graph_id, "query_edges",
                                query=EdgeQuery(ids=[e["id"] for e in edges], project=["id"]).to_dict())
        # assumes each row is a mapping carrying an "id" key — TODO confirm against the adapter
        existing_eids = {r["id"] for r in (eres.rows or [])}
        absent_edges = [e for e in edges if e["id"] not in existing_eids]
        result.edges_existing = len(edges) - len(absent_edges)
        if absent_edges:
            # Write 2: only the edges that were not found.
            added = await graph_task(queue, graph_id, "add_edges", edges=absent_edges)
            result.added_edge_ids = list(added or [])
            result.edges_added = len(result.added_edge_ids)

    return result