cjm-context-graph-layer 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cjm_context_graph_layer/__init__.py +1 -0
- cjm_context_graph_layer/_modidx.py +63 -0
- cjm_context_graph_layer/declare.py +60 -0
- cjm_context_graph_layer/edits.py +132 -0
- cjm_context_graph_layer/grammar.py +117 -0
- cjm_context_graph_layer/identity.py +73 -0
- cjm_context_graph_layer/ops.py +141 -0
- cjm_context_graph_layer-0.0.2.dist-info/METADATA +523 -0
- cjm_context_graph_layer-0.0.2.dist-info/RECORD +13 -0
- cjm_context_graph_layer-0.0.2.dist-info/WHEEL +5 -0
- cjm_context_graph_layer-0.0.2.dist-info/entry_points.txt +2 -0
- cjm_context_graph_layer-0.0.2.dist-info/licenses/LICENSE +201 -0
- cjm_context_graph_layer-0.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.2"
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Autogenerated by nbdev
|
|
2
|
+
|
|
3
|
+
d = { 'settings': { 'branch': 'main',
|
|
4
|
+
'doc_baseurl': '/cjm-context-graph-layer',
|
|
5
|
+
'doc_host': 'https://cj-mills.github.io',
|
|
6
|
+
'git_url': 'https://github.com/cj-mills/cjm-context-graph-layer',
|
|
7
|
+
'lib_path': 'cjm_context_graph_layer'},
|
|
8
|
+
'syms': { 'cjm_context_graph_layer.declare': { 'cjm_context_graph_layer.declare.Derivation': ( 'declare.html#derivation',
|
|
9
|
+
'cjm_context_graph_layer/declare.py'),
|
|
10
|
+
'cjm_context_graph_layer.declare.derivation_to_graph': ( 'declare.html#derivation_to_graph',
|
|
11
|
+
'cjm_context_graph_layer/declare.py')},
|
|
12
|
+
'cjm_context_graph_layer.edits': { 'cjm_context_graph_layer.edits.SpineEdit': ( 'edits.html#spineedit',
|
|
13
|
+
'cjm_context_graph_layer/edits.py'),
|
|
14
|
+
'cjm_context_graph_layer.edits.SpineEdit.__post_init__': ( 'edits.html#spineedit.__post_init__',
|
|
15
|
+
'cjm_context_graph_layer/edits.py'),
|
|
16
|
+
'cjm_context_graph_layer.edits.SpineEdit.from_dict': ( 'edits.html#spineedit.from_dict',
|
|
17
|
+
'cjm_context_graph_layer/edits.py'),
|
|
18
|
+
'cjm_context_graph_layer.edits.SpineEdit.to_dict': ( 'edits.html#spineedit.to_dict',
|
|
19
|
+
'cjm_context_graph_layer/edits.py'),
|
|
20
|
+
'cjm_context_graph_layer.edits.SpineEditError': ( 'edits.html#spineediterror',
|
|
21
|
+
'cjm_context_graph_layer/edits.py'),
|
|
22
|
+
'cjm_context_graph_layer.edits.SpineUnit': ( 'edits.html#spineunit',
|
|
23
|
+
'cjm_context_graph_layer/edits.py'),
|
|
24
|
+
'cjm_context_graph_layer.edits.project_effective_spine': ( 'edits.html#project_effective_spine',
|
|
25
|
+
'cjm_context_graph_layer/edits.py'),
|
|
26
|
+
'cjm_context_graph_layer.edits.resolve_active': ( 'edits.html#resolve_active',
|
|
27
|
+
'cjm_context_graph_layer/edits.py')},
|
|
28
|
+
'cjm_context_graph_layer.grammar': { 'cjm_context_graph_layer.grammar.OverlayRelations': ( 'grammar.html#overlayrelations',
|
|
29
|
+
'cjm_context_graph_layer/grammar.py'),
|
|
30
|
+
'cjm_context_graph_layer.grammar.OverlayRelations.all': ( 'grammar.html#overlayrelations.all',
|
|
31
|
+
'cjm_context_graph_layer/grammar.py'),
|
|
32
|
+
'cjm_context_graph_layer.grammar.SpineRelations': ( 'grammar.html#spinerelations',
|
|
33
|
+
'cjm_context_graph_layer/grammar.py'),
|
|
34
|
+
'cjm_context_graph_layer.grammar.SpineRelations.all': ( 'grammar.html#spinerelations.all',
|
|
35
|
+
'cjm_context_graph_layer/grammar.py'),
|
|
36
|
+
'cjm_context_graph_layer.grammar.attribution': ( 'grammar.html#attribution',
|
|
37
|
+
'cjm_context_graph_layer/grammar.py'),
|
|
38
|
+
'cjm_context_graph_layer.grammar.grouped_spine_edges': ( 'grammar.html#grouped_spine_edges',
|
|
39
|
+
'cjm_context_graph_layer/grammar.py'),
|
|
40
|
+
'cjm_context_graph_layer.grammar.make_edge': ( 'grammar.html#make_edge',
|
|
41
|
+
'cjm_context_graph_layer/grammar.py'),
|
|
42
|
+
'cjm_context_graph_layer.grammar.spine_edges': ( 'grammar.html#spine_edges',
|
|
43
|
+
'cjm_context_graph_layer/grammar.py')},
|
|
44
|
+
'cjm_context_graph_layer.identity': { 'cjm_context_graph_layer.identity.canonical_part': ( 'identity.html#canonical_part',
|
|
45
|
+
'cjm_context_graph_layer/identity.py'),
|
|
46
|
+
'cjm_context_graph_layer.identity.derive_edge_id': ( 'identity.html#derive_edge_id',
|
|
47
|
+
'cjm_context_graph_layer/identity.py'),
|
|
48
|
+
'cjm_context_graph_layer.identity.derive_node_id': ( 'identity.html#derive_node_id',
|
|
49
|
+
'cjm_context_graph_layer/identity.py')},
|
|
50
|
+
'cjm_context_graph_layer.ops': { 'cjm_context_graph_layer.ops.ExtendResult': ( 'ops.html#extendresult',
|
|
51
|
+
'cjm_context_graph_layer/ops.py'),
|
|
52
|
+
'cjm_context_graph_layer.ops.GraphIntegrityError': ( 'ops.html#graphintegrityerror',
|
|
53
|
+
'cjm_context_graph_layer/ops.py'),
|
|
54
|
+
'cjm_context_graph_layer.ops._source_hashes': ( 'ops.html#_source_hashes',
|
|
55
|
+
'cjm_context_graph_layer/ops.py'),
|
|
56
|
+
'cjm_context_graph_layer.ops.extend_graph': ( 'ops.html#extend_graph',
|
|
57
|
+
'cjm_context_graph_layer/ops.py'),
|
|
58
|
+
'cjm_context_graph_layer.ops.graph_task': ( 'ops.html#graph_task',
|
|
59
|
+
'cjm_context_graph_layer/ops.py'),
|
|
60
|
+
'cjm_context_graph_layer.ops.node_identity_mismatch': ( 'ops.html#node_identity_mismatch',
|
|
61
|
+
'cjm_context_graph_layer/ops.py'),
|
|
62
|
+
'cjm_context_graph_layer.ops.partition_by_presence': ( 'ops.html#partition_by_presence',
|
|
63
|
+
'cjm_context_graph_layer/ops.py')}}}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Provenance-by-declaration: host logic stays readable Python in the workflow core and DECLARES its provenance contributions as a `Derivation` event node (+ DERIVED_FROM input edges, PRODUCED output edges). This recovers audit completeness without the substrate executing host logic (pass-2 Thread 4's false-dichotomy resolution). The substrate stays untouched: declarations read composition/job ids from the outside.
|
|
2
|
+
|
|
3
|
+
Docs: https://cj-mills.github.io/cjm-context-graph-layer/declare.html.md"""
|
|
4
|
+
|
|
5
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/declare.ipynb.
|
|
6
|
+
|
|
7
|
+
# %% auto #0
|
|
8
|
+
__all__ = ['Derivation', 'derivation_to_graph']
|
|
9
|
+
|
|
10
|
+
# %% ../nbs/declare.ipynb #97664d2b
|
|
11
|
+
import time
|
|
12
|
+
from uuid import uuid4
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
from .grammar import OverlayRelations, attribution, make_edge
|
|
17
|
+
|
|
18
|
+
# %% ../nbs/declare.ipynb #c11abbce
|
|
19
|
+
@dataclass
class Derivation:
    """A declared host-logic transformation event for the audit trail.

    Carries the actor, the method, and the coarse anchor ids of one event.
    `derivation_to_graph` turns an instance into a single "Derivation" node
    plus one DERIVED_FROM edge per input id and one PRODUCED edge per output
    id. Anchors are meant to stay coarse: list the nodes that pin the event
    down rather than every fine-grained output.
    """
    actor: str  # Who performed the transformation (e.g. "host:cjm-transcript-decomp-core")
    method: str  # Which transformation ran (e.g. "alignment-fold/v1")
    input_ids: List[str] = field(default_factory=list)  # Ids of the graph nodes the event consumed
    output_ids: List[str] = field(default_factory=list)  # Ids of the graph nodes the event produced (coarse anchors)
    asserted_at: Optional[float] = None  # Unix timestamp; None = stamped with the current time when materialized
    composition_id: Optional[str] = None  # Substrate composition run id, when there is one
    job_ids: List[str] = field(default_factory=list)  # Member job ids, when there are any
    properties: Dict[str, Any] = field(default_factory=dict)  # Additional properties merged onto the event node
|
|
37
|
+
|
|
38
|
+
# %% ../nbs/declare.ipynb #7c77965e
|
|
39
|
+
def derivation_to_graph(
    d: Derivation,  # The declared event
    derivation_id: Optional[str] = None,  # Explicit node id; None (or empty) = a fresh uuid4 is generated
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:  # (event node wire dict, edges)
    """Turn a declaration into one "Derivation" node plus its anchor edges.

    The node id is `derivation_id` when given, otherwise a random uuid4. Node
    properties start from the standard attribution fields, gain
    `composition_id` / `job_ids` only when those are set, and are then updated
    with `d.properties` (so extra properties win on key clashes). Each input
    id yields a DERIVED_FROM edge with role "input" and each output id a
    PRODUCED edge with role "output", both pointing from the event node.
    """
    event_id = derivation_id or str(uuid4())

    event_props: Dict[str, Any] = attribution(d.actor, method=d.method, asserted_at=d.asserted_at)
    if d.composition_id:
        event_props["composition_id"] = d.composition_id
    if d.job_ids:
        # Copy so the node does not share the declaration's list.
        event_props["job_ids"] = list(d.job_ids)
    event_props.update(d.properties)

    node = {"id": event_id, "label": "Derivation", "properties": event_props, "sources": []}

    edges: List[Dict[str, Any]] = []
    for consumed_id in d.input_ids:
        edges.append(make_edge(event_id, consumed_id, OverlayRelations.DERIVED_FROM, {"role": "input"}))
    for produced_id in d.output_ids:
        edges.append(make_edge(event_id, produced_id, OverlayRelations.PRODUCED, {"role": "output"}))
    return node, edges
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""The spine-edit operation vocabulary (`prune` / `replace_text` / `boundary_shift`) + supersession resolution + the effective-view projection. These are generic operations on any NEXT-chained text spine; correction workflows carry them in overlay-node payloads, and the projection interprets them at read time (migrates correction-core C11/C16 onto the layer).
|
|
2
|
+
|
|
3
|
+
Docs: https://cj-mills.github.io/cjm-context-graph-layer/edits.html.md"""
|
|
4
|
+
|
|
5
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/edits.ipynb.
|
|
6
|
+
|
|
7
|
+
# %% auto #0
|
|
8
|
+
__all__ = ['EDIT_OPS', 'SpineEditError', 'SpineUnit', 'SpineEdit', 'resolve_active', 'project_effective_spine']
|
|
9
|
+
|
|
10
|
+
# %% ../nbs/edits.ipynb #68a6f6d3
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Any, Dict, Iterable, List, Set, Tuple
|
|
13
|
+
|
|
14
|
+
# %% ../nbs/edits.ipynb #376d25b0
|
|
15
|
+
# Reserved spine-edit operation vocabulary (reserve-enum-values-up-front):
|
|
16
|
+
# boundary_shift is locked in NOW per the where-graph-begins resolution even
|
|
17
|
+
# though no driver produces it yet — the persisted decision preserves the
|
|
18
|
+
# alignment-error-vs-transcription-error distinction.
|
|
19
|
+
EDIT_OPS = ("prune", "replace_text", "boundary_shift")
|
|
20
|
+
|
|
21
|
+
# %% ../nbs/edits.ipynb #d771e0ec
|
|
22
|
+
class SpineEditError(ValueError):
    """Signals that a spine edit failed validation or could not be applied.

    Subclasses `ValueError`, so callers catching `ValueError` also catch it."""
|
|
25
|
+
|
|
26
|
+
# %% ../nbs/edits.ipynb #95ad3798
|
|
27
|
+
@dataclass
class SpineUnit:
    """A single spine position paired with the text currently in effect there."""
    id: str  # Id of the layer-0 segment node at this position
    text: str  # Text in effect at this position
|
|
32
|
+
|
|
33
|
+
# %% ../nbs/edits.ipynb #e6875de1
|
|
34
|
+
@dataclass
class SpineEdit:
    """A single spine-edit decision in the form carried by an overlay node payload.

    Meaning of each `op`:
    - `prune`: the `targets` are removed from the effective view; `payload`
      is not consulted.
    - `replace_text`: every target's text becomes `payload["text"]`.
    - `boundary_shift`: `payload` is `{"boundary_after": <left segment id>,
      "text": <moved text>, "direction": "push"|"pull"}`. Text crosses the
      boundary between the named unit and the unit right after it: "push"
      takes it off the end of the left unit and prepends it to the right one,
      "pull" does the reverse. The number and order of units is unchanged.

    Construction rejects any `op` outside `EDIT_OPS` with `SpineEditError`.
    """
    edit_id: str  # Id of the overlay node carrying this edit (supersession anchor)
    op: str  # Operation name; must be a member of EDIT_OPS
    targets: List[str] = field(default_factory=list)  # Layer-0 segment node ids this edit acts on
    payload: Dict[str, Any] = field(default_factory=dict)  # Operation-specific data (see class docstring)
    created_at: float = 0.0  # Decision timestamp; primary sort key when edits are applied

    def __post_init__(self):
        # Validate eagerly so an unknown op never reaches the projection.
        if self.op in EDIT_OPS:
            return
        raise SpineEditError(f"unknown spine-edit op: {self.op!r} (known: {EDIT_OPS})")

    def to_dict(self) -> Dict[str, Any]:  # Payload-ready dict
        """Return a plain dict of this edit, with `targets` and `payload` shallow-copied."""
        serialized: Dict[str, Any] = {"edit_id": self.edit_id, "op": self.op}
        serialized["targets"] = list(self.targets)
        serialized["payload"] = dict(self.payload)
        serialized["created_at"] = self.created_at
        return serialized

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "SpineEdit":  # Reconstructed edit
        """Rebuild an edit from a payload dict.

        `edit_id` and `op` are required keys; missing or falsy `targets`,
        `payload` and `created_at` fall back to `[]`, `{}` and `0.0`.
        """
        edit_id = d["edit_id"]
        op = d["op"]
        targets = list(d.get("targets") or [])
        payload = dict(d.get("payload") or {})
        created_at = float(d.get("created_at") or 0.0)
        return cls(edit_id=edit_id, op=op, targets=targets, payload=payload, created_at=created_at)
|
|
67
|
+
|
|
68
|
+
# %% ../nbs/edits.ipynb #d0b1d211
|
|
69
|
+
def resolve_active(
    edit_ids: Iterable[str],  # Candidate overlay node ids
    supersedes_pairs: Iterable[Tuple[str, str]],  # (superseder_id, superseded_id) SUPERSEDES edges
) -> Set[str]:  # Ids that no pair names as superseded
    """Return the candidate ids that are not superseded.

    An id counts as superseded when it appears as the second element of any
    pair. Chains need no special handling: with C -> B and B -> A both A and B
    are targets, so only C survives. Neither input is modified.
    """
    retired: Set[str] = set()
    for _superseder, replaced in supersedes_pairs:
        retired.add(replaced)
    return set(edit_ids) - retired
|
|
81
|
+
|
|
82
|
+
# %% ../nbs/edits.ipynb #0ced23b9
|
|
83
|
+
def project_effective_spine(
    units: List[SpineUnit],  # Ordered layer-0 spine (left untouched)
    edits: List[SpineEdit],  # ACTIVE edits to apply (resolve supersession first)
) -> List[SpineUnit]:  # Freshly built effective spine
    """Compute the effective spine from the layer-0 units and the active edits.

    Edits are applied sorted by `(created_at, edit_id)` against a running
    text state, so a later edit sees what earlier ones did and the last
    `replace_text` on a unit wins. Pruned ids are only collected during the
    pass and filtered out when the result is built, so text edits recorded
    against a unit that is pruned later still apply without error.

    `boundary_shift` is strict. It raises `SpineEditError` when
    `boundary_after` is not a known unit id, when no unit follows it, when
    the moved text is not found verbatim at the boundary, or when the
    direction is neither "push" nor "pull".
    """
    position = {unit.id: idx for idx, unit in enumerate(units)}
    current = {unit.id: unit.text for unit in units}
    dropped: Set[str] = set()

    ordered = sorted(edits, key=lambda edit: (edit.created_at, edit.edit_id))
    for edit in ordered:
        if edit.op == "prune":
            dropped.update(edit.targets)
            continue

        if edit.op == "replace_text":
            replacement = edit.payload.get("text", "")
            for target in edit.targets:
                # Targets that are not on this spine are ignored.
                if target in current:
                    current[target] = replacement
            continue

        if edit.op != "boundary_shift":
            continue

        left = edit.payload.get("boundary_after")
        moved = edit.payload.get("text", "")
        direction = edit.payload.get("direction", "push")
        if left not in position:
            raise SpineEditError(f"boundary_shift: unknown boundary_after {left!r}")
        right_index = position[left] + 1
        if right_index >= len(units):
            raise SpineEditError("boundary_shift: no unit after the boundary")
        right = units[right_index].id

        if direction == "push":
            if not current[left].endswith(moved):
                raise SpineEditError(f"boundary_shift push: left text does not end with the moved text ({edit.edit_id})")
            # Explicit length arithmetic keeps an empty `moved` a no-op.
            keep = len(current[left]) - len(moved)
            current[left] = current[left][:keep]
            current[right] = moved + current[right]
        elif direction == "pull":
            if not current[right].startswith(moved):
                raise SpineEditError(f"boundary_shift pull: right text does not start with the moved text ({edit.edit_id})")
            current[right] = current[right][len(moved):]
            current[left] = current[left] + moved
        else:
            raise SpineEditError(f"boundary_shift: unknown direction {direction!r}")

    effective: List[SpineUnit] = []
    for unit in units:
        if unit.id not in dropped:
            effective.append(SpineUnit(unit.id, current[unit.id]))
    return effective
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""The domain-neutral context-graph grammar: spine relations (NEXT / PART_OF / STARTS_WITH, recurring fractally at every layer), overlay relations (SUPERSEDES / DERIVED_FROM / PRODUCED), root kinds, and the standardized attribution fields.
|
|
2
|
+
|
|
3
|
+
Docs: https://cj-mills.github.io/cjm-context-graph-layer/grammar.html.md"""
|
|
4
|
+
|
|
5
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/grammar.ipynb.
|
|
6
|
+
|
|
7
|
+
# %% auto #0
|
|
8
|
+
__all__ = ['ROOT_KINDS', 'SpineRelations', 'OverlayRelations', 'attribution', 'make_edge', 'spine_edges', 'grouped_spine_edges']
|
|
9
|
+
|
|
10
|
+
# %% ../nbs/grammar.ipynb #f66243ba
|
|
11
|
+
import time
|
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
13
|
+
|
|
14
|
+
from .identity import derive_edge_id
|
|
15
|
+
|
|
16
|
+
# %% ../nbs/grammar.ipynb #b4d7b6c7
|
|
17
|
+
class SpineRelations:
    """Names of the structural spine relations, shared by every layer of the graph
    (e.g. Source -> AudioSegment -> Segment)."""
    NEXT = "NEXT"  # Sibling ordering: a child -> the child that follows it
    PART_OF = "PART_OF"  # Containment: child -> parent
    STARTS_WITH = "STARTS_WITH"  # Entry point: parent -> its first child

    @classmethod
    def all(cls) -> list:  # Every spine relation type
        """Return a new list of the spine relation names, in declaration order."""
        names = (cls.NEXT, cls.PART_OF, cls.STARTS_WITH)
        return list(names)
|
|
28
|
+
|
|
29
|
+
# %% ../nbs/grammar.ipynb #8945de1c
|
|
30
|
+
class OverlayRelations:
    """Names of the overlay/derivation relations used by graph extensions
    across workflows."""
    SUPERSEDES = "SUPERSEDES"  # Newer overlay node -> the earlier one it replaces
    DERIVED_FROM = "DERIVED_FROM"  # Derived/overlay node -> the node(s) it was derived from or consumed
    PRODUCED = "PRODUCED"  # Derivation event -> the node(s) it produced

    @classmethod
    def all(cls) -> list:  # Every overlay relation type
        """Return a new list of the overlay relation names, in declaration order."""
        names = (cls.SUPERSEDES, cls.DERIVED_FROM, cls.PRODUCED)
        return list(names)
|
|
41
|
+
|
|
42
|
+
# %% ../nbs/grammar.ipynb #8eab5d13
|
|
43
|
+
# The three provenance-root kinds (where-graph-begins resolution): knowledge
|
|
44
|
+
# enters the graph anchored one of these ways.
|
|
45
|
+
ROOT_KINDS = ("ingested", "asserted", "derived")
|
|
46
|
+
|
|
47
|
+
# %% ../nbs/grammar.ipynb #87da6c7e
|
|
48
|
+
def attribution(
    actor: str,  # Who asserted/produced this (e.g. "human", "agent:claude", "capability:whisper")
    method: Optional[str] = None,  # How it was produced (e.g. "transcribe", "alignment-fold/v1")
    asserted_at: Optional[float] = None,  # Unix timestamp; None = current time
) -> Dict[str, Any]:  # Attribution property dict
    """Build the standard attribution properties for a derived/asserted node.

    The result always has `actor` and `asserted_at`; `method` is included
    only when one is given. A missing timestamp is filled with `time.time()`,
    while an explicit `0.0` is kept as-is.
    """
    stamp = time.time() if asserted_at is None else asserted_at
    fields: Dict[str, Any] = {"actor": actor, "asserted_at": stamp}
    if method is None:
        return fields
    fields["method"] = method
    return fields
|
|
63
|
+
|
|
64
|
+
# %% ../nbs/grammar.ipynb #43ed17f3
|
|
65
|
+
def make_edge(
    source_id: str,  # Edge source node id
    target_id: str,  # Edge target node id
    relation_type: str,  # Relation type (SpineRelations / OverlayRelations / domain)
    properties: Optional[Dict[str, Any]] = None,  # Optional edge properties (e.g. {"role": "foreshadow"})
    edge_id: Optional[str] = None,  # Explicit id; None (or empty) = deterministic from the triple
) -> Dict[str, Any]:  # Edge wire dict
    """Build an edge wire dict with a deterministic id by default.

    When `edge_id` is not supplied the id comes from
    `derive_edge_id(source_id, target_id, relation_type)`, so the same triple
    always yields the same id. The returned `properties` is always a fresh
    dict: a shallow copy of the caller's mapping, or `{}` when none (or an
    empty one) is given.
    """
    return {
        "id": edge_id or derive_edge_id(source_id, target_id, relation_type),
        "source_id": source_id,
        "target_id": target_id,
        "relation_type": relation_type,
        # Copy rather than alias: a properties dict reused across several
        # edges, or mutated by the caller afterwards, must not change edges
        # that were already built.
        "properties": dict(properties) if properties else {},
    }
|
|
80
|
+
|
|
81
|
+
# %% ../nbs/grammar.ipynb #047d3e7e
|
|
82
|
+
def spine_edges(
    parent_id: str,  # Parent node id
    child_ids: List[str],  # Child node ids in spine order
) -> List[Dict[str, Any]]:  # Edge wire dicts
    """Build the spine edges for one parent and its ordered children.

    Output order: one STARTS_WITH edge parent -> first child, then for each
    child its PART_OF edge child -> parent, immediately followed by the NEXT
    edge to the following child (absent for the last child). No children
    means no edges.
    """
    if not child_ids:
        return []

    edges: List[Dict[str, Any]] = [make_edge(parent_id, child_ids[0], SpineRelations.STARTS_WITH)]
    last_pos = len(child_ids) - 1
    for pos, child in enumerate(child_ids):
        edges.append(make_edge(child, parent_id, SpineRelations.PART_OF))
        if pos != last_pos:
            edges.append(make_edge(child, child_ids[pos + 1], SpineRelations.NEXT))
    return edges
|
|
96
|
+
|
|
97
|
+
# %% ../nbs/grammar.ipynb #71f01b78
|
|
98
|
+
def grouped_spine_edges(
    groups: List[Tuple[str, List[str]]],  # (parent id, ordered child ids) per group, groups in spine order
) -> List[Dict[str, Any]]:  # Edge wire dicts
    """Build spine edges for a fine layer whose children are grouped under coarse parents.

    Per group, in order: a STARTS_WITH edge parent -> first child (skipped for
    an empty group), then a PART_OF edge from each child to its own parent.
    After all groups, a single NEXT chain links the children in flattened
    order, so it continues across group boundaries.
    """
    edges: List[Dict[str, Any]] = []
    ordered_children: List[str] = []

    for parent_id, child_ids in groups:
        if child_ids:
            edges.append(make_edge(parent_id, child_ids[0], SpineRelations.STARTS_WITH))
        for child in child_ids:
            edges.append(make_edge(child, parent_id, SpineRelations.PART_OF))
        ordered_children.extend(child_ids)

    # Pair every child with its successor across the whole flattened sequence.
    for current, following in zip(ordered_children, ordered_children[1:]):
        edges.append(make_edge(current, following, SpineRelations.NEXT))
    return edges
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Deterministic node/edge identity: UUIDv5 over canonical identity tuples (stage-5 ratified rule: a node's id derives from what makes it THE same node across re-derivation, never from its correctable content).
|
|
2
|
+
|
|
3
|
+
Docs: https://cj-mills.github.io/cjm-context-graph-layer/identity.html.md"""
|
|
4
|
+
|
|
5
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/identity.ipynb.
|
|
6
|
+
|
|
7
|
+
# %% auto #0
|
|
8
|
+
__all__ = ['LAYER_ID_NAMESPACE', 'IDENTITY_SEPARATOR', 'canonical_part', 'derive_node_id', 'derive_edge_id']
|
|
9
|
+
|
|
10
|
+
# %% ../nbs/identity.ipynb #48c3ba87
|
|
11
|
+
import uuid
|
|
12
|
+
from typing import Union
|
|
13
|
+
|
|
14
|
+
# %% ../nbs/identity.ipynb #8ae437f7
|
|
15
|
+
# Fixed namespace for all context-graph layer ids. Derived once from the DNS
|
|
16
|
+
# namespace so the value is stable and reproducible; every deterministic id in
|
|
17
|
+
# the ecosystem hangs off this constant.
|
|
18
|
+
LAYER_ID_NAMESPACE = uuid.uuid5(uuid.NAMESPACE_DNS, "context-graph.cj-mills.com")
|
|
19
|
+
|
|
20
|
+
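# Unit separator: not expected to appear in paths/hashes/relation names, so
# joined identity parts should not collide across part boundaries. NOTE: this
# is a convention, not a check — string parts are passed through unchanged, so
# callers must not supply parts that contain this character.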
|
|
22
|
+
IDENTITY_SEPARATOR = "\x1f"
|
|
23
|
+
|
|
24
|
+
# %% ../nbs/identity.ipynb #2f674553
|
|
25
|
+
def canonical_part(
    value: Union[str, int, float],  # One identity-tuple part
) -> str:  # Canonical string form used inside the UUIDv5 name
    """Return the canonical string for one identity-tuple part.

    Strings are returned unchanged, ints are rendered with `str`, and floats
    with `repr`. Every other type raises `TypeError`. That includes `bool`,
    which is refused explicitly even though it is an `int` subclass, so that
    `True` is never silently rendered as an integer.
    """
    if isinstance(value, str):
        return value
    # bool must be tested before int: isinstance(True, int) is True.
    if isinstance(value, bool):
        raise TypeError("bool is not a valid identity part (ambiguous int)")
    if isinstance(value, int):
        return str(value)
    if isinstance(value, float):
        return repr(value)
    raise TypeError(f"unsupported identity part type: {type(value).__name__}")
|
|
44
|
+
|
|
45
|
+
# %% ../nbs/identity.ipynb #cc1d398c
|
|
46
|
+
def derive_node_id(
    kind: str,  # Node kind discriminator (e.g. "source", "audio-segment")
    *parts: Union[str, int, float],  # The identity tuple (positional, order-significant)
) -> str:  # Deterministic UUID string (UUIDv5)
    """Compute a deterministic node id from a kind and an identity tuple.

    `kind` and the canonical form of each part are joined with
    `IDENTITY_SEPARATOR` and hashed as a UUIDv5 name under
    `LAYER_ID_NAMESPACE`, so equal inputs give the same id on every run and
    in every process. The parts should describe position/provenance rather
    than correctable content, so that fixing content does not change the id.
    """
    rendered = [kind]
    rendered.extend(canonical_part(part) for part in parts)
    identity_name = IDENTITY_SEPARATOR.join(rendered)
    return str(uuid.uuid5(LAYER_ID_NAMESPACE, identity_name))
|
|
60
|
+
|
|
61
|
+
# %% ../nbs/identity.ipynb #b603404f
|
|
62
|
+
def derive_edge_id(
    source_id: str,  # Edge source node id
    target_id: str,  # Edge target node id
    relation_type: str,  # Relation type (e.g. "NEXT")
) -> str:  # Deterministic UUID string
    """Compute a deterministic edge id from the (source, target, relation) triple.

    Delegates to `derive_node_id` with the fixed kind "edge" and the triple
    as identity parts, so the same triple always maps to the same edge id.
    """
    triple = (source_id, target_id, relation_type)
    return derive_node_id("edge", *triple)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Queue-touching layer operations: the shared `graph_task` helper (task channel), idempotent emission (emit-if-absent + verify-if-present), and `extend_graph` — the one primitive every graph-extending workflow commits through. Deterministic ids (see `identity`) make idempotency a presence check instead of a search.
|
|
2
|
+
|
|
3
|
+
Docs: https://cj-mills.github.io/cjm-context-graph-layer/ops.html.md"""
|
|
4
|
+
|
|
5
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/ops.ipynb.
|
|
6
|
+
|
|
7
|
+
# %% auto #0
|
|
8
|
+
__all__ = ['GRAPH_TASK', 'graph_task', 'GraphIntegrityError', 'node_identity_mismatch', 'partition_by_presence', 'ExtendResult',
|
|
9
|
+
'extend_graph']
|
|
10
|
+
|
|
11
|
+
# %% ../nbs/ops.ipynb #189a5452
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
14
|
+
|
|
15
|
+
from cjm_plugin_system.core.queue import JobQueue, JobStatus
|
|
16
|
+
# Importing the typed query/result classes IS the host-side wire registration (F8).
|
|
17
|
+
from cjm_context_graph_primitives.query import NodeQuery, EdgeQuery, NodeQueryResult, EdgeQueryResult
|
|
18
|
+
|
|
19
|
+
# %% ../nbs/ops.ipynb #5756d771
|
|
20
|
+
GRAPH_TASK = "graph-storage" # The graph-storage adapter task (explicit task channel, stage 4)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def graph_task(
    queue: JobQueue,  # Started job queue
    graph_id: str,  # Graph-storage capability instance id
    method: str,  # Adapter method (e.g. "query_nodes", "add_nodes")
    **kwargs,  # Keyword arguments forwarded to the adapter method
) -> Any:  # The finished job's result
    """Run one graph-storage adapter method via the queue's task channel.

    Submits a job for `graph_id` on the `GRAPH_TASK` task with the given
    method and kwargs, waits for it to finish, and returns its result. Any
    final status other than `JobStatus.completed` raises `RuntimeError`
    carrying the status and the job's error.
    """
    job_id = await queue.submit(graph_id, task=GRAPH_TASK, method=method, **kwargs)
    finished = await queue.wait_for_job(job_id)
    if finished.status != JobStatus.completed:
        raise RuntimeError(f"{graph_id} {method} {finished.status}: {finished.error}")
    return finished.result
|
|
40
|
+
|
|
41
|
+
# %% ../nbs/ops.ipynb #77646074
|
|
42
|
+
class GraphIntegrityError(RuntimeError):
    """A node being emitted shares its id with a stored node that does not match it.

    `extend_graph` raises this when an already-present node differs from the
    new emission in label or in its set of source content hashes; the stored
    node is left as it is rather than being overwritten."""
|
|
49
|
+
|
|
50
|
+
# %% ../nbs/ops.ipynb #def7e7bb
|
|
51
|
+
def _source_hashes(sources: Optional[List[Any]]) -> Set[str]:
    """Collect the truthy `content_hash` values of a node's sources.

    Each source may be a wire dict (read with `.get`) or an object (read with
    `getattr`); sources without a hash are skipped and `None` means no sources."""
    hashes: Set[str] = set()
    for ref in sources or []:
        if isinstance(ref, dict):
            digest = ref.get("content_hash")
        else:
            digest = getattr(ref, "content_hash", None)
        if digest:
            hashes.add(digest)
    return hashes


def node_identity_mismatch(
    existing: Any,  # Existing node (typed GraphNode or wire dict)
    new: Dict[str, Any],  # New node wire dict being emitted
) -> Optional[str]:  # Mismatch description, or None when compatible
    """Compare an existing node with a new emission: label first, then source content hashes.

    Returns a description of the first difference found, or `None` when the
    labels are equal and both nodes have the same set of content hashes."""
    is_wire = isinstance(existing, dict)

    ex_label = existing.get("label") if is_wire else getattr(existing, "label", None)
    new_label = new.get("label")
    if ex_label != new_label:
        return f"label mismatch: existing {ex_label!r} != new {new_label!r}"

    ex_sources = existing.get("sources") if is_wire else getattr(existing, "sources", None)
    ex_hashes = _source_hashes(ex_sources)
    new_hashes = _source_hashes(new.get("sources"))
    if ex_hashes == new_hashes:
        return None
    # Sorted so the message does not depend on set iteration order.
    return f"sources content-hash mismatch: existing {sorted(ex_hashes)} != new {sorted(new_hashes)}"
|
|
74
|
+
|
|
75
|
+
# %% ../nbs/ops.ipynb #96cab756
|
|
76
|
+
def partition_by_presence(
    items: List[Dict[str, Any]],  # Wire dicts carrying "id"
    existing_ids: Set[str],  # Ids already present in the graph
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:  # (absent, present)
    """Sort wire dicts into those whose id is missing from `existing_ids` and those whose id is in it.

    Both output lists keep the input order; the first is what still has to
    be added, the second what has to be verified."""
    absent: List[Dict[str, Any]] = []
    present: List[Dict[str, Any]] = []
    for item in items:
        bucket = present if item["id"] in existing_ids else absent
        bucket.append(item)
    return absent, present
|
|
84
|
+
|
|
85
|
+
# %% ../nbs/ops.ipynb #c3d8d7da
|
|
86
|
+
@dataclass
class ExtendResult:
    """Counts and created ids reported by a single `extend_graph` call."""
    nodes_added: int = 0  # How many nodes were newly created
    nodes_verified: int = 0  # How many nodes were already present and passed the identity check
    edges_added: int = 0  # How many edges were newly created
    edges_existing: int = 0  # How many edges were already present and therefore skipped
    added_node_ids: List[str] = field(default_factory=list)  # Ids of the nodes that were created
    added_edge_ids: List[str] = field(default_factory=list)  # Ids of the edges that were created
|
|
95
|
+
|
|
96
|
+
# %% ../nbs/ops.ipynb #bcda5566
|
|
97
|
+
async def extend_graph(
    queue: JobQueue,  # Started job queue
    graph_id: str,  # Graph-storage capability id
    nodes: List[Dict[str, Any]],  # Node wire dicts to emit
    edges: List[Dict[str, Any]],  # Edge wire dicts to emit
) -> ExtendResult:  # Counts + created ids
    """Extend the graph idempotently: add what is absent, verify what is present.

    Nodes are handled first. One `query_nodes` call finds which of the given
    ids already exist; each such node is checked with
    `node_identity_mismatch` and the first mismatch raises
    `GraphIntegrityError` before anything is written. The remaining nodes go
    out in a single `add_nodes` call. Edges follow the same shape without
    verification: one `query_edges` call, already-present edges are only
    counted, and the rest go out in a single `add_edges` call. An empty
    `nodes` or `edges` list skips its phase entirely, so a call issues at
    most two reads and two writes.
    """
    outcome = ExtendResult()

    if nodes:
        wanted_ids = [n["id"] for n in nodes]
        found = await graph_task(queue, graph_id, "query_nodes",
                                 query=NodeQuery(ids=wanted_ids).to_dict())
        stored = {gn.id: gn for gn in (found.nodes or [])}
        missing_nodes, known_nodes = partition_by_presence(nodes, set(stored))
        # Verify every already-present node before issuing any write.
        for candidate in known_nodes:
            problem = node_identity_mismatch(stored[candidate["id"]], candidate)
            if problem:
                raise GraphIntegrityError(f"node {candidate['id']}: {problem}")
        outcome.nodes_verified = len(known_nodes)
        if missing_nodes:
            created = await graph_task(queue, graph_id, "add_nodes", nodes=missing_nodes)
            outcome.added_node_ids = list(created or [])
            outcome.nodes_added = len(outcome.added_node_ids)

    if edges:
        wanted_edge_ids = [e["id"] for e in edges]
        found_edges = await graph_task(queue, graph_id, "query_edges",
                                       query=EdgeQuery(ids=wanted_edge_ids, project=["id"]).to_dict())
        stored_edge_ids = {row["id"] for row in (found_edges.rows or [])}
        missing_edges = [e for e in edges if e["id"] not in stored_edge_ids]
        outcome.edges_existing = len(edges) - len(missing_edges)
        if missing_edges:
            created = await graph_task(queue, graph_id, "add_edges", edges=missing_edges)
            outcome.added_edge_ids = list(created or [])
            outcome.edges_added = len(outcome.added_edge_ids)

    return outcome
|