cjm-context-graph-primitives 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
@@ -0,0 +1,191 @@
1
+ # Autogenerated by nbdev
2
+
3
+ d = { 'settings': { 'branch': 'main',
4
+ 'doc_baseurl': '/cjm-context-graph-primitives',
5
+ 'doc_host': 'https://cj-mills.github.io',
6
+ 'git_url': 'https://github.com/cj-mills/cjm-context-graph-primitives',
7
+ 'lib_path': 'cjm_context_graph_primitives'},
8
+ 'syms': { 'cjm_context_graph_primitives.graph': { 'cjm_context_graph_primitives.graph.GraphContext': ( 'graph.html#graphcontext',
9
+ 'cjm_context_graph_primitives/graph.py'),
10
+ 'cjm_context_graph_primitives.graph.GraphContext.from_dict': ( 'graph.html#graphcontext.from_dict',
11
+ 'cjm_context_graph_primitives/graph.py'),
12
+ 'cjm_context_graph_primitives.graph.GraphContext.from_file': ( 'graph.html#graphcontext.from_file',
13
+ 'cjm_context_graph_primitives/graph.py'),
14
+ 'cjm_context_graph_primitives.graph.GraphContext.to_dict': ( 'graph.html#graphcontext.to_dict',
15
+ 'cjm_context_graph_primitives/graph.py'),
16
+ 'cjm_context_graph_primitives.graph.GraphContext.to_temp_file': ( 'graph.html#graphcontext.to_temp_file',
17
+ 'cjm_context_graph_primitives/graph.py'),
18
+ 'cjm_context_graph_primitives.graph.GraphEdge': ( 'graph.html#graphedge',
19
+ 'cjm_context_graph_primitives/graph.py'),
20
+ 'cjm_context_graph_primitives.graph.GraphEdge.from_dict': ( 'graph.html#graphedge.from_dict',
21
+ 'cjm_context_graph_primitives/graph.py'),
22
+ 'cjm_context_graph_primitives.graph.GraphEdge.to_dict': ( 'graph.html#graphedge.to_dict',
23
+ 'cjm_context_graph_primitives/graph.py'),
24
+ 'cjm_context_graph_primitives.graph.GraphNode': ( 'graph.html#graphnode',
25
+ 'cjm_context_graph_primitives/graph.py'),
26
+ 'cjm_context_graph_primitives.graph.GraphNode.from_dict': ( 'graph.html#graphnode.from_dict',
27
+ 'cjm_context_graph_primitives/graph.py'),
28
+ 'cjm_context_graph_primitives.graph.GraphNode.to_dict': ( 'graph.html#graphnode.to_dict',
29
+ 'cjm_context_graph_primitives/graph.py')},
30
+ 'cjm_context_graph_primitives.locators': { 'cjm_context_graph_primitives.locators.FileRef': ( 'locators.html#fileref',
31
+ 'cjm_context_graph_primitives/locators.py'),
32
+ 'cjm_context_graph_primitives.locators.FileRef.__str__': ( 'locators.html#fileref.__str__',
33
+ 'cjm_context_graph_primitives/locators.py'),
34
+ 'cjm_context_graph_primitives.locators.FileRef.to_dict': ( 'locators.html#fileref.to_dict',
35
+ 'cjm_context_graph_primitives/locators.py'),
36
+ 'cjm_context_graph_primitives.locators.FileRef.to_uri': ( 'locators.html#fileref.to_uri',
37
+ 'cjm_context_graph_primitives/locators.py'),
38
+ 'cjm_context_graph_primitives.locators.GraphNodeRef': ( 'locators.html#graphnoderef',
39
+ 'cjm_context_graph_primitives/locators.py'),
40
+ 'cjm_context_graph_primitives.locators.GraphNodeRef.__str__': ( 'locators.html#graphnoderef.__str__',
41
+ 'cjm_context_graph_primitives/locators.py'),
42
+ 'cjm_context_graph_primitives.locators.GraphNodeRef.to_dict': ( 'locators.html#graphnoderef.to_dict',
43
+ 'cjm_context_graph_primitives/locators.py'),
44
+ 'cjm_context_graph_primitives.locators.GraphNodeRef.to_uri': ( 'locators.html#graphnoderef.to_uri',
45
+ 'cjm_context_graph_primitives/locators.py'),
46
+ 'cjm_context_graph_primitives.locators.UnknownLocator': ( 'locators.html#unknownlocator',
47
+ 'cjm_context_graph_primitives/locators.py'),
48
+ 'cjm_context_graph_primitives.locators.UnknownLocator.__str__': ( 'locators.html#unknownlocator.__str__',
49
+ 'cjm_context_graph_primitives/locators.py'),
50
+ 'cjm_context_graph_primitives.locators.UnknownLocator.to_dict': ( 'locators.html#unknownlocator.to_dict',
51
+ 'cjm_context_graph_primitives/locators.py'),
52
+ 'cjm_context_graph_primitives.locators.UnknownLocator.to_uri': ( 'locators.html#unknownlocator.to_uri',
53
+ 'cjm_context_graph_primitives/locators.py'),
54
+ 'cjm_context_graph_primitives.locators.locator_from_dict': ( 'locators.html#locator_from_dict',
55
+ 'cjm_context_graph_primitives/locators.py')},
56
+ 'cjm_context_graph_primitives.provenance': { 'cjm_context_graph_primitives.provenance.SourceRef': ( 'provenance.html#sourceref',
57
+ 'cjm_context_graph_primitives/provenance.py'),
58
+ 'cjm_context_graph_primitives.provenance.SourceRef.__str__': ( 'provenance.html#sourceref.__str__',
59
+ 'cjm_context_graph_primitives/provenance.py'),
60
+ 'cjm_context_graph_primitives.provenance.SourceRef.compute_hash': ( 'provenance.html#sourceref.compute_hash',
61
+ 'cjm_context_graph_primitives/provenance.py'),
62
+ 'cjm_context_graph_primitives.provenance.SourceRef.from_dict': ( 'provenance.html#sourceref.from_dict',
63
+ 'cjm_context_graph_primitives/provenance.py'),
64
+ 'cjm_context_graph_primitives.provenance.SourceRef.to_dict': ( 'provenance.html#sourceref.to_dict',
65
+ 'cjm_context_graph_primitives/provenance.py'),
66
+ 'cjm_context_graph_primitives.provenance.SourceRef.to_uri': ( 'provenance.html#sourceref.to_uri',
67
+ 'cjm_context_graph_primitives/provenance.py'),
68
+ 'cjm_context_graph_primitives.provenance.SourceRef.verify': ( 'provenance.html#sourceref.verify',
69
+ 'cjm_context_graph_primitives/provenance.py')},
70
+ 'cjm_context_graph_primitives.query': { 'cjm_context_graph_primitives.query.EdgeQuery': ( 'query.html#edgequery',
71
+ 'cjm_context_graph_primitives/query.py'),
72
+ 'cjm_context_graph_primitives.query.EdgeQuery.from_dict': ( 'query.html#edgequery.from_dict',
73
+ 'cjm_context_graph_primitives/query.py'),
74
+ 'cjm_context_graph_primitives.query.EdgeQuery.to_dict': ( 'query.html#edgequery.to_dict',
75
+ 'cjm_context_graph_primitives/query.py'),
76
+ 'cjm_context_graph_primitives.query.NodeQuery': ( 'query.html#nodequery',
77
+ 'cjm_context_graph_primitives/query.py'),
78
+ 'cjm_context_graph_primitives.query.NodeQuery.from_dict': ( 'query.html#nodequery.from_dict',
79
+ 'cjm_context_graph_primitives/query.py'),
80
+ 'cjm_context_graph_primitives.query.NodeQuery.to_dict': ( 'query.html#nodequery.to_dict',
81
+ 'cjm_context_graph_primitives/query.py'),
82
+ 'cjm_context_graph_primitives.query.OrderBy': ( 'query.html#orderby',
83
+ 'cjm_context_graph_primitives/query.py'),
84
+ 'cjm_context_graph_primitives.query.OrderBy.from_dict': ( 'query.html#orderby.from_dict',
85
+ 'cjm_context_graph_primitives/query.py'),
86
+ 'cjm_context_graph_primitives.query.OrderBy.to_dict': ( 'query.html#orderby.to_dict',
87
+ 'cjm_context_graph_primitives/query.py'),
88
+ 'cjm_context_graph_primitives.query.PropertyPredicate': ( 'query.html#propertypredicate',
89
+ 'cjm_context_graph_primitives/query.py'),
90
+ 'cjm_context_graph_primitives.query.PropertyPredicate.__post_init__': ( 'query.html#propertypredicate.__post_init__',
91
+ 'cjm_context_graph_primitives/query.py'),
92
+ 'cjm_context_graph_primitives.query.PropertyPredicate.from_dict': ( 'query.html#propertypredicate.from_dict',
93
+ 'cjm_context_graph_primitives/query.py'),
94
+ 'cjm_context_graph_primitives.query.PropertyPredicate.to_dict': ( 'query.html#propertypredicate.to_dict',
95
+ 'cjm_context_graph_primitives/query.py'),
96
+ 'cjm_context_graph_primitives.query.RawQuery': ( 'query.html#rawquery',
97
+ 'cjm_context_graph_primitives/query.py'),
98
+ 'cjm_context_graph_primitives.query.RawQuery.__post_init__': ( 'query.html#rawquery.__post_init__',
99
+ 'cjm_context_graph_primitives/query.py'),
100
+ 'cjm_context_graph_primitives.query.RawQuery.from_dict': ( 'query.html#rawquery.from_dict',
101
+ 'cjm_context_graph_primitives/query.py'),
102
+ 'cjm_context_graph_primitives.query.RawQuery.to_dict': ( 'query.html#rawquery.to_dict',
103
+ 'cjm_context_graph_primitives/query.py'),
104
+ 'cjm_context_graph_primitives.query.RelationPredicate': ( 'query.html#relationpredicate',
105
+ 'cjm_context_graph_primitives/query.py'),
106
+ 'cjm_context_graph_primitives.query.RelationPredicate.__post_init__': ( 'query.html#relationpredicate.__post_init__',
107
+ 'cjm_context_graph_primitives/query.py'),
108
+ 'cjm_context_graph_primitives.query.RelationPredicate.from_dict': ( 'query.html#relationpredicate.from_dict',
109
+ 'cjm_context_graph_primitives/query.py'),
110
+ 'cjm_context_graph_primitives.query.RelationPredicate.to_dict': ( 'query.html#relationpredicate.to_dict',
111
+ 'cjm_context_graph_primitives/query.py'),
112
+ 'cjm_context_graph_primitives.query.SourcePredicate': ( 'query.html#sourcepredicate',
113
+ 'cjm_context_graph_primitives/query.py'),
114
+ 'cjm_context_graph_primitives.query.SourcePredicate.__post_init__': ( 'query.html#sourcepredicate.__post_init__',
115
+ 'cjm_context_graph_primitives/query.py'),
116
+ 'cjm_context_graph_primitives.query.SourcePredicate.from_dict': ( 'query.html#sourcepredicate.from_dict',
117
+ 'cjm_context_graph_primitives/query.py'),
118
+ 'cjm_context_graph_primitives.query.SourcePredicate.to_dict': ( 'query.html#sourcepredicate.to_dict',
119
+ 'cjm_context_graph_primitives/query.py'),
120
+ 'cjm_context_graph_primitives.query.query_from_dict': ( 'query.html#query_from_dict',
121
+ 'cjm_context_graph_primitives/query.py')},
122
+ 'cjm_context_graph_primitives.slices': { 'cjm_context_graph_primitives.slices.CharSlice': ( 'slices.html#charslice',
123
+ 'cjm_context_graph_primitives/slices.py'),
124
+ 'cjm_context_graph_primitives.slices.CharSlice.__post_init__': ( 'slices.html#charslice.__post_init__',
125
+ 'cjm_context_graph_primitives/slices.py'),
126
+ 'cjm_context_graph_primitives.slices.CharSlice.__str__': ( 'slices.html#charslice.__str__',
127
+ 'cjm_context_graph_primitives/slices.py'),
128
+ 'cjm_context_graph_primitives.slices.CharSlice.to_dict': ( 'slices.html#charslice.to_dict',
129
+ 'cjm_context_graph_primitives/slices.py'),
130
+ 'cjm_context_graph_primitives.slices.CharSlice.to_slice_string': ( 'slices.html#charslice.to_slice_string',
131
+ 'cjm_context_graph_primitives/slices.py'),
132
+ 'cjm_context_graph_primitives.slices.FrameSlice': ( 'slices.html#frameslice',
133
+ 'cjm_context_graph_primitives/slices.py'),
134
+ 'cjm_context_graph_primitives.slices.FrameSlice.__post_init__': ( 'slices.html#frameslice.__post_init__',
135
+ 'cjm_context_graph_primitives/slices.py'),
136
+ 'cjm_context_graph_primitives.slices.FrameSlice.__str__': ( 'slices.html#frameslice.__str__',
137
+ 'cjm_context_graph_primitives/slices.py'),
138
+ 'cjm_context_graph_primitives.slices.FrameSlice.to_dict': ( 'slices.html#frameslice.to_dict',
139
+ 'cjm_context_graph_primitives/slices.py'),
140
+ 'cjm_context_graph_primitives.slices.FrameSlice.to_slice_string': ( 'slices.html#frameslice.to_slice_string',
141
+ 'cjm_context_graph_primitives/slices.py'),
142
+ 'cjm_context_graph_primitives.slices.FullContent': ( 'slices.html#fullcontent',
143
+ 'cjm_context_graph_primitives/slices.py'),
144
+ 'cjm_context_graph_primitives.slices.FullContent.__str__': ( 'slices.html#fullcontent.__str__',
145
+ 'cjm_context_graph_primitives/slices.py'),
146
+ 'cjm_context_graph_primitives.slices.FullContent.to_dict': ( 'slices.html#fullcontent.to_dict',
147
+ 'cjm_context_graph_primitives/slices.py'),
148
+ 'cjm_context_graph_primitives.slices.FullContent.to_slice_string': ( 'slices.html#fullcontent.to_slice_string',
149
+ 'cjm_context_graph_primitives/slices.py'),
150
+ 'cjm_context_graph_primitives.slices.LineSlice': ( 'slices.html#lineslice',
151
+ 'cjm_context_graph_primitives/slices.py'),
152
+ 'cjm_context_graph_primitives.slices.LineSlice.__post_init__': ( 'slices.html#lineslice.__post_init__',
153
+ 'cjm_context_graph_primitives/slices.py'),
154
+ 'cjm_context_graph_primitives.slices.LineSlice.__str__': ( 'slices.html#lineslice.__str__',
155
+ 'cjm_context_graph_primitives/slices.py'),
156
+ 'cjm_context_graph_primitives.slices.LineSlice.to_dict': ( 'slices.html#lineslice.to_dict',
157
+ 'cjm_context_graph_primitives/slices.py'),
158
+ 'cjm_context_graph_primitives.slices.LineSlice.to_slice_string': ( 'slices.html#lineslice.to_slice_string',
159
+ 'cjm_context_graph_primitives/slices.py'),
160
+ 'cjm_context_graph_primitives.slices.PageSlice': ( 'slices.html#pageslice',
161
+ 'cjm_context_graph_primitives/slices.py'),
162
+ 'cjm_context_graph_primitives.slices.PageSlice.__post_init__': ( 'slices.html#pageslice.__post_init__',
163
+ 'cjm_context_graph_primitives/slices.py'),
164
+ 'cjm_context_graph_primitives.slices.PageSlice.__str__': ( 'slices.html#pageslice.__str__',
165
+ 'cjm_context_graph_primitives/slices.py'),
166
+ 'cjm_context_graph_primitives.slices.PageSlice.to_dict': ( 'slices.html#pageslice.to_dict',
167
+ 'cjm_context_graph_primitives/slices.py'),
168
+ 'cjm_context_graph_primitives.slices.PageSlice.to_slice_string': ( 'slices.html#pageslice.to_slice_string',
169
+ 'cjm_context_graph_primitives/slices.py'),
170
+ 'cjm_context_graph_primitives.slices.TimeSlice': ( 'slices.html#timeslice',
171
+ 'cjm_context_graph_primitives/slices.py'),
172
+ 'cjm_context_graph_primitives.slices.TimeSlice.__post_init__': ( 'slices.html#timeslice.__post_init__',
173
+ 'cjm_context_graph_primitives/slices.py'),
174
+ 'cjm_context_graph_primitives.slices.TimeSlice.__str__': ( 'slices.html#timeslice.__str__',
175
+ 'cjm_context_graph_primitives/slices.py'),
176
+ 'cjm_context_graph_primitives.slices.TimeSlice.to_dict': ( 'slices.html#timeslice.to_dict',
177
+ 'cjm_context_graph_primitives/slices.py'),
178
+ 'cjm_context_graph_primitives.slices.TimeSlice.to_slice_string': ( 'slices.html#timeslice.to_slice_string',
179
+ 'cjm_context_graph_primitives/slices.py'),
180
+ 'cjm_context_graph_primitives.slices.UnknownSlice': ( 'slices.html#unknownslice',
181
+ 'cjm_context_graph_primitives/slices.py'),
182
+ 'cjm_context_graph_primitives.slices.UnknownSlice.__str__': ( 'slices.html#unknownslice.__str__',
183
+ 'cjm_context_graph_primitives/slices.py'),
184
+ 'cjm_context_graph_primitives.slices.UnknownSlice.to_dict': ( 'slices.html#unknownslice.to_dict',
185
+ 'cjm_context_graph_primitives/slices.py'),
186
+ 'cjm_context_graph_primitives.slices.UnknownSlice.to_slice_string': ( 'slices.html#unknownslice.to_slice_string',
187
+ 'cjm_context_graph_primitives/slices.py'),
188
+ 'cjm_context_graph_primitives.slices.parse_slice': ( 'slices.html#parse_slice',
189
+ 'cjm_context_graph_primitives/slices.py'),
190
+ 'cjm_context_graph_primitives.slices.slice_from_dict': ( 'slices.html#slice_from_dict',
191
+ 'cjm_context_graph_primitives/slices.py')}}}
@@ -0,0 +1,153 @@
1
+ """The graph data nouns — `GraphNode` / `GraphEdge` / `GraphContext`. Moved here from `cjm-graph-plugin-system` per the data-nouns-vs-storage-verbs split (pass-2 Thread 2): every consumer of graph DATA (workflow cores, bundles, the CR-18 graph-aware layer, the storage adapter itself) depends on this library; only persistence depends on the storage adapter. `GraphContext` satisfies the substrate's `FileBackedDTO` protocol (`to_temp_file`) for zero-copy worker transfer.
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-primitives/graph.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/graph.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['GraphNode', 'GraphEdge', 'GraphContext']
9
+
10
+ # %% ../nbs/graph.ipynb #85463c1d
11
+ import json
12
+ import tempfile
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional
16
+
17
+ from .provenance import SourceRef
18
+
19
+ # %% ../nbs/graph.ipynb #2b42380b
20
@dataclass
class GraphNode:
    """An entity in a context graph.

    `sources` carries the node's provenance references (multiple refs per node:
    e.g. a fine Segment carries an audio ref and a text ref — the slice kind
    selects the facet).
    """
    id: str  # UUID
    label: str  # e.g. "Source", "Segment", "Correction"
    properties: Dict[str, Any] = field(default_factory=dict)  # Arbitrary domain payload
    sources: List[SourceRef] = field(default_factory=list)  # Provenance references
    created_at: Optional[float] = None  # Unix timestamp when created
    updated_at: Optional[float] = None  # Unix timestamp when last updated

    def to_dict(self) -> Dict[str, Any]:  # Wire dict with nested source dicts
        """Serialize to the wire dict form.

        Each entry of `sources` is serialized through its own `to_dict`;
        `properties` is passed through by reference (not copied).
        """
        return {
            "id": self.id,
            "label": self.label,
            "properties": self.properties,
            "sources": [s.to_dict() for s in self.sources],
            "created_at": self.created_at,
            "updated_at": self.updated_at,
        }

    @classmethod
    def from_dict(
        cls,
        data: Dict[str, Any]  # Wire dict (nested source dicts or SourceRef instances)
    ) -> "GraphNode":  # Reconstructed node
        """Reconstruct from the wire dict form (single authority — storage adapters
        and `GraphContext` both route through here rather than re-implementing).

        `id` and `label` are required (a missing key raises `KeyError`). A missing
        or explicit-null `properties` / `sources` is treated as empty. Entries of
        `sources` that are dicts go through `SourceRef.from_dict`; anything else is
        kept as-is.
        """
        # An explicit JSON null must behave like an absent key: iterating None
        # would raise TypeError, and properties=None would break the Dict contract.
        raw_sources = data.get("sources")
        if raw_sources is None:
            raw_sources = []
        properties = data.get("properties")
        if properties is None:
            properties = {}
        return cls(
            id=data["id"],
            label=data["label"],
            properties=properties,
            sources=[
                SourceRef.from_dict(s) if isinstance(s, dict) else s
                for s in raw_sources
            ],
            created_at=data.get("created_at"),
            updated_at=data.get("updated_at"),
        )
64
+
65
+ # %% ../nbs/graph.ipynb #62668064
66
@dataclass
class GraphEdge:
    """A relationship between two nodes. Composition is ALWAYS edges — grouping,
    chains, supersession, and provenance topology all live here, never in
    multi-range slices or per-ref chain fields."""
    id: str  # UUID
    source_id: str  # Origin node UUID
    target_id: str  # Destination node UUID
    relation_type: str  # e.g. "NEXT", "PART_OF", "CORRECTS", "DERIVED_FROM"
    properties: Dict[str, Any] = field(default_factory=dict)  # Arbitrary metadata
    created_at: Optional[float] = None  # Unix timestamp when created
    updated_at: Optional[float] = None  # Unix timestamp when last updated

    def to_dict(self) -> Dict[str, Any]:  # Wire dict
        """Serialize to the wire dict form (`properties` is passed by reference)."""
        return {
            "id": self.id,
            "source_id": self.source_id,
            "target_id": self.target_id,
            "relation_type": self.relation_type,
            "properties": self.properties,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
        }

    @classmethod
    def from_dict(
        cls,
        data: Dict[str, Any]  # Wire dict
    ) -> "GraphEdge":  # Reconstructed edge
        """Reconstruct from the wire dict form.

        `id`, `source_id`, `target_id` and `relation_type` are required (a missing
        key raises `KeyError`). A missing or explicit-null `properties` becomes an
        empty dict; timestamps default to None.
        """
        # Treat an explicit JSON null like an absent key so the field always
        # holds a dict, as its annotation promises.
        properties = data.get("properties")
        if properties is None:
            properties = {}
        return cls(
            id=data["id"],
            source_id=data["source_id"],
            target_id=data["target_id"],
            relation_type=data["relation_type"],
            properties=properties,
            created_at=data.get("created_at"),
            updated_at=data.get("updated_at"),
        )
106
+
107
+ # %% ../nbs/graph.ipynb #f167174b
108
@dataclass
class GraphContext:
    """Container for graph read results (a subgraph).

    Satisfies the substrate's `FileBackedDTO` protocol via `to_temp_file` for
    zero-copy transfer across the worker boundary.
    """
    nodes: List[GraphNode]  # Nodes in the subgraph
    edges: List[GraphEdge]  # Edges in the subgraph
    metadata: Dict[str, Any] = field(default_factory=dict)  # Query metadata, stats, etc.

    def to_dict(self) -> Dict[str, Any]:  # Wire dict
        """Serialize to the wire dict form (nodes and edges via their own `to_dict`)."""
        return {
            "nodes": [n.to_dict() for n in self.nodes],
            "edges": [e.to_dict() for e in self.edges],
            "metadata": self.metadata,
        }

    def to_temp_file(self) -> str:  # Absolute path to a temporary JSON file
        """Save to a temp file for zero-copy transfer (FileBackedDTO).

        The file is created with `delete=False`, so the caller owns its cleanup.
        If serialization fails, the handle is closed and the partially written
        file is removed before the error propagates.
        """
        tmp = tempfile.NamedTemporaryFile(
            suffix=".json", delete=False, mode="w", encoding="utf-8"
        )
        try:
            # `with` guarantees the handle is closed even when json.dump raises.
            with tmp:
                json.dump(self.to_dict(), tmp)
        except BaseException:
            # Do not leave a truncated JSON file behind on failure.
            Path(tmp.name).unlink(missing_ok=True)
            raise
        return str(Path(tmp.name).absolute())

    @classmethod
    def from_dict(
        cls,
        data: Dict[str, Any]  # Wire dict with nodes, edges, metadata
    ) -> "GraphContext":  # Reconstructed context
        """Reconstruct from the wire dict form.

        Missing or explicit-null `nodes` / `edges` / `metadata` are treated as
        empty.
        """
        metadata = data.get("metadata")
        if metadata is None:
            metadata = {}
        return cls(
            nodes=[GraphNode.from_dict(n) for n in (data.get("nodes") or [])],
            edges=[GraphEdge.from_dict(e) for e in (data.get("edges") or [])],
            metadata=metadata,
        )

    @classmethod
    def from_file(
        cls,
        filepath: str  # Path to a JSON file produced by `to_temp_file`
    ) -> "GraphContext":  # Reconstructed context
        """Load from a JSON file (read as UTF-8, matching `to_temp_file`)."""
        with open(filepath, "r", encoding="utf-8") as f:
            return cls.from_dict(json.load(f))
@@ -0,0 +1,132 @@
1
+ """Structured resource locators — the typed sum type addressing WHERE referenced content lives (CR-19). A locator renders a canonical URI string for the things strings are good at (grep, logs, cache keys, display) while keeping typed field access primary; unknown kinds round-trip losslessly for forward compatibility.
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-primitives/locators.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/locators.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['ResourceLocator', 'LOCATOR_KINDS', 'GraphNodeRef', 'FileRef', 'UnknownLocator', 'locator_from_dict']
9
+
10
+ # %% ../nbs/locators.ipynb #80a73854
11
+ import json
12
+ from dataclasses import dataclass, field
13
+ from typing import Any, ClassVar, Dict, Optional, Union
14
+
15
+ from cjm_plugin_system.core.errors import PluginInputError
16
+
17
+ # %% ../nbs/locators.ipynb #7ce6b17e
18
@dataclass(frozen=True)
class GraphNodeRef:
    """Locator addressing a single node of a context graph.

    A `graph_id` of None denotes the current graph (an intra-graph reference).
    Any other value points at a node owned by a different graph — cross-graph
    references become real with provenance bundles (CR-20); the field is present
    already so the wire shape stays stable once they do.
    """
    node_id: str  # Target node id (UUID)
    graph_id: Optional[str] = None  # Owning graph id; None = the current graph

    KIND: ClassVar[str] = "graph-node"  # Locator kind discriminator

    def to_uri(self) -> str:  # Canonical URI, e.g. "graph-node:<node_id>" or "graph-node:<graph_id>/<node_id>"
        """Build the canonical URI string for this locator."""
        # A falsy graph_id (None or "") renders the short intra-graph form.
        target = f"{self.graph_id}/{self.node_id}" if self.graph_id else self.node_id
        return f"{self.KIND}:{target}"

    def __str__(self) -> str:  # Same as `to_uri`
        """Return the canonical URI form."""
        return self.to_uri()

    def to_dict(self) -> Dict[str, Any]:  # Wire dict with "kind" discriminator
        """Produce the wire dict representation."""
        return dict(kind=self.KIND, node_id=self.node_id, graph_id=self.graph_id)
44
+
45
+ # %% ../nbs/locators.ipynb #1f6e920b
46
@dataclass(frozen=True)
class FileRef:
    """Locator addressing an artifact on the filesystem (e.g. a consumed run manifest).

    The path is kept and rendered exactly as given — the URI form is a canonical
    string for display and grep, not an RFC 3986 URI (no percent-encoding; corpus
    paths contain spaces).
    """
    path: str  # Absolute filesystem path

    KIND: ClassVar[str] = "file"  # Locator kind discriminator

    def to_uri(self) -> str:  # Canonical URI, e.g. "file:/abs/path"
        """Build the canonical URI string: the kind, a colon, then the raw path."""
        return "{}:{}".format(self.KIND, self.path)

    def __str__(self) -> str:  # Same as `to_uri`
        """Return the canonical URI form."""
        return self.to_uri()

    def to_dict(self) -> Dict[str, Any]:  # Wire dict with "kind" discriminator
        """Produce the wire dict representation."""
        return dict(kind=self.KIND, path=self.path)
68
+
69
+ # %% ../nbs/locators.ipynb #b36b75e1
70
@dataclass(frozen=True)
class UnknownLocator:
    """Lossless carrier for a locator kind this library version does not recognize.

    Forward-compatibility law (CR-19): consumers must round-trip locator kinds
    they cannot interpret — a shared bundle from a newer ecosystem version (or a
    future source type) keeps its references intact, and `SourceRef.content_hash`
    still verifies the content behind an un-understood locator. `data` holds the
    original payload verbatim (minus the "kind" discriminator).

    Not hashable in practice (carries a dict); known-kind locators are the
    value-object path.
    """
    kind: str  # The unrecognized kind discriminator
    data: Dict[str, Any] = field(default_factory=dict)  # Original payload, verbatim (minus "kind")

    def to_uri(self) -> str:  # Best-effort canonical URI: "<kind>:<canonical-json>"
        """Build a deterministic best-effort URI string."""
        # Sorted keys plus compact separators keep the rendering stable for a
        # given payload regardless of its key insertion order.
        payload_json = json.dumps(self.data, separators=(",", ":"), sort_keys=True)
        return "{}:{}".format(self.kind, payload_json)

    def __str__(self) -> str:  # Same as `to_uri`
        """Return the best-effort URI form."""
        return self.to_uri()

    def to_dict(self) -> Dict[str, Any]:  # The original wire dict, reconstructed verbatim
        """Rebuild the original wire dict: the "kind" key first, then the payload."""
        wire: Dict[str, Any] = {"kind": self.kind}
        wire.update(self.data)
        return wire
97
+
98
+ # %% ../nbs/locators.ipynb #6f16b55e
99
ResourceLocator = Union[GraphNodeRef, FileRef, UnknownLocator]  # The locator sum type

# Known-kind registry for wire-dict dispatch: each known locator class is keyed
# by its own KIND discriminator. UnknownLocator is deliberately absent — it is
# the fallback for kinds that are not registered here.
LOCATOR_KINDS: Dict[str, type] = {
    known.KIND: known for known in (GraphNodeRef, FileRef)
}
105
+
106
+ # %% ../nbs/locators.ipynb #23a86f57
107
def locator_from_dict(
    d: Dict[str, Any]  # Wire dict with a "kind" discriminator
) -> ResourceLocator:  # Typed locator; unknown kinds round-trip as UnknownLocator
    """Reconstruct a locator from its wire dict.

    Unknown kinds are preserved losslessly as `UnknownLocator`. Known kinds are
    strict: a payload mismatch (extra/missing fields) raises, because additive
    evolution of a known kind must land in this library, not be silently dropped.

    Raises `PluginInputError` when the "kind" discriminator is missing, empty,
    or not a string, and when a known kind's payload does not match its fields.
    """
    kind = d.get("kind")
    if not kind:
        raise PluginInputError(
            f"Locator dict missing 'kind' discriminator: {d!r}",
            fields_invalid=["kind"],
        )
    # A non-string kind (e.g. a list from malformed JSON) may be unhashable and
    # would otherwise escape the registry lookup below as a raw TypeError.
    if not isinstance(kind, str):
        raise PluginInputError(
            f"Locator 'kind' discriminator must be a string, "
            f"got {type(kind).__name__}: {d!r}",
            fields_invalid=["kind"],
        )
    cls = LOCATOR_KINDS.get(kind)
    payload = {k: v for k, v in d.items() if k != "kind"}
    if cls is None:
        return UnknownLocator(kind=kind, data=payload)
    try:
        return cls(**payload)
    except TypeError as e:
        # The dataclass constructor rejects unexpected or missing keyword fields.
        raise PluginInputError(
            f"Malformed '{kind}' locator payload {payload!r}: {e}",
            fields_invalid=list(payload.keys()),
        ) from e
@@ -0,0 +1,82 @@
1
+ """`SourceRef` — the cross-cutting provenance reference (CR-19). **Identity = `content_hash` (PRIMARY); location = `locator`; region = optional atomic typed `slice`.** `verify()` is hash-based regardless of whether the locator still resolves — the structural fix for dangling row-id provenance (cache-hit rows; ledgers E13/D3).
2
+
3
+ Docs: https://cj-mills.github.io/cjm-context-graph-primitives/provenance.html.md"""
4
+
5
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/provenance.ipynb.
6
+
7
+ # %% auto #0
8
+ __all__ = ['SourceRef']
9
+
10
+ # %% ../nbs/provenance.ipynb #e9105179
11
+ from dataclasses import dataclass
12
+ from typing import Any, Dict, Optional
13
+
14
+ from cjm_plugin_system.core.errors import PluginInputError
15
+ from cjm_plugin_system.utils.hashing import hash_bytes, verify_hash
16
+
17
+ from .locators import ResourceLocator, locator_from_dict
18
+ from .slices import TypedSlice, slice_from_dict
19
+
20
+ # %% ../nbs/provenance.ipynb #7cdef44b
21
@dataclass(frozen=True)
class SourceRef:
    """Provenance reference pointing at (a region of) a resource.

    Supersedes the old plugin_name/table_name/row_id/segment_slice shape: the
    locator is a structured sum type (no `external:<path>` string abuse), the
    hash is the primary identity, and the slice is typed and atomic.
    """
    locator: ResourceLocator  # WHERE the content lives/lived (may dangle without breaking verifiability)
    content_hash: str  # PRIMARY identity: "algo:hexdigest" over the CONSUMED (sliced) content
    slice: Optional[TypedSlice] = None  # Consumed region, framed to the locator target; None = whole resource

    def to_uri(self) -> str:  # Canonical string: "<locator-uri>[#<slice-string>]@<content_hash>"
        """Build the full canonical string form (grep-able by locator, slice, or hash)."""
        where = self.locator.to_uri()
        # The "#<slice>" fragment appears only when a region was recorded.
        region = "" if self.slice is None else f"#{self.slice.to_slice_string()}"
        return f"{where}{region}@{self.content_hash}"

    def __str__(self) -> str:  # Same as `to_uri`
        """Return the canonical string form."""
        return self.to_uri()

    def to_dict(self) -> Dict[str, Any]:  # Nested wire dict
        """Produce the nested wire dict; "slice" is None when no region is set."""
        wire: Dict[str, Any] = {
            "locator": self.locator.to_dict(),
            "content_hash": self.content_hash,
        }
        wire["slice"] = None if self.slice is None else self.slice.to_dict()
        return wire

    @classmethod
    def from_dict(
        cls,
        d: Dict[str, Any]  # Wire dict with nested locator/slice dicts
    ) -> "SourceRef":  # Reconstructed reference (unknown locator/slice kinds round-trip)
        """Rebuild a reference from its wire dict.

        Raises `PluginInputError` when "locator" or "content_hash" is absent;
        a missing or null "slice" means the whole resource.
        """
        absent = [k for k in ("locator", "content_hash") if k not in d]
        if absent:
            raise PluginInputError(
                f"SourceRef dict missing required keys: {sorted(d.keys())!r}",
                fields_invalid=absent,
            )
        where = locator_from_dict(d["locator"])
        raw_slice = d.get("slice")
        region = None if raw_slice is None else slice_from_dict(raw_slice)
        return cls(locator=where, content_hash=d["content_hash"], slice=region)

    def verify(
        self,
        current_content: bytes  # Current bytes of the CONSUMED (sliced) content
    ) -> bool:  # True if content still matches the stored hash
        """Check content against the stored hash — needs no locator resolution."""
        return verify_hash(current_content, self.content_hash)

    @staticmethod
    def compute_hash(
        content: bytes,  # Content to hash
        algo: str = "sha256"  # Hash algorithm name
    ) -> str:  # Hash string in "algo:hexdigest" format
        """Compute a content hash suitable for a SourceRef's `content_hash`."""
        return hash_bytes(content, algo)