crabpath 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crabpath/__init__.py +191 -0
- crabpath/__main__.py +7 -0
- crabpath/_io.py +257 -0
- crabpath/_structural_utils.py +127 -0
- crabpath/adapter.py +452 -0
- crabpath/autotune.py +1187 -0
- crabpath/cli.py +1279 -0
- crabpath/controller.py +612 -0
- crabpath/decay.py +94 -0
- crabpath/embeddings.py +402 -0
- crabpath/feedback.py +451 -0
- crabpath/graph.py +828 -0
- crabpath/inhibition.py +168 -0
- crabpath/learning.py +360 -0
- crabpath/legacy/__init__.py +1 -0
- crabpath/legacy/activation.py +236 -0
- crabpath/lifecycle_sim.py +608 -0
- crabpath/mcp_server.py +769 -0
- crabpath/migrate.py +686 -0
- crabpath/mitosis.py +878 -0
- crabpath/py.typed +1 -0
- crabpath/router.py +585 -0
- crabpath/shadow_logger.py +219 -0
- crabpath/synaptogenesis.py +360 -0
- crabpath/traversal.py +307 -0
- crabpath-1.0.0.dist-info/METADATA +423 -0
- crabpath-1.0.0.dist-info/RECORD +30 -0
- crabpath-1.0.0.dist-info/WHEEL +4 -0
- crabpath-1.0.0.dist-info/entry_points.txt +2 -0
- crabpath-1.0.0.dist-info/licenses/LICENSE +201 -0
crabpath/__init__.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Most users only need: from crabpath import Graph, MemoryController
|
|
3
|
+
|
|
4
|
+
🦀 CrabPath: The Graph is the Prompt.
|
|
5
|
+
|
|
6
|
+
LLM-guided memory traversal with learned pointer weights
|
|
7
|
+
and corrected policy gradients.
|
|
8
|
+
|
|
9
|
+
CLI:
|
|
10
|
+
python -m crabpath
|
|
11
|
+
crabpath # via console_scripts entry point
|
|
12
|
+
|
|
13
|
+
Paper: https://jonathangu.com/crabpath/
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
# Package version string, exposed as ``crabpath.__version__``.
__version__ = "1.0.0"
|
|
17
|
+
|
|
18
|
+
from .adapter import CrabPathAgent, OpenClawCrabPathAdapter
|
|
19
|
+
from .autotune import (
|
|
20
|
+
DEFAULTS,
|
|
21
|
+
HEALTH_TARGETS,
|
|
22
|
+
Adjustment,
|
|
23
|
+
GraphHealth,
|
|
24
|
+
autotune,
|
|
25
|
+
measure_health,
|
|
26
|
+
suggest_config,
|
|
27
|
+
)
|
|
28
|
+
from .controller import ControllerConfig, MemoryController, QueryResult
|
|
29
|
+
from .embeddings import (
|
|
30
|
+
EmbeddingIndex,
|
|
31
|
+
auto_embed,
|
|
32
|
+
cohere_embed,
|
|
33
|
+
gemini_embed,
|
|
34
|
+
ollama_embed,
|
|
35
|
+
openai_embed,
|
|
36
|
+
)
|
|
37
|
+
from .feedback import (
|
|
38
|
+
auto_feedback,
|
|
39
|
+
auto_outcome,
|
|
40
|
+
detect_correction,
|
|
41
|
+
map_correction_to_snapshot,
|
|
42
|
+
score_retrieval,
|
|
43
|
+
)
|
|
44
|
+
from .graph import (
|
|
45
|
+
ConsolidationConfig,
|
|
46
|
+
ConsolidationResult,
|
|
47
|
+
Edge,
|
|
48
|
+
Graph,
|
|
49
|
+
Node,
|
|
50
|
+
consolidate,
|
|
51
|
+
prune_orphan_nodes,
|
|
52
|
+
prune_probationary,
|
|
53
|
+
prune_weak_edges,
|
|
54
|
+
should_split,
|
|
55
|
+
)
|
|
56
|
+
from .inhibition import (
|
|
57
|
+
InhibitionConfig,
|
|
58
|
+
apply_correction,
|
|
59
|
+
get_inhibitory_edges,
|
|
60
|
+
inhibition_stats,
|
|
61
|
+
is_inhibited,
|
|
62
|
+
score_with_inhibition,
|
|
63
|
+
)
|
|
64
|
+
from .learning import LearningConfig, LearningResult, RewardSignal, make_learning_step
|
|
65
|
+
from .legacy.activation import Firing, activate, learn
|
|
66
|
+
from .migrate import MigrateConfig, gather_files, migrate, parse_session_logs
|
|
67
|
+
from .mitosis import (
|
|
68
|
+
BLOCKED_QUERIES,
|
|
69
|
+
MergeResult,
|
|
70
|
+
MitosisConfig,
|
|
71
|
+
MitosisState,
|
|
72
|
+
NeurogenesisConfig,
|
|
73
|
+
NeurogenesisResult,
|
|
74
|
+
NoveltyResult,
|
|
75
|
+
SplitResult,
|
|
76
|
+
assess_novelty,
|
|
77
|
+
bootstrap_workspace,
|
|
78
|
+
connect_new_node,
|
|
79
|
+
create_node,
|
|
80
|
+
deterministic_auto_id,
|
|
81
|
+
find_co_firing_families,
|
|
82
|
+
merge_nodes,
|
|
83
|
+
mitosis_maintenance,
|
|
84
|
+
should_create_node,
|
|
85
|
+
should_merge,
|
|
86
|
+
split_node,
|
|
87
|
+
split_with_llm,
|
|
88
|
+
)
|
|
89
|
+
from .shadow_logger import ShadowLog
|
|
90
|
+
from .synaptogenesis import (
|
|
91
|
+
ProtoEdge,
|
|
92
|
+
SynaptogenesisConfig,
|
|
93
|
+
SynaptogenesisState,
|
|
94
|
+
classify_tier,
|
|
95
|
+
decay_proto_edges,
|
|
96
|
+
edge_tier_stats,
|
|
97
|
+
record_cofiring,
|
|
98
|
+
record_correction,
|
|
99
|
+
record_skips,
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
# Names re-exported by ``from crabpath import *``. Each name appears exactly
# once; "should_merge" is listed with the other mitosis helpers.
__all__ = [
    # --- Core (start here) ---
    "Graph",
    "Node",
    "Edge",
    "MemoryController",
    "QueryResult",
    "ControllerConfig",
    # --- Embeddings ---
    "EmbeddingIndex",
    "auto_embed",
    "openai_embed",
    "gemini_embed",
    "cohere_embed",
    "ollama_embed",
    # --- Learning ---
    "activate",
    "learn",
    "Firing",
    "LearningConfig",
    "LearningResult",
    "RewardSignal",
    "make_learning_step",
    # --- Advanced (internals) ---
    "ConsolidationConfig",
    "ConsolidationResult",
    "prune_orphan_nodes",
    "prune_weak_edges",
    "prune_probationary",
    "should_split",
    "consolidate",
    "CrabPathAgent",
    "OpenClawCrabPathAdapter",
    "migrate",
    "MigrateConfig",
    "gather_files",
    "parse_session_logs",
    "auto_feedback",
    "auto_outcome",
    "detect_correction",
    "score_retrieval",
    "map_correction_to_snapshot",
    "DEFAULTS",
    "Adjustment",
    "GraphHealth",
    "HEALTH_TARGETS",
    "autotune",
    "measure_health",
    "suggest_config",
    "InhibitionConfig",
    "apply_correction",
    "score_with_inhibition",
    "is_inhibited",
    "get_inhibitory_edges",
    "inhibition_stats",
    "MitosisConfig",
    "MitosisState",
    "SplitResult",
    "MergeResult",
    "NeurogenesisConfig",
    "NeurogenesisResult",
    "NoveltyResult",
    "assess_novelty",
    "connect_new_node",
    "deterministic_auto_id",
    "split_node",
    "split_with_llm",
    "should_merge",
    "create_node",
    "should_create_node",
    "find_co_firing_families",
    "merge_nodes",
    "bootstrap_workspace",
    "mitosis_maintenance",
    "SynaptogenesisConfig",
    "SynaptogenesisState",
    "ProtoEdge",
    "record_cofiring",
    "record_skips",
    "record_correction",
    "classify_tier",
    "decay_proto_edges",
    "edge_tier_stats",
    "ShadowLog",
    "BLOCKED_QUERIES",
]
|
crabpath/__main__.py
ADDED
crabpath/_io.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""Shared IO helpers for CLI and MCP entrypoints."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Callable
|
|
9
|
+
|
|
10
|
+
from ._structural_utils import count_cross_file_edges
|
|
11
|
+
from .autotune import HEALTH_TARGETS
|
|
12
|
+
from .embeddings import EmbeddingIndex
|
|
13
|
+
from .graph import Graph
|
|
14
|
+
from .legacy.activation import Firing, activate
|
|
15
|
+
from .mitosis import MitosisState
|
|
16
|
+
from .synaptogenesis import edge_tier_stats
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_graph(path: str) -> Graph:
    """Read a graph from ``path``, normalizing failures for CLI/MCP callers.

    Args:
        path: Location of the serialized graph.

    Returns:
        Whatever ``Graph.load(path)`` produces.

    Raises:
        FileNotFoundError: If nothing exists at ``path``.
        ValueError: If ``Graph.load`` fails with an OS, JSON, key, type or
            value error; the underlying exception is chained as the cause.
    """
    if not Path(path).exists():
        raise FileNotFoundError(f"graph file not found: {path}")

    try:
        graph = Graph.load(path)
    except (OSError, json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
        raise ValueError(f"failed to load graph: {path}: {exc}") from exc
    return graph
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def load_index(path: str) -> EmbeddingIndex:
    """Load an embedding index from disk, or return an empty one if missing.

    Args:
        path: Location of the serialized index.

    Returns:
        The result of ``EmbeddingIndex.load(path)``, or a fresh
        ``EmbeddingIndex()`` when nothing exists at ``path``.

    Raises:
        ValueError: If the file exists but loading fails; the underlying
            exception is chained as the cause.
    """
    if not Path(path).exists():
        return EmbeddingIndex()

    try:
        return EmbeddingIndex.load(path)
    except (OSError, json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
        # ValueError is caught here just as load_graph does, so a malformed
        # value is reported with the same stable "failed to load index"
        # prefix as every other load failure instead of escaping unwrapped.
        raise ValueError(f"failed to load index: {path}: {exc}") from exc
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_query_stats(path: str | None) -> dict[str, Any]:
    """Load the query-stats JSON object stored at ``path``.

    Args:
        path: Location of a UTF-8 JSON file, or ``None`` when no stats
            file was supplied.

    Returns:
        The decoded JSON object, or an empty dict when ``path`` is ``None``.

    Raises:
        FileNotFoundError: If ``path`` is given but nothing exists there.
        ValueError: If the file cannot be read, is not valid UTF-8, is not
            valid JSON, or its top-level value is not a JSON object.
    """
    if path is None:
        return {}

    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"query-stats file not found: {path}")

    try:
        stats = json.loads(file_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised by read_text for non-UTF-8 bytes, so
        # every unreadable file yields the same stable message.
        raise ValueError(f"failed to load query-stats: {path}: {exc}") from exc

    if not isinstance(stats, dict):
        raise ValueError(f"query-stats must be a JSON object: {path}")

    return stats
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def load_mitosis_state(path: str | None) -> MitosisState:
    """Load persisted mitosis state from the JSON file at ``path``.

    Args:
        path: Location of a UTF-8 JSON file, or ``None`` when no state
            file was supplied.

    Returns:
        A default ``MitosisState()`` when ``path`` is ``None``; otherwise a
        ``MitosisState`` built from the ``families``, ``generations`` and
        ``chunk_to_parent`` entries of the JSON object (each defaulting to
        an empty dict when absent).

    Raises:
        FileNotFoundError: If ``path`` is given but nothing exists there.
        ValueError: If the file cannot be read, is not valid UTF-8, is not
            valid JSON, or its top-level value is not a JSON object.
    """
    if path is None:
        return MitosisState()

    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"mitosis-state file not found: {path}")

    try:
        state_data = json.loads(file_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised by read_text for non-UTF-8 bytes, so
        # every unreadable file yields the same stable message.
        raise ValueError(f"failed to load mitosis-state: {path}: {exc}") from exc

    if not isinstance(state_data, dict):
        raise ValueError(f"mitosis-state must be a JSON object: {path}")

    return MitosisState(
        families=state_data.get("families", {}),
        generations=state_data.get("generations", {}),
        chunk_to_parent=state_data.get("chunk_to_parent", {}),
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def load_snapshot_rows(path: Path) -> list[dict[str, Any]]:
    """Parse a JSON-lines snapshots file into a list of row dicts.

    Blank (whitespace-only) lines are ignored.

    Args:
        path: Location of the snapshots file.

    Returns:
        One dict per non-blank line, in file order; an empty list when
        ``path`` does not exist.

    Raises:
        ValueError: If a line is not valid JSON or does not decode to a
            JSON object.
    """
    if not path.exists():
        return []

    parsed: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if stripped:
                try:
                    record = json.loads(stripped)
                except json.JSONDecodeError as exc:
                    raise ValueError(f"invalid JSON line in snapshots file: {path}: {exc}") from exc
                if isinstance(record, dict):
                    parsed.append(record)
                else:
                    raise ValueError(f"invalid snapshot row in snapshots file: {path}")
    return parsed
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def split_csv(value: str) -> list[str]:
    """Break a comma-separated string into trimmed, non-empty ids.

    Args:
        value: Comma-separated text; whitespace around items is ignored.

    Returns:
        The non-empty items in their original order.

    Raises:
        ValueError: If no non-empty item remains.
    """
    tokens: list[str] = []
    for chunk in value.split(","):
        trimmed = chunk.strip()
        if trimmed:
            tokens.append(trimmed)

    if tokens:
        return tokens
    raise ValueError("fired-ids must contain at least one id")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def graph_stats(graph: Graph) -> dict[str, Any]:
    """Summarize a graph's size, mean edge weight and busiest nodes.

    Args:
        graph: Object exposing ``edges()``, ``node_count`` and
            ``edge_count``; each edge exposes ``source``, ``target`` and
            ``weight``.

    Returns:
        A dict with ``nodes``, ``edges``, ``avg_weight`` (``0.0`` when there
        are no edges) and ``top_hubs``: up to five node ids ordered by how
        many edge endpoints reference them, ties broken by id.
    """
    all_edges = graph.edges()
    if all_edges:
        mean_weight = sum(e.weight for e in all_edges) / len(all_edges)
    else:
        mean_weight = 0.0

    # Count every appearance of a node as either endpoint of an edge.
    touches: dict[str, int] = {}
    for e in all_edges:
        for endpoint in (e.source, e.target):
            touches[endpoint] = touches.get(endpoint, 0) + 1

    ranked = sorted(touches, key=lambda node_id: (-touches[node_id], node_id))

    return {
        "nodes": graph.node_count,
        "edges": graph.edge_count,
        "avg_weight": mean_weight,
        "top_hubs": ranked[:5],
    }
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def keyword_seed(graph: Graph, query_text: str) -> dict[str, float]:
    """Score nodes by how many distinct query words they contain.

    Matching is a case-insensitive substring test against each node's id
    and content joined by a space.

    Args:
        graph: Object exposing ``nodes()``; each node exposes ``id`` and
            ``content``.
        query_text: Whitespace-separated query words.

    Returns:
        Mapping of node id to the number of distinct query words found
        (as a float). Nodes with no match are omitted; an empty query
        yields an empty dict.
    """
    if not query_text:
        return {}

    # str.split() with no argument never yields empty or padded tokens.
    terms = {word.lower() for word in query_text.split()}

    scores: dict[str, float] = {}
    for node in graph.nodes():
        searchable = f"{node.id} {node.content}".lower()
        hits = sum(1 for term in terms if term in searchable)
        if hits:
            scores[node.id] = float(hits)
    return scores
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def run_query(
    graph: Graph,
    index: EmbeddingIndex,
    query_text: str,
    *,
    top_k: int,
    embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
) -> Firing:
    """Seed the graph from a query and run activation over it.

    Seeds come from ``index.seed`` when an ``embed_fn`` is supplied and the
    index has vectors; if that yields nothing (or raises ``TypeError`` /
    ``ValueError``), ``keyword_seed`` is used instead.

    Args:
        graph: Graph to activate.
        index: Embedding index consulted for seeds.
        query_text: The user query.
        top_k: Passed to both ``index.seed`` and ``activate``.
        embed_fn: Optional embedding callable forwarded to ``index.seed``.

    Returns:
        The result of ``activate`` called with ``max_steps=3``,
        ``decay=0.1``, ``top_k=top_k`` and ``reset=False``.
    """
    seeds: dict[str, float] = {}

    can_embed = embed_fn is not None and index.vectors
    if can_embed:
        try:
            seeds = index.seed(query_text, embed_fn=embed_fn, top_k=top_k)
        except (TypeError, ValueError):
            # Best-effort: an embedding failure falls through to keywords.
            seeds = {}

    seeds = seeds or keyword_seed(graph, query_text)

    return activate(graph, seeds, max_steps=3, decay=0.1, top_k=top_k, reset=False)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def build_firing(graph: Graph, fired_ids: list[str]) -> Firing:
    """Construct a ``Firing`` from an explicit, ordered list of node ids.

    Args:
        graph: Object exposing ``get_node(node_id)``, which returns ``None``
            for unknown ids.
        fired_ids: Node ids in firing order.

    Returns:
        A ``Firing`` whose ``fired`` pairs each resolved node with ``1.0``,
        whose ``inhibited`` list is empty, and whose ``fired_at`` maps each
        id to its position in ``fired_ids`` (the last position wins for
        repeated ids).

    Raises:
        ValueError: If ``fired_ids`` is empty or contains an unknown id.
    """
    if not fired_ids:
        raise ValueError("fired-ids must contain at least one id")

    fired: list[tuple[Any, float]] = []
    for node_id in fired_ids:
        resolved = graph.get_node(node_id)
        if resolved is None:
            raise ValueError(f"unknown node id: {node_id}")
        fired.append((resolved, 1.0))

    positions: dict[str, int] = {
        node_id: position for position, node_id in enumerate(fired_ids)
    }
    return Firing(fired=fired, inhibited=[], fired_at=positions)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def build_snapshot(graph: Graph) -> dict[str, Any]:
    """Capture a timestamped summary of the graph's current structure.

    Args:
        graph: Graph exposing ``node_count`` and ``edge_count``.

    Returns:
        A dict with the current ``time.time()`` under ``timestamp``, the
        node and edge counts, ``tier_counts`` from ``edge_tier_stats`` and
        ``cross_file_edges`` from ``count_cross_file_edges``.
    """
    snapshot: dict[str, Any] = {"timestamp": time.time()}
    snapshot["nodes"] = graph.node_count
    snapshot["edges"] = graph.edge_count
    snapshot["tier_counts"] = edge_tier_stats(graph)
    snapshot["cross_file_edges"] = count_cross_file_edges(graph)
    return snapshot
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def health_metric_available(metric: str, has_query_stats: bool) -> bool:
    """Report whether a health metric can be evaluated.

    Four metrics depend on query statistics; every other metric is always
    available.

    Args:
        metric: Name of the health metric.
        has_query_stats: Whether query statistics were supplied.

    Returns:
        ``has_query_stats`` for the query-dependent metrics, ``True``
        otherwise.
    """
    query_dependent = frozenset(
        (
            "avg_nodes_fired_per_query",
            "context_compression",
            "proto_promotion_rate",
            "reconvergence_rate",
        )
    )
    return has_query_stats if metric in query_dependent else True
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def health_metric_status(
    value: float | None,
    target: tuple[float | None, float | None],
    available: bool,
) -> str:
    """Classify a metric value against its target range.

    Args:
        value: Measured value, or ``None`` when there is none.
        target: ``(minimum, maximum)``; either bound may be ``None`` to
            leave that side open. Both bounds are inclusive.
        available: Whether the metric could be evaluated at all.

    Returns:
        ``"warn"`` when the metric is unavailable or has no value,
        ``"low"`` below the minimum, ``"high"`` above the maximum, and
        ``"ok"`` otherwise.
    """
    if available and value is not None:
        lower, upper = target
        if lower is not None and value < lower:
            return "low"
        too_high = upper is not None and value > upper
        return "high" if too_high else "ok"
    return "warn"
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def build_health_rows(
    health: Any,
    has_query_stats: bool,
) -> list[dict[str, Any]]:
    """Produce one report row per entry in ``HEALTH_TARGETS``.

    Args:
        health: Object whose attributes are looked up by metric name; a
            missing attribute is treated as ``None``.
        has_query_stats: Whether query statistics were supplied; forwarded
            to ``health_metric_available``.

    Returns:
        A list of dicts with ``metric``, ``value`` (``None`` when the
        metric is unavailable), ``target_range`` and ``status``, in
        ``HEALTH_TARGETS`` iteration order.
    """

    def _row(metric: str, target: Any) -> dict[str, Any]:
        available = health_metric_available(metric, has_query_stats)
        raw = getattr(health, metric, None)
        # Only int/float readings are comparable against the target range.
        numeric = float(raw) if isinstance(raw, (int, float)) else None
        return {
            "metric": metric,
            "value": raw if available else None,
            "target_range": target,
            "status": health_metric_status(numeric, target, available),
        }

    return [_row(metric, target) for metric, target in HEALTH_TARGETS.items()]
|
|
crabpath/_structural_utils.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Internal structural helpers shared across modules."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import asdict, dataclass, replace
|
|
8
|
+
from typing import Any, TypeVar
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ConfigBase:
    """Common base for configuration dataclasses.

    Adds copy-with-overrides and plain-dict conversion on top of the
    standard ``dataclasses`` helpers.
    """

    def with_updates(self: "T", **updates: Any) -> "T":
        """Build a new instance with selected fields overridden.

        Args:
            **updates: Field names mapped to their replacement values.

        Returns:
            A copy produced by ``dataclasses.replace``; ``self`` is left
            untouched.
        """
        updated = replace(self, **updates)
        return updated

    def as_dict(self) -> dict[str, Any]:
        """Convert this instance's fields to a plain ``dict`` via ``asdict``."""
        fields_by_name = asdict(self)
        return fields_by_name
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class JSONStateMixin:
    """Small persistence helpers for JSON-backed state."""

    @staticmethod
    def _load_json_file(path: str, default: Any) -> Any:
        """Best-effort read of a UTF-8 JSON file.

        Args:
            path: Location of the file.
            default: Value returned when the file cannot be loaded.

        Returns:
            The decoded JSON value, or ``default`` when the file is
            missing, unreadable, not valid UTF-8, not valid JSON, or
            ``path`` is not a usable path value.
        """
        try:
            with open(path, encoding="utf-8") as f:
                return json.load(f)
        except (OSError, TypeError, ValueError):
            # OSError covers FileNotFoundError; ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError, so a file with
            # undecodable bytes falls back like any other unreadable file.
            return default

    @staticmethod
    def _write_json_file(path: str, payload: Any, *, sort_keys: bool = False) -> None:
        """Write ``payload`` to ``path`` as 2-space-indented UTF-8 JSON.

        Args:
            path: Destination file; overwritten if it exists.
            payload: JSON-serializable value.
            sort_keys: Forwarded to the JSON encoder.

        Raises:
            TypeError: If ``payload`` is not JSON-serializable. The
                destination is left untouched in that case.
        """
        # Serialize before opening: opening with "w" truncates the file, so
        # encoding first keeps existing state intact if the payload is bad.
        text = json.dumps(payload, indent=2, sort_keys=sort_keys)
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _fenced_json_candidates(cleaned: str) -> list[str]:
    """Return the texts to try parsing for *cleaned*, most complete first."""
    if not cleaned.startswith("```"):
        return [cleaned]

    lines = cleaned.splitlines()
    if len(lines) < 2:
        # Opening and closing fence share a single line: ```{...}```
        inner = cleaned[3:]
        if inner.endswith("```"):
            inner = inner[:-3]
        return [inner.strip()]

    # lines[0] is the opening fence (plus an optional language tag).
    body = lines[1:-1]
    last = lines[-1].strip()
    if not last.endswith("```"):
        # Unterminated fence: the last line is ordinary payload.
        return ["\n".join([*body, last]).strip()]

    without_tail = "\n".join(body).strip()
    tail = last[:-3].strip()
    if not tail:
        return [without_tail]
    # Text shares the closing-fence line (e.g. ``}```). Prefer keeping it,
    # but fall back to dropping the whole line if that does not parse.
    return ["\n".join([*body, tail]).strip(), without_tail]


def parse_markdown_json(raw: str, *, require_object: bool = False) -> Any:
    """Parse a JSON payload, stripping a markdown code fence when present.

    A leading fence line (three backticks plus an optional language tag)
    and a trailing fence are removed before decoding. JSON that sits on
    the same line as the closing fence is kept, and a fence that is never
    closed is tolerated.

    Args:
        raw: Model output containing JSON, optionally fenced.
        require_object: When true, reject payloads that are not JSON
            objects.

    Returns:
        The decoded JSON value.

    Raises:
        TypeError: If ``raw`` is not a string, or ``require_object`` is set
            and the payload is not a dict.
        ValueError: If ``raw`` is empty or whitespace-only.
        json.JSONDecodeError: If the remaining text is not valid JSON.
    """
    if not isinstance(raw, str):
        raise TypeError("raw model output must be a string")

    cleaned = raw.strip()
    if not cleaned:
        raise ValueError("empty model output")

    primary, *fallbacks = _fenced_json_candidates(cleaned)
    try:
        payload = json.loads(primary)
    except json.JSONDecodeError:
        if not fallbacks:
            raise
        payload = json.loads(fallbacks[0])

    if require_object and not isinstance(payload, dict):
        raise TypeError("parsed JSON payload must be an object")
    return payload
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def split_fallback_sections(
    content: str,
    *,
    min_header_chars: int = 1,
    min_paragraph_chars: int = 1,
    merge_short_paragraphs: int = 0,
) -> list[str]:
    """Split text structurally: by ``## `` headings first, then paragraphs.

    Args:
        content: Text to split.
        min_header_chars: Heading sections shorter than this (after
            trimming, floor of 1) are discarded.
        min_paragraph_chars: Paragraphs shorter than this (after trimming,
            floor of 1) are discarded.
        merge_short_paragraphs: When positive, a paragraph is appended to
            the previous chunk while that chunk is shorter than this.

    Returns:
        The heading sections when at least two survive; otherwise the
        paragraphs (merged if requested and merging leaves at least two
        chunks) when at least two survive; otherwise ``[content]``
        unchanged.
    """
    sections: list[str] = []
    for piece in re.split(r"\n(?=## )", content):
        text = piece.strip()
        if text and len(text) >= max(1, min_header_chars):
            sections.append(text)
    if len(sections) > 1:
        return sections

    paragraphs: list[str] = []
    for piece in content.split("\n\n"):
        text = piece.strip()
        if text and len(text) >= max(1, min_paragraph_chars):
            paragraphs.append(text)
    if len(paragraphs) < 2:
        return [content]

    if merge_short_paragraphs <= 0:
        return paragraphs

    combined: list[str] = []
    for paragraph in paragraphs:
        if combined and len(combined[-1]) < merge_short_paragraphs:
            combined[-1] = combined[-1] + "\n\n" + paragraph
        else:
            combined.append(paragraph)

    # Merging that collapses everything into one chunk is not a split;
    # keep the unmerged paragraphs in that case.
    return combined if len(combined) > 1 else paragraphs
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def node_file_id(node_id: Any) -> str:
    """Reduce a node id to the portion before its first ``::`` separator.

    Args:
        node_id: Any value; it is converted with ``str`` first.

    Returns:
        The text preceding the first ``::``, or the whole string when no
        separator is present.
    """
    prefix, _, _ = str(node_id).partition("::")
    return prefix
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def count_cross_file_edges(graph: Any) -> int:
    """Count edges whose endpoints normalize to different file ids.

    Args:
        graph: Object exposing ``edges()``; each edge exposes ``source``
            and ``target``. A ``node_count`` attribute of at most one (or
            a missing attribute) short-circuits to zero.

    Returns:
        The number of edges for which ``node_file_id`` differs between
        source and target.
    """
    if getattr(graph, "node_count", 0) <= 1:
        return 0

    crossing = 0
    for edge in graph.edges():
        if node_file_id(edge.source) != node_file_id(edge.target):
            crossing += 1
    return crossing
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def classify_edge_tier(
    weight: float,
    *,
    dormant_threshold: float = 0.3,
    reflex_threshold: float = 0.8,
) -> str:
    """Name the routing tier that an edge weight falls into.

    Args:
        weight: Edge weight to classify.
        dormant_threshold: Inclusive lower bound of the habitual tier.
        reflex_threshold: Inclusive lower bound of the reflex tier; checked
            first.

    Returns:
        ``"reflex"``, ``"habitual"`` or ``"dormant"``.
    """
    tier = "dormant"
    if weight >= reflex_threshold:
        tier = "reflex"
    elif weight >= dormant_threshold:
        tier = "habitual"
    return tier
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# Type variable for ConfigBase and its subclasses. ConfigBase.with_updates
# refers to it through the string annotation "T", which is why it can be
# defined here, after the class it is bound to.
T = TypeVar("T", bound=ConfigBase)
|
|
127
|
+
|