crabpath 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crabpath/__init__.py ADDED
@@ -0,0 +1,191 @@
1
+ """
2
+ Most users only need: from crabpath import Graph, MemoryController
3
+
4
+ 🦀 CrabPath: The Graph is the Prompt.
5
+
6
+ LLM-guided memory traversal with learned pointer weights
7
+ and corrected policy gradients.
8
+
9
+ CLI:
10
+ python -m crabpath
11
+ crabpath # via console_scripts entry point
12
+
13
+ Paper: https://jonathangu.com/crabpath/
14
+ """
15
+
16
+ __version__ = "1.0.0"
17
+
18
+ from .adapter import CrabPathAgent, OpenClawCrabPathAdapter
19
+ from .autotune import (
20
+ DEFAULTS,
21
+ HEALTH_TARGETS,
22
+ Adjustment,
23
+ GraphHealth,
24
+ autotune,
25
+ measure_health,
26
+ suggest_config,
27
+ )
28
+ from .controller import ControllerConfig, MemoryController, QueryResult
29
+ from .embeddings import (
30
+ EmbeddingIndex,
31
+ auto_embed,
32
+ cohere_embed,
33
+ gemini_embed,
34
+ ollama_embed,
35
+ openai_embed,
36
+ )
37
+ from .feedback import (
38
+ auto_feedback,
39
+ auto_outcome,
40
+ detect_correction,
41
+ map_correction_to_snapshot,
42
+ score_retrieval,
43
+ )
44
+ from .graph import (
45
+ ConsolidationConfig,
46
+ ConsolidationResult,
47
+ Edge,
48
+ Graph,
49
+ Node,
50
+ consolidate,
51
+ prune_orphan_nodes,
52
+ prune_probationary,
53
+ prune_weak_edges,
54
+ should_split,
55
+ )
56
+ from .inhibition import (
57
+ InhibitionConfig,
58
+ apply_correction,
59
+ get_inhibitory_edges,
60
+ inhibition_stats,
61
+ is_inhibited,
62
+ score_with_inhibition,
63
+ )
64
+ from .learning import LearningConfig, LearningResult, RewardSignal, make_learning_step
65
+ from .legacy.activation import Firing, activate, learn
66
+ from .migrate import MigrateConfig, gather_files, migrate, parse_session_logs
67
+ from .mitosis import (
68
+ BLOCKED_QUERIES,
69
+ MergeResult,
70
+ MitosisConfig,
71
+ MitosisState,
72
+ NeurogenesisConfig,
73
+ NeurogenesisResult,
74
+ NoveltyResult,
75
+ SplitResult,
76
+ assess_novelty,
77
+ bootstrap_workspace,
78
+ connect_new_node,
79
+ create_node,
80
+ deterministic_auto_id,
81
+ find_co_firing_families,
82
+ merge_nodes,
83
+ mitosis_maintenance,
84
+ should_create_node,
85
+ should_merge,
86
+ split_node,
87
+ split_with_llm,
88
+ )
89
+ from .shadow_logger import ShadowLog
90
+ from .synaptogenesis import (
91
+ ProtoEdge,
92
+ SynaptogenesisConfig,
93
+ SynaptogenesisState,
94
+ classify_tier,
95
+ decay_proto_edges,
96
+ edge_tier_stats,
97
+ record_cofiring,
98
+ record_correction,
99
+ record_skips,
100
+ )
101
+
102
# Public API of the package. Each name appears exactly once; the list is
# grouped by how likely a user is to need the symbol.
__all__ = [
    # --- Core (start here) ---
    "Graph",
    "Node",
    "Edge",
    "MemoryController",
    "QueryResult",
    "ControllerConfig",

    # --- Embeddings ---
    "EmbeddingIndex",
    "auto_embed",
    "openai_embed",
    "gemini_embed",
    "cohere_embed",
    "ollama_embed",

    # --- Learning ---
    "activate",
    "learn",
    "Firing",
    "LearningConfig",
    "LearningResult",
    "RewardSignal",
    "make_learning_step",

    # --- Advanced (internals) ---
    "ConsolidationConfig",
    "ConsolidationResult",
    "prune_orphan_nodes",
    "prune_weak_edges",
    "prune_probationary",
    "should_split",
    "consolidate",
    "should_merge",
    "CrabPathAgent",
    "OpenClawCrabPathAdapter",
    "migrate",
    "MigrateConfig",
    "gather_files",
    "parse_session_logs",
    "auto_feedback",
    "auto_outcome",
    "detect_correction",
    "score_retrieval",
    "map_correction_to_snapshot",
    "DEFAULTS",
    "Adjustment",
    "GraphHealth",
    "HEALTH_TARGETS",
    "autotune",
    "measure_health",
    "suggest_config",
    "InhibitionConfig",
    "apply_correction",
    "score_with_inhibition",
    "is_inhibited",
    "get_inhibitory_edges",
    "inhibition_stats",
    "MitosisConfig",
    "MitosisState",
    "SplitResult",
    "MergeResult",
    "NeurogenesisConfig",
    "NeurogenesisResult",
    "NoveltyResult",
    "assess_novelty",
    "connect_new_node",
    "deterministic_auto_id",
    "split_node",
    "split_with_llm",
    "create_node",
    "should_create_node",
    "find_co_firing_families",
    "merge_nodes",
    "bootstrap_workspace",
    "mitosis_maintenance",
    "SynaptogenesisConfig",
    "SynaptogenesisState",
    "ProtoEdge",
    "record_cofiring",
    "record_skips",
    "record_correction",
    "classify_tier",
    "decay_proto_edges",
    "edge_tier_stats",
    "ShadowLog",
    "BLOCKED_QUERIES",
]
crabpath/__main__.py ADDED
@@ -0,0 +1,7 @@
1
"""Package entrypoint for `python -m crabpath`."""

import sys

from .cli import main

if __name__ == "__main__":
    # Hand the value returned by the CLI's main() to sys.exit so it becomes
    # the process exit status.
    sys.exit(main())
crabpath/_io.py ADDED
@@ -0,0 +1,257 @@
1
+ """Shared IO helpers for CLI and MCP entrypoints."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Any, Callable
9
+
10
+ from ._structural_utils import count_cross_file_edges
11
+ from .autotune import HEALTH_TARGETS
12
+ from .embeddings import EmbeddingIndex
13
+ from .graph import Graph
14
+ from .legacy.activation import Firing, activate
15
+ from .mitosis import MitosisState
16
+ from .synaptogenesis import edge_tier_stats
17
+
18
+
19
def load_graph(path: str) -> Graph:
    """Read a graph from ``path`` with stable CLI/MCP error messages.

    Raises:
        FileNotFoundError: if nothing exists at ``path``.
        ValueError: if ``Graph.load`` fails with an OS, JSON, key, type or
            value error; the original exception is chained as the cause.
    """
    if Path(path).exists():
        try:
            loaded = Graph.load(path)
        except (OSError, json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
            raise ValueError(f"failed to load graph: {path}: {exc}") from exc
        return loaded

    raise FileNotFoundError(f"graph file not found: {path}")
29
+
30
+
31
def load_index(path: str) -> EmbeddingIndex:
    """Load an embedding index from disk, or return an empty one if missing.

    Args:
        path: Location of the serialized index.

    Returns:
        The result of ``EmbeddingIndex.load(path)``, or a default-constructed
        ``EmbeddingIndex`` when no file exists at ``path``.

    Raises:
        ValueError: if loading fails; the message always starts with
            ``failed to load index`` and the original error is chained.
    """
    file_path = Path(path)
    if not file_path.exists():
        return EmbeddingIndex()

    try:
        return EmbeddingIndex.load(path)
    except (OSError, json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
        # ValueError is included (as in load_graph) so that value errors
        # raised while loading get the same stable message as other failures.
        raise ValueError(f"failed to load index: {path}: {exc}") from exc
41
+
42
+
43
def load_query_stats(path: str | None) -> dict[str, Any]:
    """Load query statistics from a JSON file.

    Args:
        path: Location of the JSON file, or ``None`` for "no stats".

    Returns:
        The decoded JSON object, or an empty dict when ``path`` is ``None``.

    Raises:
        FileNotFoundError: if ``path`` is given but does not exist.
        ValueError: if the file cannot be read, is not valid UTF-8, is not
            valid JSON, or its top-level value is not a JSON object.
    """
    if path is None:
        return {}

    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"query-stats file not found: {path}")

    try:
        raw = file_path.read_text(encoding="utf-8")
        stats = json.loads(raw)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        # UnicodeDecodeError is wrapped too, so a non-UTF-8 file produces the
        # same stable message as an unreadable or malformed one.
        raise ValueError(f"failed to load query-stats: {path}: {exc}") from exc

    if not isinstance(stats, dict):
        raise ValueError(f"query-stats must be a JSON object: {path}")

    return stats
61
+
62
+
63
def load_mitosis_state(path: str | None) -> MitosisState:
    """Load a ``MitosisState`` from a JSON file.

    Args:
        path: Location of the JSON file, or ``None`` for a fresh state.

    Returns:
        A ``MitosisState`` built from the ``families``, ``generations`` and
        ``chunk_to_parent`` keys of the file (each defaulting to an empty
        dict), or a default-constructed state when ``path`` is ``None``.

    Raises:
        FileNotFoundError: if ``path`` is given but does not exist.
        ValueError: if the file cannot be read, is not valid UTF-8, is not
            valid JSON, or its top-level value is not a JSON object.
    """
    if path is None:
        return MitosisState()

    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"mitosis-state file not found: {path}")

    try:
        raw = file_path.read_text(encoding="utf-8")
        state_data = json.loads(raw)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        # UnicodeDecodeError is wrapped too, so a non-UTF-8 file produces the
        # same stable message as an unreadable or malformed one.
        raise ValueError(f"failed to load mitosis-state: {path}: {exc}") from exc

    if not isinstance(state_data, dict):
        raise ValueError(f"mitosis-state must be a JSON object: {path}")

    return MitosisState(
        families=state_data.get("families", {}),
        generations=state_data.get("generations", {}),
        chunk_to_parent=state_data.get("chunk_to_parent", {}),
    )
85
+
86
+
87
def load_snapshot_rows(path: Path) -> list[dict[str, Any]]:
    """Read a JSON-lines snapshots file into a list of dict rows.

    Blank lines are skipped. A missing file yields an empty list.

    Raises:
        ValueError: if a non-blank line is not valid JSON, or decodes to
            something other than a JSON object.
    """
    collected: list[dict[str, Any]] = []
    if not path.exists():
        return collected

    with path.open(encoding="utf-8") as handle:
        for line in handle:
            stripped = line.strip()
            if stripped:
                try:
                    record = json.loads(stripped)
                except json.JSONDecodeError as exc:
                    raise ValueError(f"invalid JSON line in snapshots file: {path}: {exc}") from exc
                if isinstance(record, dict):
                    collected.append(record)
                else:
                    raise ValueError(f"invalid snapshot row in snapshots file: {path}")

    return collected
106
+
107
+
108
def split_csv(value: str) -> list[str]:
    """Split a comma-separated string into trimmed, non-empty ids.

    Raises:
        ValueError: if no non-empty id remains after trimming.
    """
    trimmed = (piece.strip() for piece in value.split(","))
    ids = [piece for piece in trimmed if piece]
    if ids:
        return ids
    raise ValueError("fired-ids must contain at least one id")
113
+
114
+
115
def graph_stats(graph: Graph) -> dict[str, Any]:
    """Summarise a graph: node/edge counts, mean edge weight and top hubs.

    ``top_hubs`` holds up to five node ids ranked by the number of edge
    endpoints that reference them, ties broken by node id.
    """
    edge_list = graph.edges()

    if edge_list:
        mean_weight = sum(e.weight for e in edge_list) / len(edge_list)
    else:
        mean_weight = 0.0

    # Tally how often each node id appears as either end of an edge.
    endpoint_counts: dict[str, int] = {}
    for e in edge_list:
        for endpoint in (e.source, e.target):
            endpoint_counts[endpoint] = endpoint_counts.get(endpoint, 0) + 1

    ranked = sorted(endpoint_counts.items(), key=lambda pair: (-pair[1], pair[0]))
    hubs = [name for name, _count in ranked[:5]]

    return {
        "nodes": graph.node_count,
        "edges": graph.edge_count,
        "avg_weight": mean_weight,
        "top_hubs": hubs,
    }
132
+
133
+
134
def keyword_seed(graph: Graph, query_text: str) -> dict[str, float]:
    """Score nodes by how many distinct query tokens occur in them.

    Each whitespace-separated, lower-cased token of ``query_text`` that is a
    substring of the lower-cased ``"<id> <content>"`` of a node adds 1.0 to
    that node's score. Nodes with no match are left out of the result.
    """
    if not query_text:
        return {}

    # str.split() with no separator never yields empty or padded tokens.
    tokens = {word.lower() for word in query_text.split()}

    scored: dict[str, float] = {}
    for node in graph.nodes():
        text = f"{node.id} {node.content}".lower()
        hits = sum((1.0 for token in tokens if token in text), 0.0)
        if hits:
            scored[node.id] = hits

    return scored
150
+
151
+
152
def run_query(
    graph: Graph,
    index: EmbeddingIndex,
    query_text: str,
    *,
    top_k: int,
    embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
) -> Firing:
    """Seed the graph for ``query_text`` and run activation over it.

    Seeds come from ``index.seed`` when an ``embed_fn`` is supplied and the
    index has vectors; if that yields nothing (or raises ``TypeError`` /
    ``ValueError``) the keyword-based seeding is used instead.
    """
    seeds: dict[str, float] = {}

    can_embed = embed_fn is not None and index.vectors
    if can_embed:
        try:
            seeds = index.seed(query_text, embed_fn=embed_fn, top_k=top_k)
        except (TypeError, ValueError):
            # Best effort: an embedding failure falls through to keywords.
            seeds = {}

    seeds = seeds or keyword_seed(graph, query_text)

    firing = activate(
        graph,
        seeds,
        max_steps=3,
        decay=0.1,
        top_k=top_k,
        reset=False,
    )
    return firing
178
+
179
+
180
def build_firing(graph: Graph, fired_ids: list[str]) -> Firing:
    """Build a ``Firing`` from an explicit, ordered list of node ids.

    Every node is given a score of 1.0 and ``fired_at`` records the position
    of each id within ``fired_ids``.

    Raises:
        ValueError: if ``fired_ids`` is empty or names a node the graph
            does not have.
    """
    if not fired_ids:
        raise ValueError("fired-ids must contain at least one id")

    fired_pairs: list[tuple[Any, float]] = []
    positions: dict[str, int] = {}
    for position, identifier in enumerate(fired_ids):
        found = graph.get_node(identifier)
        if found is None:
            raise ValueError(f"unknown node id: {identifier}")
        fired_pairs.append((found, 1.0))
        positions[identifier] = position

    return Firing(fired=fired_pairs, inhibited=[], fired_at=positions)
194
+
195
+
196
def build_snapshot(graph: Graph) -> dict[str, Any]:
    """Capture a timestamped summary of the graph's size and edge makeup."""
    snapshot: dict[str, Any] = {"timestamp": time.time()}
    snapshot["nodes"] = graph.node_count
    snapshot["edges"] = graph.edge_count
    snapshot["tier_counts"] = edge_tier_stats(graph)
    snapshot["cross_file_edges"] = count_cross_file_edges(graph)
    return snapshot
204
+
205
+
206
def health_metric_available(metric: str, has_query_stats: bool) -> bool:
    """Tell whether ``metric`` can be reported.

    Four metrics are only available when query stats were supplied; every
    other metric is always available.
    """
    needs_query_stats = {
        "avg_nodes_fired_per_query",
        "context_compression",
        "proto_promotion_rate",
        "reconvergence_rate",
    }
    if metric not in needs_query_stats:
        return True
    return has_query_stats
215
+
216
+
217
def health_metric_status(
    value: float | None,
    target: tuple[float | None, float | None],
    available: bool,
) -> str:
    """Classify ``value`` against a ``(min, max)`` target range.

    Returns ``"warn"`` when the metric is unavailable or has no value,
    ``"low"`` / ``"high"`` when it falls outside a bound that is set, and
    ``"ok"`` otherwise. A bound of ``None`` is not checked.
    """
    if not (available and value is not None):
        return "warn"

    lower, upper = target
    if lower is not None and value < lower:
        verdict = "low"
    elif upper is not None and value > upper:
        verdict = "high"
    else:
        verdict = "ok"
    return verdict
231
+
232
+
233
def build_health_rows(
    health: Any,
    has_query_stats: bool,
) -> list[dict[str, Any]]:
    """Produce one report row per entry in ``HEALTH_TARGETS``.

    Each row carries the metric name, its value read from ``health`` (or
    ``None`` when the metric is unavailable), the target range and a status
    string. Only int/float values are passed on for status classification.
    """
    report: list[dict[str, Any]] = []
    for metric, target in HEALTH_TARGETS.items():
        available = health_metric_available(metric, has_query_stats)
        observed = getattr(health, metric, None)
        numeric = float(observed) if isinstance(observed, (int, float)) else None
        status = health_metric_status(numeric, target, available)
        row = {
            "metric": metric,
            "value": observed if available else None,
            "target_range": target,
            "status": status,
        }
        report.append(row)

    return report
crabpath/_structural_utils.py ADDED
@@ -0,0 +1,127 @@
1
+ """Internal structural helpers shared across modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import asdict, dataclass, replace
8
+ from typing import Any, TypeVar
9
+
10
+
11
@dataclass
class ConfigBase:
    """Base for config-style dataclasses: copy-with-changes and dict export."""

    def with_updates(self: "T", **updates: Any) -> "T":
        """Build a new instance of the same class with ``updates`` applied."""
        updated = replace(self, **updates)
        return updated

    def as_dict(self) -> dict[str, Any]:
        """Convert the dataclass fields into a plain dictionary."""
        plain = asdict(self)
        return plain
22
+
23
+
24
class JSONStateMixin:
    """Small persistence helpers for JSON-backed state."""

    @staticmethod
    def _load_json_file(path: str, default: Any) -> Any:
        """Return the JSON content of ``path``, or ``default`` on failure.

        This is a deliberate best-effort read: a missing or unreadable file,
        a path of the wrong type, malformed JSON and content that is not
        valid UTF-8 all yield ``default`` instead of raising.
        """
        try:
            with open(path, encoding="utf-8") as f:
                return json.load(f)
        except (OSError, TypeError, UnicodeDecodeError, json.JSONDecodeError):
            # OSError covers FileNotFoundError. UnicodeDecodeError is listed
            # so a non-UTF-8 file is treated like any other corrupt file.
            return default

    @staticmethod
    def _write_json_file(path: str, payload: Any, *, sort_keys: bool = False) -> None:
        """Write ``payload`` to ``path`` as 2-space-indented UTF-8 JSON."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, sort_keys=sort_keys)
41
+
42
+
43
def parse_markdown_json(raw: str, *, require_object: bool = False) -> Any:
    """Parse JSON payloads, stripping markdown JSON fences when present.

    When the text starts with a fence and spans at least two lines, the
    opening fence line (including any language tag) is dropped. A closing
    fence is removed whether it sits on its own line or directly after the
    payload on the last line; a missing closing fence is tolerated.

    Args:
        raw: Model output to parse.
        require_object: When true, the decoded value must be a JSON object.

    Returns:
        The decoded JSON value.

    Raises:
        TypeError: if ``raw`` is not a string, or ``require_object`` is set
            and the payload is not an object.
        ValueError: if ``raw`` is blank; ``json.JSONDecodeError`` (a
            ``ValueError``) if the remaining text is not valid JSON.
    """
    if not isinstance(raw, str):
        raise TypeError("raw model output must be a string")

    cleaned = raw.strip()
    if not cleaned:
        raise ValueError("empty model output")

    if cleaned.startswith("```"):
        lines = cleaned.splitlines()
        if len(lines) >= 2:
            body = lines[1:]
            last = body[-1].rstrip()
            if last.endswith("```"):
                # Keep any payload that precedes the fence on the same line
                # instead of discarding the whole line.
                remainder = last[:-3].rstrip()
                body[-1:] = [remainder] if remainder else []
            cleaned = "\n".join(body).strip()

    payload = json.loads(cleaned)
    if require_object and not isinstance(payload, dict):
        raise TypeError("parsed JSON payload must be an object")
    return payload
63
+
64
+
65
def split_fallback_sections(
    content: str,
    *,
    min_header_chars: int = 1,
    min_paragraph_chars: int = 1,
    merge_short_paragraphs: int = 0,
) -> list[str]:
    """Split ``content`` structurally: by ``## `` headings, then paragraphs.

    Heading sections are returned when at least two survive the length
    filter. Otherwise blank-line-separated paragraphs are tried; with
    ``merge_short_paragraphs > 0`` a paragraph is appended to the previous
    chunk while that chunk is shorter than the threshold. If neither split
    produces two pieces, the original content is returned as a single item.
    """
    sections: list[str] = []
    for piece in re.split(r"\n(?=## )", content):
        piece = piece.strip()
        if piece and len(piece) >= max(1, min_header_chars):
            sections.append(piece)
    if len(sections) >= 2:
        return sections

    paragraphs: list[str] = []
    for piece in content.split("\n\n"):
        piece = piece.strip()
        if piece and len(piece) >= max(1, min_paragraph_chars):
            paragraphs.append(piece)
    if len(paragraphs) < 2:
        return [content]

    if merge_short_paragraphs > 0:
        merged: list[str] = paragraphs[:1]
        for paragraph in paragraphs[1:]:
            if len(merged[-1]) >= merge_short_paragraphs:
                merged.append(paragraph)
            else:
                # The running chunk is still too short: glue this one on.
                merged[-1] = f"{merged[-1]}\n\n{paragraph}"
        if len(merged) >= 2:
            return merged

    return paragraphs
93
+
94
+
95
def node_file_id(node_id: Any) -> str:
    """Return the part of ``str(node_id)`` before the first ``::``."""
    file_part, _sep, _rest = str(node_id).partition("::")
    return file_part
98
+
99
+
100
def count_cross_file_edges(graph: Any) -> int:
    """Count edges whose source and target ids map to different file ids.

    Returns 0 straight away when the graph reports at most one node (a
    missing ``node_count`` attribute is treated as 0).
    """
    if getattr(graph, "node_count", 0) <= 1:
        return 0

    crossing = 0
    for edge in graph.edges():
        if node_file_id(edge.source) != node_file_id(edge.target):
            crossing += 1
    return crossing
110
+
111
+
112
def classify_edge_tier(
    weight: float,
    *,
    dormant_threshold: float = 0.3,
    reflex_threshold: float = 0.8,
) -> str:
    """Name the routing tier for an edge weight.

    The reflex threshold is checked first, then the dormant threshold;
    a weight below both is ``"dormant"``.
    """
    tiers = (
        (reflex_threshold, "reflex"),
        (dormant_threshold, "habitual"),
    )
    for floor, tier in tiers:
        if weight >= floor:
            return tier
    return "dormant"
124
+
125
+
126
+ T = TypeVar("T", bound=ConfigBase)
127
+