crabpath 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. crabpath-1.0.0/.github/workflows/publish.yml +30 -0
  2. crabpath-1.0.0/.gitignore +49 -0
  3. crabpath-1.0.0/ACTIVATION_V2_DESIGN.md +443 -0
  4. crabpath-1.0.0/ARCHITECTURE_REVIEW.md +204 -0
  5. crabpath-1.0.0/AUDIT.md +70 -0
  6. crabpath-1.0.0/AUDIT_REPORT.md +101 -0
  7. crabpath-1.0.0/BUG_SWEEP_1.md +46 -0
  8. crabpath-1.0.0/BUG_SWEEP_2.md +59 -0
  9. crabpath-1.0.0/BUG_SWEEP_3.md +81 -0
  10. crabpath-1.0.0/BUG_SWEEP_4.md +101 -0
  11. crabpath-1.0.0/BUG_SWEEP_5.md +63 -0
  12. crabpath-1.0.0/CHANGELOG.md +101 -0
  13. crabpath-1.0.0/CODING_PLAN_V2.md +668 -0
  14. crabpath-1.0.0/CONSOLIDATION_LOG.md +54 -0
  15. crabpath-1.0.0/CONTRIBUTING.md +55 -0
  16. crabpath-1.0.0/CRABPATH_V2_SPEC.md +548 -0
  17. crabpath-1.0.0/EXPERIMENT_PLAN.md +278 -0
  18. crabpath-1.0.0/IMPLEMENTATION_PLAN.md +381 -0
  19. crabpath-1.0.0/LICENSE +201 -0
  20. crabpath-1.0.0/NEUROGENESIS_DESIGN.md +292 -0
  21. crabpath-1.0.0/PAPER_CODE_AUDIT.md +126 -0
  22. crabpath-1.0.0/PKG-INFO +423 -0
  23. crabpath-1.0.0/PLAYBOOK.md +211 -0
  24. crabpath-1.0.0/README.md +378 -0
  25. crabpath-1.0.0/REPRODUCIBILITY.md +168 -0
  26. crabpath-1.0.0/SKILL.md +169 -0
  27. crabpath-1.0.0/TUNING.md +40 -0
  28. crabpath-1.0.0/crabpath/__init__.py +191 -0
  29. crabpath-1.0.0/crabpath/__main__.py +7 -0
  30. crabpath-1.0.0/crabpath/_io.py +257 -0
  31. crabpath-1.0.0/crabpath/_structural_utils.py +127 -0
  32. crabpath-1.0.0/crabpath/adapter.py +452 -0
  33. crabpath-1.0.0/crabpath/autotune.py +1187 -0
  34. crabpath-1.0.0/crabpath/cli.py +1279 -0
  35. crabpath-1.0.0/crabpath/controller.py +612 -0
  36. crabpath-1.0.0/crabpath/decay.py +94 -0
  37. crabpath-1.0.0/crabpath/embeddings.py +402 -0
  38. crabpath-1.0.0/crabpath/feedback.py +451 -0
  39. crabpath-1.0.0/crabpath/graph.py +828 -0
  40. crabpath-1.0.0/crabpath/inhibition.py +168 -0
  41. crabpath-1.0.0/crabpath/learning.py +360 -0
  42. crabpath-1.0.0/crabpath/legacy/__init__.py +1 -0
  43. crabpath-1.0.0/crabpath/legacy/activation.py +236 -0
  44. crabpath-1.0.0/crabpath/lifecycle_sim.py +608 -0
  45. crabpath-1.0.0/crabpath/mcp_server.py +769 -0
  46. crabpath-1.0.0/crabpath/migrate.py +686 -0
  47. crabpath-1.0.0/crabpath/mitosis.py +878 -0
  48. crabpath-1.0.0/crabpath/py.typed +1 -0
  49. crabpath-1.0.0/crabpath/router.py +585 -0
  50. crabpath-1.0.0/crabpath/shadow_logger.py +219 -0
  51. crabpath-1.0.0/crabpath/synaptogenesis.py +360 -0
  52. crabpath-1.0.0/crabpath/traversal.py +307 -0
  53. crabpath-1.0.0/docs/MIGRATION_GUIDE.md +98 -0
  54. crabpath-1.0.0/examples/agent_memory.py +67 -0
  55. crabpath-1.0.0/examples/hello_world.py +70 -0
  56. crabpath-1.0.0/examples/langchain_adapter.py +97 -0
  57. crabpath-1.0.0/examples/learning_loop.py +233 -0
  58. crabpath-1.0.0/examples/minimal_shadow.py +41 -0
  59. crabpath-1.0.0/examples/openai_agent.py +162 -0
  60. crabpath-1.0.0/examples/openclaw_shadow_hook.sh +30 -0
  61. crabpath-1.0.0/examples/quickstart.py +49 -0
  62. crabpath-1.0.0/examples/toy_workspace/README.md +26 -0
  63. crabpath-1.0.0/examples/toy_workspace/api-reference.md +33 -0
  64. crabpath-1.0.0/examples/toy_workspace/architecture.md +34 -0
  65. crabpath-1.0.0/examples/toy_workspace/runbook.md +36 -0
  66. crabpath-1.0.0/examples/toy_workspace/troubleshooting.md +30 -0
  67. crabpath-1.0.0/experiments/build_context_bloat_graph.py +776 -0
  68. crabpath-1.0.0/experiments/build_deploy_pipeline.py +101 -0
  69. crabpath-1.0.0/experiments/build_forbidden_door.py +24 -0
  70. crabpath-1.0.0/experiments/build_gate_bloat_graph.py +325 -0
  71. crabpath-1.0.0/experiments/build_giraffe_test.py +25 -0
  72. crabpath-1.0.0/experiments/build_negation_graph.py +228 -0
  73. crabpath-1.0.0/experiments/build_procedure_graph.py +169 -0
  74. crabpath-1.0.0/experiments/build_stale_context_graph.py +310 -0
  75. crabpath-1.0.0/experiments/run_all.py +149 -0
  76. crabpath-1.0.0/experiments/run_comparison.py +265 -0
  77. crabpath-1.0.0/experiments/run_deploy_sim.py +232 -0
  78. crabpath-1.0.0/figures/ablation_accuracy.png +0 -0
  79. crabpath-1.0.0/figures/ablation_query_types.png +0 -0
  80. crabpath-1.0.0/figures/context_utilization.png +0 -0
  81. crabpath-1.0.0/figures/deploy_pipeline.png +0 -0
  82. crabpath-1.0.0/figures/downstream_accuracy.png +0 -0
  83. crabpath-1.0.0/figures/downstream_qa.png +0 -0
  84. crabpath-1.0.0/figures/hotpotqa_cold_start.png +0 -0
  85. crabpath-1.0.0/figures/learning_curve.png +0 -0
  86. crabpath-1.0.0/figures/niah_multi_needle.png +0 -0
  87. crabpath-1.0.0/figures/noise_sensitivity.png +0 -0
  88. crabpath-1.0.0/figures/phase_transition.png +0 -0
  89. crabpath-1.0.0/figures/procedural_memory.png +0 -0
  90. crabpath-1.0.0/figures/rag_collapse.png +0 -0
  91. crabpath-1.0.0/figures/recurring_topic.png +0 -0
  92. crabpath-1.0.0/figures/ruler_multi_fact.png +0 -0
  93. crabpath-1.0.0/figures/scaling_curves.png +0 -0
  94. crabpath-1.0.0/figures/sparsity_crossover.png +0 -0
  95. crabpath-1.0.0/figures/temporal_drift.png +0 -0
  96. crabpath-1.0.0/figures/traversal_comparison.png +0 -0
  97. crabpath-1.0.0/pyproject.toml +63 -0
  98. crabpath-1.0.0/scenarios/context_bloat.jsonl +20 -0
  99. crabpath-1.0.0/scenarios/deploy_pipeline.jsonl +15 -0
  100. crabpath-1.0.0/scenarios/forbidden_door.jsonl +3 -0
  101. crabpath-1.0.0/scenarios/gate_bloat.jsonl +20 -0
  102. crabpath-1.0.0/scenarios/giraffe_test.jsonl +10 -0
  103. crabpath-1.0.0/scenarios/negation.jsonl +11 -0
  104. crabpath-1.0.0/scenarios/procedure.jsonl +10 -0
  105. crabpath-1.0.0/scenarios/stale_context.jsonl +30 -0
  106. crabpath-1.0.0/scratch/paper.tex +1095 -0
  107. crabpath-1.0.0/scripts/ab_scoring_sim.py +648 -0
  108. crabpath-1.0.0/scripts/ablation_study.py +950 -0
  109. crabpath-1.0.0/scripts/bootstrap_from_workspace.py +268 -0
  110. crabpath-1.0.0/scripts/calibrate_defaults.py +380 -0
  111. crabpath-1.0.0/scripts/context_noise_drift_benchmark.py +923 -0
  112. crabpath-1.0.0/scripts/downstream_accuracy_benchmark.py +933 -0
  113. crabpath-1.0.0/scripts/downstream_qa_benchmark.py +1817 -0
  114. crabpath-1.0.0/scripts/external_benchmark.py +1554 -0
  115. crabpath-1.0.0/scripts/generate_figures.py +473 -0
  116. crabpath-1.0.0/scripts/generate_paper_figures.py +511 -0
  117. crabpath-1.0.0/scripts/generate_worker2_figures.py +333 -0
  118. crabpath-1.0.0/scripts/generate_worker3_figures.py +431 -0
  119. crabpath-1.0.0/scripts/hero_sims.py +721 -0
  120. crabpath-1.0.0/scripts/hotpot_subset_100.json +1 -0
  121. crabpath-1.0.0/scripts/migrate_graph_v2.py +95 -0
  122. crabpath-1.0.0/scripts/niah_scaling_benchmark.py +1046 -0
  123. crabpath-1.0.0/scripts/phase_transition_plot.py +480 -0
  124. crabpath-1.0.0/scripts/procedural_memory_sim.py +425 -0
  125. crabpath-1.0.0/scripts/quickstart.sh +124 -0
  126. crabpath-1.0.0/scripts/rag_collapse_sim.py +867 -0
  127. crabpath-1.0.0/scripts/replay_shadow_queries.py +476 -0
  128. crabpath-1.0.0/scripts/sparsity_scale_experiment.py +856 -0
  129. crabpath-1.0.0/scripts/standard_ir_metrics.py +683 -0
  130. crabpath-1.0.0/scripts/traversal_comparison_benchmark.py +341 -0
  131. crabpath-1.0.0/sim_edge_damping.py +176 -0
  132. crabpath-1.0.0/tests/__init__.py +1 -0
  133. crabpath-1.0.0/tests/test_activation.py +452 -0
  134. crabpath-1.0.0/tests/test_adapter.py +310 -0
  135. crabpath-1.0.0/tests/test_autotune.py +846 -0
  136. crabpath-1.0.0/tests/test_bootstrap.py +67 -0
  137. crabpath-1.0.0/tests/test_cli.py +747 -0
  138. crabpath-1.0.0/tests/test_consolidation.py +177 -0
  139. crabpath-1.0.0/tests/test_controller.py +299 -0
  140. crabpath-1.0.0/tests/test_decay.py +100 -0
  141. crabpath-1.0.0/tests/test_edge_damping.py +205 -0
  142. crabpath-1.0.0/tests/test_embeddings.py +219 -0
  143. crabpath-1.0.0/tests/test_examples.py +30 -0
  144. crabpath-1.0.0/tests/test_feedback.py +190 -0
  145. crabpath-1.0.0/tests/test_graph.py +324 -0
  146. crabpath-1.0.0/tests/test_inhibition.py +95 -0
  147. crabpath-1.0.0/tests/test_io.py +19 -0
  148. crabpath-1.0.0/tests/test_learning.py +323 -0
  149. crabpath-1.0.0/tests/test_lifecycle_sim.py +81 -0
  150. crabpath-1.0.0/tests/test_mcp_server.py +224 -0
  151. crabpath-1.0.0/tests/test_migrate.py +270 -0
  152. crabpath-1.0.0/tests/test_mitosis.py +583 -0
  153. crabpath-1.0.0/tests/test_neurogenesis.py +251 -0
  154. crabpath-1.0.0/tests/test_packaging.py +20 -0
  155. crabpath-1.0.0/tests/test_persistence.py +131 -0
  156. crabpath-1.0.0/tests/test_router.py +300 -0
  157. crabpath-1.0.0/tests/test_shadow_logger.py +82 -0
  158. crabpath-1.0.0/tests/test_shadow_mode_v2_gating.py +63 -0
  159. crabpath-1.0.0/tests/test_simulator.py +90 -0
  160. crabpath-1.0.0/tests/test_structural_utils.py +88 -0
  161. crabpath-1.0.0/tests/test_synaptogenesis.py +385 -0
  162. crabpath-1.0.0/tests/test_traversal.py +219 -0
  163. crabpath-1.0.0/tests/test_v2_schema.py +116 -0
  164. crabpath-1.0.0/tmp_paper.html +705 -0
  165. crabpath-1.0.0/tools/openapi.yaml +577 -0
@@ -0,0 +1,30 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ id-token: write # Required for trusted publishing (OIDC)
10
+ contents: read
11
+
12
+ jobs:
13
+ publish:
14
+ runs-on: ubuntu-latest
15
+ environment: pypi
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+
23
+ - name: Install build tools
24
+ run: pip install build
25
+
26
+ - name: Build package
27
+ run: python -m build
28
+
29
+ - name: Publish to PyPI
30
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,49 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ *.egg
9
+ .eggs/
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+ *~
22
+
23
+ # OS
24
+ .DS_Store
25
+ Thumbs.db
26
+
27
+ # Testing
28
+ .coverage
29
+ htmlcov/
30
+ .pytest_cache/
31
+ .mypy_cache/
32
+
33
+ # Data (user-specific)
34
+ *.db
35
+ data/
36
+ scratch/
37
+ !scratch/paper.tex
38
+
39
+ # CrabPath data (keep in private repo)
40
+ *.json
41
+ !package.json
42
+
43
+ # Public benchmark fixtures (required for reproducibility)
44
+ !scripts/hotpot_subset_100.json
45
+
46
+ *.events.db
47
+ snapshots/
48
+
49
+ /ignored
@@ -0,0 +1,443 @@
1
+ > **Note:** This design doc is historical. The implementation lives in crabpath/*.py. See ARCHITECTURE_REVIEW.md for current architecture.
2
+
3
+ # Activation v2 Design: LLM-Guided Graph Activation for CrabPath
4
+
5
+ ## 0) Scope and baseline
6
+
7
+ `activate()` currently routes energy mechanically with `weight × signal`, and STDP updates weights after the fact in `learn()`. This v2 design introduces an LLM in the propagation loop while preserving the existing graph semantics and API where possible.
8
+
9
+ Current code touchpoints:
10
+
11
+ - `crabpath/graph.py` stores neurons (`Node`) and directed weighted edges (`Edge`).
12
+ - `crabpath/activation.py` runs the synchronous leaky integrate-and-fire pass.
13
+ - `crabpath/adapter.py` provides query/seed/activate flow and existing auto-node creation hooks.
14
+ - `crabpath/neurogenesis.py` provides novelty gating + deterministic node IDs.
15
+ - `crabpath/feedback.py` stores delayed feedback snapshots used for learning.
16
+ - `NEUROGENESIS_DESIGN.md` and `IMPLEMENTATION_PLAN.md` already establish auto-node creation and a habitual-tier control path.
17
+
18
+ Paper context confirms current claim: CrabPath learns what gets loaded, not just what is semantically similar, and encodes sequencing via STDP timing.
19
+
20
+ ## 1) ARCHITECTURE: LLM-in-the-loop activation loop
21
+
22
+ ### 1.1 Core state and contracts
23
+
24
+ Introduce two optional data structures without changing node/edge dataclasses:
25
+
26
+ - `RouteEvent`: `{source, target, multiplier, confidence, reason, edge_exists, created, flagged_for_removal}`
27
+ - `ActivationContext`: holds user message, fired node states, pending LLM decisions, and per-step route logs.
28
+
29
+ Keep all existing fields in `Firing` and add optional companion object:
30
+
31
+ - `Firing.routes: dict[(source_id, target_id), RouteEvent]`
32
+ - `Firing.route_metadata: dict[str, object]`
33
+ - `Firing.new_nodes: list[dict]`
34
+ - `Firing.prune_candidates: list[dict]`
35
+
36
+ This keeps backward compatibility for current CLI/tests while enabling learning to use router outputs.
37
+
38
+ ### 1.2 Activation step-by-step
39
+
40
+ For each `activate_v2(graph, seeds, user_message, max_steps, ...)`:
41
+
42
+ 1. Decay traces and apply seeds exactly as today.
43
+ 2. For each step:
44
+ 3. Compute `to_fire` from mechanical threshold + not-fired check.
45
+ 4. For each fired node, collect outgoing edges and classify each candidate with tier policy.
46
+ 5. Build a single batched LLM request for the whole step containing all non-reflex candidates.
47
+ 6. Apply routing multipliers:
48
+ 7. Apply inhibitory and excitatory transfers for each outgoing target.
49
+ 8. Seed any newly created nodes into current turn state.
50
+ 9. Apply refractory reset and leak.
51
+ 10. Optionally create/remove nodes mid-step from LLM decisions.
52
+ 11. Continue until `max_steps`.
53
+
54
+ ### 1.3 Input format to LLM (per step)
55
+
56
+ - `user_message`: raw user text
57
+ - `global_context`: session tags, open context IDs, turn index, novelty signals
58
+ - `fired_nodes`: up to `max_fired_context` entries with
59
+ - `node_id`, `content`, `energy_at_fire`, `trace`, `step_fired`, `metadata`
60
+ - `outgoing_candidates`: per fired node list of edges with
61
+ - `source_id`, `target_id` (nullable for absent edge), `edge_weight`, `edge_tier`, `seed_overlap`, `edge_context`
62
+ - `missing_concepts`: candidate concept tokens from user not mapped to known nodes
63
+ - `policy`: thresholds and safety constraints
64
+
65
+ Output schema is strict JSON (one-shot parse).
66
+
67
+ ### 1.4 Number of LLM calls per activation
68
+
69
+ - `0` calls: all candidates are reflex tier, or the confidence gate bypasses the LLM early.
70
+ - `1` call/step: one batched call for all habitual/novel candidates in that step.
71
+ - `max_steps` = 3 by default, so up to `3` routing calls.
72
+ - Optional pre-step novelty call before propagation if no seeds and high novelty risk.
73
+ - Typical upper bound = 4 calls/turn, controlled by budget-aware short-circuit.
74
+
75
+ ## 2) COST MODEL
76
+
77
+ ### 2.1 Token estimate
78
+
79
+ Let:
80
+
81
+ - `B`: base prompt tokens (~180-320)
82
+ - `N`: fired nodes in request
83
+ - `E`: candidate edges sent to model
84
+ - `C_n`: tokens per node (`~36`)
85
+ - `C_e`: tokens per edge (`~24`)
86
+
87
+ Approx:
88
+
89
+ `input_tokens = B + N * C_n + E * C_e`
90
+
91
+ `output_tokens`: target 120-220 (strict JSON, concise).
92
+
93
+ Example:
94
+
95
+ - `N=8`, `E=24`, `B=240` ⇒ 240 + 8×36 + 24×24 = ~1104 input tokens.
96
+ - `output=180`.
97
+
98
+ ### 2.2 Cost with current public rates (as of 2026-02-25)
99
+
100
+ - OpenAI GPT-4o-mini: $0.15 in / $0.60 out per 1M tokens.
101
+ - 1104 in + 180 out ≈ `$0.000274`/call.
102
+ - Gemini 2.5 Flash: $0.30 in / $2.50 out per 1M tokens (text) on the standard tier.
103
+ - 1104 in + 180 out ≈ `$0.000781`/call.
104
+
105
+ At 1.0 call/turn:
106
+
107
+ - GPT-4o-mini ≈ `$0.00027`/turn.
108
+ - Gemini Flash ≈ `$0.00078`/turn.
109
+
110
+ At 3 calls/turn:
111
+
112
+ - GPT-4o-mini ≈ `$0.00082`/turn.
113
+ - Gemini Flash ≈ `$0.00234`/turn.
114
+
115
+ ### 2.3 Budget ceilings
116
+
117
+ Recommended guardrails:
118
+
119
+ - Hard token ceiling per call: 1200 in, 240 out.
120
+ - Hard spend ceiling per turn: `$0.003` for Gemini, `$0.001` for GPT-mini.
121
+ - Monthly spend kill-switch default: 20% of budget allocated for retrieval/context operations.
122
+ - Budget-aware bypass: if expected tokens × expected calls exceeds ceiling, skip LLM and use mechanical routing.
123
+
124
+ ## 3) HYBRID THREE-TIER SYSTEM
125
+
126
+ ### 3.1 Tier definitions
127
+
128
+ - Reflex: `edge.weight > 0.8` or `edge.weight < -0.5`.
129
+ - Mechanical routing, no LLM.
130
+ - Habitual: `0.2 <= edge.weight <= 0.8` or `-0.5 <= edge.weight <= -0.2`.
131
+ - LLM routing only.
132
+ - Novel: no edge exists, or concept absent in graph.
133
+ - LLM may create node, choose target, return positive/negative multiplier, and emit routing decision.
134
+
135
+ ### 3.2 Decision precedence
136
+
137
+ - If any novel concept is asserted in user message, route a `create_node_here` action at highest priority for that step.
138
+ - Novel node insertion is speculative until step-local validation passes:
139
+ - not duplicate (`deterministic_auto_id`-style check)
140
+ - content quality gate (min chars, no blocked chatter phrases)
141
+ - top-edge relevance threshold met
142
+
143
+ ### 3.3 Candidate selection
144
+
145
+ For each fired node, compute outgoing candidates in this order:
146
+
147
+ 1. existing reflex candidates
148
+ 2. existing habitual candidates
149
+ 3. conceptual candidates extracted from user message
150
+ 4. recently co-firing symbolic neighbors (for consolidation continuity)
151
+
152
+ ## 4) PROMPT ENGINEERING AND FAST INFERENCE CONTRACT
153
+
154
+ ### 4.1 Required behavior
155
+
156
+ - `temperature=0`
157
+ - `max_output_tokens=220`
158
+ - `response_format` = JSON (or structured outputs where supported)
159
+ - `stop` on schema end
160
+ - reject hidden chain-of-thought
161
+
162
+ ### 4.2 Actual system+user template
163
+
164
+ Use this template exactly with provider-level templating:
165
+
166
+ ```
167
+ SYSTEM:
168
+ You are CrabPath Router, a deterministic graph activation policy controller.
169
+ Return strict JSON only, no markdown, no prose.
170
+ - multipliers are in [-1.000, 1.000]
171
+ - do not fabricate target nodes
172
+ - do not return duplicate route entries
173
+ - never create a node whose id is missing or empty
174
+ - if uncertain, use neutral multiplier 0.0 with confidence <= 0.35
175
+ - if user directly negates a concept, prefer negative multiplier for that path
176
+
177
+ USER:
178
+ TURN_ID={{turn_id}}
179
+ MODEL_VERSION={{model_version}}
180
+ USER_MESSAGE={{user_message}}
181
+ SEED_NODES={{top_seed_nodes JSON}}
182
+ FIRED_NODES={{fired_nodes JSON}}
183
+ CANDIDATE_EDGES={{candidate_edges JSON}}
184
+ NOVELTY_CONTEXT={{novelty_signals JSON}}
185
+ POLICY={{policy JSON}}
186
+
187
+ Return JSON with keys:
188
+ {
189
+ "step": int,
190
+ "routes": [
191
+ {
192
+ "source_id": "string",
193
+ "target_id": "string or null",
194
+ "multiplier": float,
195
+ "confidence": 0.0,
196
+ "edge_exists": bool,
197
+ "create_node_here": false,
198
+ "prune_target": false,
199
+ "reason": "short rationale"
200
+ }
201
+ ],
202
+ "new_nodes": [
203
+ {
204
+ "node_id": "string optional",
205
+ "content": "string",
206
+ "from_source_ids": ["string"],
207
+ "proposed_threshold": 0.8,
208
+ "initial_in_edges_from": ["string"],
209
+ "proposed_out_edges_to": ["string"],
210
+ "confidence": 0.0
211
+ }
212
+ ],
213
+ "prune_candidates": [
214
+ {
215
+ "node_id": "string",
216
+ "reason": "superseded|contradicted|stale",
217
+ "confidence": 0.0
218
+ }
219
+ ],
220
+ "route_summary": {
221
+ "total_edges_considered": int,
222
+ "mutations": int,
223
+ "skipped": int
224
+ }
225
+ }
226
+ ```
227
+
228
+ ### 4.3 Output parser (strict)
229
+
230
+ - Parse with UTF-8 JSON only.
231
+ - Validate all entries against schema.
232
+ - Drop rows with out-of-range multiplier.
233
+ - Clamp values to bounds only when value is numeric and parse succeeded.
234
+ - If parse fails, fall back to deterministic baseline.
235
+
236
+ ### 4.4 Parsing speed
237
+
238
+ One-shot parser path should be schema-driven and allocation-light:
239
+
240
+ - no regex extraction
241
+ - no markdown stripping beyond basic `.strip()`
242
+ - one `json.loads`
243
+ - fast-fail on schema mismatch by route count cap and numeric checks
244
+
245
+ ## 5) LEARNING FROM ROUTING DECISIONS
246
+
247
+ ### 5.1 Separation of concerns
248
+
249
+ Introduce a router signal `m_uv` and timing signal `f(dt)` and keep STDP semantics.
250
+
251
+ - `m_uv` comes from LLM multiplier in `[-1,1]` for every candidate edge used in this activation.
252
+ - `f(dt)` is unchanged from current timing factor in `learn()`.
253
+ - For edges without LLM input, fallback `m_uv = tanh(edge.weight / w_ref)`.
254
+
255
+ ### 5.2 Edge update rule
256
+
257
+ Let:
258
+
259
+ - `w` = current edge weight.
260
+ - `r` = route multiplier from LLM (or the fallback value when no LLM input exists).
261
+ - `g` = confidence in decision in `[0,1]`.
262
+ - `y` = outcome in `[-1,1]`.
263
+ - `f` = timing factor.
264
+ - `η_route = 0.25` (new default for v2).
265
+
266
+ ```
267
+ z = max(-1.0, min(1.0, w / w_ref))
268
+ target = (1.0 - λ_safety) * z + λ_safety * r
269
+ Δw_llm = η_route * y * f * g * (target - z)
270
+ Δw_stdp = η_stdp * y * f
271
+ w_new = clamp(w + Δw_llm + Δw_stdp, -10, 10)
272
+ ```
273
+
274
+ Take `w_ref = 1.0` for routing-sensitive learning.
275
+
276
+ Interpretation: repeated `r=0.9` pushes effective policy toward high positive routing if outcomes are positive; `r=-0.8` pushes away. With `η_route=0.25`, each positive outcome moves the weight substantially toward the routed target, and repeated positive outcomes compound that move, while negative outcomes reverse it. Existing STDP remains present but becomes a smoothed second-order correction with the same `y * f` asymmetry.
277
+
278
+ ### 5.3 Edge creation from router
279
+
280
+ If `create_node_here` is requested and novelty is strong:
281
+
282
+ - Add node immediately in same turn.
283
+ - Create provisional weak edges with signed weights from `m_uv * 0.15`.
284
+ - Mark edge metadata:
285
+ - `origin="llm_router"`
286
+ - `created_at_turn`
287
+ - `first_route_mult`
288
+ - `route_count`
289
+
290
+ ### 5.4 Bridging to existing `learn()`
291
+
292
+ Keep `learn()` signature but add optional `router_feedback` argument:
293
+
294
+ - `learn(graph, result, outcome, route_events=None, route_rate=0.25)`.
295
+ - If route events missing, fallback behavior identical to current v0.6 mechanics.
296
+ - If present, apply route-smoothed update first, then classical STDP and edge creation gates.
297
+
298
+ ## 6) FAILURE MODES AND CIRCUIT BREAKERS
299
+
300
+ ### 6.1 Hallucinated edges/nodes
301
+
302
+ - Validate `target_id` must exist unless `create_node_here=true`.
303
+ - For new nodes, apply strict content filters:
304
+ - length 8..220
305
+ - no blocked tokens list (`hello`, `thanks`, etc.)
306
+ - no all-punctuation
307
+ - dedupe with hash + embedding similarity guard
308
+ - Require at least 2/3 agreement between router calls if same edge appears contradictory across two adjacent steps.
309
+
310
+ If node quality is low:
311
+
312
+ - `state=probationary`
313
+ - not added to durable context ranking
314
+ - only persistent after 2 successful firings or one explicit positive outcome
315
+
316
+ ### 6.2 Slow or unstable latency
317
+
318
+ - Timeout per call: 350ms p95 target, 800ms hard cap.
319
+ - On timeout: return mechanical result for that step and mark degraded mode for next step.
320
+ - If 3 consecutive hard-timeouts in a session: disable routing for N=25 turns.
321
+
322
+ ### 6.3 Parse/contract failures
323
+
324
+ - Malformed JSON or missing required keys: skip LLM for the step.
325
+ - Invalid numeric values: clamp and continue.
326
+ - Route for non-existing target: ignore and log warning event.
327
+
328
+ ### 6.4 Unsafe inhibition and oscillation
329
+
330
+ - Hard cap on negative energy injection per step.
331
+ - If inhibition makes a node oscillate in a threshold-rebound pattern for >3 turns, clamp target multiplier to 0.0 for that node/step.
332
+ - Track `inhibited_count_by_node` and auto-quarantine if above anomaly limit.
333
+
334
+ ### 6.5 Garbage growth and churn
335
+
336
+ - per-turn new nodes budget: max 2.
337
+ - per-session new nodes budget: configurable (default 1/turn max, 60/session/day).
338
+ - per-edge new route mutation cap per turn: 48.
339
+ - periodic consolidation continues to prune weak edges and orphans.
340
+
341
+ ## 7) COMPARISON TO EXISTING WORK
342
+
343
+ ### 7.1 GraphRAG (Microsoft)
344
+
345
+ - GraphRAG builds/queries a knowledge graph (entities + relationships) before retrieval; edge traversal is mostly symbolic and static per indexing cycle.
346
+ - CrabPath v2 routes in a running activation dynamics loop with continuous firing/inhibition and outcome feedback.
347
+ - New contribution: edge strengths are policy parameters at inference time, not retrieval indices only.
348
+
349
+ ### 7.2 Generative Agents
350
+
351
+ - Generative Agents stores episodic memory and uses prompt logic to pick context from recency and relevance.
352
+ - CrabPath v2 introduces per-edge signed multipliers in a recurrent graph dynamic.
353
+ - Novelty: LLM performs low-latency synaptic-style decisions repeatedly inside the propagation step, not once per response.
354
+
355
+ ### 7.3 MemGPT
356
+
357
+ - MemGPT has explicit memory hierarchy and controller actions (write/read/summarize) managed by an LLM.
358
+ - CrabPath v2 keeps graph operations local and differentiable-like (weights/threshold dynamics) while still using compact router signals.
359
+ - Novelty: router is not a full agent controller, only a routing micro-controller.
360
+
361
+ ### 7.4 Voyager
362
+
363
+ - Voyager-style agents use long-lived memory plus LLM-evolved skill modules and exploration strategy in environments.
364
+ - CrabPath v2 differs by learning retrieval-policy structure from node co-firing and by having explicit inhibitory edges in the same graph.
365
+ - Novelty: graph edges are both inhibitory and excitatory propagation channels updated by outcomes.
366
+
367
+ ### 7.5 Think-on-Graph
368
+
369
+ - Think-on-Graph systems reason over graph nodes/paths externally before answering.
370
+ - CrabPath v2 differs by integrating routing inside a neuron-like activation loop, with local state (`trace/potential`) and timing-dependent learning.
371
+ - Novelty: no external planner is required for every turn; routing is an embedded control law.
372
+
373
+ ## 8) IMPLEMENTATION PLAN
374
+
375
+ ### 8.1 Phase A: minimal viable v2
376
+
377
+ - Add `RouteEvent` and `ActivationContext` dataclasses in `activation.py`.
378
+ - Extend `activate()` to optionally accept `router` callback and `llm_client`.
379
+ - Add `--v2` path in `adapter.query()` to collect and forward router feedback while still writing through the current `learn()` compatibility path.
380
+ - Add `openai` and `gemini` adapters in a new `router.py` with strict schema parser.
381
+ - Add token/cost counters in per-session state.
382
+ - Keep no behavior change when `router=None`.
383
+
384
+ ### 8.2 Phase B: LLM-guided routing + real-time neurogenesis
385
+
386
+ - Add `create_node_here` handling and `new_nodes` application during step.
387
+ - Add prune candidates as low-priority metadata flags (deferred deletion).
388
+ - Add `learn(..., route_events=...)` path.
389
+ - Add probationary lifecycle for newly created nodes.
390
+
391
+ ### 8.3 Phase C: production hardening
392
+
393
+ - Add circuit-breaker and fallback telemetry.
394
+ - Add strict per-session budget controls and short-circuit confidence gate.
395
+ - Add regression tests for malformed JSON, malformed node IDs, and prune actions.
396
+
397
+ ### 8.4 A/B testing design
398
+
399
+ Compare three controllers over same logs:
400
+
401
+ 1. Mechanical baseline (current v0.6 behavior).
402
+ 2. Mechanical + novelty gating only (no per-edge LLM routing).
403
+ 3. Full v2 routing (hybrid tiers + real-time neurogenesis + LLM prune candidates).
404
+
405
+ Use a time-split replay.
406
+
407
+ Primary metrics:
408
+
409
+ - task success correction-adjusted
410
+ - number of user corrections per 100 turns
411
+ - mean activated-node precision/recall against known-good context
412
+ - context token footprint
413
+ - mean latency
414
+ - cost/turn and cost/session
415
+
416
+ Secondary metrics:
417
+
418
+ - edge churn rate
419
+ - node churn rate
420
+ - prune false-positive rate
421
+ - inhibitory recall (do harmful paths get blocked when contradicted?)
422
+
423
+ ## 9) PAPER FRAMING: WHAT THIS MEANS
424
+
425
+ This becomes a new claim, not just an optimization patch:
426
+
427
+ 1. CrabPath becomes a learned activation policy network where a compact LLM controls a dynamic routing kernel.
428
+ 2. The loop is no longer purely mechanical; it is a hybrid controller: mechanical core with learned micro-gating.
429
+ 3. The novelty is in treating LLM routing as a low-cost, low-latency control signal and treating STDP as the slow, outcome-conditioned integrator of those micro-decisions.
430
+ 4. This gives a stronger novelty over prior work:
431
+ - not just retrieval (GraphRAG),
432
+ - not just memory controller (MemGPT),
433
+ - not just planning graph (Think-on-Graph),
434
+ - but a policy graph with explicit inhibition, timing asymmetry, and online concept growth.
435
+
436
+ Potential paper title update:
437
+
438
+ - "LLM-Guided Synaptic Routing in Agent Memory Graphs"
439
+ - Subtitle: "A Hybrid Mechanical-LLM Activation Loop with Timing-Aware Outcome Integration"
440
+
441
+ Suggested reframe in abstract:
442
+
443
+ - "CrabPath v2 shifts from fixed weighted propagation to adaptive graph routing where routing preferences are inferred from context per step, then integrated into long-term edge dynamics via outcome-conditioned, timing-aware updates."