crabpath 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crabpath-1.0.0/.github/workflows/publish.yml +30 -0
- crabpath-1.0.0/.gitignore +49 -0
- crabpath-1.0.0/ACTIVATION_V2_DESIGN.md +443 -0
- crabpath-1.0.0/ARCHITECTURE_REVIEW.md +204 -0
- crabpath-1.0.0/AUDIT.md +70 -0
- crabpath-1.0.0/AUDIT_REPORT.md +101 -0
- crabpath-1.0.0/BUG_SWEEP_1.md +46 -0
- crabpath-1.0.0/BUG_SWEEP_2.md +59 -0
- crabpath-1.0.0/BUG_SWEEP_3.md +81 -0
- crabpath-1.0.0/BUG_SWEEP_4.md +101 -0
- crabpath-1.0.0/BUG_SWEEP_5.md +63 -0
- crabpath-1.0.0/CHANGELOG.md +101 -0
- crabpath-1.0.0/CODING_PLAN_V2.md +668 -0
- crabpath-1.0.0/CONSOLIDATION_LOG.md +54 -0
- crabpath-1.0.0/CONTRIBUTING.md +55 -0
- crabpath-1.0.0/CRABPATH_V2_SPEC.md +548 -0
- crabpath-1.0.0/EXPERIMENT_PLAN.md +278 -0
- crabpath-1.0.0/IMPLEMENTATION_PLAN.md +381 -0
- crabpath-1.0.0/LICENSE +201 -0
- crabpath-1.0.0/NEUROGENESIS_DESIGN.md +292 -0
- crabpath-1.0.0/PAPER_CODE_AUDIT.md +126 -0
- crabpath-1.0.0/PKG-INFO +423 -0
- crabpath-1.0.0/PLAYBOOK.md +211 -0
- crabpath-1.0.0/README.md +378 -0
- crabpath-1.0.0/REPRODUCIBILITY.md +168 -0
- crabpath-1.0.0/SKILL.md +169 -0
- crabpath-1.0.0/TUNING.md +40 -0
- crabpath-1.0.0/crabpath/__init__.py +191 -0
- crabpath-1.0.0/crabpath/__main__.py +7 -0
- crabpath-1.0.0/crabpath/_io.py +257 -0
- crabpath-1.0.0/crabpath/_structural_utils.py +127 -0
- crabpath-1.0.0/crabpath/adapter.py +452 -0
- crabpath-1.0.0/crabpath/autotune.py +1187 -0
- crabpath-1.0.0/crabpath/cli.py +1279 -0
- crabpath-1.0.0/crabpath/controller.py +612 -0
- crabpath-1.0.0/crabpath/decay.py +94 -0
- crabpath-1.0.0/crabpath/embeddings.py +402 -0
- crabpath-1.0.0/crabpath/feedback.py +451 -0
- crabpath-1.0.0/crabpath/graph.py +828 -0
- crabpath-1.0.0/crabpath/inhibition.py +168 -0
- crabpath-1.0.0/crabpath/learning.py +360 -0
- crabpath-1.0.0/crabpath/legacy/__init__.py +1 -0
- crabpath-1.0.0/crabpath/legacy/activation.py +236 -0
- crabpath-1.0.0/crabpath/lifecycle_sim.py +608 -0
- crabpath-1.0.0/crabpath/mcp_server.py +769 -0
- crabpath-1.0.0/crabpath/migrate.py +686 -0
- crabpath-1.0.0/crabpath/mitosis.py +878 -0
- crabpath-1.0.0/crabpath/py.typed +1 -0
- crabpath-1.0.0/crabpath/router.py +585 -0
- crabpath-1.0.0/crabpath/shadow_logger.py +219 -0
- crabpath-1.0.0/crabpath/synaptogenesis.py +360 -0
- crabpath-1.0.0/crabpath/traversal.py +307 -0
- crabpath-1.0.0/docs/MIGRATION_GUIDE.md +98 -0
- crabpath-1.0.0/examples/agent_memory.py +67 -0
- crabpath-1.0.0/examples/hello_world.py +70 -0
- crabpath-1.0.0/examples/langchain_adapter.py +97 -0
- crabpath-1.0.0/examples/learning_loop.py +233 -0
- crabpath-1.0.0/examples/minimal_shadow.py +41 -0
- crabpath-1.0.0/examples/openai_agent.py +162 -0
- crabpath-1.0.0/examples/openclaw_shadow_hook.sh +30 -0
- crabpath-1.0.0/examples/quickstart.py +49 -0
- crabpath-1.0.0/examples/toy_workspace/README.md +26 -0
- crabpath-1.0.0/examples/toy_workspace/api-reference.md +33 -0
- crabpath-1.0.0/examples/toy_workspace/architecture.md +34 -0
- crabpath-1.0.0/examples/toy_workspace/runbook.md +36 -0
- crabpath-1.0.0/examples/toy_workspace/troubleshooting.md +30 -0
- crabpath-1.0.0/experiments/build_context_bloat_graph.py +776 -0
- crabpath-1.0.0/experiments/build_deploy_pipeline.py +101 -0
- crabpath-1.0.0/experiments/build_forbidden_door.py +24 -0
- crabpath-1.0.0/experiments/build_gate_bloat_graph.py +325 -0
- crabpath-1.0.0/experiments/build_giraffe_test.py +25 -0
- crabpath-1.0.0/experiments/build_negation_graph.py +228 -0
- crabpath-1.0.0/experiments/build_procedure_graph.py +169 -0
- crabpath-1.0.0/experiments/build_stale_context_graph.py +310 -0
- crabpath-1.0.0/experiments/run_all.py +149 -0
- crabpath-1.0.0/experiments/run_comparison.py +265 -0
- crabpath-1.0.0/experiments/run_deploy_sim.py +232 -0
- crabpath-1.0.0/figures/ablation_accuracy.png +0 -0
- crabpath-1.0.0/figures/ablation_query_types.png +0 -0
- crabpath-1.0.0/figures/context_utilization.png +0 -0
- crabpath-1.0.0/figures/deploy_pipeline.png +0 -0
- crabpath-1.0.0/figures/downstream_accuracy.png +0 -0
- crabpath-1.0.0/figures/downstream_qa.png +0 -0
- crabpath-1.0.0/figures/hotpotqa_cold_start.png +0 -0
- crabpath-1.0.0/figures/learning_curve.png +0 -0
- crabpath-1.0.0/figures/niah_multi_needle.png +0 -0
- crabpath-1.0.0/figures/noise_sensitivity.png +0 -0
- crabpath-1.0.0/figures/phase_transition.png +0 -0
- crabpath-1.0.0/figures/procedural_memory.png +0 -0
- crabpath-1.0.0/figures/rag_collapse.png +0 -0
- crabpath-1.0.0/figures/recurring_topic.png +0 -0
- crabpath-1.0.0/figures/ruler_multi_fact.png +0 -0
- crabpath-1.0.0/figures/scaling_curves.png +0 -0
- crabpath-1.0.0/figures/sparsity_crossover.png +0 -0
- crabpath-1.0.0/figures/temporal_drift.png +0 -0
- crabpath-1.0.0/figures/traversal_comparison.png +0 -0
- crabpath-1.0.0/pyproject.toml +63 -0
- crabpath-1.0.0/scenarios/context_bloat.jsonl +20 -0
- crabpath-1.0.0/scenarios/deploy_pipeline.jsonl +15 -0
- crabpath-1.0.0/scenarios/forbidden_door.jsonl +3 -0
- crabpath-1.0.0/scenarios/gate_bloat.jsonl +20 -0
- crabpath-1.0.0/scenarios/giraffe_test.jsonl +10 -0
- crabpath-1.0.0/scenarios/negation.jsonl +11 -0
- crabpath-1.0.0/scenarios/procedure.jsonl +10 -0
- crabpath-1.0.0/scenarios/stale_context.jsonl +30 -0
- crabpath-1.0.0/scratch/paper.tex +1095 -0
- crabpath-1.0.0/scripts/ab_scoring_sim.py +648 -0
- crabpath-1.0.0/scripts/ablation_study.py +950 -0
- crabpath-1.0.0/scripts/bootstrap_from_workspace.py +268 -0
- crabpath-1.0.0/scripts/calibrate_defaults.py +380 -0
- crabpath-1.0.0/scripts/context_noise_drift_benchmark.py +923 -0
- crabpath-1.0.0/scripts/downstream_accuracy_benchmark.py +933 -0
- crabpath-1.0.0/scripts/downstream_qa_benchmark.py +1817 -0
- crabpath-1.0.0/scripts/external_benchmark.py +1554 -0
- crabpath-1.0.0/scripts/generate_figures.py +473 -0
- crabpath-1.0.0/scripts/generate_paper_figures.py +511 -0
- crabpath-1.0.0/scripts/generate_worker2_figures.py +333 -0
- crabpath-1.0.0/scripts/generate_worker3_figures.py +431 -0
- crabpath-1.0.0/scripts/hero_sims.py +721 -0
- crabpath-1.0.0/scripts/hotpot_subset_100.json +1 -0
- crabpath-1.0.0/scripts/migrate_graph_v2.py +95 -0
- crabpath-1.0.0/scripts/niah_scaling_benchmark.py +1046 -0
- crabpath-1.0.0/scripts/phase_transition_plot.py +480 -0
- crabpath-1.0.0/scripts/procedural_memory_sim.py +425 -0
- crabpath-1.0.0/scripts/quickstart.sh +124 -0
- crabpath-1.0.0/scripts/rag_collapse_sim.py +867 -0
- crabpath-1.0.0/scripts/replay_shadow_queries.py +476 -0
- crabpath-1.0.0/scripts/sparsity_scale_experiment.py +856 -0
- crabpath-1.0.0/scripts/standard_ir_metrics.py +683 -0
- crabpath-1.0.0/scripts/traversal_comparison_benchmark.py +341 -0
- crabpath-1.0.0/sim_edge_damping.py +176 -0
- crabpath-1.0.0/tests/__init__.py +1 -0
- crabpath-1.0.0/tests/test_activation.py +452 -0
- crabpath-1.0.0/tests/test_adapter.py +310 -0
- crabpath-1.0.0/tests/test_autotune.py +846 -0
- crabpath-1.0.0/tests/test_bootstrap.py +67 -0
- crabpath-1.0.0/tests/test_cli.py +747 -0
- crabpath-1.0.0/tests/test_consolidation.py +177 -0
- crabpath-1.0.0/tests/test_controller.py +299 -0
- crabpath-1.0.0/tests/test_decay.py +100 -0
- crabpath-1.0.0/tests/test_edge_damping.py +205 -0
- crabpath-1.0.0/tests/test_embeddings.py +219 -0
- crabpath-1.0.0/tests/test_examples.py +30 -0
- crabpath-1.0.0/tests/test_feedback.py +190 -0
- crabpath-1.0.0/tests/test_graph.py +324 -0
- crabpath-1.0.0/tests/test_inhibition.py +95 -0
- crabpath-1.0.0/tests/test_io.py +19 -0
- crabpath-1.0.0/tests/test_learning.py +323 -0
- crabpath-1.0.0/tests/test_lifecycle_sim.py +81 -0
- crabpath-1.0.0/tests/test_mcp_server.py +224 -0
- crabpath-1.0.0/tests/test_migrate.py +270 -0
- crabpath-1.0.0/tests/test_mitosis.py +583 -0
- crabpath-1.0.0/tests/test_neurogenesis.py +251 -0
- crabpath-1.0.0/tests/test_packaging.py +20 -0
- crabpath-1.0.0/tests/test_persistence.py +131 -0
- crabpath-1.0.0/tests/test_router.py +300 -0
- crabpath-1.0.0/tests/test_shadow_logger.py +82 -0
- crabpath-1.0.0/tests/test_shadow_mode_v2_gating.py +63 -0
- crabpath-1.0.0/tests/test_simulator.py +90 -0
- crabpath-1.0.0/tests/test_structural_utils.py +88 -0
- crabpath-1.0.0/tests/test_synaptogenesis.py +385 -0
- crabpath-1.0.0/tests/test_traversal.py +219 -0
- crabpath-1.0.0/tests/test_v2_schema.py +116 -0
- crabpath-1.0.0/tmp_paper.html +705 -0
- crabpath-1.0.0/tools/openapi.yaml +577 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
id-token: write # Required for trusted publishing (OIDC)
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
publish:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
environment: pypi
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
|
|
23
|
+
- name: Install build tools
|
|
24
|
+
run: pip install build
|
|
25
|
+
|
|
26
|
+
- name: Build package
|
|
27
|
+
run: python -m build
|
|
28
|
+
|
|
29
|
+
- name: Publish to PyPI
|
|
30
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.egg
|
|
9
|
+
.eggs/
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.vscode/
|
|
18
|
+
.idea/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
*~
|
|
22
|
+
|
|
23
|
+
# OS
|
|
24
|
+
.DS_Store
|
|
25
|
+
Thumbs.db
|
|
26
|
+
|
|
27
|
+
# Testing
|
|
28
|
+
.coverage
|
|
29
|
+
htmlcov/
|
|
30
|
+
.pytest_cache/
|
|
31
|
+
.mypy_cache/
|
|
32
|
+
|
|
33
|
+
# Data (user-specific)
|
|
34
|
+
*.db
|
|
35
|
+
data/
|
|
36
|
+
scratch/
|
|
37
|
+
!scratch/paper.tex
|
|
38
|
+
|
|
39
|
+
# CrabPath data (keep in private repo)
|
|
40
|
+
*.json
|
|
41
|
+
!package.json
|
|
42
|
+
|
|
43
|
+
# Public benchmark fixtures (required for reproducibility)
|
|
44
|
+
!scripts/hotpot_subset_100.json
|
|
45
|
+
|
|
46
|
+
*.events.db
|
|
47
|
+
snapshots/
|
|
48
|
+
|
|
49
|
+
/ignored
|
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
> **Note:** This design doc is historical. The implementation lives in crabpath/*.py. See ARCHITECTURE_REVIEW.md for current architecture.
|
|
2
|
+
|
|
3
|
+
# Activation v2 Design: LLM-Guided Graph Activation for CrabPath
|
|
4
|
+
|
|
5
|
+
## 0) Scope and baseline
|
|
6
|
+
|
|
7
|
+
`activate()` currently routes energy mechanically with `weight × signal`, and STDP updates weights after the fact in `learn()`. This v2 design introduces an LLM in the propagation loop while preserving the existing graph semantics and API where possible.
|
|
8
|
+
|
|
9
|
+
Current code touchpoints:
|
|
10
|
+
|
|
11
|
+
- `crabpath/graph.py` stores neurons (`Node`) and directed weighted edges (`Edge`).
|
|
12
|
+
- `crabpath/activation.py` runs the synchronous leaky integrate-and-fire pass.
|
|
13
|
+
- `crabpath/adapter.py` provides query/seed/activate flow and existing auto-node creation hooks.
|
|
14
|
+
- `crabpath/neurogenesis.py` provides novelty gating + deterministic node IDs.
|
|
15
|
+
- `crabpath/feedback.py` stores delayed feedback snapshots used for learning.
|
|
16
|
+
- `NEUROGENESIS_DESIGN.md` and `IMPLEMENTATION_PLAN.md` already establish auto-node creation and a habitual-tier control path.
|
|
17
|
+
|
|
18
|
+
Paper context confirms current claim: CrabPath learns what gets loaded, not just what is semantically similar, and encodes sequencing via STDP timing.
|
|
19
|
+
|
|
20
|
+
## 1) ARCHITECTURE: LLM-in-the-loop activation loop
|
|
21
|
+
|
|
22
|
+
### 1.1 Core state and contracts
|
|
23
|
+
|
|
24
|
+
Introduce two optional data structures without changing node/edge dataclasses:
|
|
25
|
+
|
|
26
|
+
- `RouteEvent`: `{source, target, multiplier, confidence, reason, edge_exists, created, flagged_for_removal}`
|
|
27
|
+
- `ActivationContext`: holds user message, fired node states, pending LLM decisions, and per-step route logs.
|
|
28
|
+
|
|
29
|
+
Keep all existing fields in `Firing` and add optional companion object:
|
|
30
|
+
|
|
31
|
+
- `Firing.routes: dict[(source_id, target_id), RouteEvent]`
|
|
32
|
+
- `Firing.route_metadata: dict[str, object]`
|
|
33
|
+
- `Firing.new_nodes: list[dict]`
|
|
34
|
+
- `Firing.prune_candidates: list[dict]`
|
|
35
|
+
|
|
36
|
+
This keeps backward compatibility for current CLI/tests while enabling learning to use router outputs.
|
|
37
|
+
|
|
38
|
+
### 1.2 Activation step-by-step
|
|
39
|
+
|
|
40
|
+
For each `activate_v2(graph, seeds, user_message, max_steps, ...)`:
|
|
41
|
+
|
|
42
|
+
1. Decay traces and apply seeds exactly as today.
|
|
43
|
+
2. For each step:
|
|
44
|
+
3. Compute `to_fire` from mechanical threshold + not-fired check.
|
|
45
|
+
4. For each fired node, collect outgoing edges and classify each candidate with tier policy.
|
|
46
|
+
5. Build a single batched LLM request for the whole step containing all non-reflex candidates.
|
|
47
|
+
6. Apply routing multipliers:
|
|
48
|
+
7. Apply inhibitory and excitatory transfers for each outgoing target.
|
|
49
|
+
8. Seed any newly created nodes into current turn state.
|
|
50
|
+
9. Apply refractory reset and leak.
|
|
51
|
+
10. Optionally create/remove nodes mid-step from LLM decisions.
|
|
52
|
+
11. Continue until `max_steps`.
|
|
53
|
+
|
|
54
|
+
### 1.3 Input format to LLM (per step)
|
|
55
|
+
|
|
56
|
+
- `user_message`: raw user text
|
|
57
|
+
- `global_context`: session tags, open context IDs, turn index, novelty signals
|
|
58
|
+
- `fired_nodes`: up to `max_fired_context` entries with
|
|
59
|
+
- `node_id`, `content`, `energy_at_fire`, `trace`, `step_fired`, `metadata`
|
|
60
|
+
- `outgoing_candidates`: per fired node list of edges with
|
|
61
|
+
- `source_id`, `target_id` (nullable for absent edge), `edge_weight`, `edge_tier`, `seed_overlap`, `edge_context`
|
|
62
|
+
- `missing_concepts`: candidate concept tokens from user not mapped to known nodes
|
|
63
|
+
- `policy`: thresholds and safety constraints
|
|
64
|
+
|
|
65
|
+
Output schema is strict JSON (one-shot parse).
|
|
66
|
+
|
|
67
|
+
### 1.4 Number of LLM calls per activation
|
|
68
|
+
|
|
69
|
+
- `0` calls: all candidates are reflex tier or are short-circuited by the early-bypass confidence gate.
|
|
70
|
+
- `1` call/step: one batched call for all habitual/novel candidates in that step.
|
|
71
|
+
- `max_steps` = 3 by default, so up to `3` routing calls.
|
|
72
|
+
- Optional pre-step novelty call before propagation if no seeds and high novelty risk.
|
|
73
|
+
- Typical upper bound = 4 calls/turn, controlled by budget-aware short-circuit.
|
|
74
|
+
|
|
75
|
+
## 2) COST MODEL
|
|
76
|
+
|
|
77
|
+
### 2.1 Token estimate
|
|
78
|
+
|
|
79
|
+
Let:
|
|
80
|
+
|
|
81
|
+
- `B`: base prompt tokens (~180-320)
|
|
82
|
+
- `N`: fired nodes in request
|
|
83
|
+
- `E`: candidate edges sent to model
|
|
84
|
+
- `C_n`: tokens per node (`~36`)
|
|
85
|
+
- `C_e`: tokens per edge (`~24`)
|
|
86
|
+
|
|
87
|
+
Approx:
|
|
88
|
+
|
|
89
|
+
`input_tokens = B + N * C_n + E * C_e`
|
|
90
|
+
|
|
91
|
+
`output_tokens`: target 120-220 (strict JSON, concise).
|
|
92
|
+
|
|
93
|
+
Example:
|
|
94
|
+
|
|
95
|
+
- `N=8`, `E=24`, `B=240` ⇒ ~840 input tokens.
|
|
96
|
+
- `output=180`.
|
|
97
|
+
|
|
98
|
+
### 2.2 Cost with current public rates (as of 2026-02-25)
|
|
99
|
+
|
|
100
|
+
- OpenAI GPT-4o-mini: $0.15 in / $0.60 out per 1M tokens.
|
|
101
|
+
- 840 in + 180 out ≈ `$0.000228`/call.
|
|
102
|
+
- Gemini 2.5 Flash: $0.30 in / $2.50 out per 1M tokens (text) on the standard tier.
|
|
103
|
+
- 840 in + 180 out ≈ `$0.000774`/call.
|
|
104
|
+
|
|
105
|
+
At 1.0 call/turn:
|
|
106
|
+
|
|
107
|
+
- GPT-4o-mini ≈ `$0.00023`/turn.
|
|
108
|
+
- Gemini Flash ≈ `$0.00077`/turn.
|
|
109
|
+
|
|
110
|
+
At 3 calls/turn:
|
|
111
|
+
|
|
112
|
+
- GPT-4o-mini ≈ `$0.00068`/turn.
|
|
113
|
+
- Gemini Flash ≈ `$0.00232`/turn.
|
|
114
|
+
|
|
115
|
+
### 2.3 Budget ceilings
|
|
116
|
+
|
|
117
|
+
Recommended guardrails:
|
|
118
|
+
|
|
119
|
+
- Hard token ceiling per call: 1200 in, 240 out.
|
|
120
|
+
- Hard spend ceiling per turn: `$0.003` for Gemini, `$0.001` for GPT-mini.
|
|
121
|
+
- Monthly spend kill-switch default: 20% of budget allocated for retrieval/context operations.
|
|
122
|
+
- Budget-aware bypass: if expected tokens × expected calls exceeds ceiling, skip LLM and use mechanical routing.
|
|
123
|
+
|
|
124
|
+
## 3) HYBRID THREE-TIER SYSTEM
|
|
125
|
+
|
|
126
|
+
### 3.1 Tier definitions
|
|
127
|
+
|
|
128
|
+
- Reflex: `edge.weight > 0.8` or `edge.weight < -0.5`.
|
|
129
|
+
- Mechanical routing, no LLM.
|
|
130
|
+
- Habitual: `0.2 <= edge.weight <= 0.8` or `-0.5 <= edge.weight <= -0.2`.
|
|
131
|
+
- LLM routing only.
|
|
132
|
+
- Novel: no edge exists, or concept absent in graph.
|
|
133
|
+
- LLM may create node, choose target, return positive/negative multiplier, and emit routing decision.
|
|
134
|
+
|
|
135
|
+
### 3.2 Decision precedence
|
|
136
|
+
|
|
137
|
+
- If any novel concept is asserted in user message, route a `create_node_here` action at highest priority for that step.
|
|
138
|
+
- Novel node insertion is speculative until step-local validation passes:
|
|
139
|
+
- not duplicate (`deterministic_auto_id`-style check)
|
|
140
|
+
- content quality gate (min chars, no blocked chatter phrases)
|
|
141
|
+
- top-edge relevance threshold met
|
|
142
|
+
|
|
143
|
+
### 3.3 Candidate selection
|
|
144
|
+
|
|
145
|
+
For each fired node, compute outgoing candidates in this order:
|
|
146
|
+
|
|
147
|
+
1. existing reflex candidates
|
|
148
|
+
2. existing habitual candidates
|
|
149
|
+
3. conceptual candidates extracted from user message
|
|
150
|
+
4. recently co-firing symbolic neighbors (for consolidation continuity)
|
|
151
|
+
|
|
152
|
+
## 4) PROMPT ENGINEERING AND FAST INFERENCE CONTRACT
|
|
153
|
+
|
|
154
|
+
### 4.1 Required behavior
|
|
155
|
+
|
|
156
|
+
- `temperature=0`
|
|
157
|
+
- `max_output_tokens=220`
|
|
158
|
+
- `response_format` = JSON (or structured outputs where supported)
|
|
159
|
+
- `stop` on schema end
|
|
160
|
+
- reject hidden chain-of-thought
|
|
161
|
+
|
|
162
|
+
### 4.2 Actual system+user template
|
|
163
|
+
|
|
164
|
+
Use this template exactly with provider-level templating:
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
SYSTEM:
|
|
168
|
+
You are CrabPath Router, a deterministic graph activation policy controller.
|
|
169
|
+
Return strict JSON only, no markdown, no prose.
|
|
170
|
+
- multipliers are in [-1.000, 1.000]
|
|
171
|
+
- do not fabricate target nodes
|
|
172
|
+
- do not return duplicate route entries
|
|
173
|
+
- never create a node whose id is missing or empty
|
|
174
|
+
- if uncertain, use neutral multiplier 0.0 with confidence <= 0.35
|
|
175
|
+
- if user directly negates a concept, prefer negative multiplier for that path
|
|
176
|
+
|
|
177
|
+
USER:
|
|
178
|
+
TURN_ID={{turn_id}}
|
|
179
|
+
MODEL_VERSION={{model_version}}
|
|
180
|
+
USER_MESSAGE={{user_message}}
|
|
181
|
+
SEED_NODES={{top_seed_nodes JSON}}
|
|
182
|
+
FIRED_NODES={{fired_nodes JSON}}
|
|
183
|
+
CANDIDATE_EDGES={{candidate_edges JSON}}
|
|
184
|
+
NOVELTY_CONTEXT={{novelty_signals JSON}}
|
|
185
|
+
POLICY={{policy JSON}}
|
|
186
|
+
|
|
187
|
+
Return JSON with keys:
|
|
188
|
+
{
|
|
189
|
+
"step": int,
|
|
190
|
+
"routes": [
|
|
191
|
+
{
|
|
192
|
+
"source_id": "string",
|
|
193
|
+
"target_id": "string or null",
|
|
194
|
+
"multiplier": float,
|
|
195
|
+
"confidence": 0.0,
|
|
196
|
+
"edge_exists": bool,
|
|
197
|
+
"create_node_here": false,
|
|
198
|
+
"prune_target": false,
|
|
199
|
+
"reason": "short rationale"
|
|
200
|
+
}
|
|
201
|
+
],
|
|
202
|
+
"new_nodes": [
|
|
203
|
+
{
|
|
204
|
+
"node_id": "string optional",
|
|
205
|
+
"content": "string",
|
|
206
|
+
"from_source_ids": ["string"],
|
|
207
|
+
"proposed_threshold": 0.8,
|
|
208
|
+
"initial_in_edges_from": ["string"],
|
|
209
|
+
"proposed_out_edges_to": ["string"],
|
|
210
|
+
"confidence": 0.0
|
|
211
|
+
}
|
|
212
|
+
],
|
|
213
|
+
"prune_candidates": [
|
|
214
|
+
{
|
|
215
|
+
"node_id": "string",
|
|
216
|
+
"reason": "superseded|contradicted|stale",
|
|
217
|
+
"confidence": 0.0
|
|
218
|
+
}
|
|
219
|
+
],
|
|
220
|
+
"route_summary": {
|
|
221
|
+
"total_edges_considered": int,
|
|
222
|
+
"mutations": int,
|
|
223
|
+
"skipped": int
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### 4.3 Output parser (strict)
|
|
229
|
+
|
|
230
|
+
- Parse with UTF-8 JSON only.
|
|
231
|
+
- Validate all entries against schema.
|
|
232
|
+
- Drop rows with out-of-range multiplier.
|
|
233
|
+
- Clamp values to bounds only when value is numeric and parse succeeded.
|
|
234
|
+
- If parse fails, fall back to deterministic baseline.
|
|
235
|
+
|
|
236
|
+
### 4.4 Parsing speed
|
|
237
|
+
|
|
238
|
+
One-shot parser path should be schema-driven and allocation-light:
|
|
239
|
+
|
|
240
|
+
- no regex extraction
|
|
241
|
+
- no markdown stripping beyond basic `.strip()`
|
|
242
|
+
- one `json.loads`
|
|
243
|
+
- fast-fail on schema mismatch by route count cap and numeric checks
|
|
244
|
+
|
|
245
|
+
## 5) LEARNING FROM ROUTING DECISIONS
|
|
246
|
+
|
|
247
|
+
### 5.1 Separation of concerns
|
|
248
|
+
|
|
249
|
+
Introduce a router signal `m_uv` and timing signal `f(dt)` and keep STDP semantics.
|
|
250
|
+
|
|
251
|
+
- `m_uv` comes from LLM multiplier in `[-1,1]` for every candidate edge used in this activation.
|
|
252
|
+
- `f(dt)` is unchanged from current timing factor in `learn()`.
|
|
253
|
+
- For edges without LLM input, fallback `m_uv = tanh(edge.weight / w_ref)`.
|
|
254
|
+
|
|
255
|
+
### 5.2 Edge update rule
|
|
256
|
+
|
|
257
|
+
Let:
|
|
258
|
+
|
|
259
|
+
- `w` = current edge weight.
|
|
260
|
+
- `r` = route multiplier from LLM (or the fallback value when no LLM input exists).
|
|
261
|
+
- `g` = confidence in decision in `[0,1]`.
|
|
262
|
+
- `y` = outcome in `[-1,1]`.
|
|
263
|
+
- `f` = timing factor.
|
|
264
|
+
- `η_route = 0.25` (new default for v2).
|
|
265
|
+
|
|
266
|
+
```
|
|
267
|
+
z = max(-1.0, min(1.0, w / w_ref))
|
|
268
|
+
target = (1.0 - λ_safety) * z + λ_safety * r
|
|
269
|
+
Δw_llm = η_route * y * f * g * (target - z)
|
|
270
|
+
Δw_stdp = η_stdp * y * f
|
|
271
|
+
w_new = clamp(w + Δw_llm + Δw_stdp, -10, 10)
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Take `w_ref = 1.0` for routing-sensitive learning.
|
|
275
|
+
|
|
276
|
+
Interpretation: repeated `r=0.9` pushes effective policy toward high positive routing if outcomes are positive; `r=-0.8` pushes it away. With `η_route=0.25`, each positive outcome moves the weight strongly toward the routed target, and repeated positive outcomes compound that shift, while negative outcomes reverse it. Existing STDP remains present but becomes a smoothed second-order correction with the same `y * f` asymmetry.
|
|
277
|
+
|
|
278
|
+
### 5.3 Edge creation from router
|
|
279
|
+
|
|
280
|
+
If `create_node_here` is requested and novelty is strong:
|
|
281
|
+
|
|
282
|
+
- Add node immediately in same turn.
|
|
283
|
+
- Create provisional weak edges with signed weights from `m_uv * 0.15`.
|
|
284
|
+
- Mark edge metadata:
|
|
285
|
+
- `origin="llm_router"`
|
|
286
|
+
- `created_at_turn`
|
|
287
|
+
- `first_route_mult`
|
|
288
|
+
- `route_count`
|
|
289
|
+
|
|
290
|
+
### 5.4 Bridging to existing `learn()`
|
|
291
|
+
|
|
292
|
+
Keep the `learn()` signature backward-compatible but add optional `route_events` and `route_rate` arguments:
|
|
293
|
+
|
|
294
|
+
- `learn(graph, result, outcome, route_events=None, route_rate=0.25)`.
|
|
295
|
+
- If route events are missing, fall back to behavior identical to current v0.6 mechanics.
|
|
296
|
+
- If present, apply route-smoothed update first, then classical STDP and edge creation gates.
|
|
297
|
+
|
|
298
|
+
## 6) FAILURE MODES AND CIRCUIT BREAKERS
|
|
299
|
+
|
|
300
|
+
### 6.1 Hallucinated edges/nodes
|
|
301
|
+
|
|
302
|
+
- Validate `target_id` must exist unless `create_node_here=true`.
|
|
303
|
+
- For new nodes, apply strict content filters:
|
|
304
|
+
- length 8..220
|
|
305
|
+
- no blocked tokens list (`hello`, `thanks`, etc.)
|
|
306
|
+
- no all-punctuation
|
|
307
|
+
- dedupe with hash + embedding similarity guard
|
|
308
|
+
- Require at least 2/3 agreement between router calls if the same edge appears contradictory across two adjacent steps.
|
|
309
|
+
|
|
310
|
+
If node quality is low:
|
|
311
|
+
|
|
312
|
+
- `state=probationary`
|
|
313
|
+
- not added to durable context ranking
|
|
314
|
+
- becomes persistent only after 2 successful firings or one explicit positive outcome
|
|
315
|
+
|
|
316
|
+
### 6.2 Slow or unstable latency
|
|
317
|
+
|
|
318
|
+
- Timeout per call: 350ms p95 target, 800ms hard cap.
|
|
319
|
+
- On timeout: return mechanical result for that step and mark degraded mode for next step.
|
|
320
|
+
- If 3 consecutive hard-timeouts in a session: disable routing for N=25 turns.
|
|
321
|
+
|
|
322
|
+
### 6.3 Parse/contract failures
|
|
323
|
+
|
|
324
|
+
- Malformed JSON or missing required keys: skip LLM for the step.
|
|
325
|
+
- Invalid numeric values: clamp and continue.
|
|
326
|
+
- Route for non-existing target: ignore and log warning event.
|
|
327
|
+
|
|
328
|
+
### 6.4 Unsafe inhibition and oscillation
|
|
329
|
+
|
|
330
|
+
- Hard cap on negative energy injection per step.
|
|
331
|
+
- If inhibition makes a node oscillate in an above-threshold/rebound pattern for more than 3 turns, clamp the target multiplier to 0.0 for that node/step.
|
|
332
|
+
- Track `inhibited_count_by_node` and auto-quarantine if above anomaly limit.
|
|
333
|
+
|
|
334
|
+
### 6.5 Garbage growth and churn
|
|
335
|
+
|
|
336
|
+
- per-turn new nodes budget: max 2.
|
|
337
|
+
- per-session new nodes budget: configurable (default 1/turn max, 60/session/day).
|
|
338
|
+
- per-edge new route mutation cap per turn: 48.
|
|
339
|
+
- periodic consolidation continues to prune weak edges and orphans.
|
|
340
|
+
|
|
341
|
+
## 7) COMPARISON TO EXISTING WORK
|
|
342
|
+
|
|
343
|
+
### 7.1 GraphRAG (Microsoft)
|
|
344
|
+
|
|
345
|
+
- GraphRAG builds/queries a knowledge graph (entities + relationships) before retrieval; edge traversal is mostly symbolic and static per indexing cycle.
|
|
346
|
+
- CrabPath v2 routes in a running activation dynamics loop with continuous firing/inhibition and outcome feedback.
|
|
347
|
+
- New contribution: edge strengths are policy parameters at inference time, not retrieval indices only.
|
|
348
|
+
|
|
349
|
+
### 7.2 Generative Agents
|
|
350
|
+
|
|
351
|
+
- Generative Agents stores episodic memory and uses prompt logic to pick context from recency and relevance.
|
|
352
|
+
- CrabPath v2 introduces per-edge signed multipliers in a recurrent graph dynamic.
|
|
353
|
+
- Novelty: LLM performs low-latency synaptic-style decisions repeatedly inside the propagation step, not once per response.
|
|
354
|
+
|
|
355
|
+
### 7.3 MemGPT
|
|
356
|
+
|
|
357
|
+
- MemGPT has explicit memory hierarchy and controller actions (write/read/summarize) managed by an LLM.
|
|
358
|
+
- CrabPath v2 keeps graph operations local and differentiable-like (weights/threshold dynamics) while still using compact router signals.
|
|
359
|
+
- Novelty: router is not a full agent controller, only a routing micro-controller.
|
|
360
|
+
|
|
361
|
+
### 7.4 Voyager
|
|
362
|
+
|
|
363
|
+
- Voyager-style agents use long-lived memory plus LLM-evolved skill modules and exploration strategy in environments.
|
|
364
|
+
- CrabPath v2 differs by learning retrieval-policy structure from node co-firing and by having explicit inhibitory edges in the same graph.
|
|
365
|
+
- Novelty: graph edges are both inhibitory and excitatory propagation channels updated by outcomes.
|
|
366
|
+
|
|
367
|
+
### 7.5 Think-on-Graph
|
|
368
|
+
|
|
369
|
+
- Think-on-Graph systems reason over graph nodes/paths externally before answering.
|
|
370
|
+
- CrabPath v2 differs by integrating routing inside a neuron-like activation loop, with local state (`trace/potential`) and timing-dependent learning.
|
|
371
|
+
- Novelty: no external planner is required for every turn; routing is an embedded control law.
|
|
372
|
+
|
|
373
|
+
## 8) IMPLEMENTATION PLAN
|
|
374
|
+
|
|
375
|
+
### 8.1 Phase A: minimal viable v2
|
|
376
|
+
|
|
377
|
+
- Add `RouteEvent` and `ActivationContext` dataclasses in `activation.py`.
|
|
378
|
+
- Extend `activate()` to optionally accept `router` callback and `llm_client`.
|
|
379
|
+
- Add a `--v2` path in `adapter.query()` that collects and forwards router feedback but still writes through the current `learn()` compatibility path.
|
|
380
|
+
- Add `openai` and `gemini` adapters in a new `router.py` with strict schema parser.
|
|
381
|
+
- Add token/cost counters in per-session state.
|
|
382
|
+
- Keep no behavior change when `router=None`.
|
|
383
|
+
|
|
384
|
+
### 8.2 Phase B: LLM-guided routing + real-time neurogenesis
|
|
385
|
+
|
|
386
|
+
- Add `create_node_here` handling and `new_nodes` application during step.
|
|
387
|
+
- Add prune candidates as low-priority metadata flags (deferred deletion).
|
|
388
|
+
- Add `learn(..., route_events=...)` path.
|
|
389
|
+
- Add probationary lifecycle for newly created nodes.
|
|
390
|
+
|
|
391
|
+
### 8.3 Phase C: production hardening
|
|
392
|
+
|
|
393
|
+
- Add circuit-breaker and fallback telemetry.
|
|
394
|
+
- Add strict per-session budget controls and short-circuit confidence gate.
|
|
395
|
+
- Add regression tests for malformed JSON, malformed node IDs, and prune actions.
|
|
396
|
+
|
|
397
|
+
### 8.4 A/B testing design
|
|
398
|
+
|
|
399
|
+
Compare three controllers over same logs:
|
|
400
|
+
|
|
401
|
+
1. Mechanical baseline (current v0.6 behavior).
|
|
402
|
+
2. Mechanical + novelty gating only (no per-edge LLM routing).
|
|
403
|
+
3. Full v2 routing (hybrid tiers + real-time neurogenesis + LLM prune candidates).
|
|
404
|
+
|
|
405
|
+
Use a time-split replay.
|
|
406
|
+
|
|
407
|
+
Primary metrics:
|
|
408
|
+
|
|
409
|
+
- correction-adjusted task success
|
|
410
|
+
- number of user corrections per 100 turns
|
|
411
|
+
- mean activated-node precision/recall against known-good context
|
|
412
|
+
- context token footprint
|
|
413
|
+
- mean latency
|
|
414
|
+
- cost/turn and cost/session
|
|
415
|
+
|
|
416
|
+
Secondary metrics:
|
|
417
|
+
|
|
418
|
+
- edge churn rate
|
|
419
|
+
- node churn rate
|
|
420
|
+
- prune false-positive rate
|
|
421
|
+
- inhibitory recall (do harmful paths get blocked when contradicted?)
|
|
422
|
+
|
|
423
|
+
## 9) PAPER FRAMING: WHAT THIS MEANS
|
|
424
|
+
|
|
425
|
+
This becomes a new claim, not just an optimization patch:
|
|
426
|
+
|
|
427
|
+
1. CrabPath becomes a learned activation policy network where a compact LLM controls a dynamic routing kernel.
|
|
428
|
+
2. The loop is no longer purely mechanical; it is a hybrid controller: mechanical core with learned micro-gating.
|
|
429
|
+
3. The novelty is in treating LLM routing as a low-cost, low-latency control signal and treating STDP as the slow, outcome-conditioned integrator of those micro-decisions.
|
|
430
|
+
4. This gives a stronger novelty claim over prior work:
|
|
431
|
+
- not just retrieval (GraphRAG),
|
|
432
|
+
- not just memory controller (MemGPT),
|
|
433
|
+
- not just planning graph (Think-on-Graph),
|
|
434
|
+
- but a policy graph with explicit inhibition, timing asymmetry, and online concept growth.
|
|
435
|
+
|
|
436
|
+
Potential paper title update:
|
|
437
|
+
|
|
438
|
+
- "LLM-Guided Synaptic Routing in Agent Memory Graphs"
|
|
439
|
+
- Subtitle: "A Hybrid Mechanical-LLM Activation Loop with Timing-Aware Outcome Integration"
|
|
440
|
+
|
|
441
|
+
Suggested reframe in abstract:
|
|
442
|
+
|
|
443
|
+
- "CrabPath v2 shifts from fixed weighted propagation to adaptive graph routing where routing preferences are inferred from context per step, then integrated into long-term edge dynamics via outcome-conditioned, timing-aware updates."
|