amalfa 0.0.0-reserved → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/.biomeignore +19 -0
  2. package/:memory: +0 -0
  3. package/:memory:-shm +0 -0
  4. package/:memory:-wal +0 -0
  5. package/LICENSE +21 -0
  6. package/README.md +343 -13
  7. package/README.old.md +112 -0
  8. package/agents.config.json +11 -0
  9. package/amalfa.config.example.ts +100 -0
  10. package/biome.json +49 -0
  11. package/bun.lock +371 -0
  12. package/docs/AGENT_PROTOCOLS.md +28 -0
  13. package/docs/ARCHITECTURAL_OVERVIEW.md +123 -0
  14. package/docs/BENTO_BOXING_DEPRECATION.md +281 -0
  15. package/docs/Bun-SQLite.html +464 -0
  16. package/docs/COMMIT_GUIDELINES.md +367 -0
  17. package/docs/DEVELOPER_ONBOARDING.md +36 -0
  18. package/docs/Graph and Vector Database Best Practices.md +214 -0
  19. package/docs/PERFORMANCE_BASELINES.md +88 -0
  20. package/docs/REPOSITORY_CLEANUP_SUMMARY.md +261 -0
  21. package/docs/edge-generation-methods.md +57 -0
  22. package/docs/elevator-pitch.md +118 -0
  23. package/docs/graph-and-vector-database-playbook.html +480 -0
  24. package/docs/hardened-sqlite.md +85 -0
  25. package/docs/headless-knowledge-management.md +79 -0
  26. package/docs/john-kaye-flux-prompt.md +46 -0
  27. package/docs/keyboard-shortcuts.md +80 -0
  28. package/docs/opinion-proceed-pattern.md +29 -0
  29. package/docs/polyvis-nodes-edges-schema.md +77 -0
  30. package/docs/protocols/lab-protocol.md +30 -0
  31. package/docs/reaction-iquest-loop-coder.md +46 -0
  32. package/docs/services.md +60 -0
  33. package/docs/sqlite-wal-readonly-trap.md +228 -0
  34. package/docs/strategy/css-architecture.md +40 -0
  35. package/docs/test-document-cycle.md +83 -0
  36. package/docs/test_lifecycle_E2E.md +4 -0
  37. package/docs/the-bicameral-graph.md +83 -0
  38. package/docs/user-guide.md +70 -0
  39. package/docs/vision-helper.md +53 -0
  40. package/drizzle/0000_minor_iron_fist.sql +19 -0
  41. package/drizzle/meta/0000_snapshot.json +139 -0
  42. package/drizzle/meta/_journal.json +13 -0
  43. package/example_usage.ts +39 -0
  44. package/experiment.sh +35 -0
  45. package/hello +2 -0
  46. package/index.html +52 -0
  47. package/knowledge/excalibur.md +12 -0
  48. package/package.json +60 -15
  49. package/plans/experience-graph-integration.md +60 -0
  50. package/prompts/gemini-king-mode-prompt.md +46 -0
  51. package/public/docs/MCP_TOOLS.md +372 -0
  52. package/schemas/README.md +20 -0
  53. package/schemas/cda.schema.json +84 -0
  54. package/schemas/conceptual-lexicon.schema.json +75 -0
  55. package/scratchpads/dummy-debrief-boxed.md +39 -0
  56. package/scratchpads/dummy-debrief.md +27 -0
  57. package/scratchpads/scratchpad-design.md +50 -0
  58. package/scratchpads/scratchpad-scrolling.md +20 -0
  59. package/scratchpads/scratchpad-toc-disappearance.md +23 -0
  60. package/scratchpads/scratchpad-toc.md +28 -0
  61. package/scratchpads/test_gardener.md +7 -0
  62. package/src/EnlightenedTriad.ts +146 -0
  63. package/src/JIT_Triad.ts +137 -0
  64. package/src/cli.ts +318 -0
  65. package/src/config/constants.ts +7 -0
  66. package/src/config/defaults.ts +81 -0
  67. package/src/core/BentoNormalizer.ts +113 -0
  68. package/src/core/EdgeWeaver.ts +145 -0
  69. package/src/core/FractureLogic.ts +22 -0
  70. package/src/core/Harvester.ts +73 -0
  71. package/src/core/LLMClient.ts +93 -0
  72. package/src/core/LouvainGate.ts +67 -0
  73. package/src/core/MarkdownMasker.ts +49 -0
  74. package/src/core/README.md +11 -0
  75. package/src/core/SemanticMatcher.ts +89 -0
  76. package/src/core/SemanticWeaver.ts +96 -0
  77. package/src/core/TagEngine.ts +56 -0
  78. package/src/core/TimelineWeaver.ts +61 -0
  79. package/src/core/VectorEngine.ts +232 -0
  80. package/src/daemon/index.ts +221 -0
  81. package/src/data/experience/test_doc_1.md +2 -0
  82. package/src/data/experience/test_doc_2.md +2 -0
  83. package/src/db/schema.ts +46 -0
  84. package/src/demo-triad.ts +45 -0
  85. package/src/gardeners/AutoTagger.ts +116 -0
  86. package/src/gardeners/BaseGardener.ts +55 -0
  87. package/src/llm/EnlightenedProvider.ts +95 -0
  88. package/src/mcp/README.md +6 -0
  89. package/src/mcp/index.ts +341 -0
  90. package/src/pipeline/AmalfaIngestor.ts +262 -0
  91. package/src/pipeline/HarvesterPipeline.ts +101 -0
  92. package/src/pipeline/Ingestor.ts +555 -0
  93. package/src/pipeline/README.md +7 -0
  94. package/src/pipeline/SemanticHarvester.ts +222 -0
  95. package/src/resonance/DatabaseFactory.ts +100 -0
  96. package/src/resonance/README.md +148 -0
  97. package/src/resonance/cli/README.md +7 -0
  98. package/src/resonance/cli/ingest.ts +41 -0
  99. package/src/resonance/cli/migrate.ts +54 -0
  100. package/src/resonance/config.ts +40 -0
  101. package/src/resonance/daemon.ts +236 -0
  102. package/src/resonance/db.ts +422 -0
  103. package/src/resonance/pipeline/README.md +7 -0
  104. package/src/resonance/pipeline/extract.ts +89 -0
  105. package/src/resonance/pipeline/transform_docs.ts +60 -0
  106. package/src/resonance/schema.ts +138 -0
  107. package/src/resonance/services/embedder.ts +131 -0
  108. package/src/resonance/services/simpleTokenizer.ts +119 -0
  109. package/src/resonance/services/stats.ts +327 -0
  110. package/src/resonance/services/tokenizer.ts +159 -0
  111. package/src/resonance/transform/cda.ts +393 -0
  112. package/src/resonance/types/enriched-cda.ts +112 -0
  113. package/src/services/README.md +56 -0
  114. package/src/services/llama.ts +59 -0
  115. package/src/services/llamauv.ts +56 -0
  116. package/src/services/olmo3.ts +58 -0
  117. package/src/services/phi.ts +52 -0
  118. package/src/types/artifact.ts +12 -0
  119. package/src/utils/EnvironmentVerifier.ts +67 -0
  120. package/src/utils/Logger.ts +21 -0
  121. package/src/utils/ServiceLifecycle.ts +207 -0
  122. package/src/utils/ZombieDefense.ts +244 -0
  123. package/src/utils/validator.ts +264 -0
  124. package/substack/substack-playbook-1.md +95 -0
  125. package/substack/substack-playbook-2.md +78 -0
  126. package/tasks/ui-investigation.md +26 -0
  127. package/test-db +0 -0
  128. package/test-db-shm +0 -0
  129. package/test-db-wal +0 -0
  130. package/tests/canary/verify_pinch_check.ts +44 -0
  131. package/tests/fixtures/ingest_test.md +12 -0
  132. package/tests/fixtures/ingest_test_boxed.md +13 -0
  133. package/tests/fixtures/safety_test.md +45 -0
  134. package/tests/fixtures/safety_test_boxed.md +49 -0
  135. package/tests/fixtures/tagged_output.md +49 -0
  136. package/tests/fixtures/tagged_test.md +49 -0
  137. package/tests/mcp-server-settings.json +8 -0
  138. package/tsconfig.json +46 -0
  139. package/verify-embedder.ts +54 -0
@@ -0,0 +1,88 @@
1
+ # Performance Baselines & Benchmarks
2
+
3
+ > **Last Updated:** 2025-12-16
4
+ > **Device:** Apple Silicon (M-series)
5
+ > **Node/Bun:** Bun v1.3.4+
6
+
7
+ ## 1. Memory Footprint (Daemon)
8
+ The Daemon runtime consists of the `Core Kernel`, `VectorEngine (WASM)`, and `Data Graph`.
9
+
10
+ | Component | RAM Usage (RSS) | Scaling Nature | Notes |
11
+ | :--- | :--- | :--- | :--- |
12
+ | **AI Model (fastembed)** | **~252 MB** | 🛑 **Fixed** | Unavoidable one-time cost for local embedding. |
13
+ | **Daemon Runtime** | **~60 MB** | 🛑 **Fixed** | Bun runtime + SQLite + Kernel overhead. |
14
+ | **Graph Structure** | **~14 kB / node** | 🟢 **Variable** | Graphology in-memory graph representation. |
15
+ | **Raw Data** | **~9 kB / node** | 🟢 **Variable** | Text content and metadata objects. |
16
+ | **Vectors** | **~2 kB / node** | 🟢 **Variable** | Float32Arrays and normalized buffers. |
17
+
18
+ ### Total Daemon Footprint
19
+ - **Empty State**: ~310 MB
20
+ - **Current State (429 nodes)**: ~320 MB
21
+ - **PROJECTED (10k nodes)**: ~560 MB
22
+
23
+ ## 2. Storage Footprint (Disk)
24
+ Benchmarks from `public/resonance.db`.
25
+
26
+ | Metric | Size / Count | Notes |
27
+ | :--- | :--- | :--- |
28
+ | **DB File Size** | 6.09 MB | SQLite WAL mode enabled. |
29
+ | **Vector Data** | 1.42 MB | Blob storage. |
30
+ | **Text Content** | 1.00 MB | Raw text in `content` column. |
31
+ | **Node Count** | 429 | Including Experience & Persona domains. |
32
+
33
+ ## 3. How to Measure
34
+ ### Memory
35
+ 1. Ensure `graphology` is installed (temporarily): `bun add graphology`.
36
+ 2. Uncomment the graphology section in `scripts/profile_memory.ts`.
37
+ 3. Run: `bun run scripts/profile_memory.ts`.
38
+ 4. Revert: `bun remove graphology`.
39
+
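The profiling script itself is not part of this diff, so here is a minimal sketch of the underlying idea: snapshot RSS before and after building a synthetic graphology graph and divide the delta by the node count. The node count and attribute shapes are illustrative assumptions, not what `scripts/profile_memory.ts` actually does.

```typescript
// Sketch only: estimate per-node graph overhead from the RSS delta.
// Assumes graphology is temporarily installed (`bun add graphology`).
import Graph from "graphology";

const rssMB = () => process.memoryUsage().rss / 1024 / 1024;

const before = rssMB();

const graph = new Graph();
const NODES = 10_000;
for (let i = 0; i < NODES; i++) {
  graph.addNode(`node-${i}`, { label: `Node ${i}`, domain: "experience" });
  if (i > 0) graph.addEdge(`node-${i - 1}`, `node-${i}`);
}

const after = rssMB();
console.log(`RSS before: ${before.toFixed(1)} MB, after: ${after.toFixed(1)} MB`);
console.log(`~${(((after - before) * 1024) / NODES).toFixed(1)} kB per node`);
```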
40
+ ### Disk / DB Stats
41
+ 1. Run: `bun run scripts/assess_db_weight.ts`.
42
+
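`scripts/assess_db_weight.ts` is likewise not reproduced in this diff; the sketch below shows the kind of read-only queries that could produce the table in section 2. The `nodes` table and its `content` column are assumptions for illustration, not the confirmed schema.

```typescript
// Sketch only: size metrics comparable to the Storage Footprint table.
// Assumes a `nodes` table with a `content` text column; adjust to the real schema.
import { Database } from "bun:sqlite";

const db = new Database("public/resonance.db", { readonly: true });

const { page_count } = db.query("PRAGMA page_count").get() as { page_count: number };
const { page_size } = db.query("PRAGMA page_size").get() as { page_size: number };
console.log(`DB file size: ${((page_count * page_size) / 1024 / 1024).toFixed(2)} MB`);

const { bytes } = db
  .query("SELECT SUM(LENGTH(content)) AS bytes FROM nodes")
  .get() as { bytes: number };
console.log(`Text content: ${(bytes / 1024 / 1024).toFixed(2)} MB`);

db.close();
```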
43
+ ## 4. Ingestion Baselines (Comparison)
44
+
45
+ > **Baseline Source:** `_misc/ingestion-baseline.json` (2025-12-11)
46
+
47
+ | Metric | Baseline (Dec 11) | Current (Dec 16) | Delta | Notes |
48
+ | :--- | :--- | :--- | :--- | :--- |
49
+ | **Persona Nodes** | 185 | 185 | **0** | Stable. Core lexicon. |
50
+ | **Experience Nodes** | 128 | 244 | **+116** | Significant growth (new sessions/debriefs). |
51
+ | **Total Nodes** | 313 | 429 | **+116** | ~37% Growth. |
52
+ | **Total Edges** | 498 | 631 | **+133** | ~27% Growth. |
53
+ | **Total Vectors** | 289 | 242 | **-47** | 📉 **Intentional Optimization** |
54
+
55
+ ### Insights
56
+ - **Vector Efficiency**: Despite node growth (+37%), vector count dropped (-16%). This reflects the **"Narrative Vector Strategy"**: only high-value content (Concepts, Playbooks) is vectorized, while structural nodes (Logs, raw fragments) are skipped.
57
+ - **Graph Connectivity**: Edge growth (+27%) lags node growth (+37%) only slightly; edges per node dipped from ~1.6 to ~1.5, so the graph remains well connected.
58
+
59
+ ## 5. Speed Benchmarks (Dec 16)
60
+
61
+ > **Environment**: Apple Silicon (M-series) | Bun v1.3.4
62
+
63
+ | Operation | Latency | Notes |
64
+ | :--- | :--- | :--- |
65
+ | **Model Load (Cold)** | **~192 ms** | One-time initialization cost. |
66
+ | **Vector Search** | **~71 ms** | Avg of 10 runs (Top-5 search). |
67
+ | **SQL Insert (Raw)** | **~0.001 ms/row** | Batch prepared statement (Buffered). |
68
+ | **SQL Insert (ORM)** | **~0.012 ms/row** | Drizzle ORM overhead (~12x slower than raw). |
69
+
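For readers unfamiliar with the raw-vs-ORM gap, here is a minimal `bun:sqlite` sketch of the buffered pattern behind the ~0.001 ms/row figure: one prepared statement replayed inside a single transaction. The table name and columns are illustrative, not the project schema.

```typescript
// Sketch only: batch insert via a prepared statement wrapped in one transaction.
// The `nodes` table here is illustrative, not the actual schema.
import { Database } from "bun:sqlite";

const db = new Database(":memory:");
db.run("CREATE TABLE nodes (id TEXT PRIMARY KEY, content TEXT)");

const insert = db.prepare("INSERT INTO nodes (id, content) VALUES (?, ?)");

// A single transaction amortises commit overhead across all rows.
const insertMany = db.transaction((rows: Array<[string, string]>) => {
  for (const [id, content] of rows) insert.run(id, content);
});

const rows: Array<[string, string]> = Array.from({ length: 10_000 }, (_, i) => [
  `node-${i}`,
  `content ${i}`,
]);

const start = performance.now();
insertMany(rows);
console.log(`${((performance.now() - start) / rows.length).toFixed(4)} ms/row`);
```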
70
+ ## 6. Vector Inclusion Rules (Audit)
71
+
72
+ > **Policy:** "Everything in the folders noted in the settings file should be in the vector store."
73
+
74
+ **Audit Results (Dec 16):**
75
+
76
+ | Domain | Source | Status | Notes |
77
+ | :--- | :--- | :--- | :--- |
78
+ | **Experience** | `debriefs/` | ✅ **100% Vectorized** | Narrative content. |
79
+ | **Experience** | `playbooks/` | ✅ **100% Vectorized** | Procedural knowledge. |
80
+ | **Experience** | `briefs/` | ✅ **100% Vectorized** | Context setting. |
81
+ | **Experience** | `docs/` | ✅ **100% Vectorized** | Project documentation. |
82
+ | **Persona** | `lexicon.json` | ⚪ **Excluded** | *Optimization*: Concepts matched via Keywords/Graph. |
83
+ | **Persona** | `cda.json` | ⚪ **Excluded** | *Optimization*: Directives matched via Keywords/Graph. |
84
+ | **Experience** | `test-artifacts` | ⚪ **Excluded** | *Transient*: `test-doc-1` & `test-doc-2` (from test scripts). |
85
+
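A hedged sketch of how such an audit could be run against `public/resonance.db`; the `nodes`/`vectors` table names, the `node_id` join column, and the `type = 'document'` filter are all assumptions about the schema, used only to illustrate the shape of the query.

```typescript
// Sketch only: list document nodes that have no stored vector.
// Table and column names are assumptions, not the real schema.
import { Database } from "bun:sqlite";

const db = new Database("public/resonance.db", { readonly: true });

const unvectorized = db
  .query(
    `SELECT n.id, n.type
       FROM nodes n
       LEFT JOIN vectors v ON v.node_id = n.id
      WHERE n.type = 'document' AND v.node_id IS NULL`
  )
  .all();

console.log(`Un-vectorized documents: ${unvectorized.length}`);
console.table(unvectorized);

db.close();
```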
86
+ **Conclusion:**
87
+ The logic complies with the rule. All folder-based narrative content is vectorized. File-based structured data (Persona) is excluded to save memory, as it is efficiently retrievable via exact graph traversal. The only un-vectorized `document` nodes are confirmed test artifacts.
88
+
@@ -0,0 +1,261 @@
1
+ # Repository Cleanup Summary
2
+
3
+ **Date:** 2026-01-05
4
+ **Branch:** alpine-refactor
5
+ **Issue:** Repository was tracking 966 files with .git size of 193 MB
6
+
7
+ ---
8
+
9
+ ## Problem Identified
10
+
11
+ The polyvis repository was accumulating artifacts that should not be versioned:
12
+
13
+ ### Artifacts Found (40+ MB total)
14
+
15
+ 1. **Database Files** (~20 MB)
16
+ - `_misc/bento_ledger.sqlite`
17
+ - `bento_ledger.sqlite`, `.sqlite-wal`, `.sqlite-shm`
18
+ - `canary-persistence.db`
19
+ - `test-graph-integrity.db-wal`
20
+ - `public/resonance.db.pre-hollow-node` (5.9 MB)
21
+
22
+ 2. **Database Backups** (~9 MB)
23
+ - `backups/db/resonance.20251214140633.db` (5.9 MB)
24
+ - `backups/db/benchmarks/resonance.db.pre-benchmark-20251217-184046` (8.5 MB)
25
+ - `backups/db/benchmarks/resonance.db.corrupted-20251217-201947`
26
+
27
+ 3. **Large PDFs** (~11 MB)
28
+ - `experiments/enlightenment/representational-engineering.pdf` (10 MB)
29
+ - `docs/2310.08560v2.pdf` (648 KB)
30
+
31
+ 4. **Built Bundles**
32
+ - `experiments/data-star-dashboard/dist/datastar.bundle.js` (80 KB)
33
+
34
+ ---
35
+
36
+ ## Actions Taken
37
+
38
+ ### 1. Updated .gitignore
39
+
40
+ Added comprehensive patterns to prevent future commits:
41
+
42
+ ```gitignore
43
+ # Database Files (Generated Artifacts - Never Commit)
44
+ *.db
45
+ *.db-wal
46
+ *.db-shm
47
+ *.sqlite
48
+ *.sqlite-wal
49
+ *.sqlite-shm
50
+
51
+ # Database Backups
52
+ backups/db/
53
+
54
+ # Built/Bundled JavaScript
55
+ **/dist/*.bundle.js
56
+ **/dist/*.min.js
57
+ experiments/**/dist/
58
+
59
+ # Test Artifacts
60
+ test-*.db
61
+ test-*.db-wal
62
+ canary-*.db
63
+
64
+ # Large Research Papers
65
+ *.pdf
66
+ !docs/architecture-diagrams/*.pdf
67
+ ```
68
+
69
+ **Commit:** `0c3015e`
70
+
71
+ ### 2. Created Documentation
72
+
73
+ **docs/COMMIT_GUIDELINES.md**
74
+ - Comprehensive guide on what to commit and what not to commit
75
+ - Quick reference checklist
76
+ - Edge cases and troubleshooting
77
+ - Philosophy: "Repository should contain minimum necessary to build and understand"
78
+
79
+ **Key principles:**
80
+ - ✅ Source code, configs, documentation, small assets
81
+ - ❌ Generated artifacts, large binaries, backups, secrets
82
+
83
+ **Commit:** `0c3015e`
84
+
85
+ ### 3. Created Cleanup Script
86
+
87
+ **scripts/cleanup-repo-artifacts.sh**
88
+ - Interactive script to remove artifacts from git history
89
+ - Safety checks (prevents running on main branch)
90
+ - Creates backup branch before cleanup
91
+ - Uses git-filter-repo (preferred) or filter-branch (fallback)
92
+ - Aggressive garbage collection
93
+
94
+ **Usage:**
95
+ ```bash
96
+ ./scripts/cleanup-repo-artifacts.sh
97
+ ```
98
+
99
+ **Commit:** `0c3015e`
100
+
101
+ ### 4. Removed Artifacts from Index
102
+
103
+ Removed 14 files from git tracking (not history):
104
+
105
+ ```bash
106
+ git rm --cached -r backups/db/
107
+ git rm --cached _misc/bento_ledger.sqlite
108
+ git rm --cached bento_ledger.sqlite*
109
+ git rm --cached canary-persistence.db
110
+ git rm --cached test-graph-integrity.db-wal
111
+ git rm --cached public/resonance.db.pre-hollow-node
112
+ git rm --cached experiments/enlightenment/representational-engineering.pdf
113
+ git rm --cached docs/2310.08560v2.pdf
114
+ git rm --cached experiments/data-star-dashboard/dist/datastar.bundle.js
115
+ ```
116
+
117
+ **Result:** 966 → 954 tracked files
118
+
119
+ **Commit:** `0c3015e`
120
+
121
+ ---
122
+
123
+ ## Current State
124
+
125
+ ### Metrics (Post-Cleanup)
126
+
127
+ - **Files tracked:** 954 (down from 966)
128
+ - **Repository size:** 193 MB (unchanged - files remain in history)
129
+ - **Untracked files:** Database files now properly ignored
130
+
131
+ ### Why .git Size Unchanged?
132
+
133
+ The removed files are still in git history. To fully reclaim space, you need to:
134
+
135
+ 1. Run the cleanup script: `./scripts/cleanup-repo-artifacts.sh`
136
+ 2. Force push to rewrite remote history
137
+ 3. Coordinate with team (they'll need to re-clone or reset)
138
+
139
+ **⚠️ Important:** History rewriting is disruptive. Only do this if:
140
+ - Working on a feature branch (✅ we're on alpine-refactor)
141
+ - Team is coordinated
142
+ - No open PRs depend on current history
143
+
144
+ ---
145
+
146
+ ## Benefits Achieved
147
+
148
+ ### Immediate Benefits
149
+
150
+ 1. **Prevention:** `.gitignore` now prevents committing artifacts
151
+ 2. **Documentation:** Clear guidelines on what to commit
152
+ 3. **Tools:** Script ready for full history cleanup
153
+ 4. **Current commits:** New work won't add artifacts
154
+
155
+ ### Potential Future Benefits (After History Cleanup)
156
+
157
+ 1. **Faster operations:** Clone, fetch, push will be quicker
158
+ 2. **Smaller repo:** ~40-50 MB reduction estimated
159
+ 3. **Cleaner history:** Only source code and docs versioned
160
+
161
+ ---
162
+
163
+ ## Next Steps (Optional)
164
+
165
+ ### Full History Cleanup
166
+
167
+ If you want to reclaim the 40+ MB from history:
168
+
169
+ ```bash
170
+ # 1. Ensure you're on alpine-refactor
171
+ git checkout alpine-refactor
172
+
173
+ # 2. Run the cleanup script
174
+ ./scripts/cleanup-repo-artifacts.sh
175
+
176
+ # 3. Force push (after verification)
177
+ git push --force origin alpine-refactor
178
+
179
+ # 4. Notify team members to reset their branches
180
+ ```
181
+
182
+ **Team coordination required!**
183
+
184
+ ### Maintenance
185
+
186
+ **Going forward:**
187
+
188
+ 1. Review `.gitignore` patterns regularly
189
+ 2. Check commit size before pushing (see COMMIT_GUIDELINES.md)
190
+ 3. Run `git ls-files | grep -E '\.(db|sqlite|pdf)$'` periodically (a scripted version is sketched after this list)
191
+ 4. Educate contributors about artifact policies
192
+
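A scripted version of check 3 might look like the following sketch, using Bun's shell API; the extension list mirrors the `.gitignore` patterns above and can be extended.

```typescript
// Sketch only: fail if any artifact-like file is tracked by git.
import { $ } from "bun";

const tracked = (await $`git ls-files`.text()).split("\n").filter(Boolean);
const artifacts = tracked.filter((path) => /\.(db|db-wal|db-shm|sqlite|pdf)$/.test(path));

if (artifacts.length > 0) {
  console.error("Tracked artifacts found:");
  for (const path of artifacts) console.error(`  ${path}`);
  process.exit(1);
}
console.log("No tracked artifacts.");
```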
193
+ ---
194
+
195
+ ## Related Files
196
+
197
+ - **Guidelines:** `docs/COMMIT_GUIDELINES.md`
198
+ - **Cleanup script:** `scripts/cleanup-repo-artifacts.sh`
199
+ - **Gitignore:** `.gitignore`
200
+ - **Beads playbooks:** `playbooks/beads-{agent,human}-playbook.md`
201
+
202
+ ---
203
+
204
+ ## Philosophy
205
+
206
+ **Core principle:** *Git is for source code, not generated artifacts.*
207
+
208
+ **Rationale:**
209
+ - Database files are generated from JSON (the source of truth)
210
+ - Built bundles are generated from TypeScript source
211
+ - Research papers should be linked, not embedded
212
+ - Backups belong in backup systems, not version control
213
+
214
+ **Goal:** Keep the repository lean, fast, and comprehensible.
215
+
216
+ ---
217
+
218
+ ## Before/After Comparison
219
+
220
+ ### Before
221
+
222
+ ```
223
+ Files tracked: 966
224
+ .git size: 193 MB
225
+ Issues: Databases, PDFs, backups committed
226
+ Prevention: Weak .gitignore patterns
227
+ Documentation: None
228
+ ```
229
+
230
+ ### After (Current)
231
+
232
+ ```
233
+ Files tracked: 954
234
+ .git size: 193 MB (history unchanged)
235
+ Issues: Future commits prevented
236
+ Prevention: Comprehensive .gitignore
237
+ Documentation: COMMIT_GUIDELINES.md
238
+ Tools: cleanup-repo-artifacts.sh
239
+ ```
240
+
241
+ ### After (If History Cleaned)
242
+
243
+ ```
244
+ Files tracked: 954
245
+ .git size: ~140 MB (estimated)
246
+ Issues: Resolved
247
+ Prevention: Comprehensive .gitignore
248
+ Documentation: COMMIT_GUIDELINES.md
249
+ Tools: cleanup-repo-artifacts.sh
250
+ ```
251
+
252
+ ---
253
+
254
+ ## Commits
255
+
256
+ - `0c3015e` - Remove artifacts and add commit guidelines
257
+ - `aee1d2a` - Add Beads playbooks (includes initial .resonance/cache cleanup)
258
+
259
+ ---
260
+
261
+ **Conclusion:** Immediate improvements achieved. Full history cleanup optional but recommended for long-term repository health.
@@ -0,0 +1,57 @@
1
+ # Edge Generation Methods
2
+
3
+ ## Overview
4
+
5
+ ResonanceDB uses multiple methods to generate edges between nodes. This document tracks each method and its contribution to the knowledge graph.
6
+
7
+ ---
8
+
9
+ ## Methods
10
+
11
+ | # | Method | Edge Type | Description |
12
+ |---|--------|-----------|-------------|
13
+ | 1 | **ConceptualLexicon** | MENTIONS | Concept → Term relationships from structured JSON |
14
+ | 2 | **TimelineWeaver** | SUCCEEDS | Chronological debrief chain |
15
+ | 3 | **SemanticHarvester** | IS_A, IMPLEMENTS | ML-extracted from markdown (SetFit + Llama) |
16
+
17
+ ---
18
+
19
+ ## Current Edge Distribution
20
+
21
+ *As of 2025-12-28*
22
+
23
+ | Source | Type | Count | % |
24
+ |--------|------|-------|---|
25
+ | ConceptualLexicon | MENTIONS | 386 | 77% |
26
+ | TimelineWeaver | SUCCEEDS | 110 | 22% |
27
+ | SemanticHarvester | CONCEPT, EXEMPLIFIES | 3 | <1% |
28
+ | **Total** | | **499** | 100% |
29
+
30
+ ---
31
+
32
+ ## Implementation Details
33
+
34
+ ### ConceptualLexicon Ingestor
35
+ - **Source:** `src/pipeline/Ingestor.ts`
36
+ - **Input:** `experiments/json2md/*.json`
37
+ - **Generates:** Bulk MENTIONS edges from structured concept definitions
38
+
39
+ ### TimelineWeaver
40
+ - **Source:** `src/pipeline/Ingestor.ts` (TimelineWeaver class)
41
+ - **Logic:** Links debriefs by date via SUCCEEDS edges (a minimal sketch follows this section)
42
+ - **Automatic:** Runs on every ingestion
43
+
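The actual TimelineWeaver lives in `src/pipeline/Ingestor.ts` and is not reproduced here; the following is a minimal sketch of the chaining logic described above, with illustrative types rather than the real ones.

```typescript
// Sketch only: order debriefs by date and chain them with SUCCEEDS edges.
interface DebriefNode {
  id: string;
  date: string; // ISO date, e.g. "2025-12-16"
}

interface Edge {
  source: string;
  target: string;
  type: "SUCCEEDS";
}

function weaveTimeline(debriefs: DebriefNode[]): Edge[] {
  const ordered = [...debriefs].sort((a, b) => a.date.localeCompare(b.date));
  const edges: Edge[] = [];
  for (let i = 1; i < ordered.length; i++) {
    // Each debrief SUCCEEDS the one immediately before it in time.
    edges.push({ source: ordered[i].id, target: ordered[i - 1].id, type: "SUCCEEDS" });
  }
  return edges;
}

// Example: three debriefs produce two SUCCEEDS edges.
console.log(weaveTimeline([
  { id: "debrief-c", date: "2025-12-16" },
  { id: "debrief-a", date: "2025-12-11" },
  { id: "debrief-b", date: "2025-12-14" },
]));
```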
44
+ ### SemanticHarvester
45
+ - **Source:** `ingest/harvester.py`, `src/pipeline/SemanticHarvester.ts`
46
+ - **Logic:** SetFit classification → Llama.cpp extraction (or regex fallback)
47
+ - **Manual:** Run via `scripts/run-semantic-harvest.ts`
48
+
49
+ ---
50
+
51
+ ## Future Methods
52
+
53
+ Potential edge generators not yet implemented:
54
+
55
+ - **TagWeaver** — Link documents sharing common tags
56
+ - **VectorSimilarity** — Create edges between semantically similar nodes
57
+ - **ExplicitLinks** — Parse `[[wikilinks]]` from markdown
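Of these, ExplicitLinks is simple enough to sketch. The following is a hypothetical extractor, not existing code; the `LINKS_TO` edge type is an assumption.

```typescript
// Sketch only: extract [[wikilinks]] from markdown and emit edges
// from the containing document to each link target.
interface WikiEdge {
  source: string;
  target: string;
  type: "LINKS_TO";
}

function extractWikilinks(docId: string, markdown: string): WikiEdge[] {
  const edges: WikiEdge[] = [];
  // Matches [[Target]] and [[Target|Alias]]; the target is the part before the pipe.
  for (const match of markdown.matchAll(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g)) {
    edges.push({ source: docId, target: match[1].trim(), type: "LINKS_TO" });
  }
  return edges;
}

console.log(extractWikilinks("doc-1", "See [[Bento Box]] and [[Edge Weaver|the weaver]]."));
```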
@@ -0,0 +1,118 @@
1
+ This is a great scenario because it describes the **"Drop & Watch"** workflow, which effectively turns your file system into a reactive database.
2
+
3
+ Here is my opinion on why this specific "Two Bucket" scenario is the ultimate stress test—and value generator—for Polyvis.
4
+
5
+ ### 1. The "Chemical Reaction" (Not Just Addition)
6
+ Most tools treat a new folder like a separate silo. You import it, and it sits there.
7
+ In Polyvis (with the Weavers we discussed), adding `Bucket B` to `Bucket A` isn't an addition; it's a **chemical reaction**.
8
+
9
+ * **The Catalyst:** The `DesireLineWeaver` and `SemanticWeaver`.
10
+ * **The Reaction:** Immediately upon ingestion, the graph will try to "stitch" the new bucket to the old one.
11
+ * *Did the new PDF mention "Bento Box"?* **SNAP.** A "Desire Line" edge is formed.
12
+ * *Is the new text semantically similar to your "Architecture" briefs?* **SNAP.** A semantic edge is formed.
13
+
14
+ **The Value:** You aren't looking for the new documents. You are looking for **where the new documents touch the old documents.** The "Surface Area of Contact" is where the insight lives.
15
+
16
+ ### 2. The "Wrong Bucket" Detector (Isolation Analysis)
17
+ You made a profound point: *"unless I just picked the wrong document bucket... which is also valuable info."*
18
+
19
+ In a standard search engine, if you search and find nothing, you feel like you failed.
20
+ In a Graph, **Disconnection is Data**.
21
+
22
+ If you ingest `Bucket B` and it forms a perfect, floating island with **zero edges** connecting to `Bucket A`:
23
+ * **The Insight:** "This topic is completely orthogonal to our current work."
24
+ * **The Action:** You can safely archive it or spin it off. You have mathematically proven it is irrelevant to the current context.
25
+
26
+ ### 3. The "Bridge Node" (The MVP)
27
+ The most valuable nodes in this scenario are the **Bridge Nodes**.
28
+ These are the few specific documents in `Bucket B` that link back to `Bucket A`.
29
+
30
+ * *Example:* You ingest a folder of "Competitor Research" (Bucket B). Most of it is noise. But one file, `competitor-pricing.md`, links strongly to your internal `pricing-strategy.md` (Bucket A).
31
+ * *Result:* The graph highlights this Bridge. You can ignore the other 99 files and focus entirely on that one connection.
32
+
33
+ ### 4. Operational Note (The Friction Point)
34
+ The only "gotcha" in this workflow currently is `polyvis.settings.json`.
35
+ * **Current Reality:** You have to manually add the new folder path to the `sources` array in settings.
36
+ * **The Riff:** We should eventually support **"Hot Folders"** or a `watch_list` glob pattern (e.g., `_incoming/*`); a minimal watcher is sketched after this list.
37
+ * You drop a folder into `_incoming/`.
38
+ * The Daemon wakes up, ingests it as a temporary collection.
39
+ * The Graph reports: *"Ingested 'Project X'. Found 12 connections to existing knowledge. Keep or Discard?"*
40
+
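A minimal sketch of what such a hot-folder watcher could look like; the `_incoming/` folder name and the ingestion entry point (`src/resonance/cli/ingest.ts`) are assumptions about how it would be wired up.

```typescript
// Sketch only: watch _incoming/ and re-run ingestion when files land there.
// The folder name and the ingest invocation are assumptions.
import { watch } from "node:fs";
import { $ } from "bun";

watch("_incoming", async (event, filename) => {
  if (!filename) return;
  console.log(`Detected ${event} on _incoming/${filename}; re-ingesting...`);
  // Hypothetical invocation; swap in the real CLI or daemon hook.
  await $`bun run src/resonance/cli/ingest.ts`;
});

console.log("Watching _incoming/ for dropped buckets...");
```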
41
+ ### Summary
42
+ This workflow shifts the cognitive load from **Classification** (you organizing files) to **Recognition** (you seeing patterns). You dump the bucket; the machine shakes the sieve; you pick up the gold nuggets that remain.
43
+
44
+ ---
45
+
46
+ This is a powerful user story because it moves the value proposition from "tech" (search/vectors) to "leverage" (winning arguments).
47
+
48
+ You have described **Asymmetric Insight**. You won because you could see the hidden structure of reality (the timeline) that the other party couldn't.
49
+
50
+ ### The Metaphor: "The Evidence Board" (or "The Loom")
51
+
52
+ Since we are already using `TimelineWeaver`, `EdgeWeaver`, and `SemanticWeaver`, the code is begging for a **Weaving** metaphor. But given your solicitor story, I think the stronger metaphor is **"The Forensic Darkroom."**
53
+
54
+ * **The Raw State (The Film):** Your bucket of random documents, emails, and notes. It contains the truth, but it is invisible (latent).
55
+ * **The Ingestion (The Developer):** You dip the film into the chemical bath (Polyvis).
56
+ * **The Reaction (The Weavers):**
57
+ * **TimelineWeaver** creates the *sequence* (Time).
58
+ * **StructureWeaver** creates the *location* (Space).
59
+ * **SemanticWeaver** connects the *dots* (The Red String).
60
+ * **The Result (The Print):** Suddenly, a sharp, high-contrast image appears. You see the "gun," the "motive," and the "alibi" clearly.
61
+
62
+ ### The Pivot: "Cognitive Armor"
63
+ Your story highlights that the ultimate output of this system isn't a "graph"—it is **Confidence**.
64
+
65
+ When you walked into that solicitor's office, you were wearing **Cognitive Armor**. You weren't asking, "Do I have a case?" You were saying, "Here is the case; execute it."
66
+
67
+ **Opinion:**
68
+ This defines the **"Why"** of Polyvis better than any technical feature.
69
+ * **Most tools** (Obsidian, Notion) are for *storing* thoughts.
70
+ * **Polyvis** is for *auditing* reality.
71
+
72
+ It is a **Due Diligence Engine**. It is designed for the moment you need to be **100% right**.
73
+ * *Did we promise that feature in the Brief?* (Check the timeline).
74
+ * *Did we actually build it in the Debrief?* (Check the link).
75
+ * *Is it documented in the Playbook?* (Check the gap).
76
+
77
+ If we frame Polyvis as **"The machine that builds your case,"** it becomes indispensable for Architects, Leads, and Strategists—anyone who needs to be the most informed person in the room.
78
+
79
+ ---
80
+
81
+ This is the **"Trojan Horse of Competence."**
82
+
83
+ You have just described the perfect treaty between **The Business** (who needs answers) and **IT** (who controls the pipes).
84
+
85
+ Here is my opinion on why this specific workflow—the "Just give me a Markdown Repo" request—is a killer organizational hack.
86
+
87
+ ### 1. The "Demilitarized Zone" (DMZ)
88
+ Usually, asking IT for "AI help" triggers a 6-month procurement review, security audit, and a $50k pilot program for some cloud tool that creates more lock-in.
89
+
90
+ But asking IT to *"dump text files into a secure private Repo"*?
91
+ * **IT loves it:** It’s standard, secure, audit-compliant, and uses tools they already own (GitHub/GitLab). They don't have to "buy" anything.
92
+ * **You love it:** You get the raw material (data) without the "help" (bloated enterprise UI).
93
+ * **The Magic:** Markdown is the **DMZ**. It is readable by their machines and readable by your Polyvis. It breaks the dependency chain.
94
+
95
+ ### 2. The "Cryogenic Freeze" (Zero Context Decay)
96
+ Your point about *"later on the case blows up again"* is the strongest operational argument.
97
+
98
+ In traditional tools (Teams, Slack, Shared Drives), context has a half-life of about two weeks. If you leave a project and come back 6 months later, the "search" is broken, the links are dead, and you have forgotten the mental model.
99
+
100
+ With Polyvis + Git:
101
+ * **The Repo is the Truth:** It hasn't moved.
102
+ * **The Graph is the Context:** You boot up Polyvis, it re-ingests the updated repo, and—crucially—**the structures you built last time (Folders, Tags) remain valid.**
103
+ * **The Delta:** When you ingest the *new* docs, they immediately light up next to the *old* docs. You don't have to "re-learn" the case; you just see the new growth on the old tree.
104
+
105
+ ### 3. The "Answer Artifact"
106
+ You mentioned: *"put your results in the ANSWERS folder."*
107
+ This is subtle but vital. By checking your *deductions* (the Answer) back into the repo alongside the *evidence* (the Docs), you are creating a **Composite Memory**.
108
+
109
+ The next person who picks up the case doesn't just get the raw files; they get the files **plus your logic.**
110
+
111
+ ### Summary Opinion
112
+ You have defined **"Headless Knowledge Management."**
113
+
114
+ * **Headless:** The data lives in a dumb, sturdy repository (Git).
115
+ * **Local Mind:** The intelligence lives in a temporary, disposable, high-speed engine (Polyvis) on your laptop.
116
+
117
+ This is the only architecture that survives corporate chaos. IT can change their servers, you can change your laptop, but the **Repo + Graph** remains the portable soul of the project.
118
+