moflo 4.8.21 → 4.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/.claude/agents/browser/browser-agent.yaml +182 -182
  2. package/.claude/agents/core/coder.md +265 -265
  3. package/.claude/agents/core/planner.md +167 -167
  4. package/.claude/agents/core/researcher.md +189 -189
  5. package/.claude/agents/core/reviewer.md +325 -325
  6. package/.claude/agents/core/tester.md +318 -318
  7. package/.claude/agents/database-specialist.yaml +21 -21
  8. package/.claude/agents/dual-mode/codex-coordinator.md +224 -224
  9. package/.claude/agents/dual-mode/codex-worker.md +211 -211
  10. package/.claude/agents/dual-mode/dual-orchestrator.md +291 -291
  11. package/.claude/agents/github/code-review-swarm.md +537 -537
  12. package/.claude/agents/github/github-modes.md +172 -172
  13. package/.claude/agents/github/issue-tracker.md +318 -318
  14. package/.claude/agents/github/multi-repo-swarm.md +552 -552
  15. package/.claude/agents/github/pr-manager.md +190 -190
  16. package/.claude/agents/github/project-board-sync.md +508 -508
  17. package/.claude/agents/github/release-manager.md +366 -366
  18. package/.claude/agents/github/release-swarm.md +582 -582
  19. package/.claude/agents/github/repo-architect.md +397 -397
  20. package/.claude/agents/github/swarm-issue.md +572 -572
  21. package/.claude/agents/github/swarm-pr.md +427 -427
  22. package/.claude/agents/github/sync-coordinator.md +451 -451
  23. package/.claude/agents/github/workflow-automation.md +634 -634
  24. package/.claude/agents/goal/code-goal-planner.md +445 -445
  25. package/.claude/agents/hive-mind/collective-intelligence-coordinator.md +129 -129
  26. package/.claude/agents/hive-mind/queen-coordinator.md +202 -202
  27. package/.claude/agents/hive-mind/scout-explorer.md +241 -241
  28. package/.claude/agents/hive-mind/swarm-memory-manager.md +192 -192
  29. package/.claude/agents/hive-mind/worker-specialist.md +216 -216
  30. package/.claude/agents/index.yaml +17 -17
  31. package/.claude/agents/neural/safla-neural.md +73 -73
  32. package/.claude/agents/project-coordinator.yaml +15 -15
  33. package/.claude/agents/python-specialist.yaml +21 -21
  34. package/.claude/agents/reasoning/goal-planner.md +72 -72
  35. package/.claude/agents/security-auditor.yaml +20 -20
  36. package/.claude/agents/swarm/adaptive-coordinator.md +395 -395
  37. package/.claude/agents/swarm/hierarchical-coordinator.md +326 -326
  38. package/.claude/agents/swarm/mesh-coordinator.md +391 -391
  39. package/.claude/agents/templates/migration-plan.md +745 -745
  40. package/.claude/agents/typescript-specialist.yaml +21 -21
  41. package/.claude/checkpoints/1767754460.json +8 -8
  42. package/.claude/commands/agents/agent-spawning.md +28 -28
  43. package/.claude/commands/github/github-modes.md +146 -146
  44. package/.claude/commands/github/github-swarm.md +121 -121
  45. package/.claude/commands/github/issue-tracker.md +291 -291
  46. package/.claude/commands/github/pr-manager.md +169 -169
  47. package/.claude/commands/github/release-manager.md +337 -337
  48. package/.claude/commands/github/repo-architect.md +366 -366
  49. package/.claude/commands/github/sync-coordinator.md +300 -300
  50. package/.claude/commands/memory/neural.md +47 -47
  51. package/.claude/commands/sparc/analyzer.md +51 -51
  52. package/.claude/commands/sparc/architect.md +53 -53
  53. package/.claude/commands/sparc/ask.md +97 -97
  54. package/.claude/commands/sparc/batch-executor.md +54 -54
  55. package/.claude/commands/sparc/code.md +89 -89
  56. package/.claude/commands/sparc/coder.md +54 -54
  57. package/.claude/commands/sparc/debug.md +83 -83
  58. package/.claude/commands/sparc/debugger.md +54 -54
  59. package/.claude/commands/sparc/designer.md +53 -53
  60. package/.claude/commands/sparc/devops.md +109 -109
  61. package/.claude/commands/sparc/docs-writer.md +80 -80
  62. package/.claude/commands/sparc/documenter.md +54 -54
  63. package/.claude/commands/sparc/innovator.md +54 -54
  64. package/.claude/commands/sparc/integration.md +83 -83
  65. package/.claude/commands/sparc/mcp.md +117 -117
  66. package/.claude/commands/sparc/memory-manager.md +54 -54
  67. package/.claude/commands/sparc/optimizer.md +54 -54
  68. package/.claude/commands/sparc/orchestrator.md +131 -131
  69. package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -83
  70. package/.claude/commands/sparc/refinement-optimization-mode.md +83 -83
  71. package/.claude/commands/sparc/researcher.md +54 -54
  72. package/.claude/commands/sparc/reviewer.md +54 -54
  73. package/.claude/commands/sparc/security-review.md +80 -80
  74. package/.claude/commands/sparc/sparc-modes.md +174 -174
  75. package/.claude/commands/sparc/sparc.md +111 -111
  76. package/.claude/commands/sparc/spec-pseudocode.md +80 -80
  77. package/.claude/commands/sparc/supabase-admin.md +348 -348
  78. package/.claude/commands/sparc/swarm-coordinator.md +54 -54
  79. package/.claude/commands/sparc/tdd.md +54 -54
  80. package/.claude/commands/sparc/tester.md +54 -54
  81. package/.claude/commands/sparc/tutorial.md +79 -79
  82. package/.claude/commands/sparc/workflow-manager.md +54 -54
  83. package/.claude/commands/sparc.md +166 -166
  84. package/.claude/commands/swarm/analysis.md +95 -95
  85. package/.claude/commands/swarm/development.md +96 -96
  86. package/.claude/commands/swarm/examples.md +168 -168
  87. package/.claude/commands/swarm/maintenance.md +102 -102
  88. package/.claude/commands/swarm/optimization.md +117 -117
  89. package/.claude/commands/swarm/research.md +136 -136
  90. package/.claude/commands/swarm/testing.md +131 -131
  91. package/.claude/commands/workflows/development.md +77 -77
  92. package/.claude/commands/workflows/research.md +62 -62
  93. package/.claude/guidance/moflo-bootstrap.md +126 -126
  94. package/.claude/guidance/shipped/agent-bootstrap.md +126 -126
  95. package/.claude/guidance/shipped/guidance-memory-strategy.md +262 -262
  96. package/.claude/guidance/shipped/memory-strategy.md +204 -204
  97. package/.claude/guidance/shipped/moflo.md +668 -653
  98. package/.claude/guidance/shipped/task-swarm-integration.md +441 -441
  99. package/.claude/helpers/intelligence.cjs +207 -207
  100. package/.claude/helpers/statusline.cjs +851 -851
  101. package/.claude/settings.local.json +18 -0
  102. package/.claude/skills/fl/SKILL.md +583 -583
  103. package/.claude/skills/flo/SKILL.md +583 -583
  104. package/.claude/skills/github-code-review/SKILL.md +1140 -1140
  105. package/.claude/skills/github-multi-repo/SKILL.md +874 -874
  106. package/.claude/skills/github-project-management/SKILL.md +1277 -1277
  107. package/.claude/skills/github-release-management/SKILL.md +1081 -1081
  108. package/.claude/skills/github-workflow-automation/SKILL.md +1065 -1065
  109. package/.claude/skills/hive-mind-advanced/SKILL.md +712 -712
  110. package/.claude/skills/hooks-automation/SKILL.md +1201 -1201
  111. package/.claude/skills/performance-analysis/SKILL.md +563 -563
  112. package/.claude/skills/sparc-methodology/SKILL.md +1115 -1115
  113. package/.claude/skills/swarm-advanced/SKILL.md +973 -973
  114. package/.claude/workflow-state.json +4 -4
  115. package/LICENSE +21 -21
  116. package/README.md +698 -685
  117. package/bin/cli.js +0 -0
  118. package/bin/gate-hook.mjs +50 -50
  119. package/bin/gate.cjs +138 -138
  120. package/bin/generate-code-map.mjs +775 -775
  121. package/bin/hook-handler.cjs +83 -83
  122. package/bin/hooks.mjs +656 -656
  123. package/bin/index-guidance.mjs +892 -892
  124. package/bin/index-tests.mjs +709 -709
  125. package/bin/lib/process-manager.mjs +243 -243
  126. package/bin/lib/registry-cleanup.cjs +41 -41
  127. package/bin/prompt-hook.mjs +72 -72
  128. package/bin/semantic-search.mjs +472 -472
  129. package/bin/session-start-launcher.mjs +238 -238
  130. package/bin/setup-project.mjs +250 -250
  131. package/package.json +123 -123
  132. package/src/@claude-flow/cli/README.md +452 -452
  133. package/src/@claude-flow/cli/bin/cli.js +180 -180
  134. package/src/@claude-flow/cli/bin/preinstall.cjs +2 -2
  135. package/src/@claude-flow/cli/dist/src/commands/completions.js +409 -409
  136. package/src/@claude-flow/cli/dist/src/commands/doctor.js +18 -2
  137. package/src/@claude-flow/cli/dist/src/commands/embeddings.js +25 -25
  138. package/src/@claude-flow/cli/dist/src/commands/github.js +61 -61
  139. package/src/@claude-flow/cli/dist/src/commands/hive-mind.js +90 -90
  140. package/src/@claude-flow/cli/dist/src/commands/hooks.js +9 -9
  141. package/src/@claude-flow/cli/dist/src/commands/init.js +3 -8
  142. package/src/@claude-flow/cli/dist/src/commands/ruvector/import.js +14 -14
  143. package/src/@claude-flow/cli/dist/src/commands/ruvector/setup.js +624 -624
  144. package/src/@claude-flow/cli/dist/src/config/moflo-config.d.ts +3 -0
  145. package/src/@claude-flow/cli/dist/src/config/moflo-config.js +101 -91
  146. package/src/@claude-flow/cli/dist/src/index.d.ts +5 -0
  147. package/src/@claude-flow/cli/dist/src/index.js +44 -0
  148. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.d.ts +29 -29
  149. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.js +43 -43
  150. package/src/@claude-flow/cli/dist/src/init/executor.js +453 -453
  151. package/src/@claude-flow/cli/dist/src/init/helpers-generator.js +482 -482
  152. package/src/@claude-flow/cli/dist/src/init/moflo-init.d.ts +30 -30
  153. package/src/@claude-flow/cli/dist/src/init/moflo-init.js +140 -140
  154. package/src/@claude-flow/cli/dist/src/init/statusline-generator.js +876 -876
  155. package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +371 -371
  156. package/src/@claude-flow/cli/dist/src/runtime/headless.js +28 -28
  157. package/src/@claude-flow/cli/dist/src/services/container-worker-pool.d.ts +197 -0
  158. package/src/@claude-flow/cli/dist/src/services/container-worker-pool.js +584 -0
  159. package/src/@claude-flow/cli/dist/src/services/daemon-lock.d.ts +14 -0
  160. package/src/@claude-flow/cli/dist/src/services/daemon-lock.js +1 -1
  161. package/src/@claude-flow/cli/dist/src/services/headless-worker-executor.js +84 -84
  162. package/src/@claude-flow/cli/package.json +1 -1
  163. package/src/@claude-flow/guidance/README.md +1195 -1195
  164. package/src/@claude-flow/guidance/package.json +198 -198
  165. package/src/@claude-flow/memory/README.md +587 -587
  166. package/src/@claude-flow/memory/dist/agentdb-backend.js +26 -26
  167. package/src/@claude-flow/memory/dist/auto-memory-bridge.test.js +27 -27
  168. package/src/@claude-flow/memory/dist/hybrid-backend.d.ts +245 -0
  169. package/src/@claude-flow/memory/dist/hybrid-backend.js +569 -0
  170. package/src/@claude-flow/memory/dist/hybrid-backend.test.d.ts +8 -0
  171. package/src/@claude-flow/memory/dist/hybrid-backend.test.js +320 -0
  172. package/src/@claude-flow/memory/dist/sqlite-backend.d.ts +121 -0
  173. package/src/@claude-flow/memory/dist/sqlite-backend.js +572 -0
  174. package/src/@claude-flow/memory/dist/sqljs-backend.js +26 -26
  175. package/src/@claude-flow/memory/package.json +44 -44
  176. package/src/@claude-flow/shared/README.md +323 -323
  177. package/src/@claude-flow/shared/dist/events/event-store.js +31 -31
  178. package/src/README.md +493 -493
@@ -1,473 +1,473 @@
1
1
  #!/usr/bin/env node
2
- /**
3
- * Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
4
- *
5
- * Query embedding MUST match stored embedding model:
6
- * 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
7
- * 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
8
- *
9
- * Usage:
10
- * node node_modules/moflo/bin/semantic-search.mjs "your search query"
11
- * npx flo-search "your search query"
12
- * npx flo-search "query" --limit 10
13
- * npx flo-search "query" --namespace guidance
14
- * npx flo-search "query" --threshold 0.3
15
- */
16
-
17
- import { existsSync, readFileSync } from 'fs';
18
- import { resolve, dirname } from 'path';
19
- import { mofloResolveURL } from './lib/moflo-resolve.mjs';
20
- const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
21
-
22
- function findProjectRoot() {
23
- let dir = process.cwd();
24
- const root = resolve(dir, '/');
25
- while (dir !== root) {
26
- if (existsSync(resolve(dir, 'package.json'))) return dir;
27
- dir = dirname(dir);
28
- }
29
- return process.cwd();
30
- }
31
-
32
- const projectRoot = findProjectRoot();
33
-
34
- const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
35
- const EMBEDDING_DIMS = 384;
36
- const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
37
- const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
38
- // 'onnx' is a legacy alias for the Xenova model — treat them as compatible vector spaces
39
- const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);
40
-
41
- // Parse args
42
- const args = process.argv.slice(2);
43
- const query = args.find(a => !a.startsWith('--'));
44
- const limit = args.includes('--limit') ? parseInt(args[args.indexOf('--limit') + 1]) : 5;
45
- let namespace = args.includes('--namespace') ? args[args.indexOf('--namespace') + 1] : null;
46
- const withTests = args.includes('--with-tests');
47
- const threshold = args.includes('--threshold') ? parseFloat(args[args.indexOf('--threshold') + 1]) : 0.3;
48
- const json = args.includes('--json');
49
- const debug = args.includes('--debug');
50
-
51
- // Auto-routing: when query mentions test-related terms, also search tests namespace
52
- const TEST_KEYWORDS = /\b(test|spec|coverage|assert|mock|stub|fixture|describe|jest|vitest|mocha|e2e|integration test)\b/i;
53
-
54
- if (!query) {
55
- console.error('Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]');
56
- process.exit(1);
57
- }
58
-
59
- // ============================================================================
60
- // Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
61
- // ============================================================================
62
-
63
- let pipeline = null;
64
- let useTransformers = false;
65
-
66
- async function loadTransformersModel() {
67
- try {
68
- const { env, pipeline: createPipeline } = await import(mofloResolveURL('@xenova/transformers'));
69
- env.allowLocalModels = false;
70
- env.backends.onnx.wasm.numThreads = 1;
71
-
72
- pipeline = await createPipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
73
- quantized: false,
74
- });
75
-
76
- useTransformers = true;
77
- if (debug) console.error('[semantic-search] Using Transformers.js neural model');
78
- return true;
79
- } catch (err) {
80
- if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${err.message?.split('\n')[0]}`);
81
- useTransformers = false;
82
- return false;
83
- }
84
- }
85
-
86
- async function generateNeuralEmbedding(text) {
87
- if (!pipeline) return null;
88
- try {
89
- const output = await pipeline(text, { pooling: 'mean', normalize: true });
90
- return Array.from(output.data);
91
- } catch {
92
- return null;
93
- }
94
- }
95
-
96
- // ============================================================================
97
- // Domain-Aware Semantic Hash Embeddings (fallback)
98
- // ============================================================================
99
-
100
- const DOMAIN_CLUSTERS = {
101
- database: ['typeorm', 'mongodb', 'database', 'entity', 'schema', 'table', 'collection',
102
- 'query', 'sql', 'nosql', 'orm', 'model', 'migration', 'repository', 'column',
103
- 'relation', 'foreign', 'primary', 'index', 'constraint', 'transaction',
104
- 'mikroorm', 'mikro', 'postgresql', 'postgres', 'soft', 'delete', 'deletedat'],
105
- frontend: ['react', 'component', 'ui', 'styling', 'css', 'html', 'jsx', 'tsx', 'frontend',
106
- 'material', 'mui', 'tailwind', 'dom', 'render', 'hook', 'state', 'props',
107
- 'redux', 'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
108
- 'mantis', 'syncfusion', 'scheduler', 'i18n', 'intl', 'locale'],
109
- backend: ['fastify', 'api', 'route', 'handler', 'rest', 'endpoint', 'server', 'controller',
110
- 'middleware', 'request', 'response', 'http', 'express', 'nest', 'graphql',
111
- 'websocket', 'socket', 'cors', 'auth', 'jwt', 'session', 'cookie',
112
- 'awilix', 'dependency', 'injection', 'scope'],
113
- testing: ['test', 'testing', 'vitest', 'jest', 'mock', 'spy', 'assert', 'expect', 'describe',
114
- 'it', 'spec', 'unit', 'integration', 'e2e', 'playwright', 'cypress', 'coverage',
115
- 'fixture', 'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
116
- 'anti-pattern', 'antipattern', 'mocking'],
117
- tenancy: ['tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi', 'multitenant',
118
- 'organization', 'workspace', 'account', 'customer', 'client', 'subdomain'],
119
- security: ['security', 'auth', 'authentication', 'authorization', 'permission', 'role',
120
- 'access', 'token', 'jwt', 'oauth', 'password', 'encrypt', 'hash', 'salt',
121
- 'csrf', 'xss', 'injection', 'sanitize', 'validate', 'rbac'],
122
- patterns: ['pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter', 'facade',
123
- 'observer', 'strategy', 'command', 'repository', 'usecase', 'domain', 'ddd',
124
- 'clean', 'architecture', 'solid', 'dry', 'kiss', 'functional', 'pipeasync'],
125
- workflow: ['workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build', 'actions',
126
- 'hook', 'trigger', 'job', 'step', 'artifact', 'release', 'version', 'tag'],
127
- memory: ['memory', 'cache', 'store', 'persist', 'storage', 'redis', 'session', 'state',
128
- 'buffer', 'queue', 'stack', 'heap', 'gc', 'leak', 'embedding', 'vector', 'hnsw',
129
- 'semantic', 'search', 'index', 'retrieval'],
130
- agent: ['agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker', 'spawn',
131
- 'parallel', 'concurrent', 'async', 'promise', 'queue', 'priority', 'schedule'],
132
- github: ['github', 'issue', 'branch', 'pr', 'pull', 'request', 'merge', 'commit', 'push',
133
- 'clone', 'fork', 'remote', 'origin', 'main', 'master', 'checkout', 'rebase',
134
- 'squash', 'repository', 'repo', 'gh', 'git', 'assignee', 'label', 'mandatory',
135
- 'checklist', 'closes', 'fixes', 'conventional', 'feat', 'refactor'],
136
- documentation: ['guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
137
- 'reference', 'standard', 'convention', 'rule', 'policy', 'template',
138
- 'example', 'usage', 'instruction', 'meta', 'index', 'umbrella', 'claude',
139
- 'optimized', 'audience', 'structure', 'format', 'markdown']
140
- };
141
-
142
- const COMMON_WORDS = new Set([
143
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
144
- 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
145
- 'can', 'need', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
146
- 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'and', 'but',
147
- 'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not', 'only', 'own', 'same', 'than',
148
- 'too', 'very', 'just', 'also', 'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then',
149
- 'else', 'when', 'where', 'why', 'how', 'all', 'each', 'every', 'any', 'some', 'no', 'yes',
150
- 'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like', 'make', 'made'
151
- ]);
152
-
153
- function hash(str, seed = 0) {
154
- let h = seed ^ str.length;
155
- for (let i = 0; i < str.length; i++) {
156
- h ^= str.charCodeAt(i);
157
- h = Math.imul(h, 0x5bd1e995);
158
- h ^= h >>> 15;
159
- }
160
- return h >>> 0;
161
- }
162
-
163
- // Pre-compute domain signatures
164
- const domainSignatures = {};
165
- for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
166
- const sig = new Float32Array(EMBEDDING_DIMS);
167
- for (const kw of keywords) {
168
- for (let h = 0; h < 2; h++) {
169
- const idx = hash(kw + '_dom_' + domain, h) % EMBEDDING_DIMS;
170
- sig[idx] = 1;
171
- }
172
- }
173
- domainSignatures[domain] = sig;
174
- }
175
-
176
- function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
177
- const vec = new Float32Array(dims);
178
- const lowerText = text.toLowerCase();
179
- const words = lowerText.replace(/[^a-z0-9\s]/g, ' ').split(/\s+/).filter(w => w.length > 1);
180
-
181
- if (words.length === 0) return vec;
182
-
183
- // Add domain signatures
184
- for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
185
- let matchCount = 0;
186
- for (const kw of keywords) {
187
- if (lowerText.includes(kw)) matchCount++;
188
- }
189
- if (matchCount > 0) {
190
- const weight = Math.min(2.0, 0.5 + matchCount * 0.3);
191
- const sig = domainSignatures[domain];
192
- for (let i = 0; i < dims; i++) {
193
- vec[i] += sig[i] * weight;
194
- }
195
- }
196
- }
197
-
198
- // Add word features
199
- for (const word of words) {
200
- const isCommon = COMMON_WORDS.has(word);
201
- const weight = isCommon ? 0.2 : (word.length > 6 ? 0.8 : 0.5);
202
- for (let h = 0; h < 3; h++) {
203
- const idx = hash(word, h * 17) % dims;
204
- const sign = (hash(word, h * 31 + 1) % 2 === 0) ? 1 : -1;
205
- vec[idx] += sign * weight;
206
- }
207
- }
208
-
209
- // Add bigrams
210
- for (let i = 0; i < words.length - 1; i++) {
211
- if (COMMON_WORDS.has(words[i]) && COMMON_WORDS.has(words[i + 1])) continue;
212
- const bigram = words[i] + '_' + words[i + 1];
213
- const idx = hash(bigram, 42) % dims;
214
- const sign = (hash(bigram, 43) % 2 === 0) ? 1 : -1;
215
- vec[idx] += sign * 0.4;
216
- }
217
-
218
- // Add trigrams
219
- for (let i = 0; i < words.length - 2; i++) {
220
- const trigram = words[i] + '_' + words[i + 1] + '_' + words[i + 2];
221
- const idx = hash(trigram, 99) % dims;
222
- const sign = (hash(trigram, 100) % 2 === 0) ? 1 : -1;
223
- vec[idx] += sign * 0.3;
224
- }
225
-
226
- // L2 normalize
227
- let norm = 0;
228
- for (let i = 0; i < dims; i++) norm += vec[i] * vec[i];
229
- norm = Math.sqrt(norm);
230
- if (norm > 0) {
231
- for (let i = 0; i < dims; i++) vec[i] /= norm;
232
- }
233
-
234
- return vec;
235
- }
236
-
237
- // ============================================================================
238
- // Unified Embedding Generator (matches stored embeddings)
239
- // ============================================================================
240
-
241
- /**
242
- * Generate query embedding using the SAME model as stored embeddings.
243
- * Checks what model was used for stored entries and matches it.
244
- */
245
- async function generateQueryEmbedding(queryText, db) {
246
- // Check what model the stored entries use
247
- let modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
248
- WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
249
- ${namespace ? "AND namespace = ?" : ""}
250
- GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
251
- const modelStmt = db.prepare(modelCheckSql);
252
- modelStmt.bind(namespace ? [namespace] : []);
253
- const modelCheck = modelStmt.step() ? modelStmt.getAsObject() : null;
254
- modelStmt.free();
255
-
256
- const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;
257
-
258
- if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);
259
-
260
- // If stored embeddings are neural, try to use neural for query too
261
- // Accept both canonical name and legacy 'onnx' tag (both use the same Xenova pipeline)
262
- if (storedModel === EMBEDDING_MODEL_NEURAL || storedModel === 'onnx') {
263
- await loadTransformersModel();
264
- if (useTransformers) {
265
- const neuralEmb = await generateNeuralEmbedding(queryText);
266
- if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
267
- return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
268
- }
269
- }
270
- // Neural failed — warn about model mismatch
271
- if (!json) {
272
- console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
273
- console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
274
- }
275
- }
276
-
277
- // Use hash embeddings (either matching stored hash model, or as fallback)
278
- const hashEmb = Array.from(semanticHashEmbed(queryText));
279
- return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
280
- }
281
-
282
- // ============================================================================
283
- // Search Functions
284
- // ============================================================================
285
-
286
- function cosineSimilarity(a, b) {
287
- if (!a || !b || a.length !== b.length) return 0;
288
- let dot = 0;
289
- for (let i = 0; i < a.length; i++) {
290
- dot += a[i] * b[i];
291
- }
292
- return dot; // Already L2 normalized
293
- }
294
-
295
- async function getDb() {
296
- if (!existsSync(DB_PATH)) {
297
- throw new Error(`Database not found: ${DB_PATH}`);
298
- }
299
- const SQL = await initSqlJs();
300
- const buffer = readFileSync(DB_PATH);
301
- return new SQL.Database(buffer);
302
- }
303
-
304
- async function semanticSearch(queryText, options = {}) {
305
- const { limit = 5, namespace = null, threshold = 0.3 } = options;
306
- const startTime = performance.now();
307
-
308
- const db = await getDb();
309
-
310
- // Generate query embedding matching the stored model
311
- const { embedding: queryEmbedding, model: queryModel } = await generateQueryEmbedding(queryText, db);
312
-
313
- if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);
314
-
315
- // Get all entries with embeddings
316
- let sql = `
317
- SELECT id, key, namespace, content, embedding, embedding_model, metadata
318
- FROM memory_entries
319
- WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
320
- `;
321
- const params = [];
322
-
323
- if (namespace) {
324
- sql += ` AND namespace = ?`;
325
- params.push(namespace);
326
- }
327
-
328
- const stmt = db.prepare(sql);
329
- stmt.bind(params);
330
-
331
- // Calculate similarity scores
332
- const results = [];
333
- while (stmt.step()) {
334
- const entry = stmt.getAsObject();
335
- try {
336
- const storedIsNeural = NEURAL_ALIASES.has(entry.embedding_model);
337
- const queryIsNeural = NEURAL_ALIASES.has(queryModel);
338
- if (entry.embedding_model && entry.embedding_model !== queryModel && !(storedIsNeural && queryIsNeural)) continue;
339
-
340
- const embedding = JSON.parse(entry.embedding);
341
- if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;
342
-
343
- const similarity = cosineSimilarity(queryEmbedding, embedding);
344
-
345
- if (similarity >= threshold) {
346
- let metadata = {};
347
- try {
348
- metadata = JSON.parse(entry.metadata || '{}');
349
- } catch {}
350
-
351
- results.push({
352
- key: entry.key,
353
- namespace: entry.namespace,
354
- score: similarity,
355
- preview: entry.content.substring(0, 150).replace(/\n/g, ' '),
356
- type: metadata.type || 'unknown',
357
- parentDoc: metadata.parentDoc || null,
358
- chunkTitle: metadata.chunkTitle || null,
359
- });
360
- }
361
- } catch {
362
- // Skip entries with invalid embeddings
363
- }
364
- }
365
- stmt.free();
366
-
367
- db.close();
368
-
369
- // Sort by similarity (descending) and limit
370
- results.sort((a, b) => b.score - a.score);
371
- const topResults = results.slice(0, limit);
372
-
373
- const searchTime = performance.now() - startTime;
374
-
375
- return {
376
- query: queryText,
377
- results: topResults,
378
- totalMatches: results.length,
379
- searchTime: `${searchTime.toFixed(0)}ms`,
380
- indexType: 'vector-cosine',
381
- model: queryModel,
382
- };
383
- }
384
-
385
- // ============================================================================
386
- // Main
387
- // ============================================================================
388
-
389
- async function main() {
390
- if (!json) {
391
- console.log('');
392
- console.log(`[semantic-search] Query: "${query}"`);
393
- }
394
-
395
- try {
396
- // --with-tests: search both the specified namespace (or code-map) and tests
397
- // Auto-route: if query contains test keywords and no namespace specified, also search tests
398
- const autoRouteTests = !namespace && TEST_KEYWORDS.test(query);
399
- let results;
400
-
401
- if (withTests || autoRouteTests) {
402
- const primaryNs = namespace || 'code-map';
403
- const primaryResults = await semanticSearch(query, { limit, namespace: primaryNs, threshold });
404
- const testResults = await semanticSearch(query, { limit, namespace: 'tests', threshold });
405
-
406
- // Merge and re-sort by score
407
- const merged = [...primaryResults.results, ...testResults.results]
408
- .sort((a, b) => b.score - a.score)
409
- .slice(0, limit);
410
-
411
- results = {
412
- ...primaryResults,
413
- results: merged,
414
- totalMatches: primaryResults.totalMatches + testResults.totalMatches,
415
- searchTime: `${parseInt(primaryResults.searchTime) + parseInt(testResults.searchTime)}ms`,
416
- namespaces: [primaryNs, 'tests'],
417
- };
418
-
419
- if (!json && autoRouteTests) {
420
- console.log(`[semantic-search] Auto-routed to tests namespace (query contains test keywords)`);
421
- }
422
- } else {
423
- results = await semanticSearch(query, { limit, namespace, threshold });
424
- }
425
-
426
- if (json) {
427
- console.log(JSON.stringify(results, null, 2));
428
- return;
429
- }
430
-
431
- console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
432
- console.log('');
433
-
434
- if (results.results.length === 0) {
435
- console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
436
- return;
437
- }
438
-
439
- // Display results
440
- console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
441
- console.log('│ Rank │ Score │ Key │ Type │ Preview │');
442
- console.log('├─────────────────────────────────────────────────────────────────────────────┤');
443
-
444
- for (let i = 0; i < results.results.length; i++) {
445
- const r = results.results[i];
446
- const rank = String(i + 1).padStart(4);
447
- const score = r.score.toFixed(3);
448
- const key = r.key.substring(0, 28).padEnd(28);
449
- const type = (r.type || '').substring(0, 6).padEnd(6);
450
- const preview = r.preview.substring(0, 18).padEnd(18);
451
-
452
- console.log(`│ ${rank} │ ${score} │ ${key} │ ${type} │ ${preview}… │`);
453
- }
454
-
455
- console.log('└─────────────────────────────────────────────────────────────────────────────┘');
456
-
457
- // Show chunk context
458
- console.log('');
459
- console.log('Top result details:');
460
- const top = results.results[0];
461
- console.log(` Key: ${top.key}`);
462
- console.log(` Score: ${top.score.toFixed(4)}`);
463
- if (top.chunkTitle) console.log(` Section: ${top.chunkTitle}`);
464
- if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
465
- console.log(` Preview: ${top.preview}...`);
466
-
467
- } catch (err) {
468
- console.error(`[semantic-search] Error: ${err.message}`);
469
- process.exit(1);
470
- }
471
- }
472
-
473
- main();
2
+ /**
3
+ * Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
4
+ *
5
+ * Query embedding MUST match stored embedding model:
6
+ * 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
7
+ * 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
8
+ *
9
+ * Usage:
10
+ * node node_modules/moflo/bin/semantic-search.mjs "your search query"
11
+ * npx flo-search "your search query"
12
+ * npx flo-search "query" --limit 10
13
+ * npx flo-search "query" --namespace guidance
14
+ * npx flo-search "query" --threshold 0.3
15
+ */
16
+
17
+ import { existsSync, readFileSync } from 'fs';
18
+ import { resolve, dirname } from 'path';
19
+ import { mofloResolveURL } from './lib/moflo-resolve.mjs';
20
+ const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
21
+
22
/**
 * Walk upward from the current working directory until a directory containing
 * a package.json is found; that directory is treated as the project root.
 *
 * @returns {string} Absolute path of the project root, or the cwd itself when
 *   no package.json exists anywhere up the tree.
 */
function findProjectRoot() {
  const start = process.cwd();
  const fsRoot = resolve(start, '/');
  // Stop before the filesystem root; dirname() of the root is the root itself,
  // so the loop condition also guards against spinning forever.
  for (let current = start; current !== fsRoot; current = dirname(current)) {
    const manifest = resolve(current, 'package.json');
    if (existsSync(manifest)) {
      return current;
    }
  }
  return start;
}
31
+
32
const projectRoot = findProjectRoot();

// Paths and embedding-model constants shared by the whole script.
const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
const EMBEDDING_DIMS = 384;
const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
// 'onnx' is a legacy alias for the Xenova model — treat them as compatible vector spaces
const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);

// Parse args
const args = process.argv.slice(2);
const query = args.find(a => !a.startsWith('--'));

// Value that follows a `--flag`, or undefined when the flag is absent
// (or is the last argument with no value after it).
function flagValue(name) {
  const i = args.indexOf(name);
  return i !== -1 ? args[i + 1] : undefined;
}

// FIX: parse numeric flags with an explicit radix and fall back to the
// documented default when the value is missing or not a number. Previously a
// bad value produced NaN, and NaN silently yielded zero results downstream
// (`results.slice(0, NaN)` is `[]`; `similarity >= NaN` is always false).
const parsedLimit = Number.parseInt(flagValue('--limit') ?? '', 10);
const limit = Number.isNaN(parsedLimit) ? 5 : parsedLimit;
let namespace = args.includes('--namespace') ? flagValue('--namespace') : null;
const withTests = args.includes('--with-tests');
const parsedThreshold = Number.parseFloat(flagValue('--threshold') ?? '');
const threshold = Number.isNaN(parsedThreshold) ? 0.3 : parsedThreshold;
const json = args.includes('--json');
const debug = args.includes('--debug');

// Auto-routing: when query mentions test-related terms, also search tests namespace
const TEST_KEYWORDS = /\b(test|spec|coverage|assert|mock|stub|fixture|describe|jest|vitest|mocha|e2e|integration test)\b/i;

// A positional (non-flag) query argument is mandatory.
if (!query) {
  console.error('Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]');
  process.exit(1);
}
58
+
59
+ // ============================================================================
60
+ // Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
61
+ // ============================================================================
62
+
63
// Module-level pipeline state: populated by loadTransformersModel() on success.
let pipeline = null;
let useTransformers = false;

/**
 * Try to initialize the Transformers.js feature-extraction pipeline for the
 * Xenova MiniLM model. Any failure (package not installed, download blocked,
 * WASM init error) is non-fatal: the caller falls back to hash embeddings.
 *
 * Side effects: sets the module-level `pipeline` and `useTransformers`.
 *
 * @returns {Promise<boolean>} true when the neural pipeline is ready.
 */
async function loadTransformersModel() {
  try {
    const transformers = await import(mofloResolveURL('@xenova/transformers'));
    // Force remote model resolution and keep WASM single-threaded for
    // predictable behavior in constrained environments.
    transformers.env.allowLocalModels = false;
    transformers.env.backends.onnx.wasm.numThreads = 1;

    pipeline = await transformers.pipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
      quantized: false,
    });

    useTransformers = true;
    if (debug) console.error('[semantic-search] Using Transformers.js neural model');
    return true;
  } catch (err) {
    // Deliberate best-effort: report only under --debug, then signal fallback.
    if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${err.message?.split('\n')[0]}`);
    useTransformers = false;
    return false;
  }
}
85
+
86
/**
 * Embed `text` with the loaded Transformers.js pipeline (mean-pooled,
 * L2-normalized). Returns null when the pipeline is not loaded or when
 * inference throws — callers interpret null as "use the hash fallback".
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} Embedding values, or null on failure.
 */
async function generateNeuralEmbedding(text) {
  if (!pipeline) {
    return null;
  }
  try {
    const result = await pipeline(text, { pooling: 'mean', normalize: true });
    const values = Array.from(result.data);
    return values;
  } catch {
    // Inference errors are non-fatal by design; null triggers the fallback.
    return null;
  }
}
95
+
96
+ // ============================================================================
97
+ // Domain-Aware Semantic Hash Embeddings (fallback)
98
+ // ============================================================================
99
+
100
// Keyword clusters used by the hash-embedding fallback. When a text mentions
// words from a cluster, that cluster's signature vector is mixed into its
// embedding (see semanticHashEmbed), pulling same-domain texts closer together.
// NOTE(review): the lists appear tuned to a specific project stack (TypeORM,
// Fastify, MUI, …) — extend a cluster's keyword list rather than renaming the
// cluster keys, since the key names seed the signature hashes.
const DOMAIN_CLUSTERS = {
  database: ['typeorm', 'mongodb', 'database', 'entity', 'schema', 'table', 'collection',
    'query', 'sql', 'nosql', 'orm', 'model', 'migration', 'repository', 'column',
    'relation', 'foreign', 'primary', 'index', 'constraint', 'transaction',
    'mikroorm', 'mikro', 'postgresql', 'postgres', 'soft', 'delete', 'deletedat'],
  frontend: ['react', 'component', 'ui', 'styling', 'css', 'html', 'jsx', 'tsx', 'frontend',
    'material', 'mui', 'tailwind', 'dom', 'render', 'hook', 'state', 'props',
    'redux', 'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
    'mantis', 'syncfusion', 'scheduler', 'i18n', 'intl', 'locale'],
  backend: ['fastify', 'api', 'route', 'handler', 'rest', 'endpoint', 'server', 'controller',
    'middleware', 'request', 'response', 'http', 'express', 'nest', 'graphql',
    'websocket', 'socket', 'cors', 'auth', 'jwt', 'session', 'cookie',
    'awilix', 'dependency', 'injection', 'scope'],
  testing: ['test', 'testing', 'vitest', 'jest', 'mock', 'spy', 'assert', 'expect', 'describe',
    'it', 'spec', 'unit', 'integration', 'e2e', 'playwright', 'cypress', 'coverage',
    'fixture', 'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
    'anti-pattern', 'antipattern', 'mocking'],
  tenancy: ['tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi', 'multitenant',
    'organization', 'workspace', 'account', 'customer', 'client', 'subdomain'],
  security: ['security', 'auth', 'authentication', 'authorization', 'permission', 'role',
    'access', 'token', 'jwt', 'oauth', 'password', 'encrypt', 'hash', 'salt',
    'csrf', 'xss', 'injection', 'sanitize', 'validate', 'rbac'],
  patterns: ['pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter', 'facade',
    'observer', 'strategy', 'command', 'repository', 'usecase', 'domain', 'ddd',
    'clean', 'architecture', 'solid', 'dry', 'kiss', 'functional', 'pipeasync'],
  workflow: ['workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build', 'actions',
    'hook', 'trigger', 'job', 'step', 'artifact', 'release', 'version', 'tag'],
  memory: ['memory', 'cache', 'store', 'persist', 'storage', 'redis', 'session', 'state',
    'buffer', 'queue', 'stack', 'heap', 'gc', 'leak', 'embedding', 'vector', 'hnsw',
    'semantic', 'search', 'index', 'retrieval'],
  agent: ['agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker', 'spawn',
    'parallel', 'concurrent', 'async', 'promise', 'queue', 'priority', 'schedule'],
  github: ['github', 'issue', 'branch', 'pr', 'pull', 'request', 'merge', 'commit', 'push',
    'clone', 'fork', 'remote', 'origin', 'main', 'master', 'checkout', 'rebase',
    'squash', 'repository', 'repo', 'gh', 'git', 'assignee', 'label', 'mandatory',
    'checklist', 'closes', 'fixes', 'conventional', 'feat', 'refactor'],
  documentation: ['guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
    'reference', 'standard', 'convention', 'rule', 'policy', 'template',
    'example', 'usage', 'instruction', 'meta', 'index', 'umbrella', 'claude',
    'optimized', 'audience', 'structure', 'format', 'markdown']
};

// Stop words. These are NOT dropped from the embedding — semanticHashEmbed
// still hashes them, just at a much lower weight (0.2) than content words,
// and bigrams made of two stop words are skipped entirely.
const COMMON_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
  'can', 'need', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
  'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'and', 'but',
  'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not', 'only', 'own', 'same', 'than',
  'too', 'very', 'just', 'also', 'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then',
  'else', 'when', 'where', 'why', 'how', 'all', 'each', 'every', 'any', 'some', 'no', 'yes',
  'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like', 'make', 'made'
]);
152
+
153
/**
 * 32-bit string hash (MurmurHash2-style multiply/xor-shift mixing) used to map
 * words and n-grams onto embedding dimensions deterministically.
 *
 * @param {string} str - Input string.
 * @param {number} [seed=0] - Seed, letting the same string land on different
 *   dimensions for different feature families.
 * @returns {number} Unsigned 32-bit hash.
 */
function hash(str, seed = 0) {
  const MULTIPLIER = 0x5bd1e995;
  let state = seed ^ str.length;
  // Iterate UTF-16 code units (charCodeAt), matching the original unit-wise mix.
  let pos = 0;
  while (pos < str.length) {
    state ^= str.charCodeAt(pos);
    state = Math.imul(state, MULTIPLIER);
    state ^= state >>> 15;
    pos += 1;
  }
  // >>> 0 coerces the signed 32-bit intermediate to an unsigned value.
  return state >>> 0;
}
162
+
163
// Pre-compute one sparse Float32Array signature per domain cluster: every
// keyword turns on two hash-derived positions in its domain's vector. These
// fixed signatures are what semanticHashEmbed blends into text embeddings.
const domainSignatures = {};
for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
  const signature = new Float32Array(EMBEDDING_DIMS);
  for (const keyword of keywords) {
    const tagged = keyword + '_dom_' + domain;
    // Two seeded hashes per keyword reduce the impact of a single collision.
    for (let variant = 0; variant < 2; variant++) {
      signature[hash(tagged, variant) % EMBEDDING_DIMS] = 1;
    }
  }
  domainSignatures[domain] = signature;
}
175
+
176
/**
 * Deterministic "semantic hash" embedding — the fallback when the neural
 * model is unavailable. Blends three feature families into one vector:
 *   1. domain-cluster signatures, weighted by how many cluster keywords appear;
 *   2. signed hashed word features, with stop words down-weighted;
 *   3. signed hashed bigram/trigram features for local phrase context.
 * The result is L2-normalized so a plain dot product equals cosine similarity.
 *
 * @param {string} text - Text to embed.
 * @param {number} [dims=EMBEDDING_DIMS] - Output dimensionality.
 * @returns {Float32Array} L2-normalized vector (all zeros for empty/symbol-only input).
 */
function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
  const vec = new Float32Array(dims);
  const lowerText = text.toLowerCase();
  // Tokenize: strip non-alphanumerics, keep tokens of length >= 2.
  const words = lowerText.replace(/[^a-z0-9\s]/g, ' ').split(/\s+/).filter(w => w.length > 1);

  if (words.length === 0) return vec;

  // 1. Domain signatures: keyword matching is substring-based (includes), and
  //    the blend weight grows with the match count but is capped at 2.0.
  for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
    let matchCount = 0;
    for (const kw of keywords) {
      if (lowerText.includes(kw)) matchCount++;
    }
    if (matchCount > 0) {
      const weight = Math.min(2.0, 0.5 + matchCount * 0.3);
      const sig = domainSignatures[domain];
      for (let i = 0; i < dims; i++) {
        vec[i] += sig[i] * weight;
      }
    }
  }

  // 2. Word features: each word hits 3 hashed positions with a random sign
  //    (sign hashing reduces collision bias). Stop words get weight 0.2,
  //    long words (>6 chars) 0.8, everything else 0.5.
  for (const word of words) {
    const isCommon = COMMON_WORDS.has(word);
    const weight = isCommon ? 0.2 : (word.length > 6 ? 0.8 : 0.5);
    for (let h = 0; h < 3; h++) {
      const idx = hash(word, h * 17) % dims;
      const sign = (hash(word, h * 31 + 1) % 2 === 0) ? 1 : -1;
      vec[idx] += sign * weight;
    }
  }

  // 3a. Bigrams, skipped when both constituent words are stop words.
  for (let i = 0; i < words.length - 1; i++) {
    if (COMMON_WORDS.has(words[i]) && COMMON_WORDS.has(words[i + 1])) continue;
    const bigram = words[i] + '_' + words[i + 1];
    const idx = hash(bigram, 42) % dims;
    const sign = (hash(bigram, 43) % 2 === 0) ? 1 : -1;
    vec[idx] += sign * 0.4;
  }

  // 3b. Trigrams, always added (no stop-word filter here).
  for (let i = 0; i < words.length - 2; i++) {
    const trigram = words[i] + '_' + words[i + 1] + '_' + words[i + 2];
    const idx = hash(trigram, 99) % dims;
    const sign = (hash(trigram, 100) % 2 === 0) ? 1 : -1;
    vec[idx] += sign * 0.3;
  }

  // L2 normalize so cosineSimilarity can be a bare dot product.
  let norm = 0;
  for (let i = 0; i < dims; i++) norm += vec[i] * vec[i];
  norm = Math.sqrt(norm);
  if (norm > 0) {
    for (let i = 0; i < dims; i++) vec[i] /= norm;
  }

  return vec;
}
236
+
237
+ // ============================================================================
238
+ // Unified Embedding Generator (matches stored embeddings)
239
+ // ============================================================================
240
+
241
/**
 * Generate query embedding using the SAME model as stored embeddings.
 * Checks what model was used for stored entries and matches it.
 *
 * @param {string} queryText - Raw query text to embed.
 * @param {Object} db - Open sql.js database handle (not closed here).
 * @returns {Promise<{embedding: number[], model: string}>} 384-dim vector plus
 *   the model tag actually used for the query.
 */
async function generateQueryEmbedding(queryText, db) {
  // Majority vote: which model produced most of the stored, active embeddings?
  // The namespace filter (when given) is bound as a parameter, never interpolated.
  const modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    ${namespace ? "AND namespace = ?" : ""}
    GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
  const modelStmt = db.prepare(modelCheckSql);
  modelStmt.bind(namespace ? [namespace] : []);
  const modelCheck = modelStmt.step() ? modelStmt.getAsObject() : null;
  modelStmt.free();

  // Empty table (or untagged rows) defaults to the hash model.
  const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;

  if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);

  // If stored embeddings are neural, try to use neural for the query too.
  // CONSISTENCY FIX: use NEURAL_ALIASES (canonical name + legacy 'onnx' tag)
  // exactly as semanticSearch() does, instead of duplicating the two string
  // comparisons here — one source of truth for which tags share a vector space.
  if (NEURAL_ALIASES.has(storedModel)) {
    await loadTransformersModel();
    if (useTransformers) {
      const neuralEmb = await generateNeuralEmbedding(queryText);
      if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
        return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
      }
    }
    // Neural failed — warn about model mismatch (stderr, so --json stdout stays clean).
    if (!json) {
      console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
      console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
    }
  }

  // Use hash embeddings (either matching stored hash model, or as fallback).
  const hashEmb = Array.from(semanticHashEmbed(queryText));
  return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
}
281
+
282
+ // ============================================================================
283
+ // Search Functions
284
+ // ============================================================================
285
+
286
/**
 * Similarity between two embedding vectors. Both stored and query embeddings
 * are L2-normalized at creation time, so the plain dot product IS the cosine
 * similarity — no division by norms is needed here.
 *
 * @param {number[]|Float32Array} a - First vector.
 * @param {number[]|Float32Array} b - Second vector.
 * @returns {number} Dot product, or 0 for null/mismatched-length inputs.
 */
function cosineSimilarity(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length === b.length;
  if (!comparable) return 0;

  let accumulated = 0;
  let i = 0;
  while (i < a.length) {
    accumulated += a[i] * b[i];
    i += 1;
  }
  return accumulated;
}
294
+
295
/**
 * Load the on-disk SQLite file into an in-memory sql.js database.
 * The returned handle is read-only in practice (changes are never written
 * back to disk by this script); callers are responsible for close().
 *
 * @returns {Promise<Object>} sql.js Database instance.
 * @throws {Error} When .swarm/memory.db does not exist.
 */
async function getDb() {
  const dbExists = existsSync(DB_PATH);
  if (!dbExists) {
    throw new Error(`Database not found: ${DB_PATH}`);
  }
  const sqlJs = await initSqlJs();
  const bytes = readFileSync(DB_PATH);
  return new sqlJs.Database(bytes);
}
303
+
304
/**
 * Brute-force cosine-similarity search over all active embedded entries.
 *
 * @param {string} queryText - The search query.
 * @param {Object} [options]
 * @param {number} [options.limit=5] - Max results returned.
 * @param {string|null} [options.namespace=null] - Restrict to one namespace.
 * @param {number} [options.threshold=0.3] - Minimum similarity to include.
 * @returns {Promise<Object>} { query, results, totalMatches, searchTime, indexType, model }.
 */
async function semanticSearch(queryText, options = {}) {
  const { limit = 5, namespace = null, threshold = 0.3 } = options;
  const startTime = performance.now();

  const db = await getDb();
  try {
    // Generate query embedding matching the stored model.
    const { embedding: queryEmbedding, model: queryModel } = await generateQueryEmbedding(queryText, db);

    if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);

    // Get all entries with embeddings (namespace bound as a parameter).
    let sql = `
      SELECT id, key, namespace, content, embedding, embedding_model, metadata
      FROM memory_entries
      WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    `;
    const params = [];

    if (namespace) {
      sql += ` AND namespace = ?`;
      params.push(namespace);
    }

    const stmt = db.prepare(sql);
    const results = [];
    try {
      stmt.bind(params);

      // Score every candidate row.
      while (stmt.step()) {
        const entry = stmt.getAsObject();
        try {
          // Only compare vectors from the same embedding space. Untagged rows
          // (no embedding_model) are tolerated; 'onnx' and the Xenova name are
          // treated as the same neural space.
          const storedIsNeural = NEURAL_ALIASES.has(entry.embedding_model);
          const queryIsNeural = NEURAL_ALIASES.has(queryModel);
          if (entry.embedding_model && entry.embedding_model !== queryModel && !(storedIsNeural && queryIsNeural)) continue;

          const embedding = JSON.parse(entry.embedding);
          if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;

          const similarity = cosineSimilarity(queryEmbedding, embedding);

          if (similarity >= threshold) {
            let metadata = {};
            try {
              metadata = JSON.parse(entry.metadata || '{}');
            } catch {
              // Malformed metadata is non-fatal; defaults are used below.
            }

            results.push({
              key: entry.key,
              namespace: entry.namespace,
              score: similarity,
              preview: entry.content.substring(0, 150).replace(/\n/g, ' '),
              type: metadata.type || 'unknown',
              parentDoc: metadata.parentDoc || null,
              chunkTitle: metadata.chunkTitle || null,
            });
          }
        } catch {
          // Skip entries with invalid embeddings
        }
      }
    } finally {
      // FIX: free the prepared statement even if scoring throws, instead of
      // leaking it on the error path.
      stmt.free();
    }

    // Sort by similarity (descending) and limit.
    results.sort((a, b) => b.score - a.score);
    const topResults = results.slice(0, limit);

    const searchTime = performance.now() - startTime;

    return {
      query: queryText,
      results: topResults,
      totalMatches: results.length,
      searchTime: `${searchTime.toFixed(0)}ms`,
      indexType: 'vector-cosine',
      model: queryModel,
    };
  } finally {
    // FIX: close the database in finally so an error during embedding or
    // scanning no longer leaks the in-memory sql.js handle.
    db.close();
  }
}
384
+
385
+ // ============================================================================
386
+ // Main
387
+ // ============================================================================
388
+
389
/**
 * CLI entry point: run the search (optionally merged with the `tests`
 * namespace), then render either JSON (--json) or a human-readable table.
 * Exits with code 1 on any error. Diagnostics go to stderr; results to stdout.
 */
async function main() {
  if (!json) {
    console.log('');
    console.log(`[semantic-search] Query: "${query}"`);
  }

  try {
    // --with-tests: search both the specified namespace (or code-map) and tests
    // Auto-route: if query contains test keywords and no namespace specified, also search tests
    const autoRouteTests = !namespace && TEST_KEYWORDS.test(query);
    let results;

    if (withTests || autoRouteTests) {
      const primaryNs = namespace || 'code-map';
      const primaryResults = await semanticSearch(query, { limit, namespace: primaryNs, threshold });
      const testResults = await semanticSearch(query, { limit, namespace: 'tests', threshold });

      // Merge and re-sort by score
      const merged = [...primaryResults.results, ...testResults.results]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);

      results = {
        ...primaryResults,
        results: merged,
        totalMatches: primaryResults.totalMatches + testResults.totalMatches,
        // parseInt stops at the "ms" suffix, so the two timing strings sum cleanly.
        searchTime: `${parseInt(primaryResults.searchTime) + parseInt(testResults.searchTime)}ms`,
        namespaces: [primaryNs, 'tests'],
      };

      if (!json && autoRouteTests) {
        console.log(`[semantic-search] Auto-routed to tests namespace (query contains test keywords)`);
      }
    } else {
      results = await semanticSearch(query, { limit, namespace, threshold });
    }

    if (json) {
      // Machine-readable mode: emit only the raw result object.
      console.log(JSON.stringify(results, null, 2));
      return;
    }

    console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
    console.log('');

    if (results.results.length === 0) {
      console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
      return;
    }

    // Display results as a fixed-width box-drawing table; fields are truncated
    // and padded to keep the columns aligned.
    console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
    console.log('│ Rank │ Score │ Key                          │ Type   │ Preview            │');
    console.log('├─────────────────────────────────────────────────────────────────────────────┤');

    for (let i = 0; i < results.results.length; i++) {
      const r = results.results[i];
      const rank = String(i + 1).padStart(4);
      const score = r.score.toFixed(3);
      const key = r.key.substring(0, 28).padEnd(28);
      const type = (r.type || '').substring(0, 6).padEnd(6);
      const preview = r.preview.substring(0, 18).padEnd(18);

      console.log(`│ ${rank} │ ${score} │ ${key} │ ${type} │ ${preview}… │`);
    }

    console.log('└─────────────────────────────────────────────────────────────────────────────┘');

    // Show chunk context for the best match (guarded above: results is non-empty).
    console.log('');
    console.log('Top result details:');
    const top = results.results[0];
    console.log(`  Key: ${top.key}`);
    console.log(`  Score: ${top.score.toFixed(4)}`);
    if (top.chunkTitle) console.log(`  Section: ${top.chunkTitle}`);
    if (top.parentDoc) console.log(`  Parent: ${top.parentDoc}`);
    console.log(`  Preview: ${top.preview}...`);

  } catch (err) {
    console.error(`[semantic-search] Error: ${err.message}`);
    process.exit(1);
  }
}

main();