moflo 4.8.10 → 4.8.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/.claude/agents/browser/browser-agent.yaml +182 -0
  2. package/.claude/agents/core/coder.md +265 -265
  3. package/.claude/agents/core/planner.md +167 -167
  4. package/.claude/agents/core/researcher.md +189 -189
  5. package/.claude/agents/core/reviewer.md +325 -325
  6. package/.claude/agents/core/tester.md +318 -318
  7. package/.claude/agents/dual-mode/codex-coordinator.md +224 -224
  8. package/.claude/agents/dual-mode/codex-worker.md +211 -211
  9. package/.claude/agents/dual-mode/dual-orchestrator.md +291 -291
  10. package/.claude/agents/github/code-review-swarm.md +537 -537
  11. package/.claude/agents/github/github-modes.md +172 -172
  12. package/.claude/agents/github/issue-tracker.md +318 -318
  13. package/.claude/agents/github/multi-repo-swarm.md +552 -552
  14. package/.claude/agents/github/pr-manager.md +190 -190
  15. package/.claude/agents/github/project-board-sync.md +508 -508
  16. package/.claude/agents/github/release-manager.md +366 -366
  17. package/.claude/agents/github/release-swarm.md +582 -582
  18. package/.claude/agents/github/repo-architect.md +397 -397
  19. package/.claude/agents/github/swarm-issue.md +572 -572
  20. package/.claude/agents/github/swarm-pr.md +427 -427
  21. package/.claude/agents/github/sync-coordinator.md +451 -451
  22. package/.claude/agents/github/workflow-automation.md +634 -634
  23. package/.claude/agents/goal/code-goal-planner.md +445 -445
  24. package/.claude/agents/hive-mind/collective-intelligence-coordinator.md +129 -129
  25. package/.claude/agents/hive-mind/queen-coordinator.md +202 -202
  26. package/.claude/agents/hive-mind/scout-explorer.md +241 -241
  27. package/.claude/agents/hive-mind/swarm-memory-manager.md +192 -192
  28. package/.claude/agents/hive-mind/worker-specialist.md +216 -216
  29. package/.claude/agents/neural/safla-neural.md +73 -73
  30. package/.claude/agents/reasoning/goal-planner.md +72 -72
  31. package/.claude/agents/swarm/adaptive-coordinator.md +395 -395
  32. package/.claude/agents/swarm/hierarchical-coordinator.md +326 -326
  33. package/.claude/agents/swarm/mesh-coordinator.md +391 -391
  34. package/.claude/agents/templates/migration-plan.md +745 -745
  35. package/.claude/commands/agents/agent-spawning.md +28 -28
  36. package/.claude/commands/analysis/COMMAND_COMPLIANCE_REPORT.md +53 -53
  37. package/.claude/commands/analysis/bottleneck-detect.md +162 -162
  38. package/.claude/commands/analysis/performance-bottlenecks.md +58 -58
  39. package/.claude/commands/analysis/token-efficiency.md +44 -44
  40. package/.claude/commands/automation/auto-agent.md +122 -122
  41. package/.claude/commands/automation/self-healing.md +105 -105
  42. package/.claude/commands/automation/session-memory.md +89 -89
  43. package/.claude/commands/automation/smart-agents.md +72 -72
  44. package/.claude/commands/coordination/init.md +44 -44
  45. package/.claude/commands/coordination/orchestrate.md +43 -43
  46. package/.claude/commands/coordination/spawn.md +45 -45
  47. package/.claude/commands/coordination/swarm-init.md +85 -85
  48. package/.claude/commands/github/github-modes.md +146 -146
  49. package/.claude/commands/github/github-swarm.md +121 -121
  50. package/.claude/commands/github/issue-tracker.md +291 -291
  51. package/.claude/commands/github/pr-manager.md +169 -169
  52. package/.claude/commands/github/release-manager.md +337 -337
  53. package/.claude/commands/github/repo-architect.md +366 -366
  54. package/.claude/commands/github/sync-coordinator.md +300 -300
  55. package/.claude/commands/memory/neural.md +47 -47
  56. package/.claude/commands/monitoring/agents.md +44 -44
  57. package/.claude/commands/monitoring/status.md +46 -46
  58. package/.claude/commands/optimization/auto-topology.md +61 -61
  59. package/.claude/commands/optimization/parallel-execution.md +49 -49
  60. package/.claude/commands/sparc/analyzer.md +51 -51
  61. package/.claude/commands/sparc/architect.md +53 -53
  62. package/.claude/commands/sparc/ask.md +97 -97
  63. package/.claude/commands/sparc/batch-executor.md +54 -54
  64. package/.claude/commands/sparc/code.md +89 -89
  65. package/.claude/commands/sparc/coder.md +54 -54
  66. package/.claude/commands/sparc/debug.md +83 -83
  67. package/.claude/commands/sparc/debugger.md +54 -54
  68. package/.claude/commands/sparc/designer.md +53 -53
  69. package/.claude/commands/sparc/devops.md +109 -109
  70. package/.claude/commands/sparc/docs-writer.md +80 -80
  71. package/.claude/commands/sparc/documenter.md +54 -54
  72. package/.claude/commands/sparc/innovator.md +54 -54
  73. package/.claude/commands/sparc/integration.md +83 -83
  74. package/.claude/commands/sparc/mcp.md +117 -117
  75. package/.claude/commands/sparc/memory-manager.md +54 -54
  76. package/.claude/commands/sparc/optimizer.md +54 -54
  77. package/.claude/commands/sparc/orchestrator.md +131 -131
  78. package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -83
  79. package/.claude/commands/sparc/refinement-optimization-mode.md +83 -83
  80. package/.claude/commands/sparc/researcher.md +54 -54
  81. package/.claude/commands/sparc/reviewer.md +54 -54
  82. package/.claude/commands/sparc/security-review.md +80 -80
  83. package/.claude/commands/sparc/sparc-modes.md +174 -174
  84. package/.claude/commands/sparc/sparc.md +111 -111
  85. package/.claude/commands/sparc/spec-pseudocode.md +80 -80
  86. package/.claude/commands/sparc/supabase-admin.md +348 -348
  87. package/.claude/commands/sparc/swarm-coordinator.md +54 -54
  88. package/.claude/commands/sparc/tdd.md +54 -54
  89. package/.claude/commands/sparc/tester.md +54 -54
  90. package/.claude/commands/sparc/tutorial.md +79 -79
  91. package/.claude/commands/sparc/workflow-manager.md +54 -54
  92. package/.claude/commands/sparc.md +166 -166
  93. package/.claude/commands/swarm/analysis.md +95 -95
  94. package/.claude/commands/swarm/development.md +96 -96
  95. package/.claude/commands/swarm/examples.md +168 -168
  96. package/.claude/commands/swarm/maintenance.md +102 -102
  97. package/.claude/commands/swarm/optimization.md +117 -117
  98. package/.claude/commands/swarm/research.md +136 -136
  99. package/.claude/commands/swarm/testing.md +131 -131
  100. package/.claude/commands/training/neural-patterns.md +73 -73
  101. package/.claude/commands/training/specialization.md +62 -62
  102. package/.claude/commands/workflows/development.md +77 -77
  103. package/.claude/commands/workflows/research.md +62 -62
  104. package/.claude/guidance/moflo-bootstrap.md +129 -0
  105. package/.claude/guidance/{agent-bootstrap.md → shipped/agent-bootstrap.md} +126 -126
  106. package/.claude/guidance/{guidance-memory-strategy.md → shipped/guidance-memory-strategy.md} +262 -262
  107. package/.claude/guidance/{memory-strategy.md → shipped/memory-strategy.md} +204 -204
  108. package/.claude/guidance/{moflo.md → shipped/moflo.md} +45 -31
  109. package/.claude/guidance/{task-swarm-integration.md → shipped/task-swarm-integration.md} +441 -348
  110. package/.claude/helpers/gate-hook.mjs +50 -0
  111. package/.claude/helpers/gate.cjs +138 -236
  112. package/.claude/helpers/hook-handler.cjs +64 -326
  113. package/.claude/helpers/post-commit +16 -0
  114. package/.claude/helpers/pre-commit +26 -0
  115. package/.claude/helpers/prompt-hook.mjs +72 -0
  116. package/.claude/scripts/build-embeddings.mjs +549 -0
  117. package/.claude/scripts/generate-code-map.mjs +697 -0
  118. package/.claude/scripts/hooks.mjs +656 -0
  119. package/.claude/scripts/index-guidance.mjs +893 -0
  120. package/.claude/scripts/index-tests.mjs +710 -0
  121. package/.claude/scripts/semantic-search.mjs +473 -0
  122. package/.claude/scripts/session-start-launcher.mjs +226 -0
  123. package/.claude/settings.json +351 -290
  124. package/.claude/settings.local.json +4 -3
  125. package/.claude/skills/browser/SKILL.md +204 -0
  126. package/.claude/skills/fl/SKILL.md +29 -23
  127. package/.claude/skills/flo/SKILL.md +29 -23
  128. package/.claude/skills/github-code-review/SKILL.md +4 -4
  129. package/.claude/skills/github-multi-repo/SKILL.md +8 -8
  130. package/.claude/skills/github-project-management/SKILL.md +6 -6
  131. package/.claude/skills/github-release-management/SKILL.md +12 -12
  132. package/.claude/skills/github-workflow-automation/SKILL.md +6 -6
  133. package/.claude/skills/hooks-automation/SKILL.md +1201 -1201
  134. package/.claude/skills/performance-analysis/SKILL.md +563 -563
  135. package/.claude/skills/sparc-methodology/SKILL.md +64 -64
  136. package/.claude/skills/swarm-advanced/SKILL.md +77 -77
  137. package/.claude/workflow-state.json +9 -0
  138. package/.claude-plugin/README.md +3 -3
  139. package/.claude-plugin/docs/PLUGIN_SUMMARY.md +3 -3
  140. package/.claude-plugin/docs/QUICKSTART.md +4 -4
  141. package/.claude-plugin/marketplace.json +3 -3
  142. package/.claude-plugin/plugin.json +3 -3
  143. package/.claude-plugin/scripts/install.sh +9 -9
  144. package/.claude-plugin/scripts/verify.sh +7 -7
  145. package/README.md +311 -116
  146. package/bin/gate-hook.mjs +50 -0
  147. package/bin/gate.cjs +138 -0
  148. package/bin/hook-handler.cjs +83 -0
  149. package/bin/hooks.mjs +72 -12
  150. package/bin/index-guidance.mjs +29 -35
  151. package/bin/index-tests.mjs +710 -0
  152. package/bin/lib/process-manager.mjs +243 -0
  153. package/bin/lib/registry-cleanup.cjs +41 -0
  154. package/bin/prompt-hook.mjs +72 -0
  155. package/bin/semantic-search.mjs +472 -440
  156. package/bin/session-start-launcher.mjs +81 -31
  157. package/bin/setup-project.mjs +65 -65
  158. package/package.json +4 -2
  159. package/src/@claude-flow/cli/README.md +1 -1
  160. package/src/@claude-flow/cli/bin/cli.js +175 -175
  161. package/src/@claude-flow/cli/dist/src/commands/doctor.js +1091 -736
  162. package/src/@claude-flow/cli/dist/src/commands/github.d.ts +12 -0
  163. package/src/@claude-flow/cli/dist/src/commands/github.js +505 -0
  164. package/src/@claude-flow/cli/dist/src/commands/hive-mind.js +90 -90
  165. package/src/@claude-flow/cli/dist/src/commands/index.d.ts +1 -0
  166. package/src/@claude-flow/cli/dist/src/commands/index.js +7 -0
  167. package/src/@claude-flow/cli/dist/src/config-adapter.js +1 -1
  168. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.d.ts +29 -24
  169. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.js +73 -494
  170. package/src/@claude-flow/cli/dist/src/init/executor.js +109 -5
  171. package/src/@claude-flow/cli/dist/src/init/helpers-generator.d.ts +14 -0
  172. package/src/@claude-flow/cli/dist/src/init/helpers-generator.js +156 -24
  173. package/src/@claude-flow/cli/dist/src/init/mcp-generator.js +20 -20
  174. package/src/@claude-flow/cli/dist/src/init/moflo-init.d.ts +30 -23
  175. package/src/@claude-flow/cli/dist/src/init/moflo-init.js +727 -670
  176. package/src/@claude-flow/cli/dist/src/init/settings-generator.js +23 -14
  177. package/src/@claude-flow/cli/dist/src/mcp-server.js +3 -3
  178. package/src/@claude-flow/cli/dist/src/plugins/manager.js +9 -8
  179. package/src/@claude-flow/cli/dist/src/services/worker-daemon.d.ts +1 -0
  180. package/src/@claude-flow/cli/dist/src/services/worker-daemon.js +3 -1
  181. package/src/@claude-flow/cli/dist/src/services/workflow-gate.js +10 -10
  182. package/src/@claude-flow/cli/package.json +106 -106
@@ -1,441 +1,473 @@
1
1
  #!/usr/bin/env node
2
- /**
3
- * Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
4
- *
5
- * Query embedding MUST match stored embedding model:
6
- * 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
7
- * 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
8
- *
9
- * Usage:
10
- * node node_modules/moflo/bin/semantic-search.mjs "your search query"
11
- * npx flo-search "your search query"
12
- * npx flo-search "query" --limit 10
13
- * npx flo-search "query" --namespace guidance
14
- * npx flo-search "query" --threshold 0.3
15
- */
16
-
17
- import { existsSync, readFileSync } from 'fs';
18
- import { resolve, dirname } from 'path';
19
- import { mofloResolveURL } from './lib/moflo-resolve.mjs';
20
- const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
21
-
22
- function findProjectRoot() {
23
- let dir = process.cwd();
24
- const root = resolve(dir, '/');
25
- while (dir !== root) {
26
- if (existsSync(resolve(dir, 'package.json'))) return dir;
27
- dir = dirname(dir);
28
- }
29
- return process.cwd();
30
- }
31
-
32
- const projectRoot = findProjectRoot();
33
-
34
- const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
35
- const EMBEDDING_DIMS = 384;
36
- const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
37
- const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
38
- // 'onnx' is a legacy alias for the Xenova model — treat them as compatible vector spaces
39
- const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);
40
-
41
- // Parse args
42
- const args = process.argv.slice(2);
43
- const query = args.find(a => !a.startsWith('--'));
44
- const limit = args.includes('--limit') ? parseInt(args[args.indexOf('--limit') + 1]) : 5;
45
- const namespace = args.includes('--namespace') ? args[args.indexOf('--namespace') + 1] : null;
46
- const threshold = args.includes('--threshold') ? parseFloat(args[args.indexOf('--threshold') + 1]) : 0.3;
47
- const json = args.includes('--json');
48
- const debug = args.includes('--debug');
49
-
50
- if (!query) {
51
- console.error('Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]');
52
- process.exit(1);
53
- }
54
-
55
- // ============================================================================
56
- // Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
57
- // ============================================================================
58
-
59
- let pipeline = null;
60
- let useTransformers = false;
61
-
62
- async function loadTransformersModel() {
63
- try {
64
- const { env, pipeline: createPipeline } = await import(mofloResolveURL('@xenova/transformers'));
65
- env.allowLocalModels = false;
66
- env.backends.onnx.wasm.numThreads = 1;
67
-
68
- pipeline = await createPipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
69
- quantized: false,
70
- });
71
-
72
- useTransformers = true;
73
- if (debug) console.error('[semantic-search] Using Transformers.js neural model');
74
- return true;
75
- } catch (err) {
76
- if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${err.message?.split('\n')[0]}`);
77
- useTransformers = false;
78
- return false;
79
- }
80
- }
81
-
82
- async function generateNeuralEmbedding(text) {
83
- if (!pipeline) return null;
84
- try {
85
- const output = await pipeline(text, { pooling: 'mean', normalize: true });
86
- return Array.from(output.data);
87
- } catch {
88
- return null;
89
- }
90
- }
91
-
92
- // ============================================================================
93
- // Domain-Aware Semantic Hash Embeddings (fallback)
94
- // ============================================================================
95
-
96
- const DOMAIN_CLUSTERS = {
97
- database: ['typeorm', 'mongodb', 'database', 'entity', 'schema', 'table', 'collection',
98
- 'query', 'sql', 'nosql', 'orm', 'model', 'migration', 'repository', 'column',
99
- 'relation', 'foreign', 'primary', 'index', 'constraint', 'transaction',
100
- 'mikroorm', 'mikro', 'postgresql', 'postgres', 'soft', 'delete', 'deletedat'],
101
- frontend: ['react', 'component', 'ui', 'styling', 'css', 'html', 'jsx', 'tsx', 'frontend',
102
- 'material', 'mui', 'tailwind', 'dom', 'render', 'hook', 'state', 'props',
103
- 'redux', 'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
104
- 'mantis', 'syncfusion', 'scheduler', 'i18n', 'intl', 'locale'],
105
- backend: ['fastify', 'api', 'route', 'handler', 'rest', 'endpoint', 'server', 'controller',
106
- 'middleware', 'request', 'response', 'http', 'express', 'nest', 'graphql',
107
- 'websocket', 'socket', 'cors', 'auth', 'jwt', 'session', 'cookie',
108
- 'awilix', 'dependency', 'injection', 'scope'],
109
- testing: ['test', 'testing', 'vitest', 'jest', 'mock', 'spy', 'assert', 'expect', 'describe',
110
- 'it', 'spec', 'unit', 'integration', 'e2e', 'playwright', 'cypress', 'coverage',
111
- 'fixture', 'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
112
- 'anti-pattern', 'antipattern', 'mocking'],
113
- tenancy: ['tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi', 'multitenant',
114
- 'organization', 'workspace', 'account', 'customer', 'client', 'subdomain'],
115
- security: ['security', 'auth', 'authentication', 'authorization', 'permission', 'role',
116
- 'access', 'token', 'jwt', 'oauth', 'password', 'encrypt', 'hash', 'salt',
117
- 'csrf', 'xss', 'injection', 'sanitize', 'validate', 'rbac'],
118
- patterns: ['pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter', 'facade',
119
- 'observer', 'strategy', 'command', 'repository', 'usecase', 'domain', 'ddd',
120
- 'clean', 'architecture', 'solid', 'dry', 'kiss', 'functional', 'pipeasync'],
121
- workflow: ['workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build', 'actions',
122
- 'hook', 'trigger', 'job', 'step', 'artifact', 'release', 'version', 'tag'],
123
- memory: ['memory', 'cache', 'store', 'persist', 'storage', 'redis', 'session', 'state',
124
- 'buffer', 'queue', 'stack', 'heap', 'gc', 'leak', 'embedding', 'vector', 'hnsw',
125
- 'semantic', 'search', 'index', 'retrieval'],
126
- agent: ['agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker', 'spawn',
127
- 'parallel', 'concurrent', 'async', 'promise', 'queue', 'priority', 'schedule'],
128
- github: ['github', 'issue', 'branch', 'pr', 'pull', 'request', 'merge', 'commit', 'push',
129
- 'clone', 'fork', 'remote', 'origin', 'main', 'master', 'checkout', 'rebase',
130
- 'squash', 'repository', 'repo', 'gh', 'git', 'assignee', 'label', 'mandatory',
131
- 'checklist', 'closes', 'fixes', 'conventional', 'feat', 'refactor'],
132
- documentation: ['guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
133
- 'reference', 'standard', 'convention', 'rule', 'policy', 'template',
134
- 'example', 'usage', 'instruction', 'meta', 'index', 'umbrella', 'claude',
135
- 'optimized', 'audience', 'structure', 'format', 'markdown']
136
- };
137
-
138
- const COMMON_WORDS = new Set([
139
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
140
- 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
141
- 'can', 'need', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
142
- 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'and', 'but',
143
- 'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not', 'only', 'own', 'same', 'than',
144
- 'too', 'very', 'just', 'also', 'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then',
145
- 'else', 'when', 'where', 'why', 'how', 'all', 'each', 'every', 'any', 'some', 'no', 'yes',
146
- 'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like', 'make', 'made'
147
- ]);
148
-
149
- function hash(str, seed = 0) {
150
- let h = seed ^ str.length;
151
- for (let i = 0; i < str.length; i++) {
152
- h ^= str.charCodeAt(i);
153
- h = Math.imul(h, 0x5bd1e995);
154
- h ^= h >>> 15;
155
- }
156
- return h >>> 0;
157
- }
158
-
159
- // Pre-compute domain signatures
160
- const domainSignatures = {};
161
- for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
162
- const sig = new Float32Array(EMBEDDING_DIMS);
163
- for (const kw of keywords) {
164
- for (let h = 0; h < 2; h++) {
165
- const idx = hash(kw + '_dom_' + domain, h) % EMBEDDING_DIMS;
166
- sig[idx] = 1;
167
- }
168
- }
169
- domainSignatures[domain] = sig;
170
- }
171
-
172
- function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
173
- const vec = new Float32Array(dims);
174
- const lowerText = text.toLowerCase();
175
- const words = lowerText.replace(/[^a-z0-9\s]/g, ' ').split(/\s+/).filter(w => w.length > 1);
176
-
177
- if (words.length === 0) return vec;
178
-
179
- // Add domain signatures
180
- for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
181
- let matchCount = 0;
182
- for (const kw of keywords) {
183
- if (lowerText.includes(kw)) matchCount++;
184
- }
185
- if (matchCount > 0) {
186
- const weight = Math.min(2.0, 0.5 + matchCount * 0.3);
187
- const sig = domainSignatures[domain];
188
- for (let i = 0; i < dims; i++) {
189
- vec[i] += sig[i] * weight;
190
- }
191
- }
192
- }
193
-
194
- // Add word features
195
- for (const word of words) {
196
- const isCommon = COMMON_WORDS.has(word);
197
- const weight = isCommon ? 0.2 : (word.length > 6 ? 0.8 : 0.5);
198
- for (let h = 0; h < 3; h++) {
199
- const idx = hash(word, h * 17) % dims;
200
- const sign = (hash(word, h * 31 + 1) % 2 === 0) ? 1 : -1;
201
- vec[idx] += sign * weight;
202
- }
203
- }
204
-
205
- // Add bigrams
206
- for (let i = 0; i < words.length - 1; i++) {
207
- if (COMMON_WORDS.has(words[i]) && COMMON_WORDS.has(words[i + 1])) continue;
208
- const bigram = words[i] + '_' + words[i + 1];
209
- const idx = hash(bigram, 42) % dims;
210
- const sign = (hash(bigram, 43) % 2 === 0) ? 1 : -1;
211
- vec[idx] += sign * 0.4;
212
- }
213
-
214
- // Add trigrams
215
- for (let i = 0; i < words.length - 2; i++) {
216
- const trigram = words[i] + '_' + words[i + 1] + '_' + words[i + 2];
217
- const idx = hash(trigram, 99) % dims;
218
- const sign = (hash(trigram, 100) % 2 === 0) ? 1 : -1;
219
- vec[idx] += sign * 0.3;
220
- }
221
-
222
- // L2 normalize
223
- let norm = 0;
224
- for (let i = 0; i < dims; i++) norm += vec[i] * vec[i];
225
- norm = Math.sqrt(norm);
226
- if (norm > 0) {
227
- for (let i = 0; i < dims; i++) vec[i] /= norm;
228
- }
229
-
230
- return vec;
231
- }
232
-
233
- // ============================================================================
234
- // Unified Embedding Generator (matches stored embeddings)
235
- // ============================================================================
236
-
237
- /**
238
- * Generate query embedding using the SAME model as stored embeddings.
239
- * Checks what model was used for stored entries and matches it.
240
- */
241
- async function generateQueryEmbedding(queryText, db) {
242
- // Check what model the stored entries use
243
- let modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
244
- WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
245
- ${namespace ? "AND namespace = ?" : ""}
246
- GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
247
- const modelStmt = db.prepare(modelCheckSql);
248
- modelStmt.bind(namespace ? [namespace] : []);
249
- const modelCheck = modelStmt.step() ? modelStmt.getAsObject() : null;
250
- modelStmt.free();
251
-
252
- const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;
253
-
254
- if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);
255
-
256
- // If stored embeddings are neural, try to use neural for query too
257
- // Accept both canonical name and legacy 'onnx' tag (both use the same Xenova pipeline)
258
- if (storedModel === EMBEDDING_MODEL_NEURAL || storedModel === 'onnx') {
259
- await loadTransformersModel();
260
- if (useTransformers) {
261
- const neuralEmb = await generateNeuralEmbedding(queryText);
262
- if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
263
- return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
264
- }
265
- }
266
- // Neural failed warn about model mismatch
267
- if (!json) {
268
- console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
269
- console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
270
- }
271
- }
272
-
273
- // Use hash embeddings (either matching stored hash model, or as fallback)
274
- const hashEmb = Array.from(semanticHashEmbed(queryText));
275
- return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
276
- }
277
-
278
- // ============================================================================
279
- // Search Functions
280
- // ============================================================================
281
-
282
- function cosineSimilarity(a, b) {
283
- if (!a || !b || a.length !== b.length) return 0;
284
- let dot = 0;
285
- for (let i = 0; i < a.length; i++) {
286
- dot += a[i] * b[i];
287
- }
288
- return dot; // Already L2 normalized
289
- }
290
-
291
- async function getDb() {
292
- if (!existsSync(DB_PATH)) {
293
- throw new Error(`Database not found: ${DB_PATH}`);
294
- }
295
- const SQL = await initSqlJs();
296
- const buffer = readFileSync(DB_PATH);
297
- return new SQL.Database(buffer);
298
- }
299
-
300
- async function semanticSearch(queryText, options = {}) {
301
- const { limit = 5, namespace = null, threshold = 0.3 } = options;
302
- const startTime = performance.now();
303
-
304
- const db = await getDb();
305
-
306
- // Generate query embedding matching the stored model
307
- const { embedding: queryEmbedding, model: queryModel } = await generateQueryEmbedding(queryText, db);
308
-
309
- if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);
310
-
311
- // Get all entries with embeddings
312
- let sql = `
313
- SELECT id, key, namespace, content, embedding, embedding_model, metadata
314
- FROM memory_entries
315
- WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
316
- `;
317
- const params = [];
318
-
319
- if (namespace) {
320
- sql += ` AND namespace = ?`;
321
- params.push(namespace);
322
- }
323
-
324
- const stmt = db.prepare(sql);
325
- stmt.bind(params);
326
-
327
- // Calculate similarity scores
328
- const results = [];
329
- while (stmt.step()) {
330
- const entry = stmt.getAsObject();
331
- try {
332
- const storedIsNeural = NEURAL_ALIASES.has(entry.embedding_model);
333
- const queryIsNeural = NEURAL_ALIASES.has(queryModel);
334
- if (entry.embedding_model && entry.embedding_model !== queryModel && !(storedIsNeural && queryIsNeural)) continue;
335
-
336
- const embedding = JSON.parse(entry.embedding);
337
- if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;
338
-
339
- const similarity = cosineSimilarity(queryEmbedding, embedding);
340
-
341
- if (similarity >= threshold) {
342
- let metadata = {};
343
- try {
344
- metadata = JSON.parse(entry.metadata || '{}');
345
- } catch {}
346
-
347
- results.push({
348
- key: entry.key,
349
- namespace: entry.namespace,
350
- score: similarity,
351
- preview: entry.content.substring(0, 150).replace(/\n/g, ' '),
352
- type: metadata.type || 'unknown',
353
- parentDoc: metadata.parentDoc || null,
354
- chunkTitle: metadata.chunkTitle || null,
355
- });
356
- }
357
- } catch {
358
- // Skip entries with invalid embeddings
359
- }
360
- }
361
- stmt.free();
362
-
363
- db.close();
364
-
365
- // Sort by similarity (descending) and limit
366
- results.sort((a, b) => b.score - a.score);
367
- const topResults = results.slice(0, limit);
368
-
369
- const searchTime = performance.now() - startTime;
370
-
371
- return {
372
- query: queryText,
373
- results: topResults,
374
- totalMatches: results.length,
375
- searchTime: `${searchTime.toFixed(0)}ms`,
376
- indexType: 'vector-cosine',
377
- model: queryModel,
378
- };
379
- }
380
-
381
- // ============================================================================
382
- // Main
383
- // ============================================================================
384
-
385
- async function main() {
386
- if (!json) {
387
- console.log('');
388
- console.log(`[semantic-search] Query: "${query}"`);
389
- }
390
-
391
- try {
392
- const results = await semanticSearch(query, { limit, namespace, threshold });
393
-
394
- if (json) {
395
- console.log(JSON.stringify(results, null, 2));
396
- return;
397
- }
398
-
399
- console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
400
- console.log('');
401
-
402
- if (results.results.length === 0) {
403
- console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
404
- return;
405
- }
406
-
407
- // Display results
408
- console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
409
- console.log('│ Rank │ Score │ Key │ Type │ Preview │');
410
- console.log('├─────────────────────────────────────────────────────────────────────────────┤');
411
-
412
- for (let i = 0; i < results.results.length; i++) {
413
- const r = results.results[i];
414
- const rank = String(i + 1).padStart(4);
415
- const score = r.score.toFixed(3);
416
- const key = r.key.substring(0, 28).padEnd(28);
417
- const type = (r.type || '').substring(0, 6).padEnd(6);
418
- const preview = r.preview.substring(0, 18).padEnd(18);
419
-
420
- console.log(`│ ${rank} ${score} ${key} ${type} │ ${preview}… │`);
421
- }
422
-
423
- console.log('└─────────────────────────────────────────────────────────────────────────────┘');
424
-
425
- // Show chunk context
426
- console.log('');
427
- console.log('Top result details:');
428
- const top = results.results[0];
429
- console.log(` Key: ${top.key}`);
430
- console.log(` Score: ${top.score.toFixed(4)}`);
431
- if (top.chunkTitle) console.log(` Section: ${top.chunkTitle}`);
432
- if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
433
- console.log(` Preview: ${top.preview}...`);
434
-
435
- } catch (err) {
436
- console.error(`[semantic-search] Error: ${err.message}`);
437
- process.exit(1);
438
- }
439
- }
440
-
441
- main();
2
+ /**
3
+ * Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
4
+ *
5
+ * Query embedding MUST match stored embedding model:
6
+ * 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
7
+ * 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
8
+ *
9
+ * Usage:
10
+ * node node_modules/moflo/bin/semantic-search.mjs "your search query"
11
+ * npx flo-search "your search query"
12
+ * npx flo-search "query" --limit 10
13
+ * npx flo-search "query" --namespace guidance
14
+ * npx flo-search "query" --threshold 0.3
15
+ */
16
+
17
+ import { existsSync, readFileSync } from 'fs';
18
+ import { resolve, dirname } from 'path';
19
+ import { mofloResolveURL } from './lib/moflo-resolve.mjs';
20
+ const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
21
+
22
+ function findProjectRoot() {
23
+ let dir = process.cwd();
24
+ const root = resolve(dir, '/');
25
+ while (dir !== root) {
26
+ if (existsSync(resolve(dir, 'package.json'))) return dir;
27
+ dir = dirname(dir);
28
+ }
29
+ return process.cwd();
30
+ }
31
+
32
+ const projectRoot = findProjectRoot();
33
+
34
+ const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
35
+ const EMBEDDING_DIMS = 384;
36
+ const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
37
+ const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
38
+ // 'onnx' is a legacy alias for the Xenova model — treat them as compatible vector spaces
39
+ const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);
40
+
41
// Parse args

// Flags that are followed by a value; every other `--flag` is a boolean switch.
const VALUE_FLAGS = new Set(['--limit', '--namespace', '--threshold']);

/**
 * Parse the CLI arguments into search settings.
 *
 * The query is the first token that is neither a flag nor a value consumed by
 * a value flag, so `--limit 10 "query"` and `"query" --limit 10` are equivalent.
 * A value flag only consumes the next token when that token is not itself a
 * `--flag`; for repeated flags the first occurrence wins.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{query: (string|undefined), limit: number, namespace: (string|null),
 *   withTests: boolean, threshold: number, json: boolean, debug: boolean}}
 *   `limit` falls back to 5 unless it parses to a positive integer;
 *   `threshold` falls back to 0.3 unless it parses to a finite number.
 */
function parseCliArgs(argv) {
  const values = new Map();
  const switches = new Set();
  let queryText;

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (VALUE_FLAGS.has(arg)) {
      const next = argv[i + 1];
      // Do not let `--limit --json` swallow the `--json` switch.
      if (next !== undefined && !next.startsWith('--')) {
        if (!values.has(arg)) values.set(arg, next);
        i++;
      }
    } else if (arg.startsWith('--')) {
      switches.add(arg);
    } else if (queryText === undefined) {
      queryText = arg;
    }
  }

  const parsedLimit = Number.parseInt(values.get('--limit'), 10);
  const parsedThreshold = Number.parseFloat(values.get('--threshold'));

  return {
    query: queryText,
    limit: Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 5,
    // `||` on purpose: an empty namespace string means "no namespace filter".
    namespace: values.get('--namespace') || null,
    withTests: switches.has('--with-tests'),
    threshold: Number.isFinite(parsedThreshold) ? parsedThreshold : 0.3,
    json: switches.has('--json'),
    debug: switches.has('--debug'),
  };
}

const args = process.argv.slice(2);
const cliOptions = parseCliArgs(args);
const query = cliOptions.query;
const limit = cliOptions.limit;
let namespace = cliOptions.namespace;
const withTests = cliOptions.withTests;
const threshold = cliOptions.threshold;
const json = cliOptions.json;
const debug = cliOptions.debug;
50
+
51
// Auto-routing: a query that mentions any of these test-related terms is also
// searched against the tests namespace.
const TEST_KEYWORDS = /\b(test|spec|coverage|assert|mock|stub|fixture|describe|jest|vitest|mocha|e2e|integration test)\b/i;

// A query is mandatory: without one, print usage to stderr and exit with status 1.
if (!query) {
  const usage = 'Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]';
  console.error(usage);
  process.exit(1);
}
58
+
59
+ // ============================================================================
60
+ // Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
61
+ // ============================================================================
62
+
63
// Feature-extraction pipeline, set once the model has loaded successfully.
let pipeline = null;
// Whether the most recent load attempt left a usable neural pipeline.
let useTransformers = false;

/**
 * Load the Transformers.js feature-extraction pipeline for EMBEDDING_MODEL_NEURAL.
 *
 * The pipeline is created at most once per process: later calls reuse the
 * instance stored in `pipeline` instead of importing and building it again
 * (main() may run two searches in one invocation).
 *
 * Never throws. On failure the reason is printed only under --debug and the
 * caller is expected to fall back to hash embeddings.
 *
 * @returns {Promise<boolean>} true when a neural pipeline is available.
 */
async function loadTransformersModel() {
  if (pipeline) {
    useTransformers = true;
    return true;
  }

  try {
    const { env, pipeline: createPipeline } = await import(mofloResolveURL('@xenova/transformers'));
    env.allowLocalModels = false;
    env.backends.onnx.wasm.numThreads = 1;

    // NOTE(review): `quantized: false` presumably has to match the setting used
    // when the stored embeddings were built — confirm before changing.
    pipeline = await createPipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
      quantized: false,
    });

    useTransformers = true;
    if (debug) console.error('[semantic-search] Using Transformers.js neural model');
    return true;
  } catch (err) {
    if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${err.message?.split('\n')[0]}`);
    useTransformers = false;
    return false;
  }
}
85
+
86
/**
 * Embed `text` with the loaded neural pipeline (mean pooling, normalized).
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} The embedding as a plain array, or null
 *   when no pipeline is loaded or inference fails. Failures are reported on
 *   stderr only under --debug so the caller can fall back quietly.
 */
async function generateNeuralEmbedding(text) {
  if (!pipeline) return null;
  try {
    const output = await pipeline(text, { pooling: 'mean', normalize: true });
    return Array.from(output.data);
  } catch (err) {
    if (debug) console.error(`[semantic-search] Neural embedding failed: ${err?.message?.split('\n')[0]}`);
    return null;
  }
}
95
+
96
+ // ============================================================================
97
+ // Domain-Aware Semantic Hash Embeddings (fallback)
98
+ // ============================================================================
99
+
100
// Keyword clusters per domain. A text that contains any keyword of a domain
// gets that domain's signature added to its hash embedding.
// NOTE(review): presumably this table must stay identical to the one used when
// the stored hash embeddings were built — confirm before editing.
const DOMAIN_CLUSTERS = {
  database: [
    'typeorm', 'mongodb', 'database', 'entity', 'schema', 'table', 'collection',
    'query', 'sql', 'nosql', 'orm', 'model', 'migration', 'repository', 'column',
    'relation', 'foreign', 'primary', 'index', 'constraint', 'transaction',
    'mikroorm', 'mikro', 'postgresql', 'postgres', 'soft', 'delete', 'deletedat',
  ],
  frontend: [
    'react', 'component', 'ui', 'styling', 'css', 'html', 'jsx', 'tsx', 'frontend',
    'material', 'mui', 'tailwind', 'dom', 'render', 'hook', 'state', 'props',
    'redux', 'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
    'mantis', 'syncfusion', 'scheduler', 'i18n', 'intl', 'locale',
  ],
  backend: [
    'fastify', 'api', 'route', 'handler', 'rest', 'endpoint', 'server', 'controller',
    'middleware', 'request', 'response', 'http', 'express', 'nest', 'graphql',
    'websocket', 'socket', 'cors', 'auth', 'jwt', 'session', 'cookie',
    'awilix', 'dependency', 'injection', 'scope',
  ],
  testing: [
    'test', 'testing', 'vitest', 'jest', 'mock', 'spy', 'assert', 'expect', 'describe',
    'it', 'spec', 'unit', 'integration', 'e2e', 'playwright', 'cypress', 'coverage',
    'fixture', 'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
    'anti-pattern', 'antipattern', 'mocking',
  ],
  tenancy: [
    'tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi', 'multitenant',
    'organization', 'workspace', 'account', 'customer', 'client', 'subdomain',
  ],
  security: [
    'security', 'auth', 'authentication', 'authorization', 'permission', 'role',
    'access', 'token', 'jwt', 'oauth', 'password', 'encrypt', 'hash', 'salt',
    'csrf', 'xss', 'injection', 'sanitize', 'validate', 'rbac',
  ],
  patterns: [
    'pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter', 'facade',
    'observer', 'strategy', 'command', 'repository', 'usecase', 'domain', 'ddd',
    'clean', 'architecture', 'solid', 'dry', 'kiss', 'functional', 'pipeasync',
  ],
  workflow: [
    'workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build', 'actions',
    'hook', 'trigger', 'job', 'step', 'artifact', 'release', 'version', 'tag',
  ],
  memory: [
    'memory', 'cache', 'store', 'persist', 'storage', 'redis', 'session', 'state',
    'buffer', 'queue', 'stack', 'heap', 'gc', 'leak', 'embedding', 'vector', 'hnsw',
    'semantic', 'search', 'index', 'retrieval',
  ],
  agent: [
    'agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker', 'spawn',
    'parallel', 'concurrent', 'async', 'promise', 'queue', 'priority', 'schedule',
  ],
  github: [
    'github', 'issue', 'branch', 'pr', 'pull', 'request', 'merge', 'commit', 'push',
    'clone', 'fork', 'remote', 'origin', 'main', 'master', 'checkout', 'rebase',
    'squash', 'repository', 'repo', 'gh', 'git', 'assignee', 'label', 'mandatory',
    'checklist', 'closes', 'fixes', 'conventional', 'feat', 'refactor',
  ],
  documentation: [
    'guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
    'reference', 'standard', 'convention', 'rule', 'policy', 'template',
    'example', 'usage', 'instruction', 'meta', 'index', 'umbrella', 'claude',
    'optimized', 'audience', 'structure', 'format', 'markdown',
  ],
};
141
+
142
// Low-information words: they get a reduced weight as word features, and a
// bigram made of two of them is skipped entirely.
const COMMON_WORDS = new Set([
  // articles and forms of be / have / do
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
  'do', 'does', 'did',
  // modals
  'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need',
  // prepositions
  'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
  'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under',
  // conjunctions and qualifiers
  'and', 'but', 'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not', 'only', 'own',
  'same', 'than', 'too', 'very', 'just', 'also',
  // pronouns, determiners and question words
  'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then', 'else', 'when', 'where', 'why',
  'how', 'all', 'each', 'every', 'any', 'some', 'no', 'yes',
  // generic verbs
  'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like', 'make', 'made',
]);
152
+
153
/**
 * Seeded 32-bit string hash used to place features in the embedding vector.
 *
 * Works on UTF-16 code units (`charCodeAt`): each unit is XORed into the
 * state, multiplied by 0x5bd1e995 with 32-bit wraparound, then folded with
 * its own top bits.
 *
 * @param {string} str - Text to hash.
 * @param {number} [seed=0] - Seed; different seeds give independent hashes.
 * @returns {number} Unsigned 32-bit integer.
 */
function hash(str, seed = 0) {
  let state = seed ^ str.length;
  for (let pos = 0; pos < str.length; pos += 1) {
    const mixed = Math.imul(state ^ str.charCodeAt(pos), 0x5bd1e995);
    state = mixed ^ (mixed >>> 15);
  }
  return state >>> 0;
}
162
+
163
// Pre-compute domain signatures: one EMBEDDING_DIMS-long 0/1 vector per
// domain, with two hashed positions (seeds 0 and 1) switched on per keyword.
const domainSignatures = Object.fromEntries(
  Object.entries(DOMAIN_CLUSTERS).map(([domain, keywords]) => {
    const signature = new Float32Array(EMBEDDING_DIMS);
    for (const keyword of keywords) {
      const salted = keyword + '_dom_' + domain;
      signature[hash(salted, 0) % EMBEDDING_DIMS] = 1;
      signature[hash(salted, 1) % EMBEDDING_DIMS] = 1;
    }
    return [domain, signature];
  }),
);
175
+
176
/**
 * Build a deterministic hash-based embedding of `text` (the fallback used when
 * the neural model is not available).
 *
 * Four feature groups are accumulated in this order, then the vector is
 * L2-normalized:
 *   1. domain signatures — for every domain with at least one keyword found in
 *      the lowercased text, weighted min(2.0, 0.5 + 0.3 * matches). Keywords
 *      are matched as substrings, so short ones such as 'it' or 'ui' also
 *      match inside longer words;
 *   2. single words (3 signed hashed positions each; weight 0.2 for common
 *      words, 0.8 for words longer than 6 characters, 0.5 otherwise);
 *   3. bigrams (weight 0.4; skipped when both words are common);
 *   4. trigrams (weight 0.3).
 *
 * NOTE(review): the weights, seeds and accumulation order presumably have to
 * stay identical to whatever produced the stored hash embeddings — confirm
 * before changing any of them.
 *
 * @param {string} text - Text to embed.
 * @param {number} [dims=EMBEDDING_DIMS] - Length of the returned vector.
 *   Domain signatures are EMBEDDING_DIMS long; positions beyond that length
 *   receive no domain contribution.
 * @returns {Float32Array} Unit-length vector, or all zeros when the text has
 *   no word of at least two characters.
 */
function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
  const vec = new Float32Array(dims);
  const lowerText = text.toLowerCase();
  const words = lowerText
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter((w) => w.length > 1);

  if (words.length === 0) return vec;

  // Adds one signed, hashed feature: position from indexSeed, sign from signSeed.
  const addSigned = (token, indexSeed, signSeed, weight) => {
    const idx = hash(token, indexSeed) % dims;
    const sign = hash(token, signSeed) % 2 === 0 ? 1 : -1;
    vec[idx] += sign * weight;
  };

  // Add domain signatures
  for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
    const matchCount = keywords.filter((kw) => lowerText.includes(kw)).length;
    if (matchCount === 0) continue;

    const weight = Math.min(2.0, 0.5 + matchCount * 0.3);
    const sig = domainSignatures[domain];
    // Stay inside the signature: reading past its end yields undefined and
    // would turn the whole vector into NaN when dims > sig.length.
    const span = Math.min(dims, sig.length);
    for (let i = 0; i < span; i++) {
      vec[i] += sig[i] * weight;
    }
  }

  // Add word features
  for (const word of words) {
    const isCommon = COMMON_WORDS.has(word);
    const weight = isCommon ? 0.2 : (word.length > 6 ? 0.8 : 0.5);
    for (let h = 0; h < 3; h++) {
      addSigned(word, h * 17, h * 31 + 1, weight);
    }
  }

  // Add bigrams
  for (let i = 0; i < words.length - 1; i++) {
    if (COMMON_WORDS.has(words[i]) && COMMON_WORDS.has(words[i + 1])) continue;
    addSigned(words[i] + '_' + words[i + 1], 42, 43, 0.4);
  }

  // Add trigrams
  for (let i = 0; i < words.length - 2; i++) {
    addSigned(words[i] + '_' + words[i + 1] + '_' + words[i + 2], 99, 100, 0.3);
  }

  // L2 normalize
  let norm = 0;
  for (let i = 0; i < dims; i++) norm += vec[i] * vec[i];
  norm = Math.sqrt(norm);
  if (norm > 0) {
    for (let i = 0; i < dims; i++) vec[i] /= norm;
  }

  return vec;
}
236
+
237
+ // ============================================================================
238
+ // Unified Embedding Generator (matches stored embeddings)
239
+ // ============================================================================
240
+
241
/**
 * Generate the query embedding with the SAME model as the stored embeddings.
 *
 * The most frequent `embedding_model` among active entries that have an
 * embedding (restricted to `targetNamespace` when one is given) decides the
 * model. If that model is the neural one (or its legacy 'onnx' alias) the
 * neural pipeline is tried first; otherwise, or when the neural path fails,
 * the hash embedding is used.
 *
 * @param {string} queryText - Text of the search query.
 * @param {object} db - Open sql.js database.
 * @param {string|null} [targetNamespace=namespace] - Namespace whose stored
 *   model should be matched. Defaults to the --namespace CLI value; pass the
 *   namespace actually being searched so the check looks at the right entries.
 * @returns {Promise<{embedding: number[], model: string}>} The embedding and
 *   the name of the model that produced it.
 */
async function generateQueryEmbedding(queryText, db, targetNamespace = namespace) {
  // Check what model the stored entries use
  const modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    ${targetNamespace ? 'AND namespace = ?' : ''}
    GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
  const modelStmt = db.prepare(modelCheckSql);
  let modelCheck = null;
  try {
    modelStmt.bind(targetNamespace ? [targetNamespace] : []);
    if (modelStmt.step()) modelCheck = modelStmt.getAsObject();
  } finally {
    // Release the statement even when bind/step throws.
    modelStmt.free();
  }

  const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;

  if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);

  // If stored embeddings are neural, try to use neural for the query too.
  // NEURAL_ALIASES holds both the canonical name and the legacy 'onnx' tag.
  if (NEURAL_ALIASES.has(storedModel)) {
    await loadTransformersModel();
    if (useTransformers) {
      const neuralEmb = await generateNeuralEmbedding(queryText);
      if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
        return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
      }
    }
    // Neural failed — warn about model mismatch
    if (!json) {
      console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
      console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
    }
  }

  // Use hash embeddings (either matching stored hash model, or as fallback)
  const hashEmb = Array.from(semanticHashEmbed(queryText));
  return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
}
281
+
282
+ // ============================================================================
283
+ // Search Functions
284
+ // ============================================================================
285
+
286
/**
 * Similarity of two embedding vectors, computed as their plain dot product.
 *
 * No normalization happens here: the result equals the cosine similarity only
 * when both inputs are already unit-length.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The dot product, or 0 when either vector is missing or
 *   their lengths differ.
 */
function cosineSimilarity(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length === b.length;
  if (!comparable) {
    return 0;
  }

  let total = 0;
  let position = 0;
  while (position < a.length) {
    total += a[position] * b[position];
    position += 1;
  }
  return total;
}
294
+
295
/**
 * Open the memory database from DB_PATH as an in-memory sql.js database.
 *
 * The file is read once into a buffer; nothing in this function writes back
 * to disk.
 *
 * @returns {Promise<object>} A sql.js `Database` built from the file contents.
 * @throws {Error} When no file exists at DB_PATH.
 */
async function getDb() {
  const dbFilePresent = existsSync(DB_PATH);
  if (!dbFilePresent) {
    throw new Error(`Database not found: ${DB_PATH}`);
  }

  const { Database } = await initSqlJs();
  const fileBytes = readFileSync(DB_PATH);
  return new Database(fileBytes);
}
303
+
304
/**
 * Rank stored memory entries by similarity to `queryText`.
 *
 * Opens the database, embeds the query with the model that matches the stored
 * entries of the searched namespace, scores every active entry that has a
 * compatible embedding, and returns the best matches. The statement and the
 * database are released even when a step fails.
 *
 * @param {string} queryText - Text of the search query.
 * @param {object} [options]
 * @param {number} [options.limit=5] - Maximum number of results returned.
 * @param {string|null} [options.namespace=null] - Restrict to one namespace.
 * @param {number} [options.threshold=0.3] - Minimum similarity to count as a match.
 * @returns {Promise<{query: string, results: object[], totalMatches: number,
 *   searchTime: string, indexType: string, model: string}>} `results` holds at
 *   most `limit` entries sorted by descending score; `totalMatches` counts all
 *   entries at or above the threshold.
 * @throws {Error} When the database file is missing or a query fails.
 */
async function semanticSearch(queryText, options = {}) {
  const { limit = 5, namespace = null, threshold = 0.3 } = options;
  const startTime = performance.now();

  const db = await getDb();
  const results = [];
  let queryModel;

  try {
    // Generate query embedding matching the model stored for this namespace
    const generated = await generateQueryEmbedding(queryText, db, namespace);
    const queryEmbedding = generated.embedding;
    queryModel = generated.model;

    if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);

    // Get all entries with embeddings
    let sql = `
      SELECT id, key, namespace, content, embedding, embedding_model, metadata
      FROM memory_entries
      WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    `;
    const params = [];

    if (namespace) {
      sql += ` AND namespace = ?`;
      params.push(namespace);
    }

    // Loop-invariant: whether the query lives in the neural vector space.
    const queryIsNeural = NEURAL_ALIASES.has(queryModel);

    const stmt = db.prepare(sql);
    try {
      stmt.bind(params);

      // Calculate similarity scores
      while (stmt.step()) {
        const entry = stmt.getAsObject();

        // Skip entries embedded with a different model; the neural aliases
        // count as one model, and untagged entries are always compared.
        const storedIsNeural = NEURAL_ALIASES.has(entry.embedding_model);
        if (entry.embedding_model && entry.embedding_model !== queryModel && !(storedIsNeural && queryIsNeural)) continue;

        let embedding;
        try {
          embedding = JSON.parse(entry.embedding);
        } catch {
          // Skip entries with invalid embeddings
          continue;
        }
        if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;

        const similarity = cosineSimilarity(queryEmbedding, embedding);
        if (!(similarity >= threshold)) continue;

        // Unparseable or non-object metadata is treated as empty, not as a
        // reason to drop an otherwise valid match.
        let metadata = {};
        try {
          const parsedMetadata = JSON.parse(entry.metadata || '{}');
          if (parsedMetadata && typeof parsedMetadata === 'object') metadata = parsedMetadata;
        } catch {}

        results.push({
          key: entry.key,
          namespace: entry.namespace,
          score: similarity,
          // A missing content column yields an empty preview.
          preview: String(entry.content ?? '').substring(0, 150).replace(/\n/g, ' '),
          type: metadata.type || 'unknown',
          parentDoc: metadata.parentDoc || null,
          chunkTitle: metadata.chunkTitle || null,
        });
      }
    } finally {
      stmt.free();
    }
  } finally {
    db.close();
  }

  // Sort by similarity (descending) and limit
  results.sort((a, b) => b.score - a.score);
  const topResults = results.slice(0, limit);

  const searchTime = performance.now() - startTime;

  return {
    query: queryText,
    results: topResults,
    totalMatches: results.length,
    searchTime: `${searchTime.toFixed(0)}ms`,
    indexType: 'vector-cosine',
    model: queryModel,
  };
}
384
+
385
+ // ============================================================================
386
+ // Main
387
+ // ============================================================================
388
+
389
/**
 * CLI entry point: run the search described by the parsed arguments and print
 * the outcome.
 *
 * With --with-tests, or when no namespace was given and the query mentions a
 * test keyword, both the primary namespace (the given one, else 'code-map')
 * and 'tests' are searched and the results merged by score. When the primary
 * namespace already is 'tests', a single search is run so that no hit is
 * listed twice.
 *
 * With --json only the result object is written to stdout. Any error is
 * reported on stderr and the process exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (!json) {
    console.log('');
    console.log(`[semantic-search] Query: "${query}"`);
  }

  try {
    // Auto-route: if query contains test keywords and no namespace specified, also search tests
    const autoRouteTests = !namespace && TEST_KEYWORDS.test(query);
    const primaryNs = namespace || 'code-map';
    const searchBoth = (withTests || autoRouteTests) && primaryNs !== 'tests';
    let results;

    if (searchBoth) {
      // Sequential on purpose: both searches share the module-level model state.
      const primaryResults = await semanticSearch(query, { limit, namespace: primaryNs, threshold });
      const testResults = await semanticSearch(query, { limit, namespace: 'tests', threshold });

      // Merge and re-sort by score
      const merged = [...primaryResults.results, ...testResults.results]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);

      const totalMs =
        Number.parseInt(primaryResults.searchTime, 10) + Number.parseInt(testResults.searchTime, 10);

      results = {
        ...primaryResults,
        results: merged,
        totalMatches: primaryResults.totalMatches + testResults.totalMatches,
        searchTime: `${totalMs}ms`,
        namespaces: [primaryNs, 'tests'],
      };

      if (!json && autoRouteTests) {
        console.log(`[semantic-search] Auto-routed to tests namespace (query contains test keywords)`);
      }
    } else {
      results = await semanticSearch(query, { limit, namespace, threshold });
    }

    if (json) {
      console.log(JSON.stringify(results, null, 2));
      return;
    }

    console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
    console.log('');

    if (results.results.length === 0) {
      console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
      return;
    }

    // Display results. Header cells are padded with the same widths as the
    // row cells so the columns line up.
    const KEY_WIDTH = 28;
    const TYPE_WIDTH = 6;
    const PREVIEW_WIDTH = 18;

    console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
    console.log(`│ Rank │ Score │ ${'Key'.padEnd(KEY_WIDTH)} │ ${'Type'.padEnd(TYPE_WIDTH)} │ ${'Preview'.padEnd(PREVIEW_WIDTH + 1)} │`);
    console.log('├─────────────────────────────────────────────────────────────────────────────┤');

    for (let i = 0; i < results.results.length; i++) {
      const r = results.results[i];
      const rank = String(i + 1).padStart(4);
      const score = r.score.toFixed(3);
      const key = r.key.substring(0, KEY_WIDTH).padEnd(KEY_WIDTH);
      const type = (r.type || '').substring(0, TYPE_WIDTH).padEnd(TYPE_WIDTH);
      const preview = r.preview.substring(0, PREVIEW_WIDTH).padEnd(PREVIEW_WIDTH);

      console.log(`│ ${rank} │ ${score} │ ${key} │ ${type} │ ${preview}… │`);
    }

    console.log('└─────────────────────────────────────────────────────────────────────────────┘');

    // Show chunk context
    console.log('');
    console.log('Top result details:');
    const top = results.results[0];
    console.log(` Key: ${top.key}`);
    console.log(` Score: ${top.score.toFixed(4)}`);
    if (top.chunkTitle) console.log(` Section: ${top.chunkTitle}`);
    if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
    console.log(` Preview: ${top.preview}...`);
  } catch (err) {
    console.error(`[semantic-search] Error: ${err.message}`);
    process.exit(1);
  }
}

main();