moflo 4.8.26 → 4.8.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. package/.claude/agents/browser/browser-agent.yaml +182 -182
  2. package/.claude/agents/core/coder.md +265 -265
  3. package/.claude/agents/core/planner.md +167 -167
  4. package/.claude/agents/core/researcher.md +189 -189
  5. package/.claude/agents/core/reviewer.md +325 -325
  6. package/.claude/agents/core/tester.md +318 -318
  7. package/.claude/agents/database-specialist.yaml +21 -21
  8. package/.claude/agents/dual-mode/codex-coordinator.md +224 -224
  9. package/.claude/agents/dual-mode/codex-worker.md +211 -211
  10. package/.claude/agents/dual-mode/dual-orchestrator.md +291 -291
  11. package/.claude/agents/flow-nexus/app-store.md +88 -0
  12. package/.claude/agents/flow-nexus/authentication.md +69 -0
  13. package/.claude/agents/flow-nexus/challenges.md +81 -0
  14. package/.claude/agents/flow-nexus/neural-network.md +88 -0
  15. package/.claude/agents/flow-nexus/payments.md +83 -0
  16. package/.claude/agents/flow-nexus/sandbox.md +76 -0
  17. package/.claude/agents/flow-nexus/swarm.md +76 -0
  18. package/.claude/agents/flow-nexus/user-tools.md +96 -0
  19. package/.claude/agents/flow-nexus/workflow.md +84 -0
  20. package/.claude/agents/github/code-review-swarm.md +537 -537
  21. package/.claude/agents/github/github-modes.md +172 -172
  22. package/.claude/agents/github/issue-tracker.md +318 -318
  23. package/.claude/agents/github/multi-repo-swarm.md +552 -552
  24. package/.claude/agents/github/pr-manager.md +190 -190
  25. package/.claude/agents/github/project-board-sync.md +508 -508
  26. package/.claude/agents/github/release-manager.md +366 -366
  27. package/.claude/agents/github/release-swarm.md +582 -582
  28. package/.claude/agents/github/repo-architect.md +397 -397
  29. package/.claude/agents/github/swarm-issue.md +572 -572
  30. package/.claude/agents/github/swarm-pr.md +427 -427
  31. package/.claude/agents/github/sync-coordinator.md +451 -451
  32. package/.claude/agents/github/workflow-automation.md +634 -634
  33. package/.claude/agents/goal/code-goal-planner.md +445 -445
  34. package/.claude/agents/hive-mind/collective-intelligence-coordinator.md +129 -129
  35. package/.claude/agents/hive-mind/queen-coordinator.md +202 -202
  36. package/.claude/agents/hive-mind/scout-explorer.md +241 -241
  37. package/.claude/agents/hive-mind/swarm-memory-manager.md +192 -192
  38. package/.claude/agents/hive-mind/worker-specialist.md +216 -216
  39. package/.claude/agents/index.yaml +17 -17
  40. package/.claude/agents/neural/safla-neural.md +73 -73
  41. package/.claude/agents/payments/agentic-payments.md +126 -0
  42. package/.claude/agents/project-coordinator.yaml +15 -15
  43. package/.claude/agents/python-specialist.yaml +21 -21
  44. package/.claude/agents/reasoning/goal-planner.md +72 -72
  45. package/.claude/agents/security-auditor.yaml +20 -20
  46. package/.claude/agents/sona/sona-learning-optimizer.md +74 -0
  47. package/.claude/agents/sublinear/consensus-coordinator.md +338 -0
  48. package/.claude/agents/sublinear/matrix-optimizer.md +185 -0
  49. package/.claude/agents/sublinear/pagerank-analyzer.md +299 -0
  50. package/.claude/agents/sublinear/performance-optimizer.md +368 -0
  51. package/.claude/agents/sublinear/trading-predictor.md +246 -0
  52. package/.claude/agents/swarm/adaptive-coordinator.md +395 -395
  53. package/.claude/agents/swarm/hierarchical-coordinator.md +326 -326
  54. package/.claude/agents/swarm/mesh-coordinator.md +391 -391
  55. package/.claude/agents/templates/migration-plan.md +745 -745
  56. package/.claude/agents/typescript-specialist.yaml +21 -21
  57. package/.claude/agents/v3/adr-architect.md +184 -0
  58. package/.claude/agents/v3/aidefence-guardian.md +282 -0
  59. package/.claude/agents/v3/claims-authorizer.md +208 -0
  60. package/.claude/agents/v3/collective-intelligence-coordinator.md +993 -0
  61. package/.claude/agents/v3/ddd-domain-expert.md +220 -0
  62. package/.claude/agents/v3/injection-analyst.md +236 -0
  63. package/.claude/agents/v3/memory-specialist.md +995 -0
  64. package/.claude/agents/v3/performance-engineer.md +1233 -0
  65. package/.claude/agents/v3/pii-detector.md +151 -0
  66. package/.claude/agents/v3/reasoningbank-learner.md +213 -0
  67. package/.claude/agents/v3/security-architect-aidefence.md +410 -0
  68. package/.claude/agents/v3/security-architect.md +867 -0
  69. package/.claude/agents/v3/security-auditor.md +771 -0
  70. package/.claude/agents/v3/sparc-orchestrator.md +182 -0
  71. package/.claude/agents/v3/swarm-memory-manager.md +157 -0
  72. package/.claude/agents/v3/v3-integration-architect.md +205 -0
  73. package/.claude/checkpoints/1767754460.json +8 -8
  74. package/.claude/commands/agents/agent-spawning.md +28 -28
  75. package/.claude/commands/analysis/COMMAND_COMPLIANCE_REPORT.md +54 -0
  76. package/.claude/commands/analysis/README.md +9 -0
  77. package/.claude/commands/analysis/bottleneck-detect.md +162 -0
  78. package/.claude/commands/analysis/performance-bottlenecks.md +59 -0
  79. package/.claude/commands/analysis/performance-report.md +25 -0
  80. package/.claude/commands/analysis/token-efficiency.md +45 -0
  81. package/.claude/commands/analysis/token-usage.md +25 -0
  82. package/.claude/commands/automation/README.md +9 -0
  83. package/.claude/commands/automation/auto-agent.md +122 -0
  84. package/.claude/commands/automation/self-healing.md +106 -0
  85. package/.claude/commands/automation/session-memory.md +90 -0
  86. package/.claude/commands/automation/smart-agents.md +73 -0
  87. package/.claude/commands/automation/smart-spawn.md +25 -0
  88. package/.claude/commands/automation/workflow-select.md +25 -0
  89. package/.claude/commands/github/github-modes.md +146 -146
  90. package/.claude/commands/github/github-swarm.md +121 -121
  91. package/.claude/commands/github/issue-tracker.md +291 -291
  92. package/.claude/commands/github/pr-manager.md +169 -169
  93. package/.claude/commands/github/release-manager.md +337 -337
  94. package/.claude/commands/github/repo-architect.md +366 -366
  95. package/.claude/commands/github/sync-coordinator.md +300 -300
  96. package/.claude/commands/memory/neural.md +47 -47
  97. package/.claude/commands/monitoring/README.md +9 -0
  98. package/.claude/commands/monitoring/agent-metrics.md +25 -0
  99. package/.claude/commands/monitoring/agents.md +44 -0
  100. package/.claude/commands/monitoring/real-time-view.md +25 -0
  101. package/.claude/commands/monitoring/status.md +46 -0
  102. package/.claude/commands/monitoring/swarm-monitor.md +25 -0
  103. package/.claude/commands/optimization/README.md +9 -0
  104. package/.claude/commands/optimization/auto-topology.md +62 -0
  105. package/.claude/commands/optimization/cache-manage.md +25 -0
  106. package/.claude/commands/optimization/parallel-execute.md +25 -0
  107. package/.claude/commands/optimization/parallel-execution.md +50 -0
  108. package/.claude/commands/optimization/topology-optimize.md +25 -0
  109. package/.claude/commands/sparc/analyzer.md +51 -51
  110. package/.claude/commands/sparc/architect.md +53 -53
  111. package/.claude/commands/sparc/ask.md +97 -97
  112. package/.claude/commands/sparc/batch-executor.md +54 -54
  113. package/.claude/commands/sparc/code.md +89 -89
  114. package/.claude/commands/sparc/coder.md +54 -54
  115. package/.claude/commands/sparc/debug.md +83 -83
  116. package/.claude/commands/sparc/debugger.md +54 -54
  117. package/.claude/commands/sparc/designer.md +53 -53
  118. package/.claude/commands/sparc/devops.md +109 -109
  119. package/.claude/commands/sparc/docs-writer.md +80 -80
  120. package/.claude/commands/sparc/documenter.md +54 -54
  121. package/.claude/commands/sparc/innovator.md +54 -54
  122. package/.claude/commands/sparc/integration.md +83 -83
  123. package/.claude/commands/sparc/mcp.md +117 -117
  124. package/.claude/commands/sparc/memory-manager.md +54 -54
  125. package/.claude/commands/sparc/optimizer.md +54 -54
  126. package/.claude/commands/sparc/orchestrator.md +131 -131
  127. package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -83
  128. package/.claude/commands/sparc/refinement-optimization-mode.md +83 -83
  129. package/.claude/commands/sparc/researcher.md +54 -54
  130. package/.claude/commands/sparc/reviewer.md +54 -54
  131. package/.claude/commands/sparc/security-review.md +80 -80
  132. package/.claude/commands/sparc/sparc-modes.md +174 -174
  133. package/.claude/commands/sparc/sparc.md +111 -111
  134. package/.claude/commands/sparc/spec-pseudocode.md +80 -80
  135. package/.claude/commands/sparc/supabase-admin.md +348 -348
  136. package/.claude/commands/sparc/swarm-coordinator.md +54 -54
  137. package/.claude/commands/sparc/tdd.md +54 -54
  138. package/.claude/commands/sparc/tester.md +54 -54
  139. package/.claude/commands/sparc/tutorial.md +79 -79
  140. package/.claude/commands/sparc/workflow-manager.md +54 -54
  141. package/.claude/commands/sparc.md +166 -166
  142. package/.claude/commands/swarm/analysis.md +95 -95
  143. package/.claude/commands/swarm/development.md +96 -96
  144. package/.claude/commands/swarm/examples.md +168 -168
  145. package/.claude/commands/swarm/maintenance.md +102 -102
  146. package/.claude/commands/swarm/optimization.md +117 -117
  147. package/.claude/commands/swarm/research.md +136 -136
  148. package/.claude/commands/swarm/testing.md +131 -131
  149. package/.claude/commands/workflows/development.md +77 -77
  150. package/.claude/commands/workflows/research.md +62 -62
  151. package/.claude/guidance/moflo-bootstrap.md +126 -126
  152. package/.claude/guidance/shipped/agent-bootstrap.md +148 -143
  153. package/.claude/guidance/shipped/guidance-memory-strategy.md +262 -262
  154. package/.claude/guidance/shipped/memory-strategy.md +204 -204
  155. package/.claude/guidance/shipped/moflo.md +668 -675
  156. package/.claude/guidance/shipped/task-icons.md +42 -0
  157. package/.claude/guidance/shipped/task-swarm-integration.md +441 -441
  158. package/.claude/helpers/gate-hook.mjs +50 -0
  159. package/.claude/helpers/gate.cjs +138 -0
  160. package/.claude/helpers/hook-handler.cjs +76 -0
  161. package/.claude/helpers/intelligence.cjs +207 -207
  162. package/.claude/helpers/prompt-hook.mjs +72 -0
  163. package/.claude/helpers/statusline.cjs +851 -851
  164. package/.claude/scripts/build-embeddings.mjs +549 -0
  165. package/.claude/scripts/generate-code-map.mjs +776 -0
  166. package/.claude/scripts/hooks.mjs +656 -0
  167. package/.claude/scripts/index-guidance.mjs +893 -0
  168. package/.claude/scripts/index-tests.mjs +710 -0
  169. package/.claude/scripts/semantic-search.mjs +473 -0
  170. package/.claude/scripts/session-start-launcher.mjs +238 -0
  171. package/.claude/settings.local.json +18 -0
  172. package/.claude/skills/fl/SKILL.md +583 -583
  173. package/.claude/skills/flo/SKILL.md +583 -583
  174. package/.claude/skills/github-code-review/SKILL.md +1140 -1140
  175. package/.claude/skills/github-multi-repo/SKILL.md +874 -874
  176. package/.claude/skills/github-project-management/SKILL.md +1277 -1277
  177. package/.claude/skills/github-release-management/SKILL.md +1081 -1081
  178. package/.claude/skills/github-workflow-automation/SKILL.md +1065 -1065
  179. package/.claude/skills/hive-mind-advanced/SKILL.md +712 -712
  180. package/.claude/skills/hooks-automation/SKILL.md +1201 -1201
  181. package/.claude/skills/pair-programming/SKILL.md +1202 -0
  182. package/.claude/skills/performance-analysis/SKILL.md +563 -563
  183. package/.claude/skills/sparc-methodology/SKILL.md +1115 -1115
  184. package/.claude/skills/stream-chain/SKILL.md +563 -0
  185. package/.claude/skills/swarm-advanced/SKILL.md +973 -973
  186. package/.claude/skills/v3-cli-modernization/SKILL.md +872 -0
  187. package/.claude/skills/v3-core-implementation/SKILL.md +797 -0
  188. package/.claude/skills/v3-ddd-architecture/SKILL.md +442 -0
  189. package/.claude/skills/v3-integration-deep/SKILL.md +241 -0
  190. package/.claude/skills/v3-mcp-optimization/SKILL.md +777 -0
  191. package/.claude/skills/v3-memory-unification/SKILL.md +174 -0
  192. package/.claude/skills/v3-performance-optimization/SKILL.md +390 -0
  193. package/.claude/skills/v3-security-overhaul/SKILL.md +82 -0
  194. package/.claude/skills/v3-swarm-coordination/SKILL.md +340 -0
  195. package/.claude/workflow-state.json +5 -5
  196. package/LICENSE +21 -21
  197. package/README.md +698 -685
  198. package/bin/cli.js +0 -0
  199. package/bin/gate-hook.mjs +50 -50
  200. package/bin/gate.cjs +138 -138
  201. package/bin/generate-code-map.mjs +956 -938
  202. package/bin/hook-handler.cjs +83 -83
  203. package/bin/hooks.mjs +696 -696
  204. package/bin/index-guidance.mjs +906 -893
  205. package/bin/index-tests.mjs +729 -710
  206. package/bin/lib/process-manager.mjs +256 -256
  207. package/bin/lib/registry-cleanup.cjs +41 -41
  208. package/bin/prompt-hook.mjs +72 -72
  209. package/bin/semantic-search.mjs +472 -472
  210. package/bin/session-start-launcher.mjs +238 -238
  211. package/bin/setup-project.mjs +253 -251
  212. package/package.json +123 -123
  213. package/src/@claude-flow/cli/README.md +452 -452
  214. package/src/@claude-flow/cli/bin/cli.js +180 -180
  215. package/src/@claude-flow/cli/bin/preinstall.cjs +2 -2
  216. package/src/@claude-flow/cli/dist/src/commands/completions.js +409 -409
  217. package/src/@claude-flow/cli/dist/src/commands/doctor.js +1107 -1091
  218. package/src/@claude-flow/cli/dist/src/commands/embeddings.js +25 -25
  219. package/src/@claude-flow/cli/dist/src/commands/github.js +61 -61
  220. package/src/@claude-flow/cli/dist/src/commands/hive-mind.js +90 -90
  221. package/src/@claude-flow/cli/dist/src/commands/hooks.js +9 -9
  222. package/src/@claude-flow/cli/dist/src/commands/init.js +3 -8
  223. package/src/@claude-flow/cli/dist/src/commands/ruvector/import.js +14 -14
  224. package/src/@claude-flow/cli/dist/src/commands/ruvector/setup.js +624 -624
  225. package/src/@claude-flow/cli/dist/src/config/moflo-config.d.ts +3 -0
  226. package/src/@claude-flow/cli/dist/src/config/moflo-config.js +101 -91
  227. package/src/@claude-flow/cli/dist/src/index.d.ts +5 -0
  228. package/src/@claude-flow/cli/dist/src/index.js +44 -0
  229. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.d.ts +29 -29
  230. package/src/@claude-flow/cli/dist/src/init/claudemd-generator.js +89 -87
  231. package/src/@claude-flow/cli/dist/src/init/executor.js +453 -453
  232. package/src/@claude-flow/cli/dist/src/init/helpers-generator.js +482 -482
  233. package/src/@claude-flow/cli/dist/src/init/moflo-init.d.ts +30 -30
  234. package/src/@claude-flow/cli/dist/src/init/moflo-init.js +904 -848
  235. package/src/@claude-flow/cli/dist/src/init/statusline-generator.js +876 -876
  236. package/src/@claude-flow/cli/dist/src/mcp-tools/hooks-tools.js +5 -11
  237. package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +371 -371
  238. package/src/@claude-flow/cli/dist/src/runtime/headless.js +28 -28
  239. package/src/@claude-flow/cli/dist/src/services/container-worker-pool.d.ts +197 -0
  240. package/src/@claude-flow/cli/dist/src/services/container-worker-pool.js +584 -0
  241. package/src/@claude-flow/cli/dist/src/services/daemon-lock.d.ts +14 -0
  242. package/src/@claude-flow/cli/dist/src/services/daemon-lock.js +1 -1
  243. package/src/@claude-flow/cli/dist/src/services/headless-worker-executor.js +84 -84
  244. package/src/@claude-flow/cli/package.json +106 -106
  245. package/src/@claude-flow/guidance/README.md +1195 -1195
  246. package/src/@claude-flow/guidance/package.json +198 -198
  247. package/src/@claude-flow/memory/README.md +587 -587
  248. package/src/@claude-flow/memory/dist/agentdb-backend.js +26 -26
  249. package/src/@claude-flow/memory/dist/auto-memory-bridge.test.js +27 -27
  250. package/src/@claude-flow/memory/dist/hybrid-backend.d.ts +245 -0
  251. package/src/@claude-flow/memory/dist/hybrid-backend.js +569 -0
  252. package/src/@claude-flow/memory/dist/hybrid-backend.test.d.ts +8 -0
  253. package/src/@claude-flow/memory/dist/hybrid-backend.test.js +320 -0
  254. package/src/@claude-flow/memory/dist/sqlite-backend.d.ts +121 -0
  255. package/src/@claude-flow/memory/dist/sqlite-backend.js +572 -0
  256. package/src/@claude-flow/memory/dist/sqljs-backend.js +26 -26
  257. package/src/@claude-flow/memory/package.json +44 -44
  258. package/src/@claude-flow/shared/README.md +323 -323
  259. package/src/@claude-flow/shared/dist/events/event-store.js +31 -31
  260. package/src/README.md +493 -493
@@ -1,473 +1,473 @@
1
1
  #!/usr/bin/env node
2
- /**
3
- * Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
4
- *
5
- * Query embedding MUST match stored embedding model:
6
- * 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
7
- * 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
8
- *
9
- * Usage:
10
- * node node_modules/moflo/bin/semantic-search.mjs "your search query"
11
- * npx flo-search "your search query"
12
- * npx flo-search "query" --limit 10
13
- * npx flo-search "query" --namespace guidance
14
- * npx flo-search "query" --threshold 0.3
15
- */
16
-
17
- import { existsSync, readFileSync } from 'fs';
18
- import { resolve, dirname } from 'path';
19
- import { mofloResolveURL } from './lib/moflo-resolve.mjs';
20
- const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
21
-
22
/**
 * Locate the project root by walking upward from the current working
 * directory until a directory containing `package.json` is found.
 *
 * Every ancestor is examined, including the filesystem root itself (the
 * previous loop stopped before looking there).
 *
 * @returns {string} The nearest ancestor directory that contains
 *   `package.json`, or `process.cwd()` when no ancestor has one.
 */
function findProjectRoot() {
  const start = process.cwd();
  let dir = start;
  for (;;) {
    if (existsSync(resolve(dir, 'package.json'))) return dir;
    const parent = dirname(dir);
    // dirname() of a filesystem root yields the root again: nothing left to climb.
    if (parent === dir) return start;
    dir = parent;
  }
}
31
-
32
// Where the memory database lives, relative to the detected project root.
const projectRoot = findProjectRoot();
const DB_PATH = resolve(projectRoot, '.swarm/memory.db');

// Vector length expected of every embedding handled by this script.
const EMBEDDING_DIMS = 384;

// Model tags recorded alongside stored embeddings.
const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';

// Entries tagged with the legacy 'onnx' label are treated as living in the
// same vector space as the Xenova model.
const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);
40
-
41
/**
 * Parse the command-line arguments of the search CLI.
 *
 * Flags that take a value (`--limit`, `--namespace`, `--threshold`) consume the
 * token that follows them, so that token is never mistaken for the search
 * query (previously `--limit 10 "text"` searched for "10"). A value flag that
 * is followed by another `--flag`, or by nothing, is treated as having no value.
 * Non-numeric or non-positive `--limit` values and non-numeric `--threshold`
 * values fall back to the defaults instead of propagating NaN.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{query: (string|undefined), limit: number, namespace: (string|null),
 *   withTests: boolean, threshold: number, json: boolean, debug: boolean}}
 *   `query` is the first positional argument, or undefined when there is none.
 */
function parseCliArgs(argv) {
  const VALUE_FLAGS = new Set(['--limit', '--namespace', '--threshold']);
  const values = new Map();
  const switches = new Set();
  let query;

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (VALUE_FLAGS.has(arg)) {
      const next = argv[i + 1];
      if (next !== undefined && !next.startsWith('--')) {
        // First occurrence wins, as with the original indexOf-based lookup.
        if (!values.has(arg)) values.set(arg, next);
        i++;
      }
    } else if (arg.startsWith('--')) {
      switches.add(arg);
    } else if (query === undefined) {
      query = arg;
    }
  }

  const parsedLimit = Number.parseInt(values.get('--limit'), 10);
  const parsedThreshold = Number.parseFloat(values.get('--threshold'));

  return {
    query,
    limit: Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 5,
    namespace: values.get('--namespace') ?? null,
    withTests: switches.has('--with-tests'),
    threshold: Number.isFinite(parsedThreshold) ? parsedThreshold : 0.3,
    json: switches.has('--json'),
    debug: switches.has('--debug'),
  };
}

// Parse args
const args = process.argv.slice(2);
const parsedArgs = parseCliArgs(args);
const { query, limit, withTests, threshold, json, debug } = parsedArgs;
// Kept as `let` to match the original declaration — presumably reassigned
// further down the file; TODO confirm.
let namespace = parsedArgs.namespace;
50
-
51
// Vocabulary that marks a query as test-related. Matching is case-insensitive
// and restricted to whole words; it is used to decide whether the tests
// namespace should be searched as well.
const TEST_KEYWORDS = new RegExp(
  `\\b(${[
    'test',
    'spec',
    'coverage',
    'assert',
    'mock',
    'stub',
    'fixture',
    'describe',
    'jest',
    'vitest',
    'mocha',
    'e2e',
    'integration test',
  ].join('|')})\\b`,
  'i',
);
53
-
54
// A search phrase is mandatory: without one, print the usage line to stderr
// and exit with a failure status.
const queryMissing = !query;
if (queryMissing) {
  console.error('Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]');
  process.exit(1);
}
58
-
59
- // ============================================================================
60
- // Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
61
- // ============================================================================
62
-
63
// Feature-extraction pipeline; stays null until loadTransformersModel() succeeds.
let pipeline = null;
// True while a usable neural pipeline is available.
let useTransformers = false;

/**
 * Load the Transformers.js feature-extraction pipeline for
 * EMBEDDING_MODEL_NEURAL, at most once per process.
 *
 * A pipeline that is already loaded is reused; previously every call rebuilt
 * it, so each search paid the model start-up cost again. Failures are not
 * thrown: they are reported under --debug and signalled through the return
 * value and `useTransformers`.
 *
 * @returns {Promise<boolean>} true when the pipeline is ready, false otherwise.
 */
async function loadTransformersModel() {
  if (pipeline) {
    useTransformers = true;
    return true;
  }

  try {
    const { env, pipeline: createPipeline } = await import(mofloResolveURL('@xenova/transformers'));
    env.allowLocalModels = false;
    env.backends.onnx.wasm.numThreads = 1;

    pipeline = await createPipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
      quantized: false,
    });

    useTransformers = true;
    if (debug) console.error('[semantic-search] Using Transformers.js neural model');
    return true;
  } catch (err) {
    // `err?.` guards against a rejection value that is not an Error object.
    if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${err?.message?.split('\n')[0]}`);
    useTransformers = false;
    return false;
  }
}
85
-
86
/**
 * Embed `text` with the loaded neural pipeline, using mean pooling and
 * normalization.
 *
 * This is best-effort: any failure yields null rather than an exception. The
 * failure reason is now printed under --debug instead of being discarded.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} The embedding as a plain array, or null
 *   when no pipeline is loaded or the pipeline call fails.
 */
async function generateNeuralEmbedding(text) {
  if (!pipeline) return null;
  try {
    const output = await pipeline(text, { pooling: 'mean', normalize: true });
    return Array.from(output.data);
  } catch (err) {
    if (debug) console.error(`[semantic-search] Neural embedding failed: ${err?.message?.split('\n')[0]}`);
    return null;
  }
}
95
-
96
- // ============================================================================
97
- // Domain-Aware Semantic Hash Embeddings (fallback)
98
- // ============================================================================
99
-
100
// Keyword vocabulary per domain. Each domain is written as a
// whitespace-separated word list and expanded into an array of keywords, in
// the order given.
const DOMAIN_CLUSTERS = Object.fromEntries(
  Object.entries({
    database: `
      typeorm mongodb database entity schema table collection
      query sql nosql orm model migration repository column
      relation foreign primary index constraint transaction
      mikroorm mikro postgresql postgres soft delete deletedat`,
    frontend: `
      react component ui styling css html jsx tsx frontend
      material mui tailwind dom render hook state props
      redux context styled emotion theme layout responsive
      mantis syncfusion scheduler i18n intl locale`,
    backend: `
      fastify api route handler rest endpoint server controller
      middleware request response http express nest graphql
      websocket socket cors auth jwt session cookie
      awilix dependency injection scope`,
    testing: `
      test testing vitest jest mock spy assert expect describe
      it spec unit integration e2e playwright cypress coverage
      fixture stub fake snapshot beforeeach aftereach
      anti-pattern antipattern mocking`,
    tenancy: `
      tenant tenancy companyid company isolation multi multitenant
      organization workspace account customer client subdomain`,
    security: `
      security auth authentication authorization permission role
      access token jwt oauth password encrypt hash salt
      csrf xss injection sanitize validate rbac`,
    patterns: `
      pattern service factory singleton decorator adapter facade
      observer strategy command repository usecase domain ddd
      clean architecture solid dry kiss functional pipeasync`,
    workflow: `
      workflow pipeline ci cd deploy build actions
      hook trigger job step artifact release version tag`,
    memory: `
      memory cache store persist storage redis session state
      buffer queue stack heap gc leak embedding vector hnsw
      semantic search index retrieval`,
    agent: `
      agent swarm coordinator orchestrator task worker spawn
      parallel concurrent async promise queue priority schedule`,
    github: `
      github issue branch pr pull request merge commit push
      clone fork remote origin main master checkout rebase
      squash repository repo gh git assignee label mandatory
      checklist closes fixes conventional feat refactor`,
    documentation: `
      guidance documentation docs readme guide tutorial
      reference standard convention rule policy template
      example usage instruction meta index umbrella claude
      optimized audience structure format markdown`,
  }).map(([domain, wordList]) => [domain, wordList.trim().split(/\s+/)]),
);
141
-
142
// Very frequent English words, written as one whitespace-separated list and
// collected into a Set for membership checks.
const COMMON_WORDS = new Set(
  `
  the a an is are was were be been being have has had
  do does did will would could should may might must shall
  can need to of in for on with at by from as into
  through during before after above below between under and but
  or nor so yet both either neither not only own same than
  too very just also this that these those it its if then
  else when where why how all each every any some no yes
  use using used uses get set new see like make made
  `.trim().split(/\s+/),
);
152
-
153
/**
 * Seeded 32-bit string hash.
 *
 * Starts from `seed ^ str.length`, then for every UTF-16 code unit XORs it in,
 * multiplies by 0x5bd1e995 in 32-bit arithmetic and folds the high bits down
 * with a 15-bit right shift.
 *
 * @param {string} str - Text to hash.
 * @param {number} [seed=0] - Seed that selects an independent hash function.
 * @returns {number} Unsigned 32-bit integer.
 */
function hash(str, seed = 0) {
  let acc = seed ^ str.length;
  let pos = 0;
  while (pos < str.length) {
    const mixed = Math.imul(acc ^ str.charCodeAt(pos), 0x5bd1e995);
    acc = mixed ^ (mixed >>> 15);
    pos += 1;
  }
  return acc >>> 0;
}
162
-
163
// Signature vectors computed once up front: for each domain, every keyword
// sets two hashed positions (seeds 0 and 1) of an EMBEDDING_DIMS-long vector to 1.
const domainSignatures = Object.fromEntries(
  Object.entries(DOMAIN_CLUSTERS).map(([domain, keywords]) => {
    const signature = new Float32Array(EMBEDDING_DIMS);
    keywords.forEach((keyword) => {
      [0, 1].forEach((seed) => {
        const slot = hash(keyword + '_dom_' + domain, seed) % EMBEDDING_DIMS;
        signature[slot] = 1;
      });
    });
    return [domain, signature];
  }),
);
175
-
176
/**
 * Build a hash-based embedding for `text`.
 *
 * The text is lower-cased and split into alphanumeric tokens longer than one
 * character. The vector then accumulates, in this order: weighted domain
 * signatures for every domain with at least one keyword occurring in the
 * text, signed hashed features for each token, for each adjacent token pair
 * and for each adjacent token triple. The result is L2-normalized.
 *
 * @param {string} text - Input text.
 * @param {number} [dims=EMBEDDING_DIMS] - Length of the produced vector.
 * @returns {Float32Array} Normalized vector; all zeros when the text yields
 *   no tokens.
 */
function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
  const vec = new Float32Array(dims);
  const lowered = text.toLowerCase();
  const tokens = lowered
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter((token) => token.length > 1);

  if (tokens.length === 0) return vec;

  // Adds +weight or -weight at a hashed position; position and sign come from
  // two differently seeded hashes of the same feature string.
  const addSigned = (feature, slotSeed, signSeed, weight) => {
    const slot = hash(feature, slotSeed) % dims;
    const direction = hash(feature, signSeed) % 2 === 0 ? 1 : -1;
    vec[slot] += direction * weight;
  };

  // Domain signatures: weight grows with the number of matching keywords, capped at 2.0.
  Object.entries(DOMAIN_CLUSTERS).forEach(([domain, keywords]) => {
    const hits = keywords.filter((keyword) => lowered.includes(keyword)).length;
    if (hits === 0) return;
    const boost = Math.min(2.0, 0.5 + hits * 0.3);
    const signature = domainSignatures[domain];
    for (let i = 0; i < dims; i += 1) {
      vec[i] += signature[i] * boost;
    }
  });

  // Single tokens: common words count least, tokens longer than six characters most.
  tokens.forEach((token) => {
    let weight = 0.2;
    if (!COMMON_WORDS.has(token)) {
      weight = token.length > 6 ? 0.8 : 0.5;
    }
    [0, 1, 2].forEach((round) => {
      addSigned(token, round * 17, round * 31 + 1, weight);
    });
  });

  // Adjacent pairs, except pairs made of two common words.
  tokens.slice(0, -1).forEach((first, i) => {
    const second = tokens[i + 1];
    if (COMMON_WORDS.has(first) && COMMON_WORDS.has(second)) return;
    addSigned(first + '_' + second, 42, 43, 0.4);
  });

  // Adjacent triples.
  for (let i = 0; i + 2 < tokens.length; i += 1) {
    addSigned(tokens[i] + '_' + tokens[i + 1] + '_' + tokens[i + 2], 99, 100, 0.3);
  }

  // Scale to unit length unless the vector is entirely zero.
  const norm = Math.sqrt(vec.reduce((sum, value) => sum + value * value, 0));
  if (norm > 0) {
    vec.forEach((value, i) => {
      vec[i] = value / norm;
    });
  }

  return vec;
}
236
-
237
- // ============================================================================
238
- // Unified Embedding Generator (matches stored embeddings)
239
- // ============================================================================
240
-
241
/**
 * Generate the query embedding with the same model as the stored embeddings.
 *
 * The most common `embedding_model` among active entries that have an
 * embedding decides which generator is used. When that model is the neural
 * one (or its legacy 'onnx' tag) the Transformers.js pipeline is tried first;
 * otherwise, or when the neural path fails, the hash embedding is returned.
 *
 * @param {string} queryText - Text to embed.
 * @param {object} db - Database handle; only `prepare()` is used, and on the
 *   returned statement `bind()`, `step()`, `getAsObject()` and `free()`.
 * @param {string|null} [ns=namespace] - Namespace whose entries decide the
 *   model. Defaults to the module-level `namespace`, which is what the
 *   function always used before; callers that search a different namespace
 *   can now pass it so the model check looks at the entries actually searched.
 * @returns {Promise<{embedding: number[], model: string}>} The embedding and
 *   the tag of the model that produced it.
 */
async function generateQueryEmbedding(queryText, db, ns = namespace) {
  // Check what model the stored entries use
  const modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    ${ns ? "AND namespace = ?" : ""}
    GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
  const modelStmt = db.prepare(modelCheckSql);
  let modelCheck;
  try {
    modelStmt.bind(ns ? [ns] : []);
    modelCheck = modelStmt.step() ? modelStmt.getAsObject() : null;
  } finally {
    // Release the statement even when bind/step throws.
    modelStmt.free();
  }

  const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;

  if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);

  // If stored embeddings are neural (canonical name or legacy 'onnx' tag),
  // try to use the neural model for the query too.
  if (NEURAL_ALIASES.has(storedModel)) {
    await loadTransformersModel();
    if (useTransformers) {
      const neuralEmb = await generateNeuralEmbedding(queryText);
      if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
        return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
      }
    }
    // Neural failed — warn about model mismatch (kept off stdout/stderr in --json mode)
    if (!json) {
      console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
      console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
    }
  }

  // Use hash embeddings (either matching stored hash model, or as fallback)
  const hashEmb = Array.from(semanticHashEmbed(queryText));
  return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
}
281
-
282
- // ============================================================================
283
- // Search Functions
284
- // ============================================================================
285
-
286
/**
 * Similarity of two vectors, computed as their dot product. This equals the
 * cosine similarity only because the inputs are expected to be L2-normalized
 * already.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} The dot product, or 0 when either vector is missing or
 *   the lengths differ.
 */
function cosineSimilarity(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length === b.length;
  if (!comparable) return 0;
  return a.reduce((sum, value, i) => sum + value * b[i], 0);
}
294
-
295
/**
 * Open the memory database by reading the file at DB_PATH into an sql.js
 * database instance.
 *
 * @returns {Promise<object>} A new `SQL.Database` built from the file contents.
 * @throws {Error} When no file exists at DB_PATH.
 */
async function getDb() {
  if (existsSync(DB_PATH)) {
    const SQL = await initSqlJs();
    return new SQL.Database(readFileSync(DB_PATH));
  }
  throw new Error(`Database not found: ${DB_PATH}`);
}
303
-
304
/**
 * Run a vector search over the stored memory entries.
 *
 * Opens the database, embeds the query, scores every active entry whose
 * embedding comes from a compatible model, and returns the best matches.
 * The database and the statement are released in `finally` blocks so they
 * are closed even when embedding or querying throws.
 *
 * @param {string} queryText - Text to search for.
 * @param {object} [options]
 * @param {number} [options.limit=5] - Maximum number of results returned.
 * @param {string|null} [options.namespace=null] - Restrict the search to this namespace.
 * @param {number} [options.threshold=0.3] - Minimum similarity for a match.
 * @returns {Promise<{query: string, results: object[], totalMatches: number,
 *   searchTime: string, indexType: string, model: string}>} `results` is sorted
 *   by descending score and capped at `limit`; `totalMatches` counts every
 *   entry at or above the threshold.
 * @throws {Error} Whatever getDb(), generateQueryEmbedding() or the database calls throw.
 */
async function semanticSearch(queryText, options = {}) {
  const { limit = 5, namespace = null, threshold = 0.3 } = options;
  const startTime = performance.now();

  const db = await getDb();
  const results = [];
  let queryModel;

  try {
    // Generate query embedding matching the stored model. The searched
    // namespace is handed over as a third argument so the model check can be
    // scoped to it; an implementation taking only two parameters ignores it.
    const generated = await generateQueryEmbedding(queryText, db, namespace);
    const queryEmbedding = generated.embedding;
    queryModel = generated.model;

    if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);

    // Loop-invariant: whether the query lives in the neural vector space.
    const queryIsNeural = NEURAL_ALIASES.has(queryModel);

    // Get all entries with embeddings
    let sql = `
      SELECT id, key, namespace, content, embedding, embedding_model, metadata
      FROM memory_entries
      WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    `;
    const params = [];

    if (namespace) {
      sql += ` AND namespace = ?`;
      params.push(namespace);
    }

    const stmt = db.prepare(sql);
    try {
      stmt.bind(params);

      // Calculate similarity scores
      while (stmt.step()) {
        const entry = stmt.getAsObject();

        // Skip entries embedded with a different, incompatible model.
        const storedIsNeural = NEURAL_ALIASES.has(entry.embedding_model);
        if (entry.embedding_model && entry.embedding_model !== queryModel && !(storedIsNeural && queryIsNeural)) continue;

        let embedding;
        try {
          embedding = JSON.parse(entry.embedding);
        } catch {
          // Skip entries with invalid embeddings
          continue;
        }
        if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;

        const similarity = cosineSimilarity(queryEmbedding, embedding);
        if (similarity < threshold) continue;

        let metadata = {};
        try {
          // `|| {}` covers stored JSON such as "null" that parses to a non-object.
          metadata = JSON.parse(entry.metadata || '{}') || {};
        } catch {
          // Unparseable metadata is treated as empty.
        }

        results.push({
          key: entry.key,
          namespace: entry.namespace,
          score: similarity,
          // A NULL content column yields an empty preview instead of dropping the match.
          preview: String(entry.content ?? '').substring(0, 150).replace(/\n/g, ' '),
          type: metadata.type || 'unknown',
          parentDoc: metadata.parentDoc || null,
          chunkTitle: metadata.chunkTitle || null,
        });
      }
    } finally {
      stmt.free();
    }
  } finally {
    db.close();
  }

  // Sort by similarity (descending) and limit
  results.sort((a, b) => b.score - a.score);
  const topResults = results.slice(0, limit);

  const searchTime = performance.now() - startTime;

  return {
    query: queryText,
    results: topResults,
    totalMatches: results.length,
    searchTime: `${searchTime.toFixed(0)}ms`,
    indexType: 'vector-cosine',
    model: queryModel,
  };
}
384
-
385
- // ============================================================================
386
- // Main
387
- // ============================================================================
388
-
389
- async function main() {
390
- if (!json) {
391
- console.log('');
392
- console.log(`[semantic-search] Query: "${query}"`);
393
- }
394
-
395
- try {
396
- // --with-tests: search both the specified namespace (or code-map) and tests
397
- // Auto-route: if query contains test keywords and no namespace specified, also search tests
398
- const autoRouteTests = !namespace && TEST_KEYWORDS.test(query);
399
- let results;
400
-
401
- if (withTests || autoRouteTests) {
402
- const primaryNs = namespace || 'code-map';
403
- const primaryResults = await semanticSearch(query, { limit, namespace: primaryNs, threshold });
404
- const testResults = await semanticSearch(query, { limit, namespace: 'tests', threshold });
405
-
406
- // Merge and re-sort by score
407
- const merged = [...primaryResults.results, ...testResults.results]
408
- .sort((a, b) => b.score - a.score)
409
- .slice(0, limit);
410
-
411
- results = {
412
- ...primaryResults,
413
- results: merged,
414
- totalMatches: primaryResults.totalMatches + testResults.totalMatches,
415
- searchTime: `${parseInt(primaryResults.searchTime) + parseInt(testResults.searchTime)}ms`,
416
- namespaces: [primaryNs, 'tests'],
417
- };
418
-
419
- if (!json && autoRouteTests) {
420
- console.log(`[semantic-search] Auto-routed to tests namespace (query contains test keywords)`);
421
- }
422
- } else {
423
- results = await semanticSearch(query, { limit, namespace, threshold });
424
- }
425
-
426
- if (json) {
427
- console.log(JSON.stringify(results, null, 2));
428
- return;
429
- }
430
-
431
- console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
432
- console.log('');
433
-
434
- if (results.results.length === 0) {
435
- console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
436
- return;
437
- }
438
-
439
- // Display results
440
- console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
441
- console.log('│ Rank │ Score │ Key │ Type │ Preview │');
442
- console.log('├─────────────────────────────────────────────────────────────────────────────┤');
443
-
444
- for (let i = 0; i < results.results.length; i++) {
445
- const r = results.results[i];
446
- const rank = String(i + 1).padStart(4);
447
- const score = r.score.toFixed(3);
448
- const key = r.key.substring(0, 28).padEnd(28);
449
- const type = (r.type || '').substring(0, 6).padEnd(6);
450
- const preview = r.preview.substring(0, 18).padEnd(18);
451
-
452
- console.log(`│ ${rank} │ ${score} │ ${key} │ ${type} │ ${preview}… │`);
453
- }
454
-
455
- console.log('└─────────────────────────────────────────────────────────────────────────────┘');
456
-
457
- // Show chunk context
458
- console.log('');
459
- console.log('Top result details:');
460
- const top = results.results[0];
461
- console.log(` Key: ${top.key}`);
462
- console.log(` Score: ${top.score.toFixed(4)}`);
463
- if (top.chunkTitle) console.log(` Section: ${top.chunkTitle}`);
464
- if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
465
- console.log(` Preview: ${top.preview}...`);
466
-
467
- } catch (err) {
468
- console.error(`[semantic-search] Error: ${err.message}`);
469
- process.exit(1);
470
- }
471
- }
472
-
473
- main();
2
+ /**
3
+ * Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
4
+ *
5
+ * Query embedding MUST match stored embedding model:
6
+ * 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
7
+ * 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
8
+ *
9
+ * Usage:
10
+ * node node_modules/moflo/bin/semantic-search.mjs "your search query"
11
+ * npx flo-search "your search query"
12
+ * npx flo-search "query" --limit 10
13
+ * npx flo-search "query" --namespace guidance
14
+ * npx flo-search "query" --threshold 0.3
15
+ */
16
+
17
+ import { existsSync, readFileSync } from 'fs';
18
+ import { resolve, dirname } from 'path';
19
+ import { mofloResolveURL } from './lib/moflo-resolve.mjs';
20
+ const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
21
+
22
/**
 * Locate the project root by walking upward from the current working
 * directory until a directory containing `package.json` is found.
 *
 * Every ancestor is tested, including the filesystem root itself, so a
 * project that lives directly at the root (e.g. `/package.json` inside a
 * container) is found as well.
 *
 * @returns {string} The nearest directory (cwd or an ancestor) that holds a
 *   `package.json`, or `process.cwd()` when no ancestor has one.
 */
function findProjectRoot() {
  const start = process.cwd();
  let dir = start;
  for (;;) {
    if (existsSync(resolve(dir, 'package.json'))) return dir;
    const parent = dirname(dir);
    // dirname() of a filesystem root is the root itself: nothing left to climb.
    if (parent === dir) return start;
    dir = parent;
  }
}
31
+
32
// Directory returned by findProjectRoot(); the database path below is resolved against it.
const projectRoot = findProjectRoot();

// Database file that is opened and searched.
const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
// Required length of every embedding vector; stored vectors of another length are skipped.
const EMBEDDING_DIMS = 384;
// Model tags compared against the `embedding_model` column of stored entries.
const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
// 'onnx' is a legacy alias for the Xenova model — treat them as compatible vector spaces
const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);
40
+
41
// Parse args

/**
 * Parse the semantic-search command line.
 *
 * Tokens that are the value of `--limit`, `--namespace` or `--threshold` are
 * consumed together with their flag, so they can never be mistaken for the
 * query (e.g. `--limit 10 "my query"` yields the query "my query", not "10").
 * When a value flag is repeated, the first occurrence wins.
 *
 * @param {string[]} argv - Arguments after the script name (process.argv.slice(2)).
 * @returns {{query: (string|undefined), limit: number, namespace: (string|null),
 *   withTests: boolean, threshold: number, json: boolean, debug: boolean}}
 *   `limit` falls back to 5 and `threshold` to 0.3 when missing or not a
 *   usable number; `namespace` is null when not given.
 */
function parseCliArgs(argv) {
  const VALUE_FLAGS = new Set(['--limit', '--namespace', '--threshold']);
  const values = new Map();
  const switches = new Set();
  const positionals = [];

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (VALUE_FLAGS.has(arg)) {
      const next = argv[i + 1];
      // A following "--something" is another flag, not this flag's value.
      const hasValue = next !== undefined && !next.startsWith('--');
      if (hasValue) i++;
      if (!values.has(arg)) values.set(arg, hasValue ? next : undefined);
    } else if (arg.startsWith('--')) {
      switches.add(arg);
    } else {
      positionals.push(arg);
    }
  }

  const parsedLimit = Number.parseInt(values.get('--limit'), 10);
  const parsedThreshold = Number.parseFloat(values.get('--threshold'));

  return {
    query: positionals[0],
    // A NaN or non-positive limit would make slice(0, limit) return nothing useful.
    limit: Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 5,
    namespace: values.get('--namespace') ?? null,
    withTests: switches.has('--with-tests'),
    // A NaN threshold would make every `score >= threshold` comparison false.
    threshold: Number.isFinite(parsedThreshold) ? parsedThreshold : 0.3,
    json: switches.has('--json'),
    debug: switches.has('--debug'),
  };
}

const args = process.argv.slice(2);
const cliOptions = parseCliArgs(args);
const { query, limit, withTests, threshold, json, debug } = cliOptions;
let namespace = cliOptions.namespace;
50
+
51
// Auto-routing: when query mentions test-related terms, also search tests namespace
// (whole-word, case-insensitive match; main() applies it only when no namespace was given)
const TEST_KEYWORDS = /\b(test|spec|coverage|assert|mock|stub|fixture|describe|jest|vitest|mocha|e2e|integration test)\b/i;

// A query is mandatory: without one, print usage to stderr and exit with status 1.
if (!query) {
  console.error('Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]');
  process.exit(1);
}
58
+
59
+ // ============================================================================
60
+ // Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
61
+ // ============================================================================
62
+
63
// Feature-extraction pipeline once loaded; null until then (or when loading failed).
let pipeline = null;
// True once the neural pipeline is available for query embeddings.
let useTransformers = false;

/**
 * Load the Transformers.js feature-extraction pipeline for
 * EMBEDDING_MODEL_NEURAL and store it in the module-level `pipeline`.
 *
 * The pipeline is created at most once: later calls (for example the second
 * search of a --with-tests run) reuse the instance that is already loaded
 * instead of building the model again.
 *
 * @returns {Promise<boolean>} true when the pipeline is ready, false when the
 *   package or model could not be loaded (the reason is logged with --debug).
 */
async function loadTransformersModel() {
  if (pipeline) {
    useTransformers = true;
    return true;
  }

  try {
    const { env, pipeline: createPipeline } = await import(mofloResolveURL('@xenova/transformers'));
    env.allowLocalModels = false;
    env.backends.onnx.wasm.numThreads = 1;

    pipeline = await createPipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
      quantized: false,
    });

    useTransformers = true;
    if (debug) console.error('[semantic-search] Using Transformers.js neural model');
    return true;
  } catch (err) {
    // Thrown values are not guaranteed to be Error objects; fall back to the value itself.
    const reason = String(err?.message ?? err).split('\n')[0];
    if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${reason}`);
    useTransformers = false;
    return false;
  }
}
85
+
86
/**
 * Embed `text` with the loaded neural pipeline (mean pooling, normalized).
 *
 * This is best-effort: any failure yields null so the caller can fall back
 * to hash embeddings. The failure reason is reported only with --debug.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} The embedding as a plain array, or null
 *   when no pipeline is loaded or inference failed.
 */
async function generateNeuralEmbedding(text) {
  if (!pipeline) return null;
  try {
    const output = await pipeline(text, { pooling: 'mean', normalize: true });
    return Array.from(output.data);
  } catch (err) {
    if (debug) {
      const reason = String(err?.message ?? err).split('\n')[0];
      console.error(`[semantic-search] Neural embedding failed: ${reason}`);
    }
    return null;
  }
}
95
+
96
+ // ============================================================================
97
+ // Domain-Aware Semantic Hash Embeddings (fallback)
98
+ // ============================================================================
99
+
100
// Keyword lists per topic domain. Each domain gets a fixed signature vector
// (see domainSignatures), and semanticHashEmbed adds that signature whenever the
// lower-cased text contains at least one of the domain's keywords as a substring.
// NOTE(review): stored hash embeddings presumably come from an identical table in
// the indexing script — confirm before editing any list.
const DOMAIN_CLUSTERS = {
  database: ['typeorm', 'mongodb', 'database', 'entity', 'schema', 'table', 'collection',
    'query', 'sql', 'nosql', 'orm', 'model', 'migration', 'repository', 'column',
    'relation', 'foreign', 'primary', 'index', 'constraint', 'transaction',
    'mikroorm', 'mikro', 'postgresql', 'postgres', 'soft', 'delete', 'deletedat'],
  frontend: ['react', 'component', 'ui', 'styling', 'css', 'html', 'jsx', 'tsx', 'frontend',
    'material', 'mui', 'tailwind', 'dom', 'render', 'hook', 'state', 'props',
    'redux', 'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
    'mantis', 'syncfusion', 'scheduler', 'i18n', 'intl', 'locale'],
  backend: ['fastify', 'api', 'route', 'handler', 'rest', 'endpoint', 'server', 'controller',
    'middleware', 'request', 'response', 'http', 'express', 'nest', 'graphql',
    'websocket', 'socket', 'cors', 'auth', 'jwt', 'session', 'cookie',
    'awilix', 'dependency', 'injection', 'scope'],
  testing: ['test', 'testing', 'vitest', 'jest', 'mock', 'spy', 'assert', 'expect', 'describe',
    'it', 'spec', 'unit', 'integration', 'e2e', 'playwright', 'cypress', 'coverage',
    'fixture', 'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
    'anti-pattern', 'antipattern', 'mocking'],
  tenancy: ['tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi', 'multitenant',
    'organization', 'workspace', 'account', 'customer', 'client', 'subdomain'],
  security: ['security', 'auth', 'authentication', 'authorization', 'permission', 'role',
    'access', 'token', 'jwt', 'oauth', 'password', 'encrypt', 'hash', 'salt',
    'csrf', 'xss', 'injection', 'sanitize', 'validate', 'rbac'],
  patterns: ['pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter', 'facade',
    'observer', 'strategy', 'command', 'repository', 'usecase', 'domain', 'ddd',
    'clean', 'architecture', 'solid', 'dry', 'kiss', 'functional', 'pipeasync'],
  workflow: ['workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build', 'actions',
    'hook', 'trigger', 'job', 'step', 'artifact', 'release', 'version', 'tag'],
  memory: ['memory', 'cache', 'store', 'persist', 'storage', 'redis', 'session', 'state',
    'buffer', 'queue', 'stack', 'heap', 'gc', 'leak', 'embedding', 'vector', 'hnsw',
    'semantic', 'search', 'index', 'retrieval'],
  agent: ['agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker', 'spawn',
    'parallel', 'concurrent', 'async', 'promise', 'queue', 'priority', 'schedule'],
  github: ['github', 'issue', 'branch', 'pr', 'pull', 'request', 'merge', 'commit', 'push',
    'clone', 'fork', 'remote', 'origin', 'main', 'master', 'checkout', 'rebase',
    'squash', 'repository', 'repo', 'gh', 'git', 'assignee', 'label', 'mandatory',
    'checklist', 'closes', 'fixes', 'conventional', 'feat', 'refactor'],
  documentation: ['guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
    'reference', 'standard', 'convention', 'rule', 'policy', 'template',
    'example', 'usage', 'instruction', 'meta', 'index', 'umbrella', 'claude',
    'optimized', 'audience', 'structure', 'format', 'markdown']
};
141
+
142
// Frequent English/filler words. semanticHashEmbed gives them a reduced word
// weight (0.2) and skips bigrams in which both words come from this set.
const COMMON_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
  'can', 'need', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
  'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'and', 'but',
  'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not', 'only', 'own', 'same', 'than',
  'too', 'very', 'just', 'also', 'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then',
  'else', 'when', 'where', 'why', 'how', 'all', 'each', 'every', 'any', 'some', 'no', 'yes',
  'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like', 'make', 'made'
]);
152
+
153
/**
 * Seeded 32-bit string hash used to place features in the hash embedding.
 *
 * Works on UTF-16 code units. Query vectors are only comparable with stored
 * hash vectors while this function produces the exact same bits, so treat it
 * as frozen.
 *
 * @param {string} str - Text to hash.
 * @param {number} [seed=0] - Seed selecting an independent hash function.
 * @returns {number} Unsigned 32-bit integer.
 */
function hash(str, seed = 0) {
  const MULTIPLIER = 0x5bd1e995;
  let state = seed ^ str.length;
  for (let pos = 0; pos < str.length; pos += 1) {
    state = Math.imul(state ^ str.charCodeAt(pos), MULTIPLIER);
    state ^= state >>> 15;
  }
  // Reinterpret the signed 32-bit state as unsigned.
  return state >>> 0;
}
162
+
163
// Pre-compute domain signatures: one 0/1 vector per domain in which every
// keyword switches on (up to) two hashed positions.
const domainSignatures = {};
Object.entries(DOMAIN_CLUSTERS).forEach(([domain, keywords]) => {
  const signature = new Float32Array(EMBEDDING_DIMS);
  keywords.forEach((keyword) => {
    // Salting with the domain name gives a keyword shared by two domains different slots.
    const salted = keyword + '_dom_' + domain;
    [0, 1].forEach((seed) => {
      signature[hash(salted, seed) % EMBEDDING_DIMS] = 1;
    });
  });
  domainSignatures[domain] = signature;
});
175
+
176
/**
 * Build the deterministic "domain-aware hash" embedding of a text.
 *
 * The vector is the sum of four feature groups, L2-normalized at the end:
 *   1. domain signatures, weighted by how many of the domain's keywords occur
 *      in the text (substring match);
 *   2. signed hashed features for every word (three slots per word);
 *   3. signed hashed bigram features (pairs of two common words are skipped);
 *   4. signed hashed trigram features.
 *
 * @param {string} text - Text to embed.
 * @param {number} [dims=EMBEDDING_DIMS] - Length of the returned vector.
 * @returns {Float32Array} Unit-length vector, or all zeros when the text has
 *   no word longer than one character.
 */
function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
  const vec = new Float32Array(dims);
  const lowerText = text.toLowerCase();
  const words = lowerText
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter((token) => token.length > 1);

  if (words.length === 0) return vec;

  // Adds +weight or -weight to one hashed slot; slot and sign use separate seeds.
  const addSignedFeature = (token, slotSeed, signSeed, weight) => {
    const slot = hash(token, slotSeed) % dims;
    const sign = hash(token, signSeed) % 2 === 0 ? 1 : -1;
    vec[slot] += sign * weight;
  };

  // Domain signatures
  for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
    const matchCount = keywords.filter((kw) => lowerText.includes(kw)).length;
    if (matchCount === 0) continue;

    const weight = Math.min(2.0, 0.5 + matchCount * 0.3);
    const signature = domainSignatures[domain];
    for (let i = 0; i < dims; i += 1) {
      vec[i] += signature[i] * weight;
    }
  }

  // Word features
  for (const word of words) {
    let weight;
    if (COMMON_WORDS.has(word)) {
      weight = 0.2;
    } else if (word.length > 6) {
      weight = 0.8;
    } else {
      weight = 0.5;
    }
    for (let round = 0; round < 3; round += 1) {
      addSignedFeature(word, round * 17, round * 31 + 1, weight);
    }
  }

  // Bigrams
  for (let i = 0; i + 1 < words.length; i += 1) {
    const first = words[i];
    const second = words[i + 1];
    if (COMMON_WORDS.has(first) && COMMON_WORDS.has(second)) continue;
    addSignedFeature(first + '_' + second, 42, 43, 0.4);
  }

  // Trigrams
  for (let i = 0; i + 2 < words.length; i += 1) {
    addSignedFeature(words[i] + '_' + words[i + 1] + '_' + words[i + 2], 99, 100, 0.3);
  }

  // L2 normalize
  let sumOfSquares = 0;
  for (const component of vec) sumOfSquares += component * component;
  const norm = Math.sqrt(sumOfSquares);
  if (norm > 0) {
    for (let i = 0; i < dims; i += 1) vec[i] /= norm;
  }

  return vec;
}
236
+
237
+ // ============================================================================
238
+ // Unified Embedding Generator (matches stored embeddings)
239
+ // ============================================================================
240
+
241
/**
 * Generate query embedding using the SAME model as stored embeddings.
 * Checks what model was used for stored entries and matches it.
 *
 * The model is chosen by majority: the most frequent `embedding_model` among
 * the active entries that have an embedding, restricted to `searchNamespace`
 * when one is given. If no entry carries a model tag the hash model is assumed.
 *
 * @param {string} queryText - Text to embed.
 * @param {object} db - Open sql.js database exposing `prepare()`.
 * @param {string|null} [searchNamespace] - Namespace whose entries decide the
 *   model. Defaults to the CLI `--namespace` value, so two-argument calls behave
 *   as before; pass the namespace actually being searched when it differs.
 * @returns {Promise<{embedding: number[], model: string}>} The query vector
 *   and the tag of the model that produced it.
 */
async function generateQueryEmbedding(queryText, db, searchNamespace = namespace) {
  // Check what model the stored entries use
  const modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    ${searchNamespace ? 'AND namespace = ?' : ''}
    GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
  const modelStmt = db.prepare(modelCheckSql);
  let modelCheck = null;
  try {
    modelStmt.bind(searchNamespace ? [searchNamespace] : []);
    if (modelStmt.step()) modelCheck = modelStmt.getAsObject();
  } finally {
    // Release the statement even when bind/step throws.
    modelStmt.free();
  }

  const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;

  if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);

  // If stored embeddings are neural, try to use neural for query too
  // Accept both canonical name and legacy 'onnx' tag (both use the same Xenova pipeline)
  if (NEURAL_ALIASES.has(storedModel)) {
    await loadTransformersModel();
    if (useTransformers) {
      const neuralEmb = await generateNeuralEmbedding(queryText);
      if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
        return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
      }
    }
    // Neural failed — warn about model mismatch
    if (!json) {
      console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
      console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
    }
  }

  // Use hash embeddings (either matching stored hash model, or as fallback)
  const hashEmb = Array.from(semanticHashEmbed(queryText));
  return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
}
281
+
282
+ // ============================================================================
283
+ // Search Functions
284
+ // ============================================================================
285
+
286
/**
 * Similarity of two embedding vectors, computed as their dot product.
 *
 * This equals the cosine similarity only for unit-length inputs; no
 * normalization is performed here.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The dot product, or 0 when either vector is missing or
 *   the lengths differ.
 */
function cosineSimilarity(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length === b.length;
  if (!comparable) return 0;

  let total = 0;
  let index = 0;
  while (index < a.length) {
    total += a[index] * b[index];
    index += 1;
  }
  return total; // Already L2 normalized
}
294
+
295
/**
 * Open the database file at DB_PATH as an in-memory sql.js database.
 *
 * The file is read once; the returned database works on that copy.
 *
 * @returns {Promise<object>} A sql.js `Database` instance; the caller closes it.
 * @throws {Error} When the database file does not exist.
 */
async function getDb() {
  if (existsSync(DB_PATH)) {
    const SQL = await initSqlJs();
    const fileBytes = readFileSync(DB_PATH);
    return new SQL.Database(fileBytes);
  }
  throw new Error(`Database not found: ${DB_PATH}`);
}
303
+
304
/**
 * Rank stored memory entries by similarity to a query.
 *
 * Opens the database, embeds the query, scores every active entry that has an
 * embedding, and returns the best matches. The statement and the database are
 * released even when the search fails part-way.
 *
 * Entries are skipped when their model tag is incompatible with the query
 * model, when their embedding is not valid JSON or not an array of
 * EMBEDDING_DIMS numbers, or when their score is below the threshold.
 * Entries with missing content or unusable metadata are still returned, with
 * an empty preview / default metadata fields.
 *
 * @param {string} queryText - The search query.
 * @param {object} [options]
 * @param {number} [options.limit=5] - Maximum number of results returned.
 * @param {string|null} [options.namespace=null] - Restrict the search to one namespace.
 * @param {number} [options.threshold=0.3] - Minimum similarity score.
 * @returns {Promise<{query: string, results: object[], totalMatches: number,
 *   searchTime: string, indexType: string, model: string}>}
 * @throws {Error} When the database is missing or a query fails.
 */
async function semanticSearch(queryText, options = {}) {
  const { limit = 5, namespace = null, threshold = 0.3 } = options;
  const startTime = performance.now();

  const db = await getDb();
  const results = [];
  let queryModel;

  try {
    // Generate query embedding matching the stored model. The namespace being
    // searched is passed along as well; an embedding generator that only takes
    // two arguments simply ignores it.
    const generated = await generateQueryEmbedding(queryText, db, namespace);
    const queryEmbedding = generated.embedding;
    queryModel = generated.model;

    if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);

    const queryIsNeural = NEURAL_ALIASES.has(queryModel);

    // Get all entries with embeddings
    let sql = `
      SELECT id, key, namespace, content, embedding, embedding_model, metadata
      FROM memory_entries
      WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    `;
    const params = [];

    if (namespace) {
      sql += ` AND namespace = ?`;
      params.push(namespace);
    }

    const stmt = db.prepare(sql);
    try {
      stmt.bind(params);

      // Calculate similarity scores
      while (stmt.step()) {
        const entry = stmt.getAsObject();

        // Vectors from different models are not comparable; neural aliases count as one model.
        const storedModel = entry.embedding_model;
        const sameSpace = storedModel === queryModel || (queryIsNeural && NEURAL_ALIASES.has(storedModel));
        if (storedModel && !sameSpace) continue;

        let embedding;
        try {
          embedding = JSON.parse(entry.embedding);
        } catch {
          continue; // Skip entries with invalid embeddings
        }
        if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;

        const similarity = cosineSimilarity(queryEmbedding, embedding);
        // Written as a negated >= so that a NaN score is rejected too.
        if (!(similarity >= threshold)) continue;

        let metadata = {};
        try {
          const parsed = JSON.parse(entry.metadata || '{}');
          if (parsed && typeof parsed === 'object') metadata = parsed;
        } catch {
          // Unparseable metadata: keep the defaults below.
        }

        results.push({
          key: entry.key,
          namespace: entry.namespace,
          score: similarity,
          // NULL content must not discard an otherwise valid match.
          preview: String(entry.content ?? '').substring(0, 150).replace(/\n/g, ' '),
          type: metadata.type || 'unknown',
          parentDoc: metadata.parentDoc || null,
          chunkTitle: metadata.chunkTitle || null,
        });
      }
    } finally {
      stmt.free();
    }
  } finally {
    db.close();
  }

  // Sort by similarity (descending) and limit
  results.sort((a, b) => b.score - a.score);
  const topResults = results.slice(0, limit);

  const searchTime = performance.now() - startTime;

  return {
    query: queryText,
    results: topResults,
    totalMatches: results.length,
    searchTime: `${searchTime.toFixed(0)}ms`,
    indexType: 'vector-cosine',
    model: queryModel,
  };
}
384
+
385
+ // ============================================================================
386
+ // Main
387
+ // ============================================================================
388
+
389
/**
 * Combine the result sets of the primary-namespace search and the tests search.
 *
 * @param {object} primaryResults - semanticSearch() result for the primary namespace.
 * @param {object} testResults - semanticSearch() result for the 'tests' namespace.
 * @param {number} maxResults - Maximum number of merged results to keep.
 * @param {string} primaryNs - Name of the primary namespace.
 * @returns {object} primaryResults with merged `results` (best score first),
 *   summed `totalMatches` and `searchTime`, and a `namespaces` list.
 */
function mergeSearchResults(primaryResults, testResults, maxResults, primaryNs) {
  // Merge and re-sort by score
  const merged = [...primaryResults.results, ...testResults.results]
    .sort((a, b) => b.score - a.score)
    .slice(0, maxResults);

  // searchTime is a string such as "12ms"; parseInt reads the leading number.
  const totalMs = Number.parseInt(primaryResults.searchTime, 10)
    + Number.parseInt(testResults.searchTime, 10);

  return {
    ...primaryResults,
    results: merged,
    totalMatches: primaryResults.totalMatches + testResults.totalMatches,
    searchTime: `${totalMs}ms`,
    namespaces: [primaryNs, 'tests'],
  };
}

/**
 * Print the human-readable report: summary line, result table, and details
 * of the best match.
 *
 * @param {object} results - A semanticSearch() result or a merged result.
 */
function printResults(results) {
  console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
  console.log('');

  if (results.results.length === 0) {
    console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
    return;
  }

  // Display results
  console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
  console.log('│ Rank │ Score │ Key │ Type │ Preview │');
  console.log('├─────────────────────────────────────────────────────────────────────────────┤');

  results.results.forEach((r, i) => {
    const rank = String(i + 1).padStart(4);
    const score = r.score.toFixed(3);
    const key = r.key.substring(0, 28).padEnd(28);
    const type = (r.type || '').substring(0, 6).padEnd(6);
    const preview = r.preview.substring(0, 18).padEnd(18);

    console.log(`│ ${rank} │ ${score} │ ${key} │ ${type} │ ${preview}… │`);
  });

  console.log('└─────────────────────────────────────────────────────────────────────────────┘');

  // Show chunk context
  console.log('');
  console.log('Top result details:');
  const top = results.results[0];
  console.log(` Key: ${top.key}`);
  console.log(` Score: ${top.score.toFixed(4)}`);
  if (top.chunkTitle) console.log(` Section: ${top.chunkTitle}`);
  if (top.parentDoc) console.log(` Parent: ${top.parentDoc}`);
  console.log(` Preview: ${top.preview}...`);
}

/**
 * CLI entry point: run the search described by the parsed command line and
 * print the outcome as JSON (--json) or as a table.
 *
 * Any error is reported on stderr and ends the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (!json) {
    console.log('');
    console.log(`[semantic-search] Query: "${query}"`);
  }

  try {
    // --with-tests: search both the specified namespace (or code-map) and tests
    // Auto-route: if query contains test keywords and no namespace specified, also search tests
    const autoRouteTests = !namespace && TEST_KEYWORDS.test(query);
    let results;

    if (withTests || autoRouteTests) {
      const primaryNs = namespace || 'code-map';
      // Run one after the other: both searches use the same lazily loaded embedding model.
      const primaryResults = await semanticSearch(query, { limit, namespace: primaryNs, threshold });
      const testResults = await semanticSearch(query, { limit, namespace: 'tests', threshold });

      results = mergeSearchResults(primaryResults, testResults, limit, primaryNs);

      if (!json && autoRouteTests) {
        console.log(`[semantic-search] Auto-routed to tests namespace (query contains test keywords)`);
      }
    } else {
      results = await semanticSearch(query, { limit, namespace, threshold });
    }

    if (json) {
      console.log(JSON.stringify(results, null, 2));
      return;
    }

    printResults(results);
  } catch (err) {
    // Thrown values are not guaranteed to be Error objects.
    console.error(`[semantic-search] Error: ${err?.message ?? err}`);
    process.exit(1);
  }
}
472
+
473
+ main();