moflo 4.8.21 → 4.8.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/browser/browser-agent.yaml +182 -182
- package/.claude/agents/core/coder.md +265 -265
- package/.claude/agents/core/planner.md +167 -167
- package/.claude/agents/core/researcher.md +189 -189
- package/.claude/agents/core/reviewer.md +325 -325
- package/.claude/agents/core/tester.md +318 -318
- package/.claude/agents/database-specialist.yaml +21 -21
- package/.claude/agents/dual-mode/codex-coordinator.md +224 -224
- package/.claude/agents/dual-mode/codex-worker.md +211 -211
- package/.claude/agents/dual-mode/dual-orchestrator.md +291 -291
- package/.claude/agents/github/code-review-swarm.md +537 -537
- package/.claude/agents/github/github-modes.md +172 -172
- package/.claude/agents/github/issue-tracker.md +318 -318
- package/.claude/agents/github/multi-repo-swarm.md +552 -552
- package/.claude/agents/github/pr-manager.md +190 -190
- package/.claude/agents/github/project-board-sync.md +508 -508
- package/.claude/agents/github/release-manager.md +366 -366
- package/.claude/agents/github/release-swarm.md +582 -582
- package/.claude/agents/github/repo-architect.md +397 -397
- package/.claude/agents/github/swarm-issue.md +572 -572
- package/.claude/agents/github/swarm-pr.md +427 -427
- package/.claude/agents/github/sync-coordinator.md +451 -451
- package/.claude/agents/github/workflow-automation.md +634 -634
- package/.claude/agents/goal/code-goal-planner.md +445 -445
- package/.claude/agents/hive-mind/collective-intelligence-coordinator.md +129 -129
- package/.claude/agents/hive-mind/queen-coordinator.md +202 -202
- package/.claude/agents/hive-mind/scout-explorer.md +241 -241
- package/.claude/agents/hive-mind/swarm-memory-manager.md +192 -192
- package/.claude/agents/hive-mind/worker-specialist.md +216 -216
- package/.claude/agents/index.yaml +17 -17
- package/.claude/agents/neural/safla-neural.md +73 -73
- package/.claude/agents/project-coordinator.yaml +15 -15
- package/.claude/agents/python-specialist.yaml +21 -21
- package/.claude/agents/reasoning/goal-planner.md +72 -72
- package/.claude/agents/security-auditor.yaml +20 -20
- package/.claude/agents/swarm/adaptive-coordinator.md +395 -395
- package/.claude/agents/swarm/hierarchical-coordinator.md +326 -326
- package/.claude/agents/swarm/mesh-coordinator.md +391 -391
- package/.claude/agents/templates/migration-plan.md +745 -745
- package/.claude/agents/typescript-specialist.yaml +21 -21
- package/.claude/checkpoints/1767754460.json +8 -8
- package/.claude/commands/agents/agent-spawning.md +28 -28
- package/.claude/commands/github/github-modes.md +146 -146
- package/.claude/commands/github/github-swarm.md +121 -121
- package/.claude/commands/github/issue-tracker.md +291 -291
- package/.claude/commands/github/pr-manager.md +169 -169
- package/.claude/commands/github/release-manager.md +337 -337
- package/.claude/commands/github/repo-architect.md +366 -366
- package/.claude/commands/github/sync-coordinator.md +300 -300
- package/.claude/commands/memory/neural.md +47 -47
- package/.claude/commands/sparc/analyzer.md +51 -51
- package/.claude/commands/sparc/architect.md +53 -53
- package/.claude/commands/sparc/ask.md +97 -97
- package/.claude/commands/sparc/batch-executor.md +54 -54
- package/.claude/commands/sparc/code.md +89 -89
- package/.claude/commands/sparc/coder.md +54 -54
- package/.claude/commands/sparc/debug.md +83 -83
- package/.claude/commands/sparc/debugger.md +54 -54
- package/.claude/commands/sparc/designer.md +53 -53
- package/.claude/commands/sparc/devops.md +109 -109
- package/.claude/commands/sparc/docs-writer.md +80 -80
- package/.claude/commands/sparc/documenter.md +54 -54
- package/.claude/commands/sparc/innovator.md +54 -54
- package/.claude/commands/sparc/integration.md +83 -83
- package/.claude/commands/sparc/mcp.md +117 -117
- package/.claude/commands/sparc/memory-manager.md +54 -54
- package/.claude/commands/sparc/optimizer.md +54 -54
- package/.claude/commands/sparc/orchestrator.md +131 -131
- package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -83
- package/.claude/commands/sparc/refinement-optimization-mode.md +83 -83
- package/.claude/commands/sparc/researcher.md +54 -54
- package/.claude/commands/sparc/reviewer.md +54 -54
- package/.claude/commands/sparc/security-review.md +80 -80
- package/.claude/commands/sparc/sparc-modes.md +174 -174
- package/.claude/commands/sparc/sparc.md +111 -111
- package/.claude/commands/sparc/spec-pseudocode.md +80 -80
- package/.claude/commands/sparc/supabase-admin.md +348 -348
- package/.claude/commands/sparc/swarm-coordinator.md +54 -54
- package/.claude/commands/sparc/tdd.md +54 -54
- package/.claude/commands/sparc/tester.md +54 -54
- package/.claude/commands/sparc/tutorial.md +79 -79
- package/.claude/commands/sparc/workflow-manager.md +54 -54
- package/.claude/commands/sparc.md +166 -166
- package/.claude/commands/swarm/analysis.md +95 -95
- package/.claude/commands/swarm/development.md +96 -96
- package/.claude/commands/swarm/examples.md +168 -168
- package/.claude/commands/swarm/maintenance.md +102 -102
- package/.claude/commands/swarm/optimization.md +117 -117
- package/.claude/commands/swarm/research.md +136 -136
- package/.claude/commands/swarm/testing.md +131 -131
- package/.claude/commands/workflows/development.md +77 -77
- package/.claude/commands/workflows/research.md +62 -62
- package/.claude/guidance/moflo-bootstrap.md +126 -126
- package/.claude/guidance/shipped/agent-bootstrap.md +126 -126
- package/.claude/guidance/shipped/guidance-memory-strategy.md +262 -262
- package/.claude/guidance/shipped/memory-strategy.md +204 -204
- package/.claude/guidance/shipped/moflo.md +668 -653
- package/.claude/guidance/shipped/task-swarm-integration.md +441 -441
- package/.claude/helpers/intelligence.cjs +207 -207
- package/.claude/helpers/statusline.cjs +851 -851
- package/.claude/settings.local.json +18 -0
- package/.claude/skills/fl/SKILL.md +583 -583
- package/.claude/skills/flo/SKILL.md +583 -583
- package/.claude/skills/github-code-review/SKILL.md +1140 -1140
- package/.claude/skills/github-multi-repo/SKILL.md +874 -874
- package/.claude/skills/github-project-management/SKILL.md +1277 -1277
- package/.claude/skills/github-release-management/SKILL.md +1081 -1081
- package/.claude/skills/github-workflow-automation/SKILL.md +1065 -1065
- package/.claude/skills/hive-mind-advanced/SKILL.md +712 -712
- package/.claude/skills/hooks-automation/SKILL.md +1201 -1201
- package/.claude/skills/performance-analysis/SKILL.md +563 -563
- package/.claude/skills/sparc-methodology/SKILL.md +1115 -1115
- package/.claude/skills/swarm-advanced/SKILL.md +973 -973
- package/.claude/workflow-state.json +4 -4
- package/LICENSE +21 -21
- package/README.md +698 -685
- package/bin/cli.js +0 -0
- package/bin/gate-hook.mjs +50 -50
- package/bin/gate.cjs +138 -138
- package/bin/generate-code-map.mjs +775 -775
- package/bin/hook-handler.cjs +83 -83
- package/bin/hooks.mjs +656 -656
- package/bin/index-guidance.mjs +892 -892
- package/bin/index-tests.mjs +709 -709
- package/bin/lib/process-manager.mjs +243 -243
- package/bin/lib/registry-cleanup.cjs +41 -41
- package/bin/prompt-hook.mjs +72 -72
- package/bin/semantic-search.mjs +472 -472
- package/bin/session-start-launcher.mjs +238 -238
- package/bin/setup-project.mjs +250 -250
- package/package.json +123 -123
- package/src/@claude-flow/cli/README.md +452 -452
- package/src/@claude-flow/cli/bin/cli.js +180 -180
- package/src/@claude-flow/cli/bin/preinstall.cjs +2 -2
- package/src/@claude-flow/cli/dist/src/commands/completions.js +409 -409
- package/src/@claude-flow/cli/dist/src/commands/doctor.js +18 -2
- package/src/@claude-flow/cli/dist/src/commands/embeddings.js +25 -25
- package/src/@claude-flow/cli/dist/src/commands/github.js +61 -61
- package/src/@claude-flow/cli/dist/src/commands/hive-mind.js +90 -90
- package/src/@claude-flow/cli/dist/src/commands/hooks.js +9 -9
- package/src/@claude-flow/cli/dist/src/commands/init.js +3 -8
- package/src/@claude-flow/cli/dist/src/commands/ruvector/import.js +14 -14
- package/src/@claude-flow/cli/dist/src/commands/ruvector/setup.js +624 -624
- package/src/@claude-flow/cli/dist/src/config/moflo-config.d.ts +3 -0
- package/src/@claude-flow/cli/dist/src/config/moflo-config.js +101 -91
- package/src/@claude-flow/cli/dist/src/index.d.ts +5 -0
- package/src/@claude-flow/cli/dist/src/index.js +44 -0
- package/src/@claude-flow/cli/dist/src/init/claudemd-generator.d.ts +29 -29
- package/src/@claude-flow/cli/dist/src/init/claudemd-generator.js +43 -43
- package/src/@claude-flow/cli/dist/src/init/executor.js +453 -453
- package/src/@claude-flow/cli/dist/src/init/helpers-generator.js +482 -482
- package/src/@claude-flow/cli/dist/src/init/moflo-init.d.ts +30 -30
- package/src/@claude-flow/cli/dist/src/init/moflo-init.js +140 -140
- package/src/@claude-flow/cli/dist/src/init/statusline-generator.js +876 -876
- package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +371 -371
- package/src/@claude-flow/cli/dist/src/runtime/headless.js +28 -28
- package/src/@claude-flow/cli/dist/src/services/container-worker-pool.d.ts +197 -0
- package/src/@claude-flow/cli/dist/src/services/container-worker-pool.js +584 -0
- package/src/@claude-flow/cli/dist/src/services/daemon-lock.d.ts +14 -0
- package/src/@claude-flow/cli/dist/src/services/daemon-lock.js +1 -1
- package/src/@claude-flow/cli/dist/src/services/headless-worker-executor.js +84 -84
- package/src/@claude-flow/cli/package.json +1 -1
- package/src/@claude-flow/guidance/README.md +1195 -1195
- package/src/@claude-flow/guidance/package.json +198 -198
- package/src/@claude-flow/memory/README.md +587 -587
- package/src/@claude-flow/memory/dist/agentdb-backend.js +26 -26
- package/src/@claude-flow/memory/dist/auto-memory-bridge.test.js +27 -27
- package/src/@claude-flow/memory/dist/hybrid-backend.d.ts +245 -0
- package/src/@claude-flow/memory/dist/hybrid-backend.js +569 -0
- package/src/@claude-flow/memory/dist/hybrid-backend.test.d.ts +8 -0
- package/src/@claude-flow/memory/dist/hybrid-backend.test.js +320 -0
- package/src/@claude-flow/memory/dist/sqlite-backend.d.ts +121 -0
- package/src/@claude-flow/memory/dist/sqlite-backend.js +572 -0
- package/src/@claude-flow/memory/dist/sqljs-backend.js +26 -26
- package/src/@claude-flow/memory/package.json +44 -44
- package/src/@claude-flow/shared/README.md +323 -323
- package/src/@claude-flow/shared/dist/events/event-store.js +31 -31
- package/src/README.md +493 -493
package/bin/semantic-search.mjs
CHANGED
|
@@ -1,473 +1,473 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
|
|
4
|
-
*
|
|
5
|
-
* Query embedding MUST match stored embedding model:
|
|
6
|
-
* 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
|
|
7
|
-
* 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
|
|
8
|
-
*
|
|
9
|
-
* Usage:
|
|
10
|
-
* node node_modules/moflo/bin/semantic-search.mjs "your search query"
|
|
11
|
-
* npx flo-search "your search query"
|
|
12
|
-
* npx flo-search "query" --limit 10
|
|
13
|
-
* npx flo-search "query" --namespace guidance
|
|
14
|
-
* npx flo-search "query" --threshold 0.3
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
import { existsSync, readFileSync } from 'fs';
|
|
18
|
-
import { resolve, dirname } from 'path';
|
|
19
|
-
import { mofloResolveURL } from './lib/moflo-resolve.mjs';
|
|
20
|
-
const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
|
|
21
|
-
|
|
22
|
-
/**
 * Locate the nearest ancestor directory (starting at the current working
 * directory) that contains a package.json.
 * @returns {string} That directory, or process.cwd() when none is found.
 */
function findProjectRoot() {
  const fsRoot = resolve(process.cwd(), '/');
  for (let candidate = process.cwd(); candidate !== fsRoot; candidate = dirname(candidate)) {
    if (existsSync(resolve(candidate, 'package.json'))) {
      return candidate;
    }
  }
  // No manifest anywhere up the tree — fall back to where we were invoked.
  return process.cwd();
}
|
|
31
|
-
|
|
32
|
-
const projectRoot = findProjectRoot();

// Memory database produced by the moflo indexing tools.
const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
const EMBEDDING_DIMS = 384;
const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
// 'onnx' is a legacy alias for the Xenova model — treat them as compatible vector spaces
const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);

// ---- CLI argument parsing ----
const args = process.argv.slice(2);

// Value following `flag`, or null when the flag (or its value) is absent.
function flagValue(flag) {
  const i = args.indexOf(flag);
  return i !== -1 && i + 1 < args.length ? args[i + 1] : null;
}

// Numeric flag with a default. FIX: previously `--limit`/`--threshold` with a
// missing or non-numeric value produced NaN (no radix on parseInt, no guard),
// which silently emptied all results via `.slice(0, NaN)`.
function numericFlag(flag, fallback, parse) {
  const raw = flagValue(flag);
  if (raw === null) return fallback;
  const parsed = parse(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

const query = args.find(a => !a.startsWith('--'));
const limit = numericFlag('--limit', 5, (s) => Number.parseInt(s, 10));
let namespace = flagValue('--namespace');
const withTests = args.includes('--with-tests');
const threshold = numericFlag('--threshold', 0.3, Number.parseFloat);
const json = args.includes('--json');
const debug = args.includes('--debug');

// Auto-routing: when query mentions test-related terms, also search tests namespace
const TEST_KEYWORDS = /\b(test|spec|coverage|assert|mock|stub|fixture|describe|jest|vitest|mocha|e2e|integration test)\b/i;

if (!query) {
  console.error('Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]');
  process.exit(1);
}
|
|
58
|
-
|
|
59
|
-
// ============================================================================
|
|
60
|
-
// Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
|
|
61
|
-
// ============================================================================
|
|
62
|
-
|
|
63
|
-
// Lazily-initialized Transformers.js feature-extraction pipeline (null until loaded).
let pipeline = null;
// True once the neural pipeline has loaded successfully.
let useTransformers = false;

/**
 * Try to load the Xenova Transformers.js pipeline for neural embeddings,
 * updating the module-level `pipeline` / `useTransformers` state.
 * @returns {Promise<boolean>} true when the neural model is ready to use.
 */
async function loadTransformersModel() {
  try {
    const transformers = await import(mofloResolveURL('@xenova/transformers'));
    // Force remote (hub) models and single-threaded WASM inference.
    transformers.env.allowLocalModels = false;
    transformers.env.backends.onnx.wasm.numThreads = 1;

    pipeline = await transformers.pipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
      quantized: false,
    });
    useTransformers = true;
    if (debug) console.error('[semantic-search] Using Transformers.js neural model');
    return true;
  } catch (err) {
    useTransformers = false;
    if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${err.message?.split('\n')[0]}`);
    return false;
  }
}
|
|
85
|
-
|
|
86
|
-
/**
 * Embed `text` with the loaded Transformers.js pipeline.
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} Mean-pooled, normalized vector, or null
 *   when the pipeline is not loaded or inference fails.
 */
async function generateNeuralEmbedding(text) {
  if (pipeline === null) return null;
  try {
    const result = await pipeline(text, { pooling: 'mean', normalize: true });
    return [...result.data];
  } catch {
    return null;
  }
}
|
|
95
|
-
|
|
96
|
-
// ============================================================================
|
|
97
|
-
// Domain-Aware Semantic Hash Embeddings (fallback)
|
|
98
|
-
// ============================================================================
|
|
99
|
-
|
|
100
|
-
// Keyword clusters for the hash-embedding fallback: each domain's keywords are
// matched as substrings of the (lowercased) text, and a matching domain adds
// its pre-computed signature vector to the embedding. Keywords mix generic
// terms with project-specific library names (mikro-orm, awilix, syncfusion…).
const DOMAIN_CLUSTERS = {
  database: ['typeorm', 'mongodb', 'database', 'entity', 'schema', 'table', 'collection',
             'query', 'sql', 'nosql', 'orm', 'model', 'migration', 'repository', 'column',
             'relation', 'foreign', 'primary', 'index', 'constraint', 'transaction',
             'mikroorm', 'mikro', 'postgresql', 'postgres', 'soft', 'delete', 'deletedat'],
  frontend: ['react', 'component', 'ui', 'styling', 'css', 'html', 'jsx', 'tsx', 'frontend',
             'material', 'mui', 'tailwind', 'dom', 'render', 'hook', 'state', 'props',
             'redux', 'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
             'mantis', 'syncfusion', 'scheduler', 'i18n', 'intl', 'locale'],
  backend: ['fastify', 'api', 'route', 'handler', 'rest', 'endpoint', 'server', 'controller',
            'middleware', 'request', 'response', 'http', 'express', 'nest', 'graphql',
            'websocket', 'socket', 'cors', 'auth', 'jwt', 'session', 'cookie',
            'awilix', 'dependency', 'injection', 'scope'],
  testing: ['test', 'testing', 'vitest', 'jest', 'mock', 'spy', 'assert', 'expect', 'describe',
            'it', 'spec', 'unit', 'integration', 'e2e', 'playwright', 'cypress', 'coverage',
            'fixture', 'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
            'anti-pattern', 'antipattern', 'mocking'],
  tenancy: ['tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi', 'multitenant',
            'organization', 'workspace', 'account', 'customer', 'client', 'subdomain'],
  security: ['security', 'auth', 'authentication', 'authorization', 'permission', 'role',
             'access', 'token', 'jwt', 'oauth', 'password', 'encrypt', 'hash', 'salt',
             'csrf', 'xss', 'injection', 'sanitize', 'validate', 'rbac'],
  patterns: ['pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter', 'facade',
             'observer', 'strategy', 'command', 'repository', 'usecase', 'domain', 'ddd',
             'clean', 'architecture', 'solid', 'dry', 'kiss', 'functional', 'pipeasync'],
  workflow: ['workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build', 'actions',
             'hook', 'trigger', 'job', 'step', 'artifact', 'release', 'version', 'tag'],
  memory: ['memory', 'cache', 'store', 'persist', 'storage', 'redis', 'session', 'state',
           'buffer', 'queue', 'stack', 'heap', 'gc', 'leak', 'embedding', 'vector', 'hnsw',
           'semantic', 'search', 'index', 'retrieval'],
  agent: ['agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker', 'spawn',
          'parallel', 'concurrent', 'async', 'promise', 'queue', 'priority', 'schedule'],
  github: ['github', 'issue', 'branch', 'pr', 'pull', 'request', 'merge', 'commit', 'push',
           'clone', 'fork', 'remote', 'origin', 'main', 'master', 'checkout', 'rebase',
           'squash', 'repository', 'repo', 'gh', 'git', 'assignee', 'label', 'mandatory',
           'checklist', 'closes', 'fixes', 'conventional', 'feat', 'refactor'],
  documentation: ['guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
                  'reference', 'standard', 'convention', 'rule', 'policy', 'template',
                  'example', 'usage', 'instruction', 'meta', 'index', 'umbrella', 'claude',
                  'optimized', 'audience', 'structure', 'format', 'markdown']
};
|
|
141
|
-
|
|
142
|
-
// Stop-words: these get a reduced feature weight in semanticHashEmbed (0.2
// instead of 0.5/0.8), and bigrams made of two stop-words are skipped.
const COMMON_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
  'can', 'need', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
  'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'and', 'but',
  'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not', 'only', 'own', 'same', 'than',
  'too', 'very', 'just', 'also', 'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then',
  'else', 'when', 'where', 'why', 'how', 'all', 'each', 'every', 'any', 'some', 'no', 'yes',
  'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like', 'make', 'made'
]);
|
|
152
|
-
|
|
153
|
-
/**
 * Deterministic 32-bit string hash with Murmur-style mixing.
 * Iterates UTF-16 code units (charCodeAt), matching how the stored
 * embeddings were built.
 * @param {string} str - Input text.
 * @param {number} [seed=0] - Seed so one key can yield several positions.
 * @returns {number} Unsigned 32-bit integer.
 */
function hash(str, seed = 0) {
  let acc = seed ^ str.length;
  for (let pos = 0; pos < str.length; pos += 1) {
    acc ^= str.charCodeAt(pos);
    acc = Math.imul(acc, 0x5bd1e995);
    acc ^= acc >>> 15;
  }
  // >>> 0 reinterprets the signed 32-bit result as unsigned.
  return acc >>> 0;
}
|
|
162
|
-
|
|
163
|
-
// Pre-computed sparse signature per domain: every keyword sets two seeded
// hash positions to 1. Built once at module load; semanticHashEmbed blends
// these into the embedding for domains whose keywords match the text.
const domainSignatures = {};
for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
  const signature = new Float32Array(EMBEDDING_DIMS);
  for (const keyword of keywords) {
    for (let seed = 0; seed < 2; seed += 1) {
      signature[hash(`${keyword}_dom_${domain}`, seed) % EMBEDDING_DIMS] = 1;
    }
  }
  domainSignatures[domain] = signature;
}
|
|
175
|
-
|
|
176
|
-
/**
 * Fallback embedding: deterministic, domain-aware hash projection of `text`
 * into a `dims`-dimensional, L2-normalized vector.
 *
 * Signal sources, accumulated in order:
 *   1. Domain signatures — weighted copies of pre-computed signatures for
 *      domains whose keywords occur (as substrings) in the text.
 *   2. Individual word features (3 seeded hash positions each, signed).
 *   3. Bigrams (skipped when both words are stop-words).
 *   4. Trigrams.
 *
 * @param {string} text - Raw query or document text.
 * @param {number} [dims=EMBEDDING_DIMS] - Output vector length.
 *   NOTE(review): dims > EMBEDDING_DIMS would read past the 384-wide domain
 *   signatures (undefined → NaN); the only caller in this file uses the
 *   default — confirm before passing a larger value.
 * @returns {Float32Array} Unit-length vector (all zeros for empty input).
 */
function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
  const vec = new Float32Array(dims);
  const lowerText = text.toLowerCase();
  // Tokenize: strip non-alphanumerics, split on whitespace, drop 1-char tokens.
  const words = lowerText.replace(/[^a-z0-9\s]/g, ' ').split(/\s+/).filter(w => w.length > 1);

  if (words.length === 0) return vec;

  // Add domain signatures, weighted by keyword match count (capped at 2.0).
  for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
    let matchCount = 0;
    for (const kw of keywords) {
      if (lowerText.includes(kw)) matchCount++;
    }
    if (matchCount > 0) {
      const weight = Math.min(2.0, 0.5 + matchCount * 0.3);
      const sig = domainSignatures[domain];
      for (let i = 0; i < dims; i++) {
        vec[i] += sig[i] * weight;
      }
    }
  }

  // Add word features: long, non-stop-words carry the most weight.
  for (const word of words) {
    const isCommon = COMMON_WORDS.has(word);
    const weight = isCommon ? 0.2 : (word.length > 6 ? 0.8 : 0.5);
    for (let h = 0; h < 3; h++) {
      const idx = hash(word, h * 17) % dims;
      // Second, differently-seeded hash decides the sign, decorrelating slots.
      const sign = (hash(word, h * 31 + 1) % 2 === 0) ? 1 : -1;
      vec[idx] += sign * weight;
    }
  }

  // Add bigrams; pairs of stop-words carry no signal and are skipped.
  for (let i = 0; i < words.length - 1; i++) {
    if (COMMON_WORDS.has(words[i]) && COMMON_WORDS.has(words[i + 1])) continue;
    const bigram = words[i] + '_' + words[i + 1];
    const idx = hash(bigram, 42) % dims;
    const sign = (hash(bigram, 43) % 2 === 0) ? 1 : -1;
    vec[idx] += sign * 0.4;
  }

  // Add trigrams (no stop-word filtering here).
  for (let i = 0; i < words.length - 2; i++) {
    const trigram = words[i] + '_' + words[i + 1] + '_' + words[i + 2];
    const idx = hash(trigram, 99) % dims;
    const sign = (hash(trigram, 100) % 2 === 0) ? 1 : -1;
    vec[idx] += sign * 0.3;
  }

  // L2 normalize so cosineSimilarity can be a plain dot product.
  let norm = 0;
  for (let i = 0; i < dims; i++) norm += vec[i] * vec[i];
  norm = Math.sqrt(norm);
  if (norm > 0) {
    for (let i = 0; i < dims; i++) vec[i] /= norm;
  }

  return vec;
}
|
|
236
|
-
|
|
237
|
-
// ============================================================================
|
|
238
|
-
// Unified Embedding Generator (matches stored embeddings)
|
|
239
|
-
// ============================================================================
|
|
240
|
-
|
|
241
|
-
/**
 * Generate query embedding using the SAME model as stored embeddings.
 * Checks what model was used for stored entries and matches it.
 *
 * Reads module-level CLI state: `namespace` (scopes the model check),
 * `json` and `debug` (logging).
 *
 * @param {string} queryText - The search query.
 * @param {object} db - Open sql.js database.
 * @returns {Promise<{embedding: number[], model: string}>} Vector plus the
 *   model tag it was produced with.
 */
async function generateQueryEmbedding(queryText, db) {
  // Check what model the stored entries use (most common one wins).
  let modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    ${namespace ? "AND namespace = ?" : ""}
    GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
  const modelStmt = db.prepare(modelCheckSql);
  modelStmt.bind(namespace ? [namespace] : []);
  const modelCheck = modelStmt.step() ? modelStmt.getAsObject() : null;
  modelStmt.free();

  // No rows (or NULL model) → assume the hash fallback model.
  const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;

  if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);

  // If stored embeddings are neural, try to use neural for query too
  // Accept both canonical name and legacy 'onnx' tag (both use the same Xenova pipeline)
  if (storedModel === EMBEDDING_MODEL_NEURAL || storedModel === 'onnx') {
    await loadTransformersModel();
    if (useTransformers) {
      const neuralEmb = await generateNeuralEmbedding(queryText);
      if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
        return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
      }
    }
    // Neural failed — warn about model mismatch (hash query vs neural store
    // compares across vector spaces, so scores degrade).
    if (!json) {
      console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
      console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
    }
  }

  // Use hash embeddings (either matching stored hash model, or as fallback)
  const hashEmb = Array.from(semanticHashEmbed(queryText));
  return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
}
|
|
281
|
-
|
|
282
|
-
// ============================================================================
|
|
283
|
-
// Search Functions
|
|
284
|
-
// ============================================================================
|
|
285
|
-
|
|
286
|
-
/**
 * Dot product of two equal-length vectors. Stored and query embeddings are
 * both L2-normalized, so the dot product IS the cosine similarity.
 * @param {number[]|Float32Array} a
 * @param {number[]|Float32Array} b
 * @returns {number} Similarity, or 0 for missing/mismatched-length inputs.
 */
function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length) return 0;
  let total = 0;
  let i = 0;
  while (i < a.length) {
    total += a[i] * b[i];
    i += 1;
  }
  return total;
}
|
|
294
|
-
|
|
295
|
-
/**
 * Load the memory database from disk into an in-memory sql.js instance.
 * @throws {Error} when the database file does not exist.
 * @returns {Promise<object>} sql.js Database loaded from DB_PATH.
 */
async function getDb() {
  if (!existsSync(DB_PATH)) {
    throw new Error(`Database not found: ${DB_PATH}`);
  }
  const SQL = await initSqlJs();
  return new SQL.Database(readFileSync(DB_PATH));
}
|
|
303
|
-
|
|
304
|
-
/**
 * Rank stored memory entries by cosine similarity to `queryText`.
 *
 * Loads the whole candidate set (active entries with non-empty embeddings,
 * optionally scoped to one namespace), scores each against the query
 * embedding, and returns the top `limit` entries at or above `threshold`.
 *
 * @param {string} queryText - Search query.
 * @param {object} [options]
 * @param {number} [options.limit=5] - Max results returned.
 * @param {string|null} [options.namespace=null] - Restrict to one namespace.
 * @param {number} [options.threshold=0.3] - Minimum similarity to keep.
 * @returns {Promise<object>} {query, results, totalMatches, searchTime,
 *   indexType, model}.
 */
async function semanticSearch(queryText, options = {}) {
  const { limit = 5, namespace = null, threshold = 0.3 } = options;
  const startTime = performance.now();

  const db = await getDb();

  // Generate query embedding matching the stored model
  const { embedding: queryEmbedding, model: queryModel } = await generateQueryEmbedding(queryText, db);

  if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);

  // Get all entries with embeddings
  let sql = `
    SELECT id, key, namespace, content, embedding, embedding_model, metadata
    FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
  `;
  const params = [];

  if (namespace) {
    sql += ` AND namespace = ?`;
    params.push(namespace);
  }

  const stmt = db.prepare(sql);
  stmt.bind(params);

  // Calculate similarity scores
  const results = [];
  while (stmt.step()) {
    const entry = stmt.getAsObject();
    try {
      // Skip entries embedded with a different model than the query —
      // except that the canonical Xenova name and legacy 'onnx' tag are
      // the same vector space and remain comparable.
      const storedIsNeural = NEURAL_ALIASES.has(entry.embedding_model);
      const queryIsNeural = NEURAL_ALIASES.has(queryModel);
      if (entry.embedding_model && entry.embedding_model !== queryModel && !(storedIsNeural && queryIsNeural)) continue;

      const embedding = JSON.parse(entry.embedding);
      if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;

      const similarity = cosineSimilarity(queryEmbedding, embedding);

      if (similarity >= threshold) {
        // Metadata is best-effort JSON; malformed metadata keeps the entry
        // but drops the extra fields.
        let metadata = {};
        try {
          metadata = JSON.parse(entry.metadata || '{}');
        } catch {}

        results.push({
          key: entry.key,
          namespace: entry.namespace,
          score: similarity,
          preview: entry.content.substring(0, 150).replace(/\n/g, ' '),
          type: metadata.type || 'unknown',
          parentDoc: metadata.parentDoc || null,
          chunkTitle: metadata.chunkTitle || null,
        });
      }
    } catch {
      // Skip entries with invalid embeddings
    }
  }
  stmt.free();

  db.close();

  // Sort by similarity (descending) and limit
  results.sort((a, b) => b.score - a.score);
  const topResults = results.slice(0, limit);

  const searchTime = performance.now() - startTime;

  return {
    query: queryText,
    results: topResults,
    totalMatches: results.length,
    searchTime: `${searchTime.toFixed(0)}ms`,
    indexType: 'vector-cosine',
    model: queryModel,
  };
}
|
|
384
|
-
|
|
385
|
-
// ============================================================================
|
|
386
|
-
// Main
|
|
387
|
-
// ============================================================================
|
|
388
|
-
|
|
389
|
-
/**
 * CLI entry point: runs the search (optionally merged with the `tests`
 * namespace) and prints either JSON or a human-readable table.
 * Exits with code 1 on any error.
 */
async function main() {
  if (!json) {
    console.log('');
    console.log(`[semantic-search] Query: "${query}"`);
  }

  try {
    // --with-tests: search both the specified namespace (or code-map) and tests
    // Auto-route: if query contains test keywords and no namespace specified, also search tests
    const autoRouteTests = !namespace && TEST_KEYWORDS.test(query);
    let results;

    if (withTests || autoRouteTests) {
      const primaryNs = namespace || 'code-map';
      const primaryResults = await semanticSearch(query, { limit, namespace: primaryNs, threshold });
      const testResults = await semanticSearch(query, { limit, namespace: 'tests', threshold });

      // Merge and re-sort by score
      const merged = [...primaryResults.results, ...testResults.results]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);

      results = {
        ...primaryResults,
        results: merged,
        totalMatches: primaryResults.totalMatches + testResults.totalMatches,
        // searchTime fields are "NNms" strings; parseInt reads the digits.
        searchTime: `${parseInt(primaryResults.searchTime) + parseInt(testResults.searchTime)}ms`,
        namespaces: [primaryNs, 'tests'],
      };

      if (!json && autoRouteTests) {
        console.log(`[semantic-search] Auto-routed to tests namespace (query contains test keywords)`);
      }
    } else {
      results = await semanticSearch(query, { limit, namespace, threshold });
    }

    if (json) {
      console.log(JSON.stringify(results, null, 2));
      return;
    }

    console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
    console.log('');

    if (results.results.length === 0) {
      console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
      return;
    }

    // Display results
    console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
    console.log('│ Rank │ Score │ Key │ Type │ Preview │');
    console.log('├─────────────────────────────────────────────────────────────────────────────┤');

    for (let i = 0; i < results.results.length; i++) {
      const r = results.results[i];
      const rank = String(i + 1).padStart(4);
      const score = r.score.toFixed(3);
      const key = r.key.substring(0, 28).padEnd(28);
      const type = (r.type || '').substring(0, 6).padEnd(6);
      const preview = r.preview.substring(0, 18).padEnd(18);

      console.log(`│ ${rank} │ ${score} │ ${key} │ ${type} │ ${preview}… │`);
    }

    console.log('└─────────────────────────────────────────────────────────────────────────────┘');

    // Show chunk context
    console.log('');
    console.log('Top result details:');
    const top = results.results[0];
    console.log(`  Key: ${top.key}`);
    console.log(`  Score: ${top.score.toFixed(4)}`);
    if (top.chunkTitle) console.log(`  Section: ${top.chunkTitle}`);
    if (top.parentDoc) console.log(`  Parent: ${top.parentDoc}`);
    console.log(`  Preview: ${top.preview}...`);

  } catch (err) {
    console.error(`[semantic-search] Error: ${err.message}`);
    process.exit(1);
  }
}

main();
|
|
2
|
+
/**
|
|
3
|
+
* Semantic search using 384-dim embeddings (Xenova/all-MiniLM-L6-v2 or hash fallback)
|
|
4
|
+
*
|
|
5
|
+
* Query embedding MUST match stored embedding model:
|
|
6
|
+
* 1. Transformers.js with all-MiniLM-L6-v2 (best quality, matches build-embeddings)
|
|
7
|
+
* 2. Domain-aware semantic hash embeddings (fallback when transformers unavailable)
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node node_modules/moflo/bin/semantic-search.mjs "your search query"
|
|
11
|
+
* npx flo-search "your search query"
|
|
12
|
+
* npx flo-search "query" --limit 10
|
|
13
|
+
* npx flo-search "query" --namespace guidance
|
|
14
|
+
* npx flo-search "query" --threshold 0.3
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { existsSync, readFileSync } from 'fs';
|
|
18
|
+
import { resolve, dirname } from 'path';
|
|
19
|
+
import { mofloResolveURL } from './lib/moflo-resolve.mjs';
|
|
20
|
+
const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
|
|
21
|
+
|
|
22
|
+
/**
 * Walk upward from the current working directory until a directory that
 * contains a package.json is found.
 *
 * @returns {string} Absolute path of the nearest ancestor holding a
 *   package.json, or process.cwd() when none is found before the FS root.
 */
function findProjectRoot() {
  const fsRoot = resolve(process.cwd(), '/');
  for (let current = process.cwd(); current !== fsRoot; current = dirname(current)) {
    if (existsSync(resolve(current, 'package.json'))) {
      return current;
    }
  }
  // No package.json anywhere up the tree — fall back to the cwd itself.
  return process.cwd();
}
|
|
31
|
+
|
|
32
|
+
const projectRoot = findProjectRoot();

// Location of the swarm memory database, relative to the detected project root.
const DB_PATH = resolve(projectRoot, '.swarm/memory.db');
const EMBEDDING_DIMS = 384;
const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
// 'onnx' is a legacy alias for the Xenova model — treat them as compatible vector spaces
const NEURAL_ALIASES = new Set([EMBEDDING_MODEL_NEURAL, 'onnx']);

// ---- CLI argument parsing --------------------------------------------------
const args = process.argv.slice(2);

/**
 * Return the token following `flag`, or `fallback` when the flag is absent.
 * @param {string} flag
 * @param {*} fallback
 */
function flagValue(flag, fallback) {
  const i = args.indexOf(flag);
  return i !== -1 ? args[i + 1] : fallback;
}

/**
 * Parse a numeric flag value. Falls back to `fallback` when the flag is
 * absent, its value is missing, or the value does not parse to a finite
 * number — previously `parseInt(undefined)` produced NaN, and a NaN limit
 * silently emptied all results via slice(0, NaN).
 * @param {string} flag
 * @param {number} fallback
 * @param {(s: string) => number} parse
 */
function numericFlag(flag, fallback, parse) {
  const raw = flagValue(flag, null);
  if (raw == null) return fallback;
  const n = parse(raw);
  return Number.isFinite(n) ? n : fallback;
}

// Flags that consume the next token as their value; the positional query must
// skip those tokens (previously `--limit 10 foo` picked "10" as the query).
const VALUE_FLAGS = new Set(['--limit', '--namespace', '--threshold']);
const query = args.find((a, i) => !a.startsWith('--') && !VALUE_FLAGS.has(args[i - 1]));

const limit = numericFlag('--limit', 5, (s) => Number.parseInt(s, 10));
let namespace = flagValue('--namespace', null);
const withTests = args.includes('--with-tests');
const threshold = numericFlag('--threshold', 0.3, Number.parseFloat);
const json = args.includes('--json');
const debug = args.includes('--debug');

// Auto-routing: when query mentions test-related terms, also search tests namespace
const TEST_KEYWORDS = /\b(test|spec|coverage|assert|mock|stub|fixture|describe|jest|vitest|mocha|e2e|integration test)\b/i;

if (!query) {
  console.error('Usage: npx flo-search "your query" [--limit N] [--namespace X] [--threshold N]');
  process.exit(1);
}
|
|
58
|
+
|
|
59
|
+
// ============================================================================
|
|
60
|
+
// Transformers.js Neural Embeddings (primary — matches build-embeddings.mjs)
|
|
61
|
+
// ============================================================================
|
|
62
|
+
|
|
63
|
+
// Lazily-initialised Transformers.js feature-extraction pipeline (null until loaded).
let pipeline = null;
// Whether the neural pipeline loaded successfully and can embed queries.
let useTransformers = false;

/**
 * Try to load the Xenova Transformers.js pipeline for neural embeddings.
 * On success sets module state (`pipeline`, `useTransformers`) and returns
 * true; on any failure resets the flag and returns false so callers can fall
 * back to hash embeddings.
 *
 * @returns {Promise<boolean>} Whether the neural pipeline is available.
 */
async function loadTransformersModel() {
  try {
    const transformers = await import(mofloResolveURL('@xenova/transformers'));
    transformers.env.allowLocalModels = false;
    transformers.env.backends.onnx.wasm.numThreads = 1;

    pipeline = await transformers.pipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
      quantized: false,
    });

    useTransformers = true;
    if (debug) console.error('[semantic-search] Using Transformers.js neural model');
    return true;
  } catch (err) {
    useTransformers = false;
    if (debug) console.error(`[semantic-search] Transformers.js unavailable: ${err.message?.split('\n')[0]}`);
    return false;
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Embed `text` with the loaded neural pipeline (mean pooling, normalised).
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} Embedding vector, or null when the
 *   pipeline is not loaded or the embedding call throws.
 */
async function generateNeuralEmbedding(text) {
  if (!pipeline) return null;
  try {
    const result = await pipeline(text, { pooling: 'mean', normalize: true });
    return [...result.data];
  } catch {
    // Best-effort: a failed neural embed falls through to the hash fallback.
    return null;
  }
}
|
|
95
|
+
|
|
96
|
+
// ============================================================================
|
|
97
|
+
// Domain-Aware Semantic Hash Embeddings (fallback)
|
|
98
|
+
// ============================================================================
|
|
99
|
+
|
|
100
|
+
// Keyword clusters per domain. A query/document matching keywords from a
// cluster gets that domain's signature vector mixed into its hash embedding,
// pulling same-domain texts closer together in the vector space.
// NOTE(review): terms look tuned to a specific stack (fastify/mikro-orm/mui);
// extend clusters rather than renaming them so stored vectors stay comparable.
const DOMAIN_CLUSTERS = {
  database: ['typeorm', 'mongodb', 'database', 'entity', 'schema', 'table', 'collection',
    'query', 'sql', 'nosql', 'orm', 'model', 'migration', 'repository', 'column',
    'relation', 'foreign', 'primary', 'index', 'constraint', 'transaction',
    'mikroorm', 'mikro', 'postgresql', 'postgres', 'soft', 'delete', 'deletedat'],
  frontend: ['react', 'component', 'ui', 'styling', 'css', 'html', 'jsx', 'tsx', 'frontend',
    'material', 'mui', 'tailwind', 'dom', 'render', 'hook', 'state', 'props',
    'redux', 'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
    'mantis', 'syncfusion', 'scheduler', 'i18n', 'intl', 'locale'],
  backend: ['fastify', 'api', 'route', 'handler', 'rest', 'endpoint', 'server', 'controller',
    'middleware', 'request', 'response', 'http', 'express', 'nest', 'graphql',
    'websocket', 'socket', 'cors', 'auth', 'jwt', 'session', 'cookie',
    'awilix', 'dependency', 'injection', 'scope'],
  testing: ['test', 'testing', 'vitest', 'jest', 'mock', 'spy', 'assert', 'expect', 'describe',
    'it', 'spec', 'unit', 'integration', 'e2e', 'playwright', 'cypress', 'coverage',
    'fixture', 'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
    'anti-pattern', 'antipattern', 'mocking'],
  tenancy: ['tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi', 'multitenant',
    'organization', 'workspace', 'account', 'customer', 'client', 'subdomain'],
  security: ['security', 'auth', 'authentication', 'authorization', 'permission', 'role',
    'access', 'token', 'jwt', 'oauth', 'password', 'encrypt', 'hash', 'salt',
    'csrf', 'xss', 'injection', 'sanitize', 'validate', 'rbac'],
  patterns: ['pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter', 'facade',
    'observer', 'strategy', 'command', 'repository', 'usecase', 'domain', 'ddd',
    'clean', 'architecture', 'solid', 'dry', 'kiss', 'functional', 'pipeasync'],
  workflow: ['workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build', 'actions',
    'hook', 'trigger', 'job', 'step', 'artifact', 'release', 'version', 'tag'],
  memory: ['memory', 'cache', 'store', 'persist', 'storage', 'redis', 'session', 'state',
    'buffer', 'queue', 'stack', 'heap', 'gc', 'leak', 'embedding', 'vector', 'hnsw',
    'semantic', 'search', 'index', 'retrieval'],
  agent: ['agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker', 'spawn',
    'parallel', 'concurrent', 'async', 'promise', 'queue', 'priority', 'schedule'],
  github: ['github', 'issue', 'branch', 'pr', 'pull', 'request', 'merge', 'commit', 'push',
    'clone', 'fork', 'remote', 'origin', 'main', 'master', 'checkout', 'rebase',
    'squash', 'repository', 'repo', 'gh', 'git', 'assignee', 'label', 'mandatory',
    'checklist', 'closes', 'fixes', 'conventional', 'feat', 'refactor'],
  documentation: ['guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
    'reference', 'standard', 'convention', 'rule', 'policy', 'template',
    'example', 'usage', 'instruction', 'meta', 'index', 'umbrella', 'claude',
    'optimized', 'audience', 'structure', 'format', 'markdown']
};

// English stop-words: these get a reduced feature weight (0.2 vs 0.5/0.8) in
// semanticHashEmbed and all-stop-word bigrams are skipped entirely.
const COMMON_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall',
  'can', 'need', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
  'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'and', 'but',
  'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not', 'only', 'own', 'same', 'than',
  'too', 'very', 'just', 'also', 'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then',
  'else', 'when', 'where', 'why', 'how', 'all', 'each', 'every', 'any', 'some', 'no', 'yes',
  'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like', 'make', 'made'
]);
|
|
152
|
+
|
|
153
|
+
/**
 * Deterministic 32-bit string hash (MurmurHash2-style multiply/xor mixing,
 * constant 0x5bd1e995). Different seeds give independent hash streams, which
 * the embedder uses for multiple projections of the same token.
 *
 * @param {string} str - Input text (hashed per UTF-16 code unit).
 * @param {number} [seed=0] - Seed selecting the hash stream.
 * @returns {number} Unsigned 32-bit integer.
 */
function hash(str, seed = 0) {
  let acc = seed ^ str.length;
  let i = 0;
  while (i < str.length) {
    acc = Math.imul(acc ^ str.charCodeAt(i), 0x5bd1e995);
    acc ^= acc >>> 15;
    i += 1;
  }
  // >>> 0 coerces the signed 32-bit intermediate to unsigned.
  return acc >>> 0;
}
|
|
162
|
+
|
|
163
|
+
// Pre-compute one sparse 384-dim signature vector per domain: each keyword
// lights up two hashed dimensions (seeds 0 and 1). Computed once at module
// load; semanticHashEmbed mixes these into embeddings on cluster matches.
const domainSignatures = Object.fromEntries(
  Object.entries(DOMAIN_CLUSTERS).map(([domain, keywords]) => {
    const sig = new Float32Array(EMBEDDING_DIMS);
    for (const kw of keywords) {
      sig[hash(`${kw}_dom_${domain}`, 0) % EMBEDDING_DIMS] = 1;
      sig[hash(`${kw}_dom_${domain}`, 1) % EMBEDDING_DIMS] = 1;
    }
    return [domain, sig];
  })
);
|
|
175
|
+
|
|
176
|
+
/**
 * Fallback embedding: build a deterministic `dims`-dimensional vector from
 * domain-cluster signatures plus hashed unigram/bigram/trigram features, then
 * L2-normalise it. Must mirror the hash embedder used when embeddings were
 * stored so query and stored vectors share a space.
 *
 * @param {string} text - Text to embed.
 * @param {number} [dims=EMBEDDING_DIMS] - Output dimensionality.
 * @returns {Float32Array} L2-normalised vector (all-zero for empty input).
 */
function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
  const vec = new Float32Array(dims);
  const lower = text.toLowerCase();
  const tokens = lower
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter((t) => t.length > 1);

  if (tokens.length === 0) return vec;

  // 1) Domain signatures: add each matched cluster's signature, weighted by
  //    how many of its keywords appear (capped at 2.0).
  for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
    const matches = keywords.filter((kw) => lower.includes(kw)).length;
    if (matches === 0) continue;
    const weight = Math.min(2.0, 0.5 + matches * 0.3);
    const sig = domainSignatures[domain];
    for (let i = 0; i < dims; i++) vec[i] += sig[i] * weight;
  }

  // 2) Unigram features: three signed hash projections per token, weighted by
  //    rarity (stop-words 0.2, long words 0.8, everything else 0.5).
  for (const token of tokens) {
    const weight = COMMON_WORDS.has(token) ? 0.2 : token.length > 6 ? 0.8 : 0.5;
    for (let k = 0; k < 3; k++) {
      const slot = hash(token, k * 17) % dims;
      const sign = hash(token, k * 31 + 1) % 2 === 0 ? 1 : -1;
      vec[slot] += sign * weight;
    }
  }

  // 3) Bigram features (skipped when both halves are stop-words).
  for (let i = 0; i + 1 < tokens.length; i++) {
    if (COMMON_WORDS.has(tokens[i]) && COMMON_WORDS.has(tokens[i + 1])) continue;
    const gram = `${tokens[i]}_${tokens[i + 1]}`;
    const slot = hash(gram, 42) % dims;
    vec[slot] += (hash(gram, 43) % 2 === 0 ? 1 : -1) * 0.4;
  }

  // 4) Trigram features (no stop-word filter, lower weight).
  for (let i = 0; i + 2 < tokens.length; i++) {
    const gram = `${tokens[i]}_${tokens[i + 1]}_${tokens[i + 2]}`;
    const slot = hash(gram, 99) % dims;
    vec[slot] += (hash(gram, 100) % 2 === 0 ? 1 : -1) * 0.3;
  }

  // 5) L2-normalise so cosine similarity reduces to a plain dot product.
  let sumSq = 0;
  for (let i = 0; i < dims; i++) sumSq += vec[i] * vec[i];
  const norm = Math.sqrt(sumSq);
  if (norm > 0) {
    for (let i = 0; i < dims; i++) vec[i] /= norm;
  }

  return vec;
}
|
|
236
|
+
|
|
237
|
+
// ============================================================================
|
|
238
|
+
// Unified Embedding Generator (matches stored embeddings)
|
|
239
|
+
// ============================================================================
|
|
240
|
+
|
|
241
|
+
/**
 * Generate query embedding using the SAME model as stored embeddings.
 * Checks what model was used for stored entries and matches it.
 *
 * @param {string} queryText - Raw query string to embed.
 * @param {object} db - Open sql.js database handle (memory_entries table).
 * @returns {Promise<{embedding: number[], model: string}>} Vector plus the
 *   model tag it was produced with.
 */
async function generateQueryEmbedding(queryText, db) {
  // Find the dominant embedding model among stored active entries, scoped to
  // the requested namespace when one was given on the CLI.
  const modelCheckSql = `SELECT embedding_model, COUNT(*) as cnt FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
    ${namespace ? "AND namespace = ?" : ""}
    GROUP BY embedding_model ORDER BY cnt DESC LIMIT 1`;
  const modelStmt = db.prepare(modelCheckSql);
  modelStmt.bind(namespace ? [namespace] : []);
  const modelCheck = modelStmt.step() ? modelStmt.getAsObject() : null;
  modelStmt.free();

  const storedModel = modelCheck?.embedding_model || EMBEDDING_MODEL_HASH;

  if (debug) console.error(`[semantic-search] Stored model: ${storedModel}`);

  // If stored embeddings are neural, try to use neural for query too.
  // NEURAL_ALIASES covers both the canonical name and the legacy 'onnx' tag
  // (same check semanticSearch uses — kept consistent here).
  if (NEURAL_ALIASES.has(storedModel)) {
    await loadTransformersModel();
    if (useTransformers) {
      const neuralEmb = await generateNeuralEmbedding(queryText);
      if (neuralEmb && neuralEmb.length === EMBEDDING_DIMS) {
        return { embedding: neuralEmb, model: EMBEDDING_MODEL_NEURAL };
      }
    }
    // Neural failed — warn about model mismatch: hash-embedding a query
    // against neural stored vectors degrades result quality badly.
    if (!json) {
      console.error('[semantic-search] WARNING: Stored embeddings use neural model but Transformers.js unavailable.');
      console.error('[semantic-search] Results may be poor. Run: npx flo-embeddings --force');
    }
  }

  // Use hash embeddings (either matching stored hash model, or as fallback)
  const hashEmb = Array.from(semanticHashEmbed(queryText));
  return { embedding: hashEmb, model: EMBEDDING_MODEL_HASH };
}
|
|
281
|
+
|
|
282
|
+
// ============================================================================
|
|
283
|
+
// Search Functions
|
|
284
|
+
// ============================================================================
|
|
285
|
+
|
|
286
|
+
/**
 * Dot product of two equal-length vectors. Both stored and query embeddings
 * are L2-normalised before reaching this point, so the dot product equals
 * cosine similarity. Returns 0 for missing or length-mismatched inputs.
 *
 * @param {number[]|Float32Array} a
 * @param {number[]|Float32Array} b
 * @returns {number}
 */
function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length) return 0;
  return a.reduce((dot, ai, i) => dot + ai * b[i], 0);
}
|
|
294
|
+
|
|
295
|
+
/**
 * Load the swarm memory database file into an in-memory sql.js Database.
 *
 * @returns {Promise<object>} sql.js Database instance.
 * @throws {Error} When .swarm/memory.db does not exist under the project root.
 */
async function getDb() {
  if (!existsSync(DB_PATH)) {
    throw new Error(`Database not found: ${DB_PATH}`);
  }
  const SQL = await initSqlJs();
  return new SQL.Database(readFileSync(DB_PATH));
}
|
|
303
|
+
|
|
304
|
+
/**
 * Cosine-similarity search over stored memory embeddings.
 *
 * Loads the DB, embeds the query with a model matching the stored entries,
 * scores every active entry with a parseable 384-dim embedding, and returns
 * the top matches above `threshold`.
 *
 * @param {string} queryText - Query to embed and search for.
 * @param {object} [options]
 * @param {number} [options.limit=5] - Max results returned.
 * @param {string|null} [options.namespace=null] - Restrict to one namespace.
 * @param {number} [options.threshold=0.3] - Minimum similarity score.
 * @returns {Promise<object>} { query, results, totalMatches, searchTime,
 *   indexType, model } — `results` already sorted and truncated to `limit`.
 */
async function semanticSearch(queryText, options = {}) {
  const { limit = 5, namespace = null, threshold = 0.3 } = options;
  const startTime = performance.now();

  const db = await getDb();

  // Generate query embedding matching the stored model
  const { embedding: queryEmbedding, model: queryModel } = await generateQueryEmbedding(queryText, db);

  if (debug) console.error(`[semantic-search] Query model: ${queryModel}`);

  // Get all entries with embeddings (full scan; similarity is computed in JS)
  let sql = `
    SELECT id, key, namespace, content, embedding, embedding_model, metadata
    FROM memory_entries
    WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''
  `;
  const params = [];

  if (namespace) {
    sql += ` AND namespace = ?`;
    params.push(namespace);
  }

  const stmt = db.prepare(sql);
  stmt.bind(params);

  // Calculate similarity scores
  const results = [];
  while (stmt.step()) {
    const entry = stmt.getAsObject();
    try {
      // Skip entries embedded with a different model than the query —
      // vectors from different models are not comparable. The 'onnx' legacy
      // tag and the canonical Xenova name count as the same (NEURAL_ALIASES).
      const storedIsNeural = NEURAL_ALIASES.has(entry.embedding_model);
      const queryIsNeural = NEURAL_ALIASES.has(queryModel);
      if (entry.embedding_model && entry.embedding_model !== queryModel && !(storedIsNeural && queryIsNeural)) continue;

      const embedding = JSON.parse(entry.embedding);
      if (!Array.isArray(embedding) || embedding.length !== EMBEDDING_DIMS) continue;

      const similarity = cosineSimilarity(queryEmbedding, embedding);

      if (similarity >= threshold) {
        let metadata = {};
        try {
          // Metadata is optional free-form JSON; ignore parse failures.
          metadata = JSON.parse(entry.metadata || '{}');
        } catch {}

        results.push({
          key: entry.key,
          namespace: entry.namespace,
          score: similarity,
          preview: entry.content.substring(0, 150).replace(/\n/g, ' '),
          type: metadata.type || 'unknown',
          parentDoc: metadata.parentDoc || null,
          chunkTitle: metadata.chunkTitle || null,
        });
      }
    } catch {
      // Skip entries with invalid embeddings
    }
  }
  stmt.free();

  db.close();

  // Sort by similarity (descending) and limit
  results.sort((a, b) => b.score - a.score);
  const topResults = results.slice(0, limit);

  const searchTime = performance.now() - startTime;

  return {
    query: queryText,
    results: topResults,
    // totalMatches counts everything above threshold, not just the top `limit`.
    totalMatches: results.length,
    searchTime: `${searchTime.toFixed(0)}ms`,
    indexType: 'vector-cosine',
    model: queryModel,
  };
}
|
|
384
|
+
|
|
385
|
+
// ============================================================================
|
|
386
|
+
// Main
|
|
387
|
+
// ============================================================================
|
|
388
|
+
|
|
389
|
+
/**
 * CLI entry point: run the search (optionally merged with the tests
 * namespace), then print either JSON or a human-readable results table.
 * Exits with code 1 on any error.
 */
async function main() {
  if (!json) {
    console.log('');
    console.log(`[semantic-search] Query: "${query}"`);
  }

  try {
    // --with-tests: search both the specified namespace (or code-map) and tests
    // Auto-route: if query contains test keywords and no namespace specified, also search tests
    const autoRouteTests = !namespace && TEST_KEYWORDS.test(query);
    let results;

    if (withTests || autoRouteTests) {
      const primaryNs = namespace || 'code-map';
      const primaryResults = await semanticSearch(query, { limit, namespace: primaryNs, threshold });
      const testResults = await semanticSearch(query, { limit, namespace: 'tests', threshold });

      // Merge and re-sort by score
      const merged = [...primaryResults.results, ...testResults.results]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);

      results = {
        ...primaryResults,
        results: merged,
        totalMatches: primaryResults.totalMatches + testResults.totalMatches,
        // parseInt strips the trailing "ms" from each "NNNms" string.
        searchTime: `${parseInt(primaryResults.searchTime) + parseInt(testResults.searchTime)}ms`,
        namespaces: [primaryNs, 'tests'],
      };

      if (!json && autoRouteTests) {
        console.log(`[semantic-search] Auto-routed to tests namespace (query contains test keywords)`);
      }
    } else {
      results = await semanticSearch(query, { limit, namespace, threshold });
    }

    if (json) {
      // --json: machine-readable output only, skip all decoration.
      console.log(JSON.stringify(results, null, 2));
      return;
    }

    console.log(`[semantic-search] Found ${results.totalMatches} matches (${results.searchTime}) [${results.model}]`);
    console.log('');

    if (results.results.length === 0) {
      console.log('No results found above threshold. Try lowering --threshold or broadening your query.');
      return;
    }

    // Display results (column widths: rank 4, score 5, key 28, type 6, preview 18)
    console.log('┌─────────────────────────────────────────────────────────────────────────────┐');
    console.log('│ Rank │ Score │ Key                          │ Type   │ Preview            │');
    console.log('├─────────────────────────────────────────────────────────────────────────────┤');

    for (let i = 0; i < results.results.length; i++) {
      const r = results.results[i];
      const rank = String(i + 1).padStart(4);
      const score = r.score.toFixed(3);
      const key = r.key.substring(0, 28).padEnd(28);
      const type = (r.type || '').substring(0, 6).padEnd(6);
      const preview = r.preview.substring(0, 18).padEnd(18);

      console.log(`│ ${rank} │ ${score} │ ${key} │ ${type} │ ${preview}… │`);
    }

    console.log('└─────────────────────────────────────────────────────────────────────────────┘');

    // Show chunk context
    console.log('');
    console.log('Top result details:');
    const top = results.results[0];
    console.log(`  Key: ${top.key}`);
    console.log(`  Score: ${top.score.toFixed(4)}`);
    if (top.chunkTitle) console.log(`  Section: ${top.chunkTitle}`);
    if (top.parentDoc) console.log(`  Parent: ${top.parentDoc}`);
    console.log(`  Preview: ${top.preview}...`);

  } catch (err) {
    console.error(`[semantic-search] Error: ${err.message}`);
    process.exit(1);
  }
}
|
|
472
|
+
|
|
473
|
+
// Entry point. Errors raised during the search are caught and reported
// inside main()'s try/catch, which exits with code 1.
main();
|