moflo 4.8.12 → 4.8.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +1 -1
- package/.claude/workflow-state.json +3 -3
- package/package.json +1 -3
- package/src/@claude-flow/cli/dist/src/init/claudemd-generator.js +47 -46
- package/src/@claude-flow/cli/package.json +106 -106
- package/.claude/agents/MIGRATION_SUMMARY.md +0 -222
- package/.claude/agents/analysis/code-review/analyze-code-quality.md +0 -179
- package/.claude/agents/development/backend/dev-backend-api.md +0 -142
- package/.claude/agents/flow-nexus/app-store.md +0 -88
- package/.claude/agents/flow-nexus/authentication.md +0 -69
- package/.claude/agents/flow-nexus/challenges.md +0 -81
- package/.claude/agents/flow-nexus/neural-network.md +0 -88
- package/.claude/agents/flow-nexus/payments.md +0 -83
- package/.claude/agents/flow-nexus/sandbox.md +0 -76
- package/.claude/agents/flow-nexus/swarm.md +0 -76
- package/.claude/agents/flow-nexus/user-tools.md +0 -96
- package/.claude/agents/flow-nexus/workflow.md +0 -84
- package/.claude/agents/payments/agentic-payments.md +0 -126
- package/.claude/agents/sona/sona-learning-optimizer.md +0 -74
- package/.claude/agents/sublinear/consensus-coordinator.md +0 -338
- package/.claude/agents/sublinear/matrix-optimizer.md +0 -185
- package/.claude/agents/sublinear/pagerank-analyzer.md +0 -299
- package/.claude/agents/sublinear/performance-optimizer.md +0 -368
- package/.claude/agents/sublinear/trading-predictor.md +0 -246
- package/.claude/agents/testing/unit/tdd-london-swarm.md +0 -244
- package/.claude/agents/testing/validation/production-validator.md +0 -395
- package/.claude/agents/v3/database-specialist.yaml +0 -21
- package/.claude/agents/v3/index.yaml +0 -17
- package/.claude/agents/v3/project-coordinator.yaml +0 -15
- package/.claude/agents/v3/python-specialist.yaml +0 -21
- package/.claude/agents/v3/test-architect.yaml +0 -20
- package/.claude/agents/v3/typescript-specialist.yaml +0 -21
- package/.claude/agents/v3/v3-integration-architect.md +0 -346
- package/.claude/agents/v3/v3-memory-specialist.md +0 -318
- package/.claude/agents/v3/v3-performance-engineer.md +0 -397
- package/.claude/agents/v3/v3-queen-coordinator.md +0 -98
- package/.claude/agents/v3/v3-security-architect.md +0 -174
- package/.claude/commands/analysis/COMMAND_COMPLIANCE_REPORT.md +0 -54
- package/.claude/commands/analysis/README.md +0 -9
- package/.claude/commands/analysis/bottleneck-detect.md +0 -162
- package/.claude/commands/analysis/performance-bottlenecks.md +0 -59
- package/.claude/commands/analysis/performance-report.md +0 -25
- package/.claude/commands/analysis/token-efficiency.md +0 -45
- package/.claude/commands/analysis/token-usage.md +0 -25
- package/.claude/commands/automation/README.md +0 -9
- package/.claude/commands/automation/auto-agent.md +0 -122
- package/.claude/commands/automation/self-healing.md +0 -106
- package/.claude/commands/automation/session-memory.md +0 -90
- package/.claude/commands/automation/smart-agents.md +0 -73
- package/.claude/commands/automation/smart-spawn.md +0 -25
- package/.claude/commands/automation/workflow-select.md +0 -25
- package/.claude/commands/coordination/README.md +0 -9
- package/.claude/commands/coordination/agent-spawn.md +0 -25
- package/.claude/commands/coordination/init.md +0 -44
- package/.claude/commands/coordination/orchestrate.md +0 -43
- package/.claude/commands/coordination/spawn.md +0 -45
- package/.claude/commands/coordination/swarm-init.md +0 -85
- package/.claude/commands/coordination/task-orchestrate.md +0 -25
- package/.claude/commands/flow-nexus/app-store.md +0 -124
- package/.claude/commands/flow-nexus/challenges.md +0 -120
- package/.claude/commands/flow-nexus/login-registration.md +0 -65
- package/.claude/commands/flow-nexus/neural-network.md +0 -134
- package/.claude/commands/flow-nexus/payments.md +0 -116
- package/.claude/commands/flow-nexus/sandbox.md +0 -83
- package/.claude/commands/flow-nexus/swarm.md +0 -87
- package/.claude/commands/flow-nexus/user-tools.md +0 -152
- package/.claude/commands/flow-nexus/workflow.md +0 -115
- package/.claude/commands/monitoring/README.md +0 -9
- package/.claude/commands/monitoring/agent-metrics.md +0 -25
- package/.claude/commands/monitoring/agents.md +0 -44
- package/.claude/commands/monitoring/real-time-view.md +0 -25
- package/.claude/commands/monitoring/status.md +0 -46
- package/.claude/commands/monitoring/swarm-monitor.md +0 -25
- package/.claude/commands/optimization/README.md +0 -9
- package/.claude/commands/optimization/auto-topology.md +0 -62
- package/.claude/commands/optimization/cache-manage.md +0 -25
- package/.claude/commands/optimization/parallel-execute.md +0 -25
- package/.claude/commands/optimization/parallel-execution.md +0 -50
- package/.claude/commands/optimization/topology-optimize.md +0 -25
- package/.claude/commands/pair/README.md +0 -261
- package/.claude/commands/pair/commands.md +0 -546
- package/.claude/commands/pair/config.md +0 -510
- package/.claude/commands/pair/examples.md +0 -512
- package/.claude/commands/pair/modes.md +0 -348
- package/.claude/commands/pair/session.md +0 -407
- package/.claude/commands/pair/start.md +0 -209
- package/.claude/commands/stream-chain/pipeline.md +0 -121
- package/.claude/commands/stream-chain/run.md +0 -70
- package/.claude/commands/training/README.md +0 -9
- package/.claude/commands/training/model-update.md +0 -25
- package/.claude/commands/training/neural-patterns.md +0 -74
- package/.claude/commands/training/neural-train.md +0 -25
- package/.claude/commands/training/pattern-learn.md +0 -25
- package/.claude/commands/training/specialization.md +0 -63
- package/.claude/commands/truth/start.md +0 -143
- package/.claude/commands/verify/check.md +0 -50
- package/.claude/commands/verify/start.md +0 -128
- package/.claude/helpers/gate-hook.mjs +0 -50
- package/.claude/helpers/gate.cjs +0 -138
- package/.claude/helpers/hook-handler.cjs +0 -83
- package/.claude/helpers/prompt-hook.mjs +0 -72
- package/.claude/scripts/build-embeddings.mjs +0 -549
- package/.claude/scripts/generate-code-map.mjs +0 -697
- package/.claude/scripts/hooks.mjs +0 -656
- package/.claude/scripts/index-guidance.mjs +0 -893
- package/.claude/scripts/index-tests.mjs +0 -710
- package/.claude/scripts/semantic-search.mjs +0 -473
- package/.claude/scripts/session-start-launcher.mjs +0 -226
- package/.claude/skills/agentic-jujutsu/SKILL.md +0 -645
- package/.claude/skills/dual-mode/README.md +0 -71
- package/.claude/skills/dual-mode/dual-collect.md +0 -103
- package/.claude/skills/dual-mode/dual-coordinate.md +0 -85
- package/.claude/skills/dual-mode/dual-spawn.md +0 -81
- package/.claude/skills/flow-nexus-neural/SKILL.md +0 -738
- package/.claude/skills/flow-nexus-platform/SKILL.md +0 -1157
- package/.claude/skills/flow-nexus-swarm/SKILL.md +0 -610
- package/.claude/skills/pair-programming/SKILL.md +0 -1202
- package/.claude/skills/stream-chain/SKILL.md +0 -563
- package/.claude/skills/v3-cli-modernization/SKILL.md +0 -872
- package/.claude/skills/v3-core-implementation/SKILL.md +0 -797
- package/.claude/skills/v3-ddd-architecture/SKILL.md +0 -442
- package/.claude/skills/v3-integration-deep/SKILL.md +0 -241
- package/.claude/skills/v3-mcp-optimization/SKILL.md +0 -777
- package/.claude/skills/v3-memory-unification/SKILL.md +0 -174
- package/.claude/skills/v3-performance-optimization/SKILL.md +0 -390
- package/.claude/skills/v3-security-overhaul/SKILL.md +0 -82
- package/.claude/skills/v3-swarm-coordination/SKILL.md +0 -340
- package/.claude-plugin/README.md +0 -720
- package/.claude-plugin/docs/INSTALLATION.md +0 -261
- package/.claude-plugin/docs/PLUGIN_SUMMARY.md +0 -361
- package/.claude-plugin/docs/QUICKSTART.md +0 -361
- package/.claude-plugin/docs/STRUCTURE.md +0 -128
- package/.claude-plugin/hooks/hooks.json +0 -74
- package/.claude-plugin/marketplace.json +0 -96
- package/.claude-plugin/plugin.json +0 -71
- package/.claude-plugin/scripts/install.sh +0 -234
- package/.claude-plugin/scripts/uninstall.sh +0 -36
- package/.claude-plugin/scripts/verify.sh +0 -108
|
@@ -1,549 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Generate embeddings for all memory entries and build HNSW index
|
|
4
|
-
*
|
|
5
|
-
* Embedding Strategy (in order of preference):
|
|
6
|
-
* 1. Transformers.js with all-MiniLM-L6-v2 (best quality, requires sharp)
|
|
7
|
-
* 2. Domain-aware semantic hash embeddings (fast, good quality, no deps)
|
|
8
|
-
*
|
|
9
|
-
* The domain-aware hash embeddings use:
|
|
10
|
-
* - Domain clustering for semantic grouping (database, frontend, backend, testing, etc.)
|
|
11
|
-
* - SimHash-style word encoding with multiple hash positions
|
|
12
|
-
* - N-gram features (bigrams, trigrams) for phrase detection
|
|
13
|
-
* - L2 normalization for cosine similarity
|
|
14
|
-
*
|
|
15
|
-
* Usage:
|
|
16
|
-
* node node_modules/moflo/bin/build-embeddings.mjs # Embed entries with missing or hash-only embeddings
|
|
17
|
-
* npx flo-embeddings --force # Re-embed all entries
|
|
18
|
-
* npx flo-embeddings --namespace guidance # Only specific namespace
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
22
|
-
import { resolve, dirname } from 'path';
|
|
23
|
-
import { mofloResolveURL } from './lib/moflo-resolve.mjs';
|
|
24
|
-
const initSqlJs = (await import(mofloResolveURL('sql.js'))).default;
|
|
25
|
-
|
|
26
|
-
/**
 * Locate the project root by walking upward from the current working
 * directory until a directory containing `package.json` is found.
 *
 * The filesystem root itself is never inspected; if the walk reaches it
 * without a match, the current working directory is returned instead.
 *
 * @returns {string} Absolute path of the detected project root.
 */
function findProjectRoot() {
  const start = process.cwd();
  const fsRoot = resolve(start, '/');

  for (let current = start; current !== fsRoot; current = dirname(current)) {
    const manifest = resolve(current, 'package.json');
    if (existsSync(manifest)) {
      return current;
    }
  }

  return process.cwd();
}
|
|
35
|
-
|
|
36
|
-
// Project root (nearest ancestor with a package.json) and the sql.js
// database file that holds the memory entries.
const projectRoot = findProjectRoot();

const DB_PATH = resolve(projectRoot, '.swarm/memory.db');

// Embedding config
const EMBEDDING_MODEL_NEURAL = 'Xenova/all-MiniLM-L6-v2';
const EMBEDDING_MODEL_HASH = 'domain-aware-hash-v1';
const EMBEDDING_DIMS = 384;
const BATCH_SIZE = 100;

// Parse args
const args = process.argv.slice(2);
const force = args.includes('--force');

// `--namespace` needs a value. A missing value, or one that is really the
// next flag (e.g. `--namespace --force`), is ignored with a warning instead
// of silently filtering on a namespace that cannot exist.
const namespaceIndex = args.indexOf('--namespace');
const namespaceValue = namespaceIndex === -1 ? undefined : args[namespaceIndex + 1];
const hasNamespaceValue = typeof namespaceValue === 'string'
  && namespaceValue !== ''
  && !namespaceValue.startsWith('-');
if (namespaceIndex !== -1 && !hasNamespaceValue) {
  console.warn('[build-embeddings] --namespace requires a value; ignoring the flag');
}
const namespaceFilter = hasNamespaceValue ? namespaceValue : null;

const verbose = args.includes('--verbose') || args.includes('-v');

// Mutable module state, updated by loadTransformersModel().
let pipeline = null;
let useTransformers = false;
let embeddingModel = EMBEDDING_MODEL_HASH;
|
|
57
|
-
|
|
58
|
-
/**
 * Print a message to stdout with the script's `[build-embeddings]` prefix.
 *
 * @param {string} msg - Text to print.
 */
function log(msg) {
  const line = `[build-embeddings] ${msg}`;
  console.log(line);
}
|
|
61
|
-
|
|
62
|
-
/**
 * Print a prefixed message only when verbose output was requested
 * (`--verbose` or `-v` on the command line).
 *
 * @param {string} msg - Text to print.
 */
function debug(msg) {
  if (!verbose) {
    return;
  }
  console.log(`[build-embeddings] ${msg}`);
}
|
|
65
|
-
|
|
66
|
-
// ============================================================================
|
|
67
|
-
// Domain-Aware Semantic Hash Embeddings
|
|
68
|
-
// ============================================================================
|
|
69
|
-
|
|
70
|
-
// Keyword lists per domain. A text that contains any keyword of a domain
// gets that domain's signature vector mixed into its hash embedding.
// Keywords are lowercase because they are compared against lowercased text.
const DOMAIN_CLUSTERS = {
  // Persistence layer, schemas and queries
  database: [
    'typeorm', 'mongodb', 'database', 'entity', 'schema', 'table',
    'collection', 'query', 'sql', 'nosql', 'orm', 'model',
    'migration', 'repository', 'column', 'relation', 'foreign', 'primary',
    'index', 'constraint', 'transaction',
  ],
  // UI frameworks, styling and client-side state
  frontend: [
    'react', 'component', 'ui', 'styling', 'css', 'html',
    'jsx', 'tsx', 'frontend', 'material', 'mui', 'tailwind',
    'dom', 'render', 'hook', 'state', 'props', 'redux',
    'context', 'styled', 'emotion', 'theme', 'layout', 'responsive',
  ],
  // HTTP servers, routing and transport
  backend: [
    'fastify', 'api', 'route', 'handler', 'rest', 'endpoint',
    'server', 'controller', 'middleware', 'request', 'response', 'http',
    'express', 'nest', 'graphql', 'websocket', 'socket', 'cors',
    'auth', 'jwt', 'session', 'cookie',
  ],
  // Test runners, doubles and test structure
  testing: [
    'test', 'testing', 'vitest', 'jest', 'mock', 'spy',
    'assert', 'expect', 'describe', 'it', 'spec', 'unit',
    'integration', 'e2e', 'playwright', 'cypress', 'coverage', 'fixture',
    'stub', 'fake', 'snapshot', 'beforeeach', 'aftereach',
  ],
  // Multi-tenant data isolation
  tenancy: [
    'tenant', 'tenancy', 'companyid', 'company', 'isolation', 'multi',
    'multitenant', 'organization', 'workspace', 'account', 'customer', 'client',
  ],
  // Authentication, authorization and common attack classes
  security: [
    'security', 'auth', 'authentication', 'authorization', 'permission', 'role',
    'access', 'token', 'jwt', 'oauth', 'password', 'encrypt',
    'hash', 'salt', 'csrf', 'xss', 'injection', 'sanitize',
    'validate',
  ],
  // Design patterns and architecture vocabulary
  patterns: [
    'pattern', 'service', 'factory', 'singleton', 'decorator', 'adapter',
    'facade', 'observer', 'strategy', 'command', 'repository', 'usecase',
    'domain', 'ddd', 'clean', 'architecture', 'solid', 'dry',
    'kiss',
  ],
  // CI/CD and release automation
  workflow: [
    'workflow', 'pipeline', 'ci', 'cd', 'deploy', 'build',
    'actions', 'hook', 'trigger', 'job', 'step', 'artifact',
    'release', 'version', 'tag',
  ],
  // Caching, storage and vector search
  memory: [
    'memory', 'cache', 'store', 'persist', 'storage', 'redis',
    'session', 'state', 'buffer', 'queue', 'stack', 'heap',
    'gc', 'leak', 'embedding', 'vector', 'hnsw', 'semantic',
    'search', 'index', 'retrieval',
  ],
  // Agents, swarms and task scheduling
  agent: [
    'agent', 'swarm', 'coordinator', 'orchestrator', 'task', 'worker',
    'spawn', 'parallel', 'concurrent', 'async', 'promise', 'queue',
    'priority', 'schedule',
  ],
  // Git / GitHub workflow vocabulary
  github: [
    'github', 'issue', 'branch', 'pr', 'pull', 'request',
    'merge', 'commit', 'push', 'clone', 'fork', 'remote',
    'origin', 'main', 'master', 'checkout', 'rebase', 'squash',
    'repository', 'repo', 'gh', 'git', 'assignee', 'label',
    'mandatory', 'checklist', 'closes', 'fixes', 'conventional', 'feat',
    'refactor',
  ],
  // Guidance documents and documentation structure
  documentation: [
    'guidance', 'documentation', 'docs', 'readme', 'guide', 'tutorial',
    'reference', 'standard', 'convention', 'rule', 'policy', 'template',
    'example', 'usage', 'instruction', 'meta', 'index', 'umbrella',
    'claude', 'optimized', 'audience', 'structure', 'format', 'markdown',
  ],
};
|
|
108
|
-
|
|
109
|
-
// Stop words: very frequent English words (plus a few generic programming
// verbs) that get a reduced weight when hashed into the embedding.
const COMMON_WORDS = new Set([
  // articles and forms of "be" / "have"
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
  'have', 'has', 'had',
  // auxiliaries and modals
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
  'must', 'shall', 'can', 'need',
  // prepositions
  'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into',
  'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under',
  // conjunctions and qualifiers
  'and', 'but', 'or', 'nor', 'so', 'yet', 'both', 'either', 'neither', 'not',
  'only', 'own', 'same', 'than', 'too', 'very', 'just', 'also',
  // pronouns, determiners and question words
  'this', 'that', 'these', 'those', 'it', 'its', 'if', 'then', 'else',
  'when', 'where', 'why', 'how', 'all', 'each', 'every', 'any', 'some',
  'no', 'yes',
  // generic verbs
  'use', 'using', 'used', 'uses', 'get', 'set', 'new', 'see', 'like',
  'make', 'made',
]);
|
|
120
|
-
|
|
121
|
-
/**
 * Small non-cryptographic 32-bit string hash with a seed.
 *
 * Each UTF-16 code unit is XOR-ed into the state, the state is multiplied by
 * 0x5bd1e995 (the multiplier constant also used by MurmurHash2) and then
 * folded with a 15-bit right shift. The state starts as `seed ^ str.length`.
 *
 * @param {string} str - Input string.
 * @param {number} [seed=0] - Seed; different seeds give independent hashes.
 * @returns {number} Unsigned 32-bit integer.
 */
function hash(str, seed = 0) {
  let state = seed ^ str.length;
  let pos = 0;
  while (pos < str.length) {
    state = Math.imul(state ^ str.charCodeAt(pos), 0x5bd1e995);
    state ^= state >>> 15;
    pos += 1;
  }
  // Reinterpret the signed 32-bit state as unsigned.
  return state >>> 0;
}
|
|
131
|
-
|
|
132
|
-
// Build one 0/1 signature vector per domain up front. Every keyword sets two
// positions (hash seeds 0 and 1), salted with the domain name so the same
// keyword in two domains lands on different positions.
const domainSignatures = {};
Object.entries(DOMAIN_CLUSTERS).forEach(([domain, keywords]) => {
  const signature = new Float32Array(EMBEDDING_DIMS);
  keywords.forEach((keyword) => {
    const salted = keyword + '_dom_' + domain;
    for (const seed of [0, 1]) {
      signature[hash(salted, seed) % EMBEDDING_DIMS] = 1;
    }
  });
  domainSignatures[domain] = signature;
});
|
|
145
|
-
|
|
146
|
-
/**
 * Return the 0/1 signature vector of a domain for the requested size.
 *
 * The precomputed table is only valid for EMBEDDING_DIMS; for any other size
 * the signature is rebuilt so that indices are hashed modulo `dims`.
 */
function domainSignatureFor(domain, keywords, dims) {
  if (dims === EMBEDDING_DIMS) return domainSignatures[domain];
  const sig = new Float32Array(dims);
  for (const kw of keywords) {
    for (let h = 0; h < 2; h++) {
      sig[hash(kw + '_dom_' + domain, h) % dims] = 1;
    }
  }
  return sig;
}

/**
 * Generate a domain-aware semantic hash embedding.
 *
 * The vector is the sum of (1) weighted domain signatures for every domain
 * with at least one keyword in the text, (2) signed word features at three
 * hashed positions per word, (3) signed bigram features and (4) signed
 * trigram features, followed by L2 normalization.
 *
 * @param {string} text - Text to embed; null/undefined is treated as empty.
 * @param {number} dims - Embedding dimensions (defaults to EMBEDDING_DIMS).
 * @returns {Float32Array} Unit-length vector, or an all-zero vector when the
 *   text contains no word of two or more `[a-z0-9]` characters.
 */
function semanticHashEmbed(text, dims = EMBEDDING_DIMS) {
  const vec = new Float32Array(dims);
  const lowerText = String(text ?? '').toLowerCase();
  // Anything outside a-z / 0-9 separates words; one-character words are dropped.
  const words = lowerText.replace(/[^a-z0-9\s]/g, ' ').split(/\s+/).filter(w => w.length > 1);

  if (words.length === 0) {
    // Empty text - return zero vector (will have low similarity to everything)
    return vec;
  }

  // 1. Add domain signatures for matched domains
  // NOTE(review): this is a substring match on the raw lowercased text, so
  // short keywords such as 'it', 'ci' or 'ui' also match inside longer words.
  // Left unchanged because altering it would change every produced vector.
  for (const [domain, keywords] of Object.entries(DOMAIN_CLUSTERS)) {
    let matchCount = 0;
    for (const kw of keywords) {
      if (lowerText.includes(kw)) {
        matchCount++;
      }
    }
    if (matchCount > 0) {
      const weight = Math.min(2.0, 0.5 + matchCount * 0.3); // More matches = stronger signal
      const sig = domainSignatureFor(domain, keywords, dims);
      for (let i = 0; i < dims; i++) {
        vec[i] += sig[i] * weight;
      }
    }
  }

  // 2. Add word features (simhash-style with multiple positions)
  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    const isCommon = COMMON_WORDS.has(word);
    // Stop words count least; words longer than six characters count most.
    const weight = isCommon ? 0.2 : (word.length > 6 ? 0.8 : 0.5);

    // Multiple hash positions per word
    for (let h = 0; h < 3; h++) {
      const idx = hash(word, h * 17) % dims;
      const sign = (hash(word, h * 31 + 1) % 2 === 0) ? 1 : -1;
      vec[idx] += sign * weight;
    }
  }

  // 3. Add bigram features for local context (pairs of two stop words are skipped)
  for (let i = 0; i < words.length - 1; i++) {
    if (COMMON_WORDS.has(words[i]) && COMMON_WORDS.has(words[i + 1])) continue;
    const bigram = words[i] + '_' + words[i + 1];
    const idx = hash(bigram, 42) % dims;
    const sign = (hash(bigram, 43) % 2 === 0) ? 1 : -1;
    vec[idx] += sign * 0.4;
  }

  // 4. Add trigram features for phrase detection
  for (let i = 0; i < words.length - 2; i++) {
    const trigram = words[i] + '_' + words[i + 1] + '_' + words[i + 2];
    const idx = hash(trigram, 99) % dims;
    const sign = (hash(trigram, 100) % 2 === 0) ? 1 : -1;
    vec[idx] += sign * 0.3;
  }

  // 5. L2 normalize (skipped when every feature cancelled out to zero)
  let norm = 0;
  for (let i = 0; i < dims; i++) norm += vec[i] * vec[i];
  norm = Math.sqrt(norm);
  if (norm > 0) {
    for (let i = 0; i < dims; i++) vec[i] /= norm;
  }

  return vec;
}
|
|
220
|
-
|
|
221
|
-
// ============================================================================
|
|
222
|
-
// Transformers.js Neural Embeddings (preferred; hash embeddings are the fallback)
|
|
223
|
-
// ============================================================================
|
|
224
|
-
|
|
225
|
-
/**
 * Load the Transformers.js feature-extraction pipeline once and cache it in
 * the module-level `pipeline` variable.
 *
 * On success `useTransformers` is set to true and `embeddingModel` to the
 * neural model name. On any failure the error is logged, both variables are
 * reset to the hash-embedding settings and null is returned, so callers can
 * continue with hash embeddings.
 *
 * @returns {Promise<Function|null>} The pipeline, or null when unavailable.
 */
async function loadTransformersModel() {
  if (pipeline) return pipeline;

  log('Attempting to load Transformers.js neural model...');

  try {
    const { env, pipeline: createPipeline } = await import(mofloResolveURL('@xenova/transformers'));
    env.allowLocalModels = false;
    env.backends.onnx.wasm.numThreads = 1;

    pipeline = await createPipeline('feature-extraction', EMBEDDING_MODEL_NEURAL, {
      quantized: false,
    });

    useTransformers = true;
    embeddingModel = EMBEDDING_MODEL_NEURAL;
    log('Transformers.js model loaded successfully');
    return pipeline;
  } catch (err) {
    // Only the first line of the message is shown. Thrown values that are
    // not Error objects (strings, null, ...) are stringified instead of
    // printing "undefined" or crashing inside this handler.
    const errMsg = String(err?.message ?? err).split('\n')[0];
    log(`Transformers.js not available: ${errMsg}`);
    log('Using domain-aware hash embeddings (fast, good quality)');
    useTransformers = false;
    embeddingModel = EMBEDDING_MODEL_HASH;
    return null;
  }
}
|
|
252
|
-
|
|
253
|
-
/**
 * Embed a text with the loaded Transformers.js pipeline, using mean pooling
 * and normalization.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} The embedding as a plain array, or null
 *   when no pipeline is loaded or inference fails (the caller then falls
 *   back to hash embeddings).
 */
async function generateEmbeddingNeural(text) {
  if (!pipeline) return null;
  try {
    const output = await pipeline(text, { pooling: 'mean', normalize: true });
    return Array.from(output.data);
  } catch (err) {
    // Still best-effort, but no longer silent: the reason is visible with --verbose.
    debug(`Neural embedding failed: ${String(err?.message ?? err).split('\n')[0]}`);
    return null;
  }
}
|
|
262
|
-
|
|
263
|
-
// ============================================================================
|
|
264
|
-
// Database Operations
|
|
265
|
-
// ============================================================================
|
|
266
|
-
|
|
267
|
-
/**
 * Open the memory database: read the file at DB_PATH and load its bytes into
 * an in-memory sql.js database.
 *
 * @returns {Promise<object>} sql.js Database instance.
 * @throws {Error} When no file exists at DB_PATH.
 */
async function getDb() {
  const isMissing = !existsSync(DB_PATH);
  if (isMissing) {
    throw new Error(`Database not found: ${DB_PATH}`);
  }

  const sqlite = await initSqlJs();
  const fileBytes = readFileSync(DB_PATH);
  return new sqlite.Database(fileBytes);
}
|
|
275
|
-
|
|
276
|
-
/**
 * Persist the in-memory database by exporting it and overwriting DB_PATH.
 *
 * @param {object} db - sql.js Database instance.
 */
function saveDb(db) {
  const snapshot = Buffer.from(db.export());
  writeFileSync(DB_PATH, snapshot);
}
|
|
280
|
-
|
|
281
|
-
/**
 * Select the active memory entries that should be (re-)embedded, newest first.
 *
 * Without `forceAll`, only entries with no embedding, an empty embedding, or
 * an embedding produced by a hash/fallback model are returned, so they can be
 * upgraded when the neural model is available. With `forceAll`, every active
 * entry is returned.
 *
 * @param {object} db - sql.js Database instance.
 * @param {string|null} [namespace=null] - Restrict to this namespace when set.
 * @param {boolean} [forceAll=false] - Return all active entries.
 * @returns {object[]} Rows with `id`, `key`, `namespace` and `content`.
 */
function getEntriesNeedingEmbeddings(db, namespace = null, forceAll = false) {
  let sql = `SELECT id, key, namespace, content FROM memory_entries WHERE status = 'active'`;
  const params = [];

  if (!forceAll) {
    // Include entries with no embedding OR entries with hash/fallback embeddings
    // that should be upgraded to Xenova when available
    sql += ` AND (embedding IS NULL OR embedding = '' OR embedding_model IN ('domain-aware-hash-v1', 'hash-fallback', 'local'))`;
  }

  if (namespace) {
    sql += ` AND namespace = ?`;
    params.push(namespace);
  }

  sql += ` ORDER BY created_at DESC`;

  const stmt = db.prepare(sql);
  try {
    stmt.bind(params);
    const results = [];
    while (stmt.step()) results.push(stmt.getAsObject());
    return results;
  } finally {
    // Release the prepared statement even when bind/step throws.
    stmt.free();
  }
}
|
|
305
|
-
|
|
306
|
-
/**
 * Store an embedding on one memory entry, together with the model name, the
 * vector length and the current time as `updated_at`.
 *
 * @param {object} db - sql.js Database instance.
 * @param {*} id - Value of the entry's `id` column.
 * @param {number[]} embedding - Vector to store (serialized as JSON text).
 * @param {string} model - Name of the model that produced the vector.
 */
function updateEmbedding(db, id, embedding, model) {
  const stmt = db.prepare(
    `UPDATE memory_entries SET embedding = ?, embedding_model = ?, embedding_dimensions = ?, updated_at = ? WHERE id = ?`
  );
  try {
    // Record the real vector length rather than a constant, so the stored
    // dimension always describes the stored vector.
    stmt.run([JSON.stringify(embedding), model, embedding.length, Date.now(), id]);
  } finally {
    // Release the prepared statement even when the update throws.
    stmt.free();
  }
}
|
|
313
|
-
|
|
314
|
-
/**
 * Summarize embedding coverage of active entries per namespace.
 *
 * Each returned row has: `namespace`, `total`, `vectorized` (non-empty
 * embedding whose model is not 'domain-aware-hash-v1'), `missing` (null or
 * empty embedding) and `hash_only` (model is 'domain-aware-hash-v1').
 *
 * @param {object} db - sql.js Database instance.
 * @returns {object[]} One row per namespace, ordered by namespace.
 */
function getNamespaceStats(db) {
  const stmt = db.prepare(`
    SELECT
      namespace,
      COUNT(*) as total,
      SUM(CASE WHEN embedding IS NOT NULL AND embedding != '' AND embedding_model != 'domain-aware-hash-v1' THEN 1 ELSE 0 END) as vectorized,
      SUM(CASE WHEN embedding IS NULL OR embedding = '' THEN 1 ELSE 0 END) as missing,
      SUM(CASE WHEN embedding_model = 'domain-aware-hash-v1' THEN 1 ELSE 0 END) as hash_only
    FROM memory_entries
    WHERE status = 'active'
    GROUP BY namespace
    ORDER BY namespace
  `);
  try {
    const results = [];
    while (stmt.step()) results.push(stmt.getAsObject());
    return results;
  } finally {
    // Release the prepared statement even when stepping throws.
    stmt.free();
  }
}
|
|
332
|
-
|
|
333
|
-
/**
 * Compute overall embedding coverage of active entries.
 *
 * An entry counts as embedded when its embedding is neither NULL nor the
 * empty string; the per-model breakdown uses the same definition so its
 * counts add up to `withEmbeddings`.
 *
 * @param {object} db - sql.js Database instance.
 * @returns {{total: number, withEmbeddings: number, byModel: object[]}}
 *   `byModel` rows have `embedding_model` and `cnt`.
 */
function getEmbeddingStats(db) {
  // Run a query and collect all rows, always releasing the statement.
  const queryAll = (sql) => {
    const stmt = db.prepare(sql);
    try {
      const rows = [];
      while (stmt.step()) rows.push(stmt.getAsObject());
      return rows;
    } finally {
      stmt.free();
    }
  };

  const [total] = queryAll(`SELECT COUNT(*) as cnt FROM memory_entries WHERE status = 'active'`);
  const [withEmbed] = queryAll(`SELECT COUNT(*) as cnt FROM memory_entries WHERE status = 'active' AND embedding IS NOT NULL AND embedding != ''`);
  const byModel = queryAll(`SELECT embedding_model, COUNT(*) as cnt FROM memory_entries WHERE status = 'active' AND embedding IS NOT NULL AND embedding != '' GROUP BY embedding_model`);

  return {
    total: total?.cnt || 0,
    withEmbeddings: withEmbed?.cnt || 0,
    byModel
  };
}
|
|
353
|
-
|
|
354
|
-
// ============================================================================
|
|
355
|
-
// Main
|
|
356
|
-
// ============================================================================
|
|
357
|
-
|
|
358
|
-
/**
 * Write `vector-stats.json` (vector count, database size in KB, namespace
 * count, HNSW index presence, timestamp) into both `.claude-flow/` and
 * `.swarm/` under the project root. Best-effort: failures are only reported
 * in verbose mode.
 *
 * @param {object} db - sql.js Database instance (used when nsStats is null).
 * @param {{withEmbeddings: number}} stats - Result of getEmbeddingStats().
 * @param {object[]|null} [nsStats=null] - Result of getNamespaceStats(), if
 *   already computed.
 */
function writeVectorStatsCache(db, stats, nsStats = null) {
  try {
    const namespaceRows = nsStats ?? getNamespaceStats(db);
    const dbSizeKB = Math.floor(readFileSync(DB_PATH).length / 1024);
    const hnswExists = existsSync(resolve(projectRoot, '.swarm', 'hnsw.index'))
      || existsSync(resolve(projectRoot, '.claude-flow', 'hnsw.index'));
    const cacheData = {
      vectorCount: stats.withEmbeddings,
      dbSizeKB,
      namespaces: namespaceRows.length,
      hasHnsw: hnswExists,
      updatedAt: Date.now(),
    };
    // Write to both locations so statusline finds it regardless of which dir it checks
    for (const cacheDir of [resolve(projectRoot, '.claude-flow'), resolve(projectRoot, '.swarm')]) {
      if (!existsSync(cacheDir)) mkdirSync(cacheDir, { recursive: true });
      writeFileSync(resolve(cacheDir, 'vector-stats.json'), JSON.stringify(cacheData));
    }
  } catch (err) {
    // non-fatal
    debug(`Could not write vector-stats cache: ${err?.message ?? err}`);
  }
}

/**
 * Print the per-namespace coverage table and, when any namespace has missing
 * or hash-only embeddings, the hints on how to fix that.
 *
 * @param {object[]} nsStats - Result of getNamespaceStats().
 */
function reportNamespaceHealth(nsStats) {
  if (nsStats.length === 0) return;

  // NOTE(review): header and row cell widths do not match the border widths
  // as written here; whitespace inside these strings may have been lost —
  // confirm against the upstream file before changing them.
  log(' Namespace Health:');
  log(' ┌─────────────────┬───────┬────────────┬─────────┬───────────┐');
  log(' │ Namespace │ Total │ Vectorized │ Missing │ Hash-Only │');
  log(' ├─────────────────┼───────┼────────────┼─────────┼───────────┤');
  let hasWarnings = false;
  for (const ns of nsStats) {
    const name = String(ns.namespace).padEnd(15);
    const total = String(ns.total).padStart(5);
    const vectorized = String(ns.vectorized).padStart(10);
    const missing = String(ns.missing).padStart(7);
    const hashOnly = String(ns.hash_only).padStart(9);
    const needsAttention = ns.missing > 0 || ns.hash_only > 0;
    const warn = needsAttention ? ' ⚠' : ' ';
    log(` │ ${name} │${total} │${vectorized} │${missing} │${hashOnly} │${warn}`);
    if (needsAttention) hasWarnings = true;
  }
  log(' └─────────────────┴───────┴────────────┴─────────┴───────────┘');

  if (!hasWarnings) return;

  log('');
  log(' ⚠ Some namespaces have entries without Xenova embeddings.');
  log(' Run with --force to re-embed all entries:');
  log(' node node_modules/moflo/bin/build-embeddings.mjs --force');
  if (!useTransformers) {
    log('');
    log(' ⚠ Xenova model not available — using hash fallback.');
    log(' Install @xenova/transformers for neural embeddings:');
    log(' npm install @xenova/transformers');
  }
}

/**
 * Script entry point: embed every selected memory entry, write the database
 * back to disk, drop the stale HNSW index files, print a report and refresh
 * the vector-stats cache. The database handle is closed on every exit path.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rule = '═══════════════════════════════════════════════════════════';

  console.log('');
  log(rule);
  log(' Embedding Generation for Memory Entries');
  log(rule);
  console.log('');

  const db = await getDb();
  try {
    // Get entries needing embeddings
    const entries = getEntriesNeedingEmbeddings(db, namespaceFilter, force);

    if (entries.length === 0) {
      log('All entries already have embeddings');
      const stats = getEmbeddingStats(db);
      log(`Total: ${stats.withEmbeddings}/${stats.total} entries embedded`);

      // Update vector-stats cache even on early exit
      writeVectorStatsCache(db, stats);
      return;
    }

    log(`Found ${entries.length} entries to embed`);

    // Try to load Transformers.js, fall back to hash embeddings
    await loadTransformersModel();

    log(`Using embedding model: ${embeddingModel}`);
    console.log('');

    let embedded = 0;
    let failed = 0;
    const startTime = Date.now();

    // Process entries
    for (let i = 0; i < entries.length; i++) {
      const entry = entries[i];

      try {
        // Truncate content for embedding (first 1500 chars for context)
        const text = entry.content.substring(0, 1500);

        // Track which model produced THIS vector: when neural inference fails
        // for a single entry, the hash vector must not be stored under the
        // neural model name.
        let embedding = null;
        let model = EMBEDDING_MODEL_HASH;
        if (useTransformers && pipeline) {
          embedding = await generateEmbeddingNeural(text);
          if (embedding && embedding.length === EMBEDDING_DIMS) {
            model = embeddingModel;
          }
        }

        // Fall back to hash embedding if neural failed or not available
        if (!embedding || embedding.length !== EMBEDDING_DIMS) {
          embedding = Array.from(semanticHashEmbed(text));
          model = EMBEDDING_MODEL_HASH;
        }

        if (embedding && embedding.length === EMBEDDING_DIMS) {
          updateEmbedding(db, entry.id, embedding, model);
          embedded++;
        } else {
          failed++;
        }
      } catch (err) {
        debug(`Failed to embed ${entry.key}: ${err.message}`);
        failed++;
      }

      // Progress update (outside the try so a failing entry cannot skip it)
      if ((i + 1) % 50 === 0 || i === entries.length - 1) {
        const pct = Math.round(((i + 1) / entries.length) * 100);
        const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
        process.stdout.write(`\r[build-embeddings] Progress: ${i + 1}/${entries.length} (${pct}%) - ${elapsed}s elapsed`);
      }
    }

    console.log(''); // New line after progress

    const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
    const stats = getEmbeddingStats(db);

    // Write changes back to disk (sql.js operates in-memory)
    if (embedded > 0) {
      saveDb(db);

      // Delete stale HNSW index so the CLI rebuilds from fresh vectors
      const { unlinkSync } = await import('fs');
      const hnswPaths = [
        resolve(projectRoot, '.swarm/hnsw.index'),
        resolve(projectRoot, '.swarm/hnsw.metadata.json'),
      ];
      for (const p of hnswPaths) {
        if (existsSync(p)) {
          unlinkSync(p);
          log(`Deleted stale HNSW index: ${p}`);
        }
      }
    }

    console.log('');
    log(rule);
    log(' Embedding Generation Complete');
    log(rule);
    log(` Embedded: ${embedded} entries`);
    log(` Failed: ${failed} entries`);
    log(` Time: ${totalTime}s`);
    log(` Model: ${embeddingModel}`);
    log(` Dimensions: ${EMBEDDING_DIMS}`);
    log('');
    log(` Total Coverage: ${stats.withEmbeddings}/${stats.total} entries`);
    if (stats.byModel.length > 0) {
      log(' By Model:');
      for (const m of stats.byModel) {
        log(` - ${m.embedding_model}: ${m.cnt}`);
      }
    }
    log('');

    // Per-namespace health report
    const nsStats = getNamespaceStats(db);
    reportNamespaceHealth(nsStats);

    log(rule);

    // Update vector-stats cache for statusline display
    writeVectorStatsCache(db, stats, nsStats);
  } finally {
    db.close();
  }
}
|
|
545
|
-
|
|
546
|
-
// Entry point: run the build; report any unhandled failure and exit with
// status 1.
main().catch((error) => {
  const { message } = error;
  log(`Error: ${message}`);
  process.exit(1);
});
|