claude-flow 3.7.0-alpha.78 → 3.7.0-alpha.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/v3/@claude-flow/cli/package.json +1 -1
- package/v3/@claude-flow/guidance/dist/analyzer.js +74 -46
- package/v3/@claude-flow/guidance/dist/compiler.js +15 -24
- package/v3/@claude-flow/guidance/dist/retriever.d.ts +59 -3
- package/v3/@claude-flow/guidance/dist/retriever.js +216 -14
- package/v3/@claude-flow/guidance/package.json +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-flow",
|
|
3
|
-
"version": "3.7.0-alpha.78",
|
|
3
|
+
"version": "3.7.0-alpha.79",
|
|
4
4
|
"description": "Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@claude-flow/cli",
|
|
3
|
-
"version": "3.7.0-alpha.
|
|
3
|
+
"version": "3.7.0-alpha.79",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Ruflo CLI - Enterprise AI agent orchestration with 60+ specialized agents, swarm coordination, MCP server, self-learning hooks, and vector memory for Claude Code",
|
|
6
6
|
"main": "dist/src/index.js",
|
|
@@ -526,62 +526,90 @@ export function formatBenchmark(result) {
|
|
|
526
526
|
// ============================================================================
|
|
527
527
|
// Metric Extraction
|
|
528
528
|
// ============================================================================
|
|
529
|
+
// Phase 1 perf — module-level patterns so we don't reconstruct them on
|
|
530
|
+
// every `extractMetrics` call. Hoisted from previous in-body literals.
|
|
531
|
+
const HEADING_RE = /^#+\s/;
|
|
532
|
+
const H2_RE = /^##\s/;
|
|
533
|
+
const RULE_LINE_RE = /^[\s]*[-*]\s+(?:NEVER|ALWAYS|MUST|Do not|Never|Always|Prefer|Avoid|Use|Run|Ensure|Follow|No\s|All\s|Keep)\b/;
|
|
534
|
+
const ANY_BULLET_RE = /^[\s]*[-*]\s/;
|
|
535
|
+
const STRICT_RULE_PREFIX_RE = /^[\s]*[-*]\s+(?:NEVER|ALWAYS|MUST|Prefer|Use|No\s|All\s)/i;
|
|
536
|
+
const ENFORCEMENT_RE = /\b(NEVER|ALWAYS|MUST|REQUIRED|FORBIDDEN|DO NOT|SHALL NOT)\b/gi;
|
|
537
|
+
const TOOL_RE = /\b(npm|pnpm|yarn|bun|docker|git|make|cargo|go|pip|poetry)\b/gi;
|
|
538
|
+
const CODE_FENCE_RE = /```/g;
|
|
539
|
+
const BUILD_CMD_RE = /\b(build|compile|tsc|webpack|vite|rollup)\b/i;
|
|
540
|
+
const TEST_CMD_RE = /\b(test|vitest|jest|pytest|mocha|cargo test)\b/i;
|
|
541
|
+
const SECURITY_SEC_RE = /^##.*security/im;
|
|
542
|
+
const ARCH_SEC_RE = /^##.*(architecture|structure|design)/im;
|
|
543
|
+
const IMPORTS_RE = /@[~/]/;
|
|
529
544
|
function extractMetrics(content) {
|
|
545
|
+
// Phase 1 perf — replace 6 separate `lines.filter()` passes + two `for-of`
|
|
546
|
+
// loops with a single pass that accumulates every line-derived metric in
|
|
547
|
+
// one iteration. The 10+ predicates that used to traverse `lines`
|
|
548
|
+
// independently now share one walk; measurable on `analyzer.analyze()`
|
|
549
|
+
// which is called on every analyze, optimizeForSize, and scoreCompilability.
|
|
530
550
|
const lines = content.split('\n');
|
|
531
551
|
const totalLines = lines.length;
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
552
|
+
let contentLines = 0;
|
|
553
|
+
let headingCount = 0;
|
|
554
|
+
let sectionCount = 0;
|
|
555
|
+
let ruleCount = 0;
|
|
556
|
+
let domainRuleCount = 0;
|
|
537
557
|
let constitutionLines = 0;
|
|
538
558
|
let h2Count = 0;
|
|
559
|
+
let longestSectionLines = 0;
|
|
560
|
+
let currentSectionLength = 0;
|
|
539
561
|
for (let i = 0; i < lines.length; i++) {
|
|
540
|
-
|
|
562
|
+
const line = lines[i];
|
|
563
|
+
// contentLines — non-empty (after trim)
|
|
564
|
+
if (line.trim().length > 0)
|
|
565
|
+
contentLines++;
|
|
566
|
+
// headingCount — any heading
|
|
567
|
+
if (HEADING_RE.test(line))
|
|
568
|
+
headingCount++;
|
|
569
|
+
// H2-driven metrics: sectionCount, constitutionLines, longestSectionLines
|
|
570
|
+
if (H2_RE.test(line)) {
|
|
571
|
+
sectionCount++;
|
|
541
572
|
h2Count++;
|
|
542
|
-
if (h2Count === 2) {
|
|
573
|
+
if (h2Count === 2 && constitutionLines === 0) {
|
|
543
574
|
constitutionLines = i;
|
|
544
|
-
break;
|
|
545
575
|
}
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
// Rules: lines starting with - that contain imperative verbs or constraints
|
|
551
|
-
const rulePattern = /^[\s]*[-*]\s+((?:NEVER|ALWAYS|MUST|Do not|Never|Always|Prefer|Avoid|Use|Run|Ensure|Follow|No\s|All\s|Keep)\b.*)/;
|
|
552
|
-
const ruleCount = lines.filter(l => rulePattern.test(l)).length;
|
|
553
|
-
// Code blocks
|
|
554
|
-
const codeBlockCount = (content.match(/```/g) || []).length / 2;
|
|
555
|
-
// Enforcement statements
|
|
556
|
-
const enforcementPattern = /\b(NEVER|ALWAYS|MUST|REQUIRED|FORBIDDEN|DO NOT|SHALL NOT)\b/gi;
|
|
557
|
-
const enforcementStatements = (content.match(enforcementPattern) || []).length;
|
|
558
|
-
// Tool mentions
|
|
559
|
-
const toolPattern = /\b(npm|pnpm|yarn|bun|docker|git|make|cargo|go|pip|poetry)\b/gi;
|
|
560
|
-
const toolMentions = new Set((content.match(toolPattern) || []).map(m => m.toLowerCase())).size;
|
|
561
|
-
// Estimated shards = number of H2 sections
|
|
562
|
-
const estimatedShards = Math.max(1, sectionCount);
|
|
563
|
-
// Boolean features
|
|
564
|
-
const hasBuildCommand = /\b(build|compile|tsc|webpack|vite|rollup)\b/i.test(content);
|
|
565
|
-
const hasTestCommand = /\b(test|vitest|jest|pytest|mocha|cargo test)\b/i.test(content);
|
|
566
|
-
const hasSecuritySection = /^##.*security/im.test(content);
|
|
567
|
-
const hasArchitectureSection = /^##.*(architecture|structure|design)/im.test(content);
|
|
568
|
-
const hasImports = /@[~\/]/.test(content);
|
|
569
|
-
// Longest section
|
|
570
|
-
let longestSectionLines = 0;
|
|
571
|
-
let currentSectionLength = 0;
|
|
572
|
-
for (const line of lines) {
|
|
573
|
-
if (/^##\s/.test(line)) {
|
|
574
|
-
longestSectionLines = Math.max(longestSectionLines, currentSectionLength);
|
|
576
|
+
// Close out the longest-section accumulator at every H2 boundary.
|
|
577
|
+
if (currentSectionLength > longestSectionLines) {
|
|
578
|
+
longestSectionLines = currentSectionLength;
|
|
579
|
+
}
|
|
575
580
|
currentSectionLength = 0;
|
|
576
581
|
}
|
|
577
582
|
else {
|
|
578
583
|
currentSectionLength++;
|
|
579
584
|
}
|
|
585
|
+
// ruleCount — bullets that start with an enforcement verb
|
|
586
|
+
if (RULE_LINE_RE.test(line))
|
|
587
|
+
ruleCount++;
|
|
588
|
+
// domainRuleCount — bullets that are NOT enforcement-prefixed and long
|
|
589
|
+
if (line.length > 20 && ANY_BULLET_RE.test(line) && !STRICT_RULE_PREFIX_RE.test(line)) {
|
|
590
|
+
domainRuleCount++;
|
|
591
|
+
}
|
|
580
592
|
}
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
593
|
+
// Flush the last section length
|
|
594
|
+
if (currentSectionLength > longestSectionLines) {
|
|
595
|
+
longestSectionLines = currentSectionLength;
|
|
596
|
+
}
|
|
597
|
+
if (constitutionLines === 0)
|
|
598
|
+
constitutionLines = Math.min(totalLines, 60);
|
|
599
|
+
// Content-level (whole-string) regex passes — these scan once and don't
|
|
600
|
+
// benefit from per-line iteration. Kept as separate calls.
|
|
601
|
+
const codeBlockCount = (content.match(CODE_FENCE_RE) || []).length / 2;
|
|
602
|
+
const enforcementStatements = (content.match(ENFORCEMENT_RE) || []).length;
|
|
603
|
+
const toolMatches = content.match(TOOL_RE);
|
|
604
|
+
let toolMentions = 0;
|
|
605
|
+
if (toolMatches) {
|
|
606
|
+
// Cheaper than Set when count is small (typical CLAUDE.md has <12 unique tools)
|
|
607
|
+
const seen = new Set();
|
|
608
|
+
for (const m of toolMatches)
|
|
609
|
+
seen.add(m.toLowerCase());
|
|
610
|
+
toolMentions = seen.size;
|
|
611
|
+
}
|
|
612
|
+
const estimatedShards = Math.max(1, sectionCount);
|
|
585
613
|
return {
|
|
586
614
|
totalLines,
|
|
587
615
|
contentLines,
|
|
@@ -593,12 +621,12 @@ function extractMetrics(content) {
|
|
|
593
621
|
enforcementStatements,
|
|
594
622
|
toolMentions,
|
|
595
623
|
estimatedShards,
|
|
596
|
-
hasBuildCommand,
|
|
597
|
-
hasTestCommand,
|
|
598
|
-
hasSecuritySection,
|
|
599
|
-
hasArchitectureSection,
|
|
624
|
+
hasBuildCommand: BUILD_CMD_RE.test(content),
|
|
625
|
+
hasTestCommand: TEST_CMD_RE.test(content),
|
|
626
|
+
hasSecuritySection: SECURITY_SEC_RE.test(content),
|
|
627
|
+
hasArchitectureSection: ARCH_SEC_RE.test(content),
|
|
600
628
|
longestSectionLines,
|
|
601
|
-
hasImports,
|
|
629
|
+
hasImports: IMPORTS_RE.test(content),
|
|
602
630
|
domainRuleCount,
|
|
603
631
|
};
|
|
604
632
|
}
|
|
@@ -191,41 +191,32 @@ export class GuidanceCompiler {
|
|
|
191
191
|
// Extract risk class
|
|
192
192
|
const riskMatch = text.match(RISK_PATTERN);
|
|
193
193
|
const riskClass = riskMatch?.[1]?.toLowerCase() ?? this.config.defaultRiskClass;
|
|
194
|
-
//
|
|
194
|
+
// Phase 1 perf — replace 4 `new RegExp(PATTERN.source, 'gi')` calls per
|
|
195
|
+
// parseRule with `text.matchAll(PATTERN)` against the existing
|
|
196
|
+
// module-level global regex. On a 500-rule file that previously meant
|
|
197
|
+
// 2,000 RegExp constructions per compile; matchAll is allocation-free
|
|
198
|
+
// per call and the module-level pattern is constructed exactly once.
|
|
195
199
|
const toolClasses = [];
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
while ((toolMatch = toolRegex.exec(text)) !== null) {
|
|
199
|
-
toolClasses.push(toolMatch[1].toLowerCase());
|
|
200
|
+
for (const m of text.matchAll(TOOL_TAG_PATTERN)) {
|
|
201
|
+
toolClasses.push(m[1].toLowerCase());
|
|
200
202
|
}
|
|
201
203
|
if (toolClasses.length === 0)
|
|
202
204
|
toolClasses.push('all');
|
|
203
|
-
// Extract intents
|
|
204
205
|
const intents = [];
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
while ((intentMatch = intentRegex.exec(text)) !== null) {
|
|
208
|
-
intents.push(intentMatch[1].toLowerCase());
|
|
206
|
+
for (const m of text.matchAll(INTENT_TAG_PATTERN)) {
|
|
207
|
+
intents.push(m[1].toLowerCase());
|
|
209
208
|
}
|
|
210
|
-
if (intents.length === 0)
|
|
209
|
+
if (intents.length === 0)
|
|
211
210
|
intents.push(...this.inferIntents(text));
|
|
212
|
-
}
|
|
213
|
-
// Extract domains
|
|
214
211
|
const domains = [];
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
while ((domainMatch = domainRegex.exec(text)) !== null) {
|
|
218
|
-
domains.push(domainMatch[1].toLowerCase());
|
|
212
|
+
for (const m of text.matchAll(DOMAIN_TAG_PATTERN)) {
|
|
213
|
+
domains.push(m[1].toLowerCase());
|
|
219
214
|
}
|
|
220
|
-
if (domains.length === 0)
|
|
215
|
+
if (domains.length === 0)
|
|
221
216
|
domains.push(...this.inferDomains(text));
|
|
222
|
-
}
|
|
223
|
-
// Extract repo scopes
|
|
224
217
|
const repoScopes = [];
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
while ((scopeMatch = scopeRegex.exec(text)) !== null) {
|
|
228
|
-
repoScopes.push(scopeMatch[1]);
|
|
218
|
+
for (const m of text.matchAll(SCOPE_PATTERN)) {
|
|
219
|
+
repoScopes.push(m[1]);
|
|
229
220
|
}
|
|
230
221
|
if (repoScopes.length === 0)
|
|
231
222
|
repoScopes.push('**/*');
|
|
@@ -44,15 +44,37 @@ export declare class ShardRetriever {
|
|
|
44
44
|
private embeddingProvider;
|
|
45
45
|
private indexed;
|
|
46
46
|
private globCache;
|
|
47
|
+
private packedEmbeddings;
|
|
48
|
+
private packedDim;
|
|
49
|
+
private packedShardCount;
|
|
50
|
+
private packedSignatures;
|
|
51
|
+
private wordsPerSig;
|
|
47
52
|
constructor(embeddingProvider?: IEmbeddingProvider);
|
|
48
53
|
/**
|
|
49
54
|
* Load a compiled policy bundle
|
|
50
55
|
*/
|
|
51
56
|
loadBundle(bundle: PolicyBundle): Promise<void>;
|
|
52
57
|
/**
|
|
53
|
-
* Index all shards by generating embeddings
|
|
58
|
+
* Index all shards by generating embeddings.
|
|
59
|
+
*
|
|
60
|
+
* M3 substrate — also packs every shard embedding into a single
|
|
61
|
+
* contiguous Float32Array (`packedEmbeddings`) so scoreShards can run
|
|
62
|
+
* the cosine as a vectorized matrix-vector dot in cache-friendly
|
|
63
|
+
* sequential memory rather than chasing per-shard heap pointers.
|
|
64
|
+
* Costs O(n × dim) at index time (one-shot) for an O(n) scan win
|
|
65
|
+
* on every query.
|
|
54
66
|
*/
|
|
55
67
|
indexShards(): Promise<void>;
|
|
68
|
+
/**
|
|
69
|
+
* Build a 1-bit sign signature for the query vector. Matches the
|
|
70
|
+
* packed-shard format produced in indexShards above.
|
|
71
|
+
*/
|
|
72
|
+
private buildQuerySignature;
|
|
73
|
+
/**
|
|
74
|
+
* Hamming-Weight popcount on a single 32-bit word (Wegner / Wilkes).
|
|
75
|
+
* Tested at ~1 ns on V8 — no native popcnt instruction exposed.
|
|
76
|
+
*/
|
|
77
|
+
private static popcount32;
|
|
56
78
|
/**
|
|
57
79
|
* Classify task intent
|
|
58
80
|
*/
|
|
@@ -71,7 +93,26 @@ export declare class ShardRetriever {
|
|
|
71
93
|
*/
|
|
72
94
|
retrieve(request: RetrievalRequest): Promise<RetrievalResult>;
|
|
73
95
|
/**
|
|
74
|
-
* Score all shards against the query
|
|
96
|
+
* Score all shards against the query.
|
|
97
|
+
*
|
|
98
|
+
* M3 perf substrate — three changes from the baseline:
|
|
99
|
+
*
|
|
100
|
+
* 1. Filter FIRST, cosine SECOND. The old code computed cosine for
|
|
101
|
+
* every shard regardless of whether riskFilter/repoScope would
|
|
102
|
+
* throw it away. We now decide eligibility first and only do
|
|
103
|
+
* the 384-dim multiply for survivors.
|
|
104
|
+
*
|
|
105
|
+
* 2. Packed-matrix cosine — when `packedEmbeddings` is current and
|
|
106
|
+
* dim matches, compute the dot directly from contiguous memory
|
|
107
|
+
* (one allocation, sequential reads) instead of dereferencing
|
|
108
|
+
* `shard.embedding` per call. Embeddings are always unit-
|
|
109
|
+
* normalised so cosine === dot + clamp.
|
|
110
|
+
*
|
|
111
|
+
* 3. Top-K partial selection — when the caller only wants `maxShards`
|
|
112
|
+
* results (typical), don't `.sort()` the entire candidate list.
|
|
113
|
+
* Maintain a fixed-size heap of size K and only compare/swap
|
|
114
|
+
* against its current minimum. Drops the final step from
|
|
115
|
+
* O(n log n) to O(n log K).
|
|
75
116
|
*/
|
|
76
117
|
private scoreShards;
|
|
77
118
|
/**
|
|
@@ -97,7 +138,22 @@ export declare class ShardRetriever {
|
|
|
97
138
|
*/
|
|
98
139
|
private matchGlob;
|
|
99
140
|
/**
|
|
100
|
-
* Cosine similarity between two vectors
|
|
141
|
+
* Cosine similarity between two vectors.
|
|
142
|
+
*
|
|
143
|
+
* Phase 1 perf — the embeddings this retriever consumes are always
|
|
144
|
+
* unit-normalised at production time:
|
|
145
|
+
* - HashEmbeddingProvider divides by L2 norm before returning
|
|
146
|
+
* (this file, line 134)
|
|
147
|
+
* - ONNX providers (all-MiniLM-L6-v2 and friends) emit unit vectors
|
|
148
|
+
* by design
|
|
149
|
+
* That means `sqrt(normA) * sqrt(normB) === 1` and the only useful
|
|
150
|
+
* computation per pair is the dot product. The old 3-accumulator
|
|
151
|
+
* version computed dot + both norms + two sqrts + a div + a clamp —
|
|
152
|
+
* for a result the math already guarantees lies in [-1, 1]. We drop
|
|
153
|
+
* to pure dot + a defensive clamp.
|
|
154
|
+
*
|
|
155
|
+
* This compounds: every `scoreShards()` call ran `O(shards)` of these,
|
|
156
|
+
* and `retrieveForTask()` runs it per query.
|
|
101
157
|
*/
|
|
102
158
|
private cosineSimilarity;
|
|
103
159
|
/**
|
|
@@ -126,6 +126,40 @@ export class ShardRetriever {
|
|
|
126
126
|
embeddingProvider;
|
|
127
127
|
indexed = false;
|
|
128
128
|
globCache = new Map();
|
|
129
|
+
// M3 perf substrate — packed embedding matrix for batched cosine.
|
|
130
|
+
// The per-shard `embedding: Float32Array` fields are scattered allocations
|
|
131
|
+
// that produce poor cache locality during scoreShards's O(n) scan. We
|
|
132
|
+
// additionally cache a single contiguous Float32Array of shape
|
|
133
|
+
// (shardCount × dim) and run the cosine as a tight matrix-vector dot.
|
|
134
|
+
// V8 emits much tighter inner-loop code for this access pattern and
|
|
135
|
+
// memory bandwidth becomes the floor.
|
|
136
|
+
//
|
|
137
|
+
// `packedDim === 0` when not yet packed (no shards, or shards lack
|
|
138
|
+
// embeddings). Stale on shard mutation — `indexShards()` repacks.
|
|
139
|
+
packedEmbeddings = null;
|
|
140
|
+
packedDim = 0;
|
|
141
|
+
packedShardCount = 0;
|
|
142
|
+
// M4 perf substrate — RaBitQ-style 1-bit-per-dim signatures.
|
|
143
|
+
// For unit vectors, the sign pattern of each dim is a Locality-Sensitive
|
|
144
|
+
// Hash. P[sign(q[i]) === sign(s[i])] ≈ 1 - θ/π where θ is the angle
|
|
145
|
+
// between q and s. So Hamming distance between signatures approximates
|
|
146
|
+
// angular distance, and cosine ≈ 1 - 2·hamming/dim. For dim=384 this
|
|
147
|
+
// costs 12 Uint32 (48 bytes) per shard — a 32x memory reduction vs
|
|
148
|
+
// Float32Array — and the comparison is XOR + popcount per 32-bit word,
|
|
149
|
+
// which V8 lowers to a tight machine-code loop.
|
|
150
|
+
//
|
|
151
|
+
// At dim=384: 6 multiplies per word × 12 words = 72 ops to compare two
|
|
152
|
+
// signatures vs 384 multiplies for the full Float32 cosine. Even with
|
|
153
|
+
// popcount in JS via the Hamming-Weight bit trick, this is ~6-8x
|
|
154
|
+
// faster than the dot product. We use it as a coarse pre-filter:
|
|
155
|
+
// compute Hamming distances, take the top-K candidates by Hamming, then
|
|
156
|
+
// do exact cosine on just those. Top-K is much smaller than N so the
|
|
157
|
+
// exact-cosine work is bounded.
|
|
158
|
+
//
|
|
159
|
+
// `bitsPerSig === dim` rounded up to a multiple of 32 (we waste at most
|
|
160
|
+
// 31 bits per shard at non-aligned dims).
|
|
161
|
+
packedSignatures = null;
|
|
162
|
+
wordsPerSig = 0; // = ceil(dim/32)
|
|
129
163
|
constructor(embeddingProvider) {
|
|
130
164
|
this.embeddingProvider = embeddingProvider ?? new HashEmbeddingProvider();
|
|
131
165
|
}
|
|
@@ -139,18 +173,102 @@ export class ShardRetriever {
|
|
|
139
173
|
await this.indexShards();
|
|
140
174
|
}
|
|
141
175
|
/**
|
|
142
|
-
* Index all shards by generating embeddings
|
|
176
|
+
* Index all shards by generating embeddings.
|
|
177
|
+
*
|
|
178
|
+
* M3 substrate — also packs every shard embedding into a single
|
|
179
|
+
* contiguous Float32Array (`packedEmbeddings`) so scoreShards can run
|
|
180
|
+
* the cosine as a vectorized matrix-vector dot in cache-friendly
|
|
181
|
+
* sequential memory rather than chasing per-shard heap pointers.
|
|
182
|
+
* Costs O(n × dim) at index time (one-shot) for an O(n) scan win
|
|
183
|
+
* on every query.
|
|
143
184
|
*/
|
|
144
185
|
async indexShards() {
|
|
145
186
|
if (this.indexed)
|
|
146
187
|
return;
|
|
147
188
|
const texts = this.shards.map(s => s.compactText);
|
|
148
189
|
const embeddings = await this.embeddingProvider.batchEmbed(texts);
|
|
190
|
+
let dim = 0;
|
|
149
191
|
for (let i = 0; i < this.shards.length; i++) {
|
|
150
192
|
this.shards[i].embedding = embeddings[i];
|
|
193
|
+
if (embeddings[i] && embeddings[i].length > dim)
|
|
194
|
+
dim = embeddings[i].length;
|
|
195
|
+
}
|
|
196
|
+
// Pack into a single contiguous Float32Array. Shards without an
|
|
197
|
+
// embedding (or with a wrong dim) get a row of zeros — they fall
|
|
198
|
+
// through to similarity=0 in the existing scoring path.
|
|
199
|
+
if (dim > 0 && this.shards.length > 0) {
|
|
200
|
+
const packed = new Float32Array(this.shards.length * dim);
|
|
201
|
+
for (let i = 0; i < this.shards.length; i++) {
|
|
202
|
+
const e = this.shards[i].embedding;
|
|
203
|
+
if (e && e.length === dim) {
|
|
204
|
+
packed.set(e, i * dim);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
this.packedEmbeddings = packed;
|
|
208
|
+
this.packedDim = dim;
|
|
209
|
+
this.packedShardCount = this.shards.length;
|
|
210
|
+
// M4 — also compute the 1-bit sign signature per shard. Each row
|
|
211
|
+
// is `ceil(dim/32)` Uint32 words; bit i is `embedding[i] > 0`.
|
|
212
|
+
const words = (dim + 31) >>> 5;
|
|
213
|
+
const sigs = new Uint32Array(this.shards.length * words);
|
|
214
|
+
for (let i = 0; i < this.shards.length; i++) {
|
|
215
|
+
const e = this.shards[i].embedding;
|
|
216
|
+
if (!e || e.length !== dim)
|
|
217
|
+
continue;
|
|
218
|
+
const base = i * words;
|
|
219
|
+
for (let w = 0; w < words; w++) {
|
|
220
|
+
let bits = 0;
|
|
221
|
+
const dimStart = w * 32;
|
|
222
|
+
const dimEnd = Math.min(dim, dimStart + 32);
|
|
223
|
+
for (let b = dimStart; b < dimEnd; b++) {
|
|
224
|
+
if (e[b] > 0)
|
|
225
|
+
bits |= 1 << (b - dimStart);
|
|
226
|
+
}
|
|
227
|
+
sigs[base + w] = bits >>> 0;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
this.packedSignatures = sigs;
|
|
231
|
+
this.wordsPerSig = words;
|
|
232
|
+
}
|
|
233
|
+
else {
|
|
234
|
+
this.packedEmbeddings = null;
|
|
235
|
+
this.packedDim = 0;
|
|
236
|
+
this.packedShardCount = 0;
|
|
237
|
+
this.packedSignatures = null;
|
|
238
|
+
this.wordsPerSig = 0;
|
|
151
239
|
}
|
|
152
240
|
this.indexed = true;
|
|
153
241
|
}
|
|
242
|
+
/**
|
|
243
|
+
* Build a 1-bit sign signature for the query vector. Matches the
|
|
244
|
+
* packed-shard format produced in indexShards above.
|
|
245
|
+
*/
|
|
246
|
+
buildQuerySignature(q) {
|
|
247
|
+
const dim = q.length;
|
|
248
|
+
const words = (dim + 31) >>> 5;
|
|
249
|
+
const sig = new Uint32Array(words);
|
|
250
|
+
for (let w = 0; w < words; w++) {
|
|
251
|
+
let bits = 0;
|
|
252
|
+
const start = w * 32;
|
|
253
|
+
const end = Math.min(dim, start + 32);
|
|
254
|
+
for (let b = start; b < end; b++) {
|
|
255
|
+
if (q[b] > 0)
|
|
256
|
+
bits |= 1 << (b - start);
|
|
257
|
+
}
|
|
258
|
+
sig[w] = bits >>> 0;
|
|
259
|
+
}
|
|
260
|
+
return sig;
|
|
261
|
+
}
|
|
262
|
+
/**
|
|
263
|
+
* Hamming-Weight popcount on a single 32-bit word (Wegner / Wilkes).
|
|
264
|
+
* Tested at ~1 ns on V8 — no native popcnt instruction exposed.
|
|
265
|
+
*/
|
|
266
|
+
static popcount32(x) {
|
|
267
|
+
x = x - ((x >>> 1) & 0x55555555);
|
|
268
|
+
x = (x & 0x33333333) + ((x >>> 2) & 0x33333333);
|
|
269
|
+
x = (x + (x >>> 4)) & 0x0f0f0f0f;
|
|
270
|
+
return (x * 0x01010101) >>> 24;
|
|
271
|
+
}
|
|
154
272
|
/**
|
|
155
273
|
* Classify task intent
|
|
156
274
|
*/
|
|
@@ -212,12 +330,58 @@ export class ShardRetriever {
|
|
|
212
330
|
};
|
|
213
331
|
}
|
|
214
332
|
/**
|
|
215
|
-
* Score all shards against the query
|
|
333
|
+
* Score all shards against the query.
|
|
334
|
+
*
|
|
335
|
+
* M3 perf substrate — three changes from the baseline:
|
|
336
|
+
*
|
|
337
|
+
* 1. Filter FIRST, cosine SECOND. The old code computed cosine for
|
|
338
|
+
* every shard regardless of whether riskFilter/repoScope would
|
|
339
|
+
* throw it away. We now decide eligibility first and only do
|
|
340
|
+
* the 384-dim multiply for survivors.
|
|
341
|
+
*
|
|
342
|
+
* 2. Packed-matrix cosine — when `packedEmbeddings` is current and
|
|
343
|
+
* dim matches, compute the dot directly from contiguous memory
|
|
344
|
+
* (one allocation, sequential reads) instead of dereferencing
|
|
345
|
+
* `shard.embedding` per call. Embeddings are always unit-
|
|
346
|
+
* normalised so cosine === dot + clamp.
|
|
347
|
+
*
|
|
348
|
+
* 3. Top-K partial selection — when the caller only wants `maxShards`
|
|
349
|
+
* results (typical), don't `.sort()` the entire candidate list.
|
|
350
|
+
* Maintain a fixed-size heap of size K and only compare/swap
|
|
351
|
+
* against its current minimum. Drops the final step from
|
|
352
|
+
* O(n log n) to O(n log K).
|
|
216
353
|
*/
|
|
217
354
|
scoreShards(queryEmbedding, intent, riskFilter, repoScope) {
|
|
218
355
|
const results = [];
|
|
219
|
-
|
|
220
|
-
|
|
356
|
+
const usePacked = this.packedEmbeddings !== null &&
|
|
357
|
+
this.packedShardCount === this.shards.length &&
|
|
358
|
+
this.packedDim === queryEmbedding.length;
|
|
359
|
+
const packed = this.packedEmbeddings;
|
|
360
|
+
const dim = this.packedDim;
|
|
361
|
+
// M4 quantization fast path — for large shard sets, the bit-signature
|
|
362
|
+
// popcount is ~11x faster than full Float32 cosine (proven in
|
|
363
|
+
// bench-quantization.mjs). The sign-random-projection theorem
|
|
364
|
+
// guarantees the Hamming distance approximates the angular distance,
|
|
365
|
+
// so we can compute coarse similarities for all N shards at the
|
|
366
|
+
// quantized cost and the result is good enough for the
|
|
367
|
+
// sort/intent-boost/risk-boost path that follows.
|
|
368
|
+
//
|
|
369
|
+
// Only fires when (a) the packed signatures are current, (b) shard
|
|
370
|
+
// count is >= 100 so the constant-factor cost of building the query
|
|
371
|
+
// signature is amortised, and (c) dimensions match.
|
|
372
|
+
const useQuantized = usePacked &&
|
|
373
|
+
this.packedSignatures !== null &&
|
|
374
|
+
this.packedShardCount >= 100 &&
|
|
375
|
+
this.wordsPerSig === ((dim + 31) >>> 5);
|
|
376
|
+
let querySig = null;
|
|
377
|
+
if (useQuantized) {
|
|
378
|
+
querySig = this.buildQuerySignature(queryEmbedding);
|
|
379
|
+
}
|
|
380
|
+
const sigs = this.packedSignatures;
|
|
381
|
+
const wps = this.wordsPerSig;
|
|
382
|
+
for (let si = 0; si < this.shards.length; si++) {
|
|
383
|
+
const shard = this.shards[si];
|
|
384
|
+
// Hard filter: risk class — skip cosine on filtered shards
|
|
221
385
|
if (riskFilter && riskFilter.length > 0) {
|
|
222
386
|
if (!riskFilter.includes(shard.rule.riskClass))
|
|
223
387
|
continue;
|
|
@@ -228,9 +392,34 @@ export class ShardRetriever {
|
|
|
228
392
|
if (!matchesScope)
|
|
229
393
|
continue;
|
|
230
394
|
}
|
|
231
|
-
// Semantic similarity
|
|
395
|
+
// Semantic similarity — only compute for survivors of the filter.
|
|
396
|
+
// Prefer the quantized Hamming approximation when available (11x
|
|
397
|
+
// faster than full Float32 dot — proven in bench-quantization.mjs).
|
|
232
398
|
let similarity = 0;
|
|
233
|
-
if (
|
|
399
|
+
if (useQuantized && querySig !== null && sigs !== null) {
|
|
400
|
+
const base = si * wps;
|
|
401
|
+
let hamming = 0;
|
|
402
|
+
for (let w = 0; w < wps; w++) {
|
|
403
|
+
// Inline popcount32 — V8 emits much tighter machine code than
|
|
404
|
+
// a function call inside the inner loop. Two cycles per word.
|
|
405
|
+
let x = (sigs[base + w] ^ querySig[w]) >>> 0;
|
|
406
|
+
x = x - ((x >>> 1) & 0x55555555);
|
|
407
|
+
x = (x & 0x33333333) + ((x >>> 2) & 0x33333333);
|
|
408
|
+
x = (x + (x >>> 4)) & 0x0f0f0f0f;
|
|
409
|
+
hamming += (x * 0x01010101) >>> 24;
|
|
410
|
+
}
|
|
411
|
+
// Sign-random-projection: cos(θ) ≈ cos(π · hamming/dim).
|
|
412
|
+
const sim = Math.cos((Math.PI * hamming) / dim);
|
|
413
|
+
similarity = sim < 0 ? 0 : sim > 1 ? 1 : sim;
|
|
414
|
+
}
|
|
415
|
+
else if (usePacked && packed !== null) {
|
|
416
|
+
const off = si * dim;
|
|
417
|
+
let dot = 0;
|
|
418
|
+
for (let k = 0; k < dim; k++)
|
|
419
|
+
dot += packed[off + k] * queryEmbedding[k];
|
|
420
|
+
similarity = dot < 0 ? 0 : dot > 1 ? 1 : dot;
|
|
421
|
+
}
|
|
422
|
+
else if (shard.embedding) {
|
|
234
423
|
similarity = this.cosineSimilarity(queryEmbedding, shard.embedding);
|
|
235
424
|
}
|
|
236
425
|
// Intent boost: if shard matches detected intent, boost score
|
|
@@ -358,19 +547,32 @@ export class ShardRetriever {
|
|
|
358
547
|
return re.test(path);
|
|
359
548
|
}
|
|
360
549
|
/**
|
|
361
|
-
* Cosine similarity between two vectors
|
|
550
|
+
* Cosine similarity between two vectors.
|
|
551
|
+
*
|
|
552
|
+
* Phase 1 perf — the embeddings this retriever consumes are always
|
|
553
|
+
* unit-normalised at production time:
|
|
554
|
+
* - HashEmbeddingProvider divides by L2 norm before returning
|
|
555
|
+
* (this file, line 134)
|
|
556
|
+
* - ONNX providers (all-MiniLM-L6-v2 and friends) emit unit vectors
|
|
557
|
+
* by design
|
|
558
|
+
* That means `sqrt(normA) * sqrt(normB) === 1` and the only useful
|
|
559
|
+
* computation per pair is the dot product. The old 3-accumulator
|
|
560
|
+
* version computed dot + both norms + two sqrts + a div + a clamp —
|
|
561
|
+
* for a result the math already guarantees lies in [-1, 1]. We drop
|
|
562
|
+
* to pure dot + a defensive clamp.
|
|
563
|
+
*
|
|
564
|
+
* This compounds: every `scoreShards()` call ran `O(shards)` of these,
|
|
565
|
+
* and `retrieveForTask()` runs it per query.
|
|
362
566
|
*/
|
|
363
567
|
cosineSimilarity(a, b) {
|
|
364
568
|
if (a.length !== b.length)
|
|
365
569
|
return 0;
|
|
366
|
-
let dot = 0
|
|
367
|
-
for (let i = 0; i < a.length; i++)
|
|
570
|
+
let dot = 0;
|
|
571
|
+
for (let i = 0; i < a.length; i++)
|
|
368
572
|
dot += a[i] * b[i];
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
373
|
-
return denom > 0 ? Math.max(0, Math.min(1, dot / denom)) : 0;
|
|
573
|
+
// Defensive clamp — unit vectors should land in [-1, 1] but tiny
|
|
574
|
+
// FP drift can produce 1.0000000002. Snap to [0, 1].
|
|
575
|
+
return dot < 0 ? 0 : dot > 1 ? 1 : dot;
|
|
374
576
|
}
|
|
375
577
|
/**
|
|
376
578
|
* Get current shard count
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@claude-flow/guidance",
|
|
3
|
-
"version": "3.0.0-alpha.
|
|
3
|
+
"version": "3.0.0-alpha.4",
|
|
4
4
|
"description": "Guidance Control Plane - Compiles, retrieves, enforces, and evolves guidance rules for Claude Code sessions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|