@claude-flow/cli 3.7.0-alpha.45 → 3.7.0-alpha.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/src/mcp-tools/embeddings-tools.d.ts.map +1 -1
- package/dist/src/mcp-tools/embeddings-tools.js +129 -0
- package/dist/src/mcp-tools/embeddings-tools.js.map +1 -1
- package/dist/src/memory/diskann-registry.d.ts +56 -0
- package/dist/src/memory/diskann-registry.d.ts.map +1 -0
- package/dist/src/memory/diskann-registry.js +88 -0
- package/dist/src/memory/diskann-registry.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +3 -3
- package/dist/src/ruvector/flash-attention.d.ts +0 -195
- package/dist/src/ruvector/flash-attention.d.ts.map +0 -1
- package/dist/src/ruvector/flash-attention.js +0 -643
- package/dist/src/ruvector/flash-attention.js.map +0 -1
- package/dist/src/ruvector/moe-router.d.ts +0 -206
- package/dist/src/ruvector/moe-router.d.ts.map +0 -1
- package/dist/src/ruvector/moe-router.js +0 -626
- package/dist/src/ruvector/moe-router.js.map +0 -1
- package/dist/src/services/event-stream.d.ts +0 -25
- package/dist/src/services/event-stream.d.ts.map +0 -1
- package/dist/src/services/event-stream.js +0 -27
- package/dist/src/services/event-stream.js.map +0 -1
- package/dist/src/services/loop-worker-runner.d.ts +0 -16
- package/dist/src/services/loop-worker-runner.d.ts.map +0 -1
- package/dist/src/services/loop-worker-runner.js +0 -34
- package/dist/src/services/loop-worker-runner.js.map +0 -1
- package/dist/src/services/runtime-capabilities.d.ts +0 -22
- package/dist/src/services/runtime-capabilities.d.ts.map +0 -1
- package/dist/src/services/runtime-capabilities.js +0 -45
- package/dist/src/services/runtime-capabilities.js.map +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@claude-flow/cli",
|
|
3
|
-
"version": "3.7.0-alpha.
|
|
3
|
+
"version": "3.7.0-alpha.47",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Ruflo CLI - Enterprise AI agent orchestration with 60+ specialized agents, swarm coordination, MCP server, self-learning hooks, and vector memory for Claude Code",
|
|
6
6
|
"main": "dist/src/index.js",
|
|
@@ -104,14 +104,14 @@
|
|
|
104
104
|
"semver": "^7.6.0",
|
|
105
105
|
"yaml": "^2.8.0",
|
|
106
106
|
"@claude-flow/memory": "^3.0.0-alpha.17",
|
|
107
|
-
"@claude-flow/embeddings": "^3.0.0-alpha.
|
|
107
|
+
"@claude-flow/embeddings": "^3.0.0-alpha.26",
|
|
108
108
|
"@claude-flow/security": "^3.0.0-alpha.8",
|
|
109
109
|
"@claude-flow/swarm": "^3.0.0-alpha.8"
|
|
110
110
|
},
|
|
111
111
|
"optionalDependencies": {
|
|
112
112
|
"@claude-flow/aidefence": "^3.0.2",
|
|
113
113
|
"@claude-flow/codex": "^3.0.0-alpha.8",
|
|
114
|
-
"@claude-flow/embeddings": "^3.0.0-alpha.
|
|
114
|
+
"@claude-flow/embeddings": "^3.0.0-alpha.26",
|
|
115
115
|
"@claude-flow/guidance": "^3.0.0-alpha.1",
|
|
116
116
|
"@claude-flow/memory": "^3.0.0-alpha.17",
|
|
117
117
|
"@claude-flow/plugin-gastown-bridge": "^0.1.3",
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Flash Attention Implementation for RuVector Intelligence System
|
|
3
|
-
*
|
|
4
|
-
* Implements block-wise attention computation for faster similarity calculations.
|
|
5
|
-
* Achieves O(N) memory instead of O(N^2) through tiling strategy.
|
|
6
|
-
*
|
|
7
|
-
* Key optimizations:
|
|
8
|
-
* - Block-wise computation to fit in L1 cache
|
|
9
|
-
* - Fused softmax-matmul operations
|
|
10
|
-
* - Float32Array for all operations
|
|
11
|
-
* - Online softmax for numerical stability
|
|
12
|
-
*
|
|
13
|
-
* Target: 2-5x speedup on CPU vs naive attention
|
|
14
|
-
*
|
|
15
|
-
* Created with love by ruv.io
|
|
16
|
-
*/
|
|
17
|
-
export interface FlashAttentionConfig {
|
|
18
|
-
/** Block size for tiling (32-64 optimal for CPU L1 cache) */
|
|
19
|
-
blockSize: number;
|
|
20
|
-
/** Number of dimensions in embedding vectors */
|
|
21
|
-
dimensions: number;
|
|
22
|
-
/** Temperature for softmax scaling */
|
|
23
|
-
temperature: number;
|
|
24
|
-
/** Enable numerical stability optimizations */
|
|
25
|
-
useStableMode: boolean;
|
|
26
|
-
/** Use optimized CPU path (default: true) */
|
|
27
|
-
useCPUOptimizations: boolean;
|
|
28
|
-
}
|
|
29
|
-
export interface AttentionResult {
|
|
30
|
-
/** Output vectors after attention */
|
|
31
|
-
output: Float32Array[];
|
|
32
|
-
/** Attention weights (optional, for debugging) */
|
|
33
|
-
weights?: Float32Array[];
|
|
34
|
-
/** Computation time in milliseconds */
|
|
35
|
-
computeTimeMs: number;
|
|
36
|
-
}
|
|
37
|
-
export interface BenchmarkResult {
|
|
38
|
-
/** Naive attention time in milliseconds */
|
|
39
|
-
naiveTimeMs: number;
|
|
40
|
-
/** Flash attention time in milliseconds */
|
|
41
|
-
flashTimeMs: number;
|
|
42
|
-
/** Speedup factor (naive / flash) */
|
|
43
|
-
speedup: number;
|
|
44
|
-
/** Number of vectors benchmarked */
|
|
45
|
-
numVectors: number;
|
|
46
|
-
/** Dimensions of vectors */
|
|
47
|
-
dimensions: number;
|
|
48
|
-
/** Memory usage estimate for naive (bytes) */
|
|
49
|
-
naiveMemoryBytes: number;
|
|
50
|
-
/** Memory usage estimate for flash (bytes) */
|
|
51
|
-
flashMemoryBytes: number;
|
|
52
|
-
/** Memory reduction factor */
|
|
53
|
-
memoryReduction: number;
|
|
54
|
-
}
|
|
55
|
-
export declare class FlashAttention {
|
|
56
|
-
private config;
|
|
57
|
-
private lastSpeedup;
|
|
58
|
-
private benchmarkHistory;
|
|
59
|
-
private scoreBuffer;
|
|
60
|
-
private expBuffer;
|
|
61
|
-
private accumBuffer;
|
|
62
|
-
constructor(config?: Partial<FlashAttentionConfig>);
|
|
63
|
-
/**
|
|
64
|
-
* Main attention computation using Flash Attention algorithm
|
|
65
|
-
*
|
|
66
|
-
* @param queries - Query vectors [N x D]
|
|
67
|
-
* @param keys - Key vectors [M x D]
|
|
68
|
-
* @param values - Value vectors [M x D]
|
|
69
|
-
* @returns Attention output [N x D]
|
|
70
|
-
*/
|
|
71
|
-
attention(queries: Float32Array[], keys: Float32Array[], values: Float32Array[]): AttentionResult;
|
|
72
|
-
/**
|
|
73
|
-
* CPU-optimized attention with aggressive optimizations
|
|
74
|
-
*
|
|
75
|
-
* Key optimizations:
|
|
76
|
-
* - Blocked score computation (better cache utilization)
|
|
77
|
-
* - Top-K sparse attention (only use most relevant keys)
|
|
78
|
-
* - Pre-allocated buffers to avoid GC pressure
|
|
79
|
-
* - 8x loop unrolling for dot products
|
|
80
|
-
* - Fused max-finding during score computation
|
|
81
|
-
*/
|
|
82
|
-
private cpuOptimizedAttention;
|
|
83
|
-
/**
|
|
84
|
-
* Partial dot product using only first N dimensions (for screening)
|
|
85
|
-
*/
|
|
86
|
-
private partialDotProduct;
|
|
87
|
-
/**
|
|
88
|
-
* Partial sort to get top-K elements (QuickSelect-like)
|
|
89
|
-
* Only ensures first K elements are the largest, not sorted
|
|
90
|
-
*/
|
|
91
|
-
private partialSort;
|
|
92
|
-
/**
|
|
93
|
-
* Swap two indices in array
|
|
94
|
-
*/
|
|
95
|
-
private swapIndices;
|
|
96
|
-
/**
|
|
97
|
-
* Fast dot product with 8x unrolling
|
|
98
|
-
*/
|
|
99
|
-
private fastDotProduct;
|
|
100
|
-
/**
|
|
101
|
-
* Block-wise attention computation (Flash Attention core algorithm)
|
|
102
|
-
*
|
|
103
|
-
* Algorithm:
|
|
104
|
-
* For each block of queries Q_b:
|
|
105
|
-
* For each block of keys K_b:
|
|
106
|
-
* S_b = Q_b @ K_b.T / sqrt(d) // Block scores
|
|
107
|
-
* P_b = softmax(S_b) // Block attention
|
|
108
|
-
* O_b += P_b @ V_b // Accumulate output
|
|
109
|
-
*
|
|
110
|
-
* @param Q - Query vectors
|
|
111
|
-
* @param K - Key vectors
|
|
112
|
-
* @param V - Value vectors
|
|
113
|
-
* @param blockSize - Block size for tiling
|
|
114
|
-
*/
|
|
115
|
-
blockAttention(Q: Float32Array[], K: Float32Array[], V: Float32Array[], blockSize: number): Float32Array[];
|
|
116
|
-
/**
|
|
117
|
-
* Get the speedup factor from the last benchmark
|
|
118
|
-
*/
|
|
119
|
-
getSpeedup(): number;
|
|
120
|
-
/**
|
|
121
|
-
* Run benchmark comparing naive vs CPU-optimized attention
|
|
122
|
-
*
|
|
123
|
-
* @param numVectors - Number of vectors to test
|
|
124
|
-
* @param dimensions - Dimensions per vector
|
|
125
|
-
* @param iterations - Number of iterations for averaging
|
|
126
|
-
*/
|
|
127
|
-
benchmark(numVectors?: number, dimensions?: number, iterations?: number): BenchmarkResult;
|
|
128
|
-
/**
|
|
129
|
-
* Get benchmark history
|
|
130
|
-
*/
|
|
131
|
-
getBenchmarkHistory(): BenchmarkResult[];
|
|
132
|
-
/**
|
|
133
|
-
* Get configuration
|
|
134
|
-
*/
|
|
135
|
-
getConfig(): FlashAttentionConfig;
|
|
136
|
-
/**
|
|
137
|
-
* Update configuration
|
|
138
|
-
*/
|
|
139
|
-
setConfig(config: Partial<FlashAttentionConfig>): void;
|
|
140
|
-
/**
|
|
141
|
-
* Naive O(N^2) attention implementation for comparison
|
|
142
|
-
*/
|
|
143
|
-
private naiveAttention;
|
|
144
|
-
/**
|
|
145
|
-
* Compute block of attention scores
|
|
146
|
-
*/
|
|
147
|
-
private computeBlockScores;
|
|
148
|
-
/**
|
|
149
|
-
* Online softmax with output accumulation (key to Flash Attention)
|
|
150
|
-
*
|
|
151
|
-
* Uses the online softmax trick to maintain numerical stability
|
|
152
|
-
* while processing blocks incrementally.
|
|
153
|
-
*/
|
|
154
|
-
private onlineSoftmaxAccumulate;
|
|
155
|
-
/**
|
|
156
|
-
* Compute dot product of two vectors
|
|
157
|
-
*/
|
|
158
|
-
private dotProduct;
|
|
159
|
-
/**
|
|
160
|
-
* Stable softmax implementation
|
|
161
|
-
*/
|
|
162
|
-
private softmax;
|
|
163
|
-
/**
|
|
164
|
-
* Generate random vectors for benchmarking
|
|
165
|
-
*/
|
|
166
|
-
private generateRandomVectors;
|
|
167
|
-
/**
|
|
168
|
-
* Validate input arrays
|
|
169
|
-
*/
|
|
170
|
-
private validateInputs;
|
|
171
|
-
}
|
|
172
|
-
/**
|
|
173
|
-
* Get singleton FlashAttention instance
|
|
174
|
-
*
|
|
175
|
-
* @param config - Optional configuration (only used on first call)
|
|
176
|
-
* @returns FlashAttention instance
|
|
177
|
-
*/
|
|
178
|
-
export declare function getFlashAttention(config?: Partial<FlashAttentionConfig>): FlashAttention;
|
|
179
|
-
/**
|
|
180
|
-
* Reset singleton (for testing)
|
|
181
|
-
*/
|
|
182
|
-
export declare function resetFlashAttention(): void;
|
|
183
|
-
/**
|
|
184
|
-
* Compute attention using Flash Attention
|
|
185
|
-
*/
|
|
186
|
-
export declare function computeAttention(queries: Float32Array[], keys: Float32Array[], values: Float32Array[], config?: Partial<FlashAttentionConfig>): AttentionResult;
|
|
187
|
-
/**
|
|
188
|
-
* Run Flash Attention benchmark
|
|
189
|
-
*/
|
|
190
|
-
export declare function benchmarkFlashAttention(numVectors?: number, dimensions?: number, iterations?: number): BenchmarkResult;
|
|
191
|
-
/**
|
|
192
|
-
* Get current speedup from last benchmark
|
|
193
|
-
*/
|
|
194
|
-
export declare function getFlashAttentionSpeedup(): number;
|
|
195
|
-
//# sourceMappingURL=flash-attention.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"flash-attention.d.ts","sourceRoot":"","sources":["../../../src/ruvector/flash-attention.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAMH,MAAM,WAAW,oBAAoB;IACnC,6DAA6D;IAC7D,SAAS,EAAE,MAAM,CAAC;IAClB,gDAAgD;IAChD,UAAU,EAAE,MAAM,CAAC;IACnB,sCAAsC;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,aAAa,EAAE,OAAO,CAAC;IACvB,6CAA6C;IAC7C,mBAAmB,EAAE,OAAO,CAAC;CAC9B;AAED,MAAM,WAAW,eAAe;IAC9B,qCAAqC;IACrC,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,kDAAkD;IAClD,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;IACzB,uCAAuC;IACvC,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,qCAAqC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,4BAA4B;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,gBAAgB,EAAE,MAAM,CAAC;IACzB,8CAA8C;IAC9C,gBAAgB,EAAE,MAAM,CAAC;IACzB,8BAA8B;IAC9B,eAAe,EAAE,MAAM,CAAC;CACzB;AAMD,qBAAa,cAAc;IACzB,OAAO,CAAC,MAAM,CAAuB;IACrC,OAAO,CAAC,WAAW,CAAa;IAChC,OAAO,CAAC,gBAAgB,CAAyB;IAGjD,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,SAAS,CAA6B;IAC9C,OAAO,CAAC,WAAW,CAA6B;gBAEpC,MAAM,GAAE,OAAO,CAAC,oBAAoB,CAAM;IActD;;;;;;;OAOG;IACH,SAAS,CACP,OAAO,EAAE,YAAY,EAAE,EACvB,IAAI,EAAE,YAAY,EAAE,EACpB,MAAM,EAAE,YAAY,EAAE,GACrB,eAAe;IA2BlB;;;;;;;;;OASG;IACH,OAAO,CAAC,qBAAqB;IAyI7B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAYzB;;;OAGG;IACH,OAAO,CAAC,WAAW;IAqCnB;;OAEG;IACH,OAAO,CAAC,WAAW;IAMnB;;OAEG;IACH,OAAO,CAAC,cAAc;IAwBtB;;;;;;;;;;;;;;OAcG;IACH,cAAc,CACZ,CAAC,EAAE,YAAY,EAAE,EACjB,CAAC,EAAE,YAAY,EAAE,EACjB,CAAC,EAAE,YAAY,EAAE,EACjB,SAAS,EAAE,MAAM,GAChB,YAAY,EAAE;IA0DjB;;OAEG;IACH,UAAU,IAAI,MAAM;IAIpB;;;;;;OAMG;IACH,SAAS,CACP,UAAU,GAAE,MAAY,EACxB,UAAU,GAAE,MAAY,EACxB,UAAU,GAAE,MAAU,GACrB,eAAe;IAsDlB;;OAEG;IACH,mBAAmB,IAAI,eAAe,EAAE;IAIxC;;OAEG;IACH,SAAS,IAAI,oBAAoB;IAIjC;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,oBAAoB,CAAC,GAAG,IAAI;IAQtD;;OAEG;IACH,OAAO,CAAC,cAAc;IAwCtB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAwB1B;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA2D/B;;OAEG;IACH,OAAO,CAAC,UAAU;IAqBlB;;OAEG;IACH,OAAO,CAAC,OAAO;IA4Bf;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAyB7B;;OAEG;IACH,OAAO,CAAC,cAAc;CA+BvB;AAQD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,GAAG,cAAc,CAKxF;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,IAAI,CAE1C;AAMD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,YAAY,EAAE,EACvB,IAAI,EAAE,YAAY,EAAE,EACpB,MAAM,EAAE,YAAY,EAAE,EACtB,MAAM,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,GACrC,eAAe,CAGjB;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CACrC,UAAU,CAAC,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,MAAM,EACnB,UAAU,CAAC,EAAE,MAAM,GAClB,eAAe,CAEjB;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,MAAM,CAEjD"}
|