clawpowers 2.2.5 → 2.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +186 -160
- package/COMPATIBILITY.md +48 -13
- package/KNOWN_LIMITATIONS.md +20 -19
- package/LICENSE +44 -44
- package/LICENSING.md +10 -10
- package/README.md +486 -462
- package/SECURITY.md +52 -52
- package/dist/index.d.ts +17 -5
- package/dist/index.js +187 -92
- package/dist/index.js.map +1 -1
- package/native/Cargo.lock +4927 -4927
- package/native/Cargo.toml +73 -73
- package/native/crates/canonical/Cargo.toml +24 -24
- package/native/crates/canonical/src/lib.rs +677 -673
- package/native/crates/compression/Cargo.toml +20 -20
- package/native/crates/compression/benches/compression_bench.rs +42 -42
- package/native/crates/compression/src/lib.rs +393 -393
- package/native/crates/evm-eth/Cargo.toml +13 -13
- package/native/crates/evm-eth/src/lib.rs +105 -105
- package/native/crates/fee/Cargo.toml +15 -15
- package/native/crates/fee/src/lib.rs +281 -281
- package/native/crates/index/Cargo.toml +16 -16
- package/native/crates/index/src/lib.rs +277 -277
- package/native/crates/policy/Cargo.toml +17 -17
- package/native/crates/policy/src/lib.rs +614 -614
- package/native/crates/security/Cargo.toml +22 -22
- package/native/crates/security/src/lib.rs +478 -478
- package/native/crates/tokens/Cargo.toml +13 -13
- package/native/crates/tokens/src/lib.rs +534 -534
- package/native/crates/verification/Cargo.toml +23 -23
- package/native/crates/verification/src/lib.rs +333 -333
- package/native/crates/wallet/Cargo.toml +20 -20
- package/native/crates/wallet/src/lib.rs +261 -261
- package/native/crates/x402/Cargo.toml +30 -30
- package/native/crates/x402/src/lib.rs +423 -423
- package/native/ffi/Cargo.toml +34 -34
- package/native/ffi/build.rs +4 -4
- package/native/ffi/src/lib.rs +352 -352
- package/native/ffi/tests/integration.rs +354 -354
- package/native/pyo3/Cargo.toml +26 -26
- package/native/pyo3/pyproject.toml +16 -16
- package/native/pyo3/src/lib.rs +407 -407
- package/native/pyo3/tests/test_smoke.py +180 -180
- package/native/wasm/Cargo.toml +47 -44
- package/native/wasm/pkg/.gitignore +6 -6
- package/native/wasm/pkg/clawpowers_wasm.d.ts +208 -208
- package/native/wasm/pkg/clawpowers_wasm.js +872 -872
- package/native/wasm/pkg/clawpowers_wasm_bg.wasm.d.ts +40 -40
- package/native/wasm/pkg/package.json +16 -16
- package/native/wasm/pkg-node/clawpowers_wasm.d.ts +143 -143
- package/native/wasm/pkg-node/clawpowers_wasm.js +798 -798
- package/native/wasm/pkg-node/clawpowers_wasm_bg.wasm.d.ts +40 -40
- package/native/wasm/pkg-node/package.json +12 -12
- package/native/wasm/src/lib.rs +433 -433
- package/package.json +13 -8
- package/scripts/build-wasm.mjs +59 -0
- package/scripts/generate_hermes_wrappers.py +211 -0
- package/scripts/hermes_wrapper_overrides.json +184 -0
- package/scripts/run-python-script.mjs +48 -0
- package/scripts/verify-consumer-install.mjs +109 -0
- package/scripts/verify-wasm-artifacts.mjs +26 -3
- package/scripts/verify_hermes_wrappers.py +154 -0
- package/skill.json +20 -0
- package/skills/1password/SKILL.md +34 -0
- package/skills/README.md +44 -0
- package/skills/agent-nexus-2/SKILL.md +34 -0
- package/skills/apple-notes/SKILL.md +34 -0
- package/skills/apple-reminders/SKILL.md +34 -0
- package/skills/autoresearch/SKILL.md +43 -0
- package/skills/bear-notes/SKILL.md +34 -0
- package/skills/blogwatcher/SKILL.md +34 -0
- package/skills/blucli/SKILL.md +34 -0
- package/skills/bluebubbles/SKILL.md +34 -0
- package/skills/business-strategy/SKILL.md +41 -0
- package/skills/camsnap/SKILL.md +34 -0
- package/skills/canvas/SKILL.md +34 -0
- package/skills/clawhub/SKILL.md +34 -0
- package/skills/coding-agent/SKILL.md +34 -0
- package/skills/coding-discipline.skill/SKILL.md +34 -0
- package/skills/content-writer/SKILL.md +41 -0
- package/skills/discord/SKILL.md +34 -0
- package/skills/eightctl/SKILL.md +34 -0
- package/skills/execution-validation.skill/SKILL.md +34 -0
- package/skills/gemini/SKILL.md +34 -0
- package/skills/gh-issues/SKILL.md +34 -0
- package/skills/gifgrep/SKILL.md +34 -0
- package/skills/github/SKILL.md +41 -0
- package/skills/gog/SKILL.md +34 -0
- package/skills/goplaces/SKILL.md +34 -0
- package/skills/healthcheck/SKILL.md +34 -0
- package/skills/himalaya/SKILL.md +34 -0
- package/skills/humanize/SKILL.md +41 -0
- package/skills/imsg/SKILL.md +34 -0
- package/skills/itp/SKILL.md +112 -0
- package/skills/mcporter/SKILL.md +34 -0
- package/skills/model-usage/SKILL.md +34 -0
- package/skills/nano-pdf/SKILL.md +34 -0
- package/skills/node-connect/SKILL.md +34 -0
- package/skills/notion/SKILL.md +34 -0
- package/skills/obsidian/SKILL.md +34 -0
- package/skills/openai-whisper/SKILL.md +34 -0
- package/skills/openai-whisper-api/SKILL.md +34 -0
- package/skills/openhue/SKILL.md +34 -0
- package/skills/oracle/SKILL.md +34 -0
- package/skills/ordercli/SKILL.md +34 -0
- package/skills/peekaboo/SKILL.md +34 -0
- package/skills/polyclaw/SKILL.md +34 -0
- package/skills/prospector/SKILL.md +41 -0
- package/skills/rsi.skill/SKILL.md +34 -0
- package/skills/sag/SKILL.md +34 -0
- package/skills/security/SKILL.md +41 -0
- package/skills/session-logs/SKILL.md +34 -0
- package/skills/sherpa-onnx-tts/SKILL.md +34 -0
- package/skills/skill-creator/SKILL.md +34 -0
- package/skills/slack/SKILL.md +34 -0
- package/skills/songsee/SKILL.md +34 -0
- package/skills/sonoscli/SKILL.md +34 -0
- package/skills/spotify-player/SKILL.md +34 -0
- package/skills/strykr-prism/SKILL.md +41 -0
- package/skills/summarize/SKILL.md +34 -0
- package/skills/taskbridge/SKILL.md +34 -0
- package/skills/things-mac/SKILL.md +34 -0
- package/skills/tmux/SKILL.md +34 -0
- package/skills/trello/SKILL.md +34 -0
- package/skills/validator-agent/SKILL.md +41 -0
- package/skills/video-frames/SKILL.md +34 -0
- package/skills/voice-call/SKILL.md +34 -0
- package/skills/wacli/SKILL.md +34 -0
- package/skills/weather/SKILL.md +34 -0
- package/skills/webmcp-payments/SKILL.md +41 -0
- package/skills/xurl/SKILL.md +34 -0
- package/src/skills/catalog.ts +435 -435
- package/src/skills/executor.ts +56 -56
- package/src/skills/index.ts +3 -3
- package/src/skills/itp/SKILL.md +112 -112
- package/src/skills/loader.ts +262 -193
- package/native/ffi/index.node +0 -0
- package/native/wasm/pkg-node/.gitignore +0 -6
|
@@ -1,277 +1,277 @@
|
|
|
1
|
-
//! Vector index adapter for TurboMemory.
|
|
2
|
-
//!
|
|
3
|
-
//! Exposes a [`VectorIndex`] trait and an [`InMemoryIndex`] implementation
|
|
4
|
-
//! that stores compressed vectors via [`TurboCompressor`] and retrieves
|
|
5
|
-
//! nearest neighbours using brute-force cosine similarity.
|
|
6
|
-
|
|
7
|
-
use clawpowers_compression::{CompressedVector, CompressionConfig, TurboCompressor};
|
|
8
|
-
use thiserror::Error;
|
|
9
|
-
use uuid::Uuid;
|
|
10
|
-
|
|
11
|
-
/// Errors produced by the index.
|
|
12
|
-
#[derive(Debug, Error)]
|
|
13
|
-
pub enum IndexError {
|
|
14
|
-
/// The query vector has a different dimensionality than the index.
|
|
15
|
-
#[error("dimension mismatch: expected {expected}, got {got}")]
|
|
16
|
-
DimensionMismatch {
|
|
17
|
-
/// Expected dimensionality.
|
|
18
|
-
expected: usize,
|
|
19
|
-
/// Provided dimensionality.
|
|
20
|
-
got: usize,
|
|
21
|
-
},
|
|
22
|
-
/// The requested number of results is zero.
|
|
23
|
-
#[error("top_k must be > 0")]
|
|
24
|
-
ZeroTopK,
|
|
25
|
-
/// Underlying compression error.
|
|
26
|
-
#[error("compression error: {0}")]
|
|
27
|
-
Compression(#[from] clawpowers_compression::CompressionError),
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
/// A shorthand result type for [`IndexError`].
|
|
31
|
-
pub type Result<T> = std::result::Result<T, IndexError>;
|
|
32
|
-
|
|
33
|
-
/// A single ranked search result.
|
|
34
|
-
#[derive(Debug, Clone)]
|
|
35
|
-
pub struct SearchResult {
|
|
36
|
-
/// Identifier of the matching vector.
|
|
37
|
-
pub id: Uuid,
|
|
38
|
-
/// Similarity score (cosine similarity, higher is more similar).
|
|
39
|
-
pub score: f32,
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
/// Trait for vector stores used by TurboMemory.
|
|
43
|
-
pub trait VectorIndex {
|
|
44
|
-
/// Insert a vector under `id`.
|
|
45
|
-
fn insert(&mut self, id: Uuid, vector: Vec<f32>) -> Result<()>;
|
|
46
|
-
/// Return the `top_k` most similar vectors to `query`, ranked descending by score.
|
|
47
|
-
fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<SearchResult>>;
|
|
48
|
-
/// Remove the vector with `id`. Returns `true` if it was present.
|
|
49
|
-
fn remove(&mut self, id: &Uuid) -> Result<bool>;
|
|
50
|
-
/// Number of vectors currently in the index.
|
|
51
|
-
fn len(&self) -> usize;
|
|
52
|
-
/// Return `true` if the index contains no vectors.
|
|
53
|
-
fn is_empty(&self) -> bool {
|
|
54
|
-
self.len() == 0
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
struct Entry {
|
|
59
|
-
id: Uuid,
|
|
60
|
-
/// Stored for potential future fast-path approximate distance calculations.
|
|
61
|
-
#[allow(dead_code)]
|
|
62
|
-
compressed: CompressedVector,
|
|
63
|
-
/// Original vector used for exact cosine similarity.
|
|
64
|
-
original: Vec<f32>,
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
/// In-memory brute-force vector index backed by TurboQuant compression.
|
|
68
|
-
///
|
|
69
|
-
/// Suitable for up to ~100 K vectors.
|
|
70
|
-
pub struct InMemoryIndex {
|
|
71
|
-
compressor: TurboCompressor,
|
|
72
|
-
entries: Vec<Entry>,
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
impl InMemoryIndex {
|
|
76
|
-
/// Create a new index with the given compression configuration.
|
|
77
|
-
pub fn new(config: CompressionConfig) -> Self {
|
|
78
|
-
let compressor = TurboCompressor::new(config);
|
|
79
|
-
Self {
|
|
80
|
-
compressor,
|
|
81
|
-
entries: Vec::new(),
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
/// Create an index with sensible defaults for `dimensions`.
|
|
86
|
-
pub fn with_dimensions(dimensions: usize) -> Self {
|
|
87
|
-
Self::new(CompressionConfig {
|
|
88
|
-
dimensions,
|
|
89
|
-
quantization_bits: 8,
|
|
90
|
-
rotation_seed: 0xDEAD_BEEF_CAFE_1234,
|
|
91
|
-
})
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
impl VectorIndex for InMemoryIndex {
|
|
96
|
-
fn insert(&mut self, id: Uuid, vector: Vec<f32>) -> Result<()> {
|
|
97
|
-
let compressed = self.compressor.compress(&vector)?;
|
|
98
|
-
self.entries.push(Entry {
|
|
99
|
-
id,
|
|
100
|
-
compressed,
|
|
101
|
-
original: vector,
|
|
102
|
-
});
|
|
103
|
-
Ok(())
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<SearchResult>> {
|
|
107
|
-
if top_k == 0 {
|
|
108
|
-
return Err(IndexError::ZeroTopK);
|
|
109
|
-
}
|
|
110
|
-
let dim = self.compressor.config.dimensions;
|
|
111
|
-
if query.len() != dim {
|
|
112
|
-
return Err(IndexError::DimensionMismatch {
|
|
113
|
-
expected: dim,
|
|
114
|
-
got: query.len(),
|
|
115
|
-
});
|
|
116
|
-
}
|
|
117
|
-
let mut scored: Vec<(f32, &Entry)> = self
|
|
118
|
-
.entries
|
|
119
|
-
.iter()
|
|
120
|
-
.map(|e| (cosine_similarity(query, &e.original), e))
|
|
121
|
-
.collect();
|
|
122
|
-
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
|
123
|
-
Ok(scored
|
|
124
|
-
.into_iter()
|
|
125
|
-
.take(top_k)
|
|
126
|
-
.map(|(score, e)| SearchResult { id: e.id, score })
|
|
127
|
-
.collect())
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
fn remove(&mut self, id: &Uuid) -> Result<bool> {
|
|
131
|
-
let before = self.entries.len();
|
|
132
|
-
self.entries.retain(|e| &e.id != id);
|
|
133
|
-
Ok(self.entries.len() < before)
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
fn len(&self) -> usize {
|
|
137
|
-
self.entries.len()
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
|
|
142
|
-
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
|
|
143
|
-
let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
144
|
-
let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
|
|
145
|
-
if na < f32::EPSILON || nb < f32::EPSILON {
|
|
146
|
-
0.0
|
|
147
|
-
} else {
|
|
148
|
-
dot / (na * nb)
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
#[cfg(test)]
|
|
153
|
-
mod tests {
|
|
154
|
-
use super::*;
|
|
155
|
-
|
|
156
|
-
const DIM: usize = 32;
|
|
157
|
-
|
|
158
|
-
fn make_index() -> InMemoryIndex {
|
|
159
|
-
InMemoryIndex::with_dimensions(DIM)
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
fn unit_vec(hot: usize) -> Vec<f32> {
|
|
163
|
-
let mut v = vec![0.0_f32; DIM];
|
|
164
|
-
v[hot % DIM] = 1.0;
|
|
165
|
-
v
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
fn rand_vec(seed: u64) -> Vec<f32> {
|
|
169
|
-
let mut x = seed;
|
|
170
|
-
(0..DIM)
|
|
171
|
-
.map(|_| {
|
|
172
|
-
x = x
|
|
173
|
-
.wrapping_mul(6_364_136_223_846_793_005)
|
|
174
|
-
.wrapping_add(1_442_695_040_888_963_407);
|
|
175
|
-
((x >> 33) as f32 / u32::MAX as f32) * 2.0 - 1.0
|
|
176
|
-
})
|
|
177
|
-
.collect()
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
#[test]
|
|
181
|
-
fn test_insert_increases_len() {
|
|
182
|
-
let mut idx = make_index();
|
|
183
|
-
assert_eq!(idx.len(), 0);
|
|
184
|
-
idx.insert(Uuid::new_v4(), rand_vec(1)).expect("insert");
|
|
185
|
-
assert_eq!(idx.len(), 1);
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
#[test]
|
|
189
|
-
fn test_empty_index_search_returns_empty() {
|
|
190
|
-
let idx = make_index();
|
|
191
|
-
assert!(idx.search(&rand_vec(2), 5).expect("search").is_empty());
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
#[test]
|
|
195
|
-
fn test_is_empty() {
|
|
196
|
-
let mut idx = make_index();
|
|
197
|
-
assert!(idx.is_empty());
|
|
198
|
-
idx.insert(Uuid::new_v4(), rand_vec(3)).expect("insert");
|
|
199
|
-
assert!(!idx.is_empty());
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
#[test]
|
|
203
|
-
fn test_search_returns_nearest_neighbour() {
|
|
204
|
-
let mut idx = make_index();
|
|
205
|
-
let query = unit_vec(0);
|
|
206
|
-
let id_match = Uuid::new_v4();
|
|
207
|
-
let id_other = Uuid::new_v4();
|
|
208
|
-
idx.insert(id_match, unit_vec(0)).expect("insert match");
|
|
209
|
-
idx.insert(id_other, unit_vec(1)).expect("insert other");
|
|
210
|
-
let results = idx.search(&query, 1).expect("search");
|
|
211
|
-
assert_eq!(results.len(), 1);
|
|
212
|
-
assert_eq!(results[0].id, id_match);
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
#[test]
|
|
216
|
-
fn test_search_results_ordered_by_score_descending() {
|
|
217
|
-
let mut idx = make_index();
|
|
218
|
-
let query = rand_vec(42);
|
|
219
|
-
for i in 0..5 {
|
|
220
|
-
idx.insert(Uuid::new_v4(), rand_vec(i + 100))
|
|
221
|
-
.expect("insert");
|
|
222
|
-
}
|
|
223
|
-
let results = idx.search(&query, 5).expect("search");
|
|
224
|
-
for w in results.windows(2) {
|
|
225
|
-
assert!(w[0].score >= w[1].score, "{} < {}", w[0].score, w[1].score);
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
#[test]
|
|
230
|
-
fn test_search_top_k_limits_results() {
|
|
231
|
-
let mut idx = make_index();
|
|
232
|
-
for i in 0..10 {
|
|
233
|
-
idx.insert(Uuid::new_v4(), rand_vec(i)).expect("insert");
|
|
234
|
-
}
|
|
235
|
-
assert_eq!(idx.search(&rand_vec(99), 3).expect("search").len(), 3);
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
#[test]
|
|
239
|
-
fn test_remove_existing_returns_true() {
|
|
240
|
-
let mut idx = make_index();
|
|
241
|
-
let id = Uuid::new_v4();
|
|
242
|
-
idx.insert(id, rand_vec(5)).expect("insert");
|
|
243
|
-
assert!(idx.remove(&id).expect("remove"));
|
|
244
|
-
assert_eq!(idx.len(), 0);
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
#[test]
|
|
248
|
-
fn test_remove_nonexistent_returns_false() {
|
|
249
|
-
let mut idx = make_index();
|
|
250
|
-
assert!(!idx.remove(&Uuid::new_v4()).expect("remove"));
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
#[test]
|
|
254
|
-
fn test_removed_vector_not_in_results() {
|
|
255
|
-
let mut idx = make_index();
|
|
256
|
-
let query = unit_vec(0);
|
|
257
|
-
let id = Uuid::new_v4();
|
|
258
|
-
idx.insert(id, unit_vec(0)).expect("insert");
|
|
259
|
-
idx.remove(&id).expect("remove");
|
|
260
|
-
assert!(
|
|
261
|
-
!idx.search(&query, 10)
|
|
262
|
-
.expect("search")
|
|
263
|
-
.iter()
|
|
264
|
-
.any(|r| r.id == id)
|
|
265
|
-
);
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
#[test]
|
|
269
|
-
fn test_zero_top_k_errors() {
|
|
270
|
-
let mut idx = make_index();
|
|
271
|
-
let _ = idx.insert(Uuid::new_v4(), rand_vec(6));
|
|
272
|
-
assert!(matches!(
|
|
273
|
-
idx.search(&rand_vec(7), 0).expect_err("e"),
|
|
274
|
-
IndexError::ZeroTopK
|
|
275
|
-
));
|
|
276
|
-
}
|
|
277
|
-
}
|
|
1
|
+
//! Vector index adapter for TurboMemory.
|
|
2
|
+
//!
|
|
3
|
+
//! Exposes a [`VectorIndex`] trait and an [`InMemoryIndex`] implementation
|
|
4
|
+
//! that stores compressed vectors via [`TurboCompressor`] and retrieves
|
|
5
|
+
//! nearest neighbours using brute-force cosine similarity.
|
|
6
|
+
|
|
7
|
+
use clawpowers_compression::{CompressedVector, CompressionConfig, TurboCompressor};
|
|
8
|
+
use thiserror::Error;
|
|
9
|
+
use uuid::Uuid;
|
|
10
|
+
|
|
11
|
+
/// Errors produced by the index.
|
|
12
|
+
#[derive(Debug, Error)]
|
|
13
|
+
pub enum IndexError {
|
|
14
|
+
/// The query vector has a different dimensionality than the index.
|
|
15
|
+
#[error("dimension mismatch: expected {expected}, got {got}")]
|
|
16
|
+
DimensionMismatch {
|
|
17
|
+
/// Expected dimensionality.
|
|
18
|
+
expected: usize,
|
|
19
|
+
/// Provided dimensionality.
|
|
20
|
+
got: usize,
|
|
21
|
+
},
|
|
22
|
+
/// The requested number of results is zero.
|
|
23
|
+
#[error("top_k must be > 0")]
|
|
24
|
+
ZeroTopK,
|
|
25
|
+
/// Underlying compression error.
|
|
26
|
+
#[error("compression error: {0}")]
|
|
27
|
+
Compression(#[from] clawpowers_compression::CompressionError),
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/// A shorthand result type for [`IndexError`].
|
|
31
|
+
pub type Result<T> = std::result::Result<T, IndexError>;
|
|
32
|
+
|
|
33
|
+
/// A single ranked search result.
|
|
34
|
+
#[derive(Debug, Clone)]
|
|
35
|
+
pub struct SearchResult {
|
|
36
|
+
/// Identifier of the matching vector.
|
|
37
|
+
pub id: Uuid,
|
|
38
|
+
/// Similarity score (cosine similarity, higher is more similar).
|
|
39
|
+
pub score: f32,
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/// Trait for vector stores used by TurboMemory.
|
|
43
|
+
pub trait VectorIndex {
|
|
44
|
+
/// Insert a vector under `id`.
|
|
45
|
+
fn insert(&mut self, id: Uuid, vector: Vec<f32>) -> Result<()>;
|
|
46
|
+
/// Return the `top_k` most similar vectors to `query`, ranked descending by score.
|
|
47
|
+
fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<SearchResult>>;
|
|
48
|
+
/// Remove the vector with `id`. Returns `true` if it was present.
|
|
49
|
+
fn remove(&mut self, id: &Uuid) -> Result<bool>;
|
|
50
|
+
/// Number of vectors currently in the index.
|
|
51
|
+
fn len(&self) -> usize;
|
|
52
|
+
/// Return `true` if the index contains no vectors.
|
|
53
|
+
fn is_empty(&self) -> bool {
|
|
54
|
+
self.len() == 0
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
struct Entry {
|
|
59
|
+
id: Uuid,
|
|
60
|
+
/// Stored for potential future fast-path approximate distance calculations.
|
|
61
|
+
#[allow(dead_code)]
|
|
62
|
+
compressed: CompressedVector,
|
|
63
|
+
/// Original vector used for exact cosine similarity.
|
|
64
|
+
original: Vec<f32>,
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/// In-memory brute-force vector index backed by TurboQuant compression.
|
|
68
|
+
///
|
|
69
|
+
/// Suitable for up to ~100 K vectors.
|
|
70
|
+
pub struct InMemoryIndex {
|
|
71
|
+
compressor: TurboCompressor,
|
|
72
|
+
entries: Vec<Entry>,
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
impl InMemoryIndex {
|
|
76
|
+
/// Create a new index with the given compression configuration.
|
|
77
|
+
pub fn new(config: CompressionConfig) -> Self {
|
|
78
|
+
let compressor = TurboCompressor::new(config);
|
|
79
|
+
Self {
|
|
80
|
+
compressor,
|
|
81
|
+
entries: Vec::new(),
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/// Create an index with sensible defaults for `dimensions`.
|
|
86
|
+
pub fn with_dimensions(dimensions: usize) -> Self {
|
|
87
|
+
Self::new(CompressionConfig {
|
|
88
|
+
dimensions,
|
|
89
|
+
quantization_bits: 8,
|
|
90
|
+
rotation_seed: 0xDEAD_BEEF_CAFE_1234,
|
|
91
|
+
})
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
impl VectorIndex for InMemoryIndex {
|
|
96
|
+
fn insert(&mut self, id: Uuid, vector: Vec<f32>) -> Result<()> {
|
|
97
|
+
let compressed = self.compressor.compress(&vector)?;
|
|
98
|
+
self.entries.push(Entry {
|
|
99
|
+
id,
|
|
100
|
+
compressed,
|
|
101
|
+
original: vector,
|
|
102
|
+
});
|
|
103
|
+
Ok(())
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<SearchResult>> {
|
|
107
|
+
if top_k == 0 {
|
|
108
|
+
return Err(IndexError::ZeroTopK);
|
|
109
|
+
}
|
|
110
|
+
let dim = self.compressor.config.dimensions;
|
|
111
|
+
if query.len() != dim {
|
|
112
|
+
return Err(IndexError::DimensionMismatch {
|
|
113
|
+
expected: dim,
|
|
114
|
+
got: query.len(),
|
|
115
|
+
});
|
|
116
|
+
}
|
|
117
|
+
let mut scored: Vec<(f32, &Entry)> = self
|
|
118
|
+
.entries
|
|
119
|
+
.iter()
|
|
120
|
+
.map(|e| (cosine_similarity(query, &e.original), e))
|
|
121
|
+
.collect();
|
|
122
|
+
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
|
123
|
+
Ok(scored
|
|
124
|
+
.into_iter()
|
|
125
|
+
.take(top_k)
|
|
126
|
+
.map(|(score, e)| SearchResult { id: e.id, score })
|
|
127
|
+
.collect())
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
fn remove(&mut self, id: &Uuid) -> Result<bool> {
|
|
131
|
+
let before = self.entries.len();
|
|
132
|
+
self.entries.retain(|e| &e.id != id);
|
|
133
|
+
Ok(self.entries.len() < before)
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
fn len(&self) -> usize {
|
|
137
|
+
self.entries.len()
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/// Cosine similarity of `a` and `b`, in `[-1.0, 1.0]`.
///
/// Returns `0.0` for every degenerate input, so callers never receive NaN:
///
/// * the slices have different lengths (the similarity is undefined);
/// * either vector's Euclidean norm is below `f32::EPSILON`;
/// * the computation is not finite (an input contains NaN or infinity).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // `zip` would silently truncate the dot product to the shorter slice
    // while the norms still covered the full slices, producing a meaningless
    // score; reject the mismatch up front instead.
    if a.len() != b.len() {
        return 0.0;
    }

    // One pass over both slices. Accumulating in f64 keeps the squared terms
    // from overflowing to infinity for large f32 components and reduces
    // rounding error in the sums.
    let (dot, sq_a, sq_b) = a.iter().zip(b).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(dot, sq_a, sq_b), (&x, &y)| {
            let (x, y) = (f64::from(x), f64::from(y));
            (dot + x * y, sq_a + x * x, sq_b + y * y)
        },
    );

    let (norm_a, norm_b) = (sq_a.sqrt(), sq_b.sqrt());
    let min_norm = f64::from(f32::EPSILON);
    if norm_a < min_norm || norm_b < min_norm {
        return 0.0;
    }

    let cosine = dot / (norm_a * norm_b);
    if cosine.is_finite() {
        // Rounding can push the ratio marginally outside [-1, 1]; after the
        // clamp the narrowing cast cannot overflow.
        cosine.clamp(-1.0, 1.0) as f32
    } else {
        0.0
    }
}
|
|
151
|
+
|
|
152
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Dimensionality used by every test vector.
    const DIM: usize = 32;

    /// Fresh, empty index configured for `DIM`-dimensional vectors.
    fn make_index() -> InMemoryIndex {
        InMemoryIndex::with_dimensions(DIM)
    }

    /// One-hot vector with a `1.0` at position `hot % DIM`.
    fn unit_vec(hot: usize) -> Vec<f32> {
        let mut v = vec![0.0_f32; DIM];
        v[hot % DIM] = 1.0;
        v
    }

    /// Deterministic pseudo-random vector with components in `[-1.0, 1.0]`,
    /// generated by a 64-bit linear congruential generator seeded with `seed`.
    fn rand_vec(seed: u64) -> Vec<f32> {
        let mut x = seed;
        (0..DIM)
            .map(|_| {
                x = x
                    .wrapping_mul(6_364_136_223_846_793_005)
                    .wrapping_add(1_442_695_040_888_963_407);
                // Use the top 32 bits so the ratio spans [0, 1]. Shifting by
                // 33 leaves only 31 bits, which confined every component to
                // [-1, 0) and never exercised positive values.
                ((x >> 32) as f32 / u32::MAX as f32) * 2.0 - 1.0
            })
            .collect()
    }

    #[test]
    fn test_rand_vec_spans_both_signs() {
        let v = rand_vec(1);
        assert!(v.iter().all(|c| (-1.0..=1.0).contains(c)));
        assert!(v.iter().any(|c| *c > 0.0));
        assert!(v.iter().any(|c| *c < 0.0));
    }

    #[test]
    fn test_insert_increases_len() {
        let mut idx = make_index();
        assert_eq!(idx.len(), 0);
        idx.insert(Uuid::new_v4(), rand_vec(1)).expect("insert");
        assert_eq!(idx.len(), 1);
    }

    #[test]
    fn test_empty_index_search_returns_empty() {
        let idx = make_index();
        assert!(idx.search(&rand_vec(2), 5).expect("search").is_empty());
    }

    #[test]
    fn test_is_empty() {
        let mut idx = make_index();
        assert!(idx.is_empty());
        idx.insert(Uuid::new_v4(), rand_vec(3)).expect("insert");
        assert!(!idx.is_empty());
    }

    #[test]
    fn test_search_returns_nearest_neighbour() {
        let mut idx = make_index();
        let query = unit_vec(0);
        let id_match = Uuid::new_v4();
        let id_other = Uuid::new_v4();
        idx.insert(id_match, unit_vec(0)).expect("insert match");
        idx.insert(id_other, unit_vec(1)).expect("insert other");
        let results = idx.search(&query, 1).expect("search");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].id, id_match);
    }

    #[test]
    fn test_search_results_ordered_by_score_descending() {
        let mut idx = make_index();
        let query = rand_vec(42);
        for i in 0..5 {
            idx.insert(Uuid::new_v4(), rand_vec(i + 100))
                .expect("insert");
        }
        let results = idx.search(&query, 5).expect("search");
        for w in results.windows(2) {
            assert!(w[0].score >= w[1].score, "{} < {}", w[0].score, w[1].score);
        }
    }

    #[test]
    fn test_search_top_k_limits_results() {
        let mut idx = make_index();
        for i in 0..10 {
            idx.insert(Uuid::new_v4(), rand_vec(i)).expect("insert");
        }
        assert_eq!(idx.search(&rand_vec(99), 3).expect("search").len(), 3);
    }

    #[test]
    fn test_remove_existing_returns_true() {
        let mut idx = make_index();
        let id = Uuid::new_v4();
        idx.insert(id, rand_vec(5)).expect("insert");
        assert!(idx.remove(&id).expect("remove"));
        assert_eq!(idx.len(), 0);
    }

    #[test]
    fn test_remove_nonexistent_returns_false() {
        let mut idx = make_index();
        assert!(!idx.remove(&Uuid::new_v4()).expect("remove"));
    }

    #[test]
    fn test_removed_vector_not_in_results() {
        let mut idx = make_index();
        let query = unit_vec(0);
        let id = Uuid::new_v4();
        idx.insert(id, unit_vec(0)).expect("insert");
        idx.remove(&id).expect("remove");
        assert!(
            !idx.search(&query, 10)
                .expect("search")
                .iter()
                .any(|r| r.id == id)
        );
    }

    #[test]
    fn test_zero_top_k_errors() {
        let mut idx = make_index();
        // Fail loudly if the fixture cannot be built instead of silently
        // testing against an empty index.
        idx.insert(Uuid::new_v4(), rand_vec(6)).expect("insert");
        assert!(matches!(
            idx.search(&rand_vec(7), 0).expect_err("e"),
            IndexError::ZeroTopK
        ));
    }

    #[test]
    fn test_dimension_mismatch_errors() {
        let idx = make_index();
        let short = vec![0.0_f32; DIM - 1];
        assert!(matches!(
            idx.search(&short, 1).expect_err("mismatched query must fail"),
            IndexError::DimensionMismatch { expected: DIM, got } if got == DIM - 1
        ));
    }
}
|
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
[package]
|
|
2
|
-
name = "clawpowers-policy"
|
|
3
|
-
version.workspace = true
|
|
4
|
-
edition.workspace = true
|
|
5
|
-
license.workspace = true
|
|
6
|
-
|
|
7
|
-
[dependencies]
|
|
8
|
-
serde = { workspace = true }
|
|
9
|
-
serde_json = { workspace = true }
|
|
10
|
-
thiserror = { workspace = true }
|
|
11
|
-
tracing = { workspace = true }
|
|
12
|
-
chrono = { workspace = true }
|
|
13
|
-
alloy-primitives = { workspace = true }
|
|
14
|
-
clawpowers-tokens = { path = "../tokens" }
|
|
15
|
-
|
|
16
|
-
[dev-dependencies]
|
|
17
|
-
tokio = { workspace = true }
|
|
1
|
+
[package]
|
|
2
|
+
name = "clawpowers-policy"
|
|
3
|
+
version.workspace = true
|
|
4
|
+
edition.workspace = true
|
|
5
|
+
license.workspace = true
|
|
6
|
+
|
|
7
|
+
[dependencies]
|
|
8
|
+
serde = { workspace = true }
|
|
9
|
+
serde_json = { workspace = true }
|
|
10
|
+
thiserror = { workspace = true }
|
|
11
|
+
tracing = { workspace = true }
|
|
12
|
+
chrono = { workspace = true }
|
|
13
|
+
alloy-primitives = { workspace = true }
|
|
14
|
+
clawpowers-tokens = { path = "../tokens" }
|
|
15
|
+
|
|
16
|
+
[dev-dependencies]
|
|
17
|
+
tokio = { workspace = true }
|