@semiont/vectors 0.5.6 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -6
- package/dist/{chunk-IWHPVMXW.js → chunk-LCTHZYK4.js} +87 -25
- package/dist/chunk-LCTHZYK4.js.map +1 -0
- package/dist/{chunk-INCF7JMV.js → chunk-UM3RNDW4.js} +1 -1
- package/dist/chunk-UM3RNDW4.js.map +1 -0
- package/dist/index.d.ts +41 -9
- package/dist/index.js +36 -18
- package/dist/index.js.map +1 -1
- package/dist/qdrant-SE4WDTDB.js +7 -0
- package/dist/{voyage-UWOFVIYM.js → voyage-NUSVXNJN.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-INCF7JMV.js.map +0 -1
- package/dist/chunk-IWHPVMXW.js.map +0 -1
- package/dist/qdrant-YF2TKFCE.js +0 -7
- /package/dist/{qdrant-YF2TKFCE.js.map → qdrant-SE4WDTDB.js.map} +0 -0
- /package/dist/{voyage-UWOFVIYM.js.map → voyage-NUSVXNJN.js.map} +0 -0
package/README.md
CHANGED
|
@@ -28,7 +28,7 @@ const store = await createVectorStore({
|
|
|
28
28
|
});
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
-
Requires a running [Qdrant](https://qdrant.tech) instance. The `@qdrant/js-client-rest`
|
|
31
|
+
Requires a running [Qdrant](https://qdrant.tech) instance. The `@qdrant/js-client-rest` client is lazy-loaded on `connect()`. Collections are auto-created if they don't exist.
|
|
32
32
|
|
|
33
33
|
### Memory (testing)
|
|
34
34
|
|
|
@@ -115,15 +115,17 @@ await store.upsertResourceVectors(resourceId, chunks.map((text, i) => ({
|
|
|
115
115
|
|
|
116
116
|
// Index an annotation
|
|
117
117
|
const vec = await provider.embed(annotation.exactText);
|
|
118
|
-
await store.upsertAnnotationVector(annotationId, {
|
|
118
|
+
await store.upsertAnnotationVector(annotationId, vec, {
|
|
119
119
|
annotationId,
|
|
120
120
|
resourceId,
|
|
121
121
|
motivation: 'describing',
|
|
122
122
|
entityTypes: ['Person'],
|
|
123
123
|
exactText: 'Marie Curie',
|
|
124
|
-
}
|
|
124
|
+
});
|
|
125
125
|
```
|
|
126
126
|
|
|
127
|
+
`upsertResourceVectors` replaces all existing vectors for the resource, so re-indexing a resource that shrank leaves no orphan chunks.
|
|
128
|
+
|
|
127
129
|
## Configuration
|
|
128
130
|
|
|
129
131
|
In `semiont.toml`:
|
|
@@ -134,13 +136,13 @@ type = "qdrant"
|
|
|
134
136
|
host = "localhost"
|
|
135
137
|
port = 6333
|
|
136
138
|
|
|
137
|
-
[environments.local.services.
|
|
139
|
+
[environments.local.services.embedding]
|
|
138
140
|
type = "voyage"
|
|
139
141
|
model = "voyage-3"
|
|
140
142
|
|
|
141
|
-
[environments.local.services.
|
|
143
|
+
[environments.local.services.embedding.chunking]
|
|
142
144
|
chunkSize = 512
|
|
143
|
-
overlap =
|
|
145
|
+
overlap = 64
|
|
144
146
|
```
|
|
145
147
|
|
|
146
148
|
## License
|
|
@@ -6,11 +6,14 @@ function toQdrantId(input) {
|
|
|
6
6
|
}
|
|
7
7
|
var QdrantVectorStore = class {
|
|
8
8
|
client = null;
|
|
9
|
-
connected = false;
|
|
10
9
|
config;
|
|
11
10
|
constructor(config) {
|
|
12
11
|
this.config = config;
|
|
13
12
|
}
|
|
13
|
+
get qdrant() {
|
|
14
|
+
if (!this.client) throw new Error("QdrantVectorStore is not connected");
|
|
15
|
+
return this.client;
|
|
16
|
+
}
|
|
14
17
|
async connect() {
|
|
15
18
|
const { QdrantClient } = await import("@qdrant/js-client-rest");
|
|
16
19
|
this.client = new QdrantClient({
|
|
@@ -19,37 +22,36 @@ var QdrantVectorStore = class {
|
|
|
19
22
|
});
|
|
20
23
|
await this.ensureCollection("resources", this.config.dimensions);
|
|
21
24
|
await this.ensureCollection("annotations", this.config.dimensions);
|
|
22
|
-
this.connected = true;
|
|
23
25
|
}
|
|
24
26
|
async disconnect() {
|
|
25
27
|
this.client = null;
|
|
26
|
-
this.connected = false;
|
|
27
28
|
}
|
|
28
29
|
async clearAll() {
|
|
29
30
|
try {
|
|
30
|
-
await this.
|
|
31
|
+
await this.qdrant.deleteCollection("resources");
|
|
31
32
|
} catch {
|
|
32
33
|
}
|
|
33
34
|
try {
|
|
34
|
-
await this.
|
|
35
|
+
await this.qdrant.deleteCollection("annotations");
|
|
35
36
|
} catch {
|
|
36
37
|
}
|
|
37
38
|
await this.ensureCollection("resources", this.config.dimensions);
|
|
38
39
|
await this.ensureCollection("annotations", this.config.dimensions);
|
|
39
40
|
}
|
|
40
41
|
isConnected() {
|
|
41
|
-
return this.
|
|
42
|
+
return this.client !== null;
|
|
42
43
|
}
|
|
43
44
|
async ensureCollection(name, dimensions) {
|
|
44
45
|
try {
|
|
45
|
-
await this.
|
|
46
|
+
await this.qdrant.getCollection(name);
|
|
46
47
|
} catch {
|
|
47
|
-
await this.
|
|
48
|
+
await this.qdrant.createCollection(name, {
|
|
48
49
|
vectors: { size: dimensions, distance: "Cosine" }
|
|
49
50
|
});
|
|
50
51
|
}
|
|
51
52
|
}
|
|
52
|
-
async upsertResourceVectors(resourceId, chunks) {
|
|
53
|
+
async upsertResourceVectors(resourceId, chunks, contentChecksum) {
|
|
54
|
+
await this.deleteResourceVectors(resourceId);
|
|
53
55
|
if (chunks.length === 0) return;
|
|
54
56
|
const points = chunks.map((chunk) => ({
|
|
55
57
|
id: toQdrantId(`${resourceId}-${chunk.chunkIndex}`),
|
|
@@ -57,13 +59,14 @@ var QdrantVectorStore = class {
|
|
|
57
59
|
payload: {
|
|
58
60
|
resourceId: String(resourceId),
|
|
59
61
|
chunkIndex: chunk.chunkIndex,
|
|
60
|
-
text: chunk.text
|
|
62
|
+
text: chunk.text,
|
|
63
|
+
contentChecksum
|
|
61
64
|
}
|
|
62
65
|
}));
|
|
63
|
-
await this.
|
|
66
|
+
await this.qdrant.upsert("resources", { points });
|
|
64
67
|
}
|
|
65
68
|
async upsertAnnotationVector(annotationId, embedding, payload) {
|
|
66
|
-
await this.
|
|
69
|
+
await this.qdrant.upsert("annotations", {
|
|
67
70
|
points: [{
|
|
68
71
|
id: toQdrantId(String(annotationId)),
|
|
69
72
|
vector: embedding,
|
|
@@ -78,17 +81,73 @@ var QdrantVectorStore = class {
|
|
|
78
81
|
});
|
|
79
82
|
}
|
|
80
83
|
async deleteResourceVectors(resourceId) {
|
|
81
|
-
await this.
|
|
84
|
+
await this.qdrant.delete("resources", {
|
|
82
85
|
filter: {
|
|
83
86
|
must: [{ key: "resourceId", match: { value: String(resourceId) } }]
|
|
84
87
|
}
|
|
85
88
|
});
|
|
86
89
|
}
|
|
87
90
|
async deleteAnnotationVector(annotationId) {
|
|
88
|
-
await this.
|
|
91
|
+
await this.qdrant.delete("annotations", {
|
|
89
92
|
points: [toQdrantId(String(annotationId))]
|
|
90
93
|
});
|
|
91
94
|
}
|
|
95
|
+
async deleteAnnotationVectorsForResource(resourceId) {
|
|
96
|
+
await this.qdrant.delete("annotations", {
|
|
97
|
+
filter: {
|
|
98
|
+
must: [{ key: "resourceId", match: { value: String(resourceId) } }]
|
|
99
|
+
}
|
|
100
|
+
});
|
|
101
|
+
}
|
|
102
|
+
async count() {
|
|
103
|
+
const [resources, annotations] = await Promise.all([
|
|
104
|
+
this.qdrant.count("resources", { exact: true }),
|
|
105
|
+
this.qdrant.count("annotations", { exact: true })
|
|
106
|
+
]);
|
|
107
|
+
return resources.count + annotations.count;
|
|
108
|
+
}
|
|
109
|
+
async listResourceChecksums() {
|
|
110
|
+
const checksums = /* @__PURE__ */ new Map();
|
|
111
|
+
let offset = void 0;
|
|
112
|
+
do {
|
|
113
|
+
const page = await this.qdrant.scroll("resources", {
|
|
114
|
+
limit: 1e3,
|
|
115
|
+
offset,
|
|
116
|
+
with_payload: ["resourceId", "contentChecksum"],
|
|
117
|
+
with_vector: false
|
|
118
|
+
});
|
|
119
|
+
for (const point of page.points) {
|
|
120
|
+
const rid = point.payload?.resourceId;
|
|
121
|
+
if (typeof rid !== "string" || checksums.has(rid)) continue;
|
|
122
|
+
const checksum = point.payload?.contentChecksum;
|
|
123
|
+
checksums.set(rid, typeof checksum === "string" ? checksum : void 0);
|
|
124
|
+
}
|
|
125
|
+
offset = page.next_page_offset ?? void 0;
|
|
126
|
+
} while (offset !== void 0 && offset !== null);
|
|
127
|
+
return checksums;
|
|
128
|
+
}
|
|
129
|
+
async listAnnotationIds() {
|
|
130
|
+
return this.scrollPayloadField("annotations", "annotationId");
|
|
131
|
+
}
|
|
132
|
+
/** Collect the distinct values of one payload field across a collection. */
|
|
133
|
+
async scrollPayloadField(collection, field) {
|
|
134
|
+
const values = /* @__PURE__ */ new Set();
|
|
135
|
+
let offset = void 0;
|
|
136
|
+
do {
|
|
137
|
+
const page = await this.qdrant.scroll(collection, {
|
|
138
|
+
limit: 1e3,
|
|
139
|
+
offset,
|
|
140
|
+
with_payload: [field],
|
|
141
|
+
with_vector: false
|
|
142
|
+
});
|
|
143
|
+
for (const point of page.points) {
|
|
144
|
+
const value = point.payload?.[field];
|
|
145
|
+
if (typeof value === "string") values.add(value);
|
|
146
|
+
}
|
|
147
|
+
offset = page.next_page_offset ?? void 0;
|
|
148
|
+
} while (offset !== void 0 && offset !== null);
|
|
149
|
+
return values;
|
|
150
|
+
}
|
|
92
151
|
async searchResources(embedding, opts) {
|
|
93
152
|
return this.search("resources", embedding, opts);
|
|
94
153
|
}
|
|
@@ -97,21 +156,24 @@ var QdrantVectorStore = class {
|
|
|
97
156
|
}
|
|
98
157
|
async search(collection, embedding, opts) {
|
|
99
158
|
const filter = this.buildFilter(opts.filter);
|
|
100
|
-
const results = await this.
|
|
159
|
+
const results = await this.qdrant.search(collection, {
|
|
101
160
|
vector: embedding,
|
|
102
161
|
limit: opts.limit,
|
|
103
162
|
score_threshold: opts.scoreThreshold,
|
|
104
|
-
filter: filter
|
|
163
|
+
filter: filter ?? void 0,
|
|
105
164
|
with_payload: true
|
|
106
165
|
});
|
|
107
|
-
return results.map((r) =>
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
166
|
+
return results.map((r) => {
|
|
167
|
+
const payload = r.payload ?? {};
|
|
168
|
+
return {
|
|
169
|
+
id: String(r.id),
|
|
170
|
+
score: r.score,
|
|
171
|
+
resourceId: payload.resourceId,
|
|
172
|
+
annotationId: payload.annotationId,
|
|
173
|
+
text: payload.text,
|
|
174
|
+
entityTypes: payload.entityTypes
|
|
175
|
+
};
|
|
176
|
+
});
|
|
115
177
|
}
|
|
116
178
|
buildFilter(filter) {
|
|
117
179
|
if (!filter) return null;
|
|
@@ -140,4 +202,4 @@ var QdrantVectorStore = class {
|
|
|
140
202
|
export {
|
|
141
203
|
QdrantVectorStore
|
|
142
204
|
};
|
|
143
|
-
//# sourceMappingURL=chunk-
|
|
205
|
+
//# sourceMappingURL=chunk-LCTHZYK4.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/store/qdrant.ts"],"sourcesContent":["/**\n * Qdrant VectorStore Implementation\n *\n * Uses the Qdrant REST API via @qdrant/js-client-rest.\n * Manages two collections: 'resources' and 'annotations'.\n */\n\nimport { createHash } from 'crypto';\nimport type { QdrantClient, Schemas } from '@qdrant/js-client-rest';\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\n/**\n * Generate a deterministic UUID v5-style ID from an arbitrary string.\n * Qdrant requires point IDs to be UUIDs or unsigned integers.\n */\nfunction toQdrantId(input: string): string {\n const hex = createHash('md5').update(input).digest('hex');\n return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;\n}\n\nexport interface QdrantConfig {\n host: string;\n port: number;\n dimensions: number;\n}\n\nexport class QdrantVectorStore implements VectorStore {\n private client: QdrantClient | null = null;\n private config: QdrantConfig;\n\n constructor(config: QdrantConfig) {\n this.config = config;\n }\n\n private get qdrant(): QdrantClient {\n if (!this.client) throw new Error('QdrantVectorStore is not connected');\n return this.client;\n }\n\n async connect(): Promise<void> {\n const { QdrantClient } = await import('@qdrant/js-client-rest');\n this.client = new QdrantClient({\n host: this.config.host,\n port: this.config.port,\n });\n\n // Ensure collections exist\n await this.ensureCollection('resources', this.config.dimensions);\n await this.ensureCollection('annotations', this.config.dimensions);\n }\n\n async disconnect(): Promise<void> {\n this.client = null;\n }\n\n async clearAll(): Promise<void> {\n try { await this.qdrant.deleteCollection('resources'); } catch { /* may not exist */ }\n try { await this.qdrant.deleteCollection('annotations'); } catch { /* may not exist */ }\n 
await this.ensureCollection('resources', this.config.dimensions);\n await this.ensureCollection('annotations', this.config.dimensions);\n }\n\n isConnected(): boolean {\n return this.client !== null;\n }\n\n private async ensureCollection(name: string, dimensions: number): Promise<void> {\n try {\n await this.qdrant.getCollection(name);\n } catch {\n await this.qdrant.createCollection(name, {\n vectors: { size: dimensions, distance: 'Cosine' },\n });\n }\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[], contentChecksum: string): Promise<void> {\n // Replace semantics: purge existing chunks first, or a resource that\n // shrinks leaves orphan points at the higher chunk indices.\n await this.deleteResourceVectors(resourceId);\n if (chunks.length === 0) return;\n\n const points = chunks.map((chunk) => ({\n id: toQdrantId(`${resourceId}-${chunk.chunkIndex}`),\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n contentChecksum,\n },\n }));\n\n await this.qdrant.upsert('resources', { points });\n }\n\n async upsertAnnotationVector(\n annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n await this.qdrant.upsert('annotations', {\n points: [{\n id: toQdrantId(String(annotationId)),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n }],\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n await this.qdrant.delete('resources', {\n filter: {\n must: [{ key: 'resourceId', match: { value: String(resourceId) } }],\n },\n });\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n await this.qdrant.delete('annotations', {\n points: [toQdrantId(String(annotationId))],\n });\n }\n\n async 
deleteAnnotationVectorsForResource(resourceId: ResourceId): Promise<void> {\n await this.qdrant.delete('annotations', {\n filter: {\n must: [{ key: 'resourceId', match: { value: String(resourceId) } }],\n },\n });\n }\n\n async count(): Promise<number> {\n const [resources, annotations] = await Promise.all([\n this.qdrant.count('resources', { exact: true }),\n this.qdrant.count('annotations', { exact: true }),\n ]);\n return resources.count + annotations.count;\n }\n\n async listResourceChecksums(): Promise<Map<string, string | undefined>> {\n const checksums = new Map<string, string | undefined>();\n let offset: Schemas['ScrollRequest']['offset'] = undefined;\n do {\n const page = await this.qdrant.scroll('resources', {\n limit: 1000,\n offset,\n with_payload: ['resourceId', 'contentChecksum'],\n with_vector: false,\n });\n for (const point of page.points) {\n const rid = point.payload?.resourceId;\n if (typeof rid !== 'string' || checksums.has(rid)) continue;\n const checksum = point.payload?.contentChecksum;\n checksums.set(rid, typeof checksum === 'string' ? checksum : undefined);\n }\n offset = page.next_page_offset ?? undefined;\n } while (offset !== undefined && offset !== null);\n return checksums;\n }\n\n async listAnnotationIds(): Promise<Set<string>> {\n return this.scrollPayloadField('annotations', 'annotationId');\n }\n\n /** Collect the distinct values of one payload field across a collection. */\n private async scrollPayloadField(collection: string, field: string): Promise<Set<string>> {\n const values = new Set<string>();\n let offset: Schemas['ScrollRequest']['offset'] = undefined;\n do {\n const page = await this.qdrant.scroll(collection, {\n limit: 1000,\n offset,\n with_payload: [field],\n with_vector: false,\n });\n for (const point of page.points) {\n const value = point.payload?.[field];\n if (typeof value === 'string') values.add(value);\n }\n offset = page.next_page_offset ?? 
undefined;\n } while (offset !== undefined && offset !== null);\n return values;\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search('resources', embedding, opts);\n }\n\n async searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search('annotations', embedding, opts);\n }\n\n private async search(collection: string, embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n const filter = this.buildFilter(opts.filter);\n\n const results = await this.qdrant.search(collection, {\n vector: embedding,\n limit: opts.limit,\n score_threshold: opts.scoreThreshold,\n filter: filter ?? undefined,\n with_payload: true,\n });\n\n return results.map((r) => {\n const payload = r.payload ?? {};\n return {\n id: String(r.id),\n score: r.score,\n resourceId: payload.resourceId as ResourceId,\n annotationId: payload.annotationId as AnnotationId | undefined,\n text: payload.text as string,\n entityTypes: payload.entityTypes as string[] | undefined,\n };\n });\n }\n\n private buildFilter(filter?: SearchOptions['filter']): Schemas['Filter'] | null {\n if (!filter) return null;\n\n const must: Schemas['FieldCondition'][] = [];\n\n if (filter.entityTypes && filter.entityTypes.length > 0) {\n // any-of: match payloads whose `entityTypes` array contains at least one\n // of the requested types. 
Matches the memory store's `some(t => ...)`\n // semantics; pushing one `must` clause per type would mean all-of.\n must.push({ key: 'entityTypes', match: { any: filter.entityTypes } });\n }\n\n if (filter.resourceId) {\n must.push({ key: 'resourceId', match: { value: String(filter.resourceId) } });\n }\n\n if (filter.motivation) {\n must.push({ key: 'motivation', match: { value: filter.motivation } });\n }\n\n const must_not: Schemas['FieldCondition'][] = [];\n\n if (filter.excludeResourceId) {\n must_not.push({ key: 'resourceId', match: { value: String(filter.excludeResourceId) } });\n }\n\n if (must.length === 0 && must_not.length === 0) return null;\n\n return {\n ...(must.length > 0 ? { must } : {}),\n ...(must_not.length > 0 ? { must_not } : {}),\n };\n }\n}\n"],"mappings":";AAOA,SAAS,kBAAkB;AAS3B,SAAS,WAAW,OAAuB;AACzC,QAAM,MAAM,WAAW,KAAK,EAAE,OAAO,KAAK,EAAE,OAAO,KAAK;AACxD,SAAO,GAAG,IAAI,MAAM,GAAG,CAAC,CAAC,IAAI,IAAI,MAAM,GAAG,EAAE,CAAC,IAAI,IAAI,MAAM,IAAI,EAAE,CAAC,IAAI,IAAI,MAAM,IAAI,EAAE,CAAC,IAAI,IAAI,MAAM,IAAI,EAAE,CAAC;AAC9G;AAQO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,SAA8B;AAAA,EAC9B;AAAA,EAER,YAAY,QAAsB;AAChC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,IAAY,SAAuB;AACjC,QAAI,CAAC,KAAK,OAAQ,OAAM,IAAI,MAAM,oCAAoC;AACtE,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,UAAyB;AAC7B,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,wBAAwB;AAC9D,SAAK,SAAS,IAAI,aAAa;AAAA,MAC7B,MAAM,KAAK,OAAO;AAAA,MAClB,MAAM,KAAK,OAAO;AAAA,IACpB,CAAC;AAGD,UAAM,KAAK,iBAAiB,aAAa,KAAK,OAAO,UAAU;AAC/D,UAAM,KAAK,iBAAiB,eAAe,KAAK,OAAO,UAAU;AAAA,EACnE;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,WAA0B;AAC9B,QAAI;AAAE,YAAM,KAAK,OAAO,iBAAiB,WAAW;AAAA,IAAG,QAAQ;AAAA,IAAsB;AACrF,QAAI;AAAE,YAAM,KAAK,OAAO,iBAAiB,aAAa;AAAA,IAAG,QAAQ;AAAA,IAAsB;AACvF,UAAM,KAAK,iBAAiB,aAAa,KAAK,OAAO,UAAU;AAC/D,UAAM,KAAK,iBAAiB,eAAe,KAAK,OAAO,UAAU;AAAA,EACnE;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,MAAc,iBAAiB,MAAc,YAAmC;AAC9E,QAAI;AACF,YAAM,KAAK,OAAO,cAAc,IAAI;AAAA,IACtC,QAAQ;AACN,YAAM,KAAK,OAAO,iBAAiB,MAAM;AAAA,QACvC,SAAS,EAAE,MAAM
,YAAY,UAAU,SAAS;AAAA,MAClD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAA0B,iBAAwC;AAGpH,UAAM,KAAK,sBAAsB,UAAU;AAC3C,QAAI,OAAO,WAAW,EAAG;AAEzB,UAAM,SAAS,OAAO,IAAI,CAAC,WAAW;AAAA,MACpC,IAAI,WAAW,GAAG,UAAU,IAAI,MAAM,UAAU,EAAE;AAAA,MAClD,QAAQ,MAAM;AAAA,MACd,SAAS;AAAA,QACP,YAAY,OAAO,UAAU;AAAA,QAC7B,YAAY,MAAM;AAAA,QAClB,MAAM,MAAM;AAAA,QACZ;AAAA,MACF;AAAA,IACF,EAAE;AAEF,UAAM,KAAK,OAAO,OAAO,aAAa,EAAE,OAAO,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,UAAM,KAAK,OAAO,OAAO,eAAe;AAAA,MACtC,QAAQ,CAAC;AAAA,QACP,IAAI,WAAW,OAAO,YAAY,CAAC;AAAA,QACnC,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,cAAc,OAAO,QAAQ,YAAY;AAAA,UACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,UACrC,YAAY,QAAQ;AAAA,UACpB,aAAa,QAAQ;AAAA,UACrB,MAAM,QAAQ;AAAA,QAChB;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,sBAAsB,YAAuC;AACjE,UAAM,KAAK,OAAO,OAAO,aAAa;AAAA,MACpC,QAAQ;AAAA,QACN,MAAM,CAAC,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,UAAU,EAAE,EAAE,CAAC;AAAA,MACpE;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,UAAM,KAAK,OAAO,OAAO,eAAe;AAAA,MACtC,QAAQ,CAAC,WAAW,OAAO,YAAY,CAAC,CAAC;AAAA,IAC3C,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,mCAAmC,YAAuC;AAC9E,UAAM,KAAK,OAAO,OAAO,eAAe;AAAA,MACtC,QAAQ;AAAA,QACN,MAAM,CAAC,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,UAAU,EAAE,EAAE,CAAC;AAAA,MACpE;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAyB;AAC7B,UAAM,CAAC,WAAW,WAAW,IAAI,MAAM,QAAQ,IAAI;AAAA,MACjD,KAAK,OAAO,MAAM,aAAa,EAAE,OAAO,KAAK,CAAC;AAAA,MAC9C,KAAK,OAAO,MAAM,eAAe,EAAE,OAAO,KAAK,CAAC;AAAA,IAClD,CAAC;AACD,WAAO,UAAU,QAAQ,YAAY;AAAA,EACvC;AAAA,EAEA,MAAM,wBAAkE;AACtE,UAAM,YAAY,oBAAI,IAAgC;AACtD,QAAI,SAA6C;AACjD,OAAG;AACD,YAAM,OAAO,MAAM,KAAK,OAAO,OAAO,aAAa;AAAA,QACjD,OAAO;AAAA,QACP;AAAA,QACA,cAAc,CAAC,cAAc,iBAAiB;AAAA,QAC9C,aAAa;AAAA,MACf,CAAC;AACD,iBAAW,SAAS,KAAK,QAAQ;AAC/B,cAAM,MAAM,MAAM,SAAS;AAC3B,YAAI,OAAO,QAAQ,YAAY,UAAU,IAAI,GAAG,EAAG;AACnD,cAAM,WAAW,MAAM,SAAS;AAChC,kBAAU,IAAI,KAAK,OAAO,aAAa,WAAW,WAAW,MAAS;AAAA,MACxE;AACA,eAAS,KAAK,oBAAoB;AAAA,IACpC,SAAS,WAAW,UAAa,WAAW;AAC5C,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,oBAA0C;AAC9C,WAAO,KAAK,mBAAmB,eAAe,cAAc;AA
AA,EAC9D;AAAA;AAAA,EAGA,MAAc,mBAAmB,YAAoB,OAAqC;AACxF,UAAM,SAAS,oBAAI,IAAY;AAC/B,QAAI,SAA6C;AACjD,OAAG;AACD,YAAM,OAAO,MAAM,KAAK,OAAO,OAAO,YAAY;AAAA,QAChD,OAAO;AAAA,QACP;AAAA,QACA,cAAc,CAAC,KAAK;AAAA,QACpB,aAAa;AAAA,MACf,CAAC;AACD,iBAAW,SAAS,KAAK,QAAQ;AAC/B,cAAM,QAAQ,MAAM,UAAU,KAAK;AACnC,YAAI,OAAO,UAAU,SAAU,QAAO,IAAI,KAAK;AAAA,MACjD;AACA,eAAS,KAAK,oBAAoB;AAAA,IACpC,SAAS,WAAW,UAAa,WAAW;AAC5C,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,aAAa,WAAW,IAAI;AAAA,EACjD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,eAAe,WAAW,IAAI;AAAA,EACnD;AAAA,EAEA,MAAc,OAAO,YAAoB,WAAqB,MAAoD;AAChH,UAAM,SAAS,KAAK,YAAY,KAAK,MAAM;AAE3C,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,YAAY;AAAA,MACnD,QAAQ;AAAA,MACR,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK;AAAA,MACtB,QAAQ,UAAU;AAAA,MAClB,cAAc;AAAA,IAChB,CAAC;AAED,WAAO,QAAQ,IAAI,CAAC,MAAM;AACxB,YAAM,UAAU,EAAE,WAAW,CAAC;AAC9B,aAAO;AAAA,QACL,IAAI,OAAO,EAAE,EAAE;AAAA,QACf,OAAO,EAAE;AAAA,QACT,YAAY,QAAQ;AAAA,QACpB,cAAc,QAAQ;AAAA,QACtB,MAAM,QAAQ;AAAA,QACd,aAAa,QAAQ;AAAA,MACvB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEQ,YAAY,QAA4D;AAC9E,QAAI,CAAC,OAAQ,QAAO;AAEpB,UAAM,OAAoC,CAAC;AAE3C,QAAI,OAAO,eAAe,OAAO,YAAY,SAAS,GAAG;AAIvD,WAAK,KAAK,EAAE,KAAK,eAAe,OAAO,EAAE,KAAK,OAAO,YAAY,EAAE,CAAC;AAAA,IACtE;AAEA,QAAI,OAAO,YAAY;AACrB,WAAK,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,OAAO,UAAU,EAAE,EAAE,CAAC;AAAA,IAC9E;AAEA,QAAI,OAAO,YAAY;AACrB,WAAK,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,WAAW,EAAE,CAAC;AAAA,IACtE;AAEA,UAAM,WAAwC,CAAC;AAE/C,QAAI,OAAO,mBAAmB;AAC5B,eAAS,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,OAAO,iBAAiB,EAAE,EAAE,CAAC;AAAA,IACzF;AAEA,QAAI,KAAK,WAAW,KAAK,SAAS,WAAW,EAAG,QAAO;AAEvD,WAAO;AAAA,MACL,GAAI,KAAK,SAAS,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,SAAS,SAAS,IAAI,EAAE,SAAS,IAAI,CAAC;AAAA,IAC5C;AAAA,EACF;AACF;","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/embedding/voyage.ts"],"sourcesContent":["/**\n * Voyage AI Embedding Provider\n *\n * Cloud embedding via the Voyage AI API.\n * Requires a Voyage AI API key (distinct from Anthropic inference keys).\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface VoyageConfig {\n apiKey: string;\n model: string;\n endpoint?: string;\n}\n\nconst VOYAGE_DIMENSIONS: Record<string, number> = {\n 'voyage-3': 1024,\n 'voyage-3-lite': 512,\n 'voyage-code-3': 1024,\n 'voyage-finance-2': 1024,\n 'voyage-law-2': 1024,\n};\n\nexport class VoyageEmbeddingProvider implements EmbeddingProvider {\n private config: VoyageConfig;\n\n constructor(config: VoyageConfig) {\n this.config = config;\n }\n\n async embed(text: string): Promise<number[]> {\n const results = await this.embedBatch([text]);\n return results[0];\n }\n\n async embedBatch(texts: string[]): Promise<number[][]> {\n const endpoint = this.config.endpoint ?? 'https://api.voyageai.com/v1/embeddings';\n\n const response = await fetch(endpoint, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this.config.apiKey}`,\n },\n body: JSON.stringify({\n model: this.config.model,\n input: texts,\n }),\n });\n\n if (!response.ok) {\n const body = await response.text();\n throw new Error(`Voyage API error ${response.status}: ${body}`);\n }\n\n const json = await response.json() as { data: Array<{ embedding: number[] }> };\n return json.data.map(d => d.embedding);\n }\n\n dimensions(): number {\n return VOYAGE_DIMENSIONS[this.config.model] ?? 
1024;\n }\n\n model(): string {\n return this.config.model;\n }\n}\n"],"mappings":";AAeA,IAAM,oBAA4C;AAAA,EAChD,YAAY;AAAA,EACZ,iBAAiB;AAAA,EACjB,iBAAiB;AAAA,EACjB,oBAAoB;AAAA,EACpB,gBAAgB;AAClB;AAEO,IAAM,0BAAN,MAA2D;AAAA,EACxD;AAAA,EAER,YAAY,QAAsB;AAChC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,MAAM,MAAiC;AAC3C,UAAM,UAAU,MAAM,KAAK,WAAW,CAAC,IAAI,CAAC;AAC5C,WAAO,QAAQ,CAAC;AAAA,EAClB;AAAA,EAEA,MAAM,WAAW,OAAsC;AACrD,UAAM,WAAW,KAAK,OAAO,YAAY;AAEzC,UAAM,WAAW,MAAM,MAAM,UAAU;AAAA,MACrC,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,iBAAiB,UAAU,KAAK,OAAO,MAAM;AAAA,MAC/C;AAAA,MACA,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK,OAAO;AAAA,QACnB,OAAO;AAAA,MACT,CAAC;AAAA,IACH,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,YAAM,IAAI,MAAM,oBAAoB,SAAS,MAAM,KAAK,IAAI,EAAE;AAAA,IAChE;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AACjC,WAAO,KAAK,KAAK,IAAI,OAAK,EAAE,SAAS;AAAA,EACvC;AAAA,EAEA,aAAqB;AACnB,WAAO,kBAAkB,KAAK,OAAO,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,QAAgB;AACd,WAAO,KAAK,OAAO;AAAA,EACrB;AACF;","names":[]}
|
package/dist/index.d.ts
CHANGED
|
@@ -43,12 +43,35 @@ interface VectorStore {
|
|
|
43
43
|
disconnect(): Promise<void>;
|
|
44
44
|
isConnected(): boolean;
|
|
45
45
|
clearAll(): Promise<void>;
|
|
46
|
-
|
|
46
|
+
/**
|
|
47
|
+
* Replace all vectors for a resource with the given chunks.
|
|
48
|
+
* Existing vectors for the resource are removed first, so a resource
|
|
49
|
+
* that shrinks to fewer chunks leaves no orphans. `contentChecksum` is
|
|
50
|
+
* the checksum of the bytes the chunks were computed from; it is stamped
|
|
51
|
+
* onto the points so reconciliation can detect stale-but-present
|
|
52
|
+
* resources (SMELTER-AXIOMS.md, S12).
|
|
53
|
+
*/
|
|
54
|
+
upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[], contentChecksum: string): Promise<void>;
|
|
47
55
|
upsertAnnotationVector(annotationId: AnnotationId, embedding: number[], payload: AnnotationPayload): Promise<void>;
|
|
48
56
|
deleteResourceVectors(resourceId: ResourceId): Promise<void>;
|
|
49
57
|
deleteAnnotationVector(annotationId: AnnotationId): Promise<void>;
|
|
58
|
+
/** Delete every annotation vector whose payload points at the resource. */
|
|
59
|
+
deleteAnnotationVectorsForResource(resourceId: ResourceId): Promise<void>;
|
|
50
60
|
searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
51
61
|
searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
62
|
+
/**
|
|
63
|
+
* Total point count across all collections (resources + annotations).
|
|
64
|
+
* Feeds the `semiont.vector.index.size` gauge.
|
|
65
|
+
*/
|
|
66
|
+
count(): Promise<number>;
|
|
67
|
+
/**
|
|
68
|
+
* Distinct resourceIds present in the resources collection, each with its
|
|
69
|
+
* stamped content checksum (undefined for points written before stamping
|
|
70
|
+
* existed — reconciliation treats those as stale and re-embeds them).
|
|
71
|
+
*/
|
|
72
|
+
listResourceChecksums(): Promise<Map<string, string | undefined>>;
|
|
73
|
+
/** Distinct annotationIds present in the annotations collection. */
|
|
74
|
+
listAnnotationIds(): Promise<Set<string>>;
|
|
52
75
|
}
|
|
53
76
|
|
|
54
77
|
/**
|
|
@@ -65,18 +88,24 @@ interface QdrantConfig {
|
|
|
65
88
|
}
|
|
66
89
|
declare class QdrantVectorStore implements VectorStore {
|
|
67
90
|
private client;
|
|
68
|
-
private connected;
|
|
69
91
|
private config;
|
|
70
92
|
constructor(config: QdrantConfig);
|
|
93
|
+
private get qdrant();
|
|
71
94
|
connect(): Promise<void>;
|
|
72
95
|
disconnect(): Promise<void>;
|
|
73
96
|
clearAll(): Promise<void>;
|
|
74
97
|
isConnected(): boolean;
|
|
75
98
|
private ensureCollection;
|
|
76
|
-
upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void>;
|
|
99
|
+
upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[], contentChecksum: string): Promise<void>;
|
|
77
100
|
upsertAnnotationVector(annotationId: AnnotationId, embedding: number[], payload: AnnotationPayload): Promise<void>;
|
|
78
101
|
deleteResourceVectors(resourceId: ResourceId): Promise<void>;
|
|
79
102
|
deleteAnnotationVector(annotationId: AnnotationId): Promise<void>;
|
|
103
|
+
deleteAnnotationVectorsForResource(resourceId: ResourceId): Promise<void>;
|
|
104
|
+
count(): Promise<number>;
|
|
105
|
+
listResourceChecksums(): Promise<Map<string, string | undefined>>;
|
|
106
|
+
listAnnotationIds(): Promise<Set<string>>;
|
|
107
|
+
/** Collect the distinct values of one payload field across a collection. */
|
|
108
|
+
private scrollPayloadField;
|
|
80
109
|
searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
81
110
|
searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
82
111
|
private search;
|
|
@@ -98,10 +127,14 @@ declare class MemoryVectorStore implements VectorStore {
|
|
|
98
127
|
disconnect(): Promise<void>;
|
|
99
128
|
clearAll(): Promise<void>;
|
|
100
129
|
isConnected(): boolean;
|
|
101
|
-
upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void>;
|
|
130
|
+
upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[], contentChecksum: string): Promise<void>;
|
|
102
131
|
upsertAnnotationVector(annotationId: AnnotationId, embedding: number[], payload: AnnotationPayload): Promise<void>;
|
|
103
132
|
deleteResourceVectors(resourceId: ResourceId): Promise<void>;
|
|
104
133
|
deleteAnnotationVector(annotationId: AnnotationId): Promise<void>;
|
|
134
|
+
deleteAnnotationVectorsForResource(resourceId: ResourceId): Promise<void>;
|
|
135
|
+
count(): Promise<number>;
|
|
136
|
+
listResourceChecksums(): Promise<Map<string, string | undefined>>;
|
|
137
|
+
listAnnotationIds(): Promise<Set<string>>;
|
|
105
138
|
searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
106
139
|
searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
107
140
|
private search;
|
|
@@ -111,7 +144,7 @@ declare class MemoryVectorStore implements VectorStore {
|
|
|
111
144
|
/**
|
|
112
145
|
* VectorStore Factory
|
|
113
146
|
*
|
|
114
|
-
* Creates a VectorStore instance based on configuration.
|
|
147
|
+
* Creates a connected VectorStore instance based on configuration.
|
|
115
148
|
*/
|
|
116
149
|
|
|
117
150
|
interface VectorStoreConfig {
|
|
@@ -121,7 +154,6 @@ interface VectorStoreConfig {
|
|
|
121
154
|
dimensions: number;
|
|
122
155
|
}
|
|
123
156
|
declare function createVectorStore(config: VectorStoreConfig): Promise<VectorStore>;
|
|
124
|
-
declare function getVectorStore(): VectorStore | null;
|
|
125
157
|
|
|
126
158
|
/**
|
|
127
159
|
* EmbeddingProvider Interface
|
|
@@ -143,8 +175,8 @@ interface EmbeddingProvider {
|
|
|
143
175
|
/**
|
|
144
176
|
* Voyage AI Embedding Provider
|
|
145
177
|
*
|
|
146
|
-
* Cloud embedding via the Voyage AI API
|
|
147
|
-
*
|
|
178
|
+
* Cloud embedding via the Voyage AI API.
|
|
179
|
+
* Requires a Voyage AI API key (distinct from Anthropic inference keys).
|
|
148
180
|
*/
|
|
149
181
|
|
|
150
182
|
interface VoyageConfig {
|
|
@@ -214,5 +246,5 @@ declare const DEFAULT_CHUNKING_CONFIG: ChunkingConfig;
|
|
|
214
246
|
*/
|
|
215
247
|
declare function chunkText(text: string, config?: ChunkingConfig): string[];
|
|
216
248
|
|
|
217
|
-
export { DEFAULT_CHUNKING_CONFIG, MemoryVectorStore, OllamaEmbeddingProvider, QdrantVectorStore, VoyageEmbeddingProvider, chunkText, createEmbeddingProvider, createVectorStore, getVectorStore };
|
|
249
|
+
export { DEFAULT_CHUNKING_CONFIG, MemoryVectorStore, OllamaEmbeddingProvider, QdrantVectorStore, VoyageEmbeddingProvider, chunkText, createEmbeddingProvider, createVectorStore };
|
|
218
250
|
export type { AnnotationPayload, ChunkingConfig, EmbeddingChunk, EmbeddingConfig, EmbeddingProvider, OllamaEmbeddingConfig, QdrantConfig, SearchOptions, VectorSearchResult, VectorStore, VectorStoreConfig, VoyageConfig };
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
QdrantVectorStore
|
|
3
|
-
} from "./chunk-IWHPVMXW.js";
|
|
3
|
+
} from "./chunk-LCTHZYK4.js";
|
|
4
4
|
import {
|
|
5
5
|
VoyageEmbeddingProvider
|
|
6
|
-
} from "./chunk-INCF7JMV.js";
|
|
6
|
+
} from "./chunk-UM3RNDW4.js";
|
|
7
7
|
import {
|
|
8
8
|
OllamaEmbeddingProvider
|
|
9
9
|
} from "./chunk-LBAPGZOW.js";
|
|
@@ -38,7 +38,7 @@ var MemoryVectorStore = class {
|
|
|
38
38
|
isConnected() {
|
|
39
39
|
return this.connected;
|
|
40
40
|
}
|
|
41
|
-
async upsertResourceVectors(resourceId, chunks) {
|
|
41
|
+
async upsertResourceVectors(resourceId, chunks, contentChecksum) {
|
|
42
42
|
this.resources = this.resources.filter((p) => p.payload.resourceId !== String(resourceId));
|
|
43
43
|
for (const chunk of chunks) {
|
|
44
44
|
this.resources.push({
|
|
@@ -47,7 +47,8 @@ var MemoryVectorStore = class {
|
|
|
47
47
|
payload: {
|
|
48
48
|
resourceId: String(resourceId),
|
|
49
49
|
chunkIndex: chunk.chunkIndex,
|
|
50
|
-
text: chunk.text
|
|
50
|
+
text: chunk.text,
|
|
51
|
+
contentChecksum
|
|
51
52
|
}
|
|
52
53
|
});
|
|
53
54
|
}
|
|
@@ -72,6 +73,28 @@ var MemoryVectorStore = class {
|
|
|
72
73
|
async deleteAnnotationVector(annotationId) {
|
|
73
74
|
this.annotations = this.annotations.filter((p) => p.id !== String(annotationId));
|
|
74
75
|
}
|
|
76
|
+
async deleteAnnotationVectorsForResource(resourceId) {
|
|
77
|
+
this.annotations = this.annotations.filter((p) => p.payload.resourceId !== String(resourceId));
|
|
78
|
+
}
|
|
79
|
+
async count() {
|
|
80
|
+
return this.resources.length + this.annotations.length;
|
|
81
|
+
}
|
|
82
|
+
async listResourceChecksums() {
|
|
83
|
+
const checksums = /* @__PURE__ */ new Map();
|
|
84
|
+
for (const p of this.resources) {
|
|
85
|
+
if (!checksums.has(p.payload.resourceId)) {
|
|
86
|
+
checksums.set(p.payload.resourceId, p.payload.contentChecksum);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return checksums;
|
|
90
|
+
}
|
|
91
|
+
async listAnnotationIds() {
|
|
92
|
+
const ids = /* @__PURE__ */ new Set();
|
|
93
|
+
for (const p of this.annotations) {
|
|
94
|
+
if (p.payload.annotationId) ids.add(p.payload.annotationId);
|
|
95
|
+
}
|
|
96
|
+
return ids;
|
|
97
|
+
}
|
|
75
98
|
async searchResources(embedding, opts) {
|
|
76
99
|
return this.search(this.resources, embedding, opts);
|
|
77
100
|
}
|
|
@@ -98,7 +121,7 @@ var MemoryVectorStore = class {
|
|
|
98
121
|
score: cosineSimilarity(embedding, p.vector)
|
|
99
122
|
}));
|
|
100
123
|
scored.sort((a, b) => b.score - a.score);
|
|
101
|
-
if (opts.scoreThreshold) {
|
|
124
|
+
if (opts.scoreThreshold !== void 0) {
|
|
102
125
|
const threshold = opts.scoreThreshold;
|
|
103
126
|
return scored.filter((s) => s.score >= threshold).slice(0, opts.limit).map((s) => this.toResult(s));
|
|
104
127
|
}
|
|
@@ -117,30 +140,26 @@ var MemoryVectorStore = class {
|
|
|
117
140
|
};
|
|
118
141
|
|
|
119
142
|
// src/store/factory.ts
|
|
120
|
-
var instance = null;
|
|
121
143
|
async function createVectorStore(config) {
|
|
122
|
-
if (instance) return instance;
|
|
144
|
+
let store;
|
|
123
145
|
if (config.type === "qdrant") {
|
|
124
|
-
const { QdrantVectorStore: QdrantVectorStore2 } = await import("./qdrant-YF2TKFCE.js");
|
|
125
|
-
instance = new QdrantVectorStore2({
|
|
146
|
+
const { QdrantVectorStore: QdrantVectorStore2 } = await import("./qdrant-SE4WDTDB.js");
|
|
147
|
+
store = new QdrantVectorStore2({
|
|
126
148
|
host: config.host ?? "localhost",
|
|
127
149
|
port: config.port ?? 6333,
|
|
128
150
|
dimensions: config.dimensions
|
|
129
151
|
});
|
|
130
152
|
} else {
|
|
131
|
-
instance = new MemoryVectorStore();
|
|
153
|
+
store = new MemoryVectorStore();
|
|
132
154
|
}
|
|
133
|
-
await instance.connect();
|
|
134
|
-
return instance;
|
|
135
|
-
}
|
|
136
|
-
function getVectorStore() {
|
|
137
|
-
return instance;
|
|
155
|
+
await store.connect();
|
|
156
|
+
return store;
|
|
138
157
|
}
|
|
139
158
|
|
|
140
159
|
// src/embedding/factory.ts
|
|
141
160
|
async function createEmbeddingProvider(config) {
|
|
142
161
|
if (config.type === "voyage") {
|
|
143
|
-
const { VoyageEmbeddingProvider: VoyageEmbeddingProvider2 } = await import("./voyage-UWOFVIYM.js");
|
|
162
|
+
const { VoyageEmbeddingProvider: VoyageEmbeddingProvider2 } = await import("./voyage-NUSVXNJN.js");
|
|
144
163
|
if (!config.apiKey) throw new Error("apiKey is required for Voyage embedding provider");
|
|
145
164
|
return new VoyageEmbeddingProvider2({
|
|
146
165
|
apiKey: config.apiKey,
|
|
@@ -209,7 +228,6 @@ export {
|
|
|
209
228
|
VoyageEmbeddingProvider,
|
|
210
229
|
chunkText,
|
|
211
230
|
createEmbeddingProvider,
|
|
212
|
-
createVectorStore,
|
|
213
|
-
getVectorStore
|
|
231
|
+
createVectorStore
|
|
214
232
|
};
|
|
215
233
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/store/memory.ts","../src/store/factory.ts","../src/embedding/factory.ts","../src/chunking.ts"],"sourcesContent":["/**\n * In-Memory VectorStore Implementation\n *\n * For testing and development without a running Qdrant instance.\n * Uses brute-force cosine similarity search.\n */\n\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\ninterface StoredPoint {\n id: string;\n vector: number[];\n payload: {\n resourceId: string;\n annotationId?: string;\n chunkIndex?: number;\n text: string;\n motivation?: string;\n entityTypes?: string[];\n };\n}\n\nfunction cosineSimilarity(a: number[], b: number[]): number {\n let dotProduct = 0;\n let normA = 0;\n let normB = 0;\n for (let i = 0; i < a.length; i++) {\n dotProduct += a[i] * b[i];\n normA += a[i] * a[i];\n normB += b[i] * b[i];\n }\n const denom = Math.sqrt(normA) * Math.sqrt(normB);\n return denom === 0 ? 
0 : dotProduct / denom;\n}\n\nexport class MemoryVectorStore implements VectorStore {\n private resources: StoredPoint[] = [];\n private annotations: StoredPoint[] = [];\n private connected = false;\n\n async connect(): Promise<void> {\n this.connected = true;\n }\n\n async disconnect(): Promise<void> {\n this.connected = false;\n }\n\n async clearAll(): Promise<void> {\n this.resources = [];\n this.annotations = [];\n }\n\n isConnected(): boolean {\n return this.connected;\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void> {\n // Remove existing vectors for this resource\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n\n for (const chunk of chunks) {\n this.resources.push({\n id: `${resourceId}-${chunk.chunkIndex}`,\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n },\n });\n }\n }\n\n async upsertAnnotationVector(\n annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n this.annotations.push({\n id: String(annotationId),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.resources, embedding, opts);\n }\n\n async 
searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.annotations, embedding, opts);\n }\n\n private search(points: StoredPoint[], embedding: number[], opts: SearchOptions): VectorSearchResult[] {\n let filtered = points;\n\n if (opts.filter) {\n const f = opts.filter;\n filtered = points.filter(p => {\n if (f.resourceId && p.payload.resourceId !== String(f.resourceId)) return false;\n if (f.excludeResourceId && p.payload.resourceId === String(f.excludeResourceId)) return false;\n if (f.motivation && p.payload.motivation !== f.motivation) return false;\n if (f.entityTypes && f.entityTypes.length > 0) {\n const pTypes = p.payload.entityTypes ?? [];\n if (!f.entityTypes.some(t => pTypes.includes(t))) return false;\n }\n return true;\n });\n }\n\n const scored = filtered.map(p => ({\n ...p,\n score: cosineSimilarity(embedding, p.vector),\n }));\n\n scored.sort((a, b) => b.score - a.score);\n\n if (opts.scoreThreshold) {\n const threshold = opts.scoreThreshold;\n return scored\n .filter(s => s.score >= threshold)\n .slice(0, opts.limit)\n .map(s => this.toResult(s));\n }\n\n return scored.slice(0, opts.limit).map(s => this.toResult(s));\n }\n\n private toResult(s: StoredPoint & { score: number }): VectorSearchResult {\n return {\n id: s.id,\n score: s.score,\n resourceId: s.payload.resourceId as ResourceId,\n annotationId: s.payload.annotationId as AnnotationId | undefined,\n text: s.payload.text,\n entityTypes: s.payload.entityTypes,\n };\n }\n}\n","/**\n * VectorStore Factory\n *\n * Creates a VectorStore instance based on configuration.\n */\n\nimport type { VectorStore } from './interface';\nimport { MemoryVectorStore } from './memory';\n\nexport interface VectorStoreConfig {\n type: 'qdrant' | 'memory';\n host?: string;\n port?: number;\n dimensions: number;\n}\n\nlet instance: VectorStore | null = null;\n\nexport async function createVectorStore(config: VectorStoreConfig): Promise<VectorStore> {\n 
if (instance) return instance;\n\n if (config.type === 'qdrant') {\n const { QdrantVectorStore } = await import('./qdrant');\n instance = new QdrantVectorStore({\n host: config.host ?? 'localhost',\n port: config.port ?? 6333,\n dimensions: config.dimensions,\n });\n } else {\n instance = new MemoryVectorStore();\n }\n\n await instance.connect();\n return instance;\n}\n\nexport function getVectorStore(): VectorStore | null {\n return instance;\n}\n","/**\n * EmbeddingProvider Factory\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface EmbeddingConfig {\n type: 'voyage' | 'ollama';\n model: string;\n apiKey?: string;\n baseURL?: string;\n endpoint?: string;\n}\n\nexport async function createEmbeddingProvider(config: EmbeddingConfig): Promise<EmbeddingProvider> {\n if (config.type === 'voyage') {\n const { VoyageEmbeddingProvider } = await import('./voyage');\n if (!config.apiKey) throw new Error('apiKey is required for Voyage embedding provider');\n return new VoyageEmbeddingProvider({\n apiKey: config.apiKey,\n model: config.model,\n endpoint: config.endpoint,\n });\n }\n\n if (config.type === 'ollama') {\n const { OllamaEmbeddingProvider } = await import('./ollama');\n return new OllamaEmbeddingProvider({\n model: config.model,\n baseURL: config.baseURL,\n });\n }\n\n throw new Error(`Unknown embedding provider type: ${config.type}`);\n}\n","/**\n * Text Chunking Utilities\n *\n * Splits long text into overlapping chunks for embedding.\n * Each chunk is a passage that fits within the embedding model's context window.\n */\n\nexport interface ChunkingConfig {\n chunkSize: number; // approximate tokens per chunk\n overlap: number; // tokens of overlap between adjacent chunks\n}\n\nexport const DEFAULT_CHUNKING_CONFIG: ChunkingConfig = {\n chunkSize: 512,\n overlap: 64,\n};\n\n/**\n * Rough token count estimate: ~4 characters per token for English text.\n */\nfunction estimateTokens(text: string): number {\n return Math.ceil(text.length 
/ 4);\n}\n\n/**\n * Split text into overlapping chunks.\n *\n * Splits on paragraph boundaries when possible, falling back to sentence\n * boundaries, then word boundaries. Each chunk overlaps with the previous\n * by `overlap` tokens worth of text.\n */\nexport function chunkText(text: string, config: ChunkingConfig = DEFAULT_CHUNKING_CONFIG): string[] {\n if (text.length === 0) return [];\n const totalTokens = estimateTokens(text);\n if (totalTokens <= config.chunkSize) {\n return [text];\n }\n\n const chunkChars = config.chunkSize * 4;\n const overlapChars = config.overlap * 4;\n const chunks: string[] = [];\n let start = 0;\n\n while (start < text.length) {\n let end = Math.min(start + chunkChars, text.length);\n\n // Try to break at a paragraph boundary\n if (end < text.length) {\n const paraBreak = text.lastIndexOf('\\n\\n', end);\n if (paraBreak > start + chunkChars / 2) {\n end = paraBreak;\n } else {\n // Try sentence boundary\n const sentenceBreak = text.lastIndexOf('. ', end);\n if (sentenceBreak > start + chunkChars / 2) {\n end = sentenceBreak + 1;\n } else {\n // Try word boundary\n const wordBreak = text.lastIndexOf(' ', end);\n if (wordBreak > start + chunkChars / 2) {\n end = wordBreak;\n }\n }\n }\n }\n\n chunks.push(text.slice(start, end).trim());\n const nextStart = end - overlapChars;\n start = nextStart > start ? 
nextStart : end;\n if (start >= text.length) break;\n }\n\n return chunks.filter(c => c.length > 0);\n}\n"],"mappings":";;;;;;;;;;;AAuBA,SAAS,iBAAiB,GAAa,GAAqB;AAC1D,MAAI,aAAa;AACjB,MAAI,QAAQ;AACZ,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,kBAAc,EAAE,CAAC,IAAI,EAAE,CAAC;AACxB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AACnB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,EACrB;AACA,QAAM,QAAQ,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK;AAChD,SAAO,UAAU,IAAI,IAAI,aAAa;AACxC;AAEO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,YAA2B,CAAC;AAAA,EAC5B,cAA6B,CAAC;AAAA,EAC9B,YAAY;AAAA,EAEpB,MAAM,UAAyB;AAC7B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,WAA0B;AAC9B,SAAK,YAAY,CAAC;AAClB,SAAK,cAAc,CAAC;AAAA,EACtB;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAAyC;AAE3F,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAEvF,eAAW,SAAS,QAAQ;AAC1B,WAAK,UAAU,KAAK;AAAA,QAClB,IAAI,GAAG,UAAU,IAAI,MAAM,UAAU;AAAA,QACrC,QAAQ,MAAM;AAAA,QACd,SAAS;AAAA,UACP,YAAY,OAAO,UAAU;AAAA,UAC7B,YAAY,MAAM;AAAA,UAClB,MAAM,MAAM;AAAA,QACd;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAC7E,SAAK,YAAY,KAAK;AAAA,MACpB,IAAI,OAAO,YAAY;AAAA,MACvB,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,cAAc,OAAO,QAAQ,YAAY;AAAA,QACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,QACrC,YAAY,QAAQ;AAAA,QACpB,aAAa,QAAQ;AAAA,QACrB,MAAM,QAAQ;AAAA,MAChB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,sBAAsB,YAAuC;AACjE,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAAA,EACzF;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAAA,EAC/E;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,KAAK,WAAW,WAAW,IAAI;AAAA,EACpD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,KAAK,aAAa,WAAW,IAAI;AAAA,EACtD;AAAA,EAEQ,OAAO,QAAuB,WAAqB,MAA2C;AACpG,QAAI,WAAW;AAEf,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,KAAK;AACf,iBAAW,OAAO,OAAO,OAAK;AAC5B,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,OAAO,EAAE,UAAU,EAAG,QAAO;AAC1E,YAAI,EAAE,qBAAqB,EAAE,QAAQ,
eAAe,OAAO,EAAE,iBAAiB,EAAG,QAAO;AACxF,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,EAAE,WAAY,QAAO;AAClE,YAAI,EAAE,eAAe,EAAE,YAAY,SAAS,GAAG;AAC7C,gBAAM,SAAS,EAAE,QAAQ,eAAe,CAAC;AACzC,cAAI,CAAC,EAAE,YAAY,KAAK,OAAK,OAAO,SAAS,CAAC,CAAC,EAAG,QAAO;AAAA,QAC3D;AACA,eAAO;AAAA,MACT,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,SAAS,IAAI,QAAM;AAAA,MAChC,GAAG;AAAA,MACH,OAAO,iBAAiB,WAAW,EAAE,MAAM;AAAA,IAC7C,EAAE;AAEF,WAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAEvC,QAAI,KAAK,gBAAgB;AACvB,YAAM,YAAY,KAAK;AACvB,aAAO,OACJ,OAAO,OAAK,EAAE,SAAS,SAAS,EAChC,MAAM,GAAG,KAAK,KAAK,EACnB,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,IAC9B;AAEA,WAAO,OAAO,MAAM,GAAG,KAAK,KAAK,EAAE,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,EAC9D;AAAA,EAEQ,SAAS,GAAwD;AACvE,WAAO;AAAA,MACL,IAAI,EAAE;AAAA,MACN,OAAO,EAAE;AAAA,MACT,YAAY,EAAE,QAAQ;AAAA,MACtB,cAAc,EAAE,QAAQ;AAAA,MACxB,MAAM,EAAE,QAAQ;AAAA,MAChB,aAAa,EAAE,QAAQ;AAAA,IACzB;AAAA,EACF;AACF;;;AC3IA,IAAI,WAA+B;AAEnC,eAAsB,kBAAkB,QAAiD;AACvF,MAAI,SAAU,QAAO;AAErB,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,mBAAAA,mBAAkB,IAAI,MAAM,OAAO,sBAAU;AACrD,eAAW,IAAIA,mBAAkB;AAAA,MAC/B,MAAM,OAAO,QAAQ;AAAA,MACrB,MAAM,OAAO,QAAQ;AAAA,MACrB,YAAY,OAAO;AAAA,IACrB,CAAC;AAAA,EACH,OAAO;AACL,eAAW,IAAI,kBAAkB;AAAA,EACnC;AAEA,QAAM,SAAS,QAAQ;AACvB,SAAO;AACT;AAEO,SAAS,iBAAqC;AACnD,SAAO;AACT;;;ACxBA,eAAsB,wBAAwB,QAAqD;AACjG,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,QAAI,CAAC,OAAO,OAAQ,OAAM,IAAI,MAAM,kDAAkD;AACtF,WAAO,IAAIA,yBAAwB;AAAA,MACjC,QAAQ,OAAO;AAAA,MACf,OAAO,OAAO;AAAA,MACd,UAAU,OAAO;AAAA,IACnB,CAAC;AAAA,EACH;AAEA,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,WAAO,IAAIA,yBAAwB;AAAA,MACjC,OAAO,OAAO;AAAA,MACd,SAAS,OAAO;AAAA,IAClB,CAAC;AAAA,EACH;AAEA,QAAM,IAAI,MAAM,oCAAoC,OAAO,IAAI,EAAE;AACnE;;;ACtBO,IAAM,0BAA0C;AAAA,EACrD,WAAW;AAAA,EACX,SAAS;AACX;AAKA,SAAS,eAAe,MAAsB;AAC5C,SAAO,KAAK,KAAK,KAAK,SAAS,CAAC;AAClC;AASO,SAAS,UAAU,MAAc,SAAyB,yBAAmC;AAClG,MAAI,KAAK,WAAW,EAAG,QAAO,CAAC;AAC/B,QAAM,cAAc,eAAe,IAAI;AACvC,MAAI,eAAe,OAAO,WAAW;AACnC,WAAO,CAAC,IAAI;AAAA,EACd;AAEA,QAAM,aAAa,OA
AO,YAAY;AACtC,QAAM,eAAe,OAAO,UAAU;AACtC,QAAM,SAAmB,CAAC;AAC1B,MAAI,QAAQ;AAEZ,SAAO,QAAQ,KAAK,QAAQ;AAC1B,QAAI,MAAM,KAAK,IAAI,QAAQ,YAAY,KAAK,MAAM;AAGlD,QAAI,MAAM,KAAK,QAAQ;AACrB,YAAM,YAAY,KAAK,YAAY,QAAQ,GAAG;AAC9C,UAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,cAAM;AAAA,MACR,OAAO;AAEL,cAAM,gBAAgB,KAAK,YAAY,MAAM,GAAG;AAChD,YAAI,gBAAgB,QAAQ,aAAa,GAAG;AAC1C,gBAAM,gBAAgB;AAAA,QACxB,OAAO;AAEL,gBAAM,YAAY,KAAK,YAAY,KAAK,GAAG;AAC3C,cAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,KAAK,MAAM,OAAO,GAAG,EAAE,KAAK,CAAC;AACzC,UAAM,YAAY,MAAM;AACxB,YAAQ,YAAY,QAAQ,YAAY;AACxC,QAAI,SAAS,KAAK,OAAQ;AAAA,EAC5B;AAEA,SAAO,OAAO,OAAO,OAAK,EAAE,SAAS,CAAC;AACxC;","names":["QdrantVectorStore","VoyageEmbeddingProvider","OllamaEmbeddingProvider"]}
|
|
1
|
+
{"version":3,"sources":["../src/store/memory.ts","../src/store/factory.ts","../src/embedding/factory.ts","../src/chunking.ts"],"sourcesContent":["/**\n * In-Memory VectorStore Implementation\n *\n * For testing and development without a running Qdrant instance.\n * Uses brute-force cosine similarity search.\n */\n\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\ninterface StoredPoint {\n id: string;\n vector: number[];\n payload: {\n resourceId: string;\n annotationId?: string;\n chunkIndex?: number;\n text: string;\n contentChecksum?: string;\n motivation?: string;\n entityTypes?: string[];\n };\n}\n\nfunction cosineSimilarity(a: number[], b: number[]): number {\n let dotProduct = 0;\n let normA = 0;\n let normB = 0;\n for (let i = 0; i < a.length; i++) {\n dotProduct += a[i] * b[i];\n normA += a[i] * a[i];\n normB += b[i] * b[i];\n }\n const denom = Math.sqrt(normA) * Math.sqrt(normB);\n return denom === 0 ? 
0 : dotProduct / denom;\n}\n\nexport class MemoryVectorStore implements VectorStore {\n private resources: StoredPoint[] = [];\n private annotations: StoredPoint[] = [];\n private connected = false;\n\n async connect(): Promise<void> {\n this.connected = true;\n }\n\n async disconnect(): Promise<void> {\n this.connected = false;\n }\n\n async clearAll(): Promise<void> {\n this.resources = [];\n this.annotations = [];\n }\n\n isConnected(): boolean {\n return this.connected;\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[], contentChecksum: string): Promise<void> {\n // Remove existing vectors for this resource\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n\n for (const chunk of chunks) {\n this.resources.push({\n id: `${resourceId}-${chunk.chunkIndex}`,\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n contentChecksum,\n },\n });\n }\n }\n\n async upsertAnnotationVector(\n annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n this.annotations.push({\n id: String(annotationId),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n }\n\n async deleteAnnotationVectorsForResource(resourceId: ResourceId): Promise<void> {\n this.annotations = this.annotations.filter(p => 
p.payload.resourceId !== String(resourceId));\n }\n\n async count(): Promise<number> {\n return this.resources.length + this.annotations.length;\n }\n\n async listResourceChecksums(): Promise<Map<string, string | undefined>> {\n const checksums = new Map<string, string | undefined>();\n for (const p of this.resources) {\n if (!checksums.has(p.payload.resourceId)) {\n checksums.set(p.payload.resourceId, p.payload.contentChecksum);\n }\n }\n return checksums;\n }\n\n async listAnnotationIds(): Promise<Set<string>> {\n const ids = new Set<string>();\n for (const p of this.annotations) {\n if (p.payload.annotationId) ids.add(p.payload.annotationId);\n }\n return ids;\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.resources, embedding, opts);\n }\n\n async searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.annotations, embedding, opts);\n }\n\n private search(points: StoredPoint[], embedding: number[], opts: SearchOptions): VectorSearchResult[] {\n let filtered = points;\n\n if (opts.filter) {\n const f = opts.filter;\n filtered = points.filter(p => {\n if (f.resourceId && p.payload.resourceId !== String(f.resourceId)) return false;\n if (f.excludeResourceId && p.payload.resourceId === String(f.excludeResourceId)) return false;\n if (f.motivation && p.payload.motivation !== f.motivation) return false;\n if (f.entityTypes && f.entityTypes.length > 0) {\n const pTypes = p.payload.entityTypes ?? 
[];\n if (!f.entityTypes.some(t => pTypes.includes(t))) return false;\n }\n return true;\n });\n }\n\n const scored = filtered.map(p => ({\n ...p,\n score: cosineSimilarity(embedding, p.vector),\n }));\n\n scored.sort((a, b) => b.score - a.score);\n\n if (opts.scoreThreshold !== undefined) {\n const threshold = opts.scoreThreshold;\n return scored\n .filter(s => s.score >= threshold)\n .slice(0, opts.limit)\n .map(s => this.toResult(s));\n }\n\n return scored.slice(0, opts.limit).map(s => this.toResult(s));\n }\n\n private toResult(s: StoredPoint & { score: number }): VectorSearchResult {\n return {\n id: s.id,\n score: s.score,\n resourceId: s.payload.resourceId as ResourceId,\n annotationId: s.payload.annotationId as AnnotationId | undefined,\n text: s.payload.text,\n entityTypes: s.payload.entityTypes,\n };\n }\n}\n","/**\n * VectorStore Factory\n *\n * Creates a connected VectorStore instance based on configuration.\n */\n\nimport type { VectorStore } from './interface';\nimport { MemoryVectorStore } from './memory';\n\nexport interface VectorStoreConfig {\n type: 'qdrant' | 'memory';\n host?: string;\n port?: number;\n dimensions: number;\n}\n\nexport async function createVectorStore(config: VectorStoreConfig): Promise<VectorStore> {\n let store: VectorStore;\n\n if (config.type === 'qdrant') {\n const { QdrantVectorStore } = await import('./qdrant');\n store = new QdrantVectorStore({\n host: config.host ?? 'localhost',\n port: config.port ?? 
6333,\n dimensions: config.dimensions,\n });\n } else {\n store = new MemoryVectorStore();\n }\n\n await store.connect();\n return store;\n}\n","/**\n * EmbeddingProvider Factory\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface EmbeddingConfig {\n type: 'voyage' | 'ollama';\n model: string;\n apiKey?: string;\n baseURL?: string;\n endpoint?: string;\n}\n\nexport async function createEmbeddingProvider(config: EmbeddingConfig): Promise<EmbeddingProvider> {\n if (config.type === 'voyage') {\n const { VoyageEmbeddingProvider } = await import('./voyage');\n if (!config.apiKey) throw new Error('apiKey is required for Voyage embedding provider');\n return new VoyageEmbeddingProvider({\n apiKey: config.apiKey,\n model: config.model,\n endpoint: config.endpoint,\n });\n }\n\n if (config.type === 'ollama') {\n const { OllamaEmbeddingProvider } = await import('./ollama');\n return new OllamaEmbeddingProvider({\n model: config.model,\n baseURL: config.baseURL,\n });\n }\n\n throw new Error(`Unknown embedding provider type: ${config.type}`);\n}\n","/**\n * Text Chunking Utilities\n *\n * Splits long text into overlapping chunks for embedding.\n * Each chunk is a passage that fits within the embedding model's context window.\n */\n\nexport interface ChunkingConfig {\n chunkSize: number; // approximate tokens per chunk\n overlap: number; // tokens of overlap between adjacent chunks\n}\n\nexport const DEFAULT_CHUNKING_CONFIG: ChunkingConfig = {\n chunkSize: 512,\n overlap: 64,\n};\n\n/**\n * Rough token count estimate: ~4 characters per token for English text.\n */\nfunction estimateTokens(text: string): number {\n return Math.ceil(text.length / 4);\n}\n\n/**\n * Split text into overlapping chunks.\n *\n * Splits on paragraph boundaries when possible, falling back to sentence\n * boundaries, then word boundaries. 
Each chunk overlaps with the previous\n * by `overlap` tokens worth of text.\n */\nexport function chunkText(text: string, config: ChunkingConfig = DEFAULT_CHUNKING_CONFIG): string[] {\n if (text.length === 0) return [];\n const totalTokens = estimateTokens(text);\n if (totalTokens <= config.chunkSize) {\n return [text];\n }\n\n const chunkChars = config.chunkSize * 4;\n const overlapChars = config.overlap * 4;\n const chunks: string[] = [];\n let start = 0;\n\n while (start < text.length) {\n let end = Math.min(start + chunkChars, text.length);\n\n // Try to break at a paragraph boundary\n if (end < text.length) {\n const paraBreak = text.lastIndexOf('\\n\\n', end);\n if (paraBreak > start + chunkChars / 2) {\n end = paraBreak;\n } else {\n // Try sentence boundary\n const sentenceBreak = text.lastIndexOf('. ', end);\n if (sentenceBreak > start + chunkChars / 2) {\n end = sentenceBreak + 1;\n } else {\n // Try word boundary\n const wordBreak = text.lastIndexOf(' ', end);\n if (wordBreak > start + chunkChars / 2) {\n end = wordBreak;\n }\n }\n }\n }\n\n chunks.push(text.slice(start, end).trim());\n const nextStart = end - overlapChars;\n start = nextStart > start ? 
nextStart : end;\n if (start >= text.length) break;\n }\n\n return chunks.filter(c => c.length > 0);\n}\n"],"mappings":";;;;;;;;;;;AAwBA,SAAS,iBAAiB,GAAa,GAAqB;AAC1D,MAAI,aAAa;AACjB,MAAI,QAAQ;AACZ,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,kBAAc,EAAE,CAAC,IAAI,EAAE,CAAC;AACxB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AACnB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,EACrB;AACA,QAAM,QAAQ,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK;AAChD,SAAO,UAAU,IAAI,IAAI,aAAa;AACxC;AAEO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,YAA2B,CAAC;AAAA,EAC5B,cAA6B,CAAC;AAAA,EAC9B,YAAY;AAAA,EAEpB,MAAM,UAAyB;AAC7B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,WAA0B;AAC9B,SAAK,YAAY,CAAC;AAClB,SAAK,cAAc,CAAC;AAAA,EACtB;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAA0B,iBAAwC;AAEpH,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAEvF,eAAW,SAAS,QAAQ;AAC1B,WAAK,UAAU,KAAK;AAAA,QAClB,IAAI,GAAG,UAAU,IAAI,MAAM,UAAU;AAAA,QACrC,QAAQ,MAAM;AAAA,QACd,SAAS;AAAA,UACP,YAAY,OAAO,UAAU;AAAA,UAC7B,YAAY,MAAM;AAAA,UAClB,MAAM,MAAM;AAAA,UACZ;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAC7E,SAAK,YAAY,KAAK;AAAA,MACpB,IAAI,OAAO,YAAY;AAAA,MACvB,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,cAAc,OAAO,QAAQ,YAAY;AAAA,QACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,QACrC,YAAY,QAAQ;AAAA,QACpB,aAAa,QAAQ;AAAA,QACrB,MAAM,QAAQ;AAAA,MAChB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,sBAAsB,YAAuC;AACjE,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAAA,EACzF;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAAA,EAC/E;AAAA,EAEA,MAAM,mCAAmC,YAAuC;AAC9E,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAAA,EAC7F;AAAA,EAEA,MAAM,QAAyB;AAC7B,WAAO,KAAK,UAAU,SAAS,KAAK,YAAY;AAAA,EAClD;AAAA,EAEA,MAAM,wBAAkE;AACtE,UAAM,YAAY,oBAAI,IAAgC;AACtD,eAAW,KAAK,KAAK,WAAW;AAC9B,UAAI,CAAC,UAAU,IAAI,EAAE,QAAQ,UAAU,GAAG;AACxC,kBAAU,IAAI,EAAE,QAAQ,YAAY,EAAE,QAAQ,eAAe;AAAA,MAC/D;AAAA,IACF;A
ACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,oBAA0C;AAC9C,UAAM,MAAM,oBAAI,IAAY;AAC5B,eAAW,KAAK,KAAK,aAAa;AAChC,UAAI,EAAE,QAAQ,aAAc,KAAI,IAAI,EAAE,QAAQ,YAAY;AAAA,IAC5D;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,KAAK,WAAW,WAAW,IAAI;AAAA,EACpD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,KAAK,aAAa,WAAW,IAAI;AAAA,EACtD;AAAA,EAEQ,OAAO,QAAuB,WAAqB,MAA2C;AACpG,QAAI,WAAW;AAEf,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,KAAK;AACf,iBAAW,OAAO,OAAO,OAAK;AAC5B,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,OAAO,EAAE,UAAU,EAAG,QAAO;AAC1E,YAAI,EAAE,qBAAqB,EAAE,QAAQ,eAAe,OAAO,EAAE,iBAAiB,EAAG,QAAO;AACxF,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,EAAE,WAAY,QAAO;AAClE,YAAI,EAAE,eAAe,EAAE,YAAY,SAAS,GAAG;AAC7C,gBAAM,SAAS,EAAE,QAAQ,eAAe,CAAC;AACzC,cAAI,CAAC,EAAE,YAAY,KAAK,OAAK,OAAO,SAAS,CAAC,CAAC,EAAG,QAAO;AAAA,QAC3D;AACA,eAAO;AAAA,MACT,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,SAAS,IAAI,QAAM;AAAA,MAChC,GAAG;AAAA,MACH,OAAO,iBAAiB,WAAW,EAAE,MAAM;AAAA,IAC7C,EAAE;AAEF,WAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAEvC,QAAI,KAAK,mBAAmB,QAAW;AACrC,YAAM,YAAY,KAAK;AACvB,aAAO,OACJ,OAAO,OAAK,EAAE,SAAS,SAAS,EAChC,MAAM,GAAG,KAAK,KAAK,EACnB,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,IAC9B;AAEA,WAAO,OAAO,MAAM,GAAG,KAAK,KAAK,EAAE,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,EAC9D;AAAA,EAEQ,SAAS,GAAwD;AACvE,WAAO;AAAA,MACL,IAAI,EAAE;AAAA,MACN,OAAO,EAAE;AAAA,MACT,YAAY,EAAE,QAAQ;AAAA,MACtB,cAAc,EAAE,QAAQ;AAAA,MACxB,MAAM,EAAE,QAAQ;AAAA,MAChB,aAAa,EAAE,QAAQ;AAAA,IACzB;AAAA,EACF;AACF;;;ACvKA,eAAsB,kBAAkB,QAAiD;AACvF,MAAI;AAEJ,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,mBAAAA,mBAAkB,IAAI,MAAM,OAAO,sBAAU;AACrD,YAAQ,IAAIA,mBAAkB;AAAA,MAC5B,MAAM,OAAO,QAAQ;AAAA,MACrB,MAAM,OAAO,QAAQ;AAAA,MACrB,YAAY,OAAO;AAAA,IACrB,CAAC;AAAA,EACH,OAAO;AACL,YAAQ,IAAI,kBAAkB;AAAA,EAChC;AAEA,QAAM,MAAM,QAAQ;AACpB,SAAO;AACT;;;AClBA,eAAsB,wBAAwB,QAAqD;AACjG,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,QAAI,CAAC,OAAO,OAAQ,OAAM,IAAI,MAAM,kDAAkD;AACtF,WAAO,IAAIA,yBAAwB;AAAA,MACjC,QAAQ,OAAO;AAAA,MACf,OAAO,OAAO;AAAA,MACd,UAAU,OAAO;AAAA,IACnB,CAAC;AAAA,EACH;AA
EA,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,WAAO,IAAIA,yBAAwB;AAAA,MACjC,OAAO,OAAO;AAAA,MACd,SAAS,OAAO;AAAA,IAClB,CAAC;AAAA,EACH;AAEA,QAAM,IAAI,MAAM,oCAAoC,OAAO,IAAI,EAAE;AACnE;;;ACtBO,IAAM,0BAA0C;AAAA,EACrD,WAAW;AAAA,EACX,SAAS;AACX;AAKA,SAAS,eAAe,MAAsB;AAC5C,SAAO,KAAK,KAAK,KAAK,SAAS,CAAC;AAClC;AASO,SAAS,UAAU,MAAc,SAAyB,yBAAmC;AAClG,MAAI,KAAK,WAAW,EAAG,QAAO,CAAC;AAC/B,QAAM,cAAc,eAAe,IAAI;AACvC,MAAI,eAAe,OAAO,WAAW;AACnC,WAAO,CAAC,IAAI;AAAA,EACd;AAEA,QAAM,aAAa,OAAO,YAAY;AACtC,QAAM,eAAe,OAAO,UAAU;AACtC,QAAM,SAAmB,CAAC;AAC1B,MAAI,QAAQ;AAEZ,SAAO,QAAQ,KAAK,QAAQ;AAC1B,QAAI,MAAM,KAAK,IAAI,QAAQ,YAAY,KAAK,MAAM;AAGlD,QAAI,MAAM,KAAK,QAAQ;AACrB,YAAM,YAAY,KAAK,YAAY,QAAQ,GAAG;AAC9C,UAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,cAAM;AAAA,MACR,OAAO;AAEL,cAAM,gBAAgB,KAAK,YAAY,MAAM,GAAG;AAChD,YAAI,gBAAgB,QAAQ,aAAa,GAAG;AAC1C,gBAAM,gBAAgB;AAAA,QACxB,OAAO;AAEL,gBAAM,YAAY,KAAK,YAAY,KAAK,GAAG;AAC3C,cAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,KAAK,MAAM,OAAO,GAAG,EAAE,KAAK,CAAC;AACzC,UAAM,YAAY,MAAM;AACxB,YAAQ,YAAY,QAAQ,YAAY;AACxC,QAAI,SAAS,KAAK,OAAQ;AAAA,EAC5B;AAEA,SAAO,OAAO,OAAO,OAAK,EAAE,SAAS,CAAC;AACxC;","names":["QdrantVectorStore","VoyageEmbeddingProvider","OllamaEmbeddingProvider"]}
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/embedding/voyage.ts"],"sourcesContent":["/**\n * Voyage AI Embedding Provider\n *\n * Cloud embedding via the Voyage AI API (partner of Anthropic).\n * Uses the same API key as Anthropic inference.\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface VoyageConfig {\n apiKey: string;\n model: string;\n endpoint?: string;\n}\n\nconst VOYAGE_DIMENSIONS: Record<string, number> = {\n 'voyage-3': 1024,\n 'voyage-3-lite': 512,\n 'voyage-code-3': 1024,\n 'voyage-finance-2': 1024,\n 'voyage-law-2': 1024,\n};\n\nexport class VoyageEmbeddingProvider implements EmbeddingProvider {\n private config: VoyageConfig;\n\n constructor(config: VoyageConfig) {\n this.config = config;\n }\n\n async embed(text: string): Promise<number[]> {\n const results = await this.embedBatch([text]);\n return results[0];\n }\n\n async embedBatch(texts: string[]): Promise<number[][]> {\n const endpoint = this.config.endpoint ?? 'https://api.voyageai.com/v1/embeddings';\n\n const response = await fetch(endpoint, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this.config.apiKey}`,\n },\n body: JSON.stringify({\n model: this.config.model,\n input: texts,\n }),\n });\n\n if (!response.ok) {\n const body = await response.text();\n throw new Error(`Voyage API error ${response.status}: ${body}`);\n }\n\n const json = await response.json() as { data: Array<{ embedding: number[] }> };\n return json.data.map(d => d.embedding);\n }\n\n dimensions(): number {\n return VOYAGE_DIMENSIONS[this.config.model] ?? 
1024;\n }\n\n model(): string {\n return this.config.model;\n }\n}\n"],"mappings":";AAeA,IAAM,oBAA4C;AAAA,EAChD,YAAY;AAAA,EACZ,iBAAiB;AAAA,EACjB,iBAAiB;AAAA,EACjB,oBAAoB;AAAA,EACpB,gBAAgB;AAClB;AAEO,IAAM,0BAAN,MAA2D;AAAA,EACxD;AAAA,EAER,YAAY,QAAsB;AAChC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,MAAM,MAAiC;AAC3C,UAAM,UAAU,MAAM,KAAK,WAAW,CAAC,IAAI,CAAC;AAC5C,WAAO,QAAQ,CAAC;AAAA,EAClB;AAAA,EAEA,MAAM,WAAW,OAAsC;AACrD,UAAM,WAAW,KAAK,OAAO,YAAY;AAEzC,UAAM,WAAW,MAAM,MAAM,UAAU;AAAA,MACrC,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,iBAAiB,UAAU,KAAK,OAAO,MAAM;AAAA,MAC/C;AAAA,MACA,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK,OAAO;AAAA,QACnB,OAAO;AAAA,MACT,CAAC;AAAA,IACH,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,YAAM,IAAI,MAAM,oBAAoB,SAAS,MAAM,KAAK,IAAI,EAAE;AAAA,IAChE;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AACjC,WAAO,KAAK,KAAK,IAAI,OAAK,EAAE,SAAS;AAAA,EACvC;AAAA,EAEA,aAAqB;AACnB,WAAO,kBAAkB,KAAK,OAAO,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,QAAgB;AACd,WAAO,KAAK,OAAO;AAAA,EACrB;AACF;","names":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/store/qdrant.ts"],"sourcesContent":["/**\n * Qdrant VectorStore Implementation\n *\n * Uses the Qdrant REST API via @qdrant/js-client-rest.\n * Manages two collections: 'resources' and 'annotations'.\n */\n\nimport { createHash } from 'crypto';\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\n/**\n * Generate a deterministic UUID v5-style ID from an arbitrary string.\n * Qdrant requires point IDs to be UUIDs or unsigned integers.\n */\nfunction toQdrantId(input: string): string {\n const hex = createHash('md5').update(input).digest('hex');\n return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`;\n}\n\nexport interface QdrantConfig {\n host: string;\n port: number;\n dimensions: number;\n}\n\nexport class QdrantVectorStore implements VectorStore {\n private client: any = null;\n private connected = false;\n private config: QdrantConfig;\n\n constructor(config: QdrantConfig) {\n this.config = config;\n }\n\n async connect(): Promise<void> {\n const { QdrantClient } = await import('@qdrant/js-client-rest');\n this.client = new QdrantClient({\n host: this.config.host,\n port: this.config.port,\n });\n\n // Ensure collections exist\n await this.ensureCollection('resources', this.config.dimensions);\n await this.ensureCollection('annotations', this.config.dimensions);\n this.connected = true;\n }\n\n async disconnect(): Promise<void> {\n this.client = null;\n this.connected = false;\n }\n\n async clearAll(): Promise<void> {\n try { await this.client.deleteCollection('resources'); } catch { /* may not exist */ }\n try { await this.client.deleteCollection('annotations'); } catch { /* may not exist */ }\n await this.ensureCollection('resources', this.config.dimensions);\n await this.ensureCollection('annotations', this.config.dimensions);\n }\n\n 
isConnected(): boolean {\n return this.connected;\n }\n\n private async ensureCollection(name: string, dimensions: number): Promise<void> {\n try {\n await this.client.getCollection(name);\n } catch {\n await this.client.createCollection(name, {\n vectors: { size: dimensions, distance: 'Cosine' },\n });\n }\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void> {\n if (chunks.length === 0) return;\n\n const points = chunks.map((chunk) => ({\n id: toQdrantId(`${resourceId}-${chunk.chunkIndex}`),\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n },\n }));\n\n await this.client.upsert('resources', { points });\n }\n\n async upsertAnnotationVector(\n annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n await this.client.upsert('annotations', {\n points: [{\n id: toQdrantId(String(annotationId)),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n }],\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n await this.client.delete('resources', {\n filter: {\n must: [{ key: 'resourceId', match: { value: String(resourceId) } }],\n },\n });\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n await this.client.delete('annotations', {\n points: [toQdrantId(String(annotationId))],\n });\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search('resources', embedding, opts);\n }\n\n async searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search('annotations', embedding, opts);\n }\n\n private async search(collection: string, embedding: number[], opts: 
SearchOptions): Promise<VectorSearchResult[]> {\n const filter = this.buildFilter(opts.filter);\n\n const results = await this.client.search(collection, {\n vector: embedding,\n limit: opts.limit,\n score_threshold: opts.scoreThreshold,\n filter: filter || undefined,\n with_payload: true,\n });\n\n return results.map((r: any) => ({\n id: String(r.id),\n score: r.score,\n resourceId: r.payload.resourceId as ResourceId,\n annotationId: r.payload.annotationId as AnnotationId | undefined,\n text: r.payload.text as string,\n entityTypes: r.payload.entityTypes as string[] | undefined,\n }));\n }\n\n private buildFilter(filter?: SearchOptions['filter']): any | null {\n if (!filter) return null;\n\n const must: any[] = [];\n\n if (filter.entityTypes && filter.entityTypes.length > 0) {\n // any-of: match payloads whose `entityTypes` array contains at least one\n // of the requested types. Matches the memory store's `some(t => ...)`\n // semantics; pushing one `must` clause per type would mean all-of.\n must.push({ key: 'entityTypes', match: { any: filter.entityTypes } });\n }\n\n if (filter.resourceId) {\n must.push({ key: 'resourceId', match: { value: String(filter.resourceId) } });\n }\n\n if (filter.motivation) {\n must.push({ key: 'motivation', match: { value: filter.motivation } });\n }\n\n const must_not: any[] = [];\n\n if (filter.excludeResourceId) {\n must_not.push({ key: 'resourceId', match: { value: String(filter.excludeResourceId) } });\n }\n\n if (must.length === 0 && must_not.length === 0) return null;\n\n return {\n ...(must.length > 0 ? { must } : {}),\n ...(must_not.length > 0 ? 
{ must_not } : {}),\n };\n }\n}\n"],"mappings":";AAOA,SAAS,kBAAkB;AAQ3B,SAAS,WAAW,OAAuB;AACzC,QAAM,MAAM,WAAW,KAAK,EAAE,OAAO,KAAK,EAAE,OAAO,KAAK;AACxD,SAAO,GAAG,IAAI,MAAM,GAAG,CAAC,CAAC,IAAI,IAAI,MAAM,GAAG,EAAE,CAAC,IAAI,IAAI,MAAM,IAAI,EAAE,CAAC,IAAI,IAAI,MAAM,IAAI,EAAE,CAAC,IAAI,IAAI,MAAM,IAAI,EAAE,CAAC;AAC9G;AAQO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,SAAc;AAAA,EACd,YAAY;AAAA,EACZ;AAAA,EAER,YAAY,QAAsB;AAChC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,UAAyB;AAC7B,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,wBAAwB;AAC9D,SAAK,SAAS,IAAI,aAAa;AAAA,MAC7B,MAAM,KAAK,OAAO;AAAA,MAClB,MAAM,KAAK,OAAO;AAAA,IACpB,CAAC;AAGD,UAAM,KAAK,iBAAiB,aAAa,KAAK,OAAO,UAAU;AAC/D,UAAM,KAAK,iBAAiB,eAAe,KAAK,OAAO,UAAU;AACjE,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,SAAS;AACd,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,WAA0B;AAC9B,QAAI;AAAE,YAAM,KAAK,OAAO,iBAAiB,WAAW;AAAA,IAAG,QAAQ;AAAA,IAAsB;AACrF,QAAI;AAAE,YAAM,KAAK,OAAO,iBAAiB,aAAa;AAAA,IAAG,QAAQ;AAAA,IAAsB;AACvF,UAAM,KAAK,iBAAiB,aAAa,KAAK,OAAO,UAAU;AAC/D,UAAM,KAAK,iBAAiB,eAAe,KAAK,OAAO,UAAU;AAAA,EACnE;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAc,iBAAiB,MAAc,YAAmC;AAC9E,QAAI;AACF,YAAM,KAAK,OAAO,cAAc,IAAI;AAAA,IACtC,QAAQ;AACN,YAAM,KAAK,OAAO,iBAAiB,MAAM;AAAA,QACvC,SAAS,EAAE,MAAM,YAAY,UAAU,SAAS;AAAA,MAClD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAAyC;AAC3F,QAAI,OAAO,WAAW,EAAG;AAEzB,UAAM,SAAS,OAAO,IAAI,CAAC,WAAW;AAAA,MACpC,IAAI,WAAW,GAAG,UAAU,IAAI,MAAM,UAAU,EAAE;AAAA,MAClD,QAAQ,MAAM;AAAA,MACd,SAAS;AAAA,QACP,YAAY,OAAO,UAAU;AAAA,QAC7B,YAAY,MAAM;AAAA,QAClB,MAAM,MAAM;AAAA,MACd;AAAA,IACF,EAAE;AAEF,UAAM,KAAK,OAAO,OAAO,aAAa,EAAE,OAAO,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,UAAM,KAAK,OAAO,OAAO,eAAe;AAAA,MACtC,QAAQ,CAAC;AAAA,QACP,IAAI,WAAW,OAAO,YAAY,CAAC;AAAA,QACnC,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,cAAc,OAAO,QAAQ,YAAY;AAAA,UACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,UACrC,YAAY,QAAQ;AAAA,UACpB,aAAa,QAAQ;AAAA,UACrB,MAAM,QAAQ;AAAA,QAChB;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,sBAAsB,YAAuC;AACjE,UAAM,KAAK,OAAO,OAAO,aAAa;AAAA,MACpC,QAAQ;AAAA,QACN,MAAM
,CAAC,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,UAAU,EAAE,EAAE,CAAC;AAAA,MACpE;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,UAAM,KAAK,OAAO,OAAO,eAAe;AAAA,MACtC,QAAQ,CAAC,WAAW,OAAO,YAAY,CAAC,CAAC;AAAA,IAC3C,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,aAAa,WAAW,IAAI;AAAA,EACjD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,eAAe,WAAW,IAAI;AAAA,EACnD;AAAA,EAEA,MAAc,OAAO,YAAoB,WAAqB,MAAoD;AAChH,UAAM,SAAS,KAAK,YAAY,KAAK,MAAM;AAE3C,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,YAAY;AAAA,MACnD,QAAQ;AAAA,MACR,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK;AAAA,MACtB,QAAQ,UAAU;AAAA,MAClB,cAAc;AAAA,IAChB,CAAC;AAED,WAAO,QAAQ,IAAI,CAAC,OAAY;AAAA,MAC9B,IAAI,OAAO,EAAE,EAAE;AAAA,MACf,OAAO,EAAE;AAAA,MACT,YAAY,EAAE,QAAQ;AAAA,MACtB,cAAc,EAAE,QAAQ;AAAA,MACxB,MAAM,EAAE,QAAQ;AAAA,MAChB,aAAa,EAAE,QAAQ;AAAA,IACzB,EAAE;AAAA,EACJ;AAAA,EAEQ,YAAY,QAA8C;AAChE,QAAI,CAAC,OAAQ,QAAO;AAEpB,UAAM,OAAc,CAAC;AAErB,QAAI,OAAO,eAAe,OAAO,YAAY,SAAS,GAAG;AAIvD,WAAK,KAAK,EAAE,KAAK,eAAe,OAAO,EAAE,KAAK,OAAO,YAAY,EAAE,CAAC;AAAA,IACtE;AAEA,QAAI,OAAO,YAAY;AACrB,WAAK,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,OAAO,UAAU,EAAE,EAAE,CAAC;AAAA,IAC9E;AAEA,QAAI,OAAO,YAAY;AACrB,WAAK,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,WAAW,EAAE,CAAC;AAAA,IACtE;AAEA,UAAM,WAAkB,CAAC;AAEzB,QAAI,OAAO,mBAAmB;AAC5B,eAAS,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,OAAO,iBAAiB,EAAE,EAAE,CAAC;AAAA,IACzF;AAEA,QAAI,KAAK,WAAW,KAAK,SAAS,WAAW,EAAG,QAAO;AAEvD,WAAO;AAAA,MACL,GAAI,KAAK,SAAS,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,SAAS,SAAS,IAAI,EAAE,SAAS,IAAI,CAAC;AAAA,IAC5C;AAAA,EACF;AACF;","names":[]}
|
package/dist/qdrant-YF2TKFCE.js
DELETED
|
File without changes
|
|
File without changes
|