@semiont/vectors 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -0
- package/dist/chunk-GZRNY7LY.js +128 -0
- package/dist/chunk-GZRNY7LY.js.map +1 -0
- package/dist/chunk-INCF7JMV.js +49 -0
- package/dist/chunk-INCF7JMV.js.map +1 -0
- package/dist/chunk-LBAPGZOW.js +58 -0
- package/dist/chunk-LBAPGZOW.js.map +1 -0
- package/dist/index.d.ts +214 -0
- package/dist/index.js +209 -0
- package/dist/index.js.map +1 -0
- package/dist/ollama-H62PXJ5D.js +7 -0
- package/dist/ollama-H62PXJ5D.js.map +1 -0
- package/dist/qdrant-JXH3K3RP.js +7 -0
- package/dist/qdrant-JXH3K3RP.js.map +1 -0
- package/dist/voyage-UWOFVIYM.js +7 -0
- package/dist/voyage-UWOFVIYM.js.map +1 -0
- package/package.json +52 -0
package/README.md
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# @semiont/vectors
|
|
2
|
+
|
|
3
|
+
Vector storage, embedding, and semantic search for Semiont.
|
|
4
|
+
|
|
5
|
+
Provides a pluggable abstraction over vector databases and embedding providers, with text chunking utilities. Used by the Smelter actor to index content and by Gatherer/Matcher to retrieve semantically similar resources and annotations.
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
Two separate vector collections:
|
|
10
|
+
|
|
11
|
+
- **resources** — chunked full-text content from stored files
|
|
12
|
+
- **annotations** — W3C Web Annotation entities with motivation, entity types, and exact text
|
|
13
|
+
|
|
14
|
+
Both collections support filtered similarity search with configurable score thresholds.
|
|
15
|
+
|
|
16
|
+
## Vector Stores
|
|
17
|
+
|
|
18
|
+
### Qdrant (production)
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
import { createVectorStore } from '@semiont/vectors';
|
|
22
|
+
|
|
23
|
+
const store = await createVectorStore({
|
|
24
|
+
type: 'qdrant',
|
|
25
|
+
host: 'localhost',
|
|
26
|
+
port: 6333,
|
|
27
|
+
dimensions: 1024,
|
|
28
|
+
});
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Requires a running [Qdrant](https://qdrant.tech) instance. The `@qdrant/js-client-rest` peer dependency is lazy-loaded on `connect()`. Collections are auto-created if they don't exist.
|
|
32
|
+
|
|
33
|
+
### Memory (testing)
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
const store = await createVectorStore({
|
|
37
|
+
type: 'memory',
|
|
38
|
+
dimensions: 768,
|
|
39
|
+
});
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Brute-force cosine similarity. No external dependencies.
|
|
43
|
+
|
|
44
|
+
## Embedding Providers
|
|
45
|
+
|
|
46
|
+
### Voyage AI (cloud)
|
|
47
|
+
|
|
48
|
+
```typescript
|
|
49
|
+
import { createEmbeddingProvider } from '@semiont/vectors';
|
|
50
|
+
|
|
51
|
+
const provider = await createEmbeddingProvider({
|
|
52
|
+
type: 'voyage',
|
|
53
|
+
model: 'voyage-3', // 1024 dimensions
|
|
54
|
+
apiKey: '...',
|
|
55
|
+
});
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Models: `voyage-3` (1024), `voyage-3-lite` (512), `voyage-code-3`, `voyage-finance-2`, `voyage-law-2`.
|
|
59
|
+
|
|
60
|
+
### Ollama (local)
|
|
61
|
+
|
|
62
|
+
```typescript
|
|
63
|
+
const provider = await createEmbeddingProvider({
|
|
64
|
+
type: 'ollama',
|
|
65
|
+
model: 'nomic-embed-text', // 768 dimensions
|
|
66
|
+
baseURL: 'http://localhost:11434',
|
|
67
|
+
});
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Models: `nomic-embed-text` (768), `all-minilm` (384), `mxbai-embed-large` (1024), `snowflake-arctic-embed` (1024).
|
|
71
|
+
|
|
72
|
+
## Text Chunking
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
import { chunkText, DEFAULT_CHUNKING_CONFIG } from '@semiont/vectors';
|
|
76
|
+
|
|
77
|
+
const chunks = chunkText(longDocument, { chunkSize: 512, overlap: 50 });
|
|
78
|
+
// => string[]
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Splits on paragraph boundaries, then sentence boundaries, then word boundaries. `chunkSize` and `overlap` are in tokens (~4 characters per token).
|
|
82
|
+
|
|
83
|
+
## Search
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
const embedding = await provider.embed('quantum computing');
|
|
87
|
+
|
|
88
|
+
// Search resources
|
|
89
|
+
const resources = await store.searchResources(embedding, {
|
|
90
|
+
limit: 10,
|
|
91
|
+
scoreThreshold: 0.7,
|
|
92
|
+
filter: { excludeResourceId: 'res-already-open' },
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
// Search annotations
|
|
96
|
+
const annotations = await store.searchAnnotations(embedding, {
|
|
97
|
+
limit: 5,
|
|
98
|
+
filter: { entityTypes: ['Person', 'Organization'], motivation: 'describing' },
|
|
99
|
+
});
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Each result includes `id`, `score`, `resourceId`, `text`, and optionally `annotationId` and `entityTypes`.
|
|
103
|
+
|
|
104
|
+
## Writing Vectors
|
|
105
|
+
|
|
106
|
+
```typescript
|
|
107
|
+
// Index a resource's content
|
|
108
|
+
const chunks = chunkText(content, DEFAULT_CHUNKING_CONFIG);
|
|
109
|
+
const embeddings = await provider.embedBatch(chunks);
|
|
110
|
+
await store.upsertResourceVectors(resourceId, chunks.map((text, i) => ({
|
|
111
|
+
chunkIndex: i,
|
|
112
|
+
text,
|
|
113
|
+
embedding: embeddings[i],
|
|
114
|
+
})));
|
|
115
|
+
|
|
116
|
+
// Index an annotation
|
|
117
|
+
const vec = await provider.embed(annotation.exactText);
|
|
118
|
+
await store.upsertAnnotationVector(annotationId, vec, {
|
|
119
|
+
annotationId,
|
|
120
|
+
resourceId,
|
|
121
|
+
motivation: 'describing',
|
|
122
|
+
entityTypes: ['Person'],
|
|
123
|
+
exactText: 'Marie Curie',
|
|
124
|
+
});
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Configuration
|
|
128
|
+
|
|
129
|
+
In `semiont.toml`:
|
|
130
|
+
|
|
131
|
+
```toml
|
|
132
|
+
[environments.local.services.vectors]
|
|
133
|
+
type = "qdrant"
|
|
134
|
+
host = "localhost"
|
|
135
|
+
port = 6333
|
|
136
|
+
|
|
137
|
+
[environments.local.services.vectors.embedding]
|
|
138
|
+
type = "voyage"
|
|
139
|
+
model = "voyage-3"
|
|
140
|
+
|
|
141
|
+
[environments.local.services.vectors.chunking]
|
|
142
|
+
chunkSize = 512
|
|
143
|
+
overlap = 50
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
Apache-2.0
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// src/store/qdrant.ts
var QdrantVectorStore = class {
  client = null;
  connected = false;
  config;
  constructor(config) {
    this.config = config;
  }
  /**
   * Lazily import the Qdrant client (a peer dependency), open the
   * connection, and make sure both collections exist before reporting
   * the store as connected.
   */
  async connect() {
    const { QdrantClient } = await import("@qdrant/js-client-rest");
    const { host, port, dimensions } = this.config;
    this.client = new QdrantClient({ host, port });
    for (const collection of ["resources", "annotations"]) {
      await this.ensureCollection(collection, dimensions);
    }
    this.connected = true;
  }
  /** Drop the client reference; the REST client needs no explicit close. */
  async disconnect() {
    this.client = null;
    this.connected = false;
  }
  isConnected() {
    return this.connected;
  }
  /**
   * Create collection `name` with cosine distance if it does not exist.
   * NOTE(review): any getCollection failure (including network errors) is
   * treated as "collection missing" and triggers a create — confirm this
   * best-effort behavior is intentional.
   */
  async ensureCollection(name, dimensions) {
    try {
      await this.client.getCollection(name);
    } catch {
      await this.client.createCollection(name, {
        vectors: { size: dimensions, distance: "Cosine" }
      });
    }
  }
  /**
   * Upsert one point per chunk. Point ids are `${resourceId}-${chunkIndex}`,
   * so re-indexing a resource overwrites its previous chunk points.
   * NOTE(review): Qdrant normally accepts only unsigned-int or UUID point
   * ids — verify these composite string ids work against the deployed
   * Qdrant version.
   */
  async upsertResourceVectors(resourceId, chunks) {
    if (chunks.length === 0) return;
    const points = chunks.map(({ chunkIndex, text, embedding }) => ({
      id: `${resourceId}-${chunkIndex}`,
      vector: embedding,
      payload: { resourceId: String(resourceId), chunkIndex, text }
    }));
    await this.client.upsert("resources", { points });
  }
  /** Store a single annotation vector keyed by the annotation id. */
  async upsertAnnotationVector(annotationId, embedding, payload) {
    const point = {
      id: String(annotationId),
      vector: embedding,
      payload: {
        annotationId: String(payload.annotationId),
        resourceId: String(payload.resourceId),
        motivation: payload.motivation,
        entityTypes: payload.entityTypes,
        text: payload.exactText
      }
    };
    await this.client.upsert("annotations", { points: [point] });
  }
  /** Remove every chunk point belonging to `resourceId`. */
  async deleteResourceVectors(resourceId) {
    await this.client.delete("resources", {
      filter: {
        must: [{ key: "resourceId", match: { value: String(resourceId) } }]
      }
    });
  }
  /** Remove the single point stored for `annotationId`. */
  async deleteAnnotationVector(annotationId) {
    await this.client.delete("annotations", { points: [String(annotationId)] });
  }
  async searchResources(embedding, opts) {
    return this.search("resources", embedding, opts);
  }
  async searchAnnotations(embedding, opts) {
    return this.search("annotations", embedding, opts);
  }
  /** Similarity search over `collection`, mapping Qdrant hits to plain results. */
  async search(collection, embedding, opts) {
    const builtFilter = this.buildFilter(opts.filter);
    const hits = await this.client.search(collection, {
      vector: embedding,
      limit: opts.limit,
      score_threshold: opts.scoreThreshold,
      filter: builtFilter ?? undefined,
      with_payload: true
    });
    return hits.map((hit) => ({
      id: String(hit.id),
      score: hit.score,
      resourceId: hit.payload.resourceId,
      annotationId: hit.payload.annotationId,
      text: hit.payload.text,
      entityTypes: hit.payload.entityTypes
    }));
  }
  /**
   * Translate the public filter shape into Qdrant must/must_not clauses.
   * Returns null when no clauses apply, so the caller can omit the filter.
   */
  buildFilter(filter) {
    if (!filter) return null;
    const include = [];
    const exclude = [];
    for (const entityType of filter.entityTypes ?? []) {
      include.push({ key: "entityTypes", match: { value: entityType } });
    }
    if (filter.resourceId) {
      include.push({ key: "resourceId", match: { value: String(filter.resourceId) } });
    }
    if (filter.motivation) {
      include.push({ key: "motivation", match: { value: filter.motivation } });
    }
    if (filter.excludeResourceId) {
      exclude.push({ key: "resourceId", match: { value: String(filter.excludeResourceId) } });
    }
    if (include.length === 0 && exclude.length === 0) return null;
    const clauses = {};
    if (include.length > 0) clauses.must = include;
    if (exclude.length > 0) clauses.must_not = exclude;
    return clauses;
  }
};

export {
  QdrantVectorStore
};
|
|
128
|
+
//# sourceMappingURL=chunk-GZRNY7LY.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/store/qdrant.ts"],"sourcesContent":["/**\n * Qdrant VectorStore Implementation\n *\n * Uses the Qdrant REST API via @qdrant/js-client-rest.\n * Manages two collections: 'resources' and 'annotations'.\n */\n\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\nexport interface QdrantConfig {\n host: string;\n port: number;\n dimensions: number;\n}\n\nexport class QdrantVectorStore implements VectorStore {\n private client: any = null;\n private connected = false;\n private config: QdrantConfig;\n\n constructor(config: QdrantConfig) {\n this.config = config;\n }\n\n async connect(): Promise<void> {\n const { QdrantClient } = await import('@qdrant/js-client-rest');\n this.client = new QdrantClient({\n host: this.config.host,\n port: this.config.port,\n });\n\n // Ensure collections exist\n await this.ensureCollection('resources', this.config.dimensions);\n await this.ensureCollection('annotations', this.config.dimensions);\n this.connected = true;\n }\n\n async disconnect(): Promise<void> {\n this.client = null;\n this.connected = false;\n }\n\n isConnected(): boolean {\n return this.connected;\n }\n\n private async ensureCollection(name: string, dimensions: number): Promise<void> {\n try {\n await this.client.getCollection(name);\n } catch {\n await this.client.createCollection(name, {\n vectors: { size: dimensions, distance: 'Cosine' },\n });\n }\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void> {\n if (chunks.length === 0) return;\n\n const points = chunks.map((chunk) => ({\n id: `${resourceId}-${chunk.chunkIndex}`,\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n },\n }));\n\n await this.client.upsert('resources', { points });\n }\n\n async upsertAnnotationVector(\n 
annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n await this.client.upsert('annotations', {\n points: [{\n id: String(annotationId),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n }],\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n await this.client.delete('resources', {\n filter: {\n must: [{ key: 'resourceId', match: { value: String(resourceId) } }],\n },\n });\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n await this.client.delete('annotations', {\n points: [String(annotationId)],\n });\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search('resources', embedding, opts);\n }\n\n async searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search('annotations', embedding, opts);\n }\n\n private async search(collection: string, embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n const filter = this.buildFilter(opts.filter);\n\n const results = await this.client.search(collection, {\n vector: embedding,\n limit: opts.limit,\n score_threshold: opts.scoreThreshold,\n filter: filter || undefined,\n with_payload: true,\n });\n\n return results.map((r: any) => ({\n id: String(r.id),\n score: r.score,\n resourceId: r.payload.resourceId as ResourceId,\n annotationId: r.payload.annotationId as AnnotationId | undefined,\n text: r.payload.text as string,\n entityTypes: r.payload.entityTypes as string[] | undefined,\n }));\n }\n\n private buildFilter(filter?: SearchOptions['filter']): any | null {\n if (!filter) return null;\n\n const must: any[] = [];\n\n if (filter.entityTypes && filter.entityTypes.length > 0) {\n for (const et 
of filter.entityTypes) {\n must.push({ key: 'entityTypes', match: { value: et } });\n }\n }\n\n if (filter.resourceId) {\n must.push({ key: 'resourceId', match: { value: String(filter.resourceId) } });\n }\n\n if (filter.motivation) {\n must.push({ key: 'motivation', match: { value: filter.motivation } });\n }\n\n const must_not: any[] = [];\n\n if (filter.excludeResourceId) {\n must_not.push({ key: 'resourceId', match: { value: String(filter.excludeResourceId) } });\n }\n\n if (must.length === 0 && must_not.length === 0) return null;\n\n return {\n ...(must.length > 0 ? { must } : {}),\n ...(must_not.length > 0 ? { must_not } : {}),\n };\n }\n}\n"],"mappings":";AAgBO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,SAAc;AAAA,EACd,YAAY;AAAA,EACZ;AAAA,EAER,YAAY,QAAsB;AAChC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,UAAyB;AAC7B,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,wBAAwB;AAC9D,SAAK,SAAS,IAAI,aAAa;AAAA,MAC7B,MAAM,KAAK,OAAO;AAAA,MAClB,MAAM,KAAK,OAAO;AAAA,IACpB,CAAC;AAGD,UAAM,KAAK,iBAAiB,aAAa,KAAK,OAAO,UAAU;AAC/D,UAAM,KAAK,iBAAiB,eAAe,KAAK,OAAO,UAAU;AACjE,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,SAAS;AACd,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAc,iBAAiB,MAAc,YAAmC;AAC9E,QAAI;AACF,YAAM,KAAK,OAAO,cAAc,IAAI;AAAA,IACtC,QAAQ;AACN,YAAM,KAAK,OAAO,iBAAiB,MAAM;AAAA,QACvC,SAAS,EAAE,MAAM,YAAY,UAAU,SAAS;AAAA,MAClD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAAyC;AAC3F,QAAI,OAAO,WAAW,EAAG;AAEzB,UAAM,SAAS,OAAO,IAAI,CAAC,WAAW;AAAA,MACpC,IAAI,GAAG,UAAU,IAAI,MAAM,UAAU;AAAA,MACrC,QAAQ,MAAM;AAAA,MACd,SAAS;AAAA,QACP,YAAY,OAAO,UAAU;AAAA,QAC7B,YAAY,MAAM;AAAA,QAClB,MAAM,MAAM;AAAA,MACd;AAAA,IACF,EAAE;AAEF,UAAM,KAAK,OAAO,OAAO,aAAa,EAAE,OAAO,CAAC;AAAA,EAClD;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,UAAM,KAAK,OAAO,OAAO,eAAe;AAAA,MACtC,QAAQ,CAAC;AAAA,QACP,IAAI,OAAO,YAAY;AAAA,QACvB,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,cAAc,OAAO,QAAQ,YAAY;AAAA,UACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,UACrC,YAAY,QAAQ;AAAA,UACpB,aAAa,QAAQ;AAAA,UACrB,MAAM,QAAQ;AAAA,QAChB;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA
,EAEA,MAAM,sBAAsB,YAAuC;AACjE,UAAM,KAAK,OAAO,OAAO,aAAa;AAAA,MACpC,QAAQ;AAAA,QACN,MAAM,CAAC,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,UAAU,EAAE,EAAE,CAAC;AAAA,MACpE;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,UAAM,KAAK,OAAO,OAAO,eAAe;AAAA,MACtC,QAAQ,CAAC,OAAO,YAAY,CAAC;AAAA,IAC/B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,aAAa,WAAW,IAAI;AAAA,EACjD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,eAAe,WAAW,IAAI;AAAA,EACnD;AAAA,EAEA,MAAc,OAAO,YAAoB,WAAqB,MAAoD;AAChH,UAAM,SAAS,KAAK,YAAY,KAAK,MAAM;AAE3C,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,YAAY;AAAA,MACnD,QAAQ;AAAA,MACR,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK;AAAA,MACtB,QAAQ,UAAU;AAAA,MAClB,cAAc;AAAA,IAChB,CAAC;AAED,WAAO,QAAQ,IAAI,CAAC,OAAY;AAAA,MAC9B,IAAI,OAAO,EAAE,EAAE;AAAA,MACf,OAAO,EAAE;AAAA,MACT,YAAY,EAAE,QAAQ;AAAA,MACtB,cAAc,EAAE,QAAQ;AAAA,MACxB,MAAM,EAAE,QAAQ;AAAA,MAChB,aAAa,EAAE,QAAQ;AAAA,IACzB,EAAE;AAAA,EACJ;AAAA,EAEQ,YAAY,QAA8C;AAChE,QAAI,CAAC,OAAQ,QAAO;AAEpB,UAAM,OAAc,CAAC;AAErB,QAAI,OAAO,eAAe,OAAO,YAAY,SAAS,GAAG;AACvD,iBAAW,MAAM,OAAO,aAAa;AACnC,aAAK,KAAK,EAAE,KAAK,eAAe,OAAO,EAAE,OAAO,GAAG,EAAE,CAAC;AAAA,MACxD;AAAA,IACF;AAEA,QAAI,OAAO,YAAY;AACrB,WAAK,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,OAAO,UAAU,EAAE,EAAE,CAAC;AAAA,IAC9E;AAEA,QAAI,OAAO,YAAY;AACrB,WAAK,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,WAAW,EAAE,CAAC;AAAA,IACtE;AAEA,UAAM,WAAkB,CAAC;AAEzB,QAAI,OAAO,mBAAmB;AAC5B,eAAS,KAAK,EAAE,KAAK,cAAc,OAAO,EAAE,OAAO,OAAO,OAAO,iBAAiB,EAAE,EAAE,CAAC;AAAA,IACzF;AAEA,QAAI,KAAK,WAAW,KAAK,SAAS,WAAW,EAAG,QAAO;AAEvD,WAAO;AAAA,MACL,GAAI,KAAK,SAAS,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,SAAS,SAAS,IAAI,EAAE,SAAS,IAAI,CAAC;AAAA,IAC5C;AAAA,EACF;AACF;","names":[]}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// src/embedding/voyage.ts
// Known output dimensionality per Voyage model.
var VOYAGE_DIMENSIONS = {
  "voyage-3": 1024,
  "voyage-3-lite": 512,
  "voyage-code-3": 1024,
  "voyage-finance-2": 1024,
  "voyage-law-2": 1024
};
var VoyageEmbeddingProvider = class {
  config;
  constructor(config) {
    this.config = config;
  }
  /** Embed a single text string; delegates to the batch endpoint. */
  async embed(text) {
    const [embedding] = await this.embedBatch([text]);
    return embedding;
  }
  /**
   * Embed multiple texts in a single Voyage API call.
   * Results are returned in the same order as the input texts.
   * Throws an Error including the HTTP status and response body on failure.
   */
  async embedBatch(texts) {
    // Short-circuit: an empty batch needs no round-trip (and the API
    // would reject an empty `input` anyway).
    if (texts.length === 0) return [];
    const endpoint = this.config.endpoint ?? "https://api.voyageai.com/v1/embeddings";
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${this.config.apiKey}`
      },
      body: JSON.stringify({
        model: this.config.model,
        input: texts
      })
    });
    if (!response.ok) {
      const body = await response.text();
      throw new Error(`Voyage API error ${response.status}: ${body}`);
    }
    const json = await response.json();
    return json.data.map((d) => d.embedding);
  }
  /** Vector dimensionality; unknown models fall back to 1024 (the common Voyage size). */
  dimensions() {
    return VOYAGE_DIMENSIONS[this.config.model] ?? 1024;
  }
  /** The configured model identifier, e.g. "voyage-3". */
  model() {
    return this.config.model;
  }
};

export {
  VoyageEmbeddingProvider
};
|
|
49
|
+
//# sourceMappingURL=chunk-INCF7JMV.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/embedding/voyage.ts"],"sourcesContent":["/**\n * Voyage AI Embedding Provider\n *\n * Cloud embedding via the Voyage AI API (partner of Anthropic).\n * Uses the same API key as Anthropic inference.\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface VoyageConfig {\n apiKey: string;\n model: string;\n endpoint?: string;\n}\n\nconst VOYAGE_DIMENSIONS: Record<string, number> = {\n 'voyage-3': 1024,\n 'voyage-3-lite': 512,\n 'voyage-code-3': 1024,\n 'voyage-finance-2': 1024,\n 'voyage-law-2': 1024,\n};\n\nexport class VoyageEmbeddingProvider implements EmbeddingProvider {\n private config: VoyageConfig;\n\n constructor(config: VoyageConfig) {\n this.config = config;\n }\n\n async embed(text: string): Promise<number[]> {\n const results = await this.embedBatch([text]);\n return results[0];\n }\n\n async embedBatch(texts: string[]): Promise<number[][]> {\n const endpoint = this.config.endpoint ?? 'https://api.voyageai.com/v1/embeddings';\n\n const response = await fetch(endpoint, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this.config.apiKey}`,\n },\n body: JSON.stringify({\n model: this.config.model,\n input: texts,\n }),\n });\n\n if (!response.ok) {\n const body = await response.text();\n throw new Error(`Voyage API error ${response.status}: ${body}`);\n }\n\n const json = await response.json() as { data: Array<{ embedding: number[] }> };\n return json.data.map(d => d.embedding);\n }\n\n dimensions(): number {\n return VOYAGE_DIMENSIONS[this.config.model] ?? 
1024;\n }\n\n model(): string {\n return this.config.model;\n }\n}\n"],"mappings":";AAeA,IAAM,oBAA4C;AAAA,EAChD,YAAY;AAAA,EACZ,iBAAiB;AAAA,EACjB,iBAAiB;AAAA,EACjB,oBAAoB;AAAA,EACpB,gBAAgB;AAClB;AAEO,IAAM,0BAAN,MAA2D;AAAA,EACxD;AAAA,EAER,YAAY,QAAsB;AAChC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,MAAM,MAAiC;AAC3C,UAAM,UAAU,MAAM,KAAK,WAAW,CAAC,IAAI,CAAC;AAC5C,WAAO,QAAQ,CAAC;AAAA,EAClB;AAAA,EAEA,MAAM,WAAW,OAAsC;AACrD,UAAM,WAAW,KAAK,OAAO,YAAY;AAEzC,UAAM,WAAW,MAAM,MAAM,UAAU;AAAA,MACrC,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,iBAAiB,UAAU,KAAK,OAAO,MAAM;AAAA,MAC/C;AAAA,MACA,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK,OAAO;AAAA,QACnB,OAAO;AAAA,MACT,CAAC;AAAA,IACH,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,YAAM,IAAI,MAAM,oBAAoB,SAAS,MAAM,KAAK,IAAI,EAAE;AAAA,IAChE;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AACjC,WAAO,KAAK,KAAK,IAAI,OAAK,EAAE,SAAS;AAAA,EACvC;AAAA,EAEA,aAAqB;AACnB,WAAO,kBAAkB,KAAK,OAAO,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,QAAgB;AACd,WAAO,KAAK,OAAO;AAAA,EACrB;AACF;","names":[]}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// src/embedding/ollama.ts
// Known output dimensionality per Ollama embedding model.
var OLLAMA_DIMENSIONS = {
  "nomic-embed-text": 768,
  "all-minilm": 384,
  "mxbai-embed-large": 1024,
  "snowflake-arctic-embed": 1024
};
var OllamaEmbeddingProvider = class {
  config;
  constructor(config) {
    this.config = config;
  }
  /**
   * POST `input` (a string or string[]) to Ollama's /api/embed endpoint
   * and return the parsed JSON body. The single and batch paths were
   * previously duplicated verbatim; this helper is the one shared
   * request/error path. Throws with status and body on a non-2xx response.
   */
  async #request(input) {
    const baseURL = this.config.baseURL ?? "http://localhost:11434";
    const response = await fetch(`${baseURL}/api/embed`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: this.config.model,
        input
      })
    });
    if (!response.ok) {
      const body = await response.text();
      throw new Error(`Ollama embed error ${response.status}: ${body}`);
    }
    return response.json();
  }
  /** Embed a single text string (sends the string directly, as before). */
  async embed(text) {
    const json = await this.#request(text);
    return json.embeddings[0];
  }
  /** Embed multiple texts in one call — Ollama's /api/embed accepts array input. */
  async embedBatch(texts) {
    // Short-circuit: an empty batch needs no round-trip.
    if (texts.length === 0) return [];
    const json = await this.#request(texts);
    return json.embeddings;
  }
  /** Vector dimensionality; unknown models fall back to 768 (nomic-embed-text's size). */
  dimensions() {
    return OLLAMA_DIMENSIONS[this.config.model] ?? 768;
  }
  /** The configured model identifier, e.g. "nomic-embed-text". */
  model() {
    return this.config.model;
  }
};

export {
  OllamaEmbeddingProvider
};
|
|
58
|
+
//# sourceMappingURL=chunk-LBAPGZOW.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/embedding/ollama.ts"],"sourcesContent":["/**\n * Ollama Embedding Provider\n *\n * Local embedding via the Ollama API.\n * Uses models like nomic-embed-text, all-minilm, etc.\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface OllamaEmbeddingConfig {\n model: string;\n baseURL?: string;\n}\n\nconst OLLAMA_DIMENSIONS: Record<string, number> = {\n 'nomic-embed-text': 768,\n 'all-minilm': 384,\n 'mxbai-embed-large': 1024,\n 'snowflake-arctic-embed': 1024,\n};\n\nexport class OllamaEmbeddingProvider implements EmbeddingProvider {\n private config: OllamaEmbeddingConfig;\n\n constructor(config: OllamaEmbeddingConfig) {\n this.config = config;\n }\n\n async embed(text: string): Promise<number[]> {\n const baseURL = this.config.baseURL ?? 'http://localhost:11434';\n\n const response = await fetch(`${baseURL}/api/embed`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n model: this.config.model,\n input: text,\n }),\n });\n\n if (!response.ok) {\n const body = await response.text();\n throw new Error(`Ollama embed error ${response.status}: ${body}`);\n }\n\n const json = await response.json() as { embeddings: number[][] };\n return json.embeddings[0];\n }\n\n async embedBatch(texts: string[]): Promise<number[][]> {\n // Ollama's /api/embed supports batch input\n const baseURL = this.config.baseURL ?? 'http://localhost:11434';\n\n const response = await fetch(`${baseURL}/api/embed`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n model: this.config.model,\n input: texts,\n }),\n });\n\n if (!response.ok) {\n const body = await response.text();\n throw new Error(`Ollama embed error ${response.status}: ${body}`);\n }\n\n const json = await response.json() as { embeddings: number[][] };\n return json.embeddings;\n }\n\n dimensions(): number {\n return OLLAMA_DIMENSIONS[this.config.model] ?? 
768;\n }\n\n model(): string {\n return this.config.model;\n }\n}\n"],"mappings":";AAcA,IAAM,oBAA4C;AAAA,EAChD,oBAAoB;AAAA,EACpB,cAAc;AAAA,EACd,qBAAqB;AAAA,EACrB,0BAA0B;AAC5B;AAEO,IAAM,0BAAN,MAA2D;AAAA,EACxD;AAAA,EAER,YAAY,QAA+B;AACzC,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,MAAM,MAAiC;AAC3C,UAAM,UAAU,KAAK,OAAO,WAAW;AAEvC,UAAM,WAAW,MAAM,MAAM,GAAG,OAAO,cAAc;AAAA,MACnD,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK,OAAO;AAAA,QACnB,OAAO;AAAA,MACT,CAAC;AAAA,IACH,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,YAAM,IAAI,MAAM,sBAAsB,SAAS,MAAM,KAAK,IAAI,EAAE;AAAA,IAClE;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AACjC,WAAO,KAAK,WAAW,CAAC;AAAA,EAC1B;AAAA,EAEA,MAAM,WAAW,OAAsC;AAErD,UAAM,UAAU,KAAK,OAAO,WAAW;AAEvC,UAAM,WAAW,MAAM,MAAM,GAAG,OAAO,cAAc;AAAA,MACnD,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK,OAAO;AAAA,QACnB,OAAO;AAAA,MACT,CAAC;AAAA,IACH,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,YAAM,IAAI,MAAM,sBAAsB,SAAS,MAAM,KAAK,IAAI,EAAE;AAAA,IAClE;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AACjC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,aAAqB;AACnB,WAAO,kBAAkB,KAAK,OAAO,KAAK,KAAK;AAAA,EACjD;AAAA,EAEA,QAAgB;AACd,WAAO,KAAK,OAAO;AAAA,EACrB;AACF;","names":[]}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { AnnotationId, ResourceId } from '@semiont/core';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* VectorStore Interface
|
|
5
|
+
*
|
|
6
|
+
* Abstraction over vector database backends (Qdrant, memory).
|
|
7
|
+
* Stores pre-computed embedding vectors with metadata payloads
|
|
8
|
+
* and provides similarity search with payload filtering.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
interface EmbeddingChunk {
|
|
12
|
+
chunkIndex: number;
|
|
13
|
+
text: string;
|
|
14
|
+
embedding: number[];
|
|
15
|
+
}
|
|
16
|
+
interface AnnotationPayload {
|
|
17
|
+
annotationId: AnnotationId;
|
|
18
|
+
resourceId: ResourceId;
|
|
19
|
+
motivation: string;
|
|
20
|
+
entityTypes: string[];
|
|
21
|
+
exactText: string;
|
|
22
|
+
}
|
|
23
|
+
interface VectorSearchResult {
|
|
24
|
+
id: string;
|
|
25
|
+
score: number;
|
|
26
|
+
resourceId: ResourceId;
|
|
27
|
+
annotationId?: AnnotationId;
|
|
28
|
+
text: string;
|
|
29
|
+
entityTypes?: string[];
|
|
30
|
+
}
|
|
31
|
+
interface SearchOptions {
|
|
32
|
+
limit: number;
|
|
33
|
+
scoreThreshold?: number;
|
|
34
|
+
filter?: {
|
|
35
|
+
entityTypes?: string[];
|
|
36
|
+
resourceId?: ResourceId;
|
|
37
|
+
motivation?: string;
|
|
38
|
+
excludeResourceId?: ResourceId;
|
|
39
|
+
};
|
|
40
|
+
}
|
|
41
|
+
interface VectorStore {
|
|
42
|
+
connect(): Promise<void>;
|
|
43
|
+
disconnect(): Promise<void>;
|
|
44
|
+
isConnected(): boolean;
|
|
45
|
+
upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void>;
|
|
46
|
+
upsertAnnotationVector(annotationId: AnnotationId, embedding: number[], payload: AnnotationPayload): Promise<void>;
|
|
47
|
+
deleteResourceVectors(resourceId: ResourceId): Promise<void>;
|
|
48
|
+
deleteAnnotationVector(annotationId: AnnotationId): Promise<void>;
|
|
49
|
+
searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
50
|
+
searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Qdrant VectorStore Implementation
|
|
55
|
+
*
|
|
56
|
+
* Uses the Qdrant REST API via @qdrant/js-client-rest.
|
|
57
|
+
* Manages two collections: 'resources' and 'annotations'.
|
|
58
|
+
*/
|
|
59
|
+
|
|
60
|
+
interface QdrantConfig {
|
|
61
|
+
host: string;
|
|
62
|
+
port: number;
|
|
63
|
+
dimensions: number;
|
|
64
|
+
}
|
|
65
|
+
declare class QdrantVectorStore implements VectorStore {
|
|
66
|
+
private client;
|
|
67
|
+
private connected;
|
|
68
|
+
private config;
|
|
69
|
+
constructor(config: QdrantConfig);
|
|
70
|
+
connect(): Promise<void>;
|
|
71
|
+
disconnect(): Promise<void>;
|
|
72
|
+
isConnected(): boolean;
|
|
73
|
+
private ensureCollection;
|
|
74
|
+
upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void>;
|
|
75
|
+
upsertAnnotationVector(annotationId: AnnotationId, embedding: number[], payload: AnnotationPayload): Promise<void>;
|
|
76
|
+
deleteResourceVectors(resourceId: ResourceId): Promise<void>;
|
|
77
|
+
deleteAnnotationVector(annotationId: AnnotationId): Promise<void>;
|
|
78
|
+
searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
79
|
+
searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
|
|
80
|
+
private search;
|
|
81
|
+
private buildFilter;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
 * In-Memory VectorStore Implementation
 *
 * For testing and development without a running Qdrant instance.
 * Uses brute-force cosine similarity search.
 */

declare class MemoryVectorStore implements VectorStore {
    private resources;
    private annotations;
    private connected;
    connect(): Promise<void>;
    disconnect(): Promise<void>;
    isConnected(): boolean;
    /** Replaces any vectors previously stored for the same resource. */
    upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void>;
    /** Replaces any vector previously stored for the same annotation. */
    upsertAnnotationVector(annotationId: AnnotationId, embedding: number[], payload: AnnotationPayload): Promise<void>;
    deleteResourceVectors(resourceId: ResourceId): Promise<void>;
    deleteAnnotationVector(annotationId: AnnotationId): Promise<void>;
    searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
    searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]>;
    private search;
    private toResult;
}
|
|
107
|
+
|
|
108
|
+
/**
 * VectorStore Factory
 *
 * Creates a VectorStore instance based on configuration.
 */

interface VectorStoreConfig {
    /** Backend selection: 'qdrant' (production) or 'memory' (testing). */
    type: 'qdrant' | 'memory';
    /** Qdrant host; defaults to "localhost" when omitted. */
    host?: string;
    /** Qdrant port; defaults to 6333 when omitted. */
    port?: number;
    /** Vector dimensionality for the store's collections. */
    dimensions: number;
}
/**
 * Create (and connect) a VectorStore. The first store created is cached as a
 * module-level singleton; subsequent calls return it and ignore their config.
 */
declare function createVectorStore(config: VectorStoreConfig): Promise<VectorStore>;
/** The cached singleton store, or null if createVectorStore has not run yet. */
declare function getVectorStore(): VectorStore | null;
|
|
122
|
+
|
|
123
|
+
/**
 * EmbeddingProvider Interface
 *
 * Abstraction over embedding model providers (Voyage AI, Ollama).
 * Converts text into dense vector representations for similarity search.
 */
interface EmbeddingProvider {
    /** Embed a single text string. */
    embed(text: string): Promise<number[]>;
    /** Embed multiple texts in a single batch call. */
    embedBatch(texts: string[]): Promise<number[][]>;
    /**
     * The dimensionality of vectors produced by this provider.
     * NOTE(review): presumably must agree with the VectorStore's configured
     * dimensions — confirm against callers.
     */
    dimensions(): number;
    /** The model identifier (e.g. "voyage-3", "nomic-embed-text"). */
    model(): string;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Voyage AI Embedding Provider
 *
 * Cloud embedding via the Voyage AI API (partner of Anthropic).
 * Uses the same API key as Anthropic inference.
 */

interface VoyageConfig {
    /** Voyage API key; the embedding factory rejects configs without one. */
    apiKey: string;
    /** Voyage model identifier, e.g. "voyage-3". */
    model: string;
    /** Optional override of the API endpoint URL. */
    endpoint?: string;
}
declare class VoyageEmbeddingProvider implements EmbeddingProvider {
    private config;
    constructor(config: VoyageConfig);
    embed(text: string): Promise<number[]>;
    embedBatch(texts: string[]): Promise<number[][]>;
    dimensions(): number;
    model(): string;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Ollama Embedding Provider
 *
 * Local embedding via the Ollama API.
 * Uses models like nomic-embed-text, all-minilm, etc.
 */

interface OllamaEmbeddingConfig {
    /** Ollama model name, e.g. "nomic-embed-text". */
    model: string;
    /** Base URL of the local Ollama server; optional. */
    baseURL?: string;
}
declare class OllamaEmbeddingProvider implements EmbeddingProvider {
    private config;
    constructor(config: OllamaEmbeddingConfig);
    embed(text: string): Promise<number[]>;
    embedBatch(texts: string[]): Promise<number[][]>;
    dimensions(): number;
    model(): string;
}
|
|
180
|
+
|
|
181
|
+
/**
 * EmbeddingProvider Factory
 */

interface EmbeddingConfig {
    /** Provider selection: 'voyage' (cloud) or 'ollama' (local). */
    type: 'voyage' | 'ollama';
    /** Model identifier passed through to the selected provider. */
    model: string;
    /** Required when type is 'voyage'; the factory throws without it. */
    apiKey?: string;
    /** Ollama base URL (used only when type is 'ollama'). */
    baseURL?: string;
    /** Voyage endpoint override (used only when type is 'voyage'). */
    endpoint?: string;
}
/**
 * Create an EmbeddingProvider from configuration.
 * Throws on an unknown provider type or a missing Voyage apiKey.
 */
declare function createEmbeddingProvider(config: EmbeddingConfig): Promise<EmbeddingProvider>;
|
|
193
|
+
|
|
194
|
+
/**
 * Text Chunking Utilities
 *
 * Splits long text into overlapping chunks for embedding.
 * Each chunk is a passage that fits within the embedding model's context window.
 */
interface ChunkingConfig {
    /** Approximate tokens per chunk (estimated at ~4 characters per token). */
    chunkSize: number;
    /** Approximate tokens of overlap between adjacent chunks. */
    overlap: number;
}
/** Defaults: 512-token chunks with 64 tokens of overlap. */
declare const DEFAULT_CHUNKING_CONFIG: ChunkingConfig;
/**
 * Split text into overlapping chunks.
 *
 * Splits on paragraph boundaries when possible, falling back to sentence
 * boundaries, then word boundaries. Each chunk overlaps with the previous
 * by `overlap` tokens worth of text. Text that already fits in a single
 * chunk is returned as a one-element array.
 */
declare function chunkText(text: string, config?: ChunkingConfig): string[];
|
|
213
|
+
|
|
214
|
+
export { type AnnotationPayload, type ChunkingConfig, DEFAULT_CHUNKING_CONFIG, type EmbeddingChunk, type EmbeddingConfig, type EmbeddingProvider, MemoryVectorStore, type OllamaEmbeddingConfig, OllamaEmbeddingProvider, type QdrantConfig, QdrantVectorStore, type SearchOptions, type VectorSearchResult, type VectorStore, type VectorStoreConfig, type VoyageConfig, VoyageEmbeddingProvider, chunkText, createEmbeddingProvider, createVectorStore, getVectorStore };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import {
|
|
2
|
+
QdrantVectorStore
|
|
3
|
+
} from "./chunk-GZRNY7LY.js";
|
|
4
|
+
import {
|
|
5
|
+
VoyageEmbeddingProvider
|
|
6
|
+
} from "./chunk-INCF7JMV.js";
|
|
7
|
+
import {
|
|
8
|
+
OllamaEmbeddingProvider
|
|
9
|
+
} from "./chunk-LBAPGZOW.js";
|
|
10
|
+
|
|
11
|
+
// src/store/memory.ts

/**
 * Cosine similarity between two equal-length vectors.
 * Returns 0 when either vector has zero magnitude (avoids division by zero).
 * Range is [-1, 1]; negative values mean the vectors point in opposing
 * directions.
 */
function cosineSimilarity(a, b) {
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dotProduct / denom;
}

/**
 * In-memory VectorStore for testing and development: brute-force cosine
 * similarity over two point lists ('resources' and 'annotations').
 */
var MemoryVectorStore = class {
  resources = [];
  annotations = [];
  connected = false;

  async connect() {
    this.connected = true;
  }

  async disconnect() {
    this.connected = false;
  }

  isConnected() {
    return this.connected;
  }

  /** Replace all chunk vectors previously stored for this resource. */
  async upsertResourceVectors(resourceId, chunks) {
    this.resources = this.resources.filter((p) => p.payload.resourceId !== String(resourceId));
    for (const chunk of chunks) {
      this.resources.push({
        id: `${resourceId}-${chunk.chunkIndex}`,
        vector: chunk.embedding,
        payload: {
          resourceId: String(resourceId),
          chunkIndex: chunk.chunkIndex,
          text: chunk.text
        }
      });
    }
  }

  /** Insert or replace the single vector stored for an annotation. */
  async upsertAnnotationVector(annotationId, embedding, payload) {
    this.annotations = this.annotations.filter((p) => p.id !== String(annotationId));
    this.annotations.push({
      id: String(annotationId),
      vector: embedding,
      payload: {
        annotationId: String(payload.annotationId),
        resourceId: String(payload.resourceId),
        motivation: payload.motivation,
        entityTypes: payload.entityTypes,
        text: payload.exactText
      }
    });
  }

  async deleteResourceVectors(resourceId) {
    this.resources = this.resources.filter((p) => p.payload.resourceId !== String(resourceId));
  }

  async deleteAnnotationVector(annotationId) {
    this.annotations = this.annotations.filter((p) => p.id !== String(annotationId));
  }

  async searchResources(embedding, opts) {
    return this.search(this.resources, embedding, opts);
  }

  async searchAnnotations(embedding, opts) {
    return this.search(this.annotations, embedding, opts);
  }

  /**
   * Filter, score, sort descending, apply the threshold, and truncate.
   *
   * BUG FIX: the threshold check is now `!= null` instead of truthiness.
   * Cosine similarity can be negative, so `scoreThreshold: 0` is a
   * meaningful request ("non-negative matches only") that the old
   * `if (opts.scoreThreshold)` silently ignored.
   */
  search(points, embedding, opts) {
    let filtered = points;
    if (opts.filter) {
      const f = opts.filter;
      filtered = points.filter((p) => {
        if (f.resourceId && p.payload.resourceId !== String(f.resourceId)) return false;
        if (f.excludeResourceId && p.payload.resourceId === String(f.excludeResourceId)) return false;
        if (f.motivation && p.payload.motivation !== f.motivation) return false;
        if (f.entityTypes && f.entityTypes.length > 0) {
          const pTypes = p.payload.entityTypes ?? [];
          // Match if ANY requested entity type appears on the point.
          if (!f.entityTypes.some((t) => pTypes.includes(t))) return false;
        }
        return true;
      });
    }
    const scored = filtered.map((p) => ({
      ...p,
      score: cosineSimilarity(embedding, p.vector)
    }));
    scored.sort((a, b) => b.score - a.score);
    if (opts.scoreThreshold != null) {
      const threshold = opts.scoreThreshold;
      return scored.filter((s) => s.score >= threshold).slice(0, opts.limit).map((s) => this.toResult(s));
    }
    return scored.slice(0, opts.limit).map((s) => this.toResult(s));
  }

  /** Project an internal stored point into the public search-result shape. */
  toResult(s) {
    return {
      id: s.id,
      score: s.score,
      resourceId: s.payload.resourceId,
      annotationId: s.payload.annotationId,
      text: s.payload.text,
      entityTypes: s.payload.entityTypes
    };
  }
};
|
|
114
|
+
|
|
115
|
+
// src/store/factory.ts

// Module-level singleton: the first store successfully created is reused by
// every later createVectorStore() call, regardless of the config passed.
var instance = null;

/**
 * Create (or return the cached) VectorStore for the given configuration.
 * The Qdrant implementation is loaded lazily so the optional
 * @qdrant/js-client-rest peer dependency is only required when used.
 */
async function createVectorStore(config) {
  if (instance !== null) {
    return instance;
  }
  if (config.type === "qdrant") {
    const { QdrantVectorStore: QdrantImpl } = await import("./qdrant-JXH3K3RP.js");
    const host = config.host ?? "localhost";
    const port = config.port ?? 6333;
    instance = new QdrantImpl({ host, port, dimensions: config.dimensions });
  } else {
    instance = new MemoryVectorStore();
  }
  await instance.connect();
  return instance;
}

/** The current singleton store, or null before createVectorStore() has run. */
function getVectorStore() {
  return instance;
}
|
|
135
|
+
|
|
136
|
+
// src/embedding/factory.ts

/**
 * Create an EmbeddingProvider from configuration.
 *
 * Providers are loaded lazily via dynamic import so only the selected
 * backend's module is pulled in.
 *
 * BUG FIX: the Voyage apiKey is now validated BEFORE the dynamic import, so
 * a misconfigured caller gets the documented "apiKey is required" error
 * instead of whatever the module load happens to do first.
 *
 * @throws Error if type is 'voyage' without an apiKey, or type is unknown.
 */
async function createEmbeddingProvider(config) {
  if (config.type === "voyage") {
    // Fail fast on missing credentials before loading the provider module.
    if (!config.apiKey) throw new Error("apiKey is required for Voyage embedding provider");
    const { VoyageEmbeddingProvider: VoyageImpl } = await import("./voyage-UWOFVIYM.js");
    return new VoyageImpl({
      apiKey: config.apiKey,
      model: config.model,
      endpoint: config.endpoint
    });
  }
  if (config.type === "ollama") {
    const { OllamaEmbeddingProvider: OllamaImpl } = await import("./ollama-H62PXJ5D.js");
    return new OllamaImpl({
      model: config.model,
      baseURL: config.baseURL
    });
  }
  throw new Error(`Unknown embedding provider type: ${config.type}`);
}
|
|
156
|
+
|
|
157
|
+
// src/chunking.ts

/** Default chunking: ~512 tokens per chunk with ~64 tokens of overlap. */
var DEFAULT_CHUNKING_CONFIG = {
  chunkSize: 512,
  overlap: 64
};

/** Rough token estimate: ~4 characters per token for English text. */
function estimateTokens(text) {
  return Math.ceil(text.length / 4);
}

/**
 * Split text into overlapping chunks, preferring paragraph boundaries, then
 * sentence boundaries, then word boundaries. Text that fits in one chunk is
 * returned as a single-element array.
 *
 * BUG FIX (infinite loop): the original advanced with
 * `start = end - overlapChars` and only stopped on `start >= text.length`.
 * Once the final chunk reached the end of the text, `start` stepped BACK by
 * the overlap and never reached `text.length` again (for any overlap > 0),
 * so the loop pushed the same tail chunk forever. We now terminate as soon
 * as a chunk reaches the end of the text, and also guarantee forward
 * progress for configs where overlap >= chunkSize.
 */
function chunkText(text, config = DEFAULT_CHUNKING_CONFIG) {
  const totalTokens = estimateTokens(text);
  if (totalTokens <= config.chunkSize) {
    return [text];
  }
  const chunkChars = config.chunkSize * 4;
  const overlapChars = config.overlap * 4;
  const chunks = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + chunkChars, text.length);
    if (end < text.length) {
      // Prefer a paragraph break in the second half of the window...
      const paraBreak = text.lastIndexOf("\n\n", end);
      if (paraBreak > start + chunkChars / 2) {
        end = paraBreak;
      } else {
        // ...then a sentence break...
        const sentenceBreak = text.lastIndexOf(". ", end);
        if (sentenceBreak > start + chunkChars / 2) {
          end = sentenceBreak + 1;
        } else {
          // ...then a word break.
          const wordBreak = text.lastIndexOf(" ", end);
          if (wordBreak > start + chunkChars / 2) {
            end = wordBreak;
          }
        }
      }
    }
    chunks.push(text.slice(start, end).trim());
    if (end >= text.length) break; // final chunk emitted -- done
    const next = end - overlapChars;
    start = next > start ? next : end; // always move forward
  }
  return chunks.filter((c) => c.length > 0);
}
|
|
198
|
+
// Public entry points of @semiont/vectors, re-exported from the bundle.
export {
  DEFAULT_CHUNKING_CONFIG,
  MemoryVectorStore,
  OllamaEmbeddingProvider,
  QdrantVectorStore,
  VoyageEmbeddingProvider,
  chunkText,
  createEmbeddingProvider,
  createVectorStore,
  getVectorStore
};
|
|
209
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/store/memory.ts","../src/store/factory.ts","../src/embedding/factory.ts","../src/chunking.ts"],"sourcesContent":["/**\n * In-Memory VectorStore Implementation\n *\n * For testing and development without a running Qdrant instance.\n * Uses brute-force cosine similarity search.\n */\n\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\ninterface StoredPoint {\n id: string;\n vector: number[];\n payload: {\n resourceId: string;\n annotationId?: string;\n chunkIndex?: number;\n text: string;\n motivation?: string;\n entityTypes?: string[];\n };\n}\n\nfunction cosineSimilarity(a: number[], b: number[]): number {\n let dotProduct = 0;\n let normA = 0;\n let normB = 0;\n for (let i = 0; i < a.length; i++) {\n dotProduct += a[i] * b[i];\n normA += a[i] * a[i];\n normB += b[i] * b[i];\n }\n const denom = Math.sqrt(normA) * Math.sqrt(normB);\n return denom === 0 ? 
0 : dotProduct / denom;\n}\n\nexport class MemoryVectorStore implements VectorStore {\n private resources: StoredPoint[] = [];\n private annotations: StoredPoint[] = [];\n private connected = false;\n\n async connect(): Promise<void> {\n this.connected = true;\n }\n\n async disconnect(): Promise<void> {\n this.connected = false;\n }\n\n isConnected(): boolean {\n return this.connected;\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void> {\n // Remove existing vectors for this resource\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n\n for (const chunk of chunks) {\n this.resources.push({\n id: `${resourceId}-${chunk.chunkIndex}`,\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n },\n });\n }\n }\n\n async upsertAnnotationVector(\n annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n this.annotations.push({\n id: String(annotationId),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.resources, embedding, opts);\n }\n\n async searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return 
this.search(this.annotations, embedding, opts);\n }\n\n private search(points: StoredPoint[], embedding: number[], opts: SearchOptions): VectorSearchResult[] {\n let filtered = points;\n\n if (opts.filter) {\n const f = opts.filter;\n filtered = points.filter(p => {\n if (f.resourceId && p.payload.resourceId !== String(f.resourceId)) return false;\n if (f.excludeResourceId && p.payload.resourceId === String(f.excludeResourceId)) return false;\n if (f.motivation && p.payload.motivation !== f.motivation) return false;\n if (f.entityTypes && f.entityTypes.length > 0) {\n const pTypes = p.payload.entityTypes ?? [];\n if (!f.entityTypes.some(t => pTypes.includes(t))) return false;\n }\n return true;\n });\n }\n\n const scored = filtered.map(p => ({\n ...p,\n score: cosineSimilarity(embedding, p.vector),\n }));\n\n scored.sort((a, b) => b.score - a.score);\n\n if (opts.scoreThreshold) {\n const threshold = opts.scoreThreshold;\n return scored\n .filter(s => s.score >= threshold)\n .slice(0, opts.limit)\n .map(s => this.toResult(s));\n }\n\n return scored.slice(0, opts.limit).map(s => this.toResult(s));\n }\n\n private toResult(s: StoredPoint & { score: number }): VectorSearchResult {\n return {\n id: s.id,\n score: s.score,\n resourceId: s.payload.resourceId as ResourceId,\n annotationId: s.payload.annotationId as AnnotationId | undefined,\n text: s.payload.text,\n entityTypes: s.payload.entityTypes,\n };\n }\n}\n","/**\n * VectorStore Factory\n *\n * Creates a VectorStore instance based on configuration.\n */\n\nimport type { VectorStore } from './interface';\nimport { MemoryVectorStore } from './memory';\n\nexport interface VectorStoreConfig {\n type: 'qdrant' | 'memory';\n host?: string;\n port?: number;\n dimensions: number;\n}\n\nlet instance: VectorStore | null = null;\n\nexport async function createVectorStore(config: VectorStoreConfig): Promise<VectorStore> {\n if (instance) return instance;\n\n if (config.type === 'qdrant') {\n const { QdrantVectorStore } = 
await import('./qdrant');\n instance = new QdrantVectorStore({\n host: config.host ?? 'localhost',\n port: config.port ?? 6333,\n dimensions: config.dimensions,\n });\n } else {\n instance = new MemoryVectorStore();\n }\n\n await instance.connect();\n return instance;\n}\n\nexport function getVectorStore(): VectorStore | null {\n return instance;\n}\n","/**\n * EmbeddingProvider Factory\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface EmbeddingConfig {\n type: 'voyage' | 'ollama';\n model: string;\n apiKey?: string;\n baseURL?: string;\n endpoint?: string;\n}\n\nexport async function createEmbeddingProvider(config: EmbeddingConfig): Promise<EmbeddingProvider> {\n if (config.type === 'voyage') {\n const { VoyageEmbeddingProvider } = await import('./voyage');\n if (!config.apiKey) throw new Error('apiKey is required for Voyage embedding provider');\n return new VoyageEmbeddingProvider({\n apiKey: config.apiKey,\n model: config.model,\n endpoint: config.endpoint,\n });\n }\n\n if (config.type === 'ollama') {\n const { OllamaEmbeddingProvider } = await import('./ollama');\n return new OllamaEmbeddingProvider({\n model: config.model,\n baseURL: config.baseURL,\n });\n }\n\n throw new Error(`Unknown embedding provider type: ${config.type}`);\n}\n","/**\n * Text Chunking Utilities\n *\n * Splits long text into overlapping chunks for embedding.\n * Each chunk is a passage that fits within the embedding model's context window.\n */\n\nexport interface ChunkingConfig {\n chunkSize: number; // approximate tokens per chunk\n overlap: number; // tokens of overlap between adjacent chunks\n}\n\nexport const DEFAULT_CHUNKING_CONFIG: ChunkingConfig = {\n chunkSize: 512,\n overlap: 64,\n};\n\n/**\n * Rough token count estimate: ~4 characters per token for English text.\n */\nfunction estimateTokens(text: string): number {\n return Math.ceil(text.length / 4);\n}\n\n/**\n * Split text into overlapping chunks.\n *\n * Splits on paragraph boundaries when 
possible, falling back to sentence\n * boundaries, then word boundaries. Each chunk overlaps with the previous\n * by `overlap` tokens worth of text.\n */\nexport function chunkText(text: string, config: ChunkingConfig = DEFAULT_CHUNKING_CONFIG): string[] {\n const totalTokens = estimateTokens(text);\n if (totalTokens <= config.chunkSize) {\n return [text];\n }\n\n const chunkChars = config.chunkSize * 4;\n const overlapChars = config.overlap * 4;\n const chunks: string[] = [];\n let start = 0;\n\n while (start < text.length) {\n let end = Math.min(start + chunkChars, text.length);\n\n // Try to break at a paragraph boundary\n if (end < text.length) {\n const paraBreak = text.lastIndexOf('\\n\\n', end);\n if (paraBreak > start + chunkChars / 2) {\n end = paraBreak;\n } else {\n // Try sentence boundary\n const sentenceBreak = text.lastIndexOf('. ', end);\n if (sentenceBreak > start + chunkChars / 2) {\n end = sentenceBreak + 1;\n } else {\n // Try word boundary\n const wordBreak = text.lastIndexOf(' ', end);\n if (wordBreak > start + chunkChars / 2) {\n end = wordBreak;\n }\n }\n }\n }\n\n chunks.push(text.slice(start, end).trim());\n start = end - overlapChars;\n if (start >= text.length) break;\n }\n\n return chunks.filter(c => c.length > 
0);\n}\n"],"mappings":";;;;;;;;;;;AAuBA,SAAS,iBAAiB,GAAa,GAAqB;AAC1D,MAAI,aAAa;AACjB,MAAI,QAAQ;AACZ,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,kBAAc,EAAE,CAAC,IAAI,EAAE,CAAC;AACxB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AACnB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,EACrB;AACA,QAAM,QAAQ,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK;AAChD,SAAO,UAAU,IAAI,IAAI,aAAa;AACxC;AAEO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,YAA2B,CAAC;AAAA,EAC5B,cAA6B,CAAC;AAAA,EAC9B,YAAY;AAAA,EAEpB,MAAM,UAAyB;AAC7B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAAyC;AAE3F,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAEvF,eAAW,SAAS,QAAQ;AAC1B,WAAK,UAAU,KAAK;AAAA,QAClB,IAAI,GAAG,UAAU,IAAI,MAAM,UAAU;AAAA,QACrC,QAAQ,MAAM;AAAA,QACd,SAAS;AAAA,UACP,YAAY,OAAO,UAAU;AAAA,UAC7B,YAAY,MAAM;AAAA,UAClB,MAAM,MAAM;AAAA,QACd;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAC7E,SAAK,YAAY,KAAK;AAAA,MACpB,IAAI,OAAO,YAAY;AAAA,MACvB,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,cAAc,OAAO,QAAQ,YAAY;AAAA,QACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,QACrC,YAAY,QAAQ;AAAA,QACpB,aAAa,QAAQ;AAAA,QACrB,MAAM,QAAQ;AAAA,MAChB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,sBAAsB,YAAuC;AACjE,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAAA,EACzF;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAAA,EAC/E;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,KAAK,WAAW,WAAW,IAAI;AAAA,EACpD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,KAAK,aAAa,WAAW,IAAI;AAAA,EACtD;AAAA,EAEQ,OAAO,QAAuB,WAAqB,MAA2C;AACpG,QAAI,WAAW;AAEf,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,KAAK;AACf,iBAAW,OAAO,OAAO,OAAK;AAC5B,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,OAAO,EAAE,UAAU,EAAG,QAAO;AAC1E,YAAI,EAAE,qBAAqB,EAAE,QAAQ,eAAe,OAAO,EAAE,iBAAiB,EAAG,QAAO;AACxF,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,EAAE,WAAY,QAAO;AAClE,YAAI,EAAE,eAAe,EAAE,YAAY,SAAS,GAAG;AAC7C,gBAAM,SAAS,EAAE,QAAQ,eAAe,CAAC;AACzC,cAAI
,CAAC,EAAE,YAAY,KAAK,OAAK,OAAO,SAAS,CAAC,CAAC,EAAG,QAAO;AAAA,QAC3D;AACA,eAAO;AAAA,MACT,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,SAAS,IAAI,QAAM;AAAA,MAChC,GAAG;AAAA,MACH,OAAO,iBAAiB,WAAW,EAAE,MAAM;AAAA,IAC7C,EAAE;AAEF,WAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAEvC,QAAI,KAAK,gBAAgB;AACvB,YAAM,YAAY,KAAK;AACvB,aAAO,OACJ,OAAO,OAAK,EAAE,SAAS,SAAS,EAChC,MAAM,GAAG,KAAK,KAAK,EACnB,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,IAC9B;AAEA,WAAO,OAAO,MAAM,GAAG,KAAK,KAAK,EAAE,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,EAC9D;AAAA,EAEQ,SAAS,GAAwD;AACvE,WAAO;AAAA,MACL,IAAI,EAAE;AAAA,MACN,OAAO,EAAE;AAAA,MACT,YAAY,EAAE,QAAQ;AAAA,MACtB,cAAc,EAAE,QAAQ;AAAA,MACxB,MAAM,EAAE,QAAQ;AAAA,MAChB,aAAa,EAAE,QAAQ;AAAA,IACzB;AAAA,EACF;AACF;;;ACtIA,IAAI,WAA+B;AAEnC,eAAsB,kBAAkB,QAAiD;AACvF,MAAI,SAAU,QAAO;AAErB,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,mBAAAA,mBAAkB,IAAI,MAAM,OAAO,sBAAU;AACrD,eAAW,IAAIA,mBAAkB;AAAA,MAC/B,MAAM,OAAO,QAAQ;AAAA,MACrB,MAAM,OAAO,QAAQ;AAAA,MACrB,YAAY,OAAO;AAAA,IACrB,CAAC;AAAA,EACH,OAAO;AACL,eAAW,IAAI,kBAAkB;AAAA,EACnC;AAEA,QAAM,SAAS,QAAQ;AACvB,SAAO;AACT;AAEO,SAAS,iBAAqC;AACnD,SAAO;AACT;;;ACxBA,eAAsB,wBAAwB,QAAqD;AACjG,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,QAAI,CAAC,OAAO,OAAQ,OAAM,IAAI,MAAM,kDAAkD;AACtF,WAAO,IAAIA,yBAAwB;AAAA,MACjC,QAAQ,OAAO;AAAA,MACf,OAAO,OAAO;AAAA,MACd,UAAU,OAAO;AAAA,IACnB,CAAC;AAAA,EACH;AAEA,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,WAAO,IAAIA,yBAAwB;AAAA,MACjC,OAAO,OAAO;AAAA,MACd,SAAS,OAAO;AAAA,IAClB,CAAC;AAAA,EACH;AAEA,QAAM,IAAI,MAAM,oCAAoC,OAAO,IAAI,EAAE;AACnE;;;ACtBO,IAAM,0BAA0C;AAAA,EACrD,WAAW;AAAA,EACX,SAAS;AACX;AAKA,SAAS,eAAe,MAAsB;AAC5C,SAAO,KAAK,KAAK,KAAK,SAAS,CAAC;AAClC;AASO,SAAS,UAAU,MAAc,SAAyB,yBAAmC;AAClG,QAAM,cAAc,eAAe,IAAI;AACvC,MAAI,eAAe,OAAO,WAAW;AACnC,WAAO,CAAC,IAAI;AAAA,EACd;AAEA,QAAM,aAAa,OAAO,YAAY;AACtC,QAAM,eAAe,OAAO,UAAU;AACtC,QAAM,SAAmB,CAAC;AAC1B,MAAI,QAAQ;AAEZ,SAAO,QAAQ,KAAK,QAAQ;AAC1B,QAAI,MAAM,KAAK,IAAI,QAAQ,YAAY,KAAK,MAAM;AAGlD,QAAI,MAAM,KAAK,QAAQ;AACrB,YAAM,YAAY,KAAK,YAAY,QAAQ,GAAG;AA
C9C,UAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,cAAM;AAAA,MACR,OAAO;AAEL,cAAM,gBAAgB,KAAK,YAAY,MAAM,GAAG;AAChD,YAAI,gBAAgB,QAAQ,aAAa,GAAG;AAC1C,gBAAM,gBAAgB;AAAA,QACxB,OAAO;AAEL,gBAAM,YAAY,KAAK,YAAY,KAAK,GAAG;AAC3C,cAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,KAAK,MAAM,OAAO,GAAG,EAAE,KAAK,CAAC;AACzC,YAAQ,MAAM;AACd,QAAI,SAAS,KAAK,OAAQ;AAAA,EAC5B;AAEA,SAAO,OAAO,OAAO,OAAK,EAAE,SAAS,CAAC;AACxC;","names":["QdrantVectorStore","VoyageEmbeddingProvider","OllamaEmbeddingProvider"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@semiont/vectors",
|
|
3
|
+
"version": "0.4.12",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Vector storage, embedding, and semantic search for Semiont",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"default": "./dist/index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "npm run typecheck && tsup",
|
|
17
|
+
"typecheck": "tsc --noEmit",
|
|
18
|
+
"clean": "rm -rf dist",
|
|
19
|
+
"test": "vitest run",
|
|
20
|
+
"test:watch": "vitest"
|
|
21
|
+
},
|
|
22
|
+
"dependencies": {
|
|
23
|
+
"@semiont/core": "*"
|
|
24
|
+
},
|
|
25
|
+
"peerDependencies": {
|
|
26
|
+
"@qdrant/js-client-rest": ">=1.0.0"
|
|
27
|
+
},
|
|
28
|
+
"peerDependenciesMeta": {
|
|
29
|
+
"@qdrant/js-client-rest": {
|
|
30
|
+
"optional": true
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"devDependencies": {
|
|
34
|
+
"@qdrant/js-client-rest": "^1.13.0",
|
|
35
|
+
"tsup": "^8.5.1",
|
|
36
|
+
"typescript": "^5.6.3"
|
|
37
|
+
},
|
|
38
|
+
"files": [
|
|
39
|
+
"dist",
|
|
40
|
+
"README.md",
|
|
41
|
+
"LICENSE"
|
|
42
|
+
],
|
|
43
|
+
"publishConfig": {
|
|
44
|
+
"access": "public"
|
|
45
|
+
},
|
|
46
|
+
"repository": {
|
|
47
|
+
"type": "git",
|
|
48
|
+
"url": "git+https://github.com/The-AI-Alliance/semiont.git",
|
|
49
|
+
"directory": "packages/vectors"
|
|
50
|
+
},
|
|
51
|
+
"license": "Apache-2.0"
|
|
52
|
+
}
|