@semiont/vectors 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -167,6 +167,7 @@ function estimateTokens(text) {
167
167
  return Math.ceil(text.length / 4);
168
168
  }
169
169
  function chunkText(text, config = DEFAULT_CHUNKING_CONFIG) {
170
+ if (text.length === 0) return [];
170
171
  const totalTokens = estimateTokens(text);
171
172
  if (totalTokens <= config.chunkSize) {
172
173
  return [text];
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/store/memory.ts","../src/store/factory.ts","../src/embedding/factory.ts","../src/chunking.ts"],"sourcesContent":["/**\n * In-Memory VectorStore Implementation\n *\n * For testing and development without a running Qdrant instance.\n * Uses brute-force cosine similarity search.\n */\n\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\ninterface StoredPoint {\n id: string;\n vector: number[];\n payload: {\n resourceId: string;\n annotationId?: string;\n chunkIndex?: number;\n text: string;\n motivation?: string;\n entityTypes?: string[];\n };\n}\n\nfunction cosineSimilarity(a: number[], b: number[]): number {\n let dotProduct = 0;\n let normA = 0;\n let normB = 0;\n for (let i = 0; i < a.length; i++) {\n dotProduct += a[i] * b[i];\n normA += a[i] * a[i];\n normB += b[i] * b[i];\n }\n const denom = Math.sqrt(normA) * Math.sqrt(normB);\n return denom === 0 ? 0 : dotProduct / denom;\n}\n\nexport class MemoryVectorStore implements VectorStore {\n private resources: StoredPoint[] = [];\n private annotations: StoredPoint[] = [];\n private connected = false;\n\n async connect(): Promise<void> {\n this.connected = true;\n }\n\n async disconnect(): Promise<void> {\n this.connected = false;\n }\n\n async clearAll(): Promise<void> {\n this.resources = [];\n this.annotations = [];\n }\n\n isConnected(): boolean {\n return this.connected;\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void> {\n // Remove existing vectors for this resource\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n\n for (const chunk of chunks) {\n this.resources.push({\n id: `${resourceId}-${chunk.chunkIndex}`,\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n },\n });\n }\n }\n\n async upsertAnnotationVector(\n annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n this.annotations.push({\n id: String(annotationId),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.resources, embedding, opts);\n }\n\n async searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.annotations, embedding, opts);\n }\n\n private search(points: StoredPoint[], embedding: number[], opts: SearchOptions): VectorSearchResult[] {\n let filtered = points;\n\n if (opts.filter) {\n const f = opts.filter;\n filtered = points.filter(p => {\n if (f.resourceId && p.payload.resourceId !== String(f.resourceId)) return false;\n if (f.excludeResourceId && p.payload.resourceId === String(f.excludeResourceId)) return false;\n if (f.motivation && p.payload.motivation !== f.motivation) return false;\n if (f.entityTypes && f.entityTypes.length > 0) {\n const pTypes = p.payload.entityTypes ?? [];\n if (!f.entityTypes.some(t => pTypes.includes(t))) return false;\n }\n return true;\n });\n }\n\n const scored = filtered.map(p => ({\n ...p,\n score: cosineSimilarity(embedding, p.vector),\n }));\n\n scored.sort((a, b) => b.score - a.score);\n\n if (opts.scoreThreshold) {\n const threshold = opts.scoreThreshold;\n return scored\n .filter(s => s.score >= threshold)\n .slice(0, opts.limit)\n .map(s => this.toResult(s));\n }\n\n return scored.slice(0, opts.limit).map(s => this.toResult(s));\n }\n\n private toResult(s: StoredPoint & { score: number }): VectorSearchResult {\n return {\n id: s.id,\n score: s.score,\n resourceId: s.payload.resourceId as ResourceId,\n annotationId: s.payload.annotationId as AnnotationId | undefined,\n text: s.payload.text,\n entityTypes: s.payload.entityTypes,\n };\n }\n}\n","/**\n * VectorStore Factory\n *\n * Creates a VectorStore instance based on configuration.\n */\n\nimport type { VectorStore } from './interface';\nimport { MemoryVectorStore } from './memory';\n\nexport interface VectorStoreConfig {\n type: 'qdrant' | 'memory';\n host?: string;\n port?: number;\n dimensions: number;\n}\n\nlet instance: VectorStore | null = null;\n\nexport async function createVectorStore(config: VectorStoreConfig): Promise<VectorStore> {\n if (instance) return instance;\n\n if (config.type === 'qdrant') {\n const { QdrantVectorStore } = await import('./qdrant');\n instance = new QdrantVectorStore({\n host: config.host ?? 'localhost',\n port: config.port ?? 6333,\n dimensions: config.dimensions,\n });\n } else {\n instance = new MemoryVectorStore();\n }\n\n await instance.connect();\n return instance;\n}\n\nexport function getVectorStore(): VectorStore | null {\n return instance;\n}\n","/**\n * EmbeddingProvider Factory\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface EmbeddingConfig {\n type: 'voyage' | 'ollama';\n model: string;\n apiKey?: string;\n baseURL?: string;\n endpoint?: string;\n}\n\nexport async function createEmbeddingProvider(config: EmbeddingConfig): Promise<EmbeddingProvider> {\n if (config.type === 'voyage') {\n const { VoyageEmbeddingProvider } = await import('./voyage');\n if (!config.apiKey) throw new Error('apiKey is required for Voyage embedding provider');\n return new VoyageEmbeddingProvider({\n apiKey: config.apiKey,\n model: config.model,\n endpoint: config.endpoint,\n });\n }\n\n if (config.type === 'ollama') {\n const { OllamaEmbeddingProvider } = await import('./ollama');\n return new OllamaEmbeddingProvider({\n model: config.model,\n baseURL: config.baseURL,\n });\n }\n\n throw new Error(`Unknown embedding provider type: ${config.type}`);\n}\n","/**\n * Text Chunking Utilities\n *\n * Splits long text into overlapping chunks for embedding.\n * Each chunk is a passage that fits within the embedding model's context window.\n */\n\nexport interface ChunkingConfig {\n chunkSize: number; // approximate tokens per chunk\n overlap: number; // tokens of overlap between adjacent chunks\n}\n\nexport const DEFAULT_CHUNKING_CONFIG: ChunkingConfig = {\n chunkSize: 512,\n overlap: 64,\n};\n\n/**\n * Rough token count estimate: ~4 characters per token for English text.\n */\nfunction estimateTokens(text: string): number {\n return Math.ceil(text.length / 4);\n}\n\n/**\n * Split text into overlapping chunks.\n *\n * Splits on paragraph boundaries when possible, falling back to sentence\n * boundaries, then word boundaries. Each chunk overlaps with the previous\n * by `overlap` tokens worth of text.\n */\nexport function chunkText(text: string, config: ChunkingConfig = DEFAULT_CHUNKING_CONFIG): string[] {\n const totalTokens = estimateTokens(text);\n if (totalTokens <= config.chunkSize) {\n return [text];\n }\n\n const chunkChars = config.chunkSize * 4;\n const overlapChars = config.overlap * 4;\n const chunks: string[] = [];\n let start = 0;\n\n while (start < text.length) {\n let end = Math.min(start + chunkChars, text.length);\n\n // Try to break at a paragraph boundary\n if (end < text.length) {\n const paraBreak = text.lastIndexOf('\\n\\n', end);\n if (paraBreak > start + chunkChars / 2) {\n end = paraBreak;\n } else {\n // Try sentence boundary\n const sentenceBreak = text.lastIndexOf('. ', end);\n if (sentenceBreak > start + chunkChars / 2) {\n end = sentenceBreak + 1;\n } else {\n // Try word boundary\n const wordBreak = text.lastIndexOf(' ', end);\n if (wordBreak > start + chunkChars / 2) {\n end = wordBreak;\n }\n }\n }\n }\n\n chunks.push(text.slice(start, end).trim());\n const nextStart = end - overlapChars;\n start = nextStart > start ? nextStart : end;\n if (start >= text.length) break;\n }\n\n return chunks.filter(c => c.length > 0);\n}\n"],"mappings":";;;;;;;;;;;AAuBA,SAAS,iBAAiB,GAAa,GAAqB;AAC1D,MAAI,aAAa;AACjB,MAAI,QAAQ;AACZ,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,kBAAc,EAAE,CAAC,IAAI,EAAE,CAAC;AACxB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AACnB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,EACrB;AACA,QAAM,QAAQ,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK;AAChD,SAAO,UAAU,IAAI,IAAI,aAAa;AACxC;AAEO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,YAA2B,CAAC;AAAA,EAC5B,cAA6B,CAAC;AAAA,EAC9B,YAAY;AAAA,EAEpB,MAAM,UAAyB;AAC7B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,WAA0B;AAC9B,SAAK,YAAY,CAAC;AAClB,SAAK,cAAc,CAAC;AAAA,EACtB;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAAyC;AAE3F,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAEvF,eAAW,SAAS,QAAQ;AAC1B,WAAK,UAAU,KAAK;AAAA,QAClB,IAAI,GAAG,UAAU,IAAI,MAAM,UAAU;AAAA,QACrC,QAAQ,MAAM;AAAA,QACd,SAAS;AAAA,UACP,YAAY,OAAO,UAAU;AAAA,UAC7B,YAAY,MAAM;AAAA,UAClB,MAAM,MAAM;AAAA,QACd;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAC7E,SAAK,YAAY,KAAK;AAAA,MACpB,IAAI,OAAO,YAAY;AAAA,MACvB,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,cAAc,OAAO,QAAQ,YAAY;AAAA,QACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,QACrC,YAAY,QAAQ;AAAA,QACpB,aAAa,QAAQ;AAAA,QACrB,MAAM,QAAQ;AAAA,MAChB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,sBAAsB,YAAuC;AACjE,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAAA,EACzF;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAAA,EAC/E;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,KAAK,WAAW,WAAW,IAAI;AAAA,EACpD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,KAAK,aAAa,WAAW,IAAI;AAAA,EACtD;AAAA,EAEQ,OAAO,QAAuB,WAAqB,MAA2C;AACpG,QAAI,WAAW;AAEf,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,KAAK;AACf,iBAAW,OAAO,OAAO,OAAK;AAC5B,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,OAAO,EAAE,UAAU,EAAG,QAAO;AAC1E,YAAI,EAAE,qBAAqB,EAAE,QAAQ,eAAe,OAAO,EAAE,iBAAiB,EAAG,QAAO;AACxF,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,EAAE,WAAY,QAAO;AAClE,YAAI,EAAE,eAAe,EAAE,YAAY,SAAS,GAAG;AAC7C,gBAAM,SAAS,EAAE,QAAQ,eAAe,CAAC;AACzC,cAAI,CAAC,EAAE,YAAY,KAAK,OAAK,OAAO,SAAS,CAAC,CAAC,EAAG,QAAO;AAAA,QAC3D;AACA,eAAO;AAAA,MACT,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,SAAS,IAAI,QAAM;AAAA,MAChC,GAAG;AAAA,MACH,OAAO,iBAAiB,WAAW,EAAE,MAAM;AAAA,IAC7C,EAAE;AAEF,WAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAEvC,QAAI,KAAK,gBAAgB;AACvB,YAAM,YAAY,KAAK;AACvB,aAAO,OACJ,OAAO,OAAK,EAAE,SAAS,SAAS,EAChC,MAAM,GAAG,KAAK,KAAK,EACnB,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,IAC9B;AAEA,WAAO,OAAO,MAAM,GAAG,KAAK,KAAK,EAAE,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,EAC9D;AAAA,EAEQ,SAAS,GAAwD;AACvE,WAAO;AAAA,MACL,IAAI,EAAE;AAAA,MACN,OAAO,EAAE;AAAA,MACT,YAAY,EAAE,QAAQ;AAAA,MACtB,cAAc,EAAE,QAAQ;AAAA,MACxB,MAAM,EAAE,QAAQ;AAAA,MAChB,aAAa,EAAE,QAAQ;AAAA,IACzB;AAAA,EACF;AACF;;;AC3IA,IAAI,WAA+B;AAEnC,eAAsB,kBAAkB,QAAiD;AACvF,MAAI,SAAU,QAAO;AAErB,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,mBAAAA,mBAAkB,IAAI,MAAM,OAAO,sBAAU;AACrD,eAAW,IAAIA,mBAAkB;AAAA,MAC/B,MAAM,OAAO,QAAQ;AAAA,MACrB,MAAM,OAAO,QAAQ;AAAA,MACrB,YAAY,OAAO;AAAA,IACrB,CAAC;AAAA,EACH,OAAO;AACL,eAAW,IAAI,kBAAkB;AAAA,EACnC;AAEA,QAAM,SAAS,QAAQ;AACvB,SAAO;AACT;AAEO,SAAS,iBAAqC;AACnD,SAAO;AACT;;;ACxBA,eAAsB,wBAAwB,QAAqD;AACjG,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,QAAI,CAAC,OAAO,OAAQ,OAAM,IAAI,MAAM,kDAAkD;AACtF,WAAO,IAAIA,yBAAwB;AAAA,MACjC,QAAQ,OAAO;AAAA,MACf,OAAO,OAAO;AAAA,MACd,UAAU,OAAO;AAAA,IACnB,CAAC;AAAA,EACH;AAEA,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,WAAO,IAAIA,yBAAwB;AAAA,MACjC,OAAO,OAAO;AAAA,MACd,SAAS,OAAO;AAAA,IAClB,CAAC;AAAA,EACH;AAEA,QAAM,IAAI,MAAM,oCAAoC,OAAO,IAAI,EAAE;AACnE;;;ACtBO,IAAM,0BAA0C;AAAA,EACrD,WAAW;AAAA,EACX,SAAS;AACX;AAKA,SAAS,eAAe,MAAsB;AAC5C,SAAO,KAAK,KAAK,KAAK,SAAS,CAAC;AAClC;AASO,SAAS,UAAU,MAAc,SAAyB,yBAAmC;AAClG,QAAM,cAAc,eAAe,IAAI;AACvC,MAAI,eAAe,OAAO,WAAW;AACnC,WAAO,CAAC,IAAI;AAAA,EACd;AAEA,QAAM,aAAa,OAAO,YAAY;AACtC,QAAM,eAAe,OAAO,UAAU;AACtC,QAAM,SAAmB,CAAC;AAC1B,MAAI,QAAQ;AAEZ,SAAO,QAAQ,KAAK,QAAQ;AAC1B,QAAI,MAAM,KAAK,IAAI,QAAQ,YAAY,KAAK,MAAM;AAGlD,QAAI,MAAM,KAAK,QAAQ;AACrB,YAAM,YAAY,KAAK,YAAY,QAAQ,GAAG;AAC9C,UAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,cAAM;AAAA,MACR,OAAO;AAEL,cAAM,gBAAgB,KAAK,YAAY,MAAM,GAAG;AAChD,YAAI,gBAAgB,QAAQ,aAAa,GAAG;AAC1C,gBAAM,gBAAgB;AAAA,QACxB,OAAO;AAEL,gBAAM,YAAY,KAAK,YAAY,KAAK,GAAG;AAC3C,cAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,KAAK,MAAM,OAAO,GAAG,EAAE,KAAK,CAAC;AACzC,UAAM,YAAY,MAAM;AACxB,YAAQ,YAAY,QAAQ,YAAY;AACxC,QAAI,SAAS,KAAK,OAAQ;AAAA,EAC5B;AAEA,SAAO,OAAO,OAAO,OAAK,EAAE,SAAS,CAAC;AACxC;","names":["QdrantVectorStore","VoyageEmbeddingProvider","OllamaEmbeddingProvider"]}
1
+ {"version":3,"sources":["../src/store/memory.ts","../src/store/factory.ts","../src/embedding/factory.ts","../src/chunking.ts"],"sourcesContent":["/**\n * In-Memory VectorStore Implementation\n *\n * For testing and development without a running Qdrant instance.\n * Uses brute-force cosine similarity search.\n */\n\nimport type { ResourceId, AnnotationId } from '@semiont/core';\nimport type { VectorStore, EmbeddingChunk, AnnotationPayload, VectorSearchResult, SearchOptions } from './interface';\n\ninterface StoredPoint {\n id: string;\n vector: number[];\n payload: {\n resourceId: string;\n annotationId?: string;\n chunkIndex?: number;\n text: string;\n motivation?: string;\n entityTypes?: string[];\n };\n}\n\nfunction cosineSimilarity(a: number[], b: number[]): number {\n let dotProduct = 0;\n let normA = 0;\n let normB = 0;\n for (let i = 0; i < a.length; i++) {\n dotProduct += a[i] * b[i];\n normA += a[i] * a[i];\n normB += b[i] * b[i];\n }\n const denom = Math.sqrt(normA) * Math.sqrt(normB);\n return denom === 0 ? 0 : dotProduct / denom;\n}\n\nexport class MemoryVectorStore implements VectorStore {\n private resources: StoredPoint[] = [];\n private annotations: StoredPoint[] = [];\n private connected = false;\n\n async connect(): Promise<void> {\n this.connected = true;\n }\n\n async disconnect(): Promise<void> {\n this.connected = false;\n }\n\n async clearAll(): Promise<void> {\n this.resources = [];\n this.annotations = [];\n }\n\n isConnected(): boolean {\n return this.connected;\n }\n\n async upsertResourceVectors(resourceId: ResourceId, chunks: EmbeddingChunk[]): Promise<void> {\n // Remove existing vectors for this resource\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n\n for (const chunk of chunks) {\n this.resources.push({\n id: `${resourceId}-${chunk.chunkIndex}`,\n vector: chunk.embedding,\n payload: {\n resourceId: String(resourceId),\n chunkIndex: chunk.chunkIndex,\n text: chunk.text,\n },\n });\n }\n }\n\n async upsertAnnotationVector(\n annotationId: AnnotationId,\n embedding: number[],\n payload: AnnotationPayload\n ): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n this.annotations.push({\n id: String(annotationId),\n vector: embedding,\n payload: {\n annotationId: String(payload.annotationId),\n resourceId: String(payload.resourceId),\n motivation: payload.motivation,\n entityTypes: payload.entityTypes,\n text: payload.exactText,\n },\n });\n }\n\n async deleteResourceVectors(resourceId: ResourceId): Promise<void> {\n this.resources = this.resources.filter(p => p.payload.resourceId !== String(resourceId));\n }\n\n async deleteAnnotationVector(annotationId: AnnotationId): Promise<void> {\n this.annotations = this.annotations.filter(p => p.id !== String(annotationId));\n }\n\n async searchResources(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.resources, embedding, opts);\n }\n\n async searchAnnotations(embedding: number[], opts: SearchOptions): Promise<VectorSearchResult[]> {\n return this.search(this.annotations, embedding, opts);\n }\n\n private search(points: StoredPoint[], embedding: number[], opts: SearchOptions): VectorSearchResult[] {\n let filtered = points;\n\n if (opts.filter) {\n const f = opts.filter;\n filtered = points.filter(p => {\n if (f.resourceId && p.payload.resourceId !== String(f.resourceId)) return false;\n if (f.excludeResourceId && p.payload.resourceId === String(f.excludeResourceId)) return false;\n if (f.motivation && p.payload.motivation !== f.motivation) return false;\n if (f.entityTypes && f.entityTypes.length > 0) {\n const pTypes = p.payload.entityTypes ?? [];\n if (!f.entityTypes.some(t => pTypes.includes(t))) return false;\n }\n return true;\n });\n }\n\n const scored = filtered.map(p => ({\n ...p,\n score: cosineSimilarity(embedding, p.vector),\n }));\n\n scored.sort((a, b) => b.score - a.score);\n\n if (opts.scoreThreshold) {\n const threshold = opts.scoreThreshold;\n return scored\n .filter(s => s.score >= threshold)\n .slice(0, opts.limit)\n .map(s => this.toResult(s));\n }\n\n return scored.slice(0, opts.limit).map(s => this.toResult(s));\n }\n\n private toResult(s: StoredPoint & { score: number }): VectorSearchResult {\n return {\n id: s.id,\n score: s.score,\n resourceId: s.payload.resourceId as ResourceId,\n annotationId: s.payload.annotationId as AnnotationId | undefined,\n text: s.payload.text,\n entityTypes: s.payload.entityTypes,\n };\n }\n}\n","/**\n * VectorStore Factory\n *\n * Creates a VectorStore instance based on configuration.\n */\n\nimport type { VectorStore } from './interface';\nimport { MemoryVectorStore } from './memory';\n\nexport interface VectorStoreConfig {\n type: 'qdrant' | 'memory';\n host?: string;\n port?: number;\n dimensions: number;\n}\n\nlet instance: VectorStore | null = null;\n\nexport async function createVectorStore(config: VectorStoreConfig): Promise<VectorStore> {\n if (instance) return instance;\n\n if (config.type === 'qdrant') {\n const { QdrantVectorStore } = await import('./qdrant');\n instance = new QdrantVectorStore({\n host: config.host ?? 'localhost',\n port: config.port ?? 6333,\n dimensions: config.dimensions,\n });\n } else {\n instance = new MemoryVectorStore();\n }\n\n await instance.connect();\n return instance;\n}\n\nexport function getVectorStore(): VectorStore | null {\n return instance;\n}\n","/**\n * EmbeddingProvider Factory\n */\n\nimport type { EmbeddingProvider } from './interface';\n\nexport interface EmbeddingConfig {\n type: 'voyage' | 'ollama';\n model: string;\n apiKey?: string;\n baseURL?: string;\n endpoint?: string;\n}\n\nexport async function createEmbeddingProvider(config: EmbeddingConfig): Promise<EmbeddingProvider> {\n if (config.type === 'voyage') {\n const { VoyageEmbeddingProvider } = await import('./voyage');\n if (!config.apiKey) throw new Error('apiKey is required for Voyage embedding provider');\n return new VoyageEmbeddingProvider({\n apiKey: config.apiKey,\n model: config.model,\n endpoint: config.endpoint,\n });\n }\n\n if (config.type === 'ollama') {\n const { OllamaEmbeddingProvider } = await import('./ollama');\n return new OllamaEmbeddingProvider({\n model: config.model,\n baseURL: config.baseURL,\n });\n }\n\n throw new Error(`Unknown embedding provider type: ${config.type}`);\n}\n","/**\n * Text Chunking Utilities\n *\n * Splits long text into overlapping chunks for embedding.\n * Each chunk is a passage that fits within the embedding model's context window.\n */\n\nexport interface ChunkingConfig {\n chunkSize: number; // approximate tokens per chunk\n overlap: number; // tokens of overlap between adjacent chunks\n}\n\nexport const DEFAULT_CHUNKING_CONFIG: ChunkingConfig = {\n chunkSize: 512,\n overlap: 64,\n};\n\n/**\n * Rough token count estimate: ~4 characters per token for English text.\n */\nfunction estimateTokens(text: string): number {\n return Math.ceil(text.length / 4);\n}\n\n/**\n * Split text into overlapping chunks.\n *\n * Splits on paragraph boundaries when possible, falling back to sentence\n * boundaries, then word boundaries. Each chunk overlaps with the previous\n * by `overlap` tokens worth of text.\n */\nexport function chunkText(text: string, config: ChunkingConfig = DEFAULT_CHUNKING_CONFIG): string[] {\n if (text.length === 0) return [];\n const totalTokens = estimateTokens(text);\n if (totalTokens <= config.chunkSize) {\n return [text];\n }\n\n const chunkChars = config.chunkSize * 4;\n const overlapChars = config.overlap * 4;\n const chunks: string[] = [];\n let start = 0;\n\n while (start < text.length) {\n let end = Math.min(start + chunkChars, text.length);\n\n // Try to break at a paragraph boundary\n if (end < text.length) {\n const paraBreak = text.lastIndexOf('\\n\\n', end);\n if (paraBreak > start + chunkChars / 2) {\n end = paraBreak;\n } else {\n // Try sentence boundary\n const sentenceBreak = text.lastIndexOf('. ', end);\n if (sentenceBreak > start + chunkChars / 2) {\n end = sentenceBreak + 1;\n } else {\n // Try word boundary\n const wordBreak = text.lastIndexOf(' ', end);\n if (wordBreak > start + chunkChars / 2) {\n end = wordBreak;\n }\n }\n }\n }\n\n chunks.push(text.slice(start, end).trim());\n const nextStart = end - overlapChars;\n start = nextStart > start ? nextStart : end;\n if (start >= text.length) break;\n }\n\n return chunks.filter(c => c.length > 0);\n}\n"],"mappings":";;;;;;;;;;;AAuBA,SAAS,iBAAiB,GAAa,GAAqB;AAC1D,MAAI,aAAa;AACjB,MAAI,QAAQ;AACZ,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,kBAAc,EAAE,CAAC,IAAI,EAAE,CAAC;AACxB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AACnB,aAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,EACrB;AACA,QAAM,QAAQ,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK;AAChD,SAAO,UAAU,IAAI,IAAI,aAAa;AACxC;AAEO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,YAA2B,CAAC;AAAA,EAC5B,cAA6B,CAAC;AAAA,EAC9B,YAAY;AAAA,EAEpB,MAAM,UAAyB;AAC7B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,aAA4B;AAChC,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,WAA0B;AAC9B,SAAK,YAAY,CAAC;AAClB,SAAK,cAAc,CAAC;AAAA,EACtB;AAAA,EAEA,cAAuB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBAAsB,YAAwB,QAAyC;AAE3F,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAEvF,eAAW,SAAS,QAAQ;AAC1B,WAAK,UAAU,KAAK;AAAA,QAClB,IAAI,GAAG,UAAU,IAAI,MAAM,UAAU;AAAA,QACrC,QAAQ,MAAM;AAAA,QACd,SAAS;AAAA,UACP,YAAY,OAAO,UAAU;AAAA,UAC7B,YAAY,MAAM;AAAA,UAClB,MAAM,MAAM;AAAA,QACd;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,uBACJ,cACA,WACA,SACe;AACf,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAC7E,SAAK,YAAY,KAAK;AAAA,MACpB,IAAI,OAAO,YAAY;AAAA,MACvB,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,cAAc,OAAO,QAAQ,YAAY;AAAA,QACzC,YAAY,OAAO,QAAQ,UAAU;AAAA,QACrC,YAAY,QAAQ;AAAA,QACpB,aAAa,QAAQ;AAAA,QACrB,MAAM,QAAQ;AAAA,MAChB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,sBAAsB,YAAuC;AACjE,SAAK,YAAY,KAAK,UAAU,OAAO,OAAK,EAAE,QAAQ,eAAe,OAAO,UAAU,CAAC;AAAA,EACzF;AAAA,EAEA,MAAM,uBAAuB,cAA2C;AACtE,SAAK,cAAc,KAAK,YAAY,OAAO,OAAK,EAAE,OAAO,OAAO,YAAY,CAAC;AAAA,EAC/E;AAAA,EAEA,MAAM,gBAAgB,WAAqB,MAAoD;AAC7F,WAAO,KAAK,OAAO,KAAK,WAAW,WAAW,IAAI;AAAA,EACpD;AAAA,EAEA,MAAM,kBAAkB,WAAqB,MAAoD;AAC/F,WAAO,KAAK,OAAO,KAAK,aAAa,WAAW,IAAI;AAAA,EACtD;AAAA,EAEQ,OAAO,QAAuB,WAAqB,MAA2C;AACpG,QAAI,WAAW;AAEf,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,KAAK;AACf,iBAAW,OAAO,OAAO,OAAK;AAC5B,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,OAAO,EAAE,UAAU,EAAG,QAAO;AAC1E,YAAI,EAAE,qBAAqB,EAAE,QAAQ,eAAe,OAAO,EAAE,iBAAiB,EAAG,QAAO;AACxF,YAAI,EAAE,cAAc,EAAE,QAAQ,eAAe,EAAE,WAAY,QAAO;AAClE,YAAI,EAAE,eAAe,EAAE,YAAY,SAAS,GAAG;AAC7C,gBAAM,SAAS,EAAE,QAAQ,eAAe,CAAC;AACzC,cAAI,CAAC,EAAE,YAAY,KAAK,OAAK,OAAO,SAAS,CAAC,CAAC,EAAG,QAAO;AAAA,QAC3D;AACA,eAAO;AAAA,MACT,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,SAAS,IAAI,QAAM;AAAA,MAChC,GAAG;AAAA,MACH,OAAO,iBAAiB,WAAW,EAAE,MAAM;AAAA,IAC7C,EAAE;AAEF,WAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAEvC,QAAI,KAAK,gBAAgB;AACvB,YAAM,YAAY,KAAK;AACvB,aAAO,OACJ,OAAO,OAAK,EAAE,SAAS,SAAS,EAChC,MAAM,GAAG,KAAK,KAAK,EACnB,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,IAC9B;AAEA,WAAO,OAAO,MAAM,GAAG,KAAK,KAAK,EAAE,IAAI,OAAK,KAAK,SAAS,CAAC,CAAC;AAAA,EAC9D;AAAA,EAEQ,SAAS,GAAwD;AACvE,WAAO;AAAA,MACL,IAAI,EAAE;AAAA,MACN,OAAO,EAAE;AAAA,MACT,YAAY,EAAE,QAAQ;AAAA,MACtB,cAAc,EAAE,QAAQ;AAAA,MACxB,MAAM,EAAE,QAAQ;AAAA,MAChB,aAAa,EAAE,QAAQ;AAAA,IACzB;AAAA,EACF;AACF;;;AC3IA,IAAI,WAA+B;AAEnC,eAAsB,kBAAkB,QAAiD;AACvF,MAAI,SAAU,QAAO;AAErB,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,mBAAAA,mBAAkB,IAAI,MAAM,OAAO,sBAAU;AACrD,eAAW,IAAIA,mBAAkB;AAAA,MAC/B,MAAM,OAAO,QAAQ;AAAA,MACrB,MAAM,OAAO,QAAQ;AAAA,MACrB,YAAY,OAAO;AAAA,IACrB,CAAC;AAAA,EACH,OAAO;AACL,eAAW,IAAI,kBAAkB;AAAA,EACnC;AAEA,QAAM,SAAS,QAAQ;AACvB,SAAO;AACT;AAEO,SAAS,iBAAqC;AACnD,SAAO;AACT;;;ACxBA,eAAsB,wBAAwB,QAAqD;AACjG,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,QAAI,CAAC,OAAO,OAAQ,OAAM,IAAI,MAAM,kDAAkD;AACtF,WAAO,IAAIA,yBAAwB;AAAA,MACjC,QAAQ,OAAO;AAAA,MACf,OAAO,OAAO;AAAA,MACd,UAAU,OAAO;AAAA,IACnB,CAAC;AAAA,EACH;AAEA,MAAI,OAAO,SAAS,UAAU;AAC5B,UAAM,EAAE,yBAAAC,yBAAwB,IAAI,MAAM,OAAO,sBAAU;AAC3D,WAAO,IAAIA,yBAAwB;AAAA,MACjC,OAAO,OAAO;AAAA,MACd,SAAS,OAAO;AAAA,IAClB,CAAC;AAAA,EACH;AAEA,QAAM,IAAI,MAAM,oCAAoC,OAAO,IAAI,EAAE;AACnE;;;ACtBO,IAAM,0BAA0C;AAAA,EACrD,WAAW;AAAA,EACX,SAAS;AACX;AAKA,SAAS,eAAe,MAAsB;AAC5C,SAAO,KAAK,KAAK,KAAK,SAAS,CAAC;AAClC;AASO,SAAS,UAAU,MAAc,SAAyB,yBAAmC;AAClG,MAAI,KAAK,WAAW,EAAG,QAAO,CAAC;AAC/B,QAAM,cAAc,eAAe,IAAI;AACvC,MAAI,eAAe,OAAO,WAAW;AACnC,WAAO,CAAC,IAAI;AAAA,EACd;AAEA,QAAM,aAAa,OAAO,YAAY;AACtC,QAAM,eAAe,OAAO,UAAU;AACtC,QAAM,SAAmB,CAAC;AAC1B,MAAI,QAAQ;AAEZ,SAAO,QAAQ,KAAK,QAAQ;AAC1B,QAAI,MAAM,KAAK,IAAI,QAAQ,YAAY,KAAK,MAAM;AAGlD,QAAI,MAAM,KAAK,QAAQ;AACrB,YAAM,YAAY,KAAK,YAAY,QAAQ,GAAG;AAC9C,UAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,cAAM;AAAA,MACR,OAAO;AAEL,cAAM,gBAAgB,KAAK,YAAY,MAAM,GAAG;AAChD,YAAI,gBAAgB,QAAQ,aAAa,GAAG;AAC1C,gBAAM,gBAAgB;AAAA,QACxB,OAAO;AAEL,gBAAM,YAAY,KAAK,YAAY,KAAK,GAAG;AAC3C,cAAI,YAAY,QAAQ,aAAa,GAAG;AACtC,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,KAAK,MAAM,OAAO,GAAG,EAAE,KAAK,CAAC;AACzC,UAAM,YAAY,MAAM;AACxB,YAAQ,YAAY,QAAQ,YAAY;AACxC,QAAI,SAAS,KAAK,OAAQ;AAAA,EAC5B;AAEA,SAAO,OAAO,OAAO,OAAK,EAAE,SAAS,CAAC;AACxC;","names":["QdrantVectorStore","VoyageEmbeddingProvider","OllamaEmbeddingProvider"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@semiont/vectors",
3
- "version": "0.5.1",
3
+ "version": "0.5.3",
4
4
  "type": "module",
5
5
  "description": "Vector storage, embedding, and semantic search for Semiont",
6
6
  "main": "dist/index.js",