retriv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +282 -0
- package/dist/_chunks/split-text.mjs +85 -0
- package/dist/db/cloudflare.d.mts +36 -0
- package/dist/db/cloudflare.mjs +55 -0
- package/dist/db/libsql.d.mts +30 -0
- package/dist/db/libsql.mjs +87 -0
- package/dist/db/pgvector.d.mts +30 -0
- package/dist/db/pgvector.mjs +80 -0
- package/dist/db/sqlite-fts.d.mts +23 -0
- package/dist/db/sqlite-fts.mjs +68 -0
- package/dist/db/sqlite-vec.d.mts +27 -0
- package/dist/db/sqlite-vec.mjs +108 -0
- package/dist/db/upstash.d.mts +28 -0
- package/dist/db/upstash.mjs +56 -0
- package/dist/embeddings/cohere.d.mts +28 -0
- package/dist/embeddings/cohere.mjs +39 -0
- package/dist/embeddings/google.d.mts +28 -0
- package/dist/embeddings/google.mjs +39 -0
- package/dist/embeddings/mistral.d.mts +28 -0
- package/dist/embeddings/mistral.mjs +39 -0
- package/dist/embeddings/ollama.d.mts +26 -0
- package/dist/embeddings/ollama.mjs +37 -0
- package/dist/embeddings/openai.d.mts +28 -0
- package/dist/embeddings/openai.mjs +39 -0
- package/dist/embeddings/resolve.d.mts +10 -0
- package/dist/embeddings/resolve.mjs +4 -0
- package/dist/embeddings/transformers.d.mts +24 -0
- package/dist/embeddings/transformers.mjs +26 -0
- package/dist/index.d.mts +3 -0
- package/dist/index.mjs +2 -0
- package/dist/retriv.d.mts +9 -0
- package/dist/retriv.mjs +112 -0
- package/dist/types.d.mts +203 -0
- package/dist/types.mjs +1 -0
- package/dist/utils/split-text.d.mts +23 -0
- package/dist/utils/split-text.mjs +2 -0
- package/package.json +167 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";

//#region src/embeddings/openai.d.ts
/** Options accepted by the `openai()` embedding provider factory. */
interface OpenAIEmbeddingOptions {
  /** Model name (default: text-embedding-3-small) */
  model?: string;
  /** API key (falls back to OPENAI_API_KEY env) */
  apiKey?: string;
  /** Base URL override */
  baseUrl?: string;
}
/**
 * OpenAI embedding provider
 *
 * Returns an {@link EmbeddingConfig} whose `resolve()` produces the embedder
 * and its vector dimensionality.
 *
 * @example
 * ```ts
 * import { openai } from 'retriv/embeddings/openai'
 * import { sqliteVec } from 'retriv/db/sqlite-vec'
 *
 * const db = await sqliteVec({
 *   path: 'vectors.db',
 *   embeddings: openai({ model: 'text-embedding-3-small' }),
 * })
 * ```
 */
declare function openai(options?: OpenAIEmbeddingOptions): EmbeddingConfig;
//#endregion
export { OpenAIEmbeddingOptions, openai };
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
|
+
import { embed, embedMany } from "ai";
|
|
3
|
+
/**
 * OpenAI embedding provider backed by the Vercel AI SDK.
 *
 * The returned config resolves lazily: the first `resolve()` call creates the
 * model, embeds a throwaway "test" string once to measure the output
 * dimensionality, and memoizes the resulting `{ embedder, dimensions }` so
 * later calls are free.
 */
function openai(options = {}) {
  const { model = "text-embedding-3-small", apiKey, baseUrl } = options;
  // Memoized result — the dimension probe runs at most once per config.
  let resolved = null;
  const resolve = async () => {
    if (resolved) return resolved;
    const embeddingModel = createOpenAI({
      apiKey,
      baseURL: baseUrl
    }).textEmbeddingModel(model);
    // Probe embedding used solely to discover the vector dimensionality.
    const probe = await embed({
      model: embeddingModel,
      value: "test"
    });
    const embedder = async (texts) => {
      if (texts.length === 0) return [];
      // Single-text fast path uses embed() instead of the batch call.
      if (texts.length === 1) {
        const single = await embed({
          model: embeddingModel,
          value: texts[0]
        });
        return [single.embedding];
      }
      const batch = await embedMany({
        model: embeddingModel,
        values: texts
      });
      return batch.embeddings;
    };
    resolved = {
      embedder,
      dimensions: probe.embedding.length
    };
    return resolved;
  };
  return { resolve };
}
export { openai };
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { EmbeddingConfig, ResolvedEmbedding } from "../types.mjs";

//#region src/embeddings/resolve.d.ts
/**
 * Resolve an EmbeddingConfig to an embedder and dimensions
 * Simply calls the resolve method on the config
 *
 * @param config - embedding config whose `resolve()` produces the provider
 * @returns the resolved `{ embedder, dimensions }` pair
 */
declare function resolveEmbedding(config: EmbeddingConfig): Promise<ResolvedEmbedding>;
//#endregion
export { resolveEmbedding };
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";

//#region src/embeddings/transformers.d.ts
/** Options accepted by the `transformers()` embedding provider factory. */
interface TransformersEmbeddingOptions {
  /** Model name (default: Xenova/bge-base-en-v1.5) */
  model?: string;
}
/**
 * Transformers.js embedding provider (local, in-browser compatible)
 *
 * @example
 * ```ts
 * import { transformers } from 'retriv/embeddings/transformers'
 * import { sqliteVec } from 'retriv/db/sqlite-vec'
 *
 * const db = await sqliteVec({
 *   path: 'vectors.db',
 *   embeddings: transformers({ model: 'Xenova/bge-base-en-v1.5' }),
 * })
 * ```
 */
declare function transformers(options?: TransformersEmbeddingOptions): EmbeddingConfig;
//#endregion
export { TransformersEmbeddingOptions, transformers };
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { pipeline } from "@huggingface/transformers";
|
|
2
|
+
/**
 * Local embedding provider using a Transformers.js feature-extraction pipeline.
 *
 * `resolve()` loads the pipeline once (fp32 weights requested explicitly),
 * builds an embedder that mean-pools and normalizes each text, then memoizes
 * the provider together with the dimensionality measured from a probe
 * embedding of "test".
 */
function transformers(options = {}) {
  const { model = "Xenova/bge-base-en-v1.5" } = options;
  let resolved = null;
  return {
    async resolve() {
      if (resolved) return resolved;
      const extractor = await pipeline("feature-extraction", model, { dtype: "fp32" });
      // Texts are embedded one at a time, each mean-pooled and normalized.
      const embedder = async (texts) => {
        const vectors = [];
        for (const input of texts) {
          const output = await extractor(input, {
            pooling: "mean",
            normalize: true
          });
          vectors.push(Array.from(output.data));
        }
        return vectors;
      };
      // Probe once to learn the vector dimensionality.
      const [probe] = await embedder(["test"]);
      resolved = {
        embedder,
        dimensions: probe.length
      };
      return resolved;
    }
  };
}
export { transformers };
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
// Public entry point: re-exports the createRetriv factory and its option/result types.
import { ChunkInfo, ChunkingOptions, Document, EmbeddingConfig, EmbeddingProvider, RetrivOptions, SearchOptions, SearchProvider, SearchResult } from "./types.mjs";
import { createRetriv } from "./retriv.mjs";
export { type ChunkInfo, type ChunkingOptions, type Document, type EmbeddingConfig, type EmbeddingProvider, type RetrivOptions, type SearchOptions, type SearchProvider, type SearchResult, createRetriv };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { RetrivOptions, SearchProvider } from "./types.mjs";

//#region src/retriv.d.ts
/**
 * Create a unified retrieval instance
 *
 * @param options - driver (single or composed `{ vector?, keyword? }`) plus optional chunking config
 * @returns a SearchProvider facade over the configured driver(s)
 */
declare function createRetriv(options: RetrivOptions): Promise<SearchProvider>;
//#endregion
export { createRetriv };
|
package/dist/retriv.mjs
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { t as splitText } from "./_chunks/split-text.mjs";
|
|
2
|
+
const RRF_K = 60;
|
|
3
|
+
function isComposed(driver) {
|
|
4
|
+
return typeof driver === "object" && driver !== null && ("vector" in driver || "keyword" in driver);
|
|
5
|
+
}
|
|
6
|
+
function applyRRF(resultSets) {
|
|
7
|
+
const scores = /* @__PURE__ */ new Map();
|
|
8
|
+
for (const results of resultSets) for (let rank = 0; rank < results.length; rank++) {
|
|
9
|
+
const result = results[rank];
|
|
10
|
+
const rrfScore = 1 / (RRF_K + rank + 1);
|
|
11
|
+
const existing = scores.get(result.id);
|
|
12
|
+
if (existing) {
|
|
13
|
+
existing.score += rrfScore;
|
|
14
|
+
if (result.content && !existing.result.content) existing.result = {
|
|
15
|
+
...existing.result,
|
|
16
|
+
content: result.content
|
|
17
|
+
};
|
|
18
|
+
if (result.metadata && !existing.result.metadata) existing.result = {
|
|
19
|
+
...existing.result,
|
|
20
|
+
metadata: result.metadata
|
|
21
|
+
};
|
|
22
|
+
} else scores.set(result.id, {
|
|
23
|
+
score: rrfScore,
|
|
24
|
+
result
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
return Array.from(scores.values()).sort((a, b) => b.score - a.score).map(({ score, result }) => ({
|
|
28
|
+
...result,
|
|
29
|
+
score
|
|
30
|
+
}));
|
|
31
|
+
}
|
|
32
|
+
/**
 * Create a unified retrieval instance over one or more search drivers.
 *
 * Accepts a single driver (possibly a promise) or a composed
 * `{ vector?, keyword? }` pair; with two drivers, search results are fused
 * with Reciprocal Rank Fusion. Optional chunking splits large documents
 * before indexing and surfaces chunk info on results as `_chunk`.
 */
async function createRetriv(options) {
  const { driver: driverInput, chunking } = options;
  let drivers;
  if (isComposed(driverInput)) {
    // Resolve whichever of vector/keyword is provided; drop the missing slot.
    drivers = (await Promise.all([driverInput.vector ? Promise.resolve(driverInput.vector) : null, driverInput.keyword ? Promise.resolve(driverInput.keyword) : null])).filter((d) => d !== null);
    if (drivers.length === 0) throw new Error("At least one driver (vector or keyword) is required");
  } else drivers = [await Promise.resolve(driverInput)];
  // Hybrid (RRF-merged) mode only when both composed drivers are present.
  const isHybrid = drivers.length > 1;
  // Parent documents of chunked entries. NOTE(review): populated in
  // prepareDocs and emptied in clear(), but never read in this module —
  // presumably reserved for parent-document retrieval; confirm before relying on it.
  const parentDocs = /* @__PURE__ */ new Map();
  // Split docs into chunks when chunking is enabled. A doc that fits in one
  // chunk is indexed unchanged; otherwise each chunk gets a derived id
  // "<docId>#chunk-<n>" and provenance fields (_parentId/_chunkIndex/_chunkRange)
  // folded into its metadata.
  function prepareDocs(docs) {
    if (!chunking) return docs;
    const { chunkSize = 1e3, chunkOverlap = 200 } = chunking;
    const chunkedDocs = [];
    for (const doc of docs) {
      const chunks = splitText(doc.content, {
        chunkSize,
        chunkOverlap
      });
      if (chunks.length === 1) chunkedDocs.push(doc);
      else {
        parentDocs.set(doc.id, doc);
        for (const chunk of chunks) chunkedDocs.push({
          id: `${doc.id}#chunk-${chunk.index}`,
          content: chunk.text,
          metadata: {
            ...doc.metadata,
            _parentId: doc.id,
            _chunkIndex: chunk.index,
            _chunkRange: chunk.range
          }
        });
      }
    }
    return chunkedDocs;
  }
  // Move the internal _parentId/_chunkIndex/_chunkRange metadata fields out of
  // result.metadata into a structured `_chunk` object; non-chunk results pass
  // through untouched.
  function annotateChunks(results) {
    if (!chunking) return results;
    return results.map((result) => {
      const metadata = result.metadata || {};
      const parentId = metadata._parentId;
      const chunkIndex = metadata._chunkIndex;
      const chunkRange = metadata._chunkRange;
      if (parentId !== void 0 && chunkIndex !== void 0) {
        const { _parentId, _chunkIndex, _chunkRange, ...cleanMeta } = metadata;
        return {
          ...result,
          // Drop metadata entirely when only the internal fields were present.
          metadata: Object.keys(cleanMeta).length > 0 ? cleanMeta : void 0,
          _chunk: {
            parentId,
            index: chunkIndex,
            range: chunkRange
          }
        };
      }
      return result;
    });
  }
  return {
    // Index into every driver in parallel; the reported count is the first
    // driver's (which is the chunk count, not the document count, when
    // chunking split any docs).
    async index(docs) {
      const prepared = prepareDocs(docs);
      return { count: (await Promise.all(drivers.map((d) => d.index(prepared))))[0].count };
    },
    // Single driver: pass through. Hybrid: run both, fuse with RRF, then
    // re-apply `limit` since fusion can interleave beyond either list.
    // NOTE(review): `if (searchOptions.limit)` skips slicing for limit: 0.
    async search(query, searchOptions = {}) {
      if (!isHybrid) return annotateChunks(await drivers[0].search(query, searchOptions));
      let merged = applyRRF(await Promise.all(drivers.map((d) => d.search(query, searchOptions))));
      if (searchOptions.limit) merged = merged.slice(0, searchOptions.limit);
      return annotateChunks(merged);
    },
    // Forward ids to every driver that supports remove; report the first
    // driver's count. NOTE(review): ids are passed as-is — when chunking is
    // enabled, chunk rows are stored under "<id>#chunk-<n>", so removing by
    // parent id likely leaves chunk rows behind; confirm intended behavior.
    async remove(ids) {
      return { count: (await Promise.all(drivers.filter((d) => d.remove).map((d) => d.remove(ids))))[0]?.count ?? 0 };
    },
    // Clear all drivers that support it and reset the parent-doc cache.
    async clear() {
      await Promise.all(drivers.filter((d) => d.clear).map((d) => d.clear()));
      parentDocs.clear();
    },
    // Close all drivers that support it.
    async close() {
      await Promise.all(drivers.filter((d) => d.close).map((d) => d.close()));
    }
  };
}
export { createRetriv };
|
package/dist/types.d.mts
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
//#region src/types.d.ts
/**
 * A document to be indexed for search
 */
interface Document {
  /** Unique identifier */
  id: string;
  /** Text content to search */
  content: string;
  /** Optional metadata to store alongside */
  metadata?: Record<string, any>;
}
/**
 * Chunk info when chunking is enabled
 */
interface ChunkInfo {
  /** Id of the original (pre-chunking) document */
  parentId: string;
  /** Zero-based chunk position within the parent document */
  index: number;
  /** Character range of the chunk in the parent content */
  range?: [number, number];
}
/**
 * Driver-specific metadata
 */
interface SearchMeta {
  bm25Score?: number;
  highlights?: string[];
  distance?: number;
  vector?: number[];
  matches?: Array<{
    indices: [number, number][];
    value: string;
  }>;
  [key: string]: any;
}
/**
 * A search result
 */
interface SearchResult {
  /** Document ID */
  id: string;
  /** Relevance score (0-1, higher is better) */
  score: number;
  /** Original content (if returnContent: true) */
  content?: string;
  /** Document metadata (if returnMetadata: true) */
  metadata?: Record<string, any>;
  /** Chunk info (when chunking enabled) */
  _chunk?: ChunkInfo;
  /** Driver-specific extras (if returnMeta: true) */
  _meta?: SearchMeta;
}
/**
 * Search options
 */
interface SearchOptions {
  /** Maximum results to return */
  limit?: number;
  /** Return original content */
  returnContent?: boolean;
  /** Return metadata */
  returnMetadata?: boolean;
  /** Return driver-specific _meta */
  returnMeta?: boolean;
}
/**
 * Search provider interface - unified across all driver types
 */
interface SearchProvider {
  /**
   * Index documents for search
   */
  index: (docs: Document[]) => Promise<{
    count: number;
  }>;
  /**
   * Search for documents
   */
  search: (query: string, options?: SearchOptions) => Promise<SearchResult[]>;
  /**
   * Remove documents by ID
   */
  remove?: (ids: string[]) => Promise<{
    count: number;
  }>;
  /**
   * Clear all indexed documents
   */
  clear?: () => Promise<void>;
  /**
   * Close the provider and release resources
   */
  close?: () => Promise<void>;
}
/**
 * Base config shared by all drivers
 */
interface BaseDriverConfig {
  /** Database/index path or URL */
  path?: string;
  url?: string;
}
/**
 * Embedding provider function
 * Takes text(s) and returns embedding vectors
 */
type EmbeddingProvider = (texts: string[]) => Promise<number[][]>;
/**
 * Resolved embedding result
 */
interface ResolvedEmbedding {
  embedder: EmbeddingProvider;
  dimensions: number;
}
/**
 * Embedding config (returned by embedding modules)
 */
interface EmbeddingConfig {
  /** Resolve the embedding provider */
  resolve: () => Promise<ResolvedEmbedding>;
}
/** Typed-array forms accepted for vector values. */
type VectorFloatArray = Float32Array | Float64Array;
/** Metadata value types allowed on a vector record. */
type VectorizeVectorMetadata = string | number | boolean | string[];
/** A vector record (Cloudflare Vectorize-shaped). */
interface VectorizeVector {
  id: string;
  values: VectorFloatArray | number[];
  namespace?: string;
  metadata?: Record<string, VectorizeVectorMetadata>;
}
/** A single vector query match. */
interface VectorizeMatch {
  id: string;
  score: number;
  values?: number[];
  namespace?: string;
  metadata?: Record<string, VectorizeVectorMetadata>;
}
/** The result set of a vector query. */
interface VectorizeMatches {
  matches: VectorizeMatch[];
  count: number;
}
/** Options for a vector index query. */
interface VectorizeQueryOptions {
  topK?: number;
  namespace?: string;
  returnValues?: boolean;
  returnMetadata?: boolean;
  filter?: Record<string, any>;
}
/**
 * Semantic driver config (vector-based search)
 */
interface SemanticDriverConfig extends BaseDriverConfig {
  /** Embedding provider from retriv/embeddings/ */
  embeddings: EmbeddingConfig;
  /** Auth token for remote databases */
  authToken?: string;
  /** Namespace for vector isolation */
  namespace?: string;
  /** Distance metric */
  metric?: 'cosine' | 'euclidean' | 'dot-product';
}
/**
 * Text-native driver config (e.g., Upstash - handles embeddings server-side)
 */
interface TextNativeDriverConfig extends BaseDriverConfig {
  /** API token */
  token: string;
  /** Namespace for isolation */
  namespace?: string;
}
/**
 * Chunking configuration
 */
interface ChunkingOptions {
  chunkSize?: number;
  chunkOverlap?: number;
}
/**
 * Resolvable driver (can be promise)
 */
type Resolvable<T> = T | Promise<T>;
/**
 * Any search provider (for composed drivers - loosened for driver compatibility)
 */
type AnyDriver = Resolvable<SearchProvider>;
/**
 * Composed driver config for hybrid search
 */
interface ComposedDriver {
  vector?: AnyDriver;
  keyword?: AnyDriver;
}
/**
 * Driver input - single driver or composed
 */
type DriverInput = AnyDriver | ComposedDriver;
/**
 * Options for createRetriv factory
 */
interface RetrivOptions {
  driver: DriverInput;
  chunking?: ChunkingOptions;
}
//#endregion
export { BaseDriverConfig, ChunkInfo, ChunkingOptions, ComposedDriver, Document, DriverInput, EmbeddingConfig, EmbeddingProvider, ResolvedEmbedding, RetrivOptions, SearchMeta, SearchOptions, SearchProvider, SearchResult, SemanticDriverConfig, TextNativeDriverConfig, VectorFloatArray, VectorizeMatch, VectorizeMatches, VectorizeQueryOptions, VectorizeVector, VectorizeVectorMetadata };
|
package/dist/types.mjs
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Runtime stub for a type-only module: keeps the .mjs entry valid with no exports.
export {};
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
//#region src/utils/split-text.d.ts
/**
 * Recursive markdown text splitter (LangChain-style)
 */
interface SplitTextOptions {
  /** Target maximum characters per chunk */
  chunkSize?: number;
  /** Characters of overlap carried between consecutive chunks */
  chunkOverlap?: number;
  /** Separator candidates tried in order, most structural first */
  separators?: string[];
}
/** One chunk produced by splitText, with provenance in the original text. */
interface TextChunk {
  text: string;
  index: number;
  /** Character range [start, end] in original text */
  range: [number, number];
  /** Line range [startLine, endLine] (1-indexed) */
  lines: [number, number];
}
/**
 * Split text recursively using markdown-aware separators
 */
declare function splitText(text: string, options?: SplitTextOptions): TextChunk[];
//#endregion
export { SplitTextOptions, TextChunk, splitText };
|
package/package.json
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "retriv",
|
|
3
|
+
"type": "module",
|
|
4
|
+
"version": "0.0.1",
|
|
5
|
+
"description": "Unified document retrieval for search - semantic, full-text, and fuzzy.",
|
|
6
|
+
"author": {
|
|
7
|
+
"name": "Harlan Wilton",
|
|
8
|
+
"email": "harlan@harlanzw.com",
|
|
9
|
+
"url": "https://harlanzw.com/"
|
|
10
|
+
},
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "https://github.com/harlan-zw/retriv"
|
|
15
|
+
},
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"types": "./dist/index.d.mts",
|
|
19
|
+
"import": "./dist/index.mjs"
|
|
20
|
+
},
|
|
21
|
+
"./db/sqlite": {
|
|
22
|
+
"types": "./dist/db/sqlite.d.mts",
|
|
23
|
+
"import": "./dist/db/sqlite.mjs"
|
|
24
|
+
},
|
|
25
|
+
"./db/sqlite-vec": {
|
|
26
|
+
"types": "./dist/db/sqlite-vec.d.mts",
|
|
27
|
+
"import": "./dist/db/sqlite-vec.mjs"
|
|
28
|
+
},
|
|
29
|
+
"./db/sqlite-fts": {
|
|
30
|
+
"types": "./dist/db/sqlite-fts.d.mts",
|
|
31
|
+
"import": "./dist/db/sqlite-fts.mjs"
|
|
32
|
+
},
|
|
33
|
+
"./db/libsql": {
|
|
34
|
+
"types": "./dist/db/libsql.d.mts",
|
|
35
|
+
"import": "./dist/db/libsql.mjs"
|
|
36
|
+
},
|
|
37
|
+
"./db/upstash": {
|
|
38
|
+
"types": "./dist/db/upstash.d.mts",
|
|
39
|
+
"import": "./dist/db/upstash.mjs"
|
|
40
|
+
},
|
|
41
|
+
"./db/pgvector": {
|
|
42
|
+
"types": "./dist/db/pgvector.d.mts",
|
|
43
|
+
"import": "./dist/db/pgvector.mjs"
|
|
44
|
+
},
|
|
45
|
+
"./db/cloudflare": {
|
|
46
|
+
"types": "./dist/db/cloudflare.d.mts",
|
|
47
|
+
"import": "./dist/db/cloudflare.mjs"
|
|
48
|
+
},
|
|
49
|
+
"./embeddings/openai": {
|
|
50
|
+
"types": "./dist/embeddings/openai.d.mts",
|
|
51
|
+
"import": "./dist/embeddings/openai.mjs"
|
|
52
|
+
},
|
|
53
|
+
"./embeddings/google": {
|
|
54
|
+
"types": "./dist/embeddings/google.d.mts",
|
|
55
|
+
"import": "./dist/embeddings/google.mjs"
|
|
56
|
+
},
|
|
57
|
+
"./embeddings/ollama": {
|
|
58
|
+
"types": "./dist/embeddings/ollama.d.mts",
|
|
59
|
+
"import": "./dist/embeddings/ollama.mjs"
|
|
60
|
+
},
|
|
61
|
+
"./embeddings/transformers": {
|
|
62
|
+
"types": "./dist/embeddings/transformers.d.mts",
|
|
63
|
+
"import": "./dist/embeddings/transformers.mjs"
|
|
64
|
+
},
|
|
65
|
+
"./embeddings/mistral": {
|
|
66
|
+
"types": "./dist/embeddings/mistral.d.mts",
|
|
67
|
+
"import": "./dist/embeddings/mistral.mjs"
|
|
68
|
+
},
|
|
69
|
+
"./embeddings/cohere": {
|
|
70
|
+
"types": "./dist/embeddings/cohere.d.mts",
|
|
71
|
+
"import": "./dist/embeddings/cohere.mjs"
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
"main": "./dist/index.mjs",
|
|
75
|
+
"types": "./dist/index.d.mts",
|
|
76
|
+
"files": [
|
|
77
|
+
"dist"
|
|
78
|
+
],
|
|
79
|
+
"peerDependencies": {
|
|
80
|
+
"@ai-sdk/cohere": "^3.0.0",
|
|
81
|
+
"@ai-sdk/google": "^3.0.0",
|
|
82
|
+
"@ai-sdk/mistral": "^3.0.0",
|
|
83
|
+
"@ai-sdk/openai": "^3.0.0",
|
|
84
|
+
"@huggingface/transformers": "^3.0.0",
|
|
85
|
+
"@libsql/client": "^0.14.0 || ^0.15.0 || ^0.16.0 || ^0.17.0",
|
|
86
|
+
"@upstash/vector": "^1.0.0",
|
|
87
|
+
"ai": "^4.0.0 || ^5.0.0 || ^6.0.0",
|
|
88
|
+
"better-sqlite3": "^11.0.0 || ^12.0.0",
|
|
89
|
+
"ollama-ai-provider-v2": "^1.0.0",
|
|
90
|
+
"pg": "^8.0.0",
|
|
91
|
+
"sqlite-vec": "^0.1.0-alpha.0"
|
|
92
|
+
},
|
|
93
|
+
"peerDependenciesMeta": {
|
|
94
|
+
"@ai-sdk/cohere": {
|
|
95
|
+
"optional": true
|
|
96
|
+
},
|
|
97
|
+
"@ai-sdk/google": {
|
|
98
|
+
"optional": true
|
|
99
|
+
},
|
|
100
|
+
"@ai-sdk/mistral": {
|
|
101
|
+
"optional": true
|
|
102
|
+
},
|
|
103
|
+
"@ai-sdk/openai": {
|
|
104
|
+
"optional": true
|
|
105
|
+
},
|
|
106
|
+
"@huggingface/transformers": {
|
|
107
|
+
"optional": true
|
|
108
|
+
},
|
|
109
|
+
"@libsql/client": {
|
|
110
|
+
"optional": true
|
|
111
|
+
},
|
|
112
|
+
"@upstash/vector": {
|
|
113
|
+
"optional": true
|
|
114
|
+
},
|
|
115
|
+
"ai": {
|
|
116
|
+
"optional": true
|
|
117
|
+
},
|
|
118
|
+
"better-sqlite3": {
|
|
119
|
+
"optional": true
|
|
120
|
+
},
|
|
121
|
+
"ollama-ai-provider-v2": {
|
|
122
|
+
"optional": true
|
|
123
|
+
},
|
|
124
|
+
"pg": {
|
|
125
|
+
"optional": true
|
|
126
|
+
},
|
|
127
|
+
"sqlite-vec": {
|
|
128
|
+
"optional": true
|
|
129
|
+
}
|
|
130
|
+
},
|
|
131
|
+
"devDependencies": {
|
|
132
|
+
"@ai-sdk/cohere": "^3.0.18",
|
|
133
|
+
"@ai-sdk/google": "^3.0.20",
|
|
134
|
+
"@ai-sdk/mistral": "^3.0.18",
|
|
135
|
+
"@ai-sdk/openai": "^3.0.25",
|
|
136
|
+
"@antfu/eslint-config": "^7.2.0",
|
|
137
|
+
"@arethetypeswrong/cli": "^0.18.2",
|
|
138
|
+
"@cloudflare/workers-types": "^4.20260131.0",
|
|
139
|
+
"@huggingface/transformers": "^3.8.1",
|
|
140
|
+
"@libsql/client": "^0.17.0",
|
|
141
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
142
|
+
"@types/node": "^25.2.0",
|
|
143
|
+
"@types/pg": "^8.11.0",
|
|
144
|
+
"@upstash/vector": "^1.1.0",
|
|
145
|
+
"@vitest/coverage-v8": "^4.0.18",
|
|
146
|
+
"ai": "^6.0.67",
|
|
147
|
+
"better-sqlite3": "^12.5.0",
|
|
148
|
+
"bumpp": "^10.3.2",
|
|
149
|
+
"eslint": "^9.39.2",
|
|
150
|
+
"obuild": "^0.4.14",
|
|
151
|
+
"pg": "^8.18.0",
|
|
152
|
+
"sqlite-vec": "^0.1.7-alpha.2",
|
|
153
|
+
"typescript": "^5.9.3",
|
|
154
|
+
"vitest": "^4.0.16",
|
|
155
|
+
"zod": "^4.3.6"
|
|
156
|
+
},
|
|
157
|
+
"scripts": {
|
|
158
|
+
"build": "obuild",
|
|
159
|
+
"dev:prepare": "obuild --stub",
|
|
160
|
+
"lint": "eslint .",
|
|
161
|
+
"typecheck": "tsc --noEmit",
|
|
162
|
+
"test": "vitest --project unit",
|
|
163
|
+
"test:eval": "vitest run --project eval",
|
|
164
|
+
"test:attw": "attw --pack",
|
|
165
|
+
"release": "pnpm build && bumpp --output=CHANGELOG.md"
|
|
166
|
+
}
|
|
167
|
+
}
|