@superlinked/sie-lancedb 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ import * as apache_arrow from 'apache-arrow';
2
+ import { SIEClient } from '@superlinked/sie-sdk';
3
+
4
+ /**
5
+ * Configuration options accepted by the SIEEmbeddingFunction constructor; every field is optional.
6
+ */
7
+ interface SIEEmbeddingFunctionOptions {
8
+ /** URL of the SIE server. @default "http://localhost:8080" */
9
+ baseUrl?: string;
10
+ /** Model name/ID to use for encoding. @default "BAAI/bge-m3" */
11
+ model?: string;
12
+ /** Optional pre-configured SIEClient instance. NOTE(review): how this interacts with baseUrl / gpu / timeout is not visible in this declaration — confirm. */
13
+ client?: SIEClient;
14
+ /** Instruction prefix for instruction-tuned models (e.g., E5). */
15
+ instruction?: string;
16
+ /** Output data type: "float32" (default), "float16", "int8", "binary". Typed as plain `string`, so the value is not checked at compile time. */
17
+ outputDtype?: string;
18
+ /** Target GPU type for routing (e.g., "l4", "a100-80gb"). */
19
+ gpu?: string;
20
+ /** Request timeout in milliseconds. @default 180000 */
21
+ timeout?: number;
22
+ }
23
+ /**
24
+ * Dense text embeddings via SIE for LanceDB.
25
+ *
26
+ * Implements LanceDB's EmbeddingFunction interface. Embeddings are computed
27
+ * automatically when using `sourceField()` / `vectorField()` schema helpers.
28
+ *
29
+ * Use `ndims()` or pass `{ dims }` to `vectorField()` for schema definition.
30
+ * `ndims()` queries the `/v1/models` metadata API (lightweight, no model loading).
31
+ * NOTE(review): `sourceField()` / `vectorField()` are not declared on this class and it has no `extends`/`implements` clause, so the example below does not type-check against this declaration alone — confirm it matches the implementation.
32
+ * @example
33
+ * ```typescript
34
+ * import { SIEEmbeddingFunction } from "@superlinked/sie-lancedb";
35
+ * import { LanceSchema } from "@lancedb/lancedb/embedding";
36
+ *
37
+ * const func = new SIEEmbeddingFunction({ model: "BAAI/bge-m3" });
38
+ * const schema = LanceSchema({
39
+ * text: func.sourceField(),
40
+ * vector: func.vectorField({ dims: 1024 }),
41
+ * });
42
+ * ```
43
+ */
44
+ declare class SIEEmbeddingFunction {
45
+ private readonly model;
46
+ private readonly baseUrl;
47
+ private readonly instruction;
48
+ private readonly outputDtype;
49
+ private readonly clientOptions;
50
+ private _client;
51
+ private _ndims;
52
+ constructor(options?: SIEEmbeddingFunctionOptions);
53
+ private get client();
54
+ /**
55
+ * Resolve the embedding dimensionality from /v1/models metadata.
56
+ *
57
+ * Queries the SIE server's model config (lightweight GET, no model
58
+ * loading or inference). Cached after first call; resolves to the dimension count.
59
+ */
60
+ ndims(): Promise<number>;
61
+ /**
62
+ * Generate dense embeddings for a list of texts.
63
+ *
64
+ * @param texts - Texts to embed.
65
+ * @returns Promise resolving to an array of embedding vectors (each a number[]).
66
+ */
67
+ generateEmbeddings(texts: string[]): Promise<number[][]>;
68
+ /**
69
+ * Embed a batch of document texts (no isQuery flag is passed, in contrast to embedQuery).
70
+ */
71
+ embedDocuments(texts: string[]): Promise<number[][]>;
72
+ /**
73
+ * Embed a single query text and resolve to its vector (passes isQuery: true for asymmetric models).
74
+ */
75
+ embedQuery(text: string): Promise<number[]>;
76
+ private extractDense;
77
+ close(): Promise<void>;
78
+ }
79
+ /**
80
+ * Configuration options accepted by the SIEReranker constructor; every field is optional.
81
+ */
82
+ interface SIERerankerOptions {
83
+ /** URL of the SIE server. @default "http://localhost:8080" */
84
+ baseUrl?: string;
85
+ /** Reranker model name/ID. @default "jinaai/jina-reranker-v2-base-multilingual" */
86
+ model?: string;
87
+ /** Name of the text column to score. @default "text" */
88
+ column?: string;
89
+ /** Optional pre-configured SIEClient instance. NOTE(review): how this interacts with baseUrl / gpu / timeout is not visible in this declaration — confirm. */
90
+ client?: SIEClient;
91
+ /** Target GPU type for routing. */
92
+ gpu?: string;
93
+ /** Request timeout in milliseconds. @default 180000 */
94
+ timeout?: number;
95
+ }
96
+ /**
97
+ * Cross-encoder reranker using SIE for LanceDB hybrid search.
98
+ *
99
+ * Implements LanceDB's Reranker interface. Plugs into hybrid search
100
+ * pipelines via `.rerank()`.
101
+ * NOTE(review): this declaration carries no `implements` clause; `rerankHybrid` and `close` are its only public methods — confirm that satisfies LanceDB's Reranker type.
102
+ * @example
103
+ * ```typescript
104
+ * import { SIEReranker } from "@superlinked/sie-lancedb";
105
+ *
106
+ * const reranker = new SIEReranker({
107
+ * model: "jinaai/jina-reranker-v2-base-multilingual",
108
+ * });
109
+ *
110
+ * const results = await table
111
+ * .search("query", { queryType: "hybrid" })
112
+ * .rerank(reranker)
113
+ * .limit(10)
114
+ * .toArray();
115
+ * ```
116
+ */
117
+ declare class SIEReranker {
118
+ private readonly model;
119
+ private readonly column;
120
+ private readonly baseUrl;
121
+ private readonly clientOptions;
122
+ private _client;
123
+ constructor(options?: SIERerankerOptions);
124
+ private get client();
125
+ /**
126
+ * Rerank hybrid search results (vector + FTS).
127
+ *
128
+ * This is the method LanceDB calls during `.rerank()`. It scores all
129
+ * rows against the query using SIE's cross-encoder and resolves to a
130
+ * RecordBatch with `_relevance_score` added.
131
+ */
132
+ rerankHybrid(query: string, vecResults: apache_arrow.RecordBatch, ftsResults: apache_arrow.RecordBatch): Promise<apache_arrow.RecordBatch>;
133
+ /**
134
+ * Merge the vector and FTS result batches into one, deduplicating by _rowid.
135
+ * Rows from vecResults take priority for duplicate _rowid values.
136
+ */
137
+ private mergeResults;
138
+ close(): Promise<void>;
139
+ }
140
+
141
+ export { SIEEmbeddingFunction, type SIEEmbeddingFunctionOptions, SIEReranker, type SIERerankerOptions };
@@ -0,0 +1,141 @@
1
+ import * as apache_arrow from 'apache-arrow';
2
+ import { SIEClient } from '@superlinked/sie-sdk';
3
+
4
+ /**
5
+ * Configuration options accepted by the SIEEmbeddingFunction constructor; every field is optional.
6
+ */
7
+ interface SIEEmbeddingFunctionOptions {
8
+ /** URL of the SIE server. @default "http://localhost:8080" */
9
+ baseUrl?: string;
10
+ /** Model name/ID to use for encoding. @default "BAAI/bge-m3" */
11
+ model?: string;
12
+ /** Optional pre-configured SIEClient instance. NOTE(review): how this interacts with baseUrl / gpu / timeout is not visible in this declaration — confirm. */
13
+ client?: SIEClient;
14
+ /** Instruction prefix for instruction-tuned models (e.g., E5). */
15
+ instruction?: string;
16
+ /** Output data type: "float32" (default), "float16", "int8", "binary". Typed as plain `string`, so the value is not checked at compile time. */
17
+ outputDtype?: string;
18
+ /** Target GPU type for routing (e.g., "l4", "a100-80gb"). */
19
+ gpu?: string;
20
+ /** Request timeout in milliseconds. @default 180000 */
21
+ timeout?: number;
22
+ }
23
+ /**
24
+ * Dense text embeddings via SIE for LanceDB.
25
+ *
26
+ * Implements LanceDB's EmbeddingFunction interface. Embeddings are computed
27
+ * automatically when using `sourceField()` / `vectorField()` schema helpers.
28
+ *
29
+ * Use `ndims()` or pass `{ dims }` to `vectorField()` for schema definition.
30
+ * `ndims()` queries the `/v1/models` metadata API (lightweight, no model loading).
31
+ * NOTE(review): `sourceField()` / `vectorField()` are not declared on this class and it has no `extends`/`implements` clause, so the example below does not type-check against this declaration alone — confirm it matches the implementation.
32
+ * @example
33
+ * ```typescript
34
+ * import { SIEEmbeddingFunction } from "@superlinked/sie-lancedb";
35
+ * import { LanceSchema } from "@lancedb/lancedb/embedding";
36
+ *
37
+ * const func = new SIEEmbeddingFunction({ model: "BAAI/bge-m3" });
38
+ * const schema = LanceSchema({
39
+ * text: func.sourceField(),
40
+ * vector: func.vectorField({ dims: 1024 }),
41
+ * });
42
+ * ```
43
+ */
44
+ declare class SIEEmbeddingFunction {
45
+ private readonly model;
46
+ private readonly baseUrl;
47
+ private readonly instruction;
48
+ private readonly outputDtype;
49
+ private readonly clientOptions;
50
+ private _client;
51
+ private _ndims;
52
+ constructor(options?: SIEEmbeddingFunctionOptions);
53
+ private get client();
54
+ /**
55
+ * Resolve the embedding dimensionality from /v1/models metadata.
56
+ *
57
+ * Queries the SIE server's model config (lightweight GET, no model
58
+ * loading or inference). Cached after first call; resolves to the dimension count.
59
+ */
60
+ ndims(): Promise<number>;
61
+ /**
62
+ * Generate dense embeddings for a list of texts.
63
+ *
64
+ * @param texts - Texts to embed.
65
+ * @returns Promise resolving to an array of embedding vectors (each a number[]).
66
+ */
67
+ generateEmbeddings(texts: string[]): Promise<number[][]>;
68
+ /**
69
+ * Embed a batch of document texts (no isQuery flag is passed, in contrast to embedQuery).
70
+ */
71
+ embedDocuments(texts: string[]): Promise<number[][]>;
72
+ /**
73
+ * Embed a single query text and resolve to its vector (passes isQuery: true for asymmetric models).
74
+ */
75
+ embedQuery(text: string): Promise<number[]>;
76
+ private extractDense;
77
+ close(): Promise<void>;
78
+ }
79
+ /**
80
+ * Configuration options accepted by the SIEReranker constructor; every field is optional.
81
+ */
82
+ interface SIERerankerOptions {
83
+ /** URL of the SIE server. @default "http://localhost:8080" */
84
+ baseUrl?: string;
85
+ /** Reranker model name/ID. @default "jinaai/jina-reranker-v2-base-multilingual" */
86
+ model?: string;
87
+ /** Name of the text column to score. @default "text" */
88
+ column?: string;
89
+ /** Optional pre-configured SIEClient instance. NOTE(review): how this interacts with baseUrl / gpu / timeout is not visible in this declaration — confirm. */
90
+ client?: SIEClient;
91
+ /** Target GPU type for routing. */
92
+ gpu?: string;
93
+ /** Request timeout in milliseconds. @default 180000 */
94
+ timeout?: number;
95
+ }
96
+ /**
97
+ * Cross-encoder reranker using SIE for LanceDB hybrid search.
98
+ *
99
+ * Implements LanceDB's Reranker interface. Plugs into hybrid search
100
+ * pipelines via `.rerank()`.
101
+ * NOTE(review): this declaration carries no `implements` clause; `rerankHybrid` and `close` are its only public methods — confirm that satisfies LanceDB's Reranker type.
102
+ * @example
103
+ * ```typescript
104
+ * import { SIEReranker } from "@superlinked/sie-lancedb";
105
+ *
106
+ * const reranker = new SIEReranker({
107
+ * model: "jinaai/jina-reranker-v2-base-multilingual",
108
+ * });
109
+ *
110
+ * const results = await table
111
+ * .search("query", { queryType: "hybrid" })
112
+ * .rerank(reranker)
113
+ * .limit(10)
114
+ * .toArray();
115
+ * ```
116
+ */
117
+ declare class SIEReranker {
118
+ private readonly model;
119
+ private readonly column;
120
+ private readonly baseUrl;
121
+ private readonly clientOptions;
122
+ private _client;
123
+ constructor(options?: SIERerankerOptions);
124
+ private get client();
125
+ /**
126
+ * Rerank hybrid search results (vector + FTS).
127
+ *
128
+ * This is the method LanceDB calls during `.rerank()`. It scores all
129
+ * rows against the query using SIE's cross-encoder and resolves to a
130
+ * RecordBatch with `_relevance_score` added.
131
+ */
132
+ rerankHybrid(query: string, vecResults: apache_arrow.RecordBatch, ftsResults: apache_arrow.RecordBatch): Promise<apache_arrow.RecordBatch>;
133
+ /**
134
+ * Merge the vector and FTS result batches into one, deduplicating by _rowid.
135
+ * Rows from vecResults take priority for duplicate _rowid values.
136
+ */
137
+ private mergeResults;
138
+ close(): Promise<void>;
139
+ }
140
+
141
+ export { SIEEmbeddingFunction, type SIEEmbeddingFunctionOptions, SIEReranker, type SIERerankerOptions };