@dengxifeng/lancedb 0.26.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +13 -0
- package/CONTRIBUTING.md +76 -0
- package/README.md +37 -0
- package/dist/arrow.d.ts +279 -0
- package/dist/arrow.js +1316 -0
- package/dist/connection.d.ts +259 -0
- package/dist/connection.js +224 -0
- package/dist/embedding/embedding_function.d.ts +103 -0
- package/dist/embedding/embedding_function.js +192 -0
- package/dist/embedding/index.d.ts +27 -0
- package/dist/embedding/index.js +101 -0
- package/dist/embedding/openai.d.ts +16 -0
- package/dist/embedding/openai.js +93 -0
- package/dist/embedding/registry.d.ts +74 -0
- package/dist/embedding/registry.js +165 -0
- package/dist/embedding/transformers.d.ts +36 -0
- package/dist/embedding/transformers.js +122 -0
- package/dist/header.d.ts +162 -0
- package/dist/header.js +217 -0
- package/dist/index.d.ts +85 -0
- package/dist/index.js +106 -0
- package/dist/indices.d.ts +692 -0
- package/dist/indices.js +156 -0
- package/dist/merge.d.ts +80 -0
- package/dist/merge.js +92 -0
- package/dist/native.d.ts +585 -0
- package/dist/native.js +339 -0
- package/dist/permutation.d.ts +143 -0
- package/dist/permutation.js +184 -0
- package/dist/query.d.ts +581 -0
- package/dist/query.js +853 -0
- package/dist/rerankers/index.d.ts +5 -0
- package/dist/rerankers/index.js +19 -0
- package/dist/rerankers/rrf.d.ts +14 -0
- package/dist/rerankers/rrf.js +28 -0
- package/dist/sanitize.d.ts +32 -0
- package/dist/sanitize.js +473 -0
- package/dist/table.d.ts +581 -0
- package/dist/table.js +321 -0
- package/dist/util.d.ts +14 -0
- package/dist/util.js +77 -0
- package/license_header.txt +2 -0
- package/package.json +122 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { Float } from "../arrow";
|
|
2
|
+
import { EmbeddingFunction } from "./embedding_function";
|
|
3
|
+
/** Settings accepted by the transformers-based embedding function. */
export type XenovaTransformerOptions = {
    /** Name of the wasm compatible model to load. */
    model: string;
    /**
     * Name of the wasm compatible tokenizer to load.
     * When omitted, the model's default tokenizer is used.
     */
    tokenizer?: string;
    /**
     * Dimensionality of the produced embeddings.
     *
     * When omitted, the value is inferred from the model config. There is no
     * standard place for this information, so models whose config lacks a
     * 'hidden_size' entry need the value specified here.
     */
    ndims?: number;
    /** Options handed to the tokenizer on every call. */
    tokenizerOptions?: {
        textPair?: string[] | string;
        padding?: "max_length" | boolean;
        addSpecialTokens?: boolean;
        truncation?: boolean;
        maxLength?: number;
    };
};
|
|
28
|
+
/**
 * Embedding function that loads a model and tokenizer through
 * `@huggingface/transformers` and produces Float32 vectors.
 */
export declare class TransformersEmbeddingFunction extends EmbeddingFunction<string, Partial<XenovaTransformerOptions>> {
    #private;
    /**
     * @param optionsRaw - Model, tokenizer and dimension settings; the model
     * defaults to "Xenova/all-MiniLM-L6-v2".
     */
    constructor(optionsRaw?: Partial<XenovaTransformerOptions>);
    /**
     * Load the transformers library, the model and the tokenizer.
     * Must complete before embeddings are computed.
     */
    init(): Promise<void>;
    /**
     * Embedding width: the configured `ndims`, otherwise `hidden_size` from
     * the loaded model's config.
     * @throws If neither source provides a value.
     */
    ndims(): number;
    /** Arrow data type of the produced vectors (Float32). */
    embeddingDataType(): Float;
    /**
     * Embed a batch of strings, returning one vector per input.
     * Rejects when `init()` has not completed.
     */
    computeSourceEmbeddings(data: string[]): Promise<number[][]>;
    /** Embed a single query string via `computeSourceEmbeddings`. */
    computeQueryEmbeddings(data: string): Promise<number[]>;
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
4
|
+
// TypeScript-emitted helper that applies a list of decorators either to a class
// (called with fewer than 3 arguments) or to a class member (called with a key and,
// optionally, a descriptor). An existing `this.__decorate` is reused when present.
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
5
|
+
// `r` starts as the class itself (fewer than 3 args) or as the member descriptor,
// which is looked up on the target when `desc` is null.
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
6
|
+
// Delegate to Reflect.decorate when the runtime provides it.
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
7
|
+
// Otherwise apply the decorators last-to-first, skipping falsy entries; a decorator's
// return value replaces `r` only when it is truthy.
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
8
|
+
// With more than 3 arguments and a truthy result, define it as the member's
// descriptor; the result is returned in every case.
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
9
|
+
};
|
|
10
|
+
// TypeScript-emitted helper: returns a `Reflect.metadata(k, v)` decorator when the
// runtime provides Reflect.metadata, and undefined otherwise. An existing
// `this.__metadata` is reused when present.
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
11
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
12
|
+
};
|
|
13
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
exports.TransformersEmbeddingFunction = void 0;
|
|
15
|
+
const arrow_1 = require("../arrow");
|
|
16
|
+
const embedding_function_1 = require("./embedding_function");
|
|
17
|
+
const registry_1 = require("./registry");
|
|
18
|
+
/**
 * Embedding function that loads a model and tokenizer through
 * `@huggingface/transformers` and returns mean-pooled Float32 vectors.
 *
 * `init()` must complete before embeddings are computed.
 */
let TransformersEmbeddingFunction = class TransformersEmbeddingFunction extends embedding_function_1.EmbeddingFunction {
    #model;
    #tokenizer;
    #modelName;
    #tokenizerName;
    #initialized = false;
    #tokenizerOptions;
    #ndims;
    /**
     * @param optionsRaw - Partial options; `model` defaults to
     * "Xenova/all-MiniLM-L6-v2", `tokenizer` defaults to the model name and
     * `tokenizerOptions.padding` defaults to true.
     */
    constructor(optionsRaw = {
        model: "Xenova/all-MiniLM-L6-v2",
    }) {
        super();
        const options = this.resolveVariables(optionsRaw);
        const modelName = options?.model ?? "Xenova/all-MiniLM-L6-v2";
        this.#tokenizerOptions = {
            padding: true,
            ...options?.tokenizerOptions,
        };
        this.#ndims = options?.ndims;
        this.#modelName = modelName;
        // Honour the documented `tokenizer` option; fall back to the model's own tokenizer.
        this.#tokenizerName = options?.tokenizer ?? modelName;
    }
    /**
     * Load the transformers library, then the model, then the tokenizer.
     * @throws Error describing which of the three steps failed.
     */
    async init() {
        let transformers;
        try {
            // SAFETY:
            // since typescript transpiles `import` to `require`, we need to do this in an unsafe way
            // We can't use `require` because `@huggingface/transformers` is an ESM module
            // and we can't use `import` directly because typescript will transpile it to `require`.
            // and we want to remain compatible with both ESM and CJS modules
            // so we use `eval` to bypass typescript for this specific import.
            // The evaluated text is a constant literal, never caller-supplied data.
            transformers = await eval('import("@huggingface/transformers")');
        }
        catch (e) {
            throw new Error(`error loading @huggingface/transformers\nReason: ${e}`);
        }
        try {
            this.#model = await transformers.AutoModel.from_pretrained(this.#modelName, { dtype: "fp32" });
        }
        catch (e) {
            throw new Error(`error loading model ${this.#modelName}. Make sure you are using a wasm compatible model.\nReason: ${e}`);
        }
        try {
            this.#tokenizer = await transformers.AutoTokenizer.from_pretrained(this.#tokenizerName);
        }
        catch (e) {
            throw new Error(`error loading tokenizer for ${this.#tokenizerName}. Make sure you are using a wasm compatible model:\nReason: ${e}`);
        }
        this.#initialized = true;
    }
    /**
     * Width of the embedding vectors.
     * @returns The configured `ndims`, otherwise `hidden_size` from the loaded model's config.
     * @throws Error when `ndims` was not configured and the model is not loaded
     * or its config has no `hidden_size`.
     */
    ndims() {
        if (this.#ndims) {
            return this.#ndims;
        }
        if (!this.#model) {
            // Without this guard the config lookup below fails with an opaque TypeError.
            throw new Error("embedding dimensions are unknown: call init() first or specify 'ndims' in the options.");
        }
        // biome-ignore lint/style/useNamingConvention: we don't control this name.
        const ndims = this.#model.config?.hidden_size;
        if (!ndims) {
            throw new Error("hidden_size not found in model config, you may need to manually specify the embedding dimensions. ");
        }
        return ndims;
    }
    /** @returns The Arrow data type of the produced vectors (Float32). */
    embeddingDataType() {
        return new arrow_1.Float32();
    }
    /**
     * Embed a batch of strings.
     * @param data - Texts to embed.
     * @returns One number[] of length `ndims()` per input, in input order.
     * @throws Error when `init()` has not completed.
     */
    async computeSourceEmbeddings(data) {
        // this should only happen if the user is trying to use the function directly.
        // Anything going through the registry should already be initialized.
        if (!this.#initialized) {
            throw new Error("something went wrong: embedding function not initialized. Please call init()");
        }
        if (data.length === 0) {
            // Nothing to embed; skip the tokenizer and the model entirely.
            return [];
        }
        const inputs = await this.#tokenizer(data, this.#tokenizerOptions);
        const outputs = await this.#model.forward(inputs);
        // Use the first output the model reports.
        let tokens = outputs[Object.keys(outputs)[0]];
        const [nItems, nTokens] = tokens.dims;
        // Mean over the token axis: sum along dimension 1, then divide by the token count.
        // NOTE(review): the divisor counts every token position, presumably including
        // padding added by the tokenizer — confirm whether attention-mask weighting is wanted.
        tokens = tensorDiv(tokens.sum(1), nTokens);
        // TODO: support other data types
        const tokenData = tokens.data;
        const stride = this.ndims();
        const embeddings = [];
        for (let i = 0; i < nItems; i++) {
            const start = i * stride;
            embeddings.push(Array.from(tokenData.slice(start, start + stride))); // TODO: Avoid copy here
        }
        return embeddings;
    }
    /**
     * Embed a single query string.
     * @param data - Text to embed.
     * @returns The embedding of `data`.
     */
    async computeQueryEmbeddings(data) {
        return (await this.computeSourceEmbeddings([data]))[0];
    }
};
|
|
112
|
+
// Export the class, then replace both the local binding and the export with the
// result of applying the `register("huggingface")` decorator together with the
// emitted constructor parameter-type metadata.
exports.TransformersEmbeddingFunction = TransformersEmbeddingFunction;
|
|
113
|
+
exports.TransformersEmbeddingFunction = TransformersEmbeddingFunction = __decorate([
|
|
114
|
+
(0, registry_1.register)("huggingface"),
|
|
115
|
+
__metadata("design:paramtypes", [Object])
|
|
116
|
+
], TransformersEmbeddingFunction);
|
|
117
|
+
// Divide every element of `src.data` by `divBy`, writing the results back in place.
// Returns the same `src` object so the call can be used as an expression.
const tensorDiv = (src, divBy) => {
    const values = src.data;
    const count = values.length;
    let idx = 0;
    while (idx < count) {
        values[idx] = values[idx] / divBy;
        idx += 1;
    }
    return src;
};
|
package/dist/header.d.ts
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Header providers for LanceDB remote connections.
|
|
3
|
+
*
|
|
4
|
+
* This module provides a flexible header management framework for LanceDB remote
|
|
5
|
+
* connections, allowing users to implement custom header strategies for
|
|
6
|
+
* authentication, request tracking, custom metadata, or any other header-based
|
|
7
|
+
* requirements.
|
|
8
|
+
*
|
|
9
|
+
* @module header
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* Abstract base class for providing custom headers for each request.
|
|
13
|
+
*
|
|
14
|
+
* Users can implement this interface to provide dynamic headers for various purposes
|
|
15
|
+
* such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
|
|
16
|
+
* custom metadata, or any other header-based requirements. The provider is called
|
|
17
|
+
* before each request to ensure fresh header values are always used.
|
|
18
|
+
*
|
|
19
|
+
* @example
|
|
20
|
+
* Simple JWT token provider:
|
|
21
|
+
* ```typescript
|
|
22
|
+
* class JWTProvider extends HeaderProvider {
|
|
23
|
+
* constructor(private token: string) {
|
|
24
|
+
* super();
|
|
25
|
+
* }
|
|
26
|
+
*
|
|
27
|
+
* getHeaders(): Record<string, string> {
|
|
28
|
+
* return { authorization: `Bearer ${this.token}` };
|
|
29
|
+
* }
|
|
30
|
+
* }
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* Provider with request tracking:
|
|
35
|
+
* ```typescript
|
|
36
|
+
* class RequestTrackingProvider extends HeaderProvider {
|
|
37
|
+
* constructor(private sessionId: string) {
|
|
38
|
+
* super();
|
|
39
|
+
* }
|
|
40
|
+
*
|
|
41
|
+
* getHeaders(): Record<string, string> {
|
|
42
|
+
* return {
|
|
43
|
+
* "X-Session-Id": this.sessionId,
|
|
44
|
+
* "X-Request-Id": `req-${Date.now()}`
|
|
45
|
+
* };
|
|
46
|
+
* }
|
|
47
|
+
* }
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
50
|
+
export declare abstract class HeaderProvider {
    /**
     * Produce the headers to attach to the next request.
     *
     * Called before each request to the remote LanceDB server; the returned
     * entries are merged with the headers that are already present.
     *
     * @returns Map from header name to header value.
     * @throws Any error raised here is propagated and fails the request.
     */
    abstract getHeaders(): Record<string, string>;
}
|
|
62
|
+
/**
|
|
63
|
+
* Example implementation: A simple header provider that returns static headers.
|
|
64
|
+
*
|
|
65
|
+
* This is an example implementation showing how to create a HeaderProvider
|
|
66
|
+
* for cases where headers don't change during the session.
|
|
67
|
+
*
|
|
68
|
+
* @example
|
|
69
|
+
* ```typescript
|
|
70
|
+
* const provider = new StaticHeaderProvider({
|
|
71
|
+
* authorization: "Bearer my-token",
|
|
72
|
+
* "X-Custom-Header": "custom-value"
|
|
73
|
+
* });
|
|
74
|
+
* const headers = provider.getHeaders();
|
|
75
|
+
* // Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
|
|
76
|
+
* ```
|
|
77
|
+
*/
|
|
78
|
+
export declare class StaticHeaderProvider extends HeaderProvider {
    /** Snapshot of the headers supplied at construction time. */
    private _headers;
    /**
     * Create a provider that always yields the same headers.
     * @param headers - Headers to return for every request.
     */
    constructor(headers: Record<string, string>);
    /**
     * Yield the stored headers.
     * @returns A copy of the static headers.
     */
    getHeaders(): Record<string, string>;
}
|
|
91
|
+
/**
|
|
92
|
+
* Token response from OAuth provider.
|
|
93
|
+
* @public
|
|
94
|
+
*/
|
|
95
|
+
export interface TokenResponse {
    /** Token value; sent as `authorization: Bearer <accessToken>`. */
    accessToken: string;
    /**
     * Lifetime of the token in seconds, counted from the moment it is fetched.
     * When omitted, no expiry is tracked for the token.
     */
    expiresIn?: number;
}
|
|
99
|
+
/**
|
|
100
|
+
* Example implementation: OAuth token provider with automatic refresh.
|
|
101
|
+
*
|
|
102
|
+
* This is an example implementation showing how to manage OAuth tokens
|
|
103
|
+
* with automatic refresh when they expire.
|
|
104
|
+
*
|
|
105
|
+
* @example
|
|
106
|
+
* ```typescript
|
|
107
|
+
* async function fetchToken(): Promise<TokenResponse> {
|
|
108
|
+
* const response = await fetch("https://oauth.example.com/token", {
|
|
109
|
+
* method: "POST",
|
|
110
|
+
* body: JSON.stringify({
|
|
111
|
+
* grant_type: "client_credentials",
|
|
112
|
+
* client_id: "your-client-id",
|
|
113
|
+
* client_secret: "your-client-secret"
|
|
114
|
+
* }),
|
|
115
|
+
* headers: { "Content-Type": "application/json" }
|
|
116
|
+
* });
|
|
117
|
+
* const data = await response.json();
|
|
118
|
+
* return {
|
|
119
|
+
* accessToken: data.access_token,
|
|
120
|
+
* expiresIn: data.expires_in
|
|
121
|
+
* };
|
|
122
|
+
* }
|
|
123
|
+
*
|
|
124
|
+
* const provider = new OAuthHeaderProvider(fetchToken);
|
|
125
|
+
* await provider.refreshToken(); // fetch the first token; getHeaders() throws until one exists
* const headers = provider.getHeaders();
|
|
126
|
+
* // Returns: {"authorization": "Bearer <your-token>"}
|
|
127
|
+
* ```
|
|
128
|
+
*/
|
|
129
|
+
export declare class OAuthHeaderProvider extends HeaderProvider {
    /** Callback that obtains a new token. */
    private _tokenFetcher;
    /** How many seconds before expiry a token counts as due for refresh. */
    private _refreshBufferSeconds;
    /** Token currently held, if any. */
    private _currentToken;
    /** Expiry of the held token in seconds since the epoch, if known. */
    private _tokenExpiresAt;
    /** The refresh that is currently in flight, if any. */
    private _refreshPromise;
    /**
     * Create the OAuth provider.
     * @param tokenFetcher - Function that fetches a new token. It returns (or
     * resolves to) an object with 'accessToken' and optionally 'expiresIn'.
     * @param refreshBufferSeconds - Seconds before expiry at which the token is
     * considered due for refresh. Default 300 (5 minutes).
     */
    constructor(tokenFetcher: () => Promise<TokenResponse> | TokenResponse, refreshBufferSeconds?: number);
    /**
     * Whether no token is held, or the held token is within the refresh buffer
     * of its expiry.
     */
    private _needsRefresh;
    /**
     * Fetch a new token when one is needed, sharing a refresh that is already
     * in flight.
     */
    private _refreshTokenIfNeeded;
    /**
     * Build the Bearer authorization header from the token currently held.
     *
     * This call is synchronous and does not fetch a token itself; call
     * `refreshToken()` beforehand so that a token is available.
     *
     * @returns Headers with Bearer token authorization.
     * @throws If no token is currently held.
     */
    getHeaders(): Record<string, string>;
    /**
     * Fetch a new token.
     * Call this before using getHeaders() to ensure a token is available.
     */
    refreshToken(): Promise<void>;
}
|
package/dist/header.js
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
4
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
+
exports.OAuthHeaderProvider = exports.StaticHeaderProvider = exports.HeaderProvider = void 0;
|
|
6
|
+
/**
|
|
7
|
+
* Header providers for LanceDB remote connections.
|
|
8
|
+
*
|
|
9
|
+
* This module provides a flexible header management framework for LanceDB remote
|
|
10
|
+
* connections, allowing users to implement custom header strategies for
|
|
11
|
+
* authentication, request tracking, custom metadata, or any other header-based
|
|
12
|
+
* requirements.
|
|
13
|
+
*
|
|
14
|
+
* @module header
|
|
15
|
+
*/
|
|
16
|
+
/**
|
|
17
|
+
* Abstract base class for providing custom headers for each request.
|
|
18
|
+
*
|
|
19
|
+
* Users can implement this interface to provide dynamic headers for various purposes
|
|
20
|
+
* such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
|
|
21
|
+
* custom metadata, or any other header-based requirements. The provider is called
|
|
22
|
+
* before each request to ensure fresh header values are always used.
|
|
23
|
+
*
|
|
24
|
+
* @example
|
|
25
|
+
* Simple JWT token provider:
|
|
26
|
+
* ```typescript
|
|
27
|
+
* class JWTProvider extends HeaderProvider {
|
|
28
|
+
* constructor(private token: string) {
|
|
29
|
+
* super();
|
|
30
|
+
* }
|
|
31
|
+
*
|
|
32
|
+
* getHeaders(): Record<string, string> {
|
|
33
|
+
* return { authorization: `Bearer ${this.token}` };
|
|
34
|
+
* }
|
|
35
|
+
* }
|
|
36
|
+
* ```
|
|
37
|
+
*
|
|
38
|
+
* @example
|
|
39
|
+
* Provider with request tracking:
|
|
40
|
+
* ```typescript
|
|
41
|
+
* class RequestTrackingProvider extends HeaderProvider {
|
|
42
|
+
* constructor(private sessionId: string) {
|
|
43
|
+
* super();
|
|
44
|
+
* }
|
|
45
|
+
*
|
|
46
|
+
* getHeaders(): Record<string, string> {
|
|
47
|
+
* return {
|
|
48
|
+
* "X-Session-Id": this.sessionId,
|
|
49
|
+
* "X-Request-Id": `req-${Date.now()}`
|
|
50
|
+
* };
|
|
51
|
+
* }
|
|
52
|
+
* }
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
// Runtime shell of the abstract base class. `getHeaders()` is declared abstract in
// the type declarations and has no implementation here, so every subclass must
// define it.
class HeaderProvider {
|
|
56
|
+
}
|
|
57
|
+
exports.HeaderProvider = HeaderProvider;
|
|
58
|
+
/**
|
|
59
|
+
* Example implementation: A simple header provider that returns static headers.
|
|
60
|
+
*
|
|
61
|
+
* This is an example implementation showing how to create a HeaderProvider
|
|
62
|
+
* for cases where headers don't change during the session.
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* ```typescript
|
|
66
|
+
* const provider = new StaticHeaderProvider({
|
|
67
|
+
* authorization: "Bearer my-token",
|
|
68
|
+
* "X-Custom-Header": "custom-value"
|
|
69
|
+
* });
|
|
70
|
+
* const headers = provider.getHeaders();
|
|
71
|
+
* // Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
|
|
72
|
+
* ```
|
|
73
|
+
*/
|
|
74
|
+
class StaticHeaderProvider extends HeaderProvider {
    _headers;
    /**
     * Keep a private snapshot of the supplied headers.
     * @param headers - Header name/value pairs to hand back on every request.
     */
    constructor(headers) {
        super();
        // Shallow copy, so later changes to the caller's object are not observed.
        this._headers = Object.assign({}, headers);
    }
    /**
     * Hand back the stored headers.
     * @returns A fresh shallow copy, so callers cannot alter the stored snapshot.
     */
    getHeaders() {
        const snapshot = Object.assign({}, this._headers);
        return snapshot;
    }
}
|
|
92
|
+
exports.StaticHeaderProvider = StaticHeaderProvider;
|
|
93
|
+
/**
|
|
94
|
+
* Example implementation: OAuth token provider with automatic refresh.
|
|
95
|
+
*
|
|
96
|
+
* This is an example implementation showing how to manage OAuth tokens
|
|
97
|
+
* with automatic refresh when they expire.
|
|
98
|
+
*
|
|
99
|
+
* @example
|
|
100
|
+
* ```typescript
|
|
101
|
+
* async function fetchToken(): Promise<TokenResponse> {
|
|
102
|
+
* const response = await fetch("https://oauth.example.com/token", {
|
|
103
|
+
* method: "POST",
|
|
104
|
+
* body: JSON.stringify({
|
|
105
|
+
* grant_type: "client_credentials",
|
|
106
|
+
* client_id: "your-client-id",
|
|
107
|
+
* client_secret: "your-client-secret"
|
|
108
|
+
* }),
|
|
109
|
+
* headers: { "Content-Type": "application/json" }
|
|
110
|
+
* });
|
|
111
|
+
* const data = await response.json();
|
|
112
|
+
* return {
|
|
113
|
+
* accessToken: data.access_token,
|
|
114
|
+
* expiresIn: data.expires_in
|
|
115
|
+
* };
|
|
116
|
+
* }
|
|
117
|
+
*
|
|
118
|
+
* const provider = new OAuthHeaderProvider(fetchToken);
|
|
119
|
+
* await provider.refreshToken(); // fetch the first token; getHeaders() throws until one exists
* const headers = provider.getHeaders();
|
|
120
|
+
* // Returns: {"authorization": "Bearer <your-token>"}
|
|
121
|
+
* ```
|
|
122
|
+
*/
|
|
123
|
+
class OAuthHeaderProvider extends HeaderProvider {
    _tokenFetcher;
    _refreshBufferSeconds;
    _currentToken = null;
    _tokenExpiresAt = null;
    _refreshPromise = null;
    /**
     * Initialize the OAuth provider.
     * @param tokenFetcher - Function to fetch new tokens. Should return object with 'accessToken' and optionally 'expiresIn'.
     * @param refreshBufferSeconds - Seconds before expiry to refresh token. Default 300 (5 minutes).
     */
    constructor(tokenFetcher, refreshBufferSeconds = 300) {
        super();
        this._tokenFetcher = tokenFetcher;
        this._refreshBufferSeconds = refreshBufferSeconds;
    }
    /**
     * Check if token needs refresh.
     * @returns true when no token is held, or when the held token is within
     * the refresh buffer of its known expiry.
     */
    _needsRefresh() {
        if (this._currentToken == null) {
            return true;
        }
        if (this._tokenExpiresAt === null) {
            // No expiration info, assume token is valid
            return false;
        }
        // Refresh if we're within the buffer time of expiration
        const now = Date.now() / 1000;
        return now >= this._tokenExpiresAt - this._refreshBufferSeconds;
    }
    /**
     * Refresh the token if it's expired or close to expiring.
     * @param force - When true, fetch a new token even if the current one is
     * still considered valid.
     * @throws Whatever the token fetcher throws, or an Error when the response
     * has no 'accessToken'. The previously held token is kept in that case.
     */
    async _refreshTokenIfNeeded(force = false) {
        if (!force && !this._needsRefresh()) {
            return;
        }
        // If refresh is already in progress, wait for it
        if (this._refreshPromise) {
            await this._refreshPromise;
            return;
        }
        // Start refresh
        const refresh = (async () => {
            const tokenData = await this._tokenFetcher();
            const accessToken = tokenData?.accessToken;
            // Validate before storing so that a bad response cannot replace a working token.
            if (!accessToken) {
                throw new Error("Token fetcher did not return 'accessToken'");
            }
            const expiresIn = tokenData.expiresIn;
            this._currentToken = accessToken;
            // Any finite number counts as an expiry, including 0 (already expired);
            // anything else means the expiration is unknown.
            this._tokenExpiresAt =
                typeof expiresIn === "number" && Number.isFinite(expiresIn)
                    ? Date.now() / 1000 + expiresIn
                    : null;
        })();
        this._refreshPromise = refresh;
        try {
            await refresh;
        }
        finally {
            // Cleared here rather than inside the task: a fetcher that throws
            // synchronously finishes the task before the assignment above, so an
            // inner cleanup would run too early and leave the rejected promise
            // stored for every later call.
            if (this._refreshPromise === refresh) {
                this._refreshPromise = null;
            }
        }
    }
    /**
     * Get OAuth headers from the token currently held.
     * Note: This is synchronous for now as the Rust implementation expects sync,
     * so no token is fetched here; call refreshToken() beforehand.
     * @returns Headers with Bearer token authorization.
     * @throws If no token is currently held.
     */
    getHeaders() {
        if (!this._currentToken) {
            if (!this._refreshPromise) {
                // Nothing fetched yet and nothing in flight.
                throw new Error("Token not initialized. Call refreshToken() first or use async initialization.");
            }
            // A first fetch is in flight but has not produced a token yet.
            throw new Error("Failed to obtain OAuth token");
        }
        return { authorization: `Bearer ${this._currentToken}` };
    }
    /**
     * Manually refresh the token.
     * Call this before using getHeaders() to ensure token is available.
     * The current token stays usable until the new one arrives, and is kept
     * if the refresh fails.
     */
    async refreshToken() {
        await this._refreshTokenIfNeeded(true);
    }
}
|
|
217
|
+
exports.OAuthHeaderProvider = OAuthHeaderProvider;
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { Connection } from "./connection";
|
|
2
|
+
import { ConnectionOptions, Session } from "./native.js";
|
|
3
|
+
import { HeaderProvider } from "./header";
|
|
4
|
+
export { JsHeaderProvider as NativeJsHeaderProvider } from "./native.js";
|
|
5
|
+
export { AddColumnsSql, ConnectionOptions, IndexStatistics, IndexConfig, ClientConfig, TimeoutConfig, RetryConfig, TlsConfig, OptimizeStats, CompactionStats, RemovalStats, TableStatistics, FragmentStatistics, FragmentSummaryStats, Tags, TagContents, MergeResult, AddResult, AddColumnsResult, AlterColumnsResult, DeleteResult, DropColumnsResult, UpdateResult, SplitCalculatedOptions, SplitRandomOptions, SplitHashOptions, SplitSequentialOptions, ShuffleOptions, } from "./native.js";
|
|
6
|
+
export { makeArrowTable, MakeArrowTableOptions, Data, VectorColumnOptions, } from "./arrow";
|
|
7
|
+
export { Connection, CreateTableOptions, TableNamesOptions, OpenTableOptions, } from "./connection";
|
|
8
|
+
export { Session } from "./native.js";
|
|
9
|
+
export { ExecutableQuery, Query, QueryBase, VectorQuery, TakeQuery, QueryExecutionOptions, FullTextSearchOptions, RecordBatchIterator, FullTextQuery, MatchQuery, PhraseQuery, BoostQuery, MultiMatchQuery, BooleanQuery, FullTextQueryType, Operator, Occur, } from "./query";
|
|
10
|
+
export { Index, IndexOptions, IvfPqOptions, IvfRqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, FtsOptions, } from "./indices";
|
|
11
|
+
export { Table, AddDataOptions, UpdateOptions, OptimizeOptions, Version, ColumnAlteration, } from "./table";
|
|
12
|
+
export { HeaderProvider, StaticHeaderProvider, OAuthHeaderProvider, TokenResponse, } from "./header";
|
|
13
|
+
export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";
|
|
14
|
+
export * as embedding from "./embedding";
|
|
15
|
+
export { permutationBuilder, PermutationBuilder } from "./permutation";
|
|
16
|
+
export * as rerankers from "./rerankers";
|
|
17
|
+
export { SchemaLike, TableLike, FieldLike, RecordBatchLike, DataLike, IntoVector, MultiVector, } from "./arrow";
|
|
18
|
+
export { IntoSql, packBits } from "./util";
|
|
19
|
+
/**
|
|
20
|
+
* Connect to a LanceDB instance at the given URI.
|
|
21
|
+
*
|
|
22
|
+
* Accepted formats:
|
|
23
|
+
*
|
|
24
|
+
* - `/path/to/database` - local database
|
|
25
|
+
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
26
|
+
* - `db://host:port` - remote database (LanceDB cloud)
|
|
27
|
+
* @param {string} uri - The uri of the database. If the database uri starts
|
|
28
|
+
* with `db://` then it connects to a remote database.
|
|
29
|
+
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
30
|
+
* @param options - The options to use when connecting to the database
|
|
31
|
+
* @example
|
|
32
|
+
* ```ts
|
|
33
|
+
* const conn = await connect("/path/to/database");
|
|
34
|
+
* ```
|
|
35
|
+
* @example
|
|
36
|
+
* ```ts
|
|
37
|
+
* const conn = await connect(
|
|
38
|
+
* "s3://bucket/path/to/database",
|
|
39
|
+
* {storageOptions: {timeout: "60s"}
|
|
40
|
+
* });
|
|
41
|
+
* ```
|
|
42
|
+
* @example
|
|
43
|
+
* Using with a header provider for per-request authentication:
|
|
44
|
+
* ```ts
|
|
45
|
+
* const provider = new StaticHeaderProvider({
|
|
46
|
+
* "X-API-Key": "my-key"
|
|
47
|
+
* });
|
|
48
|
+
* const conn = await connect(
|
|
48
|
+
* "db://host:port",
|
|
49
|
+
* options,
* undefined, // session (none); the header provider is the fourth argument
|
|
51
|
+
* provider
|
|
52
|
+
* );
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
export declare function connect(
    uri: string,
    options?: Partial<ConnectionOptions>,
    session?: Session,
    headerProvider?:
        | HeaderProvider
        | (() => Record<string, string>)
        | (() => Promise<Record<string, string>>),
): Promise<Connection>;
|
|
56
|
+
/**
|
|
57
|
+
* Connect to a LanceDB instance at the given URI.
|
|
58
|
+
*
|
|
59
|
+
* Accepted formats:
|
|
60
|
+
*
|
|
61
|
+
* - `/path/to/database` - local database
|
|
62
|
+
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
|
63
|
+
* - `db://host:port` - remote database (LanceDB cloud)
|
|
64
|
+
* @param options - The options to use when connecting to the database
|
|
65
|
+
* @see {@link ConnectionOptions} for more details on the URI format.
|
|
66
|
+
* @example
|
|
67
|
+
* ```ts
|
|
68
|
+
* const conn = await connect({
|
|
69
|
+
* uri: "/path/to/database",
|
|
70
|
+
* storageOptions: {timeout: "60s"}
|
|
71
|
+
* });
|
|
72
|
+
* ```
|
|
73
|
+
*
|
|
74
|
+
* @example
|
|
75
|
+
* ```ts
|
|
76
|
+
* const session = Session.default();
|
|
77
|
+
* const conn = await connect({
|
|
78
|
+
* uri: "/path/to/database",
|
|
79
|
+
* session: session
|
|
80
|
+
* });
|
|
81
|
+
* ```
|
|
82
|
+
*/
|
|
83
|
+
export declare function connect(
    options: Partial<ConnectionOptions> & { uri: string },
): Promise<Connection>;
|