@lancedb/lancedb 0.14.1-beta.3 → 0.14.1-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/indices.d.ts CHANGED
@@ -314,6 +314,45 @@ export interface FtsOptions {
314
314
  * which will make the index smaller and faster to build, but will not support phrase queries.
315
315
  */
316
316
  withPosition?: boolean;
317
+ /**
318
+ * The tokenizer to use when building the index.
319
+ * The default is "simple".
320
+ *
321
+ * The following tokenizers are available:
322
+ *
323
+ * "simple" - Simple tokenizer. This tokenizer splits the text into tokens using whitespace and punctuation as a delimiter.
324
+ *
325
+ * "whitespace" - Whitespace tokenizer. This tokenizer splits the text into tokens using whitespace as a delimiter.
326
+ *
327
+ * "raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
328
+ */
329
+ baseTokenizer?: "simple" | "whitespace" | "raw";
330
+ /**
331
+ * language for stemming and stop words
332
+ * this is only used when `stem` or `removeStopWords` is true
333
+ */
334
+ language?: string;
335
+ /**
336
+ * maximum token length
337
+ * tokens longer than this length will be ignored
338
+ */
339
+ maxTokenLength?: number;
340
+ /**
341
+ * whether to lowercase tokens
342
+ */
343
+ lowercase?: boolean;
344
+ /**
345
+ * whether to stem tokens
346
+ */
347
+ stem?: boolean;
348
+ /**
349
+ * whether to remove stop words
350
+ */
351
+ removeStopWords?: boolean;
352
+ /**
353
+ * whether to apply ASCII folding, i.e. convert non-ASCII characters (such as accented letters) to their closest ASCII equivalents
354
+ */
355
+ asciiFolding?: boolean;
317
356
  }
318
357
  export declare class Index {
319
358
  private readonly inner;
package/dist/indices.js CHANGED
@@ -101,7 +101,7 @@ class Index {
101
101
  * For now, the full text search index only supports English, and doesn't support phrase search.
102
102
  */
103
103
  static fts(options) {
104
- return new Index(native_1.Index.fts(options?.withPosition));
104
+ return new Index(native_1.Index.fts(options?.withPosition, options?.baseTokenizer, options?.language, options?.maxTokenLength, options?.lowercase, options?.stem, options?.removeStopWords, options?.asciiFolding));
105
105
  }
106
106
  /**
107
107
  *
package/dist/native.d.ts CHANGED
@@ -261,7 +261,7 @@ export class Index {
261
261
  static btree(): Index
262
262
  static bitmap(): Index
263
263
  static labelList(): Index
264
- static fts(withPosition?: boolean | undefined | null): Index
264
+ static fts(withPosition?: boolean | undefined | null, baseTokenizer?: string | undefined | null, language?: string | undefined | null, maxTokenLength?: number | undefined | null, lowerCase?: boolean | undefined | null, stem?: boolean | undefined | null, removeStopWords?: boolean | undefined | null, asciiFolding?: boolean | undefined | null): Index
265
265
  static hnswPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null, m?: number | undefined | null, efConstruction?: number | undefined | null): Index
266
266
  static hnswSq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null, m?: number | undefined | null, efConstruction?: number | undefined | null): Index
267
267
  }
package/package.json CHANGED
@@ -11,7 +11,7 @@
11
11
  "ann"
12
12
  ],
13
13
  "private": false,
14
- "version": "0.14.1-beta.3",
14
+ "version": "0.14.1-beta.4",
15
15
  "main": "dist/index.js",
16
16
  "exports": {
17
17
  ".": "./dist/index.js",
@@ -98,14 +98,14 @@
98
98
  "reflect-metadata": "^0.2.2"
99
99
  },
100
100
  "optionalDependencies": {
101
- "@lancedb/lancedb-darwin-x64": "0.14.1-beta.3",
102
- "@lancedb/lancedb-darwin-arm64": "0.14.1-beta.3",
103
- "@lancedb/lancedb-linux-x64-gnu": "0.14.1-beta.3",
104
- "@lancedb/lancedb-linux-arm64-gnu": "0.14.1-beta.3",
105
- "@lancedb/lancedb-linux-x64-musl": "0.14.1-beta.3",
106
- "@lancedb/lancedb-linux-arm64-musl": "0.14.1-beta.3",
107
- "@lancedb/lancedb-win32-x64-msvc": "0.14.1-beta.3",
108
- "@lancedb/lancedb-win32-arm64-msvc": "0.14.1-beta.3"
101
+ "@lancedb/lancedb-darwin-x64": "0.14.1-beta.4",
102
+ "@lancedb/lancedb-darwin-arm64": "0.14.1-beta.4",
103
+ "@lancedb/lancedb-linux-x64-gnu": "0.14.1-beta.4",
104
+ "@lancedb/lancedb-linux-arm64-gnu": "0.14.1-beta.4",
105
+ "@lancedb/lancedb-linux-x64-musl": "0.14.1-beta.4",
106
+ "@lancedb/lancedb-linux-arm64-musl": "0.14.1-beta.4",
107
+ "@lancedb/lancedb-win32-x64-msvc": "0.14.1-beta.4",
108
+ "@lancedb/lancedb-win32-arm64-msvc": "0.14.1-beta.4"
109
109
  },
110
110
  "peerDependencies": {
111
111
  "apache-arrow": ">=15.0.0 <=18.1.0"