npm - @lancedb/lancedb - Versions diffs - 0.4.3 → 0.4.15 - Mend

@lancedb/lancedb 0.4.3 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/README.md +35 -3
package/dist/arrow.d.ts +189 -0
package/dist/arrow.js +539 -0
package/dist/connection.d.ts +97 -0
package/dist/connection.js +126 -0
package/dist/embedding/embedding_function.d.ts +45 -0
package/dist/embedding/embedding_function.js +27 -0
package/dist/embedding/index.d.ts +2 -0
package/dist/embedding/index.js +7 -0
package/dist/embedding/openai.d.ts +8 -0
package/dist/embedding/openai.js +53 -0
package/dist/index.d.ts +22 -0
package/dist/index.js +52 -0
package/dist/indices.d.ts +165 -0
package/dist/indices.js +71 -0
package/dist/native.d.ts +147 -0
package/dist/native.js +314 -0
package/dist/query.d.ts +248 -0
package/dist/query.js +346 -0
package/dist/sanitize.d.ts +9 -0
package/dist/sanitize.js +369 -0
package/dist/table.d.ts +252 -0
package/dist/table.js +298 -0
package/nodejs-artifacts/arrow.d.ts +189 -0
package/nodejs-artifacts/arrow.js +539 -0
package/nodejs-artifacts/connection.d.ts +97 -0
package/nodejs-artifacts/connection.js +126 -0
package/nodejs-artifacts/embedding/embedding_function.d.ts +45 -0
package/nodejs-artifacts/embedding/embedding_function.js +27 -0
package/nodejs-artifacts/embedding/index.d.ts +2 -0
package/nodejs-artifacts/embedding/index.js +7 -0
package/nodejs-artifacts/embedding/openai.d.ts +8 -0
package/nodejs-artifacts/embedding/openai.js +53 -0
package/nodejs-artifacts/index.d.ts +22 -0
package/nodejs-artifacts/index.js +52 -0
package/nodejs-artifacts/indices.d.ts +165 -0
package/nodejs-artifacts/indices.js +71 -0
package/nodejs-artifacts/native.d.ts +147 -0
package/nodejs-artifacts/native.js +314 -0
package/nodejs-artifacts/query.d.ts +248 -0
package/nodejs-artifacts/query.js +346 -0
package/nodejs-artifacts/sanitize.d.ts +9 -0
package/nodejs-artifacts/sanitize.js +369 -0
package/nodejs-artifacts/table.d.ts +252 -0
package/nodejs-artifacts/table.js +298 -0
package/package.json +9 -11
package/typedoc.json +10 -0
package/examples/js/index.mjs +0 -40
package/examples/js/package.json +0 -14
package/examples/js-openai/index.mjs +0 -43
package/examples/js-openai/package-lock.json +0 -256
package/examples/js-openai/package.json +0 -15
package/examples/js-transformers/index.mjs +0 -65
package/examples/js-transformers/package-lock.json +0 -1418
package/examples/js-transformers/package.json +0 -15
package/examples/js-youtube-transcripts/index.mjs +0 -135
package/examples/js-youtube-transcripts/package.json +0 -15
package/examples/ts/data/sample-lancedb/vectors.lance/_latest.manifest +0 -0
package/examples/ts/data/sample-lancedb/vectors.lance/_transactions/0-adde4e05-fcfc-415c-86a6-5b252cb9e79a.txn +0 -0
package/examples/ts/data/sample-lancedb/vectors.lance/_versions/1.manifest +0 -0
package/examples/ts/data/sample-lancedb/vectors.lance/data/3618b33e-3eea-4b5e-a0fc-7d1f718d551e.lance +0 -0
package/examples/ts/package-lock.json +0 -1340
package/examples/ts/package.json +0 -22
package/examples/ts/tsconfig.json +0 -10

package/dist/table.js ADDED Viewed

@@ -0,0 +1,298 @@
+"use strict";
+// Copyright 2024 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.Table = void 0;
+const apache_arrow_1 = require("apache-arrow");
+const query_1 = require("./query");
+const arrow_1 = require("./arrow");
+/**
+ * A Table is a collection of Records in a LanceDB Database.
+ *
+ * A Table object is expected to be long lived and reused for multiple operations.
+ * Table objects will cache a certain amount of index data in memory.  This cache
+ * will be freed when the Table is garbage collected.  To eagerly free the cache you
+ * can call the `close` method.  Once the Table is closed, it cannot be used for any
+ * further operations.
+ *
+ * Closing a table is optional.  If not closed, it will be closed when it is garbage
+ * collected.
+ */
+class Table {
+    inner;
+    /** Construct a Table. Internal use only. */
+    constructor(inner) {
+        this.inner = inner;
+    }
+    /** Return true if the table has not been closed */
+    isOpen() {
+        return this.inner.isOpen();
+    }
+    /**
+     * Close the table, releasing any underlying resources.
+     *
+     * It is safe to call this method multiple times.
+     *
+     * Any attempt to use the table after it is closed will result in an error.
+     */
+    close() {
+        this.inner.close();
+    }
+    /** Return a brief description of the table */
+    display() {
+        return this.inner.display();
+    }
+    /** Get the schema of the table. */
+    async schema() {
+        const schemaBuf = await this.inner.schema();
+        const tbl = (0, apache_arrow_1.tableFromIPC)(schemaBuf);
+        return tbl.schema;
+    }
+    /**
+     * Insert records into this Table (`options.mode` defaults to "append").
+     * @param {Data} data Records to be inserted into the Table
+     */
+    async add(data, options) {
+        const mode = options?.mode ?? "append";
+        const buffer = await (0, arrow_1.fromDataToBuffer)(data);
+        await this.inner.add(buffer, mode);
+    }
+    /**
+     * Update existing records in the Table
+     *
+     * An update operation can be used to adjust existing values.  Use the
+     * returned builder to specify which columns to update.  The new value
+     * can be a literal value (e.g. replacing nulls with some default value)
+     * or an expression applied to the old value (e.g. incrementing a value)
+     *
+     * An optional condition can be specified (e.g. "only update if the old
+     * value is 0")
+     *
+     * Note: if your condition is something like "some_id_column == 7" and
+     * you are updating many rows (with different ids) then you will get
+     * better performance with a single [`merge_insert`] call instead of
+     * repeatedly calling this method.
+     * @param {Map<string, string> | Record<string, string>} updates - the
+     * columns to update
+     *
+     * Keys in the map should specify the name of the column to update.
+     * Values in the map provide the new value of the column.  These can
+     * be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
+     * based on the row being updated (e.g. "my_col + 1")
+     * @param {Partial<UpdateOptions>} options - additional options to control
+     * the update behavior
+     */
+    async update(updates, options) {
+        const onlyIf = options?.where;
+        let columns;
+        if (updates instanceof Map) {
+            columns = Array.from(updates.entries());
+        }
+        else {
+            columns = Object.entries(updates);
+        }
+        await this.inner.update(onlyIf, columns);
+    }
+    /** Count the rows in the dataset; `filter`, if given, is passed through to the inner table. */
+    async countRows(filter) {
+        return await this.inner.countRows(filter);
+    }
+    /** Delete the rows that satisfy the predicate. */
+    async delete(predicate) {
+        await this.inner.delete(predicate);
+    }
+    /**
+     * Create an index to speed up queries.
+     *
+     * Indices can be created on vector columns or scalar columns.
+     * Indices on vector columns will speed up vector searches.
+     * Indices on scalar columns will speed up filtering (in both
+     * vector and non-vector searches)
+     * @example
+     * // If the column has a vector (fixed size list) data type then
+     * // an IvfPq vector index will be created.
+     * const table = await conn.openTable("my_table");
+     * await table.createIndex(["vector"]);
+     * @example
+     * // For advanced control over vector index creation you can specify
+     * // the index type and options.  NOTE(review): this method resolves to no value, so the builder chain below looks stale - confirm.
+     * const table = await conn.openTable("my_table");
+     * await table.createIndex(["vector"], I)
+     *   .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
+     *   .build();
+     * @example
+     * // Or create a Scalar index (NOTE(review): `.build()` has nothing to chain on here - confirm)
+     * await table.createIndex("my_float_col").build();
+     */
+    async createIndex(column, options) {
+        // Bit of a hack to get around the fact that TS has no package-scope.
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const nativeIndex = options?.config?.inner;
+        await this.inner.createIndex(nativeIndex, column, options?.replace);
+    }
+    /**
+     * Create a {@link Query} Builder.
+     *
+     * Queries allow you to search your existing data.  By default the query will
+     * return all the data in the table in no particular order.  The builder
+     * returned by this method can be used to control the query using filtering,
+     * vector similarity, sorting, and more.
+     *
+     * Note: By default, all columns are returned.  For best performance, you should
+     * only fetch the columns you need.  See [`Query::select_with_projection`] for
+     * more details.
+     *
+     * When appropriate, various indices and statistics based pruning will be used to
+     * accelerate the query.
+     * @example
+     * // SQL-style filtering
+     * //
+     * // This query will return up to 20 rows whose value in the `id` column
+     * // is greater than 1.  LanceDb supports a broad set of filtering functions.
+     * for await (const batch of table.query()
+     *                          .filter("id > 1").select(["id"]).limit(20)) {
+     *  console.log(batch);
+     * }
+     * @example
+     * // Vector Similarity Search
+     * //
+     * // This example will find the 10 rows whose value in the "vector" column are
+     * // closest to the query vector [1.0, 2.0, 3.0].  If an index has been created
+     * // on the "vector" column then this will perform an ANN search.
+     * //
+     * // The `refine_factor` and `nprobes` methods are used to control the recall /
+     * // latency tradeoff of the search.
+     * for await (const batch of table.query()
+     *                    .nearestTo([1, 2, 3])
+     *                    .refineFactor(5).nprobe(10)
+     *                    .limit(10)) {
+     *  console.log(batch);
+     * }
+     * @example
+     * // Scan the full dataset
+     * //
+     * // This query will return everything in the table in no particular order.
+     * for await (const batch of table.query()) {
+     *   console.log(batch);
+     * }
+     * @returns {Query} A builder that can be used to parameterize the query
+     */
+    query() {
+        return new query_1.Query(this.inner);
+    }
+    /**
+     * Search the table with a given query vector.
+     *
+     * This is a convenience method for preparing a vector query and
+     * is the same thing as calling `nearestTo` on the builder returned
+     * by `query`.  @see {@link Query#nearestTo} for more details.
+     */
+    vectorSearch(vector) {
+        return this.query().nearestTo(vector);
+    }
+    // TODO: Support BatchUDF
+    /**
+     * Add new columns with defined values.
+     * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
+     * the SQL expression to use to calculate the value of the new column. These
+     * expressions will be evaluated for each row in the table, and can
+     * reference existing columns in the table.
+     */
+    async addColumns(newColumnTransforms) {
+        await this.inner.addColumns(newColumnTransforms);
+    }
+    /**
+     * Alter the name or nullability of columns.
+     * @param {ColumnAlteration[]} columnAlterations One or more alterations to
+     * apply to columns.
+     */
+    async alterColumns(columnAlterations) {
+        await this.inner.alterColumns(columnAlterations);
+    }
+    /**
+     * Drop one or more columns from the dataset
+     *
+     * This is a metadata-only operation and does not remove the data from the
+     * underlying storage. In order to remove the data, you must subsequently
+     * call ``compact_files`` to rewrite the data without the removed columns and
+     * then call ``cleanup_files`` to remove the old files.
+     * @param {string[]} columnNames The names of the columns to drop. These can
+     * be nested column references (e.g. "a.b.c") or top-level column names
+     * (e.g. "a").
+     */
+    async dropColumns(columnNames) {
+        await this.inner.dropColumns(columnNames);
+    }
+    /**
+     * Retrieve the version of the table
+     *
+     * LanceDb supports versioning.  Every operation that modifies the table increases
+     * version.  As long as a version hasn't been deleted you can `[Self::checkout]` that
+     * version to view the data at that point.  In addition, you can `[Self::restore]` the
+     * version to replace the current table with a previous version.
+     */
+    async version() {
+        return await this.inner.version();
+    }
+    /**
+     * Checks out a specific version of the Table
+     *
+     * Any read operation on the table will now access the data at the checked out version.
+     * As a consequence, calling this method will disable any read consistency interval
+     * that was previously set.
+     *
+     * This is a read-only operation that turns the table into a sort of "view"
+     * or "detached head".  Other table instances will not be affected.  To make the change
+     * permanent you can use the `[Self::restore]` method.
+     *
+     * Any operation that modifies the table will fail while the table is in a checked
+     * out state.
+     *
+     * To return the table to a normal state use `[Self::checkout_latest]`
+     */
+    async checkout(version) {
+        await this.inner.checkout(version);
+    }
+    /**
+     * Ensures the table is pointing at the latest version
+     *
+     * This can be used to manually update a table when the read_consistency_interval is None
+     * It can also be used to undo a `[Self::checkout]` operation
+     */
+    async checkoutLatest() {
+        await this.inner.checkoutLatest();
+    }
+    /**
+     * Restore the table to the currently checked out version
+     *
+     * This operation will fail if checkout has not been called previously
+     *
+     * This operation will overwrite the latest version of the table with a
+     * previous version.  Any changes made since the checked out version will
+     * no longer be visible.
+     *
+     * Once the operation concludes the table will no longer be in a checked
+     * out state and the read_consistency_interval, if any, will apply.
+     */
+    async restore() {
+        await this.inner.restore();
+    }
+    /**
+     * List all indices that have been created with Self::create_index
+     */
+    async listIndices() {
+        return await this.inner.listIndices();
+    }
+}
+exports.Table = Table;

package/nodejs-artifacts/arrow.d.ts ADDED Viewed

@@ -0,0 +1,189 @@
+/// <reference types="node" />
+import { type Schema, Table as ArrowTable, type Float } from "apache-arrow";
+import { type EmbeddingFunction } from "./embedding/embedding_function";
+/** Data accepted by the NodeJS SDK: an array of row objects or an Arrow table. */
+export type Data = Record<string, unknown>[] | ArrowTable;
+export declare class VectorColumnOptions {
+    /** Arrow `Float` data type to use for the vector column. */
+    type: Float;
+    constructor(values?: Partial<VectorColumnOptions>);
+}
+/** Options to control the {@link makeArrowTable} call. */
+export declare class MakeArrowTableOptions {
+    schema?: Schema;
+    vectorColumns: Record<string, VectorColumnOptions>;
+    /**
+     * If true then string columns will be encoded with dictionary encoding
+     *
+     * Set this to true if your string columns tend to repeat the same values
+     * often.  For more precise control use the `schema` property to specify the
+     * data type for individual columns.
+     *
+     * If `schema` is provided then this property is ignored.
+     */
+    dictionaryEncodeStrings: boolean;
+    constructor(values?: Partial<MakeArrowTableOptions>);
+}
+/**
+ * An enhanced version of the {@link makeTable} function from Apache Arrow
+ * that supports nested fields and embeddings columns.
+ *
+ * (typically you do not need to call this function.  It will be called automatically
+ * when creating a table or adding data to it)
+ *
+ * This function converts an array of Record<string, any> (row-major JS objects)
+ * to an Arrow Table (a columnar structure)
+ *
+ * Note that it currently does not support nulls.
+ *
+ * If a schema is provided then it will be used to determine the resulting array
+ * types.  Fields will also be reordered to fit the order defined by the schema.
+ *
+ * If a schema is not provided then the types will be inferred and the field order
+ * will be controlled by the order of properties in the first record.  If a type
+ * is inferred it will always be nullable.
+ *
+ * If the input is empty then a schema must be provided to create an empty table.
+ *
+ * When a schema is not specified then data types will be inferred.  The inference
+ * rules are as follows:
+ *
+ *  - boolean => Bool
+ *  - number => Float64
+ *  - string => Utf8
+ *  - Buffer => Binary
+ *  - Record<string, any> => Struct
+ *  - Array<any> => List
+ * @example
+ * ```ts
+ * import { fromTableToBuffer, makeArrowTable } from "../arrow";
+ * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
+ *
+ * const schema = new Schema([
+ *   new Field("a", new Int32()),
+ *   new Field("b", new Float32()),
+ *   new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
+ *  ]);
+ *  const table = makeArrowTable([
+ *    { a: 1, b: 2, c: [1, 2, 3] },
+ *    { a: 4, b: 5, c: [4, 5, 6] },
+ *    { a: 7, b: 8, c: [7, 8, 9] },
+ *  ], { schema });
+ * ```
+ * By default it assumes that the column named `vector` is a vector column
+ * and it will be converted into a fixed size list array of type float32.
+ * The `vectorColumns` option can be used to support other vector column
+ * names and data types.
+ *
+ * ```ts
+ *
+ * const schema = new Schema([
+    new Field("a", new Float64()),
+    new Field("b", new Float64()),
+    new Field(
+      "vector",
+      new FixedSizeList(3, new Field("item", new Float32()))
+    ),
+  ]);
+  const table = makeArrowTable([
+    { a: 1, b: 2, vector: [1, 2, 3] },
+    { a: 4, b: 5, vector: [4, 5, 6] },
+    { a: 7, b: 8, vector: [7, 8, 9] },
+  ]);
+  assert.deepEqual(table.schema, schema);
+ * ```
+ *
+ * You can specify the vector column types and names using the options as well
+ *
+ * ```typescript
+ *
+ * const schema = new Schema([
+    new Field('a', new Float64()),
+    new Field('b', new Float64()),
+    new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
+    new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
+  ]);
+ * const table = makeArrowTable([
+    { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
+    { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
+    { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
+  ], {
+    vectorColumns: {
+      vec1: { type: new Float16() },
+      vec2: { type: new Float16() }
+    }
+  });
+ * assert.deepEqual(table.schema, schema)
+ * ```
+ */
+export declare function makeArrowTable(data: Array<Record<string, unknown>>, options?: Partial<MakeArrowTableOptions>): ArrowTable;
+/**
+ * Create an empty Arrow table with the provided schema (see also `createEmptyTable`).
+ */
+export declare function makeEmptyTable(schema: Schema): ArrowTable;
+/**
+ * Convert an Array of records into an Arrow Table, optionally applying an
+ * embeddings function to it.
+ *
+ * This function calls `makeArrowTable` first to create the Arrow Table.
+ * Any provided `makeTableOptions` (e.g. a schema) will be passed on to
+ * that call.
+ *
+ * The embedding function will be passed a column of values (based on the
+ * `sourceColumn` of the embedding function) and expects to receive back
+ * number[][] which will be converted into a fixed size list column.  By
+ * default this will be a fixed size list of Float32 but that can be
+ * customized by the `embeddingDataType` property of the embedding function.
+ *
+ * If a schema is provided in `makeTableOptions` then it should include the
+ * embedding columns.  If no schema is provided then embedding columns will
+ * be placed at the end of the table, after all of the input columns.
+ */
+export declare function convertToTable<T>(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>, makeTableOptions?: Partial<MakeArrowTableOptions>): Promise<ArrowTable>;
+/**
+ * Serialize an Array of records into a buffer using the Arrow IPC File serialization
+ *
+ * This function will call `convertToTable` and pass on `embeddings` and `schema`
+ *
+ * `schema` is required if `data` is empty
+ */
+export declare function fromRecordsToBuffer<T>(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer>;
+/**
+ * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
+ *
+ * This function will call `convertToTable` and pass on `embeddings` and `schema`
+ *
+ * `schema` is required if `data` is empty
+ */
+export declare function fromRecordsToStreamBuffer<T>(data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer>;
+/**
+ * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
+ *
+ * This function will apply `embeddings` to the table in a manner similar to
+ * `convertToTable`.
+ *
+ * `schema` is required if `table` is empty
+ */
+export declare function fromTableToBuffer<T>(table: ArrowTable, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer>;
+/**
+ * Serialize `Data` (an array of records or an Arrow Table) into a buffer using the Arrow IPC File serialization
+ *
+ * This function will apply `embeddings` to the data in a manner similar to
+ * `convertToTable`.
+ *
+ * `schema` is required if `data` is empty
+ */
+export declare function fromDataToBuffer<T>(data: Data, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer>;
+/**
+ * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
+ *
+ * This function will apply `embeddings` to the table in a manner similar to
+ * `convertToTable`.
+ *
+ * `schema` is required if `table` is empty
+ */
+export declare function fromTableToStreamBuffer<T>(table: ArrowTable, embeddings?: EmbeddingFunction<T>, schema?: Schema): Promise<Buffer>;
+/**
+ * Create an empty table with the given schema (see also `makeEmptyTable`).
+ */
+export declare function createEmptyTable(schema: Schema): ArrowTable;