@lancedb/lancedb 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/biome.json +8 -2
  2. package/dist/arrow.d.ts +36 -9
  3. package/dist/arrow.js +222 -24
  4. package/dist/connection.d.ts +10 -1
  5. package/dist/connection.js +13 -7
  6. package/dist/embedding/embedding_function.d.ts +54 -28
  7. package/dist/embedding/embedding_function.js +89 -10
  8. package/dist/embedding/index.d.ts +28 -2
  9. package/dist/embedding/index.js +111 -4
  10. package/dist/embedding/openai.d.ts +16 -7
  11. package/dist/embedding/openai.js +62 -12
  12. package/dist/embedding/registry.d.ts +58 -0
  13. package/dist/embedding/registry.js +127 -0
  14. package/dist/native.d.ts +5 -4
  15. package/dist/query.d.ts +19 -7
  16. package/dist/query.js +27 -13
  17. package/dist/sanitize.d.ts +22 -1
  18. package/dist/sanitize.js +123 -110
  19. package/dist/table.d.ts +18 -3
  20. package/dist/table.js +33 -3
  21. package/lancedb/arrow.ts +243 -41
  22. package/lancedb/connection.ts +35 -6
  23. package/lancedb/embedding/embedding_function.ts +147 -42
  24. package/lancedb/embedding/index.ts +113 -2
  25. package/lancedb/embedding/openai.ts +62 -16
  26. package/lancedb/embedding/registry.ts +176 -0
  27. package/lancedb/query.ts +58 -14
  28. package/lancedb/sanitize.ts +22 -22
  29. package/lancedb/table.ts +67 -5
  30. package/nodejs-artifacts/arrow.d.ts +36 -9
  31. package/nodejs-artifacts/arrow.js +222 -24
  32. package/nodejs-artifacts/connection.d.ts +10 -1
  33. package/nodejs-artifacts/connection.js +13 -7
  34. package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
  35. package/nodejs-artifacts/embedding/embedding_function.js +89 -10
  36. package/nodejs-artifacts/embedding/index.d.ts +28 -2
  37. package/nodejs-artifacts/embedding/index.js +111 -4
  38. package/nodejs-artifacts/embedding/openai.d.ts +16 -7
  39. package/nodejs-artifacts/embedding/openai.js +62 -12
  40. package/nodejs-artifacts/embedding/registry.d.ts +58 -0
  41. package/nodejs-artifacts/embedding/registry.js +127 -0
  42. package/nodejs-artifacts/native.d.ts +5 -4
  43. package/nodejs-artifacts/query.d.ts +19 -7
  44. package/nodejs-artifacts/query.js +27 -13
  45. package/nodejs-artifacts/sanitize.d.ts +22 -1
  46. package/nodejs-artifacts/sanitize.js +123 -110
  47. package/nodejs-artifacts/table.d.ts +18 -3
  48. package/nodejs-artifacts/table.js +33 -3
  49. package/package.json +14 -9
  50. package/tsconfig.json +3 -1
package/dist/query.js CHANGED
@@ -14,7 +14,7 @@
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
16
  exports.Query = exports.VectorQuery = exports.QueryBase = exports.RecordBatchIterator = void 0;
17
- const apache_arrow_1 = require("apache-arrow");
17
+ const arrow_1 = require("./arrow");
18
18
  class RecordBatchIterator {
19
19
  promisedInner;
20
20
  inner;
@@ -34,7 +34,7 @@ class RecordBatchIterator {
34
34
  if (n == null) {
35
35
  return Promise.resolve({ done: true, value: null });
36
36
  }
37
- const tbl = (0, apache_arrow_1.tableFromIPC)(n);
37
+ const tbl = (0, arrow_1.tableFromIPC)(n);
38
38
  if (tbl.batches.length != 1) {
39
39
  throw new Error("Expected only one batch");
40
40
  }
@@ -43,6 +43,18 @@ class RecordBatchIterator {
43
43
  }
44
44
  exports.RecordBatchIterator = RecordBatchIterator;
45
45
  /* eslint-enable */
46
+ class RecordBatchIterable {
47
+ inner;
48
+ options;
49
+ constructor(inner, options) {
50
+ this.inner = inner;
51
+ this.options = options;
52
+ }
53
+ // biome-ignore lint/suspicious/noExplicitAny: skip
54
+ [Symbol.asyncIterator]() {
55
+ return new RecordBatchIterator(this.inner.execute(this.options?.maxBatchLength));
56
+ }
57
+ }
46
58
  /** Common methods supported by all query types */
47
59
  class QueryBase {
48
60
  inner;
@@ -98,6 +110,9 @@ class QueryBase {
98
110
  */
99
111
  select(columns) {
100
112
  let columnTuples;
113
+ if (typeof columns === "string") {
114
+ columns = [columns];
115
+ }
101
116
  if (Array.isArray(columns)) {
102
117
  columnTuples = columns.map((c) => [c, c]);
103
118
  }
@@ -120,8 +135,8 @@ class QueryBase {
120
135
  this.inner.limit(limit);
121
136
  return this;
122
137
  }
123
- nativeExecute() {
124
- return this.inner.execute();
138
+ nativeExecute(options) {
139
+ return this.inner.execute(options?.maxBatchLength);
125
140
  }
126
141
  /**
127
142
  * Execute the query and return the results as an @see {@link AsyncIterator}
@@ -134,8 +149,8 @@ class QueryBase {
134
149
  * single query)
135
150
  *
136
151
  */
137
- execute() {
138
- return new RecordBatchIterator(this.nativeExecute());
152
+ execute(options) {
153
+ return new RecordBatchIterator(this.nativeExecute(options));
139
154
  }
140
155
  // biome-ignore lint/suspicious/noExplicitAny: skip
141
156
  [Symbol.asyncIterator]() {
@@ -143,17 +158,17 @@ class QueryBase {
143
158
  return new RecordBatchIterator(promise);
144
159
  }
145
160
  /** Collect the results as an Arrow @see {@link ArrowTable}. */
146
- async toArrow() {
161
+ async toArrow(options) {
147
162
  const batches = [];
148
- for await (const batch of this) {
163
+ for await (const batch of new RecordBatchIterable(this.inner, options)) {
149
164
  batches.push(batch);
150
165
  }
151
- return new apache_arrow_1.Table(batches);
166
+ return new arrow_1.Table(batches);
152
167
  }
153
168
  /** Collect the results as an array of objects. */
154
- async toArray() {
155
- const tbl = await this.toArrow();
156
- // eslint-disable-next-line @typescript-eslint/no-unsafe-return
169
+ // biome-ignore lint/suspicious/noExplicitAny: arrow.toArrow() returns any[]
170
+ async toArray(options) {
171
+ const tbl = await this.toArrow(options);
157
172
  return tbl.toArray();
158
173
  }
159
174
  }
@@ -339,7 +354,6 @@ class Query extends QueryBase {
339
354
  * a default `limit` of 10 will be used. @see {@link Query#limit}
340
355
  */
341
356
  nearestTo(vector) {
342
- // biome-ignore lint/suspicious/noExplicitAny: skip
343
357
  const vectorQuery = this.inner.nearestTo(Float32Array.from(vector));
344
358
  return new VectorQuery(vectorQuery);
345
359
  }
@@ -1,4 +1,25 @@
1
- import { Schema } from "apache-arrow";
1
+ import type { TKeys } from "apache-arrow/type";
2
+ import { DataType, Date_, Decimal, DenseUnion, Dictionary, Duration, Field, FixedSizeBinary, FixedSizeList, Float, Int, Interval, List, Map_, Schema, SparseUnion, Struct, Time, Timestamp, TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond, Type, Union } from "./arrow";
3
+ export declare function sanitizeMetadata(metadataLike?: unknown): Map<string, string> | undefined;
4
+ export declare function sanitizeInt(typeLike: object): Int<Type.Int | Type.Int8 | Type.Int16 | Type.Int32 | Type.Int64 | Type.Uint8 | Type.Uint16 | Type.Uint32 | Type.Uint64>;
5
+ export declare function sanitizeFloat(typeLike: object): Float<Type.Float | Type.Float16 | Type.Float32 | Type.Float64>;
6
+ export declare function sanitizeDecimal(typeLike: object): Decimal;
7
+ export declare function sanitizeDate(typeLike: object): Date_<import("apache-arrow/type").Dates>;
8
+ export declare function sanitizeTime(typeLike: object): Time<Type.Time | Type.TimeSecond | Type.TimeMillisecond | Type.TimeMicrosecond | Type.TimeNanosecond>;
9
+ export declare function sanitizeTimestamp(typeLike: object): Timestamp<Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond>;
10
+ export declare function sanitizeTypedTimestamp(typeLike: object, Datatype: typeof TimestampNanosecond | typeof TimestampMicrosecond | typeof TimestampMillisecond | typeof TimestampSecond): TimestampSecond | TimestampMillisecond | TimestampMicrosecond | TimestampNanosecond;
11
+ export declare function sanitizeInterval(typeLike: object): Interval<Type.Interval | Type.IntervalDayTime | Type.IntervalYearMonth>;
12
+ export declare function sanitizeList(typeLike: object): List<any>;
13
+ export declare function sanitizeStruct(typeLike: object): Struct<any>;
14
+ export declare function sanitizeUnion(typeLike: object): Union<Type.Union | Type.DenseUnion | Type.SparseUnion>;
15
+ export declare function sanitizeTypedUnion(typeLike: object, UnionType: typeof DenseUnion | typeof SparseUnion): SparseUnion | DenseUnion;
16
+ export declare function sanitizeFixedSizeBinary(typeLike: object): FixedSizeBinary;
17
+ export declare function sanitizeFixedSizeList(typeLike: object): FixedSizeList<any>;
18
+ export declare function sanitizeMap(typeLike: object): Map_<any, any>;
19
+ export declare function sanitizeDuration(typeLike: object): Duration<Type.Duration | Type.DurationSecond | Type.DurationMillisecond | Type.DurationMicrosecond | Type.DurationNanosecond>;
20
+ export declare function sanitizeDictionary(typeLike: object): Dictionary<DataType<any, any>, TKeys>;
21
+ export declare function sanitizeType(typeLike: unknown): DataType<any>;
22
+ export declare function sanitizeField(fieldLike: unknown): Field;
2
23
  /**
3
24
  * Convert something schemaLike into a Schema instance
4
25
  *
package/dist/sanitize.js CHANGED
@@ -13,15 +13,8 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.sanitizeSchema = void 0;
17
- // The utilities in this file help sanitize data from the user's arrow
18
- // library into the types expected by vectordb's arrow library. Node
19
- // generally allows for mulitple versions of the same library (and sometimes
20
- // even multiple copies of the same version) to be installed at the same
21
- // time. However, arrow-js uses instanceof which expected that the input
22
- // comes from the exact same library instance. This is not always the case
23
- // and so we must sanitize the input to ensure that it is compatible.
24
- const apache_arrow_1 = require("apache-arrow");
16
+ exports.sanitizeSchema = exports.sanitizeField = exports.sanitizeType = exports.sanitizeDictionary = exports.sanitizeDuration = exports.sanitizeMap = exports.sanitizeFixedSizeList = exports.sanitizeFixedSizeBinary = exports.sanitizeTypedUnion = exports.sanitizeUnion = exports.sanitizeStruct = exports.sanitizeList = exports.sanitizeInterval = exports.sanitizeTypedTimestamp = exports.sanitizeTimestamp = exports.sanitizeTime = exports.sanitizeDate = exports.sanitizeDecimal = exports.sanitizeFloat = exports.sanitizeInt = exports.sanitizeMetadata = void 0;
17
+ const arrow_1 = require("./arrow");
25
18
  function sanitizeMetadata(metadataLike) {
26
19
  if (metadataLike === undefined || metadataLike === null) {
27
20
  return undefined;
@@ -36,6 +29,7 @@ function sanitizeMetadata(metadataLike) {
36
29
  }
37
30
  return metadataLike;
38
31
  }
32
+ exports.sanitizeMetadata = sanitizeMetadata;
39
33
  function sanitizeInt(typeLike) {
40
34
  if (!("bitWidth" in typeLike) ||
41
35
  typeof typeLike.bitWidth !== "number" ||
@@ -43,14 +37,16 @@ function sanitizeInt(typeLike) {
43
37
  typeof typeLike.isSigned !== "boolean") {
44
38
  throw Error("Expected an Int Type to have a `bitWidth` and `isSigned` property");
45
39
  }
46
- return new apache_arrow_1.Int(typeLike.isSigned, typeLike.bitWidth);
40
+ return new arrow_1.Int(typeLike.isSigned, typeLike.bitWidth);
47
41
  }
42
+ exports.sanitizeInt = sanitizeInt;
48
43
  function sanitizeFloat(typeLike) {
49
44
  if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
50
45
  throw Error("Expected a Float Type to have a `precision` property");
51
46
  }
52
- return new apache_arrow_1.Float(typeLike.precision);
47
+ return new arrow_1.Float(typeLike.precision);
53
48
  }
49
+ exports.sanitizeFloat = sanitizeFloat;
54
50
  function sanitizeDecimal(typeLike) {
55
51
  if (!("scale" in typeLike) ||
56
52
  typeof typeLike.scale !== "number" ||
@@ -60,14 +56,16 @@ function sanitizeDecimal(typeLike) {
60
56
  typeof typeLike.bitWidth !== "number") {
61
57
  throw Error("Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties");
62
58
  }
63
- return new apache_arrow_1.Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
59
+ return new arrow_1.Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
64
60
  }
61
+ exports.sanitizeDecimal = sanitizeDecimal;
65
62
  function sanitizeDate(typeLike) {
66
63
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
67
64
  throw Error("Expected a Date type to have a `unit` property");
68
65
  }
69
- return new apache_arrow_1.Date_(typeLike.unit);
66
+ return new arrow_1.Date_(typeLike.unit);
70
67
  }
68
+ exports.sanitizeDate = sanitizeDate;
71
69
  function sanitizeTime(typeLike) {
72
70
  if (!("unit" in typeLike) ||
73
71
  typeof typeLike.unit !== "number" ||
@@ -75,8 +73,9 @@ function sanitizeTime(typeLike) {
75
73
  typeof typeLike.bitWidth !== "number") {
76
74
  throw Error("Expected a Time type to have `unit` and `bitWidth` properties");
77
75
  }
78
- return new apache_arrow_1.Time(typeLike.unit, typeLike.bitWidth);
76
+ return new arrow_1.Time(typeLike.unit, typeLike.bitWidth);
79
77
  }
78
+ exports.sanitizeTime = sanitizeTime;
80
79
  function sanitizeTimestamp(typeLike) {
81
80
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
82
81
  throw Error("Expected a Timestamp type to have a `unit` property");
@@ -85,8 +84,9 @@ function sanitizeTimestamp(typeLike) {
85
84
  if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
86
85
  timezone = typeLike.timezone;
87
86
  }
88
- return new apache_arrow_1.Timestamp(typeLike.unit, timezone);
87
+ return new arrow_1.Timestamp(typeLike.unit, timezone);
89
88
  }
89
+ exports.sanitizeTimestamp = sanitizeTimestamp;
90
90
  function sanitizeTypedTimestamp(typeLike,
91
91
  // eslint-disable-next-line @typescript-eslint/naming-convention
92
92
  Datatype) {
@@ -96,12 +96,14 @@ Datatype) {
96
96
  }
97
97
  return new Datatype(timezone);
98
98
  }
99
+ exports.sanitizeTypedTimestamp = sanitizeTypedTimestamp;
99
100
  function sanitizeInterval(typeLike) {
100
101
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
101
102
  throw Error("Expected an Interval type to have a `unit` property");
102
103
  }
103
- return new apache_arrow_1.Interval(typeLike.unit);
104
+ return new arrow_1.Interval(typeLike.unit);
104
105
  }
106
+ exports.sanitizeInterval = sanitizeInterval;
105
107
  function sanitizeList(typeLike) {
106
108
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
107
109
  throw Error("Expected a List type to have an array-like `children` property");
@@ -109,14 +111,16 @@ function sanitizeList(typeLike) {
109
111
  if (typeLike.children.length !== 1) {
110
112
  throw Error("Expected a List type to have exactly one child");
111
113
  }
112
- return new apache_arrow_1.List(sanitizeField(typeLike.children[0]));
114
+ return new arrow_1.List(sanitizeField(typeLike.children[0]));
113
115
  }
116
+ exports.sanitizeList = sanitizeList;
114
117
  function sanitizeStruct(typeLike) {
115
118
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
116
119
  throw Error("Expected a Struct type to have an array-like `children` property");
117
120
  }
118
- return new apache_arrow_1.Struct(typeLike.children.map((child) => sanitizeField(child)));
121
+ return new arrow_1.Struct(typeLike.children.map((child) => sanitizeField(child)));
119
122
  }
123
+ exports.sanitizeStruct = sanitizeStruct;
120
124
  function sanitizeUnion(typeLike) {
121
125
  if (!("typeIds" in typeLike) ||
122
126
  !("mode" in typeLike) ||
@@ -126,10 +130,11 @@ function sanitizeUnion(typeLike) {
126
130
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
127
131
  throw Error("Expected a Union type to have an array-like `children` property");
128
132
  }
129
- return new apache_arrow_1.Union(typeLike.mode,
133
+ return new arrow_1.Union(typeLike.mode,
130
134
  // biome-ignore lint/suspicious/noExplicitAny: skip
131
135
  typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
132
136
  }
137
+ exports.sanitizeUnion = sanitizeUnion;
133
138
  function sanitizeTypedUnion(typeLike,
134
139
  // eslint-disable-next-line @typescript-eslint/naming-convention
135
140
  UnionType) {
@@ -141,12 +146,14 @@ UnionType) {
141
146
  }
142
147
  return new UnionType(typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
143
148
  }
149
+ exports.sanitizeTypedUnion = sanitizeTypedUnion;
144
150
  function sanitizeFixedSizeBinary(typeLike) {
145
151
  if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
146
152
  throw Error("Expected a FixedSizeBinary type to have a `byteWidth` property");
147
153
  }
148
- return new apache_arrow_1.FixedSizeBinary(typeLike.byteWidth);
154
+ return new arrow_1.FixedSizeBinary(typeLike.byteWidth);
149
155
  }
156
+ exports.sanitizeFixedSizeBinary = sanitizeFixedSizeBinary;
150
157
  function sanitizeFixedSizeList(typeLike) {
151
158
  if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
152
159
  throw Error("Expected a FixedSizeList type to have a `listSize` property");
@@ -157,8 +164,9 @@ function sanitizeFixedSizeList(typeLike) {
157
164
  if (typeLike.children.length !== 1) {
158
165
  throw Error("Expected a FixedSizeList type to have exactly one child");
159
166
  }
160
- return new apache_arrow_1.FixedSizeList(typeLike.listSize, sanitizeField(typeLike.children[0]));
167
+ return new arrow_1.FixedSizeList(typeLike.listSize, sanitizeField(typeLike.children[0]));
161
168
  }
169
+ exports.sanitizeFixedSizeList = sanitizeFixedSizeList;
162
170
  function sanitizeMap(typeLike) {
163
171
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
164
172
  throw Error("Expected a Map type to have an array-like `children` property");
@@ -166,16 +174,18 @@ function sanitizeMap(typeLike) {
166
174
  if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") {
167
175
  throw Error("Expected a Map type to have a `keysSorted` property");
168
176
  }
169
- return new apache_arrow_1.Map_(
177
+ return new arrow_1.Map_(
170
178
  // biome-ignore lint/suspicious/noExplicitAny: skip
171
179
  typeLike.children.map((field) => sanitizeField(field)), typeLike.keysSorted);
172
180
  }
181
+ exports.sanitizeMap = sanitizeMap;
173
182
  function sanitizeDuration(typeLike) {
174
183
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
175
184
  throw Error("Expected a Duration type to have a `unit` property");
176
185
  }
177
- return new apache_arrow_1.Duration(typeLike.unit);
186
+ return new arrow_1.Duration(typeLike.unit);
178
187
  }
188
+ exports.sanitizeDuration = sanitizeDuration;
179
189
  function sanitizeDictionary(typeLike) {
180
190
  if (!("id" in typeLike) || typeof typeLike.id !== "number") {
181
191
  throw Error("Expected a Dictionary type to have an `id` property");
@@ -189,8 +199,9 @@ function sanitizeDictionary(typeLike) {
189
199
  if (!("isOrdered" in typeLike) || typeof typeLike.isOrdered !== "boolean") {
190
200
  throw Error("Expected a Dictionary type to have an `isOrdered` property");
191
201
  }
192
- return new apache_arrow_1.Dictionary(sanitizeType(typeLike.dictionary), sanitizeType(typeLike.indices), typeLike.id, typeLike.isOrdered);
202
+ return new arrow_1.Dictionary(sanitizeType(typeLike.dictionary), sanitizeType(typeLike.indices), typeLike.id, typeLike.isOrdered);
193
203
  }
204
+ exports.sanitizeDictionary = sanitizeDictionary;
194
205
  // biome-ignore lint/suspicious/noExplicitAny: skip
195
206
  function sanitizeType(typeLike) {
196
207
  if (typeof typeLike !== "object" || typeLike === null) {
@@ -210,110 +221,111 @@ function sanitizeType(typeLike) {
210
221
  throw Error("Type's typeId property was not a function or number");
211
222
  }
212
223
  switch (typeId) {
213
- case apache_arrow_1.Type.NONE:
224
+ case arrow_1.Type.NONE:
214
225
  throw Error("Received a Type with a typeId of NONE");
215
- case apache_arrow_1.Type.Null:
216
- return new apache_arrow_1.Null();
217
- case apache_arrow_1.Type.Int:
226
+ case arrow_1.Type.Null:
227
+ return new arrow_1.Null();
228
+ case arrow_1.Type.Int:
218
229
  return sanitizeInt(typeLike);
219
- case apache_arrow_1.Type.Float:
230
+ case arrow_1.Type.Float:
220
231
  return sanitizeFloat(typeLike);
221
- case apache_arrow_1.Type.Binary:
222
- return new apache_arrow_1.Binary();
223
- case apache_arrow_1.Type.Utf8:
224
- return new apache_arrow_1.Utf8();
225
- case apache_arrow_1.Type.Bool:
226
- return new apache_arrow_1.Bool();
227
- case apache_arrow_1.Type.Decimal:
232
+ case arrow_1.Type.Binary:
233
+ return new arrow_1.Binary();
234
+ case arrow_1.Type.Utf8:
235
+ return new arrow_1.Utf8();
236
+ case arrow_1.Type.Bool:
237
+ return new arrow_1.Bool();
238
+ case arrow_1.Type.Decimal:
228
239
  return sanitizeDecimal(typeLike);
229
- case apache_arrow_1.Type.Date:
240
+ case arrow_1.Type.Date:
230
241
  return sanitizeDate(typeLike);
231
- case apache_arrow_1.Type.Time:
242
+ case arrow_1.Type.Time:
232
243
  return sanitizeTime(typeLike);
233
- case apache_arrow_1.Type.Timestamp:
244
+ case arrow_1.Type.Timestamp:
234
245
  return sanitizeTimestamp(typeLike);
235
- case apache_arrow_1.Type.Interval:
246
+ case arrow_1.Type.Interval:
236
247
  return sanitizeInterval(typeLike);
237
- case apache_arrow_1.Type.List:
248
+ case arrow_1.Type.List:
238
249
  return sanitizeList(typeLike);
239
- case apache_arrow_1.Type.Struct:
250
+ case arrow_1.Type.Struct:
240
251
  return sanitizeStruct(typeLike);
241
- case apache_arrow_1.Type.Union:
252
+ case arrow_1.Type.Union:
242
253
  return sanitizeUnion(typeLike);
243
- case apache_arrow_1.Type.FixedSizeBinary:
254
+ case arrow_1.Type.FixedSizeBinary:
244
255
  return sanitizeFixedSizeBinary(typeLike);
245
- case apache_arrow_1.Type.FixedSizeList:
256
+ case arrow_1.Type.FixedSizeList:
246
257
  return sanitizeFixedSizeList(typeLike);
247
- case apache_arrow_1.Type.Map:
258
+ case arrow_1.Type.Map:
248
259
  return sanitizeMap(typeLike);
249
- case apache_arrow_1.Type.Duration:
260
+ case arrow_1.Type.Duration:
250
261
  return sanitizeDuration(typeLike);
251
- case apache_arrow_1.Type.Dictionary:
262
+ case arrow_1.Type.Dictionary:
252
263
  return sanitizeDictionary(typeLike);
253
- case apache_arrow_1.Type.Int8:
254
- return new apache_arrow_1.Int8();
255
- case apache_arrow_1.Type.Int16:
256
- return new apache_arrow_1.Int16();
257
- case apache_arrow_1.Type.Int32:
258
- return new apache_arrow_1.Int32();
259
- case apache_arrow_1.Type.Int64:
260
- return new apache_arrow_1.Int64();
261
- case apache_arrow_1.Type.Uint8:
262
- return new apache_arrow_1.Uint8();
263
- case apache_arrow_1.Type.Uint16:
264
- return new apache_arrow_1.Uint16();
265
- case apache_arrow_1.Type.Uint32:
266
- return new apache_arrow_1.Uint32();
267
- case apache_arrow_1.Type.Uint64:
268
- return new apache_arrow_1.Uint64();
269
- case apache_arrow_1.Type.Float16:
270
- return new apache_arrow_1.Float16();
271
- case apache_arrow_1.Type.Float32:
272
- return new apache_arrow_1.Float32();
273
- case apache_arrow_1.Type.Float64:
274
- return new apache_arrow_1.Float64();
275
- case apache_arrow_1.Type.DateMillisecond:
276
- return new apache_arrow_1.DateMillisecond();
277
- case apache_arrow_1.Type.DateDay:
278
- return new apache_arrow_1.DateDay();
279
- case apache_arrow_1.Type.TimeNanosecond:
280
- return new apache_arrow_1.TimeNanosecond();
281
- case apache_arrow_1.Type.TimeMicrosecond:
282
- return new apache_arrow_1.TimeMicrosecond();
283
- case apache_arrow_1.Type.TimeMillisecond:
284
- return new apache_arrow_1.TimeMillisecond();
285
- case apache_arrow_1.Type.TimeSecond:
286
- return new apache_arrow_1.TimeSecond();
287
- case apache_arrow_1.Type.TimestampNanosecond:
288
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampNanosecond);
289
- case apache_arrow_1.Type.TimestampMicrosecond:
290
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampMicrosecond);
291
- case apache_arrow_1.Type.TimestampMillisecond:
292
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampMillisecond);
293
- case apache_arrow_1.Type.TimestampSecond:
294
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampSecond);
295
- case apache_arrow_1.Type.DenseUnion:
296
- return sanitizeTypedUnion(typeLike, apache_arrow_1.DenseUnion);
297
- case apache_arrow_1.Type.SparseUnion:
298
- return sanitizeTypedUnion(typeLike, apache_arrow_1.SparseUnion);
299
- case apache_arrow_1.Type.IntervalDayTime:
300
- return new apache_arrow_1.IntervalDayTime();
301
- case apache_arrow_1.Type.IntervalYearMonth:
302
- return new apache_arrow_1.IntervalYearMonth();
303
- case apache_arrow_1.Type.DurationNanosecond:
304
- return new apache_arrow_1.DurationNanosecond();
305
- case apache_arrow_1.Type.DurationMicrosecond:
306
- return new apache_arrow_1.DurationMicrosecond();
307
- case apache_arrow_1.Type.DurationMillisecond:
308
- return new apache_arrow_1.DurationMillisecond();
309
- case apache_arrow_1.Type.DurationSecond:
310
- return new apache_arrow_1.DurationSecond();
264
+ case arrow_1.Type.Int8:
265
+ return new arrow_1.Int8();
266
+ case arrow_1.Type.Int16:
267
+ return new arrow_1.Int16();
268
+ case arrow_1.Type.Int32:
269
+ return new arrow_1.Int32();
270
+ case arrow_1.Type.Int64:
271
+ return new arrow_1.Int64();
272
+ case arrow_1.Type.Uint8:
273
+ return new arrow_1.Uint8();
274
+ case arrow_1.Type.Uint16:
275
+ return new arrow_1.Uint16();
276
+ case arrow_1.Type.Uint32:
277
+ return new arrow_1.Uint32();
278
+ case arrow_1.Type.Uint64:
279
+ return new arrow_1.Uint64();
280
+ case arrow_1.Type.Float16:
281
+ return new arrow_1.Float16();
282
+ case arrow_1.Type.Float32:
283
+ return new arrow_1.Float32();
284
+ case arrow_1.Type.Float64:
285
+ return new arrow_1.Float64();
286
+ case arrow_1.Type.DateMillisecond:
287
+ return new arrow_1.DateMillisecond();
288
+ case arrow_1.Type.DateDay:
289
+ return new arrow_1.DateDay();
290
+ case arrow_1.Type.TimeNanosecond:
291
+ return new arrow_1.TimeNanosecond();
292
+ case arrow_1.Type.TimeMicrosecond:
293
+ return new arrow_1.TimeMicrosecond();
294
+ case arrow_1.Type.TimeMillisecond:
295
+ return new arrow_1.TimeMillisecond();
296
+ case arrow_1.Type.TimeSecond:
297
+ return new arrow_1.TimeSecond();
298
+ case arrow_1.Type.TimestampNanosecond:
299
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampNanosecond);
300
+ case arrow_1.Type.TimestampMicrosecond:
301
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampMicrosecond);
302
+ case arrow_1.Type.TimestampMillisecond:
303
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampMillisecond);
304
+ case arrow_1.Type.TimestampSecond:
305
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampSecond);
306
+ case arrow_1.Type.DenseUnion:
307
+ return sanitizeTypedUnion(typeLike, arrow_1.DenseUnion);
308
+ case arrow_1.Type.SparseUnion:
309
+ return sanitizeTypedUnion(typeLike, arrow_1.SparseUnion);
310
+ case arrow_1.Type.IntervalDayTime:
311
+ return new arrow_1.IntervalDayTime();
312
+ case arrow_1.Type.IntervalYearMonth:
313
+ return new arrow_1.IntervalYearMonth();
314
+ case arrow_1.Type.DurationNanosecond:
315
+ return new arrow_1.DurationNanosecond();
316
+ case arrow_1.Type.DurationMicrosecond:
317
+ return new arrow_1.DurationMicrosecond();
318
+ case arrow_1.Type.DurationMillisecond:
319
+ return new arrow_1.DurationMillisecond();
320
+ case arrow_1.Type.DurationSecond:
321
+ return new arrow_1.DurationSecond();
311
322
  default:
312
323
  throw new Error("Unrecoginized type id in schema: " + typeId);
313
324
  }
314
325
  }
326
+ exports.sanitizeType = sanitizeType;
315
327
  function sanitizeField(fieldLike) {
316
- if (fieldLike instanceof apache_arrow_1.Field) {
328
+ if (fieldLike instanceof arrow_1.Field) {
317
329
  return fieldLike;
318
330
  }
319
331
  if (typeof fieldLike !== "object" || fieldLike === null) {
@@ -337,8 +349,9 @@ function sanitizeField(fieldLike) {
337
349
  if ("metadata" in fieldLike) {
338
350
  metadata = sanitizeMetadata(fieldLike.metadata);
339
351
  }
340
- return new apache_arrow_1.Field(name, type, nullable, metadata);
352
+ return new arrow_1.Field(name, type, nullable, metadata);
341
353
  }
354
+ exports.sanitizeField = sanitizeField;
342
355
  /**
343
356
  * Convert something schemaLike into a Schema instance
344
357
  *
@@ -347,7 +360,7 @@ function sanitizeField(fieldLike) {
347
360
  * than lancedb is using.
348
361
  */
349
362
  function sanitizeSchema(schemaLike) {
350
- if (schemaLike instanceof apache_arrow_1.Schema) {
363
+ if (schemaLike instanceof arrow_1.Schema) {
351
364
  return schemaLike;
352
365
  }
353
366
  if (typeof schemaLike !== "object" || schemaLike === null) {
@@ -364,6 +377,6 @@ function sanitizeSchema(schemaLike) {
364
377
  throw Error("The schema passed in had a 'fields' property but it was not an array");
365
378
  }
366
379
  const sanitizedFields = schemaLike.fields.map((field) => sanitizeField(field));
367
- return new apache_arrow_1.Schema(sanitizedFields, metadata);
380
+ return new arrow_1.Schema(sanitizedFields, metadata);
368
381
  }
369
382
  exports.sanitizeSchema = sanitizeSchema;
package/dist/table.d.ts CHANGED
@@ -1,5 +1,4 @@
1
- import { Schema } from "apache-arrow";
2
- import { Data } from "./arrow";
1
+ import { Table as ArrowTable, Data, IntoVector, Schema } from "./arrow";
3
2
  import { IndexOptions } from "./indices";
4
3
  import { AddColumnsSql, ColumnAlteration, IndexConfig, OptimizeStats, Table as _NativeTable } from "./native";
5
4
  import { Query, VectorQuery } from "./query";
@@ -57,6 +56,7 @@ export interface OptimizeOptions {
57
56
  * collected.
58
57
  */
59
58
  export declare class Table {
59
+ #private;
60
60
  private readonly inner;
61
61
  /** Construct a Table. Internal use only. */
62
62
  constructor(inner: _NativeTable);
@@ -187,6 +187,19 @@ export declare class Table {
187
187
  * @returns {Query} A builder that can be used to parameterize the query
188
188
  */
189
189
  query(): Query;
190
+ /**
191
+ * Create a search query to find the nearest neighbors
192
+ * of the given query vector
193
+ * @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
194
+ * @rejects {Error} If no embedding functions are defined in the table
195
+ */
196
+ search(query: string): Promise<VectorQuery>;
197
+ /**
198
+ * Create a search query to find the nearest neighbors
199
+ * of the given query vector
200
+ * @param {IntoVector} query - the query vector
201
+ */
202
+ search(query: IntoVector): VectorQuery;
190
203
  /**
191
204
  * Search the table with a given query vector.
192
205
  *
@@ -194,7 +207,7 @@ export declare class Table {
194
207
  * is the same thing as calling `nearestTo` on the builder returned
195
208
  * by `query`. @see {@link Query#nearestTo} for more details.
196
209
  */
197
- vectorSearch(vector: unknown): VectorQuery;
210
+ vectorSearch(vector: IntoVector): VectorQuery;
198
211
  /**
199
212
  * Add new columns with defined values.
200
213
  * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
@@ -302,4 +315,6 @@ export declare class Table {
302
315
  optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats>;
303
316
  /** List all indices that have been created with {@link Table.createIndex} */
304
317
  listIndices(): Promise<IndexConfig[]>;
318
+ /** Return the table as an arrow table */
319
+ toArrow(): Promise<ArrowTable>;
305
320
  }
package/dist/table.js CHANGED
@@ -14,8 +14,8 @@
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
16
  exports.Table = void 0;
17
- const apache_arrow_1 = require("apache-arrow");
18
17
  const arrow_1 = require("./arrow");
18
+ const registry_1 = require("./embedding/registry");
19
19
  const query_1 = require("./query");
20
20
  /**
21
21
  * A Table is a collection of Records in a LanceDB Database.
@@ -53,10 +53,15 @@ class Table {
53
53
  display() {
54
54
  return this.inner.display();
55
55
  }
56
+ async #getEmbeddingFunctions() {
57
+ const schema = await this.schema();
58
+ const registry = (0, registry_1.getRegistry)();
59
+ return registry.parseFunctions(schema.metadata);
60
+ }
56
61
  /** Get the schema of the table. */
57
62
  async schema() {
58
63
  const schemaBuf = await this.inner.schema();
59
- const tbl = (0, apache_arrow_1.tableFromIPC)(schemaBuf);
64
+ const tbl = (0, arrow_1.tableFromIPC)(schemaBuf);
60
65
  return tbl.schema;
61
66
  }
62
67
  /**
@@ -65,7 +70,10 @@ class Table {
65
70
  */
66
71
  async add(data, options) {
67
72
  const mode = options?.mode ?? "append";
68
- const buffer = await (0, arrow_1.fromDataToBuffer)(data);
73
+ const schema = await this.schema();
74
+ const registry = (0, registry_1.getRegistry)();
75
+ const functions = registry.parseFunctions(schema.metadata);
76
+ const buffer = await (0, arrow_1.fromDataToBuffer)(data, functions.values().next().value, schema);
69
77
  await this.inner.add(buffer, mode);
70
78
  }
71
79
  /**
@@ -197,6 +205,24 @@ class Table {
197
205
  query() {
198
206
  return new query_1.Query(this.inner);
199
207
  }
208
+ search(query) {
209
+ if (typeof query !== "string") {
210
+ return this.vectorSearch(query);
211
+ }
212
+ else {
213
+ return this.#getEmbeddingFunctions().then(async (functions) => {
214
+ // TODO: Support multiple embedding functions
215
+ const embeddingFunc = functions
216
+ .values()
217
+ .next().value;
218
+ if (!embeddingFunc) {
219
+ return Promise.reject(new Error("No embedding functions are defined in the table"));
220
+ }
221
+ const embeddings = await embeddingFunc.function.computeQueryEmbeddings(query);
222
+ return this.query().nearestTo(embeddings);
223
+ });
224
+ }
225
+ }
200
226
  /**
201
227
  * Search the table with a given query vector.
202
228
  *
@@ -339,5 +365,9 @@ class Table {
339
365
  async listIndices() {
340
366
  return await this.inner.listIndices();
341
367
  }
368
+ /** Return the table as an arrow table */
369
+ async toArrow() {
370
+ return await this.query().toArrow();
371
+ }
342
372
  }
343
373
  exports.Table = Table;