@lancedb/lancedb 0.4.20 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +5 -14
  2. package/biome.json +142 -0
  3. package/dist/arrow.d.ts +35 -9
  4. package/dist/arrow.js +247 -19
  5. package/dist/connection.d.ts +4 -1
  6. package/dist/connection.js +11 -5
  7. package/dist/embedding/embedding_function.d.ts +54 -28
  8. package/dist/embedding/embedding_function.js +71 -10
  9. package/dist/embedding/index.d.ts +28 -2
  10. package/dist/embedding/index.js +111 -4
  11. package/dist/embedding/openai.d.ts +16 -7
  12. package/dist/embedding/openai.js +62 -12
  13. package/dist/embedding/registry.d.ts +54 -0
  14. package/dist/embedding/registry.js +123 -0
  15. package/dist/native.d.ts +26 -0
  16. package/dist/query.d.ts +1 -1
  17. package/dist/query.js +7 -6
  18. package/dist/sanitize.d.ts +22 -1
  19. package/dist/sanitize.js +126 -113
  20. package/dist/table.d.ts +50 -4
  21. package/dist/table.js +47 -5
  22. package/lancedb/arrow.ts +283 -49
  23. package/lancedb/connection.ts +27 -6
  24. package/lancedb/embedding/embedding_function.ts +126 -42
  25. package/lancedb/embedding/index.ts +113 -2
  26. package/lancedb/embedding/openai.ts +62 -16
  27. package/lancedb/embedding/registry.ts +172 -0
  28. package/lancedb/query.ts +9 -6
  29. package/lancedb/sanitize.ts +62 -62
  30. package/lancedb/table.ts +72 -5
  31. package/nodejs-artifacts/arrow.d.ts +35 -9
  32. package/nodejs-artifacts/arrow.js +247 -19
  33. package/nodejs-artifacts/connection.d.ts +4 -1
  34. package/nodejs-artifacts/connection.js +11 -5
  35. package/nodejs-artifacts/embedding/embedding_function.d.ts +54 -28
  36. package/nodejs-artifacts/embedding/embedding_function.js +71 -10
  37. package/nodejs-artifacts/embedding/index.d.ts +28 -2
  38. package/nodejs-artifacts/embedding/index.js +111 -4
  39. package/nodejs-artifacts/embedding/openai.d.ts +16 -7
  40. package/nodejs-artifacts/embedding/openai.js +62 -12
  41. package/nodejs-artifacts/embedding/registry.d.ts +54 -0
  42. package/nodejs-artifacts/embedding/registry.js +123 -0
  43. package/nodejs-artifacts/native.d.ts +26 -0
  44. package/nodejs-artifacts/query.d.ts +1 -1
  45. package/nodejs-artifacts/query.js +7 -6
  46. package/nodejs-artifacts/sanitize.d.ts +22 -1
  47. package/nodejs-artifacts/sanitize.js +126 -113
  48. package/nodejs-artifacts/table.d.ts +50 -4
  49. package/nodejs-artifacts/table.js +47 -5
  50. package/package.json +23 -21
  51. package/tsconfig.json +3 -1
  52. package/.eslintignore +0 -3
  53. package/eslint.config.js +0 -28
package/dist/sanitize.js CHANGED
@@ -13,15 +13,8 @@
13
13
  // See the License for the specific language governing permissions and
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
- exports.sanitizeSchema = void 0;
17
- // The utilities in this file help sanitize data from the user's arrow
18
- // library into the types expected by vectordb's arrow library. Node
19
- // generally allows for mulitple versions of the same library (and sometimes
20
- // even multiple copies of the same version) to be installed at the same
21
- // time. However, arrow-js uses instanceof which expected that the input
22
- // comes from the exact same library instance. This is not always the case
23
- // and so we must sanitize the input to ensure that it is compatible.
24
- const apache_arrow_1 = require("apache-arrow");
16
+ exports.sanitizeSchema = exports.sanitizeField = exports.sanitizeType = exports.sanitizeDictionary = exports.sanitizeDuration = exports.sanitizeMap = exports.sanitizeFixedSizeList = exports.sanitizeFixedSizeBinary = exports.sanitizeTypedUnion = exports.sanitizeUnion = exports.sanitizeStruct = exports.sanitizeList = exports.sanitizeInterval = exports.sanitizeTypedTimestamp = exports.sanitizeTimestamp = exports.sanitizeTime = exports.sanitizeDate = exports.sanitizeDecimal = exports.sanitizeFloat = exports.sanitizeInt = exports.sanitizeMetadata = void 0;
17
+ const arrow_1 = require("./arrow");
25
18
  function sanitizeMetadata(metadataLike) {
26
19
  if (metadataLike === undefined || metadataLike === null) {
27
20
  return undefined;
@@ -36,6 +29,7 @@ function sanitizeMetadata(metadataLike) {
36
29
  }
37
30
  return metadataLike;
38
31
  }
32
+ exports.sanitizeMetadata = sanitizeMetadata;
39
33
  function sanitizeInt(typeLike) {
40
34
  if (!("bitWidth" in typeLike) ||
41
35
  typeof typeLike.bitWidth !== "number" ||
@@ -43,14 +37,16 @@ function sanitizeInt(typeLike) {
43
37
  typeof typeLike.isSigned !== "boolean") {
44
38
  throw Error("Expected an Int Type to have a `bitWidth` and `isSigned` property");
45
39
  }
46
- return new apache_arrow_1.Int(typeLike.isSigned, typeLike.bitWidth);
40
+ return new arrow_1.Int(typeLike.isSigned, typeLike.bitWidth);
47
41
  }
42
+ exports.sanitizeInt = sanitizeInt;
48
43
  function sanitizeFloat(typeLike) {
49
44
  if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
50
45
  throw Error("Expected a Float Type to have a `precision` property");
51
46
  }
52
- return new apache_arrow_1.Float(typeLike.precision);
47
+ return new arrow_1.Float(typeLike.precision);
53
48
  }
49
+ exports.sanitizeFloat = sanitizeFloat;
54
50
  function sanitizeDecimal(typeLike) {
55
51
  if (!("scale" in typeLike) ||
56
52
  typeof typeLike.scale !== "number" ||
@@ -60,14 +56,16 @@ function sanitizeDecimal(typeLike) {
60
56
  typeof typeLike.bitWidth !== "number") {
61
57
  throw Error("Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties");
62
58
  }
63
- return new apache_arrow_1.Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
59
+ return new arrow_1.Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
64
60
  }
61
+ exports.sanitizeDecimal = sanitizeDecimal;
65
62
  function sanitizeDate(typeLike) {
66
63
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
67
64
  throw Error("Expected a Date type to have a `unit` property");
68
65
  }
69
- return new apache_arrow_1.Date_(typeLike.unit);
66
+ return new arrow_1.Date_(typeLike.unit);
70
67
  }
68
+ exports.sanitizeDate = sanitizeDate;
71
69
  function sanitizeTime(typeLike) {
72
70
  if (!("unit" in typeLike) ||
73
71
  typeof typeLike.unit !== "number" ||
@@ -75,8 +73,9 @@ function sanitizeTime(typeLike) {
75
73
  typeof typeLike.bitWidth !== "number") {
76
74
  throw Error("Expected a Time type to have `unit` and `bitWidth` properties");
77
75
  }
78
- return new apache_arrow_1.Time(typeLike.unit, typeLike.bitWidth);
76
+ return new arrow_1.Time(typeLike.unit, typeLike.bitWidth);
79
77
  }
78
+ exports.sanitizeTime = sanitizeTime;
80
79
  function sanitizeTimestamp(typeLike) {
81
80
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
82
81
  throw Error("Expected a Timestamp type to have a `unit` property");
@@ -85,8 +84,9 @@ function sanitizeTimestamp(typeLike) {
85
84
  if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
86
85
  timezone = typeLike.timezone;
87
86
  }
88
- return new apache_arrow_1.Timestamp(typeLike.unit, timezone);
87
+ return new arrow_1.Timestamp(typeLike.unit, timezone);
89
88
  }
89
+ exports.sanitizeTimestamp = sanitizeTimestamp;
90
90
  function sanitizeTypedTimestamp(typeLike,
91
91
  // eslint-disable-next-line @typescript-eslint/naming-convention
92
92
  Datatype) {
@@ -96,12 +96,14 @@ Datatype) {
96
96
  }
97
97
  return new Datatype(timezone);
98
98
  }
99
+ exports.sanitizeTypedTimestamp = sanitizeTypedTimestamp;
99
100
  function sanitizeInterval(typeLike) {
100
101
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
101
102
  throw Error("Expected an Interval type to have a `unit` property");
102
103
  }
103
- return new apache_arrow_1.Interval(typeLike.unit);
104
+ return new arrow_1.Interval(typeLike.unit);
104
105
  }
106
+ exports.sanitizeInterval = sanitizeInterval;
105
107
  function sanitizeList(typeLike) {
106
108
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
107
109
  throw Error("Expected a List type to have an array-like `children` property");
@@ -109,14 +111,16 @@ function sanitizeList(typeLike) {
109
111
  if (typeLike.children.length !== 1) {
110
112
  throw Error("Expected a List type to have exactly one child");
111
113
  }
112
- return new apache_arrow_1.List(sanitizeField(typeLike.children[0]));
114
+ return new arrow_1.List(sanitizeField(typeLike.children[0]));
113
115
  }
116
+ exports.sanitizeList = sanitizeList;
114
117
  function sanitizeStruct(typeLike) {
115
118
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
116
119
  throw Error("Expected a Struct type to have an array-like `children` property");
117
120
  }
118
- return new apache_arrow_1.Struct(typeLike.children.map((child) => sanitizeField(child)));
121
+ return new arrow_1.Struct(typeLike.children.map((child) => sanitizeField(child)));
119
122
  }
123
+ exports.sanitizeStruct = sanitizeStruct;
120
124
  function sanitizeUnion(typeLike) {
121
125
  if (!("typeIds" in typeLike) ||
122
126
  !("mode" in typeLike) ||
@@ -126,10 +130,11 @@ function sanitizeUnion(typeLike) {
126
130
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
127
131
  throw Error("Expected a Union type to have an array-like `children` property");
128
132
  }
129
- return new apache_arrow_1.Union(typeLike.mode,
130
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
133
+ return new arrow_1.Union(typeLike.mode,
134
+ // biome-ignore lint/suspicious/noExplicitAny: skip
131
135
  typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
132
136
  }
137
+ exports.sanitizeUnion = sanitizeUnion;
133
138
  function sanitizeTypedUnion(typeLike,
134
139
  // eslint-disable-next-line @typescript-eslint/naming-convention
135
140
  UnionType) {
@@ -141,12 +146,14 @@ UnionType) {
141
146
  }
142
147
  return new UnionType(typeLike.typeIds, typeLike.children.map((child) => sanitizeField(child)));
143
148
  }
149
+ exports.sanitizeTypedUnion = sanitizeTypedUnion;
144
150
  function sanitizeFixedSizeBinary(typeLike) {
145
151
  if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
146
152
  throw Error("Expected a FixedSizeBinary type to have a `byteWidth` property");
147
153
  }
148
- return new apache_arrow_1.FixedSizeBinary(typeLike.byteWidth);
154
+ return new arrow_1.FixedSizeBinary(typeLike.byteWidth);
149
155
  }
156
+ exports.sanitizeFixedSizeBinary = sanitizeFixedSizeBinary;
150
157
  function sanitizeFixedSizeList(typeLike) {
151
158
  if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
152
159
  throw Error("Expected a FixedSizeList type to have a `listSize` property");
@@ -157,8 +164,9 @@ function sanitizeFixedSizeList(typeLike) {
157
164
  if (typeLike.children.length !== 1) {
158
165
  throw Error("Expected a FixedSizeList type to have exactly one child");
159
166
  }
160
- return new apache_arrow_1.FixedSizeList(typeLike.listSize, sanitizeField(typeLike.children[0]));
167
+ return new arrow_1.FixedSizeList(typeLike.listSize, sanitizeField(typeLike.children[0]));
161
168
  }
169
+ exports.sanitizeFixedSizeList = sanitizeFixedSizeList;
162
170
  function sanitizeMap(typeLike) {
163
171
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
164
172
  throw Error("Expected a Map type to have an array-like `children` property");
@@ -166,16 +174,18 @@ function sanitizeMap(typeLike) {
166
174
  if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") {
167
175
  throw Error("Expected a Map type to have a `keysSorted` property");
168
176
  }
169
- return new apache_arrow_1.Map_(
170
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
177
+ return new arrow_1.Map_(
178
+ // biome-ignore lint/suspicious/noExplicitAny: skip
171
179
  typeLike.children.map((field) => sanitizeField(field)), typeLike.keysSorted);
172
180
  }
181
+ exports.sanitizeMap = sanitizeMap;
173
182
  function sanitizeDuration(typeLike) {
174
183
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
175
184
  throw Error("Expected a Duration type to have a `unit` property");
176
185
  }
177
- return new apache_arrow_1.Duration(typeLike.unit);
186
+ return new arrow_1.Duration(typeLike.unit);
178
187
  }
188
+ exports.sanitizeDuration = sanitizeDuration;
179
189
  function sanitizeDictionary(typeLike) {
180
190
  if (!("id" in typeLike) || typeof typeLike.id !== "number") {
181
191
  throw Error("Expected a Dictionary type to have an `id` property");
@@ -189,9 +199,10 @@ function sanitizeDictionary(typeLike) {
189
199
  if (!("isOrdered" in typeLike) || typeof typeLike.isOrdered !== "boolean") {
190
200
  throw Error("Expected a Dictionary type to have an `isOrdered` property");
191
201
  }
192
- return new apache_arrow_1.Dictionary(sanitizeType(typeLike.dictionary), sanitizeType(typeLike.indices), typeLike.id, typeLike.isOrdered);
202
+ return new arrow_1.Dictionary(sanitizeType(typeLike.dictionary), sanitizeType(typeLike.indices), typeLike.id, typeLike.isOrdered);
193
203
  }
194
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
204
+ exports.sanitizeDictionary = sanitizeDictionary;
205
+ // biome-ignore lint/suspicious/noExplicitAny: skip
195
206
  function sanitizeType(typeLike) {
196
207
  if (typeof typeLike !== "object" || typeLike === null) {
197
208
  throw Error("Expected a Type but object was null/undefined");
@@ -210,110 +221,111 @@ function sanitizeType(typeLike) {
210
221
  throw Error("Type's typeId property was not a function or number");
211
222
  }
212
223
  switch (typeId) {
213
- case apache_arrow_1.Type.NONE:
224
+ case arrow_1.Type.NONE:
214
225
  throw Error("Received a Type with a typeId of NONE");
215
- case apache_arrow_1.Type.Null:
216
- return new apache_arrow_1.Null();
217
- case apache_arrow_1.Type.Int:
226
+ case arrow_1.Type.Null:
227
+ return new arrow_1.Null();
228
+ case arrow_1.Type.Int:
218
229
  return sanitizeInt(typeLike);
219
- case apache_arrow_1.Type.Float:
230
+ case arrow_1.Type.Float:
220
231
  return sanitizeFloat(typeLike);
221
- case apache_arrow_1.Type.Binary:
222
- return new apache_arrow_1.Binary();
223
- case apache_arrow_1.Type.Utf8:
224
- return new apache_arrow_1.Utf8();
225
- case apache_arrow_1.Type.Bool:
226
- return new apache_arrow_1.Bool();
227
- case apache_arrow_1.Type.Decimal:
232
+ case arrow_1.Type.Binary:
233
+ return new arrow_1.Binary();
234
+ case arrow_1.Type.Utf8:
235
+ return new arrow_1.Utf8();
236
+ case arrow_1.Type.Bool:
237
+ return new arrow_1.Bool();
238
+ case arrow_1.Type.Decimal:
228
239
  return sanitizeDecimal(typeLike);
229
- case apache_arrow_1.Type.Date:
240
+ case arrow_1.Type.Date:
230
241
  return sanitizeDate(typeLike);
231
- case apache_arrow_1.Type.Time:
242
+ case arrow_1.Type.Time:
232
243
  return sanitizeTime(typeLike);
233
- case apache_arrow_1.Type.Timestamp:
244
+ case arrow_1.Type.Timestamp:
234
245
  return sanitizeTimestamp(typeLike);
235
- case apache_arrow_1.Type.Interval:
246
+ case arrow_1.Type.Interval:
236
247
  return sanitizeInterval(typeLike);
237
- case apache_arrow_1.Type.List:
248
+ case arrow_1.Type.List:
238
249
  return sanitizeList(typeLike);
239
- case apache_arrow_1.Type.Struct:
250
+ case arrow_1.Type.Struct:
240
251
  return sanitizeStruct(typeLike);
241
- case apache_arrow_1.Type.Union:
252
+ case arrow_1.Type.Union:
242
253
  return sanitizeUnion(typeLike);
243
- case apache_arrow_1.Type.FixedSizeBinary:
254
+ case arrow_1.Type.FixedSizeBinary:
244
255
  return sanitizeFixedSizeBinary(typeLike);
245
- case apache_arrow_1.Type.FixedSizeList:
256
+ case arrow_1.Type.FixedSizeList:
246
257
  return sanitizeFixedSizeList(typeLike);
247
- case apache_arrow_1.Type.Map:
258
+ case arrow_1.Type.Map:
248
259
  return sanitizeMap(typeLike);
249
- case apache_arrow_1.Type.Duration:
260
+ case arrow_1.Type.Duration:
250
261
  return sanitizeDuration(typeLike);
251
- case apache_arrow_1.Type.Dictionary:
262
+ case arrow_1.Type.Dictionary:
252
263
  return sanitizeDictionary(typeLike);
253
- case apache_arrow_1.Type.Int8:
254
- return new apache_arrow_1.Int8();
255
- case apache_arrow_1.Type.Int16:
256
- return new apache_arrow_1.Int16();
257
- case apache_arrow_1.Type.Int32:
258
- return new apache_arrow_1.Int32();
259
- case apache_arrow_1.Type.Int64:
260
- return new apache_arrow_1.Int64();
261
- case apache_arrow_1.Type.Uint8:
262
- return new apache_arrow_1.Uint8();
263
- case apache_arrow_1.Type.Uint16:
264
- return new apache_arrow_1.Uint16();
265
- case apache_arrow_1.Type.Uint32:
266
- return new apache_arrow_1.Uint32();
267
- case apache_arrow_1.Type.Uint64:
268
- return new apache_arrow_1.Uint64();
269
- case apache_arrow_1.Type.Float16:
270
- return new apache_arrow_1.Float16();
271
- case apache_arrow_1.Type.Float32:
272
- return new apache_arrow_1.Float32();
273
- case apache_arrow_1.Type.Float64:
274
- return new apache_arrow_1.Float64();
275
- case apache_arrow_1.Type.DateMillisecond:
276
- return new apache_arrow_1.DateMillisecond();
277
- case apache_arrow_1.Type.DateDay:
278
- return new apache_arrow_1.DateDay();
279
- case apache_arrow_1.Type.TimeNanosecond:
280
- return new apache_arrow_1.TimeNanosecond();
281
- case apache_arrow_1.Type.TimeMicrosecond:
282
- return new apache_arrow_1.TimeMicrosecond();
283
- case apache_arrow_1.Type.TimeMillisecond:
284
- return new apache_arrow_1.TimeMillisecond();
285
- case apache_arrow_1.Type.TimeSecond:
286
- return new apache_arrow_1.TimeSecond();
287
- case apache_arrow_1.Type.TimestampNanosecond:
288
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampNanosecond);
289
- case apache_arrow_1.Type.TimestampMicrosecond:
290
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampMicrosecond);
291
- case apache_arrow_1.Type.TimestampMillisecond:
292
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampMillisecond);
293
- case apache_arrow_1.Type.TimestampSecond:
294
- return sanitizeTypedTimestamp(typeLike, apache_arrow_1.TimestampSecond);
295
- case apache_arrow_1.Type.DenseUnion:
296
- return sanitizeTypedUnion(typeLike, apache_arrow_1.DenseUnion);
297
- case apache_arrow_1.Type.SparseUnion:
298
- return sanitizeTypedUnion(typeLike, apache_arrow_1.SparseUnion);
299
- case apache_arrow_1.Type.IntervalDayTime:
300
- return new apache_arrow_1.IntervalDayTime();
301
- case apache_arrow_1.Type.IntervalYearMonth:
302
- return new apache_arrow_1.IntervalYearMonth();
303
- case apache_arrow_1.Type.DurationNanosecond:
304
- return new apache_arrow_1.DurationNanosecond();
305
- case apache_arrow_1.Type.DurationMicrosecond:
306
- return new apache_arrow_1.DurationMicrosecond();
307
- case apache_arrow_1.Type.DurationMillisecond:
308
- return new apache_arrow_1.DurationMillisecond();
309
- case apache_arrow_1.Type.DurationSecond:
310
- return new apache_arrow_1.DurationSecond();
264
+ case arrow_1.Type.Int8:
265
+ return new arrow_1.Int8();
266
+ case arrow_1.Type.Int16:
267
+ return new arrow_1.Int16();
268
+ case arrow_1.Type.Int32:
269
+ return new arrow_1.Int32();
270
+ case arrow_1.Type.Int64:
271
+ return new arrow_1.Int64();
272
+ case arrow_1.Type.Uint8:
273
+ return new arrow_1.Uint8();
274
+ case arrow_1.Type.Uint16:
275
+ return new arrow_1.Uint16();
276
+ case arrow_1.Type.Uint32:
277
+ return new arrow_1.Uint32();
278
+ case arrow_1.Type.Uint64:
279
+ return new arrow_1.Uint64();
280
+ case arrow_1.Type.Float16:
281
+ return new arrow_1.Float16();
282
+ case arrow_1.Type.Float32:
283
+ return new arrow_1.Float32();
284
+ case arrow_1.Type.Float64:
285
+ return new arrow_1.Float64();
286
+ case arrow_1.Type.DateMillisecond:
287
+ return new arrow_1.DateMillisecond();
288
+ case arrow_1.Type.DateDay:
289
+ return new arrow_1.DateDay();
290
+ case arrow_1.Type.TimeNanosecond:
291
+ return new arrow_1.TimeNanosecond();
292
+ case arrow_1.Type.TimeMicrosecond:
293
+ return new arrow_1.TimeMicrosecond();
294
+ case arrow_1.Type.TimeMillisecond:
295
+ return new arrow_1.TimeMillisecond();
296
+ case arrow_1.Type.TimeSecond:
297
+ return new arrow_1.TimeSecond();
298
+ case arrow_1.Type.TimestampNanosecond:
299
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampNanosecond);
300
+ case arrow_1.Type.TimestampMicrosecond:
301
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampMicrosecond);
302
+ case arrow_1.Type.TimestampMillisecond:
303
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampMillisecond);
304
+ case arrow_1.Type.TimestampSecond:
305
+ return sanitizeTypedTimestamp(typeLike, arrow_1.TimestampSecond);
306
+ case arrow_1.Type.DenseUnion:
307
+ return sanitizeTypedUnion(typeLike, arrow_1.DenseUnion);
308
+ case arrow_1.Type.SparseUnion:
309
+ return sanitizeTypedUnion(typeLike, arrow_1.SparseUnion);
310
+ case arrow_1.Type.IntervalDayTime:
311
+ return new arrow_1.IntervalDayTime();
312
+ case arrow_1.Type.IntervalYearMonth:
313
+ return new arrow_1.IntervalYearMonth();
314
+ case arrow_1.Type.DurationNanosecond:
315
+ return new arrow_1.DurationNanosecond();
316
+ case arrow_1.Type.DurationMicrosecond:
317
+ return new arrow_1.DurationMicrosecond();
318
+ case arrow_1.Type.DurationMillisecond:
319
+ return new arrow_1.DurationMillisecond();
320
+ case arrow_1.Type.DurationSecond:
321
+ return new arrow_1.DurationSecond();
311
322
  default:
312
323
  throw new Error("Unrecoginized type id in schema: " + typeId);
313
324
  }
314
325
  }
326
+ exports.sanitizeType = sanitizeType;
315
327
  function sanitizeField(fieldLike) {
316
- if (fieldLike instanceof apache_arrow_1.Field) {
328
+ if (fieldLike instanceof arrow_1.Field) {
317
329
  return fieldLike;
318
330
  }
319
331
  if (typeof fieldLike !== "object" || fieldLike === null) {
@@ -337,8 +349,9 @@ function sanitizeField(fieldLike) {
337
349
  if ("metadata" in fieldLike) {
338
350
  metadata = sanitizeMetadata(fieldLike.metadata);
339
351
  }
340
- return new apache_arrow_1.Field(name, type, nullable, metadata);
352
+ return new arrow_1.Field(name, type, nullable, metadata);
341
353
  }
354
+ exports.sanitizeField = sanitizeField;
342
355
  /**
343
356
  * Convert something schemaLike into a Schema instance
344
357
  *
@@ -347,7 +360,7 @@ function sanitizeField(fieldLike) {
347
360
  * than lancedb is using.
348
361
  */
349
362
  function sanitizeSchema(schemaLike) {
350
- if (schemaLike instanceof apache_arrow_1.Schema) {
363
+ if (schemaLike instanceof arrow_1.Schema) {
351
364
  return schemaLike;
352
365
  }
353
366
  if (typeof schemaLike !== "object" || schemaLike === null) {
@@ -364,6 +377,6 @@ function sanitizeSchema(schemaLike) {
364
377
  throw Error("The schema passed in had a 'fields' property but it was not an array");
365
378
  }
366
379
  const sanitizedFields = schemaLike.fields.map((field) => sanitizeField(field));
367
- return new apache_arrow_1.Schema(sanitizedFields, metadata);
380
+ return new arrow_1.Schema(sanitizedFields, metadata);
368
381
  }
369
382
  exports.sanitizeSchema = sanitizeSchema;
package/dist/table.d.ts CHANGED
@@ -1,8 +1,7 @@
1
- import { Schema } from "apache-arrow";
2
- import { AddColumnsSql, ColumnAlteration, IndexConfig, Table as _NativeTable } from "./native";
3
- import { Query, VectorQuery } from "./query";
1
+ import { Data, Schema } from "./arrow";
4
2
  import { IndexOptions } from "./indices";
5
- import { Data } from "./arrow";
3
+ import { AddColumnsSql, ColumnAlteration, IndexConfig, OptimizeStats, Table as _NativeTable } from "./native";
4
+ import { Query, VectorQuery } from "./query";
6
5
  export { IndexConfig } from "./native";
7
6
  /**
8
7
  * Options for adding data to a table.
@@ -28,6 +27,22 @@ export interface UpdateOptions {
28
27
  */
29
28
  where: string;
30
29
  }
30
+ export interface OptimizeOptions {
31
+ /**
32
+ * If set then all versions older than the given date
33
+ * will be removed. The current version will never be removed.
34
+ * The default is 7 days
35
+ * @example
36
+ * // Delete all versions older than 1 day
37
+ * const olderThan = new Date();
38
+ * olderThan.setDate(olderThan.getDate() - 1);
39
+ * tbl.optimize({ cleanupOlderThan: olderThan });
40
+ *
41
+ * // Delete all versions except the current version
42
+ * tbl.optimize({ cleanupOlderThan: new Date() });
43
+ */
44
+ cleanupOlderThan: Date;
45
+ }
31
46
  /**
32
47
  * A Table is a collection of Records in a LanceDB Database.
33
48
  *
@@ -253,6 +268,37 @@ export declare class Table {
253
268
  * out state and the read_consistency_interval, if any, will apply.
254
269
  */
255
270
  restore(): Promise<void>;
271
+ /**
272
+ * Optimize the on-disk data and indices for better performance.
273
+ *
274
+ * Modeled after ``VACUUM`` in PostgreSQL.
275
+ *
276
+ * Optimization covers three operations:
277
+ *
278
+ * - Compaction: Merges small files into larger ones
279
+ * - Prune: Removes old versions of the dataset
280
+ * - Index: Optimizes the indices, adding new data to existing indices
281
+ *
282
+ *
283
+ * Experimental API
284
+ * ----------------
285
+ *
286
+ * The optimization process is undergoing active development and may change.
287
+ * Our goal with these changes is to improve the performance of optimization and
288
+ * reduce the complexity.
289
+ *
290
+ * That being said, it is essential today to run optimize if you want the best
291
+ * performance. It should be stable and safe to use in production, but it is our
292
+ * hope that the API may be simplified (or not even need to be called) in the
293
+ * future.
294
+ *
295
+ * The frequency an application should call optimize is based on the frequency of
296
+ * data modifications. If data is frequently added, deleted, or updated then
297
+ * optimize should be run frequently. A good rule of thumb is to run optimize if
298
+ * you have added or modified 100,000 or more records or run more than 20 data
299
+ * modification operations.
300
+ */
301
+ optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats>;
256
302
  /** List all indices that have been created with {@link Table.createIndex} */
257
303
  listIndices(): Promise<IndexConfig[]>;
258
304
  }
package/dist/table.js CHANGED
@@ -14,9 +14,9 @@
14
14
  // limitations under the License.
15
15
  Object.defineProperty(exports, "__esModule", { value: true });
16
16
  exports.Table = void 0;
17
- const apache_arrow_1 = require("apache-arrow");
18
- const query_1 = require("./query");
19
17
  const arrow_1 = require("./arrow");
18
+ const registry_1 = require("./embedding/registry");
19
+ const query_1 = require("./query");
20
20
  /**
21
21
  * A Table is a collection of Records in a LanceDB Database.
22
22
  *
@@ -56,7 +56,7 @@ class Table {
56
56
  /** Get the schema of the table. */
57
57
  async schema() {
58
58
  const schemaBuf = await this.inner.schema();
59
- const tbl = (0, apache_arrow_1.tableFromIPC)(schemaBuf);
59
+ const tbl = (0, arrow_1.tableFromIPC)(schemaBuf);
60
60
  return tbl.schema;
61
61
  }
62
62
  /**
@@ -65,7 +65,10 @@ class Table {
65
65
  */
66
66
  async add(data, options) {
67
67
  const mode = options?.mode ?? "append";
68
- const buffer = await (0, arrow_1.fromDataToBuffer)(data);
68
+ const schema = await this.schema();
69
+ const registry = (0, registry_1.getRegistry)();
70
+ const functions = registry.parseFunctions(schema.metadata);
71
+ const buffer = await (0, arrow_1.fromDataToBuffer)(data, functions.values().next().value);
69
72
  await this.inner.add(buffer, mode);
70
73
  }
71
74
  /**
@@ -140,7 +143,7 @@ class Table {
140
143
  */
141
144
  async createIndex(column, options) {
142
145
  // Bit of a hack to get around the fact that TS has no package-scope.
143
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
146
+ // biome-ignore lint/suspicious/noExplicitAny: skip
144
147
  const nativeIndex = options?.config?.inner;
145
148
  await this.inner.createIndex(nativeIndex, column, options?.replace);
146
149
  }
@@ -296,6 +299,45 @@ class Table {
296
299
  async restore() {
297
300
  await this.inner.restore();
298
301
  }
302
+ /**
303
+ * Optimize the on-disk data and indices for better performance.
304
+ *
305
+ * Modeled after ``VACUUM`` in PostgreSQL.
306
+ *
307
+ * Optimization covers three operations:
308
+ *
309
+ * - Compaction: Merges small files into larger ones
310
+ * - Prune: Removes old versions of the dataset
311
+ * - Index: Optimizes the indices, adding new data to existing indices
312
+ *
313
+ *
314
+ * Experimental API
315
+ * ----------------
316
+ *
317
+ * The optimization process is undergoing active development and may change.
318
+ * Our goal with these changes is to improve the performance of optimization and
319
+ * reduce the complexity.
320
+ *
321
+ * That being said, it is essential today to run optimize if you want the best
322
+ * performance. It should be stable and safe to use in production, but it is our
323
+ * hope that the API may be simplified (or not even need to be called) in the
324
+ * future.
325
+ *
326
+ * The frequency an application should call optimize is based on the frequency of
327
+ * data modifications. If data is frequently added, deleted, or updated then
328
+ * optimize should be run frequently. A good rule of thumb is to run optimize if
329
+ * you have added or modified 100,000 or more records or run more than 20 data
330
+ * modification operations.
331
+ */
332
+ async optimize(options) {
333
+ let cleanupOlderThanMs;
334
+ if (options?.cleanupOlderThan !== undefined &&
335
+ options?.cleanupOlderThan !== null) {
336
+ cleanupOlderThanMs =
337
+ new Date().getTime() - options.cleanupOlderThan.getTime();
338
+ }
339
+ return await this.inner.optimize(cleanupOlderThanMs);
340
+ }
299
341
  /** List all indices that have been created with {@link Table.createIndex} */
300
342
  async listIndices() {
301
343
  return await this.inner.listIndices();