deeplake 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/dist/browser/browser/client.d.ts +33 -0
  2. package/dist/browser/browser/client.d.ts.map +1 -0
  3. package/dist/browser/browser/client.js +312 -0
  4. package/dist/browser/browser/client.js.map +1 -0
  5. package/dist/browser/browser/index.d.ts +15 -0
  6. package/dist/browser/browser/index.d.ts.map +1 -0
  7. package/dist/browser/browser/index.js +11 -0
  8. package/dist/browser/browser/index.js.map +1 -0
  9. package/dist/browser/browser/wasm.d.ts +8 -0
  10. package/dist/browser/browser/wasm.d.ts.map +1 -0
  11. package/dist/browser/browser/wasm.js +53 -0
  12. package/dist/browser/browser/wasm.js.map +1 -0
  13. package/dist/browser/shared/api.d.ts +9 -0
  14. package/dist/browser/shared/api.d.ts.map +1 -0
  15. package/dist/browser/shared/api.js +67 -0
  16. package/dist/browser/shared/api.js.map +1 -0
  17. package/dist/browser/shared/credentials.d.ts +4 -0
  18. package/dist/browser/shared/credentials.d.ts.map +1 -0
  19. package/dist/browser/shared/credentials.js +24 -0
  20. package/dist/browser/shared/credentials.js.map +1 -0
  21. package/dist/browser/shared/database.d.ts +26 -0
  22. package/dist/browser/shared/database.d.ts.map +1 -0
  23. package/dist/browser/shared/database.js +67 -0
  24. package/dist/browser/shared/database.js.map +1 -0
  25. package/dist/browser/shared/dlref.d.ts +22 -0
  26. package/dist/browser/shared/dlref.d.ts.map +1 -0
  27. package/dist/browser/shared/dlref.js +48 -0
  28. package/dist/browser/shared/dlref.js.map +1 -0
  29. package/dist/browser/shared/errors.d.ts +22 -0
  30. package/dist/browser/shared/errors.d.ts.map +1 -0
  31. package/dist/browser/shared/errors.js +43 -0
  32. package/dist/browser/shared/errors.js.map +1 -0
  33. package/dist/browser/shared/schema.d.ts +7 -0
  34. package/dist/browser/shared/schema.d.ts.map +1 -0
  35. package/dist/browser/shared/schema.js +151 -0
  36. package/dist/browser/shared/schema.js.map +1 -0
  37. package/dist/browser/shared/token.d.ts +4 -0
  38. package/dist/browser/shared/token.d.ts.map +1 -0
  39. package/dist/browser/shared/token.js +69 -0
  40. package/dist/browser/shared/token.js.map +1 -0
  41. package/dist/browser/shared/types.d.ts +33 -0
  42. package/dist/browser/shared/types.d.ts.map +1 -0
  43. package/dist/browser/shared/types.js +2 -0
  44. package/dist/browser/shared/types.js.map +1 -0
  45. package/dist/browser/shared/wasm-common.d.ts +39 -0
  46. package/dist/browser/shared/wasm-common.d.ts.map +1 -0
  47. package/dist/browser/shared/wasm-common.js +287 -0
  48. package/dist/browser/shared/wasm-common.js.map +1 -0
  49. package/dist/node/node/client.d.ts +49 -0
  50. package/dist/node/node/client.d.ts.map +1 -0
  51. package/dist/node/node/client.js +670 -0
  52. package/dist/node/node/client.js.map +1 -0
  53. package/dist/node/node/formats/coco.d.ts +24 -0
  54. package/dist/node/node/formats/coco.d.ts.map +1 -0
  55. package/dist/node/node/formats/coco.js +342 -0
  56. package/dist/node/node/formats/coco.js.map +1 -0
  57. package/dist/node/node/formats/coco_panoptic.d.ts +19 -0
  58. package/dist/node/node/formats/coco_panoptic.d.ts.map +1 -0
  59. package/dist/node/node/formats/coco_panoptic.js +125 -0
  60. package/dist/node/node/formats/coco_panoptic.js.map +1 -0
  61. package/dist/node/node/formats/index.d.ts +7 -0
  62. package/dist/node/node/formats/index.d.ts.map +1 -0
  63. package/dist/node/node/formats/index.js +12 -0
  64. package/dist/node/node/formats/index.js.map +1 -0
  65. package/dist/node/node/formats/lerobot.d.ts +42 -0
  66. package/dist/node/node/formats/lerobot.d.ts.map +1 -0
  67. package/dist/node/node/formats/lerobot.js +351 -0
  68. package/dist/node/node/formats/lerobot.js.map +1 -0
  69. package/dist/node/node/index.d.ts +19 -0
  70. package/dist/node/node/index.d.ts.map +1 -0
  71. package/dist/node/node/index.js +75 -0
  72. package/dist/node/node/index.js.map +1 -0
  73. package/dist/node/node/mime.d.ts +2 -0
  74. package/dist/node/node/mime.d.ts.map +1 -0
  75. package/dist/node/node/mime.js +76 -0
  76. package/dist/node/node/mime.js.map +1 -0
  77. package/dist/node/node/normalizers/binary.d.ts +3 -0
  78. package/dist/node/node/normalizers/binary.d.ts.map +1 -0
  79. package/dist/node/node/normalizers/binary.js +51 -0
  80. package/dist/node/node/normalizers/binary.js.map +1 -0
  81. package/dist/node/node/normalizers/dict.d.ts +3 -0
  82. package/dist/node/node/normalizers/dict.d.ts.map +1 -0
  83. package/dist/node/node/normalizers/dict.js +17 -0
  84. package/dist/node/node/normalizers/dict.js.map +1 -0
  85. package/dist/node/node/normalizers/files.d.ts +3 -0
  86. package/dist/node/node/normalizers/files.d.ts.map +1 -0
  87. package/dist/node/node/normalizers/files.js +109 -0
  88. package/dist/node/node/normalizers/files.js.map +1 -0
  89. package/dist/node/node/normalizers/huggingface.d.ts +3 -0
  90. package/dist/node/node/normalizers/huggingface.d.ts.map +1 -0
  91. package/dist/node/node/normalizers/huggingface.js +158 -0
  92. package/dist/node/node/normalizers/huggingface.js.map +1 -0
  93. package/dist/node/node/normalizers/image.d.ts +3 -0
  94. package/dist/node/node/normalizers/image.d.ts.map +1 -0
  95. package/dist/node/node/normalizers/image.js +52 -0
  96. package/dist/node/node/normalizers/image.js.map +1 -0
  97. package/dist/node/node/normalizers/index.d.ts +9 -0
  98. package/dist/node/node/normalizers/index.d.ts.map +1 -0
  99. package/dist/node/node/normalizers/index.js +21 -0
  100. package/dist/node/node/normalizers/index.js.map +1 -0
  101. package/dist/node/node/normalizers/pdf.d.ts +3 -0
  102. package/dist/node/node/normalizers/pdf.d.ts.map +1 -0
  103. package/dist/node/node/normalizers/pdf.js +75 -0
  104. package/dist/node/node/normalizers/pdf.js.map +1 -0
  105. package/dist/node/node/normalizers/text.d.ts +4 -0
  106. package/dist/node/node/normalizers/text.d.ts.map +1 -0
  107. package/dist/node/node/normalizers/text.js +83 -0
  108. package/dist/node/node/normalizers/text.js.map +1 -0
  109. package/dist/node/node/normalizers/video.d.ts +3 -0
  110. package/dist/node/node/normalizers/video.d.ts.map +1 -0
  111. package/dist/node/node/normalizers/video.js +119 -0
  112. package/dist/node/node/normalizers/video.js.map +1 -0
  113. package/dist/node/node/storage.d.ts +15 -0
  114. package/dist/node/node/storage.d.ts.map +1 -0
  115. package/dist/node/node/storage.js +477 -0
  116. package/dist/node/node/storage.js.map +1 -0
  117. package/dist/node/node/wasm.d.ts +3 -0
  118. package/dist/node/node/wasm.d.ts.map +1 -0
  119. package/dist/node/node/wasm.js +49 -0
  120. package/dist/node/node/wasm.js.map +1 -0
  121. package/dist/node/shared/api.d.ts +9 -0
  122. package/dist/node/shared/api.d.ts.map +1 -0
  123. package/dist/node/shared/api.js +70 -0
  124. package/dist/node/shared/api.js.map +1 -0
  125. package/dist/node/shared/credentials.d.ts +4 -0
  126. package/dist/node/shared/credentials.d.ts.map +1 -0
  127. package/dist/node/shared/credentials.js +28 -0
  128. package/dist/node/shared/credentials.js.map +1 -0
  129. package/dist/node/shared/database.d.ts +26 -0
  130. package/dist/node/shared/database.d.ts.map +1 -0
  131. package/dist/node/shared/database.js +71 -0
  132. package/dist/node/shared/database.js.map +1 -0
  133. package/dist/node/shared/dlref.d.ts +22 -0
  134. package/dist/node/shared/dlref.d.ts.map +1 -0
  135. package/dist/node/shared/dlref.js +52 -0
  136. package/dist/node/shared/dlref.js.map +1 -0
  137. package/dist/node/shared/errors.d.ts +22 -0
  138. package/dist/node/shared/errors.d.ts.map +1 -0
  139. package/dist/node/shared/errors.js +53 -0
  140. package/dist/node/shared/errors.js.map +1 -0
  141. package/dist/node/shared/schema.d.ts +7 -0
  142. package/dist/node/shared/schema.d.ts.map +1 -0
  143. package/dist/node/shared/schema.js +157 -0
  144. package/dist/node/shared/schema.js.map +1 -0
  145. package/dist/node/shared/token.d.ts +4 -0
  146. package/dist/node/shared/token.d.ts.map +1 -0
  147. package/dist/node/shared/token.js +74 -0
  148. package/dist/node/shared/token.js.map +1 -0
  149. package/dist/node/shared/types.d.ts +33 -0
  150. package/dist/node/shared/types.d.ts.map +1 -0
  151. package/dist/node/shared/types.js +3 -0
  152. package/dist/node/shared/types.js.map +1 -0
  153. package/dist/node/shared/wasm-common.d.ts +39 -0
  154. package/dist/node/shared/wasm-common.d.ts.map +1 -0
  155. package/dist/node/shared/wasm-common.js +310 -0
  156. package/dist/node/shared/wasm-common.js.map +1 -0
  157. package/package.json +43 -18
  158. package/README.md +0 -39
  159. package/index.d.ts +0 -2
  160. package/index.js +0 -26
@@ -0,0 +1,670 @@
1
"use strict";
// --- TypeScript-compiler-generated CommonJS interop helpers (importStar emit) ---
// Copies a property binding from module `m` onto namespace object `o`,
// preserving live getters where property descriptors are available.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// Attaches the original module as the `default` export of the namespace object.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
// Emulates `import * as ns from 'mod'` over a CommonJS module: copies every
// own key except `default`, then sets the module itself as `default`.
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.ManagedClient = void 0;
// Node stdlib — used for crypto.randomUUID() row ids.
const crypto = __importStar(require("crypto"));
// Project-local shared modules (REST helpers, credential handling, query
// builder, error types, schema utilities, token parsing, WASM bindings).
const api_1 = require("../shared/api");
const credentials_1 = require("../shared/credentials");
const database_1 = require("../shared/database");
const errors_1 = require("../shared/errors");
const schema_1 = require("../shared/schema");
const token_1 = require("../shared/token");
const errors_2 = require("../shared/errors");
const wasm_common_1 = require("../shared/wasm-common");
// Data normalizers that turn user input into column-batch iterators.
const dict_1 = require("./normalizers/dict");
const files_1 = require("./normalizers/files");
const huggingface_1 = require("./normalizers/huggingface");
49
// Default base URL of the managed Deep Lake REST API.
const DEFAULT_API_URL = 'https://api.deeplake.ai';
/**
 * Client for the managed Deep Lake service: table creation, ingestion,
 * SQL queries, and direct dataset access over a REST API plus WASM bindings.
 */
class ManagedClient {
    token;                // bearer token used on every API request
    workspaceId;          // target workspace; defaults to 'default'
    apiUrl;               // API base URL, trailing slashes stripped
    orgId;                // organization id; '' until resolved
    _rootPath = null;     // cached workspace storage root (see getRootPath)
    _orgIdResolved;       // true once orgId has been determined
    _orgIdPromise = null; // in-flight async org-id resolution, if any
    /**
     * @param options.token       required API token; AuthError if missing
     * @param options.workspaceId optional workspace (default 'default')
     * @param options.apiUrl      optional API base URL override
     * @throws AuthError when no token is supplied
     */
    constructor(options) {
        if (!options.token) {
            throw new errors_1.AuthError('Token required. Pass token to ManagedClient() or set the ACTIVELOOP_TOKEN environment variable.');
        }
        this.token = options.token;
        this.workspaceId = options.workspaceId ?? 'default';
        this.apiUrl = (options.apiUrl ?? DEFAULT_API_URL).replace(/\/+$/, '');
        // Try sync extraction first (works for most tokens)
        const syncOrgId = (0, token_1.tryExtractOrgId)(this.token);
        if (syncOrgId) {
            this.orgId = syncOrgId;
            this._orgIdResolved = true;
        }
        else {
            // Org id not present in the token — resolved lazily via the API
            // on the first call to ensureInitialized().
            this.orgId = '';
            this._orgIdResolved = false;
        }
    }
76
+ async ensureInitialized() {
77
+ if (this._orgIdResolved)
78
+ return;
79
+ if (!this._orgIdPromise) {
80
+ this._orgIdPromise = this._resolveOrgId();
81
+ }
82
+ await this._orgIdPromise;
83
+ }
84
+ async _resolveOrgId() {
85
+ const orgId = await (0, token_1.fetchOrgIdFromApi)(this.token, this.apiUrl);
86
+ if (orgId) {
87
+ this.orgId = orgId;
88
+ this._orgIdResolved = true;
89
+ }
90
+ else {
91
+ throw new errors_2.TokenError('Token does not contain org_id claim and API fallback failed');
92
+ }
93
+ }
94
    /**
     * Start building a fluent query against the given table.
     *
     * @example
     * const rows = await client.table("videos").select("id", "text").limit(10).execute();
     */
    table(tableName) {
        return new database_1.ManagedQueryBuilder(this, tableName);
    }
    // ------------------------------------------------------------------
    // Storage path
    // ------------------------------------------------------------------
    /**
     * Resolve (and cache) the workspace's root storage path from the API.
     * @throws ManagedServiceError when the API response has no storage_path.
     */
    async getRootPath() {
        await this.ensureInitialized();
        if (this._rootPath !== null)
            return this._rootPath;
        const resp = await (0, api_1.apiRequest)(this.apiUrl, this.token, this.orgId, {
            method: 'GET',
            path: `/workspaces/${this.workspaceId}/storage-path`,
        });
        const storagePath = resp.storage_path;
        if (!storagePath) {
            throw new errors_1.ManagedServiceError(`No storage path returned for workspace '${this.workspaceId}'`);
        }
        this._rootPath = storagePath;
        return this._rootPath;
    }
    /**
     * Build the dataset path for a table under the workspace root.
     * Avoids doubling the workspace segment when the root already ends
     * with `/{workspaceId}`.
     */
    async getDatasetPath(tableName) {
        const rootPath = (await this.getRootPath()).replace(/\/+$/, '');
        if (rootPath.endsWith(`/${this.workspaceId}`)) {
            return `${rootPath}/${tableName}`;
        }
        return `${rootPath}/${this.workspaceId}/${tableName}`;
    }
    // ------------------------------------------------------------------
    // Storage credentials
    // ------------------------------------------------------------------
    // Fetch storage credentials for this workspace; `mode` is forwarded
    // to the shared credentials helper (default 'readwrite').
    async getStorageCreds(mode = 'readwrite') {
        await this.ensureInitialized();
        return (0, credentials_1.getStorageCreds)(this.apiUrl, this.token, this.orgId, this.workspaceId, mode);
    }
    // Fetch credentials and apply them (delegates to the shared
    // credentials module; presumably sets process-level state — see there).
    async applyStorageCreds(mode = 'readwrite') {
        const creds = await this.getStorageCreds(mode);
        (0, credentials_1.applyStorageCreds)(creds);
    }
139
    // ------------------------------------------------------------------
    // Ingest pipeline
    // ------------------------------------------------------------------
    /**
     * Ingest data into `tableName`, creating the table if needed.
     *
     * Input source is chosen in priority order:
     *   1. `options.format` — an object with a `normalize()` method (and
     *      optionally `schema()` / `pg_schema()` / `image_columns()`);
     *   2. `data._huggingface` — a HuggingFace dataset name;
     *   3. a user `schema` with FILE-typed columns — file paths are chunked
     *      via the files normalizer (chunkSize/chunkOverlap apply here);
     *   4. otherwise `data` is treated as a plain column dict.
     *
     * @param data     column dict (object), or null when `format` is given
     * @param options  { format, schema, onProgress, chunkSize, chunkOverlap, index }
     * @returns        { tableName, rowCount, datasetPath }
     * @throws IngestError on invalid input or any wrapped ingestion failure
     */
    async ingest(tableName, data, options) {
        const format = options?.format;
        if (format == null && data == null) {
            throw new errors_1.IngestError('Either data or format must be provided.');
        }
        if (data != null && typeof data !== 'object') {
            throw new errors_1.IngestError('data must be a dict (object).');
        }
        if (data != null && Object.keys(data).length === 0 && format == null) {
            throw new errors_1.IngestError('data must not be empty.');
        }
        let schema = options?.schema;
        const onProgress = options?.onProgress;
        const chunkSize = options?.chunkSize ?? 1000;
        const chunkOverlap = options?.chunkOverlap ?? 200;
        // al:// path for this table — used by the C++ backend to resolve
        // credentials via GET /api/org/{workspace}/ds/{table}/creds
        const alPath = `al://${this.workspaceId}/${tableName}`;
        try {
            // Determine batch source (sync or async iterable of column dicts)
            let batches;
            if (format != null) {
                if (typeof format.normalize !== 'function') {
                    throw new errors_1.IngestError('format object must have a normalize() method.');
                }
                // Merge format-provided schema hints with user schema
                // (user-provided keys win over format hints).
                if (typeof format.schema === 'function') {
                    const fmtSchema = format.schema();
                    if (fmtSchema) {
                        schema = { ...fmtSchema, ...(schema ?? {}) };
                    }
                }
                batches = format.normalize();
            }
            else if (data != null && '_huggingface' in data) {
                const datasetName = data._huggingface;
                // NOTE(review): arrays are accepted here (first element used)
                // although the error message only mentions strings — confirm intent.
                if (typeof datasetName !== 'string' && !Array.isArray(datasetName)) {
                    throw new errors_1.IngestError(`_huggingface value must be a string, got ${typeof datasetName}.`);
                }
                batches = (0, huggingface_1.normalizeHuggingface)(Array.isArray(datasetName) ? String(datasetName[0]) : String(datasetName));
            }
            else if (schema &&
                Object.values(schema).some((v) => v.toUpperCase() === 'FILE')) {
                // Schema declares FILE columns: collect all referenced paths
                // and run them through the file normalizer.
                const fileColumns = Object.entries(schema)
                    .filter(([, t]) => t.toUpperCase() === 'FILE')
                    .map(([c]) => c);
                const allFiles = [];
                for (const col of fileColumns) {
                    if (!(col in data)) {
                        throw new errors_1.IngestError(`Schema declares '${col}' as FILE but column not in data.`);
                    }
                    allFiles.push(...data[col]);
                }
                if (allFiles.length === 0) {
                    throw new errors_1.IngestError('FILE columns contain no file paths.');
                }
                batches = (0, files_1.normalizeFiles)(allFiles, chunkSize, chunkOverlap);
            }
            else {
                batches = (0, dict_1.normalizeDict)(data);
            }
            // Peek at the first batch to determine schema
            const iter = Symbol.asyncIterator in batches
                ? batches[Symbol.asyncIterator]()
                : batches[Symbol.iterator]();
            const firstResult = await iter.next();
            if (firstResult.done) {
                throw new errors_1.IngestError('No data to ingest.');
            }
            let firstBatch = firstResult.value;
            // Inject _id (UUID) into first batch so PG schema includes it
            if (!('_id' in firstBatch)) {
                const firstCol = Object.values(firstBatch)[0];
                const n = Array.isArray(firstCol) ? firstCol.length : 1;
                firstBatch['_id'] = Array.from({ length: n }, () => crypto.randomUUID());
            }
            // Sort columns alphabetically — pg_deeplake's DuckDB scanner
            // expects deeplake dataset columns to match the PG table column
            // order, and the API sorts table_schema keys alphabetically.
            firstBatch = Object.fromEntries(Object.entries(firstBatch).sort(([a], [b]) => a.localeCompare(b)));
            const pgSchema = (0, schema_1.buildPgSchema)(firstBatch, schema, format);
            // Create table via API — pg_deeplake creates both the PG table
            // and the S3 dataset in one shot.
            await this.createTableViaApi(tableName, alPath, pgSchema);
            // Open the dataset via al:// — the C++ backend resolves creds
            // automatically through /api/org/{ws}/ds/{table}/creds,
            // with automatic credential rotation.
            // Uses atomic setEndpoint+open to prevent concurrent clients from
            // clobbering the global endpoint between set and open.
            const ds = await (0, wasm_common_1.deeplakeSetEndpointAndOpen)(this.apiUrl, alPath, '', this.token);
            try {
                // Detect image columns for thumbnail generation
                const imageCols = this.detectImageColumns(format, pgSchema, firstBatch);
                // Re-chain the peeked batch with the remaining batches
                const allBatches = chainBatches(firstBatch, iter);
                const rowCount = await this.writeBatches(ds, allBatches, schema, onProgress, imageCols);
                // Final commit for any rows written after the last periodic commit
                await (0, wasm_common_1.deeplakeCommit)(ds);
                // Create indexes on requested columns
                if (options?.index) {
                    for (const col of options.index) {
                        try {
                            await this.createIndex(tableName, col);
                        }
                        catch (e) {
                            // Log warning but don't fail ingestion
                            console.warn(`Failed to create index on ${tableName}.${col}: ${e instanceof Error ? e.message : e}`);
                        }
                    }
                }
                return { tableName, rowCount, datasetPath: alPath };
            }
            finally {
                (0, wasm_common_1.deeplakeRelease)(ds);
            }
        }
        catch (e) {
            // Pass through domain errors unchanged; wrap everything else.
            if (e instanceof errors_1.IngestError || e instanceof errors_1.ManagedServiceError)
                throw e;
            throw new errors_1.IngestError(`Failed to ingest data: ${e instanceof Error ? e.message : e}`);
        }
    }
264
    // ------------------------------------------------------------------
    // Write batches
    // ------------------------------------------------------------------
    /**
     * Stream column batches into the open dataset `ds`.
     *
     * Buffers incoming batches and appends every `flushEvery` rows, commits
     * every `commitEvery` rows, generates JPEG thumbnails for `imageCols`
     * (when the optional `sharp` package is installed) into a separate
     * `thumbnails` table, and reports progress via `onProgress(rows, -1)`
     * (-1 = total unknown).
     *
     * @param batches   async-iterable of column dicts (values: arrays or scalars)
     * @returns         total number of rows appended
     */
    async writeBatches(ds, batches, schema, onProgress, imageCols = [], flushEvery = 200, commitEvery = 2000) {
        let rowCount = 0;          // rows appended so far
        let buffer = {};           // column -> accumulated values
        let bufferRows = 0;        // rows currently buffered
        let columnsEnsured = false; // ensureColumns() runs once, on first flush
        let rowsSinceCommit = 0;
        // Thumbnail collection
        let sharp = null;
        if (imageCols.length > 0) {
            try {
                sharp = require('sharp');
            }
            catch {
                // sharp not available — skip thumbnail generation
            }
        }
        // Pending thumbnail rows, flushed in batches of 200 below.
        const thumbBuffer = {
            file_id: [],
            column_name: [],
            dimension: [],
            content: [],
        };
        let thumbCount = 0;
        let thumbDs = null; // lazily opened 'thumbnails' dataset handle
        // Append buffered thumbnails, lazily creating/opening the table first.
        const flushThumbnails = async () => {
            if (thumbCount === 0)
                return;
            if (!thumbDs) {
                // Same flow as regular tables: create via API, then open with al://
                // Uses atomic setEndpoint+open to prevent endpoint races.
                const thumbAlPath = `al://${this.workspaceId}/thumbnails`;
                const thumbPgSchema = {
                    column_name: 'TEXT',
                    content: 'IMAGE',
                    dimension: 'TEXT',
                    file_id: 'TEXT',
                };
                await this.createTableViaApi('thumbnails', thumbAlPath, thumbPgSchema);
                thumbDs = await (0, wasm_common_1.deeplakeSetEndpointAndOpen)(this.apiUrl, thumbAlPath, '', this.token);
            }
            await (0, wasm_common_1.deeplakeAppend)(thumbDs, thumbBuffer);
            for (const k of Object.keys(thumbBuffer)) {
                thumbBuffer[k] = [];
            }
            thumbCount = 0;
        };
        // For every image value in `buf`, render one thumbnail per size in
        // THUMBNAIL_SIZES; individual failures are skipped silently.
        const generateThumbnailsForBuffer = async (buf) => {
            if (!sharp || imageCols.length === 0)
                return;
            const idVals = (buf['_id'] ?? []);
            for (const col of imageCols) {
                if (!(col in buf))
                    continue;
                const values = buf[col];
                for (let i = 0; i < values.length; i++) {
                    const val = values[i];
                    let raw;
                    if (Buffer.isBuffer(val)) {
                        raw = val;
                    }
                    else if (val instanceof Uint8Array) {
                        raw = Buffer.from(val);
                    }
                    else {
                        // Non-binary value — nothing to thumbnail.
                        continue;
                    }
                    if (raw.length === 0)
                        continue;
                    // Row id links the thumbnail back to its source row; falls
                    // back to the absolute row index when _id is missing.
                    const fid = i < idVals.length ? idVals[i] : String(rowCount + i);
                    for (const [w, h] of ManagedClient.THUMBNAIL_SIZES) {
                        try {
                            const thumbBuf = await sharp(raw)
                                .resize(w, h, { fit: 'inside' })
                                .jpeg({ quality: 85 })
                                .toBuffer();
                            thumbBuffer.file_id.push(fid);
                            thumbBuffer.column_name.push(col);
                            thumbBuffer.dimension.push(`${w}x${h}`);
                            thumbBuffer.content.push(thumbBuf);
                            thumbCount++;
                        }
                        catch {
                            // Skip this thumbnail
                        }
                    }
                    if (thumbCount >= 200) {
                        await flushThumbnails();
                    }
                }
            }
        };
        for await (const batch of batches) {
            if (!batch)
                continue;
            const firstCol = Object.values(batch)[0];
            if (Array.isArray(firstCol) && firstCol.length === 0)
                continue;
            // Row count of this batch: array length, or 1 for scalar columns.
            const batchRows = Array.isArray(firstCol) ? firstCol.length : 1;
            // Inject _id (UUID) if not already present or wrong length.
            // Format generators may carry over an empty _id array from
            // a previous yield, so always verify the length matches.
            const idCol = batch['_id'];
            if (!idCol || (Array.isArray(idCol) && idCol.length !== batchRows)) {
                batch['_id'] = Array.from({ length: batchRows }, () => crypto.randomUUID());
            }
            // Merge batch into buffer
            for (const [col, values] of Object.entries(batch)) {
                if (!(col in buffer)) {
                    buffer[col] = Array.isArray(values) ? [...values] : [values];
                }
                else {
                    if (Array.isArray(values)) {
                        buffer[col].push(...values);
                    }
                    else {
                        buffer[col].push(values);
                    }
                }
            }
            bufferRows += batchRows;
            // Flush buffer when large enough
            if (bufferRows >= flushEvery) {
                if (!columnsEnsured) {
                    (0, schema_1.ensureColumns)(ds, buffer, schema);
                    columnsEnsured = true;
                }
                // Generate thumbnails from buffer before flushing
                await generateThumbnailsForBuffer(buffer);
                await (0, wasm_common_1.deeplakeAppend)(ds, buffer);
                rowCount += bufferRows;
                rowsSinceCommit += bufferRows;
                buffer = {};
                bufferRows = 0;
                if (onProgress) {
                    onProgress(rowCount, -1);
                }
                // Periodic commit to free C++ buffers and enable crash recovery
                if (rowsSinceCommit >= commitEvery) {
                    await (0, wasm_common_1.deeplakeCommit)(ds);
                    rowsSinceCommit = 0;
                }
            }
        }
        // Flush remaining rows
        if (bufferRows > 0) {
            if (!columnsEnsured) {
                (0, schema_1.ensureColumns)(ds, buffer, schema);
            }
            await generateThumbnailsForBuffer(buffer);
            await (0, wasm_common_1.deeplakeAppend)(ds, buffer);
            rowCount += bufferRows;
            if (onProgress) {
                onProgress(rowCount, -1);
            }
        }
        // Flush remaining thumbnails and commit
        if (thumbCount > 0 || thumbDs) {
            try {
                await flushThumbnails();
                if (thumbDs) {
                    await (0, wasm_common_1.deeplakeCommit)(thumbDs);
                }
            }
            catch {
                // Thumbnail finalization failed — non-fatal
            }
            finally {
                if (thumbDs)
                    (0, wasm_common_1.deeplakeRelease)(thumbDs);
            }
        }
        return rowCount;
    }
440
+ // ------------------------------------------------------------------
441
+ // Table management via REST API
442
+ // ------------------------------------------------------------------
443
+ async createTableViaApi(tableName, _datasetPath, pgSchema) {
444
+ await this.ensureInitialized();
445
+ try {
446
+ const body = { table_name: tableName };
447
+ if (pgSchema)
448
+ body.table_schema = pgSchema;
449
+ await (0, api_1.apiRequest)(this.apiUrl, this.token, this.orgId, {
450
+ method: 'POST',
451
+ path: `/workspaces/${this.workspaceId}/tables`,
452
+ body,
453
+ timeoutMs: 30_000,
454
+ });
455
+ }
456
+ catch (e) {
457
+ if (e instanceof errors_1.TableError &&
458
+ String(e.message).toLowerCase().includes('already exists')) {
459
+ // Table already exists — ok
460
+ return;
461
+ }
462
+ throw e;
463
+ }
464
+ }
465
+ // ------------------------------------------------------------------
466
+ // Query via REST API
467
+ // ------------------------------------------------------------------
468
+ async query(sql, params) {
469
+ await this.ensureInitialized();
470
+ const body = { query: sql };
471
+ if (params && params.length > 0) {
472
+ body.params = params;
473
+ }
474
+ try {
475
+ const resp = await (0, api_1.apiRequest)(this.apiUrl, this.token, this.orgId, {
476
+ method: 'POST',
477
+ path: `/workspaces/${this.workspaceId}/tables/query`,
478
+ body,
479
+ timeoutMs: 60_000,
480
+ });
481
+ const columns = (resp.columns ?? []);
482
+ const rows = (resp.rows ?? []);
483
+ return rows.map((row) => {
484
+ const obj = {};
485
+ for (let i = 0; i < columns.length; i++) {
486
+ obj[columns[i]] = row[i];
487
+ }
488
+ return obj;
489
+ });
490
+ }
491
+ catch (e) {
492
+ if (e instanceof errors_1.ManagedServiceError) {
493
+ throw new errors_1.TableError(`Query failed: ${e.message}`);
494
+ }
495
+ throw new errors_1.TableError(`Query failed: ${e instanceof Error ? e.message : e}`);
496
+ }
497
+ }
498
+ // ------------------------------------------------------------------
499
+ // List / Drop tables via REST API
500
+ // ------------------------------------------------------------------
501
+ async listTables() {
502
+ await this.ensureInitialized();
503
+ const resp = await (0, api_1.apiRequest)(this.apiUrl, this.token, this.orgId, {
504
+ method: 'GET',
505
+ path: `/workspaces/${this.workspaceId}/tables`,
506
+ });
507
+ const tables = (resp.tables ?? []);
508
+ return tables.map((t) => t.table_name);
509
+ }
510
+ async dropTable(tableName, ifExists = true) {
511
+ await this.ensureInitialized();
512
+ try {
513
+ await (0, api_1.apiRequest)(this.apiUrl, this.token, this.orgId, {
514
+ method: 'DELETE',
515
+ path: `/workspaces/${this.workspaceId}/tables/${tableName}`,
516
+ });
517
+ }
518
+ catch (e) {
519
+ if (ifExists &&
520
+ e instanceof errors_1.ManagedServiceError &&
521
+ String(e.message).toLowerCase().includes('not found')) {
522
+ return;
523
+ }
524
+ throw e;
525
+ }
526
+ }
527
+ /**
528
+ * Create a `deeplake_index` on a column.
529
+ *
530
+ * Suitable for EMBEDDING columns (vector similarity search) and TEXT
531
+ * columns (BM25 text search). This is a no-op if the index already exists.
532
+ */
533
+ async createIndex(tableName, column) {
534
+ const safeTable = tableName.replace(/"/g, '""');
535
+ const safeCol = column.replace(/"/g, '""');
536
+ const idxName = `idx_${tableName}_${column}`.replace(/"/g, '""');
537
+ const sql = `CREATE INDEX IF NOT EXISTS "${idxName}" ON "${safeTable}" USING deeplake_index ("${safeCol}")`;
538
+ try {
539
+ await this.query(sql);
540
+ }
541
+ catch (e) {
542
+ if (e instanceof errors_1.TableError &&
543
+ String(e.message).toLowerCase().includes('already exists')) {
544
+ return;
545
+ }
546
+ throw e;
547
+ }
548
+ }
549
+ async openTable(tableName) {
550
+ await this.ensureInitialized();
551
+ const alPath = `al://${this.workspaceId}/${tableName}`;
552
+ // Uses atomic setEndpoint+open to prevent concurrent clients from
553
+ // clobbering the global endpoint between set and open.
554
+ return (0, wasm_common_1.deeplakeSetEndpointAndOpen)(this.apiUrl, alPath, '', this.token);
555
+ }
556
+ // ------------------------------------------------------------------
557
+ // Direct dataset data access (bypasses SQL query API)
558
+ // ------------------------------------------------------------------
559
+ async getField(tableName, rowIndex, column) {
560
+ const ds = await this.openTable(tableName);
561
+ try {
562
+ return await (0, wasm_common_1.deeplakeGetField)(ds, rowIndex, column);
563
+ }
564
+ finally {
565
+ (0, wasm_common_1.deeplakeRelease)(ds);
566
+ }
567
+ }
568
+ async getRow(tableName, rowIndex, columns) {
569
+ const ds = await this.openTable(tableName);
570
+ try {
571
+ return await (0, wasm_common_1.deeplakeGetRow)(ds, rowIndex, columns);
572
+ }
573
+ finally {
574
+ (0, wasm_common_1.deeplakeRelease)(ds);
575
+ }
576
+ }
577
+ async getColumnData(tableName, column, start, end) {
578
+ const ds = await this.openTable(tableName);
579
+ try {
580
+ return await (0, wasm_common_1.deeplakeGetColumnData)(ds, column, start, end);
581
+ }
582
+ finally {
583
+ (0, wasm_common_1.deeplakeRelease)(ds);
584
+ }
585
+ }
586
+ async getNumRows(tableName) {
587
+ const ds = await this.openTable(tableName);
588
+ try {
589
+ return (0, wasm_common_1.deeplakeNumRows)(ds);
590
+ }
591
+ finally {
592
+ (0, wasm_common_1.deeplakeRelease)(ds);
593
+ }
594
+ }
595
+ async getSampleByRef(tableName, columnId, rowId) {
596
+ const ds = await this.openTable(tableName);
597
+ try {
598
+ return await (0, wasm_common_1.deeplakeGetSampleByRef)(ds, columnId, rowId);
599
+ }
600
+ finally {
601
+ (0, wasm_common_1.deeplakeRelease)(ds);
602
+ }
603
+ }
604
+ async setField(tableName, rowIndex, column, value) {
605
+ const ds = await this.openTable(tableName);
606
+ try {
607
+ await (0, wasm_common_1.deeplakeSetField)(ds, rowIndex, column, value);
608
+ await (0, wasm_common_1.deeplakeCommit)(ds, `setField ${column}[${rowIndex}]`);
609
+ }
610
+ finally {
611
+ (0, wasm_common_1.deeplakeRelease)(ds);
612
+ }
613
+ }
614
+ async setRow(tableName, rowIndex, data) {
615
+ const ds = await this.openTable(tableName);
616
+ try {
617
+ await (0, wasm_common_1.deeplakeSetRow)(ds, rowIndex, data);
618
+ await (0, wasm_common_1.deeplakeCommit)(ds, `setRow[${rowIndex}]`);
619
+ }
620
+ finally {
621
+ (0, wasm_common_1.deeplakeRelease)(ds);
622
+ }
623
+ }
624
    // ------------------------------------------------------------------
    // Thumbnail generation
    // ------------------------------------------------------------------
    /**
     * Decide which columns should get thumbnails, in priority order:
     * explicit format.image_columns(), IMAGE-typed columns from
     * format.pg_schema(), then a heuristic on a binary 'image' column.
     * Returns [] when nothing qualifies. `_pgSchema` is currently unused.
     */
    detectImageColumns(format, _pgSchema, firstBatch) {
        // 1. Format declares image_columns explicitly
        if (format && typeof format.image_columns === 'function') {
            const cols = format.image_columns();
            if (cols && cols.length > 0)
                return cols;
        }
        // 2. Format pg_schema declares IMAGE type
        if (format && typeof format.pg_schema === 'function') {
            const pg = format.pg_schema();
            const cols = Object.entries(pg)
                .filter(([, t]) => t.toUpperCase() === 'IMAGE')
                .map(([c]) => c);
            if (cols.length > 0)
                return cols;
        }
        // 3. Auto-detect: the image file normalizer creates an "image" column
        // with Buffer/bytes data. If "image" is in the batch, use it.
        if ('image' in firstBatch) {
            const sample = Array.isArray(firstBatch.image) ? firstBatch.image[0] : firstBatch.image;
            if (Buffer.isBuffer(sample) || sample instanceof Uint8Array) {
                return ['image'];
            }
        }
        return [];
    }
    // [width, height] pairs rendered per image (sharp 'inside' fit, JPEG q85).
    static THUMBNAIL_SIZES = [
        [32, 32], [64, 64], [128, 128], [256, 256],
    ];
}
exports.ManagedClient = ManagedClient;
658
// =====================================================================
// Helpers
// =====================================================================
/**
 * Re-chain a peeked first batch with the remainder of its iterator,
 * yielding `first` and then everything `rest` still produces. Works with
 * both sync and async iterators (`await` handles plain results).
 */
async function* chainBatches(first, rest) {
    yield first;
    for (let step = await rest.next(); !step.done; step = await rest.next()) {
        yield step.value;
    }
}
670
+ //# sourceMappingURL=client.js.map