@naturalcycles/db-lib 8.54.5 → 8.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,10 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.LocalFilePersistencePlugin = void 0;
4
- const fs = require("node:fs");
4
+ const tslib_1 = require("tslib");
5
+ const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
5
6
  const node_stream_1 = require("node:stream");
6
- const fsp = require("node:fs/promises");
7
+ const promises_1 = tslib_1.__importDefault(require("node:fs/promises"));
7
8
  const node_zlib_1 = require("node:zlib");
8
9
  const js_lib_1 = require("@naturalcycles/js-lib");
9
10
  const nodejs_lib_1 = require("@naturalcycles/nodejs-lib");
@@ -20,7 +21,7 @@ class LocalFilePersistencePlugin {
20
21
  }
21
22
  async ping() { }
22
23
  async getTables() {
23
- return (await fsp.readdir(this.cfg.storagePath))
24
+ return (await promises_1.default.readdir(this.cfg.storagePath))
24
25
  .filter(f => f.includes('.ndjson'))
25
26
  .map(f => f.split('.ndjson')[0]);
26
27
  }
@@ -33,7 +34,7 @@ class LocalFilePersistencePlugin {
33
34
  const transformUnzip = this.cfg.gzip ? [(0, node_zlib_1.createUnzip)()] : [];
34
35
  const rows = [];
35
36
  await (0, nodejs_lib_1._pipeline)([
36
- fs.createReadStream(filePath),
37
+ node_fs_1.default.createReadStream(filePath),
37
38
  ...transformUnzip,
38
39
  (0, nodejs_lib_1.transformSplit)(),
39
40
  (0, nodejs_lib_1.transformJsonParse)(),
@@ -53,7 +54,7 @@ class LocalFilePersistencePlugin {
53
54
  node_stream_1.Readable.from(rows),
54
55
  (0, nodejs_lib_1.transformToNDJson)(),
55
56
  ...transformZip,
56
- fs.createWriteStream(filePath),
57
+ node_fs_1.default.createWriteStream(filePath),
57
58
  ]);
58
59
  }
59
60
  }
@@ -1,8 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.InMemoryDB = void 0;
4
- const fs = require("node:fs");
5
- const fsp = require("node:fs/promises");
4
+ const tslib_1 = require("tslib");
5
+ const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
6
+ const promises_1 = tslib_1.__importDefault(require("node:fs/promises"));
6
7
  const node_stream_1 = require("node:stream");
7
8
  const node_zlib_1 = require("node:zlib");
8
9
  const js_lib_1 = require("@naturalcycles/js-lib");
@@ -181,7 +182,7 @@ class InMemoryDB {
181
182
  node_stream_1.Readable.from(rows),
182
183
  (0, nodejs_lib_1.transformToNDJson)(),
183
184
  ...transformZip,
184
- fs.createWriteStream(fname),
185
+ node_fs_1.default.createWriteStream(fname),
185
186
  ]);
186
187
  });
187
188
  this.cfg.logger.log(`flushToDisk took ${(0, nodejs_lib_2.dimGrey)((0, js_lib_1._since)(started))} to save ${(0, nodejs_lib_2.yellow)(tables)} tables`);
@@ -195,7 +196,7 @@ class InMemoryDB {
195
196
  const started = Date.now();
196
197
  await (0, nodejs_lib_1._ensureDir)(persistentStoragePath);
197
198
  this.data = {}; // empty it in the beginning!
198
- const files = (await fsp.readdir(persistentStoragePath)).filter(f => f.includes('.ndjson'));
199
+ const files = (await promises_1.default.readdir(persistentStoragePath)).filter(f => f.includes('.ndjson'));
199
200
  // infinite concurrency for now
200
201
  await (0, js_lib_1.pMap)(files, async (file) => {
201
202
  const fname = `${persistentStoragePath}/${file}`;
@@ -203,7 +204,7 @@ class InMemoryDB {
203
204
  const transformUnzip = file.endsWith('.gz') ? [(0, node_zlib_1.createUnzip)()] : [];
204
205
  const rows = [];
205
206
  await (0, nodejs_lib_1._pipeline)([
206
- fs.createReadStream(fname),
207
+ node_fs_1.default.createReadStream(fname),
207
208
  ...transformUnzip,
208
209
  (0, nodejs_lib_1.transformSplit)(),
209
210
  (0, nodejs_lib_1.transformJsonParse)(),
@@ -1,8 +1,10 @@
1
+ /// <reference types="node" />
2
+ import { Transform } from 'node:stream';
1
3
  import { AnyObject, AsyncMapper, JsonSchemaObject, JsonSchemaRootObject, ObjectWithId, Promisable, Saved, UnixTimestampMillisNumber, Unsaved, ZodSchema } from '@naturalcycles/js-lib';
2
4
  import { AjvSchema, ObjectSchema, ReadableTyped } from '@naturalcycles/nodejs-lib';
3
5
  import { DBDeleteByIdsOperation, DBModelType, DBOperation, DBPatch, DBSaveBatchOperation, RunQueryResult } from '../db.model';
4
6
  import { DBQuery, RunnableDBQuery } from '../query/dbQuery';
5
- import { CommonDaoCfg, CommonDaoCreateOptions, CommonDaoOptions, CommonDaoSaveOptions, CommonDaoStreamForEachOptions, CommonDaoStreamOptions } from './common.dao.model';
7
+ import { CommonDaoCfg, CommonDaoCreateOptions, CommonDaoOptions, CommonDaoSaveOptions, CommonDaoStreamDeleteOptions, CommonDaoStreamForEachOptions, CommonDaoStreamOptions, CommonDaoStreamSaveOptions } from './common.dao.model';
6
8
  /**
7
9
  * Lowest common denominator API between supported Databases.
8
10
  *
@@ -62,7 +64,7 @@ export declare class CommonDao<BM extends Partial<ObjectWithId<ID>>, DBM extends
62
64
  /**
63
65
  * Stream as Readable, to be able to .pipe() it further with support of backpressure.
64
66
  */
65
- streamQueryAsDBM(q: DBQuery<DBM>, opt?: CommonDaoStreamOptions): ReadableTyped<DBM>;
67
+ streamQueryAsDBM(q: DBQuery<DBM>, opt?: CommonDaoStreamOptions<DBM>): ReadableTyped<DBM>;
66
68
  /**
67
69
  * Stream as Readable, to be able to .pipe() it further with support of backpressure.
68
70
  *
@@ -72,9 +74,9 @@ export declare class CommonDao<BM extends Partial<ObjectWithId<ID>>, DBM extends
72
74
  *
73
75
  * You can do `.pipe(transformNoOp)` to make it "valid again".
74
76
  */
75
- streamQuery(q: DBQuery<DBM>, opt?: CommonDaoStreamOptions): ReadableTyped<Saved<BM>>;
77
+ streamQuery(q: DBQuery<DBM>, opt?: CommonDaoStreamOptions<Saved<BM>>): ReadableTyped<Saved<BM>>;
76
78
  queryIds(q: DBQuery<DBM>, opt?: CommonDaoOptions): Promise<ID[]>;
77
- streamQueryIds(q: DBQuery<DBM>, opt?: CommonDaoStreamOptions): ReadableTyped<ID>;
79
+ streamQueryIds(q: DBQuery<DBM>, opt?: CommonDaoStreamOptions<ID>): ReadableTyped<ID>;
78
80
  streamQueryIdsForEach(q: DBQuery<DBM>, mapper: AsyncMapper<ID, void>, opt?: CommonDaoStreamForEachOptions<ID>): Promise<void>;
79
81
  /**
80
82
  * Mutates!
@@ -106,6 +108,13 @@ export declare class CommonDao<BM extends Partial<ObjectWithId<ID>>, DBM extends
106
108
  saveAsDBM(dbm: DBM, opt?: CommonDaoSaveOptions<DBM>): Promise<DBM>;
107
109
  saveBatch(bms: Unsaved<BM>[], opt?: CommonDaoSaveOptions<DBM>): Promise<Saved<BM>[]>;
108
110
  saveBatchAsDBM(dbms: DBM[], opt?: CommonDaoSaveOptions<DBM>): Promise<DBM[]>;
111
+ /**
112
+ * "Streaming" is implemented by buffering incoming rows into **batches**
113
+ * (of size opt.batchSize, which defaults to 500),
114
+ * and then executing db.saveBatch(batch) with the concurrency
115
+ * of opt.batchConcurrency (which defaults to 16).
116
+ */
117
+ streamSaveTransform(opt?: CommonDaoStreamSaveOptions<DBM>): Transform[];
109
118
  /**
110
119
  * @returns number of deleted items
111
120
  */
@@ -117,9 +126,7 @@ export declare class CommonDao<BM extends Partial<ObjectWithId<ID>>, DBM extends
117
126
  * `deleteByIds` for each batch concurrently (infinite concurrency).
118
127
  * This is expected to be more memory-efficient way of deleting big numbers of rows.
119
128
  */
120
- deleteByQuery(q: DBQuery<DBM>, opt?: CommonDaoStreamForEachOptions<DBM> & {
121
- stream?: boolean;
122
- }): Promise<number>;
129
+ deleteByQuery(q: DBQuery<DBM>, opt?: CommonDaoStreamDeleteOptions<DBM>): Promise<number>;
123
130
  updateById(id: ID, patch: DBPatch<DBM>, opt?: CommonDaoOptions): Promise<number>;
124
131
  updateByIds(ids: ID[], patch: DBPatch<DBM>, opt?: CommonDaoOptions): Promise<number>;
125
132
  updateByQuery(q: DBQuery<DBM>, patch: DBPatch<DBM>, opt?: CommonDaoOptions): Promise<number>;
@@ -718,6 +718,58 @@ class CommonDao {
718
718
  this.logSaveResult(started, op, table);
719
719
  return rows;
720
720
  }
721
+ /**
722
+ * "Streaming" is implemented by buffering incoming rows into **batches**
723
+ * (of size opt.batchSize, which defaults to 500),
724
+ * and then executing db.saveBatch(batch) with the concurrency
725
+ * of opt.batchConcurrency (which defaults to 16).
726
+ */
727
+ streamSaveTransform(opt = {}) {
728
+ this.requireWriteAccess();
729
+ const table = opt.table || this.cfg.table;
730
+ opt.skipValidation ??= true;
731
+ opt.skipConversion ??= true;
732
+ opt.errorMode ||= js_lib_1.ErrorMode.SUPPRESS;
733
+ if (this.cfg.immutable && !opt.allowMutability && !opt.saveMethod) {
734
+ opt = { ...opt, saveMethod: 'insert' };
735
+ }
736
+ const excludeFromIndexes = opt.excludeFromIndexes || this.cfg.excludeFromIndexes;
737
+ const { beforeSave } = this.cfg.hooks;
738
+ const { batchSize = 500, batchConcurrency = 16, errorMode } = opt;
739
+ return [
740
+ (0, nodejs_lib_1.transformMap)(async (bm) => {
741
+ this.assignIdCreatedUpdated(bm, opt); // mutates
742
+ let dbm = await this.bmToDBM(bm, opt);
743
+ if (beforeSave) {
744
+ dbm = (await beforeSave(dbm));
745
+ if (dbm === null && !opt.tx)
746
+ return js_lib_1.SKIP;
747
+ }
748
+ return dbm;
749
+ }, {
750
+ errorMode,
751
+ }),
752
+ (0, nodejs_lib_1.transformBuffer)({ batchSize }),
753
+ (0, nodejs_lib_1.transformMap)(async (batch) => {
754
+ await this.cfg.db.saveBatch(table, batch, {
755
+ ...opt,
756
+ excludeFromIndexes,
757
+ });
758
+ return batch;
759
+ }, {
760
+ concurrency: batchConcurrency,
761
+ errorMode,
762
+ flattenArrayOutput: true,
763
+ }),
764
+ (0, nodejs_lib_1.transformLogProgress)({
765
+ metric: 'saved',
766
+ ...opt,
767
+ }),
768
+ // just to satisfy and simplify typings
769
+ // It's easier to return Transform[], rather than (Transform | Writable)[]
770
+ (0, nodejs_lib_1.writableVoid)(),
771
+ ];
772
+ }
721
773
  async deleteById(id, opt = {}) {
722
774
  if (!id)
723
775
  return 0;
@@ -754,8 +806,8 @@ class CommonDao {
754
806
  const op = `deleteByQuery(${q.pretty()})`;
755
807
  const started = this.logStarted(op, q.table);
756
808
  let deleted = 0;
757
- if (opt.stream) {
758
- const batchSize = 500;
809
+ if (opt.batchSize) {
810
+ const { batchSize, batchConcurrency = 16 } = opt;
759
811
  await (0, nodejs_lib_1._pipeline)([
760
812
  this.cfg.db.streamQuery(q.select(['id']), opt),
761
813
  (0, nodejs_lib_1.transformMapSimple)(objectWithId => objectWithId.id, {
@@ -766,6 +818,7 @@ class CommonDao {
766
818
  deleted += await this.cfg.db.deleteByQuery(dbQuery_1.DBQuery.create(q.table).filterIn('id', ids), opt);
767
819
  }, {
768
820
  predicate: js_lib_1._passthroughPredicate,
821
+ concurrency: batchConcurrency,
769
822
  }),
770
823
  // LogProgress should be AFTER the mapper, to be able to report correct stats
771
824
  (0, nodejs_lib_1.transformLogProgress)({
@@ -252,9 +252,13 @@ export interface CommonDaoSaveOptions<DBM extends ObjectWithId> extends CommonDa
252
252
  */
253
253
  ensureUniqueId?: boolean;
254
254
  }
255
- export interface CommonDaoStreamForEachOptions<IN> extends CommonDaoStreamOptions, TransformMapOptions<IN, any>, TransformLogProgressOptions<IN> {
255
+ export interface CommonDaoStreamDeleteOptions<DBM extends ObjectWithId> extends CommonDaoStreamOptions<DBM> {
256
256
  }
257
- export interface CommonDaoStreamOptions extends CommonDaoOptions {
257
+ export interface CommonDaoStreamSaveOptions<DBM extends ObjectWithId> extends CommonDaoSaveOptions<DBM>, CommonDaoStreamOptions<DBM> {
258
+ }
259
+ export interface CommonDaoStreamForEachOptions<IN> extends CommonDaoStreamOptions<IN>, TransformMapOptions<IN, any> {
260
+ }
261
+ export interface CommonDaoStreamOptions<IN> extends CommonDaoOptions, TransformLogProgressOptions<IN> {
258
262
  /**
259
263
  * @default true (for streams)
260
264
  */
@@ -268,5 +272,17 @@ export interface CommonDaoStreamOptions extends CommonDaoOptions {
268
272
  * @default ErrorMode.SUPPRESS for .forEach() streams as well, but overridable
269
273
  */
270
274
  errorMode?: ErrorMode;
275
+ /**
276
+ * Applicable to some of stream operations, e.g deleteByQuery.
277
+ * If set - `deleteByQuery` won't execute it "all at once", but in batches.
278
+ *
279
+ * Defaults to undefined, so the operation is executed "all at once".
280
+ */
281
+ batchSize?: number;
282
+ /**
283
+ * When batchSize is set - this option controls how many batches to run concurrently.
284
+ * Defaults to 16, "the magic number of JavaScript concurrency".
285
+ */
286
+ batchConcurrency?: number;
271
287
  }
272
288
  export type CommonDaoCreateOptions = CommonDBCreateOptions;
@@ -1,8 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.dbPipelineBackup = void 0;
4
- const fs = require("node:fs");
5
- const fsp = require("node:fs/promises");
4
+ const tslib_1 = require("tslib");
5
+ const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
6
+ const promises_1 = tslib_1.__importDefault(require("node:fs/promises"));
6
7
  const node_zlib_1 = require("node:zlib");
7
8
  const js_lib_1 = require("@naturalcycles/js-lib");
8
9
  const nodejs_lib_1 = require("@naturalcycles/nodejs-lib");
@@ -65,9 +66,9 @@ async function dbPipelineBackup(opt) {
65
66
  }),
66
67
  (0, nodejs_lib_1.transformToNDJson)({ strict, sortObjects }),
67
68
  ...(gzip ? [(0, node_zlib_1.createGzip)(zlibOptions)] : []),
68
- fs.createWriteStream(filePath),
69
+ node_fs_1.default.createWriteStream(filePath),
69
70
  ]);
70
- const { size: sizeBytes } = await fsp.stat(filePath);
71
+ const { size: sizeBytes } = await promises_1.default.stat(filePath);
71
72
  const stats = nodejs_lib_1.NDJsonStats.create({
72
73
  tookMillis: Date.now() - started,
73
74
  rows,
@@ -1,7 +1,8 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.dbPipelineRestore = void 0;
4
- const fs = require("node:fs");
4
+ const tslib_1 = require("tslib");
5
+ const node_fs_1 = tslib_1.__importDefault(require("node:fs"));
5
6
  const node_zlib_1 = require("node:zlib");
6
7
  const js_lib_1 = require("@naturalcycles/js-lib");
7
8
  const nodejs_lib_1 = require("@naturalcycles/nodejs-lib");
@@ -24,7 +25,7 @@ async function dbPipelineRestore(opt) {
24
25
  const sizeByTable = {};
25
26
  const statsPerTable = {};
26
27
  const tables = [];
27
- fs.readdirSync(inputDirPath).forEach(f => {
28
+ node_fs_1.default.readdirSync(inputDirPath).forEach(f => {
28
29
  let table;
29
30
  let gzip = false;
30
31
  if (f.endsWith('.ndjson')) {
@@ -42,7 +43,7 @@ async function dbPipelineRestore(opt) {
42
43
  tables.push(table);
43
44
  if (gzip)
44
45
  tablesToGzip.add(table);
45
- sizeByTable[table] = fs.statSync(`${inputDirPath}/${f}`).size;
46
+ sizeByTable[table] = node_fs_1.default.statSync(`${inputDirPath}/${f}`).size;
46
47
  });
47
48
  const sizeStrByTable = (0, js_lib_1._mapValues)(sizeByTable, (_k, b) => (0, js_lib_1._hb)(b));
48
49
  console.log(`${(0, nodejs_lib_2.yellow)(tables.length)} ${(0, nodejs_lib_2.boldWhite)('table(s)')}:\n`, sizeStrByTable);
@@ -50,7 +51,7 @@ async function dbPipelineRestore(opt) {
50
51
  if (recreateTables) {
51
52
  await (0, js_lib_1.pMap)(tables, async (table) => {
52
53
  const schemaFilePath = `${inputDirPath}/${table}.schema.json`;
53
- if (!fs.existsSync(schemaFilePath)) {
54
+ if (!node_fs_1.default.existsSync(schemaFilePath)) {
54
55
  console.warn(`${schemaFilePath} does not exist!`);
55
56
  return;
56
57
  }
@@ -67,7 +68,7 @@ async function dbPipelineRestore(opt) {
67
68
  const sizeBytes = sizeByTable[table];
68
69
  console.log(`<< ${(0, nodejs_lib_2.grey)(filePath)} ${(0, nodejs_lib_2.dimWhite)((0, js_lib_1._hb)(sizeBytes))} started...`);
69
70
  await (0, nodejs_lib_1._pipeline)([
70
- fs.createReadStream(filePath),
71
+ node_fs_1.default.createReadStream(filePath),
71
72
  ...(gzip ? [(0, node_zlib_1.createUnzip)()] : []),
72
73
  (0, nodejs_lib_1.transformSplit)(),
73
74
  (0, nodejs_lib_1.transformJsonParse)({ strict }),
@@ -1,6 +1,6 @@
1
1
  import { AnyObjectWithId, ObjectWithId, AsyncMapper, Saved, AnyObject } from '@naturalcycles/js-lib';
2
2
  import { ReadableTyped } from '@naturalcycles/nodejs-lib';
3
- import { CommonDaoOptions, CommonDaoStreamForEachOptions, CommonDaoStreamOptions, DBPatch } from '..';
3
+ import { CommonDaoOptions, CommonDaoStreamDeleteOptions, CommonDaoStreamForEachOptions, CommonDaoStreamOptions, DBPatch } from '..';
4
4
  import { CommonDao } from '../commondao/common.dao';
5
5
  import { RunQueryResult } from '../db.model';
6
6
  /**
@@ -101,12 +101,10 @@ export declare class RunnableDBQuery<BM extends Partial<ObjectWithId<ID>>, DBM e
101
101
  updateByQuery(patch: DBPatch<DBM>, opt?: CommonDaoOptions): Promise<number>;
102
102
  streamQueryForEach(mapper: AsyncMapper<Saved<BM>, void>, opt?: CommonDaoStreamForEachOptions<Saved<BM>>): Promise<void>;
103
103
  streamQueryAsDBMForEach(mapper: AsyncMapper<DBM, void>, opt?: CommonDaoStreamForEachOptions<DBM>): Promise<void>;
104
- streamQuery(opt?: CommonDaoStreamOptions): ReadableTyped<Saved<BM>>;
105
- streamQueryAsDBM(opt?: CommonDaoStreamOptions): ReadableTyped<DBM>;
104
+ streamQuery(opt?: CommonDaoStreamOptions<Saved<BM>>): ReadableTyped<Saved<BM>>;
105
+ streamQueryAsDBM(opt?: CommonDaoStreamOptions<DBM>): ReadableTyped<DBM>;
106
106
  queryIds(opt?: CommonDaoOptions): Promise<ID[]>;
107
- streamQueryIds(opt?: CommonDaoStreamOptions): ReadableTyped<ID>;
107
+ streamQueryIds(opt?: CommonDaoStreamOptions<ID>): ReadableTyped<ID>;
108
108
  streamQueryIdsForEach(mapper: AsyncMapper<ID, void>, opt?: CommonDaoStreamForEachOptions<ID>): Promise<void>;
109
- deleteByQuery(opt?: CommonDaoStreamForEachOptions<DBM> & {
110
- stream?: boolean;
111
- }): Promise<number>;
109
+ deleteByQuery(opt?: CommonDaoStreamDeleteOptions<DBM>): Promise<number>;
112
110
  }
@@ -1,6 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.runCommonDaoTest = void 0;
4
+ const node_stream_1 = require("node:stream");
4
5
  const js_lib_1 = require("@naturalcycles/js-lib");
5
6
  const nodejs_lib_1 = require("@naturalcycles/nodejs-lib");
6
7
  const __1 = require("..");
@@ -175,6 +176,15 @@ function runCommonDaoTest(db, features = {}, quirks = {}) {
175
176
  ids = ids.sort();
176
177
  (0, dbTest_1.expectMatch)(expectedItems.map(i => i.id), ids, quirks);
177
178
  });
179
+ test('streamSaveTransform', async () => {
180
+ const items2 = (0, test_model_1.createTestItemsBM)(2).map(i => ({ ...i, id: i.id + '_str' }));
181
+ const ids = items2.map(i => i.id);
182
+ await (0, nodejs_lib_1._pipeline)([node_stream_1.Readable.from(items2), ...dao.streamSaveTransform()]);
183
+ const items2Loaded = await dao.getByIds(ids);
184
+ (0, dbTest_1.expectMatch)(items2, items2Loaded, quirks);
185
+ // cleanup
186
+ await dao.query().filterIn('id', ids).deleteByQuery();
187
+ });
178
188
  }
179
189
  // DELETE BY
180
190
  if (querying) {
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  },
6
6
  "dependencies": {
7
7
  "@naturalcycles/js-lib": "^14.116.0",
8
- "@naturalcycles/nodejs-lib": "^12.0.0"
8
+ "@naturalcycles/nodejs-lib": "^13.1.1"
9
9
  },
10
10
  "devDependencies": {
11
11
  "@naturalcycles/bench-lib": "^1.0.0",
@@ -40,7 +40,7 @@
40
40
  "engines": {
41
41
  "node": ">=18.12"
42
42
  },
43
- "version": "8.54.5",
43
+ "version": "8.55.0",
44
44
  "description": "Lowest Common Denominator API to supported Databases",
45
45
  "keywords": [
46
46
  "db",
@@ -1,6 +1,6 @@
1
- import * as fs from 'node:fs'
1
+ import fs from 'node:fs'
2
2
  import { Readable } from 'node:stream'
3
- import * as fsp from 'node:fs/promises'
3
+ import fsp from 'node:fs/promises'
4
4
  import { createGzip, createUnzip } from 'node:zlib'
5
5
  import { pMap, ObjectWithId } from '@naturalcycles/js-lib'
6
6
  import {
@@ -1,5 +1,5 @@
1
- import * as fs from 'node:fs'
2
- import * as fsp from 'node:fs/promises'
1
+ import fs from 'node:fs'
2
+ import fsp from 'node:fs/promises'
3
3
  import { Readable } from 'node:stream'
4
4
  import { createGzip, createUnzip } from 'node:zlib'
5
5
  import {
@@ -316,12 +316,20 @@ export interface CommonDaoSaveOptions<DBM extends ObjectWithId>
316
316
  ensureUniqueId?: boolean
317
317
  }
318
318
 
319
+ export interface CommonDaoStreamDeleteOptions<DBM extends ObjectWithId>
320
+ extends CommonDaoStreamOptions<DBM> {}
321
+
322
+ export interface CommonDaoStreamSaveOptions<DBM extends ObjectWithId>
323
+ extends CommonDaoSaveOptions<DBM>,
324
+ CommonDaoStreamOptions<DBM> {}
325
+
319
326
  export interface CommonDaoStreamForEachOptions<IN>
320
- extends CommonDaoStreamOptions,
321
- TransformMapOptions<IN, any>,
322
- TransformLogProgressOptions<IN> {}
327
+ extends CommonDaoStreamOptions<IN>,
328
+ TransformMapOptions<IN, any> {}
323
329
 
324
- export interface CommonDaoStreamOptions extends CommonDaoOptions {
330
+ export interface CommonDaoStreamOptions<IN>
331
+ extends CommonDaoOptions,
332
+ TransformLogProgressOptions<IN> {
325
333
  /**
326
334
  * @default true (for streams)
327
335
  */
@@ -337,6 +345,20 @@ export interface CommonDaoStreamOptions extends CommonDaoOptions {
337
345
  * @default ErrorMode.SUPPRESS for .forEach() streams as well, but overridable
338
346
  */
339
347
  errorMode?: ErrorMode
348
+
349
+ /**
350
+ * Applicable to some of stream operations, e.g deleteByQuery.
351
+ * If set - `deleteByQuery` won't execute it "all at once", but in batches.
352
+ *
353
+ * Defaults to undefined, so the operation is executed "all at once".
354
+ */
355
+ batchSize?: number
356
+
357
+ /**
358
+ * When batchSize is set - this option controls how many batches to run concurrently.
359
+ * Defaults to 16, "the magic number of JavaScript concurrency".
360
+ */
361
+ batchConcurrency?: number
340
362
  }
341
363
 
342
364
  export type CommonDaoCreateOptions = CommonDBCreateOptions
@@ -1,3 +1,4 @@
1
+ import { Transform } from 'node:stream'
1
2
  import {
2
3
  _assert,
3
4
  _filterNullishValues,
@@ -57,8 +58,10 @@ import {
57
58
  CommonDaoLogLevel,
58
59
  CommonDaoOptions,
59
60
  CommonDaoSaveOptions,
61
+ CommonDaoStreamDeleteOptions,
60
62
  CommonDaoStreamForEachOptions,
61
63
  CommonDaoStreamOptions,
64
+ CommonDaoStreamSaveOptions,
62
65
  } from './common.dao.model'
63
66
 
64
67
  const isGAE = !!process.env['GAE_INSTANCE']
@@ -529,7 +532,7 @@ export class CommonDao<
529
532
  /**
530
533
  * Stream as Readable, to be able to .pipe() it further with support of backpressure.
531
534
  */
532
- streamQueryAsDBM(q: DBQuery<DBM>, opt: CommonDaoStreamOptions = {}): ReadableTyped<DBM> {
535
+ streamQueryAsDBM(q: DBQuery<DBM>, opt: CommonDaoStreamOptions<DBM> = {}): ReadableTyped<DBM> {
533
536
  q.table = opt.table || q.table
534
537
  opt.skipValidation = opt.skipValidation !== false // default true
535
538
  opt.skipConversion = opt.skipConversion !== false // default true
@@ -568,7 +571,10 @@ export class CommonDao<
568
571
  *
569
572
  * You can do `.pipe(transformNoOp)` to make it "valid again".
570
573
  */
571
- streamQuery(q: DBQuery<DBM>, opt: CommonDaoStreamOptions = {}): ReadableTyped<Saved<BM>> {
574
+ streamQuery(
575
+ q: DBQuery<DBM>,
576
+ opt: CommonDaoStreamOptions<Saved<BM>> = {},
577
+ ): ReadableTyped<Saved<BM>> {
572
578
  q.table = opt.table || q.table
573
579
  opt.skipValidation = opt.skipValidation !== false // default true
574
580
  opt.skipConversion = opt.skipConversion !== false // default true
@@ -611,7 +617,7 @@ export class CommonDao<
611
617
  return rows.map(r => r.id)
612
618
  }
613
619
 
614
- streamQueryIds(q: DBQuery<DBM>, opt: CommonDaoStreamOptions = {}): ReadableTyped<ID> {
620
+ streamQueryIds(q: DBQuery<DBM>, opt: CommonDaoStreamOptions<ID> = {}): ReadableTyped<ID> {
615
621
  q.table = opt.table || q.table
616
622
  opt.errorMode ||= ErrorMode.SUPPRESS
617
623
 
@@ -958,6 +964,72 @@ export class CommonDao<
958
964
  return rows
959
965
  }
960
966
 
967
+ /**
968
+ * "Streaming" is implemented by buffering incoming rows into **batches**
969
+ * (of size opt.batchSize, which defaults to 500),
970
+ * and then executing db.saveBatch(batch) with the concurrency
971
+ * of opt.batchConcurrency (which defaults to 16).
972
+ */
973
+ streamSaveTransform(opt: CommonDaoStreamSaveOptions<DBM> = {}): Transform[] {
974
+ this.requireWriteAccess()
975
+
976
+ const table = opt.table || this.cfg.table
977
+ opt.skipValidation ??= true
978
+ opt.skipConversion ??= true
979
+ opt.errorMode ||= ErrorMode.SUPPRESS
980
+
981
+ if (this.cfg.immutable && !opt.allowMutability && !opt.saveMethod) {
982
+ opt = { ...opt, saveMethod: 'insert' }
983
+ }
984
+
985
+ const excludeFromIndexes = opt.excludeFromIndexes || this.cfg.excludeFromIndexes
986
+ const { beforeSave } = this.cfg.hooks!
987
+
988
+ const { batchSize = 500, batchConcurrency = 16, errorMode } = opt
989
+
990
+ return [
991
+ transformMap<BM, DBM>(
992
+ async bm => {
993
+ this.assignIdCreatedUpdated(bm, opt) // mutates
994
+
995
+ let dbm = await this.bmToDBM(bm, opt)
996
+
997
+ if (beforeSave) {
998
+ dbm = (await beforeSave(dbm))!
999
+ if (dbm === null && !opt.tx) return SKIP
1000
+ }
1001
+
1002
+ return dbm
1003
+ },
1004
+ {
1005
+ errorMode,
1006
+ },
1007
+ ),
1008
+ transformBuffer<DBM>({ batchSize }),
1009
+ transformMap<DBM[], DBM[]>(
1010
+ async batch => {
1011
+ await this.cfg.db.saveBatch(table, batch, {
1012
+ ...opt,
1013
+ excludeFromIndexes,
1014
+ })
1015
+ return batch
1016
+ },
1017
+ {
1018
+ concurrency: batchConcurrency,
1019
+ errorMode,
1020
+ flattenArrayOutput: true,
1021
+ },
1022
+ ),
1023
+ transformLogProgress({
1024
+ metric: 'saved',
1025
+ ...opt,
1026
+ }),
1027
+ // just to satisfy and simplify typings
1028
+ // It's easier to return Transform[], rather than (Transform | Writable)[]
1029
+ writableVoid() as Transform,
1030
+ ]
1031
+ }
1032
+
961
1033
  // DELETE
962
1034
  /**
963
1035
  * @returns number of deleted items
@@ -995,7 +1067,7 @@ export class CommonDao<
995
1067
  */
996
1068
  async deleteByQuery(
997
1069
  q: DBQuery<DBM>,
998
- opt: CommonDaoStreamForEachOptions<DBM> & { stream?: boolean } = {},
1070
+ opt: CommonDaoStreamDeleteOptions<DBM> = {},
999
1071
  ): Promise<number> {
1000
1072
  this.requireWriteAccess()
1001
1073
  this.requireObjectMutability(opt)
@@ -1004,8 +1076,8 @@ export class CommonDao<
1004
1076
  const started = this.logStarted(op, q.table)
1005
1077
  let deleted = 0
1006
1078
 
1007
- if (opt.stream) {
1008
- const batchSize = 500
1079
+ if (opt.batchSize) {
1080
+ const { batchSize, batchConcurrency = 16 } = opt
1009
1081
 
1010
1082
  await _pipeline([
1011
1083
  this.cfg.db.streamQuery<DBM>(q.select(['id']), opt),
@@ -1022,6 +1094,7 @@ export class CommonDao<
1022
1094
  },
1023
1095
  {
1024
1096
  predicate: _passthroughPredicate,
1097
+ concurrency: batchConcurrency,
1025
1098
  },
1026
1099
  ),
1027
1100
  // LogProgress should be AFTER the mapper, to be able to report correct stats
@@ -1,5 +1,5 @@
1
- import * as fs from 'node:fs'
2
- import * as fsp from 'node:fs/promises'
1
+ import fs from 'node:fs'
2
+ import fsp from 'node:fs/promises'
3
3
  import { createGzip, ZlibOptions } from 'node:zlib'
4
4
  import {
5
5
  AppError,
@@ -1,4 +1,4 @@
1
- import * as fs from 'node:fs'
1
+ import fs from 'node:fs'
2
2
  import { createUnzip } from 'node:zlib'
3
3
  import {
4
4
  AsyncMapper,
@@ -10,6 +10,7 @@ import {
10
10
  import { ReadableTyped } from '@naturalcycles/nodejs-lib'
11
11
  import {
12
12
  CommonDaoOptions,
13
+ CommonDaoStreamDeleteOptions,
13
14
  CommonDaoStreamForEachOptions,
14
15
  CommonDaoStreamOptions,
15
16
  DBPatch,
@@ -301,11 +302,11 @@ export class RunnableDBQuery<
301
302
  await this.dao.streamQueryAsDBMForEach(this, mapper, opt)
302
303
  }
303
304
 
304
- streamQuery(opt?: CommonDaoStreamOptions): ReadableTyped<Saved<BM>> {
305
+ streamQuery(opt?: CommonDaoStreamOptions<Saved<BM>>): ReadableTyped<Saved<BM>> {
305
306
  return this.dao.streamQuery(this, opt)
306
307
  }
307
308
 
308
- streamQueryAsDBM(opt?: CommonDaoStreamOptions): ReadableTyped<DBM> {
309
+ streamQueryAsDBM(opt?: CommonDaoStreamOptions<DBM>): ReadableTyped<DBM> {
309
310
  return this.dao.streamQueryAsDBM(this, opt)
310
311
  }
311
312
 
@@ -313,7 +314,7 @@ export class RunnableDBQuery<
313
314
  return await this.dao.queryIds(this, opt)
314
315
  }
315
316
 
316
- streamQueryIds(opt?: CommonDaoStreamOptions): ReadableTyped<ID> {
317
+ streamQueryIds(opt?: CommonDaoStreamOptions<ID>): ReadableTyped<ID> {
317
318
  return this.dao.streamQueryIds(this, opt)
318
319
  }
319
320
 
@@ -324,9 +325,7 @@ export class RunnableDBQuery<
324
325
  await this.dao.streamQueryIdsForEach(this, mapper, opt)
325
326
  }
326
327
 
327
- async deleteByQuery(
328
- opt?: CommonDaoStreamForEachOptions<DBM> & { stream?: boolean },
329
- ): Promise<number> {
328
+ async deleteByQuery(opt?: CommonDaoStreamDeleteOptions<DBM>): Promise<number> {
330
329
  return await this.dao.deleteByQuery(this, opt)
331
330
  }
332
331
  }
@@ -1,5 +1,6 @@
1
+ import { Readable } from 'node:stream'
1
2
  import { pDelay, _deepCopy, _pick, _sortBy, _omit, localTime } from '@naturalcycles/js-lib'
2
- import { readableToArray, transformNoOp } from '@naturalcycles/nodejs-lib'
3
+ import { _pipeline, readableToArray, transformNoOp } from '@naturalcycles/nodejs-lib'
3
4
  import { CommonDaoLogLevel, DBQuery } from '..'
4
5
  import { CommonDB } from '../common.db'
5
6
  import { CommonDao } from '../commondao/common.dao'
@@ -252,6 +253,19 @@ export function runCommonDaoTest(
252
253
  quirks,
253
254
  )
254
255
  })
256
+
257
+ test('streamSaveTransform', async () => {
258
+ const items2 = createTestItemsBM(2).map(i => ({ ...i, id: i.id + '_str' }))
259
+ const ids = items2.map(i => i.id)
260
+
261
+ await _pipeline([Readable.from(items2), ...dao.streamSaveTransform()])
262
+
263
+ const items2Loaded = await dao.getByIds(ids)
264
+ expectMatch(items2, items2Loaded, quirks)
265
+
266
+ // cleanup
267
+ await dao.query().filterIn('id', ids).deleteByQuery()
268
+ })
255
269
  }
256
270
 
257
271
  // DELETE BY