@miso.ai/server-commons 0.6.5-beta.0 → 0.6.5-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -21,5 +21,5 @@
      "uuid": "^9.0.0",
      "yargs": "^17.5.1"
    },
-   "version": "0.6.5-beta.0"
+   "version": "0.6.5-beta.10"
  }
package/src/file.js CHANGED
@@ -1,5 +1,6 @@
  import { access } from 'fs/promises';
- import { accessSync, constants } from 'fs';
+ import { accessSync, constants, createReadStream } from 'fs';
+ import { createInterface } from 'readline';

  export async function fileExists(file, mode = constants.F_OK) {
    try {
@@ -18,3 +19,23 @@ export function fileExistsSync(file, mode = constants.F_OK) {
      return false;
    }
  }
+
+ export async function readFileAsLines(file) {
+   const fileStream = createReadStream(file, { encoding: 'utf8' });
+   const rl = createInterface({
+     input: fileStream,
+     crlfDelay: Infinity
+   });
+
+   const lines = [];
+   for await (const line of rl) {
+     const trimmed = line.trim();
+     if (trimmed) {
+       lines.push(trimmed);
+     }
+   }
+
+   rl.close();
+
+   return lines;
+ }
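Note: readFileAsLines streams the file through a readline interface and resolves to only the trimmed, non-empty lines. A minimal usage sketch (the path is illustrative, and the import assumes direct access to src/file.js, since this diff does not show the helper being re-exported from the package root):

import { readFileAsLines } from './file.js';

// Resolves to an array of trimmed, non-empty lines; rejects (e.g. with ENOENT) if the file cannot be read.
const lines = await readFileAsLines('./data/seen.hashes');
console.log(`loaded ${lines.length} lines`);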
package/src/index.js CHANGED
@@ -14,3 +14,4 @@ export * as yargs from './yargs.js';
  export { default as Resolution } from './resolution.js';
  export { default as TaskQueue } from './task-queue.js';
  export { default as RateLimitingQueue } from './rate-limiting-queue.js';
+ export { default as HashStore } from './store.js';
package/src/sink/bps.js CHANGED
@@ -12,7 +12,7 @@ export default class BpsSink {

    _normalizeOptions({
      recordsPerSecord = 100000,
-     bytesPerSecond = 4 * 1024 * 1024,
+     bytesPerSecond = 100 * 1024 * 1024,
      ...options
    } = {}) {
      return {
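Note: the default bytesPerSecond ceiling for BpsSink rises from 4 MiB/s to 100 MiB/s. Callers that relied on the old, more conservative limit can still request it explicitly; a sketch, assuming the sink forwards its constructor options through _normalizeOptions (the constructor wiring is not shown in this diff):

// Restore the previous 4 MiB/s throughput cap explicitly (assumed constructor wiring).
const sink = new BpsSink({ bytesPerSecond: 4 * 1024 * 1024 });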
package/src/store.js ADDED
@@ -0,0 +1,129 @@
+ import { resolve } from 'path';
+ import fs from 'fs/promises';
+ import { Transform } from 'stream';
+ import { readFileAsLines } from './file.js';
+
+ const DEFAULT_FLUSH_THRESHOLD = 100;
+
+ export default class HashStore {
+
+   constructor({ file, hashFn, flushThreshold = DEFAULT_FLUSH_THRESHOLD } = {}) {
+     if (!file) {
+       throw new Error('File path is required');
+     }
+     if (!hashFn) {
+       throw new Error('Hash function is required');
+     }
+     this._file = file;
+     this._hashFn = hashFn;
+     this._flushThreshold = flushThreshold;
+     this._hashes = new Set();
+     this._pending = [];
+   }
+
+   async purge() {
+     // peek data length
+     const length = (await this._read()).length;
+
+     this._hashes = new Set();
+     // delete file
+     try {
+       await fs.unlink(this._file);
+     } catch (err) {
+       if (err.code !== 'ENOENT') {
+         throw err;
+       }
+     }
+
+     return length;
+   }
+
+   async load() {
+     this._hashes = new Set(await this._read());
+     this._pending = [];
+     return this;
+   }
+
+   get() {
+     return this._hashes;
+   }
+
+   contains(item) {
+     return this._hashes.has(this._hashFn(item));
+   }
+
+   async add(...items) {
+     for (const item of items) {
+       const hash = this._hashFn(item);
+       if (!this._hashes.has(hash)) {
+         this._hashes.add(hash);
+         this._pending.push(hash);
+       }
+     }
+     if (this._pending.length >= this._flushThreshold) {
+       await this.flush();
+     }
+   }
+
+   async flush() {
+     if (this._pending.length === 0) {
+       return;
+     }
+     const pending = this._pending;
+     this._pending = [];
+     await this._mkdir();
+     await fs.appendFile(this._file, pending.join('\n') + '\n');
+   }
+
+   exclusionStream() {
+     return new HashStoreFilterTransform(this, { mode: 'exclude' });
+   }
+
+   dedupeStream() {
+     return new HashStoreFilterTransform(this, { mode: 'dedupe' });
+   }
+
+   async _mkdir() {
+     const dir = resolve(this._file, '..');
+     await fs.mkdir(dir, { recursive: true });
+   }
+
+   async _read() {
+     try {
+       return await readFileAsLines(this._file);
+     } catch (err) {
+       if (err.code !== 'ENOENT') {
+         throw err;
+       }
+       return [];
+     }
+   }
+
+ }
+
+ class HashStoreFilterTransform extends Transform {
+
+   constructor(store, { mode } = {}) {
+     super({ objectMode: true });
+     this._store = store;
+     switch (mode) {
+       case 'exclude':
+       case 'dedupe':
+         break;
+       default:
+         throw new Error(`Unrecognized mode: ${mode}`);
+     }
+     this._mode = mode;
+   }
+
+   _transform(item, _, next) {
+     if (!this._store.contains(item)) {
+       this.push(item);
+       if (this._mode === 'dedupe') {
+         this._store.add(item);
+       }
+     }
+     next();
+   }
+
+ }
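Note: HashStore keeps a Set of hashes in memory, appends newly seen hashes to a plain-text file once flushThreshold is reached, and exposes object-mode Transform streams for filtering. A usage sketch (the file path and hash function are illustrative):

import { createHash } from 'crypto';
import { HashStore } from '@miso.ai/server-commons';

const store = new HashStore({
  file: './data/seen.hashes', // illustrative location
  hashFn: item => createHash('sha256').update(JSON.stringify(item)).digest('hex'),
  flushThreshold: 100,
});

await store.load();                    // hydrate the in-memory Set from the file, if present
await store.add({ id: 1 }, { id: 2 }); // buffered; written to disk once flushThreshold is reached
store.contains({ id: 1 });             // => true
await store.flush();                   // force any pending hashes to disk

// exclusionStream() drops items already in the store; dedupeStream() additionally
// records the items it lets through, so later duplicates in the same stream are dropped too:
// source.pipe(store.dedupeStream()).pipe(destination);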
@@ -85,6 +85,7 @@ export default class BufferedReadStream extends Readable {
      const request = this._state.request(this._source.request());

      this._debug(`[BufferedReadStream] Load request: ${request}`);
+     // TODO: racing here! we need to somehow keep the order of loads
      const { data, ...info } = await this._source.get(request);
      const response = new Response(request, info);
      this._debug(`[BufferedReadStream] Load response: ${JSON.stringify(response)} => data = ${data && data.length}`);
@@ -192,12 +193,13 @@ class Strategy {

    constructor({
      highWatermark = 1000,
+     maxPendingLoads = 100,
      eagerLoad = false,
      initialize,
      shallLoad,
      terminate,
    } = {}) {
-     this.options = Object.freeze({ highWatermark, eagerLoad });
+     this.options = Object.freeze({ highWatermark, maxPendingLoads, eagerLoad });
      // overwrite methods
      Object.assign(this, trimObj({ initialize, shallLoad, terminate }));
    }
@@ -210,7 +212,7 @@ class Strategy {

    shallLoad(state) {
      // TODO: we can have a slower start
-     return state.watermark < this.options.highWatermark;
+     return state.pendingLoads < this.options.maxPendingLoads && state.watermark < this.options.highWatermark;
    }

    terminate(record, state) {
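Note: the read-stream strategy now also caps the number of concurrent loads. Both hunks above touch the internal (unexported) Strategy class; the following is only a behavioral sketch of the frozen options and the new guard, not public API:

// The new option is frozen alongside the existing ones.
const strategy = new Strategy({ highWatermark: 2000, maxPendingLoads: 10 });
// strategy.options => { highWatermark: 2000, maxPendingLoads: 10, eagerLoad: false }

// shallLoad() now refuses to start another load once maxPendingLoads are in flight,
// even if the buffered watermark is still below highWatermark.
strategy.shallLoad({ pendingLoads: 10, watermark: 500 }); // => false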
@@ -120,6 +120,14 @@ export default class State {
      category.records += request.records;
      category.bytes += request.bytes;

+     if (response.errors && response.recovered && response.recovered.records > 0) {
+       this._failed.records -= response.recovered.records;
+       this._failed.bytes -= response.recovered.bytes; // not so accurate, but close enough
+       this._successful.requests++;
+       this._successful.records += response.recovered.records;
+       this._successful.bytes += response.recovered.bytes;
+     }
+
      this._time.addWrite(response.timestamp - request.timestamp);

      this._resolutions.get(request).resolve();
@@ -183,6 +183,7 @@ export default class BufferedWriteStream extends Transform {
        result: failed ? 'failed' : 'successful',
        index: request.index,
        records: request.records,
+       recovered: response.recovered || { records: 0, bytes: 0 },
        bytes: request.bytes,
        time: response.timestamp - request.timestamp,
      });
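Note: the two preceding hunks assume the writer's response can report a partially recovered batch when a request otherwise fails. A sketch of the shape the new accounting expects (field names are taken from the diff; values are illustrative):

// A failed request in which some records were nevertheless recovered:
const response = {
  errors: true, // truthy when the request failed (may be an error list in practice)
  recovered: { records: 120, bytes: 8192 },
  timestamp: Date.now(),
};
// State moves the recovered portion out of the failed tallies into the successful ones,
// and BufferedWriteStream now includes `recovered` in the per-request summary it emits.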
package/src/yargs.js CHANGED
@@ -73,6 +73,6 @@ export function handleFail(msg, err) {

  export function coerceToArray(arg) {
    return Array.isArray(arg) ? arg :
-     typeof arg === 'string' ? arg.split(',') :
+     typeof arg === 'string' ? arg.split(',').map(s => s.trim()) :
      arg === undefined || arg === null ? [] : [arg];
  }
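Note: comma-separated CLI values are now trimmed element by element. For example:

coerceToArray('a, b , c');  // before: ['a', ' b ', ' c']; now: ['a', 'b', 'c']
coerceToArray(['x', 'y']);  // unchanged: ['x', 'y']
coerceToArray(undefined);   // unchanged: []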