@miso.ai/server-commons 0.6.5-beta.0 → 0.6.5-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/file.js +22 -1
- package/src/index.js +1 -0
- package/src/sink/bps.js +1 -1
- package/src/store.js +129 -0
- package/src/stream/buffered-read.js +4 -2
- package/src/stream/buffered-write-state.js +8 -0
- package/src/stream/buffered-write.js +1 -0
- package/src/yargs.js +1 -1
package/package.json
CHANGED
package/src/file.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { access } from 'fs/promises';
|
|
2
|
-
import { accessSync, constants } from 'fs';
|
|
2
|
+
import { accessSync, constants, createReadStream } from 'fs';
|
|
3
|
+
import { createInterface } from 'readline';
|
|
3
4
|
|
|
4
5
|
export async function fileExists(file, mode = constants.F_OK) {
|
|
5
6
|
try {
|
|
@@ -18,3 +19,23 @@ export function fileExistsSync(file, mode = constants.F_OK) {
|
|
|
18
19
|
return false;
|
|
19
20
|
}
|
|
20
21
|
}
|
|
22
|
+
|
|
23
|
+
/**
 * Reads a UTF-8 text file line by line and returns its non-blank lines.
 *
 * Every line is trimmed of surrounding whitespace, and lines that are empty
 * after trimming are skipped. Errors raised while iterating propagate to the
 * caller after the reader and the file stream have been released.
 *
 * @param {string} file - Path of the file to read.
 * @returns {Promise<string[]>} Trimmed, non-empty lines in file order.
 */
export async function readFileAsLines(file) {
  const fileStream = createReadStream(file, { encoding: 'utf8' });
  const rl = createInterface({
    input: fileStream,
    // Always treat "\r\n" as a single line break, regardless of the delay between the two characters.
    crlfDelay: Infinity,
  });

  const lines = [];
  try {
    for await (const line of rl) {
      const trimmed = line.trim();
      if (trimmed) {
        lines.push(trimmed);
      }
    }
  } finally {
    // Release the reader and the underlying file stream even when iteration fails midway.
    rl.close();
    fileStream.destroy();
  }

  return lines;
}
|
package/src/index.js
CHANGED
|
@@ -14,3 +14,4 @@ export * as yargs from './yargs.js';
|
|
|
14
14
|
export { default as Resolution } from './resolution.js';
|
|
15
15
|
export { default as TaskQueue } from './task-queue.js';
|
|
16
16
|
export { default as RateLimitingQueue } from './rate-limiting-queue.js';
|
|
17
|
+
export { default as HashStore } from './store.js';
|
package/src/sink/bps.js
CHANGED
package/src/store.js
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { resolve } from 'path';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import { Transform } from 'stream';
|
|
4
|
+
import { readFileAsLines } from './file.js';
|
|
5
|
+
|
|
6
|
+
const DEFAULT_FLUSH_THRESHOLD = 100;
|
|
7
|
+
|
|
8
|
+
/**
 * A set of hashes kept in memory and persisted to a text file, one hash per line.
 *
 * New hashes are buffered and appended to the file in batches. Because the
 * file is read back as trimmed text lines, `hashFn` should return strings
 * without line breaks or surrounding whitespace for reloaded hashes to match.
 */
export default class HashStore {

  /**
   * @param {object} options
   * @param {string} options.file - Path of the file the hashes are persisted to.
   * @param {Function} options.hashFn - Maps an item to its hash.
   * @param {number} [options.flushThreshold] - Number of unsaved hashes at which `add()` flushes automatically.
   * @throws {Error} When `file` or `hashFn` is missing.
   */
  constructor({ file, hashFn, flushThreshold = DEFAULT_FLUSH_THRESHOLD } = {}) {
    if (!file) {
      throw new Error('File path is required');
    }
    if (!hashFn) {
      throw new Error('Hash function is required');
    }
    this._file = file;
    this._hashFn = hashFn;
    this._flushThreshold = flushThreshold;
    this._hashes = new Set();
    this._pending = [];
  }

  /**
   * Empties the store and deletes the backing file.
   *
   * @returns {Promise<number>} Number of lines the file held before deletion.
   */
  async purge() {
    // peek data length
    const length = (await this._read()).length;

    this._hashes = new Set();
    // Unsaved hashes belong to the purged state too; keeping them would let a
    // later flush() write them into a fresh file.
    this._pending = [];

    // delete file
    try {
      await fs.unlink(this._file);
    } catch (err) {
      // A missing file is already the desired end state.
      if (err.code !== 'ENOENT') {
        throw err;
      }
    }

    return length;
  }

  /**
   * Replaces the in-memory hashes with the file content and discards unsaved hashes.
   *
   * @returns {Promise<HashStore>} This store, for chaining.
   */
  async load() {
    this._hashes = new Set(await this._read());
    this._pending = [];
    return this;
  }

  /**
   * @returns {Set} The live internal set of hashes (not a copy).
   */
  get() {
    return this._hashes;
  }

  /**
   * @param {*} item - Item to look up.
   * @returns {boolean} Whether the hash of `item` is in the store.
   */
  contains(item) {
    return this._hashes.has(this._hashFn(item));
  }

  /**
   * Records the hashes of the given items and flushes once enough are unsaved.
   * The in-memory set is updated synchronously, before any file write.
   *
   * @param {...*} items - Items to record; items already known are ignored.
   * @returns {Promise<void>}
   */
  async add(...items) {
    for (const item of items) {
      const hash = this._hashFn(item);
      if (!this._hashes.has(hash)) {
        this._hashes.add(hash);
        this._pending.push(hash);
      }
    }
    if (this._pending.length >= this._flushThreshold) {
      await this.flush();
    }
  }

  /**
   * Appends all unsaved hashes to the file, creating its directory when needed.
   * If the write fails, the unsaved hashes are kept so a later flush can retry.
   *
   * @returns {Promise<void>}
   */
  async flush() {
    if (this._pending.length === 0) {
      return;
    }
    // Swap the buffer out first so hashes added while the write is in flight
    // land in a fresh batch.
    const pending = this._pending;
    this._pending = [];
    try {
      await this._mkdir();
      await fs.appendFile(this._file, `${pending.join('\n')}\n`);
    } catch (err) {
      // Put the batch back, ahead of anything added in the meantime.
      this._pending = pending.concat(this._pending);
      throw err;
    }
  }

  /**
   * @returns {Transform} Object-mode stream that drops items already in the store.
   */
  exclusionStream() {
    return new HashStoreFilterTransform(this, { mode: 'exclude' });
  }

  /**
   * @returns {Transform} Object-mode stream that drops items already in the
   *   store and records every item it lets through.
   */
  dedupeStream() {
    return new HashStoreFilterTransform(this, { mode: 'dedupe' });
  }

  /** Ensures the directory containing the backing file exists. */
  async _mkdir() {
    const dir = resolve(this._file, '..');
    await fs.mkdir(dir, { recursive: true });
  }

  /**
   * @returns {Promise<string[]>} Lines of the backing file, or an empty array
   *   when the read fails with `ENOENT`.
   */
  async _read() {
    try {
      return await readFileAsLines(this._file);
    } catch (err) {
      if (err.code !== 'ENOENT') {
        throw err;
      }
      return [];
    }
  }

}
|
|
103
|
+
|
|
104
|
+
/**
 * Object-mode transform that lets through only items the store does not contain.
 *
 * - `exclude`: filters against the store without modifying it.
 * - `dedupe`: additionally records each item it lets through, so later
 *   duplicates are dropped.
 */
class HashStoreFilterTransform extends Transform {

  /**
   * @param {object} store - Object providing `contains(item)` and `add(item)`.
   * @param {object} options
   * @param {string} options.mode - Either `'exclude'` or `'dedupe'`.
   * @throws {Error} When `mode` is neither of the two.
   */
  constructor(store, { mode } = {}) {
    super({ objectMode: true });
    this._store = store;
    switch (mode) {
      case 'exclude':
      case 'dedupe':
        break;
      default:
        throw new Error(`Unrecognized mode: ${mode}`);
    }
    this._mode = mode;
  }

  _transform(item, _, next) {
    if (this._store.contains(item)) {
      next();
      return;
    }
    this.push(item);
    if (this._mode !== 'dedupe') {
      next();
      return;
    }
    // Wait for the store before taking the next item, so a failure while
    // recording the item surfaces as a stream error instead of an unhandled
    // rejection.
    Promise.resolve(this._store.add(item)).then(() => next(), next);
  }

}
|
|
@@ -85,6 +85,7 @@ export default class BufferedReadStream extends Readable {
|
|
|
85
85
|
const request = this._state.request(this._source.request());
|
|
86
86
|
|
|
87
87
|
this._debug(`[BufferedReadStream] Load request: ${request}`);
|
|
88
|
+
// TODO: racing here! we need to somehow keep the order of loads
|
|
88
89
|
const { data, ...info } = await this._source.get(request);
|
|
89
90
|
const response = new Response(request, info);
|
|
90
91
|
this._debug(`[BufferedReadStream] Load response: ${JSON.stringify(response)} => data = ${data && data.length}`);
|
|
@@ -192,12 +193,13 @@ class Strategy {
|
|
|
192
193
|
|
|
193
194
|
constructor({
|
|
194
195
|
highWatermark = 1000,
|
|
196
|
+
maxPendingLoads = 100,
|
|
195
197
|
eagerLoad = false,
|
|
196
198
|
initialize,
|
|
197
199
|
shallLoad,
|
|
198
200
|
terminate,
|
|
199
201
|
} = {}) {
|
|
200
|
-
this.options = Object.freeze({ highWatermark, eagerLoad });
|
|
202
|
+
this.options = Object.freeze({ highWatermark, maxPendingLoads, eagerLoad });
|
|
201
203
|
// overwrite methods
|
|
202
204
|
Object.assign(this, trimObj({ initialize, shallLoad, terminate }));
|
|
203
205
|
}
|
|
@@ -210,7 +212,7 @@ class Strategy {
|
|
|
210
212
|
|
|
211
213
|
shallLoad(state) {
|
|
212
214
|
// TODO: we can have a slower start
|
|
213
|
-
return state.watermark < this.options.highWatermark;
|
|
215
|
+
return state.pendingLoads < this.options.maxPendingLoads && state.watermark < this.options.highWatermark;
|
|
214
216
|
}
|
|
215
217
|
|
|
216
218
|
terminate(record, state) {
|
|
@@ -120,6 +120,14 @@ export default class State {
|
|
|
120
120
|
category.records += request.records;
|
|
121
121
|
category.bytes += request.bytes;
|
|
122
122
|
|
|
123
|
+
if (response.errors && response.recovered && response.recovered.records > 0) {
|
|
124
|
+
this._failed.records -= response.recovered.records;
|
|
125
|
+
this._failed.bytes -= response.recovered.bytes; // not so accurate, but close enough
|
|
126
|
+
this._successful.requests++;
|
|
127
|
+
this._successful.records += response.recovered.records;
|
|
128
|
+
this._successful.bytes += response.recovered.bytes;
|
|
129
|
+
}
|
|
130
|
+
|
|
123
131
|
this._time.addWrite(response.timestamp - request.timestamp);
|
|
124
132
|
|
|
125
133
|
this._resolutions.get(request).resolve();
|
|
@@ -183,6 +183,7 @@ export default class BufferedWriteStream extends Transform {
|
|
|
183
183
|
result: failed ? 'failed' : 'successful',
|
|
184
184
|
index: request.index,
|
|
185
185
|
records: request.records,
|
|
186
|
+
recovered: response.recovered || { records: 0, bytes: 0 },
|
|
186
187
|
bytes: request.bytes,
|
|
187
188
|
time: response.timestamp - request.timestamp,
|
|
188
189
|
});
|
package/src/yargs.js
CHANGED
|
@@ -73,6 +73,6 @@ export function handleFail(msg, err) {
|
|
|
73
73
|
|
|
74
74
|
/**
 * Normalizes an argument into an array.
 *
 * - Arrays are returned unchanged.
 * - Strings are split on commas; each piece is trimmed and empty pieces are
 *   dropped, so `''` and stray commas do not produce empty entries.
 * - `undefined` and `null` become an empty array.
 * - Any other value is wrapped in a one-element array.
 *
 * @param {*} arg - Value to normalize.
 * @returns {Array} The normalized array.
 */
export function coerceToArray(arg) {
  if (Array.isArray(arg)) {
    return arg;
  }
  if (typeof arg === 'string') {
    return arg
      .split(',')
      .map((s) => s.trim())
      .filter((s) => s.length > 0);
  }
  return arg === undefined || arg === null ? [] : [arg];
}
|