marcattacks 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +23 -0
- package/LICENSE +21 -0
- package/README-docker.md +39 -0
- package/README.md +111 -0
- package/TYPESCRIPT.txt +6 -0
- package/data/output.rdf +12425 -0
- package/data/sample.xml +2 -0
- package/demo/demo.jsonata +44 -0
- package/dist/index.js +150 -0
- package/docker-compose.yaml +37 -0
- package/logo.jpg +0 -0
- package/package.json +46 -0
- package/plugin/demo.js +12 -0
- package/src/httpstream.ts +28 -0
- package/src/index.ts +177 -0
- package/src/input/alephseq.ts +83 -0
- package/src/input/json.ts +47 -0
- package/src/input/jsonl.ts +47 -0
- package/src/input/xml.ts +125 -0
- package/src/marcmap.ts +94 -0
- package/src/output/alephseq.ts +48 -0
- package/src/output/json.ts +38 -0
- package/src/output/jsonl.ts +23 -0
- package/src/output/rdf.ts +63 -0
- package/src/output/xml.ts +84 -0
- package/src/plugin-loader.ts +27 -0
- package/src/s3stream.ts +266 -0
- package/src/sftpstream.ts +114 -0
- package/src/slow-writable.ts +165 -0
- package/src/transform/json.ts +36 -0
- package/src/transform/rdf.ts +398 -0
- package/tsconfig.json +46 -0
package/src/s3stream.ts
ADDED
@@ -0,0 +1,266 @@
import {
  S3Client,
  GetObjectCommand,
  PutObjectCommand,
  UploadPartCommand,
  CreateMultipartUploadCommand,
  CompleteMultipartUploadCommand,
  type S3ClientConfig
} from "@aws-sdk/client-s3";
import { Readable, Writable } from "stream";
import log4js from 'log4js';

const logger = log4js.getLogger();

type S3Config = {
  region: string;
  endpoint: string;
  bucket: string;
  key: string;
  accessKeyId?: string;
  secretAccessKey?: string;
};

export async function s3ReaderStream(url: URL, options: { range?: string }): Promise<Readable> {
  const config = parseURL(url);

  logger.debug(`s3 config:`, config);

  const bucket = config.bucket;
  const key = config.key;
  const range = options.range;
  const s3 = makeClient(config);

  const res = await s3.send(new GetObjectCommand({
    Bucket: bucket,
    Key: key,
    Range: range,
  }));

  const body = res.Body;

  if (!body) {
    throw new Error("S3 GetObject returned an empty body");
  }

  // 1) If the SDK returned a Node.js readable stream (typical in Node)
  if (isNodeReadable(body)) {
    return body as Readable;
  }

  // 2) If the SDK returned a WHATWG ReadableStream (browser-ish or newer runtimes)
  if (isReadableStream(body)) {
    // Node.js v17+ has Readable.fromWeb
    if (typeof (Readable as any).fromWeb === "function") {
      return (Readable as any).fromWeb(body as ReadableStream<Uint8Array>);
    } else {
      // Fallback: pump the stream's reader into a Node Readable
      const reader = (body as ReadableStream<Uint8Array>).getReader();
      const nodeStream = new Readable({
        read() {
          // no-op; chunks are pushed from the async loop below
        }
      });
      (async () => {
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            nodeStream.push(Buffer.from(value));
          }
          nodeStream.push(null);
        } catch (err) {
          nodeStream.destroy(err as Error);
        }
      })();
      return nodeStream;
    }
  }

  // 3) If the SDK returned a Blob (browsers)
  if (typeof Blob !== "undefined" && body instanceof Blob) {
    const stream = (body as Blob).stream();
    if (typeof (Readable as any).fromWeb === "function") {
      return (Readable as any).fromWeb(stream);
    }
    // Fallback, same as above
    const reader = stream.getReader();
    const nodeStream = new Readable({
      read() {}
    });
    (async () => {
      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          nodeStream.push(Buffer.from(value));
        }
        nodeStream.push(null);
      } catch (err) {
        nodeStream.destroy(err as Error);
      }
    })();
    return nodeStream;
  }

  // 4) If it's an async iterable (some runtimes)
  if (isAsyncIterable(body)) {
    return Readable.from(body as AsyncIterable<Uint8Array | string | Buffer>);
  }

  // Unknown body shape
  throw new Error("Unsupported S3 GetObject body type");
}

export function s3WriterStream(url: URL, options: { partSize?: number }): Promise<Writable> {
  return new Promise<Writable>((resolve) => {
    const config = parseURL(url);

    logger.debug(`s3 config:`, config);
    const bucket = config.bucket;
    const key = config.key;
    const s3 = makeClient(config);
    const partSize = options.partSize ?? 5 * 1024 * 1024;

    let uploadId: string | null = null;
    const parts: Array<{ ETag: string | undefined; PartNumber: number }> = [];
    let buffer = Buffer.alloc(0);
    let partNumber = 1;

    const writer = new Writable({
      async write(chunk, _encoding, callback) {
        logger.debug("write chunk...");
        try {
          buffer = Buffer.concat([buffer, chunk]);

          if (buffer.length >= partSize) {
            await flushPart();
          }
          callback();
        } catch (err) {
          callback(err as Error);
        }
      },

      async final(callback) {
        logger.debug("final...");
        try {
          logger.debug("flushPart...");
          await flushPart(true);
          logger.debug("finishUpload...");
          await finishUpload();
          callback();
        } catch (err) {
          callback(err as Error);
        }
      }
    });

    async function ensureUpload() {
      if (!uploadId) {
        const res = await s3.send(new CreateMultipartUploadCommand({
          Bucket: bucket,
          Key: key
        }));
        uploadId = res.UploadId!;
      }
    }

    async function flushPart(isLast = false) {
      if (buffer.length === 0 && !isLast) return;

      logger.debug("ensureUpload...");
      await ensureUpload();

      logger.debug("s3.send...");
      const res = await s3.send(new UploadPartCommand({
        Bucket: bucket,
        Key: key,
        PartNumber: partNumber,
        UploadId: uploadId!,
        Body: buffer
      }));

      parts.push({ ETag: res.ETag, PartNumber: partNumber });
      buffer = Buffer.alloc(0);
      partNumber++;
    }

    async function finishUpload() {
      if (!uploadId) {
        // No parts were written; upload an empty object instead
        await s3.send(new PutObjectCommand({
          Bucket: bucket,
          Key: key,
          Body: Buffer.alloc(0)
        }));
        return;
      }

      await s3.send(new CompleteMultipartUploadCommand({
        Bucket: bucket,
        Key: key,
        UploadId: uploadId!,
        MultipartUpload: { Parts: parts }
      }));
    }

    resolve(writer);
  });
}

function isNodeReadable(x: any): x is Readable {
  return x && typeof x.pipe === "function" && typeof x.read === "function";
}

function isReadableStream(x: any): x is ReadableStream {
  return typeof x?.getReader === "function";
}

function isAsyncIterable(x: any): x is AsyncIterable<any> {
  return x && typeof x[Symbol.asyncIterator] === "function";
}

function makeClient(config: S3Config): S3Client {
  logger.debug(config);
  const myConfig: S3ClientConfig = {
    endpoint: config.endpoint,
    forcePathStyle: true,
    region: config.region,
  };

  if (config.accessKeyId && config.secretAccessKey) {
    myConfig.credentials = {
      accessKeyId: config.accessKeyId,
      secretAccessKey: config.secretAccessKey
    };
  }
  return new S3Client(myConfig);
}

function parseURL(url: URL): S3Config {
  const config: S3Config = {
    region: "us-east-1",
    endpoint: "http://localhost:3371",
    bucket: "bbl",
    key: "test.txt"
  };

  // "s3s:" selects HTTPS; plain "s3:" selects HTTP
  const scheme = url.protocol.startsWith("s3s") ? "https" : "http";
  config.endpoint = `${scheme}://${url.hostname}`;
  if (url.port) {
    config.endpoint += `:${url.port}`;
  }
  config.bucket = url.pathname.split("/")[1] ?? "";
  config.key = url.pathname.split("/").slice(2).join("/");

  if (url.username) {
    config.accessKeyId = url.username;
  }

  if (url.password) {
    config.secretAccessKey = url.password;
  }

  return config;
}
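
A minimal usage sketch for the two streams above, assuming an S3-compatible endpoint reachable at localhost:9000; the credentials, bucket, object keys, and byte range are illustrative, and the s3://user:pass@host:port/bucket/key URL shape follows parseURL (with s3s:// selecting HTTPS):

import { s3ReaderStream, s3WriterStream } from "./src/s3stream.js";

async function main() {
  // Hypothetical endpoint and objects; credentials ride in the URL userinfo
  const src = new URL("s3://minioadmin:minioadmin@localhost:9000/bbl/records.xml");
  const dst = new URL("s3://minioadmin:minioadmin@localhost:9000/bbl/records-copy.xml");

  const reader = await s3ReaderStream(src, { range: "bytes=0-1048575" }); // first MiB only
  const writer = await s3WriterStream(dst, { partSize: 8 * 1024 * 1024 });

  reader.pipe(writer)
    .on("finish", () => console.log("upload complete"))
    .on("error", (err) => console.error(err));
}

main().catch(console.error);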
package/src/sftpstream.ts
ADDED
@@ -0,0 +1,114 @@
import { Client } from "ssh2";
import { Readable, Writable } from "stream";

export interface SftpConfig {
  host: string;
  port?: number;
  username: string;
  password?: string;
  privateKey?: Buffer | string;
}

export async function sftpReadStream(remotePath: string, config: SftpConfig): Promise<Readable> {
  return new Promise((resolve, reject) => {
    const conn = new Client();

    conn.on("ready", () => {
      conn.sftp((err, sftp) => {
        if (err) {
          conn.end();
          return reject(err);
        }

        const stream = sftp.createReadStream(remotePath);

        // Close the SSH connection when the stream ends or errors
        stream.on("close", () => conn.end());
        stream.on("error", (err: any) => {
          conn.end();
          reject(err);
        });

        resolve(stream);
      });
    });

    conn.on("error", (err) => reject(err));
    conn.connect(config);
  });
}

export async function sftpWriteStream(remotePath: string, config: SftpConfig): Promise<Writable> {
  return new Promise((resolve, reject) => {
    const conn = new Client();

    conn.on("ready", () => {
      conn.sftp((err, sftp) => {
        if (err) {
          conn.end();
          return reject(err);
        }

        const stream = sftp.createWriteStream(remotePath, { encoding: "utf-8" });

        // Close the SSH connection when the stream ends or errors
        stream.on("close", () => conn.end());
        stream.on("error", (err: any) => {
          conn.end();
          reject(err);
        });

        resolve(stream);
      });
    });

    conn.on("error", (err) => reject(err));
    conn.connect(config);
  });
}

export async function sftpLatestFile(config: SftpConfig, remoteDir: string, extension: string): Promise<string> {
  return new Promise((resolve, reject) => {
    const conn = new Client();

    conn.on("ready", () => {
      conn.sftp((err, sftp) => {
        if (err) {
          conn.end();
          return reject(err);
        }

        sftp.readdir(remoteDir, (err, list) => {
          if (err) {
            conn.end();
            return reject(err);
          }

          if (!list || list.length === 0) {
            conn.end();
            return reject(new Error("No files found in directory"));
          }

          // Keep only files with the requested extension
          const myFiles = list.filter(f => f.filename.toLowerCase().endsWith(extension));

          if (myFiles.length === 0) {
            conn.end();
            return reject(new Error(`No ${extension} files found in directory`));
          }

          // Pick the file with the most recent modification time
          const latest = myFiles.reduce((prev, curr) =>
            (prev.attrs.mtime > curr.attrs.mtime) ? prev : curr
          );

          const latestPath = `${remoteDir}/${latest.filename}`;
          conn.end();
          resolve(latestPath);
        });
      });
    });

    conn.on("error", (err) => reject(err));
    conn.connect(config);
  });
}
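
A short sketch of how these helpers compose, assuming a reachable SFTP server; the host, account, and paths below are hypothetical. sftpLatestFile picks the newest matching file by mtime, and the result feeds sftpReadStream:

import { sftpLatestFile, sftpReadStream, type SftpConfig } from "./src/sftpstream.js";
import { createWriteStream } from "fs";

const config: SftpConfig = {
  host: "sftp.example.org",          // hypothetical server
  port: 22,
  username: "harvester",             // hypothetical account
  password: process.env.SFTP_PASSWORD,
};

async function main() {
  // Find the newest .xml export and stream it to a local file
  const latest = await sftpLatestFile(config, "/exports", ".xml");
  const remote = await sftpReadStream(latest, config);
  remote.pipe(createWriteStream("latest-export.xml"));
}

main().catch(console.error);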
package/src/slow-writable.ts
ADDED
@@ -0,0 +1,165 @@
// slow-writable.ts
import { Writable, type WritableOptions } from "stream";
import log4js from 'log4js';

const logger = log4js.getLogger();

export interface SlowWritableOptions extends WritableOptions {
  /**
   * Delay per chunk (ms). Default 200 ms.
   */
  delayMs?: number;

  /**
   * Maximum number of concurrent "in-flight" asynchronous writes.
   * While more writes may be queued, only up to this number will be processed in parallel.
   * Default 1.
   */
  maxConcurrency?: number;

  /**
   * If set to a positive integer n, every nth chunk will produce an error (for testing).
   * Default 0 (never error).
   */
  simulateErrorEveryN?: number;
}

/**
 * SlowWritable: a Writable stream that processes writes asynchronously
 * with an artificial delay and optional concurrency control.
 */
export class SlowWritable extends Writable {
  private delayMs: number;
  private maxConcurrency: number;
  private simulateErrorEveryN: number;
  private inFlight = 0;
  private queue: Array<{
    chunk: any;
    encoding: BufferEncoding;
    callback: (err?: Error | null) => void;
    seq: number;
  }> = [];
  private seqCounter = 0;
  private destroyedFlag = false;

  constructor(opts: SlowWritableOptions = {}) {
    // Keep the user's objectMode/encoding settings; strip our own options before calling super
    const { delayMs = 200, maxConcurrency = 1, simulateErrorEveryN = 0, ...writableOpts } = opts;
    super(writableOpts);
    this.delayMs = delayMs;
    this.maxConcurrency = Math.max(1, Math.floor(maxConcurrency));
    this.simulateErrorEveryN = Math.max(0, Math.floor(simulateErrorEveryN));
  }

  // Node calls _write for each chunk
  _write(chunk: any, encoding: BufferEncoding, callback: (err?: Error | null) => void): void {
    if (this.destroyedFlag) {
      callback(new Error("Stream is destroyed"));
      return;
    }

    const seq = ++this.seqCounter;
    this.queue.push({ chunk, encoding, callback, seq });
    this.processQueue();
  }

  // Process queued writes, honoring maxConcurrency
  private processQueue(): void {
    // If there is nothing to do, or we are at the concurrency limit, the loop exits immediately
    while (this.inFlight < this.maxConcurrency && this.queue.length > 0 && !this.destroyedFlag) {
      const item = this.queue.shift()!;
      this.inFlight++;
      this.performAsyncWrite(item)
        .then(() => {
          this.inFlight--;
          // After finishing one write, try to process more.
          // Use nextTick to avoid deep recursion.
          process.nextTick(() => this.processQueue());
        })
        .catch(() => {
          this.inFlight--;
          // performAsyncWrite has already reported the error through item.callback,
          // which makes the stream emit 'error'; invoking the callback again here
          // would throw "callback called multiple times". Just keep the queue moving.
          process.nextTick(() => this.processQueue());
        });
    }
  }

  // Simulate an asynchronous write that takes `delayMs` ms
  private async performAsyncWrite(item: {
    chunk: any;
    encoding: BufferEncoding;
    callback: (err?: Error | null) => void;
    seq: number;
  }): Promise<void> {
    return new Promise((resolve, reject) => {
      const maybeError =
        this.simulateErrorEveryN > 0 && item.seq % this.simulateErrorEveryN === 0;

      const timer = setTimeout(() => {
        // Simulate processing the chunk; for demonstration we just log.
        // In real use, replace with actual async I/O.
        logger.info(`SlowWritable processed seq=${item.seq}`);

        if (maybeError) {
          const err = new Error(`Simulated error at seq ${item.seq}`);
          item.callback(err);
          reject(err);
        } else {
          item.callback();
          resolve();
        }
      }, this.delayMs);

      // If the stream was destroyed in the meantime, cancel the timer and fail the callback
      const onDestroy = () => {
        clearTimeout(timer);
        const err = new Error("Stream destroyed while writing");
        try {
          item.callback(err);
        } catch (_) {
          // ignore
        }
        reject(err);
      };

      // No listener is registered for this; if destroyedFlag was already set
      // by the time we got here, bail out immediately
      if (this.destroyedFlag) {
        onDestroy();
      }
    });
  }

  _final(callback: (err?: Error | null) => void): void {
    // Wait until the queue is empty and no writes are in flight
    const check = () => {
      if (this.destroyedFlag) {
        callback(new Error("Stream destroyed before finalizing"));
        return;
      }
      if (this.queue.length === 0 && this.inFlight === 0) {
        callback();
      } else {
        setTimeout(check, 10);
      }
    };
    check();
  }

  _destroy(err: Error | null, callback: (error?: Error | null) => void): void {
    this.destroyedFlag = true;
    // Flush any queued callbacks with an error
    while (this.queue.length > 0) {
      const item = this.queue.shift()!;
      try {
        item.callback(err ?? new Error("Stream destroyed"));
      } catch (_) {
        // ignore
      }
    }
    callback(err);
  }
}
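
A sketch of exercising the class in a test, with illustrative parameters: ten chunks through a writer that takes 100 ms per chunk, two in flight at a time, failing on every fourth chunk:

import { Readable, pipeline } from "stream";
import { SlowWritable } from "./src/slow-writable.js";

const source = Readable.from(Array.from({ length: 10 }, (_, i) => `chunk-${i}\n`));
const slow = new SlowWritable({ delayMs: 100, maxConcurrency: 2, simulateErrorEveryN: 4 });

// pipeline() forwards the simulated error and tears both streams down
pipeline(source, slow, (err) => {
  console.log(err ? `pipeline failed: ${err.message}` : "pipeline done");
});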
package/src/transform/json.ts
ADDED
@@ -0,0 +1,36 @@
import { Transform } from "stream";
import jsonata from "jsonata";
import fs from "fs";
import { marcmap } from '../marcmap.js';
import { v4 as uuidv4 } from 'uuid';

export async function transform(q: string): Promise<Transform> {
  let query = q;

  if (q && fs.existsSync(q)) {
    query = fs.readFileSync(q, { encoding: 'utf-8' });
  }

  if (!q) {
    query = '$';
  }

  return new Transform({
    objectMode: true,
    async transform(data: any, _encoding, callback) {
      const expression = jsonata(query);
      expression.registerFunction('marcmap', (code) => {
        return marcmap(data['record'], code, {});
      });
      expression.registerFunction('genid', () => {
        return genid();
      });
      data = await expression.evaluate(data);
      callback(null, data);
    }
  });
}

function genid(): string {
  return `genid:${uuidv4()}`;
}
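
A sketch of the transform in a pipeline. The JSONata expression and the { record: ... } input shape are illustrative: the shape is implied by the data['record'] lookup in the marcmap binding, and "245" is assumed here only as a plausible MARC field code:

import { transform } from "./src/transform/json.js";
import { Readable } from "stream";

async function main() {
  // Inline JSONata; transform() also accepts a path to a .jsonata file.
  // $marcmap and $genid are the custom functions registered above.
  const t = await transform('{ "id": $genid(), "title": $marcmap("245") }');

  Readable.from([{ record: [/* MARC fields as produced by the input parsers */] }])
    .pipe(t)
    .on("data", (doc) => console.log(doc));
}

main().catch(console.error);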