marcattacks 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/dist/httpstream.d.ts +3 -0
  2. package/dist/httpstream.d.ts.map +1 -0
  3. package/{src/httpstream.ts → dist/httpstream.js} +3 -7
  4. package/dist/httpstream.js.map +1 -0
  5. package/dist/index.d.ts +3 -0
  6. package/dist/index.d.ts.map +1 -0
  7. package/dist/index.js +5 -0
  8. package/dist/index.js.map +1 -0
  9. package/dist/input/alephseq.d.ts +3 -0
  10. package/dist/input/alephseq.d.ts.map +1 -0
  11. package/{src/input/alephseq.ts → dist/input/alephseq.js} +20 -34
  12. package/dist/input/alephseq.js.map +1 -0
  13. package/dist/input/json.d.ts +3 -0
  14. package/dist/input/json.d.ts.map +1 -0
  15. package/{src/input/json.ts → dist/input/json.js} +8 -18
  16. package/dist/input/json.js.map +1 -0
  17. package/dist/input/jsonl.d.ts +3 -0
  18. package/dist/input/jsonl.d.ts.map +1 -0
  19. package/{src/input/jsonl.ts → dist/input/jsonl.js} +11 -21
  20. package/dist/input/jsonl.js.map +1 -0
  21. package/dist/input/xml.d.ts +3 -0
  22. package/dist/input/xml.d.ts.map +1 -0
  23. package/{src/input/xml.ts → dist/input/xml.js} +29 -50
  24. package/dist/input/xml.js.map +1 -0
  25. package/dist/marcmap.d.ts +31 -0
  26. package/dist/marcmap.d.ts.map +1 -0
  27. package/{src/marcmap.ts → dist/marcmap.js} +25 -38
  28. package/dist/marcmap.js.map +1 -0
  29. package/dist/output/alephseq.d.ts +3 -0
  30. package/dist/output/alephseq.d.ts.map +1 -0
  31. package/{src/output/alephseq.ts → dist/output/alephseq.js} +17 -25
  32. package/dist/output/alephseq.js.map +1 -0
  33. package/dist/output/json.d.ts +3 -0
  34. package/dist/output/json.d.ts.map +1 -0
  35. package/{src/output/json.ts → dist/output/json.js} +4 -13
  36. package/dist/output/json.js.map +1 -0
  37. package/dist/output/jsonl.d.ts +3 -0
  38. package/dist/output/jsonl.d.ts.map +1 -0
  39. package/{src/output/jsonl.ts → dist/output/jsonl.js} +5 -8
  40. package/dist/output/jsonl.js.map +1 -0
  41. package/dist/output/rdf.d.ts +3 -0
  42. package/dist/output/rdf.d.ts.map +1 -0
  43. package/dist/output/rdf.js +44 -0
  44. package/dist/output/rdf.js.map +1 -0
  45. package/dist/output/xml.d.ts +6 -0
  46. package/dist/output/xml.d.ts.map +1 -0
  47. package/{src/output/xml.ts → dist/output/xml.js} +18 -35
  48. package/dist/output/xml.js.map +1 -0
  49. package/dist/plugin-loader.d.ts +2 -0
  50. package/dist/plugin-loader.d.ts.map +1 -0
  51. package/dist/plugin-loader.js +24 -0
  52. package/dist/plugin-loader.js.map +1 -0
  53. package/dist/s3stream.d.ts +8 -0
  54. package/dist/s3stream.d.ts.map +1 -0
  55. package/{src/s3stream.ts → dist/s3stream.js} +72 -115
  56. package/dist/s3stream.js.map +1 -0
  57. package/dist/sftpstream.d.ts +12 -0
  58. package/dist/sftpstream.d.ts.map +1 -0
  59. package/{src/sftpstream.ts → dist/sftpstream.js} +9 -39
  60. package/dist/sftpstream.js.map +1 -0
  61. package/dist/slow-writable.d.ts +38 -0
  62. package/dist/slow-writable.d.ts.map +1 -0
  63. package/dist/slow-writable.js +126 -0
  64. package/dist/slow-writable.js.map +1 -0
  65. package/dist/transform/json.d.ts +3 -0
  66. package/dist/transform/json.d.ts.map +1 -0
  67. package/{src/transform/json.ts → dist/transform/json.js} +8 -12
  68. package/dist/transform/json.js.map +1 -0
  69. package/dist/transform/rdf.d.ts +3 -0
  70. package/dist/transform/rdf.d.ts.map +1 -0
  71. package/{src/transform/rdf.ts → dist/transform/rdf.js} +82 -110
  72. package/dist/transform/rdf.js.map +1 -0
  73. package/package.json +7 -2
  74. package/Dockerfile +0 -23
  75. package/README-docker.md +0 -39
  76. package/TYPESCRIPT.txt +0 -6
  77. package/data/output.rdf +0 -12425
  78. package/data/sample.xml +0 -2
  79. package/demo/demo.jsonata +0 -44
  80. package/docker-compose.yaml +0 -37
  81. package/logo.jpg +0 -0
  82. package/plugin/demo.js +0 -12
  83. package/src/index.ts +0 -177
  84. package/src/output/rdf.ts +0 -63
  85. package/src/plugin-loader.ts +0 -27
  86. package/src/slow-writable.ts +0 -165
  87. package/tsconfig.json +0 -46
package/demo/demo.jsonata DELETED
@@ -1,44 +0,0 @@
1
- (
2
- /******start helper functions****/
3
- $strip := function ($str) {
4
- $replace($str,/\s*[\,.:\/]$/,"")
5
- };
6
-
7
- $marcmap0 := function ($path) {
8
- $marcmap($path)[0]
9
- };
10
-
11
- $typeLookupTable := {
12
- "_": "schema:CreativeWork", /* default */
13
- "book": "schema:Book",
14
- "catalog": "schema:Book",
15
- "correspondence": "schema:CreativeWork",
16
- "dissertation": "schema:Thesis",
17
- "ephemera": "schema:CreativeWork",
18
- "image": "schema:VisualWork",
19
- "manuscript": "schema:Book",
20
- "map": "schema:Map",
21
- "master": "schema:Thesis",
22
- "periodical": "schema:Periodical",
23
- "phd": "schema:Thesis"
24
- };
25
-
26
- $typeLookup := function ($val) {(
27
- $v := $lookup($typeLookupTable,$val);
28
- $v ? $v : $typeLookupTable._
29
- )};
30
- /******end helper functions******/
31
-
32
- {
33
- "@id": "my:" & $marcmap0("001"),
34
- "@type": $typeLookup($marcmap0("920a")),
35
- "name": $marcmap("245ab") ~> $join(" ") ~> $strip() ,
36
- "subject": $marcmap("500a") ~> $map(function ($val) {
37
- {
38
- "@id": $genid(),
39
- "@type": "schema:Subject",
40
- "name": $val
41
- }
42
- })
43
- }
44
- )
package/docker-compose.yaml DELETED
@@ -1,37 +0,0 @@
1
- services:
2
- minio:
3
- image: minio/minio:RELEASE.2025-04-22T22-12-26Z-cpuv1
4
- command: server /data -address ":3371" --console-address ":3372"
5
- environment:
6
- MINIO_ROOT_USER: minioadmin
7
- MINIO_ROOT_PASSWORD: minioadmin
8
- volumes:
9
- - minio-data:/data
10
- ports:
11
- - 3371:3371
12
- - 3372:3372
13
- healthcheck:
14
- test: ["CMD", "curl", "-f", "http://localhost:3371/minio/health/live"]
15
- interval: 10s
16
- timeout: 5s
17
- retries: 5
18
- start_period: 10s
19
-
20
- mc:
21
- image: minio/mc
22
- depends_on:
23
- - minio
24
- # minio:
25
- # condition: service_healthy
26
- restart: on-failure
27
- entrypoint:
28
- - sh
29
- - -c
30
- - |
31
- sleep 2;
32
- mc alias set docker http://minio:3371 minioadmin minioadmin;
33
- mc mb docker/bbl --ignore-existing;
34
-
35
- volumes:
36
- minio-data:
37
- driver: local
package/logo.jpg DELETED
Binary file
package/plugin/demo.js DELETED
@@ -1,12 +0,0 @@
1
- import { Transform } from 'stream';
2
-
3
- export function transform(opts) {
4
- return new Transform({
5
- objectMode: true,
6
- transform(data, encoding, callback) {
7
- data['id'] = "brol";
8
- data['record'] = [];
9
- callback(null,data);
10
- }
11
- });
12
- }
package/src/index.ts DELETED
@@ -1,177 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- import log4js from 'log4js';
4
- import { program } from 'commander';
5
- import { loadPlugin } from './plugin-loader.js';
6
- import { sftpReadStream , sftpWriteStream , sftpLatestFile , type SftpConfig } from './sftpstream.js';
7
- import { httpReadStream } from './httpstream.js';
8
- import { Readable } from 'stream';
9
- import { pathToFileURL } from "node:url";
10
- import type { Transform, Writable } from 'node:stream';
11
- import { SlowWritable } from './slow-writable.js';
12
- import path from "node:path";
13
- import fs from 'fs';
14
- import { s3ReaderStream, s3WriterStream } from './s3stream.js';
15
-
16
- log4js.configure({
17
- appenders: {
18
- err: {
19
- type: "stderr" ,
20
- layout: {
21
- type: "pattern",
22
- pattern: "%[%d %p %f{1} %m%]"
23
- }
24
- }
25
- },
26
- categories: {
27
- default: { appenders: ["err"], level: "off" , enableCallStack: true }
28
- }
29
- });
30
-
31
- program.version('0.1.0')
32
- .argument('<file>')
33
- .option('-f,--from <from>','input type','xml')
34
- .option('-t,--to <output>','output type','json')
35
- .option('-m,--map <map>','data mapper','json')
36
- .option('--fix <what>','jsonata')
37
- .option('-o,--out <file>','output file')
38
- .option('--key <keyfile>', 'private key file')
39
- .option('--info','output debugging messages')
40
- .option('--debug','output more debugging messages')
41
- .option('--trace','output much more debugging messages');
42
-
43
- program.parse(process.argv);
44
-
45
- const opts = program.opts();
46
- const logger = log4js.getLogger();
47
-
48
- if (opts.info) {
49
- logger.level = "info";
50
- }
51
-
52
- if (opts.debug) {
53
- logger.level = "debug";
54
- }
55
-
56
- if (opts.trace) {
57
- logger.level = "trace";
58
- }
59
-
60
- main();
61
-
62
- async function main() : Promise<void> {
63
- const url = program.args[0];
64
-
65
- if (! url) {
66
- console.error(`need an input file`);
67
- process.exit(2);
68
- }
69
-
70
- let inputFile : URL;
71
-
72
- if (fs.existsSync(url)) {
73
- const filePath = path.resolve(process.cwd(), url);
74
- inputFile = pathToFileURL(filePath);
75
- }
76
- else {
77
- inputFile = new URL(url);
78
- }
79
-
80
- logger.info(`using: ${inputFile}`);
81
-
82
- let readableStream;
83
-
84
- if (inputFile.protocol.startsWith("http")) {
85
- readableStream = await httpReadStream(inputFile.toString());
86
- }
87
- else if (inputFile.protocol.startsWith("s3")) {
88
- readableStream = await s3ReaderStream(inputFile,{});
89
- }
90
- else if (inputFile.protocol === 'sftp:') {
91
- const config = makeSftpConfig(inputFile,opts);
92
-
93
- let remotePath;
94
-
95
- if (inputFile.pathname.match(/\/@latest:\w+$/)) {
96
- const remoteDir = inputFile.pathname.replace(/\/@latest.*/,"");
97
- const extension = inputFile.pathname.replace(/.*\/@latest:/,"");
98
- remotePath = await sftpLatestFile(config,remoteDir,extension);
99
- }
100
- else {
101
- remotePath = inputFile.pathname;
102
- }
103
-
104
- readableStream = await sftpReadStream(remotePath, config);
105
- }
106
- else {
107
- readableStream = fs.createReadStream(inputFile);
108
- }
109
-
110
- let objectStream : Readable;
111
-
112
- if (opts.from) {
113
- const mod = await loadPlugin(opts.from,'input');
114
- objectStream = mod.stream2readable(readableStream);
115
- }
116
- else {
117
- console.error(`Need --from`);
118
- process.exit(1);
119
- }
120
-
121
- let resultStream = objectStream;
122
-
123
- if (opts.map) {
124
- const mod = await loadPlugin(opts.map,'transform');
125
- const transformer : Transform = await mod.transform(opts.fix);
126
- resultStream = objectStream.pipe(transformer);
127
- }
128
-
129
- let outStream : Writable;
130
-
131
- if (opts.out === '@slow') {
132
- outStream = new SlowWritable({ delayMs: 100 });
133
- }
134
- else if (opts.out) {
135
- if (opts.out.startsWith("sftp")) {
136
- const url = new URL(opts.out);
137
- const config = makeSftpConfig(url,opts);
138
- logger.info(`put ${url}`);
139
- outStream = await sftpWriteStream(url.href, config);
140
- }
141
- else if (opts.out.startsWith("s3")) {
142
- const url = new URL(opts.out);
143
- logger.info(`put ${url}`);
144
- outStream = await s3WriterStream(url,{});
145
- }
146
- else {
147
- outStream = fs.createWriteStream(opts.out, { encoding: 'utf-8'});
148
- }
149
- }
150
- else {
151
- outStream = process.stdout;
152
- }
153
-
154
- if (opts.to) {
155
- const mod = await loadPlugin(opts.to,'output');
156
- mod.readable2writable(resultStream, outStream);
157
- }
158
- }
159
-
160
- function makeSftpConfig(inputFile: URL, opts: any) : SftpConfig {
161
- let privateKey : string | undefined = undefined;
162
-
163
- if (opts.key) {
164
- privateKey = fs.readFileSync(opts.key,{ encoding: 'utf-8'});
165
- }
166
-
167
- let config: SftpConfig = {
168
- host: inputFile.hostname,
169
- port: Number(inputFile.port) ?? 22,
170
- username: inputFile.username
171
- };
172
-
173
- if (inputFile.password) { config.password = inputFile.password }
174
- if (privateKey) { config.privateKey = privateKey}
175
-
176
- return config;
177
- }
package/src/output/rdf.ts DELETED
@@ -1,63 +0,0 @@
1
- import { Readable, Writable } from 'stream';
2
- import N3 from 'n3';
3
-
4
- import log4js from 'log4js';
5
-
6
- const logger = log4js.getLogger();
7
-
8
- const { DataFactory } = N3;
9
- const { namedNode, literal, blankNode } = DataFactory;
10
-
11
- export function readable2writable(readable: Readable, writable: Writable) : void {
12
- let writer : N3.Writer;
13
-
14
- readable.on('data', (data: any) => {
15
- let prefixes = data['prefixes'];
16
-
17
- if (!writer) {
18
- writer = new N3.Writer(writable, { end: false, prefixes });
19
- }
20
-
21
- let quads : any[] = data['quads'];
22
-
23
- if (!quads) return;
24
-
25
- for (let i = 0 ; i < quads.length ; i++) {
26
- if (quads[i].subject && quads[i].predicate && quads[i].object) {
27
- // ok
28
- }
29
- else return;
30
-
31
- let subject = { type: 'NamedNode', value: '', ...quads[i].subject};
32
- let predicate = { type: 'NamedNode', value: '', ...quads[i].predicate};
33
- let object = { type: 'NamedNode', value: '', ...quads[i].object};
34
-
35
- let subjectValue =
36
- subject.type === 'NamedNode' ? namedNode(subject.value)
37
- : subject.type === 'BlankNode' ? blankNode(subject.value)
38
- : namedNode(subject.value);
39
-
40
- let predicateValue =
41
- predicate.type === 'NamedNode' ? namedNode(predicate.value)
42
- : namedNode(predicate.value);
43
-
44
- let objectValue =
45
- object.type === 'NamedNode' ? namedNode(object.value)
46
- : object.type === 'BlankNode' ? blankNode(object.value)
47
- : object.type === 'Literal' && object.as ? literal(object.value, namedNode(object.as))
48
- : object.type === 'Literal' ? literal(object.value)
49
- : namedNode(object.value);
50
-
51
- writer.addQuad(
52
- subjectValue,
53
- predicateValue,
54
- objectValue
55
- );
56
- }
57
- });
58
-
59
- readable.on('end', () => {
60
- writer.end();
61
- writable.end();
62
- });
63
- }
package/src/plugin-loader.ts DELETED
@@ -1,27 +0,0 @@
1
- import path from "path";
2
-
3
- export async function loadPlugin(
4
- spec: string, type: "input" | "output" | "transform"
5
- ) {
6
- // spec can be:
7
- // - "./local/file.js"
8
- // - "/absolute/path/to/plugin.js"
9
- // - "package-plugin"
10
- // - "package-plugin/submodule"
11
-
12
- try {
13
- const resolved = path.resolve(spec);
14
- return await import(resolved);
15
- } catch (e1) {
16
- try {
17
- const resolved = new URL(`./${type}/${spec}.js`, import.meta.url).href;
18
- return await import(resolved);
19
- } catch (e2) {
20
- const error = new Error(
21
- `Cannot load plugin: ${spec}. Tried direct import and local plugin directory.`
22
- );
23
- error.cause = [e1, e2];
24
- throw error;
25
- }
26
- }
27
- }
package/src/slow-writable.ts DELETED
@@ -1,165 +0,0 @@
1
- // slow-writable.ts
2
- import { Writable, type WritableOptions } from "stream";
3
- import log4js from 'log4js';
4
-
5
- const logger = log4js.getLogger();
6
-
7
- export interface SlowWritableOptions extends WritableOptions {
8
- /**
9
- * Delay per chunk (ms). Default 200 ms.
10
- */
11
- delayMs?: number;
12
-
13
- /**
14
- * Maximum number of concurrent "in-flight" asynchronous writes.
15
- * While more writes may be queued, only up to this number will be processed in parallel.
16
- * Default 1.
17
- */
18
- maxConcurrency?: number;
19
-
20
- /**
21
- * If set to a positive integer n, every nth chunk will produce an error (for testing).
22
- * Default 0 (never error).
23
- */
24
- simulateErrorEveryN?: number;
25
- }
26
-
27
- /**
28
- * SlowWritable: a Writable stream that processes writes asynchronously
29
- * with an artificial delay and optional concurrency control.
30
- */
31
- export class SlowWritable extends Writable {
32
- private delayMs: number;
33
- private maxConcurrency: number;
34
- private simulateErrorEveryN: number;
35
- private inFlight = 0;
36
- private queue: Array<{
37
- chunk: any;
38
- encoding: BufferEncoding;
39
- callback: (err?: Error | null) => void;
40
- seq: number;
41
- }> = [];
42
- private seqCounter = 0;
43
- private destroyedFlag = false;
44
-
45
- constructor(opts: SlowWritableOptions = {}) {
46
- // Keep objectMode/encoding behavior from user but default to object mode false
47
- const { delayMs = 200, maxConcurrency = 1, simulateErrorEveryN = 0, ...writableOpts } = opts;
48
- super(writableOpts);
49
- this.delayMs = delayMs;
50
- this.maxConcurrency = Math.max(1, Math.floor(maxConcurrency));
51
- this.simulateErrorEveryN = Math.max(0, Math.floor(simulateErrorEveryN));
52
- }
53
-
54
- // Node will call _write for each chunk
55
- _write(chunk: any, encoding: BufferEncoding, callback: (err?: Error | null) => void): void {
56
- if (this.destroyedFlag) {
57
- callback(new Error("Stream is destroyed"));
58
- return;
59
- }
60
-
61
- const seq = ++this.seqCounter;
62
- this.queue.push({ chunk, encoding, callback, seq });
63
- this.processQueue();
64
- }
65
-
66
- // Process queued writes honoring maxConcurrency
67
- private processQueue(): void {
68
- // If nothing to do or already at concurrency limit, return
69
- while (this.inFlight < this.maxConcurrency && this.queue.length > 0 && !this.destroyedFlag) {
70
- const item = this.queue.shift()!;
71
- this.inFlight++;
72
- this.performAsyncWrite(item)
73
- .then(() => {
74
- this.inFlight--;
75
- // After finishing one, try to process more
76
- // Use nextTick to avoid deep recursion
77
- process.nextTick(() => this.processQueue());
78
- })
79
- .catch((err) => {
80
- this.inFlight--;
81
- // propagate error via callback; stream will emit 'error' as well
82
- item.callback(err);
83
- this.emit("error", err);
84
- // continue processing queue
85
- process.nextTick(() => this.processQueue());
86
- });
87
- }
88
- }
89
-
90
- // Simulate an asynchronous write that takes `delayMs` ms
91
- private async performAsyncWrite(item: {
92
- chunk: any;
93
- encoding: BufferEncoding;
94
- callback: (err?: Error | null) => void;
95
- seq: number;
96
- }): Promise<void> {
97
- return new Promise((resolve, reject) => {
98
- const maybeError =
99
- this.simulateErrorEveryN > 0 && item.seq % this.simulateErrorEveryN === 0;
100
-
101
- const timer = setTimeout(() => {
102
- // simulate processing chunk here. For demonstration we just log.
103
- // In real use, replace with actual async I/O.
104
- // eslint-disable-next-line no-console
105
- logger.info(`SlowWritable processed seq=${item.seq}`);
106
-
107
- if (maybeError) {
108
- const err = new Error(`Simulated error at seq ${item.seq}`);
109
- item.callback(err);
110
- reject(err);
111
- } else {
112
- item.callback();
113
- resolve();
114
- }
115
- }, this.delayMs);
116
-
117
- // If stream was destroyed meantime, cancel timer and callback with error
118
- const onDestroy = () => {
119
- clearTimeout(timer);
120
- const err = new Error("Stream destroyed while writing");
121
- try {
122
- item.callback(err);
123
- } catch (_) {
124
- // ignore
125
- }
126
- reject(err);
127
- };
128
-
129
- // Ensure we don't leak listeners. If destroyedFlag becomes true quickly, call onDestroy.
130
- if (this.destroyedFlag) {
131
- onDestroy();
132
- }
133
- });
134
- }
135
-
136
- _final(callback: (err?: Error | null) => void): void {
137
- // Wait until queue emptied and inFlight is zero
138
- const check = () => {
139
- if (this.destroyedFlag) {
140
- callback(new Error("Stream destroyed before finalizing"));
141
- return;
142
- }
143
- if (this.queue.length === 0 && this.inFlight === 0) {
144
- callback();
145
- } else {
146
- setTimeout(check, 10);
147
- }
148
- };
149
- check();
150
- }
151
-
152
- _destroy(err: Error | null, callback: (error?: Error | null) => void): void {
153
- this.destroyedFlag = true;
154
- // flush callbacks in queue with error
155
- while (this.queue.length > 0) {
156
- const item = this.queue.shift()!;
157
- try {
158
- item.callback(err ?? new Error("Stream destroyed"));
159
- } catch (_) {
160
- // ignore
161
- }
162
- }
163
- callback(err);
164
- }
165
- }
package/tsconfig.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- // Visit https://aka.ms/tsconfig to read more about this file
3
- "compilerOptions": {
4
- // File Layout
5
- "rootDir": "./src",
6
- "outDir": "./dist",
7
-
8
- // Environment Settings
9
- // See also https://aka.ms/tsconfig/module
10
- "module": "nodenext",
11
- "target": "esnext",
12
- "types": [
13
- "node"
14
- ],
15
- // For nodejs:
16
- // "lib": ["esnext"],
17
- // "types": ["node"],
18
- // and npm install -D @types/node
19
-
20
- // Other Outputs
21
- "sourceMap": true,
22
- "declaration": true,
23
- "declarationMap": true,
24
-
25
- // Stricter Typechecking Options
26
- "noUncheckedIndexedAccess": true,
27
- "exactOptionalPropertyTypes": true,
28
-
29
- // Style Options
30
- // "noImplicitReturns": true,
31
- // "noImplicitOverride": true,
32
- // "noUnusedLocals": true,
33
- // "noUnusedParameters": true,
34
- // "noFallthroughCasesInSwitch": true,
35
- // "noPropertyAccessFromIndexSignature": true,
36
-
37
- // Recommended Options
38
- "strict": true,
39
- "jsx": "react-jsx",
40
- "verbatimModuleSyntax": true,
41
- "isolatedModules": true,
42
- "noUncheckedSideEffectImports": true,
43
- "moduleDetection": "force",
44
- "skipLibCheck": true
45
- }
46
- }