marcattacks 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -13
- package/dist/attacker.d.ts +2 -0
- package/dist/attacker.d.ts.map +1 -0
- package/dist/attacker.js +164 -0
- package/dist/attacker.js.map +1 -0
- package/dist/filestream.d.ts +4 -0
- package/dist/filestream.d.ts.map +1 -0
- package/dist/filestream.js +32 -0
- package/dist/filestream.js.map +1 -0
- package/dist/index.js +13 -175
- package/dist/index.js.map +1 -1
- package/dist/input/alephseq.d.ts +2 -2
- package/dist/input/alephseq.d.ts.map +1 -1
- package/dist/input/alephseq.js +46 -76
- package/dist/input/alephseq.js.map +1 -1
- package/dist/input/json.d.ts +2 -2
- package/dist/input/json.d.ts.map +1 -1
- package/dist/input/json.js +15 -27
- package/dist/input/json.js.map +1 -1
- package/dist/input/jsonl.d.ts +2 -2
- package/dist/input/jsonl.d.ts.map +1 -1
- package/dist/input/jsonl.js +27 -46
- package/dist/input/jsonl.js.map +1 -1
- package/dist/input/marc.d.ts +2 -2
- package/dist/input/marc.d.ts.map +1 -1
- package/dist/input/marc.js +18 -37
- package/dist/input/marc.js.map +1 -1
- package/dist/input/rdf.d.ts +2 -2
- package/dist/input/rdf.d.ts.map +1 -1
- package/dist/input/rdf.js +23 -10
- package/dist/input/rdf.js.map +1 -1
- package/dist/{transform/rdf.d.ts → input/tsv.d.ts} +1 -1
- package/dist/input/tsv.d.ts.map +1 -0
- package/dist/input/tsv.js +45 -0
- package/dist/input/tsv.js.map +1 -0
- package/dist/input/xml.d.ts +2 -2
- package/dist/input/xml.d.ts.map +1 -1
- package/dist/input/xml.js +28 -92
- package/dist/input/xml.js.map +1 -1
- package/dist/output/alephseq.d.ts +1 -1
- package/dist/output/alephseq.d.ts.map +1 -1
- package/dist/output/alephseq.js +2 -2
- package/dist/output/alephseq.js.map +1 -1
- package/dist/output/json.d.ts +1 -1
- package/dist/output/json.d.ts.map +1 -1
- package/dist/output/json.js +2 -2
- package/dist/output/json.js.map +1 -1
- package/dist/output/jsonl.d.ts +1 -1
- package/dist/output/jsonl.d.ts.map +1 -1
- package/dist/output/jsonl.js +3 -3
- package/dist/output/jsonl.js.map +1 -1
- package/dist/output/parquet.d.ts +7 -0
- package/dist/output/parquet.d.ts.map +1 -0
- package/dist/output/parquet.js +15 -0
- package/dist/output/parquet.js.map +1 -0
- package/dist/output/rdf.d.ts +1 -1
- package/dist/output/rdf.d.ts.map +1 -1
- package/dist/output/rdf.js +3 -4
- package/dist/output/rdf.js.map +1 -1
- package/dist/output/tsv.d.ts +3 -0
- package/dist/output/tsv.d.ts.map +1 -0
- package/dist/output/tsv.js +37 -0
- package/dist/output/tsv.js.map +1 -0
- package/dist/output/xml.d.ts +1 -1
- package/dist/output/xml.d.ts.map +1 -1
- package/dist/output/xml.js +3 -3
- package/dist/output/xml.js.map +1 -1
- package/dist/s3stream.d.ts +3 -2
- package/dist/s3stream.d.ts.map +1 -1
- package/dist/s3stream.js +55 -3
- package/dist/s3stream.js.map +1 -1
- package/dist/sftpstream.d.ts +3 -3
- package/dist/sftpstream.d.ts.map +1 -1
- package/dist/sftpstream.js +48 -5
- package/dist/sftpstream.js.map +1 -1
- package/dist/transform/avram.d.ts +6 -0
- package/dist/transform/avram.d.ts.map +1 -0
- package/dist/transform/avram.js +37 -0
- package/dist/transform/avram.js.map +1 -0
- package/dist/transform/jsonata.d.ts +1 -1
- package/dist/transform/jsonata.d.ts.map +1 -1
- package/dist/transform/jsonata.js +9 -6
- package/dist/transform/jsonata.js.map +1 -1
- package/dist/transform/marc2rdf.d.ts +1 -1
- package/dist/transform/marc2rdf.d.ts.map +1 -1
- package/dist/transform/marc2rdf.js +4 -2
- package/dist/transform/marc2rdf.js.map +1 -1
- package/dist/transform/marcids.d.ts +6 -0
- package/dist/transform/marcids.d.ts.map +1 -0
- package/dist/transform/marcids.js +22 -0
- package/dist/transform/marcids.js.map +1 -0
- package/dist/transform/notation3.d.ts +1 -1
- package/dist/transform/notation3.d.ts.map +1 -1
- package/dist/transform/notation3.js +3 -3
- package/dist/transform/notation3.js.map +1 -1
- package/dist/util/stream_helpers.d.ts +8 -0
- package/dist/util/stream_helpers.d.ts.map +1 -1
- package/dist/util/stream_helpers.js +77 -3
- package/dist/util/stream_helpers.js.map +1 -1
- package/package.json +11 -7
- package/dist/transform/json.d.ts +0 -3
- package/dist/transform/json.d.ts.map +0 -1
- package/dist/transform/json.js +0 -32
- package/dist/transform/json.js.map +0 -1
- package/dist/transform/rdf.d.ts.map +0 -1
- package/dist/transform/rdf.js +0 -370
- package/dist/transform/rdf.js.map +0 -1
package/dist/input/xml.js
CHANGED
|
@@ -1,134 +1,70 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Transform } from 'stream';
|
|
2
2
|
import sax from 'sax';
|
|
3
3
|
import log4js from 'log4js';
|
|
4
4
|
const logger = log4js.getLogger();
|
|
5
|
-
export async function
|
|
6
|
-
let recordNum = 0;
|
|
7
|
-
let sourcePaused = false;
|
|
8
|
-
const readableStream = new Readable({
|
|
9
|
-
read() {
|
|
10
|
-
if (sourcePaused) {
|
|
11
|
-
logger.debug("backpressure off");
|
|
12
|
-
stream.resume();
|
|
13
|
-
sourcePaused = false;
|
|
14
|
-
}
|
|
15
|
-
},
|
|
16
|
-
destroy() {
|
|
17
|
-
stream.destroy();
|
|
18
|
-
},
|
|
19
|
-
objectMode: true
|
|
20
|
-
});
|
|
21
|
-
const parser = sax.createStream(true);
|
|
5
|
+
export async function transform(_opts) {
|
|
22
6
|
let record = [];
|
|
23
7
|
let subfield = [];
|
|
24
8
|
let attrib = {};
|
|
25
9
|
let sattrib = {};
|
|
26
|
-
let type;
|
|
27
10
|
let text = '';
|
|
28
11
|
let hasError = false;
|
|
12
|
+
const parser = sax.createStream(true);
|
|
13
|
+
const transformStream = new Transform({
|
|
14
|
+
objectMode: true,
|
|
15
|
+
transform(chunk, _encoding, callback) {
|
|
16
|
+
if (hasError)
|
|
17
|
+
return callback();
|
|
18
|
+
parser.write(chunk);
|
|
19
|
+
callback();
|
|
20
|
+
},
|
|
21
|
+
flush(callback) {
|
|
22
|
+
if (hasError)
|
|
23
|
+
return callback();
|
|
24
|
+
parser.end();
|
|
25
|
+
callback();
|
|
26
|
+
}
|
|
27
|
+
});
|
|
28
|
+
// SAX Parser Event Handlers (Logic migrated from xml.ts)
|
|
29
29
|
parser.on('opentag', (node) => {
|
|
30
|
-
if (hasError)
|
|
31
|
-
return;
|
|
32
30
|
const localName = node.name.replaceAll(/^\w+:/g, '');
|
|
33
|
-
if (localName === '
|
|
34
|
-
// Start collection...
|
|
35
|
-
}
|
|
36
|
-
else if (localName === 'record') {
|
|
37
|
-
// Start record...
|
|
38
|
-
}
|
|
39
|
-
else if (localName === 'leader') {
|
|
40
|
-
type = 'leader';
|
|
41
|
-
}
|
|
42
|
-
else if (localName == 'controlfield') {
|
|
43
|
-
type = 'control';
|
|
44
|
-
attrib = node.attributes;
|
|
45
|
-
}
|
|
46
|
-
else if (localName === 'datafield') {
|
|
31
|
+
if (localName === 'controlfield' || localName === 'datafield') {
|
|
47
32
|
attrib = node.attributes;
|
|
48
33
|
}
|
|
49
34
|
else if (localName === 'subfield') {
|
|
50
35
|
sattrib = node.attributes;
|
|
51
36
|
}
|
|
52
|
-
else {
|
|
53
|
-
logger.error(`unknown tag: ${node.name}`);
|
|
54
|
-
}
|
|
55
37
|
text = '';
|
|
56
38
|
});
|
|
57
39
|
parser.on('text', (t) => {
|
|
58
|
-
if (hasError)
|
|
59
|
-
return;
|
|
60
40
|
text += t;
|
|
61
41
|
});
|
|
62
42
|
parser.on('closetag', (tag) => {
|
|
63
|
-
if (hasError)
|
|
64
|
-
return;
|
|
65
43
|
const localName = tag.replaceAll(/^\w+:/g, '');
|
|
66
44
|
if (localName === 'leader') {
|
|
67
45
|
record.push(['LDR', ' ', ' ', '_', text]);
|
|
68
46
|
}
|
|
69
|
-
else if (localName
|
|
70
|
-
|
|
71
|
-
record.push([tag, ' ', ' ', '_', text]);
|
|
47
|
+
else if (localName === 'controlfield') {
|
|
48
|
+
record.push([attrib.tag, ' ', ' ', '_', text]);
|
|
72
49
|
}
|
|
73
50
|
else if (localName === 'datafield') {
|
|
74
|
-
|
|
75
|
-
let ind1 = attrib.ind1;
|
|
76
|
-
let ind2 = attrib.ind2;
|
|
77
|
-
record.push([tag, ind1, ind2].concat(subfield));
|
|
51
|
+
record.push([attrib.tag, attrib.ind1, attrib.ind2, ...subfield]);
|
|
78
52
|
subfield = [];
|
|
79
53
|
}
|
|
80
54
|
else if (localName === 'subfield') {
|
|
81
|
-
|
|
82
|
-
subfield = subfield.concat([code, text]);
|
|
55
|
+
subfield.push(sattrib.code, text);
|
|
83
56
|
}
|
|
84
57
|
if (localName === 'record') {
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
logger.debug("backpressure on");
|
|
88
|
-
stream.pause();
|
|
89
|
-
sourcePaused = true;
|
|
90
|
-
}
|
|
58
|
+
// Push the completed record object down the pipeline
|
|
59
|
+
transformStream.push({ record });
|
|
91
60
|
record = [];
|
|
92
|
-
recordNum++;
|
|
93
|
-
if (recordNum % 1000 === 0) {
|
|
94
|
-
logger.info(`record: ${recordNum}`);
|
|
95
|
-
}
|
|
96
61
|
}
|
|
97
62
|
});
|
|
98
63
|
parser.on("error", (err) => {
|
|
99
|
-
if (hasError)
|
|
100
|
-
return;
|
|
101
64
|
hasError = true;
|
|
102
65
|
logger.error("Parser error:", err.message);
|
|
103
|
-
|
|
104
|
-
parser.end();
|
|
105
|
-
readableStream.destroy(err instanceof Error ? err : new Error(String(err)));
|
|
106
|
-
});
|
|
107
|
-
parser.on('end', () => {
|
|
108
|
-
if (hasError)
|
|
109
|
-
return;
|
|
110
|
-
logger.info(`processed ${recordNum} records`);
|
|
111
|
-
readableStream.push(null);
|
|
112
|
-
});
|
|
113
|
-
stream.on('data', (chunk) => {
|
|
114
|
-
if (hasError)
|
|
115
|
-
return;
|
|
116
|
-
parser.write(chunk);
|
|
117
|
-
});
|
|
118
|
-
stream.on('end', () => {
|
|
119
|
-
if (hasError)
|
|
120
|
-
return;
|
|
121
|
-
parser.end();
|
|
122
|
-
});
|
|
123
|
-
stream.on('error', (err) => {
|
|
124
|
-
if (hasError)
|
|
125
|
-
return;
|
|
126
|
-
hasError = true;
|
|
127
|
-
logger.error("Source stream error:", err);
|
|
128
|
-
stream.destroy();
|
|
129
|
-
parser.end();
|
|
130
|
-
readableStream.destroy(err);
|
|
66
|
+
transformStream.destroy(err);
|
|
131
67
|
});
|
|
132
|
-
return
|
|
68
|
+
return transformStream;
|
|
133
69
|
}
|
|
134
70
|
//# sourceMappingURL=xml.js.map
|
package/dist/input/xml.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"xml.js","sourceRoot":"","sources":["../../src/input/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"xml.js","sourceRoot":"","sources":["../../src/input/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAKlC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAU;IACtC,IAAI,MAAM,GAAe,EAAE,CAAC;IAC5B,IAAI,QAAQ,GAAa,EAAE,CAAC;IAC5B,IAAI,MAAM,GAAmB,EAAE,CAAC;IAChC,IAAI,OAAO,GAAsB,EAAE,CAAC;IACpC,IAAI,IAAI,GAAW,EAAE,CAAC;IACtB,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,MAAM,MAAM,GAAG,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;IAEtC,MAAM,eAAe,GAAG,IAAI,SAAS,CAAC;QAClC,UAAU,EAAE,IAAI;QAEhB,SAAS,CAAC,KAAU,EAAE,SAAiB,EAAE,QAA2B;YAChE,IAAI,QAAQ;gBAAE,OAAO,QAAQ,EAAE,CAAC;YAChC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACpB,QAAQ,EAAE,CAAC;QACf,CAAC;QAED,KAAK,CAAC,QAA2B;YAC7B,IAAI,QAAQ;gBAAE,OAAO,QAAQ,EAAE,CAAC;YAChC,MAAM,CAAC,GAAG,EAAE,CAAC;YACb,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;IAEH,yDAAyD;IACzD,MAAM,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAa,EAAE,EAAE;QACnC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACrD,IAAI,SAAS,KAAK,cAAc,IAAI,SAAS,KAAK,WAAW,EAAE,CAAC;YAC5D,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC;QAC7B,CAAC;aAAM,IAAI,SAAS,KAAK,UAAU,EAAE,CAAC;YAClC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;QAC9B,CAAC;QACD,IAAI,GAAG,EAAE,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE;QAC5B,IAAI,IAAI,CAAC,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,GAAW,EAAE,EAAE;QAClC,MAAM,SAAS,GAAG,GAAG,CAAC,UAAU,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAE/C,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;QAC9C,CAAC;aAAM,IAAI,SAAS,KAAK,cAAc,EAAE,CAAC;YACtC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,GAAa,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;QAC7D,CAAC;aAAM,IAAI,SAAS,KAAK,WAAW,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,GAAa,EAAE,MAAM,CAAC,IAAc,EAAE,MAAM,CAAC,IAAc,EAAE,GAAG,QAAQ,CAAC,CAAC,CAAC;YAC/F,QAAQ,GAAG,EAAE,CAAC;QAClB,CAAC;aAAM,IAAI,SAAS,KAAK,UAAU,EAAE,CAAC;YACl
C,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAc,EAAE,IAAI,CAAC,CAAC;QAChD,CAAC;QAED,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;YACzB,qDAAqD;YACrD,eAAe,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;YACjC,MAAM,GAAG,EAAE,CAAC;QAChB,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;QACvB,QAAQ,GAAG,IAAI,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,eAAe,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;QAC3C,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,OAAO,eAAe,CAAC;AAC3B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alephseq.d.ts","sourceRoot":"","sources":["../../src/output/alephseq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAMnC,wBAAsB,SAAS,
|
|
1
|
+
{"version":3,"file":"alephseq.d.ts","sourceRoot":"","sources":["../../src/output/alephseq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAMnC,wBAAsB,SAAS,CAAC,MAAM,EAAC,GAAG,GAAI,OAAO,CAAC,SAAS,CAAC,CAwC/D"}
|
package/dist/output/alephseq.js
CHANGED
|
@@ -2,7 +2,7 @@ import { Transform } from 'stream';
|
|
|
2
2
|
import { marcmap } from '../marcmap.js';
|
|
3
3
|
import log4js from 'log4js';
|
|
4
4
|
const logger = log4js.getLogger();
|
|
5
|
-
export async function transform() {
|
|
5
|
+
export async function transform(_param) {
|
|
6
6
|
return new Transform({
|
|
7
7
|
objectMode: true,
|
|
8
8
|
transform(data, _encoding, callback) {
|
|
@@ -31,7 +31,7 @@ export async function transform() {
|
|
|
31
31
|
}
|
|
32
32
|
output += `${id} ${tag}${ind1}${ind2} L ${sf}\n`;
|
|
33
33
|
}
|
|
34
|
-
logger.
|
|
34
|
+
logger.trace(`adding ${output.length} bytes`);
|
|
35
35
|
callback(null, output);
|
|
36
36
|
}
|
|
37
37
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alephseq.js","sourceRoot":"","sources":["../../src/output/alephseq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS;
|
|
1
|
+
{"version":3,"file":"alephseq.js","sourceRoot":"","sources":["../../src/output/alephseq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAU;IACtC,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YACpC,IAAI,GAAG,GAAgB,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEtC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACP,MAAM,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;gBACrC,QAAQ,EAAE,CAAC;gBACX,OAAO;YACX,CAAC;YAED,IAAI,EAAE,GAAG,OAAO,CAAC,GAAG,EAAC,KAAK,EAAC,EAAE,CAAC,CAAC;YAE/B,IAAI,MAAM,GAAG,GAAG,EAAE,eAAe,CAAC;YAElC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAG,CAAC,GAAG,GAAG,CAAC,MAAM,EAAG,CAAC,EAAE,EAAE,CAAC;gBACpC,IAAI,GAAG,GAAI,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,IAAI,GAAG,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,IAAI,GAAG,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,EAAE,GAAG,EAAE,CAAC;gBAEZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAE,CAAC,MAAM,EAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC1C,IAAI,IAAI,GAAG,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC;oBACtB,IAAI,GAAG,GAAI,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC,GAAC,CAAC,CAAC,CAAC;oBACxB,IAAI,GAAI,CAAC,KAAK,CAAC,eAAe,CAAC,EAAE,CAAC;wBAC9B,EAAE,IAAI,GAAG,GAAG,EAAE,CAAC;oBACnB,CAAC;yBACI,CAAC;wBACF,EAAE,IAAI,OAAO,IAAI,GAAG,GAAG,EAAE,CAAC;oBAC9B,CAAC;gBACL,CAAC;gBAED,MAAM,IAAI,GAAG,EAAE,IAAI,GAAG,GAAG,IAAI,GAAG,IAAI,MAAM,EAAE,IAAI,CAAC;YACrD,CAAC;YAED,MAAM,CAAC,KAAK,CAAC,UAAU,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;YAC9C,QAAQ,CAAC,IAAI,EAAC,MAAM,CAAC,CAAC;QAC1B,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
|
package/dist/output/json.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/output/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAKnC,wBAAsB,SAAS,
|
|
1
|
+
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/output/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAKnC,wBAAsB,SAAS,CAAC,MAAM,EAAC,GAAG,GAAI,OAAO,CAAC,SAAS,CAAC,CAmC/D"}
|
package/dist/output/json.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Transform } from 'stream';
|
|
2
2
|
import log4js from 'log4js';
|
|
3
3
|
const logger = log4js.getLogger();
|
|
4
|
-
export async function transform() {
|
|
4
|
+
export async function transform(_param) {
|
|
5
5
|
let isFirst = true;
|
|
6
6
|
let hasClosed = false;
|
|
7
7
|
return new Transform({
|
|
@@ -16,7 +16,7 @@ export async function transform() {
|
|
|
16
16
|
}
|
|
17
17
|
output += JSON.stringify(data);
|
|
18
18
|
isFirst = false;
|
|
19
|
-
logger.
|
|
19
|
+
logger.trace(`adding ${output.length} bytes`);
|
|
20
20
|
callback(null, output);
|
|
21
21
|
},
|
|
22
22
|
flush(callback) {
|
package/dist/output/json.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/output/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS;
|
|
1
|
+
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/output/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAU;IACtC,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YACpC,IAAI,MAAM,GAAG,EAAE,CAAC;YAEhB,IAAI,OAAO,EAAE,CAAC;gBACV,MAAM,IAAI,GAAG,CAAC;YAClB,CAAC;iBACI,CAAC;gBACF,MAAM,IAAI,GAAG,CAAC;YAClB,CAAC;YAED,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAE/B,OAAO,GAAG,KAAK,CAAC;YAEhB,MAAM,CAAC,KAAK,CAAC,UAAU,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;YAC9C,QAAQ,CAAC,IAAI,EAAC,MAAM,CAAC,CAAC;QAC1B,CAAC;QACD,KAAK,CAAC,QAAQ;YACV,yCAAyC;YACzC,IAAI,CAAC,OAAO,IAAI,CAAC,SAAS,EAAE,CAAC;gBACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACf,SAAS,GAAG,IAAI,CAAC;YACrB,CAAC;YACD,QAAQ,EAAE,CAAC;QACf,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,QAAQ;YACjB,QAAQ,CAAC,GAAG,CAAC,CAAC;QAClB,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
|
package/dist/output/jsonl.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.d.ts","sourceRoot":"","sources":["../../src/output/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAKnC,wBAAsB,SAAS,
|
|
1
|
+
{"version":3,"file":"jsonl.d.ts","sourceRoot":"","sources":["../../src/output/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAKnC,wBAAsB,SAAS,CAAC,MAAM,EAAC,GAAG,GAAI,OAAO,CAAC,SAAS,CAAC,CAS/D"}
|
package/dist/output/jsonl.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { Transform } from 'stream';
|
|
2
2
|
import log4js from 'log4js';
|
|
3
3
|
const logger = log4js.getLogger();
|
|
4
|
-
export async function transform() {
|
|
4
|
+
export async function transform(_param) {
|
|
5
5
|
return new Transform({
|
|
6
6
|
objectMode: true,
|
|
7
7
|
transform(data, _encoding, callback) {
|
|
8
|
-
const output = JSON.stringify(data);
|
|
9
|
-
logger.
|
|
8
|
+
const output = JSON.stringify(data) + "\n";
|
|
9
|
+
logger.trace(`adding ${output.length} bytes`);
|
|
10
10
|
callback(null, output);
|
|
11
11
|
}
|
|
12
12
|
});
|
package/dist/output/jsonl.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"jsonl.js","sourceRoot":"","sources":["../../src/output/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS;
|
|
1
|
+
{"version":3,"file":"jsonl.js","sourceRoot":"","sources":["../../src/output/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAU;IACtC,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YACpC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YAC3C,MAAM,CAAC,KAAK,CAAC,UAAU,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;YAC9C,QAAQ,CAAC,IAAI,EAAC,MAAM,CAAC,CAAC;QAC1B,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { Transform } from 'stream';
|
|
2
|
+
/**
|
|
3
|
+
* Transforms an input stream of objects into a Parquet-formatted byte stream.
|
|
4
|
+
* @param param - Configuration or Schema definition
|
|
5
|
+
*/
|
|
6
|
+
export declare function transform(param: any): Promise<Transform>;
|
|
7
|
+
//# sourceMappingURL=parquet.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parquet.d.ts","sourceRoot":"","sources":["../../src/output/parquet.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAInC;;;GAGG;AACH,wBAAsB,SAAS,CAAC,KAAK,EAAE,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAS9D"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { Transform } from 'stream';
|
|
2
|
+
import { ParquetSchema, ParquetTransformer } from '@dsnp/parquetjs';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
/**
|
|
5
|
+
* Transforms an input stream of objects into a Parquet-formatted byte stream.
|
|
6
|
+
* @param param - Configuration or Schema definition
|
|
7
|
+
*/
|
|
8
|
+
export async function transform(param) {
|
|
9
|
+
const schema = typeof param.schema === "string" ?
|
|
10
|
+
new ParquetSchema(JSON.parse(fs.readFileSync(param.schema, { encoding: "utf-8" })))
|
|
11
|
+
: new ParquetSchema(param.schema);
|
|
12
|
+
const transformer = new ParquetTransformer(schema);
|
|
13
|
+
return transformer;
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=parquet.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parquet.js","sourceRoot":"","sources":["../../src/output/parquet.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACpE,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAU;IACtC,MAAM,MAAM,GACR,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC;QAC9B,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAC,CAAC,CAAC,CAAC;QAClF,CAAC,CAAC,IAAI,aAAa,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAE5C,MAAM,WAAW,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAEnD,OAAO,WAAW,CAAC;AACrB,CAAC"}
|
package/dist/output/rdf.d.ts
CHANGED
package/dist/output/rdf.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rdf.d.ts","sourceRoot":"","sources":["../../src/output/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AASnC,wBAAsB,SAAS,
|
|
1
|
+
{"version":3,"file":"rdf.d.ts","sourceRoot":"","sources":["../../src/output/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AASnC,wBAAsB,SAAS,CAAC,MAAM,EAAC,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CA8C9D"}
|
package/dist/output/rdf.js
CHANGED
|
@@ -4,7 +4,7 @@ import { isRecord } from '../types/quad.js';
|
|
|
4
4
|
import N3 from 'n3';
|
|
5
5
|
import log4js from 'log4js';
|
|
6
6
|
const logger = log4js.getLogger();
|
|
7
|
-
export async function transform() {
|
|
7
|
+
export async function transform(_param) {
|
|
8
8
|
let writer;
|
|
9
9
|
let counter = 0;
|
|
10
10
|
return new Transform({
|
|
@@ -12,7 +12,7 @@ export async function transform() {
|
|
|
12
12
|
async transform(data, _encoding, callback) {
|
|
13
13
|
counter++;
|
|
14
14
|
if (isRecord(data)) {
|
|
15
|
-
logger.
|
|
15
|
+
logger.trace(`[${counter}] is a Record`);
|
|
16
16
|
if (!writer) {
|
|
17
17
|
writer = new N3.Writer({
|
|
18
18
|
end: false,
|
|
@@ -23,7 +23,7 @@ export async function transform() {
|
|
|
23
23
|
await writeString(data, undefined, writer);
|
|
24
24
|
}
|
|
25
25
|
else if (Object.hasOwn(data, "@context")) {
|
|
26
|
-
logger.
|
|
26
|
+
logger.trace(`[${counter}] is a JSON-LD`);
|
|
27
27
|
const dataNew = await parseString(JSON.stringify(data), "data.jsonld");
|
|
28
28
|
if (!writer) {
|
|
29
29
|
writer = new N3.Writer({
|
|
@@ -36,7 +36,6 @@ export async function transform() {
|
|
|
36
36
|
}
|
|
37
37
|
else {
|
|
38
38
|
logger.warn(`[${counter}] is not a Record or a JSON-LD`);
|
|
39
|
-
// Consider if you need to initialize a writer here if one doesn't exist
|
|
40
39
|
}
|
|
41
40
|
},
|
|
42
41
|
flush(callback) {
|
package/dist/output/rdf.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rdf.js","sourceRoot":"","sources":["../../src/output/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS;
|
|
1
|
+
{"version":3,"file":"rdf.js","sourceRoot":"","sources":["../../src/output/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAU;IACtC,IAAI,MAA6B,CAAC;IAClC,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,KAAK,CAAC,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YAC1C,OAAO,EAAE,CAAC;YAEV,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACjB,MAAM,CAAC,KAAK,CAAC,IAAI,OAAO,eAAe,CAAC,CAAC;gBACzC,IAAI,CAAC,MAAM,EAAE,CAAC;oBACV,MAAM,GAAG,IAAI,EAAE,CAAC,MAAM,CAAC;wBACnB,GAAG,EAAE,KAAK;wBACV,QAAQ,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE;wBAChC,KAAK,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;qBAC7C,CAAC,CAAC;gBACP,CAAC;gBACD,MAAM,WAAW,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;YAC/C,CAAC;iBACI,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,UAAU,CAAC,EAAE,CAAC;gBACvC,MAAM,CAAC,KAAK,CAAC,IAAI,OAAO,gBAAgB,CAAC,CAAC;gBAC1C,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,aAAa,CAAC,CAAC;gBAEvE,IAAI,CAAC,MAAM,EAAE,CAAC;oBACV,MAAM,GAAG,IAAI,EAAE,CAAC,MAAM,CAAC;wBACnB,GAAG,EAAE,KAAK;wBACV,QAAQ,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE;wBAChC,KAAK,EAAE,CAAC,KAAa,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;qBAC7C,CAAC,CAAC;gBACP,CAAC;gBACD,MAAM,WAAW,CAAC,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;YAClD,CAAC;iBACI,CAAC;gBACF,MAAM,CAAC,IAAI,CAAC,IAAI,OAAO,gCAAgC,CAAC,CAAC;YAC7D,CAAC;QACL,CAAC;QACD,KAAK,CAAC,QAAQ;YACV,MAAM,EAAE,GAAG,EAAE,CAAC;YACd,QAAQ,EAAE,CAAC;QACf,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,QAAQ;YACjB,MAAM,EAAE,GAAG,EAAE,CAAC;YACd,QAAQ,CAAC,GAAG,CAAC,CAAC;QAClB,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tsv.d.ts","sourceRoot":"","sources":["../../src/output/tsv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAKnC,wBAAsB,SAAS,CAAC,KAAK,EAAC,GAAG,GAAI,OAAO,CAAC,SAAS,CAAC,CAqC9D"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { Transform } from 'stream';
|
|
2
|
+
import log4js from 'log4js';
|
|
3
|
+
const logger = log4js.getLogger();
|
|
4
|
+
export async function transform(param) {
|
|
5
|
+
let sortedKeys;
|
|
6
|
+
return new Transform({
|
|
7
|
+
objectMode: true,
|
|
8
|
+
transform(data, _encoding, callback) {
|
|
9
|
+
let output = "";
|
|
10
|
+
let fields = [];
|
|
11
|
+
if (!sortedKeys) {
|
|
12
|
+
sortedKeys = Object.keys(data).sort();
|
|
13
|
+
if (param.header === "no") {
|
|
14
|
+
// ok skipped header
|
|
15
|
+
}
|
|
16
|
+
else {
|
|
17
|
+
output += sortedKeys.join("\t") + "\n";
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
sortedKeys.forEach(key => {
|
|
21
|
+
if (Array.isArray(data[key])) {
|
|
22
|
+
fields.push(`ARRAY[${data[key].length}]`);
|
|
23
|
+
}
|
|
24
|
+
else if (typeof data[key] === 'object') {
|
|
25
|
+
fields.push(`HASH[${Object.keys(data[key]).length}]`);
|
|
26
|
+
}
|
|
27
|
+
else {
|
|
28
|
+
fields.push(data[key]);
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
output += fields.join("\t") + "\n";
|
|
32
|
+
logger.trace(`adding ${output.length} bytes`);
|
|
33
|
+
callback(null, output);
|
|
34
|
+
}
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
//# sourceMappingURL=tsv.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tsv.js","sourceRoot":"","sources":["../../src/output/tsv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAS;IACrC,IAAI,UAAqB,CAAC;IAE1B,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YACpC,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,MAAM,GAAc,EAAE,CAAC;YAE3B,IAAI,CAAE,UAAU,EAAG,CAAC;gBAChB,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBACtC,IAAI,KAAK,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;oBACxB,oBAAoB;gBACxB,CAAC;qBACI,CAAC;oBACF,MAAM,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;gBAC3C,CAAC;YACL,CAAC;YAED,UAAU,CAAC,OAAO,CAAE,GAAG,CAAC,EAAE;gBACtB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;oBAC3B,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC9C,CAAC;qBACI,IAAI,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,QAAQ,EAAE,CAAC;oBACrC,MAAM,CAAC,IAAI,CAAC,QAAQ,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;gBAC1D,CAAC;qBACI,CAAC;oBACF,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC3B,CAAC;YACL,CAAC,CAAC,CAAC;YAEH,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YAEnC,MAAM,CAAC,KAAK,CAAC,UAAU,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;YAC9C,QAAQ,CAAC,IAAI,EAAC,MAAM,CAAC,CAAC;QAC1B,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
|
package/dist/output/xml.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Transform } from 'stream';
|
|
2
|
-
export declare function transform(): Promise<Transform>;
|
|
2
|
+
export declare function transform(_param: any): Promise<Transform>;
|
|
3
3
|
export declare function escapeXML(value: string | number | null | undefined, options?: {
|
|
4
4
|
forAttribute?: boolean;
|
|
5
5
|
}): string;
|
package/dist/output/xml.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"xml.d.ts","sourceRoot":"","sources":["../../src/output/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAMnC,wBAAsB,SAAS,
|
|
1
|
+
{"version":3,"file":"xml.d.ts","sourceRoot":"","sources":["../../src/output/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAMnC,wBAAsB,SAAS,CAAC,MAAM,EAAC,GAAG,GAAI,OAAO,CAAC,SAAS,CAAC,CAiE/D;AAED,wBAAgB,SAAS,CACvB,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,GAAG,SAAS,EACzC,OAAO,CAAC,EAAE;IAAE,YAAY,CAAC,EAAE,OAAO,CAAA;CAAE,GACnC,MAAM,CAqBR"}
|
package/dist/output/xml.js
CHANGED
|
@@ -2,7 +2,7 @@ import { Transform } from 'stream';
|
|
|
2
2
|
import { marctag, marcind, marcsubfields, marcForEachSub } from '../marcmap.js';
|
|
3
3
|
import log4js from 'log4js';
|
|
4
4
|
const logger = log4js.getLogger();
|
|
5
|
-
export async function transform() {
|
|
5
|
+
export async function transform(_param) {
|
|
6
6
|
let isFirst = true;
|
|
7
7
|
let hasClosed = false;
|
|
8
8
|
return new Transform({
|
|
@@ -41,14 +41,14 @@ export async function transform() {
|
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
output += " </marc:record>\n";
|
|
44
|
-
logger.
|
|
44
|
+
logger.trace(`adding ${output.length} bytes`);
|
|
45
45
|
callback(null, output);
|
|
46
46
|
},
|
|
47
47
|
flush(callback) {
|
|
48
48
|
if (!isFirst && !hasClosed) {
|
|
49
49
|
logger.debug("flushing");
|
|
50
50
|
let output = "</marc:collection>\n";
|
|
51
|
-
logger.
|
|
51
|
+
logger.trace(`adding ${output.length} bytes`);
|
|
52
52
|
this.push(output);
|
|
53
53
|
hasClosed = true;
|
|
54
54
|
}
|
package/dist/output/xml.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"xml.js","sourceRoot":"","sources":["../../src/output/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,aAAa,EAAG,cAAc,EAAC,MAAM,eAAe,CAAC;AAChF,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS;
|
|
1
|
+
{"version":3,"file":"xml.js","sourceRoot":"","sources":["../../src/output/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,aAAa,EAAG,cAAc,EAAC,MAAM,eAAe,CAAC;AAChF,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAU;IACtC,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YACpC,IAAI,GAAG,GAAgB,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEtC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACP,QAAQ,EAAE,CAAA;gBACV,OAAO;YACX,CAAC;YAED,IAAI,MAAM,GAAG,EAAE,CAAC;YAEhB,IAAI,OAAO,EAAE,CAAC;gBACV,MAAM,IAAI,8CAA8C,CAAC;gBACzD,MAAM,IAAI,mEAAmE,CAAC;gBAC9E,OAAO,GAAG,KAAK,CAAC;YACpB,CAAC;YAED,MAAM,IAAI,kBAAkB,CAAC;YAE7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAG,CAAC,GAAG,GAAG,CAAC,MAAM,EAAG,CAAC,EAAE,EAAE,CAAC;gBACpC,IAAI,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1B,IAAI,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1B,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC,CAAA,CAAC;qBAChB,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;oBACrB,IAAI,KAAK,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAE,EAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC3C,MAAM,IAAI,kBAAkB,SAAS,CAAC,KAAK,CAAC,kBAAkB,CAAC;gBACnE,CAAC;qBACI,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBACxB,IAAI,KAAK,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAE,EAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC3C,MAAM,IAAI,6BAA6B,GAAG,KAAK,SAAS,CAAC,KAAK,CAAC,wBAAwB,CAAC;gBAC5F,CAAC;qBACI,CAAC;oBACF,MAAM,IAAI,0BAA0B,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;oBAChF,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,EAAC,KAAK,EAAE,EAAE;wBAClC,MAAM,IAAI,4BAA4B,IAAI,KAAK,SAAS,CAAC,KAAK,CAAC,oBAAoB,CAAC;oBACxF,CAAC,CAAC,CAAC;oBACH,MAAM,IAAI,uBAAuB,CAAC;gBACtC,CAAC;YACL,CAAC;YAED,MAAM,IAAI,mBAAmB,CAAC;YAE9B,MAAM,CAAC,KAAK,CAAC,UAAU,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;YAE9C,QAAQ,CAAC,IAAI,EAAC,MAAM,CAAC,CAAC;QAC1B,CAAC;QACD,KAAK,CAAC,QAAQ;YACV,IAAI,CAAC,OAAO,IAAI,CAAC,SAAS,EAAE,CAAC;gBACzB,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,C
AAC;gBACzB,IAAI,MAAM,GAAG,sBAAsB,CAAC;gBACpC,MAAM,CAAC,KAAK,CAAC,UAAU,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;gBAC9C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAClB,SAAS,GAAG,IAAI,CAAC;YACrB,CAAC;YACD,QAAQ,EAAE,CAAC;QACf,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,QAAQ;YACjB,QAAQ,CAAC,GAAG,CAAC,CAAC;QAClB,CAAC;KACJ,CAAC,CAAC;AACP,CAAC;AAED,MAAM,UAAU,SAAS,CACvB,KAAyC,EACzC,OAAoC;IAElC,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,EAAE,CAAC;IAErD,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAEtB,uDAAuD;IACvD,0DAA0D;IAC1D,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,uDAAuD,EAAE,EAAE,CAAC,CAAC;IAE3E,6EAA6E;IAC7E,wDAAwD;IACxD,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,0CAA0C,EAAE,OAAO,CAAC,CAAC;IAEnE,kBAAkB;IAClB,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAElD,IAAI,OAAO,EAAE,YAAY,EAAE,CAAC;QACxB,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IAC1D,CAAC;IAED,OAAO,CAAC,CAAC;AACb,CAAC"}
|
package/dist/s3stream.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { Readable, Writable } from "stream";
|
|
2
|
-
export declare function
|
|
2
|
+
/**
 * Type declaration for the S3 read-stream factory (renamed in this release).
 *
 * @param url - URL identifying the S3 object to read.
 * @param options - Read options; `range` is optional.
 *   NOTE(review): `range` is presumably a byte-range specifier passed on to the
 *   S3 request; its use is not visible in this hunk, so confirm in s3stream.ts.
 * @returns A promise that resolves to a Node.js `Readable`.
 */
export declare function s3ReadStream(url: URL, options: {
|
|
3
3
|
range?: string;
|
|
4
4
|
}): Promise<Readable>;
|
|
5
|
-
export declare function
|
|
5
|
+
/**
 * Type declaration for the S3 write-stream factory (renamed in this release).
 *
 * @param url - URL identifying the S3 object to write.
 * @param options - Write options; `partSize` is optional.
 *   NOTE(review): `partSize` is presumably the multipart-upload part size; its
 *   use is not visible in this hunk, so confirm in s3stream.ts.
 * @returns A promise that resolves to a Node.js `Writable`.
 */
export declare function s3WriteStream(url: URL, options: {
|
|
6
6
|
partSize?: number;
|
|
7
7
|
}): Promise<Writable>;
|
|
8
|
+
/**
 * Type declaration for the `@latest:` resolver added in this release.
 *
 * @param url - S3 URL; returned as-is when its href does not contain `@latest:`.
 * @param opts - Untyped options; the compiled implementation does not read them.
 * @returns A promise for the input URL, or for a URL naming the most recently
 *   modified object whose key ends with the extension given after `@latest:`.
 */
export declare function s3LatestObject(url: URL, opts: any): Promise<URL>;
|
|
8
9
|
//# sourceMappingURL=s3stream.d.ts.map
|
package/dist/s3stream.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"s3stream.d.ts","sourceRoot":"","sources":["../src/s3stream.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"s3stream.d.ts","sourceRoot":"","sources":["../src/s3stream.ts"],"names":[],"mappings":"AAWA,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAa5C,wBAAsB,YAAY,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,QAAQ,CAAC,CA0F3F;AAED,wBAAgB,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;CAAC,GAAI,OAAO,CAAC,QAAQ,CAAC,CA+F1F;AAED,wBAAsB,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CA8DtE"}
|
package/dist/s3stream.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { S3Client, GetObjectCommand, PutObjectCommand, UploadPartCommand, CreateMultipartUploadCommand, CompleteMultipartUploadCommand } from "@aws-sdk/client-s3";
|
|
1
|
+
import { S3Client, GetObjectCommand, PutObjectCommand, UploadPartCommand, CreateMultipartUploadCommand, CompleteMultipartUploadCommand, paginateListObjectsV2 } from "@aws-sdk/client-s3";
|
|
2
2
|
import { Readable, Writable } from "stream";
|
|
3
3
|
import log4js from 'log4js';
|
|
4
4
|
const logger = log4js.getLogger();
|
|
5
|
-
export async function
|
|
5
|
+
export async function s3ReadStream(url, options) {
|
|
6
6
|
const config = parseURL(url);
|
|
7
7
|
logger.debug(`s3 config:`, config);
|
|
8
8
|
const bucket = config.bucket;
|
|
@@ -88,7 +88,7 @@ export async function s3ReaderStream(url, options) {
|
|
|
88
88
|
// Unknown body shape
|
|
89
89
|
throw new Error("Unsupported S3 GetObject body type");
|
|
90
90
|
}
|
|
91
|
-
export function
|
|
91
|
+
export function s3WriteStream(url, options) {
|
|
92
92
|
return new Promise((resolve) => {
|
|
93
93
|
const config = parseURL(url);
|
|
94
94
|
logger.debug(`s3 config:`, config);
|
|
@@ -174,6 +174,58 @@ export function s3WriterStream(url, options) {
|
|
|
174
174
|
resolve(writer);
|
|
175
175
|
});
|
|
176
176
|
}
|
|
177
|
+
/**
 * Resolve an `@latest:<extension>` S3 URL to the newest matching object.
 *
 * A URL whose href does not contain `@latest:` is returned unchanged (the very
 * same object). Otherwise the path text in front of `@latest:` names the bucket
 * and the text after it is the extension to look for. Every page of the bucket
 * listing is scanned; among the keys that end with the extension (compared
 * case-insensitively) the one with the greatest `LastModified` wins.
 *
 * @param {URL} url - S3 URL, e.g. `s3://host:9000/bucket/@latest:xml`.
 * @param {*} opts - Not read by this function; kept for interface compatibility.
 * @returns {Promise<URL>} The input URL, or a new URL of the form
 *   `<protocol>//<host>[:<port>]/<bucket>/<key>` for the newest match.
 * @throws {Error} When no key matches; listing errors are logged and rethrown.
 */
export async function s3LatestObject(url, opts) {
    if (!url.href.includes("@latest:")) {
        return url;
    }
    // `URL.pathname` always begins with "/" (and may end with one right before
    // the marker). S3 bucket names cannot contain slashes, so trim them here;
    // this also avoids a doubled "/" when the resolved URL is assembled below.
    const bucket = url.pathname
        .replaceAll(/@latest:.*/g, "")
        .replace(/^\/+|\/+$/g, "");
    const extension = url.pathname.replaceAll(/.*@latest:/g, "");
    const targetExt = extension.toLowerCase();
    const config = parseURL(url);
    const s3Client = new S3Client(config);
    const paginatorConfig = {
        client: s3Client,
        pageSize: 1000
    };
    const commandInput = {
        Bucket: bucket,
    };
    try {
        let latestFile = null;
        // Walk every page of the listing; a bucket can hold more than one page.
        for await (const page of paginateListObjectsV2(paginatorConfig, commandInput)) {
            for (const obj of page.Contents ?? []) {
                if (!obj.Key?.toLowerCase().endsWith(targetExt)) {
                    continue;
                }
                // Keep only the most recently modified candidate.
                if (!latestFile || (obj.LastModified > latestFile.LastModified)) {
                    latestFile = obj;
                }
            }
        }
        if (!latestFile || !latestFile.Key) {
            throw new Error(`No file with extension "${extension}" found in bucket "${bucket}".`);
        }
        // `url.protocol` already carries its trailing ":", so only "//" is added.
        // Ports 80 and 443 are left out as before; an empty port no longer
        // produces a dangling ":" after the host name.
        const port = url.port;
        const authority = (port === "" || port === "80" || port === "443")
            ? url.hostname
            : `${url.hostname}:${port}`;
        // NOTE(review): the key is inserted verbatim (as in the previous code), so
        // keys containing "?" or "#" will not survive URL parsing — confirm whether
        // callers expect percent-encoding here.
        const resolved = `${url.protocol}//${authority}/${bucket}/${latestFile.Key}`;
        logger.info(`resolved as ${resolved}`);
        return new URL(resolved);
    }
    catch (error) {
        // Use the module logger, like the rest of this file, instead of console.
        logger.error("Error finding latest S3 file:", error);
        throw error;
    }
}
|
|
177
229
|
/**
 * Duck-type check for a Node.js readable stream.
 *
 * Falsy inputs are handed back unchanged; any other value yields `true` only
 * when both `pipe` and `read` are functions on it, otherwise `false`.
 */
function isNodeReadable(x) {
    if (!x) {
        return x;
    }
    if (typeof x.pipe !== "function") {
        return false;
    }
    return typeof x.read === "function";
}
|