marcattacks 2.2.18 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/input/csv.d.ts +5 -0
- package/dist/input/csv.d.ts.map +1 -0
- package/dist/input/csv.js +14 -0
- package/dist/input/csv.js.map +1 -0
- package/dist/output/csv.d.ts +6 -0
- package/dist/output/csv.d.ts.map +1 -0
- package/dist/output/csv.js +51 -0
- package/dist/output/csv.js.map +1 -0
- package/package.json +3 -1
- package/dist/util/csv_parse.d.ts +0 -3
- package/dist/util/csv_parse.d.ts.map +0 -1
- package/dist/util/csv_parse.js +0 -22
- package/dist/util/csv_parse.js.map +0 -1
package/README.md
CHANGED
|
@@ -98,6 +98,7 @@ use `s3s://...` for using an SSL layer.
|
|
|
98
98
|
- jsonl
|
|
99
99
|
- marc (ISO2709)
|
|
100
100
|
- rdf
|
|
101
|
+
- csv
|
|
101
102
|
- tsv
|
|
102
103
|
- xml (MARCXML)
|
|
103
104
|
|
|
@@ -108,6 +109,7 @@ use `s3s://...` for using an SSL layer.
|
|
|
108
109
|
- jsonl
|
|
109
110
|
- parquet
|
|
110
111
|
- rdf
|
|
112
|
+
- csv
|
|
111
113
|
- tsv
|
|
112
114
|
- xml (MARCXML)
|
|
113
115
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.d.ts","sourceRoot":"","sources":["../../src/input/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAG3D,wBAAsB,SAAS,CAAC,IAAI,EAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAYhF"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { Transform } from "stream";
|
|
2
|
+
import { parse } from "csv-parse";
|
|
3
|
+
export async function transform(opts) {
|
|
4
|
+
const delimiter = opts['delimiter'] ?? ",";
|
|
5
|
+
const parser = parse({
|
|
6
|
+
delimiter,
|
|
7
|
+
columns: true, // Use first row as header keys
|
|
8
|
+
skip_empty_lines: true,
|
|
9
|
+
trim: false, // Match original behaviour — no implicit trimming
|
|
10
|
+
relax_column_count: false,
|
|
11
|
+
});
|
|
12
|
+
return parser;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=csv.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/input/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,EAAE,KAAK,EAAmC,MAAM,WAAW,CAAC;AAEnE,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAA4B;IACxD,MAAM,SAAS,GAAW,IAAI,CAAC,WAAW,CAAC,IAAI,GAAG,CAAC;IAEnD,MAAM,MAAM,GAAG,KAAK,CAAC;QACjB,SAAS;QACT,OAAO,EAAE,IAAI,EAAS,+BAA+B;QACrD,gBAAgB,EAAE,IAAI;QACtB,IAAI,EAAE,KAAK,EAAW,kDAAkD;QACxE,kBAAkB,EAAE,KAAK;KACF,CAAC,CAAC;IAE7B,OAAO,MAA8B,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.d.ts","sourceRoot":"","sources":["../../src/output/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAMnC,wBAAsB,SAAS,CAAC,IAAI,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAqDhG"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { Transform } from 'stream';
|
|
2
|
+
import { stringify } from 'csv-stringify';
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
const logger = log4js.getLogger();
|
|
5
|
+
export async function transform(opts) {
|
|
6
|
+
const delimiter = opts['delimiter'] ?? ',';
|
|
7
|
+
let stringifier = null;
|
|
8
|
+
let sortedKeys;
|
|
9
|
+
return new Transform({
|
|
10
|
+
objectMode: true,
|
|
11
|
+
transform(data, _encoding, callback) {
|
|
12
|
+
if (Object.keys(data).length === 0) {
|
|
13
|
+
logger.debug('skipped empty record');
|
|
14
|
+
callback();
|
|
15
|
+
return;
|
|
16
|
+
}
|
|
17
|
+
// Initialise the stringifier on the first record
|
|
18
|
+
if (!stringifier) {
|
|
19
|
+
sortedKeys = Object.keys(data).sort();
|
|
20
|
+
stringifier = stringify({
|
|
21
|
+
delimiter,
|
|
22
|
+
columns: sortedKeys,
|
|
23
|
+
header: opts.header !== 'no',
|
|
24
|
+
cast: {
|
|
25
|
+
object: (value) => Array.isArray(value)
|
|
26
|
+
? `ARRAY[${value.length}]`
|
|
27
|
+
: `HASH[${Object.keys(value).length}]`,
|
|
28
|
+
},
|
|
29
|
+
});
|
|
30
|
+
stringifier.on('data', (chunk) => this.push(chunk));
|
|
31
|
+
stringifier.on('error', (err) => this.destroy(err));
|
|
32
|
+
}
|
|
33
|
+
const record = {};
|
|
34
|
+
for (const key of sortedKeys) {
|
|
35
|
+
record[key] = data[key];
|
|
36
|
+
}
|
|
37
|
+
logger.trace(`writing record with ${sortedKeys.length} fields`);
|
|
38
|
+
stringifier.write(record, _encoding, callback);
|
|
39
|
+
},
|
|
40
|
+
flush(callback) {
|
|
41
|
+
if (stringifier) {
|
|
42
|
+
stringifier.end();
|
|
43
|
+
stringifier.once('finish', callback);
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
callback();
|
|
47
|
+
}
|
|
48
|
+
},
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
//# sourceMappingURL=csv.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/output/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,SAAS,EAAuC,MAAM,eAAe,CAAC;AAC/E,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAA4C;IACxE,MAAM,SAAS,GAAW,IAAI,CAAC,WAAW,CAAC,IAAI,GAAG,CAAC;IAEnD,IAAI,WAAW,GAAwC,IAAI,CAAC;IAC5D,IAAI,UAAoB,CAAC;IAEzB,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YACpC,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;gBACrC,QAAQ,EAAE,CAAC;gBACX,OAAO;YACX,CAAC;YAED,iDAAiD;YACjD,IAAI,CAAC,WAAW,EAAE,CAAC;gBACf,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBAEtC,WAAW,GAAG,SAAS,CAAC;oBACpB,SAAS;oBACT,OAAO,EAAE,UAAU;oBACnB,MAAM,EAAE,IAAI,CAAC,MAAM,KAAK,IAAI;oBAC5B,IAAI,EAAE;wBACF,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CACd,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;4BAChB,CAAC,CAAC,SAAS,KAAK,CAAC,MAAM,GAAG;4BAC1B,CAAC,CAAC,QAAQ,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG;qBACjD;iBAC0B,CAAC,CAAC;gBAEjC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;gBACpD,WAAW,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;YACxD,CAAC;YAED,MAAM,MAAM,GAA4B,EAAE,CAAC;YAC3C,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;gBAC3B,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;YAC5B,CAAC;YAED,MAAM,CAAC,KAAK,CAAC,uBAAuB,UAAU,CAAC,MAAM,SAAS,CAAC,CAAC;YAChE,WAAW,CAAC,KAAK,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QACnD,CAAC;QAED,KAAK,CAAC,QAAQ;YACV,IAAI,WAAW,EAAE,CAAC;gBACd,WAAW,CAAC,GAAG,EAAE,CAAC;gBAClB,WAAW,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YACzC,CAAC;iBAAM,CAAC;gBACJ,QAAQ,EAAE,CAAC;YACf,CAAC;QACL,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "marcattacks",
|
|
3
|
-
"version": "2.2.18",
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"description": "A powerful streaming MARC21 to RDF converter with JSONata transformation and S3 support.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"marc21",
|
|
@@ -62,6 +62,8 @@
|
|
|
62
62
|
"@dsnp/parquetjs": "^1.8.7",
|
|
63
63
|
"@fast-csv/parse": "^5.0.5",
|
|
64
64
|
"commander": "^14.0.2",
|
|
65
|
+
"csv-parse": "^6.2.1",
|
|
66
|
+
"csv-stringify": "^6.7.0",
|
|
65
67
|
"dotenv": "^17.2.3",
|
|
66
68
|
"fast-xml-parser": "^5.3.4",
|
|
67
69
|
"jsonata": "^2.1.0",
|
package/dist/util/csv_parse.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"csv_parse.d.ts","sourceRoot":"","sources":["../../src/util/csv_parse.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAGlC,wBAAsB,WAAW,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAqBvF"}
|
package/dist/util/csv_parse.js
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { Readable } from "stream";
|
|
2
|
-
import { parse } from '@fast-csv/parse';
|
|
3
|
-
export async function parseStream(readable) {
|
|
4
|
-
return new Promise((resolve, reject) => {
|
|
5
|
-
const rows = [];
|
|
6
|
-
readable
|
|
7
|
-
.pipe(parse({
|
|
8
|
-
delimiter: "\t",
|
|
9
|
-
headers: true,
|
|
10
|
-
trim: true,
|
|
11
|
-
ignoreEmpty: true,
|
|
12
|
-
}))
|
|
13
|
-
.on("error", reject)
|
|
14
|
-
.on("data", (row) => {
|
|
15
|
-
rows.push(row);
|
|
16
|
-
})
|
|
17
|
-
.on("end", () => {
|
|
18
|
-
resolve(rows);
|
|
19
|
-
});
|
|
20
|
-
});
|
|
21
|
-
}
|
|
22
|
-
//# sourceMappingURL=csv_parse.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"csv_parse.js","sourceRoot":"","sources":["../../src/util/csv_parse.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAExC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAkB;IAClD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,IAAI,GAA6B,EAAE,CAAC;QAE1C,QAAQ;aACL,IAAI,CACH,KAAK,CAAC;YACJ,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,IAAI;YACV,WAAW,EAAE,IAAI;SAClB,CAAC,CACH;aACA,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC;aACnB,EAAE,CAAC,MAAM,EAAE,CAAC,GAA2B,EAAE,EAAE;YAC1C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC,CAAC;aACD,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;YACd,OAAO,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACL,CAAC"}
|