marcattacks 2.2.18 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -98,6 +98,7 @@ use `s3s://...` for using an SSL layer.
98
98
  - jsonl
99
99
  - marc (ISO2709)
100
100
  - rdf
101
+ - csv
101
102
  - tsv
102
103
  - xml (MARCXML)
103
104
 
@@ -108,6 +109,7 @@ use `s3s://...` for using an SSL layer.
108
109
  - jsonl
109
110
  - parquet
110
111
  - rdf
112
+ - csv
111
113
  - tsv
112
114
  - xml (MARCXML)
113
115
 
@@ -0,0 +1,5 @@
1
+ import { Transform } from "stream";
2
/**
 * Creates the CSV input stream.
 *
 * @param opts - Parser options. `delimiter` is the optional field separator;
 *   the compiled implementation shipped alongside this declaration falls back
 *   to "," when it is not supplied.
 * @returns A promise resolving to the Transform stream that performs the parsing.
 */
+ export declare function transform(opts: {
3
+ delimiter?: string;
4
+ }): Promise<Transform>;
5
+ //# sourceMappingURL=csv.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.d.ts","sourceRoot":"","sources":["../../src/input/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAG3D,wBAAsB,SAAS,CAAC,IAAI,EAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAYhF"}
@@ -0,0 +1,14 @@
1
+ import { Transform } from "stream";
2
+ import { parse } from "csv-parse";
3
/**
 * Build the streaming CSV reader.
 *
 * @param opts Reader options. `opts.delimiter` is the field separator; when it
 *   is null or undefined a comma is used.
 * @returns {Promise<Transform>} Promise resolving to the csv-parse parser
 *   stream configured below.
 */
export async function transform(opts) {
    const { delimiter: separator } = opts;
    return parse({
        delimiter: separator ?? ",",
        // The first row supplies the keys of every emitted record.
        columns: true,
        skip_empty_lines: true,
        // Values are passed through untrimmed, matching the original behaviour.
        trim: false,
        relax_column_count: false,
    });
}
14
+ //# sourceMappingURL=csv.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/input/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,EAAE,KAAK,EAAmC,MAAM,WAAW,CAAC;AAEnE,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAA4B;IACxD,MAAM,SAAS,GAAW,IAAI,CAAC,WAAW,CAAC,IAAI,GAAG,CAAC;IAEnD,MAAM,MAAM,GAAG,KAAK,CAAC;QACjB,SAAS;QACT,OAAO,EAAE,IAAI,EAAS,+BAA+B;QACrD,gBAAgB,EAAE,IAAI;QACtB,IAAI,EAAE,KAAK,EAAW,kDAAkD;QACxE,kBAAkB,EAAE,KAAK;KACF,CAAC,CAAC;IAE7B,OAAO,MAA8B,CAAC;AAC1C,CAAC"}
@@ -0,0 +1,6 @@
1
+ import { Transform } from 'stream';
2
/**
 * Creates the CSV output stream.
 *
 * @param opts - Writer options. `header`: the compiled implementation shipped
 *   alongside this declaration emits a header row unless this value is exactly
 *   the string 'no'. `delimiter`: optional field separator, "," when not supplied.
 * @returns A promise resolving to the Transform stream that serialises records.
 */
+ export declare function transform(opts: {
3
+ header: string;
4
+ delimiter?: string;
5
+ }): Promise<Transform>;
6
+ //# sourceMappingURL=csv.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.d.ts","sourceRoot":"","sources":["../../src/output/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAMnC,wBAAsB,SAAS,CAAC,IAAI,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAqDhG"}
@@ -0,0 +1,51 @@
1
+ import { Transform } from 'stream';
2
+ import { stringify } from 'csv-stringify';
3
+ import log4js from 'log4js';
4
+ const logger = log4js.getLogger();
5
/**
 * Build an object-mode Transform that serialises plain records into CSV text.
 *
 * The column layout is fixed by the first non-empty record: its keys, sorted,
 * become the CSV columns. Every later record is projected onto those columns,
 * so keys that were absent from the first record are not written.
 *
 * @param opts Writer options.
 *   `opts.delimiter` - field separator; "," when null or undefined.
 *   `opts.header` - a header row is emitted unless this is exactly the string 'no'.
 * @returns {Promise<Transform>} Promise resolving to the Transform stream.
 */
export async function transform(opts) {
    const delimiter = opts['delimiter'] ?? ',';
    // Created lazily because the columns are only known once a record arrives.
    let stringifier = null;
    let sortedKeys;
    return new Transform({
        objectMode: true,
        transform(data, _encoding, callback) {
            if (Object.keys(data).length === 0) {
                logger.debug('skipped empty record');
                callback();
                return;
            }
            // Initialise the stringifier on the first record
            if (!stringifier) {
                sortedKeys = Object.keys(data).sort();
                stringifier = stringify({
                    delimiter,
                    columns: sortedKeys,
                    header: opts.header !== 'no',
                    cast: {
                        // Nested values are summarised rather than serialised:
                        // arrays as ARRAY[<length>], other objects as HASH[<key count>].
                        object: (value) => Array.isArray(value)
                            ? `ARRAY[${value.length}]`
                            : `HASH[${Object.keys(value).length}]`,
                    },
                });
                // Arrow functions keep `this` bound to the outer Transform.
                stringifier.on('data', (chunk) => this.push(chunk));
                stringifier.on('error', (err) => this.destroy(err));
            }
            // Project the incoming record onto the columns fixed by the first record.
            const record = {};
            for (const key of sortedKeys) {
                record[key] = data[key];
            }
            logger.trace(`writing record with ${sortedKeys.length} fields`);
            stringifier.write(record, _encoding, callback);
        },
        flush(callback) {
            if (!stringifier) {
                // No record was ever written, so there is nothing to drain.
                callback();
                return;
            }
            // Wait for the readable side to finish ('end'), not just the writable
            // side ('finish'): only 'end' guarantees that every 'data' chunk has
            // been forwarded through this.push() before this stream ends.
            // The listener is attached before end() so the event cannot be missed.
            stringifier.once('end', () => callback());
            stringifier.end();
        },
    });
}
51
+ //# sourceMappingURL=csv.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/output/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACnC,OAAO,EAAE,SAAS,EAAuC,MAAM,eAAe,CAAC;AAC/E,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAA4C;IACxE,MAAM,SAAS,GAAW,IAAI,CAAC,WAAW,CAAC,IAAI,GAAG,CAAC;IAEnD,IAAI,WAAW,GAAwC,IAAI,CAAC;IAC5D,IAAI,UAAoB,CAAC;IAEzB,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,IAAS,EAAE,SAAS,EAAE,QAAQ;YACpC,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;gBACrC,QAAQ,EAAE,CAAC;gBACX,OAAO;YACX,CAAC;YAED,iDAAiD;YACjD,IAAI,CAAC,WAAW,EAAE,CAAC;gBACf,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBAEtC,WAAW,GAAG,SAAS,CAAC;oBACpB,SAAS;oBACT,OAAO,EAAE,UAAU;oBACnB,MAAM,EAAE,IAAI,CAAC,MAAM,KAAK,IAAI;oBAC5B,IAAI,EAAE;wBACF,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CACd,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;4BAChB,CAAC,CAAC,SAAS,KAAK,CAAC,MAAM,GAAG;4BAC1B,CAAC,CAAC,QAAQ,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG;qBACjD;iBAC0B,CAAC,CAAC;gBAEjC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;gBACpD,WAAW,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;YACxD,CAAC;YAED,MAAM,MAAM,GAA4B,EAAE,CAAC;YAC3C,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;gBAC3B,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;YAC5B,CAAC;YAED,MAAM,CAAC,KAAK,CAAC,uBAAuB,UAAU,CAAC,MAAM,SAAS,CAAC,CAAC;YAChE,WAAW,CAAC,KAAK,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QACnD,CAAC;QAED,KAAK,CAAC,QAAQ;YACV,IAAI,WAAW,EAAE,CAAC;gBACd,WAAW,CAAC,GAAG,EAAE,CAAC;gBAClB,WAAW,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YACzC,CAAC;iBAAM,CAAC;gBACJ,QAAQ,EAAE,CAAC;YACf,CAAC;QACL,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "marcattacks",
3
- "version": "2.2.18",
3
+ "version": "2.3.0",
4
4
  "description": "A powerful streaming MARC21 to RDF converter with JSONata transformation and S3 support.",
5
5
  "keywords": [
6
6
  "marc21",
@@ -62,6 +62,8 @@
62
62
  "@dsnp/parquetjs": "^1.8.7",
63
63
  "@fast-csv/parse": "^5.0.5",
64
64
  "commander": "^14.0.2",
65
+ "csv-parse": "^6.2.1",
66
+ "csv-stringify": "^6.7.0",
65
67
  "dotenv": "^17.2.3",
66
68
  "fast-xml-parser": "^5.3.4",
67
69
  "jsonata": "^2.1.0",
@@ -1,3 +0,0 @@
1
- import { Readable } from "stream";
2
/**
 * Reads delimited text from `readable` and resolves with all parsed rows at once.
 * The compiled implementation removed alongside this declaration parses
 * tab-separated input, takes keys from the first row, trims values and
 * ignores empty rows.
 */
- export declare function parseStream(readable: Readable): Promise<Record<string, string>[]>;
3
- //# sourceMappingURL=csv_parse.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"csv_parse.d.ts","sourceRoot":"","sources":["../../src/util/csv_parse.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAGlC,wBAAsB,WAAW,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CAqBvF"}
@@ -1,22 +0,0 @@
1
- import { Readable } from "stream";
2
- import { parse } from '@fast-csv/parse';
3
/**
 * Parse a tab-separated stream into an array of row objects.
 *
 * The first row supplies the keys, values are trimmed and empty rows are
 * ignored. All rows are buffered in memory and returned together once the
 * parser signals 'end'.
 *
 * @param readable Source stream carrying the tab-separated text.
 * @returns {Promise<Record<string, string>[]>} Promise resolving to the parsed
 *   rows; it rejects if either the source stream or the parser emits 'error'.
 */
export async function parseStream(readable) {
    return new Promise((resolve, reject) => {
        const rows = [];
        // pipe() does not forward source errors to the destination, so without
        // this listener a failing source would leave the promise pending forever.
        readable.once("error", reject);
        readable
            .pipe(parse({
                delimiter: "\t",
                headers: true,
                trim: true,
                ignoreEmpty: true,
            }))
            .on("error", reject)
            .on("data", (row) => {
                rows.push(row);
            })
            .on("end", () => {
                resolve(rows);
            });
    });
}
22
- //# sourceMappingURL=csv_parse.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"csv_parse.js","sourceRoot":"","sources":["../../src/util/csv_parse.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAExC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAkB;IAClD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,IAAI,GAA6B,EAAE,CAAC;QAE1C,QAAQ;aACL,IAAI,CACH,KAAK,CAAC;YACJ,SAAS,EAAE,IAAI;YACf,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,IAAI;YACV,WAAW,EAAE,IAAI;SAClB,CAAC,CACH;aACA,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC;aACnB,EAAE,CAAC,MAAM,EAAE,CAAC,GAA2B,EAAE,EAAE;YAC1C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC,CAAC;aACD,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;YACd,OAAO,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACL,CAAC"}