marcattacks 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/README.md +25 -13
  2. package/dist/attacker.d.ts +2 -0
  3. package/dist/attacker.d.ts.map +1 -0
  4. package/dist/attacker.js +164 -0
  5. package/dist/attacker.js.map +1 -0
  6. package/dist/filestream.d.ts +4 -0
  7. package/dist/filestream.d.ts.map +1 -0
  8. package/dist/filestream.js +32 -0
  9. package/dist/filestream.js.map +1 -0
  10. package/dist/index.js +13 -175
  11. package/dist/index.js.map +1 -1
  12. package/dist/input/alephseq.d.ts +2 -2
  13. package/dist/input/alephseq.d.ts.map +1 -1
  14. package/dist/input/alephseq.js +46 -76
  15. package/dist/input/alephseq.js.map +1 -1
  16. package/dist/input/json.d.ts +2 -2
  17. package/dist/input/json.d.ts.map +1 -1
  18. package/dist/input/json.js +15 -27
  19. package/dist/input/json.js.map +1 -1
  20. package/dist/input/jsonl.d.ts +2 -2
  21. package/dist/input/jsonl.d.ts.map +1 -1
  22. package/dist/input/jsonl.js +27 -46
  23. package/dist/input/jsonl.js.map +1 -1
  24. package/dist/input/marc.d.ts +2 -2
  25. package/dist/input/marc.d.ts.map +1 -1
  26. package/dist/input/marc.js +18 -37
  27. package/dist/input/marc.js.map +1 -1
  28. package/dist/input/rdf.d.ts +2 -2
  29. package/dist/input/rdf.d.ts.map +1 -1
  30. package/dist/input/rdf.js +23 -10
  31. package/dist/input/rdf.js.map +1 -1
  32. package/dist/{transform/rdf.d.ts → input/tsv.d.ts} +1 -1
  33. package/dist/input/tsv.d.ts.map +1 -0
  34. package/dist/input/tsv.js +45 -0
  35. package/dist/input/tsv.js.map +1 -0
  36. package/dist/input/xml.d.ts +2 -2
  37. package/dist/input/xml.d.ts.map +1 -1
  38. package/dist/input/xml.js +28 -92
  39. package/dist/input/xml.js.map +1 -1
  40. package/dist/output/alephseq.d.ts +1 -1
  41. package/dist/output/alephseq.d.ts.map +1 -1
  42. package/dist/output/alephseq.js +2 -2
  43. package/dist/output/alephseq.js.map +1 -1
  44. package/dist/output/json.d.ts +1 -1
  45. package/dist/output/json.d.ts.map +1 -1
  46. package/dist/output/json.js +2 -2
  47. package/dist/output/json.js.map +1 -1
  48. package/dist/output/jsonl.d.ts +1 -1
  49. package/dist/output/jsonl.d.ts.map +1 -1
  50. package/dist/output/jsonl.js +3 -3
  51. package/dist/output/jsonl.js.map +1 -1
  52. package/dist/output/parquet.d.ts +7 -0
  53. package/dist/output/parquet.d.ts.map +1 -0
  54. package/dist/output/parquet.js +15 -0
  55. package/dist/output/parquet.js.map +1 -0
  56. package/dist/output/rdf.d.ts +1 -1
  57. package/dist/output/rdf.d.ts.map +1 -1
  58. package/dist/output/rdf.js +3 -4
  59. package/dist/output/rdf.js.map +1 -1
  60. package/dist/output/tsv.d.ts +3 -0
  61. package/dist/output/tsv.d.ts.map +1 -0
  62. package/dist/output/tsv.js +37 -0
  63. package/dist/output/tsv.js.map +1 -0
  64. package/dist/output/xml.d.ts +1 -1
  65. package/dist/output/xml.d.ts.map +1 -1
  66. package/dist/output/xml.js +3 -3
  67. package/dist/output/xml.js.map +1 -1
  68. package/dist/s3stream.d.ts +3 -2
  69. package/dist/s3stream.d.ts.map +1 -1
  70. package/dist/s3stream.js +55 -3
  71. package/dist/s3stream.js.map +1 -1
  72. package/dist/sftpstream.d.ts +3 -3
  73. package/dist/sftpstream.d.ts.map +1 -1
  74. package/dist/sftpstream.js +48 -5
  75. package/dist/sftpstream.js.map +1 -1
  76. package/dist/transform/avram.d.ts +6 -0
  77. package/dist/transform/avram.d.ts.map +1 -0
  78. package/dist/transform/avram.js +37 -0
  79. package/dist/transform/avram.js.map +1 -0
  80. package/dist/transform/jsonata.d.ts +1 -1
  81. package/dist/transform/jsonata.d.ts.map +1 -1
  82. package/dist/transform/jsonata.js +9 -6
  83. package/dist/transform/jsonata.js.map +1 -1
  84. package/dist/transform/marc2rdf.d.ts +1 -1
  85. package/dist/transform/marc2rdf.d.ts.map +1 -1
  86. package/dist/transform/marc2rdf.js +4 -2
  87. package/dist/transform/marc2rdf.js.map +1 -1
  88. package/dist/transform/marcids.d.ts +6 -0
  89. package/dist/transform/marcids.d.ts.map +1 -0
  90. package/dist/transform/marcids.js +22 -0
  91. package/dist/transform/marcids.js.map +1 -0
  92. package/dist/transform/notation3.d.ts +1 -1
  93. package/dist/transform/notation3.d.ts.map +1 -1
  94. package/dist/transform/notation3.js +3 -3
  95. package/dist/transform/notation3.js.map +1 -1
  96. package/dist/util/stream_helpers.d.ts +8 -0
  97. package/dist/util/stream_helpers.d.ts.map +1 -1
  98. package/dist/util/stream_helpers.js +77 -3
  99. package/dist/util/stream_helpers.js.map +1 -1
  100. package/package.json +11 -7
  101. package/dist/transform/json.d.ts +0 -3
  102. package/dist/transform/json.d.ts.map +0 -1
  103. package/dist/transform/json.js +0 -32
  104. package/dist/transform/json.js.map +0 -1
  105. package/dist/transform/rdf.d.ts.map +0 -1
  106. package/dist/transform/rdf.js +0 -370
  107. package/dist/transform/rdf.js.map +0 -1
@@ -1,86 +1,56 @@
1
- import { Readable } from "stream";
2
- import * as readline from 'node:readline';
1
+ import { Transform } from "stream";
3
2
  import log4js from 'log4js';
4
3
  const logger = log4js.getLogger();
5
- export async function stream2readable(stream, _opts) {
4
+ export async function transform(_opts) {
6
5
  let recordNum = 0;
7
- let hasError = false;
8
- const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
9
- let sourcePaused = false;
10
- const readableStream = new Readable({
11
- read() {
12
- if (sourcePaused) {
13
- logger.debug("backpressure off");
14
- rl.resume();
15
- sourcePaused = false;
16
- }
17
- },
18
- destroy() {
19
- stream.destroy();
20
- },
21
- objectMode: true
22
- });
23
6
  let rec = [];
24
7
  let previd = "";
25
- rl.on('line', (line) => {
26
- if (hasError)
27
- return;
28
- logger.debug(line);
29
- if (line.match(/^\w+\s[\x20-\x7E]{5}\sL\s.*/u)) {
30
- // ok
31
- }
32
- else {
33
- logger.error(`syntax error in record ${recordNum + 1}`);
34
- hasError = true;
35
- stream.destroy();
36
- rl.close();
37
- readableStream.destroy(new Error(String('parse error')));
38
- return;
39
- }
40
- const [id, ...rest] = line.split(" ");
41
- const data = rest.join(" ");
42
- if (previd && previd !== id) {
43
- const ok = readableStream.push({
44
- record: rec
45
- });
46
- if (!ok) {
47
- logger.debug("backpressure on");
48
- rl.pause();
49
- sourcePaused = true;
8
+ let tail = "";
9
+ return new Transform({
10
+ objectMode: true,
11
+ transform(chunk, _encoding, callback) {
12
+ const data = tail + chunk.toString();
13
+ const lines = data.split(/\r?\n/);
14
+ tail = lines.pop() || "";
15
+ for (const line of lines) {
16
+ if (line.length === 0)
17
+ continue;
18
+ if (!line.match(/^\w+\s[\x20-\x7E]{5}\sL\s.*/u)) {
19
+ const err = new Error(`syntax error in record ${recordNum + 1}`);
20
+ logger.error(err.message);
21
+ return callback(err);
22
+ }
23
+ const [id, ...rest] = line.split(" ");
24
+ const lineData = rest.join(" ");
25
+ if (previd && previd !== id) {
26
+ this.push({ record: rec });
27
+ rec = [];
28
+ recordNum++;
29
+ }
30
+ const tag = lineData?.substring(0, 3);
31
+ const ind1 = lineData?.substring(3, 4);
32
+ const ind2 = lineData?.substring(4, 5);
33
+ const sf = lineData?.substring(8);
34
+ const parts = sf.split(/\$\$(.)/);
35
+ if (tag === 'FMT' || tag === 'LDR' || tag.startsWith("00")) {
36
+ rec.push([tag, ind1, ind2, "_", ...parts]);
37
+ }
38
+ else {
39
+ rec.push([tag, ind1, ind2, ...parts.slice(1)]);
40
+ }
41
+ previd = id;
50
42
  }
51
- rec = [];
52
- recordNum++;
53
- }
54
- const tag = data?.substring(0, 3);
55
- const ind1 = data?.substring(3, 4);
56
- const ind2 = data?.substring(4, 5);
57
- const sf = data?.substring(8);
58
- const parts = sf.split(/\$\$(.)/);
59
- if (tag == 'FMT' || tag === 'LDR' || tag.startsWith("00")) {
60
- rec.push([
61
- tag, ind1, ind2
62
- ].concat(["_"].concat(parts)));
63
- }
64
- else {
65
- rec.push([
66
- tag, ind1, ind2
67
- ].concat(parts.slice(1)));
43
+ callback();
44
+ },
45
+ flush(callback) {
46
+ if (tail) {
47
+ logger.warn("ignoring partial chunk of data: ", tail);
48
+ }
49
+ if (rec.length > 0) {
50
+ this.push({ record: rec });
51
+ }
52
+ callback();
68
53
  }
69
- previd = id;
70
- });
71
- rl.on('error', (error) => {
72
- if (hasError)
73
- return;
74
- logger.error(`readline error ${error}`);
75
- });
76
- rl.on('close', () => {
77
- if (hasError)
78
- return;
79
- readableStream.push({
80
- record: rec
81
- });
82
- readableStream.push(null);
83
54
  });
84
- return readableStream;
85
55
  }
86
56
  //# sourceMappingURL=alephseq.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"alephseq.js","sourceRoot":"","sources":["../../src/input/alephseq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,QAAQ,MAAM,eAAe,CAAA;AACzC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAgB,EAAE,KAAU;IAC9D,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,MAAM,EAAE,GAAG,QAAQ,CAAC,eAAe,CAAC,EAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAC,CAAC,CAAC;IAE1E,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC;QAChC,IAAI;YACA,IAAI,YAAY,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;gBACjC,EAAE,CAAC,MAAM,EAAE,CAAC;gBACZ,YAAY,GAAG,KAAK,CAAC;YACzB,CAAC;QACL,CAAC;QACD,OAAO;YACH,MAAM,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC;QACD,UAAU,EAAE,IAAI;KACnB,CAAC,CAAC;IAEH,IAAI,GAAG,GAAgB,EAAE,CAAC;IAC1B,IAAI,MAAM,GAAY,EAAE,CAAC;IAEzB,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;QACnB,IAAI,QAAQ;YAAE,OAAO;QAErB,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEnB,IAAI,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,EAAE,CAAC;YAC7C,KAAK;QACT,CAAC;aACI,CAAC;YACF,MAAM,CAAC,KAAK,CAAC,0BAA0B,SAAS,GAAG,CAAC,EAAE,CAAC,CAAC;YACxD,QAAQ,GAAG,IAAI,CAAC;YAChB,MAAM,CAAC,OAAO,EAAE,CAAC;YACjB,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,cAAc,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;YACzD,OAAO;QACX,CAAC;QAED,MAAM,CAAC,EAAE,EAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE5B,IAAI,MAAM,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;YAC1B,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC;gBAC3B,MAAM,EAAG,GAAG;aACf,CAAC,CAAC;YAEH,IAAI,CAAC,EAAE,EAAE,CAAC;gBACN,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;gBAChC,EAAE,CAAC,KAAK,EAAE,CAAC;gBACX,YAAY,GAAG,IAAI,CAAC;YACxB,CAAC;YAED,GAAG,GAAG,EAAE,CAAC;YACT,SAAS,EAAE,CAAC;QAChB,CAAC;QAED,MAAM,GAAG,GAAI,IAAI,EAAE,SAAS,CAAC,CAAC,EAAC,CAAC,CAAC,CAAC;QAClC,MAAM,IAAI,GAAG,IAAI,EAAE,SAAS,CAAC,CAAC,EAAC,CAAC,CAAC,CAAC;QAClC,MAAM,IAAI,GAAG,IAAI,EAAE,SAAS,CAAC,CAAC,EAAC,CAAC,CAAC,CAAC;QAClC,MAAM,EAAE,GAAK,IAAI,EAAE,SAAS,CA
AC,CAAC,CAAC,CAAC;QAChC,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAElC,IAAI,GAAG,IAAI,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACxD,GAAG,CAAC,IAAI,CAAC;gBACL,GAAG,EAAC,IAAI,EAAC,IAAI;aAChB,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;aACI,CAAC;YACF,GAAG,CAAC,IAAI,CAAC;gBACL,GAAG,EAAC,IAAI,EAAC,IAAI;aAChB,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9B,CAAC;QAED,MAAM,GAAG,EAAG,CAAC;IACjB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;QACrB,IAAI,QAAQ;YAAE,OAAO;QACrB,MAAM,CAAC,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;QAChB,IAAI,QAAQ;YAAE,OAAO;QACrB,cAAc,CAAC,IAAI,CAAC;YAChB,MAAM,EAAE,GAAG;SACd,CAAC,CAAC;QACH,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC,CAAC,CAAC;IAEH,OAAO,cAAc,CAAC;AAC1B,CAAC"}
1
+ {"version":3,"file":"alephseq.js","sourceRoot":"","sources":["../../src/input/alephseq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAU;IACtC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,GAAG,GAAe,EAAE,CAAC;IACzB,IAAI,MAAM,GAAW,EAAE,CAAC;IACxB,IAAI,IAAI,GAAG,EAAE,CAAC;IAEd,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAEhB,SAAS,CAAC,KAAU,EAAE,SAAiB,EAAE,QAA2B;YAChE,MAAM,IAAI,GAAG,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC;YACrC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAElC,IAAI,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YAEzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACvB,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBAEhC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,EAAE,CAAC;oBAC9C,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,0BAA0B,SAAS,GAAG,CAAC,EAAE,CAAC,CAAC;oBACjE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;oBAC1B,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC;gBACzB,CAAC;gBAED,MAAM,CAAC,EAAE,EAAE,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAEhC,IAAI,MAAM,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;oBAC1B,IAAI,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;oBAC3B,GAAG,GAAG,EAAE,CAAC;oBACT,SAAS,EAAE,CAAC;gBAChB,CAAC;gBAED,MAAM,GAAG,GAAI,QAAQ,EAAE,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvC,MAAM,IAAI,GAAG,QAAQ,EAAE,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvC,MAAM,IAAI,GAAG,QAAQ,EAAE,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvC,MAAM,EAAE,GAAK,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC;gBACpC,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;gBAElC,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzD,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC;gBAC/C,CAAC;qBAAM,CAAC;oBACJ,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnD,CAAC;gBAED,MAAM,GAAG,EAAG,CAAC;YACjB,CAAC;YAED,QAAQ,EAAE,CAAC;QACf,CAAC;QA
ED,KAAK,CAAC,QAA2B;YAC7B,IAAI,IAAI,EAAE,CAAC;gBACP,MAAM,CAAC,IAAI,CAAC,kCAAkC,EAAE,IAAI,CAAC,CAAC;YAC1D,CAAC;YAED,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjB,IAAI,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;YAC/B,CAAC;YACD,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
@@ -1,3 +1,3 @@
1
- import { Readable } from "stream";
2
- export declare function stream2readable(stream: Readable, _opts: any): Promise<Readable>;
1
+ import { Transform } from "stream";
2
+ export declare function transform(_opts: any): Promise<Transform>;
3
3
  //# sourceMappingURL=json.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/input/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAMlC,wBAAsB,eAAe,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,GAAI,OAAO,CAAC,QAAQ,CAAC,CAkCtF"}
1
+ {"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/input/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAG3D,wBAAsB,SAAS,CAAC,KAAK,EAAE,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAsB9D"}
@@ -1,34 +1,22 @@
1
- import { Readable } from "stream";
1
+ import { Transform } from "stream";
2
2
  import streamArray from "stream-json/streamers/StreamArray.js";
3
- import log4js from 'log4js';
4
- const logger = log4js.getLogger();
5
- export async function stream2readable(stream, _opts) {
6
- const pipeline = stream.pipe(streamArray.withParser());
7
- let sourcePaused = false;
8
- const readableStream = new Readable({
9
- read() {
10
- if (sourcePaused) {
11
- logger.debug("backpressure off");
12
- pipeline.resume();
13
- sourcePaused = false;
14
- }
3
+ export async function transform(_opts) {
4
+ const jsonParser = streamArray.withParser();
5
+ const transformer = new Transform({
6
+ objectMode: true,
7
+ transform(chunk, encoding, callback) {
8
+ jsonParser.write(chunk, encoding);
9
+ callback();
15
10
  },
16
- destroy() {
17
- stream.destroy();
18
- },
19
- objectMode: true
20
- });
21
- pipeline.on('data', (data) => {
22
- const ok = readableStream.push(data.value);
23
- if (!ok) {
24
- logger.debug("backpressure on");
25
- pipeline.pause();
26
- sourcePaused = true;
11
+ flush(callback) {
12
+ jsonParser.end();
13
+ callback();
27
14
  }
28
15
  });
29
- pipeline.on('end', () => {
30
- readableStream.push(null);
16
+ jsonParser.on('data', (data) => {
17
+ transformer.push(data.value);
31
18
  });
32
- return readableStream;
19
+ jsonParser.on('error', (err) => transformer.destroy(err));
20
+ return transformer;
33
21
  }
34
22
  //# sourceMappingURL=json.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/input/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,WAAW,MAAM,sCAAsC,CAAC;AAC/D,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAgB,EAAE,KAAU;IAC9D,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,CAAC,CAAC;IAEvD,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC;QAChC,IAAI;YACA,IAAI,YAAY,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;gBACjC,QAAQ,CAAC,MAAM,EAAE,CAAC;gBAClB,YAAY,GAAG,KAAK,CAAC;YACzB,CAAC;QACL,CAAC;QACD,OAAO;YACH,MAAM,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC;QACD,UAAU,EAAE,IAAI;KACnB,CAAC,CAAC;IAEH,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAS,EAAE,EAAE;QAC9B,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAE3C,IAAI,CAAC,EAAE,EAAE,CAAC;YACN,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAA;YAC/B,QAAQ,CAAC,KAAK,EAAE,CAAC;YACjB,YAAY,GAAG,IAAI,CAAC;QACxB,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;QACpB,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC,CAAC,CAAC;IAEH,OAAO,cAAc,CAAC;AAC1B,CAAC"}
1
+ {"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/input/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,WAAW,MAAM,sCAAsC,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAU;IACtC,MAAM,UAAU,GAAG,WAAW,CAAC,UAAU,EAAE,CAAC;IAE5C,MAAM,WAAW,GAAG,IAAI,SAAS,CAAC;QAC9B,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,KAAU,EAAE,QAAgB,EAAE,QAA2B;YAC/D,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAClC,QAAQ,EAAE,CAAC;QACf,CAAC;QACD,KAAK,CAAC,QAAQ;YACV,UAAU,CAAC,GAAG,EAAE,CAAC;YACjB,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;IAEH,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAS,EAAE,EAAE;QAChC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAQ,EAAE,EAAE,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;IAE/D,OAAO,WAAW,CAAC;AACvB,CAAC"}
@@ -1,3 +1,3 @@
1
- import { Readable } from "stream";
2
- export declare function stream2readable(stream: Readable, _opts: any): Promise<Readable>;
1
+ import { Transform } from "stream";
2
+ export declare function transform(_opts: any): Promise<Transform>;
3
3
  //# sourceMappingURL=jsonl.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"jsonl.d.ts","sourceRoot":"","sources":["../../src/input/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAMlC,wBAAsB,eAAe,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,GAAI,OAAO,CAAC,QAAQ,CAAC,CAyDtF"}
1
+ {"version":3,"file":"jsonl.d.ts","sourceRoot":"","sources":["../../src/input/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAK3D,wBAAsB,SAAS,CAAC,KAAK,EAAE,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CA+B9D"}
@@ -1,56 +1,37 @@
1
- import { Readable } from "stream";
2
- import * as readline from 'node:readline';
1
+ import { Transform } from "stream";
3
2
  import log4js from 'log4js';
4
3
  const logger = log4js.getLogger();
5
- export async function stream2readable(stream, _opts) {
4
+ export async function transform(_opts) {
6
5
  let recordNum = 0;
7
- let hasError = false;
8
- const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
9
- let sourcePaused = false;
10
- const readableStream = new Readable({
11
- read() {
12
- if (sourcePaused) {
13
- logger.debug("backpressure off");
14
- rl.resume();
15
- sourcePaused = false;
6
+ let tail = "";
7
+ return new Transform({
8
+ objectMode: true,
9
+ transform(chunk, _encoding, callback) {
10
+ const lines = (tail + chunk.toString()).split(/\r?\n/);
11
+ tail = lines.pop() || "";
12
+ for (const line of lines) {
13
+ if (!line.trim())
14
+ continue;
15
+ try {
16
+ this.push(JSON.parse(line));
17
+ recordNum++;
18
+ }
19
+ catch (error) {
20
+ logger.error(`JSON parse error at line ${recordNum + 1}: ${error}`);
21
+ return callback(error instanceof Error ? error : new Error(String(error)));
22
+ }
16
23
  }
24
+ callback();
17
25
  },
18
- destroy() {
19
- stream.destroy();
20
- },
21
- objectMode: true
22
- });
23
- rl.on('line', (line) => {
24
- if (hasError)
25
- return;
26
- recordNum++;
27
- try {
28
- const ok = readableStream.push(JSON.parse(line));
29
- if (!ok) {
30
- logger.debug("backpressure on");
31
- rl.pause();
32
- sourcePaused = true;
26
+ flush(callback) {
27
+ if (tail.trim()) {
28
+ try {
29
+ this.push(JSON.parse(tail));
30
+ }
31
+ catch (e) { /* ignore trailing whitespace */ }
33
32
  }
34
- recordNum++;
35
- }
36
- catch (error) {
37
- hasError = true;
38
- logger.error(`JSON parse error at line ${recordNum + 1}: ${error}`);
39
- stream.destroy();
40
- rl.close();
41
- readableStream.destroy(error instanceof Error ? error : new Error(String(error)));
33
+ callback();
42
34
  }
43
35
  });
44
- rl.on('error', (error) => {
45
- if (hasError)
46
- return;
47
- logger.error(`readline error ${error}`);
48
- });
49
- rl.on('close', () => {
50
- if (hasError)
51
- return;
52
- readableStream.push(null);
53
- });
54
- return readableStream;
55
36
  }
56
37
  //# sourceMappingURL=jsonl.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"jsonl.js","sourceRoot":"","sources":["../../src/input/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,QAAQ,MAAM,eAAe,CAAA;AACzC,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAgB,EAAE,KAAU;IAC9D,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,QAAQ,GAAG,KAAK,CAAC;IAErB,MAAM,EAAE,GAAG,QAAQ,CAAC,eAAe,CAAC,EAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAC,CAAC,CAAC;IAE1E,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC;QAChC,IAAI;YACA,IAAI,YAAY,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;gBACjC,EAAE,CAAC,MAAM,EAAE,CAAC;gBACZ,YAAY,GAAG,KAAK,CAAC;YACzB,CAAC;QACL,CAAC;QACD,OAAO;YACH,MAAM,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC;QACD,UAAU,EAAE,IAAI;KACnB,CAAC,CAAC;IAEH,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;QACnB,IAAI,QAAQ;YAAE,OAAO;QAErB,SAAS,EAAE,CAAC;QAEZ,IAAI,CAAC;YACD,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;YAEjD,IAAI,CAAC,EAAE,EAAE,CAAC;gBACN,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;gBAChC,EAAE,CAAC,KAAK,EAAE,CAAC;gBACX,YAAY,GAAG,IAAI,CAAC;YACxB,CAAC;YAED,SAAS,EAAE,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,QAAQ,GAAG,IAAI,CAAC;YAChB,MAAM,CAAC,KAAK,CAAC,4BAA4B,SAAS,GAAG,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC;YACpE,MAAM,CAAC,OAAO,EAAE,CAAC;YACjB,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,cAAc,CAAC,OAAO,CAAC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACtF,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;QACrB,IAAI,QAAQ;YAAE,OAAO;QACrB,MAAM,CAAC,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;QAChB,IAAI,QAAQ;YAAE,OAAO;QACrB,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC,CAAC,CAAC;IAEH,OAAO,cAAc,CAAC;AAC1B,CAAC"}
1
+ {"version":3,"file":"jsonl.js","sourceRoot":"","sources":["../../src/input/jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAU;IACtC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,IAAI,GAAG,EAAE,CAAC;IAEd,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,KAAU,EAAE,SAAiB,EAAG,QAA2B;YACjE,MAAM,KAAK,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACvD,IAAI,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YAEzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACvB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;oBAAE,SAAS;gBAC3B,IAAI,CAAC;oBACD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC5B,SAAS,EAAE,CAAC;gBAChB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACb,MAAM,CAAC,KAAK,CAAC,4BAA4B,SAAS,GAAG,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC;oBACpE,OAAO,QAAQ,CAAC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/E,CAAC;YACL,CAAC;YACD,QAAQ,EAAE,CAAC;QACf,CAAC;QACD,KAAK,CAAC,QAAQ;YACV,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;gBACd,IAAI,CAAC;oBACD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;gBAChC,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC,CAAC,gCAAgC,CAAC,CAAC;YACpD,CAAC;YACD,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
@@ -1,3 +1,3 @@
1
- import { Readable } from "stream";
2
- export declare function stream2readable(stream: Readable, _opts: any): Promise<Readable>;
1
+ import { Transform } from "stream";
2
+ export declare function transform(_opts: any): Promise<Transform>;
3
3
  //# sourceMappingURL=marc.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"marc.d.ts","sourceRoot":"","sources":["../../src/input/marc.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAMlC,wBAAsB,eAAe,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,GAAI,OAAO,CAAC,QAAQ,CAAC,CA4EtF"}
1
+ {"version":3,"file":"marc.d.ts","sourceRoot":"","sources":["../../src/input/marc.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAM3D,wBAAsB,SAAS,CAAC,KAAK,EAAE,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAgD9D"}
@@ -1,30 +1,21 @@
1
- import { Readable } from "stream";
1
+ import { Transform } from "stream";
2
2
  import { Marc } from "marcjs";
3
3
  import log4js from 'log4js';
4
4
  const logger = log4js.getLogger();
5
- export async function stream2readable(stream, _opts) {
6
- let recordNum = 0;
7
- let hasError = false;
8
- let sourcePaused = false;
5
+ export async function transform(_opts) {
9
6
  const parser = Marc.createStream('Iso2709', 'Parser');
10
- const readableStream = new Readable({
11
- read() {
12
- if (sourcePaused) {
13
- logger.debug("backpressure off");
14
- parser.resume();
15
- sourcePaused = false;
16
- }
17
- },
18
- destroy() {
19
- stream.destroy();
7
+ const transformer = new Transform({
8
+ objectMode: true,
9
+ transform(chunk, _encoding, callback) {
10
+ parser.write(chunk); //
11
+ callback();
20
12
  },
21
- objectMode: true
13
+ flush(callback) {
14
+ parser.end(); //
15
+ callback();
16
+ }
22
17
  });
23
18
  parser.on('data', (record) => {
24
- recordNum++;
25
- if (recordNum % 1000 === 0) {
26
- logger.info(`record: ${recordNum}`);
27
- }
28
19
  let rec = [];
29
20
  rec.push(['LDR', ' ', ' ', '_', record.leader]);
30
21
  for (const field of record.fields) {
@@ -33,35 +24,25 @@ export async function stream2readable(stream, _opts) {
33
24
  const ind1 = ' ';
34
25
  const ind2 = ' ';
35
26
  const data = field.slice(1);
36
- rec.push([tag, ind1, ind2].concat(data));
27
+ rec.push([tag, ind1, ind2].concat(data)); //
37
28
  }
38
29
  else if (field.length > 3) {
39
30
  const ind1 = field[1].charAt(0);
40
31
  const ind2 = field[1].charAt(1);
41
32
  const data = field.slice(2);
42
- rec.push([tag, ind1, ind2].concat(data));
33
+ rec.push([tag, ind1, ind2].concat(data)); //
43
34
  }
44
35
  else {
45
- logger.warn("marc error: ", field);
36
+ logger.warn("marc error: ", field); //
46
37
  }
47
38
  }
48
- const ok = readableStream.push({
39
+ transformer.push({
49
40
  record: rec
50
41
  });
51
- if (!ok) {
52
- logger.debug("backpressure on");
53
- parser.pause();
54
- sourcePaused = true;
55
- }
56
42
  });
57
- parser.on('close', () => {
58
- if (recordNum % 1000 === 0) {
59
- logger.info(`record: ${recordNum}`);
60
- }
61
- readableStream.push(null);
62
- logger.info(`processed ${recordNum} records`);
43
+ parser.on('error', (err) => {
44
+ transformer.destroy(err);
63
45
  });
64
- stream.pipe(parser);
65
- return readableStream;
46
+ return transformer;
66
47
  }
67
48
  //# sourceMappingURL=marc.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"marc.js","sourceRoot":"","sources":["../../src/input/marc.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAC9B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAgB,EAAE,KAAU;IAC9D,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEtD,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC;QAChC,IAAI;YACA,IAAI,YAAY,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;gBACjC,MAAM,CAAC,MAAM,EAAE,CAAC;gBAChB,YAAY,GAAG,KAAK,CAAC;YACzB,CAAC;QACL,CAAC;QACD,OAAO;YACH,MAAM,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC;QACD,UAAU,EAAE,IAAI;KACnB,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,EAAE;QACzB,SAAS,EAAE,CAAC;QAEZ,IAAI,SAAS,GAAG,IAAI,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC;QACxC,CAAC;QAED,IAAI,GAAG,GAAgB,EAAE,CAAC;QAE1B,GAAG,CAAC,IAAI,CAAC,CAAE,KAAK,EAAG,GAAG,EAAG,GAAG,EAAG,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QAEpD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,GAAG,GAAI,KAAK,CAAC,CAAC,CAAC,CAAC;YAEtB,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5C,MAAM,IAAI,GAAG,GAAG,CAAC;gBACjB,MAAM,IAAI,GAAG,GAAG,CAAC;gBACjB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAE5B,GAAG,CAAC,IAAI,CAAC,CAAE,GAAG,EAAG,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;YAC/C,CAAC;iBACI,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAChC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAChC,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAE5B,GAAG,CAAC,IAAI,CAAC,CAAE,GAAG,EAAG,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;YAC/C,CAAC;iBACI,CAAC;gBACF,MAAM,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YACvC,CAAC;QACL,CAAC;QAED,MAAM,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC;YAC3B,MAAM,EAAG,GAAG;SACf,CAAC,CAAC;QAEH
,IAAI,CAAC,EAAE,EAAE,CAAC;YACN,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YAChC,MAAM,CAAC,KAAK,EAAE,CAAC;YACf,YAAY,GAAG,IAAI,CAAC;QACxB,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;QACpB,IAAI,SAAS,GAAG,IAAI,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC;QACxC,CAAC;QACD,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,MAAM,CAAC,IAAI,CAAC,aAAa,SAAS,UAAU,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEpB,OAAO,cAAc,CAAC;AAC1B,CAAC"}
1
+ {"version":3,"file":"marc.js","sourceRoot":"","sources":["../../src/input/marc.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAC9B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAU;IACtC,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEtD,MAAM,WAAW,GAAG,IAAI,SAAS,CAAC;QAC9B,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,KAAU,EAAE,SAAiB,EAAE,QAA2B;YAChE,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;YACvB,QAAQ,EAAE,CAAC;QACf,CAAC;QACD,KAAK,CAAC,QAA2B;YAC7B,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE;YAChB,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,EAAE;QACzB,IAAI,GAAG,GAAe,EAAE,CAAC;QAEzB,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QAEhD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAErB,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5C,MAAM,IAAI,GAAG,GAAG,CAAC;gBACjB,MAAM,IAAI,GAAG,GAAG,CAAC;gBACjB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC5B,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;YAChD,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAChC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAChC,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC5B,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;YAChD,CAAC;iBAAM,CAAC;gBACJ,MAAM,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE;YAC1C,CAAC;QACL,CAAC;QAED,WAAW,CAAC,IAAI,CAAC;YACb,MAAM,EAAE,GAAG;SACd,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;QACvB,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC;AACvB,CAAC"}
@@ -1,3 +1,3 @@
1
- import { Readable } from 'stream';
2
- export declare function stream2readable(stream: Readable, opts: any): Promise<Readable>;
1
+ import { Transform } from 'stream';
2
+ export declare function transform(opts: any): Promise<Transform>;
3
3
  //# sourceMappingURL=rdf.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"rdf.d.ts","sourceRoot":"","sources":["../../src/input/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AASlC,wBAAsB,eAAe,CAAC,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,GAAI,OAAO,CAAC,QAAQ,CAAC,CAcrF"}
1
+ {"version":3,"file":"rdf.d.ts","sourceRoot":"","sources":["../../src/input/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAO3D,wBAAsB,SAAS,CAAC,IAAI,EAAE,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CA8B7D"}
package/dist/input/rdf.js CHANGED
@@ -1,18 +1,31 @@
1
- import { Readable } from 'stream';
2
- import { rdfParser } from "rdf-parse";
1
+ import { Transform } from 'stream';
3
2
  import { parseStream } from "../util/rdf_parse.js";
4
3
  import log4js from 'log4js';
5
4
  const logger = log4js.getLogger();
6
- export async function stream2readable(stream, opts) {
7
- const readableStream = new Readable({
5
/**
 * Build an object-mode Transform that buffers an entire RDF document and
 * emits exactly one parsed record once the input has ended.
 *
 * Every incoming chunk is collected in memory; nothing is pushed downstream
 * until `flush`, where the collected bytes are wrapped in a Readable and
 * handed to `parseStream` together with `opts.path.href`.
 *
 * @param opts - options object; `opts.path.href` is read and passed to
 *               `parseStream` as its second argument.
 * @returns a Promise resolving to the Transform stream.
 */
export async function transform(opts) {
    const chunks = []; // Buffers collected until the input ends
    return new Transform({
        objectMode: true,
        transform(chunk, encoding, callback) {
            logger.debug('chunk received');
            // In objectMode the writable side does not decode strings into
            // Buffers, and Buffer.concat() rejects string entries, so
            // normalise string chunks here.
            chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, encoding) : chunk);
            callback();
        },
        async flush(callback) {
            try {
                logger.debug('flush started');
                const fullBuffer = Buffer.concat(chunks);
                // The individual chunks are no longer needed once merged.
                chunks.length = 0;
                const { Readable } = await import('stream');
                const finalStream = Readable.from(fullBuffer);
                const record = await parseStream(finalStream, opts.path.href);
                this.push(record);
                callback();
            }
            catch (error) {
                logger.error(`RDF parsing error: ${error}`);
                // Always hand the stream a real Error instance.
                callback(error instanceof Error ? error : new Error(String(error)));
            }
        }
    });
}
18
31
  //# sourceMappingURL=rdf.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"rdf.js","sourceRoot":"","sources":["../../src/input/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAgB,EAAE,IAAS;IAC7D,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC;QAChC,UAAU,EAAE,IAAI;QAChB,OAAO;YACH,MAAM,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC;KACJ,CAAC,CAAC;IAEH,MAAM,MAAM,GAAY,MAAM,WAAW,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAElE,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC5B,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE1B,OAAO,cAAc,CAAC;AAC1B,CAAC"}
1
+ {"version":3,"file":"rdf.js","sourceRoot":"","sources":["../../src/input/rdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAE3D,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAS;IACrC,MAAM,MAAM,GAAU,EAAE,CAAC,CAAC,iDAAiD;IAE3E,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAEhB,SAAS,CAAC,KAAU,EAAE,QAAgB,EAAE,QAA2B;YAC/D,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YAC/B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,QAAQ,EAAE,CAAC;QACf,CAAC;QAED,KAAK,CAAC,KAAK,CAAC,QAA2B;YACnC,IAAI,CAAC;gBACD,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;gBAC9B,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;gBAEzC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;gBAC5C,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBAE9C,MAAM,MAAM,GAAW,MAAM,WAAW,CAAC,WAAW,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEtE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAClB,QAAQ,EAAE,CAAC;YACf,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,MAAM,CAAC,KAAK,CAAC,sBAAsB,KAAK,EAAE,CAAC,CAAC;gBAC5C,QAAQ,CAAC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACxE,CAAC;QACL,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
@@ -1,3 +1,3 @@
1
1
  import { Transform } from "stream";
2
2
  export declare function transform(_opts: any): Promise<Transform>;
3
- //# sourceMappingURL=rdf.d.ts.map
3
+ //# sourceMappingURL=tsv.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tsv.d.ts","sourceRoot":"","sources":["../../src/input/tsv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAK3D,wBAAsB,SAAS,CAAC,KAAK,EAAE,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAgD9D"}
@@ -0,0 +1,45 @@
1
+ import { Transform } from "stream";
2
+ import log4js from 'log4js';
3
+ const logger = log4js.getLogger();
4
/**
 * Build an object-mode Transform that parses tab-separated text into records.
 *
 * The first non-blank line supplies the column names. Every later non-blank
 * line is pushed downstream as an object keyed by those names. A line whose
 * field count differs from the header is reported through `logger.error`
 * and skipped. Blank lines are skipped but still counted, so reported line
 * numbers match the physical input.
 *
 * @param _opts - unused.
 * @returns a Promise resolving to the Transform stream.
 */
export async function transform(_opts) {
    let recordNum = 0; // physical line counter (blank lines included)
    let tail = "";     // partial last line carried over between chunks
    let keys;          // header names; unset until the first non-blank line

    // Handle one complete line: header capture, validation, or record emit.
    const handleLine = (stream, line) => {
        recordNum++;
        if (!line.trim())
            return;
        const fields = line.split("\t");
        if (!keys) {
            keys = fields;
            return;
        }
        if (keys.length !== fields.length) {
            logger.error(`Error on line ${recordNum}, unexpected columns`);
            return;
        }
        stream.push(Object.fromEntries(keys.map((key, i) => [key, fields[i]])));
    };

    return new Transform({
        objectMode: true,
        transform(chunk, _encoding, callback) {
            const lines = (tail + chunk.toString()).split(/\r?\n/);
            // The last piece may be an incomplete line; keep it for later.
            tail = lines.pop() || "";
            for (const line of lines) {
                handleLine(this, line);
            }
            callback();
        },
        flush(callback) {
            // Input that does not end with a newline leaves its final row in
            // `tail`; it is ordinary TSV and goes through the same handler
            // as every other line instead of being discarded.
            if (tail.trim()) {
                handleLine(this, tail);
            }
            tail = "";
            callback();
        }
    });
}
45
+ //# sourceMappingURL=tsv.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tsv.js","sourceRoot":"","sources":["../../src/input/tsv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAC3D,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,KAAU;IACtC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,IAAI,IAAe,CAAC;IAEpB,OAAO,IAAI,SAAS,CAAC;QACjB,UAAU,EAAE,IAAI;QAChB,SAAS,CAAC,KAAU,EAAE,SAAiB,EAAG,QAA2B;YACjE,MAAM,KAAK,GAAG,CAAC,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACvD,IAAI,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;YAEzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACvB,SAAS,EAAE,CAAC;gBAEZ,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;oBAAE,SAAS;gBAE3B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAEhC,IAAI,CAAC,IAAI,EAAE,CAAC;oBACR,IAAI,GAAG,MAAM,CAAC;oBACd,SAAS;gBACb,CAAC;gBAED,IAAI,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;oBAC/B,MAAM,CAAC,KAAK,CAAC,iBAAiB,SAAS,sBAAsB,CAAC,CAAC;oBAC/D,SAAS;gBACb,CAAC;gBAED,IAAI,IAAI,GAAS,EAAE,CAAC;gBAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAG,CAAC,EAAE,EAAE,CAAC;oBACrC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;gBAC/B,CAAC;gBAED,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpB,CAAC;YAED,QAAQ,EAAE,CAAC;QACf,CAAC;QACD,KAAK,CAAC,QAAQ;YACV,IAAI,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;gBACd,IAAI,CAAC;oBACD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;gBAChC,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC,CAAC,gCAAgC,CAAC,CAAC;YACpD,CAAC;YACD,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;AACP,CAAC"}
@@ -1,3 +1,3 @@
1
- import { Readable } from 'stream';
2
- export declare function stream2readable(stream: Readable): Promise<Readable>;
1
+ import { Transform } from 'stream';
2
+ export declare function transform(_opts: any): Promise<Transform>;
3
3
  //# sourceMappingURL=xml.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"xml.d.ts","sourceRoot":"","sources":["../../src/input/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAY,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAU5C,wBAAsB,eAAe,CAAC,MAAM,EAAE,QAAQ,GAAI,OAAO,CAAC,QAAQ,CAAC,CAgJ1E"}
1
+ {"version":3,"file":"xml.d.ts","sourceRoot":"","sources":["../../src/input/xml.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAA0B,MAAM,QAAQ,CAAC;AAS3D,wBAAsB,SAAS,CAAC,KAAK,EAAE,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAqE9D"}