marcattacks 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ import { Readable } from 'stream';
2
+ import sax from 'sax';
3
+ import log4js from 'log4js';
4
+
5
/** Attribute map of a controlfield/datafield element: plain strings or sax namespace-qualified attributes. */
type FieldAttribute = Record<string, string> | Record<string, sax.QualifiedAttribute>;

/** Attribute map of a subfield element (plain string values). */
type SubfieldAttribute = Record<string, string>;

/** Kind of MARC element; undefined when no kind has been set. */
type MARCType = undefined | 'leader' | 'control' | 'field' | 'subfield';
8
+
9
// Module-wide logger obtained from log4js.getLogger() (no category argument).
+ const logger = log4js.getLogger();
10
+
11
/**
 * Parse a MARCXML stream and expose its records as an object-mode Readable.
 *
 * The source is piped into a sax stream created with `sax.createStream(true)`.
 * For every closing `record` element one chunk `{ record }` is pushed, where
 * `record` is a list of rows:
 *   - leader:       ['LDR', ' ', ' ', '_', text]
 *   - controlfield: [tag,   ' ', ' ', '_', text]
 *   - datafield:    [tag, ind1, ind2, code, value, code, value, ...]
 *
 * Element names are compared without their namespace prefix (`marc:record`
 * and `record` are treated alike).
 *
 * @param stream source stream that is piped into the parser
 * @returns object-mode Readable emitting one `{ record }` object per record
 */
export function stream2readable(stream: Readable) : Readable {
    let recordNum = 0;
    // True while the source has been paused because the consumer is behind.
    let sourcePaused = false;

    const readableStream = new Readable({
        objectMode: true,
        read() {
            // The consumer asks for more data: let the source flow again.
            if (sourcePaused) {
                logger.debug("backpressure off");
                stream.resume();
                sourcePaused = false;
            }
        }
    });

    const parser = sax.createStream(true);

    // Strip a leading "prefix:" so namespaced and plain element names compare equal.
    const localNameOf = (name: string): string => name.replace(/^\w+:/, '');

    let record : string[][] = [];
    let subfield : string[] = [];
    let attrib : FieldAttribute = {};
    let sattrib : SubfieldAttribute = {};
    let text = '';

    parser.on('opentag', (node: sax.Tag) => {
        switch (localNameOf(node.name)) {
            case 'collection':
            case 'record':
            case 'leader':
                // Nothing to remember for these elements.
                break;
            case 'controlfield':
            case 'datafield':
                attrib = node.attributes;
                break;
            case 'subfield':
                sattrib = node.attributes;
                break;
            default:
                logger.error(`unknown tag: ${node.name}`);
        }

        // Text is collected per element; start fresh on every opening tag.
        text = '';
    });

    parser.on('text', (t: string) => {
        text += t;
    });

    parser.on('closetag', (name: string) => {
        switch (localNameOf(name)) {
            case 'leader':
                record.push(['LDR', ' ', ' ', '_', text]);
                break;
            case 'controlfield':
                record.push([attrib.tag as string, ' ', ' ', '_', text]);
                break;
            case 'datafield':
                record.push([
                    attrib.tag as string,
                    attrib.ind1 as string,
                    attrib.ind2 as string,
                    ...subfield
                ]);
                subfield = [];
                break;
            case 'subfield':
                subfield.push(sattrib.code as string, text);
                break;
            case 'record': {
                const ok = readableStream.push({ record });

                if (!ok) {
                    // Internal buffer is full: stop the source until read() is called again.
                    logger.debug("backpressure on");
                    stream.pause();
                    sourcePaused = true;
                }

                record = [];
                recordNum++;

                if (recordNum % 1000 === 0) {
                    logger.info(`record: ${recordNum}`);
                }
                break;
            }
        }
    });

    parser.on("error", (err) => {
        // Best effort: the error is logged and parsing is not aborted here.
        logger.error ("Parser error:", err);
    });

    parser.on('end', () => {
        // Only signal end-of-stream. Calling destroy() here would throw away
        // records that are still buffered for a paused consumer; the stream
        // ends on its own once everything has been read.
        readableStream.push(null);
        logger.info(`processed ${recordNum} records`);
    });

    stream.pipe(parser);

    return readableStream;
}
package/src/marcmap.ts ADDED
@@ -0,0 +1,94 @@
1
/**
 * Optional settings accepted by marcmap.
 */
interface MARCMapOpts {
    /** Separator placed between the matched subfield values of one field. */
    join_char?: string;
}
4
+
5
/**
 * Return the tag (first cell) of a marc row.
 * A missing row, or a missing/empty first cell, yields the empty string.
 */
export function marctag(row: string[] | undefined) : string {
    return row?.[0] || "";
}
14
+
15
/**
 * Return the two indicators (second and third cell) of a marc row.
 * Every indicator that is not present is reported as a single space.
 */
export function marcind(row: string[] | undefined) : string[] {
    const blank = " ";
    return [row?.[1] ?? blank, row?.[2] ?? blank];
}
26
+
27
/**
 * Walk over all rows of a marc record and invoke `fun(tag, row)` for each one.
 * A missing row is passed as an empty array, a missing tag as "---".
 * Nothing happens when no record is given.
 */
export function marcForTag(rec: string[][] | undefined , fun: (tag:string, row:string[]) => void) : void {
    if (!rec) {
        return;
    }

    for (const entry of rec) {
        const row = entry ?? [];
        fun(row[0] ?? "---", row);
    }
}
42
+
43
/**
 * Walk over the (code, value) pairs of a marc row, which start at the fourth
 * cell, and invoke `fun(code, value)` for every complete pair.
 * Nothing happens when no row is given.
 */
export function marcForEachSub(row: string[] | undefined , fun: (code:string, value:string) => void) : void {
    if (!row) {
        return;
    }

    for (let pos = 3; pos < row.length; pos += 2) {
        const code = row[pos];
        const value = row[pos + 1];

        // Skip an incomplete pair (e.g. a trailing code without a value).
        if (code === undefined || value === undefined) {
            continue;
        }

        fun(code, value);
    }
}
58
+
59
/**
 * Given a marc record and a field-path, return a string[] with all matching values.
 *
 * The first three characters of `find` are the tag. Every following character
 * is one subfield code to select; without any code all subfields are selected.
 * For each row with a matching tag, the selected values are joined into one
 * string, so a matching row without selected subfields contributes "".
 *
 * Codes are matched literally: characters such as ".", "*" or "(" in `find`
 * are not interpreted as regular-expression syntax.
 *
 * @param record marc rows in the form [tag, ind1, ind2, code, value, ...]
 * @param find   field-path, e.g. "245" or "245ab"
 * @param opts   optional settings; `join_char` defaults to a single space
 * @returns one joined string per row whose tag matches
 */
export function marcmap(record: string[][], find: string, opts: MARCMapOpts = {}) : string[] {
    const joinChar = opts.join_char ?? " ";

    const tagName = find.substring(0, 3);
    const codes = find.substring(3);

    // Escape every code and group the alternation so ^ and $ apply to all of it.
    const subRegex = codes
        ? new RegExp(
            "^(?:" +
            codes
                .split("")
                .map((code) => code.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
                .join("|") +
            ")$"
        )
        : /^.*$/;

    const results : string[] = [];

    for (const row of record) {
        if (row[0] === tagName) {
            results.push(marcsubfields(row, subRegex).join(joinChar));
        }
    }

    return results;
}
81
+
82
/**
 * Collect the values of all subfields of a marc row whose code matches `re`.
 * Subfields are the (code, value) pairs starting at the fourth cell; a pair
 * without a value is ignored.
 */
export function marcsubfields(row: string[], re: RegExp) : string[] {
    const values : string[] = [];
    let pos = 3;

    while (pos < row.length) {
        const code = row[pos];
        const value = row[pos + 1];

        if (code != null && code.match(re) && value !== undefined) {
            values.push(value);
        }

        pos += 2;
    }

    return values;
}
@@ -0,0 +1,48 @@
1
+ import { Readable, Writable } from 'stream';
2
+ import { marcmap } from '../marcmap.js';
3
+ import log4js from 'log4js';
4
+
5
// Module-wide logger obtained from log4js.getLogger() (no category argument).
+ const logger = log4js.getLogger();
6
+
7
/**
 * Write every `{ record }` chunk of `readable` to `writable` as text lines of
 * the form `<id> <tag><ind1><ind2> L <content>`, preceded per record by an
 * `<id> FMT L BK` line. (NOTE(review): this looks like the Aleph sequential
 * format; confirm the exact column layout against the target system.)
 *
 * - `<id>` is the value of the first 001 field ("" when there is none).
 * - Rows tagged FMT, LDR or 00x print their values bare; all other rows print
 *   each subfield as `$$<code><value>`.
 * - Missing indicators are printed as a space; a trailing code without a
 *   value is skipped.
 * - Chunks without a `record` property are ignored.
 *
 * When `writable.write` reports a full buffer the source is paused until
 * 'drain'. The writable is ended when the source emits 'end'.
 *
 * @param readable object-mode source of `{ record: string[][] }` chunks
 * @param writable destination for the generated text
 */
export function readable2writable(readable: Readable, writable: Writable) : void {
    // Tags whose values are written without a "$$code" prefix.
    const bareValueTag = /^(?:FMT|LDR|00.)$/;

    readable.on('data', (data: any) => {
        const rec : string[][] | undefined = data['record'];

        if (!rec) return;

        // marcmap returns one entry per 001 field; only the first is the record id.
        const id = marcmap(rec, "001", {})[0] ?? "";

        let output = `${id} FMT L BK\n`;

        for (const row of rec) {
            if (!row) continue;

            const tag = row[0] ?? "";
            const ind1 = row[1] ?? " ";
            const ind2 = row[2] ?? " ";
            const bare = bareValueTag.test(tag);
            let sf = "";

            // Subfields are (code, value) pairs starting at index 3.
            for (let j = 3; j + 1 < row.length; j += 2) {
                const code = row[j];
                const val = row[j + 1];
                sf += bare ? `${val}` : `\$\$${code}${val}`;
            }

            output += `${id} ${tag}${ind1}${ind2} L ${sf}\n`;
        }

        const ok = writable.write(output);

        if (!ok) {
            logger.debug("backpressure on");
            readable.pause();
            writable.once("drain", () => {
                logger.debug("backpressure off");
                readable.resume();
            });
        }
    });

    // Finish the output once the source is exhausted.
    readable.on('end', () => {
        writable.end();
    });
}
@@ -0,0 +1,38 @@
1
+ import { Readable, Writable } from 'stream';
2
+ import log4js from 'log4js';
3
+
4
// Module-wide logger obtained from log4js.getLogger() (no category argument).
+ const logger = log4js.getLogger();
5
+
6
/**
 * Write all chunks of `readable` to `writable` as one JSON array.
 *
 * "[" is written immediately, every chunk is written as `JSON.stringify(chunk)`
 * (comma-separated), and on 'close' of the source "]" is written and the
 * writable is ended. When a write reports a full buffer the source is paused
 * until the writable emits 'drain'.
 */
export function readable2writable(readable: Readable, writable: Writable) : void {
    // Empty before the first element, "," before every later one.
    let separator = "";

    writable.write("[");

    readable.on('data', (data: any) => {
        const accepted = writable.write(`${separator}${JSON.stringify(data)}`);
        separator = ',';

        if (accepted) {
            return;
        }

        logger.debug("backpressure on");
        readable.pause();
        writable.once('drain' , () => {
            logger.debug("backpressure off");
            readable.resume();
        });
    });

    readable.on('close', () => {
        writable.write("]");
        writable.end();
    });
}
@@ -0,0 +1,23 @@
1
+ import { Readable, Writable } from 'stream';
2
+ import log4js from 'log4js';
3
+
4
// Module-wide logger obtained from log4js.getLogger() (no category argument).
+ const logger = log4js.getLogger();
5
+
6
/**
 * Write every chunk of `readable` to `writable` as one line of JSON
 * (`JSON.stringify(chunk)` followed by a newline).
 *
 * When a write reports a full buffer the source is paused until the writable
 * emits 'drain'. The writable is ended when the source emits 'close'.
 */
export function readable2writable(readable: Readable, writable: Writable) : void {
    const onData = (data: any): void => {
        const line = JSON.stringify(data) + "\n";

        if (writable.write(line)) {
            return;
        }

        logger.debug("backpressure on");
        readable.pause();
        writable.once('drain' , () => {
            logger.debug("backpressure off");
            readable.resume();
        });
    };

    const onClose = (): void => {
        writable.end();
    };

    readable.on('data', onData);
    readable.on('close', onClose);
}
@@ -0,0 +1,63 @@
1
+ import { Readable, Writable } from 'stream';
2
+ import N3 from 'n3';
3
+
4
+ import log4js from 'log4js';
5
+
6
// Module-wide logger obtained from log4js.getLogger() (no category argument).
+ const logger = log4js.getLogger();
7
+
8
// Term factory functions taken from N3's DataFactory; used below to build quad terms.
+ const { DataFactory } = N3;
9
+ const { namedNode, literal, blankNode } = DataFactory;
10
+
11
/**
 * Serialise the quads carried by the chunks of `readable` to `writable`
 * through an N3.Writer.
 *
 * Each chunk may carry `prefixes` and `quads`. The writer is created on the
 * first chunk, using that chunk's `prefixes`. Every quad is an object with
 * `subject`, `predicate` and `object` descriptors of the form
 * `{ type, value, as? }`:
 *   - type 'NamedNode' (also the fallback for unknown types) -> named node
 *   - type 'BlankNode' -> blank node (subject and object only)
 *   - type 'Literal'   -> literal, typed with the IRI in `as` when present
 *                         (object only)
 *
 * A quad that lacks a subject, predicate or object is skipped with a warning;
 * the remaining quads of the chunk are still written. On 'end' the writer (if
 * one was created) and the writable are ended.
 *
 * @param readable object-mode source of `{ prefixes?, quads? }` chunks
 * @param writable destination stream handed to the N3.Writer
 */
export function readable2writable(readable: Readable, writable: Writable) : void {
    let writer : N3.Writer | undefined;

    readable.on('data', (data: any) => {
        if (!writer) {
            writer = new N3.Writer(writable, { end: false, prefixes: data['prefixes'] });
        }

        const quads : any[] | undefined = data['quads'];

        if (!quads) return;

        for (const quad of quads) {
            if (!(quad && quad.subject && quad.predicate && quad.object)) {
                // One malformed quad must not discard the rest of the chunk.
                logger.warn("skipping quad without subject, predicate or object");
                continue;
            }

            const subject = { type: 'NamedNode', value: '', ...quad.subject };
            const predicate = { type: 'NamedNode', value: '', ...quad.predicate };
            const object = { type: 'NamedNode', value: '', ...quad.object };

            const subjectValue =
                subject.type === 'BlankNode' ? blankNode(subject.value)
                : namedNode(subject.value);

            // Predicates are always written as named nodes.
            const predicateValue = namedNode(predicate.value);

            const objectValue =
                object.type === 'BlankNode' ? blankNode(object.value)
                : object.type === 'Literal' && object.as ? literal(object.value, namedNode(object.as))
                : object.type === 'Literal' ? literal(object.value)
                : namedNode(object.value);

            writer.addQuad(
                subjectValue,
                predicateValue,
                objectValue
            );
        }
    });

    readable.on('end', () => {
        // No writer exists when the source ended without delivering any chunk.
        writer?.end();
        writable.end();
    });
}
@@ -0,0 +1,84 @@
1
+ import { Readable, Writable } from 'stream';
2
+ import { marcmap, marctag, marcind, marcsubfields , marcForEachSub} from '../marcmap.js';
3
+ import log4js from 'log4js';
4
+
5
// Module-wide logger obtained from log4js.getLogger() (no category argument).
+ const logger = log4js.getLogger();
6
+
7
/**
 * Write every `{ record }` chunk of `readable` to `writable` as MARCXML.
 *
 * The XML declaration and the opening `marc:collection` element are written
 * immediately; the closing element is written, and the writable ended, when
 * the source emits 'end'. Per record:
 *   - rows tagged FMT are omitted,
 *   - the LDR row becomes `marc:leader`,
 *   - rows whose tag starts with "00" become `marc:controlfield`,
 *   - all other rows become `marc:datafield` with one `marc:subfield` per
 *     (code, value) pair.
 * Element text and attribute values (tag, indicators, subfield codes) are
 * passed through escapeXML, attribute values with `forAttribute` enabled.
 * Chunks without a `record` property are ignored.
 *
 * When a write reports a full buffer the source is paused until 'drain'.
 *
 * @param readable object-mode source of `{ record: string[][] }` chunks
 * @param writable destination for the generated XML text
 */
export function readable2writable(readable: Readable, writable: Writable) : void {
    // Escape a value that is placed between double quotes in an attribute.
    const attr = (value: string | undefined): string =>
        escapeXML(value, { forAttribute: true });

    const anyCode = /.*/;

    writable.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    writable.write("<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\">\n");

    readable.on('data', (data: any) => {
        const rec : string[][] | undefined = data['record'];

        if (!rec) return;

        let output = " <marc:record>\n";

        for (const row of rec) {
            const tag = marctag(row);

            if (tag === 'FMT') {
                // FMT rows have no MARCXML counterpart here.
                continue;
            }

            if (tag === 'LDR') {
                const value = marcsubfields(row, anyCode)[0];
                output += ` <marc:leader>${escapeXML(value)}</marc:leader>\n`;
            }
            else if (tag.startsWith('00')) {
                const value = marcsubfields(row, anyCode)[0];
                output += ` <marc:controlfield tag="${attr(tag)}">${escapeXML(value)}</marc:controlfield>\n`;
            }
            else {
                const ind = marcind(row);
                output += ` <marc:datafield tag="${attr(tag)}" ind1="${attr(ind[0])}" ind2="${attr(ind[1])}">\n`;
                marcForEachSub(row, (code, value) => {
                    output += ` <marc:subfield code="${attr(code)}">${escapeXML(value)}</marc:subfield>\n`;
                });
                output += ` </marc:datafield>\n`;
            }
        }
        output += " </marc:record>\n";

        const ok = writable.write(output);

        if (!ok) {
            logger.debug("backpressure on");
            readable.pause();
            writable.once('drain', () => {
                logger.debug("backpressure off");
                readable.resume();
            });
        }
    });

    readable.on('end', () => {
        writable.write("</marc:collection>\n");
        writable.end();
    });
}
59
+
60
/**
 * Make a value safe for inclusion in XML text.
 *
 * `null` and `undefined` become the empty string; everything else is
 * converted with `String()`. Then, in this order:
 *   1. control characters not allowed in XML 1.0 are removed (tab, newline
 *      and carriage return are kept), as are U+FFFE and U+FFFF;
 *   2. every "&" that does not already start something shaped like an entity
 *      (`&name;`, `&#123;`, `&#x1A;`) becomes `&amp;`;
 *   3. "<" and ">" become `&lt;` and `&gt;`;
 *   4. with `options.forAttribute`, double and single quotes become `&quot;`
 *      and `&apos;`.
 *
 * @param value   value to escape
 * @param options `forAttribute: true` additionally escapes quotes
 * @returns the escaped string
 */
export function escapeXML(
    value: string | number | null | undefined,
    options?: { forAttribute?: boolean }
): string {
    if (value == null) {
        return '';
    }

    const escaped = String(value)
        // Drop characters that XML 1.0 does not permit.
        .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, '')
        // Leave existing entity-shaped sequences alone to avoid double escaping.
        .replace(/&(?!(?:[A-Za-z]+|#\d+|#x[0-9A-Fa-f]+);)/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    return options?.forAttribute
        ? escaped.replace(/"/g, '&quot;').replace(/'/g, '&apos;')
        : escaped;
}
@@ -0,0 +1,27 @@
1
+ import path from "path";
2
+
3
/**
 * Load a plugin module.
 *
 * `spec` can be:
 *   - "./local/file.js"
 *   - "/absolute/path/to/plugin.js"
 *   - "package-plugin"
 *   - "package-plugin/submodule"
 *
 * The following locations are tried in order, and the first module that
 * imports successfully is returned:
 *   1. `spec` resolved as a filesystem path (relative to the current working
 *      directory), converted to a file URL;
 *   2. `./<type>/<spec>.js` relative to this module;
 *   3. `spec` as a package specifier (only when it is neither relative nor
 *      absolute).
 *
 * @param spec plugin location or name
 * @param type plugin kind; selects the built-in plugin directory
 * @returns the imported module namespace
 * @throws Error when no candidate can be imported; `cause` holds the
 *         individual failures in the order they were tried
 */
export async function loadPlugin(
    spec: string, type: "input" | "output" | "transform"
) {
    const { pathToFileURL } = await import("node:url");

    // Each candidate is built lazily so that a failure while constructing it
    // is recorded like an import failure instead of aborting the search.
    const candidates: Array<() => string> = [
        // import() expects a URL; a raw absolute path fails on Windows and for
        // paths containing characters such as "#" or "?".
        () => pathToFileURL(path.resolve(spec)).href,
        () => new URL(`./${type}/${spec}.js`, import.meta.url).href,
    ];

    // A bare specifier may name an installed package.
    if (!spec.startsWith(".") && !path.isAbsolute(spec)) {
        candidates.push(() => spec);
    }

    const failures: unknown[] = [];

    for (const candidate of candidates) {
        try {
            return await import(candidate());
        } catch (e) {
            failures.push(e);
        }
    }

    const error = new Error(
        `Cannot load plugin: ${spec}. Tried direct import, local plugin directory and package import.`
    );
    error.cause = failures;
    throw error;
}