marcattacks 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,398 @@
1
+ import { Transform } from "stream";
2
+ import { marcmap , marcForTag, marcsubfields } from '../marcmap.js';
3
+ import { v4 as uuidv4 } from 'uuid';
4
+
5
/**
 * Namespace prefixes used to build the IRIs in the generated quads.
 * The same table is attached to every outgoing chunk as `data.prefixes`.
 */
const prefixes = {
    // NOTE(review): no trailing separator, so record IRIs are built as
    // `https://lib.ugent.be/record<id>` — confirm this is the intended shape.
    this: 'https://lib.ugent.be/record',
    schema: 'https://schema.org/',
    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    genid: 'https://lib.ugent.be/.well-known/genid/',
    owl: 'http://www.w3.org/2002/07/owl#',
    bibo: 'http://purl.org/ontology/bibo/',
    // The XML Schema datatype namespace is defined with the http scheme;
    // an https variant is a different IRI and is not recognised as xsd:*.
    xmlschema: 'http://www.w3.org/2001/XMLSchema#',
};
14
+
15
/**
 * Creates an object-mode Transform stream that adds RDF data to each chunk.
 *
 * For every incoming chunk that has a `record` property, the chunk gets two
 * extra properties before being passed downstream:
 *  - `prefixes`: the module's namespace prefix table
 *  - `quads`: the result of `rec2quads(record)`
 *
 * Chunks without a `record` are dropped (nothing is emitted for them).
 *
 * @param _opts - Unused.
 * @returns A promise resolving to the Transform stream.
 */
export async function transform(_opts: any) : Promise<Transform> {
    return new Transform({
        objectMode: true,
        transform(data: any, _encoding, callback) {
            const rec : string[][] = data['record'];

            if (!rec) {
                // The callback must be invoked for every chunk; returning
                // without calling it would stall the stream permanently.
                callback();
                return;
            }

            try {
                data['prefixes'] = prefixes;
                data['quads'] = rec2quads(rec);
            }
            catch (err) {
                // Report conversion failures as a stream error rather than
                // letting the exception escape from the write() call.
                callback(err instanceof Error ? err : new Error(String(err)));
                return;
            }

            callback(null,data);
        }
    });
}
29
+
30
/**
 * RDF classes asserted for each known value of field 920 subfield a.
 * Values not listed here fall back to schema:CreativeWork.
 */
const recordTypes = new Map<string, string[]>([
    ['catalog',        [`${prefixes.schema}Book`]],
    ['correspondence', [`${prefixes.schema}CreativeWork`, `${prefixes.bibo}Letter`]],
    ['book',           [`${prefixes.schema}Book`]],
    ['dissertation',   [`${prefixes.schema}Thesis`]],
    ['ephemera',       [`${prefixes.schema}CreativeWork`, `${prefixes.bibo}Document`]],
    ['image',          [`${prefixes.schema}VisualArtwork`]],
    ['manuscript',     [`${prefixes.schema}Book`, `${prefixes.bibo}Manuscript`]],
    ['map',            [`${prefixes.schema}Map`]],
    ['master',         [`${prefixes.schema}Thesis`]],
    ['periodical',     [`${prefixes.schema}Periodical`]],
    ['phd',            [`${prefixes.schema}Thesis`]],
]);

/**
 * Converts one record into a list of quad-like objects of the form
 * `{ subject: {value}, predicate: {value}, object: {value[, type, as]} }`.
 *
 * The record IRI is `prefixes.this` followed by the value of field 001.
 * Fields handled: 035, 100, 245, 246, 260, 300, 340, 500, 520, 650, 856
 * and 920; all other tags are ignored.
 *
 * @param rec - The record rows, as passed to `marcmap` / `marcForTag`.
 * @returns The generated quads, or `undefined` when the record has no 001 value.
 */
function rec2quads(rec: string[][]) {
    const quads : any[] = [];

    const id = marcmap(rec,"001",{});

    if (!id) return;

    const record = `${prefixes.this}${id}`;

    // Pushes a quad whose object is a resource (no `type` key on the object).
    const addIri = (subject: string, predicate: string, object: string) => {
        quads.push({
            subject: { value: subject },
            predicate: { value: predicate },
            object: { value: object },
        });
    };

    // Pushes a quad whose object is a literal, optionally with a datatype IRI in `as`.
    const addLiteral = (subject: string, predicate: string, value: string, datatype?: string) => {
        quads.push({
            subject: { value: subject },
            predicate: { value: predicate },
            object: datatype
                ? { value, type: 'Literal', as: datatype }
                : { value, type: 'Literal' },
        });
    };

    // Pushes a schema.org literal property on the record itself, skipping empty values.
    const addRecordLiteral = (property: string, value: string, datatype?: string) => {
        if (value) {
            addLiteral(record, `${prefixes.schema}${property}`, value, datatype);
        }
    };

    marcForTag(rec, (tag,row) => {
        switch (tag) {
            case '035': {
                const value = marcsubfields(row,/a/)[0];

                if (value?.match(/^\(RUG01\)\d+/)) {
                    addIri(record, `${prefixes.schema}replaces`,
                        'https://lib.ugent.be/catalog/rug01:' + value.replace(/\(RUG01\)(.*)/,"$1"));
                }
                else if (value?.match(/^\(BIBLIO\)\d+/)) {
                    addIri(record, `${prefixes.schema}workExample`,
                        'https://biblio.ugent.be/record/' + value.replace(/\(BIBLIO\)(.*)/,"$1"));
                }
                break;
            }
            case '100': {
                const names = marcsubfields(row,/a/);

                // An empty array is truthy, so the length has to be checked
                // explicitly to skip fields that carry no name at all.
                if (!names || names.length === 0) {
                    return undefined;
                }

                const authorId = marcsubfields(row,/0/)[0];

                // Use the VIAF IRI when one is supplied, otherwise a generated id.
                const author = authorId?.match(/\(viaf\)\d{2,}/)
                    ? "https://viaf.org/viaf/" + authorId.replace(/\(viaf\)/g,'')
                    : genid();

                addIri(record, `${prefixes.schema}author`, author);
                addIri(author, `${prefixes.rdf}type`, `${prefixes.schema}Person`);

                names.forEach( n => {
                    addLiteral(author, `${prefixes.schema}name`, strip(n));
                });

                // Take the years from the capture groups so that surrounding
                // text or punctuation ("ca. 1900-1950.") never ends up in a
                // gYear literal.
                const span = marcsubfields(row,/d/)[0]?.match(/(\d{4})-(\d{4})?/);

                if (span) {
                    if (span[1]) {
                        addLiteral(author, `${prefixes.schema}birthDate`, span[1], `${prefixes.xmlschema}gYear`);
                    }
                    if (span[2]) {
                        addLiteral(author, `${prefixes.schema}deathDate`, span[2], `${prefixes.xmlschema}gYear`);
                    }
                }
                break;
            }
            case '245':
                addRecordLiteral('name', strip( marcsubfields(row,/[ab]/).join(" ") ));
                break;
            case '246':
                addRecordLiteral('alternativeHeadline', strip( marcsubfields(row,/a/).join(" ") ));
                break;
            case '260': {
                const location  = strip( marcsubfields(row,/a/).join(" ") );
                const publisher = strip( marcsubfields(row,/b/).join(" ") );
                const date      = strip( marcsubfields(row,/c/).join(" ") );

                addRecordLiteral('locationCreated', location);
                addRecordLiteral('publisher', publisher);

                // Only a bare four-digit year is typed as gYear; anything else
                // is kept as a plain literal.
                if (date.match(/^\d{4}$/)) {
                    addRecordLiteral('datePublished', date, `${prefixes.xmlschema}gYear`);
                }
                else {
                    addRecordLiteral('datePublished', date);
                }
                break;
            }
            case '300':
                addRecordLiteral('numberOfPages', strip( marcsubfields(row,/a/).join(" ") ));
                break;
            case '340':
                addRecordLiteral('material', strip( marcsubfields(row,/a/).join(" ") ));
                break;
            case '500':
                addRecordLiteral('description', strip( marcsubfields(row,/a/).join(" ") ));
                break;
            case '520':
                addRecordLiteral('abstract', strip( marcsubfields(row,/a/).join(" ") ));
                break;
            case '650': {
                const topic = genid();

                addIri(record, `${prefixes.schema}about`, topic);
                addIri(topic, `${prefixes.rdf}type`, `${prefixes.schema}Thing`);

                marcsubfields(row,/a|x/).forEach(v => {
                    addLiteral(topic, `${prefixes.schema}name`, strip(v));
                });
                marcsubfields(row,/v/).forEach(v => {
                    addLiteral(topic, `${prefixes.schema}genre`, strip(v));
                });
                marcsubfields(row,/y/).forEach(v => {
                    addLiteral(topic, `${prefixes.schema}temporalCoverage`, strip(v));
                });
                marcsubfields(row,/z/).forEach(v => {
                    addLiteral(topic, `${prefixes.schema}spatialCoverage`, strip(v));
                });
                break;
            }
            case '856': {
                const url = strip( marcsubfields(row,/u/).join(" ") );

                if (url) {
                    const media = genid();

                    addIri(record, `${prefixes.schema}encoding`, media);
                    addIri(media, `${prefixes.rdf}type`, `${prefixes.schema}MediaObject`);
                    // contentUrl is a schema.org property; the RDF namespace
                    // defines no such term.
                    addIri(media, `${prefixes.schema}contentUrl`, url);
                }
                break;
            }
            case '920': {
                const kind = strip( marcsubfields(row,/a/).join(" ") );

                if (!kind) return;

                const types = recordTypes.get(kind) ?? [`${prefixes.schema}CreativeWork`];

                for (const type of types) {
                    addIri(record, `${prefixes.rdf}type`, type);
                }
                break;
            }
        }
    });

    return quads;
}
391
+
392
/**
 * Removes a single trailing comma, period, colon or slash from a string,
 * together with any whitespace directly in front of it.
 *
 * Only punctuation at the very end of the string is affected; a string that
 * ends in anything else (including whitespace) is returned unchanged.
 *
 * @param s - The text to clean.
 * @returns The text without the trailing punctuation mark.
 */
function strip(s: string) : string {
    const trailingPunctuation = /\s*[\,.:\/]$/g;
    return s.replace(trailingPunctuation, '');
}
395
+
396
/**
 * Produces a fresh identifier of the form `genid:<uuid>`, using a newly
 * generated version 4 UUID on every call.
 *
 * @returns The generated identifier.
 */
function genid() : string {
    const uuid = uuidv4();
    return 'genid:' + uuid;
}
package/tsconfig.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ // Visit https://aka.ms/tsconfig to read more about this file
3
+ "compilerOptions": {
4
+ // File Layout
5
+ "rootDir": "./src",
6
+ "outDir": "./dist",
7
+
8
+ // Environment Settings
9
+ // See also https://aka.ms/tsconfig/module
10
+ "module": "nodenext",
11
+ "target": "esnext",
12
+ "types": [
13
+ "node"
14
+ ],
15
+ // For nodejs:
16
+ // "lib": ["esnext"],
17
+ // "types": ["node"],
18
+ // and npm install -D @types/node
19
+
20
+ // Other Outputs
21
+ "sourceMap": true,
22
+ "declaration": true,
23
+ "declarationMap": true,
24
+
25
+ // Stricter Typechecking Options
26
+ "noUncheckedIndexedAccess": true,
27
+ "exactOptionalPropertyTypes": true,
28
+
29
+ // Style Options
30
+ // "noImplicitReturns": true,
31
+ // "noImplicitOverride": true,
32
+ // "noUnusedLocals": true,
33
+ // "noUnusedParameters": true,
34
+ // "noFallthroughCasesInSwitch": true,
35
+ // "noPropertyAccessFromIndexSignature": true,
36
+
37
+ // Recommended Options
38
+ "strict": true,
39
+ "jsx": "react-jsx",
40
+ "verbatimModuleSyntax": true,
41
+ "isolatedModules": true,
42
+ "noUncheckedSideEffectImports": true,
43
+ "moduleDetection": "force",
44
+ "skipLibCheck": true
45
+ }
46
+ }