@lde/sparql-qlever 0.14.5 → 0.14.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/importer.js CHANGED
@@ -200,8 +200,8 @@ const nativeFormats = new Map([
200
200
  ]);
201
201
  /**
202
202
  * Accepted distribution media types, in preference order: the first match is
203
- * tried first. Native formats win over JSON-LD because they skip the Node-side
204
- * preprocessor.
203
+ * tried first. Native formats win over JSON-LD and RDF/XML because they skip
204
+ * the Node-side preprocessor.
205
205
  *
206
206
  * `application/zip` is intentionally absent — the inner RDF format must be
207
207
  * declared via `mediaType` with `application/zip` appearing only as the
@@ -212,6 +212,7 @@ const acceptedMediaTypes = [
212
212
  'application/n-triples',
213
213
  'text/turtle',
214
214
  'application/ld+json',
215
+ 'application/rdf+xml',
215
216
  ];
216
217
  const defaultQleverIndexOptions = {
217
218
  'ascii-prefixes-only': true,
@@ -9,8 +9,8 @@ export interface PreprocessResult {
9
9
  * Whether a distribution needs Node-side preprocessing before `qlever-index`
10
10
  * can read it.
11
11
  *
12
- * Only JSON-LD distributions return `true`: `qlever-index` cannot parse
13
- * JSON-LD, so we stream it through a JSON-LD parser into N-Quads first.
12
+ * JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
13
+ * either, so we stream them through `rdf-parse` into N-Quads first.
14
14
  *
15
15
  * Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
16
16
  * `application/gzip` or `application/zip` — go straight through the shell
@@ -20,12 +20,13 @@ export interface PreprocessResult {
20
20
  */
21
21
  export declare function needsPreprocessing(distribution: Distribution): boolean;
22
22
  /**
23
- * Convert a JSON-LD distribution to N-Quads alongside the source file.
23
+ * Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
24
+ * file.
24
25
  *
25
26
  * Streams the source through `rdf-parse` → `rdf-serialize` so memory use
26
27
  * stays bounded regardless of input size. Handles gzip transparently
27
28
  * (declared `compressFormat` or `.gz` filename) and zip containers (folds
28
- * each JSON-LD entry into the output stream in order).
29
+ * each matching entry into the output stream in order).
29
30
  *
30
31
  * Cached: if the output is newer than the input, it is reused as-is.
31
32
  */
@@ -1 +1 @@
1
- {"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAmB5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAEtE;AAED;;;;;;;;;GASG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CAsB3B"}
1
+ {"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAmC5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAKtE;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CA2B3B"}
@@ -8,16 +8,25 @@ import { finished } from 'node:stream/promises';
8
8
  import { promisify } from 'node:util';
9
9
  import yauzl from 'yauzl';
10
10
  const JSONLD_MIME = 'application/ld+json';
11
+ const RDFXML_MIME = 'application/rdf+xml';
11
12
  const ZIP_MIME = 'application/zip';
12
13
  const GZIP_MIME = 'application/gzip';
13
14
  const GZIP_MIME_LEGACY = 'application/x-gzip';
14
- const JSONLD_ZIP_EXTENSIONS = ['.jsonld', '.json'];
15
+ /**
16
+ * RDF media types that `qlever-index` cannot read natively, so they are streamed
17
+ * through `rdf-parse` → N-Quads first. The map key is the `rdf-parse`
18
+ * `contentType`; the value carries the metadata needed to handle zip containers.
19
+ */
20
+ const preprocessFormats = new Map([
21
+ [JSONLD_MIME, { label: 'JSON-LD', zipExtensions: ['.jsonld', '.json'] }],
22
+ [RDFXML_MIME, { label: 'RDF/XML', zipExtensions: ['.rdf', '.xml', '.owl'] }],
23
+ ]);
15
24
  /**
16
25
  * Whether a distribution needs Node-side preprocessing before `qlever-index`
17
26
  * can read it.
18
27
  *
19
- * Only JSON-LD distributions return `true`: `qlever-index` cannot parse
20
- * JSON-LD, so we stream it through a JSON-LD parser into N-Quads first.
28
+ * JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
29
+ * either, so we stream them through `rdf-parse` into N-Quads first.
21
30
  *
22
31
  * Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
23
32
  * `application/gzip` or `application/zip` — go straight through the shell
@@ -26,20 +35,26 @@ const JSONLD_ZIP_EXTENSIONS = ['.jsonld', '.json'];
26
35
  * format must be declared.
27
36
  */
28
37
  export function needsPreprocessing(distribution) {
29
- return distribution.mimeType === JSONLD_MIME;
38
+ return (distribution.mimeType !== undefined &&
39
+ preprocessFormats.has(distribution.mimeType));
30
40
  }
31
41
  /**
32
- * Convert a JSON-LD distribution to N-Quads alongside the source file.
42
+ * Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
43
+ * file.
33
44
  *
34
45
  * Streams the source through `rdf-parse` → `rdf-serialize` so memory use
35
46
  * stays bounded regardless of input size. Handles gzip transparently
36
47
  * (declared `compressFormat` or `.gz` filename) and zip containers (folds
37
- * each JSON-LD entry into the output stream in order).
48
+ * each matching entry into the output stream in order).
38
49
  *
39
50
  * Cached: if the output is newer than the input, it is reused as-is.
40
51
  */
41
52
  export async function preprocess(localFile, distribution) {
42
- if (!needsPreprocessing(distribution)) {
53
+ const contentType = distribution.mimeType;
54
+ const format = contentType === undefined
55
+ ? undefined
56
+ : preprocessFormats.get(contentType);
57
+ if (contentType === undefined || format === undefined) {
43
58
  throw new Error(`preprocess called for distribution that does not need preprocessing: mediaType=${distribution.mimeType}`);
44
59
  }
45
60
  const outputFile = `${localFile}.preprocessed.nq`;
@@ -49,10 +64,10 @@ export async function preprocess(localFile, distribution) {
49
64
  await rm(outputFile, { force: true });
50
65
  const warnings = [];
51
66
  if (distribution.compressMimeType === ZIP_MIME) {
52
- await streamJsonldZip(localFile, outputFile, warnings);
67
+ await streamRdfZip(localFile, outputFile, contentType, format, warnings);
53
68
  }
54
69
  else {
55
- await streamJsonldFile(localFile, outputFile, distribution);
70
+ await streamRdfFile(localFile, outputFile, contentType, distribution);
56
71
  }
57
72
  return { path: outputFile, format: 'nq', warnings };
58
73
  }
@@ -69,12 +84,12 @@ async function outputIsUpToDate(inputFile, outputFile) {
69
84
  }
70
85
  }
71
86
  /**
72
- * Pipe one JSON-LD source through parse → N-Quads serialize into an already
87
+ * Pipe one RDF source through parse → N-Quads serialize into an already
73
88
  * open writable, without closing it. Back-pressure is handled by Node's
74
89
  * built-in `.pipe()`; the caller manages `output`'s lifecycle.
75
90
  */
76
- async function pipeJsonldToWritable(input, output) {
77
- const quads = rdfParser.parse(input, { contentType: JSONLD_MIME });
91
+ async function pipeRdfToWritable(input, output, contentType) {
92
+ const quads = rdfParser.parse(input, { contentType });
78
93
  const bytes = rdfSerializer.serialize(quads, {
79
94
  contentType: 'application/n-quads',
80
95
  });
@@ -88,7 +103,7 @@ async function closeWritable(output) {
88
103
  output.end();
89
104
  });
90
105
  }
91
- async function streamJsonldFile(localFile, outputFile, distribution) {
106
+ async function streamRdfFile(localFile, outputFile, contentType, distribution) {
92
107
  const isGzipped = distribution.compressMimeType === GZIP_MIME ||
93
108
  distribution.compressMimeType === GZIP_MIME_LEGACY ||
94
109
  localFile.toLowerCase().endsWith('.gz');
@@ -96,14 +111,14 @@ async function streamJsonldFile(localFile, outputFile, distribution) {
96
111
  const input = isGzipped ? source.pipe(createGunzip()) : source;
97
112
  const output = createWriteStream(outputFile);
98
113
  try {
99
- await pipeJsonldToWritable(input, output);
114
+ await pipeRdfToWritable(input, output, contentType);
100
115
  }
101
116
  finally {
102
117
  await closeWritable(output);
103
118
  }
104
119
  }
105
120
  const openZip = promisify(yauzl.open);
106
- async function streamJsonldZip(zipFile, outputFile, warnings) {
121
+ async function streamRdfZip(zipFile, outputFile, contentType, format, warnings) {
107
122
  const zip = await openZip(zipFile, { lazyEntries: true });
108
123
  const output = createWriteStream(outputFile);
109
124
  let entriesProcessed = 0;
@@ -119,13 +134,13 @@ async function streamJsonldZip(zipFile, outputFile, warnings) {
119
134
  return;
120
135
  }
121
136
  const extension = extname(entry.fileName).toLowerCase();
122
- if (!JSONLD_ZIP_EXTENSIONS.includes(extension)) {
123
- warnings.push(`Skipping zip entry ${entry.fileName}: extension ${extension || '(none)'} is not JSON-LD`);
137
+ if (!format.zipExtensions.includes(extension)) {
138
+ warnings.push(`Skipping zip entry ${entry.fileName}: extension ${extension || '(none)'} is not ${format.label}`);
124
139
  zip.readEntry();
125
140
  return;
126
141
  }
127
142
  const stream = await openZipEntry(zip, entry);
128
- await pipeJsonldToWritable(stream, output);
143
+ await pipeRdfToWritable(stream, output, contentType);
129
144
  entriesProcessed++;
130
145
  zip.readEntry();
131
146
  }
@@ -142,7 +157,7 @@ async function streamJsonldZip(zipFile, outputFile, warnings) {
142
157
  await closeWritable(output);
143
158
  }
144
159
  if (entriesProcessed === 0) {
145
- throw new Error(`Zip ${zipFile} contains no JSON-LD entries`);
160
+ throw new Error(`Zip ${zipFile} contains no ${format.label} entries`);
146
161
  }
147
162
  }
148
163
  function openZipEntry(zip, entry) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/sparql-qlever",
3
- "version": "0.14.5",
3
+ "version": "0.14.7",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/sparql-qlever"
@@ -24,9 +24,9 @@
24
24
  "!**/*.tsbuildinfo"
25
25
  ],
26
26
  "dependencies": {
27
- "@lde/dataset": "0.7.4",
28
- "@lde/distribution-downloader": "0.6.2",
29
- "@lde/sparql-importer": "0.6.2",
27
+ "@lde/dataset": "0.7.5",
28
+ "@lde/distribution-downloader": "0.6.3",
29
+ "@lde/sparql-importer": "0.6.3",
30
30
  "@lde/sparql-server": "0.4.11",
31
31
  "@lde/task-runner": "0.2.11",
32
32
  "@lde/task-runner-docker": "0.2.13",
@@ -35,7 +35,7 @@
35
35
  "rdf-parse": "^5.0.0",
36
36
  "rdf-serialize": "^5.1.0",
37
37
  "tslib": "^2.3.0",
38
- "yauzl": "^3.3.1"
38
+ "yauzl": "^3.3.2"
39
39
  },
40
40
  "devDependencies": {
41
41
  "@rdfjs/types": "^2.0.0",