@lde/sparql-qlever 0.14.7 → 0.14.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/importer.js +4 -2
- package/dist/preprocess.d.ts +5 -5
- package/dist/preprocess.d.ts.map +1 -1
- package/dist/preprocess.js +35 -31
- package/package.json +6 -6
package/dist/importer.js
CHANGED
|
@@ -200,8 +200,9 @@ const nativeFormats = new Map([
|
|
|
200
200
|
]);
|
|
201
201
|
/**
|
|
202
202
|
* Accepted distribution media types, in preference order: the first match is
|
|
203
|
-
* tried first. Native formats win over JSON-LD
|
|
204
|
-
* the Node-side preprocessor
|
|
203
|
+
* tried first. Native formats win over JSON-LD, RDF/XML and TriG because they
|
|
204
|
+
* skip the Node-side preprocessor; TriG comes last so a streaming native dump
|
|
205
|
+
* is always preferred when a dataset offers both.
|
|
205
206
|
*
|
|
206
207
|
* `application/zip` is intentionally absent — the inner RDF format must be
|
|
207
208
|
* declared via `mediaType` with `application/zip` appearing only as the
|
|
@@ -213,6 +214,7 @@ const acceptedMediaTypes = [
|
|
|
213
214
|
'text/turtle',
|
|
214
215
|
'application/ld+json',
|
|
215
216
|
'application/rdf+xml',
|
|
217
|
+
'application/trig',
|
|
216
218
|
];
|
|
217
219
|
const defaultQleverIndexOptions = {
|
|
218
220
|
'ascii-prefixes-only': true,
|
package/dist/preprocess.d.ts
CHANGED
|
@@ -9,8 +9,8 @@ export interface PreprocessResult {
|
|
|
9
9
|
* Whether a distribution needs Node-side preprocessing before `qlever-index`
|
|
10
10
|
* can read it.
|
|
11
11
|
*
|
|
12
|
-
* JSON-LD
|
|
13
|
-
*
|
|
12
|
+
* JSON-LD, RDF/XML and TriG distributions return `true`: `qlever-index` cannot
|
|
13
|
+
* parse any of them, so we stream them through `rdf-parse` into N-Quads first.
|
|
14
14
|
*
|
|
15
15
|
* Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
|
|
16
16
|
* `application/gzip` or `application/zip` — go straight through the shell
|
|
@@ -20,13 +20,13 @@ export interface PreprocessResult {
|
|
|
20
20
|
*/
|
|
21
21
|
export declare function needsPreprocessing(distribution: Distribution): boolean;
|
|
22
22
|
/**
|
|
23
|
-
* Convert a JSON-LD
|
|
24
|
-
* file.
|
|
23
|
+
* Convert a JSON-LD, RDF/XML or TriG distribution to N-Quads alongside the
|
|
24
|
+
* source file.
|
|
25
25
|
*
|
|
26
26
|
* Streams the source through `rdf-parse` → `rdf-serialize` so memory use
|
|
27
27
|
* stays bounded regardless of input size. Handles gzip transparently
|
|
28
28
|
* (declared `compressFormat` or `.gz` filename) and zip containers (folds
|
|
29
|
-
*
|
|
29
|
+
* every parseable entry into the output stream in order).
|
|
30
30
|
*
|
|
31
31
|
* Cached: if the output is newer than the input, it is reused as-is.
|
|
32
32
|
*/
|
package/dist/preprocess.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AA6B5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAKtE;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CAyB3B"}
|
package/dist/preprocess.js
CHANGED
|
@@ -3,30 +3,31 @@ import { rdfSerializer } from 'rdf-serialize';
|
|
|
3
3
|
import { createGunzip } from 'node:zlib';
|
|
4
4
|
import { createReadStream, createWriteStream } from 'node:fs';
|
|
5
5
|
import { rm, stat } from 'node:fs/promises';
|
|
6
|
-
import { extname } from 'node:path';
|
|
7
6
|
import { finished } from 'node:stream/promises';
|
|
8
7
|
import { promisify } from 'node:util';
|
|
9
8
|
import yauzl from 'yauzl';
|
|
10
9
|
const JSONLD_MIME = 'application/ld+json';
|
|
11
10
|
const RDFXML_MIME = 'application/rdf+xml';
|
|
11
|
+
const TRIG_MIME = 'application/trig';
|
|
12
12
|
const ZIP_MIME = 'application/zip';
|
|
13
13
|
const GZIP_MIME = 'application/gzip';
|
|
14
14
|
const GZIP_MIME_LEGACY = 'application/x-gzip';
|
|
15
15
|
/**
|
|
16
|
-
* RDF media types
|
|
17
|
-
*
|
|
18
|
-
*
|
|
16
|
+
* RDF media types `qlever-index` cannot read natively, keyed by `rdf-parse`
|
|
17
|
+
* `contentType` with a label for warnings. TriG belongs here too: its
|
|
18
|
+
* `<graph> { … }` blocks parse as neither N-Quads nor Turtle.
|
|
19
19
|
*/
|
|
20
20
|
const preprocessFormats = new Map([
|
|
21
|
-
[JSONLD_MIME,
|
|
22
|
-
[RDFXML_MIME,
|
|
21
|
+
[JSONLD_MIME, 'JSON-LD'],
|
|
22
|
+
[RDFXML_MIME, 'RDF/XML'],
|
|
23
|
+
[TRIG_MIME, 'TriG'],
|
|
23
24
|
]);
|
|
24
25
|
/**
|
|
25
26
|
* Whether a distribution needs Node-side preprocessing before `qlever-index`
|
|
26
27
|
* can read it.
|
|
27
28
|
*
|
|
28
|
-
* JSON-LD
|
|
29
|
-
*
|
|
29
|
+
* JSON-LD, RDF/XML and TriG distributions return `true`: `qlever-index` cannot
|
|
30
|
+
* parse any of them, so we stream them through `rdf-parse` into N-Quads first.
|
|
30
31
|
*
|
|
31
32
|
* Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
|
|
32
33
|
* `application/gzip` or `application/zip` — go straight through the shell
|
|
@@ -39,22 +40,20 @@ export function needsPreprocessing(distribution) {
|
|
|
39
40
|
preprocessFormats.has(distribution.mimeType));
|
|
40
41
|
}
|
|
41
42
|
/**
|
|
42
|
-
* Convert a JSON-LD
|
|
43
|
-
* file.
|
|
43
|
+
* Convert a JSON-LD, RDF/XML or TriG distribution to N-Quads alongside the
|
|
44
|
+
* source file.
|
|
44
45
|
*
|
|
45
46
|
* Streams the source through `rdf-parse` → `rdf-serialize` so memory use
|
|
46
47
|
* stays bounded regardless of input size. Handles gzip transparently
|
|
47
48
|
* (declared `compressFormat` or `.gz` filename) and zip containers (folds
|
|
48
|
-
*
|
|
49
|
+
* every parseable entry into the output stream in order).
|
|
49
50
|
*
|
|
50
51
|
* Cached: if the output is newer than the input, it is reused as-is.
|
|
51
52
|
*/
|
|
52
53
|
export async function preprocess(localFile, distribution) {
|
|
53
54
|
const contentType = distribution.mimeType;
|
|
54
|
-
const format = contentType === undefined
|
|
55
|
-
|
|
56
|
-
: preprocessFormats.get(contentType);
|
|
57
|
-
if (contentType === undefined || format === undefined) {
|
|
55
|
+
const label = contentType === undefined ? undefined : preprocessFormats.get(contentType);
|
|
56
|
+
if (contentType === undefined || label === undefined) {
|
|
58
57
|
throw new Error(`preprocess called for distribution that does not need preprocessing: mediaType=${distribution.mimeType}`);
|
|
59
58
|
}
|
|
60
59
|
const outputFile = `${localFile}.preprocessed.nq`;
|
|
@@ -64,7 +63,7 @@ export async function preprocess(localFile, distribution) {
|
|
|
64
63
|
await rm(outputFile, { force: true });
|
|
65
64
|
const warnings = [];
|
|
66
65
|
if (distribution.compressMimeType === ZIP_MIME) {
|
|
67
|
-
await streamRdfZip(localFile, outputFile, contentType, format, warnings);
|
|
66
|
+
await streamRdfZip(localFile, outputFile, contentType, label, warnings);
|
|
68
67
|
}
|
|
69
68
|
else {
|
|
70
69
|
await streamRdfFile(localFile, outputFile, contentType, distribution);
|
|
@@ -118,7 +117,12 @@ async function streamRdfFile(localFile, outputFile, contentType, distribution) {
|
|
|
118
117
|
}
|
|
119
118
|
}
|
|
120
119
|
const openZip = promisify(yauzl.open);
|
|
121
|
-
|
|
120
|
+
/**
|
|
121
|
+
* Fold every parseable entry of a zip into the N-Quads output, in order. The
|
|
122
|
+
* declared `contentType` drives the parser; an entry that fails to parse (a
|
|
123
|
+
* sidecar, OS metadata) is skipped with a warning. Throws if nothing parses.
|
|
124
|
+
*/
|
|
125
|
+
async function streamRdfZip(zipFile, outputFile, contentType, label, warnings) {
|
|
122
126
|
const zip = await openZip(zipFile, { lazyEntries: true });
|
|
123
127
|
const output = createWriteStream(outputFile);
|
|
124
128
|
let entriesProcessed = 0;
|
|
@@ -128,25 +132,25 @@ async function streamRdfZip(zipFile, outputFile, contentType, format, warnings)
|
|
|
128
132
|
zip.on('end', resolve);
|
|
129
133
|
zip.on('entry', (entry) => {
|
|
130
134
|
void (async () => {
|
|
135
|
+
if (entry.fileName.endsWith('/')) {
|
|
136
|
+
zip.readEntry();
|
|
137
|
+
return;
|
|
138
|
+
}
|
|
131
139
|
try {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
140
|
+
const stream = await openZipEntry(zip, entry);
|
|
141
|
+
try {
|
|
142
|
+
await pipeRdfToWritable(stream, output, contentType);
|
|
143
|
+
entriesProcessed++;
|
|
135
144
|
}
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
zip.readEntry();
|
|
140
|
-
return;
|
|
145
|
+
finally {
|
|
146
|
+
// yauzl lazyEntries won't advance until this stream is released.
|
|
147
|
+
stream.destroy();
|
|
141
148
|
}
|
|
142
|
-
const stream = await openZipEntry(zip, entry);
|
|
143
|
-
await pipeRdfToWritable(stream, output, contentType);
|
|
144
|
-
entriesProcessed++;
|
|
145
|
-
zip.readEntry();
|
|
146
149
|
}
|
|
147
150
|
catch (error) {
|
|
148
|
-
|
|
151
|
+
warnings.push(`Skipping zip entry ${entry.fileName}: not valid ${label} (${error.message})`);
|
|
149
152
|
}
|
|
153
|
+
zip.readEntry();
|
|
150
154
|
})();
|
|
151
155
|
});
|
|
152
156
|
zip.readEntry();
|
|
@@ -157,7 +161,7 @@ async function streamRdfZip(zipFile, outputFile, contentType, format, warnings)
|
|
|
157
161
|
await closeWritable(output);
|
|
158
162
|
}
|
|
159
163
|
if (entriesProcessed === 0) {
|
|
160
|
-
throw new Error(`Zip ${zipFile} contains no ${
|
|
164
|
+
throw new Error(`Zip ${zipFile} contains no valid ${label} entries`);
|
|
161
165
|
}
|
|
162
166
|
}
|
|
163
167
|
function openZipEntry(zip, entry) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/sparql-qlever",
|
|
3
|
-
"version": "0.14.7",
|
|
3
|
+
"version": "0.14.9",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/sparql-qlever"
|
|
@@ -24,9 +24,9 @@
|
|
|
24
24
|
"!**/*.tsbuildinfo"
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@lde/dataset": "0.7.
|
|
28
|
-
"@lde/distribution-downloader": "0.6.
|
|
29
|
-
"@lde/sparql-importer": "0.6.
|
|
27
|
+
"@lde/dataset": "0.7.7",
|
|
28
|
+
"@lde/distribution-downloader": "0.6.5",
|
|
29
|
+
"@lde/sparql-importer": "0.6.5",
|
|
30
30
|
"@lde/sparql-server": "0.4.11",
|
|
31
31
|
"@lde/task-runner": "0.2.11",
|
|
32
32
|
"@lde/task-runner-docker": "0.2.13",
|
|
@@ -35,10 +35,10 @@
|
|
|
35
35
|
"rdf-parse": "^5.0.0",
|
|
36
36
|
"rdf-serialize": "^5.1.0",
|
|
37
37
|
"tslib": "^2.3.0",
|
|
38
|
-
"yauzl": "^3.
|
|
38
|
+
"yauzl": "^3.4.0"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
41
|
"@rdfjs/types": "^2.0.0",
|
|
42
|
-
"@types/yauzl": "^
|
|
42
|
+
"@types/yauzl": "^3.3.0"
|
|
43
43
|
}
|
|
44
44
|
}
|