@lde/sparql-qlever 0.14.5 → 0.14.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/importer.js +3 -2
- package/dist/preprocess.d.ts +5 -4
- package/dist/preprocess.d.ts.map +1 -1
- package/dist/preprocess.js +34 -19
- package/package.json +5 -5
package/dist/importer.js
CHANGED
|
@@ -200,8 +200,8 @@ const nativeFormats = new Map([
|
|
|
200
200
|
]);
|
|
201
201
|
/**
|
|
202
202
|
* Accepted distribution media types, in preference order: the first match is
|
|
203
|
-
* tried first. Native formats win over JSON-LD because they skip the Node-side
|
|
204
|
-
* preprocessor.
|
|
203
|
+
* tried first. Native formats win over JSON-LD and RDF/XML because they skip
|
|
204
|
+
* the Node-side preprocessor.
|
|
205
205
|
*
|
|
206
206
|
* `application/zip` is intentionally absent — the inner RDF format must be
|
|
207
207
|
* declared via `mediaType` with `application/zip` appearing only as the
|
|
@@ -212,6 +212,7 @@ const acceptedMediaTypes = [
|
|
|
212
212
|
'application/n-triples',
|
|
213
213
|
'text/turtle',
|
|
214
214
|
'application/ld+json',
|
|
215
|
+
'application/rdf+xml',
|
|
215
216
|
];
|
|
216
217
|
const defaultQleverIndexOptions = {
|
|
217
218
|
'ascii-prefixes-only': true,
|
package/dist/preprocess.d.ts
CHANGED
|
@@ -9,8 +9,8 @@ export interface PreprocessResult {
|
|
|
9
9
|
* Whether a distribution needs Node-side preprocessing before `qlever-index`
|
|
10
10
|
* can read it.
|
|
11
11
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
12
|
+
* JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
|
|
13
|
+
* either, so we stream them through `rdf-parse` into N-Quads first.
|
|
14
14
|
*
|
|
15
15
|
* Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
|
|
16
16
|
* `application/gzip` or `application/zip` — go straight through the shell
|
|
@@ -20,12 +20,13 @@ export interface PreprocessResult {
|
|
|
20
20
|
*/
|
|
21
21
|
export declare function needsPreprocessing(distribution: Distribution): boolean;
|
|
22
22
|
/**
|
|
23
|
-
* Convert a JSON-LD distribution to N-Quads alongside the source
|
|
23
|
+
* Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
|
|
24
|
+
* file.
|
|
24
25
|
*
|
|
25
26
|
* Streams the source through `rdf-parse` → `rdf-serialize` so memory use
|
|
26
27
|
* stays bounded regardless of input size. Handles gzip transparently
|
|
27
28
|
* (declared `compressFormat` or `.gz` filename) and zip containers (folds
|
|
28
|
-
* each
|
|
29
|
+
* each matching entry into the output stream in order).
|
|
29
30
|
*
|
|
30
31
|
* Cached: if the output is newer than the input, it is reused as-is.
|
|
31
32
|
*/
|
package/dist/preprocess.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"preprocess.d.ts","sourceRoot":"","sources":["../src/preprocess.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAmC5C,MAAM,WAAW,gBAAgB;IAC/B,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,IAAI,CAAC;IACb,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,YAAY,GAAG,OAAO,CAKtE;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAC9B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,gBAAgB,CAAC,CA2B3B"}
|
package/dist/preprocess.js
CHANGED
|
@@ -8,16 +8,25 @@ import { finished } from 'node:stream/promises';
|
|
|
8
8
|
import { promisify } from 'node:util';
|
|
9
9
|
import yauzl from 'yauzl';
|
|
10
10
|
const JSONLD_MIME = 'application/ld+json';
|
|
11
|
+
const RDFXML_MIME = 'application/rdf+xml';
|
|
11
12
|
const ZIP_MIME = 'application/zip';
|
|
12
13
|
const GZIP_MIME = 'application/gzip';
|
|
13
14
|
const GZIP_MIME_LEGACY = 'application/x-gzip';
|
|
14
|
-
|
|
15
|
+
/**
|
|
16
|
+
* RDF media types that `qlever-index` cannot read natively, so they are streamed
|
|
17
|
+
* through `rdf-parse` → N-Quads first. The map key is the `rdf-parse`
|
|
18
|
+
* `contentType`; the value carries the metadata needed to handle zip containers.
|
|
19
|
+
*/
|
|
20
|
+
const preprocessFormats = new Map([
|
|
21
|
+
[JSONLD_MIME, { label: 'JSON-LD', zipExtensions: ['.jsonld', '.json'] }],
|
|
22
|
+
[RDFXML_MIME, { label: 'RDF/XML', zipExtensions: ['.rdf', '.xml', '.owl'] }],
|
|
23
|
+
]);
|
|
15
24
|
/**
|
|
16
25
|
* Whether a distribution needs Node-side preprocessing before `qlever-index`
|
|
17
26
|
* can read it.
|
|
18
27
|
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
28
|
+
* JSON-LD and RDF/XML distributions return `true`: `qlever-index` cannot parse
|
|
29
|
+
* either, so we stream them through `rdf-parse` into N-Quads first.
|
|
21
30
|
*
|
|
22
31
|
* Native RDF formats (`nt`, `nq`, `ttl`) — including when wrapped in
|
|
23
32
|
* `application/gzip` or `application/zip` — go straight through the shell
|
|
@@ -26,20 +35,26 @@ const JSONLD_ZIP_EXTENSIONS = ['.jsonld', '.json'];
|
|
|
26
35
|
* format must be declared.
|
|
27
36
|
*/
|
|
28
37
|
export function needsPreprocessing(distribution) {
|
|
29
|
-
return distribution.mimeType
|
|
38
|
+
return (distribution.mimeType !== undefined &&
|
|
39
|
+
preprocessFormats.has(distribution.mimeType));
|
|
30
40
|
}
|
|
31
41
|
/**
|
|
32
|
-
* Convert a JSON-LD distribution to N-Quads alongside the source
|
|
42
|
+
* Convert a JSON-LD or RDF/XML distribution to N-Quads alongside the source
|
|
43
|
+
* file.
|
|
33
44
|
*
|
|
34
45
|
* Streams the source through `rdf-parse` → `rdf-serialize` so memory use
|
|
35
46
|
* stays bounded regardless of input size. Handles gzip transparently
|
|
36
47
|
* (declared `compressFormat` or `.gz` filename) and zip containers (folds
|
|
37
|
-
* each
|
|
48
|
+
* each matching entry into the output stream in order).
|
|
38
49
|
*
|
|
39
50
|
* Cached: if the output is newer than the input, it is reused as-is.
|
|
40
51
|
*/
|
|
41
52
|
export async function preprocess(localFile, distribution) {
|
|
42
|
-
|
|
53
|
+
const contentType = distribution.mimeType;
|
|
54
|
+
const format = contentType === undefined
|
|
55
|
+
? undefined
|
|
56
|
+
: preprocessFormats.get(contentType);
|
|
57
|
+
if (contentType === undefined || format === undefined) {
|
|
43
58
|
throw new Error(`preprocess called for distribution that does not need preprocessing: mediaType=${distribution.mimeType}`);
|
|
44
59
|
}
|
|
45
60
|
const outputFile = `${localFile}.preprocessed.nq`;
|
|
@@ -49,10 +64,10 @@ export async function preprocess(localFile, distribution) {
|
|
|
49
64
|
await rm(outputFile, { force: true });
|
|
50
65
|
const warnings = [];
|
|
51
66
|
if (distribution.compressMimeType === ZIP_MIME) {
|
|
52
|
-
await
|
|
67
|
+
await streamRdfZip(localFile, outputFile, contentType, format, warnings);
|
|
53
68
|
}
|
|
54
69
|
else {
|
|
55
|
-
await
|
|
70
|
+
await streamRdfFile(localFile, outputFile, contentType, distribution);
|
|
56
71
|
}
|
|
57
72
|
return { path: outputFile, format: 'nq', warnings };
|
|
58
73
|
}
|
|
@@ -69,12 +84,12 @@ async function outputIsUpToDate(inputFile, outputFile) {
|
|
|
69
84
|
}
|
|
70
85
|
}
|
|
71
86
|
/**
|
|
72
|
-
* Pipe one
|
|
87
|
+
* Pipe one RDF source through parse → N-Quads serialize into an already
|
|
73
88
|
* open writable, without closing it. Back-pressure is handled by Node's
|
|
74
89
|
* built-in `.pipe()`; the caller manages `output`'s lifecycle.
|
|
75
90
|
*/
|
|
76
|
-
async function
|
|
77
|
-
const quads = rdfParser.parse(input, { contentType
|
|
91
|
+
async function pipeRdfToWritable(input, output, contentType) {
|
|
92
|
+
const quads = rdfParser.parse(input, { contentType });
|
|
78
93
|
const bytes = rdfSerializer.serialize(quads, {
|
|
79
94
|
contentType: 'application/n-quads',
|
|
80
95
|
});
|
|
@@ -88,7 +103,7 @@ async function closeWritable(output) {
|
|
|
88
103
|
output.end();
|
|
89
104
|
});
|
|
90
105
|
}
|
|
91
|
-
async function
|
|
106
|
+
async function streamRdfFile(localFile, outputFile, contentType, distribution) {
|
|
92
107
|
const isGzipped = distribution.compressMimeType === GZIP_MIME ||
|
|
93
108
|
distribution.compressMimeType === GZIP_MIME_LEGACY ||
|
|
94
109
|
localFile.toLowerCase().endsWith('.gz');
|
|
@@ -96,14 +111,14 @@ async function streamJsonldFile(localFile, outputFile, distribution) {
|
|
|
96
111
|
const input = isGzipped ? source.pipe(createGunzip()) : source;
|
|
97
112
|
const output = createWriteStream(outputFile);
|
|
98
113
|
try {
|
|
99
|
-
await
|
|
114
|
+
await pipeRdfToWritable(input, output, contentType);
|
|
100
115
|
}
|
|
101
116
|
finally {
|
|
102
117
|
await closeWritable(output);
|
|
103
118
|
}
|
|
104
119
|
}
|
|
105
120
|
const openZip = promisify(yauzl.open);
|
|
106
|
-
async function
|
|
121
|
+
async function streamRdfZip(zipFile, outputFile, contentType, format, warnings) {
|
|
107
122
|
const zip = await openZip(zipFile, { lazyEntries: true });
|
|
108
123
|
const output = createWriteStream(outputFile);
|
|
109
124
|
let entriesProcessed = 0;
|
|
@@ -119,13 +134,13 @@ async function streamJsonldZip(zipFile, outputFile, warnings) {
|
|
|
119
134
|
return;
|
|
120
135
|
}
|
|
121
136
|
const extension = extname(entry.fileName).toLowerCase();
|
|
122
|
-
if (!
|
|
123
|
-
warnings.push(`Skipping zip entry ${entry.fileName}: extension ${extension || '(none)'} is not
|
|
137
|
+
if (!format.zipExtensions.includes(extension)) {
|
|
138
|
+
warnings.push(`Skipping zip entry ${entry.fileName}: extension ${extension || '(none)'} is not ${format.label}`);
|
|
124
139
|
zip.readEntry();
|
|
125
140
|
return;
|
|
126
141
|
}
|
|
127
142
|
const stream = await openZipEntry(zip, entry);
|
|
128
|
-
await
|
|
143
|
+
await pipeRdfToWritable(stream, output, contentType);
|
|
129
144
|
entriesProcessed++;
|
|
130
145
|
zip.readEntry();
|
|
131
146
|
}
|
|
@@ -142,7 +157,7 @@ async function streamJsonldZip(zipFile, outputFile, warnings) {
|
|
|
142
157
|
await closeWritable(output);
|
|
143
158
|
}
|
|
144
159
|
if (entriesProcessed === 0) {
|
|
145
|
-
throw new Error(`Zip ${zipFile} contains no
|
|
160
|
+
throw new Error(`Zip ${zipFile} contains no ${format.label} entries`);
|
|
146
161
|
}
|
|
147
162
|
}
|
|
148
163
|
function openZipEntry(zip, entry) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/sparql-qlever",
|
|
3
|
-
"version": "0.14.
|
|
3
|
+
"version": "0.14.7",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/sparql-qlever"
|
|
@@ -24,9 +24,9 @@
|
|
|
24
24
|
"!**/*.tsbuildinfo"
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@lde/dataset": "0.7.
|
|
28
|
-
"@lde/distribution-downloader": "0.6.
|
|
29
|
-
"@lde/sparql-importer": "0.6.
|
|
27
|
+
"@lde/dataset": "0.7.5",
|
|
28
|
+
"@lde/distribution-downloader": "0.6.3",
|
|
29
|
+
"@lde/sparql-importer": "0.6.3",
|
|
30
30
|
"@lde/sparql-server": "0.4.11",
|
|
31
31
|
"@lde/task-runner": "0.2.11",
|
|
32
32
|
"@lde/task-runner-docker": "0.2.13",
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
"rdf-parse": "^5.0.0",
|
|
36
36
|
"rdf-serialize": "^5.1.0",
|
|
37
37
|
"tslib": "^2.3.0",
|
|
38
|
-
"yauzl": "^3.3.
|
|
38
|
+
"yauzl": "^3.3.2"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
41
|
"@rdfjs/types": "^2.0.0",
|