@lde/pipeline 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -0
- package/dist/analyzer.d.ts +36 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +29 -0
- package/dist/builder.d.ts +114 -0
- package/dist/builder.d.ts.map +1 -0
- package/dist/builder.js +115 -0
- package/dist/config.d.ts +70 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +110 -0
- package/dist/distribution/analyzer.d.ts +58 -0
- package/dist/distribution/analyzer.d.ts.map +1 -0
- package/dist/distribution/analyzer.js +120 -0
- package/dist/distribution/index.d.ts +3 -0
- package/dist/distribution/index.d.ts.map +1 -0
- package/dist/distribution/index.js +2 -0
- package/dist/distribution/probe.d.ts +47 -0
- package/dist/distribution/probe.d.ts.map +1 -0
- package/dist/distribution/probe.js +120 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/sparql/collect.d.ts +19 -0
- package/dist/sparql/collect.d.ts.map +1 -0
- package/dist/sparql/collect.js +23 -0
- package/dist/sparql/executor.d.ts +121 -0
- package/dist/sparql/executor.d.ts.map +1 -0
- package/dist/sparql/executor.js +107 -0
- package/dist/sparql/index.d.ts +3 -0
- package/dist/sparql/index.d.ts.map +1 -0
- package/dist/sparql/index.js +2 -0
- package/dist/step/sparqlQuery.d.ts +10 -5
- package/dist/step/sparqlQuery.d.ts.map +1 -1
- package/dist/step/sparqlQuery.js +16 -20
- package/dist/writer/fileWriter.d.ts +23 -0
- package/dist/writer/fileWriter.d.ts.map +1 -0
- package/dist/writer/fileWriter.js +51 -0
- package/dist/writer/index.d.ts +5 -0
- package/dist/writer/index.d.ts.map +1 -0
- package/dist/writer/index.js +4 -0
- package/dist/writer/serialize.d.ts +7 -0
- package/dist/writer/serialize.d.ts.map +1 -0
- package/dist/writer/serialize.js +20 -0
- package/dist/writer/sparqlUpdateWriter.d.ts +34 -0
- package/dist/writer/sparqlUpdateWriter.d.ts.map +1 -0
- package/dist/writer/sparqlUpdateWriter.js +43 -0
- package/dist/writer/writer.d.ts +15 -0
- package/dist/writer/writer.d.ts.map +1 -0
- package/dist/writer/writer.js +1 -0
- package/package.json +22 -6
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { Distribution } from '@lde/dataset';
|
|
2
|
+
import { ImportFailed, ImportSuccessful, NotSupported, } from '@lde/sparql-importer';
|
|
3
|
+
import { DataFactory, Store } from 'n3';
|
|
4
|
+
import { probe, NetworkError, SparqlProbeResult, } from './probe.js';
|
|
5
|
+
export { ImportFailed, ImportSuccessful, NotSupported };
|
|
6
|
+
const { quad, namedNode, blankNode, literal } = DataFactory;
|
|
7
|
+
// Namespace prefixes
|
|
8
|
+
const RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
|
|
9
|
+
const SCHEMA = 'https://schema.org/';
|
|
10
|
+
const VOID = 'http://rdfs.org/ns/void#';
|
|
11
|
+
const XSD = 'http://www.w3.org/2001/XMLSchema#';
|
|
12
|
+
const HTTP_STATUS = 'https://www.w3.org/2011/http-statusCodes#';
|
|
13
|
+
/**
 * Outcome returned by the analyzer when it could not find a usable distribution.
 *
 * Carries a human-readable explanation in `message`.
 */
export class NoDistributionAvailable {
    message;
    constructor(message) {
        Object.assign(this, { message });
    }
}
|
|
22
|
+
/**
 * Analyzes dataset distributions by probing their availability.
 *
 * - Probes every distribution of the dataset via `probe()` (in parallel)
 * - Records probe results as RDF (schema:Action)
 * - Optionally imports data dumps if no SPARQL endpoint is available
 */
export class DistributionAnalyzer {
    name = 'distribution';
    importer;
    timeout;
    /**
     * @param options Optional settings. `importer` is used as a fallback when the dataset
     *   has no SPARQL distribution; `timeout` is handed to every probe (default 5000).
     */
    constructor(options) {
        this.importer = options?.importer;
        this.timeout = options?.timeout ?? 5000;
    }
    /**
     * Analyze all distributions of a dataset.
     *
     * Side effect: after a successful import, a SPARQL distribution pointing at the
     * imported data is appended to `dataset.distributions`.
     *
     * @returns Store with probe results as RDF, or NoDistributionAvailable if no usable distribution found
     */
    async execute(dataset) {
        const results = await Promise.all(dataset.distributions.map((distribution) => probe(distribution, this.timeout)));
        const store = this.buildProbeResultsRdf(results, dataset);
        // If no SPARQL endpoint available, try to import a data dump
        if (dataset.getSparqlDistribution() === null && this.importer) {
            const importResult = await this.importer.import(dataset);
            if (importResult instanceof ImportSuccessful) {
                // Add imported SPARQL distribution to dataset so subsequent steps can use it.
                // NOTE(review): this distribution is never probed, so its `isValid` is whatever
                // Distribution.sparql() sets — confirm consumers that check `isValid` accept it.
                const distribution = Distribution.sparql(importResult.distribution.accessUrl, importResult.identifier);
                dataset.distributions.push(distribution);
            }
            else if (importResult instanceof ImportFailed) {
                // Record import error in the store
                this.addImportError(store, importResult);
            }
        }
        if (dataset.getSparqlDistribution() === null) {
            return new NoDistributionAvailable('No SPARQL endpoint or importable data dump available');
        }
        return store;
    }
    /**
     * Cleanup resources (e.g., importer connections).
     */
    async finish() {
        await this.importer?.finish?.();
    }
    /**
     * Build one schema:Action (blank node) per probe result.
     *
     * Every action gets a schema:target; failures additionally get a schema:error,
     * successes are described by addSuccessTriples().
     */
    buildProbeResultsRdf(results, dataset) {
        const store = new Store();
        for (const result of results) {
            const action = blankNode();
            // Base action triples
            store.addQuads([
                quad(action, namedNode(`${RDF}type`), namedNode(`${SCHEMA}Action`)),
                quad(action, namedNode(`${SCHEMA}target`), namedNode(result.url)),
            ]);
            if (result instanceof NetworkError) {
                store.addQuad(action, namedNode(`${SCHEMA}error`), literal(result.message));
            }
            else if (result.isSuccess()) {
                this.addSuccessTriples(store, action, result, dataset);
            }
            else {
                // HTTP error. The reason phrase is optional, so statusText may be empty;
                // in that case fall back to the numeric status rather than emitting the
                // bare namespace IRI, which carries no information.
                const reason = result.statusText.replace(/ /g, '');
                const error = reason !== ''
                    ? namedNode(`${HTTP_STATUS}${reason}`)
                    : literal(`HTTP ${result.statusCode}`);
                store.addQuad(action, namedNode(`${SCHEMA}error`), error);
            }
        }
        return store;
    }
    /**
     * Describe a successful probe: link the action to the distribution (schema:result),
     * record its modification date, and attach it to the dataset as void:sparqlEndpoint
     * or void:dataDump (the latter with content size and encoding format when known).
     */
    addSuccessTriples(store, action, result, dataset) {
        const distributionUrl = namedNode(result.url);
        store.addQuad(action, namedNode(`${SCHEMA}result`), distributionUrl);
        // Guard against an Invalid Date: toISOString() throws a RangeError on those,
        // which would abort the analysis of the whole dataset.
        if (result.lastModified && !Number.isNaN(result.lastModified.getTime())) {
            store.addQuad(distributionUrl, namedNode(`${SCHEMA}dateModified`), literal(result.lastModified.toISOString(), namedNode(`${XSD}dateTime`)));
        }
        if (result instanceof SparqlProbeResult) {
            store.addQuad(namedNode(dataset.iri.toString()), namedNode(`${VOID}sparqlEndpoint`), distributionUrl);
        }
        else {
            store.addQuad(namedNode(dataset.iri.toString()), namedNode(`${VOID}dataDump`), distributionUrl);
            if (result.contentSize) {
                store.addQuad(distributionUrl, namedNode(`${SCHEMA}contentSize`), literal(result.contentSize));
            }
            if (result.contentType) {
                store.addQuad(distributionUrl, namedNode(`${SCHEMA}encodingFormat`), literal(result.contentType));
            }
        }
    }
    /**
     * Attach an import failure to the action(s) whose schema:target is the failed download URL.
     */
    addImportError(store, importResult) {
        // Snapshot the matches first so the store is not mutated while it is being iterated.
        const matches = [
            ...store.match(null, namedNode(`${SCHEMA}target`), namedNode(importResult.distribution.accessUrl.toString())),
        ];
        for (const match of matches) {
            store.addQuad(match.subject, namedNode(`${SCHEMA}error`), literal(importResult.error));
        }
    }
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeResultType, } from './probe.js';
|
|
2
|
+
export { DistributionAnalyzer, ImportSuccessful, ImportFailed, NoDistributionAvailable, type Importer, type DistributionAnalyzerOptions, } from './analyzer.js';
|
|
3
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/distribution/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,YAAY,EACZ,uBAAuB,EACvB,KAAK,QAAQ,EACb,KAAK,2BAA2B,GACjC,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { Distribution } from '@lde/dataset';
|
|
2
|
+
/**
|
|
3
|
+
* Result of a network error during probing.
|
|
4
|
+
*/
|
|
5
|
+
export declare class NetworkError {
|
|
6
|
+
readonly url: string;
|
|
7
|
+
readonly message: string;
|
|
8
|
+
constructor(url: string, message: string);
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Base class for successful probe results.
|
|
12
|
+
*/
|
|
13
|
+
declare abstract class ProbeResult {
|
|
14
|
+
readonly url: string;
|
|
15
|
+
readonly statusCode: number;
|
|
16
|
+
readonly statusText: string;
|
|
17
|
+
readonly lastModified: Date | null;
|
|
18
|
+
readonly contentType: string | null;
|
|
19
|
+
constructor(url: string, response: Response);
|
|
20
|
+
isSuccess(): boolean;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Result of probing a SPARQL endpoint.
|
|
24
|
+
*/
|
|
25
|
+
export declare class SparqlProbeResult extends ProbeResult {
|
|
26
|
+
readonly acceptedContentType = "application/sparql-results+json";
|
|
27
|
+
isSuccess(): boolean;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Result of probing a data dump distribution.
|
|
31
|
+
*/
|
|
32
|
+
export declare class DataDumpProbeResult extends ProbeResult {
|
|
33
|
+
readonly contentSize: number | null;
|
|
34
|
+
constructor(url: string, response: Response);
|
|
35
|
+
}
|
|
36
|
+
export type ProbeResultType = SparqlProbeResult | DataDumpProbeResult | NetworkError;
|
|
37
|
+
/**
|
|
38
|
+
* Probe a distribution to check availability and gather metadata.
|
|
39
|
+
*
|
|
40
|
+
* For SPARQL endpoints, sends a simple SELECT query.
|
|
41
|
+
* For data dumps, sends HEAD (or GET if HEAD returns no Content-Length).
|
|
42
|
+
*
|
|
43
|
+
* Updates the distribution's isValid, lastModified, and byteSize properties.
|
|
44
|
+
*/
|
|
45
|
+
export declare function probe(distribution: Distribution, timeout?: number): Promise<ProbeResultType>;
|
|
46
|
+
export {};
|
|
47
|
+
//# sourceMappingURL=probe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../../src/distribution/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C;;GAEG;AACH,qBAAa,YAAY;aACK,GAAG,EAAE,MAAM;aAAkB,OAAO,EAAE,MAAM;gBAA5C,GAAG,EAAE,MAAM,EAAkB,OAAO,EAAE,MAAM;CACzE;AAED;;GAEG;AACH,uBAAe,WAAW;aAMI,GAAG,EAAE,MAAM;IALvC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;gBAEf,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ;IAUpD,SAAS,IAAI,OAAO;CAG5B;AAED;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD,SAAgB,mBAAmB,qCAAqC;IAE/D,SAAS,IAAI,OAAO;CAM9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAEtC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ;CAO5C;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAEjB;;;;;;;GAOG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,SAAO,GACb,OAAO,CAAC,eAAe,CAAC,CAY1B"}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
 * Probe outcome for a request that failed before any HTTP response was received.
 *
 * Holds the probed `url` and the error `message`.
 */
export class NetworkError {
    url;
    message;
    constructor(url, message) {
        Object.assign(this, { url, message });
    }
}
|
|
12
|
+
/**
 * Base class for probe results built from an HTTP response.
 *
 * Captures the status code and text plus the `Content-Type` and `Last-Modified`
 * headers. An instance is created for any response, including error responses;
 * call isSuccess() to tell them apart.
 */
class ProbeResult {
    url;
    statusCode;
    statusText;
    /** Parsed `Last-Modified` header, or null when absent or unparseable. */
    lastModified = null;
    contentType;
    /**
     * @param url The URL that was probed.
     * @param response The HTTP response; only its status and headers are read.
     */
    constructor(url, response) {
        this.url = url;
        this.statusCode = response.status;
        this.statusText = response.statusText;
        this.contentType = response.headers.get('Content-Type');
        const lastModifiedHeader = response.headers.get('Last-Modified');
        if (lastModifiedHeader) {
            const parsed = new Date(lastModifiedHeader);
            // A malformed header yields an Invalid Date, on which toISOString() throws.
            // Treat it the same as a missing header instead of passing it on.
            if (!Number.isNaN(parsed.getTime())) {
                this.lastModified = parsed;
            }
        }
    }
    /**
     * @returns true for status codes from 200 up to (but not including) 400.
     */
    isSuccess() {
        return this.statusCode >= 200 && this.statusCode < 400;
    }
}
|
|
35
|
+
/**
 * Probe result for a SPARQL endpoint.
 *
 * Besides a successful status code, the response must announce SPARQL JSON results
 * in its Content-Type to count as a success.
 */
export class SparqlProbeResult extends ProbeResult {
    acceptedContentType = 'application/sparql-results+json';
    isSuccess() {
        if (!super.isSuccess()) {
            return false;
        }
        const { contentType, acceptedContentType } = this;
        // A missing Content-Type header counts as a failure.
        return contentType?.startsWith(acceptedContentType) ?? false;
    }
}
|
|
45
|
+
/**
 * Probe result for a data dump distribution.
 *
 * Adds the size announced in the `Content-Length` header to the base result.
 */
export class DataDumpProbeResult extends ProbeResult {
    /** Size in bytes from `Content-Length`, or null when absent or malformed. */
    contentSize = null;
    /**
     * @param url The URL that was probed.
     * @param response The HTTP response; only its status and headers are read.
     */
    constructor(url, response) {
        super(url, response);
        const contentLength = response.headers.get('Content-Length')?.trim();
        // Content-Length must consist of decimal digits only. Anything else is ignored
        // rather than parsed leniently ("12abc" -> 12) or stored as NaN, which would
        // otherwise slip through `?? undefined` into distribution.byteSize.
        if (contentLength && /^\d+$/.test(contentLength)) {
            this.contentSize = Number.parseInt(contentLength, 10);
        }
    }
}
|
|
58
|
+
/**
 * Probe a distribution to check availability and gather metadata.
 *
 * For SPARQL endpoints, sends a simple SELECT query.
 * For data dumps, sends HEAD (or GET if HEAD returns no Content-Length).
 *
 * Updates the distribution's isValid, lastModified, and byteSize properties.
 * This function does not reject: any error thrown while probing is converted
 * into a NetworkError result.
 *
 * @param distribution The distribution to probe.
 * @param timeout Request timeout in milliseconds (default 5000).
 * @returns The probe result, or a NetworkError when the request could not be completed.
 */
export async function probe(distribution, timeout = 5000) {
    try {
        if (distribution.isSparql()) {
            return await probeSparqlEndpoint(distribution, timeout);
        }
        return await probeDataDump(distribution, timeout);
    }
    catch (e) {
        // An unreachable distribution is not valid; without this, a stale `true`
        // from an earlier probe would survive a network failure.
        distribution.isValid = false;
        return new NetworkError(distribution.accessUrl?.toString() ?? 'unknown', e instanceof Error ? e.message : String(e));
    }
}
|
|
77
|
+
/**
 * Probe a SPARQL endpoint by POSTing a minimal SELECT query.
 *
 * Sets `distribution.isValid` from the outcome. Errors thrown by `fetch`
 * (network failure, timeout) propagate to the caller.
 *
 * @param distribution The SPARQL distribution; its `accessUrl` is the endpoint.
 * @param timeout Request timeout in milliseconds.
 * @returns The probe result built from the response status and headers.
 */
async function probeSparqlEndpoint(distribution, timeout) {
    const url = distribution.accessUrl.toString();
    const response = await fetch(url, {
        signal: AbortSignal.timeout(timeout),
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
            Accept: 'application/sparql-results+json',
        },
        body: `query=${encodeURIComponent('SELECT * { ?s ?p ?o } LIMIT 1')}`,
    });
    const result = new SparqlProbeResult(url, response);
    // Only status and headers are needed. Release the unread body so the connection
    // is freed right away; this is best effort, so a failing cancel must not turn a
    // completed probe into an error.
    await response.body?.cancel().catch(() => undefined);
    distribution.isValid = result.isSuccess();
    return result;
}
|
|
92
|
+
/**
 * Probe a data dump with a HEAD request, falling back to GET when HEAD
 * reports no (or a zero) Content-Length.
 *
 * Sets `distribution.isValid`, and fills `lastModified` / `byteSize` only when
 * the distribution does not already have them. Errors thrown by `fetch`
 * (network failure, timeout) propagate to the caller.
 *
 * @param distribution The data dump distribution; its `accessUrl` is requested.
 * @param timeout Timeout in milliseconds. The abort signal is created once, so
 *   it bounds the HEAD request and the fallback GET together.
 * @returns The probe result built from the final response's status and headers.
 */
async function probeDataDump(distribution, timeout) {
    const url = distribution.accessUrl.toString();
    const requestOptions = {
        signal: AbortSignal.timeout(timeout),
        headers: {
            Accept: distribution.mimeType ?? '*/*',
            'Accept-Encoding': 'identity', // Return uncompressed responses.
        },
    };
    let response = await fetch(url, {
        method: 'HEAD',
        ...requestOptions,
    });
    const contentLength = response.headers.get('Content-Length');
    if (contentLength === null || contentLength === '0') {
        // Retry as GET request for servers incorrectly returning HEAD request Content-Length,
        // which *should* be the size of the response body when issuing a GET, not that of
        // the response to a HEAD request, which is intentionally 0.
        response = await fetch(url, {
            method: 'GET',
            ...requestOptions,
        });
    }
    const result = new DataDumpProbeResult(url, response);
    // Only the headers are needed. Cancel the body so a GET does not keep downloading
    // the (potentially large) dump; best effort, a failing cancel must not fail the probe.
    await response.body?.cancel().catch(() => undefined);
    distribution.isValid = result.isSuccess();
    distribution.lastModified ??= result.lastModified ?? undefined;
    distribution.byteSize ??= result.contentSize ?? undefined;
    return result;
}
|
package/dist/index.d.ts
CHANGED
|
@@ -2,4 +2,8 @@ export * from './pipeline.js';
|
|
|
2
2
|
export * from './selector.js';
|
|
3
3
|
export * from './step.js';
|
|
4
4
|
export * from './step/sparqlQuery.js';
|
|
5
|
+
export * from './builder.js';
|
|
6
|
+
export * from './config.js';
|
|
7
|
+
export * from './sparql/index.js';
|
|
8
|
+
export * from './distribution/index.js';
|
|
5
9
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -2,3 +2,7 @@ export * from './pipeline.js';
|
|
|
2
2
|
export * from './selector.js';
|
|
3
3
|
export * from './step.js';
|
|
4
4
|
export * from './step/sparqlQuery.js';
|
|
5
|
+
export * from './builder.js';
|
|
6
|
+
export * from './config.js';
|
|
7
|
+
export * from './sparql/index.js';
|
|
8
|
+
export * from './distribution/index.js';
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Store } from 'n3';
|
|
2
|
+
import type { QuadStream } from './executor.js';
|
|
3
|
+
/**
|
|
4
|
+
* Collect all quads from a stream into an N3 Store.
|
|
5
|
+
*
|
|
6
|
+
* @param stream The quad stream to collect from.
|
|
7
|
+
* @returns Promise that resolves to a Store containing all quads.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* const result = await executor.execute(dataset);
|
|
12
|
+
* if (!(result instanceof NotSupported)) {
|
|
13
|
+
* const store = await collect(result);
|
|
14
|
+
* console.log(`Collected ${store.size} quads`);
|
|
15
|
+
* }
|
|
16
|
+
* ```
|
|
17
|
+
*/
|
|
18
|
+
export declare function collect(stream: QuadStream): Promise<Store>;
|
|
19
|
+
//# sourceMappingURL=collect.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"collect.d.ts","sourceRoot":"","sources":["../../src/sparql/collect.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,IAAI,CAAC;AAC3B,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAEhD;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,OAAO,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,CAMhE"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { Store } from 'n3';
|
|
2
|
+
/**
 * Drain a quad stream into a new N3 Store.
 *
 * @param stream The quad stream to read until it ends.
 * @returns Promise resolving to a Store holding every quad the stream produced.
 *
 * @example
 * ```typescript
 * const result = await executor.execute(dataset);
 * if (!(result instanceof NotSupported)) {
 *   const store = await collect(result);
 *   console.log(`Collected ${store.size} quads`);
 * }
 * ```
 */
export async function collect(stream) {
    const collected = new Store();
    for await (const item of stream) {
        collected.addQuad(item);
    }
    return collected;
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { Dataset, Distribution } from '@lde/dataset';
|
|
2
|
+
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
3
|
+
import type { Quad, Stream } from '@rdfjs/types';
|
|
4
|
+
import type { Readable } from 'node:stream';
|
|
5
|
+
import { NotSupported } from '../step.js';
|
|
6
|
+
export { NotSupported } from '../step.js';
|
|
7
|
+
/**
|
|
8
|
+
* A quad stream that is both an RDFJS Stream and Node.js Readable (async iterable).
|
|
9
|
+
* This is the actual return type from SparqlEndpointFetcher.fetchTriples().
|
|
10
|
+
*/
|
|
11
|
+
export type QuadStream = Readable & Stream<Quad>;
|
|
12
|
+
/**
|
|
13
|
+
* Extended dataset with optional SPARQL filtering options.
|
|
14
|
+
*/
|
|
15
|
+
export interface ExecutableDataset extends Dataset {
|
|
16
|
+
/**
|
|
17
|
+
* Optional SPARQL filter clause to restrict analysis to a subset of the data.
|
|
18
|
+
* This is substituted for `#subjectFilter#` in queries.
|
|
19
|
+
*/
|
|
20
|
+
subjectFilter?: string;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Options for SparqlConstructExecutor.
|
|
24
|
+
*/
|
|
25
|
+
export interface SparqlConstructExecutorOptions {
|
|
26
|
+
/**
|
|
27
|
+
* SPARQL CONSTRUCT query to execute.
|
|
28
|
+
*/
|
|
29
|
+
query: string;
|
|
30
|
+
/**
|
|
31
|
+
* Optional timeout for SPARQL queries in milliseconds.
|
|
32
|
+
* @default 300000 (5 minutes)
|
|
33
|
+
*/
|
|
34
|
+
timeout?: number;
|
|
35
|
+
/**
|
|
36
|
+
* Optional custom SparqlEndpointFetcher instance.
|
|
37
|
+
*/
|
|
38
|
+
fetcher?: SparqlEndpointFetcher;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Options for `execute()`.
|
|
42
|
+
*/
|
|
43
|
+
export interface ExecuteOptions {
|
|
44
|
+
/**
|
|
45
|
+
* Explicit SPARQL endpoint URL. If not provided, uses the dataset's SPARQL distribution.
|
|
46
|
+
*/
|
|
47
|
+
endpoint?: URL;
|
|
48
|
+
/**
|
|
49
|
+
* Variable bindings to substitute in the query before standard template substitution.
|
|
50
|
+
* Each key is a literal string to replace, each value is its replacement.
|
|
51
|
+
*
|
|
52
|
+
* @example
|
|
53
|
+
* ```typescript
|
|
54
|
+
* await executor.execute(dataset, {
|
|
55
|
+
* bindings: { '<#class#>': '<http://schema.org/Person>' },
|
|
56
|
+
* });
|
|
57
|
+
* ```
|
|
58
|
+
*/
|
|
59
|
+
bindings?: Record<string, string>;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* A streaming SPARQL CONSTRUCT executor with template substitution.
|
|
63
|
+
*
|
|
64
|
+
* Supports template substitution (applied in order):
|
|
65
|
+
* 1. `bindings` — any provided variable bindings
|
|
66
|
+
* 2. `#subjectFilter#` — replaced with the distribution's subject filter or dataset's subjectFilter
|
|
67
|
+
* 3. `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
|
|
68
|
+
* 4. `?dataset` — replaced with the dataset IRI
|
|
69
|
+
*
|
|
70
|
+
* @example
|
|
71
|
+
* ```typescript
|
|
72
|
+
* const executor = new SparqlConstructExecutor({
|
|
73
|
+
* query: 'CONSTRUCT { ?dataset ?p ?o } WHERE { ?s ?p ?o }',
|
|
74
|
+
* });
|
|
75
|
+
* const result = await executor.execute(dataset);
|
|
76
|
+
* if (result instanceof NotSupported) {
|
|
77
|
+
* console.log(result.message);
|
|
78
|
+
* } else {
|
|
79
|
+
* for await (const quad of result) {
|
|
80
|
+
* console.log(quad);
|
|
81
|
+
* }
|
|
82
|
+
* }
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
export declare class SparqlConstructExecutor {
|
|
86
|
+
private readonly query;
|
|
87
|
+
private readonly fetcher;
|
|
88
|
+
constructor(options: SparqlConstructExecutorOptions);
|
|
89
|
+
/**
|
|
90
|
+
* Execute the SPARQL CONSTRUCT query against the dataset's SPARQL endpoint.
|
|
91
|
+
*
|
|
92
|
+
* @param dataset The dataset to execute against.
|
|
93
|
+
* @param options Optional endpoint override and variable bindings.
|
|
94
|
+
* @returns AsyncIterable<Quad> stream of results, or NotSupported if no SPARQL endpoint available.
|
|
95
|
+
*/
|
|
96
|
+
execute(dataset: ExecutableDataset, options?: ExecuteOptions): Promise<QuadStream | NotSupported>;
|
|
97
|
+
/**
|
|
98
|
+
* Substitute template variables in the query.
|
|
99
|
+
*/
|
|
100
|
+
private substituteTemplates;
|
|
101
|
+
/**
|
|
102
|
+
* Create an executor from a query file.
|
|
103
|
+
*
|
|
104
|
+
* @param filename Path to the query file.
|
|
105
|
+
* @param options Optional executor options (timeout, fetcher).
|
|
106
|
+
*/
|
|
107
|
+
static fromFile(filename: string, options?: Omit<SparqlConstructExecutorOptions, 'query'>): Promise<SparqlConstructExecutor>;
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Substitute template variables in a SPARQL query.
|
|
111
|
+
*
|
|
112
|
+
* - `#subjectFilter#` — replaced with the distribution's or dataset's subject filter
|
|
113
|
+
* - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
|
|
114
|
+
* - `?dataset` — replaced with the dataset IRI
|
|
115
|
+
*/
|
|
116
|
+
export declare function substituteQueryTemplates(query: string, distribution: Distribution | null, dataset: ExecutableDataset): string;
|
|
117
|
+
/**
|
|
118
|
+
* Read a SPARQL query from a file.
|
|
119
|
+
*/
|
|
120
|
+
export declare function readQueryFile(filename: string): Promise<string>;
|
|
121
|
+
//# sourceMappingURL=executor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACjD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAG1C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,iBAAkB,SAAQ,OAAO;IAChD;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B;;OAEG;IACH,QAAQ,CAAC,EAAE,GAAG,CAAC;IAEf;;;;;;;;;;OAUG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,uBAAuB;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,8BAA8B;IASnD;;;;;;OAMG;IACG,OAAO,CACX,OAAO,EAAE,iBAAiB,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,GAAG,YAAY,CAAC;IAyBrC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAQ3B;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,OAAO,EAAE,iBAAiB,GACzB,MAAM,CAYR;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
2
|
+
import { readFile } from 'node:fs/promises';
|
|
3
|
+
import { resolve } from 'node:path';
|
|
4
|
+
import { NotSupported } from '../step.js';
|
|
5
|
+
// Re-export for convenience
|
|
6
|
+
export { NotSupported } from '../step.js';
|
|
7
|
+
/**
 * A streaming SPARQL CONSTRUCT executor with template substitution.
 *
 * Substitution happens in two stages:
 * 1. `bindings` — every provided key is replaced literally by its value
 * 2. the standard templates handled by `substituteQueryTemplates`
 *    (`#subjectFilter#`, `?dataset`, `#namedGraph#`)
 *
 * @example
 * ```typescript
 * const executor = new SparqlConstructExecutor({
 *   query: 'CONSTRUCT { ?dataset ?p ?o } WHERE { ?s ?p ?o }',
 * });
 * const result = await executor.execute(dataset);
 * if (result instanceof NotSupported) {
 *   console.log(result.message);
 * } else {
 *   for await (const quad of result) {
 *     console.log(quad);
 *   }
 * }
 * ```
 */
export class SparqlConstructExecutor {
    query;
    fetcher;
    /**
     * @param options `query` is the CONSTRUCT query template. `fetcher` is an optional
     *   custom SparqlEndpointFetcher; when it is given, `timeout` is not used. Otherwise
     *   a fetcher is created with `timeout` (default 300000).
     */
    constructor(options) {
        this.query = options.query;
        this.fetcher =
            options.fetcher ??
                new SparqlEndpointFetcher({
                    timeout: options.timeout ?? 300_000,
                });
    }
    /**
     * Execute the SPARQL CONSTRUCT query against the dataset's SPARQL endpoint.
     *
     * @param dataset The dataset to execute against.
     * @param options Optional endpoint override and variable bindings.
     * @returns Quad stream of results, or NotSupported if no endpoint was given and the
     *   dataset has no SPARQL distribution whose `isValid` is truthy.
     */
    async execute(dataset, options) {
        const distribution = dataset.getSparqlDistribution();
        let endpoint = options?.endpoint;
        if (endpoint === undefined) {
            if (distribution === null || !distribution.isValid) {
                return new NotSupported('No SPARQL distribution available');
            }
            endpoint = distribution.accessUrl;
        }
        let query = this.query;
        // Apply bindings first. A replacer function is used so the value is inserted
        // verbatim: with a plain string, sequences such as `$&` or `$'` in the value
        // would be interpreted as replacement patterns.
        if (options?.bindings) {
            for (const [variable, value] of Object.entries(options.bindings)) {
                query = query.replaceAll(variable, () => value);
            }
        }
        query = this.substituteTemplates(query, distribution, dataset);
        return await this.fetcher.fetchTriples(endpoint.toString(), query);
    }
    /**
     * Substitute template variables in the query.
     */
    substituteTemplates(query, distribution, dataset) {
        return substituteQueryTemplates(query, distribution, dataset);
    }
    /**
     * Create an executor from a query file.
     *
     * @param filename Path to the query file.
     * @param options Optional executor options (timeout, fetcher).
     */
    static async fromFile(filename, options) {
        const query = await readQueryFile(filename);
        return new SparqlConstructExecutor({ ...options, query });
    }
}
|
|
85
|
+
/**
 * Substitute template variables in a SPARQL query.
 *
 * Applied in this order, each to every occurrence:
 * - `#subjectFilter#` — replaced with the distribution's subject filter, else the
 *   dataset's, else the empty string
 * - `?dataset` — replaced with the dataset IRI in angle brackets; longer variable
 *   names that merely start with `?dataset` (e.g. `?datasetSize`) are left alone
 * - `#namedGraph#` — replaced with a `FROM <graph>` clause if the distribution has a
 *   named graph, else the empty string
 *
 * Values are inserted verbatim; `$` sequences in them are not treated as
 * replacement patterns.
 *
 * @param query The query template.
 * @param distribution The SPARQL distribution, or null when there is none.
 * @param dataset The dataset supplying the IRI and the fallback subject filter.
 * @returns The query with all templates substituted.
 */
export function substituteQueryTemplates(query, distribution, dataset) {
    const subjectFilter = distribution?.subjectFilter ?? dataset.subjectFilter ?? '';
    const namedGraph = distribution?.namedGraph
        ? `FROM <${distribution.namedGraph}>`
        : '';
    const datasetIri = `<${dataset.iri}>`;
    // Replacer functions keep the inserted text literal. All occurrences are replaced:
    // a leftover `#…#` placeholder is a SPARQL comment and would silently swallow the
    // rest of its line.
    return query
        .replaceAll('#subjectFilter#', () => subjectFilter)
        .replace(/\?dataset(?![\p{L}\p{N}_])/gu, () => datasetIri)
        .replaceAll('#namedGraph#', () => namedGraph);
}
|
|
102
|
+
/**
 * Load the text of a SPARQL query file.
 *
 * @param filename Path to the query file; it is resolved to an absolute path before reading.
 * @returns Promise resolving to the file's contents as a string.
 */
export async function readQueryFile(filename) {
    const absolutePath = resolve(filename);
    const contents = await readFile(absolutePath);
    return contents.toString();
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { SparqlConstructExecutor, substituteQueryTemplates, NotSupported, readQueryFile, type ExecutableDataset, type ExecuteOptions, type SparqlConstructExecutorOptions, type QuadStream, } from './executor.js';
|
|
2
|
+
export { collect } from './collect.js';
|
|
3
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,wBAAwB,EACxB,YAAY,EACZ,aAAa,EACb,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACnB,KAAK,8BAA8B,EACnC,KAAK,UAAU,GAChB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DataEmittingStep
|
|
1
|
+
import { DataEmittingStep } from './../step.js';
|
|
2
2
|
import { Dataset } from '@lde/dataset';
|
|
3
3
|
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
4
4
|
/**
|
|
@@ -14,15 +14,20 @@ export interface Args {
|
|
|
14
14
|
fetcher?: SparqlEndpointFetcher;
|
|
15
15
|
}
|
|
16
16
|
/**
|
|
17
|
-
* Executes a SPARQL CONSTRUCT query and emits the resulting
|
|
17
|
+
* Executes a SPARQL CONSTRUCT query and emits the resulting quads.
|
|
18
|
+
*
|
|
19
|
+
* This step wraps the SparqlConstructExecutor to provide the DataEmittingStep interface
|
|
20
|
+
* for use in pipelines.
|
|
18
21
|
*/
|
|
19
22
|
export declare class SparqlQuery implements DataEmittingStep {
|
|
20
23
|
readonly identifier: string;
|
|
21
|
-
private readonly
|
|
22
|
-
private readonly fetcher;
|
|
24
|
+
private readonly executor;
|
|
23
25
|
constructor({ identifier, query, fetcher }: Args);
|
|
24
|
-
execute(dataset: Dataset): Promise<
|
|
26
|
+
execute(dataset: Dataset): Promise<import("./../step.js").NotSupported | import("../sparql/executor.js").QuadStream>;
|
|
25
27
|
static fromFile(filename: string): Promise<SparqlQuery>;
|
|
26
28
|
}
|
|
29
|
+
/**
|
|
30
|
+
* @deprecated Use readQueryFile from '@lde/pipeline/sparql' instead.
|
|
31
|
+
*/
|
|
27
32
|
export declare function fromFile(filename: string): Promise<string>;
|
|
28
33
|
//# sourceMappingURL=sparqlQuery.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sparqlQuery.d.ts","sourceRoot":"","sources":["../../src/step/sparqlQuery.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,
|
|
1
|
+
{"version":3,"file":"sparqlQuery.d.ts","sourceRoot":"","sources":["../../src/step/sparqlQuery.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAG9D;;;;;;GAMG;AACH,MAAM,WAAW,IAAI;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;GAKG;AACH,qBAAa,WAAY,YAAW,gBAAgB;IAClD,SAAgB,UAAU,SAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA0B;gBAEvC,EAAE,UAAU,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI;IAQ1C,OAAO,CAAC,OAAO,EAAE,OAAO;WAIV,QAAQ,CAAC,QAAQ,EAAE,MAAM;CAM9C;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,mBAE9C"}
|