@lde/pipeline 0.6.14 → 0.6.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/distribution/index.d.ts +2 -1
- package/dist/distribution/index.d.ts.map +1 -1
- package/dist/distribution/index.js +2 -1
- package/dist/distribution/probe.d.ts +1 -1
- package/dist/distribution/probe.js +3 -9
- package/dist/distribution/report.d.ts +15 -0
- package/dist/distribution/report.d.ts.map +1 -0
- package/dist/distribution/report.js +64 -0
- package/dist/distribution/resolver.d.ts +35 -0
- package/dist/distribution/resolver.d.ts.map +1 -0
- package/dist/distribution/resolver.js +57 -0
- package/dist/index.js +1 -0
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +2 -1
- package/dist/sparql/executor.d.ts +16 -32
- package/dist/sparql/executor.d.ts.map +1 -1
- package/dist/sparql/executor.js +10 -17
- package/dist/sparql/index.d.ts +1 -1
- package/dist/sparql/index.d.ts.map +1 -1
- package/dist/stage.d.ts +3 -2
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +2 -2
- package/dist/step/sparqlQuery.d.ts +3 -3
- package/dist/step/sparqlQuery.d.ts.map +1 -1
- package/dist/step/sparqlQuery.js +2 -7
- package/dist/step.d.ts +2 -2
- package/dist/step.d.ts.map +1 -1
- package/package.json +1 -1
- package/dist/analyzer.d.ts +0 -36
- package/dist/analyzer.d.ts.map +0 -1
- package/dist/analyzer.js +0 -29
- package/dist/distribution/analyzer.d.ts +0 -58
- package/dist/distribution/analyzer.d.ts.map +0 -1
- package/dist/distribution/analyzer.js +0 -120
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeResultType, } from './probe.js';
|
|
2
|
-
export {
|
|
2
|
+
export { probeResultsToQuads } from './report.js';
|
|
3
|
+
export { ResolvedDistribution, NoDistributionAvailable, SparqlDistributionResolver, type DistributionResolver, type SparqlDistributionResolverOptions, } from './resolver.js';
|
|
3
4
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/distribution/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC;AAEpB,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/distribution/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAElD,OAAO,EACL,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,KAAK,oBAAoB,EACzB,KAAK,iCAAiC,GACvC,MAAM,eAAe,CAAC"}
|
|
@@ -1,2 +1,3 @@
|
|
|
1
1
|
export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
|
|
2
|
-
export {
|
|
2
|
+
export { probeResultsToQuads } from './report.js';
|
|
3
|
+
export { ResolvedDistribution, NoDistributionAvailable, SparqlDistributionResolver, } from './resolver.js';
|
|
@@ -40,7 +40,7 @@ export type ProbeResultType = SparqlProbeResult | DataDumpProbeResult | NetworkE
|
|
|
40
40
|
* For SPARQL endpoints, sends a simple SELECT query.
|
|
41
41
|
* For data dumps, sends HEAD (or GET if HEAD returns no Content-Length).
|
|
42
42
|
*
|
|
43
|
-
*
|
|
43
|
+
* Returns pure probe results without mutating the distribution.
|
|
44
44
|
*/
|
|
45
45
|
export declare function probe(distribution: Distribution, timeout?: number): Promise<ProbeResultType>;
|
|
46
46
|
export {};
|
|
@@ -61,7 +61,7 @@ export class DataDumpProbeResult extends ProbeResult {
|
|
|
61
61
|
* For SPARQL endpoints, sends a simple SELECT query.
|
|
62
62
|
* For data dumps, sends HEAD (or GET if HEAD returns no Content-Length).
|
|
63
63
|
*
|
|
64
|
-
*
|
|
64
|
+
* Returns pure probe results without mutating the distribution.
|
|
65
65
|
*/
|
|
66
66
|
export async function probe(distribution, timeout = 5000) {
|
|
67
67
|
try {
|
|
@@ -85,9 +85,7 @@ async function probeSparqlEndpoint(distribution, timeout) {
|
|
|
85
85
|
},
|
|
86
86
|
body: `query=${encodeURIComponent('SELECT * { ?s ?p ?o } LIMIT 1')}`,
|
|
87
87
|
});
|
|
88
|
-
|
|
89
|
-
distribution.isValid = result.isSuccess();
|
|
90
|
-
return result;
|
|
88
|
+
return new SparqlProbeResult(url, response);
|
|
91
89
|
}
|
|
92
90
|
async function probeDataDump(distribution, timeout) {
|
|
93
91
|
const url = distribution.accessUrl.toString();
|
|
@@ -112,9 +110,5 @@ async function probeDataDump(distribution, timeout) {
|
|
|
112
110
|
...requestOptions,
|
|
113
111
|
});
|
|
114
112
|
}
|
|
115
|
-
|
|
116
|
-
distribution.isValid = result.isSuccess();
|
|
117
|
-
distribution.lastModified ??= result.lastModified ?? undefined;
|
|
118
|
-
distribution.byteSize ??= result.contentSize ?? undefined;
|
|
119
|
-
return result;
|
|
113
|
+
return new DataDumpProbeResult(url, response);
|
|
120
114
|
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { ImportFailed } from '@lde/sparql-importer';
|
|
2
|
+
import { type Quad } from 'n3';
|
|
3
|
+
import { type ProbeResultType } from './probe.js';
|
|
4
|
+
/**
|
|
5
|
+
* Convert probe results into RDF quads describing each probe as a `schema:Action`.
|
|
6
|
+
*
|
|
7
|
+
* Successful SPARQL probes emit `void:sparqlEndpoint`;
|
|
8
|
+
* successful data-dump probes emit `void:dataDump` with optional metadata.
|
|
9
|
+
* Failed probes emit `schema:error`.
|
|
10
|
+
*
|
|
11
|
+
* When an {@link ImportFailed} is provided its error is attached to the action
|
|
12
|
+
* whose `schema:target` matches the failed distribution's access URL.
|
|
13
|
+
*/
|
|
14
|
+
export declare function probeResultsToQuads(probeResults: ProbeResultType[], datasetIri: string, importResult?: ImportFailed): AsyncIterable<Quad>;
|
|
15
|
+
//# sourceMappingURL=report.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/distribution/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,YAAY,CAAC;AAUpB;;;;;;;;;GASG;AACH,wBAAuB,mBAAmB,CACxC,YAAY,EAAE,eAAe,EAAE,EAC/B,UAAU,EAAE,MAAM,EAClB,YAAY,CAAC,EAAE,YAAY,GAC1B,aAAa,CAAC,IAAI,CAAC,CAkCrB"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { DataFactory } from 'n3';
|
|
2
|
+
import { NetworkError, SparqlProbeResult, } from './probe.js';
|
|
3
|
+
const { quad, namedNode, blankNode, literal } = DataFactory;
|
|
4
|
+
const RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
|
|
5
|
+
const SCHEMA = 'https://schema.org/';
|
|
6
|
+
const VOID = 'http://rdfs.org/ns/void#';
|
|
7
|
+
const XSD = 'http://www.w3.org/2001/XMLSchema#';
|
|
8
|
+
const HTTP_STATUS = 'https://www.w3.org/2011/http-statusCodes#';
|
|
9
|
+
/**
|
|
10
|
+
* Convert probe results into RDF quads describing each probe as a `schema:Action`.
|
|
11
|
+
*
|
|
12
|
+
* Successful SPARQL probes emit `void:sparqlEndpoint`;
|
|
13
|
+
* successful data-dump probes emit `void:dataDump` with optional metadata.
|
|
14
|
+
* Failed probes emit `schema:error`.
|
|
15
|
+
*
|
|
16
|
+
* When an {@link ImportFailed} is provided its error is attached to the action
|
|
17
|
+
* whose `schema:target` matches the failed distribution's access URL.
|
|
18
|
+
*/
|
|
19
|
+
export async function* probeResultsToQuads(probeResults, datasetIri, importResult) {
|
|
20
|
+
// Track blank nodes per URL so import errors can reference the right action.
|
|
21
|
+
const actionsByUrl = new Map();
|
|
22
|
+
for (const result of probeResults) {
|
|
23
|
+
const action = blankNode();
|
|
24
|
+
actionsByUrl.set(result.url, action);
|
|
25
|
+
yield quad(action, namedNode(`${RDF}type`), namedNode(`${SCHEMA}Action`));
|
|
26
|
+
yield quad(action, namedNode(`${SCHEMA}target`), namedNode(result.url));
|
|
27
|
+
if (result instanceof NetworkError) {
|
|
28
|
+
yield quad(action, namedNode(`${SCHEMA}error`), literal(result.message));
|
|
29
|
+
}
|
|
30
|
+
else if (result.isSuccess()) {
|
|
31
|
+
yield* successQuads(action, result, datasetIri);
|
|
32
|
+
}
|
|
33
|
+
else {
|
|
34
|
+
// HTTP error
|
|
35
|
+
const statusUri = `${HTTP_STATUS}${result.statusText.replace(/ /g, '')}`;
|
|
36
|
+
yield quad(action, namedNode(`${SCHEMA}error`), namedNode(statusUri));
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
if (importResult) {
|
|
40
|
+
const action = actionsByUrl.get(importResult.distribution.accessUrl.toString());
|
|
41
|
+
if (action) {
|
|
42
|
+
yield quad(action, namedNode(`${SCHEMA}error`), literal(importResult.error));
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
function* successQuads(action, result, datasetIri) {
|
|
47
|
+
const distributionUrl = namedNode(result.url);
|
|
48
|
+
yield quad(action, namedNode(`${SCHEMA}result`), distributionUrl);
|
|
49
|
+
if (result.lastModified) {
|
|
50
|
+
yield quad(distributionUrl, namedNode(`${SCHEMA}dateModified`), literal(result.lastModified.toISOString(), namedNode(`${XSD}dateTime`)));
|
|
51
|
+
}
|
|
52
|
+
if (result instanceof SparqlProbeResult) {
|
|
53
|
+
yield quad(namedNode(datasetIri), namedNode(`${VOID}sparqlEndpoint`), distributionUrl);
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
yield quad(namedNode(datasetIri), namedNode(`${VOID}dataDump`), distributionUrl);
|
|
57
|
+
if (result.contentSize) {
|
|
58
|
+
yield quad(distributionUrl, namedNode(`${SCHEMA}contentSize`), literal(result.contentSize.toString()));
|
|
59
|
+
}
|
|
60
|
+
if (result.contentType) {
|
|
61
|
+
yield quad(distributionUrl, namedNode(`${SCHEMA}encodingFormat`), literal(result.contentType));
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { Dataset, Distribution } from '@lde/dataset';
|
|
2
|
+
import type { Importer } from '@lde/sparql-importer';
|
|
3
|
+
export declare class ResolvedDistribution {
|
|
4
|
+
readonly distribution: Distribution;
|
|
5
|
+
constructor(distribution: Distribution);
|
|
6
|
+
}
|
|
7
|
+
export declare class NoDistributionAvailable {
|
|
8
|
+
readonly dataset: Dataset;
|
|
9
|
+
readonly message: string;
|
|
10
|
+
constructor(dataset: Dataset, message: string);
|
|
11
|
+
}
|
|
12
|
+
export interface DistributionResolver {
|
|
13
|
+
resolve(dataset: Dataset): Promise<ResolvedDistribution | NoDistributionAvailable>;
|
|
14
|
+
}
|
|
15
|
+
export interface SparqlDistributionResolverOptions {
|
|
16
|
+
importer?: Importer;
|
|
17
|
+
timeout?: number;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Resolves a dataset to a usable SPARQL distribution by probing its distributions.
|
|
21
|
+
*
|
|
22
|
+
* 1. Probes all distributions in parallel.
|
|
23
|
+
* 2. Returns the first valid SPARQL endpoint as a `ResolvedDistribution`.
|
|
24
|
+
* 3. If none: tries the importer (if provided) and returns the imported distribution.
|
|
25
|
+
* 4. If nothing works: returns `NoDistributionAvailable`.
|
|
26
|
+
*
|
|
27
|
+
* Does not mutate `dataset.distributions`.
|
|
28
|
+
*/
|
|
29
|
+
export declare class SparqlDistributionResolver implements DistributionResolver {
|
|
30
|
+
private readonly importer?;
|
|
31
|
+
private readonly timeout;
|
|
32
|
+
constructor(options?: SparqlDistributionResolverOptions);
|
|
33
|
+
resolve(dataset: Dataset): Promise<ResolvedDistribution | NoDistributionAvailable>;
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=resolver.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAIrD,qBAAa,oBAAoB;IACnB,QAAQ,CAAC,YAAY,EAAE,YAAY;gBAA1B,YAAY,EAAE,YAAY;CAChD;AAED,qBAAa,uBAAuB;IACtB,QAAQ,CAAC,OAAO,EAAE,OAAO;IAAE,QAAQ,CAAC,OAAO,EAAE,MAAM;gBAA1C,OAAO,EAAE,OAAO,EAAW,OAAO,EAAE,MAAM;CAChE;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;CAC5D;AAED,MAAM,WAAW,iCAAiC;IAChD,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;;GASG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAW;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAKjD,OAAO,CACX,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CAsC3D"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { Distribution } from '@lde/dataset';
|
|
2
|
+
import { ImportSuccessful } from '@lde/sparql-importer';
|
|
3
|
+
import { probe, SparqlProbeResult } from './probe.js';
|
|
4
|
+
export class ResolvedDistribution {
|
|
5
|
+
distribution;
|
|
6
|
+
constructor(distribution) {
|
|
7
|
+
this.distribution = distribution;
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
export class NoDistributionAvailable {
|
|
11
|
+
dataset;
|
|
12
|
+
message;
|
|
13
|
+
constructor(dataset, message) {
|
|
14
|
+
this.dataset = dataset;
|
|
15
|
+
this.message = message;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Resolves a dataset to a usable SPARQL distribution by probing its distributions.
|
|
20
|
+
*
|
|
21
|
+
* 1. Probes all distributions in parallel.
|
|
22
|
+
* 2. Returns the first valid SPARQL endpoint as a `ResolvedDistribution`.
|
|
23
|
+
* 3. If none: tries the importer (if provided) and returns the imported distribution.
|
|
24
|
+
* 4. If nothing works: returns `NoDistributionAvailable`.
|
|
25
|
+
*
|
|
26
|
+
* Does not mutate `dataset.distributions`.
|
|
27
|
+
*/
|
|
28
|
+
export class SparqlDistributionResolver {
|
|
29
|
+
importer;
|
|
30
|
+
timeout;
|
|
31
|
+
constructor(options) {
|
|
32
|
+
this.importer = options?.importer;
|
|
33
|
+
this.timeout = options?.timeout ?? 5000;
|
|
34
|
+
}
|
|
35
|
+
async resolve(dataset) {
|
|
36
|
+
const results = await Promise.all(dataset.distributions.map((distribution) => probe(distribution, this.timeout)));
|
|
37
|
+
// Find first valid SPARQL endpoint.
|
|
38
|
+
for (let i = 0; i < dataset.distributions.length; i++) {
|
|
39
|
+
const distribution = dataset.distributions[i];
|
|
40
|
+
const result = results[i];
|
|
41
|
+
if (distribution.isSparql() &&
|
|
42
|
+
result instanceof SparqlProbeResult &&
|
|
43
|
+
result.isSuccess()) {
|
|
44
|
+
return new ResolvedDistribution(distribution);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
// No SPARQL endpoint; try importer if available.
|
|
48
|
+
if (this.importer) {
|
|
49
|
+
const importResult = await this.importer.import(dataset);
|
|
50
|
+
if (importResult instanceof ImportSuccessful) {
|
|
51
|
+
const distribution = Distribution.sparql(importResult.distribution.accessUrl, importResult.identifier);
|
|
52
|
+
return new ResolvedDistribution(distribution);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available');
|
|
56
|
+
}
|
|
57
|
+
}
|
package/dist/index.js
CHANGED
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAc,IAAI,EAAE,MAAM,WAAW,CAAC;AAI7C,qBAAa,QAAQ;IACP,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE;QAAE,QAAQ,EAAE,QAAQ,CAAC;QAAC,KAAK,EAAE,IAAI,EAAE,CAAA;KAAE;IAE7D,GAAG;
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAc,IAAI,EAAE,MAAM,WAAW,CAAC;AAI7C,qBAAa,QAAQ;IACP,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE;QAAE,QAAQ,EAAE,QAAQ,CAAC;QAAC,KAAK,EAAE,IAAI,EAAE,CAAA;KAAE;IAE7D,GAAG;CAiCjB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -8,8 +8,9 @@ export class Pipeline {
|
|
|
8
8
|
async run() {
|
|
9
9
|
const datasets = await this.config.selector.select();
|
|
10
10
|
for await (const dataset of datasets) {
|
|
11
|
+
const distribution = dataset.getSparqlDistribution() ?? undefined;
|
|
11
12
|
for (const step of this.config.steps) {
|
|
12
|
-
const result = await step.execute(dataset);
|
|
13
|
+
const result = await step.execute(dataset, distribution);
|
|
13
14
|
if (result instanceof NotSupported) {
|
|
14
15
|
console.error(result);
|
|
15
16
|
}
|
|
@@ -6,24 +6,21 @@ import { NotSupported } from '../step.js';
|
|
|
6
6
|
export { NotSupported } from '../step.js';
|
|
7
7
|
/** A single row of variable bindings (variable name → NamedNode). */
|
|
8
8
|
export type VariableBindings = Record<string, NamedNode>;
|
|
9
|
+
export interface ExecuteOptions {
|
|
10
|
+
/**
|
|
11
|
+
* Variable bindings to inject as a VALUES clause into the query.
|
|
12
|
+
* When non-empty, a VALUES block is prepended to the WHERE clause.
|
|
13
|
+
*/
|
|
14
|
+
bindings?: VariableBindings[];
|
|
15
|
+
}
|
|
9
16
|
export interface Executor {
|
|
10
|
-
execute(dataset:
|
|
17
|
+
execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
|
|
11
18
|
}
|
|
12
19
|
/**
|
|
13
20
|
* A quad stream that is both an RDFJS Stream and Node.js Readable (async iterable).
|
|
14
21
|
* This is the actual return type from SparqlEndpointFetcher.fetchTriples().
|
|
15
22
|
*/
|
|
16
23
|
export type QuadStream = Readable & Stream<Quad>;
|
|
17
|
-
/**
|
|
18
|
-
* Extended dataset with optional SPARQL filtering options.
|
|
19
|
-
*/
|
|
20
|
-
export interface ExecutableDataset extends Dataset {
|
|
21
|
-
/**
|
|
22
|
-
* Optional SPARQL filter clause to restrict analysis to a subset of the data.
|
|
23
|
-
* This is substituted for `#subjectFilter#` in queries.
|
|
24
|
-
*/
|
|
25
|
-
subjectFilter?: string;
|
|
26
|
-
}
|
|
27
24
|
/**
|
|
28
25
|
* Options for SparqlConstructExecutor.
|
|
29
26
|
*/
|
|
@@ -42,20 +39,6 @@ export interface SparqlConstructExecutorOptions {
|
|
|
42
39
|
*/
|
|
43
40
|
fetcher?: SparqlEndpointFetcher;
|
|
44
41
|
}
|
|
45
|
-
/**
|
|
46
|
-
* Options for `execute()`.
|
|
47
|
-
*/
|
|
48
|
-
export interface SparqlConstructExecuteOptions {
|
|
49
|
-
/**
|
|
50
|
-
* Explicit SPARQL endpoint URL. If not provided, uses the dataset's SPARQL distribution.
|
|
51
|
-
*/
|
|
52
|
-
endpoint?: URL;
|
|
53
|
-
/**
|
|
54
|
-
* Variable bindings to inject as a VALUES clause into the query.
|
|
55
|
-
* When non-empty, a VALUES block is prepended to the WHERE clause.
|
|
56
|
-
*/
|
|
57
|
-
bindings?: VariableBindings[];
|
|
58
|
-
}
|
|
59
42
|
/**
|
|
60
43
|
* A streaming SPARQL CONSTRUCT executor that parses the query once (in the
|
|
61
44
|
* constructor) and operates on the AST for graph and VALUES injection.
|
|
@@ -69,7 +52,7 @@ export interface SparqlConstructExecuteOptions {
|
|
|
69
52
|
* const executor = new SparqlConstructExecutor({
|
|
70
53
|
* query: 'CONSTRUCT { ?dataset ?p ?o } WHERE { ?s ?p ?o }',
|
|
71
54
|
* });
|
|
72
|
-
* const result = await executor.execute(dataset);
|
|
55
|
+
* const result = await executor.execute(dataset, distribution);
|
|
73
56
|
* if (result instanceof NotSupported) {
|
|
74
57
|
* console.log(result.message);
|
|
75
58
|
* } else {
|
|
@@ -85,13 +68,14 @@ export declare class SparqlConstructExecutor implements Executor {
|
|
|
85
68
|
private readonly generator;
|
|
86
69
|
constructor(options: SparqlConstructExecutorOptions);
|
|
87
70
|
/**
|
|
88
|
-
* Execute the SPARQL CONSTRUCT query against the
|
|
71
|
+
* Execute the SPARQL CONSTRUCT query against the distribution's endpoint.
|
|
89
72
|
*
|
|
90
73
|
* @param dataset The dataset to execute against.
|
|
91
|
-
* @param
|
|
92
|
-
* @
|
|
74
|
+
* @param distribution The distribution providing the SPARQL endpoint.
|
|
75
|
+
* @param options Optional execution options (bindings).
|
|
76
|
+
* @returns AsyncIterable<Quad> stream of results.
|
|
93
77
|
*/
|
|
94
|
-
execute(dataset:
|
|
78
|
+
execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<QuadStream>;
|
|
95
79
|
/**
|
|
96
80
|
* Create an executor from a query file.
|
|
97
81
|
*
|
|
@@ -103,11 +87,11 @@ export declare class SparqlConstructExecutor implements Executor {
|
|
|
103
87
|
/**
|
|
104
88
|
* Substitute template variables in a SPARQL query.
|
|
105
89
|
*
|
|
106
|
-
* - `#subjectFilter#` — replaced with the distribution's
|
|
90
|
+
* - `#subjectFilter#` — replaced with the distribution's subject filter
|
|
107
91
|
* - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
|
|
108
92
|
* - `?dataset` — replaced with the dataset IRI
|
|
109
93
|
*/
|
|
110
|
-
export declare function substituteQueryTemplates(query: string, distribution: Distribution | null, dataset:
|
|
94
|
+
export declare function substituteQueryTemplates(query: string, distribution: Distribution | null, dataset: Dataset): string;
|
|
111
95
|
/**
|
|
112
96
|
* Read a SPARQL query from a file.
|
|
113
97
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI5C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAK1C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI5C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAK1C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAiB;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAcnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAoBtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,OAAO,EAAE,OAAO,GACf,MAAM,CAWR;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
|
package/dist/sparql/executor.js
CHANGED
|
@@ -2,7 +2,6 @@ import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
|
2
2
|
import { readFile } from 'node:fs/promises';
|
|
3
3
|
import { resolve } from 'node:path';
|
|
4
4
|
import { Generator, Parser } from 'sparqljs';
|
|
5
|
-
import { NotSupported } from '../step.js';
|
|
6
5
|
import { withDefaultGraph } from './graph.js';
|
|
7
6
|
import { injectValues } from './values.js';
|
|
8
7
|
// Re-export for convenience
|
|
@@ -20,7 +19,7 @@ export { NotSupported } from '../step.js';
|
|
|
20
19
|
* const executor = new SparqlConstructExecutor({
|
|
21
20
|
* query: 'CONSTRUCT { ?dataset ?p ?o } WHERE { ?s ?p ?o }',
|
|
22
21
|
* });
|
|
23
|
-
* const result = await executor.execute(dataset);
|
|
22
|
+
* const result = await executor.execute(dataset, distribution);
|
|
24
23
|
* if (result instanceof NotSupported) {
|
|
25
24
|
* console.log(result.message);
|
|
26
25
|
* } else {
|
|
@@ -48,23 +47,17 @@ export class SparqlConstructExecutor {
|
|
|
48
47
|
});
|
|
49
48
|
}
|
|
50
49
|
/**
|
|
51
|
-
* Execute the SPARQL CONSTRUCT query against the
|
|
50
|
+
* Execute the SPARQL CONSTRUCT query against the distribution's endpoint.
|
|
52
51
|
*
|
|
53
52
|
* @param dataset The dataset to execute against.
|
|
54
|
-
* @param
|
|
55
|
-
* @
|
|
53
|
+
* @param distribution The distribution providing the SPARQL endpoint.
|
|
54
|
+
* @param options Optional execution options (bindings).
|
|
55
|
+
* @returns AsyncIterable<Quad> stream of results.
|
|
56
56
|
*/
|
|
57
|
-
async execute(dataset, options) {
|
|
58
|
-
const
|
|
59
|
-
let endpoint = options?.endpoint;
|
|
60
|
-
if (endpoint === undefined) {
|
|
61
|
-
if (distribution === null || !distribution.isValid) {
|
|
62
|
-
return new NotSupported('No SPARQL distribution available');
|
|
63
|
-
}
|
|
64
|
-
endpoint = distribution.accessUrl;
|
|
65
|
-
}
|
|
57
|
+
async execute(dataset, distribution, options) {
|
|
58
|
+
const endpoint = distribution.accessUrl;
|
|
66
59
|
let ast = structuredClone(this.query);
|
|
67
|
-
if (distribution
|
|
60
|
+
if (distribution.namedGraph) {
|
|
68
61
|
withDefaultGraph(ast, distribution.namedGraph);
|
|
69
62
|
}
|
|
70
63
|
const bindings = options?.bindings;
|
|
@@ -89,12 +82,12 @@ export class SparqlConstructExecutor {
|
|
|
89
82
|
/**
|
|
90
83
|
* Substitute template variables in a SPARQL query.
|
|
91
84
|
*
|
|
92
|
-
* - `#subjectFilter#` — replaced with the distribution's
|
|
85
|
+
* - `#subjectFilter#` — replaced with the distribution's subject filter
|
|
93
86
|
* - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
|
|
94
87
|
* - `?dataset` — replaced with the dataset IRI
|
|
95
88
|
*/
|
|
96
89
|
export function substituteQueryTemplates(query, distribution, dataset) {
|
|
97
|
-
const subjectFilter = distribution?.subjectFilter ??
|
|
90
|
+
const subjectFilter = distribution?.subjectFilter ?? '';
|
|
98
91
|
const namedGraph = distribution?.namedGraph
|
|
99
92
|
? `FROM <${distribution.namedGraph}>`
|
|
100
93
|
: '';
|
package/dist/sparql/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { SparqlConstructExecutor, substituteQueryTemplates, NotSupported, readQueryFile, type
|
|
1
|
+
export { SparqlConstructExecutor, substituteQueryTemplates, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type QuadStream, type VariableBindings, } from './executor.js';
|
|
2
2
|
export { collect } from './collect.js';
|
|
3
3
|
export { SparqlSelector, type SparqlSelectorOptions } from './selector.js';
|
|
4
4
|
export { injectValues } from './values.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,wBAAwB,EACxB,YAAY,EACZ,aAAa,EACb,KAAK,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,wBAAwB,EACxB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAEvC,OAAO,EAAE,cAAc,EAAE,KAAK,qBAAqB,EAAE,MAAM,eAAe,CAAC;AAE3E,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/stage.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import { Dataset, Distribution } from '@lde/dataset';
|
|
1
2
|
import type { Quad } from '@rdfjs/types';
|
|
2
|
-
import type {
|
|
3
|
+
import type { Executor, VariableBindings } from './sparql/executor.js';
|
|
3
4
|
import { NotSupported } from './sparql/executor.js';
|
|
4
5
|
export interface StageOptions {
|
|
5
6
|
name: string;
|
|
@@ -11,7 +12,7 @@ export declare class Stage {
|
|
|
11
12
|
private readonly executors;
|
|
12
13
|
private readonly selector?;
|
|
13
14
|
constructor(options: StageOptions);
|
|
14
|
-
run(dataset:
|
|
15
|
+
run(dataset: Dataset, distribution: Distribution): Promise<AsyncIterable<Quad> | NotSupported>;
|
|
15
16
|
private collectBindings;
|
|
16
17
|
}
|
|
17
18
|
/** Stage-level selector that yields variable bindings for use in executor queries. Pagination is an implementation detail. */
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EACV,
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EACV,QAAQ,EAER,gBAAgB,EACjB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;gBAE9B,OAAO,EAAE,YAAY;IAQ3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC;YAyBhC,eAAe;CAW9B;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
|
package/dist/stage.js
CHANGED
|
@@ -10,12 +10,12 @@ export class Stage {
|
|
|
10
10
|
: [options.executors];
|
|
11
11
|
this.selector = options.selector;
|
|
12
12
|
}
|
|
13
|
-
async run(dataset) {
|
|
13
|
+
async run(dataset, distribution) {
|
|
14
14
|
const bindings = await this.collectBindings();
|
|
15
15
|
const executeOptions = bindings.length > 0 ? { bindings } : undefined;
|
|
16
16
|
const streams = [];
|
|
17
17
|
for (const executor of this.executors) {
|
|
18
|
-
const result = await executor.execute(dataset, executeOptions);
|
|
18
|
+
const result = await executor.execute(dataset, distribution, executeOptions);
|
|
19
19
|
if (!(result instanceof NotSupported)) {
|
|
20
20
|
streams.push(result);
|
|
21
21
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { DataEmittingStep
|
|
2
|
-
import { Dataset } from '@lde/dataset';
|
|
1
|
+
import { DataEmittingStep } from './../step.js';
|
|
2
|
+
import { Dataset, Distribution } from '@lde/dataset';
|
|
3
3
|
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
4
4
|
/**
|
|
5
5
|
* Arguments for the SparqlQuery step.
|
|
@@ -25,7 +25,7 @@ export declare class SparqlQuery implements DataEmittingStep {
|
|
|
25
25
|
private readonly query;
|
|
26
26
|
private readonly fetcher?;
|
|
27
27
|
constructor({ identifier, query, fetcher }: Args);
|
|
28
|
-
execute(dataset: Dataset): Promise<
|
|
28
|
+
execute(dataset: Dataset, distribution: Distribution): Promise<import("../sparql/executor.js").QuadStream>;
|
|
29
29
|
static fromFile(filename: string): Promise<SparqlQuery>;
|
|
30
30
|
}
|
|
31
31
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sparqlQuery.d.ts","sourceRoot":"","sources":["../../src/step/sparqlQuery.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,
|
|
1
|
+
{"version":3,"file":"sparqlQuery.d.ts","sourceRoot":"","sources":["../../src/step/sparqlQuery.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAO9D;;;;;;GAMG;AACH,MAAM,WAAW,IAAI;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;GAMG;AACH,qBAAa,WAAY,YAAW,gBAAgB;IAClD,SAAgB,UAAU,SAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAwB;gBAErC,EAAE,UAAU,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI;IAM1C,OAAO,CAAC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,YAAY;WAatC,QAAQ,CAAC,QAAQ,EAAE,MAAM;CAM9C;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,mBAE9C"}
|
package/dist/step/sparqlQuery.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { NotSupported } from './../step.js';
|
|
2
1
|
import { SparqlConstructExecutor, substituteQueryTemplates, readQueryFile, } from '../sparql/index.js';
|
|
3
2
|
/**
|
|
4
3
|
* Executes a SPARQL CONSTRUCT query and emits the resulting quads.
|
|
@@ -16,17 +15,13 @@ export class SparqlQuery {
|
|
|
16
15
|
this.query = query;
|
|
17
16
|
this.fetcher = fetcher;
|
|
18
17
|
}
|
|
19
|
-
async execute(dataset) {
|
|
20
|
-
const distribution = dataset.getSparqlDistribution();
|
|
21
|
-
if (distribution === null || !distribution.isValid) {
|
|
22
|
-
return new NotSupported('No SPARQL distribution available');
|
|
23
|
-
}
|
|
18
|
+
async execute(dataset, distribution) {
|
|
24
19
|
const substituted = substituteQueryTemplates(this.query, distribution, dataset);
|
|
25
20
|
const executor = new SparqlConstructExecutor({
|
|
26
21
|
query: substituted,
|
|
27
22
|
fetcher: this.fetcher,
|
|
28
23
|
});
|
|
29
|
-
return await executor.execute(dataset);
|
|
24
|
+
return await executor.execute(dataset, distribution);
|
|
30
25
|
}
|
|
31
26
|
static async fromFile(filename) {
|
|
32
27
|
return new this({
|
package/dist/step.d.ts
CHANGED
|
@@ -9,13 +9,13 @@ export type Step = DataEmittingStep | SingleStep;
|
|
|
9
9
|
* Failure is expressed by emitting an error event; success by the end event.
|
|
10
10
|
*/
|
|
11
11
|
export interface DataEmittingStep extends AbstractStep {
|
|
12
|
-
execute(dataset: Dataset): Promise<Stream | NotSupported>;
|
|
12
|
+
execute(dataset: Dataset, distribution: Distribution): Promise<Stream | NotSupported>;
|
|
13
13
|
}
|
|
14
14
|
/**
|
|
15
15
|
* A pipeline step that executes an operation without emitting data.
|
|
16
16
|
*/
|
|
17
17
|
export interface SingleStep extends AbstractStep {
|
|
18
|
-
execute(dataset: Dataset): Promise<NotSupported | Failure | Success>;
|
|
18
|
+
execute(dataset: Dataset, distribution?: Distribution): Promise<NotSupported | Failure | Success>;
|
|
19
19
|
}
|
|
20
20
|
export interface Finishable {
|
|
21
21
|
finish(): Promise<void>;
|
package/dist/step.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"step.d.ts","sourceRoot":"","sources":["../src/step.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE3C,UAAU,YAAY;IACpB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,IAAI,GAAG,gBAAgB,GAAG,UAAU,CAAC;AAEjD;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD,OAAO,
|
|
1
|
+
{"version":3,"file":"step.d.ts","sourceRoot":"","sources":["../src/step.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE3C,UAAU,YAAY;IACpB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,IAAI,GAAG,gBAAgB,GAAG,UAAU,CAAC;AAEjD;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,MAAM,GAAG,YAAY,CAAC,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,CAAC,EAAE,YAAY,GAC1B,OAAO,CAAC,YAAY,GAAG,OAAO,GAAG,OAAO,CAAC,CAAC;CAC9C;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAED;;;;;GAKG;AACH,qBAAa,OAAO;aAEA,YAAY,EAAE,YAAY;aAC1B,OAAO,CAAC,EAAE,MAAM;gBADhB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED;;;;;GAKG;AACH,qBAAa,OAAO;aAEA,OAAO,EAAE,OAAO;aAChB,YAAY,EAAE,YAAY;gBAD1B,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY;CAE7C;AAED;;;;GAIG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C"}
|
package/package.json
CHANGED
package/dist/analyzer.d.ts
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
import { Dataset } from '@lde/dataset';
|
|
2
|
-
import type { DatasetCore } from '@rdfjs/types';
|
|
3
|
-
import { NotSupported } from './step.js';
|
|
4
|
-
export { NotSupported } from './step.js';
|
|
5
|
-
/**
|
|
6
|
-
* Result of a successful analysis.
|
|
7
|
-
*/
|
|
8
|
-
export declare class Success {
|
|
9
|
-
readonly data: DatasetCore;
|
|
10
|
-
constructor(data: DatasetCore);
|
|
11
|
-
}
|
|
12
|
-
/**
|
|
13
|
-
* Analysis failed.
|
|
14
|
-
*/
|
|
15
|
-
export declare class Failure {
|
|
16
|
-
readonly endpoint: URL;
|
|
17
|
-
readonly message?: string | undefined;
|
|
18
|
-
constructor(endpoint: URL, message?: string | undefined);
|
|
19
|
-
}
|
|
20
|
-
/**
|
|
21
|
-
* Interface for analyzers.
|
|
22
|
-
*/
|
|
23
|
-
export interface Analyzer {
|
|
24
|
-
readonly name: string;
|
|
25
|
-
execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
|
|
26
|
-
finish?(): Promise<void>;
|
|
27
|
-
}
|
|
28
|
-
/**
|
|
29
|
-
* Base class for analyzers with default implementations.
|
|
30
|
-
*/
|
|
31
|
-
export declare abstract class BaseAnalyzer implements Analyzer {
|
|
32
|
-
abstract readonly name: string;
|
|
33
|
-
abstract execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
|
|
34
|
-
finish(): Promise<void>;
|
|
35
|
-
}
|
|
36
|
-
//# sourceMappingURL=analyzer.d.ts.map
|
package/dist/analyzer.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../src/analyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC;;GAEG;AACH,qBAAa,OAAO;aACU,IAAI,EAAE,WAAW;gBAAjB,IAAI,EAAE,WAAW;CAC9C;AAED;;GAEG;AACH,qBAAa,OAAO;aAEA,QAAQ,EAAE,GAAG;aACb,OAAO,CAAC,EAAE,MAAM;gBADhB,QAAQ,EAAE,GAAG,EACb,OAAO,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC,CAAC;IACrE,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;GAEG;AACH,8BAAsB,YAAa,YAAW,QAAQ;IACpD,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC;IAEvE,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAG9B"}
|
package/dist/analyzer.js
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
export { NotSupported } from './step.js';
|
|
2
|
-
/**
|
|
3
|
-
* Result of a successful analysis.
|
|
4
|
-
*/
|
|
5
|
-
export class Success {
|
|
6
|
-
data;
|
|
7
|
-
constructor(data) {
|
|
8
|
-
this.data = data;
|
|
9
|
-
}
|
|
10
|
-
}
|
|
11
|
-
/**
|
|
12
|
-
* Analysis failed.
|
|
13
|
-
*/
|
|
14
|
-
export class Failure {
|
|
15
|
-
endpoint;
|
|
16
|
-
message;
|
|
17
|
-
constructor(endpoint, message) {
|
|
18
|
-
this.endpoint = endpoint;
|
|
19
|
-
this.message = message;
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
/**
|
|
23
|
-
* Base class for analyzers with default implementations.
|
|
24
|
-
*/
|
|
25
|
-
export class BaseAnalyzer {
|
|
26
|
-
async finish() {
|
|
27
|
-
// Default no-op implementation.
|
|
28
|
-
}
|
|
29
|
-
}
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import { Dataset } from '@lde/dataset';
|
|
2
|
-
import { Importer, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
|
|
3
|
-
import { Store } from 'n3';
|
|
4
|
-
export type { Importer };
|
|
5
|
-
export { ImportFailed, ImportSuccessful, NotSupported };
|
|
6
|
-
/**
|
|
7
|
-
* Extended importer interface with optional cleanup method.
|
|
8
|
-
*/
|
|
9
|
-
export interface ImporterWithFinish extends Importer {
|
|
10
|
-
finish?(): Promise<void>;
|
|
11
|
-
}
|
|
12
|
-
export interface DistributionAnalyzerOptions {
|
|
13
|
-
/**
|
|
14
|
-
* Optional importer for loading data dumps when no SPARQL endpoint is available.
|
|
15
|
-
*/
|
|
16
|
-
importer?: ImporterWithFinish;
|
|
17
|
-
/**
|
|
18
|
-
* Timeout for probe requests in milliseconds.
|
|
19
|
-
* @default 5000
|
|
20
|
-
*/
|
|
21
|
-
timeout?: number;
|
|
22
|
-
}
|
|
23
|
-
/**
|
|
24
|
-
* Result indicating the analyzer could not find a usable distribution.
|
|
25
|
-
*/
|
|
26
|
-
export declare class NoDistributionAvailable {
|
|
27
|
-
readonly message: string;
|
|
28
|
-
constructor(message: string);
|
|
29
|
-
}
|
|
30
|
-
/**
|
|
31
|
-
* Analyzes dataset distributions by probing their availability.
|
|
32
|
-
*
|
|
33
|
-
* - Probes SPARQL endpoints with a simple SELECT query
|
|
34
|
-
* - Probes data dumps with HEAD/GET requests
|
|
35
|
-
* - Records probe results as RDF (schema:Action)
|
|
36
|
-
* - Updates distribution metadata (isValid, lastModified, byteSize)
|
|
37
|
-
* - Optionally imports data dumps if no SPARQL endpoint is available
|
|
38
|
-
*/
|
|
39
|
-
export declare class DistributionAnalyzer {
|
|
40
|
-
readonly name = "distribution";
|
|
41
|
-
private readonly importer?;
|
|
42
|
-
private readonly timeout;
|
|
43
|
-
constructor(options?: DistributionAnalyzerOptions);
|
|
44
|
-
/**
|
|
45
|
-
* Analyze all distributions of a dataset.
|
|
46
|
-
*
|
|
47
|
-
* @returns Store with probe results as RDF, or NoDistributionAvailable if no usable distribution found
|
|
48
|
-
*/
|
|
49
|
-
execute(dataset: Dataset): Promise<Store | NoDistributionAvailable>;
|
|
50
|
-
/**
|
|
51
|
-
* Cleanup resources (e.g., importer connections).
|
|
52
|
-
*/
|
|
53
|
-
finish(): Promise<void>;
|
|
54
|
-
private buildProbeResultsRdf;
|
|
55
|
-
private addSuccessTriples;
|
|
56
|
-
private addImportError;
|
|
57
|
-
}
|
|
58
|
-
//# sourceMappingURL=analyzer.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../../src/distribution/analyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAe,KAAK,EAAE,MAAM,IAAI,CAAC;AASxC,YAAY,EAAE,QAAQ,EAAE,CAAC;AACzB,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,CAAC;AAWxD;;GAEG;AACH,MAAM,WAAW,kBAAmB,SAAQ,QAAQ;IAClD,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED,MAAM,WAAW,2BAA2B;IAC1C;;OAEG;IACH,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAE9B;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,uBAAuB;aACN,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED;;;;;;;;GAQG;AACH,qBAAa,oBAAoB;IAC/B,SAAgB,IAAI,kBAAkB;IACtC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAqB;IAC/C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,2BAA2B;IAKjD;;;;OAIG;IACG,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,KAAK,GAAG,uBAAuB,CAAC;IAmCzE;;OAEG;IACG,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,OAAO,CAAC,oBAAoB;IAwC5B,OAAO,CAAC,iBAAiB;IAiDzB,OAAO,CAAC,cAAc;CAevB"}
|
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
import { Distribution } from '@lde/dataset';
|
|
2
|
-
import { ImportFailed, ImportSuccessful, NotSupported, } from '@lde/sparql-importer';
|
|
3
|
-
import { DataFactory, Store } from 'n3';
|
|
4
|
-
import { probe, NetworkError, SparqlProbeResult, } from './probe.js';
|
|
5
|
-
export { ImportFailed, ImportSuccessful, NotSupported };
|
|
6
|
-
const { quad, namedNode, blankNode, literal } = DataFactory;
|
|
7
|
-
// Namespace prefixes
|
|
8
|
-
const RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
|
|
9
|
-
const SCHEMA = 'https://schema.org/';
|
|
10
|
-
const VOID = 'http://rdfs.org/ns/void#';
|
|
11
|
-
const XSD = 'http://www.w3.org/2001/XMLSchema#';
|
|
12
|
-
const HTTP_STATUS = 'https://www.w3.org/2011/http-statusCodes#';
|
|
13
|
-
/**
|
|
14
|
-
* Result indicating the analyzer could not find a usable distribution.
|
|
15
|
-
*/
|
|
16
|
-
export class NoDistributionAvailable {
|
|
17
|
-
message;
|
|
18
|
-
constructor(message) {
|
|
19
|
-
this.message = message;
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
/**
|
|
23
|
-
* Analyzes dataset distributions by probing their availability.
|
|
24
|
-
*
|
|
25
|
-
* - Probes SPARQL endpoints with a simple SELECT query
|
|
26
|
-
* - Probes data dumps with HEAD/GET requests
|
|
27
|
-
* - Records probe results as RDF (schema:Action)
|
|
28
|
-
* - Updates distribution metadata (isValid, lastModified, byteSize)
|
|
29
|
-
* - Optionally imports data dumps if no SPARQL endpoint is available
|
|
30
|
-
*/
|
|
31
|
-
export class DistributionAnalyzer {
|
|
32
|
-
name = 'distribution';
|
|
33
|
-
importer;
|
|
34
|
-
timeout;
|
|
35
|
-
constructor(options) {
|
|
36
|
-
this.importer = options?.importer;
|
|
37
|
-
this.timeout = options?.timeout ?? 5000;
|
|
38
|
-
}
|
|
39
|
-
/**
|
|
40
|
-
* Analyze all distributions of a dataset.
|
|
41
|
-
*
|
|
42
|
-
* @returns Store with probe results as RDF, or NoDistributionAvailable if no usable distribution found
|
|
43
|
-
*/
|
|
44
|
-
async execute(dataset) {
|
|
45
|
-
const results = await Promise.all(dataset.distributions.map((distribution) => probe(distribution, this.timeout)));
|
|
46
|
-
const store = this.buildProbeResultsRdf(results, dataset);
|
|
47
|
-
// If no SPARQL endpoint available, try to import a data dump
|
|
48
|
-
if (dataset.getSparqlDistribution() === null && this.importer) {
|
|
49
|
-
const importResult = await this.importer.import(dataset);
|
|
50
|
-
if (importResult instanceof ImportSuccessful) {
|
|
51
|
-
// Add imported SPARQL distribution to dataset so subsequent steps can use it
|
|
52
|
-
const distribution = Distribution.sparql(importResult.distribution.accessUrl, importResult.identifier);
|
|
53
|
-
dataset.distributions.push(distribution);
|
|
54
|
-
}
|
|
55
|
-
else if (importResult instanceof ImportFailed) {
|
|
56
|
-
// Record import error in the store
|
|
57
|
-
this.addImportError(store, importResult);
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
if (dataset.getSparqlDistribution() === null) {
|
|
61
|
-
return new NoDistributionAvailable('No SPARQL endpoint or importable data dump available');
|
|
62
|
-
}
|
|
63
|
-
return store;
|
|
64
|
-
}
|
|
65
|
-
/**
|
|
66
|
-
* Cleanup resources (e.g., importer connections).
|
|
67
|
-
*/
|
|
68
|
-
async finish() {
|
|
69
|
-
await this.importer?.finish?.();
|
|
70
|
-
}
|
|
71
|
-
buildProbeResultsRdf(results, dataset) {
|
|
72
|
-
const store = new Store();
|
|
73
|
-
for (const result of results) {
|
|
74
|
-
const action = blankNode();
|
|
75
|
-
// Base action triples
|
|
76
|
-
store.addQuads([
|
|
77
|
-
quad(action, namedNode(`${RDF}type`), namedNode(`${SCHEMA}Action`)),
|
|
78
|
-
quad(action, namedNode(`${SCHEMA}target`), namedNode(result.url)),
|
|
79
|
-
]);
|
|
80
|
-
if (result instanceof NetworkError) {
|
|
81
|
-
store.addQuad(action, namedNode(`${SCHEMA}error`), literal(result.message));
|
|
82
|
-
}
|
|
83
|
-
else if (result.isSuccess()) {
|
|
84
|
-
this.addSuccessTriples(store, action, result, dataset);
|
|
85
|
-
}
|
|
86
|
-
else {
|
|
87
|
-
// HTTP error
|
|
88
|
-
const statusUri = `${HTTP_STATUS}${result.statusText.replace(/ /g, '')}`;
|
|
89
|
-
store.addQuad(action, namedNode(`${SCHEMA}error`), namedNode(statusUri));
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
return store;
|
|
93
|
-
}
|
|
94
|
-
addSuccessTriples(store, action, result, dataset) {
|
|
95
|
-
const distributionUrl = namedNode(result.url);
|
|
96
|
-
store.addQuad(action, namedNode(`${SCHEMA}result`), distributionUrl);
|
|
97
|
-
if (result.lastModified) {
|
|
98
|
-
store.addQuad(distributionUrl, namedNode(`${SCHEMA}dateModified`), literal(result.lastModified.toISOString(), namedNode(`${XSD}dateTime`)));
|
|
99
|
-
}
|
|
100
|
-
if (result instanceof SparqlProbeResult) {
|
|
101
|
-
store.addQuad(namedNode(dataset.iri.toString()), namedNode(`${VOID}sparqlEndpoint`), distributionUrl);
|
|
102
|
-
}
|
|
103
|
-
else {
|
|
104
|
-
store.addQuad(namedNode(dataset.iri.toString()), namedNode(`${VOID}dataDump`), distributionUrl);
|
|
105
|
-
if (result.contentSize) {
|
|
106
|
-
store.addQuad(distributionUrl, namedNode(`${SCHEMA}contentSize`), literal(result.contentSize));
|
|
107
|
-
}
|
|
108
|
-
if (result.contentType) {
|
|
109
|
-
store.addQuad(distributionUrl, namedNode(`${SCHEMA}encodingFormat`), literal(result.contentType));
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
addImportError(store, importResult) {
|
|
114
|
-
// Find the action for this download URL and add the error
|
|
115
|
-
const matches = store.match(null, namedNode(`${SCHEMA}target`), namedNode(importResult.distribution.accessUrl.toString()));
|
|
116
|
-
for (const match of matches) {
|
|
117
|
-
store.addQuad(match.subject, namedNode(`${SCHEMA}error`), literal(importResult.error));
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
}
|