@lde/pipeline 0.15.2 → 0.17.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/distribution/importResolver.d.ts +20 -4
- package/dist/distribution/importResolver.d.ts.map +1 -1
- package/dist/distribution/importResolver.js +16 -8
- package/dist/pipeline.js +10 -10
- package/dist/progressReporter.d.ts +2 -2
- package/dist/progressReporter.d.ts.map +1 -1
- package/dist/stage.d.ts +1 -1
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +3 -3
- package/package.json +1 -1
|
@@ -4,19 +4,35 @@ import { type DistributionResolver, NoDistributionAvailable, ResolvedDistributio
|
|
|
4
4
|
export interface ImportResolverOptions {
|
|
5
5
|
importer: Importer;
|
|
6
6
|
server: SparqlServer;
|
|
7
|
+
/**
|
|
8
|
+
* Controls how a dataset's distribution is selected.
|
|
9
|
+
*
|
|
10
|
+
* - `'sparql'` (default) — use a dataset's own SPARQL endpoint when one is
|
|
11
|
+
* available; fall back to importing a data dump only when no endpoint
|
|
12
|
+
* responds.
|
|
13
|
+
* - `'import'` — always import a data dump into a local SPARQL server,
|
|
14
|
+
* even when the dataset advertises a working SPARQL endpoint. Useful when
|
|
15
|
+
* the remote endpoint is too slow or unreliable.
|
|
16
|
+
*
|
|
17
|
+
* In both modes the inner resolver still runs so that probe results are
|
|
18
|
+
* collected for reporting and the dataset knowledge graph.
|
|
19
|
+
*/
|
|
20
|
+
strategy?: 'sparql' | 'import';
|
|
7
21
|
}
|
|
8
22
|
/**
|
|
9
|
-
* A {@link DistributionResolver} decorator that adds
|
|
23
|
+
* A {@link DistributionResolver} decorator that adds data-dump import logic.
|
|
10
24
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
25
|
+
* Wraps an inner resolver (typically {@link SparqlDistributionResolver}) and
|
|
26
|
+
* adds the ability to import a data dump into a local SPARQL server. The
|
|
27
|
+
* {@link ImportResolverOptions.strategy | strategy} option controls whether the
|
|
28
|
+
* inner resolver's SPARQL endpoint is preferred or bypassed.
|
|
14
29
|
*/
|
|
15
30
|
export declare class ImportResolver implements DistributionResolver {
|
|
16
31
|
private readonly inner;
|
|
17
32
|
private readonly options;
|
|
18
33
|
constructor(inner: DistributionResolver, options: ImportResolverOptions);
|
|
19
34
|
resolve(...args: Parameters<DistributionResolver['resolve']>): Promise<ResolvedDistribution | NoDistributionAvailable>;
|
|
35
|
+
private importDataset;
|
|
20
36
|
cleanup(): Promise<void>;
|
|
21
37
|
}
|
|
22
38
|
//# sourceMappingURL=importResolver.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;IACrB;;;;;;;;;;;;OAYG;IACH,QAAQ,CAAC,EAAE,QAAQ,GAAG,QAAQ,CAAC;CAChC;AAED;;;;;;;GAOG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,OAAO,CACX,GAAG,IAAI,EAAE,UAAU,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,GACnD,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;YAgB5C,aAAa;IAgCrB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
|
|
@@ -2,11 +2,12 @@ import { Distribution } from '@lde/dataset';
|
|
|
2
2
|
import { ImportFailed, ImportSuccessful } from '@lde/sparql-importer';
|
|
3
3
|
import { NoDistributionAvailable, ResolvedDistribution, } from './resolver.js';
|
|
4
4
|
/**
|
|
5
|
-
* A {@link DistributionResolver} decorator that adds
|
|
5
|
+
* A {@link DistributionResolver} decorator that adds data-dump import logic.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* Wraps an inner resolver (typically {@link SparqlDistributionResolver}) and
|
|
8
|
+
* adds the ability to import a data dump into a local SPARQL server. The
|
|
9
|
+
* {@link ImportResolverOptions.strategy | strategy} option controls whether the
|
|
10
|
+
* inner resolver's SPARQL endpoint is preferred or bypassed.
|
|
10
11
|
*/
|
|
11
12
|
export class ImportResolver {
|
|
12
13
|
inner;
|
|
@@ -16,19 +17,26 @@ export class ImportResolver {
|
|
|
16
17
|
this.options = options;
|
|
17
18
|
}
|
|
18
19
|
async resolve(...args) {
|
|
20
|
+
const [dataset] = args;
|
|
19
21
|
const result = await this.inner.resolve(...args);
|
|
20
|
-
|
|
22
|
+
// 'sparql' strategy (default): use SPARQL endpoint if inner found one.
|
|
23
|
+
if (this.options.strategy !== 'import' &&
|
|
24
|
+
result instanceof ResolvedDistribution) {
|
|
21
25
|
return result;
|
|
22
|
-
|
|
26
|
+
}
|
|
27
|
+
// Either 'import' strategy or inner found nothing: import a data dump.
|
|
28
|
+
return this.importDataset(dataset, result.probeResults);
|
|
29
|
+
}
|
|
30
|
+
async importDataset(dataset, probeResults) {
|
|
23
31
|
const importStart = Date.now();
|
|
24
32
|
const importResult = await this.options.importer.import(dataset);
|
|
25
33
|
if (importResult instanceof ImportSuccessful) {
|
|
26
34
|
await this.options.server.start();
|
|
27
35
|
const distribution = Distribution.sparql(this.options.server.queryEndpoint, importResult.identifier);
|
|
28
36
|
distribution.subjectFilter = importResult.distribution.subjectFilter;
|
|
29
|
-
return new ResolvedDistribution(distribution,
|
|
37
|
+
return new ResolvedDistribution(distribution, probeResults, importResult.distribution, Date.now() - importStart);
|
|
30
38
|
}
|
|
31
|
-
return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available',
|
|
39
|
+
return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', probeResults, importResult instanceof ImportFailed ? importResult : undefined);
|
|
32
40
|
}
|
|
33
41
|
async cleanup() {
|
|
34
42
|
await this.options.server.stop();
|
package/dist/pipeline.js
CHANGED
|
@@ -115,13 +115,13 @@ export class Pipeline {
|
|
|
115
115
|
async runStage(dataset, distribution, stage) {
|
|
116
116
|
this.reporter?.stageStart?.(stage.name);
|
|
117
117
|
const stageStart = Date.now();
|
|
118
|
-
let
|
|
118
|
+
let itemsProcessed = 0;
|
|
119
119
|
let quadsGenerated = 0;
|
|
120
120
|
const result = await stage.run(dataset, distribution, this.writer, {
|
|
121
|
-
onProgress: (
|
|
122
|
-
|
|
121
|
+
onProgress: (items, quads) => {
|
|
122
|
+
itemsProcessed = items;
|
|
123
123
|
quadsGenerated = quads;
|
|
124
|
-
this.reporter?.stageProgress?.({
|
|
124
|
+
this.reporter?.stageProgress?.({ itemsProcessed, quadsGenerated });
|
|
125
125
|
},
|
|
126
126
|
});
|
|
127
127
|
if (result instanceof NotSupported) {
|
|
@@ -129,7 +129,7 @@ export class Pipeline {
|
|
|
129
129
|
}
|
|
130
130
|
else {
|
|
131
131
|
this.reporter?.stageComplete?.(stage.name, {
|
|
132
|
-
|
|
132
|
+
itemsProcessed,
|
|
133
133
|
quadsGenerated,
|
|
134
134
|
duration: Date.now() - stageStart,
|
|
135
135
|
});
|
|
@@ -170,13 +170,13 @@ export class Pipeline {
|
|
|
170
170
|
async runChainedStage(dataset, distribution, stage, stageWriter) {
|
|
171
171
|
this.reporter?.stageStart?.(stage.name);
|
|
172
172
|
const stageStart = Date.now();
|
|
173
|
-
let
|
|
173
|
+
let itemsProcessed = 0;
|
|
174
174
|
let quadsGenerated = 0;
|
|
175
175
|
const result = await stage.run(dataset, distribution, stageWriter, {
|
|
176
|
-
onProgress: (
|
|
177
|
-
|
|
176
|
+
onProgress: (items, quads) => {
|
|
177
|
+
itemsProcessed = items;
|
|
178
178
|
quadsGenerated = quads;
|
|
179
|
-
this.reporter?.stageProgress?.({
|
|
179
|
+
this.reporter?.stageProgress?.({ itemsProcessed, quadsGenerated });
|
|
180
180
|
},
|
|
181
181
|
});
|
|
182
182
|
if (result instanceof NotSupported) {
|
|
@@ -184,7 +184,7 @@ export class Pipeline {
|
|
|
184
184
|
throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
|
|
185
185
|
}
|
|
186
186
|
this.reporter?.stageComplete?.(stage.name, {
|
|
187
|
-
|
|
187
|
+
itemsProcessed,
|
|
188
188
|
quadsGenerated,
|
|
189
189
|
duration: Date.now() - stageStart,
|
|
190
190
|
});
|
|
@@ -14,11 +14,11 @@ export interface ProgressReporter {
|
|
|
14
14
|
distributionSelected?(dataset: Dataset, distribution: Distribution, importedFrom?: Distribution, importDuration?: number): void;
|
|
15
15
|
stageStart?(stage: string): void;
|
|
16
16
|
stageProgress?(update: {
|
|
17
|
-
|
|
17
|
+
itemsProcessed: number;
|
|
18
18
|
quadsGenerated: number;
|
|
19
19
|
}): void;
|
|
20
20
|
stageComplete?(stage: string, result: {
|
|
21
|
-
|
|
21
|
+
itemsProcessed: number;
|
|
22
22
|
quadsGenerated: number;
|
|
23
23
|
duration: number;
|
|
24
24
|
}): void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE1D,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,qBAAqB,CAAC,CACpB,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,0BAA0B,EAAE,GACpC,IAAI,CAAC;IACR,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,GACtB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,
|
|
1
|
+
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE1D,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,qBAAqB,CAAC,CACpB,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,0BAA0B,EAAE,GACpC,IAAI,CAAC;IACR,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,GACtB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,eAAe,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACzC,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACvD"}
|
package/dist/stage.d.ts
CHANGED
|
@@ -17,7 +17,7 @@ export interface StageOptions {
|
|
|
17
17
|
stages?: Stage[];
|
|
18
18
|
}
|
|
19
19
|
export interface RunOptions {
|
|
20
|
-
onProgress?: (
|
|
20
|
+
onProgress?: (itemsProcessed: number, quadsGenerated: number) => void;
|
|
21
21
|
}
|
|
22
22
|
export declare class Stage {
|
|
23
23
|
readonly name: string;
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAmBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACrE"}
|
package/dist/stage.js
CHANGED
|
@@ -49,7 +49,7 @@ export class Stage {
|
|
|
49
49
|
}
|
|
50
50
|
})();
|
|
51
51
|
const queue = new AsyncQueue();
|
|
52
|
-
let
|
|
52
|
+
let itemsProcessed = 0;
|
|
53
53
|
let quadsGenerated = 0;
|
|
54
54
|
let hasResults = false;
|
|
55
55
|
const dispatch = async () => {
|
|
@@ -88,8 +88,8 @@ export class Stage {
|
|
|
88
88
|
quadsGenerated++;
|
|
89
89
|
}
|
|
90
90
|
}
|
|
91
|
-
|
|
92
|
-
options?.onProgress?.(
|
|
91
|
+
itemsProcessed += bindings.length;
|
|
92
|
+
options?.onProgress?.(itemsProcessed, quadsGenerated);
|
|
93
93
|
})());
|
|
94
94
|
}
|
|
95
95
|
}
|