@lde/pipeline 0.6.20 → 0.6.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,13 +10,37 @@ Framework for building RDF data processing pipelines with SPARQL.
 - **SparqlConstructExecutor** — streaming SPARQL CONSTRUCT with template substitution and variable bindings
 - **Distribution analysis** — probe and analyze dataset distributions
 
-## Subpath exports
+## Components
 
-| Export                   | Description                                                   |
-| ------------------------ | ------------------------------------------------------------- |
-| `@lde/pipeline`          | Steps, pipeline, builder, config, SPARQL                      |
-| `@lde/pipeline/analyzer` | Analyzer contracts (`Analyzer`, `BaseAnalyzer`, result types) |
-| `@lde/pipeline/writer`   | Write RDF data to files or SPARQL endpoints                   |
+A **Pipeline** consists of:
+
+- one **[Dataset Selector](#dataset-selector)**
+- one **[Distribution Resolver](#distribution-resolver)** that resolves the input dataset to a usable SPARQL distribution
+- one or more **Stages**, each consisting of:
+  - an optional **Selector** that filters resources
+  - one or more **Executors** that generate triples for each selected resource
+
+### Dataset Selector
+
+Selects datasets, either specified manually by the user or discovered dynamically by querying a DCAT Dataset Registry.
+
+### Distribution Resolver
+
+Resolves each selected dataset to a usable distribution.
+
+#### SPARQL Distribution Resolver
+
+If a working SPARQL endpoint is already available for the dataset, that endpoint is used.
+If not, and a valid RDF data dump is available, the dump is imported into a local SPARQL server.
+
+#### Other Distribution Resolvers
+
+### Bindings Selector
+
+Selects resources from the dataset, fanning out the executor's query once per selected result.
+Binding variables are left free in the executor's query and are filled in with a `VALUES { ... }` clause.
+
+### Executor
 
 ## Usage
 
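To make the Bindings Selector description in the README diff above concrete: a minimal TypeScript sketch of how selected resources might be injected into an executor query as a `VALUES` clause. The `injectValues` helper and the query shapes are illustrative assumptions, not part of the `@lde/pipeline` API shown in this diff.

```ts
// Hypothetical illustration of the Bindings Selector idea: the selector's
// results bind a variable that the executor query leaves free, by injecting
// a SPARQL VALUES block. `injectValues` is an assumed helper, not package API.
function injectValues(query: string, variable: string, iris: string[]): string {
  // Build a VALUES block binding the free variable to each selected IRI.
  const values = iris.map((iri) => `<${iri}>`).join(' ');
  return query.replace(
    'WHERE {',
    `WHERE {\n  VALUES ?${variable} { ${values} }`
  );
}

// The executor query leaves ?resource free; the selector's results bind it.
const executorQuery = `
  CONSTRUCT { ?resource a <https://example.org/Analyzed> . }
  WHERE { ?resource ?p ?o . }
`;

console.log(
  injectValues(executorQuery, 'resource', [
    'https://example.org/dataset/1',
    'https://example.org/dataset/2',
  ])
);
```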
package/dist/index.d.ts CHANGED
@@ -8,4 +8,5 @@ export * from './builder.js';
 export * from './config.js';
 export * from './sparql/index.js';
 export * from './distribution/index.js';
+export * from './writer/index.js';
 //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC"}
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
package/dist/index.js CHANGED
@@ -8,4 +8,4 @@ export * from './builder.js';
 export * from './config.js';
 export * from './sparql/index.js';
 export * from './distribution/index.js';
-// first test
+export * from './writer/index.js';
package/dist/writer/sparqlUpdateWriter.d.ts CHANGED
@@ -21,7 +21,8 @@ export interface SparqlWriterOptions {
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- * Each dataset's data is written to a named graph based on the dataset IRI.
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export declare class SparqlUpdateWriter implements Writer {
     private readonly endpoint;
@@ -29,6 +30,8 @@ export declare class SparqlUpdateWriter implements Writer {
     private readonly batchSize;
     constructor(options: SparqlWriterOptions);
     write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
+    private clearGraph;
     private insertBatch;
+    private executeUpdate;
 }
 //# sourceMappingURL=sparqlUpdateWriter.d.ts.map
package/dist/writer/sparqlUpdateWriter.d.ts.map CHANGED
@@ -1 +1 @@
-{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;GAIG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAMlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAkB1D,WAAW;CAmB1B"}
+{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAMlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAS1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAgB5B"}
package/dist/writer/sparqlUpdateWriter.js CHANGED
@@ -1,8 +1,10 @@
+import { batch } from '../batch.js';
 import { serializeQuads } from './serialize.js';
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- * Each dataset's data is written to a named graph based on the dataset IRI.
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export class SparqlUpdateWriter {
     endpoint;
@@ -15,22 +17,19 @@ export class SparqlUpdateWriter {
     }
     async write(dataset, quads) {
         const graphUri = dataset.iri.toString();
-        const collected = [];
-        for await (const quad of quads) {
-            collected.push(quad);
-        }
-        if (collected.length === 0) {
-            return;
-        }
-        // Process in batches to avoid hitting endpoint size limits.
-        for (let i = 0; i < collected.length; i += this.batchSize) {
-            const batch = collected.slice(i, i + this.batchSize);
-            await this.insertBatch(graphUri, batch);
+        await this.clearGraph(graphUri);
+        for await (const chunk of batch(quads, this.batchSize)) {
+            await this.insertBatch(graphUri, chunk);
         }
     }
+    async clearGraph(graphUri) {
+        await this.executeUpdate(`CLEAR GRAPH <${graphUri}>`);
+    }
     async insertBatch(graphUri, quads) {
         const turtleData = await serializeQuads(quads, 'N-Triples');
-        const query = `INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`;
+        await this.executeUpdate(`INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`);
+    }
+    async executeUpdate(query) {
         const response = await this.fetch(this.endpoint.toString(), {
             method: 'POST',
             headers: {
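The rewritten `write` method above consumes `batch(quads, this.batchSize)` from `../batch.js`, whose implementation is not part of this diff. A minimal sketch of such an async-iterable chunker, assuming only the semantics implied by the call site (yield arrays of at most `size` items, emit nothing for empty input), could look like:

```ts
// Sketch of a batch() helper matching the call site above; the real
// ../batch.js in @lde/pipeline may differ. Yields arrays of at most
// `size` items from any async iterable.
async function* batch<T>(
  source: AsyncIterable<T>,
  size: number
): AsyncGenerator<T[]> {
  let chunk: T[] = [];
  for await (const item of source) {
    chunk.push(item);
    if (chunk.length >= size) {
      yield chunk;
      chunk = [];
    }
  }
  if (chunk.length > 0) {
    yield chunk; // flush the final partial chunk
  }
}
```

Streaming this way bounds memory by `batchSize` rather than by the full dataset, which is exactly the trade-off the new docblock describes.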
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lde/pipeline",
-  "version": "0.6.20",
+  "version": "0.6.22",
   "repository": {
     "url": "https://github.com/ldengine/lde",
     "directory": "packages/pipeline"
@@ -13,18 +13,6 @@
       "import": "./dist/index.js",
       "development": "./src/index.ts",
       "default": "./dist/index.js"
-    },
-    "./writer": {
-      "types": "./dist/writer/index.d.ts",
-      "import": "./dist/writer/index.js",
-      "development": "./src/writer/index.ts",
-      "default": "./dist/writer/index.js"
-    },
-    "./analyzer": {
-      "types": "./dist/analyzer.d.ts",
-      "import": "./dist/analyzer.js",
-      "development": "./src/analyzer.ts",
-      "default": "./dist/analyzer.js"
     }
   },
   "main": "./dist/index.js",