@lde/pipeline 0.6.20 → 0.6.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
@@ -10,13 +10,37 @@ Framework for building RDF data processing pipelines with SPARQL.
 - **SparqlConstructExecutor** — streaming SPARQL CONSTRUCT with template substitution and variable bindings
 - **Distribution analysis** — probe and analyze dataset distributions
 
-##
+## Components
 
-
-
-
-
-
+A **Pipeline** consists of:
+
+- one **[Dataset Selector](#dataset-selector)**
+- one **[Distribution Resolver](#distribution-resolver)** that resolves the input dataset to a usable SPARQL distribution
+- one or more **Stages**, each consisting of:
+  - an optional **Selector** that filters resources
+  - one or more **Executors** that generate triples for each selected resource
+
+### Dataset Selector
+
+Selects datasets, either specified manually by the user or discovered dynamically by querying a DCAT Dataset Registry.
+
+### Distribution Resolver
+
+Resolves each selected dataset to a usable distribution.
+
+#### SPARQL Distribution Resolver
+
+If a working SPARQL endpoint is already available for the dataset, that endpoint is used.
+If not, and a valid RDF data dump is available, the dump is imported into a local SPARQL server.
+
+#### Other Distribution Resolvers
+
+### Bindings Selector
+
+Selects resources from the dataset in order to fan out queries per result in the executor.
+Binding variables are left free in the executor query and are replaced with a `VALUES { ... }` clause.
+
+### Executor
 
 ## Usage
 
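The component structure described in the new README section maps naturally onto a small configuration object. The sketch below is illustrative only (the interface and property names are invented, not the package's actual API); it shows how the pieces fit together and how a Bindings Selector result might be fanned out into an executor query via a VALUES clause.

// Illustrative shapes only; not the actual @lde/pipeline API.
interface Dataset { iri: URL; }
interface DatasetSelector { select(): AsyncIterable<Dataset>; }
interface DistributionResolver {
  resolve(dataset: Dataset): Promise<{ endpoint: URL }>;
}

interface Stage {
  selector?: string;   // optional SPARQL SELECT that picks resources
  executors: string[]; // SPARQL CONSTRUCT queries run per selected resource
}

interface Pipeline {
  datasetSelector: DatasetSelector;
  distributionResolver: DistributionResolver;
  stages: Stage[];
}

// The Bindings Selector picks resources; each result row is then injected
// into the executor query, binding the free ?resource variable through a
// VALUES clause, e.g.: VALUES ?resource { <http://example.org/alice> }
const stage: Stage = {
  selector: 'SELECT ?resource WHERE { ?resource a <http://schema.org/Person> }',
  executors: [
    `CONSTRUCT { ?resource <http://schema.org/name> ?name }
     WHERE { ?resource <http://schema.org/name> ?name }`,
  ],
};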
package/dist/index.d.ts
CHANGED
package/dist/index.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC"}
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
package/dist/index.js
CHANGED
package/dist/writer/sparqlUpdateWriter.d.ts
CHANGED
@@ -21,7 +21,8 @@ export interface SparqlWriterOptions {
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- *
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export declare class SparqlUpdateWriter implements Writer {
     private readonly endpoint;
@@ -29,6 +30,8 @@ export declare class SparqlUpdateWriter implements Writer {
     private readonly batchSize;
     constructor(options: SparqlWriterOptions);
     write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
+    private clearGraph;
     private insertBatch;
+    private executeUpdate;
 }
 //# sourceMappingURL=sparqlUpdateWriter.d.ts.map
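For orientation, a minimal usage sketch of the class declared above. The option names are inferred from the private members shown (endpoint, batchSize) and from this.fetch in the implementation further down; treat them as assumptions rather than documented API.

import { SparqlUpdateWriter } from '@lde/pipeline';

// Hypothetical inputs; the real Dataset and Quad types come from the
// package's own dependencies.
declare const dataset: { iri: URL };
declare const quads: AsyncIterable<unknown>;

const writer = new SparqlUpdateWriter({
  endpoint: new URL('http://localhost:3030/ds/update'), // assumed option name
  batchSize: 1000, // assumed option name: quads per INSERT DATA request
});

// Clears the graph named by dataset.iri, then streams the quads in batches.
await writer.write(dataset, quads);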
package/dist/writer/sparqlUpdateWriter.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;
+{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAMlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAS1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAgB5B"}
package/dist/writer/sparqlUpdateWriter.js
CHANGED
@@ -1,8 +1,10 @@
+import { batch } from '../batch.js';
 import { serializeQuads } from './serialize.js';
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- *
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export class SparqlUpdateWriter {
     endpoint;
@@ -15,22 +17,19 @@ export class SparqlUpdateWriter {
     }
     async write(dataset, quads) {
        const graphUri = dataset.iri.toString();
-        const collected = [];
-        for await (const quad of quads) {
-            collected.push(quad);
-        }
-        if (collected.length === 0) {
-            return;
-        }
-        // Process in batches to avoid hitting endpoint size limits.
-        for (let i = 0; i < collected.length; i += this.batchSize) {
-            const batch = collected.slice(i, i + this.batchSize);
-            await this.insertBatch(graphUri, batch);
+        await this.clearGraph(graphUri);
+        for await (const chunk of batch(quads, this.batchSize)) {
+            await this.insertBatch(graphUri, chunk);
         }
     }
+    async clearGraph(graphUri) {
+        await this.executeUpdate(`CLEAR GRAPH <${graphUri}>`);
+    }
     async insertBatch(graphUri, quads) {
         const turtleData = await serializeQuads(quads, 'N-Triples');
-        const query = `INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`;
+        await this.executeUpdate(`INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`);
+    }
+    async executeUpdate(query) {
         const response = await this.fetch(this.endpoint.toString(), {
             method: 'POST',
             headers: {
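The new implementation imports batch from '../batch.js', whose body is not part of this diff. A plausible sketch, assuming it is a plain chunking async generator: it groups an AsyncIterable into arrays of at most size items, which is what lets write() stream quads without collecting them all first.

// Sketch of a chunking helper consistent with the call site above;
// the actual ../batch.js may differ.
async function* batch<T>(source: AsyncIterable<T>, size: number): AsyncGenerator<T[]> {
  let chunk: T[] = [];
  for await (const item of source) {
    chunk.push(item);
    if (chunk.length === size) {
      yield chunk; // emit a full chunk and start a new one
      chunk = [];
    }
  }
  if (chunk.length > 0) {
    yield chunk; // flush the final, possibly smaller chunk
  }
}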
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lde/pipeline",
-  "version": "0.6.20",
+  "version": "0.6.22",
   "repository": {
     "url": "https://github.com/ldengine/lde",
     "directory": "packages/pipeline"
@@ -13,18 +13,6 @@
       "import": "./dist/index.js",
       "development": "./src/index.ts",
       "default": "./dist/index.js"
-    },
-    "./writer": {
-      "types": "./dist/writer/index.d.ts",
-      "import": "./dist/writer/index.js",
-      "development": "./src/writer/index.ts",
-      "default": "./dist/writer/index.js"
-    },
-    "./analyzer": {
-      "types": "./dist/analyzer.d.ts",
-      "import": "./dist/analyzer.js",
-      "development": "./src/analyzer.ts",
-      "default": "./dist/analyzer.js"
     }
   },
   "main": "./dist/index.js",
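With the "./writer" and "./analyzer" subpath exports removed, imports presumably move to the package root; the extra re-export line added to index.d.ts.map above is consistent with that reading, though this diff does not show src/index.ts itself. A hedged before/after:

// 0.6.20 (subpath export, removed in this release):
// import { SparqlUpdateWriter } from '@lde/pipeline/writer';

// 0.6.22 (assuming the writer is now re-exported from the root index):
import { SparqlUpdateWriter } from '@lde/pipeline';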