@directory-builder/core 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/bin/cli.js +7 -0
- package/package.json +2 -1
- package/src/index.js +1 -0
- package/src/lift/xml.sparql +12 -0
- package/src/pipeline/federate.js +2 -2
- package/src/pipeline/ingest.js +2 -2
- package/src/pipeline/steps/fetch.js +2 -0
- package/src/pipeline/steps/lift.js +2 -0
- package/src/pipeline.js +7 -2
- package/src/utils.js +7 -0
- package/src/validate/federation.shacl.ttl +26 -0
- package/src/validate.js +52 -0
- package/test/validate.test.js +14 -0
- package/webapp/src/instanceData.js +2 -2
- package/webapp/src/loadSources.js +2 -2
package/README.md
CHANGED
|
@@ -66,6 +66,10 @@ Each source's `fetch.js` is invoked as `node fetch.js <outDir> <fetchUrl-or-stat
|
|
|
66
66
|
<runParamsJson>` — the JSON holds all `:hasRunParam` values grouped by name;
|
|
67
67
|
each fetcher picks the parameters it needs.
|
|
68
68
|
|
|
69
|
+
A source declared with `:enabled false` stays in the config but is skipped by
|
|
70
|
+
the engines and hidden from the webapp's Sources page — e.g. while its files
|
|
71
|
+
aren't available yet.
|
|
72
|
+
|
|
69
73
|
Engines journal their executed steps as p-plan RDF (`data/ingest/ingest-log.ttl`,
|
|
70
74
|
`data/pipeline/federate-log.ttl`) — evidence of what ran, not a plan.
|
|
71
75
|
|
|
@@ -84,6 +88,11 @@ npx directory-builder webapp build --base /repo/ # production build → weba
|
|
|
84
88
|
`webapp/{content,exporters}/` into `webapp/dist/` next to the bundle —
|
|
85
89
|
`webapp/dist/` is the complete site, ready to publish as-is.
|
|
86
90
|
|
|
91
|
+
The two are independent: the dev server never needs a prior build — `webapp
|
|
92
|
+
build` exists only to produce the deployable. Both show whatever `data/` the
|
|
93
|
+
pipeline last produced, so run the pipeline first (and rebuild before
|
|
94
|
+
publishing, or `dist/` keeps the stale snapshot).
|
|
95
|
+
|
|
87
96
|
For webapp development in this repo:
|
|
88
97
|
|
|
89
98
|
```sh
|
package/bin/cli.js
CHANGED
|
@@ -5,11 +5,13 @@
|
|
|
5
5
|
// directory-builder run the full pipeline (ingest + federate)
|
|
6
6
|
// directory-builder ingest fetch + lift only
|
|
7
7
|
// directory-builder federate clean → map → match → merge → resolve only
|
|
8
|
+
// directory-builder validate check the instance's config ↔ sources/ integrity
|
|
8
9
|
// directory-builder webapp dev server for the instance's webapp
|
|
9
10
|
// directory-builder webapp build [--base /x/] build the webapp → <instance>/webapp/dist/
|
|
10
11
|
|
|
11
12
|
import { webappBuild, webappDev } from "../src/webapp.js"
|
|
12
13
|
import { Pipeline } from "../src/pipeline.js"
|
|
14
|
+
import { validate } from "../src/validate.js"
|
|
13
15
|
|
|
14
16
|
const [cmd = "run", ...rest] = process.argv.slice(2)
|
|
15
17
|
const flag = (name) => {
|
|
@@ -22,6 +24,11 @@ const commands = {
|
|
|
22
24
|
run: () => pipeline.run(),
|
|
23
25
|
ingest: () => pipeline.ingest(),
|
|
24
26
|
federate: () => pipeline.federate(),
|
|
27
|
+
validate: async () => {
|
|
28
|
+
const problems = await validate()
|
|
29
|
+
if (problems.length) { console.error(problems.join("\n")); process.exit(1) }
|
|
30
|
+
console.log("instance valid")
|
|
31
|
+
},
|
|
25
32
|
webapp: () => {
|
|
26
33
|
if (rest[0] && rest[0] !== "build") {
|
|
27
34
|
console.error(`Unknown webapp subcommand "${rest[0]}" — expected "build" or nothing (dev server)`)
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@directory-builder/core",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Use-case-agnostic engine for config-driven federation pipelines",
|
|
5
5
|
"author": "Civic Data Lab",
|
|
6
6
|
"repository": "github:foederierter-datenpool/directory-builder-core",
|
|
7
7
|
"license": "MIT",
|
|
8
8
|
"type": "module",
|
|
9
9
|
"scripts": {
|
|
10
|
+
"test": "node --test",
|
|
10
11
|
"example": "cd example && node ../bin/cli.js",
|
|
11
12
|
"webapp": "vite webapp",
|
|
12
13
|
"webapp:build": "vite build webapp"
|
package/src/index.js
CHANGED
package/src/lift/xml.sparql
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
PREFIX xyz: <http://sparql.xyz/facade-x/data/>
|
|
2
|
+
PREFIX fx: <http://sparql.xyz/facade-x/ns/>
|
|
3
|
+
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
|
4
|
+
CONSTRUCT {
|
|
5
|
+
?s ?p ?o
|
|
6
|
+
} WHERE {
|
|
7
|
+
SERVICE <x-sparql-anything:> {
|
|
8
|
+
fx:properties fx:location ?_location ;
|
|
9
|
+
fx:media-type "application/xml" .
|
|
10
|
+
?s ?p ?o .
|
|
11
|
+
}
|
|
12
|
+
}
|
package/src/pipeline/federate.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { newStore, parser as n3Parser, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
|
|
2
|
-
import { CDP,
|
|
2
|
+
import { CDP, enabledSources, parseTtl, PATHS, sourceGraph, sourceName, stepIri, stepJournal } from "../utils.js"
|
|
3
3
|
import { COMMON_PREFIXES, writeTurtleFile } from "./write-turtle.js"
|
|
4
4
|
import { MAPPED_GRAPH, runMap } from "./steps/map.js"
|
|
5
5
|
import { runClean } from "./steps/clean.js"
|
|
@@ -26,7 +26,7 @@ export async function federate(root = process.cwd()) {
|
|
|
26
26
|
const abs = (p) => path.join(root, p)
|
|
27
27
|
const federationTtl = fs.readFileSync(abs(PATHS.federation), "utf8")
|
|
28
28
|
const defStore = storeFromTurtles([federationTtl, fs.readFileSync(abs(PATHS.matchKnowledge), "utf8")])
|
|
29
|
-
const sources =
|
|
29
|
+
const sources = enabledSources(parseTtl(federationTtl))
|
|
30
30
|
|
|
31
31
|
const store = newStore()
|
|
32
32
|
const journal = stepJournal()
|
package/src/pipeline/ingest.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { sparqlSelect, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
|
|
2
|
-
import { CDP,
|
|
2
|
+
import { CDP, enabledSources, parseTtl, PATHS, sourceName, stepJournal } from "../utils.js"
|
|
3
3
|
import { ensureJar, runLift } from "./steps/lift.js"
|
|
4
4
|
import { runFetch } from "./steps/fetch.js"
|
|
5
5
|
import path from "path"
|
|
@@ -30,7 +30,7 @@ export async function ingest(root = process.cwd()) {
|
|
|
30
30
|
if (!facts.has(r.source)) facts.set(r.source, { fetchUrl: r.fetchUrl, format: r.format, params: [] })
|
|
31
31
|
if (r.paramName) facts.get(r.source).params.push([r.paramName, r.paramValue])
|
|
32
32
|
}
|
|
33
|
-
const sources = new Map(
|
|
33
|
+
const sources = new Map(enabledSources(parseTtl(federationTtl)).map((iri) => [iri, facts.get(iri)]))
|
|
34
34
|
for (const [iri, s] of sources) {
|
|
35
35
|
if (!s.format) throw new Error(`${iri} declares no :format (needed to pick the lift query)`)
|
|
36
36
|
}
|
|
package/src/pipeline/steps/fetch.js
CHANGED
|
@@ -11,6 +11,8 @@ export const runFetch = ({ abs, root }, { name, fetchUrl, paramsJson }) => {
|
|
|
11
11
|
const outDir = PATHS.raw(name)
|
|
12
12
|
const origin = fetchUrl ?? abs(PATHS.staticDir(name))
|
|
13
13
|
console.log(`fetch ${fetchUrl ?? PATHS.staticDir(name)} (params ${paramsJson}) → ${outDir}`)
|
|
14
|
+
// Clear any prior output first, so changed run params (or changed records) can't leave stale files behind
|
|
15
|
+
fs.rmSync(abs(outDir), { recursive: true, force: true })
|
|
14
16
|
fs.mkdirSync(abs(outDir), { recursive: true })
|
|
15
17
|
run("node", [abs(PATHS.fetchScript(name)), abs(outDir), origin, paramsJson])
|
|
16
18
|
const harvest = { time: new Date().toISOString() }
|
|
package/src/pipeline/steps/lift.js
CHANGED
|
@@ -49,6 +49,8 @@ export const runLift = ({ abs }, { jar, name, format, params }) => {
|
|
|
49
49
|
const inAbs = abs(PATHS.raw(name))
|
|
50
50
|
const outAbs = abs(PATHS.lifted(name))
|
|
51
51
|
const files = fs.readdirSync(inAbs).filter(f => !f.startsWith(".")).sort()
|
|
52
|
+
// Clear stale lifted files first — the clean step reads every .ttl here.
|
|
53
|
+
fs.rmSync(outAbs, { recursive: true, force: true })
|
|
52
54
|
fs.mkdirSync(outAbs, { recursive: true })
|
|
53
55
|
console.log(`lift ${PATHS.raw(name)} (${files.length} files) → ${PATHS.lifted(name)}`)
|
|
54
56
|
for (const f of files) {
|
package/src/pipeline.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { ingest } from "./pipeline/ingest.js"
|
|
2
2
|
import { federate } from "./pipeline/federate.js"
|
|
3
|
+
import { validate } from "./validate.js"
|
|
3
4
|
|
|
4
5
|
// Programmatic entry: hold the instance root once, run the engines against it.
|
|
5
6
|
// The CLI (bin/cli.js) is this same class with defaults — root = cwd.
|
|
@@ -7,8 +8,12 @@ export class Pipeline {
|
|
|
7
8
|
constructor({ root = process.cwd() } = {}) {
|
|
8
9
|
this.root = root
|
|
9
10
|
}
|
|
10
|
-
|
|
11
|
-
|
|
11
|
+
async validate() {
|
|
12
|
+
const problems = await validate(this.root)
|
|
13
|
+
if (problems.length) throw new Error(`invalid instance at ${this.root}:\n ${problems.join("\n ")}`)
|
|
14
|
+
}
|
|
15
|
+
async ingest() { await this.validate(); return ingest(this.root) }
|
|
16
|
+
async federate() { await this.validate(); return federate(this.root) }
|
|
12
17
|
async run() {
|
|
13
18
|
await this.ingest()
|
|
14
19
|
await this.federate()
|
package/src/utils.js
CHANGED
|
@@ -108,6 +108,13 @@ export const shrink = (iri, prefixMap) => {
|
|
|
108
108
|
export const objectsOf = (quads, predIri) =>
|
|
109
109
|
[...new Set(quads.filter((q) => q.predicate.value === predIri).map((q) => q.object.value))]
|
|
110
110
|
|
|
111
|
+
// The federation's sources minus any switched off with `:enabled false`, in
|
|
112
|
+
// :hasSource declaration order — the source list engines and webapp run on.
|
|
113
|
+
export const enabledSources = (quads) => {
|
|
114
|
+
const disabled = new Set(quads.filter((q) => q.predicate.value === `${CDP}enabled` && q.object.value === "false").map((q) => q.subject.value))
|
|
115
|
+
return objectsOf(quads, `${CDP}hasSource`).filter((iri) => !disabled.has(iri))
|
|
116
|
+
}
|
|
117
|
+
|
|
111
118
|
// Set of subjects typed `rdf:type typeIri`. Iteration order = encounter order.
|
|
112
119
|
export function subjectsOfType(quads, typeIri) {
|
|
113
120
|
const out = new Set()
|
|
package/src/validate/federation.shacl.ttl
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
@prefix : <https://civic-data.de/pipeline#> .
|
|
2
|
+
@prefix sh: <http://www.w3.org/ns/shacl#> .
|
|
3
|
+
|
|
4
|
+
# The engine's contract for a well-formed federation.ttl
|
|
5
|
+
# expressed as SHACL and enforced by ../validate.js
|
|
6
|
+
|
|
7
|
+
:federationShape a sh:NodeShape ;
|
|
8
|
+
sh:targetNode :federation ;
|
|
9
|
+
sh:property [
|
|
10
|
+
sh:path :hasSource ;
|
|
11
|
+
sh:minCount 1 ;
|
|
12
|
+
sh:nodeKind sh:IRI ;
|
|
13
|
+
sh:message "needs at least one :hasSource"
|
|
14
|
+
] .
|
|
15
|
+
|
|
16
|
+
:sourceShape a sh:NodeShape ;
|
|
17
|
+
sh:targetObjectsOf :hasSource ;
|
|
18
|
+
sh:property [
|
|
19
|
+
sh:path :format ;
|
|
20
|
+
sh:minCount 1 ;
|
|
21
|
+
sh:maxCount 1 ;
|
|
22
|
+
sh:nodeKind sh:IRI ;
|
|
23
|
+
sh:message "needs exactly one :format"
|
|
24
|
+
] .
|
|
25
|
+
|
|
26
|
+
# TODO: add the rest
|
package/src/validate.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { buildValidator, turtleToDataset } from "@foerderfunke/sem-ops-utils"
|
|
2
|
+
import { CDP, objectsOf, parseTtl, PATHS, shrink, sourceName } from "./utils.js"
|
|
3
|
+
import path from "path"
|
|
4
|
+
import fs from "fs"
|
|
5
|
+
|
|
6
|
+
// Instance integrity checks. Each check takes { abs, ttl, quads } (path
|
|
7
|
+
// resolver rooted at the instance, federation.ttl raw + parsed) and returns
|
|
8
|
+
// problem strings. validate() runs them all; empty result = valid. Runs
|
|
9
|
+
// automatically before the engines; `directory-builder validate` triggers it
|
|
10
|
+
// on its own.
|
|
11
|
+
|
|
12
|
+
const checks = [sourcesFoldersInSync, federationConformsToShape]
|
|
13
|
+
|
|
14
|
+
export async function validate(root = process.cwd()) {
|
|
15
|
+
const abs = (p) => path.join(root, p)
|
|
16
|
+
if (!fs.existsSync(abs(PATHS.federation))) return [`${PATHS.federation} missing`]
|
|
17
|
+
const ttl = fs.readFileSync(abs(PATHS.federation), "utf8")
|
|
18
|
+
const ctx = { abs, ttl, quads: parseTtl(ttl) }
|
|
19
|
+
return (await Promise.all(checks.map((check) => check(ctx)))).flat()
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// Every :hasSource in federation.ttl has its sources/<name>/ folder with
|
|
23
|
+
// fetch.js + clean.sparql - and no folder exists that the federation doesn't
|
|
24
|
+
// declare. Checks all declared sources, enabled or not: folder presence is a
|
|
25
|
+
// repo-layout contract.
|
|
26
|
+
function sourcesFoldersInSync({ abs, quads }) {
|
|
27
|
+
const declared = objectsOf(quads, `${CDP}hasSource`).map(sourceName)
|
|
28
|
+
const problems = []
|
|
29
|
+
for (const name of declared) {
|
|
30
|
+
for (const file of [PATHS.fetchScript(name), PATHS.cleanQuery(name)]) {
|
|
31
|
+
if (!fs.existsSync(abs(file))) problems.push(`${file} missing`)
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
const folders = fs.existsSync(abs("sources"))
|
|
35
|
+
? fs.readdirSync(abs("sources"), { withFileTypes: true }).filter((d) => d.isDirectory()).map((d) => d.name)
|
|
36
|
+
: []
|
|
37
|
+
for (const name of folders) {
|
|
38
|
+
if (!declared.includes(name)) problems.push(`sources/${name}/ has no :hasSource declaration in ${PATHS.federation}`)
|
|
39
|
+
}
|
|
40
|
+
return problems
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// federation.ttl conforms to the engine's config contract, expressed as SHACL
|
|
44
|
+
// in federation.shacl.ttl next to this file - the shape ships with the
|
|
45
|
+
// package, instances never carry it.
|
|
46
|
+
const validator = buildValidator(fs.readFileSync(path.join(import.meta.dirname, "validate/federation.shacl.ttl"), "utf8"))
|
|
47
|
+
|
|
48
|
+
async function federationConformsToShape({ ttl }) {
|
|
49
|
+
const report = await validator.validate({ dataset: turtleToDataset(ttl) })
|
|
50
|
+
return report.results.map((r) =>
|
|
51
|
+
`${PATHS.federation}: ${shrink(r.focusNode.value, { "": CDP })} ${r.message.map((m) => m.value).join("; ")}`)
|
|
52
|
+
}
|
|
package/test/validate.test.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { validate } from "@directory-builder/core"
|
|
2
|
+
import assert from "node:assert/strict"
|
|
3
|
+
import { test } from "node:test"
|
|
4
|
+
import path from "path"
|
|
5
|
+
|
|
6
|
+
const INSTANCE_ROOT = path.join(import.meta.dirname, "../example")
|
|
7
|
+
|
|
8
|
+
// The example instance satisfies the contract validate() enforces: every
|
|
9
|
+
// :hasSource in federation.ttl has its sources/<name>/ folder with fetch.js
|
|
10
|
+
// + clean.sparql, no folder exists that the federation doesn't declare, and
|
|
11
|
+
// federation.ttl conforms to the engine's SHACL shape.
|
|
12
|
+
test("validate() finds no problems in the example instance", async () => {
|
|
13
|
+
assert.deepEqual(await validate(INSTANCE_ROOT), [])
|
|
14
|
+
})
|
|
package/webapp/src/instanceData.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
// artifact resolves to "" (pages render empty). Top-level await — importing
|
|
7
7
|
// modules stay synchronous.
|
|
8
8
|
|
|
9
|
-
import { CDP, objectsOf, parseTtl, PATHS, prefixesOf, sourceName } from "@directory-builder/core/utils"
|
|
9
|
+
import { CDP, enabledSources, objectsOf, parseTtl, PATHS, prefixesOf, sourceName } from "@directory-builder/core/utils"
|
|
10
10
|
|
|
11
11
|
const fetchText = async (path) => {
|
|
12
12
|
const res = await fetch(`${import.meta.env.BASE_URL}${path}`).catch(() => null)
|
|
@@ -16,7 +16,7 @@ const fetchText = async (path) => {
|
|
|
16
16
|
export const federationTtl = await fetchText(PATHS.federation)
|
|
17
17
|
|
|
18
18
|
const fedQuads = parseTtl(federationTtl)
|
|
19
|
-
const cleanedPaths =
|
|
19
|
+
const cleanedPaths = enabledSources(fedQuads).map((iri) => PATHS.cleaned(sourceName(iri)))
|
|
20
20
|
// The instance's repo URL (:federation :repository …) — undefined when not
|
|
21
21
|
// declared; pages hide their GitHub links then.
|
|
22
22
|
export const repositoryUrl = objectsOf(fedQuads, `${CDP}repository`)[0]
|
|
package/webapp/src/loadSources.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// Reads: federation, mapped, ingest-log TTL strings passed by Sources.jsx
|
|
4
4
|
// Does: returns source[] ({iri, label, format, totalFields, mappedFields, records, …})
|
|
5
5
|
|
|
6
|
-
import { CDP as NS, formatFamily, parseTtl, PATHS, sourceName
|
|
6
|
+
import { CDP as NS, enabledSources, formatFamily, parseTtl, PATHS, sourceName } from "@directory-builder/core/utils"
|
|
7
7
|
|
|
8
8
|
const PROV_AT_TIME = "http://www.w3.org/ns/prov#atTime"
|
|
9
9
|
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
|
@@ -18,7 +18,7 @@ export function loadSources(federationTtl, mappedTtl, ingestLogTtl) {
|
|
|
18
18
|
const mappedQuads = mappedTtl ? parseTtl(mappedTtl) : []
|
|
19
19
|
const logQuads = ingestLogTtl ? parseTtl(ingestLogTtl) : []
|
|
20
20
|
|
|
21
|
-
const sourceIris =
|
|
21
|
+
const sourceIris = new Set(enabledSources(fedQuads))
|
|
22
22
|
|
|
23
23
|
const props = new Map()
|
|
24
24
|
const get = (iri) => {
|