@directory-builder/core 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/package.json +1 -1
- package/src/lift/xml.sparql +12 -0
- package/src/pipeline/federate.js +2 -2
- package/src/pipeline/ingest.js +2 -2
- package/src/pipeline/steps/fetch.js +2 -0
- package/src/pipeline/steps/lift.js +2 -0
- package/src/utils.js +7 -0
- package/webapp/src/instanceData.js +2 -2
- package/webapp/src/loadSources.js +2 -2
package/README.md
CHANGED
|
@@ -66,6 +66,10 @@ Each source's `fetch.js` is invoked as `node fetch.js <outDir> <fetchUrl-or-stat
|
|
|
66
66
|
<runParamsJson>` — the JSON holds all `:hasRunParam` values grouped by name;
|
|
67
67
|
each fetcher picks the parameters it needs.
|
|
68
68
|
|
|
69
|
+
A source declared with `:enabled false` stays in the config but is skipped by
|
|
70
|
+
the engines and hidden from the webapp's Sources page — e.g. while its files
|
|
71
|
+
aren't available yet.
|
|
72
|
+
|
|
69
73
|
Engines journal their executed steps as p-plan RDF (`data/ingest/ingest-log.ttl`,
|
|
70
74
|
`data/pipeline/federate-log.ttl`) — evidence of what ran, not a plan.
|
|
71
75
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@directory-builder/core",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Use-case-agnostic engine for config-driven federation pipelines",
|
|
5
5
|
"author": "Civic Data Lab",
|
|
6
6
|
"repository": "github:foederierter-datenpool/directory-builder-core",
|
|
package/src/lift/xml.sparql
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Lifts an XML document through the SPARQL Anything service and re-emits every
# triple found there unchanged.
PREFIX xyz: <http://sparql.xyz/facade-x/data/>
PREFIX fx: <http://sparql.xyz/facade-x/ns/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

CONSTRUCT { ?subject ?predicate ?object }
WHERE {
  SERVICE <x-sparql-anything:> {
    # ?_location is never bound inside this query — presumably supplied by the
    # caller when the query is run (NOTE(review): confirm against the lift step).
    fx:properties
      fx:location ?_location ;
      fx:media-type "application/xml" .
    ?subject ?predicate ?object .
  }
}
|
package/src/pipeline/federate.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { newStore, parser as n3Parser, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
|
|
2
|
-
import { CDP,
|
|
2
|
+
import { CDP, enabledSources, parseTtl, PATHS, sourceGraph, sourceName, stepIri, stepJournal } from "../utils.js"
|
|
3
3
|
import { COMMON_PREFIXES, writeTurtleFile } from "./write-turtle.js"
|
|
4
4
|
import { MAPPED_GRAPH, runMap } from "./steps/map.js"
|
|
5
5
|
import { runClean } from "./steps/clean.js"
|
|
@@ -26,7 +26,7 @@ export async function federate(root = process.cwd()) {
|
|
|
26
26
|
const abs = (p) => path.join(root, p)
|
|
27
27
|
const federationTtl = fs.readFileSync(abs(PATHS.federation), "utf8")
|
|
28
28
|
const defStore = storeFromTurtles([federationTtl, fs.readFileSync(abs(PATHS.matchKnowledge), "utf8")])
|
|
29
|
-
const sources =
|
|
29
|
+
const sources = enabledSources(parseTtl(federationTtl))
|
|
30
30
|
|
|
31
31
|
const store = newStore()
|
|
32
32
|
const journal = stepJournal()
|
package/src/pipeline/ingest.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { sparqlSelect, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
|
|
2
|
-
import { CDP,
|
|
2
|
+
import { CDP, enabledSources, parseTtl, PATHS, sourceName, stepJournal } from "../utils.js"
|
|
3
3
|
import { ensureJar, runLift } from "./steps/lift.js"
|
|
4
4
|
import { runFetch } from "./steps/fetch.js"
|
|
5
5
|
import path from "path"
|
|
@@ -30,7 +30,7 @@ export async function ingest(root = process.cwd()) {
|
|
|
30
30
|
if (!facts.has(r.source)) facts.set(r.source, { fetchUrl: r.fetchUrl, format: r.format, params: [] })
|
|
31
31
|
if (r.paramName) facts.get(r.source).params.push([r.paramName, r.paramValue])
|
|
32
32
|
}
|
|
33
|
-
const sources = new Map(
|
|
33
|
+
const sources = new Map(enabledSources(parseTtl(federationTtl)).map((iri) => [iri, facts.get(iri)]))
|
|
34
34
|
for (const [iri, s] of sources) {
|
|
35
35
|
if (!s.format) throw new Error(`${iri} declares no :format (needed to pick the lift query)`)
|
|
36
36
|
}
|
|
package/src/pipeline/steps/fetch.js
CHANGED
|
@@ -11,6 +11,8 @@ export const runFetch = ({ abs, root }, { name, fetchUrl, paramsJson }) => {
|
|
|
11
11
|
const outDir = PATHS.raw(name)
|
|
12
12
|
const origin = fetchUrl ?? abs(PATHS.staticDir(name))
|
|
13
13
|
console.log(`fetch ${fetchUrl ?? PATHS.staticDir(name)} (params ${paramsJson}) → ${outDir}`)
|
|
14
|
+
// Clear any prior output first, so changed run params (or changed records) can't leave stale files behind
|
|
15
|
+
fs.rmSync(abs(outDir), { recursive: true, force: true })
|
|
14
16
|
fs.mkdirSync(abs(outDir), { recursive: true })
|
|
15
17
|
run("node", [abs(PATHS.fetchScript(name)), abs(outDir), origin, paramsJson])
|
|
16
18
|
const harvest = { time: new Date().toISOString() }
|
|
package/src/pipeline/steps/lift.js
CHANGED
|
@@ -49,6 +49,8 @@ export const runLift = ({ abs }, { jar, name, format, params }) => {
|
|
|
49
49
|
const inAbs = abs(PATHS.raw(name))
|
|
50
50
|
const outAbs = abs(PATHS.lifted(name))
|
|
51
51
|
const files = fs.readdirSync(inAbs).filter(f => !f.startsWith(".")).sort()
|
|
52
|
+
// Clear stale lifted files first — the clean step reads every .ttl here.
|
|
53
|
+
fs.rmSync(outAbs, { recursive: true, force: true })
|
|
52
54
|
fs.mkdirSync(outAbs, { recursive: true })
|
|
53
55
|
console.log(`lift ${PATHS.raw(name)} (${files.length} files) → ${PATHS.lifted(name)}`)
|
|
54
56
|
for (const f of files) {
|
package/src/utils.js
CHANGED
|
@@ -108,6 +108,13 @@ export const shrink = (iri, prefixMap) => {
|
|
|
108
108
|
// Distinct object values of all quads whose predicate is `predIri`, listed in
// the order they are first encountered in `quads`.
export const objectsOf = (quads, predIri) => {
  const seen = new Set()
  for (const quad of quads) {
    if (quad.predicate.value === predIri) seen.add(quad.object.value)
  }
  return Array.from(seen)
}
|
|
110
110
|
|
|
111
|
+
// The federation's sources minus any switched off with `:enabled false`, in
// :hasSource declaration order — the source list engines and webapp run on.
//
// quads:   parsed federation quads (each with subject/predicate/object terms
//          exposing `.value`; literal objects may also expose `.datatype`).
// returns: array of source IRIs (strings), disabled ones removed.
//
// A subject counts as disabled when it has a `:enabled` triple whose object is
// "false". For literals typed xsd:boolean the second legal lexical form of
// false, "0", is honoured as well; an untyped or differently typed "0" is not.
export const enabledSources = (quads) => {
  const XSD_BOOLEAN = "http://www.w3.org/2001/XMLSchema#boolean"
  const enabledIri = `${CDP}enabled`
  const isFalse = (term) =>
    term.value === "false" || (term.value === "0" && term.datatype?.value === XSD_BOOLEAN)

  const disabled = new Set()
  for (const q of quads) {
    if (q.predicate.value === enabledIri && isFalse(q.object)) disabled.add(q.subject.value)
  }
  return objectsOf(quads, `${CDP}hasSource`).filter((iri) => !disabled.has(iri))
}
|
|
117
|
+
|
|
111
118
|
// Set of subjects typed `rdf:type typeIri`. Iteration order = encounter order.
|
|
112
119
|
export function subjectsOfType(quads, typeIri) {
|
|
113
120
|
const out = new Set()
|
|
package/webapp/src/instanceData.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
// artifact resolves to "" (pages render empty). Top-level await — importing
|
|
7
7
|
// modules stay synchronous.
|
|
8
8
|
|
|
9
|
-
import { CDP, objectsOf, parseTtl, PATHS, prefixesOf, sourceName } from "@directory-builder/core/utils"
|
|
9
|
+
import { CDP, enabledSources, objectsOf, parseTtl, PATHS, prefixesOf, sourceName } from "@directory-builder/core/utils"
|
|
10
10
|
|
|
11
11
|
const fetchText = async (path) => {
|
|
12
12
|
const res = await fetch(`${import.meta.env.BASE_URL}${path}`).catch(() => null)
|
|
@@ -16,7 +16,7 @@ const fetchText = async (path) => {
|
|
|
16
16
|
export const federationTtl = await fetchText(PATHS.federation)
|
|
17
17
|
|
|
18
18
|
const fedQuads = parseTtl(federationTtl)
|
|
19
|
-
const cleanedPaths =
|
|
19
|
+
const cleanedPaths = enabledSources(fedQuads).map((iri) => PATHS.cleaned(sourceName(iri)))
|
|
20
20
|
// The instance's repo URL (:federation :repository …) — undefined when not
|
|
21
21
|
// declared; pages hide their GitHub links then.
|
|
22
22
|
export const repositoryUrl = objectsOf(fedQuads, `${CDP}repository`)[0]
|
|
package/webapp/src/loadSources.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// Reads: federation, mapped, ingest-log TTL strings passed by Sources.jsx
|
|
4
4
|
// Does: returns source[] ({iri, label, format, totalFields, mappedFields, records, …})
|
|
5
5
|
|
|
6
|
-
import { CDP as NS, formatFamily, parseTtl, PATHS, sourceName
|
|
6
|
+
import { CDP as NS, enabledSources, formatFamily, parseTtl, PATHS, sourceName } from "@directory-builder/core/utils"
|
|
7
7
|
|
|
8
8
|
const PROV_AT_TIME = "http://www.w3.org/ns/prov#atTime"
|
|
9
9
|
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
|
@@ -18,7 +18,7 @@ export function loadSources(federationTtl, mappedTtl, ingestLogTtl) {
|
|
|
18
18
|
const mappedQuads = mappedTtl ? parseTtl(mappedTtl) : []
|
|
19
19
|
const logQuads = ingestLogTtl ? parseTtl(ingestLogTtl) : []
|
|
20
20
|
|
|
21
|
-
const sourceIris =
|
|
21
|
+
const sourceIris = new Set(enabledSources(fedQuads))
|
|
22
22
|
|
|
23
23
|
const props = new Map()
|
|
24
24
|
const get = (iri) => {
|