@directory-builder/core 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -93,8 +93,10 @@ INSTANCE=../sosuse-directory-builder npm run webapp # any other instance dir
93
93
 
94
94
  Instances own the About page by providing `webapp/content/about.md` (markdown,
95
95
  served and deployed like config and data); without one, a generic default
96
- renders. Declaring `:federation :repository "https://github.com/…"` adds the
97
- GitHub links (nav, static-source folders); without it they stay hidden.
96
+ renders. Instances own the Query page's starting query the same way, via
97
+ `webapp/content/query.sparql`. On the `:federation` node, `rdfs:label` sets
98
+ the page title and `:repository "https://github.com/…"` adds the GitHub links
99
+ (nav, static-source folders); both stay generic/hidden when absent.
98
100
 
99
101
  Instances can inject **exporters** — output adapters mapping the directory
100
102
  into an external schema. The federation declares them (`:federation
@@ -110,3 +112,12 @@ exported separately so bundlers never see the engines' Node imports:
110
112
  ```js
111
113
  import { CDP, parseTtl, PATHS } from "@directory-builder/core/utils"
112
114
  ```
115
+
116
+ ## Roadmap
117
+
118
+ - Testing
119
+ - Periodic harvesting
120
+ - `@directory-builder/create`: an npm initializer scaffolding a new use
121
+ case, plus a `validate` command checking an instance setup
122
+ - `@directory-builder/ui`: extract the webapp into its own package
123
+ - ...
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@directory-builder/core",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "Use-case-agnostic engine for config-driven federation pipelines",
5
5
  "author": "Civic Data Lab",
6
6
  "repository": "github:foederierter-datenpool/directory-builder-core",
package/src/index.js CHANGED
@@ -2,5 +2,5 @@
2
2
  // "./utils" subpath export — import those from "@directory-builder/core/utils"
3
3
  // so bundlers never see the engines' fs/child_process imports.
4
4
  export { Pipeline } from "./pipeline.js"
5
- export { ingest } from "./ingest.js"
6
- export { federate } from "./federate.js"
5
+ export { ingest } from "./pipeline/ingest.js"
6
+ export { federate } from "./pipeline/federate.js"
@@ -0,0 +1,69 @@
1
+ import { newStore, parser as n3Parser, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
2
+ import { CDP, objectsOf, parseTtl, PATHS, sourceGraph, sourceName, stepIri, stepJournal } from "../utils.js"
3
+ import { COMMON_PREFIXES, writeTurtleFile } from "./write-turtle.js"
4
+ import { MAPPED_GRAPH, runMap } from "./steps/map.js"
5
+ import { runClean } from "./steps/clean.js"
6
+ import { runMatch } from "./steps/match.js"
7
+ import { runMerge } from "./steps/merge.js"
8
+ import { runResolve } from "./steps/resolve.js"
9
+ import { DataFactory } from "n3"
10
+ import path from "path"
11
+ import fs from "fs"
12
+
13
+ const df = DataFactory
14
+
15
+ // ---- Federate engine -----------------------------------------------------
16
+ // Clean per source, load, then map → match → merge → resolve (one module per
17
+ // step under steps/, sharing the ctx of store + config + path resolver). The
18
+ // step sequence is the engine's own shape; config declares only the sources,
19
+ // processed in :hasSource declaration order. Paths follow from the source
20
+ // name (PATHS), resolved against the instance `root`. Each step runs through
21
+ // the journal, which records what executed and is rendered by the webapp's
22
+ // Pipeline page. The clean steps' predecessors are the other engine's lift
23
+ // steps, referenced by their conventional stepIri.
24
+
25
export async function federate(root = process.cwd()) {
  // Federate engine entry point. `root` is the instance directory every PATHS
  // entry is resolved against (defaults to the current working directory).
  // Runs clean per source, loads the cleaned TTL into per-source graphs, then
  // map → match → merge → resolve, and finally writes the step journal as
  // Turtle to PATHS.federateLog. Resolves to undefined; any step error rejects.
  const abs = (p) => path.join(root, p)
  const federationTtl = fs.readFileSync(abs(PATHS.federation), "utf8")
  const defStore = storeFromTurtles([federationTtl, fs.readFileSync(abs(PATHS.matchKnowledge), "utf8")])
  const sources = objectsOf(parseTtl(federationTtl), `${CDP}hasSource`)

  const store = newStore()
  const journal = stepJournal()
  const ctx = { store, defStore, abs }

  // One clean step per source, each declared as following that source's lift
  // step (referenced by its conventional stepIri).
  const cleanSteps = []
  for (const src of sources) {
    const name = sourceName(src)
    cleanSteps.push(await journal.step("clean", { source: src, after: [stepIri("lift", name)] },
      () => runClean(ctx, name)))
  }

  // Load each source's cleaned TTL into its own graph — plain mechanics, not a
  // pipeline step.
  for (const src of sources) {
    const name = sourceName(src)
    console.log(`load ${PATHS.cleaned(name)} → <${sourceGraph(name)}>`)
    const graph = df.namedNode(sourceGraph(name))
    for (const quad of n3Parser.parse(fs.readFileSync(abs(PATHS.cleaned(name)), "utf8"))) {
      store.addQuad(df.quad(quad.subject, quad.predicate, quad.object, graph))
    }
  }

  const mapStep = await journal.step("map", { after: cleanSteps }, async () => {
    await runMap(ctx, PATHS.mappingQueries)
    const mappedQuads = store.getQuads(null, null, null, MAPPED_GRAPH)
    await writeTurtleFile(abs(PATHS.mapped), mappedQuads, { ...COMMON_PREFIXES, cdp: CDP })
    console.log(`map: wrote ${mappedQuads.length} triples → ${PATHS.mapped}`)
  })
  const matchStep = await journal.step("match", { after: [mapStep] }, () => runMatch(ctx, PATHS.matches))
  const mergeStep = await journal.step("merge", { after: [matchStep] }, () => runMerge(ctx, PATHS.merged, PATHS.provenance))
  await journal.step("resolve", { after: [mergeStep] }, () => runResolve(ctx, PATHS.final))

  // Make sure the log's directory exists before writing, as the ingest engine
  // does for its own log; a no-op when the directory is already there.
  const logPath = abs(PATHS.federateLog)
  fs.mkdirSync(path.dirname(logPath), { recursive: true })
  fs.writeFileSync(logPath, `@prefix : <${CDP}> .
@prefix p-plan: <http://purl.org/net/p-plan#> .

${journal.toTurtle()}
`)
  console.log(`log: wrote steps → ${PATHS.federateLog}`)
}
@@ -0,0 +1,97 @@
1
+ import { sparqlSelect, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
2
+ import { CDP, objectsOf, parseTtl, PATHS, sourceName, stepJournal } from "../utils.js"
3
+ import { ensureJar, runLift } from "./steps/lift.js"
4
+ import { runFetch } from "./steps/fetch.js"
5
+ import path from "path"
6
+ import fs from "fs"
7
+
8
+ // Ingest engine: fetch + lift per source declared in the instance's
9
+ // federation.ttl. `root` is the instance directory all PATHS resolve against.
10
export async function ingest(root = process.cwd()) {
  // Ingest engine entry point. `root` is the instance directory every PATHS
  // entry is resolved against (defaults to the current working directory).
  // Runs fetch then lift for each declared source and writes the step journal
  // plus one :IngestRun record to PATHS.ingestLog. Throws when a declared
  // source has no :format.
  const abs = (p) => path.join(root, p)
  const federationTtl = fs.readFileSync(abs(PATHS.federation), "utf8")
  const defStore = storeFromTurtles([federationTtl])

  // ---- Read the sources ------------------------------------------------
  // The step graph (fetch → lift per source) is the engine's own shape;
  // config declares only the sources and their facts. Lift params are SPARQL
  // Anything variables declared per source. Sources run in :hasSource
  // declaration order.

  const facts = new Map()
  for (const r of await sparqlSelect(`
PREFIX : <${CDP}>
SELECT ?source ?fetchUrl ?format ?paramName ?paramValue WHERE {
:federation :hasSource ?source .
OPTIONAL { ?source :fetchUrl ?fetchUrl }
OPTIONAL { ?source :format ?format }
OPTIONAL { ?source :hasLiftParam [ :name ?paramName ; :value ?paramValue ] }
}`, [defStore])) {
    if (!facts.has(r.source)) facts.set(r.source, { fetchUrl: r.fetchUrl, format: r.format, params: [] })
    if (r.paramName) facts.get(r.source).params.push([r.paramName, r.paramValue])
  }
  const sources = new Map(objectsOf(parseTtl(federationTtl), `${CDP}hasSource`).map((iri) => [iri, facts.get(iri)]))
  for (const [iri, s] of sources) {
    // `s` is undefined when the source IRI did not come back from the query
    // above; report that through the same message instead of a TypeError.
    if (!s?.format) throw new Error(`${iri} declares no :format (needed to pick the lift query)`)
  }

  const jar = await ensureJar(abs)

  // ---- Run steps ----------------------------------------------------------

  // All :hasRunParam values grouped by name, handed to every fetcher as one
  // JSON argument — each fetcher picks the parameters it needs.
  const runParams = {}
  for (const r of await sparqlSelect(`
PREFIX : <${CDP}>
SELECT ?name ?value WHERE { :federation :hasRunParam [ :name ?name ; :value ?value ] } ORDER BY ?name ?value`, [defStore])) {
    (runParams[r.name] ??= []).push(r.value)
  }
  const paramsJson = JSON.stringify(runParams)

  const runStart = new Date()
  const harvests = []
  const journal = stepJournal()
  const fetchStepOf = new Map()
  const ctx = { abs, root }

  // All fetches run first; each one's harvest record is collected for the log.
  for (const [iri, s] of sources) {
    const name = sourceName(iri)
    fetchStepOf.set(iri, await journal.step("fetch", { source: iri }, () => {
      harvests.push({ source: iri, ...runFetch(ctx, { name, fetchUrl: s.fetchUrl, paramsJson }) })
    }))
  }

  // Each lift step is declared as following its own source's fetch step.
  for (const [iri, s] of sources) {
    const name = sourceName(iri)
    await journal.step("lift", { source: iri, after: [fetchStepOf.get(iri)] },
      () => runLift(ctx, { jar, name, format: s.format, params: s.params }))
  }

  const dt = (s) => `"${s}"^^xsd:dateTime`
  // Run id: "run" + the first 14 digits of the ISO start time (yyyymmddhhmmss).
  const runId = "run" + runStart.toISOString().replace(/\D/g, "").slice(0, 14)
  const harvestPart = harvests.length
    ? ` ;\n :harvested\n` + harvests.map((h) => {
      const local = h.source.split("#").pop()
      const committed = h.staticCommittedAt ? ` ; :staticCommittedAt ${dt(h.staticCommittedAt)}` : ""
      return ` [ :ofSource :${local} ; prov:atTime ${dt(h.time)}${committed} ]`
    }).join(" ,\n")
    : ""

  const block = `
${journal.toTurtle()}

:${runId} a :IngestRun ;
prov:startedAtTime ${dt(runStart.toISOString())} ;
prov:endedAtTime ${dt(new Date().toISOString())}${harvestPart} .
`

  const prefixes = `@prefix : <${CDP}> .
@prefix p-plan: <http://purl.org/net/p-plan#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
`
  fs.mkdirSync(path.dirname(abs(PATHS.ingestLog)), { recursive: true })
  fs.writeFileSync(abs(PATHS.ingestLog), prefixes + block)
  console.log(`log: wrote steps + IngestRun → ${PATHS.ingestLog}`)
}
@@ -0,0 +1,8 @@
1
+ import { spawnSync } from "child_process"
2
+
3
// Run an external command (a fetcher's node process, SPARQL Anything's java),
// inheriting stdio; any failure aborts the step.
//
// cmd  — executable name or path
// args — array of string arguments
// Throws when the process cannot be spawned (the underlying error is attached
// as `cause`), when it is terminated by a signal, or when it exits non-zero.
export const run = (cmd, args) => {
  const r = spawnSync(cmd, args, { stdio: "inherit" })
  const invocation = `${cmd} ${args.join(" ")}`
  // A failed spawn (e.g. ENOENT) is reported through r.error with a null
  // status, and a signal kill through r.signal — name those causes explicitly
  // rather than reporting "Exit null".
  if (r.error) throw new Error(`Failed to run ${invocation}: ${r.error.message}`, { cause: r.error })
  if (r.signal) throw new Error(`Killed by ${r.signal}: ${invocation}`)
  if (r.status !== 0) throw new Error(`Exit ${r.status}: ${invocation}`)
}
@@ -0,0 +1,27 @@
1
+ import { sparqlConstruct, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
2
+ import { writeTurtleFile } from "../write-turtle.js"
3
+ import { CDP, PATHS } from "../../utils.js"
4
+ import path from "path"
5
+ import fs from "fs"
6
+
7
+ // Clean step: the source's clean.sparql reshapes its lifted RDF into
8
+ // federation subjects (xyz:/cdp: vocabulary only — schema: enters at map).
9
+ export const runClean = async ({ abs }, name) => {
10
+ const cleanQuery = fs.readFileSync(abs(PATHS.cleanQuery(name)), "utf8")
11
+ const inDir = PATHS.lifted(name)
12
+ const outPath = PATHS.cleaned(name)
13
+ // Run CONSTRUCT per file so each lifted TTL stays isolated in its
14
+ // own store — the clean SPARQL can't cross-join across documents.
15
+ const inAbs = abs(inDir)
16
+ const files = fs.readdirSync(inAbs).filter(f => f.endsWith(".ttl")).sort()
17
+ console.log(`clean ${inDir} (${files.length} files) → ${outPath}`)
18
+ const allQuads = []
19
+ for (const f of files) {
20
+ const fileStore = storeFromTurtles([fs.readFileSync(path.join(inAbs, f), "utf8")])
21
+ allQuads.push(...await sparqlConstruct(cleanQuery, [fileStore]))
22
+ }
23
+ await writeTurtleFile(abs(outPath), allQuads, {
24
+ xyz: "http://sparql.xyz/facade-x/data/",
25
+ cdp: CDP,
26
+ })
27
+ }
@@ -0,0 +1,24 @@
1
import { PATHS } from "../../utils.js"
import { execFileSync, execSync } from "child_process"
import { run } from "../run.js"
import fs from "fs"
5
+
6
// Fetch step: run the source's fetch.js. Live sources pass their :fetchUrl;
// static-file sources pass the absolute static dir instead — the script gets
// whichever applies, plus the federation's run params as one JSON argument.
//
// ctx.abs / ctx.root — path resolver and instance root (git runs in root)
// name, fetchUrl, paramsJson — source name, optional live URL, run params JSON
// Returns the harvest record for the ingest log: { time } plus, for static
// sources under git, { staticCommittedAt }.
export const runFetch = ({ abs, root }, { name, fetchUrl, paramsJson }) => {
  const outDir = PATHS.raw(name)
  const staticDir = PATHS.staticDir(name)
  const origin = fetchUrl ?? abs(staticDir)
  console.log(`fetch ${fetchUrl ?? staticDir} (params ${paramsJson}) → ${outDir}`)
  fs.mkdirSync(abs(outDir), { recursive: true })
  run("node", [abs(PATHS.fetchScript(name)), abs(outDir), origin, paramsJson])
  const harvest = { time: new Date().toISOString() }
  // Static sources have no live harvest — record the files' git commit
  // time instead (the freshness the Sources page shows for them).
  if (!fetchUrl) {
    try {
      // Argument array, no shell: the path reaches git verbatim even when it
      // contains quotes, spaces, `$` or backticks.
      const iso = execFileSync("git", ["log", "-1", "--format=%cI", "--", staticDir],
        { cwd: root, encoding: "utf8" }).trim()
      if (iso) harvest.staticCommittedAt = iso
    } catch { /* not committed yet / no git → omit */ }
  }
  return harvest
}
@@ -0,0 +1,58 @@
1
+ import { localName, PATHS } from "../../utils.js"
2
+ import { run } from "../run.js"
3
+ import path from "path"
4
+ import fs from "fs"
5
+
6
const SPARQL_ANYTHING_VERSION = "v1.1.0"

// The generic lift queries ship with the engine — they resolve against this
// package, not the instance root like everything else in PATHS.
const liftQueryFor = (formatIri) =>
  path.join(import.meta.dirname, "../../lift", `${localName(formatIri).toLowerCase()}.sparql`)

// SPARQL Anything is the lift tool — cached per instance (tools/, gitignored),
// downloaded on first run and re-downloaded on version bumps.
//
// abs — resolves an instance-relative path to an absolute one
// Resolves to the absolute jar path. Rejects when the download response is
// not ok; in that case no jar file is created.
export async function ensureJar(abs) {
  const JAR = abs("tools/sparql-anything.jar")
  const VERSION_FILE = abs("tools/sparql-anything.version")
  const haveCurrentJar = fs.existsSync(JAR) && fs.existsSync(VERSION_FILE)
    && fs.readFileSync(VERSION_FILE, "utf8").trim() === SPARQL_ANYTHING_VERSION
  if (haveCurrentJar) return JAR

  const url = `https://github.com/SPARQL-Anything/sparql.anything/releases/download/${SPARQL_ANYTHING_VERSION}/sparql-anything-${SPARQL_ANYTHING_VERSION}.jar`
  console.log(`Downloading sparql-anything ${SPARQL_ANYTHING_VERSION}...`)
  fs.mkdirSync(path.dirname(JAR), { recursive: true })
  const response = await fetch(url)
  if (!response.ok) throw new Error(`Failed to fetch ${url}: ${response.status}`)
  // Write next to the target and rename into place, so an interrupted write
  // never leaves a truncated jar beside a version file that already matches.
  const partial = `${JAR}.partial`
  fs.writeFileSync(partial, Buffer.from(await response.arrayBuffer()))
  fs.renameSync(partial, JAR)
  fs.writeFileSync(VERSION_FILE, SPARQL_ANYTHING_VERSION)
  console.log(`Saved to ${JAR}`)
  return JAR
}
33
+
34
// Lift step: every visible file in the source's raw directory is handed to
// SPARQL Anything with the engine's bundled query for the source's :format,
// producing one <stem>.ttl per input in the lifted directory. The source's
// :hasLiftParam pairs are passed along as extra `-v name=value` variables.
export const runLift = ({ abs }, { jar, name, format, params }) => {
  // TODO: directory mode spawns one JVM per file (~1s startup each).
  // Fine at small N; revisit if a source crosses ~50 items. SPARQL Anything
  // accepts VALUES ?_location { … } in the lift query, which would let one
  // invocation handle the whole batch.
  const liftQuery = liftQueryFor(format)
  const rawDir = abs(PATHS.raw(name))
  const liftedDir = abs(PATHS.lifted(name))

  // Dotfiles are skipped; sorting keeps the processing order stable.
  const inputs = fs.readdirSync(rawDir).filter((entry) => !entry.startsWith(".")).sort()
  fs.mkdirSync(liftedDir, { recursive: true })
  console.log(`lift ${PATHS.raw(name)} (${inputs.length} files) → ${PATHS.lifted(name)}`)

  for (const entry of inputs) {
    const stem = path.basename(entry, path.extname(entry))
    const location = path.join(rawDir, entry)
    const target = path.join(liftedDir, `${stem}.ttl`)
    const javaArgs = [
      "-jar", jar,
      "-q", liftQuery,
      "-v", `location=${location}`,
      "-f", "TTL",
      "-o", target,
    ]
    // Source-declared lift params follow the fixed arguments.
    for (const [pName, value] of params) {
      javaArgs.push("-v", `${pName}=${value}`)
    }
    run("java", javaArgs)
  }
}
@@ -0,0 +1,172 @@
1
+ import { sparqlInsertDelete, sparqlSelect } from "@foerderfunke/sem-ops-utils"
2
+ import { buildPrefixBlock, CDP, PATHS, shrink, sourceName } from "../../utils.js"
3
+ import { DataFactory } from "n3"
4
+ import path from "path"
5
+ import fs from "fs"
6
+
7
+ const df = DataFactory
8
+
9
+ export const MAPPED_GRAPH = df.namedNode("urn:mapped")
10
+
11
+ // ---- Direct-mapping generator ------------------------------------------
12
+
13
+ const XYZ = "http://sparql.xyz/facade-x/data/"
14
+
15
const buildDirectInsert = ({ sourceGraph, source, targetClass, target }, fields) => {
  // Builds the SPARQL INSERT text that copies a mapping's direct (non-:via)
  // fields from the source graph into <urn:mapped>. `fields` rows carry
  // fieldPath, predicate and an optional parentPath. Returns the query string.
  const prefixes = {
    xyz: XYZ,
    cdp: CDP,
    cdf: "https://civic-data.de/federated-directory#",
    schema: "http://schema.org/",
    foaf: "http://xmlns.com/foaf/0.1/",
    dct: "http://purl.org/dc/terms/",
  }
  // shrink() hands the IRI back untouched when no prefix applies; emit <…> then.
  const short = (iri) => {
    const compact = shrink(iri, prefixes)
    return compact === iri ? `<${iri}>` : compact
  }

  const varOf = (fieldPath) => `?${fieldPath}`
  // STR() before the emptiness check so the guard works for any literal
  // datatype — a bare `?v != ""` errors on e.g. xsd:int and would silently
  // drop the field (AWO's numeric ids hit exactly this).
  const optLit = (subj, fieldPath) => {
    const variable = varOf(fieldPath)
    return `OPTIONAL { ${subj} xyz:${fieldPath} ${variable} . FILTER(isLiteral(${variable}) && STR(${variable}) != "") }`
  }

  const insertLines = []
  for (const field of fields) {
    insertLines.push(` ?entity ${short(field.predicate)} ${varOf(field.fieldPath)} .`)
  }
  const insertBlock = insertLines.join("\n")

  // Source subjects = federation IRIs after the clean step, identified via
  // cdp:fromSource — no minting from a key field. Where clean reshapes one
  // source into several entity kinds it tags each subject with cdp:targetSchema;
  // select only those for this mapping's schema. Subjects with no marker
  // (single-entity sources like caritas/dhs) match unconditionally.
  const patterns = [`?entity cdp:fromSource ${short(source)} .`]
  if (target) {
    patterns.push(
      `OPTIONAL { ?entity cdp:targetSchema ?_ts }`,
      `FILTER(!bound(?_ts) || ?_ts = ${short(target)})`,
    )
  }

  // Top-level fields become patterns right away; sub-fields are grouped under
  // their parent path (in first-seen order) and appended afterwards.
  const nested = new Map()
  for (const field of fields) {
    if (!field.parentPath) {
      patterns.push(optLit("?entity", field.fieldPath))
      continue
    }
    const group = nested.get(field.parentPath)
    if (group) group.push(field)
    else nested.set(field.parentPath, [field])
  }
  Array.from(nested).forEach(([parent, subs], idx) => {
    const pv = `?_p${idx}`
    const inner = subs.map((sub) => ` ${optLit(pv, sub.fieldPath)}`).join("\n")
    patterns.push(`OPTIONAL {\n ?entity xyz:${parent} ${pv} .\n${inner}\n }`)
  })

  // The target schema's :targetClass becomes the record's rdf:type here in the
  // mapped graph — this is where schema: vocabulary first enters; the clean step
  // stays in xyz:/cdp: only.
  let typeClause = ""
  if (targetClass) typeClause = `a ${short(targetClass)} ; `

  return `${buildPrefixBlock(prefixes)}

INSERT {
GRAPH <urn:mapped> {
?entity ${typeClause}cdp:fromSource ${short(source)} .
${insertBlock}
}
} WHERE {
GRAPH <${sourceGraph}> {
${patterns.join("\n ")}
}
}`
}
87
+
88
export const runMap = async ({ store, defStore, abs }, queriesDir) => {
  // Map step: for every :Mapping in the definitions, run its generated direct
  // INSERT (also written to queriesDir for inspection), then its :via
  // transform scripts, and finally the relationship links — all as updates
  // against `store`.
  const select = (query) => sparqlSelect(query, [defStore])
  const localOf = (iri) => iri.split("#").pop()

  const mappings = await select(`
PREFIX : <${CDP}>
SELECT ?mapping ?source ?sourceGraph ?target ?targetClass WHERE {
?mapping a :Mapping ;
:fromSource ?source .
OPTIONAL { ?mapping :sourceGraph ?sourceGraph }
OPTIONAL { ?mapping :toTarget ?target }
OPTIONAL { ?mapping :toTarget/:targetClass ?targetClass }
} ORDER BY ?mapping`)

  for (const mapping of mappings) {
    const directRows = await select(`
PREFIX : <${CDP}>
SELECT ?fieldPath ?predicate ?parentPath WHERE {
<${mapping.mapping}> :hasFieldMapping ?fm .
?fm :from ?src ; :to ?tgt .
FILTER NOT EXISTS { ?fm :via ?_v }
?tgt :targetPredicate ?predicate .
?src :fieldPath ?fieldPath .
OPTIONAL { ?parent :hasSubField ?src . ?parent :fieldPath ?parentPath }
}`)

    // Direct mappings need at least one field row and a declared source graph.
    if (directRows.length > 0 && mapping.sourceGraph) {
      const label = localOf(mapping.mapping)
      const query = buildDirectInsert(mapping, directRows)
      const queryPath = abs(path.join(queriesDir, `${label}.sparql`))
      fs.mkdirSync(path.dirname(queryPath), { recursive: true })
      fs.writeFileSync(queryPath, query)
      console.log(`map ${label} direct (${directRows.length} mappings) → ${queryPath}`)
      await sparqlInsertDelete(query, store)
    }

    // :via names a transform of the mapping's source — the script path
    // follows by convention (sources/<source>/transform-<via>.sparql).
    const viaRows = await select(`
PREFIX : <${CDP}>
SELECT DISTINCT ?via WHERE {
<${mapping.mapping}> :hasFieldMapping/:via ?via .
} ORDER BY ?via`)

    for (const row of viaRows) {
      const script = PATHS.transform(sourceName(mapping.source), row.via)
      console.log(`map ${script}`)
      const update = fs.readFileSync(abs(script), "utf8")
      await sparqlInsertDelete(update, store)
    }
  }

  // A mapping's :hasRelationship turns the clean step's source-level link
  // (e.g. :providedBy) into a target predicate (schema:provider), matching the
  // two ends by their cdp:targetSchema. Both ends are still source IRIs here;
  // the merge step rewrites them to the minted cluster IRIs.
  const linkRows = await select(`
PREFIX : <${CDP}>
SELECT ?mapping ?sourceGraph ?fromSchema ?sourcePredicate ?targetPredicate ?toSchema WHERE {
?mapping a :Mapping ;
:sourceGraph ?sourceGraph ;
:toTarget ?fromSchema ;
:hasRelationship ?rel .
?rel :sourcePredicate ?sourcePredicate ;
:toTargetField ?field ;
:toTargetSchema ?toSchema .
?field :targetPredicate ?targetPredicate .
} ORDER BY ?mapping`)

  // The same prefix table and IRI shortener serve every link query.
  const linkPrefixes = { cdp: CDP, schema: "http://schema.org/" }
  const short = (iri) => {
    const compact = shrink(iri, linkPrefixes)
    return compact === iri ? `<${iri}>` : compact
  }

  for (const rel of linkRows) {
    const targetPredicate = short(rel.targetPredicate)
    const query = `${buildPrefixBlock(linkPrefixes)}

INSERT {
GRAPH <urn:mapped> {
?from ${targetPredicate} ?to .
}
} WHERE {
GRAPH <${rel.sourceGraph}> {
?from ${short(rel.sourcePredicate)} ?to ;
cdp:targetSchema ${short(rel.fromSchema)} .
?to cdp:targetSchema ${short(rel.toSchema)} .
}
}`
    console.log(`map ${localOf(rel.mapping)} link (${targetPredicate})`)
    await sparqlInsertDelete(query, store)
  }
}