@directory-builder/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +112 -0
  3. package/bin/cli.js +38 -0
  4. package/example/README.md +64 -0
  5. package/example/config/federation.ttl +136 -0
  6. package/example/config/match-knowledge.ttl +8 -0
  7. package/example/sources/cityopen/clean.sparql +17 -0
  8. package/example/sources/cityopen/fetch.js +14 -0
  9. package/example/sources/cityopen/static/libraries.json +32 -0
  10. package/example/sources/civichub/clean.sparql +34 -0
  11. package/example/sources/civichub/fetch.js +14 -0
  12. package/example/sources/civichub/static/libraries.json +38 -0
  13. package/package.json +38 -0
  14. package/src/federate.js +571 -0
  15. package/src/index.js +6 -0
  16. package/src/ingest.js +158 -0
  17. package/src/lift/html.sparql +12 -0
  18. package/src/lift/json.sparql +12 -0
  19. package/src/pipeline.js +16 -0
  20. package/src/utils.js +152 -0
  21. package/src/webapp.js +41 -0
  22. package/webapp/index.html +11 -0
  23. package/webapp/src/About.jsx +24 -0
  24. package/webapp/src/App.jsx +96 -0
  25. package/webapp/src/Card.jsx +32 -0
  26. package/webapp/src/ColumnGraph.jsx +290 -0
  27. package/webapp/src/Directory.jsx +15 -0
  28. package/webapp/src/Download.jsx +174 -0
  29. package/webapp/src/MapGraph.jsx +244 -0
  30. package/webapp/src/MatchGraph.jsx +137 -0
  31. package/webapp/src/MergeTables.jsx +61 -0
  32. package/webapp/src/OrgCard.jsx +126 -0
  33. package/webapp/src/Pipeline.jsx +41 -0
  34. package/webapp/src/Query.jsx +165 -0
  35. package/webapp/src/Sources.jsx +52 -0
  36. package/webapp/src/instanceData.js +35 -0
  37. package/webapp/src/loadMap.js +276 -0
  38. package/webapp/src/loadMatch.js +228 -0
  39. package/webapp/src/loadMerge.js +93 -0
  40. package/webapp/src/loadPipeline.js +130 -0
  41. package/webapp/src/loadSources.js +102 -0
  42. package/webapp/src/main.jsx +9 -0
  43. package/webapp/src/mergeOrgs.js +15 -0
  44. package/webapp/src/sourceMeta.js +81 -0
  45. package/webapp/src/styles.css +23 -0
  46. package/webapp/vite.config.js +14 -0
  47. package/webapp/vite.js +28 -0
@@ -0,0 +1,165 @@
1
+ // Query view: a Yasgui SPARQL editor wired to an in-browser n3 store (no server) —
2
+ // a fetch interceptor routes the fake endpoint through Comunica.
3
+ // Reads: data/pipeline/final.ttl (loaded into the store)
4
+ // Does: renders the Query page; answers SPARQL against final.ttl in-browser
5
+
6
+ import { storeFromTurtles } from "@foerderfunke/sem-ops-utils/core"
7
+ import { queryEngine } from "@foerderfunke/sem-ops-utils/sparql"
8
+ import { finalTtl } from "./instanceData.js"
9
+ import React, { useEffect, useRef } from "react"
10
+ import "@zazuko/yasgui/build/yasgui.min.css"
11
+ import Yasgui from "@zazuko/yasgui"
12
+ import { Writer } from "n3"
13
+
14
+ // Yasgui talks to a SPARQL endpoint over HTTP. We have no endpoint — queries
15
+ // run in-browser against an n3 Store. So we point Yasgui at this fake URL and
16
+ // install a fetch interceptor that routes those requests through Comunica.
17
+ const ENDPOINT = "http://local/sparql"
18
+
19
+ const store = storeFromTurtles([finalTtl])
20
+
21
+ const INITIAL_QUERY = `PREFIX schema: <http://schema.org/>
22
+ PREFIX cdf: <https://civic-data.de/federated-directory#>
23
+
24
+ SELECT ?org (SAMPLE(?name) AS ?title) WHERE {
25
+ ?org schema:name ?name .
26
+ }
27
+ GROUP BY ?org
28
+ ORDER BY ?title`
29
+
30
+ Yasgui.Yasqe.defaults.value = INITIAL_QUERY
31
+
32
const XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"

// Convert one RDF term into the { type, value, ... } object used for a single
// binding in the SPARQL JSON results this module serves.
//   Literal   -> "literal", plus "xml:lang" when a language tag is set,
//                otherwise "datatype" unless it is xsd:string
//   BlankNode -> "bnode"
//   any other -> "uri"
const termToJson = (term) => {
  const { termType, value } = term
  switch (termType) {
    case "Literal": {
      const json = { type: "literal", value }
      if (term.language) {
        json["xml:lang"] = term.language
      } else if (term.datatype && term.datatype.value !== XSD_STRING) {
        json.datatype = term.datatype.value
      }
      return json
    }
    case "BlankNode":
      return { type: "bnode", value }
    default:
      return { type: "uri", value }
  }
}
43
+
44
// Drain a bindings stream into { vars, bindings }.
// Each "data" item is iterated as [variable, term] pairs; every row becomes a
// plain object keyed by variable name, and `vars` lists the variable names in
// the order they were first seen. Resolves on "end", rejects on "error".
const collectBindings = (stream) => new Promise((resolve, reject) => {
  const variableNames = new Set()
  const rows = []

  const appendRow = (binding) => {
    const row = {}
    for (const [variable, term] of binding) {
      const name = variable.value
      variableNames.add(name)
      row[name] = termToJson(term)
    }
    rows.push(row)
  }
  const finish = () => resolve({ vars: Array.from(variableNames), bindings: rows })

  stream.on("data", appendRow)
  stream.on("end", finish)
  stream.on("error", reject)
})
55
+
56
// Drain a quad stream into a Turtle string via an n3 Writer.
// Resolves with the serialised document once the stream ends; rejects on a
// stream error or when the writer reports an error while finishing.
const collectQuadsAsTurtle = (stream) => new Promise((resolve, reject) => {
  const turtleWriter = new Writer({ format: "text/turtle" })

  const finish = () => {
    turtleWriter.end((error, turtle) => {
      if (error) reject(error)
      else resolve(turtle)
    })
  }

  stream.on("data", (quad) => { turtleWriter.addQuad(quad) })
  stream.on("end", finish)
  stream.on("error", reject)
})
62
+
63
+ // Yasqe calls `fetch(new Request(url, opts))` rather than `fetch(url, opts)`,
64
+ // so we normalise both forms into one shape.
65
// Normalise the arguments of a fetch() call into { url, method, headers, body }.
//   input: a Request, a URL object, or a URL string (the forms fetch accepts)
//   init:  the optional fetch init object (ignored when input is a Request)
// `headers` is always a Headers instance and `body` always a string ("" when
// there is none). Reading the body consumes a Request's stream.
const requestParts = async (input, init) => {
  if (input instanceof Request) {
    const body = input.method !== "GET" ? await input.text() : ""
    return { url: input.url, method: input.method, headers: input.headers, body }
  }
  // URL objects expose `href`, not `url`; without this branch they would
  // produce `url: undefined`.
  let url
  if (typeof input === "string") url = input
  else if (input instanceof URL) url = input.href
  else url = input?.url
  const headers = new Headers(init?.headers || {})
  // String() covers non-string bodies that stringify usefully (e.g. URLSearchParams).
  const body = init?.body != null ? String(init.body) : ""
  return { url, method: init?.method || "GET", headers, body }
}
73
+
74
// Pull the SPARQL text (and the Accept header) out of normalised request parts.
// Lookup order for non-GET requests with a body:
//   1. raw body when Content-Type mentions application/sparql-query
//   2. the form field `query`, then `update`
// Everything else falls back to the `query` parameter of the URL.
// Returns { query, accept }; either may be null when absent.
const extractQuery = ({ url, method, headers, body }) => {
  const accept = headers.get("Accept")
  const fromQueryString = () => ({ query: new URL(url).searchParams.get("query"), accept })

  if (method === "GET" || !body) return fromQueryString()

  const contentType = headers.get("Content-Type") || ""
  if (contentType.includes("application/sparql-query")) return { query: body, accept }

  const form = new URLSearchParams(body)
  const query = form.get("query") || form.get("update")
  return query ? { query, accept } : fromQueryString()
}
84
+
85
const SPARQL_JSON = "application/sparql-results+json"

// Answer one intercepted SPARQL request against the in-browser store.
// Response by result type:
//   bindings -> SPARQL JSON results ({ head: { vars }, results: { bindings } })
//   boolean  -> SPARQL JSON results ({ head: {}, boolean })
//   quads    -> Turtle
//   other    -> empty 200
// A missing query, or any error thrown while querying, yields a 400 whose body
// is the error message.
const handleSparql = async (parts) => {
  const { query } = extractQuery(parts)
  if (!query) return new Response("missing query", { status: 400 })

  const ok = (body, contentType) =>
    new Response(body, { status: 200, headers: { "Content-Type": contentType } })

  try {
    const result = await queryEngine.query(query, { sources: [store] })
    switch (result.resultType) {
      case "bindings": {
        const { vars, bindings } = await collectBindings(await result.execute())
        return ok(JSON.stringify({ head: { vars }, results: { bindings } }), SPARQL_JSON)
      }
      case "boolean": {
        const boolean = await result.execute()
        return ok(JSON.stringify({ head: {}, boolean }), SPARQL_JSON)
      }
      case "quads": {
        const turtle = await collectQuadsAsTurtle(await result.execute())
        return ok(turtle, "text/turtle")
      }
      default:
        return new Response("", { status: 200 })
    }
  } catch (e) {
    return new Response(String(e?.message || e), { status: 400 })
  }
}
107
+
108
let intercepted = false

// Replace window.fetch (once) with a wrapper that answers requests whose URL
// starts with ENDPOINT via handleSparql and forwards everything else to the
// original fetch untouched.
const installInterceptor = () => {
  if (intercepted) return
  intercepted = true
  const orig = window.fetch.bind(window)
  window.fetch = async (input, init) => {
    // fetch() also accepts URL objects, which expose `href` rather than `url`;
    // normalise them to a string so they are routed like any other request.
    const target = input instanceof URL ? input.href : input
    const url = typeof target === "string" ? target : target?.url
    if (!url?.startsWith(ENDPOINT)) return orig(input, init)
    return handleSparql(await requestParts(target, init))
  }
}
119
+
120
+ // Yasgui's default share link clobbers the React Router hash. Emit a link that
121
+ // HashRouter accepts (#/query?<params>) instead, and reverse the parse on mount.
122
const SHARE_PREFIX = "#/query?"

// Override Yasgui's per-tab share link so it has the form
// <origin><pathname>#/query?<params>. Each entry of the tab's share object
// becomes one parameter: strings verbatim, other values JSON-encoded, and
// null / undefined / "" entries skipped.
const installShareOverride = () => {
  Yasgui.Tab.prototype.getShareableLink = function () {
    const params = new URLSearchParams()
    Object.entries(this.getShareObject()).forEach(([key, value]) => {
      if (value == null || value === "") return
      const serialised = typeof value === "string" ? value : JSON.stringify(value)
      params.set(key, serialised)
    })
    return location.origin + location.pathname + SHARE_PREFIX + params.toString()
  }
}
134
+
135
// Read the `query` parameter back out of a share link produced with
// SHARE_PREFIX. Returns null when the current hash is not such a link, or
// when the link carries no `query` parameter.
const sharedQueryFromUrl = () => {
  const { hash } = location
  if (!hash.startsWith(SHARE_PREFIX)) return null
  const encodedParams = hash.slice(SHARE_PREFIX.length)
  return new URLSearchParams(encodedParams).get("query")
}
139
+
140
+ export default function Query() {
141
+ const ref = useRef(null)
142
+ useEffect(() => {
143
+ installInterceptor()
144
+ installShareOverride()
145
+ const el = ref.current
146
+ if (!el) return
147
+ const y = new Yasgui(el, {
148
+ requestConfig: { endpoint: ENDPOINT, method: "POST" },
149
+ copyEndpointOnNewTab: false,
150
+ populateFromUrl: false,
151
+ })
152
+ const shared = sharedQueryFromUrl()
153
+ if (shared) y.getTab()?.setQuery(shared)
154
+ return () => { el.innerHTML = ""; y?.destroy?.() }
155
+ }, [])
156
+ return (
157
+ <>
158
+ <style>{`
159
+ .yasgui .controlbar { display: none; }
160
+ .yasr .dataTable td > div.rowNumber { margin-right: 8px; }
161
+ `}</style>
162
+ <div ref={ref} className="page" style={{ height: "100%", overflow: "auto" }} />
163
+ </>
164
+ )
165
+ }
@@ -0,0 +1,52 @@
1
+ // Sources overview: one card per :Source (URL, format, freshness, record/field counts).
2
+ // Reads: config/federation.ttl, data/pipeline/mapped.ttl,
3
+ // data/ingest/ingest-log.ttl (via loadSources.js)
4
+ // Does: renders the Sources page (list of <Card>)
5
+
6
+ import { federationTtl, mappedTtl, ingestLogTtl, repositoryUrl } from "./instanceData.js"
7
+ import Card, { KeyValueTable } from "./Card.jsx"
8
+ import { loadSources } from "./loadSources.js"
9
+ import React from "react"
10
+
11
+ const sources = loadSources(federationTtl, mappedTtl, ingestLogTtl)
12
+
13
+ // Static-file sources have no live URL; link to their committed folder in the
14
+ // instance's declared :repository (plain path when none is declared).
15
// Base URL of the repository's main-branch tree view; falsy when no repository
// is declared. Trailing slashes on the declared URL are dropped so that
// ".../repo/" does not produce ".../repo//tree/main".
const REPO_TREE = repositoryUrl && `${String(repositoryUrl).replace(/\/+$/, "")}/tree/main`
16
+
17
// Format an ISO timestamp for display in the en-GB locale (medium date, short
// time). Falsy input renders as an em dash.
const formatTime = (iso) => {
  if (!iso) return "—"
  const options = { dateStyle: "medium", timeStyle: "short" }
  return new Date(iso).toLocaleString("en-GB", options)
}
20
+
21
+ const sourceUrl = (s) => {
22
+ if (s.fetchUrl) return <a href={s.fetchUrl} target="_blank" rel="noreferrer">{s.fetchUrl}</a>
23
+ if (s.staticSource) return REPO_TREE
24
+ ? <a href={`${REPO_TREE}/${s.staticSource.replace(/\/$/, "")}`} target="_blank" rel="noreferrer">static sources</a>
25
+ : s.staticSource
26
+ return "—"
27
+ }
28
+
29
+ // Live sources report when they were last harvested; static sources have no
30
+ // harvest, so show the commit time of when their files entered the repo
31
+ // (journaled by ingest into the log's harvest entry).
32
// Build the freshness row of a source card: "Added to repo" (from
// staticCommittedAt) for static sources, "Last harvested" (from
// lastHarvestedAt) for all others.
const freshnessRow = (s) => {
  if (s.staticSource) {
    return { key: "added", label: "Added to repo", value: formatTime(s.staticCommittedAt) }
  }
  return { key: "harvested", label: "Last harvested", value: formatTime(s.lastHarvestedAt) }
}
35
+
36
+ export default function Sources() {
37
+ return (
38
+ <div className="page" style={{ overflowY: "auto", height: "100%" }}>
39
+ {sources.map((s) => (
40
+ <Card key={s.iri} title={s.label ?? s.iri}>
41
+ <KeyValueTable rows={[
42
+ { key: "url", label: "URL", value: sourceUrl(s) },
43
+ { key: "format", label: "Format", value: s.format },
44
+ freshnessRow(s),
45
+ { key: "records", label: "Records", value: s.records },
46
+ { key: "fields", label: "Schema fields", value: `${s.mappedFields} mapped / ${s.totalFields} total` },
47
+ ]} />
48
+ </Card>
49
+ ))}
50
+ </div>
51
+ )
52
+ }
@@ -0,0 +1,35 @@
1
+ // All config + pipeline data the webapp reads, fetched at runtime relative to
2
+ // BASE_URL — in dev the vite middleware serves config/ and data/ from the
3
+ // repo root; on gh-pages the deploy publishes them next to the bundle.
4
+ // federation.ttl is the only bootstrap: the cleaned-file list derives from
5
+ // its :hasSource, everything else from the PATHS conventions. A missing
6
+ // artifact resolves to "" (pages render empty). Top-level await — importing
7
+ // modules stay synchronous.
8
+
9
+ import { CDP, objectsOf, parseTtl, PATHS, prefixesOf, sourceName } from "@directory-builder/core/utils"
10
+
11
// Fetch a text artifact relative to BASE_URL.
// Deliberately best-effort: a network failure, a non-OK status, or a failure
// while reading the body all resolve to "" so that one missing or broken
// artifact leaves its page empty instead of rejecting the module's top-level
// await.
const fetchText = async (path) => {
  const url = `${import.meta.env.BASE_URL}${path}`
  try {
    const res = await fetch(url)
    return res.ok ? await res.text() : ""
  } catch {
    return ""
  }
}
15
+
16
// Kick off every request that does not depend on federation.ttl straight away
// (federation first, since the cleaned-file list is derived from it), so the
// fixed-path artifacts download while the bootstrap file is in flight instead
// of waiting a full round-trip for it.
const FIXED = [PATHS.matchKnowledge, PATHS.ingestLog, PATHS.federateLog, PATHS.mapped,
  PATHS.matches, PATHS.merged, PATHS.provenance, PATHS.final, PATHS.about]
const federationPromise = fetchText(PATHS.federation)
const fixedTextsPromise = Promise.all(FIXED.map(fetchText))

export const federationTtl = await federationPromise

const fedQuads = parseTtl(federationTtl)
const cleanedPaths = objectsOf(fedQuads, `${CDP}hasSource`).map((iri) => PATHS.cleaned(sourceName(iri)))
// The instance's repo URL (:federation :repository …) — undefined when not
// declared; pages hide their GitHub links then.
export const repositoryUrl = objectsOf(fedQuads, `${CDP}repository`)[0]
// Display prefixes = the federation's own @prefix declarations; cdp pinned
// first so cdp:… wins over the empty ":" prefix bound to the same namespace.
export const displayPrefixes = { cdp: CDP, ...prefixesOf(federationTtl) }

// The cleaned-file requests can only start now that the source list is known.
const [fixedTexts, cleanedTexts] = await Promise.all([
  fixedTextsPromise,
  Promise.all(cleanedPaths.map(fetchText)),
])

// Destructuring order must match the order of FIXED above.
export const [matchKnowledgeTtl, ingestLogTtl, federateLogTtl, mappedTtl, matchesTtl, mergedTtl, provenanceTtl, finalTtl, aboutMd] = fixedTexts
export const cleanedByPath = Object.fromEntries(cleanedPaths.map((p, i) => [p, cleanedTexts[i]]))
@@ -0,0 +1,276 @@
1
+ // Helpers for the Map view: build the schema-mapping graph and resolve per-org
2
+ // source/target field values. Pure (ttl in → data out).
3
+ // Reads: TTL strings passed by MapGraph.jsx (federation, mapped, cleaned source TTL)
4
+ // Does: returns { nodes, edges } plus per-source / per-org value maps
5
+
6
+ import { CDP as NS, localName, parseTtl, prefixesOf, shrink, sourceName, subjectsOfType, typesOf } from "@directory-builder/core/utils"
7
+
8
+ const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
9
+ const NODE_TYPES = [`${NS}Source`, `${NS}SourceField`, `${NS}TargetField`, `${NS}TargetSchema`]
10
+ const SUB_FIELD = `${NS}SubField`
11
+
12
+ // Group orgs by source. Each org carries a cdp:fromSource triple in mapped.ttl
13
+ // pointing at its Source IRI, so this is a single-pass scan with no prefix
14
+ // matching.
15
// Group the subjects of mapped.ttl by the source named in their fromSource
// triple. Returns Map<sourceIri, Array<{ iri, id, name }>>, each list sorted
// by id. `id` is the schema:identifier value, falling back to the IRI's local
// name; `name` is the schema:name value, falling back to "". The first
// parameter is accepted but unused.
export function loadOrgsBySource(_federationTtl, mappedTtl) {
  const sourceOfOrg = new Map() // orgIri -> sourceIri
  const identifierOf = new Map()
  const nameOf = new Map()

  // One pass over the quads: each predicate of interest fills its own index.
  const indexByPredicate = new Map([
    [`${NS}fromSource`, sourceOfOrg],
    ["http://schema.org/identifier", identifierOf],
    ["http://schema.org/name", nameOf],
  ])
  for (const q of parseTtl(mappedTtl)) {
    indexByPredicate.get(q.predicate.value)?.set(q.subject.value, q.object.value)
  }

  const orgsBySource = new Map()
  sourceOfOrg.forEach((sourceIri, orgIri) => {
    const org = {
      iri: orgIri,
      id: identifierOf.get(orgIri) ?? localName(orgIri),
      name: nameOf.get(orgIri) ?? "",
    }
    const bucket = orgsBySource.get(sourceIri)
    if (bucket) bucket.push(org)
    else orgsBySource.set(sourceIri, [org])
  })

  for (const orgs of orgsBySource.values()) {
    orgs.sort((left, right) => left.id.localeCompare(right.id))
  }
  return orgsBySource
}
42
+
43
+ // For each org in mapped.ttl, resolve the literal value of each of its
44
+ // source fields/sub-fields (from the source's lifted/cleaned TTL) AND each
45
+ // target field (from mapped.ttl, indirected via the field's :targetPredicate).
46
+ // Returns Map<orgIri, Map<fieldIri, string>>.
47
// Resolve display values per org and field. Returns Map<orgIri, Map<fieldIri, string>>.
//   federationTtl   declares fieldPath / targetPredicate / hasField / hasSubField
//   mappedTtl       carries each org's fromSource triple and its literal values
//   liftedBySource  iterable of [sourceIri, ttl] with the source-side data
// Source fields and sub-fields are read from the source TTL by matching the
// field's fieldPath against predicate local names. Target fields are read from
// the literal stored in mapped.ttl under the field's targetPredicate.
export function loadFieldValuesByOrg(federationTtl, mappedTtl, liftedBySource) {
  // Append `item` to the array stored under `key`, creating it on first use.
  const pushInto = (index, key, item) => {
    const bucket = index.get(key)
    if (bucket) bucket.push(item)
    else index.set(key, [item])
  }

  // Federation declarations.
  const pathOfField = new Map()
  const predicateOfTarget = new Map()
  const fieldsOfSource = new Map()
  const subFieldsOfField = new Map()
  for (const q of parseTtl(federationTtl)) {
    const subject = q.subject.value
    const object = q.object.value
    switch (q.predicate.value) {
      case `${NS}fieldPath`:
        pathOfField.set(subject, object)
        break
      case `${NS}targetPredicate`:
        predicateOfTarget.set(subject, object)
        break
      case `${NS}hasField`:
        pushInto(fieldsOfSource, subject, object)
        break
      case `${NS}hasSubField`:
        pushInto(subFieldsOfField, subject, object)
        break
      default:
        break
    }
  }

  // mapped.ttl: which source each org came from, plus every literal per subject.
  const fromSource = `${NS}fromSource`
  const sourceOfOrg = new Map() // orgIri -> sourceIri
  const literalsOfSubject = new Map() // subjectIri -> Map<predicateIri, string>
  for (const q of parseTtl(mappedTtl)) {
    const subject = q.subject.value
    if (q.predicate.value === fromSource) sourceOfOrg.set(subject, q.object.value)
    if (q.object.termType !== "Literal") continue
    let literals = literalsOfSubject.get(subject)
    if (!literals) {
      literals = new Map()
      literalsOfSubject.set(subject, literals)
    }
    literals.set(q.predicate.value, q.object.value)
  }

  const valuesByOrg = new Map()
  for (const [sourceIri, liftedTtl] of liftedBySource) {
    // subject -> Map<predicate local name, [{ value, isLiteral }]>
    const predicatesOf = new Map()
    for (const q of parseTtl(liftedTtl)) {
      let byLocalName = predicatesOf.get(q.subject.value)
      if (!byLocalName) {
        byLocalName = new Map()
        predicatesOf.set(q.subject.value, byLocalName)
      }
      pushInto(byLocalName, localName(q.predicate.value), {
        value: q.object.value,
        isLiteral: q.object.termType === "Literal",
      })
    }

    const sourceFields = fieldsOfSource.get(sourceIri) ?? []
    for (const [orgIri, orgSourceIri] of sourceOfOrg) {
      if (orgSourceIri !== sourceIri) continue
      // The org IRI is looked up directly as a subject of the source TTL.
      const orgPredicates = predicatesOf.get(orgIri)
      if (!orgPredicates) continue

      const fieldValues = new Map()
      for (const fieldIri of sourceFields) {
        const path = pathOfField.get(fieldIri)
        if (!path) continue
        // Only the first value of a multi-valued field is considered.
        const first = orgPredicates.get(path)?.[0]
        if (!first) continue
        if (first.isLiteral) {
          if (first.value) fieldValues.set(fieldIri, first.value)
          continue
        }
        // Non-literal value: sub-fields are read off the node it points at.
        const subFields = subFieldsOfField.get(fieldIri)
        const nested = subFields && predicatesOf.get(first.value)
        if (!nested) continue
        for (const subFieldIri of subFields) {
          const subPath = pathOfField.get(subFieldIri)
          if (!subPath) continue
          const subFirst = nested.get(subPath)?.[0]
          if (subFirst?.isLiteral && subFirst.value) fieldValues.set(subFieldIri, subFirst.value)
        }
      }
      valuesByOrg.set(orgIri, fieldValues)
    }
  }

  // Target-field values: for every subject with literals in mapped.ttl, copy
  // the literal found under each target field's predicate.
  for (const [subjectIri, literals] of literalsOfSubject) {
    let fieldValues = valuesByOrg.get(subjectIri)
    if (!fieldValues) {
      fieldValues = new Map()
      valuesByOrg.set(subjectIri, fieldValues)
    }
    for (const [targetFieldIri, predicateIri] of predicateOfTarget) {
      const literal = literals.get(predicateIri)
      if (literal) fieldValues.set(targetFieldIri, literal)
    }
  }
  return valuesByOrg
}
136
+
137
// List every subject typed as a Source in the given TTL as { iri, label },
// where label is its rdfs:label or, failing that, the IRI's local name.
export function loadSources(ttl) {
  const quads = parseTtl(ttl)
  const sourceIris = subjectsOfType(quads, `${NS}Source`)

  const labels = new Map()
  for (const { predicate, subject, object } of quads) {
    if (predicate.value !== RDFS_LABEL) continue
    labels.set(subject.value, object.value)
  }

  return Array.from(sourceIris, (iri) => ({ iri, label: labels.get(iri) ?? localName(iri) }))
}
144
+
145
// Build the schema-mapping graph from the federation TTL.
// Options:
//   hideUnmappedFields        drop source fields / sub-fields with no mapsTo edge
//   hideUnmappedTargetFields  drop target fields with no incoming mapsTo edge
//   hiddenSources             Set of source IRIs; when non-empty, only nodes
//                             forward-reachable from the other sources are kept
// Returns { nodes: [{ id, label, type, dashed? }], edges: [{ from, to, label, direct? }] }.
// Unmapped fields that remain visible are flagged with `dashed: true`.
export function loadMap(ttl, { hideUnmappedFields = true, hideUnmappedTargetFields = true, hiddenSources } = {}) {
  // Target predicates are shown in prefixed form (e.g. schema:identifier)
  // using the TTL's own @prefix declarations.
  const prefixes = { cdp: NS, ...prefixesOf(ttl) }
  const quads = parseTtl(ttl)
  const typeOf = typesOf(quads)
  const hasType = (iri, type) => typeOf.get(iri)?.has(type)

  // Nodes: every subject carrying one of the graph's types.
  const graphTypes = [...NODE_TYPES, SUB_FIELD]
  const nodeIds = new Set()
  for (const [iri, types] of typeOf) {
    if (graphTypes.some((type) => types.has(type))) nodeIds.add(iri)
  }

  // An edge is recorded only when both endpoints are known nodes.
  const edges = []
  const addEdge = (from, to, label, extra) => {
    if (!nodeIds.has(from) || !nodeIds.has(to)) return
    edges.push({ from, to, label, ...extra })
  }
  const appendTo = (index, key, item) => {
    const bucket = index.get(key)
    if (bucket) bucket.push(item)
    else index.set(key, [item])
  }

  // First pass: structural edges plus the raw pieces of each field mapping.
  // :from and :to may carry several values per mapping node; :via holds one.
  const mappingFrom = new Map()
  const mappingTo = new Map()
  const mappingVia = new Map()
  const fromSourceOf = new Map()
  const targetPredicateOf = new Map()
  const fieldPathOf = new Map()
  for (const q of quads) {
    const subject = q.subject.value
    const object = q.object.value
    switch (q.predicate.value) {
      case `${NS}hasField`:
        addEdge(subject, object, "hasField")
        break
      case `${NS}hasSubField`:
        addEdge(subject, object, "hasSubField")
        break
      case `${NS}hasTargetField`:
        // Reversed on purpose: the edge runs from the target field to its owner.
        addEdge(object, subject, "isTargetFieldOf")
        break
      case `${NS}from`:
        appendTo(mappingFrom, subject, object)
        break
      case `${NS}to`:
        appendTo(mappingTo, subject, object)
        break
      case `${NS}via`:
        mappingVia.set(subject, object)
        break
      case `${NS}fromSource`:
        fromSourceOf.set(subject, object)
        break
      case `${NS}targetPredicate`:
        targetPredicateOf.set(subject, object)
        break
      case `${NS}fieldPath`:
        fieldPathOf.set(subject, object)
        break
      default:
        break
    }
  }

  // Second pass: turn field mappings into mapsTo edges. Mappings that share a
  // transform would repeat (from, via) / (via, to) pairs, so emit each once.
  const emitted = new Set()
  const addEdgeOnce = (from, to, label, extra) => {
    const key = `${from}|${label}|${to}`
    if (emitted.has(key)) return
    emitted.add(key)
    addEdge(from, to, label, extra)
  }
  const transformLabels = new Map() // minted node id -> "source/via" label
  for (const q of quads) {
    if (q.predicate.value !== `${NS}hasFieldMapping`) continue
    const mapping = q.object.value
    const mappingSources = mappingFrom.get(mapping) ?? []
    const mappingTargets = mappingTo.get(mapping) ?? []
    const viaName = mappingVia.get(mapping)

    if (!viaName) {
      for (const from of mappingSources) {
        for (const to of mappingTargets) addEdgeOnce(from, to, "mapsTo", { direct: true })
      }
      continue
    }

    // Route the mapping through a minted transform node.
    const owner = sourceName(fromSourceOf.get(q.subject.value))
    const transformId = `transform:${owner}:${viaName}`
    if (!transformLabels.has(transformId)) {
      transformLabels.set(transformId, `${owner}/${viaName}`)
      nodeIds.add(transformId)
    }
    mappingSources.forEach((from) => addEdgeOnce(from, transformId, "mapsTo"))
    mappingTargets.forEach((to) => addEdgeOnce(transformId, to, "mapsTo"))
  }

  // Sub-fields are reported as "SourceField" so they share that column.
  const typeFor = (iri) => {
    if (transformLabels.has(iri)) return "TransformNode"
    const types = typeOf.get(iri)
    if (!types) return "Node"
    if (types.has(SUB_FIELD)) return "SourceField"
    const match = NODE_TYPES.find((type) => types.has(type))
    return match ? localName(match) : "Node"
  }

  // With hidden sources, keep only what is forward-reachable from a visible
  // source: sweep the edge list until a sweep adds nothing.
  if (hiddenSources?.size) {
    const reachable = new Set()
    for (const iri of nodeIds) {
      if (hasType(iri, `${NS}Source`) && !hiddenSources.has(iri)) reachable.add(iri)
    }
    let grew
    do {
      grew = false
      for (const { from, to } of edges) {
        if (reachable.has(from) && !reachable.has(to)) {
          reachable.add(to)
          grew = true
        }
      }
    } while (grew)
    for (const iri of [...nodeIds]) {
      if (!reachable.has(iri)) nodeIds.delete(iri)
    }
  }

  // Mapped-ness: the `from` end of a mapsTo edge is a mapped source, the `to`
  // end a mapped target; a parent counts as mapped when one of its sub-fields is.
  const mappedSources = new Set()
  const mappedTargets = new Set()
  for (const edge of edges) {
    if (edge.label !== "mapsTo") continue
    mappedSources.add(edge.from)
    mappedTargets.add(edge.to)
  }
  for (const edge of edges) {
    if (edge.label === "hasSubField" && mappedSources.has(edge.to)) mappedSources.add(edge.from)
  }
  const isField = (iri) => hasType(iri, `${NS}SourceField`) || hasType(iri, SUB_FIELD)
  const isTargetField = (iri) => hasType(iri, `${NS}TargetField`) ?? false
  const isUnmapped = (iri) =>
    (isField(iri) && !mappedSources.has(iri)) || (isTargetField(iri) && !mappedTargets.has(iri))

  if (hideUnmappedFields) {
    for (const iri of [...nodeIds]) {
      if (isField(iri) && !mappedSources.has(iri)) nodeIds.delete(iri)
    }
  }
  if (hideUnmappedTargetFields) {
    for (const iri of [...nodeIds]) {
      if (isTargetField(iri) && !mappedTargets.has(iri)) nodeIds.delete(iri)
    }
  }
  const visibleEdges = edges.filter(({ from, to }) => nodeIds.has(from) && nodeIds.has(to))

  // Label preference: transform label, prefixed target predicate, field path,
  // then the IRI's local name.
  const labelFor = (iri) => {
    const transform = transformLabels.get(iri)
    if (transform) return transform
    const predicate = targetPredicateOf.get(iri)
    if (predicate) return shrink(predicate, prefixes)
    return fieldPathOf.get(iri) || localName(iri)
  }

  const nodes = Array.from(nodeIds, (iri) => {
    const node = { id: iri, label: labelFor(iri), type: typeFor(iri) }
    if (isUnmapped(iri)) node.dashed = true
    return node
  })
  return { nodes, edges: visibleEdges }
}