@directory-builder/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +112 -0
- package/bin/cli.js +38 -0
- package/example/README.md +64 -0
- package/example/config/federation.ttl +136 -0
- package/example/config/match-knowledge.ttl +8 -0
- package/example/sources/cityopen/clean.sparql +17 -0
- package/example/sources/cityopen/fetch.js +14 -0
- package/example/sources/cityopen/static/libraries.json +32 -0
- package/example/sources/civichub/clean.sparql +34 -0
- package/example/sources/civichub/fetch.js +14 -0
- package/example/sources/civichub/static/libraries.json +38 -0
- package/package.json +38 -0
- package/src/federate.js +571 -0
- package/src/index.js +6 -0
- package/src/ingest.js +158 -0
- package/src/lift/html.sparql +12 -0
- package/src/lift/json.sparql +12 -0
- package/src/pipeline.js +16 -0
- package/src/utils.js +152 -0
- package/src/webapp.js +41 -0
- package/webapp/index.html +11 -0
- package/webapp/src/About.jsx +24 -0
- package/webapp/src/App.jsx +96 -0
- package/webapp/src/Card.jsx +32 -0
- package/webapp/src/ColumnGraph.jsx +290 -0
- package/webapp/src/Directory.jsx +15 -0
- package/webapp/src/Download.jsx +174 -0
- package/webapp/src/MapGraph.jsx +244 -0
- package/webapp/src/MatchGraph.jsx +137 -0
- package/webapp/src/MergeTables.jsx +61 -0
- package/webapp/src/OrgCard.jsx +126 -0
- package/webapp/src/Pipeline.jsx +41 -0
- package/webapp/src/Query.jsx +165 -0
- package/webapp/src/Sources.jsx +52 -0
- package/webapp/src/instanceData.js +35 -0
- package/webapp/src/loadMap.js +276 -0
- package/webapp/src/loadMatch.js +228 -0
- package/webapp/src/loadMerge.js +93 -0
- package/webapp/src/loadPipeline.js +130 -0
- package/webapp/src/loadSources.js +102 -0
- package/webapp/src/main.jsx +9 -0
- package/webapp/src/mergeOrgs.js +15 -0
- package/webapp/src/sourceMeta.js +81 -0
- package/webapp/src/styles.css +23 -0
- package/webapp/vite.config.js +14 -0
- package/webapp/vite.js +28 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
// Query view: a Yasgui SPARQL editor wired to an in-browser n3 store (no server) —
|
|
2
|
+
// a fetch interceptor routes the fake endpoint through Comunica.
|
|
3
|
+
// Reads: data/pipeline/final.ttl (loaded into the store)
|
|
4
|
+
// Does: renders the Query page; answers SPARQL against final.ttl in-browser
|
|
5
|
+
|
|
6
|
+
import { storeFromTurtles } from "@foerderfunke/sem-ops-utils/core"
|
|
7
|
+
import { queryEngine } from "@foerderfunke/sem-ops-utils/sparql"
|
|
8
|
+
import { finalTtl } from "./instanceData.js"
|
|
9
|
+
import React, { useEffect, useRef } from "react"
|
|
10
|
+
import "@zazuko/yasgui/build/yasgui.min.css"
|
|
11
|
+
import Yasgui from "@zazuko/yasgui"
|
|
12
|
+
import { Writer } from "n3"
|
|
13
|
+
|
|
14
|
+
// Yasgui expects to talk to a SPARQL endpoint over HTTP, but this page has
// none: queries are answered in-browser against an n3 Store. Yasgui is pointed
// at this placeholder URL, and a fetch interceptor routes requests for it
// through Comunica.
const ENDPOINT = "http://local/sparql"

// Built once, at module load, from the final pipeline output.
const store = storeFromTurtles([finalTtl])

// Starter query pre-filled in the editor: one sampled schema:name per subject,
// ordered by that name.
const INITIAL_QUERY = [
  "PREFIX schema: <http://schema.org/>",
  "PREFIX cdf: <https://civic-data.de/federated-directory#>",
  "",
  "SELECT ?org (SAMPLE(?name) AS ?title) WHERE {",
  "?org schema:name ?name .",
  "}",
  "GROUP BY ?org",
  "ORDER BY ?title",
].join("\n")

Yasgui.Yasqe.defaults.value = INITIAL_QUERY
|
|
31
|
+
|
|
32
|
+
const XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"

/**
 * Convert one RDF term into the value object used in SPARQL JSON results.
 *
 * - Literals become `{ type: "literal", value }`, plus `xml:lang` when the
 *   term has a language, otherwise plus `datatype` when it has a datatype
 *   other than xsd:string.
 * - Blank nodes become `{ type: "bnode", value }`.
 * - Every other term type is reported as `{ type: "uri", value }`.
 *
 * @param {{termType: string, value: string, language?: string, datatype?: {value: string}}} term
 * @returns {object}
 */
const termToJson = (term) => {
  const { termType, value } = term
  switch (termType) {
    case "Literal": {
      const { language, datatype } = term
      if (language) return { type: "literal", value, "xml:lang": language }
      const hasNonStringDatatype = datatype && datatype.value !== XSD_STRING
      return hasNonStringDatatype
        ? { type: "literal", value, datatype: datatype.value }
        : { type: "literal", value }
    }
    case "BlankNode":
      return { type: "bnode", value }
    default:
      return { type: "uri", value }
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Drain a bindings stream into the pieces of a SPARQL JSON result.
 *
 * Each "data" item is iterated as [variable, term] pairs; the variable's
 * `.value` becomes the column name and the term is converted with termToJson.
 * Column names are reported in the order they are first seen, so a stream
 * that emits no rows yields an empty `vars` list.
 *
 * @param {{on: Function}} stream emitter of "data" / "end" / "error" events
 * @returns {Promise<{vars: string[], bindings: object[]}>} rejects with the
 *   stream's "error" payload
 */
const collectBindings = (stream) => {
  const seenVars = new Set()
  const rows = []

  const toRow = (binding) => {
    const row = {}
    for (const [variable, term] of binding) {
      const name = variable.value
      seenVars.add(name)
      row[name] = termToJson(term)
    }
    return row
  }

  return new Promise((resolve, reject) => {
    stream.on("data", (binding) => { rows.push(toRow(binding)) })
    stream.on("end", () => { resolve({ vars: Array.from(seenVars), bindings: rows }) })
    stream.on("error", reject)
  })
}
|
|
55
|
+
|
|
56
|
+
/**
 * Drain a quad stream into a Turtle document.
 *
 * Every "data" item is added to an n3 Writer configured for "text/turtle";
 * on "end" the writer is finalised and its output resolves the promise.
 *
 * @param {{on: Function}} stream emitter of "data" / "end" / "error" events
 * @returns {Promise<string>} rejects on a stream "error" or a writer error
 */
const collectQuadsAsTurtle = (stream) => new Promise((resolve, reject) => {
  const turtleWriter = new Writer({ format: "text/turtle" })
  const finish = (error, turtle) => {
    if (error) reject(error)
    else resolve(turtle)
  }
  stream.on("data", (quad) => { turtleWriter.addQuad(quad) })
  stream.on("end", () => { turtleWriter.end(finish) })
  stream.on("error", reject)
})
|
|
62
|
+
|
|
63
|
+
// Yasqe calls `fetch(new Request(url, opts))` rather than `fetch(url, opts)`,
// so both call forms are normalised into one shape.
/**
 * Normalise the arguments of a fetch() call into `{ url, method, headers, body }`.
 *
 * - Request input: url/method/headers come from the Request itself (`init` is
 *   not consulted) and the body is read as text for every method except GET.
 * - Any other input: method/headers/body come from `init`; a non-string body
 *   is stringified with String().
 *
 * The url is taken from a string input, from an object's `.url`, or, for
 * anything else that is not null/undefined (notably `URL` objects, which
 * fetch() also accepts), from its string form. Previously such inputs
 * produced `url: undefined`.
 *
 * @param {Request|string|URL|object} input first argument given to fetch()
 * @param {object} [init] second argument given to fetch()
 * @returns {Promise<{url: (string|undefined), method: string, headers: Headers, body: string}>}
 */
const requestParts = async (input, init) => {
  if (input instanceof Request) {
    const body = input.method !== "GET" ? await input.text() : ""
    return { url: input.url, method: input.method, headers: input.headers, body }
  }
  const headers = new Headers(init?.headers || {})
  const body = init?.body != null ? (typeof init.body === "string" ? init.body : String(init.body)) : ""
  const url = typeof input === "string"
    ? input
    : input?.url ?? (input != null ? String(input) : undefined)
  return { url, method: init?.method || "GET", headers, body }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Pull the SPARQL text (and the Accept header) out of a normalised request.
 *
 * Lookup order:
 *   1. non-GET request with a body and an "application/sparql-query" content
 *      type: the body itself is the query;
 *   2. non-GET request with a body: the body is parsed as a form and its
 *      `query` field, else its `update` field, is used;
 *   3. otherwise (or when step 2 finds nothing): the `query` parameter of the URL.
 *
 * @param {{url: string, method: string, headers: Headers, body: string}} parts
 * @returns {{query: (string|null), accept: (string|null)}}
 */
const extractQuery = (parts) => {
  const { url, method, headers, body } = parts
  const accept = headers.get("Accept")
  const fromUrl = () => ({ query: new URL(url).searchParams.get("query"), accept })

  if (method === "GET" || !body) return fromUrl()

  const contentType = headers.get("Content-Type") || ""
  if (contentType.includes("application/sparql-query")) return { query: body, accept }

  const form = new URLSearchParams(body)
  const query = form.get("query") || form.get("update")
  return query ? { query, accept } : fromUrl()
}
|
|
84
|
+
|
|
85
|
+
const SPARQL_JSON = "application/sparql-results+json"

/**
 * Answer one intercepted SPARQL request from the in-browser store.
 *
 * Response by result type reported by the query engine:
 *   - "bindings": SPARQL JSON with `head.vars` and `results.bindings`
 *   - "boolean":  SPARQL JSON with `boolean`
 *   - "quads":    a Turtle document
 *   - anything else: an empty 200 response (the result is not executed here)
 * A request without a query, or any error thrown while querying, produces a
 * 400 response whose body is the message.
 *
 * @param {{url: string, method: string, headers: Headers, body: string}} parts
 * @returns {Promise<Response>}
 */
const handleSparql = async (parts) => {
  const { query } = extractQuery(parts)
  if (!query) return new Response("missing query", { status: 400 })

  const ok = (payload, contentType) =>
    new Response(payload, { status: 200, headers: { "Content-Type": contentType } })
  const okJson = (document) => ok(JSON.stringify(document), SPARQL_JSON)

  try {
    const result = await queryEngine.query(query, { sources: [store] })
    switch (result.resultType) {
      case "bindings": {
        const { vars, bindings } = await collectBindings(await result.execute())
        return okJson({ head: { vars }, results: { bindings } })
      }
      case "boolean":
        return okJson({ head: {}, boolean: await result.execute() })
      case "quads":
        return ok(await collectQuadsAsTurtle(await result.execute()), "text/turtle")
      default:
        return new Response("", { status: 200 })
    }
  } catch (error) {
    return new Response(String(error?.message || error), { status: 400 })
  }
}
|
|
107
|
+
|
|
108
|
+
let intercepted = false

/**
 * Wrap `window.fetch` once so that requests whose URL starts with ENDPOINT are
 * answered locally by handleSparql; everything else is forwarded to the
 * original fetch untouched. Calling this again is a no-op.
 *
 * The target URL is read from a Request, a string, an object's `.url`, or the
 * string form of any other non-null input. The last case covers `URL` objects,
 * which fetch() accepts; they previously slipped past the check and were sent
 * to the network. Non-Request inputs are handed on as the resolved URL string.
 */
const installInterceptor = () => {
  if (intercepted) return
  intercepted = true
  const orig = window.fetch.bind(window)
  window.fetch = async (input, init) => {
    const isRequest = input instanceof Request
    const url = isRequest
      ? input.url
      : typeof input === "string"
        ? input
        : input?.url ?? (input != null ? String(input) : undefined)
    if (!url?.startsWith(ENDPOINT)) return orig(input, init)
    return handleSparql(await requestParts(isRequest ? input : url, init))
  }
}
|
|
119
|
+
|
|
120
|
+
// Yasgui's default share link clobbers the React Router hash. Emit a link that
// HashRouter accepts (#/query?<params>) instead; the parse is reversed on mount.
const SHARE_PREFIX = "#/query?"

/**
 * Replace `Yasgui.Tab.prototype.getShareableLink` with a version that encodes
 * the tab's share object as URL parameters after SHARE_PREFIX.
 *
 * Entries that are null, undefined or "" are left out; string values are used
 * as-is and all other values are JSON-encoded.
 */
const installShareOverride = () => {
  function getShareableLink() {
    const shareConfig = this.getShareObject()
    const params = new URLSearchParams()
    Object.entries(shareConfig)
      .filter(([, value]) => value != null && value !== "")
      .forEach(([key, value]) => {
        params.set(key, typeof value === "string" ? value : JSON.stringify(value))
      })
    const { origin, pathname } = location
    return `${origin}${pathname}${SHARE_PREFIX}${params.toString()}`
  }
  Yasgui.Tab.prototype.getShareableLink = getShareableLink
}
|
|
134
|
+
|
|
135
|
+
/**
 * Read the `query` parameter back out of a share link in the page hash.
 *
 * @returns {string|null} the shared query, or null when the hash does not
 *   start with SHARE_PREFIX or carries no `query` parameter
 */
const sharedQueryFromUrl = () => {
  const { hash } = location
  if (!hash.startsWith(SHARE_PREFIX)) return null
  const encodedParams = hash.slice(SHARE_PREFIX.length)
  return new URLSearchParams(encodedParams).get("query")
}
|
|
139
|
+
|
|
140
|
+
export default function Query() {
|
|
141
|
+
const ref = useRef(null)
|
|
142
|
+
useEffect(() => {
|
|
143
|
+
installInterceptor()
|
|
144
|
+
installShareOverride()
|
|
145
|
+
const el = ref.current
|
|
146
|
+
if (!el) return
|
|
147
|
+
const y = new Yasgui(el, {
|
|
148
|
+
requestConfig: { endpoint: ENDPOINT, method: "POST" },
|
|
149
|
+
copyEndpointOnNewTab: false,
|
|
150
|
+
populateFromUrl: false,
|
|
151
|
+
})
|
|
152
|
+
const shared = sharedQueryFromUrl()
|
|
153
|
+
if (shared) y.getTab()?.setQuery(shared)
|
|
154
|
+
return () => { el.innerHTML = ""; y?.destroy?.() }
|
|
155
|
+
}, [])
|
|
156
|
+
return (
|
|
157
|
+
<>
|
|
158
|
+
<style>{`
|
|
159
|
+
.yasgui .controlbar { display: none; }
|
|
160
|
+
.yasr .dataTable td > div.rowNumber { margin-right: 8px; }
|
|
161
|
+
`}</style>
|
|
162
|
+
<div ref={ref} className="page" style={{ height: "100%", overflow: "auto" }} />
|
|
163
|
+
</>
|
|
164
|
+
)
|
|
165
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
// Sources overview: one card per :Source (URL, format, freshness, record/field counts).
|
|
2
|
+
// Reads: config/federation.ttl, data/pipeline/mapped.ttl,
|
|
3
|
+
// data/ingest/ingest-log.ttl (via loadSources.js)
|
|
4
|
+
// Does: renders the Sources page (list of <Card>)
|
|
5
|
+
|
|
6
|
+
import { federationTtl, mappedTtl, ingestLogTtl, repositoryUrl } from "./instanceData.js"
|
|
7
|
+
import Card, { KeyValueTable } from "./Card.jsx"
|
|
8
|
+
import { loadSources } from "./loadSources.js"
|
|
9
|
+
import React from "react"
|
|
10
|
+
|
|
11
|
+
// Source records shown on this page, computed once at module load.
const sources = loadSources(federationTtl, mappedTtl, ingestLogTtl)

// Static-file sources have no live URL; link to their committed folder in the
// instance's declared :repository (plain path when none is declared).
// Trailing slashes on the declared URL are dropped so the link never contains
// "//tree". The branch name "main" is hard-coded.
const REPO_TREE = repositoryUrl && `${repositoryUrl.replace(/\/+$/, "")}/tree/main`
|
|
16
|
+
|
|
17
|
+
/**
 * Format a timestamp for display (en-GB, medium date + short time).
 *
 * Returns the "—" placeholder when the value is missing or cannot be parsed
 * as a date, instead of rendering the text "Invalid Date".
 *
 * @param {string} [iso] timestamp string accepted by the Date constructor
 * @returns {string}
 */
const formatTime = (iso) => {
  if (!iso) return "—"
  const parsed = new Date(iso)
  if (Number.isNaN(parsed.getTime())) return "—"
  return parsed.toLocaleString("en-GB", { dateStyle: "medium", timeStyle: "short" })
}
|
|
20
|
+
|
|
21
|
+
const sourceUrl = (s) => {
|
|
22
|
+
if (s.fetchUrl) return <a href={s.fetchUrl} target="_blank" rel="noreferrer">{s.fetchUrl}</a>
|
|
23
|
+
if (s.staticSource) return REPO_TREE
|
|
24
|
+
? <a href={`${REPO_TREE}/${s.staticSource.replace(/\/$/, "")}`} target="_blank" rel="noreferrer">static sources</a>
|
|
25
|
+
: s.staticSource
|
|
26
|
+
return "—"
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// Live sources report when they were last harvested. Static sources have no
// harvest, so they show the commit time at which their files entered the repo
// (journaled by ingest into the log's harvest entry).
/**
 * @param {{staticSource?: string, staticCommittedAt?: string, lastHarvestedAt?: string}} s
 * @returns {{key: string, label: string, value: string}} table row describing freshness
 */
const freshnessRow = (s) => {
  if (s.staticSource) {
    return { key: "added", label: "Added to repo", value: formatTime(s.staticCommittedAt) }
  }
  return { key: "harvested", label: "Last harvested", value: formatTime(s.lastHarvestedAt) }
}
|
|
35
|
+
|
|
36
|
+
export default function Sources() {
|
|
37
|
+
return (
|
|
38
|
+
<div className="page" style={{ overflowY: "auto", height: "100%" }}>
|
|
39
|
+
{sources.map((s) => (
|
|
40
|
+
<Card key={s.iri} title={s.label ?? s.iri}>
|
|
41
|
+
<KeyValueTable rows={[
|
|
42
|
+
{ key: "url", label: "URL", value: sourceUrl(s) },
|
|
43
|
+
{ key: "format", label: "Format", value: s.format },
|
|
44
|
+
freshnessRow(s),
|
|
45
|
+
{ key: "records", label: "Records", value: s.records },
|
|
46
|
+
{ key: "fields", label: "Schema fields", value: `${s.mappedFields} mapped / ${s.totalFields} total` },
|
|
47
|
+
]} />
|
|
48
|
+
</Card>
|
|
49
|
+
))}
|
|
50
|
+
</div>
|
|
51
|
+
)
|
|
52
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// All config + pipeline data the webapp reads, fetched at runtime relative to
|
|
2
|
+
// BASE_URL — in dev the vite middleware serves config/ and data/ from the
|
|
3
|
+
// repo root; on gh-pages the deploy publishes them next to the bundle.
|
|
4
|
+
// federation.ttl is the only bootstrap: the cleaned-file list derives from
|
|
5
|
+
// its :hasSource, everything else from the PATHS conventions. A missing
|
|
6
|
+
// artifact resolves to "" (pages render empty). Top-level await — importing
|
|
7
|
+
// modules stay synchronous.
|
|
8
|
+
|
|
9
|
+
import { CDP, objectsOf, parseTtl, PATHS, prefixesOf, sourceName } from "@directory-builder/core/utils"
|
|
10
|
+
|
|
11
|
+
/**
 * Fetch a text artifact relative to the app's BASE_URL.
 *
 * Best-effort by design: a rejected fetch or a non-ok response resolves to ""
 * rather than failing, so a missing artifact simply leaves its page empty.
 *
 * @param {string} path path appended directly to `import.meta.env.BASE_URL`
 * @returns {Promise<string>}
 */
const fetchText = async (path) => {
  const url = `${import.meta.env.BASE_URL}${path}`
  const pending = fetch(url)
  const response = await pending.catch(() => null)
  if (!response?.ok) return ""
  return response.text()
}
|
|
15
|
+
|
|
16
|
+
// Bootstrap: the federation config is fetched first because the list of
// cleaned files is derived from it.
export const federationTtl = await fetchText(PATHS.federation)

const fedQuads = parseTtl(federationTtl)
const declaredSources = objectsOf(fedQuads, `${CDP}hasSource`)
const cleanedPaths = declaredSources.map((iri) => PATHS.cleaned(sourceName(iri)))

// The instance's repo URL (:federation :repository …). It is undefined when
// not declared, and pages hide their GitHub links then.
export const repositoryUrl = objectsOf(fedQuads, `${CDP}repository`)[0]

// Display prefixes are the federation's own @prefix declarations, with cdp
// pinned first so cdp:… wins over the empty ":" prefix bound to the same namespace.
export const displayPrefixes = { cdp: CDP, ...prefixesOf(federationTtl) }

// Artifacts at fixed conventional paths. The order here must match the
// destructuring of `fixedTexts` below.
const FIXED = [
  PATHS.matchKnowledge,
  PATHS.ingestLog,
  PATHS.federateLog,
  PATHS.mapped,
  PATHS.matches,
  PATHS.merged,
  PATHS.provenance,
  PATHS.final,
  PATHS.about,
]

// All remaining artifacts are requested concurrently.
const fetchAll = (paths) => Promise.all(paths.map((path) => fetchText(path)))
const [fixedTexts, cleanedTexts] = await Promise.all([fetchAll(FIXED), fetchAll(cleanedPaths)])

export const [matchKnowledgeTtl, ingestLogTtl, federateLogTtl, mappedTtl, matchesTtl, mergedTtl, provenanceTtl, finalTtl, aboutMd] = fixedTexts

// Cleaned TTL text keyed by the path it was fetched from.
export const cleanedByPath = Object.fromEntries(
  cleanedPaths.map((path, index) => [path, cleanedTexts[index]]),
)
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
// Helpers for the Map view: build the schema-mapping graph and resolve per-org
|
|
2
|
+
// source/target field values. Pure (ttl in → data out).
|
|
3
|
+
// Reads: TTL strings passed by MapGraph.jsx (federation, mapped, cleaned source TTL)
|
|
4
|
+
// Does: returns { nodes, edges } plus per-source / per-org value maps
|
|
5
|
+
|
|
6
|
+
import { CDP as NS, localName, parseTtl, prefixesOf, shrink, sourceName, subjectsOfType, typesOf } from "@directory-builder/core/utils"
|
|
7
|
+
|
|
8
|
+
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
// rdf:type IRIs whose instances become graph nodes, in the order used when
// picking a node's display type.
const NODE_TYPES = ["Source", "SourceField", "TargetField", "TargetSchema"].map((name) => `${NS}${name}`)
// Sub-fields are also graph nodes; they are treated as nested source fields.
const SUB_FIELD = `${NS}SubField`
|
|
11
|
+
|
|
12
|
+
// Group orgs by source. Each org carries a cdp:fromSource triple in mapped.ttl
// pointing at its Source IRI, so one scan of the quads is enough and no
// prefix matching is needed.
/**
 * @param {*} _federationTtl unused; kept for the call signature
 * @param {string} mappedTtl contents of mapped.ttl
 * @returns {Map<string, Array<{iri: string, id: string, name: string}>>}
 *   source IRI → its orgs sorted by `id`. `id` is the org's schema:identifier
 *   (or the IRI's local name when absent); `name` is its schema:name (or "").
 */
export function loadOrgsBySource(_federationTtl, mappedTtl) {
  const sourceByOrg = new Map() // orgIri -> sourceIri
  const idByOrg = new Map()
  const nameByOrg = new Map()
  const sinkByPredicate = new Map([
    [`${NS}fromSource`, sourceByOrg],
    ["http://schema.org/identifier", idByOrg],
    ["http://schema.org/name", nameByOrg],
  ])
  for (const quad of parseTtl(mappedTtl)) {
    // When a subject repeats a predicate, the last value seen wins.
    sinkByPredicate.get(quad.predicate.value)?.set(quad.subject.value, quad.object.value)
  }

  const grouped = new Map()
  sourceByOrg.forEach((sourceIri, orgIri) => {
    const org = {
      iri: orgIri,
      id: idByOrg.get(orgIri) ?? localName(orgIri),
      name: nameByOrg.get(orgIri) ?? "",
    }
    const bucket = grouped.get(sourceIri)
    if (bucket) bucket.push(org)
    else grouped.set(sourceIri, [org])
  })
  grouped.forEach((orgs) => orgs.sort((left, right) => left.id.localeCompare(right.id)))
  return grouped
}
|
|
42
|
+
|
|
43
|
+
// For each org in mapped.ttl, resolve the literal value of each of its source
// fields/sub-fields (from the source's lifted/cleaned TTL) AND of each target
// field (from mapped.ttl, indirected via the field's :targetPredicate).
/**
 * @param {string} federationTtl federation config (field paths, target predicates, field lists)
 * @param {string} mappedTtl contents of mapped.ttl
 * @param {Iterable<[string, string]>} liftedBySource [sourceIri, lifted/cleaned TTL] pairs
 * @returns {Map<string, Map<string, string>>} Map<orgIri, Map<fieldIri, string>>
 */
export function loadFieldValuesByOrg(federationTtl, mappedTtl, liftedBySource) {
  // 1. Index the federation config.
  const pathByField = new Map()
  const targetPredicateByField = new Map()
  const fieldsOfSource = new Map()
  const subFieldsOfField = new Map()
  const addToList = (index, key, item) => {
    const list = index.get(key)
    if (list) list.push(item)
    else index.set(key, [item])
  }
  for (const quad of parseTtl(federationTtl)) {
    const subjectIri = quad.subject.value
    const objectValue = quad.object.value
    switch (quad.predicate.value) {
      case `${NS}fieldPath`:
        pathByField.set(subjectIri, objectValue)
        break
      case `${NS}targetPredicate`:
        targetPredicateByField.set(subjectIri, objectValue)
        break
      case `${NS}hasField`:
        addToList(fieldsOfSource, subjectIri, objectValue)
        break
      case `${NS}hasSubField`:
        addToList(subFieldsOfField, subjectIri, objectValue)
        break
      default:
        break
    }
  }

  // 2. Index mapped.ttl: which source each org came from, plus every literal
  //    per subject (subjectIri -> Map<predicateIri, string>; last value wins).
  const sourceOfOrg = new Map()
  const literalsBySubject = new Map()
  for (const quad of parseTtl(mappedTtl)) {
    const subjectIri = quad.subject.value
    if (quad.predicate.value === `${NS}fromSource`) sourceOfOrg.set(subjectIri, quad.object.value)
    if (quad.object.termType !== "Literal") continue
    let literals = literalsBySubject.get(subjectIri)
    if (!literals) {
      literals = new Map()
      literalsBySubject.set(subjectIri, literals)
    }
    literals.set(quad.predicate.value, quad.object.value)
  }

  // 3. Source-side values, one lifted document at a time.
  const result = new Map()
  for (const [sourceIri, liftedTtl] of liftedBySource) {
    // subject -> Map<predicate local name, [{ value, isLiteral }]>
    const valuesBySubject = new Map()
    for (const quad of parseTtl(liftedTtl)) {
      const subjectIri = quad.subject.value
      const predicateKey = localName(quad.predicate.value)
      let byPredicate = valuesBySubject.get(subjectIri)
      if (!byPredicate) {
        byPredicate = new Map()
        valuesBySubject.set(subjectIri, byPredicate)
      }
      const entry = { value: quad.object.value, isLiteral: quad.object.termType === "Literal" }
      const existing = byPredicate.get(predicateKey)
      if (existing) existing.push(entry)
      else byPredicate.set(predicateKey, [entry])
    }

    const sourceFields = fieldsOfSource.get(sourceIri) ?? []
    for (const [orgIri, orgSourceIri] of sourceOfOrg) {
      if (orgSourceIri !== sourceIri) continue
      // Source subject IS the federation IRI post-clean, so no lookup is needed.
      const orgValues = valuesBySubject.get(orgIri)
      if (!orgValues) continue

      const fieldValues = new Map()
      for (const fieldIri of sourceFields) {
        const path = pathByField.get(fieldIri)
        // Only the first value recorded for a field path is considered.
        const first = path ? orgValues.get(path)?.[0] : undefined
        if (!first) continue

        if (first.isLiteral) {
          if (first.value) fieldValues.set(fieldIri, first.value)
          continue
        }

        // Sub-fields hang off the parent field's non-literal (node) value.
        const nested = subFieldsOfField.has(fieldIri) ? valuesBySubject.get(first.value) : undefined
        if (!nested) continue
        for (const subFieldIri of subFieldsOfField.get(fieldIri)) {
          const subPath = pathByField.get(subFieldIri)
          if (!subPath) continue
          const subFirst = nested.get(subPath)?.[0]
          if (subFirst?.isLiteral && subFirst.value) fieldValues.set(subFieldIri, subFirst.value)
        }
      }
      result.set(orgIri, fieldValues)
    }
  }

  // 4. Layer in target-field values: indirect each :targetPredicate through
  //    the subject's literal predicate->value map from mapped.ttl. These are
  //    the values that flow OUT of transform nodes (and equal the source
  //    value for direct 1:1 mappings).
  for (const [subjectIri, literals] of literalsBySubject) {
    let fieldValues = result.get(subjectIri)
    if (!fieldValues) {
      fieldValues = new Map()
      result.set(subjectIri, fieldValues)
    }
    for (const [targetFieldIri, predicateIri] of targetPredicateByField) {
      const literal = literals.get(predicateIri)
      if (literal) fieldValues.set(targetFieldIri, literal)
    }
  }
  return result
}
|
|
136
|
+
|
|
137
|
+
/**
 * List the subjects typed as Source in a Turtle document.
 *
 * @param {string} ttl Turtle text
 * @returns {Array<{iri: string, label: string}>} one entry per Source subject;
 *   `label` is its rdfs:label, or the IRI's local name when it has none
 */
export function loadSources(ttl) {
  const quads = parseTtl(ttl)
  const sourceIris = subjectsOfType(quads, `${NS}Source`)

  const labels = new Map()
  for (const quad of quads) {
    if (quad.predicate.value !== RDFS_LABEL) continue
    labels.set(quad.subject.value, quad.object.value)
  }

  return Array.from(sourceIris, (iri) => ({ iri, label: labels.get(iri) ?? localName(iri) }))
}
|
|
144
|
+
|
|
145
|
+
/**
 * Build the schema-mapping graph from the federation Turtle.
 *
 * Nodes are the subjects typed Source, SourceField, TargetField, TargetSchema
 * or SubField, plus one minted node per (source, via) transform. Edges are
 * hasField / hasSubField / isTargetFieldOf links and "mapsTo" links derived
 * from field mappings (tagged `direct: true` when no transform is involved).
 *
 * @param {string} ttl federation Turtle
 * @param {object} [options]
 * @param {boolean} [options.hideUnmappedFields=true] drop source fields/sub-fields with no mapping
 * @param {boolean} [options.hideUnmappedTargetFields=true] drop target fields nothing maps to
 * @param {{size: number, has: Function}} [options.hiddenSources] source IRIs to hide; when
 *   non-empty, only nodes forward-reachable from the remaining sources are kept
 * @returns {{nodes: Array<{id: string, label: string, type: string, dashed?: boolean}>,
 *            edges: Array<{from: string, to: string, label: string, direct?: boolean}>}}
 */
export function loadMap(ttl, { hideUnmappedFields = true, hideUnmappedTargetFields = true, hiddenSources } = {}) {
  // Target-predicate IRIs are rendered like `schema:identifier` (instead of
  // the local TargetField name) using the federation's own @prefix declarations.
  const prefixes = { cdp: NS, ...prefixesOf(ttl) }
  const quads = parseTtl(ttl)
  const typesByIri = typesOf(quads)

  const graphTypes = [...NODE_TYPES, SUB_FIELD]
  const nodeIds = new Set()
  for (const [iri, types] of typesByIri) {
    if (graphTypes.some((type) => types.has(type))) nodeIds.add(iri)
  }

  // Edges are only recorded between two nodes that are known at that moment.
  const edges = []
  const addEdge = (from, to, label, extra) => {
    if (!nodeIds.has(from) || !nodeIds.has(to)) return
    edges.push({ from, to, label, ...extra })
  }

  // :from and :to on a field-mapping node can each carry multiple values
  // (comma-list in turtle), so they are collected as arrays. :via is
  // single-valued: it names a transform of the mapping's source
  // (sources/<source>/transform-<via>.sparql), rendered as its own node.
  const mappingFroms = new Map()
  const mappingTos = new Map()
  const mappingVia = new Map()
  const sourceOfMapping = new Map()
  const targetPredicateOf = new Map()
  const fieldPathOf = new Map()
  const collect = (index, key, value) => {
    const list = index.get(key)
    if (list) list.push(value)
    else index.set(key, [value])
  }
  const onPredicate = new Map([
    [`${NS}hasField`, (s, o) => addEdge(s, o, "hasField")],
    [`${NS}hasSubField`, (s, o) => addEdge(s, o, "hasSubField")],
    // Reversed on purpose: the edge runs from the target field to its schema.
    [`${NS}hasTargetField`, (s, o) => addEdge(o, s, "isTargetFieldOf")],
    [`${NS}from`, (s, o) => collect(mappingFroms, s, o)],
    [`${NS}to`, (s, o) => collect(mappingTos, s, o)],
    [`${NS}via`, (s, o) => mappingVia.set(s, o)],
    [`${NS}fromSource`, (s, o) => sourceOfMapping.set(s, o)],
    [`${NS}targetPredicate`, (s, o) => targetPredicateOf.set(s, o)],
    [`${NS}fieldPath`, (s, o) => fieldPathOf.set(s, o)],
  ])
  for (const quad of quads) {
    onPredicate.get(quad.predicate.value)?.(quad.subject.value, quad.object.value)
  }

  // Routed edges are deduplicated: the same (source, via) or (via, target)
  // pair can appear across several field mappings sharing one transform node.
  const seenEdgeKeys = new Set()
  const addEdgeOnce = (from, to, label, extra) => {
    const key = `${from}|${label}|${to}`
    if (seenEdgeKeys.has(key)) return
    seenEdgeKeys.add(key)
    addEdge(from, to, label, extra)
  }
  const transformLabels = new Map() // minted node id -> "source/via" label
  for (const quad of quads) {
    if (quad.predicate.value !== `${NS}hasFieldMapping`) continue
    const mappingId = quad.object.value
    const froms = mappingFroms.get(mappingId) ?? []
    const tos = mappingTos.get(mappingId) ?? []
    const viaName = mappingVia.get(mappingId)

    if (!viaName) {
      for (const from of froms) {
        for (const to of tos) addEdgeOnce(from, to, "mapsTo", { direct: true })
      }
      continue
    }

    const owner = sourceName(sourceOfMapping.get(quad.subject.value))
    const transformId = `transform:${owner}:${viaName}`
    if (!transformLabels.has(transformId)) {
      transformLabels.set(transformId, `${owner}/${viaName}`)
      nodeIds.add(transformId)
    }
    froms.forEach((from) => addEdgeOnce(from, transformId, "mapsTo"))
    tos.forEach((to) => addEdgeOnce(transformId, to, "mapsTo"))
  }

  // SubFields render in the SourceField column; they are just nested fields.
  const typeFor = (iri) => {
    if (transformLabels.has(iri)) return "TransformNode"
    const types = typesByIri.get(iri)
    if (types?.has(SUB_FIELD)) return "SourceField"
    const matched = NODE_TYPES.find((type) => types?.has(type))
    return matched ? localName(matched) : "Node"
  }

  // Keep only nodes forward-reachable from a visible source: sweep `edges`
  // repeatedly until a sweep adds nothing new.
  if (hiddenSources?.size) {
    const reachable = new Set()
    for (const iri of nodeIds) {
      if (typesByIri.get(iri)?.has(`${NS}Source`) && !hiddenSources.has(iri)) reachable.add(iri)
    }
    let grew = true
    while (grew) {
      grew = false
      for (const edge of edges) {
        if (!reachable.has(edge.from) || reachable.has(edge.to)) continue
        reachable.add(edge.to)
        grew = true
      }
    }
    for (const iri of [...nodeIds]) {
      if (!reachable.has(iri)) nodeIds.delete(iri)
    }
  }

  // Mapped-ness is tracked on both ends of mapsTo edges: source fields are
  // mapped when they appear as `from`, target fields when they appear as
  // `to`. A sub-field's parent inherits mapped-ness from its sub-fields (one
  // pass over the hasSubField edges, in edge order).
  const mappedSources = new Set()
  const mappedTargets = new Set()
  for (const edge of edges) {
    if (edge.label !== "mapsTo") continue
    mappedSources.add(edge.from)
    mappedTargets.add(edge.to)
  }
  for (const edge of edges) {
    if (edge.label === "hasSubField" && mappedSources.has(edge.to)) mappedSources.add(edge.from)
  }
  const isField = (iri) => {
    const types = typesByIri.get(iri)
    return types?.has(`${NS}SourceField`) || types?.has(SUB_FIELD)
  }
  const isTargetField = (iri) => typesByIri.get(iri)?.has(`${NS}TargetField`) ?? false
  const isUnmappedField = (iri) => isField(iri) && !mappedSources.has(iri)
  const isUnmappedTarget = (iri) => isTargetField(iri) && !mappedTargets.has(iri)

  // Unmapped nodes are either hidden here or tagged `dashed` below for the
  // caller to style.
  if (hideUnmappedFields) {
    for (const iri of [...nodeIds]) {
      if (isUnmappedField(iri)) nodeIds.delete(iri)
    }
  }
  if (hideUnmappedTargetFields) {
    for (const iri of [...nodeIds]) {
      if (isUnmappedTarget(iri)) nodeIds.delete(iri)
    }
  }
  const visibleEdges = edges.filter((edge) => nodeIds.has(edge.from) && nodeIds.has(edge.to))

  // Label preference: transform label, prefixed target predicate, field path,
  // then the IRI's local name.
  const labelFor = (iri) => {
    const transform = transformLabels.get(iri)
    if (transform) return transform
    const predicate = targetPredicateOf.get(iri)
    if (predicate) return shrink(predicate, prefixes)
    return fieldPathOf.get(iri) || localName(iri)
  }

  const nodes = []
  for (const iri of nodeIds) {
    const node = { id: iri, label: labelFor(iri), type: typeFor(iri) }
    if (isUnmappedField(iri) || isUnmappedTarget(iri)) node.dashed = true
    nodes.push(node)
  }
  return { nodes, edges: visibleEdges }
}
|