@directory-builder/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. package/LICENSE +21 -0
  2. package/README.md +112 -0
  3. package/bin/cli.js +38 -0
  4. package/example/README.md +64 -0
  5. package/example/config/federation.ttl +136 -0
  6. package/example/config/match-knowledge.ttl +8 -0
  7. package/example/sources/cityopen/clean.sparql +17 -0
  8. package/example/sources/cityopen/fetch.js +14 -0
  9. package/example/sources/cityopen/static/libraries.json +32 -0
  10. package/example/sources/civichub/clean.sparql +34 -0
  11. package/example/sources/civichub/fetch.js +14 -0
  12. package/example/sources/civichub/static/libraries.json +38 -0
  13. package/package.json +38 -0
  14. package/src/federate.js +571 -0
  15. package/src/index.js +6 -0
  16. package/src/ingest.js +158 -0
  17. package/src/lift/html.sparql +12 -0
  18. package/src/lift/json.sparql +12 -0
  19. package/src/pipeline.js +16 -0
  20. package/src/utils.js +152 -0
  21. package/src/webapp.js +41 -0
  22. package/webapp/index.html +11 -0
  23. package/webapp/src/About.jsx +24 -0
  24. package/webapp/src/App.jsx +96 -0
  25. package/webapp/src/Card.jsx +32 -0
  26. package/webapp/src/ColumnGraph.jsx +290 -0
  27. package/webapp/src/Directory.jsx +15 -0
  28. package/webapp/src/Download.jsx +174 -0
  29. package/webapp/src/MapGraph.jsx +244 -0
  30. package/webapp/src/MatchGraph.jsx +137 -0
  31. package/webapp/src/MergeTables.jsx +61 -0
  32. package/webapp/src/OrgCard.jsx +126 -0
  33. package/webapp/src/Pipeline.jsx +41 -0
  34. package/webapp/src/Query.jsx +165 -0
  35. package/webapp/src/Sources.jsx +52 -0
  36. package/webapp/src/instanceData.js +35 -0
  37. package/webapp/src/loadMap.js +276 -0
  38. package/webapp/src/loadMatch.js +228 -0
  39. package/webapp/src/loadMerge.js +93 -0
  40. package/webapp/src/loadPipeline.js +130 -0
  41. package/webapp/src/loadSources.js +102 -0
  42. package/webapp/src/main.jsx +9 -0
  43. package/webapp/src/mergeOrgs.js +15 -0
  44. package/webapp/src/sourceMeta.js +81 -0
  45. package/webapp/src/styles.css +23 -0
  46. package/webapp/vite.config.js +14 -0
  47. package/webapp/vite.js +28 -0
@@ -0,0 +1,228 @@
1
+ // Match "lanes" view, derived entirely from federation.ttl — no assumptions about
2
+ // how many entity types there are or how they relate. One lane per :TargetSchema
3
+ // (ordered by the relationship hierarchy, roots left), each preceded by a tinted
4
+ // "source duplications" column; cross-lane edges come from every :hasRelationship.
5
+ // When the relationships form a tree the layout groups each subtree vertically
6
+ // (parent centred on its children); otherwise it just lays out gracefully.
7
+ // Reads: federation.ttl (schemas, classes, labels, relationships),
8
+ // matches.ttl (clusters + hasMember), merged.ttl (rdf:type, name, links)
9
+ // Does: returns everything <ColumnGraph> needs + a per-lane nodeY layout.
10
+
11
+ import { CDP as NS, localName, parseTtl, prefixesOf, shrink, subjectsOfType } from "@directory-builder/core/utils"
12
+
13
+ const CDF = "https://civic-data.de/federated-directory#"
14
+ const S = "http://schema.org/"
15
+ const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
16
+ const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
17
+ const HAS_TARGET_SCHEMA = `${NS}hasTargetSchema`
18
+ const TARGET_CLASS = `${NS}targetClass`
19
+ const TO_TARGET = `${NS}toTarget`
20
+ const HAS_RELATIONSHIP = `${NS}hasRelationship`
21
+ const TO_TARGET_SCHEMA = `${NS}toTargetSchema`
22
+ const TO_TARGET_FIELD = `${NS}toTargetField`
23
+ const TARGET_PREDICATE = `${NS}targetPredicate`
24
+ const MATCH_CLUSTER = `${NS}MatchCluster`
25
+ const HAS_MEMBER = `${NS}hasMember`
26
+ const NAME = `${S}name`
27
+ const CATEGORY = `${S}category` // label fallback for entities with no name (e.g. AWO services)
28
+
29
+ // Lane colours, assigned by hierarchy position; cycles if there are more lanes.
30
+ const PALETTE = ["#cdddff", "#f7d2e3", "#cfe9d4", "#ffe2b8", "#e3d4f7", "#cfeef0", "#f3d9c0"]
31
+ const SRC_COLOR = "#e9e9ee"
32
+ const GAP = 84 // vertical spacing between single-member leaves
33
+ const SRC_GAP = 56 // spacing of a cluster's stacked source members
34
+ const NODE_H = 48 // approx node height — keeps source stacks from colliding
35
+ const MARGIN = GAP - NODE_H // inter-cluster gap; keeps single-member spacing == GAP
36
+
37
// Upper-cases the first character of a string and leaves the rest untouched.
const cap = (s) => `${s.charAt(0).toUpperCase()}${s.slice(1)}`
38
// Blend a "#rrggbb" colour toward white. t = 0 returns the colour unchanged,
// t = 1 returns white; used for the pale entity-column band tint.
const lighten = (hex, t) => {
  const rgb = parseInt(hex.slice(1), 16)
  // Visit the red, green and blue channels by their bit offset and re-pack them.
  const mixed = [16, 8, 0].reduce((packed, shift) => {
    const channel = (rgb >> shift) & 255
    const blended = Math.round(channel + (255 - channel) * t)
    return packed | (blended << shift)
  }, 0)
  return `#${mixed.toString(16).padStart(6, "0")}`
}
44
+
45
+ // ---- federation.ttl → schema model -------------------------------------
46
+
47
// Reads federation.ttl and derives the lane model for the match view.
// Returns { lanes, relPreds }:
//   lanes    — one entry per :hasTargetSchema object, ordered by hierarchy depth
//              then document order: { schema, cls, key, label, title, color }
//   relPreds — Set of :targetPredicate IRIs of fully-declared relationships
function readSchemas(federationTtl) {
  const quads = parseTtl(federationTtl)
  const classPrefixes = prefixesOf(federationTtl)

  const schemaOrder = []            // schema IRIs in document order
  const classOfSchema = new Map()   // schema → :targetClass
  const labelOf = new Map()         // any subject → first rdfs:label seen
  const schemaOfMapping = new Map() // mapping → :toTarget schema
  const mappingOfRel = new Map()    // relationship node → mapping that declares it
  const schemaOfRel = new Map()     // relationship node → :toTargetSchema
  const fieldOfRel = new Map()      // relationship node → :toTargetField
  const predOfField = new Map()     // target field → :targetPredicate

  // One handler per predicate of interest; all other triples are ignored.
  const handlers = new Map([
    [HAS_TARGET_SCHEMA, (subj, obj) => { schemaOrder.push(obj) }],
    [TARGET_CLASS, (subj, obj) => { classOfSchema.set(subj, obj) }],
    [RDFS_LABEL, (subj, obj) => { if (!labelOf.has(subj)) labelOf.set(subj, obj) }],
    [TO_TARGET, (subj, obj) => { schemaOfMapping.set(subj, obj) }],
    [HAS_RELATIONSHIP, (subj, obj) => { mappingOfRel.set(obj, subj) }],
    [TO_TARGET_SCHEMA, (subj, obj) => { schemaOfRel.set(subj, obj) }],
    [TO_TARGET_FIELD, (subj, obj) => { fieldOfRel.set(subj, obj) }],
    [TARGET_PREDICATE, (subj, obj) => { predOfField.set(subj, obj) }],
  ])
  for (const quad of quads) {
    const handle = handlers.get(quad.predicate.value)
    if (handle) handle(quad.subject.value, quad.object.value)
  }

  // Schema-level relationships: from = the mapping's :toTarget, to = the
  // relationship's :toTargetSchema, predicate = the target field's
  // :targetPredicate. Incomplete declarations are skipped.
  const relPreds = new Set()
  const pointsAt = new Map() // schema → Set(schemas it points at)
  mappingOfRel.forEach((mapping, rel) => {
    const from = schemaOfMapping.get(mapping)
    const to = schemaOfRel.get(rel)
    const pred = predOfField.get(fieldOfRel.get(rel))
    if (from && to && pred) {
      relPreds.add(pred)
      const targets = pointsAt.get(from) ?? new Set()
      targets.add(to)
      pointsAt.set(from, targets)
    }
  })

  // Depth = longest chain of outgoing relationships; schemas nothing points
  // away from get 0 and therefore sort leftmost. `path` is the cycle guard.
  const depthMemo = new Map()
  function depthOf(schema, path = new Set()) {
    if (depthMemo.has(schema)) return depthMemo.get(schema)
    if (path.has(schema)) return 0
    path.add(schema)
    const targets = [...(pointsAt.get(schema) ?? [])]
    const depth = targets.reduce((best, next) => Math.max(best, 1 + depthOf(next, path)), 0)
    path.delete(schema)
    depthMemo.set(schema, depth)
    return depth
  }

  const position = new Map(schemaOrder.map((schema, i) => [schema, i]))
  const ordered = schemaOrder.slice().sort((a, b) => {
    const byDepth = depthOf(a) - depthOf(b)
    return byDepth !== 0 ? byDepth : position.get(a) - position.get(b)
  })

  const lanes = ordered.map((schema, i) => {
    const cls = classOfSchema.get(schema)
    const key = localName(schema).replace(/Schema$/, "")
    // Label preference: the schema's own label, then its class's label, then the key.
    const label = labelOf.get(schema) ?? (cls && labelOf.get(cls)) ?? cap(key)
    const title = `${label}\n${cls ? shrink(cls, classPrefixes) : ""}`
    return { schema, cls, key, label, title, color: PALETTE[i % PALETTE.length] }
  })
  return { lanes, relPreds }
}
114
+
115
+ // ---- main ---------------------------------------------------------------
116
+
117
// Builds everything the match "lanes" view renders.
//   federationTtl — config; yields the lanes and the relationship predicates
//   matchesTtl    — :MatchCluster subjects and their :hasMember source records
//   mergedTtl     — per-entity rdf:type, schema:name / schema:category and links
//   options       — showDuplications: add the per-lane source columns;
//                   show1to1: also show clusters with at most one member there
// Returns { nodes, edges, members, lanes, columns, colors, columnTitles,
//           columnBands, columnHeaderStyle, nodeY } where nodeY is the Map of
// vertical positions computed by layout().
export function loadMatch(federationTtl, matchesTtl, mergedTtl, { showDuplications = false, show1to1 = false } = {}) {
  const { lanes, relPreds } = readSchemas(federationTtl)
  const keyOfClass = new Map(lanes.filter((l) => l.cls).map((l) => [l.cls, l.key]))
  const laneIdx = new Map(lanes.map((l, i) => [l.key, i]))

  // Each lane contributes two columns: its source-duplications column, then itself.
  const columns = lanes.flatMap((l) => [`${l.key}Src`, l.key])
  const colors = {}, columnTitles = {}, columnBands = {}, columnHeaderStyle = {}
  for (const l of lanes) {
    colors[l.key] = l.color; colors[`${l.key}Src`] = SRC_COLOR
    columnTitles[l.key] = l.title; columnTitles[`${l.key}Src`] = "source duplications"
    columnBands[l.key] = lighten(l.color, 0.6) // entity column gets a brighter tint of its nodes
    columnHeaderStyle[`${l.key}Src`] = { fontSize: 10, color: "#aaa" } // de-emphasise the source-column labels
  }

  const merged = parseTtl(mergedTtl)
  const tierOf = new Map() // entity → lane key, via its rdf:type
  const nameOf = new Map(), catOf = new Map() // first name / category seen per entity
  for (const q of merged) {
    if (q.predicate.value === RDF_TYPE && keyOfClass.has(q.object.value)) tierOf.set(q.subject.value, keyOfClass.get(q.object.value))
    else if (q.predicate.value === NAME && !nameOf.has(q.subject.value)) nameOf.set(q.subject.value, q.object.value)
    else if (q.predicate.value === CATEGORY && !catOf.has(q.subject.value)) catOf.set(q.subject.value, q.object.value)
  }

  const quads = parseTtl(matchesTtl)
  const clusters = subjectsOfType(quads, MATCH_CLUSTER)
  const members = new Map() // cluster → member IRIs, in parse order
  for (const q of quads) if (q.predicate.value === HAS_MEMBER) {
    if (!members.has(q.subject.value)) members.set(q.subject.value, [])
    members.get(q.subject.value).push(q.object.value)
  }

  const nodes = []
  const edges = []
  const nodeIds = new Set()    // every node id emitted (clusters + shown members)
  const clusterIds = new Set() // cluster nodes only — the entities actually placed in a lane
  for (const c of clusters) {
    const tier = tierOf.get(c)
    if (!tier) continue // cluster has no lane class in merged.ttl → not shown
    nodes.push({ id: c, type: tier, label: nameOf.get(c) ?? catOf.get(c) ?? localName(c), isCluster: true })
    nodeIds.add(c)
    clusterIds.add(c)
    const ms = members.get(c) ?? []
    if (!showDuplications || (!show1to1 && ms.length <= 1)) continue // master off → no source cols; hide 1:1 unless "show 1:1"
    for (const src of ms) {
      nodes.push({ id: src, type: `${tier}Src`, label: localName(src) })
      nodeIds.add(src)
      edges.push({ from: src, to: c }) // dedup (hasMember) edge
    }
  }

  // Cross-lane links: any merged triple whose predicate is a declared relationship
  // and whose ends are both placed cluster nodes. Checking clusterIds (not merely
  // "has a lane type") keeps edges from pointing at entities that never became
  // nodes, which would otherwise also reserve phantom rows in layout().
  // Stored object→subject so the parent (object) sits left of the child (subject)
  // and edges flow toward the root.
  for (const q of merged) {
    if (relPreds.has(q.predicate.value) && q.object.termType === "NamedNode"
      && clusterIds.has(q.subject.value) && clusterIds.has(q.object.value)) {
      edges.push({ from: q.object.value, to: q.subject.value, rel: true })
    }
  }

  return { nodes, edges, members, lanes, columns, colors, columnTitles, columnBands, columnHeaderStyle,
    nodeY: layout(nodes, edges, members, nodeIds, laneIdx) }
}
178
+
179
// Tidy-tree vertical layout: place leaves on a running cursor, centre each parent
// on its children. Roots (entities with no parent) are ordered by lane, so the
// upper lanes' subtrees group at the top.
//   nodes    — [{ id, type, isCluster? }]; only cluster nodes drive the tree
//   edges    — [{ from, to, rel? }]; only rel edges (parent → child) are used
//   members  — Map(cluster id → member ids)
//   nodeIds  — Set of all emitted node ids (decides which members are shown)
//   laneIdx  — Map(lane key → lane position)
// Returns Map(id → y).
// Graceful on non-tree graphs: a node is placed once, and a node that is still
// being placed is skipped when reached again, so multiple parents, cycles and
// self-references terminate instead of recursing forever.
function layout(nodes, edges, members, nodeIds, laneIdx) {
  const childrenOf = new Map()
  const hasParent = new Set()
  for (const e of edges) {
    if (!e.rel) continue
    if (!childrenOf.has(e.from)) childrenOf.set(e.from, [])
    childrenOf.get(e.from).push(e.to)
    hasParent.add(e.to)
  }

  const y = new Map()
  const inProgress = new Set() // ids whose subtree is currently being placed (cycle guard)
  let cursor = 0
  const mean = (a) => a.reduce((s, v) => s + v, 0) / a.length
  const place = (id) => {
    if (y.has(id) || inProgress.has(id)) return
    const kids = childrenOf.get(id) ?? []
    inProgress.add(id)
    for (const kid of kids) place(kid)
    inProgress.delete(id)
    // A child on a cycle back to this node has no position yet; centre on the rest.
    const kidYs = kids.map((k) => y.get(k)).filter((v) => v !== undefined)
    if (kidYs.length) y.set(id, mean(kidYs))
    else { y.set(id, cursor); cursor += GAP }
  }
  const clusters = nodes.filter((n) => n.isCluster)
  clusters.filter((n) => !hasParent.has(n.id)).sort((a, b) => laneIdx.get(a.type) - laneIdx.get(b.type)).forEach((n) => place(n.id))
  clusters.filter((n) => !y.has(n.id)).forEach((n) => place(n.id)) // safety net: clusters only reachable through a cycle

  // When source columns are shown, push clusters apart within each lane so a
  // cluster's stacked source members never collide with its neighbours'.
  // stackHalf = half the height of a cluster's stack of shown members (at least one node).
  const stackHalf = (c) => (((members.get(c) ?? []).filter((m) => nodeIds.has(m)).length || 1) - 1) * SRC_GAP / 2 + NODE_H / 2
  if (nodes.some((n) => !n.isCluster)) {
    const byLane = new Map()
    for (const n of clusters) { if (!byLane.has(n.type)) byLane.set(n.type, []); byLane.get(n.type).push(n.id) }
    for (const ids of byLane.values()) {
      ids.sort((a, b) => y.get(a) - y.get(b))
      let prevBottom = -Infinity
      for (const id of ids) {
        const h = stackHalf(id)
        const cy = Math.max(y.get(id), prevBottom + MARGIN + h) // only ever moves a cluster down
        y.set(id, cy); prevBottom = cy + h
      }
    }
  }

  // Stack each cluster's shown members symmetrically around the cluster's y.
  for (const [c, ms] of members) {
    if (!y.has(c)) continue
    const shown = ms.filter((m) => nodeIds.has(m))
    shown.forEach((m, i) => y.set(m, y.get(c) + (i - (shown.length - 1) / 2) * SRC_GAP))
  }
  return y
}
@@ -0,0 +1,93 @@
1
+ // Parses merged + provenance TTL into org objects: each field's values and the
2
+ // :Source(s) that contributed them, ordered by config. Pure (ttl in → data out).
3
+ // Reads: TTL strings passed by mergeOrgs.js; resolves sources via sourceMeta.js
4
+ // Does: returns org[] (each {iri, label, type, fields[], sources[]})
5
+
6
+ import { CDP as NS, parseTtl, parseTtlStar, prefixesOf, shrink } from "@directory-builder/core/utils"
7
+ import { compareSources, loadSourceMeta } from "./sourceMeta.js"
8
+
9
+ const PROV_DERIVED_FROM = "http://www.w3.org/ns/prov#wasDerivedFrom"
10
+ const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
11
+ const RDF_REIFIES = "http://www.w3.org/1999/02/22-rdf-syntax-ns#reifies"
12
+ const FROM_SOURCE = `${NS}fromSource`
13
+
14
// Turns merged.ttl plus its provenance into one object per subject.
//   mergedTtl     — the merged entities
//   provTtl       — prov:wasDerivedFrom annotations on merged triples (may be "")
//   federationTtl — optional; supplies @prefix declarations for shortened IRIs
//                   and the declared :Source order
// Returns org[] in merged.ttl parse order; each org is
//   { iri, label, type?, fields: [{ predicate, predLabel, values }], columns }
// where a value is { value, raw, sources, records } and a column is
// { record, source } (source is undefined when the record's source is unknown).
export function loadMerge(mergedTtl, provTtl, federationTtl = "") {
  // IRIs render shortened against the federation's own @prefix declarations.
  const prefixes = { cdp: NS, ...prefixesOf(federationTtl) }
  const prefixedIri = (iri) => shrink(iri, prefixes)
  const mergedQuads = parseTtl(mergedTtl)
  const provQuads = parseTtlStar(provTtl)
  const sourceMeta = federationTtl ? loadSourceMeta(federationTtl) : new Map()

  // Each prov:wasDerivedFrom in provenance.ttl annotates a merged triple
  // `<<s p o>>` with the source record IRI it came from. The quoted triple can
  // arrive either directly as a Quad-typed subject, or behind a reifier node
  // linked by rdf:reifies — accept both shapes.
  const reifies = new Map()
  for (const q of provQuads) {
    if (q.predicate.value === RDF_REIFIES && q.object.termType === "Quad") reifies.set(q.subject.value, q.object)
  }
  const annotations = []
  for (const q of provQuads) {
    if (q.predicate.value !== PROV_DERIVED_FROM) continue
    const t = q.subject.termType === "Quad" ? q.subject : reifies.get(q.subject.value)
    if (t) annotations.push({ s: t.subject.value, p: t.predicate.value, o: t.object.value, rec: q.object.value })
  }
  // Resolve each record to its :Source via cdp:fromSource (reified in
  // provenance) so downstream code deals only in Source IRIs, not record IRIs.
  const sourceOfRecord = new Map()
  for (const { p, o, rec } of annotations) if (p === FROM_SOURCE) sourceOfRecord.set(rec, o)
  // Records without a cdp:fromSource annotation contribute no source, rather
  // than an `undefined` entry that would inflate the support count.
  const toSources = (records) => [...new Set(
    [...records].map((r) => sourceOfRecord.get(r)).filter((src) => src !== undefined),
  )]

  const provIndex = new Map() // "s\tp\to" → Set(record IRIs)
  const tripleKey = (s, p, o) => `${s}\t${p}\t${o}`
  for (const { s, p, o, rec } of annotations) {
    const key = tripleKey(s, p, o)
    if (!provIndex.has(key)) provIndex.set(key, new Set())
    provIndex.get(key).add(rec)
  }

  // Walk merged.ttl in parse order so card order = pipeline order.
  const orgs = []
  const orgIndex = new Map()
  const fieldIndexByOrg = new Map()
  for (const q of mergedQuads) {
    const orgIri = q.subject.value
    const predIri = q.predicate.value
    const value = q.object.value

    if (!orgIndex.has(orgIri)) {
      orgIndex.set(orgIri, orgs.length)
      fieldIndexByOrg.set(orgIri, new Map())
      orgs.push({ iri: orgIri, label: prefixedIri(orgIri), fields: [] })
    }
    const org = orgs[orgIndex.get(orgIri)]
    const fieldIndex = fieldIndexByOrg.get(orgIri)

    // rdf:type carries the entity class — surface it in the card header
    // (see OrgCard), not as a field row.
    if (predIri === RDF_TYPE) { org.type = prefixedIri(value); continue }

    if (!fieldIndex.has(predIri)) {
      fieldIndex.set(predIri, org.fields.length)
      org.fields.push({ predicate: predIri, predLabel: prefixedIri(predIri), values: [] })
    }
    const field = org.fields[fieldIndex.get(predIri)]
    const records = [...(provIndex.get(tripleKey(orgIri, predIri, value)) ?? [])]
    const sources = toSources(records)
    const displayValue = q.object.termType === "NamedNode" ? prefixedIri(value) : value
    field.values.push({ value: displayValue, raw: value, sources, records })
  }

  // Column order: declared sources first (by compareSources), then sources the
  // federation does not declare / records with no source, then record IRI.
  // compareSources is only consulted when both sources are declared, so an empty
  // or partial sourceMeta (e.g. the default federationTtl = "") cannot throw.
  const isDeclared = (source) => source !== undefined && sourceMeta.has(source)
  const compareColumns = (a, b) => {
    const da = isDeclared(a.source)
    const db = isDeclared(b.source)
    let bySource
    if (da && db) bySource = compareSources(a.source, b.source, sourceMeta)
    else if (da !== db) bySource = da ? -1 : 1
    else bySource = String(a.source ?? "").localeCompare(String(b.source ?? ""))
    return bySource || a.record.localeCompare(b.record)
  }

  // Per-field: sort values by source-count desc so the most-supported one is index 0.
  // Per-org: one column per contributing record (two records from the same source
  // get two columns).
  for (const org of orgs) {
    for (const f of org.fields) f.values.sort((a, b) => b.sources.length - a.sources.length)
    const all = new Set()
    for (const f of org.fields) for (const v of f.values) for (const r of v.records) all.add(r)
    org.columns = [...all].map((r) => ({ record: r, source: sourceOfRecord.get(r) })).sort(compareColumns)
  }
  return orgs
}
@@ -0,0 +1,130 @@
1
+ // Helper for the Pipeline view: turn the engines' step journals into a graph.
2
+ // Reads: the step-journal TTL strings (ingest-log.ttl + federate-log.ttl —
3
+ // evidence of what actually ran) and the federation TTL, passed by
4
+ // Pipeline.jsx
5
+ // Does: returns { nodes, edges } — Source lane-header nodes (transparent
6
+ // fill, light-gray border) above each Fetch step, step nodes labelled
7
+ // by their type (fetch/lift/clean/map/match/merge/resolve), and an
8
+ // End sink so resolve's output is shown on a visible edge, plus a
9
+ // boundary node feeding the Match step with the conventional
10
+ // match-knowledge file. Edge labels come from federation.ttl —
11
+ // a source's :format (uppercased) and :retrieval — or from the
12
+ // conventions: Lift emits Turtle (LIFTED_FORMAT), other steps their
13
+ // output file(s) per PATHS, resolved per source for Clean steps.
14
+ // Multiple outputs (merge's provenance) stack as newlines.
15
+
16
+ import { CDP as NS, formatFamily, LIFTED_FORMAT, localName, parseTtl, PATHS, sourceName } from "@directory-builder/core/utils"
17
+
18
+ const PPLAN_STEP = "http://purl.org/net/p-plan#Step"
19
+ const PPLAN_IS_PRECEDED_BY = "http://purl.org/net/p-plan#isPrecededBy"
20
+ const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
21
+ const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
22
+ const FROM_SOURCE = `${NS}fromSource`
23
+ const RETRIEVAL = `${NS}retrieval`
24
+ const FORMAT = `${NS}format`
25
+ const LANE_BORDER = "#bbb"
26
+
27
// Strips everything up to and including the last "/" of a path.
const basename = (path) => {
  const upToLastSlash = /^.*\//
  return path.replace(upToLastSlash, "")
}
28
+
29
// Conventional output path(s) of each step type, looked up from PATHS.
// Only Clean depends on the source name; the other step types ignore it.
const STEP_OUTPUTS = {
  Clean(name) { return [PATHS.cleaned(name)] },
  Map() { return [PATHS.mapped] },
  Match() { return [PATHS.matches] },
  Merge() { return [PATHS.merged, PATHS.provenance] },
  Resolve() { return [PATHS.final] },
}
37
+
38
// Turns the step journals into the Pipeline view's graph.
//   stepTtls      — array of journal TTL strings; falsy entries are skipped
//   federationTtl — federation config TTL (may be empty/falsy)
// Returns { nodes, edges }: Source lane headers, an Input boundary node for the
// Match step, one node per step, and an End sink after the Resolve step.
export function loadPipeline(stepTtls, federationTtl) {
  const quads = stepTtls.flatMap((ttl) => ttl ? parseTtl(ttl) : [])
  const fedQuads = federationTtl ? parseTtl(federationTtl) : []

  // A step is whatever the journals typed p-plan:Step; its display type is
  // the co-declared pipeline-NS class (:Fetch, :Lift, …) — no fixed list.
  const isStep = new Set()
  const nsTypeOf = new Map()
  const rawEdges = []
  const sourceOfStep = new Map()
  const formatBySubject = new Map()
  const retrievalBySubject = new Map()
  for (const q of [...quads, ...fedQuads]) {
    const p = q.predicate.value
    if (p === RDF_TYPE) {
      if (q.object.value === PPLAN_STEP) isStep.add(q.subject.value)
      else if (q.object.value.startsWith(NS)) nsTypeOf.set(q.subject.value, q.object.value.slice(NS.length))
    } else if (p === PPLAN_IS_PRECEDED_BY) rawEdges.push({ from: q.object.value, to: q.subject.value })
    else if (p === FROM_SOURCE) sourceOfStep.set(q.subject.value, q.object.value)
    else if (p === RETRIEVAL) retrievalBySubject.set(q.subject.value, q.object.value)
    else if (p === FORMAT) formatBySubject.set(q.subject.value, q.object.value)
  }
  // A step with no pipeline-NS class still gets a node: fall back to a generic
  // type instead of `undefined`, which would throw when the label is derived.
  const UNTYPED_STEP = "Step"
  const stepType = new Map([...isStep].map((iri) => [iri, nsTypeOf.get(iri) ?? UNTYPED_STEP]))

  // Basenames of a step's conventional output file(s), newline-joined; null if none.
  const fileLabel = (iri) => {
    const src = sourceOfStep.get(iri)
    const outs = (STEP_OUTPUTS[stepType.get(iri)] ?? (() => []))(src && sourceName(src)).map(basename)
    return outs.length ? outs.join("\n") : null
  }
  // A Fetch step emits its source's :format from federation.ttl; a Lift
  // step always emits LIFTED_FORMAT. Other step types have no format.
  const formatOf = (iri) => ({
    Fetch: formatBySubject.get(sourceOfStep.get(iri) ?? ""),
    Lift: LIFTED_FORMAT,
  })[stepType.get(iri)]
  // Edge label = the format the step emits (via formatFamily), else its
  // conventional output file(s); nothing hardcoded per source.
  const edgeLabel = (fromIri) => {
    const fmt = formatOf(fromIri)
    return fmt ? formatFamily(fmt) : fileLabel(fromIri)
  }

  const stepEdges = rawEdges.map((e) => ({ ...e, value: edgeLabel(e.from) ?? undefined, centered: true }))

  const sourceLabel = new Map()
  for (const q of fedQuads) {
    if (q.predicate.value === RDFS_LABEL) sourceLabel.set(q.subject.value, q.object.value)
  }

  const stepNodes = [...stepType].map(([iri, type]) => ({ id: iri, label: type.toLowerCase(), type }))

  // One lane header per source, however many Fetch steps reference it; every
  // Fetch step still gets its own edge from that header.
  const laneNodes = []
  const laneEdges = []
  const laneIds = new Set()
  for (const [iri, type] of stepType) {
    if (type !== "Fetch") continue
    const sourceIri = sourceOfStep.get(iri)
    if (!sourceIri) continue
    const laneId = `lane:${sourceIri}`
    if (!laneIds.has(laneId)) {
      laneIds.add(laneId)
      laneNodes.push({
        id: laneId,
        label: sourceLabel.get(sourceIri) ?? localName(sourceIri),
        type: "Source",
        color: "transparent",
        borderColor: LANE_BORDER,
      })
    }
    laneEdges.push({ from: laneId, to: iri, value: retrievalBySubject.get(sourceIri), centered: true })
  }

  // End sink so resolve's output is shown on a visible edge.
  const resolveIri = [...stepType].find(([, t]) => t === "Resolve")?.[0]
  const endNodes = []
  const endEdges = []
  if (resolveIri) {
    endNodes.push({ id: "end", label: "end", type: "End", color: "transparent", borderColor: LANE_BORDER })
    endEdges.push({ from: resolveIri, to: "end", value: edgeLabel(resolveIri) ?? undefined, centered: true })
  }

  // Side input: the Match step consumes the conventional match-knowledge
  // file — a boundary node whose edge is labelled with the file basename.
  const matchIri = [...stepType].find(([, t]) => t === "Match")?.[0]
  const inputNodes = []
  const inputEdges = []
  if (matchIri) {
    const inId = `input:${PATHS.matchKnowledge}`
    inputNodes.push({ id: inId, label: "input", type: "Input", color: "transparent", borderColor: LANE_BORDER })
    inputEdges.push({ from: inId, to: matchIri, value: basename(PATHS.matchKnowledge), centered: true, sideInput: true })
  }

  return {
    nodes: [...laneNodes, ...inputNodes, ...stepNodes, ...endNodes],
    edges: [...laneEdges, ...inputEdges, ...stepEdges, ...endEdges],
  }
}
@@ -0,0 +1,102 @@
1
+ // Helper for the Sources view: aggregate per-:Source facts (label, URL, format,
2
+ // field counts, record count, freshness) across config + pipeline data.
3
+ // Reads: federation, mapped, ingest-log TTL strings passed by Sources.jsx
4
+ // Does: returns source[] ({iri, label, format, totalFields, mappedFields, records, …})
5
+
6
+ import { CDP as NS, formatFamily, parseTtl, PATHS, sourceName, subjectsOfType } from "@directory-builder/core/utils"
7
+
8
+ const PROV_AT_TIME = "http://www.w3.org/ns/prov#atTime"
9
+ const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
10
+
11
// Adds `val` to the Set stored under `key`, creating that Set on first use.
const setAdd = (map, key, val) => {
  let bucket
  if (map.has(key)) {
    bucket = map.get(key)
  } else {
    bucket = new Set()
    map.set(key, bucket)
  }
  bucket.add(val)
}
15
+
16
// Aggregates per-:Source facts for the Sources view.
//   federationTtl — config TTL (required)
//   mappedTtl     — mapped.ttl content, or falsy when not available
//   ingestLogTtl  — ingest-log.ttl content, or falsy when not available
// Returns one object per :Source in federation.ttl:
//   { iri, label?, fetchUrl?, format?, totalFields, mappedFields,
//     staticSource?, records, lastHarvestedAt?, staticCommittedAt? }
export function loadSources(federationTtl, mappedTtl, ingestLogTtl) {
  const fedQuads = parseTtl(federationTtl)
  const mappedQuads = mappedTtl ? parseTtl(mappedTtl) : []
  const logQuads = ingestLogTtl ? parseTtl(ingestLogTtl) : []

  const sourceIris = subjectsOfType(fedQuads, `${NS}Source`)

  // Lazily-created result record per source IRI.
  const props = new Map()
  const get = (iri) => {
    if (!props.has(iri)) props.set(iri, { iri })
    return props.get(iri)
  }

  // Source-level: label, top-level fields, sub-fields, mappings.
  const topFieldsOf = new Map() // sourceIri -> Set<fieldIri>
  const subFieldsOf = new Map() // fieldIri -> Set<subFieldIri>
  const mappingSource = new Map() // mappingIri -> sourceIri
  const fmsOfMapping = new Map() // mappingIri -> Set<fieldMappingBnode>
  const fromsOfFm = new Map() // bnode -> Set<fieldIri>

  for (const q of fedQuads) {
    const p = q.predicate.value
    if (p === RDFS_LABEL && sourceIris.has(q.subject.value)) get(q.subject.value).label = q.object.value
    else if (p === `${NS}fetchUrl` && sourceIris.has(q.subject.value))
      get(q.subject.value).fetchUrl = q.object.value
    else if (p === `${NS}format` && sourceIris.has(q.subject.value))
      get(q.subject.value).format = formatFamily(q.object.value)
    else if (p === `${NS}hasField`) setAdd(topFieldsOf, q.subject.value, q.object.value)
    else if (p === `${NS}hasSubField`) setAdd(subFieldsOf, q.subject.value, q.object.value)
    else if (p === `${NS}fromSource`) mappingSource.set(q.subject.value, q.object.value)
    else if (p === `${NS}hasFieldMapping`) setAdd(fmsOfMapping, q.subject.value, q.object.value)
    else if (p === `${NS}from`) setAdd(fromsOfFm, q.subject.value, q.object.value)
  }

  for (const sourceIri of sourceIris) {
    // totalFields = top-level fields plus their direct sub-fields (one level).
    const top = topFieldsOf.get(sourceIri) ?? new Set()
    const all = new Set(top)
    for (const tf of top) for (const sf of subFieldsOf.get(tf) ?? []) all.add(sf)
    get(sourceIri).totalFields = all.size

    // mappedFields = distinct :from fields across every mapping of this source.
    const mapped = new Set()
    for (const [mappingIri, srcIri] of mappingSource) {
      if (srcIri !== sourceIri) continue
      for (const fm of fmsOfMapping.get(mappingIri) ?? []) {
        for (const f of fromsOfFm.get(fm) ?? []) mapped.add(f)
      }
    }
    get(sourceIri).mappedFields = mapped.size
  }

  // Static-file sources (no :fetchUrl) read from the conventional static dir.
  for (const sourceIri of sourceIris) {
    if (!get(sourceIri).fetchUrl) get(sourceIri).staticSource = PATHS.staticDir(sourceName(sourceIri))
  }

  // Records: count distinct subjects in mapped.ttl per source via cdp:fromSource.
  const FROM_SOURCE = `${NS}fromSource`
  const subjectsBySource = new Map()
  for (const q of mappedQuads) {
    if (q.predicate.value === FROM_SOURCE) setAdd(subjectsBySource, q.object.value, q.subject.value)
  }
  for (const sourceIri of sourceIris) {
    get(sourceIri).records = subjectsBySource.get(sourceIri)?.size ?? 0
  }

  // Latest harvest per source from ingest-log.ttl. Each harvest node carries
  // (:ofSource ?source, prov:atTime ?time) and optionally :staticCommittedAt.
  const harvestBnode = new Map()
  const harvest = (bnode) => {
    if (!harvestBnode.has(bnode)) harvestBnode.set(bnode, {})
    return harvestBnode.get(bnode)
  }
  for (const q of logQuads) {
    if (q.predicate.value === `${NS}ofSource`) harvest(q.subject.value).source = q.object.value
    else if (q.predicate.value === PROV_AT_TIME) harvest(q.subject.value).time = q.object.value
    else if (q.predicate.value === `${NS}staticCommittedAt`) harvest(q.subject.value).committedAt = q.object.value
  }
  // Timestamps are compared as strings (assumes a uniform, sortable ISO format —
  // TODO confirm). staticCommittedAt is taken from the same harvest that wins
  // lastHarvestedAt, so the two always describe one harvest regardless of the
  // order entries appear in the log.
  for (const { source, time, committedAt } of harvestBnode.values()) {
    if (!source || !time || !sourceIris.has(source)) continue
    const entry = get(source)
    const cur = entry.lastHarvestedAt
    if (cur && time < cur) continue // an older harvest never overrides a newer one
    entry.lastHarvestedAt = time
    if (committedAt) entry.staticCommittedAt = committedAt
    else delete entry.staticCommittedAt
  }

  return [...sourceIris].map((iri) => get(iri))
}
@@ -0,0 +1,9 @@
1
+ // Browser entry point: mounts the React app into the page.
2
+ // Reads: App.jsx
3
+ // Does: renders <App> into the #root element
4
+
5
+ import { createRoot } from "react-dom/client"
6
+ import App from "./App.jsx"
7
+ import React from "react"
8
+
9
// Mount <App> into the element with id "root".
// NOTE(review): assumes the host page provides that element — confirm against index.html.
createRoot(document.getElementById("root")).render(<App />)
@@ -0,0 +1,15 @@
1
+ // Builds the org lists for the Merge and Directory views, in one shared order.
2
+ // Reads: data/pipeline/{merged,provenance,final}.ttl, config/federation.ttl (via loadMerge.js)
3
+ // Does: exports mergedOrgs and finalOrgs (consumed by MergeTables, Directory)
4
+
5
+ import { loadMerge } from "./loadMerge.js"
6
+ import { isConflict } from "./OrgCard.jsx"
7
+ import { federationTtl, provenanceTtl as provTtl, mergedTtl, finalTtl } from "./instanceData.js"
8
+
9
// Number of an org's fields that isConflict() flags.
const conflictCount = (org) => org.fields.filter((field) => isConflict(field)).length

// Merge view order: most conflicting orgs first, ties broken by IRI.
const byConflictsThenIri = (a, b) => conflictCount(b) - conflictCount(a) || a.iri.localeCompare(b.iri)
export const mergedOrgs = loadMerge(mergedTtl, provTtl, federationTtl).sort(byConflictsThenIri)

// The directory mirrors the merge order so the same org sits in the same
// visual slot across pages; orgs unknown to the merge view sort last.
const orderIndex = new Map(mergedOrgs.map((org, slot) => [org.iri, slot]))
const slotOf = (org) => orderIndex.get(org.iri) ?? Infinity
export const finalOrgs = loadMerge(finalTtl, "", federationTtl).sort((a, b) => slotOf(a) - slotOf(b))
@@ -0,0 +1,81 @@
1
+ // Source identity lives in config: federation.ttl declares each :Source (label,
2
+ // skos:notation, order); its cleaned-data file follows from the source name by
3
+ // the PATHS conventions. JS never hardcodes a source name — it resolves records
4
+ // to a :Source via cdp:fromSource.
5
+ // Reads: TTL strings passed in (federation, mapped, ingest-log)
6
+ // Does: returns lookup maps + helpers (used by loadMerge, OrgCard, MapGraph, MatchGraph)
7
+
8
+ import { CDP as NS, parseTtl, PATHS, sourceName } from "@directory-builder/core/utils"
9
+
10
+ const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
11
+ const SKOS_NOTATION = "http://www.w3.org/2004/02/skos/core#notation"
12
+ const PROV_AT_TIME = "http://www.w3.org/ns/prov#atTime"
13
+ const HAS_SOURCE = `${NS}hasSource`
14
+ const FROM_SOURCE = `${NS}fromSource`
15
+ const OF_SOURCE = `${NS}ofSource`
16
+
17
// Builds Map<SourceIRI, { iri, label, notation, order }> from federation.ttl.
// `order` is the position at which the source first appears as a :hasSource
// object; label / notation are undefined when the file does not declare them.
export function loadSourceMeta(federationTtl) {
  const rank = new Map()
  const labels = new Map()
  const notations = new Map()
  for (const { subject, predicate, object } of parseTtl(federationTtl)) {
    switch (predicate.value) {
      case HAS_SOURCE:
        // First mention wins; the map's size is the next free position.
        if (!rank.has(object.value)) rank.set(object.value, rank.size)
        break
      case RDFS_LABEL:
        labels.set(subject.value, object.value)
        break
      case SKOS_NOTATION:
        notations.set(subject.value, object.value)
        break
    }
  }
  return new Map([...rank].map(([iri, order]) => [
    iri,
    { iri, label: labels.get(iri), notation: notations.get(iri), order },
  ]))
}
36
+
37
// Order two Source IRIs by their federation declaration order, then IRI.
//   a, b — Source IRIs (either may be undefined when a record's source is unknown)
//   meta — Map<SourceIRI, { order, … }> as built by loadSourceMeta
// Sources missing from `meta` (or undefined) sort after every declared source
// instead of throwing; among themselves they fall back to IRI order.
// Returns a negative, zero or positive number, suitable for Array.prototype.sort.
export function compareSources(a, b, meta) {
  const rank = (iri) => meta?.get(iri)?.order ?? Infinity
  const oa = rank(a)
  const ob = rank(b)
  // Equal ranks (including two unknown sources) are compared by IRI, which also
  // avoids the NaN that Infinity - Infinity would produce.
  if (oa !== ob) return oa - ob
  return String(a ?? "").localeCompare(String(b ?? ""))
}
43
+
44
// Builds Map<recordIri, SourceIRI> from the plain cdp:fromSource triples of a
// TTL string (e.g. mapped.ttl). A record declared more than once keeps its
// last source.
export function loadSourceOfRecord(ttl) {
  const pairs = [...parseTtl(ttl)]
    .filter((quad) => quad.predicate.value === FROM_SOURCE)
    .map((quad) => [quad.subject.value, quad.object.value])
  return new Map(pairs)
}
50
+
51
// Builds Map<SourceIRI, latest timestamp> from the ingest log: every node with
// both an :ofSource and a prov:atTime counts as one harvest of that source.
// Timestamps are compared as plain strings.
export function loadHarvestBySource(logTtl) {
  const sourceOfEntry = new Map()
  const timeOfEntry = new Map()
  for (const { subject, predicate, object } of parseTtl(logTtl)) {
    switch (predicate.value) {
      case OF_SOURCE:
        sourceOfEntry.set(subject.value, object.value)
        break
      case PROV_AT_TIME:
        timeOfEntry.set(subject.value, object.value)
        break
    }
  }

  const latest = new Map()
  sourceOfEntry.forEach((src, entry) => {
    const stamp = timeOfEntry.get(entry)
    if (!stamp) return // harvest entry without a time
    if (latest.has(src) && !(stamp > latest.get(src))) return // not newer than what we have
    latest.set(src, stamp)
  })
  return latest
}
66
+
67
// Builds Map<SourceIRI, cleaned-TTL raw string> for every source that appears
// as the object of a :fromSource triple in federation.ttl. Files are matched by
// basename only: the conventional cleaned path's file name against the file
// names of `rawByPath`'s keys (an object of path → raw string, e.g. the result
// of import.meta.glob(".../cleaned/*.ttl", ...)). Sources without a matching,
// non-empty file are left out.
export function loadCleanedBySource(federationTtl, rawByPath) {
  const fileNameOf = (path) => {
    const parts = path.split("/")
    return parts[parts.length - 1]
  }

  const rawByFileName = new Map()
  for (const [path, raw] of Object.entries(rawByPath)) rawByFileName.set(fileNameOf(path), raw)

  const cleaned = new Map()
  for (const quad of parseTtl(federationTtl)) {
    if (quad.predicate.value === FROM_SOURCE) {
      const sourceIri = quad.object.value
      const raw = rawByFileName.get(fileNameOf(PATHS.cleaned(sourceName(sourceIri))))
      if (raw) cleaned.set(sourceIri, raw)
    }
  }
  return cleaned
}