@directory-builder/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +112 -0
- package/bin/cli.js +38 -0
- package/example/README.md +64 -0
- package/example/config/federation.ttl +136 -0
- package/example/config/match-knowledge.ttl +8 -0
- package/example/sources/cityopen/clean.sparql +17 -0
- package/example/sources/cityopen/fetch.js +14 -0
- package/example/sources/cityopen/static/libraries.json +32 -0
- package/example/sources/civichub/clean.sparql +34 -0
- package/example/sources/civichub/fetch.js +14 -0
- package/example/sources/civichub/static/libraries.json +38 -0
- package/package.json +38 -0
- package/src/federate.js +571 -0
- package/src/index.js +6 -0
- package/src/ingest.js +158 -0
- package/src/lift/html.sparql +12 -0
- package/src/lift/json.sparql +12 -0
- package/src/pipeline.js +16 -0
- package/src/utils.js +152 -0
- package/src/webapp.js +41 -0
- package/webapp/index.html +11 -0
- package/webapp/src/About.jsx +24 -0
- package/webapp/src/App.jsx +96 -0
- package/webapp/src/Card.jsx +32 -0
- package/webapp/src/ColumnGraph.jsx +290 -0
- package/webapp/src/Directory.jsx +15 -0
- package/webapp/src/Download.jsx +174 -0
- package/webapp/src/MapGraph.jsx +244 -0
- package/webapp/src/MatchGraph.jsx +137 -0
- package/webapp/src/MergeTables.jsx +61 -0
- package/webapp/src/OrgCard.jsx +126 -0
- package/webapp/src/Pipeline.jsx +41 -0
- package/webapp/src/Query.jsx +165 -0
- package/webapp/src/Sources.jsx +52 -0
- package/webapp/src/instanceData.js +35 -0
- package/webapp/src/loadMap.js +276 -0
- package/webapp/src/loadMatch.js +228 -0
- package/webapp/src/loadMerge.js +93 -0
- package/webapp/src/loadPipeline.js +130 -0
- package/webapp/src/loadSources.js +102 -0
- package/webapp/src/main.jsx +9 -0
- package/webapp/src/mergeOrgs.js +15 -0
- package/webapp/src/sourceMeta.js +81 -0
- package/webapp/src/styles.css +23 -0
- package/webapp/vite.config.js +14 -0
- package/webapp/vite.js +28 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
// Match "lanes" view, derived entirely from federation.ttl — no assumptions about
|
|
2
|
+
// how many entity types there are or how they relate. One lane per :TargetSchema
|
|
3
|
+
// (ordered by the relationship hierarchy, roots left), each preceded by a tinted
|
|
4
|
+
// "source duplications" column; cross-lane edges come from every :hasRelationship.
|
|
5
|
+
// When the relationships form a tree the layout groups each subtree vertically
|
|
6
|
+
// (parent centred on its children); otherwise it just lays out gracefully.
|
|
7
|
+
// Reads: federation.ttl (schemas, classes, labels, relationships),
|
|
8
|
+
// matches.ttl (clusters + hasMember), merged.ttl (rdf:type, name, links)
|
|
9
|
+
// Does: returns everything <ColumnGraph> needs + a per-lane nodeY layout.
|
|
10
|
+
|
|
11
|
+
import { CDP as NS, localName, parseTtl, prefixesOf, shrink, subjectsOfType } from "@directory-builder/core/utils"
|
|
12
|
+
|
|
13
|
+
// Namespace IRIs. NS is the CDP export of the core utils; the pipeline
// vocabulary terms below are all built on it.
const CDF = "https://civic-data.de/federated-directory#" // NOTE(review): not referenced in the visible part of this module — confirm before removing
const S = "http://schema.org/"
const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
// Predicates matched against federation.ttl quads in readSchemas().
const HAS_TARGET_SCHEMA = `${NS}hasTargetSchema`
const TARGET_CLASS = `${NS}targetClass`
const TO_TARGET = `${NS}toTarget`
const HAS_RELATIONSHIP = `${NS}hasRelationship`
const TO_TARGET_SCHEMA = `${NS}toTargetSchema`
const TO_TARGET_FIELD = `${NS}toTargetField`
const TARGET_PREDICATE = `${NS}targetPredicate`
// Cluster class and membership predicate matched against matches.ttl quads in loadMatch().
const MATCH_CLUSTER = `${NS}MatchCluster`
const HAS_MEMBER = `${NS}hasMember`
// Label predicates matched against merged.ttl quads in loadMatch().
const NAME = `${S}name`
const CATEGORY = `${S}category` // label fallback for entities with no name (e.g. AWO services)

// Lane colours, assigned by hierarchy position; cycles if there are more lanes.
const PALETTE = ["#cdddff", "#f7d2e3", "#cfe9d4", "#ffe2b8", "#e3d4f7", "#cfeef0", "#f3d9c0"]
const SRC_COLOR = "#e9e9ee" // colour given to every "<lane>Src" column
const GAP = 84 // vertical spacing between single-member leaves
const SRC_GAP = 56 // spacing of a cluster's stacked source members
const NODE_H = 48 // approx node height — keeps source stacks from colliding
const MARGIN = GAP - NODE_H // inter-cluster gap; keeps single-member spacing == GAP
|
|
36
|
+
|
|
37
|
+
// Upper-case the first character of a string; the remainder is left as is.
const cap = (s) => `${s.charAt(0).toUpperCase()}${s.slice(1)}`
|
|
38
|
+
// Blend a "#rrggbb" colour toward white by t∈[0,1]: t = 0 returns the colour
// unchanged, t = 1 returns white. Gives the pale band tint of an entity column.
const lighten = (hex, t) => {
  const rgb = parseInt(hex.slice(1), 16)
  // Mix each 8-bit channel separately and pack it back at its own bit offset.
  const mixed = [16, 8, 0].reduce((packed, shift) => {
    const channel = (rgb >> shift) & 255
    return packed | (Math.round(channel + (255 - channel) * t) << shift)
  }, 0)
  return `#${mixed.toString(16).padStart(6, "0")}`
}
|
|
44
|
+
|
|
45
|
+
// ---- federation.ttl → schema model -------------------------------------
|
|
46
|
+
|
|
47
|
+
/**
 * Build the schema model from federation.ttl.
 *
 * Collects every :hasTargetSchema object in document order, derives the
 * schema-level relationship graph from the :hasRelationship nodes, and orders
 * the schemas into lanes so that a schema sits left of anything relating to it.
 *
 * @param {string} federationTtl Turtle text of the federation config.
 * @returns {{lanes: object[], relPreds: Set<string>}} One lane per target
 *   schema ({schema, cls, key, label, title, color}) plus the set of
 *   relationship predicates that drive the cross-lane edges.
 */
function readSchemas(federationTtl) {
  const quads = parseTtl(federationTtl)
  const classPrefixes = prefixesOf(federationTtl)

  const order = [] // schema IRIs in document order
  const targetClass = new Map() // schema → its :targetClass
  const label = new Map() // any subject → its first rdfs:label
  const toTarget = new Map() // mapping → its :toTarget schema
  const relMapping = new Map() // rel bnode → its mapping
  const relToSchema = new Map() // rel bnode → :toTargetSchema
  const relToField = new Map() // rel bnode → :toTargetField
  const fieldPred = new Map() // target field → :targetPredicate

  // One collector per predicate of interest; any other predicate is ignored.
  const collectors = new Map([
    [HAS_TARGET_SCHEMA, (s, o) => { order.push(o) }],
    [TARGET_CLASS, (s, o) => { targetClass.set(s, o) }],
    [RDFS_LABEL, (s, o) => { if (!label.has(s)) label.set(s, o) }],
    [TO_TARGET, (s, o) => { toTarget.set(s, o) }],
    [HAS_RELATIONSHIP, (s, o) => { relMapping.set(o, s) }],
    [TO_TARGET_SCHEMA, (s, o) => { relToSchema.set(s, o) }],
    [TO_TARGET_FIELD, (s, o) => { relToField.set(s, o) }],
    [TARGET_PREDICATE, (s, o) => { fieldPred.set(s, o) }],
  ])
  for (const { subject, predicate, object } of quads) {
    collectors.get(predicate.value)?.(subject.value, object.value)
  }

  // Schema-level relationships: from = the mapping's :toTarget, to = the
  // relationship's :toTargetSchema, predicate = the target field's
  // :targetPredicate. Incomplete relationships are dropped.
  const relPreds = new Set()
  const out = new Map() // schema → Set(schema it points at)
  relMapping.forEach((mapping, rel) => {
    const from = toTarget.get(mapping)
    const to = relToSchema.get(rel)
    const pred = fieldPred.get(relToField.get(rel))
    if (from && to && pred) {
      relPreds.add(pred)
      const targets = out.get(from) ?? new Set()
      targets.add(to)
      out.set(from, targets)
    }
  })

  // Lane order: level = longest chain of out-edges, so schemas that point at
  // nothing (level 0) come first; ties keep document order.
  const docIdx = new Map(order.map((schema, i) => [schema, i]))
  const levelMemo = new Map()
  const levelOf = (schema, path = new Set()) => {
    if (levelMemo.has(schema)) return levelMemo.get(schema)
    if (path.has(schema)) return 0 // already on the current path: cycle guard
    path.add(schema)
    const chains = [...(out.get(schema) ?? [])].map((next) => 1 + levelOf(next, path))
    path.delete(schema)
    const level = Math.max(0, ...chains)
    levelMemo.set(schema, level)
    return level
  }
  const byLevelThenDoc = (a, b) => levelOf(a) - levelOf(b) || docIdx.get(a) - docIdx.get(b)
  const ordered = [...order].sort(byLevelThenDoc)

  const lanes = ordered.map((schema, i) => {
    const cls = targetClass.get(schema)
    const key = localName(schema).replace(/Schema$/, "")
    // Label preference: the schema's own label, then its class's label, then the capitalised key.
    const name = label.get(schema) ?? (cls && label.get(cls)) ?? cap(key)
    const clsLine = cls ? shrink(cls, classPrefixes) : ""
    return {
      schema,
      cls,
      key,
      label: name,
      title: `${name}\n${clsLine}`,
      color: PALETTE[i % PALETTE.length],
    }
  })
  return { lanes, relPreds }
}
|
|
114
|
+
|
|
115
|
+
// ---- main ---------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
/**
 * Build the node/edge/column data for the match "lanes" graph.
 *
 * @param {string} federationTtl Turtle text of the federation config (lanes, relationship predicates).
 * @param {string} matchesTtl Turtle text holding the :MatchCluster subjects and their :hasMember links.
 * @param {string} mergedTtl Turtle text of the merged entities (rdf:type, name/category, relationship links).
 * @param {{showDuplications?: boolean, show1to1?: boolean}} [options]
 *   showDuplications: also emit the source-member nodes of each cluster;
 *   show1to1: when duplications are shown, include clusters with a single member too.
 * @returns {object} { nodes, edges, members, lanes, columns, colors,
 *   columnTitles, columnBands, columnHeaderStyle, nodeY } where nodeY is the
 *   Map(id → y) computed by layout().
 */
export function loadMatch(federationTtl, matchesTtl, mergedTtl, { showDuplications = false, show1to1 = false } = {}) {
  const { lanes, relPreds } = readSchemas(federationTtl)
  const keyOfClass = new Map(lanes.filter((l) => l.cls).map((l) => [l.cls, l.key]))
  const laneIdx = new Map(lanes.map((l, i) => [l.key, i]))

  // Two columns per lane: the "<key>Src" source column, then the entity column.
  const columns = lanes.flatMap((l) => [`${l.key}Src`, l.key])
  const colors = {}, columnTitles = {}, columnBands = {}, columnHeaderStyle = {}
  for (const l of lanes) {
    colors[l.key] = l.color; colors[`${l.key}Src`] = SRC_COLOR
    columnTitles[l.key] = l.title; columnTitles[`${l.key}Src`] = "source duplications"
    columnBands[l.key] = lighten(l.color, 0.6) // entity column gets a brighter tint of its nodes
    columnHeaderStyle[`${l.key}Src`] = { fontSize: 10, color: "#aaa" } // de-emphasise the source-column labels
  }

  const merged = parseTtl(mergedTtl)
  const tierOf = new Map() // entity → lane key, via its rdf:type
  const nameOf = new Map(), catOf = new Map() // first schema:name / schema:category per entity
  for (const q of merged) {
    if (q.predicate.value === RDF_TYPE && keyOfClass.has(q.object.value)) tierOf.set(q.subject.value, keyOfClass.get(q.object.value))
    else if (q.predicate.value === NAME && !nameOf.has(q.subject.value)) nameOf.set(q.subject.value, q.object.value)
    else if (q.predicate.value === CATEGORY && !catOf.has(q.subject.value)) catOf.set(q.subject.value, q.object.value)
  }

  const quads = parseTtl(matchesTtl)
  const clusters = subjectsOfType(quads, MATCH_CLUSTER)
  const members = new Map() // cluster → its :hasMember objects, in parse order
  for (const q of quads) if (q.predicate.value === HAS_MEMBER) {
    if (!members.has(q.subject.value)) members.set(q.subject.value, [])
    members.get(q.subject.value).push(q.object.value)
  }

  const nodes = []
  const edges = []
  const nodeIds = new Set() // every emitted node (clusters and shown source members)
  const clusterIds = new Set() // only the cluster nodes that were placed in a lane
  for (const c of clusters) {
    const tier = tierOf.get(c)
    if (!tier) continue // cluster whose type maps to no lane is not drawn
    nodes.push({ id: c, type: tier, label: nameOf.get(c) ?? catOf.get(c) ?? localName(c), isCluster: true })
    nodeIds.add(c)
    clusterIds.add(c)
    const ms = members.get(c) ?? []
    if (!showDuplications || (!show1to1 && ms.length <= 1)) continue // master off → no source cols; hide 1:1 unless "show 1:1"
    for (const src of ms) {
      nodes.push({ id: src, type: `${tier}Src`, label: localName(src) })
      nodeIds.add(src)
      edges.push({ from: src, to: c }) // dedup (hasMember) edge
    }
  }

  // Cross-lane links: any merged triple whose predicate is a declared relationship
  // and whose ends are both placed cluster nodes. Checking clusterIds (rather than
  // just "has a lane type") keeps edges from pointing at entities that were never
  // emitted as nodes. Stored object→subject so the parent (object) sits left of
  // the child (subject) and edges flow toward the root.
  for (const q of merged) {
    if (relPreds.has(q.predicate.value) && q.object.termType === "NamedNode"
      && clusterIds.has(q.subject.value) && clusterIds.has(q.object.value)) {
      edges.push({ from: q.object.value, to: q.subject.value, rel: true })
    }
  }

  return { nodes, edges, members, lanes, columns, colors, columnTitles, columnBands, columnHeaderStyle,
    nodeY: layout(nodes, edges, members, nodeIds, laneIdx) }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Tidy-tree vertical layout: place leaves on a running cursor, centre each
 * parent on its children. Roots (clusters with no parent) are visited in lane
 * order, so the upper lanes' subtrees group at the top.
 *
 * Graceful on non-tree graphs: a node is placed once (first visit), and a node
 * that is still being placed further up the recursion is skipped as a child,
 * so multiple parents, cycles and self-links cannot loop or duplicate.
 *
 * @param {object[]} nodes Graph nodes; cluster nodes carry isCluster: true and their lane key in type.
 * @param {object[]} edges Graph edges; only those flagged rel: true (from = parent, to = child) shape the tree.
 * @param {Map<string, string[]>} members cluster id → source member ids.
 * @param {Set<string>} nodeIds Ids of all emitted nodes; decides which members are shown.
 * @param {Map<string, number>} laneIdx lane key → lane position.
 * @returns {Map<string, number>} id → y coordinate.
 */
function layout(nodes, edges, members, nodeIds, laneIdx) {
  const childrenOf = new Map()
  const hasParent = new Set()
  for (const e of edges) {
    if (!e.rel) continue
    if (!childrenOf.has(e.from)) childrenOf.set(e.from, [])
    childrenOf.get(e.from).push(e.to)
    hasParent.add(e.to)
  }

  const y = new Map()
  const visiting = new Set() // ids whose subtree is currently being placed
  let cursor = 0
  const mean = (a) => a.reduce((s, v) => s + v, 0) / a.length
  const place = (id) => {
    // y is only known after the children are placed, so the in-progress set is
    // what stops a cycle (or a self-link) from recursing forever.
    if (y.has(id) || visiting.has(id)) return
    visiting.add(id)
    const kids = childrenOf.get(id) ?? []
    kids.forEach((k) => place(k))
    visiting.delete(id)
    // Children skipped by the cycle guard have no y yet; leave them out of the mean.
    const placedKids = kids.filter((k) => y.has(k))
    if (placedKids.length) y.set(id, mean(placedKids.map((k) => y.get(k))))
    else { y.set(id, cursor); cursor += GAP }
  }
  const clusters = nodes.filter((n) => n.isCluster)
  clusters.filter((n) => !hasParent.has(n.id)).sort((a, b) => laneIdx.get(a.type) - laneIdx.get(b.type)).forEach((n) => place(n.id))
  clusters.filter((n) => !y.has(n.id)).forEach((n) => place(n.id)) // safety net: clusters reachable only through a cycle

  // When source columns are shown, push clusters apart within each lane so a
  // cluster's stacked source members never collide with its neighbours'.
  // stackHalf = half the height of the cluster's shown member stack.
  const stackHalf = (c) => (((members.get(c) ?? []).filter((m) => nodeIds.has(m)).length || 1) - 1) * SRC_GAP / 2 + NODE_H / 2
  if (nodes.some((n) => !n.isCluster)) {
    const byLane = new Map()
    for (const n of clusters) { if (!byLane.has(n.type)) byLane.set(n.type, []); byLane.get(n.type).push(n.id) }
    for (const ids of byLane.values()) {
      ids.sort((a, b) => y.get(a) - y.get(b))
      let prevBottom = -Infinity
      for (const id of ids) {
        const h = stackHalf(id)
        const cy = Math.max(y.get(id), prevBottom + MARGIN + h) // only ever moves a cluster down
        y.set(id, cy); prevBottom = cy + h
      }
    }
  }

  // Stack each cluster's shown members symmetrically around the cluster's y.
  for (const [c, ms] of members) {
    if (!y.has(c)) continue
    const shown = ms.filter((m) => nodeIds.has(m))
    shown.forEach((m, i) => y.set(m, y.get(c) + (i - (shown.length - 1) / 2) * SRC_GAP))
  }
  return y
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
// Parses merged + provenance TTL into org objects: each field's values and the
|
|
2
|
+
// :Source(s) that contributed them, ordered by config. Pure (ttl in → data out).
|
|
3
|
+
// Reads: TTL strings passed by mergeOrgs.js; resolves sources via sourceMeta.js
|
|
4
|
+
// Does: returns org[] (each {iri, label, type, fields[], columns[]}; each field value lists its sources[] and records[])
|
|
5
|
+
|
|
6
|
+
import { CDP as NS, parseTtl, parseTtlStar, prefixesOf, shrink } from "@directory-builder/core/utils"
|
|
7
|
+
import { compareSources, loadSourceMeta } from "./sourceMeta.js"
|
|
8
|
+
|
|
9
|
+
// Predicate IRIs matched by loadMerge(): the provenance link from an annotated
// triple to its source record, rdf:type, the reifier → quoted-triple link, and
// the pipeline's record → source predicate.
const PROV_DERIVED_FROM = "http://www.w3.org/ns/prov#wasDerivedFrom"
const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
const RDF_REIFIES = "http://www.w3.org/1999/02/22-rdf-syntax-ns#reifies"
const FROM_SOURCE = `${NS}fromSource`
|
|
13
|
+
|
|
14
|
+
/**
 * Turn merged + provenance Turtle into org objects for display.
 *
 * @param {string} mergedTtl Turtle text of the merged entities.
 * @param {string} provTtl Turtle text whose prov:wasDerivedFrom statements
 *   annotate merged triples with the source record they came from.
 * @param {string} [federationTtl=""] Federation config; supplies the prefixes
 *   used to shorten IRIs and, when non-empty, the source metadata used to
 *   order columns.
 * @returns {object[]} One object per subject of mergedTtl, in parse order:
 *   { iri, label, type?, fields[], columns[] }.
 */
export function loadMerge(mergedTtl, provTtl, federationTtl = "") {
  // IRIs render shortened against the federation's own @prefix declarations.
  const prefixes = { cdp: NS, ...prefixesOf(federationTtl) }
  const prefixedIri = (iri) => shrink(iri, prefixes)
  const mergedQuads = parseTtl(mergedTtl)
  const provQuads = parseTtlStar(provTtl)
  const sourceMeta = federationTtl ? loadSourceMeta(federationTtl) : new Map()

  // Each prov:wasDerivedFrom annotates a merged triple `<<s p o>>` with the
  // record IRI it came from. The quoted triple arrives either directly as a
  // Quad-typed subject or behind a reifier node linked by rdf:reifies, so
  // first index the reifiers, then resolve every annotation through either shape.
  const quotedByReifier = new Map()
  for (const { subject, predicate, object } of provQuads) {
    const linksQuotedTriple = predicate.value === RDF_REIFIES && object.termType === "Quad"
    if (linksQuotedTriple) quotedByReifier.set(subject.value, object)
  }

  const tripleKey = (s, p, o) => `${s}\t${p}\t${o}`
  const sourceOfRecord = new Map() // record IRI → :Source IRI (from annotated cdp:fromSource triples)
  const provIndex = new Map() // tripleKey → Set(record IRI)
  for (const { subject, predicate, object } of provQuads) {
    if (predicate.value !== PROV_DERIVED_FROM) continue
    const quoted = subject.termType === "Quad" ? subject : quotedByReifier.get(subject.value)
    if (!quoted) continue
    const s = quoted.subject.value
    const p = quoted.predicate.value
    const o = quoted.object.value
    const rec = object.value
    if (p === FROM_SOURCE) sourceOfRecord.set(rec, o)
    const key = tripleKey(s, p, o)
    const recs = provIndex.get(key) ?? new Set()
    recs.add(rec)
    provIndex.set(key, recs)
  }
  // Distinct sources behind a list of records, in first-seen order.
  const toSources = (records) => Array.from(new Set(Array.from(records, (r) => sourceOfRecord.get(r))))

  // Walk merged.ttl in parse order so card order = pipeline order.
  const orgs = []
  const entryByIri = new Map() // org IRI → { org, fieldByPred }
  for (const { subject, predicate, object } of mergedQuads) {
    const orgIri = subject.value
    const predIri = predicate.value
    const value = object.value

    let entry = entryByIri.get(orgIri)
    if (!entry) {
      entry = { org: { iri: orgIri, label: prefixedIri(orgIri), fields: [] }, fieldByPred: new Map() }
      entryByIri.set(orgIri, entry)
      orgs.push(entry.org)
    }
    const { org, fieldByPred } = entry

    // rdf:type carries the entity class — surfaced on the org itself
    // (see OrgCard), not as a field row.
    if (predIri === RDF_TYPE) {
      org.type = prefixedIri(value)
      continue
    }

    let field = fieldByPred.get(predIri)
    if (!field) {
      field = { predicate: predIri, predLabel: prefixedIri(predIri), values: [] }
      fieldByPred.set(predIri, field)
      org.fields.push(field)
    }
    const records = [...(provIndex.get(tripleKey(orgIri, predIri, value)) ?? [])]
    const sources = toSources(records)
    const isIri = object.termType === "NamedNode"
    field.values.push({ value: isIri ? prefixedIri(value) : value, raw: value, sources, records })
  }

  // Per-field: sort values by source-count desc so the most-supported one is index 0.
  // Per-org: one column per contributing record (two records from the same source
  // get two columns), ordered by source then record IRI.
  const bySupport = (a, b) => b.sources.length - a.sources.length
  const bySourceThenRecord = (a, b) => compareSources(a.source, b.source, sourceMeta) || a.record.localeCompare(b.record)
  for (const org of orgs) {
    org.fields.forEach((f) => f.values.sort(bySupport))
    const contributing = new Set(org.fields.flatMap((f) => f.values.flatMap((v) => v.records)))
    org.columns = Array.from(contributing, (record) => ({ record, source: sourceOfRecord.get(record) }))
      .sort(bySourceThenRecord)
  }
  return orgs
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// Helper for the Pipeline view: turn the engines' step journals into a graph.
|
|
2
|
+
// Reads: the step-journal TTL strings (ingest-log.ttl + federate-log.ttl —
|
|
3
|
+
// evidence of what actually ran) and the federation TTL, passed by
|
|
4
|
+
// Pipeline.jsx
|
|
5
|
+
// Does: returns { nodes, edges } — Source lane-header nodes (transparent
|
|
6
|
+
// fill, light-gray border) above each Fetch step, step nodes labelled
|
|
7
|
+
// by their type (fetch/lift/clean/map/match/merge/resolve), and an
|
|
8
|
+
// End sink so resolve's output is shown on a visible edge, plus a
|
|
9
|
+
// boundary node feeding the Match step with the conventional
|
|
10
|
+
// match-knowledge file. Edge labels come from federation.ttl —
|
|
11
|
+
// a source's :format (uppercased) and :retrieval — or from the
|
|
12
|
+
// conventions: Lift emits Turtle (LIFTED_FORMAT), other steps their
|
|
13
|
+
// output file(s) per PATHS, resolved per source for Clean steps.
|
|
14
|
+
// Multiple outputs (merge's provenance) stack as newlines.
|
|
15
|
+
|
|
16
|
+
import { CDP as NS, formatFamily, LIFTED_FORMAT, localName, parseTtl, PATHS, sourceName } from "@directory-builder/core/utils"
|
|
17
|
+
|
|
18
|
+
// Class and predicate IRIs matched against the journal and federation quads in loadPipeline().
const PPLAN_STEP = "http://purl.org/net/p-plan#Step"
const PPLAN_IS_PRECEDED_BY = "http://purl.org/net/p-plan#isPrecededBy"
const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
const FROM_SOURCE = `${NS}fromSource`
const RETRIEVAL = `${NS}retrieval`
const FORMAT = `${NS}format`
// Border colour of the transparent boundary nodes (Source lanes, Input, End).
const LANE_BORDER = "#bbb"
|
|
26
|
+
|
|
27
|
+
// Drop the directory part of a path: everything up to and including the last "/".
const basename = (path) => {
  const dirPart = /^.*\//.exec(path)
  return dirPart ? path.slice(dirPart[0].length) : path
}
|
|
28
|
+
|
|
29
|
+
// Output file(s) per step type, by the PATHS conventions (name = source name).
// Keyed by the step's display type; every entry returns an array of paths.
// Only Clean uses its argument. Step types with no entry here get no file
// label from this table.
const STEP_OUTPUTS = {
  Clean: (name) => [PATHS.cleaned(name)],
  Map: () => [PATHS.mapped],
  Match: () => [PATHS.matches],
  Merge: () => [PATHS.merged, PATHS.provenance],
  Resolve: () => [PATHS.final],
}
|
|
37
|
+
|
|
38
|
+
/**
 * Turn the engines' step journals into a graph for the Pipeline view.
 *
 * @param {Array<string|null|undefined>} stepTtls Step-journal Turtle texts; empty entries are skipped.
 * @param {string} [federationTtl] Federation config Turtle; supplies source
 *   labels, formats and retrieval values. May be empty.
 * @returns {{nodes: object[], edges: object[]}} Source lane headers, the
 *   match-knowledge input node, one node per step, and an End sink, with the
 *   edges that connect them.
 */
export function loadPipeline(stepTtls, federationTtl) {
  const quads = stepTtls.flatMap((ttl) => ttl ? parseTtl(ttl) : [])
  const fedQuads = federationTtl ? parseTtl(federationTtl) : []

  // A step is whatever the journals typed p-plan:Step; its display type is
  // the co-declared pipeline-NS class (:Fetch, :Lift, …) — no fixed list.
  const isStep = new Set()
  const nsTypeOf = new Map()
  const rawEdges = []
  const sourceOfStep = new Map()
  const formatBySubject = new Map()
  const retrievalBySubject = new Map()
  for (const q of [...quads, ...fedQuads]) {
    const p = q.predicate.value
    if (p === RDF_TYPE) {
      if (q.object.value === PPLAN_STEP) isStep.add(q.subject.value)
      else if (q.object.value.startsWith(NS)) nsTypeOf.set(q.subject.value, q.object.value.slice(NS.length))
    } else if (p === PPLAN_IS_PRECEDED_BY) rawEdges.push({ from: q.object.value, to: q.subject.value })
    else if (p === FROM_SOURCE) sourceOfStep.set(q.subject.value, q.object.value)
    else if (p === RETRIEVAL) retrievalBySubject.set(q.subject.value, q.object.value)
    else if (p === FORMAT) formatBySubject.set(q.subject.value, q.object.value)
  }
  // step IRI → display type; undefined when the journal declared no pipeline-NS class for it.
  const stepType = new Map([...isStep].map((iri) => [iri, nsTypeOf.get(iri)]))

  // Conventional output file basenames of a step, newline-joined; null when none.
  const fileLabel = (iri) => {
    const src = sourceOfStep.get(iri)
    const outs = (STEP_OUTPUTS[stepType.get(iri)] ?? (() => []))(src && sourceName(src)).map(basename)
    return outs.length ? outs.join("\n") : null
  }
  // A Fetch step emits its source's :format from federation.ttl; a Lift
  // step always emits Turtle (engine invariant, see LIFTED_FORMAT).
  const formatOf = (iri) => ({
    Fetch: formatBySubject.get(sourceOfStep.get(iri) ?? ""),
    Lift: LIFTED_FORMAT,
  })[stepType.get(iri)]
  // Edge label = the format the step emits (its file-type IRI's short label),
  // else its conventional output file(s); nothing hardcoded per source.
  const edgeLabel = (fromIri) => {
    const fmt = formatOf(fromIri)
    return fmt ? formatFamily(fmt) : fileLabel(fromIri)
  }

  const stepEdges = rawEdges.map((e) => ({ ...e, value: edgeLabel(e.from) ?? undefined, centered: true }))

  const sourceLabel = new Map()
  for (const q of fedQuads) {
    if (q.predicate.value === RDFS_LABEL) sourceLabel.set(q.subject.value, q.object.value)
  }

  // A step without a pipeline-NS class has no display type; label it with its
  // IRI's local name instead of calling toLowerCase() on undefined.
  const stepNodes = [...stepType].map(([iri, type]) => ({ id: iri, label: type?.toLowerCase() ?? localName(iri), type }))

  // One lane-header node per source, with an edge into each of its Fetch steps.
  const laneNodes = []
  const laneEdges = []
  const seenLanes = new Set() // several Fetch steps may share a source; node ids must stay unique
  for (const [iri, type] of stepType) {
    if (type !== "Fetch") continue
    const sourceIri = sourceOfStep.get(iri)
    if (!sourceIri) continue
    const laneId = `lane:${sourceIri}`
    if (!seenLanes.has(laneId)) {
      seenLanes.add(laneId)
      laneNodes.push({
        id: laneId,
        label: sourceLabel.get(sourceIri) ?? localName(sourceIri),
        type: "Source",
        color: "transparent",
        borderColor: LANE_BORDER,
      })
    }
    laneEdges.push({ from: laneId, to: iri, value: retrievalBySubject.get(sourceIri), centered: true })
  }

  // End sink so resolve's output (final.ttl) is shown on a visible edge.
  const resolveIri = [...stepType].find(([, t]) => t === "Resolve")?.[0]
  const endNodes = []
  const endEdges = []
  if (resolveIri) {
    endNodes.push({ id: "end", label: "end", type: "End", color: "transparent", borderColor: LANE_BORDER })
    endEdges.push({ from: resolveIri, to: "end", value: edgeLabel(resolveIri) ?? undefined, centered: true })
  }

  // Side input: the Match step consumes the conventional match-knowledge
  // file — a boundary node whose edge is labelled with the file basename.
  const matchIri = [...stepType].find(([, t]) => t === "Match")?.[0]
  const inputNodes = []
  const inputEdges = []
  if (matchIri) {
    const inId = `input:${PATHS.matchKnowledge}`
    inputNodes.push({ id: inId, label: "input", type: "Input", color: "transparent", borderColor: LANE_BORDER })
    inputEdges.push({ from: inId, to: matchIri, value: basename(PATHS.matchKnowledge), centered: true, sideInput: true })
  }

  return {
    nodes: [...laneNodes, ...inputNodes, ...stepNodes, ...endNodes],
    edges: [...laneEdges, ...inputEdges, ...stepEdges, ...endEdges],
  }
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// Helper for the Sources view: aggregate per-:Source facts (label, URL, format,
|
|
2
|
+
// field counts, record count, freshness) across config + pipeline data.
|
|
3
|
+
// Reads: federation, mapped, ingest-log TTL strings passed by Sources.jsx
|
|
4
|
+
// Does: returns source[] ({iri, label, format, totalFields, mappedFields, records, …})
|
|
5
|
+
|
|
6
|
+
import { CDP as NS, formatFamily, parseTtl, PATHS, sourceName, subjectsOfType } from "@directory-builder/core/utils"
|
|
7
|
+
|
|
8
|
+
// Predicate IRIs matched in loadSources(): the harvest timestamp in the
// ingest log, and the source label in federation.ttl.
const PROV_AT_TIME = "http://www.w3.org/ns/prov#atTime"
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
|
10
|
+
|
|
11
|
+
// Add val to the Set stored under key in a Map of Sets, creating the Set on first use.
const setAdd = (map, key, val) => {
  if (map.has(key)) {
    map.get(key).add(val)
    return
  }
  map.set(key, new Set([val]))
}
|
|
15
|
+
|
|
16
|
+
/**
 * Aggregate per-:Source facts for the Sources view.
 *
 * @param {string} federationTtl Federation config Turtle (sources, fields, mappings).
 * @param {string} [mappedTtl] mapped.ttl text; used to count records per source. May be empty.
 * @param {string} [ingestLogTtl] ingest-log.ttl text; used for harvest times. May be empty.
 * @returns {object[]} One object per :Source subject: { iri, label?, fetchUrl?,
 *   format?, totalFields, mappedFields, staticSource?, records,
 *   lastHarvestedAt?, staticCommittedAt? }.
 */
export function loadSources(federationTtl, mappedTtl, ingestLogTtl) {
  const FETCH_URL = `${NS}fetchUrl`
  const FORMAT = `${NS}format`
  const HAS_FIELD = `${NS}hasField`
  const HAS_SUB_FIELD = `${NS}hasSubField`
  const FROM_SOURCE = `${NS}fromSource`
  const HAS_FIELD_MAPPING = `${NS}hasFieldMapping`
  const FROM = `${NS}from`
  const OF_SOURCE = `${NS}ofSource`
  const STATIC_COMMITTED_AT = `${NS}staticCommittedAt`

  const fedQuads = parseTtl(federationTtl)
  const mappedQuads = mappedTtl ? parseTtl(mappedTtl) : []
  const logQuads = ingestLogTtl ? parseTtl(ingestLogTtl) : []

  const sourceIris = subjectsOfType(fedQuads, `${NS}Source`)

  // Result object per source, created on first access.
  const props = new Map()
  const get = (iri) => {
    let entry = props.get(iri)
    if (!entry) {
      entry = { iri }
      props.set(iri, entry)
    }
    return entry
  }

  // Source-level: label, top-level fields, sub-fields, mappings.
  const topFieldsOf = new Map() // sourceIri -> Set<fieldIri>
  const subFieldsOf = new Map() // fieldIri -> Set<subFieldIri>
  const mappingSource = new Map() // mappingIri -> sourceIri
  const fmsOfMapping = new Map() // mappingIri -> Set<fieldMappingBnode>
  const fromsOfFm = new Map() // bnode -> Set<fieldIri>

  for (const { subject, predicate, object } of fedQuads) {
    const s = subject.value
    const o = object.value
    switch (predicate.value) {
      // label / fetchUrl / format only count when the subject is a declared :Source
      case RDFS_LABEL: if (sourceIris.has(s)) get(s).label = o; break
      case FETCH_URL: if (sourceIris.has(s)) get(s).fetchUrl = o; break
      case FORMAT: if (sourceIris.has(s)) get(s).format = formatFamily(o); break
      case HAS_FIELD: setAdd(topFieldsOf, s, o); break
      case HAS_SUB_FIELD: setAdd(subFieldsOf, s, o); break
      case FROM_SOURCE: mappingSource.set(s, o); break
      case HAS_FIELD_MAPPING: setAdd(fmsOfMapping, s, o); break
      case FROM: setAdd(fromsOfFm, s, o); break
    }
  }

  // Records: distinct subjects in mapped.ttl per source via cdp:fromSource.
  const subjectsBySource = new Map()
  for (const { subject, predicate, object } of mappedQuads) {
    if (predicate.value === FROM_SOURCE) setAdd(subjectsBySource, object.value, subject.value)
  }

  for (const sourceIri of sourceIris) {
    const entry = get(sourceIri)

    // totalFields = top-level fields plus the sub-fields hanging directly off them.
    const top = topFieldsOf.get(sourceIri) ?? []
    const all = new Set(top)
    for (const tf of top) (subFieldsOf.get(tf) ?? []).forEach((sf) => all.add(sf))
    entry.totalFields = all.size

    // mappedFields = distinct :from fields across the field mappings of this source's mappings.
    const mapped = new Set(
      [...mappingSource]
        .filter(([, srcIri]) => srcIri === sourceIri)
        .flatMap(([mappingIri]) => [...(fmsOfMapping.get(mappingIri) ?? [])])
        .flatMap((fm) => [...(fromsOfFm.get(fm) ?? [])]),
    )
    entry.mappedFields = mapped.size

    // Static-file sources (no :fetchUrl) read from the conventional static dir.
    if (!entry.fetchUrl) entry.staticSource = PATHS.staticDir(sourceName(sourceIri))

    entry.records = subjectsBySource.get(sourceIri)?.size ?? 0
  }

  // Latest harvest timestamp per source from ingest-log.ttl. Facts are grouped
  // by log subject: (:ofSource ?source, prov:atTime ?time) and, when present,
  // :staticCommittedAt; the greatest time per source is kept.
  const harvestBnode = new Map()
  const harvest = (bnode) => {
    let facts = harvestBnode.get(bnode)
    if (!facts) {
      facts = {}
      harvestBnode.set(bnode, facts)
    }
    return facts
  }
  for (const { subject, predicate, object } of logQuads) {
    switch (predicate.value) {
      case OF_SOURCE: harvest(subject.value).source = object.value; break
      case PROV_AT_TIME: harvest(subject.value).time = object.value; break
      case STATIC_COMMITTED_AT: harvest(subject.value).committedAt = object.value; break
    }
  }
  for (const { source, time, committedAt } of harvestBnode.values()) {
    const usable = source && time && sourceIris.has(source)
    if (!usable) continue
    const entry = get(source)
    const cur = entry.lastHarvestedAt
    if (!cur || time > cur) entry.lastHarvestedAt = time // plain string comparison of the timestamps
    if (committedAt) entry.staticCommittedAt = committedAt
  }

  return Array.from(sourceIris, (iri) => get(iri))
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// Browser entry point: mounts the React app into the page.
|
|
2
|
+
// Reads: App.jsx
|
|
3
|
+
// Does: renders <App> into the #root element
|
|
4
|
+
|
|
5
|
+
import { createRoot } from "react-dom/client"
|
|
6
|
+
import App from "./App.jsx"
|
|
7
|
+
import React from "react"
|
|
8
|
+
|
|
9
|
+
createRoot(document.getElementById("root")).render(<App />)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// Builds the org lists for the Merge and Directory views, in one shared order.
|
|
2
|
+
// Reads: data/pipeline/{merged,provenance,final}.ttl, config/federation.ttl (via loadMerge.js)
|
|
3
|
+
// Does: exports mergedOrgs and finalOrgs (consumed by MergeTables, Directory)
|
|
4
|
+
|
|
5
|
+
import { loadMerge } from "./loadMerge.js"
|
|
6
|
+
import { isConflict } from "./OrgCard.jsx"
|
|
7
|
+
import { federationTtl, provenanceTtl as provTtl, mergedTtl, finalTtl } from "./instanceData.js"
|
|
8
|
+
|
|
9
|
+
// Number of entries in org.fields for which isConflict returns a truthy value.
const conflictCount = (org) => org.fields.filter((field) => isConflict(field)).length
|
|
10
|
+
|
|
11
|
+
// Merge view sorts by conflict count desc; the directory mirrors that order
|
|
12
|
+
// so the same org sits in the same visual slot across pages.
|
|
13
|
+
// Comparator: more conflicts first; equal conflict counts fall back to IRI order.
const byConflictsThenIri = (a, b) => {
  const diff = conflictCount(b) - conflictCount(a)
  if (diff) return diff
  return a.iri.localeCompare(b.iri)
}
// Note: sort() reorders the array returned by loadMerge in place.
export const mergedOrgs = loadMerge(mergedTtl, provTtl, federationTtl).sort(byConflictsThenIri)
|
|
14
|
+
// Position of each org IRI within mergedOrgs; a repeated IRI keeps its last position.
const orderIndex = new Map()
for (const [position, org] of mergedOrgs.entries()) orderIndex.set(org.iri, position)
|
|
15
|
+
// Rank of an org in the merge ordering. Orgs absent from mergedOrgs rank as
// Infinity, so they sort after every ranked org; two such orgs compare as equal.
const mergeRank = (org) => orderIndex.get(org.iri) ?? Infinity
export const finalOrgs = loadMerge(finalTtl, "", federationTtl).sort((a, b) => {
  const ra = mergeRank(a)
  const rb = mergeRank(b)
  return ra === rb ? 0 : ra - rb
})
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
// Source identity lives in config: federation.ttl declares each :Source (label,
|
|
2
|
+
// skos:notation, order); its cleaned-data file follows from the source name by
|
|
3
|
+
// the PATHS conventions. JS never hardcodes a source name — it resolves records
|
|
4
|
+
// to a :Source via cdp:fromSource.
|
|
5
|
+
// Reads: TTL strings passed in (federation, mapped, ingest-log)
|
|
6
|
+
// Does: returns lookup maps + helpers (used by loadMerge, OrgCard, MapGraph, MatchGraph)
|
|
7
|
+
|
|
8
|
+
import { CDP as NS, parseTtl, PATHS, sourceName } from "@directory-builder/core/utils"
|
|
9
|
+
|
|
10
|
+
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
|
11
|
+
const SKOS_NOTATION = "http://www.w3.org/2004/02/skos/core#notation"
|
|
12
|
+
const PROV_AT_TIME = "http://www.w3.org/ns/prov#atTime"
|
|
13
|
+
const HAS_SOURCE = `${NS}hasSource`
|
|
14
|
+
const FROM_SOURCE = `${NS}fromSource`
|
|
15
|
+
const OF_SOURCE = `${NS}ofSource`
|
|
16
|
+
|
|
17
|
+
// Map<SourceIRI, {iri, label, notation, order}> from federation.ttl; order
|
|
18
|
+
// follows the :hasSource list. Assumes each :Source has a label and notation.
|
|
19
|
+
/**
 * Collect per-source metadata from the federation config.
 *
 * @param {string} federationTtl - Turtle text handed to parseTtl.
 * @returns {Map<string, {iri: string, label: *, notation: *, order: number}>}
 *   One entry per distinct :hasSource object, keyed by its IRI and inserted in
 *   first-seen order. `order` is that zero-based first-seen position; `label`
 *   and `notation` are undefined when no rdfs:label / skos:notation triple was
 *   seen with that IRI as subject.
 */
export function loadSourceMeta(federationTtl) {
  const positionOf = new Map()
  const labels = new Map()
  const notations = new Map()

  for (const quad of parseTtl(federationTtl)) {
    switch (quad.predicate.value) {
      case HAS_SOURCE:
        // Only the first mention fixes a source's position; repeats are ignored.
        if (!positionOf.has(quad.object.value)) positionOf.set(quad.object.value, positionOf.size)
        break
      case RDFS_LABEL:
        // Recorded for every subject; a later triple overwrites an earlier one.
        labels.set(quad.subject.value, quad.object.value)
        break
      case SKOS_NOTATION:
        notations.set(quad.subject.value, quad.object.value)
        break
      default:
        break
    }
  }

  // Only IRIs that appeared as a :hasSource object make it into the result.
  return new Map(
    [...positionOf].map(([iri, position]) => [
      iri,
      { iri, label: labels.get(iri), notation: notations.get(iri), order: position },
    ]),
  )
}
|
|
36
|
+
|
|
37
|
+
// Order two Source IRIs by their federation declaration order, then IRI.
|
|
38
|
+
/**
 * Comparator for two Source IRIs: federation declaration order first, then IRI.
 *
 * An IRI with no entry in `meta` ranks after every declared source instead of
 * throwing, so sorting a list that contains an undeclared source does not
 * crash; undeclared sources are ordered among themselves by IRI.
 *
 * @param {string} a - First Source IRI.
 * @param {string} b - Second Source IRI.
 * @param {Map<string, {order: number}>} meta - Lookup whose values carry an
 *   `order` number (the shape loadSourceMeta returns).
 * @returns {number} Negative when `a` sorts first, positive when `b` does,
 *   0 when they are equivalent.
 */
export function compareSources(a, b, meta) {
  // Missing entries rank as Infinity so they land after all declared sources.
  const rank = (iri) => meta.get(iri)?.order ?? Infinity
  const oa = rank(a)
  const ob = rank(b)
  // The equality test comes first so two Infinity ranks never reach the
  // subtraction (Infinity - Infinity is NaN).
  return oa !== ob ? oa - ob : a.localeCompare(b)
}
|
|
43
|
+
|
|
44
|
+
// Map<recordIri, SourceIRI> from plain cdp:fromSource triples (mapped.ttl).
|
|
45
|
+
/**
 * Index records by the source they came from.
 *
 * @param {string} ttl - Turtle text handed to parseTtl.
 * @returns {Map<string, string>} Subject value -> object value of every
 *   cdp:fromSource triple; when a subject has several, the last one parsed wins.
 */
export function loadSourceOfRecord(ttl) {
  const sourceByRecord = new Map()
  for (const quad of parseTtl(ttl)) {
    if (quad.predicate.value !== FROM_SOURCE) continue
    sourceByRecord.set(quad.subject.value, quad.object.value)
  }
  return sourceByRecord
}
|
|
50
|
+
|
|
51
|
+
// Map<SourceIRI, latest ISO timestamp> from the ingest log's harvest entries.
|
|
52
|
+
/**
 * Find the most recent harvest time recorded for each source.
 *
 * @param {string} logTtl - Turtle text of the ingest log, handed to parseTtl.
 * @returns {Map<string, string>} Source IRI -> greatest prov:atTime value among
 *   the log entries that point at it via :ofSource. Entries with no time are
 *   ignored. Times are compared as plain strings, so the result is only the
 *   chronological maximum if the log writes them in one uniform format.
 */
export function loadHarvestBySource(logTtl) {
  const sourceOfEntry = new Map()
  const timeOfEntry = new Map()
  for (const quad of parseTtl(logTtl)) {
    const predicate = quad.predicate.value
    if (predicate === OF_SOURCE) {
      sourceOfEntry.set(quad.subject.value, quad.object.value)
      continue
    }
    if (predicate === PROV_AT_TIME) timeOfEntry.set(quad.subject.value, quad.object.value)
  }

  // Join the two indexes on the entry node and keep the largest time per source.
  const latest = new Map()
  sourceOfEntry.forEach((src, entry) => {
    const stamp = timeOfEntry.get(entry)
    if (!stamp) return
    if (latest.has(src) && !(stamp > latest.get(src))) return
    latest.set(src, stamp)
  })
  return latest
}
|
|
66
|
+
|
|
67
|
+
// Map<SourceIRI, cleaned-TTL raw string> for every source a :Mapping draws
|
|
68
|
+
// from (:fromSource); the file is the conventional cleaned path's basename.
|
|
69
|
+
// `rawByPath` comes from import.meta.glob(".../cleaned/*.ttl", ...).
|
|
70
|
+
/**
 * Pair each source named by a :fromSource triple with its cleaned-data text.
 *
 * @param {string} federationTtl - Turtle text handed to parseTtl.
 * @param {Object<string, string>} rawByPath - Raw file contents keyed by path;
 *   only the part after the last "/" of each key is used for matching.
 * @returns {Map<string, string>} Source IRI -> raw text of the file whose
 *   basename equals that of PATHS.cleaned(sourceName(iri)). Sources with no
 *   matching file, or whose file text is empty, are left out.
 */
export function loadCleanedBySource(federationTtl, rawByPath) {
  const fileName = (path) => path.split("/").pop()

  // Index the supplied files by basename; a later duplicate basename overwrites.
  const rawByFile = new Map()
  for (const [path, raw] of Object.entries(rawByPath)) rawByFile.set(fileName(path), raw)

  const cleaned = new Map()
  for (const quad of parseTtl(federationTtl)) {
    if (quad.predicate.value === FROM_SOURCE) {
      const sourceIri = quad.object.value
      const raw = rawByFile.get(fileName(PATHS.cleaned(sourceName(sourceIri))))
      if (raw) cleaned.set(sourceIri, raw)
    }
  }
  return cleaned
}
|