@directory-builder/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +112 -0
- package/bin/cli.js +38 -0
- package/example/README.md +64 -0
- package/example/config/federation.ttl +136 -0
- package/example/config/match-knowledge.ttl +8 -0
- package/example/sources/cityopen/clean.sparql +17 -0
- package/example/sources/cityopen/fetch.js +14 -0
- package/example/sources/cityopen/static/libraries.json +32 -0
- package/example/sources/civichub/clean.sparql +34 -0
- package/example/sources/civichub/fetch.js +14 -0
- package/example/sources/civichub/static/libraries.json +38 -0
- package/package.json +38 -0
- package/src/federate.js +571 -0
- package/src/index.js +6 -0
- package/src/ingest.js +158 -0
- package/src/lift/html.sparql +12 -0
- package/src/lift/json.sparql +12 -0
- package/src/pipeline.js +16 -0
- package/src/utils.js +152 -0
- package/src/webapp.js +41 -0
- package/webapp/index.html +11 -0
- package/webapp/src/About.jsx +24 -0
- package/webapp/src/App.jsx +96 -0
- package/webapp/src/Card.jsx +32 -0
- package/webapp/src/ColumnGraph.jsx +290 -0
- package/webapp/src/Directory.jsx +15 -0
- package/webapp/src/Download.jsx +174 -0
- package/webapp/src/MapGraph.jsx +244 -0
- package/webapp/src/MatchGraph.jsx +137 -0
- package/webapp/src/MergeTables.jsx +61 -0
- package/webapp/src/OrgCard.jsx +126 -0
- package/webapp/src/Pipeline.jsx +41 -0
- package/webapp/src/Query.jsx +165 -0
- package/webapp/src/Sources.jsx +52 -0
- package/webapp/src/instanceData.js +35 -0
- package/webapp/src/loadMap.js +276 -0
- package/webapp/src/loadMatch.js +228 -0
- package/webapp/src/loadMerge.js +93 -0
- package/webapp/src/loadPipeline.js +130 -0
- package/webapp/src/loadSources.js +102 -0
- package/webapp/src/main.jsx +9 -0
- package/webapp/src/mergeOrgs.js +15 -0
- package/webapp/src/sourceMeta.js +81 -0
- package/webapp/src/styles.css +23 -0
- package/webapp/vite.config.js +14 -0
- package/webapp/vite.js +28 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
// Map view: the source-schema → target-schema mapping graph, optionally animated
|
|
2
|
+
// with one org's field values flowing through the transform nodes.
|
|
3
|
+
// Reads: config/federation.ttl, data/pipeline/mapped.ttl,
|
|
4
|
+
// data/pipeline/cleaned/*.ttl (via loadMap.js + sourceMeta.js)
|
|
5
|
+
// Does: renders the Map page (horizontal <ColumnGraph>)
|
|
6
|
+
|
|
7
|
+
import { federationTtl as ttl, mappedTtl, cleanedByPath } from "./instanceData.js"
|
|
8
|
+
import { loadMap, loadSources, loadOrgsBySource, loadFieldValuesByOrg } from "./loadMap.js"
|
|
9
|
+
import React, { useEffect, useMemo, useRef, useState } from "react"
|
|
10
|
+
import { loadCleanedBySource } from "./sourceMeta.js"
|
|
11
|
+
import { SkipBack, SkipForward } from "lucide-react"
|
|
12
|
+
import ColumnGraph from "./ColumnGraph.jsx"
|
|
13
|
+
|
|
14
|
+
// Column order handed to <ColumnGraph>; each entry is also a node `type`.
const COLUMNS = [
  "Source",
  "SourceField",
  "TransformNode",
  "TargetField",
  "TargetSchema",
]

// Colour per column, in the same order as COLUMNS.
const COLORS = Object.fromEntries([
  ["Source", "#d4e7ff"],
  ["SourceField", "#e6f3d8"],
  ["TransformNode", "#fff1a8"],
  ["TargetField", "#fde2c7"],
  ["TargetSchema", "#f4cfe0"],
])

// Background of an edge's value label, looked up by the type of the edge's
// from-node. These are lighter tints of the matching node fills, so a label
// visually belongs to its column without competing with the nodes.
const VALUE_LABEL_BG = Object.fromEntries([
  ["SourceField", "#f0f8e0"],
  ["TransformNode", "#fff8c8"],
])
|
|
28
|
+
|
|
29
|
+
// Derived once, at module load, from the bundled TTL text.
// SOURCES is iterated for `iri` / `label`; ORGS_BY_SOURCE is looked up by
// source IRI; FIELD_VALUES is looked up by org IRI.
const SOURCES = loadSources(ttl)
const ORGS_BY_SOURCE = loadOrgsBySource(ttl, mappedTtl)
// The source → cleaned-file association comes from config: instanceData
// enumerates the cleaned TTLs from :hasSource, so adding a source needs no
// change in this file.
const FIELD_VALUES = loadFieldValuesByOrg(
  ttl,
  mappedTtl,
  loadCleanedBySource(ttl, cleanedByPath),
)
|
|
34
|
+
|
|
35
|
+
function SourcesDropdown({ visible, onChange }) {
|
|
36
|
+
const [open, setOpen] = useState(false)
|
|
37
|
+
const ref = useRef(null)
|
|
38
|
+
|
|
39
|
+
useEffect(() => {
|
|
40
|
+
if (!open) return
|
|
41
|
+
const onDown = (e) => { if (!ref.current?.contains(e.target)) setOpen(false) }
|
|
42
|
+
document.addEventListener("mousedown", onDown)
|
|
43
|
+
return () => document.removeEventListener("mousedown", onDown)
|
|
44
|
+
}, [open])
|
|
45
|
+
|
|
46
|
+
const summary = visible.size === SOURCES.length
|
|
47
|
+
? "All sources"
|
|
48
|
+
: visible.size === 0
|
|
49
|
+
? "No sources"
|
|
50
|
+
: `${visible.size} of ${SOURCES.length} sources`
|
|
51
|
+
|
|
52
|
+
const toggle = (iri) => {
|
|
53
|
+
const next = new Set(visible)
|
|
54
|
+
if (next.has(iri)) next.delete(iri); else next.add(iri)
|
|
55
|
+
onChange(next)
|
|
56
|
+
}
|
|
57
|
+
const setAll = (on) => onChange(on ? new Set(SOURCES.map(s => s.iri)) : new Set())
|
|
58
|
+
|
|
59
|
+
const linkBtn = { background: "none", border: "none", color: "#06c", cursor: "pointer", padding: 0, fontSize: 12 }
|
|
60
|
+
|
|
61
|
+
return (
|
|
62
|
+
<div ref={ref} style={{ position: "relative", display: "inline-block" }}>
|
|
63
|
+
<button onClick={() => setOpen(!open)} style={{ padding: "0.25rem 0.6rem", border: "1px solid #aaa", borderRadius: 4, background: "white", cursor: "pointer", fontSize: 13 }}>
|
|
64
|
+
{summary} ▾
|
|
65
|
+
</button>
|
|
66
|
+
{open && (
|
|
67
|
+
<div style={{ position: "absolute", top: "calc(100% + 4px)", left: 0, zIndex: 10, background: "white", border: "1px solid #aaa", borderRadius: 4, padding: 6, minWidth: 200, boxShadow: "0 2px 6px rgba(0,0,0,0.12)" }}>
|
|
68
|
+
<div style={{ display: "flex", gap: 12, paddingBottom: 4, marginBottom: 4, borderBottom: "1px solid #eee" }}>
|
|
69
|
+
<button onClick={() => setAll(true)} style={linkBtn}>Select all</button>
|
|
70
|
+
<button onClick={() => setAll(false)} style={linkBtn}>Unselect all</button>
|
|
71
|
+
</div>
|
|
72
|
+
{SOURCES.map(s => (
|
|
73
|
+
<label key={s.iri} style={{ display: "flex", alignItems: "center", gap: 6, padding: "2px 0" }}>
|
|
74
|
+
<input type="checkbox" checked={visible.has(s.iri)} onChange={() => toggle(s.iri)} />
|
|
75
|
+
{s.label}
|
|
76
|
+
</label>
|
|
77
|
+
))}
|
|
78
|
+
</div>
|
|
79
|
+
)}
|
|
80
|
+
</div>
|
|
81
|
+
)
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
function OrgCombobox({ orgs, value, onChange, disabled }) {
|
|
85
|
+
const [open, setOpen] = useState(false)
|
|
86
|
+
const [filter, setFilter] = useState("")
|
|
87
|
+
const ref = useRef(null)
|
|
88
|
+
|
|
89
|
+
useEffect(() => {
|
|
90
|
+
if (!open) return
|
|
91
|
+
const onDown = (e) => { if (!ref.current?.contains(e.target)) setOpen(false) }
|
|
92
|
+
document.addEventListener("mousedown", onDown)
|
|
93
|
+
return () => document.removeEventListener("mousedown", onDown)
|
|
94
|
+
}, [open])
|
|
95
|
+
|
|
96
|
+
const selected = orgs.find(o => o.iri === value)
|
|
97
|
+
const f = filter.toLowerCase()
|
|
98
|
+
const filtered = f ? orgs.filter(o => o.id.toLowerCase().includes(f) || o.name.toLowerCase().includes(f)) : orgs
|
|
99
|
+
|
|
100
|
+
return (
|
|
101
|
+
<div ref={ref} style={{ position: "relative" }}>
|
|
102
|
+
<input
|
|
103
|
+
type="text"
|
|
104
|
+
disabled={disabled}
|
|
105
|
+
value={open ? filter : (selected?.name || selected?.id || "")}
|
|
106
|
+
placeholder={disabled ? "" : "Pick organisation…"}
|
|
107
|
+
onChange={(e) => { setFilter(e.target.value); if (!open) setOpen(true) }}
|
|
108
|
+
onFocus={() => { setFilter(""); setOpen(true) }}
|
|
109
|
+
style={{
|
|
110
|
+
padding: "0.25rem 0.5rem",
|
|
111
|
+
border: "1px solid #aaa",
|
|
112
|
+
borderRadius: 4,
|
|
113
|
+
fontSize: 13,
|
|
114
|
+
width: 250,
|
|
115
|
+
background: disabled ? "#f4f4f4" : "white",
|
|
116
|
+
color: disabled ? "#bbb" : "#000",
|
|
117
|
+
}}
|
|
118
|
+
/>
|
|
119
|
+
{open && filtered.length > 0 && (
|
|
120
|
+
<div style={{ position: "absolute", top: "calc(100% + 4px)", left: 0, zIndex: 10, background: "white", border: "1px solid #aaa", borderRadius: 4, maxHeight: 280, overflowY: "auto", minWidth: "100%", boxShadow: "0 2px 6px rgba(0,0,0,0.12)" }}>
|
|
121
|
+
{filtered.slice(0, 200).map(o => (
|
|
122
|
+
<div
|
|
123
|
+
key={o.iri}
|
|
124
|
+
onClick={() => { onChange(o.iri); setOpen(false); setFilter("") }}
|
|
125
|
+
title={o.name}
|
|
126
|
+
style={{ padding: "4px 8px", cursor: "pointer", borderBottom: "1px solid #eee" }}
|
|
127
|
+
>
|
|
128
|
+
<div style={{ fontSize: 13, whiteSpace: "nowrap", overflow: "hidden", textOverflow: "ellipsis", maxWidth: 320 }}>{o.name || <span style={{ color: "#999" }}>(no name)</span>}</div>
|
|
129
|
+
<div style={{ fontFamily: "monospace", fontSize: 11, color: "#666" }}>{o.id}</div>
|
|
130
|
+
</div>
|
|
131
|
+
))}
|
|
132
|
+
</div>
|
|
133
|
+
)}
|
|
134
|
+
</div>
|
|
135
|
+
)
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
export default function MapGraph() {
|
|
139
|
+
const [visible, setVisible] = useState(() => new Set(SOURCES.map(s => s.iri)))
|
|
140
|
+
const [selectedOrg, setSelectedOrg] = useState(null)
|
|
141
|
+
const [dataFlow, setDataFlow] = useState(false)
|
|
142
|
+
const [showUnmapped, setShowUnmapped] = useState(false)
|
|
143
|
+
const [showAllTargets, setShowAllTargets] = useState(false)
|
|
144
|
+
const [showDirectFlows, setShowDirectFlows] = useState(false)
|
|
145
|
+
|
|
146
|
+
const { nodes, edges: rawEdges } = useMemo(() => {
|
|
147
|
+
const hiddenSources = new Set(SOURCES.filter(s => !visible.has(s.iri)).map(s => s.iri))
|
|
148
|
+
return loadMap(ttl, { hiddenSources, hideUnmappedFields: !showUnmapped, hideUnmappedTargetFields: !showAllTargets })
|
|
149
|
+
}, [visible, showUnmapped, showAllTargets])
|
|
150
|
+
|
|
151
|
+
const oneActive = visible.size === 1
|
|
152
|
+
const enabled = dataFlow && oneActive
|
|
153
|
+
const valueByField = enabled && selectedOrg ? FIELD_VALUES.get(selectedOrg) : null
|
|
154
|
+
const edges = useMemo(() => {
|
|
155
|
+
if (!valueByField) return rawEdges
|
|
156
|
+
const typeOf = new Map(nodes.map(n => [n.id, n.type]))
|
|
157
|
+
return rawEdges.map(e => {
|
|
158
|
+
// Source-field outgoing: source literal. Transform outgoing: the
|
|
159
|
+
// post-transform target field value (the value that lands in `to`).
|
|
160
|
+
// The label tints with the from-node's column color so labels read
|
|
161
|
+
// as belonging to the same "moment" in the transformation.
|
|
162
|
+
// Direct (no-:via) source-field → target-field edges are gated
|
|
163
|
+
// behind the "Also show 1:1 flows" toggle.
|
|
164
|
+
if (e.direct && !showDirectFlows) return e
|
|
165
|
+
const fromType = typeOf.get(e.from)
|
|
166
|
+
const v = fromType === "TransformNode" ? valueByField.get(e.to)
|
|
167
|
+
: fromType === "SourceField" ? valueByField.get(e.from)
|
|
168
|
+
: undefined
|
|
169
|
+
return v ? { ...e, value: v, valueBg: VALUE_LABEL_BG[fromType] } : e
|
|
170
|
+
})
|
|
171
|
+
}, [rawEdges, nodes, valueByField, showDirectFlows])
|
|
172
|
+
|
|
173
|
+
// Remount when the visible node set changes (sources or unmapped-fields
|
|
174
|
+
// toggle). Org / data-flow changes only update edge labels in place.
|
|
175
|
+
const graphKey = useMemo(() => `${[...visible].sort().join("|")}::${showUnmapped ? "all" : "mapped"}::${showAllTargets ? "allT" : "mappedT"}`, [visible, showUnmapped, showAllTargets])
|
|
176
|
+
|
|
177
|
+
const activeSource = oneActive ? [...visible][0] : null
|
|
178
|
+
const orgs = activeSource ? (ORGS_BY_SOURCE.get(activeSource) ?? []) : []
|
|
179
|
+
|
|
180
|
+
useEffect(() => {
|
|
181
|
+
if (orgs.length > 0) {
|
|
182
|
+
if (!orgs.find(o => o.iri === selectedOrg)) setSelectedOrg(orgs[0].iri)
|
|
183
|
+
} else if (selectedOrg !== null) {
|
|
184
|
+
setSelectedOrg(null)
|
|
185
|
+
}
|
|
186
|
+
}, [orgs])
|
|
187
|
+
|
|
188
|
+
useEffect(() => {
|
|
189
|
+
if (!oneActive && dataFlow) setDataFlow(false)
|
|
190
|
+
}, [oneActive])
|
|
191
|
+
|
|
192
|
+
const cycle = (delta) => {
|
|
193
|
+
if (orgs.length === 0) return
|
|
194
|
+
const idx = orgs.findIndex(o => o.iri === selectedOrg)
|
|
195
|
+
const next = ((idx < 0 ? 0 : idx + delta) + orgs.length) % orgs.length
|
|
196
|
+
setSelectedOrg(orgs[next].iri)
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
const disabledHint = !dataFlow
|
|
200
|
+
? "Enable Show data flow to use these controls"
|
|
201
|
+
: "Active only when exactly one source is selected"
|
|
202
|
+
const iconBtnStyle = {
|
|
203
|
+
display: "inline-flex",
|
|
204
|
+
alignItems: "center",
|
|
205
|
+
background: "none",
|
|
206
|
+
border: "1px solid #aaa",
|
|
207
|
+
borderRadius: 4,
|
|
208
|
+
padding: "0.25rem 0.5rem",
|
|
209
|
+
cursor: enabled ? "pointer" : "not-allowed",
|
|
210
|
+
color: enabled ? "#000" : "#bbb",
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
return (
|
|
214
|
+
<div style={{ display: "flex", flexDirection: "column", height: "100%" }}>
|
|
215
|
+
<div style={{ display: "flex", alignItems: "center", gap: "0.75rem", padding: "0.5rem 1rem", fontSize: 13, borderBottom: "1px solid #ddd" }}>
|
|
216
|
+
<SourcesDropdown visible={visible} onChange={setVisible} />
|
|
217
|
+
<label style={{ display: "inline-flex", alignItems: "center", gap: "0.3rem" }}>
|
|
218
|
+
<input type="checkbox" checked={showUnmapped} onChange={(e) => setShowUnmapped(e.target.checked)} />
|
|
219
|
+
Show all source fields
|
|
220
|
+
</label>
|
|
221
|
+
<label style={{ display: "inline-flex", alignItems: "center", gap: "0.3rem" }}>
|
|
222
|
+
<input type="checkbox" checked={showAllTargets} onChange={(e) => setShowAllTargets(e.target.checked)} />
|
|
223
|
+
Show all target fields
|
|
224
|
+
</label>
|
|
225
|
+
<label style={{ display: "inline-flex", alignItems: "center", gap: "0.3rem", color: oneActive ? "#000" : "#bbb", cursor: oneActive ? "pointer" : "not-allowed" }} title={oneActive ? "" : "Active only when exactly one source is selected"}>
|
|
226
|
+
<input type="checkbox" disabled={!oneActive} checked={dataFlow} onChange={(e) => setDataFlow(e.target.checked)} />
|
|
227
|
+
Show data flow
|
|
228
|
+
</label>
|
|
229
|
+
<label style={{ display: "inline-flex", alignItems: "center", gap: "0.3rem", color: enabled ? "#000" : "#bbb", cursor: enabled ? "pointer" : "not-allowed" }} title={enabled ? "" : "Enable Show data flow first"}>
|
|
230
|
+
<input type="checkbox" disabled={!enabled} checked={showDirectFlows} onChange={(e) => setShowDirectFlows(e.target.checked)} />
|
|
231
|
+
Also show 1:1 flows
|
|
232
|
+
</label>
|
|
233
|
+
<div style={{ display: "flex", alignItems: "center", gap: "0.25rem" }}>
|
|
234
|
+
<button disabled={!enabled} onClick={() => cycle(-1)} title={enabled ? "Previous" : disabledHint} style={iconBtnStyle}><SkipBack size={13} fill="currentColor" /></button>
|
|
235
|
+
<OrgCombobox orgs={orgs} value={selectedOrg} onChange={setSelectedOrg} disabled={!enabled} />
|
|
236
|
+
<button disabled={!enabled} onClick={() => cycle(1)} title={enabled ? "Next" : disabledHint} style={iconBtnStyle}><SkipForward size={13} fill="currentColor" /></button>
|
|
237
|
+
</div>
|
|
238
|
+
</div>
|
|
239
|
+
<div style={{ flex: 1, minHeight: 0 }}>
|
|
240
|
+
<ColumnGraph key={graphKey} nodes={nodes} edges={edges} columns={COLUMNS} colors={COLORS} />
|
|
241
|
+
</div>
|
|
242
|
+
</div>
|
|
243
|
+
)
|
|
244
|
+
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// Match view: one lane per target schema, each preceded by a tinted "source
|
|
2
|
+
// duplications" column (which source records merged onto the entity), plus the
|
|
3
|
+
// cross-lane :hasRelationship edges between merged entities. All structure — lanes,
|
|
4
|
+
// order, colours, titles, relationships — is derived from federation.ttl inside
|
|
5
|
+
// loadMatch.js; this file only adds node text labels, the stats line and the modal.
|
|
6
|
+
// Reads: data/pipeline/{matches,merged,mapped}.ttl, config/{federation,match-knowledge}.ttl
|
|
7
|
+
// Does: renders the Match page (<ColumnGraph> + per-cluster details modal)
|
|
8
|
+
|
|
9
|
+
import { displayPrefixes, federationTtl, matchKnowledgeTtl, mappedTtl, matchesTtl, mergedTtl } from "./instanceData.js"
|
|
10
|
+
import { loadSourceMeta, loadSourceOfRecord } from "./sourceMeta.js"
|
|
11
|
+
import { CDP, groupBySubject, parseTtl, shrink } from "@directory-builder/core/utils"
|
|
12
|
+
import React, { useMemo, useState } from "react"
|
|
13
|
+
import ColumnGraph from "./ColumnGraph.jsx"
|
|
14
|
+
import { loadMatch } from "./loadMatch.js"
|
|
15
|
+
|
|
16
|
+
// Fixed IRIs / namespaces this view compares against.
const SCHEMA_IDENTIFIER = "http://schema.org/identifier"
const CDF_NS = "https://civic-data.de/federated-directory#"
const OWL_SAME_AS = "http://www.w3.org/2002/07/owl#sameAs"

// Predicates in the CDP namespace (imported from core utils) that describe
// match criteria in federation.ttl.
const [HARD_CRITERION, WEIGHTED_CRITERION, ON] = ["hasHardCriterion", "hasWeightedCriterion", "on"]
  .map((localName) => `${CDP}${localName}`)

// Shortens an IRI for display by passing it through `shrink` with the
// display prefixes from instanceData.
function prefixed(iri) {
  return shrink(iri, displayPrefixes)
}
|
|
24
|
+
|
|
25
|
+
// Source members are labelled with the notation of their :Source (resolved
// via cdp:fromSource): record IRI → source (from mapped.ttl) → source
// metadata (from federation.ttl).
const sourceMeta = loadSourceMeta(federationTtl)
const sourceOfRecord = loadSourceOfRecord(mappedTtl)

// Returns the notation for a record's source, or "?" when the record has no
// known source or that source has no (non-empty) notation.
function sourceCode(iri) {
  const source = sourceOfRecord.get(iri)
  if (!source) return "?"
  return sourceMeta.get(source)?.notation || "?"
}
|
|
29
|
+
|
|
30
|
+
// Predicates that the federation's match criteria compare on, used as the
// rows of the per-member details table.
//
// Pass 1 collects the objects of every cdp:hasHardCriterion /
// cdp:hasWeightedCriterion triple (the criterion nodes); pass 2 collects the
// object of each criterion node's cdp:on triple.
//
// The result is de-duplicated, keeping first-seen order: the same predicate
// can be referenced by more than one criterion, and the modal uses the
// predicate as a React key, so repeats would give duplicate keys and rows.
const criteriaPredicates = (() => {
  const quads = parseTtl(federationTtl)

  const criterionNodes = new Set()
  for (const q of quads) {
    const p = q.predicate.value
    if (p === HARD_CRITERION || p === WEIGHTED_CRITERION) criterionNodes.add(q.object.value)
  }

  const predicates = new Set()
  for (const q of quads) {
    if (q.predicate.value === ON && criterionNodes.has(q.subject.value)) predicates.add(q.object.value)
  }
  return [...predicates]
})()
|
|
36
|
+
|
|
37
|
+
// Literal values of every mapped record, for the per-member details modal:
// Map<recordIri, Map<predIri, [literalValue]>>.
const orgInfo = groupBySubject(parseTtl(mappedTtl), { literalsOnly: true })

// [subjectIri, objectIri] for every owl:sameAs statement in match-knowledge.ttl.
const manualPairs = []
for (const q of parseTtl(matchKnowledgeTtl)) {
  if (q.predicate.value !== OWL_SAME_AS) continue
  manualPairs.push([q.subject.value, q.object.value])
}
|
|
43
|
+
|
|
44
|
+
function MemberDetailsModal({ clusterId, memberIris, onClose }) {
|
|
45
|
+
const memberSet = new Set(memberIris)
|
|
46
|
+
const manualHere = manualPairs.filter(([a, b]) => memberSet.has(a) && memberSet.has(b))
|
|
47
|
+
return (
|
|
48
|
+
<div onClick={onClose} style={{ position: "fixed", inset: 0, background: "rgba(0,0,0,0.4)", zIndex: 9999, display: "flex", justifyContent: "center", alignItems: "flex-start", paddingTop: 60, overflowY: "auto" }}>
|
|
49
|
+
<div onClick={(e) => e.stopPropagation()} style={{ background: "white", borderRadius: 6, padding: 20, minWidth: 480, maxWidth: 800, boxShadow: "0 8px 24px rgba(0,0,0,0.3)" }}>
|
|
50
|
+
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "baseline", marginBottom: 12, gap: 12 }}>
|
|
51
|
+
<h3 style={{ margin: 0, fontSize: 14 }}>Cluster <code>{clusterId.startsWith(CDF_NS) ? `cdf:${clusterId.slice(CDF_NS.length)}` : prefixed(clusterId)}</code></h3>
|
|
52
|
+
<button onClick={onClose} style={{ border: 0, background: "transparent", fontSize: 18, cursor: "pointer", lineHeight: 1 }}>×</button>
|
|
53
|
+
</div>
|
|
54
|
+
{memberIris.map((iri) => {
|
|
55
|
+
const info = orgInfo.get(iri)
|
|
56
|
+
return (
|
|
57
|
+
<div key={iri} style={{ marginBottom: 14 }}>
|
|
58
|
+
<div style={{ fontSize: 11, color: "#666", marginBottom: 4 }}><code>{prefixed(iri)}</code></div>
|
|
59
|
+
<table style={{ borderCollapse: "collapse", fontSize: 12, width: "100%" }}>
|
|
60
|
+
<tbody>
|
|
61
|
+
{criteriaPredicates.map((p) => (
|
|
62
|
+
<tr key={p}>
|
|
63
|
+
<td style={{ padding: "2px 8px", color: "#555", whiteSpace: "nowrap", verticalAlign: "top", width: 1 }}>{prefixed(p)}</td>
|
|
64
|
+
<td style={{ padding: "2px 8px" }}>{info?.get(p)?.[0] ?? <span style={{ color: "#bbb" }}>—</span>}</td>
|
|
65
|
+
</tr>
|
|
66
|
+
))}
|
|
67
|
+
</tbody>
|
|
68
|
+
</table>
|
|
69
|
+
</div>
|
|
70
|
+
)
|
|
71
|
+
})}
|
|
72
|
+
{manualHere.length > 0 && (
|
|
73
|
+
<div style={{ marginTop: 16, paddingTop: 12, borderTop: "1px solid #ddd" }}>
|
|
74
|
+
<div style={{ fontSize: 12, fontWeight: 600, marginBottom: 6 }}>Manual matches</div>
|
|
75
|
+
{manualHere.map(([a, b], i) => (
|
|
76
|
+
<div key={i} style={{ fontSize: 11, color: "#555", marginBottom: 2 }}>
|
|
77
|
+
<code>{prefixed(a)}</code> <span style={{ color: "#999" }}>owl:sameAs</span> <code>{prefixed(b)}</code>
|
|
78
|
+
</div>
|
|
79
|
+
))}
|
|
80
|
+
</div>
|
|
81
|
+
)}
|
|
82
|
+
</div>
|
|
83
|
+
</div>
|
|
84
|
+
)
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
export default function MatchGraph() {
|
|
88
|
+
const [showDuplications, setShowDuplications] = useState(true)
|
|
89
|
+
const [show1to1, setShow1to1] = useState(false)
|
|
90
|
+
const [openCluster, setOpenCluster] = useState(null)
|
|
91
|
+
|
|
92
|
+
const { nodes, edges, members, clusterOf, columns, colors, columnTitles, columnBands, columnHeaderStyle, nodeY } = useMemo(() => {
|
|
93
|
+
const r = loadMatch(federationTtl, matchesTtl, mergedTtl, { showDuplications, show1to1 })
|
|
94
|
+
const clusterOf = new Map()
|
|
95
|
+
for (const [c, ms] of r.members) for (const m of ms) clusterOf.set(m, c)
|
|
96
|
+
|
|
97
|
+
for (const n of r.nodes) {
|
|
98
|
+
if (n.isCluster) n.subtitle = n.id.startsWith(CDF_NS) ? `cdf:${n.id.slice(CDF_NS.length)}` : prefixed(n.id)
|
|
99
|
+
else { // a source (dedup) node
|
|
100
|
+
n.label = sourceCode(n.id)
|
|
101
|
+
n.subtitle = orgInfo.get(n.id)?.get(SCHEMA_IDENTIFIER)?.[0]
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
// Drop columns that ended up empty (schemas with no source duplication when
|
|
105
|
+
// collapsed) so they don't leave a blank tinted band.
|
|
106
|
+
const columns = r.columns.filter((c) => r.nodes.some((n) => n.type === c))
|
|
107
|
+
return { ...r, clusterOf, columns }
|
|
108
|
+
}, [showDuplications, show1to1])
|
|
109
|
+
|
|
110
|
+
const handleNodeClick = (_, node) => {
|
|
111
|
+
if (node.id.startsWith("__")) return // header / band decoration
|
|
112
|
+
const cid = members.has(node.id) ? node.id : clusterOf.get(node.id)
|
|
113
|
+
if (cid) setOpenCluster({ clusterId: cid, memberIris: members.get(cid) ?? [] })
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return (
|
|
117
|
+
<div style={{ display: "flex", flexDirection: "column", height: "100%" }}>
|
|
118
|
+
<div style={{ display: "flex", gap: "1rem", alignItems: "center", padding: "0.5rem 1rem", fontSize: 13, borderBottom: "1px solid #ddd" }}>
|
|
119
|
+
<label style={{ display: "inline-flex", alignItems: "center", gap: "0.25rem" }}>
|
|
120
|
+
<input type="checkbox" checked={showDuplications} onChange={(e) => setShowDuplications(e.target.checked)} />
|
|
121
|
+
Show duplications across sources
|
|
122
|
+
</label>
|
|
123
|
+
<label style={{ display: "inline-flex", alignItems: "center", gap: "0.25rem", color: showDuplications ? undefined : "#bbb" }}>
|
|
124
|
+
<input type="checkbox" checked={show1to1} disabled={!showDuplications} onChange={(e) => setShow1to1(e.target.checked)} />
|
|
125
|
+
Show 1:1 clusters
|
|
126
|
+
</label>
|
|
127
|
+
</div>
|
|
128
|
+
<div style={{ flex: 1, minHeight: 0 }}>
|
|
129
|
+
<ColumnGraph key={`${showDuplications}-${show1to1}`} nodes={nodes} edges={edges}
|
|
130
|
+
columns={columns} colors={colors} nodeY={nodeY}
|
|
131
|
+
columnTitles={columnTitles} columnBands={columnBands} columnHeaderStyle={columnHeaderStyle}
|
|
132
|
+
nodeWidth={150} colSpacing={236} onNodeClick={handleNodeClick} />
|
|
133
|
+
</div>
|
|
134
|
+
{openCluster && <MemberDetailsModal clusterId={openCluster.clusterId} memberIris={openCluster.memberIris} onClose={() => setOpenCluster(null)} />}
|
|
135
|
+
</div>
|
|
136
|
+
)
|
|
137
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// Merge view: every org with its per-source field values and conflict
|
|
2
|
+
// highlighting; each org's services are nested (indented) beneath it.
|
|
3
|
+
// Reads: mergedOrgs from mergeOrgs.js (← data/pipeline/merged.ttl + provenance.ttl)
|
|
4
|
+
// Does: renders the Merge page (compact / wide <OrgCard>, toggleable)
|
|
5
|
+
|
|
6
|
+
import OrgCard from "./OrgCard.jsx"
|
|
7
|
+
import { mergedOrgs } from "./mergeOrgs.js"
|
|
8
|
+
import React, { useState } from "react"
|
|
9
|
+
|
|
10
|
+
// `type` value that marks an entity as a service rather than an organisation.
const SCHEMA_SERVICE = "schema:Service"
// Predicate of the field that links a service to its provider.
const PROVIDER = "http://schema.org/provider"

// Returns the `raw` of the first value of the entity's provider field, or
// undefined when the entity has no provider field or that field has no values.
function providerOf(e) {
  const providerField = e.fields.find((f) => f.predicate === PROVIDER)
  if (providerField == null) return undefined
  return providerField.values[0]?.raw
}
|
|
13
|
+
|
|
14
|
+
// Split mergedOrgs into top-level orgs and services. Orgs keep the order they
// have in mergedOrgs (conflict-sorted). Each service is filed under its
// provider org; a service without a provider, or whose provider is not one of
// the merged orgs, is an orphan and gets rendered after all orgs.
const orgs = mergedOrgs.filter((entity) => entity.type !== SCHEMA_SERVICE)
const orgIris = new Set(orgs.map((o) => o.iri))
const servicesByOrg = new Map()
const orphanServices = []
for (const entity of mergedOrgs) {
  if (entity.type !== SCHEMA_SERVICE) continue
  const provider = providerOf(entity)
  if (!provider || !orgIris.has(provider)) {
    orphanServices.push(entity)
    continue
  }
  const siblings = servicesByOrg.get(provider)
  if (siblings) siblings.push(entity)
  else servicesByOrg.set(provider, [entity])
}
|
|
31
|
+
|
|
32
|
+
export default function MergeTables() {
|
|
33
|
+
const [compact, setCompact] = useState(true)
|
|
34
|
+
const [highlight, setHighlight] = useState(true)
|
|
35
|
+
const service = (svc) => (
|
|
36
|
+
<div key={svc.iri} style={{ marginLeft: "1.5rem", borderLeft: "2px solid #e0e0e0", paddingLeft: "0.75rem" }}>
|
|
37
|
+
<OrgCard org={svc} compact={compact} highlight={highlight} />
|
|
38
|
+
</div>
|
|
39
|
+
)
|
|
40
|
+
return (
|
|
41
|
+
<div className="page" style={{ overflowY: "auto", height: "100%" }}>
|
|
42
|
+
<div style={{ display: "flex", gap: "1rem", marginBottom: "0.75rem", fontSize: 13 }}>
|
|
43
|
+
<label style={{ display: "flex", alignItems: "center", gap: "0.25rem" }}>
|
|
44
|
+
<input type="checkbox" checked={compact} onChange={(e) => setCompact(e.target.checked)} />
|
|
45
|
+
Compact view
|
|
46
|
+
</label>
|
|
47
|
+
<label style={{ display: "flex", alignItems: "center", gap: "0.25rem" }}>
|
|
48
|
+
<input type="checkbox" checked={highlight} onChange={(e) => setHighlight(e.target.checked)} />
|
|
49
|
+
Highlight conflicts
|
|
50
|
+
</label>
|
|
51
|
+
</div>
|
|
52
|
+
{orgs.map((org) => (
|
|
53
|
+
<React.Fragment key={org.iri}>
|
|
54
|
+
<OrgCard org={org} compact={compact} highlight={highlight} />
|
|
55
|
+
{(servicesByOrg.get(org.iri) ?? []).map(service)}
|
|
56
|
+
</React.Fragment>
|
|
57
|
+
))}
|
|
58
|
+
{orphanServices.map(service)}
|
|
59
|
+
</div>
|
|
60
|
+
)
|
|
61
|
+
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// Renders one organisation as a card (narrow key/value, or wide per-source table)
|
|
2
|
+
// with source tags and conflict highlighting. Also exports the conflict helpers.
|
|
3
|
+
// Reads: config/federation.ttl, data/ingest/ingest-log.ttl (via sourceMeta.js);
|
|
4
|
+
// org objects from loadMerge.js
|
|
5
|
+
// Does: renders <OrgCard>; exports EXPECTED_MULTI, isConflict (used by mergeOrgs, MergeTables)
|
|
6
|
+
|
|
7
|
+
import { federationTtl, ingestLogTtl as logTtl } from "./instanceData.js"
|
|
8
|
+
import Card, { KeyValueTable } from "./Card.jsx"
|
|
9
|
+
import { loadHarvestBySource, loadSourceMeta } from "./sourceMeta.js"
|
|
10
|
+
import { CDP, parseTtl } from "@directory-builder/core/utils"
|
|
11
|
+
import React, { useState } from "react"
|
|
12
|
+
|
|
13
|
+
// org.columns are one entry per contributing record (resolved in loadMerge); look
|
|
14
|
+
// up source display data in config (notation, label) and the harvest log (time).
|
|
15
|
+
// Source display data: per-source metadata from federation.ttl and the
// per-source harvest timestamp from the ingest log.
const sourceMeta = loadSourceMeta(federationTtl)
const harvestBySource = loadHarvestBySource(logTtl)

// Short code shown inside a source tag. A source IRI without a config entry
// (or without a notation) yields "?" instead of throwing while rendering.
const sourceCode = (iri) => sourceMeta.get(iri)?.notation ?? "?"

// Tooltip for a source tag: the source's label and, when the harvest log has
// an entry for it, the harvest time cut to 19 characters with "T" replaced by
// a space. A source without a config entry falls back to its IRI as the label.
const tagTitle = (iri) => {
  const label = sourceMeta.get(iri)?.label ?? iri
  const t = harvestBySource.get(iri)
  return t ? `${label}\n\nharvested ${t.slice(0, 19).replace("T", " ")}` : label
}
|
|
23
|
+
|
|
24
|
+
// Predicates for which several values (one per contributing source) are
// normal and must not be flagged as a merge conflict: the engine's own
// cdp:fromSource, plus the cdp:targetPredicate of every target field that
// federation.ttl declares cdp:multiValued "true".
const fedQuads = parseTtl(federationTtl)

const multiFields = new Set()
for (const q of fedQuads) {
  if (q.predicate.value === `${CDP}multiValued` && q.object.value === "true") multiFields.add(q.subject.value)
}

export const EXPECTED_MULTI = new Set([`${CDP}fromSource`])
for (const q of fedQuads) {
  if (multiFields.has(q.subject.value) && q.predicate.value === `${CDP}targetPredicate`) EXPECTED_MULTI.add(q.object.value)
}

// A field is in conflict when its predicate is not expected to be
// multi-valued and it nevertheless carries more than one value.
export function isConflict(f) {
  if (EXPECTED_MULTI.has(f.predicate)) return false
  return f.values.length > 1
}
|
|
32
|
+
|
|
33
|
+
// Highlight intensity steps, weakest first: outline colour, outline width in
// px, and the alpha of the red background wash.
const CONFLICT_LEVELS = [
  ["#fca5a5", 2, "0.08"],
  ["#f87171", 3, "0.16"],
  ["#ef4444", 4, "0.24"],
  ["#b91c1c", 5, "0.32"],
].map(([color, width, alpha]) => ({ color, width, bg: `rgba(220, 38, 38, ${alpha})` }))

// Inline style for a value that has `n` competing values. Returns undefined
// for n <= 1 (no conflict); otherwise n = 2 maps to the first level and every
// n past the end of the table maps to the last one.
function conflictStyle(n) {
  if (n <= 1) return undefined
  const lastLevel = CONFLICT_LEVELS.length - 1
  const { color, width, bg } = CONFLICT_LEVELS[Math.min(n - 2, lastLevel)]
  return {
    outline: `${width}px solid ${color}`,
    borderRadius: 2,
    backgroundColor: bg,
    padding: "0 4px",
    marginRight: 6,
  }
}
|
|
50
|
+
|
|
51
|
+
function SourceTags({ sources }) {
|
|
52
|
+
return (
|
|
53
|
+
<>
|
|
54
|
+
{sources.map((iri, i) => (
|
|
55
|
+
<span key={i} className="source-tag" title={tagTitle(iri)}>{sourceCode(iri)}</span>
|
|
56
|
+
))}
|
|
57
|
+
</>
|
|
58
|
+
)
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
function ValueCell({ values, highlight }) {
|
|
62
|
+
const [idx, setIdx] = useState(0)
|
|
63
|
+
// idx persists across re-renders, so clamp when `values` shrinks (e.g.
|
|
64
|
+
// rendering final.ttl where every (s,p) has exactly one value).
|
|
65
|
+
const safeIdx = idx % values.length
|
|
66
|
+
const cur = values[safeIdx]
|
|
67
|
+
const multi = values.length > 1
|
|
68
|
+
const style = highlight ? conflictStyle(values.length) : undefined
|
|
69
|
+
return (
|
|
70
|
+
<>
|
|
71
|
+
{multi && (
|
|
72
|
+
<span className="flip">
|
|
73
|
+
<button className="flip-btn" onClick={() => setIdx((safeIdx - 1 + values.length) % values.length)}>◀</button>
|
|
74
|
+
<span className="flip-counter">{safeIdx + 1}/{values.length}</span>
|
|
75
|
+
<button className="flip-btn" onClick={() => setIdx((safeIdx + 1) % values.length)}>▶</button>
|
|
76
|
+
</span>
|
|
77
|
+
)}
|
|
78
|
+
<span className="value-text" title={cur.raw ?? cur.value} style={style}>{cur.value}</span>
|
|
79
|
+
<SourceTags sources={cur.sources} />
|
|
80
|
+
</>
|
|
81
|
+
)
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
function OrgCardNarrow({ org, highlight }) {
|
|
85
|
+
return <KeyValueTable rows={org.fields.map((f) => ({ key: f.predicate, label: f.predLabel, value: <ValueCell values={f.values} highlight={highlight && isConflict(f)} /> }))} />
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
function OrgCardWide({ org, highlight }) {
|
|
89
|
+
const columns = org.columns
|
|
90
|
+
return (
|
|
91
|
+
<table>
|
|
92
|
+
<thead>
|
|
93
|
+
<tr>
|
|
94
|
+
<th></th>
|
|
95
|
+
{columns.map((c) => (
|
|
96
|
+
<th key={c.record} title={tagTitle(c.source)}>
|
|
97
|
+
<span className="source-tag">{sourceCode(c.source)}</span>
|
|
98
|
+
</th>
|
|
99
|
+
))}
|
|
100
|
+
</tr>
|
|
101
|
+
</thead>
|
|
102
|
+
<tbody>
|
|
103
|
+
{org.fields.map((f) => {
|
|
104
|
+
const conflict = highlight && isConflict(f) ? conflictStyle(f.values.length) : undefined
|
|
105
|
+
return (
|
|
106
|
+
<tr key={f.predicate}>
|
|
107
|
+
<td>{f.predLabel}</td>
|
|
108
|
+
{columns.map((c) => {
|
|
109
|
+
const v = f.values.find((val) => val.records.includes(c.record))
|
|
110
|
+
return <td key={c.record} title={v?.raw ?? v?.value}>{v && <span className="value-text" style={{ maxWidth: "50ch", ...conflict }}>{v.value}</span>}</td>
|
|
111
|
+
})}
|
|
112
|
+
</tr>
|
|
113
|
+
)
|
|
114
|
+
})}
|
|
115
|
+
</tbody>
|
|
116
|
+
</table>
|
|
117
|
+
)
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
export default function OrgCard({ org, compact, highlight }) {
|
|
121
|
+
return (
|
|
122
|
+
<Card title={org.label} tag={org.type}>
|
|
123
|
+
{compact ? <OrgCardNarrow org={org} highlight={highlight} /> : <OrgCardWide org={org} highlight={highlight} />}
|
|
124
|
+
</Card>
|
|
125
|
+
)
|
|
126
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// Pipeline view: the fetch→lift→…→resolve step graph the engines journaled
|
|
2
|
+
// while running — evidence of the executed pipeline.
|
|
3
|
+
// Reads: data/ingest/ingest-log.ttl, data/pipeline/federate-log.ttl,
|
|
4
|
+
// config/federation.ttl (via loadPipeline.js)
|
|
5
|
+
// Does: renders the Pipeline page (vertical <ColumnGraph>) with a Source
|
|
6
|
+
// lane-header per Fetch and payload labels on the edges
|
|
7
|
+
|
|
8
|
+
import { federationTtl, ingestLogTtl, federateLogTtl } from "./instanceData.js"
|
|
9
|
+
import { loadPipeline } from "./loadPipeline.js"
|
|
10
|
+
import ColumnGraph from "./ColumnGraph.jsx"
|
|
11
|
+
import React from "react"
|
|
12
|
+
|
|
13
|
+
// Step columns handed to <ColumnGraph>, in pipeline order.
const COLUMNS = [
  "Source",
  "Fetch",
  "Lift",
  "Clean",
  "Map",
  "Input",
  "Match",
  "Merge",
  "Resolve",
  "End",
]

// Columns handed to <ColumnGraph> as `centerColumns`.
const CENTER_COLUMNS = [
  "Clean",
  "Map",
  "Input",
  "Match",
  "Merge",
  "Resolve",
  "End",
]

// Colour per step column. Source, Input and End have no entry here.
const COLORS = Object.fromEntries([
  ["Fetch", "#d4e7ff"],
  ["Lift", "#e6f3d8"],
  ["Clean", "#fff1a8"],
  ["Map", "#f4cfe0"],
  ["Match", "#e2d4f4"],
  ["Merge", "#cfe9d8"],
  ["Resolve", "#c5e0e8"],
])
|
|
24
|
+
|
|
25
|
+
const { nodes, edges } = loadPipeline([ingestLogTtl, federateLogTtl], federationTtl)
|
|
26
|
+
|
|
27
|
+
export default function Pipeline() {
|
|
28
|
+
return (
|
|
29
|
+
<ColumnGraph
|
|
30
|
+
nodes={nodes}
|
|
31
|
+
edges={edges}
|
|
32
|
+
columns={COLUMNS}
|
|
33
|
+
colors={COLORS}
|
|
34
|
+
centerColumns={CENTER_COLUMNS}
|
|
35
|
+
direction="vertical"
|
|
36
|
+
colSpacing={120}
|
|
37
|
+
siblingGap={240}
|
|
38
|
+
nodeWidth={150}
|
|
39
|
+
/>
|
|
40
|
+
)
|
|
41
|
+
}
|