@directory-builder/core 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/package.json +1 -1
- package/src/index.js +2 -2
- package/src/pipeline/federate.js +69 -0
- package/src/pipeline/ingest.js +97 -0
- package/src/pipeline/run.js +8 -0
- package/src/pipeline/steps/clean.js +27 -0
- package/src/pipeline/steps/fetch.js +24 -0
- package/src/pipeline/steps/lift.js +58 -0
- package/src/pipeline/steps/map.js +172 -0
- package/src/pipeline/steps/match.js +212 -0
- package/src/pipeline/steps/merge.js +59 -0
- package/src/pipeline/steps/resolve.js +54 -0
- package/src/pipeline/write-turtle.js +30 -0
- package/src/pipeline.js +2 -2
- package/src/utils.js +1 -5
- package/webapp/src/App.jsx +3 -1
- package/webapp/src/Card.jsx +3 -3
- package/webapp/src/ColumnGraph.jsx +1 -1
- package/webapp/src/Directory.jsx +6 -6
- package/webapp/src/{OrgCard.jsx → EntityCard.jsx} +12 -12
- package/webapp/src/MapGraph.jsx +22 -22
- package/webapp/src/MatchGraph.jsx +3 -3
- package/webapp/src/MergeTables.jsx +43 -36
- package/webapp/src/Query.jsx +4 -9
- package/webapp/src/instanceData.js +6 -2
- package/webapp/src/loadMap.js +22 -22
- package/webapp/src/loadMerge.js +30 -32
- package/webapp/src/loadSources.js +1 -1
- package/webapp/src/mergeEntities.js +15 -0
- package/webapp/src/sourceMeta.js +1 -1
- package/webapp/src/styles.css +6 -6
- package/webapp/vite.js +1 -1
- package/src/federate.js +0 -571
- package/src/ingest.js +0 -158
- package/webapp/src/mergeOrgs.js +0 -15
|
@@ -35,7 +35,7 @@ const criteriaPredicates = (() => {
|
|
|
35
35
|
})()
|
|
36
36
|
|
|
37
37
|
// Map<recordIri, Map<predIri, [literalValue]>> for the per-member details modal.
|
|
38
|
-
const
|
|
38
|
+
const entityInfo = groupBySubject(parseTtl(mappedTtl), { literalsOnly: true })
|
|
39
39
|
|
|
40
40
|
const manualPairs = parseTtl(matchKnowledgeTtl)
|
|
41
41
|
.filter(q => q.predicate.value === OWL_SAME_AS)
|
|
@@ -52,7 +52,7 @@ function MemberDetailsModal({ clusterId, memberIris, onClose }) {
|
|
|
52
52
|
<button onClick={onClose} style={{ border: 0, background: "transparent", fontSize: 18, cursor: "pointer", lineHeight: 1 }}>×</button>
|
|
53
53
|
</div>
|
|
54
54
|
{memberIris.map((iri) => {
|
|
55
|
-
const info =
|
|
55
|
+
const info = entityInfo.get(iri)
|
|
56
56
|
return (
|
|
57
57
|
<div key={iri} style={{ marginBottom: 14 }}>
|
|
58
58
|
<div style={{ fontSize: 11, color: "#666", marginBottom: 4 }}><code>{prefixed(iri)}</code></div>
|
|
@@ -98,7 +98,7 @@ export default function MatchGraph() {
|
|
|
98
98
|
if (n.isCluster) n.subtitle = n.id.startsWith(CDF_NS) ? `cdf:${n.id.slice(CDF_NS.length)}` : prefixed(n.id)
|
|
99
99
|
else { // a source (dedup) node
|
|
100
100
|
n.label = sourceCode(n.id)
|
|
101
|
-
n.subtitle =
|
|
101
|
+
n.subtitle = entityInfo.get(n.id)?.get(SCHEMA_IDENTIFIER)?.[0]
|
|
102
102
|
}
|
|
103
103
|
}
|
|
104
104
|
// Drop columns that ended up empty (schemas with no source duplication when
|
|
@@ -1,42 +1,51 @@
|
|
|
1
|
-
// Merge view: every
|
|
2
|
-
// highlighting
|
|
3
|
-
//
|
|
4
|
-
//
|
|
1
|
+
// Merge view: every entity with its per-source field values and conflict
|
|
2
|
+
// highlighting. An entity referencing another via a relationship the
|
|
3
|
+
// federation declares (mapping :hasRelationship → :toTargetField →
|
|
4
|
+
// :targetPredicate) renders nested beneath it.
|
|
5
|
+
// Reads: mergedEntities from mergeEntities.js (← merged.ttl + provenance.ttl),
|
|
6
|
+
// config/federation.ttl (relationship predicates)
|
|
7
|
+
// Does: renders the Merge page (compact / wide <EntityCard>, toggleable)
|
|
5
8
|
|
|
6
|
-
import
|
|
7
|
-
import {
|
|
9
|
+
import { CDP, parseTtl } from "@directory-builder/core/utils"
|
|
10
|
+
import { mergedEntities } from "./mergeEntities.js"
|
|
11
|
+
import { federationTtl } from "./instanceData.js"
|
|
12
|
+
import EntityCard from "./EntityCard.jsx"
|
|
8
13
|
import React, { useState } from "react"
|
|
9
14
|
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const
|
|
15
|
+
const fedQuads = parseTtl(federationTtl)
|
|
16
|
+
const relFields = new Set(fedQuads.filter((q) => q.predicate.value === `${CDP}toTargetField`).map((q) => q.object.value))
|
|
17
|
+
const REL_PREDS = new Set(fedQuads.filter((q) => relFields.has(q.subject.value) && q.predicate.value === `${CDP}targetPredicate`).map((q) => q.object.value))
|
|
13
18
|
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
if (e.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
servicesByOrg.get(provider).push(e)
|
|
27
|
-
} else {
|
|
28
|
-
orphanServices.push(e)
|
|
29
|
-
}
|
|
19
|
+
// An entity's parent = the first relationship value pointing at another merged
|
|
20
|
+
// entity. Entities keep their (conflict-sorted) order within each level.
|
|
21
|
+
const iris = new Set(mergedEntities.map((e) => e.iri))
|
|
22
|
+
const parentOf = (e) => e.fields.find((f) => REL_PREDS.has(f.predicate) && iris.has(f.values[0]?.raw))?.values[0].raw
|
|
23
|
+
const childrenOf = new Map()
|
|
24
|
+
const hasParent = new Set()
|
|
25
|
+
for (const e of mergedEntities) {
|
|
26
|
+
const p = parentOf(e)
|
|
27
|
+
if (!p || p === e.iri) continue
|
|
28
|
+
if (!childrenOf.has(p)) childrenOf.set(p, [])
|
|
29
|
+
childrenOf.get(p).push(e)
|
|
30
|
+
hasParent.add(e.iri)
|
|
30
31
|
}
|
|
31
32
|
|
|
33
|
+
// Flatten to (entity, depth) rows; the second pass catches reference cycles,
|
|
34
|
+
// which would otherwise never be reached from a top-level entity.
|
|
35
|
+
const ROWS = []
|
|
36
|
+
const seen = new Set()
|
|
37
|
+
const walk = (e, depth) => {
|
|
38
|
+
if (seen.has(e.iri)) return
|
|
39
|
+
seen.add(e.iri)
|
|
40
|
+
ROWS.push({ e, depth })
|
|
41
|
+
for (const c of childrenOf.get(e.iri) ?? []) walk(c, depth + 1)
|
|
42
|
+
}
|
|
43
|
+
for (const e of mergedEntities) if (!hasParent.has(e.iri)) walk(e, 0)
|
|
44
|
+
for (const e of mergedEntities) walk(e, 0)
|
|
45
|
+
|
|
32
46
|
export default function MergeTables() {
|
|
33
47
|
const [compact, setCompact] = useState(true)
|
|
34
48
|
const [highlight, setHighlight] = useState(true)
|
|
35
|
-
const service = (svc) => (
|
|
36
|
-
<div key={svc.iri} style={{ marginLeft: "1.5rem", borderLeft: "2px solid #e0e0e0", paddingLeft: "0.75rem" }}>
|
|
37
|
-
<OrgCard org={svc} compact={compact} highlight={highlight} />
|
|
38
|
-
</div>
|
|
39
|
-
)
|
|
40
49
|
return (
|
|
41
50
|
<div className="page" style={{ overflowY: "auto", height: "100%" }}>
|
|
42
51
|
<div style={{ display: "flex", gap: "1rem", marginBottom: "0.75rem", fontSize: 13 }}>
|
|
@@ -49,13 +58,11 @@ export default function MergeTables() {
|
|
|
49
58
|
Highlight conflicts
|
|
50
59
|
</label>
|
|
51
60
|
</div>
|
|
52
|
-
{
|
|
53
|
-
<
|
|
54
|
-
<
|
|
55
|
-
|
|
56
|
-
</React.Fragment>
|
|
61
|
+
{ROWS.map(({ e, depth }) => (
|
|
62
|
+
<div key={e.iri} style={depth ? { marginLeft: `${depth * 1.5}rem`, borderLeft: "2px solid #e0e0e0", paddingLeft: "0.75rem" } : undefined}>
|
|
63
|
+
<EntityCard entity={e} compact={compact} highlight={highlight} />
|
|
64
|
+
</div>
|
|
57
65
|
))}
|
|
58
|
-
{orphanServices.map(service)}
|
|
59
66
|
</div>
|
|
60
67
|
)
|
|
61
68
|
}
|
package/webapp/src/Query.jsx
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import { storeFromTurtles } from "@foerderfunke/sem-ops-utils/core"
|
|
7
7
|
import { queryEngine } from "@foerderfunke/sem-ops-utils/sparql"
|
|
8
|
-
import { finalTtl } from "./instanceData.js"
|
|
8
|
+
import { finalTtl, querySparql } from "./instanceData.js"
|
|
9
9
|
import React, { useEffect, useRef } from "react"
|
|
10
10
|
import "@zazuko/yasgui/build/yasgui.min.css"
|
|
11
11
|
import Yasgui from "@zazuko/yasgui"
|
|
@@ -18,14 +18,9 @@ const ENDPOINT = "http://local/sparql"
|
|
|
18
18
|
|
|
19
19
|
const store = storeFromTurtles([finalTtl])
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
SELECT ?org (SAMPLE(?name) AS ?title) WHERE {
|
|
25
|
-
?org schema:name ?name .
|
|
26
|
-
}
|
|
27
|
-
GROUP BY ?org
|
|
28
|
-
ORDER BY ?title`
|
|
21
|
+
// Instances own the editor's starting query via webapp/content/query.sparql
|
|
22
|
+
// (fetched at runtime like the About prose); plain select-all without one.
|
|
23
|
+
const INITIAL_QUERY = querySparql || "SELECT * WHERE { ?s ?p ?o } LIMIT 100"
|
|
29
24
|
|
|
30
25
|
Yasgui.Yasqe.defaults.value = INITIAL_QUERY
|
|
31
26
|
|
|
@@ -23,13 +23,17 @@ export const repositoryUrl = objectsOf(fedQuads, `${CDP}repository`)[0]
|
|
|
23
23
|
// Display prefixes = the federation's own @prefix declarations; cdp pinned
|
|
24
24
|
// first so cdp:… wins over the empty ":" prefix bound to the same namespace.
|
|
25
25
|
export const displayPrefixes = { cdp: CDP, ...prefixesOf(federationTtl) }
|
|
26
|
+
// The federation's display name (:federation rdfs:label) — optional; the
|
|
27
|
+
// webapp keeps its generic title without one.
|
|
28
|
+
export const federationLabel = fedQuads.find((q) =>
|
|
29
|
+
q.subject.value === `${CDP}federation` && q.predicate.value === "http://www.w3.org/2000/01/rdf-schema#label")?.object.value
|
|
26
30
|
|
|
27
31
|
const FIXED = [PATHS.matchKnowledge, PATHS.ingestLog, PATHS.federateLog, PATHS.mapped,
|
|
28
|
-
PATHS.matches, PATHS.merged, PATHS.provenance, PATHS.final, PATHS.about]
|
|
32
|
+
PATHS.matches, PATHS.merged, PATHS.provenance, PATHS.final, PATHS.about, PATHS.query]
|
|
29
33
|
const [fixedTexts, cleanedTexts] = await Promise.all([
|
|
30
34
|
Promise.all(FIXED.map(fetchText)),
|
|
31
35
|
Promise.all(cleanedPaths.map(fetchText)),
|
|
32
36
|
])
|
|
33
37
|
|
|
34
|
-
export const [matchKnowledgeTtl, ingestLogTtl, federateLogTtl, mappedTtl, matchesTtl, mergedTtl, provenanceTtl, finalTtl, aboutMd] = fixedTexts
|
|
38
|
+
export const [matchKnowledgeTtl, ingestLogTtl, federateLogTtl, mappedTtl, matchesTtl, mergedTtl, provenanceTtl, finalTtl, aboutMd, querySparql] = fixedTexts
|
|
35
39
|
export const cleanedByPath = Object.fromEntries(cleanedPaths.map((p, i) => [p, cleanedTexts[i]]))
|
package/webapp/src/loadMap.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
// Helpers for the Map view: build the schema-mapping graph and resolve per-
|
|
1
|
+
// Helpers for the Map view: build the schema-mapping graph and resolve per-entity
|
|
2
2
|
// source/target field values. Pure (ttl in → data out).
|
|
3
3
|
// Reads: TTL strings passed by MapGraph.jsx (federation, mapped, cleaned source TTL)
|
|
4
|
-
// Does: returns { nodes, edges } plus per-source / per-
|
|
4
|
+
// Does: returns { nodes, edges } plus per-source / per-entity value maps
|
|
5
5
|
|
|
6
6
|
import { CDP as NS, localName, parseTtl, prefixesOf, shrink, sourceName, subjectsOfType, typesOf } from "@directory-builder/core/utils"
|
|
7
7
|
|
|
@@ -9,26 +9,26 @@ const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
|
|
|
9
9
|
const NODE_TYPES = [`${NS}Source`, `${NS}SourceField`, `${NS}TargetField`, `${NS}TargetSchema`]
|
|
10
10
|
const SUB_FIELD = `${NS}SubField`
|
|
11
11
|
|
|
12
|
-
// Group
|
|
12
|
+
// Group entities by source. Each entity carries a cdp:fromSource triple in mapped.ttl
|
|
13
13
|
// pointing at its Source IRI, so this is a single-pass scan with no prefix
|
|
14
14
|
// matching.
|
|
15
|
-
export function
|
|
15
|
+
export function loadEntitiesBySource(_federationTtl, mappedTtl) {
|
|
16
16
|
const SCHEMA_NAME = "http://schema.org/name"
|
|
17
17
|
const SCHEMA_IDENTIFIER = "http://schema.org/identifier"
|
|
18
18
|
const FROM_SOURCE = `${NS}fromSource`
|
|
19
19
|
|
|
20
|
-
const
|
|
20
|
+
const entitySource = new Map() // entityIri -> sourceIri
|
|
21
21
|
const ids = new Map()
|
|
22
22
|
const names = new Map()
|
|
23
23
|
for (const q of parseTtl(mappedTtl)) {
|
|
24
24
|
const p = q.predicate.value
|
|
25
|
-
if (p === FROM_SOURCE)
|
|
25
|
+
if (p === FROM_SOURCE) entitySource.set(q.subject.value, q.object.value)
|
|
26
26
|
else if (p === SCHEMA_IDENTIFIER) ids.set(q.subject.value, q.object.value)
|
|
27
27
|
else if (p === SCHEMA_NAME) names.set(q.subject.value, q.object.value)
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
const result = new Map()
|
|
31
|
-
for (const [iri, src] of
|
|
31
|
+
for (const [iri, src] of entitySource) {
|
|
32
32
|
if (!result.has(src)) result.set(src, [])
|
|
33
33
|
result.get(src).push({
|
|
34
34
|
iri,
|
|
@@ -40,11 +40,11 @@ export function loadOrgsBySource(_federationTtl, mappedTtl) {
|
|
|
40
40
|
return result
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
// For each
|
|
43
|
+
// For each entity in mapped.ttl, resolve the literal value of each of its
|
|
44
44
|
// source fields/sub-fields (from the source's lifted/cleaned TTL) AND each
|
|
45
45
|
// target field (from mapped.ttl, indirected via the field's :targetPredicate).
|
|
46
|
-
// Returns Map<
|
|
47
|
-
export function
|
|
46
|
+
// Returns Map<entityIri, Map<fieldIri, string>>.
|
|
47
|
+
export function loadFieldValuesByEntity(federationTtl, mappedTtl, liftedBySource) {
|
|
48
48
|
const fedQuads = parseTtl(federationTtl)
|
|
49
49
|
const fieldPathOf = new Map()
|
|
50
50
|
const fieldsBySource = new Map()
|
|
@@ -64,13 +64,13 @@ export function loadFieldValuesByOrg(federationTtl, mappedTtl, liftedBySource) {
|
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
const FROM_SOURCE = `${NS}fromSource`
|
|
67
|
-
const
|
|
68
|
-
const
|
|
67
|
+
const entitySource = new Map() // entityIri -> sourceIri
|
|
68
|
+
const literalsByEntity = new Map() // entityIri -> Map<predicateIri, string>
|
|
69
69
|
for (const q of parseTtl(mappedTtl)) {
|
|
70
|
-
if (q.predicate.value === FROM_SOURCE)
|
|
70
|
+
if (q.predicate.value === FROM_SOURCE) entitySource.set(q.subject.value, q.object.value)
|
|
71
71
|
if (q.object.termType === "Literal") {
|
|
72
|
-
if (!
|
|
73
|
-
|
|
72
|
+
if (!literalsByEntity.has(q.subject.value)) literalsByEntity.set(q.subject.value, new Map())
|
|
73
|
+
literalsByEntity.get(q.subject.value).set(q.predicate.value, q.object.value)
|
|
74
74
|
}
|
|
75
75
|
}
|
|
76
76
|
|
|
@@ -88,10 +88,10 @@ export function loadFieldValuesByOrg(federationTtl, mappedTtl, liftedBySource) {
|
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
const fields = fieldsBySource.get(sourceIri) ?? []
|
|
91
|
-
for (const [
|
|
91
|
+
for (const [entityIri, src] of entitySource) {
|
|
92
92
|
if (src !== sourceIri) continue
|
|
93
93
|
// Source subject IS the federation IRI post-clean — no lookup needed.
|
|
94
|
-
const subjectPreds = graph.get(
|
|
94
|
+
const subjectPreds = graph.get(entityIri)
|
|
95
95
|
if (!subjectPreds) continue
|
|
96
96
|
|
|
97
97
|
const valueMap = new Map()
|
|
@@ -115,17 +115,17 @@ export function loadFieldValuesByOrg(federationTtl, mappedTtl, liftedBySource) {
|
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
}
|
|
118
|
-
result.set(
|
|
118
|
+
result.set(entityIri, valueMap)
|
|
119
119
|
}
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
// Layer in target-field values: indirect each :targetPredicate through the
|
|
123
|
-
//
|
|
123
|
+
// entity's literal predicate->value map from mapped.ttl. These are the values
|
|
124
124
|
// that flow OUT of transform nodes (and equal the source value for direct
|
|
125
125
|
// 1:1 mappings).
|
|
126
|
-
for (const [
|
|
127
|
-
if (!result.has(
|
|
128
|
-
const valueMap = result.get(
|
|
126
|
+
for (const [entityIri, preds] of literalsByEntity) {
|
|
127
|
+
if (!result.has(entityIri)) result.set(entityIri, new Map())
|
|
128
|
+
const valueMap = result.get(entityIri)
|
|
129
129
|
for (const [tfIri, predIri] of targetPredicateOf) {
|
|
130
130
|
const v = preds.get(predIri)
|
|
131
131
|
if (v) valueMap.set(tfIri, v)
|
package/webapp/src/loadMerge.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
// Parses merged + provenance TTL into
|
|
1
|
+
// Parses merged + provenance TTL into entity objects: each field's values and the
|
|
2
2
|
// :Source(s) that contributed them, ordered by config. Pure (ttl in → data out).
|
|
3
|
-
// Reads: TTL strings passed by
|
|
4
|
-
// Does: returns
|
|
3
|
+
// Reads: TTL strings passed by mergeEntities.js; resolves sources via sourceMeta.js
|
|
4
|
+
// Does: returns entity[] (each {iri, label, type, fields[], sources[]})
|
|
5
5
|
|
|
6
|
-
import { CDP as NS, parseTtl,
|
|
6
|
+
import { CDP as NS, parseTtl, prefixesOf, shrink } from "@directory-builder/core/utils"
|
|
7
7
|
import { compareSources, loadSourceMeta } from "./sourceMeta.js"
|
|
8
8
|
|
|
9
9
|
const PROV_DERIVED_FROM = "http://www.w3.org/ns/prov#wasDerivedFrom"
|
|
@@ -16,13 +16,11 @@ export function loadMerge(mergedTtl, provTtl, federationTtl = "") {
|
|
|
16
16
|
const prefixes = { cdp: NS, ...prefixesOf(federationTtl) }
|
|
17
17
|
const prefixedIri = (iri) => shrink(iri, prefixes)
|
|
18
18
|
const mergedQuads = parseTtl(mergedTtl)
|
|
19
|
-
const provQuads =
|
|
19
|
+
const provQuads = parseTtl(provTtl)
|
|
20
20
|
const sourceMeta = federationTtl ? loadSourceMeta(federationTtl) : new Map()
|
|
21
21
|
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
// quoted-triple subject either directly as a Quad term, or via an
|
|
25
|
-
// auto-generated reifier bnode + rdf:reifies triple — accept both shapes.
|
|
22
|
+
// provenance.ttl is RDF 1.2 reification: one reifier per derivation
|
|
23
|
+
// (`_:r rdf:reifies <<( s p o )>> ; prov:wasDerivedFrom record`).
|
|
26
24
|
const reifies = new Map()
|
|
27
25
|
for (const q of provQuads) {
|
|
28
26
|
if (q.predicate.value === RDF_REIFIES && q.object.termType === "Quad") reifies.set(q.subject.value, q.object)
|
|
@@ -30,7 +28,7 @@ export function loadMerge(mergedTtl, provTtl, federationTtl = "") {
|
|
|
30
28
|
const annotations = []
|
|
31
29
|
for (const q of provQuads) {
|
|
32
30
|
if (q.predicate.value !== PROV_DERIVED_FROM) continue
|
|
33
|
-
const t =
|
|
31
|
+
const t = reifies.get(q.subject.value)
|
|
34
32
|
if (t) annotations.push({ s: t.subject.value, p: t.predicate.value, o: t.object.value, rec: q.object.value })
|
|
35
33
|
}
|
|
36
34
|
// Resolve each record to its :Source via cdp:fromSource (reified in
|
|
@@ -48,46 +46,46 @@ export function loadMerge(mergedTtl, provTtl, federationTtl = "") {
|
|
|
48
46
|
}
|
|
49
47
|
|
|
50
48
|
// Walk merged.ttl in parse order so card order = pipeline order.
|
|
51
|
-
const
|
|
52
|
-
const
|
|
53
|
-
const
|
|
49
|
+
const entities = []
|
|
50
|
+
const entityIndex = new Map()
|
|
51
|
+
const fieldIndexByEntity = new Map()
|
|
54
52
|
for (const q of mergedQuads) {
|
|
55
|
-
const
|
|
53
|
+
const entityIri = q.subject.value
|
|
56
54
|
const predIri = q.predicate.value
|
|
57
55
|
const value = q.object.value
|
|
58
56
|
|
|
59
|
-
if (!
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
57
|
+
if (!entityIndex.has(entityIri)) {
|
|
58
|
+
entityIndex.set(entityIri, entities.length)
|
|
59
|
+
fieldIndexByEntity.set(entityIri, new Map())
|
|
60
|
+
entities.push({ iri: entityIri, label: prefixedIri(entityIri), fields: [] })
|
|
63
61
|
}
|
|
64
|
-
const
|
|
65
|
-
const fieldIndex =
|
|
62
|
+
const entity = entities[entityIndex.get(entityIri)]
|
|
63
|
+
const fieldIndex = fieldIndexByEntity.get(entityIri)
|
|
66
64
|
|
|
67
65
|
// rdf:type carries the entity class — surface it in the card header
|
|
68
|
-
// (see
|
|
69
|
-
if (predIri === RDF_TYPE) {
|
|
66
|
+
// (see EntityCard), not as a field row.
|
|
67
|
+
if (predIri === RDF_TYPE) { entity.type = prefixedIri(value); continue }
|
|
70
68
|
|
|
71
69
|
if (!fieldIndex.has(predIri)) {
|
|
72
|
-
fieldIndex.set(predIri,
|
|
73
|
-
|
|
70
|
+
fieldIndex.set(predIri, entity.fields.length)
|
|
71
|
+
entity.fields.push({ predicate: predIri, predLabel: prefixedIri(predIri), values: [] })
|
|
74
72
|
}
|
|
75
|
-
const field =
|
|
76
|
-
const records = [...(provIndex.get(tripleKey(
|
|
73
|
+
const field = entity.fields[fieldIndex.get(predIri)]
|
|
74
|
+
const records = [...(provIndex.get(tripleKey(entityIri, predIri, value)) ?? [])]
|
|
77
75
|
const sources = toSources(records)
|
|
78
76
|
const displayValue = q.object.termType === "NamedNode" ? prefixedIri(value) : value
|
|
79
77
|
field.values.push({ value: displayValue, raw: value, sources, records })
|
|
80
78
|
}
|
|
81
79
|
|
|
82
80
|
// Per-field: sort values by source-count desc so the most-supported one is index 0.
|
|
83
|
-
// Per-
|
|
81
|
+
// Per-entity: one column per contributing record (two records from the same source
|
|
84
82
|
// get two columns), ordered by source then record IRI.
|
|
85
|
-
for (const
|
|
86
|
-
for (const f of
|
|
83
|
+
for (const entity of entities) {
|
|
84
|
+
for (const f of entity.fields) f.values.sort((a, b) => b.sources.length - a.sources.length)
|
|
87
85
|
const all = new Set()
|
|
88
|
-
for (const f of
|
|
89
|
-
|
|
86
|
+
for (const f of entity.fields) for (const v of f.values) for (const r of v.records) all.add(r)
|
|
87
|
+
entity.columns = [...all].map((r) => ({ record: r, source: sourceOfRecord.get(r) }))
|
|
90
88
|
.sort((a, b) => compareSources(a.source, b.source, sourceMeta) || a.record.localeCompare(b.record))
|
|
91
89
|
}
|
|
92
|
-
return
|
|
90
|
+
return entities
|
|
93
91
|
}
|
|
@@ -68,7 +68,7 @@ export function loadSources(federationTtl, mappedTtl, ingestLogTtl) {
|
|
|
68
68
|
if (!get(sourceIri).fetchUrl) get(sourceIri).staticSource = PATHS.staticDir(sourceName(sourceIri))
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
-
// Records: count distinct
|
|
71
|
+
// Records: count distinct entities in mapped.ttl per source via cdp:fromSource.
|
|
72
72
|
const FROM_SOURCE = `${NS}fromSource`
|
|
73
73
|
const subjectsBySource = new Map()
|
|
74
74
|
for (const q of mappedQuads) {
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// Builds the entity lists for the Merge and Directory views, in one shared order.
|
|
2
|
+
// Reads: data/pipeline/{merged,provenance,final}.ttl, config/federation.ttl (via loadMerge.js)
|
|
3
|
+
// Does: exports mergedEntities and finalEntities (consumed by MergeTables, Directory)
|
|
4
|
+
|
|
5
|
+
import { loadMerge } from "./loadMerge.js"
|
|
6
|
+
import { isConflict } from "./EntityCard.jsx"
|
|
7
|
+
import { federationTtl, provenanceTtl as provTtl, mergedTtl, finalTtl } from "./instanceData.js"
|
|
8
|
+
|
|
9
|
+
const conflictCount = (entity) => entity.fields.reduce((n, f) => n + (isConflict(f) ? 1 : 0), 0)
|
|
10
|
+
|
|
11
|
+
// Merge view sorts by conflict count desc; the directory mirrors that order
|
|
12
|
+
// so the same entity sits in the same visual slot across pages.
|
|
13
|
+
export const mergedEntities = loadMerge(mergedTtl, provTtl, federationTtl).sort((a, b) => conflictCount(b) - conflictCount(a) || a.iri.localeCompare(b.iri))
|
|
14
|
+
const orderIndex = new Map(mergedEntities.map((o, i) => [o.iri, i]))
|
|
15
|
+
export const finalEntities = loadMerge(finalTtl, "", federationTtl).sort((a, b) => (orderIndex.get(a.iri) ?? Infinity) - (orderIndex.get(b.iri) ?? Infinity))
|
package/webapp/src/sourceMeta.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// the PATHS conventions. JS never hardcodes a source name — it resolves records
|
|
4
4
|
// to a :Source via cdp:fromSource.
|
|
5
5
|
// Reads: TTL strings passed in (federation, mapped, ingest-log)
|
|
6
|
-
// Does: returns lookup maps + helpers (used by loadMerge,
|
|
6
|
+
// Does: returns lookup maps + helpers (used by loadMerge, EntityCard, MapGraph, MatchGraph)
|
|
7
7
|
|
|
8
8
|
import { CDP as NS, parseTtl, PATHS, sourceName } from "@directory-builder/core/utils"
|
|
9
9
|
|
package/webapp/src/styles.css
CHANGED
|
@@ -9,12 +9,12 @@ nav a:hover { color: #000 }
|
|
|
9
9
|
nav a.active { font-weight: bold; color: #000 }
|
|
10
10
|
main { flex: 1; min-height: 0 }
|
|
11
11
|
.page { padding: 1rem }
|
|
12
|
-
.
|
|
13
|
-
.
|
|
14
|
-
.
|
|
15
|
-
.
|
|
16
|
-
.
|
|
17
|
-
.
|
|
12
|
+
.entity-card { margin: 0 0 1rem 0; padding: 0.6rem 0.9rem; border: 1px solid #ddd; border-radius: 4px; background: #fff }
|
|
13
|
+
.entity-card-header { font-size: 14px; margin-bottom: 0.4rem; color: #444 }
|
|
14
|
+
.entity-card-header code { background: #f5f5f5; padding: 2px 4px; border-radius: 2px; font-size: 12px }
|
|
15
|
+
.entity-card table { border-collapse: collapse; font-size: 13px }
|
|
16
|
+
.entity-card td { padding: 3px 8px 3px 0; vertical-align: middle }
|
|
17
|
+
.entity-card td:first-child { color: #666; white-space: nowrap; padding-right: 1rem }
|
|
18
18
|
.value-text { display: inline-block; max-width: 60ch; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; vertical-align: middle }
|
|
19
19
|
.source-tag { display: inline-block; padding: 1px 6px; margin-left: 4px; border-radius: 3px; background: #eee; color: #666; font-size: 11px; vertical-align: middle }
|
|
20
20
|
.flip { margin-right: 8px; white-space: nowrap }
|
package/webapp/vite.js
CHANGED
|
@@ -16,7 +16,7 @@ export function serveInstanceData({ root = process.cwd() } = {}) {
|
|
|
16
16
|
// Own the 404: falling through would hit the SPA fallback, which
|
|
17
17
|
// serves index.html with 200 — instanceData would parse HTML as TTL.
|
|
18
18
|
if (!existsSync(file)) { res.statusCode = 404; return res.end() }
|
|
19
|
-
res.setHeader("Content-Type", { js: "text/javascript", md: "text/markdown" }[rel.split(".").pop()] ?? "text/turtle")
|
|
19
|
+
res.setHeader("Content-Type", { js: "text/javascript", md: "text/markdown", sparql: "application/sparql-query" }[rel.split(".").pop()] ?? "text/turtle")
|
|
20
20
|
res.end(readFileSync(file))
|
|
21
21
|
}
|
|
22
22
|
return {
|