@directory-builder/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +112 -0
  3. package/bin/cli.js +38 -0
  4. package/example/README.md +64 -0
  5. package/example/config/federation.ttl +136 -0
  6. package/example/config/match-knowledge.ttl +8 -0
  7. package/example/sources/cityopen/clean.sparql +17 -0
  8. package/example/sources/cityopen/fetch.js +14 -0
  9. package/example/sources/cityopen/static/libraries.json +32 -0
  10. package/example/sources/civichub/clean.sparql +34 -0
  11. package/example/sources/civichub/fetch.js +14 -0
  12. package/example/sources/civichub/static/libraries.json +38 -0
  13. package/package.json +38 -0
  14. package/src/federate.js +571 -0
  15. package/src/index.js +6 -0
  16. package/src/ingest.js +158 -0
  17. package/src/lift/html.sparql +12 -0
  18. package/src/lift/json.sparql +12 -0
  19. package/src/pipeline.js +16 -0
  20. package/src/utils.js +152 -0
  21. package/src/webapp.js +41 -0
  22. package/webapp/index.html +11 -0
  23. package/webapp/src/About.jsx +24 -0
  24. package/webapp/src/App.jsx +96 -0
  25. package/webapp/src/Card.jsx +32 -0
  26. package/webapp/src/ColumnGraph.jsx +290 -0
  27. package/webapp/src/Directory.jsx +15 -0
  28. package/webapp/src/Download.jsx +174 -0
  29. package/webapp/src/MapGraph.jsx +244 -0
  30. package/webapp/src/MatchGraph.jsx +137 -0
  31. package/webapp/src/MergeTables.jsx +61 -0
  32. package/webapp/src/OrgCard.jsx +126 -0
  33. package/webapp/src/Pipeline.jsx +41 -0
  34. package/webapp/src/Query.jsx +165 -0
  35. package/webapp/src/Sources.jsx +52 -0
  36. package/webapp/src/instanceData.js +35 -0
  37. package/webapp/src/loadMap.js +276 -0
  38. package/webapp/src/loadMatch.js +228 -0
  39. package/webapp/src/loadMerge.js +93 -0
  40. package/webapp/src/loadPipeline.js +130 -0
  41. package/webapp/src/loadSources.js +102 -0
  42. package/webapp/src/main.jsx +9 -0
  43. package/webapp/src/mergeOrgs.js +15 -0
  44. package/webapp/src/sourceMeta.js +81 -0
  45. package/webapp/src/styles.css +23 -0
  46. package/webapp/vite.config.js +14 -0
  47. package/webapp/vite.js +28 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Civic Data Lab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,112 @@
1
+ # directory-builder-core
2
+
3
+ Use-case-agnostic engine for config-driven federation pipelines: fetch
4
+ heterogeneous sources, lift them to RDF, clean, map them onto a unified target
5
+ schema, then match, merge and resolve them into one federated directory.
6
+
7
+ An **instance** is a repo holding only declarative config and per-source
8
+ artefacts — no engine code:
9
+
10
+ ```
11
+ config/
12
+ federation.ttl # the decisions: sources + facts, target schemas,
13
+ # field mappings, match/merge/resolve rules
14
+ match-knowledge.ttl # curated owl:sameAs pairs
15
+ sources/<name>/
16
+ fetch.js # how to fetch this source
17
+ clean.sparql # how to clean its lifted RDF
18
+ static/ # the data itself, for static-file sources
19
+ webapp/
20
+ content/about.md # optional: the webapp's About page prose
21
+ exporters/<name>.js # optional: output adapters the webapp loads at runtime
22
+ ```
23
+
24
+ The `webapp/` half is entirely optional — a pipeline-only instance is just
25
+ `config/` + `sources/`, producing `data/` for downstream use.
26
+
27
+ Everything else follows by convention from the source names. The discovery
28
+ rule: a named open set (sources, exporters) is declared in federation.ttl and
29
+ its files follow by convention; a single well-known slot (the About page)
30
+ works by file presence alone. See [`example/`](example/) for a runnable
31
+ instance and the full data flow.
32
+
33
+ ## Prerequisites
34
+
35
+ - Node.js
36
+ - Java (for [SPARQL Anything](https://github.com/SPARQL-Anything/sparql.anything),
37
+ auto-downloaded on first run)
38
+
39
+ ## Run a pipeline
40
+
41
+ Two ways — both run the same engines, rooted at the instance directory.
42
+
43
+ Via the command line (root = the directory you invoke it from):
44
+
45
+ ```sh
46
+ npm install @directory-builder/core
47
+ npx directory-builder # full pipeline: ingest + federate
48
+ npx directory-builder ingest # fetch + lift only
49
+ npx directory-builder federate # clean → map → match → merge → resolve only
50
+ ```
51
+
52
+ Or programmatically:
53
+
54
+ ```js
55
+ import { Pipeline } from "@directory-builder/core"
56
+
57
+ const pipeline = new Pipeline() // root defaults to process.cwd()
58
+ await pipeline.run() // ingest + federate
59
+ ```
60
+
61
+ `new Pipeline({ root })` points the engines at an instance directory other
62
+ than the cwd — e.g. for driving several instances from one process or a test
63
+ fixture.
64
+
65
+ Each source's `fetch.js` is invoked as `node fetch.js <outDir> <fetchUrl-or-staticDir>
66
+ <runParamsJson>` — the JSON holds all `:hasRunParam` values grouped by name;
67
+ each fetcher picks the parameters it needs.
68
+
69
+ Engines journal their executed steps as p-plan RDF (`data/ingest/ingest-log.ttl`,
70
+ `data/pipeline/federate-log.ttl`) — evidence of what ran, not a plan.
71
+
72
+ ## Run the webapp
73
+
74
+ The webapp ships with the package; it fetches an instance's `config/` +
75
+ `data/` at runtime, so one app serves every use case and instances hold no
76
+ webapp code. From an instance directory:
77
+
78
+ ```sh
79
+ npx directory-builder webapp # dev server
80
+ npx directory-builder webapp build --base /repo/ # production build → webapp/dist/
81
+ ```
82
+
83
+ `webapp build` stages the instance's `config/`, `data/` and
84
+ `webapp/{content,exporters}/` into `webapp/dist/` next to the bundle —
85
+ `webapp/dist/` is the complete site, ready to publish as-is.
86
+
87
+ For webapp development in this repo:
88
+
89
+ ```sh
90
+ npm run webapp # dev server on example/
91
+ INSTANCE=../sosuse-directory-builder npm run webapp # any other instance dir
92
+ ```
93
+
94
+ Instances own the About page by providing `webapp/content/about.md` (markdown,
95
+ served and deployed like config and data); without one, a generic default
96
+ renders. Declaring `:federation :repository "https://github.com/…"` adds the
97
+ GitHub links (nav, static-source folders); without it they stay hidden.
98
+
99
+ Instances can inject **exporters** — output adapters mapping the directory
100
+ into an external schema. The federation declares them (`:federation
101
+ :hasExporter "x"`), the module lives at `webapp/exporters/x.js` in the instance
102
+ (served and deployed like config and data), and the Download page loads it at
103
+ runtime: it exports `label` / `filename` / `mime` and
104
+ `build(finalTtl, toolkit)`, where the toolkit passes in helpers like
105
+ `sparqlSelect`, since a runtime-loaded module cannot resolve bare imports.
106
+
107
+ Browser-safe helpers (TTL parsing, path conventions, journal vocabulary) are
108
+ exported separately so bundlers never see the engines' Node imports:
109
+
110
+ ```js
111
+ import { CDP, parseTtl, PATHS } from "@directory-builder/core/utils"
112
+ ```
package/bin/cli.js ADDED
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env node
2
+ // Config-only entry: a default Pipeline rooted at the invoking instance
3
+ // (npm runs scripts with cwd = the instance's package dir, so a downstream
4
+ // repo needs nothing but config/ + sources/ and this on its PATH).
5
+ // directory-builder run the full pipeline (ingest + federate)
6
+ // directory-builder ingest fetch + lift only
7
+ // directory-builder federate clean → map → match → merge → resolve only
8
+ // directory-builder webapp dev server for the instance's webapp
9
+ // directory-builder webapp build [--base /x/] build the webapp → <instance>/webapp/dist/
10
+
11
+ import { webappBuild, webappDev } from "../src/webapp.js"
12
+ import { Pipeline } from "../src/pipeline.js"
13
+
14
// argv after the node binary and script path: the command (defaulting to
// "run") followed by whatever arguments were passed to it.
const [cmd = "run", ...rest] = process.argv.slice(2)

/**
 * Look up the value of a command-line option among the arguments after the command.
 * Accepts both `--name value` and `--name=value`.
 *
 * @param {string} name - option name without the leading dashes
 * @returns {string | undefined} the option's value, or undefined when the
 *   option is absent (or given as a bare `--name` with nothing after it)
 */
const flag = (name) => {
  const i = rest.indexOf(`--${name}`)
  if (i >= 0) return rest[i + 1]
  const prefix = `--${name}=`
  // Slice off the prefix instead of splitting on "=", so a value that itself
  // contains "=" (e.g. --base=/a=b/) is returned whole rather than truncated.
  return rest.find((a) => a.startsWith(prefix))?.slice(prefix.length)
}
19
+
20
const pipeline = new Pipeline()

// Command table: one handler per CLI command; the selected handler's result
// is awaited at the bottom of the script.
const commands = {
  run: () => pipeline.run(),
  ingest: () => pipeline.ingest(),
  federate: () => pipeline.federate(),
  // `webapp` alone starts the dev server; `webapp build` builds, passing the
  // cwd and the optional --base value. Any other first argument is rejected.
  webapp: () => {
    if (rest[0] && rest[0] !== "build") {
      console.error(`Unknown webapp subcommand "${rest[0]}" — expected "build" or nothing (dev server)`)
      process.exit(1)
    }
    return rest[0] === "build" ? webappBuild(process.cwd(), { base: flag("base") }) : webappDev()
  },
}

// Own-property check: a plain `commands[cmd]` lookup also finds members
// inherited from Object.prototype ("constructor", "toString", …), which would
// slip past this guard and be invoked instead of being reported as unknown.
if (!Object.hasOwn(commands, cmd)) {
  console.error(`Unknown command "${cmd}" — expected one of: ${Object.keys(commands).join(", ")}`)
  process.exit(1)
}
await commands[cmd]()
@@ -0,0 +1,64 @@
1
+ # Example use case
2
+
3
+ A minimal, self-contained example use case that exercises the whole pipeline with two
4
+ **static file sources** — fictional library directories (`cityopen`, `civichub`)
5
+ with **deliberately different schemas** that partly overlap, so the field
6
+ mappings, then Match/Merge/Resolve, all have visible work to do.
7
+
8
+ It doubles as the engine's smoke test and as the dataset a scaffolded use case can start from.
9
+
10
+ ## Layout
11
+
12
+ ```
13
+ example/
14
+ config/
15
+ federation.ttl # the decisions: sources + facts, one schema:Organization
16
+ # target, field mappings, match/merge/resolve rules
17
+ match-knowledge.ttl # curated owl:sameAs pairs (empty here)
18
+ sources/
19
+ cityopen/ { fetch.js, clean.sparql, static/libraries.json }
20
+ civichub/ { fetch.js, clean.sparql, static/libraries.json }
21
+ ```
22
+
23
+ That's everything a use case is: config + one folder per source. Each source
24
+ folder is self-contained — how to fetch it, how to clean it, and (for static
25
+ sources) the data itself.
26
+
27
+ ## Run it
28
+
29
+ ```shell
30
+ npm install
31
+ npm run example
32
+ ```
33
+
34
+ To browse the result, `npm run webapp` serves the webapp against this example.
35
+
36
+ Or programmatically:
37
+
38
+ ```js
39
+ import { Pipeline } from "../src/pipeline.js" // from the package: @directory-builder/core
40
+
41
+ await new Pipeline({ root: "example/" }).run() // root defaults to process.cwd()
42
+ ```
43
+
44
+ A downstream use-case repo does the same with the published package: depend on
45
+ `@directory-builder/core` and call `npx directory-builder` — no engine code in
46
+ the use case.
47
+
48
+ Outputs land in `data/` (git-ignored, regenerable):
49
+
50
+ ```
51
+ data/ingest/raw/<source>/ raw JSON copied in by fetch.js
52
+ data/ingest/lifted/<source>/ RDF after the generic JSON lift
53
+ data/ingest/ingest-log.ttl journaled fetch/lift steps + harvest times
54
+ data/pipeline/cleaned/<source>.ttl
55
+ data/pipeline/mapped.ttl schema: vocabulary, both sources
56
+ data/pipeline/matches.ttl cross-source match evidence
57
+ data/pipeline/merged.ttl clustered, minted cluster IRIs
58
+ data/pipeline/provenance.ttl which source said what
59
+ data/pipeline/final.ttl one resolved record per organisation
60
+ data/pipeline/federate-log.ttl journaled clean→…→resolve steps
61
+ ```
62
+
63
+ The two `*-log.ttl` files are the engines' p-plan step journals — written as a
64
+ side effect of execution, they are what the webapp's Pipeline page renders.
@@ -0,0 +1,136 @@
1
+ @prefix : <https://civic-data.de/pipeline#> .
2
+ @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
3
+ @prefix schema: <http://schema.org/> .
4
+ @prefix foaf: <http://xmlns.com/foaf/0.1/> .
5
+ @prefix prov: <http://www.w3.org/ns/prov#> .
6
+ @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
7
+ @prefix ft: <http://publications.europa.eu/resource/authority/file-type/> .
8
+
9
+ # A minimal single-entity federation: two directories of libraries, merged into
10
+ # one set of schema:Organization records. Config declares decisions only —
11
+ # sources and their facts, target schema, mappings, match/merge/resolve rules.
12
+ # The engines own the step shape (fetch → lift, clean → map → match → merge →
13
+ # resolve) and resolve all file paths by convention from the source names.
14
+ # Add target schemas / mappings / match rules to model more entity types
15
+ # (e.g. a Service tier) and their relationships.
16
+
17
+ :federation a :Federation ;
18
+ :hasSource :cityopenSource, :civichubSource ;
19
+ :hasTargetSchema :organisationSchema ;
20
+ :hasMatchRule :organisationMatch ;
21
+ :hasMergeRule :merge ;
22
+ :hasResolveRule :resolve .
23
+
24
+ # ---- Target schema ------------------------------------------------------
25
+
26
+ :organisationSchema a :TargetSchema ;
27
+ :targetClass schema:Organization ;
28
+ rdfs:label "Organisation"@en ;
29
+ :hasTargetField :t-identifier, :t-name, :t-streetAddress, :t-postalCode,
30
+ :t-locality, :t-telephone, :t-email, :t-homepage .
31
+
32
+ # :multiValued - one value per contributing source is expected; the Merge view doesn't flag these as conflicts
33
+ :t-identifier a :TargetField ; :targetPredicate schema:identifier ; :multiValued true .
34
+ :t-name a :TargetField ; :targetPredicate schema:name .
35
+ :t-streetAddress a :TargetField ; :targetPredicate schema:streetAddress .
36
+ :t-postalCode a :TargetField ; :targetPredicate schema:postalCode .
37
+ :t-locality a :TargetField ; :targetPredicate schema:addressLocality .
38
+ :t-telephone a :TargetField ; :targetPredicate schema:telephone .
39
+ :t-email a :TargetField ; :targetPredicate schema:email .
40
+ :t-homepage a :TargetField ; :targetPredicate foaf:homepage .
41
+
42
+ # ---- Sources ------------------------------------------------------------
43
+ # Both are static-file sources: no :fetchUrl, so the engine hands their
44
+ # fetch.js the sources/<name>/static/ dir instead of a URL. :format drives
45
+ # which bundled lift query applies. :fieldPath is the JSON key as it appears
46
+ # after lifting (xyz:<fieldPath>).
47
+
48
+ :cityopenSource a :Source ;
49
+ rdfs:label "CityOpen Data" ;
50
+ skos:notation "co" ;
51
+ :retrieval "Files" ;
52
+ :format ft:JSON ;
53
+ :hasField :co-id, :co-name, :co-street, :co-postalCode, :co-city, :co-phone, :co-email, :co-website .
54
+
55
+ :co-id a :SourceField ; :fieldPath "id" .
56
+ :co-name a :SourceField ; :fieldPath "name" .
57
+ :co-street a :SourceField ; :fieldPath "street" .
58
+ :co-postalCode a :SourceField ; :fieldPath "postalCode" .
59
+ :co-city a :SourceField ; :fieldPath "city" .
60
+ :co-phone a :SourceField ; :fieldPath "phone" .
61
+ :co-email a :SourceField ; :fieldPath "email" .
62
+ :co-website a :SourceField ; :fieldPath "website" .
63
+
64
+ # A deliberately different shape from cityopen: German keys and a nested
65
+ # `anschrift` (address) object whose parts are reached via :hasSubField.
66
+ :civichubSource a :Source ;
67
+ rdfs:label "CivicHub Directory" ;
68
+ skos:notation "ch" ;
69
+ :retrieval "Files" ;
70
+ :format ft:JSON ;
71
+ :hasField :ch-uid, :ch-bezeichnung, :ch-telefon, :ch-email, :ch-webseite, :ch-anschrift .
72
+
73
+ :ch-uid a :SourceField ; :fieldPath "uid" .
74
+ :ch-bezeichnung a :SourceField ; :fieldPath "bezeichnung" .
75
+ :ch-telefon a :SourceField ; :fieldPath "telefon" .
76
+ :ch-email a :SourceField ; :fieldPath "email" .
77
+ :ch-webseite a :SourceField ; :fieldPath "webseite" .
78
+
79
+ :ch-anschrift a :SourceField ; :fieldPath "anschrift" ;
80
+ :hasSubField :ch-strasse, :ch-plz, :ch-ort .
81
+ :ch-strasse a :SubField ; :fieldPath "strasse" .
82
+ :ch-plz a :SubField ; :fieldPath "plz" .
83
+ :ch-ort a :SubField ; :fieldPath "ort" .
84
+
85
+ # ---- Mappings -----------------------------------------------------------
86
+
87
+ :cityopen-mapping a :Mapping ;
88
+ :fromSource :cityopenSource ;
89
+ :toTarget :organisationSchema ;
90
+ :sourceGraph <urn:source:cityopen> ;
91
+ :hasFieldMapping
92
+ [ :from :co-id ; :to :t-identifier ] ,
93
+ [ :from :co-name ; :to :t-name ] ,
94
+ [ :from :co-street ; :to :t-streetAddress ] ,
95
+ [ :from :co-postalCode ; :to :t-postalCode ] ,
96
+ [ :from :co-city ; :to :t-locality ] ,
97
+ [ :from :co-phone ; :to :t-telephone ] ,
98
+ [ :from :co-email ; :to :t-email ] ,
99
+ [ :from :co-website ; :to :t-homepage ] .
100
+
101
+ :civichub-mapping a :Mapping ;
102
+ :fromSource :civichubSource ;
103
+ :toTarget :organisationSchema ;
104
+ :sourceGraph <urn:source:civichub> ;
105
+ :hasFieldMapping
106
+ [ :from :ch-uid ; :to :t-identifier ] ,
107
+ [ :from :ch-bezeichnung ; :to :t-name ] ,
108
+ [ :from :ch-strasse ; :to :t-streetAddress ] ,
109
+ [ :from :ch-plz ; :to :t-postalCode ] ,
110
+ [ :from :ch-ort ; :to :t-locality ] ,
111
+ [ :from :ch-telefon ; :to :t-telephone ] ,
112
+ [ :from :ch-email ; :to :t-email ] ,
113
+ [ :from :ch-webseite ; :to :t-homepage ] .
114
+
115
+ # ---- Match --------------------------------------------------------------
116
+ # Records match when their postal code is identical (hard gate) and their names
117
+ # are fuzzily similar enough that the weighted score clears :minScore.
118
+
119
+ :organisationMatch a :MatchRule ;
120
+ :forTarget :organisationSchema ;
121
+ :targetNamespace "https://example.org/directory#" ;
122
+ :mintedSubjectPrefix "org-" ;
123
+ :hasHardCriterion [ :on schema:postalCode ] ;
124
+ :minScore 0.5 ;
125
+ :hasWeightedCriterion [ :on schema:name ; :weight 1.0 ] .
126
+
127
+ # ---- Merge --------------------------------------------------------------
128
+
129
+ :merge a :MergeRule ;
130
+ :originPredicate prov:wasDerivedFrom .
131
+
132
+ # ---- Resolve ------------------------------------------------------------
133
+ # One value per predicate per merged record; alphabeticFirst is deterministic.
134
+
135
+ :resolve a :ResolveRule ;
136
+ :defaultStrategy :alphabeticFirst .
@@ -0,0 +1,8 @@
1
+ @prefix : <https://civic-data.de/pipeline#> .
2
+ @prefix owl: <http://www.w3.org/2002/07/owl#> .
3
+
4
+ # Curated match knowledge for the example. Empty — the two sources match on
5
+ # postal code + fuzzy name alone. This is where hand-curated owl:sameAs pairs
6
+ # would go, forcing two records into one cluster regardless of score:
7
+ #
8
+ # :cityopen-co-3 owl:sameAs :civichub-ch-102 .
@@ -0,0 +1,17 @@
1
+ # Skolemise each library object from its JSON `id` into a stable
2
+ # cdp:cityopen-<id> IRI, copy its scalar fields verbatim, and tag the source.
3
+ # The lift produced one blank node per array element, all in xyz:/fx: vocabulary;
4
+ # clean stays in xyz:/cdp: — schema: vocabulary only enters at the map step.
5
+
6
+ PREFIX xyz: <http://sparql.xyz/facade-x/data/>
7
+ PREFIX cdp: <https://civic-data.de/pipeline#>
8
+
9
+ CONSTRUCT {
10
+ ?org cdp:fromSource cdp:cityopenSource ;
11
+ ?p ?o .
12
+ } WHERE {
13
+ ?node xyz:id ?id ;
14
+ ?p ?o .
15
+ FILTER(isLiteral(?o))
16
+ BIND(IRI(CONCAT(STR(cdp:), "cityopen-", STR(?id))) AS ?org)
17
+ }
@@ -0,0 +1,14 @@
1
+ import path from "path"
2
+ import fs from "fs"
3
+
4
// Static-file source: the committed JSON is copied as-is into the ingest area.
// A live source would call its API at this point and write the responses out.
// argv: [outDir, sourceDir, runParamsJson] — the params go unused in this static example.
const [, , OUT_DIR, SRC_DIR] = process.argv

fs.mkdirSync(OUT_DIR, { recursive: true })

// Only the .json files in the source directory are copied; each copy is logged.
const jsonFiles = fs.readdirSync(SRC_DIR).filter((name) => name.endsWith(".json"))
jsonFiles.forEach((name) => {
  const from = path.join(SRC_DIR, name)
  const to = path.join(OUT_DIR, name)
  fs.copyFileSync(from, to)
  console.log(` ${name} → ${OUT_DIR}`)
})
@@ -0,0 +1,32 @@
1
+ [
2
+ {
3
+ "id": "co-1",
4
+ "name": "Central City Library",
5
+ "street": "Main Street 1",
6
+ "postalCode": "10115",
7
+ "city": "Berlin",
8
+ "phone": "+49 30 1110001",
9
+ "email": "info@central-library.example",
10
+ "website": "https://central-library.example"
11
+ },
12
+ {
13
+ "id": "co-2",
14
+ "name": "Riverside Branch Library",
15
+ "street": "River Road 22",
16
+ "postalCode": "10243",
17
+ "city": "Berlin",
18
+ "phone": "+49 30 1110002",
19
+ "email": "hello@riverside.example",
20
+ "website": "https://riverside.example"
21
+ },
22
+ {
23
+ "id": "co-3",
24
+ "name": "Old Town Reading Room",
25
+ "street": "Market Square 5",
26
+ "postalCode": "10178",
27
+ "city": "Berlin",
28
+ "phone": "+49 30 1110003",
29
+ "email": "kontakt@oldtown.example",
30
+ "website": "https://oldtown.example"
31
+ }
32
+ ]
@@ -0,0 +1,34 @@
1
+ # Skolemise each entry from its JSON `uid` into a stable cdp:civichub-<uid> IRI.
2
+ # This source has a different shape from cityopen: German keys and a nested
3
+ # `anschrift` (address) object. Clean copies the top-level scalars and carries
4
+ # the address node through unflattened (as xyz:anschrift) — the map reads its
5
+ # parts via :hasSubField. Clean stays in xyz:/cdp:; schema: enters at the map.
6
+
7
+ PREFIX xyz: <http://sparql.xyz/facade-x/data/>
8
+ PREFIX cdp: <https://civic-data.de/pipeline#>
9
+
10
+ CONSTRUCT {
11
+ ?org cdp:fromSource cdp:civichubSource ;
12
+ xyz:uid ?uid ;
13
+ xyz:bezeichnung ?bezeichnung ;
14
+ xyz:telefon ?telefon ;
15
+ xyz:email ?email ;
16
+ xyz:webseite ?webseite ;
17
+ xyz:anschrift ?addr .
18
+ ?addr xyz:strasse ?strasse ;
19
+ xyz:plz ?plz ;
20
+ xyz:ort ?ort .
21
+ } WHERE {
22
+ ?node xyz:uid ?uid .
23
+ BIND(IRI(CONCAT(STR(cdp:), "civichub-", STR(?uid))) AS ?org)
24
+ OPTIONAL { ?node xyz:bezeichnung ?bezeichnung }
25
+ OPTIONAL { ?node xyz:telefon ?telefon }
26
+ OPTIONAL { ?node xyz:email ?email }
27
+ OPTIONAL { ?node xyz:webseite ?webseite }
28
+ OPTIONAL {
29
+ ?node xyz:anschrift ?addr .
30
+ OPTIONAL { ?addr xyz:strasse ?strasse }
31
+ OPTIONAL { ?addr xyz:plz ?plz }
32
+ OPTIONAL { ?addr xyz:ort ?ort }
33
+ }
34
+ }
@@ -0,0 +1,14 @@
1
+ import path from "path"
2
+ import fs from "fs"
3
+
4
// Static-file source: the committed JSON is copied as-is into the ingest area.
// A live source would call its API at this point and write the responses out.
// argv: [outDir, sourceDir, runParamsJson] — the params go unused in this static example.
const [, , OUT_DIR, SRC_DIR] = process.argv

fs.mkdirSync(OUT_DIR, { recursive: true })

// Only the .json files in the source directory are copied; each copy is logged.
const jsonFiles = fs.readdirSync(SRC_DIR).filter((name) => name.endsWith(".json"))
jsonFiles.forEach((name) => {
  const from = path.join(SRC_DIR, name)
  const to = path.join(OUT_DIR, name)
  fs.copyFileSync(from, to)
  console.log(` ${name} → ${OUT_DIR}`)
})
@@ -0,0 +1,38 @@
1
+ [
2
+ {
3
+ "uid": "ch-100",
4
+ "bezeichnung": "Central City Library (Main Branch)",
5
+ "anschrift": {
6
+ "strasse": "Main Street 1",
7
+ "plz": "10115",
8
+ "ort": "Berlin"
9
+ },
10
+ "telefon": "030 / 111 0001",
11
+ "email": "info@central-library.example",
12
+ "webseite": "http://www.central-library.example"
13
+ },
14
+ {
15
+ "uid": "ch-101",
16
+ "bezeichnung": "Riverside Library",
17
+ "anschrift": {
18
+ "strasse": "River Rd. 22",
19
+ "plz": "10243",
20
+ "ort": "Berlin"
21
+ },
22
+ "telefon": "030 111 0002",
23
+ "email": "team@riverside.example",
24
+ "webseite": "https://riverside.example"
25
+ },
26
+ {
27
+ "uid": "ch-102",
28
+ "bezeichnung": "Northside Community Library",
29
+ "anschrift": {
30
+ "strasse": "North Avenue 9",
31
+ "plz": "13347",
32
+ "ort": "Berlin"
33
+ },
34
+ "telefon": "+49 30 1110102",
35
+ "email": "mail@northside.example",
36
+ "webseite": "https://northside.example"
37
+ }
38
+ ]
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@directory-builder/core",
3
+ "version": "0.1.0",
4
+ "description": "Use-case-agnostic engine for config-driven federation pipelines",
5
+ "author": "Civic Data Lab",
6
+ "repository": "github:foederierter-datenpool/directory-builder-core",
7
+ "license": "MIT",
8
+ "type": "module",
9
+ "scripts": {
10
+ "example": "cd example && node ../bin/cli.js",
11
+ "webapp": "vite webapp",
12
+ "webapp:build": "vite build webapp"
13
+ },
14
+ "bin": {
15
+ "directory-builder": "bin/cli.js"
16
+ },
17
+ "exports": {
18
+ ".": "./src/index.js",
19
+ "./utils": "./src/utils.js"
20
+ },
21
+ "publishConfig": {
22
+ "access": "public"
23
+ },
24
+ "dependencies": {
25
+ "@foerderfunke/sem-ops-utils": "^0.5.1",
26
+ "@vitejs/plugin-react": "^6.0.1",
27
+ "@xyflow/react": "^12.10.2",
28
+ "@zazuko/yasgui": "^4.6.1",
29
+ "fuzzball": "^2.2.6",
30
+ "lucide-react": "^1.14.0",
31
+ "marked": "^18.0.5",
32
+ "n3": "^2.0.3",
33
+ "react": "^19.2.5",
34
+ "react-dom": "^19.2.5",
35
+ "react-router-dom": "^7.14.1",
36
+ "vite": "^8.0.9"
37
+ }
38
+ }