@directory-builder/core 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,11 +11,16 @@ artefacts — no engine code:
11
11
  config/
12
12
  federation.ttl # the decisions: sources + facts, target schemas,
13
13
  # field mappings, match/merge/resolve rules
14
- match-knowledge.ttl # curated owl:sameAs pairs
14
+ match-knowledge.ttl # optional: curated owl:sameAs pairs
15
15
  sources/<name>/
16
16
  fetch.js # how to fetch this source
17
17
  clean.sparql # how to clean its lifted RDF
18
18
  static/ # the data itself, for static-file sources
19
+ registry/
20
+ identity.ttl # engine-maintained: minted entity IRIs and their
21
+ # source members — accumulated state, commit it
22
+ history.ttl # engine-maintained: append-only log of identity
23
+ # events (mint, member joined)
19
24
  webapp/
20
25
  content/about.md # optional: the webapp's About page prose
21
26
  exporters/<name>.js # optional: output adapters the webapp loads at runtime
@@ -64,7 +69,12 @@ fixture.
64
69
 
65
70
  Each source's `fetch.js` is invoked as `node fetch.js <outDir> <fetchUrl-or-staticDir>
66
71
  <runParamsJson>` — the JSON holds all `:hasRunParam` values grouped by name;
67
- each fetcher picks the parameters it needs.
72
+ each fetcher picks the parameters it needs. For static-file sources `fetch.js`
73
+ is optional: without one, the default fetch copies `sources/<name>/static/`
74
+ verbatim. `clean.sparql` is likewise optional when the source maps a field to
75
+ `schema:identifier`: the engine derives a default clean from that mapping —
76
+ skolemise on the identifier field, copy the scalar fields — and puts the
77
+ resolved query on record under `data/pipeline/default-clean-queries/`.
68
78
 
69
79
  A source declared with `:enabled false` stays in the config but is skipped by
70
80
  the engines and hidden from the webapp's Sources page — e.g. while its files
@@ -73,6 +83,19 @@ aren't available yet.
73
83
  Engines journal their executed steps as p-plan RDF (`data/ingest/ingest-log.ttl`,
74
84
  `data/pipeline/federate-log.ttl`) — evidence of what ran, not a plan.
75
85
 
86
+ Minting is write-once: the match step keeps an identity registry
87
+ (`registry/identity.ttl`, created on the first run) assigning each source
88
+ record to its minted entity IRI. A cluster with a known member reuses the
89
+ registered IRI, so identities survive re-harvests however membership evolves;
90
+ only unseen entities mint fresh. Alongside it, `registry/history.ttl` is an
91
+ append-only log of identity events (mint, member joined) grouped under a
92
+ timestamped `:Revision` node per changing run — the registry's provenance,
93
+ where the snapshot in `identity.ttl` came from. The history is appended only
94
+ when something changes, and the snapshot is rebuilt from the same assignments, so a no-op harvest leaves both files' git diff
95
+ untouched, and the revision counter only advances when identity actually moves.
96
+ Unlike `data/`, the registry is accumulated state, not derived output — commit
97
+ it, and review its diff after each harvest.
98
+
76
99
  ## Run the webapp
77
100
 
78
101
  The webapp ships with the package; it fetches an instance's `config/` +
@@ -2,13 +2,12 @@
2
2
  @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
3
3
  @prefix schema: <http://schema.org/> .
4
4
  @prefix foaf: <http://xmlns.com/foaf/0.1/> .
5
- @prefix prov: <http://www.w3.org/ns/prov#> .
6
5
  @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
7
6
  @prefix ft: <http://publications.europa.eu/resource/authority/file-type/> .
8
7
 
9
8
  # A minimal single-entity federation: two directories of libraries, merged into
10
9
  # one set of schema:Organization records. Config declares decisions only —
11
- # sources and their facts, target schema, mappings, match/merge/resolve rules.
10
+ # sources and their facts, target schema, mappings, match/resolve rules.
12
11
  # The engines own the step shape (fetch → lift, clean → map → match → merge →
13
12
  # resolve) and resolve all file paths by convention from the source names.
14
13
  # Add target schemas / mappings / match rules to model more entity types
@@ -18,7 +17,6 @@
18
17
  :hasSource :cityopenSource, :civichubSource ;
19
18
  :hasTargetSchema :organisationSchema ;
20
19
  :hasMatchRule :organisationMatch ;
21
- :hasMergeRule :merge ;
22
20
  :hasResolveRule :resolve .
23
21
 
24
22
  # ---- Target schema ------------------------------------------------------
@@ -87,7 +85,6 @@
87
85
  :cityopen-mapping a :Mapping ;
88
86
  :fromSource :cityopenSource ;
89
87
  :toTarget :organisationSchema ;
90
- :sourceGraph <urn:source:cityopen> ;
91
88
  :hasFieldMapping
92
89
  [ :from :co-id ; :to :t-identifier ] ,
93
90
  [ :from :co-name ; :to :t-name ] ,
@@ -101,7 +98,6 @@
101
98
  :civichub-mapping a :Mapping ;
102
99
  :fromSource :civichubSource ;
103
100
  :toTarget :organisationSchema ;
104
- :sourceGraph <urn:source:civichub> ;
105
101
  :hasFieldMapping
106
102
  [ :from :ch-uid ; :to :t-identifier ] ,
107
103
  [ :from :ch-bezeichnung ; :to :t-name ] ,
@@ -124,11 +120,6 @@
124
120
  :minScore 0.5 ;
125
121
  :hasWeightedCriterion [ :on schema:name ; :weight 1.0 ] .
126
122
 
127
- # ---- Merge --------------------------------------------------------------
128
-
129
- :merge a :MergeRule ;
130
- :originPredicate prov:wasDerivedFrom .
131
-
132
123
  # ---- Resolve ------------------------------------------------------------
133
124
  # One value per predicate per merged record; alphabeticFirst is deterministic.
134
125
 
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "@directory-builder/core",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Use-case-agnostic engine for config-driven federation pipelines",
5
5
  "author": "Civic Data Lab",
6
6
  "repository": "github:foederierter-datenpool/directory-builder-core",
7
7
  "license": "MIT",
8
8
  "type": "module",
9
9
  "scripts": {
10
- "test": "node --test",
10
+ "test": "node --test 'test/*.test.js'",
11
11
  "example": "cd example && node ../bin/cli.js",
12
12
  "webapp": "vite webapp",
13
13
  "webapp:build": "vite build webapp"
@@ -0,0 +1,19 @@
1
+ # Default clean, applied when a source ships no clean.sparql: skolemise each
2
+ # record from its identifier field (the source field mapped to the target
3
+ # schema's schema:identifier) into a stable cdp:__name__-<id> IRI, copy its
4
+ # scalar fields verbatim, and tag the source. The engine fills __source__,
5
+ # __name__ and __idPath__ from federation.ttl and puts the resolved query on
6
+ # record under data/pipeline/default-clean-queries/.
7
+
8
+ PREFIX xyz: <http://sparql.xyz/facade-x/data/>
9
+ PREFIX cdp: <https://civic-data.de/pipeline#>
10
+
11
+ CONSTRUCT {
12
+ ?record cdp:fromSource __source__ ;
13
+ ?p ?o .
14
+ } WHERE {
15
+ ?node xyz:__idPath__ ?id ;
16
+ ?p ?o .
17
+ FILTER(isLiteral(?o))
18
+ BIND(IRI(CONCAT(STR(cdp:), "__name__-", STR(?id))) AS ?record)
19
+ }
@@ -25,18 +25,20 @@ const df = DataFactory
25
25
  export async function federate(root = process.cwd()) {
26
26
  const abs = (p) => path.join(root, p)
27
27
  const federationTtl = fs.readFileSync(abs(PATHS.federation), "utf8")
28
- const defStore = storeFromTurtles([federationTtl, fs.readFileSync(abs(PATHS.matchKnowledge), "utf8")])
29
- const sources = enabledSources(parseTtl(federationTtl))
28
+ // match-knowledge.ttl (curated owl:sameAs pairs) is optional — no file, no manual matches.
29
+ const matchKnowledge = fs.existsSync(abs(PATHS.matchKnowledge)) ? [fs.readFileSync(abs(PATHS.matchKnowledge), "utf8")] : []
30
+ const defStore = storeFromTurtles([federationTtl, ...matchKnowledge])
31
+ const federationQuads = parseTtl(federationTtl)
32
+ const sources = enabledSources(federationQuads)
30
33
 
31
34
  const store = newStore()
32
35
  const journal = stepJournal()
33
- const ctx = { store, defStore, abs }
36
+ const ctx = { store, defStore, abs, quads: federationQuads }
34
37
 
35
38
  const cleanSteps = []
36
39
  for (const src of sources) {
37
- const name = sourceName(src)
38
- cleanSteps.push(await journal.step("clean", { source: src, after: [stepIri("lift", name)] },
39
- () => runClean(ctx, name)))
40
+ cleanSteps.push(await journal.step("clean", { source: src, after: [stepIri("lift", sourceName(src))] },
41
+ () => runClean(ctx, src)))
40
42
  }
41
43
 
42
44
  // Load each source's cleaned TTL into its own graph — plain mechanics, not a
@@ -56,7 +58,7 @@ export async function federate(root = process.cwd()) {
56
58
  await writeTurtleFile(abs(PATHS.mapped), mappedQuads, { ...COMMON_PREFIXES, cdp: CDP })
57
59
  console.log(`map: wrote ${mappedQuads.length} triples → ${PATHS.mapped}`)
58
60
  })
59
- const matchStep = await journal.step("match", { after: [mapStep] }, () => runMatch(ctx, PATHS.matches))
61
+ const matchStep = await journal.step("match", { after: [mapStep] }, () => runMatch(ctx, PATHS.matches, PATHS.registry, PATHS.registryHistory))
60
62
  const mergeStep = await journal.step("merge", { after: [matchStep] }, () => runMerge(ctx, PATHS.merged, PATHS.provenance))
61
63
  await journal.step("resolve", { after: [mergeStep] }, () => runResolve(ctx, PATHS.final))
62
64
 
@@ -1,13 +1,21 @@
1
1
  import { sparqlConstruct, storeFromTurtles } from "@foerderfunke/sem-ops-utils"
2
+ import { CDP, identifierFieldPath, PATHS, sourceName } from "../../utils.js"
2
3
  import { writeTurtleFile } from "../write-turtle.js"
3
- import { CDP, PATHS } from "../../utils.js"
4
4
  import path from "path"
5
5
  import fs from "fs"
6
6
 
7
+ // The default clean ships with the engine, like the lift queries.
8
+ const DEFAULT_CLEAN = path.join(import.meta.dirname, "../../clean/default.sparql")
9
+
7
10
  // Clean step: the source's clean.sparql reshapes its lifted RDF into
8
11
  // federation subjects (xyz:/cdp: vocabulary only — schema: enters at map).
9
- export const runClean = async ({ abs }, name) => {
10
- const cleanQuery = fs.readFileSync(abs(PATHS.cleanQuery(name)), "utf8")
12
+ // clean.sparql is optional when the source maps a field to schema:identifier:
13
+ // the engine then derives the default clean from that mapping.
14
+ export const runClean = async ({ abs, quads }, sourceIri) => {
15
+ const name = sourceName(sourceIri)
16
+ const cleanQuery = fs.existsSync(abs(PATHS.cleanQuery(name)))
17
+ ? fs.readFileSync(abs(PATHS.cleanQuery(name)), "utf8")
18
+ : defaultClean({ abs, quads }, sourceIri, name)
11
19
  const inDir = PATHS.lifted(name)
12
20
  const outPath = PATHS.cleaned(name)
13
21
  // Run CONSTRUCT per file so each lifted TTL stays isolated in its
@@ -25,3 +33,18 @@ export const runClean = async ({ abs }, name) => {
25
33
  cdp: CDP,
26
34
  })
27
35
  }
36
+
37
+ // No clean.sparql given: resolve the engine's default template with the
38
+ // source's identifier field as skolem key, and put the applied query on
39
+ // record under data/ — no silent fallbacks.
40
+ const defaultClean = ({ abs, quads }, sourceIri, name) => {
41
+ const idPath = identifierFieldPath(quads, sourceIri)
42
+ if (!idPath) throw new Error(`${PATHS.cleanQuery(name)} missing and no schema:identifier mapping to derive the default clean from`)
43
+ const query = fs.readFileSync(DEFAULT_CLEAN, "utf8")
44
+ .replaceAll("__source__", `<${sourceIri}>`).replaceAll("__name__", name).replaceAll("__idPath__", idPath)
45
+ const outPath = abs(PATHS.defaultCleanQuery(name))
46
+ fs.mkdirSync(path.dirname(outPath), { recursive: true })
47
+ fs.writeFileSync(outPath, query)
48
+ console.log(`clean ${name} default (id field: ${idPath}) → ${PATHS.defaultCleanQuery(name)}`)
49
+ return query
50
+ }
@@ -6,7 +6,8 @@ import fs from "fs"
6
6
  // Fetch step: run the source's fetch.js. Live sources pass their :fetchUrl;
7
7
  // static-file sources pass the absolute static dir instead — the script gets
8
8
  // whichever applies, plus the federation's run params as one JSON argument.
9
- // Returns the harvest record for the ingest log.
9
+ // fetch.js is optional for static sources: without it, the default fetch
10
+ // copies static/ verbatim. Returns the harvest record for the ingest log.
10
11
  export const runFetch = ({ abs, root }, { name, fetchUrl, paramsJson }) => {
11
12
  const outDir = PATHS.raw(name)
12
13
  const origin = fetchUrl ?? abs(PATHS.staticDir(name))
@@ -14,7 +15,9 @@ export const runFetch = ({ abs, root }, { name, fetchUrl, paramsJson }) => {
14
15
  // Clear any prior output first, so changed run params (or changed records) can't leave stale files behind
15
16
  fs.rmSync(abs(outDir), { recursive: true, force: true })
16
17
  fs.mkdirSync(abs(outDir), { recursive: true })
17
- run("node", [abs(PATHS.fetchScript(name)), abs(outDir), origin, paramsJson])
18
+ const script = abs(PATHS.fetchScript(name))
19
+ if (fs.existsSync(script)) run("node", [script, abs(outDir), origin, paramsJson])
20
+ else localCopyFallback({ name, fetchUrl, origin, outDir: abs(outDir) })
18
21
  const harvest = { time: new Date().toISOString() }
19
22
  // Static sources have no live harvest — record the files' git commit
20
23
  // time instead (the freshness the Sources page shows for them).
@@ -24,3 +27,10 @@ export const runFetch = ({ abs, root }, { name, fetchUrl, paramsJson }) => {
24
27
  } catch { /* not committed yet / no git → omit */ }
25
28
  return harvest
26
29
  }
30
+
31
+ // Fallback when a source ships no dedicated fetch.js: static sources get
32
+ // their static/ dir copied verbatim; live sources have no fallback yet.
33
+ const localCopyFallback = ({ name, fetchUrl, origin, outDir }) => {
34
+ if (fetchUrl) throw new Error(`${PATHS.fetchScript(name)} missing (no default fetch for live sources yet)`)
35
+ fs.cpSync(origin, outDir, { recursive: true })
36
+ }
@@ -1,5 +1,5 @@
1
1
  import { sparqlInsertDelete, sparqlSelect } from "@foerderfunke/sem-ops-utils"
2
- import { buildPrefixBlock, CDP, PATHS, shrink, sourceName } from "../../utils.js"
2
+ import { buildPrefixBlock, CDP, PATHS, shrink, sourceGraph, sourceName } from "../../utils.js"
3
3
  import { DataFactory } from "n3"
4
4
  import path from "path"
5
5
  import fs from "fs"
@@ -88,10 +88,9 @@ ${insertBlock}
88
88
  export const runMap = async ({ store, defStore, abs }, queriesDir) => {
89
89
  const mappings = await sparqlSelect(`
90
90
  PREFIX : <${CDP}>
91
- SELECT ?mapping ?source ?sourceGraph ?target ?targetClass WHERE {
91
+ SELECT ?mapping ?source ?target ?targetClass WHERE {
92
92
  ?mapping a :Mapping ;
93
93
  :fromSource ?source .
94
- OPTIONAL { ?mapping :sourceGraph ?sourceGraph }
95
94
  OPTIONAL { ?mapping :toTarget ?target }
96
95
  OPTIONAL { ?mapping :toTarget/:targetClass ?targetClass }
97
96
  } ORDER BY ?mapping`, [defStore])
@@ -108,9 +107,11 @@ export const runMap = async ({ store, defStore, abs }, queriesDir) => {
108
107
  OPTIONAL { ?parent :hasSubField ?src . ?parent :fieldPath ?parentPath }
109
108
  }`, [defStore])
110
109
 
111
- if (directRows.length && m.sourceGraph) {
110
+ if (directRows.length) {
112
111
  const localName = m.mapping.split("#").pop()
113
- const query = buildDirectInsert(m, directRows)
112
+ // The mapping's source graph follows by convention from :fromSource —
113
+ // the load step names it the same way.
114
+ const query = buildDirectInsert({ ...m, sourceGraph: sourceGraph(sourceName(m.source)) }, directRows)
114
115
  const queryPath = abs(path.join(queriesDir, `${localName}.sparql`))
115
116
  fs.mkdirSync(path.dirname(queryPath), { recursive: true })
116
117
  fs.writeFileSync(queryPath, query)
@@ -139,9 +140,9 @@ export const runMap = async ({ store, defStore, abs }, queriesDir) => {
139
140
  // the merge step rewrites them to the minted cluster IRIs.
140
141
  const linkRows = await sparqlSelect(`
141
142
  PREFIX : <${CDP}>
142
- SELECT ?mapping ?sourceGraph ?fromSchema ?sourcePredicate ?targetPredicate ?toSchema WHERE {
143
+ SELECT ?mapping ?source ?fromSchema ?sourcePredicate ?targetPredicate ?toSchema WHERE {
143
144
  ?mapping a :Mapping ;
144
- :sourceGraph ?sourceGraph ;
145
+ :fromSource ?source ;
145
146
  :toTarget ?fromSchema ;
146
147
  :hasRelationship ?rel .
147
148
  ?rel :sourcePredicate ?sourcePredicate ;
@@ -160,7 +161,7 @@ INSERT {
160
161
  ?from ${short(rel.targetPredicate)} ?to .
161
162
  }
162
163
  } WHERE {
163
- GRAPH <${rel.sourceGraph}> {
164
+ GRAPH <${sourceGraph(sourceName(rel.source))}> {
164
165
  ?from ${short(rel.sourcePredicate)} ?to ;
165
166
  cdp:targetSchema ${short(rel.fromSchema)} .
166
167
  ?to cdp:targetSchema ${short(rel.toSchema)} .
@@ -1,10 +1,11 @@
1
1
  import { sparqlSelect } from "@foerderfunke/sem-ops-utils"
2
2
  import { COMMON_PREFIXES, writeTurtleFile } from "../write-turtle.js"
3
3
  import { MAPPED_GRAPH } from "./map.js"
4
- import { CDP } from "../../utils.js"
4
+ import { CDP, parseTtl, shrink } from "../../utils.js"
5
5
  import { token_set_ratio } from "fuzzball"
6
6
  import { DataFactory } from "n3"
7
7
  import { createHash } from "crypto"
8
+ import fs from "fs"
8
9
 
9
10
  const df = DataFactory
10
11
 
@@ -22,7 +23,28 @@ const MATCH_CLUSTER = df.namedNode(CDP + "MatchCluster")
22
23
  const SIMILARITY_ALGORITHM = "token_set_ratio"
23
24
  const similarity = (a, b) => token_set_ratio(a ?? "", b ?? "") / 100
24
25
 
25
- export const runMatch = async ({ store, defStore, abs }, outPath) => {
26
+ export const runMatch = async ({ store, defStore, abs }, outPath, registryPath, historyPath) => {
27
+ // The identity registry (minted IRI :hasMember source IRI, one assignment
28
+ // per member) makes minting write-once: an entity's IRI is computed at
29
+ // most once — at first sight — recorded here, and afterwards only looked
30
+ // up, so membership can change without identity churn. Instance state to
31
+ // commit, neither config nor regenerable data; empty on a fresh instance.
32
+ const registry = new Map() // member source IRI → minted IRI
33
+ if (fs.existsSync(abs(registryPath))) {
34
+ for (const q of parseTtl(fs.readFileSync(abs(registryPath), "utf8"))) {
35
+ if (q.predicate.value === HAS_MEMBER.value) registry.set(q.object.value, q.subject.value)
36
+ }
37
+ }
38
+ const reserved = new Set(registry.values()) // every IRI ever minted — never mint one again
39
+ const known = new Set(registry.keys()) // members assigned in a prior run
40
+ const taken = new Set() // minted IRIs claimed by a cluster this run
41
+ let reusedCount = 0, mintedCount = 0
42
+ // Identity events this run, appended to history.ttl (the registry's
43
+ // provenance): when each entity was first minted or gained a member
44
+ // (merges and splits are not recorded yet; they only warn). Append-only and written only when non-empty,
45
+ // so a no-change harvest leaves the file — and its git diff — untouched.
46
+ const events = []
47
+
26
48
  // One match rule per target schema; each rule scores its own fields, mints
27
49
  // with its own prefix, and clusters only subjects of its :targetClass.
28
50
  const rules = await sparqlSelect(`
@@ -169,8 +191,39 @@ export const runMatch = async ({ store, defStore, abs }, outPath) => {
169
191
  let multiSource = 0
170
192
  const clusterIriByRoot = new Map()
171
193
  for (const members of clusterMembers) {
172
- const id = createHash("sha1").update(members.join("|")).digest("hex").slice(0, 12)
173
- const minted = df.namedNode(namespace + mintedPrefix + id)
194
+ // Reconcile against the registry: any member already known → its
195
+ // entity exists, reuse the IRI (clusters come largest-first, so on
196
+ // a split the larger fragment keeps the identity). Only unseen
197
+ // entities mint, seeded by their smallest member at mint time — a
198
+ // one-time uniqueness seed, not a content address: the registry
199
+ // pins the IRI afterwards, however membership evolves.
200
+ const prior = [...new Set(members.map(m => registry.get(m)).filter(Boolean))].sort()
201
+ const free = prior.filter(iri => !taken.has(iri))
202
+ let minted
203
+ // TODO: merge and split (prior carrying ≥2 IRIs in the reuse branch,
204
+ // or any prior in the mint branch) are reconciled correctly — a
205
+ // survivor keeps the IRI — but their history events (:Merged /
206
+ // :Split) and the tombstone they imply (the retired IRI preserved
207
+ // with :isReplacedBy, rather than silently vanishing from
208
+ // identity.ttl) are their own rung. For now they only warn.
209
+ if (free.length) {
210
+ minted = df.namedNode(free[0])
211
+ reusedCount++
212
+ const joined = members.filter(m => !known.has(m))
213
+ if (joined.length) events.push({ type: "MemberJoined", entity: free[0], member: joined })
214
+ if (prior.length > 1) console.warn(`match: clusters merged (${prior.join(" + ")}) — keeping ${free[0]}`)
215
+ } else {
216
+ if (prior.length) console.warn(`match: cluster split off ${prior.join(", ")} — minting fresh`)
217
+ let id = createHash("sha1").update(members[0]).digest("hex").slice(0, 12)
218
+ // Seed collision (e.g. a split remainder re-hashing its old anchor): re-hash until free.
219
+ while (taken.has(namespace + mintedPrefix + id) || reserved.has(namespace + mintedPrefix + id))
220
+ id = createHash("sha1").update(id).digest("hex").slice(0, 12)
221
+ minted = df.namedNode(namespace + mintedPrefix + id)
222
+ mintedCount++
223
+ if (!prior.length) events.push({ type: "Minted", entity: minted.value, member: members })
224
+ }
225
+ taken.add(minted.value)
226
+ for (const m of members) registry.set(m, minted.value)
174
227
  clusterIriByRoot.set(find(members[0]), minted)
175
228
  if (members.length > 1) multiSource++
176
229
  store.addQuad(df.quad(minted, RDF_TYPE, MATCH_CLUSTER, MATCH_GRAPH))
@@ -209,4 +262,42 @@ export const runMatch = async ({ store, defStore, abs }, outPath) => {
209
262
  const matchQuads = store.getQuads(null, null, null, MATCH_GRAPH)
210
263
  await writeTurtleFile(abs(outPath), matchQuads, { cdp: CDP, cdf: rules[0].ns, ...COMMON_PREFIXES })
211
264
  console.log(`match: wrote cluster log → ${outPath}`)
265
+
266
+ await writeTurtleFile(abs(registryPath), [...registry].map(([member, minted]) =>
267
+ df.quad(df.namedNode(minted), HAS_MEMBER, df.namedNode(member))), { cdp: CDP, cdf: rules[0].ns })
268
+ console.log(`match: identity registry ${reusedCount} reused, ${mintedCount} minted → ${registryPath}`)
269
+
270
+ // Append this run's events to the history (the registry's provenance) as
271
+ // one :Revision node carrying the timestamp, with each event hung off it as
272
+ // a nested [entity ; members] binding under a type predicate (cdp:minted /
273
+ // cdp:memberJoined). Revisions count only changing runs — a no-op harvest
274
+ // appends nothing — so the next number is one past the highest on file. The
275
+ // whole block is one append, so the named :Revision and its fresh blank
276
+ // nodes never collide with earlier revisions when the file is re-parsed.
277
+ if (events.length) {
278
+ const prefixes = { cdp: CDP, cdf: rules[0].ns }
279
+ const sh = (iri) => shrink(iri, prefixes)
280
+ const list = (arr) => arr.map(sh).join(", ")
281
+ const existing = fs.existsSync(abs(historyPath)) ? fs.readFileSync(abs(historyPath), "utf8") : ""
282
+ const rev = Math.max(0, ...[...existing.matchAll(/revision-(\d+)/g)].map(m => +m[1])) + 1
283
+
284
+ const byPredicate = new Map() // cdp:minted / cdp:memberJoined → binding strings
285
+ for (const e of events) {
286
+ const pred = "cdp:" + e.type[0].toLowerCase() + e.type.slice(1)
287
+ if (!byPredicate.has(pred)) byPredicate.set(pred, [])
288
+ byPredicate.get(pred).push(`[ cdp:entity ${sh(e.entity)} ; cdp:member ${list(e.member)} ]`)
289
+ }
290
+ const props = [...byPredicate].map(([pred, bindings]) =>
291
+ ` ${pred}\n ${bindings.join(" ,\n ")}`).join(" ;\n")
292
+ const block = `cdp:revision-${rev} a cdp:Revision ; prov:atTime "${new Date().toISOString()}"^^xsd:dateTime ;\n${props} .\n`
293
+
294
+ const header = `@prefix cdp: <${CDP}> .
295
+ @prefix cdf: <${rules[0].ns}> .
296
+ @prefix prov: <http://www.w3.org/ns/prov#> .
297
+ @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
298
+
299
+ `
300
+ fs.appendFileSync(abs(historyPath), (existing ? "\n" : header) + block)
301
+ console.log(`match: revision ${rev} — ${events.length} identity event(s) → ${historyPath}`)
302
+ }
212
303
  }
@@ -11,22 +11,22 @@ export const MERGED_GRAPH = df.namedNode("urn:merged")
11
11
 
12
12
  const RDF_REIFIES = df.namedNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#reifies")
13
13
 
14
+ // Engine invariant, mirrored by the webapp's loadMerge: each derivation's
15
+ // origin hangs off its reifier via prov:wasDerivedFrom.
16
+ const PROV_DERIVED_FROM = df.namedNode("http://www.w3.org/ns/prov#wasDerivedFrom")
17
+
14
18
  export const runMerge = async ({ store, defStore, abs }, outPath, provOutPath) => {
15
19
  const [cfg] = await sparqlSelect(`
16
20
  PREFIX : <${CDP}>
17
- SELECT ?ns ?originPred WHERE {
18
- ?match a :MatchRule ; :targetNamespace ?ns .
19
- ?merge a :MergeRule ; :originPredicate ?originPred .
20
- }`, [defStore])
21
- if (!cfg) throw new Error(":MergeRule / :MatchRule config missing in federation.ttl")
22
- const { ns: namespace, originPred } = cfg
21
+ SELECT ?ns WHERE { ?match a :MatchRule ; :targetNamespace ?ns . }`, [defStore])
22
+ if (!cfg) throw new Error(":MatchRule config missing in federation.ttl")
23
+ const namespace = cfg.ns
23
24
 
24
25
  const memberQuads = store.getQuads(null, HAS_MEMBER, null, MATCH_GRAPH)
25
26
  const mintedFor = new Map()
26
27
  for (const mq of memberQuads) mintedFor.set(mq.object.value, mq.subject)
27
28
 
28
29
  const fedQuads = store.getQuads(null, null, null, MAPPED_GRAPH)
29
- const originPredNode = df.namedNode(originPred)
30
30
  const provQuads = []
31
31
  for (const qu of fedQuads) {
32
32
  const minted = mintedFor.get(qu.subject.value)
@@ -43,7 +43,7 @@ export const runMerge = async ({ store, defStore, abs }, outPath, provOutPath) =
43
43
  // per-derivation metadata (time, confidence) has a home when needed.
44
44
  const reifier = df.blankNode()
45
45
  provQuads.push(df.quad(reifier, RDF_REIFIES, df.quad(minted, qu.predicate, object)))
46
- provQuads.push(df.quad(reifier, originPredNode, qu.subject))
46
+ provQuads.push(df.quad(reifier, PROV_DERIVED_FROM, qu.subject))
47
47
  }
48
48
 
49
49
  const mergedQuads = store.getQuads(null, null, null, MERGED_GRAPH)
@@ -26,11 +26,12 @@ export const runResolve = async ({ store, defStore, abs }, outPath) => {
26
26
  const [cfg] = await sparqlSelect(`
27
27
  PREFIX : <${CDP}>
28
28
  SELECT ?strategy ?ns WHERE {
29
- ?resolve a :ResolveRule ; :defaultStrategy ?strategy .
30
- ?match a :MatchRule ; :targetNamespace ?ns .
29
+ ?match a :MatchRule ; :targetNamespace ?ns .
30
+ OPTIONAL { ?resolve a :ResolveRule ; :defaultStrategy ?strategy }
31
31
  }`, [defStore])
32
- if (!cfg) throw new Error(":ResolveRule config missing in federation.ttl")
33
- const defaultPick = lookupStrategy(cfg.strategy)
32
+ if (!cfg) throw new Error(":MatchRule config missing in federation.ttl")
33
+ // No :ResolveRule (or none with a :defaultStrategy) → alphabeticFirst.
34
+ const defaultPick = lookupStrategy(cfg.strategy ?? `${CDP}alphabeticFirst`)
34
35
 
35
36
  const overrideRows = await sparqlSelect(`
36
37
  PREFIX : <${CDP}>
package/src/utils.js CHANGED
@@ -56,6 +56,8 @@ export const LIFTED_FORMAT = "http://publications.europa.eu/resource/authority/f
56
56
  export const PATHS = {
57
57
  federation: "config/federation.ttl",
58
58
  matchKnowledge: "config/match-knowledge.ttl",
59
+ registry: "registry/identity.ttl",
60
+ registryHistory: "registry/history.ttl",
59
61
  about: "webapp/content/about.md",
60
62
  query: "webapp/content/query.sparql",
61
63
  fetchScript: (name) => `sources/${name}/fetch.js`,
@@ -69,6 +71,7 @@ export const PATHS = {
69
71
  ingestLog: "data/ingest/ingest-log.ttl",
70
72
  federateLog: "data/pipeline/federate-log.ttl",
71
73
  mappingQueries: "data/pipeline/direct-mapping-queries/",
74
+ defaultCleanQuery: (name) => `data/pipeline/default-clean-queries/${name}.sparql`,
72
75
  mapped: "data/pipeline/mapped.ttl",
73
76
  matches: "data/pipeline/matches.ttl",
74
77
  merged: "data/pipeline/merged.ttl",
@@ -115,6 +118,18 @@ export const enabledSources = (quads) => {
115
118
  return objectsOf(quads, `${CDP}hasSource`).filter((iri) => !disabled.has(iri))
116
119
  }
117
120
 
121
+ // The source's skolem key for the default clean: the :fieldPath of the source
122
+ // field whose mapping points at the target field with :targetPredicate
123
+ // schema:identifier. Undefined when the source declares no such mapping.
124
+ export const identifierFieldPath = (quads, sourceIri) => {
125
+ const o = (s, p) => quads.filter((q) => q.subject.value === s && q.predicate.value === `${CDP}${p}`).map((q) => q.object.value)
126
+ for (const m of quads.filter((q) => q.predicate.value === `${CDP}fromSource` && q.object.value === sourceIri).map((q) => q.subject.value)) {
127
+ for (const fm of o(m, "hasFieldMapping")) {
128
+ if (o(o(fm, "to")[0], "targetPredicate")[0] === "http://schema.org/identifier") return o(o(fm, "from")[0], "fieldPath")[0]
129
+ }
130
+ }
131
+ }
132
+
118
133
  // Set of subjects typed `rdf:type typeIri`. Iteration order = encounter order.
119
134
  export function subjectsOfType(quads, typeIri) {
120
135
  const out = new Set()
package/src/validate.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { buildValidator, turtleToDataset } from "@foerderfunke/sem-ops-utils"
2
- import { CDP, objectsOf, parseTtl, PATHS, shrink, sourceName } from "./utils.js"
2
+ import { CDP, identifierFieldPath, objectsOf, parseTtl, PATHS, shrink, sourceName } from "./utils.js"
3
3
  import path from "path"
4
4
  import fs from "fs"
5
5
 
@@ -19,23 +19,27 @@ export async function validate(root = process.cwd()) {
19
19
  return (await Promise.all(checks.map((check) => check(ctx)))).flat()
20
20
  }
21
21
 
22
- // Every :hasSource in federation.ttl has its sources/<name>/ folder with
23
- // fetch.js + clean.sparql - and no folder exists that the federation doesn't
24
- // declare. Checks all declared sources, enabled or not: folder presence is a
25
- // repo-layout contract.
22
+ // Every :hasSource in federation.ttl has what its engine steps need: a
23
+ // fetch.js or static/ to default to, a clean.sparql or a schema:identifier
24
+ // mapping to derive the default clean from - and no sources/ folder exists
25
+ // that the federation doesn't declare. Checks all declared sources, enabled
26
+ // or not: folder presence is a repo-layout contract.
26
27
  function sourcesFoldersInSync({ abs, quads }) {
27
- const declared = objectsOf(quads, `${CDP}hasSource`).map(sourceName)
28
+ const declared = objectsOf(quads, `${CDP}hasSource`)
28
29
  const problems = []
29
- for (const name of declared) {
30
- for (const file of [PATHS.fetchScript(name), PATHS.cleanQuery(name)]) {
31
- if (!fs.existsSync(abs(file))) problems.push(`${file} missing`)
32
- }
30
+ for (const iri of declared) {
31
+ const name = sourceName(iri)
32
+ if (![PATHS.fetchScript(name), PATHS.staticDir(name)].some((f) => fs.existsSync(abs(f))))
33
+ problems.push(`${PATHS.fetchScript(name)} missing and no ${PATHS.staticDir(name)} to default to`)
34
+ if (!fs.existsSync(abs(PATHS.cleanQuery(name))) && !identifierFieldPath(quads, iri))
35
+ problems.push(`${PATHS.cleanQuery(name)} missing and no schema:identifier mapping to derive the default clean from`)
33
36
  }
37
+ const declaredNames = declared.map(sourceName)
34
38
  const folders = fs.existsSync(abs("sources"))
35
39
  ? fs.readdirSync(abs("sources"), { withFileTypes: true }).filter((d) => d.isDirectory()).map((d) => d.name)
36
40
  : []
37
41
  for (const name of folders) {
38
- if (!declared.includes(name)) problems.push(`sources/${name}/ has no :hasSource declaration in ${PATHS.federation}`)
42
+ if (!declaredNames.includes(name)) problems.push(`sources/${name}/ has no :hasSource declaration in ${PATHS.federation}`)
39
43
  }
40
44
  return problems
41
45
  }
@@ -0,0 +1,24 @@
1
+ import { PATHS } from "@directory-builder/core/utils"
2
+ import path from "path"
3
+ import fs from "fs"
4
+
5
+ // SPARQL Anything cache shared with example/ — a fixture's tools/ symlinks
6
+ // here, so test runs never re-download the jar.
7
+ const TOOLS_CACHE = path.join(import.meta.dirname, "../../example/tools")
8
+
9
+ // Materialize an in-test instance definition (federation.ttl string + records
10
+ // per source) into test/tmp/<name>/ — a real instance folder the engines run
11
+ // against, wiped at setup and left in place afterwards for inspection.
12
+ export const makeInstance = (name, { federation, sources }) => {
13
+ const root = path.join(import.meta.dirname, "../tmp", name)
14
+ fs.rmSync(root, { recursive: true, force: true })
15
+ fs.mkdirSync(path.join(root, "config"), { recursive: true })
16
+ fs.writeFileSync(path.join(root, PATHS.federation), federation)
17
+ for (const [source, records] of Object.entries(sources)) {
18
+ fs.mkdirSync(path.join(root, PATHS.staticDir(source)), { recursive: true })
19
+ fs.writeFileSync(path.join(root, PATHS.staticDir(source), "data.json"), JSON.stringify(records, null, 4))
20
+ }
21
+ fs.mkdirSync(TOOLS_CACHE, { recursive: true })
22
+ fs.symlinkSync(TOOLS_CACHE, path.join(root, "tools"))
23
+ return root
24
+ }
@@ -0,0 +1,182 @@
1
+ import { CDP, parseTtl, PATHS } from "@directory-builder/core/utils"
2
+ import { Pipeline, validate } from "@directory-builder/core"
3
+ import { makeInstance } from "./helpers/instance.js"
4
+ import assert from "node:assert/strict"
5
+ import { test } from "node:test"
6
+ import path from "path"
7
+ import fs from "fs"
8
+
9
+ // ---- Shared fixture: the ultra-minimal instance both tests run on ----------
10
+ // federation.ttl + two static JSON sources, nothing else — fetch, clean and
11
+ // resolve all run on engine defaults. The sources share one record by name
12
+ // ("Entry One"), so the pipeline should merge a1+b1 and leave a2 and b2 as
13
+ // their own entities.
14
+
15
+ const federation = `
16
+ @prefix : <https://civic-data.de/pipeline#> .
17
+ @prefix schema: <http://schema.org/> .
18
+ @prefix ft: <http://publications.europa.eu/resource/authority/file-type/> .
19
+
20
+ :federation a :Federation ;
21
+ :hasSource :alphaSource, :betaSource .
22
+
23
+ :thingSchema a :TargetSchema ;
24
+ :targetClass schema:Thing .
25
+
26
+ :t-id a :TargetField ; :targetPredicate schema:identifier .
27
+ :t-name a :TargetField ; :targetPredicate schema:name .
28
+
29
+ :alphaSource a :Source ; :format ft:JSON .
30
+ :betaSource a :Source ; :format ft:JSON .
31
+
32
+ :alpha-id a :SourceField ; :fieldPath "id" .
33
+ :alpha-name a :SourceField ; :fieldPath "name" .
34
+ :beta-id a :SourceField ; :fieldPath "id" .
35
+ :beta-label a :SourceField ; :fieldPath "label" .
36
+
37
+ :alpha-mapping a :Mapping ; :fromSource :alphaSource ; :toTarget :thingSchema ;
38
+ :hasFieldMapping [ :from :alpha-id ; :to :t-id ] , [ :from :alpha-name ; :to :t-name ] .
39
+
40
+ :beta-mapping a :Mapping ; :fromSource :betaSource ; :toTarget :thingSchema ;
41
+ :hasFieldMapping [ :from :beta-id ; :to :t-id ] , [ :from :beta-label ; :to :t-name ] .
42
+
43
+ :match a :MatchRule ;
44
+ :forTarget :thingSchema ;
45
+ :targetNamespace "urn:test:" ;
46
+ :mintedSubjectPrefix "thing-" ;
47
+ :minScore 1.0 ;
48
+ :hasWeightedCriterion [ :on schema:name ; :weight 1.0 ] .
49
+ `
50
+
51
+ const alpha = [
52
+ { id: "a1", name: "Entry One" },
53
+ { id: "a2", name: "Entry Two" },
54
+ ]
55
+ const beta = [
56
+ { id: "b1", label: "Entry One" },
57
+ { id: "b2", label: "Entry Three" },
58
+ ]
59
+
60
+ // The consumer-facing artifact the shared fixture resolves to (both tests).
61
+ const expectedFinal = `@prefix schema: <http://schema.org/>.
62
+ @prefix foaf: <http://xmlns.com/foaf/0.1/>.
63
+ @prefix dct: <http://purl.org/dc/terms/>.
64
+ @prefix cdf: <urn:test:>.
65
+
66
+ cdf:thing-5a45645edb31 a schema:Thing;
67
+ schema:name "Entry Two".
68
+ cdf:thing-d1583c098826 a schema:Thing;
69
+ schema:name "Entry Three".
70
+ cdf:thing-e427416d02ac a schema:Thing;
71
+ schema:name "Entry One".
72
+ `
73
+
74
+ // ---- Test 1: the whole pipeline on defaults --------------------------------
75
+
76
+ test("the tiny fixture validates and runs the whole pipeline on defaults", async () => {
77
+ const root = makeInstance("tiny", { federation, sources: { alpha, beta } })
78
+ // the fixture satisfies the instance contract (folders, derivable defaults, shape)
79
+ assert.deepEqual(await validate(root), [])
80
+ await new Pipeline({ root }).run()
81
+ const finalTtl = fs.readFileSync(path.join(root, PATHS.final), "utf8")
82
+ const final = parseTtl(finalTtl)
83
+ // match merged a1+b1 on their identical name; a2 and b2 stay their own entities
84
+ const subjects = new Set(final.map((q) => q.subject.value))
85
+ assert.equal(subjects.size, 3, "a1+b1 merge, a2 and b2 stay alone")
86
+ // entity IRIs are minted from the match rule's :targetNamespace + :mintedSubjectPrefix
87
+ for (const s of subjects) assert.match(s, /^urn:test:thing-/)
88
+ // map carried both sources' name fields through, resolve kept one value per entity
89
+ const names = final.filter((q) => q.predicate.value === "http://schema.org/name").map((q) => q.object.value)
90
+ assert.deepEqual(names.toSorted(), ["Entry One", "Entry Three", "Entry Two"])
91
+ // and the consumer-facing artifact as a whole
92
+ assert.equal(finalTtl, expectedFinal)
93
+ })
94
+
95
+ // ---- Test 2: periodic harvesting & the identity registry -------------------
96
+
97
+ // The identity registry the first harvest writes: each minted IRI's source
98
+ // members, the write-once record later runs reconcile against.
99
+ const expectedRegistry = `@prefix cdp: <https://civic-data.de/pipeline#>.
100
+ @prefix cdf: <urn:test:>.
101
+
102
+ cdf:thing-5a45645edb31 cdp:hasMember cdp:alpha-a2.
103
+ cdf:thing-d1583c098826 cdp:hasMember cdp:beta-b2.
104
+ cdf:thing-e427416d02ac cdp:hasMember cdp:alpha-a1, cdp:beta-b1.
105
+ `
106
+
107
+ // history.ttl events as {type, entity, member[], revision}: each event is a
108
+ // nested [entity ; members] binding hung off its :Revision node under a type
109
+ // predicate (cdp:minted / cdp:memberJoined). Timestamps vary per run, so the
110
+ // test asserts structure, not bytes.
111
+ const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
112
+ const EVENT_PREDS = { minted: "Minted", memberJoined: "MemberJoined" }
113
+ const parseEvents = (ttl) => {
114
+ const quads = parseTtl(ttl)
115
+ const events = []
116
+ for (const [local, type] of Object.entries(EVENT_PREDS)) {
117
+ for (const q of quads.filter((x) => x.predicate.value === CDP + local)) {
118
+ const node = q.object.value // the [entity ; members] binding's blank node
119
+ events.push({
120
+ type,
121
+ entity: quads.find((x) => x.subject.value === node && x.predicate.value === CDP + "entity")?.object.value,
122
+ member: quads.filter((x) => x.subject.value === node && x.predicate.value === CDP + "member")
123
+ .map((x) => x.object.value).toSorted(),
124
+ revision: q.subject.value, // the :Revision node the binding hangs off
125
+ })
126
+ }
127
+ }
128
+ return events
129
+ }
130
+ const revisionNodes = (ttl) => parseTtl(ttl)
131
+ .filter((q) => q.predicate.value === RDF_TYPE && q.object.value === CDP + "Revision")
132
+ .map((q) => q.subject.value).toSorted()
133
+
134
+ test("harvest rounds keep minted IRIs stable (write-once identity registry)", async () => {
135
+ const root = makeInstance("harvest", { federation, sources: { alpha, beta } })
136
+ const pipeline = new Pipeline({ root })
137
+ const writeSource = (name, records) =>
138
+ fs.writeFileSync(path.join(root, PATHS.staticDir(name), "data.json"), JSON.stringify(records, null, 4))
139
+ const artifact = (p) => fs.readFileSync(path.join(root, p), "utf8")
140
+ const id = (local) => "urn:test:" + local
141
+ const src = (local) => CDP + local
142
+
143
+ // round 1 — the first harvest mints the three identities into the registry,
144
+ // and opens the history with one Minted event apiece (the genesis record).
145
+ await pipeline.run()
146
+ assert.equal(artifact(PATHS.final), expectedFinal)
147
+ assert.equal(artifact(PATHS.registry), expectedRegistry)
148
+ const history1 = artifact(PATHS.registryHistory)
149
+ assert.deepEqual(parseEvents(history1).toSorted((a, b) => a.entity.localeCompare(b.entity)), [
150
+ { type: "Minted", entity: id("thing-5a45645edb31"), member: [src("alpha-a2")], revision: src("revision-1") },
151
+ { type: "Minted", entity: id("thing-d1583c098826"), member: [src("beta-b2")], revision: src("revision-1") },
152
+ { type: "Minted", entity: id("thing-e427416d02ac"), member: [src("alpha-a1"), src("beta-b1")], revision: src("revision-1") },
153
+ ])
154
+ assert.deepEqual(revisionNodes(history1), [src("revision-1")], "genesis opens revision 1")
155
+
156
+ // round 2 — harmless upstream edit: b2 renames to "Entry Drei", membership
157
+ // unchanged. The directory carries the new name under the same IRI, and both
158
+ // registry and history stay byte-identical (a no-change harvest, clean diff).
159
+ writeSource("beta", [beta[0], { id: "b2", label: "Entry Drei" }])
160
+ await pipeline.run()
161
+ const expectedRenamed = expectedFinal.replace(`"Entry Three"`, `"Entry Drei"`)
162
+ assert.equal(artifact(PATHS.final), expectedRenamed)
163
+ assert.equal(artifact(PATHS.registry), expectedRegistry)
164
+ assert.equal(artifact(PATHS.registryHistory), history1, "no event appended for a no-op harvest")
165
+
166
+ // round 3 — a new alpha record joins b2's cluster. alpha-a3 sorts before
167
+ // beta-b2, so a stateless smallest-member seed would re-mint here — only the
168
+ // registry lookup preserves the identity: the directory is unchanged, the
169
+ // entity just gained its second member, and history records exactly that.
170
+ writeSource("alpha", [...alpha, { id: "a3", name: "Entry Drei" }])
171
+ await pipeline.run()
172
+ assert.equal(artifact(PATHS.final), expectedRenamed)
173
+ assert.equal(artifact(PATHS.registry),
174
+ expectedRegistry.replace("cdp:beta-b2.", "cdp:beta-b2, cdp:alpha-a3."))
175
+ assert.ok(artifact(PATHS.registryHistory).startsWith(history1), "history only appends, never rewrites")
176
+ const inRev2 = parseEvents(artifact(PATHS.registryHistory)).filter((e) => e.revision === src("revision-2"))
177
+ assert.deepEqual(inRev2, [
178
+ { type: "MemberJoined", entity: id("thing-d1583c098826"), member: [src("alpha-a3")], revision: src("revision-2") },
179
+ ])
180
+ assert.deepEqual(revisionNodes(artifact(PATHS.registryHistory)), [src("revision-1"), src("revision-2")],
181
+ "the changing harvest opens revision 2; the no-op round 2 added none")
182
+ })
@@ -1,14 +0,0 @@
1
- import path from "path"
2
- import fs from "fs"
3
-
4
- // Static-file source: copy the committed JSON straight into the ingest area.
5
- // A live source would instead call an API here and write the responses out.
6
- // argv: [outDir, sourceDir, runParamsJson] — params unused for this static example.
7
- const OUT_DIR = process.argv[2]
8
- const SRC_DIR = process.argv[3]
9
-
10
- fs.mkdirSync(OUT_DIR, { recursive: true })
11
- for (const f of fs.readdirSync(SRC_DIR).filter((f) => f.endsWith(".json"))) {
12
- fs.copyFileSync(path.join(SRC_DIR, f), path.join(OUT_DIR, f))
13
- console.log(` ${f} → ${OUT_DIR}`)
14
- }
@@ -1,14 +0,0 @@
1
- import path from "path"
2
- import fs from "fs"
3
-
4
- // Static-file source: copy the committed JSON straight into the ingest area.
5
- // A live source would instead call an API here and write the responses out.
6
- // argv: [outDir, sourceDir, runParamsJson] — params unused for this static example.
7
- const OUT_DIR = process.argv[2]
8
- const SRC_DIR = process.argv[3]
9
-
10
- fs.mkdirSync(OUT_DIR, { recursive: true })
11
- for (const f of fs.readdirSync(SRC_DIR).filter((f) => f.endsWith(".json"))) {
12
- fs.copyFileSync(path.join(SRC_DIR, f), path.join(OUT_DIR, f))
13
- console.log(` ${f} → ${OUT_DIR}`)
14
- }