ldkit 2.5.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,508 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.shaclToSchema = shaclToSchema;
4
+ const n3_1 = require("n3");
5
+ const schema_to_script_js_1 = require("./schema_to_script.js");
6
+ const BUILTIN_NAMESPACE_IRIS = new Set(schema_to_script_js_1.NAMESPACES.map((n) => n.$iri)); // Set of the `$iri` value of every entry in the NAMESPACES list imported from ./schema_to_script.js; deriveExtraNamespaces() skips any declared prefix whose IRI is in this set.
7
// Namespace IRIs from which every term constant below is derived.
const RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
const RDFS = "http://www.w3.org/2000/01/rdf-schema#";
const XSD = "http://www.w3.org/2001/XMLSchema#";
const SH = "http://www.w3.org/ns/shacl#";
// RDF terms: typing, rdf:List traversal, and the language-tagged string datatype.
const RDF_TYPE = `${RDF}type`;
const RDF_FIRST = `${RDF}first`;
const RDF_REST = `${RDF}rest`;
const RDF_NIL = `${RDF}nil`;
const RDF_LANG_STRING = `${RDF}langString`;
// xsd:string is treated as the default datatype (no explicit type is emitted for it).
const XSD_STRING = `${XSD}string`;
const RDFS_CLASS = `${RDFS}Class`;
// SHACL terms read by the converter.
const SH_NODE_SHAPE = `${SH}NodeShape`;
const SH_TARGET_CLASS = `${SH}targetClass`;
const SH_PROPERTY = `${SH}property`;
const SH_PATH = `${SH}path`;
const SH_INVERSE_PATH = `${SH}inversePath`;
const SH_DATATYPE = `${SH}datatype`;
const SH_NODE_KIND = `${SH}nodeKind`;
const SH_IRI = `${SH}IRI`;
const SH_NODE = `${SH}node`;
const SH_CLASS = `${SH}class`;
const SH_MIN_COUNT = `${SH}minCount`;
const SH_MAX_COUNT = `${SH}maxCount`;
const SH_UNIQUE_LANG = `${SH}uniqueLang`;
const SH_AND = `${SH}and`;
const SH_OR = `${SH}or`;
const SH_IN = `${SH}in`;
// Numeric widening order: leftmost = widest.
// xsd:integer is listed before xsd:long because xsd:long is derived from
// xsd:integer (integer ⊃ long ⊃ int ⊃ short ⊃ byte), so a union of the two
// must widen to xsd:integer. The position of the non-negative / unsigned
// family relative to the signed chain is a heuristic: those value spaces are
// not totally ordered against each other.
const NUMERIC_WIDENING = [
    `${XSD}decimal`,
    `${XSD}double`,
    `${XSD}float`,
    `${XSD}integer`,
    `${XSD}long`,
    `${XSD}int`,
    `${XSD}short`,
    `${XSD}byte`,
    `${XSD}nonNegativeInteger`,
    `${XSD}positiveInteger`,
    `${XSD}unsignedLong`,
    `${XSD}unsignedInt`,
];
49
/**
 * Converts SHACL shapes written in Turtle into schema descriptors.
 *
 * A fresh converter is created for every call, so no state is shared
 * between invocations.
 *
 * @param {string} turtle Turtle document containing the SHACL shapes.
 * @param {object} [options] Converter options, passed through unchanged.
 * @returns {object} The value returned by `ShaclConverter#process`.
 */
function shaclToSchema(turtle, options = {}) {
    return new ShaclConverter(options).process(turtle);
}
53
/**
 * Converts a SHACL shapes graph, supplied as Turtle text, into schema
 * descriptors (`{ name, type, properties }`).
 *
 * One instance is meant to serve a single `process()` call: the parsed store,
 * the schemas built so far and the naming / prefix bookkeeping are all kept
 * as instance state.
 */
class ShaclConverter {
    /**
     * @param {object} [options]
     * @param {Object<string, string>} [options.prefixAliases] Maps a declared
     *   prefix to the text used in its place inside generated schema names.
     */
    constructor(options = {}) {
        // Quad store; assigned by parseWithPrefixes().
        this.store = undefined;
        // Schemas in the order their shapes were found.
        this.schemas = [];
        // Shape IRI -> generated schema name.
        this.shapeIriToName = new Map();
        // Every schema name handed out so far (see uniqueName()).
        this.usedNames = new Set();
        // Declared prefix -> namespace IRI. Null prototype so that a prefix
        // such as `__proto__` or `constructor` is stored like any other key.
        this.prefixMap = Object.create(null);
        this.prefixAliases = options.prefixAliases ?? {};
    }

    /**
     * Runs the whole conversion.
     *
     * @param {string} turtle Turtle document containing the shapes.
     * @returns {{schemas: object[], extraNamespaces: object[], schemaSourcePrefixes: Map<string, string>}}
     * @throws {Error} If the Turtle cannot be parsed or a property shape has
     *   a missing / unsupported `sh:path`.
     */
    process(turtle) {
        this.parseWithPrefixes(turtle);
        const shapeIris = this.findNodeShapes();
        // First pass: give every shape its schema name.
        for (const shapeIri of shapeIris) {
            this.shapeIriToName.set(shapeIri, this.deriveSchemaName(shapeIri));
        }
        // Second pass: build the schemas and remember which declared prefix
        // covers each schema's first type (or the shape IRI when untyped).
        const schemaSourcePrefixes = new Map();
        for (const shapeIri of shapeIris) {
            const schema = this.buildSchema(shapeIri);
            this.schemas.push(schema);
            const prefix = this.findNamespacePrefix(schema.type[0] ?? shapeIri);
            if (prefix) {
                schemaSourcePrefixes.set(schema.name, prefix);
            }
        }
        return {
            schemas: this.schemas,
            extraNamespaces: this.deriveExtraNamespaces(),
            schemaSourcePrefixes,
        };
    }

    /**
     * Parses the Turtle into `this.store` and records `@prefix` declarations
     * in `this.prefixMap`.
     *
     * Prefixes are collected with a regular expression over the raw text, so
     * only `@prefix name: <iri> .` declarations with a non-empty name are
     * recognised.
     *
     * @param {string} turtle
     * @throws {Error} Wrapping the parser's error (kept as `cause`).
     */
    parseWithPrefixes(turtle) {
        const parser = new n3_1.Parser();
        let quads;
        try {
            quads = parser.parse(turtle);
        }
        catch (error) {
            const detail = error instanceof Error ? error.message : String(error);
            // Keep the original error reachable for callers that want the details.
            throw new Error(`Failed to parse Turtle input: ${detail}`, { cause: error });
        }
        this.store = new n3_1.Store(quads);
        const prefixRe = /@prefix\s+([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*<([^>]+)>\s*\./g;
        for (const [, prefix, iri] of turtle.matchAll(prefixRe)) {
            this.prefixMap[prefix] = iri;
        }
    }

    /**
     * Lists the declared namespaces that are not built in and that are a
     * string prefix of at least one type IRI, property IRI or property type
     * used by the generated schemas.
     *
     * @returns {{iri: string, prefix: string}[]}
     */
    deriveExtraNamespaces() {
        const usedIris = new Set();
        for (const schema of this.schemas) {
            for (const typeIri of schema.type) {
                usedIris.add(typeIri);
            }
            for (const prop of Object.values(schema.properties)) {
                usedIris.add(prop.id);
                if (prop.type) {
                    usedIris.add(prop.type);
                }
            }
        }
        // Materialised once; the loop below scans it for every declared prefix.
        const usedIriList = [...usedIris];
        const result = [];
        const seenIris = new Set();
        const usedNames = new Set();
        for (const [prefix, iri] of Object.entries(this.prefixMap)) {
            if (BUILTIN_NAMESPACE_IRIS.has(iri) || seenIris.has(iri)) {
                continue;
            }
            if (!usedIriList.some((used) => used.startsWith(iri))) {
                continue;
            }
            seenIris.add(iri);
            let safeName = prefix;
            while (usedNames.has(safeName)) {
                safeName += "_";
            }
            usedNames.add(safeName);
            result.push({ iri, prefix: safeName });
        }
        return result;
    }

    /**
     * Finds every IRI-named subject typed `sh:NodeShape`. Blank-node shapes
     * are ignored. Each IRI is returned once, even if the typing quad is
     * present in more than one graph.
     *
     * @returns {string[]}
     */
    findNodeShapes() {
        const iris = new Set();
        for (const quad of this.store.getQuads(null, RDF_TYPE, SH_NODE_SHAPE, null)) {
            if (quad.subject.termType === "NamedNode") {
                iris.add(quad.subject.value);
            }
        }
        return [...iris];
    }

    /**
     * Derives a unique schema name for a shape from its first IRI-valued
     * `sh:targetClass`, or from the shape IRI itself (minus a trailing
     * "Shape") when there is none.
     *
     * @param {string} shapeIri
     * @returns {string}
     */
    deriveSchemaName(shapeIri) {
        const targetClass = this.getObjectIri(shapeIri, SH_TARGET_CLASS);
        const baseIri = targetClass ?? shapeIri;
        let local = this.getSuffix(baseIri);
        if (!targetClass && local.endsWith("Shape")) {
            local = local.substring(0, local.length - "Shape".length);
        }
        return this.uniqueName(this.composeSchemaName(baseIri, local));
    }

    // Schema names are always namespace-prefixed (when a @prefix declaration
    // covers the IRI) so that classes sharing a local-part across vocabularies
    // — e.g. m:Campaign, meta:Campaign, google:Campaign — produce
    // self-documenting, deterministic names like MCampaignSchema,
    // MetaCampaignSchema, GoogleCampaignSchema. IRIs without a declared prefix
    // fall back to the bare local-part (preserves prior behavior for
    // hand-written SHACL test fixtures that omit @prefix declarations).
    /**
     * @param {string} iri IRI used to look up the covering prefix.
     * @param {string} local Local part to embed in the name.
     * @returns {string} `<PrefixPart><LocalPart>Schema`.
     */
    composeSchemaName(iri, local) {
        const prefix = this.findNamespacePrefix(iri);
        let prefixPart = "";
        if (prefix) {
            // Own-property check: a prefix called e.g. "constructor" must not
            // pick up a member inherited from Object.prototype as its alias.
            const alias = Object.prototype.hasOwnProperty.call(this.prefixAliases, prefix)
                ? this.prefixAliases[prefix]
                : undefined;
            prefixPart = alias ?? this.capitalize(this.sanitizeIdentifier(prefix));
        }
        const localPart = this.capitalize(this.sanitizeIdentifier(local));
        return `${prefixPart}${localPart}Schema`;
    }

    /**
     * Returns the declared prefix whose namespace is the longest string
     * prefix of `iri`, or `null` when none matches.
     *
     * @param {string} iri
     * @returns {string|null}
     */
    findNamespacePrefix(iri) {
        let bestPrefix = null;
        let bestLength = 0;
        for (const [prefix, ns] of Object.entries(this.prefixMap)) {
            if (iri.startsWith(ns) && ns.length > bestLength) {
                bestPrefix = prefix;
                bestLength = ns.length;
            }
        }
        return bestPrefix;
    }

    /**
     * Reserves and returns `name`, or `name1`, `name2`, … if already taken.
     *
     * @param {string} name
     * @returns {string}
     * @throws {Error} If no free candidate exists below suffix 1000.
     */
    uniqueName(name) {
        if (!this.usedNames.has(name)) {
            this.usedNames.add(name);
            return name;
        }
        for (let i = 1; i < 1000; i++) {
            const candidate = `${name}${i}`;
            if (!this.usedNames.has(candidate)) {
                this.usedNames.add(candidate);
                return candidate;
            }
        }
        throw new Error(`Could not generate a unique name for ${name}`);
    }

    /**
     * Builds the schema descriptor for one shape.
     *
     * @param {string} shapeIri
     * @returns {{name: string, type: string[], properties: object}}
     */
    buildSchema(shapeIri) {
        return {
            name: this.shapeIriToName.get(shapeIri),
            type: this.deriveType(shapeIri),
            properties: this.buildProperties(shapeIri),
        };
    }

    /**
     * Returns the shape's IRI-valued `sh:targetClass` values; failing that,
     * the shape IRI itself when the shape is also typed `rdfs:Class`;
     * otherwise an empty list.
     *
     * @param {string} shapeIri
     * @returns {string[]}
     */
    deriveType(shapeIri) {
        const targetClasses = this.getObjectIris(shapeIri, SH_TARGET_CLASS);
        if (targetClasses.length > 0) {
            return targetClasses;
        }
        const isRdfsClass = this.store.getQuads(shapeIri, RDF_TYPE, RDFS_CLASS, null).length > 0;
        return isRdfsClass ? [shapeIri] : [];
    }

    /**
     * Builds the property map of a shape from its `sh:property` values.
     *
     * Property shapes on the same path (same IRI and direction) are merged.
     * Property shapes that merely share a local name but describe a
     * different path are kept apart under `name2`, `name3`, … and a warning
     * is logged, instead of being merged into one entry.
     *
     * @param {string} shapeIri
     * @returns {Object<string, object>}
     */
    buildProperties(shapeIri) {
        const properties = {};
        // Own-property helpers: local names such as "toString" or "__proto__"
        // must behave like ordinary keys.
        const has = (key) => Object.prototype.hasOwnProperty.call(properties, key);
        const put = (key, value) => Object.defineProperty(properties, key, {
            value,
            enumerable: true,
            configurable: true,
            writable: true,
        });
        const samePath = (a, b) => a.id === b.id && Boolean(a.inverse) === Boolean(b.inverse);
        for (const quad of this.store.getQuads(shapeIri, SH_PROPERTY, null, null)) {
            const termType = quad.object.termType;
            if (termType !== "NamedNode" && termType !== "BlankNode") {
                console.error(`[shacl-to-schema] warning: skipping non-node sh:property value on shape <${shapeIri}> (got ${termType})`);
                continue;
            }
            const { name, spec } = this.buildProperty(quad.object, shapeIri);
            // Find the slot for this path: either a free key or the key that
            // already holds the very same path.
            let key = name;
            for (let n = 2; has(key) && !samePath(properties[key], spec); n++) {
                key = `${name}${n}`;
            }
            if (has(key)) {
                put(key, this.mergePropertySpecs(properties[key], spec));
                continue;
            }
            if (key !== name) {
                console.error(`[shacl-to-schema] warning: property name "${name}" on shape <${shapeIri}> is used by more than one path; <${spec.id}> is emitted as "${key}"`);
            }
            put(key, spec);
        }
        return properties;
    }

    // SHACL conjoins multiple property shapes on the same path (AND). LDkit's
    // runtime ignores @type when @schema is set, so schemaRef wins over type.
    /**
     * @param {object} a Spec already stored for the path.
     * @param {object} b Spec of the additional property shape.
     * @returns {object} New spec: `b` wins for schemaRef / type; `optional`
     *   and `array` survive only if set on both; `multilang` and `inverse`
     *   survive if set on either.
     */
    mergePropertySpecs(a, b) {
        const merged = { id: a.id };
        if (b.schemaRef !== undefined || a.schemaRef !== undefined) {
            merged.schemaRef = b.schemaRef ?? a.schemaRef;
        }
        else if (b.type !== undefined) {
            merged.type = b.type;
        }
        else if (a.type !== undefined) {
            merged.type = a.type;
        }
        if (a.optional && b.optional) {
            merged.optional = true;
        }
        if (a.array && b.array) {
            merged.array = true;
        }
        if (a.multilang || b.multilang) {
            merged.multilang = true;
        }
        if (a.inverse || b.inverse) {
            merged.inverse = true;
        }
        return merged;
    }

    /**
     * Translates one property shape into a `{ name, spec }` pair.
     *
     * The value kind is decided by the first rule that applies:
     * multilang (`sh:uniqueLang true` or `rdf:langString`), reference
     * (`sh:node` / `sh:class`), explicit non-string datatype, `sh:nodeKind
     * sh:IRI`, first member of `sh:in`, and finally the `sh:or` branches.
     * A property typed through `sh:or` is always optional.
     *
     * @param {object} propertyNode Term of the property shape.
     * @param {string} enclosingShapeIri Shape IRI, used in error messages.
     * @returns {{name: string, spec: object}}
     * @throws {Error} See resolvePath().
     */
    buildProperty(propertyNode, enclosingShapeIri) {
        const { iri: pathIri, inverse } = this.resolvePath(propertyNode, enclosingShapeIri);
        const name = this.getSuffix(pathIri);
        const spec = { id: pathIri };
        if (inverse) {
            spec.inverse = true;
        }
        const direct = this.collectConstraints(propertyNode);
        const orBranches = this.collectOrBranches(propertyNode);
        let forceOptional = false;
        const refTarget = direct.refNode ?? direct.refClass;
        if (direct.uniqueLang || direct.datatype === RDF_LANG_STRING) {
            spec.multilang = true;
        }
        else if (refTarget) {
            // Default to IRI for sh:node / sh:class references
            spec.type = "@id";
        }
        else if (direct.datatype && direct.datatype !== XSD_STRING) {
            spec.type = direct.datatype;
        }
        else if (direct.nodeKind === SH_IRI) {
            spec.type = "@id";
        }
        else if (direct.inFirstType) {
            // xsd:string is the default, so it is left implicit.
            if (direct.inFirstType !== XSD_STRING) {
                spec.type = direct.inFirstType;
            }
        }
        else if (orBranches.length > 0) {
            const reduced = this.reduceOrBranches(orBranches);
            forceOptional = true;
            if (reduced.kind === "datatype" && reduced.value !== XSD_STRING) {
                spec.type = reduced.value;
            }
            else if (reduced.kind === "iri") {
                spec.type = "@id";
            }
        }
        const minCount = this.getObjectInteger(propertyNode, SH_MIN_COUNT);
        const maxCount = this.getObjectInteger(propertyNode, SH_MAX_COUNT);
        if (forceOptional || minCount === undefined || minCount === 0) {
            spec.optional = true;
        }
        if (maxCount === undefined || maxCount > 1) {
            spec.array = true;
        }
        return { name, spec };
    }

    /**
     * Resolves `sh:path` of a property shape.
     *
     * @param {object} propertyNode
     * @param {string} [enclosingShapeIri] Added to error messages when given.
     * @returns {{iri: string, inverse: boolean}}
     * @throws {Error} If `sh:path` is missing, or is anything other than a
     *   predicate IRI or a blank node with an IRI-valued `sh:inversePath`.
     */
    resolvePath(propertyNode, enclosingShapeIri) {
        const ctx = enclosingShapeIri ? ` on shape <${enclosingShapeIri}>` : "";
        const pathTerm = this.getObjectTerm(propertyNode, SH_PATH);
        if (!pathTerm) {
            throw new Error(`Property shape${ctx} is missing sh:path`);
        }
        if (pathTerm.termType === "NamedNode") {
            return { iri: pathTerm.value, inverse: false };
        }
        if (pathTerm.termType === "BlankNode") {
            const inverseIri = this.getObjectIri(pathTerm, SH_INVERSE_PATH);
            if (inverseIri) {
                return { iri: inverseIri, inverse: true };
            }
        }
        throw new Error(`Unsupported sh:path${ctx}: only simple predicate IRIs and sh:inversePath are supported (got ${pathTerm.termType})`);
    }

    /**
     * Collects the value constraints stated on `node`, then overlays those of
     * every `sh:and` member (later members win).
     *
     * @param {object} node Term of a property shape or of an `sh:and` /
     *   `sh:or` list member.
     * @param {Set<string>} [ancestors] Keys of the nodes currently being
     *   expanded through `sh:and`; a member that is already in this set is
     *   skipped so that a self-referencing `sh:and` terminates.
     * @returns {{datatype?: string, nodeKind?: string, refNode?: string,
     *   refClass?: string, uniqueLang?: boolean, inFirstType?: string}}
     */
    collectConstraints(node, ancestors = new Set()) {
        const c = {
            datatype: this.getObjectIri(node, SH_DATATYPE),
            nodeKind: this.getObjectIri(node, SH_NODE_KIND),
            refNode: this.getObjectIri(node, SH_NODE),
            refClass: this.getObjectIri(node, SH_CLASS),
            uniqueLang: this.getObjectBoolean(node, SH_UNIQUE_LANG),
        };
        const inListTerm = this.getObjectTerm(node, SH_IN);
        if (inListTerm) {
            // Only the first member of sh:in decides the value kind.
            const first = this.walkList(inListTerm)[0];
            if (first?.termType === "NamedNode") {
                c.inFirstType = "@id";
            }
            else if (first?.termType === "Literal") {
                c.inFirstType = first.datatype?.value ?? XSD_STRING;
            }
        }
        const andListTerm = this.getObjectTerm(node, SH_AND);
        if (andListTerm) {
            const selfKey = this.termKey(node);
            const addedHere = !ancestors.has(selfKey);
            ancestors.add(selfKey);
            for (const branch of this.walkList(andListTerm)) {
                if (ancestors.has(this.termKey(branch))) {
                    continue; // cycle: this node is already being expanded
                }
                const sub = this.collectConstraints(branch, ancestors);
                for (const field of ["datatype", "nodeKind", "refNode", "refClass", "uniqueLang", "inFirstType"]) {
                    if (sub[field] !== undefined) {
                        c[field] = sub[field];
                    }
                }
            }
            // Track ancestors only, so a node shared by sibling branches is
            // still expanded for each of them.
            if (addedHere) {
                ancestors.delete(selfKey);
            }
        }
        return c;
    }

    /**
     * Returns the constraints of every member of the node's `sh:or` list
     * (empty when there is no `sh:or`).
     *
     * @param {object} node
     * @returns {object[]}
     */
    collectOrBranches(node) {
        const orListTerm = this.getObjectTerm(node, SH_OR);
        if (!orListTerm) {
            return [];
        }
        return this.walkList(orListTerm).map((branch) => this.collectConstraints(branch));
    }

    /**
     * Reduces `sh:or` branches to one value kind: a datatype when all
     * branches are datatypes that are all numeric (widest one) or all equal,
     * `iri` when all branches are references, `untyped` otherwise.
     *
     * @param {object[]} branches
     * @returns {{kind: "datatype", value: string}|{kind: "iri"}|{kind: "untyped"}}
     */
    reduceOrBranches(branches) {
        if (branches.length === 0) {
            return { kind: "untyped" };
        }
        const allDatatypes = branches.every((b) => b.datatype && !b.refNode && !b.refClass);
        if (allDatatypes) {
            const dts = branches.map((b) => b.datatype);
            const widened = this.pickWidestNumeric(dts);
            if (widened) {
                return { kind: "datatype", value: widened };
            }
            if (new Set(dts).size === 1) {
                return { kind: "datatype", value: dts[0] };
            }
            return { kind: "untyped" };
        }
        const allRefs = branches.every((b) => (b.refNode || b.refClass) && !b.datatype);
        return allRefs ? { kind: "iri" } : { kind: "untyped" };
    }

    /**
     * Returns the entry of NUMERIC_WIDENING with the lowest index among
     * `types`, or `undefined` if any of them is not in that list.
     *
     * @param {string[]} types
     * @returns {string|undefined}
     */
    pickWidestNumeric(types) {
        const indices = types.map((t) => NUMERIC_WIDENING.indexOf(t));
        if (indices.some((i) => i < 0)) {
            return undefined;
        }
        return NUMERIC_WIDENING[Math.min(...indices)];
    }

    /**
     * Collects the `rdf:first` values of an RDF list. Stops at `rdf:nil`, at
     * a node that was already visited, or at a node lacking `rdf:first` /
     * `rdf:rest`.
     *
     * @param {object} listHead
     * @returns {object[]}
     */
    walkList(listHead) {
        const items = [];
        const visited = new Set();
        let current = listHead;
        while (current &&
            !(current.termType === "NamedNode" && current.value === RDF_NIL)) {
            const key = this.termKey(current);
            if (visited.has(key)) {
                break;
            }
            visited.add(key);
            const first = this.getObjectTerm(current, RDF_FIRST);
            if (!first) {
                break;
            }
            items.push(first);
            const rest = this.getObjectTerm(current, RDF_REST);
            if (!rest) {
                break;
            }
            current = rest;
        }
        return items;
    }

    /**
     * Key identifying a term by type and value.
     *
     * @param {object} term
     * @returns {string}
     */
    termKey(term) {
        return `${term.termType}:${term.value}`;
    }

    /**
     * Replaces every character outside `[A-Za-z0-9_$]` with `_` and prefixes
     * `_` when the result starts with a digit.
     *
     * @param {string} value
     * @returns {string}
     */
    sanitizeIdentifier(value) {
        const cleaned = value.replace(/[^A-Za-z0-9_$]/g, "_");
        return /^[0-9]/.test(cleaned) ? `_${cleaned}` : cleaned;
    }

    /**
     * Object of the first quad matching subject and predicate, if any.
     *
     * @returns {object|undefined}
     */
    getObjectTerm(subject, predicate) {
        const quads = this.store.getQuads(subject, predicate, null, null);
        return quads[0]?.object;
    }

    /**
     * Like getObjectTerm(), but yields the IRI string and only when that
     * first object is a named node.
     *
     * @returns {string|undefined}
     */
    getObjectIri(subject, predicate) {
        const term = this.getObjectTerm(subject, predicate);
        return term && term.termType === "NamedNode" ? term.value : undefined;
    }

    /**
     * IRIs of all named-node objects for subject and predicate.
     *
     * @returns {string[]}
     */
    getObjectIris(subject, predicate) {
        return this.store
            .getQuads(subject, predicate, null, null)
            .filter((quad) => quad.object.termType === "NamedNode")
            .map((quad) => quad.object.value);
    }

    /**
     * First object parsed as a base-10 integer, when it is a literal that
     * `parseInt` can read.
     *
     * @returns {number|undefined}
     */
    getObjectInteger(subject, predicate) {
        const term = this.getObjectTerm(subject, predicate);
        if (term && term.termType === "Literal") {
            const parsed = Number.parseInt(term.value, 10);
            if (!Number.isNaN(parsed)) {
                return parsed;
            }
        }
        return undefined;
    }

    /**
     * First object as a boolean, when it is a literal whose lexical form is
     * exactly "true" or "false".
     *
     * @returns {boolean|undefined}
     */
    getObjectBoolean(subject, predicate) {
        const term = this.getObjectTerm(subject, predicate);
        if (term && term.termType === "Literal") {
            if (term.value === "true") {
                return true;
            }
            if (term.value === "false") {
                return false;
            }
        }
        return undefined;
    }

    /**
     * Text after the last `#` or `/` of `value`, or `value` itself when it
     * contains neither.
     *
     * @param {string} value
     * @returns {string}
     */
    getSuffix(value) {
        const cutoff = Math.max(value.lastIndexOf("#"), value.lastIndexOf("/"));
        return cutoff === -1 ? value : value.substring(cutoff + 1);
    }

    /**
     * Upper-cases the first character of `value`.
     *
     * @param {string} value
     * @returns {string}
     */
    capitalize(value) {
        if (value.length === 0) {
            return value;
        }
        return value.charAt(0).toUpperCase() + value.slice(1);
    }
}