linkml 1.9.4rc1__py3-none-any.whl → 1.9.5rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml/cli/main.py +4 -0
- linkml/generators/__init__.py +2 -0
- linkml/generators/common/build.py +5 -20
- linkml/generators/common/template.py +289 -3
- linkml/generators/docgen.py +55 -10
- linkml/generators/erdiagramgen.py +9 -5
- linkml/generators/graphqlgen.py +32 -6
- linkml/generators/jsonldcontextgen.py +78 -12
- linkml/generators/jsonschemagen.py +29 -12
- linkml/generators/mermaidclassdiagramgen.py +21 -3
- linkml/generators/owlgen.py +4 -1
- linkml/generators/panderagen/dataframe_class.py +13 -0
- linkml/generators/panderagen/dataframe_field.py +50 -0
- linkml/generators/panderagen/linkml_pandera_validator.py +186 -0
- linkml/generators/panderagen/panderagen.py +22 -5
- linkml/generators/panderagen/panderagen_class_based/class.jinja2 +70 -13
- linkml/generators/panderagen/panderagen_class_based/custom_checks.jinja2 +27 -0
- linkml/generators/panderagen/panderagen_class_based/enums.jinja2 +3 -3
- linkml/generators/panderagen/panderagen_class_based/pandera.jinja2 +12 -2
- linkml/generators/panderagen/panderagen_class_based/slots.jinja2 +19 -17
- linkml/generators/panderagen/slot_generator_mixin.py +143 -16
- linkml/generators/panderagen/transforms/__init__.py +19 -0
- linkml/generators/panderagen/transforms/collection_dict_model_transform.py +62 -0
- linkml/generators/panderagen/transforms/list_dict_model_transform.py +66 -0
- linkml/generators/panderagen/transforms/model_transform.py +8 -0
- linkml/generators/panderagen/transforms/nested_struct_model_transform.py +27 -0
- linkml/generators/panderagen/transforms/simple_dict_model_transform.py +86 -0
- linkml/generators/plantumlgen.py +17 -11
- linkml/generators/pydanticgen/pydanticgen.py +53 -2
- linkml/generators/pydanticgen/template.py +45 -233
- linkml/generators/pydanticgen/templates/attribute.py.jinja +1 -0
- linkml/generators/pydanticgen/templates/base_model.py.jinja +16 -2
- linkml/generators/pydanticgen/templates/imports.py.jinja +1 -1
- linkml/generators/rdfgen.py +11 -2
- linkml/generators/rustgen/__init__.py +3 -0
- linkml/generators/rustgen/build.py +94 -0
- linkml/generators/rustgen/cli.py +65 -0
- linkml/generators/rustgen/rustgen.py +1038 -0
- linkml/generators/rustgen/template.py +865 -0
- linkml/generators/rustgen/templates/Cargo.toml.jinja +42 -0
- linkml/generators/rustgen/templates/anything.rs.jinja +142 -0
- linkml/generators/rustgen/templates/as_key_value.rs.jinja +56 -0
- linkml/generators/rustgen/templates/class_module.rs.jinja +8 -0
- linkml/generators/rustgen/templates/enum.rs.jinja +54 -0
- linkml/generators/rustgen/templates/file.rs.jinja +62 -0
- linkml/generators/rustgen/templates/import.rs.jinja +4 -0
- linkml/generators/rustgen/templates/imports.rs.jinja +8 -0
- linkml/generators/rustgen/templates/poly.rs.jinja +9 -0
- linkml/generators/rustgen/templates/poly_containers.rs.jinja +439 -0
- linkml/generators/rustgen/templates/poly_trait.rs.jinja +15 -0
- linkml/generators/rustgen/templates/poly_trait_impl.rs.jinja +5 -0
- linkml/generators/rustgen/templates/poly_trait_impl_orsubtype.rs.jinja +5 -0
- linkml/generators/rustgen/templates/poly_trait_property.rs.jinja +8 -0
- linkml/generators/rustgen/templates/poly_trait_property_impl.rs.jinja +132 -0
- linkml/generators/rustgen/templates/poly_trait_property_match.rs.jinja +10 -0
- linkml/generators/rustgen/templates/property.rs.jinja +19 -0
- linkml/generators/rustgen/templates/pyproject.toml.jinja +10 -0
- linkml/generators/rustgen/templates/serde_utils.rs.jinja +310 -0
- linkml/generators/rustgen/templates/slot_range_as_union.rs.jinja +61 -0
- linkml/generators/rustgen/templates/struct.rs.jinja +75 -0
- linkml/generators/rustgen/templates/struct_or_subtype_enum.rs.jinja +108 -0
- linkml/generators/rustgen/templates/typealias.rs.jinja +13 -0
- linkml/generators/sqltablegen.py +18 -16
- linkml/generators/yarrrmlgen.py +157 -0
- linkml/linter/config/datamodel/config.py +160 -293
- linkml/linter/config/datamodel/config.yaml +34 -26
- linkml/linter/config/default.yaml +4 -0
- linkml/linter/config/recommended.yaml +4 -0
- linkml/linter/linter.py +1 -2
- linkml/linter/rules.py +37 -0
- linkml/utils/schemaloader.py +55 -3
- {linkml-1.9.4rc1.dist-info → linkml-1.9.5rc1.dist-info}/METADATA +2 -2
- {linkml-1.9.4rc1.dist-info → linkml-1.9.5rc1.dist-info}/RECORD +76 -38
- {linkml-1.9.4rc1.dist-info → linkml-1.9.5rc1.dist-info}/entry_points.txt +1 -0
- linkml/generators/panderagen/panderagen_class_based/mixins.jinja2 +0 -26
- {linkml-1.9.4rc1.dist-info → linkml-1.9.5rc1.dist-info}/WHEEL +0 -0
- {linkml-1.9.4rc1.dist-info → linkml-1.9.5rc1.dist-info}/licenses/LICENSE +0 -0
linkml/generators/graphqlgen.py
CHANGED
|
@@ -24,6 +24,7 @@ class GraphqlGenerator(Generator):
|
|
|
24
24
|
|
|
25
25
|
strict_naming: bool = False
|
|
26
26
|
_permissible_value_valid_characters = re.compile("^[_A-Za-z][_0-9A-Za-z]*?$")
|
|
27
|
+
_types_any = []
|
|
27
28
|
|
|
28
29
|
def __post_init__(self):
|
|
29
30
|
self.name_compatiblity = NameCompatibility(profile=NamingProfiles.graphql, do_not_fix=self.strict_naming)
|
|
@@ -39,6 +40,10 @@ class GraphqlGenerator(Generator):
|
|
|
39
40
|
return out
|
|
40
41
|
|
|
41
42
|
def visit_class(self, cls: ClassDefinition) -> str:
|
|
43
|
+
# no type can be declared for subtypes of "Any"
|
|
44
|
+
if cls.class_uri == "linkml:Any":
|
|
45
|
+
self._types_any.append(cls.name)
|
|
46
|
+
return f"scalar {cls.name}"
|
|
42
47
|
etype = "interface" if (cls.abstract or cls.mixin) and not cls.mixins else "type"
|
|
43
48
|
mixins = ", ".join([camelcase(mixin) for mixin in cls.mixins])
|
|
44
49
|
out = f"{etype} {camelcase(cls.name)}" + (f" implements {mixins}" if mixins else "")
|
|
@@ -46,14 +51,35 @@ class GraphqlGenerator(Generator):
|
|
|
46
51
|
return out
|
|
47
52
|
|
|
48
53
|
def end_class(self, cls: ClassDefinition) -> str:
|
|
49
|
-
|
|
54
|
+
if cls.name in self._types_any:
|
|
55
|
+
return "\n\n"
|
|
56
|
+
else:
|
|
57
|
+
return "\n }\n\n"
|
|
50
58
|
|
|
51
59
|
def visit_class_slot(self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition) -> str:
|
|
52
|
-
|
|
53
|
-
camelcase(slot.range)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
60
|
+
if slot.range in self.schema.classes or slot.range in self.schema.slots or slot.range in self.schema.enums:
|
|
61
|
+
slotrange = camelcase(slot.range)
|
|
62
|
+
elif slot.range in self.schema.types:
|
|
63
|
+
if self.schema.types[slot.range].from_schema != "https://w3id.org/linkml/types":
|
|
64
|
+
slotrange = camelcase(slot.range)
|
|
65
|
+
else:
|
|
66
|
+
graphql_scalars = ["Int", "Float", "String", "Boolean", "ID"]
|
|
67
|
+
if slot.range == "integer":
|
|
68
|
+
slotrange = "Int"
|
|
69
|
+
elif slot.range == "decimal":
|
|
70
|
+
slotrange = "Float"
|
|
71
|
+
elif camelcase(slot.range) in graphql_scalars:
|
|
72
|
+
slotrange = camelcase(slot.range)
|
|
73
|
+
else:
|
|
74
|
+
if self.schema.types[slot.range].repr:
|
|
75
|
+
python_type = self.schema.types[slot.range].repr
|
|
76
|
+
elif self.schema.types[slot.range].base:
|
|
77
|
+
python_type = self.schema.types[slot.range].base
|
|
78
|
+
if str(python_type) == "float":
|
|
79
|
+
slotrange = "Float"
|
|
80
|
+
elif str(python_type) == "str":
|
|
81
|
+
slotrange = "String"
|
|
82
|
+
|
|
57
83
|
if slot.multivalued:
|
|
58
84
|
slotrange = f"[{slotrange}]"
|
|
59
85
|
if slot.required:
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Generate JSON-LD contexts
|
|
3
|
-
|
|
4
3
|
"""
|
|
5
4
|
|
|
5
|
+
import json
|
|
6
6
|
import os
|
|
7
7
|
import re
|
|
8
8
|
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
9
10
|
from typing import Any, Optional, Union
|
|
10
11
|
|
|
11
12
|
import click
|
|
@@ -50,6 +51,12 @@ class ContextGenerator(Generator):
|
|
|
50
51
|
prefixes: Optional[bool] = True
|
|
51
52
|
flatprefixes: Optional[bool] = False
|
|
52
53
|
|
|
54
|
+
# Framing (opt-in via CLI flag)
|
|
55
|
+
emit_frame: bool = False
|
|
56
|
+
embed_context_in_frame: bool = False
|
|
57
|
+
frame_body: dict = field(default_factory=lambda: dict())
|
|
58
|
+
frame_root: Optional[str] = None
|
|
59
|
+
|
|
53
60
|
def __post_init__(self) -> None:
|
|
54
61
|
super().__post_init__()
|
|
55
62
|
if self.namespaces is None:
|
|
@@ -121,10 +128,40 @@ class ContextGenerator(Generator):
|
|
|
121
128
|
for k, v in self.slot_class_maps.items():
|
|
122
129
|
context_content[k] = v
|
|
123
130
|
context["@context"] = context_content
|
|
124
|
-
if output:
|
|
131
|
+
if output and not self.embed_context_in_frame:
|
|
125
132
|
with open(output, "w", encoding="UTF-8") as outf:
|
|
126
133
|
outf.write(as_json(context))
|
|
127
134
|
|
|
135
|
+
if self.emit_frame and self.frame_body and output:
|
|
136
|
+
root_name = None
|
|
137
|
+
for cname, c in self.schema.classes.items():
|
|
138
|
+
if getattr(c, "tree_root", False):
|
|
139
|
+
root_name = cname
|
|
140
|
+
break
|
|
141
|
+
if root_name is None and self.schema.classes:
|
|
142
|
+
root_name = next(iter(self.schema.classes))
|
|
143
|
+
|
|
144
|
+
if self.embed_context_in_frame:
|
|
145
|
+
frame = {
|
|
146
|
+
"@context": context["@context"],
|
|
147
|
+
"@omitGraph": True,
|
|
148
|
+
}
|
|
149
|
+
else:
|
|
150
|
+
frame = {
|
|
151
|
+
"@context": Path(output).name,
|
|
152
|
+
"@omitGraph": True,
|
|
153
|
+
}
|
|
154
|
+
if root_name:
|
|
155
|
+
root_cls = self.schema.classes[root_name]
|
|
156
|
+
frame["@type"] = root_cls.class_uri or root_cls.name
|
|
157
|
+
|
|
158
|
+
for prop, rule in self.frame_body.items():
|
|
159
|
+
frame[prop] = rule
|
|
160
|
+
|
|
161
|
+
frame_path = Path(output).with_suffix(".frame.jsonld")
|
|
162
|
+
with open(frame_path, "w", encoding="UTF-8") as f:
|
|
163
|
+
json.dump(frame, f, indent=2, ensure_ascii=False)
|
|
164
|
+
|
|
128
165
|
return str(as_json(context)) + "\n"
|
|
129
166
|
|
|
130
167
|
def visit_class(self, cls: ClassDefinition) -> bool:
|
|
@@ -136,6 +173,10 @@ class ContextGenerator(Generator):
|
|
|
136
173
|
if class_def:
|
|
137
174
|
self.slot_class_maps[cn] = class_def
|
|
138
175
|
|
|
176
|
+
# prefer explicit tree_root for frame @type
|
|
177
|
+
if getattr(cls, "tree_root", False):
|
|
178
|
+
self.frame_root = cls.name
|
|
179
|
+
|
|
139
180
|
# We don't bother to visit class slots - just all slots
|
|
140
181
|
return True
|
|
141
182
|
|
|
@@ -146,13 +187,7 @@ class ContextGenerator(Generator):
|
|
|
146
187
|
slot_def = {}
|
|
147
188
|
if not slot.usage_slot_name:
|
|
148
189
|
any_of_ranges = [any_of_el.range for any_of_el in slot.any_of]
|
|
149
|
-
if slot.range in self.schema.classes:
|
|
150
|
-
range_class_uri = self.schema.classes[slot.range].class_uri
|
|
151
|
-
if range_class_uri and slot.inlined:
|
|
152
|
-
slot_def["@type"] = range_class_uri
|
|
153
|
-
else:
|
|
154
|
-
slot_def["@type"] = "@id"
|
|
155
|
-
elif any(rng in self.schema.classes for rng in any_of_ranges):
|
|
190
|
+
if slot.range in self.schema.classes or any(rng in self.schema.classes for rng in any_of_ranges):
|
|
156
191
|
slot_def["@type"] = "@id"
|
|
157
192
|
elif slot.range in self.schema.enums:
|
|
158
193
|
slot_def["@context"] = ENUM_CONTEXT
|
|
@@ -174,7 +209,12 @@ class ContextGenerator(Generator):
|
|
|
174
209
|
self._build_element_id(slot_def, slot.slot_uri)
|
|
175
210
|
self.add_mappings(slot)
|
|
176
211
|
if slot_def:
|
|
177
|
-
|
|
212
|
+
key = underscore(aliased_slot_name)
|
|
213
|
+
self.context_body[key] = slot_def
|
|
214
|
+
|
|
215
|
+
# collect @embed only for object-valued slots (range is a class)
|
|
216
|
+
if slot.range in self.schema.classes and slot.inlined is not None:
|
|
217
|
+
self.frame_body[key] = {"@embed": "@always" if bool(slot.inlined) else "@never"}
|
|
178
218
|
|
|
179
219
|
def _build_element_id(self, definition: Any, uri: str) -> None:
|
|
180
220
|
"""
|
|
@@ -227,10 +267,36 @@ class ContextGenerator(Generator):
|
|
|
227
267
|
show_default=True,
|
|
228
268
|
help="Emit non-JSON-LD compliant prefixes as an object (deprecated: use gen-prefix-map instead).",
|
|
229
269
|
)
|
|
270
|
+
@click.option(
|
|
271
|
+
"--emit-frame/--no-emit-frame",
|
|
272
|
+
default=False,
|
|
273
|
+
show_default=True,
|
|
274
|
+
help="Also emit a <schema>.frame.jsonld file with @embed rules for framing",
|
|
275
|
+
)
|
|
276
|
+
@click.option(
|
|
277
|
+
"--embed-context-in-frame/--no-embed-context-in-frame",
|
|
278
|
+
default=False,
|
|
279
|
+
show_default=True,
|
|
280
|
+
help="Emit a <schema>.frame.jsonld file with @context embedded directly (single file)",
|
|
281
|
+
)
|
|
282
|
+
@click.option(
|
|
283
|
+
"-o",
|
|
284
|
+
"--output",
|
|
285
|
+
type=click.Path(),
|
|
286
|
+
help="Output file name",
|
|
287
|
+
)
|
|
230
288
|
@click.version_option(__version__, "-V", "--version")
|
|
231
|
-
def cli(yamlfile, **args):
|
|
289
|
+
def cli(yamlfile, emit_frame, embed_context_in_frame, output, **args):
|
|
232
290
|
"""Generate jsonld @context definition from LinkML model"""
|
|
233
|
-
|
|
291
|
+
if (emit_frame or embed_context_in_frame) and not output:
|
|
292
|
+
raise click.UsageError("--emit-frame/--embed-context-in-frame requires --output")
|
|
293
|
+
gen = ContextGenerator(yamlfile, **args)
|
|
294
|
+
if embed_context_in_frame:
|
|
295
|
+
gen.emit_frame = True
|
|
296
|
+
gen.embed_context_in_frame = True
|
|
297
|
+
else:
|
|
298
|
+
gen.emit_frame = emit_frame
|
|
299
|
+
print(gen.serialize(output=output, **args))
|
|
234
300
|
|
|
235
301
|
|
|
236
302
|
if __name__ == "__main__":
|
|
@@ -49,13 +49,14 @@ json_schema_types: dict[str, tuple[str, Optional[str]]] = {
|
|
|
49
49
|
|
|
50
50
|
class JsonSchema(dict):
|
|
51
51
|
OPTIONAL_IDENTIFIER_SUFFIX = "__identifier_optional"
|
|
52
|
+
PRESERVE_NAMES: bool = False
|
|
52
53
|
|
|
53
54
|
def __init__(self, *args, **kwargs):
|
|
54
55
|
super().__init__(*args, **kwargs)
|
|
55
56
|
self._lax_forward_refs = {}
|
|
56
57
|
|
|
57
58
|
def add_def(self, name: str, subschema: "JsonSchema") -> None:
|
|
58
|
-
canonical_name = camelcase(name)
|
|
59
|
+
canonical_name = name if self.PRESERVE_NAMES else camelcase(name)
|
|
59
60
|
|
|
60
61
|
if "$defs" not in self:
|
|
61
62
|
self["$defs"] = {}
|
|
@@ -78,7 +79,7 @@ class JsonSchema(dict):
|
|
|
78
79
|
names = [names]
|
|
79
80
|
|
|
80
81
|
for name in names:
|
|
81
|
-
canonical_name = camelcase(name)
|
|
82
|
+
canonical_name = name if self.PRESERVE_NAMES else camelcase(name)
|
|
82
83
|
|
|
83
84
|
if "$defs" not in self or canonical_name not in self["$defs"]:
|
|
84
85
|
self._lax_forward_refs[canonical_name] = identifier_name
|
|
@@ -90,7 +91,7 @@ class JsonSchema(dict):
|
|
|
90
91
|
def add_property(
|
|
91
92
|
self, name: str, subschema: "JsonSchema", *, value_required: bool = False, value_disallowed: bool = False
|
|
92
93
|
) -> None:
|
|
93
|
-
canonical_name = underscore(name)
|
|
94
|
+
canonical_name = name if self.PRESERVE_NAMES else underscore(name)
|
|
94
95
|
|
|
95
96
|
if "properties" not in self:
|
|
96
97
|
self["properties"] = {}
|
|
@@ -149,7 +150,7 @@ class JsonSchema(dict):
|
|
|
149
150
|
@classmethod
|
|
150
151
|
def ref_for(cls, class_name: Union[str, list[str]], identifier_optional: bool = False, required: bool = True):
|
|
151
152
|
def _ref(class_name):
|
|
152
|
-
def_name = camelcase(class_name)
|
|
153
|
+
def_name = class_name if cls.PRESERVE_NAMES else camelcase(class_name)
|
|
153
154
|
def_suffix = cls.OPTIONAL_IDENTIFIER_SUFFIX if identifier_optional else ""
|
|
154
155
|
return JsonSchema({"$ref": f"#/$defs/{def_name}{def_suffix}"})
|
|
155
156
|
|
|
@@ -169,8 +170,8 @@ class JsonSchema(dict):
|
|
|
169
170
|
return ref
|
|
170
171
|
|
|
171
172
|
@classmethod
|
|
172
|
-
def array_of(cls, subschema: "JsonSchema", required: bool = True) -> "JsonSchema":
|
|
173
|
-
if required:
|
|
173
|
+
def array_of(cls, subschema: "JsonSchema", include_null: bool, required: bool = True) -> "JsonSchema":
|
|
174
|
+
if required or not include_null:
|
|
174
175
|
typ = "array"
|
|
175
176
|
else:
|
|
176
177
|
typ = ["array", "null"]
|
|
@@ -265,6 +266,9 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
|
|
|
265
266
|
include_null: bool = True
|
|
266
267
|
"""Whether to include a "null" type in optional slots"""
|
|
267
268
|
|
|
269
|
+
preserve_names: bool = False
|
|
270
|
+
"""If true, preserve LinkML element names in JSON Schema output (e.g., for $defs, properties, $ref targets)."""
|
|
271
|
+
|
|
268
272
|
def __post_init__(self):
|
|
269
273
|
if self.topClass:
|
|
270
274
|
logger.warning("topClass is deprecated - use top_class")
|
|
@@ -272,6 +276,9 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
|
|
|
272
276
|
|
|
273
277
|
super().__post_init__()
|
|
274
278
|
|
|
279
|
+
# Set the class variable for JsonSchema to use
|
|
280
|
+
JsonSchema.PRESERVE_NAMES = self.preserve_names
|
|
281
|
+
|
|
275
282
|
if self.top_class:
|
|
276
283
|
if self.schemaview.get_class(self.top_class) is None:
|
|
277
284
|
logger.warning(f"No class in schema named {self.top_class}")
|
|
@@ -372,9 +379,13 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
|
|
|
372
379
|
|
|
373
380
|
self.top_level_schema.add_def(cls.name, class_subschema)
|
|
374
381
|
|
|
375
|
-
if (
|
|
376
|
-
self.top_class is None
|
|
377
|
-
|
|
382
|
+
if (
|
|
383
|
+
self.top_class is not None
|
|
384
|
+
and (
|
|
385
|
+
(self.preserve_names and self.top_class == cls.name)
|
|
386
|
+
or (not self.preserve_names and camelcase(self.top_class) == camelcase(cls.name))
|
|
387
|
+
)
|
|
388
|
+
) or (self.top_class is None and cls.tree_root):
|
|
378
389
|
for key, value in class_subschema.items():
|
|
379
390
|
# check this first to ensure we don't overwrite things like additionalProperties
|
|
380
391
|
# or description on the root. But we do want to copy over properties, required,
|
|
@@ -534,7 +545,7 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
|
|
|
534
545
|
"additionalProperties": True,
|
|
535
546
|
}
|
|
536
547
|
)
|
|
537
|
-
return JsonSchema.array_of(prop, required=slot.required)
|
|
548
|
+
return JsonSchema.array_of(prop, include_null, required=slot.required)
|
|
538
549
|
slot_is_multivalued = "multivalued" in slot and slot.multivalued
|
|
539
550
|
slot_is_inlined = self.schemaview.is_inlined(slot)
|
|
540
551
|
slot_is_boolean = any([slot.any_of, slot.all_of, slot.exactly_one_of, slot.none_of])
|
|
@@ -579,7 +590,7 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
|
|
|
579
590
|
prop = JsonSchema({"type": typ, "additionalProperties": additionalProps})
|
|
580
591
|
self.top_level_schema.add_lax_def(reference, self.aliased_slot_name(range_id_slot))
|
|
581
592
|
else:
|
|
582
|
-
prop = JsonSchema.array_of(JsonSchema.ref_for(reference), required=slot.required)
|
|
593
|
+
prop = JsonSchema.array_of(JsonSchema.ref_for(reference), include_null, required=slot.required)
|
|
583
594
|
else:
|
|
584
595
|
prop = JsonSchema.ref_for(reference, required=slot.required or not include_null)
|
|
585
596
|
|
|
@@ -592,7 +603,7 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
|
|
|
592
603
|
prop = JsonSchema({"type": typ, "format": fmt})
|
|
593
604
|
|
|
594
605
|
if slot_is_multivalued:
|
|
595
|
-
prop = JsonSchema.array_of(prop, required=slot.required)
|
|
606
|
+
prop = JsonSchema.array_of(prop, include_null, required=slot.required)
|
|
596
607
|
else:
|
|
597
608
|
# handle optionals - bools like any_of, etc. below as they call this method recursively
|
|
598
609
|
if not slot.required and not slot_is_boolean and include_null:
|
|
@@ -763,6 +774,12 @@ YAML, and including it when necessary but not by default (e.g. in documentation
|
|
|
763
774
|
show_default=True,
|
|
764
775
|
help="If set, patterns will be materialized in the generated JSON Schema.",
|
|
765
776
|
)
|
|
777
|
+
@click.option(
|
|
778
|
+
"--preserve-names/--normalize-names",
|
|
779
|
+
default=False,
|
|
780
|
+
show_default=True,
|
|
781
|
+
help="Preserve original LinkML names in JSON Schema output (e.g., for $defs, properties, $ref targets).",
|
|
782
|
+
)
|
|
766
783
|
@click.version_option(__version__, "-V", "--version")
|
|
767
784
|
def cli(yamlfile, **kwargs):
|
|
768
785
|
"""Generate JSON Schema representation of a LinkML model"""
|
|
@@ -8,6 +8,7 @@ from typing import Optional
|
|
|
8
8
|
import click
|
|
9
9
|
from jinja2 import Environment, FileSystemLoader
|
|
10
10
|
from linkml_runtime.linkml_model.meta import Element, SlotDefinition
|
|
11
|
+
from linkml_runtime.utils.formatutils import camelcase, underscore
|
|
11
12
|
from linkml_runtime.utils.schemaview import SchemaView
|
|
12
13
|
|
|
13
14
|
from linkml.generators.docgen import DocGenerator
|
|
@@ -34,6 +35,7 @@ class MermaidClassDiagramGenerator(Generator):
|
|
|
34
35
|
directory: Optional[str] = None # output directory with generated markdown files
|
|
35
36
|
template_file: Optional[str] = None # custom/default jinja template for class diagrams
|
|
36
37
|
classes: list[str] = field(default_factory=list) # optional subset of classes
|
|
38
|
+
preserve_names: bool = False # preserve original LinkML names in diagram output
|
|
37
39
|
|
|
38
40
|
def __post_init__(self):
|
|
39
41
|
super().__post_init__()
|
|
@@ -59,7 +61,7 @@ class MermaidClassDiagramGenerator(Generator):
|
|
|
59
61
|
template_name = os.path.basename(self.template_file)
|
|
60
62
|
loader = FileSystemLoader(template_folder)
|
|
61
63
|
env = Environment(loader=loader)
|
|
62
|
-
temp_doc_gen = DocGenerator(self.schema, mergeimports=self.mergeimports)
|
|
64
|
+
temp_doc_gen = DocGenerator(self.schema, mergeimports=self.mergeimports, preserve_names=self.preserve_names)
|
|
63
65
|
temp_doc_gen.customize_environment(env)
|
|
64
66
|
|
|
65
67
|
template = env.get_template(template_name)
|
|
@@ -74,7 +76,8 @@ class MermaidClassDiagramGenerator(Generator):
|
|
|
74
76
|
for cn, class_def in class_items:
|
|
75
77
|
self.logger.info(f"Generating Mermaid diagram for class: {cn}")
|
|
76
78
|
rendered = template.render(gen=self, element=class_def, schemaview=self.schemaview)
|
|
77
|
-
|
|
79
|
+
filename = self.name(class_def) if self.preserve_names else cn
|
|
80
|
+
outfile = self.output_directory / f"{filename}.md"
|
|
78
81
|
with open(outfile, "w", encoding="utf-8") as f:
|
|
79
82
|
f.write(rendered)
|
|
80
83
|
|
|
@@ -88,7 +91,16 @@ class MermaidClassDiagramGenerator(Generator):
|
|
|
88
91
|
|
|
89
92
|
def name(self, element: Element) -> str:
|
|
90
93
|
"""Returns the canonical name for an element."""
|
|
91
|
-
|
|
94
|
+
if element is None:
|
|
95
|
+
return ""
|
|
96
|
+
if self.preserve_names:
|
|
97
|
+
return element.name
|
|
98
|
+
elif type(element).class_name == "slot_definition":
|
|
99
|
+
return underscore(element.name)
|
|
100
|
+
elif type(element).class_name == "class_definition":
|
|
101
|
+
return camelcase(element.name)
|
|
102
|
+
else:
|
|
103
|
+
return camelcase(element.name)
|
|
92
104
|
|
|
93
105
|
def link_mermaid(self, element):
|
|
94
106
|
"""Generates a link for the given element."""
|
|
@@ -121,6 +133,12 @@ class MermaidClassDiagramGenerator(Generator):
|
|
|
121
133
|
help="One or more classes in the schema for which to generate diagrams. "
|
|
122
134
|
"If omitted, diagrams for all classes are generated.",
|
|
123
135
|
)
|
|
136
|
+
@click.option(
|
|
137
|
+
"--preserve-names/--normalize-names",
|
|
138
|
+
default=False,
|
|
139
|
+
show_default=True,
|
|
140
|
+
help="Preserve original LinkML names in Mermaid diagram output (e.g., for class names, slot names, file names).",
|
|
141
|
+
)
|
|
124
142
|
@click.version_option(click.__version__, "-V", "--version")
|
|
125
143
|
def cli(yamlfile, template_file, directory, classes, **args):
|
|
126
144
|
logging.basicConfig(level=logging.INFO)
|
linkml/generators/owlgen.py
CHANGED
|
@@ -1250,7 +1250,10 @@ class OwlSchemaGenerator(Generator):
|
|
|
1250
1250
|
if pv.meaning:
|
|
1251
1251
|
return URIRef(self.schemaview.expand_curie(pv.meaning))
|
|
1252
1252
|
else:
|
|
1253
|
-
|
|
1253
|
+
from urllib.parse import quote
|
|
1254
|
+
|
|
1255
|
+
encoded_text = quote(pv.text.strip(), safe="", encoding="utf-8")
|
|
1256
|
+
return URIRef(enum_uri + self.enum_iri_separator + encoded_text)
|
|
1254
1257
|
|
|
1255
1258
|
def slot_owl_type(self, slot: SlotDefinition) -> URIRef:
|
|
1256
1259
|
sv = self.schemaview
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from linkml.generators.oocodegen import OOClass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DataframeClass(OOClass):
|
|
5
|
+
"""Serves as an adapter between the template that renders the form of the
|
|
6
|
+
dataframe schema and the LinkML model and schema view.
|
|
7
|
+
|
|
8
|
+
Currently a thin wrapper around OOClass
|
|
9
|
+
until the dataframe requirements are fully understood.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
def identifier_key_slot(self):
|
|
13
|
+
return self.annotations.get("identifier_key_slot", None)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from contextlib import suppress
|
|
2
|
+
|
|
3
|
+
from linkml.generators.oocodegen import OOField
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DataframeField(OOField):
|
|
7
|
+
"""Serves as an adapter between the template that renders the form of the
|
|
8
|
+
dataframe schema fields and the LinkML model and schema view.
|
|
9
|
+
|
|
10
|
+
Currently a thin wrapper around OOField
|
|
11
|
+
until the dataframe requirements are fully understood.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def inline_form(self):
|
|
15
|
+
return self.source_slot.annotations._get("inline_form", None)
|
|
16
|
+
|
|
17
|
+
def reference_class(self):
|
|
18
|
+
with suppress(AttributeError, KeyError):
|
|
19
|
+
return self.source_slot.annotations._get("reference_class", None)
|
|
20
|
+
return None
|
|
21
|
+
|
|
22
|
+
def maximum_value(self):
|
|
23
|
+
return self.source_slot.maximum_value
|
|
24
|
+
|
|
25
|
+
def minimum_value(self):
|
|
26
|
+
return self.source_slot.minimum_value
|
|
27
|
+
|
|
28
|
+
def pattern(self):
|
|
29
|
+
return self.source_slot.pattern
|
|
30
|
+
|
|
31
|
+
def minimum_cardinality(self):
|
|
32
|
+
return self.source_slot.minimum_cardinality
|
|
33
|
+
|
|
34
|
+
def maximum_cardinality(self):
|
|
35
|
+
return self.source_slot.maximum_cardinality
|
|
36
|
+
|
|
37
|
+
def permissible_values(self):
|
|
38
|
+
return self.source_slot.annotations._get("permissible_values", [])
|
|
39
|
+
|
|
40
|
+
def inline_details(self):
|
|
41
|
+
return self.source_slot.annotations._get("inline_details", None)
|
|
42
|
+
|
|
43
|
+
def required(self):
|
|
44
|
+
return self.source_slot.required
|
|
45
|
+
|
|
46
|
+
def identifier(self):
|
|
47
|
+
return self.source_slot.identifier
|
|
48
|
+
|
|
49
|
+
def description(self):
|
|
50
|
+
return self.source_slot.description
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
4
|
+
import pandera
|
|
5
|
+
import polars as pl
|
|
6
|
+
from pandera.api.polars.types import PolarsData
|
|
7
|
+
|
|
8
|
+
from linkml.generators.panderagen.transforms import (
|
|
9
|
+
CollectionDictModelTransform,
|
|
10
|
+
ListDictModelTransform,
|
|
11
|
+
NestedStructModelTransform,
|
|
12
|
+
SimpleDictModelTransform,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def handle_validation_exceptions(func):
|
|
17
|
+
@wraps(func)
|
|
18
|
+
def wrapper(*args, **kwargs):
|
|
19
|
+
try:
|
|
20
|
+
return func(*args, **kwargs)
|
|
21
|
+
except pl.exceptions.PanicException:
|
|
22
|
+
data = args[2] if len(args) > 2 else kwargs.get("data")
|
|
23
|
+
return data.lazyframe.select(pl.lit(False))
|
|
24
|
+
except pandera.errors.SchemaError as e:
|
|
25
|
+
raise e
|
|
26
|
+
except Exception:
|
|
27
|
+
data = args[2] if len(args) > 2 else kwargs.get("data")
|
|
28
|
+
return data.lazyframe.select(pl.lit(False))
|
|
29
|
+
|
|
30
|
+
return wrapper
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class LinkmlPanderaValidator:
|
|
34
|
+
@classmethod
|
|
35
|
+
def get_id_column_name(cls):
|
|
36
|
+
return cls._id_name
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def _simple_dict_fields(cls, column_name):
|
|
40
|
+
details = cls._INLINE_DETAILS[column_name] # <-- THESE ARE GOING ON THE OUTER CLASS
|
|
41
|
+
|
|
42
|
+
return (details["id"], details["other"])
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def _prepare_simple_dict(cls, data: PolarsData):
|
|
46
|
+
"""Returns just the simple dict column transformed to an inlined list form
|
|
47
|
+
|
|
48
|
+
note that this method uses collect and iter_rows so is very inefficient
|
|
49
|
+
"""
|
|
50
|
+
column_name = data.key
|
|
51
|
+
polars_schema = cls.get_nested_range(column_name).to_schema()
|
|
52
|
+
(id_column, other_column) = cls._simple_dict_fields(column_name)
|
|
53
|
+
|
|
54
|
+
simple_dict_transformer = SimpleDictModelTransform(polars_schema, id_column, other_column)
|
|
55
|
+
|
|
56
|
+
one_column_df = data.lazyframe.select(pl.col(column_name)).collect()
|
|
57
|
+
|
|
58
|
+
list_of_structs = [simple_dict_transformer.transform(e) for [e] in one_column_df.iter_rows()]
|
|
59
|
+
|
|
60
|
+
return pl.DataFrame(pl.Series(list_of_structs).alias(column_name))
|
|
61
|
+
|
|
62
|
+
@classmethod
|
|
63
|
+
@handle_validation_exceptions
|
|
64
|
+
def _check_simple_dict(cls, data: PolarsData):
|
|
65
|
+
"""
|
|
66
|
+
The 'simple dict' format, in which the key serves as a local identifier is not a good match for a PolaRS
|
|
67
|
+
DataFrame. At present the format is
|
|
68
|
+
"""
|
|
69
|
+
df = cls._prepare_simple_dict(data)
|
|
70
|
+
|
|
71
|
+
column_name = data.key
|
|
72
|
+
|
|
73
|
+
polars_schema = cls.get_nested_range(column_name).to_schema()
|
|
74
|
+
simple_transform = SimpleDictModelTransform(polars_schema, *cls._simple_dict_fields(column_name))
|
|
75
|
+
df = simple_transform.explode_unnest_dataframe(df, column_name)
|
|
76
|
+
|
|
77
|
+
nested_cls = cls.get_nested_range(column_name)
|
|
78
|
+
nested_cls.validate(df)
|
|
79
|
+
return data.lazyframe.select(pl.lit(True))
|
|
80
|
+
|
|
81
|
+
@classmethod
|
|
82
|
+
@handle_validation_exceptions
|
|
83
|
+
def _check_collection_struct(cls, data: PolarsData):
|
|
84
|
+
column_name = data.key
|
|
85
|
+
nested_cls = cls.get_nested_range(column_name)
|
|
86
|
+
|
|
87
|
+
df = CollectionDictModelTransform.prepare_dataframe(data, column_name, nested_cls)
|
|
88
|
+
|
|
89
|
+
collection_transform = CollectionDictModelTransform(nested_cls.to_schema(), nested_cls.get_id_column_name())
|
|
90
|
+
df = collection_transform.explode_unnest_dataframe(df, column_name)
|
|
91
|
+
|
|
92
|
+
nested_cls.validate(df)
|
|
93
|
+
return data.lazyframe.select(pl.lit(True))
|
|
94
|
+
|
|
95
|
+
@classmethod
|
|
96
|
+
@handle_validation_exceptions
|
|
97
|
+
def _check_nested_list_struct(cls, data: PolarsData):
|
|
98
|
+
"""Use this in a custom check. Pass the nested model as pandera_model."""
|
|
99
|
+
column_name = data.key
|
|
100
|
+
nested_cls = cls.get_nested_range(column_name)
|
|
101
|
+
|
|
102
|
+
df = ListDictModelTransform.prepare_dataframe(data, column_name, nested_cls)
|
|
103
|
+
|
|
104
|
+
list_transform = ListDictModelTransform(nested_cls.to_schema())
|
|
105
|
+
df = list_transform.explode_unnest_dataframe(df, column_name, data)
|
|
106
|
+
|
|
107
|
+
nested_cls.validate(df)
|
|
108
|
+
return data.lazyframe.select(pl.lit(True))
|
|
109
|
+
|
|
110
|
+
@classmethod
|
|
111
|
+
@handle_validation_exceptions
|
|
112
|
+
def _check_nested_struct(cls, data: PolarsData):
|
|
113
|
+
"""Use this in a custom check. Pass the nested model as pandera_model."""
|
|
114
|
+
column_name = data.key
|
|
115
|
+
nested_cls = cls.get_nested_range(column_name)
|
|
116
|
+
|
|
117
|
+
df = NestedStructModelTransform.prepare_dataframe(data, column_name, nested_cls)
|
|
118
|
+
nested_transform = NestedStructModelTransform(nested_cls.to_schema())
|
|
119
|
+
df = nested_transform.explode_unnest_dataframe(df, column_name)
|
|
120
|
+
|
|
121
|
+
nested_cls.validate(df)
|
|
122
|
+
return data.lazyframe.select(pl.lit(True))
|
|
123
|
+
|
|
124
|
+
@classmethod
|
|
125
|
+
def get_nested_range(cls, column_name):
|
|
126
|
+
"""Resolve a nested class range at runtime.
|
|
127
|
+
|
|
128
|
+
Nested classes are not stored in the pandera schema,
|
|
129
|
+
but rather in the _NESTED_RANGES dictionary as strings.
|
|
130
|
+
"""
|
|
131
|
+
nested_cls_name = cls._NESTED_RANGES[column_name]
|
|
132
|
+
shared_model_module = inspect.getmodule(cls)
|
|
133
|
+
nested_cls = getattr(shared_model_module, nested_cls_name)
|
|
134
|
+
|
|
135
|
+
return nested_cls
|
|
136
|
+
|
|
137
|
+
@classmethod
|
|
138
|
+
def generate_polars_schema_simple(cls):
|
|
139
|
+
# This is not nesting or list aware, so needs to be aligned with the other method
|
|
140
|
+
return pl.Struct({k: v.dtype.type for k, v in cls.to_schema().columns.items()})
|
|
141
|
+
|
|
142
|
+
@classmethod
|
|
143
|
+
def generate_polars_schema(cls, object_to_validate, parser=False) -> dict:
|
|
144
|
+
"""Creates a nested PolaRS schema suitable for loading the object_to_validate.
|
|
145
|
+
Optional columns that are not present in the data are omitted.
|
|
146
|
+
This approach is only suitable to enable the test fixtures.
|
|
147
|
+
"""
|
|
148
|
+
polars_schema = {}
|
|
149
|
+
|
|
150
|
+
if isinstance(object_to_validate, list):
|
|
151
|
+
object_to_validate = object_to_validate[0]
|
|
152
|
+
|
|
153
|
+
for column_name, column in cls.to_schema().columns.items():
|
|
154
|
+
dtype = column.properties["dtype"]
|
|
155
|
+
required = column.properties["required"]
|
|
156
|
+
|
|
157
|
+
if required or column_name in object_to_validate:
|
|
158
|
+
if dtype.type in [pl.Struct, pl.List]: # maybe use inline form directly here
|
|
159
|
+
inline_form = cls._INLINE_FORM.get(column_name, "not_inline")
|
|
160
|
+
if inline_form == "simple_dict":
|
|
161
|
+
polars_schema[column_name] = pl.Object # make this a struct and make the nested non-
|
|
162
|
+
elif inline_form == "not_inline":
|
|
163
|
+
polars_schema[column_name] = dtype.type
|
|
164
|
+
else:
|
|
165
|
+
nested_cls = cls.get_nested_range(column_name)
|
|
166
|
+
if inline_form == "inlined_dict":
|
|
167
|
+
if parser:
|
|
168
|
+
nested_schema = nested_cls.generate_polars_schema(
|
|
169
|
+
object_to_validate[column_name], parser
|
|
170
|
+
)
|
|
171
|
+
polars_schema[column_name] = pl.Struct(nested_schema)
|
|
172
|
+
else:
|
|
173
|
+
polars_schema[column_name] = pl.Struct
|
|
174
|
+
elif inline_form == "inlined_list_dict":
|
|
175
|
+
if parser:
|
|
176
|
+
nested_schema = nested_cls.generate_polars_schema(
|
|
177
|
+
object_to_validate[column_name], parser
|
|
178
|
+
)
|
|
179
|
+
polars_schema[column_name] = pl.List(pl.Struct(nested_schema))
|
|
180
|
+
else:
|
|
181
|
+
# transformed form
|
|
182
|
+
polars_schema[column_name] = pl.List
|
|
183
|
+
else:
|
|
184
|
+
polars_schema[column_name] = dtype.type
|
|
185
|
+
|
|
186
|
+
return polars_schema
|