linkml 1.9.4rc2__py3-none-any.whl → 1.9.5rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. linkml/cli/main.py +4 -0
  2. linkml/generators/__init__.py +2 -0
  3. linkml/generators/common/build.py +5 -20
  4. linkml/generators/common/template.py +289 -3
  5. linkml/generators/docgen.py +55 -10
  6. linkml/generators/erdiagramgen.py +9 -5
  7. linkml/generators/graphqlgen.py +32 -6
  8. linkml/generators/jsonldcontextgen.py +78 -12
  9. linkml/generators/jsonschemagen.py +29 -12
  10. linkml/generators/mermaidclassdiagramgen.py +21 -3
  11. linkml/generators/owlgen.py +4 -1
  12. linkml/generators/panderagen/dataframe_class.py +13 -0
  13. linkml/generators/panderagen/dataframe_field.py +50 -0
  14. linkml/generators/panderagen/linkml_pandera_validator.py +186 -0
  15. linkml/generators/panderagen/panderagen.py +22 -5
  16. linkml/generators/panderagen/panderagen_class_based/class.jinja2 +70 -13
  17. linkml/generators/panderagen/panderagen_class_based/custom_checks.jinja2 +27 -0
  18. linkml/generators/panderagen/panderagen_class_based/enums.jinja2 +3 -3
  19. linkml/generators/panderagen/panderagen_class_based/pandera.jinja2 +12 -2
  20. linkml/generators/panderagen/panderagen_class_based/slots.jinja2 +19 -17
  21. linkml/generators/panderagen/slot_generator_mixin.py +143 -16
  22. linkml/generators/panderagen/transforms/__init__.py +19 -0
  23. linkml/generators/panderagen/transforms/collection_dict_model_transform.py +62 -0
  24. linkml/generators/panderagen/transforms/list_dict_model_transform.py +66 -0
  25. linkml/generators/panderagen/transforms/model_transform.py +8 -0
  26. linkml/generators/panderagen/transforms/nested_struct_model_transform.py +27 -0
  27. linkml/generators/panderagen/transforms/simple_dict_model_transform.py +86 -0
  28. linkml/generators/plantumlgen.py +17 -11
  29. linkml/generators/pydanticgen/pydanticgen.py +53 -2
  30. linkml/generators/pydanticgen/template.py +45 -233
  31. linkml/generators/pydanticgen/templates/attribute.py.jinja +1 -0
  32. linkml/generators/pydanticgen/templates/base_model.py.jinja +16 -2
  33. linkml/generators/pydanticgen/templates/imports.py.jinja +1 -1
  34. linkml/generators/rdfgen.py +11 -2
  35. linkml/generators/rustgen/__init__.py +3 -0
  36. linkml/generators/rustgen/build.py +94 -0
  37. linkml/generators/rustgen/cli.py +65 -0
  38. linkml/generators/rustgen/rustgen.py +1038 -0
  39. linkml/generators/rustgen/template.py +865 -0
  40. linkml/generators/rustgen/templates/Cargo.toml.jinja +42 -0
  41. linkml/generators/rustgen/templates/anything.rs.jinja +142 -0
  42. linkml/generators/rustgen/templates/as_key_value.rs.jinja +56 -0
  43. linkml/generators/rustgen/templates/class_module.rs.jinja +8 -0
  44. linkml/generators/rustgen/templates/enum.rs.jinja +54 -0
  45. linkml/generators/rustgen/templates/file.rs.jinja +62 -0
  46. linkml/generators/rustgen/templates/import.rs.jinja +4 -0
  47. linkml/generators/rustgen/templates/imports.rs.jinja +8 -0
  48. linkml/generators/rustgen/templates/poly.rs.jinja +9 -0
  49. linkml/generators/rustgen/templates/poly_containers.rs.jinja +439 -0
  50. linkml/generators/rustgen/templates/poly_trait.rs.jinja +15 -0
  51. linkml/generators/rustgen/templates/poly_trait_impl.rs.jinja +5 -0
  52. linkml/generators/rustgen/templates/poly_trait_impl_orsubtype.rs.jinja +5 -0
  53. linkml/generators/rustgen/templates/poly_trait_property.rs.jinja +8 -0
  54. linkml/generators/rustgen/templates/poly_trait_property_impl.rs.jinja +132 -0
  55. linkml/generators/rustgen/templates/poly_trait_property_match.rs.jinja +10 -0
  56. linkml/generators/rustgen/templates/property.rs.jinja +19 -0
  57. linkml/generators/rustgen/templates/pyproject.toml.jinja +10 -0
  58. linkml/generators/rustgen/templates/serde_utils.rs.jinja +310 -0
  59. linkml/generators/rustgen/templates/slot_range_as_union.rs.jinja +61 -0
  60. linkml/generators/rustgen/templates/struct.rs.jinja +75 -0
  61. linkml/generators/rustgen/templates/struct_or_subtype_enum.rs.jinja +108 -0
  62. linkml/generators/rustgen/templates/typealias.rs.jinja +13 -0
  63. linkml/generators/sqltablegen.py +18 -16
  64. linkml/generators/yarrrmlgen.py +157 -0
  65. linkml/linter/config/datamodel/config.py +160 -293
  66. linkml/linter/config/datamodel/config.yaml +34 -26
  67. linkml/linter/config/default.yaml +4 -0
  68. linkml/linter/config/recommended.yaml +4 -0
  69. linkml/linter/linter.py +1 -2
  70. linkml/linter/rules.py +37 -0
  71. linkml/utils/schemaloader.py +55 -3
  72. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/METADATA +1 -1
  73. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/RECORD +76 -38
  74. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/entry_points.txt +1 -0
  75. linkml/generators/panderagen/panderagen_class_based/mixins.jinja2 +0 -26
  76. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/WHEEL +0 -0
  77. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/licenses/LICENSE +0 -0
@@ -24,6 +24,7 @@ class GraphqlGenerator(Generator):
24
24
 
25
25
  strict_naming: bool = False
26
26
  _permissible_value_valid_characters = re.compile("^[_A-Za-z][_0-9A-Za-z]*?$")
27
+ _types_any = []
27
28
 
28
29
  def __post_init__(self):
29
30
  self.name_compatiblity = NameCompatibility(profile=NamingProfiles.graphql, do_not_fix=self.strict_naming)
@@ -39,6 +40,10 @@ class GraphqlGenerator(Generator):
39
40
  return out
40
41
 
41
42
  def visit_class(self, cls: ClassDefinition) -> str:
43
+ # no type can be declared for subtypes of "Any"
44
+ if cls.class_uri == "linkml:Any":
45
+ self._types_any.append(cls.name)
46
+ return f"scalar {cls.name}"
42
47
  etype = "interface" if (cls.abstract or cls.mixin) and not cls.mixins else "type"
43
48
  mixins = ", ".join([camelcase(mixin) for mixin in cls.mixins])
44
49
  out = f"{etype} {camelcase(cls.name)}" + (f" implements {mixins}" if mixins else "")
@@ -46,14 +51,35 @@ class GraphqlGenerator(Generator):
46
51
  return out
47
52
 
48
53
  def end_class(self, cls: ClassDefinition) -> str:
49
- return "\n }\n\n"
54
+ if cls.name in self._types_any:
55
+ return "\n\n"
56
+ else:
57
+ return "\n }\n\n"
50
58
 
51
59
  def visit_class_slot(self, cls: ClassDefinition, aliased_slot_name: str, slot: SlotDefinition) -> str:
52
- slotrange = (
53
- camelcase(slot.range)
54
- if slot.range in self.schema.classes or slot.range in self.schema.types or slot.range in self.schema.enums
55
- else "String"
56
- )
60
+ if slot.range in self.schema.classes or slot.range in self.schema.slots or slot.range in self.schema.enums:
61
+ slotrange = camelcase(slot.range)
62
+ elif slot.range in self.schema.types:
63
+ if self.schema.types[slot.range].from_schema != "https://w3id.org/linkml/types":
64
+ slotrange = camelcase(slot.range)
65
+ else:
66
+ graphql_scalars = ["Int", "Float", "String", "Boolean", "ID"]
67
+ if slot.range == "integer":
68
+ slotrange = "Int"
69
+ elif slot.range == "decimal":
70
+ slotrange = "Float"
71
+ elif camelcase(slot.range) in graphql_scalars:
72
+ slotrange = camelcase(slot.range)
73
+ else:
74
+ if self.schema.types[slot.range].repr:
75
+ python_type = self.schema.types[slot.range].repr
76
+ elif self.schema.types[slot.range].base:
77
+ python_type = self.schema.types[slot.range].base
78
+ if str(python_type) == "float":
79
+ slotrange = "Float"
80
+ elif str(python_type) == "str":
81
+ slotrange = "String"
82
+
57
83
  if slot.multivalued:
58
84
  slotrange = f"[{slotrange}]"
59
85
  if slot.required:
@@ -1,11 +1,12 @@
1
1
  """
2
2
  Generate JSON-LD contexts
3
-
4
3
  """
5
4
 
5
+ import json
6
6
  import os
7
7
  import re
8
8
  from dataclasses import dataclass, field
9
+ from pathlib import Path
9
10
  from typing import Any, Optional, Union
10
11
 
11
12
  import click
@@ -50,6 +51,12 @@ class ContextGenerator(Generator):
50
51
  prefixes: Optional[bool] = True
51
52
  flatprefixes: Optional[bool] = False
52
53
 
54
+ # Framing (opt-in via CLI flag)
55
+ emit_frame: bool = False
56
+ embed_context_in_frame: bool = False
57
+ frame_body: dict = field(default_factory=lambda: dict())
58
+ frame_root: Optional[str] = None
59
+
53
60
  def __post_init__(self) -> None:
54
61
  super().__post_init__()
55
62
  if self.namespaces is None:
@@ -121,10 +128,40 @@ class ContextGenerator(Generator):
121
128
  for k, v in self.slot_class_maps.items():
122
129
  context_content[k] = v
123
130
  context["@context"] = context_content
124
- if output:
131
+ if output and not self.embed_context_in_frame:
125
132
  with open(output, "w", encoding="UTF-8") as outf:
126
133
  outf.write(as_json(context))
127
134
 
135
+ if self.emit_frame and self.frame_body and output:
136
+ root_name = None
137
+ for cname, c in self.schema.classes.items():
138
+ if getattr(c, "tree_root", False):
139
+ root_name = cname
140
+ break
141
+ if root_name is None and self.schema.classes:
142
+ root_name = next(iter(self.schema.classes))
143
+
144
+ if self.embed_context_in_frame:
145
+ frame = {
146
+ "@context": context["@context"],
147
+ "@omitGraph": True,
148
+ }
149
+ else:
150
+ frame = {
151
+ "@context": Path(output).name,
152
+ "@omitGraph": True,
153
+ }
154
+ if root_name:
155
+ root_cls = self.schema.classes[root_name]
156
+ frame["@type"] = root_cls.class_uri or root_cls.name
157
+
158
+ for prop, rule in self.frame_body.items():
159
+ frame[prop] = rule
160
+
161
+ frame_path = Path(output).with_suffix(".frame.jsonld")
162
+ with open(frame_path, "w", encoding="UTF-8") as f:
163
+ json.dump(frame, f, indent=2, ensure_ascii=False)
164
+
128
165
  return str(as_json(context)) + "\n"
129
166
 
130
167
  def visit_class(self, cls: ClassDefinition) -> bool:
@@ -136,6 +173,10 @@ class ContextGenerator(Generator):
136
173
  if class_def:
137
174
  self.slot_class_maps[cn] = class_def
138
175
 
176
+ # prefer explicit tree_root for frame @type
177
+ if getattr(cls, "tree_root", False):
178
+ self.frame_root = cls.name
179
+
139
180
  # We don't bother to visit class slots - just all slots
140
181
  return True
141
182
 
@@ -146,13 +187,7 @@ class ContextGenerator(Generator):
146
187
  slot_def = {}
147
188
  if not slot.usage_slot_name:
148
189
  any_of_ranges = [any_of_el.range for any_of_el in slot.any_of]
149
- if slot.range in self.schema.classes:
150
- range_class_uri = self.schema.classes[slot.range].class_uri
151
- if range_class_uri and slot.inlined:
152
- slot_def["@type"] = range_class_uri
153
- else:
154
- slot_def["@type"] = "@id"
155
- elif any(rng in self.schema.classes for rng in any_of_ranges):
190
+ if slot.range in self.schema.classes or any(rng in self.schema.classes for rng in any_of_ranges):
156
191
  slot_def["@type"] = "@id"
157
192
  elif slot.range in self.schema.enums:
158
193
  slot_def["@context"] = ENUM_CONTEXT
@@ -174,7 +209,12 @@ class ContextGenerator(Generator):
174
209
  self._build_element_id(slot_def, slot.slot_uri)
175
210
  self.add_mappings(slot)
176
211
  if slot_def:
177
- self.context_body[underscore(aliased_slot_name)] = slot_def
212
+ key = underscore(aliased_slot_name)
213
+ self.context_body[key] = slot_def
214
+
215
+ # collect @embed only for object-valued slots (range is a class)
216
+ if slot.range in self.schema.classes and slot.inlined is not None:
217
+ self.frame_body[key] = {"@embed": "@always" if bool(slot.inlined) else "@never"}
178
218
 
179
219
  def _build_element_id(self, definition: Any, uri: str) -> None:
180
220
  """
@@ -227,10 +267,36 @@ class ContextGenerator(Generator):
227
267
  show_default=True,
228
268
  help="Emit non-JSON-LD compliant prefixes as an object (deprecated: use gen-prefix-map instead).",
229
269
  )
270
+ @click.option(
271
+ "--emit-frame/--no-emit-frame",
272
+ default=False,
273
+ show_default=True,
274
+ help="Also emit a <schema>.frame.jsonld file with @embed rules for framing",
275
+ )
276
+ @click.option(
277
+ "--embed-context-in-frame/--no-embed-context-in-frame",
278
+ default=False,
279
+ show_default=True,
280
+ help="Emit a <schema>.frame.jsonld file with @context embedded directly (single file)",
281
+ )
282
+ @click.option(
283
+ "-o",
284
+ "--output",
285
+ type=click.Path(),
286
+ help="Output file name",
287
+ )
230
288
  @click.version_option(__version__, "-V", "--version")
231
- def cli(yamlfile, **args):
289
+ def cli(yamlfile, emit_frame, embed_context_in_frame, output, **args):
232
290
  """Generate jsonld @context definition from LinkML model"""
233
- print(ContextGenerator(yamlfile, **args).serialize(**args))
291
+ if (emit_frame or embed_context_in_frame) and not output:
292
+ raise click.UsageError("--emit-frame/--embed-context-in-frame requires --output")
293
+ gen = ContextGenerator(yamlfile, **args)
294
+ if embed_context_in_frame:
295
+ gen.emit_frame = True
296
+ gen.embed_context_in_frame = True
297
+ else:
298
+ gen.emit_frame = emit_frame
299
+ print(gen.serialize(output=output, **args))
234
300
 
235
301
 
236
302
  if __name__ == "__main__":
@@ -49,13 +49,14 @@ json_schema_types: dict[str, tuple[str, Optional[str]]] = {
49
49
 
50
50
  class JsonSchema(dict):
51
51
  OPTIONAL_IDENTIFIER_SUFFIX = "__identifier_optional"
52
+ PRESERVE_NAMES: bool = False
52
53
 
53
54
  def __init__(self, *args, **kwargs):
54
55
  super().__init__(*args, **kwargs)
55
56
  self._lax_forward_refs = {}
56
57
 
57
58
  def add_def(self, name: str, subschema: "JsonSchema") -> None:
58
- canonical_name = camelcase(name)
59
+ canonical_name = name if self.PRESERVE_NAMES else camelcase(name)
59
60
 
60
61
  if "$defs" not in self:
61
62
  self["$defs"] = {}
@@ -78,7 +79,7 @@ class JsonSchema(dict):
78
79
  names = [names]
79
80
 
80
81
  for name in names:
81
- canonical_name = camelcase(name)
82
+ canonical_name = name if self.PRESERVE_NAMES else camelcase(name)
82
83
 
83
84
  if "$defs" not in self or canonical_name not in self["$defs"]:
84
85
  self._lax_forward_refs[canonical_name] = identifier_name
@@ -90,7 +91,7 @@ class JsonSchema(dict):
90
91
  def add_property(
91
92
  self, name: str, subschema: "JsonSchema", *, value_required: bool = False, value_disallowed: bool = False
92
93
  ) -> None:
93
- canonical_name = underscore(name)
94
+ canonical_name = name if self.PRESERVE_NAMES else underscore(name)
94
95
 
95
96
  if "properties" not in self:
96
97
  self["properties"] = {}
@@ -149,7 +150,7 @@ class JsonSchema(dict):
149
150
  @classmethod
150
151
  def ref_for(cls, class_name: Union[str, list[str]], identifier_optional: bool = False, required: bool = True):
151
152
  def _ref(class_name):
152
- def_name = camelcase(class_name)
153
+ def_name = class_name if cls.PRESERVE_NAMES else camelcase(class_name)
153
154
  def_suffix = cls.OPTIONAL_IDENTIFIER_SUFFIX if identifier_optional else ""
154
155
  return JsonSchema({"$ref": f"#/$defs/{def_name}{def_suffix}"})
155
156
 
@@ -169,8 +170,8 @@ class JsonSchema(dict):
169
170
  return ref
170
171
 
171
172
  @classmethod
172
- def array_of(cls, subschema: "JsonSchema", required: bool = True) -> "JsonSchema":
173
- if required:
173
+ def array_of(cls, subschema: "JsonSchema", include_null: bool, required: bool = True) -> "JsonSchema":
174
+ if required or not include_null:
174
175
  typ = "array"
175
176
  else:
176
177
  typ = ["array", "null"]
@@ -265,6 +266,9 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
265
266
  include_null: bool = True
266
267
  """Whether to include a "null" type in optional slots"""
267
268
 
269
+ preserve_names: bool = False
270
+ """If true, preserve LinkML element names in JSON Schema output (e.g., for $defs, properties, $ref targets)."""
271
+
268
272
  def __post_init__(self):
269
273
  if self.topClass:
270
274
  logger.warning("topClass is deprecated - use top_class")
@@ -272,6 +276,9 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
272
276
 
273
277
  super().__post_init__()
274
278
 
279
+ # Set the class variable for JsonSchema to use
280
+ JsonSchema.PRESERVE_NAMES = self.preserve_names
281
+
275
282
  if self.top_class:
276
283
  if self.schemaview.get_class(self.top_class) is None:
277
284
  logger.warning(f"No class in schema named {self.top_class}")
@@ -372,9 +379,13 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
372
379
 
373
380
  self.top_level_schema.add_def(cls.name, class_subschema)
374
381
 
375
- if (self.top_class is not None and camelcase(self.top_class) == camelcase(cls.name)) or (
376
- self.top_class is None and cls.tree_root
377
- ):
382
+ if (
383
+ self.top_class is not None
384
+ and (
385
+ (self.preserve_names and self.top_class == cls.name)
386
+ or (not self.preserve_names and camelcase(self.top_class) == camelcase(cls.name))
387
+ )
388
+ ) or (self.top_class is None and cls.tree_root):
378
389
  for key, value in class_subschema.items():
379
390
  # check this first to ensure we don't overwrite things like additionalProperties
380
391
  # or description on the root. But we do want to copy over properties, required,
@@ -534,7 +545,7 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
534
545
  "additionalProperties": True,
535
546
  }
536
547
  )
537
- return JsonSchema.array_of(prop, required=slot.required)
548
+ return JsonSchema.array_of(prop, include_null, required=slot.required)
538
549
  slot_is_multivalued = "multivalued" in slot and slot.multivalued
539
550
  slot_is_inlined = self.schemaview.is_inlined(slot)
540
551
  slot_is_boolean = any([slot.any_of, slot.all_of, slot.exactly_one_of, slot.none_of])
@@ -579,7 +590,7 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
579
590
  prop = JsonSchema({"type": typ, "additionalProperties": additionalProps})
580
591
  self.top_level_schema.add_lax_def(reference, self.aliased_slot_name(range_id_slot))
581
592
  else:
582
- prop = JsonSchema.array_of(JsonSchema.ref_for(reference), required=slot.required)
593
+ prop = JsonSchema.array_of(JsonSchema.ref_for(reference), include_null, required=slot.required)
583
594
  else:
584
595
  prop = JsonSchema.ref_for(reference, required=slot.required or not include_null)
585
596
 
@@ -592,7 +603,7 @@ class JsonSchemaGenerator(Generator, LifecycleMixin):
592
603
  prop = JsonSchema({"type": typ, "format": fmt})
593
604
 
594
605
  if slot_is_multivalued:
595
- prop = JsonSchema.array_of(prop, required=slot.required)
606
+ prop = JsonSchema.array_of(prop, include_null, required=slot.required)
596
607
  else:
597
608
  # handle optionals - bools like any_of, etc. below as they call this method recursively
598
609
  if not slot.required and not slot_is_boolean and include_null:
@@ -763,6 +774,12 @@ YAML, and including it when necessary but not by default (e.g. in documentation
763
774
  show_default=True,
764
775
  help="If set, patterns will be materialized in the generated JSON Schema.",
765
776
  )
777
+ @click.option(
778
+ "--preserve-names/--normalize-names",
779
+ default=False,
780
+ show_default=True,
781
+ help="Preserve original LinkML names in JSON Schema output (e.g., for $defs, properties, $ref targets).",
782
+ )
766
783
  @click.version_option(__version__, "-V", "--version")
767
784
  def cli(yamlfile, **kwargs):
768
785
  """Generate JSON Schema representation of a LinkML model"""
@@ -8,6 +8,7 @@ from typing import Optional
8
8
  import click
9
9
  from jinja2 import Environment, FileSystemLoader
10
10
  from linkml_runtime.linkml_model.meta import Element, SlotDefinition
11
+ from linkml_runtime.utils.formatutils import camelcase, underscore
11
12
  from linkml_runtime.utils.schemaview import SchemaView
12
13
 
13
14
  from linkml.generators.docgen import DocGenerator
@@ -34,6 +35,7 @@ class MermaidClassDiagramGenerator(Generator):
34
35
  directory: Optional[str] = None # output directory with generated markdown files
35
36
  template_file: Optional[str] = None # custom/default jinja template for class diagrams
36
37
  classes: list[str] = field(default_factory=list) # optional subset of classes
38
+ preserve_names: bool = False # preserve original LinkML names in diagram output
37
39
 
38
40
  def __post_init__(self):
39
41
  super().__post_init__()
@@ -59,7 +61,7 @@ class MermaidClassDiagramGenerator(Generator):
59
61
  template_name = os.path.basename(self.template_file)
60
62
  loader = FileSystemLoader(template_folder)
61
63
  env = Environment(loader=loader)
62
- temp_doc_gen = DocGenerator(self.schema, mergeimports=self.mergeimports)
64
+ temp_doc_gen = DocGenerator(self.schema, mergeimports=self.mergeimports, preserve_names=self.preserve_names)
63
65
  temp_doc_gen.customize_environment(env)
64
66
 
65
67
  template = env.get_template(template_name)
@@ -74,7 +76,8 @@ class MermaidClassDiagramGenerator(Generator):
74
76
  for cn, class_def in class_items:
75
77
  self.logger.info(f"Generating Mermaid diagram for class: {cn}")
76
78
  rendered = template.render(gen=self, element=class_def, schemaview=self.schemaview)
77
- outfile = self.output_directory / f"{cn}.md"
79
+ filename = self.name(class_def) if self.preserve_names else cn
80
+ outfile = self.output_directory / f"{filename}.md"
78
81
  with open(outfile, "w", encoding="utf-8") as f:
79
82
  f.write(rendered)
80
83
 
@@ -88,7 +91,16 @@ class MermaidClassDiagramGenerator(Generator):
88
91
 
89
92
  def name(self, element: Element) -> str:
90
93
  """Returns the canonical name for an element."""
91
- return element.name
94
+ if element is None:
95
+ return ""
96
+ if self.preserve_names:
97
+ return element.name
98
+ elif type(element).class_name == "slot_definition":
99
+ return underscore(element.name)
100
+ elif type(element).class_name == "class_definition":
101
+ return camelcase(element.name)
102
+ else:
103
+ return camelcase(element.name)
92
104
 
93
105
  def link_mermaid(self, element):
94
106
  """Generates a link for the given element."""
@@ -121,6 +133,12 @@ class MermaidClassDiagramGenerator(Generator):
121
133
  help="One or more classes in the schema for which to generate diagrams. "
122
134
  "If omitted, diagrams for all classes are generated.",
123
135
  )
136
+ @click.option(
137
+ "--preserve-names/--normalize-names",
138
+ default=False,
139
+ show_default=True,
140
+ help="Preserve original LinkML names in Mermaid diagram output (e.g., for class names, slot names, file names).",
141
+ )
124
142
  @click.version_option(click.__version__, "-V", "--version")
125
143
  def cli(yamlfile, template_file, directory, classes, **args):
126
144
  logging.basicConfig(level=logging.INFO)
@@ -1250,7 +1250,10 @@ class OwlSchemaGenerator(Generator):
1250
1250
  if pv.meaning:
1251
1251
  return URIRef(self.schemaview.expand_curie(pv.meaning))
1252
1252
  else:
1253
- return URIRef(enum_uri + self.enum_iri_separator + pv.text.replace(" ", "+"))
1253
+ from urllib.parse import quote
1254
+
1255
+ encoded_text = quote(pv.text.strip(), safe="", encoding="utf-8")
1256
+ return URIRef(enum_uri + self.enum_iri_separator + encoded_text)
1254
1257
 
1255
1258
  def slot_owl_type(self, slot: SlotDefinition) -> URIRef:
1256
1259
  sv = self.schemaview
@@ -0,0 +1,13 @@
1
+ from linkml.generators.oocodegen import OOClass
2
+
3
+
4
class DataframeClass(OOClass):
    """Adapter between the dataframe schema template and the LinkML model.

    The template that renders the shape of a dataframe schema talks to this
    class instead of the LinkML model and schema view directly.

    At the moment it is only a thin layer over ``OOClass``; it is expected to
    grow once the dataframe requirements are fully understood.
    """

    def identifier_key_slot(self):
        """Return the ``identifier_key_slot`` annotation, or ``None`` if it is absent."""
        key_slot = self.annotations.get("identifier_key_slot", None)
        return key_slot
@@ -0,0 +1,50 @@
1
+ from contextlib import suppress
2
+
3
+ from linkml.generators.oocodegen import OOField
4
+
5
+
6
class DataframeField(OOField):
    """Adapter between the dataframe field template and the LinkML model.

    The template that renders dataframe schema fields talks to this class
    instead of the LinkML model and schema view directly.

    At the moment it is only a thin layer over ``OOField``; it is expected to
    grow once the dataframe requirements are fully understood.
    """

    # --- values stored as annotations on the source slot -------------------

    def inline_form(self):
        """Return the ``inline_form`` annotation of the source slot, or ``None``."""
        slot_annotations = self.source_slot.annotations
        return slot_annotations._get("inline_form", None)

    def reference_class(self):
        """Return the ``reference_class`` annotation of the source slot.

        ``None`` is returned when the annotation is missing or when the
        lookup raises ``AttributeError`` or ``KeyError``.
        """
        try:
            return self.source_slot.annotations._get("reference_class", None)
        except (AttributeError, KeyError):
            return None

    def permissible_values(self):
        """Return the ``permissible_values`` annotation, or an empty list."""
        slot_annotations = self.source_slot.annotations
        return slot_annotations._get("permissible_values", [])

    def inline_details(self):
        """Return the ``inline_details`` annotation of the source slot, or ``None``."""
        slot_annotations = self.source_slot.annotations
        return slot_annotations._get("inline_details", None)

    # --- values read straight from the source slot --------------------------

    def maximum_value(self):
        """Return ``maximum_value`` of the source slot."""
        slot = self.source_slot
        return slot.maximum_value

    def minimum_value(self):
        """Return ``minimum_value`` of the source slot."""
        slot = self.source_slot
        return slot.minimum_value

    def pattern(self):
        """Return ``pattern`` of the source slot."""
        slot = self.source_slot
        return slot.pattern

    def minimum_cardinality(self):
        """Return ``minimum_cardinality`` of the source slot."""
        slot = self.source_slot
        return slot.minimum_cardinality

    def maximum_cardinality(self):
        """Return ``maximum_cardinality`` of the source slot."""
        slot = self.source_slot
        return slot.maximum_cardinality

    def required(self):
        """Return ``required`` of the source slot."""
        slot = self.source_slot
        return slot.required

    def identifier(self):
        """Return ``identifier`` of the source slot."""
        slot = self.source_slot
        return slot.identifier

    def description(self):
        """Return ``description`` of the source slot."""
        slot = self.source_slot
        return slot.description
@@ -0,0 +1,186 @@
1
+ import inspect
2
+ from functools import wraps
3
+
4
+ import pandera
5
+ import polars as pl
6
+ from pandera.api.polars.types import PolarsData
7
+
8
+ from linkml.generators.panderagen.transforms import (
9
+ CollectionDictModelTransform,
10
+ ListDictModelTransform,
11
+ NestedStructModelTransform,
12
+ SimpleDictModelTransform,
13
+ )
14
+
15
+
16
def handle_validation_exceptions(func):
    """Decorate a check so unexpected failures become a failed check result.

    The wrapped callable is invoked unchanged and its result is returned.
    When it raises:

    * ``pandera.errors.SchemaError`` propagates to the caller untouched;
    * ``pl.exceptions.PanicException`` or any other ``Exception`` is turned
      into ``data.lazyframe.select(pl.lit(False))``, where ``data`` is the
      data argument the check was called with.

    The data argument is taken from the ``data`` keyword when present,
    otherwise from the right-most positional argument that exposes a
    ``lazyframe`` attribute. This works for ``(cls, data)`` style checks as
    well as for calls that pass extra leading positional arguments. If no
    such argument exists, the original exception is re-raised rather than
    being replaced by an unrelated ``AttributeError``.
    """

    def _find_data(args, kwargs):
        # Locate the object whose lazyframe is used to build the fallback result.
        data = kwargs.get("data")
        if data is not None:
            return data
        for candidate in reversed(args):
            if hasattr(candidate, "lazyframe"):
                return candidate
        return None

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except pandera.errors.SchemaError:
            # Schema errors are the caller's business; keep the traceback intact.
            raise
        except (pl.exceptions.PanicException, Exception):
            data = _find_data(args, kwargs)
            if data is None:
                # Nothing to build a fallback frame from: surface the real error.
                raise
            return data.lazyframe.select(pl.lit(False))

    return wrapper
31
+
32
+
33
class LinkmlPanderaValidator:
    """Validation helpers for nested and inlined columns of a polars-backed model.

    The methods read several class attributes that are not defined here
    (``_id_name``, ``_INLINE_DETAILS``, ``_INLINE_FORM``, ``_NESTED_RANGES``)
    and call ``to_schema()`` / ``validate()`` on this class and on nested
    classes.
    NOTE(review): these are presumably supplied by the generated pandera model
    classes that use this class as a base -- confirm against the templates.
    """

    @classmethod
    def get_id_column_name(cls):
        """Return the class-level ``_id_name`` attribute."""
        return cls._id_name

    @classmethod
    def _simple_dict_fields(cls, column_name):
        """Return the ``(id, other)`` field names recorded for a simple-dict column.

        The values come from ``cls._INLINE_DETAILS[column_name]``; a missing
        entry raises ``KeyError``.
        """
        # NOTE: the inline details are stored on the outer (containing) class.
        details = cls._INLINE_DETAILS[column_name]

        return (details["id"], details["other"])

    @classmethod
    def _prepare_simple_dict(cls, data: PolarsData):
        """Return only the simple-dict column, transformed to an inlined list form.

        The result is a one-column ``pl.DataFrame`` named after ``data.key``.

        Note that this method uses ``collect`` and ``iter_rows``, so it is
        very inefficient.
        """
        column_name = data.key
        polars_schema = cls.get_nested_range(column_name).to_schema()
        (id_column, other_column) = cls._simple_dict_fields(column_name)

        simple_dict_transformer = SimpleDictModelTransform(polars_schema, id_column, other_column)

        one_column_df = data.lazyframe.select(pl.col(column_name)).collect()

        list_of_structs = [simple_dict_transformer.transform(e) for [e] in one_column_df.iter_rows()]

        return pl.DataFrame(pl.Series(list_of_structs).alias(column_name))

    @classmethod
    @handle_validation_exceptions
    def _check_simple_dict(cls, data: PolarsData):
        """Validate a column stored in the 'simple dict' inline form.

        The 'simple dict' format, in which the key serves as a local
        identifier, is not a good match for a polars DataFrame. The column is
        therefore first converted to an inlined list form by
        ``_prepare_simple_dict``, then exploded/unnested and validated against
        the nested class for the column.

        Returns a lazy frame selecting the literal ``True`` when the nested
        validation does not raise.
        """
        df = cls._prepare_simple_dict(data)

        column_name = data.key

        # Resolve the nested class once and reuse it for schema and validation.
        nested_cls = cls.get_nested_range(column_name)
        polars_schema = nested_cls.to_schema()
        simple_transform = SimpleDictModelTransform(polars_schema, *cls._simple_dict_fields(column_name))
        df = simple_transform.explode_unnest_dataframe(df, column_name)

        nested_cls.validate(df)
        return data.lazyframe.select(pl.lit(True))

    @classmethod
    @handle_validation_exceptions
    def _check_collection_struct(cls, data: PolarsData):
        """Validate a column handled by ``CollectionDictModelTransform``.

        The column named ``data.key`` is prepared and exploded/unnested by the
        transform, keyed by the nested class's id column name, and the result
        is validated against the nested class.

        Returns a lazy frame selecting the literal ``True`` when the nested
        validation does not raise.
        """
        column_name = data.key
        nested_cls = cls.get_nested_range(column_name)

        df = CollectionDictModelTransform.prepare_dataframe(data, column_name, nested_cls)

        collection_transform = CollectionDictModelTransform(nested_cls.to_schema(), nested_cls.get_id_column_name())
        df = collection_transform.explode_unnest_dataframe(df, column_name)

        nested_cls.validate(df)
        return data.lazyframe.select(pl.lit(True))

    @classmethod
    @handle_validation_exceptions
    def _check_nested_list_struct(cls, data: PolarsData):
        """Validate a column handled by ``ListDictModelTransform``.

        Intended for use in a custom check. The nested model is looked up
        from ``data.key`` via ``get_nested_range``.

        Returns a lazy frame selecting the literal ``True`` when the nested
        validation does not raise.
        """
        column_name = data.key
        nested_cls = cls.get_nested_range(column_name)

        df = ListDictModelTransform.prepare_dataframe(data, column_name, nested_cls)

        list_transform = ListDictModelTransform(nested_cls.to_schema())
        # Unlike the other transforms, this one is also handed the check data.
        df = list_transform.explode_unnest_dataframe(df, column_name, data)

        nested_cls.validate(df)
        return data.lazyframe.select(pl.lit(True))

    @classmethod
    @handle_validation_exceptions
    def _check_nested_struct(cls, data: PolarsData):
        """Validate a column handled by ``NestedStructModelTransform``.

        Intended for use in a custom check. The nested model is looked up
        from ``data.key`` via ``get_nested_range``.

        Returns a lazy frame selecting the literal ``True`` when the nested
        validation does not raise.
        """
        column_name = data.key
        nested_cls = cls.get_nested_range(column_name)

        df = NestedStructModelTransform.prepare_dataframe(data, column_name, nested_cls)
        nested_transform = NestedStructModelTransform(nested_cls.to_schema())
        df = nested_transform.explode_unnest_dataframe(df, column_name)

        nested_cls.validate(df)
        return data.lazyframe.select(pl.lit(True))

    @classmethod
    def get_nested_range(cls, column_name):
        """Resolve a nested class range at runtime.

        Nested classes are not stored in the pandera schema, but rather in
        the ``_NESTED_RANGES`` dictionary as strings. The name is looked up
        as an attribute of the module in which ``cls`` is defined.

        Raises ``KeyError`` when ``column_name`` has no entry in
        ``_NESTED_RANGES`` and ``AttributeError`` when the module has no
        attribute with the recorded name.
        """
        nested_cls_name = cls._NESTED_RANGES[column_name]
        shared_model_module = inspect.getmodule(cls)
        nested_cls = getattr(shared_model_module, nested_cls_name)

        return nested_cls

    @classmethod
    def generate_polars_schema_simple(cls):
        """Return a ``pl.Struct`` built from the dtype of every schema column.

        This is not nesting or list aware, so it needs to be aligned with
        ``generate_polars_schema``.
        """
        return pl.Struct({k: v.dtype.type for k, v in cls.to_schema().columns.items()})

    @classmethod
    def generate_polars_schema(cls, object_to_validate, parser=False) -> dict:
        """Create a nested polars schema suitable for loading ``object_to_validate``.

        Optional columns that are not present in the data are omitted.
        This approach is only suitable to enable the test fixtures.

        Args:
            object_to_validate: the object whose columns decide which optional
                columns are included. It is tested with ``in`` and indexed by
                column name. For a list, only the first element is inspected;
                an empty list is treated as an object without any columns.
            parser: when true, ``inlined_dict`` and ``inlined_list_dict``
                columns get a fully specified nested schema built recursively
                from the nested class; otherwise they map to the bare
                ``pl.Struct`` / ``pl.List`` types.

        Returns:
            A dict mapping column name to polars type.
        """
        polars_schema = {}

        if isinstance(object_to_validate, list):
            # An empty list carries no observed columns; fall back to an
            # empty mapping instead of failing with IndexError.
            object_to_validate = object_to_validate[0] if object_to_validate else {}

        for column_name, column in cls.to_schema().columns.items():
            dtype = column.properties["dtype"]
            required = column.properties["required"]

            if not (required or column_name in object_to_validate):
                continue

            if dtype.type not in [pl.Struct, pl.List]:
                polars_schema[column_name] = dtype.type
                continue

            # maybe use inline form directly here
            inline_form = cls._INLINE_FORM.get(column_name, "not_inline")
            if inline_form == "simple_dict":
                # make this a struct and make the nested non-
                polars_schema[column_name] = pl.Object
            elif inline_form == "not_inline":
                polars_schema[column_name] = dtype.type
            else:
                nested_cls = cls.get_nested_range(column_name)
                if inline_form == "inlined_dict":
                    if parser:
                        nested_schema = nested_cls.generate_polars_schema(object_to_validate[column_name], parser)
                        polars_schema[column_name] = pl.Struct(nested_schema)
                    else:
                        polars_schema[column_name] = pl.Struct
                elif inline_form == "inlined_list_dict":
                    if parser:
                        nested_schema = nested_cls.generate_polars_schema(object_to_validate[column_name], parser)
                        polars_schema[column_name] = pl.List(pl.Struct(nested_schema))
                    else:
                        # transformed form
                        polars_schema[column_name] = pl.List
                # Any other inline form leaves the column out of the schema.

        return polars_schema