linkml 1.9.4rc2__py3-none-any.whl → 1.9.5rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. linkml/cli/main.py +5 -1
  2. linkml/converter/__init__.py +0 -0
  3. linkml/generators/__init__.py +2 -0
  4. linkml/generators/common/build.py +5 -20
  5. linkml/generators/common/template.py +289 -3
  6. linkml/generators/docgen.py +55 -10
  7. linkml/generators/erdiagramgen.py +9 -5
  8. linkml/generators/graphqlgen.py +32 -6
  9. linkml/generators/jsonldcontextgen.py +78 -12
  10. linkml/generators/jsonschemagen.py +29 -12
  11. linkml/generators/mermaidclassdiagramgen.py +21 -3
  12. linkml/generators/owlgen.py +13 -2
  13. linkml/generators/panderagen/dataframe_class.py +13 -0
  14. linkml/generators/panderagen/dataframe_field.py +50 -0
  15. linkml/generators/panderagen/linkml_pandera_validator.py +186 -0
  16. linkml/generators/panderagen/panderagen.py +22 -5
  17. linkml/generators/panderagen/panderagen_class_based/class.jinja2 +70 -13
  18. linkml/generators/panderagen/panderagen_class_based/custom_checks.jinja2 +27 -0
  19. linkml/generators/panderagen/panderagen_class_based/enums.jinja2 +3 -3
  20. linkml/generators/panderagen/panderagen_class_based/pandera.jinja2 +12 -2
  21. linkml/generators/panderagen/panderagen_class_based/slots.jinja2 +19 -17
  22. linkml/generators/panderagen/slot_generator_mixin.py +143 -16
  23. linkml/generators/panderagen/transforms/__init__.py +19 -0
  24. linkml/generators/panderagen/transforms/collection_dict_model_transform.py +62 -0
  25. linkml/generators/panderagen/transforms/list_dict_model_transform.py +66 -0
  26. linkml/generators/panderagen/transforms/model_transform.py +8 -0
  27. linkml/generators/panderagen/transforms/nested_struct_model_transform.py +27 -0
  28. linkml/generators/panderagen/transforms/simple_dict_model_transform.py +86 -0
  29. linkml/generators/plantumlgen.py +17 -11
  30. linkml/generators/pydanticgen/pydanticgen.py +53 -2
  31. linkml/generators/pydanticgen/template.py +45 -233
  32. linkml/generators/pydanticgen/templates/attribute.py.jinja +1 -0
  33. linkml/generators/pydanticgen/templates/base_model.py.jinja +16 -2
  34. linkml/generators/pydanticgen/templates/imports.py.jinja +1 -1
  35. linkml/generators/rdfgen.py +11 -2
  36. linkml/generators/rustgen/__init__.py +3 -0
  37. linkml/generators/rustgen/build.py +97 -0
  38. linkml/generators/rustgen/cli.py +83 -0
  39. linkml/generators/rustgen/rustgen.py +1186 -0
  40. linkml/generators/rustgen/template.py +910 -0
  41. linkml/generators/rustgen/templates/Cargo.toml.jinja +42 -0
  42. linkml/generators/rustgen/templates/anything.rs.jinja +149 -0
  43. linkml/generators/rustgen/templates/as_key_value.rs.jinja +86 -0
  44. linkml/generators/rustgen/templates/class_module.rs.jinja +8 -0
  45. linkml/generators/rustgen/templates/enum.rs.jinja +70 -0
  46. linkml/generators/rustgen/templates/file.rs.jinja +75 -0
  47. linkml/generators/rustgen/templates/import.rs.jinja +4 -0
  48. linkml/generators/rustgen/templates/imports.rs.jinja +8 -0
  49. linkml/generators/rustgen/templates/lib_shim.rs.jinja +52 -0
  50. linkml/generators/rustgen/templates/poly.rs.jinja +9 -0
  51. linkml/generators/rustgen/templates/poly_containers.rs.jinja +439 -0
  52. linkml/generators/rustgen/templates/poly_trait.rs.jinja +15 -0
  53. linkml/generators/rustgen/templates/poly_trait_impl.rs.jinja +5 -0
  54. linkml/generators/rustgen/templates/poly_trait_impl_orsubtype.rs.jinja +5 -0
  55. linkml/generators/rustgen/templates/poly_trait_property.rs.jinja +8 -0
  56. linkml/generators/rustgen/templates/poly_trait_property_impl.rs.jinja +134 -0
  57. linkml/generators/rustgen/templates/poly_trait_property_match.rs.jinja +10 -0
  58. linkml/generators/rustgen/templates/property.rs.jinja +28 -0
  59. linkml/generators/rustgen/templates/pyproject.toml.jinja +10 -0
  60. linkml/generators/rustgen/templates/serde_utils.rs.jinja +490 -0
  61. linkml/generators/rustgen/templates/slot_range_as_union.rs.jinja +64 -0
  62. linkml/generators/rustgen/templates/struct.rs.jinja +81 -0
  63. linkml/generators/rustgen/templates/struct_or_subtype_enum.rs.jinja +111 -0
  64. linkml/generators/rustgen/templates/stub_gen.rs.jinja +71 -0
  65. linkml/generators/rustgen/templates/stub_utils.rs.jinja +76 -0
  66. linkml/generators/rustgen/templates/typealias.rs.jinja +13 -0
  67. linkml/generators/sqltablegen.py +18 -16
  68. linkml/generators/yarrrmlgen.py +173 -0
  69. linkml/linter/config/datamodel/config.py +160 -293
  70. linkml/linter/config/datamodel/config.yaml +34 -26
  71. linkml/linter/config/default.yaml +4 -0
  72. linkml/linter/config/recommended.yaml +4 -0
  73. linkml/linter/linter.py +1 -2
  74. linkml/linter/rules.py +37 -0
  75. linkml/utils/schema_builder.py +2 -0
  76. linkml/utils/schemaloader.py +55 -3
  77. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/METADATA +1 -1
  78. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/RECORD +82 -40
  79. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/entry_points.txt +2 -1
  80. linkml/generators/panderagen/panderagen_class_based/mixins.jinja2 +0 -26
  81. /linkml/{utils/converter.py → converter/cli.py} +0 -0
  82. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/WHEEL +0 -0
  83. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/licenses/LICENSE +0 -0
@@ -1,31 +1,156 @@
1
1
  import logging
2
+ from typing import Optional
2
3
 
3
- from linkml.generators.oocodegen import OOField
4
+ from linkml_runtime.linkml_model.meta import ClassDefinitionName, SlotDefinition
5
+
6
+ from linkml.utils.helpers import get_range_associated_slots
7
+
8
+ from .dataframe_field import DataframeField
4
9
 
5
10
  logger = logging.getLogger(__file__)
6
11
 
7
12
 
8
13
  class SlotGeneratorMixin:
14
+ """
15
+ Prior to rendering the dataframe schema, this class provides
16
+ an adapter between the LinkML model and schema view
17
+ and the rendering engine.
18
+ """
19
+
9
20
  LINKML_ANY_CURIE = "linkml:Any"
21
+
22
+ # constants used to render the schema
23
+ # these will be moved to a dialect-specific place
10
24
  ANY_RANGE_STRING = "Object"
11
25
  CLASS_RANGE_STRING = "Struct"
26
+ SIMPLE_DICT_RANGE_STRING = "Struct"
12
27
  ENUM_RANGE_STRING = "Enum"
13
- DEFAULT_RANGE_STRING = "str"
14
28
 
15
- # to be implemented by the class
16
- def make_multivalued(self, range: str):
17
- raise NotImplementedError("please implement make multivalued in the class")
29
+ # association form flags used for rendering decisions
30
+ FORM_INLINED_DICT = "inlined_dict"
31
+ FORM_INLINED_LIST_DICT = "inlined_list_dict"
32
+ FORM_INLINED_COLLECTION_DICT = "inline_collection_dict"
33
+ FORM_INLINED_SIMPLE_DICT = "simple_dict"
34
+ FORM_MULTIVALUED_FOREIGN_KEY = "list_foreign_key"
35
+ FORM_FOREIGN_KEY = "foreign_key"
36
+ FORM_ERROR = "error"
37
+
38
+ # When nested inlining is done, the Pandera validator needs a specific range
39
+ INLINED_FORM_RANGE_PANDERA = {
40
+ FORM_INLINED_SIMPLE_DICT: SIMPLE_DICT_RANGE_STRING,
41
+ FORM_INLINED_LIST_DICT: CLASS_RANGE_STRING,
42
+ FORM_INLINED_COLLECTION_DICT: CLASS_RANGE_STRING,
43
+ FORM_INLINED_DICT: CLASS_RANGE_STRING,
44
+ FORM_ERROR: None,
45
+ }
46
+
47
+ def is_multivalued(self, slot):
48
+ return "multivalued" in slot and slot.multivalued is True
49
+
50
+ _INTERNAL_INLINED_FORM = {
51
+ # INLINED, INLINED_AS_LIST, MULTIVALUED,
52
+ (False, False, False): FORM_FOREIGN_KEY,
53
+ (False, False, True): FORM_MULTIVALUED_FOREIGN_KEY,
54
+ (False, True, False): FORM_INLINED_LIST_DICT,
55
+ (False, True, True): FORM_INLINED_LIST_DICT,
56
+ (True, False, False): FORM_INLINED_DICT,
57
+ (True, False, True): FORM_INLINED_COLLECTION_DICT,
58
+ (True, None, True): FORM_INLINED_DICT,
59
+ (True, True, False): FORM_INLINED_DICT,
60
+ (True, True, True): FORM_INLINED_LIST_DICT,
61
+ }
62
+
63
+ def get_identifier_or_key_slot(self, cn: ClassDefinitionName) -> Optional[SlotDefinition]:
64
+ sv = self.schemaview
65
+ id_slot = sv.get_identifier_slot(cn)
66
+ if id_slot:
67
+ return id_slot
68
+ else:
69
+ for s in sv.class_induced_slots(cn):
70
+ if s.key:
71
+ return s
72
+ return None
73
+
74
+ def calculate_inlined_form(self, slot: SlotDefinition) -> str:
75
+ is_multivalued = self.is_multivalued(slot)
76
+ internal_inlined_form_key = ((slot.inlined is True), (slot.inlined_as_list is True), is_multivalued)
77
+ logger.info(f"Inlined form key: {internal_inlined_form_key}")
78
+ internal_inlined_form = self._INTERNAL_INLINED_FORM.get(
79
+ internal_inlined_form_key, SlotGeneratorMixin.FORM_ERROR
80
+ )
81
+
82
+ if internal_inlined_form == SlotGeneratorMixin.FORM_INLINED_COLLECTION_DICT:
83
+ if self.get_identifier_or_key_slot(slot.range) is None:
84
+ internal_inlined_form = SlotGeneratorMixin.FORM_INLINED_LIST_DICT
85
+
86
+ if self.calculate_simple_dict(slot) is not None:
87
+ return SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT
88
+
89
+ return internal_inlined_form
90
+
91
+ def calculate_simple_dict(self, slot: SlotDefinition):
92
+ """slot is the container for the simple dict slot"""
93
+
94
+ (_, range_simple_dict_value_slot, _) = get_range_associated_slots(self.schemaview, slot.range)
95
+
96
+ return range_simple_dict_value_slot
18
97
 
19
- def handle_none_slot(self, slot, range: str) -> str:
98
+ def handle_none_slot(self, slot) -> str:
20
99
  range = self.schema.default_range # need to figure this out, set at the beginning?
21
100
  if range is None:
22
- range = SlotGeneratorMixin.DEFAULT_RANGE_STRING
101
+ range = "str"
23
102
 
24
103
  return range
25
104
 
26
105
  def handle_class_slot(self, slot, range: str) -> str:
27
- logger.warning(f"PanderaGen does not support class range slots. Using Struct {slot.name}")
28
- return SlotGeneratorMixin.CLASS_RANGE_STRING
106
+ range_info = self.schemaview.all_classes().get(range)
107
+
108
+ if range_info["class_uri"] == SlotGeneratorMixin.LINKML_ANY_CURIE:
109
+ range = SlotGeneratorMixin.ANY_RANGE_STRING
110
+ else:
111
+ inlined_form = self.calculate_inlined_form(slot)
112
+
113
+ if inlined_form == SlotGeneratorMixin.FORM_INLINED_COLLECTION_DICT:
114
+ logger.warning(
115
+ f"Slot {slot.name} uses inlined dictionary form,"
116
+ "which may be less efficient than inlined as list form with the current implementation."
117
+ )
118
+ elif inlined_form == SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT:
119
+ logger.warning(
120
+ f"Slot {slot.name} uses inlined simple dictionary form. Support is incomplete "
121
+ "and performance is less efficient than inlined as list form with the current implementation."
122
+ )
123
+
124
+ if inlined_form in (SlotGeneratorMixin.FORM_MULTIVALUED_FOREIGN_KEY, SlotGeneratorMixin.FORM_FOREIGN_KEY):
125
+ logger.warning(f"Foreign key not implemented for slot {slot.name}")
126
+ range = f"ID_TYPES['{self.get_class_name(range)}']"
127
+ else:
128
+ # TODO: make these setters
129
+ slot.annotations["reference_class"] = self.get_class_name(range)
130
+ slot.annotations["inline_form"] = inlined_form
131
+
132
+ range = SlotGeneratorMixin.INLINED_FORM_RANGE_PANDERA[inlined_form]
133
+
134
+ if inlined_form == SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT:
135
+ self.set_simple_dict_inline_details_annotation(slot)
136
+ elif inlined_form in [SlotGeneratorMixin.FORM_INLINED_LIST_DICT]:
137
+ range = self.make_multivalued(range)
138
+
139
+ return range
140
+
141
+ def set_simple_dict_inline_details_annotation(self, slot):
142
+ """Extra metadata is to help with the simple dict case"""
143
+ (range_id_slot, range_simple_dict_value_slot, _) = get_range_associated_slots( # range_required_slots,
144
+ self.schemaview, slot.range
145
+ )
146
+
147
+ simple_dict_id = range_id_slot.name
148
+ other_slot = range_simple_dict_value_slot.name
149
+ slot.annotations["inline_details"] = {"id": simple_dict_id, "other": other_slot}
150
+
151
+ def handle_non_inlined_class_slot(self, slot, range: str) -> str:
152
+ """non-inlined class slots have been temporarily removed but this will be needed to support them"""
153
+ return f"ID_TYPES['{self.get_class_name(range)}']"
29
154
 
30
155
  def handle_type_slot(self, slot, range: str) -> str:
31
156
  del slot # unused for now
@@ -43,9 +168,10 @@ class SlotGeneratorMixin:
43
168
  return range
44
169
 
45
170
  def handle_multivalued_slot(self, slot, range: str) -> str:
46
- if slot.multivalued:
47
- if slot.inlined_as_list and range != SlotGeneratorMixin.CLASS_RANGE_STRING:
48
- range = self.make_multivalued(range)
171
+ if (slot.inlined_as_list is True and self.is_multivalued(slot)) or (
172
+ slot.inlined is True and slot.inlined_as_list is True and self.is_multivalued(slot)
173
+ ):
174
+ range = self.make_multivalued(range)
49
175
 
50
176
  return range
51
177
 
@@ -58,19 +184,20 @@ class SlotGeneratorMixin:
58
184
  safe_sn = self.get_slot_name(slot.alias)
59
185
 
60
186
  if range is None:
61
- range = self.handle_none_slot(slot, range)
187
+ range = self.handle_none_slot(slot)
62
188
  elif range in self.schemaview.all_classes():
63
189
  range = self.handle_class_slot(slot, range)
64
190
  elif range in self.schemaview.all_types():
65
191
  range = self.handle_type_slot(slot, range)
192
+ if self.is_multivalued(slot):
193
+ range = self.make_multivalued(range)
66
194
  elif range in self.schemaview.all_enums():
67
195
  range = self.handle_enum_slot(slot, range)
196
+ range = self.handle_multivalued_slot(slot, range)
68
197
  else:
69
198
  raise Exception(f"Unknown range {range}")
70
199
 
71
- range = self.handle_multivalued_slot(slot, range)
72
-
73
- return OOField(
200
+ return DataframeField(
74
201
  name=safe_sn,
75
202
  source_slot=slot,
76
203
  range=range,
@@ -0,0 +1,19 @@
1
+ """Transform classes for LinkML Pandera validation.
2
+
3
+ This module provides transform classes that convert LinkML inline formats
4
+ into forms suitable for Polars DataFrame validation with Pandera models.
5
+ """
6
+
7
+ from .collection_dict_model_transform import CollectionDictModelTransform
8
+ from .list_dict_model_transform import ListDictModelTransform
9
+ from .model_transform import ModelTransform
10
+ from .nested_struct_model_transform import NestedStructModelTransform
11
+ from .simple_dict_model_transform import SimpleDictModelTransform
12
+
13
+ __all__ = [
14
+ "ModelTransform",
15
+ "SimpleDictModelTransform",
16
+ "CollectionDictModelTransform",
17
+ "ListDictModelTransform",
18
+ "NestedStructModelTransform",
19
+ ]
@@ -0,0 +1,62 @@
1
+ import polars as pl
2
+
3
+ from .model_transform import ModelTransform
4
+
5
+
6
+ class CollectionDictModelTransform(ModelTransform):
7
+ """This class assists in converting a LinkML 'collection dict' inline column
8
+ into a form that is better for representing in a PolaRS dataframe and
9
+ validating with a Pandera model.
10
+ """
11
+
12
+ def __init__(self, polars_schema, id_col):
13
+ self.polars_schema = polars_schema
14
+ """A polars schema representing a collection dict column"""
15
+
16
+ self.id_col = id_col
17
+ """The ID column in the sense of a LinkML inline collection dict"""
18
+
19
+ def transform(self, linkml_collection_dict):
20
+ """Converts a collection dict nested column to a list of dicts.
21
+ { 'A': {...}, 'B': {...}, ... } -> [{'id': 'A', ...}, {'id': 'B', ...}, ...]
22
+ """
23
+ return self._collection_dict_to_list_of_structs(linkml_collection_dict)
24
+
25
+ def _collection_dict_to_list_of_structs(self, linkml_collection_dict):
26
+ """Converts a collection dict nested column to a list of dicts.
27
+ { 'A': {...}, 'B': {...}, ... } -> [{'id': 'A', ...}, {'id': 'B', ...}, ...]
28
+
29
+ An inefficient conversion (relative to native PolaRS operations)
30
+ from a collection dict form to a dataframe struct column.
31
+
32
+ linkml_collection_dict : dict
33
+ A single row entry in a dataframe column (one cell), which itself is a dict.
34
+ The value entries are dicts that get the key added as an id field.
35
+ """
36
+ arr = []
37
+ for k, v in linkml_collection_dict.items():
38
+ if k not in v:
39
+ v[self.id_col] = k
40
+ arr.append(v)
41
+ return arr
42
+
43
+ @classmethod
44
+ def prepare_dataframe(cls, data, column_name, nested_cls):
45
+ """Returns just the collection dict column transformed to an inlined list form
46
+
47
+ note that this method uses collect and iter_rows so is very inefficient
48
+ """
49
+ id_column = nested_cls.get_id_column_name()
50
+ polars_schema = nested_cls.to_schema()
51
+
52
+ collection_dict_transformer = cls(polars_schema, id_column)
53
+
54
+ one_column_df = data.lazyframe.select(pl.col(column_name)).collect()
55
+
56
+ list_of_structs = [collection_dict_transformer.transform(e) for [e] in one_column_df.iter_rows()]
57
+
58
+ return pl.DataFrame(pl.Series(list_of_structs).alias(column_name))
59
+
60
+ def explode_unnest_dataframe(self, df, column_name):
61
+ """Filter, explode and unnest for collection dict."""
62
+ return df.lazy().filter(pl.col(column_name).list.len() > 0).explode(column_name).unnest(column_name).collect()
@@ -0,0 +1,66 @@
1
+ import polars as pl
2
+
3
+ from .model_transform import ModelTransform
4
+
5
+
6
+ class ListDictModelTransform(ModelTransform):
7
+ """This class assists in converting a LinkML 'list dict' inline column
8
+ into a form that is better for representing in a PolaRS dataframe and
9
+ validating with a Pandera model.
10
+ """
11
+
12
+ def __init__(self, polars_schema):
13
+ self.polars_schema = polars_schema
14
+ """A polars schema representing a list dict column"""
15
+
16
+ def transform(self, linkml_list_dict):
17
+ """Transforms a list dict nested column.
18
+ This is a pass-through since list dicts are already in the correct format.
19
+ """
20
+ return linkml_list_dict
21
+
22
+ @classmethod
23
+ def unnest_list_struct(cls, column_name: str, df):
24
+ """Use this in a custom check. Pass the nested model as pandera_model."""
25
+
26
+ # fmt: off
27
+ unnested_column = (
28
+ df
29
+ .select(column_name)
30
+ .filter(pl.col(column_name).list.len() > 0) # see: https://github.com/pola-rs/polars/issues/14381
31
+ .explode(column_name)
32
+ .unnest(column_name)
33
+ )
34
+ # fmt: on
35
+
36
+ return unnested_column
37
+
38
+ @classmethod
39
+ def prepare_dataframe(cls, data, column_name, nested_cls):
40
+ """Returns just the list dict column transformed to an inlined list form
41
+
42
+ note that this method uses collect and iter_rows so is very inefficient
43
+ """
44
+ polars_schema = nested_cls.to_schema()
45
+
46
+ list_dict_transformer = cls(polars_schema)
47
+
48
+ one_column_df = data.lazyframe.select(pl.col(column_name)).collect()
49
+
50
+ list_of_structs = [list_dict_transformer.transform(e) for [e] in one_column_df.iter_rows()]
51
+
52
+ return pl.DataFrame(pl.Series(list_of_structs).alias(column_name))
53
+
54
+ def explode_unnest_dataframe(self, df, column_name, data=None):
55
+ """Filter, explode and unnest for list dict with struct fallback."""
56
+ try:
57
+ return (
58
+ df.lazy().filter(pl.col(column_name).list.len() > 0).explode(column_name).unnest(column_name).collect()
59
+ )
60
+ except (pl.exceptions.PanicException, Exception):
61
+ if data:
62
+ from .nested_struct_model_transform import NestedStructModelTransform
63
+
64
+ nested_transform = NestedStructModelTransform(self.polars_schema)
65
+ return nested_transform.explode_unnest_dataframe(data.lazyframe, column_name)
66
+ raise
@@ -0,0 +1,8 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
+ class ModelTransform(ABC):
5
+ @abstractmethod
6
+ def explode_unnest_dataframe(self, df, column_name):
7
+ """Abstract method for exploding and unnesting dataframes."""
8
+ pass
@@ -0,0 +1,27 @@
1
+ from .model_transform import ModelTransform
2
+
3
+
4
+ class NestedStructModelTransform(ModelTransform):
5
+ """This class assists in converting a LinkML 'nested struct' inline column
6
+ into a form that is better for representing in a PolaRS dataframe and
7
+ validating with a Pandera model.
8
+ """
9
+
10
+ def __init__(self, polars_schema):
11
+ self.polars_schema = polars_schema
12
+ """A polars schema representing a nested struct column"""
13
+
14
+ def transform(self, linkml_nested_struct):
15
+ """Transforms a nested struct column.
16
+ This is a pass-through since nested structs are already in the correct format.
17
+ """
18
+ return linkml_nested_struct
19
+
20
+ def explode_unnest_dataframe(self, df, column_name):
21
+ """Unnest for nested struct."""
22
+ return df.lazy().select(column_name).unnest(column_name).collect()
23
+
24
+ @classmethod
25
+ def prepare_dataframe(cls, data, column_name, nested_cls):
26
+ """Returns the nested struct column as-is since no transformation needed"""
27
+ return data.lazyframe.collect()
@@ -0,0 +1,86 @@
1
+ import polars as pl
2
+
3
+ from .model_transform import ModelTransform
4
+
5
+
6
+ class SimpleDictModelTransform(ModelTransform):
7
+ """This class assists in converting a LinkML 'simple dict' inline column
8
+ into a form that is better for representing in a PolaRS dataframe and
9
+ validating with a Pandera model.
10
+ """
11
+
12
+ def __init__(self, polars_schema, id_col, other_col):
13
+ self.polars_schema = polars_schema
14
+ """A polars schema representing a simple dict column"""
15
+
16
+ self.id_col = id_col
17
+ """The ID column in the sense of a LinkML inline simple dict"""
18
+
19
+ self.other_col = other_col
20
+ """The 'other' column in the sense of a LinkML inline simple dict"""
21
+
22
+ self.id_col_type = None
23
+ self.other_col_type = None
24
+ self.polars_struct = self._build_polars_struct()
25
+ """A pl.Struct representing the schema of the other range."""
26
+
27
+ def _build_polars_struct_simple(self):
28
+ """Handles the two column (id, other) form of the simple dict"""
29
+ self.id_col_type = self.polars_schema.columns[self.id_col].dtype.type
30
+ self.other_col_type = self.polars_schema.columns[self.other_col].dtype.type
31
+
32
+ return pl.Struct({self.id_col: self.id_col_type, self.other_col: self.other_col_type})
33
+
34
+ def _build_polars_struct_complex(self):
35
+ """Handles the non-two-column simple dict cases."""
36
+ struct_items = {}
37
+ for k, v in self.polars_schema.columns.items():
38
+ if v.dtype.type == pl.Object:
39
+ v.dtype.type = pl.Struct
40
+ else:
41
+ struct_items[k] = v.dtype.type
42
+ return pl.Struct(struct_items)
43
+
44
+ def _build_polars_struct(self):
45
+ if len(self.polars_schema.columns.keys()) == 2:
46
+ return self._build_polars_struct_simple()
47
+ else:
48
+ return self._build_polars_struct_complex()
49
+
50
+ def transform(self, linkml_simple_dict):
51
+ """Converts a simple dict nested column to a list of dicts.
52
+ { 'A': 1, 'B': 2, ... } -> [{'id': 'A', 'other': 1}, {'id': 'B', 'other': 2}, ...]
53
+ """
54
+ return self._simple_dict_to_list_of_structs(linkml_simple_dict)
55
+
56
+ def _simple_dict_to_list_of_structs(self, linkml_simple_dict):
57
+ """Converts a simple dict nested column to a list of dicts.
58
+ { 'A': 1, 'B': 2, ... } -> [{'id': 'A', 'other': 1}, {'id': 'B', 'other': 2}, ...]
59
+
60
+ An inefficient conversion (relative to native PolaRS operations)
61
+ from a simple dict form to a dataframe struct column.
62
+
63
+ e : dict
64
+ e is a single row entry in a dataframe column (one cell), which itself is a dict.
65
+ The value entries of e may also be dicts.
66
+ """
67
+ arr = []
68
+ for id_value, range_value in linkml_simple_dict.items():
69
+ if isinstance(range_value, dict) and (set(range_value.keys()) <= set(self.polars_schema.columns.keys())):
70
+ range_dict = range_value
71
+ range_dict[self.id_col] = id_value
72
+ for column_key in self.polars_schema.columns.keys():
73
+ if column_key not in range_dict:
74
+ range_dict[column_key] = None
75
+ else:
76
+ range_dict = {self.id_col: id_value, self.other_col: range_value}
77
+ arr.append(range_dict)
78
+
79
+ return arr
80
+
81
+ def list_dtype(self):
82
+ return pl.List(self.polars_struct)
83
+
84
+ def explode_unnest_dataframe(self, df, column_name):
85
+ """Explode and unnest for simple dict."""
86
+ return df.lazy().explode(column_name).unnest(column_name).collect()
@@ -37,6 +37,7 @@ class PlantumlGenerator(Generator):
37
37
  generatorversion = "0.1.1"
38
38
  valid_formats = ["puml", "plantuml", "png", "pdf", "jpg", "json", "svg"]
39
39
  visit_all_class_slots = False
40
+ preserve_names: bool = False
40
41
 
41
42
  referenced: Optional[set[ClassDefinitionName]] = None # List of classes that have to be emitted
42
43
  generated: Optional[set[ClassDefinitionName]] = None # List of classes that have been emitted
@@ -99,10 +100,9 @@ class PlantumlGenerator(Generator):
99
100
  return plantuml_url
100
101
  if directory:
101
102
  file_suffix = ".svg" if self.format == "puml" or self.format == "puml" else "." + self.format
102
- self.output_file_name = os.path.join(
103
- directory,
104
- camelcase(sorted(classes)[0] if classes else self.schema.name) + file_suffix,
105
- )
103
+ schema_name = sorted(classes)[0] if classes else self.schema.name
104
+ filename = schema_name if self.preserve_names else camelcase(schema_name)
105
+ self.output_file_name = os.path.join(directory, filename + file_suffix)
106
106
  resp = requests.get(plantuml_url, stream=True, timeout=REQUESTS_TIMEOUT)
107
107
  if resp.ok:
108
108
  with open(self.output_file_name, "wb") as f:
@@ -133,14 +133,14 @@ class PlantumlGenerator(Generator):
133
133
  for slot in self.filtered_cls_slots(cn, all_slots=True, filtr=lambda s: s.range not in self.schema.classes):
134
134
  if True or cn in slot.domain_of:
135
135
  mod = self.prop_modifier(cls, slot)
136
+ slot_name = (
137
+ self.aliased_slot_name(slot)
138
+ if self.preserve_names
139
+ else underscore(self.aliased_slot_name(slot))
140
+ )
141
+ range_name = slot.range if self.preserve_names else underscore(slot.range)
136
142
  slot_defs.append(
137
- " {field} "
138
- + underscore(self.aliased_slot_name(slot))
139
- + mod
140
- + " : "
141
- + underscore(slot.range)
142
- + " "
143
- + self.cardinality(slot)
143
+ " {field} " + slot_name + mod + " : " + range_name + " " + self.cardinality(slot)
144
144
  )
145
145
  self.class_generated.add(cn)
146
146
  self.referenced.add(cn)
@@ -359,6 +359,12 @@ class PlantumlGenerator(Generator):
359
359
  show_default=True,
360
360
  help="Print out Kroki URL calls instead of sending the real requests",
361
361
  )
362
+ @click.option(
363
+ "--preserve-names/--normalize-names",
364
+ default=False,
365
+ show_default=True,
366
+ help="Preserve original LinkML names in PlantUML diagram output (e.g., for class names, slot names, file names).",
367
+ )
362
368
  @click.version_option(__version__, "-V", "--version")
363
369
  def cli(yamlfile, **args):
364
370
  """Generate a UML representation of a LinkML model"""
@@ -1,4 +1,5 @@
1
1
  import inspect
2
+ import keyword
2
3
  import logging
3
4
  import os
4
5
  import re
@@ -94,7 +95,10 @@ DEFAULT_IMPORTS = (
94
95
  ObjectImport(name="ConfigDict"),
95
96
  ObjectImport(name="Field"),
96
97
  ObjectImport(name="RootModel"),
98
+ ObjectImport(name="SerializationInfo"),
99
+ ObjectImport(name="SerializerFunctionWrapHandler"),
97
100
  ObjectImport(name="field_validator"),
101
+ ObjectImport(name="model_serializer"),
98
102
  ],
99
103
  )
100
104
  )
@@ -143,6 +147,41 @@ DefinitionType = TypeVar("DefinitionType", bound=Union[SchemaDefinition, ClassDe
143
147
  TemplateType = TypeVar("TemplateType", bound=Union[PydanticModule, PydanticClass, PydanticAttribute])
144
148
 
145
149
 
150
+ def make_valid_python_identifier(name: str) -> str:
151
+ """
152
+ Convert a string to a valid Python identifier.
153
+
154
+ This is used when slot names contain characters that are not valid in Python
155
+ identifiers (e.g., '@id', '@type'). The original name can be preserved using
156
+ Pydantic field aliases.
157
+
158
+ Args:
159
+ name: The original name that may contain invalid characters
160
+
161
+ Returns:
162
+ A valid Python identifier that doesn't start with underscore (Pydantic restriction)
163
+ """
164
+ # Replace invalid characters with underscores
165
+ identifier = re.sub(r"[^a-zA-Z0-9_]", "_", name)
166
+
167
+ # Remove leading underscores (Pydantic doesn't allow field names starting with _)
168
+ identifier = identifier.lstrip("_")
169
+
170
+ # Ensure it doesn't start with a number
171
+ if identifier and identifier[0].isdigit():
172
+ identifier = f"field_{identifier}"
173
+
174
+ # Ensure it's not a keyword
175
+ if keyword.iskeyword(identifier):
176
+ identifier = f"{identifier}_"
177
+
178
+ # Ensure it's not empty
179
+ if not identifier:
180
+ identifier = "field"
181
+
182
+ return identifier
183
+
184
+
146
185
  @dataclass
147
186
  class PydanticGenerator(OOCodeGenerator, LifecycleMixin):
148
187
  """
@@ -461,7 +500,19 @@ class PydanticGenerator(OOCodeGenerator, LifecycleMixin):
461
500
  if getattr(slot, k, None) is not None
462
501
  }
463
502
  slot_alias = slot.alias if slot.alias else slot.name
464
- slot_args["name"] = underscore(slot_alias)
503
+
504
+ # Create a valid Python identifier for the field name
505
+ python_field_name = make_valid_python_identifier(underscore(slot_alias))
506
+ slot_args["name"] = python_field_name
507
+
508
+ # If the original name is different from the Python identifier, set an alias
509
+ if slot_alias != python_field_name:
510
+ slot_args["alias"] = slot_alias
511
+ else:
512
+ # Remove any existing alias if the names are the same
513
+ if "alias" in slot_args:
514
+ del slot_args["alias"]
515
+
465
516
  slot_args["description"] = slot.description.replace('"', '\\"') if slot.description is not None else None
466
517
  predef = self.predefined_slot_values.get(camelcase(cls.name), {}).get(slot.name, None)
467
518
  if predef is not None:
@@ -1241,7 +1292,7 @@ def cli(
1241
1292
  metadata_mode=meta,
1242
1293
  **args,
1243
1294
  )
1244
- print(gen.serialize())
1295
+ print(gen.serialize(), end="")
1245
1296
 
1246
1297
 
1247
1298
  if __name__ == "__main__":