linkml 1.9.4rc2__py3-none-any.whl → 1.9.5rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml/cli/main.py +5 -1
- linkml/converter/__init__.py +0 -0
- linkml/generators/__init__.py +2 -0
- linkml/generators/common/build.py +5 -20
- linkml/generators/common/template.py +289 -3
- linkml/generators/docgen.py +55 -10
- linkml/generators/erdiagramgen.py +9 -5
- linkml/generators/graphqlgen.py +32 -6
- linkml/generators/jsonldcontextgen.py +78 -12
- linkml/generators/jsonschemagen.py +29 -12
- linkml/generators/mermaidclassdiagramgen.py +21 -3
- linkml/generators/owlgen.py +13 -2
- linkml/generators/panderagen/dataframe_class.py +13 -0
- linkml/generators/panderagen/dataframe_field.py +50 -0
- linkml/generators/panderagen/linkml_pandera_validator.py +186 -0
- linkml/generators/panderagen/panderagen.py +22 -5
- linkml/generators/panderagen/panderagen_class_based/class.jinja2 +70 -13
- linkml/generators/panderagen/panderagen_class_based/custom_checks.jinja2 +27 -0
- linkml/generators/panderagen/panderagen_class_based/enums.jinja2 +3 -3
- linkml/generators/panderagen/panderagen_class_based/pandera.jinja2 +12 -2
- linkml/generators/panderagen/panderagen_class_based/slots.jinja2 +19 -17
- linkml/generators/panderagen/slot_generator_mixin.py +143 -16
- linkml/generators/panderagen/transforms/__init__.py +19 -0
- linkml/generators/panderagen/transforms/collection_dict_model_transform.py +62 -0
- linkml/generators/panderagen/transforms/list_dict_model_transform.py +66 -0
- linkml/generators/panderagen/transforms/model_transform.py +8 -0
- linkml/generators/panderagen/transforms/nested_struct_model_transform.py +27 -0
- linkml/generators/panderagen/transforms/simple_dict_model_transform.py +86 -0
- linkml/generators/plantumlgen.py +17 -11
- linkml/generators/pydanticgen/pydanticgen.py +53 -2
- linkml/generators/pydanticgen/template.py +45 -233
- linkml/generators/pydanticgen/templates/attribute.py.jinja +1 -0
- linkml/generators/pydanticgen/templates/base_model.py.jinja +16 -2
- linkml/generators/pydanticgen/templates/imports.py.jinja +1 -1
- linkml/generators/rdfgen.py +11 -2
- linkml/generators/rustgen/__init__.py +3 -0
- linkml/generators/rustgen/build.py +97 -0
- linkml/generators/rustgen/cli.py +83 -0
- linkml/generators/rustgen/rustgen.py +1186 -0
- linkml/generators/rustgen/template.py +910 -0
- linkml/generators/rustgen/templates/Cargo.toml.jinja +42 -0
- linkml/generators/rustgen/templates/anything.rs.jinja +149 -0
- linkml/generators/rustgen/templates/as_key_value.rs.jinja +86 -0
- linkml/generators/rustgen/templates/class_module.rs.jinja +8 -0
- linkml/generators/rustgen/templates/enum.rs.jinja +70 -0
- linkml/generators/rustgen/templates/file.rs.jinja +75 -0
- linkml/generators/rustgen/templates/import.rs.jinja +4 -0
- linkml/generators/rustgen/templates/imports.rs.jinja +8 -0
- linkml/generators/rustgen/templates/lib_shim.rs.jinja +52 -0
- linkml/generators/rustgen/templates/poly.rs.jinja +9 -0
- linkml/generators/rustgen/templates/poly_containers.rs.jinja +439 -0
- linkml/generators/rustgen/templates/poly_trait.rs.jinja +15 -0
- linkml/generators/rustgen/templates/poly_trait_impl.rs.jinja +5 -0
- linkml/generators/rustgen/templates/poly_trait_impl_orsubtype.rs.jinja +5 -0
- linkml/generators/rustgen/templates/poly_trait_property.rs.jinja +8 -0
- linkml/generators/rustgen/templates/poly_trait_property_impl.rs.jinja +134 -0
- linkml/generators/rustgen/templates/poly_trait_property_match.rs.jinja +10 -0
- linkml/generators/rustgen/templates/property.rs.jinja +28 -0
- linkml/generators/rustgen/templates/pyproject.toml.jinja +10 -0
- linkml/generators/rustgen/templates/serde_utils.rs.jinja +490 -0
- linkml/generators/rustgen/templates/slot_range_as_union.rs.jinja +64 -0
- linkml/generators/rustgen/templates/struct.rs.jinja +81 -0
- linkml/generators/rustgen/templates/struct_or_subtype_enum.rs.jinja +111 -0
- linkml/generators/rustgen/templates/stub_gen.rs.jinja +71 -0
- linkml/generators/rustgen/templates/stub_utils.rs.jinja +76 -0
- linkml/generators/rustgen/templates/typealias.rs.jinja +13 -0
- linkml/generators/sqltablegen.py +18 -16
- linkml/generators/yarrrmlgen.py +173 -0
- linkml/linter/config/datamodel/config.py +160 -293
- linkml/linter/config/datamodel/config.yaml +34 -26
- linkml/linter/config/default.yaml +4 -0
- linkml/linter/config/recommended.yaml +4 -0
- linkml/linter/linter.py +1 -2
- linkml/linter/rules.py +37 -0
- linkml/utils/schema_builder.py +2 -0
- linkml/utils/schemaloader.py +55 -3
- {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/METADATA +1 -1
- {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/RECORD +82 -40
- {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/entry_points.txt +2 -1
- linkml/generators/panderagen/panderagen_class_based/mixins.jinja2 +0 -26
- /linkml/{utils/converter.py → converter/cli.py} +0 -0
- {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/WHEEL +0 -0
- {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,31 +1,156 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
|
-
from
|
|
4
|
+
from linkml_runtime.linkml_model.meta import ClassDefinitionName, SlotDefinition
|
|
5
|
+
|
|
6
|
+
from linkml.utils.helpers import get_range_associated_slots
|
|
7
|
+
|
|
8
|
+
from .dataframe_field import DataframeField
|
|
4
9
|
|
|
5
10
|
logger = logging.getLogger(__file__)
|
|
6
11
|
|
|
7
12
|
|
|
8
13
|
class SlotGeneratorMixin:
|
|
14
|
+
"""
|
|
15
|
+
Prior to rendering the dataframe schema, this class provides
|
|
16
|
+
an adapter between the LinkML model and schema view
|
|
17
|
+
and the rendering engine.
|
|
18
|
+
"""
|
|
19
|
+
|
|
9
20
|
LINKML_ANY_CURIE = "linkml:Any"
|
|
21
|
+
|
|
22
|
+
# constants used to render the schema
|
|
23
|
+
# these will be moved to a dialect-specific place
|
|
10
24
|
ANY_RANGE_STRING = "Object"
|
|
11
25
|
CLASS_RANGE_STRING = "Struct"
|
|
26
|
+
SIMPLE_DICT_RANGE_STRING = "Struct"
|
|
12
27
|
ENUM_RANGE_STRING = "Enum"
|
|
13
|
-
DEFAULT_RANGE_STRING = "str"
|
|
14
28
|
|
|
15
|
-
#
|
|
16
|
-
|
|
17
|
-
|
|
29
|
+
# association form flags used for rendering decisions
|
|
30
|
+
FORM_INLINED_DICT = "inlined_dict"
|
|
31
|
+
FORM_INLINED_LIST_DICT = "inlined_list_dict"
|
|
32
|
+
FORM_INLINED_COLLECTION_DICT = "inline_collection_dict"
|
|
33
|
+
FORM_INLINED_SIMPLE_DICT = "simple_dict"
|
|
34
|
+
FORM_MULTIVALUED_FOREIGN_KEY = "list_foreign_key"
|
|
35
|
+
FORM_FOREIGN_KEY = "foreign_key"
|
|
36
|
+
FORM_ERROR = "error"
|
|
37
|
+
|
|
38
|
+
# When nested inlining is done, the Pandera validator needs a specific range
|
|
39
|
+
INLINED_FORM_RANGE_PANDERA = {
|
|
40
|
+
FORM_INLINED_SIMPLE_DICT: SIMPLE_DICT_RANGE_STRING,
|
|
41
|
+
FORM_INLINED_LIST_DICT: CLASS_RANGE_STRING,
|
|
42
|
+
FORM_INLINED_COLLECTION_DICT: CLASS_RANGE_STRING,
|
|
43
|
+
FORM_INLINED_DICT: CLASS_RANGE_STRING,
|
|
44
|
+
FORM_ERROR: None,
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
def is_multivalued(self, slot):
    """Report whether ``slot`` is explicitly flagged as multivalued.

    Returns ``True`` only when ``"multivalued" in slot`` holds and the
    ``multivalued`` attribute is the literal ``True``; ``False`` otherwise.
    """
    if "multivalued" not in slot:
        return False
    return slot.multivalued is True
|
|
49
|
+
|
|
50
|
+
_INTERNAL_INLINED_FORM = {
|
|
51
|
+
# INLINED, INLINED_AS_LIST, MULTIVALUED,
|
|
52
|
+
(False, False, False): FORM_FOREIGN_KEY,
|
|
53
|
+
(False, False, True): FORM_MULTIVALUED_FOREIGN_KEY,
|
|
54
|
+
(False, True, False): FORM_INLINED_LIST_DICT,
|
|
55
|
+
(False, True, True): FORM_INLINED_LIST_DICT,
|
|
56
|
+
(True, False, False): FORM_INLINED_DICT,
|
|
57
|
+
(True, False, True): FORM_INLINED_COLLECTION_DICT,
|
|
58
|
+
(True, None, True): FORM_INLINED_DICT,
|
|
59
|
+
(True, True, False): FORM_INLINED_DICT,
|
|
60
|
+
(True, True, True): FORM_INLINED_LIST_DICT,
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
def get_identifier_or_key_slot(self, cn: ClassDefinitionName) -> Optional[SlotDefinition]:
    """Return the identifier slot of class ``cn``, falling back to a key slot.

    The schema view's ``get_identifier_slot`` result is returned when it is
    truthy. Otherwise the first induced slot of ``cn`` whose ``key``
    attribute is truthy is returned, or ``None`` when there is none.
    """
    view = self.schemaview
    identifier = view.get_identifier_slot(cn)
    if identifier:
        return identifier
    # No identifier: take the first induced slot flagged as a key, if any.
    return next((candidate for candidate in view.class_induced_slots(cn) if candidate.key), None)
|
|
73
|
+
|
|
74
|
+
def calculate_inlined_form(self, slot: SlotDefinition) -> str:
    """Classify ``slot`` into one of the ``FORM_*`` association forms.

    The lookup key is the boolean triple
    ``(inlined is True, inlined_as_list is True, multivalued)``; combinations
    missing from ``_INTERNAL_INLINED_FORM`` map to ``FORM_ERROR``.

    Two adjustments follow the table lookup:

    * a collection dict whose range class has neither identifier nor key
      slot is downgraded to the list dict form;
    * whenever ``calculate_simple_dict`` yields a value slot for the range,
      the simple dict form is returned regardless of the table result.
    """
    multivalued = self.is_multivalued(slot)
    form_key = (slot.inlined is True, slot.inlined_as_list is True, multivalued)
    logger.info(f"Inlined form key: {form_key}")
    form = self._INTERNAL_INLINED_FORM.get(form_key, SlotGeneratorMixin.FORM_ERROR)

    # A collection dict needs something to key on; without it fall back to a list.
    if (
        form == SlotGeneratorMixin.FORM_INLINED_COLLECTION_DICT
        and self.get_identifier_or_key_slot(slot.range) is None
    ):
        form = SlotGeneratorMixin.FORM_INLINED_LIST_DICT

    if self.calculate_simple_dict(slot) is None:
        return form
    return SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT
|
|
90
|
+
|
|
91
|
+
def calculate_simple_dict(self, slot: SlotDefinition):
    """Return the simple-dict value slot for the range of ``slot``.

    ``slot`` is the container of the simple dict. The result is the second
    element of the triple returned by ``get_range_associated_slots`` for
    ``slot.range``; callers in this class treat ``None`` as "not a simple dict".
    """
    associated_slots = get_range_associated_slots(self.schemaview, slot.range)
    _, value_slot, _ = associated_slots
    return value_slot
|
|
18
97
|
|
|
19
|
-
def handle_none_slot(self, slot
|
|
98
|
+
def handle_none_slot(self, slot) -> str:
    """Return the range to use for a slot that declares no range.

    The schema's ``default_range`` is used when it is set; otherwise the
    literal ``"str"``. ``slot`` itself is not consulted.
    """
    # need to figure this out, set at the beginning?
    default_range = self.schema.default_range
    return "str" if default_range is None else default_range
|
|
25
104
|
|
|
26
105
|
def handle_class_slot(self, slot, range: str) -> str:
    """Return the rendering range string for a slot whose range is a class.

    A range class whose ``class_uri`` equals ``LINKML_ANY_CURIE`` maps to
    ``ANY_RANGE_STRING``. Otherwise the association form is computed with
    ``calculate_inlined_form``:

    * foreign-key forms log a "not implemented" warning and yield an
      ``ID_TYPES['<class name>']`` expression;
    * every other form records ``reference_class`` and ``inline_form`` in
      ``slot.annotations`` and yields the ``INLINED_FORM_RANGE_PANDERA``
      entry for that form. Simple dicts additionally get their
      ``inline_details`` annotation, and list dicts are wrapped with
      ``make_multivalued``.

    Args:
        slot: the slot definition being rendered; its annotations may be
            updated as a side effect.
        range: name of the class the slot ranges over.

    Returns:
        The range string (``None`` for the error form).
    """
    range_info = self.schemaview.all_classes().get(range)

    if range_info["class_uri"] == SlotGeneratorMixin.LINKML_ANY_CURIE:
        range = SlotGeneratorMixin.ANY_RANGE_STRING
    else:
        inlined_form = self.calculate_inlined_form(slot)

        if inlined_form == SlotGeneratorMixin.FORM_INLINED_COLLECTION_DICT:
            # Fixed: the two adjacent literals previously joined as "form,which".
            logger.warning(
                f"Slot {slot.name} uses inlined dictionary form, "
                "which may be less efficient than inlined as list form with the current implementation."
            )
        elif inlined_form == SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT:
            logger.warning(
                f"Slot {slot.name} uses inlined simple dictionary form. Support is incomplete "
                "and performance is less efficient than inlined as list form with the current implementation."
            )

        if inlined_form in (SlotGeneratorMixin.FORM_MULTIVALUED_FOREIGN_KEY, SlotGeneratorMixin.FORM_FOREIGN_KEY):
            logger.warning(f"Foreign key not implemented for slot {slot.name}")
            range = f"ID_TYPES['{self.get_class_name(range)}']"
        else:
            # TODO: make these setters
            slot.annotations["reference_class"] = self.get_class_name(range)
            slot.annotations["inline_form"] = inlined_form

            range = SlotGeneratorMixin.INLINED_FORM_RANGE_PANDERA[inlined_form]

            if inlined_form == SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT:
                self.set_simple_dict_inline_details_annotation(slot)
            elif inlined_form in [SlotGeneratorMixin.FORM_INLINED_LIST_DICT]:
                range = self.make_multivalued(range)

    return range
|
|
140
|
+
|
|
141
|
+
def set_simple_dict_inline_details_annotation(self, slot):
    """Annotate ``slot`` with the slot names needed for the simple dict case.

    Looks up the slots associated with ``slot.range`` and stores the names of
    the id slot and the simple-dict value slot under
    ``slot.annotations["inline_details"]`` as ``{"id": ..., "other": ...}``.
    """
    id_slot, value_slot, _ = get_range_associated_slots(self.schemaview, slot.range)
    details = {"id": id_slot.name, "other": value_slot.name}
    slot.annotations["inline_details"] = details
|
|
150
|
+
|
|
151
|
+
def handle_non_inlined_class_slot(self, slot, range: str) -> str:
    """Return the ``ID_TYPES['<class name>']`` expression for ``range``.

    Non-inlined class slots have been temporarily removed, but this will be
    needed to support them. ``slot`` is not consulted.
    """
    class_name = self.get_class_name(range)
    return "ID_TYPES['{}']".format(class_name)
|
|
29
154
|
|
|
30
155
|
def handle_type_slot(self, slot, range: str) -> str:
|
|
31
156
|
del slot # unused for now
|
|
@@ -43,9 +168,10 @@ class SlotGeneratorMixin:
|
|
|
43
168
|
return range
|
|
44
169
|
|
|
45
170
|
def handle_multivalued_slot(self, slot, range: str) -> str:
    """Wrap ``range`` as multivalued when the slot is an inlined-as-list collection.

    ``range`` is passed through ``make_multivalued`` only when
    ``slot.inlined_as_list`` is the literal ``True`` and the slot is
    multivalued; otherwise it is returned unchanged.

    The previous condition also OR-ed in the same test with an extra
    ``slot.inlined is True`` conjunct. That case is already covered by the
    test below, so it was dropped without changing the outcome.
    """
    if slot.inlined_as_list is True and self.is_multivalued(slot):
        range = self.make_multivalued(range)

    return range
|
|
51
177
|
|
|
@@ -58,19 +184,20 @@ class SlotGeneratorMixin:
|
|
|
58
184
|
safe_sn = self.get_slot_name(slot.alias)
|
|
59
185
|
|
|
60
186
|
if range is None:
|
|
61
|
-
range = self.handle_none_slot(slot
|
|
187
|
+
range = self.handle_none_slot(slot)
|
|
62
188
|
elif range in self.schemaview.all_classes():
|
|
63
189
|
range = self.handle_class_slot(slot, range)
|
|
64
190
|
elif range in self.schemaview.all_types():
|
|
65
191
|
range = self.handle_type_slot(slot, range)
|
|
192
|
+
if self.is_multivalued(slot):
|
|
193
|
+
range = self.make_multivalued(range)
|
|
66
194
|
elif range in self.schemaview.all_enums():
|
|
67
195
|
range = self.handle_enum_slot(slot, range)
|
|
196
|
+
range = self.handle_multivalued_slot(slot, range)
|
|
68
197
|
else:
|
|
69
198
|
raise Exception(f"Unknown range {range}")
|
|
70
199
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
return OOField(
|
|
200
|
+
return DataframeField(
|
|
74
201
|
name=safe_sn,
|
|
75
202
|
source_slot=slot,
|
|
76
203
|
range=range,
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Transform classes for LinkML Pandera validation.
|
|
2
|
+
|
|
3
|
+
This module provides transform classes that convert LinkML inline formats
|
|
4
|
+
into forms suitable for Polars DataFrame validation with Pandera models.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .collection_dict_model_transform import CollectionDictModelTransform
|
|
8
|
+
from .list_dict_model_transform import ListDictModelTransform
|
|
9
|
+
from .model_transform import ModelTransform
|
|
10
|
+
from .nested_struct_model_transform import NestedStructModelTransform
|
|
11
|
+
from .simple_dict_model_transform import SimpleDictModelTransform
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"ModelTransform",
|
|
15
|
+
"SimpleDictModelTransform",
|
|
16
|
+
"CollectionDictModelTransform",
|
|
17
|
+
"ListDictModelTransform",
|
|
18
|
+
"NestedStructModelTransform",
|
|
19
|
+
]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
|
|
3
|
+
from .model_transform import ModelTransform
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CollectionDictModelTransform(ModelTransform):
    """Convert a LinkML 'collection dict' inline column into a list of structs.

    The list form is better suited to representation in a Polars dataframe
    and to validation with a Pandera model.
    """

    def __init__(self, polars_schema, id_col):
        # A polars schema representing a collection dict column.
        self.polars_schema = polars_schema
        # The ID column in the sense of a LinkML inline collection dict.
        self.id_col = id_col

    def transform(self, linkml_collection_dict):
        """Convert a collection dict nested column to a list of dicts.

        { 'A': {...}, 'B': {...}, ... } -> [{'id': 'A', ...}, {'id': 'B', ...}, ...]
        """
        return self._collection_dict_to_list_of_structs(linkml_collection_dict)

    def _collection_dict_to_list_of_structs(self, linkml_collection_dict):
        """Convert a collection dict nested column to a list of dicts.

        { 'A': {...}, 'B': {...}, ... } -> [{'id': 'A', ...}, {'id': 'B', ...}, ...]

        An inefficient conversion (relative to native Polars operations)
        from a collection dict form to a dataframe struct column.

        Args:
            linkml_collection_dict: a single row entry in a dataframe column
                (one cell), which itself is a dict. Its values are dicts.

        Returns:
            A list holding a shallow copy of each value dict, with the
            dictionary key stored under ``self.id_col``.
        """
        arr = []
        for key, entry in linkml_collection_dict.items():
            # Copy so that the caller's cell is not modified in place.
            struct = dict(entry)
            # The dictionary key is the identifier. The previous guard
            # (`if k not in v`) compared the key against the entry's *field
            # names*, so the id was not written whenever a key happened to
            # match a field name.
            # NOTE(review): an id already present in the entry is overwritten
            # by the key, as before — confirm the key should be authoritative.
            struct[self.id_col] = key
            arr.append(struct)
        return arr

    @classmethod
    def prepare_dataframe(cls, data, column_name, nested_cls):
        """Return just the collection dict column transformed to an inlined list form.

        Note that this method uses collect and iter_rows so is very inefficient.

        Args:
            data: object exposing the frame to validate as ``data.lazyframe``.
            column_name: name of the collection dict column.
            nested_cls: nested model providing ``get_id_column_name()`` and
                ``to_schema()``.

        Returns:
            A one-column ``pl.DataFrame`` named ``column_name``.
        """
        id_column = nested_cls.get_id_column_name()
        polars_schema = nested_cls.to_schema()

        collection_dict_transformer = cls(polars_schema, id_column)

        one_column_df = data.lazyframe.select(pl.col(column_name)).collect()

        # Each row of the one-column frame is a 1-tuple holding the cell.
        list_of_structs = [collection_dict_transformer.transform(cell) for (cell,) in one_column_df.iter_rows()]

        return pl.DataFrame(pl.Series(list_of_structs).alias(column_name))

    def explode_unnest_dataframe(self, df, column_name):
        """Filter out empty lists, then explode and unnest ``column_name``."""
        return df.lazy().filter(pl.col(column_name).list.len() > 0).explode(column_name).unnest(column_name).collect()
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
|
|
3
|
+
from .model_transform import ModelTransform
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ListDictModelTransform(ModelTransform):
    """Helper for LinkML 'list dict' inline columns held in a Polars dataframe.

    List dicts already have the shape wanted for validation with a Pandera
    model, so this class mostly supplies the explode/unnest plumbing.
    """

    def __init__(self, polars_schema):
        # A polars schema representing a list dict column.
        self.polars_schema = polars_schema

    def transform(self, linkml_list_dict):
        """Return ``linkml_list_dict`` unchanged.

        This is a pass-through since list dicts are already in the correct format.
        """
        return linkml_list_dict

    @classmethod
    def unnest_list_struct(cls, column_name: str, df):
        """Select ``column_name`` from ``df``, drop empty lists, explode and unnest.

        Use this in a custom check. Pass the nested model as pandera_model.
        """
        selected = df.select(column_name)
        # Empty lists are filtered first; see https://github.com/pola-rs/polars/issues/14381
        non_empty = selected.filter(pl.col(column_name).list.len() > 0)
        return non_empty.explode(column_name).unnest(column_name)

    @classmethod
    def prepare_dataframe(cls, data, column_name, nested_cls):
        """Return just the list dict column transformed to an inlined list form.

        Note that this method uses collect and iter_rows so is very inefficient.
        """
        transformer = cls(nested_cls.to_schema())
        column_frame = data.lazyframe.select(pl.col(column_name)).collect()

        cells = []
        # Each row of the one-column frame is a 1-tuple holding the cell.
        for (cell,) in column_frame.iter_rows():
            cells.append(transformer.transform(cell))

        return pl.DataFrame(pl.Series(cells).alias(column_name))

    def explode_unnest_dataframe(self, df, column_name, data=None):
        """Filter, explode and unnest for list dict, with a struct fallback.

        If the list-based pipeline raises and ``data`` is truthy, the column
        is instead unnested as a nested struct from ``data.lazyframe``.
        Without ``data`` the original exception propagates.
        """
        try:
            non_empty = df.lazy().filter(pl.col(column_name).list.len() > 0)
            return non_empty.explode(column_name).unnest(column_name).collect()
        # NOTE(review): PanicException is listed next to Exception, presumably
        # because it does not derive from Exception — confirm before simplifying.
        except (pl.exceptions.PanicException, Exception):
            if not data:
                raise
            from .nested_struct_model_transform import NestedStructModelTransform

            fallback = NestedStructModelTransform(self.polars_schema)
            return fallback.explode_unnest_dataframe(data.lazyframe, column_name)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from .model_transform import ModelTransform
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class NestedStructModelTransform(ModelTransform):
    """Helper for LinkML 'nested struct' inline columns in a Polars dataframe.

    Nested structs need no reshaping before validation with a Pandera model;
    only the unnest step is provided here.
    """

    def __init__(self, polars_schema):
        # A polars schema representing a nested struct column.
        self.polars_schema = polars_schema

    def transform(self, linkml_nested_struct):
        """Return ``linkml_nested_struct`` unchanged.

        This is a pass-through since nested structs are already in the correct format.
        """
        return linkml_nested_struct

    def explode_unnest_dataframe(self, df, column_name):
        """Select ``column_name`` from ``df``, unnest it and collect the result."""
        struct_column = df.lazy().select(column_name)
        return struct_column.unnest(column_name).collect()

    @classmethod
    def prepare_dataframe(cls, data, column_name, nested_cls):
        """Collect and return ``data.lazyframe`` as-is; no transformation is needed.

        ``column_name`` and ``nested_cls`` are accepted for signature parity
        but are not used.
        """
        frame = data.lazyframe
        return frame.collect()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
|
|
3
|
+
from .model_transform import ModelTransform
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SimpleDictModelTransform(ModelTransform):
    """Helper for LinkML 'simple dict' inline columns in a Polars dataframe.

    Converts the dict form into a list of structs, which is better suited to
    a Polars dataframe and to validation with a Pandera model.
    """

    def __init__(self, polars_schema, id_col, other_col):
        # A polars schema representing a simple dict column.
        self.polars_schema = polars_schema
        # The ID column in the sense of a LinkML inline simple dict.
        self.id_col = id_col
        # The 'other' column in the sense of a LinkML inline simple dict.
        self.other_col = other_col

        # Filled in by _build_polars_struct_simple for the two-column form.
        self.id_col_type = None
        self.other_col_type = None
        # A pl.Struct representing the schema of the other range.
        self.polars_struct = self._build_polars_struct()

    def _build_polars_struct_simple(self):
        """Handle the two column (id, other) form of the simple dict."""
        columns = self.polars_schema.columns
        self.id_col_type = columns[self.id_col].dtype.type
        self.other_col_type = self.polars_schema.columns[self.other_col].dtype.type

        fields = {self.id_col: self.id_col_type, self.other_col: self.other_col_type}
        return pl.Struct(fields)

    def _build_polars_struct_complex(self):
        """Handle the non-two-column simple dict cases.

        Columns typed ``pl.Object`` are re-typed in place to ``pl.Struct`` and
        left out of the returned struct; every other column contributes its type.
        """
        fields = {}
        for column_name, column in self.polars_schema.columns.items():
            if column.dtype.type == pl.Object:
                column.dtype.type = pl.Struct
                continue
            fields[column_name] = column.dtype.type
        return pl.Struct(fields)

    def _build_polars_struct(self):
        """Build the struct dtype, choosing the builder by column count."""
        two_columns = len(self.polars_schema.columns.keys()) == 2
        builder = self._build_polars_struct_simple if two_columns else self._build_polars_struct_complex
        return builder()

    def transform(self, linkml_simple_dict):
        """Convert a simple dict nested column to a list of dicts.

        { 'A': 1, 'B': 2, ... } -> [{'id': 'A', 'other': 1}, {'id': 'B', 'other': 2}, ...]
        """
        return self._simple_dict_to_list_of_structs(linkml_simple_dict)

    def _simple_dict_to_list_of_structs(self, linkml_simple_dict):
        """Convert a simple dict nested column to a list of dicts.

        { 'A': 1, 'B': 2, ... } -> [{'id': 'A', 'other': 1}, {'id': 'B', 'other': 2}, ...]

        An inefficient conversion (relative to native Polars operations)
        from a simple dict form to a dataframe struct column.

        Args:
            linkml_simple_dict: a single row entry in a dataframe column (one
                cell), which itself is a dict. Its values may also be dicts.
                A dict value whose keys are all schema columns is updated in
                place: it receives the id and ``None`` for each missing column.

        Returns:
            One dict per entry of ``linkml_simple_dict``.
        """
        structs = []
        for id_value, range_value in linkml_simple_dict.items():
            is_expanded = isinstance(range_value, dict) and (
                set(range_value.keys()) <= set(self.polars_schema.columns.keys())
            )
            if not is_expanded:
                structs.append({self.id_col: id_value, self.other_col: range_value})
                continue

            range_value[self.id_col] = id_value
            # Pad absent columns so every struct carries the full schema.
            for column_key in self.polars_schema.columns.keys():
                range_value.setdefault(column_key, None)
            structs.append(range_value)

        return structs

    def list_dtype(self):
        """Return the list-of-struct dtype built from ``self.polars_struct``."""
        return pl.List(self.polars_struct)

    def explode_unnest_dataframe(self, df, column_name):
        """Explode and unnest ``column_name`` for simple dict."""
        exploded = df.lazy().explode(column_name)
        return exploded.unnest(column_name).collect()
|
linkml/generators/plantumlgen.py
CHANGED
|
@@ -37,6 +37,7 @@ class PlantumlGenerator(Generator):
|
|
|
37
37
|
generatorversion = "0.1.1"
|
|
38
38
|
valid_formats = ["puml", "plantuml", "png", "pdf", "jpg", "json", "svg"]
|
|
39
39
|
visit_all_class_slots = False
|
|
40
|
+
preserve_names: bool = False
|
|
40
41
|
|
|
41
42
|
referenced: Optional[set[ClassDefinitionName]] = None # List of classes that have to be emitted
|
|
42
43
|
generated: Optional[set[ClassDefinitionName]] = None # List of classes that have been emitted
|
|
@@ -99,10 +100,9 @@ class PlantumlGenerator(Generator):
|
|
|
99
100
|
return plantuml_url
|
|
100
101
|
if directory:
|
|
101
102
|
file_suffix = ".svg" if self.format == "puml" or self.format == "puml" else "." + self.format
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
)
|
|
103
|
+
schema_name = sorted(classes)[0] if classes else self.schema.name
|
|
104
|
+
filename = schema_name if self.preserve_names else camelcase(schema_name)
|
|
105
|
+
self.output_file_name = os.path.join(directory, filename + file_suffix)
|
|
106
106
|
resp = requests.get(plantuml_url, stream=True, timeout=REQUESTS_TIMEOUT)
|
|
107
107
|
if resp.ok:
|
|
108
108
|
with open(self.output_file_name, "wb") as f:
|
|
@@ -133,14 +133,14 @@ class PlantumlGenerator(Generator):
|
|
|
133
133
|
for slot in self.filtered_cls_slots(cn, all_slots=True, filtr=lambda s: s.range not in self.schema.classes):
|
|
134
134
|
if True or cn in slot.domain_of:
|
|
135
135
|
mod = self.prop_modifier(cls, slot)
|
|
136
|
+
slot_name = (
|
|
137
|
+
self.aliased_slot_name(slot)
|
|
138
|
+
if self.preserve_names
|
|
139
|
+
else underscore(self.aliased_slot_name(slot))
|
|
140
|
+
)
|
|
141
|
+
range_name = slot.range if self.preserve_names else underscore(slot.range)
|
|
136
142
|
slot_defs.append(
|
|
137
|
-
" {field} "
|
|
138
|
-
+ underscore(self.aliased_slot_name(slot))
|
|
139
|
-
+ mod
|
|
140
|
-
+ " : "
|
|
141
|
-
+ underscore(slot.range)
|
|
142
|
-
+ " "
|
|
143
|
-
+ self.cardinality(slot)
|
|
143
|
+
" {field} " + slot_name + mod + " : " + range_name + " " + self.cardinality(slot)
|
|
144
144
|
)
|
|
145
145
|
self.class_generated.add(cn)
|
|
146
146
|
self.referenced.add(cn)
|
|
@@ -359,6 +359,12 @@ class PlantumlGenerator(Generator):
|
|
|
359
359
|
show_default=True,
|
|
360
360
|
help="Print out Kroki URL calls instead of sending the real requests",
|
|
361
361
|
)
|
|
362
|
+
@click.option(
|
|
363
|
+
"--preserve-names/--normalize-names",
|
|
364
|
+
default=False,
|
|
365
|
+
show_default=True,
|
|
366
|
+
help="Preserve original LinkML names in PlantUML diagram output (e.g., for class names, slot names, file names).",
|
|
367
|
+
)
|
|
362
368
|
@click.version_option(__version__, "-V", "--version")
|
|
363
369
|
def cli(yamlfile, **args):
|
|
364
370
|
"""Generate a UML representation of a LinkML model"""
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import inspect
|
|
2
|
+
import keyword
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
4
5
|
import re
|
|
@@ -94,7 +95,10 @@ DEFAULT_IMPORTS = (
|
|
|
94
95
|
ObjectImport(name="ConfigDict"),
|
|
95
96
|
ObjectImport(name="Field"),
|
|
96
97
|
ObjectImport(name="RootModel"),
|
|
98
|
+
ObjectImport(name="SerializationInfo"),
|
|
99
|
+
ObjectImport(name="SerializerFunctionWrapHandler"),
|
|
97
100
|
ObjectImport(name="field_validator"),
|
|
101
|
+
ObjectImport(name="model_serializer"),
|
|
98
102
|
],
|
|
99
103
|
)
|
|
100
104
|
)
|
|
@@ -143,6 +147,41 @@ DefinitionType = TypeVar("DefinitionType", bound=Union[SchemaDefinition, ClassDe
|
|
|
143
147
|
TemplateType = TypeVar("TemplateType", bound=Union[PydanticModule, PydanticClass, PydanticAttribute])
|
|
144
148
|
|
|
145
149
|
|
|
150
|
+
def make_valid_python_identifier(name: str) -> str:
    """
    Turn an arbitrary string into a valid Python identifier.

    Needed for slot names that carry characters Python does not allow in
    identifiers (e.g., '@id', '@type'). The original name can be preserved
    using Pydantic field aliases.

    Args:
        name: The original name that may contain invalid characters

    Returns:
        A valid Python identifier that doesn't start with underscore (Pydantic restriction)
    """
    # Anything outside [a-zA-Z0-9_] becomes "_"; leading underscores are then
    # dropped because Pydantic rejects field names that start with "_".
    candidate = re.sub(r"[^a-zA-Z0-9_]", "_", name).lstrip("_")

    # Nothing usable left: fall back to a generic name.
    if not candidate:
        return "field"

    # Identifiers cannot start with a digit.
    if candidate[0].isdigit():
        candidate = f"field_{candidate}"

    # Reserved words get a trailing underscore.
    if keyword.iskeyword(candidate):
        candidate = f"{candidate}_"

    return candidate
|
|
183
|
+
|
|
184
|
+
|
|
146
185
|
@dataclass
|
|
147
186
|
class PydanticGenerator(OOCodeGenerator, LifecycleMixin):
|
|
148
187
|
"""
|
|
@@ -461,7 +500,19 @@ class PydanticGenerator(OOCodeGenerator, LifecycleMixin):
|
|
|
461
500
|
if getattr(slot, k, None) is not None
|
|
462
501
|
}
|
|
463
502
|
slot_alias = slot.alias if slot.alias else slot.name
|
|
464
|
-
|
|
503
|
+
|
|
504
|
+
# Create a valid Python identifier for the field name
|
|
505
|
+
python_field_name = make_valid_python_identifier(underscore(slot_alias))
|
|
506
|
+
slot_args["name"] = python_field_name
|
|
507
|
+
|
|
508
|
+
# If the original name is different from the Python identifier, set an alias
|
|
509
|
+
if slot_alias != python_field_name:
|
|
510
|
+
slot_args["alias"] = slot_alias
|
|
511
|
+
else:
|
|
512
|
+
# Remove any existing alias if the names are the same
|
|
513
|
+
if "alias" in slot_args:
|
|
514
|
+
del slot_args["alias"]
|
|
515
|
+
|
|
465
516
|
slot_args["description"] = slot.description.replace('"', '\\"') if slot.description is not None else None
|
|
466
517
|
predef = self.predefined_slot_values.get(camelcase(cls.name), {}).get(slot.name, None)
|
|
467
518
|
if predef is not None:
|
|
@@ -1241,7 +1292,7 @@ def cli(
|
|
|
1241
1292
|
metadata_mode=meta,
|
|
1242
1293
|
**args,
|
|
1243
1294
|
)
|
|
1244
|
-
print(gen.serialize())
|
|
1295
|
+
print(gen.serialize(), end="")
|
|
1245
1296
|
|
|
1246
1297
|
|
|
1247
1298
|
if __name__ == "__main__":
|