linkml 1.9.4rc2__py3-none-any.whl → 1.9.5rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. linkml/cli/main.py +4 -0
  2. linkml/generators/__init__.py +2 -0
  3. linkml/generators/common/build.py +5 -20
  4. linkml/generators/common/template.py +289 -3
  5. linkml/generators/docgen.py +55 -10
  6. linkml/generators/erdiagramgen.py +9 -5
  7. linkml/generators/graphqlgen.py +32 -6
  8. linkml/generators/jsonldcontextgen.py +78 -12
  9. linkml/generators/jsonschemagen.py +29 -12
  10. linkml/generators/mermaidclassdiagramgen.py +21 -3
  11. linkml/generators/owlgen.py +4 -1
  12. linkml/generators/panderagen/dataframe_class.py +13 -0
  13. linkml/generators/panderagen/dataframe_field.py +50 -0
  14. linkml/generators/panderagen/linkml_pandera_validator.py +186 -0
  15. linkml/generators/panderagen/panderagen.py +22 -5
  16. linkml/generators/panderagen/panderagen_class_based/class.jinja2 +70 -13
  17. linkml/generators/panderagen/panderagen_class_based/custom_checks.jinja2 +27 -0
  18. linkml/generators/panderagen/panderagen_class_based/enums.jinja2 +3 -3
  19. linkml/generators/panderagen/panderagen_class_based/pandera.jinja2 +12 -2
  20. linkml/generators/panderagen/panderagen_class_based/slots.jinja2 +19 -17
  21. linkml/generators/panderagen/slot_generator_mixin.py +143 -16
  22. linkml/generators/panderagen/transforms/__init__.py +19 -0
  23. linkml/generators/panderagen/transforms/collection_dict_model_transform.py +62 -0
  24. linkml/generators/panderagen/transforms/list_dict_model_transform.py +66 -0
  25. linkml/generators/panderagen/transforms/model_transform.py +8 -0
  26. linkml/generators/panderagen/transforms/nested_struct_model_transform.py +27 -0
  27. linkml/generators/panderagen/transforms/simple_dict_model_transform.py +86 -0
  28. linkml/generators/plantumlgen.py +17 -11
  29. linkml/generators/pydanticgen/pydanticgen.py +53 -2
  30. linkml/generators/pydanticgen/template.py +45 -233
  31. linkml/generators/pydanticgen/templates/attribute.py.jinja +1 -0
  32. linkml/generators/pydanticgen/templates/base_model.py.jinja +16 -2
  33. linkml/generators/pydanticgen/templates/imports.py.jinja +1 -1
  34. linkml/generators/rdfgen.py +11 -2
  35. linkml/generators/rustgen/__init__.py +3 -0
  36. linkml/generators/rustgen/build.py +94 -0
  37. linkml/generators/rustgen/cli.py +65 -0
  38. linkml/generators/rustgen/rustgen.py +1038 -0
  39. linkml/generators/rustgen/template.py +865 -0
  40. linkml/generators/rustgen/templates/Cargo.toml.jinja +42 -0
  41. linkml/generators/rustgen/templates/anything.rs.jinja +142 -0
  42. linkml/generators/rustgen/templates/as_key_value.rs.jinja +56 -0
  43. linkml/generators/rustgen/templates/class_module.rs.jinja +8 -0
  44. linkml/generators/rustgen/templates/enum.rs.jinja +54 -0
  45. linkml/generators/rustgen/templates/file.rs.jinja +62 -0
  46. linkml/generators/rustgen/templates/import.rs.jinja +4 -0
  47. linkml/generators/rustgen/templates/imports.rs.jinja +8 -0
  48. linkml/generators/rustgen/templates/poly.rs.jinja +9 -0
  49. linkml/generators/rustgen/templates/poly_containers.rs.jinja +439 -0
  50. linkml/generators/rustgen/templates/poly_trait.rs.jinja +15 -0
  51. linkml/generators/rustgen/templates/poly_trait_impl.rs.jinja +5 -0
  52. linkml/generators/rustgen/templates/poly_trait_impl_orsubtype.rs.jinja +5 -0
  53. linkml/generators/rustgen/templates/poly_trait_property.rs.jinja +8 -0
  54. linkml/generators/rustgen/templates/poly_trait_property_impl.rs.jinja +132 -0
  55. linkml/generators/rustgen/templates/poly_trait_property_match.rs.jinja +10 -0
  56. linkml/generators/rustgen/templates/property.rs.jinja +19 -0
  57. linkml/generators/rustgen/templates/pyproject.toml.jinja +10 -0
  58. linkml/generators/rustgen/templates/serde_utils.rs.jinja +310 -0
  59. linkml/generators/rustgen/templates/slot_range_as_union.rs.jinja +61 -0
  60. linkml/generators/rustgen/templates/struct.rs.jinja +75 -0
  61. linkml/generators/rustgen/templates/struct_or_subtype_enum.rs.jinja +108 -0
  62. linkml/generators/rustgen/templates/typealias.rs.jinja +13 -0
  63. linkml/generators/sqltablegen.py +18 -16
  64. linkml/generators/yarrrmlgen.py +157 -0
  65. linkml/linter/config/datamodel/config.py +160 -293
  66. linkml/linter/config/datamodel/config.yaml +34 -26
  67. linkml/linter/config/default.yaml +4 -0
  68. linkml/linter/config/recommended.yaml +4 -0
  69. linkml/linter/linter.py +1 -2
  70. linkml/linter/rules.py +37 -0
  71. linkml/utils/schemaloader.py +55 -3
  72. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/METADATA +1 -1
  73. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/RECORD +76 -38
  74. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/entry_points.txt +1 -0
  75. linkml/generators/panderagen/panderagen_class_based/mixins.jinja2 +0 -26
  76. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/WHEEL +0 -0
  77. {linkml-1.9.4rc2.dist-info → linkml-1.9.5rc1.dist-info}/licenses/LICENSE +0 -0
@@ -14,9 +14,10 @@ from linkml_runtime.utils.formatutils import camelcase
14
14
  from linkml_runtime.utils.schemaview import SchemaView
15
15
 
16
16
  from linkml._version import __version__
17
- from linkml.generators.oocodegen import OOClass, OOCodeGenerator, OODocument
17
+ from linkml.generators.oocodegen import OOCodeGenerator, OODocument
18
18
 
19
19
  from .class_generator_mixin import ClassGeneratorMixin
20
+ from .dataframe_class import DataframeClass
20
21
  from .enum_generator_mixin import EnumGeneratorMixin
21
22
  from .slot_generator_mixin import SlotGeneratorMixin
22
23
 
@@ -28,6 +29,7 @@ TYPEMAP = {
28
29
  "panderagen_class_based": {
29
30
  "xsd:string": "str",
30
31
  "xsd:integer": "int",
32
+ "xsd:int": "int",
31
33
  "xsd:float": "float",
32
34
  "xsd:double": "float",
33
35
  "xsd:boolean": "bool",
@@ -83,6 +85,8 @@ class PanderaGenerator(OOCodeGenerator, EnumGeneratorMixin, ClassGeneratorMixin,
83
85
 
84
86
  @staticmethod
85
87
  def make_multivalued(range: str) -> str:
88
+ if range == "Struct":
89
+ return "pl.List"
86
90
  return f"List[{range}]"
87
91
 
88
92
  def uri_type_map(self, xsd_uri: str, template: str = None):
@@ -92,15 +96,22 @@ class PanderaGenerator(OOCodeGenerator, EnumGeneratorMixin, ClassGeneratorMixin,
92
96
  return TYPEMAP[template].get(xsd_uri)
93
97
 
94
98
  def map_type(self, t: TypeDefinition) -> str:
99
+ logger.info(f"type_map definition: {t}")
100
+
101
+ typ = None
102
+
95
103
  if t.uri:
96
104
  typ = self.uri_type_map(t.uri)
97
- return typ
105
+ if typ is None:
106
+ typ = self.map_type(self.schemaview.get_type(t.typeof))
98
107
  elif t.typeof:
99
108
  typ = self.map_type(self.schemaview.get_type(t.typeof))
100
- return typ
101
- else:
109
+
110
+ if typ is None:
102
111
  raise ValueError(f"{t} cannot be mapped to a type")
103
112
 
113
+ return typ
114
+
104
115
  def load_template(self, template_filename):
105
116
  jinja_env = Environment(loader=PackageLoader("linkml.generators.panderagen", self.template_path))
106
117
  return jinja_env.get_template(template_filename)
@@ -138,6 +149,7 @@ class PanderaGenerator(OOCodeGenerator, EnumGeneratorMixin, ClassGeneratorMixin,
138
149
  coerce=self.coerce,
139
150
  type_map=TYPEMAP,
140
151
  template_path=self.template_path,
152
+ pandera_validator_code=None,
141
153
  )
142
154
  return code
143
155
 
@@ -156,12 +168,17 @@ class PanderaGenerator(OOCodeGenerator, EnumGeneratorMixin, ClassGeneratorMixin,
156
168
  for c in self.ordered_classes():
157
169
  cn = c.name
158
170
  safe_cn = camelcase(cn)
159
- ooclass = OOClass(
171
+ annotations = {}
172
+ identifier_or_key_slot = self.get_identifier_or_key_slot(cn)
173
+ if identifier_or_key_slot:
174
+ annotations["identifier_key_slot"] = identifier_or_key_slot.name
175
+ ooclass = DataframeClass(
160
176
  name=safe_cn,
161
177
  description=c.description,
162
178
  package=self.package,
163
179
  fields=[],
164
180
  source_class=c,
181
+ annotations=annotations,
165
182
  )
166
183
  classes.append(ooclass)
167
184
  if c.mixin:
@@ -3,24 +3,81 @@
3
3
  Details at https://pandera.readthedocs.io/en/stable/dataframe_models.html
4
4
  -#}
5
5
  {%- import 'slots.jinja2' as slot_macros -%}
6
+ {%- import 'custom_checks.jinja2' as custom_checks -%}
6
7
 
7
- {%- macro render_class(cls) %}
8
- class {{cls.name}}(
9
- {%- if cls.is_a -%}
10
- {{ cls.is_a }}
11
- {%- else -%}
12
- pla.DataFrameModel, _LinkmlPanderaValidator
13
- {%- endif -%}
14
- ):
15
- {%- if cls.source_class.description %}
16
- """
17
- {{ cls.source_class.description }}
18
- """
19
- {% endif -%}
8
+ {%- macro render_parent_classes(cls) -%}
9
+ {%- if cls.is_a -%}
10
+ {{ cls.is_a }}
11
+ {%- else -%}
12
+ pla.DataFrameModel, _LinkmlPanderaValidator
13
+ {%- endif -%}
14
+ {%- endmacro -%}
15
+
16
+ {%- macro render_nested_ranges(cls) -%}
17
+ {#-
18
+ Helper class member that references another Pandera class that
19
+ handles a nested association.
20
+ -#}
21
+ _NESTED_RANGES = {
22
+ {%- for field in cls.fields -%}
23
+ {%- if field.reference_class() %}
24
+ "{{ field.name }}": "{{ field.reference_class() }}",
25
+ {% endif -%}
26
+ {%- endfor %}
27
+ }
28
+ {%- endmacro -%}
29
+
30
+ {%- macro render_inline_form(cls) -%}
31
+ {#-
32
+ This is a meta object with information used by the various check methods
33
+ -#}
34
+ _INLINE_FORM = {
35
+ {%- for field in cls.fields -%}
36
+ {%- if field.inline_form() %}
37
+ "{{ field.name }}": "{{ field.inline_form() }}",
38
+ {% endif -%}
39
+ {%- endfor %}
40
+ }
41
+ {%- endmacro -%}
42
+
43
+ {%- macro render_inline_details(cls) -%}
44
+ {#-
45
+ This is a meta object with information used by the various check methods
46
+ -#}
47
+ _INLINE_DETAILS = {
48
+ {%- for field in cls.fields -%}
49
+ {%- if field.inline_details() %}
50
+ "{{ field.name }}": {{ field.inline_details() }},
51
+ {% endif -%}
52
+ {%- endfor %}
53
+ }
54
+ {%- endmacro -%}
55
+
56
+ {%- macro render_slots(cls) -%}
57
+ {{ slot_macros.id_slot_name_class_variable(cls) }}
20
58
  {%- if (cls.fields | length) == 0 %}
21
59
  pass
22
60
  {% endif -%}
23
61
  {%- for field in cls.fields -%}
24
62
  {{ slot_macros.render_slot(field) }}
25
63
  {%- endfor -%}
64
+ {%- endmacro -%}
65
+
66
+ {%- macro render_class(cls) %}
67
+ {#-
68
+ Generates the main structure of a Pandera class.
69
+ This includes the slots, custom checks,
70
+ and helper class members with information used by the custom checks.
71
+ -#}
72
+ class {{cls.name}}({{ render_parent_classes(cls) }}):
73
+ {%- if cls.source_class.description %}
74
+ """
75
+ {{ cls.source_class.description }}
76
+ """
77
+ {% endif -%}
78
+ {{ render_slots(cls) }}
79
+ {{ custom_checks.render_custom_checks(cls) }}
80
+ {{ render_nested_ranges(cls) }}
81
+ {{ render_inline_form(cls) }}
82
+ {{ render_inline_details(cls) }}
26
83
  {% endmacro -%}
@@ -0,0 +1,27 @@
1
+
2
+ {%- macro nested_checks(field) -%}
3
+ {#-
4
+ field: DataframeField
5
+ Render a class method that follows nested structures
6
+ other than the simple_dict form.
7
+ -#}
8
+ {%- if field.inline_form() %}
9
+ @pla.check("{{ field.name }}")
10
+ def check_nested_struct_{{ field.name }}(cls, data: PolarsData):
11
+ {% if field.inline_form() == 'inline_list_dict' -%}
12
+ return cls._check_nested_struct(data)
13
+ {% elif field.inline_form() == 'simple_dict' -%}
14
+ return cls._check_simple_dict(data)
15
+ {% elif field.inline_form() == 'inline_collection_dict' -%}
16
+ return cls._check_collection_struct(data)
17
+ {% else -%}
18
+ return cls._check_nested_list_struct(data)
19
+ {% endif -%}
20
+ {%- endif -%}
21
+ {%- endmacro -%}
22
+
23
+ {%- macro render_custom_checks(cls) -%}
24
+ {%- for field in cls.fields -%}
25
+ {{ nested_checks(field) }}
26
+ {%- endfor %}
27
+ {%- endmacro -%}
@@ -1,7 +1,7 @@
1
- {%- macro enum_parameter(slot) -%}
2
- {%- if slot.annotations['permissible_values']|length > 0 -%}
1
+ {%- macro enum_parameter(field) -%}
2
+ {%- if field.permissible_values()|length > 0 -%}
3
3
  dtype_kwargs={"categories":(
4
- {%- for pv in slot.annotations['permissible_values'] -%}
4
+ {%- for pv in field.permissible_values() -%}
5
5
  '{{ pv }}',
6
6
  {%- endfor -%}
7
7
  )}
@@ -4,10 +4,20 @@
4
4
  -#}
5
5
  {%- import 'header.jinja2' as header -%}
6
6
  {%- import 'class.jinja2' as class_macros -%}
7
- {%- import 'mixins.jinja2' as mixins -%}
8
7
  {{ header }}
9
8
 
10
- {{ mixins }}
9
+ {% if pandera_validator_code %}
10
+ {{ pandera_validator_code }}
11
+ {% else %}
12
+ from linkml.generators.panderagen.linkml_pandera_validator import LinkmlPanderaValidator as _LinkmlPanderaValidator
13
+ {% endif %}
14
+
15
+ # These are all str for now
16
+ ID_TYPES = {
17
+ {%- for cls in doc.classes %}
18
+ "{{ cls.name }}": "str",
19
+ {%- endfor %}
20
+ }
11
21
 
12
22
  {% if metamodel_version %}# metamodel_version: {{metamodel_version}}{% endif %}
13
23
  {% if model_version %}# version: {{model_version}}{% endif %}
@@ -4,33 +4,35 @@
4
4
  -#}
5
5
  {%- import 'enums.jinja2' as enum_macros -%}
6
6
 
7
- {%- macro constraint_parameters(field, slot) -%}
7
+ {%- macro constraint_parameters(field) -%}
8
8
  {%- if coerce is true -%}coerce=True, {% endif -%}
9
9
  {%- if field.default_value is not none -%}default={{ field.default_value }}, {% endif -%}
10
- {%- if slot.minimum_value is not none -%}ge={{ slot.minimum_value }}, {% endif -%}
11
- {%- if slot.maximum_value is not none -%}le={{ slot.maximum_value }}, {% endif -%}
12
- {%- if slot.pattern is not none -%}str_matches=r"{{ slot.pattern }}", {% endif -%}
13
- {%- if (slot.required is none or slot.required is false) and slot.identifier is not true -%}nullable=True, {% endif -%}
10
+ {%- if field.minimum_value() is not none -%}ge={{ field.minimum_value() }}, {% endif -%}
11
+ {%- if field.maximum_value() is not none -%}le={{ field.maximum_value() }}, {% endif -%}
12
+ {%- if field.pattern() is not none -%}str_matches=r"{{ field.pattern() }}", {% endif -%}
13
+ {%- if (field.required() is none or field.required() is false) and field.identifier() is not true -%}nullable=True, {% endif -%}
14
14
  {%- endmacro -%}
15
15
 
16
- {%- macro render_slot(field) -%}
17
- {%- with slot = field.source_slot %}
16
+ {%- macro id_slot_name_class_variable(cls) %}
17
+ _id_name : str = {% if cls.identifier_key_slot() %} '{{ cls.identifier_key_slot() }}' {% else %}None{% endif %}
18
+ {%- endmacro -%}
19
+
20
+ {%- macro render_slot(field) %}
18
21
  {{ field.name }}:{{ ' ' }}
19
- {%- if (slot.required is none or slot.required is false) and slot.identifier is not true -%}
22
+ {%- if (field.required() is none or field.required() is false) and (field.identifier() is not true) -%}
20
23
  Optional[
21
24
  {%- endif -%}
22
25
  {{field.range}}
23
- {%- if (slot.required is none or slot.required is false) and slot.identifier is not true -%}
24
- {{ '] ' }}
26
+ {%- if (field.required() is none or field.required() is false) and (field.identifier() is not true) -%}
27
+ {{ ']' }}
25
28
  {%- endif -%}
26
- = pla.Field(
27
- {{- constraint_parameters(field, slot) -}}
28
- {{- enum_macros.enum_parameter(slot) -}}
29
+ {{ ' =' }} pla.Field(
30
+ {{- constraint_parameters(field) -}}
31
+ {{- enum_macros.enum_parameter(field) -}}
29
32
  )
30
- {%- if slot.description %}
33
+ {%- if field.description() %}
31
34
  """
32
- {{ slot.description }}
35
+ {{ field.description() }}
33
36
  """
34
- {% endif -%}
35
- {%- endwith -%}
37
+ {% endif -%}
36
38
  {%- endmacro %}
@@ -1,31 +1,156 @@
1
1
  import logging
2
+ from typing import Optional
2
3
 
3
- from linkml.generators.oocodegen import OOField
4
+ from linkml_runtime.linkml_model.meta import ClassDefinitionName, SlotDefinition
5
+
6
+ from linkml.utils.helpers import get_range_associated_slots
7
+
8
+ from .dataframe_field import DataframeField
4
9
 
5
10
  logger = logging.getLogger(__file__)
6
11
 
7
12
 
8
13
  class SlotGeneratorMixin:
14
+ """
15
+ Prior to rendering the dataframe schema, this class provides
16
+ and adapter between the LinkML model and schema view
17
+ and the rendering engine.
18
+ """
19
+
9
20
  LINKML_ANY_CURIE = "linkml:Any"
21
+
22
+ # constants used to render the schema
23
+ # these will be moved to a dialect-specific place
10
24
  ANY_RANGE_STRING = "Object"
11
25
  CLASS_RANGE_STRING = "Struct"
26
+ SIMPLE_DICT_RANGE_STRING = "Struct"
12
27
  ENUM_RANGE_STRING = "Enum"
13
- DEFAULT_RANGE_STRING = "str"
14
28
 
15
- # to be implemented by the class
16
- def make_multivalued(self, range: str):
17
- raise NotImplementedError("please implement make multivalued in the class")
29
+ # association form flags used for rendering decisions
30
+ FORM_INLINED_DICT = "inlined_dict"
31
+ FORM_INLINED_LIST_DICT = "inlined_list_dict"
32
+ FORM_INLINED_COLLECTION_DICT = "inline_collection_dict"
33
+ FORM_INLINED_SIMPLE_DICT = "simple_dict"
34
+ FORM_MULTIVALUED_FOREIGN_KEY = "list_foreign_key"
35
+ FORM_FOREIGN_KEY = "foreign_key"
36
+ FORM_ERROR = "error"
37
+
38
+ # When nested inlining is done, the Pandera validator needs a specific range
39
+ INLINED_FORM_RANGE_PANDERA = {
40
+ FORM_INLINED_SIMPLE_DICT: SIMPLE_DICT_RANGE_STRING,
41
+ FORM_INLINED_LIST_DICT: CLASS_RANGE_STRING,
42
+ FORM_INLINED_COLLECTION_DICT: CLASS_RANGE_STRING,
43
+ FORM_INLINED_DICT: CLASS_RANGE_STRING,
44
+ FORM_ERROR: None,
45
+ }
46
+
47
+ def is_multivalued(self, slot):
48
+ return "multivalued" in slot and slot.multivalued is True
49
+
50
+ _INTERNAL_INLINED_FORM = {
51
+ # INLINED, INLINED_AS_LIST, MULTIVALUED,
52
+ (False, False, False): FORM_FOREIGN_KEY,
53
+ (False, False, True): FORM_MULTIVALUED_FOREIGN_KEY,
54
+ (False, True, False): FORM_INLINED_LIST_DICT,
55
+ (False, True, True): FORM_INLINED_LIST_DICT,
56
+ (True, False, False): FORM_INLINED_DICT,
57
+ (True, False, True): FORM_INLINED_COLLECTION_DICT,
58
+ (True, None, True): FORM_INLINED_DICT,
59
+ (True, True, False): FORM_INLINED_DICT,
60
+ (True, True, True): FORM_INLINED_LIST_DICT,
61
+ }
62
+
63
+ def get_identifier_or_key_slot(self, cn: ClassDefinitionName) -> Optional[SlotDefinition]:
64
+ sv = self.schemaview
65
+ id_slot = sv.get_identifier_slot(cn)
66
+ if id_slot:
67
+ return id_slot
68
+ else:
69
+ for s in sv.class_induced_slots(cn):
70
+ if s.key:
71
+ return s
72
+ return None
73
+
74
+ def calculate_inlined_form(self, slot: SlotDefinition) -> str:
75
+ is_multivalued = self.is_multivalued(slot)
76
+ internal_inlined_form_key = ((slot.inlined is True), (slot.inlined_as_list is True), is_multivalued)
77
+ logger.info(f"Inlined form key: {internal_inlined_form_key}")
78
+ internal_inlined_form = self._INTERNAL_INLINED_FORM.get(
79
+ internal_inlined_form_key, SlotGeneratorMixin.FORM_ERROR
80
+ )
81
+
82
+ if internal_inlined_form == SlotGeneratorMixin.FORM_INLINED_COLLECTION_DICT:
83
+ if self.get_identifier_or_key_slot(slot.range) is None:
84
+ internal_inlined_form = SlotGeneratorMixin.FORM_INLINED_LIST_DICT
85
+
86
+ if self.calculate_simple_dict(slot) is not None:
87
+ return SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT
88
+
89
+ return internal_inlined_form
90
+
91
+ def calculate_simple_dict(self, slot: SlotDefinition):
92
+ """slot is the container for the simple dict slot"""
93
+
94
+ (_, range_simple_dict_value_slot, _) = get_range_associated_slots(self.schemaview, slot.range)
95
+
96
+ return range_simple_dict_value_slot
18
97
 
19
- def handle_none_slot(self, slot, range: str) -> str:
98
+ def handle_none_slot(self, slot) -> str:
20
99
  range = self.schema.default_range # need to figure this out, set at the beginning?
21
100
  if range is None:
22
- range = SlotGeneratorMixin.DEFAULT_RANGE_STRING
101
+ range = "str"
23
102
 
24
103
  return range
25
104
 
26
105
  def handle_class_slot(self, slot, range: str) -> str:
27
- logger.warning(f"PanderaGen does not support class range slots. Using Struct {slot.name}")
28
- return SlotGeneratorMixin.CLASS_RANGE_STRING
106
+ range_info = self.schemaview.all_classes().get(range)
107
+
108
+ if range_info["class_uri"] == SlotGeneratorMixin.LINKML_ANY_CURIE:
109
+ range = SlotGeneratorMixin.ANY_RANGE_STRING
110
+ else:
111
+ inlined_form = self.calculate_inlined_form(slot)
112
+
113
+ if inlined_form == SlotGeneratorMixin.FORM_INLINED_COLLECTION_DICT:
114
+ logger.warning(
115
+ f"Slot {slot.name} uses inlined dictionary form,"
116
+ "which may be less efficient than inlined as list form with the current implementation."
117
+ )
118
+ elif inlined_form == SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT:
119
+ logger.warning(
120
+ f"Slot {slot.name} uses inlined simple dictionary form. Support is incomplete "
121
+ "and performance is less efficient than inlined as list form with the current implementation."
122
+ )
123
+
124
+ if inlined_form in (SlotGeneratorMixin.FORM_MULTIVALUED_FOREIGN_KEY, SlotGeneratorMixin.FORM_FOREIGN_KEY):
125
+ logger.warning(f"Foreign key not implemented for slot {slot.name}")
126
+ range = f"ID_TYPES['{self.get_class_name(range)}']"
127
+ else:
128
+ # TODO: make these setters
129
+ slot.annotations["reference_class"] = self.get_class_name(range)
130
+ slot.annotations["inline_form"] = inlined_form
131
+
132
+ range = SlotGeneratorMixin.INLINED_FORM_RANGE_PANDERA[inlined_form]
133
+
134
+ if inlined_form == SlotGeneratorMixin.FORM_INLINED_SIMPLE_DICT:
135
+ self.set_simple_dict_inline_details_annotation(slot)
136
+ elif inlined_form in [SlotGeneratorMixin.FORM_INLINED_LIST_DICT]:
137
+ range = self.make_multivalued(range)
138
+
139
+ return range
140
+
141
+ def set_simple_dict_inline_details_annotation(self, slot):
142
+ """Extra metadata is to help with the simple dict case"""
143
+ (range_id_slot, range_simple_dict_value_slot, _) = get_range_associated_slots( # range_required_slots,
144
+ self.schemaview, slot.range
145
+ )
146
+
147
+ simple_dict_id = range_id_slot.name
148
+ other_slot = range_simple_dict_value_slot.name
149
+ slot.annotations["inline_details"] = {"id": simple_dict_id, "other": other_slot}
150
+
151
+ def handle_non_inlined_class_slot(self, slot, range: str) -> str:
152
+ """non-inlined class slots have been temporarily removed but this will be needed to support them"""
153
+ return f"ID_TYPES['{self.get_class_name(range)}']"
29
154
 
30
155
  def handle_type_slot(self, slot, range: str) -> str:
31
156
  del slot # unused for now
@@ -43,9 +168,10 @@ class SlotGeneratorMixin:
43
168
  return range
44
169
 
45
170
  def handle_multivalued_slot(self, slot, range: str) -> str:
46
- if slot.multivalued:
47
- if slot.inlined_as_list and range != SlotGeneratorMixin.CLASS_RANGE_STRING:
48
- range = self.make_multivalued(range)
171
+ if (slot.inlined_as_list is True and self.is_multivalued(slot)) or (
172
+ slot.inlined is True and slot.inlined_as_list is True and self.is_multivalued(slot)
173
+ ):
174
+ range = self.make_multivalued(range)
49
175
 
50
176
  return range
51
177
 
@@ -58,19 +184,20 @@ class SlotGeneratorMixin:
58
184
  safe_sn = self.get_slot_name(slot.alias)
59
185
 
60
186
  if range is None:
61
- range = self.handle_none_slot(slot, range)
187
+ range = self.handle_none_slot(slot)
62
188
  elif range in self.schemaview.all_classes():
63
189
  range = self.handle_class_slot(slot, range)
64
190
  elif range in self.schemaview.all_types():
65
191
  range = self.handle_type_slot(slot, range)
192
+ if self.is_multivalued(slot):
193
+ range = self.make_multivalued(range)
66
194
  elif range in self.schemaview.all_enums():
67
195
  range = self.handle_enum_slot(slot, range)
196
+ range = self.handle_multivalued_slot(slot, range)
68
197
  else:
69
198
  raise Exception(f"Unknown range {range}")
70
199
 
71
- range = self.handle_multivalued_slot(slot, range)
72
-
73
- return OOField(
200
+ return DataframeField(
74
201
  name=safe_sn,
75
202
  source_slot=slot,
76
203
  range=range,
@@ -0,0 +1,19 @@
1
+ """Transform classes for LinkML Pandera validation.
2
+
3
+ This module provides transform classes that convert LinkML inline formats
4
+ into forms suitable for Polars DataFrame validation with Pandera models.
5
+ """
6
+
7
+ from .collection_dict_model_transform import CollectionDictModelTransform
8
+ from .list_dict_model_transform import ListDictModelTransform
9
+ from .model_transform import ModelTransform
10
+ from .nested_struct_model_transform import NestedStructModelTransform
11
+ from .simple_dict_model_transform import SimpleDictModelTransform
12
+
13
+ __all__ = [
14
+ "ModelTransform",
15
+ "SimpleDictModelTransform",
16
+ "CollectionDictModelTransform",
17
+ "ListDictModelTransform",
18
+ "NestedStructModelTransform",
19
+ ]
@@ -0,0 +1,62 @@
1
+ import polars as pl
2
+
3
+ from .model_transform import ModelTransform
4
+
5
+
6
+ class CollectionDictModelTransform(ModelTransform):
7
+ """This class assists in converting a LinkML 'collection dict' inline column
8
+ into a form that is better for representing in a PolaRS dataframe and
9
+ validating with a Pandera model.
10
+ """
11
+
12
+ def __init__(self, polars_schema, id_col):
13
+ self.polars_schema = polars_schema
14
+ """A polars schema representing a collection dict column"""
15
+
16
+ self.id_col = id_col
17
+ """The ID column in the sense of a LinkML inline collection dict"""
18
+
19
+ def transform(self, linkml_collection_dict):
20
+ """Converts a collection dict nested column to a list of dicts.
21
+ { 'A': {...}, 'B': {...}, ... } -> [{'id': 'A', ...}, {'id': 'B', ...}, ...]
22
+ """
23
+ return self._collection_dict_to_list_of_structs(linkml_collection_dict)
24
+
25
+ def _collection_dict_to_list_of_structs(self, linkml_collection_dict):
26
+ """Converts a collection dict nested column to a list of dicts.
27
+ { 'A': {...}, 'B': {...}, ... } -> [{'id': 'A', ...}, {'id': 'B', ...}, ...]
28
+
29
+ An inefficient conversion (relative to native PolaRS operations)
30
+ from a collection dict form to a dataframe struct column.
31
+
32
+ linkml_collection_dict : dict
33
+ A single row entry in a dataframe column (one cell), which itself is a dict.
34
+ The value entries are dicts that get the key added as an id field.
35
+ """
36
+ arr = []
37
+ for k, v in linkml_collection_dict.items():
38
+ if k not in v:
39
+ v[self.id_col] = k
40
+ arr.append(v)
41
+ return arr
42
+
43
+ @classmethod
44
+ def prepare_dataframe(cls, data, column_name, nested_cls):
45
+ """Returns just the collection dict column transformed to an inlined list form
46
+
47
+ note that this method uses collect and iter_rows so is very inefficient
48
+ """
49
+ id_column = nested_cls.get_id_column_name()
50
+ polars_schema = nested_cls.to_schema()
51
+
52
+ collection_dict_transformer = cls(polars_schema, id_column)
53
+
54
+ one_column_df = data.lazyframe.select(pl.col(column_name)).collect()
55
+
56
+ list_of_structs = [collection_dict_transformer.transform(e) for [e] in one_column_df.iter_rows()]
57
+
58
+ return pl.DataFrame(pl.Series(list_of_structs).alias(column_name))
59
+
60
+ def explode_unnest_dataframe(self, df, column_name):
61
+ """Filter, explode and unnest for collection dict."""
62
+ return df.lazy().filter(pl.col(column_name).list.len() > 0).explode(column_name).unnest(column_name).collect()
@@ -0,0 +1,66 @@
1
+ import polars as pl
2
+
3
+ from .model_transform import ModelTransform
4
+
5
+
6
+ class ListDictModelTransform(ModelTransform):
7
+ """This class assists in converting a LinkML 'list dict' inline column
8
+ into a form that is better for representing in a PolaRS dataframe and
9
+ validating with a Pandera model.
10
+ """
11
+
12
+ def __init__(self, polars_schema):
13
+ self.polars_schema = polars_schema
14
+ """A polars schema representing a list dict column"""
15
+
16
+ def transform(self, linkml_list_dict):
17
+ """Transforms a list dict nested column.
18
+ This is a pass-through since list dicts are already in the correct format.
19
+ """
20
+ return linkml_list_dict
21
+
22
+ @classmethod
23
+ def unnest_list_struct(cls, column_name: str, df):
24
+ """Use this in a custom check. Pass the nested model as pandera_model."""
25
+
26
+ # fmt: off
27
+ unnested_column = (
28
+ df
29
+ .select(column_name)
30
+ .filter(pl.col(column_name).list.len() > 0) # see: https://github.com/pola-rs/polars/issues/14381
31
+ .explode(column_name)
32
+ .unnest(column_name)
33
+ )
34
+ # fmt: on
35
+
36
+ return unnested_column
37
+
38
+ @classmethod
39
+ def prepare_dataframe(cls, data, column_name, nested_cls):
40
+ """Returns just the list dict column transformed to an inlined list form
41
+
42
+ note that this method uses collect and iter_rows so is very inefficient
43
+ """
44
+ polars_schema = nested_cls.to_schema()
45
+
46
+ list_dict_transformer = cls(polars_schema)
47
+
48
+ one_column_df = data.lazyframe.select(pl.col(column_name)).collect()
49
+
50
+ list_of_structs = [list_dict_transformer.transform(e) for [e] in one_column_df.iter_rows()]
51
+
52
+ return pl.DataFrame(pl.Series(list_of_structs).alias(column_name))
53
+
54
+ def explode_unnest_dataframe(self, df, column_name, data=None):
55
+ """Filter, explode and unnest for list dict with struct fallback."""
56
+ try:
57
+ return (
58
+ df.lazy().filter(pl.col(column_name).list.len() > 0).explode(column_name).unnest(column_name).collect()
59
+ )
60
+ except (pl.exceptions.PanicException, Exception):
61
+ if data:
62
+ from .nested_struct_model_transform import NestedStructModelTransform
63
+
64
+ nested_transform = NestedStructModelTransform(self.polars_schema)
65
+ return nested_transform.explode_unnest_dataframe(data.lazyframe, column_name)
66
+ raise
@@ -0,0 +1,8 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
+ class ModelTransform(ABC):
5
+ @abstractmethod
6
+ def explode_unnest_dataframe(self, df, column_name):
7
+ """Abstract method for exploding and unnesting dataframes."""
8
+ pass