cognite-neat 0.97.3__py3-none-any.whl → 0.99.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (109)
  1. cognite/neat/_client/__init__.py +4 -0
  2. cognite/neat/_client/_api/data_modeling_loaders.py +512 -0
  3. cognite/neat/_client/_api/schema.py +50 -0
  4. cognite/neat/_client/_api_client.py +17 -0
  5. cognite/neat/_client/data_classes/__init__.py +0 -0
  6. cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
  7. cognite/neat/{_rules/models/dms/_schema.py → _client/data_classes/schema.py} +32 -281
  8. cognite/neat/_graph/_shared.py +14 -15
  9. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
  10. cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
  11. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -12
  12. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
  13. cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
  14. cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
  15. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
  16. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
  17. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
  18. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
  19. cognite/neat/_graph/extractors/_rdf_file.py +6 -7
  20. cognite/neat/_graph/loaders/__init__.py +1 -2
  21. cognite/neat/_graph/queries/_base.py +17 -1
  22. cognite/neat/_graph/transformers/_classic_cdf.py +50 -134
  23. cognite/neat/_graph/transformers/_prune_graph.py +1 -1
  24. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  25. cognite/neat/_issues/warnings/__init__.py +6 -0
  26. cognite/neat/_issues/warnings/_external.py +8 -0
  27. cognite/neat/_issues/warnings/_models.py +9 -0
  28. cognite/neat/_issues/warnings/_properties.py +16 -0
  29. cognite/neat/_rules/_constants.py +7 -6
  30. cognite/neat/_rules/_shared.py +3 -8
  31. cognite/neat/_rules/analysis/__init__.py +1 -2
  32. cognite/neat/_rules/analysis/_base.py +10 -27
  33. cognite/neat/_rules/analysis/_dms.py +4 -10
  34. cognite/neat/_rules/analysis/_information.py +2 -10
  35. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  36. cognite/neat/_rules/exporters/_base.py +3 -4
  37. cognite/neat/_rules/exporters/_rules2dms.py +29 -40
  38. cognite/neat/_rules/exporters/_rules2excel.py +15 -72
  39. cognite/neat/_rules/exporters/_rules2ontology.py +4 -4
  40. cognite/neat/_rules/importers/_base.py +3 -4
  41. cognite/neat/_rules/importers/_dms2rules.py +21 -45
  42. cognite/neat/_rules/importers/_dtdl2rules/dtdl_converter.py +1 -7
  43. cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +7 -10
  44. cognite/neat/_rules/importers/_rdf/_base.py +17 -29
  45. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2classes.py +2 -2
  46. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2metadata.py +5 -10
  47. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2properties.py +1 -2
  48. cognite/neat/_rules/importers/_rdf/_inference2rules.py +55 -51
  49. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2classes.py +2 -2
  50. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2metadata.py +5 -8
  51. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2properties.py +1 -2
  52. cognite/neat/_rules/importers/_rdf/_shared.py +25 -140
  53. cognite/neat/_rules/importers/_spreadsheet2rules.py +10 -41
  54. cognite/neat/_rules/models/__init__.py +3 -17
  55. cognite/neat/_rules/models/_base_rules.py +118 -62
  56. cognite/neat/_rules/models/dms/__init__.py +2 -2
  57. cognite/neat/_rules/models/dms/_exporter.py +20 -178
  58. cognite/neat/_rules/models/dms/_rules.py +65 -128
  59. cognite/neat/_rules/models/dms/_rules_input.py +72 -56
  60. cognite/neat/_rules/models/dms/_validation.py +16 -109
  61. cognite/neat/_rules/models/entities/_single_value.py +32 -4
  62. cognite/neat/_rules/models/information/_rules.py +19 -122
  63. cognite/neat/_rules/models/information/_rules_input.py +32 -41
  64. cognite/neat/_rules/models/information/_validation.py +34 -102
  65. cognite/neat/_rules/models/mapping/__init__.py +2 -3
  66. cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
  67. cognite/neat/_rules/models/mapping/_classic2core.yaml +339 -0
  68. cognite/neat/_rules/transformers/__init__.py +3 -6
  69. cognite/neat/_rules/transformers/_converters.py +128 -206
  70. cognite/neat/_rules/transformers/_mapping.py +105 -34
  71. cognite/neat/_rules/transformers/_verification.py +5 -16
  72. cognite/neat/_session/_base.py +83 -21
  73. cognite/neat/_session/_collector.py +126 -0
  74. cognite/neat/_session/_drop.py +35 -0
  75. cognite/neat/_session/_inspect.py +22 -10
  76. cognite/neat/_session/_mapping.py +39 -0
  77. cognite/neat/_session/_prepare.py +222 -27
  78. cognite/neat/_session/_read.py +109 -19
  79. cognite/neat/_session/_set.py +2 -2
  80. cognite/neat/_session/_show.py +11 -11
  81. cognite/neat/_session/_to.py +27 -14
  82. cognite/neat/_session/exceptions.py +20 -3
  83. cognite/neat/_store/_base.py +27 -24
  84. cognite/neat/_store/_provenance.py +2 -2
  85. cognite/neat/_utils/auxiliary.py +19 -0
  86. cognite/neat/_utils/rdf_.py +28 -1
  87. cognite/neat/_version.py +1 -1
  88. cognite/neat/_workflows/steps/data_contracts.py +2 -10
  89. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +14 -49
  90. cognite/neat/_workflows/steps/lib/current/rules_importer.py +4 -1
  91. cognite/neat/_workflows/steps/lib/current/rules_validator.py +5 -9
  92. {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/METADATA +4 -3
  93. {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/RECORD +97 -100
  94. cognite/neat/_graph/loaders/_rdf2asset.py +0 -416
  95. cognite/neat/_rules/analysis/_asset.py +0 -173
  96. cognite/neat/_rules/models/asset/__init__.py +0 -13
  97. cognite/neat/_rules/models/asset/_rules.py +0 -109
  98. cognite/neat/_rules/models/asset/_rules_input.py +0 -101
  99. cognite/neat/_rules/models/asset/_validation.py +0 -45
  100. cognite/neat/_rules/models/domain.py +0 -136
  101. cognite/neat/_rules/models/mapping/_base.py +0 -131
  102. cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
  103. cognite/neat/_utils/cdf/loaders/_base.py +0 -54
  104. cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
  105. cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
  106. cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
  107. {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/LICENSE +0 -0
  108. {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/WHEEL +0 -0
  109. {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/entry_points.txt +0 -0
@@ -1,26 +1,15 @@
1
1
  from abc import ABC, abstractmethod
2
- from collections.abc import (
3
- Hashable,
4
- ItemsView,
5
- Iterable,
6
- Iterator,
7
- KeysView,
8
- Mapping,
9
- MutableMapping,
10
- ValuesView,
2
+ from collections.abc import Hashable, ItemsView, Iterable, Iterator, KeysView, Mapping, MutableMapping, ValuesView
3
+ from typing import (
4
+ Any,
5
+ TypeVar,
6
+ cast,
7
+ final,
11
8
  )
12
- from dataclasses import dataclass
13
- from typing import Any, TypeVar, cast, final
14
9
 
15
10
  import pandas as pd
16
11
  import yaml
17
- from cognite.client import CogniteClient
18
- from cognite.client.data_classes._base import (
19
- CogniteResourceList,
20
- T_CogniteResource,
21
- WriteableCogniteResource,
22
- WriteableCogniteResourceList,
23
- )
12
+ from cognite.client.data_classes._base import T_CogniteResource
24
13
  from cognite.client.data_classes.data_modeling import (
25
14
  ContainerApply,
26
15
  ContainerId,
@@ -33,127 +22,11 @@ from cognite.client.data_classes.data_modeling import (
33
22
  ViewId,
34
23
  )
35
24
  from cognite.client.utils._auxiliary import load_yaml_or_json
36
- from cognite.client.utils._pandas_helpers import (
37
- convert_nullable_int_cols,
38
- )
39
-
40
- # The Table, TableWrite data classes in the Cognite-SDK lacks the database attribute.
41
- # This is a problem when creating the RawTableLoader that needs the data class to be able to create, update, retrieve
42
- # and delete tables.
43
- # This is a reimplemented version of the Table, TableWrite data classes with the database attribute added.
44
-
45
-
46
- @dataclass(frozen=True)
47
- class RawTableID:
48
- table: str
49
- database: str
50
-
51
- def as_tuple(self) -> tuple[str, str]:
52
- return self.database, self.table
53
-
54
-
55
- class RawTableCore(WriteableCogniteResource["RawTableWrite"], ABC):
56
- """A NoSQL database table to store customer data
57
-
58
- Args:
59
- name (str | None): Unique name of the table
60
- """
61
-
62
- def __init__(
63
- self,
64
- name: str | None = None,
65
- database: str | None = None,
66
- ) -> None:
67
- self.name = name
68
- self.database = database
69
-
70
- def as_id(self) -> RawTableID:
71
- if self.name is None or self.database is None:
72
- raise ValueError("name and database are required to create a TableID")
73
- return RawTableID(table=self.name, database=self.database)
74
-
75
-
76
- class RawTable(RawTableCore):
77
- """A NoSQL database table to store customer data.
78
- This is the reading version of the Table class, which is used when retrieving a table.
79
-
80
- Args:
81
- name (str | None): Unique name of the table
82
- created_time (int | None): Time the table was created.
83
- cognite_client (CogniteClient | None): The client to associate with this object.
84
- """
85
-
86
- def __init__(
87
- self,
88
- name: str | None = None,
89
- database: str | None = None,
90
- created_time: int | None = None,
91
- cognite_client: CogniteClient | None = None,
92
- ) -> None:
93
- super().__init__(name, database)
94
- self.created_time = created_time
95
- self._cognite_client = cast("CogniteClient", cognite_client)
96
-
97
- self._db_name: str | None = None
98
-
99
- def as_write(self) -> "RawTableWrite":
100
- """Returns this Table as a TableWrite"""
101
- if self.name is None or self.database is None:
102
- raise ValueError("name and database are required to create a Table")
103
- return RawTableWrite(name=self.name, database=self.database)
104
-
105
-
106
- class RawTableWrite(RawTableCore):
107
- """A NoSQL database table to store customer data
108
- This is the writing version of the Table class, which is used when creating a table.
109
-
110
- Args:
111
- name (str): Unique name of the table
112
- """
113
-
114
- def __init__(
115
- self,
116
- name: str,
117
- database: str,
118
- ) -> None:
119
- super().__init__(name, database)
120
-
121
- @classmethod
122
- def _load(cls, resource: dict[str, Any], cognite_client: CogniteClient | None = None) -> "RawTableWrite":
123
- return cls(resource["name"], resource["database"])
124
-
125
- def as_write(self) -> "RawTableWrite":
126
- """Returns this TableWrite instance."""
127
- return self
128
-
129
-
130
- class RawTableWriteList(CogniteResourceList[RawTableWrite]):
131
- _RESOURCE = RawTableWrite
132
-
133
- def as_ids(self) -> list[RawTableID]:
134
- """Returns this TableWriteList as a list of TableIDs"""
135
- return [table.as_id() for table in self.data]
136
-
137
-
138
- class RawTableList(
139
- WriteableCogniteResourceList[RawTableWrite, RawTable],
140
- ):
141
- _RESOURCE = RawTable
142
-
143
- def as_write(self) -> RawTableWriteList:
144
- """Returns this TableList as a TableWriteList"""
145
- return RawTableWriteList([table.as_write() for table in self.data])
146
-
147
- def as_ids(self) -> list[RawTableID]:
148
- """Returns this TableList as a list of TableIDs"""
149
- return [table.as_id() for table in self.data]
150
-
25
+ from cognite.client.utils._pandas_helpers import convert_nullable_int_cols
151
26
 
152
27
  T_ID = TypeVar("T_ID", bound=Hashable)
153
28
 
154
29
 
155
- # Inheriting from dict as we are extending it,
156
- # ref https://stackoverflow.com/questions/7148419/subclass-dict-userdict-dict-or-abc
157
30
  class CogniteResourceDict(dict, MutableMapping[T_ID, T_CogniteResource], ABC):
158
31
  """CogniteResource stored in a mapping structure.
159
32
 
@@ -1,4 +1,3 @@
1
- import json
2
1
  import sys
3
2
  import warnings
4
3
  import zipfile
@@ -6,13 +5,12 @@ from collections import ChainMap, Counter, defaultdict
6
5
  from collections.abc import Iterable, MutableMapping
7
6
  from dataclasses import Field, dataclass, field, fields
8
7
  from pathlib import Path
9
- from typing import Any, ClassVar, Literal, cast
8
+ from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
10
9
 
11
10
  import yaml
12
11
  from cognite.client import CogniteClient
13
12
  from cognite.client import data_modeling as dm
14
- from cognite.client.data_classes import DatabaseWrite, DatabaseWriteList, TransformationWrite, TransformationWriteList
15
- from cognite.client.data_classes.data_modeling import ViewApply
13
+ from cognite.client.data_classes import DatabaseWrite, TransformationWrite
16
14
  from cognite.client.data_classes.data_modeling.views import (
17
15
  ReverseDirectRelation,
18
16
  ReverseDirectRelationApply,
@@ -23,8 +21,14 @@ from cognite.client.data_classes.data_modeling.views import (
23
21
  ViewProperty,
24
22
  ViewPropertyApply,
25
23
  )
26
- from cognite.client.data_classes.transformations.common import Edges, EdgeType, Nodes, ViewInfo
27
24
 
25
+ from cognite.neat._client.data_classes.data_modeling import (
26
+ CogniteResourceDict,
27
+ ContainerApplyDict,
28
+ NodeApplyDict,
29
+ SpaceApplyDict,
30
+ ViewApplyDict,
31
+ )
28
32
  from cognite.neat._issues import NeatError
29
33
  from cognite.neat._issues.errors import (
30
34
  NeatYamlError,
@@ -40,17 +44,6 @@ from cognite.neat._issues.warnings import (
40
44
  ResourcesDuplicatedWarning,
41
45
  )
42
46
  from cognite.neat._issues.warnings.user_modeling import DirectRelationMissingSourceWarning
43
- from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
44
- from cognite.neat._utils.cdf.data_classes import (
45
- CogniteResourceDict,
46
- ContainerApplyDict,
47
- NodeApplyDict,
48
- RawTableWrite,
49
- RawTableWriteList,
50
- SpaceApplyDict,
51
- ViewApplyDict,
52
- )
53
- from cognite.neat._utils.cdf.loaders import ViewLoader
54
47
  from cognite.neat._utils.rdf_ import get_inheritance_path
55
48
  from cognite.neat._utils.text import to_camel
56
49
 
@@ -59,6 +52,9 @@ if sys.version_info >= (3, 11):
59
52
  else:
60
53
  from typing_extensions import Self
61
54
 
55
+ if TYPE_CHECKING:
56
+ from cognite.neat._client import NeatClient
57
+
62
58
 
63
59
  @dataclass
64
60
  class DMSSchema:
@@ -105,7 +101,7 @@ class DMSSchema:
105
101
  return directly_referenced_containers | inherited_referenced_containers
106
102
 
107
103
  @classmethod
108
- def from_model_id(cls, client: CogniteClient, data_model_id: dm.DataModelIdentifier) -> "DMSSchema":
104
+ def from_model_id(cls, client: "NeatClient", data_model_id: dm.DataModelIdentifier) -> "DMSSchema":
109
105
  data_models = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
110
106
  if len(data_models) == 0:
111
107
  raise ValueError(f"Data model {data_model_id} not found")
@@ -115,7 +111,7 @@ class DMSSchema:
115
111
  @classmethod
116
112
  def from_data_model(
117
113
  cls,
118
- client: CogniteClient,
114
+ client: "NeatClient",
119
115
  data_model: dm.DataModel[dm.View],
120
116
  reference_model: dm.DataModel[dm.View] | None = None,
121
117
  ) -> "DMSSchema":
@@ -139,6 +135,8 @@ class DMSSchema:
139
135
  Returns:
140
136
  DMSSchema: The schema created from the data model.
141
137
  """
138
+ from cognite.neat._client._api.data_modeling_loaders import ViewLoader
139
+
142
140
  views = dm.ViewList(data_model.views)
143
141
 
144
142
  data_model_write = data_model.as_write()
@@ -157,7 +155,7 @@ class DMSSchema:
157
155
  raise ValueError(f"Space(s) {space_read} not found")
158
156
  space_write = space_read.as_write()
159
157
 
160
- view_loader = ViewLoader(client)
158
+ view_loader = ViewLoader(NeatClient(client))
161
159
 
162
160
  existing_view_ids = set(views.as_ids())
163
161
 
@@ -179,7 +177,9 @@ class DMSSchema:
179
177
 
180
178
  # We need to include parent views in the schema to make sure that the schema is valid.
181
179
  parent_view_ids = {parent for view in views for parent in view.implements or []}
182
- parents = view_loader.retrieve_all_parents(list(parent_view_ids - existing_view_ids))
180
+ parents = view_loader.retrieve(
181
+ list(parent_view_ids - existing_view_ids), include_ancestor=True, include_connections=True
182
+ )
183
183
  views.extend([parent for parent in parents if parent.as_id() not in existing_view_ids])
184
184
 
185
185
  # Converting views from read to write format requires to account for parents (implements)
@@ -524,7 +524,7 @@ class DMSSchema:
524
524
 
525
525
  @classmethod
526
526
  def _to_sortable_identifier(cls, item: Any) -> str | tuple[str, str] | tuple[str, str, str]:
527
- if isinstance(item, dm.ContainerApply | dm.ViewApply | dm.DataModelApply | dm.NodeApply | RawTableWrite):
527
+ if isinstance(item, dm.ContainerApply | dm.ViewApply | dm.DataModelApply | dm.NodeApply):
528
528
  identifier = item.as_id().as_tuple()
529
529
  if len(identifier) == 3 and identifier[2] is None:
530
530
  return identifier[:2] # type: ignore[misc]
@@ -539,6 +539,10 @@ class DMSSchema:
539
539
  raise ValueError(f"Cannot sort item of type {type(item)}")
540
540
 
541
541
  def validate(self) -> list[NeatError]:
542
+ # TODO: This type of validation should be done in NeatSession where all the
543
+ # schema components which are not part of Rules are imported and the model as
544
+ # the whole is validated.
545
+
542
546
  errors: set[NeatError] = set()
543
547
  defined_spaces = self.spaces.copy()
544
548
  defined_containers = self.containers.copy()
@@ -708,6 +712,13 @@ class DMSSchema:
708
712
  referenced_spaces |= {s.space for s in self.spaces.values()}
709
713
  return referenced_spaces
710
714
 
715
+ def referenced_container(self) -> set[dm.ContainerId]:
716
+ referenced_containers = {
717
+ container for view in self.views.values() for container in view.referenced_containers()
718
+ }
719
+ referenced_containers |= set(self.containers.keys())
720
+ return referenced_containers
721
+
711
722
  def as_read_model(self) -> dm.DataModel[dm.View]:
712
723
  if self.data_model is None:
713
724
  raise ValueError("Data model is not defined")
@@ -828,263 +839,3 @@ class DMSSchema:
828
839
  }
829
840
  # If a container has a required property that is not used by the view, the view is not writable
830
841
  return not bool(required_properties - used_properties)
831
-
832
-
833
- @dataclass
834
- class PipelineSchema(DMSSchema):
835
- transformations: TransformationWriteList = field(default_factory=lambda: TransformationWriteList([]))
836
- databases: DatabaseWriteList = field(default_factory=lambda: DatabaseWriteList([]))
837
- raw_tables: RawTableWriteList = field(default_factory=lambda: RawTableWriteList([]))
838
-
839
- _FIELD_NAME_BY_RESOURCE_TYPE: ClassVar[dict[str, str]] = {
840
- **DMSSchema._FIELD_NAME_BY_RESOURCE_TYPE,
841
- "raw": "raw_tables",
842
- }
843
-
844
- def __post_init__(self):
845
- existing_databases = {database.name for database in self.databases}
846
- table_database = {table.database for table in self.raw_tables}
847
- if missing := table_database - existing_databases:
848
- self.databases.extend([DatabaseWrite(name=database) for database in missing])
849
-
850
- @classmethod
851
- def _read_directory(cls, directory: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
852
- data, context = super()._read_directory(directory)
853
- for yaml_file in directory.rglob("*.yaml"):
854
- if yaml_file.parent.name in ("transformations", "raw"):
855
- attr_name = cls._FIELD_NAME_BY_RESOURCE_TYPE.get(yaml_file.parent.name, yaml_file.parent.name)
856
- data.setdefault(attr_name, [])
857
- context.setdefault(attr_name, [])
858
- try:
859
- loaded = yaml.safe_load(yaml_file.read_text())
860
- except Exception as e:
861
- warnings.warn(
862
- FileTypeUnexpectedWarning(yaml_file, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
863
- )
864
- continue
865
- if isinstance(loaded, list):
866
- data[attr_name].extend(loaded)
867
- context[attr_name].extend([yaml_file] * len(loaded))
868
- else:
869
- data[attr_name].append(loaded)
870
- context[attr_name].append(yaml_file)
871
- return data, context
872
-
873
- def to_directory(
874
- self,
875
- directory: str | Path,
876
- exclude: set[str] | None = None,
877
- new_line: str | None = "\n",
878
- encoding: str | None = "utf-8",
879
- ) -> None:
880
- super().to_directory(directory, exclude)
881
- exclude_set = exclude or set()
882
- path_dir = Path(directory)
883
- if "transformations" not in exclude_set and self.transformations:
884
- transformation_dir = path_dir / "transformations"
885
- transformation_dir.mkdir(exist_ok=True, parents=True)
886
- for transformation in self.transformations:
887
- (transformation_dir / f"{transformation.external_id}.yaml").write_text(
888
- transformation.dump_yaml(), newline=new_line, encoding=encoding
889
- )
890
- if "raw" not in exclude_set and self.raw_tables:
891
- # The RAW Databases are not written to file. This is because cognite-toolkit expects the RAW databases
892
- # to be in the same file as the RAW tables.
893
- raw_dir = path_dir / "raw"
894
- raw_dir.mkdir(exist_ok=True, parents=True)
895
- for raw_table in self.raw_tables:
896
- (raw_dir / f"{raw_table.name}.yaml").write_text(
897
- raw_table.dump_yaml(), newline=new_line, encoding=encoding
898
- )
899
-
900
- def to_zip(self, zip_file: str | Path, exclude: set[str] | None = None) -> None:
901
- super().to_zip(zip_file, exclude)
902
- exclude_set = exclude or set()
903
- with zipfile.ZipFile(zip_file, "a") as zip_ref:
904
- if "transformations" not in exclude_set:
905
- for transformation in self.transformations:
906
- zip_ref.writestr(f"transformations/{transformation.external_id}.yaml", transformation.dump_yaml())
907
- if "raw" not in exclude_set:
908
- # The RAW Databases are not written to file. This is because cognite-toolkit expects the RAW databases
909
- # to be in the same file as the RAW tables.
910
- for raw_table in self.raw_tables:
911
- zip_ref.writestr(f"raw/{raw_table.name}.yaml", raw_table.dump_yaml())
912
-
913
- @classmethod
914
- def _read_zip(cls, zip_file: Path) -> tuple[dict[str, list[Any]], dict[str, list[Path]]]:
915
- data, context = super()._read_zip(zip_file)
916
- with zipfile.ZipFile(zip_file, "r") as zip_ref:
917
- for file_info in zip_ref.infolist():
918
- if file_info.filename.endswith(".yaml"):
919
- if "/" not in file_info.filename:
920
- continue
921
- filepath = Path(file_info.filename)
922
- if (parent := filepath.parent.name) in ("transformations", "raw"):
923
- attr_name = cls._FIELD_NAME_BY_RESOURCE_TYPE.get(parent, parent)
924
- data.setdefault(attr_name, [])
925
- context.setdefault(attr_name, [])
926
- try:
927
- loaded = yaml.safe_load(zip_ref.read(file_info).decode())
928
- except Exception as e:
929
- warnings.warn(
930
- FileTypeUnexpectedWarning(filepath, frozenset([".yaml", ".yml"]), str(e)), stacklevel=2
931
- )
932
- continue
933
- if isinstance(loaded, list):
934
- data[attr_name].extend(loaded)
935
- context[attr_name].extend([filepath] * len(loaded))
936
- else:
937
- data[attr_name].append(loaded)
938
- context[attr_name].append(filepath)
939
- return data, context
940
-
941
- @classmethod
942
- def from_dms(cls, schema: DMSSchema, instance_space: str | None = None) -> "PipelineSchema":
943
- if not schema.data_model:
944
- raise ValueError("PipelineSchema must contain at least one data model")
945
- first_data_model = schema.data_model
946
- # The database name is limited to 32 characters
947
- database_name = first_data_model.external_id[:32]
948
- instance_space = instance_space or first_data_model.space
949
- database = DatabaseWrite(name=database_name)
950
- parent_views = {parent for view in schema.views.values() for parent in view.implements or []}
951
- container_by_id = schema.containers.copy()
952
-
953
- transformations = TransformationWriteList([])
954
- raw_tables = RawTableWriteList([])
955
- for view in schema.views.values():
956
- if view.as_id() in parent_views:
957
- # Skipping parents as they do not have their own data
958
- continue
959
- mapped_properties = {
960
- prop_name: prop
961
- for prop_name, prop in (view.properties or {}).items()
962
- if isinstance(prop, dm.MappedPropertyApply)
963
- }
964
- if mapped_properties:
965
- view_table = RawTableWrite(name=f"{view.external_id}Properties", database=database_name)
966
- raw_tables.append(view_table)
967
- transformation = cls._create_property_transformation(
968
- mapped_properties, view, view_table, container_by_id, instance_space
969
- )
970
- transformations.append(transformation)
971
- connection_properties = {
972
- prop_name: prop
973
- for prop_name, prop in (view.properties or {}).items()
974
- if isinstance(prop, dm.EdgeConnectionApply)
975
- }
976
- for prop_name, connection_property in connection_properties.items():
977
- view_table = RawTableWrite(name=f"{view.external_id}.{prop_name}Edge", database=database_name)
978
- raw_tables.append(view_table)
979
- transformation = cls._create_edge_transformation(connection_property, view, view_table, instance_space)
980
- transformations.append(transformation)
981
-
982
- return cls(
983
- spaces=schema.spaces,
984
- data_model=schema.data_model,
985
- views=schema.views,
986
- containers=schema.containers,
987
- transformations=transformations,
988
- databases=DatabaseWriteList([database]),
989
- raw_tables=raw_tables,
990
- )
991
-
992
- @classmethod
993
- def _create_property_transformation(
994
- cls,
995
- properties: dict[str, dm.MappedPropertyApply],
996
- view: ViewApply,
997
- table: RawTableWrite,
998
- container_by_id: dict[dm.ContainerId, dm.ContainerApply],
999
- instance_space: str,
1000
- ) -> TransformationWrite:
1001
- mapping_mode = {
1002
- "version": 1,
1003
- "sourceType": "raw",
1004
- # 'mappings' is set here and overwritten further down to ensure the correct order
1005
- "mappings": [],
1006
- "sourceLevel1": table.database,
1007
- "sourceLevel2": table.name,
1008
- }
1009
- mappings = [
1010
- {"from": "externalId", "to": "externalId", "asType": "STRING"},
1011
- ]
1012
- select_rows = ["cast(`externalId` as STRING) as externalId"]
1013
- for prop_name, prop in properties.items():
1014
- container = container_by_id.get(prop.container)
1015
- if container is not None:
1016
- dms_type = container.properties[prop.container_property_identifier].type._type
1017
- if dms_type in _DATA_TYPE_BY_DMS_TYPE:
1018
- sql_type = _DATA_TYPE_BY_DMS_TYPE[dms_type].sql
1019
- else:
1020
- warnings.warn(
1021
- f"Unknown DMS type '{dms_type}' for property '{prop_name}'", RuntimeWarning, stacklevel=2
1022
- )
1023
- sql_type = "STRING"
1024
- else:
1025
- sql_type = "STRING"
1026
- select_rows.append(f"cast(`{prop_name}` as {sql_type}) as {prop_name}")
1027
- mappings.append({"from": prop_name, "to": prop_name, "asType": sql_type})
1028
- mapping_mode["mappings"] = mappings
1029
- select = ",\n ".join(select_rows)
1030
-
1031
- return TransformationWrite(
1032
- external_id=f"{table.name}Transformation",
1033
- name=f"{table.name}Transformation",
1034
- ignore_null_fields=True,
1035
- destination=Nodes(
1036
- view=ViewInfo(view.space, view.external_id, view.version),
1037
- instance_space=instance_space,
1038
- ),
1039
- conflict_mode="upsert",
1040
- query=f"""/* MAPPING_MODE_ENABLED: true */
1041
- /* {json.dumps(mapping_mode)} */
1042
- select
1043
- {select}
1044
- from
1045
- `{table.database}`.`{table.name}`;
1046
- """,
1047
- )
1048
-
1049
- @classmethod
1050
- def _create_edge_transformation(
1051
- cls, property_: dm.EdgeConnectionApply, view: ViewApply, table: RawTableWrite, instance_space: str
1052
- ) -> TransformationWrite:
1053
- start, end = view.external_id, property_.source.external_id
1054
- if property_.direction == "inwards":
1055
- start, end = end, start
1056
- mapping_mode = {
1057
- "version": 1,
1058
- "sourceType": "raw",
1059
- "mappings": [
1060
- {"from": "externalId", "to": "externalId", "asType": "STRING"},
1061
- {"from": start, "to": "startNode", "asType": "STRUCT<`space`:STRING, `externalId`:STRING>"},
1062
- {"from": end, "to": "endNode", "asType": "STRUCT<`space`:STRING, `externalId`:STRING>"},
1063
- ],
1064
- "sourceLevel1": table.database,
1065
- "sourceLevel2": table.name,
1066
- }
1067
- select_rows = [
1068
- "cast(`externalId` as STRING) as externalId",
1069
- f"node_reference('{instance_space}', `{start}`) as startNode",
1070
- f"node_reference('{instance_space}', `{end}`) as endNode",
1071
- ]
1072
- select = ",\n ".join(select_rows)
1073
-
1074
- return TransformationWrite(
1075
- external_id=f"{table.name}Transformation",
1076
- name=f"{table.name}Transformation",
1077
- ignore_null_fields=True,
1078
- destination=Edges(
1079
- instance_space=instance_space,
1080
- edge_type=EdgeType(space=property_.type.space, external_id=property_.type.external_id),
1081
- ),
1082
- conflict_mode="upsert",
1083
- query=f"""/* MAPPING_MODE_ENABLED: true */
1084
- /* {json.dumps(mapping_mode)} */
1085
- select
1086
- {select}
1087
- from
1088
- `{table.database}`.`{table.name}`;
1089
- """,
1090
- )
@@ -4,31 +4,30 @@ MIMETypes: TypeAlias = Literal[
4
4
  "application/rdf+xml", "text/turtle", "application/n-triple", "application/n-quads", "application/trig"
5
5
  ]
6
6
 
7
+ RDFTypes: TypeAlias = Literal["xml", "rdf", "owl", "n3", "ttl", "turtle", "nt", "nq", "nquads", "trig"]
7
8
 
8
- def rdflib_to_mime_types(rdflib_format: str) -> str | None:
9
+
10
+ def rdflib_to_oxi_type(rdflib_format: str) -> str | None:
9
11
  """Convert an RDFlib format to a MIME type.
10
12
 
11
13
  Args:
12
14
  rdflib_format (str): The RDFlib format.
13
15
 
14
16
  Returns:
15
- MIMETypes: The MIME type.
16
-
17
- !!! note
18
- This will be replaced once new version of oxrdflib is released.
17
+ Oxi format used to trigger correct plugging in rdflib
19
18
 
20
19
  """
21
20
 
22
21
  mapping = {
23
- "xml": "application/rdf+xml",
24
- "rdf": "application/rdf+xml",
25
- "owl": "application/rdf+xml",
26
- "n3": "application/n-triple",
27
- "ttl": "text/turtle",
28
- "turtle": "text/turtle",
29
- "nt": "application/n-triple",
30
- "nq": "application/n-quads",
31
- "nquads": "application/n-quads",
32
- "trig": "application/trig",
22
+ "xml": "ox-xml",
23
+ "rdf": "ox-xml",
24
+ "owl": "ox-xml",
25
+ "n3": "ox-n3",
26
+ "ttl": "ox-ttl",
27
+ "turtle": "ox-turtle",
28
+ "nt": "ox-nt",
29
+ "nq": "ox-nq",
30
+ "nquads": "ox-nquads",
31
+ "trig": "ox-trig",
33
32
  }
34
33
  return mapping.get(rdflib_format, None)