cognite-toolkit 0.6.97__py3-none-any.whl → 0.7.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. cognite_toolkit/_cdf.py +21 -23
  2. cognite_toolkit/_cdf_tk/apps/__init__.py +4 -0
  3. cognite_toolkit/_cdf_tk/apps/_core_app.py +19 -5
  4. cognite_toolkit/_cdf_tk/apps/_data_app.py +1 -1
  5. cognite_toolkit/_cdf_tk/apps/_dev_app.py +86 -0
  6. cognite_toolkit/_cdf_tk/apps/_download_app.py +693 -25
  7. cognite_toolkit/_cdf_tk/apps/_dump_app.py +44 -102
  8. cognite_toolkit/_cdf_tk/apps/_import_app.py +41 -0
  9. cognite_toolkit/_cdf_tk/apps/_landing_app.py +18 -4
  10. cognite_toolkit/_cdf_tk/apps/_migrate_app.py +424 -9
  11. cognite_toolkit/_cdf_tk/apps/_modules_app.py +0 -3
  12. cognite_toolkit/_cdf_tk/apps/_purge.py +15 -43
  13. cognite_toolkit/_cdf_tk/apps/_run.py +11 -0
  14. cognite_toolkit/_cdf_tk/apps/_upload_app.py +45 -6
  15. cognite_toolkit/_cdf_tk/builders/__init__.py +2 -2
  16. cognite_toolkit/_cdf_tk/builders/_base.py +28 -42
  17. cognite_toolkit/_cdf_tk/builders/_raw.py +1 -1
  18. cognite_toolkit/_cdf_tk/cdf_toml.py +20 -1
  19. cognite_toolkit/_cdf_tk/client/_toolkit_client.py +32 -12
  20. cognite_toolkit/_cdf_tk/client/api/infield.py +114 -17
  21. cognite_toolkit/_cdf_tk/client/api/{canvas.py → legacy/canvas.py} +15 -7
  22. cognite_toolkit/_cdf_tk/client/api/{charts.py → legacy/charts.py} +1 -1
  23. cognite_toolkit/_cdf_tk/client/api/{extended_data_modeling.py → legacy/extended_data_modeling.py} +1 -1
  24. cognite_toolkit/_cdf_tk/client/api/{extended_files.py → legacy/extended_files.py} +2 -2
  25. cognite_toolkit/_cdf_tk/client/api/{extended_functions.py → legacy/extended_functions.py} +15 -18
  26. cognite_toolkit/_cdf_tk/client/api/{extended_raw.py → legacy/extended_raw.py} +1 -1
  27. cognite_toolkit/_cdf_tk/client/api/{extended_timeseries.py → legacy/extended_timeseries.py} +5 -2
  28. cognite_toolkit/_cdf_tk/client/api/{location_filters.py → legacy/location_filters.py} +1 -1
  29. cognite_toolkit/_cdf_tk/client/api/legacy/robotics/__init__.py +8 -0
  30. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/capabilities.py +1 -1
  31. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/data_postprocessing.py +1 -1
  32. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/frames.py +1 -1
  33. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/locations.py +1 -1
  34. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/maps.py +1 -1
  35. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/robots.py +2 -2
  36. cognite_toolkit/_cdf_tk/client/api/{search_config.py → legacy/search_config.py} +5 -1
  37. cognite_toolkit/_cdf_tk/client/api/migration.py +177 -4
  38. cognite_toolkit/_cdf_tk/client/api/project.py +9 -8
  39. cognite_toolkit/_cdf_tk/client/api/search.py +2 -2
  40. cognite_toolkit/_cdf_tk/client/api/streams.py +88 -0
  41. cognite_toolkit/_cdf_tk/client/api/three_d.py +384 -0
  42. cognite_toolkit/_cdf_tk/client/data_classes/api_classes.py +13 -0
  43. cognite_toolkit/_cdf_tk/client/data_classes/base.py +37 -33
  44. cognite_toolkit/_cdf_tk/client/data_classes/charts_data.py +95 -213
  45. cognite_toolkit/_cdf_tk/client/data_classes/infield.py +32 -18
  46. cognite_toolkit/_cdf_tk/client/data_classes/instance_api.py +18 -13
  47. cognite_toolkit/_cdf_tk/client/data_classes/legacy/__init__.py +0 -0
  48. cognite_toolkit/_cdf_tk/client/data_classes/{canvas.py → legacy/canvas.py} +47 -4
  49. cognite_toolkit/_cdf_tk/client/data_classes/{charts.py → legacy/charts.py} +3 -3
  50. cognite_toolkit/_cdf_tk/client/data_classes/{migration.py → legacy/migration.py} +10 -2
  51. cognite_toolkit/_cdf_tk/client/data_classes/streams.py +90 -0
  52. cognite_toolkit/_cdf_tk/client/data_classes/three_d.py +112 -0
  53. cognite_toolkit/_cdf_tk/client/testing.py +42 -18
  54. cognite_toolkit/_cdf_tk/commands/__init__.py +7 -6
  55. cognite_toolkit/_cdf_tk/commands/_changes.py +3 -42
  56. cognite_toolkit/_cdf_tk/commands/_download.py +21 -11
  57. cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py +0 -2
  58. cognite_toolkit/_cdf_tk/commands/_migrate/command.py +22 -20
  59. cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +140 -92
  60. cognite_toolkit/_cdf_tk/commands/_migrate/creators.py +1 -1
  61. cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +108 -26
  62. cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +448 -45
  63. cognite_toolkit/_cdf_tk/commands/_migrate/data_model.py +1 -0
  64. cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py +6 -6
  65. cognite_toolkit/_cdf_tk/commands/_migrate/issues.py +52 -1
  66. cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +377 -11
  67. cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py +9 -4
  68. cognite_toolkit/_cdf_tk/commands/_profile.py +1 -1
  69. cognite_toolkit/_cdf_tk/commands/_purge.py +36 -39
  70. cognite_toolkit/_cdf_tk/commands/_questionary_style.py +16 -0
  71. cognite_toolkit/_cdf_tk/commands/_upload.py +109 -86
  72. cognite_toolkit/_cdf_tk/commands/about.py +221 -0
  73. cognite_toolkit/_cdf_tk/commands/auth.py +19 -12
  74. cognite_toolkit/_cdf_tk/commands/build_cmd.py +16 -62
  75. cognite_toolkit/_cdf_tk/commands/build_v2/__init__.py +0 -0
  76. cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py +241 -0
  77. cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py +85 -0
  78. cognite_toolkit/_cdf_tk/commands/build_v2/build_issues.py +27 -0
  79. cognite_toolkit/_cdf_tk/commands/clean.py +63 -16
  80. cognite_toolkit/_cdf_tk/commands/deploy.py +20 -17
  81. cognite_toolkit/_cdf_tk/commands/dump_resource.py +10 -8
  82. cognite_toolkit/_cdf_tk/commands/init.py +225 -3
  83. cognite_toolkit/_cdf_tk/commands/modules.py +20 -44
  84. cognite_toolkit/_cdf_tk/commands/pull.py +6 -19
  85. cognite_toolkit/_cdf_tk/commands/resources.py +179 -0
  86. cognite_toolkit/_cdf_tk/commands/run.py +1 -1
  87. cognite_toolkit/_cdf_tk/constants.py +20 -1
  88. cognite_toolkit/_cdf_tk/cruds/__init__.py +19 -5
  89. cognite_toolkit/_cdf_tk/cruds/_base_cruds.py +14 -70
  90. cognite_toolkit/_cdf_tk/cruds/_data_cruds.py +10 -19
  91. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/__init__.py +4 -1
  92. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/agent.py +11 -9
  93. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/auth.py +5 -15
  94. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/classic.py +45 -44
  95. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/configuration.py +5 -12
  96. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/data_organization.py +4 -13
  97. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/datamodel.py +206 -67
  98. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/extraction_pipeline.py +6 -18
  99. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/fieldops.py +126 -35
  100. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/file.py +7 -28
  101. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/function.py +23 -30
  102. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/hosted_extractors.py +12 -30
  103. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/industrial_tool.py +4 -8
  104. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/location.py +4 -16
  105. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/migration.py +5 -13
  106. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/raw.py +5 -11
  107. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/relationship.py +3 -8
  108. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/robotics.py +16 -45
  109. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/streams.py +94 -0
  110. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/three_d_model.py +3 -7
  111. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/timeseries.py +5 -15
  112. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/transformation.py +75 -32
  113. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/workflow.py +20 -40
  114. cognite_toolkit/_cdf_tk/cruds/_worker.py +24 -36
  115. cognite_toolkit/_cdf_tk/data_classes/_module_toml.py +1 -0
  116. cognite_toolkit/_cdf_tk/feature_flags.py +16 -36
  117. cognite_toolkit/_cdf_tk/plugins.py +2 -1
  118. cognite_toolkit/_cdf_tk/resource_classes/__init__.py +4 -0
  119. cognite_toolkit/_cdf_tk/resource_classes/capabilities.py +12 -0
  120. cognite_toolkit/_cdf_tk/resource_classes/functions.py +3 -1
  121. cognite_toolkit/_cdf_tk/resource_classes/infield_cdm_location_config.py +109 -0
  122. cognite_toolkit/_cdf_tk/resource_classes/migration.py +8 -17
  123. cognite_toolkit/_cdf_tk/resource_classes/search_config.py +1 -1
  124. cognite_toolkit/_cdf_tk/resource_classes/streams.py +29 -0
  125. cognite_toolkit/_cdf_tk/resource_classes/workflow_version.py +164 -5
  126. cognite_toolkit/_cdf_tk/storageio/__init__.py +9 -21
  127. cognite_toolkit/_cdf_tk/storageio/_annotations.py +19 -16
  128. cognite_toolkit/_cdf_tk/storageio/_applications.py +340 -28
  129. cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +67 -104
  130. cognite_toolkit/_cdf_tk/storageio/_base.py +61 -29
  131. cognite_toolkit/_cdf_tk/storageio/_datapoints.py +276 -20
  132. cognite_toolkit/_cdf_tk/storageio/_file_content.py +435 -0
  133. cognite_toolkit/_cdf_tk/storageio/_instances.py +35 -3
  134. cognite_toolkit/_cdf_tk/storageio/_raw.py +26 -0
  135. cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +71 -4
  136. cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
  137. cognite_toolkit/_cdf_tk/storageio/selectors/_canvas.py +14 -0
  138. cognite_toolkit/_cdf_tk/storageio/selectors/_charts.py +14 -0
  139. cognite_toolkit/_cdf_tk/storageio/selectors/_datapoints.py +23 -3
  140. cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +164 -0
  141. cognite_toolkit/_cdf_tk/storageio/selectors/_three_d.py +34 -0
  142. cognite_toolkit/_cdf_tk/tk_warnings/other.py +4 -0
  143. cognite_toolkit/_cdf_tk/tracker.py +2 -2
  144. cognite_toolkit/_cdf_tk/utils/cdf.py +1 -1
  145. cognite_toolkit/_cdf_tk/utils/dtype_conversion.py +9 -3
  146. cognite_toolkit/_cdf_tk/utils/fileio/__init__.py +2 -0
  147. cognite_toolkit/_cdf_tk/utils/fileio/_base.py +5 -1
  148. cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +112 -20
  149. cognite_toolkit/_cdf_tk/utils/fileio/_writers.py +15 -15
  150. cognite_toolkit/_cdf_tk/utils/http_client/__init__.py +28 -0
  151. cognite_toolkit/_cdf_tk/utils/http_client/_client.py +285 -18
  152. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +56 -4
  153. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes2.py +247 -0
  154. cognite_toolkit/_cdf_tk/utils/http_client/_tracker.py +5 -2
  155. cognite_toolkit/_cdf_tk/utils/interactive_select.py +60 -18
  156. cognite_toolkit/_cdf_tk/utils/sql_parser.py +2 -3
  157. cognite_toolkit/_cdf_tk/utils/useful_types.py +6 -2
  158. cognite_toolkit/_cdf_tk/validation.py +83 -1
  159. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  160. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  161. cognite_toolkit/_resources/cdf.toml +5 -4
  162. cognite_toolkit/_version.py +1 -1
  163. cognite_toolkit/config.dev.yaml +13 -0
  164. {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.39.dist-info}/METADATA +24 -24
  165. cognite_toolkit-0.7.39.dist-info/RECORD +322 -0
  166. cognite_toolkit-0.7.39.dist-info/WHEEL +4 -0
  167. {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.39.dist-info}/entry_points.txt +1 -0
  168. cognite_toolkit/_cdf_tk/client/api/robotics/__init__.py +0 -3
  169. cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py +0 -201
  170. cognite_toolkit/_cdf_tk/commands/dump_data.py +0 -489
  171. cognite_toolkit/_cdf_tk/commands/featureflag.py +0 -27
  172. cognite_toolkit/_cdf_tk/prototypes/import_app.py +0 -41
  173. cognite_toolkit/_cdf_tk/utils/table_writers.py +0 -434
  174. cognite_toolkit-0.6.97.dist-info/RECORD +0 -306
  175. cognite_toolkit-0.6.97.dist-info/WHEEL +0 -4
  176. cognite_toolkit-0.6.97.dist-info/licenses/LICENSE +0 -18
  177. /cognite_toolkit/_cdf_tk/{prototypes/commands → client/api/legacy}/__init__.py +0 -0
  178. /cognite_toolkit/_cdf_tk/client/api/{dml.py → legacy/dml.py} +0 -0
  179. /cognite_toolkit/_cdf_tk/client/api/{fixed_transformations.py → legacy/fixed_transformations.py} +0 -0
  180. /cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/api.py +0 -0
  181. /cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/utlis.py +0 -0
  182. /cognite_toolkit/_cdf_tk/client/data_classes/{apm_config_v1.py → legacy/apm_config_v1.py} +0 -0
  183. /cognite_toolkit/_cdf_tk/client/data_classes/{extendable_cognite_file.py → legacy/extendable_cognite_file.py} +0 -0
  184. /cognite_toolkit/_cdf_tk/client/data_classes/{extended_filemetadata.py → legacy/extended_filemetadata.py} +0 -0
  185. /cognite_toolkit/_cdf_tk/client/data_classes/{extended_filemetdata.py → legacy/extended_filemetdata.py} +0 -0
  186. /cognite_toolkit/_cdf_tk/client/data_classes/{extended_timeseries.py → legacy/extended_timeseries.py} +0 -0
  187. /cognite_toolkit/_cdf_tk/client/data_classes/{functions.py → legacy/functions.py} +0 -0
  188. /cognite_toolkit/_cdf_tk/client/data_classes/{graphql_data_models.py → legacy/graphql_data_models.py} +0 -0
  189. /cognite_toolkit/_cdf_tk/client/data_classes/{instances.py → legacy/instances.py} +0 -0
  190. /cognite_toolkit/_cdf_tk/client/data_classes/{location_filters.py → legacy/location_filters.py} +0 -0
  191. /cognite_toolkit/_cdf_tk/client/data_classes/{pending_instances_ids.py → legacy/pending_instances_ids.py} +0 -0
  192. /cognite_toolkit/_cdf_tk/client/data_classes/{project.py → legacy/project.py} +0 -0
  193. /cognite_toolkit/_cdf_tk/client/data_classes/{raw.py → legacy/raw.py} +0 -0
  194. /cognite_toolkit/_cdf_tk/client/data_classes/{robotics.py → legacy/robotics.py} +0 -0
  195. /cognite_toolkit/_cdf_tk/client/data_classes/{search_config.py → legacy/search_config.py} +0 -0
  196. /cognite_toolkit/_cdf_tk/client/data_classes/{sequences.py → legacy/sequences.py} +0 -0
  197. /cognite_toolkit/_cdf_tk/client/data_classes/{streamlit_.py → legacy/streamlit_.py} +0 -0
  198. /cognite_toolkit/_cdf_tk/{prototypes/commands/import_.py → commands/_import_cmd.py} +0 -0
@@ -4,7 +4,7 @@ from pathlib import Path
4
4
  from pydantic import BaseModel, ConfigDict
5
5
  from pydantic.alias_generators import to_camel
6
6
 
7
- from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_STEM
7
+ from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_SUFFIX
8
8
  from cognite_toolkit._cdf_tk.utils.file import safe_write, sanitize_filename, yaml_safe_dump
9
9
  from cognite_toolkit._cdf_tk.utils.text import to_sentence_case
10
10
  from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
@@ -41,7 +41,7 @@ class DataSelector(SelectorObject, ABC):
41
41
  directory: The directory where the YAML file will be saved.
42
42
  """
43
43
 
44
- filepath = directory / f"{sanitize_filename(str(self))}.{DATA_MANIFEST_STEM}.yaml"
44
+ filepath = directory / f"{sanitize_filename(str(self))}{DATA_MANIFEST_SUFFIX}"
45
45
  filepath.parent.mkdir(parents=True, exist_ok=True)
46
46
  safe_write(file=filepath, content=yaml_safe_dump(self.model_dump(mode="json", by_alias=True)), encoding="utf-8")
47
47
  return filepath
@@ -66,3 +66,15 @@ class DataSelector(SelectorObject, ABC):
66
66
  def __str__(self) -> str:
67
67
  # We want to force subclasses to implement __str__
68
68
  raise NotImplementedError()
69
+
70
+ def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
71
+ """Find data files in the specified input directory that match this selector.
72
+
73
+ Args:
74
+ input_dir: The directory to search for data files.
75
+ manifest_file: The manifest file that describes the data files.
76
+ Returns:
77
+ A list of Paths to the data files that match this selector.
78
+ """
79
+ data_file_prefix = manifest_file.name.removesuffix(DATA_MANIFEST_SUFFIX)
80
+ return [file for file in input_dir.glob(f"{data_file_prefix}*") if not file.name.endswith(DATA_MANIFEST_SUFFIX)]
@@ -1,3 +1,4 @@
1
+ import hashlib
1
2
  from abc import ABC
2
3
  from typing import Literal
3
4
 
@@ -6,3 +7,16 @@ from ._base import DataSelector
6
7
 
7
8
  class CanvasSelector(DataSelector, ABC):
8
9
  kind: Literal["IndustrialCanvas"] = "IndustrialCanvas"
10
+
11
+
12
+ class CanvasExternalIdSelector(CanvasSelector):
13
+ type: Literal["canvasExternalId"] = "canvasExternalId"
14
+ external_ids: tuple[str, ...]
15
+
16
+ @property
17
+ def group(self) -> str:
18
+ return "Canvas"
19
+
20
+ def __str__(self) -> str:
21
+ hash_ = hashlib.md5(",".join(sorted(self.external_ids)).encode()).hexdigest()[:8]
22
+ return f"canvas_count_{len(self.external_ids)}_hash_{hash_}"
@@ -1,3 +1,4 @@
1
+ import hashlib
1
2
  from abc import ABC
2
3
  from typing import Literal
3
4
 
@@ -29,3 +30,16 @@ class AllChartsSelector(ChartSelector):
29
30
 
30
31
  def __str__(self) -> str:
31
32
  return "all"
33
+
34
+
35
+ class ChartExternalIdSelector(ChartSelector):
36
+ type: Literal["chartExternalId"] = "chartExternalId"
37
+ external_ids: tuple[str, ...]
38
+
39
+ @property
40
+ def group(self) -> str:
41
+ return "Charts"
42
+
43
+ def __str__(self) -> str:
44
+ hash_ = hashlib.md5(",".join(sorted(self.external_ids)).encode()).hexdigest()[:8]
45
+ return f"chart_count_{len(self.external_ids)}_hash_{hash_}"
@@ -1,6 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from functools import cached_property
3
- from typing import Annotated, Any, Literal
3
+ from typing import Annotated, Any, ClassVar, Literal
4
4
 
5
5
  from cognite.client._proto.data_points_pb2 import (
6
6
  InstanceId,
@@ -50,9 +50,12 @@ TimeSeriesColumn = Annotated[
50
50
  ]
51
51
 
52
52
 
53
- class DataPointsFileSelector(DataSelector):
53
+ class DataPointsSelector(DataSelector, ABC):
54
+ kind: Literal["Datapoints"] = "Datapoints"
55
+
56
+
57
+ class DataPointsFileSelector(DataPointsSelector):
54
58
  type: Literal["datapointsFile"] = "datapointsFile"
55
- kind: Literal["datapoints"] = "datapoints"
56
59
 
57
60
  timestamp_column: str
58
61
  columns: tuple[TimeSeriesColumn, ...]
@@ -67,3 +70,20 @@ class DataPointsFileSelector(DataSelector):
67
70
  @cached_property
68
71
  def id_by_column(self) -> dict[str, Column]:
69
72
  return {col.column: col for col in self.columns}
73
+
74
+
75
+ class DataPointsDataSetSelector(DataPointsSelector):
76
+ required_columns: ClassVar[frozenset[str]] = frozenset({"externalId", "timestamp", "value"})
77
+ type: Literal["datapointsDataSet"] = "datapointsDataSet"
78
+
79
+ data_set_external_id: str
80
+ start: int | str | None = None
81
+ end: int | str | None = None
82
+ data_type: Literal["numeric", "string"] = "numeric"
83
+
84
+ @property
85
+ def group(self) -> str:
86
+ return f"DataSet_{self.data_set_external_id}"
87
+
88
+ def __str__(self) -> str:
89
+ return f"datapoints_dataset_{self.data_set_external_id}"
@@ -0,0 +1,164 @@
1
+ import hashlib
2
+ import json
3
+ from abc import ABC, abstractmethod
4
+ from pathlib import Path
5
+ from typing import Annotated, Any, Literal
6
+
7
+ from pydantic import ConfigDict, Field, field_validator, model_validator
8
+
9
+ from ._base import DataSelector, SelectorObject
10
+ from ._instances import SelectedView
11
+
12
+ FILENAME_VARIABLE = "$FILENAME"
13
+ FILEPATH = "$FILEPATH"
14
+
15
+
16
+ class FileContentSelector(DataSelector, ABC):
17
+ kind: Literal["FileContent"] = "FileContent"
18
+
19
+
20
+ class FileTemplateSelector(FileContentSelector, ABC):
21
+ file_directory: Path
22
+
23
+ def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
24
+ file_dir = input_dir / self.file_directory
25
+ if not file_dir.is_dir():
26
+ return []
27
+ return [file for file in file_dir.iterdir() if file.is_file()]
28
+
29
+ @abstractmethod
30
+ def create_instance(self, filepath: Path) -> dict[str, Any]: ...
31
+
32
+
33
+ class FileTemplate(SelectorObject):
34
+ model_config = ConfigDict(extra="allow")
35
+
36
+ def create_instance(self, filename: str) -> dict[str, Any]:
37
+ json_str = self.model_dump_json(by_alias=True)
38
+ return json.loads(json_str.replace(FILENAME_VARIABLE, filename))
39
+
40
+
41
+ class FileMetadataTemplate(FileTemplate):
42
+ name: str
43
+ external_id: str
44
+
45
+ @field_validator("name", "external_id")
46
+ @classmethod
47
+ def _validate_filename_in_fields(cls, v: str) -> str:
48
+ if FILENAME_VARIABLE not in v:
49
+ raise ValueError(
50
+ f"{FILENAME_VARIABLE!s} must be present in 'name' and 'external_id' fields. "
51
+ f"This allows for dynamic substitution based on the file name."
52
+ )
53
+ return v
54
+
55
+
56
+ class FileMetadataTemplateSelector(FileTemplateSelector):
57
+ type: Literal["fileMetadataTemplate"] = "fileMetadataTemplate"
58
+ template: FileMetadataTemplate
59
+
60
+ @property
61
+ def group(self) -> str:
62
+ return "FileMetadata"
63
+
64
+ def __str__(self) -> str:
65
+ return "metadata_template"
66
+
67
+ def create_instance(self, filepath: Path) -> dict[str, Any]:
68
+ return self.template.create_instance(filepath.name)
69
+
70
+
71
+ class TemplateNodeId(SelectorObject):
72
+ space: str
73
+ external_id: str
74
+
75
+ @field_validator("external_id")
76
+ @classmethod
77
+ def _validate_filename_in_fields(cls, v: str) -> str:
78
+ if FILENAME_VARIABLE not in v:
79
+ raise ValueError(
80
+ f"{FILENAME_VARIABLE!s} must be present in 'external_id' field. "
81
+ f"This allows for dynamic substitution based on the file name."
82
+ )
83
+ return v
84
+
85
+
86
+ class FileDataModelingTemplate(FileTemplate):
87
+ instance_id: TemplateNodeId
88
+ # Name is required for FileMetadata but not for CogniteFiles. This is the same default behavior as in CDF.
89
+ name: str = "untitled"
90
+
91
+ @model_validator(mode="before")
92
+ def _move_space_external_id(cls, data: dict[str, Any]) -> dict[str, Any]:
93
+ if "space" in data and "externalId" in data:
94
+ data["instanceId"] = {"space": data.pop("space"), "externalId": data.pop("externalId")}
95
+ elif "space" in data and "external_id" in data:
96
+ data["instance_id"] = {"space": data.pop("space"), "external_id": data.pop("external_id")}
97
+ return data
98
+
99
+
100
+ class FileDataModelingTemplateSelector(FileTemplateSelector):
101
+ type: Literal["fileDataModelingTemplate"] = "fileDataModelingTemplate"
102
+ view_id: SelectedView = SelectedView(space="cdf_cdm", external_id="CogniteFile", version="v1")
103
+ template: FileDataModelingTemplate
104
+
105
+ @property
106
+ def group(self) -> str:
107
+ return "FileDataModeling"
108
+
109
+ def __str__(self) -> str:
110
+ return "data_modeling_template"
111
+
112
+ def create_instance(self, filepath: Path) -> dict[str, Any]:
113
+ return self.template.create_instance(filepath.name)
114
+
115
+
116
+ class FileIdentifierDefinition(SelectorObject):
117
+ id_type: str
118
+
119
+
120
+ class FileInternalID(FileIdentifierDefinition):
121
+ id_type: Literal["internalId"] = "internalId"
122
+ internal_id: int = Field(alias="id")
123
+
124
+ def __str__(self) -> str:
125
+ return f"internalId_{self.internal_id}"
126
+
127
+
128
+ class FileExternalID(FileIdentifierDefinition):
129
+ id_type: Literal["externalId"] = "externalId"
130
+ external_id: str
131
+
132
+ def __str__(self) -> str:
133
+ return f"externalId_{self.external_id}"
134
+
135
+
136
+ class NodeId(SelectorObject):
137
+ space: str
138
+ external_id: str
139
+
140
+
141
+ class FileInstanceID(FileIdentifierDefinition):
142
+ id_type: Literal["instanceId"] = "instanceId"
143
+ instance_id: NodeId
144
+
145
+ def __str__(self) -> str:
146
+ return f"instanceId_{self.instance_id.space}_{self.instance_id.external_id}"
147
+
148
+
149
+ FileIdentifier = Annotated[FileInstanceID | FileExternalID | FileInternalID, Field(discriminator="id_type")]
150
+
151
+
152
+ class FileIdentifierSelector(FileContentSelector):
153
+ type: Literal["fileIdentifier"] = "fileIdentifier"
154
+ file_directory: str = "file_content"
155
+ use_metadata_directory: bool = True
156
+ identifiers: tuple[FileIdentifier, ...]
157
+
158
+ @property
159
+ def group(self) -> str:
160
+ return "Files"
161
+
162
+ def __str__(self) -> str:
163
+ hash_ = hashlib.md5(",".join(sorted(str(self.identifiers))).encode()).hexdigest()[:8]
164
+ return f"file_{len(self.identifiers)}_identifiers_{hash_}"
@@ -0,0 +1,34 @@
1
+ import hashlib
2
+ from abc import ABC
3
+ from typing import Literal
4
+
5
+ from ._base import DataSelector
6
+
7
+
8
+ class ThreeDSelector(DataSelector, ABC):
9
+ kind: Literal["3D"] = "3D"
10
+
11
+ @property
12
+ def group(self) -> str:
13
+ return "3DModels"
14
+
15
+
16
+ class ThreeDModelFilteredSelector(ThreeDSelector):
17
+ type: Literal["3DFiltered"] = "3DFiltered"
18
+ model_type: Literal["Classic", "DataModel"] = "Classic"
19
+ published: bool | None = None
20
+
21
+ def __str__(self) -> str:
22
+ suffix = f"3DModels_{self.model_type}"
23
+ if self.published is not None:
24
+ return f"{suffix}_published_{self.published}"
25
+ return suffix
26
+
27
+
28
+ class ThreeDModelIdSelector(ThreeDSelector):
29
+ type: Literal["3DId"] = "3DId"
30
+ ids: tuple[int, ...]
31
+
32
+ def __str__(self) -> str:
33
+ hash_ = hashlib.md5(",".join(sorted(map(str, self.ids))).encode()).hexdigest()[:8]
34
+ return f"3DModels_ids_count_{len(self.ids)}_hash_{hash_}"
@@ -136,15 +136,19 @@ class MissingCapabilityWarning(GeneralWarning):
136
136
 
137
137
  @dataclass(frozen=True)
138
138
  class ToolkitDeprecationWarning(ToolkitWarning, DeprecationWarning):
139
+ severity = SeverityLevel.HIGH
139
140
  message: ClassVar[str] = "The '{feature}' is deprecated and will be removed in a future version."
140
141
 
141
142
  feature: str
142
143
  alternative: str | None = None
144
+ removal_version: str | None = None
143
145
 
144
146
  def get_message(self) -> str:
145
147
  msg = self.message.format(feature=self.feature)
146
148
  if self.alternative:
147
149
  msg += f"\nUse {self.alternative!r} instead."
150
+ if self.removal_version:
151
+ msg += f"\nIt will be removed in version {self.removal_version}."
148
152
 
149
153
  return msg
150
154
 
@@ -38,11 +38,11 @@ class Tracker:
38
38
 
39
39
  @property
40
40
  def opted_out(self) -> bool:
41
- return self._opt_status == "opted-out"
41
+ return False
42
42
 
43
43
  @property
44
44
  def opted_in(self) -> bool:
45
- return self._opt_status == "opted-in"
45
+ return True
46
46
 
47
47
  def track_cli_command(
48
48
  self,
@@ -21,7 +21,7 @@ from filelock import BaseFileLock, FileLock, Timeout
21
21
  from rich.console import Console
22
22
 
23
23
  from cognite_toolkit._cdf_tk.client import ToolkitClient, ToolkitClientConfig
24
- from cognite_toolkit._cdf_tk.client.data_classes.raw import RawTable
24
+ from cognite_toolkit._cdf_tk.client.data_classes.legacy.raw import RawTable
25
25
  from cognite_toolkit._cdf_tk.constants import ENV_VAR_PATTERN, MAX_ROW_ITERATION_RUN_QUERY, MAX_RUN_QUERY_FREQUENCY_MIN
26
26
  from cognite_toolkit._cdf_tk.exceptions import (
27
27
  ToolkitError,
@@ -20,7 +20,13 @@ from dateutil import parser
20
20
  from cognite_toolkit._cdf_tk.constants import CDF_UNIT_SPACE
21
21
  from cognite_toolkit._cdf_tk.exceptions import ToolkitNotSupported
22
22
  from cognite_toolkit._cdf_tk.utils._auxiliary import get_concrete_subclasses
23
- from cognite_toolkit._cdf_tk.utils.useful_types import AssetCentricType, DataType, JsonVal, PythonTypes
23
+ from cognite_toolkit._cdf_tk.utils.useful_types import (
24
+ AssetCentricType,
25
+ AssetCentricTypeExtended,
26
+ DataType,
27
+ JsonVal,
28
+ PythonTypes,
29
+ )
24
30
 
25
31
  from .collection import humanize_collection
26
32
 
@@ -35,7 +41,7 @@ def asset_centric_convert_to_primary_property(
35
41
  type_: PropertyType,
36
42
  nullable: bool,
37
43
  destination_container_property: tuple[ContainerId, str],
38
- source_property: tuple[AssetCentricType, str],
44
+ source_property: tuple[AssetCentricTypeExtended, str],
39
45
  direct_relation_lookup: Mapping[str | int, DirectRelationReference] | None = None,
40
46
  ) -> PropertyValueWrite:
41
47
  if (source_property, destination_container_property) in SPECIAL_CONVERTER_BY_SOURCE_DESTINATION:
@@ -574,7 +580,7 @@ CONVERTER_BY_DTYPE: Mapping[str, type[_ValueConverter]] = {
574
580
  for cls_ in _ValueConverter.__subclasses__()
575
581
  }
576
582
  SPECIAL_CONVERTER_BY_SOURCE_DESTINATION: Mapping[
577
- tuple[tuple[AssetCentricType, str], tuple[ContainerId, str]],
583
+ tuple[tuple[AssetCentricTypeExtended, str], tuple[ContainerId, str]],
578
584
  type[_SpecialCaseConverter],
579
585
  ] = {
580
586
  (subclass.source_property, subclass.destination_container_property): subclass
@@ -12,6 +12,7 @@ from ._readers import (
12
12
  CSVReader,
13
13
  FailedParsing,
14
14
  FileReader,
15
+ MultiFileReader,
15
16
  NDJsonReader,
16
17
  ParquetReader,
17
18
  YAMLReader,
@@ -45,6 +46,7 @@ __all__ = [
45
46
  "FileReader",
46
47
  "FileWriter",
47
48
  "GzipCompression",
49
+ "MultiFileReader",
48
50
  "NDJsonReader",
49
51
  "NDJsonWriter",
50
52
  "ParquetReader",
@@ -14,7 +14,11 @@ T_IO = TypeVar("T_IO", bound=IO)
14
14
 
15
15
 
16
16
  class FileIO(ABC):
17
- format: ClassVar[str]
17
+ FORMAT: ClassVar[str]
18
+
19
+ @property
20
+ def format(self) -> str:
21
+ return self.FORMAT
18
22
 
19
23
 
20
24
  @dataclass(frozen=True)
@@ -1,10 +1,11 @@
1
1
  import csv
2
2
  import json
3
+ import re
3
4
  from abc import ABC, abstractmethod
4
5
  from collections import Counter, defaultdict
5
6
  from collections.abc import Callable, Iterator, Mapping, Sequence
6
7
  from dataclasses import dataclass
7
- from functools import partial
8
+ from functools import cached_property, partial
8
9
  from io import TextIOWrapper
9
10
  from pathlib import Path
10
11
  from typing import Any
@@ -37,10 +38,10 @@ class FileReader(FileIO, ABC):
37
38
  @abstractmethod
38
39
  def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
39
40
  """Read chunks from the file."""
40
- raise NotImplementedError("This method should be implemented in subclasses.")
41
+ ...
41
42
 
42
43
  @classmethod
43
- def from_filepath(cls, filepath: Path) -> "FileReader":
44
+ def from_filepath(cls, filepath: Path) -> "type[FileReader]":
44
45
  if len(filepath.suffixes) == 0:
45
46
  raise ToolkitValueError(
46
47
  f"File has no suffix. Available formats: {humanize_collection(FILE_READ_CLS_BY_FORMAT.keys())}."
@@ -55,33 +56,110 @@ class FileReader(FileIO, ABC):
55
56
  )
56
57
 
57
58
  if suffix in FILE_READ_CLS_BY_FORMAT:
58
- return FILE_READ_CLS_BY_FORMAT[suffix](input_file=filepath)
59
+ return FILE_READ_CLS_BY_FORMAT[suffix]
59
60
 
60
61
  raise ToolkitValueError(
61
62
  f"Unknown file format: {suffix}. Available formats: {humanize_collection(FILE_READ_CLS_BY_FORMAT.keys())}."
62
63
  )
63
64
 
65
+ @abstractmethod
66
+ def count(self) -> int:
67
+ """Count the number of chunks in the file."""
68
+ ...
69
+
70
+
71
+ class MultiFileReader(FileReader):
72
+ """Reads multiple files and yields chunks from each file sequentially.
73
+
74
+ Args:
75
+ input_files (Sequence[Path]): The list of file paths to read.
76
+ """
77
+
78
+ PART_PATTERN = re.compile(r"part-(\d{4})$")
79
+
80
+ def __init__(self, input_files: Sequence[Path]) -> None:
81
+ super().__init__(input_file=input_files[0])
82
+ self.input_files = input_files
83
+
84
+ @cached_property
85
+ def reader_class(self) -> type[FileReader]:
86
+ """Determine the reader class based on the input files."""
87
+ reader_classes = Counter([FileReader.from_filepath(input_file) for input_file in self.input_files])
88
+ if len(reader_classes) > 1:
89
+ raise ToolkitValueError(
90
+ "All input files must be of the same format. "
91
+ f"Found formats: {humanize_collection([cls.FORMAT for cls in reader_classes.keys()])}."
92
+ )
93
+ return reader_classes.most_common(1)[0][0]
94
+
95
+ @property
96
+ def is_table(self) -> bool:
97
+ try:
98
+ return issubclass(self.reader_class, TableReader)
99
+ except ValueError:
100
+ # The input files are not a known format, so it is not a table.
101
+ return False
102
+
103
+ @property
104
+ def format(self) -> str:
105
+ return self.reader_class.FORMAT
106
+
107
+ def read_chunks(self) -> Iterator[dict[str, JsonVal]]:
108
+ for input_file in sorted(self.input_files, key=self._part_no):
109
+ yield from self.reader_class(input_file).read_chunks()
110
+
111
+ def _part_no(self, path: Path) -> int:
112
+ match = self.PART_PATTERN.search(path.stem)
113
+ if match:
114
+ return int(match.group(1))
115
+ return 99999
116
+
117
+ def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
118
+ raise NotImplementedError("This method is not used in MultiFileReader.")
119
+
120
+ def count(self) -> int:
121
+ """Count the total number of chunks in all files."""
122
+ total_count = 0
123
+ for input_file in self.input_files:
124
+ reader = self.reader_class(input_file)
125
+ total_count += reader.count()
126
+ return total_count
127
+
64
128
 
65
129
  class NDJsonReader(FileReader):
66
- format = ".ndjson"
130
+ FORMAT = ".ndjson"
67
131
 
68
132
  def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
69
133
  for line in file:
70
134
  if stripped := line.strip():
71
135
  yield json.loads(stripped)
72
136
 
137
+ def count(self) -> int:
138
+ """Count the number of lines (chunks) in the NDJSON file."""
139
+ compression = Compression.from_filepath(self.input_file)
140
+ with compression.open("r") as file:
141
+ line_count = sum(1 for line in file if line.strip())
142
+ return line_count
143
+
73
144
 
74
145
  class YAMLBaseReader(FileReader, ABC):
75
146
  def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
76
147
  yield from yaml.safe_load_all(file)
77
148
 
149
+ def count(self) -> int:
150
+ """Count the number of documents (chunks) in the YAML file."""
151
+ compression = Compression.from_filepath(self.input_file)
152
+ with compression.open("r") as file:
153
+ doc_count = sum(1 for _ in yaml.safe_load_all(file))
154
+ return doc_count
155
+
78
156
 
79
157
  class YAMLReader(YAMLBaseReader):
80
- format = ".yaml"
158
+ FORMAT = ".yaml"
81
159
 
82
160
 
83
161
  class YMLReader(YAMLBaseReader):
84
- format = ".yml"
162
+ FORMAT = ".yml"
85
163
 
86
164
 
87
165
  @dataclass
@@ -171,8 +249,8 @@ class TableReader(FileReader, ABC):
171
249
 
172
250
  if not input_file.exists():
173
251
  raise ToolkitFileNotFoundError(f"File not found: {input_file.as_posix()!r}.")
174
- if input_file.suffix != cls.format:
175
- raise ToolkitValueError(f"Expected a {cls.format} file got a {input_file.suffix!r} file instead.")
252
+ if input_file.suffix != cls.FORMAT:
253
+ raise ToolkitValueError(f"Expected a {cls.FORMAT} file got a {input_file.suffix!r} file instead.")
176
254
 
177
255
  column_names, sample_rows = cls._read_sample_rows(input_file, sniff_rows)
178
256
  cls._check_column_names(column_names)
@@ -213,7 +291,7 @@ class TableReader(FileReader, ABC):
213
291
  class CSVReader(TableReader):
214
292
  """Reads CSV files and yields each row as a dictionary."""
215
293
 
216
- format = ".csv"
294
+ FORMAT = ".csv"
217
295
 
218
296
  def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
219
297
  if self.keep_failed_cells and self.failed_cell:
@@ -255,9 +333,16 @@ class CSVReader(TableReader):
255
333
  raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
256
334
  return column_names, sample_rows
257
335
 
336
+ def count(self) -> int:
337
+ """Count the number of rows in the CSV file."""
338
+ compression = Compression.from_filepath(self.input_file)
339
+ with compression.open("r") as file:
340
+ line_count = sum(1 for _ in file) - 1 # Subtract 1 for header
341
+ return line_count
342
+
258
343
 
259
344
  class ParquetReader(TableReader):
260
- format = ".parquet"
345
+ FORMAT = ".parquet"
261
346
 
262
347
  def __init__(self, input_file: Path) -> None:
263
348
  # Parquet files have their own schema, so we don't need to sniff or provide one.
@@ -308,23 +393,30 @@ class ParquetReader(TableReader):
308
393
  raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
309
394
  return column_names, sample_rows
310
395
 
396
+ def count(self) -> int:
397
+ """Count the number of rows in the Parquet file."""
398
+ import pyarrow.parquet as pq
399
+
400
+ with pq.ParquetFile(self.input_file) as parquet_file:
401
+ return parquet_file.metadata.num_rows
402
+
311
403
 
312
404
  FILE_READ_CLS_BY_FORMAT: Mapping[str, type[FileReader]] = {}
313
405
  TABLE_READ_CLS_BY_FORMAT: Mapping[str, type[TableReader]] = {}
314
406
  for subclass in get_concrete_subclasses(FileReader): # type: ignore[type-abstract]
315
- if not getattr(subclass, "format", None):
407
+ if not getattr(subclass, "FORMAT", None):
316
408
  continue
317
- if subclass.format in FILE_READ_CLS_BY_FORMAT:
409
+ if subclass.FORMAT in FILE_READ_CLS_BY_FORMAT:
318
410
  raise TypeError(
319
- f"Duplicate file format {subclass.format!r} found for classes "
320
- f"{FILE_READ_CLS_BY_FORMAT[subclass.format].__name__!r} and {subclass.__name__!r}."
411
+ f"Duplicate file format {subclass.FORMAT!r} found for classes "
412
+ f"{FILE_READ_CLS_BY_FORMAT[subclass.FORMAT].__name__!r} and {subclass.__name__!r}."
321
413
  )
322
414
  # We know we have a dict, but we want to expose FILE_READ_CLS_BY_FORMAT as a Mapping
323
- FILE_READ_CLS_BY_FORMAT[subclass.format] = subclass # type: ignore[index]
415
+ FILE_READ_CLS_BY_FORMAT[subclass.FORMAT] = subclass # type: ignore[index]
324
416
  if issubclass(subclass, TableReader):
325
- if subclass.format in TABLE_READ_CLS_BY_FORMAT:
417
+ if subclass.FORMAT in TABLE_READ_CLS_BY_FORMAT:
326
418
  raise TypeError(
327
- f"Duplicate table file format {subclass.format!r} found for classes "
328
- f"{TABLE_READ_CLS_BY_FORMAT[subclass.format].__name__!r} and {subclass.__name__!r}."
419
+ f"Duplicate table file format {subclass.FORMAT!r} found for classes "
420
+ f"{TABLE_READ_CLS_BY_FORMAT[subclass.FORMAT].__name__!r} and {subclass.__name__!r}."
329
421
  )
330
- TABLE_READ_CLS_BY_FORMAT[subclass.format] = subclass # type: ignore[index]
422
+ TABLE_READ_CLS_BY_FORMAT[subclass.FORMAT] = subclass # type: ignore[index]