cognite-toolkit 0.7.42__py3-none-any.whl → 0.7.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. cognite_toolkit/_cdf_tk/client/_toolkit_client.py +7 -1
  2. cognite_toolkit/_cdf_tk/client/api/events.py +20 -2
  3. cognite_toolkit/_cdf_tk/client/api/filemetadata.py +145 -0
  4. cognite_toolkit/_cdf_tk/client/api/raw.py +174 -0
  5. cognite_toolkit/_cdf_tk/client/api/simulator_models.py +118 -0
  6. cognite_toolkit/_cdf_tk/client/api/simulators.py +8 -0
  7. cognite_toolkit/_cdf_tk/client/api/timeseries.py +20 -2
  8. cognite_toolkit/_cdf_tk/client/cdf_client/__init__.py +2 -1
  9. cognite_toolkit/_cdf_tk/client/cdf_client/api.py +40 -6
  10. cognite_toolkit/_cdf_tk/client/data_classes/agent.py +6 -9
  11. cognite_toolkit/_cdf_tk/client/data_classes/annotation.py +79 -0
  12. cognite_toolkit/_cdf_tk/client/data_classes/asset.py +7 -14
  13. cognite_toolkit/_cdf_tk/client/data_classes/base.py +15 -5
  14. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/__init__.py +164 -0
  15. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_constraints.py +37 -0
  16. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_container.py +50 -0
  17. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_data_model.py +73 -0
  18. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_data_types.py +116 -0
  19. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_indexes.py +26 -0
  20. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_instance.py +143 -0
  21. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_references.py +86 -0
  22. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_space.py +26 -0
  23. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_view.py +143 -0
  24. cognite_toolkit/_cdf_tk/client/data_classes/data_modeling/_view_property.py +152 -0
  25. cognite_toolkit/_cdf_tk/client/data_classes/dataset.py +35 -0
  26. cognite_toolkit/_cdf_tk/client/data_classes/event.py +12 -15
  27. cognite_toolkit/_cdf_tk/client/data_classes/extraction_pipeline.py +59 -0
  28. cognite_toolkit/_cdf_tk/client/data_classes/filemetadata.py +15 -19
  29. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_destination.py +34 -0
  30. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_job.py +134 -0
  31. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_mapping.py +72 -0
  32. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/__init__.py +63 -0
  33. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/_auth.py +63 -0
  34. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/_base.py +26 -0
  35. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/_certificate.py +20 -0
  36. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/_eventhub.py +31 -0
  37. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/_kafka.py +53 -0
  38. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/_mqtt.py +36 -0
  39. cognite_toolkit/_cdf_tk/client/data_classes/hosted_extractor_source/_rest.py +49 -0
  40. cognite_toolkit/_cdf_tk/client/data_classes/identifiers.py +8 -0
  41. cognite_toolkit/_cdf_tk/client/data_classes/label.py +27 -0
  42. cognite_toolkit/_cdf_tk/client/data_classes/raw.py +3 -2
  43. cognite_toolkit/_cdf_tk/client/data_classes/securitycategory.py +24 -0
  44. cognite_toolkit/_cdf_tk/client/data_classes/sequence.py +45 -0
  45. cognite_toolkit/_cdf_tk/client/data_classes/simulator_model.py +50 -0
  46. cognite_toolkit/_cdf_tk/client/data_classes/timeseries.py +15 -18
  47. cognite_toolkit/_cdf_tk/client/data_classes/transformation.py +140 -0
  48. cognite_toolkit/_cdf_tk/client/data_classes/workflow.py +27 -0
  49. cognite_toolkit/_cdf_tk/client/data_classes/workflow_trigger.py +63 -0
  50. cognite_toolkit/_cdf_tk/client/data_classes/workflow_version.py +155 -0
  51. cognite_toolkit/_cdf_tk/client/testing.py +6 -1
  52. cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +10 -7
  53. cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +4 -4
  54. cognite_toolkit/_cdf_tk/cruds/_data_cruds.py +7 -3
  55. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/auth.py +5 -1
  56. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/classic.py +40 -39
  57. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/file.py +56 -59
  58. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/relationship.py +3 -3
  59. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/timeseries.py +48 -47
  60. cognite_toolkit/_cdf_tk/resource_classes/__init__.py +2 -0
  61. cognite_toolkit/_cdf_tk/resource_classes/simulator_model.py +17 -0
  62. cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +84 -71
  63. cognite_toolkit/_cdf_tk/storageio/_file_content.py +22 -19
  64. cognite_toolkit/_cdf_tk/utils/useful_types2.py +5 -3
  65. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  66. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  67. cognite_toolkit/_resources/cdf.toml +1 -1
  68. cognite_toolkit/_version.py +1 -1
  69. {cognite_toolkit-0.7.42.dist-info → cognite_toolkit-0.7.44.dist-info}/METADATA +11 -1
  70. {cognite_toolkit-0.7.42.dist-info → cognite_toolkit-0.7.44.dist-info}/RECORD +72 -34
  71. {cognite_toolkit-0.7.42.dist-info → cognite_toolkit-0.7.44.dist-info}/WHEEL +1 -1
  72. {cognite_toolkit-0.7.42.dist-info → cognite_toolkit-0.7.44.dist-info}/entry_points.txt +0 -0

cognite_toolkit/_cdf_tk/cruds/_resource_cruds/relationship.py
@@ -128,11 +128,11 @@ class RelationshipCRUD(ResourceCRUD[str, RelationshipWrite, Relationship]):
         elif type_value == "sequence":
             yield SequenceCRUD, id_value
         elif type_value == "timeseries":
-            yield TimeSeriesCRUD, id_value
+            yield TimeSeriesCRUD, ExternalId(external_id=id_value)
         elif type_value == "file":
-            yield FileMetadataCRUD, id_value
+            yield FileMetadataCRUD, ExternalId(external_id=id_value)
         elif type_value == "event":
-            yield EventCRUD, id_value
+            yield EventCRUD, ExternalId(external_id=id_value)
 
     def load_resource(self, resource: dict[str, Any], is_dry_run: bool = False) -> RelationshipWrite:
         if ds_external_id := resource.pop("dataSetExternalId", None):

cognite_toolkit/_cdf_tk/cruds/_resource_cruds/timeseries.py
@@ -2,16 +2,13 @@ import json
 from collections.abc import Hashable, Iterable, Sequence
 from itertools import zip_longest
 from pathlib import Path
-from typing import Any, Literal, cast, final
+from typing import Any, Literal, final
 
 from cognite.client.data_classes import (
     DatapointSubscription,
     DatapointSubscriptionList,
     DataPointSubscriptionUpdate,
     DataPointSubscriptionWrite,
-    TimeSeries,
-    TimeSeriesList,
-    TimeSeriesWrite,
 )
 from cognite.client.data_classes.capabilities import (
     Capability,
@@ -23,7 +20,8 @@ from cognite.client.data_classes.datapoints_subscriptions import TimeSeriesIDLis
 from cognite.client.exceptions import CogniteAPIError, CogniteNotFoundError
 from cognite.client.utils.useful_types import SequenceNotStr
 
-from cognite_toolkit._cdf_tk.client.data_classes.identifiers import ExternalId
+from cognite_toolkit._cdf_tk.client.data_classes.identifiers import ExternalId, InternalOrExternalId
+from cognite_toolkit._cdf_tk.client.data_classes.timeseries import TimeSeriesRequest, TimeSeriesResponse
 from cognite_toolkit._cdf_tk.constants import MAX_TIMESTAMP_MS, MIN_TIMESTAMP_MS
 from cognite_toolkit._cdf_tk.cruds._base_cruds import ResourceContainerCRUD, ResourceCRUD
 from cognite_toolkit._cdf_tk.exceptions import (
@@ -42,11 +40,11 @@ from .data_organization import DataSetsCRUD
 
 
 @final
-class TimeSeriesCRUD(ResourceContainerCRUD[str, TimeSeriesWrite, TimeSeries]):
+class TimeSeriesCRUD(ResourceContainerCRUD[ExternalId, TimeSeriesRequest, TimeSeriesResponse]):
     item_name = "datapoints"
     folder_name = "timeseries"
-    resource_cls = TimeSeries
-    resource_write_cls = TimeSeriesWrite
+    resource_cls = TimeSeriesResponse
+    resource_write_cls = TimeSeriesRequest
     yaml_cls = TimeSeriesYAML
     kind = "TimeSeries"
     dependencies = frozenset({DataSetsCRUD, GroupAllScopedCRUD, AssetCRUD})
@@ -58,7 +56,7 @@ class TimeSeriesCRUD(ResourceContainerCRUD[str, TimeSeriesWrite, TimeSeries]):
 
     @classmethod
     def get_required_capability(
-        cls, items: Sequence[TimeSeriesWrite] | None, read_only: bool
+        cls, items: Sequence[TimeSeriesRequest] | None, read_only: bool
     ) -> Capability | list[Capability]:
         if not items and items is not None:
             return []
@@ -73,22 +71,22 @@ class TimeSeriesCRUD(ResourceContainerCRUD[str, TimeSeriesWrite, TimeSeries]):
         return TimeSeriesAcl(actions, scope)
 
     @classmethod
-    def get_id(cls, item: TimeSeries | TimeSeriesWrite | dict) -> str:
+    def get_id(cls, item: TimeSeriesRequest | TimeSeriesResponse | dict) -> ExternalId:
         if isinstance(item, dict):
-            return item["externalId"]
+            return ExternalId(external_id=item["externalId"])
         if item.external_id is None:
             raise ToolkitRequiredValueError("TimeSeries must have external_id set.")
-        return item.external_id
+        return ExternalId(external_id=item.external_id)
 
     @classmethod
-    def get_internal_id(cls, item: TimeSeries | dict) -> int:
+    def get_internal_id(cls, item: TimeSeriesResponse | dict) -> int:
         if isinstance(item, dict):
             return item["id"]
         return item.id
 
     @classmethod
-    def dump_id(cls, id: str) -> dict[str, Any]:
-        return {"externalId": id}
+    def dump_id(cls, id: ExternalId) -> dict[str, Any]:
+        return id.dump()
 
     @classmethod
     def get_dependent_items(cls, item: dict) -> Iterable[tuple[type[ResourceCRUD], Hashable]]:
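
The hunk above changes TimeSeriesCRUD to key resources by a typed ExternalId object rather than a bare string: get_id wraps the value and dump_id delegates to the identifier's own dump(). A minimal sketch of what such an identifier looks like, assuming a frozen dataclass with a dump() method as the diff implies; the real class in cognite_toolkit/_cdf_tk/client/data_classes/identifiers.py may differ in detail.

# Hypothetical sketch of the ExternalId identifier used above; illustrative only.
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class ExternalId:
    external_id: str

    def dump(self) -> dict[str, Any]:
        # Matches what dump_id() is expected to return for the API payload.
        return {"externalId": self.external_id}


# get_id/dump_id round-trip as used by the CRUD layer:
identifier = ExternalId(external_id="my_timeseries")
assert identifier.dump() == {"externalId": "my_timeseries"}
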
@@ -100,7 +98,7 @@ class TimeSeriesCRUD(ResourceContainerCRUD[str, TimeSeriesWrite, TimeSeries]):
         if "assetExternalId" in item:
             yield AssetCRUD, ExternalId(external_id=item["assetExternalId"])
 
-    def load_resource(self, resource: dict[str, Any], is_dry_run: bool = False) -> TimeSeriesWrite:
+    def load_resource(self, resource: dict[str, Any], is_dry_run: bool = False) -> TimeSeriesRequest:
         if ds_external_id := resource.pop("dataSetExternalId", None):
             resource["dataSetId"] = self.client.lookup.data_sets.id(ds_external_id, is_dry_run)
         if security_categories_names := resource.pop("securityCategoryNames", []):
@@ -109,10 +107,10 @@ class TimeSeriesCRUD(ResourceContainerCRUD[str, TimeSeriesWrite, TimeSeries]):
             )
         if asset_external_id := resource.pop("assetExternalId", None):
             resource["assetId"] = self.client.lookup.assets.id(asset_external_id, is_dry_run)
-        return TimeSeriesWrite._load(resource)
+        return TimeSeriesRequest.model_validate(resource)
 
-    def dump_resource(self, resource: TimeSeries, local: dict[str, Any] | None = None) -> dict[str, Any]:
-        dumped = resource.as_write().dump()
+    def dump_resource(self, resource: TimeSeriesResponse, local: dict[str, Any] | None = None) -> dict[str, Any]:
+        dumped = resource.as_request_resource().dump()
         if data_set_id := dumped.pop("dataSetId", None):
             dumped["dataSetExternalId"] = self.client.lookup.data_sets.external_id(data_set_id)
         if security_categories := dumped.pop("securityCategories", []):
@@ -121,53 +119,56 @@ class TimeSeriesCRUD(ResourceContainerCRUD[str, TimeSeriesWrite, TimeSeries]):
             dumped["assetExternalId"] = self.client.lookup.assets.external_id(asset_id)
         return dumped
 
-    def create(self, items: Sequence[TimeSeriesWrite]) -> TimeSeriesList:
-        return self.client.time_series.create(items)
+    def create(self, items: Sequence[TimeSeriesRequest]) -> list[TimeSeriesResponse]:
+        return self.client.tool.timeseries.create(items)
 
-    def retrieve(self, ids: SequenceNotStr[str | int]) -> TimeSeriesList:
-        internal_ids, external_ids = self._split_ids(ids)
-        return self.client.time_series.retrieve_multiple(
-            ids=internal_ids, external_ids=external_ids, ignore_unknown_ids=True
-        )
+    def retrieve(self, ids: SequenceNotStr[ExternalId]) -> list[TimeSeriesResponse]:
+        return self.client.tool.timeseries.retrieve(list(ids), ignore_unknown_ids=True)
 
-    def update(self, items: Sequence[TimeSeriesWrite]) -> TimeSeriesList:
-        return self.client.time_series.update(items, mode="replace")
+    def update(self, items: Sequence[TimeSeriesRequest]) -> list[TimeSeriesResponse]:
+        return self.client.tool.timeseries.update(items, mode="replace")
 
-    def delete(self, ids: SequenceNotStr[str | int]) -> int:
-        existing = self.retrieve(ids)
-        if existing:
-            self.client.time_series.delete(id=existing.as_ids(), ignore_unknown_ids=True)
-        return len(existing)
+    def delete(self, ids: SequenceNotStr[InternalOrExternalId]) -> int:
+        if not ids:
+            return 0
+        self.client.tool.timeseries.delete(list(ids), ignore_unknown_ids=True)
+        return len(ids)
 
     def _iterate(
         self,
         data_set_external_id: str | None = None,
         space: str | None = None,
         parent_ids: list[Hashable] | None = None,
-    ) -> Iterable[TimeSeries]:
-        return iter(
-            self.client.time_series(data_set_external_ids=[data_set_external_id] if data_set_external_id else None)
-        )
+    ) -> Iterable[TimeSeriesResponse]:
+        cursor: str | None = None
+        while True:
+            page = self.client.tool.timeseries.iterate(
+                data_set_external_ids=[data_set_external_id] if data_set_external_id else None,
+                limit=1000,
+                cursor=cursor,
+            )
+            yield from page.items
+            if not page.next_cursor or not page.items:
+                break
+            cursor = page.next_cursor
 
-    def count(self, ids: str | dict[str, Any] | SequenceNotStr[str | dict[str, Any]] | None) -> int:
+    def count(self, ids: SequenceNotStr[ExternalId]) -> int:
         datapoints = self.client.time_series.data.retrieve(
-            external_id=ids,  # type: ignore[arg-type]
+            external_id=[id.external_id for id in ids],
            start=MIN_TIMESTAMP_MS,
            end=MAX_TIMESTAMP_MS + 1,
            aggregates="count",
            granularity="1000d",
            ignore_unknown_ids=True,
        )
-        return sum(sum(data.count or []) for data in datapoints)  # type: ignore[union-attr, misc, arg-type]
+        return sum(sum(data.count or []) for data in datapoints)
 
-    def drop_data(self, ids: SequenceNotStr[str] | None) -> int:
+    def drop_data(self, ids: SequenceNotStr[ExternalId]) -> int:
         count = self.count(ids)
-        existing = self.client.time_series.retrieve_multiple(
-            external_ids=cast(SequenceNotStr[str], ids), ignore_unknown_ids=True
-        ).as_external_ids()
-        for external_id in existing:
+        existing = self.client.tool.timeseries.retrieve(list(ids), ignore_unknown_ids=True)
+        for ts in existing:
             self.client.time_series.data.delete_range(
-                external_id=external_id, start=MIN_TIMESTAMP_MS, end=MAX_TIMESTAMP_MS + 1
+                external_id=ts.external_id, start=MIN_TIMESTAMP_MS, end=MAX_TIMESTAMP_MS + 1
             )
         return count
 
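
The hunk above replaces the SDK's lazy iterator (self.client.time_series(...), and the analogous files/events calls later in this diff) with an explicit cursor loop over the new client.tool.*.iterate endpoints, which lets the caller stop between pages. A minimal sketch of the shared pattern, assuming only that the returned page exposes items and next_cursor as the new code does; the helper name and Protocol below are illustrative, not part of the toolkit.

from collections.abc import Callable, Iterator
from typing import Protocol, TypeVar

T = TypeVar("T")


class PageLike(Protocol[T]):
    # Assumed page shape, inferred from page.items / page.next_cursor in the new code.
    items: list[T]
    next_cursor: str | None


def drain_pages(fetch: Callable[[str | None], PageLike[T]]) -> Iterator[T]:
    # Mirrors the loop used in _iterate/stream_data: fetch a page, yield its items,
    # then follow next_cursor until the server signals there is nothing left.
    cursor: str | None = None
    while True:
        page = fetch(cursor)  # e.g. lambda c: client.tool.timeseries.iterate(limit=1000, cursor=c)
        yield from page.items
        if not page.next_cursor or not page.items:
            break
        cursor = page.next_cursor
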
@@ -220,7 +221,7 @@ class DatapointSubscriptionCRUD(
         if "dataSetExternalId" in item:
             yield DataSetsCRUD, item["dataSetExternalId"]
         for timeseries_id in item.get("timeSeriesIds", []):
-            yield TimeSeriesCRUD, timeseries_id
+            yield TimeSeriesCRUD, ExternalId(external_id=timeseries_id)
 
     @classmethod
     def get_required_capability(

cognite_toolkit/_cdf_tk/resource_classes/__init__.py
@@ -40,6 +40,7 @@ from .robotics import RobotCapabilityYAML, RobotDataPostProcessingYAML, RobotFra
 from .search_config import SearchConfigYAML
 from .securitycategories import SecurityCategoriesYAML
 from .sequence import SequenceRowYAML, SequenceYAML
+from .simulator_model import SimulatorModelYAML
 from .space import SpaceYAML
 from .streamlit_ import StreamlitYAML
 from .streams import StreamYAML
@@ -94,6 +95,7 @@ __all__ = [
     "SecurityCategoriesYAML",
     "SequenceRowYAML",
     "SequenceYAML",
+    "SimulatorModelYAML",
     "SpaceYAML",
     "StreamYAML",
     "StreamlitYAML",

cognite_toolkit/_cdf_tk/resource_classes/simulator_model.py
@@ -0,0 +1,17 @@
+from pydantic import Field
+
+from .base import ToolkitResource
+
+
+class SimulatorModelYAML(ToolkitResource):
+    """Simulator model YAML resource class.
+
+    Based on: https://api-docs.cognite.com/20230101/tag/Simulator-Models/operation/create_simulator_model_simulators_models_post
+    """
+
+    external_id: str = Field(description="External ID of the simulator model.", min_length=1, max_length=255)
+    simulator_external_id: str = Field(description="External id of the simulator.", min_length=1, max_length=50)
+    name: str = Field(description="The name of the simulator model.", min_length=1, max_length=50)
+    description: str | None = Field(None, description="Description of the simulator model.", max_length=500)
+    data_set_external_id: str = Field(description="The external ID of the dataset this simulator model belongs to.")
+    type: str = Field(description="The type of the simulator model.", min_length=1, max_length=50)
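
SimulatorModelYAML above is a pydantic model, so simulator model resource files can be validated field by field before deployment. A minimal usage sketch, assuming ToolkitResource maps the camelCase keys used in toolkit YAML onto the snake_case fields (that aliasing lives in the base class and is not visible in this diff); the file content is invented for illustration.

import yaml

from cognite_toolkit._cdf_tk.resource_classes import SimulatorModelYAML

# Hypothetical resource file; keys follow the fields declared above.
raw = yaml.safe_load(
    """
externalId: pump_station_model
simulatorExternalId: DWSIM
name: Pump Station
dataSetExternalId: ds_simulators
type: SteadyState
"""
)

model = SimulatorModelYAML.model_validate(raw)  # raises pydantic.ValidationError on bad input
print(model.external_id, model.simulator_external_id)
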

cognite_toolkit/_cdf_tk/storageio/_asset_centric.py
@@ -3,22 +3,14 @@ from collections import defaultdict
 from collections.abc import Iterable, Sequence
 from typing import Any, ClassVar, Generic
 
-from cognite.client.data_classes import (
-    Event,
-    EventList,
-    EventWrite,
-    FileMetadata,
-    FileMetadataList,
-    Label,
-    LabelDefinition,
-    TimeSeries,
-    TimeSeriesList,
-    TimeSeriesWrite,
-)
+from cognite.client.data_classes import Label, LabelDefinition
 
 from cognite_toolkit._cdf_tk.client import ToolkitClient
 from cognite_toolkit._cdf_tk.client.data_classes.asset import AssetAggregateItem, AssetRequest, AssetResponse
+from cognite_toolkit._cdf_tk.client.data_classes.event import EventRequest, EventResponse
+from cognite_toolkit._cdf_tk.client.data_classes.filemetadata import FileMetadataResponse
 from cognite_toolkit._cdf_tk.client.data_classes.identifiers import InternalId
+from cognite_toolkit._cdf_tk.client.data_classes.timeseries import TimeSeriesRequest, TimeSeriesResponse
 from cognite_toolkit._cdf_tk.cruds import (
     AssetCRUD,
     DataSetsCRUD,
@@ -111,13 +103,16 @@ class AssetCentricIO(
 
     def _collect_dependencies(
         self,
-        resources: Sequence[AssetResponse] | FileMetadataList | TimeSeriesList | EventList,
+        resources: Sequence[AssetResponse]
+        | Sequence[FileMetadataResponse]
+        | Sequence[TimeSeriesResponse]
+        | Sequence[EventResponse],
         selector: AssetCentricSelector,
     ) -> None:
         for resource in resources:
             if resource.data_set_id:
                 self._downloaded_data_sets_by_selector[selector].add(resource.data_set_id)
-            if isinstance(resource, AssetResponse | FileMetadata):
+            if isinstance(resource, AssetResponse | FileMetadataResponse):
                 for label in resource.labels or []:
                     if isinstance(label, str):
                         self._downloaded_labels_by_selector[selector].add(label)
@@ -149,17 +144,19 @@
     def create_internal_identifier(cls, internal_id: int, project: str) -> str:
         return f"INTERNAL_ID_project_{project}_{internal_id!s}"
 
-    def _populate_data_set_id_cache(self, chunk: Sequence[AssetResponse | FileMetadata | TimeSeries | Event]) -> None:
+    def _populate_data_set_id_cache(
+        self, chunk: Sequence[AssetResponse | FileMetadataResponse | TimeSeriesResponse | EventResponse]
+    ) -> None:
         data_set_ids = {item.data_set_id for item in chunk if item.data_set_id is not None}
         self.client.lookup.data_sets.external_id(list(data_set_ids))
 
-    def _populate_security_category_cache(self, chunk: Sequence[FileMetadata | TimeSeries]) -> None:
+    def _populate_security_category_cache(self, chunk: Sequence[FileMetadataResponse | TimeSeriesResponse]) -> None:
         security_category_ids: set[int] = set()
         for item in chunk:
             security_category_ids.update(item.security_categories or [])
         self.client.lookup.security_categories.external_id(list(security_category_ids))
 
-    def _populate_asset_id_cache(self, chunk: Sequence[FileMetadata | Event]) -> None:
+    def _populate_asset_id_cache(self, chunk: Sequence[FileMetadataResponse | EventResponse]) -> None:
         asset_ids: set[int] = set()
         for item in chunk:
             asset_ids.update(item.asset_ids or [])
@@ -381,7 +378,7 @@ class AssetIO(UploadableAssetCentricIO[AssetResponse, AssetRequest]):
             current_depth += 1
 
 
-class FileMetadataIO(AssetCentricIO[FileMetadata]):
+class FileMetadataIO(AssetCentricIO[FileMetadataResponse]):
     KIND = "FileMetadata"
     RESOURCE_TYPE = "file"
     SUPPORTED_DOWNLOAD_FORMATS = frozenset({".parquet", ".csv", ".ndjson"})
@@ -393,7 +390,7 @@
         super().__init__(client)
         self._crud = FileMetadataCRUD.create_loader(self.client)
 
-    def as_id(self, item: FileMetadata) -> str:
+    def as_id(self, item: FileMetadataResponse) -> str:
         return item.external_id if item.external_id is not None else self._create_identifier(item.id)
 
     def _get_aggregator(self) -> AssetCentricAggregator:
@@ -436,25 +433,31 @@ class FileMetadataIO(AssetCentricIO[FileMetadata]):
         ]
         return file_schema + metadata_schema
 
-    def stream_data(self, selector: AssetCentricSelector, limit: int | None = None) -> Iterable[Page[FileMetadata]]:
+    def stream_data(
+        self, selector: AssetCentricSelector, limit: int | None = None
+    ) -> Iterable[Page[FileMetadataResponse]]:
         asset_subtree_external_ids, data_set_external_ids = self._get_hierarchy_dataset_pair(selector)
-        for file_list in self.client.files(
-            chunk_size=self.CHUNK_SIZE,
-            limit=limit,
-            asset_subtree_external_ids=asset_subtree_external_ids,
-            data_set_external_ids=data_set_external_ids,
-            # We cannot use partitions here as it is not thread safe. This spawn multiple threads
-            # that are not shut down until all data is downloaded. We need to be able to abort.
-            partitions=None,
-        ):
-            self._collect_dependencies(file_list, selector)
-            yield Page(worker_id="main", items=file_list)
-
-    def retrieve(self, ids: Sequence[int]) -> FileMetadataList:
-        return self.client.files.retrieve_multiple(ids)
+        cursor: str | None = None
+        total_count = 0
+        while True:
+            page = self.client.tool.filemetadata.iterate(
+                data_set_external_ids=data_set_external_ids,
+                asset_subtree_external_ids=asset_subtree_external_ids,
+                limit=self.CHUNK_SIZE,
+                cursor=cursor,
+            )
+            self._collect_dependencies(page.items, selector)
+            yield Page(worker_id="main", items=page.items)
+            total_count += len(page.items)
+            if page.next_cursor is None or (limit is not None and total_count >= limit):
+                break
+            cursor = page.next_cursor
+
+    def retrieve(self, ids: Sequence[int]) -> list[FileMetadataResponse]:
+        return self.client.tool.filemetadata.retrieve(InternalId.from_ids(ids))
 
     def data_to_json_chunk(
-        self, data_chunk: Sequence[FileMetadata], selector: AssetCentricSelector | None = None
+        self, data_chunk: Sequence[FileMetadataResponse], selector: AssetCentricSelector | None = None
     ) -> list[dict[str, JsonVal]]:
         # Ensure data sets/assets/security-categories are looked up to populate cache.
         # This is to avoid looking up each data set id individually in the .dump_resource call
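
retrieve() above now goes through client.tool.filemetadata.retrieve(InternalId.from_ids(ids)) rather than the SDK's retrieve_multiple. A minimal sketch of what InternalId.from_ids plausibly does, inferred only from its usage here; the actual helper in data_classes/identifiers.py may differ.

# Hypothetical sketch of the InternalId helper as used by retrieve(); illustrative only.
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class InternalId:
    id: int

    def dump(self) -> dict[str, Any]:
        return {"id": self.id}

    @classmethod
    def from_ids(cls, ids: Sequence[int]) -> list["InternalId"]:
        # Wrap plain server-side ids so the typed retrieve() endpoints accept them.
        return [cls(id=raw_id) for raw_id in ids]


assert [i.dump() for i in InternalId.from_ids([1, 2])] == [{"id": 1}, {"id": 2}]
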
@@ -465,7 +468,7 @@ class FileMetadataIO(AssetCentricIO[FileMetadata]):
         return [self._crud.dump_resource(item) for item in data_chunk]
 
 
-class TimeSeriesIO(UploadableAssetCentricIO[TimeSeries, TimeSeriesWrite]):
+class TimeSeriesIO(UploadableAssetCentricIO[TimeSeriesResponse, TimeSeriesRequest]):
     KIND = "TimeSeries"
     SUPPORTED_DOWNLOAD_FORMATS = frozenset({".parquet", ".csv", ".ndjson"})
     SUPPORTED_COMPRESSIONS = frozenset({".gz"})
@@ -477,31 +480,35 @@ class TimeSeriesIO(UploadableAssetCentricIO[TimeSeries, TimeSeriesWrite]):
         super().__init__(client)
         self._crud = TimeSeriesCRUD.create_loader(self.client)
 
-    def as_id(self, item: TimeSeries) -> str:
+    def as_id(self, item: TimeSeriesResponse) -> str:
         return item.external_id if item.external_id is not None else self._create_identifier(item.id)
 
     def _get_aggregator(self) -> AssetCentricAggregator:
         return TimeSeriesAggregator(self.client)
 
-    def retrieve(self, ids: Sequence[int]) -> TimeSeriesList:
-        return self.client.time_series.retrieve_multiple(ids=ids)
+    def retrieve(self, ids: Sequence[int]) -> list[TimeSeriesResponse]:
+        return self.client.tool.timeseries.retrieve(InternalId.from_ids(ids))
 
     def stream_data(self, selector: AssetCentricSelector, limit: int | None = None) -> Iterable[Page]:
         asset_subtree_external_ids, data_set_external_ids = self._get_hierarchy_dataset_pair(selector)
-        for ts_list in self.client.time_series(
-            chunk_size=self.CHUNK_SIZE,
-            limit=limit,
-            asset_subtree_external_ids=asset_subtree_external_ids,
-            data_set_external_ids=data_set_external_ids,
-            # We cannot use partitions here as it is not thread safe. This spawn multiple threads
-            # that are not shut down until all data is downloaded. We need to be able to abort.
-            partitions=None,
-        ):
-            self._collect_dependencies(ts_list, selector)
-            yield Page(worker_id="main", items=ts_list)
+        cursor: str | None = None
+        total_count = 0
+        while True:
+            page = self.client.tool.timeseries.iterate(
+                data_set_external_ids=data_set_external_ids,
+                asset_subtree_external_ids=asset_subtree_external_ids,
+                limit=self.CHUNK_SIZE,
+                cursor=cursor,
+            )
+            self._collect_dependencies(page.items, selector)
+            yield Page(worker_id="main", items=page.items)
+            total_count += len(page.items)
+            if page.next_cursor is None or (limit is not None and total_count >= limit):
+                break
+            cursor = page.next_cursor
 
     def data_to_json_chunk(
-        self, data_chunk: Sequence[TimeSeries], selector: AssetCentricSelector | None = None
+        self, data_chunk: Sequence[TimeSeriesResponse], selector: AssetCentricSelector | None = None
     ) -> list[dict[str, JsonVal]]:
         # Ensure data sets/assets/security categories are looked up to populate cache.
         self._populate_data_set_id_cache(data_chunk)
@@ -513,14 +520,14 @@ class TimeSeriesIO(UploadableAssetCentricIO[TimeSeries, TimeSeriesWrite]):
 
     def json_chunk_to_data(
         self, data_chunk: list[tuple[str, dict[str, JsonVal]]]
-    ) -> Sequence[UploadItem[TimeSeriesWrite]]:
+    ) -> Sequence[UploadItem[TimeSeriesRequest]]:
         chunks = [item_json for _, item_json in data_chunk]
         self._populate_asset_external_ids_cache(chunks)
         self._populate_data_set_external_id_cache(chunks)
         self._populate_security_category_name_cache(chunks)
         return super().json_chunk_to_data(data_chunk)
 
-    def json_to_resource(self, item_json: dict[str, JsonVal]) -> TimeSeriesWrite:
+    def json_to_resource(self, item_json: dict[str, JsonVal]) -> TimeSeriesRequest:
         return self._crud.load_resource(item_json)
 
     def get_schema(self, selector: AssetCentricSelector) -> list[SchemaColumn]:
@@ -561,7 +568,7 @@ class TimeSeriesIO(UploadableAssetCentricIO[TimeSeries, TimeSeriesWrite]):
         return ts_schema + metadata_schema
 
 
-class EventIO(UploadableAssetCentricIO[Event, EventWrite]):
+class EventIO(UploadableAssetCentricIO[EventResponse, EventRequest]):
     KIND = "Events"
     SUPPORTED_DOWNLOAD_FORMATS = frozenset({".parquet", ".csv", ".ndjson"})
     SUPPORTED_COMPRESSIONS = frozenset({".gz"})
@@ -573,7 +580,7 @@ class EventIO(UploadableAssetCentricIO[Event, EventWrite]):
         super().__init__(client)
         self._crud = EventCRUD.create_loader(self.client)
 
-    def as_id(self, item: Event) -> str:
+    def as_id(self, item: EventResponse) -> str:
         return item.external_id if item.external_id is not None else self._create_identifier(item.id)
 
     def _get_aggregator(self) -> AssetCentricAggregator:
@@ -618,20 +625,24 @@ class EventIO(UploadableAssetCentricIO[Event, EventWrite]):
 
     def stream_data(self, selector: AssetCentricSelector, limit: int | None = None) -> Iterable[Page]:
         asset_subtree_external_ids, data_set_external_ids = self._get_hierarchy_dataset_pair(selector)
-        for event_list in self.client.events(
-            chunk_size=self.CHUNK_SIZE,
-            limit=limit,
-            asset_subtree_external_ids=asset_subtree_external_ids,
-            data_set_external_ids=data_set_external_ids,
-            # We cannot use partitions here as it is not thread safe. This spawn multiple threads
-            # that are not shut down until all data is downloaded. We need to be able to abort.
-            partitions=None,
-        ):
-            self._collect_dependencies(event_list, selector)
-            yield Page(worker_id="main", items=event_list)
+        cursor: str | None = None
+        total_count = 0
+        while True:
+            page = self.client.tool.events.iterate(
+                data_set_external_ids=data_set_external_ids,
+                asset_subtree_external_ids=asset_subtree_external_ids,
+                limit=self.CHUNK_SIZE,
+                cursor=cursor,
+            )
+            self._collect_dependencies(page.items, selector)
+            yield Page(worker_id="main", items=page.items)
+            total_count += len(page.items)
+            if page.next_cursor is None or (limit is not None and total_count >= limit):
+                break
+            cursor = page.next_cursor
 
     def data_to_json_chunk(
-        self, data_chunk: Sequence[Event], selector: AssetCentricSelector | None = None
+        self, data_chunk: Sequence[EventResponse], selector: AssetCentricSelector | None = None
     ) -> list[dict[str, JsonVal]]:
         # Ensure data sets/assets are looked up to populate cache.
         self._populate_data_set_id_cache(data_chunk)
@@ -639,17 +650,19 @@ class EventIO(UploadableAssetCentricIO[Event, EventWrite]):
 
         return [self._crud.dump_resource(item) for item in data_chunk]
 
-    def json_chunk_to_data(self, data_chunk: list[tuple[str, dict[str, JsonVal]]]) -> Sequence[UploadItem[EventWrite]]:
+    def json_chunk_to_data(
+        self, data_chunk: list[tuple[str, dict[str, JsonVal]]]
+    ) -> Sequence[UploadItem[EventRequest]]:
         chunks = [item_json for _, item_json in data_chunk]
         self._populate_asset_external_ids_cache(chunks)
         self._populate_data_set_external_id_cache(chunks)
         return super().json_chunk_to_data(data_chunk)
 
-    def json_to_resource(self, item_json: dict[str, JsonVal]) -> EventWrite:
+    def json_to_resource(self, item_json: dict[str, JsonVal]) -> EventRequest:
         return self._crud.load_resource(item_json)
 
-    def retrieve(self, ids: Sequence[int]) -> EventList:
-        return self.client.events.retrieve_multiple(ids)
+    def retrieve(self, ids: Sequence[int]) -> list[EventResponse]:
+        return self.client.tool.events.retrieve(InternalId.from_ids(ids))
 
 
 class HierarchyIO(ConfigurableStorageIO[AssetCentricSelector, AssetCentricResource]):

cognite_toolkit/_cdf_tk/storageio/_file_content.py
@@ -6,10 +6,11 @@ from pathlib import Path
 from typing import cast
 
 import httpx
-from cognite.client.data_classes import FileMetadata, FileMetadataWrite
-from cognite.client.data_classes.data_modeling import NodeId, ViewId
+from cognite.client.data_classes.data_modeling import ViewId
 
 from cognite_toolkit._cdf_tk.client import ToolkitClient
+from cognite_toolkit._cdf_tk.client.data_classes.data_modeling import NodeReference
+from cognite_toolkit._cdf_tk.client.data_classes.filemetadata import FileMetadataRequest, FileMetadataResponse
 from cognite_toolkit._cdf_tk.client.http_client import (
     DataBodyRequest,
     ErrorDetails,
@@ -45,21 +46,24 @@ COGNITE_FILE_VIEW = ViewId("cdf_cdm", "CogniteFile", "v1")
 
 
 @dataclass
-class UploadFileContentItem(UploadItem[FileMetadataWrite]):
+class UploadFileContentItem(UploadItem[FileMetadataRequest]):
     file_path: Path
     mime_type: str
 
+    def dump(self) -> JsonVal:
+        return self.item.dump(camel_case=True, exclude_extra=True)
+
 
 @dataclass
 class MetadataWithFilePath(ResourceResponseProtocol):
-    metadata: FileMetadata
+    metadata: FileMetadataResponse
     file_path: Path
 
-    def as_write(self) -> FileMetadataWrite:
-        return self.metadata.as_write()
+    def as_write(self) -> FileMetadataRequest:
+        return self.metadata.as_request_resource()
 
 
-class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePath, FileMetadataWrite]):
+class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePath, FileMetadataRequest]):
     SUPPORTED_DOWNLOAD_FORMATS = frozenset({".ndjson"})
     SUPPORTED_COMPRESSIONS = frozenset({".gz"})
     CHUNK_SIZE = 10
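
Across this release the SDK read/write pairs (FileMetadata/FileMetadataWrite and friends) are replaced by toolkit-owned pydantic Request/Response models: as_request_resource() takes over from as_write(), and dump(camel_case=True, exclude_extra=True) produces the API payload, as in the new UploadFileContentItem.dump() above. A minimal sketch of that pattern under those assumptions; the real models in data_classes/filemetadata.py carry many more fields and options.

# Illustrative sketch of the Request/Response model pattern; field set and base class
# are simplified assumptions, not the toolkit's actual definitions.
from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel


class _Base(BaseModel):
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    def dump(self, camel_case: bool = True, exclude_extra: bool = False) -> dict:
        # exclude_extra handling is omitted in this sketch.
        return self.model_dump(by_alias=camel_case, exclude_none=True)


class FileMetadataRequest(_Base):
    external_id: str
    name: str


class FileMetadataResponse(FileMetadataRequest):
    id: int

    def as_request_resource(self) -> FileMetadataRequest:
        # Drop server-generated fields (here: id) when converting a response back to a request.
        return FileMetadataRequest.model_validate(self.model_dump(exclude={"id"}))


response = FileMetadataResponse(id=42, external_id="doc_1", name="report.pdf")
assert response.as_request_resource().dump() == {"externalId": "doc_1", "name": "report.pdf"}
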
@@ -116,7 +120,7 @@ class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePat
             )
             yield Page(items=downloaded_files, worker_id="Main")
 
-    def _retrieve_metadata(self, identifiers: Sequence[FileIdentifier]) -> Sequence[FileMetadata] | None:
+    def _retrieve_metadata(self, identifiers: Sequence[FileIdentifier]) -> Sequence[FileMetadataResponse] | None:
         config = self.client.config
         responses = self.client.http_client.request_with_retries(
             message=SimpleBodyRequest(
@@ -137,12 +141,11 @@ class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePat
         items_data = body.get("items", [])
         if not isinstance(items_data, list):
             return None
-        # MyPy does not understand that JsonVal is valid dict[Any, Any]
-        return [FileMetadata._load(item) for item in items_data]  # type: ignore[arg-type]
+        return [FileMetadataResponse.model_validate(item) for item in items_data]
 
     @staticmethod
-    def _as_metadata_map(metadata: Sequence[FileMetadata]) -> dict[FileIdentifier, FileMetadata]:
-        identifiers_map: dict[FileIdentifier, FileMetadata] = {}
+    def _as_metadata_map(metadata: Sequence[FileMetadataResponse]) -> dict[FileIdentifier, FileMetadataResponse]:
+        identifiers_map: dict[FileIdentifier, FileMetadataResponse] = {}
         for item in metadata:
             if item.id is not None:
                 identifiers_map[FileInternalID(internal_id=item.id)] = item
@@ -158,9 +161,9 @@ class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePat
             ] = item
         return identifiers_map
 
-    def _create_filepath(self, meta: FileMetadata, selector: FileIdentifierSelector) -> Path:
+    def _create_filepath(self, meta: FileMetadataResponse, selector: FileIdentifierSelector) -> Path:
         # We now that metadata always have name set
-        filename = Path(sanitize_filename(cast(str, meta.name)))
+        filename = Path(sanitize_filename(meta.name))
         if len(filename.suffix) == 0 and meta.mime_type:
             if mime_ext := mimetypes.guess_extension(meta.mime_type):
                 filename = filename.with_suffix(mime_ext)
@@ -245,12 +248,12 @@ class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePat
         )
         return result
 
-    def json_to_resource(self, item_json: dict[str, JsonVal]) -> FileMetadataWrite:
+    def json_to_resource(self, item_json: dict[str, JsonVal]) -> FileMetadataRequest:
         return self._crud.load_resource(item_json)
 
     def upload_items(
         self,
-        data_chunk: Sequence[UploadItem[FileMetadataWrite]],
+        data_chunk: Sequence[UploadItem[FileMetadataRequest]],
         http_client: HTTPClient,
         selector: FileContentSelector | None = None,
     ) -> Sequence[HTTPMessage]:
@@ -320,12 +323,12 @@ class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePat
 
         """
         # We know that instance_id is always set for data modeling uploads
-        instance_id = cast(NodeId, item.item.instance_id)
+        instance_id = cast(NodeReference, item.item.instance_id)
         responses = http_client.request_with_retries(
             message=SimpleBodyRequest(
                 endpoint_url=http_client.config.create_api_url("/files/uploadlink"),
                 method="POST",
-                body_content={"items": [{"instanceId": instance_id.dump(include_instance_type=False)}]},  # type: ignore[dict-item]
+                body_content={"items": [{"instanceId": instance_id.dump()}]},
             )
         )
         # We know there is only one response since we only requested one upload link
@@ -340,7 +343,7 @@ class FileContentIO(UploadableStorageIO[FileContentSelector, MetadataWithFilePat
 
     @classmethod
     def _create_cognite_file_node(
-        cls, instance_id: NodeId, http_client: HTTPClient, upload_id: str, results: MutableSequence[HTTPMessage]
+        cls, instance_id: NodeReference, http_client: HTTPClient, upload_id: str, results: MutableSequence[HTTPMessage]
     ) -> bool:
         node_creation = http_client.request_with_retries(
             message=SimpleBodyRequest(
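
The last hunks swap the SDK's NodeId for the toolkit's own NodeReference in the data-modeling file upload path, and instance_id.dump() now yields the instanceId payload directly (the old include_instance_type=False flag is gone). A minimal sketch of that payload construction, assuming NodeReference holds space and external_id and dumps to camelCase keys; the actual class lives in data_classes/data_modeling/_references.py and may differ.

# Hypothetical NodeReference shape, inferred from the instance_id.dump() usage above.
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True)
class NodeReference:
    space: str
    external_id: str

    def dump(self) -> dict[str, Any]:
        return {"space": self.space, "externalId": self.external_id}


instance_id = NodeReference(space="my_space", external_id="my_file_node")
body_content = {"items": [{"instanceId": instance_id.dump()}]}
# body_content is what gets POSTed to /files/uploadlink for a data-modeling backed file.
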