cognite-toolkit 0.6.97__py3-none-any.whl → 0.7.39__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as released.
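A diff like the one below can be reproduced locally, since a wheel is a zip archive. A minimal sketch, assuming both wheels were downloaded first (for example with pip download cognite-toolkit==0.6.97 --no-deps):

    # Sketch: diff one file between two downloaded wheels (a .whl is a zip archive).
    # The wheel file names and the target path are taken from this page.
    import difflib
    import zipfile

    OLD = "cognite_toolkit-0.6.97-py3-none-any.whl"
    NEW = "cognite_toolkit-0.7.39-py3-none-any.whl"
    PATH = "cognite_toolkit/_cdf_tk/storageio/_datapoints.py"

    with zipfile.ZipFile(OLD) as old, zipfile.ZipFile(NEW) as new:
        old_lines = old.read(PATH).decode("utf-8").splitlines(keepends=True)
        new_lines = new.read(PATH).decode("utf-8").splitlines(keepends=True)

    print("".join(difflib.unified_diff(old_lines, new_lines, f"a/{PATH}", f"b/{PATH}")))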
Files changed (198)
  1. cognite_toolkit/_cdf.py +21 -23
  2. cognite_toolkit/_cdf_tk/apps/__init__.py +4 -0
  3. cognite_toolkit/_cdf_tk/apps/_core_app.py +19 -5
  4. cognite_toolkit/_cdf_tk/apps/_data_app.py +1 -1
  5. cognite_toolkit/_cdf_tk/apps/_dev_app.py +86 -0
  6. cognite_toolkit/_cdf_tk/apps/_download_app.py +693 -25
  7. cognite_toolkit/_cdf_tk/apps/_dump_app.py +44 -102
  8. cognite_toolkit/_cdf_tk/apps/_import_app.py +41 -0
  9. cognite_toolkit/_cdf_tk/apps/_landing_app.py +18 -4
  10. cognite_toolkit/_cdf_tk/apps/_migrate_app.py +424 -9
  11. cognite_toolkit/_cdf_tk/apps/_modules_app.py +0 -3
  12. cognite_toolkit/_cdf_tk/apps/_purge.py +15 -43
  13. cognite_toolkit/_cdf_tk/apps/_run.py +11 -0
  14. cognite_toolkit/_cdf_tk/apps/_upload_app.py +45 -6
  15. cognite_toolkit/_cdf_tk/builders/__init__.py +2 -2
  16. cognite_toolkit/_cdf_tk/builders/_base.py +28 -42
  17. cognite_toolkit/_cdf_tk/builders/_raw.py +1 -1
  18. cognite_toolkit/_cdf_tk/cdf_toml.py +20 -1
  19. cognite_toolkit/_cdf_tk/client/_toolkit_client.py +32 -12
  20. cognite_toolkit/_cdf_tk/client/api/infield.py +114 -17
  21. cognite_toolkit/_cdf_tk/client/api/{canvas.py → legacy/canvas.py} +15 -7
  22. cognite_toolkit/_cdf_tk/client/api/{charts.py → legacy/charts.py} +1 -1
  23. cognite_toolkit/_cdf_tk/client/api/{extended_data_modeling.py → legacy/extended_data_modeling.py} +1 -1
  24. cognite_toolkit/_cdf_tk/client/api/{extended_files.py → legacy/extended_files.py} +2 -2
  25. cognite_toolkit/_cdf_tk/client/api/{extended_functions.py → legacy/extended_functions.py} +15 -18
  26. cognite_toolkit/_cdf_tk/client/api/{extended_raw.py → legacy/extended_raw.py} +1 -1
  27. cognite_toolkit/_cdf_tk/client/api/{extended_timeseries.py → legacy/extended_timeseries.py} +5 -2
  28. cognite_toolkit/_cdf_tk/client/api/{location_filters.py → legacy/location_filters.py} +1 -1
  29. cognite_toolkit/_cdf_tk/client/api/legacy/robotics/__init__.py +8 -0
  30. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/capabilities.py +1 -1
  31. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/data_postprocessing.py +1 -1
  32. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/frames.py +1 -1
  33. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/locations.py +1 -1
  34. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/maps.py +1 -1
  35. cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/robots.py +2 -2
  36. cognite_toolkit/_cdf_tk/client/api/{search_config.py → legacy/search_config.py} +5 -1
  37. cognite_toolkit/_cdf_tk/client/api/migration.py +177 -4
  38. cognite_toolkit/_cdf_tk/client/api/project.py +9 -8
  39. cognite_toolkit/_cdf_tk/client/api/search.py +2 -2
  40. cognite_toolkit/_cdf_tk/client/api/streams.py +88 -0
  41. cognite_toolkit/_cdf_tk/client/api/three_d.py +384 -0
  42. cognite_toolkit/_cdf_tk/client/data_classes/api_classes.py +13 -0
  43. cognite_toolkit/_cdf_tk/client/data_classes/base.py +37 -33
  44. cognite_toolkit/_cdf_tk/client/data_classes/charts_data.py +95 -213
  45. cognite_toolkit/_cdf_tk/client/data_classes/infield.py +32 -18
  46. cognite_toolkit/_cdf_tk/client/data_classes/instance_api.py +18 -13
  47. cognite_toolkit/_cdf_tk/client/data_classes/legacy/__init__.py +0 -0
  48. cognite_toolkit/_cdf_tk/client/data_classes/{canvas.py → legacy/canvas.py} +47 -4
  49. cognite_toolkit/_cdf_tk/client/data_classes/{charts.py → legacy/charts.py} +3 -3
  50. cognite_toolkit/_cdf_tk/client/data_classes/{migration.py → legacy/migration.py} +10 -2
  51. cognite_toolkit/_cdf_tk/client/data_classes/streams.py +90 -0
  52. cognite_toolkit/_cdf_tk/client/data_classes/three_d.py +112 -0
  53. cognite_toolkit/_cdf_tk/client/testing.py +42 -18
  54. cognite_toolkit/_cdf_tk/commands/__init__.py +7 -6
  55. cognite_toolkit/_cdf_tk/commands/_changes.py +3 -42
  56. cognite_toolkit/_cdf_tk/commands/_download.py +21 -11
  57. cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py +0 -2
  58. cognite_toolkit/_cdf_tk/commands/_migrate/command.py +22 -20
  59. cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +140 -92
  60. cognite_toolkit/_cdf_tk/commands/_migrate/creators.py +1 -1
  61. cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +108 -26
  62. cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +448 -45
  63. cognite_toolkit/_cdf_tk/commands/_migrate/data_model.py +1 -0
  64. cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py +6 -6
  65. cognite_toolkit/_cdf_tk/commands/_migrate/issues.py +52 -1
  66. cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +377 -11
  67. cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py +9 -4
  68. cognite_toolkit/_cdf_tk/commands/_profile.py +1 -1
  69. cognite_toolkit/_cdf_tk/commands/_purge.py +36 -39
  70. cognite_toolkit/_cdf_tk/commands/_questionary_style.py +16 -0
  71. cognite_toolkit/_cdf_tk/commands/_upload.py +109 -86
  72. cognite_toolkit/_cdf_tk/commands/about.py +221 -0
  73. cognite_toolkit/_cdf_tk/commands/auth.py +19 -12
  74. cognite_toolkit/_cdf_tk/commands/build_cmd.py +16 -62
  75. cognite_toolkit/_cdf_tk/commands/build_v2/__init__.py +0 -0
  76. cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py +241 -0
  77. cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py +85 -0
  78. cognite_toolkit/_cdf_tk/commands/build_v2/build_issues.py +27 -0
  79. cognite_toolkit/_cdf_tk/commands/clean.py +63 -16
  80. cognite_toolkit/_cdf_tk/commands/deploy.py +20 -17
  81. cognite_toolkit/_cdf_tk/commands/dump_resource.py +10 -8
  82. cognite_toolkit/_cdf_tk/commands/init.py +225 -3
  83. cognite_toolkit/_cdf_tk/commands/modules.py +20 -44
  84. cognite_toolkit/_cdf_tk/commands/pull.py +6 -19
  85. cognite_toolkit/_cdf_tk/commands/resources.py +179 -0
  86. cognite_toolkit/_cdf_tk/commands/run.py +1 -1
  87. cognite_toolkit/_cdf_tk/constants.py +20 -1
  88. cognite_toolkit/_cdf_tk/cruds/__init__.py +19 -5
  89. cognite_toolkit/_cdf_tk/cruds/_base_cruds.py +14 -70
  90. cognite_toolkit/_cdf_tk/cruds/_data_cruds.py +10 -19
  91. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/__init__.py +4 -1
  92. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/agent.py +11 -9
  93. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/auth.py +5 -15
  94. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/classic.py +45 -44
  95. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/configuration.py +5 -12
  96. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/data_organization.py +4 -13
  97. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/datamodel.py +206 -67
  98. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/extraction_pipeline.py +6 -18
  99. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/fieldops.py +126 -35
  100. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/file.py +7 -28
  101. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/function.py +23 -30
  102. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/hosted_extractors.py +12 -30
  103. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/industrial_tool.py +4 -8
  104. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/location.py +4 -16
  105. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/migration.py +5 -13
  106. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/raw.py +5 -11
  107. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/relationship.py +3 -8
  108. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/robotics.py +16 -45
  109. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/streams.py +94 -0
  110. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/three_d_model.py +3 -7
  111. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/timeseries.py +5 -15
  112. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/transformation.py +75 -32
  113. cognite_toolkit/_cdf_tk/cruds/_resource_cruds/workflow.py +20 -40
  114. cognite_toolkit/_cdf_tk/cruds/_worker.py +24 -36
  115. cognite_toolkit/_cdf_tk/data_classes/_module_toml.py +1 -0
  116. cognite_toolkit/_cdf_tk/feature_flags.py +16 -36
  117. cognite_toolkit/_cdf_tk/plugins.py +2 -1
  118. cognite_toolkit/_cdf_tk/resource_classes/__init__.py +4 -0
  119. cognite_toolkit/_cdf_tk/resource_classes/capabilities.py +12 -0
  120. cognite_toolkit/_cdf_tk/resource_classes/functions.py +3 -1
  121. cognite_toolkit/_cdf_tk/resource_classes/infield_cdm_location_config.py +109 -0
  122. cognite_toolkit/_cdf_tk/resource_classes/migration.py +8 -17
  123. cognite_toolkit/_cdf_tk/resource_classes/search_config.py +1 -1
  124. cognite_toolkit/_cdf_tk/resource_classes/streams.py +29 -0
  125. cognite_toolkit/_cdf_tk/resource_classes/workflow_version.py +164 -5
  126. cognite_toolkit/_cdf_tk/storageio/__init__.py +9 -21
  127. cognite_toolkit/_cdf_tk/storageio/_annotations.py +19 -16
  128. cognite_toolkit/_cdf_tk/storageio/_applications.py +340 -28
  129. cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +67 -104
  130. cognite_toolkit/_cdf_tk/storageio/_base.py +61 -29
  131. cognite_toolkit/_cdf_tk/storageio/_datapoints.py +276 -20
  132. cognite_toolkit/_cdf_tk/storageio/_file_content.py +435 -0
  133. cognite_toolkit/_cdf_tk/storageio/_instances.py +35 -3
  134. cognite_toolkit/_cdf_tk/storageio/_raw.py +26 -0
  135. cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +71 -4
  136. cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
  137. cognite_toolkit/_cdf_tk/storageio/selectors/_canvas.py +14 -0
  138. cognite_toolkit/_cdf_tk/storageio/selectors/_charts.py +14 -0
  139. cognite_toolkit/_cdf_tk/storageio/selectors/_datapoints.py +23 -3
  140. cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +164 -0
  141. cognite_toolkit/_cdf_tk/storageio/selectors/_three_d.py +34 -0
  142. cognite_toolkit/_cdf_tk/tk_warnings/other.py +4 -0
  143. cognite_toolkit/_cdf_tk/tracker.py +2 -2
  144. cognite_toolkit/_cdf_tk/utils/cdf.py +1 -1
  145. cognite_toolkit/_cdf_tk/utils/dtype_conversion.py +9 -3
  146. cognite_toolkit/_cdf_tk/utils/fileio/__init__.py +2 -0
  147. cognite_toolkit/_cdf_tk/utils/fileio/_base.py +5 -1
  148. cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +112 -20
  149. cognite_toolkit/_cdf_tk/utils/fileio/_writers.py +15 -15
  150. cognite_toolkit/_cdf_tk/utils/http_client/__init__.py +28 -0
  151. cognite_toolkit/_cdf_tk/utils/http_client/_client.py +285 -18
  152. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +56 -4
  153. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes2.py +247 -0
  154. cognite_toolkit/_cdf_tk/utils/http_client/_tracker.py +5 -2
  155. cognite_toolkit/_cdf_tk/utils/interactive_select.py +60 -18
  156. cognite_toolkit/_cdf_tk/utils/sql_parser.py +2 -3
  157. cognite_toolkit/_cdf_tk/utils/useful_types.py +6 -2
  158. cognite_toolkit/_cdf_tk/validation.py +83 -1
  159. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  160. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  161. cognite_toolkit/_resources/cdf.toml +5 -4
  162. cognite_toolkit/_version.py +1 -1
  163. cognite_toolkit/config.dev.yaml +13 -0
  164. {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.39.dist-info}/METADATA +24 -24
  165. cognite_toolkit-0.7.39.dist-info/RECORD +322 -0
  166. cognite_toolkit-0.7.39.dist-info/WHEEL +4 -0
  167. {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.39.dist-info}/entry_points.txt +1 -0
  168. cognite_toolkit/_cdf_tk/client/api/robotics/__init__.py +0 -3
  169. cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py +0 -201
  170. cognite_toolkit/_cdf_tk/commands/dump_data.py +0 -489
  171. cognite_toolkit/_cdf_tk/commands/featureflag.py +0 -27
  172. cognite_toolkit/_cdf_tk/prototypes/import_app.py +0 -41
  173. cognite_toolkit/_cdf_tk/utils/table_writers.py +0 -434
  174. cognite_toolkit-0.6.97.dist-info/RECORD +0 -306
  175. cognite_toolkit-0.6.97.dist-info/WHEEL +0 -4
  176. cognite_toolkit-0.6.97.dist-info/licenses/LICENSE +0 -18
  177. /cognite_toolkit/_cdf_tk/{prototypes/commands → client/api/legacy}/__init__.py +0 -0
  178. /cognite_toolkit/_cdf_tk/client/api/{dml.py → legacy/dml.py} +0 -0
  179. /cognite_toolkit/_cdf_tk/client/api/{fixed_transformations.py → legacy/fixed_transformations.py} +0 -0
  180. /cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/api.py +0 -0
  181. /cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/utlis.py +0 -0
  182. /cognite_toolkit/_cdf_tk/client/data_classes/{apm_config_v1.py → legacy/apm_config_v1.py} +0 -0
  183. /cognite_toolkit/_cdf_tk/client/data_classes/{extendable_cognite_file.py → legacy/extendable_cognite_file.py} +0 -0
  184. /cognite_toolkit/_cdf_tk/client/data_classes/{extended_filemetadata.py → legacy/extended_filemetadata.py} +0 -0
  185. /cognite_toolkit/_cdf_tk/client/data_classes/{extended_filemetdata.py → legacy/extended_filemetdata.py} +0 -0
  186. /cognite_toolkit/_cdf_tk/client/data_classes/{extended_timeseries.py → legacy/extended_timeseries.py} +0 -0
  187. /cognite_toolkit/_cdf_tk/client/data_classes/{functions.py → legacy/functions.py} +0 -0
  188. /cognite_toolkit/_cdf_tk/client/data_classes/{graphql_data_models.py → legacy/graphql_data_models.py} +0 -0
  189. /cognite_toolkit/_cdf_tk/client/data_classes/{instances.py → legacy/instances.py} +0 -0
  190. /cognite_toolkit/_cdf_tk/client/data_classes/{location_filters.py → legacy/location_filters.py} +0 -0
  191. /cognite_toolkit/_cdf_tk/client/data_classes/{pending_instances_ids.py → legacy/pending_instances_ids.py} +0 -0
  192. /cognite_toolkit/_cdf_tk/client/data_classes/{project.py → legacy/project.py} +0 -0
  193. /cognite_toolkit/_cdf_tk/client/data_classes/{raw.py → legacy/raw.py} +0 -0
  194. /cognite_toolkit/_cdf_tk/client/data_classes/{robotics.py → legacy/robotics.py} +0 -0
  195. /cognite_toolkit/_cdf_tk/client/data_classes/{search_config.py → legacy/search_config.py} +0 -0
  196. /cognite_toolkit/_cdf_tk/client/data_classes/{sequences.py → legacy/sequences.py} +0 -0
  197. /cognite_toolkit/_cdf_tk/client/data_classes/{streamlit_.py → legacy/streamlit_.py} +0 -0
  198. /cognite_toolkit/_cdf_tk/{prototypes/commands/import_.py → commands/_import_cmd.py} +0 -0
--- a/cognite_toolkit/_cdf_tk/storageio/_datapoints.py
+++ b/cognite_toolkit/_cdf_tk/storageio/_datapoints.py
@@ -1,4 +1,5 @@
 from collections.abc import Iterable, Mapping, Sequence
+from itertools import groupby
 from typing import Any, ClassVar, cast
 
 from cognite.client._proto.data_point_insertion_request_pb2 import DataPointInsertionItem, DataPointInsertionRequest
@@ -9,34 +10,51 @@ from cognite.client._proto.data_points_pb2 import (
     StringDatapoint,
     StringDatapoints,
 )
+from cognite.client.data_classes import TimeSeriesFilter
+from cognite.client.data_classes.filters import Exists
+from cognite.client.data_classes.time_series import TimeSeriesProperty
 
 from cognite_toolkit._cdf_tk.client import ToolkitClient
 from cognite_toolkit._cdf_tk.exceptions import ToolkitNotImplementedError
 from cognite_toolkit._cdf_tk.tk_warnings import HighSeverityWarning
+from cognite_toolkit._cdf_tk.utils import humanize_collection
 from cognite_toolkit._cdf_tk.utils.dtype_conversion import (
     _EpochConverter,
     _Float64Converter,
     _TextConverter,
     _ValueConverter,
 )
-from cognite_toolkit._cdf_tk.utils.fileio import FileReader
-from cognite_toolkit._cdf_tk.utils.fileio._readers import TableReader
-from cognite_toolkit._cdf_tk.utils.http_client import DataBodyRequest, HTTPClient, HTTPMessage
+from cognite_toolkit._cdf_tk.utils.fileio import SchemaColumn
+from cognite_toolkit._cdf_tk.utils.fileio._readers import MultiFileReader
+from cognite_toolkit._cdf_tk.utils.http_client import (
+    DataBodyRequest,
+    HTTPClient,
+    HTTPMessage,
+    SimpleBodyRequest,
+    SuccessResponse,
+)
 from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
 
-from ._base import Page, TableUploadableStorageIO, UploadItem
-from .selectors import DataPointsFileSelector
+from ._base import Page, TableStorageIO, TableUploadableStorageIO, UploadItem
+from .selectors import DataPointsDataSetSelector, DataPointsFileSelector, DataPointsSelector
 
 
-class DatapointsIO(TableUploadableStorageIO[DataPointsFileSelector, DataPointListResponse, DataPointInsertionRequest]):
+class DatapointsIO(
+    TableStorageIO[DataPointsSelector, DataPointListResponse],
+    TableUploadableStorageIO[DataPointsSelector, DataPointListResponse, DataPointInsertionRequest],
+):
     SUPPORTED_DOWNLOAD_FORMATS = frozenset({".csv"})
     SUPPORTED_COMPRESSIONS = frozenset({".gz"})
     CHUNK_SIZE = 10_000
-    BASE_SELECTOR = DataPointsFileSelector
+    DOWNLOAD_CHUNK_SIZE = 100
+    BASE_SELECTOR = DataPointsSelector
     KIND = "Datapoints"
     SUPPORTED_READ_FORMATS = frozenset({".csv"})
     UPLOAD_ENDPOINT = "/timeseries/data"
     UPLOAD_EXTRA_ARGS: ClassVar[Mapping[str, JsonVal] | None] = None
+    MAX_TOTAL_DATAPOINTS = 10_000_000
+    MAX_PER_REQUEST_DATAPOINTS = 100_000
+    MAX_PER_REQUEST_DATAPOINTS_AGGREGATION = 10_000
 
     def __init__(self, client: ToolkitClient) -> None:
         super().__init__(client)
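The three MAX_* limits added above drive the download planning in stream_data (next hunk): a total budget of 10 million datapoints is split evenly across the time series in the data set, capped at 100,000 per request. A worked example of that arithmetic, assuming a hypothetical data set of 500 time series:

    # Worked example of the per-time-series limit computed in stream_data below.
    # The constants mirror the class attributes; the series count is invented.
    MAX_TOTAL_DATAPOINTS = 10_000_000
    MAX_PER_REQUEST_DATAPOINTS = 100_000

    timeseries_count = 500  # hypothetical result of count(selector)
    limit_per_timeseries = MAX_TOTAL_DATAPOINTS // timeseries_count  # 20_000
    limit_per_timeseries = min(limit_per_timeseries, MAX_PER_REQUEST_DATAPOINTS)
    assert limit_per_timeseries == 20_000  # with only 50 series it would cap at 100_000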
@@ -48,24 +66,181 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileLis
     def as_id(self, item: DataPointListResponse) -> str:
         raise NotImplementedError()
 
-    def stream_data(self, selector: DataPointsFileSelector, limit: int | None = None) -> Iterable[Page]:
-        raise NotImplementedError(f"Download of {type(DatapointsIO).__name__.removesuffix('IO')} is not yet supported")
+    def get_schema(self, selector: DataPointsSelector) -> list[SchemaColumn]:
+        return [
+            SchemaColumn(name="externalId", type="string"),
+            SchemaColumn(name="timestamp", type="epoch"),
+            SchemaColumn(
+                name="value",
+                type="string"
+                if isinstance(selector, DataPointsDataSetSelector) and selector.data_type == "string"
+                else "float",
+            ),
+        ]
 
-    def count(self, selector: DataPointsFileSelector) -> int | None:
-        raise NotImplementedError(f"Download of {type(DatapointsIO).__name__.removesuffix('IO')} is not yet supported")
+    def stream_data(
+        self, selector: DataPointsSelector, limit: int | None = None
+    ) -> Iterable[Page[DataPointListResponse]]:
+        if not isinstance(selector, DataPointsDataSetSelector):
+            raise RuntimeError(
+                f"{type(self).__name__} only supports streaming data for DataPointsDataSetSelector selectors. Got {type(selector).__name__}."
+            )
+        timeseries_count = self.count(selector)
+        if limit is not None:
+            timeseries_count = min(timeseries_count or 0, limit)
+        limit_per_timeseries = (
+            (self.MAX_TOTAL_DATAPOINTS // timeseries_count) if timeseries_count else self.MAX_PER_REQUEST_DATAPOINTS
+        )
+        limit_per_timeseries = min(limit_per_timeseries, self.MAX_PER_REQUEST_DATAPOINTS)
+        config = self.client.config
+        for timeseries in self.client.time_series(
+            data_set_external_ids=[selector.data_set_external_id],
+            chunk_size=self.DOWNLOAD_CHUNK_SIZE,
+            is_string=True if selector.data_type == "string" else False,
+            advanced_filter=Exists(TimeSeriesProperty.external_id),
+            limit=limit,
+            # We cannot use partitions here as it is not thread safe. It spawns multiple threads
+            # that are not shut down until all data is downloaded. We need to be able to abort.
+            partitions=None,
+        ):
+            if not timeseries:
+                continue
+            # Aggregate the datapoint count per time series.
+            items = [
+                {
+                    "id": ts.id,
+                    "start": selector.start,
+                    "end": selector.end,
+                    "limit": self.MAX_PER_REQUEST_DATAPOINTS_AGGREGATION // len(timeseries),
+                    "aggregates": ["count"],
+                    "granularity": "1200mo",
+                }
+                for ts in timeseries
+            ]
+            responses = self.client.http_client.request_with_retries(
+                SimpleBodyRequest(
+                    endpoint_url=config.create_api_url("/timeseries/data/list"),
+                    method="POST",
+                    accept="application/protobuf",
+                    content_type="application/json",
+                    body_content={"items": items},  # type: ignore[dict-item]
+                )
+            )
+            first_success = next((resp for resp in responses if isinstance(resp, SuccessResponse)), None)
+            if first_success is None:
+                continue
+            aggregate_response: DataPointListResponse = DataPointListResponse.FromString(first_success.content)
+            timeseries_ids_with_data: dict[int, int] = {}
+            for dp in aggregate_response.items:
+                if dp.aggregateDatapoints.datapoints:
+                    ts_datapoint_count = int(sum(agg.count for agg in dp.aggregateDatapoints.datapoints))
+                    timeseries_ids_with_data[dp.id] = ts_datapoint_count
+            total_datapoints = int(sum(timeseries_ids_with_data.values()))
+            if total_datapoints == 0:
+                continue
+
+            batch: list[dict[str, Any]] = []
+            batch_count = 0
+            for ts_id, count in timeseries_ids_with_data.items():
+                count = min(count, limit_per_timeseries)
+                ts_limit = count
+                left_over = 0
+                if (batch_count + ts_limit) > self.MAX_PER_REQUEST_DATAPOINTS:
+                    ts_limit = self.MAX_PER_REQUEST_DATAPOINTS - batch_count
+                    left_over = count - ts_limit
+                batch.append(
+                    {
+                        "id": ts_id,
+                        "start": selector.start,
+                        "end": selector.end,
+                        "limit": ts_limit,
+                    }
+                )
+                batch_count += ts_limit
+                if batch_count >= self.MAX_PER_REQUEST_DATAPOINTS:
+                    if page := self._fetch_datapoints_batch(batch, config):
+                        yield page
+                    batch = []
+
+                if left_over > 0:
+                    batch.append(
+                        {
+                            "id": ts_id,
+                            "start": selector.start,
+                            "end": selector.end,
+                            "limit": left_over,
+                        }
+                    )
+                    batch_count += left_over
+            if batch and (page := self._fetch_datapoints_batch(batch, config)):
+                yield page
+
+    def _fetch_datapoints_batch(self, batch: list[dict[str, Any]], config: Any) -> Page[DataPointListResponse] | None:
+        responses = self.client.http_client.request_with_retries(
+            SimpleBodyRequest(
+                endpoint_url=config.create_api_url("/timeseries/data/list"),
+                method="POST",
+                accept="application/protobuf",
+                content_type="application/json",
+                body_content={"items": batch},  # type: ignore[dict-item]
+            )
+        )
+        first_success = next((resp for resp in responses if isinstance(resp, SuccessResponse)), None)
+        if first_success is None:
+            return None
+        data_response: DataPointListResponse = DataPointListResponse.FromString(first_success.content)
+        return Page("Main", [data_response])
+
+    def count(self, selector: DataPointsSelector) -> int | None:
+        if isinstance(selector, DataPointsDataSetSelector):
+            return self.client.time_series.aggregate_count(
+                filter=TimeSeriesFilter(
+                    data_set_ids=[{"externalId": selector.data_set_external_id}],
+                    is_string=True if selector.data_type == "string" else False,
+                ),
+                # We only want time series that have externalId set.
+                advanced_filter=Exists(TimeSeriesProperty.external_id),
+            )
+        return None
 
     def data_to_json_chunk(
-        self, data_chunk: Sequence[DataPointListResponse], selector: DataPointsFileSelector | None = None
+        self, data_chunk: Sequence[DataPointListResponse], selector: DataPointsSelector | None = None
     ) -> list[dict[str, JsonVal]]:
         raise ToolkitNotImplementedError(
             f"Download of {type(DatapointsIO).__name__.removesuffix('IO')} does not support json format."
         )
 
+    def data_to_row(
+        self, data_chunk: Sequence[DataPointListResponse], selector: DataPointsSelector | None = None
+    ) -> list[dict[str, JsonVal]]:
+        output: list[dict[str, JsonVal]] = []
+        for response in data_chunk:
+            for item in response.items:
+                if item.numericDatapoints.datapoints:
+                    for dp in item.numericDatapoints.datapoints:
+                        output.append(
+                            {
+                                "externalId": item.externalId,
+                                "timestamp": dp.timestamp,
+                                "value": dp.value,
+                            }
+                        )
+                if item.stringDatapoints.datapoints:
+                    for dp in item.stringDatapoints.datapoints:
+                        output.append(
+                            {
+                                "externalId": item.externalId,
+                                "timestamp": dp.timestamp,
+                                "value": dp.value,
+                            }
+                        )
+        return output
+
     def upload_items(
         self,
         data_chunk: Sequence[UploadItem[DataPointInsertionRequest]],
         http_client: HTTPClient,
-        selector: DataPointsFileSelector | None = None,
+        selector: DataPointsSelector | None = None,
     ) -> Sequence[HTTPMessage]:
         results: list[HTTPMessage] = []
         for item in data_chunk:
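The batching loop in stream_data above packs per-series limits into /timeseries/data/list requests of at most MAX_PER_REQUEST_DATAPOINTS datapoints, splitting a series across two batches when it does not fit. A standalone sketch of that packing idea (an invented helper, not part of the package; it resets the running total after each full batch):

    # Sketch of the batch-packing idea in stream_data: `counts` maps time series
    # id -> datapoints to fetch; each output batch requests at most
    # `max_per_request` datapoints in total.
    def pack_batches(counts: dict[int, int], max_per_request: int) -> list[list[tuple[int, int]]]:
        batches: list[list[tuple[int, int]]] = []
        batch: list[tuple[int, int]] = []
        used = 0
        for ts_id, count in counts.items():
            while count > 0:
                take = min(count, max_per_request - used)
                batch.append((ts_id, take))
                used += take
                count -= take
                if used == max_per_request:  # batch full: emit it and reset the total
                    batches.append(batch)
                    batch, used = [], 0
        if batch:
            batches.append(batch)
        return batches

    assert pack_batches({1: 150, 2: 80}, 100) == [[(1, 100)], [(1, 50), (2, 50)], [(2, 30)]]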
@@ -81,12 +256,25 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileLis
         return results
 
     def row_to_resource(
-        self, source_id: str, row: dict[str, JsonVal], selector: DataPointsFileSelector | None = None
+        self, source_id: str, row: dict[str, JsonVal], selector: DataPointsSelector | None = None
     ) -> DataPointInsertionRequest:
         if selector is None:
             raise ValueError("Selector must be provided to convert row to DataPointInsertionItem.")
         # We assume that the row was read using the read_chunks method.
         rows = cast(dict[str, list[Any]], row)
+        if isinstance(selector, DataPointsFileSelector):
+            datapoints_items = self._rows_to_datapoint_items_file_selector(rows, selector, source_id)
+        elif isinstance(selector, DataPointsDataSetSelector):
+            datapoints_items = self._rows_to_datapoint_items_data_set_selector(rows, selector, source_id)
+        else:
+            raise RuntimeError(
+                f"Unsupported selector type {type(selector).__name__} for {type(self).__name__}. Trying to transform {source_id!r} from rows to DataPointInsertionRequest."
+            )
+        return DataPointInsertionRequest(items=datapoints_items)
+
+    def _rows_to_datapoint_items_file_selector(
+        self, rows: dict[str, list[Any]], selector: DataPointsFileSelector, source_id: str
+    ) -> list[DataPointInsertionItem]:
         if selector.timestamp_column not in rows:
             raise RuntimeError(f"Timestamp column '{selector.timestamp_column}' not found.")
 
@@ -134,7 +322,66 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileLis
             raise RuntimeError(f"Unsupported dtype {column.dtype} for column {col}.")
 
         datapoints_items.append(DataPointInsertionItem(**args))
-        return DataPointInsertionRequest(items=datapoints_items)
+
+        return datapoints_items
+
+    def _rows_to_datapoint_items_data_set_selector(
+        self, rows: dict[str, list[Any]], selector: DataPointsDataSetSelector, source_id: str
+    ) -> list[DataPointInsertionItem]:
+        if "externalId" not in rows:
+            raise RuntimeError("Column 'externalId' not found.")
+        if "value" not in rows:
+            raise RuntimeError("Column 'value' not found.")
+        if "timestamp" not in rows:
+            raise RuntimeError("Column 'timestamp' not found.")
+
+        external_ids = rows["externalId"]
+        timestamps = list(
+            self._convert_values(
+                rows["timestamp"],
+                self._epoc_converter,
+                "timestamps (column 'timestamp')",
+                source_id,
+            )
+        )
+        values = list(
+            self._convert_values(
+                rows["value"],
+                self._numeric_converter if selector.data_type == "numeric" else self._string_converter,
+                "values (column 'value')",
+                source_id,
+            )
+        )
+        sorted_datapoints = sorted(zip(external_ids, timestamps, values), key=lambda x: x[0])
+        datapoints_items: list[DataPointInsertionItem] = []
+        if selector.data_type == "numeric":
+            for external_id, datapoints in groupby(sorted_datapoints, key=lambda x: x[0]):
+                datapoints_items.append(
+                    DataPointInsertionItem(
+                        externalId=external_id,
+                        numericDatapoints=NumericDatapoints(
+                            datapoints=[
+                                NumericDatapoint(timestamp=timestamp, value=value) for _, timestamp, value in datapoints
+                            ]
+                        ),
+                    )
+                )
+        elif selector.data_type == "string":
+            for external_id, datapoints in groupby(sorted_datapoints, key=lambda x: x[0]):
+                datapoints_items.append(
+                    DataPointInsertionItem(
+                        externalId=external_id,
+                        stringDatapoints=StringDatapoints(
+                            datapoints=[
+                                StringDatapoint(timestamp=timestamp, value=value) for _, timestamp, value in datapoints
+                            ]
+                        ),
+                    )
+                )
+        else:
+            raise RuntimeError(f"Unsupported data_type {selector.data_type} for DataPointsDataSetSelector.")
+
+        return datapoints_items
 
     def _convert_values(
         self, values: list[Any], converter: _ValueConverter, name: str, source_id: str
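The sorted() call before itertools.groupby in _rows_to_datapoint_items_data_set_selector above is load-bearing: groupby only merges consecutive equal keys, so all rows for one externalId must be adjacent. A minimal illustration with made-up rows:

    # groupby only groups *consecutive* equal keys, hence the sort by externalId.
    from itertools import groupby

    rows = [("ts_b", 1), ("ts_a", 2), ("ts_b", 3)]  # invented (externalId, value) rows

    split = [(key, len(list(grp))) for key, grp in groupby(rows, key=lambda r: r[0])]
    assert split == [("ts_b", 1), ("ts_a", 1), ("ts_b", 1)]  # ts_b ends up in two groups

    rows.sort(key=lambda r: r[0])
    merged = [(key, len(list(grp))) for key, grp in groupby(rows, key=lambda r: r[0])]
    assert merged == [("ts_a", 1), ("ts_b", 2)]  # one item per time series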
@@ -165,9 +412,12 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileLis
         )
 
     @classmethod
-    def read_chunks(cls, reader: FileReader) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
-        if not isinstance(reader, TableReader):
-            raise RuntimeError("DatapointsIO can only read from TableReader instances.")
+    def read_chunks(
+        cls, reader: MultiFileReader, selector: DataPointsSelector
+    ) -> Iterable[list[tuple[str, dict[str, JsonVal]]]]:
+        if not reader.is_table:
+            raise RuntimeError(f"{cls.__name__} can only read from TableReader instances.")
+
         iterator = iter(reader.read_chunks_with_line_numbers())
         try:
             start_row, first = next(iterator)
@@ -175,6 +425,12 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileLis
             # Empty file
             return
         column_names = list(first.keys())
+        if isinstance(selector, DataPointsDataSetSelector):
+            if set(column_names) != selector.required_columns:
+                raise RuntimeError(
+                    "When uploading datapoints using a dataset manifest, you must have exactly the "
+                    f"columns: {humanize_collection(selector.required_columns)} in the file. Got {humanize_collection(column_names)}."
+                )
         batch: dict[str, list[Any]] = {col: [value] for col, value in first.items()}
         last_row = start_row
         for row_no, chunk in iterator:
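read_chunks above accumulates rows column-wise, so each yielded batch maps a column name to a list of values rather than being a list of row dicts; row_to_resource relies on that shape via its cast to dict[str, list[Any]]. A toy illustration of the transposition (data invented):

    # Toy illustration of the row -> columnar transposition done by read_chunks.
    rows = [
        {"externalId": "ts_a", "timestamp": 1700000000000, "value": 1.0},
        {"externalId": "ts_a", "timestamp": 1700000060000, "value": 2.0},
    ]
    batch: dict[str, list] = {col: [] for col in rows[0]}
    for row in rows:
        for col, value in row.items():
            batch[col].append(value)
    assert batch["value"] == [1.0, 2.0]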
@@ -188,5 +444,5 @@ class DatapointsIO(TableUploadableStorageIO[DataPointsFileLis
                 start_row = row_no + 1
                 batch = {col: [] for col in column_names}
             last_row = row_no
-            if any(batch.values()):
-                yield [(f"rows {start_row} to {last_row}", batch)]  # type: ignore[list-item]
+        if any(batch.values()):
+            yield [(f"rows {start_row} to {last_row}", batch)]  # type: ignore[list-item]