cognite-toolkit 0.6.97__py3-none-any.whl → 0.7.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite_toolkit/_cdf.py +21 -23
- cognite_toolkit/_cdf_tk/apps/__init__.py +4 -0
- cognite_toolkit/_cdf_tk/apps/_core_app.py +19 -5
- cognite_toolkit/_cdf_tk/apps/_data_app.py +1 -1
- cognite_toolkit/_cdf_tk/apps/_dev_app.py +86 -0
- cognite_toolkit/_cdf_tk/apps/_download_app.py +693 -25
- cognite_toolkit/_cdf_tk/apps/_dump_app.py +44 -102
- cognite_toolkit/_cdf_tk/apps/_import_app.py +41 -0
- cognite_toolkit/_cdf_tk/apps/_landing_app.py +18 -4
- cognite_toolkit/_cdf_tk/apps/_migrate_app.py +424 -9
- cognite_toolkit/_cdf_tk/apps/_modules_app.py +0 -3
- cognite_toolkit/_cdf_tk/apps/_purge.py +15 -43
- cognite_toolkit/_cdf_tk/apps/_run.py +11 -0
- cognite_toolkit/_cdf_tk/apps/_upload_app.py +45 -6
- cognite_toolkit/_cdf_tk/builders/__init__.py +2 -2
- cognite_toolkit/_cdf_tk/builders/_base.py +28 -42
- cognite_toolkit/_cdf_tk/builders/_raw.py +1 -1
- cognite_toolkit/_cdf_tk/cdf_toml.py +20 -1
- cognite_toolkit/_cdf_tk/client/_toolkit_client.py +32 -12
- cognite_toolkit/_cdf_tk/client/api/infield.py +114 -17
- cognite_toolkit/_cdf_tk/client/api/{canvas.py → legacy/canvas.py} +15 -7
- cognite_toolkit/_cdf_tk/client/api/{charts.py → legacy/charts.py} +1 -1
- cognite_toolkit/_cdf_tk/client/api/{extended_data_modeling.py → legacy/extended_data_modeling.py} +1 -1
- cognite_toolkit/_cdf_tk/client/api/{extended_files.py → legacy/extended_files.py} +2 -2
- cognite_toolkit/_cdf_tk/client/api/{extended_functions.py → legacy/extended_functions.py} +15 -18
- cognite_toolkit/_cdf_tk/client/api/{extended_raw.py → legacy/extended_raw.py} +1 -1
- cognite_toolkit/_cdf_tk/client/api/{extended_timeseries.py → legacy/extended_timeseries.py} +5 -2
- cognite_toolkit/_cdf_tk/client/api/{location_filters.py → legacy/location_filters.py} +1 -1
- cognite_toolkit/_cdf_tk/client/api/legacy/robotics/__init__.py +8 -0
- cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/capabilities.py +1 -1
- cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/data_postprocessing.py +1 -1
- cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/frames.py +1 -1
- cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/locations.py +1 -1
- cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/maps.py +1 -1
- cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/robots.py +2 -2
- cognite_toolkit/_cdf_tk/client/api/{search_config.py → legacy/search_config.py} +5 -1
- cognite_toolkit/_cdf_tk/client/api/migration.py +177 -4
- cognite_toolkit/_cdf_tk/client/api/project.py +9 -8
- cognite_toolkit/_cdf_tk/client/api/search.py +2 -2
- cognite_toolkit/_cdf_tk/client/api/streams.py +88 -0
- cognite_toolkit/_cdf_tk/client/api/three_d.py +384 -0
- cognite_toolkit/_cdf_tk/client/data_classes/api_classes.py +13 -0
- cognite_toolkit/_cdf_tk/client/data_classes/base.py +37 -33
- cognite_toolkit/_cdf_tk/client/data_classes/charts_data.py +95 -213
- cognite_toolkit/_cdf_tk/client/data_classes/infield.py +32 -18
- cognite_toolkit/_cdf_tk/client/data_classes/instance_api.py +18 -13
- cognite_toolkit/_cdf_tk/client/data_classes/legacy/__init__.py +0 -0
- cognite_toolkit/_cdf_tk/client/data_classes/{canvas.py → legacy/canvas.py} +47 -4
- cognite_toolkit/_cdf_tk/client/data_classes/{charts.py → legacy/charts.py} +3 -3
- cognite_toolkit/_cdf_tk/client/data_classes/{migration.py → legacy/migration.py} +10 -2
- cognite_toolkit/_cdf_tk/client/data_classes/streams.py +90 -0
- cognite_toolkit/_cdf_tk/client/data_classes/three_d.py +112 -0
- cognite_toolkit/_cdf_tk/client/testing.py +42 -18
- cognite_toolkit/_cdf_tk/commands/__init__.py +7 -6
- cognite_toolkit/_cdf_tk/commands/_changes.py +3 -42
- cognite_toolkit/_cdf_tk/commands/_download.py +21 -11
- cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py +0 -2
- cognite_toolkit/_cdf_tk/commands/_migrate/command.py +22 -20
- cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +140 -92
- cognite_toolkit/_cdf_tk/commands/_migrate/creators.py +1 -1
- cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +108 -26
- cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +448 -45
- cognite_toolkit/_cdf_tk/commands/_migrate/data_model.py +1 -0
- cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py +6 -6
- cognite_toolkit/_cdf_tk/commands/_migrate/issues.py +52 -1
- cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +377 -11
- cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py +9 -4
- cognite_toolkit/_cdf_tk/commands/_profile.py +1 -1
- cognite_toolkit/_cdf_tk/commands/_purge.py +36 -39
- cognite_toolkit/_cdf_tk/commands/_questionary_style.py +16 -0
- cognite_toolkit/_cdf_tk/commands/_upload.py +109 -86
- cognite_toolkit/_cdf_tk/commands/about.py +221 -0
- cognite_toolkit/_cdf_tk/commands/auth.py +19 -12
- cognite_toolkit/_cdf_tk/commands/build_cmd.py +16 -62
- cognite_toolkit/_cdf_tk/commands/build_v2/__init__.py +0 -0
- cognite_toolkit/_cdf_tk/commands/build_v2/build_cmd.py +241 -0
- cognite_toolkit/_cdf_tk/commands/build_v2/build_input.py +85 -0
- cognite_toolkit/_cdf_tk/commands/build_v2/build_issues.py +27 -0
- cognite_toolkit/_cdf_tk/commands/clean.py +63 -16
- cognite_toolkit/_cdf_tk/commands/deploy.py +20 -17
- cognite_toolkit/_cdf_tk/commands/dump_resource.py +10 -8
- cognite_toolkit/_cdf_tk/commands/init.py +225 -3
- cognite_toolkit/_cdf_tk/commands/modules.py +20 -44
- cognite_toolkit/_cdf_tk/commands/pull.py +6 -19
- cognite_toolkit/_cdf_tk/commands/resources.py +179 -0
- cognite_toolkit/_cdf_tk/commands/run.py +1 -1
- cognite_toolkit/_cdf_tk/constants.py +20 -1
- cognite_toolkit/_cdf_tk/cruds/__init__.py +19 -5
- cognite_toolkit/_cdf_tk/cruds/_base_cruds.py +14 -70
- cognite_toolkit/_cdf_tk/cruds/_data_cruds.py +10 -19
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/__init__.py +4 -1
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/agent.py +11 -9
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/auth.py +5 -15
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/classic.py +45 -44
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/configuration.py +5 -12
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/data_organization.py +4 -13
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/datamodel.py +206 -67
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/extraction_pipeline.py +6 -18
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/fieldops.py +126 -35
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/file.py +7 -28
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/function.py +23 -30
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/hosted_extractors.py +12 -30
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/industrial_tool.py +4 -8
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/location.py +4 -16
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/migration.py +5 -13
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/raw.py +5 -11
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/relationship.py +3 -8
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/robotics.py +16 -45
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/streams.py +94 -0
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/three_d_model.py +3 -7
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/timeseries.py +5 -15
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/transformation.py +75 -32
- cognite_toolkit/_cdf_tk/cruds/_resource_cruds/workflow.py +20 -40
- cognite_toolkit/_cdf_tk/cruds/_worker.py +24 -36
- cognite_toolkit/_cdf_tk/data_classes/_module_toml.py +1 -0
- cognite_toolkit/_cdf_tk/feature_flags.py +16 -36
- cognite_toolkit/_cdf_tk/plugins.py +2 -1
- cognite_toolkit/_cdf_tk/resource_classes/__init__.py +4 -0
- cognite_toolkit/_cdf_tk/resource_classes/capabilities.py +12 -0
- cognite_toolkit/_cdf_tk/resource_classes/functions.py +3 -1
- cognite_toolkit/_cdf_tk/resource_classes/infield_cdm_location_config.py +109 -0
- cognite_toolkit/_cdf_tk/resource_classes/migration.py +8 -17
- cognite_toolkit/_cdf_tk/resource_classes/search_config.py +1 -1
- cognite_toolkit/_cdf_tk/resource_classes/streams.py +29 -0
- cognite_toolkit/_cdf_tk/resource_classes/workflow_version.py +164 -5
- cognite_toolkit/_cdf_tk/storageio/__init__.py +9 -21
- cognite_toolkit/_cdf_tk/storageio/_annotations.py +19 -16
- cognite_toolkit/_cdf_tk/storageio/_applications.py +340 -28
- cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +67 -104
- cognite_toolkit/_cdf_tk/storageio/_base.py +61 -29
- cognite_toolkit/_cdf_tk/storageio/_datapoints.py +276 -20
- cognite_toolkit/_cdf_tk/storageio/_file_content.py +435 -0
- cognite_toolkit/_cdf_tk/storageio/_instances.py +35 -3
- cognite_toolkit/_cdf_tk/storageio/_raw.py +26 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/__init__.py +71 -4
- cognite_toolkit/_cdf_tk/storageio/selectors/_base.py +14 -2
- cognite_toolkit/_cdf_tk/storageio/selectors/_canvas.py +14 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/_charts.py +14 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/_datapoints.py +23 -3
- cognite_toolkit/_cdf_tk/storageio/selectors/_file_content.py +164 -0
- cognite_toolkit/_cdf_tk/storageio/selectors/_three_d.py +34 -0
- cognite_toolkit/_cdf_tk/tk_warnings/other.py +4 -0
- cognite_toolkit/_cdf_tk/tracker.py +2 -2
- cognite_toolkit/_cdf_tk/utils/cdf.py +1 -1
- cognite_toolkit/_cdf_tk/utils/dtype_conversion.py +9 -3
- cognite_toolkit/_cdf_tk/utils/fileio/__init__.py +2 -0
- cognite_toolkit/_cdf_tk/utils/fileio/_base.py +5 -1
- cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +112 -20
- cognite_toolkit/_cdf_tk/utils/fileio/_writers.py +15 -15
- cognite_toolkit/_cdf_tk/utils/http_client/__init__.py +28 -0
- cognite_toolkit/_cdf_tk/utils/http_client/_client.py +285 -18
- cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +56 -4
- cognite_toolkit/_cdf_tk/utils/http_client/_data_classes2.py +247 -0
- cognite_toolkit/_cdf_tk/utils/http_client/_tracker.py +5 -2
- cognite_toolkit/_cdf_tk/utils/interactive_select.py +60 -18
- cognite_toolkit/_cdf_tk/utils/sql_parser.py +2 -3
- cognite_toolkit/_cdf_tk/utils/useful_types.py +6 -2
- cognite_toolkit/_cdf_tk/validation.py +83 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
- cognite_toolkit/_resources/cdf.toml +5 -4
- cognite_toolkit/_version.py +1 -1
- cognite_toolkit/config.dev.yaml +13 -0
- {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.39.dist-info}/METADATA +24 -24
- cognite_toolkit-0.7.39.dist-info/RECORD +322 -0
- cognite_toolkit-0.7.39.dist-info/WHEEL +4 -0
- {cognite_toolkit-0.6.97.dist-info → cognite_toolkit-0.7.39.dist-info}/entry_points.txt +1 -0
- cognite_toolkit/_cdf_tk/client/api/robotics/__init__.py +0 -3
- cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py +0 -201
- cognite_toolkit/_cdf_tk/commands/dump_data.py +0 -489
- cognite_toolkit/_cdf_tk/commands/featureflag.py +0 -27
- cognite_toolkit/_cdf_tk/prototypes/import_app.py +0 -41
- cognite_toolkit/_cdf_tk/utils/table_writers.py +0 -434
- cognite_toolkit-0.6.97.dist-info/RECORD +0 -306
- cognite_toolkit-0.6.97.dist-info/WHEEL +0 -4
- cognite_toolkit-0.6.97.dist-info/licenses/LICENSE +0 -18
- /cognite_toolkit/_cdf_tk/{prototypes/commands → client/api/legacy}/__init__.py +0 -0
- /cognite_toolkit/_cdf_tk/client/api/{dml.py → legacy/dml.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/api/{fixed_transformations.py → legacy/fixed_transformations.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/api.py +0 -0
- /cognite_toolkit/_cdf_tk/client/api/{robotics → legacy/robotics}/utlis.py +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{apm_config_v1.py → legacy/apm_config_v1.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{extendable_cognite_file.py → legacy/extendable_cognite_file.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{extended_filemetadata.py → legacy/extended_filemetadata.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{extended_filemetdata.py → legacy/extended_filemetdata.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{extended_timeseries.py → legacy/extended_timeseries.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{functions.py → legacy/functions.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{graphql_data_models.py → legacy/graphql_data_models.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{instances.py → legacy/instances.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{location_filters.py → legacy/location_filters.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{pending_instances_ids.py → legacy/pending_instances_ids.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{project.py → legacy/project.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{raw.py → legacy/raw.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{robotics.py → legacy/robotics.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{search_config.py → legacy/search_config.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{sequences.py → legacy/sequences.py} +0 -0
- /cognite_toolkit/_cdf_tk/client/data_classes/{streamlit_.py → legacy/streamlit_.py} +0 -0
- /cognite_toolkit/_cdf_tk/{prototypes/commands/import_.py → commands/_import_cmd.py} +0 -0
|
@@ -4,7 +4,7 @@ from pathlib import Path
|
|
|
4
4
|
from pydantic import BaseModel, ConfigDict
|
|
5
5
|
from pydantic.alias_generators import to_camel
|
|
6
6
|
|
|
7
|
-
from cognite_toolkit._cdf_tk.constants import
|
|
7
|
+
from cognite_toolkit._cdf_tk.constants import DATA_MANIFEST_SUFFIX
|
|
8
8
|
from cognite_toolkit._cdf_tk.utils.file import safe_write, sanitize_filename, yaml_safe_dump
|
|
9
9
|
from cognite_toolkit._cdf_tk.utils.text import to_sentence_case
|
|
10
10
|
from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
|
|
@@ -41,7 +41,7 @@ class DataSelector(SelectorObject, ABC):
|
|
|
41
41
|
directory: The directory where the YAML file will be saved.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
filepath = directory / f"{sanitize_filename(str(self))}
|
|
44
|
+
filepath = directory / f"{sanitize_filename(str(self))}{DATA_MANIFEST_SUFFIX}"
|
|
45
45
|
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
46
46
|
safe_write(file=filepath, content=yaml_safe_dump(self.model_dump(mode="json", by_alias=True)), encoding="utf-8")
|
|
47
47
|
return filepath
|
|
@@ -66,3 +66,15 @@ class DataSelector(SelectorObject, ABC):
|
|
|
66
66
|
def __str__(self) -> str:
|
|
67
67
|
# We want to force subclasses to implement __str__
|
|
68
68
|
raise NotImplementedError()
|
|
69
|
+
|
|
70
|
+
def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
|
|
71
|
+
"""Find data files in the specified input directory that match this selector.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
input_dir: The directory to search for data files.
|
|
75
|
+
manifest_file: The manifest file that describes the data files.
|
|
76
|
+
Returns:
|
|
77
|
+
A list of Paths to the data files that match this selector.
|
|
78
|
+
"""
|
|
79
|
+
data_file_prefix = manifest_file.name.removesuffix(DATA_MANIFEST_SUFFIX)
|
|
80
|
+
return [file for file in input_dir.glob(f"{data_file_prefix}*") if not file.name.endswith(DATA_MANIFEST_SUFFIX)]
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import hashlib
|
|
1
2
|
from abc import ABC
|
|
2
3
|
from typing import Literal
|
|
3
4
|
|
|
@@ -6,3 +7,16 @@ from ._base import DataSelector
|
|
|
6
7
|
|
|
7
8
|
class CanvasSelector(DataSelector, ABC):
|
|
8
9
|
kind: Literal["IndustrialCanvas"] = "IndustrialCanvas"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CanvasExternalIdSelector(CanvasSelector):
|
|
13
|
+
type: Literal["canvasExternalId"] = "canvasExternalId"
|
|
14
|
+
external_ids: tuple[str, ...]
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def group(self) -> str:
|
|
18
|
+
return "Canvas"
|
|
19
|
+
|
|
20
|
+
def __str__(self) -> str:
|
|
21
|
+
hash_ = hashlib.md5(",".join(sorted(self.external_ids)).encode()).hexdigest()[:8]
|
|
22
|
+
return f"canvas_count_{len(self.external_ids)}_hash_{hash_}"
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import hashlib
|
|
1
2
|
from abc import ABC
|
|
2
3
|
from typing import Literal
|
|
3
4
|
|
|
@@ -29,3 +30,16 @@ class AllChartsSelector(ChartSelector):
|
|
|
29
30
|
|
|
30
31
|
def __str__(self) -> str:
|
|
31
32
|
return "all"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ChartExternalIdSelector(ChartSelector):
|
|
36
|
+
type: Literal["chartExternalId"] = "chartExternalId"
|
|
37
|
+
external_ids: tuple[str, ...]
|
|
38
|
+
|
|
39
|
+
@property
|
|
40
|
+
def group(self) -> str:
|
|
41
|
+
return "Charts"
|
|
42
|
+
|
|
43
|
+
def __str__(self) -> str:
|
|
44
|
+
hash_ = hashlib.md5(",".join(sorted(self.external_ids)).encode()).hexdigest()[:8]
|
|
45
|
+
return f"chart_count_{len(self.external_ids)}_hash_{hash_}"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from functools import cached_property
|
|
3
|
-
from typing import Annotated, Any, Literal
|
|
3
|
+
from typing import Annotated, Any, ClassVar, Literal
|
|
4
4
|
|
|
5
5
|
from cognite.client._proto.data_points_pb2 import (
|
|
6
6
|
InstanceId,
|
|
@@ -50,9 +50,12 @@ TimeSeriesColumn = Annotated[
|
|
|
50
50
|
]
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
class DataPointsFileSelector(DataSelector):
|
|
53
|
+
class DataPointsSelector(DataSelector, ABC):
|
|
54
|
+
kind: Literal["Datapoints"] = "Datapoints"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class DataPointsFileSelector(DataPointsSelector):
|
|
54
58
|
type: Literal["datapointsFile"] = "datapointsFile"
|
|
55
|
-
kind: Literal["datapoints"] = "datapoints"
|
|
56
59
|
|
|
57
60
|
timestamp_column: str
|
|
58
61
|
columns: tuple[TimeSeriesColumn, ...]
|
|
@@ -67,3 +70,20 @@ class DataPointsFileSelector(DataSelector):
|
|
|
67
70
|
@cached_property
|
|
68
71
|
def id_by_column(self) -> dict[str, Column]:
|
|
69
72
|
return {col.column: col for col in self.columns}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class DataPointsDataSetSelector(DataPointsSelector):
|
|
76
|
+
required_columns: ClassVar[frozenset[str]] = frozenset({"externalId", "timestamp", "value"})
|
|
77
|
+
type: Literal["datapointsDataSet"] = "datapointsDataSet"
|
|
78
|
+
|
|
79
|
+
data_set_external_id: str
|
|
80
|
+
start: int | str | None = None
|
|
81
|
+
end: int | str | None = None
|
|
82
|
+
data_type: Literal["numeric", "string"] = "numeric"
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
def group(self) -> str:
|
|
86
|
+
return f"DataSet_{self.data_set_external_id}"
|
|
87
|
+
|
|
88
|
+
def __str__(self) -> str:
|
|
89
|
+
return f"datapoints_dataset_{self.data_set_external_id}"
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Annotated, Any, Literal
|
|
6
|
+
|
|
7
|
+
from pydantic import ConfigDict, Field, field_validator, model_validator
|
|
8
|
+
|
|
9
|
+
from ._base import DataSelector, SelectorObject
|
|
10
|
+
from ._instances import SelectedView
|
|
11
|
+
|
|
12
|
+
FILENAME_VARIABLE = "$FILENAME"
|
|
13
|
+
FILEPATH = "$FILEPATH"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FileContentSelector(DataSelector, ABC):
|
|
17
|
+
kind: Literal["FileContent"] = "FileContent"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FileTemplateSelector(FileContentSelector, ABC):
|
|
21
|
+
file_directory: Path
|
|
22
|
+
|
|
23
|
+
def find_data_files(self, input_dir: Path, manifest_file: Path) -> list[Path]:
|
|
24
|
+
file_dir = input_dir / self.file_directory
|
|
25
|
+
if not file_dir.is_dir():
|
|
26
|
+
return []
|
|
27
|
+
return [file for file in file_dir.iterdir() if file.is_file()]
|
|
28
|
+
|
|
29
|
+
@abstractmethod
|
|
30
|
+
def create_instance(self, filepath: Path) -> dict[str, Any]: ...
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class FileTemplate(SelectorObject):
|
|
34
|
+
model_config = ConfigDict(extra="allow")
|
|
35
|
+
|
|
36
|
+
def create_instance(self, filename: str) -> dict[str, Any]:
|
|
37
|
+
json_str = self.model_dump_json(by_alias=True)
|
|
38
|
+
return json.loads(json_str.replace(FILENAME_VARIABLE, filename))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class FileMetadataTemplate(FileTemplate):
|
|
42
|
+
name: str
|
|
43
|
+
external_id: str
|
|
44
|
+
|
|
45
|
+
@field_validator("name", "external_id")
|
|
46
|
+
@classmethod
|
|
47
|
+
def _validate_filename_in_fields(cls, v: str) -> str:
|
|
48
|
+
if FILENAME_VARIABLE not in v:
|
|
49
|
+
raise ValueError(
|
|
50
|
+
f"{FILENAME_VARIABLE!s} must be present in 'name' and 'external_id' fields. "
|
|
51
|
+
f"This allows for dynamic substitution based on the file name."
|
|
52
|
+
)
|
|
53
|
+
return v
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class FileMetadataTemplateSelector(FileTemplateSelector):
|
|
57
|
+
type: Literal["fileMetadataTemplate"] = "fileMetadataTemplate"
|
|
58
|
+
template: FileMetadataTemplate
|
|
59
|
+
|
|
60
|
+
@property
|
|
61
|
+
def group(self) -> str:
|
|
62
|
+
return "FileMetadata"
|
|
63
|
+
|
|
64
|
+
def __str__(self) -> str:
|
|
65
|
+
return "metadata_template"
|
|
66
|
+
|
|
67
|
+
def create_instance(self, filepath: Path) -> dict[str, Any]:
|
|
68
|
+
return self.template.create_instance(filepath.name)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TemplateNodeId(SelectorObject):
|
|
72
|
+
space: str
|
|
73
|
+
external_id: str
|
|
74
|
+
|
|
75
|
+
@field_validator("external_id")
|
|
76
|
+
@classmethod
|
|
77
|
+
def _validate_filename_in_fields(cls, v: str) -> str:
|
|
78
|
+
if FILENAME_VARIABLE not in v:
|
|
79
|
+
raise ValueError(
|
|
80
|
+
f"{FILENAME_VARIABLE!s} must be present in 'external_id' field. "
|
|
81
|
+
f"This allows for dynamic substitution based on the file name."
|
|
82
|
+
)
|
|
83
|
+
return v
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class FileDataModelingTemplate(FileTemplate):
|
|
87
|
+
instance_id: TemplateNodeId
|
|
88
|
+
# Name is required for FileMetadata but not for CogniteFiles. This is the same default behavior as in CDF.
|
|
89
|
+
name: str = "untitled"
|
|
90
|
+
|
|
91
|
+
@model_validator(mode="before")
|
|
92
|
+
def _move_space_external_id(cls, data: dict[str, Any]) -> dict[str, Any]:
|
|
93
|
+
if "space" in data and "externalId" in data:
|
|
94
|
+
data["instanceId"] = {"space": data.pop("space"), "externalId": data.pop("externalId")}
|
|
95
|
+
elif "space" in data and "external_id" in data:
|
|
96
|
+
data["instance_id"] = {"space": data.pop("space"), "external_id": data.pop("external_id")}
|
|
97
|
+
return data
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class FileDataModelingTemplateSelector(FileTemplateSelector):
|
|
101
|
+
type: Literal["fileDataModelingTemplate"] = "fileDataModelingTemplate"
|
|
102
|
+
view_id: SelectedView = SelectedView(space="cdf_cdm", external_id="CogniteFile", version="v1")
|
|
103
|
+
template: FileDataModelingTemplate
|
|
104
|
+
|
|
105
|
+
@property
|
|
106
|
+
def group(self) -> str:
|
|
107
|
+
return "FileDataModeling"
|
|
108
|
+
|
|
109
|
+
def __str__(self) -> str:
|
|
110
|
+
return "data_modeling_template"
|
|
111
|
+
|
|
112
|
+
def create_instance(self, filepath: Path) -> dict[str, Any]:
|
|
113
|
+
return self.template.create_instance(filepath.name)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class FileIdentifierDefinition(SelectorObject):
|
|
117
|
+
id_type: str
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class FileInternalID(FileIdentifierDefinition):
|
|
121
|
+
id_type: Literal["internalId"] = "internalId"
|
|
122
|
+
internal_id: int = Field(alias="id")
|
|
123
|
+
|
|
124
|
+
def __str__(self) -> str:
|
|
125
|
+
return f"internalId_{self.internal_id}"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class FileExternalID(FileIdentifierDefinition):
|
|
129
|
+
id_type: Literal["externalId"] = "externalId"
|
|
130
|
+
external_id: str
|
|
131
|
+
|
|
132
|
+
def __str__(self) -> str:
|
|
133
|
+
return f"externalId_{self.external_id}"
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class NodeId(SelectorObject):
|
|
137
|
+
space: str
|
|
138
|
+
external_id: str
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class FileInstanceID(FileIdentifierDefinition):
|
|
142
|
+
id_type: Literal["instanceId"] = "instanceId"
|
|
143
|
+
instance_id: NodeId
|
|
144
|
+
|
|
145
|
+
def __str__(self) -> str:
|
|
146
|
+
return f"instanceId_{self.instance_id.space}_{self.instance_id.external_id}"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
FileIdentifier = Annotated[FileInstanceID | FileExternalID | FileInternalID, Field(discriminator="id_type")]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class FileIdentifierSelector(FileContentSelector):
|
|
153
|
+
type: Literal["fileIdentifier"] = "fileIdentifier"
|
|
154
|
+
file_directory: str = "file_content"
|
|
155
|
+
use_metadata_directory: bool = True
|
|
156
|
+
identifiers: tuple[FileIdentifier, ...]
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def group(self) -> str:
|
|
160
|
+
return "Files"
|
|
161
|
+
|
|
162
|
+
def __str__(self) -> str:
|
|
163
|
+
hash_ = hashlib.md5(",".join(sorted(str(self.identifiers))).encode()).hexdigest()[:8]
|
|
164
|
+
return f"file_{len(self.identifiers)}_identifiers_{hash_}"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
from ._base import DataSelector
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ThreeDSelector(DataSelector, ABC):
|
|
9
|
+
kind: Literal["3D"] = "3D"
|
|
10
|
+
|
|
11
|
+
@property
|
|
12
|
+
def group(self) -> str:
|
|
13
|
+
return "3DModels"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ThreeDModelFilteredSelector(ThreeDSelector):
|
|
17
|
+
type: Literal["3DFiltered"] = "3DFiltered"
|
|
18
|
+
model_type: Literal["Classic", "DataModel"] = "Classic"
|
|
19
|
+
published: bool | None = None
|
|
20
|
+
|
|
21
|
+
def __str__(self) -> str:
|
|
22
|
+
suffix = f"3DModels_{self.model_type}"
|
|
23
|
+
if self.published is not None:
|
|
24
|
+
return f"{suffix}_published_{self.published}"
|
|
25
|
+
return suffix
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ThreeDModelIdSelector(ThreeDSelector):
|
|
29
|
+
type: Literal["3DId"] = "3DId"
|
|
30
|
+
ids: tuple[int, ...]
|
|
31
|
+
|
|
32
|
+
def __str__(self) -> str:
|
|
33
|
+
hash_ = hashlib.md5(",".join(sorted(map(str, self.ids))).encode()).hexdigest()[:8]
|
|
34
|
+
return f"3DModels_ids_count_{len(self.ids)}_hash_{hash_}"
|
|
@@ -136,15 +136,19 @@ class MissingCapabilityWarning(GeneralWarning):
|
|
|
136
136
|
|
|
137
137
|
@dataclass(frozen=True)
|
|
138
138
|
class ToolkitDeprecationWarning(ToolkitWarning, DeprecationWarning):
|
|
139
|
+
severity = SeverityLevel.HIGH
|
|
139
140
|
message: ClassVar[str] = "The '{feature}' is deprecated and will be removed in a future version."
|
|
140
141
|
|
|
141
142
|
feature: str
|
|
142
143
|
alternative: str | None = None
|
|
144
|
+
removal_version: str | None = None
|
|
143
145
|
|
|
144
146
|
def get_message(self) -> str:
|
|
145
147
|
msg = self.message.format(feature=self.feature)
|
|
146
148
|
if self.alternative:
|
|
147
149
|
msg += f"\nUse {self.alternative!r} instead."
|
|
150
|
+
if self.removal_version:
|
|
151
|
+
msg += f"\nIt will be removed in version {self.removal_version}."
|
|
148
152
|
|
|
149
153
|
return msg
|
|
150
154
|
|
|
@@ -38,11 +38,11 @@ class Tracker:
|
|
|
38
38
|
|
|
39
39
|
@property
|
|
40
40
|
def opted_out(self) -> bool:
|
|
41
|
-
return
|
|
41
|
+
return False
|
|
42
42
|
|
|
43
43
|
@property
|
|
44
44
|
def opted_in(self) -> bool:
|
|
45
|
-
return
|
|
45
|
+
return True
|
|
46
46
|
|
|
47
47
|
def track_cli_command(
|
|
48
48
|
self,
|
|
@@ -21,7 +21,7 @@ from filelock import BaseFileLock, FileLock, Timeout
|
|
|
21
21
|
from rich.console import Console
|
|
22
22
|
|
|
23
23
|
from cognite_toolkit._cdf_tk.client import ToolkitClient, ToolkitClientConfig
|
|
24
|
-
from cognite_toolkit._cdf_tk.client.data_classes.raw import RawTable
|
|
24
|
+
from cognite_toolkit._cdf_tk.client.data_classes.legacy.raw import RawTable
|
|
25
25
|
from cognite_toolkit._cdf_tk.constants import ENV_VAR_PATTERN, MAX_ROW_ITERATION_RUN_QUERY, MAX_RUN_QUERY_FREQUENCY_MIN
|
|
26
26
|
from cognite_toolkit._cdf_tk.exceptions import (
|
|
27
27
|
ToolkitError,
|
|
@@ -20,7 +20,13 @@ from dateutil import parser
|
|
|
20
20
|
from cognite_toolkit._cdf_tk.constants import CDF_UNIT_SPACE
|
|
21
21
|
from cognite_toolkit._cdf_tk.exceptions import ToolkitNotSupported
|
|
22
22
|
from cognite_toolkit._cdf_tk.utils._auxiliary import get_concrete_subclasses
|
|
23
|
-
from cognite_toolkit._cdf_tk.utils.useful_types import
|
|
23
|
+
from cognite_toolkit._cdf_tk.utils.useful_types import (
|
|
24
|
+
AssetCentricType,
|
|
25
|
+
AssetCentricTypeExtended,
|
|
26
|
+
DataType,
|
|
27
|
+
JsonVal,
|
|
28
|
+
PythonTypes,
|
|
29
|
+
)
|
|
24
30
|
|
|
25
31
|
from .collection import humanize_collection
|
|
26
32
|
|
|
@@ -35,7 +41,7 @@ def asset_centric_convert_to_primary_property(
|
|
|
35
41
|
type_: PropertyType,
|
|
36
42
|
nullable: bool,
|
|
37
43
|
destination_container_property: tuple[ContainerId, str],
|
|
38
|
-
source_property: tuple[
|
|
44
|
+
source_property: tuple[AssetCentricTypeExtended, str],
|
|
39
45
|
direct_relation_lookup: Mapping[str | int, DirectRelationReference] | None = None,
|
|
40
46
|
) -> PropertyValueWrite:
|
|
41
47
|
if (source_property, destination_container_property) in SPECIAL_CONVERTER_BY_SOURCE_DESTINATION:
|
|
@@ -574,7 +580,7 @@ CONVERTER_BY_DTYPE: Mapping[str, type[_ValueConverter]] = {
|
|
|
574
580
|
for cls_ in _ValueConverter.__subclasses__()
|
|
575
581
|
}
|
|
576
582
|
SPECIAL_CONVERTER_BY_SOURCE_DESTINATION: Mapping[
|
|
577
|
-
tuple[tuple[
|
|
583
|
+
tuple[tuple[AssetCentricTypeExtended, str], tuple[ContainerId, str]],
|
|
578
584
|
type[_SpecialCaseConverter],
|
|
579
585
|
] = {
|
|
580
586
|
(subclass.source_property, subclass.destination_container_property): subclass
|
|
@@ -12,6 +12,7 @@ from ._readers import (
|
|
|
12
12
|
CSVReader,
|
|
13
13
|
FailedParsing,
|
|
14
14
|
FileReader,
|
|
15
|
+
MultiFileReader,
|
|
15
16
|
NDJsonReader,
|
|
16
17
|
ParquetReader,
|
|
17
18
|
YAMLReader,
|
|
@@ -45,6 +46,7 @@ __all__ = [
|
|
|
45
46
|
"FileReader",
|
|
46
47
|
"FileWriter",
|
|
47
48
|
"GzipCompression",
|
|
49
|
+
"MultiFileReader",
|
|
48
50
|
"NDJsonReader",
|
|
49
51
|
"NDJsonWriter",
|
|
50
52
|
"ParquetReader",
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import csv
|
|
2
2
|
import json
|
|
3
|
+
import re
|
|
3
4
|
from abc import ABC, abstractmethod
|
|
4
5
|
from collections import Counter, defaultdict
|
|
5
6
|
from collections.abc import Callable, Iterator, Mapping, Sequence
|
|
6
7
|
from dataclasses import dataclass
|
|
7
|
-
from functools import partial
|
|
8
|
+
from functools import cached_property, partial
|
|
8
9
|
from io import TextIOWrapper
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from typing import Any
|
|
@@ -37,10 +38,10 @@ class FileReader(FileIO, ABC):
|
|
|
37
38
|
@abstractmethod
|
|
38
39
|
def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
|
|
39
40
|
"""Read chunks from the file."""
|
|
40
|
-
|
|
41
|
+
...
|
|
41
42
|
|
|
42
43
|
@classmethod
|
|
43
|
-
def from_filepath(cls, filepath: Path) -> "FileReader":
|
|
44
|
+
def from_filepath(cls, filepath: Path) -> "type[FileReader]":
|
|
44
45
|
if len(filepath.suffixes) == 0:
|
|
45
46
|
raise ToolkitValueError(
|
|
46
47
|
f"File has no suffix. Available formats: {humanize_collection(FILE_READ_CLS_BY_FORMAT.keys())}."
|
|
@@ -55,33 +56,110 @@ class FileReader(FileIO, ABC):
|
|
|
55
56
|
)
|
|
56
57
|
|
|
57
58
|
if suffix in FILE_READ_CLS_BY_FORMAT:
|
|
58
|
-
return FILE_READ_CLS_BY_FORMAT[suffix]
|
|
59
|
+
return FILE_READ_CLS_BY_FORMAT[suffix]
|
|
59
60
|
|
|
60
61
|
raise ToolkitValueError(
|
|
61
62
|
f"Unknown file format: {suffix}. Available formats: {humanize_collection(FILE_READ_CLS_BY_FORMAT.keys())}."
|
|
62
63
|
)
|
|
63
64
|
|
|
65
|
+
@abstractmethod
def count(self) -> int:
    """Return the number of chunks in the file.

    Returns:
        int: How many chunks the file holds.
    """
    ...
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class MultiFileReader(FileReader):
    """Reads multiple files and yields chunks from each file sequentially.

    Files whose name carries a ``part-NNNN`` marker are read in ascending part
    order. Files without such a marker are read last, in the order they were given.

    Args:
        input_files (Sequence[Path]): The list of file paths to read. Must not be empty.

    Raises:
        ToolkitValueError: If ``input_files`` is empty.
    """

    PART_PATTERN = re.compile(r"part-(\d{4})$")

    def __init__(self, input_files: Sequence[Path]) -> None:
        # Fail with a descriptive error instead of the bare IndexError that
        # input_files[0] would raise on an empty sequence.
        if not input_files:
            raise ToolkitValueError("At least one input file is required.")
        super().__init__(input_file=input_files[0])
        self.input_files = input_files

    @cached_property
    def reader_class(self) -> type[FileReader]:
        """Determine the reader class based on the input files.

        Raises:
            ToolkitValueError: If the input files do not all map to the same reader class.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # formats in the error message appear in input order.
        reader_classes = list(dict.fromkeys(FileReader.from_filepath(path) for path in self.input_files))
        if len(reader_classes) > 1:
            raise ToolkitValueError(
                "All input files must be of the same format. "
                f"Found formats: {humanize_collection([cls.FORMAT for cls in reader_classes])}."
            )
        return reader_classes[0]

    @property
    def is_table(self) -> bool:
        """Whether the input files are read by a ``TableReader`` subclass."""
        try:
            return issubclass(self.reader_class, TableReader)
        except ValueError:
            # The input files are not a known format, so it is not a table.
            # NOTE(review): relies on ToolkitValueError being a ValueError subclass - confirm.
            return False

    @property
    def format(self) -> str:
        """The ``FORMAT`` of the reader class shared by all input files."""
        return self.reader_class.FORMAT

    def read_chunks(self) -> Iterator[dict[str, JsonVal]]:
        """Yield the chunks of every input file, one file after the other, in part order."""
        # sorted() is stable, so files without a part number keep their given order.
        for input_file in sorted(self.input_files, key=self._part_no):
            yield from self.reader_class(input_file).read_chunks()

    def _part_no(self, path: Path) -> int:
        """Return the part number encoded in the file name, or 99999 if there is none.

        ``Path.stem`` only drops the last suffix, so a name such as
        ``data-part-0001.ndjson.gz`` would never end in ``part-0001``. The trailing
        suffixes are therefore stripped one at a time until the pattern matches or
        nothing is left.
        """
        name = path.name
        while name:
            if match := self.PART_PATTERN.search(name):
                return int(match.group(1))
            # Drop the last ".suffix"; yields "" once no dot remains.
            name = name.rpartition(".")[0]
        # Larger than any four-digit part number, so unnumbered files sort last.
        return 99999

    def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
        raise NotImplementedError("This method is not used in MultiFileReader.")

    def count(self) -> int:
        """Count the total number of chunks in all files."""
        return sum(self.reader_class(input_file).count() for input_file in self.input_files)
|
|
127
|
+
|
|
64
128
|
|
|
65
129
|
class NDJsonReader(FileReader):
    """Reader for newline-delimited JSON: every non-blank line is one JSON document."""

    FORMAT = ".ndjson"

    def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
        """Yield the parsed JSON value of each non-blank line in ``file``."""
        for raw_line in file:
            content = raw_line.strip()
            if not content:
                # Blank lines carry no document.
                continue
            yield json.loads(content)

    def count(self) -> int:
        """Return the number of non-blank lines (chunks) in the NDJSON file."""
        opener = Compression.from_filepath(self.input_file)
        non_blank = 0
        with opener.open("r") as handle:
            for raw_line in handle:
                if raw_line.strip():
                    non_blank += 1
        return non_blank
|
|
143
|
+
|
|
73
144
|
|
|
74
145
|
class YAMLBaseReader(FileReader, ABC):
    """Shared implementation for readers of multi-document YAML files."""

    def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
        """Yield each YAML document found in ``file``."""
        for document in yaml.safe_load_all(file):
            yield document

    def count(self) -> int:
        """Return the number of YAML documents (chunks) in the input file."""
        opener = Compression.from_filepath(self.input_file)
        with opener.open("r") as handle:
            documents = yaml.safe_load_all(handle)
            # The documents are parsed lazily, so they must be consumed while the file is open.
            total = sum(1 for _ in documents)
        return total
|
|
155
|
+
|
|
78
156
|
|
|
79
157
|
class YAMLReader(YAMLBaseReader):
    """YAML reader registered for the ``.yaml`` suffix."""

    FORMAT = ".yaml"
|
|
81
159
|
|
|
82
160
|
|
|
83
161
|
class YMLReader(YAMLBaseReader):
    """YAML reader registered for the ``.yml`` suffix."""

    FORMAT = ".yml"
|
|
85
163
|
|
|
86
164
|
|
|
87
165
|
@dataclass
|
|
@@ -171,8 +249,8 @@ class TableReader(FileReader, ABC):
|
|
|
171
249
|
|
|
172
250
|
if not input_file.exists():
|
|
173
251
|
raise ToolkitFileNotFoundError(f"File not found: {input_file.as_posix()!r}.")
|
|
174
|
-
if input_file.suffix != cls.
|
|
175
|
-
raise ToolkitValueError(f"Expected a {cls.
|
|
252
|
+
if input_file.suffix != cls.FORMAT:
|
|
253
|
+
raise ToolkitValueError(f"Expected a {cls.FORMAT} file got a {input_file.suffix!r} file instead.")
|
|
176
254
|
|
|
177
255
|
column_names, sample_rows = cls._read_sample_rows(input_file, sniff_rows)
|
|
178
256
|
cls._check_column_names(column_names)
|
|
@@ -213,7 +291,7 @@ class TableReader(FileReader, ABC):
|
|
|
213
291
|
class CSVReader(TableReader):
|
|
214
292
|
"""Reads CSV files and yields each row as a dictionary."""
|
|
215
293
|
|
|
216
|
-
|
|
294
|
+
FORMAT = ".csv"
|
|
217
295
|
|
|
218
296
|
def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
|
|
219
297
|
if self.keep_failed_cells and self.failed_cell:
|
|
@@ -255,9 +333,16 @@ class CSVReader(TableReader):
|
|
|
255
333
|
raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
|
|
256
334
|
return column_names, sample_rows
|
|
257
335
|
|
|
336
|
+
def count(self) -> int:
    """Count the number of data rows in the CSV file, excluding the header row.

    Rows are counted with the ``csv`` parser instead of by physical line. A quoted
    field that contains a newline therefore counts as a single row, blank lines are
    not counted, and an empty file yields 0 rather than -1.

    Returns:
        int: The number of non-empty rows after the header.
    """
    compression = Compression.from_filepath(self.input_file)
    with compression.open("r") as file:
        # NOTE(review): uses the default csv dialect - confirm it matches the one used when reading chunks.
        rows = csv.reader(file)
        # Consume the header; the default avoids StopIteration on an empty file.
        next(rows, None)
        # csv.reader returns [] for a blank line; those are not data rows.
        return sum(1 for row in rows if row)
|
|
342
|
+
|
|
258
343
|
|
|
259
344
|
class ParquetReader(TableReader):
|
|
260
|
-
|
|
345
|
+
FORMAT = ".parquet"
|
|
261
346
|
|
|
262
347
|
def __init__(self, input_file: Path) -> None:
|
|
263
348
|
# Parquet files have their own schema, so we don't need to sniff or provide one.
|
|
@@ -308,23 +393,30 @@ class ParquetReader(TableReader):
|
|
|
308
393
|
raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
|
|
309
394
|
return column_names, sample_rows
|
|
310
395
|
|
|
396
|
+
def count(self) -> int:
    """Return the row count recorded in the Parquet file's metadata."""
    # Imported locally, as in the original, so pyarrow is only loaded when needed.
    from pyarrow.parquet import ParquetFile

    with ParquetFile(self.input_file) as parquet_file:
        row_total = parquet_file.metadata.num_rows
    return row_total
|
|
402
|
+
|
|
311
403
|
|
|
312
404
|
FILE_READ_CLS_BY_FORMAT: Mapping[str, type[FileReader]] = {}
|
|
313
405
|
TABLE_READ_CLS_BY_FORMAT: Mapping[str, type[TableReader]] = {}
|
|
314
406
|
for subclass in get_concrete_subclasses(FileReader): # type: ignore[type-abstract]
|
|
315
|
-
if not getattr(subclass, "
|
|
407
|
+
if not getattr(subclass, "FORMAT", None):
|
|
316
408
|
continue
|
|
317
|
-
if subclass.
|
|
409
|
+
if subclass.FORMAT in FILE_READ_CLS_BY_FORMAT:
|
|
318
410
|
raise TypeError(
|
|
319
|
-
f"Duplicate file format {subclass.
|
|
320
|
-
f"{FILE_READ_CLS_BY_FORMAT[subclass.
|
|
411
|
+
f"Duplicate file format {subclass.FORMAT!r} found for classes "
|
|
412
|
+
f"{FILE_READ_CLS_BY_FORMAT[subclass.FORMAT].__name__!r} and {subclass.__name__!r}."
|
|
321
413
|
)
|
|
322
414
|
# We know we have a dict, but we want to expose FILE_READ_CLS_BY_FORMAT as a Mapping
|
|
323
|
-
FILE_READ_CLS_BY_FORMAT[subclass.
|
|
415
|
+
FILE_READ_CLS_BY_FORMAT[subclass.FORMAT] = subclass # type: ignore[index]
|
|
324
416
|
if issubclass(subclass, TableReader):
|
|
325
|
-
if subclass.
|
|
417
|
+
if subclass.FORMAT in TABLE_READ_CLS_BY_FORMAT:
|
|
326
418
|
raise TypeError(
|
|
327
|
-
f"Duplicate table file format {subclass.
|
|
328
|
-
f"{TABLE_READ_CLS_BY_FORMAT[subclass.
|
|
419
|
+
f"Duplicate table file format {subclass.FORMAT!r} found for classes "
|
|
420
|
+
f"{TABLE_READ_CLS_BY_FORMAT[subclass.FORMAT].__name__!r} and {subclass.__name__!r}."
|
|
329
421
|
)
|
|
330
|
-
TABLE_READ_CLS_BY_FORMAT[subclass.
|
|
422
|
+
TABLE_READ_CLS_BY_FORMAT[subclass.FORMAT] = subclass # type: ignore[index]
|