fameio 3.1.0-py3-none-any.whl → 3.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fameio/cli/__init__.py +2 -3
- fameio/cli/convert_results.py +6 -4
- fameio/cli/make_config.py +6 -4
- fameio/cli/options.py +3 -3
- fameio/cli/parser.py +43 -31
- fameio/input/__init__.py +1 -9
- fameio/input/loader/__init__.py +9 -7
- fameio/input/loader/controller.py +64 -14
- fameio/input/loader/loader.py +14 -7
- fameio/input/metadata.py +37 -18
- fameio/input/resolver.py +5 -4
- fameio/input/scenario/__init__.py +7 -8
- fameio/input/scenario/agent.py +52 -19
- fameio/input/scenario/attribute.py +28 -29
- fameio/input/scenario/contract.py +161 -52
- fameio/input/scenario/exception.py +45 -22
- fameio/input/scenario/fameiofactory.py +63 -7
- fameio/input/scenario/generalproperties.py +17 -6
- fameio/input/scenario/scenario.py +111 -28
- fameio/input/scenario/stringset.py +27 -8
- fameio/input/schema/__init__.py +5 -5
- fameio/input/schema/agenttype.py +29 -11
- fameio/input/schema/attribute.py +174 -84
- fameio/input/schema/java_packages.py +8 -5
- fameio/input/schema/schema.py +35 -9
- fameio/input/validator.py +58 -42
- fameio/input/writer.py +139 -41
- fameio/logs.py +23 -17
- fameio/output/__init__.py +5 -1
- fameio/output/agent_type.py +93 -27
- fameio/output/conversion.py +48 -30
- fameio/output/csv_writer.py +88 -18
- fameio/output/data_transformer.py +12 -21
- fameio/output/input_dao.py +68 -32
- fameio/output/output_dao.py +26 -4
- fameio/output/reader.py +61 -18
- fameio/output/yaml_writer.py +18 -9
- fameio/scripts/__init__.py +9 -2
- fameio/scripts/convert_results.py +144 -52
- fameio/scripts/convert_results.py.license +1 -1
- fameio/scripts/exception.py +7 -0
- fameio/scripts/make_config.py +34 -12
- fameio/scripts/make_config.py.license +1 -1
- fameio/series.py +132 -47
- fameio/time.py +88 -37
- fameio/tools.py +9 -8
- {fameio-3.1.0.dist-info → fameio-3.2.0.dist-info}/METADATA +19 -13
- fameio-3.2.0.dist-info/RECORD +56 -0
- {fameio-3.1.0.dist-info → fameio-3.2.0.dist-info}/WHEEL +1 -1
- CHANGELOG.md +0 -279
- fameio-3.1.0.dist-info/RECORD +0 -56
- {fameio-3.1.0.dist-info → fameio-3.2.0.dist-info}/LICENSE.txt +0 -0
- {fameio-3.1.0.dist-info → fameio-3.2.0.dist-info}/LICENSES/Apache-2.0.txt +0 -0
- {fameio-3.1.0.dist-info → fameio-3.2.0.dist-info}/LICENSES/CC-BY-4.0.txt +0 -0
- {fameio-3.1.0.dist-info → fameio-3.2.0.dist-info}/LICENSES/CC0-1.0.txt +0 -0
- {fameio-3.1.0.dist-info → fameio-3.2.0.dist-info}/entry_points.txt +0 -0
fameio/scripts/convert_results.py
CHANGED
@@ -1,13 +1,18 @@
 #!/usr/bin/env python
+from __future__ import annotations
+
 import sys
 from pathlib import Path
+from typing import Any, BinaryIO
 
 import pandas as pd
 
+from fameio.cli import update_default_config
 from fameio.cli.convert_results import handle_args, CLI_DEFAULTS as DEFAULT_CONFIG
 from fameio.cli.options import Options
-from fameio.
-from fameio.logs import
+from fameio.input import InputError
+from fameio.logs import fameio_logger, log, log_error, log_critical
+from fameio.output import OutputError
 from fameio.output.agent_type import AgentTypeLog
 from fameio.output.conversion import apply_time_option, apply_time_merging
 from fameio.output.csv_writer import CsvWriter
@@ -16,67 +21,154 @@ from fameio.output.input_dao import InputDao
 from fameio.output.output_dao import OutputDAO
 from fameio.output.reader import Reader
 from fameio.output.yaml_writer import data_to_yaml_file
+from fameio.scripts.exception import ScriptError
 
-
-
+_ERR_OUT_OF_MEMORY = "Out of memory. Retry result conversion using `-m` or `--memory-saving` option."
+_ERR_MEMORY_SEVERE = "Out of memory despite memory-saving mode. Reduce output interval in `FAME-Core` and rerun model."
+_ERR_FILE_OPEN_FAIL = "Could not open file: '{}'"
+_ERR_RECOVER_INPUT = "Could not recover inputs due to an incompatibility with this version of fameio."
+_ERR_FAIL = "Results conversion script failed."
 
+_WARN_OUTPUT_SUPPRESSED = "All output data suppressed by agent filter, but there is data available for agent types: {}"
+_WARN_OUTPUT_MISSING = "Provided file did not contain any output data, only input recovery available."
+_INFO_MEMORY_SAVING = "Memory saving mode enabled: Disable on conversion of small files for performance improvements."
 
-def run(config: dict = None) -> None:
-    """Reads file in protobuf format for configures FILE and extracts its content to .csv file(s)"""
-    config = update_default_config(config, DEFAULT_CONFIG)
-    fameio_logger(log_level_name=config[Options.LOG_LEVEL], file_name=config[Options.LOG_FILE])
 
-
-
-
+def _read_and_extract_data(config: dict[Options, Any]) -> None:
+    """
+    Read protobuf file, extracts, converts, and saves the converted data; Returns false if no result data was found
 
-
-
+    Args:
+        config: script configuration options
 
-
-
-
-
+    Raises:
+        OutputError: if file could not be opened or converted, logged with level "ERROR"
+    """
+    file_path = Path(config[Options.FILE])
+    log().info("Opening file for reading...")
     try:
-
-
-
-
-
-        for agent_name in output.get_sorted_agents_to_extract():
-            log().debug(f"Extracting data for {agent_name}...")
-            data_frames = output.get_agent_data(agent_name, data_transformer)
-            if not config[Options.MEMORY_SAVING]:
-                apply_time_merging(data_frames, config[Options.TIME_MERGING])
-                apply_time_option(data_frames, config[Options.TIME])
-            log().debug(f"Writing data for {agent_name}...")
-            output_writer.write_to_files(agent_name, data_frames)
+        with open(file_path, "rb") as file_stream:
+            _extract_and_convert_data(config, file_stream, file_path)
+    except OSError as ex:
+        raise log_error(OutputError(_ERR_FILE_OPEN_FAIL.format(file_path))) from ex
+
 
+def _extract_and_convert_data(config: dict[Options, Any], file_stream: BinaryIO, file_path: Path) -> None:
+    """
+    Extracts data from provided input file stream, converts it, and writes the result to output files
+
+    Args:
+        config: script configuration options
+        file_stream: opened input file
+        file_path: path to input file
+
+    Raises:
+        OutputError: if file could not be opened or converted, logged with level "ERROR"
+    """
+    log().info("Reading and extracting data...")
+    output_writer = CsvWriter(config[Options.OUTPUT], file_path, config[Options.SINGLE_AGENT_EXPORT])
+    agent_type_log = AgentTypeLog(_agent_name_filter_list=config[Options.AGENT_LIST])
+    data_transformer = DataTransformer.build(config[Options.RESOLVE_COMPLEX_FIELD])
+    reader = Reader.get_reader(file=file_stream, read_single=config[Options.MEMORY_SAVING])
+    input_dao = InputDao()
+    while data_storages := reader.read():
         if config[Options.INPUT_RECOVERY]:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    except MemoryError:
-        log_critical_and_raise(MemoryError(ERR_MEMORY_SEVERE if Options.MEMORY_SAVING else ERR_MEMORY_ERROR))
-
-    file_stream.close()
+            input_dao.store_inputs(data_storages)
+        output = OutputDAO(data_storages, agent_type_log)
+        for agent_name in output.get_sorted_agents_to_extract():
+            log().debug(f"Extracting data for {agent_name}...")
+            data_frames = output.get_agent_data(agent_name, data_transformer)
+            if not config[Options.MEMORY_SAVING]:
+                apply_time_merging(data_frames, config[Options.TIME_MERGING])
+                apply_time_option(data_frames, config[Options.TIME])
+            log().debug(f"Writing data for {agent_name}...")
+            output_writer.write_to_files(agent_name, data_frames)
+
+    if config[Options.INPUT_RECOVERY]:
+        _recover_inputs(config, input_dao)
+    if config[Options.MEMORY_SAVING]:
+        _memory_saving_apply_conversions(config, output_writer)
+
     if not agent_type_log.has_any_agent_type():
-
+        if len(agent_type_log.get_agents_with_output()) > 0:
+            log().warning(_WARN_OUTPUT_SUPPRESSED.format(agent_type_log.get_agents_with_output()))
+        else:
+            log().warning(_WARN_OUTPUT_MISSING)
+    log().info("Data conversion completed.")
+
+
+def _recover_inputs(config: dict[Options, Any], input_dao: InputDao) -> None:
+    """
+    Reads scenario configuration from provided input_dao
+
+    Args:
+        config: script configuration options
+        input_dao: to recover the input data from
+
+    Raises:
+        OutputError: if inputs could not be recovered or saved to files, logged with level "ERROR"
+    """
+    log().info("Recovering inputs...")
+    try:
+        timeseries, scenario = input_dao.recover_inputs()
+    except InputError as ex:
+        raise log_error(OutputError(_ERR_RECOVER_INPUT)) from ex
+    base_path = config[Options.OUTPUT] if config[Options.OUTPUT] is not None else "./"
+    series_writer = CsvWriter(
+        config_output=Path(base_path, "./recovered"), input_file_path=Path("./"), single_export=False
+    )
+    series_writer.write_time_series_to_disk(timeseries)
+    data_to_yaml_file(scenario.to_dict(), Path(base_path, "./recovered/scenario.yaml"))
+
+
+def _memory_saving_apply_conversions(config: dict[Options, Any], output_writer: CsvWriter) -> None:
+    """
+    Rewrite result files in memory saving mode: apply time-merging and time conversion options on a per-file basis
+
+    Args:
+        config: script configuration options
+        output_writer: to rewrite the previously written files
+
+    Raises:
+        OutputError: in case files could not be read, converted, or re-written, logged with level "ERROR"
+    """
+    log().info("Applying time conversion and merging options to extracted files...")
+    written_files = output_writer.pop_all_file_paths()
+    for agent_name, file_path in written_files.items():
+        parsed_data: dict[str | None, pd.DataFrame] = {None: pd.read_csv(file_path, sep=";", index_col=INDEX)}
+        apply_time_merging(parsed_data, config[Options.TIME_MERGING])
+        apply_time_option(parsed_data, config[Options.TIME])
+        output_writer.write_to_files(agent_name, parsed_data)
+
+
+def run(config: dict[Options, Any] | None = None) -> None:
+    """
+    Reads configured file in protobuf format and extracts its content to .CSV and .YAML file(s)
+
+    Args:
+        config: script configuration options
+
+    Raises:
+        ScriptError: if any kind of expected error or a memory error occurred, logged with level "CRITICAL"
+    """
+    config = update_default_config(config, DEFAULT_CONFIG)
+    fameio_logger(log_level_name=config[Options.LOG_LEVEL], file_name=config[Options.LOG_FILE])
+    if config[Options.MEMORY_SAVING]:
+        log().info(_INFO_MEMORY_SAVING)
+
+    try:
+        try:
+            _read_and_extract_data(config)
+        except MemoryError as ex:
+            error = OutputError(_ERR_MEMORY_SEVERE if config[Options.MEMORY_SAVING] else _ERR_OUT_OF_MEMORY)
+            raise log_critical(error) from ex
+    except OutputError as ex:
+        raise log_critical(ScriptError(_ERR_FAIL)) from ex
 
 
 if __name__ == "__main__":
     run_config = handle_args(sys.argv[1:])
-
+    try:
+        run(run_config)
+    except ScriptError as e:
+        raise SystemExit(1) from e
fameio/scripts/make_config.py
CHANGED
@@ -1,34 +1,56 @@
 #!/usr/bin/env python
+from __future__ import annotations
+
 import sys
 from pathlib import Path
+from typing import Any
 
+from fameio.cli import update_default_config
 from fameio.cli.make_config import handle_args, CLI_DEFAULTS as DEFAULT_CONFIG
 from fameio.cli.options import Options
-from fameio.
+from fameio.input import InputError
 from fameio.input.loader import load_yaml, validate_yaml_file_suffix
-from fameio.logs import fameio_logger, log
 from fameio.input.scenario import Scenario
 from fameio.input.validator import SchemaValidator
 from fameio.input.writer import ProtoWriter
+from fameio.logs import fameio_logger, log, log_critical
+from fameio.scripts.exception import ScriptError
+
+_ERR_FAIL: str = "Creation of run configuration file failed."
+
+
+def run(config: dict[Options, Any] | None = None) -> None:
+    """
+    Executes the main workflow of building a FAME configuration file
 
+    Args:
+        config: configuration options
 
-
-
+    Raises:
+        ScriptError: if any kind of expected error occurred, logged with level "CRITICAL"
+    """
     config = update_default_config(config, DEFAULT_CONFIG)
     fameio_logger(log_level_name=config[Options.LOG_LEVEL], file_name=config[Options.LOG_FILE])
 
-
-
-
-
+    try:
+        file = config[Options.FILE]
+        validate_yaml_file_suffix(Path(file))
+        scenario_definition = load_yaml(Path(file), encoding=config[Options.INPUT_ENCODING])
+        scenario = Scenario.from_dict(scenario_definition)
+        SchemaValidator.check_agents_have_contracts(scenario)
 
-
-
-
+        timeseries_manager = SchemaValidator.validate_scenario_and_timeseries(scenario)
+        writer = ProtoWriter(config[Options.OUTPUT], timeseries_manager)
+        writer.write_validated_scenario(scenario)
+    except InputError as ex:
+        raise log_critical(ScriptError(_ERR_FAIL)) from ex
 
     log().info("Configuration completed.")
 
 
 if __name__ == "__main__":
     run_config = handle_args(sys.argv[1:])
-
+    try:
+        run(run_config)
+    except ScriptError as e:
+        raise SystemExit(1) from e
fameio/series.py
CHANGED
@@ -1,26 +1,31 @@
-# SPDX-FileCopyrightText:
+# SPDX-FileCopyrightText: 2025 German Aerospace Center <fame@dlr.de>
 #
 # SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
 import math
 import os
 from enum import Enum, auto
 from pathlib import Path
-from typing import
+from typing import Any
 
 import pandas as pd
 from fameprotobuf.input_file_pb2 import InputData
 from google.protobuf.internal.wire_format import INT64_MIN, INT64_MAX
 
+from fameio.input import InputError
 from fameio.input.resolver import PathResolver
-from fameio.logs import
+from fameio.logs import log, log_error
+from fameio.output import OutputError
 from fameio.time import ConversionError, FameTime
 from fameio.tools import clean_up_file_name
 
+CSV_FILE_SUFFIX = ".csv"
+FILE_LENGTH_WARN_LIMIT = int(50e3)
 
-class TimeSeriesError(Exception):
-    """Indicates that an error occurred during management of time series"""
 
-
+class TimeSeriesError(InputError, OutputError):
+    """Indicates that an error occurred during management of time series"""
 
 
 class Entry(Enum):
@@ -40,19 +45,25 @@ class TimeSeriesManager:
     _ERR_FILE_NOT_FOUND = "Cannot find Timeseries file '{}'."
     _ERR_NUMERIC_STRING = " Remove quotes to use a constant numeric value instead of a timeseries file."
     _ERR_CORRUPT_TIME_SERIES_KEY = "TimeSeries file '{}' corrupt: At least one entry in first column isn't a timestamp."
-    _ERR_CORRUPT_TIME_SERIES_VALUE = "TimeSeries file '{}' corrupt: At least one entry in
+    _ERR_CORRUPT_TIME_SERIES_VALUE = "TimeSeries file '{}' corrupt: At least one entry in second column isn't numeric."
     _ERR_NON_NUMERIC = "Values in TimeSeries must be numeric but was: '{}'"
     _ERR_NAN_VALUE = "Values in TimeSeries must not be missing or NaN."
     _ERR_UNREGISTERED_SERIES = "No timeseries registered with identifier '{}' - was the Scenario validated?"
+    _ERR_UNREGISTERED_SERIES_RE = "No timeseries registered with identifier '{}' - were the timeseries reconstructed?"
     _WARN_NO_DATA = "No timeseries stored in timeseries manager. Double check if you expected timeseries."
     _WARN_DATA_IGNORED = "Timeseries contains additional columns with data which will be ignored."
+    _WARN_LARGE_CONVERSION = (
+        "Timeseries file '{}' is large and needs conversion of time stamps. If performance "
+        "issues occur and the file is reused, convert the time stamp column once with "
+        "`fameio.time.FameTime.convert_datetime_to_fame_time_step(datetime_string)`."
+    )
 
     def __init__(self, path_resolver: PathResolver = PathResolver()) -> None:
         self._path_resolver = path_resolver
         self._id_count = -1
-        self._series_by_id: dict[
+        self._series_by_id: dict[str | int | float, dict[Entry, Any]] = {}
 
-    def register_and_validate(self, identifier:
+    def register_and_validate(self, identifier: str | int | float) -> None:
         """
         Registers given timeseries `identifier` and validates associated timeseries
 
@@ -60,61 +71,112 @@ class TimeSeriesManager:
             identifier: to be registered - either a single numeric value or a string pointing to a timeseries file
 
         Raises:
-
+            TimeSeriesError: if the file could not be found or contains improper data, or if identifier is NaN,
+                logged with level "ERROR"
         """
         if not self._time_series_is_registered(identifier):
             self._register_time_series(identifier)
 
-    def _time_series_is_registered(self, identifier:
+    def _time_series_is_registered(self, identifier: str | int | float) -> bool:
        """Returns True if the value was already registered"""
-        return identifier in self._series_by_id
+        return identifier in self._series_by_id
+
+    def _register_time_series(self, identifier: str | int | float) -> None:
+        """
+        Assigns an id to the given `identifier` and loads the time series into a dataframe
+
+        Args:
+            identifier: to be registered - either a single numeric value or a string pointing to a timeseries file
 
-
-
+        Raises:
+            TimeSeriesError: if the file could not be found or contains improper data, or if identifier is NaN,
+                logged with level "ERROR"
+        """
         self._id_count += 1
         name, series = self._get_name_and_dataframe(identifier)
         self._series_by_id[identifier] = {Entry.ID: self._id_count, Entry.NAME: name, Entry.DATA: series}
 
-    def _get_name_and_dataframe(self, identifier:
-        """
+    def _get_name_and_dataframe(self, identifier: str | int | float) -> tuple[str, pd.DataFrame]:
+        """
+        Returns name and DataFrame containing the series obtained from the given `identifier`
+
+        Args:
+            identifier: to be registered - either a single numeric value or a string pointing to a timeseries file
+
+        Returns:
+            tuple of name & dataframe
+
+        Raises:
+            TimeSeriesError: if the file could not be found or contains improper data, or if identifier is NaN,
+                logged with level "ERROR"
+        """
         if isinstance(identifier, str):
             series_path = self._path_resolver.resolve_series_file_path(Path(identifier).as_posix())
             if series_path and os.path.exists(series_path):
-                data = pd.read_csv(series_path, sep=";", header=None, comment="#")
                 try:
-
+                    data = pd.read_csv(series_path, sep=";", header=None, comment="#")
+                except OSError as e:
+                    raise log_error(TimeSeriesError(e)) from e
+                try:
+                    return identifier, self._check_and_convert_series(data, identifier)
                 except TypeError as e:
-
-                except ConversionError:
-
+                    raise log_error(TimeSeriesError(self._ERR_CORRUPT_TIME_SERIES_VALUE.format(identifier), e)) from e
+                except ConversionError as e:
+                    raise log_error(TimeSeriesError(self._ERR_CORRUPT_TIME_SERIES_KEY.format(identifier), e)) from e
             else:
                 message = self._ERR_FILE_NOT_FOUND.format(identifier)
                 if self._is_number_string(identifier):
                     message += self._ERR_NUMERIC_STRING
-
+                raise log_error(TimeSeriesError(message))
         else:
             return self._create_timeseries_from_value(identifier)
 
-    def _check_and_convert_series(self, data: pd.DataFrame) -> pd.DataFrame:
-        """
-
-
-
+    def _check_and_convert_series(self, data: pd.DataFrame, identifier: str) -> pd.DataFrame:
+        """
+        Ensures validity of time series and convert to required format for writing to disk
+
+        Args:
+            data: dataframe to be converted to expected format
+
+        Returns:
+            2-column dataframe, first column: integers, second column: floats (no NaN)
+
+        Raises:
+            ConversionError: if first data column could not be converted to integer, logged with level "ERROR"
+            TypeError: if second data column in given data could not be converted to float or contained NaN,
+                logged with level "ERROR"
+        """
+        data, additional_columns = data.loc[:, :2], data.loc[:, 2:]
+        if not additional_columns.dropna(how="all").empty:
             log().warning(self._WARN_DATA_IGNORED)
         if data.dtypes[0] != "int64":
+            if len(data[0]) > FILE_LENGTH_WARN_LIMIT:
+                log().warning(self._WARN_LARGE_CONVERSION.format(identifier))
             data[0] = [FameTime.convert_string_if_is_datetime(time) for time in data[0]]
-        data[1]
+        if data.dtypes[1] != "float64":
+            data[1] = [TimeSeriesManager._assert_float(value) for value in data[1]]
+        if data[1].isna().any():
+            raise log_error(TypeError(TimeSeriesManager._ERR_NAN_VALUE))
         return data
 
     @staticmethod
-    def
-        """
+    def _assert_float(value: Any) -> float:
+        """
+        Converts any given value to a float or raise an Exception
+
+        Args:
+            value: to be converted to float
+
+        Returns:
+            float representation of value
+
+        Raises:
+            TypeError: if given value cannot be converted to float, logged with level "ERROR"
+        """
         try:
             value = float(value)
-        except ValueError:
-
-        if math.isnan(value):
-            log_error_and_raise(TypeError(TimeSeriesManager._ERR_NAN_VALUE))
+        except ValueError as e:
+            raise log_error(TypeError(TimeSeriesManager._ERR_NON_NUMERIC.format(value))) from e
         return value
 
     @staticmethod
@@ -127,14 +189,25 @@ class TimeSeriesManager:
         return False
 
     @staticmethod
-    def _create_timeseries_from_value(value:
-        """
+    def _create_timeseries_from_value(value: int | float) -> tuple[str, pd.DataFrame]:
+        """
+        Returns name and dataframe for a new static timeseries created from the given `value`
+
+        Args:
+            value: the static value of the timeseries to be created
+
+        Returns:
+            tuple of name & dataframe
+
+        Raises:
+            TimeSeriesError: if given value is NaN, logged with level "ERROR"
+        """
         if math.isnan(value):
-
+            raise log_error(TimeSeriesError(TimeSeriesManager._ERR_NAN_VALUE))
         data = pd.DataFrame({0: [INT64_MIN, INT64_MAX], 1: [value, value]})
         return TimeSeriesManager._CONSTANT_IDENTIFIER.format(value), data
 
-    def get_series_id_by_identifier(self, identifier:
+    def get_series_id_by_identifier(self, identifier: str | int | float) -> int:
         """
         Returns id for a previously stored time series by given `identifier`
 
@@ -145,11 +218,11 @@ class TimeSeriesManager:
             unique ID for the given identifier
 
         Raises:
-
+            TimeSeriesError: if identifier was not yet registered, logged with level "ERROR"
         """
         if not self._time_series_is_registered(identifier):
-
-        return self._series_by_id.get(identifier)[Entry.ID]
+            raise log_error(TimeSeriesError(self._ERR_UNREGISTERED_SERIES.format(identifier)))
+        return self._series_by_id.get(identifier)[Entry.ID]  # type: ignore[index]
 
     def get_all_series(self) -> list[tuple[int, str, pd.DataFrame]]:
         """Returns iterator over id, name and dataframe of all stored series"""
@@ -174,16 +247,28 @@ class TimeSeriesManager:
             )
             self._series_by_id[one_series.series_id] = reconstructed
 
-    def _get_cleaned_file_name(self, timeseries_name: str):
-
+    def _get_cleaned_file_name(self, timeseries_name: str) -> str:
+        """Ensure given file name has CSV file ending"""
+        if Path(timeseries_name).suffix.lower() == CSV_FILE_SUFFIX:
            filename = Path(timeseries_name).name
         else:
-            filename = clean_up_file_name(timeseries_name) +
+            filename = clean_up_file_name(timeseries_name) + CSV_FILE_SUFFIX
         return str(Path(self._TIMESERIES_RECONSTRUCTION_PATH, filename))
 
     def get_reconstructed_series_by_id(self, series_id: int) -> str:
-        """
-
+        """
+        Return name or path for given `series_id` if series these are identified by their number.
+        Use this only if series were added via `reconstruct_time_series`
+
+        Args:
+            series_id: number of series
+
+        Returns:
+            name or path of time series
+
+        Raises:
+            TimeSeriesError: if series was not registered during `reconstruct_time_series`, logged with level "ERROR"
+        """
         if series_id < 0 or series_id > self._id_count:
-
+            raise log_error(TimeSeriesError(self._ERR_UNREGISTERED_SERIES_RE.format(series_id)))
         return self._series_by_id[series_id][Entry.NAME]