nextmv 0.18.0__py3-none-any.whl → 1.0.0.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextmv/__about__.py +1 -1
- nextmv/__entrypoint__.py +8 -13
- nextmv/__init__.py +53 -0
- nextmv/_serialization.py +96 -0
- nextmv/base_model.py +54 -9
- nextmv/cli/CONTRIBUTING.md +511 -0
- nextmv/cli/__init__.py +0 -0
- nextmv/cli/cloud/__init__.py +47 -0
- nextmv/cli/cloud/acceptance/__init__.py +27 -0
- nextmv/cli/cloud/acceptance/create.py +393 -0
- nextmv/cli/cloud/acceptance/delete.py +68 -0
- nextmv/cli/cloud/acceptance/get.py +104 -0
- nextmv/cli/cloud/acceptance/list.py +62 -0
- nextmv/cli/cloud/acceptance/update.py +95 -0
- nextmv/cli/cloud/account/__init__.py +28 -0
- nextmv/cli/cloud/account/create.py +83 -0
- nextmv/cli/cloud/account/delete.py +60 -0
- nextmv/cli/cloud/account/get.py +66 -0
- nextmv/cli/cloud/account/update.py +70 -0
- nextmv/cli/cloud/app/__init__.py +35 -0
- nextmv/cli/cloud/app/create.py +141 -0
- nextmv/cli/cloud/app/delete.py +58 -0
- nextmv/cli/cloud/app/exists.py +44 -0
- nextmv/cli/cloud/app/get.py +66 -0
- nextmv/cli/cloud/app/list.py +61 -0
- nextmv/cli/cloud/app/push.py +137 -0
- nextmv/cli/cloud/app/update.py +124 -0
- nextmv/cli/cloud/batch/__init__.py +29 -0
- nextmv/cli/cloud/batch/create.py +454 -0
- nextmv/cli/cloud/batch/delete.py +68 -0
- nextmv/cli/cloud/batch/get.py +104 -0
- nextmv/cli/cloud/batch/list.py +63 -0
- nextmv/cli/cloud/batch/metadata.py +66 -0
- nextmv/cli/cloud/batch/update.py +95 -0
- nextmv/cli/cloud/data/__init__.py +26 -0
- nextmv/cli/cloud/data/upload.py +162 -0
- nextmv/cli/cloud/ensemble/__init__.py +31 -0
- nextmv/cli/cloud/ensemble/create.py +414 -0
- nextmv/cli/cloud/ensemble/delete.py +67 -0
- nextmv/cli/cloud/ensemble/get.py +65 -0
- nextmv/cli/cloud/ensemble/update.py +103 -0
- nextmv/cli/cloud/input_set/__init__.py +30 -0
- nextmv/cli/cloud/input_set/create.py +170 -0
- nextmv/cli/cloud/input_set/get.py +63 -0
- nextmv/cli/cloud/input_set/list.py +63 -0
- nextmv/cli/cloud/input_set/update.py +123 -0
- nextmv/cli/cloud/instance/__init__.py +35 -0
- nextmv/cli/cloud/instance/create.py +290 -0
- nextmv/cli/cloud/instance/delete.py +62 -0
- nextmv/cli/cloud/instance/exists.py +39 -0
- nextmv/cli/cloud/instance/get.py +62 -0
- nextmv/cli/cloud/instance/list.py +60 -0
- nextmv/cli/cloud/instance/update.py +216 -0
- nextmv/cli/cloud/managed_input/__init__.py +31 -0
- nextmv/cli/cloud/managed_input/create.py +146 -0
- nextmv/cli/cloud/managed_input/delete.py +65 -0
- nextmv/cli/cloud/managed_input/get.py +63 -0
- nextmv/cli/cloud/managed_input/list.py +60 -0
- nextmv/cli/cloud/managed_input/update.py +97 -0
- nextmv/cli/cloud/run/__init__.py +37 -0
- nextmv/cli/cloud/run/cancel.py +37 -0
- nextmv/cli/cloud/run/create.py +530 -0
- nextmv/cli/cloud/run/get.py +199 -0
- nextmv/cli/cloud/run/input.py +86 -0
- nextmv/cli/cloud/run/list.py +80 -0
- nextmv/cli/cloud/run/logs.py +167 -0
- nextmv/cli/cloud/run/metadata.py +67 -0
- nextmv/cli/cloud/run/track.py +501 -0
- nextmv/cli/cloud/scenario/__init__.py +29 -0
- nextmv/cli/cloud/scenario/create.py +451 -0
- nextmv/cli/cloud/scenario/delete.py +65 -0
- nextmv/cli/cloud/scenario/get.py +102 -0
- nextmv/cli/cloud/scenario/list.py +63 -0
- nextmv/cli/cloud/scenario/metadata.py +67 -0
- nextmv/cli/cloud/scenario/update.py +93 -0
- nextmv/cli/cloud/secrets/__init__.py +33 -0
- nextmv/cli/cloud/secrets/create.py +206 -0
- nextmv/cli/cloud/secrets/delete.py +67 -0
- nextmv/cli/cloud/secrets/get.py +66 -0
- nextmv/cli/cloud/secrets/list.py +60 -0
- nextmv/cli/cloud/secrets/update.py +147 -0
- nextmv/cli/cloud/shadow/__init__.py +33 -0
- nextmv/cli/cloud/shadow/create.py +184 -0
- nextmv/cli/cloud/shadow/delete.py +68 -0
- nextmv/cli/cloud/shadow/get.py +61 -0
- nextmv/cli/cloud/shadow/list.py +63 -0
- nextmv/cli/cloud/shadow/metadata.py +66 -0
- nextmv/cli/cloud/shadow/start.py +43 -0
- nextmv/cli/cloud/shadow/stop.py +43 -0
- nextmv/cli/cloud/shadow/update.py +95 -0
- nextmv/cli/cloud/upload/__init__.py +22 -0
- nextmv/cli/cloud/upload/create.py +39 -0
- nextmv/cli/cloud/version/__init__.py +33 -0
- nextmv/cli/cloud/version/create.py +97 -0
- nextmv/cli/cloud/version/delete.py +62 -0
- nextmv/cli/cloud/version/exists.py +39 -0
- nextmv/cli/cloud/version/get.py +62 -0
- nextmv/cli/cloud/version/list.py +60 -0
- nextmv/cli/cloud/version/update.py +92 -0
- nextmv/cli/community/__init__.py +24 -0
- nextmv/cli/community/clone.py +270 -0
- nextmv/cli/community/list.py +265 -0
- nextmv/cli/configuration/__init__.py +23 -0
- nextmv/cli/configuration/config.py +195 -0
- nextmv/cli/configuration/create.py +94 -0
- nextmv/cli/configuration/delete.py +67 -0
- nextmv/cli/configuration/list.py +77 -0
- nextmv/cli/main.py +188 -0
- nextmv/cli/message.py +153 -0
- nextmv/cli/options.py +206 -0
- nextmv/cli/version.py +38 -0
- nextmv/cloud/__init__.py +71 -17
- nextmv/cloud/acceptance_test.py +757 -51
- nextmv/cloud/account.py +406 -17
- nextmv/cloud/application/__init__.py +957 -0
- nextmv/cloud/application/_acceptance.py +419 -0
- nextmv/cloud/application/_batch_scenario.py +860 -0
- nextmv/cloud/application/_ensemble.py +251 -0
- nextmv/cloud/application/_input_set.py +227 -0
- nextmv/cloud/application/_instance.py +289 -0
- nextmv/cloud/application/_managed_input.py +227 -0
- nextmv/cloud/application/_run.py +1393 -0
- nextmv/cloud/application/_secrets.py +294 -0
- nextmv/cloud/application/_shadow.py +314 -0
- nextmv/cloud/application/_utils.py +54 -0
- nextmv/cloud/application/_version.py +303 -0
- nextmv/cloud/assets.py +48 -0
- nextmv/cloud/batch_experiment.py +294 -33
- nextmv/cloud/client.py +307 -66
- nextmv/cloud/ensemble.py +247 -0
- nextmv/cloud/input_set.py +120 -2
- nextmv/cloud/instance.py +133 -8
- nextmv/cloud/integration.py +533 -0
- nextmv/cloud/package.py +168 -53
- nextmv/cloud/scenario.py +410 -0
- nextmv/cloud/secrets.py +234 -0
- nextmv/cloud/shadow.py +190 -0
- nextmv/cloud/url.py +73 -0
- nextmv/cloud/version.py +132 -4
- nextmv/default_app/.gitignore +1 -0
- nextmv/default_app/README.md +32 -0
- nextmv/default_app/app.yaml +12 -0
- nextmv/default_app/input.json +5 -0
- nextmv/default_app/main.py +37 -0
- nextmv/default_app/requirements.txt +2 -0
- nextmv/default_app/src/__init__.py +0 -0
- nextmv/default_app/src/visuals.py +36 -0
- nextmv/deprecated.py +47 -0
- nextmv/input.py +861 -90
- nextmv/local/__init__.py +5 -0
- nextmv/local/application.py +1251 -0
- nextmv/local/executor.py +1042 -0
- nextmv/local/geojson_handler.py +323 -0
- nextmv/local/local.py +97 -0
- nextmv/local/plotly_handler.py +61 -0
- nextmv/local/runner.py +274 -0
- nextmv/logger.py +80 -9
- nextmv/manifest.py +1466 -0
- nextmv/model.py +241 -66
- nextmv/options.py +708 -115
- nextmv/output.py +1301 -274
- nextmv/polling.py +325 -0
- nextmv/run.py +1702 -0
- nextmv/safe.py +145 -0
- nextmv/status.py +122 -0
- nextmv-1.0.0.dev2.dist-info/METADATA +311 -0
- nextmv-1.0.0.dev2.dist-info/RECORD +170 -0
- {nextmv-0.18.0.dist-info → nextmv-1.0.0.dev2.dist-info}/WHEEL +1 -1
- nextmv-1.0.0.dev2.dist-info/entry_points.txt +2 -0
- nextmv/cloud/application.py +0 -1405
- nextmv/cloud/manifest.py +0 -234
- nextmv/cloud/status.py +0 -29
- nextmv-0.18.0.dist-info/METADATA +0 -770
- nextmv-0.18.0.dist-info/RECORD +0 -25
- {nextmv-0.18.0.dist-info → nextmv-1.0.0.dev2.dist-info}/licenses/LICENSE +0 -0
nextmv/input.py
CHANGED
|
@@ -1,28 +1,360 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""
|
|
2
|
+
Module for handling input sources and data.
|
|
3
|
+
|
|
4
|
+
This module provides classes and functions for loading and handling input data
|
|
5
|
+
in various formats for decision problems. It supports JSON, plain text, CSV,
|
|
6
|
+
and CSV archive formats and can load data from standard input or files.
|
|
7
|
+
|
|
8
|
+
Classes
|
|
9
|
+
-------
|
|
10
|
+
InputFormat
|
|
11
|
+
Enum defining supported input data formats (JSON, TEXT, CSV, CSV_ARCHIVE).
|
|
12
|
+
Input
|
|
13
|
+
Container for input data with format specification and options.
|
|
14
|
+
InputLoader
|
|
15
|
+
Base class for loading inputs from various sources.
|
|
16
|
+
LocalInputLoader
|
|
17
|
+
Class for loading inputs from local files or stdin.
|
|
18
|
+
|
|
19
|
+
Functions
|
|
20
|
+
---------
|
|
21
|
+
load
|
|
22
|
+
Load input data using a specified loader.
|
|
23
|
+
|
|
24
|
+
Attributes
|
|
25
|
+
----------
|
|
26
|
+
INPUTS_KEY : str
|
|
27
|
+
Key used for identifying inputs in the run.
|
|
28
|
+
"""
|
|
2
29
|
|
|
3
30
|
import copy
|
|
4
31
|
import csv
|
|
5
32
|
import json
|
|
6
33
|
import os
|
|
7
34
|
import sys
|
|
35
|
+
from collections.abc import Callable
|
|
8
36
|
from dataclasses import dataclass
|
|
9
37
|
from enum import Enum
|
|
10
|
-
from typing import Any
|
|
38
|
+
from typing import Any
|
|
11
39
|
|
|
40
|
+
from nextmv._serialization import serialize_json
|
|
41
|
+
from nextmv.deprecated import deprecated
|
|
12
42
|
from nextmv.options import Options
|
|
13
43
|
|
|
44
|
+
INPUTS_KEY = "inputs"
|
|
45
|
+
"""
|
|
46
|
+
Inputs key constant used for identifying inputs in the run.
|
|
47
|
+
"""
|
|
48
|
+
|
|
14
49
|
|
|
15
50
|
class InputFormat(str, Enum):
|
|
16
|
-
"""
|
|
51
|
+
"""
|
|
52
|
+
Format of an `Input`.
|
|
53
|
+
|
|
54
|
+
You can import the `InputFormat` class directly from `nextmv`:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from nextmv import InputFormat
|
|
58
|
+
```
|
|
17
59
|
|
|
18
|
-
|
|
60
|
+
This enum specifies the supported formats for input data.
|
|
61
|
+
|
|
62
|
+
Attributes
|
|
63
|
+
----------
|
|
64
|
+
JSON : str
|
|
65
|
+
JSON format, utf-8 encoded.
|
|
66
|
+
TEXT : str
|
|
67
|
+
Text format, utf-8 encoded.
|
|
68
|
+
CSV : str
|
|
69
|
+
CSV format, utf-8 encoded.
|
|
70
|
+
CSV_ARCHIVE : str
|
|
71
|
+
CSV archive format: multiple CSV files.
|
|
72
|
+
MULTI_FILE : str
|
|
73
|
+
Multi-file format, used for loading multiple files in a single input.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
JSON = "json"
|
|
19
77
|
"""JSON format, utf-8 encoded."""
|
|
20
|
-
TEXT = "
|
|
78
|
+
TEXT = "text"
|
|
21
79
|
"""Text format, utf-8 encoded."""
|
|
22
|
-
|
|
23
|
-
"""CSV format, utf-8 encoded."""
|
|
24
|
-
CSV_ARCHIVE = "CSV_ARCHIVE"
|
|
80
|
+
CSV_ARCHIVE = "csv-archive"
|
|
25
81
|
"""CSV archive format: multiple CSV files."""
|
|
82
|
+
MULTI_FILE = "multi-file"
|
|
83
|
+
"""Multi-file format, used for loading multiple files in a single input."""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
|
|
87
|
+
class DataFile:
|
|
88
|
+
"""
|
|
89
|
+
Represents data to be read from a file.
|
|
90
|
+
|
|
91
|
+
You can import the `DataFile` class directly from `nextmv`:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from nextmv import DataFile
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
This class is used to define data that will be read from a file in the
|
|
98
|
+
filesystem. It includes the name of the file, and the reader function that
|
|
99
|
+
will handle the loading, and deserialization of the data from the file.
|
|
100
|
+
This `DataFile` class is typically used in the `Input`, when the
|
|
101
|
+
`Input.input_format` is set to `InputFormat.MULTI_FILE`. Given that it is
|
|
102
|
+
difficult to handle every edge case of how data is deserialized, and read
|
|
103
|
+
from a file, this class exists so that the user can implement the `loader`
|
|
104
|
+
callable of their choice and provide it with any `loader_args` and
|
|
105
|
+
`loader_kwargs` they might need.
|
|
106
|
+
|
|
107
|
+
Parameters
|
|
108
|
+
----------
|
|
109
|
+
name : str
|
|
110
|
+
Name of the data (input) file. The file extension should be included in
|
|
111
|
+
the name.
|
|
112
|
+
loader : Callable[[str], Any]
|
|
113
|
+
Callable that reads the data from the file. This should be a function
|
|
114
|
+
implemented by the user. There are convenience functions that you can
|
|
115
|
+
use as a `loader` as well. The `loader` must receive, at the very minimum,
|
|
116
|
+
the following arguments:
|
|
117
|
+
|
|
118
|
+
- `file_path`: a `str` argument which is the location where this
|
|
119
|
+
data will be read from. This includes the dir and name of the
|
|
120
|
+
file. As such, the `name` parameter of this class is going to be
|
|
121
|
+
passed to the `loader` function, joined with the directory where the
|
|
122
|
+
file will be read from.
|
|
123
|
+
|
|
124
|
+
The `loader` can also receive additional arguments, and keyword
|
|
125
|
+
arguments. The `loader_args` and `loader_kwargs` parameters of this
|
|
126
|
+
class can be used to provide those additional arguments.
|
|
127
|
+
|
|
128
|
+
The `loader` function should return the data that will be used in the
|
|
129
|
+
model.
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
name: str
|
|
133
|
+
"""
|
|
134
|
+
Name of the data (input) file. The file extension should be included in the
|
|
135
|
+
name.
|
|
136
|
+
"""
|
|
137
|
+
loader: Callable[[str], Any]
|
|
138
|
+
"""
|
|
139
|
+
Callable that reads (loads) the data from the file. This should be a function
|
|
140
|
+
implemented by the user. There are convenience functions that you can use
|
|
141
|
+
as a `loader` as well. The `loader` must receive, at the very minimum, the
|
|
142
|
+
following arguments:
|
|
143
|
+
|
|
144
|
+
- `file_path`: a `str` argument which is the location where this
|
|
145
|
+
data will be read from. This includes the dir and name of the
|
|
146
|
+
file. As such, the `name` parameter of this class is going to be
|
|
147
|
+
passed to the `loader` function, joined with the directory where the
|
|
148
|
+
file will be read from.
|
|
149
|
+
|
|
150
|
+
The `loader` can also receive additional arguments, and keyword arguments.
|
|
151
|
+
The `loader_args` and `loader_kwargs` parameters of this class can be used
|
|
152
|
+
to provide those additional arguments.
|
|
153
|
+
|
|
154
|
+
The `loader` function should return the data that will be used in the model.
|
|
155
|
+
"""
|
|
156
|
+
loader_kwargs: dict[str, Any] | None = None
|
|
157
|
+
"""
|
|
158
|
+
Optional keyword arguments to pass to the loader function. This can be used
|
|
159
|
+
to customize the behavior of the loader.
|
|
160
|
+
"""
|
|
161
|
+
loader_args: list[Any] | None = None
|
|
162
|
+
"""
|
|
163
|
+
Optional positional arguments to pass to the loader function. This can be
|
|
164
|
+
used to customize the behavior of the loader.
|
|
165
|
+
"""
|
|
166
|
+
input_data_key: str | None = None
|
|
167
|
+
"""
|
|
168
|
+
Use this parameter to set a custom key to represent your file.
|
|
169
|
+
|
|
170
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
171
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
172
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
173
|
+
represents the file name (with extension) and the value is the data that is
|
|
174
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
175
|
+
custom key to represent your file by using this attribute.
|
|
176
|
+
"""
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def json_data_file(
|
|
180
|
+
name: str,
|
|
181
|
+
json_configurations: dict[str, Any] | None = None,
|
|
182
|
+
input_data_key: str | None = None,
|
|
183
|
+
) -> DataFile:
|
|
184
|
+
"""
|
|
185
|
+
This is a convenience function to create a `DataFile` that reads JSON data.
|
|
186
|
+
|
|
187
|
+
You can import the `json_data_file` function directly from `nextmv`:
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from nextmv import json_data_file
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Parameters
|
|
194
|
+
----------
|
|
195
|
+
name : str
|
|
196
|
+
Name of the data file. You don't need to include the `.json` extension.
|
|
197
|
+
json_configurations : dict[str, Any], optional
|
|
198
|
+
JSON-specific configurations for reading the data.
|
|
199
|
+
input_data_key : str, optional
|
|
200
|
+
A custom key to represent the data from this file.
|
|
201
|
+
|
|
202
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
203
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
204
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
205
|
+
represents the file name (with extension) and the value is the data that is
|
|
206
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
207
|
+
custom key to represent your file by using this attribute.
|
|
208
|
+
|
|
209
|
+
Returns
|
|
210
|
+
-------
|
|
211
|
+
DataFile
|
|
212
|
+
A `DataFile` instance that reads JSON data from a file with the given
|
|
213
|
+
name.
|
|
214
|
+
|
|
215
|
+
Examples
|
|
216
|
+
--------
|
|
217
|
+
>>> from nextmv import json_data_file
|
|
218
|
+
>>> data_file = json_data_file("my_data")
|
|
219
|
+
>>> data = data_file.read()
|
|
220
|
+
>>> print(data)
|
|
221
|
+
{
|
|
222
|
+
"key": "value",
|
|
223
|
+
"another_key": [1, 2, 3]
|
|
224
|
+
}
|
|
225
|
+
"""
|
|
226
|
+
|
|
227
|
+
if not name.endswith(".json"):
|
|
228
|
+
name += ".json"
|
|
229
|
+
|
|
230
|
+
json_configurations = json_configurations or {}
|
|
231
|
+
|
|
232
|
+
def loader(file_path: str) -> dict[str, Any] | Any:
|
|
233
|
+
with open(file_path, encoding="utf-8") as f:
|
|
234
|
+
return json.load(f, **json_configurations)
|
|
235
|
+
|
|
236
|
+
return DataFile(
|
|
237
|
+
name=name,
|
|
238
|
+
loader=loader,
|
|
239
|
+
input_data_key=input_data_key,
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def csv_data_file(
|
|
244
|
+
name: str,
|
|
245
|
+
csv_configurations: dict[str, Any] | None = None,
|
|
246
|
+
input_data_key: str | None = None,
|
|
247
|
+
) -> DataFile:
|
|
248
|
+
"""
|
|
249
|
+
This is a convenience function to create a `DataFile` that reads CSV data.
|
|
250
|
+
|
|
251
|
+
You can import the `csv_data_file` function directly from `nextmv`:
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
from nextmv import csv_data_file
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
Parameters
|
|
258
|
+
----------
|
|
259
|
+
name : str
|
|
260
|
+
Name of the data file. You don't need to include the `.csv` extension.
|
|
261
|
+
csv_configurations : dict[str, Any], optional
|
|
262
|
+
CSV-specific configurations for reading the data.
|
|
263
|
+
input_data_key : str, optional
|
|
264
|
+
A custom key to represent the data from this file.
|
|
265
|
+
|
|
266
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
267
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
268
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
269
|
+
represents the file name (with extension) and the value is the data that is
|
|
270
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
271
|
+
custom key to represent your file by using this attribute.
|
|
272
|
+
|
|
273
|
+
Returns
|
|
274
|
+
-------
|
|
275
|
+
DataFile
|
|
276
|
+
A `DataFile` instance that reads CSV data from a file with the given
|
|
277
|
+
name.
|
|
278
|
+
|
|
279
|
+
Examples
|
|
280
|
+
--------
|
|
281
|
+
>>> from nextmv import csv_data_file
|
|
282
|
+
>>> data_file = csv_data_file("my_data")
|
|
283
|
+
>>> data = data_file.read()
|
|
284
|
+
>>> print(data)
|
|
285
|
+
[
|
|
286
|
+
{"column1": "value1", "column2": "value2"},
|
|
287
|
+
{"column1": "value3", "column2": "value4"}
|
|
288
|
+
]
|
|
289
|
+
"""
|
|
290
|
+
|
|
291
|
+
if not name.endswith(".csv"):
|
|
292
|
+
name += ".csv"
|
|
293
|
+
|
|
294
|
+
csv_configurations = csv_configurations or {}
|
|
295
|
+
|
|
296
|
+
def loader(file_path: str) -> list[dict[str, Any]]:
|
|
297
|
+
with open(file_path, encoding="utf-8") as f:
|
|
298
|
+
return list(csv.DictReader(f, **csv_configurations))
|
|
299
|
+
|
|
300
|
+
return DataFile(
|
|
301
|
+
name=name,
|
|
302
|
+
loader=loader,
|
|
303
|
+
input_data_key=input_data_key,
|
|
304
|
+
)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def text_data_file(name: str, input_data_key: str | None = None) -> DataFile:
|
|
308
|
+
"""
|
|
309
|
+
This is a convenience function to create a `DataFile` that reads utf-8
|
|
310
|
+
encoded text data.
|
|
311
|
+
|
|
312
|
+
You can import the `text_data_file` function directly from `nextmv`:
|
|
313
|
+
|
|
314
|
+
```python
|
|
315
|
+
from nextmv import text_data_file
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
You must provide the extension as part of the `name` parameter.
|
|
319
|
+
|
|
320
|
+
Parameters
|
|
321
|
+
----------
|
|
322
|
+
name : str
|
|
323
|
+
Name of the data file. The file extension must be provided in the name.
|
|
324
|
+
input_data_key : str, optional
|
|
325
|
+
A custom key to represent the data from this file.
|
|
326
|
+
|
|
327
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
328
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
329
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
330
|
+
represents the file name (with extension) and the value is the data that is
|
|
331
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
332
|
+
custom key to represent your file by using this attribute.
|
|
333
|
+
|
|
334
|
+
Returns
|
|
335
|
+
-------
|
|
336
|
+
DataFile
|
|
337
|
+
A `DataFile` instance that reads text data from a file with the given
|
|
338
|
+
name.
|
|
339
|
+
|
|
340
|
+
Examples
|
|
341
|
+
--------
|
|
342
|
+
>>> from nextmv import text_data_file
|
|
343
|
+
>>> data_file = text_data_file("my_data")
|
|
344
|
+
>>> data = data_file.read()
|
|
345
|
+
>>> print(data)
|
|
346
|
+
This is some text data.
|
|
347
|
+
"""
|
|
348
|
+
|
|
349
|
+
def loader(file_path: str) -> str:
|
|
350
|
+
with open(file_path, encoding="utf-8") as f:
|
|
351
|
+
return f.read().rstrip("\n")
|
|
352
|
+
|
|
353
|
+
return DataFile(
|
|
354
|
+
name=name,
|
|
355
|
+
loader=loader,
|
|
356
|
+
input_data_key=input_data_key,
|
|
357
|
+
)
|
|
26
358
|
|
|
27
359
|
|
|
28
360
|
@dataclass
|
|
@@ -30,37 +362,94 @@ class Input:
|
|
|
30
362
|
"""
|
|
31
363
|
Input for a decision problem.
|
|
32
364
|
|
|
365
|
+
You can import the `Input` class directly from `nextmv`:
|
|
366
|
+
|
|
367
|
+
```python
|
|
368
|
+
from nextmv import Input
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
The `data`'s type must match the `input_format`:
|
|
372
|
+
|
|
373
|
+
- `InputFormat.JSON`: the data is `Union[dict[str, Any], Any]`. This just
|
|
374
|
+
means that the data must be JSON-deserializable, which includes dicts and
|
|
375
|
+
lists.
|
|
376
|
+
- `InputFormat.TEXT`: the data is `str`, and it must be utf-8 encoded.
|
|
377
|
+
- `InputFormat.CSV_ARCHIVE`: the data is `dict[str, list[dict[str, Any]]]`,
|
|
378
|
+
where each key is the name of a CSV file and the value is a list of dicts
|
|
379
|
+
representing the rows in that CSV file.
|
|
380
|
+
- `InputFormat.MULTI_FILE`: the data is `dict[str, Any]`, where for each
|
|
381
|
+
item, the key is the file name (with the extension) and the actual data
|
|
382
|
+
from the file is the value. When working with multi-file, data is loaded
|
|
383
|
+
from one or more files in a specific directory. Given that each file can
|
|
384
|
+
be of different types (JSON, CSV, Excel, etc...), the data captured from
|
|
385
|
+
each might vary. To reflect this, the data is loaded as a dict of items.
|
|
386
|
+
You can have a custom key for the data, that is not the file name, if
|
|
387
|
+
you use the `input_data_key` parameter of the `DataFile` class.
|
|
388
|
+
|
|
33
389
|
Parameters
|
|
34
390
|
----------
|
|
35
|
-
data : Any
|
|
391
|
+
data : Union[Union[dict[str, Any], Any], str, list[dict[str, Any]],
|
|
392
|
+
dict[str, list[dict[str, Any]]], dict[str, Any]]
|
|
36
393
|
The actual data.
|
|
37
394
|
input_format : InputFormat, optional
|
|
38
395
|
Format of the input data. Default is `InputFormat.JSON`.
|
|
39
396
|
options : Options, optional
|
|
40
397
|
Options that the input was created with.
|
|
398
|
+
|
|
399
|
+
Raises
|
|
400
|
+
------
|
|
401
|
+
ValueError
|
|
402
|
+
If the data type doesn't match the expected type for the given format.
|
|
403
|
+
ValueError
|
|
404
|
+
If the `input_format` is not one of the supported formats.
|
|
41
405
|
"""
|
|
42
406
|
|
|
43
|
-
data:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
407
|
+
data: dict[str, Any] | Any | str | list[dict[str, Any]] | dict[str, list[dict[str, Any]]] | dict[str, Any]
|
|
408
|
+
"""
|
|
409
|
+
The actual data.
|
|
410
|
+
|
|
411
|
+
The data can be of various types, depending on the input format:
|
|
412
|
+
|
|
413
|
+
- For `JSON`: `Union[dict[str, Any], Any]`
|
|
414
|
+
- For `TEXT`: `str`
|
|
415
|
+
- For `CSV`: `list[dict[str, Any]]`
|
|
416
|
+
- For `CSV_ARCHIVE`: `dict[str, list[dict[str, Any]]]`
|
|
417
|
+
- For `MULTI_FILE`: `dict[str, Any]`
|
|
418
|
+
"""
|
|
419
|
+
|
|
420
|
+
input_format: InputFormat | None = InputFormat.JSON
|
|
421
|
+
"""
|
|
422
|
+
Format of the input data.
|
|
423
|
+
|
|
424
|
+
Default is `InputFormat.JSON`.
|
|
425
|
+
"""
|
|
426
|
+
|
|
427
|
+
options: Options | None = None
|
|
428
|
+
"""
|
|
429
|
+
Options that the `Input` was created with.
|
|
51
430
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
"""Options that the `Input` were created with."""
|
|
431
|
+
A copy of the options is made during initialization, ensuring the original
|
|
432
|
+
options remain unchanged even if modified later.
|
|
433
|
+
"""
|
|
56
434
|
|
|
57
435
|
def __post_init__(self):
|
|
58
|
-
"""
|
|
59
|
-
class.
|
|
436
|
+
"""
|
|
437
|
+
Check that the data matches the format given to initialize the class.
|
|
438
|
+
|
|
439
|
+
This method is automatically called after the dataclass is initialized.
|
|
440
|
+
It validates that the data provided is of the correct type according to
|
|
441
|
+
the specified input_format and makes a deep copy of the options to ensure
|
|
442
|
+
the input maintains its own copy.
|
|
443
|
+
|
|
444
|
+
Raises
|
|
445
|
+
------
|
|
446
|
+
ValueError
|
|
447
|
+
If the data type doesn't match the expected type for the given format.
|
|
448
|
+
"""
|
|
60
449
|
|
|
61
450
|
if self.input_format == InputFormat.JSON:
|
|
62
451
|
try:
|
|
63
|
-
_ =
|
|
452
|
+
_ = serialize_json(self.data)
|
|
64
453
|
except (TypeError, OverflowError) as e:
|
|
65
454
|
raise ValueError(
|
|
66
455
|
f"Input has input_format InputFormat.JSON and "
|
|
@@ -73,16 +462,16 @@ class Input:
|
|
|
73
462
|
"input_format InputFormat.TEXT, supported type is `str`"
|
|
74
463
|
)
|
|
75
464
|
|
|
76
|
-
elif self.input_format == InputFormat.
|
|
465
|
+
elif self.input_format == InputFormat.CSV_ARCHIVE and not isinstance(self.data, dict):
|
|
77
466
|
raise ValueError(
|
|
78
467
|
f"unsupported Input.data type: {type(self.data)} with "
|
|
79
|
-
"input_format InputFormat.
|
|
468
|
+
"input_format InputFormat.CSV_ARCHIVE, supported type is `dict`"
|
|
80
469
|
)
|
|
81
470
|
|
|
82
|
-
elif self.input_format == InputFormat.
|
|
471
|
+
elif self.input_format == InputFormat.MULTI_FILE and not isinstance(self.data, dict):
|
|
83
472
|
raise ValueError(
|
|
84
473
|
f"unsupported Input.data type: {type(self.data)} with "
|
|
85
|
-
"input_format InputFormat.
|
|
474
|
+
"input_format InputFormat.MULTI_FILE, supported type is `dict`"
|
|
86
475
|
)
|
|
87
476
|
|
|
88
477
|
# Capture a snapshot of the options that were used to create the class
|
|
@@ -91,14 +480,69 @@ class Input:
|
|
|
91
480
|
new_options = copy.deepcopy(init_options)
|
|
92
481
|
self.options = new_options
|
|
93
482
|
|
|
483
|
+
def to_dict(self) -> dict[str, Any]:
|
|
484
|
+
"""
|
|
485
|
+
Convert the input to a dictionary.
|
|
486
|
+
|
|
487
|
+
This method serializes the Input object to a dictionary format that can
|
|
488
|
+
be easily converted to JSON or other serialization formats. When the
|
|
489
|
+
`input_format` is set to `InputFormat.MULTI_FILE`, it will not include
|
|
490
|
+
the `data` field, as it is uncertain how data is deserialized from the file.
|
|
491
|
+
|
|
492
|
+
Returns
|
|
493
|
+
-------
|
|
494
|
+
dict[str, Any]
|
|
495
|
+
A dictionary containing the input data, format, and options.
|
|
496
|
+
|
|
497
|
+
The structure is:
|
|
498
|
+
```python
|
|
499
|
+
{
|
|
500
|
+
"data": <the input data>,
|
|
501
|
+
"input_format": <the input format as a string>,
|
|
502
|
+
"options": <the options as a dictionary or None>
|
|
503
|
+
}
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
Examples
|
|
507
|
+
--------
|
|
508
|
+
>>> from nextmv.input import Input, InputFormat
|
|
509
|
+
>>> input_obj = Input(data={"key": "value"}, input_format=InputFormat.JSON)
|
|
510
|
+
>>> input_dict = input_obj.to_dict()
|
|
511
|
+
>>> print(input_dict)
|
|
512
|
+
{'data': {'key': 'value'}, 'input_format': 'json', 'options': None}
|
|
513
|
+
"""
|
|
514
|
+
|
|
515
|
+
input_dict = {
|
|
516
|
+
"input_format": self.input_format.value,
|
|
517
|
+
"options": self.options.to_dict() if self.options is not None else None,
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
if self.input_format == InputFormat.MULTI_FILE:
|
|
521
|
+
return input_dict
|
|
522
|
+
|
|
523
|
+
input_dict["data"] = self.data
|
|
524
|
+
|
|
525
|
+
return input_dict
|
|
526
|
+
|
|
94
527
|
|
|
95
528
|
class InputLoader:
|
|
96
|
-
"""
|
|
529
|
+
"""
|
|
530
|
+
Base class for loading inputs.
|
|
531
|
+
|
|
532
|
+
You can import the `InputLoader` class directly from `nextmv`:
|
|
533
|
+
|
|
534
|
+
```python
|
|
535
|
+
from nextmv import InputLoader
|
|
536
|
+
```
|
|
537
|
+
|
|
538
|
+
This abstract class defines the interface for input loaders. Subclasses must
|
|
539
|
+
implement the `load` method to provide concrete input loading functionality.
|
|
540
|
+
"""
|
|
97
541
|
|
|
98
542
|
def load(
|
|
99
543
|
self,
|
|
100
544
|
input_format: InputFormat = InputFormat.JSON,
|
|
101
|
-
options:
|
|
545
|
+
options: Options | None = None,
|
|
102
546
|
*args,
|
|
103
547
|
**kwargs,
|
|
104
548
|
) -> Input:
|
|
@@ -133,20 +577,82 @@ class InputLoader:
|
|
|
133
577
|
|
|
134
578
|
class LocalInputLoader(InputLoader):
|
|
135
579
|
"""
|
|
136
|
-
Class for loading local inputs.
|
|
137
|
-
|
|
580
|
+
Class for loading local inputs.
|
|
581
|
+
|
|
582
|
+
You can import the `LocalInputLoader` class directly from `nextmv`:
|
|
583
|
+
|
|
584
|
+
```python
|
|
585
|
+
from nextmv import LocalInputLoader
|
|
586
|
+
```
|
|
587
|
+
|
|
588
|
+
This class can load input data from the local filesystem, by using stdin,
|
|
589
|
+
a file, or a directory, where applicable. It supports various input formats
|
|
590
|
+
like JSON, TEXT, CSV, and CSV archive.
|
|
591
|
+
|
|
138
592
|
Call the `load` method to read the input data.
|
|
593
|
+
|
|
594
|
+
Examples
|
|
595
|
+
--------
|
|
596
|
+
>>> from nextmv.input import LocalInputLoader, InputFormat
|
|
597
|
+
>>> loader = LocalInputLoader()
|
|
598
|
+
>>> # Load JSON from stdin or file
|
|
599
|
+
>>> input_obj = loader.load(input_format=InputFormat.JSON, path="data.json")
|
|
139
600
|
"""
|
|
140
601
|
|
|
141
602
|
def _read_text(path: str, _) -> str:
|
|
603
|
+
"""
|
|
604
|
+
Read a text file and return its contents.
|
|
605
|
+
|
|
606
|
+
Parameters
|
|
607
|
+
----------
|
|
608
|
+
path : str
|
|
609
|
+
Path to the text file.
|
|
610
|
+
_ : Any
|
|
611
|
+
Placeholder for unused parameter (for API consistency).
|
|
612
|
+
|
|
613
|
+
Returns
|
|
614
|
+
-------
|
|
615
|
+
str
|
|
616
|
+
Contents of the text file with trailing newlines removed.
|
|
617
|
+
"""
|
|
142
618
|
with open(path, encoding="utf-8") as f:
|
|
143
619
|
return f.read().rstrip("\n")
|
|
144
620
|
|
|
145
|
-
def _read_csv(path: str, csv_configurations:
|
|
621
|
+
def _read_csv(path: str, csv_configurations: dict[str, Any] | None) -> list[dict[str, Any]]:
|
|
622
|
+
"""
|
|
623
|
+
Read a CSV file and return its contents as a list of dictionaries.
|
|
624
|
+
|
|
625
|
+
Parameters
|
|
626
|
+
----------
|
|
627
|
+
path : str
|
|
628
|
+
Path to the CSV file.
|
|
629
|
+
csv_configurations : dict[str, Any], optional
|
|
630
|
+
Configuration parameters for the CSV DictReader.
|
|
631
|
+
|
|
632
|
+
Returns
|
|
633
|
+
-------
|
|
634
|
+
list[dict[str, Any]]
|
|
635
|
+
List of dictionaries where each dictionary represents a row in the CSV.
|
|
636
|
+
"""
|
|
146
637
|
with open(path, encoding="utf-8") as f:
|
|
147
638
|
return list(csv.DictReader(f, **csv_configurations))
|
|
148
639
|
|
|
149
|
-
def _read_json(path: str, _) ->
|
|
640
|
+
def _read_json(path: str, _) -> dict[str, Any] | Any:
|
|
641
|
+
"""
|
|
642
|
+
Read a JSON file and return its parsed contents.
|
|
643
|
+
|
|
644
|
+
Parameters
|
|
645
|
+
----------
|
|
646
|
+
path : str
|
|
647
|
+
Path to the JSON file.
|
|
648
|
+
_ : Any
|
|
649
|
+
Placeholder for unused parameter (for API consistency).
|
|
650
|
+
|
|
651
|
+
Returns
|
|
652
|
+
-------
|
|
653
|
+
Union[dict[str, Any], Any]
|
|
654
|
+
Parsed JSON data.
|
|
655
|
+
"""
|
|
150
656
|
with open(path, encoding="utf-8") as f:
|
|
151
657
|
return json.load(f)
|
|
152
658
|
|
|
@@ -154,43 +660,60 @@ class LocalInputLoader(InputLoader):
|
|
|
154
660
|
STDIN_READERS = {
|
|
155
661
|
InputFormat.JSON: lambda _: json.load(sys.stdin),
|
|
156
662
|
InputFormat.TEXT: lambda _: sys.stdin.read().rstrip("\n"),
|
|
157
|
-
InputFormat.CSV: lambda csv_configurations: list(csv.DictReader(sys.stdin, **csv_configurations)),
|
|
158
663
|
}
|
|
664
|
+
"""
|
|
665
|
+
Dictionary of functions to read from standard input.
|
|
666
|
+
|
|
667
|
+
Each key is an InputFormat, and each value is a function that reads from
|
|
668
|
+
standard input in that format.
|
|
669
|
+
"""
|
|
670
|
+
|
|
159
671
|
# These callbacks were not implemented with lambda because we needed
|
|
160
672
|
# multiple lines. By using `open`, we needed the `with` to be able to close
|
|
161
673
|
# the file.
|
|
162
674
|
FILE_READERS = {
|
|
163
675
|
InputFormat.JSON: _read_json,
|
|
164
676
|
InputFormat.TEXT: _read_text,
|
|
165
|
-
|
|
677
|
+
"CSV": _read_csv,
|
|
166
678
|
}
|
|
679
|
+
"""
|
|
680
|
+
Dictionary of functions to read from files.
|
|
681
|
+
|
|
682
|
+
Each key is an InputFormat, and each value is a function that reads from
|
|
683
|
+
a file in that format.
|
|
684
|
+
"""
|
|
167
685
|
|
|
168
686
|
def load(
|
|
169
687
|
self,
|
|
170
|
-
input_format:
|
|
171
|
-
options:
|
|
172
|
-
path:
|
|
173
|
-
csv_configurations:
|
|
688
|
+
input_format: InputFormat | None = InputFormat.JSON,
|
|
689
|
+
options: Options | None = None,
|
|
690
|
+
path: str | None = None,
|
|
691
|
+
csv_configurations: dict[str, Any] | None = None,
|
|
692
|
+
data_files: list[DataFile] | None = None,
|
|
174
693
|
) -> Input:
|
|
175
694
|
"""
|
|
176
695
|
Load the input data. The input data can be in various formats. For
|
|
177
|
-
`InputFormat.JSON
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
696
|
+
`InputFormat.JSON` and `InputFormat.TEXT`, the data can be streamed
|
|
697
|
+
from stdin or read from a file. When the `path` argument is provided
|
|
698
|
+
(and valid), the input data is read from the file specified by `path`,
|
|
699
|
+
otherwise, it is streamed from stdin. For `InputFormat.CSV_ARCHIVE`,
|
|
700
|
+
the input data is read from the directory specified by `path`. If the
|
|
701
|
+
`path` is not provided, the default location `input` is used. The
|
|
702
|
+
directory should contain one or more files, where each file in the
|
|
703
|
+
directory is a CSV file.
|
|
185
704
|
|
|
186
705
|
The `Input` that is returned contains the `data` attribute. This data
|
|
187
706
|
can be of different types, depending on the provided `input_format`:
|
|
188
707
|
|
|
189
708
|
- `InputFormat.JSON`: the data is a `dict[str, Any]`.
|
|
190
709
|
- `InputFormat.TEXT`: the data is a `str`.
|
|
191
|
-
- `InputFormat.
|
|
192
|
-
|
|
193
|
-
|
|
710
|
+
- `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str,
|
|
711
|
+
Any]]]`. Each key is the name of the CSV file, minus the `.csv`
|
|
712
|
+
extension.
|
|
713
|
+
- `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`, where each
|
|
714
|
+
key is the file name (with extension) and the value is the data read
|
|
715
|
+
from the file. The data can be of any type, depending on the file
|
|
716
|
+
type and the reader function provided in the `DataFile` instances.
|
|
194
717
|
|
|
195
718
|
Parameters
|
|
196
719
|
----------
|
|
@@ -204,6 +727,16 @@ class LocalInputLoader(InputLoader):
|
|
|
204
727
|
Configurations for loading CSV files. The default `DictReader` is
|
|
205
728
|
used when loading a CSV file, so you have the option to pass in a
|
|
206
729
|
dictionary with custom kwargs for the `DictReader`.
|
|
730
|
+
data_files : list[DataFile], optional
|
|
731
|
+
List of `DataFile` instances to read from. This is used when the
|
|
732
|
+
`input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
|
|
733
|
+
instance should have a `name` (the file name with extension) and a
|
|
734
|
+
`loader` function that reads the data from the file. The `loader`
|
|
735
|
+
function should accept the file path as its first argument and
|
|
736
|
+
return the data read from the file. The `loader` can also accept
|
|
737
|
+
additional positional and keyword arguments, which can be provided
|
|
738
|
+
through the `loader_args` and `loader_kwargs` attributes of the
|
|
739
|
+
`DataFile` instance.
|
|
207
740
|
|
|
208
741
|
Returns
|
|
209
742
|
-------
|
|
@@ -220,23 +753,50 @@ class LocalInputLoader(InputLoader):
|
|
|
220
753
|
if csv_configurations is None:
|
|
221
754
|
csv_configurations = {}
|
|
222
755
|
|
|
223
|
-
if input_format in [InputFormat.JSON, InputFormat.TEXT
|
|
756
|
+
if input_format in [InputFormat.JSON, InputFormat.TEXT]:
|
|
224
757
|
data = self._load_utf8_encoded(path=path, input_format=input_format, csv_configurations=csv_configurations)
|
|
225
758
|
elif input_format == InputFormat.CSV_ARCHIVE:
|
|
226
759
|
data = self._load_archive(path=path, csv_configurations=csv_configurations)
|
|
760
|
+
elif input_format == InputFormat.MULTI_FILE:
|
|
761
|
+
if data_files is None:
|
|
762
|
+
raise ValueError("data_files must be provided when input_format is InputFormat.MULTI_FILE")
|
|
763
|
+
|
|
764
|
+
if not isinstance(data_files, list):
|
|
765
|
+
raise ValueError("data_files must be a list of DataFile instances")
|
|
766
|
+
|
|
767
|
+
data = self._load_multi_file(data_files=data_files, path=path)
|
|
227
768
|
|
|
228
769
|
return Input(data=data, input_format=input_format, options=options)
|
|
229
770
|
|
|
230
771
|
def _load_utf8_encoded(
|
|
231
772
|
self,
|
|
232
|
-
csv_configurations:
|
|
233
|
-
path:
|
|
234
|
-
input_format:
|
|
773
|
+
csv_configurations: dict[str, Any] | None,
|
|
774
|
+
path: str | None = None,
|
|
775
|
+
input_format: InputFormat | str | None = InputFormat.JSON,
|
|
235
776
|
use_file_reader: bool = False,
|
|
236
|
-
) ->
|
|
777
|
+
) -> dict[str, Any] | str | list[dict[str, Any]]:
|
|
237
778
|
"""
|
|
238
|
-
Load a utf-8 encoded file
|
|
239
|
-
|
|
779
|
+
Load a utf-8 encoded file from stdin or filesystem.
|
|
780
|
+
|
|
781
|
+
This internal method handles loading data in various formats from either
|
|
782
|
+
standard input or a file.
|
|
783
|
+
|
|
784
|
+
Parameters
|
|
785
|
+
----------
|
|
786
|
+
csv_configurations : dict[str, Any], optional
|
|
787
|
+
Configuration parameters for the CSV DictReader.
|
|
788
|
+
path : str, optional
|
|
789
|
+
Path to the file to read from. If None or empty, reads from stdin.
|
|
790
|
+
input_format : InputFormat, optional
|
|
791
|
+
Format of the input data. Default is JSON.
|
|
792
|
+
use_file_reader : bool, optional
|
|
793
|
+
Whether to force using the file reader even if path is None.
|
|
794
|
+
Default is False.
|
|
795
|
+
|
|
796
|
+
Returns
|
|
797
|
+
-------
|
|
798
|
+
Union[dict[str, Any], str, list[dict[str, Any]]]
|
|
799
|
+
Data read from stdin or file in the specified format.
|
|
240
800
|
"""
|
|
241
801
|
|
|
242
802
|
# If we forcibly want to use the file reader, we can do so.
|
|
@@ -252,11 +812,33 @@ class LocalInputLoader(InputLoader):
|
|
|
252
812
|
|
|
253
813
|
def _load_archive(
|
|
254
814
|
self,
|
|
255
|
-
csv_configurations:
|
|
256
|
-
path:
|
|
815
|
+
csv_configurations: dict[str, Any] | None,
|
|
816
|
+
path: str | None = None,
|
|
257
817
|
) -> dict[str, list[dict[str, Any]]]:
|
|
258
818
|
"""
|
|
259
|
-
Load files from a directory.
|
|
819
|
+
Load CSV files from a directory.
|
|
820
|
+
|
|
821
|
+
This internal method loads all CSV files from a specified directory,
|
|
822
|
+
organizing them into a dictionary where each key is the filename
|
|
823
|
+
(without .csv extension) and each value is the parsed CSV content.
|
|
824
|
+
|
|
825
|
+
Parameters
|
|
826
|
+
----------
|
|
827
|
+
csv_configurations : dict[str, Any], optional
|
|
828
|
+
Configuration parameters for the CSV DictReader.
|
|
829
|
+
path : str, optional
|
|
830
|
+
Path to the directory containing CSV files. If None or empty,
|
|
831
|
+
uses "./input" as the default directory.
|
|
832
|
+
|
|
833
|
+
Returns
|
|
834
|
+
-------
|
|
835
|
+
dict[str, list[dict[str, Any]]]
|
|
836
|
+
Dictionary mapping filenames to CSV contents.
|
|
837
|
+
|
|
838
|
+
Raises
|
|
839
|
+
------
|
|
840
|
+
ValueError
|
|
841
|
+
If the path is not a directory or the default directory doesn't exist.
|
|
260
842
|
"""
|
|
261
843
|
|
|
262
844
|
dir_path = "input"
|
|
@@ -276,43 +858,104 @@ class LocalInputLoader(InputLoader):
|
|
|
276
858
|
stripped = file.removesuffix(csv_ext)
|
|
277
859
|
data[stripped] = self._load_utf8_encoded(
|
|
278
860
|
path=os.path.join(dir_path, file),
|
|
279
|
-
input_format=
|
|
861
|
+
input_format="CSV",
|
|
280
862
|
use_file_reader=True,
|
|
281
863
|
csv_configurations=csv_configurations,
|
|
282
864
|
)
|
|
283
865
|
|
|
284
866
|
return data
|
|
285
867
|
|
|
868
|
+
def _load_multi_file(
|
|
869
|
+
self,
|
|
870
|
+
data_files: list[DataFile],
|
|
871
|
+
path: str | None = None,
|
|
872
|
+
) -> dict[str, Any]:
|
|
873
|
+
"""
|
|
874
|
+
Load multiple files from a directory.
|
|
875
|
+
|
|
876
|
+
This internal method reads each file listed in `data_files` from the
|
|
877
|
+
specified directory, organizing the results into a dictionary where each
|
|
878
|
+
key is the file name (or the `input_data_key` of the `DataFile`, when
|
|
879
|
+
set) and each value is whatever the `loader` of that `DataFile` returns.
|
|
880
|
+
The supported formats (CSV, JSON, utf-8 text, Excel, etc.) are therefore
|
|
881
|
+
determined by the loaders provided, not by this method.
|
|
882
|
+
|
|
883
|
+
Parameters
|
|
884
|
+
----------
|
|
885
|
+
data_files : list[DataFile]
|
|
886
|
+
List of `DataFile` instances to read from.
|
|
887
|
+
path : str, optional
|
|
888
|
+
Path to the directory containing files. If None or empty,
|
|
889
|
+
uses "./inputs" as the default directory.
|
|
890
|
+
|
|
891
|
+
Returns
|
|
892
|
+
-------
|
|
893
|
+
dict[str, Any]
|
|
894
|
+
Dictionary mapping filenames to file contents. CSV files are loaded
|
|
895
|
+
as lists of dictionaries, JSON files as parsed JSON objects, and
|
|
896
|
+
other utf-8 text files as strings. Excel files are loaded as
|
|
897
|
+
DataFrames.
|
|
898
|
+
|
|
899
|
+
Raises
|
|
900
|
+
------
|
|
901
|
+
ValueError
|
|
902
|
+
If the path is not a directory or the default directory doesn't exist.
|
|
903
|
+
"""
|
|
904
|
+
|
|
905
|
+
dir_path = INPUTS_KEY
|
|
906
|
+
if path is not None and path != "":
|
|
907
|
+
if not os.path.isdir(path):
|
|
908
|
+
raise ValueError(f"path {path} is not a directory")
|
|
909
|
+
|
|
910
|
+
dir_path = path
|
|
911
|
+
|
|
912
|
+
if not os.path.isdir(dir_path):
|
|
913
|
+
raise ValueError(f'expected input directoy "{dir_path}" to exist as a default location')
|
|
914
|
+
|
|
915
|
+
data = {}
|
|
916
|
+
|
|
917
|
+
for data_file in data_files:
|
|
918
|
+
name = data_file.name
|
|
919
|
+
file_path = os.path.join(dir_path, name)
|
|
920
|
+
|
|
921
|
+
if data_file.loader_args is None:
|
|
922
|
+
data_file.loader_args = []
|
|
923
|
+
if data_file.loader_kwargs is None:
|
|
924
|
+
data_file.loader_kwargs = {}
|
|
925
|
+
|
|
926
|
+
d = data_file.loader(
|
|
927
|
+
file_path,
|
|
928
|
+
*data_file.loader_args,
|
|
929
|
+
**data_file.loader_kwargs,
|
|
930
|
+
)
|
|
931
|
+
|
|
932
|
+
key = name
|
|
933
|
+
if data_file.input_data_key is not None:
|
|
934
|
+
key = data_file.input_data_key
|
|
935
|
+
|
|
936
|
+
if data.get(key) is not None:
|
|
937
|
+
raise ValueError(f"Duplicate input data key found: {key}")
|
|
938
|
+
|
|
939
|
+
data[key] = d
|
|
940
|
+
|
|
941
|
+
return data
|
|
942
|
+
|
|
286
943
|
|
|
287
944
|
def load_local(
|
|
288
|
-
input_format:
|
|
289
|
-
options:
|
|
290
|
-
path:
|
|
291
|
-
csv_configurations:
|
|
945
|
+
input_format: InputFormat | None = InputFormat.JSON,
|
|
946
|
+
options: Options | None = None,
|
|
947
|
+
path: str | None = None,
|
|
948
|
+
csv_configurations: dict[str, Any] | None = None,
|
|
292
949
|
) -> Input:
|
|
293
950
|
"""
|
|
951
|
+
!!! warning
|
|
952
|
+
`load_local` is deprecated, use `load` instead.
|
|
953
|
+
|
|
954
|
+
Load input data from local sources.
|
|
955
|
+
|
|
294
956
|
This is a convenience function for instantiating a `LocalInputLoader`
|
|
295
957
|
and calling its `load` method.
|
|
296
958
|
|
|
297
|
-
Load the input data. The input data can be in various formats. For
|
|
298
|
-
`InputFormat.JSON`, `InputFormat.TEXT`, and `InputFormat.CSV`, the data can
|
|
299
|
-
be streamed from stdin or read from a file. When the `path` argument is
|
|
300
|
-
provided (and valid), the input data is read from the file specified by
|
|
301
|
-
`path`, otherwise, it is streamed from stdin. For
|
|
302
|
-
`InputFormat.CSV_ARCHIVE`, the input data is read from the directory
|
|
303
|
-
specified by `path`. If the `path` is not provided, the default location
|
|
304
|
-
`input` is used. The directory should contain one or more files, where each
|
|
305
|
-
file in the directory is a CSV file.
|
|
306
|
-
|
|
307
|
-
The `Input` that is returned contains the `data` attribute. This data can
|
|
308
|
-
be of different types, depending on the provided `input_format`:
|
|
309
|
-
|
|
310
|
-
- `InputFormat.JSON`: the data is a `dict[str, Any]`.
|
|
311
|
-
- `InputFormat.TEXT`: the data is a `str`.
|
|
312
|
-
- `InputFormat.CSV`: the data is a `list[dict[str, Any]]`.
|
|
313
|
-
- `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`.
|
|
314
|
-
Each key is the name of the CSV file, minus the `.csv` extension.
|
|
315
|
-
|
|
316
959
|
Parameters
|
|
317
960
|
----------
|
|
318
961
|
input_format : InputFormat, optional
|
|
@@ -322,20 +965,148 @@ def load_local(
|
|
|
322
965
|
path : str, optional
|
|
323
966
|
Path to the input data.
|
|
324
967
|
csv_configurations : dict[str, Any], optional
|
|
325
|
-
Configurations for loading CSV files.
|
|
326
|
-
|
|
327
|
-
with custom kwargs for the `DictReader`.
|
|
968
|
+
Configurations for loading CSV files. Custom kwargs for
|
|
969
|
+
Python's `csv.DictReader`.
|
|
328
970
|
|
|
329
971
|
Returns
|
|
330
972
|
-------
|
|
331
973
|
Input
|
|
332
|
-
The input data.
|
|
974
|
+
The loaded input data in an Input object.
|
|
333
975
|
|
|
334
976
|
Raises
|
|
335
977
|
------
|
|
336
978
|
ValueError
|
|
337
|
-
If the path is
|
|
979
|
+
If the path is invalid or data format is incorrect.
|
|
980
|
+
|
|
981
|
+
See Also
|
|
982
|
+
--------
|
|
983
|
+
load : The recommended function to use instead.
|
|
338
984
|
"""
|
|
339
985
|
|
|
986
|
+
deprecated(
|
|
987
|
+
name="load_local",
|
|
988
|
+
reason="`load_local` is deprecated, use `load` instead",
|
|
989
|
+
)
|
|
990
|
+
|
|
340
991
|
loader = LocalInputLoader()
|
|
341
992
|
return loader.load(input_format, options, path, csv_configurations)
|
|
993
|
+
|
|
994
|
+
|
|
995
|
+
_LOCAL_INPUT_LOADER = LocalInputLoader()
|
|
996
|
+
"""Default instance of LocalInputLoader used by the load function."""
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
def load(
|
|
1000
|
+
input_format: InputFormat | None = InputFormat.JSON,
|
|
1001
|
+
options: Options | None = None,
|
|
1002
|
+
path: str | None = None,
|
|
1003
|
+
csv_configurations: dict[str, Any] | None = None,
|
|
1004
|
+
loader: InputLoader | None = _LOCAL_INPUT_LOADER,
|
|
1005
|
+
data_files: list[DataFile] | None = None,
|
|
1006
|
+
) -> Input:
|
|
1007
|
+
"""
|
|
1008
|
+
Load input data using the specified loader.
|
|
1009
|
+
|
|
1010
|
+
You can import the `load` function directly from `nextmv`:
|
|
1011
|
+
|
|
1012
|
+
```python
|
|
1013
|
+
from nextmv import load
|
|
1014
|
+
```
|
|
1015
|
+
|
|
1016
|
+
This is a convenience function for loading an `Input` object. By default,
|
|
1017
|
+
it uses the `LocalInputLoader` to load data from local sources.
|
|
1018
|
+
|
|
1019
|
+
The input data can be in various formats and can be loaded from different
|
|
1020
|
+
sources depending on the loader:
|
|
1021
|
+
|
|
1022
|
+
- `InputFormat.JSON`: the data is a `dict[str, Any]`
|
|
1023
|
+
- `InputFormat.TEXT`: the data is a `str`
|
|
1024
|
+
- `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`
|
|
1025
|
+
Each key is the name of the CSV file, minus the `.csv` extension.
|
|
1026
|
+
- `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`
|
|
1027
|
+
where each key is the file name (with extension) and the value is the
|
|
1028
|
+
data read from the file. This is used for loading multiple files in a
|
|
1029
|
+
single input, where each file can be of different types (JSON, CSV,
|
|
1030
|
+
Excel, etc.). The data is loaded as a dict of items, where each item
|
|
1031
|
+
corresponds to a file and its content.
|
|
1032
|
+
|
|
1033
|
+
When specifying `input_format` as `InputFormat.MULTI_FILE`, the
|
|
1034
|
+
`data_files` argument must be provided. This argument is a list of
|
|
1035
|
+
`DataFile` instances, each representing a file to be read. Each `DataFile`
|
|
1036
|
+
instance should have a `name` (the file name with extension) and a `loader`
|
|
1037
|
+
function that reads the data from the file. The `loader` function should
|
|
1038
|
+
accept the file path as its first argument and return the data read from
|
|
1039
|
+
the file. The `loader` can also accept additional positional and keyword
|
|
1040
|
+
arguments, which can be provided through the `loader_args` and
|
|
1041
|
+
`loader_kwargs` attributes of the `DataFile` instance.
|
|
1042
|
+
|
|
1043
|
+
There are convenience functions that can be used to create `DataFile`
|
|
1044
|
+
instances, such as:
|
|
1045
|
+
|
|
1046
|
+
- `json_data_file`: Creates a `DataFile` that reads JSON data.
|
|
1047
|
+
- `csv_data_file`: Creates a `DataFile` that reads CSV data.
|
|
1048
|
+
- `text_data_file`: Creates a `DataFile` that reads utf-8 encoded text
|
|
1049
|
+
data.
|
|
1050
|
+
|
|
1051
|
+
When working with data in other formats, such as Excel files, you are
|
|
1052
|
+
encouraged to create your own `DataFile` objects with your own
|
|
1053
|
+
implementation of the `loader` function. This allows you to read data
|
|
1054
|
+
from files in a way that suits your needs, while still adhering to the
|
|
1055
|
+
`DataFile` interface.
|
|
1056
|
+
|
|
1057
|
+
Parameters
|
|
1058
|
+
----------
|
|
1059
|
+
input_format : InputFormat, optional
|
|
1060
|
+
Format of the input data. Default is `InputFormat.JSON`.
|
|
1061
|
+
options : Options, optional
|
|
1062
|
+
Options for loading the input data.
|
|
1063
|
+
path : str, optional
|
|
1064
|
+
Path to the input data. For file-based loaders:
|
|
1065
|
+
- If provided, reads from the specified file or directory
|
|
1066
|
+
- If None, typically reads from stdin (for JSON, TEXT)
|
|
1067
|
+
or uses a default directory (for CSV_ARCHIVE, MULTI_FILE)
|
|
1068
|
+
csv_configurations : dict[str, Any], optional
|
|
1069
|
+
Configurations for loading CSV files. Custom kwargs for
|
|
1070
|
+
Python's `csv.DictReader`.
|
|
1071
|
+
loader : InputLoader, optional
|
|
1072
|
+
The loader to use for loading the input data.
|
|
1073
|
+
Default is an instance of `LocalInputLoader`.
|
|
1074
|
+
data_files : list[DataFile], optional
|
|
1075
|
+
List of `DataFile` instances to read from. This is used when the
|
|
1076
|
+
`input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
|
|
1077
|
+
instance should have a `name` (the file name with extension) and a
|
|
1078
|
+
`loader` function that reads the data from the file. The `loader`
|
|
1079
|
+
function should accept the file path as its first argument and return
|
|
1080
|
+
the data read from the file. The `loader` can also accept additional
|
|
1081
|
+
positional and keyword arguments, which can be provided through the
|
|
1082
|
+
`loader_args` and `loader_kwargs` attributes of the `DataFile`
|
|
1083
|
+
instance.
|
|
1084
|
+
|
|
1085
|
+
There are convenience functions that can be used to create `DataFile`
|
|
1086
|
+
instances, such as `json_data_file`, `csv_data_file`, and
|
|
1087
|
+
`text_data_file`. When working with data in other formats, such as
|
|
1088
|
+
Excel files, you are encouraged to create your own `DataFile` objects
|
|
1089
|
+
with your own implementation of the `loader` function. This allows you
|
|
1090
|
+
to read data from files in a way that suits your needs, while still
|
|
1091
|
+
adhering to the `DataFile` interface.
|
|
1092
|
+
|
|
1093
|
+
Returns
|
|
1094
|
+
-------
|
|
1095
|
+
Input
|
|
1096
|
+
The loaded input data in an Input object.
|
|
1097
|
+
|
|
1098
|
+
Raises
|
|
1099
|
+
------
|
|
1100
|
+
ValueError
|
|
1101
|
+
If the path is invalid or data format is incorrect.
|
|
1102
|
+
|
|
1103
|
+
Examples
|
|
1104
|
+
--------
|
|
1105
|
+
>>> from nextmv.input import load, InputFormat
|
|
1106
|
+
>>> # Load JSON from stdin
|
|
1107
|
+
>>> input_obj = load(input_format=InputFormat.JSON)
|
|
1108
|
+
>>> # Load CSV archive from a directory
|
|
1109
|
+
>>> input_obj = load(input_format=InputFormat.CSV_ARCHIVE, path="input_dir")
|
|
1110
|
+
"""
|
|
1111
|
+
|
|
1112
|
+
return loader.load(input_format, options, path, csv_configurations, data_files)
|