nextmv 0.28.5__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextmv/__about__.py +1 -1
- nextmv/__init__.py +8 -0
- nextmv/cloud/application.py +210 -21
- nextmv/cloud/client.py +28 -9
- nextmv/cloud/manifest.py +142 -14
- nextmv/cloud/package.py +1 -1
- nextmv/cloud/run.py +34 -0
- nextmv/input.py +476 -6
- nextmv/model.py +12 -3
- nextmv/options.py +88 -0
- nextmv/output.py +535 -51
- {nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/METADATA +13 -1
- {nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/RECORD +15 -15
- {nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/WHEEL +0 -0
- {nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/licenses/LICENSE +0 -0
nextmv/input.py
CHANGED
|
@@ -27,6 +27,7 @@ import csv
|
|
|
27
27
|
import json
|
|
28
28
|
import os
|
|
29
29
|
import sys
|
|
30
|
+
from collections.abc import Callable
|
|
30
31
|
from dataclasses import dataclass
|
|
31
32
|
from enum import Enum
|
|
32
33
|
from typing import Any, Optional, Union
|
|
@@ -58,6 +59,8 @@ class InputFormat(str, Enum):
|
|
|
58
59
|
CSV format, utf-8 encoded.
|
|
59
60
|
CSV_ARCHIVE : str
|
|
60
61
|
CSV archive format: multiple CSV files.
|
|
62
|
+
MULTI_FILE : str
|
|
63
|
+
Multi-file format, used for loading multiple files in a single input.
|
|
61
64
|
"""
|
|
62
65
|
|
|
63
66
|
JSON = "json"
|
|
@@ -68,6 +71,282 @@ class InputFormat(str, Enum):
|
|
|
68
71
|
"""CSV format, utf-8 encoded."""
|
|
69
72
|
CSV_ARCHIVE = "csv-archive"
|
|
70
73
|
"""CSV archive format: multiple CSV files."""
|
|
74
|
+
MULTI_FILE = "multi-file"
|
|
75
|
+
"""Multi-file format, used for loading multiple files in a single input."""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
|
|
79
|
+
class DataFile:
|
|
80
|
+
"""
|
|
81
|
+
Represents data to be read from a file.
|
|
82
|
+
|
|
83
|
+
You can import the `DataFile` class directly from `nextmv`:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from nextmv import DataFile
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
This class is used to define data that will be read from a file in the
|
|
90
|
+
filesystem. It includes the name of the file, and the reader function that
|
|
91
|
+
will handle the loading, and deserialization of the data from the file.
|
|
92
|
+
This `DataFile` class is typically used in the `Input`, when the
|
|
93
|
+
`Input.input_format` is set to `InputFormat.MULTI_FILE`. Given that it is
|
|
94
|
+
difficult to handle every edge case of how data is deserialized, and read
|
|
95
|
+
from a file, this class exists so that the user can implement the `loader`
|
|
96
|
+
callable of their choice and provide it with any `loader_args` and
|
|
97
|
+
`loader_kwargs` they might need.
|
|
98
|
+
|
|
99
|
+
Parameters
|
|
100
|
+
----------
|
|
101
|
+
name : str
|
|
102
|
+
Name of the data (input) file. The file extension should be included in
|
|
103
|
+
the name.
|
|
104
|
+
loader : Callable[[str], Any]
|
|
105
|
+
Callable that reads the data from the file. This should be a function
|
|
106
|
+
implemented by the user. There are convenience functions that you can
|
|
107
|
+
use as a loader as well. The `loader` must receive, at the very minimum,
|
|
108
|
+
the following arguments:
|
|
109
|
+
|
|
110
|
+
- `file_path`: a `str` argument which is the location where this
|
|
111
|
+
data will be read from. This includes the dir and name of the
|
|
112
|
+
file. As such, the `name` parameter of this class is going to be
|
|
113
|
+
passed to the `loader` function, joined with the directory where the
|
|
114
|
+
file will be read from.
|
|
115
|
+
|
|
116
|
+
The `loader` can also receive additional arguments, and keyword
|
|
117
|
+
arguments. The `loader_args` and `loader_kwargs` parameters of this
|
|
118
|
+
class can be used to provide those additional arguments.
|
|
119
|
+
|
|
120
|
+
The `loader` function should return the data that will be used in the
|
|
121
|
+
model.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
name: str
|
|
125
|
+
"""
|
|
126
|
+
Name of the data (input) file. The file extension should be included in the
|
|
127
|
+
name.
|
|
128
|
+
"""
|
|
129
|
+
loader: Callable[[str], Any]
|
|
130
|
+
"""
|
|
131
|
+
Callable that reads (loads) the data from the file. This should be a function
|
|
132
|
+
implemented by the user. There are convenience functions that you can use
|
|
133
|
+
as a `loader` as well. The `loader` must receive, at the very minimum, the
|
|
134
|
+
following arguments:
|
|
135
|
+
|
|
136
|
+
- `file_path`: a `str` argument which is the location where this
|
|
137
|
+
data will be read from. This includes the dir and name of the
|
|
138
|
+
file. As such, the `name` parameter of this class is going to be
|
|
139
|
+
passed to the `loader` function, joined with the directory where the
|
|
140
|
+
file will be read from.
|
|
141
|
+
|
|
142
|
+
The `loader` can also receive additional arguments, and keyword arguments.
|
|
143
|
+
The `loader_args` and `loader_kwargs` parameters of this class can be used
|
|
144
|
+
to provide those additional arguments.
|
|
145
|
+
|
|
146
|
+
The `loader` function should return the data that will be used in the model.
|
|
147
|
+
"""
|
|
148
|
+
loader_kwargs: Optional[dict[str, Any]] = None
|
|
149
|
+
"""
|
|
150
|
+
Optional keyword arguments to pass to the loader function. This can be used
|
|
151
|
+
to customize the behavior of the loader.
|
|
152
|
+
"""
|
|
153
|
+
loader_args: Optional[list[Any]] = None
|
|
154
|
+
"""
|
|
155
|
+
Optional positional arguments to pass to the loader function. This can be
|
|
156
|
+
used to customize the behavior of the loader.
|
|
157
|
+
"""
|
|
158
|
+
input_data_key: Optional[str] = None
|
|
159
|
+
"""
|
|
160
|
+
Use this parameter to set a custom key to represent your file.
|
|
161
|
+
|
|
162
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
163
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
164
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
165
|
+
represents the file name (with extension) and the value is the data that is
|
|
166
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
167
|
+
custom key to represent your file by using this attribute.
|
|
168
|
+
"""
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def json_data_file(
|
|
172
|
+
name: str,
|
|
173
|
+
json_configurations: Optional[dict[str, Any]] = None,
|
|
174
|
+
input_data_key: Optional[str] = None,
|
|
175
|
+
) -> DataFile:
|
|
176
|
+
"""
|
|
177
|
+
This is a convenience function to create a `DataFile` that reads JSON data.
|
|
178
|
+
|
|
179
|
+
You can import the `json_data_file` function directly from `nextmv`:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from nextmv import json_data_file
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Parameters
|
|
186
|
+
----------
|
|
187
|
+
name : str
|
|
188
|
+
Name of the data file. You don't need to include the `.json` extension.
|
|
189
|
+
json_configurations : dict[str, Any], optional
|
|
190
|
+
JSON-specific configurations for reading the data.
|
|
191
|
+
input_data_key : str, optional
|
|
192
|
+
A custom key to represent the data from this file.
|
|
193
|
+
|
|
194
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
195
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
196
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
197
|
+
represents the file name (with extension) and the value is the data that is
|
|
198
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
199
|
+
custom key to represent your file by using this attribute.
|
|
200
|
+
|
|
201
|
+
Returns
|
|
202
|
+
-------
|
|
203
|
+
DataFile
|
|
204
|
+
A `DataFile` instance that reads JSON data from a file with the given
|
|
205
|
+
name.
|
|
206
|
+
|
|
207
|
+
Examples
|
|
208
|
+
--------
|
|
209
|
+
>>> from nextmv import json_data_file
|
|
210
|
+
>>> data_file = json_data_file("my_data")
|
|
211
|
+
>>> data = data_file.read()
|
|
212
|
+
>>> print(data)
|
|
213
|
+
{
|
|
214
|
+
"key": "value",
|
|
215
|
+
"another_key": [1, 2, 3]
|
|
216
|
+
}
|
|
217
|
+
"""
|
|
218
|
+
|
|
219
|
+
if not name.endswith(".json"):
|
|
220
|
+
name += ".json"
|
|
221
|
+
|
|
222
|
+
json_configurations = json_configurations or {}
|
|
223
|
+
|
|
224
|
+
def loader(file_path: str) -> Union[dict[str, Any], Any]:
|
|
225
|
+
with open(file_path, encoding="utf-8") as f:
|
|
226
|
+
return json.load(f, **json_configurations)
|
|
227
|
+
|
|
228
|
+
return DataFile(
|
|
229
|
+
name=name,
|
|
230
|
+
loader=loader,
|
|
231
|
+
input_data_key=input_data_key,
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def csv_data_file(
|
|
236
|
+
name: str,
|
|
237
|
+
csv_configurations: Optional[dict[str, Any]] = None,
|
|
238
|
+
input_data_key: Optional[str] = None,
|
|
239
|
+
) -> DataFile:
|
|
240
|
+
"""
|
|
241
|
+
This is a convenience function to create a `DataFile` that reads CSV data.
|
|
242
|
+
|
|
243
|
+
You can import the `csv_data_file` function directly from `nextmv`:
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
from nextmv import csv_data_file
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Parameters
|
|
250
|
+
----------
|
|
251
|
+
name : str
|
|
252
|
+
Name of the data file. You don't need to include the `.csv` extension.
|
|
253
|
+
csv_configurations : dict[str, Any], optional
|
|
254
|
+
CSV-specific configurations for reading the data.
|
|
255
|
+
input_data_key : str, optional
|
|
256
|
+
A custom key to represent the data from this file.
|
|
257
|
+
|
|
258
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
259
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
260
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
261
|
+
represents the file name (with extension) and the value is the data that is
|
|
262
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
263
|
+
custom key to represent your file by using this attribute.
|
|
264
|
+
|
|
265
|
+
Returns
|
|
266
|
+
-------
|
|
267
|
+
DataFile
|
|
268
|
+
A `DataFile` instance that reads CSV data from a file with the given
|
|
269
|
+
name.
|
|
270
|
+
|
|
271
|
+
Examples
|
|
272
|
+
--------
|
|
273
|
+
>>> from nextmv import csv_data_file
|
|
274
|
+
>>> data_file = csv_data_file("my_data")
|
|
275
|
+
>>> data = data_file.read()
|
|
276
|
+
>>> print(data)
|
|
277
|
+
[
|
|
278
|
+
{"column1": "value1", "column2": "value2"},
|
|
279
|
+
{"column1": "value3", "column2": "value4"}
|
|
280
|
+
]
|
|
281
|
+
"""
|
|
282
|
+
|
|
283
|
+
if not name.endswith(".csv"):
|
|
284
|
+
name += ".csv"
|
|
285
|
+
|
|
286
|
+
csv_configurations = csv_configurations or {}
|
|
287
|
+
|
|
288
|
+
def loader(file_path: str) -> list[dict[str, Any]]:
|
|
289
|
+
with open(file_path, encoding="utf-8") as f:
|
|
290
|
+
return list(csv.DictReader(f, **csv_configurations))
|
|
291
|
+
|
|
292
|
+
return DataFile(
|
|
293
|
+
name=name,
|
|
294
|
+
loader=loader,
|
|
295
|
+
input_data_key=input_data_key,
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def text_data_file(name: str, input_data_key: Optional[str] = None) -> DataFile:
|
|
300
|
+
"""
|
|
301
|
+
This is a convenience function to create a `DataFile` that reads utf-8
|
|
302
|
+
encoded text data.
|
|
303
|
+
|
|
304
|
+
You can import the `text_data_file` function directly from `nextmv`:
|
|
305
|
+
|
|
306
|
+
```python
|
|
307
|
+
from nextmv import text_data_file
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
You must provide the extension as part of the `name` parameter.
|
|
311
|
+
|
|
312
|
+
Parameters
|
|
313
|
+
----------
|
|
314
|
+
name : str
|
|
315
|
+
Name of the data file. The file extension must be provided in the name.
|
|
316
|
+
input_data_key : str, optional
|
|
317
|
+
A custom key to represent the data from this file.
|
|
318
|
+
|
|
319
|
+
When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
|
|
320
|
+
the data from the file is loaded to the `.data` parameter of the `Input`.
|
|
321
|
+
In that case, the type of `.data` is `dict[str, Any]`, where each key
|
|
322
|
+
represents the file name (with extension) and the value is the data that is
|
|
323
|
+
actually loaded from the file using the `loader` function. You can set a
|
|
324
|
+
custom key to represent your file by using this attribute.
|
|
325
|
+
|
|
326
|
+
Returns
|
|
327
|
+
-------
|
|
328
|
+
DataFile
|
|
329
|
+
A `DataFile` instance that reads text data from a file with the given
|
|
330
|
+
name.
|
|
331
|
+
|
|
332
|
+
Examples
|
|
333
|
+
--------
|
|
334
|
+
>>> from nextmv import text_data_file
|
|
335
|
+
>>> data_file = text_data_file("my_data")
|
|
336
|
+
>>> data = data_file.read()
|
|
337
|
+
>>> print(data)
|
|
338
|
+
This is some text data.
|
|
339
|
+
"""
|
|
340
|
+
|
|
341
|
+
def loader(file_path: str) -> str:
|
|
342
|
+
with open(file_path, encoding="utf-8") as f:
|
|
343
|
+
return f.read().rstrip("\n")
|
|
344
|
+
|
|
345
|
+
return DataFile(
|
|
346
|
+
name=name,
|
|
347
|
+
loader=loader,
|
|
348
|
+
input_data_key=input_data_key,
|
|
349
|
+
)
|
|
71
350
|
|
|
72
351
|
|
|
73
352
|
@dataclass
|
|
@@ -81,14 +360,42 @@ class Input:
|
|
|
81
360
|
from nextmv import Input
|
|
82
361
|
```
|
|
83
362
|
|
|
363
|
+
The `data`'s type must match the `input_format`:
|
|
364
|
+
|
|
365
|
+
- `InputFormat.JSON`: the data is `Union[dict[str, Any], Any]`. This just
|
|
366
|
+
means that the data must be JSON-deserializable, which includes dicts and
|
|
367
|
+
lists.
|
|
368
|
+
- `InputFormat.TEXT`: the data is `str`, and it must be utf-8 encoded.
|
|
369
|
+
- `InputFormat.CSV`: the data is `list[dict[str, Any]]`, where each dict
|
|
370
|
+
represents a row in the CSV.
|
|
371
|
+
- `InputFormat.CSV_ARCHIVE`: the data is `dict[str, list[dict[str, Any]]]`,
|
|
372
|
+
where each key is the name of a CSV file and the value is a list of dicts
|
|
373
|
+
representing the rows in that CSV file.
|
|
374
|
+
- `InputFormat.MULTI_FILE`: the data is `dict[str, Any]`, where for each
|
|
375
|
+
item, the key is the file name (with the extension) and the actual data
|
|
376
|
+
from the file is the value. When working with multi-file, data is loaded
|
|
377
|
+
from one or more files in a specific directory. Given that each file can
|
|
378
|
+
be of different types (JSON, CSV, Excel, etc...), the data captured from
|
|
379
|
+
each might vary. To reflect this, the data is loaded as a dict of items.
|
|
380
|
+
You can have a custom key for the data, that is not the file name, if
|
|
381
|
+
you use the `input_data_key` parameter of the `DataFile` class.
|
|
382
|
+
|
|
84
383
|
Parameters
|
|
85
384
|
----------
|
|
86
|
-
data : Union[dict[str, Any],
|
|
385
|
+
data : Union[Union[dict[str, Any], Any], str, list[dict[str, Any]],
|
|
386
|
+
dict[str, list[dict[str, Any]]], dict[str, Any]]
|
|
87
387
|
The actual data.
|
|
88
388
|
input_format : InputFormat, optional
|
|
89
389
|
Format of the input data. Default is `InputFormat.JSON`.
|
|
90
390
|
options : Options, optional
|
|
91
391
|
Options that the input was created with.
|
|
392
|
+
|
|
393
|
+
Raises
|
|
394
|
+
------
|
|
395
|
+
ValueError
|
|
396
|
+
If the data type doesn't match the expected type for the given format.
|
|
397
|
+
ValueError
|
|
398
|
+
If the `input_format` is not one of the supported formats.
|
|
92
399
|
"""
|
|
93
400
|
|
|
94
401
|
data: Union[
|
|
@@ -96,6 +403,7 @@ class Input:
|
|
|
96
403
|
str, # TEXT
|
|
97
404
|
list[dict[str, Any]], # CSV
|
|
98
405
|
dict[str, list[dict[str, Any]]], # CSV_ARCHIVE
|
|
406
|
+
dict[str, Any], # MULTI_FILE
|
|
99
407
|
]
|
|
100
408
|
"""
|
|
101
409
|
The actual data.
|
|
@@ -106,6 +414,7 @@ class Input:
|
|
|
106
414
|
- For `TEXT`: `str`
|
|
107
415
|
- For `CSV`: `list[dict[str, Any]]`
|
|
108
416
|
- For `CSV_ARCHIVE`: `dict[str, list[dict[str, Any]]]`
|
|
417
|
+
- For `MULTI_FILE`: `dict[str, Any]`
|
|
109
418
|
"""
|
|
110
419
|
|
|
111
420
|
input_format: Optional[InputFormat] = InputFormat.JSON
|
|
@@ -165,6 +474,12 @@ class Input:
|
|
|
165
474
|
"input_format InputFormat.CSV_ARCHIVE, supported type is `dict`"
|
|
166
475
|
)
|
|
167
476
|
|
|
477
|
+
elif self.input_format == InputFormat.MULTI_FILE and not isinstance(self.data, dict):
|
|
478
|
+
raise ValueError(
|
|
479
|
+
f"unsupported Input.data type: {type(self.data)} with "
|
|
480
|
+
"input_format InputFormat.MULTI_FILE, supported type is `dict`"
|
|
481
|
+
)
|
|
482
|
+
|
|
168
483
|
# Capture a snapshot of the options that were used to create the class
|
|
169
484
|
# so even if they are changed later, we have a record of the original.
|
|
170
485
|
init_options = self.options
|
|
@@ -175,8 +490,10 @@ class Input:
|
|
|
175
490
|
"""
|
|
176
491
|
Convert the input to a dictionary.
|
|
177
492
|
|
|
178
|
-
This method serializes the Input object to a dictionary format that can
|
|
179
|
-
easily converted to JSON or other serialization formats.
|
|
493
|
+
This method serializes the Input object to a dictionary format that can
|
|
494
|
+
be easily converted to JSON or other serialization formats. When the
|
|
495
|
+
`input_format` is set to `InputFormat.MULTI_FILE`, it will not include
|
|
496
|
+
the `data` field, as it is uncertain how data is deserialized from the file.
|
|
180
497
|
|
|
181
498
|
Returns
|
|
182
499
|
-------
|
|
@@ -201,12 +518,18 @@ class Input:
|
|
|
201
518
|
{'data': {'key': 'value'}, 'input_format': 'json', 'options': None}
|
|
202
519
|
"""
|
|
203
520
|
|
|
204
|
-
|
|
205
|
-
"data": self.data,
|
|
521
|
+
input_dict = {
|
|
206
522
|
"input_format": self.input_format.value,
|
|
207
523
|
"options": self.options.to_dict() if self.options is not None else None,
|
|
208
524
|
}
|
|
209
525
|
|
|
526
|
+
if self.input_format == InputFormat.MULTI_FILE:
|
|
527
|
+
return input_dict
|
|
528
|
+
|
|
529
|
+
input_dict["data"] = self.data
|
|
530
|
+
|
|
531
|
+
return input_dict
|
|
532
|
+
|
|
210
533
|
|
|
211
534
|
class InputLoader:
|
|
212
535
|
"""
|
|
@@ -375,6 +698,7 @@ class LocalInputLoader(InputLoader):
|
|
|
375
698
|
options: Optional[Options] = None,
|
|
376
699
|
path: Optional[str] = None,
|
|
377
700
|
csv_configurations: Optional[dict[str, Any]] = None,
|
|
701
|
+
data_files: Optional[list[DataFile]] = None,
|
|
378
702
|
) -> Input:
|
|
379
703
|
"""
|
|
380
704
|
Load the input data. The input data can be in various formats. For
|
|
@@ -395,6 +719,10 @@ class LocalInputLoader(InputLoader):
|
|
|
395
719
|
- `InputFormat.CSV`: the data is a `list[dict[str, Any]]`.
|
|
396
720
|
- `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`.
|
|
397
721
|
Each key is the name of the CSV file, minus the `.csv` extension.
|
|
722
|
+
- `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`, where each
|
|
723
|
+
key is the file name (with extension) and the value is the data read
|
|
724
|
+
from the file. The data can be of any type, depending on the file
|
|
725
|
+
type and the loader function provided in the `DataFile` instances.
|
|
398
726
|
|
|
399
727
|
Parameters
|
|
400
728
|
----------
|
|
@@ -408,6 +736,16 @@ class LocalInputLoader(InputLoader):
|
|
|
408
736
|
Configurations for loading CSV files. The default `DictReader` is
|
|
409
737
|
used when loading a CSV file, so you have the option to pass in a
|
|
410
738
|
dictionary with custom kwargs for the `DictReader`.
|
|
739
|
+
data_files : list[DataFile], optional
|
|
740
|
+
List of `DataFile` instances to read from. This is used when the
|
|
741
|
+
`input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
|
|
742
|
+
instance should have a `name` (the file name with extension) and a
|
|
743
|
+
`loader` function that reads the data from the file. The `loader`
|
|
744
|
+
function should accept the file path as its first argument and return
|
|
745
|
+
the data read from the file. The `loader` can also accept additional
|
|
746
|
+
positional and keyword arguments, which can be provided through the
|
|
747
|
+
`loader_args` and `loader_kwargs` attributes of the `DataFile`
|
|
748
|
+
instance.
|
|
411
749
|
|
|
412
750
|
Returns
|
|
413
751
|
-------
|
|
@@ -428,6 +766,14 @@ class LocalInputLoader(InputLoader):
|
|
|
428
766
|
data = self._load_utf8_encoded(path=path, input_format=input_format, csv_configurations=csv_configurations)
|
|
429
767
|
elif input_format == InputFormat.CSV_ARCHIVE:
|
|
430
768
|
data = self._load_archive(path=path, csv_configurations=csv_configurations)
|
|
769
|
+
elif input_format == InputFormat.MULTI_FILE:
|
|
770
|
+
if data_files is None:
|
|
771
|
+
raise ValueError("data_files must be provided when input_format is InputFormat.MULTI_FILE")
|
|
772
|
+
|
|
773
|
+
if not isinstance(data_files, list):
|
|
774
|
+
raise ValueError("data_files must be a list of DataFile instances")
|
|
775
|
+
|
|
776
|
+
data = self._load_multi_file(data_files=data_files, path=path)
|
|
431
777
|
|
|
432
778
|
return Input(data=data, input_format=input_format, options=options)
|
|
433
779
|
|
|
@@ -528,6 +874,81 @@ class LocalInputLoader(InputLoader):
|
|
|
528
874
|
|
|
529
875
|
return data
|
|
530
876
|
|
|
877
|
+
def _load_multi_file(
|
|
878
|
+
self,
|
|
879
|
+
data_files: list[DataFile],
|
|
880
|
+
path: Optional[str] = None,
|
|
881
|
+
) -> dict[str, Any]:
|
|
882
|
+
"""
|
|
883
|
+
Load multiple files from a directory.
|
|
884
|
+
|
|
885
|
+
This internal method loads all supported files from a specified
|
|
886
|
+
directory, organizing them into a dictionary where each key is the
|
|
887
|
+
filename and each value is the parsed file content. Supports CSV files
|
|
888
|
+
(parsed as list of dictionaries), JSON files (parsed as JSON objects),
|
|
889
|
+
and any other utf-8 encoded text files (loaded as plain text strings).
|
|
890
|
+
It also supports Excel files, loading them as DataFrames.
|
|
891
|
+
|
|
892
|
+
Parameters
|
|
893
|
+
----------
|
|
894
|
+
data_files : list[DataFile]
|
|
895
|
+
List of `DataFile` instances to read from.
|
|
896
|
+
path : str, optional
|
|
897
|
+
Path to the directory containing files. If None or empty,
|
|
898
|
+
uses "./inputs" as the default directory.
|
|
899
|
+
|
|
900
|
+
Returns
|
|
901
|
+
-------
|
|
902
|
+
dict[str, Any]
|
|
903
|
+
Dictionary mapping filenames to file contents. CSV files are loaded
|
|
904
|
+
as lists of dictionaries, JSON files as parsed JSON objects, and
|
|
905
|
+
other utf-8 text files as strings. Excel files are loaded as
|
|
906
|
+
DataFrames.
|
|
907
|
+
|
|
908
|
+
Raises
|
|
909
|
+
------
|
|
910
|
+
ValueError
|
|
911
|
+
If the path is not a directory or the default directory doesn't exist.
|
|
912
|
+
"""
|
|
913
|
+
|
|
914
|
+
dir_path = "inputs"
|
|
915
|
+
if path is not None and path != "":
|
|
916
|
+
if not os.path.isdir(path):
|
|
917
|
+
raise ValueError(f"path {path} is not a directory")
|
|
918
|
+
|
|
919
|
+
dir_path = path
|
|
920
|
+
|
|
921
|
+
if not os.path.isdir(dir_path):
|
|
922
|
+
raise ValueError(f'expected input directoy "{dir_path}" to exist as a default location')
|
|
923
|
+
|
|
924
|
+
data = {}
|
|
925
|
+
|
|
926
|
+
for data_file in data_files:
|
|
927
|
+
name = data_file.name
|
|
928
|
+
file_path = os.path.join(dir_path, name)
|
|
929
|
+
|
|
930
|
+
if data_file.loader_args is None:
|
|
931
|
+
data_file.loader_args = []
|
|
932
|
+
if data_file.loader_kwargs is None:
|
|
933
|
+
data_file.loader_kwargs = {}
|
|
934
|
+
|
|
935
|
+
d = data_file.loader(
|
|
936
|
+
file_path,
|
|
937
|
+
*data_file.loader_args,
|
|
938
|
+
**data_file.loader_kwargs,
|
|
939
|
+
)
|
|
940
|
+
|
|
941
|
+
key = name
|
|
942
|
+
if data_file.input_data_key is not None:
|
|
943
|
+
key = data_file.input_data_key
|
|
944
|
+
|
|
945
|
+
if data.get(key) is not None:
|
|
946
|
+
raise ValueError(f"Duplicate input data key found: {key}")
|
|
947
|
+
|
|
948
|
+
data[key] = d
|
|
949
|
+
|
|
950
|
+
return data
|
|
951
|
+
|
|
531
952
|
|
|
532
953
|
def load_local(
|
|
533
954
|
input_format: Optional[InputFormat] = InputFormat.JSON,
|
|
@@ -590,6 +1011,7 @@ def load(
|
|
|
590
1011
|
path: Optional[str] = None,
|
|
591
1012
|
csv_configurations: Optional[dict[str, Any]] = None,
|
|
592
1013
|
loader: Optional[InputLoader] = _LOCAL_INPUT_LOADER,
|
|
1014
|
+
data_files: Optional[list[DataFile]] = None,
|
|
593
1015
|
) -> Input:
|
|
594
1016
|
"""
|
|
595
1017
|
Load input data using the specified loader.
|
|
@@ -611,6 +1033,36 @@ def load(
|
|
|
611
1033
|
- `InputFormat.CSV`: the data is a `list[dict[str, Any]]`
|
|
612
1034
|
- `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`
|
|
613
1035
|
Each key is the name of the CSV file, minus the `.csv` extension.
|
|
1036
|
+
- `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`
|
|
1037
|
+
where each key is the file name (with extension) and the value is the
|
|
1038
|
+
data read from the file. This is used for loading multiple files in a
|
|
1039
|
+
single input, where each file can be of different types (JSON, CSV,
|
|
1040
|
+
Excel, etc.). The data is loaded as a dict of items, where each item
|
|
1041
|
+
corresponds to a file and its content.
|
|
1042
|
+
|
|
1043
|
+
When specifying `input_format` as `InputFormat.MULTI_FILE`, the
|
|
1044
|
+
`data_files` argument must be provided. This argument is a list of
|
|
1045
|
+
`DataFile` instances, each representing a file to be read. Each `DataFile`
|
|
1046
|
+
instance should have a `name` (the file name with extension) and a `loader`
|
|
1047
|
+
function that reads the data from the file. The `loader` function should
|
|
1048
|
+
accept the file path as its first argument and return the data read from
|
|
1049
|
+
the file. The `loader` can also accept additional positional and keyword
|
|
1050
|
+
arguments, which can be provided through the `loader_args` and
|
|
1051
|
+
`loader_kwargs` attributes of the `DataFile` instance.
|
|
1052
|
+
|
|
1053
|
+
There are convenience functions that can be used to create `DataFile`
|
|
1054
|
+
classes, such as:
|
|
1055
|
+
|
|
1056
|
+
- `json_data_file`: Creates a `DataFile` that reads JSON data.
|
|
1057
|
+
- `csv_data_file`: Creates a `DataFile` that reads CSV data.
|
|
1058
|
+
- `text_data_file`: Creates a `DataFile` that reads utf-8 encoded text
|
|
1059
|
+
data.
|
|
1060
|
+
|
|
1061
|
+
When working with data in other formats, such as Excel files, you are
|
|
1062
|
+
encouraged to create your own `DataFile` objects with your own
|
|
1063
|
+
implementation of the `loader` function. This allows you to read data
|
|
1064
|
+
from files in a way that suits your needs, while still adhering to the
|
|
1065
|
+
`DataFile` interface.
|
|
614
1066
|
|
|
615
1067
|
Parameters
|
|
616
1068
|
----------
|
|
@@ -629,6 +1081,24 @@ def load(
|
|
|
629
1081
|
loader : InputLoader, optional
|
|
630
1082
|
The loader to use for loading the input data.
|
|
631
1083
|
Default is an instance of `LocalInputLoader`.
|
|
1084
|
+
data_files : list[DataFile], optional
|
|
1085
|
+
List of `DataFile` instances to read from. This is used when the
|
|
1086
|
+
`input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
|
|
1087
|
+
instance should have a `name` (the file name with extension) and a
|
|
1088
|
+
`loader` function that reads the data from the file. The `loader`
|
|
1089
|
+
function should accept the file path as its first argument and return
|
|
1090
|
+
the data read from the file. The `loader` can also accept additional
|
|
1091
|
+
positional and keyword arguments, which can be provided through the
|
|
1092
|
+
`loader_args` and `loader_kwargs` attributes of the `DataFile`
|
|
1093
|
+
instance.
|
|
1094
|
+
|
|
1095
|
+
There are convenience functions that can be used to create `DataFile`
|
|
1096
|
+
classes, such as `json_data_file`, `csv_data_file`, and
|
|
1097
|
+
`text_data_file`. When working with data in other formats, such as
|
|
1098
|
+
Excel files, you are encouraged to create your own `DataFile` objects
|
|
1099
|
+
with your own implementation of the `loader` function. This allows you
|
|
1100
|
+
to read data from files in a way that suits your needs, while still
|
|
1101
|
+
adhering to the `DataFile` interface.
|
|
632
1102
|
|
|
633
1103
|
Returns
|
|
634
1104
|
-------
|
|
@@ -651,4 +1121,4 @@ def load(
|
|
|
651
1121
|
>>> input_obj = load(input_format=InputFormat.CSV_ARCHIVE, path="input_dir")
|
|
652
1122
|
"""
|
|
653
1123
|
|
|
654
|
-
return loader.load(input_format, options, path, csv_configurations)
|
|
1124
|
+
return loader.load(input_format, options, path, csv_configurations, data_files)
|
nextmv/model.py
CHANGED
|
@@ -24,7 +24,7 @@ from typing import Any, Optional
|
|
|
24
24
|
|
|
25
25
|
from nextmv.input import Input
|
|
26
26
|
from nextmv.logger import log
|
|
27
|
-
from nextmv.options import Options
|
|
27
|
+
from nextmv.options import Options, OptionsEnforcement
|
|
28
28
|
from nextmv.output import Output
|
|
29
29
|
|
|
30
30
|
# The following block of code is used to suppress warnings from mlflow. We
|
|
@@ -132,6 +132,9 @@ class ModelConfiguration:
|
|
|
132
132
|
formatted as they would appear in a requirements.txt file.
|
|
133
133
|
options : Options, optional
|
|
134
134
|
Options that the decision model requires.
|
|
135
|
+
options_enforcement : OptionsEnforcement, optional
|
|
136
|
+
Enforcement of options for the model. This controls how options
|
|
137
|
+
are handled when the model is run.
|
|
135
138
|
|
|
136
139
|
Examples
|
|
137
140
|
--------
|
|
@@ -139,17 +142,23 @@ class ModelConfiguration:
|
|
|
139
142
|
>>> config = ModelConfiguration(
|
|
140
143
|
... name="my_routing_model",
|
|
141
144
|
... requirements=["nextroute>=1.0.0"],
|
|
142
|
-
... options=Options({"max_time": 60})
|
|
145
|
+
... options=Options({"max_time": 60}),
|
|
146
|
+
... options_enforcement=OptionsEnforcement(
|
|
147
|
+
strict=True,
|
|
148
|
+
validation_enforce=True
|
|
149
|
+
)
|
|
143
150
|
... )
|
|
144
151
|
"""
|
|
145
152
|
|
|
146
153
|
name: str
|
|
147
154
|
"""The name of the decision model."""
|
|
148
|
-
|
|
149
155
|
requirements: Optional[list[str]] = None
|
|
150
156
|
"""A list of Python dependencies that the decision model requires."""
|
|
151
157
|
options: Optional[Options] = None
|
|
152
158
|
"""Options that the decision model requires."""
|
|
159
|
+
options_enforcement: Optional[OptionsEnforcement] = None
|
|
160
|
+
"""Enforcement of options for the model."""
|
|
161
|
+
|
|
153
162
|
|
|
154
163
|
|
|
155
164
|
class Model:
|