nextmv 0.10.3.dev0__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. nextmv/__about__.py +1 -1
  2. nextmv/__entrypoint__.py +39 -0
  3. nextmv/__init__.py +57 -0
  4. nextmv/_serialization.py +96 -0
  5. nextmv/base_model.py +79 -9
  6. nextmv/cloud/__init__.py +71 -10
  7. nextmv/cloud/acceptance_test.py +888 -17
  8. nextmv/cloud/account.py +154 -10
  9. nextmv/cloud/application.py +3644 -437
  10. nextmv/cloud/batch_experiment.py +292 -33
  11. nextmv/cloud/client.py +354 -53
  12. nextmv/cloud/ensemble.py +247 -0
  13. nextmv/cloud/input_set.py +121 -4
  14. nextmv/cloud/instance.py +125 -0
  15. nextmv/cloud/package.py +474 -0
  16. nextmv/cloud/scenario.py +410 -0
  17. nextmv/cloud/secrets.py +234 -0
  18. nextmv/cloud/url.py +73 -0
  19. nextmv/cloud/version.py +174 -0
  20. nextmv/default_app/.gitignore +1 -0
  21. nextmv/default_app/README.md +32 -0
  22. nextmv/default_app/app.yaml +12 -0
  23. nextmv/default_app/input.json +5 -0
  24. nextmv/default_app/main.py +37 -0
  25. nextmv/default_app/requirements.txt +2 -0
  26. nextmv/default_app/src/__init__.py +0 -0
  27. nextmv/default_app/src/main.py +37 -0
  28. nextmv/default_app/src/visuals.py +36 -0
  29. nextmv/deprecated.py +47 -0
  30. nextmv/input.py +883 -78
  31. nextmv/local/__init__.py +5 -0
  32. nextmv/local/application.py +1263 -0
  33. nextmv/local/executor.py +1040 -0
  34. nextmv/local/geojson_handler.py +323 -0
  35. nextmv/local/local.py +97 -0
  36. nextmv/local/plotly_handler.py +61 -0
  37. nextmv/local/runner.py +274 -0
  38. nextmv/logger.py +80 -9
  39. nextmv/manifest.py +1472 -0
  40. nextmv/model.py +431 -0
  41. nextmv/options.py +968 -78
  42. nextmv/output.py +1363 -231
  43. nextmv/polling.py +287 -0
  44. nextmv/run.py +1623 -0
  45. nextmv/safe.py +145 -0
  46. nextmv/status.py +122 -0
  47. {nextmv-0.10.3.dev0.dist-info → nextmv-0.35.0.dist-info}/METADATA +51 -288
  48. nextmv-0.35.0.dist-info/RECORD +50 -0
  49. {nextmv-0.10.3.dev0.dist-info → nextmv-0.35.0.dist-info}/WHEEL +1 -1
  50. nextmv/cloud/status.py +0 -29
  51. nextmv/nextroute/__init__.py +0 -2
  52. nextmv/nextroute/check/__init__.py +0 -26
  53. nextmv/nextroute/check/schema.py +0 -141
  54. nextmv/nextroute/schema/__init__.py +0 -19
  55. nextmv/nextroute/schema/input.py +0 -52
  56. nextmv/nextroute/schema/location.py +0 -13
  57. nextmv/nextroute/schema/output.py +0 -136
  58. nextmv/nextroute/schema/stop.py +0 -61
  59. nextmv/nextroute/schema/vehicle.py +0 -68
  60. nextmv-0.10.3.dev0.dist-info/RECORD +0 -28
  61. {nextmv-0.10.3.dev0.dist-info → nextmv-0.35.0.dist-info}/licenses/LICENSE +0 -0
nextmv/input.py CHANGED
@@ -1,28 +1,362 @@
1
- """Module for handling input sources and data."""
1
+ """
2
+ Module for handling input sources and data.
3
+
4
+ This module provides classes and functions for loading and handling input data
5
+ in various formats for decision problems. It supports JSON, plain text, CSV,
6
+ and CSV archive formats and can load data from standard input or files.
7
+
8
+ Classes
9
+ -------
10
+ InputFormat
11
+ Enum defining supported input data formats (JSON, TEXT, CSV, CSV_ARCHIVE, MULTI_FILE).
12
+ Input
13
+ Container for input data with format specification and options.
14
+ InputLoader
15
+ Base class for loading inputs from various sources.
16
+ LocalInputLoader
17
+ Class for loading inputs from local files or stdin.
18
+
19
+ Functions
20
+ ---------
21
+ load
22
+ Load input data using a specified loader.
23
+
24
+ Attributes
25
+ ----------
26
+ INPUTS_KEY : str
27
+ Key used for identifying inputs in the run.
28
+ """
2
29
 
3
30
  import copy
4
31
  import csv
5
32
  import json
6
33
  import os
7
34
  import sys
35
+ from collections.abc import Callable
8
36
  from dataclasses import dataclass
9
37
  from enum import Enum
10
- from typing import Any, Dict, List, Optional, Union
38
+ from typing import Any
11
39
 
40
+ from nextmv._serialization import serialize_json
41
+ from nextmv.deprecated import deprecated
12
42
  from nextmv.options import Options
13
43
 
44
+ INPUTS_KEY = "inputs"
45
+ """
46
+ Inputs key constant used for identifying inputs in the run.
47
+ """
48
+
14
49
 
15
50
  class InputFormat(str, Enum):
16
- """Format of an `Input`."""
51
+ """
52
+ Format of an `Input`.
53
+
54
+ You can import the `InputFormat` class directly from `nextmv`:
55
+
56
+ ```python
57
+ from nextmv import InputFormat
58
+ ```
59
+
60
+ This enum specifies the supported formats for input data.
17
61
 
18
- JSON = "JSON"
62
+ Attributes
63
+ ----------
64
+ JSON : str
65
+ JSON format, utf-8 encoded.
66
+ TEXT : str
67
+ Text format, utf-8 encoded.
68
+ CSV : str
69
+ CSV format, utf-8 encoded.
70
+ CSV_ARCHIVE : str
71
+ CSV archive format: multiple CSV files.
72
+ MULTI_FILE : str
73
+ Multi-file format, used for loading multiple files in a single input.
74
+ """
75
+
76
+ JSON = "json"
19
77
  """JSON format, utf-8 encoded."""
20
- TEXT = "TEXT"
78
+ TEXT = "text"
21
79
  """Text format, utf-8 encoded."""
22
- CSV = "CSV"
80
+ CSV = "csv"
23
81
  """CSV format, utf-8 encoded."""
24
- CSV_ARCHIVE = "CSV_ARCHIVE"
82
+ CSV_ARCHIVE = "csv-archive"
25
83
  """CSV archive format: multiple CSV files."""
84
+ MULTI_FILE = "multi-file"
85
+ """Multi-file format, used for loading multiple files in a single input."""
86
+
87
+
88
+ @dataclass
89
+ class DataFile:
90
+ """
91
+ Represents data to be read from a file.
92
+
93
+ You can import the `DataFile` class directly from `nextmv`:
94
+
95
+ ```python
96
+ from nextmv import DataFile
97
+ ```
98
+
99
+ This class is used to define data that will be read from a file in the
100
+ filesystem. It includes the name of the file, and the reader function that
101
+ will handle the loading, and deserialization of the data from the file.
102
+ This `DataFile` class is typically used in the `Input`, when the
103
+ `Input.input_format` is set to `InputFormat.MULTI_FILE`. Given that it is
104
+ difficult to handle every edge case of how data is deserialized, and read
105
+ from a file, this class exists so that the user can implement the `loader`
106
+ callable of their choice and provide it with any `loader_args` and
107
+ `loader_kwargs` they might need.
108
+
109
+ Parameters
110
+ ----------
111
+ name : str
112
+ Name of the data (input) file. The file extension should be included in
113
+ the name.
114
+ loader : Callable[[str], Any]
115
+ Callable that reads the data from the file. This should be a function
116
+ implemented by the user. There are convenience functions that you can
117
+ use as a loader as well. The `loader` must receive, at the very minimum,
118
+ the following arguments:
119
+
120
+ - `file_path`: a `str` argument which is the location where this
121
+ data will be read from. This includes the dir and name of the
122
+ file. As such, the `name` parameter of this class is going to be
123
+ passed to the `loader` function, joined with the directory where the
124
+ file will be read from.
125
+
126
+ The `loader` can also receive additional arguments, and keyword
127
+ arguments. The `loader_args` and `loader_kwargs` parameters of this
128
+ class can be used to provide those additional arguments.
129
+
130
+ The `loader` function should return the data that will be used in the
131
+ model.
132
+ """
133
+
134
+ name: str
135
+ """
136
+ Name of the data (input) file. The file extension should be included in the
137
+ name.
138
+ """
139
+ loader: Callable[[str], Any]
140
+ """
141
+ Callable that reads (loads) the data from the file. This should be a function
142
+ implemented by the user. There are convenience functions that you can use
143
+ as a `loader` as well. The `loader` must receive, at the very minimum, the
144
+ following arguments:
145
+
146
+ - `file_path`: a `str` argument which is the location where this
147
+ data will be read from. This includes the dir and name of the
148
+ file. As such, the `name` parameter of this class is going to be
149
+ passed to the `loader` function, joined with the directory where the
150
+ file will be read from.
151
+
152
+ The `loader` can also receive additional arguments, and keyword arguments.
153
+ The `loader_args` and `loader_kwargs` parameters of this class can be used
154
+ to provide those additional arguments.
155
+
156
+ The `loader` function should return the data that will be used in the model.
157
+ """
158
+ loader_kwargs: dict[str, Any] | None = None
159
+ """
160
+ Optional keyword arguments to pass to the loader function. This can be used
161
+ to customize the behavior of the loader.
162
+ """
163
+ loader_args: list[Any] | None = None
164
+ """
165
+ Optional positional arguments to pass to the loader function. This can be
166
+ used to customize the behavior of the loader.
167
+ """
168
+ input_data_key: str | None = None
169
+ """
170
+ Use this parameter to set a custom key to represent your file.
171
+
172
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
173
+ the data from the file is loaded to the `.data` parameter of the `Input`.
174
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
175
+ represents the file name (with extension) and the value is the data that is
176
+ actually loaded from the file using the `loader` function. You can set a
177
+ custom key to represent your file by using this attribute.
178
+ """
179
+
180
+
181
+ def json_data_file(
182
+ name: str,
183
+ json_configurations: dict[str, Any] | None = None,
184
+ input_data_key: str | None = None,
185
+ ) -> DataFile:
186
+ """
187
+ This is a convenience function to create a `DataFile` that reads JSON data.
188
+
189
+ You can import the `json_data_file` function directly from `nextmv`:
190
+
191
+ ```python
192
+ from nextmv import json_data_file
193
+ ```
194
+
195
+ Parameters
196
+ ----------
197
+ name : str
198
+ Name of the data file. You don't need to include the `.json` extension.
199
+ json_configurations : dict[str, Any], optional
200
+ JSON-specific configurations for reading the data.
201
+ input_data_key : str, optional
202
+ A custom key to represent the data from this file.
203
+
204
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
205
+ the data from the file is loaded to the `.data` parameter of the `Input`.
206
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
207
+ represents the file name (with extension) and the value is the data that is
208
+ actually loaded from the file using the `loader` function. You can set a
209
+ custom key to represent your file by using this attribute.
210
+
211
+ Returns
212
+ -------
213
+ DataFile
214
+ A `DataFile` instance that reads JSON data from a file with the given
215
+ name.
216
+
217
+ Examples
218
+ --------
219
+ >>> from nextmv import json_data_file
220
+ >>> data_file = json_data_file("my_data")
221
+ >>> data = data_file.read()
222
+ >>> print(data)
223
+ {
224
+ "key": "value",
225
+ "another_key": [1, 2, 3]
226
+ }
227
+ """
228
+
229
+ if not name.endswith(".json"):
230
+ name += ".json"
231
+
232
+ json_configurations = json_configurations or {}
233
+
234
+ def loader(file_path: str) -> dict[str, Any] | Any:
235
+ with open(file_path, encoding="utf-8") as f:
236
+ return json.load(f, **json_configurations)
237
+
238
+ return DataFile(
239
+ name=name,
240
+ loader=loader,
241
+ input_data_key=input_data_key,
242
+ )
243
+
244
+
245
+ def csv_data_file(
246
+ name: str,
247
+ csv_configurations: dict[str, Any] | None = None,
248
+ input_data_key: str | None = None,
249
+ ) -> DataFile:
250
+ """
251
+ This is a convenience function to create a `DataFile` that reads CSV data.
252
+
253
+ You can import the `csv_data_file` function directly from `nextmv`:
254
+
255
+ ```python
256
+ from nextmv import csv_data_file
257
+ ```
258
+
259
+ Parameters
260
+ ----------
261
+ name : str
262
+ Name of the data file. You don't need to include the `.csv` extension.
263
+ csv_configurations : dict[str, Any], optional
264
+ CSV-specific configurations for reading the data.
265
+ input_data_key : str, optional
266
+ A custom key to represent the data from this file.
267
+
268
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
269
+ the data from the file is loaded to the `.data` parameter of the `Input`.
270
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
271
+ represents the file name (with extension) and the value is the data that is
272
+ actually loaded from the file using the `loader` function. You can set a
273
+ custom key to represent your file by using this attribute.
274
+
275
+ Returns
276
+ -------
277
+ DataFile
278
+ A `DataFile` instance that reads CSV data from a file with the given
279
+ name.
280
+
281
+ Examples
282
+ --------
283
+ >>> from nextmv import csv_data_file
284
+ >>> data_file = csv_data_file("my_data")
285
+ >>> data = data_file.read()
286
+ >>> print(data)
287
+ [
288
+ {"column1": "value1", "column2": "value2"},
289
+ {"column1": "value3", "column2": "value4"}
290
+ ]
291
+ """
292
+
293
+ if not name.endswith(".csv"):
294
+ name += ".csv"
295
+
296
+ csv_configurations = csv_configurations or {}
297
+
298
+ def loader(file_path: str) -> list[dict[str, Any]]:
299
+ with open(file_path, encoding="utf-8") as f:
300
+ return list(csv.DictReader(f, **csv_configurations))
301
+
302
+ return DataFile(
303
+ name=name,
304
+ loader=loader,
305
+ input_data_key=input_data_key,
306
+ )
307
+
308
+
309
+ def text_data_file(name: str, input_data_key: str | None = None) -> DataFile:
310
+ """
311
+ This is a convenience function to create a `DataFile` that reads utf-8
312
+ encoded text data.
313
+
314
+ You can import the `text_data_file` function directly from `nextmv`:
315
+
316
+ ```python
317
+ from nextmv import text_data_file
318
+ ```
319
+
320
+ You must provide the extension as part of the `name` parameter.
321
+
322
+ Parameters
323
+ ----------
324
+ name : str
325
+ Name of the data file. The file extension must be provided in the name.
326
+ input_data_key : str, optional
327
+ A custom key to represent the data from this file.
328
+
329
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
330
+ the data from the file is loaded to the `.data` parameter of the `Input`.
331
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
332
+ represents the file name (with extension) and the value is the data that is
333
+ actually loaded from the file using the `loader` function. You can set a
334
+ custom key to represent your file by using this attribute.
335
+
336
+ Returns
337
+ -------
338
+ DataFile
339
+ A `DataFile` instance that reads text data from a file with the given
340
+ name.
341
+
342
+ Examples
343
+ --------
344
+ >>> from nextmv import text_data_file
345
+ >>> data_file = text_data_file("my_data")
346
+ >>> data = data_file.read()
347
+ >>> print(data)
348
+ This is some text data.
349
+ """
350
+
351
+ def loader(file_path: str) -> str:
352
+ with open(file_path, encoding="utf-8") as f:
353
+ return f.read().rstrip("\n")
354
+
355
+ return DataFile(
356
+ name=name,
357
+ loader=loader,
358
+ input_data_key=input_data_key,
359
+ )
26
360
 
27
361
 
28
362
  @dataclass
@@ -30,37 +364,96 @@ class Input:
30
364
  """
31
365
  Input for a decision problem.
32
366
 
367
+ You can import the `Input` class directly from `nextmv`:
368
+
369
+ ```python
370
+ from nextmv import Input
371
+ ```
372
+
373
+ The `data`'s type must match the `input_format`:
374
+
375
+ - `InputFormat.JSON`: the data is `Union[dict[str, Any], Any]`. This just
376
+ means that the data must be JSON-deserializable, which includes dicts and
377
+ lists.
378
+ - `InputFormat.TEXT`: the data is `str`, and it must be utf-8 encoded.
379
+ - `InputFormat.CSV`: the data is `list[dict[str, Any]]`, where each dict
380
+ represents a row in the CSV.
381
+ - `InputFormat.CSV_ARCHIVE`: the data is `dict[str, list[dict[str, Any]]]`,
382
+ where each key is the name of a CSV file and the value is a list of dicts
383
+ representing the rows in that CSV file.
384
+ - `InputFormat.MULTI_FILE`: the data is `dict[str, Any]`, where for each
385
+ item, the key is the file name (with the extension) and the actual data
386
+ from the file is the value. When working with multi-file, data is loaded
387
+ from one or more files in a specific directory. Given that each file can
388
+ be of different types (JSON, CSV, Excel, etc...), the data captured from
389
+ each might vary. To reflect this, the data is loaded as a dict of items.
390
+ You can have a custom key for the data, that is not the file name, if
391
+ you use the `input_data_key` parameter of the `DataFile` class.
392
+
33
393
  Parameters
34
394
  ----------
35
- data : Any
395
+ data : Union[Union[dict[str, Any], Any], str, list[dict[str, Any]],
396
+ dict[str, list[dict[str, Any]]], dict[str, Any]]
36
397
  The actual data.
37
398
  input_format : InputFormat, optional
38
399
  Format of the input data. Default is `InputFormat.JSON`.
39
400
  options : Options, optional
40
401
  Options that the input was created with.
402
+
403
+ Raises
404
+ ------
405
+ ValueError
406
+ If the data type doesn't match the expected type for the given format.
407
+ ValueError
408
+ If the `input_format` is not one of the supported formats.
41
409
  """
42
410
 
43
- data: Union[
44
- Union[Dict[str, Any], Any], # JSON
45
- str, # TEXT
46
- List[Dict[str, Any]], # CSV
47
- Dict[str, List[Dict[str, Any]]], # CSV_ARCHIVE
48
- ]
49
- """The actual data. The data can be of various types, depending on the
50
- input format."""
411
+ data: dict[str, Any] | Any | str | list[dict[str, Any]] | dict[str, list[dict[str, Any]]] | dict[str, Any]
412
+ """
413
+ The actual data.
51
414
 
52
- input_format: Optional[InputFormat] = InputFormat.JSON
53
- """Format of the input data. Default is `InputFormat.JSON`."""
54
- options: Optional[Options] = None
55
- """Options that the `Input` were created with."""
415
+ The data can be of various types, depending on the input format:
416
+
417
+ - For `JSON`: `Union[dict[str, Any], Any]`
418
+ - For `TEXT`: `str`
419
+ - For `CSV`: `list[dict[str, Any]]`
420
+ - For `CSV_ARCHIVE`: `dict[str, list[dict[str, Any]]]`
421
+ - For `MULTI_FILE`: `dict[str, Any]`
422
+ """
423
+
424
+ input_format: InputFormat | None = InputFormat.JSON
425
+ """
426
+ Format of the input data.
427
+
428
+ Default is `InputFormat.JSON`.
429
+ """
430
+
431
+ options: Options | None = None
432
+ """
433
+ Options that the `Input` was created with.
434
+
435
+ A copy of the options is made during initialization, ensuring the original
436
+ options remain unchanged even if modified later.
437
+ """
56
438
 
57
439
  def __post_init__(self):
58
- """Check that the data matches the format given to initialize the
59
- class."""
440
+ """
441
+ Check that the data matches the format given to initialize the class.
442
+
443
+ This method is automatically called after the dataclass is initialized.
444
+ It validates that the data provided is of the correct type according to
445
+ the specified input_format and makes a deep copy of the options to ensure
446
+ the input maintains its own copy.
447
+
448
+ Raises
449
+ ------
450
+ ValueError
451
+ If the data type doesn't match the expected type for the given format.
452
+ """
60
453
 
61
454
  if self.input_format == InputFormat.JSON:
62
455
  try:
63
- _ = json.dumps(self.data)
456
+ _ = serialize_json(self.data)
64
457
  except (TypeError, OverflowError) as e:
65
458
  raise ValueError(
66
459
  f"Input has input_format InputFormat.JSON and "
@@ -85,20 +478,81 @@ class Input:
85
478
  "input_format InputFormat.CSV_ARCHIVE, supported type is `dict`"
86
479
  )
87
480
 
481
+ elif self.input_format == InputFormat.MULTI_FILE and not isinstance(self.data, dict):
482
+ raise ValueError(
483
+ f"unsupported Input.data type: {type(self.data)} with "
484
+ "input_format InputFormat.MULTI_FILE, supported type is `dict`"
485
+ )
486
+
88
487
  # Capture a snapshot of the options that were used to create the class
89
488
  # so even if they are changed later, we have a record of the original.
90
489
  init_options = self.options
91
490
  new_options = copy.deepcopy(init_options)
92
491
  self.options = new_options
93
492
 
493
+ def to_dict(self) -> dict[str, Any]:
494
+ """
495
+ Convert the input to a dictionary.
496
+
497
+ This method serializes the Input object to a dictionary format that can
498
+ be easily converted to JSON or other serialization formats. When the
499
+ `input_format` is set to `InputFormat.MULTI_FILE`, it will not include
500
+ the `data` field, as it is uncertain how data is deserialized from the file.
501
+
502
+ Returns
503
+ -------
504
+ dict[str, Any]
505
+ A dictionary containing the input data, format, and options.
506
+
507
+ The structure is:
508
+ ```python
509
+ {
510
+ "data": <the input data>,
511
+ "input_format": <the input format as a string>,
512
+ "options": <the options as a dictionary or None>
513
+ }
514
+ ```
515
+
516
+ Examples
517
+ --------
518
+ >>> from nextmv.input import Input, InputFormat
519
+ >>> input_obj = Input(data={"key": "value"}, input_format=InputFormat.JSON)
520
+ >>> input_dict = input_obj.to_dict()
521
+ >>> print(input_dict)
522
+ {'data': {'key': 'value'}, 'input_format': 'json', 'options': None}
523
+ """
524
+
525
+ input_dict = {
526
+ "input_format": self.input_format.value,
527
+ "options": self.options.to_dict() if self.options is not None else None,
528
+ }
529
+
530
+ if self.input_format == InputFormat.MULTI_FILE:
531
+ return input_dict
532
+
533
+ input_dict["data"] = self.data
534
+
535
+ return input_dict
536
+
94
537
 
95
538
  class InputLoader:
96
- """Base class for loading inputs."""
539
+ """
540
+ Base class for loading inputs.
541
+
542
+ You can import the `InputLoader` class directly from `nextmv`:
543
+
544
+ ```python
545
+ from nextmv import InputLoader
546
+ ```
547
+
548
+ This abstract class defines the interface for input loaders. Subclasses must
549
+ implement the `load` method to provide concrete input loading functionality.
550
+ """
97
551
 
98
552
  def load(
99
553
  self,
100
554
  input_format: InputFormat = InputFormat.JSON,
101
- options: Optional[Options] = None,
555
+ options: Options | None = None,
102
556
  *args,
103
557
  **kwargs,
104
558
  ) -> Input:
@@ -133,29 +587,100 @@ class InputLoader:
133
587
 
134
588
  class LocalInputLoader(InputLoader):
135
589
  """
136
- Class for loading local inputs. This class can load input data from the
137
- local filesystem, by using stdin, a file, or a directory, where applicable.
590
+ Class for loading local inputs.
591
+
592
+ You can import the `LocalInputLoader` class directly from `nextmv`:
593
+
594
+ ```python
595
+ from nextmv import LocalInputLoader
596
+ ```
597
+
598
+ This class can load input data from the local filesystem, by using stdin,
599
+ a file, or a directory, where applicable. It supports various input formats
600
+ like JSON, TEXT, CSV, and CSV archive.
601
+
138
602
  Call the `load` method to read the input data.
603
+
604
+ Examples
605
+ --------
606
+ >>> from nextmv.input import LocalInputLoader, InputFormat
607
+ >>> loader = LocalInputLoader()
608
+ >>> # Load JSON from stdin or file
609
+ >>> input_obj = loader.load(input_format=InputFormat.JSON, path="data.json")
610
+ >>> # Load CSV from a file
611
+ >>> input_obj = loader.load(input_format=InputFormat.CSV, path="data.csv")
139
612
  """
140
613
 
141
- def _read_text(path: str) -> str:
614
+ def _read_text(path: str, _) -> str:
615
+ """
616
+ Read a text file and return its contents.
617
+
618
+ Parameters
619
+ ----------
620
+ path : str
621
+ Path to the text file.
622
+ _ : Any
623
+ Placeholder for unused parameter (for API consistency).
624
+
625
+ Returns
626
+ -------
627
+ str
628
+ Contents of the text file with trailing newlines removed.
629
+ """
142
630
  with open(path, encoding="utf-8") as f:
143
631
  return f.read().rstrip("\n")
144
632
 
145
- def _read_csv(path: str) -> List[Dict[str, Any]]:
633
+ def _read_csv(path: str, csv_configurations: dict[str, Any] | None) -> list[dict[str, Any]]:
634
+ """
635
+ Read a CSV file and return its contents as a list of dictionaries.
636
+
637
+ Parameters
638
+ ----------
639
+ path : str
640
+ Path to the CSV file.
641
+ csv_configurations : dict[str, Any], optional
642
+ Configuration parameters for the CSV DictReader.
643
+
644
+ Returns
645
+ -------
646
+ list[dict[str, Any]]
647
+ List of dictionaries where each dictionary represents a row in the CSV.
648
+ """
146
649
  with open(path, encoding="utf-8") as f:
147
- return list(csv.DictReader(f, quoting=csv.QUOTE_NONNUMERIC))
650
+ return list(csv.DictReader(f, **csv_configurations))
651
+
652
+ def _read_json(path: str, _) -> dict[str, Any] | Any:
653
+ """
654
+ Read a JSON file and return its parsed contents.
148
655
 
149
- def _read_json(path: str) -> Union[Dict[str, Any], Any]:
656
+ Parameters
657
+ ----------
658
+ path : str
659
+ Path to the JSON file.
660
+ _ : Any
661
+ Placeholder for unused parameter (for API consistency).
662
+
663
+ Returns
664
+ -------
665
+ Union[dict[str, Any], Any]
666
+ Parsed JSON data.
667
+ """
150
668
  with open(path, encoding="utf-8") as f:
151
669
  return json.load(f)
152
670
 
153
671
  # All of these readers are callback functions.
154
672
  STDIN_READERS = {
155
- InputFormat.JSON: lambda: json.load(sys.stdin),
156
- InputFormat.TEXT: lambda: sys.stdin.read().rstrip("\n"),
157
- InputFormat.CSV: lambda: list(csv.DictReader(sys.stdin, quoting=csv.QUOTE_NONNUMERIC)),
673
+ InputFormat.JSON: lambda _: json.load(sys.stdin),
674
+ InputFormat.TEXT: lambda _: sys.stdin.read().rstrip("\n"),
675
+ InputFormat.CSV: lambda csv_configurations: list(csv.DictReader(sys.stdin, **csv_configurations)),
158
676
  }
677
+ """
678
+ Dictionary of functions to read from standard input.
679
+
680
+ Each key is an InputFormat, and each value is a function that reads from
681
+ standard input in that format.
682
+ """
683
+
159
684
  # These callbacks were not implemented with lambda because we needed
160
685
  # multiple lines. By using `open`, we needed the `with` to be able to close
161
686
  # the file.
@@ -164,12 +689,20 @@ class LocalInputLoader(InputLoader):
164
689
  InputFormat.TEXT: _read_text,
165
690
  InputFormat.CSV: _read_csv,
166
691
  }
692
+ """
693
+ Dictionary of functions to read from files.
694
+
695
+ Each key is an InputFormat, and each value is a function that reads from
696
+ a file in that format.
697
+ """
167
698
 
168
699
  def load(
169
700
  self,
170
- input_format: Optional[InputFormat] = InputFormat.JSON,
171
- options: Optional[Options] = None,
172
- path: Optional[str] = None,
701
+ input_format: InputFormat | None = InputFormat.JSON,
702
+ options: Options | None = None,
703
+ path: str | None = None,
704
+ csv_configurations: dict[str, Any] | None = None,
705
+ data_files: list[DataFile] | None = None,
173
706
  ) -> Input:
174
707
  """
175
708
  Load the input data. The input data can be in various formats. For
@@ -185,11 +718,15 @@ class LocalInputLoader(InputLoader):
185
718
  The `Input` that is returned contains the `data` attribute. This data
186
719
  can be of different types, depending on the provided `input_format`:
187
720
 
188
- - `InputFormat.JSON`: the data is a `Dict[str, Any]`.
721
+ - `InputFormat.JSON`: the data is a `dict[str, Any]`.
189
722
  - `InputFormat.TEXT`: the data is a `str`.
190
- - `InputFormat.CSV`: the data is a `List[Dict[str, Any]]`.
191
- - `InputFormat.CSV_ARCHIVE`: the data is a `Dict[str, List[Dict[str, Any]]]`.
723
+ - `InputFormat.CSV`: the data is a `list[dict[str, Any]]`.
724
+ - `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`.
192
725
  Each key is the name of the CSV file, minus the `.csv` extension.
726
+ - `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`, where each
727
+ key is the file name (with extension) and the value is the data read
728
+ from the file. The data can be of any type, depending on the file
729
+ type and the loader function provided in the `DataFile` instances.
193
730
 
194
731
  Parameters
195
732
  ----------
@@ -199,6 +736,20 @@ class LocalInputLoader(InputLoader):
199
736
  Options for loading the input data.
200
737
  path : str, optional
201
738
  Path to the input data.
739
+ csv_configurations : dict[str, Any], optional
740
+ Configurations for loading CSV files. The default `DictReader` is
741
+ used when loading a CSV file, so you have the option to pass in a
742
+ dictionary with custom kwargs for the `DictReader`.
743
+ data_files : list[DataFile], optional
744
+ List of `DataFile` instances to read from. This is used when the
745
+ `input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
746
+ instance should have a `name` (the file name with extension) and a
747
+ `loader` function that reads the data from the file. The `loader`
748
+ function should accept the file path as its first argument and return
749
+ the data read from the file. The `loader` can also accept additional
750
+ positional and keyword arguments, which can be provided through the
751
+ `loader_args` and `loader_kwargs` attributes of the `DataFile`
752
+ instance.
202
753
 
203
754
  Returns
204
755
  -------
@@ -212,39 +763,95 @@ class LocalInputLoader(InputLoader):
212
763
  """
213
764
 
214
765
  data: Any = None
766
+ if csv_configurations is None:
767
+ csv_configurations = {}
215
768
 
216
769
  if input_format in [InputFormat.JSON, InputFormat.TEXT, InputFormat.CSV]:
217
- data = self._load_utf8_encoded(path=path, input_format=input_format)
770
+ data = self._load_utf8_encoded(path=path, input_format=input_format, csv_configurations=csv_configurations)
218
771
  elif input_format == InputFormat.CSV_ARCHIVE:
219
- data = self._load_archive(path=path)
772
+ data = self._load_archive(path=path, csv_configurations=csv_configurations)
773
+ elif input_format == InputFormat.MULTI_FILE:
774
+ if data_files is None:
775
+ raise ValueError("data_files must be provided when input_format is InputFormat.MULTI_FILE")
776
+
777
+ if not isinstance(data_files, list):
778
+ raise ValueError("data_files must be a list of DataFile instances")
779
+
780
+ data = self._load_multi_file(data_files=data_files, path=path)
220
781
 
221
782
  return Input(data=data, input_format=input_format, options=options)
222
783
 
223
784
  def _load_utf8_encoded(
224
785
  self,
225
- path: Optional[str] = None,
226
- input_format: Optional[InputFormat] = InputFormat.JSON,
786
+ csv_configurations: dict[str, Any] | None,
787
+ path: str | None = None,
788
+ input_format: InputFormat | None = InputFormat.JSON,
227
789
  use_file_reader: bool = False,
228
- ) -> Union[Dict[str, Any], str, List[Dict[str, Any]]]:
790
+ ) -> dict[str, Any] | str | list[dict[str, Any]]:
229
791
  """
230
- Load a utf-8 encoded file. Can come from stdin or a file in the
231
- filesystem.
792
+ Load a utf-8 encoded file from stdin or filesystem.
793
+
794
+ This internal method handles loading data in various formats from either
795
+ standard input or a file.
796
+
797
+ Parameters
798
+ ----------
799
+ csv_configurations : dict[str, Any], optional
800
+ Configuration parameters for the CSV DictReader.
801
+ path : str, optional
802
+ Path to the file to read from. If None or empty, reads from stdin.
803
+ input_format : InputFormat, optional
804
+ Format of the input data. Default is JSON.
805
+ use_file_reader : bool, optional
806
+ Whether to force using the file reader even if path is None.
807
+ Default is False.
808
+
809
+ Returns
810
+ -------
811
+ Union[dict[str, Any], str, list[dict[str, Any]]]
812
+ Data read from stdin or file in the specified format.
232
813
  """
233
814
 
234
815
  # If we forcibly want to use the file reader, we can do so.
235
816
  if use_file_reader:
236
- return self.FILE_READERS[input_format](path)
817
+ return self.FILE_READERS[input_format](path, csv_configurations)
237
818
 
238
819
  # Otherwise, we can use the stdin reader if no path is provided.
239
820
  if path is None or path == "":
240
- return self.STDIN_READERS[input_format]()
821
+ return self.STDIN_READERS[input_format](csv_configurations)
241
822
 
242
823
  # Lastly, we can use the file reader if a path is provided.
243
- return self.FILE_READERS[input_format](path)
824
+ return self.FILE_READERS[input_format](path, csv_configurations)
244
825
 
245
- def _load_archive(self, path: Optional[str] = None) -> Dict[str, List[Dict[str, Any]]]:
826
+ def _load_archive(
827
+ self,
828
+ csv_configurations: dict[str, Any] | None,
829
+ path: str | None = None,
830
+ ) -> dict[str, list[dict[str, Any]]]:
246
831
  """
247
- Load files from a directory. Will only load CSV files.
832
+ Load CSV files from a directory.
833
+
834
+ This internal method loads all CSV files from a specified directory,
835
+ organizing them into a dictionary where each key is the filename
836
+ (without .csv extension) and each value is the parsed CSV content.
837
+
838
+ Parameters
839
+ ----------
840
+ csv_configurations : dict[str, Any], optional
841
+ Configuration parameters for the CSV DictReader.
842
+ path : str, optional
843
+ Path to the directory containing CSV files. If None or empty,
844
+ uses "./input" as the default directory.
845
+
846
+ Returns
847
+ -------
848
+ dict[str, list[dict[str, Any]]]
849
+ Dictionary mapping filenames to CSV contents.
850
+
851
+ Raises
852
+ ------
853
+ ValueError
854
+ If the path is not a directory or the default directory doesn't exist.
248
855
  """
249
856
 
250
857
  dir_path = "input"
@@ -261,44 +868,107 @@ class LocalInputLoader(InputLoader):
261
868
  csv_ext = ".csv"
262
869
  for file in os.listdir(dir_path):
263
870
  if file.endswith(csv_ext):
264
- stripped = file.strip(csv_ext[1:]).strip(".") # Python 3.8 forces this, instead of using removesuffix()
871
+ stripped = file.removesuffix(csv_ext)
265
872
  data[stripped] = self._load_utf8_encoded(
266
873
  path=os.path.join(dir_path, file),
267
874
  input_format=InputFormat.CSV,
268
875
  use_file_reader=True,
876
+ csv_configurations=csv_configurations,
269
877
  )
270
878
 
271
879
  return data
272
880
 
881
+ def _load_multi_file(
882
+ self,
883
+ data_files: list[DataFile],
884
+ path: str | None = None,
885
+ ) -> dict[str, Any]:
886
+ """
887
+ Load multiple files from a directory.
888
+
889
+ This internal method loads all supported files from a specified
890
+ directory, organizing them into a dictionary where each key is the
891
+ filename and each value is the parsed file content. Supports CSV files
892
+ (parsed as list of dictionaries), JSON files (parsed as JSON objects),
893
+ and any other utf-8 encoded text files (loaded as plain text strings).
894
+ It also supports Excel files, loading them as DataFrames.
895
+
896
+ Parameters
897
+ ----------
898
+ data_files : list[DataFile]
899
+ List of `DataFile` instances to read from.
900
+ path : str, optional
901
+ Path to the directory containing files. If None or empty,
902
+ uses "./inputs" as the default directory.
903
+
904
+ Returns
905
+ -------
906
+ dict[str, Any]
907
+ Dictionary mapping filenames to file contents. CSV files are loaded
908
+ as lists of dictionaries, JSON files as parsed JSON objects, and
909
+ other utf-8 text files as strings. Excel files are loaded as
910
+ DataFrames.
911
+
912
+ Raises
913
+ ------
914
+ ValueError
915
+ If the path is not a directory or the default directory doesn't exist.
916
+ """
917
+
918
+ dir_path = INPUTS_KEY
919
+ if path is not None and path != "":
920
+ if not os.path.isdir(path):
921
+ raise ValueError(f"path {path} is not a directory")
922
+
923
+ dir_path = path
924
+
925
+ if not os.path.isdir(dir_path):
926
+ raise ValueError(f'expected input directoy "{dir_path}" to exist as a default location')
927
+
928
+ data = {}
929
+
930
+ for data_file in data_files:
931
+ name = data_file.name
932
+ file_path = os.path.join(dir_path, name)
933
+
934
+ if data_file.loader_args is None:
935
+ data_file.loader_args = []
936
+ if data_file.loader_kwargs is None:
937
+ data_file.loader_kwargs = {}
938
+
939
+ d = data_file.loader(
940
+ file_path,
941
+ *data_file.loader_args,
942
+ **data_file.loader_kwargs,
943
+ )
944
+
945
+ key = name
946
+ if data_file.input_data_key is not None:
947
+ key = data_file.input_data_key
948
+
949
+ if data.get(key) is not None:
950
+ raise ValueError(f"Duplicate input data key found: {key}")
951
+
952
+ data[key] = d
953
+
954
+ return data
955
+
273
956
 
274
957
  def load_local(
275
- input_format: Optional[InputFormat] = InputFormat.JSON,
276
- options: Optional[Options] = None,
277
- path: Optional[str] = None,
958
+ input_format: InputFormat | None = InputFormat.JSON,
959
+ options: Options | None = None,
960
+ path: str | None = None,
961
+ csv_configurations: dict[str, Any] | None = None,
278
962
  ) -> Input:
279
963
  """
964
+ !!! warning
965
+ `load_local` is deprecated, use `load` instead.
966
+
967
+ Load input data from local sources.
968
+
280
969
  This is a convenience function for instantiating a `LocalInputLoader`
281
970
  and calling its `load` method.
282
971
 
283
- Load the input data. The input data can be in various formats. For
284
- `InputFormat.JSON`, `InputFormat.TEXT`, and `InputFormat.CSV`, the data can
285
- be streamed from stdin or read from a file. When the `path` argument is
286
- provided (and valid), the input data is read from the file specified by
287
- `path`, otherwise, it is streamed from stdin. For
288
- `InputFormat.CSV_ARCHIVE`, the input data is read from the directory
289
- specified by `path`. If the `path` is not provided, the default location
290
- `input` is used. The directory should contain one or more files, where each
291
- file in the directory is a CSV file.
292
-
293
- The `Input` that is returned contains the `data` attribute. This data can
294
- be of different types, depending on the provided `input_format`:
295
-
296
- - `InputFormat.JSON`: the data is a `Dict[str, Any]`.
297
- - `InputFormat.TEXT`: the data is a `str`.
298
- - `InputFormat.CSV`: the data is a `List[Dict[str, Any]]`.
299
- - `InputFormat.CSV_ARCHIVE`: the data is a `Dict[str, List[Dict[str, Any]]]`.
300
- Each key is the name of the CSV file, minus the `.csv` extension.
301
-
302
972
  Parameters
303
973
  ----------
304
974
  input_format : InputFormat, optional
@@ -307,17 +977,152 @@ def load_local(
307
977
  Options for loading the input data.
308
978
  path : str, optional
309
979
  Path to the input data.
980
+ csv_configurations : dict[str, Any], optional
981
+ Configurations for loading CSV files. Custom kwargs for
982
+ Python's `csv.DictReader`.
310
983
 
311
984
  Returns
312
985
  -------
313
986
  Input
314
- The input data.
987
+ The loaded input data in an Input object.
315
988
 
316
989
  Raises
317
990
  ------
318
991
  ValueError
319
- If the path is not a directory when working with CSV_ARCHIVE.
992
+ If the path is invalid or data format is incorrect.
993
+
994
+ See Also
995
+ --------
996
+ load : The recommended function to use instead.
320
997
  """
321
998
 
999
+ deprecated(
1000
+ name="load_local",
1001
+ reason="`load_local` is deprecated, use `load` instead.",
1002
+ )
1003
+
322
1004
  loader = LocalInputLoader()
323
- return loader.load(input_format, options, path)
1005
+ return loader.load(input_format, options, path, csv_configurations)
1006
+
1007
+
1008
+ _LOCAL_INPUT_LOADER = LocalInputLoader()
1009
+ """Default instance of LocalInputLoader used by the load function."""
1010
+
1011
+
1012
+ def load(
1013
+ input_format: InputFormat | None = InputFormat.JSON,
1014
+ options: Options | None = None,
1015
+ path: str | None = None,
1016
+ csv_configurations: dict[str, Any] | None = None,
1017
+ loader: InputLoader | None = _LOCAL_INPUT_LOADER,
1018
+ data_files: list[DataFile] | None = None,
1019
+ ) -> Input:
1020
+ """
1021
+ Load input data using the specified loader.
1022
+
1023
+ You can import the `load` function directly from `nextmv`:
1024
+
1025
+ ```python
1026
+ from nextmv import load
1027
+ ```
1028
+
1029
+ This is a convenience function for loading an `Input` object. By default,
1030
+ it uses the `LocalInputLoader` to load data from local sources.
1031
+
1032
+ The input data can be in various formats and can be loaded from different
1033
+ sources depending on the loader:
1034
+
1035
+ - `InputFormat.JSON`: the data is a `dict[str, Any]`
1036
+ - `InputFormat.TEXT`: the data is a `str`
1037
+ - `InputFormat.CSV`: the data is a `list[dict[str, Any]]`
1038
+ - `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`
1039
+ Each key is the name of the CSV file, minus the `.csv` extension.
1040
+ - `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`
1041
+ where each key is the file name (with extension) and the value is the
1042
+ data read from the file. This is used for loading multiple files in a
1043
+ single input, where each file can be of different types (JSON, CSV,
1044
+ Excel, etc.). The data is loaded as a dict of items, where each item
1045
+ corresponds to a file and its content.
1046
+
1047
+ When specifying `input_format` as `InputFormat.MULTI_FILE`, the
1048
+ `data_files` argument must be provided. This argument is a list of
1049
+ `DataFile` instances, each representing a file to be read. Each `DataFile`
1050
+ instance should have a `name` (the file name with extension) and a `loader`
1051
+ function that reads the data from the file. The `loader` function should
1052
+ accept the file path as its first argument and return the data read from
1053
+ the file. The `loader` can also accept additional positional and keyword
1054
+ arguments, which can be provided through the `loader_args` and
1055
+ `loader_kwargs` attributes of the `DataFile` instance.
1056
+
1057
+ There are convenience functions that can be used to create `DataFile`
1058
+ instances, such as:
1059
+
1060
+ - `json_data_file`: Creates a `DataFile` that reads JSON data.
1061
+ - `csv_data_file`: Creates a `DataFile` that reads CSV data.
1062
+ - `text_data_file`: Creates a `DataFile` that reads utf-8 encoded text
1063
+ data.
1064
+
1065
+ When working with data in other formats, such as Excel files, you are
1066
+ encouraged to create your own `DataFile` objects with your own
1067
+ implementation of the `loader` function. This allows you to read data
1068
+ from files in a way that suits your needs, while still adhering to the
1069
+ `DataFile` interface.
1070
+
1071
+ Parameters
1072
+ ----------
1073
+ input_format : InputFormat, optional
1074
+ Format of the input data. Default is `InputFormat.JSON`.
1075
+ options : Options, optional
1076
+ Options for loading the input data.
1077
+ path : str, optional
1078
+ Path to the input data. For file-based loaders:
1079
+ - If provided, reads from the specified file or directory
1080
+ - If None, typically reads from stdin (for JSON, TEXT, CSV)
1081
+ or uses a default directory (for CSV_ARCHIVE)
1082
+ csv_configurations : dict[str, Any], optional
1083
+ Configurations for loading CSV files. Custom kwargs for
1084
+ Python's `csv.DictReader`.
1085
+ loader : InputLoader, optional
1086
+ The loader to use for loading the input data.
1087
+ Default is an instance of `LocalInputLoader`.
1088
+ data_files : list[DataFile], optional
1089
+ List of `DataFile` instances to read from. This is used when the
1090
+ `input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
1091
+ instance should have a `name` (the file name with extension) and a
1092
+ `loader` function that reads the data from the file. The `loader`
1093
+ function should accept the file path as its first argument and return
1094
+ the data read from the file. The `loader` can also accept additional
1095
+ positional and keyword arguments, which can be provided through the
1096
+ `loader_args` and `loader_kwargs` attributes of the `DataFile`
1097
+ instance.
1098
+
1099
+ There are convenience functions that can be used to create `DataFile`
1100
+ instances, such as `json_data_file`, `csv_data_file`, and
1101
+ `text_data_file`. When working with data in other formats, such as
1102
+ Excel files, you are encouraged to create your own `DataFile` objects
1103
+ with your own implementation of the `loader` function. This allows you
1104
+ to read data from files in a way that suits your needs, while still
1105
+ adhering to the `DataFile` interface.
1106
+
1107
+ Returns
1108
+ -------
1109
+ Input
1110
+ The loaded input data in an Input object.
1111
+
1112
+ Raises
1113
+ ------
1114
+ ValueError
1115
+ If the path is invalid or data format is incorrect.
1116
+
1117
+ Examples
1118
+ --------
1119
+ >>> from nextmv.input import load, InputFormat
1120
+ >>> # Load JSON from stdin
1121
+ >>> input_obj = load(input_format=InputFormat.JSON)
1122
+ >>> # Load CSV from a file
1123
+ >>> input_obj = load(input_format=InputFormat.CSV, path="data.csv")
1124
+ >>> # Load CSV archive from a directory
1125
+ >>> input_obj = load(input_format=InputFormat.CSV_ARCHIVE, path="input_dir")
1126
+ """
1127
+
1128
+ return loader.load(input_format, options, path, csv_configurations, data_files)