nextmv 0.18.0__py3-none-any.whl → 1.0.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. nextmv/__about__.py +1 -1
  2. nextmv/__entrypoint__.py +8 -13
  3. nextmv/__init__.py +53 -0
  4. nextmv/_serialization.py +96 -0
  5. nextmv/base_model.py +54 -9
  6. nextmv/cli/CONTRIBUTING.md +511 -0
  7. nextmv/cli/__init__.py +0 -0
  8. nextmv/cli/cloud/__init__.py +47 -0
  9. nextmv/cli/cloud/acceptance/__init__.py +27 -0
  10. nextmv/cli/cloud/acceptance/create.py +393 -0
  11. nextmv/cli/cloud/acceptance/delete.py +68 -0
  12. nextmv/cli/cloud/acceptance/get.py +104 -0
  13. nextmv/cli/cloud/acceptance/list.py +62 -0
  14. nextmv/cli/cloud/acceptance/update.py +95 -0
  15. nextmv/cli/cloud/account/__init__.py +28 -0
  16. nextmv/cli/cloud/account/create.py +83 -0
  17. nextmv/cli/cloud/account/delete.py +60 -0
  18. nextmv/cli/cloud/account/get.py +66 -0
  19. nextmv/cli/cloud/account/update.py +70 -0
  20. nextmv/cli/cloud/app/__init__.py +35 -0
  21. nextmv/cli/cloud/app/create.py +141 -0
  22. nextmv/cli/cloud/app/delete.py +58 -0
  23. nextmv/cli/cloud/app/exists.py +44 -0
  24. nextmv/cli/cloud/app/get.py +66 -0
  25. nextmv/cli/cloud/app/list.py +61 -0
  26. nextmv/cli/cloud/app/push.py +137 -0
  27. nextmv/cli/cloud/app/update.py +124 -0
  28. nextmv/cli/cloud/batch/__init__.py +29 -0
  29. nextmv/cli/cloud/batch/create.py +454 -0
  30. nextmv/cli/cloud/batch/delete.py +68 -0
  31. nextmv/cli/cloud/batch/get.py +104 -0
  32. nextmv/cli/cloud/batch/list.py +63 -0
  33. nextmv/cli/cloud/batch/metadata.py +66 -0
  34. nextmv/cli/cloud/batch/update.py +95 -0
  35. nextmv/cli/cloud/data/__init__.py +26 -0
  36. nextmv/cli/cloud/data/upload.py +162 -0
  37. nextmv/cli/cloud/ensemble/__init__.py +31 -0
  38. nextmv/cli/cloud/ensemble/create.py +414 -0
  39. nextmv/cli/cloud/ensemble/delete.py +67 -0
  40. nextmv/cli/cloud/ensemble/get.py +65 -0
  41. nextmv/cli/cloud/ensemble/update.py +103 -0
  42. nextmv/cli/cloud/input_set/__init__.py +30 -0
  43. nextmv/cli/cloud/input_set/create.py +170 -0
  44. nextmv/cli/cloud/input_set/get.py +63 -0
  45. nextmv/cli/cloud/input_set/list.py +63 -0
  46. nextmv/cli/cloud/input_set/update.py +123 -0
  47. nextmv/cli/cloud/instance/__init__.py +35 -0
  48. nextmv/cli/cloud/instance/create.py +290 -0
  49. nextmv/cli/cloud/instance/delete.py +62 -0
  50. nextmv/cli/cloud/instance/exists.py +39 -0
  51. nextmv/cli/cloud/instance/get.py +62 -0
  52. nextmv/cli/cloud/instance/list.py +60 -0
  53. nextmv/cli/cloud/instance/update.py +216 -0
  54. nextmv/cli/cloud/managed_input/__init__.py +31 -0
  55. nextmv/cli/cloud/managed_input/create.py +146 -0
  56. nextmv/cli/cloud/managed_input/delete.py +65 -0
  57. nextmv/cli/cloud/managed_input/get.py +63 -0
  58. nextmv/cli/cloud/managed_input/list.py +60 -0
  59. nextmv/cli/cloud/managed_input/update.py +97 -0
  60. nextmv/cli/cloud/run/__init__.py +37 -0
  61. nextmv/cli/cloud/run/cancel.py +37 -0
  62. nextmv/cli/cloud/run/create.py +530 -0
  63. nextmv/cli/cloud/run/get.py +199 -0
  64. nextmv/cli/cloud/run/input.py +86 -0
  65. nextmv/cli/cloud/run/list.py +80 -0
  66. nextmv/cli/cloud/run/logs.py +167 -0
  67. nextmv/cli/cloud/run/metadata.py +67 -0
  68. nextmv/cli/cloud/run/track.py +501 -0
  69. nextmv/cli/cloud/scenario/__init__.py +29 -0
  70. nextmv/cli/cloud/scenario/create.py +451 -0
  71. nextmv/cli/cloud/scenario/delete.py +65 -0
  72. nextmv/cli/cloud/scenario/get.py +102 -0
  73. nextmv/cli/cloud/scenario/list.py +63 -0
  74. nextmv/cli/cloud/scenario/metadata.py +67 -0
  75. nextmv/cli/cloud/scenario/update.py +93 -0
  76. nextmv/cli/cloud/secrets/__init__.py +33 -0
  77. nextmv/cli/cloud/secrets/create.py +206 -0
  78. nextmv/cli/cloud/secrets/delete.py +67 -0
  79. nextmv/cli/cloud/secrets/get.py +66 -0
  80. nextmv/cli/cloud/secrets/list.py +60 -0
  81. nextmv/cli/cloud/secrets/update.py +147 -0
  82. nextmv/cli/cloud/shadow/__init__.py +33 -0
  83. nextmv/cli/cloud/shadow/create.py +184 -0
  84. nextmv/cli/cloud/shadow/delete.py +68 -0
  85. nextmv/cli/cloud/shadow/get.py +61 -0
  86. nextmv/cli/cloud/shadow/list.py +63 -0
  87. nextmv/cli/cloud/shadow/metadata.py +66 -0
  88. nextmv/cli/cloud/shadow/start.py +43 -0
  89. nextmv/cli/cloud/shadow/stop.py +43 -0
  90. nextmv/cli/cloud/shadow/update.py +95 -0
  91. nextmv/cli/cloud/upload/__init__.py +22 -0
  92. nextmv/cli/cloud/upload/create.py +39 -0
  93. nextmv/cli/cloud/version/__init__.py +33 -0
  94. nextmv/cli/cloud/version/create.py +97 -0
  95. nextmv/cli/cloud/version/delete.py +62 -0
  96. nextmv/cli/cloud/version/exists.py +39 -0
  97. nextmv/cli/cloud/version/get.py +62 -0
  98. nextmv/cli/cloud/version/list.py +60 -0
  99. nextmv/cli/cloud/version/update.py +92 -0
  100. nextmv/cli/community/__init__.py +24 -0
  101. nextmv/cli/community/clone.py +270 -0
  102. nextmv/cli/community/list.py +265 -0
  103. nextmv/cli/configuration/__init__.py +23 -0
  104. nextmv/cli/configuration/config.py +195 -0
  105. nextmv/cli/configuration/create.py +94 -0
  106. nextmv/cli/configuration/delete.py +67 -0
  107. nextmv/cli/configuration/list.py +77 -0
  108. nextmv/cli/main.py +188 -0
  109. nextmv/cli/message.py +153 -0
  110. nextmv/cli/options.py +206 -0
  111. nextmv/cli/version.py +38 -0
  112. nextmv/cloud/__init__.py +71 -17
  113. nextmv/cloud/acceptance_test.py +757 -51
  114. nextmv/cloud/account.py +406 -17
  115. nextmv/cloud/application/__init__.py +957 -0
  116. nextmv/cloud/application/_acceptance.py +419 -0
  117. nextmv/cloud/application/_batch_scenario.py +860 -0
  118. nextmv/cloud/application/_ensemble.py +251 -0
  119. nextmv/cloud/application/_input_set.py +227 -0
  120. nextmv/cloud/application/_instance.py +289 -0
  121. nextmv/cloud/application/_managed_input.py +227 -0
  122. nextmv/cloud/application/_run.py +1393 -0
  123. nextmv/cloud/application/_secrets.py +294 -0
  124. nextmv/cloud/application/_shadow.py +314 -0
  125. nextmv/cloud/application/_utils.py +54 -0
  126. nextmv/cloud/application/_version.py +303 -0
  127. nextmv/cloud/assets.py +48 -0
  128. nextmv/cloud/batch_experiment.py +294 -33
  129. nextmv/cloud/client.py +307 -66
  130. nextmv/cloud/ensemble.py +247 -0
  131. nextmv/cloud/input_set.py +120 -2
  132. nextmv/cloud/instance.py +133 -8
  133. nextmv/cloud/integration.py +533 -0
  134. nextmv/cloud/package.py +168 -53
  135. nextmv/cloud/scenario.py +410 -0
  136. nextmv/cloud/secrets.py +234 -0
  137. nextmv/cloud/shadow.py +190 -0
  138. nextmv/cloud/url.py +73 -0
  139. nextmv/cloud/version.py +132 -4
  140. nextmv/default_app/.gitignore +1 -0
  141. nextmv/default_app/README.md +32 -0
  142. nextmv/default_app/app.yaml +12 -0
  143. nextmv/default_app/input.json +5 -0
  144. nextmv/default_app/main.py +37 -0
  145. nextmv/default_app/requirements.txt +2 -0
  146. nextmv/default_app/src/__init__.py +0 -0
  147. nextmv/default_app/src/visuals.py +36 -0
  148. nextmv/deprecated.py +47 -0
  149. nextmv/input.py +861 -90
  150. nextmv/local/__init__.py +5 -0
  151. nextmv/local/application.py +1251 -0
  152. nextmv/local/executor.py +1042 -0
  153. nextmv/local/geojson_handler.py +323 -0
  154. nextmv/local/local.py +97 -0
  155. nextmv/local/plotly_handler.py +61 -0
  156. nextmv/local/runner.py +274 -0
  157. nextmv/logger.py +80 -9
  158. nextmv/manifest.py +1466 -0
  159. nextmv/model.py +241 -66
  160. nextmv/options.py +708 -115
  161. nextmv/output.py +1301 -274
  162. nextmv/polling.py +325 -0
  163. nextmv/run.py +1702 -0
  164. nextmv/safe.py +145 -0
  165. nextmv/status.py +122 -0
  166. nextmv-1.0.0.dev2.dist-info/METADATA +311 -0
  167. nextmv-1.0.0.dev2.dist-info/RECORD +170 -0
  168. {nextmv-0.18.0.dist-info → nextmv-1.0.0.dev2.dist-info}/WHEEL +1 -1
  169. nextmv-1.0.0.dev2.dist-info/entry_points.txt +2 -0
  170. nextmv/cloud/application.py +0 -1405
  171. nextmv/cloud/manifest.py +0 -234
  172. nextmv/cloud/status.py +0 -29
  173. nextmv-0.18.0.dist-info/METADATA +0 -770
  174. nextmv-0.18.0.dist-info/RECORD +0 -25
  175. {nextmv-0.18.0.dist-info → nextmv-1.0.0.dev2.dist-info}/licenses/LICENSE +0 -0
nextmv/input.py CHANGED
@@ -1,28 +1,360 @@
1
- """Module for handling input sources and data."""
1
+ """
2
+ Module for handling input sources and data.
3
+
4
+ This module provides classes and functions for loading and handling input data
5
+ in various formats for decision problems. It supports JSON, plain text, CSV,
6
+ and CSV archive formats and can load data from standard input or files.
7
+
8
+ Classes
9
+ -------
10
+ InputFormat
11
+ Enum defining supported input data formats (JSON, TEXT, CSV, CSV_ARCHIVE).
12
+ Input
13
+ Container for input data with format specification and options.
14
+ InputLoader
15
+ Base class for loading inputs from various sources.
16
+ LocalInputLoader
17
+ Class for loading inputs from local files or stdin.
18
+
19
+ Functions
20
+ ---------
21
+ load
22
+ Load input data using a specified loader.
23
+
24
+ Attributes
25
+ ----------
26
+ INPUTS_KEY : str
27
+ Key used for identifying inputs in the run.
28
+ """
2
29
 
3
30
  import copy
4
31
  import csv
5
32
  import json
6
33
  import os
7
34
  import sys
35
+ from collections.abc import Callable
8
36
  from dataclasses import dataclass
9
37
  from enum import Enum
10
- from typing import Any, Optional, Union
38
+ from typing import Any
11
39
 
40
+ from nextmv._serialization import serialize_json
41
+ from nextmv.deprecated import deprecated
12
42
  from nextmv.options import Options
13
43
 
44
+ INPUTS_KEY = "inputs"
45
+ """
46
+ Inputs key constant used for identifying inputs in the run.
47
+ """
48
+
14
49
 
15
50
  class InputFormat(str, Enum):
16
- """Format of an `Input`."""
51
+ """
52
+ Format of an `Input`.
53
+
54
+ You can import the `InputFormat` class directly from `nextmv`:
55
+
56
+ ```python
57
+ from nextmv import InputFormat
58
+ ```
17
59
 
18
- JSON = "JSON"
60
+ This enum specifies the supported formats for input data.
61
+
62
+ Attributes
63
+ ----------
64
+ JSON : str
65
+ JSON format, utf-8 encoded.
66
+ TEXT : str
67
+ Text format, utf-8 encoded.
68
+ CSV : str
69
+ CSV format, utf-8 encoded.
70
+ CSV_ARCHIVE : str
71
+ CSV archive format: multiple CSV files.
72
+ MULTI_FILE : str
73
+ Multi-file format, used for loading multiple files in a single input.
74
+ """
75
+
76
+ JSON = "json"
19
77
  """JSON format, utf-8 encoded."""
20
- TEXT = "TEXT"
78
+ TEXT = "text"
21
79
  """Text format, utf-8 encoded."""
22
- CSV = "CSV"
23
- """CSV format, utf-8 encoded."""
24
- CSV_ARCHIVE = "CSV_ARCHIVE"
80
+ CSV_ARCHIVE = "csv-archive"
25
81
  """CSV archive format: multiple CSV files."""
82
+ MULTI_FILE = "multi-file"
83
+ """Multi-file format, used for loading multiple files in a single input."""
84
+
85
+
86
+ @dataclass
87
+ class DataFile:
88
+ """
89
+ Represents data to be read from a file.
90
+
91
+ You can import the `DataFile` class directly from `nextmv`:
92
+
93
+ ```python
94
+ from nextmv import DataFile
95
+ ```
96
+
97
+ This class is used to define data that will be read from a file in the
98
+ filesystem. It includes the name of the file and the `loader` function that
99
+ will handle the loading and deserialization of the data from the file.
100
+ This `DataFile` class is typically used in the `Input`, when the
101
+ `Input.input_format` is set to `InputFormat.MULTI_FILE`. Given that it is
102
+ difficult to handle every edge case of how data is deserialized and read
103
+ from a file, this class exists so that the user can implement the `loader`
104
+ callable of their choice and provide it with any `loader_args` and
105
+ `loader_kwargs` they might need.
106
+
107
+ Parameters
108
+ ----------
109
+ name : str
110
+ Name of the data (input) file. The file extension should be included in
111
+ the name.
112
+ loader : Callable[[str], Any]
113
+ Callable that reads the data from the file. This should be a function
114
+ implemented by the user. There are convenience functions that you can
115
+ use as a `loader` as well. The `loader` must receive, at the very minimum,
116
+ the following arguments:
117
+
118
+ - `file_path`: a `str` argument which is the location where this
119
+ data will be read from. This includes the dir and name of the
120
+ file. As such, the `name` parameter of this class is going to be
121
+ passed to the `loader` function, joined with the directory where the
122
+ file will be read from.
123
+
124
+ The `loader` can also receive additional arguments and keyword
125
+ arguments. The `loader_args` and `loader_kwargs` parameters of this
126
+ class can be used to provide those additional arguments.
127
+
128
+ The `loader` function should return the data that will be used in the
129
+ model.
130
+ """
131
+
132
+ name: str
133
+ """
134
+ Name of the data (input) file. The file extension should be included in the
135
+ name.
136
+ """
137
+ loader: Callable[[str], Any]
138
+ """
139
+ Callable that reads (loads) the data from the file. This should be a function
140
+ implemented by the user. There are convenience functions that you can use
141
+ as a `loader` as well. The `loader` must receive, at the very minimum, the
142
+ following arguments:
143
+
144
+ - `file_path`: a `str` argument which is the location where this
145
+ data will be read from. This includes the dir and name of the
146
+ file. As such, the `name` parameter of this class is going to be
147
+ passed to the `loader` function, joined with the directory where the
148
+ file will be read from.
149
+
150
+ The `loader` can also receive additional arguments, and keyword arguments.
151
+ The `loader_args` and `loader_kwargs` parameters of this class can be used
152
+ to provide those additional arguments.
153
+
154
+ The `loader` function should return the data that will be used in the model.
155
+ """
156
+ loader_kwargs: dict[str, Any] | None = None
157
+ """
158
+ Optional keyword arguments to pass to the loader function. This can be used
159
+ to customize the behavior of the loader.
160
+ """
161
+ loader_args: list[Any] | None = None
162
+ """
163
+ Optional positional arguments to pass to the loader function. This can be
164
+ used to customize the behavior of the loader.
165
+ """
166
+ input_data_key: str | None = None
167
+ """
168
+ Use this parameter to set a custom key to represent your file.
169
+
170
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
171
+ the data from the file is loaded to the `.data` parameter of the `Input`.
172
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
173
+ represents the file name (with extension) and the value is the data that is
174
+ actually loaded from the file using the `loader` function. You can set a
175
+ custom key to represent your file by using this attribute.
176
+ """
177
+
178
+
179
+ def json_data_file(
180
+ name: str,
181
+ json_configurations: dict[str, Any] | None = None,
182
+ input_data_key: str | None = None,
183
+ ) -> DataFile:
184
+ """
185
+ This is a convenience function to create a `DataFile` that reads JSON data.
186
+
187
+ You can import the `json_data_file` function directly from `nextmv`:
188
+
189
+ ```python
190
+ from nextmv import json_data_file
191
+ ```
192
+
193
+ Parameters
194
+ ----------
195
+ name : str
196
+ Name of the data file. You don't need to include the `.json` extension.
197
+ json_configurations : dict[str, Any], optional
198
+ JSON-specific configurations for reading the data.
199
+ input_data_key : str, optional
200
+ A custom key to represent the data from this file.
201
+
202
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
203
+ the data from the file is loaded to the `.data` parameter of the `Input`.
204
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
205
+ represents the file name (with extension) and the value is the data that is
206
+ actually loaded from the file using the `loader` function. You can set a
207
+ custom key to represent your file by using this attribute.
208
+
209
+ Returns
210
+ -------
211
+ DataFile
212
+ A `DataFile` instance that reads JSON data from a file with the given
213
+ name.
214
+
215
+ Examples
216
+ --------
217
+ >>> from nextmv import json_data_file
218
+ >>> data_file = json_data_file("my_data")
219
+ >>> data = data_file.loader("my_data.json")
220
+ >>> print(data)
221
+ {
222
+ "key": "value",
223
+ "another_key": [1, 2, 3]
224
+ }
225
+ """
226
+
227
+ if not name.endswith(".json"):
228
+ name += ".json"
229
+
230
+ json_configurations = json_configurations or {}
231
+
232
+ def loader(file_path: str) -> dict[str, Any] | Any:
233
+ with open(file_path, encoding="utf-8") as f:
234
+ return json.load(f, **json_configurations)
235
+
236
+ return DataFile(
237
+ name=name,
238
+ loader=loader,
239
+ input_data_key=input_data_key,
240
+ )
241
+
242
+
243
+ def csv_data_file(
244
+ name: str,
245
+ csv_configurations: dict[str, Any] | None = None,
246
+ input_data_key: str | None = None,
247
+ ) -> DataFile:
248
+ """
249
+ This is a convenience function to create a `DataFile` that reads CSV data.
250
+
251
+ You can import the `csv_data_file` function directly from `nextmv`:
252
+
253
+ ```python
254
+ from nextmv import csv_data_file
255
+ ```
256
+
257
+ Parameters
258
+ ----------
259
+ name : str
260
+ Name of the data file. You don't need to include the `.csv` extension.
261
+ csv_configurations : dict[str, Any], optional
262
+ CSV-specific configurations for reading the data.
263
+ input_data_key : str, optional
264
+ A custom key to represent the data from this file.
265
+
266
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
267
+ the data from the file is loaded to the `.data` parameter of the `Input`.
268
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
269
+ represents the file name (with extension) and the value is the data that is
270
+ actually loaded from the file using the `loader` function. You can set a
271
+ custom key to represent your file by using this attribute.
272
+
273
+ Returns
274
+ -------
275
+ DataFile
276
+ A `DataFile` instance that reads CSV data from a file with the given
277
+ name.
278
+
279
+ Examples
280
+ --------
281
+ >>> from nextmv import csv_data_file
282
+ >>> data_file = csv_data_file("my_data")
283
+ >>> data = data_file.loader("my_data.csv")
284
+ >>> print(data)
285
+ [
286
+ {"column1": "value1", "column2": "value2"},
287
+ {"column1": "value3", "column2": "value4"}
288
+ ]
289
+ """
290
+
291
+ if not name.endswith(".csv"):
292
+ name += ".csv"
293
+
294
+ csv_configurations = csv_configurations or {}
295
+
296
+ def loader(file_path: str) -> list[dict[str, Any]]:
297
+ with open(file_path, encoding="utf-8") as f:
298
+ return list(csv.DictReader(f, **csv_configurations))
299
+
300
+ return DataFile(
301
+ name=name,
302
+ loader=loader,
303
+ input_data_key=input_data_key,
304
+ )
305
+
306
+
307
+ def text_data_file(name: str, input_data_key: str | None = None) -> DataFile:
308
+ """
309
+ This is a convenience function to create a `DataFile` that reads utf-8
310
+ encoded text data.
311
+
312
+ You can import the `text_data_file` function directly from `nextmv`:
313
+
314
+ ```python
315
+ from nextmv import text_data_file
316
+ ```
317
+
318
+ You must provide the extension as part of the `name` parameter.
319
+
320
+ Parameters
321
+ ----------
322
+ name : str
323
+ Name of the data file. The file extension must be provided in the name.
324
+ input_data_key : str, optional
325
+ A custom key to represent the data from this file.
326
+
327
+ When using `InputFormat.MULTI_FILE` as the `input_format` of the `Input`,
328
+ the data from the file is loaded to the `.data` parameter of the `Input`.
329
+ In that case, the type of `.data` is `dict[str, Any]`, where each key
330
+ represents the file name (with extension) and the value is the data that is
331
+ actually loaded from the file using the `loader` function. You can set a
332
+ custom key to represent your file by using this attribute.
333
+
334
+ Returns
335
+ -------
336
+ DataFile
337
+ A `DataFile` instance that reads text data from a file with the given
338
+ name.
339
+
340
+ Examples
341
+ --------
342
+ >>> from nextmv import text_data_file
343
+ >>> data_file = text_data_file("my_data.txt")
344
+ >>> data = data_file.loader("my_data.txt")
345
+ >>> print(data)
346
+ This is some text data.
347
+ """
348
+
349
+ def loader(file_path: str) -> str:
350
+ with open(file_path, encoding="utf-8") as f:
351
+ return f.read().rstrip("\n")
352
+
353
+ return DataFile(
354
+ name=name,
355
+ loader=loader,
356
+ input_data_key=input_data_key,
357
+ )
26
358
 
27
359
 
28
360
  @dataclass
@@ -30,37 +362,94 @@ class Input:
30
362
  """
31
363
  Input for a decision problem.
32
364
 
365
+ You can import the `Input` class directly from `nextmv`:
366
+
367
+ ```python
368
+ from nextmv import Input
369
+ ```
370
+
371
+ The `data`'s type must match the `input_format`:
372
+
373
+ - `InputFormat.JSON`: the data is `Union[dict[str, Any], Any]`. This just
374
+ means that the data must be JSON-deserializable, which includes dicts and
375
+ lists.
376
+ - `InputFormat.TEXT`: the data is `str`, and it must be utf-8 encoded.
377
+ - `InputFormat.CSV_ARCHIVE`: the data is `dict[str, list[dict[str, Any]]]`,
378
+ where each key is the name of a CSV file and the value is a list of dicts
379
+ representing the rows in that CSV file.
380
+ - `InputFormat.MULTI_FILE`: the data is `dict[str, Any]`, where for each
381
+ item, the key is the file name (with the extension) and the actual data
382
+ from the file is the value. When working with multi-file, data is loaded
383
+ from one or more files in a specific directory. Given that each file can
384
+ be of different types (JSON, CSV, Excel, etc...), the data captured from
385
+ each might vary. To reflect this, the data is loaded as a dict of items.
386
+ You can have a custom key for the data, that is not the file name, if
387
+ you use the `input_data_key` parameter of the `DataFile` class.
388
+
33
389
  Parameters
34
390
  ----------
35
- data : Any
391
+ data : Union[Union[dict[str, Any], Any], str, list[dict[str, Any]],
392
+ dict[str, list[dict[str, Any]]], dict[str, Any]]
36
393
  The actual data.
37
394
  input_format : InputFormat, optional
38
395
  Format of the input data. Default is `InputFormat.JSON`.
39
396
  options : Options, optional
40
397
  Options that the input was created with.
398
+
399
+ Raises
400
+ ------
401
+ ValueError
402
+ If the data type doesn't match the expected type for the given format.
403
+ ValueError
404
+ If the `input_format` is not one of the supported formats.
41
405
  """
42
406
 
43
- data: Union[
44
- Union[dict[str, Any], Any], # JSON
45
- str, # TEXT
46
- list[dict[str, Any]], # CSV
47
- dict[str, list[dict[str, Any]]], # CSV_ARCHIVE
48
- ]
49
- """The actual data. The data can be of various types, depending on the
50
- input format."""
407
+ data: dict[str, Any] | Any | str | list[dict[str, Any]] | dict[str, list[dict[str, Any]]] | dict[str, Any]
408
+ """
409
+ The actual data.
410
+
411
+ The data can be of various types, depending on the input format:
412
+
413
+ - For `JSON`: `Union[dict[str, Any], Any]`
414
+ - For `TEXT`: `str`
415
+ - For `CSV`: `list[dict[str, Any]]`
416
+ - For `CSV_ARCHIVE`: `dict[str, list[dict[str, Any]]]`
417
+ - For `MULTI_FILE`: `dict[str, Any]`
418
+ """
419
+
420
+ input_format: InputFormat | None = InputFormat.JSON
421
+ """
422
+ Format of the input data.
423
+
424
+ Default is `InputFormat.JSON`.
425
+ """
426
+
427
+ options: Options | None = None
428
+ """
429
+ Options that the `Input` was created with.
51
430
 
52
- input_format: Optional[InputFormat] = InputFormat.JSON
53
- """Format of the input data. Default is `InputFormat.JSON`."""
54
- options: Optional[Options] = None
55
- """Options that the `Input` were created with."""
431
+ A deep copy of the options is made during initialization, so this snapshot
432
+ is not affected if the original options are modified later.
433
+ """
56
434
 
57
435
  def __post_init__(self):
58
- """Check that the data matches the format given to initialize the
59
- class."""
436
+ """
437
+ Check that the data matches the format given to initialize the class.
438
+
439
+ This method is automatically called after the dataclass is initialized.
440
+ It validates that the data provided is of the correct type according to
441
+ the specified input_format and makes a deep copy of the options to ensure
442
+ the input maintains its own copy.
443
+
444
+ Raises
445
+ ------
446
+ ValueError
447
+ If the data type doesn't match the expected type for the given format.
448
+ """
60
449
 
61
450
  if self.input_format == InputFormat.JSON:
62
451
  try:
63
- _ = json.dumps(self.data)
452
+ _ = serialize_json(self.data)
64
453
  except (TypeError, OverflowError) as e:
65
454
  raise ValueError(
66
455
  f"Input has input_format InputFormat.JSON and "
@@ -73,16 +462,16 @@ class Input:
73
462
  "input_format InputFormat.TEXT, supported type is `str`"
74
463
  )
75
464
 
76
- elif self.input_format == InputFormat.CSV and not isinstance(self.data, list):
465
+ elif self.input_format == InputFormat.CSV_ARCHIVE and not isinstance(self.data, dict):
77
466
  raise ValueError(
78
467
  f"unsupported Input.data type: {type(self.data)} with "
79
- "input_format InputFormat.CSV, supported type is `list`"
468
+ "input_format InputFormat.CSV_ARCHIVE, supported type is `dict`"
80
469
  )
81
470
 
82
- elif self.input_format == InputFormat.CSV_ARCHIVE and not isinstance(self.data, dict):
471
+ elif self.input_format == InputFormat.MULTI_FILE and not isinstance(self.data, dict):
83
472
  raise ValueError(
84
473
  f"unsupported Input.data type: {type(self.data)} with "
85
- "input_format InputFormat.CSV_ARCHIVE, supported type is `dict`"
474
+ "input_format InputFormat.MULTI_FILE, supported type is `dict`"
86
475
  )
87
476
 
88
477
  # Capture a snapshot of the options that were used to create the class
@@ -91,14 +480,69 @@ class Input:
91
480
  new_options = copy.deepcopy(init_options)
92
481
  self.options = new_options
93
482
 
483
+ def to_dict(self) -> dict[str, Any]:
484
+ """
485
+ Convert the input to a dictionary.
486
+
487
+ This method serializes the Input object to a dictionary format that can
488
+ be easily converted to JSON or other serialization formats. When the
489
+ `input_format` is set to `InputFormat.MULTI_FILE`, it will not include
490
+ the `data` field, as it is uncertain how data is deserialized from the file.
491
+
492
+ Returns
493
+ -------
494
+ dict[str, Any]
495
+ A dictionary containing the input data, format, and options.
496
+
497
+ The structure is:
498
+ ```python
499
+ {
500
+ "data": <the input data>,
501
+ "input_format": <the input format as a string>,
502
+ "options": <the options as a dictionary or None>
503
+ }
504
+ ```
505
+
506
+ Examples
507
+ --------
508
+ >>> from nextmv.input import Input, InputFormat
509
+ >>> input_obj = Input(data={"key": "value"}, input_format=InputFormat.JSON)
510
+ >>> input_dict = input_obj.to_dict()
511
+ >>> print(input_dict)
512
+ {'data': {'key': 'value'}, 'input_format': 'json', 'options': None}
513
+ """
514
+
515
+ input_dict = {
516
+ "input_format": self.input_format.value,
517
+ "options": self.options.to_dict() if self.options is not None else None,
518
+ }
519
+
520
+ if self.input_format == InputFormat.MULTI_FILE:
521
+ return input_dict
522
+
523
+ input_dict["data"] = self.data
524
+
525
+ return input_dict
526
+
94
527
 
95
528
  class InputLoader:
96
- """Base class for loading inputs."""
529
+ """
530
+ Base class for loading inputs.
531
+
532
+ You can import the `InputLoader` class directly from `nextmv`:
533
+
534
+ ```python
535
+ from nextmv import InputLoader
536
+ ```
537
+
538
+ This abstract class defines the interface for input loaders. Subclasses must
539
+ implement the `load` method to provide concrete input loading functionality.
540
+ """
97
541
 
98
542
  def load(
99
543
  self,
100
544
  input_format: InputFormat = InputFormat.JSON,
101
- options: Optional[Options] = None,
545
+ options: Options | None = None,
102
546
  *args,
103
547
  **kwargs,
104
548
  ) -> Input:
@@ -133,20 +577,82 @@ class InputLoader:
133
577
 
134
578
  class LocalInputLoader(InputLoader):
135
579
  """
136
- Class for loading local inputs. This class can load input data from the
137
- local filesystem, by using stdin, a file, or a directory, where applicable.
580
+ Class for loading local inputs.
581
+
582
+ You can import the `LocalInputLoader` class directly from `nextmv`:
583
+
584
+ ```python
585
+ from nextmv import LocalInputLoader
586
+ ```
587
+
588
+ This class can load input data from the local filesystem, by using stdin,
589
+ a file, or a directory, where applicable. It supports various input formats
590
+ like JSON, TEXT, CSV, and CSV archive.
591
+
138
592
  Call the `load` method to read the input data.
593
+
594
+ Examples
595
+ --------
596
+ >>> from nextmv.input import LocalInputLoader, InputFormat
597
+ >>> loader = LocalInputLoader()
598
+ >>> # Load JSON from stdin or file
599
+ >>> input_obj = loader.load(input_format=InputFormat.JSON, path="data.json")
139
600
  """
140
601
 
141
602
  def _read_text(path: str, _) -> str:
603
+ """
604
+ Read a text file and return its contents.
605
+
606
+ Parameters
607
+ ----------
608
+ path : str
609
+ Path to the text file.
610
+ _ : Any
611
+ Placeholder for unused parameter (for API consistency).
612
+
613
+ Returns
614
+ -------
615
+ str
616
+ Contents of the text file with trailing newlines removed.
617
+ """
142
618
  with open(path, encoding="utf-8") as f:
143
619
  return f.read().rstrip("\n")
144
620
 
145
- def _read_csv(path: str, csv_configurations: Optional[dict[str, Any]]) -> list[dict[str, Any]]:
621
+ def _read_csv(path: str, csv_configurations: dict[str, Any] | None) -> list[dict[str, Any]]:
622
+ """
623
+ Read a CSV file and return its contents as a list of dictionaries.
624
+
625
+ Parameters
626
+ ----------
627
+ path : str
628
+ Path to the CSV file.
629
+ csv_configurations : dict[str, Any], optional
630
+ Configuration parameters for the CSV DictReader.
631
+
632
+ Returns
633
+ -------
634
+ list[dict[str, Any]]
635
+ List of dictionaries where each dictionary represents a row in the CSV.
636
+ """
146
637
  with open(path, encoding="utf-8") as f:
147
638
  return list(csv.DictReader(f, **csv_configurations))
148
639
 
149
- def _read_json(path: str, _) -> Union[dict[str, Any], Any]:
640
+ def _read_json(path: str, _) -> dict[str, Any] | Any:
641
+ """
642
+ Read a JSON file and return its parsed contents.
643
+
644
+ Parameters
645
+ ----------
646
+ path : str
647
+ Path to the JSON file.
648
+ _ : Any
649
+ Placeholder for unused parameter (for API consistency).
650
+
651
+ Returns
652
+ -------
653
+ Union[dict[str, Any], Any]
654
+ Parsed JSON data.
655
+ """
150
656
  with open(path, encoding="utf-8") as f:
151
657
  return json.load(f)
152
658
 
@@ -154,43 +660,60 @@ class LocalInputLoader(InputLoader):
154
660
  STDIN_READERS = {
155
661
  InputFormat.JSON: lambda _: json.load(sys.stdin),
156
662
  InputFormat.TEXT: lambda _: sys.stdin.read().rstrip("\n"),
157
- InputFormat.CSV: lambda csv_configurations: list(csv.DictReader(sys.stdin, **csv_configurations)),
158
663
  }
664
+ """
665
+ Dictionary of functions to read from standard input.
666
+
667
+ Each key is an InputFormat, and each value is a function that reads from
668
+ standard input in that format.
669
+ """
670
+
159
671
  # These callbacks were not implemented with lambda because we needed
160
672
  # multiple lines. By using `open`, we needed the `with` to be able to close
161
673
  # the file.
162
674
  FILE_READERS = {
163
675
  InputFormat.JSON: _read_json,
164
676
  InputFormat.TEXT: _read_text,
165
- InputFormat.CSV: _read_csv,
677
+ "CSV": _read_csv,
166
678
  }
679
+ """
680
+ Dictionary of functions to read from files.
681
+
682
+ Each key is an InputFormat (or the internal string key "CSV"), and each
683
+ value is a function that reads from a file in that format.
684
+ """
167
685
 
168
686
  def load(
169
687
  self,
170
- input_format: Optional[InputFormat] = InputFormat.JSON,
171
- options: Optional[Options] = None,
172
- path: Optional[str] = None,
173
- csv_configurations: Optional[dict[str, Any]] = None,
688
+ input_format: InputFormat | None = InputFormat.JSON,
689
+ options: Options | None = None,
690
+ path: str | None = None,
691
+ csv_configurations: dict[str, Any] | None = None,
692
+ data_files: list[DataFile] | None = None,
174
693
  ) -> Input:
175
694
  """
176
695
  Load the input data. The input data can be in various formats. For
177
- `InputFormat.JSON`, `InputFormat.TEXT`, and `InputFormat.CSV`, the data
178
- can be streamed from stdin or read from a file. When the `path`
179
- argument is provided (and valid), the input data is read from the file
180
- specified by `path`, otherwise, it is streamed from stdin. For
181
- `InputFormat.CSV_ARCHIVE`, the input data is read from the directory
182
- specified by `path`. If the `path` is not provided, the default
183
- location `input` is used. The directory should contain one or more
184
- files, where each file in the directory is a CSV file.
696
+ `InputFormat.JSON` and `InputFormat.TEXT`, the data can be streamed
697
+ from stdin or read from a file. When the `path` argument is provided
698
+ (and valid), the input data is read from the file specified by `path`,
699
+ otherwise, it is streamed from stdin. For `InputFormat.CSV_ARCHIVE`,
700
+ the input data is read from the directory specified by `path`. If the
701
+ `path` is not provided, the default location `input` is used. The
702
+ directory should contain one or more files, where each file in the
703
+ directory is a CSV file.
185
704
 
186
705
  The `Input` that is returned contains the `data` attribute. This data
187
706
  can be of different types, depending on the provided `input_format`:
188
707
 
189
708
  - `InputFormat.JSON`: the data is a `dict[str, Any]`.
190
709
  - `InputFormat.TEXT`: the data is a `str`.
191
- - `InputFormat.CSV`: the data is a `list[dict[str, Any]]`.
192
- - `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`.
193
- Each key is the name of the CSV file, minus the `.csv` extension.
710
+ - `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str,
711
+ Any]]]`. Each key is the name of the CSV file, minus the `.csv`
712
+ extension.
713
+ - `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`, where each
714
+ key is the file name (with extension) and the value is the data read
715
+ from the file. The data can be of any type, depending on the file
716
+ type and the reader function provided in the `DataFile` instances.
194
717
 
195
718
  Parameters
196
719
  ----------
@@ -204,6 +727,16 @@ class LocalInputLoader(InputLoader):
204
727
  Configurations for loading CSV files. The default `DictReader` is
205
728
  used when loading a CSV file, so you have the option to pass in a
206
729
  dictionary with custom kwargs for the `DictReader`.
730
+ data_files : list[DataFile], optional
731
+ List of `DataFile` instances to read from. This is used when the
732
+ `input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
733
+ instance should have a `name` (the file name with extension) and a
734
+ `loader` function that reads the data from the file. The `loader`
735
+ function should accept the file path as its first argument and
736
+ return the data read from the file. The `loader` can also accept
737
+ additional positional and keyword arguments, which can be provided
738
+ through the `loader_args` and `loader_kwargs` attributes of the
739
+ `DataFile` instance.
207
740
 
208
741
  Returns
209
742
  -------
@@ -220,23 +753,50 @@ class LocalInputLoader(InputLoader):
220
753
  if csv_configurations is None:
221
754
  csv_configurations = {}
222
755
 
223
- if input_format in [InputFormat.JSON, InputFormat.TEXT, InputFormat.CSV]:
756
+ if input_format in [InputFormat.JSON, InputFormat.TEXT]:
224
757
  data = self._load_utf8_encoded(path=path, input_format=input_format, csv_configurations=csv_configurations)
225
758
  elif input_format == InputFormat.CSV_ARCHIVE:
226
759
  data = self._load_archive(path=path, csv_configurations=csv_configurations)
760
+ elif input_format == InputFormat.MULTI_FILE:
761
+ if data_files is None:
762
+ raise ValueError("data_files must be provided when input_format is InputFormat.MULTI_FILE")
763
+
764
+ if not isinstance(data_files, list):
765
+ raise ValueError("data_files must be a list of DataFile instances")
766
+
767
+ data = self._load_multi_file(data_files=data_files, path=path)
227
768
 
228
769
  return Input(data=data, input_format=input_format, options=options)
229
770
 
230
771
  def _load_utf8_encoded(
231
772
  self,
232
- csv_configurations: Optional[dict[str, Any]],
233
- path: Optional[str] = None,
234
- input_format: Optional[InputFormat] = InputFormat.JSON,
773
+ csv_configurations: dict[str, Any] | None,
774
+ path: str | None = None,
775
+ input_format: InputFormat | str | None = InputFormat.JSON,
235
776
  use_file_reader: bool = False,
236
- ) -> Union[dict[str, Any], str, list[dict[str, Any]]]:
777
+ ) -> dict[str, Any] | str | list[dict[str, Any]]:
237
778
  """
238
- Load a utf-8 encoded file. Can come from stdin or a file in the
239
- filesystem.
779
+ Load a utf-8 encoded file from stdin or filesystem.
780
+
781
+ This internal method handles loading data in various formats from either
782
+ standard input or a file.
783
+
784
+ Parameters
785
+ ----------
786
+ csv_configurations : dict[str, Any], optional
787
+ Configuration parameters for the CSV DictReader.
788
+ path : str, optional
789
+ Path to the file to read from. If None or empty, reads from stdin.
790
+ input_format : InputFormat, optional
791
+ Format of the input data. Default is JSON.
792
+ use_file_reader : bool, optional
793
+ Whether to force using the file reader even if path is None.
794
+ Default is False.
795
+
796
+ Returns
797
+ -------
798
+ Union[dict[str, Any], str, list[dict[str, Any]]]
799
+ Data read from stdin or file in the specified format.
240
800
  """
241
801
 
242
802
  # If we forcibly want to use the file reader, we can do so.
@@ -252,11 +812,33 @@ class LocalInputLoader(InputLoader):
252
812
 
253
813
  def _load_archive(
254
814
  self,
255
- csv_configurations: Optional[dict[str, Any]],
256
- path: Optional[str] = None,
815
+ csv_configurations: dict[str, Any] | None,
816
+ path: str | None = None,
257
817
  ) -> dict[str, list[dict[str, Any]]]:
258
818
  """
259
- Load files from a directory. Will only load CSV files.
819
+ Load CSV files from a directory.
820
+
821
+ This internal method loads all CSV files from a specified directory,
822
+ organizing them into a dictionary where each key is the filename
823
+ (without .csv extension) and each value is the parsed CSV content.
824
+
825
+ Parameters
826
+ ----------
827
+ csv_configurations : dict[str, Any], optional
828
+ Configuration parameters for the CSV DictReader.
829
+ path : str, optional
830
+ Path to the directory containing CSV files. If None or empty,
831
+ uses "./input" as the default directory.
832
+
833
+ Returns
834
+ -------
835
+ dict[str, list[dict[str, Any]]]
836
+ Dictionary mapping filenames to CSV contents.
837
+
838
+ Raises
839
+ ------
840
+ ValueError
841
+ If the path is not a directory or the default directory doesn't exist.
260
842
  """
261
843
 
262
844
  dir_path = "input"
@@ -276,43 +858,104 @@ class LocalInputLoader(InputLoader):
276
858
  stripped = file.removesuffix(csv_ext)
277
859
  data[stripped] = self._load_utf8_encoded(
278
860
  path=os.path.join(dir_path, file),
279
- input_format=InputFormat.CSV,
861
+ input_format="CSV",
280
862
  use_file_reader=True,
281
863
  csv_configurations=csv_configurations,
282
864
  )
283
865
 
284
866
  return data
285
867
 
868
+ def _load_multi_file(
869
+ self,
870
+ data_files: list[DataFile],
871
+ path: str | None = None,
872
+ ) -> dict[str, Any]:
873
+ """
874
+ Load multiple files from a directory.
875
+
876
+ This internal method loads the files listed in `data_files` from a
877
+ directory, organizing them into a dictionary where each key is the
878
+ file name (or the `input_data_key` of the `DataFile`, when set) and each
879
+ value is whatever that file's `loader` function returns. For example, a
880
+ CSV loader may return a list of dictionaries, a JSON loader a parsed
881
+ object, and a custom Excel loader a DataFrame.
882
+
883
+ Parameters
884
+ ----------
885
+ data_files : list[DataFile]
886
+ List of `DataFile` instances to read from.
887
+ path : str, optional
888
+ Path to the directory containing files. If None or empty,
889
+ uses "./inputs" as the default directory.
890
+
891
+ Returns
892
+ -------
893
+ dict[str, Any]
894
+ Dictionary mapping each file name (or `input_data_key`, when set) to
895
+ the value returned by that file's `loader`. The type of each value
896
+ therefore depends on the loader, e.g., a list of dictionaries for CSV,
897
+ a parsed object for JSON, or a string for plain text.
898
+
899
+ Raises
900
+ ------
901
+ ValueError
902
+ If the path is not a directory or the default directory doesn't exist.
903
+ """
904
+
905
+ dir_path = INPUTS_KEY
906
+ if path is not None and path != "":
907
+ if not os.path.isdir(path):
908
+ raise ValueError(f"path {path} is not a directory")
909
+
910
+ dir_path = path
911
+
912
+ if not os.path.isdir(dir_path):
913
+ raise ValueError(f'expected input directoy "{dir_path}" to exist as a default location')
914
+
915
+ data = {}
916
+
917
+ for data_file in data_files:
918
+ name = data_file.name
919
+ file_path = os.path.join(dir_path, name)
920
+
921
+ if data_file.loader_args is None:
922
+ data_file.loader_args = []
923
+ if data_file.loader_kwargs is None:
924
+ data_file.loader_kwargs = {}
925
+
926
+ d = data_file.loader(
927
+ file_path,
928
+ *data_file.loader_args,
929
+ **data_file.loader_kwargs,
930
+ )
931
+
932
+ key = name
933
+ if data_file.input_data_key is not None:
934
+ key = data_file.input_data_key
935
+
936
+ if data.get(key) is not None:
937
+ raise ValueError(f"Duplicate input data key found: {key}")
938
+
939
+ data[key] = d
940
+
941
+ return data
942
+
286
943
 
287
944
  def load_local(
288
- input_format: Optional[InputFormat] = InputFormat.JSON,
289
- options: Optional[Options] = None,
290
- path: Optional[str] = None,
291
- csv_configurations: Optional[dict[str, Any]] = None,
945
+ input_format: InputFormat | None = InputFormat.JSON,
946
+ options: Options | None = None,
947
+ path: str | None = None,
948
+ csv_configurations: dict[str, Any] | None = None,
292
949
  ) -> Input:
293
950
  """
951
+ !!! warning
952
+ `load_local` is deprecated, use `load` instead.
953
+
954
+ Load input data from local sources.
955
+
294
956
  This is a convenience function for instantiating a `LocalInputLoader`
295
957
  and calling its `load` method.
296
958
 
297
- Load the input data. The input data can be in various formats. For
298
- `InputFormat.JSON`, `InputFormat.TEXT`, and `InputFormat.CSV`, the data can
299
- be streamed from stdin or read from a file. When the `path` argument is
300
- provided (and valid), the input data is read from the file specified by
301
- `path`, otherwise, it is streamed from stdin. For
302
- `InputFormat.CSV_ARCHIVE`, the input data is read from the directory
303
- specified by `path`. If the `path` is not provided, the default location
304
- `input` is used. The directory should contain one or more files, where each
305
- file in the directory is a CSV file.
306
-
307
- The `Input` that is returned contains the `data` attribute. This data can
308
- be of different types, depending on the provided `input_format`:
309
-
310
- - `InputFormat.JSON`: the data is a `dict[str, Any]`.
311
- - `InputFormat.TEXT`: the data is a `str`.
312
- - `InputFormat.CSV`: the data is a `list[dict[str, Any]]`.
313
- - `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`.
314
- Each key is the name of the CSV file, minus the `.csv` extension.
315
-
316
959
  Parameters
317
960
  ----------
318
961
  input_format : InputFormat, optional
@@ -322,20 +965,148 @@ def load_local(
322
965
  path : str, optional
323
966
  Path to the input data.
324
967
  csv_configurations : dict[str, Any], optional
325
- Configurations for loading CSV files. The default `DictReader` is used
326
- when loading a CSV file, so you have the option to pass in a dictionary
327
- with custom kwargs for the `DictReader`.
968
+ Configurations for loading CSV files. Custom kwargs for
969
+ Python's `csv.DictReader`.
328
970
 
329
971
  Returns
330
972
  -------
331
973
  Input
332
- The input data.
974
+ The loaded input data in an Input object.
333
975
 
334
976
  Raises
335
977
  ------
336
978
  ValueError
337
- If the path is not a directory when working with CSV_ARCHIVE.
979
+ If the path is invalid or data format is incorrect.
980
+
981
+ See Also
982
+ --------
983
+ load : The recommended function to use instead.
338
984
  """
339
985
 
986
+ deprecated(
987
+ name="load_local",
988
+ reason="`load_local` is deprecated, use `load` instead",
989
+ )
990
+
340
991
  loader = LocalInputLoader()
341
992
  return loader.load(input_format, options, path, csv_configurations)
993
+
994
+
995
+ _LOCAL_INPUT_LOADER = LocalInputLoader()
996
+ """Default instance of LocalInputLoader used by the load function."""
997
+
998
+
999
+ def load(
1000
+ input_format: InputFormat | None = InputFormat.JSON,
1001
+ options: Options | None = None,
1002
+ path: str | None = None,
1003
+ csv_configurations: dict[str, Any] | None = None,
1004
+ loader: InputLoader | None = _LOCAL_INPUT_LOADER,
1005
+ data_files: list[DataFile] | None = None,
1006
+ ) -> Input:
1007
+ """
1008
+ Load input data using the specified loader.
1009
+
1010
+ You can import the `load` function directly from `nextmv`:
1011
+
1012
+ ```python
1013
+ from nextmv import load
1014
+ ```
1015
+
1016
+ This is a convenience function for loading an `Input` object. By default,
1017
+ it uses the `LocalInputLoader` to load data from local sources.
1018
+
1019
+ The input data can be in various formats and can be loaded from different
1020
+ sources depending on the loader:
1021
+
1022
+ - `InputFormat.JSON`: the data is a `dict[str, Any]`
1023
+ - `InputFormat.TEXT`: the data is a `str`
1024
+ - `InputFormat.CSV_ARCHIVE`: the data is a `dict[str, list[dict[str, Any]]]`
1025
+ Each key is the name of the CSV file, minus the `.csv` extension.
1026
+ - `InputFormat.MULTI_FILE`: the data is a `dict[str, Any]`
1027
+ where each key is the file name (with extension) and the value is the
1028
+ data read from the file. This is used for loading multiple files in a
1029
+ single input, where each file can be of different types (JSON, CSV,
1030
+ Excel, etc.). The data is loaded as a dict of items, where each item
1031
+ corresponds to a file and its content.
1032
+
1033
+ When specifying `input_format` as `InputFormat.MULTI_FILE`, the
1034
+ `data_files` argument must be provided. This argument is a list of
1035
+ `DataFile` instances, each representing a file to be read. Each `DataFile`
1036
+ instance should have a `name` (the file name with extension) and a `loader`
1037
+ function that reads the data from the file. The `loader` function should
1038
+ accept the file path as its first argument and return the data read from
1039
+ the file. The `loader` can also accept additional positional and keyword
1040
+ arguments, which can be provided through the `loader_args` and
1041
+ `loader_kwargs` attributes of the `DataFile` instance.
1042
+
1043
+ There are convenience functions that can be used to create `DataFile`
1044
+ classes, such as:
1045
+
1046
+ - `json_data_file`: Creates a `DataFile` that reads JSON data.
1047
+ - `csv_data_file`: Creates a `DataFile` that reads CSV data.
1048
+ - `text_data_file`: Creates a `DataFile` that reads utf-8 encoded text
1049
+ data.
1050
+
1051
+ When working with data in other formats, such as Excel files, you are
1052
+ encouraged to create your own `DataFile` objects with your own
1053
+ implementation of the `loader` function. This allows you to read data
1054
+ from files in a way that suits your needs, while still adhering to the
1055
+ `DataFile` interface.
1056
+
1057
+ Parameters
1058
+ ----------
1059
+ input_format : InputFormat, optional
1060
+ Format of the input data. Default is `InputFormat.JSON`.
1061
+ options : Options, optional
1062
+ Options for loading the input data.
1063
+ path : str, optional
1064
+ Path to the input data. For file-based loaders:
1065
+ - If provided, reads from the specified file or directory
1066
+ - If None, typically reads from stdin (for JSON, TEXT)
1067
+ or uses a default directory (for CSV_ARCHIVE, MULTI_FILE)
1068
+ csv_configurations : dict[str, Any], optional
1069
+ Configurations for loading CSV files. Custom kwargs for
1070
+ Python's `csv.DictReader`.
1071
+ loader : InputLoader, optional
1072
+ The loader to use for loading the input data.
1073
+ Default is an instance of `LocalInputLoader`.
1074
+ data_files : list[DataFile], optional
1075
+ List of `DataFile` instances to read from. This is used when the
1076
+ `input_format` is set to `InputFormat.MULTI_FILE`. Each `DataFile`
1077
+ instance should have a `name` (the file name with extension) and a
1078
+ `loader` function that reads the data from the file. The `loader`
1079
+ function should accept the file path as its first argument and return
1080
+ the data read from the file. The `loader` can also accept additional
1081
+ positional and keyword arguments, which can be provided through the
1082
+ `loader_args` and `loader_kwargs` attributes of the `DataFile`
1083
+ instance.
1084
+
1085
+ There are convenience functions that can be used to create `DataFile`
1086
+ classes, such as `json_data_file`, `csv_data_file`, and
1087
+ `text_data_file`. When working with data in other formats, such as
1088
+ Excel files, you are encouraged to create your own `DataFile` objects
1089
+ with your own implementation of the `loader` function. This allows you
1090
+ to read data from files in a way that suits your needs, while still
1091
+ adhering to the `DataFile` interface.
1092
+
1093
+ Returns
1094
+ -------
1095
+ Input
1096
+ The loaded input data in an Input object.
1097
+
1098
+ Raises
1099
+ ------
1100
+ ValueError
1101
+ If the path is invalid or data format is incorrect.
1102
+
1103
+ Examples
1104
+ --------
1105
+ >>> from nextmv.input import load, InputFormat
1106
+ >>> # Load JSON from stdin
1107
+ >>> input_obj = load(input_format=InputFormat.JSON)
1108
+ >>> # Load CSV archive from a directory
1109
+ >>> input_obj = load(input_format=InputFormat.CSV_ARCHIVE, path="input_dir")
1110
+ """
1111
+
1112
+ return loader.load(input_format, options, path, csv_configurations, data_files)