nextmv 0.28.5__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nextmv/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "v0.28.5"
1
+ __version__ = "v0.29.0"
nextmv/__init__.py CHANGED
@@ -3,12 +3,16 @@
3
3
  from .__about__ import __version__
4
4
  from .base_model import BaseModel as BaseModel
5
5
  from .base_model import from_dict as from_dict
6
+ from .input import DataFile as DataFile
6
7
  from .input import Input as Input
7
8
  from .input import InputFormat as InputFormat
8
9
  from .input import InputLoader as InputLoader
9
10
  from .input import LocalInputLoader as LocalInputLoader
11
+ from .input import csv_data_file as csv_data_file
12
+ from .input import json_data_file as json_data_file
10
13
  from .input import load as load
11
14
  from .input import load_local as load_local
15
+ from .input import text_data_file as text_data_file
12
16
  from .logger import log as log
13
17
  from .logger import redirect_stdout as redirect_stdout
14
18
  from .logger import reset_stdout as reset_stdout
@@ -27,9 +31,13 @@ from .output import ResultStatistics as ResultStatistics
27
31
  from .output import RunStatistics as RunStatistics
28
32
  from .output import Series as Series
29
33
  from .output import SeriesData as SeriesData
34
+ from .output import SolutionFile as SolutionFile
30
35
  from .output import Statistics as Statistics
31
36
  from .output import Visual as Visual
32
37
  from .output import VisualSchema as VisualSchema
38
+ from .output import csv_solution_file as csv_solution_file
39
+ from .output import json_solution_file as json_solution_file
40
+ from .output import text_solution_file as text_solution_file
33
41
  from .output import write as write
34
42
  from .output import write_local as write_local
35
43
 
@@ -23,8 +23,11 @@ poll
23
23
  """
24
24
 
25
25
  import json
26
+ import os
26
27
  import random
27
28
  import shutil
29
+ import tarfile
30
+ import tempfile
28
31
  import time
29
32
  from collections.abc import Callable
30
33
  from dataclasses import dataclass
@@ -1494,6 +1497,7 @@ class Application:
1494
1497
  batch_experiment_id: Optional[str] = None,
1495
1498
  external_result: Optional[Union[ExternalRunResult, dict[str, Any]]] = None,
1496
1499
  json_configurations: Optional[dict[str, Any]] = None,
1500
+ dir_path: Optional[str] = None,
1497
1501
  ) -> str:
1498
1502
  """
1499
1503
  Submit an input to start a new run of the application. Returns the
@@ -1503,11 +1507,35 @@ class Application:
1503
1507
  ----------
1504
1508
  input: Union[Input, dict[str, Any], BaseModel, str]
1505
1509
  Input to use for the run. This can be a `nextmv.Input` object,
1506
- `dict`, `BaseModel` or `str`. If `nextmv.Input` is used, then the
1507
- input is extracted from the `.data` property. Note that for now,
1508
- `InputFormat.CSV_ARCHIVE` is not supported as an
1509
- `input.input_format`. If an input is too large, it will be uploaded
1510
- with the `upload_large_input` method.
1510
+ `dict`, `BaseModel` or `str`.
1511
+
1512
+ If `nextmv.Input` is used, and the `input_format` is either
1513
+ `nextmv.InputFormat.JSON` or `nextmv.InputFormat.TEXT`, then the
1514
+ input data is extracted from the `.data` property.
1515
+
1516
+ If you want to work with `nextmv.InputFormat.CSV_ARCHIVE` or
1517
+ `nextmv.InputFormat.MULTI_FILE`, you should use the `dir_path`
1518
+ argument instead. This argument takes precedence over the `input`.
1519
+ If `dir_path` is specified, this function looks for files in that
1520
+ directory and tars them, to later be uploaded using the
1521
+ `upload_large_input` method. If both the `dir_path` and `input`
1522
+ arguments are provided, the `input` is ignored.
1523
+
1524
+ When `dir_path` is specified, the `configuration` argument must
1525
+ also be provided. More specifically, the
1526
+ `RunConfiguration.format.format_input.input_type` parameter
1527
+ dictates what kind of input is being submitted to the Nextmv Cloud.
1528
+ Make sure that this parameter is specified when working with the
1529
+ following input formats:
1530
+
1531
+ - `nextmv.InputFormat.CSV_ARCHIVE`
1532
+ - `nextmv.InputFormat.MULTI_FILE`
1533
+
1534
+ When working with JSON or text data, use the `input` argument
1535
+ directly.
1536
+
1537
+ In general, if an input is too large, it will be uploaded with the
1538
+ `upload_large_input` method.
1511
1539
  instance_id: Optional[str]
1512
1540
  ID of the instance to use for the run. If not provided, the default
1513
1541
  instance ID associated to the Class (`default_instance_id`) is
@@ -1545,6 +1573,13 @@ class Application:
1545
1573
  json_configurations: Optional[dict[str, Any]]
1546
1574
  Optional configurations for JSON serialization. This is used to
1547
1575
  customize the serialization before data is sent.
1576
+ dir_path: Optional[str]
1577
+ Path to a directory containing input files. If specified, the
1578
+ function will package the files in the directory into a tar file
1579
+ and upload it as a large input. This is useful for input formats
1580
+ like `nextmv.InputFormat.CSV_ARCHIVE` or `nextmv.InputFormat.MULTI_FILE`.
1581
+ If both `input` and `dir_path` are specified, the `input` is
1582
+ ignored, and the files in the directory are used instead.
1548
1583
 
1549
1584
  Returns
1550
1585
  ----------
@@ -1560,26 +1595,34 @@ class Application:
1560
1595
  not `JSON`. If the final `options` are not of type `dict[str,str]`.
1561
1596
  """
1562
1597
 
1598
+ self.__validate_dir_path_and_configuration(dir_path, configuration)
1599
+
1600
+ tar_file = ""
1601
+ if dir_path is not None and dir_path != "":
1602
+ if not os.path.exists(dir_path):
1603
+ raise ValueError(f"Directory {dir_path} does not exist.")
1604
+
1605
+ if not os.path.isdir(dir_path):
1606
+ raise ValueError(f"Path {dir_path} is not a directory.")
1607
+
1608
+ tar_file = self.__package_inputs(dir_path)
1609
+
1563
1610
  input_data = None
1564
1611
  if isinstance(input, BaseModel):
1565
1612
  input_data = input.to_dict()
1566
1613
  elif isinstance(input, dict) or isinstance(input, str):
1567
1614
  input_data = input
1568
1615
  elif isinstance(input, Input):
1569
- if input.input_format == InputFormat.CSV_ARCHIVE:
1570
- raise ValueError("csv-archive is not supported")
1571
1616
  input_data = input.data
1572
1617
 
1573
1618
  input_size = 0
1574
1619
  if input_data is not None:
1575
1620
  input_size = get_size(input_data)
1576
1621
 
1577
- upload_url_required = input_size > _MAX_RUN_SIZE
1578
1622
  upload_id_used = upload_id is not None
1579
-
1580
- if not upload_id_used and upload_url_required:
1623
+ if self.__upload_url_required(upload_id_used, input_size, tar_file, input):
1581
1624
  upload_url = self.upload_url()
1582
- self.upload_large_input(input=input_data, upload_url=upload_url)
1625
+ self.upload_large_input(input=input_data, upload_url=upload_url, tar_file=tar_file)
1583
1626
  upload_id = upload_url.upload_id
1584
1627
  upload_id_used = True
1585
1628
 
@@ -1612,11 +1655,18 @@ class Application:
1612
1655
  if not isinstance(v, str):
1613
1656
  raise ValueError(f"options must be dict[str,str], option {k} has type {type(v)} instead.")
1614
1657
  payload["options"] = options_dict
1658
+
1615
1659
  if configuration is not None:
1616
1660
  configuration_dict = (
1617
1661
  configuration.to_dict() if isinstance(configuration, RunConfiguration) else configuration
1618
1662
  )
1619
- payload["configuration"] = configuration_dict
1663
+ else:
1664
+ configuration = RunConfiguration()
1665
+ configuration.resolve(input=input, dir_path=dir_path)
1666
+ configuration_dict = configuration.to_dict()
1667
+
1668
+ payload["configuration"] = configuration_dict
1669
+
1620
1670
  if batch_experiment_id is not None:
1621
1671
  payload["batch_experiment_id"] = batch_experiment_id
1622
1672
  if external_result is not None:
@@ -1650,6 +1700,8 @@ class Application:
1650
1700
  configuration: Optional[Union[RunConfiguration, dict[str, Any]]] = None,
1651
1701
  batch_experiment_id: Optional[str] = None,
1652
1702
  external_result: Optional[Union[ExternalRunResult, dict[str, Any]]] = None,
1703
+ json_configurations: Optional[dict[str, Any]] = None,
1704
+ dir_path: Optional[str] = None,
1653
1705
  ) -> RunResult:
1654
1706
  """
1655
1707
  Submit an input to start a new run of the application and poll for the
@@ -1661,11 +1713,35 @@ class Application:
1661
1713
  ----------
1662
1714
  input: Union[Input, dict[str, Any], BaseModel, str]
1663
1715
  Input to use for the run. This can be a `nextmv.Input` object,
1664
- `dict`, `BaseModel` or `str`. If `nextmv.Input` is used, then the
1665
- input is extracted from the `.data` property. Note that for now,
1666
- `InputFormat.CSV_ARCHIVE` is not supported as an
1667
- `input.input_format`. If an input is too large, it will be uploaded
1668
- with the `upload_large_input` method.
1716
+ `dict`, `BaseModel` or `str`.
1717
+
1718
+ If `nextmv.Input` is used, and the `input_format` is either
1719
+ `nextmv.InputFormat.JSON` or `nextmv.InputFormat.TEXT`, then the
1720
+ input data is extracted from the `.data` property.
1721
+
1722
+ If you want to work with `nextmv.InputFormat.CSV_ARCHIVE` or
1723
+ `nextmv.InputFormat.MULTI_FILE`, you should use the `dir_path`
1724
+ argument instead. This argument takes precedence over the `input`.
1725
+ If `dir_path` is specified, this function looks for files in that
1726
+ directory and tars them, to later be uploaded using the
1727
+ `upload_large_input` method. If both the `dir_path` and `input`
1728
+ arguments are provided, the `input` is ignored.
1729
+
1730
+ When `dir_path` is specified, the `configuration` argument must
1731
+ also be provided. More specifically, the
1732
+ `RunConfiguration.format.format_input.input_type` parameter
1733
+ dictates what kind of input is being submitted to the Nextmv Cloud.
1734
+ Make sure that this parameter is specified when working with the
1735
+ following input formats:
1736
+
1737
+ - `nextmv.InputFormat.CSV_ARCHIVE`
1738
+ - `nextmv.InputFormat.MULTI_FILE`
1739
+
1740
+ When working with JSON or text data, use the `input` argument
1741
+ directly.
1742
+
1743
+ In general, if an input is too large, it will be uploaded with the
1744
+ `upload_large_input` method.
1669
1745
  instance_id: Optional[str]
1670
1746
  ID of the instance to use for the run. If not provided, the default
1671
1747
  instance ID associated to the Class (`default_instance_id`) is
@@ -1705,6 +1781,16 @@ class Application:
1705
1781
  configuration. This is used when the run is an external run. We
1706
1782
  suggest that instead of specifying this parameter, you use the
1707
1783
  `track_run_with_result` method of the class.
1784
+ json_configurations: Optional[dict[str, Any]]
1785
+ Optional configurations for JSON serialization. This is used to
1786
+ customize the serialization before data is sent.
1787
+ dir_path: Optional[str]
1788
+ Path to a directory containing input files. If specified, the
1789
+ function will package the files in the directory into a tar file
1790
+ and upload it as a large input. This is useful for input formats
1791
+ like `nextmv.InputFormat.CSV_ARCHIVE` or `nextmv.InputFormat.MULTI_FILE`.
1792
+ If both `input` and `dir_path` are specified, the `input` is
1793
+ ignored, and the files in the directory are used instead.
1708
1794
 
1709
1795
  Returns
1710
1796
  ----------
@@ -1736,6 +1822,8 @@ class Application:
1736
1822
  configuration=configuration,
1737
1823
  batch_experiment_id=batch_experiment_id,
1738
1824
  external_result=external_result,
1825
+ json_configurations=json_configurations,
1826
+ dir_path=dir_path,
1739
1827
  )
1740
1828
 
1741
1829
  return self.run_result_with_polling(
@@ -2797,9 +2885,10 @@ class Application:
2797
2885
 
2798
2886
  def upload_large_input(
2799
2887
  self,
2800
- input: Union[dict[str, Any], str],
2888
+ input: Optional[Union[dict[str, Any], str]],
2801
2889
  upload_url: UploadURL,
2802
2890
  json_configurations: Optional[dict[str, Any]] = None,
2891
+ tar_file: Optional[str] = None,
2803
2892
  ) -> None:
2804
2893
  """
2805
2894
  Upload large input data to the provided upload URL.
@@ -2810,14 +2899,19 @@ class Application:
2810
2899
 
2811
2900
  Parameters
2812
2901
  ----------
2813
- input : Union[dict[str, Any], str]
2902
+ input : Optional[Union[dict[str, Any], str]]
2814
2903
  Input data to upload. Can be either a dictionary that will be
2815
2904
  converted to JSON, or a pre-formatted JSON string.
2816
2905
  upload_url : UploadURL
2817
2906
  Upload URL object containing the pre-signed URL to use for uploading.
2818
2907
  json_configurations : Optional[dict[str, Any]], default=None
2819
2908
  Optional configurations for JSON serialization. If provided, these
2820
- configurations will be used when serializing the data via `json.dumps`.
2909
+ configurations will be used when serializing the data via
2910
+ `json.dumps`.
2911
+ tar_file : Optional[str], default=None
2912
+ If provided, this will be used to upload a tar file instead of
2913
+ a JSON string or dictionary. This is useful for uploading large
2914
+ files that are already packaged as a tarball.
2821
2915
 
2822
2916
  Returns
2823
2917
  -------
@@ -2841,12 +2935,13 @@ class Application:
2841
2935
  >>> app.upload_large_input(input=json_str, upload_url=url)
2842
2936
  """
2843
2937
 
2844
- if isinstance(input, dict):
2938
+ if input is not None and isinstance(input, dict):
2845
2939
  input = deflated_serialize_json(input, json_configurations=json_configurations)
2846
2940
 
2847
2941
  self.client.upload_to_presigned_url(
2848
2942
  url=upload_url.upload_url,
2849
2943
  data=input,
2944
+ tar_file=tar_file,
2850
2945
  )
2851
2946
 
2852
2947
  def upload_url(self) -> UploadURL:
@@ -3185,6 +3280,100 @@ class Application:
3185
3280
 
3186
3281
  raise ValueError(f"Unknown scenario input type: {scenario.scenario_input.scenario_input_type}")
3187
3282
 
3283
def __validate_dir_path_and_configuration(
    self,
    dir_path: Optional[str],
    configuration: Optional[RunConfiguration],
) -> None:
    """
    Auxiliary function to validate the directory path and configuration.

    Nothing is checked when `dir_path` is `None` or an empty string.
    Otherwise, the `configuration` must define
    `format.format_input.input_type`, and that type must be neither
    `InputFormat.JSON` nor `InputFormat.TEXT`.

    Parameters
    ----------
    dir_path : Optional[str]
        Path to the directory holding the input files, if any.
    configuration : Optional[RunConfiguration]
        Run configuration that accompanies the directory.

    Raises
    ------
    ValueError
        If `dir_path` is set and the configuration, its `format`, its
        `format.format_input`, or a valid `input_type` is missing.
    """

    if dir_path is None or dir_path == "":
        return

    # NOTE(review): the public run methods also accept a plain `dict` as
    # configuration; the attribute access below would fail for it. Confirm
    # whether dict configurations need to be supported together with dir_path.
    if configuration is None:
        raise ValueError(
            "If dir_path is provided, a RunConfiguration must also be provided.",
        )

    if configuration.format is None:
        raise ValueError(
            "If dir_path is provided, RunConfiguration.format must also be provided.",
        )

    if configuration.format.format_input is None:
        raise ValueError(
            "If dir_path is provided, RunConfiguration.format.format_input must also be provided.",
        )

    input_type = configuration.format.format_input.input_type
    if input_type is None or input_type in (InputFormat.JSON, InputFormat.TEXT):
        # Adjacent string literals are concatenated verbatim, so the first
        # one must end with a space to keep the two sentences apart.
        raise ValueError(
            "If dir_path is provided, RunConfiguration.format.format_input.input_type must be set to a valid type. "
            f"Valid types are: {[InputFormat.CSV_ARCHIVE, InputFormat.MULTI_FILE]}",
        )
3315
+
3316
def __package_inputs(self, dir_path: str) -> str:
    """
    Auxiliary function that bundles every file found under `dir_path`
    (recursively) into a g-zipped tarball and returns the tarball's path.

    The archive is named `inputs.tar.gz` and is written to a freshly
    created temporary directory. Files are stored under their path
    relative to `dir_path`. Any file that is itself named `inputs.tar.gz`
    is left out of the archive.

    Parameters
    ----------
    dir_path : str
        Directory whose files are packaged.

    Returns
    -------
    str
        Path of the generated tar file.
    """

    archive_name = "inputs.tar.gz"
    archive_path = os.path.join(
        tempfile.mkdtemp(prefix="nextmv-inputs-out-"),
        archive_name,
    )

    with tarfile.open(archive_path, "w:gz") as archive:
        for current_dir, _, file_names in os.walk(dir_path):
            # Full paths of the entries to archive, minus same-named tarballs.
            candidates = (os.path.join(current_dir, name) for name in file_names if name != archive_name)
            for candidate in candidates:
                # Only regular entries are archived; directories are skipped.
                if not os.path.isdir(candidate):
                    archive.add(candidate, arcname=os.path.relpath(candidate, start=dir_path))

    return archive_path
3349
+
3350
def __upload_url_required(
    self,
    upload_id_used: bool,
    input_size: int,
    tar_file: str,
    input: Optional[Union[Input, dict[str, Any], BaseModel, str]] = None,
) -> bool:
    """
    Auxiliary function to determine if an upload URL is required
    based on the input size, type, and configuration.

    Parameters
    ----------
    upload_id_used : bool
        Whether the caller already supplied an upload ID.
    input_size : int
        Size of the input data, compared against `_MAX_RUN_SIZE`.
    tar_file : str
        Path of the packaged inputs; empty when no directory was packaged.
    input : Optional[Union[Input, dict[str, Any], BaseModel, str]]
        The input handed to the run, used to detect non-JSON payloads.

    Returns
    -------
    bool
        `True` when the input must be uploaded through a new upload URL,
        `False` otherwise.
    """

    # When an upload ID was already provided no new upload must happen.
    # Returning True here would request a fresh URL and overwrite the
    # caller's upload ID.
    if upload_id_used:
        return False

    non_json_payload = False
    if isinstance(input, str):
        non_json_payload = True
    elif isinstance(input, Input) and input.input_format != InputFormat.JSON:
        non_json_payload = True
    elif tar_file is not None and tar_file != "":
        non_json_payload = True

    size_exceeds = input_size > _MAX_RUN_SIZE

    return size_exceeds or non_json_payload
3376
+
3188
3377
 
3189
3378
  def poll( # noqa: C901
3190
3379
  polling_options: PollingOptions,
nextmv/cloud/client.py CHANGED
@@ -323,7 +323,11 @@ class Client:
323
323
  return response
324
324
 
325
325
  def upload_to_presigned_url(
326
- self, data: Union[dict[str, Any], str], url: str, json_configurations: Optional[dict[str, Any]] = None
326
+ self,
327
+ data: Optional[Union[dict[str, Any], str]],
328
+ url: str,
329
+ json_configurations: Optional[dict[str, Any]] = None,
330
+ tar_file: Optional[str] = None,
327
331
  ) -> None:
328
332
  """
329
333
  Uploads data to a presigned URL.
@@ -333,7 +337,7 @@ class Client:
333
337
 
334
338
  Parameters
335
339
  ----------
336
- data : dict[str, Any] or str
340
+ data : Union[dict[str, Any], str], optional
337
341
  The data to upload. If a dictionary is provided, it will be
338
342
  JSON-serialized. If a string is provided, it will be uploaded
339
343
  as is.
@@ -344,6 +348,11 @@ class Client:
344
348
  customization of the Python `json.dumps` function, such as
345
349
  specifying `indent` for pretty printing or `default` for custom
346
350
  serialization functions.
351
+ tar_file : str, optional
352
+ If provided, this will be used to upload a tar file instead of
353
+ a JSON string or dictionary. This is useful for uploading large
354
+ files that are already packaged as a tarball. If this is provided,
355
+ `data` is expected to be `None`.
347
356
 
348
357
  Raises
349
358
  ------
@@ -361,12 +370,13 @@ class Client:
361
370
  """
362
371
 
363
372
  upload_data: Optional[str] = None
364
- if isinstance(data, dict):
365
- upload_data = deflated_serialize_json(data, json_configurations=json_configurations)
366
- elif isinstance(data, str):
367
- upload_data = data
368
- else:
369
- raise ValueError("data must be a dictionary or a string")
373
+ if data is not None:
374
+ if isinstance(data, dict):
375
+ upload_data = deflated_serialize_json(data, json_configurations=json_configurations)
376
+ elif isinstance(data, str):
377
+ upload_data = data
378
+ else:
379
+ raise ValueError("data must be a dictionary or a string")
370
380
 
371
381
  session = requests.Session()
372
382
  retries = Retry(
@@ -379,12 +389,21 @@ class Client:
379
389
  )
380
390
  adapter = HTTPAdapter(max_retries=retries)
381
391
  session.mount("https://", adapter)
392
+
382
393
  kwargs: dict[str, Any] = {
383
394
  "url": url,
384
395
  "timeout": self.timeout,
385
- "data": upload_data,
386
396
  }
387
397
 
398
+ if upload_data is not None:
399
+ kwargs["data"] = upload_data
400
+ elif tar_file is not None:
401
+ if not os.path.exists(tar_file):
402
+ raise ValueError(f"tar_file {tar_file} does not exist")
403
+ kwargs["data"] = open(tar_file, "rb")
404
+ else:
405
+ raise ValueError("either data or tar_file must be provided")
406
+
388
407
  response = session.put(**kwargs)
389
408
 
390
409
  try: