PyPI - nextmv - Versions diffs - 0.28.5__py3-none-any.whl → 0.29.0__py3-none-any.whl - Mend

nextmv 0.28.5__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

nextmv/__about__.py +1 -1
nextmv/__init__.py +8 -0
nextmv/cloud/application.py +210 -21
nextmv/cloud/client.py +28 -9
nextmv/cloud/manifest.py +142 -14
nextmv/cloud/package.py +1 -1
nextmv/cloud/run.py +34 -0
nextmv/input.py +476 -6
nextmv/model.py +12 -3
nextmv/options.py +88 -0
nextmv/output.py +535 -51
{nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/METADATA +13 -1
{nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/RECORD +15 -15
{nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/WHEEL +0 -0
{nextmv-0.28.5.dist-info → nextmv-0.29.0.dist-info}/licenses/LICENSE +0 -0

nextmv/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "v0.28.5"
1	+ __version__ = "v0.29.0"

nextmv/__init__.py CHANGED Viewed

@@ -3,12 +3,16 @@
 from .__about__ import __version__
 from .base_model import BaseModel as BaseModel
 from .base_model import from_dict as from_dict
+from .input import DataFile as DataFile
 from .input import Input as Input
 from .input import InputFormat as InputFormat
 from .input import InputLoader as InputLoader
 from .input import LocalInputLoader as LocalInputLoader
+from .input import csv_data_file as csv_data_file
+from .input import json_data_file as json_data_file
 from .input import load as load
 from .input import load_local as load_local
+from .input import text_data_file as text_data_file
 from .logger import log as log
 from .logger import redirect_stdout as redirect_stdout
 from .logger import reset_stdout as reset_stdout
@@ -27,9 +31,13 @@ from .output import ResultStatistics as ResultStatistics
 from .output import RunStatistics as RunStatistics
 from .output import Series as Series
 from .output import SeriesData as SeriesData
+from .output import SolutionFile as SolutionFile
 from .output import Statistics as Statistics
 from .output import Visual as Visual
 from .output import VisualSchema as VisualSchema
+from .output import csv_solution_file as csv_solution_file
+from .output import json_solution_file as json_solution_file
+from .output import text_solution_file as text_solution_file
 from .output import write as write
 from .output import write_local as write_local

nextmv/cloud/application.py CHANGED Viewed

@@ -23,8 +23,11 @@ poll
 """
 import json
+import os
 import random
 import shutil
+import tarfile
+import tempfile
 import time
 from collections.abc import Callable
 from dataclasses import dataclass
@@ -1494,6 +1497,7 @@ class Application:
         batch_experiment_id: Optional[str] = None,
         external_result: Optional[Union[ExternalRunResult, dict[str, Any]]] = None,
         json_configurations: Optional[dict[str, Any]] = None,
+        dir_path: Optional[str] = None,
     ) -> str:
         """
         Submit an input to start a new run of the application. Returns the
@@ -1503,11 +1507,35 @@ class Application:
         ----------
         input: Union[Input, dict[str, Any], BaseModel, str]
             Input to use for the run. This can be a `nextmv.Input` object,
-            `dict`, `BaseModel` or `str`. If `nextmv.Input` is used, then the
-            input is extracted from the `.data` property. Note that for now,
-            `InputFormat.CSV_ARCHIVE` is not supported as an
-            `input.input_format`. If an input is too large, it will be uploaded
-            with the `upload_large_input` method.
+            `dict`, `BaseModel` or `str`.
+            If `nextmv.Input` is used, and the `input_format` is either
+            `nextmv.InputFormat.JSON` or `nextmv.InputFormat.TEXT`, then the
+            input data is extracted from the `.data` property.
+            If you want to work with `nextmv.InputFormat.CSV_ARCHIVE` or
+            `nextmv.InputFormat.MULTI_FILE`, you should use the `dir_path`
+            argument instead. This argument takes precedence over the `input`.
+            If `dir_path` is specified, this function looks for files in that
+            directory and tars them, to later be uploaded using the
+            `upload_large_input` method. If both the `dir_path` and `input`
+            arguments are provided, the `input` is ignored.
+            When `dir_path` is specified, the `configuration` argument must
+            also be provided. More specifically, the
+            `RunConfiguration.format.format_input.input_type` parameter
+            dictates what kind of input is being submitted to the Nextmv Cloud.
+            Make sure that this parameter is specified when working with the
+            following input formats:
+            - `nextmv.InputFormat.CSV_ARCHIVE`
+            - `nextmv.InputFormat.MULTI_FILE`
+            When working with JSON or text data, use the `input` argument
+            directly.
+            In general, if an input is too large, it will be uploaded with the
+            `upload_large_input` method.
         instance_id: Optional[str]
             ID of the instance to use for the run. If not provided, the default
             instance ID associated to the Class (`default_instance_id`) is
@@ -1545,6 +1573,13 @@ class Application:
         json_configurations: Optional[dict[str, Any]]
             Optional configurations for JSON serialization. This is used to
             customize the serialization before data is sent.
+        dir_path: Optional[str]
+            Path to a directory containing input files. If specified, the
+            function will package the files in the directory into a tar file
+            and upload it as a large input. This is useful for input formats
+            like `nextmv.InputFormat.CSV_ARCHIVE` or `nextmv.InputFormat.MULTI_FILE`.
+            If both `input` and `dir_path` are specified, the `input` is
+            ignored, and the files in the directory are used instead.
         Returns
         ----------
@@ -1560,26 +1595,34 @@ class Application:
             not `JSON`. If the final `options` are not of type `dict[str,str]`.
         """
+        self.__validate_dir_path_and_configuration(dir_path, configuration)
+        tar_file = ""
+        if dir_path is not None and dir_path != "":
+            if not os.path.exists(dir_path):
+                raise ValueError(f"Directory {dir_path} does not exist.")
+            if not os.path.isdir(dir_path):
+                raise ValueError(f"Path {dir_path} is not a directory.")
+            tar_file = self.__package_inputs(dir_path)
         input_data = None
         if isinstance(input, BaseModel):
             input_data = input.to_dict()
         elif isinstance(input, dict) or isinstance(input, str):
             input_data = input
         elif isinstance(input, Input):
-            if input.input_format == InputFormat.CSV_ARCHIVE:
-                raise ValueError("csv-archive is not supported")
             input_data = input.data
         input_size = 0
         if input_data is not None:
             input_size = get_size(input_data)
-        upload_url_required = input_size > _MAX_RUN_SIZE
         upload_id_used = upload_id is not None
-        if not upload_id_used and upload_url_required:
+        if self.__upload_url_required(upload_id_used, input_size, tar_file, input):
             upload_url = self.upload_url()
-            self.upload_large_input(input=input_data, upload_url=upload_url)
+            self.upload_large_input(input=input_data, upload_url=upload_url, tar_file=tar_file)
             upload_id = upload_url.upload_id
             upload_id_used = True
@@ -1612,11 +1655,18 @@ class Application:
                 if not isinstance(v, str):
                     raise ValueError(f"options must be dict[str,str], option {k} has type {type(v)} instead.")
             payload["options"] = options_dict
         if configuration is not None:
             configuration_dict = (
                 configuration.to_dict() if isinstance(configuration, RunConfiguration) else configuration
             )
-            payload["configuration"] = configuration_dict
+        else:
+            configuration = RunConfiguration()
+            configuration.resolve(input=input, dir_path=dir_path)
+            configuration_dict = configuration.to_dict()
+        payload["configuration"] = configuration_dict
         if batch_experiment_id is not None:
             payload["batch_experiment_id"] = batch_experiment_id
         if external_result is not None:
@@ -1650,6 +1700,8 @@ class Application:
         configuration: Optional[Union[RunConfiguration, dict[str, Any]]] = None,
         batch_experiment_id: Optional[str] = None,
         external_result: Optional[Union[ExternalRunResult, dict[str, Any]]] = None,
+        json_configurations: Optional[dict[str, Any]] = None,
+        dir_path: Optional[str] = None,
     ) -> RunResult:
         """
         Submit an input to start a new run of the application and poll for the
@@ -1661,11 +1713,35 @@ class Application:
         ----------
         input: Union[Input, dict[str, Any], BaseModel, str]
             Input to use for the run. This can be a `nextmv.Input` object,
-            `dict`, `BaseModel` or `str`. If `nextmv.Input` is used, then the
-            input is extracted from the `.data` property. Note that for now,
-            `InputFormat.CSV_ARCHIVE` is not supported as an
-            `input.input_format`. If an input is too large, it will be uploaded
-            with the `upload_large_input` method.
+            `dict`, `BaseModel` or `str`.
+            If `nextmv.Input` is used, and the `input_format` is either
+            `nextmv.InputFormat.JSON` or `nextmv.InputFormat.TEXT`, then the
+            input data is extracted from the `.data` property.
+            If you want to work with `nextmv.InputFormat.CSV_ARCHIVE` or
+            `nextmv.InputFormat.MULTI_FILE`, you should use the `dir_path`
+            argument instead. This argument takes precedence over the `input`.
+            If `dir_path` is specified, this function looks for files in that
+            directory and tars them, to later be uploaded using the
+            `upload_large_input` method. If both the `dir_path` and `input`
+            arguments are provided, the `input` is ignored.
+            When `dir_path` is specified, the `configuration` argument must
+            also be provided. More specifically, the
+            `RunConfiguration.format.format_input.input_type` parameter
+            dictates what kind of input is being submitted to the Nextmv Cloud.
+            Make sure that this parameter is specified when working with the
+            following input formats:
+            - `nextmv.InputFormat.CSV_ARCHIVE`
+            - `nextmv.InputFormat.MULTI_FILE`
+            When working with JSON or text data, use the `input` argument
+            directly.
+            In general, if an input is too large, it will be uploaded with the
+            `upload_large_input` method.
         instance_id: Optional[str]
             ID of the instance to use for the run. If not provided, the default
             instance ID associated to the Class (`default_instance_id`) is
@@ -1705,6 +1781,16 @@ class Application:
             configuration. This is used when the run is an external run. We
             suggest that instead of specifying this parameter, you use the
             `track_run_with_result` method of the class.
+        json_configurations: Optional[dict[str, Any]]
+            Optional configurations for JSON serialization. This is used to
+            customize the serialization before data is sent.
+        dir_path: Optional[str]
+            Path to a directory containing input files. If specified, the
+            function will package the files in the directory into a tar file
+            and upload it as a large input. This is useful for input formats
+            like `nextmv.InputFormat.CSV_ARCHIVE` or `nextmv.InputFormat.MULTI_FILE`.
+            If both `input` and `dir_path` are specified, the `input` is
+            ignored, and the files in the directory are used instead.
         Returns
         ----------
@@ -1736,6 +1822,8 @@ class Application:
             configuration=configuration,
             batch_experiment_id=batch_experiment_id,
             external_result=external_result,
+            json_configurations=json_configurations,
+            dir_path=dir_path,
         )
         return self.run_result_with_polling(
@@ -2797,9 +2885,10 @@ class Application:
     def upload_large_input(
         self,
-        input: Union[dict[str, Any], str],
+        input: Optional[Union[dict[str, Any], str]],
         upload_url: UploadURL,
         json_configurations: Optional[dict[str, Any]] = None,
+        tar_file: Optional[str] = None,
     ) -> None:
         """
         Upload large input data to the provided upload URL.
@@ -2810,14 +2899,19 @@ class Application:
         Parameters
         ----------
-        input : Union[dict[str, Any], str]
+        input : Optional[Union[dict[str, Any], str]]
             Input data to upload. Can be either a dictionary that will be
             converted to JSON, or a pre-formatted JSON string.
         upload_url : UploadURL
             Upload URL object containing the pre-signed URL to use for uploading.
         json_configurations : Optional[dict[str, Any]], default=None
             Optional configurations for JSON serialization. If provided, these
-            configurations will be used when serializing the data via `json.dumps`.
+            configurations will be used when serializing the data via
+            `json.dumps`.
+        tar_file : Optional[str], default=None
+            If provided, this will be used to upload a tar file instead of
+            a JSON string or dictionary. This is useful for uploading large
+            files that are already packaged as a tarball.
         Returns
         -------
@@ -2841,12 +2935,13 @@ class Application:
         >>> app.upload_large_input(input=json_str, upload_url=url)
         """
-        if isinstance(input, dict):
+        if input is not None and isinstance(input, dict):
             input = deflated_serialize_json(input, json_configurations=json_configurations)
         self.client.upload_to_presigned_url(
             url=upload_url.upload_url,
             data=input,
+            tar_file=tar_file,
         )
     def upload_url(self) -> UploadURL:
@@ -3185,6 +3280,100 @@ class Application:
         raise ValueError(f"Unknown scenario input type: {scenario.scenario_input.scenario_input_type}")
+    def __validate_dir_path_and_configuration(
+        self,
+        dir_path: Optional[str],
+        configuration: Optional[RunConfiguration],
+    ) -> None:
+        """
+        Validate that a run configuration suitable for directory-based
+        inputs accompanies `dir_path`.
+
+        Nothing is checked when `dir_path` is `None` or empty. Otherwise the
+        configuration must define `format.format_input.input_type`, and that
+        type must not be `InputFormat.JSON` or `InputFormat.TEXT`.
+
+        Parameters
+        ----------
+        dir_path : Optional[str]
+            Directory holding the input files, if any.
+        configuration : Optional[RunConfiguration]
+            Run configuration submitted together with the run.
+
+        Raises
+        ------
+        ValueError
+            If `dir_path` is set and the configuration, its `format`, its
+            `format_input`, or a directory-compatible `input_type` is
+            missing.
+        """
+        if dir_path is None or dir_path == "":
+            return
+        if configuration is None:
+            raise ValueError(
+                "If dir_path is provided, a RunConfiguration must also be provided.",
+            )
+        # NOTE(review): the run methods also accept a plain dict as the
+        # configuration; a dict would fail on the attribute access below.
+        # Confirm whether dict configurations should be validated as well.
+        if configuration.format is None:
+            raise ValueError(
+                "If dir_path is provided, RunConfiguration.format must also be provided.",
+            )
+        if configuration.format.format_input is None:
+            raise ValueError(
+                "If dir_path is provided, RunConfiguration.format.format_input must also be provided.",
+            )
+        input_type = configuration.format.format_input.input_type
+        if input_type is None or input_type in (InputFormat.JSON, InputFormat.TEXT):
+            # The trailing space keeps the two sentences of the message apart
+            # once the adjacent literals are concatenated.
+            raise ValueError(
+                "If dir_path is provided, RunConfiguration.format.format_input.input_type must be set to a valid type. "
+                f"Valid types are: {[InputFormat.CSV_ARCHIVE, InputFormat.MULTI_FILE]}",
+            )
+    def __package_inputs(self, dir_path: str) -> str:
+        """
+        Bundle every file found under `dir_path` into a gzip-compressed
+        tarball.
+
+        The archive is written as `inputs.tar.gz` inside a freshly created
+        temporary directory. Member names are stored relative to `dir_path`,
+        and files that are themselves named `inputs.tar.gz` are left out.
+
+        Parameters
+        ----------
+        dir_path : str
+            Directory whose files are packaged.
+
+        Returns
+        -------
+        str
+            Path of the tarball that was created.
+        """
+        archive_name = "inputs.tar.gz"
+        # The archive lives in its own temporary directory, away from the inputs.
+        archive_path = os.path.join(tempfile.mkdtemp(prefix="nextmv-inputs-out-"), archive_name)
+        with tarfile.open(archive_path, "w:gz") as archive:
+            for current_dir, _, filenames in os.walk(dir_path):
+                candidates = (os.path.join(current_dir, name) for name in filenames if name != archive_name)
+                for candidate in candidates:
+                    # Directories are never archived, only files.
+                    if not os.path.isdir(candidate):
+                        archive.add(candidate, arcname=os.path.relpath(candidate, start=dir_path))
+        return archive_path
+    def __upload_url_required(
+        self,
+        upload_id_used: bool,
+        input_size: int,
+        tar_file: str,
+        input: Optional[Union[Input, dict[str, Any], BaseModel, str]] = None,
+    ) -> bool:
+        """
+        Decide whether the input has to be sent through a new upload URL.
+
+        Parameters
+        ----------
+        upload_id_used : bool
+            Whether the caller already supplied an upload ID.
+        input_size : int
+            Size of the input data, compared against `_MAX_RUN_SIZE`.
+        tar_file : str
+            Path to a packaged tar file; empty when there is none.
+        input : Optional[Union[Input, dict[str, Any], BaseModel, str]]
+            The input handed to the run.
+
+        Returns
+        -------
+        bool
+            `False` when an upload ID was already supplied. Otherwise `True`
+            when the input exceeds `_MAX_RUN_SIZE`, a tar file is present,
+            the input is a `str`, or it is an `Input` whose format is not
+            `InputFormat.JSON`.
+        """
+        # When the caller already supplied an upload ID, no new upload URL is
+        # needed: answering True here would trigger a fresh upload and
+        # replace the supplied ID.
+        if upload_id_used:
+            return False
+        if input_size > _MAX_RUN_SIZE:
+            return True
+        # Anything that is not a JSON payload is sent through an upload URL.
+        if tar_file is not None and tar_file != "":
+            return True
+        if isinstance(input, str):
+            return True
+        return isinstance(input, Input) and input.input_format != InputFormat.JSON
 def poll(  # noqa: C901
     polling_options: PollingOptions,

nextmv/cloud/client.py CHANGED Viewed

@@ -323,7 +323,11 @@ class Client:
         return response
     def upload_to_presigned_url(
-        self, data: Union[dict[str, Any], str], url: str, json_configurations: Optional[dict[str, Any]] = None
+        self,
+        data: Optional[Union[dict[str, Any], str]],
+        url: str,
+        json_configurations: Optional[dict[str, Any]] = None,
+        tar_file: Optional[str] = None,
     ) -> None:
         """
         Uploads data to a presigned URL.
@@ -333,7 +337,7 @@ class Client:
         Parameters
         ----------
-        data : dict[str, Any] or str
+        data : Union[dict[str, Any], str], optional
             The data to upload. If a dictionary is provided, it will be
             JSON-serialized. If a string is provided, it will be uploaded
             as is.
@@ -344,6 +348,11 @@ class Client:
             customization of the Python `json.dumps` function, such as
             specifying `indent` for pretty printing or `default` for custom
             serialization functions.
+        tar_file : str, optional
+            If provided, this will be used to upload a tar file instead of
+            a JSON string or dictionary. This is useful for uploading large
+            files that are already packaged as a tarball. If this is provided,
+            `data` is expected to be `None`.
         Raises
         ------
@@ -361,12 +370,13 @@ class Client:
         """
         upload_data: Optional[str] = None
-        if isinstance(data, dict):
-            upload_data = deflated_serialize_json(data, json_configurations=json_configurations)
-        elif isinstance(data, str):
-            upload_data = data
-        else:
-            raise ValueError("data must be a dictionary or a string")
+        if data is not None:
+            if isinstance(data, dict):
+                upload_data = deflated_serialize_json(data, json_configurations=json_configurations)
+            elif isinstance(data, str):
+                upload_data = data
+            else:
+                raise ValueError("data must be a dictionary or a string")
         session = requests.Session()
         retries = Retry(
@@ -379,12 +389,21 @@ class Client:
         )
         adapter = HTTPAdapter(max_retries=retries)
         session.mount("https://", adapter)
         kwargs: dict[str, Any] = {
             "url": url,
             "timeout": self.timeout,
-            "data": upload_data,
         }
+        if upload_data is not None:
+            kwargs["data"] = upload_data
+        elif tar_file is not None:
+            if not os.path.exists(tar_file):
+                raise ValueError(f"tar_file {tar_file} does not exist")
+            kwargs["data"] = open(tar_file, "rb")
+        else:
+            raise ValueError("either data or tar_file must be provided")
         response = session.put(**kwargs)
         try:

nextmv 0.28.5__py3-none-any.whl → 0.29.0__py3-none-any.whl

nextmv 0.28.5__py3-none-any.whl → 0.29.0__py3-none-any.whl