nextmv 0.18.0__py3-none-any.whl → 1.0.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. nextmv/__about__.py +1 -1
  2. nextmv/__entrypoint__.py +8 -13
  3. nextmv/__init__.py +53 -0
  4. nextmv/_serialization.py +96 -0
  5. nextmv/base_model.py +54 -9
  6. nextmv/cli/CONTRIBUTING.md +511 -0
  7. nextmv/cli/__init__.py +0 -0
  8. nextmv/cli/cloud/__init__.py +47 -0
  9. nextmv/cli/cloud/acceptance/__init__.py +27 -0
  10. nextmv/cli/cloud/acceptance/create.py +393 -0
  11. nextmv/cli/cloud/acceptance/delete.py +68 -0
  12. nextmv/cli/cloud/acceptance/get.py +104 -0
  13. nextmv/cli/cloud/acceptance/list.py +62 -0
  14. nextmv/cli/cloud/acceptance/update.py +95 -0
  15. nextmv/cli/cloud/account/__init__.py +28 -0
  16. nextmv/cli/cloud/account/create.py +83 -0
  17. nextmv/cli/cloud/account/delete.py +60 -0
  18. nextmv/cli/cloud/account/get.py +66 -0
  19. nextmv/cli/cloud/account/update.py +70 -0
  20. nextmv/cli/cloud/app/__init__.py +35 -0
  21. nextmv/cli/cloud/app/create.py +141 -0
  22. nextmv/cli/cloud/app/delete.py +58 -0
  23. nextmv/cli/cloud/app/exists.py +44 -0
  24. nextmv/cli/cloud/app/get.py +66 -0
  25. nextmv/cli/cloud/app/list.py +61 -0
  26. nextmv/cli/cloud/app/push.py +137 -0
  27. nextmv/cli/cloud/app/update.py +124 -0
  28. nextmv/cli/cloud/batch/__init__.py +29 -0
  29. nextmv/cli/cloud/batch/create.py +454 -0
  30. nextmv/cli/cloud/batch/delete.py +68 -0
  31. nextmv/cli/cloud/batch/get.py +104 -0
  32. nextmv/cli/cloud/batch/list.py +63 -0
  33. nextmv/cli/cloud/batch/metadata.py +66 -0
  34. nextmv/cli/cloud/batch/update.py +95 -0
  35. nextmv/cli/cloud/data/__init__.py +26 -0
  36. nextmv/cli/cloud/data/upload.py +162 -0
  37. nextmv/cli/cloud/ensemble/__init__.py +31 -0
  38. nextmv/cli/cloud/ensemble/create.py +414 -0
  39. nextmv/cli/cloud/ensemble/delete.py +67 -0
  40. nextmv/cli/cloud/ensemble/get.py +65 -0
  41. nextmv/cli/cloud/ensemble/update.py +103 -0
  42. nextmv/cli/cloud/input_set/__init__.py +30 -0
  43. nextmv/cli/cloud/input_set/create.py +170 -0
  44. nextmv/cli/cloud/input_set/get.py +63 -0
  45. nextmv/cli/cloud/input_set/list.py +63 -0
  46. nextmv/cli/cloud/input_set/update.py +123 -0
  47. nextmv/cli/cloud/instance/__init__.py +35 -0
  48. nextmv/cli/cloud/instance/create.py +290 -0
  49. nextmv/cli/cloud/instance/delete.py +62 -0
  50. nextmv/cli/cloud/instance/exists.py +39 -0
  51. nextmv/cli/cloud/instance/get.py +62 -0
  52. nextmv/cli/cloud/instance/list.py +60 -0
  53. nextmv/cli/cloud/instance/update.py +216 -0
  54. nextmv/cli/cloud/managed_input/__init__.py +31 -0
  55. nextmv/cli/cloud/managed_input/create.py +146 -0
  56. nextmv/cli/cloud/managed_input/delete.py +65 -0
  57. nextmv/cli/cloud/managed_input/get.py +63 -0
  58. nextmv/cli/cloud/managed_input/list.py +60 -0
  59. nextmv/cli/cloud/managed_input/update.py +97 -0
  60. nextmv/cli/cloud/run/__init__.py +37 -0
  61. nextmv/cli/cloud/run/cancel.py +37 -0
  62. nextmv/cli/cloud/run/create.py +530 -0
  63. nextmv/cli/cloud/run/get.py +199 -0
  64. nextmv/cli/cloud/run/input.py +86 -0
  65. nextmv/cli/cloud/run/list.py +80 -0
  66. nextmv/cli/cloud/run/logs.py +167 -0
  67. nextmv/cli/cloud/run/metadata.py +67 -0
  68. nextmv/cli/cloud/run/track.py +501 -0
  69. nextmv/cli/cloud/scenario/__init__.py +29 -0
  70. nextmv/cli/cloud/scenario/create.py +451 -0
  71. nextmv/cli/cloud/scenario/delete.py +65 -0
  72. nextmv/cli/cloud/scenario/get.py +102 -0
  73. nextmv/cli/cloud/scenario/list.py +63 -0
  74. nextmv/cli/cloud/scenario/metadata.py +67 -0
  75. nextmv/cli/cloud/scenario/update.py +93 -0
  76. nextmv/cli/cloud/secrets/__init__.py +33 -0
  77. nextmv/cli/cloud/secrets/create.py +206 -0
  78. nextmv/cli/cloud/secrets/delete.py +67 -0
  79. nextmv/cli/cloud/secrets/get.py +66 -0
  80. nextmv/cli/cloud/secrets/list.py +60 -0
  81. nextmv/cli/cloud/secrets/update.py +147 -0
  82. nextmv/cli/cloud/shadow/__init__.py +33 -0
  83. nextmv/cli/cloud/shadow/create.py +184 -0
  84. nextmv/cli/cloud/shadow/delete.py +68 -0
  85. nextmv/cli/cloud/shadow/get.py +61 -0
  86. nextmv/cli/cloud/shadow/list.py +63 -0
  87. nextmv/cli/cloud/shadow/metadata.py +66 -0
  88. nextmv/cli/cloud/shadow/start.py +43 -0
  89. nextmv/cli/cloud/shadow/stop.py +43 -0
  90. nextmv/cli/cloud/shadow/update.py +95 -0
  91. nextmv/cli/cloud/upload/__init__.py +22 -0
  92. nextmv/cli/cloud/upload/create.py +39 -0
  93. nextmv/cli/cloud/version/__init__.py +33 -0
  94. nextmv/cli/cloud/version/create.py +97 -0
  95. nextmv/cli/cloud/version/delete.py +62 -0
  96. nextmv/cli/cloud/version/exists.py +39 -0
  97. nextmv/cli/cloud/version/get.py +62 -0
  98. nextmv/cli/cloud/version/list.py +60 -0
  99. nextmv/cli/cloud/version/update.py +92 -0
  100. nextmv/cli/community/__init__.py +24 -0
  101. nextmv/cli/community/clone.py +270 -0
  102. nextmv/cli/community/list.py +265 -0
  103. nextmv/cli/configuration/__init__.py +23 -0
  104. nextmv/cli/configuration/config.py +195 -0
  105. nextmv/cli/configuration/create.py +94 -0
  106. nextmv/cli/configuration/delete.py +67 -0
  107. nextmv/cli/configuration/list.py +77 -0
  108. nextmv/cli/main.py +188 -0
  109. nextmv/cli/message.py +153 -0
  110. nextmv/cli/options.py +206 -0
  111. nextmv/cli/version.py +38 -0
  112. nextmv/cloud/__init__.py +71 -17
  113. nextmv/cloud/acceptance_test.py +757 -51
  114. nextmv/cloud/account.py +406 -17
  115. nextmv/cloud/application/__init__.py +957 -0
  116. nextmv/cloud/application/_acceptance.py +419 -0
  117. nextmv/cloud/application/_batch_scenario.py +860 -0
  118. nextmv/cloud/application/_ensemble.py +251 -0
  119. nextmv/cloud/application/_input_set.py +227 -0
  120. nextmv/cloud/application/_instance.py +289 -0
  121. nextmv/cloud/application/_managed_input.py +227 -0
  122. nextmv/cloud/application/_run.py +1393 -0
  123. nextmv/cloud/application/_secrets.py +294 -0
  124. nextmv/cloud/application/_shadow.py +314 -0
  125. nextmv/cloud/application/_utils.py +54 -0
  126. nextmv/cloud/application/_version.py +303 -0
  127. nextmv/cloud/assets.py +48 -0
  128. nextmv/cloud/batch_experiment.py +294 -33
  129. nextmv/cloud/client.py +307 -66
  130. nextmv/cloud/ensemble.py +247 -0
  131. nextmv/cloud/input_set.py +120 -2
  132. nextmv/cloud/instance.py +133 -8
  133. nextmv/cloud/integration.py +533 -0
  134. nextmv/cloud/package.py +168 -53
  135. nextmv/cloud/scenario.py +410 -0
  136. nextmv/cloud/secrets.py +234 -0
  137. nextmv/cloud/shadow.py +190 -0
  138. nextmv/cloud/url.py +73 -0
  139. nextmv/cloud/version.py +132 -4
  140. nextmv/default_app/.gitignore +1 -0
  141. nextmv/default_app/README.md +32 -0
  142. nextmv/default_app/app.yaml +12 -0
  143. nextmv/default_app/input.json +5 -0
  144. nextmv/default_app/main.py +37 -0
  145. nextmv/default_app/requirements.txt +2 -0
  146. nextmv/default_app/src/__init__.py +0 -0
  147. nextmv/default_app/src/visuals.py +36 -0
  148. nextmv/deprecated.py +47 -0
  149. nextmv/input.py +861 -90
  150. nextmv/local/__init__.py +5 -0
  151. nextmv/local/application.py +1251 -0
  152. nextmv/local/executor.py +1042 -0
  153. nextmv/local/geojson_handler.py +323 -0
  154. nextmv/local/local.py +97 -0
  155. nextmv/local/plotly_handler.py +61 -0
  156. nextmv/local/runner.py +274 -0
  157. nextmv/logger.py +80 -9
  158. nextmv/manifest.py +1466 -0
  159. nextmv/model.py +241 -66
  160. nextmv/options.py +708 -115
  161. nextmv/output.py +1301 -274
  162. nextmv/polling.py +325 -0
  163. nextmv/run.py +1702 -0
  164. nextmv/safe.py +145 -0
  165. nextmv/status.py +122 -0
  166. nextmv-1.0.0.dev2.dist-info/METADATA +311 -0
  167. nextmv-1.0.0.dev2.dist-info/RECORD +170 -0
  168. {nextmv-0.18.0.dist-info → nextmv-1.0.0.dev2.dist-info}/WHEEL +1 -1
  169. nextmv-1.0.0.dev2.dist-info/entry_points.txt +2 -0
  170. nextmv/cloud/application.py +0 -1405
  171. nextmv/cloud/manifest.py +0 -234
  172. nextmv/cloud/status.py +0 -29
  173. nextmv-0.18.0.dist-info/METADATA +0 -770
  174. nextmv-0.18.0.dist-info/RECORD +0 -25
  175. {nextmv-0.18.0.dist-info → nextmv-1.0.0.dev2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1393 @@
1
+ """
2
+ Application mixin for managing app runs.
3
+ """
4
+
5
+ import io
6
+ import os
7
+ import pathlib
8
+ import shutil
9
+ import sys
10
+ import tarfile
11
+ import tempfile
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ import rich
15
+
16
+ from nextmv._serialization import deflated_serialize_json
17
+ from nextmv.base_model import BaseModel
18
+ from nextmv.cloud.assets import RunAsset
19
+ from nextmv.cloud.client import get_size
20
+ from nextmv.cloud.url import DownloadURL
21
+ from nextmv.input import Input, InputFormat
22
+ from nextmv.logger import log
23
+ from nextmv.options import Options
24
+ from nextmv.output import ASSETS_KEY, STATISTICS_KEY, Asset, Output, OutputFormat, Statistics
25
+ from nextmv.polling import DEFAULT_POLLING_OPTIONS, PollingOptions, poll
26
+ from nextmv.run import (
27
+ ExternalRunResult,
28
+ Run,
29
+ RunConfiguration,
30
+ RunInformation,
31
+ RunLog,
32
+ RunResult,
33
+ TimestampedRunLog,
34
+ TrackedRun,
35
+ )
36
+ from nextmv.status import StatusV2
37
+
38
+ # Maximum size of the run input/output in bytes. This constant defines the
39
+ # maximum allowed size for run inputs and outputs. When the size exceeds this
40
+ # value, the system will automatically use the large input upload and/or large
41
+ # result download endpoints.
42
+ _MAX_RUN_SIZE: int = 5 * 1024 * 1024
43
+
44
+ if TYPE_CHECKING:
45
+ from . import Application
46
+
47
+
48
+ class ApplicationRunMixin:
49
+ """
50
+ Mixin class for managing app runs within an application.
51
+ """
52
+
53
def cancel_run(self: "Application", run_id: str) -> None:
    """
    Request the cancellation of a run.

    Issues a `PATCH` request against the `cancel` endpoint of the given
    run. The response body is not used.

    Parameters
    ----------
    run_id : str
        ID of the run to cancel.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.

    Examples
    --------
    >>> app.cancel_run("run-456")
    """

    cancel_endpoint = f"{self.endpoint}/runs/{run_id}/cancel"
    self.client.request(method="PATCH", endpoint=cancel_endpoint)
76
+
77
def download_asset_content(
    self: "Application",
    asset: RunAsset,
    destination: str | pathlib.Path | io.BytesIO | io.IOBase | None = None,
) -> Any | None:
    """
    Download an asset's content, optionally saving it to a destination.

    Two requests are made: the first one fetches the asset record to obtain
    its `download_url` and `type`; the second one fetches the actual content
    from that URL.

    Parameters
    ----------
    asset : RunAsset
        The asset to be downloaded. Its `run_id` and `id` are used to build
        the asset endpoint.
    destination : str | pathlib.Path | io.BytesIO | io.IOBase | None
        Where the asset content is saved. This can be a file path (as a
        string or `pathlib.Path`) or any writable binary stream, i.e. an
        object exposing a `write` method that accepts `bytes` (for example
        an `io.BytesIO` buffer or a file opened in `"wb"` mode). If `None`,
        nothing is saved and the content is returned instead.

    Returns
    -------
    Any or None
        If `destination` is `None`, returns the asset content: for JSON
        assets, the parsed JSON response; for other asset types, the raw
        `bytes` content. If `destination` is provided, the content is
        written to it and the method returns `None`.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.

    Examples
    --------
    >>> assets = app.list_assets("run-123")
    >>> asset = assets[0]  # Assume we want to download the first asset
    >>> # Download to a file path
    >>> app.download_asset_content(asset, "polygons.geojson")
    >>> # Download to an in-memory bytes buffer
    >>> import io
    >>> buffer = io.BytesIO()
    >>> app.download_asset_content(asset, buffer)
    >>> # Download and get content directly (for JSON assets)
    >>> content = app.download_asset_content(asset)
    >>> print(content)
    {'type': 'FeatureCollection', 'features': [...]}
    """
    # First, get the download URL and the declared type of the asset. When
    # the record carries no "type", the asset is treated as JSON.
    asset_record = self.client.request(
        method="GET",
        endpoint=f"{self.endpoint}/runs/{asset.run_id}/assets/{asset.id}",
    ).json()
    download_url = asset_record["download_url"]
    is_json = asset_record.get("type", "json") == "json"

    # Now, download the asset content using the download URL.
    download_response = self.client.request(
        method="GET",
        endpoint=download_url,
        headers={"Content-Type": "application/json" if is_json else "application/octet-stream"},
    )

    # No destination: hand the content back to the caller.
    if destination is None:
        return download_response.json() if is_json else download_response.content

    # Writable stream: duck-typed on `write` so that open file handles and
    # tempfile objects work, not only `io.BytesIO`. Strings and
    # `pathlib.Path` objects have no `write` attribute, so they fall through.
    if callable(getattr(destination, "write", None)):
        destination.write(download_response.content)
        return None

    # Anything else is treated as a file system path.
    with open(destination, "wb") as file:
        file.write(download_response.content)
    return None
150
+
151
def list_assets(self: "Application", run_id: str) -> list[RunAsset]:
    """
    List the assets of a run.

    Fetches the asset records attached to a run. Only the asset metadata is
    returned here; use the `download_asset_content` method to fetch the
    content of an individual asset.

    Parameters
    ----------
    run_id : str
        ID of the run to list assets for.

    Returns
    -------
    list[RunAsset]
        List of assets associated with the run.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.

    Examples
    --------
    >>> assets = app.list_assets("run-123")
    >>> for asset in assets:
    ...     print(asset.id, asset.name)
    b459daa6-1c13-48c6-b4c3-a262ea94cd04 clustering_polygons
    a1234567-89ab-cdef-0123-456789abcdef histogram
    """
    listing = self.client.request(
        method="GET",
        endpoint=f"{self.endpoint}/runs/{run_id}/assets",
    ).json()

    # Every record is stamped with the run ID it was listed for before it is
    # turned into a `RunAsset`.
    return [RunAsset.from_dict({**record, "run_id": run_id}) for record in listing.get("items", [])]
191
+
192
def list_runs(self: "Application", status: StatusV2 | None = None) -> list[Run]:
    """
    List all runs.

    You can use the optional `status` parameter to filter runs by their
    status. If not provided, all runs are returned. The filtering is applied
    on the listing returned by the runs endpoint, by comparing each run's
    `status_v2` with the given `status`.

    Parameters
    ----------
    status : StatusV2 | None
        Optional status to filter runs by.

    Returns
    -------
    list[Run]
        List of runs.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    """

    listing = self.client.request(
        method="GET",
        endpoint=f"{self.endpoint}/runs",
    ).json()

    all_runs = [Run.from_dict(raw_run) for raw_run in listing.get("runs", [])]
    if status is None:
        return all_runs

    return [run for run in all_runs if run.status_v2 == status]
231
+
232
def new_run(  # noqa: C901 # Refactor this function at some point.
    self: "Application",
    input: Input | dict[str, Any] | BaseModel | str | None = None,
    instance_id: str | None = None,
    name: str | None = None,
    description: str | None = None,
    upload_id: str | None = None,
    options: Options | dict[str, str] | None = None,
    configuration: RunConfiguration | dict[str, Any] | None = None,
    batch_experiment_id: str | None = None,
    external_result: ExternalRunResult | dict[str, Any] | None = None,
    json_configurations: dict[str, Any] | None = None,
    input_dir_path: str | None = None,
) -> str:
    """
    Submit an input to start a new run of the application. Returns the
    `run_id` of the submitted run.

    Parameters
    ----------
    input : Input | dict[str, Any] | BaseModel | str | None
        Input to use for the run. This can be a `nextmv.Input` object,
        `dict`, `BaseModel` or `str`.

        If `nextmv.Input` is used, and the `input_format` is either
        `nextmv.InputFormat.JSON` or `nextmv.InputFormat.TEXT`, then the
        input data is extracted from the `.data` property.

        If you want to work with `nextmv.InputFormat.CSV_ARCHIVE` or
        `nextmv.InputFormat.MULTI_FILE`, you should use the `input_dir_path`
        argument instead. This argument takes precedence over the `input`.
        If `input_dir_path` is specified, this function looks for files in
        that directory and tars them, to later be uploaded using the
        `upload_data` method. If both the `input_dir_path` and `input`
        arguments are provided, the `input` is ignored.

        When `input_dir_path` is specified, the `configuration` argument
        must also be provided. More specifically, the
        `RunConfiguration.format.format_input.input_type` parameter
        dictates what kind of input is being submitted to the Nextmv Cloud.
        Make sure that this parameter is specified when working with the
        following input formats:

        - `nextmv.InputFormat.CSV_ARCHIVE`
        - `nextmv.InputFormat.MULTI_FILE`

        When working with JSON or text data, use the `input` argument
        directly.

        In general, if an input is too large, it will be uploaded with the
        `upload_data` method.
    instance_id : str | None
        ID of the instance to use for the run. If not provided, the default
        instance ID associated to the Class (`default_instance_id`) is
        used.
    name : str | None
        Name of the run.
    description : str | None
        Description of the run.
    upload_id : str | None
        ID to use when running a large input. If the `input` exceeds the
        maximum allowed size, then it is uploaded and the corresponding
        `upload_id` is used.
    options : Options | dict[str, str] | None
        Options to use for the run. This can be a `nextmv.Options` object
        or a dict. If a dict is used, the keys must be strings and the
        values must be strings as well. If a `nextmv.Options` object is
        used, the options are extracted from the `.to_cloud_dict()` method.
        Note that specifying `options` overrides the `input.options` (if
        the `input` is of type `nextmv.Input`).
    configuration : RunConfiguration | dict[str, Any] | None
        Configuration to use for the run. This can be a
        `cloud.RunConfiguration` object or a dict. If the object is used,
        then the `.to_dict()` method is applied to extract the
        configuration.
    batch_experiment_id : str | None
        ID of a batch experiment to associate the run with. This is used
        when the run is part of a batch experiment.
    external_result : ExternalRunResult | dict[str, Any] | None
        External result to use for the run. This can be a
        `nextmv.ExternalRunResult` object or a dict. If the object is used,
        then the `.to_dict()` method is applied to extract the
        configuration. This is used when the run is an external run. We
        suggest that instead of specifying this parameter, you use the
        `track_run` method of the class.
    json_configurations : dict[str, Any] | None
        Optional configurations for JSON serialization. This is used to
        customize the serialization before data is sent.
    input_dir_path : str | None
        Path to a directory containing input files. If specified, the
        function will package the files in the directory into a tar file
        and upload it as a large input. This is useful for input formats
        like `nextmv.InputFormat.CSV_ARCHIVE` or
        `nextmv.InputFormat.MULTI_FILE`. If both `input` and
        `input_dir_path` are specified, the `input` is ignored, and the
        files in the directory are used instead.

    Returns
    -------
    str
        ID (`run_id`) of the run that was submitted.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    ValueError
        If `input_dir_path` does not exist or is not a directory. If the
        final `options` are not of type `dict[str,str]`. The helpers that
        extract the input data and the run configuration are defined
        elsewhere in the class and may raise a `ValueError` as well, e.g.
        for a `nextmv.Input` whose `.input_format` is not supported.
    """

    # Package the input directory (if any) into a tar file. An empty string
    # means that there is no tar file to upload.
    tar_file = ""
    if input_dir_path:
        if not os.path.exists(input_dir_path):
            raise ValueError(f"Directory {input_dir_path} does not exist.")

        if not os.path.isdir(input_dir_path):
            raise ValueError(f"Path {input_dir_path} is not a directory.")

        tar_file = self._package_inputs(input_dir_path)

    input_data = self.__extract_input_data(input)
    input_size = get_size(input_data) if input_data is not None else 0

    # When an upload is required, the data is uploaded first and the run is
    # submitted referencing the resulting upload ID.
    upload_id_used = upload_id is not None
    if self.__upload_url_required(upload_id_used, input_size, tar_file, input):
        upload_url = self.upload_url()
        self.upload_data(data=input_data, upload_url=upload_url, tar_file=tar_file)
        upload_id = upload_url.upload_id
        upload_id_used = True

    options_dict = self.__extract_options_dict(options, json_configurations)

    # Builds the payload progressively based on the different arguments
    # that must be provided.
    payload: dict[str, Any] = {}
    if upload_id_used:
        payload["upload_id"] = upload_id
    else:
        payload["input"] = input_data

    if name is not None:
        payload["name"] = name
    if description is not None:
        payload["description"] = description
    if options_dict:
        for key, value in options_dict.items():
            if not isinstance(value, str):
                raise ValueError(f"options must be dict[str,str], option {key} has type {type(value)} instead.")
        payload["options"] = options_dict

    payload["configuration"] = self.__extract_run_config(input, configuration, input_dir_path)

    if batch_experiment_id is not None:
        payload["batch_experiment_id"] = batch_experiment_id
    if external_result is not None:
        payload["result"] = (
            external_result.to_dict() if isinstance(external_result, ExternalRunResult) else external_result
        )

    # An explicit instance ID wins over the application's default one.
    query_params = {}
    resolved_instance_id = instance_id if instance_id is not None else self.default_instance_id
    if resolved_instance_id is not None:
        query_params["instance_id"] = resolved_instance_id

    response = self.client.request(
        method="POST",
        endpoint=f"{self.endpoint}/runs",
        payload=payload,
        query_params=query_params,
        json_configurations=json_configurations,
    )

    return response.json()["run_id"]
409
+
410
def new_run_with_result(
    self: "Application",
    input: Input | dict[str, Any] | BaseModel | str | None = None,
    instance_id: str | None = None,
    name: str | None = None,
    description: str | None = None,
    upload_id: str | None = None,
    run_options: Options | dict[str, str] | None = None,
    polling_options: PollingOptions = DEFAULT_POLLING_OPTIONS,
    configuration: RunConfiguration | dict[str, Any] | None = None,
    batch_experiment_id: str | None = None,
    external_result: ExternalRunResult | dict[str, Any] | None = None,
    json_configurations: dict[str, Any] | None = None,
    input_dir_path: str | None = None,
    output_dir_path: str | None = ".",
) -> RunResult:
    """
    Submit an input to start a new run of the application and poll for the
    result. This is a convenience method that combines the `new_run` and
    `run_result_with_polling` methods, applying polling logic to check when
    the run succeeded.

    Parameters
    ----------
    input : Input | dict[str, Any] | BaseModel | str | None
        Input to use for the run. This can be a `nextmv.Input` object,
        `dict`, `BaseModel` or `str`.

        If `nextmv.Input` is used, and the `input_format` is either
        `nextmv.InputFormat.JSON` or `nextmv.InputFormat.TEXT`, then the
        input data is extracted from the `.data` property.

        If you want to work with `nextmv.InputFormat.CSV_ARCHIVE` or
        `nextmv.InputFormat.MULTI_FILE`, you should use the `input_dir_path`
        argument instead. This argument takes precedence over the `input`.
        If `input_dir_path` is specified, this function looks for files in
        that directory and tars them, to later be uploaded using the
        `upload_data` method. If both the `input_dir_path` and `input`
        arguments are provided, the `input` is ignored.

        When `input_dir_path` is specified, the `configuration` argument
        must also be provided. More specifically, the
        `RunConfiguration.format.format_input.input_type` parameter
        dictates what kind of input is being submitted to the Nextmv Cloud.
        Make sure that this parameter is specified when working with the
        following input formats:

        - `nextmv.InputFormat.CSV_ARCHIVE`
        - `nextmv.InputFormat.MULTI_FILE`

        When working with JSON or text data, use the `input` argument
        directly.

        In general, if an input is too large, it will be uploaded with the
        `upload_data` method.
    instance_id : str | None
        ID of the instance to use for the run. If not provided, the default
        instance ID associated to the Class (`default_instance_id`) is
        used.
    name : str | None
        Name of the run.
    description : str | None
        Description of the run.
    upload_id : str | None
        ID to use when running a large input. If the `input` exceeds the
        maximum allowed size, then it is uploaded and the corresponding
        `upload_id` is used.
    run_options : Options | dict[str, str] | None
        Options to use for the run. This can be a `nextmv.Options` object
        or a dict. If a dict is used, the keys must be strings and the
        values must be strings as well. If a `nextmv.Options` object is
        used, the options are extracted from the `.to_cloud_dict()` method.
        Note that specifying `run_options` overrides the `input.options`
        (if the `input` is of type `nextmv.Input`). The value is forwarded
        to `new_run` as its `options` argument.
    polling_options : PollingOptions
        Options to use when polling for the run result. They are forwarded
        to the `run_result_with_polling` method.
    configuration : RunConfiguration | dict[str, Any] | None
        Configuration to use for the run. This can be a
        `cloud.RunConfiguration` object or a dict. If the object is used,
        then the `.to_dict()` method is applied to extract the
        configuration.
    batch_experiment_id : str | None
        ID of a batch experiment to associate the run with. This is used
        when the run is part of a batch experiment.
    external_result : ExternalRunResult | dict[str, Any] | None
        External result to use for the run. This can be a
        `cloud.ExternalRunResult` object or a dict. If the object is used,
        then the `.to_dict()` method is applied to extract the
        configuration. This is used when the run is an external run. We
        suggest that instead of specifying this parameter, you use the
        `track_run_with_result` method of the class.
    json_configurations : dict[str, Any] | None
        Optional configurations for JSON serialization. This is used to
        customize the serialization before data is sent.
    input_dir_path : str | None
        Path to a directory containing input files. If specified, the
        function will package the files in the directory into a tar file
        and upload it as a large input. This is useful for input formats
        like `nextmv.InputFormat.CSV_ARCHIVE` or
        `nextmv.InputFormat.MULTI_FILE`. If both `input` and
        `input_dir_path` are specified, the `input` is ignored, and the
        files in the directory are used instead.
    output_dir_path : str | None, default="."
        Path to a directory where non-JSON output files will be saved. This
        is required if the output is non-JSON. If the directory does not
        exist, it will be created. Uses the current directory by default.

    Returns
    -------
    RunResult
        Result of the run.

    Raises
    ------
    ValueError
        If the arguments are rejected by `new_run`, e.g. when the final
        `options` are not of type `dict[str,str]`.
    requests.HTTPError
        If the response status code is not 2xx.
    TimeoutError
        If the run does not succeed after the polling strategy is exhausted
        based on time duration.
    RuntimeError
        If the run does not succeed after the polling strategy is exhausted
        based on number of tries.
    """

    # Submit the run; note that `run_options` maps to the `options`
    # argument of `new_run`.
    run_id = self.new_run(
        input=input,
        instance_id=instance_id,
        name=name,
        description=description,
        upload_id=upload_id,
        options=run_options,
        configuration=configuration,
        batch_experiment_id=batch_experiment_id,
        external_result=external_result,
        json_configurations=json_configurations,
        input_dir_path=input_dir_path,
    )

    # Poll until the result of the submitted run is available.
    return self.run_result_with_polling(
        run_id=run_id,
        polling_options=polling_options,
        output_dir_path=output_dir_path,
    )
558
+
559
def run_input(self: "Application", run_id: str, output_dir_path: str | None = ".") -> dict[str, Any] | None:
    """
    Get the input of a run.

    Retrieves the input data that was used for a specific run. This method
    handles both small and large inputs automatically - if the input size
    exceeds the maximum allowed size, it will fetch the input from a
    download URL. If the content format of the run is `csv-archive` or
    `multi-file`, then the `output_dir_path` parameter must be specified.

    Parameters
    ----------
    run_id : str
        ID of the run to retrieve the input for.
    output_dir_path : str | None, default="."
        Path to a directory where non-JSON input files will be saved. This
        is required if the input is non-JSON. If the directory does not
        exist, it will be created. Uses the current directory by default.

    Returns
    -------
    dict[str, Any] | None
        Input data of the run as a dictionary. If the input format is
        non-JSON (e.g., csv-archive or multi-file), the method returns None
        after saving the input files to the specified `output_dir_path`.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    ValueError
        If the input is fetched through a download URL, its format is not
        JSON, and `output_dir_path` is `None` or empty.

    Examples
    --------
    >>> input_data = app.run_input("run-123")
    >>> print(input_data)
    {'locations': [...], 'vehicles': [...]}
    """
    run_information = self.run_metadata(run_id=run_id)
    input_type = run_information.metadata.format.format_input.input_type

    # Inputs above the size limit, and archive-based formats, are not served
    # inline: the endpoint is asked for a download URL instead.
    large = run_information.metadata.input_size > _MAX_RUN_SIZE or input_type in {
        InputFormat.CSV_ARCHIVE,
        InputFormat.MULTI_FILE,
    }
    query_params = {"format": "url"} if large else None

    response = self.client.request(
        method="GET",
        endpoint=f"{self.endpoint}/runs/{run_id}/input",
        query_params=query_params,
    )
    if not large:
        return response.json()

    download_url = DownloadURL.from_dict(response.json())
    download_response = self.client.request(
        method="GET",
        endpoint=download_url.url,
        headers={"Content-Type": "application/json"},
    )

    # JSON input can be returned directly. The input type is compared
    # against `InputFormat`, the enum that describes input formats.
    if input_type == InputFormat.JSON:
        return download_response.json()

    # NOTE(review): any non-JSON input that reaches this point (including a
    # large text input, if that can happen) is unpacked as an archive —
    # confirm that the download is always a tarball in those cases.
    if not output_dir_path:
        raise ValueError(
            "If the input format is not JSON, an output_dir_path must be provided.",
        )
    os.makedirs(output_dir_path, exist_ok=True)

    # Save the .tar.gz file to a temp directory and extract its contents to
    # output_dir_path. The temp directory is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmpdirname:
        temp_tar_path = os.path.join(tmpdirname, f"{run_id}.tar.gz")
        with open(temp_tar_path, "wb") as f:
            f.write(download_response.content)
        shutil.unpack_archive(temp_tar_path, output_dir_path)

    return None
644
+
645
def run_metadata(self: "Application", run_id: str) -> RunInformation:
    """
    Fetch the metadata of a run, leaving the run output out.

    Issues a GET request against the run's ``metadata`` endpoint, builds a
    `RunInformation` object from the JSON response and fills in its
    `console_url` attribute.

    Parameters
    ----------
    run_id : str
        ID of the run whose metadata is requested.

    Returns
    -------
    RunInformation
        Information about the run (no output attached), with `console_url`
        set.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.

    Examples
    --------
    >>> metadata = app.run_metadata("run-123")
    >>> print(metadata.metadata.status_v2)
    StatusV2.succeeded
    """

    metadata_endpoint = f"{self.endpoint}/runs/{run_id}/metadata"
    payload = self.client.request(method="GET", endpoint=metadata_endpoint).json()

    run_info = RunInformation.from_dict(payload)
    # The console URL is derived from the ID reported by the API response.
    run_info.console_url = self.__console_url(run_info.id)

    return run_info
683
+
684
def run_logs(self: "Application", run_id: str) -> RunLog:
    """
    Fetch the logs of a run.

    Issues a single GET request against the run's ``logs`` endpoint and
    converts the JSON response into a `RunLog`.

    Parameters
    ----------
    run_id : str
        ID of the run whose logs are requested.

    Returns
    -------
    RunLog
        Logs of the run.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.

    Examples
    --------
    >>> logs = app.run_logs("run-123")
    >>> print(logs.stderr)
    'Warning: resource usage exceeded'
    """
    logs_endpoint = f"{self.endpoint}/runs/{run_id}/logs"
    raw_logs = self.client.request(method="GET", endpoint=logs_endpoint).json()

    return RunLog.from_dict(raw_logs)
715
+
716
def run_logs_with_polling(
    self: "Application",
    run_id: str,
    verbose: bool = False,
    rich_print: bool = False,
    polling_options: PollingOptions = DEFAULT_POLLING_OPTIONS,
) -> list[TimestampedRunLog]:
    """
    Get the logs of a run with polling.

    Retrieves the logs of a run. This method polls for the logs until the
    run finishes executing or the polling strategy is exhausted. It is the
    "real-time" equivalent of the `run_logs` method. After the polling is
    done, all the logs are returned sorted by timestamp. You can use the
    `verbose` parameter to print the logs as they are obtained during the
    polling process. You can also use the `rich_print` parameter to enable
    rich printing for better formatting of the logs.

    The `polling_options` object passed in is never modified: the method
    works on a shallow copy, on which it installs its own
    `sleep_duration_func` driven by the server's
    ``next_available_in_seconds`` hint.

    Parameters
    ----------
    run_id : str
        ID of the run to retrieve the logs for.
    verbose : bool, default=False
        Whether to print the logs as they are obtained during the polling
        process.
    rich_print : bool, default=False
        Whether to use rich printing (to stderr) for better formatting of
        the logs. Only relevant when `verbose` is enabled.
    polling_options : PollingOptions, default=DEFAULT_POLLING_OPTIONS
        Options to use when polling for the run logs.

    Returns
    -------
    list[TimestampedRunLog]
        List of timestamped logs of the run, sorted by timestamp.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    TimeoutError
        If the run does not complete after the polling strategy is
        exhausted based on time duration.
    RuntimeError
        If the run does not complete after the polling strategy is
        exhausted based on number of tries.

    Examples
    --------
    >>> from nextmv.cloud import PollingOptions
    >>> # Create custom polling options
    >>> polling_opts = PollingOptions(max_tries=50, max_duration=600)
    >>> # Get run logs with polling
    >>> logs = app.run_logs_with_polling("run-123", polling_options=polling_opts)
    >>> for log in logs:
    ...     print(f"[{log.timestamp}] {log.log}")
    [2024-01-01T12:00:00Z] Starting optimization...
    [2024-01-01T12:00:05Z] Found initial solution
    ...
    """
    import copy

    sleep_duration_hint = 0
    logs = []
    query_params = None

    def polling_func() -> tuple[Any, bool]:
        nonlocal sleep_duration_hint
        nonlocal query_params

        # Perform the actual request to the API.
        response = self.client.request(
            method="GET",
            endpoint=f"{self.endpoint}/runs/{run_id}/logs/live",
            query_params=query_params,
        )
        json_resp = response.json()

        # Get the logs of the current request. Print them if verbose is
        # enabled and append them to the overall logs.
        for resp_log in json_resp.get("items", []):
            log_entry = TimestampedRunLog.from_dict(resp_log)
            if verbose:
                msg = f"[{log_entry.timestamp}] {log_entry.log}"
                if rich_print:
                    rich.print(msg, file=sys.stderr)
                else:
                    log(msg)

            logs.append(log_entry)

        # We are done asking for logs if the run is in a final state.
        status_v2 = StatusV2(json_resp.get("status_v2", "none"))
        if status_v2 in {
            StatusV2.succeeded,
            StatusV2.failed,
            StatusV2.canceled,
        }:
            return logs, True

        # Store the server's hint for the next sleep duration. A missing or
        # null hint is treated as "no hint" (0).
        sleep_duration_hint = json_resp.get("next_available_in_seconds") or 0

        # Update the query parameters for the next request.
        since = json_resp.get("next_page_token")
        if since is not None:
            query_params = {"since": since}

        return logs, False

    def sleep_func() -> float:
        return sleep_duration_hint if sleep_duration_hint > 0 else 0

    # Work on a copy: assigning the sleep function on the caller's object
    # would also modify the shared module-level default options, leaking this
    # call's closure into every later poll that relies on the defaults.
    local_polling_options = copy.copy(polling_options)
    local_polling_options.sleep_duration_func = sleep_func
    logs = poll(polling_options=local_polling_options, polling_func=polling_func)

    return sorted(logs, key=lambda entry: entry.timestamp)
831
+
832
def run_result(self: "Application", run_id: str, output_dir_path: str | None = ".") -> RunResult:
    """
    Get the result of a run.

    Looks up the run's metadata first and then retrieves the complete
    result of the run, including the run output.

    Parameters
    ----------
    run_id : str
        ID of the run to get results for.
    output_dir_path : Optional[str], default="."
        Path to a directory where non-JSON output files will be saved. This is
        required if the output is non-JSON. If the directory does not exist, it
        will be created. Uses the current directory by default.

    Returns
    -------
    RunResult
        Result of the run, including output.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.

    Examples
    --------
    >>> result = app.run_result("run-123")
    >>> print(result.metadata.status_v2)
    'succeeded'
    """

    # The metadata tells the private helper how the output has to be fetched.
    metadata_info = self.run_metadata(run_id=run_id)
    result = self.__run_result(
        run_id=run_id,
        run_information=metadata_info,
        output_dir_path=output_dir_path,
    )

    return result
871
+
872
def run_result_with_polling(
    self: "Application",
    run_id: str,
    polling_options: PollingOptions = DEFAULT_POLLING_OPTIONS,
    output_dir_path: str | None = ".",
) -> RunResult:
    """
    Get the result of a run with polling.

    Polls the run's metadata until the run is reported as finalized or the
    polling strategy is exhausted, and then retrieves the result of the
    run, including the run output.

    Parameters
    ----------
    run_id : str
        ID of the run to retrieve the result for.
    polling_options : PollingOptions, default=DEFAULT_POLLING_OPTIONS
        Options to use when polling for the run result.
    output_dir_path : Optional[str], default="."
        Path to a directory where non-JSON output files will be saved. This is
        required if the output is non-JSON. If the directory does not exist, it
        will be created. Uses the current directory by default.

    Returns
    -------
    RunResult
        Complete result of the run including output data.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    TimeoutError
        If the run does not complete after the polling strategy is
        exhausted based on time duration.
    RuntimeError
        If the run does not complete after the polling strategy is
        exhausted based on number of tries.

    Examples
    --------
    >>> from nextmv.cloud import PollingOptions
    >>> # Create custom polling options
    >>> polling_opts = PollingOptions(max_tries=50, max_duration=600)
    >>> # Get run result with polling
    >>> result = app.run_result_with_polling("run-123", polling_opts)
    >>> print(result.output)
    {'solution': {...}}
    """

    def polling_func() -> tuple[Any, bool]:
        # One polling attempt: hand back the metadata only once the run is
        # finalized; otherwise signal that polling must continue.
        latest = self.run_metadata(run_id=run_id)
        if not latest.metadata.run_is_finalized():
            return None, False

        return latest, True

    finalized_information = poll(polling_options=polling_options, polling_func=polling_func)
    result = self.__run_result(
        run_id=run_id,
        run_information=finalized_information,
        output_dir_path=output_dir_path,
    )

    return result
937
+
938
def track_run(  # noqa: C901
    self: "Application",
    tracked_run: TrackedRun,
    instance_id: str | None = None,
    configuration: RunConfiguration | dict[str, Any] | None = None,
) -> str:
    """
    Track an external run.

    This method allows you to register in Nextmv a run that happened
    elsewhere, as though it were executed in the Nextmv platform. Having
    information about a run in Nextmv is useful for things like
    experimenting and testing.

    Please read the documentation on the `TrackedRun` class carefully, as
    there are important considerations to take into account when using this
    method. For example, if you intend to upload JSON input/output, use the
    `input`/`output` attributes of the `TrackedRun` class. On the other
    hand, if you intend to track files-based input/output, use the
    `input_dir_path`/`output_dir_path` attributes of the `TrackedRun`
    class.

    The input is uploaded first, then the output, then (when present) the
    logs, statistics and assets. Finally a new run is created that
    references all of those uploads. Temporary archives created to upload
    files-based input/output are removed once their upload has finished.

    Parameters
    ----------
    tracked_run : TrackedRun
        The run to track.
    instance_id : Optional[str], default=None
        Optional instance ID if you want to associate your tracked run with
        an instance.
    configuration: Optional[Union[RunConfiguration, dict[str, Any]]]
        Configuration to use for the run. This can be a
        `cloud.RunConfiguration` object or a dict. It is forwarded as-is to
        `new_run`.

    Returns
    -------
    str
        The ID of the run that was tracked.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    ValueError
        If `input_dir_path`/`output_dir_path` is set on the tracked run but
        does not exist or is not a directory, if `statistics` is neither a
        `Statistics` nor a `dict`, or if `assets` is not a list of
        `Asset`/`dict` objects or a `dict`. The upload helpers this method
        calls may raise it for other reasons as well.

    Examples
    --------
    >>> from nextmv.cloud import Application
    >>> from nextmv import TrackedRun
    >>> app = Application(id="app_123")
    >>> tracked_run = TrackedRun(input={"data": [...]}, output={"solution": [...]})
    >>> run_id = app.track_run(tracked_run)
    """

    def upload_payload(data: Any, dir_path: str | None) -> Any:
        """Upload `data`, or the tarred files of `dir_path`, and return the upload URL used."""

        # Get the URL to upload to.
        url = self.upload_url()

        # Handle the case where the payload is being uploaded as files. We
        # need to tar them.
        tar_file = ""
        if dir_path is not None and dir_path != "":
            if not os.path.exists(dir_path):
                raise ValueError(f"Directory {dir_path} does not exist.")

            if not os.path.isdir(dir_path):
                raise ValueError(f"Path {dir_path} is not a directory.")

            tar_file = self._package_inputs(dir_path)

        try:
            self.upload_data(data=data, upload_url=url, tar_file=tar_file)
        finally:
            if tar_file:
                # The archive lives in a directory made with mkdtemp(), which
                # is never removed automatically. Clean-up is best effort:
                # only the archive and its (then empty) directory are
                # removed, and a failure must not mask the upload outcome.
                try:
                    os.remove(tar_file)
                    os.rmdir(os.path.dirname(tar_file))
                except OSError:
                    pass

        return url

    # Handle the case where the input is uploaded as Input or a dict.
    upload_input = tracked_run.input
    if upload_input is not None and isinstance(upload_input, Input):
        upload_input = upload_input.data

    # Actually uploads the input.
    url_input = upload_payload(upload_input, tracked_run.input_dir_path)

    # Handle the case where the output is uploaded as Output or a dict.
    upload_output = tracked_run.output
    if upload_output is not None and isinstance(upload_output, Output):
        upload_output = upload_output.to_dict()

    # Actually uploads the output.
    url_output = upload_payload(upload_output, tracked_run.output_dir_path)

    # Create the external run result and appends logs if required.
    external_result = ExternalRunResult(
        output_upload_id=url_output.upload_id,
        status=tracked_run.status.value,
        execution_duration=tracked_run.duration,
    )

    # Handle the stderr logs if provided.
    if tracked_run.logs is not None:
        url_stderr = self.upload_url()
        self.upload_data(data=tracked_run.logs_text(), upload_url=url_stderr)
        external_result.error_upload_id = url_stderr.upload_id

    if tracked_run.error is not None and tracked_run.error != "":
        external_result.error_message = tracked_run.error

    # Handle the statistics upload if provided. The uploaded payload is
    # always keyed by STATISTICS_KEY.
    stats = tracked_run.statistics
    if stats is not None:
        if isinstance(stats, Statistics):
            stats_dict = {STATISTICS_KEY: stats.to_dict()}
        elif isinstance(stats, dict):
            stats_dict = stats if STATISTICS_KEY in stats else {STATISTICS_KEY: stats}
        else:
            raise ValueError("tracked_run.statistics must be either a `Statistics` or `dict` object")

        url_stats = self.upload_url()
        self.upload_data(data=stats_dict, upload_url=url_stats)
        external_result.statistics_upload_id = url_stats.upload_id

    # Handle the assets upload if provided. The uploaded payload is always
    # keyed by ASSETS_KEY.
    assets = tracked_run.assets
    if assets is not None:
        if isinstance(assets, list):
            assets_list = []
            for ix, asset in enumerate(assets):
                if isinstance(asset, Asset):
                    assets_list.append(asset.to_dict())
                elif isinstance(asset, dict):
                    assets_list.append(asset)
                else:
                    raise ValueError(f"tracked_run.assets, index {ix} must be an `Asset` or `dict` object")
            assets_dict = {ASSETS_KEY: assets_list}
        elif isinstance(assets, dict):
            assets_dict = assets if ASSETS_KEY in assets else {ASSETS_KEY: assets}
        else:
            raise ValueError("tracked_run.assets must be either a `list[Asset]`, `list[dict]`, or `dict` object")

        url_assets = self.upload_url()
        self.upload_data(data=assets_dict, upload_url=url_assets)
        external_result.assets_upload_id = url_assets.upload_id

    return self.new_run(
        upload_id=url_input.upload_id,
        external_result=external_result,
        instance_id=instance_id,
        name=tracked_run.name,
        description=tracked_run.description,
        configuration=configuration,
    )
1107
+
1108
def track_run_with_result(
    self: "Application",
    tracked_run: TrackedRun,
    polling_options: PollingOptions = DEFAULT_POLLING_OPTIONS,
    instance_id: str | None = None,
    output_dir_path: str | None = ".",
    configuration: RunConfiguration | dict[str, Any] | None = None,
) -> RunResult:
    """
    Track an external run and poll for its result.

    Convenience method that chains `track_run` and
    `run_result_with_polling`: the run is tracked first and the returned
    run ID is then polled until the result is available.

    Parameters
    ----------
    tracked_run : TrackedRun
        The run to track.
    polling_options : PollingOptions
        Options to use when polling for the run result.
    instance_id: Optional[str]
        Optional instance ID if you want to associate your tracked run with
        an instance.
    output_dir_path : Optional[str], default="."
        Path to a directory where non-JSON output files will be saved. This is
        required if the output is non-JSON. If the directory does not exist, it
        will be created. Uses the current directory by default.
    configuration: Optional[Union[RunConfiguration, dict[str, Any]]]
        Configuration to use for the run. This can be a
        `cloud.RunConfiguration` object or a dict. It is forwarded as-is to
        `track_run`.

    Returns
    -------
    RunResult
        Result of the run.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    ValueError
        If the tracked run does not have an input or output.
    TimeoutError
        If the run does not succeed after the polling strategy is
        exhausted based on time duration.
    RuntimeError
        If the run does not succeed after the polling strategy is
        exhausted based on number of tries.
    """
    # Step 1: register the external run and keep its ID.
    tracked_run_id = self.track_run(
        tracked_run=tracked_run,
        instance_id=instance_id,
        configuration=configuration,
    )

    # Step 2: poll until the result of that run can be retrieved.
    result = self.run_result_with_polling(
        run_id=tracked_run_id,
        polling_options=polling_options,
        output_dir_path=output_dir_path,
    )

    return result
1170
+
1171
+ def _package_inputs(self: "Application", dir_path: str) -> str:
1172
+ """
1173
+ This is an auxiliary function for packaging the inputs found in the
1174
+ provided `dir_path`. All the files found in the directory are tarred and
1175
+ g-zipped. This function returns the tar file path that contains the
1176
+ packaged inputs.
1177
+ """
1178
+
1179
+ # Create a temporary directory for the output
1180
+ output_dir = tempfile.mkdtemp(prefix="nextmv-inputs-out-")
1181
+
1182
+ # Define the output tar file name and path
1183
+ tar_filename = "inputs.tar.gz"
1184
+ tar_file_path = os.path.join(output_dir, tar_filename)
1185
+
1186
+ # Create the tar.gz file
1187
+ with tarfile.open(tar_file_path, "w:gz") as tar:
1188
+ for root, _, files in os.walk(dir_path):
1189
+ for file in files:
1190
+ if file == tar_filename:
1191
+ continue
1192
+
1193
+ file_path = os.path.join(root, file)
1194
+
1195
+ # Skip directories, only process files
1196
+ if os.path.isdir(file_path):
1197
+ continue
1198
+
1199
+ # Create relative path for the archive
1200
+ arcname = os.path.relpath(file_path, start=dir_path)
1201
+ tar.add(file_path, arcname=arcname)
1202
+
1203
+ return tar_file_path
1204
+
1205
def __run_result(
    self: "Application",
    run_id: str,
    run_information: RunInformation,
    output_dir_path: str | None = ".",
) -> RunResult:
    """
    Get the result of a run.

    Private base implementation shared by the public result-retrieval
    methods, regardless of polling strategy. The run is requested from the
    API; when the output is non-JSON or larger than `_MAX_RUN_SIZE`, the
    API is asked for a download URL instead of the inline output, and the
    output is then fetched from that URL.

    Parameters
    ----------
    run_id : str
        ID of the run to retrieve the result for.
    run_information : RunInformation
        Information about the run. Its metadata (output format and output
        size) decides how the output is fetched.
    output_dir_path : Optional[str], default="."
        Path to a directory where non-JSON output files will be saved. This is
        required if the output is non-JSON. If the directory does not exist, it
        will be created. Uses the current directory by default.

    Returns
    -------
    RunResult
        Result of the run. JSON output fetched through a download URL is
        attached to `output`; non-JSON output is extracted into
        `output_dir_path` instead. If a download URL was requested but the
        run did not succeed, the result is returned without downloading
        anything.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 2xx.
    ValueError
        If the output is non-JSON and no `output_dir_path` is given.
    """
    run_meta = run_information.metadata
    non_json_output = run_meta.format.format_output.output_type != OutputFormat.JSON

    # A download URL is needed for non-JSON output and for large output.
    use_presigned_url = non_json_output or run_meta.output_size > _MAX_RUN_SIZE
    query_params = {"format": "url"} if use_presigned_url else None

    response = self.client.request(
        method="GET",
        endpoint=f"{self.endpoint}/runs/{run_id}",
        query_params=query_params,
    )
    result = RunResult.from_dict(response.json())
    result.console_url = self.__console_url(result.id)

    # Nothing to download: the output came inline, or the run did not succeed.
    if not (use_presigned_url and result.metadata.status_v2 == StatusV2.succeeded):
        return result

    download_url = DownloadURL.from_dict(response.json()["output"])
    download_response = self.client.request(
        method="GET",
        endpoint=download_url.url,
        headers={"Content-Type": "application/json"},
    )

    # JSON output can be attached to the result directly.
    if not non_json_output:
        result.output = download_response.json()
        return result

    # Non-JSON output is a .tar.gz archive that is extracted to the given
    # directory.
    if not output_dir_path:
        raise ValueError(
            "If the output format is not JSON, an output_dir_path must be provided.",
        )
    if not os.path.exists(output_dir_path):
        os.makedirs(output_dir_path, exist_ok=True)

    # Save the archive to a temp directory and extract it to output_dir_path.
    with tempfile.TemporaryDirectory() as scratch_dir:
        archive_path = os.path.join(scratch_dir, f"{run_id}.tar.gz")
        with open(archive_path, "wb") as archive_file:
            archive_file.write(download_response.content)
        shutil.unpack_archive(archive_path, output_dir_path)

    return result
1295
+
1296
+ def __console_url(self: "Application", run_id: str) -> str:
1297
+ """Auxiliary method to get the console URL for a run."""
1298
+
1299
+ return f"{self.client.console_url}/app/{self.id}/run/{run_id}?view=details"
1300
+
1301
def __upload_url_required(
    self: "Application",
    upload_id_used: bool,
    input_size: int,
    tar_file: str,
    input: Input | dict[str, Any] | BaseModel | str = None,
) -> bool:
    """
    Auxiliary function that tells whether an upload URL is required.

    No upload URL is needed when an upload ID is already in use. Otherwise
    one is required when the input size exceeds `_MAX_RUN_SIZE` or when the
    payload is not JSON: a plain string, an `Input` whose format is not
    JSON, or a tar file.
    """

    if upload_id_used:
        return False

    has_tar_file = tar_file is not None and tar_file != ""
    non_json_payload = (
        isinstance(input, str)
        or (isinstance(input, Input) and input.input_format != InputFormat.JSON)
        or has_tar_file
    )

    return input_size > _MAX_RUN_SIZE or non_json_payload
1327
+
1328
def __extract_input_data(
    self: "Application",
    input: Input | dict[str, Any] | BaseModel | str = None,
) -> dict[str, Any] | str | None:
    """
    Auxiliary function that pulls the raw data out of the input, depending
    on its type: a `BaseModel` is converted with `to_dict()`, a dict or
    string is returned unchanged, and an `Input` yields its `data`. Any
    other value results in None.
    """

    # The order of the checks is kept on purpose: BaseModel takes precedence.
    if isinstance(input, BaseModel):
        return input.to_dict()

    if isinstance(input, (dict, str)):
        return input

    if isinstance(input, Input):
        return input.data

    return None
1346
+
1347
def __extract_options_dict(
    self: "Application",
    options: Options | dict[str, str] | None = None,
    json_configurations: dict[str, Any] | None = None,
) -> dict[str, str]:
    """
    Auxiliary function that builds the options dictionary that will be sent
    to the application for execution.

    An `Options` object is converted with `to_dict_cloud()`. For a plain
    dict, string values are kept as they are and every other value is
    serialized with `deflated_serialize_json`. Anything else, including
    None, yields an empty dictionary.
    """

    if options is None:
        return {}

    if isinstance(options, Options):
        return options.to_dict_cloud()

    if isinstance(options, dict):
        return {
            key: (
                value
                if isinstance(value, str)
                else deflated_serialize_json(value, json_configurations=json_configurations)
            )
            for key, value in options.items()
        }

    return {}
1371
+
1372
def __extract_run_config(
    self: "Application",
    input: Input | dict[str, Any] | BaseModel | str = None,
    configuration: RunConfiguration | dict[str, Any] | None = None,
    dir_path: str | None = None,
) -> dict[str, Any]:
    """
    Auxiliary function that produces the run configuration dictionary that
    will be sent to the application for execution.

    When no configuration is given, a fresh `RunConfiguration` is resolved
    from `input` and `dir_path` and converted to a dict. A given
    `RunConfiguration` is converted with `to_dict()`; any other given value
    is returned unchanged.
    """

    if configuration is None:
        # Nothing supplied: derive the configuration from the input itself.
        resolved = RunConfiguration()
        resolved.resolve(input=input, dir_path=dir_path)
        return resolved.to_dict()

    if isinstance(configuration, RunConfiguration):
        return configuration.to_dict()

    return configuration