nextmv 0.10.3.dev0__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextmv/__about__.py +1 -1
- nextmv/__entrypoint__.py +39 -0
- nextmv/__init__.py +57 -0
- nextmv/_serialization.py +96 -0
- nextmv/base_model.py +79 -9
- nextmv/cloud/__init__.py +71 -10
- nextmv/cloud/acceptance_test.py +888 -17
- nextmv/cloud/account.py +154 -10
- nextmv/cloud/application.py +3644 -437
- nextmv/cloud/batch_experiment.py +292 -33
- nextmv/cloud/client.py +354 -53
- nextmv/cloud/ensemble.py +247 -0
- nextmv/cloud/input_set.py +121 -4
- nextmv/cloud/instance.py +125 -0
- nextmv/cloud/package.py +474 -0
- nextmv/cloud/scenario.py +410 -0
- nextmv/cloud/secrets.py +234 -0
- nextmv/cloud/url.py +73 -0
- nextmv/cloud/version.py +174 -0
- nextmv/default_app/.gitignore +1 -0
- nextmv/default_app/README.md +32 -0
- nextmv/default_app/app.yaml +12 -0
- nextmv/default_app/input.json +5 -0
- nextmv/default_app/main.py +37 -0
- nextmv/default_app/requirements.txt +2 -0
- nextmv/default_app/src/__init__.py +0 -0
- nextmv/default_app/src/main.py +37 -0
- nextmv/default_app/src/visuals.py +36 -0
- nextmv/deprecated.py +47 -0
- nextmv/input.py +883 -78
- nextmv/local/__init__.py +5 -0
- nextmv/local/application.py +1263 -0
- nextmv/local/executor.py +1040 -0
- nextmv/local/geojson_handler.py +323 -0
- nextmv/local/local.py +97 -0
- nextmv/local/plotly_handler.py +61 -0
- nextmv/local/runner.py +274 -0
- nextmv/logger.py +80 -9
- nextmv/manifest.py +1472 -0
- nextmv/model.py +431 -0
- nextmv/options.py +968 -78
- nextmv/output.py +1363 -231
- nextmv/polling.py +287 -0
- nextmv/run.py +1623 -0
- nextmv/safe.py +145 -0
- nextmv/status.py +122 -0
- {nextmv-0.10.3.dev0.dist-info → nextmv-0.35.0.dist-info}/METADATA +51 -288
- nextmv-0.35.0.dist-info/RECORD +50 -0
- {nextmv-0.10.3.dev0.dist-info → nextmv-0.35.0.dist-info}/WHEEL +1 -1
- nextmv/cloud/status.py +0 -29
- nextmv/nextroute/__init__.py +0 -2
- nextmv/nextroute/check/__init__.py +0 -26
- nextmv/nextroute/check/schema.py +0 -141
- nextmv/nextroute/schema/__init__.py +0 -19
- nextmv/nextroute/schema/input.py +0 -52
- nextmv/nextroute/schema/location.py +0 -13
- nextmv/nextroute/schema/output.py +0 -136
- nextmv/nextroute/schema/stop.py +0 -61
- nextmv/nextroute/schema/vehicle.py +0 -68
- nextmv-0.10.3.dev0.dist-info/RECORD +0 -28
- {nextmv-0.10.3.dev0.dist-info → nextmv-0.35.0.dist-info}/licenses/LICENSE +0 -0
nextmv/local/executor.py
ADDED
|
@@ -0,0 +1,1040 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Executor module for executing local runs.
|
|
3
|
+
|
|
4
|
+
This module provides functionality to execute local runs. The `main` function
|
|
5
|
+
is invoked from the `run` function in the `runner` module.
|
|
6
|
+
|
|
7
|
+
Functions
|
|
8
|
+
---------
|
|
9
|
+
main
|
|
10
|
+
Main function to execute a local run.
|
|
11
|
+
execute_run
|
|
12
|
+
Function to execute the decision model run.
|
|
13
|
+
options_args
|
|
14
|
+
Function to convert options dictionary to command-line arguments.
|
|
15
|
+
process_run_input
|
|
16
|
+
Function to process the run input based on the format.
|
|
17
|
+
process_run_output
|
|
18
|
+
Function to process the run output and handle results.
|
|
19
|
+
resolve_output_format
|
|
20
|
+
Function to determine the output format from manifest or directory structure.
|
|
21
|
+
process_run_information
|
|
22
|
+
Function to update run metadata including duration and status.
|
|
23
|
+
process_run_logs
|
|
24
|
+
Function to process and save run logs.
|
|
25
|
+
process_run_statistics
|
|
26
|
+
Function to process and save run statistics.
|
|
27
|
+
process_run_assets
|
|
28
|
+
Function to process and save run assets.
|
|
29
|
+
process_run_solutions
|
|
30
|
+
Function to process and save run solutions.
|
|
31
|
+
process_run_visuals
|
|
32
|
+
Function to process and save run visuals.
|
|
33
|
+
resolve_stdout
|
|
34
|
+
Function to parse subprocess stdout output.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
import hashlib
|
|
38
|
+
import json
|
|
39
|
+
import os
|
|
40
|
+
import re
|
|
41
|
+
import shutil
|
|
42
|
+
import subprocess
|
|
43
|
+
import sys
|
|
44
|
+
import tempfile
|
|
45
|
+
from datetime import datetime, timezone
|
|
46
|
+
from typing import Any
|
|
47
|
+
|
|
48
|
+
from nextmv.input import INPUTS_KEY, InputFormat, load
|
|
49
|
+
from nextmv.local.geojson_handler import handle_geojson_visual
|
|
50
|
+
from nextmv.local.local import (
|
|
51
|
+
DEFAULT_OUTPUT_JSON_FILE,
|
|
52
|
+
LOGS_FILE,
|
|
53
|
+
LOGS_KEY,
|
|
54
|
+
NEXTMV_DIR,
|
|
55
|
+
OUTPUT_KEY,
|
|
56
|
+
calculate_files_size,
|
|
57
|
+
)
|
|
58
|
+
from nextmv.local.plotly_handler import handle_plotly_visual
|
|
59
|
+
from nextmv.manifest import Manifest, ManifestType
|
|
60
|
+
from nextmv.output import ASSETS_KEY, OUTPUTS_KEY, SOLUTIONS_KEY, STATISTICS_KEY, Asset, OutputFormat, VisualSchema
|
|
61
|
+
from nextmv.status import StatusV2
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def main() -> None:
    """
    Entry point used when this module is executed as a script.

    The run arguments are obtained through `load()` (stdin, per the module
    description) and forwarded, key by key, to `execute_run`. Every key listed
    below must be present in the loaded data; a missing key raises `KeyError`.
    """

    payload = load().data

    # Names of the arguments expected in the payload, in the same order in
    # which `execute_run` declares them.
    argument_names = (
        "run_id",
        "src",
        "manifest_dict",
        "run_dir",
        "run_config",
        "inputs_dir_path",
        "options",
        "input_data",
    )
    execute_run(**{name: payload[name] for name in argument_names})
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def execute_run(
    run_id: str,
    src: str,
    manifest_dict: dict[str, Any],
    run_dir: str,
    run_config: dict[str, Any],
    inputs_dir_path: str | None = None,
    options: dict[str, Any] | None = None,
    input_data: dict[str, Any] | str | None = None,
) -> None:
    """
    Runs the application entrypoint in a subprocess and records the outcome in
    the run directory.

    The source tree is copied to a temporary directory, the input is staged
    there (or passed through stdin), the run info file is flagged as running,
    and the entrypoint is launched with the current Python interpreter. Any
    exception raised along the way is appended to the logs file and the run
    info file is flagged as failed.

    Parameters
    ----------
    run_id : str
        The unique identifier for the run. The run info file is expected at
        `<run_dir>/<run_id>.json`.
    src : str
        The path to the application source code.
    manifest_dict : dict[str, Any]
        The manifest dictionary containing application configuration.
    run_dir : str
        The path to the run directory where outputs will be stored.
    run_config : dict[str, Any]
        The run configuration. The input format is read from
        `run_config["format"]["input"]["type"]`.
    inputs_dir_path : Optional[str], optional
        The path to the directory containing input files, by default None.
    options : Optional[dict[str, Any]], optional
        Additional command-line options for the run, by default None.
    input_data : Optional[Union[dict[str, Any], str]], optional
        The input data for the run, by default None.
    """

    # The logs dir is created up front so that any failure during the
    # execution process has a place to be registered.
    logs_dir = os.path.join(run_dir, LOGS_KEY)
    os.makedirs(logs_dir, exist_ok=True)
    info_path = os.path.join(run_dir, f"{run_id}.json")

    def record_status(status: str, error: str | None = None) -> None:
        # Rewrites the run info file in place with the new status (and error,
        # when given). The file is truncated after dumping in case the new
        # content is shorter than the old one.
        with open(info_path, "r+") as handle:
            record = json.load(handle)
            record["metadata"]["status_v2"] = status
            if error is not None:
                record["metadata"]["error"] = error
            handle.seek(0)
            json.dump(record, handle, indent=2)
            handle.truncate()

    # The complete execution is wrapped to capture any errors.
    try:
        # Work from a transient copy of the source that is cleaned up once the
        # run has been processed.
        with tempfile.TemporaryDirectory() as workspace:
            temp_src = os.path.join(workspace, "src")
            shutil.copytree(src, temp_src, ignore=_ignore_patterns)

            manifest = Manifest.from_dict(manifest_dict)

            stdin_input = process_run_input(
                temp_src=temp_src,
                run_format=run_config["format"]["input"]["type"],
                manifest=manifest,
                input_data=input_data,
                inputs_dir_path=inputs_dir_path,
            )

            record_status("running")

            # Only Python entrypoints are launched for now: the command is the
            # current interpreter followed by the entrypoint and the options.
            entrypoint = os.path.join(temp_src, __determine_entrypoint(manifest))
            cwd = __determine_cwd(manifest, default=temp_src)
            command = [sys.executable, entrypoint, *options_args(options)]

            result = subprocess.run(
                command,
                env=os.environ,
                check=False,
                text=True,
                capture_output=True,
                input=stdin_input,
                cwd=cwd,
            )

            process_run_output(
                manifest=manifest,
                run_id=run_id,
                temp_src=temp_src,
                result=result,
                run_dir=run_dir,
                src=src,
            )

    except Exception as e:
        # The exception is appended to the logs file...
        with open(os.path.join(logs_dir, LOGS_FILE), "a") as log:
            log.write(f"\nException during run execution: {str(e)}\n")

        # ...and the run info file is updated to reflect the failure.
        record_status("failed", error=str(e))
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def options_args(options: dict[str, Any] | None = None) -> list[str]:
|
|
195
|
+
"""
|
|
196
|
+
Converts options dictionary to a list of command-line arguments.
|
|
197
|
+
|
|
198
|
+
Parameters
|
|
199
|
+
----------
|
|
200
|
+
options : Optional[dict[str, Any]], optional
|
|
201
|
+
Additional options for the run, by default None.
|
|
202
|
+
|
|
203
|
+
Returns
|
|
204
|
+
-------
|
|
205
|
+
list[str]
|
|
206
|
+
A list of command-line arguments derived from the options.
|
|
207
|
+
"""
|
|
208
|
+
option_args = []
|
|
209
|
+
|
|
210
|
+
if options is not None:
|
|
211
|
+
for key, value in options.items():
|
|
212
|
+
option_args.append(f"-{key}")
|
|
213
|
+
option_args.append(str(value))
|
|
214
|
+
|
|
215
|
+
return option_args
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def process_run_input(
    temp_src: str,
    run_format: str,
    manifest: Manifest,
    input_data: dict[str, Any] | str | None = None,
    inputs_dir_path: str | None = None,
) -> str:
    """
    Prepares the run input according to the run format.

    For `json` and `text`, nothing is written to disk: the input is returned
    as a string, to be sent to the application through stdin. For
    `csv-archive`, the input files are copied to an `input` directory inside
    the temp source. For `multi-file`, they are copied to the default inputs
    directory inside the temp source, or to the custom location given in the
    manifest when it declares a multi-file configuration.

    Parameters
    ----------
    temp_src : str
        The path to the temporary source directory.
    run_format : str
        The run format, one of `json`, `text`, `csv-archive`, or `multi-file`.
    manifest : Manifest
        The application manifest.
    input_data : Optional[Union[dict[str, Any], str]], optional
        The input data for the run, by default None. Must be a dictionary for
        `json`, a string for `text`, and None for the directory-based formats.
    inputs_dir_path : Optional[str], optional
        The path to the directory containing input files, by default None.
        Only used for the directory-based formats; when None or empty, the
        staging directory is created but left empty.

    Returns
    -------
    str
        The input data as a string, if the format is `json` or `text`.
        Otherwise, returns an empty string.

    Raises
    ------
    ValueError
        If `input_data` does not match the `json` or `text` format, or if it
        is provided for a directory-based format.
    """

    # For JSON and TEXT formats, the input data is returned as a string.
    if run_format in (InputFormat.JSON.value, InputFormat.TEXT.value):
        if isinstance(input_data, dict) and run_format == InputFormat.JSON.value:
            return json.dumps(input_data)

        if isinstance(input_data, str) and run_format == InputFormat.TEXT.value:
            return input_data

        raise ValueError(f"invalid input data for format {run_format}")

    if input_data is not None:
        raise ValueError("input data must be None for csv-archive or multi-file format")

    if run_format == InputFormat.CSV_ARCHIVE.value:
        # CSV-ARCHIVE inputs always live in an `input` directory.
        staging_dir = os.path.join(temp_src, "input")
    elif run_format == InputFormat.MULTI_FILE.value:
        # MULTI-FILE inputs live in the default inputs directory, unless the
        # manifest points to a custom location.
        staging_dir = os.path.join(temp_src, INPUTS_KEY)
        content = manifest.configuration.content if manifest.configuration is not None else None
        if (
            content is not None
            and content.format == InputFormat.MULTI_FILE
            and content.multi_file is not None
        ):
            staging_dir = os.path.join(temp_src, content.multi_file.input.path)
    else:
        # Previously this case fell off the end of the function and returned
        # None, contradicting the declared `str` return type and the
        # documented "empty string" result. Nothing is staged for a format
        # that is not recognized here.
        return ""

    os.makedirs(staging_dir, exist_ok=True)

    if inputs_dir_path:
        shutil.copytree(inputs_dir_path, staging_dir, dirs_exist_ok=True)

    return ""
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def process_run_output(
    manifest: Manifest,
    run_id: str,
    temp_src: str,
    result: subprocess.CompletedProcess[str],
    run_dir: str,
    src: str,
) -> None:
    """
    Orchestrates the post-processing of a finished subprocess run.

    The stdout is resolved once, the outputs directory is created in the run
    directory, the output format is determined, and then the individual
    processors are invoked in a fixed order: run information, logs,
    statistics, assets, solutions, and visuals.

    Parameters
    ----------
    manifest : Manifest
        The application manifest containing configuration details.
    run_id : str
        The unique identifier for the run.
    temp_src : str
        The path to the temporary source directory.
    result : subprocess.CompletedProcess[str]
        The result of the subprocess run containing stdout, stderr, and return code.
    run_dir : str
        The path to the run directory where outputs will be stored.
    src : str
        The path to the application source code.
    """

    stdout_output = resolve_stdout(result)

    # Destination of the outputs in the run directory, and the location where
    # the application may have written them in the temp source.
    outputs_dir = os.path.join(run_dir, OUTPUTS_KEY)
    os.makedirs(outputs_dir, exist_ok=True)
    temp_run_outputs_dir = os.path.join(temp_src, OUTPUTS_KEY)

    output_format = resolve_output_format(
        manifest=manifest,
        temp_run_outputs_dir=temp_run_outputs_dir,
        temp_src=temp_src,
    )

    process_run_information(run_id=run_id, run_dir=run_dir, result=result)
    process_run_logs(
        output_format=output_format,
        run_dir=run_dir,
        result=result,
        stdout_output=stdout_output,
    )

    # Statistics, assets and solutions all take these same arguments.
    shared = {
        "temp_run_outputs_dir": temp_run_outputs_dir,
        "outputs_dir": outputs_dir,
        "stdout_output": stdout_output,
        "temp_src": temp_src,
        "manifest": manifest,
    }
    process_run_statistics(**shared)
    process_run_assets(**shared)
    process_run_solutions(
        run_id=run_id,
        run_dir=run_dir,
        output_format=output_format,
        src=src,
        **shared,
    )

    # Visuals are derived from the assets, so they are handled last.
    process_run_visuals(run_dir=run_dir, outputs_dir=outputs_dir)
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def resolve_output_format(
    manifest: Manifest,
    temp_run_outputs_dir: str,
    temp_src: str,
) -> OutputFormat:
    """
    Determines the output format of the run.

    The format declared in the manifest content configuration wins whenever
    that configuration is present. Otherwise the format is inferred from what
    exists on disk: an output directory directly under the temp source means
    `csv-archive`, a solutions directory under the temp outputs directory
    means `multi-file`, and `json` is the fallback.

    Parameters
    ----------
    manifest : Manifest
        The application manifest containing configuration details.
    temp_run_outputs_dir : str
        The path to the temporary outputs directory.
    temp_src : str
        The path to the temporary source directory.

    Returns
    -------
    OutputFormat
        The determined output format.
    """

    configuration = manifest.configuration
    if configuration is not None and configuration.content is not None:
        return configuration.content.format

    # `isdir` is False for paths that do not exist, so no separate existence
    # check is required.
    if os.path.isdir(os.path.join(temp_src, OUTPUT_KEY)):
        return OutputFormat.CSV_ARCHIVE

    if os.path.isdir(os.path.join(temp_run_outputs_dir, SOLUTIONS_KEY)):
        return OutputFormat.MULTI_FILE

    return OutputFormat.JSON
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def process_run_information(run_id: str, run_dir: str, result: subprocess.CompletedProcess[str]) -> None:
    """
    Updates the run info file with the duration, status and error of the run.

    The duration is the time elapsed, in milliseconds, between the
    `created_at` timestamp stored in the run metadata and now (UTC). The run
    is considered failed when the subprocess return code is non-zero.

    Parameters
    ----------
    run_id : str
        The ID of the run.
    run_dir : str
        The path to the run directory.
    result : subprocess.CompletedProcess[str]
        The result of the subprocess run.
    """

    info_path = os.path.join(run_dir, f"{run_id}.json")
    with open(info_path) as reader:
        info = json.load(reader)

    metadata = info["metadata"]

    # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
    # NOTE(review): the subtraction below needs a timezone-aware timestamp;
    # confirm that `created_at` always carries a timezone.
    started_at = datetime.fromisoformat(metadata["created_at"].replace("Z", "+00:00"))
    elapsed = datetime.now(timezone.utc) - started_at
    duration = round(elapsed.total_seconds() * 1000, 1)

    if result.returncode != 0:
        status = StatusV2.failed.value
        # The error is flattened to a single line and truncated so that Cloud
        # does not complain.
        message = result.stderr.strip().replace("\n", " ") if result.stderr else "unknown error"
        error = message[:60]
    else:
        status = StatusV2.succeeded.value
        error = ""

    metadata.update(duration=duration, status_v2=status, error=error)

    with open(info_path, "w") as writer:
        json.dump(info, writer, indent=2)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def process_run_logs(
    output_format: OutputFormat,
    run_dir: str,
    result: subprocess.CompletedProcess[str],
    stdout_output: str | dict[str, Any],
) -> None:
    """
    Writes the logs file of the run, replacing any previous content.

    For the multi-file format, a non-empty stdout goes first (serialized as
    JSON when it is a dictionary). A non-empty stderr is then written,
    preceded by a newline.

    Parameters
    ----------
    output_format : OutputFormat
        The output format of the run (JSON, CSV_ARCHIVE, or MULTI_FILE).
    run_dir : str
        The path to the run directory where logs will be stored.
    result : subprocess.CompletedProcess[str]
        The result of the subprocess run containing stderr output.
    stdout_output : Union[str, dict[str, Any]]
        The stdout output of the run, either as raw string or parsed dictionary.
    """

    logs_dir = os.path.join(run_dir, LOGS_KEY)
    os.makedirs(logs_dir, exist_ok=True)
    stderr_text = result.stderr

    with open(os.path.join(logs_dir, LOGS_FILE), "w") as log:
        pieces = []

        if output_format == OutputFormat.MULTI_FILE and stdout_output:
            if isinstance(stdout_output, dict):
                pieces.append(json.dumps(stdout_output))
            elif isinstance(stdout_output, str):
                pieces.append(stdout_output)

        if stderr_text:
            pieces.extend(("\n", stderr_text))

        # The pieces are written one after the other, in order.
        log.writelines(pieces)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def process_run_statistics(
    temp_run_outputs_dir: str,
    outputs_dir: str,
    stdout_output: str | dict[str, Any],
    temp_src: str,
    manifest: Manifest,
) -> None:
    """
    Stores the statistics of the run in the statistics directory of the run
    outputs. The first source that is available is used, in this order:

    1. The custom statistics file declared in the manifest (multi-file
       configuration only), copied as the statistics JSON file.
    2. The statistics directory in the temporary outputs directory, copied as
       a whole.
    3. The statistics entry of the stdout output, when stdout was parsed as a
       dictionary and contains it.

    If none is available, the destination directory is left empty.

    Parameters
    ----------
    temp_run_outputs_dir : str
        The path to the temporary outputs directory.
    outputs_dir : str
        The path to the outputs directory in the run directory.
    stdout_output : Union[str, dict[str, Any]]
        The stdout output of the run, either as raw string or parsed dictionary.
    temp_src : str
        The path to the temporary source directory.
    manifest : Manifest
        The application manifest containing configuration and custom paths.
    """

    stats_dst = os.path.join(outputs_dir, STATISTICS_KEY)
    os.makedirs(stats_dst, exist_ok=True)
    stats_dst_file = os.path.join(stats_dst, f"{STATISTICS_KEY}.json")

    # 1. Custom file location declared in the manifest.
    configuration = manifest.configuration
    content = configuration.content if configuration is not None else None
    if (
        content is not None
        and content.format == OutputFormat.MULTI_FILE
        and content.multi_file is not None
    ):
        custom_file = os.path.join(temp_src, content.multi_file.output.statistics)

        # `isfile` is False for paths that do not exist.
        if os.path.isfile(custom_file):
            shutil.copy2(custom_file, stats_dst_file)
            return

    # 2. Default statistics directory written by the application.
    default_dir = os.path.join(temp_run_outputs_dir, STATISTICS_KEY)
    if os.path.isdir(default_dir):
        shutil.copytree(default_dir, stats_dst, dirs_exist_ok=True)
        return

    # 3. Statistics embedded in the stdout output.
    if not isinstance(stdout_output, dict) or STATISTICS_KEY not in stdout_output:
        return

    with open(stats_dst_file, "w") as f:
        json.dump({STATISTICS_KEY: stdout_output[STATISTICS_KEY]}, f, indent=2)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def process_run_assets(
    temp_run_outputs_dir: str,
    outputs_dir: str,
    stdout_output: str | dict[str, Any],
    temp_src: str,
    manifest: Manifest,
) -> None:
    """
    Stores the assets of the run in the assets directory of the run outputs.
    The first source that is available is used, in this order:

    1. The custom assets file declared in the manifest (multi-file
       configuration only), copied as the assets JSON file.
    2. The assets directory in the temporary outputs directory, copied as a
       whole.
    3. The assets entry of the stdout output, when stdout was parsed as a
       dictionary and contains it.

    If none is available, the destination directory is left empty.

    Parameters
    ----------
    temp_run_outputs_dir : str
        The path to the temporary outputs directory.
    outputs_dir : str
        The path to the outputs directory in the run directory.
    stdout_output : Union[str, dict[str, Any]]
        The stdout output of the run, either as raw string or parsed dictionary.
    temp_src : str
        The path to the temporary source directory.
    manifest : Manifest
        The application manifest containing configuration and custom paths.
    """

    assets_dst = os.path.join(outputs_dir, ASSETS_KEY)
    os.makedirs(assets_dst, exist_ok=True)
    assets_dst_file = os.path.join(assets_dst, f"{ASSETS_KEY}.json")

    # 1. Custom file location declared in the manifest.
    configuration = manifest.configuration
    content = configuration.content if configuration is not None else None
    if (
        content is not None
        and content.format == OutputFormat.MULTI_FILE
        and content.multi_file is not None
    ):
        custom_file = os.path.join(temp_src, content.multi_file.output.assets)

        # `isfile` is False for paths that do not exist.
        if os.path.isfile(custom_file):
            shutil.copy2(custom_file, assets_dst_file)
            return

    # 2. Default assets directory written by the application.
    default_dir = os.path.join(temp_run_outputs_dir, ASSETS_KEY)
    if os.path.isdir(default_dir):
        shutil.copytree(default_dir, assets_dst, dirs_exist_ok=True)
        return

    # 3. Assets embedded in the stdout output.
    if not isinstance(stdout_output, dict) or ASSETS_KEY not in stdout_output:
        return

    with open(assets_dst_file, "w") as f:
        json.dump({ASSETS_KEY: stdout_output[ASSETS_KEY]}, f, indent=2)
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def process_run_solutions(
    run_id: str,
    run_dir: str,
    temp_run_outputs_dir: str,
    temp_src: str,
    outputs_dir: str,
    stdout_output: str | dict[str, Any],
    output_format: OutputFormat,
    manifest: Manifest,
    src: str,
) -> None:
    """
    Stores the solutions (output) of the run in the solutions directory of the
    run outputs, and updates the run info file with the output size and
    format.

    The source of the solutions depends on the output format:

    - `csv-archive`: the output directory in the temp source is copied as a
      whole.
    - `multi-file`: the solutions directory in the temporary outputs
      directory, or the custom solutions path from the manifest, is handed to
      `_copy_new_or_modified_files` together with the original source
      directory and the statistics, assets and inputs directories as
      exclusions.
    - anything else: a non-empty stdout is written to the default output
      file, as JSON when it was parsed as a dictionary and verbatim when it is
      a string.

    Parameters
    ----------
    run_id : str
        The unique identifier of the run.
    run_dir : str
        The path to the run directory where outputs are stored.
    temp_run_outputs_dir : str
        The path to the temporary outputs directory.
    temp_src : str
        The path to the temporary source directory.
    outputs_dir : str
        The path to the outputs directory in the run directory.
    stdout_output : Union[str, dict[str, Any]]
        The stdout output of the run, either as raw string or parsed dictionary.
    output_format : OutputFormat
        The determined output format.
    manifest : Manifest
        The application manifest containing configuration and custom paths.
    src : str
        The path to the application source code.
    """

    info_file = os.path.join(run_dir, f"{run_id}.json")

    solutions_dst = os.path.join(outputs_dir, SOLUTIONS_KEY)
    os.makedirs(solutions_dst, exist_ok=True)

    if output_format == OutputFormat.CSV_ARCHIVE:
        output_src = os.path.join(temp_src, OUTPUT_KEY)
        shutil.copytree(output_src, solutions_dst, dirs_exist_ok=True)
    elif output_format == OutputFormat.MULTI_FILE:
        solutions_src = os.path.join(temp_run_outputs_dir, SOLUTIONS_KEY)
        if (
            manifest.configuration is not None
            and manifest.configuration.content is not None
            and manifest.configuration.content.format == OutputFormat.MULTI_FILE
            and manifest.configuration.content.multi_file is not None
        ):
            solutions_src = os.path.join(temp_src, manifest.configuration.content.multi_file.output.solutions)

        _copy_new_or_modified_files(
            runtime_dir=solutions_src,
            dst_dir=solutions_dst,
            original_src_dir=src,
            exclusion_dirs=[
                os.path.join(outputs_dir, STATISTICS_KEY),
                os.path.join(outputs_dir, ASSETS_KEY),
                os.path.join(run_dir, INPUTS_KEY),
            ],
        )
    elif stdout_output:
        with open(os.path.join(solutions_dst, DEFAULT_OUTPUT_JSON_FILE), "w") as f:
            if isinstance(stdout_output, dict):
                json.dump(stdout_output, f, indent=2)
            elif isinstance(stdout_output, str):
                f.write(stdout_output)

    # Update the run information file with the output size and type.
    calculate_files_size(run_dir, run_id, solutions_dst, metadata_key="output_size")

    # NOTE(review): `calculate_files_size` receives the run dir, run id and a
    # metadata key, so it presumably writes the size into the same run info
    # file - confirm against its definition. The info is therefore read only
    # now, after that call. Loading it at the top of the function and writing
    # it back here would overwrite the file with a stale copy and drop
    # whatever the size calculation stored.
    with open(info_file) as f:
        info = json.load(f)

    info["metadata"]["format"]["output"] = {"type": output_format.value}
    with open(info_file, "w") as f:
        json.dump(info, f, indent=2)
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
def process_run_visuals(run_dir: str, outputs_dir: str) -> None:
    """
    Render the visual assets of a run into the run's ``visuals`` directory.

    The assets JSON file is read from the assets folder inside `outputs_dir`.
    Every asset that carries a visual with a Plotly or GeoJSON schema is handed
    to the matching handler together with the visuals directory. Assets
    without a visual, or with any other schema (e.g. ChartJS), are skipped.
    Nothing happens when the assets folder or the assets file is missing.

    Parameters
    ----------
    run_dir : str
        The path to the run directory; the ``visuals`` folder is created here.
    outputs_dir : str
        The path to the outputs directory of the run, which holds the assets.
    """

    assets_dir = os.path.join(outputs_dir, ASSETS_KEY)
    assets_file = os.path.join(assets_dir, f"{ASSETS_KEY}.json")
    if not (os.path.exists(assets_dir) and os.path.exists(assets_file)):
        return

    with open(assets_file) as stream:
        payload = json.load(stream)

    # The visuals folder is created as soon as an assets file exists, even if
    # no asset ends up producing a visual.
    visuals_dir = os.path.join(run_dir, "visuals")
    os.makedirs(visuals_dir, exist_ok=True)

    # Supported schemas and the handler in charge of each one. ChartJS is not
    # easily supported directly from Python in local runs, so it is absent.
    handlers = (
        (VisualSchema.PLOTLY, handle_plotly_visual),
        (VisualSchema.GEOJSON, handle_geojson_visual),
    )

    for raw_asset in payload.get(ASSETS_KEY, []):
        asset = Asset.from_dict(raw_asset)
        if asset.visual is None:
            continue

        for schema, handler in handlers:
            if asset.visual.visual_schema == schema:
                handler(asset, visuals_dir)
                break
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def resolve_stdout(result: subprocess.CompletedProcess[str]) -> str | dict[str, Any]:
|
|
761
|
+
"""
|
|
762
|
+
Resolves the stdout output of the subprocess run. If the stdout is valid
|
|
763
|
+
JSON, it returns the parsed dictionary. Otherwise, it returns the raw
|
|
764
|
+
string output.
|
|
765
|
+
|
|
766
|
+
Parameters
|
|
767
|
+
----------
|
|
768
|
+
result : subprocess.CompletedProcess[str]
|
|
769
|
+
The result of the subprocess run.
|
|
770
|
+
|
|
771
|
+
Returns
|
|
772
|
+
-------
|
|
773
|
+
Union[str, dict[str, Any]]
|
|
774
|
+
The parsed stdout output as a dictionary if valid JSON, otherwise the
|
|
775
|
+
raw string output.
|
|
776
|
+
"""
|
|
777
|
+
raw_output = result.stdout
|
|
778
|
+
if raw_output.strip() == "":
|
|
779
|
+
return ""
|
|
780
|
+
|
|
781
|
+
try:
|
|
782
|
+
return json.loads(raw_output)
|
|
783
|
+
except json.JSONDecodeError:
|
|
784
|
+
return raw_output
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
def _ignore_patterns(dir_path: str, names: list[str]) -> list[str]:
    """
    Custom ignore function for copytree that filters files and directories
    during source code copying. Excludes virtual environments, cache files,
    the nextmv directory, and non-essential files while preserving Python
    source files and application manifests.

    Regular files are judged only by their name: ``*.py`` files and
    ``app.yaml`` are kept, everything else is ignored. The virtual environment
    name pattern is applied to non-file entries only, so that source files
    such as ``env.py`` or ``environment.py`` are not dropped by accident.

    Parameters
    ----------
    dir_path : str
        The path to the directory being processed.
    names : list[str]
        A list of file and directory names in the current directory.

    Returns
    -------
    list[str]
        A list of names to ignore during the copy operation.
    """
    ignored = []
    for name in names:
        # The nextmv directory and bytecode caches are never copied.
        if name in (NEXTMV_DIR, "__pycache__"):
            ignored.append(name)
            continue

        full_path = os.path.join(dir_path, name)

        # If it's a file, only keep Python files and app.yaml.
        if os.path.isfile(full_path):
            if not (name.endswith(".py") or name == "app.yaml"):
                ignored.append(name)
            continue

        # Anything that is not a regular file (directories in practice):
        # ignore virtual environments and entries named like bytecode files.
        if re.match(r"^\.?(venv|env|virtualenv).*$", name) or name.endswith(".pyc"):
            ignored.append(name)

    return ignored
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def _copy_new_or_modified_files(  # noqa: C901
    runtime_dir: str,
    dst_dir: str,
    original_src_dir: str | None = None,
    exclusion_dirs: list[str] | None = None,
) -> None:
    """
    Copy the files that a run created or changed in the runtime directory.

    A file found under `runtime_dir` is selected when its relative path does
    not exist under `original_src_dir`, when its MD5 checksum differs from the
    file at the same relative path in the original source, or when the
    checksums match but the runtime copy has a newer modification time.
    Selected files whose relative path also exists under any of the
    `exclusion_dirs` are dropped. The remaining files are copied into
    `dst_dir`, keeping their relative layout, and empty directories left in
    `dst_dir` are cleaned up afterwards.

    Parameters
    ----------
    runtime_dir : str
        The path to the runtime directory containing files to potentially copy.
    dst_dir : str
        The destination directory where new or modified files will be copied.
    original_src_dir : Optional[str], optional
        The path to the original source directory for comparison, by default
        None. If None, all files from runtime_dir are considered new.
    exclusion_dirs : Optional[list[str]], optional
        List of directory paths containing files to exclude from copying,
        by default None. Files whose relative path matches a file in one of
        these directories are not copied even if they are new or modified.
    """

    def _relative_files(base_dir: str) -> set[str]:
        """Relative paths of every file found below `base_dir`."""
        return {
            os.path.relpath(os.path.join(parent, filename), base_dir)
            for parent, _, filenames in os.walk(base_dir)
            for filename in filenames
        }

    # Step 1: list what lives in the runtime dir, i.e. the directory where the
    # executable code actually ran. Bytecode artifacts are never candidates.
    candidates = []
    for parent, _, filenames in os.walk(runtime_dir):
        if "__pycache__" in parent:
            continue

        for filename in filenames:
            if filename.endswith(".pyc"):
                continue

            absolute = os.path.join(parent, filename)
            candidates.append((os.path.relpath(absolute, runtime_dir), absolute))

    # Step 2: list what existed in the original source. The source is copied
    # to the runtime dir before execution, so this is the comparison baseline.
    baseline = _relative_files(original_src_dir) if original_src_dir is not None else set()

    # Step 3: list what exists in the exclusion dirs.
    excluded = set()
    for exclusion_dir in exclusion_dirs or []:
        excluded |= _relative_files(exclusion_dir)

    def _is_new_or_modified(relative: str, absolute: str) -> bool:
        """Whether a runtime file is absent from, or differs from, the baseline."""
        if relative not in baseline:
            return True

        pristine = os.path.join(original_src_dir, relative)
        if _calculate_file_checksum(absolute) != _calculate_file_checksum(pristine):
            return True

        # Same content: only a newer modification time makes it count.
        return os.path.getmtime(absolute) > os.path.getmtime(pristine)

    # Step 4: keep new/modified files, then drop the ones in the exclusion set.
    changed = [pair for pair in candidates if _is_new_or_modified(*pair)]
    to_copy = [pair for pair in changed if pair[0] not in excluded]

    # Step 5: copy the survivors, creating the directory structure on the way.
    for relative, absolute in to_copy:
        target = os.path.join(dst_dir, relative)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copy2(absolute, target)

    # Finally, remove any empty directories that might have been created.
    _remove_empty_directories(dst_dir)
|
|
960
|
+
|
|
961
|
+
|
|
962
|
+
def _remove_empty_directories(directory: str) -> None:
    """
    Recursively remove empty directories starting from the given directory.

    This function walks the directory tree bottom-up and removes any directory
    that is empty at the moment it is visited, including directories that only
    became empty because their empty subdirectories were just removed. It
    preserves the root directory even if it's empty.

    Parameters
    ----------
    directory : str
        The root directory path to start cleaning from.
    """
    for root, _, _ in os.walk(directory, topdown=False):
        # Skip the root directory itself
        if root == directory:
            continue

        try:
            # The dirs/files lists yielded by os.walk are captured before the
            # children are visited, so they are stale once a child has been
            # removed. Ask the file system for the current content instead.
            if not os.listdir(root):
                os.rmdir(root)
        except OSError:
            # Best effort: the directory may be unreadable, not removable, or
            # have changed in the meantime. Leave it in place.
            pass
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
def _calculate_file_checksum(file_path: str) -> str:
    """
    Calculate MD5 checksum of a file.

    The digest is only used to detect content changes, not for any security
    purpose, so the hash is created with ``usedforsecurity=False``. This keeps
    the function working on Python builds where MD5 is restricted.

    Parameters
    ----------
    file_path : str
        The path to the file.

    Returns
    -------
    str
        The MD5 checksum of the file, as a hexadecimal string.
    """
    hash_md5 = hashlib.md5(usedforsecurity=False)
    with open(file_path, "rb") as f:
        # Read in chunks so that large files are never loaded whole in memory.
        while chunk := f.read(65536):
            hash_md5.update(chunk)

    return hash_md5.hexdigest()
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
def __determine_entrypoint(manifest: Manifest) -> str:
    """
    Resolve the entrypoint of the app described by the manifest.

    An entrypoint set explicitly in the manifest's execution section wins.
    Otherwise a default is picked from the manifest type.

    Parameters
    ----------
    manifest : Manifest
        The manifest of the application.

    Returns
    -------
    str
        The explicit entrypoint, or the default one for the manifest type.

    Raises
    ------
    ValueError
        If no entrypoint is set and the manifest type has no known default.
    """
    execution = manifest.execution
    if execution is not None and execution.entrypoint is not None:
        return execution.entrypoint

    # Default entrypoint for each supported app type.
    defaults = (
        (ManifestType.PYTHON, "./main.py"),
        (ManifestType.GO, "./main"),
        (ManifestType.JAVA, "./main.jar"),
    )
    for app_type, entrypoint in defaults:
        if manifest.type == app_type:
            return entrypoint

    raise ValueError(
        f'entrypoint is not provided but the app type "{manifest.type}" could not '
        "be resolved to establish a default entrypoint"
    )
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
def __determine_cwd(manifest: Manifest, default: str) -> str:
    """
    Resolve the working directory to use for the app.

    Parameters
    ----------
    manifest : Manifest
        The manifest of the application.
    default : str
        The directory to fall back to when the manifest does not set one.

    Returns
    -------
    str
        The ``cwd`` of the manifest's execution section when both the section
        and the value are set, otherwise `default`.
    """
    execution = manifest.execution
    if execution is None or execution.cwd is None:
        return default

    return execution.cwd
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
if __name__ == "__main__":
    # Only run when the module is executed as a script, never on import.
    main()
|