nextmv 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,323 @@
1
+ """
2
+ GeoJSON visualization handler module.
3
+
4
+ This module provides functionality to handle GeoJSON visualizations by converting
5
+ them to interactive HTML maps using Folium. It supports various GeoJSON formats
6
+ and automatically calculates optimal map positioning and zoom levels.
7
+
8
+ Functions
9
+ ---------
10
+ handle_geojson_visual
11
+ Handle and write GeoJSON visuals to HTML files.
12
+ extract_coordinates
13
+ Recursively extract coordinates from nested coordinate structures.
14
+ calculate_map_center_and_zoom
15
+ Calculate the optimal center and zoom level for a GeoJSON map.
16
+ extract_geojson_fields
17
+ Extract available fields for tooltip and popup from GeoJSON data.
18
+ create_geojson_map
19
+ Create a folium map with GeoJSON data.
20
+ """
21
+
22
+ import json
23
+ import os
24
+
25
+ import folium
26
+
27
+ from nextmv.logger import log
28
+ from nextmv.output import Asset
29
+
30
+
31
def handle_geojson_visual(asset: Asset, visuals_dir: str) -> None:
    """
    Handle and write GeoJSON visuals to HTML files.

    This function converts the GeoJSON content of an asset into interactive
    HTML maps using Folium. String content is decoded as JSON first and is
    then handled exactly like already-decoded content, so a JSON string that
    holds a list of GeoJSON objects yields one map per object instead of
    being handed to the map builder as a list.

    Parameters
    ----------
    asset : Asset
        The asset containing the GeoJSON visualization data. The content can be
        a dictionary (single GeoJSON), a list (multiple GeoJSONs), or a JSON
        string representation of either.
    visuals_dir : str
        The directory path where the HTML files will be written.

    Notes
    -----
    - For dict content, the map is saved as "<label>.html" and the layer is
      named "<label> Layer".
    - For list content, each dict item is saved as "<label>_<index>.html"
      (zero-based index) and its layer is named "<label> Layer <index + 1>".
      Items that are not dictionaries are skipped.
    - A string that is not valid JSON, or that decodes to something other
      than a dict or a list, is reported through ``log`` and nothing is
      written.
    - Any other content type is ignored without logging.
    """
    label = asset.visual.label
    content = asset.content

    if isinstance(content, str):
        try:
            content = json.loads(content)
        except json.JSONDecodeError:
            log(f"Warning: Could not parse GeoJSON string content for {label}")
            return

        # A decoded scalar (number, bool, null, string) cannot be rendered as
        # a map; report it instead of passing it on to folium.
        if not isinstance(content, (dict, list)):
            log(f"Warning: GeoJSON string content for {label} is not a JSON object or array")
            return

    if isinstance(content, list):
        for ix, item in enumerate(content):
            if isinstance(item, dict):
                layer_name = f"{label} Layer {ix + 1}"
                m = create_geojson_map(item, layer_name)
                m.save(os.path.join(visuals_dir, f"{label}_{ix}.html"))
        return

    if isinstance(content, dict):
        layer_name = f"{label} Layer"
        m = create_geojson_map(content, layer_name)
        m.save(os.path.join(visuals_dir, f"{label}.html"))
        return

    # If there is a different content type for geojson visuals, we ignore it for now
84
+
85
+
86
def extract_coordinates(coords, all_coords) -> None:
    """
    Recursively extract coordinates from nested coordinate structures.

    This function walks a (possibly nested) GeoJSON ``coordinates`` value and
    collects every position it finds. A position is a list or tuple of at
    least two numbers; GeoJSON permits an optional third element (altitude),
    which is dropped here because only longitude and latitude are needed.

    Parameters
    ----------
    coords : list or tuple
        The coordinate structure to extract from. Can be a single position or
        any nesting of lists/tuples of positions. Values of any other type
        are ignored.
    all_coords : list
        A list to accumulate all extracted positions. This list is modified
        in-place; each appended entry is a new ``[longitude, latitude]``
        list.

    Notes
    -----
    - A position is identified as a list/tuple with two or more elements
      that are all ``int`` or ``float``.
    - Only the first two elements of a position are stored.
    - Anything that is not a position is treated as a container and each of
      its elements is processed recursively, which covers LineString,
      Polygon and Multi* coordinate layouts.
    """
    # Strings and other iterables are deliberately excluded: only real
    # sequences can be GeoJSON coordinate arrays.
    if not isinstance(coords, (list, tuple)):
        return

    is_position = len(coords) >= 2 and all(isinstance(value, (int, float)) for value in coords)
    if is_position:
        # Keep just [lon, lat]; a trailing altitude is irrelevant for bounds.
        all_coords.append([coords[0], coords[1]])
        return

    # This is a nested structure, recurse into every element.
    for nested in coords:
        extract_coordinates(nested, all_coords)
121
+
122
+
123
def calculate_map_center_and_zoom(geojson_data: dict) -> tuple[float, float, int]:
    """
    Calculate the optimal center and zoom level for a GeoJSON map.

    This function gathers every coordinate found in the ``"features"`` of the
    given GeoJSON object, uses the arithmetic mean of the latitudes and
    longitudes as the map center, and derives a zoom level from the larger of
    the latitude and longitude ranges.

    Parameters
    ----------
    geojson_data : dict
        A GeoJSON object with a ``"features"`` key containing an array of
        feature objects.

    Returns
    -------
    tuple[float, float, int]
        A tuple containing (center_latitude, center_longitude, zoom_level).

    Notes
    -----
    - The default result is (40.7128, -74.0060, 12). It is returned when
      there is no non-empty ``"features"`` entry, when no coordinates are
      found, or when an error occurs while processing the data (the error is
      reported through ``log``).
    - Features whose ``"geometry"`` is missing, ``null`` or has no
      ``"coordinates"`` are skipped; they do not prevent the remaining
      features from being used.
    - Zoom levels are chosen from the largest coordinate range:
        - Range > 10 degrees: zoom level 4
        - Range > 1 degree: zoom level 8
        - Range > 0.1 degree: zoom level 10
        - Smaller ranges: zoom level 12
    """
    default_lat, default_lon, default_zoom = 40.7128, -74.0060, 12

    try:
        if "features" not in geojson_data or not geojson_data["features"]:
            return default_lat, default_lon, default_zoom

        # Calculate bounds from all features
        all_coords = []
        for feature in geojson_data["features"]:
            # GeoJSON allows `"geometry": null`; `.get("geometry", {})` would
            # return None in that case, so fall back to an empty dict instead.
            geometry = feature.get("geometry") or {}
            coords = geometry.get("coordinates")
            if coords:
                extract_coordinates(coords, all_coords)

        if not all_coords:
            return default_lat, default_lon, default_zoom

        # Entries are [longitude, latitude], following the GeoJSON ordering.
        lats = [coord[1] for coord in all_coords]
        lons = [coord[0] for coord in all_coords]
        center_lat = sum(lats) / len(lats)
        center_lon = sum(lons) / len(lons)

        # Adjust zoom based on coordinate spread
        lat_range = max(lats) - min(lats)
        lon_range = max(lons) - min(lons)
        max_range = max(lat_range, lon_range)

        if max_range > 10:
            zoom_level = 4
        elif max_range > 1:
            zoom_level = 8
        elif max_range > 0.1:
            zoom_level = 10
        else:
            zoom_level = default_zoom

        return center_lat, center_lon, zoom_level

    except (KeyError, TypeError, ValueError, IndexError) as e:
        log(f"Warning: Error calculating map center and zoom from GeoJSON data: {e}")
        return default_lat, default_lon, default_zoom
    except Exception as e:
        # Best effort: map positioning must never abort the visualization.
        log(f"Warning: Unexpected error calculating map center and zoom: {e}")
        return default_lat, default_lon, default_zoom
204
+
205
+
206
def extract_geojson_fields(geojson_data: dict) -> tuple[list[str], list[str]]:
    """
    Extract available fields for tooltip and popup from GeoJSON data.

    This function inspects the properties of the first feature and selects
    the property names to show in map tooltips and popups. Well-known names
    come first, followed by the remaining property names in their original
    order, and each list is cut to its maximum length.

    Parameters
    ----------
    geojson_data : dict
        A GeoJSON object with a ``"features"`` key whose first feature has a
        ``"properties"`` mapping.

    Returns
    -------
    tuple[list[str], list[str]]
        A tuple containing (tooltip_fields, popup_fields).
        - tooltip_fields : list[str]
            At most 3 field names.
        - popup_fields : list[str]
            At most 5 field names.

    Notes
    -----
    - Prioritized names, in order: "name", "title", "label", "id",
      "popupContent", "description".
    - The limits apply to prioritized names as well, so a feature carrying
      many of them never exceeds 3 tooltip or 5 popup fields.
    - Returns empty lists if there are no features or the first feature has
      no properties.
    - Errors raised while reading the data are reported through ``log`` and
      empty lists are returned.
    """
    max_tooltip_fields = 3
    max_popup_fields = 5
    tooltip_fields = []
    popup_fields = []

    try:
        if "features" not in geojson_data or not geojson_data["features"]:
            return tooltip_fields, popup_fields

        # Get fields from the first feature's properties
        first_feature = geojson_data["features"][0]
        if "properties" not in first_feature or not first_feature["properties"]:
            return tooltip_fields, popup_fields

        available_fields = list(first_feature["properties"].keys())
        # Prioritize common field names for tooltip/popup
        priority_fields = ["name", "title", "label", "id", "popupContent", "description"]

        # One ordered candidate list: prioritized names first, then the rest.
        ordered_fields = [field for field in priority_fields if field in available_fields]
        ordered_fields += [field for field in available_fields if field not in ordered_fields]

        # Slicing enforces the limits for prioritized names too.
        tooltip_fields = ordered_fields[:max_tooltip_fields]
        popup_fields = ordered_fields[:max_popup_fields]

    except (KeyError, TypeError, IndexError) as e:
        log(f"Warning: Error extracting GeoJSON fields: {e}")
    except Exception as e:
        # Best effort: missing tooltips must never abort the visualization.
        log(f"Warning: Unexpected error extracting GeoJSON fields: {e}")

    return tooltip_fields, popup_fields
272
+
273
+
274
def create_geojson_map(geojson_data: dict, layer_name: str = "GeoJSON Layer") -> folium.Map:
    """
    Build a Folium map that displays the given GeoJSON data.

    The map is positioned with the values returned by
    ``calculate_map_center_and_zoom`` and the GeoJSON layer is given a tooltip
    and a popup whenever ``extract_geojson_fields`` returns field names for
    them.

    Parameters
    ----------
    geojson_data : dict
        The GeoJSON object to display.
    layer_name : str, optional
        The name assigned to the GeoJSON layer, by default "GeoJSON Layer".

    Returns
    -------
    folium.Map
        The map with the GeoJSON layer and a layer control added.

    Notes
    -----
    - The tooltip is only configured when tooltip fields were found.
    - The popup is only configured when popup fields were found.
    - A layer control is always added after the GeoJSON layer.
    """
    lat, lon, zoom = calculate_map_center_and_zoom(geojson_data)
    tooltip_names, popup_names = extract_geojson_fields(geojson_data)

    base_map = folium.Map(location=[lat, lon], zoom_start=zoom)

    # Optional layer settings are only passed when there is something to show.
    layer_options = {"name": layer_name}
    if tooltip_names:
        layer_options["tooltip"] = folium.GeoJsonTooltip(fields=tooltip_names)
    if popup_names:
        layer_options["popup"] = folium.GeoJsonPopup(fields=popup_names)

    layer = folium.GeoJson(geojson_data, **layer_options)
    layer.add_to(base_map)

    control = folium.LayerControl()
    control.add_to(base_map)

    return base_map
@@ -0,0 +1,61 @@
1
+ """
2
+ Plotly visualization handler module.
3
+
4
+ This module provides functionality to handle Plotly visualizations by converting
5
+ them to HTML files for local run processing.
6
+
7
+ Functions
8
+ ---------
9
+ handle_plotly_visual
10
+ Handle and write Plotly visuals to HTML files.
11
+ """
12
+
13
+ import json
14
+ import os
15
+
16
+ import plotly.io as pio
17
+
18
+ from nextmv.output import Asset
19
+
20
+
21
def handle_plotly_visual(asset: Asset, visuals_dir: str) -> None:
    """
    Write the Plotly visuals of an asset to HTML files.

    Every visualization is serialized to JSON, loaded as a Plotly figure and
    written as an HTML file inside ``visuals_dir``.

    Parameters
    ----------
    asset : Asset
        The asset containing the Plotly visualization data. The content can be
        either a dictionary (single visualization) or a list (multiple
        visualizations).
    visuals_dir : str
        The directory path where the HTML files will be written.

    Notes
    -----
    - Dict content produces a single file named "<label>.html".
    - List content produces one file per item, named "<label>_<index>.html"
      with a zero-based index.
    - Any other content type is ignored.
    """
    payload = asset.content

    if isinstance(payload, dict):
        figure = pio.from_json(json.dumps(payload))
        target = os.path.join(visuals_dir, f"{asset.visual.label}.html")
        figure.write_html(target)
        return

    if isinstance(payload, list):
        for position, spec in enumerate(payload):
            figure = pio.from_json(json.dumps(spec))
            target = os.path.join(visuals_dir, f"{asset.visual.label}_{position}.html")
            figure.write_html(target)
        return

    # If there is a different content type for plotly visuals, we ignore it for
    # now.
nextmv/local/runner.py ADDED
@@ -0,0 +1,312 @@
1
+ """
2
+ Runner module for executing local runs.
3
+
4
+ This module provides functionality to execute local runs.
5
+
6
+ Functions
7
+ ---------
8
+ run
9
+ Function to execute a local run.
10
+ new_run
11
+ Function to initialize a new run.
12
+ record_input
13
+ Function to write the input to the appropriate location.
14
+ calculate_files_size
15
+ Function to calculate the total size of files in a directory.
16
+ """
17
+
18
+ import importlib.util
19
+ import json
20
+ import os
21
+ import shutil
22
+ import subprocess
23
+ import sys
24
+ from datetime import datetime, timezone
25
+ from typing import Any, Optional, Union
26
+
27
+ from nextmv.input import DEFAULT_INPUT_JSON_FILE, INPUTS_KEY
28
+ from nextmv.manifest import Manifest
29
+ from nextmv.run import Format, FormatInput, Metadata, RunInformation, StatusV2
30
+ from nextmv.safe import safe_id
31
+
32
+
33
def run(
    app_id: str,
    src: str,
    manifest: Manifest,
    run_config: dict[str, Any],
    name: Optional[str] = None,
    description: Optional[str] = None,
    input_data: Optional[Union[dict[str, Any], str]] = None,
    inputs_dir_path: Optional[str] = None,
    options: Optional[dict[str, Any]] = None,
) -> str:
    """
    Execute a local run.

    A run ID is generated, the run directory is created through ``new_run``
    and the input is stored through ``record_input``. Afterwards the
    ``executor.py`` script located next to this module is launched as a
    separate process in a new session. The run description is sent to that
    process as JSON over stdin and the process is not waited upon.

    Parameters
    ----------
    app_id : str
        The ID of the application.
    src : str
        The path to the application source code.
    manifest : Manifest
        The application manifest.
    run_config : dict[str, Any]
        The run configuration.
    name : Optional[str], optional
        The name for the run, by default None.
    description : Optional[str], optional
        The description for the run, by default None.
    input_data : Optional[Union[dict[str, Any], str]], optional
        The input data for the run, by default None. If `inputs_dir_path` is
        provided, this parameter is ignored.
    inputs_dir_path : Optional[str], optional
        The path to the directory containing input files, by default None. If
        provided, this parameter takes precedence over `input_data`.
    options : Optional[dict[str, Any]], optional
        Additional options for the run, by default None.

    Returns
    -------
    str
        The ID of the created run.

    Raises
    ------
    ImportError
        If `folium` or `plotly` cannot be found.
    """

    # The optional visualization dependencies must be importable.
    missing_deps = [package for package in ("folium", "plotly") if importlib.util.find_spec(package) is None]
    if missing_deps:
        raise ImportError(
            f"{' and '.join(missing_deps)} {'is' if len(missing_deps) == 1 else 'are'} not installed. "
            "Please install optional dependencies with `pip install nextmv[all]`"
        )

    # Set up the run: ID, directory with the run information, and input.
    run_id = safe_id("local")
    run_dir = new_run(
        app_id=app_id,
        src=src,
        run_id=run_id,
        run_config=run_config,
        name=name,
        description=description,
    )
    record_input(
        run_dir=run_dir,
        run_id=run_id,
        input_data=input_data,
        inputs_dir_path=inputs_dir_path,
    )

    # Everything the executor needs travels as one JSON document. Paths are
    # made absolute because the executor runs with a different working
    # directory.
    absolute_inputs_dir = None
    if inputs_dir_path is not None:
        absolute_inputs_dir = os.path.abspath(inputs_dir_path)

    payload = {
        "run_id": run_id,
        "src": os.path.abspath(src),
        "manifest_dict": manifest.to_dict(),
        "run_dir": os.path.abspath(run_dir),
        "run_config": run_config,
        "input_data": input_data,
        "inputs_dir_path": absolute_inputs_dir,
        "options": options,
    }
    serialized_payload = json.dumps(payload)

    # Launch `executor.py` without waiting for it: the payload is written to
    # its stdin, which is closed right away.
    executor = subprocess.Popen(
        [sys.executable, "executor.py"],
        env=os.environ,
        text=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        cwd=os.path.dirname(__file__),
        start_new_session=True,  # Detach from parent process
    )
    executor.stdin.write(serialized_payload)
    executor.stdin.close()

    return run_id
142
+
143
+
144
def new_run(
    app_id: str,
    src: str,
    run_id: str,
    run_config: dict[str, Any],
    name: Optional[str] = None,
    description: Optional[str] = None,
) -> str:
    """
    Initialize a new run.

    The run directory ``<src>/.nextmv/runs/<run_id>`` is created and the run
    information is written to ``<run_id>.json`` inside it.

    Parameters
    ----------
    app_id : str
        The ID of the application.
    src : str
        The path to the application source code.
    run_id : str
        The ID of the run.
    run_config : dict[str, Any]
        The run configuration. The input type is read from
        ``run_config["format"]["input"]["type"]``.
    name : Optional[str], optional
        The name for the run, by default None. When None, the name
        "local run <run_id>" is used.
    description : Optional[str], optional
        The description for the run, by default None. When None, a
        description containing the creation time is used.

    Returns
    -------
    str
        The path to the new run directory.
    """

    # Creating the run directory also creates the parent runs directory.
    run_dir = os.path.join(src, ".nextmv", "runs", run_id)
    os.makedirs(run_dir, exist_ok=True)

    # The run starts out queued, with empty sizes and duration.
    created_at = datetime.now(timezone.utc)
    input_format = FormatInput(input_type=run_config["format"]["input"]["type"])
    metadata = Metadata(
        application_id=app_id,
        application_instance_id="",
        application_version_id="",
        created_at=created_at,
        duration=0.0,
        error="",
        input_size=0.0,
        output_size=0.0,
        format=Format(format_input=input_format),
        status_v2=StatusV2.queued,
    )

    # Fill in defaults for the optional descriptive fields.
    timestamp = created_at.isoformat().replace("+00:00", "Z")
    run_description = f"Local run created at {timestamp}" if description is None else description
    run_name = f"local run {run_id}" if name is None else name

    information = RunInformation(
        description=run_description,
        id=run_id,
        metadata=metadata,
        name=run_name,
        user_email="",
    )

    info_path = os.path.join(run_dir, f"{run_id}.json")
    with open(info_path, "w") as info_file:
        json.dump(information.to_dict(), info_file, indent=2)

    return run_dir
222
+
223
+
224
def record_input(
    run_dir: str,
    run_id: str,
    input_data: Optional[Union[dict[str, Any], str]] = None,
    inputs_dir_path: Optional[str] = None,
) -> None:
    """
    Write the input of a run into the inputs directory of the run.

    After the input is stored, its size is recorded in the run information
    through ``calculate_files_size`` under the "input_size" key.

    Parameters
    ----------
    run_dir : str
        The path to the run directory.
    run_id : str
        The ID of the run.
    input_data : Optional[Union[dict[str, Any], str]], optional
        The input data for the run, by default None. A dict is written as
        JSON, a string is written as-is to a file named "input". If
        `inputs_dir_path` is provided, this parameter is ignored.
    inputs_dir_path : Optional[str], optional
        The path to the directory containing input files, by default None. If
        provided (and not empty), this parameter takes precedence over
        `input_data`. Nothing is copied when the path is not an existing
        directory.

    Raises
    ------
    ValueError
        If no inputs directory is given and `input_data` is neither a dict
        nor a string.
    """

    # Create the inputs directory.
    destination = os.path.join(run_dir, INPUTS_KEY)
    os.makedirs(destination, exist_ok=True)

    use_inputs_dir = not (inputs_dir_path is None or inputs_dir_path == "")

    if use_inputs_dir:
        # An inputs directory takes precedence over input_data: copy all of
        # its files into the run's inputs directory.
        if os.path.isdir(inputs_dir_path):
            shutil.copytree(inputs_dir_path, destination, dirs_exist_ok=True)

    elif isinstance(input_data, dict):
        # Single JSON input.
        json_path = os.path.join(destination, DEFAULT_INPUT_JSON_FILE)
        with open(json_path, "w") as json_file:
            json.dump(input_data, json_file, indent=2)

    elif isinstance(input_data, str):
        # Single TEXT input.
        text_path = os.path.join(destination, "input")
        with open(text_path, "w") as text_file:
            text_file.write(input_data)

    else:
        raise ValueError(
            "Invalid input data type: input_data must be a dict or str, or inputs_dir_path must be provided."
        )

    # Update the input size in the run information file.
    calculate_files_size(run_dir, run_id, destination, metadata_key="input_size")
276
+
277
+
278
def calculate_files_size(run_dir: str, run_id: str, dir_path: str, metadata_key: str) -> None:
    """
    Calculate the total size, in bytes, of the files in a directory.

    The directory is walked recursively and symbolic links are not counted.
    The result is stored in the run information file ``<run_id>.json`` of the
    run directory, under ``metadata[metadata_key]``.

    Parameters
    ----------
    run_dir : str
        The path to the run directory.
    run_id : str
        The ID of the run.
    dir_path : str
        The path to the directory whose size is to be calculated.
    metadata_key : str
        The key under which to store the calculated size in the run information
        metadata.
    """

    size_in_bytes = sum(
        os.path.getsize(file_path)
        for root, _, names in os.walk(dir_path)
        for file_path in (os.path.join(root, file_name) for file_name in names)
        # Symbolic links are skipped.
        if not os.path.islink(file_path)
    )

    record_path = os.path.join(run_dir, f"{run_id}.json")
    with open(record_path, "r+") as record_file:
        record = json.load(record_file)
        record["metadata"][metadata_key] = size_in_bytes
        # Rewrite the file in place and cut off any leftover of the old
        # content in case the new document is shorter.
        record_file.seek(0)
        json.dump(record, record_file, indent=2)
        record_file.truncate()