outerbounds 0.3.171__py3-none-any.whl → 0.3.173rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- outerbounds/apps/__init__.py +0 -0
- outerbounds/apps/app_cli.py +519 -0
- outerbounds/apps/app_config.py +308 -0
- outerbounds/apps/artifacts.py +0 -0
- outerbounds/apps/capsule.py +382 -0
- outerbounds/apps/code_package/__init__.py +3 -0
- outerbounds/apps/code_package/code_packager.py +612 -0
- outerbounds/apps/code_package/examples.py +125 -0
- outerbounds/apps/config_schema.yaml +194 -0
- outerbounds/apps/dependencies.py +115 -0
- outerbounds/apps/deployer.py +0 -0
- outerbounds/apps/secrets.py +164 -0
- outerbounds/apps/utils.py +228 -0
- outerbounds/apps/validations.py +34 -0
- outerbounds/command_groups/apps_cli.py +1 -5
- {outerbounds-0.3.171.dist-info → outerbounds-0.3.173rc0.dist-info}/METADATA +3 -3
- {outerbounds-0.3.171.dist-info → outerbounds-0.3.173rc0.dist-info}/RECORD +19 -5
- {outerbounds-0.3.171.dist-info → outerbounds-0.3.173rc0.dist-info}/WHEEL +0 -0
- {outerbounds-0.3.171.dist-info → outerbounds-0.3.173rc0.dist-info}/entry_points.txt +0 -0
outerbounds/apps/code_package/code_packager.py
@@ -0,0 +1,612 @@
import os
import sys
import time
import tarfile
import json
from io import BytesIO
from typing import List, Tuple, Dict, Any, Optional, Callable, Union

from metaflow.datastore.content_addressed_store import ContentAddressedStore
from metaflow.metaflow_config import (
    DATASTORE_SYSROOT_S3,
    DATASTORE_SYSROOT_AZURE,
    DATASTORE_SYSROOT_GS,
    DATASTORE_SYSROOT_LOCAL,
)

# Default prefix for code packages in the content addressed store
CODE_PACKAGE_PREFIX = "apps-code-packages"


class CodePackager:
    """
    A datastore-agnostic class for packaging code.

    This class handles creating a code package (tarball) for deployment
    and provides methods for storing and retrieving it using Metaflow's
    ContentAddressedStore directly.

    Usage examples:
    ```python
    packager = CodePackager(
        datastore_type="s3",
        datastore_root=None,
        code_package_prefix=None,
    )

    package_url, package_key = packager.store(
        paths_to_include=["./"],
        file_suffixes=[".py", ".txt", ".yaml", ".yml", ".json"],
    )

    package_url, package_key = packager.store(
        package_create_fn=lambda: my_custom_package_create_fn(),
    )
    ```
    """

    def __init__(
        self,
        datastore_type: str = "s3",
        datastore_root: Optional[str] = None,
        code_package_prefix: Optional[str] = None,
    ):
        """
        Initialize the CodePackager with datastore configuration.

        Parameters
        ----------
        datastore_type : str, default "s3"
            The type of datastore to use: "s3", "azure", "gs", or "local"
        datastore_root : str, optional
            Root path for the datastore. If not provided, uses the default for the datastore type.
        code_package_prefix : str, optional
            The prefix to use for storing code packages in the content addressed store.
            If not provided, uses the CODE_PACKAGE_PREFIX configuration value.
        """
        self._datastore_type = datastore_type
        self._datastore_root = datastore_root
        self._code_package_prefix = code_package_prefix

    def store(
        self,
        package_create_fn: Optional[Callable[[], bytes]] = None,
        paths_to_include: Optional[List[str]] = None,
        file_suffixes: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Tuple[str, str]:
        """
        Create and store a code package using Metaflow's ContentAddressedStore.

        This method can be called in two ways:
        1. With paths_to_include and file_suffixes to use the default packaging
        2. With a custom package_create_fn for custom packaging logic

        Parameters
        ----------
        package_create_fn : Callable[[], bytes], optional
            A function that creates and returns a package as bytes.
            This allows for custom packaging logic without dependency on specific objects.
        paths_to_include : List[str], optional
            List of paths to include in the package. Used by default_package_create.
        file_suffixes : List[str], optional
            List of file suffixes to include. Used by default_package_create.
        metadata : Dict[str, Any], optional
            Metadata to include in the package when using default_package_create.

        Returns
        -------
        Tuple[str, str]
            A tuple containing (package_url, package_key) that identifies the location
            and content-addressed key of the stored package.
        """
        # Prepare default values
        _paths_to_include = paths_to_include or []
        _file_suffixes = file_suffixes or [
            ".py",
            ".txt",
            ".yaml",
            ".yml",
            ".json",
            ".html",
            ".css",
            ".js",
            ".jsx",
            ".ts",
            ".tsx",
            ".md",
            ".rst",
        ]
        _metadata = metadata or {}

        # If no package_create_fn provided, use default_package_create
        if package_create_fn is None:
            _package_create_fn = lambda: self.default_package_create(
                _paths_to_include, _file_suffixes, _metadata
            )
        else:
            _package_create_fn = package_create_fn

        # Create the package
        code_package = _package_create_fn()

        # Get the ContentAddressedStore for the specified datastore
        ca_store = self.get_content_addressed_store(
            datastore_type=self._datastore_type,
            datastore_root=self._datastore_root,
            prefix=(
                str(self._code_package_prefix)
                if self._code_package_prefix is not None
                else str(CODE_PACKAGE_PREFIX)
            ),
        )

        # Store the package using raw=True to ensure we can access it directly via URL
        results = ca_store.save_blobs([code_package], raw=True, len_hint=1)
        package_url, package_key = results[0].uri, results[0].key

        return package_url, package_key

    @staticmethod
    def get_content_addressed_store(
        datastore_type: str = "s3",
        datastore_root: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> ContentAddressedStore:
        """
        Get a ContentAddressedStore instance for the specified datastore.

        Parameters
        ----------
        datastore_type : str, default "s3"
            Type of datastore: "s3", "azure", "gs", or "local"
        datastore_root : str, optional
            Root path for the datastore. If not provided, uses the default for the datastore type.
        prefix : str, optional
            Prefix to use when storing objects in the datastore.
            If not provided, uses the CODE_PACKAGE_PREFIX configuration value.

        Returns
        -------
        ContentAddressedStore
            A ContentAddressedStore instance configured for the specified datastore
        """
        from metaflow.plugins import DATASTORES

        datastore_impls = [i for i in DATASTORES if i.TYPE == datastore_type]
        if len(datastore_impls) == 0:
            raise ValueError(f"Unsupported datastore type: {datastore_type}")
        if len(datastore_impls) > 1:
            raise ValueError(
                f"Multiple datastore implementations found for type: {datastore_type}"
            )
        datastore_impl = datastore_impls[0]
        root = None
        # Resolve the datastore root based on datastore_type
        if datastore_type == "s3":
            root = datastore_root or DATASTORE_SYSROOT_S3
        elif datastore_type == "azure":
            root = datastore_root or DATASTORE_SYSROOT_AZURE
        elif datastore_type == "gs":
            root = datastore_root or DATASTORE_SYSROOT_GS
        elif datastore_type == "local":
            root = datastore_root or DATASTORE_SYSROOT_LOCAL

        # Ensure prefix is a string
        store_prefix = str(prefix) if prefix is not None else str(CODE_PACKAGE_PREFIX)

        storage_impl = datastore_impl(root=root)
        # Create and return a ContentAddressedStore
        return ContentAddressedStore(prefix=store_prefix, storage_impl=storage_impl)

    @staticmethod
    def get_download_cmd(
        package_url: str,
        datastore_type: str,
        python_cmd: str = "python",
        target_file: str = "job.tar",
        escape_quotes: bool = True,
    ) -> str:
        """
        Generate a command to download the code package.

        Parameters
        ----------
        package_url : str
            The URL of the package to download
        datastore_type : str
            The type of datastore (s3, azure, gs, local)
        python_cmd : str, optional
            The Python command to use
        target_file : str, optional
            The target file name to save the package as
        escape_quotes : bool, optional
            Whether to escape quotes in the command

        Returns
        -------
        str
            A shell command string to download the package
        """
        if datastore_type == "s3":
            from metaflow.plugins.aws.aws_utils import parse_s3_full_path

            bucket, s3_object = parse_s3_full_path(package_url)
            # Keep the script on one line and wrap it in single quotes to
            # avoid shell escaping issues
            script = 'import boto3, os; ep=os.getenv({quote}METAFLOW_S3_ENDPOINT_URL{quote}); boto3.client("s3", **({{"endpoint_url":ep}} if ep else {{}})).download_file({quote}{bucket}{quote}, {quote}{s3_object}{quote}, {quote}{target_file}{quote})'.format(
                quote='\\"' if escape_quotes else '"',
                bucket=bucket,
                s3_object=s3_object,
                target_file=target_file,
            )
            # Format the command with proper quoting
            return f"{python_cmd} -c '{script}'"
        elif datastore_type == "azure":
            from metaflow.plugins.azure.azure_utils import parse_azure_full_path

            container_name, blob = parse_azure_full_path(package_url)
            # Remove a trailing slash from the endpoint, if present
            blob_endpoint = "${METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT%/}"
            return "download-azure-blob --blob-endpoint={blob_endpoint} --container={container} --blob={blob} --output-file={target}".format(
                blob_endpoint=blob_endpoint,
                blob=blob,
                container=container_name,
                target=target_file,
            )
        elif datastore_type == "gs":
            from metaflow.plugins.gcp.gs_utils import parse_gs_full_path

            bucket_name, gs_object = parse_gs_full_path(package_url)
            return "download-gcp-object --bucket=%s --object=%s --output-file=%s" % (
                bucket_name,
                gs_object,
                target_file,
            )
        elif datastore_type == "local":
            # For local storage, simply copy the file
            return "cp %s %s" % (package_url, target_file)
        else:
            raise NotImplementedError(
                f"Download command not implemented for datastore type: {datastore_type}"
            )

    def get_package_commands(
        self,
        code_package_url: str,
        python_cmd: str = "python",
        target_file: str = "job.tar",
        working_dir: str = "metaflow",
        retries: int = 5,
        escape_quotes: bool = True,
    ) -> List[str]:
        """
        Get a complete list of shell commands to download and extract a code package.

        This method generates a comprehensive set of shell commands for downloading
        and extracting a code package, similar to MetaflowEnvironment.get_package_commands.

        Parameters
        ----------
        code_package_url : str
            The URL of the code package to download
        python_cmd : str, optional
            The Python command to use
        target_file : str, optional
            The target file name to save the package as
        working_dir : str, optional
            The directory to create and extract the package into
        retries : int, optional
            Number of download retries to attempt
        escape_quotes : bool, optional
            Whether to escape quotes in the command

        Returns
        -------
        List[str]
            List of shell commands to execute
        """
        # Use the datastore_type from initialization
        datastore_type = self._datastore_type

        # Helper function to create the dependency installation command
        def _get_install_dependencies_cmd():
            base_cmd = "{} -m pip install -qqq --no-compile --no-cache-dir --disable-pip-version-check".format(
                python_cmd
            )

            datastore_packages = {
                "s3": ["boto3"],
                "azure": [
                    "azure-identity",
                    "azure-storage-blob",
                    "azure-keyvault-secrets",
                    "simple-azure-blob-downloader",
                ],
                "gs": [
                    "google-cloud-storage",
                    "google-auth",
                    "simple-gcp-object-downloader",
                    "google-cloud-secret-manager",
                ],
                "local": [],
            }

            if datastore_type not in datastore_packages:
                raise NotImplementedError(
                    "Unknown datastore type: {}".format(datastore_type)
                )

            if not datastore_packages[datastore_type]:
                return "# No dependencies required for local datastore"

            cmd = "{} {}".format(
                base_cmd, " ".join(datastore_packages[datastore_type] + ["requests"])
            )
            # Skip pip installs if we know packages might already be available
            return "if [ -z $METAFLOW_SKIP_INSTALL_DEPENDENCIES ]; then {}; fi".format(
                cmd
            )

        download_cmd = self.get_download_cmd(
            code_package_url, datastore_type, python_cmd, target_file, escape_quotes
        )

        # Define the log functions for bash
        bash_mflog = (
            'function mflog() { echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")]" "$@"; }'
        )
        bash_flush_logs = 'function flush_mflogs() { echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] Flushing logs"; }'

        cmds = [
            bash_mflog,
            bash_flush_logs,
            "mflog 'Setting up task environment.'",
            _get_install_dependencies_cmd(),
            f"mkdir -p {working_dir}",
            f"cd {working_dir}",
            "mkdir -p .metaflow",  # mute local datastore creation log
            f"i=0; while [ $i -le {retries} ]; do "
            "mflog 'Downloading code package...'; "
            + download_cmd
            + " && mflog 'Code package downloaded.' && break; "
            "sleep 10; i=$((i+1)); "
            "done",
            # The implicitly concatenated literals below form one string; the
            # trailing % fills the %s placeholder with code_package_url
            f"if [ $i -gt {retries} ]; then "
            "mflog 'Failed to download code package from %s "
            f"after {retries+1} tries. Exiting...' && exit 1; "
            "fi" % code_package_url,
            "TAR_OPTIONS='--warning=no-timestamp' tar xf %s" % target_file,
            "mflog 'Task is starting.'",
            "flush_mflogs",
        ]

        return cmds

    @staticmethod
    def directory_walker(
        root, exclude_hidden=True, suffixes=None, follow_symlinks=True
    ) -> List[Tuple[str, str]]:
        """
        Walk a directory and return a list of (file_path, relative_arcname) tuples
        for files that match the given suffix filters.

        This function is similar to MetaflowPackage._walk and handles symlinks safely.

        Parameters
        ----------
        root : str
            The root directory to walk
        exclude_hidden : bool, default True
            Whether to exclude hidden files and directories (those starting with '.')
        suffixes : List[str], optional
            List of file suffixes to include (e.g. ['.py', '.txt'])
        follow_symlinks : bool, default True
            Whether to follow symlinks (with cycle detection)

        Returns
        -------
        List[Tuple[str, str]]
            List of tuples (file_path, relative_arcname) where:
            - file_path is the full path to the file
            - relative_arcname is the path to use within the archive
        """
        if suffixes is None:
            suffixes = []

        # Convert root to unicode to handle files/folders with non-ascii chars
        root = str(root)

        # Calculate the prefix length to strip from paths
        prefixlen = len(os.path.dirname(root)) + 1  # +1 for the trailing slash

        # Use a set to track visited symlinks to avoid cycles
        seen = set()

        def _walk_without_cycles(walk_root):
            for parent, dirs, files in os.walk(walk_root, followlinks=follow_symlinks):
                # If not following symlinks, there is nothing extra to check
                if not follow_symlinks:
                    yield parent, files
                    continue

                # When following symlinks, we need to check for cycles
                for d_idx in range(
                    len(dirs) - 1, -1, -1
                ):  # Iterate backwards to safely remove
                    d = dirs[d_idx]
                    path = os.path.join(parent, d)
                    if os.path.islink(path):
                        # Break cycles by never following the same symlink twice
                        reallink = os.path.realpath(path)
                        if reallink in seen:
                            # Remove from dirs to avoid following it
                            dirs.pop(d_idx)
                        else:
                            seen.add(reallink)

                yield parent, files

        # Build the list of path tuples
        result = []
        for path, files in _walk_without_cycles(root):
            # Skip hidden directories if requested
            if exclude_hidden and "/." in path:
                continue

            for fname in files:
                # Skip hidden files if requested, unless they have a specified suffix
                if (
                    (fname[0] == "." and fname in suffixes)
                    or (fname[0] != "." or not exclude_hidden)
                    and any(fname.endswith(suffix) for suffix in suffixes)
                ):
                    file_path = os.path.join(path, fname)
                    rel_path = file_path[prefixlen:]
                    result.append((file_path, rel_path))

        return result

    @staticmethod
    def default_package_create(
        paths: List[str], suffixes: List[str], metadata: Optional[Dict[str, Any]] = None
    ) -> bytes:
        """
        Create a default tarball package from specified paths.

        Parameters
        ----------
        paths : List[str]
            List of paths to include in the package
        suffixes : List[str]
            List of file suffixes to include
        metadata : Dict[str, Any], optional
            Metadata to include in the package

        Returns
        -------
        bytes
            The binary content of the tarball
        """
        buf = BytesIO()

        with tarfile.open(fileobj=buf, mode="w:gz", compresslevel=3) as tar:
            # Add metadata if provided
            if metadata:
                metadata_buf = BytesIO()
                metadata_buf.write(json.dumps(metadata).encode("utf-8"))
                metadata_buf.seek(0)
                info = tarfile.TarInfo("metadata.json")
                info.size = len(metadata_buf.getvalue())
                # Fixed timestamp (13 May 2025) so that the package hash does
                # not change every time we run
                info.mtime = 1747158696
                tar.addfile(info, metadata_buf)

            def no_mtime(tarinfo):
                # A modification time change should not change the hash of
                # the package; only content modifications should. Pin the
                # mtime to the same fixed date as above.
                tarinfo.mtime = 1747158696
                return tarinfo

            # Add files from specified paths
            for path in paths:
                if os.path.isdir(path):
                    # Use directory_walker for directories to handle symlinks properly
                    for file_path, rel_path in CodePackager.directory_walker(
                        path,
                        exclude_hidden=True,
                        suffixes=suffixes,
                        follow_symlinks=True,
                    ):
                        tar.add(
                            file_path,
                            arcname=rel_path,
                            filter=no_mtime,
                            recursive=False,
                        )
                elif os.path.isfile(path):
                    if any(path.endswith(suffix) for suffix in suffixes):
                        # Apply no_mtime here as well so single-file additions
                        # do not reintroduce a changing hash
                        tar.add(path, arcname=os.path.basename(path), filter=no_mtime)

        tarball = bytearray(buf.getvalue())
        # Zero the gzip header MTIME field (bytes 4-8) so the compressed
        # stream's hash is independent of when it was created
        tarball[4:8] = [0] * 4
        return tarball

    @staticmethod
    def _add_tar_file(tar, filename, buf):
        tarinfo = tarfile.TarInfo(name=filename)
        tarinfo.size = len(buf.getvalue())
        buf.seek(0)
        # Fixed timestamp (13 May 2025) so that the package hash does not
        # change every time we run
        tarinfo.mtime = 1747158696
        tar.addfile(tarinfo, fileobj=buf)

    @classmethod
    def package_directory(
        cls,
        directory_path: str,
        suffixes: Optional[List[str]] = None,
        exclude_hidden: bool = True,
        metadata: Optional[Dict[str, Any]] = None,
        follow_symlinks: bool = True,
    ) -> bytes:
        """
        Package a directory and all of its contents that match the given suffixes.

        This is a convenience method that works similarly to MetaflowPackage._walk
        to package a directory for deployment.

        Parameters
        ----------
        directory_path : str
            The directory to package
        suffixes : List[str], optional
            List of file suffixes to include (defaults to standard code extensions)
        exclude_hidden : bool, default True
            Whether to exclude hidden files and directories
        metadata : Dict[str, Any], optional
            Metadata to include in the package
        follow_symlinks : bool, default True
            Whether to follow symlinks when walking the directory

        Returns
        -------
        bytes
            The binary content of the tarball
        """
        if not os.path.isdir(directory_path):
            raise ValueError(f"The path '{directory_path}' is not a directory")

        # Use default suffixes if none provided
        if suffixes is None:
            suffixes = [".py", ".txt", ".yaml", ".yml", ".json"]

        buf = BytesIO()

        def no_mtime(tarinfo):
            # A modification time change should not change the hash of the
            # package; only content modifications should. Pin the mtime to a
            # fixed date (13 May 2025) so the hash stays stable across runs.
            tarinfo.mtime = 1747158696
            return tarinfo

        with tarfile.open(
            fileobj=buf, mode="w:gz", compresslevel=3, dereference=True
        ) as tar:
            # Add metadata if provided
            if metadata:
                cls._add_tar_file(
                    tar, "metadata.json", BytesIO(json.dumps(metadata).encode("utf-8"))
                )

            # Walk the directory and add matching files
            for file_path, rel_path in cls.directory_walker(
                directory_path,
                exclude_hidden=exclude_hidden,
                suffixes=suffixes,
                follow_symlinks=follow_symlinks,
            ):
                tar.add(file_path, arcname=rel_path, recursive=False, filter=no_mtime)

        tarball = bytearray(buf.getvalue())
        # Zero the gzip header MTIME field (bytes 4-8) so the compressed
        # stream's hash is independent of when it was created
        tarball[4:8] = [0] * 4
        return tarball
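
A minimal end-to-end sketch of how the new `CodePackager` API can be driven, assuming `outerbounds` and `metaflow` are installed; the `./my_app` path, datastore root, and metadata values are illustrative:

```python
from outerbounds.apps.code_package.code_packager import CodePackager

# Package everything under ./my_app and store it in a local content
# addressed store rooted at /tmp/mf-datastore (illustrative paths).
packager = CodePackager(datastore_type="local", datastore_root="/tmp/mf-datastore")
package_url, package_key = packager.store(
    paths_to_include=["./my_app"],
    file_suffixes=[".py", ".yaml"],
    metadata={"app": "my_app"},  # illustrative metadata
)

# Shell commands a remote worker could run to fetch and unpack the package.
for cmd in packager.get_package_commands(code_package_url=package_url):
    print(cmd)
```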
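`get_download_cmd` can also be used on its own, for example to embed a fetch step in a container entrypoint. A sketch with an illustrative S3 URL:

```python
from outerbounds.apps.code_package.code_packager import CodePackager

# Generate a standalone shell command that downloads a stored package.
cmd = CodePackager.get_download_cmd(
    package_url="s3://my-bucket/apps-code-packages/0123abcd",  # illustrative
    datastore_type="s3",
    target_file="job.tar",
    escape_quotes=False,
)
print(cmd)  # python -c 'import boto3, os; ...download_file(...)'
```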
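The fixed `mtime` values and the zeroed gzip header bytes exist to make packaging deterministic: packaging unchanged content twice should yield byte-identical tarballs, and therefore the same content-addressed key. A quick check of that property, again with an illustrative path:

```python
from outerbounds.apps.code_package.code_packager import CodePackager

# Two packaging runs over the same, unchanged directory should match exactly.
blob_a = CodePackager.package_directory("./my_app", suffixes=[".py"])
blob_b = CodePackager.package_directory("./my_app", suffixes=[".py"])
assert bytes(blob_a) == bytes(blob_b)
```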
|