outerbounds 0.3.183rc1__py3-none-any.whl → 0.3.185__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- outerbounds/__init__.py +1 -3
- outerbounds/command_groups/apps_cli.py +6 -2
- {outerbounds-0.3.183rc1.dist-info → outerbounds-0.3.185.dist-info}/METADATA +3 -3
- {outerbounds-0.3.183rc1.dist-info → outerbounds-0.3.185.dist-info}/RECORD +6 -29
- outerbounds-0.3.185.dist-info/entry_points.txt +3 -0
- outerbounds/_vendor/spinner/__init__.py +0 -4
- outerbounds/_vendor/spinner/spinners.py +0 -478
- outerbounds/_vendor/spinner.LICENSE +0 -21
- outerbounds/apps/__init__.py +0 -0
- outerbounds/apps/_state_machine.py +0 -472
- outerbounds/apps/app_cli.py +0 -1514
- outerbounds/apps/app_config.py +0 -296
- outerbounds/apps/artifacts.py +0 -0
- outerbounds/apps/capsule.py +0 -839
- outerbounds/apps/cli_to_config.py +0 -99
- outerbounds/apps/click_importer.py +0 -24
- outerbounds/apps/code_package/__init__.py +0 -3
- outerbounds/apps/code_package/code_packager.py +0 -610
- outerbounds/apps/code_package/examples.py +0 -125
- outerbounds/apps/config_schema.yaml +0 -269
- outerbounds/apps/config_schema_autogen.json +0 -336
- outerbounds/apps/dependencies.py +0 -115
- outerbounds/apps/deployer.py +0 -0
- outerbounds/apps/experimental/__init__.py +0 -110
- outerbounds/apps/perimeters.py +0 -45
- outerbounds/apps/secrets.py +0 -164
- outerbounds/apps/utils.py +0 -234
- outerbounds/apps/validations.py +0 -22
- outerbounds-0.3.183rc1.dist-info/entry_points.txt +0 -3
- {outerbounds-0.3.183rc1.dist-info → outerbounds-0.3.185.dist-info}/WHEEL +0 -0
outerbounds/apps/code_package/code_packager.py
@@ -1,610 +0,0 @@
-import os
-import sys
-import time
-import tarfile
-import json
-from io import BytesIO
-from typing import List, Tuple, Dict, Any, Optional, Callable, Union
-
-from metaflow.datastore.content_addressed_store import ContentAddressedStore
-from metaflow.util import to_unicode
-from metaflow.metaflow_config import (
-    DATASTORE_SYSROOT_S3,
-    DATASTORE_SYSROOT_AZURE,
-    DATASTORE_SYSROOT_GS,
-    DATASTORE_SYSROOT_LOCAL,
-)
-
-# Default prefix for code packages in content addressed store
-CODE_PACKAGE_PREFIX = "apps-code-packages"
-
-
-# this is os.walk(follow_symlinks=True) with cycle detection
-def walk_without_cycles(top_root):
-    seen = set()
-
-    def _recurse(root):
-        for parent, dirs, files in os.walk(root):
-            for d in dirs:
-                path = os.path.join(parent, d)
-                if os.path.islink(path):
-                    # Breaking loops: never follow the same symlink twice
-                    #
-                    # NOTE: this also means that links to sibling links are
-                    # not followed. In this case:
-                    #
-                    # x -> y
-                    # y -> oo
-                    # oo/real_file
-                    #
-                    # real_file is only included twice, not three times
-                    reallink = os.path.realpath(path)
-                    if reallink not in seen:
-                        seen.add(reallink)
-                        for x in _recurse(path):
-                            yield x
-            yield parent, files
-
-    for x in _recurse(top_root):
-        yield x
-
-
-def symlink_friendly_walk(root, exclude_hidden=True, suffixes=None):
-    if suffixes is None:
-        suffixes = []
-    root = to_unicode(root)  # handle files/folder with non ascii chars
-    prefixlen = len("%s/" % os.path.dirname(root))
-    for (
-        path,
-        files,
-    ) in walk_without_cycles(root):
-        if exclude_hidden and "/." in path:
-            continue
-        # path = path[2:] # strip the ./ prefix
-        # if path and (path[0] == '.' or './' in path):
-        #     continue
-        for fname in files:
-            if (fname[0] == "." and fname in suffixes) or (
-                fname[0] != "." and any(fname.endswith(suffix) for suffix in suffixes)
-            ):
-                p = os.path.join(path, fname)
-                yield p, p[prefixlen:]
-
-
-class CodePackager:
-    """
-    A datastore-agnostic class for packaging code.
-
-    This class handles creating a code package (tarball) for deployment
-    and provides methods for storing and retrieving it using Metaflow's
-    ContentAddressedStore directly.
-
-    Usage examples:
-    ```python
-    packager = CodePackager(
-        datastore_type: str = "s3",
-        datastore_root = None,
-        code_package_prefix = None,
-    )
-
-    package_url, package_key = packager.store(
-        paths_to_include = ["./"],
-        file_suffixes = [".py", ".txt", ".yaml", ".yml", ".json"],
-    )
-
-    package_url, package_key = packager.store(
-        package_create_fn = lambda: my_custom_package_create_fn(),
-    )
-    ```
-    """
-
-    def __init__(
-        self,
-        datastore_type: str = "s3",
-        datastore_root: Optional[str] = None,
-        code_package_prefix: Optional[str] = None,
-    ):
-        """
-        Initialize the CodePackager with datastore configuration.
-
-        Parameters
-        ----------
-        datastore_type : str, default "s3"
-            The type of datastore to use: "s3", "azure", "gs", or "local"
-        datastore_root : str, optional
-            Root path for the datastore. If not provided, uses the default for the datastore type.
-        code_package_prefix : str, optional
-            The prefix to use for storing code packages in the content addressed store.
-            If not provided, uses the CODE_PACKAGE_PREFIX configuration value.
-        """
-        self._datastore_type = datastore_type
-        self._datastore_root = datastore_root
-        self._code_package_prefix = code_package_prefix
-
-    def store(
-        self,
-        package_create_fn: Optional[Callable[[], bytes]] = None,
-        paths_to_include: Optional[List[str]] = None,
-        file_suffixes: Optional[List[str]] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> Tuple[str, str]:
-        """
-        Create and store a code package using Metaflow's ContentAddressedStore.
-
-        This method can be called in two ways:
-        1. With paths_to_include and file_suffixes to use the default packaging
-        2. With a custom package_create_fn for custom packaging logic
-
-        Parameters
-        ----------
-        package_create_fn : Callable[[], bytes], optional
-            A function that creates and returns a package as bytes.
-            This allows for custom packaging logic without dependency on specific objects.
-        paths_to_include : List[str], optional
-            List of paths to include in the package. Used by default_package_create.
-        file_suffixes : List[str], optional
-            List of file suffixes to include. Used by default_package_create.
-        metadata : Dict[str, Any], optional
-            Metadata to include in the package when using default_package_create.
-
-        Returns
-        -------
-        Tuple[str, str]
-            A tuple containing (package_url, package_key) that identifies the location
-            and content-addressed key of the stored package.
-        """
-        # Prepare default values
-        _paths_to_include = paths_to_include or []
-        _file_suffixes = file_suffixes or [
-            ".py",
-            ".txt",
-            ".yaml",
-            ".yml",
-            ".json",
-            ".html",
-            ".css",
-            ".js",
-            ".jsx",
-            ".ts",
-            ".tsx",
-            ".md",
-            ".rst",
-        ]
-        _metadata = metadata or {}
-
-        # If no package_create_fn provided, use default_package_create
-        if package_create_fn is None:
-            _package_create_fn = lambda: self.default_package_create(
-                _paths_to_include, _file_suffixes, _metadata
-            )
-        else:
-            _package_create_fn = package_create_fn
-
-        # Create the package
-        code_package = _package_create_fn()
-
-        # Get the ContentAddressedStore for the specified datastore
-        ca_store = self.get_content_addressed_store(
-            datastore_type=self._datastore_type,
-            datastore_root=self._datastore_root,
-            prefix=(
-                str(self._code_package_prefix)
-                if self._code_package_prefix is not None
-                else str(CODE_PACKAGE_PREFIX)
-            ),
-        )
-
-        # Store the package using raw=True to ensure we can access it directly via URL
-        results = ca_store.save_blobs([code_package], raw=True, len_hint=1)
-        package_url, package_key = results[0].uri, results[0].key
-
-        return package_url, package_key
-
-    @staticmethod
-    def get_content_addressed_store(
-        datastore_type: str = "s3",
-        datastore_root: Optional[str] = None,
-        prefix: Optional[str] = None,
-    ) -> ContentAddressedStore:
-        """
-        Get a ContentAddressedStore instance for the specified datastore.
-
-        Parameters
-        ----------
-        datastore_type : str, default "s3"
-            Type of datastore: "s3", "azure", "gs", or "local"
-        datastore_root : str, optional
-            Root path for the datastore. If not provided, uses the default for the datastore type.
-        prefix : str, optional
-            Prefix to use when storing objects in the datastore.
-            If not provided, uses the CODE_PACKAGE_PREFIX configuration value.
-
-        Returns
-        -------
-        ContentAddressedStore
-            A ContentAddressedStore instance configured for the specified datastore
-        """
-        from metaflow.plugins import DATASTORES
-
-        datastore_impls = [i for i in DATASTORES if i.TYPE == datastore_type]
-        if len(datastore_impls) == 0:
-            raise ValueError(f"Unsupported datastore type: {datastore_type}")
-        if len(datastore_impls) > 1:
-            raise ValueError(
-                f"Multiple datastore implementations found for type: {datastore_type}"
-            )
-        datastore_impl = datastore_impls[0]
-        root = None
-        # Import the storage implementation based on datastore_type
-        if datastore_type == "s3":
-            root = datastore_root or DATASTORE_SYSROOT_S3
-        elif datastore_type == "azure":
-            root = datastore_root or DATASTORE_SYSROOT_AZURE
-        elif datastore_type == "gs":
-            root = datastore_root or DATASTORE_SYSROOT_GS
-        elif datastore_type == "local":
-            root = datastore_root or DATASTORE_SYSROOT_LOCAL
-
-        # Ensure prefix is a string
-        store_prefix = str(prefix) if prefix is not None else str(CODE_PACKAGE_PREFIX)
-
-        storage_impl = datastore_impl(root=root)
-        # Create and return a ContentAddressedStore
-        return ContentAddressedStore(prefix=store_prefix, storage_impl=storage_impl)
-
-    @staticmethod
-    def get_download_cmd(
-        package_url: str,
-        datastore_type: str,
-        python_cmd: str = "python",
-        target_file: str = "job.tar",
-        escape_quotes: bool = True,
-    ) -> str:
-        """
-        Generate a command to download the code package.
-
-        Parameters
-        ----------
-        package_url : str
-            The URL of the package to download
-        datastore_type : str
-            The type of datastore (s3, azure, gs, local)
-        python_cmd : str, optional
-            The Python command to use
-        target_file : str, optional
-            The target file name to save the package as
-        escape_quotes : bool, optional
-            Whether to escape quotes in the command
-
-        Returns
-        -------
-        str
-            A shell command string to download the package
-        """
-        if datastore_type == "s3":
-            from metaflow.plugins.aws.aws_utils import parse_s3_full_path
-
-            bucket, s3_object = parse_s3_full_path(package_url)
-            # Simplify the script and use single quotes to avoid shell escaping issues
-            script = 'import boto3, os; ep=os.getenv({quote}METAFLOW_S3_ENDPOINT_URL{quote}); boto3.client("s3", **({{"endpoint_url":ep}} if ep else {{}})).download_file({quote}{bucket}{quote}, {quote}{s3_object}{quote}, {quote}{target_file}{quote})'.format(
-                quote='\\"' if escape_quotes else '"',
-                bucket=bucket,
-                s3_object=s3_object,
-                target_file=target_file,
-            )
-            # Format the command with proper quoting
-            return f"{python_cmd} -c '{script}'"
-        elif datastore_type == "azure":
-            from metaflow.plugins.azure.azure_utils import parse_azure_full_path
-
-            container_name, blob = parse_azure_full_path(package_url)
-            # remove a trailing slash, if present
-            blob_endpoint = "${METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT%/}"
-            return "download-azure-blob --blob-endpoint={blob_endpoint} --container={container} --blob={blob} --output-file={target}".format(
-                blob_endpoint=blob_endpoint,
-                blob=blob,
-                container=container_name,
-                target=target_file,
-            )
-        elif datastore_type == "gs":
-            from metaflow.plugins.gcp.gs_utils import parse_gs_full_path
-
-            bucket_name, gs_object = parse_gs_full_path(package_url)
-            return "download-gcp-object --bucket=%s --object=%s --output-file=%s" % (
-                bucket_name,
-                gs_object,
-                target_file,
-            )
-        elif datastore_type == "local":
-            # For local storage, simply copy the file
-            return "cp %s %s" % (package_url, target_file)
-        else:
-            raise NotImplementedError(
-                f"Download command not implemented for datastore type: {datastore_type}"
-            )
-
-    def get_package_commands(
-        self,
-        code_package_url: str,
-        python_cmd: str = "python",
-        target_file: str = "job.tar",
-        working_dir: str = "metaflow",
-        retries: int = 5,
-        escape_quotes: bool = True,
-    ) -> List[str]:
-        """
-        Get a complete list of shell commands to download and extract a code package.
-
-        This method generates a comprehensive set of shell commands for downloading
-        and extracting a code package, similar to MetaflowEnvironment.get_package_commands.
-
-        Parameters
-        ----------
-        code_package_url : str
-            The URL of the code package to download
-        python_cmd : str, optional
-            The Python command to use
-        target_file : str, optional
-            The target file name to save the package as
-        working_dir : str, optional
-            The directory to create and extract the package into
-        retries : int, optional
-            Number of download retries to attempt
-        escape_quotes : bool, optional
-            Whether to escape quotes in the command
-
-        Returns
-        -------
-        List[str]
-            List of shell commands to execute
-        """
-        # Use the datastore_type from initialization if not provided
-        datastore_type = self._datastore_type
-
-        # Helper function to create dependency installation command
-        def _get_install_dependencies_cmd():
-            base_cmd = "{} -m pip install -qqq --no-compile --no-cache-dir --disable-pip-version-check".format(
-                python_cmd
-            )
-
-            datastore_packages = {
-                "s3": ["boto3"],
-                "azure": [
-                    "azure-identity",
-                    "azure-storage-blob",
-                    "azure-keyvault-secrets",
-                    "simple-azure-blob-downloader",
-                ],
-                "gs": [
-                    "google-cloud-storage",
-                    "google-auth",
-                    "simple-gcp-object-downloader",
-                    "google-cloud-secret-manager",
-                ],
-                "local": [],
-            }
-
-            if datastore_type not in datastore_packages:
-                raise NotImplementedError(
-                    "Unknown datastore type: {}".format(datastore_type)
-                )
-
-            if not datastore_packages[datastore_type]:
-                return "# No dependencies required for local datastore"
-
-            cmd = "{} {}".format(
-                base_cmd, " ".join(datastore_packages[datastore_type] + ["requests"])
-            )
-            # Skip pip installs if we know packages might already be available
-            return "if [ -z $METAFLOW_SKIP_INSTALL_DEPENDENCIES ]; then {}; fi".format(
-                cmd
-            )
-
-        download_cmd = self.get_download_cmd(
-            code_package_url, datastore_type, python_cmd, target_file, escape_quotes
-        )
-
-        # Define the log functions for bash
-        bash_mflog = (
-            'function mflog() { echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")]" "$@"; }'
-        )
-        bash_flush_logs = 'function flush_mflogs() { echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] Flushing logs"; }'
-
-        cmds = [
-            bash_mflog,
-            bash_flush_logs,
-            "mflog 'Setting up task environment.'",
-            _get_install_dependencies_cmd(),
-            f"mkdir -p {working_dir}",
-            f"cd {working_dir}",
-            "mkdir -p .metaflow",  # mute local datastore creation log
-            f"i=0; while [ $i -le {retries} ]; do "
-            "mflog 'Downloading code package...'; "
-            + download_cmd
-            + " && mflog 'Code package downloaded.' && break; "
-            "sleep 10; i=$((i+1)); "
-            "done",
-            f"if [ $i -gt {retries} ]; then "
-            "mflog 'Failed to download code package from %s "
-            f"after {retries+1} tries. Exiting...' && exit 1; "
-            "fi" % code_package_url,
-            "TAR_OPTIONS='--warning=no-timestamp' tar xf %s" % target_file,
-            "mflog 'Task is starting.'",
-            "flush_mflogs",
-        ]
-
-        return cmds
-
-    @staticmethod
-    def directory_walker(
-        root,
-        exclude_hidden=True,
-        suffixes=None,
-    ) -> List[Tuple[str, str]]:
-        """
-        Walk a directory and yield tuples of (file_path, relative_arcname) for files
-        that match the given suffix filters. It will follow symlinks, but not cycles.
-
-        This function is similar to MetaflowPackage._walk and handles symlinks safely.
-
-        Parameters
-        ----------
-        root : str
-            The root directory to walk
-        exclude_hidden : bool, default True
-            Whether to exclude hidden files and directories (those starting with '.')
-        suffixes : List[str], optional
-            List of file suffixes to include (e.g. ['.py', '.txt'])
-
-        Returns
-        -------
-        List[Tuple[str, str]]
-            List of tuples (file_path, relative_arcname) where:
-            - file_path is the full path to the file
-            - relative_arcname is the path to use within the archive
-        """
-        files = []
-        for file_path, rel_path in symlink_friendly_walk(
-            root, exclude_hidden, suffixes
-        ):
-            files.append((file_path, rel_path))
-        return files
-
-    @staticmethod
-    def default_package_create(
-        paths: List[str], suffixes: List[str], metadata: Optional[Dict[str, Any]] = None
-    ) -> bytes:
-        """
-        Create a default tarball package from specified paths.
-
-        Parameters
-        ----------
-        paths : List[str]
-            List of paths to include in the package
-        suffixes : List[str]
-            List of file suffixes to include
-        metadata : Dict[str, Any], optional
-            Metadata to include in the package
-
-        Returns
-        -------
-        bytes
-            The binary content of the tarball
-        """
-        buf = BytesIO()
-
-        with tarfile.open(fileobj=buf, mode="w:gz", compresslevel=3) as tar:
-            # Add metadata if provided
-            if metadata:
-                metadata_buf = BytesIO()
-                metadata_buf.write(json.dumps(metadata).encode("utf-8"))
-                metadata_buf.seek(0)
-                info = tarfile.TarInfo("metadata.json")
-                info.size = len(metadata_buf.getvalue())
-                info.mtime = 1747158696  # 13 May 2025 10:31:36 (so that we dont have a changing hash everytime we run)
-                tar.addfile(info, metadata_buf)
-
-            def no_mtime(tarinfo):
-                # a modification time change should not change the hash of
-                # the package. Only content modifications will.
-                # Setting this default to Dec 3, 2019
-                tarinfo.mtime = 1747158696  # 13 May 2025 10:31:36 (so that we dont have a changing hash everytime we run)
-                return tarinfo
-
-            # Add files from specified paths
-            for path in paths:
-                if os.path.isdir(path):
-                    # Use directory_walker for directories to handle symlinks properly
-                    for file_path, rel_path in CodePackager.directory_walker(
-                        path,
-                        exclude_hidden=True,
-                        suffixes=suffixes,
-                    ):
-                        tar.add(
-                            file_path,
-                            arcname=rel_path,
-                            filter=no_mtime,
-                            recursive=False,
-                        )
-                elif os.path.isfile(path):
-                    if any(path.endswith(suffix) for suffix in suffixes):
-                        tar.add(path, arcname=os.path.basename(path))
-
-        tarball = bytearray(buf.getvalue())
-        tarball[4:8] = [0] * 4  # Reset 4 bytes from offset 4 to account for ts
-        return tarball
-
-    @staticmethod
-    def _add_tar_file(tar, filename, buf):
-        tarinfo = tarfile.TarInfo(name=filename)
-        tarinfo.size = len(buf.getvalue())
-        buf.seek(0)
-        tarinfo.mtime = 1747158696  # 13 May 2025 10:31:36 (so that we dont have a changing hash everytime we run)
-        tar.addfile(tarinfo, fileobj=buf)
-
-    @classmethod
-    def package_directory(
-        cls,
-        directory_path: str,
-        suffixes: Optional[List[str]] = None,
-        exclude_hidden: bool = True,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> bytes:
-        """
-        Package a directory and all of its contents that match the given suffixes.
-
-        This is a convenience method that works similarly to MetaflowPackage._walk
-        to package a directory for deployment. Will default follow_symlinks.
-
-        Parameters
-        ----------
-        directory_path : str
-            The directory to package
-        suffixes : List[str], optional
-            List of file suffixes to include (defaults to standard code extensions)
-        exclude_hidden : bool, default True
-            Whether to exclude hidden files and directories
-        metadata : Dict[str, Any], optional
-            Metadata to include in the package
-        Returns
-        -------
-        bytes
-            The binary content of the tarball
-        """
-        if not os.path.isdir(directory_path):
-            raise ValueError(f"The path '{directory_path}' is not a directory")
-
-        # Use default suffixes if none provided
-        if suffixes is None:
-            suffixes = [".py", ".txt", ".yaml", ".yml", ".json"]
-
-        buf = BytesIO()
-
-        def no_mtime(tarinfo):
-            # a modification time change should not change the hash of
-            # the package. Only content modifications will.
-            # Setting this to a fixed date so that we don't have a changing hash everytime we run
-            tarinfo.mtime = 1747158696  # 13 May 2025 10:31:36
-            return tarinfo
-
-        with tarfile.open(
-            fileobj=buf, mode="w:gz", compresslevel=3, dereference=True
-        ) as tar:
-            # Add metadata if provided
-            if metadata:
-                cls._add_tar_file(
-                    tar, "metadata.json", BytesIO(json.dumps(metadata).encode("utf-8"))
-                )
-
-            # Walk the directory and add matching files
-            for file_path, rel_path in cls.directory_walker(
-                directory_path,
-                exclude_hidden=exclude_hidden,
-                suffixes=suffixes,
-            ):
-                # Remove debug print statement
-                tar.add(file_path, arcname=rel_path, recursive=False, filter=no_mtime)
-
-        tarball = bytearray(buf.getvalue())
-        tarball[4:8] = [0] * 4  # Reset 4 bytes from offset 4 to account for ts
-        return tarball
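For reference, the removed `CodePackager` documents its own intended usage in the class docstring above. A minimal sketch based on that docstring — assuming outerbounds 0.3.183rc1 or earlier (the `outerbounds/apps` tree is gone in 0.3.185) and a Metaflow-configured S3 datastore; the docstring's pseudo-call syntax is corrected to keyword arguments here:

```python
# Sketch based on the removed module's docstring; this import path only
# exists in outerbounds <= 0.3.183rc1.
from outerbounds.apps.code_package.code_packager import CodePackager

# Package code from the current directory into a content-addressed tarball.
packager = CodePackager(
    datastore_type="s3",       # "s3", "azure", "gs", or "local"
    datastore_root=None,       # None falls back to DATASTORE_SYSROOT_S3
    code_package_prefix=None,  # None falls back to "apps-code-packages"
)
package_url, package_key = packager.store(
    paths_to_include=["./"],
    file_suffixes=[".py", ".txt", ".yaml", ".yml", ".json"],
)

# The returned URL can then be expanded into the shell commands that
# download and unpack the package on a remote worker.
setup_cmds = packager.get_package_commands(code_package_url=package_url)
print("\n".join(setup_cmds))
```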