ob-metaflow-extensions 1.1.151__py2.py3-none-any.whl → 1.6.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. metaflow_extensions/outerbounds/__init__.py +1 -1
  2. metaflow_extensions/outerbounds/plugins/__init__.py +24 -3
  3. metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
  4. metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +16 -0
  5. metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
  6. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
  7. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
  8. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
  9. metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +128 -0
  10. metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +333 -0
  11. metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
  12. metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +1029 -0
  13. metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
  14. metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
  15. metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
  16. metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
  17. metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +15 -0
  18. metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +165 -0
  19. metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +966 -0
  20. metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +299 -0
  21. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +233 -0
  22. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +537 -0
  23. metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1125 -0
  24. metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
  25. metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
  26. metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +1300 -0
  27. metaflow_extensions/outerbounds/plugins/apps/core/exceptions.py +341 -0
  28. metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
  29. metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +123 -0
  30. metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
  31. metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
  32. metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
  33. metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
  34. metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
  35. metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +118 -0
  36. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +9 -77
  37. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
  38. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +7 -78
  39. metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +119 -0
  40. metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
  41. metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
  42. metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
  43. metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
  44. metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
  45. metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
  46. metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
  47. metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
  48. metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
  49. metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +32 -8
  50. metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +1 -1
  51. metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
  52. metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
  53. metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
  54. metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
  55. metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
  56. metaflow_extensions/outerbounds/plugins/optuna/__init__.py +49 -0
  57. metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
  58. metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
  59. metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
  60. metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
  61. metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
  62. metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
  63. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
  64. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
  65. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
  66. metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +37 -7
  67. metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +18 -8
  68. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +6 -0
  69. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +45 -18
  70. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +18 -9
  71. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +10 -4
  72. metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
  73. metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
  74. metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
  75. metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
  76. metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
  77. metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
  78. metaflow_extensions/outerbounds/remote_config.py +46 -9
  79. metaflow_extensions/outerbounds/toplevel/apps/__init__.py +9 -0
  80. metaflow_extensions/outerbounds/toplevel/apps/exceptions.py +11 -0
  81. metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +86 -2
  82. metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
  83. metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
  84. metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
  85. metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
  86. metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
  87. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/METADATA +2 -2
  88. ob_metaflow_extensions-1.6.2.dist-info/RECORD +136 -0
  89. metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
  90. ob_metaflow_extensions-1.1.151.dist-info/RECORD +0 -74
  91. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/WHEEL +0 -0
  92. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,24 @@
1
"""
Select the correct vendored ``click`` module at import time.

Both ``outerbounds`` and ``metaflow`` vendor their own copy of click, so an
object created from one import path does not work with objects created from
the other: ``outerbounds._vendor.click.Group`` and
``metaflow._vendor.click.Group`` are different classes.  Depending on which
project is loading this CLI, we therefore have to import click from the
matching vendor tree.

This only affects the click constructs used to build the CLI decorators; it
has no impact on any click capabilities used for logging.
"""
import os

# Import hack: the environment variable tells us which project is loading us.
if os.environ.get("APPS_CLI_LOADING_IN_OUTERBOUNDS", None):
    from outerbounds._vendor import click as _vendored_click
else:
    from metaflow._vendor import click as _vendored_click

click = _vendored_click  # type: ignore
@@ -0,0 +1,3 @@
1
"""Public API of the code_package subpackage: re-export the packager."""
from .code_packager import CodePackager

__all__ = ["CodePackager"]
@@ -0,0 +1,618 @@
1
+ import os
2
+ import sys
3
+ import time
4
+ import tarfile
5
+ import json
6
+ from io import BytesIO
7
+ from typing import List, Tuple, Dict, Any, Optional, Callable, Union
8
+
9
+ from metaflow.datastore.content_addressed_store import ContentAddressedStore
10
+ from metaflow.util import to_unicode
11
+ from metaflow.metaflow_config import (
12
+ DATASTORE_SYSROOT_S3,
13
+ DATASTORE_SYSROOT_AZURE,
14
+ DATASTORE_SYSROOT_GS,
15
+ DATASTORE_SYSROOT_LOCAL,
16
+ )
17
+
18
# File suffixes included in a code package when the caller does not supply
# an explicit filter.
DEFAULT_FILE_SUFFIXES = [
    ".py", ".txt", ".yaml", ".yml", ".json", ".html", ".css", ".js",
    ".jsx", ".ts", ".tsx", ".md", ".rst",
]

# Default prefix for code packages in the content addressed store.
CODE_PACKAGE_PREFIX = "apps-code-packages"
35
+
36
+
37
# os.walk(follow_symlinks=True) with symlink-cycle detection
def walk_without_cycles(top_root):
    """Yield ``(directory, filenames)`` pairs under ``top_root``.

    Behaves like ``os.walk`` that follows symlinks, except each symlink
    *target* is descended into at most once, so loops cannot recurse forever.
    A consequence is that links to sibling links are not followed, e.g. with
    ``x -> y``, ``y -> oo`` and ``oo/real_file``, real_file shows up twice,
    not three times.
    """
    visited_targets = set()

    def _walk(current):
        for parent, subdirs, filenames in os.walk(current):
            for name in subdirs:
                candidate = os.path.join(parent, name)
                if not os.path.islink(candidate):
                    continue
                # Never follow the same symlink target twice: breaks cycles.
                target = os.path.realpath(candidate)
                if target in visited_targets:
                    continue
                visited_targets.add(target)
                yield from _walk(candidate)
            yield parent, filenames

    yield from _walk(top_root)
65
+
66
+
67
def symlink_friendly_walk(root, exclude_hidden=True, suffixes=None):
    """Yield ``(path, arcname)`` pairs for files under ``root`` that match ``suffixes``.

    Walks symlinks safely via ``walk_without_cycles``.  Directories whose
    path contains a hidden component are skipped when ``exclude_hidden`` is
    true.  Hidden files are included only when their full name appears in
    ``suffixes``; regular files match on any listed suffix.  The arcname is
    the file path made relative to the parent directory of ``root``.
    """
    if suffixes is None:
        suffixes = []
    # Normalize so files/folders with non-ASCII names are handled uniformly.
    root = to_unicode(root)
    strip_len = len("%s/" % os.path.dirname(root))
    for dirpath, filenames in walk_without_cycles(root):
        if exclude_hidden and "/." in dirpath:
            continue
        for name in filenames:
            if name.startswith("."):
                wanted = name in suffixes
            else:
                wanted = any(name.endswith(sfx) for sfx in suffixes)
            if wanted:
                full_path = os.path.join(dirpath, name)
                yield full_path, full_path[strip_len:]
87
+
88
+
89
class CodePackager:
    """
    A datastore-agnostic class for packaging code.

    This class handles creating a code package (tarball) for deployment
    and provides methods for storing and retrieving it using Metaflow's
    ContentAddressedStore directly.

    Usage examples:
    ```python
    packager = CodePackager(
        datastore_type="s3",
        datastore_root=None,
        code_package_prefix=None,
    )

    package_url, package_key = packager.store(
        paths_to_include=["./"],
        file_suffixes=[".py", ".txt", ".yaml", ".yml", ".json"],
    )

    package_url, package_key = packager.store(
        package_create_fn=lambda: my_custom_package_create_fn(),
    )
    ```
    """

    # 13 May 2025 10:31:36 UTC.  Every tar member gets this fixed mtime so
    # that packaging identical content always produces an identical archive
    # (and therefore a stable content-addressed hash).
    _FIXED_MTIME = 1747158696

    def __init__(
        self,
        datastore_type: str = "s3",
        datastore_root: Optional[str] = None,
        code_package_prefix: Optional[str] = None,
    ):
        """
        Initialize the CodePackager with datastore configuration.

        Parameters
        ----------
        datastore_type : str, default "s3"
            The type of datastore to use: "s3", "azure", "gs", or "local"
        datastore_root : str, optional
            Root path for the datastore. If not provided, uses the default
            for the datastore type.
        code_package_prefix : str, optional
            The prefix to use for storing code packages in the content
            addressed store. If not provided, uses CODE_PACKAGE_PREFIX.
        """
        self._datastore_type = datastore_type
        self._datastore_root = datastore_root
        self._code_package_prefix = code_package_prefix

    def store(
        self,
        package_create_fn: Optional[Callable[[], bytes]] = None,
        paths_to_include: Optional[List[str]] = None,
        file_suffixes: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Tuple[str, str]:
        """
        Create and store a code package using Metaflow's ContentAddressedStore.

        This method can be called in two ways:
        1. With paths_to_include and file_suffixes to use the default packaging
        2. With a custom package_create_fn for custom packaging logic

        Parameters
        ----------
        package_create_fn : Callable[[], bytes], optional
            A function that creates and returns a package as bytes.  Allows
            custom packaging logic without dependency on specific objects.
        paths_to_include : List[str], optional
            Paths to include in the package (default packaging only).
        file_suffixes : List[str], optional
            File suffixes to include (default packaging only).
        metadata : Dict[str, Any], optional
            Metadata embedded into the package (default packaging only).

        Returns
        -------
        Tuple[str, str]
            (package_url, package_key) identifying the location and the
            content-addressed key of the stored package.
        """
        include_paths = paths_to_include or []
        suffixes = file_suffixes or DEFAULT_FILE_SUFFIXES
        meta = metadata or {}

        if package_create_fn is None:
            create = lambda: self.default_package_create(include_paths, suffixes, meta)
        else:
            create = package_create_fn

        code_package = create()

        ca_store = self.get_content_addressed_store(
            datastore_type=self._datastore_type,
            datastore_root=self._datastore_root,
            prefix=(
                str(self._code_package_prefix)
                if self._code_package_prefix is not None
                else str(CODE_PACKAGE_PREFIX)
            ),
        )

        # raw=True so the blob can later be fetched directly via its URL.
        results = ca_store.save_blobs([code_package], raw=True, len_hint=1)
        return results[0].uri, results[0].key

    @staticmethod
    def get_content_addressed_store(
        datastore_type: str = "s3",
        datastore_root: Optional[str] = None,
        prefix: Optional[str] = None,
    ) -> ContentAddressedStore:
        """
        Get a ContentAddressedStore instance for the specified datastore.

        Parameters
        ----------
        datastore_type : str, default "s3"
            Type of datastore: "s3", "azure", "gs", or "local"
        datastore_root : str, optional
            Root path for the datastore. If not provided, uses the default
            for the datastore type.
        prefix : str, optional
            Prefix to use when storing objects in the datastore.
            If not provided, uses CODE_PACKAGE_PREFIX.

        Returns
        -------
        ContentAddressedStore
            A ContentAddressedStore instance configured for the datastore.

        Raises
        ------
        ValueError
            If no (or more than one) datastore implementation matches
            ``datastore_type``.
        """
        from metaflow.plugins import DATASTORES

        impls = [d for d in DATASTORES if d.TYPE == datastore_type]
        if len(impls) == 0:
            raise ValueError(f"Unsupported datastore type: {datastore_type}")
        if len(impls) > 1:
            raise ValueError(
                f"Multiple datastore implementations found for type: {datastore_type}"
            )

        # Fall back to the configured sysroot for the chosen backend.
        default_roots = {
            "s3": DATASTORE_SYSROOT_S3,
            "azure": DATASTORE_SYSROOT_AZURE,
            "gs": DATASTORE_SYSROOT_GS,
            "local": DATASTORE_SYSROOT_LOCAL,
        }
        root = datastore_root or default_roots.get(datastore_type)

        # Ensure prefix is a string.
        store_prefix = str(prefix) if prefix is not None else str(CODE_PACKAGE_PREFIX)

        storage_impl = impls[0](root=root)
        return ContentAddressedStore(prefix=store_prefix, storage_impl=storage_impl)

    @staticmethod
    def get_download_cmd(
        package_url: str,
        datastore_type: str,
        python_cmd: str = "python",
        target_file: str = "job.tar",
        escape_quotes: bool = True,
    ) -> str:
        """
        Generate a shell command to download the code package.

        Parameters
        ----------
        package_url : str
            The URL of the package to download
        datastore_type : str
            The type of datastore (s3, azure, gs, local)
        python_cmd : str, optional
            The Python command to use
        target_file : str, optional
            The target file name to save the package as
        escape_quotes : bool, optional
            Whether to escape double quotes in the embedded python snippet
            (needed when the caller wraps the command in another quoted
            string)

        Returns
        -------
        str
            A shell command string to download the package

        Raises
        ------
        NotImplementedError
            For datastore types without a download command.
        """
        if datastore_type == "s3":
            from metaflow.plugins.aws.aws_utils import parse_s3_full_path

            bucket, s3_object = parse_s3_full_path(package_url)
            # Single-quoted inline python keeps shell escaping manageable.
            script = (
                'import boto3, os; ep=os.getenv({quote}METAFLOW_S3_ENDPOINT_URL{quote}); '
                'boto3.client("s3", **({{"endpoint_url":ep}} if ep else {{}})).download_file('
                '{quote}{bucket}{quote}, {quote}{s3_object}{quote}, {quote}{target_file}{quote})'
            ).format(
                quote='\\"' if escape_quotes else '"',
                bucket=bucket,
                s3_object=s3_object,
                target_file=target_file,
            )
            return f"{python_cmd} -c '{script}'"
        elif datastore_type == "azure":
            from metaflow.plugins.azure.azure_utils import parse_azure_full_path

            container_name, blob = parse_azure_full_path(package_url)
            # ${VAR%/} removes a trailing slash from the endpoint, if present.
            blob_endpoint = "${METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT%/}"
            return "download-azure-blob --blob-endpoint={blob_endpoint} --container={container} --blob={blob} --output-file={target}".format(
                blob_endpoint=blob_endpoint,
                blob=blob,
                container=container_name,
                target=target_file,
            )
        elif datastore_type == "gs":
            from metaflow.plugins.gcp.gs_utils import parse_gs_full_path

            bucket_name, gs_object = parse_gs_full_path(package_url)
            return "download-gcp-object --bucket=%s --object=%s --output-file=%s" % (
                bucket_name,
                gs_object,
                target_file,
            )
        elif datastore_type == "local":
            # For local storage, simply copy the file.
            return "cp %s %s" % (package_url, target_file)
        else:
            raise NotImplementedError(
                f"Download command not implemented for datastore type: {datastore_type}"
            )

    def get_package_commands(
        self,
        code_package_url: str,
        python_cmd: str = "python",
        target_file: str = "job.tar",
        working_dir: str = "metaflow",
        retries: int = 5,
        escape_quotes: bool = True,
    ) -> List[str]:
        """
        Get a complete list of shell commands to download and extract a
        code package (mirrors MetaflowEnvironment.get_package_commands).

        Parameters
        ----------
        code_package_url : str
            The URL of the code package to download
        python_cmd : str, optional
            The Python command to use
        target_file : str, optional
            The target file name to save the package as
        working_dir : str, optional
            The directory to create and extract the package into
        retries : int, optional
            Number of download retries to attempt
        escape_quotes : bool, optional
            Whether to escape quotes in the download command

        Returns
        -------
        List[str]
            List of shell commands to execute, in order.
        """
        datastore_type = self._datastore_type

        def _install_dependencies_cmd():
            # Each datastore backend needs its own client libraries on the
            # worker before the download command can run.
            datastore_packages = {
                "s3": ["boto3"],
                "azure": [
                    "azure-identity",
                    "azure-storage-blob",
                    "azure-keyvault-secrets",
                    "simple-azure-blob-downloader",
                ],
                "gs": [
                    "google-cloud-storage",
                    "google-auth",
                    "simple-gcp-object-downloader",
                    "google-cloud-secret-manager",
                    "packaging",
                ],
                "local": [],
            }

            if datastore_type not in datastore_packages:
                raise NotImplementedError(
                    "Unknown datastore type: {}".format(datastore_type)
                )

            if not datastore_packages[datastore_type]:
                return "# No dependencies required for local datastore"

            pip_cmd = "{} -m pip install -qqq --no-compile --no-cache-dir --disable-pip-version-check {}".format(
                python_cmd,
                " ".join(datastore_packages[datastore_type] + ["requests"]),
            )
            # Skip pip installs if the caller signals deps are pre-baked.
            return "if [ -z $METAFLOW_SKIP_INSTALL_DEPENDENCIES ]; then {}; fi".format(
                pip_cmd
            )

        download_cmd = self.get_download_cmd(
            code_package_url, datastore_type, python_cmd, target_file, escape_quotes
        )

        # Minimal bash log helpers used by the generated commands.
        bash_mflog = (
            'function mflog() { echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")]" "$@"; }'
        )
        bash_flush_logs = 'function flush_mflogs() { echo "[$(date -u +"%Y-%m-%dT%H:%M:%SZ")] Flushing logs"; }'

        # Retry the download up to `retries + 1` times, 10s apart.
        retry_loop = (
            "i=0; while [ $i -le {r} ]; do "
            "mflog 'Downloading code package...'; "
            "{dl} && mflog 'Code package downloaded.' && break; "
            "sleep 10; i=$((i+1)); "
            "done"
        ).format(r=retries, dl=download_cmd)

        failure_check = (
            "if [ $i -gt {r} ]; then "
            "mflog 'Failed to download code package from {url} "
            "after {tries} tries. Exiting...' && exit 1; "
            "fi"
        ).format(r=retries, url=code_package_url, tries=retries + 1)

        return [
            bash_mflog,
            bash_flush_logs,
            "mflog 'Setting up task environment.'",
            _install_dependencies_cmd(),
            f"mkdir -p {working_dir}",
            f"cd {working_dir}",
            "mkdir -p .metaflow",  # mute local datastore creation log
            retry_loop,
            failure_check,
            "TAR_OPTIONS='--warning=no-timestamp' tar xf %s" % target_file,
            "mflog 'Task is starting.'",
            "flush_mflogs",
        ]

    @staticmethod
    def directory_walker(
        root,
        exclude_hidden=True,
        suffixes=None,
        normalized_rel_path=False,
    ) -> List[Tuple[str, str]]:
        """
        Walk a directory and return (file_path, relative_arcname) tuples for
        files that match the given suffix filters.  Follows symlinks, but
        not cycles (see ``symlink_friendly_walk``).

        Parameters
        ----------
        root : str
            The root directory to walk
        exclude_hidden : bool, default True
            Whether to exclude hidden files and directories
        suffixes : List[str], optional
            List of file suffixes to include (e.g. ['.py', '.txt'])
        normalized_rel_path : bool, default False
            If True, make the relative path relative to ``root`` itself:
            for root /a/b/c and file /a/b/c/d/e.py the relative path is
            derived from d/e.py rather than c/d/e.py.

        Returns
        -------
        List[Tuple[str, str]]
            (file_path, relative_arcname) pairs, where relative_arcname is
            the path to use within the archive.
        """
        collected = []
        for file_path, rel_path in symlink_friendly_walk(
            root, exclude_hidden, suffixes
        ):
            if normalized_rel_path:
                # NOTE(review): str.replace removes *every* occurrence of the
                # root substring and leaves a leading separator on the
                # arcname; preserved as-is since existing packages rely on
                # these arcnames — confirm before changing.
                rel_path = file_path.replace(root, "")
            collected.append((file_path, rel_path))
        return collected

    @staticmethod
    def default_package_create(
        paths: List[str], suffixes: List[str], metadata: Optional[Dict[str, Any]] = None
    ) -> bytes:
        """
        Create a default tarball package from specified paths.

        Parameters
        ----------
        paths : List[str]
            List of paths (files or directories) to include in the package
        suffixes : List[str]
            List of file suffixes to include
        metadata : Dict[str, Any], optional
            Metadata written into the archive as ``metadata.json``

        Returns
        -------
        bytes
            The binary content of the gzip tarball (deterministic for
            identical content: member mtimes are pinned and the gzip header
            timestamp is zeroed).
        """
        buf = BytesIO()

        with tarfile.open(fileobj=buf, mode="w:gz", compresslevel=3) as tar:
            if metadata:
                CodePackager._add_tar_file(
                    tar, "metadata.json", BytesIO(json.dumps(metadata).encode("utf-8"))
                )

            def no_mtime(tarinfo):
                # A modification-time change must not change the package
                # hash; only content modifications should.
                tarinfo.mtime = CodePackager._FIXED_MTIME
                return tarinfo

            for path in paths:
                if os.path.isdir(path):
                    # directory_walker handles symlinks safely.
                    for file_path, rel_path in CodePackager.directory_walker(
                        path,
                        exclude_hidden=True,
                        suffixes=suffixes,
                        normalized_rel_path=True,
                    ):
                        tar.add(
                            file_path,
                            arcname=rel_path,
                            filter=no_mtime,
                            recursive=False,
                        )
                elif os.path.isfile(path):
                    if any(path.endswith(suffix) for suffix in suffixes):
                        # BUGFIX: previously the fixed-mtime filter was not
                        # applied here, so a single file's real mtime leaked
                        # into the archive and changed the package hash.
                        tar.add(
                            path,
                            arcname=os.path.basename(path),
                            filter=no_mtime,
                        )

        tarball = bytearray(buf.getvalue())
        # Zero the gzip header MTIME field (bytes 4-7) so the archive bytes
        # do not depend on when it was created.
        tarball[4:8] = [0] * 4
        return tarball

    @staticmethod
    def _add_tar_file(tar, filename, buf):
        """Add an in-memory buffer to ``tar`` as ``filename`` with the fixed mtime."""
        tarinfo = tarfile.TarInfo(name=filename)
        tarinfo.size = len(buf.getvalue())
        tarinfo.mtime = CodePackager._FIXED_MTIME
        buf.seek(0)
        tar.addfile(tarinfo, fileobj=buf)

    @classmethod
    def package_directory(
        cls,
        directory_path: str,
        suffixes: Optional[List[str]] = None,
        exclude_hidden: bool = True,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> bytes:
        """
        Package a directory and all of its contents that match the given
        suffixes.  Symlinks are followed (``dereference=True``).

        Parameters
        ----------
        directory_path : str
            The directory to package
        suffixes : List[str], optional
            List of file suffixes to include (defaults to standard code
            extensions)
        exclude_hidden : bool, default True
            Whether to exclude hidden files and directories
        metadata : Dict[str, Any], optional
            Metadata written into the archive as ``metadata.json``

        Returns
        -------
        bytes
            The binary content of the tarball

        Raises
        ------
        ValueError
            If ``directory_path`` is not a directory.
        """
        if not os.path.isdir(directory_path):
            raise ValueError(f"The path '{directory_path}' is not a directory")

        if suffixes is None:
            suffixes = [".py", ".txt", ".yaml", ".yml", ".json"]

        buf = BytesIO()

        def no_mtime(tarinfo):
            # Fixed mtime so the hash only changes with content.
            tarinfo.mtime = cls._FIXED_MTIME
            return tarinfo

        with tarfile.open(
            fileobj=buf, mode="w:gz", compresslevel=3, dereference=True
        ) as tar:
            if metadata:
                cls._add_tar_file(
                    tar, "metadata.json", BytesIO(json.dumps(metadata).encode("utf-8"))
                )

            for file_path, rel_path in cls.directory_walker(
                directory_path,
                exclude_hidden=exclude_hidden,
                suffixes=suffixes,
            ):
                tar.add(file_path, arcname=rel_path, recursive=False, filter=no_mtime)

        tarball = bytearray(buf.getvalue())
        # Zero the gzip header MTIME field (bytes 4-7).
        tarball[4:8] = [0] * 4
        return tarball