flyte 2.0.0b32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (204) hide show
  1. flyte/__init__.py +108 -0
  2. flyte/_bin/__init__.py +0 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +195 -0
  5. flyte/_bin/serve.py +178 -0
  6. flyte/_build.py +26 -0
  7. flyte/_cache/__init__.py +12 -0
  8. flyte/_cache/cache.py +147 -0
  9. flyte/_cache/defaults.py +9 -0
  10. flyte/_cache/local_cache.py +216 -0
  11. flyte/_cache/policy_function_body.py +42 -0
  12. flyte/_code_bundle/__init__.py +8 -0
  13. flyte/_code_bundle/_ignore.py +121 -0
  14. flyte/_code_bundle/_packaging.py +218 -0
  15. flyte/_code_bundle/_utils.py +347 -0
  16. flyte/_code_bundle/bundle.py +266 -0
  17. flyte/_constants.py +1 -0
  18. flyte/_context.py +155 -0
  19. flyte/_custom_context.py +73 -0
  20. flyte/_debug/__init__.py +0 -0
  21. flyte/_debug/constants.py +38 -0
  22. flyte/_debug/utils.py +17 -0
  23. flyte/_debug/vscode.py +307 -0
  24. flyte/_deploy.py +408 -0
  25. flyte/_deployer.py +109 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +122 -0
  29. flyte/_excepthook.py +37 -0
  30. flyte/_group.py +32 -0
  31. flyte/_hash.py +8 -0
  32. flyte/_image.py +1055 -0
  33. flyte/_initialize.py +628 -0
  34. flyte/_interface.py +119 -0
  35. flyte/_internal/__init__.py +3 -0
  36. flyte/_internal/controllers/__init__.py +129 -0
  37. flyte/_internal/controllers/_local_controller.py +239 -0
  38. flyte/_internal/controllers/_trace.py +48 -0
  39. flyte/_internal/controllers/remote/__init__.py +58 -0
  40. flyte/_internal/controllers/remote/_action.py +211 -0
  41. flyte/_internal/controllers/remote/_client.py +47 -0
  42. flyte/_internal/controllers/remote/_controller.py +583 -0
  43. flyte/_internal/controllers/remote/_core.py +465 -0
  44. flyte/_internal/controllers/remote/_informer.py +381 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +3 -0
  47. flyte/_internal/imagebuild/docker_builder.py +706 -0
  48. flyte/_internal/imagebuild/image_builder.py +277 -0
  49. flyte/_internal/imagebuild/remote_builder.py +386 -0
  50. flyte/_internal/imagebuild/utils.py +78 -0
  51. flyte/_internal/resolvers/__init__.py +0 -0
  52. flyte/_internal/resolvers/_task_module.py +21 -0
  53. flyte/_internal/resolvers/common.py +31 -0
  54. flyte/_internal/resolvers/default.py +28 -0
  55. flyte/_internal/runtime/__init__.py +0 -0
  56. flyte/_internal/runtime/convert.py +486 -0
  57. flyte/_internal/runtime/entrypoints.py +204 -0
  58. flyte/_internal/runtime/io.py +188 -0
  59. flyte/_internal/runtime/resources_serde.py +152 -0
  60. flyte/_internal/runtime/reuse.py +125 -0
  61. flyte/_internal/runtime/rusty.py +193 -0
  62. flyte/_internal/runtime/task_serde.py +362 -0
  63. flyte/_internal/runtime/taskrunner.py +209 -0
  64. flyte/_internal/runtime/trigger_serde.py +160 -0
  65. flyte/_internal/runtime/types_serde.py +54 -0
  66. flyte/_keyring/__init__.py +0 -0
  67. flyte/_keyring/file.py +115 -0
  68. flyte/_logging.py +300 -0
  69. flyte/_map.py +312 -0
  70. flyte/_module.py +72 -0
  71. flyte/_pod.py +30 -0
  72. flyte/_resources.py +473 -0
  73. flyte/_retry.py +32 -0
  74. flyte/_reusable_environment.py +102 -0
  75. flyte/_run.py +724 -0
  76. flyte/_secret.py +96 -0
  77. flyte/_task.py +550 -0
  78. flyte/_task_environment.py +316 -0
  79. flyte/_task_plugins.py +47 -0
  80. flyte/_timeout.py +47 -0
  81. flyte/_tools.py +27 -0
  82. flyte/_trace.py +119 -0
  83. flyte/_trigger.py +1000 -0
  84. flyte/_utils/__init__.py +30 -0
  85. flyte/_utils/asyn.py +121 -0
  86. flyte/_utils/async_cache.py +139 -0
  87. flyte/_utils/coro_management.py +27 -0
  88. flyte/_utils/docker_credentials.py +173 -0
  89. flyte/_utils/file_handling.py +72 -0
  90. flyte/_utils/helpers.py +134 -0
  91. flyte/_utils/lazy_module.py +54 -0
  92. flyte/_utils/module_loader.py +104 -0
  93. flyte/_utils/org_discovery.py +57 -0
  94. flyte/_utils/uv_script_parser.py +49 -0
  95. flyte/_version.py +34 -0
  96. flyte/app/__init__.py +22 -0
  97. flyte/app/_app_environment.py +157 -0
  98. flyte/app/_deploy.py +125 -0
  99. flyte/app/_input.py +160 -0
  100. flyte/app/_runtime/__init__.py +3 -0
  101. flyte/app/_runtime/app_serde.py +347 -0
  102. flyte/app/_types.py +101 -0
  103. flyte/app/extras/__init__.py +3 -0
  104. flyte/app/extras/_fastapi.py +151 -0
  105. flyte/cli/__init__.py +12 -0
  106. flyte/cli/_abort.py +28 -0
  107. flyte/cli/_build.py +114 -0
  108. flyte/cli/_common.py +468 -0
  109. flyte/cli/_create.py +371 -0
  110. flyte/cli/_delete.py +45 -0
  111. flyte/cli/_deploy.py +293 -0
  112. flyte/cli/_gen.py +176 -0
  113. flyte/cli/_get.py +370 -0
  114. flyte/cli/_option.py +33 -0
  115. flyte/cli/_params.py +554 -0
  116. flyte/cli/_plugins.py +209 -0
  117. flyte/cli/_run.py +597 -0
  118. flyte/cli/_serve.py +64 -0
  119. flyte/cli/_update.py +37 -0
  120. flyte/cli/_user.py +17 -0
  121. flyte/cli/main.py +221 -0
  122. flyte/config/__init__.py +3 -0
  123. flyte/config/_config.py +248 -0
  124. flyte/config/_internal.py +73 -0
  125. flyte/config/_reader.py +225 -0
  126. flyte/connectors/__init__.py +11 -0
  127. flyte/connectors/_connector.py +270 -0
  128. flyte/connectors/_server.py +197 -0
  129. flyte/connectors/utils.py +135 -0
  130. flyte/errors.py +243 -0
  131. flyte/extend.py +19 -0
  132. flyte/extras/__init__.py +5 -0
  133. flyte/extras/_container.py +286 -0
  134. flyte/git/__init__.py +3 -0
  135. flyte/git/_config.py +21 -0
  136. flyte/io/__init__.py +29 -0
  137. flyte/io/_dataframe/__init__.py +131 -0
  138. flyte/io/_dataframe/basic_dfs.py +223 -0
  139. flyte/io/_dataframe/dataframe.py +1026 -0
  140. flyte/io/_dir.py +910 -0
  141. flyte/io/_file.py +914 -0
  142. flyte/io/_hashing_io.py +342 -0
  143. flyte/models.py +479 -0
  144. flyte/py.typed +0 -0
  145. flyte/remote/__init__.py +35 -0
  146. flyte/remote/_action.py +738 -0
  147. flyte/remote/_app.py +57 -0
  148. flyte/remote/_client/__init__.py +0 -0
  149. flyte/remote/_client/_protocols.py +189 -0
  150. flyte/remote/_client/auth/__init__.py +12 -0
  151. flyte/remote/_client/auth/_auth_utils.py +14 -0
  152. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  153. flyte/remote/_client/auth/_authenticators/base.py +403 -0
  154. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  155. flyte/remote/_client/auth/_authenticators/device_code.py +117 -0
  156. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  157. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  158. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  159. flyte/remote/_client/auth/_channel.py +213 -0
  160. flyte/remote/_client/auth/_client_config.py +85 -0
  161. flyte/remote/_client/auth/_default_html.py +32 -0
  162. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  163. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  164. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  165. flyte/remote/_client/auth/_keyring.py +152 -0
  166. flyte/remote/_client/auth/_token_client.py +260 -0
  167. flyte/remote/_client/auth/errors.py +16 -0
  168. flyte/remote/_client/controlplane.py +128 -0
  169. flyte/remote/_common.py +30 -0
  170. flyte/remote/_console.py +19 -0
  171. flyte/remote/_data.py +161 -0
  172. flyte/remote/_logs.py +185 -0
  173. flyte/remote/_project.py +88 -0
  174. flyte/remote/_run.py +386 -0
  175. flyte/remote/_secret.py +142 -0
  176. flyte/remote/_task.py +527 -0
  177. flyte/remote/_trigger.py +306 -0
  178. flyte/remote/_user.py +33 -0
  179. flyte/report/__init__.py +3 -0
  180. flyte/report/_report.py +182 -0
  181. flyte/report/_template.html +124 -0
  182. flyte/storage/__init__.py +36 -0
  183. flyte/storage/_config.py +237 -0
  184. flyte/storage/_parallel_reader.py +274 -0
  185. flyte/storage/_remote_fs.py +34 -0
  186. flyte/storage/_storage.py +456 -0
  187. flyte/storage/_utils.py +5 -0
  188. flyte/syncify/__init__.py +56 -0
  189. flyte/syncify/_api.py +375 -0
  190. flyte/types/__init__.py +52 -0
  191. flyte/types/_interface.py +40 -0
  192. flyte/types/_pickle.py +145 -0
  193. flyte/types/_renderer.py +162 -0
  194. flyte/types/_string_literals.py +119 -0
  195. flyte/types/_type_engine.py +2254 -0
  196. flyte/types/_utils.py +80 -0
  197. flyte-2.0.0b32.data/scripts/debug.py +38 -0
  198. flyte-2.0.0b32.data/scripts/runtime.py +195 -0
  199. flyte-2.0.0b32.dist-info/METADATA +351 -0
  200. flyte-2.0.0b32.dist-info/RECORD +204 -0
  201. flyte-2.0.0b32.dist-info/WHEEL +5 -0
  202. flyte-2.0.0b32.dist-info/entry_points.txt +7 -0
  203. flyte-2.0.0b32.dist-info/licenses/LICENSE +201 -0
  204. flyte-2.0.0b32.dist-info/top_level.txt +1 -0
@@ -0,0 +1,218 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import hashlib
5
+ import os
6
+ import pathlib
7
+ import posixpath
8
+ import shutil
9
+ import stat
10
+ import subprocess
11
+ import tarfile
12
+ import time
13
+ import typing
14
+ from typing import List, Optional, Tuple, Union
15
+
16
+ import click
17
+ from rich.tree import Tree
18
+
19
+ from flyte._logging import _get_console, logger
20
+
21
+ from ._ignore import Ignore, IgnoreGroup
22
+ from ._utils import (
23
+ CopyFiles,
24
+ _filehash_update,
25
+ _pathhash_update,
26
+ ls_files,
27
+ ls_relative_files,
28
+ tar_strip_file_attributes,
29
+ )
30
+
31
# Filename prefix and extension of the compressed code bundle: create_bundle names the
# archive f"{FAST_PREFIX}{ls_digest}{FAST_FILEENDING}".
FAST_PREFIX = "fast"
FAST_FILEENDING = ".tar.gz"
33
+
34
+
35
def print_ls_tree(source: os.PathLike, ls: typing.List[str]):
    """
    Log, at info level, a rich tree rendering of the files that will be bundled.

    :param source: Root directory of the bundle; it is resolved before being used as the tree root.
    :param ls: Paths of the files to show. Each file's parent directory must be the resolved
        ``source`` or lie below it, otherwise ``relative_to`` raises ``ValueError``.
    """
    logger.info("Files to be copied for fast registration...")

    style = "bold bright_blue"
    root_node = Tree(
        f"File structure:\n:open_file_folder: {source}",
        guide_style=style,
    )
    root_dir = pathlib.Path(source).resolve()
    # Maps every directory already drawn to its node in the tree.
    nodes = {root_dir: root_node}

    for entry in ls:
        file_path = pathlib.Path(entry)
        folder = file_path.parent
        if folder not in nodes:
            # Walk down from the root, creating nodes for the intermediate folders that are missing.
            node, node_path = root_node, root_dir
            for part in folder.relative_to(root_dir).parts:
                node_path = node_path / part
                known = nodes.get(node_path)
                if known is None:
                    node = node.add(f"{part}", guide_style=style)
                    nodes[node_path] = node
                else:
                    node = known
        nodes[folder].add(f"{file_path.name}", guide_style=style)

    console = _get_console()
    with console.capture() as capture:
        console.print(root_node, overflow="ignore", no_wrap=True, crop=False)
    # The captured text is read only after the capture context has exited.
    logger.info(f"Root directory: [link=file://{source}]{source}[/link]")
    logger.info(capture.get(), extra={"console": console})
64
+
65
+
66
+ def _compress_tarball(source: pathlib.Path, output: pathlib.Path) -> None:
67
+ """Compress code tarball using pigz if available, otherwise gzip"""
68
+ if pigz := shutil.which("pigz"):
69
+ with open(str(output), "wb") as gzipped:
70
+ subprocess.run([pigz, "--no-time", "-c", str(source)], stdout=gzipped, check=True)
71
+ else:
72
+ start_time = time.time()
73
+ with gzip.GzipFile(filename=str(output), mode="wb", mtime=0) as gzipped:
74
+ with open(source, "rb") as source_file:
75
+ gzipped.write(source_file.read())
76
+
77
+ end_time = time.time()
78
+ warning_time = 10
79
+ if end_time - start_time > warning_time:
80
+ click.secho(
81
+ f"Code tarball compression took {end_time - start_time:.0f} seconds. "
82
+ f"Consider installing `pigz` for faster compression.",
83
+ fg="yellow",
84
+ )
85
+
86
+
87
def list_files_to_bundle(
    source: pathlib.Path,
    deref_symlinks: bool = False,
    *ignores: typing.Type[Ignore],
    copy_style: CopyFiles = "all",
) -> typing.Tuple[List[str], str]:
    """
    List the files under a source directory that belong in the code bundle, together with a
    hexdigest computed over them.

    :param source: The source directory to package
    :param deref_symlinks: Whether to dereference symlinks or not
    :param ignores: Ignore classes, combined into an IgnoreGroup, used to exclude files
    :param copy_style: The copy style forwarded to ``ls_files``
    :return: The list of files to include in the code bundle and the hexdigest of those files
    """
    files, digest = ls_files(source, copy_style, deref_symlinks, IgnoreGroup(source, *ignores))
    logger.debug(f"Hash of files to be included in the code bundle: {digest}")
    return files, digest
107
+
108
+
109
def list_relative_files_to_bundle(
    relative_paths: tuple[str, ...],
    source: pathlib.Path,
) -> typing.Tuple[List[str], str]:
    """
    List the files named by the given relative paths and compute a hexdigest over them.

    :param relative_paths: The relative paths to bundle; they are joined onto ``source``.
    :param source: The source directory to package.
    :return: A list of all files to be included in the code bundle and a hexdigest of the included files.
    """
    all_files, digest = ls_relative_files(list(relative_paths), source)
    logger.debug(f"Hash of files to be included in the code bundle: {digest}")
    return all_files, digest
127
+
128
+
129
def create_bundle(
    source: pathlib.Path, output_dir: pathlib.Path, ls: List[str], ls_digest: str, deref_symlinks: bool = False
) -> Tuple[pathlib.Path, float, float]:
    """
    Write the listed files into a tarball inside ``output_dir`` and gzip-compress it.

    Two files are written to ``output_dir``: the intermediate ``tmp.tar`` and the compressed
    archive named ``{FAST_PREFIX}{ls_digest}{FAST_FILEENDING}``. The intermediate tarball is
    not removed by this function.

    :param source: The source directory; archive member names are made relative to it
    :param output_dir: The directory to write the tarball to (may be a temporary directory)
    :param ls: The list of files to include in the tarball
    :param ls_digest: The hexdigest of the included files, used in the archive file name
    :param deref_symlinks: Whether to dereference symlinks or not
    :return: The path to the compressed archive, the size of the uncompressed tarball in MB, and the
        size of the compressed archive in MB
    """
    tar_path = output_dir / "tmp.tar"
    archive_fname = output_dir / f"{FAST_PREFIX}{ls_digest}{FAST_FILEENDING}"

    with tarfile.open(str(tar_path), "w", dereference=deref_symlinks) as tar:
        for ws_file in ls:
            member_name = os.path.relpath(ws_file, start=source)
            # Every entry is added individually (no recursion) with its attributes stripped.
            tar.add(
                os.path.join(source, ws_file),
                arcname=member_name,
                recursive=False,
                filter=tar_strip_file_attributes,
            )

    size_mbs = tar_path.stat().st_size / 1024 / 1024
    _compress_tarball(tar_path, archive_fname)
    asize_mbs = archive_fname.stat().st_size / 1024 / 1024

    return archive_fname, size_mbs, asize_mbs
162
+
163
+
164
def compute_digest(source: Union[os.PathLike, List[os.PathLike]], filter: Optional[typing.Callable] = None) -> str:
    """
    Compute an md5 hex digest over the contents and relative paths of the files in ``source``.

    Directories are walked with both sub-directories and files visited in sorted order, so the
    digest does not depend on the order in which the filesystem lists entries. Non-existent
    files (e.g. dangling symlinks) and socket files are skipped.

    :param source: A directory, or a list whose items are directories and/or single files. A single
        file in the list is hashed under its base name.
    :param filter: Optional callable that receives a file's relative path; the file is skipped
        when it returns a truthy value.
    :return: The md5 hex digest.
    """
    hasher = hashlib.md5()

    def compute_digest_for_file(path: os.PathLike, rel_path: os.PathLike) -> None:
        # Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
        if not os.path.exists(path):
            logger.info(f"Skipping non-existent file {path}")
            return

        # Skip socket files
        if stat.S_ISSOCK(os.stat(path).st_mode):
            logger.info(f"Skip socket file {path}")
            return

        if filter and filter(rel_path):
            return

        _filehash_update(path, hasher)
        _pathhash_update(rel_path, hasher)

    def compute_digest_for_dir(directory: os.PathLike) -> None:
        for root, dirs, files in os.walk(str(directory), topdown=True):
            # With topdown=True, sorting dirs in place fixes the order in which os.walk descends;
            # without it the traversal (and therefore the digest) follows filesystem order.
            dirs.sort()
            files.sort()

            for fname in files:
                abspath = os.path.join(root, fname)
                relpath = os.path.relpath(abspath, directory)
                compute_digest_for_file(pathlib.Path(abspath), pathlib.Path(relpath))

    if isinstance(source, list):
        for src in source:
            if os.path.isdir(src):
                compute_digest_for_dir(src)
            else:
                compute_digest_for_file(src, os.path.basename(src))
    else:
        compute_digest_for_dir(source)

    return hasher.hexdigest()
210
+
211
+
212
def get_additional_distribution_loc(remote_location: str, identifier: str) -> str:
    """
    Build the remote path of a ``.tar.gz`` distribution archive.

    :param remote_location: Remote prefix the archive name is joined onto (POSIX-style join).
    :param identifier: Archive name without the ``.tar.gz`` extension.
    :return: ``remote_location`` joined with ``<identifier>.tar.gz``.
    """
    archive_name = f"{identifier}.tar.gz"
    return posixpath.join(remote_location, archive_name)
@@ -0,0 +1,347 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import hashlib
5
+ import importlib.util
6
+ import os
7
+ import pathlib
8
+ import shutil
9
+ import site
10
+ import stat
11
+ import sys
12
+ import tarfile
13
+ import tempfile
14
+ import typing
15
+ from datetime import datetime, timezone
16
+ from functools import lru_cache
17
+ from types import ModuleType
18
+ from typing import List, Literal, Optional, Tuple, Union
19
+
20
+ from flyte._logging import logger
21
+
22
+ from ._ignore import IgnoreGroup
23
+
24
# Copy-style selector. In ls_files, "loaded_modules" lists the files of the modules in
# sys.modules; any other value walks the source directory ("none" is not expected to reach ls_files).
CopyFiles = Literal["loaded_modules", "all", "none"]
25
+
26
+
27
def compress_scripts(source_path: str, destination: str, modules: List[ModuleType]):
    """
    Write a gzip-compressed tarball holding the files of the given modules, keeping their
    folder structure relative to ``source_path``.

    For example, given the following file structure:

        .
        ├── flyte
        │   ├── __init__.py
        │   └── workflows
        │       ├── example.py
        │       ├── another_example.py
        │       ├── yet_another_example.py
        │       ├── unused_example.py
        │       └── __init__.py

    If `example.py` imports `another_example.py`, and `another_example.py` imports
    `yet_another_example.py`, the tarball contains only those files together with the
    folder structure, i.e.:

        .
        ├── flyte
        │   ├── __init__.py
        │   └── workflows
        │       ├── example.py
        │       ├── another_example.py
        │       ├── yet_another_example.py
        │       └── __init__.py

    :param source_path: Directory that the module files must live under.
    :param destination: Path of the ``.tar.gz`` file to write.
    :param modules: Modules whose files are candidates for inclusion.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        destination_path = os.path.join(tmp_dir, "code")
        os.mkdir(destination_path)
        add_imported_modules_from_source(source_path, destination_path, modules)

        tar_path = os.path.join(tmp_dir, "tmp.tar")
        with tarfile.open(tar_path, "w") as tar:
            # os.listdir returns entries in arbitrary order; sort them so the member order of
            # the archive is the same on every run.
            for ws_file in sorted(os.listdir(destination_path)):
                tar.add(os.path.join(destination_path, ws_file), arcname=ws_file, filter=tar_strip_file_attributes)
        with gzip.GzipFile(filename=destination, mode="wb", mtime=0) as gzipped:
            with open(tar_path, "rb") as tar_file:
                # Stream in chunks so the whole tarball is never held in memory at once.
                shutil.copyfileobj(tar_file, gzipped)
70
+
71
+
72
+ # Takes in a TarInfo and returns the modified TarInfo:
73
+ # https://docs.python.org/3/library/tarfile.html#tarinfo-objects
74
+ # intended to be passed as a filter to tarfile.add
75
+ # https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.add
76
def tar_strip_file_attributes(tar_info: tarfile.TarInfo) -> tarfile.TarInfo:
    """
    Normalise the metadata of a tar member so it does not depend on the local machine.

    The passed TarInfo is modified in place and returned.
    """
    # Pin the modification time to 00:00:00 UTC on 1 January 1980 (not the Unix epoch).
    # When the tarfile is extracted, this is the modified date that will be shown.
    fixed_mtime = datetime(1980, 1, 1, tzinfo=timezone.utc)
    tar_info.mtime = fixed_mtime.timestamp()

    # Blank out user/group ownership.
    tar_info.uid = tar_info.gid = 0
    tar_info.uname = tar_info.gname = ""

    # stripping paxheaders may not be required
    # see https://stackoverflow.com/questions/34688392/paxheaders-in-tarball
    tar_info.pax_headers = {}

    return tar_info
92
+
93
+
94
def ls_files(
    source_path: pathlib.Path,
    copy_file_detection: CopyFiles,
    deref_symlinks: bool = False,
    ignore_group: Optional[IgnoreGroup] = None,
) -> Tuple[List[str], str]:
    """
    Collect the files to bundle and compute an md5 hexdigest over them.

    If ``copy_file_detection`` is "loaded_modules", the files of the modules currently in
    ``sys.modules`` are used; for any other value the ``source_path`` directory is walked.
    The file list is sorted, and each file's contents plus its path relative to ``source_path``
    are fed into the digest.

    :param source_path: Root directory of the bundle.
    :param copy_file_detection: Copy style that selects how files are discovered.
    :param deref_symlinks: Whether symlinks are followed when walking the directory.
    :param ignore_group: Optional ignore rules applied when walking the directory.
    :return: The sorted list of files and the hexdigest.
    """
    if copy_file_detection == "loaded_modules":
        # This is --copy auto
        loaded = list(sys.modules.values())
        all_files = list_imported_modules_as_files(str(source_path), loaded)
    else:
        # this is --copy all (--copy none should never invoke this function)
        all_files = list_all_files(source_path, deref_symlinks, ignore_group)

    all_files.sort()

    hasher = hashlib.md5()
    for abspath in all_files:
        relpath = os.path.relpath(abspath, source_path)
        _filehash_update(abspath, hasher)
        _pathhash_update(relpath, hasher)

    return all_files, hasher.hexdigest()
135
+
136
+
137
def ls_relative_files(relative_paths: list[str], source_path: pathlib.Path) -> tuple[list[str], str]:
    """
    Resolve the given relative paths against ``source_path`` and compute an md5 hexdigest over them.

    The paths are processed in sorted order. For every file, its contents and its path relative
    to ``source_path`` are fed into the digest, so the digest does not depend on where the
    source directory is located (matching how ``ls_files`` hashes paths).

    :param relative_paths: Paths of the files, relative to ``source_path``.
    :param source_path: Directory the relative paths are joined onto.
    :return: The joined file paths (as strings, in sorted input order) and the hexdigest.
    """
    hasher = hashlib.md5()

    all_files = []
    for file in sorted(relative_paths):
        path = source_path / file
        all_files.append(str(path))
        _filehash_update(path, hasher)
        # Hash the path relative to the source root rather than the joined (absolute) path.
        _pathhash_update(os.path.relpath(path, source_path), hasher)

    digest = hasher.hexdigest()
    return all_files, digest
151
+
152
+
153
+ def _filehash_update(path: Union[os.PathLike, str], hasher: hashlib._Hash) -> None:
154
+ blocksize = 65536
155
+ with open(path, "rb") as f:
156
+ bytes = f.read(blocksize)
157
+ while bytes:
158
+ hasher.update(bytes)
159
+ bytes = f.read(blocksize)
160
+
161
+
162
+ def _pathhash_update(path: Union[os.PathLike, str], hasher: hashlib._Hash) -> None:
163
+ path_list = str(path).split(os.sep)
164
+ hasher.update("".join(path_list).encode("utf-8"))
165
+
166
+
167
# Directory names that list_all_files prunes from its walk.
EXCLUDE_DIRS = {".git"}
168
+
169
+
170
def list_all_files(source_path: pathlib.Path, deref_symlinks, ignore_group: Optional[IgnoreGroup] = None) -> List[str]:
    """
    Walk ``source_path`` and return the absolute paths of the files to bundle.

    Directories named in ``EXCLUDE_DIRS`` are not descended into. Non-existent files (such as
    dangling symlinks), socket files and files matched by ``ignore_group`` are left out.

    :param source_path: Directory to walk.
    :param deref_symlinks: When truthy, symlinked directories are followed and directories whose
        inode was already visited are skipped.
    :param ignore_group: Optional ignore rules; files for which ``is_ignored`` is true are dropped.
    :return: Absolute file paths as strings, with the files of each directory in sorted order.
    """
    collected = []

    # This is needed to prevent infinite recursion when walking with followlinks
    seen_inodes = set()
    for root, dirnames, files in os.walk(source_path, topdown=True, followlinks=deref_symlinks):
        dirnames[:] = [name for name in dirnames if name not in EXCLUDE_DIRS]
        if deref_symlinks:
            root_inode = os.stat(root).st_ino
            if root_inode in seen_inodes:
                continue
            seen_inodes.add(root_inode)

        root_dir = pathlib.Path(root)
        for fname in sorted(files):
            abspath = (root_dir / fname).absolute()
            # Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
            if not os.path.exists(abspath):
                logger.info(f"Skipping non-existent file {abspath}")
                continue
            # Skip socket files
            if stat.S_ISSOCK(os.stat(abspath).st_mode):
                logger.info(f"Skip socket file {abspath}")
                continue
            if ignore_group and ignore_group.is_ignored(abspath):
                continue

            collected.append(str(abspath))

        # Remove directories that we've already visited from dirnames
        if deref_symlinks:
            dirnames[:] = [
                name for name in dirnames if os.stat(os.path.join(root, name)).st_ino not in seen_inodes
            ]

    return collected
207
+
208
+
209
+ def _file_is_in_directory(file: str, directory: str) -> bool:
210
+ """Return True if file is in directory and in its children."""
211
+ try:
212
+ return pathlib.Path(file).resolve().is_relative_to(pathlib.Path(directory).resolve())
213
+ except OSError as e:
214
+ # OSError can be raised if paths cannot be resolved (permissions, broken symlinks, etc.)
215
+ logger.debug(f"Failed to resolve paths for {file} and {directory}: {e!s}")
216
+ return False
217
+
218
+
219
def list_imported_modules_as_files(source_path: str, modules: List[ModuleType]) -> List[str]:
    """Return the file paths of the given modules that count as user files.

    A module's file is included only if all of the following hold:

    1. It is not inside the flyte package directory, ``sys.prefix``, ``sys.base_prefix`` or any
       site-packages directory. Those hold installed packages, not user files.
    2. It is located inside ``source_path``.
    3. It exists as a regular file.

    Lazy modules that have not been imported yet are skipped without touching ``__file__``.
    The order of the returned list is unspecified.
    """

    import flyte
    from flyte._utils.lazy_module import is_imported

    # These directories contain installed packages or modules from the Python standard library.
    # If a module is from these directories, then they are not user files.
    invalid_directories = [
        os.path.dirname(flyte.__file__),
        sys.prefix,
        sys.base_prefix,
        site.getusersitepackages(),
        *site.getsitepackages(),
    ]

    user_files = set()
    for mod in modules:
        # Be careful not to import a module with the .__file__ call if not yet imported.
        if "LazyModule" in object.__getattribute__(mod, "__class__").__name__:
            if not is_imported(object.__getattribute__(mod, "__name__")):
                continue
            mod_file = mod.__file__
        else:
            # Modules without a __file__ attribute are treated like modules whose __file__ is None.
            mod_file = getattr(mod, "__file__", None)

        if mod_file is None:
            continue

        if any(_file_is_in_directory(mod_file, directory) for directory in invalid_directories):
            continue

        if not _file_is_in_directory(mod_file, source_path):
            # Only upload files where the module file in the source directory
            # print log line for files that have common ancestor with source_path, but not in it.
            logger.debug(f"{mod_file} is not in {source_path}")
            continue

        if not pathlib.Path(mod_file).is_file():
            # Some modules have a __file__ attribute that are relative to the base package. Let's skip these,
            # can add more rigorous logic to really pull out the correct file location if we need to.
            logger.debug(f"Skipping {mod_file} from {mod.__name__} because it is not a file")
            continue

        user_files.add(mod_file)

    return list(user_files)
272
+
273
+
274
def add_imported_modules_from_source(source_path: str, destination: str, modules: List[ModuleType]):
    """Copy the files of the given modules into ``destination``, keeping their layout relative
    to ``source_path``.

    Which module files qualify is decided by ``list_imported_modules_as_files``: installed
    packages are excluded and only files located inside ``source_path`` are copied. Files that
    already exist at the target location are left untouched.
    """
    # source path is the folder holding the main script.
    # but in register/package case, there are multiple folders.
    # identify a common root amongst the packages listed?

    for module_file in list_imported_modules_as_files(source_path, modules):
        target = os.path.join(destination, os.path.relpath(module_file, start=source_path))

        # No need to copy if it already exists
        if not os.path.exists(target):
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copy(module_file, target)
296
+
297
+
298
def import_module_from_file(module_name, file):
    """
    Create a module object for ``file`` under the name ``module_name``.

    The module is built from its import spec only; its code is not executed here.

    :raises ModuleNotFoundError: If the spec or the module object cannot be created.
    """
    try:
        module_spec = importlib.util.spec_from_file_location(module_name, file)
        return importlib.util.module_from_spec(module_spec)
    except Exception as exc:
        raise ModuleNotFoundError(f"Module from file {file} cannot be loaded") from exc
305
+
306
+
307
def get_all_modules(source_path: str, module_name: Optional[str]) -> List[ModuleType]:
    """
    Return the modules currently in ``sys.modules``, plus a module object for ``module_name``
    when it is not loaded yet and a matching ``.py`` file exists under ``source_path``.

    Falls back to just the loaded modules when ``module_name`` is None or already loaded, when
    no such file exists, or when creating the module fails.
    """
    loaded = list(sys.modules.values())
    if module_name is None or module_name in sys.modules:
        # module already exists, there is no need to import it again
        return loaded

    full_module_path = os.path.join(source_path, *module_name.split(".")) + ".py"
    if not os.path.isfile(full_module_path):
        return loaded

    try:
        return [*loaded, import_module_from_file(module_name, full_module_path)]
    except Exception as exc:
        logger.error(f"Using system modules, failed to import {module_name} from {full_module_path}: {exc!s}")
        # Import failed so we fallback to the already-loaded modules
        return loaded
328
+
329
+
330
@lru_cache
def hash_file(file_path: typing.Union[os.PathLike, str]) -> Tuple[bytes, str, int]:
    """
    Hash a file with md5 and produce a digest to be used as a version.

    Results are memoised by ``lru_cache`` on ``file_path``, so a later change to the same file
    within the same process is not picked up.

    :param file_path: Path of the file to hash.
    :return: A tuple of the raw digest, the hex digest and the file size in bytes.
    """
    h = hashlib.md5()
    size = 0
    # Read 64 KiB at a time; reading h.block_size (64 bytes for md5) per call costs one loop
    # iteration per 64 bytes without changing the resulting digest.
    chunk_size = 65536

    with open(file_path, "rb") as file:
        while chunk := file.read(chunk_size):
            h.update(chunk)
            size += len(chunk)

    return h.digest(), h.hexdigest(), size