flyte 0.2.0b1__py3-none-any.whl → 2.0.0b46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. flyte/__init__.py +83 -30
  2. flyte/_bin/connect.py +61 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +87 -19
  5. flyte/_bin/serve.py +351 -0
  6. flyte/_build.py +3 -2
  7. flyte/_cache/cache.py +6 -5
  8. flyte/_cache/local_cache.py +216 -0
  9. flyte/_code_bundle/_ignore.py +31 -5
  10. flyte/_code_bundle/_packaging.py +42 -11
  11. flyte/_code_bundle/_utils.py +57 -34
  12. flyte/_code_bundle/bundle.py +130 -27
  13. flyte/_constants.py +1 -0
  14. flyte/_context.py +21 -5
  15. flyte/_custom_context.py +73 -0
  16. flyte/_debug/constants.py +37 -0
  17. flyte/_debug/utils.py +17 -0
  18. flyte/_debug/vscode.py +315 -0
  19. flyte/_deploy.py +396 -75
  20. flyte/_deployer.py +109 -0
  21. flyte/_environment.py +94 -11
  22. flyte/_excepthook.py +37 -0
  23. flyte/_group.py +2 -1
  24. flyte/_hash.py +1 -16
  25. flyte/_image.py +544 -231
  26. flyte/_initialize.py +456 -316
  27. flyte/_interface.py +40 -5
  28. flyte/_internal/controllers/__init__.py +22 -8
  29. flyte/_internal/controllers/_local_controller.py +159 -35
  30. flyte/_internal/controllers/_trace.py +18 -10
  31. flyte/_internal/controllers/remote/__init__.py +38 -9
  32. flyte/_internal/controllers/remote/_action.py +82 -12
  33. flyte/_internal/controllers/remote/_client.py +6 -2
  34. flyte/_internal/controllers/remote/_controller.py +290 -64
  35. flyte/_internal/controllers/remote/_core.py +155 -95
  36. flyte/_internal/controllers/remote/_informer.py +40 -20
  37. flyte/_internal/controllers/remote/_service_protocol.py +2 -2
  38. flyte/_internal/imagebuild/__init__.py +2 -10
  39. flyte/_internal/imagebuild/docker_builder.py +391 -84
  40. flyte/_internal/imagebuild/image_builder.py +111 -55
  41. flyte/_internal/imagebuild/remote_builder.py +409 -0
  42. flyte/_internal/imagebuild/utils.py +79 -0
  43. flyte/_internal/resolvers/_app_env_module.py +92 -0
  44. flyte/_internal/resolvers/_task_module.py +5 -38
  45. flyte/_internal/resolvers/app_env.py +26 -0
  46. flyte/_internal/resolvers/common.py +8 -1
  47. flyte/_internal/resolvers/default.py +2 -2
  48. flyte/_internal/runtime/convert.py +319 -36
  49. flyte/_internal/runtime/entrypoints.py +106 -18
  50. flyte/_internal/runtime/io.py +71 -23
  51. flyte/_internal/runtime/resources_serde.py +21 -7
  52. flyte/_internal/runtime/reuse.py +125 -0
  53. flyte/_internal/runtime/rusty.py +196 -0
  54. flyte/_internal/runtime/task_serde.py +239 -66
  55. flyte/_internal/runtime/taskrunner.py +48 -8
  56. flyte/_internal/runtime/trigger_serde.py +162 -0
  57. flyte/_internal/runtime/types_serde.py +7 -16
  58. flyte/_keyring/file.py +115 -0
  59. flyte/_link.py +30 -0
  60. flyte/_logging.py +241 -42
  61. flyte/_map.py +312 -0
  62. flyte/_metrics.py +59 -0
  63. flyte/_module.py +74 -0
  64. flyte/_pod.py +30 -0
  65. flyte/_resources.py +296 -33
  66. flyte/_retry.py +1 -7
  67. flyte/_reusable_environment.py +72 -7
  68. flyte/_run.py +462 -132
  69. flyte/_secret.py +47 -11
  70. flyte/_serve.py +333 -0
  71. flyte/_task.py +245 -56
  72. flyte/_task_environment.py +219 -97
  73. flyte/_task_plugins.py +47 -0
  74. flyte/_tools.py +8 -8
  75. flyte/_trace.py +15 -24
  76. flyte/_trigger.py +1027 -0
  77. flyte/_utils/__init__.py +12 -1
  78. flyte/_utils/asyn.py +3 -1
  79. flyte/_utils/async_cache.py +139 -0
  80. flyte/_utils/coro_management.py +5 -4
  81. flyte/_utils/description_parser.py +19 -0
  82. flyte/_utils/docker_credentials.py +173 -0
  83. flyte/_utils/helpers.py +45 -19
  84. flyte/_utils/module_loader.py +123 -0
  85. flyte/_utils/org_discovery.py +57 -0
  86. flyte/_utils/uv_script_parser.py +8 -1
  87. flyte/_version.py +16 -3
  88. flyte/app/__init__.py +27 -0
  89. flyte/app/_app_environment.py +362 -0
  90. flyte/app/_connector_environment.py +40 -0
  91. flyte/app/_deploy.py +130 -0
  92. flyte/app/_parameter.py +343 -0
  93. flyte/app/_runtime/__init__.py +3 -0
  94. flyte/app/_runtime/app_serde.py +383 -0
  95. flyte/app/_types.py +113 -0
  96. flyte/app/extras/__init__.py +9 -0
  97. flyte/app/extras/_auth_middleware.py +217 -0
  98. flyte/app/extras/_fastapi.py +93 -0
  99. flyte/app/extras/_model_loader/__init__.py +3 -0
  100. flyte/app/extras/_model_loader/config.py +7 -0
  101. flyte/app/extras/_model_loader/loader.py +288 -0
  102. flyte/cli/__init__.py +12 -0
  103. flyte/cli/_abort.py +28 -0
  104. flyte/cli/_build.py +114 -0
  105. flyte/cli/_common.py +493 -0
  106. flyte/cli/_create.py +371 -0
  107. flyte/cli/_delete.py +45 -0
  108. flyte/cli/_deploy.py +401 -0
  109. flyte/cli/_gen.py +316 -0
  110. flyte/cli/_get.py +446 -0
  111. flyte/cli/_option.py +33 -0
  112. flyte/{_cli → cli}/_params.py +57 -17
  113. flyte/cli/_plugins.py +209 -0
  114. flyte/cli/_prefetch.py +292 -0
  115. flyte/cli/_run.py +690 -0
  116. flyte/cli/_serve.py +338 -0
  117. flyte/cli/_update.py +86 -0
  118. flyte/cli/_user.py +20 -0
  119. flyte/cli/main.py +246 -0
  120. flyte/config/__init__.py +2 -167
  121. flyte/config/_config.py +215 -163
  122. flyte/config/_internal.py +10 -1
  123. flyte/config/_reader.py +225 -0
  124. flyte/connectors/__init__.py +11 -0
  125. flyte/connectors/_connector.py +330 -0
  126. flyte/connectors/_server.py +194 -0
  127. flyte/connectors/utils.py +159 -0
  128. flyte/errors.py +134 -2
  129. flyte/extend.py +24 -0
  130. flyte/extras/_container.py +69 -56
  131. flyte/git/__init__.py +3 -0
  132. flyte/git/_config.py +279 -0
  133. flyte/io/__init__.py +8 -1
  134. flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
  135. flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
  136. flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
  137. flyte/io/_dir.py +575 -113
  138. flyte/io/_file.py +587 -141
  139. flyte/io/_hashing_io.py +342 -0
  140. flyte/io/extend.py +7 -0
  141. flyte/models.py +635 -0
  142. flyte/prefetch/__init__.py +22 -0
  143. flyte/prefetch/_hf_model.py +563 -0
  144. flyte/remote/__init__.py +14 -3
  145. flyte/remote/_action.py +879 -0
  146. flyte/remote/_app.py +346 -0
  147. flyte/remote/_auth_metadata.py +42 -0
  148. flyte/remote/_client/_protocols.py +62 -4
  149. flyte/remote/_client/auth/_auth_utils.py +19 -0
  150. flyte/remote/_client/auth/_authenticators/base.py +8 -2
  151. flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
  152. flyte/remote/_client/auth/_authenticators/factory.py +4 -0
  153. flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
  154. flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
  155. flyte/remote/_client/auth/_channel.py +47 -18
  156. flyte/remote/_client/auth/_client_config.py +5 -3
  157. flyte/remote/_client/auth/_keyring.py +15 -2
  158. flyte/remote/_client/auth/_token_client.py +3 -3
  159. flyte/remote/_client/controlplane.py +206 -18
  160. flyte/remote/_common.py +66 -0
  161. flyte/remote/_data.py +107 -22
  162. flyte/remote/_logs.py +116 -33
  163. flyte/remote/_project.py +21 -19
  164. flyte/remote/_run.py +164 -631
  165. flyte/remote/_secret.py +72 -29
  166. flyte/remote/_task.py +387 -46
  167. flyte/remote/_trigger.py +368 -0
  168. flyte/remote/_user.py +43 -0
  169. flyte/report/_report.py +10 -6
  170. flyte/storage/__init__.py +13 -1
  171. flyte/storage/_config.py +237 -0
  172. flyte/storage/_parallel_reader.py +289 -0
  173. flyte/storage/_storage.py +268 -59
  174. flyte/syncify/__init__.py +56 -0
  175. flyte/syncify/_api.py +414 -0
  176. flyte/types/__init__.py +39 -0
  177. flyte/types/_interface.py +22 -7
  178. flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
  179. flyte/types/_string_literals.py +8 -9
  180. flyte/types/_type_engine.py +226 -126
  181. flyte/types/_utils.py +1 -1
  182. flyte-2.0.0b46.data/scripts/debug.py +38 -0
  183. flyte-2.0.0b46.data/scripts/runtime.py +194 -0
  184. flyte-2.0.0b46.dist-info/METADATA +352 -0
  185. flyte-2.0.0b46.dist-info/RECORD +221 -0
  186. flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
  187. flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
  188. flyte/_api_commons.py +0 -3
  189. flyte/_cli/_common.py +0 -299
  190. flyte/_cli/_create.py +0 -42
  191. flyte/_cli/_delete.py +0 -23
  192. flyte/_cli/_deploy.py +0 -140
  193. flyte/_cli/_get.py +0 -235
  194. flyte/_cli/_run.py +0 -174
  195. flyte/_cli/main.py +0 -98
  196. flyte/_datastructures.py +0 -342
  197. flyte/_internal/controllers/pbhash.py +0 -39
  198. flyte/_protos/common/authorization_pb2.py +0 -66
  199. flyte/_protos/common/authorization_pb2.pyi +0 -108
  200. flyte/_protos/common/authorization_pb2_grpc.py +0 -4
  201. flyte/_protos/common/identifier_pb2.py +0 -71
  202. flyte/_protos/common/identifier_pb2.pyi +0 -82
  203. flyte/_protos/common/identifier_pb2_grpc.py +0 -4
  204. flyte/_protos/common/identity_pb2.py +0 -48
  205. flyte/_protos/common/identity_pb2.pyi +0 -72
  206. flyte/_protos/common/identity_pb2_grpc.py +0 -4
  207. flyte/_protos/common/list_pb2.py +0 -36
  208. flyte/_protos/common/list_pb2.pyi +0 -69
  209. flyte/_protos/common/list_pb2_grpc.py +0 -4
  210. flyte/_protos/common/policy_pb2.py +0 -37
  211. flyte/_protos/common/policy_pb2.pyi +0 -27
  212. flyte/_protos/common/policy_pb2_grpc.py +0 -4
  213. flyte/_protos/common/role_pb2.py +0 -37
  214. flyte/_protos/common/role_pb2.pyi +0 -53
  215. flyte/_protos/common/role_pb2_grpc.py +0 -4
  216. flyte/_protos/common/runtime_version_pb2.py +0 -28
  217. flyte/_protos/common/runtime_version_pb2.pyi +0 -24
  218. flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
  219. flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
  220. flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  221. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  222. flyte/_protos/secret/definition_pb2.py +0 -49
  223. flyte/_protos/secret/definition_pb2.pyi +0 -93
  224. flyte/_protos/secret/definition_pb2_grpc.py +0 -4
  225. flyte/_protos/secret/payload_pb2.py +0 -62
  226. flyte/_protos/secret/payload_pb2.pyi +0 -94
  227. flyte/_protos/secret/payload_pb2_grpc.py +0 -4
  228. flyte/_protos/secret/secret_pb2.py +0 -38
  229. flyte/_protos/secret/secret_pb2.pyi +0 -6
  230. flyte/_protos/secret/secret_pb2_grpc.py +0 -198
  231. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  232. flyte/_protos/validate/validate/validate_pb2.py +0 -76
  233. flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
  234. flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  235. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  236. flyte/_protos/workflow/queue_service_pb2.py +0 -106
  237. flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
  238. flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  239. flyte/_protos/workflow/run_definition_pb2.py +0 -128
  240. flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
  241. flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  242. flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
  243. flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  244. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  245. flyte/_protos/workflow/run_service_pb2.py +0 -133
  246. flyte/_protos/workflow/run_service_pb2.pyi +0 -175
  247. flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
  248. flyte/_protos/workflow/state_service_pb2.py +0 -58
  249. flyte/_protos/workflow/state_service_pb2.pyi +0 -71
  250. flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
  251. flyte/_protos/workflow/task_definition_pb2.py +0 -72
  252. flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
  253. flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  254. flyte/_protos/workflow/task_service_pb2.py +0 -44
  255. flyte/_protos/workflow/task_service_pb2.pyi +0 -31
  256. flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
  257. flyte/io/_dataframe.py +0 -0
  258. flyte/io/pickle/__init__.py +0 -0
  259. flyte/remote/_console.py +0 -18
  260. flyte-0.2.0b1.dist-info/METADATA +0 -179
  261. flyte-0.2.0b1.dist-info/RECORD +0 -204
  262. flyte-0.2.0b1.dist-info/entry_points.txt +0 -3
  263. /flyte/{_cli → _debug}/__init__.py +0 -0
  264. /flyte/{_protos → _keyring}/__init__.py +0 -0
  265. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
  266. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
@@ -14,34 +14,40 @@ import typing
14
14
  from typing import List, Optional, Tuple, Union
15
15
 
16
16
  import click
17
- from rich import print as rich_print
18
17
  from rich.tree import Tree
19
18
 
20
- from flyte._logging import logger
19
+ from flyte._logging import _get_console, logger
21
20
 
22
21
  from ._ignore import Ignore, IgnoreGroup
23
- from ._utils import CopyFiles, _filehash_update, _pathhash_update, ls_files, tar_strip_file_attributes
22
+ from ._utils import (
23
+ CopyFiles,
24
+ _filehash_update,
25
+ _pathhash_update,
26
+ ls_files,
27
+ ls_relative_files,
28
+ tar_strip_file_attributes,
29
+ )
24
30
 
25
31
  FAST_PREFIX = "fast"
26
32
  FAST_FILEENDING = ".tar.gz"
27
33
 
28
34
 
29
35
  def print_ls_tree(source: os.PathLike, ls: typing.List[str]):
30
- click.secho("Files to be copied for fast registration...", fg="bright_blue")
36
+ logger.info("Files to be copied for fast registration...")
31
37
 
32
38
  tree_root = Tree(
33
- f":open_file_folder: [link file://{source}]{source} (detected source root)",
39
+ f"File structure:\n:open_file_folder: {source}",
34
40
  guide_style="bold bright_blue",
35
41
  )
36
- trees = {pathlib.Path(source): tree_root}
37
-
42
+ source_path = pathlib.Path(source).resolve()
43
+ trees = {source_path: tree_root}
38
44
  for f in ls:
39
45
  fpp = pathlib.Path(f)
40
46
  if fpp.parent not in trees:
41
47
  # add trees for all intermediate folders
42
48
  current = tree_root
43
- current_path = pathlib.Path(source)
44
- for subdir in fpp.parent.relative_to(source).parts:
49
+ current_path = source_path # pathlib.Path(source)
50
+ for subdir in fpp.parent.relative_to(source_path).parts:
45
51
  current_path = current_path / subdir
46
52
  if current_path not in trees:
47
53
  current = current.add(f"{subdir}", guide_style="bold bright_blue")
@@ -49,7 +55,12 @@ def print_ls_tree(source: os.PathLike, ls: typing.List[str]):
49
55
  else:
50
56
  current = trees[current_path]
51
57
  trees[fpp.parent].add(f"{fpp.name}", guide_style="bold bright_blue")
52
- rich_print(tree_root)
58
+
59
+ console = _get_console()
60
+ with console.capture() as capture:
61
+ console.print(tree_root, overflow="ignore", no_wrap=True, crop=False)
62
+ logger.info(f"Root directory: [link=file://{source}]{source}[/link]")
63
+ logger.info(capture.get(), extra={"console": console})
53
64
 
54
65
 
55
66
  def _compress_tarball(source: pathlib.Path, output: pathlib.Path) -> None:
@@ -91,10 +102,30 @@ def list_files_to_bundle(
91
102
  ignore = IgnoreGroup(source, *ignores)
92
103
 
93
104
  ls, ls_digest = ls_files(source, copy_style, deref_symlinks, ignore)
94
- logger.debug(f"Hash digest: {ls_digest}")
105
+ logger.debug(f"Hash of files to be included in the code bundle: {ls_digest}")
95
106
  return ls, ls_digest
96
107
 
97
108
 
109
+ def list_relative_files_to_bundle(
110
+ relative_paths: tuple[str, ...],
111
+ source: pathlib.Path,
112
+ ) -> typing.Tuple[List[str], str]:
113
+ """
114
+ List the files in the relative paths.
115
+
116
+ :param relative_paths: The list of relative paths to bundle.
117
+ :param source: The source directory to package.
118
+ :param ignores: A list of Ignore classes to use for ignoring files
119
+ :param copy_style: The copy style to use for the tarball
120
+ :return: A list of all files to be included in the code bundle and a hexdigest of the included files.
121
+ """
122
+ _source = source
123
+
124
+ all_files, digest = ls_relative_files(list(relative_paths), source)
125
+ logger.debug(f"Hash of files to be included in the code bundle: {digest}")
126
+ return all_files, digest
127
+
128
+
98
129
  def create_bundle(
99
130
  source: pathlib.Path, output_dir: pathlib.Path, ls: List[str], ls_digest: str, deref_symlinks: bool = False
100
131
  ) -> Tuple[pathlib.Path, float, float]:
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import glob
3
4
  import gzip
4
5
  import hashlib
5
6
  import importlib.util
@@ -14,7 +15,6 @@ import tempfile
14
15
  import typing
15
16
  from datetime import datetime, timezone
16
17
  from functools import lru_cache
17
- from pathlib import Path
18
18
  from types import ModuleType
19
19
  from typing import List, Literal, Optional, Tuple, Union
20
20
 
@@ -75,9 +75,9 @@ def compress_scripts(source_path: str, destination: str, modules: List[ModuleTyp
75
75
  # intended to be passed as a filter to tarfile.add
76
76
  # https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.add
77
77
  def tar_strip_file_attributes(tar_info: tarfile.TarInfo) -> tarfile.TarInfo:
78
- # set time to epoch timestamp 0, aka 00:00:00 UTC on 1 January 1980
78
+ # set time to epoch timestamp 0, aka 00:00:00 UTC on 1 January 1981
79
79
  # note that when extracting this tarfile, this time will be shown as the modified date
80
- tar_info.mtime = datetime(1980, 1, 1, tzinfo=timezone.utc).timestamp()
80
+ tar_info.mtime = datetime(1981, 1, 1, tzinfo=timezone.utc).timestamp()
81
81
 
82
82
  # user/group info
83
83
  tar_info.uid = 0
@@ -135,13 +135,43 @@ def ls_files(
135
135
  return all_files, digest
136
136
 
137
137
 
138
+ def ls_relative_files(relative_paths: list[str], source_path: pathlib.Path) -> tuple[list[str], str]:
139
+ relative_paths = list(relative_paths)
140
+ relative_paths.sort()
141
+ hasher = hashlib.md5()
142
+
143
+ all_files: list[str] = []
144
+ for file in relative_paths:
145
+ path = source_path / file
146
+ if path.is_dir():
147
+ # Filter out directories, only include files
148
+ all_files.extend([str(p) for p in path.glob("**/*") if p.is_file()])
149
+ elif path.is_file():
150
+ all_files.append(str(path))
151
+ else:
152
+ glob_files = glob.glob(str(path))
153
+ if glob_files:
154
+ # Filter out directories from glob results
155
+ all_files.extend([str(f) for f in glob_files if pathlib.Path(f).is_file()])
156
+ else:
157
+ raise ValueError(f"File {path} is not a valid file, directory, or glob pattern")
158
+
159
+ all_files.sort()
160
+ for p in all_files:
161
+ _filehash_update(p, hasher)
162
+ _pathhash_update(p, hasher)
163
+
164
+ digest = hasher.hexdigest()
165
+ return all_files, digest
166
+
167
+
138
168
  def _filehash_update(path: Union[os.PathLike, str], hasher: hashlib._Hash) -> None:
139
169
  blocksize = 65536
140
170
  with open(path, "rb") as f:
141
- bytes = f.read(blocksize)
142
- while bytes:
143
- hasher.update(bytes)
144
- bytes = f.read(blocksize)
171
+ chunk = f.read(blocksize)
172
+ while chunk:
173
+ hasher.update(chunk)
174
+ chunk = f.read(blocksize)
145
175
 
146
176
 
147
177
  def _pathhash_update(path: Union[os.PathLike, str], hasher: hashlib._Hash) -> None:
@@ -157,7 +187,7 @@ def list_all_files(source_path: pathlib.Path, deref_symlinks, ignore_group: Opti
157
187
 
158
188
  # This is needed to prevent infinite recursion when walking with followlinks
159
189
  visited_inodes = set()
160
- for root, dirnames, files in source_path.walk(top_down=True, follow_symlinks=deref_symlinks):
190
+ for root, dirnames, files in os.walk(source_path, topdown=True, followlinks=deref_symlinks):
161
191
  dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
162
192
  if deref_symlinks:
163
193
  inode = os.stat(root).st_ino
@@ -168,7 +198,7 @@ def list_all_files(source_path: pathlib.Path, deref_symlinks, ignore_group: Opti
168
198
  ff = []
169
199
  files.sort()
170
200
  for fname in files:
171
- abspath = (root / fname).absolute()
201
+ abspath = (pathlib.Path(root) / fname).absolute()
172
202
  # Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
173
203
  if not os.path.exists(abspath):
174
204
  logger.info(f"Skipping non-existent file {abspath}")
@@ -194,15 +224,15 @@ def list_all_files(source_path: pathlib.Path, deref_symlinks, ignore_group: Opti
194
224
  def _file_is_in_directory(file: str, directory: str) -> bool:
195
225
  """Return True if file is in directory and in its children."""
196
226
  try:
197
- return os.path.commonpath([file, directory]) == directory
198
- except ValueError as e:
199
- # ValueError is raised by windows if the paths are not from the same drive
200
- logger.debug(f"{file} and {directory} are not in the same drive: {e!s}")
227
+ return pathlib.Path(file).resolve().is_relative_to(pathlib.Path(directory).resolve())
228
+ except OSError as e:
229
+ # OSError can be raised if paths cannot be resolved (permissions, broken symlinks, etc.)
230
+ logger.debug(f"Failed to resolve paths for {file} and {directory}: {e!s}")
201
231
  return False
202
232
 
203
233
 
204
234
  def list_imported_modules_as_files(source_path: str, modules: List[ModuleType]) -> List[str]:
205
- """Copies modules into destination that are in modules. The module files are copied only if:
235
+ """Lists the files of modules that have been loaded. The files are only included if:
206
236
 
207
237
  1. Not a site-packages. These are installed packages and not user files.
208
238
  2. Not in the sys.base_prefix or sys.prefix. These are also installed and not user files.
@@ -212,12 +242,12 @@ def list_imported_modules_as_files(source_path: str, modules: List[ModuleType])
212
242
  import flyte
213
243
  from flyte._utils.lazy_module import is_imported
214
244
 
215
- files = []
216
- union_root = os.path.dirname(flyte.__file__)
245
+ files = set()
246
+ flyte_root = os.path.dirname(flyte.__file__)
217
247
 
218
248
  # These directories contain installed packages or modules from the Python standard library.
219
249
  # If a module is from these directories, then they are not user files.
220
- invalid_directories = [union_root, sys.prefix, sys.base_prefix, site.getusersitepackages(), *site.getsitepackages()]
250
+ invalid_directories = [flyte_root, sys.prefix, sys.base_prefix, site.getusersitepackages(), *site.getsitepackages()]
221
251
 
222
252
  for mod in modules:
223
253
  # Be careful not to import a module with the .__file__ call if not yet imported.
@@ -241,11 +271,19 @@ def list_imported_modules_as_files(source_path: str, modules: List[ModuleType])
241
271
 
242
272
  if not _file_is_in_directory(mod_file, source_path):
243
273
  # Only upload files where the module file in the source directory
274
+ # print log line for files that have common ancestor with source_path, but not in it.
275
+ logger.debug(f"{mod_file} is not in {source_path}")
244
276
  continue
245
277
 
246
- files.append(mod_file)
278
+ if not pathlib.Path(mod_file).is_file():
279
+ # Some modules have a __file__ attribute that are relative to the base package. Let's skip these,
280
+ # can add more rigorous logic to really pull out the correct file location if we need to.
281
+ logger.debug(f"Skipping {mod_file} from {mod.__name__} because it is not a file")
282
+ continue
247
283
 
248
- return files
284
+ files.add(mod_file)
285
+
286
+ return list(files)
249
287
 
250
288
 
251
289
  def add_imported_modules_from_source(source_path: str, destination: str, modules: List[ModuleType]):
@@ -322,18 +360,3 @@ def hash_file(file_path: typing.Union[os.PathLike, str]) -> Tuple[bytes, str, in
322
360
  size += len(chunk)
323
361
 
324
362
  return h.digest(), h.hexdigest(), size
325
-
326
-
327
- def _find_project_root(source_path) -> str:
328
- """
329
- Find the root of the project.
330
- The root of the project is considered to be the first ancestor from source_path that does
331
- not contain a __init__.py file.
332
-
333
- N.B.: This assumption only holds for regular packages (as opposed to namespace packages)
334
- """
335
- # Start from the directory right above source_path
336
- path = Path(source_path).parent.resolve()
337
- while os.path.exists(os.path.join(path, "__init__.py")):
338
- path = path.parent
339
- return str(path)
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import asyncio
2
4
  import gzip
3
5
  import logging
@@ -5,26 +7,54 @@ import os
5
7
  import pathlib
6
8
  import tempfile
7
9
  from pathlib import Path
8
- from typing import Type
10
+ from typing import TYPE_CHECKING, ClassVar, Type
9
11
 
10
- from flyteidl.core.tasks_pb2 import TaskTemplate
12
+ from async_lru import alru_cache
13
+ from flyteidl2.core.tasks_pb2 import TaskTemplate
11
14
 
12
- import flyte.storage as storage
13
- from flyte._datastructures import CodeBundle
14
15
  from flyte._logging import log, logger
16
+ from flyte._utils import AsyncLRUCache
17
+ from flyte.models import CodeBundle
15
18
 
16
19
  from ._ignore import GitIgnore, Ignore, StandardIgnore
17
- from ._packaging import create_bundle, list_files_to_bundle, print_ls_tree
20
+ from ._packaging import create_bundle, list_files_to_bundle, list_relative_files_to_bundle, print_ls_tree
18
21
  from ._utils import CopyFiles, hash_file
19
22
 
23
+ if TYPE_CHECKING:
24
+ from flyte.app import AppEnvironment
25
+
20
26
  _pickled_file_extension = ".pkl.gz"
21
27
  _tar_file_extension = ".tar.gz"
22
28
 
23
29
 
30
+ class _PklCache:
31
+ _pkl_cache: ClassVar[AsyncLRUCache[str, str]] = AsyncLRUCache[str, str](maxsize=100)
32
+
33
+ @classmethod
34
+ async def put(cls, digest: str, upload_to_path: str, from_path: pathlib.Path) -> str:
35
+ """
36
+ Get the pickled code bundle from the cache or build it if not present.
37
+
38
+ :param digest: The hash digest of the task template.
39
+ :param upload_to_path: The path to upload the pickled file to.
40
+ :param from_path: The path to read the pickled file from.
41
+ :return: CodeBundle object containing the pickled file path and the computed version.
42
+ """
43
+ import flyte.storage as storage
44
+
45
+ async def put_data() -> str:
46
+ return await storage.put(str(from_path), to_path=str(upload_to_path))
47
+
48
+ return await cls._pkl_cache.get(
49
+ key=digest,
50
+ value_func=put_data,
51
+ )
52
+
53
+
24
54
  async def build_pkl_bundle(
25
- o: TaskTemplate,
55
+ o: TaskTemplate | AppEnvironment,
26
56
  upload_to_controlplane: bool = True,
27
- upload_from_dataplane_path: str | None = None,
57
+ upload_from_dataplane_base_path: str | None = None,
28
58
  copy_bundle_to: pathlib.Path | None = None,
29
59
  ) -> CodeBundle:
30
60
  """
@@ -36,16 +66,14 @@ async def build_pkl_bundle(
36
66
 
37
67
  :param o: Object to be pickled. This is the task template.
38
68
  :param upload_to_controlplane: Whether to upload the pickled file to the control plane or not
39
- :param upload_from_dataplane_path: If we are on the dataplane, this is the path where the
69
+ :param upload_from_dataplane_base_path: If we are on the dataplane, this is the path where the
40
70
  pickled file should be uploaded to. upload_to_controlplane has to be False in this case.
41
71
  :param copy_bundle_to: If set, the bundle will be copied to this path. This is used for testing purposes.
42
72
  :return: CodeBundle object containing the pickled file path and the computed version.
43
73
  """
44
74
  import cloudpickle
45
75
 
46
- import flyte.storage as storage
47
-
48
- if upload_to_controlplane and upload_from_dataplane_path:
76
+ if upload_to_controlplane and upload_from_dataplane_base_path:
49
77
  raise ValueError("Cannot upload to control plane and upload from dataplane path at the same time.")
50
78
 
51
79
  logger.debug("Building pickled code bundle.")
@@ -58,13 +86,20 @@ async def build_pkl_bundle(
58
86
  logger.debug("Uploading pickled code bundle to control plane.")
59
87
  from flyte.remote import upload_file
60
88
 
61
- hash_digest, remote_path = await upload_file(dest)
89
+ hash_digest, remote_path = await upload_file.aio(dest)
62
90
  return CodeBundle(pkl=remote_path, computed_version=hash_digest)
63
91
 
64
- elif upload_from_dataplane_path:
65
- logger.debug(f"Uploading pickled code bundle to dataplane path {upload_from_dataplane_path}.")
92
+ elif upload_from_dataplane_base_path:
93
+ from flyte._internal.runtime import io
94
+
66
95
  _, str_digest, _ = hash_file(file_path=dest)
67
- final_path = await storage.put(str(dest), upload_from_dataplane_path)
96
+ upload_path = io.pkl_path(upload_from_dataplane_base_path, str_digest)
97
+ logger.debug(f"Uploading pickled code bundle to dataplane path {upload_path}.")
98
+ final_path = await _PklCache.put(
99
+ digest=str_digest,
100
+ upload_to_path=upload_path,
101
+ from_path=dest,
102
+ )
68
103
  return CodeBundle(pkl=final_path, computed_version=str_digest)
69
104
 
70
105
  else:
@@ -74,12 +109,13 @@ async def build_pkl_bundle(
74
109
  import shutil
75
110
 
76
111
  # Copy the bundle to the given path
77
- shutil.copy(dest, copy_bundle_to)
112
+ shutil.copy(dest, copy_bundle_to, follow_symlinks=True)
78
113
  local_path = copy_bundle_to / dest.name
79
114
  return CodeBundle(pkl=str(local_path), computed_version=str_digest)
80
115
  return CodeBundle(pkl=str(dest), computed_version=str_digest)
81
116
 
82
117
 
118
+ @alru_cache
83
119
  async def build_code_bundle(
84
120
  from_dir: Path,
85
121
  *ignore: Type[Ignore],
@@ -90,7 +126,7 @@ async def build_code_bundle(
90
126
  ) -> CodeBundle:
91
127
  """
92
128
  Build the code bundle for the current environment.
93
- :param from_dir: The directory to bundle of the code to bundle. This is the root directory for the source.
129
+ :param from_dir: The directory of the code to bundle. This is the root directory for the source.
94
130
  :param extract_dir: The directory to extract the code bundle to, when in the container. It defaults to the current
95
131
  working directory.
96
132
  :param ignore: The list of ignores to apply. This is a list of Ignore classes.
@@ -111,13 +147,66 @@ async def build_code_bundle(
111
147
  if logger.getEffectiveLevel() <= logging.INFO:
112
148
  print_ls_tree(from_dir, files)
113
149
 
150
+ logger.debug("Building code bundle.")
151
+ with tempfile.TemporaryDirectory() as tmp_dir:
152
+ bundle_path, tar_size, archive_size = create_bundle(
153
+ from_dir, pathlib.Path(tmp_dir), files, digest, deref_symlinks=True
154
+ )
155
+ logger.info(f"Code bundle created at {bundle_path}, size: {tar_size} MB, archive size: {archive_size} MB")
156
+ if not dryrun:
157
+ hash_digest, remote_path = await upload_file.aio(bundle_path)
158
+ logger.debug(f"Code bundle uploaded to {remote_path}")
159
+ else:
160
+ if copy_bundle_to:
161
+ remote_path = str(copy_bundle_to / bundle_path.name)
162
+ else:
163
+ import flyte.storage as storage
164
+
165
+ base_path = storage.get_random_local_path()
166
+ base_path.mkdir(parents=True, exist_ok=True)
167
+ remote_path = str(base_path / bundle_path.name)
168
+
169
+ import shutil
170
+
171
+ # Copy the bundle to the given path
172
+ shutil.copy(bundle_path, remote_path)
173
+ _, hash_digest, _ = hash_file(file_path=bundle_path)
174
+ return CodeBundle(tgz=remote_path, destination=extract_dir, computed_version=hash_digest, files=files)
175
+
176
+
177
+ @alru_cache
178
+ async def build_code_bundle_from_relative_paths(
179
+ relative_paths: tuple[str, ...],
180
+ from_dir: Path,
181
+ extract_dir: str = ".",
182
+ dryrun: bool = False,
183
+ copy_bundle_to: pathlib.Path | None = None,
184
+ ) -> CodeBundle:
185
+ """
186
+ Build a code bundle from a list of relative paths.
187
+ :param relative_paths: The list of relative paths to bundle.
188
+ :param from_dir: The directory of the code to bundle. This is the root directory for the source.
189
+ :param extract_dir: The directory to extract the code bundle to, when in the container. It defaults to the current
190
+ working directory.
191
+ :param dryrun: If dryrun is enabled, files will not be uploaded to the control plane.
192
+ :param copy_bundle_to: If set, the bundle will be copied to this path. This is used for testing purposes.
193
+ :return: The code bundle, which contains the path where the code was zipped to.
194
+ """
195
+ logger.debug("Building code bundle from relative paths.")
196
+ from flyte.remote import upload_file
197
+
198
+ logger.debug("Finding files to bundle")
199
+ files, digest = list_relative_files_to_bundle(relative_paths, from_dir)
200
+ if logger.getEffectiveLevel() <= logging.INFO:
201
+ print_ls_tree(from_dir, files)
202
+
114
203
  logger.debug("Building code bundle.")
115
204
  with tempfile.TemporaryDirectory() as tmp_dir:
116
205
  bundle_path, tar_size, archive_size = create_bundle(from_dir, pathlib.Path(tmp_dir), files, digest)
117
206
  logger.info(f"Code bundle created at {bundle_path}, size: {tar_size} MB, archive size: {archive_size} MB")
118
207
  if not dryrun:
119
- hash_digest, remote_path = await upload_file(bundle_path)
120
- logger.info(f"Code bundle uploaded to {remote_path}")
208
+ hash_digest, remote_path = await upload_file.aio(bundle_path)
209
+ logger.debug(f"Code bundle uploaded to {remote_path}")
121
210
  else:
122
211
  remote_path = "na"
123
212
  if copy_bundle_to:
@@ -127,7 +216,7 @@ async def build_code_bundle(
127
216
  shutil.copy(bundle_path, copy_bundle_to)
128
217
  remote_path = str(copy_bundle_to / bundle_path.name)
129
218
  _, hash_digest, _ = hash_file(file_path=bundle_path)
130
- return CodeBundle(tgz=remote_path, destination=extract_dir, computed_version=hash_digest)
219
+ return CodeBundle(tgz=remote_path, destination=extract_dir, computed_version=hash_digest, files=files)
131
220
 
132
221
 
133
222
  @log(level=logging.INFO)
@@ -138,29 +227,44 @@ async def download_bundle(bundle: CodeBundle) -> pathlib.Path:
138
227
 
139
228
  :return: The path to the downloaded code bundle.
140
229
  """
230
+ import sys
231
+
232
+ import flyte.storage as storage
233
+
141
234
  dest = pathlib.Path(bundle.destination)
235
+ if not dest.exists():
236
+ dest.mkdir(parents=True, exist_ok=True)
142
237
  if not dest.is_dir():
143
238
  raise ValueError(f"Destination path should be a directory, found {dest}, {dest.stat()}")
144
239
 
145
240
  # TODO make storage apis better to accept pathlib.Path
146
241
  if bundle.tgz:
147
242
  downloaded_bundle = dest / os.path.basename(bundle.tgz)
243
+ if downloaded_bundle.exists():
244
+ logger.debug(f"Code bundle {downloaded_bundle} already exists locally, skipping download.")
245
+ return downloaded_bundle.absolute()
148
246
  # Download the tgz file
149
- path = await storage.get(bundle.tgz, str(downloaded_bundle.absolute()))
150
- downloaded_bundle = pathlib.Path(path)
247
+ logger.debug(f"Downloading code bundle from {bundle.tgz} to {downloaded_bundle.absolute()}")
248
+ await storage.get(bundle.tgz, str(downloaded_bundle.absolute()))
151
249
  # NOTE the os.path.join(destination, ''). This is to ensure that the given path is in fact a directory and all
152
250
  # downloaded data should be copied into this directory. We do this to account for a difference in behavior in
153
251
  # fsspec, which requires a trailing slash in case of pre-existing directory.
154
- process = await asyncio.create_subprocess_exec(
155
- "tar",
252
+ args = [
156
253
  "-xvf",
157
254
  str(downloaded_bundle),
158
255
  "-C",
159
256
  str(dest),
257
+ ]
258
+ if sys.platform != "darwin":
259
+ args.insert(0, "--overwrite")
260
+
261
+ process = await asyncio.create_subprocess_exec(
262
+ "tar",
263
+ *args,
160
264
  stdout=asyncio.subprocess.PIPE,
161
265
  stderr=asyncio.subprocess.PIPE,
162
266
  )
163
- stdout, stderr = await process.communicate()
267
+ _stdout, stderr = await process.communicate()
164
268
 
165
269
  if process.returncode != 0:
166
270
  raise RuntimeError(stderr.decode())
@@ -171,8 +275,7 @@ async def download_bundle(bundle: CodeBundle) -> pathlib.Path:
171
275
 
172
276
  downloaded_bundle = dest / os.path.basename(bundle.pkl)
173
277
  # Download the tgz file
174
- path = await storage.get(bundle.pkl, str(downloaded_bundle.absolute()))
175
- downloaded_bundle = pathlib.Path(path)
278
+ await storage.get(bundle.pkl, str(downloaded_bundle.absolute()))
176
279
  return downloaded_bundle.absolute()
177
280
  else:
178
281
  raise ValueError("Code bundle should be either tgz or pkl, found neither.")
flyte/_constants.py ADDED
@@ -0,0 +1 @@
1
+ FLYTE_SYS_PATH = "_F_SYS_PATH" # The paths that will be appended to sys.path at runtime
flyte/_context.py CHANGED
@@ -2,9 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import contextvars
4
4
  from dataclasses import dataclass, replace
5
- from typing import TYPE_CHECKING, Awaitable, Callable, Optional, ParamSpec, TypeVar
5
+ from typing import TYPE_CHECKING, Awaitable, Callable, Optional, ParamSpec, Tuple, TypeVar
6
6
 
7
- from flyte._datastructures import GroupData, RawDataPath, TaskContext
7
+ from flyte._logging import logger
8
+ from flyte.models import GroupData, RawDataPath, TaskContext
8
9
 
9
10
  if TYPE_CHECKING:
10
11
  from flyte.report import Report
@@ -28,6 +29,7 @@ class ContextData:
28
29
  group_data: Optional[GroupData] = None
29
30
  task_context: Optional[TaskContext] = None
30
31
  raw_data_path: Optional[RawDataPath] = None
32
+ metadata: Optional[Tuple[Tuple[str, str], ...]] = None
31
33
 
32
34
  def replace(self, **kwargs) -> ContextData:
33
35
  return replace(self, **kwargs)
@@ -49,6 +51,7 @@ class Context:
49
51
  raise ValueError("Cannot create a new context without contextdata.")
50
52
  self._data = data
51
53
  self._id = id(self) # Immutable unique identifier
54
+ self._token = None # Context variable token to restore the previous context
52
55
 
53
56
  @property
54
57
  def data(self) -> ContextData:
@@ -83,6 +86,12 @@ class Context:
83
86
  """
84
87
  return Context(self.data.replace(raw_data_path=raw_data_path))
85
88
 
89
+ def new_metadata(self, metadata: Tuple[Tuple[str, str], ...]) -> Context:
90
+ """
91
+ Return a copy of the context with the given metadata tuple
92
+ """
93
+ return Context(self.data.replace(metadata=metadata))
94
+
86
95
  def get_report(self) -> Optional[Report]:
87
96
  """
88
97
  Returns a report if within a task context, else a None
@@ -95,7 +104,7 @@ class Context:
95
104
  def is_task_context(self) -> bool:
96
105
  """
97
106
  Returns true if the context is a task context
98
- :return:
107
+ :return: bool
99
108
  """
100
109
  return self.data.task_context is not None
101
110
 
@@ -106,7 +115,11 @@ class Context:
106
115
 
107
116
  def __exit__(self, exc_type, exc_val, exc_tb):
108
117
  """Exit the context, restoring the previous context."""
109
- root_context_var.reset(self._token)
118
+ try:
119
+ root_context_var.reset(self._token)
120
+ except Exception as e:
121
+ logger.warn(f"Failed to reset context: {e}")
122
+ raise e
110
123
 
111
124
  async def __aenter__(self):
112
125
  """Async version of context entry."""
@@ -129,7 +142,10 @@ root_context_var = contextvars.ContextVar("root", default=Context(data=ContextDa
129
142
 
130
143
 
131
144
  def ctx() -> Optional[TaskContext]:
132
- """Retrieve the current task context from the context variable."""
145
+ """
146
+ Returns flyte.models.TaskContext if within a task context, else None
147
+ Note: Only use this in task code and not module level.
148
+ """
133
149
  return internal_ctx().data.task_context
134
150
 
135
151