flyte 0.1.0__py3-none-any.whl → 0.2.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (205) hide show
  1. flyte/__init__.py +62 -2
  2. flyte/_api_commons.py +3 -0
  3. flyte/_bin/__init__.py +0 -0
  4. flyte/_bin/runtime.py +126 -0
  5. flyte/_build.py +25 -0
  6. flyte/_cache/__init__.py +12 -0
  7. flyte/_cache/cache.py +146 -0
  8. flyte/_cache/defaults.py +9 -0
  9. flyte/_cache/policy_function_body.py +42 -0
  10. flyte/_cli/__init__.py +0 -0
  11. flyte/_cli/_common.py +299 -0
  12. flyte/_cli/_create.py +42 -0
  13. flyte/_cli/_delete.py +23 -0
  14. flyte/_cli/_deploy.py +140 -0
  15. flyte/_cli/_get.py +235 -0
  16. flyte/_cli/_params.py +538 -0
  17. flyte/_cli/_run.py +174 -0
  18. flyte/_cli/main.py +98 -0
  19. flyte/_code_bundle/__init__.py +8 -0
  20. flyte/_code_bundle/_ignore.py +113 -0
  21. flyte/_code_bundle/_packaging.py +187 -0
  22. flyte/_code_bundle/_utils.py +339 -0
  23. flyte/_code_bundle/bundle.py +178 -0
  24. flyte/_context.py +146 -0
  25. flyte/_datastructures.py +342 -0
  26. flyte/_deploy.py +202 -0
  27. flyte/_doc.py +29 -0
  28. flyte/_docstring.py +32 -0
  29. flyte/_environment.py +43 -0
  30. flyte/_group.py +31 -0
  31. flyte/_hash.py +23 -0
  32. flyte/_image.py +757 -0
  33. flyte/_initialize.py +643 -0
  34. flyte/_interface.py +84 -0
  35. flyte/_internal/__init__.py +3 -0
  36. flyte/_internal/controllers/__init__.py +115 -0
  37. flyte/_internal/controllers/_local_controller.py +118 -0
  38. flyte/_internal/controllers/_trace.py +40 -0
  39. flyte/_internal/controllers/pbhash.py +39 -0
  40. flyte/_internal/controllers/remote/__init__.py +40 -0
  41. flyte/_internal/controllers/remote/_action.py +141 -0
  42. flyte/_internal/controllers/remote/_client.py +43 -0
  43. flyte/_internal/controllers/remote/_controller.py +361 -0
  44. flyte/_internal/controllers/remote/_core.py +402 -0
  45. flyte/_internal/controllers/remote/_informer.py +361 -0
  46. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  47. flyte/_internal/imagebuild/__init__.py +11 -0
  48. flyte/_internal/imagebuild/docker_builder.py +416 -0
  49. flyte/_internal/imagebuild/image_builder.py +241 -0
  50. flyte/_internal/imagebuild/remote_builder.py +0 -0
  51. flyte/_internal/resolvers/__init__.py +0 -0
  52. flyte/_internal/resolvers/_task_module.py +54 -0
  53. flyte/_internal/resolvers/common.py +31 -0
  54. flyte/_internal/resolvers/default.py +28 -0
  55. flyte/_internal/runtime/__init__.py +0 -0
  56. flyte/_internal/runtime/convert.py +205 -0
  57. flyte/_internal/runtime/entrypoints.py +135 -0
  58. flyte/_internal/runtime/io.py +136 -0
  59. flyte/_internal/runtime/resources_serde.py +138 -0
  60. flyte/_internal/runtime/task_serde.py +210 -0
  61. flyte/_internal/runtime/taskrunner.py +190 -0
  62. flyte/_internal/runtime/types_serde.py +54 -0
  63. flyte/_logging.py +124 -0
  64. flyte/_protos/__init__.py +0 -0
  65. flyte/_protos/common/authorization_pb2.py +66 -0
  66. flyte/_protos/common/authorization_pb2.pyi +108 -0
  67. flyte/_protos/common/authorization_pb2_grpc.py +4 -0
  68. flyte/_protos/common/identifier_pb2.py +71 -0
  69. flyte/_protos/common/identifier_pb2.pyi +82 -0
  70. flyte/_protos/common/identifier_pb2_grpc.py +4 -0
  71. flyte/_protos/common/identity_pb2.py +48 -0
  72. flyte/_protos/common/identity_pb2.pyi +72 -0
  73. flyte/_protos/common/identity_pb2_grpc.py +4 -0
  74. flyte/_protos/common/list_pb2.py +36 -0
  75. flyte/_protos/common/list_pb2.pyi +69 -0
  76. flyte/_protos/common/list_pb2_grpc.py +4 -0
  77. flyte/_protos/common/policy_pb2.py +37 -0
  78. flyte/_protos/common/policy_pb2.pyi +27 -0
  79. flyte/_protos/common/policy_pb2_grpc.py +4 -0
  80. flyte/_protos/common/role_pb2.py +37 -0
  81. flyte/_protos/common/role_pb2.pyi +53 -0
  82. flyte/_protos/common/role_pb2_grpc.py +4 -0
  83. flyte/_protos/common/runtime_version_pb2.py +28 -0
  84. flyte/_protos/common/runtime_version_pb2.pyi +24 -0
  85. flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
  86. flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
  87. flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  88. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  89. flyte/_protos/secret/definition_pb2.py +49 -0
  90. flyte/_protos/secret/definition_pb2.pyi +93 -0
  91. flyte/_protos/secret/definition_pb2_grpc.py +4 -0
  92. flyte/_protos/secret/payload_pb2.py +62 -0
  93. flyte/_protos/secret/payload_pb2.pyi +94 -0
  94. flyte/_protos/secret/payload_pb2_grpc.py +4 -0
  95. flyte/_protos/secret/secret_pb2.py +38 -0
  96. flyte/_protos/secret/secret_pb2.pyi +6 -0
  97. flyte/_protos/secret/secret_pb2_grpc.py +198 -0
  98. flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
  99. flyte/_protos/validate/validate/validate_pb2.py +76 -0
  100. flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
  101. flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  102. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  103. flyte/_protos/workflow/queue_service_pb2.py +106 -0
  104. flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
  105. flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  106. flyte/_protos/workflow/run_definition_pb2.py +128 -0
  107. flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
  108. flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  109. flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
  110. flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  111. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  112. flyte/_protos/workflow/run_service_pb2.py +133 -0
  113. flyte/_protos/workflow/run_service_pb2.pyi +175 -0
  114. flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
  115. flyte/_protos/workflow/state_service_pb2.py +58 -0
  116. flyte/_protos/workflow/state_service_pb2.pyi +71 -0
  117. flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
  118. flyte/_protos/workflow/task_definition_pb2.py +72 -0
  119. flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
  120. flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  121. flyte/_protos/workflow/task_service_pb2.py +44 -0
  122. flyte/_protos/workflow/task_service_pb2.pyi +31 -0
  123. flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
  124. flyte/_resources.py +226 -0
  125. flyte/_retry.py +32 -0
  126. flyte/_reusable_environment.py +25 -0
  127. flyte/_run.py +410 -0
  128. flyte/_secret.py +61 -0
  129. flyte/_task.py +367 -0
  130. flyte/_task_environment.py +200 -0
  131. flyte/_timeout.py +47 -0
  132. flyte/_tools.py +27 -0
  133. flyte/_trace.py +128 -0
  134. flyte/_utils/__init__.py +20 -0
  135. flyte/_utils/asyn.py +119 -0
  136. flyte/_utils/coro_management.py +25 -0
  137. flyte/_utils/file_handling.py +72 -0
  138. flyte/_utils/helpers.py +108 -0
  139. flyte/_utils/lazy_module.py +54 -0
  140. flyte/_utils/uv_script_parser.py +49 -0
  141. flyte/_version.py +21 -0
  142. flyte/config/__init__.py +168 -0
  143. flyte/config/_config.py +196 -0
  144. flyte/config/_internal.py +64 -0
  145. flyte/connectors/__init__.py +0 -0
  146. flyte/errors.py +143 -0
  147. flyte/extras/__init__.py +5 -0
  148. flyte/extras/_container.py +273 -0
  149. flyte/io/__init__.py +11 -0
  150. flyte/io/_dataframe.py +0 -0
  151. flyte/io/_dir.py +448 -0
  152. flyte/io/_file.py +468 -0
  153. flyte/io/pickle/__init__.py +0 -0
  154. flyte/io/pickle/transformer.py +117 -0
  155. flyte/io/structured_dataset/__init__.py +129 -0
  156. flyte/io/structured_dataset/basic_dfs.py +219 -0
  157. flyte/io/structured_dataset/structured_dataset.py +1061 -0
  158. flyte/remote/__init__.py +25 -0
  159. flyte/remote/_client/__init__.py +0 -0
  160. flyte/remote/_client/_protocols.py +131 -0
  161. flyte/remote/_client/auth/__init__.py +12 -0
  162. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  163. flyte/remote/_client/auth/_authenticators/base.py +397 -0
  164. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  165. flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
  166. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  167. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  168. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  169. flyte/remote/_client/auth/_channel.py +184 -0
  170. flyte/remote/_client/auth/_client_config.py +83 -0
  171. flyte/remote/_client/auth/_default_html.py +32 -0
  172. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  173. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  174. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  175. flyte/remote/_client/auth/_keyring.py +143 -0
  176. flyte/remote/_client/auth/_token_client.py +260 -0
  177. flyte/remote/_client/auth/errors.py +16 -0
  178. flyte/remote/_client/controlplane.py +95 -0
  179. flyte/remote/_console.py +18 -0
  180. flyte/remote/_data.py +155 -0
  181. flyte/remote/_logs.py +116 -0
  182. flyte/remote/_project.py +86 -0
  183. flyte/remote/_run.py +873 -0
  184. flyte/remote/_secret.py +132 -0
  185. flyte/remote/_task.py +227 -0
  186. flyte/report/__init__.py +3 -0
  187. flyte/report/_report.py +178 -0
  188. flyte/report/_template.html +124 -0
  189. flyte/storage/__init__.py +24 -0
  190. flyte/storage/_remote_fs.py +34 -0
  191. flyte/storage/_storage.py +251 -0
  192. flyte/storage/_utils.py +5 -0
  193. flyte/types/__init__.py +13 -0
  194. flyte/types/_interface.py +25 -0
  195. flyte/types/_renderer.py +162 -0
  196. flyte/types/_string_literals.py +120 -0
  197. flyte/types/_type_engine.py +2211 -0
  198. flyte/types/_utils.py +80 -0
  199. flyte-0.2.0b1.dist-info/METADATA +179 -0
  200. flyte-0.2.0b1.dist-info/RECORD +204 -0
  201. {flyte-0.1.0.dist-info → flyte-0.2.0b1.dist-info}/WHEEL +2 -1
  202. flyte-0.2.0b1.dist-info/entry_points.txt +3 -0
  203. flyte-0.2.0b1.dist-info/top_level.txt +1 -0
  204. flyte-0.1.0.dist-info/METADATA +0 -6
  205. flyte-0.1.0.dist-info/RECORD +0 -5
flyte/_cli/_run.py ADDED
@@ -0,0 +1,174 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ from dataclasses import dataclass, field, fields
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+ from typing import Any, Dict, List, cast
8
+
9
+ import click
10
+ from click import Context, Parameter
11
+ from rich.console import Console
12
+ from typing_extensions import get_args
13
+
14
+ import flyte
15
+
16
+ from .._code_bundle._utils import CopyFiles
17
+ from .._task import TaskTemplate
18
+ from ..remote import Run
19
+ from . import _common as common
20
+ from ._common import CLIConfig
21
+ from ._params import to_click_option
22
+
23
+
24
@dataclass
class RunArguments:
    """
    Base options accepted by the ``run`` command.

    Every field keeps the ``click.Option`` that populates it under the ``"click.option"`` metadata key;
    :meth:`options` hands that metadata to ``common.get_option_from_metadata`` to build the parameter list.
    """

    # Defaults for project/domain are taken from the shared option objects in `_common`.
    project: str = field(
        default=cast(str, common.PROJECT_OPTION.default),
        metadata={"click.option": common.PROJECT_OPTION},
    )
    domain: str = field(
        default=cast(str, common.DOMAIN_OPTION.default),
        metadata={"click.option": common.DOMAIN_OPTION},
    )
    # Flag: when set, the run mode passed to the run context is "local" instead of "remote".
    local: bool = field(
        default=False,
        metadata={"click.option": click.Option(["--local"], is_flag=True, help="Run the task locally")},
    )
    # Allowed values are the arguments of the `CopyFiles` type (read via `get_args`).
    copy_style: CopyFiles = field(
        default="loaded_modules",
        metadata={
            "click.option": click.Option(
                ["--copy-style"],
                type=click.Choice(get_args(CopyFiles)),
                default="loaded_modules",
                help="Copy style to use when running the task",
            )
        },
    )

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> RunArguments:
        """
        Build an instance from a mapping of field name to value.

        :param d: keyword arguments for the dataclass constructor; unknown keys raise ``TypeError``
        :return: the populated RunArguments
        """
        return cls(**d)

    @classmethod
    def options(cls) -> List[click.Option]:
        """
        Return the set of base parameters added to run subcommand.
        """
        collected: List[click.Option] = []
        for spec in fields(cls):
            # Fields without metadata carry no CLI option and are skipped.
            if spec.metadata:
                collected.append(common.get_option_from_metadata(spec.metadata))
        return collected
64
+
65
+
66
class RunTaskCommand(click.Command):
    """
    Click command that runs one task object.

    The command is named after the task, its options are derived from the task's interface in
    :meth:`get_params`, and :meth:`invoke` submits the run through ``flyte.with_runcontext``.
    """

    def __init__(self, obj_name: str, obj: Any, run_args: RunArguments, *args, **kwargs):
        """
        :param obj_name: name of the task object; also used as the command name
        :param obj: the task object (treated as a ``TaskTemplate``)
        :param run_args: base run options parsed by the parent group
        """
        self.obj_name = obj_name
        self.obj = cast(TaskTemplate, obj)
        self.run_args = run_args
        # The command name is always `obj_name`; discard any `name` the caller passed as a keyword.
        kwargs.pop("name", None)
        super().__init__(obj_name, *args, **kwargs)

    def invoke(self, ctx: Context):
        """
        Initialise the CLI config, run the task with the parsed parameters and print a panel for remote runs.
        """
        cli_config: CLIConfig = ctx.obj or CLIConfig()
        cli_config.init(self.run_args.project, self.run_args.domain)

        run_mode = "local" if self.run_args.local else "remote"
        # NOTE(review): `version` is given the copy style value, which looks like a copy/paste slip.
        # `with_runcontext` is not visible from here, so the call is kept as-is — confirm the intent.
        runner = flyte.with_runcontext(
            copy_style=self.run_args.copy_style,
            version=self.run_args.copy_style,
            mode=run_mode,
        )
        r = runner.run(self.obj, **ctx.params)

        # Only results that are a `Run` with an attached action are reported.
        if not isinstance(r, Run) or r.action is None:
            return

        console = Console()
        summary = (
            f"[green bold]Created Run: {r.name} [/green bold] "
            f"(Project: {r.action.action_id.run.project}, Domain: {r.action.action_id.run.domain})\n\n"
            f"[blue bold]{r.url}[/blue bold]"
        )
        console.print(common.get_panel("Run", summary))

    def get_params(self, ctx: Context) -> List[Parameter]:
        """
        Build one click option per task input and return the command's parameters.
        """
        # Note this function may be called multiple times by click.
        from .._internal.runtime.types_serde import transform_native_to_typed_interface

        native = self.obj.native_interface
        typed = transform_native_to_typed_interface(native)
        if typed is None:
            return super().get_params(ctx)

        declared = native.inputs
        built: List[Parameter] = []
        for name, var in typed.inputs.variables.items():
            # Each declared input is indexed as [0] python type, [1] default (inspect._empty when absent).
            declared_default = declared[name][1]
            default_val = None if declared_default is inspect._empty else declared_default
            built.append(to_click_option(name, var, declared[name][0], default_val))

        self.params = built
        return super().get_params(ctx)
113
+
114
+
115
class TaskPerFileGroup(common.ObjectsPerFileGroup):
    """
    Group that creates a command for each task (``TaskTemplate`` instance) found in a single python file.
    """

    def __init__(self, filename: Path, run_args: RunArguments, *args, **kwargs):
        """
        :param filename: file whose tasks become subcommands; forwarded as the first positional argument
        :param run_args: base run options handed to every generated command
        """
        super().__init__(filename, *args, **kwargs)
        self.run_args = run_args

    def _filter_objects(self, module: ModuleType) -> Dict[str, Any]:
        """Return the module-level attributes that are ``TaskTemplate`` instances, keyed by attribute name."""
        tasks: Dict[str, Any] = {}
        for attr_name, attr in module.__dict__.items():
            if isinstance(attr, TaskTemplate):
                tasks[attr_name] = attr
        return tasks

    def _get_command_for_obj(self, ctx: click.Context, obj_name: str, obj: Any) -> click.Command:
        """Wrap a task object in a ``RunTaskCommand``, using the task docs (when present) as help text."""
        task = cast(TaskTemplate, obj)
        help_text = task.docs.__help__str__() if task.docs else None
        return RunTaskCommand(obj_name=obj_name, obj=task, help=help_text, run_args=self.run_args)
136
+
137
+
138
class TaskFiles(common.FileGroup):
    """
    Group that creates a command for each file in the current directory that is not __init__.py.

    The base run options from ``RunArguments.options()`` are attached to the group itself, so they are
    parsed before the file name and handed down to the per-file group.
    """

    common_options_enabled = False

    def __init__(
        self,
        *args,
        directory: Path | None = None,
        **kwargs,
    ):
        """
        :param directory: directory to list files from; forwarded unchanged to the base group
        :param kwargs: forwarded to the base group; ``params`` (if given) is extended with the run options
        """
        # Build a new list instead of extending the caller's one in place: a list shared between several
        # groups would otherwise accumulate duplicate options. `or []` also tolerates `params=None`.
        kwargs["params"] = [*(kwargs.get("params") or []), *RunArguments.options()]
        super().__init__(*args, directory=directory, **kwargs)

    def get_command(self, ctx, filename):
        """
        Resolve ``filename`` to a nested group.

        :param ctx: click context whose ``params`` hold the parsed base run options
        :param filename: path typed by the user
        :return: a ``TaskFiles`` group for a directory, otherwise a ``TaskPerFileGroup`` for the file
        :raises click.BadParameter: if the path does not exist
        """
        run_args = RunArguments.from_dict(ctx.params)
        fp = Path(filename)
        if not fp.exists():
            raise click.BadParameter(f"File {filename} does not exist")
        if fp.is_dir():
            return TaskFiles(directory=fp)
        return TaskPerFileGroup(
            filename=fp,
            run_args=run_args,
            name=filename,
            help=f"Run, functions decorated `env.task` or instances of Tasks in {filename}",
        )
169
+
170
+
171
# Module-level `run` command group. No directory is passed, so TaskFiles receives `directory=None`.
run = TaskFiles(
    name="run",
    help="Run a task from a python file",
)
flyte/_cli/main.py ADDED
@@ -0,0 +1,98 @@
1
+ import rich_click as click
2
+
3
+ from ..config import Config
4
+ from ._common import CLIConfig
5
+ from ._create import create
6
+ from ._deploy import deploy
7
+ from ._get import get
8
+ from ._run import run
9
+
10
+
11
+ def _verbosity_to_loglevel(verbosity: int) -> int | None:
12
+ """
13
+ Converts a verbosity level from the CLI to a logging level.
14
+
15
+ :param verbosity: verbosity level from the CLI
16
+ :return: logging level
17
+ """
18
+ import logging
19
+
20
+ match verbosity:
21
+ case 0:
22
+ return None
23
+ case 1:
24
+ return logging.WARNING
25
+ case 2:
26
+ return logging.INFO
27
+ case _:
28
+ return logging.DEBUG
29
+
30
+
31
# Root CLI group. The function docstring below is the help text click shows for the command, so it is
# left untouched; explanatory notes are kept in `#` comments instead.
@click.group(cls=click.RichGroup)
@click.option(
    "--endpoint",
    type=str,
    required=False,
    help="The endpoint to connect to, this will override any config and simply used pkce to connect.",
)
@click.option(
    "--insecure",
    is_flag=True,
    required=False,
    help="insecure",
    type=bool,
    default=False,
)
# `-v` may be repeated; click counts the occurrences and the count is mapped to a logging level in `main`.
@click.option(
    "-v",
    "--verbose",
    required=False,
    help="Show verbose messages and exception traces",
    count=True,
    default=0,
    type=int,
)
@click.option(
    "--org-override",
    type=str,
    required=False,
    help="Override for org",
)
# The value of `-c/--config` is passed to the function as `config_file`; the path must already exist.
@click.option(
    "-c",
    "--config",
    "config_file",
    required=False,
    type=click.Path(exists=True),
    help="Path to config file (YAML format) to use for the CLI. If not specified,"
    " the default config file will be used.",
)
@click.pass_context
def main(
    ctx: click.Context,
    endpoint: str | None,
    insecure: bool,
    verbose: int,
    org_override: str | None,
    config_file: str | None,
):
    """
    v2 cli. Root command, please use one of the subcommands.
    """
    log_level = _verbosity_to_loglevel(verbose)

    config = Config.auto(config_file=config_file)

    # Command-line values win; a missing/falsy value falls back to the loaded config. Because of the `or`,
    # a true `insecure` in the config cannot be switched off from the command line.
    ctx.obj = CLIConfig(
        log_level=log_level,
        endpoint=endpoint or config.platform.endpoint,
        insecure=insecure or config.platform.insecure,
        org_override=org_override or config.task.org,
        config=config,
    )
93
+
94
+
95
+ main.add_command(run)
96
+ main.add_command(deploy)
97
+ main.add_command(get) # type: ignore
98
+ main.add_command(create) # type: ignore
@@ -0,0 +1,8 @@
1
+ from ._ignore import GitIgnore, IgnoreGroup, StandardIgnore
2
+ from ._utils import CopyFiles
3
+ from .bundle import build_code_bundle, build_pkl_bundle, download_bundle
4
+
5
# Names exported by the package.
# NOTE(review): GitIgnore, IgnoreGroup and StandardIgnore are imported above but not exported — confirm.
__all__ = ["CopyFiles", "build_code_bundle", "build_pkl_bundle", "default_ignores", "download_bundle"]


# Ignore classes offered as the package default.
# NOTE(review): IgnoreGroup is a combinator over other Ignore classes; built with only a root it wraps
# nothing and ignores nothing — confirm it is meant to be part of this list.
default_ignores = [GitIgnore, StandardIgnore, IgnoreGroup]
@@ -0,0 +1,113 @@
1
+ import os
2
+ import pathlib
3
+ import subprocess
4
+ import tarfile as _tarfile
5
+ from abc import ABC, abstractmethod
6
+ from fnmatch import fnmatch
7
+ from pathlib import Path
8
+ from shutil import which
9
+ from typing import List, Optional, Type
10
+
11
+ from flyte._logging import logger
12
+
13
+
14
class Ignore(ABC):
    """Base for Ignores, implements core logic. Children have to implement _is_ignored

    Paths handed to ``_is_ignored`` are relative to ``root`` whenever the caller passed an absolute path,
    so subclasses can compare against root-relative listings and patterns.
    """

    def __init__(self, root: Path):
        """
        :param root: directory the ignore rules are evaluated against
        """
        self.root = root

    def is_ignored(self, path: pathlib.Path) -> bool:
        """
        Tell whether ``path`` is ignored.

        :param path: path to test; absolute paths are rewritten relative to ``root`` first
        :return: True when the subclass considers the path ignored
        """
        if os.path.isabs(path):
            try:
                path = pathlib.Path(os.path.relpath(path, self.root))
            except ValueError:
                # relpath cannot relate the two paths (e.g. different drives on Windows);
                # hand the path over unchanged rather than failing.
                pass
        return self._is_ignored(path)

    def tar_filter(self, tarinfo: _tarfile.TarInfo) -> Optional[_tarfile.TarInfo]:
        """
        Filter for ``TarFile.add``: drop the member (return None) when its name is ignored.

        :param tarinfo: archive member under consideration
        :return: ``tarinfo`` unchanged, or None to exclude it
        """
        if self.is_ignored(pathlib.Path(tarinfo.name)):
            return None
        return tarinfo

    @abstractmethod
    def _is_ignored(self, path: pathlib.Path) -> bool:
        """Subclass hook holding the actual ignore decision for ``path``."""
        pass
31
+
32
+
33
class GitIgnore(Ignore):
    """Uses git cli (if available) to list all ignored files and compare with those."""

    def __init__(self, root: Path):
        """
        :param root: directory in which ``git ls-files`` is executed
        """
        super().__init__(root)
        self.has_git = which("git") is not None
        # Both listings are collected once, at construction time.
        self.ignored_files = self._list_ignored_files()
        self.ignored_dirs = self._list_ignored_dirs()

    def _git_wrapper(self, extra_args: List[str]) -> set[str]:
        """
        Run ``git ls-files -io --exclude-standard`` in ``root`` and return its output lines.

        :param extra_args: extra arguments appended to the git command
        :return: the listed paths, or an empty set when git is missing or the command fails
        """
        if not self.has_git:
            logger.info("No git executable found, not applying any filters")
            return set()

        command = ["git", "ls-files", "-io", "--exclude-standard"]
        command.extend(extra_args)
        out = subprocess.run(command, cwd=self.root, capture_output=True, check=False)
        if out.returncode != 0:
            logger.info(f"Could not determine ignored paths due to:\n{out.stderr!r}\nNot applying any filters")
            return set()

        # The output ends with a newline, so the last split element is empty and is dropped.
        listing = out.stdout.decode("utf-8")
        return set(listing.split("\n")[:-1])

    def _list_ignored_files(self) -> set[str]:
        """Ignored files as reported by git."""
        return self._git_wrapper([])

    def _list_ignored_dirs(self) -> set[str]:
        """Ignored paths as reported by git when ``--directory`` is added."""
        return self._git_wrapper(["--directory"])

    def _is_ignored(self, path: pathlib.Path) -> bool:
        """Check ``path`` against the file listing and, for directories, the directory listing."""
        if not self.ignored_files:
            return False

        # git-ls-files uses POSIX paths
        posix_path = Path(path).as_posix()
        if posix_path in self.ignored_files:
            return True

        # Ignore empty directories
        if self.ignored_dirs and os.path.isdir(os.path.join(self.root, path)):
            return posix_path + "/" in self.ignored_dirs
        return False
72
+
73
+
74
# fnmatch-style patterns that StandardIgnore falls back to when it is given no patterns of its own.
STANDARD_IGNORE_PATTERNS = ["*.pyc", ".cache", ".cache/*", "__pycache__", "**/__pycache__"]
75
+
76
+
77
class StandardIgnore(Ignore):
    """Retains the standard ignore functionality that previously existed. Could in theory
    be fed with custom ignore patterns from cli."""

    def __init__(self, root: Path, patterns: Optional[List[str]] = None):
        """
        :param root: directory the ignore applies to
        :param patterns: fnmatch patterns; None or an empty list selects STANDARD_IGNORE_PATTERNS
        """
        super().__init__(root)
        self.patterns = patterns or STANDARD_IGNORE_PATTERNS

    def _is_ignored(self, path: pathlib.Path) -> bool:
        """Return True when the string form of ``path`` matches at least one pattern."""
        candidate = str(path)
        return any(fnmatch(candidate, pattern) for pattern in self.patterns)
90
+
91
+
92
class IgnoreGroup(Ignore):
    """Groups multiple Ignores and checks a path against them. A file is ignored if any
    Ignore considers it ignored."""

    def __init__(self, root: Path, *ignores: Type[Ignore]):
        """
        :param root: directory handed to every ignore
        :param ignores: Ignore classes (not instances); each is instantiated with ``root``
        """
        super().__init__(root)
        self.ignores = [ignore(root) for ignore in ignores]

    def _is_ignored(self, path: pathlib.Path) -> bool:
        """Return True as soon as one member ignore reports ``path`` as ignored."""
        return any(ignore.is_ignored(path) for ignore in self.ignores)

    def list_ignored(self) -> List[str]:
        """
        Walk ``root`` and collect every ignored file.

        :return: ignored file paths, relative to ``root``
        """
        ignored = []
        # os.walk instead of Path.walk: the latter only exists on Python 3.12+.
        # Unlike Path.walk, os.walk reports symlinks to directories as directories, so they are not
        # visited as files here.
        for dirpath, _, filenames in os.walk(self.root):
            for filename in filenames:
                # The member ignores work on root-relative paths (GitIgnore compares against the output of
                # `git ls-files` run inside root), so test the relative path rather than the joined one.
                rel_path = os.path.relpath(os.path.join(dirpath, filename), self.root)
                if self.is_ignored(pathlib.Path(rel_path)):
                    ignored.append(rel_path)
        return ignored
@@ -0,0 +1,187 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import hashlib
5
+ import os
6
+ import pathlib
7
+ import posixpath
8
+ import shutil
9
+ import stat
10
+ import subprocess
11
+ import tarfile
12
+ import time
13
+ import typing
14
+ from typing import List, Optional, Tuple, Union
15
+
16
+ import click
17
+ from rich import print as rich_print
18
+ from rich.tree import Tree
19
+
20
+ from flyte._logging import logger
21
+
22
+ from ._ignore import Ignore, IgnoreGroup
23
+ from ._utils import CopyFiles, _filehash_update, _pathhash_update, ls_files, tar_strip_file_attributes
24
+
25
# Pieces of the bundle archive file name: create_bundle names it "<FAST_PREFIX><digest><FAST_FILEENDING>".
FAST_PREFIX = "fast"
FAST_FILEENDING = ".tar.gz"
27
+
28
+
29
def print_ls_tree(source: os.PathLike, ls: typing.List[str]):
    """
    Print the files selected for the bundle as a directory tree rooted at ``source``.

    :param source: detected source root; every entry of ``ls`` must live below it
    :param ls: paths of the files to display
    """
    click.secho("Files to be copied for fast registration...", fg="bright_blue")

    branch_style = "bold bright_blue"
    root_path = pathlib.Path(source)
    root_node = Tree(
        f":open_file_folder: [link file://{source}]{source} (detected source root)",
        guide_style=branch_style,
    )
    # Directory path -> tree node already created for it.
    nodes = {root_path: root_node}

    for entry in ls:
        entry_path = pathlib.Path(entry)
        parent = entry_path.parent
        if parent not in nodes:
            # add trees for all intermediate folders
            node, node_path = root_node, root_path
            for part in parent.relative_to(source).parts:
                node_path = node_path / part
                known = nodes.get(node_path)
                if known is None:
                    known = node.add(f"{part}", guide_style=branch_style)
                    nodes[node_path] = known
                node = known
        nodes[parent].add(f"{entry_path.name}", guide_style=branch_style)

    rich_print(root_node)
53
+
54
+
55
def _compress_tarball(source: pathlib.Path, output: pathlib.Path) -> None:
    """
    Compress code tarball using pigz if available, otherwise gzip

    :param source: path of the uncompressed tarball
    :param output: path the compressed archive is written to
    :raises subprocess.CalledProcessError: if pigz is used and exits with a non-zero status
    """
    if pigz := shutil.which("pigz"):
        with open(str(output), "wb") as gzipped:
            subprocess.run([pigz, "--no-time", "-c", str(source)], stdout=gzipped, check=True)
        return

    # Monotonic clock: the measured duration is not affected by wall-clock adjustments.
    start_time = time.monotonic()
    # mtime=0 keeps the current time out of the gzip header.
    with gzip.GzipFile(filename=str(output), mode="wb", mtime=0) as gzipped, open(source, "rb") as source_file:
        # Stream in chunks instead of reading the whole tarball into memory at once.
        shutil.copyfileobj(source_file, gzipped)
    elapsed = time.monotonic() - start_time

    warning_time = 10
    if elapsed > warning_time:
        click.secho(
            f"Code tarball compression took {elapsed:.0f} seconds. "
            f"Consider installing `pigz` for faster compression.",
            fg="yellow",
        )
74
+
75
+
76
def list_files_to_bundle(
    source: pathlib.Path,
    deref_symlinks: bool = False,
    *ignores: typing.Type[Ignore],
    copy_style: CopyFiles = "all",
) -> typing.Tuple[List[str], str]:
    """
    Collect the files of a source directory that belong in the code bundle, together with a hexdigest
    of the included files.

    The Ignore classes are combined into a single IgnoreGroup rooted at ``source``; the listing itself is
    delegated to ``ls_files``.

    :param source: The source directory to package
    :param deref_symlinks: Whether to dereference symlinks or not
    :param ignores: Ignore classes to use for ignoring files (positional, after ``deref_symlinks``)
    :param copy_style: The copy style to use for the tarball
    :return: A list of all files to be included in the code bundle and a hexdigest of the included files
    """
    ignore_group = IgnoreGroup(source, *ignores)
    bundle_files, bundle_digest = ls_files(source, copy_style, deref_symlinks, ignore_group)
    logger.debug(f"Hash digest: {bundle_digest}")
    return bundle_files, bundle_digest
96
+
97
+
98
def create_bundle(
    source: pathlib.Path, output_dir: pathlib.Path, ls: List[str], ls_digest: str, deref_symlinks: bool = False
) -> Tuple[pathlib.Path, float, float]:
    """
    Write the listed files into a tarball and compress it.

    Both the intermediate ``tmp.tar`` and the compressed archive are written to ``output_dir``, which can
    be a temporary directory. The archive is named from FAST_PREFIX, ``ls_digest`` and FAST_FILEENDING.

    :param source: The source directory to package
    :param output_dir: The directory to write the tarball to
    :param ls: The list of files to include in the tarball
    :param ls_digest: The hexdigest of the included files
    :param deref_symlinks: Whether to dereference symlinks or not
    :return: The path to the compressed tarball, the size of the tarball in MB, and the size of the
        compressed tarball in MB
    """
    tar_path = output_dir / "tmp.tar"
    archive_fname = output_dir / f"{FAST_PREFIX}{ls_digest}{FAST_FILEENDING}"

    with tarfile.open(str(tar_path), "w", dereference=deref_symlinks) as tar:
        for ws_file in ls:
            # Members are stored under their path relative to `source`; each is added individually
            # (recursive=False) and passed through tar_strip_file_attributes.
            member_name = os.path.relpath(ws_file, start=source)
            member_path = os.path.join(source, ws_file)
            tar.add(member_path, arcname=member_name, recursive=False, filter=tar_strip_file_attributes)

    size_mbs = tar_path.stat().st_size / 1024 / 1024
    _compress_tarball(tar_path, archive_fname)
    asize_mbs = archive_fname.stat().st_size / 1024 / 1024

    return archive_fname, size_mbs, asize_mbs
131
+
132
+
133
def compute_digest(source: Union[os.PathLike, List[os.PathLike]], filter: Optional[typing.Callable] = None) -> str:
    """
    Walks the entirety of the source dir to compute a deterministic md5 hex digest of the dir contents.

    :param source: a directory, or a list whose entries are directories and/or single files
    :param filter: optional callable receiving the relative path; a truthy result skips the file
    :return: the md5 hex digest over the contents and relative paths of the included files
    """
    # usedforsecurity=False keeps md5 available on builds that block it for security purposes;
    # the resulting digest is unchanged.
    # NOTE(review): assumes this digest only identifies bundle contents — confirm it has no security role.
    hasher = hashlib.md5(usedforsecurity=False)

    def compute_digest_for_file(path: os.PathLike, rel_path: os.PathLike) -> None:
        # Only consider files that exist (e.g. disregard symlinks that point to non-existent files)
        if not os.path.exists(path):
            logger.info(f"Skipping non-existent file {path}")
            return

        # Skip socket files
        if stat.S_ISSOCK(os.stat(path).st_mode):
            logger.info(f"Skip socket file {path}")
            return

        if filter and filter(rel_path):
            return

        # Both the file content and its relative path feed the digest.
        _filehash_update(path, hasher)
        _pathhash_update(rel_path, hasher)

    def compute_digest_for_dir(source: os.PathLike) -> None:
        for root, _, files in os.walk(str(source), topdown=True):
            # Sort so the files of each directory are hashed in a stable order.
            files.sort()

            for fname in files:
                abspath = os.path.join(root, fname)
                relpath = os.path.relpath(abspath, source)
                compute_digest_for_file(pathlib.Path(abspath), pathlib.Path(relpath))

    if isinstance(source, list):
        for src in source:
            if os.path.isdir(src):
                compute_digest_for_dir(src)
            else:
                # A single file is hashed under its base name.
                compute_digest_for_file(src, os.path.basename(src))
    else:
        compute_digest_for_dir(source)

    return hasher.hexdigest()
179
+
180
+
181
def get_additional_distribution_loc(remote_location: str, identifier: str) -> str:
    """
    Build the remote path of the ``<identifier>.tar.gz`` archive below ``remote_location``.

    :param remote_location: remote prefix; joined with POSIX separators
    :param identifier: name of the archive without extension
    :return: ``remote_location`` joined with ``<identifier>.tar.gz``
    """
    archive_name = f"{identifier}.tar.gz"
    return posixpath.join(remote_location, archive_name)