flyte 0.2.0b1__py3-none-any.whl → 2.0.0b46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. flyte/__init__.py +83 -30
  2. flyte/_bin/connect.py +61 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +87 -19
  5. flyte/_bin/serve.py +351 -0
  6. flyte/_build.py +3 -2
  7. flyte/_cache/cache.py +6 -5
  8. flyte/_cache/local_cache.py +216 -0
  9. flyte/_code_bundle/_ignore.py +31 -5
  10. flyte/_code_bundle/_packaging.py +42 -11
  11. flyte/_code_bundle/_utils.py +57 -34
  12. flyte/_code_bundle/bundle.py +130 -27
  13. flyte/_constants.py +1 -0
  14. flyte/_context.py +21 -5
  15. flyte/_custom_context.py +73 -0
  16. flyte/_debug/constants.py +37 -0
  17. flyte/_debug/utils.py +17 -0
  18. flyte/_debug/vscode.py +315 -0
  19. flyte/_deploy.py +396 -75
  20. flyte/_deployer.py +109 -0
  21. flyte/_environment.py +94 -11
  22. flyte/_excepthook.py +37 -0
  23. flyte/_group.py +2 -1
  24. flyte/_hash.py +1 -16
  25. flyte/_image.py +544 -231
  26. flyte/_initialize.py +456 -316
  27. flyte/_interface.py +40 -5
  28. flyte/_internal/controllers/__init__.py +22 -8
  29. flyte/_internal/controllers/_local_controller.py +159 -35
  30. flyte/_internal/controllers/_trace.py +18 -10
  31. flyte/_internal/controllers/remote/__init__.py +38 -9
  32. flyte/_internal/controllers/remote/_action.py +82 -12
  33. flyte/_internal/controllers/remote/_client.py +6 -2
  34. flyte/_internal/controllers/remote/_controller.py +290 -64
  35. flyte/_internal/controllers/remote/_core.py +155 -95
  36. flyte/_internal/controllers/remote/_informer.py +40 -20
  37. flyte/_internal/controllers/remote/_service_protocol.py +2 -2
  38. flyte/_internal/imagebuild/__init__.py +2 -10
  39. flyte/_internal/imagebuild/docker_builder.py +391 -84
  40. flyte/_internal/imagebuild/image_builder.py +111 -55
  41. flyte/_internal/imagebuild/remote_builder.py +409 -0
  42. flyte/_internal/imagebuild/utils.py +79 -0
  43. flyte/_internal/resolvers/_app_env_module.py +92 -0
  44. flyte/_internal/resolvers/_task_module.py +5 -38
  45. flyte/_internal/resolvers/app_env.py +26 -0
  46. flyte/_internal/resolvers/common.py +8 -1
  47. flyte/_internal/resolvers/default.py +2 -2
  48. flyte/_internal/runtime/convert.py +319 -36
  49. flyte/_internal/runtime/entrypoints.py +106 -18
  50. flyte/_internal/runtime/io.py +71 -23
  51. flyte/_internal/runtime/resources_serde.py +21 -7
  52. flyte/_internal/runtime/reuse.py +125 -0
  53. flyte/_internal/runtime/rusty.py +196 -0
  54. flyte/_internal/runtime/task_serde.py +239 -66
  55. flyte/_internal/runtime/taskrunner.py +48 -8
  56. flyte/_internal/runtime/trigger_serde.py +162 -0
  57. flyte/_internal/runtime/types_serde.py +7 -16
  58. flyte/_keyring/file.py +115 -0
  59. flyte/_link.py +30 -0
  60. flyte/_logging.py +241 -42
  61. flyte/_map.py +312 -0
  62. flyte/_metrics.py +59 -0
  63. flyte/_module.py +74 -0
  64. flyte/_pod.py +30 -0
  65. flyte/_resources.py +296 -33
  66. flyte/_retry.py +1 -7
  67. flyte/_reusable_environment.py +72 -7
  68. flyte/_run.py +462 -132
  69. flyte/_secret.py +47 -11
  70. flyte/_serve.py +333 -0
  71. flyte/_task.py +245 -56
  72. flyte/_task_environment.py +219 -97
  73. flyte/_task_plugins.py +47 -0
  74. flyte/_tools.py +8 -8
  75. flyte/_trace.py +15 -24
  76. flyte/_trigger.py +1027 -0
  77. flyte/_utils/__init__.py +12 -1
  78. flyte/_utils/asyn.py +3 -1
  79. flyte/_utils/async_cache.py +139 -0
  80. flyte/_utils/coro_management.py +5 -4
  81. flyte/_utils/description_parser.py +19 -0
  82. flyte/_utils/docker_credentials.py +173 -0
  83. flyte/_utils/helpers.py +45 -19
  84. flyte/_utils/module_loader.py +123 -0
  85. flyte/_utils/org_discovery.py +57 -0
  86. flyte/_utils/uv_script_parser.py +8 -1
  87. flyte/_version.py +16 -3
  88. flyte/app/__init__.py +27 -0
  89. flyte/app/_app_environment.py +362 -0
  90. flyte/app/_connector_environment.py +40 -0
  91. flyte/app/_deploy.py +130 -0
  92. flyte/app/_parameter.py +343 -0
  93. flyte/app/_runtime/__init__.py +3 -0
  94. flyte/app/_runtime/app_serde.py +383 -0
  95. flyte/app/_types.py +113 -0
  96. flyte/app/extras/__init__.py +9 -0
  97. flyte/app/extras/_auth_middleware.py +217 -0
  98. flyte/app/extras/_fastapi.py +93 -0
  99. flyte/app/extras/_model_loader/__init__.py +3 -0
  100. flyte/app/extras/_model_loader/config.py +7 -0
  101. flyte/app/extras/_model_loader/loader.py +288 -0
  102. flyte/cli/__init__.py +12 -0
  103. flyte/cli/_abort.py +28 -0
  104. flyte/cli/_build.py +114 -0
  105. flyte/cli/_common.py +493 -0
  106. flyte/cli/_create.py +371 -0
  107. flyte/cli/_delete.py +45 -0
  108. flyte/cli/_deploy.py +401 -0
  109. flyte/cli/_gen.py +316 -0
  110. flyte/cli/_get.py +446 -0
  111. flyte/cli/_option.py +33 -0
  112. flyte/{_cli → cli}/_params.py +57 -17
  113. flyte/cli/_plugins.py +209 -0
  114. flyte/cli/_prefetch.py +292 -0
  115. flyte/cli/_run.py +690 -0
  116. flyte/cli/_serve.py +338 -0
  117. flyte/cli/_update.py +86 -0
  118. flyte/cli/_user.py +20 -0
  119. flyte/cli/main.py +246 -0
  120. flyte/config/__init__.py +2 -167
  121. flyte/config/_config.py +215 -163
  122. flyte/config/_internal.py +10 -1
  123. flyte/config/_reader.py +225 -0
  124. flyte/connectors/__init__.py +11 -0
  125. flyte/connectors/_connector.py +330 -0
  126. flyte/connectors/_server.py +194 -0
  127. flyte/connectors/utils.py +159 -0
  128. flyte/errors.py +134 -2
  129. flyte/extend.py +24 -0
  130. flyte/extras/_container.py +69 -56
  131. flyte/git/__init__.py +3 -0
  132. flyte/git/_config.py +279 -0
  133. flyte/io/__init__.py +8 -1
  134. flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
  135. flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
  136. flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
  137. flyte/io/_dir.py +575 -113
  138. flyte/io/_file.py +587 -141
  139. flyte/io/_hashing_io.py +342 -0
  140. flyte/io/extend.py +7 -0
  141. flyte/models.py +635 -0
  142. flyte/prefetch/__init__.py +22 -0
  143. flyte/prefetch/_hf_model.py +563 -0
  144. flyte/remote/__init__.py +14 -3
  145. flyte/remote/_action.py +879 -0
  146. flyte/remote/_app.py +346 -0
  147. flyte/remote/_auth_metadata.py +42 -0
  148. flyte/remote/_client/_protocols.py +62 -4
  149. flyte/remote/_client/auth/_auth_utils.py +19 -0
  150. flyte/remote/_client/auth/_authenticators/base.py +8 -2
  151. flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
  152. flyte/remote/_client/auth/_authenticators/factory.py +4 -0
  153. flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
  154. flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
  155. flyte/remote/_client/auth/_channel.py +47 -18
  156. flyte/remote/_client/auth/_client_config.py +5 -3
  157. flyte/remote/_client/auth/_keyring.py +15 -2
  158. flyte/remote/_client/auth/_token_client.py +3 -3
  159. flyte/remote/_client/controlplane.py +206 -18
  160. flyte/remote/_common.py +66 -0
  161. flyte/remote/_data.py +107 -22
  162. flyte/remote/_logs.py +116 -33
  163. flyte/remote/_project.py +21 -19
  164. flyte/remote/_run.py +164 -631
  165. flyte/remote/_secret.py +72 -29
  166. flyte/remote/_task.py +387 -46
  167. flyte/remote/_trigger.py +368 -0
  168. flyte/remote/_user.py +43 -0
  169. flyte/report/_report.py +10 -6
  170. flyte/storage/__init__.py +13 -1
  171. flyte/storage/_config.py +237 -0
  172. flyte/storage/_parallel_reader.py +289 -0
  173. flyte/storage/_storage.py +268 -59
  174. flyte/syncify/__init__.py +56 -0
  175. flyte/syncify/_api.py +414 -0
  176. flyte/types/__init__.py +39 -0
  177. flyte/types/_interface.py +22 -7
  178. flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
  179. flyte/types/_string_literals.py +8 -9
  180. flyte/types/_type_engine.py +226 -126
  181. flyte/types/_utils.py +1 -1
  182. flyte-2.0.0b46.data/scripts/debug.py +38 -0
  183. flyte-2.0.0b46.data/scripts/runtime.py +194 -0
  184. flyte-2.0.0b46.dist-info/METADATA +352 -0
  185. flyte-2.0.0b46.dist-info/RECORD +221 -0
  186. flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
  187. flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
  188. flyte/_api_commons.py +0 -3
  189. flyte/_cli/_common.py +0 -299
  190. flyte/_cli/_create.py +0 -42
  191. flyte/_cli/_delete.py +0 -23
  192. flyte/_cli/_deploy.py +0 -140
  193. flyte/_cli/_get.py +0 -235
  194. flyte/_cli/_run.py +0 -174
  195. flyte/_cli/main.py +0 -98
  196. flyte/_datastructures.py +0 -342
  197. flyte/_internal/controllers/pbhash.py +0 -39
  198. flyte/_protos/common/authorization_pb2.py +0 -66
  199. flyte/_protos/common/authorization_pb2.pyi +0 -108
  200. flyte/_protos/common/authorization_pb2_grpc.py +0 -4
  201. flyte/_protos/common/identifier_pb2.py +0 -71
  202. flyte/_protos/common/identifier_pb2.pyi +0 -82
  203. flyte/_protos/common/identifier_pb2_grpc.py +0 -4
  204. flyte/_protos/common/identity_pb2.py +0 -48
  205. flyte/_protos/common/identity_pb2.pyi +0 -72
  206. flyte/_protos/common/identity_pb2_grpc.py +0 -4
  207. flyte/_protos/common/list_pb2.py +0 -36
  208. flyte/_protos/common/list_pb2.pyi +0 -69
  209. flyte/_protos/common/list_pb2_grpc.py +0 -4
  210. flyte/_protos/common/policy_pb2.py +0 -37
  211. flyte/_protos/common/policy_pb2.pyi +0 -27
  212. flyte/_protos/common/policy_pb2_grpc.py +0 -4
  213. flyte/_protos/common/role_pb2.py +0 -37
  214. flyte/_protos/common/role_pb2.pyi +0 -53
  215. flyte/_protos/common/role_pb2_grpc.py +0 -4
  216. flyte/_protos/common/runtime_version_pb2.py +0 -28
  217. flyte/_protos/common/runtime_version_pb2.pyi +0 -24
  218. flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
  219. flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
  220. flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  221. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  222. flyte/_protos/secret/definition_pb2.py +0 -49
  223. flyte/_protos/secret/definition_pb2.pyi +0 -93
  224. flyte/_protos/secret/definition_pb2_grpc.py +0 -4
  225. flyte/_protos/secret/payload_pb2.py +0 -62
  226. flyte/_protos/secret/payload_pb2.pyi +0 -94
  227. flyte/_protos/secret/payload_pb2_grpc.py +0 -4
  228. flyte/_protos/secret/secret_pb2.py +0 -38
  229. flyte/_protos/secret/secret_pb2.pyi +0 -6
  230. flyte/_protos/secret/secret_pb2_grpc.py +0 -198
  231. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  232. flyte/_protos/validate/validate/validate_pb2.py +0 -76
  233. flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
  234. flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  235. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  236. flyte/_protos/workflow/queue_service_pb2.py +0 -106
  237. flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
  238. flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  239. flyte/_protos/workflow/run_definition_pb2.py +0 -128
  240. flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
  241. flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  242. flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
  243. flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  244. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  245. flyte/_protos/workflow/run_service_pb2.py +0 -133
  246. flyte/_protos/workflow/run_service_pb2.pyi +0 -175
  247. flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
  248. flyte/_protos/workflow/state_service_pb2.py +0 -58
  249. flyte/_protos/workflow/state_service_pb2.pyi +0 -71
  250. flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
  251. flyte/_protos/workflow/task_definition_pb2.py +0 -72
  252. flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
  253. flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  254. flyte/_protos/workflow/task_service_pb2.py +0 -44
  255. flyte/_protos/workflow/task_service_pb2.pyi +0 -31
  256. flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
  257. flyte/io/_dataframe.py +0 -0
  258. flyte/io/pickle/__init__.py +0 -0
  259. flyte/remote/_console.py +0 -18
  260. flyte-0.2.0b1.dist-info/METADATA +0 -179
  261. flyte-0.2.0b1.dist-info/RECORD +0 -204
  262. flyte-0.2.0b1.dist-info/entry_points.txt +0 -3
  263. /flyte/{_cli → _debug}/__init__.py +0 -0
  264. /flyte/{_protos → _keyring}/__init__.py +0 -0
  265. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
  266. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
@@ -2,27 +2,23 @@ import os
2
2
  import pathlib
3
3
  from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union
4
4
 
5
- from flyteidl.core import tasks_pb2
5
+ from flyteidl2.core import tasks_pb2
6
6
 
7
7
  from flyte import Image, storage
8
- from flyte._datastructures import NativeInterface, SerializationContext
9
8
  from flyte._logging import logger
10
9
  from flyte._task import TaskTemplate
10
+ from flyte.io import Dir, File
11
+ from flyte.models import NativeInterface, SerializationContext
11
12
 
12
- _PRIMARY_CONTAINER_NAME_FIELD = "primary_container_name"
13
13
 
14
-
15
- def _extract_command_key(cmd: str, **kwargs) -> Any:
14
+ def _extract_command_key(cmd: str, **kwargs) -> List[Any] | None:
16
15
  """
17
16
  Extract the key from the command using regex.
18
17
  """
19
18
  import re
20
19
 
21
- input_regex = r"^\{\{\s*\.inputs\.(.*?)\s*\}\}$"
22
- match = re.match(input_regex, cmd)
23
- if match:
24
- return match.group(1)
25
- return None
20
+ input_regex = r"\{\{\.inputs\.([a-zA-Z0-9_]+)\}\}"
21
+ return re.findall(input_regex, cmd)
26
22
 
27
23
 
28
24
  def _extract_path_command_key(cmd: str, input_data_dir: Optional[str]) -> Optional[str]:
@@ -32,8 +28,9 @@ def _extract_path_command_key(cmd: str, input_data_dir: Optional[str]) -> Option
32
28
  import re
33
29
 
34
30
  input_data_dir = input_data_dir or ""
35
- input_regex = rf"{re.escape(input_data_dir)}/(.+)$"
36
- match = re.match(input_regex, cmd)
31
+ input_regex = rf"{re.escape(input_data_dir)}/([\w\-.]+)" # captures file or dir names
32
+
33
+ match = re.search(input_regex, cmd)
37
34
  if match:
38
35
  return match.group(1)
39
36
  return None
@@ -70,7 +67,7 @@ class ContainerTask(TaskTemplate):
70
67
  input_data_dir: str | pathlib.Path = "/var/inputs",
71
68
  output_data_dir: str | pathlib.Path = "/var/outputs",
72
69
  metadata_format: MetadataFormat = "JSON",
73
- local_logs: bool = False,
70
+ local_logs: bool = True,
74
71
  **kwargs,
75
72
  ):
76
73
  super().__init__(
@@ -83,9 +80,14 @@ class ContainerTask(TaskTemplate):
83
80
  self._image = image
84
81
  if isinstance(image, str):
85
82
  if image == "auto":
86
- self._image = Image.auto()
83
+ self._image = Image.from_debian_base()
87
84
  else:
88
- self._image = Image.from_prebuilt(image)
85
+ self._image = Image.from_base(image)
86
+
87
+ if command and any(not isinstance(c, str) for c in command):
88
+ raise ValueError("All elements in the command list must be strings.")
89
+ if arguments and any(not isinstance(a, str) for a in arguments):
90
+ raise ValueError("All elements in the arguments list must be strings.")
89
91
  self._cmd = command
90
92
  self._args = arguments
91
93
  self._input_data_dir = input_data_dir
@@ -106,32 +108,34 @@ class ContainerTask(TaskTemplate):
106
108
  For FlyteFile and FlyteDirectory commands, e.g., "/var/inputs/inputs", we extract the key from strings that
107
109
  begin with the specified `input_data_dir`.
108
110
  """
109
- # from flytekit.types.directory import FlyteDirectory
110
- # from flytekit.types.file import FlyteFile
111
+ from flyte.io import Dir, File
111
112
 
112
113
  volume_binding: Dict[str, Dict[str, str]] = {}
113
114
  path_k = _extract_path_command_key(cmd, str(self._input_data_dir))
114
- k = path_k if path_k else _extract_command_key(cmd)
115
-
116
- if k:
117
- input_val = kwargs.get(k)
118
- # TODO: Add support file and directory transformer first
119
- # if type(input_val) in [FlyteFile, FlyteDirectory]:
120
- # if not path_k:
121
- # raise AssertionError(
122
- # "FlyteFile and FlyteDirectory commands should not use the template syntax like this:
123
- # {{.inputs.infile}}\n"
124
- # "Please use a path-like syntax, such as: /var/inputs/infile.\n"
125
- # "This requirement is due to how Flyte Propeller processes template syntax inputs."
126
- # )
127
- # local_flyte_file_or_dir_path = str(input_val)
128
- # remote_flyte_file_or_dir_path = os.path.join(self._input_data_dir, k) # type: ignore
129
- # volume_binding[local_flyte_file_or_dir_path] = {
130
- # "bind": remote_flyte_file_or_dir_path,
131
- # "mode": "rw",
132
- # }
133
- # command = remote_flyte_file_or_dir_path
134
- command = str(input_val)
115
+ keys = [path_k] if path_k else _extract_command_key(cmd)
116
+
117
+ command = cmd
118
+
119
+ if keys:
120
+ for k in keys:
121
+ input_val = kwargs.get(k)
122
+ # TODO: Add support file and directory transformer first
123
+ if input_val and type(input_val) in [File, Dir]:
124
+ if not path_k:
125
+ raise AssertionError(
126
+ "File and Directory commands should not use the template syntax "
127
+ "like this: {{.inputs.infile}}\n"
128
+ "Please use a path-like syntax, such as: /var/inputs/infile.\n"
129
+ "This requirement is due to how Flyte Propeller processes template syntax inputs."
130
+ )
131
+ local_flyte_file_or_dir_path = input_val.path
132
+ remote_flyte_file_or_dir_path = os.path.join(self._input_data_dir, k) # type: ignore
133
+ volume_binding[local_flyte_file_or_dir_path] = {
134
+ "bind": remote_flyte_file_or_dir_path,
135
+ "mode": "rw",
136
+ }
137
+ else:
138
+ command = command.replace(f"{{{{.inputs.{k}}}}}", str(input_val))
135
139
  else:
136
140
  command = cmd
137
141
 
@@ -193,7 +197,9 @@ class ContainerTask(TaskTemplate):
193
197
  microseconds=microseconds,
194
198
  )
195
199
 
196
- def _convert_output_val_to_correct_type(self, output_val: Any, output_type: Type) -> Any:
200
+ async def _convert_output_val_to_correct_type(
201
+ self, output_path: pathlib.Path, output_val: Any, output_type: Type
202
+ ) -> Any:
197
203
  import datetime
198
204
 
199
205
  if issubclass(output_type, bool):
@@ -202,20 +208,31 @@ class ContainerTask(TaskTemplate):
202
208
  return datetime.datetime.fromisoformat(output_val)
203
209
  elif issubclass(output_type, datetime.timedelta):
204
210
  return self._string_to_timedelta(output_val)
211
+ elif issubclass(output_type, File):
212
+ return await File.from_local(output_path)
213
+ elif issubclass(output_type, Dir):
214
+ return await Dir.from_local(output_path)
205
215
  else:
206
216
  return output_type(output_val)
207
217
 
208
- def _get_output_dict(self, output_directory: pathlib.Path) -> Dict[str, Any]:
209
- output_dict = {}
218
+ async def _get_output(self, output_directory: pathlib.Path) -> Tuple[Any]:
219
+ output_items = []
210
220
  if self._outputs:
211
221
  for k, output_type in self._outputs.items():
212
222
  output_path = output_directory / k
213
- with output_path.open("r") as f:
214
- output_val = f.read()
215
- output_dict[k] = self._convert_output_val_to_correct_type(output_val, output_type)
216
- return output_dict
217
-
218
- def execute(self, **kwargs) -> Any:
223
+ if os.path.isfile(output_path):
224
+ with output_path.open("r") as f:
225
+ output_val = f.read()
226
+ else:
227
+ output_val = None
228
+ parsed = await self._convert_output_val_to_correct_type(output_path, output_val, output_type)
229
+ output_items.append(parsed)
230
+ # return a tuple so that each element is treated as a separate output.
231
+ # this allows flyte to map the user-defined output types (dict) to individual values.
232
+ # if we returned a list instead, it would be treated as a single output.
233
+ return tuple(output_items)
234
+
235
+ async def execute(self, **kwargs) -> Any:
219
236
  try:
220
237
  import docker
221
238
  except ImportError:
@@ -235,6 +252,7 @@ class ContainerTask(TaskTemplate):
235
252
  raise AssertionError(f"Only Image objects are supported, not strings. Got {self._image} instead.")
236
253
  uri = self._image.uri
237
254
  self._pull_image_if_not_exists(client, uri)
255
+ print(f"Command: {commands!r}")
238
256
 
239
257
  container = client.containers.run(uri, command=commands, remove=True, volumes=volume_bindings, detach=True)
240
258
 
@@ -247,8 +265,8 @@ class ContainerTask(TaskTemplate):
247
265
 
248
266
  container.wait()
249
267
 
250
- output_dict = self._get_output_dict(output_directory)
251
- return output_dict
268
+ output = await self._get_output(output_directory)
269
+ return output
252
270
 
253
271
  def data_loading_config(self, sctx: SerializationContext) -> tasks_pb2.DataLoadingConfig:
254
272
  literal_to_protobuf = {
@@ -258,16 +276,11 @@ class ContainerTask(TaskTemplate):
258
276
  }
259
277
 
260
278
  return tasks_pb2.DataLoadingConfig(
261
- input_path=self._input_data_dir,
262
- output_path=self._output_data_dir,
279
+ input_path=str(self._input_data_dir) if self._input_data_dir else None,
280
+ output_path=str(self._output_data_dir) if self._output_data_dir else None,
263
281
  enabled=True,
264
282
  format=literal_to_protobuf.get(self._metadata_format, "JSON"),
265
283
  )
266
284
 
267
285
  def container_args(self, sctx: SerializationContext) -> List[str]:
268
286
  return self._cmd + (self._args if self._args else [])
269
-
270
- def config(self, sctx: SerializationContext) -> Dict[str, str]:
271
- if self.pod_template is None:
272
- return {}
273
- return {_PRIMARY_CONTAINER_NAME_FIELD: self.primary_container_name}
flyte/git/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from ._config import GitStatus, config_from_root
2
+
3
+ __all__ = ["GitStatus", "config_from_root"]
flyte/git/_config.py ADDED
@@ -0,0 +1,279 @@
1
+ import pathlib
2
+ import subprocess
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Dict, Protocol
6
+
7
+ import flyte.config
8
+ from flyte._logging import logger
9
+
10
+
11
+ class GitUrlBuilder(Protocol):
12
+ @staticmethod
13
+ def build_url(remote_url: str, file_path: str, commit_sha: str, line_number: int, is_tree_clean: bool) -> str: ...
14
+
15
+
16
+ class GithubUrlBuilder(GitUrlBuilder):
17
+ host_name = "github.com"
18
+
19
+ @staticmethod
20
+ def build_url(remote_url: str, file_path: str, commit_sha: str, line_number: int, is_tree_clean: bool) -> str:
21
+ url = f"{remote_url}/blob/{commit_sha}/{file_path}"
22
+ if is_tree_clean:
23
+ url += f"#L{line_number}"
24
+ return url
25
+
26
+
27
+ class GitlabUrlBuilder(GitUrlBuilder):
28
+ host_name = "gitlab.com"
29
+
30
+ @staticmethod
31
+ def build_url(remote_url: str, file_path: str, commit_sha: str, line_number: int, is_tree_clean: bool) -> str:
32
+ url = f"{remote_url}/-/blob/{commit_sha}/{file_path}"
33
+ if is_tree_clean:
34
+ url += f"#L{line_number}"
35
+ return url
36
+
37
+
38
+ GIT_URL_BUILDER_REGISTRY: Dict[str, GitUrlBuilder] = {
39
+ GithubUrlBuilder.host_name: GithubUrlBuilder,
40
+ GitlabUrlBuilder.host_name: GitlabUrlBuilder,
41
+ }
42
+
43
+
44
+ @dataclass(init=True, frozen=True)
45
+ class GitStatus:
46
+ """
47
+ A class representing the status of a git repository.
48
+
49
+ :param is_valid: Whether git repository is valid
50
+ :param is_tree_clean: Whether working tree is clean
51
+ :param remote_url: Remote URL in HTTPS format
52
+ :param repo_dir: Repository root directory
53
+ :param commit_sha: Current commit SHA
54
+ """
55
+
56
+ is_valid: bool = False
57
+ is_tree_clean: bool = False
58
+ remote_url: str = ""
59
+ repo_dir: Path = Path()
60
+ commit_sha: str = ""
61
+
62
+ @classmethod
63
+ def from_current_repo(cls) -> "GitStatus":
64
+ """Discover git information from the current repository.
65
+
66
+ If Git is not installed or .git does not exist, returns GitStatus with is_valid=False.
67
+
68
+ :return: GitStatus instance with discovered git information
69
+ """
70
+ try:
71
+ # Check if we're in a git repository and get the root directory
72
+ result = subprocess.run(
73
+ ["git", "rev-parse", "--show-toplevel"],
74
+ check=False,
75
+ capture_output=True,
76
+ text=True,
77
+ )
78
+
79
+ if result.returncode != 0:
80
+ logger.warning("Not in a git repository or git is not installed")
81
+ return cls()
82
+
83
+ repo_dir = Path(result.stdout.strip())
84
+
85
+ # Get current commit SHA
86
+ result = subprocess.run(
87
+ ["git", "rev-parse", "HEAD"],
88
+ check=False,
89
+ capture_output=True,
90
+ text=True,
91
+ )
92
+ if result.returncode == 0:
93
+ commit_sha = result.stdout.strip()
94
+ else:
95
+ logger.warning("Failed to get current commit SHA")
96
+ return cls(repo_dir=repo_dir)
97
+
98
+ # Check if working tree is clean
99
+ result = subprocess.run(
100
+ ["git", "status", "--porcelain"],
101
+ check=False,
102
+ capture_output=True,
103
+ text=True,
104
+ )
105
+ if result.returncode == 0:
106
+ is_tree_clean = len(result.stdout.strip()) == 0
107
+ else:
108
+ logger.warning("Failed to check if working tree is clean")
109
+ return cls(repo_dir=repo_dir, commit_sha=commit_sha)
110
+
111
+ # Get remote URL
112
+ instance = cls(repo_dir=repo_dir, commit_sha=commit_sha, is_tree_clean=is_tree_clean)
113
+ remote_url = instance._get_remote_url()
114
+ if not remote_url:
115
+ logger.warning("Failed to get remote URL")
116
+ return cls(repo_dir=repo_dir, commit_sha=commit_sha, is_tree_clean=is_tree_clean)
117
+
118
+ return cls(
119
+ is_valid=True,
120
+ is_tree_clean=is_tree_clean,
121
+ remote_url=remote_url,
122
+ repo_dir=repo_dir,
123
+ commit_sha=commit_sha,
124
+ )
125
+
126
+ except Exception as e:
127
+ logger.debug(f"Failed to discover git repository: {e}")
128
+ return cls()
129
+
130
+ def _get_remote_url(self) -> str:
131
+ """Get the remote push URL.
132
+
133
+ Returns the 'origin' remote push URL if it exists, otherwise returns
134
+ the first remote alphabetically. Converts SSH/Git protocol URLs to HTTPS format.
135
+
136
+ :return: The remote push URL in HTTPS format, or empty string if not found
137
+ """
138
+ try:
139
+ # Try to get origin push remote first
140
+ result = subprocess.run(
141
+ ["git", "remote", "get-url", "--push", "origin"],
142
+ check=False,
143
+ capture_output=True,
144
+ text=True,
145
+ )
146
+
147
+ if result.returncode == 0:
148
+ url = result.stdout.strip()
149
+ return self._normalize_url_to_https(url)
150
+
151
+ # If origin doesn't exist, get all remotes
152
+ result = subprocess.run(
153
+ ["git", "remote"],
154
+ check=False,
155
+ capture_output=True,
156
+ text=True,
157
+ )
158
+
159
+ if result.returncode == 0:
160
+ remotes = result.stdout.strip().split("\n")
161
+ if remotes:
162
+ # Sort alphabetically and get the first one
163
+ remotes.sort()
164
+ first_remote = remotes[0]
165
+
166
+ # Get push URL for this remote
167
+ result = subprocess.run(
168
+ ["git", "remote", "get-url", "--push", first_remote],
169
+ check=False,
170
+ capture_output=True,
171
+ text=True,
172
+ )
173
+ if result.returncode == 0:
174
+ url = result.stdout.strip()
175
+ return self._normalize_url_to_https(url)
176
+
177
+ return ""
178
+
179
+ except Exception:
180
+ return ""
181
+
182
+ def _normalize_url_to_https(self, url: str) -> str:
183
+ """Convert SSH or Git protocol URLs to HTTPS format.
184
+
185
+ Examples:
186
+ git@github.com:user/repo.git -> https://github.com/user/repo
187
+ https://github.com/user/repo.git -> https://github.com/user/repo
188
+
189
+ :param url: The Git URL to normalize
190
+ :return: The normalized HTTPS URL
191
+ """
192
+ # Remove .git suffix first
193
+ url = url.removesuffix(".git")
194
+
195
+ # Handle SSH format: git@host:path or user@host:path
196
+ if url.startswith("git@"):
197
+ parts = url.split("@", 1)
198
+ if len(parts) == 2:
199
+ host_and_path = parts[1].replace(":", "/", 1)
200
+ return f"https://{host_and_path}"
201
+
202
+ return url
203
+
204
+ def _get_remote_host(self, url: str) -> str:
205
+ """Get the remote host name from a normalized HTTPS URL.
206
+
207
+ :param url: URL that has been normalized to HTTPS format by _normalize_url_to_https
208
+ :return: The host name (e.g., "github.com", "gitlab.com")
209
+ """
210
+ parts = url.split("//", 1)
211
+ if len(parts) < 2:
212
+ return ""
213
+
214
+ # Get everything after "//" and split by "/"
215
+ host_and_path = parts[1]
216
+ parts = host_and_path.split("/", 1)
217
+ if len(parts) < 2:
218
+ return ""
219
+ host = host_and_path.split("/")[0]
220
+
221
+ return host
222
+
223
+ def _get_file_path(self, path: Path | str) -> str:
224
+ """Get the path relative to the repository root directory.
225
+
226
+ :param path: Absolute or relative path to a file
227
+ :return: Path relative to repo_dir as string, or empty string if failed
228
+ """
229
+ try:
230
+ path_obj = Path(path).resolve()
231
+ relative_path = path_obj.relative_to(self.repo_dir)
232
+ return str(relative_path)
233
+ except Exception as e:
234
+ logger.warning(f"Failed to get relative path for {path}: {e}")
235
+ return ""
236
+
237
+ def build_url(self, path: Path | str, line_number: int) -> str:
238
+ """Build a git URL for the given path.
239
+
240
+ :param path: Path to a file
241
+ :param line_number: Line number of the code file
242
+ :return: Path relative to repo_dir
243
+ """
244
+ if not self.is_valid:
245
+ logger.warning("GitConfig is not valid, cannot build URL")
246
+ return ""
247
+ host_name = self._get_remote_host(self.remote_url)
248
+ git_file_path = self._get_file_path(path)
249
+ if not host_name:
250
+ logger.warning(f"Failed to extract host name from remote URL: {self.remote_url}")
251
+ return ""
252
+ if not git_file_path:
253
+ return ""
254
+ builder = GIT_URL_BUILDER_REGISTRY.get(host_name)
255
+ if not builder:
256
+ logger.warning(f"URL builder for {host_name} is not implemented")
257
+ return ""
258
+ url = builder.build_url(self.remote_url, git_file_path, self.commit_sha, line_number, self.is_tree_clean)
259
+ return url
260
+
261
+
262
+ def config_from_root(path: pathlib.Path | str = ".flyte/config.yaml") -> flyte.config.Config | None:
263
+ """Get the config file from the git root directory.
264
+
265
+ By default, the config file is expected to be in `.flyte/config.yaml` in the git root directory.
266
+
267
+ :param path: Path to the config file relative to git root directory (default: ".flyte/config.yaml")
268
+ :return: Config object if found, None otherwise
269
+ """
270
+ try:
271
+ result = subprocess.run(["git", "rev-parse", "--show-toplevel"], check=False, capture_output=True, text=True)
272
+ if result.returncode != 0:
273
+ return None
274
+ root = pathlib.Path(result.stdout.strip())
275
+ if not (root / path).exists():
276
+ return None
277
+ return flyte.config.auto(root / path)
278
+ except Exception:
279
+ return None
flyte/io/__init__.py CHANGED
@@ -3,9 +3,16 @@
3
3
 
4
4
  This package contains additional data types beyond the primitive data types in python to abstract data flow
5
5
  of large datasets in Union.
6
+
6
7
  """
7
8
 
8
- __all__ = ["Dir", "File"]
9
+ __all__ = [
10
+ "PARQUET",
11
+ "DataFrame",
12
+ "Dir",
13
+ "File",
14
+ ]
9
15
 
16
+ from ._dataframe import PARQUET, DataFrame
10
17
  from ._dir import Dir
11
18
  from ._file import File
@@ -1,15 +1,15 @@
1
1
  """
2
- Flytekit StructuredDataset
2
+ Flytekit DataFrame
3
3
  ==========================================================
4
- .. currentmodule:: flytekit.types.structured
4
+ .. currentmodule:: flyte.io._dataframe
5
5
 
6
6
  .. autosummary::
7
7
  :template: custom.rst
8
8
  :toctree: generated/
9
9
 
10
- StructuredDataset
11
- StructuredDatasetDecoder
12
- StructuredDatasetEncoder
10
+ DataFrame
11
+ DataFrameDecoder
12
+ DataFrameEncoder
13
13
  """
14
14
 
15
15
  import functools
@@ -17,12 +17,13 @@ import functools
17
17
  from flyte._logging import logger
18
18
  from flyte._utils.lazy_module import is_imported
19
19
 
20
- from .structured_dataset import (
20
+ from .dataframe import (
21
+ PARQUET,
22
+ DataFrame,
23
+ DataFrameDecoder,
24
+ DataFrameEncoder,
25
+ DataFrameTransformerEngine,
21
26
  DuplicateHandlerError,
22
- StructuredDataset,
23
- StructuredDatasetDecoder,
24
- StructuredDatasetEncoder,
25
- StructuredDatasetTransformerEngine,
26
27
  )
27
28
 
28
29
 
@@ -30,8 +31,8 @@ from .structured_dataset import (
30
31
  def register_csv_handlers():
31
32
  from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler
32
33
 
33
- StructuredDatasetTransformerEngine.register(PandasToCSVEncodingHandler(), default_format_for_type=True)
34
- StructuredDatasetTransformerEngine.register(CSVToPandasDecodingHandler(), default_format_for_type=True)
34
+ DataFrameTransformerEngine.register(PandasToCSVEncodingHandler(), default_format_for_type=True)
35
+ DataFrameTransformerEngine.register(CSVToPandasDecodingHandler(), default_format_for_type=True)
35
36
 
36
37
 
37
38
  @functools.lru_cache(maxsize=None)
@@ -42,9 +43,9 @@ def register_pandas_handlers():
42
43
 
43
44
  from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler
44
45
 
45
- StructuredDatasetTransformerEngine.register(PandasToParquetEncodingHandler(), default_format_for_type=True)
46
- StructuredDatasetTransformerEngine.register(ParquetToPandasDecodingHandler(), default_format_for_type=True)
47
- StructuredDatasetTransformerEngine.register_renderer(pd.DataFrame, TopFrameRenderer())
46
+ DataFrameTransformerEngine.register(PandasToParquetEncodingHandler(), default_format_for_type=True)
47
+ DataFrameTransformerEngine.register(ParquetToPandasDecodingHandler(), default_format_for_type=True)
48
+ DataFrameTransformerEngine.register_renderer(pd.DataFrame, TopFrameRenderer())
48
49
 
49
50
 
50
51
  @functools.lru_cache(maxsize=None)
@@ -55,9 +56,9 @@ def register_arrow_handlers():
55
56
 
56
57
  from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler
57
58
 
58
- StructuredDatasetTransformerEngine.register(ArrowToParquetEncodingHandler(), default_format_for_type=True)
59
- StructuredDatasetTransformerEngine.register(ParquetToArrowDecodingHandler(), default_format_for_type=True)
60
- StructuredDatasetTransformerEngine.register_renderer(pa.Table, ArrowRenderer())
59
+ DataFrameTransformerEngine.register(ArrowToParquetEncodingHandler(), default_format_for_type=True)
60
+ DataFrameTransformerEngine.register(ParquetToArrowDecodingHandler(), default_format_for_type=True)
61
+ DataFrameTransformerEngine.register_renderer(pa.Table, ArrowRenderer())
61
62
 
62
63
 
63
64
  @functools.lru_cache(maxsize=None)
@@ -70,10 +71,10 @@ def register_bigquery_handlers():
70
71
  PandasToBQEncodingHandlers,
71
72
  )
72
73
 
73
- StructuredDatasetTransformerEngine.register(PandasToBQEncodingHandlers())
74
- StructuredDatasetTransformerEngine.register(BQToPandasDecodingHandler())
75
- StructuredDatasetTransformerEngine.register(ArrowToBQEncodingHandlers())
76
- StructuredDatasetTransformerEngine.register(BQToArrowDecodingHandler())
74
+ DataFrameTransformerEngine.register(PandasToBQEncodingHandlers())
75
+ DataFrameTransformerEngine.register(BQToPandasDecodingHandler())
76
+ DataFrameTransformerEngine.register(ArrowToBQEncodingHandlers())
77
+ DataFrameTransformerEngine.register(BQToArrowDecodingHandler())
77
78
  except ImportError:
78
79
  logger.info(
79
80
  "We won't register bigquery handler for structured dataset because "
@@ -86,8 +87,8 @@ def register_snowflake_handlers():
86
87
  try:
87
88
  from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler
88
89
 
89
- StructuredDatasetTransformerEngine.register(SnowflakeToPandasDecodingHandler())
90
- StructuredDatasetTransformerEngine.register(PandasToSnowflakeEncodingHandlers())
90
+ DataFrameTransformerEngine.register(SnowflakeToPandasDecodingHandler())
91
+ DataFrameTransformerEngine.register(PandasToSnowflakeEncodingHandlers())
91
92
 
92
93
  except ImportError:
93
94
  logger.info(
@@ -96,7 +97,7 @@ def register_snowflake_handlers():
96
97
  )
97
98
 
98
99
 
99
- def lazy_import_structured_dataset_handler():
100
+ def lazy_import_dataframe_handler():
100
101
  if is_imported("pandas"):
101
102
  try:
102
103
  register_pandas_handlers()
@@ -121,9 +122,10 @@ def lazy_import_structured_dataset_handler():
121
122
 
122
123
 
123
124
  __all__ = [
124
- "StructuredDataset",
125
- "StructuredDatasetDecoder",
126
- "StructuredDatasetEncoder",
127
- "StructuredDatasetTransformerEngine",
128
- "lazy_import_structured_dataset_handler",
125
+ "PARQUET",
126
+ "DataFrame",
127
+ "DataFrameDecoder",
128
+ "DataFrameEncoder",
129
+ "DataFrameTransformerEngine",
130
+ "lazy_import_dataframe_handler",
129
131
  ]