flyte 0.2.0b9__py3-none-any.whl → 0.2.0b11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +4 -2
- flyte/_bin/runtime.py +6 -3
- flyte/_deploy.py +3 -0
- flyte/_initialize.py +30 -6
- flyte/_internal/controllers/_local_controller.py +4 -3
- flyte/_internal/controllers/_trace.py +1 -0
- flyte/_internal/controllers/remote/_action.py +1 -1
- flyte/_internal/controllers/remote/_informer.py +1 -1
- flyte/_internal/runtime/convert.py +7 -4
- flyte/_internal/runtime/task_serde.py +80 -10
- flyte/_internal/runtime/taskrunner.py +1 -1
- flyte/_logging.py +1 -1
- flyte/_pod.py +19 -0
- flyte/_run.py +84 -39
- flyte/_task.py +2 -13
- flyte/_utils/org_discovery.py +31 -0
- flyte/_version.py +2 -2
- flyte/cli/_common.py +6 -6
- flyte/cli/_create.py +16 -8
- flyte/cli/_params.py +2 -2
- flyte/cli/_run.py +1 -1
- flyte/cli/main.py +4 -8
- flyte/errors.py +11 -0
- flyte/extras/_container.py +29 -39
- flyte/io/__init__.py +17 -1
- flyte/io/_file.py +2 -0
- flyte/io/{structured_dataset → _structured_dataset}/basic_dfs.py +1 -1
- flyte/io/{structured_dataset → _structured_dataset}/structured_dataset.py +1 -1
- flyte/models.py +1 -0
- flyte/remote/_data.py +2 -1
- flyte/types/__init__.py +23 -0
- flyte/{io/pickle/transformer.py → types/_pickle.py} +2 -1
- flyte/types/_type_engine.py +7 -5
- {flyte-0.2.0b9.dist-info → flyte-0.2.0b11.dist-info}/METADATA +5 -6
- {flyte-0.2.0b9.dist-info → flyte-0.2.0b11.dist-info}/RECORD +39 -39
- flyte/io/_dataframe.py +0 -0
- flyte/io/pickle/__init__.py +0 -0
- /flyte/io/{structured_dataset → _structured_dataset}/__init__.py +0 -0
- {flyte-0.2.0b9.dist-info → flyte-0.2.0b11.dist-info}/WHEEL +0 -0
- {flyte-0.2.0b9.dist-info → flyte-0.2.0b11.dist-info}/entry_points.txt +0 -0
- {flyte-0.2.0b9.dist-info → flyte-0.2.0b11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
def hostname_from_url(url: str) -> str:
    """
    Return the host portion of ``url``.

    Two input shapes are handled:

    * gRPC-style ``dns:///<target>`` strings: everything after the ``dns:///`` prefix is returned verbatim.
    * Anything else is parsed with ``urllib.parse.urlparse``. The network location is returned when present;
      otherwise the first segment of the path is used, which covers scheme-less input such as
      ``example.com/some/path``.

    The value is not normalised: a port (and anything else that is part of the network location) is kept.

    :param url: The URL or gRPC target to inspect
    :return: The host portion of the input; may be an empty string
    """
    from urllib.parse import urlparse

    grpc_prefix = "dns:///"
    if url.startswith(grpc_prefix):
        return url.removeprefix(grpc_prefix)

    pieces = urlparse(url)
    if pieces.netloc:
        return pieces.netloc

    # No scheme/netloc recognised: treat the leading path segment as the host.
    first_segment, _, _ = pieces.path.lstrip("/").partition("/")
    return first_segment
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def org_from_endpoint(endpoint: str | None) -> str | None:
|
|
16
|
+
"""
|
|
17
|
+
Extracts the organization from the endpoint URL. The organization is assumed to be the first part of the domain.
|
|
18
|
+
This is temporary until we have a proper organization discovery mechanism through APIs.
|
|
19
|
+
|
|
20
|
+
:param endpoint: The endpoint URL
|
|
21
|
+
:return: The organization name or None if not found
|
|
22
|
+
"""
|
|
23
|
+
if not endpoint:
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
hostname = hostname_from_url(endpoint)
|
|
27
|
+
domain_parts = hostname.split(".")
|
|
28
|
+
if len(domain_parts) > 2:
|
|
29
|
+
# Assuming the organization is the first part of the domain
|
|
30
|
+
return domain_parts[0]
|
|
31
|
+
return None
|
flyte/_version.py
CHANGED
|
@@ -17,5 +17,5 @@ __version__: str
|
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
|
18
18
|
version_tuple: VERSION_TUPLE
|
|
19
19
|
|
|
20
|
-
__version__ = version = '0.2.
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 2, 0, '
|
|
20
|
+
__version__ = version = '0.2.0b11'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 2, 0, 'b11')
|
flyte/cli/_common.py
CHANGED
|
@@ -32,7 +32,7 @@ PROJECT_OPTION = click.Option(
|
|
|
32
32
|
required=False,
|
|
33
33
|
type=str,
|
|
34
34
|
default=None,
|
|
35
|
-
help="Project to
|
|
35
|
+
help="Project to which this command applies.",
|
|
36
36
|
show_default=True,
|
|
37
37
|
)
|
|
38
38
|
|
|
@@ -41,7 +41,7 @@ DOMAIN_OPTION = click.Option(
|
|
|
41
41
|
required=False,
|
|
42
42
|
type=str,
|
|
43
43
|
default=None,
|
|
44
|
-
help="Domain to
|
|
44
|
+
help="Domain to which this command applies.",
|
|
45
45
|
show_default=True,
|
|
46
46
|
)
|
|
47
47
|
|
|
@@ -51,7 +51,7 @@ DRY_RUN_OPTION = click.Option(
|
|
|
51
51
|
type=bool,
|
|
52
52
|
is_flag=True,
|
|
53
53
|
default=False,
|
|
54
|
-
help="Dry run
|
|
54
|
+
help="Dry run. Do not actually call the backend service.",
|
|
55
55
|
show_default=True,
|
|
56
56
|
)
|
|
57
57
|
|
|
@@ -78,7 +78,7 @@ class CLIConfig:
|
|
|
78
78
|
log_level: int | None = logging.ERROR
|
|
79
79
|
endpoint: str | None = None
|
|
80
80
|
insecure: bool = False
|
|
81
|
-
|
|
81
|
+
org: str | None = None
|
|
82
82
|
|
|
83
83
|
def replace(self, **kwargs) -> CLIConfig:
|
|
84
84
|
"""
|
|
@@ -90,7 +90,7 @@ class CLIConfig:
|
|
|
90
90
|
from flyte.config._config import TaskConfig
|
|
91
91
|
|
|
92
92
|
task_cfg = TaskConfig(
|
|
93
|
-
org=self.
|
|
93
|
+
org=self.org or self.config.task.org,
|
|
94
94
|
project=project or self.config.task.project,
|
|
95
95
|
domain=domain or self.config.task.domain,
|
|
96
96
|
)
|
|
@@ -105,7 +105,7 @@ class CLIConfig:
|
|
|
105
105
|
updated_config = self.config.with_params(platform_cfg, task_cfg)
|
|
106
106
|
|
|
107
107
|
logger.debug(f"Initializing CLI with config: {updated_config}")
|
|
108
|
-
flyte.
|
|
108
|
+
flyte.init_from_config(updated_config)
|
|
109
109
|
|
|
110
110
|
|
|
111
111
|
class InvokeBaseMixin:
|
flyte/cli/_create.py
CHANGED
|
@@ -70,7 +70,7 @@ def secret(
|
|
|
70
70
|
"--org",
|
|
71
71
|
type=str,
|
|
72
72
|
required=False,
|
|
73
|
-
help="Organization to use
|
|
73
|
+
help="Organization to use. This will override the organization in the configuration file.",
|
|
74
74
|
)
|
|
75
75
|
@click.option(
|
|
76
76
|
"-o",
|
|
@@ -86,11 +86,9 @@ def secret(
|
|
|
86
86
|
default=False,
|
|
87
87
|
help="Force overwrite of the configuration file if it already exists.",
|
|
88
88
|
show_default=True,
|
|
89
|
-
prompt="Are you sure you want to overwrite the configuration file?",
|
|
90
|
-
confirmation_prompt=True,
|
|
91
89
|
)
|
|
92
90
|
def config(
|
|
93
|
-
output:
|
|
91
|
+
output: str,
|
|
94
92
|
endpoint: str | None = None,
|
|
95
93
|
insecure: bool = False,
|
|
96
94
|
org: str | None = None,
|
|
@@ -105,8 +103,13 @@ def config(
|
|
|
105
103
|
"""
|
|
106
104
|
import yaml
|
|
107
105
|
|
|
108
|
-
|
|
109
|
-
|
|
106
|
+
output_path = Path(output)
|
|
107
|
+
|
|
108
|
+
if output_path.exists() and not force:
|
|
109
|
+
force = click.confirm(f"Overwrite [{output_path}]?", default=False)
|
|
110
|
+
if not force:
|
|
111
|
+
click.echo(f"Will not overwrite the existing config file at {output_path}")
|
|
112
|
+
return
|
|
110
113
|
|
|
111
114
|
admin: Dict[str, Any] = {}
|
|
112
115
|
if endpoint:
|
|
@@ -114,6 +117,11 @@ def config(
|
|
|
114
117
|
if insecure:
|
|
115
118
|
admin["insecure"] = insecure
|
|
116
119
|
|
|
120
|
+
if not org and endpoint:
|
|
121
|
+
from flyte._utils.org_discovery import org_from_endpoint
|
|
122
|
+
|
|
123
|
+
org = org_from_endpoint(endpoint)
|
|
124
|
+
|
|
117
125
|
task: Dict[str, str] = {}
|
|
118
126
|
if org:
|
|
119
127
|
task["org"] = org
|
|
@@ -125,7 +133,7 @@ def config(
|
|
|
125
133
|
if not admin and not task:
|
|
126
134
|
raise click.BadParameter("At least one of --endpoint or --org must be provided.")
|
|
127
135
|
|
|
128
|
-
with open(
|
|
136
|
+
with open(output_path, "w") as f:
|
|
129
137
|
d: Dict[str, Any] = {}
|
|
130
138
|
if admin:
|
|
131
139
|
d["admin"] = admin
|
|
@@ -133,4 +141,4 @@ def config(
|
|
|
133
141
|
d["task"] = task
|
|
134
142
|
yaml.dump(d, f)
|
|
135
143
|
|
|
136
|
-
click.echo(f"Config file
|
|
144
|
+
click.echo(f"Config file written to {output_path}")
|
flyte/cli/_params.py
CHANGED
|
@@ -23,7 +23,7 @@ from mashumaro.codecs.json import JSONEncoder
|
|
|
23
23
|
|
|
24
24
|
from flyte._logging import logger
|
|
25
25
|
from flyte.io import Dir, File
|
|
26
|
-
from flyte.
|
|
26
|
+
from flyte.types._pickle import FlytePickleTransformer
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class StructuredDataset:
|
|
@@ -110,7 +110,7 @@ class FileParamType(click.ParamType):
|
|
|
110
110
|
p = pathlib.Path(value)
|
|
111
111
|
if not p.exists() or not p.is_file():
|
|
112
112
|
raise click.BadParameter(f"parameter should be a valid file path, {value}")
|
|
113
|
-
return File(
|
|
113
|
+
return File.from_existing_remote(value)
|
|
114
114
|
|
|
115
115
|
|
|
116
116
|
class PickleParamType(click.ParamType):
|
flyte/cli/_run.py
CHANGED
|
@@ -97,7 +97,7 @@ class RunTaskCommand(click.Command):
|
|
|
97
97
|
if obj is None:
|
|
98
98
|
import flyte.config
|
|
99
99
|
|
|
100
|
-
obj = CLIConfig(flyte.config.auto())
|
|
100
|
+
obj = CLIConfig(flyte.config.auto(), ctx)
|
|
101
101
|
|
|
102
102
|
if not self.run_args.local:
|
|
103
103
|
assert obj.endpoint, "CLI Config should have an endpoint"
|
flyte/cli/main.py
CHANGED
|
@@ -90,7 +90,7 @@ def _verbosity_to_loglevel(verbosity: int) -> int | None:
|
|
|
90
90
|
"config_file",
|
|
91
91
|
required=False,
|
|
92
92
|
type=click.Path(exists=True),
|
|
93
|
-
help="Path to the configuration file to use. If not specified, the default configuration file is used."
|
|
93
|
+
help="Path to the configuration file to use. If not specified, the default configuration file is used.",
|
|
94
94
|
)
|
|
95
95
|
@click.rich_config(help_config=help_config)
|
|
96
96
|
@click.pass_context
|
|
@@ -146,15 +146,11 @@ def main(
|
|
|
146
146
|
cfg = config.auto(config_file=config_file)
|
|
147
147
|
logger.debug(f"Using config file discovered at location {cfg.source}")
|
|
148
148
|
|
|
149
|
-
final_insecure = cfg.platform.insecure
|
|
150
|
-
if insecure is not None:
|
|
151
|
-
final_insecure = insecure
|
|
152
|
-
|
|
153
149
|
ctx.obj = CLIConfig(
|
|
154
150
|
log_level=log_level,
|
|
155
|
-
endpoint=endpoint
|
|
156
|
-
insecure=
|
|
157
|
-
|
|
151
|
+
endpoint=endpoint,
|
|
152
|
+
insecure=insecure,
|
|
153
|
+
org=org,
|
|
158
154
|
config=cfg,
|
|
159
155
|
ctx=ctx,
|
|
160
156
|
)
|
flyte/errors.py
CHANGED
|
@@ -150,3 +150,14 @@ class LogsNotYetAvailableError(BaseRuntimeError):
|
|
|
150
150
|
|
|
151
151
|
def __init__(self, message: str):
|
|
152
152
|
super().__init__("LogsNotYetAvailable", "system", message, None)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class RuntimeDataValidationError(RuntimeUserError):
    """
    This error is raised when a task variable fails to serialize/deserialize.

    The error message names the task and the variable involved and embeds the underlying exception.
    """

    def __init__(self, var: str, e: Exception, task_name: str = ""):
        """
        :param var: Name of the variable that failed to serialize/deserialize
        :param e: The underlying exception; it is interpolated into the error message
        :param task_name: Name of the task the variable belongs to (defaults to an empty string)
        """
        # NOTE(review): the first argument (presumably the error code) is spelled "DataValiationError",
        # missing a "d". It is left untouched here because callers may match on the exact string;
        # confirm before correcting it.
        super().__init__(
            "DataValiationError", f"In task {task_name} variable {var}, failed to serialize/deserialize because {e}"
        )
|
flyte/extras/_container.py
CHANGED
|
@@ -9,20 +9,15 @@ from flyte._logging import logger
|
|
|
9
9
|
from flyte._task import TaskTemplate
|
|
10
10
|
from flyte.models import NativeInterface, SerializationContext
|
|
11
11
|
|
|
12
|
-
_PRIMARY_CONTAINER_NAME_FIELD = "primary_container_name"
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
def _extract_command_key(cmd: str, **kwargs) -> Any:
|
|
13
|
+
def _extract_command_key(cmd: str, **kwargs) -> List[Any] | None:
|
|
16
14
|
"""
|
|
17
15
|
Extract the key from the command using regex.
|
|
18
16
|
"""
|
|
19
17
|
import re
|
|
20
18
|
|
|
21
|
-
input_regex = r"
|
|
22
|
-
|
|
23
|
-
if match:
|
|
24
|
-
return match.group(1)
|
|
25
|
-
return None
|
|
19
|
+
input_regex = r"\{\{\.inputs\.([a-zA-Z0-9_]+)\}\}"
|
|
20
|
+
return re.findall(input_regex, cmd)
|
|
26
21
|
|
|
27
22
|
|
|
28
23
|
def _extract_path_command_key(cmd: str, input_data_dir: Optional[str]) -> Optional[str]:
|
|
@@ -70,7 +65,7 @@ class ContainerTask(TaskTemplate):
|
|
|
70
65
|
input_data_dir: str | pathlib.Path = "/var/inputs",
|
|
71
66
|
output_data_dir: str | pathlib.Path = "/var/outputs",
|
|
72
67
|
metadata_format: MetadataFormat = "JSON",
|
|
73
|
-
local_logs: bool =
|
|
68
|
+
local_logs: bool = True,
|
|
74
69
|
**kwargs,
|
|
75
70
|
):
|
|
76
71
|
super().__init__(
|
|
@@ -106,34 +101,33 @@ class ContainerTask(TaskTemplate):
|
|
|
106
101
|
For FlyteFile and FlyteDirectory commands, e.g., "/var/inputs/inputs", we extract the key from strings that
|
|
107
102
|
begin with the specified `input_data_dir`.
|
|
108
103
|
"""
|
|
109
|
-
|
|
110
|
-
# from flytekit.types.file import FlyteFile
|
|
104
|
+
from flyte.io import Dir, File
|
|
111
105
|
|
|
112
106
|
volume_binding: Dict[str, Dict[str, str]] = {}
|
|
113
107
|
path_k = _extract_path_command_key(cmd, str(self._input_data_dir))
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
if
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
108
|
+
keys = path_k if path_k else _extract_command_key(cmd)
|
|
109
|
+
|
|
110
|
+
if keys:
|
|
111
|
+
for k in keys:
|
|
112
|
+
input_val = kwargs.get(k)
|
|
113
|
+
# TODO: Add support file and directory transformer first
|
|
114
|
+
if type(input_val) in [File, Dir]:
|
|
115
|
+
if not path_k:
|
|
116
|
+
raise AssertionError(
|
|
117
|
+
"File and Directory commands should not use the template syntax "
|
|
118
|
+
"like this: {{.inputs.infile}}\n"
|
|
119
|
+
"Please use a path-like syntax, such as: /var/inputs/infile.\n"
|
|
120
|
+
"This requirement is due to how Flyte Propeller processes template syntax inputs."
|
|
121
|
+
)
|
|
122
|
+
local_flyte_file_or_dir_path = str(input_val)
|
|
123
|
+
remote_flyte_file_or_dir_path = os.path.join(self._input_data_dir, k) # type: ignore
|
|
124
|
+
volume_binding[local_flyte_file_or_dir_path] = {
|
|
125
|
+
"bind": remote_flyte_file_or_dir_path,
|
|
126
|
+
"mode": "rw",
|
|
127
|
+
}
|
|
128
|
+
command = remote_flyte_file_or_dir_path
|
|
129
|
+
else:
|
|
130
|
+
command = cmd
|
|
137
131
|
|
|
138
132
|
return command, volume_binding
|
|
139
133
|
|
|
@@ -235,6 +229,7 @@ class ContainerTask(TaskTemplate):
|
|
|
235
229
|
raise AssertionError(f"Only Image objects are supported, not strings. Got {self._image} instead.")
|
|
236
230
|
uri = self._image.uri
|
|
237
231
|
self._pull_image_if_not_exists(client, uri)
|
|
232
|
+
print(f"Command: {commands!r}")
|
|
238
233
|
|
|
239
234
|
container = client.containers.run(uri, command=commands, remove=True, volumes=volume_bindings, detach=True)
|
|
240
235
|
|
|
@@ -266,8 +261,3 @@ class ContainerTask(TaskTemplate):
|
|
|
266
261
|
|
|
267
262
|
def container_args(self, sctx: SerializationContext) -> List[str]:
|
|
268
263
|
return self._cmd + (self._args if self._args else [])
|
|
269
|
-
|
|
270
|
-
def config(self, sctx: SerializationContext) -> Dict[str, str]:
|
|
271
|
-
if self.pod_template is None:
|
|
272
|
-
return {}
|
|
273
|
-
return {_PRIMARY_CONTAINER_NAME_FIELD: self.primary_container_name}
|
flyte/io/__init__.py
CHANGED
|
@@ -3,9 +3,25 @@
|
|
|
3
3
|
|
|
4
4
|
This package contains additional data types beyond the primitive data types in python to abstract data flow
|
|
5
5
|
of large datasets in Union.
|
|
6
|
+
|
|
6
7
|
"""
|
|
7
8
|
|
|
8
|
-
__all__ = [
|
|
9
|
+
__all__ = [
|
|
10
|
+
"Dir",
|
|
11
|
+
"File",
|
|
12
|
+
"StructuredDataset",
|
|
13
|
+
"StructuredDatasetDecoder",
|
|
14
|
+
"StructuredDatasetEncoder",
|
|
15
|
+
"StructuredDatasetTransformerEngine",
|
|
16
|
+
"lazy_import_structured_dataset_handler",
|
|
17
|
+
]
|
|
9
18
|
|
|
10
19
|
from ._dir import Dir
|
|
11
20
|
from ._file import File
|
|
21
|
+
from ._structured_dataset import (
|
|
22
|
+
StructuredDataset,
|
|
23
|
+
StructuredDatasetDecoder,
|
|
24
|
+
StructuredDatasetEncoder,
|
|
25
|
+
StructuredDatasetTransformerEngine,
|
|
26
|
+
lazy_import_structured_dataset_handler,
|
|
27
|
+
)
|
flyte/io/_file.py
CHANGED
|
@@ -232,6 +232,8 @@ class File(BaseModel, Generic[T], SerializableType):
|
|
|
232
232
|
# This code is broadly similar to what storage.get_stream does, but without actually reading from the stream
|
|
233
233
|
file_handle = None
|
|
234
234
|
try:
|
|
235
|
+
if "b" not in mode:
|
|
236
|
+
raise ValueError("Mode must include 'b' for binary access, when using remote files.")
|
|
235
237
|
if isinstance(fs, AsyncFileSystem):
|
|
236
238
|
file_handle = await fs.open_async(self.path, mode)
|
|
237
239
|
yield file_handle
|
|
@@ -9,7 +9,7 @@ from fsspec.core import split_protocol, strip_protocol
|
|
|
9
9
|
import flyte.storage as storage
|
|
10
10
|
from flyte._logging import logger
|
|
11
11
|
from flyte._utils import lazy_module
|
|
12
|
-
from flyte.io.
|
|
12
|
+
from flyte.io._structured_dataset.structured_dataset import (
|
|
13
13
|
CSV,
|
|
14
14
|
PARQUET,
|
|
15
15
|
StructuredDataset,
|
|
@@ -168,7 +168,7 @@ class StructuredDataset(SerializableType, DataClassJSONMixin):
|
|
|
168
168
|
return self._literal_sd
|
|
169
169
|
|
|
170
170
|
def open(self, dataframe_type: Type[DF]):
|
|
171
|
-
from flyte.io.
|
|
171
|
+
from flyte.io._structured_dataset import lazy_import_structured_dataset_handler
|
|
172
172
|
|
|
173
173
|
"""
|
|
174
174
|
Load the handler if needed. For the use case like:
|
flyte/models.py
CHANGED
|
@@ -95,6 +95,7 @@ class RawDataPath:
|
|
|
95
95
|
# Create a temporary directory for data storage
|
|
96
96
|
p = tempfile.mkdtemp()
|
|
97
97
|
logger.debug(f"Creating temporary directory for data storage: {p}")
|
|
98
|
+
pathlib.Path(p).mkdir(parents=True, exist_ok=True)
|
|
98
99
|
return RawDataPath(path=p)
|
|
99
100
|
case str():
|
|
100
101
|
return RawDataPath(path=local_folder)
|
flyte/remote/_data.py
CHANGED
|
@@ -100,7 +100,8 @@ async def _upload_single_file(
|
|
|
100
100
|
if put_resp.status_code != 200:
|
|
101
101
|
raise RuntimeSystemError(
|
|
102
102
|
"UploadFailed",
|
|
103
|
-
f"Failed to upload {fp} to {resp.signed_url}, status code: {put_resp.status_code}"
|
|
103
|
+
f"Failed to upload {fp} to {resp.signed_url}, status code: {put_resp.status_code}, "
|
|
104
|
+
f"response: {put_resp.text}",
|
|
104
105
|
)
|
|
105
106
|
# TODO in old code we did this
|
|
106
107
|
# if self._config.platform.insecure_skip_verify is True
|
flyte/types/__init__.py
CHANGED
|
@@ -1,9 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
# Flyte Type System
|
|
3
|
+
|
|
4
|
+
The Flyte type system provides a way to define, transform, and manipulate types in Flyte workflows.
|
|
5
|
+
Since the data flowing through Flyte often has to cross process, container and language boundaries, the type system
|
|
6
|
+
is designed to be serializable to a universal format that can be understood across different environments. This
|
|
7
|
+
universal format is based on Protocol Buffers. The types are called LiteralTypes and the runtime
|
|
8
|
+
representation of data is called Literals.
|
|
9
|
+
|
|
10
|
+
The type system includes:
|
|
11
|
+
- **TypeEngine**: The core engine that manages type transformations and serialization. This is the main entry point for
|
|
12
|
+
all the internal type transformations and serialization logic.
|
|
13
|
+
- **TypeTransformer**: A class that defines how to transform one type to another. This is extensible
|
|
14
|
+
allowing users to define custom types and transformations.
|
|
15
|
+
- **Renderable**: An interface for types that can be rendered as HTML, that can be outputted to a flyte.report.
|
|
16
|
+
|
|
17
|
+
It is always possible to bypass the type system and use the `FlytePickle` type to serialize any python object
|
|
18
|
+
into a pickle format. The pickle format is not human-readable, but can be passed between flyte tasks that are
|
|
19
|
+
written in python. Pickled objects cannot be represented in the UI, and may be inefficient for large datasets.
|
|
20
|
+
"""
|
|
21
|
+
|
|
1
22
|
from ._interface import guess_interface
|
|
23
|
+
from ._pickle import FlytePickle
|
|
2
24
|
from ._renderer import Renderable
|
|
3
25
|
from ._string_literals import literal_string_repr
|
|
4
26
|
from ._type_engine import TypeEngine, TypeTransformer, TypeTransformerFailedError
|
|
5
27
|
|
|
6
28
|
__all__ = [
|
|
29
|
+
"FlytePickle",
|
|
7
30
|
"Renderable",
|
|
8
31
|
"TypeEngine",
|
|
9
32
|
"TypeTransformer",
|
flyte/types/_type_engine.py
CHANGED
|
@@ -1008,7 +1008,7 @@ class TypeEngine(typing.Generic[T]):
|
|
|
1008
1008
|
return cls._DATACLASS_TRANSFORMER
|
|
1009
1009
|
|
|
1010
1010
|
display_pickle_warning(str(python_type))
|
|
1011
|
-
from flyte.
|
|
1011
|
+
from flyte.types._pickle import FlytePickleTransformer
|
|
1012
1012
|
|
|
1013
1013
|
return FlytePickleTransformer()
|
|
1014
1014
|
|
|
@@ -1021,7 +1021,7 @@ class TypeEngine(typing.Generic[T]):
|
|
|
1021
1021
|
# Avoid a race condition where concurrent threads may exit lazy_import_transformers before the transformers
|
|
1022
1022
|
# have been imported. This could be implemented without a lock if you assume python assignments are atomic
|
|
1023
1023
|
# and re-registering transformers is acceptable, but I decided to play it safe.
|
|
1024
|
-
from flyte.io
|
|
1024
|
+
from flyte.io import lazy_import_structured_dataset_handler
|
|
1025
1025
|
|
|
1026
1026
|
# todo: bring in extras transformers (pytorch, etc.)
|
|
1027
1027
|
lazy_import_structured_dataset_handler()
|
|
@@ -1207,7 +1207,9 @@ class TypeEngine(typing.Generic[T]):
|
|
|
1207
1207
|
python_type = type_hints.get(k, type(d[k]))
|
|
1208
1208
|
e: BaseException = literal_map[k].exception() # type: ignore
|
|
1209
1209
|
if isinstance(e, TypeError):
|
|
1210
|
-
raise TypeError(
|
|
1210
|
+
raise TypeError(
|
|
1211
|
+
f"Error converting: Var:{k}, type:{type(v)}, into:{python_type}, received_value {v}"
|
|
1212
|
+
)
|
|
1211
1213
|
else:
|
|
1212
1214
|
raise e
|
|
1213
1215
|
literal_map[k] = v.result()
|
|
@@ -1657,7 +1659,7 @@ class DictTransformer(TypeTransformer[dict]):
|
|
|
1657
1659
|
Converts a Python dictionary to a Flyte-specific ``Literal`` using MessagePack encoding.
|
|
1658
1660
|
Falls back to Pickle if encoding fails and `allow_pickle` is True.
|
|
1659
1661
|
"""
|
|
1660
|
-
from flyte.
|
|
1662
|
+
from flyte.types._pickle import FlytePickle
|
|
1661
1663
|
|
|
1662
1664
|
try:
|
|
1663
1665
|
# Handle dictionaries with non-string keys (e.g., Dict[int, Type])
|
|
@@ -1763,7 +1765,7 @@ class DictTransformer(TypeTransformer[dict]):
|
|
|
1763
1765
|
# pr: han-ru is this part still necessary?
|
|
1764
1766
|
if lv and lv.HasField("scalar") and lv.scalar.HasField("generic"):
|
|
1765
1767
|
if lv.metadata and lv.metadata.get("format", None) == "pickle":
|
|
1766
|
-
from flyte.
|
|
1768
|
+
from flyte.types._pickle import FlytePickle
|
|
1767
1769
|
|
|
1768
1770
|
uri = json.loads(_json_format.MessageToJson(lv.scalar.generic)).get("pickle_file")
|
|
1769
1771
|
return await FlytePickle.from_pickle(uri)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flyte
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.0b11
|
|
4
4
|
Summary: Add your description here
|
|
5
5
|
Author-email: Ketan Umare <kumare3@users.noreply.github.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -61,7 +61,6 @@ Note that the `--follow` command is optional. Use this to stream updates to the
|
|
|
61
61
|
3. look at examples/... for various examples.
|
|
62
62
|
4. For a single script, we recommend using `uv run` scripts with metadata headers.
|
|
63
63
|
|
|
64
|
-
|
|
65
64
|
```python
|
|
66
65
|
import flyte
|
|
67
66
|
|
|
@@ -80,14 +79,14 @@ async def say_hello_nested(data: str) -> str:
|
|
|
80
79
|
|
|
81
80
|
if __name__ == "__main__":
|
|
82
81
|
import asyncio
|
|
83
|
-
|
|
82
|
+
|
|
84
83
|
# to run pure python - the SDK is not invoked at all
|
|
85
84
|
asyncio.run(say_hello_nested("test"))
|
|
86
|
-
|
|
85
|
+
|
|
87
86
|
# To run locally, but run through type system etc
|
|
88
87
|
flyte.init()
|
|
89
88
|
flyte.run(say_hello_nested, "World")
|
|
90
|
-
|
|
89
|
+
|
|
91
90
|
# To run remote
|
|
92
91
|
flyte.init(endpoint="dns:///localhost:8090", insecure=True)
|
|
93
92
|
flyte.run(say_hello_nested, "World")
|
|
@@ -95,7 +94,7 @@ if __name__ == "__main__":
|
|
|
95
94
|
flyte.with_runcontext(mode="local").run(...) # this will run locally only
|
|
96
95
|
|
|
97
96
|
# To run remote with a config
|
|
98
|
-
flyte.
|
|
97
|
+
flyte.init_from_config("config.yaml")
|
|
99
98
|
```
|
|
100
99
|
|
|
101
100
|
# CLI
|