flyte 0.2.0b14__py3-none-any.whl → 0.2.0b16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

@@ -27,19 +27,12 @@ else:
27
27
  T = TypeVar("T")
28
28
 
29
29
 
30
- # pr: add back after storage
31
- def get_pandas_storage_options(uri: str, data_config=None, anonymous: bool = False) -> typing.Optional[typing.Dict]:
30
+ def get_pandas_storage_options(uri: str, anonymous: bool = False) -> typing.Optional[typing.Dict]:
32
31
  from pandas.io.common import is_fsspec_url # type: ignore
33
32
 
34
33
  if is_fsspec_url(uri):
35
34
  if uri.startswith("s3"):
36
- # pr: after storage, replace with real call to get_fsspec_storage_options
37
- return {
38
- "cache_regions": True,
39
- "client_kwargs": {"endpoint_url": "http://localhost:30002"},
40
- "key": "minio",
41
- "secret": "miniostorage",
42
- }
35
+ return storage.get_configured_fsspec_kwargs("s3", anonymous=anonymous)
43
36
  return {}
44
37
 
45
38
  # Pandas does not allow storage_options for non-fsspec paths e.g. local.
@@ -70,7 +63,7 @@ class PandasToCSVEncodingHandler(StructuredDatasetEncoder):
70
63
  df.to_csv(
71
64
  path,
72
65
  index=False,
73
- storage_options=get_pandas_storage_options(uri=path, data_config=None),
66
+ storage_options=get_pandas_storage_options(uri=path),
74
67
  )
75
68
  structured_dataset_type.format = CSV
76
69
  return literals_pb2.StructuredDataset(
@@ -87,20 +80,21 @@ class CSVToPandasDecodingHandler(StructuredDatasetDecoder):
87
80
  proto_value: literals_pb2.StructuredDataset,
88
81
  current_task_metadata: literals_pb2.StructuredDatasetMetadata,
89
82
  ) -> "pd.DataFrame":
90
- from botocore.exceptions import NoCredentialsError
91
-
92
83
  uri = proto_value.uri
93
84
  columns = None
94
- kwargs = get_pandas_storage_options(uri=uri, data_config=None)
85
+ kwargs = get_pandas_storage_options(uri=uri)
95
86
  path = os.path.join(uri, ".csv")
96
87
  if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
97
88
  columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
98
89
  try:
99
90
  return pd.read_csv(path, usecols=columns, storage_options=kwargs)
100
- except NoCredentialsError:
101
- logger.debug("S3 source detected, attempting anonymous S3 access")
102
- kwargs = get_pandas_storage_options(uri=uri, data_config=None, anonymous=True)
103
- return pd.read_csv(path, usecols=columns, storage_options=kwargs)
91
+ except Exception as exc:
92
+ if exc.__class__.__name__ == "NoCredentialsError":
93
+ logger.debug("S3 source detected, attempting anonymous S3 access")
94
+ kwargs = get_pandas_storage_options(uri=uri, anonymous=True)
95
+ return pd.read_csv(path, usecols=columns, storage_options=kwargs)
96
+ else:
97
+ raise
104
98
 
105
99
 
106
100
  class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
@@ -128,7 +122,7 @@ class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
128
122
  path,
129
123
  coerce_timestamps="us",
130
124
  allow_truncated_timestamps=False,
131
- storage_options=get_pandas_storage_options(uri=path, data_config=None),
125
+ storage_options=get_pandas_storage_options(uri=path),
132
126
  )
133
127
  structured_dataset_type.format = PARQUET
134
128
  return literals_pb2.StructuredDataset(
@@ -145,19 +139,20 @@ class ParquetToPandasDecodingHandler(StructuredDatasetDecoder):
145
139
  flyte_value: literals_pb2.StructuredDataset,
146
140
  current_task_metadata: literals_pb2.StructuredDatasetMetadata,
147
141
  ) -> "pd.DataFrame":
148
- from botocore.exceptions import NoCredentialsError
149
-
150
142
  uri = flyte_value.uri
151
143
  columns = None
152
- kwargs = get_pandas_storage_options(uri=uri, data_config=None)
144
+ kwargs = get_pandas_storage_options(uri=uri)
153
145
  if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
154
146
  columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
155
147
  try:
156
148
  return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
157
- except NoCredentialsError:
158
- logger.debug("S3 source detected, attempting anonymous S3 access")
159
- kwargs = get_pandas_storage_options(uri=uri, data_config=None, anonymous=True)
160
- return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
149
+ except Exception as exc:
150
+ if exc.__class__.__name__ == "NoCredentialsError":
151
+ logger.debug("S3 source detected, attempting anonymous S3 access")
152
+ kwargs = get_pandas_storage_options(uri=uri, anonymous=True)
153
+ return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
154
+ else:
155
+ raise
161
156
 
162
157
 
163
158
  class ArrowToParquetEncodingHandler(StructuredDatasetEncoder):
@@ -199,7 +194,6 @@ class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
199
194
  current_task_metadata: literals_pb2.StructuredDatasetMetadata,
200
195
  ) -> "pa.Table":
201
196
  import pyarrow.parquet as pq
202
- from botocore.exceptions import NoCredentialsError
203
197
 
204
198
  uri = proto_value.uri
205
199
  if not storage.is_remote(uri):
@@ -211,9 +205,11 @@ class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
211
205
  columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
212
206
  try:
213
207
  return pq.read_table(path, columns=columns)
214
- except NoCredentialsError as e:
215
- logger.debug("S3 source detected, attempting anonymous S3 access")
216
- fs = storage.get_underlying_filesystem(path=uri, anonymous=True)
217
- if fs is not None:
218
- return pq.read_table(path, filesystem=fs, columns=columns)
219
- raise e
208
+ except Exception as exc:
209
+ if exc.__class__.__name__ == "NoCredentialsError":
210
+ logger.debug("S3 source detected, attempting anonymous S3 access")
211
+ fs = storage.get_underlying_filesystem(path=uri, anonymous=True)
212
+ if fs is not None:
213
+ return pq.read_table(path, filesystem=fs, columns=columns)
214
+ else:
215
+ raise
flyte/models.py CHANGED
@@ -5,7 +5,7 @@ import os
5
5
  import pathlib
6
6
  import tempfile
7
7
  from dataclasses import dataclass, field, replace
8
- from typing import TYPE_CHECKING, Any, Callable, Dict, Literal, Optional, Tuple, Type
8
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, Literal, Optional, Tuple, Type
9
9
 
10
10
  import rich.repr
11
11
 
@@ -15,6 +15,8 @@ from flyte._logging import logger
15
15
  from flyte._utils.helpers import base36_encode
16
16
 
17
17
  if TYPE_CHECKING:
18
+ from flyteidl.core import literals_pb2
19
+
18
20
  from flyte._internal.imagebuild.image_builder import ImageCache
19
21
  from flyte.report import Report
20
22
 
@@ -231,6 +233,13 @@ class Checkpoints:
231
233
  checkpoint_path: str | None
232
234
 
233
235
 
236
+ class _has_default:
237
+ """
238
+ A marker class used to indicate whether a specific input has a default value.
239
+ This is used to determine if the input is required or not.
240
+ """
241
+
242
+
234
243
  @dataclass(frozen=True)
235
244
  class NativeInterface:
236
245
  """
@@ -240,7 +249,14 @@ class NativeInterface:
240
249
 
241
250
  inputs: Dict[str, Tuple[Type, Any]]
242
251
  outputs: Dict[str, Type]
243
- docstring: Optional[Docstring] = field(default=None)
252
+ docstring: Optional[Docstring] = None
253
+
254
+ # This field is used to indicate that the task has a default value for the input, but already in the
255
+ # remote form.
256
+ _remote_defaults: Optional[Dict[str, literals_pb2.Literal]] = field(default=None, repr=False)
257
+
258
+ has_default: ClassVar[Type[_has_default]] = _has_default # This can be used to indicate if a specific input
259
+ # has a default value or not, in the case when the default value is not known. An example would be remote tasks.
244
260
 
245
261
  def has_outputs(self) -> bool:
246
262
  """
@@ -248,12 +264,31 @@ class NativeInterface:
248
264
  """
249
265
  return self.outputs is not None and len(self.outputs) > 0
250
266
 
267
+ def num_required_inputs(self) -> int:
268
+ """
269
+ Get the number of required inputs for the task. This is used to determine how many inputs are required for the
270
+ task execution.
271
+ """
272
+ return sum(1 for t in self.inputs.values() if t[1] is inspect.Parameter.empty)
273
+
251
274
  @classmethod
252
- def from_types(cls, inputs: Dict[str, Type], outputs: Dict[str, Type]) -> NativeInterface:
275
+ def from_types(
276
+ cls,
277
+ inputs: Dict[str, Tuple[Type, Type[_has_default] | Type[inspect._empty]]],
278
+ outputs: Dict[str, Type],
279
+ default_inputs: Optional[Dict[str, literals_pb2.Literal]] = None,
280
+ ) -> NativeInterface:
253
281
  """
254
282
  Create a new NativeInterface from the given types. This is used to create a native interface for the task.
283
+ :param inputs: A dictionary of input names and their types and a value indicating if they have a default value.
284
+ :param outputs: A dictionary of output names and their types.
285
+ :param default_inputs: Optional dictionary of default inputs for remote tasks.
286
+ :return: A NativeInterface object with the given inputs and outputs.
255
287
  """
256
- return cls(inputs={k: (v, inspect.Parameter.empty) for k, v in inputs.items()}, outputs=outputs)
288
+ for k, v in inputs.items():
289
+ if v[1] is cls.has_default and (default_inputs is None or k not in default_inputs):
290
+ raise ValueError(f"Input {k} has a default value but no default input provided for remote task.")
291
+ return cls(inputs=inputs, outputs=outputs, _remote_defaults=default_inputs)
257
292
 
258
293
  @classmethod
259
294
  def from_callable(cls, func: Callable) -> NativeInterface:
@@ -301,7 +336,10 @@ class NativeInterface:
301
336
  tp = tpe[0] if isinstance(tpe[0], str) else tpe[0].__name__
302
337
  i += f"{key}: {tp}"
303
338
  if tpe[1] is not inspect.Parameter.empty:
304
- i += f" = {tpe[1]}"
339
+ if tpe[1] is self.has_default:
340
+ i += " = ..."
341
+ else:
342
+ i += f" = {tpe[1]}"
305
343
  i += ")"
306
344
  if self.outputs:
307
345
  initial = True
flyte/remote/_data.py CHANGED
@@ -16,6 +16,7 @@ from flyteidl.service import dataproxy_pb2
16
16
  from google.protobuf import duration_pb2
17
17
 
18
18
  from flyte._initialize import CommonInit, ensure_client, get_client, get_common_config
19
+ from flyte._logging import make_hyperlink
19
20
  from flyte.errors import InitializationError, RuntimeSystemError
20
21
 
21
22
  _UPLOAD_EXPIRES_IN = timedelta(seconds=60)
@@ -89,7 +90,7 @@ async def _upload_single_file(
89
90
  raise RuntimeSystemError(e.code().value, f"Failed to get signed url for {fp}.")
90
91
  except Exception as e:
91
92
  raise RuntimeSystemError(type(e).__name__, f"Failed to get signed url for {fp}.") from e
92
- logger.debug(f"Uploading to signed url {resp.signed_url} for {fp}")
93
+ logger.debug(f"Uploading to {make_hyperlink('signed url', resp.signed_url)} for {fp}")
93
94
  extra_headers = get_extra_headers_for_protocol(resp.native_url)
94
95
  extra_headers.update(resp.headers)
95
96
  encoded_md5 = b64encode(md5_bytes)
flyte/remote/_run.py CHANGED
@@ -43,7 +43,10 @@ def _action_time_phase(action: run_definition_pb2.Action | run_definition_pb2.Ac
43
43
  yield "run_time", f"{(datetime.now(timezone.utc) - start_time).seconds} secs"
44
44
  yield "phase", run_definition_pb2.Phase.Name(action.status.phase)
45
45
  if isinstance(action, run_definition_pb2.ActionDetails):
46
- yield "error", action.error_info if action.HasField("error_info") else "NA"
46
+ yield (
47
+ "error",
48
+ f"{action.error_info.kind}: {action.error_info.message}" if action.HasField("error_info") else "NA",
49
+ )
47
50
 
48
51
 
49
52
  def _action_rich_repr(action: run_definition_pb2.Action) -> rich.repr.Result:
@@ -78,7 +81,7 @@ def _action_details_rich_repr(action: run_definition_pb2.ActionDetails) -> rich.
78
81
  yield "task_type", action.resolved_task_spec.task_template.type
79
82
  yield "task_version", action.resolved_task_spec.task_template.id.version
80
83
  yield "attempts", action.attempts
81
- yield "error_info", action.error_info if action.HasField("error_info") else "NA"
84
+ yield "error", f"{action.error_info.kind}: {action.error_info.message}" if action.HasField("error_info") else "NA"
82
85
  yield "phase", run_definition_pb2.Phase.Name(action.status.phase)
83
86
  yield "group", action.metadata.group
84
87
  yield "parent", action.metadata.parent
@@ -639,7 +642,7 @@ class Action:
639
642
  console.print(f"[bold green]Run '{self.name}' completed successfully.[/bold green]")
640
643
  else:
641
644
  console.print(
642
- f"[bold red]Run '{self.name}' exited unsuccessfully in state {ad.phase}"
645
+ f"[bold red]Run '{self.name}' exited unsuccessfully in state {ad.phase} "
643
646
  f"with error: {ad.error_info}[/bold red]"
644
647
  )
645
648
  break
@@ -667,6 +670,8 @@ class Action:
667
670
  Rich representation of the Action object.
668
671
  """
669
672
  yield from _action_rich_repr(self.pb2)
673
+ if self._details:
674
+ yield from self._details.__rich_repr__()
670
675
 
671
676
  def __repr__(self) -> str:
672
677
  """
flyte/remote/_task.py CHANGED
@@ -162,6 +162,20 @@ class TaskDetails:
162
162
  """
163
163
  return self.pb2.spec.task_template.type
164
164
 
165
+ @property
166
+ def default_input_args(self) -> Tuple[str, ...]:
167
+ """
168
+ The default input arguments of the task.
169
+ """
170
+ return tuple(x.name for x in self.pb2.spec.default_inputs)
171
+
172
+ @property
173
+ def required_args(self) -> Tuple[str, ...]:
174
+ """
175
+ The required input arguments of the task.
176
+ """
177
+ return tuple(x for x, _ in self.interface.inputs.items() if x not in self.default_input_args)
178
+
165
179
  @functools.cached_property
166
180
  def interface(self) -> NativeInterface:
167
181
  """
@@ -169,7 +183,7 @@ class TaskDetails:
169
183
  """
170
184
  import flyte.types as types
171
185
 
172
- return types.guess_interface(self.pb2.spec.task_template.interface)
186
+ return types.guess_interface(self.pb2.spec.task_template.interface, default_inputs=self.pb2.spec.default_inputs)
173
187
 
174
188
  @property
175
189
  def cache(self) -> flyte.Cache:
@@ -206,6 +220,19 @@ class TaskDetails:
206
220
  """
207
221
  Forwards the call to the underlying task. The entity will be fetched if not already present
208
222
  """
223
+ # TODO support kwargs, for this we need ordered inputs to be stored in the task spec.
224
+ if len(args) > 0:
225
+ raise flyte.errors.ReferenceTaskError(
226
+ f"Reference task {self.name} does not support positional arguments "
227
+ f"currently. Please use keyword arguments."
228
+ )
229
+ if len(self.required_args) > 0:
230
+ if len(args) + len(kwargs) < len(self.required_args):
231
+ raise ValueError(
232
+ f"Task {self.name} requires at least {len(self.required_args)} arguments, "
233
+ f"but only received args: {args} and kwargs: {kwargs}."
234
+ )
235
+
209
236
  ctx = internal_ctx()
210
237
  if ctx.is_task_context():
211
238
  # If we are in a task context, that implies we are executing a Run.
@@ -238,6 +265,11 @@ class TaskDetails:
238
265
  """
239
266
  Rich representation of the task.
240
267
  """
268
+ yield "friendly_name", self.pb2.spec.short_name
269
+ yield "environment", self.pb2.spec.environment
270
+ yield "default_inputs_keys", self.default_input_args
271
+ yield "required_args", self.required_args
272
+ yield "raw_default_inputs", [str(x) for x in self.pb2.spec.default_inputs]
241
273
  yield "project", self.pb2.task_id.project
242
274
  yield "domain", self.pb2.task_id.domain
243
275
  yield "name", self.name
flyte/storage/__init__.py CHANGED
@@ -4,6 +4,7 @@ __all__ = [
4
4
  "S3",
5
5
  "Storage",
6
6
  "get",
7
+ "get_configured_fsspec_kwargs",
7
8
  "get_random_local_directory",
8
9
  "get_random_local_path",
9
10
  "get_stream",
@@ -18,6 +19,7 @@ __all__ = [
18
19
  from ._config import ABFS, GCS, S3, Storage
19
20
  from ._storage import (
20
21
  get,
22
+ get_configured_fsspec_kwargs,
21
23
  get_random_local_directory,
22
24
  get_random_local_path,
23
25
  get_stream,
flyte/storage/_storage.py CHANGED
@@ -62,40 +62,51 @@ def get_random_local_directory() -> pathlib.Path:
62
62
  return _dir
63
63
 
64
64
 
65
- def get_underlying_filesystem(
66
- protocol: typing.Optional[str] = None,
67
- anonymous: bool = False,
68
- path: typing.Optional[str] = None,
69
- **kwargs,
70
- ) -> fsspec.AbstractFileSystem:
71
- if protocol is None:
72
- # If protocol is None, get it from the path
73
- protocol = get_protocol(path)
74
-
75
- storage_config = get_storage()
76
- if storage_config:
77
- kwargs = storage_config.get_fsspec_kwargs(anonymous, **kwargs)
78
- elif protocol:
65
+ def get_configured_fsspec_kwargs(
66
+ protocol: typing.Optional[str] = None, anonymous: bool = False
67
+ ) -> typing.Dict[str, typing.Any]:
68
+ if protocol:
79
69
  match protocol:
80
70
  case "s3":
81
71
  # If the protocol is s3, we can use the s3 filesystem
82
72
  from flyte.storage import S3
83
73
 
84
- kwargs = S3.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
74
+ return S3.auto().get_fsspec_kwargs(anonymous=anonymous)
85
75
  case "gs":
86
76
  # If the protocol is gs, we can use the gs filesystem
87
77
  from flyte.storage import GCS
88
78
 
89
- kwargs = GCS.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
79
+ return GCS.auto().get_fsspec_kwargs(anonymous=anonymous)
90
80
  case "abfs" | "abfss":
91
81
  # If the protocol is abfs or abfss, we can use the abfs filesystem
92
82
  from flyte.storage import ABFS
93
83
 
94
- kwargs = ABFS.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
84
+ return ABFS.auto().get_fsspec_kwargs(anonymous=anonymous)
95
85
  case _:
96
- pass
86
+ return {}
87
+
88
+ # If no protocol, return args from storage config if set
89
+ storage_config = get_storage()
90
+ if storage_config:
91
+ return storage_config.get_fsspec_kwargs(anonymous)
92
+
93
+ return {}
94
+
95
+
96
+ def get_underlying_filesystem(
97
+ protocol: typing.Optional[str] = None,
98
+ anonymous: bool = False,
99
+ path: typing.Optional[str] = None,
100
+ **kwargs,
101
+ ) -> fsspec.AbstractFileSystem:
102
+ if protocol is None:
103
+ # If protocol is None, get it from the path
104
+ protocol = get_protocol(path)
105
+
106
+ configured_kwargs = get_configured_fsspec_kwargs(protocol, anonymous=anonymous)
107
+ configured_kwargs.update(kwargs)
97
108
 
98
- return fsspec.filesystem(protocol, **kwargs)
109
+ return fsspec.filesystem(protocol, **configured_kwargs)
99
110
 
100
111
 
101
112
  def _get_anonymous_filesystem(from_path):
flyte/types/_interface.py CHANGED
@@ -1,11 +1,15 @@
1
- from typing import Any, Dict, Type, cast
1
+ import inspect
2
+ from typing import Any, Dict, Iterable, Tuple, Type, cast
2
3
 
3
- from flyteidl.core import interface_pb2
4
+ from flyteidl.core import interface_pb2, literals_pb2
4
5
 
6
+ from flyte._protos.workflow import common_pb2
5
7
  from flyte.models import NativeInterface
6
8
 
7
9
 
8
- def guess_interface(interface: interface_pb2.TypedInterface) -> NativeInterface:
10
+ def guess_interface(
11
+ interface: interface_pb2.TypedInterface, default_inputs: Iterable[common_pb2.NamedParameter] | None = None
12
+ ) -> NativeInterface:
9
13
  """
10
14
  Returns the interface of the task with guessed types, as types may not be present in current env.
11
15
  """
@@ -14,12 +18,23 @@ def guess_interface(interface: interface_pb2.TypedInterface) -> NativeInterface:
14
18
  if interface is None:
15
19
  return NativeInterface({}, {})
16
20
 
17
- guessed_inputs: Dict[str, Type[Any]] = {}
21
+ default_input_literals: Dict[str, literals_pb2.Literal] = {}
22
+ if default_inputs is not None:
23
+ for param in default_inputs:
24
+ if param.parameter.HasField("default"):
25
+ default_input_literals[param.name] = param.parameter.default
26
+
27
+ guessed_inputs: Dict[str, Tuple[Type[Any], Any] | Any] = {}
18
28
  if interface.inputs is not None and len(interface.inputs.variables) > 0:
19
- guessed_inputs = flyte.types.TypeEngine.guess_python_types(cast(dict, interface.inputs.variables))
29
+ input_types = flyte.types.TypeEngine.guess_python_types(cast(dict, interface.inputs.variables))
30
+ for name, t in input_types.items():
31
+ if name not in default_input_literals:
32
+ guessed_inputs[name] = (t, inspect.Parameter.empty)
33
+ else:
34
+ guessed_inputs[name] = (t, NativeInterface.has_default)
20
35
 
21
36
  guessed_outputs: Dict[str, Type[Any]] = {}
22
37
  if interface.outputs is not None and len(interface.outputs.variables) > 0:
23
38
  guessed_outputs = flyte.types.TypeEngine.guess_python_types(cast(dict, interface.outputs.variables))
24
39
 
25
- return NativeInterface.from_types(guessed_inputs, guessed_outputs)
40
+ return NativeInterface.from_types(guessed_inputs, guessed_outputs, default_input_literals)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flyte
3
- Version: 0.2.0b14
3
+ Version: 0.2.0b16
4
4
  Summary: Add your description here
5
5
  Author-email: Ketan Umare <kumare3@users.noreply.github.com>
6
6
  Requires-Python: >=3.10
@@ -125,7 +125,7 @@ import flyte
125
125
 
126
126
  env = flyte.TaskEnvironment(
127
127
  name="hello_world",
128
- image=flyte.Image.auto().with_apt_packages(...).with_pip_packages(...),
128
+ image=flyte.Image.from_debian_base().with_apt_packages(...).with_pip_packages(...),
129
129
  )
130
130
 
131
131
  ```