flyte 0.1.0__py3-none-any.whl → 0.2.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +78 -2
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +152 -0
- flyte/_build.py +26 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +145 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +323 -0
- flyte/_code_bundle/bundle.py +209 -0
- flyte/_context.py +152 -0
- flyte/_deploy.py +243 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +84 -0
- flyte/_excepthook.py +37 -0
- flyte/_group.py +32 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +762 -0
- flyte/_initialize.py +492 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +128 -0
- flyte/_internal/controllers/_local_controller.py +193 -0
- flyte/_internal/controllers/_trace.py +41 -0
- flyte/_internal/controllers/remote/__init__.py +60 -0
- flyte/_internal/controllers/remote/_action.py +146 -0
- flyte/_internal/controllers/remote/_client.py +47 -0
- flyte/_internal/controllers/remote/_controller.py +494 -0
- flyte/_internal/controllers/remote/_core.py +410 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +427 -0
- flyte/_internal/imagebuild/image_builder.py +246 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +342 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +330 -0
- flyte/_internal/runtime/taskrunner.py +191 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +135 -0
- flyte/_map.py +215 -0
- flyte/_pod.py +19 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +71 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +100 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +177 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/common_pb2.py +27 -0
- flyte/_protos/workflow/common_pb2.pyi +14 -0
- flyte/_protos/workflow/common_pb2_grpc.py +4 -0
- flyte/_protos/workflow/environment_pb2.py +29 -0
- flyte/_protos/workflow/environment_pb2.pyi +12 -0
- flyte/_protos/workflow/environment_pb2_grpc.py +4 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +105 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +146 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +314 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +129 -0
- flyte/_protos/workflow/run_service_pb2.pyi +171 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +66 -0
- flyte/_protos/workflow/state_service_pb2.pyi +75 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +79 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +81 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +60 -0
- flyte/_protos/workflow/task_service_pb2.pyi +59 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +138 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +482 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +449 -0
- flyte/_task_environment.py +183 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +120 -0
- flyte/_utils/__init__.py +26 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +23 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +134 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/cli/__init__.py +3 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_common.py +337 -0
- flyte/cli/_create.py +145 -0
- flyte/cli/_delete.py +23 -0
- flyte/cli/_deploy.py +152 -0
- flyte/cli/_gen.py +163 -0
- flyte/cli/_get.py +310 -0
- flyte/cli/_params.py +538 -0
- flyte/cli/_run.py +231 -0
- flyte/cli/main.py +166 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +216 -0
- flyte/config/_internal.py +64 -0
- flyte/config/_reader.py +207 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +172 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +263 -0
- flyte/io/__init__.py +27 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +467 -0
- flyte/io/_structured_dataset/__init__.py +129 -0
- flyte/io/_structured_dataset/basic_dfs.py +219 -0
- flyte/io/_structured_dataset/structured_dataset.py +1061 -0
- flyte/models.py +391 -0
- flyte/remote/__init__.py +26 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +133 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_auth_utils.py +14 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +215 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +159 -0
- flyte/remote/_logs.py +176 -0
- flyte/remote/_project.py +85 -0
- flyte/remote/_run.py +970 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +391 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +29 -0
- flyte/storage/_config.py +233 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +271 -0
- flyte/storage/_utils.py +5 -0
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +371 -0
- flyte/types/__init__.py +36 -0
- flyte/types/_interface.py +40 -0
- flyte/types/_pickle.py +118 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2287 -0
- flyte/types/_utils.py +80 -0
- flyte-0.2.0a0.dist-info/METADATA +249 -0
- flyte-0.2.0a0.dist-info/RECORD +218 -0
- {flyte-0.1.0.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +2 -1
- flyte-0.2.0a0.dist-info/entry_points.txt +3 -0
- flyte-0.2.0a0.dist-info/top_level.txt +1 -0
- flyte-0.1.0.dist-info/METADATA +0 -6
- flyte-0.1.0.dist-info/RECORD +0 -5
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import typing
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TypeVar
|
|
5
|
+
|
|
6
|
+
from flyteidl.core import literals_pb2, types_pb2
|
|
7
|
+
from fsspec.core import split_protocol, strip_protocol
|
|
8
|
+
|
|
9
|
+
import flyte.storage as storage
|
|
10
|
+
from flyte._logging import logger
|
|
11
|
+
from flyte._utils import lazy_module
|
|
12
|
+
from flyte.io._structured_dataset.structured_dataset import (
|
|
13
|
+
CSV,
|
|
14
|
+
PARQUET,
|
|
15
|
+
StructuredDataset,
|
|
16
|
+
StructuredDatasetDecoder,
|
|
17
|
+
StructuredDatasetEncoder,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
if typing.TYPE_CHECKING:
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import pyarrow as pa
|
|
23
|
+
else:
|
|
24
|
+
pd = lazy_module("pandas")
|
|
25
|
+
pa = lazy_module("pyarrow")
|
|
26
|
+
|
|
27
|
+
T = TypeVar("T")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# pr: add back after storage
|
|
31
|
+
def get_pandas_storage_options(uri: str, data_config=None, anonymous: bool = False) -> typing.Optional[typing.Dict]:
    """Build the ``storage_options`` mapping to hand to pandas for ``uri``.

    Args:
        uri: Location that is about to be passed to a pandas reader or writer.
        data_config: Currently unused; kept so existing call sites keep working.
        anonymous: When true, the S3 options additionally carry ``anon=True``
            so that the anonymous retry performed by callers differs from the
            first, credentialed attempt.

    Returns:
        ``None`` when ``uri`` is not an fsspec URL (pandas does not allow
        ``storage_options`` for such paths, e.g. local ones), the S3 option
        dict when ``uri`` starts with ``s3``, and an empty dict for any other
        fsspec URL.
    """
    from pandas.io.common import is_fsspec_url  # type: ignore

    if not is_fsspec_url(uri):
        # Pandas does not allow storage_options for non-fsspec paths e.g. local.
        return None

    if not uri.startswith("s3"):
        return {}

    # pr: after storage, replace with real call to get_fsspec_storage_options
    options: typing.Dict[str, typing.Any] = {
        "cache_regions": True,
        "client_kwargs": {"endpoint_url": "http://localhost:30002"},
        "key": "minio",
        "secret": "miniostorage",
    }
    if anonymous:
        # Previously this flag was silently ignored, which made the callers'
        # "retry anonymously" path a repeat of the failed request.
        options["anon"] = True
    return options
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class PandasToCSVEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pandas DataFrame out as a single CSV file."""

    def __init__(self):
        # Registers this handler for pd.DataFrame with the CSV format; the
        # middle argument is left as None (presumably the protocol - confirm
        # against StructuredDatasetEncoder).
        super().__init__(pd.DataFrame, None, CSV)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write the wrapped DataFrame to ``<uri>/.csv`` and describe it as a literal.

        Args:
            structured_dataset: Wrapper holding the DataFrame and, optionally,
                the target ``uri``. When no ``uri`` is set, a random remote
                path is taken from the internal context.
            structured_dataset_type: Type message to attach to the result; its
                ``format`` field is overwritten with ``CSV``.

        Returns:
            A ``literals_pb2.StructuredDataset`` pointing at the directory URI
            (not at the ``.csv`` file itself).
        """
        if not structured_dataset.uri:
            from flyte._context import internal_ctx

            # str() keeps this in line with the Parquet encoder and guarantees
            # a string for the protobuf ``uri`` field below.
            uri = str(internal_ctx().raw_data.get_random_remote_path())
        else:
            uri = typing.cast(str, structured_dataset.uri)

        if not storage.is_remote(uri):
            # Local targets need the directory to exist before pandas writes into it.
            Path(uri).mkdir(parents=True, exist_ok=True)

        path = os.path.join(uri, ".csv")
        df = typing.cast(pd.DataFrame, structured_dataset.dataframe)
        df.to_csv(
            path,
            index=False,
            storage_options=get_pandas_storage_options(uri=path, data_config=None),
        )
        structured_dataset_type.format = CSV
        # Protobuf message constructors only accept keyword arguments; passing
        # the type positionally raises TypeError.
        return literals_pb2.StructuredDataset(
            uri=uri,
            metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type),
        )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class CSVToPandasDecodingHandler(StructuredDatasetDecoder):
    """Decoder that loads the ``.csv`` file under a dataset URI into a pandas DataFrame."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, CSV)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        """Read ``<proto_value.uri>/.csv`` with pandas.

        Args:
            proto_value: Literal whose ``uri`` names the dataset directory.
            current_task_metadata: When its structured dataset type lists
                columns, only those column names are read.

        Returns:
            The DataFrame produced by ``pd.read_csv``.

        On ``NoCredentialsError`` the read is attempted once more with storage
        options requested using ``anonymous=True``.
        """
        from botocore.exceptions import NoCredentialsError

        dataset_uri = proto_value.uri
        wanted_columns = None
        storage_opts = get_pandas_storage_options(uri=dataset_uri, data_config=None)
        csv_path = os.path.join(dataset_uri, ".csv")

        has_columns = (
            current_task_metadata.structured_dataset_type
            and current_task_metadata.structured_dataset_type.columns
        )
        if has_columns:
            wanted_columns = [col.name for col in current_task_metadata.structured_dataset_type.columns]

        try:
            return pd.read_csv(csv_path, usecols=wanted_columns, storage_options=storage_opts)
        except NoCredentialsError:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            storage_opts = get_pandas_storage_options(uri=dataset_uri, data_config=None, anonymous=True)
            return pd.read_csv(csv_path, usecols=wanted_columns, storage_options=storage_opts)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pandas DataFrame out as a single Parquet file."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write the wrapped DataFrame to ``<uri>/00000`` and describe it as a literal.

        Args:
            structured_dataset: Wrapper holding the DataFrame and, optionally,
                the target ``uri``; without one, a random remote path from the
                internal context is used.
            structured_dataset_type: Type message attached to the result; its
                ``format`` field is overwritten with ``PARQUET``.

        Returns:
            A ``literals_pb2.StructuredDataset`` whose ``uri`` is the dataset
            directory rather than the individual Parquet file.
        """
        if structured_dataset.uri:
            uri = typing.cast(str, structured_dataset.uri)
        else:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            uri = str(ctx.raw_data.get_random_remote_path())

        is_local = not storage.is_remote(uri)
        if is_local:
            # Make sure the local directory exists before pandas writes into it.
            Path(uri).mkdir(parents=True, exist_ok=True)

        target = os.path.join(uri, f"{0:05}")
        frame = typing.cast(pd.DataFrame, structured_dataset.dataframe)
        write_options = get_pandas_storage_options(uri=target, data_config=None)
        frame.to_parquet(
            target,
            coerce_timestamps="us",
            allow_truncated_timestamps=False,
            storage_options=write_options,
        )

        structured_dataset_type.format = PARQUET
        metadata = literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type)
        return literals_pb2.StructuredDataset(uri=uri, metadata=metadata)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ParquetToPandasDecodingHandler(StructuredDatasetDecoder):
    """Decoder that loads a Parquet dataset URI into a pandas DataFrame."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def decode(
        self,
        flyte_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        """Read ``flyte_value.uri`` with ``pd.read_parquet``.

        Args:
            flyte_value: Literal whose ``uri`` is passed to pandas as-is.
            current_task_metadata: When its structured dataset type lists
                columns, only those column names are read.

        Returns:
            The DataFrame produced by ``pd.read_parquet``.

        On ``NoCredentialsError`` the read is attempted once more with storage
        options requested using ``anonymous=True``.
        """
        from botocore.exceptions import NoCredentialsError

        source = flyte_value.uri
        selected = None
        read_options = get_pandas_storage_options(uri=source, data_config=None)

        has_columns = (
            current_task_metadata.structured_dataset_type
            and current_task_metadata.structured_dataset_type.columns
        )
        if has_columns:
            selected = [col.name for col in current_task_metadata.structured_dataset_type.columns]

        try:
            return pd.read_parquet(source, columns=selected, storage_options=read_options)
        except NoCredentialsError:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            read_options = get_pandas_storage_options(uri=source, data_config=None, anonymous=True)
            return pd.read_parquet(source, columns=selected, storage_options=read_options)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class ArrowToParquetEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pyarrow Table out as a single Parquet file."""

    def __init__(self):
        super().__init__(pa.Table, None, PARQUET)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write the wrapped table to ``<uri>/00000`` and describe it as a literal.

        Args:
            structured_dataset: Wrapper holding the Arrow table and, optionally,
                the target ``uri``; without one, a random remote path from the
                internal context is used.
            structured_dataset_type: Type message attached to the result.

        Returns:
            A ``literals_pb2.StructuredDataset`` whose ``uri`` is the dataset
            directory rather than the individual Parquet file.
        """
        import pyarrow.parquet as pq

        if not structured_dataset.uri:
            from flyte._context import internal_ctx

            # str() matches PandasToParquetEncodingHandler and guarantees a
            # string for the protobuf ``uri`` field below.
            uri = str(internal_ctx().raw_data.get_random_remote_path())
        else:
            uri = typing.cast(str, structured_dataset.uri)

        if not storage.is_remote(uri):
            # Local targets need the directory to exist before the write.
            Path(uri).mkdir(parents=True, exist_ok=True)

        path = os.path.join(uri, f"{0:05}")
        filesystem = storage.get_underlying_filesystem(path=path)
        # The filesystem object is passed explicitly, so the path is handed
        # over with its protocol prefix stripped.
        pq.write_table(structured_dataset.dataframe, strip_protocol(path), filesystem=filesystem)

        # NOTE(review): unlike PandasToParquetEncodingHandler, this encoder does
        # not set structured_dataset_type.format - confirm that is intentional.
        # Protobuf message constructors only accept keyword arguments; passing
        # the type positionally raises TypeError.
        return literals_pb2.StructuredDataset(
            uri=uri,
            metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type),
        )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
    """Decoder that loads a Parquet dataset URI into a pyarrow Table."""

    def __init__(self):
        super().__init__(pa.Table, None, PARQUET)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pa.Table":
        """Read the dataset at ``proto_value.uri`` with ``pq.read_table``.

        Args:
            proto_value: Literal whose ``uri`` locates the Parquet data.
            current_task_metadata: When its structured dataset type lists
                columns, only those column names are read.

        Returns:
            The table produced by ``pq.read_table``.

        Raises:
            NoCredentialsError: Re-raised when the first read fails with it and
                no anonymous filesystem could be obtained for the URI.
        """
        import pyarrow.parquet as pq
        from botocore.exceptions import NoCredentialsError

        source_uri = proto_value.uri
        if not storage.is_remote(source_uri):
            Path(source_uri).parent.mkdir(parents=True, exist_ok=True)

        # Only the protocol-less part of the URI is handed to pyarrow.
        # NOTE(review): the first read passes no filesystem, so a remote URI is
        # read by its stripped path - confirm this is intended.
        stripped_path = split_protocol(source_uri)[1]

        selected = None
        has_columns = (
            current_task_metadata.structured_dataset_type
            and current_task_metadata.structured_dataset_type.columns
        )
        if has_columns:
            selected = [col.name for col in current_task_metadata.structured_dataset_type.columns]

        try:
            return pq.read_table(stripped_path, columns=selected)
        except NoCredentialsError as e:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            anon_fs = storage.get_underlying_filesystem(path=source_uri, anonymous=True)
            if anon_fs is None:
                raise e
            return pq.read_table(stripped_path, filesystem=anon_fs, columns=selected)
|