flyte 2.0.0b22__py3-none-any.whl → 2.0.0b23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of flyte might be problematic.

Files changed (88)
  1. flyte/__init__.py +5 -0
  2. flyte/_bin/runtime.py +35 -5
  3. flyte/_cache/cache.py +4 -2
  4. flyte/_cache/local_cache.py +215 -0
  5. flyte/_code_bundle/bundle.py +1 -0
  6. flyte/_debug/constants.py +0 -1
  7. flyte/_debug/vscode.py +6 -1
  8. flyte/_deploy.py +193 -52
  9. flyte/_environment.py +5 -0
  10. flyte/_excepthook.py +1 -1
  11. flyte/_image.py +101 -72
  12. flyte/_initialize.py +23 -0
  13. flyte/_internal/controllers/_local_controller.py +64 -24
  14. flyte/_internal/controllers/remote/_action.py +4 -1
  15. flyte/_internal/controllers/remote/_controller.py +5 -2
  16. flyte/_internal/controllers/remote/_core.py +6 -3
  17. flyte/_internal/controllers/remote/_informer.py +1 -1
  18. flyte/_internal/imagebuild/docker_builder.py +92 -28
  19. flyte/_internal/imagebuild/image_builder.py +7 -13
  20. flyte/_internal/imagebuild/remote_builder.py +6 -1
  21. flyte/_internal/runtime/io.py +13 -1
  22. flyte/_internal/runtime/rusty.py +17 -2
  23. flyte/_internal/runtime/task_serde.py +14 -20
  24. flyte/_internal/runtime/taskrunner.py +1 -1
  25. flyte/_internal/runtime/trigger_serde.py +153 -0
  26. flyte/_logging.py +1 -1
  27. flyte/_protos/common/identifier_pb2.py +19 -1
  28. flyte/_protos/common/identifier_pb2.pyi +22 -0
  29. flyte/_protos/workflow/common_pb2.py +14 -3
  30. flyte/_protos/workflow/common_pb2.pyi +49 -0
  31. flyte/_protos/workflow/queue_service_pb2.py +41 -35
  32. flyte/_protos/workflow/queue_service_pb2.pyi +26 -12
  33. flyte/_protos/workflow/queue_service_pb2_grpc.py +34 -0
  34. flyte/_protos/workflow/run_definition_pb2.py +38 -38
  35. flyte/_protos/workflow/run_definition_pb2.pyi +4 -2
  36. flyte/_protos/workflow/run_service_pb2.py +60 -50
  37. flyte/_protos/workflow/run_service_pb2.pyi +24 -6
  38. flyte/_protos/workflow/run_service_pb2_grpc.py +34 -0
  39. flyte/_protos/workflow/task_definition_pb2.py +15 -11
  40. flyte/_protos/workflow/task_definition_pb2.pyi +19 -2
  41. flyte/_protos/workflow/task_service_pb2.py +18 -17
  42. flyte/_protos/workflow/task_service_pb2.pyi +5 -2
  43. flyte/_protos/workflow/trigger_definition_pb2.py +66 -0
  44. flyte/_protos/workflow/trigger_definition_pb2.pyi +117 -0
  45. flyte/_protos/workflow/trigger_definition_pb2_grpc.py +4 -0
  46. flyte/_protos/workflow/trigger_service_pb2.py +96 -0
  47. flyte/_protos/workflow/trigger_service_pb2.pyi +110 -0
  48. flyte/_protos/workflow/trigger_service_pb2_grpc.py +281 -0
  49. flyte/_run.py +42 -15
  50. flyte/_task.py +35 -4
  51. flyte/_task_environment.py +60 -15
  52. flyte/_trigger.py +382 -0
  53. flyte/_version.py +3 -3
  54. flyte/cli/_abort.py +3 -3
  55. flyte/cli/_build.py +1 -3
  56. flyte/cli/_common.py +15 -2
  57. flyte/cli/_create.py +74 -0
  58. flyte/cli/_delete.py +23 -1
  59. flyte/cli/_deploy.py +5 -9
  60. flyte/cli/_get.py +75 -34
  61. flyte/cli/_params.py +4 -2
  62. flyte/cli/_run.py +12 -3
  63. flyte/cli/_update.py +36 -0
  64. flyte/cli/_user.py +17 -0
  65. flyte/cli/main.py +9 -1
  66. flyte/errors.py +9 -0
  67. flyte/io/_dir.py +513 -115
  68. flyte/io/_file.py +495 -135
  69. flyte/models.py +32 -0
  70. flyte/remote/__init__.py +6 -1
  71. flyte/remote/_client/_protocols.py +36 -2
  72. flyte/remote/_client/controlplane.py +19 -3
  73. flyte/remote/_run.py +42 -2
  74. flyte/remote/_task.py +14 -1
  75. flyte/remote/_trigger.py +308 -0
  76. flyte/remote/_user.py +33 -0
  77. flyte/storage/__init__.py +6 -1
  78. flyte/storage/_storage.py +119 -101
  79. flyte/types/_pickle.py +16 -3
  80. {flyte-2.0.0b22.data → flyte-2.0.0b23.data}/scripts/runtime.py +35 -5
  81. {flyte-2.0.0b22.dist-info → flyte-2.0.0b23.dist-info}/METADATA +3 -1
  82. {flyte-2.0.0b22.dist-info → flyte-2.0.0b23.dist-info}/RECORD +87 -75
  83. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  84. {flyte-2.0.0b22.data → flyte-2.0.0b23.data}/scripts/debug.py +0 -0
  85. {flyte-2.0.0b22.dist-info → flyte-2.0.0b23.dist-info}/WHEEL +0 -0
  86. {flyte-2.0.0b22.dist-info → flyte-2.0.0b23.dist-info}/entry_points.txt +0 -0
  87. {flyte-2.0.0b22.dist-info → flyte-2.0.0b23.dist-info}/licenses/LICENSE +0 -0
  88. {flyte-2.0.0b22.dist-info → flyte-2.0.0b23.dist-info}/top_level.txt +0 -0
flyte/io/_dir.py CHANGED
@@ -20,28 +20,176 @@ T = TypeVar("T")
 class Dir(BaseModel, Generic[T], SerializableType):
     """
     A generic directory class representing a directory with files of a specified format.
-    Provides both async and sync interfaces for directory operations.
-    Users are responsible for handling all I/O - the type transformer for Dir does not do any automatic uploading
-    or downloading of files.
+    Provides both async and sync interfaces for directory operations. All methods without _sync suffix are async.
+
+    The class should be instantiated using one of the class methods. The constructor should only be used to
+    instantiate references to existing remote directories.

     The generic type T represents the format of the files in the directory.

-    Example:
+    Important methods:
+    - `from_existing_remote`: Create a Dir object referencing an existing remote directory.
+    - `from_local` / `from_local_sync`: Upload a local directory to remote storage.
+
+    **Asynchronous methods**:
+    - `walk`: Asynchronously iterate through files in the directory.
+    - `list_files`: Asynchronously get a list of all files (non-recursive).
+    - `download`: Asynchronously download the entire directory to a local path.
+    - `exists`: Asynchronously check if the directory exists.
+    - `get_file`: Asynchronously get a specific file from the directory by name.
+
+    **Synchronous methods** (suffixed with `_sync`):
+    - `walk_sync`: Synchronously iterate through files in the directory.
+    - `list_files_sync`: Synchronously get a list of all files (non-recursive).
+    - `download_sync`: Synchronously download the entire directory to a local path.
+    - `exists_sync`: Synchronously check if the directory exists.
+    - `get_file_sync`: Synchronously get a specific file from the directory by name.
+
+    Example: Walk through directory files recursively (Async).
+
+    ```python
+    @env.task
+    async def process_all_files(d: Dir) -> int:
+        file_count = 0
+        async for file in d.walk(recursive=True):
+            async with file.open("rb") as f:
+                content = await f.read()
+                # Process content
+                file_count += 1
+        return file_count
+    ```
+
+    Example: Walk through directory files recursively (Sync).
+
+    ```python
+    @env.task
+    def process_all_files_sync(d: Dir) -> int:
+        file_count = 0
+        for file in d.walk_sync(recursive=True):
+            with file.open_sync("rb") as f:
+                content = f.read()
+                # Process content
+                file_count += 1
+        return file_count
+    ```
+
+    Example: List files in directory (Async).
+
+    ```python
+    @env.task
+    async def count_files(d: Dir) -> int:
+        files = await d.list_files()
+        return len(files)
+    ```
+
+    Example: List files in directory (Sync).
+
+    ```python
+    @env.task
+    def count_files_sync(d: Dir) -> int:
+        files = d.list_files_sync()
+        return len(files)
+    ```
+
+    Example: Get a specific file from directory (Async).
+
+    ```python
+    @env.task
+    async def read_config_file(d: Dir) -> str:
+        config_file = await d.get_file("config.json")
+        if config_file:
+            async with config_file.open("rb") as f:
+                return (await f.read()).decode("utf-8")
+        return "Config not found"
+    ```
+
+    Example: Get a specific file from directory (Sync).
+
+    ```python
+    @env.task
+    def read_config_file_sync(d: Dir) -> str:
+        config_file = d.get_file_sync("config.json")
+        if config_file:
+            with config_file.open_sync("rb") as f:
+                return f.read().decode("utf-8")
+        return "Config not found"
+    ```
+
+    Example: Upload a local directory to remote storage (Async).
+
+    ```python
+    @env.task
+    async def upload_directory() -> Dir:
+        # Create local directory with files
+        os.makedirs("/tmp/my_data", exist_ok=True)
+        with open("/tmp/my_data/file1.txt", "w") as f:
+            f.write("data1")
+        # Upload to remote storage
+        return await Dir.from_local("/tmp/my_data/")
+    ```
+
+    Example: Upload a local directory to remote storage (Sync).
+
+    ```python
+    @env.task
+    def upload_directory_sync() -> Dir:
+        # Create local directory with files
+        os.makedirs("/tmp/my_data", exist_ok=True)
+        with open("/tmp/my_data/file1.txt", "w") as f:
+            f.write("data1")
+        # Upload to remote storage
+        return Dir.from_local_sync("/tmp/my_data/")
+    ```
+
+    Example: Download a directory to local storage (Async).
+
+    ```python
+    @env.task
+    async def download_directory(d: Dir) -> str:
+        local_path = await d.download()
+        # Process files in local directory
+        return local_path
+    ```
+
+    Example: Download a directory to local storage (Sync).
+
     ```python
-    # Async usage
-    from pandas import DataFrame
-    data_dir = Dir[DataFrame](path="s3://my-bucket/data/")
-
-    # Walk through files
-    async for file in data_dir.walk():
-        async with file.open() as f:
-            content = await f.read()
-
-    # Sync alternative
-    for file in data_dir.walk_sync():
-        with file.open_sync() as f:
-            content = f.read()
+    @env.task
+    def download_directory_sync(d: Dir) -> str:
+        local_path = d.download_sync()
+        # Process files in local directory
+        return local_path
     ```
+
+    Example: Reference an existing remote directory.
+
+    ```python
+    @env.task
+    async def process_existing_dir() -> int:
+        d = Dir.from_existing_remote("s3://my-bucket/data/")
+        files = await d.list_files()
+        return len(files)
+    ```
+
+    Example: Check if directory exists (Async).
+
+    ```python
+    @env.task
+    async def check_directory(d: Dir) -> bool:
+        return await d.exists()
+    ```
+
+    Example: Check if directory exists (Sync).
+
+    ```python
+    @env.task
+    def check_directory_sync(d: Dir) -> bool:
+        return d.exists_sync()
+    ```
+
+    Args:
+        path: The path to the directory (can be local or remote)
+        name: Optional name for the directory (defaults to basename of path)
     """

     # Represents either a local or remote path.
@@ -56,20 +204,24 @@ class Dir(BaseModel, Generic[T], SerializableType):
     @model_validator(mode="before")
     @classmethod
     def pre_init(cls, data):
+        """Internal: Pydantic validator to set default name from path. Not intended for direct use."""
         if data.get("name") is None:
             data["name"] = Path(data["path"]).name
         return data

     def _serialize(self) -> Dict[str, Optional[str]]:
+        """Internal: Serialize Dir to dictionary. Not intended for direct use."""
         pyd_dump = self.model_dump()
         return pyd_dump

     @classmethod
     def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> Dir:
+        """Internal: Deserialize Dir from dictionary. Not intended for direct use."""
         return cls.model_validate(file_dump)

     @classmethod
     def schema_match(cls, incoming: dict):
+        """Internal: Check if incoming schema matches Dir schema. Not intended for direct use."""
         this_schema = cls.model_json_schema()
         current_required = this_schema.get("required")
         incoming_required = incoming.get("required")
@@ -86,19 +238,48 @@ class Dir(BaseModel, Generic[T], SerializableType):
         """
         Asynchronously walk through the directory and yield File objects.

-        Args:
-            recursive: If True, recursively walk subdirectories
-            max_depth: Maximum depth for recursive walking
+        Use this to iterate through all files in a directory. Each yielded File can be read directly without
+        downloading.

-        Yields:
-            File objects for each file found in the directory
+        Example (Async - Recursive):
+
+        ```python
+        @env.task
+        async def list_all_files(d: Dir) -> list[str]:
+            file_names = []
+            async for file in d.walk(recursive=True):
+                file_names.append(file.name)
+            return file_names
+        ```
+
+        Example (Async - Non-recursive):

-        Example:
         ```python
-        async for file in directory.walk():
-            local_path = await file.download()
-            # Process the file
+        @env.task
+        async def list_top_level_files(d: Dir) -> list[str]:
+            file_names = []
+            async for file in d.walk(recursive=False):
+                file_names.append(file.name)
+            return file_names
         ```
+
+        Example (Async - With max depth):
+
+        ```python
+        @env.task
+        async def list_files_max_depth(d: Dir) -> list[str]:
+            file_names = []
+            async for file in d.walk(recursive=True, max_depth=2):
+                file_names.append(file.name)
+            return file_names
+        ```
+
+        Args:
+            recursive: If True, recursively walk subdirectories. If False, only list files in the top-level directory.
+            max_depth: Maximum depth for recursive walking. If None, walk through all subdirectories.
+
+        Yields:
+            File objects for each file found in the directory
         """
         fs = storage.get_underlying_filesystem(path=self.path)
         if recursive is False:
@@ -125,20 +306,48 @@ class Dir(BaseModel, Generic[T], SerializableType):
         """
         Synchronously walk through the directory and yield File objects.

-        Args:
-            recursive: If True, recursively walk subdirectories
-            file_pattern: Glob pattern to filter files
-            max_depth: Maximum depth for recursive walking
+        Use this in non-async tasks to iterate through all files in a directory.

-        Yields:
-            File objects for each file found in the directory
+        Example (Sync - Recursive):
+
+        ```python
+        @env.task
+        def list_all_files_sync(d: Dir) -> list[str]:
+            file_names = []
+            for file in d.walk_sync(recursive=True):
+                file_names.append(file.name)
+            return file_names
+        ```
+
+        Example (Sync - With file pattern):
+
+        ```python
+        @env.task
+        def list_text_files(d: Dir) -> list[str]:
+            file_names = []
+            for file in d.walk_sync(recursive=True, file_pattern="*.txt"):
+                file_names.append(file.name)
+            return file_names
+        ```
+
+        Example (Sync - Non-recursive with max depth):

-        Example:
         ```python
-        for file in directory.walk_sync():
-            local_path = file.download_sync()
-            # Process the file
+        @env.task
+        def list_files_limited(d: Dir) -> list[str]:
+            file_names = []
+            for file in d.walk_sync(recursive=True, max_depth=2):
+                file_names.append(file.name)
+            return file_names
         ```
+
+        Args:
+            recursive: If True, recursively walk subdirectories. If False, only list files in the top-level directory.
+            file_pattern: Glob pattern to filter files (e.g., "*.txt", "*.csv"). Default is "*" (all files).
+            max_depth: Maximum depth for recursive walking. If None, walk through all subdirectories.
+
+        Yields:
+            File objects for each file found in the directory
         """
         fs = storage.get_underlying_filesystem(path=self.path)
         for parent, _, files in fs.walk(self.path, maxdepth=max_depth):
@@ -153,14 +362,32 @@ class Dir(BaseModel, Generic[T], SerializableType):
         """
         Asynchronously get a list of all files in the directory (non-recursive).

+        Use this when you need a list of all files in the top-level directory at once.
+
         Returns:
-            A list of File objects
+            A list of File objects for files in the top-level directory
+
+        Example (Async):

-        Example:
         ```python
-        files = await directory.list_files()
-        for file in files:
-            # Process the file
+        @env.task
+        async def count_files(d: Dir) -> int:
+            files = await d.list_files()
+            return len(files)
+        ```
+
+        Example (Async - Process files):
+
+        ```python
+        @env.task
+        async def process_all_files(d: Dir) -> list[str]:
+            files = await d.list_files()
+            contents = []
+            for file in files:
+                async with file.open("rb") as f:
+                    content = await f.read()
+                    contents.append(content.decode("utf-8"))
+            return contents
         ```
         """
         # todo: this should probably also just defer to fsspec.find()
@@ -173,14 +400,32 @@ class Dir(BaseModel, Generic[T], SerializableType):
         """
         Synchronously get a list of all files in the directory (non-recursive).

+        Use this in non-async tasks when you need a list of all files in the top-level directory at once.
+
         Returns:
-            A list of File objects
+            A list of File objects for files in the top-level directory
+
+        Example (Sync):

-        Example:
         ```python
-        files = directory.list_files_sync()
-        for file in files:
-            # Process the file
+        @env.task
+        def count_files_sync(d: Dir) -> int:
+            files = d.list_files_sync()
+            return len(files)
+        ```
+
+        Example (Sync - Process files):
+
+        ```python
+        @env.task
+        def process_all_files_sync(d: Dir) -> list[str]:
+            files = d.list_files_sync()
+            contents = []
+            for file in files:
+                with file.open_sync("rb") as f:
+                    content = f.read()
+                    contents.append(content.decode("utf-8"))
+            return contents
         ```
         """
         return list(self.walk_sync(recursive=False))
@@ -189,17 +434,33 @@ class Dir(BaseModel, Generic[T], SerializableType):
         """
         Asynchronously download the entire directory to a local path.

-        Args:
-            local_path: The local path to download the directory to. If None, a temporary
-                directory will be used.
+        Use this when you need to download all files in a directory to your local filesystem for processing.

-        Returns:
-            The path to the downloaded directory
+        Example (Async):

-        Example:
         ```python
-        local_dir = await directory.download('/tmp/my_data/')
+        @env.task
+        async def download_directory(d: Dir) -> str:
+            local_dir = await d.download()
+            # Process files in the local directory
+            return local_dir
         ```
+
+        Example (Async - Download to specific path):
+
+        ```python
+        @env.task
+        async def download_to_path(d: Dir) -> str:
+            local_dir = await d.download("/tmp/my_data/")
+            return local_dir
+        ```
+
+        Args:
+            local_path: The local path to download the directory to. If None, a temporary
+                directory will be used and a path will be generated.
+
+        Returns:
+            The absolute path to the downloaded directory
         """
         local_dest = str(local_path) if local_path else str(storage.get_random_local_path())
         if not storage.is_remote(self.path):
@@ -222,17 +483,32 @@ class Dir(BaseModel, Generic[T], SerializableType):
         """
         Synchronously download the entire directory to a local path.

-        Args:
-            local_path: The local path to download the directory to. If None, a temporary
-                directory will be used.
+        Use this in non-async tasks when you need to download all files in a directory to your local filesystem.

-        Returns:
-            The path to the downloaded directory
+        Example (Sync):

-        Example:
         ```python
-        local_dir = directory.download_sync('/tmp/my_data/')
+        @env.task
+        def download_directory_sync(d: Dir) -> str:
+            local_dir = d.download_sync()
+            # Process files in the local directory
+            return local_dir
         ```
+
+        Example (Sync - Download to specific path):
+
+        ```python
+        @env.task
+        def download_to_path_sync(d: Dir) -> str:
+            local_dir = d.download_sync("/tmp/my_data/")
+            return local_dir
+        ```
+        Args:
+            local_path: The local path to download the directory to. If None, a temporary
+                directory will be used and a path will be generated.
+
+        Returns:
+            The absolute path to the downloaded directory
         """
         local_dest = str(local_path) if local_path else str(storage.get_random_local_path())
         if not storage.is_remote(self.path):
@@ -245,8 +521,9 @@ class Dir(BaseModel, Generic[T], SerializableType):

             shutil.copytree(self.path, local_dest, dirs_exist_ok=True)

-        # Figure this out when we figure out the final sync story
-        raise NotImplementedError("Sync download is not implemented for remote paths")
+        fs = storage.get_underlying_filesystem(path=self.path)
+        fs.get(self.path, local_dest, recursive=True)
+        return local_dest

     @classmethod
     async def from_local(
@@ -256,23 +533,51 @@ class Dir(BaseModel, Generic[T], SerializableType):
         dir_cache_key: Optional[str] = None,
     ) -> Dir[T]:
         """
-        Asynchronously create a new Dir by uploading a local directory to the configured remote store.
+        Asynchronously create a new Dir by uploading a local directory to remote storage.
+
+        Use this in async tasks when you have a local directory that needs to be uploaded to remote storage.
+
+        Example (Async):
+
+        ```python
+        @env.task
+        async def upload_local_directory() -> Dir:
+            # Create a local directory with files
+            os.makedirs("/tmp/data_dir", exist_ok=True)
+            with open("/tmp/data_dir/file1.txt", "w") as f:
+                f.write("data1")
+
+            # Upload to remote storage
+            remote_dir = await Dir.from_local("/tmp/data_dir/")
+            return remote_dir
+        ```

+        Example (Async - With specific destination):
+
+        ```python
+        @env.task
+        async def upload_to_specific_path() -> Dir:
+            remote_dir = await Dir.from_local("/tmp/data_dir/", "s3://my-bucket/data/")
+            return remote_dir
+        ```
+
+        Example (Async - With cache key):
+
+        ```python
+        @env.task
+        async def upload_with_cache_key() -> Dir:
+            remote_dir = await Dir.from_local("/tmp/data_dir/", dir_cache_key="my_cache_key_123")
+            return remote_dir
+        ```
         Args:
             local_path: Path to the local directory
-            remote_path: Optional path to store the directory remotely. If None, a path will be generated.
-            dir_cache_key: If you have a precomputed hash value you want to use when computing cache keys for
-                discoverable tasks that this File is an input to.
+            remote_path: Optional remote path to store the directory. If None, a path will be automatically generated.
+            dir_cache_key: Optional precomputed hash value to use for cache key computation when this Dir is used
+                as an input to discoverable tasks. If not specified, the cache key will be based on
+                directory attributes.

         Returns:
             A new Dir instance pointing to the uploaded directory
-
-        Example:
-        ```python
-        remote_dir = await Dir[DataFrame].from_local('/tmp/data_dir/', 's3://bucket/data/')
-        # With a known hash value you want to use for cache key calculation
-        remote_dir = await Dir[DataFrame].from_local('/tmp/data_dir/', 's3://bucket/data/', dir_cache_key='abc123')
-        ```
         """
         local_path_str = str(local_path)
         dirname = os.path.basename(os.path.normpath(local_path_str))
@@ -281,43 +586,108 @@ class Dir(BaseModel, Generic[T], SerializableType):
         return cls(path=output_path, name=dirname, hash=dir_cache_key)

     @classmethod
-    def from_existing_remote(cls, remote_path: str, dir_cache_key: Optional[str] = None) -> Dir[T]:
+    def from_local_sync(
+        cls,
+        local_path: Union[str, Path],
+        remote_path: Optional[str] = None,
+        dir_cache_key: Optional[str] = None,
+    ) -> Dir[T]:
         """
-        Create a Dir reference from an existing remote directory.
+        Synchronously create a new Dir by uploading a local directory to remote storage.

-        Args:
-            remote_path: The remote path to the existing directory
-            dir_cache_key: Optional hash value to use for cache key computation. If not specified,
-                the cache key will be computed based on this object's attributes.
+        Use this in non-async tasks when you have a local directory that needs to be uploaded to remote storage.
+
+        Example (Sync):

-        Example:
         ```python
-        remote_dir = Dir.from_existing_remote("s3://bucket/data/")
-        # With a known hash
-        remote_dir = Dir.from_existing_remote("s3://bucket/data/", dir_cache_key="abc123")
+        @env.task
+        def upload_local_directory_sync() -> Dir:
+            # Create a local directory with files
+            os.makedirs("/tmp/data_dir", exist_ok=True)
+            with open("/tmp/data_dir/file1.txt", "w") as f:
+                f.write("data1")
+
+            # Upload to remote storage
+            remote_dir = Dir.from_local_sync("/tmp/data_dir/")
+            return remote_dir
         ```
-        """
-        return cls(path=remote_path, hash=dir_cache_key)

-    @classmethod
-    def from_local_sync(cls, local_path: Union[str, Path], remote_path: Optional[str] = None) -> Dir[T]:
-        """
-        Synchronously create a new Dir by uploading a local directory to the configured remote store.
+        Example (Sync - With specific destination):
+
+        ```python
+        @env.task
+        def upload_to_specific_path_sync() -> Dir:
+            remote_dir = Dir.from_local_sync("/tmp/data_dir/", "s3://my-bucket/data/")
+            return remote_dir
+        ```
+
+        Example (Sync - With cache key):
+
+        ```python
+        @env.task
+        def upload_with_cache_key_sync() -> Dir:
+            remote_dir = Dir.from_local_sync("/tmp/data_dir/", dir_cache_key="my_cache_key_123")
+            return remote_dir
+        ```

         Args:
             local_path: Path to the local directory
-            remote_path: Optional path to store the directory remotely. If None, a path will be generated.
+            remote_path: Optional remote path to store the directory. If None, a path will be automatically generated.
+            dir_cache_key: Optional precomputed hash value to use for cache key computation when this Dir is used
+                as an input to discoverable tasks. If not specified, the cache key will be based on
+                directory attributes.

         Returns:
             A new Dir instance pointing to the uploaded directory
+        """
+        local_path_str = str(local_path)
+        dirname = os.path.basename(os.path.normpath(local_path_str))
+
+        if not remote_path:
+            from flyte._context import internal_ctx
+
+            ctx = internal_ctx()
+            remote_path = ctx.raw_data.get_random_remote_path(dirname)
+        fs = storage.get_underlying_filesystem(path=remote_path)
+        fs.put(local_path_str, remote_path, recursive=True)
+        return cls(path=remote_path, name=dirname, hash=dir_cache_key)
+
+    @classmethod
+    def from_existing_remote(cls, remote_path: str, dir_cache_key: Optional[str] = None) -> Dir[T]:
+        """
+        Create a Dir reference from an existing remote directory.
+
+        Use this when you want to reference a directory that already exists in remote storage without uploading it.

         Example:
+
         ```python
-        remote_dir = Dir[DataFrame].from_local_sync('/tmp/data_dir/', 's3://bucket/data/')
+        @env.task
+        async def process_existing_directory() -> int:
+            d = Dir.from_existing_remote("s3://my-bucket/data/")
+            files = await d.list_files()
+            return len(files)
         ```
+
+        Example (With cache key):
+
+        ```python
+        @env.task
+        async def process_with_cache_key() -> int:
+            d = Dir.from_existing_remote("s3://my-bucket/data/", dir_cache_key="abc123")
+            files = await d.list_files()
+            return len(files)
+        ```
+
+        Args:
+            remote_path: The remote path to the existing directory
+            dir_cache_key: Optional hash value to use for cache key computation. If not specified,
+                the cache key will be computed based on the directory's attributes.
+
+        Returns:
+            A new Dir instance pointing to the existing remote directory
         """
-        # Implement this after we figure out the final sync story
-        raise NotImplementedError("Sync upload is not implemented for remote paths")
+        return cls(path=remote_path, hash=dir_cache_key)

     async def exists(self) -> bool:
         """
@@ -326,10 +696,15 @@ class Dir(BaseModel, Generic[T], SerializableType):
         Returns:
             True if the directory exists, False otherwise

-        Example:
+        Example (Async):
+
         ```python
-        if await directory.exists():
-            # Process the directory
+        @env.task
+        async def check_directory(d: Dir) -> bool:
+            if await d.exists():
+                print("Directory exists!")
+                return True
+            return False
         ```
         """
         fs = storage.get_underlying_filesystem(path=self.path)
@@ -342,13 +717,20 @@ class Dir(BaseModel, Generic[T], SerializableType):
         """
         Synchronously check if the directory exists.

+        Use this in non-async tasks or when you need synchronous directory existence checking.
+
         Returns:
             True if the directory exists, False otherwise

-        Example:
+        Example (Sync):
+
         ```python
-        if directory.exists_sync():
-            # Process the directory
+        @env.task
+        def check_directory_sync(d: Dir) -> bool:
+            if d.exists_sync():
+                print("Directory exists!")
+                return True
+            return False
         ```
         """
         fs = storage.get_underlying_filesystem(path=self.path)
@@ -356,20 +738,28 @@ class Dir(BaseModel, Generic[T], SerializableType):

     async def get_file(self, file_name: str) -> Optional[File[T]]:
         """
-        Asynchronously get a specific file from the directory.
+        Asynchronously get a specific file from the directory by name.
+
+        Use this when you know the name of a specific file in the directory you want to access.
+
+        Example (Async):
+
+        ```python
+        @env.task
+        async def read_specific_file(d: Dir) -> str:
+            file = await d.get_file("data.csv")
+            if file:
+                async with file.open("rb") as f:
+                    content = await f.read()
+                    return content.decode("utf-8")
+            return "File not found"
+        ```

         Args:
             file_name: The name of the file to get

         Returns:
             A File instance if the file exists, None otherwise
-
-        Example:
-        ```python
-        file = await directory.get_file("data.csv")
-        if file:
-            # Process the file
-        ```
         """
         fs = storage.get_underlying_filesystem(path=self.path)
         file_path = fs.sep.join([self.path, file_name])
@@ -381,20 +771,28 @@ class Dir(BaseModel, Generic[T], SerializableType):

     def get_file_sync(self, file_name: str) -> Optional[File[T]]:
         """
-        Synchronously get a specific file from the directory.
+        Synchronously get a specific file from the directory by name.
+
+        Use this in non-async tasks when you know the name of a specific file in the directory you want to access.
+
+        Example (Sync):
+
+        ```python
+        @env.task
+        def read_specific_file_sync(d: Dir) -> str:
+            file = d.get_file_sync("data.csv")
+            if file:
+                with file.open_sync("rb") as f:
+                    content = f.read()
+                    return content.decode("utf-8")
+            return "File not found"
+        ```

         Args:
             file_name: The name of the file to get

         Returns:
             A File instance if the file exists, None otherwise
-
-        Example:
-        ```python
-        file = directory.get_file_sync("data.csv")
-        if file:
-            # Process the file
-        ```
         """
         file_path = os.path.join(self.path, file_name)
         file = File[T](path=file_path)
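
Taken together, the changes to this file mean the synchronous remote I/O paths now work: `Dir.from_local_sync` uploads via the underlying filesystem and `Dir.download_sync` no longer raises `NotImplementedError` for remote paths. A minimal round-trip sketch of the new sync API, assuming the flyte 2.x beta conventions used in the docstrings above (a `flyte.TaskEnvironment` named `env`, `Dir` importable from `flyte.io`); the environment name and paths are illustrative only:

```python
import os

import flyte
from flyte.io import Dir

# Hypothetical environment name, for illustration only.
env = flyte.TaskEnvironment(name="dir_sync_demo")


@env.task
def round_trip() -> str:
    # Stage a small local directory.
    os.makedirs("/tmp/my_data", exist_ok=True)
    with open("/tmp/my_data/file1.txt", "w") as f:
        f.write("data1")

    # Upload synchronously (from_local_sync now performs the upload via fs.put).
    remote_dir = Dir.from_local_sync("/tmp/my_data/")

    # Download synchronously; in 2.0.0b22 this raised NotImplementedError for remote paths.
    return remote_dir.download_sync()
```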