opendal 0.1.6.pre.rc.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. checksums.yaml +7 -0
  2. data/.standard.yml +20 -0
  3. data/.tool-versions +1 -0
  4. data/.yardopts +1 -0
  5. data/DEPENDENCIES.md +9 -0
  6. data/DEPENDENCIES.rust.tsv +277 -0
  7. data/Gemfile +35 -0
  8. data/README.md +159 -0
  9. data/Rakefile +149 -0
  10. data/core/CHANGELOG.md +4929 -0
  11. data/core/CONTRIBUTING.md +61 -0
  12. data/core/DEPENDENCIES.md +3 -0
  13. data/core/DEPENDENCIES.rust.tsv +185 -0
  14. data/core/LICENSE +201 -0
  15. data/core/README.md +228 -0
  16. data/core/benches/README.md +18 -0
  17. data/core/benches/ops/README.md +26 -0
  18. data/core/benches/types/README.md +9 -0
  19. data/core/benches/vs_fs/README.md +35 -0
  20. data/core/benches/vs_s3/README.md +55 -0
  21. data/core/edge/README.md +3 -0
  22. data/core/edge/file_write_on_full_disk/README.md +14 -0
  23. data/core/edge/s3_aws_assume_role_with_web_identity/README.md +18 -0
  24. data/core/edge/s3_read_on_wasm/.gitignore +3 -0
  25. data/core/edge/s3_read_on_wasm/README.md +42 -0
  26. data/core/edge/s3_read_on_wasm/webdriver.json +15 -0
  27. data/core/examples/README.md +23 -0
  28. data/core/examples/basic/README.md +15 -0
  29. data/core/examples/concurrent-upload/README.md +15 -0
  30. data/core/examples/multipart-upload/README.md +15 -0
  31. data/core/fuzz/.gitignore +5 -0
  32. data/core/fuzz/README.md +68 -0
  33. data/core/src/docs/comparisons/vs_object_store.md +183 -0
  34. data/core/src/docs/performance/concurrent_write.md +101 -0
  35. data/core/src/docs/performance/http_optimization.md +124 -0
  36. data/core/src/docs/rfcs/0000_example.md +74 -0
  37. data/core/src/docs/rfcs/0000_foyer_integration.md +111 -0
  38. data/core/src/docs/rfcs/0041_object_native_api.md +185 -0
  39. data/core/src/docs/rfcs/0044_error_handle.md +198 -0
  40. data/core/src/docs/rfcs/0057_auto_region.md +160 -0
  41. data/core/src/docs/rfcs/0069_object_stream.md +145 -0
  42. data/core/src/docs/rfcs/0090_limited_reader.md +155 -0
  43. data/core/src/docs/rfcs/0112_path_normalization.md +79 -0
  44. data/core/src/docs/rfcs/0191_async_streaming_io.md +328 -0
  45. data/core/src/docs/rfcs/0203_remove_credential.md +96 -0
  46. data/core/src/docs/rfcs/0221_create_dir.md +89 -0
  47. data/core/src/docs/rfcs/0247_retryable_error.md +87 -0
  48. data/core/src/docs/rfcs/0293_object_id.md +67 -0
  49. data/core/src/docs/rfcs/0337_dir_entry.md +191 -0
  50. data/core/src/docs/rfcs/0409_accessor_capabilities.md +67 -0
  51. data/core/src/docs/rfcs/0413_presign.md +154 -0
  52. data/core/src/docs/rfcs/0423_command_line_interface.md +268 -0
  53. data/core/src/docs/rfcs/0429_init_from_iter.md +107 -0
  54. data/core/src/docs/rfcs/0438_multipart.md +163 -0
  55. data/core/src/docs/rfcs/0443_gateway.md +73 -0
  56. data/core/src/docs/rfcs/0501_new_builder.md +111 -0
  57. data/core/src/docs/rfcs/0554_write_refactor.md +96 -0
  58. data/core/src/docs/rfcs/0561_list_metadata_reuse.md +210 -0
  59. data/core/src/docs/rfcs/0599_blocking_api.md +157 -0
  60. data/core/src/docs/rfcs/0623_redis_service.md +300 -0
  61. data/core/src/docs/rfcs/0627_split_capabilities.md +89 -0
  62. data/core/src/docs/rfcs/0661_path_in_accessor.md +126 -0
  63. data/core/src/docs/rfcs/0793_generic_kv_services.md +209 -0
  64. data/core/src/docs/rfcs/0926_object_reader.md +93 -0
  65. data/core/src/docs/rfcs/0977_refactor_error.md +151 -0
  66. data/core/src/docs/rfcs/1085_object_handler.md +73 -0
  67. data/core/src/docs/rfcs/1391_object_metadataer.md +110 -0
  68. data/core/src/docs/rfcs/1398_query_based_metadata.md +125 -0
  69. data/core/src/docs/rfcs/1420_object_writer.md +147 -0
  70. data/core/src/docs/rfcs/1477_remove_object_concept.md +159 -0
  71. data/core/src/docs/rfcs/1735_operation_extension.md +117 -0
  72. data/core/src/docs/rfcs/2083_writer_sink_api.md +106 -0
  73. data/core/src/docs/rfcs/2133_append_api.md +88 -0
  74. data/core/src/docs/rfcs/2299_chain_based_operator_api.md +99 -0
  75. data/core/src/docs/rfcs/2602_object_versioning.md +138 -0
  76. data/core/src/docs/rfcs/2758_merge_append_into_write.md +79 -0
  77. data/core/src/docs/rfcs/2774_lister_api.md +66 -0
  78. data/core/src/docs/rfcs/2779_list_with_metakey.md +143 -0
  79. data/core/src/docs/rfcs/2852_native_capability.md +58 -0
  80. data/core/src/docs/rfcs/2884_merge_range_read_into_read.md +80 -0
  81. data/core/src/docs/rfcs/3017_remove_write_copy_from.md +94 -0
  82. data/core/src/docs/rfcs/3197_config.md +237 -0
  83. data/core/src/docs/rfcs/3232_align_list_api.md +69 -0
  84. data/core/src/docs/rfcs/3243_list_prefix.md +128 -0
  85. data/core/src/docs/rfcs/3356_lazy_reader.md +111 -0
  86. data/core/src/docs/rfcs/3526_list_recursive.md +59 -0
  87. data/core/src/docs/rfcs/3574_concurrent_stat_in_list.md +80 -0
  88. data/core/src/docs/rfcs/3734_buffered_reader.md +64 -0
  89. data/core/src/docs/rfcs/3898_concurrent_writer.md +66 -0
  90. data/core/src/docs/rfcs/3911_deleter_api.md +165 -0
  91. data/core/src/docs/rfcs/4382_range_based_read.md +213 -0
  92. data/core/src/docs/rfcs/4638_executor.md +215 -0
  93. data/core/src/docs/rfcs/5314_remove_metakey.md +120 -0
  94. data/core/src/docs/rfcs/5444_operator_from_uri.md +162 -0
  95. data/core/src/docs/rfcs/5479_context.md +140 -0
  96. data/core/src/docs/rfcs/5485_conditional_reader.md +112 -0
  97. data/core/src/docs/rfcs/5495_list_with_deleted.md +81 -0
  98. data/core/src/docs/rfcs/5556_write_returns_metadata.md +121 -0
  99. data/core/src/docs/rfcs/5871_read_returns_metadata.md +112 -0
  100. data/core/src/docs/rfcs/6189_remove_native_blocking.md +106 -0
  101. data/core/src/docs/rfcs/6209_glob_support.md +132 -0
  102. data/core/src/docs/rfcs/6213_options_api.md +142 -0
  103. data/core/src/docs/rfcs/README.md +62 -0
  104. data/core/src/docs/upgrade.md +1556 -0
  105. data/core/src/services/aliyun_drive/docs.md +61 -0
  106. data/core/src/services/alluxio/docs.md +45 -0
  107. data/core/src/services/azblob/docs.md +77 -0
  108. data/core/src/services/azdls/docs.md +73 -0
  109. data/core/src/services/azfile/docs.md +65 -0
  110. data/core/src/services/b2/docs.md +54 -0
  111. data/core/src/services/cacache/docs.md +38 -0
  112. data/core/src/services/cloudflare_kv/docs.md +21 -0
  113. data/core/src/services/cos/docs.md +55 -0
  114. data/core/src/services/d1/docs.md +48 -0
  115. data/core/src/services/dashmap/docs.md +38 -0
  116. data/core/src/services/dbfs/docs.md +57 -0
  117. data/core/src/services/dropbox/docs.md +64 -0
  118. data/core/src/services/etcd/docs.md +45 -0
  119. data/core/src/services/foundationdb/docs.md +42 -0
  120. data/core/src/services/fs/docs.md +49 -0
  121. data/core/src/services/ftp/docs.md +42 -0
  122. data/core/src/services/gcs/docs.md +76 -0
  123. data/core/src/services/gdrive/docs.md +65 -0
  124. data/core/src/services/ghac/docs.md +84 -0
  125. data/core/src/services/github/docs.md +52 -0
  126. data/core/src/services/gridfs/docs.md +46 -0
  127. data/core/src/services/hdfs/docs.md +140 -0
  128. data/core/src/services/hdfs_native/docs.md +35 -0
  129. data/core/src/services/http/docs.md +45 -0
  130. data/core/src/services/huggingface/docs.md +61 -0
  131. data/core/src/services/ipfs/docs.md +45 -0
  132. data/core/src/services/ipmfs/docs.md +14 -0
  133. data/core/src/services/koofr/docs.md +51 -0
  134. data/core/src/services/lakefs/docs.md +62 -0
  135. data/core/src/services/memcached/docs.md +47 -0
  136. data/core/src/services/memory/docs.md +36 -0
  137. data/core/src/services/mini_moka/docs.md +19 -0
  138. data/core/src/services/moka/docs.md +42 -0
  139. data/core/src/services/mongodb/docs.md +49 -0
  140. data/core/src/services/monoiofs/docs.md +46 -0
  141. data/core/src/services/mysql/docs.md +47 -0
  142. data/core/src/services/obs/docs.md +54 -0
  143. data/core/src/services/onedrive/docs.md +115 -0
  144. data/core/src/services/opfs/docs.md +18 -0
  145. data/core/src/services/oss/docs.md +74 -0
  146. data/core/src/services/pcloud/docs.md +51 -0
  147. data/core/src/services/persy/docs.md +43 -0
  148. data/core/src/services/postgresql/docs.md +47 -0
  149. data/core/src/services/redb/docs.md +41 -0
  150. data/core/src/services/redis/docs.md +43 -0
  151. data/core/src/services/rocksdb/docs.md +54 -0
  152. data/core/src/services/s3/compatible_services.md +126 -0
  153. data/core/src/services/s3/docs.md +244 -0
  154. data/core/src/services/seafile/docs.md +54 -0
  155. data/core/src/services/sftp/docs.md +49 -0
  156. data/core/src/services/sled/docs.md +39 -0
  157. data/core/src/services/sqlite/docs.md +46 -0
  158. data/core/src/services/surrealdb/docs.md +54 -0
  159. data/core/src/services/swift/compatible_services.md +53 -0
  160. data/core/src/services/swift/docs.md +52 -0
  161. data/core/src/services/tikv/docs.md +43 -0
  162. data/core/src/services/upyun/docs.md +51 -0
  163. data/core/src/services/vercel_artifacts/docs.md +40 -0
  164. data/core/src/services/vercel_blob/docs.md +45 -0
  165. data/core/src/services/webdav/docs.md +49 -0
  166. data/core/src/services/webhdfs/docs.md +90 -0
  167. data/core/src/services/yandex_disk/docs.md +45 -0
  168. data/core/tests/behavior/README.md +77 -0
  169. data/core/tests/data/normal_dir/.gitkeep +0 -0
  170. data/core/tests/data/normal_file.txt +1041 -0
  171. data/core/tests/data/special_dir !@#$%^&()_+-=;',/.gitkeep +0 -0
  172. data/core/tests/data/special_file !@#$%^&()_+-=;',.txt +1041 -0
  173. data/core/users.md +13 -0
  174. data/extconf.rb +24 -0
  175. data/lib/opendal.rb +25 -0
  176. data/lib/opendal_ruby/entry.rb +35 -0
  177. data/lib/opendal_ruby/io.rb +70 -0
  178. data/lib/opendal_ruby/metadata.rb +44 -0
  179. data/lib/opendal_ruby/opendal_ruby.so +0 -0
  180. data/lib/opendal_ruby/operator.rb +29 -0
  181. data/lib/opendal_ruby/operator_info.rb +26 -0
  182. data/opendal.gemspec +91 -0
  183. data/test/blocking_op_test.rb +112 -0
  184. data/test/capability_test.rb +42 -0
  185. data/test/io_test.rb +172 -0
  186. data/test/lister_test.rb +77 -0
  187. data/test/metadata_test.rb +78 -0
  188. data/test/middlewares_test.rb +46 -0
  189. data/test/operator_info_test.rb +35 -0
  190. data/test/test_helper.rb +36 -0
  191. metadata +240 -0
@@ -0,0 +1,213 @@
1
+ - Proposal Name: `range_based_read`
2
+ - Start Date: 2024-03-20
3
+ - RFC PR: [apache/opendal#4382](https://github.com/apache/opendal/pull/4382)
4
+ - Tracking Issue: [apache/opendal#4383](https://github.com/apache/opendal/issues/4383)
5
+
6
+ # Summary
7
+
8
+ Convert `oio::Read` into a stateless, range-based reading pattern.
9
+
10
+ # Motivation
11
+
12
+ The current `oio::Read` API is stateful:
13
+
14
+ ```rust
15
+ pub trait Read: Unpin + Send + Sync {
16
+ fn read(&mut self, limit: usize) -> impl Future<Output = Result<Bytes>> + Send;
17
+ fn seek(&mut self, pos: io::SeekFrom) -> impl Future<Output = Result<u64>> + Send;
18
+ }
19
+ ```
20
+
21
+ Users use `read` to retrieve data from storage and can use `seek` to navigate to specific positions. OpenDAL manages the underlying state. This design is good for users from `std::io::Read`, `futures::AsyncRead` and `tokio::io::AsyncRead`.
22
+
23
+ OpenDAL also provides `range` option at the `Operator` level for users to read a specific range of data. The most common usage will be like:
24
+
25
+ ```rust
26
+ let r: Reader = op.reader_with(path).range(1024..2048).await?;
27
+ ```
28
+
29
+ However, after observing our users, we found that:
30
+
31
+ - `AsyncSeek` in `Reader` is prone to misuse.
32
+ - `Reader` does not support concurrent reading.
33
+ - `Reader` can't adopt Completion-based IO.
34
+
35
+ ## Misuse of `AsyncSeek`
36
+
37
+ When designing `Reader`, I expected users to check the `read_can_seek` capability to determine if the underlying storage services natively support `seek`. However, many users are unaware of this and directly use `seek`, leading to suboptimal performance.
38
+
39
+ For example, `s3` storage does not support `seek` natively. When users call `seek`, opendal will drop the current reader and send a new request. This behavior is hidden from users and can lead to unexpected performance issues like [What's going on in my parquet stream](https://github.com/apache/opendal/issues/3725).
40
+
41
+ ## Lack of concurrent reading
42
+
43
+ `oio::Read` complicates supporting concurrent reading. Users must implement a feature similar to merge IO, as discussed in [support merge io read api by settings](https://github.com/apache/opendal/issues/3675).
44
+
45
+ There is no way for opendal to support this feature.
46
+
47
+ ## Can't adopt Completion-based IO
48
+
49
+ Completion-based IO requires taking ownership of the buffer, but APIs that take `&mut [u8]` can't do that.
50
+
51
+ # Guide-level explanation
52
+
53
+ So I propose to convert `Reader` into a stateless, range-based reading pattern.
54
+
55
+ We will remove the following `impl` from `Reader`:
56
+
57
+ - `futures::AsyncRead`
58
+ - `futures::AsyncSeek`
59
+ - `futures::Stream`
60
+ - `tokio::AsyncRead`
61
+ - `tokio::AsyncSeek`
62
+
63
+ We will add the following new APIs to `Reader`:
64
+
65
+ ```rust
66
+ impl Reader {
67
+ /// Read data from the storage at the specified offset.
68
+ pub async fn read(&self, buf: &mut impl BufMut, offset: u64, limit: usize) -> Result<usize>;
69
+
70
+ /// Read data from the storage at the specified range.
71
+ pub async fn read_range(
72
+ &self,
73
+ buf: &mut impl BufMut,
74
+ range: impl RangeBounds<u64>,
75
+ ) -> Result<usize>;
76
+
77
+ /// Read all data from the storage into given buf.
78
+ pub async fn read_to_end(&self, buf: &mut impl BufMut) -> Result<usize>;
79
+
80
+ /// Copy data from the storage into given writer.
81
+ pub async fn copy(&mut self, write_into: &mut impl futures::AsyncWrite) -> Result<u64>;
82
+
83
+ /// Sink data from the storage into given sink.
84
+ pub async fn sink<S, T>(&mut self, sink_from: &mut S) -> Result<u64>
85
+ where
86
+ S: futures::Sink<T, Error = Error>,
87
+ T: Into<Bytes>,
88
+ }
89
+ ```
90
+
91
+ Apart from `Reader`'s own API, we will also provide conversions to existing IO APIs like:
92
+
93
+ ```rust
94
+ impl Reader {
95
+ /// Convert Reader into `futures::AsyncRead`
96
+ pub fn into_futures_io_async_read(self, range: Range<u64>) -> FuturesIoAsyncReader;
97
+
98
+ /// Convert Reader into `futures::Stream`
99
+ pub fn into_futures_bytes_stream(self, range: Range<u64>) -> FuturesBytesStream;
100
+ }
101
+ ```
102
+
103
+ After this change, users will be able to use `Reader` to read data from storage in a stateless, range-based pattern. Users can also convert `Reader` into `futures::AsyncRead`, `futures::AsyncSeek` and `futures::Stream` as needed.
104
+
105
+ # Reference-level explanation
106
+
107
+ The new raw API will be:
108
+
109
+ ```rust
110
+ pub trait Read: Unpin + Send + Sync {
111
+ fn read_at(
112
+ &self,
113
+ offset: u64,
114
+ limit: usize,
115
+ ) -> impl Future<Output = Result<oio::Buffer>> + Send;
116
+ }
117
+ ```
118
+
119
+ The API is similar to [`ReadAt`](https://doc.rust-lang.org/std/fs/struct.File.html#method.read_at), but with the following changes:
120
+
121
+ ```diff
122
+ - async fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize>
123
+ + async fn read_at(&self, offset: u64, limit: usize) -> Result<oio::Buffer>
124
+ ```
125
+
126
+ - opendal chooses to use `oio::Buffer` instead of `&mut [u8]` to avoid lifetime issues.
127
+ - opendal chooses to return `oio::Buffer` to let the services manage the buffer themselves.
128
+
129
+ For example, an HTTP-based storage service like `s3` is a stream that generates data on the fly.
130
+
131
+ # Drawbacks
132
+
133
+ ## Breaking changes to `Reader`
134
+
135
+ This change will break the existing `Reader` API. Users will need to update their code to use the new `Reader` API.
136
+
137
+ Users wishing to migrate to the new range-based API will need to update their code. Those who simply want to use `futures::AsyncRead` can instead utilize `Reader::into_futures_io_async_read`.
138
+
139
+ # Rationale and alternatives
140
+
141
+ None.
142
+
143
+ # Prior art
144
+
145
+ ## `object_store`'s API design
146
+
147
+ The current API design is heavily inspired by `object_store`'s `ObjectStore`:
148
+
149
+ ```rust
150
+ #[async_trait]
151
+ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
152
+ /// Return the bytes that are stored at the specified location.
153
+ async fn get(&self, location: &Path) -> Result<GetResult> {
154
+ self.get_opts(location, GetOptions::default()).await
155
+ }
156
+
157
+ /// Perform a get request with options
158
+ async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult>;
159
+
160
+ /// Return the bytes that are stored at the specified location
161
+ /// in the given byte range.
162
+ ///
163
+ /// See [`GetRange::Bounded`] for more details on how `range` gets interpreted
164
+ async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
165
+ let options = GetOptions {
166
+ range: Some(range.into()),
167
+ ..Default::default()
168
+ };
169
+ self.get_opts(location, options).await?.bytes().await
170
+ }
171
+
172
+ /// Return the bytes that are stored at the specified location
173
+ /// in the given byte ranges
174
+ async fn get_ranges(&self, location: &Path, ranges: &[Range<usize>]) -> Result<Vec<Bytes>> {
175
+ coalesce_ranges(
176
+ ranges,
177
+ |range| self.get_range(location, range),
178
+ OBJECT_STORE_COALESCE_DEFAULT,
179
+ )
180
+ .await
181
+ }
182
+ }
183
+ ```
184
+
185
+ We can add support similar to `get_ranges` in the future.
186
+
187
+ OpenDAL opts to return a `Reader` rather than directly implementing `read` to allow for optimization with storage services like `fs` to reduce the extra `open` syscall.
188
+
189
+ # Unresolved questions
190
+
191
+ ## Buffer
192
+
193
+ After switching to range-based reading, we can no longer keep a buffer within the reader. As of writing this proposal, users should use `into_async_buf_read` instead.
194
+
195
+ # Future possibilities
196
+
197
+ ## Read Ranges
198
+
199
+ We can implement `read_ranges` support in the future. This will allow users to read multiple ranges of data in fewer requests.
200
+
201
+ ## Native `read_at` for fs and hdfs
202
+
203
+ We can reduce unnecessary `open` and `seek` syscalls by using the `read_at` API across different platforms.
204
+
205
+ ## Auto Range Read
206
+
207
+ We can implement [Auto ranged read support](https://github.com/apache/opendal/issues/1105) like the AWS S3 CRT client. For example, we can split the range into multiple ranges and read them concurrently.
208
+
209
+ Services can define the preferred io size as default, and users can override it. For example, s3 can use `8 MiB` as preferred io size, while fs can use `4 KiB` instead.
210
+
211
+ ## Completion-based IO
212
+
213
+ `oio::Read` is designed with Completion-based IO in mind. We can add IOCP/io_uring support in the future.
@@ -0,0 +1,215 @@
1
+ - Proposal Name: `executor`
2
+ - Start Date: 2024-05-23
3
+ - RFC PR: [apache/opendal#4638](https://github.com/apache/opendal/pull/4638)
4
+ - Tracking Issue: [apache/opendal#4639](https://github.com/apache/opendal/issues/4639)
5
+
6
+ # Summary
7
+
8
+ Add executor in opendal to allow running tasks concurrently in background.
9
+
10
+ # Motivation
11
+
12
+ OpenDAL offers top-tier support for concurrent execution, allowing tasks to run simultaneously in the background. Users can easily enable concurrent file read/write operations with just one line of code:
13
+
14
+ ```diff
15
+ let mut w = op
16
+ .writer_with(path)
17
+ .chunk(8 * 1024 * 1024) // 8 MiB per chunk
18
+ + .concurrent(16) // 16 concurrent tasks
19
+ .await?;
20
+
21
+ w.write(bs).await?;
22
+ w.write(bs).await?; // The submitted tasks are only executed while the user is calling `write`.
23
+ ...
24
+ sleep(Duration::from_secs(10)).await; // The submitted tasks make no progress during `sleep`.
25
+ ...
26
+ w.close().await?;
27
+ ```
28
+
29
+ However, the execution of those tasks relies on users continuously calling `write`. They cannot run tasks concurrently in the background. (I explained the technical details in the `Rationale and alternatives` section.)
30
+
31
+ This can result in the following issues:
32
+
33
+ - Task latency may increase as tasks are not executed until the task queue is full.
34
+ - Memory usage may be high because all chunks must be held in memory until the task is completed.
35
+
36
+ I propose introducing an executor abstraction in OpenDAL to enable concurrent background task execution. The executor will automatically manage the tasks in the background without requiring users to drive the progress manually.
37
+
38
+ # Guide-level explanation
39
+
40
+ OpenDAL will add a new `Executor` struct to manage concurrent tasks.
41
+
42
+ ```rust
43
+ pub struct Executor {
44
+ ...
45
+ }
46
+
47
+ pub struct Task {
48
+ ...
49
+ }
50
+
51
+ impl Executor {
52
+ /// Create a new tokio based executor.
53
+ pub fn new() -> Self { ... }
54
+
55
+ /// Create a new executor with given execute impl.
56
+ pub fn with(exec: Arc<dyn Execute>) -> Self { ... }
57
+
58
+ /// Run given future in background immediately.
59
+ pub fn execute<F>(&self, f: F) -> Task<F::Output>
60
+ where
61
+ F: Future + Send + 'static,
62
+ {
63
+ ...
64
+ }
65
+ }
66
+ ```
67
+
68
+ The `Executor` uses the `tokio` runtime by default but users can also provide their own runtime by:
69
+
70
+ ```rust
71
+ pub trait Execute {
72
+ fn execute(&self, f: BoxedFuture<()>) -> Result<()>;
73
+ }
74
+ ```
75
+
76
+ Users can set executor in `OpWrite` / `OpRead` to enable concurrent background task execution:
77
+
78
+ ```rust
79
+ + let exec = Executor::new();
80
+ let w = op
81
+ .writer_with(path)
82
+ .chunk(8 * 1024 * 1024) // 8 MiB per chunk
83
+ .concurrent(16) // 16 concurrent tasks
84
+ + .executor(exec) // Use specified executor
85
+ .await?;
86
+ ```
87
+
88
+ Specifying an executor every time is cumbersome. Users can also set a global executor for given operator:
89
+
90
+ ```rust
91
+ + let exec = Executor::new();
92
+ + let op = op.with_default_executor(exec);
93
+
94
+ let w = op
95
+ .writer_with(path)
96
+ .chunk(8 * 1024 * 1024) // 8 MiB per chunk
97
+ .concurrent(16) // 16 concurrent tasks
98
+ .await?;
99
+ ```
100
+
101
+ # Reference-level explanation
102
+
103
+ As mentioned in the `Guide-level explanation`, the `Executor` struct will manage concurrent tasks in the background. `Executor` will be powered by trait `Execute` to support different underlying runtimes. To make trait `Execute` object safe, we only accept `BoxedFuture<()>` as input. `Executor` will handle the future output and return the result to the caller.
104
+
105
+ Operations that support concurrent execution will add a new field:
106
+
107
+ ```rust
108
+ pub struct OpXxx {
109
+ ...
110
+ executor: Option<Executor>,
111
+ }
112
+ ```
113
+
114
+ Operator will add a new field to store the default executor:
115
+
116
+ ```rust
117
+ pub struct Operator {
118
+ ...
119
+ default_executor: Option<Executor>,
120
+ }
121
+ ```
122
+
123
+ The `Task` spawned by `Executor` will be a future that can be awaited to fetch the result:
124
+
125
+ ```rust
126
+ let res = task.await;
127
+ ```
128
+
129
+ The task will be executed immediately after calling `execute`. Users can also cancel the task by dropping the `Task` object. Users don't need to poll those `Task` objects to make progress.
130
+
131
+ # Drawbacks
132
+
133
+ ## Complexity
134
+
135
+ To support concurrent execution, we need to introduce:
136
+
137
+ - a new `Executor` struct
138
+ - a new `Task` struct
139
+ - a new `Execute` trait
140
+
141
+ This may increase the complexity of the codebase.
142
+
143
+ # Rationale and alternatives
144
+
145
+ ## Why introducing so many new abstractions?
146
+
147
+ We need to introduce new abstractions to support concurrent execution across different runtimes. Unfortunately, this is the current reality of async rust.
148
+
149
+ Supporting just one or two runtimes by adding features is much easier. Supporting only Tokio is extremely simple, requiring about 10 lines of changes. However, this violates our vision of free data access.
150
+
151
+ Firstly, we don't want to force our users to use Tokio. We aim to support all runtimes, including async-std, smol, and others.
152
+
153
+ Secondly, OpenDAL should be capable of running in any environment, including embedded systems. We don’t want to restrict our users to a specific runtime.
154
+
155
+ Finally, users may have their own preferences for observability and performance in their runtime. We intend to accommodate these needs effortlessly.
156
+
157
+ ## Why `ConcurrentFutures` doesn't work?
158
+
159
+ `ConcurrentFutures` is a `Vec<impl Future>`, users need to keep calling `poll_next` to make progress. This is not suitable for our use case. We need a way to run tasks in the background without user intervention.
160
+
161
+ > I've heard that futures will wake up when they're ready, and it's the runtime's job to poll them, right?
162
+
163
+ No, it's partially correct. The runtime will wake up the future when it's ready, but it's the user's job to poll the future. The runtime will not poll the future automatically unless it's managed by the runtime.
164
+
165
+ For tokio, that means all futures provided by tokio, like `tokio::time::Sleep`, will be polled by tokio runtime. However, if you create a future by yourself, you need to poll it manually.
166
+
167
+ I have an example to explain this:
168
+
169
+ *Try it at [playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=628e67adef90128151e175d22c87808e)*
170
+
171
+ ```rust
172
+ use futures::stream::FuturesUnordered;
173
+ use futures::StreamExt;
174
+ use std::time::Duration;
175
+ use tokio::time::{sleep, Instant};
176
+
177
+ #[tokio::main]
178
+ async fn main() {
179
+ let now = Instant::now();
180
+ let mut cf = FuturesUnordered::new();
181
+
182
+ // cf.push(Box::pin(sleep(Duration::from_secs(3))));
183
+ cf.push(Box::pin(async move {
184
+ sleep(Duration::from_secs(3)).await;
185
+ println!("async task finished at {}s", now.elapsed().as_secs_f64());
186
+ }));
187
+ sleep(Duration::from_secs(4)).await;
188
+ println!("outer sleep finished at {}s", now.elapsed().as_secs_f64());
189
+
190
+ let _: Vec<()> = cf.collect().await;
191
+ println!("consumed: {}s", now.elapsed().as_secs_f64())
192
+ }
193
+ ```
194
+
195
+ # Prior art
196
+
197
+ None.
198
+
199
+ # Unresolved questions
200
+
201
+ None.
202
+
203
+ # Future possibilities
204
+
205
+ ## Blocking Executor
206
+
207
+ This proposal mainly focuses on async tasks. However, we can also consider adding blocking support to `Executor`. Users can use concurrent tasks in blocking context too:
208
+
209
+ ```rust
210
+ let w = op
211
+ .writer_with(path)
212
+ .chunk(8 * 1024 * 1024) // 8 MiB per chunk
213
+ + .concurrent(16) // 16 concurrent tasks
214
+ .do()?;
215
+ ```
@@ -0,0 +1,120 @@
1
+ - Proposal Name: `remove_metakey`
2
+ - Start Date: 2024-11-12
3
+ - RFC PR: [apache/opendal#5313](https://github.com/apache/opendal/pull/5313)
4
+ - Tracking Issue: [apache/opendal#5314](https://github.com/apache/opendal/issues/5314)
5
+
6
+ # Summary
7
+
8
+ Remove the `Metakey` concept from OpenDAL and replace it with a simpler and more predictable metadata handling mechanism.
9
+
10
+ # Motivation
11
+
12
+ The current `Metakey` design has several issues:
13
+
14
+ 1. Performance Impact: Users often initiate costly operations unintentionally, such as using `Metakey::Full`, which results in extra stat calls
15
+ 2. Usability Issues: Users often try to access metadata that hasn't been explicitly requested
16
+ 3. API Confusion: There's a conflict between `Metakey::Version` and the new `version(bool)` parameter
17
+ 4. Implementation Complexity: Service developers struggle to implement `Metakey` correctly
18
+
19
+ The goal is a simpler, more intuitive API that prevents common mistakes and improves performance as standard.
20
+
21
+ # Guide-level explanation
22
+
23
+ Instead of using `Metakey` to specify which metadata fields to fetch, services will now declare their metadata capabilities upfront through a new `MetadataCapability` struct:
24
+
25
+ ```rust
26
+ let entries = op.list("path").await?;
27
+ for entry in entries {
28
+ if op.metadata_capability().content_type {
29
+ println!("Content-Type: {}", entry.metadata().content_type());
30
+ }
31
+ }
32
+ ```
33
+
34
+ If users need additional metadata not provided by `list`:
35
+
36
+ ```rust
37
+ let entries = op.list("path").await?;
38
+ for entry in entries {
39
+ let mut meta = entry.metadata();
40
+ if !op.metadata_capability().etag {
41
+ meta = op.stat(&entry.path()).await?;
42
+ }
43
+ println!("ETag: {}", meta.etag());
44
+ }
45
+ ```
46
+
47
+ For existing OpenDAL users, the main changes are:
48
+
49
+ - Remove all `metakey()` calls from their code
50
+ - Use `metadata_capability()` to check available metadata
51
+ - Explicitly call `stat()` when needed
52
+
53
+ # Reference-level explanation
54
+
55
+ The implementation involves:
56
+
57
+ 1. Remove the `Metakey` enum
58
+ 2. Add new `MetadataCapability` struct:
59
+ ```rust
60
+ pub struct MetadataCapability {
61
+ pub content_length: bool,
62
+ pub content_type: bool,
63
+ pub last_modified: bool,
64
+ pub etag: bool,
65
+ pub mode: bool,
66
+ pub version: bool,
67
+ ...
68
+ }
69
+ ```
70
+
71
+ 3. Add method to Operator to query capabilities:
72
+ ```rust
73
+ impl Operator {
74
+ pub fn metadata_capability(&self) -> MetadataCapability;
75
+ }
76
+ ```
77
+
78
+ 4. Modify list operation to avoid implicit stat calls
79
+ 5. Update all service implementations to declare their metadata capabilities
80
+
81
+ Each service implementation will need to:
82
+ - Remove `Metakey` handling logic
83
+ - Implement `metadata_capability()` to accurately indicate the metadata provided by default
84
+ - Ensure list operations return metadata that's always available without extra API calls
85
+
86
+ # Drawbacks
87
+
88
+ - Breaking change for existing users
89
+ - Loss of fine-grained control over metadata fetching
90
+ - Potential increased API calls if users need multiple metadata fields
91
+
92
+ # Rationale and alternatives
93
+
94
+ This design is superior because:
95
+ - Prevents performance pitfalls by default
96
+ - Makes metadata availability explicit
97
+ - Simplifies service implementation
98
+ - Provides clearer mental model
99
+
100
+ Alternatives considered:
101
+ 1. Keep `Metakey` but make it more restrictive
102
+ 2. Add warnings for potentially costly operations
103
+ 3. Make stat calls async/lazy
104
+
105
+ Not making this change would continue the current issues of performance problems and API misuse.
106
+
107
+ # Prior art
108
+
109
+ None
110
+
111
+ # Unresolved questions
112
+
113
+ None
114
+
115
+ # Future possibilities
116
+
117
+ - Add metadata prefetching optimization
118
+ - Add metadata caching layer
119
+ - Support for custom metadata fields
120
+ - Automated capability detection