opendal 0.1.6.pre.rc.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191)
  1. checksums.yaml +7 -0
  2. data/.standard.yml +20 -0
  3. data/.tool-versions +1 -0
  4. data/.yardopts +1 -0
  5. data/DEPENDENCIES.md +9 -0
  6. data/DEPENDENCIES.rust.tsv +277 -0
  7. data/Gemfile +35 -0
  8. data/README.md +159 -0
  9. data/Rakefile +149 -0
  10. data/core/CHANGELOG.md +4929 -0
  11. data/core/CONTRIBUTING.md +61 -0
  12. data/core/DEPENDENCIES.md +3 -0
  13. data/core/DEPENDENCIES.rust.tsv +185 -0
  14. data/core/LICENSE +201 -0
  15. data/core/README.md +228 -0
  16. data/core/benches/README.md +18 -0
  17. data/core/benches/ops/README.md +26 -0
  18. data/core/benches/types/README.md +9 -0
  19. data/core/benches/vs_fs/README.md +35 -0
  20. data/core/benches/vs_s3/README.md +55 -0
  21. data/core/edge/README.md +3 -0
  22. data/core/edge/file_write_on_full_disk/README.md +14 -0
  23. data/core/edge/s3_aws_assume_role_with_web_identity/README.md +18 -0
  24. data/core/edge/s3_read_on_wasm/.gitignore +3 -0
  25. data/core/edge/s3_read_on_wasm/README.md +42 -0
  26. data/core/edge/s3_read_on_wasm/webdriver.json +15 -0
  27. data/core/examples/README.md +23 -0
  28. data/core/examples/basic/README.md +15 -0
  29. data/core/examples/concurrent-upload/README.md +15 -0
  30. data/core/examples/multipart-upload/README.md +15 -0
  31. data/core/fuzz/.gitignore +5 -0
  32. data/core/fuzz/README.md +68 -0
  33. data/core/src/docs/comparisons/vs_object_store.md +183 -0
  34. data/core/src/docs/performance/concurrent_write.md +101 -0
  35. data/core/src/docs/performance/http_optimization.md +124 -0
  36. data/core/src/docs/rfcs/0000_example.md +74 -0
  37. data/core/src/docs/rfcs/0000_foyer_integration.md +111 -0
  38. data/core/src/docs/rfcs/0041_object_native_api.md +185 -0
  39. data/core/src/docs/rfcs/0044_error_handle.md +198 -0
  40. data/core/src/docs/rfcs/0057_auto_region.md +160 -0
  41. data/core/src/docs/rfcs/0069_object_stream.md +145 -0
  42. data/core/src/docs/rfcs/0090_limited_reader.md +155 -0
  43. data/core/src/docs/rfcs/0112_path_normalization.md +79 -0
  44. data/core/src/docs/rfcs/0191_async_streaming_io.md +328 -0
  45. data/core/src/docs/rfcs/0203_remove_credential.md +96 -0
  46. data/core/src/docs/rfcs/0221_create_dir.md +89 -0
  47. data/core/src/docs/rfcs/0247_retryable_error.md +87 -0
  48. data/core/src/docs/rfcs/0293_object_id.md +67 -0
  49. data/core/src/docs/rfcs/0337_dir_entry.md +191 -0
  50. data/core/src/docs/rfcs/0409_accessor_capabilities.md +67 -0
  51. data/core/src/docs/rfcs/0413_presign.md +154 -0
  52. data/core/src/docs/rfcs/0423_command_line_interface.md +268 -0
  53. data/core/src/docs/rfcs/0429_init_from_iter.md +107 -0
  54. data/core/src/docs/rfcs/0438_multipart.md +163 -0
  55. data/core/src/docs/rfcs/0443_gateway.md +73 -0
  56. data/core/src/docs/rfcs/0501_new_builder.md +111 -0
  57. data/core/src/docs/rfcs/0554_write_refactor.md +96 -0
  58. data/core/src/docs/rfcs/0561_list_metadata_reuse.md +210 -0
  59. data/core/src/docs/rfcs/0599_blocking_api.md +157 -0
  60. data/core/src/docs/rfcs/0623_redis_service.md +300 -0
  61. data/core/src/docs/rfcs/0627_split_capabilities.md +89 -0
  62. data/core/src/docs/rfcs/0661_path_in_accessor.md +126 -0
  63. data/core/src/docs/rfcs/0793_generic_kv_services.md +209 -0
  64. data/core/src/docs/rfcs/0926_object_reader.md +93 -0
  65. data/core/src/docs/rfcs/0977_refactor_error.md +151 -0
  66. data/core/src/docs/rfcs/1085_object_handler.md +73 -0
  67. data/core/src/docs/rfcs/1391_object_metadataer.md +110 -0
  68. data/core/src/docs/rfcs/1398_query_based_metadata.md +125 -0
  69. data/core/src/docs/rfcs/1420_object_writer.md +147 -0
  70. data/core/src/docs/rfcs/1477_remove_object_concept.md +159 -0
  71. data/core/src/docs/rfcs/1735_operation_extension.md +117 -0
  72. data/core/src/docs/rfcs/2083_writer_sink_api.md +106 -0
  73. data/core/src/docs/rfcs/2133_append_api.md +88 -0
  74. data/core/src/docs/rfcs/2299_chain_based_operator_api.md +99 -0
  75. data/core/src/docs/rfcs/2602_object_versioning.md +138 -0
  76. data/core/src/docs/rfcs/2758_merge_append_into_write.md +79 -0
  77. data/core/src/docs/rfcs/2774_lister_api.md +66 -0
  78. data/core/src/docs/rfcs/2779_list_with_metakey.md +143 -0
  79. data/core/src/docs/rfcs/2852_native_capability.md +58 -0
  80. data/core/src/docs/rfcs/2884_merge_range_read_into_read.md +80 -0
  81. data/core/src/docs/rfcs/3017_remove_write_copy_from.md +94 -0
  82. data/core/src/docs/rfcs/3197_config.md +237 -0
  83. data/core/src/docs/rfcs/3232_align_list_api.md +69 -0
  84. data/core/src/docs/rfcs/3243_list_prefix.md +128 -0
  85. data/core/src/docs/rfcs/3356_lazy_reader.md +111 -0
  86. data/core/src/docs/rfcs/3526_list_recursive.md +59 -0
  87. data/core/src/docs/rfcs/3574_concurrent_stat_in_list.md +80 -0
  88. data/core/src/docs/rfcs/3734_buffered_reader.md +64 -0
  89. data/core/src/docs/rfcs/3898_concurrent_writer.md +66 -0
  90. data/core/src/docs/rfcs/3911_deleter_api.md +165 -0
  91. data/core/src/docs/rfcs/4382_range_based_read.md +213 -0
  92. data/core/src/docs/rfcs/4638_executor.md +215 -0
  93. data/core/src/docs/rfcs/5314_remove_metakey.md +120 -0
  94. data/core/src/docs/rfcs/5444_operator_from_uri.md +162 -0
  95. data/core/src/docs/rfcs/5479_context.md +140 -0
  96. data/core/src/docs/rfcs/5485_conditional_reader.md +112 -0
  97. data/core/src/docs/rfcs/5495_list_with_deleted.md +81 -0
  98. data/core/src/docs/rfcs/5556_write_returns_metadata.md +121 -0
  99. data/core/src/docs/rfcs/5871_read_returns_metadata.md +112 -0
  100. data/core/src/docs/rfcs/6189_remove_native_blocking.md +106 -0
  101. data/core/src/docs/rfcs/6209_glob_support.md +132 -0
  102. data/core/src/docs/rfcs/6213_options_api.md +142 -0
  103. data/core/src/docs/rfcs/README.md +62 -0
  104. data/core/src/docs/upgrade.md +1556 -0
  105. data/core/src/services/aliyun_drive/docs.md +61 -0
  106. data/core/src/services/alluxio/docs.md +45 -0
  107. data/core/src/services/azblob/docs.md +77 -0
  108. data/core/src/services/azdls/docs.md +73 -0
  109. data/core/src/services/azfile/docs.md +65 -0
  110. data/core/src/services/b2/docs.md +54 -0
  111. data/core/src/services/cacache/docs.md +38 -0
  112. data/core/src/services/cloudflare_kv/docs.md +21 -0
  113. data/core/src/services/cos/docs.md +55 -0
  114. data/core/src/services/d1/docs.md +48 -0
  115. data/core/src/services/dashmap/docs.md +38 -0
  116. data/core/src/services/dbfs/docs.md +57 -0
  117. data/core/src/services/dropbox/docs.md +64 -0
  118. data/core/src/services/etcd/docs.md +45 -0
  119. data/core/src/services/foundationdb/docs.md +42 -0
  120. data/core/src/services/fs/docs.md +49 -0
  121. data/core/src/services/ftp/docs.md +42 -0
  122. data/core/src/services/gcs/docs.md +76 -0
  123. data/core/src/services/gdrive/docs.md +65 -0
  124. data/core/src/services/ghac/docs.md +84 -0
  125. data/core/src/services/github/docs.md +52 -0
  126. data/core/src/services/gridfs/docs.md +46 -0
  127. data/core/src/services/hdfs/docs.md +140 -0
  128. data/core/src/services/hdfs_native/docs.md +35 -0
  129. data/core/src/services/http/docs.md +45 -0
  130. data/core/src/services/huggingface/docs.md +61 -0
  131. data/core/src/services/ipfs/docs.md +45 -0
  132. data/core/src/services/ipmfs/docs.md +14 -0
  133. data/core/src/services/koofr/docs.md +51 -0
  134. data/core/src/services/lakefs/docs.md +62 -0
  135. data/core/src/services/memcached/docs.md +47 -0
  136. data/core/src/services/memory/docs.md +36 -0
  137. data/core/src/services/mini_moka/docs.md +19 -0
  138. data/core/src/services/moka/docs.md +42 -0
  139. data/core/src/services/mongodb/docs.md +49 -0
  140. data/core/src/services/monoiofs/docs.md +46 -0
  141. data/core/src/services/mysql/docs.md +47 -0
  142. data/core/src/services/obs/docs.md +54 -0
  143. data/core/src/services/onedrive/docs.md +115 -0
  144. data/core/src/services/opfs/docs.md +18 -0
  145. data/core/src/services/oss/docs.md +74 -0
  146. data/core/src/services/pcloud/docs.md +51 -0
  147. data/core/src/services/persy/docs.md +43 -0
  148. data/core/src/services/postgresql/docs.md +47 -0
  149. data/core/src/services/redb/docs.md +41 -0
  150. data/core/src/services/redis/docs.md +43 -0
  151. data/core/src/services/rocksdb/docs.md +54 -0
  152. data/core/src/services/s3/compatible_services.md +126 -0
  153. data/core/src/services/s3/docs.md +244 -0
  154. data/core/src/services/seafile/docs.md +54 -0
  155. data/core/src/services/sftp/docs.md +49 -0
  156. data/core/src/services/sled/docs.md +39 -0
  157. data/core/src/services/sqlite/docs.md +46 -0
  158. data/core/src/services/surrealdb/docs.md +54 -0
  159. data/core/src/services/swift/compatible_services.md +53 -0
  160. data/core/src/services/swift/docs.md +52 -0
  161. data/core/src/services/tikv/docs.md +43 -0
  162. data/core/src/services/upyun/docs.md +51 -0
  163. data/core/src/services/vercel_artifacts/docs.md +40 -0
  164. data/core/src/services/vercel_blob/docs.md +45 -0
  165. data/core/src/services/webdav/docs.md +49 -0
  166. data/core/src/services/webhdfs/docs.md +90 -0
  167. data/core/src/services/yandex_disk/docs.md +45 -0
  168. data/core/tests/behavior/README.md +77 -0
  169. data/core/tests/data/normal_dir/.gitkeep +0 -0
  170. data/core/tests/data/normal_file.txt +1041 -0
  171. data/core/tests/data/special_dir !@#$%^&()_+-=;',/.gitkeep +0 -0
  172. data/core/tests/data/special_file !@#$%^&()_+-=;',.txt +1041 -0
  173. data/core/users.md +13 -0
  174. data/extconf.rb +24 -0
  175. data/lib/opendal.rb +25 -0
  176. data/lib/opendal_ruby/entry.rb +35 -0
  177. data/lib/opendal_ruby/io.rb +70 -0
  178. data/lib/opendal_ruby/metadata.rb +44 -0
  179. data/lib/opendal_ruby/opendal_ruby.so +0 -0
  180. data/lib/opendal_ruby/operator.rb +29 -0
  181. data/lib/opendal_ruby/operator_info.rb +26 -0
  182. data/opendal.gemspec +91 -0
  183. data/test/blocking_op_test.rb +112 -0
  184. data/test/capability_test.rb +42 -0
  185. data/test/io_test.rb +172 -0
  186. data/test/lister_test.rb +77 -0
  187. data/test/metadata_test.rb +78 -0
  188. data/test/middlewares_test.rb +46 -0
  189. data/test/operator_info_test.rb +35 -0
  190. data/test/test_helper.rb +36 -0
  191. metadata +240 -0
@@ -0,0 +1,111 @@
1
+ - Proposal Name: `lazy_reader`
2
+ - Start Date: 2023-10-22
3
+ - RFC PR: [apache/opendal#3356](https://github.com/apache/opendal/pull/3356)
4
+ - Tracking Issue: [apache/opendal#3359](https://github.com/apache/opendal/issues/3359)
5
+
6
+ # Summary
7
+
8
+ Doing read IO in a lazy way.
9
+
10
+ # Motivation
11
+
12
+ The aim is to minimize IO cost. OpenDAL sends an actual IO request to the storage as soon as `Accessor::read()` is invoked. For storage services such as S3, this equates to an HTTP request. However, in practical scenarios, users typically create a reader and then use `seek` to navigate to the correct position before reading.
13
+
14
+ Take [parquet2 read_metadata](https://docs.rs/parquet2/latest/src/parquet2/read/metadata.rs.html) as an example:
15
+
16
+ ```rust
17
+ /// Reads a [`FileMetaData`] from the reader, located at the end of the file.
18
+ pub fn read_metadata<R: Read + Seek>(reader: &mut R) -> Result<FileMetaData> {
19
+ // check file is large enough to hold footer
20
+ let file_size = stream_len(reader)?;
21
+ if file_size < HEADER_SIZE + FOOTER_SIZE {
22
+ return Err(Error::oos(
23
+ "A parquet file must contain a header and footer with at least 12 bytes",
24
+ ));
25
+ }
26
+
27
+ // read and cache up to DEFAULT_FOOTER_READ_SIZE bytes from the end and process the footer
28
+ let default_end_len = min(DEFAULT_FOOTER_READ_SIZE, file_size) as usize;
29
+ reader.seek(SeekFrom::End(-(default_end_len as i64)))?;
30
+
31
+ ...
32
+
33
+ deserialize_metadata(reader, max_size)
34
+ }
35
+ ```
36
+
37
+ In `read_metadata`, a seek is issued as soon as the reader is obtained. On non-seekable storage services such as S3, this results in an HTTP request that is immediately cancelled. By postponing the IO request until the first `read` call, we can significantly reduce the number of IO requests.
38
+
39
+ The expense of initiating and immediately aborting an HTTP request is significant. Here are the benchmark results, using a stat call as our baseline:
40
+
41
+ On a MinIO server set up locally:
42
+
43
+ ```text
44
+ service_s3_read_stat/4.00 MiB
45
+ time: [315.23 µs 328.23 µs 341.42 µs]
46
+
47
+ service_s3_read_abort/4.00 MiB
48
+ time: [961.69 µs 980.68 µs 999.50 µs]
49
+ ```
50
+
51
+ On remote storage services with high latency:
52
+
53
+ ```text
54
+ service_s3_read_stat/4.00 MiB
55
+ time: [407.85 ms 409.61 ms 411.39 ms]
56
+
57
+ service_s3_read_abort/4.00 MiB
58
+ time: [1.5282 s 1.5554 s 1.5828 s]
59
+
60
+ ```
61
+
62
+ # Guide-level explanation
63
+
64
+ There are no changes to the public API. The only difference is that the IO request is deferred until the first `read` call, so no errors are returned when calling `op.reader()`. For instance, users won't encounter a `file not found` error when invoking `op.reader()`; such errors surface on the first read instead.
65
+
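+ A minimal sketch of this behavior (illustration only, using the public `Operator` API and the in-memory service; the exact error kind surfaced on the first read depends on the service):
+
+ ```rust
+ use opendal::{services, Operator};
+
+ async fn lazy_open() -> opendal::Result<()> {
+     let op = Operator::new(services::Memory::default())?.finish();
+
+     // Before this RFC, creating the reader could already send a request and
+     // fail (e.g. with a not-found error); with lazy reading it returns
+     // without touching the storage service.
+     let _reader = op.reader("missing/file").await?;
+
+     // The actual IO request, and any resulting error, now only happens on
+     // the first `read` issued through the reader.
+     Ok(())
+ }
+ ```
+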
66
+ # Reference-level explanation
67
+
68
+ Most changes will happen inside `CompleteLayer`. Previously, we called `Accessor::read()` directly in `complete_reader`:
69
+
70
+ ```rust
71
+ async fn complete_reader(
72
+ &self,
73
+ path: &str,
74
+ args: OpRead,
75
+ ) -> Result<(RpRead, CompleteReader<A, A::Reader>)> {
76
+ ..
77
+
78
+ let seekable = capability.read_can_seek;
79
+ let streamable = capability.read_can_next;
80
+
81
+ let range = args.range();
82
+ let (rp, r) = self.inner.read(path, args).await?;
83
+ let content_length = rp.metadata().content_length();
84
+
85
+ ...
86
+ }
87
+ ```
88
+
89
+ In the future, we will postpone the `Accessor::read()` request until the first `read` call.
90
+
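+ Conceptually, the deferral can be modeled as a small state machine that holds the `open` closure until the first use. The following self-contained sketch (hypothetical types, not OpenDAL's actual `CompleteLayer` code) shows the idea:
+
+ ```rust
+ use std::future::Future;
+
+ /// Wraps an async `open` so the IO request is only issued on first use.
+ enum Lazy<R, F> {
+     /// Holds the closure that performs the real IO request.
+     Idle(Option<F>),
+     /// The underlying reader, created on first use.
+     Ready(R),
+ }
+
+ impl<R, F, Fut> Lazy<R, F>
+ where
+     F: FnOnce() -> Fut,
+     Fut: Future<Output = std::io::Result<R>>,
+ {
+     fn new(open: F) -> Self {
+         Lazy::Idle(Some(open))
+     }
+
+     /// Only the first call pays the cost of `open`; later calls reuse it.
+     async fn get(&mut self) -> std::io::Result<&mut R> {
+         if let Lazy::Idle(open) = self {
+             let open = open.take().expect("open already consumed");
+             *self = Lazy::Ready(open().await?);
+         }
+         match self {
+             Lazy::Ready(r) => Ok(r),
+             Lazy::Idle(_) => unreachable!(),
+         }
+     }
+ }
+ ```
+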
91
+ # Drawbacks
92
+
93
+ None
94
+
95
+ # Rationale and alternatives
96
+
97
+ None
98
+
99
+ # Prior art
100
+
101
+ None
102
+
103
+ # Unresolved questions
104
+
105
+ None
106
+
107
+ # Future possibilities
108
+
109
+ ## Add `read_at` for `oio::Reader`
110
+
111
+ After `oio::Reader` becomes zero cost, we can add `read_at` to `oio::Reader` to support reading data by range.
@@ -0,0 +1,59 @@
1
+ - Proposal Name: `list_recursive`
2
+ - Start Date: 2023-11-08
3
+ - RFC PR: [apache/opendal#3526](https://github.com/apache/opendal/pull/3526)
4
+ - Tracking Issue: [apache/opendal#0000](https://github.com/apache/opendal/issues/0000)
5
+
6
+ # Summary
7
+
8
+ Use `recursive` to replace `delimiter`.
9
+
10
+ # Motivation
11
+
12
+ OpenDAL added `delimiter` to `list` to allow users to control the list behavior:
13
+
14
+ - `delimiter == "/"` means use `/` as delimiter of path, it behaves like list current dir.
15
+ - `delimiter == ""` means don't set delimiter of path, it behaves like list current dir and all it's children.
16
+
17
+ Ideally, we should allow users to input any delimiter such as `|`, `-`, and `+`.
18
+
19
+ The `delimiter` concept can be challenging for users unfamiliar with object storage services. Currently, only `/` and the empty string are accepted as delimiters, and even these are not fully implemented across all services. We need to inform users that `delimiter == "/"` is used to list the current directory, while `delimiter == ""` is used for recursive listing. This may not be immediately clear.
20
+
21
+ So, why not use `recursive` directly for clearer API behavior?
22
+
23
+ # Guide-level explanation
24
+
25
+ OpenDAL will use `recursive` to replace `delimiter`. The default behavior is unchanged, so users calling `op.list()` are not affected.
26
+
27
+ For users who are using `op.list_with(path).delimiter(delimiter)`, the mapping is as follows (a short usage sketch follows the list):
28
+
29
+ - `op.list_with(path).delimiter("")` -> `op.list_with(path).recursive(true)`
30
+ - `op.list_with(path).delimiter("/")` -> `op.list_with(path).recursive(false)`
31
+ - `op.list_with(path).delimiter(other_value)`: not supported anymore.
32
+
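+ A minimal usage sketch of the proposed API (the `data/` path is an example and `op` is an existing `Operator`):
+
+ ```rust
+ // List only the first level under `data/`, like `delimiter("/")` before.
+ let shallow = op.list_with("data/").recursive(false).await?;
+
+ // List `data/` and everything beneath it, like `delimiter("")` before.
+ let all = op.list_with("data/").recursive(true).await?;
+
+ for entry in all {
+     println!("{}", entry.path());
+ }
+ ```
+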
33
+ # Reference-level explanation
34
+
35
+ We will add `recursive` as a new arg in `OpList` and remove all fields related to `delimiter`.
36
+
37
+ # Drawbacks
38
+
39
+ ## Can't support `|`, `-`, or `+` as delimiters
40
+
41
+ We never supported this feature before.
42
+
43
+ # Rationale and alternatives
44
+
45
+ None
46
+
47
+ # Prior art
48
+
49
+ None
50
+
51
+ # Unresolved questions
52
+
53
+ None
54
+
55
+ # Future possibilities
56
+
57
+ ## Add delete with recursive support
58
+
59
+ Some services, such as azfile, have native support for recursive delete. We can add this feature in the future if needed.
@@ -0,0 +1,80 @@
1
+ - Proposal Name: `concurrent_stat_in_list`
2
+ - Start Date: 2023-11-13
3
+ - RFC PR: [apache/opendal#3574](https://github.com/apache/opendal/pull/3574)
4
+ - Tracking Issue: [apache/opendal#3575](https://github.com/apache/opendal/issues/3575)
5
+
6
+ # Summary
7
+
8
+ Add concurrent stat in list operation.
9
+
10
+ # Motivation
11
+
12
+ [RFC-2779](https://github.com/apache/opendal/pull/2779) allows users to list with metakey.
13
+ However, the stat calls inside list can make the listing process much slower. We should allow concurrent stat during list so that stat requests can be sent concurrently.
14
+
15
+
16
+ # Guide-level explanation
17
+
18
+ For users who want to run stat concurrently during list operations, there is a new API: `concurrent`. The `concurrent` function takes a number as input, which represents the maximum number of concurrent stat handlers.
19
+
20
+ The default behavior remains unchanged, so users of `op.list_with()` are not affected, and this implementation should be zero cost for users who don't need concurrent stat.
21
+
22
+ ```rust
23
+ op.lister_with(path).metakey(Metakey::ContentLength).concurrent(10).await?
24
+ ```
25
+
26
+
27
+ # Reference-level explanation
28
+
29
+ When `concurrent` is set and `list_with` is called, the list operation will be split into two parts: list and stat.
30
+
31
+ The list part will iterate through the entries inside the buffer, and if an entry's `metakey` is unknown, it will send a stat request to the storage service.
32
+
33
+ We will add a new field `concurrent` to `OpList`. The type of `concurrent` is `Option<u32>`. If `concurrent` is `None`, it means the default behavior. If `concurrent` is `Some(n)`, it means the maximum concurrent stat handlers are `n`.
34
+
35
+ Then we could use a bounded `VecDeque` to limit the maximum number of concurrent stat handlers. Additionally, we could store `JoinHandle<T>` handles inside the `VecDeque` to spawn and queue the stat tasks.
36
+ While iterating through the entries, we check whether an entry's `metakey` is unknown and whether the `VecDeque` is full. If the `metakey` is unknown and the `VecDeque` is full, we wait for and join the front handle once it finishes, since we need to keep the entry order.
37
+
38
+ If the metakey is unknown and the handlers are full, we should break the loop and wait for the spawned tasks inside handlers to finish. After the spawned tasks finish, we should iterate through the handlers and return the result.
39
+
40
+ If the metakey is known, we should check if the handlers are empty. If true, return the result immediately; otherwise, we should wait for the spawned tasks to finish.
41
+
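+ The ordered, bounded concurrency described above can be sketched with a `VecDeque` of `JoinHandle`s. This is a self-contained illustration with a generic `stat` stand-in (not OpenDAL's actual lister code), assuming a Tokio runtime:
+
+ ```rust
+ use std::collections::VecDeque;
+ use tokio::task::JoinHandle;
+
+ async fn stat_concurrently<T, F, Fut>(entries: Vec<T>, concurrent: usize, stat: F) -> Vec<T>
+ where
+     T: Send + 'static,
+     F: Fn(T) -> Fut,
+     Fut: std::future::Future<Output = T> + Send + 'static,
+ {
+     let mut queue: VecDeque<JoinHandle<T>> = VecDeque::new();
+     let mut out = Vec::new();
+
+     for entry in entries {
+         // When the queue is full, join the oldest handle first so the
+         // original entry order is preserved.
+         if queue.len() >= concurrent.max(1) {
+             out.push(queue.pop_front().unwrap().await.unwrap());
+         }
+         queue.push_back(tokio::spawn(stat(entry)));
+     }
+     // Drain the remaining in-flight tasks in order.
+     while let Some(handle) = queue.pop_front() {
+         out.push(handle.await.unwrap());
+     }
+     out
+ }
+ ```
+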
42
+ # Drawbacks
43
+
44
+ 1. More memory usage
45
+ 2. More complex code
46
+ 3. More complex testing
47
+
48
+ # Rationale and alternatives
49
+
50
+ ## Why not `VecDeque<BoxFuture<'static, X>>`?
51
+
52
+ To maintain the order of returned entries, we need to pre-run future entries before returning the current one to address the slowness issue.
53
+ Although we could use `VecDeque<BoxFuture<'static, X>>` to store the spawned tasks,
54
+ using it here would prevent us from executing the async block concurrently when we only have one `cx: &mut Context<'_>`.
55
+
56
+ ## Do we need `Semaphore`?
57
+
58
+ No, we can control the concurrency by limiting the length of the `VecDeque`.
59
+ Using a `Semaphore` would introduce extra cost and memory overhead.
60
+
61
+ ## Why not using `JoinSet`?
62
+
63
+ The main reason is that `JoinSet` can't maintain the order of entries.
64
+
65
+ The other reason is that `JoinSet` requires mutability to spawn or join the next task, and `tokio::spawn()` requires the async block to be `'static`.
66
+ This implies that we need to use `Arc<T>` to wrap our `JoinSet`. However, to change the value inside `Arc`, we need to introduce a `Mutex`. Since it's inside an async block, we need to use Tokio's `Mutex` to satisfy the `Sync` bound.
67
+ Therefore, for every operation on the `JoinSet`, there will be an `.await` on the lock outside the async block, making concurrency impossible inside `poll_next()`.
68
+
69
+ # Prior art
70
+
71
+ None
72
+
73
+ # Unresolved questions
74
+
75
+ - How to implement similar logic for the `blocking` API?
76
+ - Quoted from [oowl](https://github.com/oowl): It seems these features can be implemented in blocking mode, but it may require breaking something in OpenDAL, such as using some pthread API in blocking mode.
77
+
78
+ # Future possibilities
79
+
80
+ None
@@ -0,0 +1,64 @@
1
+ - Proposal Name: `buffered_reader`
2
+ - Start Date: 2023-12-10
3
+ - RFC PR: [apache/opendal#3734](https://github.com/apache/opendal/pull/3734)
4
+ - Tracking Issue: [apache/opendal#3735](https://github.com/apache/opendal/issues/3735)
5
+
6
+ # Summary
7
+
8
+ Allow the underlying reader to fetch data at the buffer's size to amortize IO overhead.
9
+
10
+ # Motivation
11
+
12
+ The objective is to mitigate IO overhead. In certain scenarios, the reader processes the data incrementally: it uses the `seek()` function to navigate to a specific position within the file, then invokes `read()` to read `limit` bytes into memory and performs the decoding process.
13
+
14
+
15
+ OpenDAL triggers an IO request upon invoking `read()` if the `seek()` has reset the inner state. For storage services like S3, [research](https://www.vldb.org/pvldb/vol16/p2769-durner.pdf) suggests that an optimal IO size falls within the range of 8MiB to 16MiB. If the IO size is too small, the Time To First Byte (TTFB) dominates the overall runtime, resulting in inefficiency.
16
+
17
+ Therefore, this RFC proposes the implementation of a buffered reader to amortize the overhead of IO.
18
+
19
+ # Guide-level explanation
20
+
21
+ For users who want a buffered reader, there is a new API: `buffer`. The default behavior remains unchanged, so users of `op.reader_with()` are not affected. The `buffer` function takes a number as input, which represents the maximum buffer capacity the reader is able to use.
22
+
23
+ ```rust
24
+ op.reader_with(path).buffer(32 * 1024 * 1024).await
25
+ ```
26
+
27
+ # Reference-level explanation
28
+
29
+ This feature will be implemented in the `CompleteLayer`, with the addition of a `BufferReader` struct in `raw/oio/reader/buffer_reader.rs`.
30
+
31
+ The `BufferReader` employs a `tokio::io::ReadBuf` as the inner buffer and uses `offset: Option<u64>` to track the start of the buffered range within the file; the buffered data is always `file[offset..offset + buf.len()]`.
32
+
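+ As a tiny illustration of this bookkeeping (a hypothetical helper, not the actual `BufferReader` code), a read at position `pos` can be served from the buffer only when it falls inside that range:
+
+ ```rust
+ /// Returns true when `pos` lies inside `file[offset..offset + buf_len]`.
+ fn served_from_buffer(offset: Option<u64>, buf_len: usize, pos: u64) -> bool {
+     match offset {
+         Some(start) => pos >= start && pos < start + buf_len as u64,
+         // Nothing buffered yet: the next read must hit the storage service.
+         None => false,
+     }
+ }
+ ```
+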
33
+
34
+ ```rust
35
+ ...
36
+ async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
37
+ BufferReader::new(self.complete_read(path, args).await)
38
+ }
39
+
40
+ ...
41
+
42
+ fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> {
43
+ BufferReader::new(self.complete_blocking_read(path, args))
44
+ }
45
+ ...
46
+ ```
47
+
48
+ A `buffer` field of type `Option<usize>` will be introduced to `OpRead`. If `buffer` is set to `None`, the reader keeps the default behavior. If `buffer` is set to `Some(n)`, it denotes the maximum buffer capacity that the `BufferReader` can utilize. The behavior is similar to [std::io::BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html), with two differences: our implementation always provides `seek_relative` behavior (without discarding the inner buffer) when possible, and it doesn't buffer trailing reads when the read range is smaller than the buffer capacity.
49
+
50
+ # Drawbacks
51
+ None
52
+
53
+ # Rationale and alternatives
54
+ None
55
+
56
+ # Prior art
57
+ None
58
+
59
+ # Unresolved questions
60
+ None
61
+
62
+ # Future possibilities
63
+ - Concurrent fetching.
64
+ - Trailing buffering.
@@ -0,0 +1,66 @@
1
+ - Proposal Name: `concurrent_writer`
2
+ - Start Date: 2024-01-02
3
+ - RFC PR: [apache/opendal#3898](https://github.com/apache/opendal/pull/3898)
4
+ - Tracking Issue: [apache/opendal#3899](https://github.com/apache/opendal/issues/3899)
5
+
6
+ # Summary
7
+
8
+ Enhance the `Writer` by adding concurrent write capabilities.
9
+
10
+ # Motivation
11
+
12
+ Certain services, such as S3, GCS, and AzBlob, offer `multi_write` functionality, allowing users to perform multiple write operations when uploading large files. If a service supports `multi_write`, the [Capability::write_can_multi](https://opendal.apache.org/docs/rust/opendal/struct.Capability.html#structfield.write_can_multi) metadata should be set to `true`.
13
+ ```rust
14
+ let mut writer = op.writer("path/to").await?; // a writer that supports `multi_write`.
15
+ writer.write(part0).await?;
16
+ writer.write(part1).await?; // It starts to upload after the `part0` is finished.
17
+ writer.close().await?;
18
+ ```
19
+ Currently, when using a `Writer` that supports `multi_write`, multiple writes proceed serially, without fully leveraging the potential for improved throughput through concurrent uploads. We should add support for processing multiple write operations concurrently.
20
+
21
+
22
+ # Guide-level explanation
23
+
24
+ For users who want a concurrent writer, there is a new API: `concurrent`. The default behavior remains unchanged, so users of `op.writer_with()` are not affected. The `concurrent` function takes a number as input, which represents the maximum number of concurrent write tasks the writer can perform.
25
+
26
+ - If `concurrent` is set to 0 or 1, the writer keeps the default behavior (writes serially).
27
+ - If `concurrent` is set to a number larger than 1, it enables concurrent uploading of up to `concurrent` write tasks and allows users to initiate additional write tasks without waiting for the previous write operation to complete, as long as the inner task queue still has available slots.
28
+
29
+ The concurrent write feature operates independently of other features.
30
+
31
+ ```rust
32
+ let mut w = op.writer_with(path).concurrent(8).await?;
33
+ w.write(part0).await?;
34
+ w.write(part1).await?; // `write` won't wait for part0.
35
+ w.close().await?; // `close` will make sure all parts are finished.
36
+ ```
37
+
38
+ # Reference-level explanation
39
+
40
+ S3 and similar services use `MultipartUploadWriter`, while GCS uses `RangeWriter`. We can enhance these writers by adding concurrent write support to them. A `concurrent` field of type `usize` will be introduced to `OpWrite` to let users set the maximum number of concurrent write tasks. For services that don't support `multi_write`, setting the `concurrent` parameter will have no effect, keeping the default behavior.
41
+
42
+ This feature will be implemented in the `MultipartUploadWriter` and `RangeWriter`, which will utilize a `ConcurrentFutures<WriteTask>` as a task queue to store concurrent write tasks.
43
+
44
+ When the upper layer invokes `poll_write`, the `Writer` pushes the write to the task queue (`ConcurrentFutures<WriteTask>`) if there are available slots, and then relinquishes control back to the upper layer. This allows up to `concurrent` write tasks to be uploaded concurrently without waiting. If the task queue is full, the `Writer` waits for the first task to yield its result.
45
+
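+ The queueing behavior can be sketched with `futures::stream::FuturesOrdered` standing in for the internal task queue (a self-contained illustration with a fake `upload_part`, not the actual `MultipartUploadWriter` code):
+
+ ```rust
+ use futures::stream::{FuturesOrdered, StreamExt};
+
+ async fn upload_part(part_id: usize, data: Vec<u8>) -> usize {
+     // Stand-in for a real multipart upload request.
+     let _ = data;
+     part_id
+ }
+
+ async fn write_all_concurrently(parts: Vec<Vec<u8>>, concurrent: usize) {
+     let mut inflight = FuturesOrdered::new();
+     for (id, data) in parts.into_iter().enumerate() {
+         // Queue full: wait for the oldest part before accepting a new one.
+         if inflight.len() >= concurrent.max(1) {
+             let done: usize = inflight.next().await.unwrap();
+             println!("part {done} uploaded");
+         }
+         inflight.push_back(upload_part(id, data));
+     }
+     // `close()` would drain the queue; do the same here.
+     while let Some(done) = inflight.next().await {
+         println!("part {done} uploaded");
+     }
+ }
+ ```
+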
46
+ # Drawbacks
47
+
48
+ - More memory usage
49
+ - More concurrent connections
50
+
51
+ # Rationale and alternatives
52
+
53
+ None
54
+
55
+ # Prior art
56
+
57
+ None
58
+
59
+ # Unresolved questions
60
+
61
+ None
62
+
63
+ # Future possibilities
64
+
65
+ - Adding `write_at` for `fs`.
66
+ - Use `ConcurrentFutureUnordered` instead of `ConcurrentFutures`.
@@ -0,0 +1,165 @@
1
+ - Proposal Name: `deleter_api`
2
+ - Start Date: 2024-01-04
3
+ - RFC PR: [apache/opendal#3911](https://github.com/apache/opendal/pull/3911)
4
+ - Tracking Issue: [apache/opendal#3922](https://github.com/apache/opendal/issues/3922)
5
+
6
+ # Summary
7
+
8
+ Introduce the `Deleter` API to enhance batch and recursive deletion capabilities.
9
+
10
+ # Motivation
11
+
12
+ All of OpenDAL's public APIs follow the same design:
13
+
14
+ - `read`: Execute a read operation.
15
+ - `read_with`: Execute a read operation with additional options, like range and if_match.
16
+ - `reader`: Create a reader for streaming data, enabling flexible access.
17
+ - `reader_with`: Create a reader with advanced options.
18
+
19
+ However, `delete` operations vary. OpenDAL offers several methods for file deletion:
20
+
21
+ - `delete`: Delete a single file or an empty dir.
22
+ - `remove`: Remove a list of files.
23
+ - `remove_via`: Remove files produced by a stream.
24
+ - `remove_all`: Remove all files under a path.
25
+
26
+ This design is not consistent with the other APIs, and it is not easy to use.
27
+
28
+ So I propose `Deleter` to address them all at once.
29
+
30
+ # Guide-level explanation
31
+
32
+ The following new API will be added to `Operator`:
33
+
34
+ ```diff
35
+ impl Operator {
36
+ pub async fn delete(&self, path: &str) -> Result<()>;
37
+ + pub fn delete_with(&self, path: &str) -> FutureDelete;
38
+
39
+ + pub async fn deleter(&self) -> Result<Deleter>;
40
+ + pub fn deleter_with(&self) -> FutureDeleter;
41
+ }
42
+ ```
43
+
44
+ - `delete` is the existing API, which deletes a single file or an empty dir.
45
+ - `delete_with` is an extension of the existing `delete` API, which supports additional options, such as `version`.
46
+ - `deleter` is a new API that returns a `Deleter` instance.
47
+ - `deleter_with` is an extension of the existing `deleter` API, which supports additional options, such as `concurrent`.
48
+
49
+ The following new options will be available for `delete_with` and `deleter_with`:
50
+
51
+ - `concurrent`: How many delete tasks can be performed concurrently?
52
+ - `buffer`: How many files can be buffered to send in a single batch?
53
+
54
+ Users can delete multiple files in this way:
55
+
56
+
57
+ ```rust
58
+ let deleter = op.deleter().await?;
59
+
60
+ // Add a single file to the deleter.
61
+ deleter.delete(path).await?;
62
+
63
+ // Add a stream of files to the deleter.
64
+ deleter.delete_all(&mut lister).await?;
65
+
66
+ // Close deleter, make sure all input files are deleted.
67
+ deleter.close().await?;
68
+ ```
69
+
70
+ `Deleter` also implements [`Sink`](https://docs.rs/futures/latest/futures/sink/trait.Sink.html), so all the methods of `Sink` are available for `Deleter`. For example, users can use [`forward`](https://docs.rs/futures/latest/futures/stream/trait.StreamExt.html#method.forward) to forward a stream of files to `Deleter`:
71
+
72
+ ```rust
73
+ // Init a deleter to start batch delete tasks.
74
+ let deleter = op.deleter().await?;
75
+ // List all files that end with .tmp
76
+ let lister = op.lister(path).await?
77
+ .filter(|x|future::ready(x.ends_with(".tmp")));
78
+
79
+ // Forward all paths into deleter.
80
+ lister.forward(deleter).await?;
81
+
82
+ // Alternatively, send everything from a stream into the deleter.
83
+ deleter.send_all(&mut lister).await?;
84
+
85
+ // Close the deleter.
86
+ deleter.close().await?;
87
+ ```
88
+
89
+ Users can control the behavior of `Deleter` by setting the options:
90
+
91
+ ```rust
92
+ let deleter = op.deleter_with()
93
+ // Allow up to 8 concurrent delete tasks, default to 1.
94
+ .concurrent(8)
95
+ // Configure the buffer size to 1000; the default value is provided by the service.
96
+ .buffer(1000)
97
+ .await?;
98
+
99
+ // Add a single file to the deleter.
100
+ deleter.delete(path).await?;
101
+
102
+ // Add a stream of files to the deleter.
103
+ deleter.delete_all(&mut lister).await?;
104
+
105
+ // Close deleter, make sure all input files are deleted.
106
+ deleter.close().await?;
107
+ ```
108
+
109
+ With the introduction of the `Deleter` API, we will remove APIs like `remove`, `remove_via`, and `remove_all`; a short sketch of the replacements follows the list below.
110
+
111
+ - `remove` and `remove_via` could be replaced by `Deleter` directly.
112
+ - `remove_all` could be replaced by `delete_with(path).recursive(true)`.
113
+
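+ A short sketch of what the replacements look like under the new API (paths are examples and `op` is an existing `Operator`):
+
+ ```rust
+ // `remove(vec)` / `remove_via(stream)`: feed the paths into a `Deleter`.
+ let mut deleter = op.deleter().await?;
+ deleter.delete("a.tmp").await?;
+ deleter.delete("b.tmp").await?;
+ deleter.close().await?;
+
+ // `remove_all(path)`: a recursive delete on the prefix.
+ op.delete_with("outdated/").recursive(true).await?;
+ ```
+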
114
+ # Reference-level explanation
115
+
116
+ To provide these public APIs, we will add new associated types to `Accessor`:
117
+
118
+ ```rust
119
+ trait Accessor {
120
+ ...
121
+
122
+ type Deleter: oio::Delete;
123
+ type BlockingDeleter: oio::BlockingDelete;
124
+ }
125
+ ```
126
+
127
+ And the `delete` API will be changed to return an `oio::Delete` instead:
128
+
129
+ ```diff
130
+ trait Accessor {
131
+ - async fn delete(&self, path: &str, args: OpDelete) -> Result<RpDelete>;
132
+ + async fn delete(&self) -> Result<(RpDelete, Self::Deleter)>;
133
+ }
134
+ ```
135
+
136
+ Along with this change, we will remove the `batch` API from `Accessor`:
137
+
138
+ ```diff
139
+ trait Accessor {
140
+ - async fn batch(&self, args: OpBatch) -> Result<RpBatch>;
141
+ }
142
+ ```
143
+
144
+ # Drawbacks
145
+
146
+ - Big breaking changes.
147
+
148
+
149
+ # Rationale and alternatives
150
+
151
+ None.
152
+
153
+ # Prior art
154
+
155
+ None.
156
+
157
+ # Unresolved questions
158
+
159
+ None.
160
+
161
+ # Future possibilities
162
+
163
+ ## Add API that accepts `IntoIterator`
164
+
165
+ It's possible to add a new API that accepts `IntoIterator` so users can input `Vec<String>` or `Iter<String>` into `Deleter`.