opendal 0.1.6.pre.rc.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +20 -0
- data/.tool-versions +1 -0
- data/.yardopts +1 -0
- data/DEPENDENCIES.md +9 -0
- data/DEPENDENCIES.rust.tsv +277 -0
- data/Gemfile +35 -0
- data/README.md +159 -0
- data/Rakefile +149 -0
- data/core/CHANGELOG.md +4929 -0
- data/core/CONTRIBUTING.md +61 -0
- data/core/DEPENDENCIES.md +3 -0
- data/core/DEPENDENCIES.rust.tsv +185 -0
- data/core/LICENSE +201 -0
- data/core/README.md +228 -0
- data/core/benches/README.md +18 -0
- data/core/benches/ops/README.md +26 -0
- data/core/benches/types/README.md +9 -0
- data/core/benches/vs_fs/README.md +35 -0
- data/core/benches/vs_s3/README.md +55 -0
- data/core/edge/README.md +3 -0
- data/core/edge/file_write_on_full_disk/README.md +14 -0
- data/core/edge/s3_aws_assume_role_with_web_identity/README.md +18 -0
- data/core/edge/s3_read_on_wasm/.gitignore +3 -0
- data/core/edge/s3_read_on_wasm/README.md +42 -0
- data/core/edge/s3_read_on_wasm/webdriver.json +15 -0
- data/core/examples/README.md +23 -0
- data/core/examples/basic/README.md +15 -0
- data/core/examples/concurrent-upload/README.md +15 -0
- data/core/examples/multipart-upload/README.md +15 -0
- data/core/fuzz/.gitignore +5 -0
- data/core/fuzz/README.md +68 -0
- data/core/src/docs/comparisons/vs_object_store.md +183 -0
- data/core/src/docs/performance/concurrent_write.md +101 -0
- data/core/src/docs/performance/http_optimization.md +124 -0
- data/core/src/docs/rfcs/0000_example.md +74 -0
- data/core/src/docs/rfcs/0000_foyer_integration.md +111 -0
- data/core/src/docs/rfcs/0041_object_native_api.md +185 -0
- data/core/src/docs/rfcs/0044_error_handle.md +198 -0
- data/core/src/docs/rfcs/0057_auto_region.md +160 -0
- data/core/src/docs/rfcs/0069_object_stream.md +145 -0
- data/core/src/docs/rfcs/0090_limited_reader.md +155 -0
- data/core/src/docs/rfcs/0112_path_normalization.md +79 -0
- data/core/src/docs/rfcs/0191_async_streaming_io.md +328 -0
- data/core/src/docs/rfcs/0203_remove_credential.md +96 -0
- data/core/src/docs/rfcs/0221_create_dir.md +89 -0
- data/core/src/docs/rfcs/0247_retryable_error.md +87 -0
- data/core/src/docs/rfcs/0293_object_id.md +67 -0
- data/core/src/docs/rfcs/0337_dir_entry.md +191 -0
- data/core/src/docs/rfcs/0409_accessor_capabilities.md +67 -0
- data/core/src/docs/rfcs/0413_presign.md +154 -0
- data/core/src/docs/rfcs/0423_command_line_interface.md +268 -0
- data/core/src/docs/rfcs/0429_init_from_iter.md +107 -0
- data/core/src/docs/rfcs/0438_multipart.md +163 -0
- data/core/src/docs/rfcs/0443_gateway.md +73 -0
- data/core/src/docs/rfcs/0501_new_builder.md +111 -0
- data/core/src/docs/rfcs/0554_write_refactor.md +96 -0
- data/core/src/docs/rfcs/0561_list_metadata_reuse.md +210 -0
- data/core/src/docs/rfcs/0599_blocking_api.md +157 -0
- data/core/src/docs/rfcs/0623_redis_service.md +300 -0
- data/core/src/docs/rfcs/0627_split_capabilities.md +89 -0
- data/core/src/docs/rfcs/0661_path_in_accessor.md +126 -0
- data/core/src/docs/rfcs/0793_generic_kv_services.md +209 -0
- data/core/src/docs/rfcs/0926_object_reader.md +93 -0
- data/core/src/docs/rfcs/0977_refactor_error.md +151 -0
- data/core/src/docs/rfcs/1085_object_handler.md +73 -0
- data/core/src/docs/rfcs/1391_object_metadataer.md +110 -0
- data/core/src/docs/rfcs/1398_query_based_metadata.md +125 -0
- data/core/src/docs/rfcs/1420_object_writer.md +147 -0
- data/core/src/docs/rfcs/1477_remove_object_concept.md +159 -0
- data/core/src/docs/rfcs/1735_operation_extension.md +117 -0
- data/core/src/docs/rfcs/2083_writer_sink_api.md +106 -0
- data/core/src/docs/rfcs/2133_append_api.md +88 -0
- data/core/src/docs/rfcs/2299_chain_based_operator_api.md +99 -0
- data/core/src/docs/rfcs/2602_object_versioning.md +138 -0
- data/core/src/docs/rfcs/2758_merge_append_into_write.md +79 -0
- data/core/src/docs/rfcs/2774_lister_api.md +66 -0
- data/core/src/docs/rfcs/2779_list_with_metakey.md +143 -0
- data/core/src/docs/rfcs/2852_native_capability.md +58 -0
- data/core/src/docs/rfcs/2884_merge_range_read_into_read.md +80 -0
- data/core/src/docs/rfcs/3017_remove_write_copy_from.md +94 -0
- data/core/src/docs/rfcs/3197_config.md +237 -0
- data/core/src/docs/rfcs/3232_align_list_api.md +69 -0
- data/core/src/docs/rfcs/3243_list_prefix.md +128 -0
- data/core/src/docs/rfcs/3356_lazy_reader.md +111 -0
- data/core/src/docs/rfcs/3526_list_recursive.md +59 -0
- data/core/src/docs/rfcs/3574_concurrent_stat_in_list.md +80 -0
- data/core/src/docs/rfcs/3734_buffered_reader.md +64 -0
- data/core/src/docs/rfcs/3898_concurrent_writer.md +66 -0
- data/core/src/docs/rfcs/3911_deleter_api.md +165 -0
- data/core/src/docs/rfcs/4382_range_based_read.md +213 -0
- data/core/src/docs/rfcs/4638_executor.md +215 -0
- data/core/src/docs/rfcs/5314_remove_metakey.md +120 -0
- data/core/src/docs/rfcs/5444_operator_from_uri.md +162 -0
- data/core/src/docs/rfcs/5479_context.md +140 -0
- data/core/src/docs/rfcs/5485_conditional_reader.md +112 -0
- data/core/src/docs/rfcs/5495_list_with_deleted.md +81 -0
- data/core/src/docs/rfcs/5556_write_returns_metadata.md +121 -0
- data/core/src/docs/rfcs/5871_read_returns_metadata.md +112 -0
- data/core/src/docs/rfcs/6189_remove_native_blocking.md +106 -0
- data/core/src/docs/rfcs/6209_glob_support.md +132 -0
- data/core/src/docs/rfcs/6213_options_api.md +142 -0
- data/core/src/docs/rfcs/README.md +62 -0
- data/core/src/docs/upgrade.md +1556 -0
- data/core/src/services/aliyun_drive/docs.md +61 -0
- data/core/src/services/alluxio/docs.md +45 -0
- data/core/src/services/azblob/docs.md +77 -0
- data/core/src/services/azdls/docs.md +73 -0
- data/core/src/services/azfile/docs.md +65 -0
- data/core/src/services/b2/docs.md +54 -0
- data/core/src/services/cacache/docs.md +38 -0
- data/core/src/services/cloudflare_kv/docs.md +21 -0
- data/core/src/services/cos/docs.md +55 -0
- data/core/src/services/d1/docs.md +48 -0
- data/core/src/services/dashmap/docs.md +38 -0
- data/core/src/services/dbfs/docs.md +57 -0
- data/core/src/services/dropbox/docs.md +64 -0
- data/core/src/services/etcd/docs.md +45 -0
- data/core/src/services/foundationdb/docs.md +42 -0
- data/core/src/services/fs/docs.md +49 -0
- data/core/src/services/ftp/docs.md +42 -0
- data/core/src/services/gcs/docs.md +76 -0
- data/core/src/services/gdrive/docs.md +65 -0
- data/core/src/services/ghac/docs.md +84 -0
- data/core/src/services/github/docs.md +52 -0
- data/core/src/services/gridfs/docs.md +46 -0
- data/core/src/services/hdfs/docs.md +140 -0
- data/core/src/services/hdfs_native/docs.md +35 -0
- data/core/src/services/http/docs.md +45 -0
- data/core/src/services/huggingface/docs.md +61 -0
- data/core/src/services/ipfs/docs.md +45 -0
- data/core/src/services/ipmfs/docs.md +14 -0
- data/core/src/services/koofr/docs.md +51 -0
- data/core/src/services/lakefs/docs.md +62 -0
- data/core/src/services/memcached/docs.md +47 -0
- data/core/src/services/memory/docs.md +36 -0
- data/core/src/services/mini_moka/docs.md +19 -0
- data/core/src/services/moka/docs.md +42 -0
- data/core/src/services/mongodb/docs.md +49 -0
- data/core/src/services/monoiofs/docs.md +46 -0
- data/core/src/services/mysql/docs.md +47 -0
- data/core/src/services/obs/docs.md +54 -0
- data/core/src/services/onedrive/docs.md +115 -0
- data/core/src/services/opfs/docs.md +18 -0
- data/core/src/services/oss/docs.md +74 -0
- data/core/src/services/pcloud/docs.md +51 -0
- data/core/src/services/persy/docs.md +43 -0
- data/core/src/services/postgresql/docs.md +47 -0
- data/core/src/services/redb/docs.md +41 -0
- data/core/src/services/redis/docs.md +43 -0
- data/core/src/services/rocksdb/docs.md +54 -0
- data/core/src/services/s3/compatible_services.md +126 -0
- data/core/src/services/s3/docs.md +244 -0
- data/core/src/services/seafile/docs.md +54 -0
- data/core/src/services/sftp/docs.md +49 -0
- data/core/src/services/sled/docs.md +39 -0
- data/core/src/services/sqlite/docs.md +46 -0
- data/core/src/services/surrealdb/docs.md +54 -0
- data/core/src/services/swift/compatible_services.md +53 -0
- data/core/src/services/swift/docs.md +52 -0
- data/core/src/services/tikv/docs.md +43 -0
- data/core/src/services/upyun/docs.md +51 -0
- data/core/src/services/vercel_artifacts/docs.md +40 -0
- data/core/src/services/vercel_blob/docs.md +45 -0
- data/core/src/services/webdav/docs.md +49 -0
- data/core/src/services/webhdfs/docs.md +90 -0
- data/core/src/services/yandex_disk/docs.md +45 -0
- data/core/tests/behavior/README.md +77 -0
- data/core/tests/data/normal_dir/.gitkeep +0 -0
- data/core/tests/data/normal_file.txt +1041 -0
- data/core/tests/data/special_dir !@#$%^&()_+-=;',/.gitkeep +0 -0
- data/core/tests/data/special_file !@#$%^&()_+-=;',.txt +1041 -0
- data/core/users.md +13 -0
- data/extconf.rb +24 -0
- data/lib/opendal.rb +25 -0
- data/lib/opendal_ruby/entry.rb +35 -0
- data/lib/opendal_ruby/io.rb +70 -0
- data/lib/opendal_ruby/metadata.rb +44 -0
- data/lib/opendal_ruby/opendal_ruby.so +0 -0
- data/lib/opendal_ruby/operator.rb +29 -0
- data/lib/opendal_ruby/operator_info.rb +26 -0
- data/opendal.gemspec +91 -0
- data/test/blocking_op_test.rb +112 -0
- data/test/capability_test.rb +42 -0
- data/test/io_test.rb +172 -0
- data/test/lister_test.rb +77 -0
- data/test/metadata_test.rb +78 -0
- data/test/middlewares_test.rb +46 -0
- data/test/operator_info_test.rb +35 -0
- data/test/test_helper.rb +36 -0
- metadata +240 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
- Proposal Name: `object_native_api`
|
|
2
|
+
- Start Date: 2022-02-18
|
|
3
|
+
- RFC PR: [apache/opendal#41](https://github.com/apache/opendal/pull/41)
|
|
4
|
+
- Tracking Issue: [apache/opendal#35](https://github.com/apache/opendal/pull/35)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Refactor the API in an object-native way to make it easier to use.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
`opendal` is not easy to use.
|
|
13
|
+
|
|
14
|
+
In our early adoption project `databend`, we can see a lot of code looks like:
|
|
15
|
+
|
|
16
|
+
```rust
|
|
17
|
+
let data_accessor = self.data_accessor.clone();
|
|
18
|
+
let path = self.path.clone();
|
|
19
|
+
let reader = SeekableReader::new(data_accessor, path.as_str(), stream_len);
|
|
20
|
+
let reader = BufReader::with_capacity(read_buffer_size as usize, reader);
|
|
21
|
+
Self::read_column(reader, &col_meta, data_type.clone(), arrow_type.clone()).await
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
And
|
|
25
|
+
|
|
26
|
+
```rust
|
|
27
|
+
op.stat(&path).run().await
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Conclusion
|
|
31
|
+
|
|
32
|
+
So in this proposal, I expect to address those problems. After implementing this proposal, we have a faster and easier-to-use `opendal`.
|
|
33
|
+
|
|
34
|
+
# Guide-level explanation
|
|
35
|
+
|
|
36
|
+
To operate on an object, we will use `Operator::object()` to create a new handler:
|
|
37
|
+
|
|
38
|
+
```rust
|
|
39
|
+
let o = op.object("path/to/file");
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
The operations currently available on `Object` include:
|
|
43
|
+
|
|
44
|
+
- `metadata`: get object metadata (return an error if not exist).
|
|
45
|
+
- `delete`: delete an object.
|
|
46
|
+
- `reader`: create a new reader to read data from this object.
|
|
47
|
+
- `writer`: create a new writer to write data into this object.
|
|
48
|
+
|
|
49
|
+
Here is an example:
|
|
50
|
+
|
|
51
|
+
```rust
|
|
52
|
+
use anyhow::Result;
|
|
53
|
+
use futures::AsyncReadExt;
|
|
54
|
+
|
|
55
|
+
use opendal::services::fs;
|
|
56
|
+
use opendal::Operator;
|
|
57
|
+
|
|
58
|
+
#[tokio::main]
|
|
59
|
+
async fn main() -> Result<()> {
|
|
60
|
+
let op = Operator::new(fs::Backend::build().root("/tmp").finish().await?);
|
|
61
|
+
|
|
62
|
+
let o = op.object("test_file");
|
|
63
|
+
|
|
64
|
+
// Write data into file;
|
|
65
|
+
let w = o.writer();
|
|
66
|
+
let n = w
|
|
67
|
+
.write_bytes("Hello, World!".to_string().into_bytes())
|
|
68
|
+
.await?;
|
|
69
|
+
assert_eq!(n, 13);
|
|
70
|
+
|
|
71
|
+
// Read data from file;
|
|
72
|
+
let mut r = o.reader();
|
|
73
|
+
let mut buf = vec![];
|
|
74
|
+
let n = r.read_to_end(&mut buf).await?;
|
|
75
|
+
assert_eq!(n, 13);
|
|
76
|
+
assert_eq!(String::from_utf8_lossy(&buf), "Hello, World!");
|
|
77
|
+
|
|
78
|
+
// Get file's Metadata
|
|
79
|
+
let meta = o.metadata().await?;
|
|
80
|
+
assert_eq!(meta.content_length(), 13);
|
|
81
|
+
|
|
82
|
+
// Delete file.
|
|
83
|
+
o.delete().await?;
|
|
84
|
+
|
|
85
|
+
Ok(())
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
# Reference-level explanation
|
|
90
|
+
|
|
91
|
+
## Native Reader support
|
|
92
|
+
|
|
93
|
+
We will provide a `Reader` (which implements both `AsyncRead` and `AsyncSeek`) for users instead of just an `AsyncRead`. In this `Reader`, we will:
|
|
94
|
+
|
|
95
|
+
- Not maintain internal buffer: caller can decide to wrap into `BufReader`.
|
|
96
|
+
- Only rely on accessor's `read` and `stat` operations.
|
|
97
|
+
|
|
98
|
+
To avoid the extra cost for `stat`, we will:
|
|
99
|
+
|
|
100
|
+
- Allow users to specify `total_size` for `Reader`.
|
|
101
|
+
- Lazily send `stat` the first time `SeekFrom::End()` is used.
|
|
102
|
+
|
|
103
|
+
To avoid the extra cost for `poll_read`, we will:
|
|
104
|
+
|
|
105
|
+
- Keep the underlying `BoxedAsyncRead` open, so that we can reuse the same connection/fd.
|
|
106
|
+
|
|
107
|
+
With these changes, we can improve the `Reader` performance on both local fs and remote storage:
|
|
108
|
+
|
|
109
|
+
- fs, before
|
|
110
|
+
|
|
111
|
+
```shell
|
|
112
|
+
Benchmarking fs/bench_read/64226295-b7a7-416e-94ce-666ac3ab037b:
|
|
113
|
+
time: [16.060 ms 17.109 ms 18.124 ms]
|
|
114
|
+
thrpt: [882.82 MiB/s 935.20 MiB/s 996.24 MiB/s]
|
|
115
|
+
|
|
116
|
+
Benchmarking fs/bench_buf_read/64226295-b7a7-416e-94ce-666ac3ab037b:
|
|
117
|
+
time: [14.779 ms 14.857 ms 14.938 ms]
|
|
118
|
+
thrpt: [1.0460 GiB/s 1.0517 GiB/s 1.0572 GiB/s]
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
- fs, after
|
|
122
|
+
|
|
123
|
+
```shell
|
|
124
|
+
Benchmarking fs/bench_read/df531bc7-54c8-43b6-b412-e4f7b9589876:
|
|
125
|
+
time: [14.654 ms 15.452 ms 16.273 ms]
|
|
126
|
+
thrpt: [983.20 MiB/s 1.0112 GiB/s 1.0663 GiB/s]
|
|
127
|
+
|
|
128
|
+
Benchmarking fs/bench_buf_read/df531bc7-54c8-43b6-b412-e4f7b9589876:
|
|
129
|
+
time: [5.5589 ms 5.5825 ms 5.6076 ms]
|
|
130
|
+
thrpt: [2.7864 GiB/s 2.7989 GiB/s 2.8108 GiB/s]
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
- s3, before
|
|
134
|
+
|
|
135
|
+
```shell
|
|
136
|
+
Benchmarking s3/bench_read/72025a81-a4b6-46dc-b485-8d875d23c3a5:
|
|
137
|
+
time: [4.8315 ms 4.9331 ms 5.0403 ms]
|
|
138
|
+
thrpt: [3.1000 GiB/s 3.1674 GiB/s 3.2340 GiB/s]
|
|
139
|
+
|
|
140
|
+
Benchmarking s3/bench_buf_read/72025a81-a4b6-46dc-b485-8d875d23c3a5:
|
|
141
|
+
time: [16.246 ms 16.539 ms 16.833 ms]
|
|
142
|
+
thrpt: [950.52 MiB/s 967.39 MiB/s 984.84 MiB/s]
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
- s3, after
|
|
146
|
+
|
|
147
|
+
```shell
|
|
148
|
+
Benchmarking s3/bench_read/6971c464-15f7-48d6-b69c-c8abc7774802:
|
|
149
|
+
time: [4.4222 ms 4.5685 ms 4.7181 ms]
|
|
150
|
+
thrpt: [3.3117 GiB/s 3.4202 GiB/s 3.5333 GiB/s]
|
|
151
|
+
|
|
152
|
+
Benchmarking s3/bench_buf_read/6971c464-15f7-48d6-b69c-c8abc7774802:
|
|
153
|
+
time: [5.5598 ms 5.7174 ms 5.8691 ms]
|
|
154
|
+
thrpt: [2.6622 GiB/s 2.7329 GiB/s 2.8103 GiB/s]
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Object API
|
|
158
|
+
|
|
159
|
+
Other changes are just a re-order of APIs.
|
|
160
|
+
|
|
161
|
+
- `Operator::read() -> BoxedAsyncRead` => `Object::reader() -> Reader`
|
|
162
|
+
- `Operator::write(r: BoxedAsyncRead, size: u64)` => `Object::writer() -> Writer`
|
|
163
|
+
- `Operator::stat() -> Object` => `Object::metadata() -> Metadata`
|
|
164
|
+
- `Operator::delete()` => `Object::delete()`
|
|
165
|
+
|
|
166
|
+
# Drawbacks
|
|
167
|
+
|
|
168
|
+
None.
|
|
169
|
+
|
|
170
|
+
# Rationale and alternatives
|
|
171
|
+
|
|
172
|
+
None
|
|
173
|
+
|
|
174
|
+
# Prior art
|
|
175
|
+
|
|
176
|
+
None
|
|
177
|
+
|
|
178
|
+
# Unresolved questions
|
|
179
|
+
|
|
180
|
+
None
|
|
181
|
+
|
|
182
|
+
# Future possibilities
|
|
183
|
+
|
|
184
|
+
- Implement `AsyncWrite` for `Writer` so that we can use `Writer` easier.
|
|
185
|
+
- Implement `Operator::objects()` to return an object iterator.
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
- Proposal Name: `error_handle`
|
|
2
|
+
- Start Date: 2022-02-23
|
|
3
|
+
- RFC PR: [apache/opendal#44](https://github.com/apache/opendal/pull/44)
|
|
4
|
+
- Tracking Issue: [apache/opendal#43](https://github.com/apache/opendal/pull/43)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Enhanced error handling for OpenDAL.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
OpenDAL didn't handle errors correctly.
|
|
13
|
+
|
|
14
|
+
```rust
|
|
15
|
+
fn parse_unexpected_error<E>(_: SdkError<E>, path: &str) -> Error {
|
|
16
|
+
Error::Unexpected(path.to_string())
|
|
17
|
+
}
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Most of the time, we return a path that is meaningless for debugging.
|
|
21
|
+
|
|
22
|
+
There are two issues about this shortcoming:
|
|
23
|
+
|
|
24
|
+
- [error: Split ErrorKind and Context for error check easier](https://github.com/apache/opendal/issues/24)
|
|
25
|
+
- [Improvement: provides more information about the cause of DalTransportError](https://github.com/apache/opendal/issues/29)
|
|
26
|
+
|
|
27
|
+
First, we can't check `ErrorKind` quickly. We have to use `matches` for the help:
|
|
28
|
+
|
|
29
|
+
```rust
|
|
30
|
+
assert!(
|
|
31
|
+
matches!(
|
|
32
|
+
result.err().unwrap(),
|
|
33
|
+
opendal::error::Error::ObjectNotExist(_)
|
|
34
|
+
),
|
|
35
|
+
);
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Then, we didn't bring enough information for users to debug what happened inside OpenDAL.
|
|
39
|
+
|
|
40
|
+
So we must handle errors correctly, so that:
|
|
41
|
+
|
|
42
|
+
- We can check the `Kind` to know what error happened.
|
|
43
|
+
- We can read `context` to know more details.
|
|
44
|
+
- We can get the source of this error to know more details.
|
|
45
|
+
|
|
46
|
+
# Guide-level explanation
|
|
47
|
+
|
|
48
|
+
Now we are trying to get an object's metadata:
|
|
49
|
+
|
|
50
|
+
```rust
|
|
51
|
+
let meta = o.metadata().await;
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Unfortunately, the `Object` does not exist, so we can check out what happened.
|
|
55
|
+
|
|
56
|
+
```rust
|
|
57
|
+
if let Err(e) = meta {
|
|
58
|
+
if e.kind() == Kind::ObjectNotExist {
|
|
59
|
+
// Handle this error
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
It's possible that we don't care about other errors. It's OK to log it out:
|
|
65
|
+
|
|
66
|
+
```rust
|
|
67
|
+
if let Err(e) = meta {
|
|
68
|
+
if e.kind() == Kind::ObjectNotExist {
|
|
69
|
+
// Handle this error
|
|
70
|
+
} else {
|
|
71
|
+
error!("{e}");
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
For a backend implementer, we can provide as much information as possible. For example, we can return `bucket is empty` to let the user know:
|
|
77
|
+
|
|
78
|
+
```rust
|
|
79
|
+
return Err(Error::Backend {
|
|
80
|
+
kind: Kind::BackendConfigurationInvalid,
|
|
81
|
+
context: HashMap::from([("bucket".to_string(), "".to_string())]),
|
|
82
|
+
source: anyhow!("bucket is empty"),
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Or, we can return an underlying error to let users figure out:
|
|
87
|
+
|
|
88
|
+
```rust
|
|
89
|
+
Error::Object {
|
|
90
|
+
kind: Kind::Unexpected,
|
|
91
|
+
op,
|
|
92
|
+
path: path.to_string(),
|
|
93
|
+
source: anyhow::Error::from(err),
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
So our application users will get enough information now:
|
|
98
|
+
|
|
99
|
+
```shell
|
|
100
|
+
Object { kind: ObjectNotExist, op: "stat", path: "/tmp/998e4dec-c84b-4164-a7a1-1f140654934f", source: No such file or directory (os error 2) }
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Reference-level explanation
|
|
105
|
+
|
|
106
|
+
We will split `Error` into `Error` and `Kind`.
|
|
107
|
+
|
|
108
|
+
`Kind` is an enum organized by different categories.
|
|
109
|
+
|
|
110
|
+
Every error will map to a kind, which will be in the error message.
|
|
111
|
+
|
|
112
|
+
```rust
|
|
113
|
+
pub enum Kind {
|
|
114
|
+
#[error("backend not supported")]
|
|
115
|
+
BackendNotSupported,
|
|
116
|
+
#[error("backend configuration invalid")]
|
|
117
|
+
BackendConfigurationInvalid,
|
|
118
|
+
|
|
119
|
+
#[error("object not exist")]
|
|
120
|
+
ObjectNotExist,
|
|
121
|
+
#[error("object permission denied")]
|
|
122
|
+
ObjectPermissionDenied,
|
|
123
|
+
|
|
124
|
+
#[error("unexpected")]
|
|
125
|
+
Unexpected,
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
In `Error`, we will have different struct variants to carry different contexts:
|
|
130
|
+
|
|
131
|
+
```rust
|
|
132
|
+
pub enum Error {
|
|
133
|
+
#[error("{kind}: (context: {context:?}, source: {source})")]
|
|
134
|
+
Backend {
|
|
135
|
+
kind: Kind,
|
|
136
|
+
context: HashMap<String, String>,
|
|
137
|
+
source: anyhow::Error,
|
|
138
|
+
},
|
|
139
|
+
|
|
140
|
+
#[error("{kind}: (op: {op}, path: {path}, source: {source})")]
|
|
141
|
+
Object {
|
|
142
|
+
kind: Kind,
|
|
143
|
+
op: &'static str,
|
|
144
|
+
path: String,
|
|
145
|
+
source: anyhow::Error,
|
|
146
|
+
},
|
|
147
|
+
|
|
148
|
+
#[error("unexpected: (source: {0})")]
|
|
149
|
+
Unexpected(#[from] anyhow::Error),
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Every one of them will carry a source: `anyhow::Error` so that users can get the complete picture of this error. We have implemented `Error::kind()`; other helper functions are possible, but they are out of this RFC's scope.
|
|
154
|
+
|
|
155
|
+
```rust
|
|
156
|
+
pub fn kind(&self) -> Kind {
|
|
157
|
+
match self {
|
|
158
|
+
Error::Backend { kind, .. } => *kind,
|
|
159
|
+
Error::Object { kind, .. } => *kind,
|
|
160
|
+
Error::Unexpected(_) => Kind::Unexpected,
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
The implementer should do their best to carry as much context as possible. For example, they should return `Error::Object` to carry the `op` and `path`, instead of just returning `Error::Unexpected(anyhow::Error::from(err))`.
|
|
166
|
+
|
|
167
|
+
```rust
|
|
168
|
+
Error::Object {
|
|
169
|
+
kind: Kind::Unexpected,
|
|
170
|
+
op,
|
|
171
|
+
path: path.to_string(),
|
|
172
|
+
source: anyhow::Error::from(err),
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
# Drawbacks
|
|
177
|
+
|
|
178
|
+
None
|
|
179
|
+
|
|
180
|
+
# Rationale and alternatives
|
|
181
|
+
|
|
182
|
+
## Why don't we implement `backtrace`?
|
|
183
|
+
|
|
184
|
+
`backtrace` is not stable yet, and `OpenDAL` must be compilable on stable Rust.
|
|
185
|
+
|
|
186
|
+
This proposal doesn't erase the possibility to add support once `backtrace` is stable.
|
|
187
|
+
|
|
188
|
+
# Prior art
|
|
189
|
+
|
|
190
|
+
None
|
|
191
|
+
|
|
192
|
+
# Unresolved questions
|
|
193
|
+
|
|
194
|
+
None
|
|
195
|
+
|
|
196
|
+
# Future possibilities
|
|
197
|
+
|
|
198
|
+
- `Backtrace` support.
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
- Proposal Name: `auto_region`
|
|
2
|
+
- Start Date: 2022-02-24
|
|
3
|
+
- RFC PR: [apache/opendal#57](https://github.com/apache/opendal/pull/57)
|
|
4
|
+
- Tracking Issue: [apache/opendal#58](https://github.com/apache/opendal/issues/58)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Automatically detecting user's s3 region.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
Current behavior for `region` and `endpoint` is buggy. `endpoint=https://s3.amazonaws.com` and `endpoint=""` are expected to be the same, because `endpoint=""` means take the default value `https://s3.amazonaws.com`. However, they aren't.
|
|
13
|
+
|
|
14
|
+
The S3 SDK has a mechanism to construct the correct API endpoint. It works like `format!("s3.{}.amazonaws.com", region)` internally. But if we set the endpoint to `https://s3.amazonaws.com`, the SDK will treat this endpoint as static.
|
|
15
|
+
|
|
16
|
+
So users could meet errors like:
|
|
17
|
+
|
|
18
|
+
```shell
|
|
19
|
+
attempting to access must be addressed using the specified endpoint
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Automatically detecting the user's s3 region will help resolve this problem. Users don't need to care about the region anymore, `OpenDAL` will figure it out. Everything works regardless of whether the input is `s3.amazonaws.com` or `s3.us-east-1.amazonaws.com`.
|
|
23
|
+
|
|
24
|
+
# Guide-level explanation
|
|
25
|
+
|
|
26
|
+
`OpenDAL` will remove `region` option, and users only need to set the `endpoint` now.
|
|
27
|
+
|
|
28
|
+
Valid inputs include:
|
|
29
|
+
|
|
30
|
+
- `https://s3.amazonaws.com`
|
|
31
|
+
- `https://s3.us-east-1.amazonaws.com`
|
|
32
|
+
- `https://oss-ap-northeast-1.aliyuncs.com`
|
|
33
|
+
- `http://127.0.0.1:9000`
|
|
34
|
+
|
|
35
|
+
`OpenDAL` will handle the `region` internally and automatically.
|
|
36
|
+
|
|
37
|
+
# Reference-level explanation
|
|
38
|
+
|
|
39
|
+
S3 services support a mechanism to indicate the correct region themselves.
|
|
40
|
+
|
|
41
|
+
Sending a `HEAD` request to `<endpoint>/<bucket>` will get a response like:
|
|
42
|
+
|
|
43
|
+
```shell
|
|
44
|
+
:) curl -I https://s3.amazonaws.com/databend-shared
|
|
45
|
+
HTTP/1.1 301 Moved Permanently
|
|
46
|
+
x-amz-bucket-region: us-east-2
|
|
47
|
+
x-amz-request-id: NPYSWK7WXJD1KQG7
|
|
48
|
+
x-amz-id-2: 3FJSJ5HACKqLbeeXBUUE3GoPL1IGDjLl6SZx/fw2MS+k0GND0UwDib5YQXE6CThiQxpYBWZjgxs=
|
|
49
|
+
Content-Type: application/xml
|
|
50
|
+
Date: Thu, 24 Feb 2022 05:15:13 GMT
|
|
51
|
+
Server: AmazonS3
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
`x-amz-bucket-region: us-east-2` will be returned, and we can use this region to construct the correct endpoint for this bucket:
|
|
55
|
+
|
|
56
|
+
```shell
|
|
57
|
+
:) curl -I https://s3.us-east-2.amazonaws.com/databend-shared
|
|
58
|
+
HTTP/1.1 403 Forbidden
|
|
59
|
+
x-amz-bucket-region: us-east-2
|
|
60
|
+
x-amz-request-id: 98CN5MYV3GQ1XMPY
|
|
61
|
+
x-amz-id-2: Tdxy36bRRP21Oip18KMQ7FG63MTeXOpXdd5/N3izFH0oalPODVaRlpCkDU3oUN0HIE24/ezX5Dc=
|
|
62
|
+
Content-Type: application/xml
|
|
63
|
+
Date: Thu, 24 Feb 2022 05:16:57 GMT
|
|
64
|
+
Server: AmazonS3
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
It also works for S3 compatible services like MinIO:
|
|
68
|
+
|
|
69
|
+
```shell
|
|
70
|
+
# Start minio with `MINIO_SITE_REGION` configured
|
|
71
|
+
:) MINIO_SITE_REGION=test minio server .
|
|
72
|
+
# Sending request to minio bucket
|
|
73
|
+
:) curl -I 127.0.0.1:9900/databend
|
|
74
|
+
HTTP/1.1 403 Forbidden
|
|
75
|
+
Accept-Ranges: bytes
|
|
76
|
+
Content-Length: 0
|
|
77
|
+
Content-Security-Policy: block-all-mixed-content
|
|
78
|
+
Server: MinIO
|
|
79
|
+
Strict-Transport-Security: max-age=31536000; includeSubDomains
|
|
80
|
+
Vary: Origin
|
|
81
|
+
Vary: Accept-Encoding
|
|
82
|
+
X-Amz-Bucket-Region: test
|
|
83
|
+
X-Amz-Request-Id: 16D6A12DCA57E0FA
|
|
84
|
+
X-Content-Type-Options: nosniff
|
|
85
|
+
X-Xss-Protection: 1; mode=block
|
|
86
|
+
Date: Thu, 24 Feb 2022 05:18:51 GMT
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
We can use this mechanism to detect `region` automatically. The algorithm works as follows:
|
|
90
|
+
|
|
91
|
+
- If `endpoint` is empty, fill it with `https://s3.amazonaws.com` and the corresponding template: `https://s3.{region}.amazonaws.com`.
|
|
92
|
+
- Sending a `HEAD` request to `<endpoint>/<bucket>`.
|
|
93
|
+
- If got `200` or `403` response, the endpoint works.
|
|
94
|
+
- Use this endpoint directly without filling the template.
|
|
95
|
+
- Take the header `x-amz-bucket-region` as the region to fill the endpoint.
|
|
96
|
+
- Use the fallback value `us-east-1` to make the SDK happy if the header does not exist.
|
|
97
|
+
- If got a `301` response, the endpoint needs construction.
|
|
98
|
+
- Take the header `x-amz-bucket-region` as the region to fill the endpoint.
|
|
99
|
+
- Return an error to the user if not exist.
|
|
100
|
+
- If got `404`, the bucket may not exist, or the endpoint is incorrect.
|
|
101
|
+
- Return an error to the user.
|
|
102
|
+
|
|
103
|
+
# Drawbacks
|
|
104
|
+
|
|
105
|
+
None.
|
|
106
|
+
|
|
107
|
+
# Rationale and alternatives
|
|
108
|
+
|
|
109
|
+
## Use virtual style `<bucket>.<endpoint>`?
|
|
110
|
+
|
|
111
|
+
The virtual style works too. But not all services support this kind of API endpoint. For example, using `http://testbucket.127.0.0.1` is wrong, and we need to do extra checks.
|
|
112
|
+
|
|
113
|
+
Using `<endpoint>/<bucket>` makes everything easier.
|
|
114
|
+
|
|
115
|
+
## Use `ListBuckets` API?
|
|
116
|
+
|
|
117
|
+
`ListBuckets` requires higher permission than normal bucket read and write operations. It's better to finish the job without requesting more permission.
|
|
118
|
+
|
|
119
|
+
## Misbehaving S3 Compatible Services
|
|
120
|
+
|
|
121
|
+
Many services didn't implement S3 API correctly.
|
|
122
|
+
|
|
123
|
+
Aliyun OSS will return `404` for every bucket:
|
|
124
|
+
|
|
125
|
+
```shell
|
|
126
|
+
:) curl -I https://aliyuncs.com/<my-existing-bucket>
|
|
127
|
+
HTTP/2 404
|
|
128
|
+
date: Thu, 24 Feb 2022 05:32:57 GMT
|
|
129
|
+
content-type: text/html
|
|
130
|
+
content-length: 690
|
|
131
|
+
ufe-result: A6
|
|
132
|
+
set-cookie: thw=cn; Path=/; Domain=.taobao.com; Expires=Fri, 24-Feb-23 05:32:57 GMT;
|
|
133
|
+
server: Tengine/Aserver
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
QingStor Object Storage will return `301` with the `Location` header:
|
|
137
|
+
|
|
138
|
+
```shell
|
|
139
|
+
:) curl -I https://s3.qingstor.com/community
|
|
140
|
+
HTTP/1.1 301 Moved Permanently
|
|
141
|
+
Server: nginx/1.13.6
|
|
142
|
+
Date: Thu, 24 Feb 2022 05:33:55 GMT
|
|
143
|
+
Connection: keep-alive
|
|
144
|
+
Location: https://pek3a.s3.qingstor.com/community
|
|
145
|
+
X-Qs-Request-Id: 05b83b615c801a3d
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
In this proposal, we will not figure them out. It's easier for the user to fill the correct endpoint instead of automatically detecting them.
|
|
149
|
+
|
|
150
|
+
# Prior art
|
|
151
|
+
|
|
152
|
+
None
|
|
153
|
+
|
|
154
|
+
# Unresolved questions
|
|
155
|
+
|
|
156
|
+
None
|
|
157
|
+
|
|
158
|
+
# Future possibilities
|
|
159
|
+
|
|
160
|
+
None
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
- Proposal Name: `object_stream`
|
|
2
|
+
- Start Date: 2022-02-25
|
|
3
|
+
- RFC PR: [apache/opendal#69](https://github.com/apache/opendal/pull/69)
|
|
4
|
+
- Tracking Issue: [apache/opendal#69](https://github.com/apache/opendal/issues/69)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Allow user to read dir via `ObjectStream`.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
Users need `readdir` support in `OpenDAL`: [Implement List support](https://github.com/apache/opendal/issues/12). Take [databend] for example, with `List` support, we can implement copy from `s3://bucket/path/to/dir` instead of only `s3://bucket/path/to/file`.
|
|
13
|
+
|
|
14
|
+
# Guide-level explanation
|
|
15
|
+
|
|
16
|
+
`Operator` supports a new action called `objects("path/to/dir")` which returns an `ObjectStream`; we can iterate over the current dir like `std::fs::ReadDir`:
|
|
17
|
+
|
|
18
|
+
```rust
|
|
19
|
+
let mut obs = op.objects("").map(|o| o.expect("list object"));
|
|
20
|
+
while let Some(o) = obs.next().await {
|
|
21
|
+
// Do something upon `Object`.
|
|
22
|
+
}
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
To better support different file modes, there is a new object meta called `ObjectMode`:
|
|
26
|
+
|
|
27
|
+
```rust
|
|
28
|
+
let meta = o.metadata().await?;
|
|
29
|
+
let mode = meta.mode();
|
|
30
|
+
if mode.contains(ObjectMode::FILE) {
|
|
31
|
+
// Do something on a file object.
|
|
32
|
+
} else if mode.contains(ObjectMode::DIR) {
|
|
33
|
+
// Do something on a dir object.
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
We will try to cache some object metadata so that users can reduce `stat` calls:
|
|
38
|
+
|
|
39
|
+
```rust
|
|
40
|
+
let meta = o.metadata_cached().await?;
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
`o.metadata_cached()` will return local cached metadata if available.
|
|
44
|
+
|
|
45
|
+
# Reference-level explanation
|
|
46
|
+
|
|
47
|
+
First, we will add a new API in `Accessor`:
|
|
48
|
+
|
|
49
|
+
```rust
|
|
50
|
+
pub type BoxedObjectStream = Box<dyn futures::Stream<Item = Result<Object>> + Unpin + Send>;
|
|
51
|
+
|
|
52
|
+
async fn list(&self, args: &OpList) -> Result<BoxedObjectStream> {
|
|
53
|
+
let _ = args;
|
|
54
|
+
unimplemented!()
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
To support options in the future, we will wrap this call via `ObjectStream`:
|
|
59
|
+
|
|
60
|
+
```rust
|
|
61
|
+
pub struct ObjectStream {
|
|
62
|
+
acc: Arc<dyn Accessor>,
|
|
63
|
+
path: String,
|
|
64
|
+
|
|
65
|
+
state: State,
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
enum State {
|
|
69
|
+
Idle,
|
|
70
|
+
Sending(BoxFuture<'static, Result<BoxedObjectStream>>),
|
|
71
|
+
Listing(BoxedObjectStream),
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
So the public API to end-users will be:
|
|
76
|
+
|
|
77
|
+
```rust
|
|
78
|
+
impl Operator {
|
|
79
|
+
pub fn objects(&self, path: &str) -> ObjectStream {
|
|
80
|
+
ObjectStream::new(self.inner(), path)
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
For cached metadata support, we will add a flag in `Metadata`:
|
|
86
|
+
|
|
87
|
+
```rust
|
|
88
|
+
#[derive(Debug, Clone, Default)]
|
|
89
|
+
pub struct Metadata {
|
|
90
|
+
complete: bool,
|
|
91
|
+
|
|
92
|
+
path: String,
|
|
93
|
+
mode: Option<ObjectMode>,
|
|
94
|
+
|
|
95
|
+
content_length: Option<u64>,
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
And add a new API `Object::metadata_cached()`:
|
|
100
|
+
|
|
101
|
+
```rust
|
|
102
|
+
pub async fn metadata_cached(&mut self) -> Result<&Metadata> {
|
|
103
|
+
if self.meta.complete() {
|
|
104
|
+
return Ok(&self.meta);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
let op = &OpStat::new(self.meta.path());
|
|
108
|
+
self.meta = self.acc.stat(op).await?;
|
|
109
|
+
|
|
110
|
+
Ok(&self.meta)
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
The backend implementer must make sure `complete` is correctly set.
|
|
115
|
+
|
|
116
|
+
`Metadata` will be immutable from the outside, so all `set_xxx` APIs will be crate-public only:
|
|
117
|
+
|
|
118
|
+
```rust
|
|
119
|
+
pub(crate) fn set_content_length(&mut self, content_length: u64) -> &mut Self {
|
|
120
|
+
self.content_length = Some(content_length);
|
|
121
|
+
self
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
# Drawbacks
|
|
126
|
+
|
|
127
|
+
None
|
|
128
|
+
|
|
129
|
+
# Rationale and alternatives
|
|
130
|
+
|
|
131
|
+
None
|
|
132
|
+
|
|
133
|
+
# Prior art
|
|
134
|
+
|
|
135
|
+
None
|
|
136
|
+
|
|
137
|
+
# Unresolved questions
|
|
138
|
+
|
|
139
|
+
None
|
|
140
|
+
|
|
141
|
+
# Future possibilities
|
|
142
|
+
|
|
143
|
+
- More precise field-level metadata cache so that user can send `stat` only when needed.
|
|
144
|
+
|
|
145
|
+
[databend]: https://github.com/datafuselabs/databend
|