opendal 0.1.6.pre.rc.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +20 -0
- data/.tool-versions +1 -0
- data/.yardopts +1 -0
- data/DEPENDENCIES.md +9 -0
- data/DEPENDENCIES.rust.tsv +277 -0
- data/Gemfile +35 -0
- data/README.md +159 -0
- data/Rakefile +149 -0
- data/core/CHANGELOG.md +4929 -0
- data/core/CONTRIBUTING.md +61 -0
- data/core/DEPENDENCIES.md +3 -0
- data/core/DEPENDENCIES.rust.tsv +185 -0
- data/core/LICENSE +201 -0
- data/core/README.md +228 -0
- data/core/benches/README.md +18 -0
- data/core/benches/ops/README.md +26 -0
- data/core/benches/types/README.md +9 -0
- data/core/benches/vs_fs/README.md +35 -0
- data/core/benches/vs_s3/README.md +55 -0
- data/core/edge/README.md +3 -0
- data/core/edge/file_write_on_full_disk/README.md +14 -0
- data/core/edge/s3_aws_assume_role_with_web_identity/README.md +18 -0
- data/core/edge/s3_read_on_wasm/.gitignore +3 -0
- data/core/edge/s3_read_on_wasm/README.md +42 -0
- data/core/edge/s3_read_on_wasm/webdriver.json +15 -0
- data/core/examples/README.md +23 -0
- data/core/examples/basic/README.md +15 -0
- data/core/examples/concurrent-upload/README.md +15 -0
- data/core/examples/multipart-upload/README.md +15 -0
- data/core/fuzz/.gitignore +5 -0
- data/core/fuzz/README.md +68 -0
- data/core/src/docs/comparisons/vs_object_store.md +183 -0
- data/core/src/docs/performance/concurrent_write.md +101 -0
- data/core/src/docs/performance/http_optimization.md +124 -0
- data/core/src/docs/rfcs/0000_example.md +74 -0
- data/core/src/docs/rfcs/0000_foyer_integration.md +111 -0
- data/core/src/docs/rfcs/0041_object_native_api.md +185 -0
- data/core/src/docs/rfcs/0044_error_handle.md +198 -0
- data/core/src/docs/rfcs/0057_auto_region.md +160 -0
- data/core/src/docs/rfcs/0069_object_stream.md +145 -0
- data/core/src/docs/rfcs/0090_limited_reader.md +155 -0
- data/core/src/docs/rfcs/0112_path_normalization.md +79 -0
- data/core/src/docs/rfcs/0191_async_streaming_io.md +328 -0
- data/core/src/docs/rfcs/0203_remove_credential.md +96 -0
- data/core/src/docs/rfcs/0221_create_dir.md +89 -0
- data/core/src/docs/rfcs/0247_retryable_error.md +87 -0
- data/core/src/docs/rfcs/0293_object_id.md +67 -0
- data/core/src/docs/rfcs/0337_dir_entry.md +191 -0
- data/core/src/docs/rfcs/0409_accessor_capabilities.md +67 -0
- data/core/src/docs/rfcs/0413_presign.md +154 -0
- data/core/src/docs/rfcs/0423_command_line_interface.md +268 -0
- data/core/src/docs/rfcs/0429_init_from_iter.md +107 -0
- data/core/src/docs/rfcs/0438_multipart.md +163 -0
- data/core/src/docs/rfcs/0443_gateway.md +73 -0
- data/core/src/docs/rfcs/0501_new_builder.md +111 -0
- data/core/src/docs/rfcs/0554_write_refactor.md +96 -0
- data/core/src/docs/rfcs/0561_list_metadata_reuse.md +210 -0
- data/core/src/docs/rfcs/0599_blocking_api.md +157 -0
- data/core/src/docs/rfcs/0623_redis_service.md +300 -0
- data/core/src/docs/rfcs/0627_split_capabilities.md +89 -0
- data/core/src/docs/rfcs/0661_path_in_accessor.md +126 -0
- data/core/src/docs/rfcs/0793_generic_kv_services.md +209 -0
- data/core/src/docs/rfcs/0926_object_reader.md +93 -0
- data/core/src/docs/rfcs/0977_refactor_error.md +151 -0
- data/core/src/docs/rfcs/1085_object_handler.md +73 -0
- data/core/src/docs/rfcs/1391_object_metadataer.md +110 -0
- data/core/src/docs/rfcs/1398_query_based_metadata.md +125 -0
- data/core/src/docs/rfcs/1420_object_writer.md +147 -0
- data/core/src/docs/rfcs/1477_remove_object_concept.md +159 -0
- data/core/src/docs/rfcs/1735_operation_extension.md +117 -0
- data/core/src/docs/rfcs/2083_writer_sink_api.md +106 -0
- data/core/src/docs/rfcs/2133_append_api.md +88 -0
- data/core/src/docs/rfcs/2299_chain_based_operator_api.md +99 -0
- data/core/src/docs/rfcs/2602_object_versioning.md +138 -0
- data/core/src/docs/rfcs/2758_merge_append_into_write.md +79 -0
- data/core/src/docs/rfcs/2774_lister_api.md +66 -0
- data/core/src/docs/rfcs/2779_list_with_metakey.md +143 -0
- data/core/src/docs/rfcs/2852_native_capability.md +58 -0
- data/core/src/docs/rfcs/2884_merge_range_read_into_read.md +80 -0
- data/core/src/docs/rfcs/3017_remove_write_copy_from.md +94 -0
- data/core/src/docs/rfcs/3197_config.md +237 -0
- data/core/src/docs/rfcs/3232_align_list_api.md +69 -0
- data/core/src/docs/rfcs/3243_list_prefix.md +128 -0
- data/core/src/docs/rfcs/3356_lazy_reader.md +111 -0
- data/core/src/docs/rfcs/3526_list_recursive.md +59 -0
- data/core/src/docs/rfcs/3574_concurrent_stat_in_list.md +80 -0
- data/core/src/docs/rfcs/3734_buffered_reader.md +64 -0
- data/core/src/docs/rfcs/3898_concurrent_writer.md +66 -0
- data/core/src/docs/rfcs/3911_deleter_api.md +165 -0
- data/core/src/docs/rfcs/4382_range_based_read.md +213 -0
- data/core/src/docs/rfcs/4638_executor.md +215 -0
- data/core/src/docs/rfcs/5314_remove_metakey.md +120 -0
- data/core/src/docs/rfcs/5444_operator_from_uri.md +162 -0
- data/core/src/docs/rfcs/5479_context.md +140 -0
- data/core/src/docs/rfcs/5485_conditional_reader.md +112 -0
- data/core/src/docs/rfcs/5495_list_with_deleted.md +81 -0
- data/core/src/docs/rfcs/5556_write_returns_metadata.md +121 -0
- data/core/src/docs/rfcs/5871_read_returns_metadata.md +112 -0
- data/core/src/docs/rfcs/6189_remove_native_blocking.md +106 -0
- data/core/src/docs/rfcs/6209_glob_support.md +132 -0
- data/core/src/docs/rfcs/6213_options_api.md +142 -0
- data/core/src/docs/rfcs/README.md +62 -0
- data/core/src/docs/upgrade.md +1556 -0
- data/core/src/services/aliyun_drive/docs.md +61 -0
- data/core/src/services/alluxio/docs.md +45 -0
- data/core/src/services/azblob/docs.md +77 -0
- data/core/src/services/azdls/docs.md +73 -0
- data/core/src/services/azfile/docs.md +65 -0
- data/core/src/services/b2/docs.md +54 -0
- data/core/src/services/cacache/docs.md +38 -0
- data/core/src/services/cloudflare_kv/docs.md +21 -0
- data/core/src/services/cos/docs.md +55 -0
- data/core/src/services/d1/docs.md +48 -0
- data/core/src/services/dashmap/docs.md +38 -0
- data/core/src/services/dbfs/docs.md +57 -0
- data/core/src/services/dropbox/docs.md +64 -0
- data/core/src/services/etcd/docs.md +45 -0
- data/core/src/services/foundationdb/docs.md +42 -0
- data/core/src/services/fs/docs.md +49 -0
- data/core/src/services/ftp/docs.md +42 -0
- data/core/src/services/gcs/docs.md +76 -0
- data/core/src/services/gdrive/docs.md +65 -0
- data/core/src/services/ghac/docs.md +84 -0
- data/core/src/services/github/docs.md +52 -0
- data/core/src/services/gridfs/docs.md +46 -0
- data/core/src/services/hdfs/docs.md +140 -0
- data/core/src/services/hdfs_native/docs.md +35 -0
- data/core/src/services/http/docs.md +45 -0
- data/core/src/services/huggingface/docs.md +61 -0
- data/core/src/services/ipfs/docs.md +45 -0
- data/core/src/services/ipmfs/docs.md +14 -0
- data/core/src/services/koofr/docs.md +51 -0
- data/core/src/services/lakefs/docs.md +62 -0
- data/core/src/services/memcached/docs.md +47 -0
- data/core/src/services/memory/docs.md +36 -0
- data/core/src/services/mini_moka/docs.md +19 -0
- data/core/src/services/moka/docs.md +42 -0
- data/core/src/services/mongodb/docs.md +49 -0
- data/core/src/services/monoiofs/docs.md +46 -0
- data/core/src/services/mysql/docs.md +47 -0
- data/core/src/services/obs/docs.md +54 -0
- data/core/src/services/onedrive/docs.md +115 -0
- data/core/src/services/opfs/docs.md +18 -0
- data/core/src/services/oss/docs.md +74 -0
- data/core/src/services/pcloud/docs.md +51 -0
- data/core/src/services/persy/docs.md +43 -0
- data/core/src/services/postgresql/docs.md +47 -0
- data/core/src/services/redb/docs.md +41 -0
- data/core/src/services/redis/docs.md +43 -0
- data/core/src/services/rocksdb/docs.md +54 -0
- data/core/src/services/s3/compatible_services.md +126 -0
- data/core/src/services/s3/docs.md +244 -0
- data/core/src/services/seafile/docs.md +54 -0
- data/core/src/services/sftp/docs.md +49 -0
- data/core/src/services/sled/docs.md +39 -0
- data/core/src/services/sqlite/docs.md +46 -0
- data/core/src/services/surrealdb/docs.md +54 -0
- data/core/src/services/swift/compatible_services.md +53 -0
- data/core/src/services/swift/docs.md +52 -0
- data/core/src/services/tikv/docs.md +43 -0
- data/core/src/services/upyun/docs.md +51 -0
- data/core/src/services/vercel_artifacts/docs.md +40 -0
- data/core/src/services/vercel_blob/docs.md +45 -0
- data/core/src/services/webdav/docs.md +49 -0
- data/core/src/services/webhdfs/docs.md +90 -0
- data/core/src/services/yandex_disk/docs.md +45 -0
- data/core/tests/behavior/README.md +77 -0
- data/core/tests/data/normal_dir/.gitkeep +0 -0
- data/core/tests/data/normal_file.txt +1041 -0
- data/core/tests/data/special_dir !@#$%^&()_+-=;',/.gitkeep +0 -0
- data/core/tests/data/special_file !@#$%^&()_+-=;',.txt +1041 -0
- data/core/users.md +13 -0
- data/extconf.rb +24 -0
- data/lib/opendal.rb +25 -0
- data/lib/opendal_ruby/entry.rb +35 -0
- data/lib/opendal_ruby/io.rb +70 -0
- data/lib/opendal_ruby/metadata.rb +44 -0
- data/lib/opendal_ruby/opendal_ruby.so +0 -0
- data/lib/opendal_ruby/operator.rb +29 -0
- data/lib/opendal_ruby/operator_info.rb +26 -0
- data/opendal.gemspec +91 -0
- data/test/blocking_op_test.rb +112 -0
- data/test/capability_test.rb +42 -0
- data/test/io_test.rb +172 -0
- data/test/lister_test.rb +77 -0
- data/test/metadata_test.rb +78 -0
- data/test/middlewares_test.rb +46 -0
- data/test/operator_info_test.rb +35 -0
- data/test/test_helper.rb +36 -0
- metadata +240 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
- Proposal Name: `generic-kv-services`
|
|
2
|
+
- Start Date: 2022-10-03
|
|
3
|
+
- RFC PR: [apache/opendal#793](https://github.com/apache/opendal/pull/793)
|
|
4
|
+
- Tracking Issue: [apache/opendal#794](https://github.com/apache/opendal/issues/794)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Add generic kv services support to OpenDAL.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
OpenDAL now has some kv services support:
|
|
13
|
+
|
|
14
|
+
- memory
|
|
15
|
+
- redis
|
|
16
|
+
|
|
17
|
+
However, maintaining them is complex and error-prone. We don't want to implement similar logic for every kv
|
|
18
|
+
service. This RFC intends to introduce a generic kv service so that we can:
|
|
19
|
+
|
|
20
|
+
- Implement OpenDAL Accessor on this generic kv service
|
|
21
|
+
- Add new kv service support via generic kv API.
|
|
22
|
+
|
|
23
|
+
# Guide-level explanation
|
|
24
|
+
|
|
25
|
+
No user-side changes.
|
|
26
|
+
|
|
27
|
+
# Reference-level explanation
|
|
28
|
+
|
|
29
|
+
OpenDAL will introduce a generic kv service:
|
|
30
|
+
|
|
31
|
+
```rust
|
|
32
|
+
trait KeyValueAccessor {
|
|
33
|
+
async fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>>;
|
|
34
|
+
async fn set(&self, key: &[u8], value: &[u8]) -> Result<()>;
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
We will implement the OpenDAL service on `KeyValueAccessor`. To add new kv service support, users only need to implement
|
|
39
|
+
it against `KeyValueAccessor`.
|
|
40
|
+
|
|
41
|
+
## Spec
|
|
42
|
+
|
|
43
|
+
This RFC is mainly inspired
|
|
44
|
+
by [TiFS: FUSE based on TiKV](https://github.com/Hexilee/tifs/blob/main/contribution/design.md). We will use the
|
|
45
|
+
same `ScopedKey` idea in `TiFS`.
|
|
46
|
+
|
|
47
|
+
```rust
|
|
48
|
+
pub enum ScopedKey {
|
|
49
|
+
Meta,
|
|
50
|
+
Inode(u64),
|
|
51
|
+
Block {
|
|
52
|
+
ino: u64,
|
|
53
|
+
block: u64,
|
|
54
|
+
},
|
|
55
|
+
Entry {
|
|
56
|
+
parent: u64,
|
|
57
|
+
name: String,
|
|
58
|
+
},
|
|
59
|
+
}
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
We can encode a scoped key into a byte array as a key. Following is the common layout of an encoded key.
|
|
63
|
+
|
|
64
|
+
```text
|
|
65
|
+
+ 1byte +<----------------------------+ dynamic size +------------------------------------>+
|
|
66
|
+
| | |
|
|
67
|
+
| | |
|
|
68
|
+
| | |
|
|
69
|
+
| | |
|
|
70
|
+
| | |
|
|
71
|
+
| | |
|
|
72
|
+
| v v
|
|
73
|
+
+------------------------------------------------------------------------------------------+
|
|
74
|
+
| | |
|
|
75
|
+
| scope | body |
|
|
76
|
+
| | |
|
|
77
|
+
+-------+----------------------------------------------------------------------------------+
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Meta
|
|
81
|
+
|
|
82
|
+
There is only one key in the meta scope. The meta key is designed to store metadata of our filesystem. Following is the
|
|
83
|
+
layout of an encoded meta key.
|
|
84
|
+
|
|
85
|
+
```text
|
|
86
|
+
+ 1byte +
|
|
87
|
+
| |
|
|
88
|
+
| |
|
|
89
|
+
| |
|
|
90
|
+
| |
|
|
91
|
+
| |
|
|
92
|
+
| |
|
|
93
|
+
| v
|
|
94
|
+
+-------+
|
|
95
|
+
| |
|
|
96
|
+
| 0 |
|
|
97
|
+
| |
|
|
98
|
+
+-------+
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
This key will store data:
|
|
102
|
+
|
|
103
|
+
```rust
|
|
104
|
+
pub struct Meta {
|
|
105
|
+
inode_next: u64,
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The meta-structure contains only an auto-increasing counter `inode_next`, designed to generate an inode number.
|
|
110
|
+
|
|
111
|
+
### Inode
|
|
112
|
+
|
|
113
|
+
Keys in the inode scope are designed to store attributes of files. Following is the layout of an encoded inode key.
|
|
114
|
+
|
|
115
|
+
```text
|
|
116
|
+
+ 1byte +<-------------------------------+ 8bytes +--------------------------------------->+
|
|
117
|
+
| | |
|
|
118
|
+
| | |
|
|
119
|
+
| | |
|
|
120
|
+
| | |
|
|
121
|
+
| | |
|
|
122
|
+
| | |
|
|
123
|
+
| v v
|
|
124
|
+
+------------------------------------------------------------------------------------------+
|
|
125
|
+
| | |
|
|
126
|
+
| 1 | inode number |
|
|
127
|
+
| | |
|
|
128
|
+
+-------+----------------------------------------------------------------------------------+
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
This key will store data:
|
|
132
|
+
|
|
133
|
+
```rust
|
|
134
|
+
pub struct Inode {
|
|
135
|
+
meta: Metadata,
|
|
136
|
+
blocks: HashMap<u64, u32>,
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
`blocks` is the map from `block_id` to `size`. We will use this map to calculate the correct blocks to read.
|
|
141
|
+
|
|
142
|
+
### Block
|
|
143
|
+
|
|
144
|
+
Keys in the block scope are designed to store blocks of a file. Following is the layout of an encoded block key.
|
|
145
|
+
|
|
146
|
+
```text
|
|
147
|
+
+ 1byte +<----------------- 8bytes ---------------->+<------------------- 8bytes ----------------->+
|
|
148
|
+
| | | |
|
|
149
|
+
| | | |
|
|
150
|
+
| | | |
|
|
151
|
+
| | | |
|
|
152
|
+
| | | |
|
|
153
|
+
| | | |
|
|
154
|
+
| v v v
|
|
155
|
+
+--------------------------------------------------------------------------------------------------+
|
|
156
|
+
| | | |
|
|
157
|
+
| 2 | inode number | block index |
|
|
158
|
+
| | | |
|
|
159
|
+
+-------+-------------------------------------------+----------------------------------------------+
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Entry
|
|
163
|
+
|
|
164
|
+
Keys in the file index scope are designed to store the entry of the file. Following is the layout of an encoded file
|
|
165
|
+
entry key.
|
|
166
|
+
|
|
167
|
+
```text
|
|
168
|
+
+ 1byte +<----------------- 8bytes ---------------->+<-------------- dynamic size ---------------->+
|
|
169
|
+
| | | |
|
|
170
|
+
| | | |
|
|
171
|
+
| | | |
|
|
172
|
+
| | | |
|
|
173
|
+
| | | |
|
|
174
|
+
| | | |
|
|
175
|
+
| v v v
|
|
176
|
+
+--------------------------------------------------------------------------------------------------+
|
|
177
|
+
| | | |
|
|
178
|
+
| 3 | inode number of parent directory | file name in utf-8 encoding |
|
|
179
|
+
| | | |
|
|
180
|
+
+-------+-------------------------------------------+----------------------------------------------+
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Store the correct inode number for this file.
|
|
184
|
+
|
|
185
|
+
```rust
|
|
186
|
+
pub struct Index {
|
|
187
|
+
pub ino: u64,
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
# Drawbacks
|
|
192
|
+
|
|
193
|
+
None.
|
|
194
|
+
|
|
195
|
+
# Rationale and alternatives
|
|
196
|
+
|
|
197
|
+
None.
|
|
198
|
+
|
|
199
|
+
# Prior art
|
|
200
|
+
|
|
201
|
+
None.
|
|
202
|
+
|
|
203
|
+
# Unresolved questions
|
|
204
|
+
|
|
205
|
+
None.
|
|
206
|
+
|
|
207
|
+
# Future possibilities
|
|
208
|
+
|
|
209
|
+
None.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
- Proposal Name: `object_reader`
|
|
2
|
+
- Start Date: 2022-11-13
|
|
3
|
+
- RFC PR: [apache/opendal#926](https://github.com/apache/opendal/pull/926)
|
|
4
|
+
- Tracking Issue: [apache/opendal#927](https://github.com/apache/opendal/issues/927)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Return read-related object metadata in the reader.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
Some services like s3 could return object meta while issuing reading requests.
|
|
13
|
+
|
|
14
|
+
In `GetObject`, we could get:
|
|
15
|
+
|
|
16
|
+
- Last-Modified
|
|
17
|
+
- Content-Length
|
|
18
|
+
- ETag
|
|
19
|
+
- Content-Range
|
|
20
|
+
- Content-Type
|
|
21
|
+
- Expires
|
|
22
|
+
|
|
23
|
+
By reusing that meta wisely, we can avoid extra `HeadObject` calls, each of which could take 50ms. For example, `Content-Range` returns the content range of this read in the whole object: `<unit> <range-start>-<range-end>/<size>`. By using the content range, we can avoid calling `HeadObject` to get this object's total size, which means a lot for the content cache.
|
|
24
|
+
|
|
25
|
+
# Guide-level explanation
|
|
26
|
+
|
|
27
|
+
`reader` and all its related API will return `ObjectReader` instead:
|
|
28
|
+
|
|
29
|
+
```diff
|
|
30
|
+
- pub async fn reader(&self) -> Result<impl BytesRead> {}
|
|
31
|
+
+ pub async fn reader(&self) -> Result<ObjectReader> {}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
`ObjectReader` implements `BytesRead` too, so existing code will keep working. And `ObjectReader` will provide similar APIs to `Entry`, for example:
|
|
35
|
+
|
|
36
|
+
```rust
|
|
37
|
+
pub async fn content_length(&self) -> Option<u64> {}
|
|
38
|
+
pub async fn last_modified(&self) -> Option<OffsetDateTime> {}
|
|
39
|
+
pub async fn etag(&self) -> Option<String> {}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Note:
|
|
43
|
+
|
|
44
|
+
- All fields are optional, as services like fs could not return them.
|
|
45
|
+
- `content_length` here is this read request's length, not the object's length.
|
|
46
|
+
|
|
47
|
+
# Reference-level explanation
|
|
48
|
+
|
|
49
|
+
We will change the API signature of `Accessor`:
|
|
50
|
+
|
|
51
|
+
```diff
|
|
52
|
+
- async fn read(&self, path: &str, args: OpRead) -> Result<BytesReader> {}
|
|
53
|
+
+ async fn read(&self, path: &str, args: OpRead) -> Result<ObjectReader> {}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
`ObjectReader` is a wrapper of `BytesReader` and `ObjectMetadata`:
|
|
57
|
+
|
|
58
|
+
```rust
|
|
59
|
+
pub struct ObjectReader {
|
|
60
|
+
inner: BytesReader,
|
|
61
|
+
meta: ObjectMetadata,
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
impl ObjectReader {
|
|
65
|
+
pub async fn content_length(&self) -> Option<u64> {}
|
|
66
|
+
pub async fn last_modified(&self) -> Option<OffsetDateTime> {}
|
|
67
|
+
pub async fn etag(&self) -> Option<String> {}
|
|
68
|
+
}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Services can decide whether or not to fill them.
|
|
72
|
+
|
|
73
|
+
# Drawbacks
|
|
74
|
+
|
|
75
|
+
None.
|
|
76
|
+
|
|
77
|
+
# Rationale and alternatives
|
|
78
|
+
|
|
79
|
+
None.
|
|
80
|
+
|
|
81
|
+
# Prior art
|
|
82
|
+
|
|
83
|
+
None.
|
|
84
|
+
|
|
85
|
+
# Unresolved questions
|
|
86
|
+
|
|
87
|
+
None.
|
|
88
|
+
|
|
89
|
+
# Future possibilities
|
|
90
|
+
|
|
91
|
+
## Add content-range support
|
|
92
|
+
|
|
93
|
+
We can add `content-range` in `ObjectMeta` so that users can fetch and use them.
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
- Proposal Name: `refactor-error`
|
|
2
|
+
- Start Date: 2022-11-21
|
|
3
|
+
- RFC PR: [apache/opendal#977](https://github.com/apache/opendal/pull/977)
|
|
4
|
+
- Tracking Issue: [apache/opendal#976](https://github.com/apache/opendal/pull/976)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Use a separate error instead of `std::io::Error`.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
OpenDAL used to use `std::io::Error` for all functions. This design is natural and easy to use. But there are many problems with the usage:
|
|
13
|
+
|
|
14
|
+
## Not friendly for retry
|
|
15
|
+
|
|
16
|
+
`io::Error` can't carry retry-related information. In [RFC-0247: Retryable Error](./0247_retryable_error.md), we use `io::ErrorKind::Interrupted` to indicate this error is retryable. But this change will hide the real error kind of the underlying error. To mark that this error has been retried, we have to add another new error wrapper:
|
|
17
|
+
|
|
18
|
+
```rust
|
|
19
|
+
#[derive(thiserror::Error, Debug)]
|
|
20
|
+
#[error("permanent error: still failing after retry, source: {source}")]
|
|
21
|
+
struct PermanentError {
|
|
22
|
+
source: Error,
|
|
23
|
+
}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## ErrorKind is inaccurate
|
|
27
|
+
|
|
28
|
+
`std::io::ErrorKind` is used to represent errors returned from system io, which is unsuitable for errors that have business semantics. For example, users can't distinguish `ObjectNotFound` or `BucketNotFound` from `ErrorKind::NotFound`.
|
|
29
|
+
|
|
30
|
+
## ErrorKind is incomplete
|
|
31
|
+
|
|
32
|
+
OpenDAL has been waiting for the [`io_error_more`](https://github.com/rust-lang/rust/issues/86442) feature to be stabilized for a long time. But there is no progress so far, which makes it impossible to return `ErrorKind::IsADirectory` or `ErrorKind::NotADirectory` on stable rust.
|
|
33
|
+
|
|
34
|
+
## Error is not easy to carry context
|
|
35
|
+
|
|
36
|
+
To carry context inside `std::io::Error`, we have to check and make sure all functions construct an `ObjectError` or `BackendError`:
|
|
37
|
+
|
|
38
|
+
```rust
|
|
39
|
+
#[derive(Error, Debug)]
|
|
40
|
+
#[error("object error: (op: {op}, path: {path}, source: {source})")]
|
|
41
|
+
pub struct ObjectError {
|
|
42
|
+
op: Operation,
|
|
43
|
+
path: String,
|
|
44
|
+
source: anyhow::Error,
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
To make things worse, we can't prevent opendal from returning raw io errors directly. For example, in `Object::range_read`:
|
|
49
|
+
|
|
50
|
+
```rust
|
|
51
|
+
pub async fn range_read(&self, range: impl RangeBounds<u64>) -> Result<Vec<u8>> {
|
|
52
|
+
...
|
|
53
|
+
|
|
54
|
+
io::copy(s, &mut bs).await?;
|
|
55
|
+
|
|
56
|
+
Ok(bs.into_inner())
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
We leaked the `io::Error` without any context.
|
|
61
|
+
|
|
62
|
+
# Guide-level explanation
|
|
63
|
+
|
|
64
|
+
Thus, I propose to add `opendal::Error` back with everything improved.
|
|
65
|
+
|
|
66
|
+
Users will have similar usage as before:
|
|
67
|
+
|
|
68
|
+
```rust
|
|
69
|
+
if let Err(e) = op.object("test_file").metadata().await {
|
|
70
|
+
if e.kind() == ErrorKind::ObjectNotFound {
|
|
71
|
+
println!("object not exist")
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Users can check if this error is `temporary`:
|
|
77
|
+
|
|
78
|
+
```rust
|
|
79
|
+
if err.is_temporary() {
|
|
80
|
+
// retry the operation
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Users can print error messages via `Display`:
|
|
85
|
+
|
|
86
|
+
```rust
|
|
87
|
+
> println!("{}", err);
|
|
88
|
+
|
|
89
|
+
ObjectNotFound (permanent) at read, context: { service: S3, path: path/to/file } => status code: 404, headers: {"x-amz-request-id": "GCYDTQX51YRSF4ZF", "x-amz-id-2": "EH0vV6lTwWk+lFXqCMCBSk1oovqhG4bzALU9+sUudyw7TEVrfWm2o/AFJKhYKpdGqOoBZGgMTC0=", "content-type": "application/xml", "date": "Mon, 21 Nov 2022 05:26:37 GMT", "server": "AmazonS3"}, body: ""
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Also, users can choose to print the more verbose message via `Debug`:
|
|
93
|
+
|
|
94
|
+
```rust
|
|
95
|
+
> println!("{:?}", err);
|
|
96
|
+
|
|
97
|
+
ObjectNotFound (permanent) at read => status code: 404, headers: {"x-amz-request-id": "GCYDTQX51YRSF4ZF", "x-amz-id-2": "EH0vV6lTwWk+lFXqCMCBSk1oovqhG4bzALU9+sUudyw7TEVrfWm2o/AFJKhYKpdGqOoBZGgMTC0=", "content-type": "application/xml", "date": "Mon, 21 Nov 2022 05:26:37 GMT", "server": "AmazonS3"}, body: ""
|
|
98
|
+
|
|
99
|
+
Context:
|
|
100
|
+
service: S3
|
|
101
|
+
path: path/to/file
|
|
102
|
+
|
|
103
|
+
Source: <source error>
|
|
104
|
+
|
|
105
|
+
Backtrace: <backtrace if we have>
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
# Reference-level explanation
|
|
109
|
+
|
|
110
|
+
We will add new `Error` and `ErrorKind` in opendal:
|
|
111
|
+
|
|
112
|
+
```rust
|
|
113
|
+
pub struct Error {
|
|
114
|
+
kind: ErrorKind,
|
|
115
|
+
message: String,
|
|
116
|
+
|
|
117
|
+
status: ErrorStatus,
|
|
118
|
+
operation: &'static str,
|
|
119
|
+
context: Vec<(&'static str, String)>,
|
|
120
|
+
source: Option<anyhow::Error>,
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
- status: the status of this error, which indicates if this error is temporary
|
|
125
|
+
- operation: the operation which generates this error
|
|
126
|
+
- context: the context related to this error
|
|
127
|
+
- source: the underlying source error
|
|
128
|
+
|
|
129
|
+
# Drawbacks
|
|
130
|
+
|
|
131
|
+
## Breaking changes
|
|
132
|
+
|
|
133
|
+
This RFC will lead to breaking changes on the user side.
|
|
134
|
+
|
|
135
|
+
# Rationale and alternatives
|
|
136
|
+
|
|
137
|
+
None.
|
|
138
|
+
|
|
139
|
+
# Prior art
|
|
140
|
+
|
|
141
|
+
None.
|
|
142
|
+
|
|
143
|
+
# Unresolved questions
|
|
144
|
+
|
|
145
|
+
None.
|
|
146
|
+
|
|
147
|
+
# Future possibilities
|
|
148
|
+
|
|
149
|
+
## More ErrorKind
|
|
150
|
+
|
|
151
|
+
We can add more error kinds to make it possible for users to check.
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
- Proposal Name: `object_handler`
|
|
2
|
+
- Start Date: 2022-12-19
|
|
3
|
+
- RFC PR: [apache/opendal#1085](https://github.com/apache/opendal/pull/1085)
|
|
4
|
+
- Tracking Issue: [apache/opendal#1085](https://github.com/apache/opendal/issues/1085)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Returning a `file description` to users for native seek support.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
OpenDAL's goal is to `access data freely, painlessly, and efficiently`, so we built an operation-first API, which means we provide operations instead of a file description. Users don't need to call `open` before `read`; OpenDAL will handle all the open and close functions.
|
|
13
|
+
|
|
14
|
+
However, our users do want to control the complex behavior of that:
|
|
15
|
+
|
|
16
|
+
- Some storage backends have native `seek` support, but OpenDAL can't fully use them.
|
|
17
|
+
- Users want to improve performance by reusing the same file description without `open` and `close` for every read operation.
|
|
18
|
+
|
|
19
|
+
This RFC will fill this gap.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Guide-level explanation
|
|
23
|
+
|
|
24
|
+
Users can get an object handler like:
|
|
25
|
+
|
|
26
|
+
```rust
|
|
27
|
+
let oh: ObjectHandler = op.object("path/to/file").open().await?;
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
`ObjectHandler` will implement `AsyncRead` and `AsyncSeek` so it can be used like `tokio::fs::File`. If the backend supports native seek operation, we will take the native process; otherwise, we will fall back to simulation implementations.
|
|
31
|
+
|
|
32
|
+
The blocking version will be provided by:
|
|
33
|
+
|
|
34
|
+
```rust
|
|
35
|
+
let boh: BlockingObjectHandler = op.object("path/to/file").blocking_open()?;
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
And `BlockingObjectHandler` will implement `Read` and `Seek` so it can be used like `std::fs::File`. If the backend supports native seek operation, we will take the native process; otherwise, we will fall back to simulation implementations.
|
|
39
|
+
|
|
40
|
+
# Reference-level explanation
|
|
41
|
+
|
|
42
|
+
This RFC will add a new API `open` in `Accessor`:
|
|
43
|
+
|
|
44
|
+
```rust
|
|
45
|
+
pub trait Accessor {
|
|
46
|
+
async fn open(&self, path: &str, args: OpOpen) -> Result<(RpOpen, BytesHandler)>;
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Only services that support native `seek` operations can implement this API, like `fs` and `hdfs`. For services that do not support native `seek` operations like `s3` and `azblob`, we will fall back to the simulation implementations: maintaining an in-memory index instead.
|
|
51
|
+
|
|
52
|
+
# Drawbacks
|
|
53
|
+
|
|
54
|
+
None
|
|
55
|
+
|
|
56
|
+
# Rationale and alternatives
|
|
57
|
+
|
|
58
|
+
## How about writing operations?
|
|
59
|
+
|
|
60
|
+
Ideally, writing on `ObjectHandler` should also be supported. But we still don't know how this API will be used. Let's apply this API for `read` first.
|
|
61
|
+
|
|
62
|
+
# Prior art
|
|
63
|
+
|
|
64
|
+
None
|
|
65
|
+
|
|
66
|
+
# Unresolved questions
|
|
67
|
+
|
|
68
|
+
None
|
|
69
|
+
|
|
70
|
+
# Future possibilities
|
|
71
|
+
|
|
72
|
+
- Add write support
|
|
73
|
+
- Adopt native `pread`
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
- Proposal Name: `object_metadataer`
|
|
2
|
+
- Start Date: 2023-02-21
|
|
3
|
+
- RFC PR: [apache/opendal#1391](https://github.com/apache/opendal/pull/1391)
|
|
4
|
+
- Tracking Issue: [apache/opendal#1393](https://github.com/apache/opendal/issues/1393)
|
|
5
|
+
|
|
6
|
+
# Summary
|
|
7
|
+
|
|
8
|
+
Add object metadataer to avoid unneeded extra metadata calls.
|
|
9
|
+
|
|
10
|
+
# Motivation
|
|
11
|
+
|
|
12
|
+
OpenDAL has a native metadata cache for now:
|
|
13
|
+
|
|
14
|
+
```rust
|
|
15
|
+
let _ = o.metadata().await?;
|
|
16
|
+
// This call doesn't need to send a request.
|
|
17
|
+
let _ = o.metadata().await?;
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Also, OpenDAL can reuse metadata from `list` or `scan`:
|
|
21
|
+
|
|
22
|
+
```rust
|
|
23
|
+
let mut ds = o.scan().await?;
|
|
24
|
+
while let Some(de) = ds.try_next().await? {
|
|
25
|
+
// This call doesn't need to send a request (if we are lucky enough).
|
|
26
|
+
let _ = de.metadata().await?;
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
By reusing metadata from `list` or `scan` we can reduce the extra `stat` call for each object. In our real use cases, we can reduce the total time to calculate the total length inside a dir with 6k files from 4 minutes to 2 seconds.
|
|
31
|
+
|
|
32
|
+
However, metadata can only be cached as a whole. If services could return more metadata in `stat` than in `list`, we wouldn't be able to mark the metadata as cacheable. If services add more metadata, we could inadvertently introduce performance degradation.
|
|
33
|
+
|
|
34
|
+
This RFC aims to address this problem by hiding `ObjectMetadata` and adding `ObjectMetadataer` instead. All object metadata values will be cached separately and all user calls to object metadata will go to the cache.
|
|
35
|
+
|
|
36
|
+
# Guide-level explanation
|
|
37
|
+
|
|
38
|
+
This RFC will add `ObjectMetadataer` and `BlockingObjectMetadataer` for users:
|
|
39
|
+
|
|
40
|
+
Users' calls to `o.metadata()` will return `ObjectMetadataer` instead:
|
|
41
|
+
|
|
42
|
+
```rust
|
|
43
|
+
let om: ObjectMetadataer = o.metadata().await?;
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
And users can query more metadata over it:
|
|
47
|
+
|
|
48
|
+
```rust
|
|
49
|
+
let content_length = om.content_length().await;
|
|
50
|
+
let etag = om.etag().await;
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
During the whole lifetime of the corresponding `Object` or `ObjectMetadataer`, we make sure that at most one `stat` call is sent. After this change, users will never get an `ObjectMetadata` anymore.
|
|
54
|
+
|
|
55
|
+
# Reference-level explanation
|
|
56
|
+
|
|
57
|
+
We will introduce a bitmap to store the state of all object metadata fields separately. Every time users call `key` on metadata, we will check as follows:
|
|
58
|
+
|
|
59
|
+
- If `bitmap` is set, return directly.
|
|
60
|
+
- If `bitmap` is not set, but is complete, return directly.
|
|
61
|
+
- If `bitmap` is not set and is not `complete`, call `stat` to get the meta.
|
|
62
|
+
|
|
63
|
+
`Object` will return `ObjectMetadataer` instead of `ObjectMetadata`:
|
|
64
|
+
|
|
65
|
+
```diff
|
|
66
|
+
- pub async fn metadata(&self) -> Result<ObjectMetadata> {}
|
|
67
|
+
+ pub async fn metadata(&self) -> Result<ObjectMetadataer> {}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
And `ObjectMetadataer` will provide the following API:
|
|
71
|
+
|
|
72
|
+
```rust
|
|
73
|
+
impl ObjectMetadataer {
|
|
74
|
+
pub async fn mode(&self) -> Result<ObjectMode>;
|
|
75
|
+
pub async fn content_length(&self) -> Result<u64>;
|
|
76
|
+
pub async fn content_md5(&self) -> Result<Option<String>>;
|
|
77
|
+
pub async fn last_modified(&self) -> Result<Option<OffsetDateTime>>;
|
|
78
|
+
pub async fn etag(&self) -> Result<Option<String>>;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
impl BlockingObjectMetadataer {
|
|
82
|
+
pub fn mode(&self) -> Result<ObjectMode>;
|
|
83
|
+
pub fn content_length(&self) -> Result<u64>;
|
|
84
|
+
pub fn content_md5(&self) -> Result<Option<String>>;
|
|
85
|
+
pub fn last_modified(&self) -> Result<Option<OffsetDateTime>>;
|
|
86
|
+
pub fn etag(&self) -> Result<Option<String>>;
|
|
87
|
+
}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
# Drawbacks
|
|
91
|
+
|
|
92
|
+
## Breaking changes
|
|
93
|
+
|
|
94
|
+
This RFC will introduce breaking changes for `Object::metadata`. And users can't do `serde::Serialize` or `serde::Deserialize` on object metadata any more. All metadata related API calls will be removed from `Object`.
|
|
95
|
+
|
|
96
|
+
# Rationale and alternatives
|
|
97
|
+
|
|
98
|
+
None.
|
|
99
|
+
|
|
100
|
+
# Prior art
|
|
101
|
+
|
|
102
|
+
None.
|
|
103
|
+
|
|
104
|
+
# Unresolved questions
|
|
105
|
+
|
|
106
|
+
None.
|
|
107
|
+
|
|
108
|
+
# Future possibilities
|
|
109
|
+
|
|
110
|
+
None.
|