opendal 0.1.6.pre.rc.1-arm64-darwin-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. checksums.yaml +7 -0
  2. data/.standard.yml +20 -0
  3. data/.tool-versions +1 -0
  4. data/.yardopts +1 -0
  5. data/DEPENDENCIES.md +9 -0
  6. data/DEPENDENCIES.rust.tsv +277 -0
  7. data/Gemfile +35 -0
  8. data/README.md +159 -0
  9. data/Rakefile +149 -0
  10. data/core/CHANGELOG.md +4929 -0
  11. data/core/CONTRIBUTING.md +61 -0
  12. data/core/DEPENDENCIES.md +3 -0
  13. data/core/DEPENDENCIES.rust.tsv +185 -0
  14. data/core/LICENSE +201 -0
  15. data/core/README.md +228 -0
  16. data/core/benches/README.md +18 -0
  17. data/core/benches/ops/README.md +26 -0
  18. data/core/benches/types/README.md +9 -0
  19. data/core/benches/vs_fs/README.md +35 -0
  20. data/core/benches/vs_s3/README.md +55 -0
  21. data/core/edge/README.md +3 -0
  22. data/core/edge/file_write_on_full_disk/README.md +14 -0
  23. data/core/edge/s3_aws_assume_role_with_web_identity/README.md +18 -0
  24. data/core/edge/s3_read_on_wasm/.gitignore +3 -0
  25. data/core/edge/s3_read_on_wasm/README.md +42 -0
  26. data/core/edge/s3_read_on_wasm/webdriver.json +15 -0
  27. data/core/examples/README.md +23 -0
  28. data/core/examples/basic/README.md +15 -0
  29. data/core/examples/concurrent-upload/README.md +15 -0
  30. data/core/examples/multipart-upload/README.md +15 -0
  31. data/core/fuzz/.gitignore +5 -0
  32. data/core/fuzz/README.md +68 -0
  33. data/core/src/docs/comparisons/vs_object_store.md +183 -0
  34. data/core/src/docs/performance/concurrent_write.md +101 -0
  35. data/core/src/docs/performance/http_optimization.md +124 -0
  36. data/core/src/docs/rfcs/0000_example.md +74 -0
  37. data/core/src/docs/rfcs/0000_foyer_integration.md +111 -0
  38. data/core/src/docs/rfcs/0041_object_native_api.md +185 -0
  39. data/core/src/docs/rfcs/0044_error_handle.md +198 -0
  40. data/core/src/docs/rfcs/0057_auto_region.md +160 -0
  41. data/core/src/docs/rfcs/0069_object_stream.md +145 -0
  42. data/core/src/docs/rfcs/0090_limited_reader.md +155 -0
  43. data/core/src/docs/rfcs/0112_path_normalization.md +79 -0
  44. data/core/src/docs/rfcs/0191_async_streaming_io.md +328 -0
  45. data/core/src/docs/rfcs/0203_remove_credential.md +96 -0
  46. data/core/src/docs/rfcs/0221_create_dir.md +89 -0
  47. data/core/src/docs/rfcs/0247_retryable_error.md +87 -0
  48. data/core/src/docs/rfcs/0293_object_id.md +67 -0
  49. data/core/src/docs/rfcs/0337_dir_entry.md +191 -0
  50. data/core/src/docs/rfcs/0409_accessor_capabilities.md +67 -0
  51. data/core/src/docs/rfcs/0413_presign.md +154 -0
  52. data/core/src/docs/rfcs/0423_command_line_interface.md +268 -0
  53. data/core/src/docs/rfcs/0429_init_from_iter.md +107 -0
  54. data/core/src/docs/rfcs/0438_multipart.md +163 -0
  55. data/core/src/docs/rfcs/0443_gateway.md +73 -0
  56. data/core/src/docs/rfcs/0501_new_builder.md +111 -0
  57. data/core/src/docs/rfcs/0554_write_refactor.md +96 -0
  58. data/core/src/docs/rfcs/0561_list_metadata_reuse.md +210 -0
  59. data/core/src/docs/rfcs/0599_blocking_api.md +157 -0
  60. data/core/src/docs/rfcs/0623_redis_service.md +300 -0
  61. data/core/src/docs/rfcs/0627_split_capabilities.md +89 -0
  62. data/core/src/docs/rfcs/0661_path_in_accessor.md +126 -0
  63. data/core/src/docs/rfcs/0793_generic_kv_services.md +209 -0
  64. data/core/src/docs/rfcs/0926_object_reader.md +93 -0
  65. data/core/src/docs/rfcs/0977_refactor_error.md +151 -0
  66. data/core/src/docs/rfcs/1085_object_handler.md +73 -0
  67. data/core/src/docs/rfcs/1391_object_metadataer.md +110 -0
  68. data/core/src/docs/rfcs/1398_query_based_metadata.md +125 -0
  69. data/core/src/docs/rfcs/1420_object_writer.md +147 -0
  70. data/core/src/docs/rfcs/1477_remove_object_concept.md +159 -0
  71. data/core/src/docs/rfcs/1735_operation_extension.md +117 -0
  72. data/core/src/docs/rfcs/2083_writer_sink_api.md +106 -0
  73. data/core/src/docs/rfcs/2133_append_api.md +88 -0
  74. data/core/src/docs/rfcs/2299_chain_based_operator_api.md +99 -0
  75. data/core/src/docs/rfcs/2602_object_versioning.md +138 -0
  76. data/core/src/docs/rfcs/2758_merge_append_into_write.md +79 -0
  77. data/core/src/docs/rfcs/2774_lister_api.md +66 -0
  78. data/core/src/docs/rfcs/2779_list_with_metakey.md +143 -0
  79. data/core/src/docs/rfcs/2852_native_capability.md +58 -0
  80. data/core/src/docs/rfcs/2884_merge_range_read_into_read.md +80 -0
  81. data/core/src/docs/rfcs/3017_remove_write_copy_from.md +94 -0
  82. data/core/src/docs/rfcs/3197_config.md +237 -0
  83. data/core/src/docs/rfcs/3232_align_list_api.md +69 -0
  84. data/core/src/docs/rfcs/3243_list_prefix.md +128 -0
  85. data/core/src/docs/rfcs/3356_lazy_reader.md +111 -0
  86. data/core/src/docs/rfcs/3526_list_recursive.md +59 -0
  87. data/core/src/docs/rfcs/3574_concurrent_stat_in_list.md +80 -0
  88. data/core/src/docs/rfcs/3734_buffered_reader.md +64 -0
  89. data/core/src/docs/rfcs/3898_concurrent_writer.md +66 -0
  90. data/core/src/docs/rfcs/3911_deleter_api.md +165 -0
  91. data/core/src/docs/rfcs/4382_range_based_read.md +213 -0
  92. data/core/src/docs/rfcs/4638_executor.md +215 -0
  93. data/core/src/docs/rfcs/5314_remove_metakey.md +120 -0
  94. data/core/src/docs/rfcs/5444_operator_from_uri.md +162 -0
  95. data/core/src/docs/rfcs/5479_context.md +140 -0
  96. data/core/src/docs/rfcs/5485_conditional_reader.md +112 -0
  97. data/core/src/docs/rfcs/5495_list_with_deleted.md +81 -0
  98. data/core/src/docs/rfcs/5556_write_returns_metadata.md +121 -0
  99. data/core/src/docs/rfcs/5871_read_returns_metadata.md +112 -0
  100. data/core/src/docs/rfcs/6189_remove_native_blocking.md +106 -0
  101. data/core/src/docs/rfcs/6209_glob_support.md +132 -0
  102. data/core/src/docs/rfcs/6213_options_api.md +142 -0
  103. data/core/src/docs/rfcs/README.md +62 -0
  104. data/core/src/docs/upgrade.md +1556 -0
  105. data/core/src/services/aliyun_drive/docs.md +61 -0
  106. data/core/src/services/alluxio/docs.md +45 -0
  107. data/core/src/services/azblob/docs.md +77 -0
  108. data/core/src/services/azdls/docs.md +73 -0
  109. data/core/src/services/azfile/docs.md +65 -0
  110. data/core/src/services/b2/docs.md +54 -0
  111. data/core/src/services/cacache/docs.md +38 -0
  112. data/core/src/services/cloudflare_kv/docs.md +21 -0
  113. data/core/src/services/cos/docs.md +55 -0
  114. data/core/src/services/d1/docs.md +48 -0
  115. data/core/src/services/dashmap/docs.md +38 -0
  116. data/core/src/services/dbfs/docs.md +57 -0
  117. data/core/src/services/dropbox/docs.md +64 -0
  118. data/core/src/services/etcd/docs.md +45 -0
  119. data/core/src/services/foundationdb/docs.md +42 -0
  120. data/core/src/services/fs/docs.md +49 -0
  121. data/core/src/services/ftp/docs.md +42 -0
  122. data/core/src/services/gcs/docs.md +76 -0
  123. data/core/src/services/gdrive/docs.md +65 -0
  124. data/core/src/services/ghac/docs.md +84 -0
  125. data/core/src/services/github/docs.md +52 -0
  126. data/core/src/services/gridfs/docs.md +46 -0
  127. data/core/src/services/hdfs/docs.md +140 -0
  128. data/core/src/services/hdfs_native/docs.md +35 -0
  129. data/core/src/services/http/docs.md +45 -0
  130. data/core/src/services/huggingface/docs.md +61 -0
  131. data/core/src/services/ipfs/docs.md +45 -0
  132. data/core/src/services/ipmfs/docs.md +14 -0
  133. data/core/src/services/koofr/docs.md +51 -0
  134. data/core/src/services/lakefs/docs.md +62 -0
  135. data/core/src/services/memcached/docs.md +47 -0
  136. data/core/src/services/memory/docs.md +36 -0
  137. data/core/src/services/mini_moka/docs.md +19 -0
  138. data/core/src/services/moka/docs.md +42 -0
  139. data/core/src/services/mongodb/docs.md +49 -0
  140. data/core/src/services/monoiofs/docs.md +46 -0
  141. data/core/src/services/mysql/docs.md +47 -0
  142. data/core/src/services/obs/docs.md +54 -0
  143. data/core/src/services/onedrive/docs.md +115 -0
  144. data/core/src/services/opfs/docs.md +18 -0
  145. data/core/src/services/oss/docs.md +74 -0
  146. data/core/src/services/pcloud/docs.md +51 -0
  147. data/core/src/services/persy/docs.md +43 -0
  148. data/core/src/services/postgresql/docs.md +47 -0
  149. data/core/src/services/redb/docs.md +41 -0
  150. data/core/src/services/redis/docs.md +43 -0
  151. data/core/src/services/rocksdb/docs.md +54 -0
  152. data/core/src/services/s3/compatible_services.md +126 -0
  153. data/core/src/services/s3/docs.md +244 -0
  154. data/core/src/services/seafile/docs.md +54 -0
  155. data/core/src/services/sftp/docs.md +49 -0
  156. data/core/src/services/sled/docs.md +39 -0
  157. data/core/src/services/sqlite/docs.md +46 -0
  158. data/core/src/services/surrealdb/docs.md +54 -0
  159. data/core/src/services/swift/compatible_services.md +53 -0
  160. data/core/src/services/swift/docs.md +52 -0
  161. data/core/src/services/tikv/docs.md +43 -0
  162. data/core/src/services/upyun/docs.md +51 -0
  163. data/core/src/services/vercel_artifacts/docs.md +40 -0
  164. data/core/src/services/vercel_blob/docs.md +45 -0
  165. data/core/src/services/webdav/docs.md +49 -0
  166. data/core/src/services/webhdfs/docs.md +90 -0
  167. data/core/src/services/yandex_disk/docs.md +45 -0
  168. data/core/tests/behavior/README.md +77 -0
  169. data/core/tests/data/normal_dir/.gitkeep +0 -0
  170. data/core/tests/data/normal_file.txt +1041 -0
  171. data/core/tests/data/special_dir !@#$%^&()_+-=;',/.gitkeep +0 -0
  172. data/core/tests/data/special_file !@#$%^&()_+-=;',.txt +1041 -0
  173. data/core/users.md +13 -0
  174. data/extconf.rb +24 -0
  175. data/lib/opendal.rb +25 -0
  176. data/lib/opendal_ruby/entry.rb +35 -0
  177. data/lib/opendal_ruby/io.rb +70 -0
  178. data/lib/opendal_ruby/metadata.rb +44 -0
  179. data/lib/opendal_ruby/opendal_ruby.bundle +0 -0
  180. data/lib/opendal_ruby/operator.rb +29 -0
  181. data/lib/opendal_ruby/operator_info.rb +26 -0
  182. data/opendal.gemspec +91 -0
  183. data/test/blocking_op_test.rb +112 -0
  184. data/test/capability_test.rb +42 -0
  185. data/test/io_test.rb +172 -0
  186. data/test/lister_test.rb +77 -0
  187. data/test/metadata_test.rb +78 -0
  188. data/test/middlewares_test.rb +46 -0
  189. data/test/operator_info_test.rb +35 -0
  190. data/test/test_helper.rb +36 -0
  191. metadata +240 -0
@@ -0,0 +1,209 @@
1
+ - Proposal Name: `generic-kv-services`
2
+ - Start Date: 2022-10-03
3
+ - RFC PR: [apache/opendal#793](https://github.com/apache/opendal/pull/793)
4
+ - Tracking Issue: [apache/opendal#794](https://github.com/apache/opendal/issues/794)
5
+
6
+ # Summary
7
+
8
+ Add generic kv services support to OpenDAL.
9
+
10
+ # Motivation
11
+
12
+ OpenDAL now has some kv services support:
13
+
14
+ - memory
15
+ - redis
16
+
17
+ However, maintaining them is complex and error-prone. We don't want to implement similar logic for every kv
18
+ service. This RFC intends to introduce a generic kv service so that we can:
19
+
20
+ - Implement OpenDAL Accessor on this generic kv service
21
+ - Add new kv service support via generic kv API.
22
+
23
+ # Guide-level explanation
24
+
25
+ No user-side changes.
26
+
27
+ # Reference-level explanation
28
+
29
+ OpenDAL will introduce a generic kv service:
30
+
31
+ ```rust
32
+ trait KeyValueAccessor {
33
+ async fn get(&self, key: &[u8]) -> Result<Option<Vec<u8>>>;
34
+ async fn set(&self, key: &[u8], value: &[u8]) -> Result<()>;
35
+ }
36
+ ```
37
+
38
+ We will implement the OpenDAL service on `KeyValueAccessor`. To add new kv service support, users only need to implement
39
+ it against `KeyValueAccessor`.
40
+
41
+ ## Spec
42
+
43
+ This RFC is mainly inspired
44
+ by [TiFS: FUSE based on TiKV](https://github.com/Hexilee/tifs/blob/main/contribution/design.md). We will use the
45
+ same `ScopedKey` idea in `TiFS`.
46
+
47
+ ```rust
48
+ pub enum ScopedKey {
49
+ Meta,
50
+ Inode(u64),
51
+ Block {
52
+ ino: u64,
53
+ block: u64,
54
+ },
55
+ Entry {
56
+ parent: u64,
57
+ name: String,
58
+ },
59
+ }
60
+ ```
61
+
62
+ We can encode a scoped key into a byte array as a key. Following is the common layout of an encoded key.
63
+
64
+ ```text
65
+ + 1byte +<----------------------------+ dynamic size +------------------------------------>+
66
+ | | |
67
+ | | |
68
+ | | |
69
+ | | |
70
+ | | |
71
+ | | |
72
+ | v v
73
+ +------------------------------------------------------------------------------------------+
74
+ | | |
75
+ | scope | body |
76
+ | | |
77
+ +-------+----------------------------------------------------------------------------------+
78
+ ```
79
+
80
+ ### Meta
81
+
82
+ There is only one key in the meta scope. The meta key is designed to store metadata of our filesystem. Following is the
83
+ layout of an encoded meta key.
84
+
85
+ ```text
86
+ + 1byte +
87
+ | |
88
+ | |
89
+ | |
90
+ | |
91
+ | |
92
+ | |
93
+ | v
94
+ +-------+
95
+ | |
96
+ | 0 |
97
+ | |
98
+ +-------+
99
+ ```
100
+
101
+ This key will store data:
102
+
103
+ ```rust
104
+ pub struct Meta {
105
+ inode_next: u64,
106
+ }
107
+ ```
108
+
109
+ The meta-structure contains only an auto-increasing counter `inode_next`, designed to generate an inode number.
110
+
111
+ ### Inode
112
+
113
+ Keys in the inode scope are designed to store attributes of files. Following is the layout of an encoded inode key.
114
+
115
+ ```text
116
+ + 1byte +<-------------------------------+ 8bytes +--------------------------------------->+
117
+ | | |
118
+ | | |
119
+ | | |
120
+ | | |
121
+ | | |
122
+ | | |
123
+ | v v
124
+ +------------------------------------------------------------------------------------------+
125
+ | | |
126
+ | 1 | inode number |
127
+ | | |
128
+ +-------+----------------------------------------------------------------------------------+
129
+ ```
130
+
131
+ This key will store data:
132
+
133
+ ```rust
134
+ pub struct Inode {
135
+ meta: Metadata,
136
+ blocks: HashMap<u64, u32>,
137
+ }
138
+ ```
139
+
140
+ `blocks` is a map from `block_id` to `size`. We will use this map to calculate the correct blocks to read.
141
+
142
+ ### Block
143
+
144
+ Keys in the block scope are designed to store blocks of a file. Following is the layout of an encoded block key.
145
+
146
+ ```text
147
+ + 1byte +<----------------- 8bytes ---------------->+<------------------- 8bytes ----------------->+
148
+ | | | |
149
+ | | | |
150
+ | | | |
151
+ | | | |
152
+ | | | |
153
+ | | | |
154
+ | v v v
155
+ +--------------------------------------------------------------------------------------------------+
156
+ | | | |
157
+ | 2 | inode number | block index |
158
+ | | | |
159
+ +-------+-------------------------------------------+----------------------------------------------+
160
+ ```
161
+
162
+ ### Entry
163
+
164
+ Keys in the file index scope are designed to store the entry of the file. Following is the layout of an encoded file
165
+ entry key.
166
+
167
+ ```text
168
+ + 1byte +<----------------- 8bytes ---------------->+<-------------- dynamic size ---------------->+
169
+ | | | |
170
+ | | | |
171
+ | | | |
172
+ | | | |
173
+ | | | |
174
+ | | | |
175
+ | v v v
176
+ +--------------------------------------------------------------------------------------------------+
177
+ | | | |
178
+ | 3 | inode number of parent directory | file name in utf-8 encoding |
179
+ | | | |
180
+ +-------+-------------------------------------------+----------------------------------------------+
181
+ ```
182
+
183
+ Store the correct inode number for this file.
184
+
185
+ ```rust
186
+ pub struct Index {
187
+ pub ino: u64,
188
+ }
189
+ ```
190
+
191
+ # Drawbacks
192
+
193
+ None.
194
+
195
+ # Rationale and alternatives
196
+
197
+ None.
198
+
199
+ # Prior art
200
+
201
+ None.
202
+
203
+ # Unresolved questions
204
+
205
+ None.
206
+
207
+ # Future possibilities
208
+
209
+ None.
@@ -0,0 +1,93 @@
1
+ - Proposal Name: `object_reader`
2
+ - Start Date: 2022-11-13
3
+ - RFC PR: [apache/opendal#926](https://github.com/apache/opendal/pull/926)
4
+ - Tracking Issue: [apache/opendal#927](https://github.com/apache/opendal/issues/927)
5
+
6
+ # Summary
7
+
8
+ Return reading-related object metadata in the reader.
9
+
10
+ # Motivation
11
+
12
+ Some services like s3 could return object meta while issuing reading requests.
13
+
14
+ In `GetObject`, we could get:
15
+
16
+ - Last-Modified
17
+ - Content-Length
18
+ - ETag
19
+ - Content-Range
20
+ - Content-Type
21
+ - Expires
22
+
23
+ By reusing that meta wisely, we can avoid extra `HeadObject` calls, each of which could take 50ms. For example, `Content-Range` returns the content range of this read in the whole object: `<unit> <range-start>-<range-end>/<size>`. By using the content range, we can avoid `HeadObject` to get this object's total size, which means a lot for the content cache.
24
+
25
+ # Guide-level explanation
26
+
27
+ `reader` and all its related API will return `ObjectReader` instead:
28
+
29
+ ```diff
30
+ - pub async fn reader(&self) -> Result<impl BytesRead> {}
31
+ + pub async fn reader(&self) -> Result<ObjectReader> {}
32
+ ```
33
+
34
+ `ObjectReader` impls `BytesRead` too, so existing code will keep working. And `ObjectReader` will provide similar APIs to `Entry`, for example:
35
+
36
+ ```rust
37
+ pub async fn content_length(&self) -> Option<u64> {}
38
+ pub async fn last_modified(&self) -> Option<OffsetDateTime> {}
39
+ pub async fn etag(&self) -> Option<String> {}
40
+ ```
41
+
42
+ Note:
43
+
44
+ - All fields are optional, as services like fs could not return them.
45
+ - `content_length` here is this read request's length, not the object's length.
46
+
47
+ # Reference-level explanation
48
+
49
+ We will change the API signature of `Accessor`:
50
+
51
+ ```diff
52
+ - async fn read(&self, path: &str, args: OpRead) -> Result<BytesReader> {}
53
+ + async fn read(&self, path: &str, args: OpRead) -> Result<ObjectReader> {}
54
+ ```
55
+
56
+ `ObjectReader` is a wrapper of `BytesReader` and `ObjectMetadata`:
57
+
58
+ ```rust
59
+ pub struct ObjectReader {
60
+ inner: BytesReader,
61
+ meta: ObjectMetadata,
62
+ }
63
+
64
+ impl ObjectReader {
65
+ pub async fn content_length(&self) -> Option<u64> {}
66
+ pub async fn last_modified(&self) -> Option<OffsetDateTime> {}
67
+ pub async fn etag(&self) -> Option<String> {}
68
+ }
69
+ ```
70
+
71
+ Services can decide whether or not to fill them.
72
+
73
+ # Drawbacks
74
+
75
+ None.
76
+
77
+ # Rationale and alternatives
78
+
79
+ None.
80
+
81
+ # Prior art
82
+
83
+ None.
84
+
85
+ # Unresolved questions
86
+
87
+ None.
88
+
89
+ # Future possibilities
90
+
91
+ ## Add content-range support
92
+
93
+ We can add `content-range` in `ObjectMeta` so that users can fetch and use them.
@@ -0,0 +1,151 @@
1
+ - Proposal Name: `refactor-error`
2
+ - Start Date: 2022-11-21
3
+ - RFC PR: [apache/opendal#977](https://github.com/apache/opendal/pull/977)
4
+ - Tracking Issue: [apache/opendal#976](https://github.com/apache/opendal/pull/976)
5
+
6
+ # Summary
7
+
8
+ Use a separate error instead of `std::io::Error`.
9
+
10
+ # Motivation
11
+
12
+ OpenDAL used to use `std::io::Error` for all functions. This design is natural and easy to use. But there are many problems with this usage:
13
+
14
+ ## Not friendly for retry
15
+
16
+ `io::Error` can't carry retry-related information. In [RFC-0247: Retryable Error](./0247_retryable_error.md), we use `io::ErrorKind::Interrupted` to indicate that an error is retryable. But this change hides the real error kind of the underlying error. To mark that an error has been retried, we have to add another new error wrapper:
17
+
18
+ ```rust
19
+ #[derive(thiserror::Error, Debug)]
20
+ #[error("permanent error: still failing after retry, source: {source}")]
21
+ struct PermanentError {
22
+ source: Error,
23
+ }
24
+ ```
25
+
26
+ ## ErrorKind is inaccurate
27
+
28
+ `std::io::ErrorKind` is used to represent errors returned from system io, which is unsuitable for errors that have business semantics. For example, users can't distinguish `ObjectNotFound` or `BucketNotFound` from `ErrorKind::NotFound`.
29
+
30
+ ## ErrorKind is incomplete
31
+
32
+ OpenDAL has been waiting for features [`io_error_more`](https://github.com/rust-lang/rust/issues/86442) to be stabilized for a long time. But there is no progress so far, which makes it impossible to return `ErrorKind::IsADirectory` or `ErrorKind::NotADirectory` on stable rust.
33
+
34
+ ## Error is not easy to carry context
35
+
36
+ To carry context inside `std::io::Error`, we have to check and make sure that all functions construct an `ObjectError` or `BackendError`:
37
+
38
+ ```rust
39
+ #[derive(Error, Debug)]
40
+ #[error("object error: (op: {op}, path: {path}, source: {source})")]
41
+ pub struct ObjectError {
42
+ op: Operation,
43
+ path: String,
44
+ source: anyhow::Error,
45
+ }
46
+ ```
47
+
48
+ To make things worse, we can't prevent opendal from returning raw io errors directly. For example, in `Object::range_read`:
49
+
50
+ ```rust
51
+ pub async fn range_read(&self, range: impl RangeBounds<u64>) -> Result<Vec<u8>> {
52
+ ...
53
+
54
+ io::copy(s, &mut bs).await?;
55
+
56
+ Ok(bs.into_inner())
57
+ }
58
+ ```
59
+
60
+ We leaked the `io::Error` without any context.
61
+
62
+ # Guide-level explanation
63
+
64
+ Thus, I propose to add `opendal::Error` back with everything improved.
65
+
66
+ Users will have similar usage as before:
67
+
68
+ ```rust
69
+ if let Err(e) = op.object("test_file").metadata().await {
70
+ if e.kind() == ErrorKind::ObjectNotFound {
71
+ println!("object not exist")
72
+ }
73
+ }
74
+ ```
75
+
76
+ Users can check if this error is `temporary`:
77
+
78
+ ```rust
79
+ if err.is_temporary() {
80
+ // retry the operation
81
+ }
82
+ ```
83
+
84
+ Users can print error messages via `Display`:
85
+
86
+ ```rust
87
+ > println!("{}", err);
88
+
89
+ ObjectNotFound (permanent) at read, context: { service: S3, path: path/to/file } => status code: 404, headers: {"x-amz-request-id": "GCYDTQX51YRSF4ZF", "x-amz-id-2": "EH0vV6lTwWk+lFXqCMCBSk1oovqhG4bzALU9+sUudyw7TEVrfWm2o/AFJKhYKpdGqOoBZGgMTC0=", "content-type": "application/xml", "date": "Mon, 21 Nov 2022 05:26:37 GMT", "server": "AmazonS3"}, body: ""
90
+ ```
91
+
92
+ Also, users can choose to print the more verbose message via `Debug`:
93
+
94
+ ```rust
95
+ > println!("{:?}", err);
96
+
97
+ ObjectNotFound (permanent) at read => status code: 404, headers: {"x-amz-request-id": "GCYDTQX51YRSF4ZF", "x-amz-id-2": "EH0vV6lTwWk+lFXqCMCBSk1oovqhG4bzALU9+sUudyw7TEVrfWm2o/AFJKhYKpdGqOoBZGgMTC0=", "content-type": "application/xml", "date": "Mon, 21 Nov 2022 05:26:37 GMT", "server": "AmazonS3"}, body: ""
98
+
99
+ Context:
100
+ service: S3
101
+ path: path/to/file
102
+
103
+ Source: <source error>
104
+
105
+ Backtrace: <backtrace if we have>
106
+ ```
107
+
108
+ # Reference-level explanation
109
+
110
+ We will add new `Error` and `ErrorKind` in opendal:
111
+
112
+ ```rust
113
+ pub struct Error {
114
+ kind: ErrorKind,
115
+ message: String,
116
+
117
+ status: ErrorStatus,
118
+ operation: &'static str,
119
+ context: Vec<(&'static str, String)>,
120
+ source: Option<anyhow::Error>,
121
+ }
122
+ ```
123
+
124
+ - status: the status of this error, which indicates if this error is temporary
125
+ - operation: the operation which generates this error
126
+ - context: the context related to this error
127
+ - source: the underlying source error
128
+
129
+ # Drawbacks
130
+
131
+ ## Breaking changes
132
+
133
+ This RFC will lead to breaking changes on the user side.
134
+
135
+ # Rationale and alternatives
136
+
137
+ None.
138
+
139
+ # Prior art
140
+
141
+ None.
142
+
143
+ # Unresolved questions
144
+
145
+ None.
146
+
147
+ # Future possibilities
148
+
149
+ ## More ErrorKind
150
+
151
+ We can add more error kinds to make it possible for users to check.
@@ -0,0 +1,73 @@
1
+ - Proposal Name: `object_handler`
2
+ - Start Date: 2022-12-19
3
+ - RFC PR: [apache/opendal#1085](https://github.com/apache/opendal/pull/1085)
4
+ - Tracking Issue: [apache/opendal#1085](https://github.com/apache/opendal/issues/1085)
5
+
6
+ # Summary
7
+
8
+ Returning a `file description` to users for native seek support.
9
+
10
+ # Motivation
11
+
12
+ OpenDAL's goal is to `access data freely, painlessly, and efficiently`, so we built an operation-first API, which means we provide operations instead of a file description. Users don't need to call `open` before `read`; OpenDAL will handle all the open and close functions.
13
+
14
+ However, our users do want to control the complex behavior of that:
15
+
16
+ - Some storage backends have native `seek` support, but OpenDAL can't fully use them.
17
+ - Users want to improve performance by reusing the same file description without `open` and `close` for every read operation.
18
+
19
+ This RFC will fill this gap.
20
+
21
+
22
+ # Guide-level explanation
23
+
24
+ Users can get an object handler like:
25
+
26
+ ```rust
27
+ let oh: ObjectHandler = op.object("path/to/file").open().await?;
28
+ ```
29
+
30
+ `ObjectHandler` will implement `AsyncRead` and `AsyncSeek` so it can be used like `tokio::fs::File`. If the backend supports native seek operation, we will take the native process; otherwise, we will fall back to simulation implementations.
31
+
32
+ The blocking version will be provided by:
33
+
34
+ ```rust
35
+ let boh: BlockingObjectHandler = op.object("path/to/file").blocking_open()?;
36
+ ```
37
+
38
+ And `BlockingObjectHandler` will implement `Read` and `Seek` so it can be used like `std::fs::File`. If the backend supports native seek operation, we will take the native process; otherwise, we will fall back to simulation implementations.
39
+
40
+ # Reference-level explanation
41
+
42
+ This RFC will add a new API `open` in `Accessor`:
43
+
44
+ ```rust
45
+ pub trait Accessor {
46
+ async fn open(&self, path: &str, args: OpOpen) -> Result<(RpOpen, BytesHandler)>;
47
+ }
48
+ ```
49
+
50
+ Only services that support native `seek` operations can implement this API, like `fs` and `hdfs`. For services that do not support native `seek` operations like `s3` and `azblob`, we will fall back to the simulation implementations: maintaining an in-memory index instead.
51
+
52
+ # Drawbacks
53
+
54
+ None
55
+
56
+ # Rationale and alternatives
57
+
58
+ ## How about writing operations?
59
+
60
+ Ideally, writing on `ObjectHandler` should also be supported. But we still don't know how this API will be used. Let's apply this API for `read` first.
61
+
62
+ # Prior art
63
+
64
+ None
65
+
66
+ # Unresolved questions
67
+
68
+ None
69
+
70
+ # Future possibilities
71
+
72
+ - Add write support
73
+ - Adopt native `pread`
@@ -0,0 +1,110 @@
1
+ - Proposal Name: `object_metadataer`
2
+ - Start Date: 2023-02-21
3
+ - RFC PR: [apache/opendal#1391](https://github.com/apache/opendal/pull/1391)
4
+ - Tracking Issue: [apache/opendal#1393](https://github.com/apache/opendal/issues/1393)
5
+
6
+ # Summary
7
+
8
+ Add object metadataer to avoid unneeded extra metadata call.
9
+
10
+ # Motivation
11
+
12
+ OpenDAL has native metadata cache for now:
13
+
14
+ ```rust
15
+ let _ = o.metadata().await?;
16
+ // This call doesn't need to send a request.
17
+ let _ = o.metadata().await?;
18
+ ```
19
+
20
+ Also, OpenDAL can reuse metadata from `list` or `scan`:
21
+
22
+ ```rust
23
+ let mut ds = o.scan().await?;
24
+ while let Some(de) = ds.try_next().await? {
25
+ // This call doesn't need to send a request (if we are lucky enough).
26
+ let _ = de.metadata().await?;
27
+ }
28
+ ```
29
+
30
+ By reusing metadata from `list` or `scan` we can reduce the extra `stat` call for each object. In our real use cases, we can reduce the total time to calculate the total length inside a dir with 6k files from 4 minutes to 2 seconds.
31
+
32
+ However, metadata can only be cached as a whole. If services could return more metadata in `stat` than in `list`, we wouldn't be able to mark the metadata as cacheable. If services add more metadata, we could inadvertently introduce performance degradation.
33
+
34
+ This RFC aims to address this problem by hiding `ObjectMetadata` and adding `ObjectMetadataer` instead. All object metadata values will be cached separately and all user calls to object metadata will go to the cache.
35
+
36
+ # Guide-level explanation
37
+
38
+ This RFC will add `ObjectMetadataer` and `BlockingObjectMetadataer` for users:
39
+
40
+ Users' calls to `o.metadata()` will return `ObjectMetadataer` instead:
41
+
42
+ ```rust
43
+ let om: ObjectMetadataer = o.metadata().await?;
44
+ ```
45
+
46
+ And users can query more metadata over it:
47
+
48
+ ```rust
49
+ let content_length = om.content_length().await;
50
+ let etag = om.etag().await;
51
+ ```
52
+
53
+ During the whole lifetime of the corresponding `Object` or `ObjectMetadataer`, we make sure that at most one `stat` call is sent. After this change, users will never get an `ObjectMetadata` anymore.
54
+
55
+ # Reference-level explanation
56
+
57
+ We will introduce a bitmap to store the state of all object metadata fields separately. Every time users call `key` on metadata, we will check as follows:
58
+
59
+ - If `bitmap` is set, return directly.
60
+ - If `bitmap` is not set, but is complete, return directly.
61
+ - If `bitmap` is not set and the metadata is not `complete`, call `stat` to get the meta.
62
+
63
+ `Object` will return `ObjectMetadataer` instead of `ObjectMetadata`:
64
+
65
+ ```diff
66
+ - pub async fn metadata(&self) -> Result<ObjectMetadata> {}
67
+ + pub async fn metadata(&self) -> Result<ObjectMetadataer> {}
68
+ ```
69
+
70
+ And `ObjectMetadataer` will provide the following API:
71
+
72
+ ```rust
73
+ impl ObjectMetadataer {
74
+ pub async fn mode(&self) -> Result<ObjectMode>;
75
+ pub async fn content_length(&self) -> Result<u64>;
76
+ pub async fn content_md5(&self) -> Result<Option<String>>;
77
+ pub async fn last_modified(&self) -> Result<Option<OffsetDateTime>>;
78
+ pub async fn etag(&self) -> Result<Option<String>>;
79
+ }
80
+
81
+ impl BlockingObjectMetadataer {
82
+ pub fn mode(&self) -> Result<ObjectMode>;
83
+ pub fn content_length(&self) -> Result<u64>;
84
+ pub fn content_md5(&self) -> Result<Option<String>>;
85
+ pub fn last_modified(&self) -> Result<Option<OffsetDateTime>>;
86
+ pub fn etag(&self) -> Result<Option<String>>;
87
+ }
88
+ ```
89
+
90
+ # Drawbacks
91
+
92
+ ## Breaking changes
93
+
94
+ This RFC will introduce breaking changes for `Object::metadata`. And users can't do `serde::Serialize` or `serde::Deserialize` on object metadata any more. All metadata related API calls will be removed from `Object`.
95
+
96
+ # Rationale and alternatives
97
+
98
+ None.
99
+
100
+ # Prior art
101
+
102
+ None.
103
+
104
+ # Unresolved questions
105
+
106
+ None.
107
+
108
+ # Future possibilities
109
+
110
+ None.