opendal 0.1.6.pre.rc.1-arm64-darwin-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. checksums.yaml +7 -0
  2. data/.standard.yml +20 -0
  3. data/.tool-versions +1 -0
  4. data/.yardopts +1 -0
  5. data/DEPENDENCIES.md +9 -0
  6. data/DEPENDENCIES.rust.tsv +277 -0
  7. data/Gemfile +35 -0
  8. data/README.md +159 -0
  9. data/Rakefile +149 -0
  10. data/core/CHANGELOG.md +4929 -0
  11. data/core/CONTRIBUTING.md +61 -0
  12. data/core/DEPENDENCIES.md +3 -0
  13. data/core/DEPENDENCIES.rust.tsv +185 -0
  14. data/core/LICENSE +201 -0
  15. data/core/README.md +228 -0
  16. data/core/benches/README.md +18 -0
  17. data/core/benches/ops/README.md +26 -0
  18. data/core/benches/types/README.md +9 -0
  19. data/core/benches/vs_fs/README.md +35 -0
  20. data/core/benches/vs_s3/README.md +55 -0
  21. data/core/edge/README.md +3 -0
  22. data/core/edge/file_write_on_full_disk/README.md +14 -0
  23. data/core/edge/s3_aws_assume_role_with_web_identity/README.md +18 -0
  24. data/core/edge/s3_read_on_wasm/.gitignore +3 -0
  25. data/core/edge/s3_read_on_wasm/README.md +42 -0
  26. data/core/edge/s3_read_on_wasm/webdriver.json +15 -0
  27. data/core/examples/README.md +23 -0
  28. data/core/examples/basic/README.md +15 -0
  29. data/core/examples/concurrent-upload/README.md +15 -0
  30. data/core/examples/multipart-upload/README.md +15 -0
  31. data/core/fuzz/.gitignore +5 -0
  32. data/core/fuzz/README.md +68 -0
  33. data/core/src/docs/comparisons/vs_object_store.md +183 -0
  34. data/core/src/docs/performance/concurrent_write.md +101 -0
  35. data/core/src/docs/performance/http_optimization.md +124 -0
  36. data/core/src/docs/rfcs/0000_example.md +74 -0
  37. data/core/src/docs/rfcs/0000_foyer_integration.md +111 -0
  38. data/core/src/docs/rfcs/0041_object_native_api.md +185 -0
  39. data/core/src/docs/rfcs/0044_error_handle.md +198 -0
  40. data/core/src/docs/rfcs/0057_auto_region.md +160 -0
  41. data/core/src/docs/rfcs/0069_object_stream.md +145 -0
  42. data/core/src/docs/rfcs/0090_limited_reader.md +155 -0
  43. data/core/src/docs/rfcs/0112_path_normalization.md +79 -0
  44. data/core/src/docs/rfcs/0191_async_streaming_io.md +328 -0
  45. data/core/src/docs/rfcs/0203_remove_credential.md +96 -0
  46. data/core/src/docs/rfcs/0221_create_dir.md +89 -0
  47. data/core/src/docs/rfcs/0247_retryable_error.md +87 -0
  48. data/core/src/docs/rfcs/0293_object_id.md +67 -0
  49. data/core/src/docs/rfcs/0337_dir_entry.md +191 -0
  50. data/core/src/docs/rfcs/0409_accessor_capabilities.md +67 -0
  51. data/core/src/docs/rfcs/0413_presign.md +154 -0
  52. data/core/src/docs/rfcs/0423_command_line_interface.md +268 -0
  53. data/core/src/docs/rfcs/0429_init_from_iter.md +107 -0
  54. data/core/src/docs/rfcs/0438_multipart.md +163 -0
  55. data/core/src/docs/rfcs/0443_gateway.md +73 -0
  56. data/core/src/docs/rfcs/0501_new_builder.md +111 -0
  57. data/core/src/docs/rfcs/0554_write_refactor.md +96 -0
  58. data/core/src/docs/rfcs/0561_list_metadata_reuse.md +210 -0
  59. data/core/src/docs/rfcs/0599_blocking_api.md +157 -0
  60. data/core/src/docs/rfcs/0623_redis_service.md +300 -0
  61. data/core/src/docs/rfcs/0627_split_capabilities.md +89 -0
  62. data/core/src/docs/rfcs/0661_path_in_accessor.md +126 -0
  63. data/core/src/docs/rfcs/0793_generic_kv_services.md +209 -0
  64. data/core/src/docs/rfcs/0926_object_reader.md +93 -0
  65. data/core/src/docs/rfcs/0977_refactor_error.md +151 -0
  66. data/core/src/docs/rfcs/1085_object_handler.md +73 -0
  67. data/core/src/docs/rfcs/1391_object_metadataer.md +110 -0
  68. data/core/src/docs/rfcs/1398_query_based_metadata.md +125 -0
  69. data/core/src/docs/rfcs/1420_object_writer.md +147 -0
  70. data/core/src/docs/rfcs/1477_remove_object_concept.md +159 -0
  71. data/core/src/docs/rfcs/1735_operation_extension.md +117 -0
  72. data/core/src/docs/rfcs/2083_writer_sink_api.md +106 -0
  73. data/core/src/docs/rfcs/2133_append_api.md +88 -0
  74. data/core/src/docs/rfcs/2299_chain_based_operator_api.md +99 -0
  75. data/core/src/docs/rfcs/2602_object_versioning.md +138 -0
  76. data/core/src/docs/rfcs/2758_merge_append_into_write.md +79 -0
  77. data/core/src/docs/rfcs/2774_lister_api.md +66 -0
  78. data/core/src/docs/rfcs/2779_list_with_metakey.md +143 -0
  79. data/core/src/docs/rfcs/2852_native_capability.md +58 -0
  80. data/core/src/docs/rfcs/2884_merge_range_read_into_read.md +80 -0
  81. data/core/src/docs/rfcs/3017_remove_write_copy_from.md +94 -0
  82. data/core/src/docs/rfcs/3197_config.md +237 -0
  83. data/core/src/docs/rfcs/3232_align_list_api.md +69 -0
  84. data/core/src/docs/rfcs/3243_list_prefix.md +128 -0
  85. data/core/src/docs/rfcs/3356_lazy_reader.md +111 -0
  86. data/core/src/docs/rfcs/3526_list_recursive.md +59 -0
  87. data/core/src/docs/rfcs/3574_concurrent_stat_in_list.md +80 -0
  88. data/core/src/docs/rfcs/3734_buffered_reader.md +64 -0
  89. data/core/src/docs/rfcs/3898_concurrent_writer.md +66 -0
  90. data/core/src/docs/rfcs/3911_deleter_api.md +165 -0
  91. data/core/src/docs/rfcs/4382_range_based_read.md +213 -0
  92. data/core/src/docs/rfcs/4638_executor.md +215 -0
  93. data/core/src/docs/rfcs/5314_remove_metakey.md +120 -0
  94. data/core/src/docs/rfcs/5444_operator_from_uri.md +162 -0
  95. data/core/src/docs/rfcs/5479_context.md +140 -0
  96. data/core/src/docs/rfcs/5485_conditional_reader.md +112 -0
  97. data/core/src/docs/rfcs/5495_list_with_deleted.md +81 -0
  98. data/core/src/docs/rfcs/5556_write_returns_metadata.md +121 -0
  99. data/core/src/docs/rfcs/5871_read_returns_metadata.md +112 -0
  100. data/core/src/docs/rfcs/6189_remove_native_blocking.md +106 -0
  101. data/core/src/docs/rfcs/6209_glob_support.md +132 -0
  102. data/core/src/docs/rfcs/6213_options_api.md +142 -0
  103. data/core/src/docs/rfcs/README.md +62 -0
  104. data/core/src/docs/upgrade.md +1556 -0
  105. data/core/src/services/aliyun_drive/docs.md +61 -0
  106. data/core/src/services/alluxio/docs.md +45 -0
  107. data/core/src/services/azblob/docs.md +77 -0
  108. data/core/src/services/azdls/docs.md +73 -0
  109. data/core/src/services/azfile/docs.md +65 -0
  110. data/core/src/services/b2/docs.md +54 -0
  111. data/core/src/services/cacache/docs.md +38 -0
  112. data/core/src/services/cloudflare_kv/docs.md +21 -0
  113. data/core/src/services/cos/docs.md +55 -0
  114. data/core/src/services/d1/docs.md +48 -0
  115. data/core/src/services/dashmap/docs.md +38 -0
  116. data/core/src/services/dbfs/docs.md +57 -0
  117. data/core/src/services/dropbox/docs.md +64 -0
  118. data/core/src/services/etcd/docs.md +45 -0
  119. data/core/src/services/foundationdb/docs.md +42 -0
  120. data/core/src/services/fs/docs.md +49 -0
  121. data/core/src/services/ftp/docs.md +42 -0
  122. data/core/src/services/gcs/docs.md +76 -0
  123. data/core/src/services/gdrive/docs.md +65 -0
  124. data/core/src/services/ghac/docs.md +84 -0
  125. data/core/src/services/github/docs.md +52 -0
  126. data/core/src/services/gridfs/docs.md +46 -0
  127. data/core/src/services/hdfs/docs.md +140 -0
  128. data/core/src/services/hdfs_native/docs.md +35 -0
  129. data/core/src/services/http/docs.md +45 -0
  130. data/core/src/services/huggingface/docs.md +61 -0
  131. data/core/src/services/ipfs/docs.md +45 -0
  132. data/core/src/services/ipmfs/docs.md +14 -0
  133. data/core/src/services/koofr/docs.md +51 -0
  134. data/core/src/services/lakefs/docs.md +62 -0
  135. data/core/src/services/memcached/docs.md +47 -0
  136. data/core/src/services/memory/docs.md +36 -0
  137. data/core/src/services/mini_moka/docs.md +19 -0
  138. data/core/src/services/moka/docs.md +42 -0
  139. data/core/src/services/mongodb/docs.md +49 -0
  140. data/core/src/services/monoiofs/docs.md +46 -0
  141. data/core/src/services/mysql/docs.md +47 -0
  142. data/core/src/services/obs/docs.md +54 -0
  143. data/core/src/services/onedrive/docs.md +115 -0
  144. data/core/src/services/opfs/docs.md +18 -0
  145. data/core/src/services/oss/docs.md +74 -0
  146. data/core/src/services/pcloud/docs.md +51 -0
  147. data/core/src/services/persy/docs.md +43 -0
  148. data/core/src/services/postgresql/docs.md +47 -0
  149. data/core/src/services/redb/docs.md +41 -0
  150. data/core/src/services/redis/docs.md +43 -0
  151. data/core/src/services/rocksdb/docs.md +54 -0
  152. data/core/src/services/s3/compatible_services.md +126 -0
  153. data/core/src/services/s3/docs.md +244 -0
  154. data/core/src/services/seafile/docs.md +54 -0
  155. data/core/src/services/sftp/docs.md +49 -0
  156. data/core/src/services/sled/docs.md +39 -0
  157. data/core/src/services/sqlite/docs.md +46 -0
  158. data/core/src/services/surrealdb/docs.md +54 -0
  159. data/core/src/services/swift/compatible_services.md +53 -0
  160. data/core/src/services/swift/docs.md +52 -0
  161. data/core/src/services/tikv/docs.md +43 -0
  162. data/core/src/services/upyun/docs.md +51 -0
  163. data/core/src/services/vercel_artifacts/docs.md +40 -0
  164. data/core/src/services/vercel_blob/docs.md +45 -0
  165. data/core/src/services/webdav/docs.md +49 -0
  166. data/core/src/services/webhdfs/docs.md +90 -0
  167. data/core/src/services/yandex_disk/docs.md +45 -0
  168. data/core/tests/behavior/README.md +77 -0
  169. data/core/tests/data/normal_dir/.gitkeep +0 -0
  170. data/core/tests/data/normal_file.txt +1041 -0
  171. data/core/tests/data/special_dir !@#$%^&()_+-=;',/.gitkeep +0 -0
  172. data/core/tests/data/special_file !@#$%^&()_+-=;',.txt +1041 -0
  173. data/core/users.md +13 -0
  174. data/extconf.rb +24 -0
  175. data/lib/opendal.rb +25 -0
  176. data/lib/opendal_ruby/entry.rb +35 -0
  177. data/lib/opendal_ruby/io.rb +70 -0
  178. data/lib/opendal_ruby/metadata.rb +44 -0
  179. data/lib/opendal_ruby/opendal_ruby.bundle +0 -0
  180. data/lib/opendal_ruby/operator.rb +29 -0
  181. data/lib/opendal_ruby/operator_info.rb +26 -0
  182. data/opendal.gemspec +91 -0
  183. data/test/blocking_op_test.rb +112 -0
  184. data/test/capability_test.rb +42 -0
  185. data/test/io_test.rb +172 -0
  186. data/test/lister_test.rb +77 -0
  187. data/test/metadata_test.rb +78 -0
  188. data/test/middlewares_test.rb +46 -0
  189. data/test/operator_info_test.rb +35 -0
  190. data/test/test_helper.rb +36 -0
  191. metadata +240 -0
@@ -0,0 +1,185 @@
1
+ - Proposal Name: `object_native_api`
2
+ - Start Date: 2022-02-18
3
+ - RFC PR: [apache/opendal#41](https://github.com/apache/opendal/pull/41)
4
+ - Tracking Issue: [apache/opendal#35](https://github.com/apache/opendal/pull/35)
5
+
6
+ # Summary
7
+
8
+ Refactor the API in an object-native way to make it easier to use.
9
+
10
+ # Motivation
11
+
12
+ `opendal` is not easy to use.
13
+
14
+ In our early adoption project `databend`, we can see a lot of code looks like:
15
+
16
+ ```rust
17
+ let data_accessor = self.data_accessor.clone();
18
+ let path = self.path.clone();
19
+ let reader = SeekableReader::new(data_accessor, path.as_str(), stream_len);
20
+ let reader = BufReader::with_capacity(read_buffer_size as usize, reader);
21
+ Self::read_column(reader, &col_meta, data_type.clone(), arrow_type.clone()).await
22
+ ```
23
+
24
+ And
25
+
26
+ ```rust
27
+ op.stat(&path).run().await
28
+ ```
29
+
30
+ ## Conclusion
31
+
32
+ So in this proposal, I expect to address those problems. After implementing this proposal, we have a faster and easier-to-use `opendal`.
33
+
34
+ # Guide-level explanation
35
+
36
+ To operate on an object, we will use `Operator::object()` to create a new handler:
37
+
38
+ ```rust
39
+ let o = op.object("path/to/file");
40
+ ```
41
+
42
+ The operations currently available on `Object` include:
43
+
44
+ - `metadata`: get object metadata (return an error if not exist).
45
+ - `delete`: delete an object.
46
+ - `reader`: create a new reader to read data from this object.
47
+ - `writer`: create a new writer to write data into this object.
48
+
49
+ Here is an example:
50
+
51
+ ```rust
52
+ use anyhow::Result;
53
+ use futures::AsyncReadExt;
54
+
55
+ use opendal::services::fs;
56
+ use opendal::Operator;
57
+
58
+ #[tokio::main]
59
+ async fn main() -> Result<()> {
60
+ let op = Operator::new(fs::Backend::build().root("/tmp").finish().await?);
61
+
62
+ let o = op.object("test_file");
63
+
64
+ // Write data into file;
65
+ let w = o.writer();
66
+ let n = w
67
+ .write_bytes("Hello, World!".to_string().into_bytes())
68
+ .await?;
69
+ assert_eq!(n, 13);
70
+
71
+ // Read data from file;
72
+ let mut r = o.reader();
73
+ let mut buf = vec![];
74
+ let n = r.read_to_end(&mut buf).await?;
75
+ assert_eq!(n, 13);
76
+ assert_eq!(String::from_utf8_lossy(&buf), "Hello, World!");
77
+
78
+ // Get file's Metadata
79
+ let meta = o.metadata().await?;
80
+ assert_eq!(meta.content_length(), 13);
81
+
82
+ // Delete file.
83
+ o.delete().await?;
84
+
85
+ Ok(())
86
+ }
87
+ ```
88
+
89
+ # Reference-level explanation
90
+
91
+ ## Native Reader support
92
+
93
+ We will provide a `Reader` (which implements both `AsyncRead` and `AsyncSeek`) for users instead of just an `AsyncRead`. In this `Reader`, we will:
94
+
95
+ - Not maintain internal buffer: caller can decide to wrap into `BufReader`.
96
+ - Only rely on accessor's `read` and `stat` operations.
97
+
98
+ To avoid the extra cost for `stat`, we will:
99
+
100
+ - Allow the user to specify `total_size` for `Reader`.
101
+ - Lazily send `stat` the first time `SeekFrom::End()` is used.
102
+
103
+ To avoid the extra cost for `poll_read`, we will:
104
+
105
+ - Keep the underlying `BoxedAsyncRead` open, so that we can reuse the same connection/fd.
106
+
107
+ With these changes, we can improve the `Reader` performance on both local fs and remote storage:
108
+
109
+ - fs, before
110
+
111
+ ```shell
112
+ Benchmarking fs/bench_read/64226295-b7a7-416e-94ce-666ac3ab037b:
113
+ time: [16.060 ms 17.109 ms 18.124 ms]
114
+ thrpt: [882.82 MiB/s 935.20 MiB/s 996.24 MiB/s]
115
+
116
+ Benchmarking fs/bench_buf_read/64226295-b7a7-416e-94ce-666ac3ab037b:
117
+ time: [14.779 ms 14.857 ms 14.938 ms]
118
+ thrpt: [1.0460 GiB/s 1.0517 GiB/s 1.0572 GiB/s]
119
+ ```
120
+
121
+ - fs, after
122
+
123
+ ```shell
124
+ Benchmarking fs/bench_read/df531bc7-54c8-43b6-b412-e4f7b9589876:
125
+ time: [14.654 ms 15.452 ms 16.273 ms]
126
+ thrpt: [983.20 MiB/s 1.0112 GiB/s 1.0663 GiB/s]
127
+
128
+ Benchmarking fs/bench_buf_read/df531bc7-54c8-43b6-b412-e4f7b9589876:
129
+ time: [5.5589 ms 5.5825 ms 5.6076 ms]
130
+ thrpt: [2.7864 GiB/s 2.7989 GiB/s 2.8108 GiB/s]
131
+ ```
132
+
133
+ - s3, before
134
+
135
+ ```shell
136
+ Benchmarking s3/bench_read/72025a81-a4b6-46dc-b485-8d875d23c3a5:
137
+ time: [4.8315 ms 4.9331 ms 5.0403 ms]
138
+ thrpt: [3.1000 GiB/s 3.1674 GiB/s 3.2340 GiB/s]
139
+
140
+ Benchmarking s3/bench_buf_read/72025a81-a4b6-46dc-b485-8d875d23c3a5:
141
+ time: [16.246 ms 16.539 ms 16.833 ms]
142
+ thrpt: [950.52 MiB/s 967.39 MiB/s 984.84 MiB/s]
143
+ ```
144
+
145
+ - s3, after
146
+
147
+ ```shell
148
+ Benchmarking s3/bench_read/6971c464-15f7-48d6-b69c-c8abc7774802:
149
+ time: [4.4222 ms 4.5685 ms 4.7181 ms]
150
+ thrpt: [3.3117 GiB/s 3.4202 GiB/s 3.5333 GiB/s]
151
+
152
+ Benchmarking s3/bench_buf_read/6971c464-15f7-48d6-b69c-c8abc7774802:
153
+ time: [5.5598 ms 5.7174 ms 5.8691 ms]
154
+ thrpt: [2.6622 GiB/s 2.7329 GiB/s 2.8103 GiB/s]
155
+ ```
156
+
157
+ ## Object API
158
+
159
+ Other changes are just a re-order of APIs.
160
+
161
+ - `Operator::read() -> BoxedAsyncRead` => `Object::reader() -> Reader`
162
+ - `Operator::write(r: BoxedAsyncRead, size: u64)` => `Object::writer() -> Writer`
163
+ - `Operator::stat() -> Object` => `Object::metadata() -> Metadata`
164
+ - `Operator::delete()` => `Object::delete()`
165
+
166
+ # Drawbacks
167
+
168
+ None.
169
+
170
+ # Rationale and alternatives
171
+
172
+ None
173
+
174
+ # Prior art
175
+
176
+ None
177
+
178
+ # Unresolved questions
179
+
180
+ None
181
+
182
+ # Future possibilities
183
+
184
+ - Implement `AsyncWrite` for `Writer` so that we can use `Writer` easier.
185
+ - Implement `Operator::objects()` to return an object iterator.
@@ -0,0 +1,198 @@
1
+ - Proposal Name: `error_handle`
2
+ - Start Date: 2022-02-23
3
+ - RFC PR: [apache/opendal#44](https://github.com/apache/opendal/pull/44)
4
+ - Tracking Issue: [apache/opendal#43](https://github.com/apache/opendal/pull/43)
5
+
6
+ # Summary
7
+
8
+ Enhanced error handling for OpenDAL.
9
+
10
+ # Motivation
11
+
12
+ OpenDAL didn't handle errors correctly.
13
+
14
+ ```rust
15
+ fn parse_unexpected_error<E>(_: SdkError<E>, path: &str) -> Error {
16
+ Error::Unexpected(path.to_string())
17
+ }
18
+ ```
19
+
20
+ Most of the time, we return a path that is meaningless for debugging.
21
+
22
+ There are two issues about this shortcoming:
23
+
24
+ - [error: Split ErrorKind and Context for error check easier](https://github.com/apache/opendal/issues/24)
25
+ - [Improvement: provides more information about the cause of DalTransportError](https://github.com/apache/opendal/issues/29)
26
+
27
+ First, we can't check `ErrorKind` quickly. We have to resort to the `matches!` macro:
28
+
29
+ ```rust
30
+ assert!(
31
+ matches!(
32
+ result.err().unwrap(),
33
+ opendal::error::Error::ObjectNotExist(_)
34
+ ),
35
+ );
36
+ ```
37
+
38
+ Then, we didn't bring enough information for users to debug what happened inside OpenDAL.
39
+
40
+ So we must handle errors correctly, so that:
41
+
42
+ - We can check the `Kind` to know what error happened.
43
+ - We can read `context` to know more details.
44
+ - We can get the source of this error to know more details.
45
+
46
+ # Guide-level explanation
47
+
48
+ Now we are trying to get an object's metadata:
49
+
50
+ ```rust
51
+ let meta = o.metadata().await;
52
+ ```
53
+
54
+ Unfortunately, the `Object` does not exist, so we can check out what happened.
55
+
56
+ ```rust
57
+ if let Err(e) = meta {
58
+ if e.kind() == Kind::ObjectNotExist {
59
+ // Handle this error
60
+ }
61
+ }
62
+ ```
63
+
64
+ It's possible that we don't care about other errors. It's OK to log it out:
65
+
66
+ ```rust
67
+ if let Err(e) = meta {
68
+ if e.kind() == Kind::ObjectNotExist {
69
+ // Handle this error
70
+ } else {
71
+ error!("{e}");
72
+ }
73
+ }
74
+ ```
75
+
76
+ For a backend implementer, we can provide as much information as possible. For example, we can return `bucket is empty` to let the user know:
77
+
78
+ ```rust
79
+ return Err(Error::Backend {
80
+ kind: Kind::BackendConfigurationInvalid,
81
+ context: HashMap::from([("bucket".to_string(), "".to_string())]),
82
+ source: anyhow!("bucket is empty"),
83
+ });
84
+ ```
85
+
86
+ Or, we can return an underlying error to let users figure out:
87
+
88
+ ```rust
89
+ Error::Object {
90
+ kind: Kind::Unexpected,
91
+ op,
92
+ path: path.to_string(),
93
+ source: anyhow::Error::from(err),
94
+ }
95
+ ```
96
+
97
+ So our application users will get enough information now:
98
+
99
+ ```shell
100
+ Object { kind: ObjectNotExist, op: "stat", path: "/tmp/998e4dec-c84b-4164-a7a1-1f140654934f", source: No such file or directory (os error 2) }
101
+ ```
102
+
103
+
104
+ # Reference-level explanation
105
+
106
+ We will split `Error` into `Error` and `Kind`.
107
+
108
+ `Kind` is an enum organized by different categories.
109
+
110
+ Every error will map to a kind, which will be in the error message.
111
+
112
+ ```rust
113
+ pub enum Kind {
114
+ #[error("backend not supported")]
115
+ BackendNotSupported,
116
+ #[error("backend configuration invalid")]
117
+ BackendConfigurationInvalid,
118
+
119
+ #[error("object not exist")]
120
+ ObjectNotExist,
121
+ #[error("object permission denied")]
122
+ ObjectPermissionDenied,
123
+
124
+ #[error("unexpected")]
125
+ Unexpected,
126
+ }
127
+ ```
128
+
129
+ In `Error`, we will have different variants to carry different contexts:
130
+
131
+ ```rust
132
+ pub enum Error {
133
+ #[error("{kind}: (context: {context:?}, source: {source})")]
134
+ Backend {
135
+ kind: Kind,
136
+ context: HashMap<String, String>,
137
+ source: anyhow::Error,
138
+ },
139
+
140
+ #[error("{kind}: (op: {op}, path: {path}, source: {source})")]
141
+ Object {
142
+ kind: Kind,
143
+ op: &'static str,
144
+ path: String,
145
+ source: anyhow::Error,
146
+ },
147
+
148
+ #[error("unexpected: (source: {0})")]
149
+ Unexpected(#[from] anyhow::Error),
150
+ }
151
+ ```
152
+
153
+ Every one of them will carry a source: `anyhow::Error` so that users can get the complete picture of this error. We have implemented `Error::kind()`, other helper functions are possible, but they are out of this RFC's scope.
154
+
155
+ ```rust
156
+ pub fn kind(&self) -> Kind {
157
+ match self {
158
+ Error::Backend { kind, .. } => *kind,
159
+ Error::Object { kind, .. } => *kind,
160
+ Error::Unexpected(_) => Kind::Unexpected,
161
+ }
162
+ }
163
+ ```
164
+
165
+ The implementer should do their best to carry as much context as possible. For example, they should return `Error::Object` to carry the `op` and `path`, instead of just returning `Error::Unexpected(anyhow::Error::from(err))`.
166
+
167
+ ```rust
168
+ Error::Object {
169
+ kind: Kind::Unexpected,
170
+ op,
171
+ path: path.to_string(),
172
+ source: anyhow::Error::from(err),
173
+ }
174
+ ```
175
+
176
+ # Drawbacks
177
+
178
+ None
179
+
180
+ # Rationale and alternatives
181
+
182
+ ## Why don't we implement `backtrace`?
183
+
184
+ `backtrace` is not stable yet, and `OpenDAL` must be compilable on stable Rust.
185
+
186
+ This proposal doesn't erase the possibility to add support once `backtrace` is stable.
187
+
188
+ # Prior art
189
+
190
+ None
191
+
192
+ # Unresolved questions
193
+
194
+ None
195
+
196
+ # Future possibilities
197
+
198
+ - `Backtrace` support.
@@ -0,0 +1,160 @@
1
+ - Proposal Name: `auto_region`
2
+ - Start Date: 2022-02-24
3
+ - RFC PR: [apache/opendal#57](https://github.com/apache/opendal/pull/57)
4
+ - Tracking Issue: [apache/opendal#58](https://github.com/apache/opendal/issues/58)
5
+
6
+ # Summary
7
+
8
+ Automatically detect the user's S3 region.
9
+
10
+ # Motivation
11
+
12
+ Current behavior for `region` and `endpoint` is buggy. `endpoint=https://s3.amazonaws.com` and `endpoint=""` are expected to be the same, because `endpoint=""` means take the default value `https://s3.amazonaws.com`. However, they aren't.
13
+
14
+ The S3 SDK has a mechanism to construct the correct API endpoint. It works like `format!("s3.{}.amazonaws.com", region)` internally. But if we set the endpoint to `https://s3.amazonaws.com`, the SDK will treat this endpoint as static and will not fill in the region.
15
+
16
+ So users could meet errors like:
17
+
18
+ ```shell
19
+ attempting to access must be addressed using the specified endpoint
20
+ ```
21
+
22
+ Automatically detecting the user's s3 region will help resolve this problem. Users don't need to care about the region anymore, `OpenDAL` will figure it out. Everything works regardless of whether the input is `s3.amazonaws.com` or `s3.us-east-1.amazonaws.com`.
23
+
24
+ # Guide-level explanation
25
+
26
+ `OpenDAL` will remove `region` option, and users only need to set the `endpoint` now.
27
+
28
+ Valid inputs include:
29
+
30
+ - `https://s3.amazonaws.com`
31
+ - `https://s3.us-east-1.amazonaws.com`
32
+ - `https://oss-ap-northeast-1.aliyuncs.com`
33
+ - `http://127.0.0.1:9000`
34
+
35
+ `OpenDAL` will handle the `region` internally and automatically.
36
+
37
+ # Reference-level explanation
38
+
39
+ S3 services support a mechanism to report the correct region themselves.
40
+
41
+ Sending a `HEAD` request to `<endpoint>/<bucket>` will get a response like:
42
+
43
+ ```shell
44
+ :) curl -I https://s3.amazonaws.com/databend-shared
45
+ HTTP/1.1 301 Moved Permanently
46
+ x-amz-bucket-region: us-east-2
47
+ x-amz-request-id: NPYSWK7WXJD1KQG7
48
+ x-amz-id-2: 3FJSJ5HACKqLbeeXBUUE3GoPL1IGDjLl6SZx/fw2MS+k0GND0UwDib5YQXE6CThiQxpYBWZjgxs=
49
+ Content-Type: application/xml
50
+ Date: Thu, 24 Feb 2022 05:15:13 GMT
51
+ Server: AmazonS3
52
+ ```
53
+
54
+ `x-amz-bucket-region: us-east-2` will be returned, and we can use this region to construct the correct endpoint for this bucket:
55
+
56
+ ```shell
57
+ :) curl -I https://s3.us-east-2.amazonaws.com/databend-shared
58
+ HTTP/1.1 403 Forbidden
59
+ x-amz-bucket-region: us-east-2
60
+ x-amz-request-id: 98CN5MYV3GQ1XMPY
61
+ x-amz-id-2: Tdxy36bRRP21Oip18KMQ7FG63MTeXOpXdd5/N3izFH0oalPODVaRlpCkDU3oUN0HIE24/ezX5Dc=
62
+ Content-Type: application/xml
63
+ Date: Thu, 24 Feb 2022 05:16:57 GMT
64
+ Server: AmazonS3
65
+ ```
66
+
67
+ It also works for S3-compatible services like MinIO:
68
+
69
+ ```shell
70
+ # Start minio with `MINIO_SITE_REGION` configured
71
+ :) MINIO_SITE_REGION=test minio server .
72
+ # Sending request to minio bucket
73
+ :) curl -I 127.0.0.1:9900/databend
74
+ HTTP/1.1 403 Forbidden
75
+ Accept-Ranges: bytes
76
+ Content-Length: 0
77
+ Content-Security-Policy: block-all-mixed-content
78
+ Server: MinIO
79
+ Strict-Transport-Security: max-age=31536000; includeSubDomains
80
+ Vary: Origin
81
+ Vary: Accept-Encoding
82
+ X-Amz-Bucket-Region: test
83
+ X-Amz-Request-Id: 16D6A12DCA57E0FA
84
+ X-Content-Type-Options: nosniff
85
+ X-Xss-Protection: 1; mode=block
86
+ Date: Thu, 24 Feb 2022 05:18:51 GMT
87
+ ```
88
+
89
+ We can use this mechanism to detect `region` automatically. The algorithm works as follows:
90
+
91
+ - If `endpoint` is empty, fill it with `https://s3.amazonaws.com` and the corresponding template: `https://s3.{region}.amazonaws.com`.
92
+ - Sending a `HEAD` request to `<endpoint>/<bucket>`.
93
+ - If got `200` or `403` response, the endpoint works.
94
+ - Use this endpoint directly without filling the template.
95
+ - Take the header `x-amz-bucket-region` as the region to fill the endpoint.
96
+ - Use the fallback value `us-east-1` to make the SDK happy if the header does not exist.
97
+ - If got a `301` response, the endpoint needs construction.
98
+ - Take the header `x-amz-bucket-region` as the region to fill the endpoint.
99
+ - Return an error to the user if not exist.
100
+ - If got `404`, the bucket could not exist, or the endpoint is incorrect.
101
+ - Return an error to the user.
102
+
103
+ # Drawbacks
104
+
105
+ None.
106
+
107
+ # Rationale and alternatives
108
+
109
+ ## Use virtual style `<bucket>.<endpoint>`?
110
+
111
+ The virtual style works too. But not all services support this kind of API endpoint. For example, using `http://testbucket.127.0.0.1` is wrong, and we need to do extra checks.
112
+
113
+ Using `<endpoint>/<bucket>` makes everything easier.
114
+
115
+ ## Use `ListBuckets` API?
116
+
117
+ `ListBuckets` requires higher permission than normal bucket read and write operations. It's better to finish the job without requesting more permission.
118
+
119
+ ## Misbehaving S3-compatible services
120
+
121
+ Many services didn't implement S3 API correctly.
122
+
123
+ Aliyun OSS will return `404` for every bucket:
124
+
125
+ ```shell
126
+ :) curl -I https://aliyuncs.com/<my-existing-bucket>
127
+ HTTP/2 404
128
+ date: Thu, 24 Feb 2022 05:32:57 GMT
129
+ content-type: text/html
130
+ content-length: 690
131
+ ufe-result: A6
132
+ set-cookie: thw=cn; Path=/; Domain=.taobao.com; Expires=Fri, 24-Feb-23 05:32:57 GMT;
133
+ server: Tengine/Aserver
134
+ ```
135
+
136
+ QingStor Object Storage will return `301` with the `Location` header:
137
+
138
+ ```shell
139
+ :) curl -I https://s3.qingstor.com/community
140
+ HTTP/1.1 301 Moved Permanently
141
+ Server: nginx/1.13.6
142
+ Date: Thu, 24 Feb 2022 05:33:55 GMT
143
+ Connection: keep-alive
144
+ Location: https://pek3a.s3.qingstor.com/community
145
+ X-Qs-Request-Id: 05b83b615c801a3d
146
+ ```
147
+
148
+ In this proposal, we will not figure them out. It's easier for the user to fill the correct endpoint instead of automatically detecting them.
149
+
150
+ # Prior art
151
+
152
+ None
153
+
154
+ # Unresolved questions
155
+
156
+ None
157
+
158
+ # Future possibilities
159
+
160
+ None
@@ -0,0 +1,145 @@
1
+ - Proposal Name: `object_stream`
2
+ - Start Date: 2022-02-25
3
+ - RFC PR: [apache/opendal#69](https://github.com/apache/opendal/pull/69)
4
+ - Tracking Issue: [apache/opendal#69](https://github.com/apache/opendal/issues/69)
5
+
6
+ # Summary
7
+
8
+ Allow user to read dir via `ObjectStream`.
9
+
10
+ # Motivation
11
+
12
+ Users need `readdir` support in `OpenDAL`: [Implement List support](https://github.com/apache/opendal/issues/12). Take [databend] for example, with `List` support, we can implement copy from `s3://bucket/path/to/dir` instead of only `s3://bucket/path/to/file`.
13
+
14
+ # Guide-level explanation
15
+
16
+ `Operator` supports a new action called `objects("path/to/dir")`, which returns an `ObjectStream`. We can iterate over the current dir like `std::fs::ReadDir`:
17
+
18
+ ```rust
19
+ let mut obs = op.objects("").map(|o| o.expect("list object"));
20
+ while let Some(o) = obs.next().await {
21
+ // Do something upon `Object`.
22
+ }
23
+ ```
24
+
25
+ To better support different file modes, there is a new object meta called `ObjectMode`:
26
+
27
+ ```rust
28
+ let meta = o.metadata().await?;
29
+ let mode = meta.mode();
30
+ if mode.contains(ObjectMode::FILE) {
31
+ // Do something on a file object.
32
+ } else if mode.contains(ObjectMode::DIR) {
33
+ // Do something on a dir object.
34
+ }
35
+ ```
36
+
37
+ We will try to cache some object metadata so that users can reduce `stat` calls:
38
+
39
+ ```rust
40
+ let meta = o.metadata_cached().await?;
41
+ ```
42
+
43
+ `o.metadata_cached()` will return local cached metadata if available.
44
+
45
+ # Reference-level explanation
46
+
47
+ First, we will add a new API in `Accessor`:
48
+
49
+ ```rust
50
+ pub type BoxedObjectStream = Box<dyn futures::Stream<Item = Result<Object>> + Unpin + Send>;
51
+
52
+ async fn list(&self, args: &OpList) -> Result<BoxedObjectStream> {
53
+ let _ = args;
54
+ unimplemented!()
55
+ }
56
+ ```
57
+
58
+ To support options in the future, we will wrap this call via `ObjectStream`:
59
+
60
+ ```rust
61
+ pub struct ObjectStream {
62
+ acc: Arc<dyn Accessor>,
63
+ path: String,
64
+
65
+ state: State,
66
+ }
67
+
68
+ enum State {
69
+ Idle,
70
+ Sending(BoxFuture<'static, Result<BoxedObjectStream>>),
71
+ Listing(BoxedObjectStream),
72
+ }
73
+ ```
74
+
75
+ So the public API to end-users will be:
76
+
77
+ ```rust
78
+ impl Operator {
79
+ pub fn objects(&self, path: &str) -> ObjectStream {
80
+ ObjectStream::new(self.inner(), path)
81
+ }
82
+ }
83
+ ```
84
+
85
+ For cached metadata support, we will add a flag in `Metadata`:
86
+
87
+ ```rust
88
+ #[derive(Debug, Clone, Default)]
89
+ pub struct Metadata {
90
+ complete: bool,
91
+
92
+ path: String,
93
+ mode: Option<ObjectMode>,
94
+
95
+ content_length: Option<u64>,
96
+ }
97
+ ```
98
+
99
+ And add a new API `Object::metadata_cached()`:
100
+
101
+ ```rust
102
+ pub async fn metadata_cached(&mut self) -> Result<&Metadata> {
103
+ if self.meta.complete() {
104
+ return Ok(&self.meta);
105
+ }
106
+
107
+ let op = &OpStat::new(self.meta.path());
108
+ self.meta = self.acc.stat(op).await?;
109
+
110
+ Ok(&self.meta)
111
+ }
112
+ ```
113
+
114
+ The backend implementer must make sure `complete` is correctly set.
115
+
116
+ `Metadata` will be immutable from the outside, so all `set_xxx` APIs will be `pub(crate)` only:
117
+
118
+ ```rust
119
+ pub(crate) fn set_content_length(&mut self, content_length: u64) -> &mut Self {
120
+ self.content_length = Some(content_length);
121
+ self
122
+ }
123
+ ```
124
+
125
+ # Drawbacks
126
+
127
+ None
128
+
129
+ # Rationale and alternatives
130
+
131
+ None
132
+
133
+ # Prior art
134
+
135
+ None
136
+
137
+ # Unresolved questions
138
+
139
+ None
140
+
141
+ # Future possibilities
142
+
143
+ - More precise field-level metadata cache so that user can send `stat` only when needed.
144
+
145
+ [databend]: https://github.com/datafuselabs/databend