opendal 0.1.6.pre.rc.1-arm64-darwin-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. checksums.yaml +7 -0
  2. data/.standard.yml +20 -0
  3. data/.tool-versions +1 -0
  4. data/.yardopts +1 -0
  5. data/DEPENDENCIES.md +9 -0
  6. data/DEPENDENCIES.rust.tsv +277 -0
  7. data/Gemfile +35 -0
  8. data/README.md +159 -0
  9. data/Rakefile +149 -0
  10. data/core/CHANGELOG.md +4929 -0
  11. data/core/CONTRIBUTING.md +61 -0
  12. data/core/DEPENDENCIES.md +3 -0
  13. data/core/DEPENDENCIES.rust.tsv +185 -0
  14. data/core/LICENSE +201 -0
  15. data/core/README.md +228 -0
  16. data/core/benches/README.md +18 -0
  17. data/core/benches/ops/README.md +26 -0
  18. data/core/benches/types/README.md +9 -0
  19. data/core/benches/vs_fs/README.md +35 -0
  20. data/core/benches/vs_s3/README.md +55 -0
  21. data/core/edge/README.md +3 -0
  22. data/core/edge/file_write_on_full_disk/README.md +14 -0
  23. data/core/edge/s3_aws_assume_role_with_web_identity/README.md +18 -0
  24. data/core/edge/s3_read_on_wasm/.gitignore +3 -0
  25. data/core/edge/s3_read_on_wasm/README.md +42 -0
  26. data/core/edge/s3_read_on_wasm/webdriver.json +15 -0
  27. data/core/examples/README.md +23 -0
  28. data/core/examples/basic/README.md +15 -0
  29. data/core/examples/concurrent-upload/README.md +15 -0
  30. data/core/examples/multipart-upload/README.md +15 -0
  31. data/core/fuzz/.gitignore +5 -0
  32. data/core/fuzz/README.md +68 -0
  33. data/core/src/docs/comparisons/vs_object_store.md +183 -0
  34. data/core/src/docs/performance/concurrent_write.md +101 -0
  35. data/core/src/docs/performance/http_optimization.md +124 -0
  36. data/core/src/docs/rfcs/0000_example.md +74 -0
  37. data/core/src/docs/rfcs/0000_foyer_integration.md +111 -0
  38. data/core/src/docs/rfcs/0041_object_native_api.md +185 -0
  39. data/core/src/docs/rfcs/0044_error_handle.md +198 -0
  40. data/core/src/docs/rfcs/0057_auto_region.md +160 -0
  41. data/core/src/docs/rfcs/0069_object_stream.md +145 -0
  42. data/core/src/docs/rfcs/0090_limited_reader.md +155 -0
  43. data/core/src/docs/rfcs/0112_path_normalization.md +79 -0
  44. data/core/src/docs/rfcs/0191_async_streaming_io.md +328 -0
  45. data/core/src/docs/rfcs/0203_remove_credential.md +96 -0
  46. data/core/src/docs/rfcs/0221_create_dir.md +89 -0
  47. data/core/src/docs/rfcs/0247_retryable_error.md +87 -0
  48. data/core/src/docs/rfcs/0293_object_id.md +67 -0
  49. data/core/src/docs/rfcs/0337_dir_entry.md +191 -0
  50. data/core/src/docs/rfcs/0409_accessor_capabilities.md +67 -0
  51. data/core/src/docs/rfcs/0413_presign.md +154 -0
  52. data/core/src/docs/rfcs/0423_command_line_interface.md +268 -0
  53. data/core/src/docs/rfcs/0429_init_from_iter.md +107 -0
  54. data/core/src/docs/rfcs/0438_multipart.md +163 -0
  55. data/core/src/docs/rfcs/0443_gateway.md +73 -0
  56. data/core/src/docs/rfcs/0501_new_builder.md +111 -0
  57. data/core/src/docs/rfcs/0554_write_refactor.md +96 -0
  58. data/core/src/docs/rfcs/0561_list_metadata_reuse.md +210 -0
  59. data/core/src/docs/rfcs/0599_blocking_api.md +157 -0
  60. data/core/src/docs/rfcs/0623_redis_service.md +300 -0
  61. data/core/src/docs/rfcs/0627_split_capabilities.md +89 -0
  62. data/core/src/docs/rfcs/0661_path_in_accessor.md +126 -0
  63. data/core/src/docs/rfcs/0793_generic_kv_services.md +209 -0
  64. data/core/src/docs/rfcs/0926_object_reader.md +93 -0
  65. data/core/src/docs/rfcs/0977_refactor_error.md +151 -0
  66. data/core/src/docs/rfcs/1085_object_handler.md +73 -0
  67. data/core/src/docs/rfcs/1391_object_metadataer.md +110 -0
  68. data/core/src/docs/rfcs/1398_query_based_metadata.md +125 -0
  69. data/core/src/docs/rfcs/1420_object_writer.md +147 -0
  70. data/core/src/docs/rfcs/1477_remove_object_concept.md +159 -0
  71. data/core/src/docs/rfcs/1735_operation_extension.md +117 -0
  72. data/core/src/docs/rfcs/2083_writer_sink_api.md +106 -0
  73. data/core/src/docs/rfcs/2133_append_api.md +88 -0
  74. data/core/src/docs/rfcs/2299_chain_based_operator_api.md +99 -0
  75. data/core/src/docs/rfcs/2602_object_versioning.md +138 -0
  76. data/core/src/docs/rfcs/2758_merge_append_into_write.md +79 -0
  77. data/core/src/docs/rfcs/2774_lister_api.md +66 -0
  78. data/core/src/docs/rfcs/2779_list_with_metakey.md +143 -0
  79. data/core/src/docs/rfcs/2852_native_capability.md +58 -0
  80. data/core/src/docs/rfcs/2884_merge_range_read_into_read.md +80 -0
  81. data/core/src/docs/rfcs/3017_remove_write_copy_from.md +94 -0
  82. data/core/src/docs/rfcs/3197_config.md +237 -0
  83. data/core/src/docs/rfcs/3232_align_list_api.md +69 -0
  84. data/core/src/docs/rfcs/3243_list_prefix.md +128 -0
  85. data/core/src/docs/rfcs/3356_lazy_reader.md +111 -0
  86. data/core/src/docs/rfcs/3526_list_recursive.md +59 -0
  87. data/core/src/docs/rfcs/3574_concurrent_stat_in_list.md +80 -0
  88. data/core/src/docs/rfcs/3734_buffered_reader.md +64 -0
  89. data/core/src/docs/rfcs/3898_concurrent_writer.md +66 -0
  90. data/core/src/docs/rfcs/3911_deleter_api.md +165 -0
  91. data/core/src/docs/rfcs/4382_range_based_read.md +213 -0
  92. data/core/src/docs/rfcs/4638_executor.md +215 -0
  93. data/core/src/docs/rfcs/5314_remove_metakey.md +120 -0
  94. data/core/src/docs/rfcs/5444_operator_from_uri.md +162 -0
  95. data/core/src/docs/rfcs/5479_context.md +140 -0
  96. data/core/src/docs/rfcs/5485_conditional_reader.md +112 -0
  97. data/core/src/docs/rfcs/5495_list_with_deleted.md +81 -0
  98. data/core/src/docs/rfcs/5556_write_returns_metadata.md +121 -0
  99. data/core/src/docs/rfcs/5871_read_returns_metadata.md +112 -0
  100. data/core/src/docs/rfcs/6189_remove_native_blocking.md +106 -0
  101. data/core/src/docs/rfcs/6209_glob_support.md +132 -0
  102. data/core/src/docs/rfcs/6213_options_api.md +142 -0
  103. data/core/src/docs/rfcs/README.md +62 -0
  104. data/core/src/docs/upgrade.md +1556 -0
  105. data/core/src/services/aliyun_drive/docs.md +61 -0
  106. data/core/src/services/alluxio/docs.md +45 -0
  107. data/core/src/services/azblob/docs.md +77 -0
  108. data/core/src/services/azdls/docs.md +73 -0
  109. data/core/src/services/azfile/docs.md +65 -0
  110. data/core/src/services/b2/docs.md +54 -0
  111. data/core/src/services/cacache/docs.md +38 -0
  112. data/core/src/services/cloudflare_kv/docs.md +21 -0
  113. data/core/src/services/cos/docs.md +55 -0
  114. data/core/src/services/d1/docs.md +48 -0
  115. data/core/src/services/dashmap/docs.md +38 -0
  116. data/core/src/services/dbfs/docs.md +57 -0
  117. data/core/src/services/dropbox/docs.md +64 -0
  118. data/core/src/services/etcd/docs.md +45 -0
  119. data/core/src/services/foundationdb/docs.md +42 -0
  120. data/core/src/services/fs/docs.md +49 -0
  121. data/core/src/services/ftp/docs.md +42 -0
  122. data/core/src/services/gcs/docs.md +76 -0
  123. data/core/src/services/gdrive/docs.md +65 -0
  124. data/core/src/services/ghac/docs.md +84 -0
  125. data/core/src/services/github/docs.md +52 -0
  126. data/core/src/services/gridfs/docs.md +46 -0
  127. data/core/src/services/hdfs/docs.md +140 -0
  128. data/core/src/services/hdfs_native/docs.md +35 -0
  129. data/core/src/services/http/docs.md +45 -0
  130. data/core/src/services/huggingface/docs.md +61 -0
  131. data/core/src/services/ipfs/docs.md +45 -0
  132. data/core/src/services/ipmfs/docs.md +14 -0
  133. data/core/src/services/koofr/docs.md +51 -0
  134. data/core/src/services/lakefs/docs.md +62 -0
  135. data/core/src/services/memcached/docs.md +47 -0
  136. data/core/src/services/memory/docs.md +36 -0
  137. data/core/src/services/mini_moka/docs.md +19 -0
  138. data/core/src/services/moka/docs.md +42 -0
  139. data/core/src/services/mongodb/docs.md +49 -0
  140. data/core/src/services/monoiofs/docs.md +46 -0
  141. data/core/src/services/mysql/docs.md +47 -0
  142. data/core/src/services/obs/docs.md +54 -0
  143. data/core/src/services/onedrive/docs.md +115 -0
  144. data/core/src/services/opfs/docs.md +18 -0
  145. data/core/src/services/oss/docs.md +74 -0
  146. data/core/src/services/pcloud/docs.md +51 -0
  147. data/core/src/services/persy/docs.md +43 -0
  148. data/core/src/services/postgresql/docs.md +47 -0
  149. data/core/src/services/redb/docs.md +41 -0
  150. data/core/src/services/redis/docs.md +43 -0
  151. data/core/src/services/rocksdb/docs.md +54 -0
  152. data/core/src/services/s3/compatible_services.md +126 -0
  153. data/core/src/services/s3/docs.md +244 -0
  154. data/core/src/services/seafile/docs.md +54 -0
  155. data/core/src/services/sftp/docs.md +49 -0
  156. data/core/src/services/sled/docs.md +39 -0
  157. data/core/src/services/sqlite/docs.md +46 -0
  158. data/core/src/services/surrealdb/docs.md +54 -0
  159. data/core/src/services/swift/compatible_services.md +53 -0
  160. data/core/src/services/swift/docs.md +52 -0
  161. data/core/src/services/tikv/docs.md +43 -0
  162. data/core/src/services/upyun/docs.md +51 -0
  163. data/core/src/services/vercel_artifacts/docs.md +40 -0
  164. data/core/src/services/vercel_blob/docs.md +45 -0
  165. data/core/src/services/webdav/docs.md +49 -0
  166. data/core/src/services/webhdfs/docs.md +90 -0
  167. data/core/src/services/yandex_disk/docs.md +45 -0
  168. data/core/tests/behavior/README.md +77 -0
  169. data/core/tests/data/normal_dir/.gitkeep +0 -0
  170. data/core/tests/data/normal_file.txt +1041 -0
  171. data/core/tests/data/special_dir !@#$%^&()_+-=;',/.gitkeep +0 -0
  172. data/core/tests/data/special_file !@#$%^&()_+-=;',.txt +1041 -0
  173. data/core/users.md +13 -0
  174. data/extconf.rb +24 -0
  175. data/lib/opendal.rb +25 -0
  176. data/lib/opendal_ruby/entry.rb +35 -0
  177. data/lib/opendal_ruby/io.rb +70 -0
  178. data/lib/opendal_ruby/metadata.rb +44 -0
  179. data/lib/opendal_ruby/opendal_ruby.bundle +0 -0
  180. data/lib/opendal_ruby/operator.rb +29 -0
  181. data/lib/opendal_ruby/operator_info.rb +26 -0
  182. data/opendal.gemspec +91 -0
  183. data/test/blocking_op_test.rb +112 -0
  184. data/test/capability_test.rb +42 -0
  185. data/test/io_test.rb +172 -0
  186. data/test/lister_test.rb +77 -0
  187. data/test/metadata_test.rb +78 -0
  188. data/test/middlewares_test.rb +46 -0
  189. data/test/operator_info_test.rb +35 -0
  190. data/test/test_helper.rb +36 -0
  191. metadata +240 -0
@@ -0,0 +1,111 @@
1
+ - Proposal Name: `new_builder`
2
+ - Start Date: 2022-08-03
3
+ - RFC PR: [apache/opendal#501](https://github.com/apache/opendal/pull/501)
4
+ - Tracking Issue: [apache/opendal#502](https://github.com/apache/opendal/issues/502)
5
+
6
+ # Summary
7
+
8
+ Allow users to build services without async.
9
+
10
+ # Motivation
11
+
12
+ Most services share a similar builder API to construct backends.
13
+
14
+ ```rust
15
+ impl Builder {
16
+ pub async fn finish(&mut self) -> Result<Arc<dyn Accessor>> {}
17
+ }
18
+ ```
19
+
20
+ We have `async` here, so every user who wants to build a service backend must go through an async runtime, even for the `memory` backend:
21
+
22
+ ```rust
23
+ impl Builder {
24
+ /// Consume builder to build a memory backend.
25
+ pub async fn finish(&mut self) -> Result<Arc<dyn Accessor>> {
26
+ Ok(Arc::new(Backend::default()))
27
+ }
28
+ }
29
+ ```
30
+
31
+ Only the `s3` service needs to call an async function, `detect_region`, to get the correct region.
32
+
33
+ So, we can provide blocking `Builder` APIs and move the async-related logic out for users to call themselves. This way, our users can build services without dealing with an async runtime.
34
+
35
+ # Guide-level explanation
36
+
37
+ After this change, all our service builders will add a new API:
38
+
39
+ ```rust
40
+ impl Builder {
41
+ pub fn build(&mut self) -> Result<Backend> {}
42
+ }
43
+ ```
44
+
45
+ Along with this change, our `Operator` will accept `impl Accessor + 'static` instead of `Arc<dyn Accessor>`:
46
+
47
+ ```rust
48
+ impl Operator {
49
+ pub fn new(accessor: impl Accessor + 'static) -> Self {}
50
+ }
51
+ ```
52
+
53
+ Also, we will implement `From<impl Accessor + 'static>` for `Operator`:
54
+
55
+ ```rust
56
+ impl<A> From<A> for Operator
57
+ where
58
+ A: Accessor + 'static,
59
+ {
60
+ fn from(acc: A) -> Self {
61
+ Operator::new(acc)
62
+ }
63
+ }
64
+ ```
65
+
66
+ We can initiate an operator quicker:
67
+
68
+ ```diff
69
+ - let op: Operator = Operator::new(fs::Backend::build().finish().await?);
70
+ + let op: Operator = fs::Builder::new().build()?.into();
71
+ ```
72
+
73
+ # Reference-level explanation
74
+
75
+ We will add the following APIs:
76
+
77
+ - All builders will add `build(&mut self) -> Result<Backend>`
78
+ - `impl<A> From<A> for Operator where A: Accessor + 'static`
79
+
80
+ We will deprecate the following APIs:
81
+
82
+ - All builders `finish()` API (should be replaced by `build()`)
83
+ - All services `build()` API (should be replaced by `Builder::new()` or `Builder::default()`)
84
+
85
+ We will change the following APIs:
86
+
87
+ - Operator: `fn new(accessor: Arc<dyn Accessor>)` -> `fn new(accessor: impl Accessor + 'static)`
88
+ - Operator: `async fn from_iter()` -> `fn from_iter()`
89
+ - Operator: `async fn from_env()` -> `fn from_env()`
90
+
91
+ Most services will work the same, except for `s3`: `s3` depends on `detect_region` to determine the correct region if the user doesn't provide one. After this change, `s3::Builder.build()` will return an error if `region` is missing. Users should call `detect_region` themselves to get the region.
92
+
93
+ # Drawbacks
94
+
95
+ None.
96
+
97
+ # Rationale and alternatives
98
+
99
+ None.
100
+
101
+ # Prior art
102
+
103
+ None.
104
+
105
+ # Unresolved questions
106
+
107
+ None.
108
+
109
+ # Future possibilities
110
+
111
+ None.
@@ -0,0 +1,96 @@
1
+ - Proposal Name: `write_refactor`
2
+ - Start Date: 2022-08-22
3
+ - RFC PR: [apache/opendal#554](https://github.com/apache/opendal/pull/554)
4
+ - Tracking Issue: [apache/opendal#555](https://github.com/apache/opendal/issues/555)
5
+
6
+ # Summary
7
+
8
+ Refactor `write` operation to accept a `BytesReader` instead.
9
+
10
+ # Motivation
11
+
12
+ To mimic POSIX-like file system operations, OpenDAL returns a `BytesWriter` for users to write, flush and close:
13
+
14
+ ```rust
15
+ pub trait Accessor {
16
+ async fn write(&self, args: &OpWrite) -> Result<BytesWriter> {}
17
+ }
18
+ ```
19
+
20
+ `Operator` builds the high level APIs upon this:
21
+
22
+ ```rust
23
+ impl Object {
24
+ pub async fn write(&self, bs: impl AsRef<[u8]>) -> Result<()> {}
25
+
26
+ pub async fn writer(&self, size: u64) -> Result<impl BytesWrite> {}
27
+ }
28
+ ```
29
+
30
+ However, we are meeting the following problems:
31
+
32
+ - Performance: HTTP body channel is much slower than reading from a Reader directly.
33
+ - Complexity: Service implementers have to deal with APIs like `new_http_channel`.
34
+ - Extensibility: Current design can't be extended to multipart APIs.
35
+
36
+ # Guide-level explanation
37
+
38
+ Underlying `write` implementations will be replaced by:
39
+
40
+ ```rust
41
+ pub trait Accessor {
42
+ async fn write(&self, args: &OpWrite, r: BytesReader) -> Result<u64> {}
43
+ }
44
+ ```
45
+
46
+ Existing API will have no changes, and we will add a new API:
47
+
48
+ ```rust
49
+ impl Object {
50
+ pub async fn write_from(&self, size: u64, r: impl BytesRead) -> Result<u64> {}
51
+ }
52
+ ```
53
+
54
+ # Reference-level explanation
55
+
56
+ `Accessor`'s `write` API will be changed to accept a `BytesReader`:
57
+
58
+ ```rust
59
+ pub trait Accessor {
60
+ async fn write(&self, args: &OpWrite, r: BytesReader) -> Result<u64> {}
61
+ }
62
+ ```
63
+
64
+ We will provide `Operator::writer` based on this new API instead.
65
+
66
+ [RFC-0438: Multipart](./0438_multipart.md) will also be updated to:
67
+
68
+ ```rust
69
+ pub trait Accessor {
70
+ async fn write_multipart(&self, args: &OpWriteMultipart, r: BytesReader) -> Result<u64> {}
71
+ }
72
+ ```
73
+
74
+ In this way, we don't need to introduce a `PartWriter`.
75
+
76
+ # Drawbacks
77
+
78
+ ## Layer API breakage
79
+
80
+ This change will introduce breaking changes to layers.
81
+
82
+ # Rationale and alternatives
83
+
84
+ None.
85
+
86
+ # Prior art
87
+
88
+ - [RFC-0191: Async Streaming IO](./0191_async_streaming_io.md)
89
+
90
+ # Unresolved questions
91
+
92
+ None.
93
+
94
+ # Future possibilities
95
+
96
+ None.
@@ -0,0 +1,210 @@
1
+ - Proposal Name: `list_metadata_reuse`
2
+ - Start Date: 2022-08-23
3
+ - RFC PR: [apache/opendal#561](https://github.com/apache/opendal/pull/561)
4
+ - Tracking Issue: [apache/opendal#570](https://github.com/apache/opendal/pull/570)
5
+
6
+ # Summary
7
+
8
+ Reuse metadata returned during listing, by extending `DirEntry` with some metadata fields.
9
+
10
+ # Motivation
11
+
12
+ Users may expect to browse metadata of some directories' child files and directories. Using `walk()` of `BatchOperator` seems to be an ideal way to complete this job.
13
+
14
+ Thus, they start iterating on it, but soon they realize that `DirEntry` can only offer the name (or path, more precisely) and access mode of the object, which is not enough.
15
+
16
+ So they have to call `metadata()` for each name they extracted from the iterator.
17
+
18
+ The final example looks like:
19
+
20
+ ```rust
21
+ let op = Operator::from_env(Scheme::Gcs)?.batch();
22
+
23
+ // here is a network request
24
+ let mut dir_stream = op.walk("/dir/to/walk")?;
25
+
26
+ while let Some(Ok(file)) = dir_stream.next().await {
27
+ let path = file.path();
28
+
29
+ // here is another network request
30
+ let size = file.metadata().await?.content_length();
31
+ println!("size of file {} is {}B", path, size);
32
+ }
33
+ ```
34
+
35
+ But... wait! Many storage services return object metadata when listing, like HDFS, AWS and GCS. The Rust standard library returns metadata when listing local file systems, too.
36
+
37
+ In previous versions of OpenDAL, those fields were simply ignored. This wastes users' time on extra requests for metadata.
38
+
39
+ # Guide-level explanation
40
+
41
+ The loop in main will be changed to the following code with this RFC:
42
+
43
+ ```rust
44
+ while let Some(Ok(file)) = dir_stream.next().await {
45
+ let size = if let Some(len) = file.content_length() {
46
+ len
47
+ } else {
48
+ file.metadata().await?.content_length()
49
+ };
50
+ let path = file.path();
51
+ println!("size of file {} is {}B", path, size);
52
+ }
53
+ ```
54
+
55
+ # Reference-level explanation
56
+
57
+ Extend `DirEntry` with metadata fields:
58
+
59
+ ```rust
60
+ pub struct DirEntry {
61
+ acc: Arc<dyn Accessor>,
62
+
63
+ mode: ObjectMode,
64
+ path: String,
65
+
66
+ // newly add metadata fields
67
+ content_length: Option<u64>, // size of file
68
+ content_md5: Option<String>,
69
+ last_modified: Option<OffsetDateTime>,
70
+ }
71
+
72
+ impl DirEntry {
73
+ pub fn content_length(&self) -> Option<u64> {
74
+ self.content_length
75
+ }
76
+ pub fn last_modified(&self) -> Option<OffsetDateTime> {
77
+ self.last_modified
78
+ }
79
+ pub fn content_md5(&self) -> Option<String> {
80
+ self.content_md5.clone()
81
+ }
82
+ }
83
+ ```
84
+
85
+ For all services that supply metadata during listing, like AWS, GCS and HDFS, those optional fields will be filled in. Meanwhile, for services that don't return metadata during listing, like in-memory storage, they are simply left as `None`.
86
+
87
+ As you can see, for those services returning metadata when listing, the operation of listing metadata will save many unnecessary requests.
88
+
89
+ # Drawbacks
90
+
91
+ Add complexity to `DirEntry`. To use the improved features of `DirEntry`, users have to explicitly check the existence of metadata fields.
92
+
93
+ The size of `DirEntry` increases from 40 bytes to 80 bytes, a 100% growth that requires more memory.
94
+
95
+ # Rationale and alternatives
96
+
97
+ The largest performance cost usually comes from network or hard disk operations. By letting `DirEntry` store some metadata, many redundant requests can be avoided.
98
+
99
+ ## Embed a Structure Containing Metadata
100
+
101
+ Define a `MetaLite` structure containing some metadata fields, and embed it in `DirEntry`
102
+
103
+ ```rust
104
+ struct MetaLite {
105
+ pub content_length: u64, // size of file
106
+ pub content_md5: String,
107
+ pub last_modified: OffsetDateTime,
108
+ }
109
+
110
+ pub struct DirEntry {
111
+ acc: Arc<dyn Accessor>,
112
+
113
+ mode: ObjectMode,
114
+ path: String,
115
+
116
+ // newly add metadata struct
117
+ metadata: Option<MetaLite>,
118
+ }
119
+
120
+ impl DirEntry {
121
+ // get size of file
122
+ pub fn content_length(&self) -> Option<u64> {
123
+ self.metadata.as_ref().map(|m| m.content_length)
124
+ }
125
+ // get the last modified time
126
+ pub fn last_modified(&self) -> Option<OffsetDateTime> {
127
+ self.metadata.as_ref().map(|m| m.last_modified)
128
+ }
129
+ // get md5 message digest
130
+ pub fn content_md5(&self) -> Option<String> {
131
+ self.metadata.as_ref().map(|m| m.content_md5)
132
+ }
133
+ }
134
+ ```
135
+
136
+ The existence of those newly added metadata fields is highly correlated. If one field does not exist, neither do the others.
137
+
138
+ By wrapping them together in an embedded structure, 8 bytes of space for each `DirEntry` object could be saved. In the future, more metadata fields may be added to `DirEntry`, then a lot more space could be saved.
139
+
140
+ This approach could be slower because some intermediate functions are involved. But it's worth sacrificing rarely used features' performance to save memory.
141
+
142
+ ## Embed a `ObjectMetadata` into `DirEntry`
143
+
144
+ - Embed a `ObjectMetadata` struct into `DirEntry`
145
+ - Remove the `ObjectMode` field in `DirEntry`
146
+ - Change `ObjectMetadata`'s `content_length` field's type to `Option<u64>`.
147
+
148
+ ```rust
149
+ pub struct DirEntry {
150
+ acc: Arc<dyn Accessor>,
151
+
152
+ // - mode: ObjectMode, removed
153
+ path: String,
154
+
155
+ // newly add metadata struct
156
+ metadata: ObjectMetadata,
157
+ }
158
+
159
+ impl DirEntry {
160
+ pub fn mode(&self) -> ObjectMode {
161
+ self.metadata.mode()
162
+ }
163
+ pub fn content_length(&self) -> Option<u64> {
164
+ self.metadata.content_length()
165
+ }
166
+ pub fn content_md5(&self) -> Option<&str> {
167
+ self.metadata.content_md5()
168
+ }
169
+ // other metadata getters...
170
+ }
171
+ ```
172
+
173
+ In terms of memory layout, it's the same as the approach proposed in this RFC. This approach offers more metadata fields and requires fewer changes to the code.
174
+
175
+ # Prior art
176
+
177
+ None.
178
+
179
+ # Unresolved questions
180
+
181
+ None.
182
+
183
+ # Future possibilities
184
+
185
+ ## Switch to Alternative Implement Approaches
186
+
187
+ As the number of metadata fields grows, the alternatives may someday become the better choice. Other RFCs will be raised then.
188
+
189
+ ## More Fields
190
+
191
+ Add more metadata fields to DirEntry, like:
192
+
193
+ - accessed: the last access timestamp of object
194
+
195
+ ## Simplified Get
196
+
197
+ Users have to explicitly check whether those metadata fields are actually present in the DirEntry. This could be done inside the getter itself.
198
+
199
+ ```rust
200
+ let path = file.path();
201
+
202
+ // if content_length does not exist
203
+ // this getter will automatically fetch from the storage service.
204
+ let size = file.content_length().await?;
205
+
206
+ // the previous getter can cache metadata fetched from service
207
+ // so this function could return instantly.
208
+ let md5 = file.content_md5().await?;
209
+ println!("size of file {} is {}B, md5 outcome of file is {}", path, size, md5);
210
+ ```
@@ -0,0 +1,157 @@
1
+ - Proposal Name: `blocking_api`
2
+ - Start Date: 2022-08-30
3
+ - RFC PR: [apache/opendal#599](https://github.com/apache/opendal/pull/599)
4
+ - Tracking Issue: [apache/opendal#601](https://github.com/apache/opendal/issues/601)
5
+
6
+ # Summary
7
+
8
+ We are adding a blocking API for OpenDAL.
9
+
10
+ # Motivation
11
+
12
+ Blocking API is the most requested feature inside the OpenDAL community: [Opendal support sync read/write API](https://github.com/apache/opendal/discussions/68)
13
+
14
+ Our users want blocking API for:
15
+
16
+ - Higher performance for local IO
17
+ - Using OpenDAL in a non-async environment
18
+
19
+ However, supporting sync and async API in current Rust is a painful job, especially for an IO library like OpenDAL. For example:
20
+
21
+ ```rust
22
+ impl Object {
23
+ pub async fn reader(&self) -> Result<impl BytesRead> {}
24
+ }
25
+ ```
26
+
27
+ Supporting blocking API doesn't mean removing the `async` from the function. We should also handle the returning `Reader`:
28
+
29
+ ```rust
30
+ impl Object {
31
+ pub fn reader(&self) -> Result<impl Read> {}
32
+ }
33
+ ```
34
+
35
+ Until now, I still don't know how to handle them correctly. But we need to have a start: not perfect, but enough for our users to have a try.
36
+
37
+ So this RFC is an **experimental** attempt to introduce blocking API support. I expect the OpenDAL community will evaluate those APIs and keep improving them. And finally, we will pick the best one for stabilizing.
38
+
39
+ # Guide-level explanation
40
+
41
+ With this RFC, we can call blocking API with the `blocking_` prefix:
42
+
43
+ ```rust
44
+ fn main() -> Result<()> {
45
+ // Init Operator
46
+ let op = Operator::from_env(Scheme::Fs)?;
47
+
48
+ // Create object handler.
49
+ let o = op.object("test_file");
50
+
51
+ // Write data into object;
52
+ o.blocking_write("Hello, World!")?;
53
+
54
+ // Read data from object;
55
+ let bs = o.blocking_read()?;
56
+
57
+ // Read range from the object;
58
+ let bs = o.blocking_range_read(1..=11)?;
59
+
60
+ // Get the object's path
61
+ let name = o.name();
62
+ let path = o.path();
63
+
64
+ // Fetch more meta about the object.
65
+ let meta = o.blocking_metadata()?;
66
+ let mode = meta.mode();
67
+ let length = meta.content_length();
68
+ let content_md5 = meta.content_md5();
69
+ let etag = meta.etag();
70
+
71
+ // Delete object.
72
+ o.blocking_delete()?;
73
+
74
+ // List dir object.
75
+ let o = op.object("test_dir/");
76
+ let mut ds = o.blocking_list()?;
77
+ while let Some(entry) = ds.try_next()? {
78
+ let path = entry.path();
79
+ let mode = entry.mode();
80
+ }
81
+
82
+ Ok(())
83
+ }
84
+ ```
85
+
86
+ All async public APIs of `Object` and `Operator` will have a sync version with `blocking_` prefix. And they will share precisely the same semantics.
87
+
88
+ The differences are:
89
+
90
+ - They will be executed and blocked on the current thread.
91
+ - Input and output's `Reader` will become the blocking version like `std::io::Read`.
92
+ - Output's `DirStreamer` will become the blocking version like `Iterator`.
93
+
94
+ Thanks to [RFC-0501: New Builder](./0501_new_builder.md), all our builder-related APIs have been transformed into blocking APIs, so we don't change our initiation logic.
95
+
96
+ # Reference-level explanation
97
+
98
+ Under the hood, we will add the following APIs in `Accessor`:
99
+
100
+ ```rust
101
+ trait Accessor {
102
+ fn blocking_create(&self, args: &OpCreate) -> Result<()>;
103
+
104
+ fn blocking_read(&self, args: &OpRead) -> Result<BlockingBytesReader>;
105
+
106
+ fn blocking_write(&self, args: &OpWrite, r: BlockingBytesReader) -> Result<u64>;
107
+
108
+ fn blocking_stat(&self, args: &OpStat) -> Result<ObjectMetadata>;
109
+
110
+ fn blocking_delete(&self, args: &OpDelete) -> Result<()>;
111
+
112
+ fn blocking_list(&self, args: &OpList) -> Result<DirIterator>;
113
+ }
114
+ ```
115
+
116
+ Notes:
117
+
118
+ - `BlockingBytesReader` is a boxed `std::io::Read`.
119
+ - All blocking operations are happening on the current thread.
120
+ - Blocking operation is implemented natively, no `futures::block_on`.
121
+
122
+ # Drawbacks
123
+
124
+ ## Two sets of APIs
125
+
126
+ This RFC will add a new set of APIs, adding complexity to OpenDAL.
127
+
128
+ And users may misuse them. For example: using `blocking_read` in an async context could block the entire thread.
129
+
130
+ # Rationale and alternatives
131
+
132
+ ## Use features to switch `async` and `sync`
133
+
134
+ Some crates provide features to switch the `async` and `sync` versions of API.
135
+
136
+ In this way:
137
+
138
+ - We can't provide two kinds of API at the same time.
139
+ - Users must decide to use `async` or `sync` at compile time.
140
+
141
+ ## Use blocking IO functions in local fs services
142
+
143
+ > Can we use blocking IO functions in local fs services to implement Accessor's asynchronous functions directly? What is the drawback of our current non-blocking API?
144
+
145
+ We can't run blocking IO functions inside the `async` context. We need to let the local thread pool execute them and use `mio` to listen to the events. If we do so, congrats, we are building `tokio::fs` again!
146
+
147
+ # Prior art
148
+
149
+ None
150
+
151
+ # Unresolved questions
152
+
153
+ None
154
+
155
+ # Future possibilities
156
+
157
+ None