iceberg 0.11.0 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +364 -665
- data/ext/iceberg/Cargo.toml +10 -9
- data/ext/iceberg/src/catalog.rs +80 -20
- data/ext/iceberg/src/error.rs +7 -1
- data/ext/iceberg/src/lib.rs +1 -0
- data/ext/iceberg/src/ruby.rs +51 -0
- data/ext/iceberg/src/scan.rs +12 -9
- data/ext/iceberg/src/table.rs +140 -96
- data/lib/iceberg/version.rb +1 -1
- metadata +3 -2
data/ext/iceberg/Cargo.toml
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "iceberg-ruby"
|
|
3
|
-
version = "0.11.0"
|
|
3
|
+
version = "0.11.2"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
|
6
6
|
edition = "2024"
|
|
7
|
-
rust-version = "1.
|
|
7
|
+
rust-version = "1.92"
|
|
8
8
|
publish = false
|
|
9
9
|
|
|
10
10
|
[lib]
|
|
@@ -14,16 +14,17 @@ crate-type = ["cdylib"]
|
|
|
14
14
|
[dependencies]
|
|
15
15
|
arrow-array = { version = "57", features = ["ffi"] }
|
|
16
16
|
arrow-schema = "57"
|
|
17
|
-
datafusion = { version = "
|
|
17
|
+
datafusion = { version = "52", optional = true }
|
|
18
18
|
futures = "0.3"
|
|
19
|
-
iceberg = "0.
|
|
20
|
-
iceberg-catalog-glue = { version = "0.
|
|
21
|
-
iceberg-catalog-rest = { version = "0.
|
|
22
|
-
iceberg-catalog-s3tables = { version = "0.
|
|
23
|
-
iceberg-catalog-sql = { version = "0.
|
|
24
|
-
iceberg-datafusion = { version = "0.
|
|
19
|
+
iceberg = "=0.9.1"
|
|
20
|
+
iceberg-catalog-glue = { version = "=0.9.1", optional = true }
|
|
21
|
+
iceberg-catalog-rest = { version = "=0.9.1", optional = true }
|
|
22
|
+
iceberg-catalog-s3tables = { version = "=0.9.1", optional = true }
|
|
23
|
+
iceberg-catalog-sql = { version = "=0.9.1", optional = true }
|
|
24
|
+
iceberg-datafusion = { version = "=0.9.1", optional = true }
|
|
25
25
|
magnus = "0.8"
|
|
26
26
|
parquet = "57"
|
|
27
|
+
rb-sys = "0.9"
|
|
27
28
|
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio", "sqlite"], default-features = false, optional = true }
|
|
28
29
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
|
29
30
|
uuid = { version = "1", features = ["v4"] }
|
data/ext/iceberg/src/catalog.rs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#[cfg(feature = "datafusion")]
|
|
2
2
|
use datafusion::execution::context::SessionContext;
|
|
3
|
+
use iceberg::io::LocalFsStorageFactory;
|
|
3
4
|
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
|
|
4
5
|
use iceberg::spec::Schema;
|
|
5
6
|
use iceberg::{Catalog, CatalogBuilder, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
|
|
@@ -20,9 +21,8 @@ use iceberg_catalog_sql::{
|
|
|
20
21
|
};
|
|
21
22
|
#[cfg(feature = "datafusion")]
|
|
22
23
|
use iceberg_datafusion::IcebergCatalogProvider;
|
|
23
|
-
use std::cell::RefCell;
|
|
24
24
|
use std::collections::HashMap;
|
|
25
|
-
use std::sync::Arc;
|
|
25
|
+
use std::sync::{Arc, RwLock};
|
|
26
26
|
|
|
27
27
|
use crate::error::to_rb_err;
|
|
28
28
|
use crate::runtime::runtime;
|
|
@@ -74,7 +74,7 @@ impl RbCatalogType {
|
|
|
74
74
|
|
|
75
75
|
#[magnus::wrap(class = "Iceberg::RbCatalog")]
|
|
76
76
|
pub struct RbCatalog {
|
|
77
|
-
pub catalog:
|
|
77
|
+
pub catalog: RwLock<RbCatalogType>,
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
impl RbCatalog {
|
|
@@ -95,7 +95,11 @@ impl RbCatalog {
|
|
|
95
95
|
props.insert(MEMORY_CATALOG_WAREHOUSE.to_string(), v);
|
|
96
96
|
}
|
|
97
97
|
let catalog = runtime()
|
|
98
|
-
.block_on(
|
|
98
|
+
.block_on(
|
|
99
|
+
MemoryCatalogBuilder::default()
|
|
100
|
+
.with_storage_factory(Arc::new(LocalFsStorageFactory))
|
|
101
|
+
.load("memory", props),
|
|
102
|
+
)
|
|
99
103
|
.map_err(to_rb_err)?;
|
|
100
104
|
Ok(Self {
|
|
101
105
|
catalog: RbCatalogType::Memory(catalog.into()).into(),
|
|
@@ -114,7 +118,11 @@ impl RbCatalog {
|
|
|
114
118
|
props.insert(REST_CATALOG_PROP_WAREHOUSE.to_string(), v);
|
|
115
119
|
}
|
|
116
120
|
let catalog = runtime()
|
|
117
|
-
.block_on(
|
|
121
|
+
.block_on(
|
|
122
|
+
RestCatalogBuilder::default()
|
|
123
|
+
.with_storage_factory(Arc::new(LocalFsStorageFactory))
|
|
124
|
+
.load("rest", props),
|
|
125
|
+
)
|
|
118
126
|
.map_err(to_rb_err)?;
|
|
119
127
|
Ok(Self {
|
|
120
128
|
catalog: RbCatalogType::Rest(catalog.into()).into(),
|
|
@@ -148,7 +156,11 @@ impl RbCatalog {
|
|
|
148
156
|
SqlBindStyle::DollarNumeric.to_string(),
|
|
149
157
|
);
|
|
150
158
|
let catalog = runtime()
|
|
151
|
-
.block_on(
|
|
159
|
+
.block_on(
|
|
160
|
+
SqlCatalogBuilder::default()
|
|
161
|
+
.with_storage_factory(Arc::new(LocalFsStorageFactory))
|
|
162
|
+
.load(name, props),
|
|
163
|
+
)
|
|
152
164
|
.map_err(to_rb_err)?;
|
|
153
165
|
Ok(Self {
|
|
154
166
|
catalog: RbCatalogType::Sql(catalog.into()).into(),
|
|
@@ -162,7 +174,8 @@ impl RbCatalog {
|
|
|
162
174
|
let namespaces = runtime()
|
|
163
175
|
.block_on(
|
|
164
176
|
self.catalog
|
|
165
|
-
.
|
|
177
|
+
.read()
|
|
178
|
+
.unwrap()
|
|
166
179
|
.as_catalog()
|
|
167
180
|
.list_namespaces(parent.map(|v| v.0).as_ref()),
|
|
168
181
|
)
|
|
@@ -178,7 +191,8 @@ impl RbCatalog {
|
|
|
178
191
|
runtime()
|
|
179
192
|
.block_on(
|
|
180
193
|
self.catalog
|
|
181
|
-
.
|
|
194
|
+
.read()
|
|
195
|
+
.unwrap()
|
|
182
196
|
.as_catalog()
|
|
183
197
|
.create_namespace(&name.0, props),
|
|
184
198
|
)
|
|
@@ -188,7 +202,13 @@ impl RbCatalog {
|
|
|
188
202
|
|
|
189
203
|
pub fn namespace_exists(&self, name: Wrap<NamespaceIdent>) -> RbResult<bool> {
|
|
190
204
|
let exists = runtime()
|
|
191
|
-
.block_on(
|
|
205
|
+
.block_on(
|
|
206
|
+
self.catalog
|
|
207
|
+
.read()
|
|
208
|
+
.unwrap()
|
|
209
|
+
.as_catalog()
|
|
210
|
+
.namespace_exists(&name.0),
|
|
211
|
+
)
|
|
192
212
|
.map_err(to_rb_err)?;
|
|
193
213
|
Ok(exists)
|
|
194
214
|
}
|
|
@@ -198,7 +218,13 @@ impl RbCatalog {
|
|
|
198
218
|
name: Wrap<NamespaceIdent>,
|
|
199
219
|
) -> RbResult<HashMap<String, String>> {
|
|
200
220
|
let namespace = runtime()
|
|
201
|
-
.block_on(
|
|
221
|
+
.block_on(
|
|
222
|
+
self.catalog
|
|
223
|
+
.read()
|
|
224
|
+
.unwrap()
|
|
225
|
+
.as_catalog()
|
|
226
|
+
.get_namespace(&name.0),
|
|
227
|
+
)
|
|
202
228
|
.map_err(to_rb_err)?;
|
|
203
229
|
Ok(namespace.properties().clone())
|
|
204
230
|
}
|
|
@@ -211,7 +237,8 @@ impl RbCatalog {
|
|
|
211
237
|
runtime()
|
|
212
238
|
.block_on(
|
|
213
239
|
self.catalog
|
|
214
|
-
.
|
|
240
|
+
.read()
|
|
241
|
+
.unwrap()
|
|
215
242
|
.as_catalog()
|
|
216
243
|
.update_namespace(&name.0, props),
|
|
217
244
|
)
|
|
@@ -221,14 +248,26 @@ impl RbCatalog {
|
|
|
221
248
|
|
|
222
249
|
pub fn drop_namespace(&self, name: Wrap<NamespaceIdent>) -> RbResult<()> {
|
|
223
250
|
runtime()
|
|
224
|
-
.block_on(
|
|
251
|
+
.block_on(
|
|
252
|
+
self.catalog
|
|
253
|
+
.read()
|
|
254
|
+
.unwrap()
|
|
255
|
+
.as_catalog()
|
|
256
|
+
.drop_namespace(&name.0),
|
|
257
|
+
)
|
|
225
258
|
.map_err(to_rb_err)?;
|
|
226
259
|
Ok(())
|
|
227
260
|
}
|
|
228
261
|
|
|
229
262
|
pub fn list_tables(&self, namespace: Wrap<NamespaceIdent>) -> RbResult<Vec<Vec<String>>> {
|
|
230
263
|
let tables = runtime()
|
|
231
|
-
.block_on(
|
|
264
|
+
.block_on(
|
|
265
|
+
self.catalog
|
|
266
|
+
.read()
|
|
267
|
+
.unwrap()
|
|
268
|
+
.as_catalog()
|
|
269
|
+
.list_tables(&namespace.0),
|
|
270
|
+
)
|
|
232
271
|
.map_err(to_rb_err)?;
|
|
233
272
|
Ok(tables
|
|
234
273
|
.iter()
|
|
@@ -254,7 +293,8 @@ impl RbCatalog {
|
|
|
254
293
|
let table = runtime()
|
|
255
294
|
.block_on(
|
|
256
295
|
self.catalog
|
|
257
|
-
.
|
|
296
|
+
.read()
|
|
297
|
+
.unwrap()
|
|
258
298
|
.as_catalog()
|
|
259
299
|
.create_table(&name.0.namespace, creation),
|
|
260
300
|
)
|
|
@@ -266,7 +306,13 @@ impl RbCatalog {
|
|
|
266
306
|
|
|
267
307
|
pub fn load_table(&self, name: Wrap<TableIdent>) -> RbResult<RbTable> {
|
|
268
308
|
let table = runtime()
|
|
269
|
-
.block_on(
|
|
309
|
+
.block_on(
|
|
310
|
+
self.catalog
|
|
311
|
+
.read()
|
|
312
|
+
.unwrap()
|
|
313
|
+
.as_catalog()
|
|
314
|
+
.load_table(&name.0),
|
|
315
|
+
)
|
|
270
316
|
.map_err(to_rb_err)?;
|
|
271
317
|
Ok(RbTable {
|
|
272
318
|
table: table.into(),
|
|
@@ -275,14 +321,26 @@ impl RbCatalog {
|
|
|
275
321
|
|
|
276
322
|
pub fn drop_table(&self, name: Wrap<TableIdent>) -> RbResult<()> {
|
|
277
323
|
runtime()
|
|
278
|
-
.block_on(
|
|
324
|
+
.block_on(
|
|
325
|
+
self.catalog
|
|
326
|
+
.read()
|
|
327
|
+
.unwrap()
|
|
328
|
+
.as_catalog()
|
|
329
|
+
.drop_table(&name.0),
|
|
330
|
+
)
|
|
279
331
|
.map_err(to_rb_err)?;
|
|
280
332
|
Ok(())
|
|
281
333
|
}
|
|
282
334
|
|
|
283
335
|
pub fn table_exists(&self, name: Wrap<TableIdent>) -> RbResult<bool> {
|
|
284
336
|
let exists = runtime()
|
|
285
|
-
.block_on(
|
|
337
|
+
.block_on(
|
|
338
|
+
self.catalog
|
|
339
|
+
.read()
|
|
340
|
+
.unwrap()
|
|
341
|
+
.as_catalog()
|
|
342
|
+
.table_exists(&name.0),
|
|
343
|
+
)
|
|
286
344
|
.map_err(to_rb_err)?;
|
|
287
345
|
Ok(exists)
|
|
288
346
|
}
|
|
@@ -291,7 +349,8 @@ impl RbCatalog {
|
|
|
291
349
|
runtime()
|
|
292
350
|
.block_on(
|
|
293
351
|
self.catalog
|
|
294
|
-
.
|
|
352
|
+
.read()
|
|
353
|
+
.unwrap()
|
|
295
354
|
.as_catalog()
|
|
296
355
|
.rename_table(&name.0, &new_name.0),
|
|
297
356
|
)
|
|
@@ -307,7 +366,8 @@ impl RbCatalog {
|
|
|
307
366
|
runtime()
|
|
308
367
|
.block_on(
|
|
309
368
|
self.catalog
|
|
310
|
-
.
|
|
369
|
+
.read()
|
|
370
|
+
.unwrap()
|
|
311
371
|
.as_catalog()
|
|
312
372
|
.register_table(&name.0, metadata_location),
|
|
313
373
|
)
|
|
@@ -320,7 +380,7 @@ impl RbCatalog {
|
|
|
320
380
|
let runtime = runtime();
|
|
321
381
|
|
|
322
382
|
// TODO only create context once
|
|
323
|
-
let catalog = self.catalog.
|
|
383
|
+
let catalog = self.catalog.read().unwrap().as_arc();
|
|
324
384
|
let provider = runtime
|
|
325
385
|
.block_on(IcebergCatalogProvider::try_new(catalog))
|
|
326
386
|
.unwrap();
|
data/ext/iceberg/src/error.rs
CHANGED
|
@@ -11,7 +11,7 @@ pub fn to_rb_err(err: iceberg::Error) -> RbErr {
|
|
|
11
11
|
_ => "Error",
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
-
let class = Ruby::get()
|
|
14
|
+
let mut class = Ruby::get()
|
|
15
15
|
.unwrap()
|
|
16
16
|
.class_object()
|
|
17
17
|
.const_get::<_, RModule>("Iceberg")
|
|
@@ -22,6 +22,12 @@ pub fn to_rb_err(err: iceberg::Error) -> RbErr {
|
|
|
22
22
|
// no way to get context separately
|
|
23
23
|
// https://github.com/apache/iceberg-rust/issues/1071
|
|
24
24
|
let message = err.to_string();
|
|
25
|
+
|
|
26
|
+
// TODO remove in 0.12.0
|
|
27
|
+
if message.contains("target schema is not superset of current schema") {
|
|
28
|
+
class = Ruby::get().unwrap().exception_arg_error();
|
|
29
|
+
}
|
|
30
|
+
|
|
25
31
|
let message = message
|
|
26
32
|
// TODO improve
|
|
27
33
|
.strip_prefix("Unexpected => ")
|
data/ext/iceberg/src/ruby.rs
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
use std::ffi::c_void;
|
|
2
|
+
use std::ptr::null_mut;
|
|
3
|
+
|
|
4
|
+
use magnus::Ruby;
|
|
5
|
+
use rb_sys::rb_thread_call_without_gvl;
|
|
6
|
+
|
|
7
|
+
pub trait GvlExt {
|
|
8
|
+
fn detach<T, F>(&self, func: F) -> T
|
|
9
|
+
where
|
|
10
|
+
F: Send + FnOnce() -> T,
|
|
11
|
+
T: Send;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
impl GvlExt for Ruby {
|
|
15
|
+
fn detach<T, F>(&self, func: F) -> T
|
|
16
|
+
where
|
|
17
|
+
F: Send + FnOnce() -> T,
|
|
18
|
+
T: Send,
|
|
19
|
+
{
|
|
20
|
+
let mut data = CallbackData {
|
|
21
|
+
func: Some(func),
|
|
22
|
+
result: None,
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
unsafe {
|
|
26
|
+
rb_thread_call_without_gvl(
|
|
27
|
+
Some(call_without_gvl::<F, T>),
|
|
28
|
+
&mut data as *mut _ as *mut c_void,
|
|
29
|
+
None,
|
|
30
|
+
null_mut(),
|
|
31
|
+
);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
data.result.unwrap()
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
struct CallbackData<F, T> {
|
|
39
|
+
func: Option<F>,
|
|
40
|
+
result: Option<T>,
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
extern "C" fn call_without_gvl<F, T>(data: *mut c_void) -> *mut c_void
|
|
44
|
+
where
|
|
45
|
+
F: FnOnce() -> T,
|
|
46
|
+
{
|
|
47
|
+
let data = unsafe { &mut *(data as *mut CallbackData<F, T>) };
|
|
48
|
+
let func = data.func.take().unwrap();
|
|
49
|
+
data.result = Some(func());
|
|
50
|
+
null_mut()
|
|
51
|
+
}
|
data/ext/iceberg/src/scan.rs
CHANGED
|
@@ -1,27 +1,30 @@
|
|
|
1
1
|
use futures::TryStreamExt;
|
|
2
2
|
use iceberg::scan::TableScan;
|
|
3
3
|
use magnus::{RArray, Ruby, Value};
|
|
4
|
-
use std::
|
|
4
|
+
use std::sync::RwLock;
|
|
5
5
|
|
|
6
6
|
use crate::RbResult;
|
|
7
7
|
use crate::error::to_rb_err;
|
|
8
|
+
use crate::ruby::GvlExt;
|
|
8
9
|
use crate::runtime::runtime;
|
|
9
10
|
use crate::utils::rb_snapshot;
|
|
10
11
|
|
|
11
12
|
#[magnus::wrap(class = "Iceberg::RbTableScan")]
|
|
12
13
|
pub struct RbTableScan {
|
|
13
|
-
pub scan:
|
|
14
|
+
pub scan: RwLock<TableScan>,
|
|
14
15
|
}
|
|
15
16
|
|
|
16
17
|
impl RbTableScan {
|
|
17
18
|
pub fn plan_files(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
|
18
|
-
let
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
19
|
+
let plan_files: Vec<_> = ruby
|
|
20
|
+
.detach(|| {
|
|
21
|
+
let scan = rb_self.scan.read().unwrap();
|
|
22
|
+
let runtime = runtime();
|
|
23
|
+
let plan_files = runtime.block_on(scan.plan_files())?;
|
|
24
|
+
runtime.block_on(plan_files.try_collect())
|
|
25
|
+
})
|
|
24
26
|
.map_err(to_rb_err)?;
|
|
27
|
+
|
|
25
28
|
let files = ruby.ary_new();
|
|
26
29
|
for v in plan_files {
|
|
27
30
|
let file = ruby.hash_new();
|
|
@@ -47,7 +50,7 @@ impl RbTableScan {
|
|
|
47
50
|
}
|
|
48
51
|
|
|
49
52
|
pub fn snapshot(ruby: &Ruby, rb_self: &Self) -> RbResult<Option<Value>> {
|
|
50
|
-
match rb_self.scan.
|
|
53
|
+
match rb_self.scan.read().unwrap().snapshot() {
|
|
51
54
|
Some(s) => Ok(Some(rb_snapshot(ruby, s)?)),
|
|
52
55
|
None => Ok(None),
|
|
53
56
|
}
|