iceberg 0.11.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  [package]
2
2
  name = "iceberg-ruby"
3
- version = "0.11.0"
3
+ version = "0.11.2"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2024"
7
- rust-version = "1.88"
7
+ rust-version = "1.92"
8
8
  publish = false
9
9
 
10
10
  [lib]
@@ -14,16 +14,17 @@ crate-type = ["cdylib"]
14
14
  [dependencies]
15
15
  arrow-array = { version = "57", features = ["ffi"] }
16
16
  arrow-schema = "57"
17
- datafusion = { version = "51", optional = true }
17
+ datafusion = { version = "52", optional = true }
18
18
  futures = "0.3"
19
- iceberg = "0.8"
20
- iceberg-catalog-glue = { version = "0.8", optional = true }
21
- iceberg-catalog-rest = { version = "0.8", optional = true }
22
- iceberg-catalog-s3tables = { version = "0.8", optional = true }
23
- iceberg-catalog-sql = { version = "0.8", optional = true }
24
- iceberg-datafusion = { version = "0.8", optional = true }
19
+ iceberg = "=0.9.1"
20
+ iceberg-catalog-glue = { version = "=0.9.1", optional = true }
21
+ iceberg-catalog-rest = { version = "=0.9.1", optional = true }
22
+ iceberg-catalog-s3tables = { version = "=0.9.1", optional = true }
23
+ iceberg-catalog-sql = { version = "=0.9.1", optional = true }
24
+ iceberg-datafusion = { version = "=0.9.1", optional = true }
25
25
  magnus = "0.8"
26
26
  parquet = "57"
27
+ rb-sys = "0.9"
27
28
  sqlx = { version = "0.8", features = ["postgres", "runtime-tokio", "sqlite"], default-features = false, optional = true }
28
29
  tokio = { version = "1", features = ["rt-multi-thread"] }
29
30
  uuid = { version = "1", features = ["v4"] }
@@ -1,5 +1,6 @@
1
1
  #[cfg(feature = "datafusion")]
2
2
  use datafusion::execution::context::SessionContext;
3
+ use iceberg::io::LocalFsStorageFactory;
3
4
  use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
4
5
  use iceberg::spec::Schema;
5
6
  use iceberg::{Catalog, CatalogBuilder, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
@@ -20,9 +21,8 @@ use iceberg_catalog_sql::{
20
21
  };
21
22
  #[cfg(feature = "datafusion")]
22
23
  use iceberg_datafusion::IcebergCatalogProvider;
23
- use std::cell::RefCell;
24
24
  use std::collections::HashMap;
25
- use std::sync::Arc;
25
+ use std::sync::{Arc, RwLock};
26
26
 
27
27
  use crate::error::to_rb_err;
28
28
  use crate::runtime::runtime;
@@ -74,7 +74,7 @@ impl RbCatalogType {
74
74
 
75
75
  #[magnus::wrap(class = "Iceberg::RbCatalog")]
76
76
  pub struct RbCatalog {
77
- pub catalog: RefCell<RbCatalogType>,
77
+ pub catalog: RwLock<RbCatalogType>,
78
78
  }
79
79
 
80
80
  impl RbCatalog {
@@ -95,7 +95,11 @@ impl RbCatalog {
95
95
  props.insert(MEMORY_CATALOG_WAREHOUSE.to_string(), v);
96
96
  }
97
97
  let catalog = runtime()
98
- .block_on(MemoryCatalogBuilder::default().load("memory", props))
98
+ .block_on(
99
+ MemoryCatalogBuilder::default()
100
+ .with_storage_factory(Arc::new(LocalFsStorageFactory))
101
+ .load("memory", props),
102
+ )
99
103
  .map_err(to_rb_err)?;
100
104
  Ok(Self {
101
105
  catalog: RbCatalogType::Memory(catalog.into()).into(),
@@ -114,7 +118,11 @@ impl RbCatalog {
114
118
  props.insert(REST_CATALOG_PROP_WAREHOUSE.to_string(), v);
115
119
  }
116
120
  let catalog = runtime()
117
- .block_on(RestCatalogBuilder::default().load("rest", props))
121
+ .block_on(
122
+ RestCatalogBuilder::default()
123
+ .with_storage_factory(Arc::new(LocalFsStorageFactory))
124
+ .load("rest", props),
125
+ )
118
126
  .map_err(to_rb_err)?;
119
127
  Ok(Self {
120
128
  catalog: RbCatalogType::Rest(catalog.into()).into(),
@@ -148,7 +156,11 @@ impl RbCatalog {
148
156
  SqlBindStyle::DollarNumeric.to_string(),
149
157
  );
150
158
  let catalog = runtime()
151
- .block_on(SqlCatalogBuilder::default().load(name, props))
159
+ .block_on(
160
+ SqlCatalogBuilder::default()
161
+ .with_storage_factory(Arc::new(LocalFsStorageFactory))
162
+ .load(name, props),
163
+ )
152
164
  .map_err(to_rb_err)?;
153
165
  Ok(Self {
154
166
  catalog: RbCatalogType::Sql(catalog.into()).into(),
@@ -162,7 +174,8 @@ impl RbCatalog {
162
174
  let namespaces = runtime()
163
175
  .block_on(
164
176
  self.catalog
165
- .borrow()
177
+ .read()
178
+ .unwrap()
166
179
  .as_catalog()
167
180
  .list_namespaces(parent.map(|v| v.0).as_ref()),
168
181
  )
@@ -178,7 +191,8 @@ impl RbCatalog {
178
191
  runtime()
179
192
  .block_on(
180
193
  self.catalog
181
- .borrow()
194
+ .read()
195
+ .unwrap()
182
196
  .as_catalog()
183
197
  .create_namespace(&name.0, props),
184
198
  )
@@ -188,7 +202,13 @@ impl RbCatalog {
188
202
 
189
203
  pub fn namespace_exists(&self, name: Wrap<NamespaceIdent>) -> RbResult<bool> {
190
204
  let exists = runtime()
191
- .block_on(self.catalog.borrow().as_catalog().namespace_exists(&name.0))
205
+ .block_on(
206
+ self.catalog
207
+ .read()
208
+ .unwrap()
209
+ .as_catalog()
210
+ .namespace_exists(&name.0),
211
+ )
192
212
  .map_err(to_rb_err)?;
193
213
  Ok(exists)
194
214
  }
@@ -198,7 +218,13 @@ impl RbCatalog {
198
218
  name: Wrap<NamespaceIdent>,
199
219
  ) -> RbResult<HashMap<String, String>> {
200
220
  let namespace = runtime()
201
- .block_on(self.catalog.borrow().as_catalog().get_namespace(&name.0))
221
+ .block_on(
222
+ self.catalog
223
+ .read()
224
+ .unwrap()
225
+ .as_catalog()
226
+ .get_namespace(&name.0),
227
+ )
202
228
  .map_err(to_rb_err)?;
203
229
  Ok(namespace.properties().clone())
204
230
  }
@@ -211,7 +237,8 @@ impl RbCatalog {
211
237
  runtime()
212
238
  .block_on(
213
239
  self.catalog
214
- .borrow()
240
+ .read()
241
+ .unwrap()
215
242
  .as_catalog()
216
243
  .update_namespace(&name.0, props),
217
244
  )
@@ -221,14 +248,26 @@ impl RbCatalog {
221
248
 
222
249
  pub fn drop_namespace(&self, name: Wrap<NamespaceIdent>) -> RbResult<()> {
223
250
  runtime()
224
- .block_on(self.catalog.borrow().as_catalog().drop_namespace(&name.0))
251
+ .block_on(
252
+ self.catalog
253
+ .read()
254
+ .unwrap()
255
+ .as_catalog()
256
+ .drop_namespace(&name.0),
257
+ )
225
258
  .map_err(to_rb_err)?;
226
259
  Ok(())
227
260
  }
228
261
 
229
262
  pub fn list_tables(&self, namespace: Wrap<NamespaceIdent>) -> RbResult<Vec<Vec<String>>> {
230
263
  let tables = runtime()
231
- .block_on(self.catalog.borrow().as_catalog().list_tables(&namespace.0))
264
+ .block_on(
265
+ self.catalog
266
+ .read()
267
+ .unwrap()
268
+ .as_catalog()
269
+ .list_tables(&namespace.0),
270
+ )
232
271
  .map_err(to_rb_err)?;
233
272
  Ok(tables
234
273
  .iter()
@@ -254,7 +293,8 @@ impl RbCatalog {
254
293
  let table = runtime()
255
294
  .block_on(
256
295
  self.catalog
257
- .borrow()
296
+ .read()
297
+ .unwrap()
258
298
  .as_catalog()
259
299
  .create_table(&name.0.namespace, creation),
260
300
  )
@@ -266,7 +306,13 @@ impl RbCatalog {
266
306
 
267
307
  pub fn load_table(&self, name: Wrap<TableIdent>) -> RbResult<RbTable> {
268
308
  let table = runtime()
269
- .block_on(self.catalog.borrow().as_catalog().load_table(&name.0))
309
+ .block_on(
310
+ self.catalog
311
+ .read()
312
+ .unwrap()
313
+ .as_catalog()
314
+ .load_table(&name.0),
315
+ )
270
316
  .map_err(to_rb_err)?;
271
317
  Ok(RbTable {
272
318
  table: table.into(),
@@ -275,14 +321,26 @@ impl RbCatalog {
275
321
 
276
322
  pub fn drop_table(&self, name: Wrap<TableIdent>) -> RbResult<()> {
277
323
  runtime()
278
- .block_on(self.catalog.borrow().as_catalog().drop_table(&name.0))
324
+ .block_on(
325
+ self.catalog
326
+ .read()
327
+ .unwrap()
328
+ .as_catalog()
329
+ .drop_table(&name.0),
330
+ )
279
331
  .map_err(to_rb_err)?;
280
332
  Ok(())
281
333
  }
282
334
 
283
335
  pub fn table_exists(&self, name: Wrap<TableIdent>) -> RbResult<bool> {
284
336
  let exists = runtime()
285
- .block_on(self.catalog.borrow().as_catalog().table_exists(&name.0))
337
+ .block_on(
338
+ self.catalog
339
+ .read()
340
+ .unwrap()
341
+ .as_catalog()
342
+ .table_exists(&name.0),
343
+ )
286
344
  .map_err(to_rb_err)?;
287
345
  Ok(exists)
288
346
  }
@@ -291,7 +349,8 @@ impl RbCatalog {
291
349
  runtime()
292
350
  .block_on(
293
351
  self.catalog
294
- .borrow()
352
+ .read()
353
+ .unwrap()
295
354
  .as_catalog()
296
355
  .rename_table(&name.0, &new_name.0),
297
356
  )
@@ -307,7 +366,8 @@ impl RbCatalog {
307
366
  runtime()
308
367
  .block_on(
309
368
  self.catalog
310
- .borrow()
369
+ .read()
370
+ .unwrap()
311
371
  .as_catalog()
312
372
  .register_table(&name.0, metadata_location),
313
373
  )
@@ -320,7 +380,7 @@ impl RbCatalog {
320
380
  let runtime = runtime();
321
381
 
322
382
  // TODO only create context once
323
- let catalog = self.catalog.borrow().as_arc();
383
+ let catalog = self.catalog.read().unwrap().as_arc();
324
384
  let provider = runtime
325
385
  .block_on(IcebergCatalogProvider::try_new(catalog))
326
386
  .unwrap();
@@ -11,7 +11,7 @@ pub fn to_rb_err(err: iceberg::Error) -> RbErr {
11
11
  _ => "Error",
12
12
  };
13
13
 
14
- let class = Ruby::get()
14
+ let mut class = Ruby::get()
15
15
  .unwrap()
16
16
  .class_object()
17
17
  .const_get::<_, RModule>("Iceberg")
@@ -22,6 +22,12 @@ pub fn to_rb_err(err: iceberg::Error) -> RbErr {
22
22
  // no way to get context separately
23
23
  // https://github.com/apache/iceberg-rust/issues/1071
24
24
  let message = err.to_string();
25
+
26
+ // TODO remove in 0.12.0
27
+ if message.contains("target schema is not superset of current schema") {
28
+ class = Ruby::get().unwrap().exception_arg_error();
29
+ }
30
+
25
31
  let message = message
26
32
  // TODO improve
27
33
  .strip_prefix("Unexpected => ")
@@ -1,6 +1,7 @@
1
1
  mod arrow;
2
2
  mod catalog;
3
3
  mod error;
4
+ mod ruby;
4
5
  mod runtime;
5
6
  mod scan;
6
7
  mod table;
@@ -0,0 +1,51 @@
1
+ use std::ffi::c_void;
2
+ use std::ptr::null_mut;
3
+
4
+ use magnus::Ruby;
5
+ use rb_sys::rb_thread_call_without_gvl;
6
+
7
+ pub trait GvlExt {
8
+ fn detach<T, F>(&self, func: F) -> T
9
+ where
10
+ F: Send + FnOnce() -> T,
11
+ T: Send;
12
+ }
13
+
14
+ impl GvlExt for Ruby {
15
+ fn detach<T, F>(&self, func: F) -> T
16
+ where
17
+ F: Send + FnOnce() -> T,
18
+ T: Send,
19
+ {
20
+ let mut data = CallbackData {
21
+ func: Some(func),
22
+ result: None,
23
+ };
24
+
25
+ unsafe {
26
+ rb_thread_call_without_gvl(
27
+ Some(call_without_gvl::<F, T>),
28
+ &mut data as *mut _ as *mut c_void,
29
+ None,
30
+ null_mut(),
31
+ );
32
+ }
33
+
34
+ data.result.unwrap()
35
+ }
36
+ }
37
+
38
+ struct CallbackData<F, T> {
39
+ func: Option<F>,
40
+ result: Option<T>,
41
+ }
42
+
43
+ extern "C" fn call_without_gvl<F, T>(data: *mut c_void) -> *mut c_void
44
+ where
45
+ F: FnOnce() -> T,
46
+ {
47
+ let data = unsafe { &mut *(data as *mut CallbackData<F, T>) };
48
+ let func = data.func.take().unwrap();
49
+ data.result = Some(func());
50
+ null_mut()
51
+ }
@@ -1,27 +1,30 @@
1
1
  use futures::TryStreamExt;
2
2
  use iceberg::scan::TableScan;
3
3
  use magnus::{RArray, Ruby, Value};
4
- use std::cell::RefCell;
4
+ use std::sync::RwLock;
5
5
 
6
6
  use crate::RbResult;
7
7
  use crate::error::to_rb_err;
8
+ use crate::ruby::GvlExt;
8
9
  use crate::runtime::runtime;
9
10
  use crate::utils::rb_snapshot;
10
11
 
11
12
  #[magnus::wrap(class = "Iceberg::RbTableScan")]
12
13
  pub struct RbTableScan {
13
- pub scan: RefCell<TableScan>,
14
+ pub scan: RwLock<TableScan>,
14
15
  }
15
16
 
16
17
  impl RbTableScan {
17
18
  pub fn plan_files(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
18
- let scan = rb_self.scan.borrow();
19
-
20
- let runtime = runtime();
21
- let plan_files = runtime.block_on(scan.plan_files()).map_err(to_rb_err)?;
22
- let plan_files: Vec<_> = runtime
23
- .block_on(plan_files.try_collect())
19
+ let plan_files: Vec<_> = ruby
20
+ .detach(|| {
21
+ let scan = rb_self.scan.read().unwrap();
22
+ let runtime = runtime();
23
+ let plan_files = runtime.block_on(scan.plan_files())?;
24
+ runtime.block_on(plan_files.try_collect())
25
+ })
24
26
  .map_err(to_rb_err)?;
27
+
25
28
  let files = ruby.ary_new();
26
29
  for v in plan_files {
27
30
  let file = ruby.hash_new();
@@ -47,7 +50,7 @@ impl RbTableScan {
47
50
  }
48
51
 
49
52
  pub fn snapshot(ruby: &Ruby, rb_self: &Self) -> RbResult<Option<Value>> {
50
- match rb_self.scan.borrow().snapshot() {
53
+ match rb_self.scan.read().unwrap().snapshot() {
51
54
  Some(s) => Ok(Some(rb_snapshot(ruby, s)?)),
52
55
  None => Ok(None),
53
56
  }