iceberg 0.11.1 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,9 +21,8 @@ use iceberg_catalog_sql::{
21
21
  };
22
22
  #[cfg(feature = "datafusion")]
23
23
  use iceberg_datafusion::IcebergCatalogProvider;
24
- use std::cell::RefCell;
25
24
  use std::collections::HashMap;
26
- use std::sync::Arc;
25
+ use std::sync::{Arc, RwLock};
27
26
 
28
27
  use crate::error::to_rb_err;
29
28
  use crate::runtime::runtime;
@@ -75,7 +74,7 @@ impl RbCatalogType {
75
74
 
76
75
  #[magnus::wrap(class = "Iceberg::RbCatalog")]
77
76
  pub struct RbCatalog {
78
- pub catalog: RefCell<RbCatalogType>,
77
+ pub catalog: RwLock<RbCatalogType>,
79
78
  }
80
79
 
81
80
  impl RbCatalog {
@@ -175,7 +174,8 @@ impl RbCatalog {
175
174
  let namespaces = runtime()
176
175
  .block_on(
177
176
  self.catalog
178
- .borrow()
177
+ .read()
178
+ .unwrap()
179
179
  .as_catalog()
180
180
  .list_namespaces(parent.map(|v| v.0).as_ref()),
181
181
  )
@@ -191,7 +191,8 @@ impl RbCatalog {
191
191
  runtime()
192
192
  .block_on(
193
193
  self.catalog
194
- .borrow()
194
+ .read()
195
+ .unwrap()
195
196
  .as_catalog()
196
197
  .create_namespace(&name.0, props),
197
198
  )
@@ -201,7 +202,13 @@ impl RbCatalog {
201
202
 
202
203
  pub fn namespace_exists(&self, name: Wrap<NamespaceIdent>) -> RbResult<bool> {
203
204
  let exists = runtime()
204
- .block_on(self.catalog.borrow().as_catalog().namespace_exists(&name.0))
205
+ .block_on(
206
+ self.catalog
207
+ .read()
208
+ .unwrap()
209
+ .as_catalog()
210
+ .namespace_exists(&name.0),
211
+ )
205
212
  .map_err(to_rb_err)?;
206
213
  Ok(exists)
207
214
  }
@@ -211,7 +218,13 @@ impl RbCatalog {
211
218
  name: Wrap<NamespaceIdent>,
212
219
  ) -> RbResult<HashMap<String, String>> {
213
220
  let namespace = runtime()
214
- .block_on(self.catalog.borrow().as_catalog().get_namespace(&name.0))
221
+ .block_on(
222
+ self.catalog
223
+ .read()
224
+ .unwrap()
225
+ .as_catalog()
226
+ .get_namespace(&name.0),
227
+ )
215
228
  .map_err(to_rb_err)?;
216
229
  Ok(namespace.properties().clone())
217
230
  }
@@ -224,7 +237,8 @@ impl RbCatalog {
224
237
  runtime()
225
238
  .block_on(
226
239
  self.catalog
227
- .borrow()
240
+ .read()
241
+ .unwrap()
228
242
  .as_catalog()
229
243
  .update_namespace(&name.0, props),
230
244
  )
@@ -234,14 +248,26 @@ impl RbCatalog {
234
248
 
235
249
  pub fn drop_namespace(&self, name: Wrap<NamespaceIdent>) -> RbResult<()> {
236
250
  runtime()
237
- .block_on(self.catalog.borrow().as_catalog().drop_namespace(&name.0))
251
+ .block_on(
252
+ self.catalog
253
+ .read()
254
+ .unwrap()
255
+ .as_catalog()
256
+ .drop_namespace(&name.0),
257
+ )
238
258
  .map_err(to_rb_err)?;
239
259
  Ok(())
240
260
  }
241
261
 
242
262
  pub fn list_tables(&self, namespace: Wrap<NamespaceIdent>) -> RbResult<Vec<Vec<String>>> {
243
263
  let tables = runtime()
244
- .block_on(self.catalog.borrow().as_catalog().list_tables(&namespace.0))
264
+ .block_on(
265
+ self.catalog
266
+ .read()
267
+ .unwrap()
268
+ .as_catalog()
269
+ .list_tables(&namespace.0),
270
+ )
245
271
  .map_err(to_rb_err)?;
246
272
  Ok(tables
247
273
  .iter()
@@ -267,7 +293,8 @@ impl RbCatalog {
267
293
  let table = runtime()
268
294
  .block_on(
269
295
  self.catalog
270
- .borrow()
296
+ .read()
297
+ .unwrap()
271
298
  .as_catalog()
272
299
  .create_table(&name.0.namespace, creation),
273
300
  )
@@ -279,7 +306,13 @@ impl RbCatalog {
279
306
 
280
307
  pub fn load_table(&self, name: Wrap<TableIdent>) -> RbResult<RbTable> {
281
308
  let table = runtime()
282
- .block_on(self.catalog.borrow().as_catalog().load_table(&name.0))
309
+ .block_on(
310
+ self.catalog
311
+ .read()
312
+ .unwrap()
313
+ .as_catalog()
314
+ .load_table(&name.0),
315
+ )
283
316
  .map_err(to_rb_err)?;
284
317
  Ok(RbTable {
285
318
  table: table.into(),
@@ -288,14 +321,26 @@ impl RbCatalog {
288
321
 
289
322
  pub fn drop_table(&self, name: Wrap<TableIdent>) -> RbResult<()> {
290
323
  runtime()
291
- .block_on(self.catalog.borrow().as_catalog().drop_table(&name.0))
324
+ .block_on(
325
+ self.catalog
326
+ .read()
327
+ .unwrap()
328
+ .as_catalog()
329
+ .drop_table(&name.0),
330
+ )
292
331
  .map_err(to_rb_err)?;
293
332
  Ok(())
294
333
  }
295
334
 
296
335
  pub fn table_exists(&self, name: Wrap<TableIdent>) -> RbResult<bool> {
297
336
  let exists = runtime()
298
- .block_on(self.catalog.borrow().as_catalog().table_exists(&name.0))
337
+ .block_on(
338
+ self.catalog
339
+ .read()
340
+ .unwrap()
341
+ .as_catalog()
342
+ .table_exists(&name.0),
343
+ )
299
344
  .map_err(to_rb_err)?;
300
345
  Ok(exists)
301
346
  }
@@ -304,7 +349,8 @@ impl RbCatalog {
304
349
  runtime()
305
350
  .block_on(
306
351
  self.catalog
307
- .borrow()
352
+ .read()
353
+ .unwrap()
308
354
  .as_catalog()
309
355
  .rename_table(&name.0, &new_name.0),
310
356
  )
@@ -320,7 +366,8 @@ impl RbCatalog {
320
366
  runtime()
321
367
  .block_on(
322
368
  self.catalog
323
- .borrow()
369
+ .read()
370
+ .unwrap()
324
371
  .as_catalog()
325
372
  .register_table(&name.0, metadata_location),
326
373
  )
@@ -333,7 +380,7 @@ impl RbCatalog {
333
380
  let runtime = runtime();
334
381
 
335
382
  // TODO only create context once
336
- let catalog = self.catalog.borrow().as_arc();
383
+ let catalog = self.catalog.read().unwrap().as_arc();
337
384
  let provider = runtime
338
385
  .block_on(IcebergCatalogProvider::try_new(catalog))
339
386
  .unwrap();
@@ -11,7 +11,7 @@ pub fn to_rb_err(err: iceberg::Error) -> RbErr {
11
11
  _ => "Error",
12
12
  };
13
13
 
14
- let class = Ruby::get()
14
+ let mut class = Ruby::get()
15
15
  .unwrap()
16
16
  .class_object()
17
17
  .const_get::<_, RModule>("Iceberg")
@@ -22,6 +22,12 @@ pub fn to_rb_err(err: iceberg::Error) -> RbErr {
22
22
  // no way to get context separately
23
23
  // https://github.com/apache/iceberg-rust/issues/1071
24
24
  let message = err.to_string();
25
+
26
+ // TODO remove in 0.12.0
27
+ if message.contains("target schema is not superset of current schema") {
28
+ class = Ruby::get().unwrap().exception_arg_error();
29
+ }
30
+
25
31
  let message = message
26
32
  // TODO improve
27
33
  .strip_prefix("Unexpected => ")
@@ -1,6 +1,7 @@
1
1
  mod arrow;
2
2
  mod catalog;
3
3
  mod error;
4
+ mod ruby;
4
5
  mod runtime;
5
6
  mod scan;
6
7
  mod table;
@@ -0,0 +1,51 @@
1
+ use std::ffi::c_void;
2
+ use std::ptr::null_mut;
3
+
4
+ use magnus::Ruby;
5
+ use rb_sys::rb_thread_call_without_gvl;
6
+
7
+ pub trait GvlExt {
8
+ fn detach<T, F>(&self, func: F) -> T
9
+ where
10
+ F: Send + FnOnce() -> T,
11
+ T: Send;
12
+ }
13
+
14
+ impl GvlExt for Ruby {
15
+ fn detach<T, F>(&self, func: F) -> T
16
+ where
17
+ F: Send + FnOnce() -> T,
18
+ T: Send,
19
+ {
20
+ let mut data = CallbackData {
21
+ func: Some(func),
22
+ result: None,
23
+ };
24
+
25
+ unsafe {
26
+ rb_thread_call_without_gvl(
27
+ Some(call_without_gvl::<F, T>),
28
+ &mut data as *mut _ as *mut c_void,
29
+ None,
30
+ null_mut(),
31
+ );
32
+ }
33
+
34
+ data.result.unwrap()
35
+ }
36
+ }
37
+
38
+ struct CallbackData<F, T> {
39
+ func: Option<F>,
40
+ result: Option<T>,
41
+ }
42
+
43
+ extern "C" fn call_without_gvl<F, T>(data: *mut c_void) -> *mut c_void
44
+ where
45
+ F: FnOnce() -> T,
46
+ {
47
+ let data = unsafe { &mut *(data as *mut CallbackData<F, T>) };
48
+ let func = data.func.take().unwrap();
49
+ data.result = Some(func());
50
+ null_mut()
51
+ }
@@ -1,27 +1,30 @@
1
1
  use futures::TryStreamExt;
2
2
  use iceberg::scan::TableScan;
3
3
  use magnus::{RArray, Ruby, Value};
4
- use std::cell::RefCell;
4
+ use std::sync::RwLock;
5
5
 
6
6
  use crate::RbResult;
7
7
  use crate::error::to_rb_err;
8
+ use crate::ruby::GvlExt;
8
9
  use crate::runtime::runtime;
9
10
  use crate::utils::rb_snapshot;
10
11
 
11
12
  #[magnus::wrap(class = "Iceberg::RbTableScan")]
12
13
  pub struct RbTableScan {
13
- pub scan: RefCell<TableScan>,
14
+ pub scan: RwLock<TableScan>,
14
15
  }
15
16
 
16
17
  impl RbTableScan {
17
18
  pub fn plan_files(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
18
- let scan = rb_self.scan.borrow();
19
-
20
- let runtime = runtime();
21
- let plan_files = runtime.block_on(scan.plan_files()).map_err(to_rb_err)?;
22
- let plan_files: Vec<_> = runtime
23
- .block_on(plan_files.try_collect())
19
+ let plan_files: Vec<_> = ruby
20
+ .detach(|| {
21
+ let scan = rb_self.scan.read().unwrap();
22
+ let runtime = runtime();
23
+ let plan_files = runtime.block_on(scan.plan_files())?;
24
+ runtime.block_on(plan_files.try_collect())
25
+ })
24
26
  .map_err(to_rb_err)?;
27
+
25
28
  let files = ruby.ary_new();
26
29
  for v in plan_files {
27
30
  let file = ruby.hash_new();
@@ -47,7 +50,7 @@ impl RbTableScan {
47
50
  }
48
51
 
49
52
  pub fn snapshot(ruby: &Ruby, rb_self: &Self) -> RbResult<Option<Value>> {
50
- match rb_self.scan.borrow().snapshot() {
53
+ match rb_self.scan.read().unwrap().snapshot() {
51
54
  Some(s) => Ok(Some(rb_snapshot(ruby, s)?)),
52
55
  None => Ok(None),
53
56
  }