iceberg 0.11.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,29 +11,29 @@ use iceberg::writer::file_writer::location_generator::{
11
11
  };
12
12
  use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder;
13
13
  use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
14
- use magnus::{Error as RbErr, RArray, Ruby, Value};
14
+ use magnus::{RArray, Ruby, Value};
15
15
  use parquet::file::properties::WriterProperties;
16
- use std::cell::RefCell;
17
16
  use std::collections::HashMap;
18
- use std::sync::Arc;
17
+ use std::sync::{Arc, RwLock};
19
18
  use uuid::Uuid;
20
19
 
21
20
  use crate::RbResult;
22
21
  use crate::arrow::RbArrowType;
23
22
  use crate::catalog::RbCatalog;
24
23
  use crate::error::to_rb_err;
24
+ use crate::ruby::GvlExt;
25
25
  use crate::runtime::runtime;
26
26
  use crate::scan::RbTableScan;
27
27
  use crate::utils::*;
28
28
 
29
29
  #[magnus::wrap(class = "Iceberg::RbTable")]
30
30
  pub struct RbTable {
31
- pub table: RefCell<Table>,
31
+ pub table: RwLock<Table>,
32
32
  }
33
33
 
34
34
  impl RbTable {
35
35
  pub fn scan(&self, snapshot_id: Option<i64>) -> RbResult<RbTableScan> {
36
- let table = self.table.borrow();
36
+ let table = self.table.read().unwrap();
37
37
  let mut builder = table.scan();
38
38
  if let Some(si) = snapshot_id {
39
39
  builder = builder.snapshot_id(si);
@@ -48,62 +48,58 @@ impl RbTable {
48
48
  data: RbArrowType<ArrowArrayStreamReader>,
49
49
  catalog: &RbCatalog,
50
50
  ) -> RbResult<RbTable> {
51
- let runtime = runtime();
52
- let table = rb_self.table.borrow();
53
- let catalog = catalog.catalog.borrow();
54
-
55
- let table_schema: Arc<arrow_schema::Schema> = Arc::new(
56
- table
57
- .metadata()
58
- .current_schema()
59
- .as_ref()
60
- .try_into()
61
- .unwrap(),
62
- );
63
-
64
- let location_generator =
65
- DefaultLocationGenerator::new(table.metadata().clone()).map_err(to_rb_err)?;
66
- let file_name_generator = DefaultFileNameGenerator::new(
67
- // TODO move task id to suffix to match Python and Java
68
- "0".to_string(),
69
- Some(Uuid::new_v4().to_string()),
70
- iceberg::spec::DataFileFormat::Parquet,
71
- );
72
-
73
- let parquet_writer_builder = ParquetWriterBuilder::new(
74
- WriterProperties::default(),
75
- table.metadata().current_schema().clone(),
76
- );
77
- let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size(
78
- parquet_writer_builder,
79
- table.file_io().clone(),
80
- location_generator.clone(),
81
- file_name_generator.clone(),
82
- );
83
- let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder);
84
- let mut data_file_writer = runtime
85
- .block_on(data_file_writer_builder.build(None))
86
- .map_err(to_rb_err)?;
87
-
88
- for batch in data.0 {
89
- let batch = batch
90
- .unwrap()
91
- .with_schema(table_schema.clone())
92
- .map_err(|e| RbErr::new(ruby.exception_arg_error(), e.to_string()))?;
93
- runtime
94
- .block_on(data_file_writer.write(batch))
95
- .map_err(to_rb_err)?;
96
- }
97
-
98
- let data_files = runtime
99
- .block_on(data_file_writer.close())
100
- .map_err(to_rb_err)?;
101
-
102
- let tx = Transaction::new(&table);
103
- let append_action = tx.fast_append().add_data_files(data_files.clone());
104
- let tx = append_action.apply(tx).map_err(to_rb_err)?;
105
- let table = runtime
106
- .block_on(tx.commit(catalog.as_catalog()))
51
+ let table = ruby
52
+ .detach(|| {
53
+ let runtime = runtime();
54
+ let table = rb_self.table.read().unwrap();
55
+ let catalog = catalog.catalog.read().unwrap();
56
+
57
+ let table_schema: Arc<arrow_schema::Schema> = Arc::new(
58
+ table
59
+ .metadata()
60
+ .current_schema()
61
+ .as_ref()
62
+ .try_into()
63
+ .unwrap(),
64
+ );
65
+
66
+ let location_generator = DefaultLocationGenerator::new(table.metadata().clone())?;
67
+ let file_name_generator = DefaultFileNameGenerator::new(
68
+ // TODO move task id to suffix to match Python and Java
69
+ "0".to_string(),
70
+ Some(Uuid::new_v4().to_string()),
71
+ iceberg::spec::DataFileFormat::Parquet,
72
+ );
73
+
74
+ let parquet_writer_builder = ParquetWriterBuilder::new(
75
+ WriterProperties::default(),
76
+ table.metadata().current_schema().clone(),
77
+ );
78
+ let rolling_file_writer_builder =
79
+ RollingFileWriterBuilder::new_with_default_file_size(
80
+ parquet_writer_builder,
81
+ table.file_io().clone(),
82
+ location_generator.clone(),
83
+ file_name_generator.clone(),
84
+ );
85
+ let data_file_writer_builder =
86
+ DataFileWriterBuilder::new(rolling_file_writer_builder);
87
+ let mut data_file_writer =
88
+ runtime.block_on(data_file_writer_builder.build(None))?;
89
+
90
+ for batch in data.0 {
91
+ let batch = batch.unwrap().with_schema(table_schema.clone())?;
92
+ runtime.block_on(data_file_writer.write(batch))?;
93
+ }
94
+
95
+ let data_files = runtime.block_on(data_file_writer.close())?;
96
+
97
+ let tx = Transaction::new(&table);
98
+ let append_action = tx.fast_append().add_data_files(data_files.clone());
99
+ let tx = append_action.apply(tx)?;
100
+
101
+ runtime.block_on(tx.commit(catalog.as_catalog()))
102
+ })
107
103
  .map_err(to_rb_err)?;
108
104
 
109
105
  Ok(RbTable {
@@ -112,7 +108,7 @@ impl RbTable {
112
108
  }
113
109
 
114
110
  pub fn format_version(&self) -> i32 {
115
- match self.table.borrow().metadata().format_version() {
111
+ match self.table.read().unwrap().metadata().format_version() {
116
112
  FormatVersion::V1 => 1,
117
113
  FormatVersion::V2 => 2,
118
114
  FormatVersion::V3 => 3,
@@ -120,43 +116,49 @@ impl RbTable {
120
116
  }
121
117
 
122
118
  pub fn uuid(&self) -> String {
123
- self.table.borrow().metadata().uuid().to_string()
119
+ self.table.read().unwrap().metadata().uuid().to_string()
124
120
  }
125
121
 
126
122
  pub fn location(&self) -> String {
127
- self.table.borrow().metadata().location().to_string()
123
+ self.table.read().unwrap().metadata().location().to_string()
128
124
  }
129
125
 
130
126
  pub fn last_sequence_number(&self) -> i64 {
131
- self.table.borrow().metadata().last_sequence_number()
127
+ self.table.read().unwrap().metadata().last_sequence_number()
132
128
  }
133
129
 
134
130
  pub fn next_sequence_number(&self) -> i64 {
135
- self.table.borrow().metadata().next_sequence_number()
131
+ self.table.read().unwrap().metadata().next_sequence_number()
136
132
  }
137
133
 
138
134
  pub fn last_column_id(&self) -> i32 {
139
- self.table.borrow().metadata().last_column_id()
135
+ self.table.read().unwrap().metadata().last_column_id()
140
136
  }
141
137
 
142
138
  pub fn last_partition_id(&self) -> i32 {
143
- self.table.borrow().metadata().last_partition_id()
139
+ self.table.read().unwrap().metadata().last_partition_id()
144
140
  }
145
141
 
146
142
  pub fn last_updated_ms(&self) -> i64 {
147
- self.table.borrow().metadata().last_updated_ms()
143
+ self.table.read().unwrap().metadata().last_updated_ms()
148
144
  }
149
145
 
150
146
  pub fn schemas(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
151
147
  let schemas = ruby.ary_new();
152
- for s in rb_self.table.borrow().metadata().schemas_iter() {
148
+ for s in rb_self.table.read().unwrap().metadata().schemas_iter() {
153
149
  schemas.push(rb_schema(ruby, s)?)?;
154
150
  }
155
151
  Ok(schemas)
156
152
  }
157
153
 
158
154
  pub fn schema_by_id(ruby: &Ruby, rb_self: &Self, schema_id: i32) -> RbResult<Option<Value>> {
159
- let schema = match rb_self.table.borrow().metadata().schema_by_id(schema_id) {
155
+ let schema = match rb_self
156
+ .table
157
+ .read()
158
+ .unwrap()
159
+ .metadata()
160
+ .schema_by_id(schema_id)
161
+ {
160
162
  Some(s) => Some(rb_schema(ruby, s)?),
161
163
  None => None,
162
164
  };
@@ -164,16 +166,25 @@ impl RbTable {
164
166
  }
165
167
 
166
168
  pub fn current_schema(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
167
- rb_schema(ruby, rb_self.table.borrow().metadata().current_schema())
169
+ rb_schema(
170
+ ruby,
171
+ rb_self.table.read().unwrap().metadata().current_schema(),
172
+ )
168
173
  }
169
174
 
170
175
  pub fn current_schema_id(&self) -> i32 {
171
- self.table.borrow().metadata().current_schema_id()
176
+ self.table.read().unwrap().metadata().current_schema_id()
172
177
  }
173
178
 
174
179
  pub fn partition_specs(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
175
180
  let partition_specs = ruby.ary_new();
176
- for s in rb_self.table.borrow().metadata().partition_specs_iter() {
181
+ for s in rb_self
182
+ .table
183
+ .read()
184
+ .unwrap()
185
+ .metadata()
186
+ .partition_specs_iter()
187
+ {
177
188
  partition_specs.push(rb_partition_spec(s)?)?;
178
189
  }
179
190
  Ok(partition_specs)
@@ -182,7 +193,8 @@ impl RbTable {
182
193
  pub fn partition_spec_by_id(&self, partition_spec_id: i32) -> RbResult<Option<Value>> {
183
194
  let partition_spec = match self
184
195
  .table
185
- .borrow()
196
+ .read()
197
+ .unwrap()
186
198
  .metadata()
187
199
  .partition_spec_by_id(partition_spec_id)
188
200
  {
@@ -193,16 +205,26 @@ impl RbTable {
193
205
  }
194
206
 
195
207
  pub fn default_partition_spec(&self) -> RbResult<Value> {
196
- rb_partition_spec(self.table.borrow().metadata().default_partition_spec())
208
+ rb_partition_spec(
209
+ self.table
210
+ .read()
211
+ .unwrap()
212
+ .metadata()
213
+ .default_partition_spec(),
214
+ )
197
215
  }
198
216
 
199
217
  pub fn default_partition_spec_id(&self) -> i32 {
200
- self.table.borrow().metadata().default_partition_spec_id()
218
+ self.table
219
+ .read()
220
+ .unwrap()
221
+ .metadata()
222
+ .default_partition_spec_id()
201
223
  }
202
224
 
203
225
  pub fn snapshots(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
204
226
  let snapshots = ruby.ary_new();
205
- for s in rb_self.table.borrow().metadata().snapshots() {
227
+ for s in rb_self.table.read().unwrap().metadata().snapshots() {
206
228
  snapshots.push(rb_snapshot(ruby, s)?)?;
207
229
  }
208
230
  Ok(snapshots)
@@ -215,7 +237,8 @@ impl RbTable {
215
237
  ) -> RbResult<Option<Value>> {
216
238
  let snapshot = match rb_self
217
239
  .table
218
- .borrow()
240
+ .read()
241
+ .unwrap()
219
242
  .metadata()
220
243
  .snapshot_by_id(snapshot_id)
221
244
  {
@@ -227,7 +250,7 @@ impl RbTable {
227
250
 
228
251
  pub fn history(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
229
252
  let history = ruby.ary_new();
230
- for s in rb_self.table.borrow().metadata().history() {
253
+ for s in rb_self.table.read().unwrap().metadata().history() {
231
254
  let snapshot_log = ruby.hash_new();
232
255
  snapshot_log.aset(ruby.to_symbol("snapshot_id"), s.snapshot_id)?;
233
256
  // TODO timestamp
@@ -238,7 +261,7 @@ impl RbTable {
238
261
 
239
262
  pub fn metadata_log(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
240
263
  let metadata_logs = ruby.ary_new();
241
- for s in rb_self.table.borrow().metadata().metadata_log() {
264
+ for s in rb_self.table.read().unwrap().metadata().metadata_log() {
242
265
  let metadata_log = ruby.hash_new();
243
266
  metadata_log.aset(
244
267
  ruby.to_symbol("metadata_file"),
@@ -251,7 +274,7 @@ impl RbTable {
251
274
  }
252
275
 
253
276
  pub fn current_snapshot(ruby: &Ruby, rb_self: &Self) -> RbResult<Option<Value>> {
254
- let snapshot = match rb_self.table.borrow().metadata().current_snapshot() {
277
+ let snapshot = match rb_self.table.read().unwrap().metadata().current_snapshot() {
255
278
  Some(s) => Some(rb_snapshot(ruby, s)?),
256
279
  None => None,
257
280
  };
@@ -259,7 +282,7 @@ impl RbTable {
259
282
  }
260
283
 
261
284
  pub fn current_snapshot_id(&self) -> Option<i64> {
262
- self.table.borrow().metadata().current_snapshot_id()
285
+ self.table.read().unwrap().metadata().current_snapshot_id()
263
286
  }
264
287
 
265
288
  pub fn snapshot_for_ref(
@@ -269,7 +292,8 @@ impl RbTable {
269
292
  ) -> RbResult<Option<Value>> {
270
293
  let snapshot = match rb_self
271
294
  .table
272
- .borrow()
295
+ .read()
296
+ .unwrap()
273
297
  .metadata()
274
298
  .snapshot_for_ref(&ref_name)
275
299
  {
@@ -281,7 +305,7 @@ impl RbTable {
281
305
 
282
306
  pub fn sort_orders(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
283
307
  let sort_orders = ruby.ary_new();
284
- for s in rb_self.table.borrow().metadata().sort_orders_iter() {
308
+ for s in rb_self.table.read().unwrap().metadata().sort_orders_iter() {
285
309
  sort_orders.push(rb_sort_order(s)?)?;
286
310
  }
287
311
  Ok(sort_orders)
@@ -290,7 +314,8 @@ impl RbTable {
290
314
  pub fn sort_order_by_id(&self, sort_order_id: i64) -> RbResult<Option<Value>> {
291
315
  let sort_order = match self
292
316
  .table
293
- .borrow()
317
+ .read()
318
+ .unwrap()
294
319
  .metadata()
295
320
  .sort_order_by_id(sort_order_id)
296
321
  {
@@ -301,20 +326,24 @@ impl RbTable {
301
326
  }
302
327
 
303
328
  pub fn default_sort_order(&self) -> RbResult<Value> {
304
- rb_sort_order(self.table.borrow().metadata().default_sort_order())
329
+ rb_sort_order(self.table.read().unwrap().metadata().default_sort_order())
305
330
  }
306
331
 
307
332
  pub fn default_sort_order_id(&self) -> i64 {
308
- self.table.borrow().metadata().default_sort_order_id()
333
+ self.table
334
+ .read()
335
+ .unwrap()
336
+ .metadata()
337
+ .default_sort_order_id()
309
338
  }
310
339
 
311
340
  pub fn properties(&self) -> HashMap<String, String> {
312
- self.table.borrow().metadata().properties().clone()
341
+ self.table.read().unwrap().metadata().properties().clone()
313
342
  }
314
343
 
315
344
  pub fn statistics(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
316
345
  let statistics = ruby.ary_new();
317
- for s in rb_self.table.borrow().metadata().statistics_iter() {
346
+ for s in rb_self.table.read().unwrap().metadata().statistics_iter() {
318
347
  statistics.push(rb_statistics_file(s)?)?;
319
348
  }
320
349
  Ok(statistics)
@@ -324,7 +353,8 @@ impl RbTable {
324
353
  let statistics = ruby.ary_new();
325
354
  for s in rb_self
326
355
  .table
327
- .borrow()
356
+ .read()
357
+ .unwrap()
328
358
  .metadata()
329
359
  .partition_statistics_iter()
330
360
  {
@@ -336,7 +366,8 @@ impl RbTable {
336
366
  pub fn statistics_for_snapshot(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
337
367
  let statistics = match self
338
368
  .table
339
- .borrow()
369
+ .read()
370
+ .unwrap()
340
371
  .metadata()
341
372
  .statistics_for_snapshot(snapshot_id)
342
373
  {
@@ -349,7 +380,8 @@ impl RbTable {
349
380
  pub fn partition_statistics_for_snapshot(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
350
381
  let statistics = match self
351
382
  .table
352
- .borrow()
383
+ .read()
384
+ .unwrap()
353
385
  .metadata()
354
386
  .partition_statistics_for_snapshot(snapshot_id)
355
387
  {
@@ -361,14 +393,26 @@ impl RbTable {
361
393
 
362
394
  pub fn encryption_keys(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
363
395
  let encryption_keys = ruby.ary_new();
364
- for k in rb_self.table.borrow().metadata().encryption_keys_iter() {
396
+ for k in rb_self
397
+ .table
398
+ .read()
399
+ .unwrap()
400
+ .metadata()
401
+ .encryption_keys_iter()
402
+ {
365
403
  encryption_keys.push(rb_encrypted_key(k)?)?;
366
404
  }
367
405
  Ok(encryption_keys)
368
406
  }
369
407
 
370
408
  pub fn encryption_key(&self, key_id: String) -> RbResult<Option<Value>> {
371
- let key = match self.table.borrow().metadata().encryption_key(&key_id) {
409
+ let key = match self
410
+ .table
411
+ .read()
412
+ .unwrap()
413
+ .metadata()
414
+ .encryption_key(&key_id)
415
+ {
372
416
  Some(k) => Some(rb_encrypted_key(k)?),
373
417
  None => None,
374
418
  };
@@ -376,7 +420,7 @@ impl RbTable {
376
420
  }
377
421
 
378
422
  pub fn from_metadata_file(location: String) -> RbResult<Self> {
379
- let file_io = FileIO::from_path(&location).unwrap().build().unwrap();
423
+ let file_io = FileIO::new_with_fs();
380
424
  let table_ident = TableIdent::from_strs(["static-table", &location]).unwrap();
381
425
  let static_table = runtime()
382
426
  .block_on(StaticTable::from_metadata_file(
@@ -1,3 +1,3 @@
1
1
  module Iceberg
2
- VERSION = "0.11.0"
2
+ VERSION = "0.11.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iceberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -41,6 +41,7 @@ files:
41
41
  - ext/iceberg/src/catalog.rs
42
42
  - ext/iceberg/src/error.rs
43
43
  - ext/iceberg/src/lib.rs
44
+ - ext/iceberg/src/ruby.rs
44
45
  - ext/iceberg/src/runtime.rs
45
46
  - ext/iceberg/src/scan.rs
46
47
  - ext/iceberg/src/table.rs
@@ -76,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
76
77
  - !ruby/object:Gem::Version
77
78
  version: '0'
78
79
  requirements: []
79
- rubygems_version: 3.6.9
80
+ rubygems_version: 4.0.10
80
81
  specification_version: 4
81
82
  summary: Apache Iceberg for Ruby
82
83
  test_files: []