iceberg 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
1
+ use arrow_array::ffi_stream::ArrowArrayStreamReader;
2
+ use iceberg::TableIdent;
3
+ use iceberg::io::FileIO;
4
+ use iceberg::spec::FormatVersion;
5
+ use iceberg::table::{StaticTable, Table};
6
+ use iceberg::transaction::{ApplyTransactionAction, Transaction};
7
+ use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
8
+ use iceberg::writer::file_writer::ParquetWriterBuilder;
9
+ use iceberg::writer::file_writer::location_generator::{
10
+ DefaultFileNameGenerator, DefaultLocationGenerator,
11
+ };
12
+ use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
13
+ use magnus::{Error as RbErr, RArray, Ruby, Value};
14
+ use parquet::file::properties::WriterProperties;
15
+ use std::cell::RefCell;
16
+ use std::collections::HashMap;
17
+ use std::sync::Arc;
18
+ use uuid::Uuid;
19
+
20
+ use crate::RbResult;
21
+ use crate::arrow::RbArrowType;
22
+ use crate::catalog::RbCatalog;
23
+ use crate::error::to_rb_err;
24
+ use crate::runtime::runtime;
25
+ use crate::scan::RbTableScan;
26
+ use crate::utils::*;
27
+
28
+ #[magnus::wrap(class = "Iceberg::RbTable")]
29
+ pub struct RbTable {
30
+ pub table: RefCell<Table>,
31
+ }
32
+
33
+ impl RbTable {
34
+ pub fn scan(&self, snapshot_id: Option<i64>) -> RbResult<RbTableScan> {
35
+ let table = self.table.borrow();
36
+ let mut builder = table.scan();
37
+ if let Some(si) = snapshot_id {
38
+ builder = builder.snapshot_id(si);
39
+ }
40
+ let scan = builder.build().map_err(to_rb_err)?;
41
+ Ok(RbTableScan { scan: scan.into() })
42
+ }
43
+
44
+ pub fn append(
45
+ ruby: &Ruby,
46
+ rb_self: &Self,
47
+ data: RbArrowType<ArrowArrayStreamReader>,
48
+ catalog: &RbCatalog,
49
+ ) -> RbResult<RbTable> {
50
+ let runtime = runtime();
51
+ let table = rb_self.table.borrow();
52
+ let catalog = catalog.catalog.borrow();
53
+
54
+ let table_schema: Arc<arrow_schema::Schema> = Arc::new(
55
+ table
56
+ .metadata()
57
+ .current_schema()
58
+ .as_ref()
59
+ .try_into()
60
+ .unwrap(),
61
+ );
62
+
63
+ let location_generator =
64
+ DefaultLocationGenerator::new(table.metadata().clone()).map_err(to_rb_err)?;
65
+ let file_name_generator = DefaultFileNameGenerator::new(
66
+ // TODO move task id to suffix to match Python and Java
67
+ "0".to_string(),
68
+ Some(Uuid::new_v4().to_string()),
69
+ iceberg::spec::DataFileFormat::Parquet,
70
+ );
71
+
72
+ let parquet_writer_builder = ParquetWriterBuilder::new(
73
+ WriterProperties::default(),
74
+ table.metadata().current_schema().clone(),
75
+ table.file_io().clone(),
76
+ location_generator.clone(),
77
+ file_name_generator.clone(),
78
+ );
79
+ let data_file_writer_builder = DataFileWriterBuilder::new(parquet_writer_builder, None, 0);
80
+ let mut data_file_writer = runtime
81
+ .block_on(data_file_writer_builder.build())
82
+ .map_err(to_rb_err)?;
83
+
84
+ for batch in data.0 {
85
+ let batch = batch
86
+ .unwrap()
87
+ .with_schema(table_schema.clone())
88
+ .map_err(|e| RbErr::new(ruby.exception_arg_error(), e.to_string()))?;
89
+ runtime
90
+ .block_on(data_file_writer.write(batch))
91
+ .map_err(to_rb_err)?;
92
+ }
93
+
94
+ let data_files = runtime
95
+ .block_on(data_file_writer.close())
96
+ .map_err(to_rb_err)?;
97
+
98
+ let tx = Transaction::new(&table);
99
+ let append_action = tx.fast_append().add_data_files(data_files.clone());
100
+ let tx = append_action.apply(tx).map_err(to_rb_err)?;
101
+ let table = runtime
102
+ .block_on(tx.commit(catalog.as_catalog()))
103
+ .map_err(to_rb_err)?;
104
+
105
+ Ok(RbTable {
106
+ table: table.into(),
107
+ })
108
+ }
109
+
110
+ pub fn format_version(&self) -> i32 {
111
+ match self.table.borrow().metadata().format_version() {
112
+ FormatVersion::V1 => 1,
113
+ FormatVersion::V2 => 2,
114
+ }
115
+ }
116
+
117
+ pub fn uuid(&self) -> String {
118
+ self.table.borrow().metadata().uuid().to_string()
119
+ }
120
+
121
+ pub fn location(&self) -> String {
122
+ self.table.borrow().metadata().location().to_string()
123
+ }
124
+
125
+ pub fn last_sequence_number(&self) -> i64 {
126
+ self.table.borrow().metadata().last_sequence_number()
127
+ }
128
+
129
+ pub fn next_sequence_number(&self) -> i64 {
130
+ self.table.borrow().metadata().next_sequence_number()
131
+ }
132
+
133
+ pub fn last_column_id(&self) -> i32 {
134
+ self.table.borrow().metadata().last_column_id()
135
+ }
136
+
137
+ pub fn last_partition_id(&self) -> i32 {
138
+ self.table.borrow().metadata().last_partition_id()
139
+ }
140
+
141
+ pub fn last_updated_ms(&self) -> i64 {
142
+ self.table.borrow().metadata().last_updated_ms()
143
+ }
144
+
145
+ pub fn schemas(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
146
+ let schemas = ruby.ary_new();
147
+ for s in rb_self.table.borrow().metadata().schemas_iter() {
148
+ schemas.push(rb_schema(s)?)?;
149
+ }
150
+ Ok(schemas)
151
+ }
152
+
153
+ pub fn schema_by_id(&self, schema_id: i32) -> RbResult<Option<Value>> {
154
+ let schema = match self.table.borrow().metadata().schema_by_id(schema_id) {
155
+ Some(s) => Some(rb_schema(s)?),
156
+ None => None,
157
+ };
158
+ Ok(schema)
159
+ }
160
+
161
+ pub fn current_schema(&self) -> RbResult<Value> {
162
+ rb_schema(self.table.borrow().metadata().current_schema())
163
+ }
164
+
165
+ pub fn current_schema_id(&self) -> i32 {
166
+ self.table.borrow().metadata().current_schema_id()
167
+ }
168
+
169
+ pub fn partition_specs(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
170
+ let partition_specs = ruby.ary_new();
171
+ for s in rb_self.table.borrow().metadata().partition_specs_iter() {
172
+ partition_specs.push(rb_partition_spec(s)?)?;
173
+ }
174
+ Ok(partition_specs)
175
+ }
176
+
177
+ pub fn partition_spec_by_id(&self, partition_spec_id: i32) -> RbResult<Option<Value>> {
178
+ let partition_spec = match self
179
+ .table
180
+ .borrow()
181
+ .metadata()
182
+ .partition_spec_by_id(partition_spec_id)
183
+ {
184
+ Some(s) => Some(rb_partition_spec(s)?),
185
+ None => None,
186
+ };
187
+ Ok(partition_spec)
188
+ }
189
+
190
+ pub fn default_partition_spec(&self) -> RbResult<Value> {
191
+ rb_partition_spec(self.table.borrow().metadata().default_partition_spec())
192
+ }
193
+
194
+ pub fn default_partition_spec_id(&self) -> i32 {
195
+ self.table.borrow().metadata().default_partition_spec_id()
196
+ }
197
+
198
+ pub fn snapshots(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
199
+ let snapshots = ruby.ary_new();
200
+ for s in rb_self.table.borrow().metadata().snapshots() {
201
+ snapshots.push(rb_snapshot(s)?)?;
202
+ }
203
+ Ok(snapshots)
204
+ }
205
+
206
+ pub fn snapshot_by_id(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
207
+ let snapshot = match self.table.borrow().metadata().snapshot_by_id(snapshot_id) {
208
+ Some(s) => Some(rb_snapshot(s)?),
209
+ None => None,
210
+ };
211
+ Ok(snapshot)
212
+ }
213
+
214
+ pub fn history(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
215
+ let history = ruby.ary_new();
216
+ for s in rb_self.table.borrow().metadata().history() {
217
+ let snapshot_log = ruby.hash_new();
218
+ snapshot_log.aset(ruby.to_symbol("snapshot_id"), s.snapshot_id)?;
219
+ // TODO timestamp
220
+ history.push(snapshot_log)?;
221
+ }
222
+ Ok(history)
223
+ }
224
+
225
+ pub fn metadata_log(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
226
+ let metadata_logs = ruby.ary_new();
227
+ for s in rb_self.table.borrow().metadata().metadata_log() {
228
+ let metadata_log = ruby.hash_new();
229
+ metadata_log.aset(
230
+ ruby.to_symbol("metadata_file"),
231
+ ruby.str_new(&s.metadata_file),
232
+ )?;
233
+ // TODO timestamp
234
+ metadata_logs.push(metadata_log)?;
235
+ }
236
+ Ok(metadata_logs)
237
+ }
238
+
239
+ pub fn current_snapshot(&self) -> RbResult<Option<Value>> {
240
+ let snapshot = match self.table.borrow().metadata().current_snapshot() {
241
+ Some(s) => Some(rb_snapshot(s)?),
242
+ None => None,
243
+ };
244
+ Ok(snapshot)
245
+ }
246
+
247
+ pub fn current_snapshot_id(&self) -> Option<i64> {
248
+ self.table.borrow().metadata().current_snapshot_id()
249
+ }
250
+
251
+ pub fn snapshot_for_ref(&self, ref_name: String) -> RbResult<Option<Value>> {
252
+ let snapshot = match self.table.borrow().metadata().snapshot_for_ref(&ref_name) {
253
+ Some(s) => Some(rb_snapshot(s)?),
254
+ None => None,
255
+ };
256
+ Ok(snapshot)
257
+ }
258
+
259
+ pub fn sort_orders(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
260
+ let sort_orders = ruby.ary_new();
261
+ for s in rb_self.table.borrow().metadata().sort_orders_iter() {
262
+ sort_orders.push(rb_sort_order(s)?)?;
263
+ }
264
+ Ok(sort_orders)
265
+ }
266
+
267
+ pub fn sort_order_by_id(&self, sort_order_id: i64) -> RbResult<Option<Value>> {
268
+ let sort_order = match self
269
+ .table
270
+ .borrow()
271
+ .metadata()
272
+ .sort_order_by_id(sort_order_id)
273
+ {
274
+ Some(s) => Some(rb_sort_order(s)?),
275
+ None => None,
276
+ };
277
+ Ok(sort_order)
278
+ }
279
+
280
+ pub fn default_sort_order(&self) -> RbResult<Value> {
281
+ rb_sort_order(self.table.borrow().metadata().default_sort_order())
282
+ }
283
+
284
+ pub fn default_sort_order_id(&self) -> i64 {
285
+ self.table.borrow().metadata().default_sort_order_id()
286
+ }
287
+
288
+ pub fn properties(&self) -> HashMap<String, String> {
289
+ self.table.borrow().metadata().properties().clone()
290
+ }
291
+
292
+ pub fn statistics(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
293
+ let statistics = ruby.ary_new();
294
+ for s in rb_self.table.borrow().metadata().statistics_iter() {
295
+ statistics.push(rb_statistics_file(s)?)?;
296
+ }
297
+ Ok(statistics)
298
+ }
299
+
300
+ pub fn partition_statistics(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
301
+ let statistics = ruby.ary_new();
302
+ for s in rb_self
303
+ .table
304
+ .borrow()
305
+ .metadata()
306
+ .partition_statistics_iter()
307
+ {
308
+ statistics.push(rb_partition_statistics_file(s)?)?;
309
+ }
310
+ Ok(statistics)
311
+ }
312
+
313
+ pub fn statistics_for_snapshot(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
314
+ let statistics = match self
315
+ .table
316
+ .borrow()
317
+ .metadata()
318
+ .statistics_for_snapshot(snapshot_id)
319
+ {
320
+ Some(s) => Some(rb_statistics_file(s)?),
321
+ None => None,
322
+ };
323
+ Ok(statistics)
324
+ }
325
+
326
+ pub fn partition_statistics_for_snapshot(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
327
+ let statistics = match self
328
+ .table
329
+ .borrow()
330
+ .metadata()
331
+ .partition_statistics_for_snapshot(snapshot_id)
332
+ {
333
+ Some(s) => Some(rb_partition_statistics_file(s)?),
334
+ None => None,
335
+ };
336
+ Ok(statistics)
337
+ }
338
+
339
+ pub fn encryption_keys(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
340
+ let encryption_keys = ruby.ary_new();
341
+ for (k, v) in rb_self.table.borrow().metadata().encryption_keys_iter() {
342
+ encryption_keys.push((ruby.str_new(k), ruby.str_new(v)))?;
343
+ }
344
+ Ok(encryption_keys)
345
+ }
346
+
347
+ pub fn encryption_key(&self, key_id: String) -> Option<String> {
348
+ self.table
349
+ .borrow()
350
+ .metadata()
351
+ .encryption_key(&key_id)
352
+ .cloned()
353
+ }
354
+
355
+ pub fn from_metadata_file(location: String) -> RbResult<Self> {
356
+ let file_io = FileIO::from_path(&location).unwrap().build().unwrap();
357
+ let table_ident = TableIdent::from_strs(["static-table", &location]).unwrap();
358
+ let static_table = runtime()
359
+ .block_on(StaticTable::from_metadata_file(
360
+ &location,
361
+ table_ident,
362
+ file_io,
363
+ ))
364
+ .map_err(to_rb_err)?;
365
+ Ok(Self {
366
+ table: static_table.into_table().into(),
367
+ })
368
+ }
369
+ }
@@ -0,0 +1,245 @@
1
+ use iceberg::spec::{
2
+ Literal, NestedField, PartitionSpec, PartitionStatisticsFile, PrimitiveLiteral, PrimitiveType,
3
+ Schema, Snapshot, SortOrder, StatisticsFile, Type,
4
+ };
5
+ use iceberg::{NamespaceIdent, TableIdent};
6
+ use magnus::{
7
+ Error as RbErr, IntoValue, RArray, RClass, RHash, RModule, Ruby, TryConvert, Value, kwargs,
8
+ prelude::*,
9
+ };
10
+
11
+ use crate::RbResult;
12
+ use crate::error::to_rb_err;
13
+
14
/// Local newtype around `T`.
///
/// The `TryConvert` impls in this file are written for `Wrap<...>` of the
/// Iceberg types they convert into.
pub struct Wrap<T>(pub T);
15
+
16
+ impl TryConvert for Wrap<NamespaceIdent> {
17
+ fn try_convert(ob: Value) -> RbResult<Self> {
18
+ let ident = if let Ok(vec) = Vec::<String>::try_convert(ob) {
19
+ // prevent dots due to buggy behavior with iceberg-catalog-{rest,sql} crates
20
+ if vec.iter().any(|v| v.contains(".")) {
21
+ return Err(RbErr::new(
22
+ Ruby::get_with(ob).exception_arg_error(),
23
+ "Unsupported namespace",
24
+ ));
25
+ }
26
+ NamespaceIdent::from_vec(vec).map_err(to_rb_err)?
27
+ } else {
28
+ NamespaceIdent::from_strs(String::try_convert(ob)?.split(".")).map_err(to_rb_err)?
29
+ };
30
+ Ok(Wrap(ident))
31
+ }
32
+ }
33
+
34
+ impl TryConvert for Wrap<TableIdent> {
35
+ fn try_convert(ob: Value) -> RbResult<Self> {
36
+ let ident = if let Ok(vec) = Vec::<String>::try_convert(ob) {
37
+ TableIdent::from_strs(vec.iter()).map_err(to_rb_err)?
38
+ } else {
39
+ TableIdent::from_strs(String::try_convert(ob)?.split(".")).map_err(to_rb_err)?
40
+ };
41
+ Ok(Wrap(ident))
42
+ }
43
+ }
44
+
45
+ impl TryConvert for Wrap<Schema> {
46
+ fn try_convert(ob: Value) -> RbResult<Self> {
47
+ let ruby = Ruby::get_with(ob);
48
+ let mut fields = Vec::new();
49
+ let rb_fields: RArray = ob.funcall("fields", ())?;
50
+ for rb_field in rb_fields {
51
+ let rb_field = RHash::try_convert(rb_field)?;
52
+ let rb_type: Value = rb_field.aref(ruby.to_symbol("type"))?;
53
+ let field_type = if let Ok(s) = String::try_convert(rb_type) {
54
+ match s.as_str() {
55
+ "boolean" => Type::Primitive(PrimitiveType::Boolean),
56
+ "int" => Type::Primitive(PrimitiveType::Int),
57
+ "long" => Type::Primitive(PrimitiveType::Long),
58
+ "float" => Type::Primitive(PrimitiveType::Float),
59
+ "double" => Type::Primitive(PrimitiveType::Double),
60
+ // TODO PrimitiveType::Decimal
61
+ "date" => Type::Primitive(PrimitiveType::Date),
62
+ "time" => Type::Primitive(PrimitiveType::Time),
63
+ "timestamp" => Type::Primitive(PrimitiveType::Timestamp),
64
+ "timestamptz" => Type::Primitive(PrimitiveType::Timestamptz),
65
+ "timestamp_ns" => Type::Primitive(PrimitiveType::TimestampNs),
66
+ "timestamptz_ns" => Type::Primitive(PrimitiveType::TimestamptzNs),
67
+ "string" => Type::Primitive(PrimitiveType::String),
68
+ "uuid" => Type::Primitive(PrimitiveType::Uuid),
69
+ // TODO PrimitiveType::Fixed
70
+ "binary" => Type::Primitive(PrimitiveType::Binary),
71
+ _ => {
72
+ return Err(RbErr::new(
73
+ ruby.exception_arg_error(),
74
+ format!("Type not supported: {}", s),
75
+ ));
76
+ }
77
+ }
78
+ } else {
79
+ let class_name = unsafe { rb_type.classname() }.to_string();
80
+ match class_name.as_str() {
81
+ "Polars::Boolean" => Type::Primitive(PrimitiveType::Boolean),
82
+ "Polars::Int32" => Type::Primitive(PrimitiveType::Int),
83
+ "Polars::Int64" => Type::Primitive(PrimitiveType::Long),
84
+ "Polars::Float32" => Type::Primitive(PrimitiveType::Float),
85
+ "Polars::Float64" => Type::Primitive(PrimitiveType::Double),
86
+ "Polars::Date" => Type::Primitive(PrimitiveType::Date),
87
+ "Polars::Time" => Type::Primitive(PrimitiveType::Time),
88
+ "Polars::String" => Type::Primitive(PrimitiveType::String),
89
+ "Polars::Binary" => Type::Primitive(PrimitiveType::Binary),
90
+ _ => {
91
+ return Err(RbErr::new(
92
+ ruby.exception_arg_error(),
93
+ format!("Type not supported: {}", class_name),
94
+ ));
95
+ }
96
+ }
97
+ };
98
+
99
+ let initial_default = rb_field.aref(ruby.to_symbol("initial_default"))?;
100
+ let write_default = rb_field.aref(ruby.to_symbol("write_default"))?;
101
+
102
+ let initial_default = default_value(initial_default, &field_type)?;
103
+ let write_default = default_value(write_default, &field_type)?;
104
+
105
+ fields.push(
106
+ NestedField {
107
+ id: rb_field.aref(ruby.to_symbol("id"))?,
108
+ name: rb_field.aref(ruby.to_symbol("name"))?,
109
+ required: rb_field.aref(ruby.to_symbol("required"))?,
110
+ field_type: field_type.into(),
111
+ doc: rb_field.aref(ruby.to_symbol("doc"))?,
112
+ initial_default,
113
+ write_default,
114
+ }
115
+ .into(),
116
+ );
117
+ }
118
+ let schema = Schema::builder()
119
+ .with_fields(fields)
120
+ .build()
121
+ .map_err(to_rb_err)?;
122
+ Ok(Wrap(schema))
123
+ }
124
+ }
125
+
126
+ fn default_value(ob: Value, field_type: &Type) -> RbResult<Option<Literal>> {
127
+ if ob.is_nil() {
128
+ return Ok(None);
129
+ }
130
+
131
+ let lit = match field_type {
132
+ Type::Primitive(ty) => {
133
+ let pl = match ty {
134
+ PrimitiveType::Boolean => PrimitiveLiteral::Boolean(bool::try_convert(ob)?),
135
+ PrimitiveType::Int => PrimitiveLiteral::Int(i32::try_convert(ob)?),
136
+ PrimitiveType::Long => PrimitiveLiteral::Long(i64::try_convert(ob)?),
137
+ PrimitiveType::Float => PrimitiveLiteral::Float(f32::try_convert(ob)?.into()),
138
+ PrimitiveType::Double => PrimitiveLiteral::Double(f64::try_convert(ob)?.into()),
139
+ PrimitiveType::String => PrimitiveLiteral::String(String::try_convert(ob)?),
140
+ _ => todo!(),
141
+ };
142
+ Literal::Primitive(pl)
143
+ }
144
+ _ => todo!(),
145
+ };
146
+ Ok(Some(lit))
147
+ }
148
+
149
+ pub fn rb_schema(schema: &Schema) -> RbResult<Value> {
150
+ let ruby = Ruby::get().unwrap();
151
+ let fields = ruby.ary_new();
152
+ for f in schema.as_struct().fields() {
153
+ let field = ruby.hash_new();
154
+ field.aset(ruby.to_symbol("id"), f.id)?;
155
+ field.aset(ruby.to_symbol("name"), ruby.str_new(&f.name))?;
156
+
157
+ let field_type = match &*f.field_type {
158
+ Type::Primitive(ty) => match ty {
159
+ PrimitiveType::Boolean => "boolean",
160
+ PrimitiveType::Int => "int",
161
+ PrimitiveType::Long => "long",
162
+ PrimitiveType::Float => "float",
163
+ PrimitiveType::Double => "double",
164
+ PrimitiveType::Decimal {
165
+ precision: _,
166
+ scale: _,
167
+ } => todo!(),
168
+ PrimitiveType::Date => "date",
169
+ PrimitiveType::Time => "time",
170
+ PrimitiveType::Timestamp => "timestamp",
171
+ PrimitiveType::Timestamptz => "timestamptz",
172
+ PrimitiveType::TimestampNs => "timestamp_ns",
173
+ PrimitiveType::TimestamptzNs => "timestamptz_ns",
174
+ PrimitiveType::String => "string",
175
+ PrimitiveType::Uuid => "uuid",
176
+ PrimitiveType::Binary => "binary",
177
+ PrimitiveType::Fixed(_) => todo!(),
178
+ },
179
+ _ => todo!(),
180
+ };
181
+ field.aset(ruby.to_symbol("type"), field_type)?;
182
+
183
+ field.aset(ruby.to_symbol("required"), f.required)?;
184
+
185
+ let initial_default = f.initial_default.as_ref().map(rb_literal);
186
+ field.aset(ruby.to_symbol("initial_default"), initial_default)?;
187
+
188
+ let write_default = f.write_default.as_ref().map(rb_literal);
189
+ field.aset(ruby.to_symbol("write_default"), write_default)?;
190
+
191
+ field.aset(
192
+ ruby.to_symbol("doc"),
193
+ f.doc.as_ref().map(|v| ruby.str_new(v)),
194
+ )?;
195
+
196
+ fields.push(field)?;
197
+ }
198
+ let schema_id = schema.schema_id();
199
+
200
+ ruby.class_object()
201
+ .const_get::<_, RModule>("Iceberg")
202
+ .unwrap()
203
+ .const_get::<_, RClass>("Schema")
204
+ .unwrap()
205
+ .funcall("new", (fields, kwargs!("schema_id" => schema_id)))
206
+ }
207
+
208
+ pub fn rb_snapshot(_snapshot: &Snapshot) -> RbResult<Value> {
209
+ todo!();
210
+ }
211
+
212
+ pub fn rb_partition_spec(_partition_spec: &PartitionSpec) -> RbResult<Value> {
213
+ todo!();
214
+ }
215
+
216
+ pub fn rb_sort_order(_sort_order: &SortOrder) -> RbResult<Value> {
217
+ todo!();
218
+ }
219
+
220
+ pub fn rb_statistics_file(_statistics_file: &StatisticsFile) -> RbResult<Value> {
221
+ todo!();
222
+ }
223
+
224
+ pub fn rb_partition_statistics_file(
225
+ _partition_statistics_file: &PartitionStatisticsFile,
226
+ ) -> RbResult<Value> {
227
+ todo!();
228
+ }
229
+
230
+ pub fn rb_literal(literal: &Literal) -> Value {
231
+ let ruby = Ruby::get().unwrap();
232
+ match literal {
233
+ Literal::Primitive(pl) => match pl {
234
+ PrimitiveLiteral::Boolean(v) => v.into_value_with(&ruby),
235
+ PrimitiveLiteral::Int(v) => v.into_value_with(&ruby),
236
+ PrimitiveLiteral::Long(v) => v.into_value_with(&ruby),
237
+ PrimitiveLiteral::Float(v) => v.into_value_with(&ruby),
238
+ PrimitiveLiteral::Double(v) => v.into_value_with(&ruby),
239
+ PrimitiveLiteral::String(v) => ruby.str_new(v).as_value(),
240
+ PrimitiveLiteral::Binary(v) => ruby.str_from_slice(v).as_value(),
241
+ _ => todo!(),
242
+ },
243
+ _ => todo!(),
244
+ }
245
+ }
@@ -4,8 +4,13 @@ module Iceberg
4
4
  @catalog.list_namespaces(parent)
5
5
  end
6
6
 
7
# Creates +namespace+ in the underlying catalog with the given +properties+.
#
# When +if_not_exists+ is truthy, an Error whose message is exactly
# "Cannot create namespace" or contains "already exists" is swallowed and
# nil is returned; every other Error is re-raised.
def create_namespace(namespace, properties: {}, if_not_exists: nil)
  @catalog.create_namespace(namespace, properties)
rescue Error => e
  already_exists = e.message == "Cannot create namespace" || e.message.include?("already exists")
  raise e unless if_not_exists && already_exists
  nil
end
10
15
 
11
16
  def namespace_exists?(namespace)
data/lib/iceberg/table.rb CHANGED
@@ -83,11 +83,12 @@ module Iceberg
83
83
  @table.properties
84
84
  end
85
85
 
86
- def to_polars
86
+ def to_polars(snapshot_id: nil, storage_options: nil)
87
87
  require "polars-df"
88
88
 
89
- files = @table.plan_files
89
+ files = @table.scan(snapshot_id).plan_files
90
90
  if files.empty?
91
+ # TODO improve
91
92
  schema =
92
93
  current_schema.fields.to_h do |field|
93
94
  dtype =
@@ -122,9 +123,12 @@ module Iceberg
122
123
 
123
124
  Polars.scan_parquet(
124
125
  sources,
125
- # TODO storage options
126
- # TODO cast_options
127
- # TODO allow_missing_columns, extra_columns, _column_mapping
126
+ storage_options: storage_options,
127
+ # TODO
128
+ # cast_options: Polars::ScanCastOptions._default_iceberg,
129
+ # allow_missing_columns: true,
130
+ # extra_columns: "ignore",
131
+ # _column_mapping: column_mapping,
128
132
  _deletion_files: deletion_files
129
133
  )
130
134
  end
@@ -132,8 +136,8 @@ module Iceberg
132
136
 
133
137
  def append(df)
134
138
  check_catalog
135
-
136
139
  @table = @table.append(df.arrow_c_stream, @catalog)
140
+ nil
137
141
  end
138
142
 
139
143
  # hide internal state
@@ -1,3 +1,3 @@
1
1
  module Iceberg
2
- VERSION = "0.10.0"
2
+ VERSION = "0.10.1"
3
3
  end