iceberg 0.7.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +6494 -0
- data/Cargo.toml +6 -0
- data/LICENSE.txt +202 -20
- data/NOTICE.txt +14 -0
- data/README.md +213 -0
- data/ext/iceberg/Cargo.toml +34 -0
- data/ext/iceberg/extconf.rb +4 -0
- data/ext/iceberg/src/arrow.rs +22 -0
- data/ext/iceberg/src/catalog.rs +315 -0
- data/ext/iceberg/src/error.rs +32 -0
- data/ext/iceberg/src/lib.rs +127 -0
- data/ext/iceberg/src/runtime.rs +33 -0
- data/ext/iceberg/src/scan.rs +47 -0
- data/ext/iceberg/src/table.rs +369 -0
- data/ext/iceberg/src/utils.rs +245 -0
- data/lib/iceberg/catalog.rb +107 -0
- data/lib/iceberg/glue_catalog.rb +11 -0
- data/lib/iceberg/memory_catalog.rb +8 -0
- data/lib/iceberg/rest_catalog.rb +8 -0
- data/lib/iceberg/schema.rb +10 -0
- data/lib/iceberg/sql_catalog.rb +9 -0
- data/lib/iceberg/static_table.rb +8 -0
- data/lib/iceberg/table.rb +154 -0
- data/lib/iceberg/table_definition.rb +37 -0
- data/lib/iceberg/version.rb +3 -0
- data/lib/iceberg.rb +36 -0
- metadata +65 -121
- data/.document +0 -5
- data/Gemfile +0 -13
- data/Gemfile.lock +0 -20
- data/README.rdoc +0 -19
- data/Rakefile +0 -53
- data/VERSION +0 -1
- data/iceberg.gemspec +0 -63
- data/test/helper.rb +0 -18
- data/test/test_iceberg.rb +0 -7
@@ -0,0 +1,369 @@
|
|
1
|
+
use arrow_array::ffi_stream::ArrowArrayStreamReader;
|
2
|
+
use iceberg::TableIdent;
|
3
|
+
use iceberg::io::FileIO;
|
4
|
+
use iceberg::spec::FormatVersion;
|
5
|
+
use iceberg::table::{StaticTable, Table};
|
6
|
+
use iceberg::transaction::{ApplyTransactionAction, Transaction};
|
7
|
+
use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
|
8
|
+
use iceberg::writer::file_writer::ParquetWriterBuilder;
|
9
|
+
use iceberg::writer::file_writer::location_generator::{
|
10
|
+
DefaultFileNameGenerator, DefaultLocationGenerator,
|
11
|
+
};
|
12
|
+
use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
|
13
|
+
use magnus::{Error as RbErr, RArray, Ruby, Value};
|
14
|
+
use parquet::file::properties::WriterProperties;
|
15
|
+
use std::cell::RefCell;
|
16
|
+
use std::collections::HashMap;
|
17
|
+
use std::sync::Arc;
|
18
|
+
use uuid::Uuid;
|
19
|
+
|
20
|
+
use crate::RbResult;
|
21
|
+
use crate::arrow::RbArrowType;
|
22
|
+
use crate::catalog::RbCatalog;
|
23
|
+
use crate::error::to_rb_err;
|
24
|
+
use crate::runtime::runtime;
|
25
|
+
use crate::scan::RbTableScan;
|
26
|
+
use crate::utils::*;
|
27
|
+
|
28
|
+
#[magnus::wrap(class = "Iceberg::RbTable")]
|
29
|
+
pub struct RbTable {
|
30
|
+
pub table: RefCell<Table>,
|
31
|
+
}
|
32
|
+
|
33
|
+
impl RbTable {
|
34
|
+
pub fn scan(&self, snapshot_id: Option<i64>) -> RbResult<RbTableScan> {
|
35
|
+
let table = self.table.borrow();
|
36
|
+
let mut builder = table.scan();
|
37
|
+
if let Some(si) = snapshot_id {
|
38
|
+
builder = builder.snapshot_id(si);
|
39
|
+
}
|
40
|
+
let scan = builder.build().map_err(to_rb_err)?;
|
41
|
+
Ok(RbTableScan { scan: scan.into() })
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn append(
|
45
|
+
ruby: &Ruby,
|
46
|
+
rb_self: &Self,
|
47
|
+
data: RbArrowType<ArrowArrayStreamReader>,
|
48
|
+
catalog: &RbCatalog,
|
49
|
+
) -> RbResult<RbTable> {
|
50
|
+
let runtime = runtime();
|
51
|
+
let table = rb_self.table.borrow();
|
52
|
+
let catalog = catalog.catalog.borrow();
|
53
|
+
|
54
|
+
let table_schema: Arc<arrow_schema::Schema> = Arc::new(
|
55
|
+
table
|
56
|
+
.metadata()
|
57
|
+
.current_schema()
|
58
|
+
.as_ref()
|
59
|
+
.try_into()
|
60
|
+
.unwrap(),
|
61
|
+
);
|
62
|
+
|
63
|
+
let location_generator =
|
64
|
+
DefaultLocationGenerator::new(table.metadata().clone()).map_err(to_rb_err)?;
|
65
|
+
let file_name_generator = DefaultFileNameGenerator::new(
|
66
|
+
// TODO move task id to suffix to match Python and Java
|
67
|
+
"0".to_string(),
|
68
|
+
Some(Uuid::new_v4().to_string()),
|
69
|
+
iceberg::spec::DataFileFormat::Parquet,
|
70
|
+
);
|
71
|
+
|
72
|
+
let parquet_writer_builder = ParquetWriterBuilder::new(
|
73
|
+
WriterProperties::default(),
|
74
|
+
table.metadata().current_schema().clone(),
|
75
|
+
table.file_io().clone(),
|
76
|
+
location_generator.clone(),
|
77
|
+
file_name_generator.clone(),
|
78
|
+
);
|
79
|
+
let data_file_writer_builder = DataFileWriterBuilder::new(parquet_writer_builder, None, 0);
|
80
|
+
let mut data_file_writer = runtime
|
81
|
+
.block_on(data_file_writer_builder.build())
|
82
|
+
.map_err(to_rb_err)?;
|
83
|
+
|
84
|
+
for batch in data.0 {
|
85
|
+
let batch = batch
|
86
|
+
.unwrap()
|
87
|
+
.with_schema(table_schema.clone())
|
88
|
+
.map_err(|e| RbErr::new(ruby.exception_arg_error(), e.to_string()))?;
|
89
|
+
runtime
|
90
|
+
.block_on(data_file_writer.write(batch))
|
91
|
+
.map_err(to_rb_err)?;
|
92
|
+
}
|
93
|
+
|
94
|
+
let data_files = runtime
|
95
|
+
.block_on(data_file_writer.close())
|
96
|
+
.map_err(to_rb_err)?;
|
97
|
+
|
98
|
+
let tx = Transaction::new(&table);
|
99
|
+
let append_action = tx.fast_append().add_data_files(data_files.clone());
|
100
|
+
let tx = append_action.apply(tx).map_err(to_rb_err)?;
|
101
|
+
let table = runtime
|
102
|
+
.block_on(tx.commit(catalog.as_catalog()))
|
103
|
+
.map_err(to_rb_err)?;
|
104
|
+
|
105
|
+
Ok(RbTable {
|
106
|
+
table: table.into(),
|
107
|
+
})
|
108
|
+
}
|
109
|
+
|
110
|
+
pub fn format_version(&self) -> i32 {
|
111
|
+
match self.table.borrow().metadata().format_version() {
|
112
|
+
FormatVersion::V1 => 1,
|
113
|
+
FormatVersion::V2 => 2,
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
pub fn uuid(&self) -> String {
|
118
|
+
self.table.borrow().metadata().uuid().to_string()
|
119
|
+
}
|
120
|
+
|
121
|
+
pub fn location(&self) -> String {
|
122
|
+
self.table.borrow().metadata().location().to_string()
|
123
|
+
}
|
124
|
+
|
125
|
+
pub fn last_sequence_number(&self) -> i64 {
|
126
|
+
self.table.borrow().metadata().last_sequence_number()
|
127
|
+
}
|
128
|
+
|
129
|
+
pub fn next_sequence_number(&self) -> i64 {
|
130
|
+
self.table.borrow().metadata().next_sequence_number()
|
131
|
+
}
|
132
|
+
|
133
|
+
pub fn last_column_id(&self) -> i32 {
|
134
|
+
self.table.borrow().metadata().last_column_id()
|
135
|
+
}
|
136
|
+
|
137
|
+
pub fn last_partition_id(&self) -> i32 {
|
138
|
+
self.table.borrow().metadata().last_partition_id()
|
139
|
+
}
|
140
|
+
|
141
|
+
pub fn last_updated_ms(&self) -> i64 {
|
142
|
+
self.table.borrow().metadata().last_updated_ms()
|
143
|
+
}
|
144
|
+
|
145
|
+
pub fn schemas(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
146
|
+
let schemas = ruby.ary_new();
|
147
|
+
for s in rb_self.table.borrow().metadata().schemas_iter() {
|
148
|
+
schemas.push(rb_schema(s)?)?;
|
149
|
+
}
|
150
|
+
Ok(schemas)
|
151
|
+
}
|
152
|
+
|
153
|
+
pub fn schema_by_id(&self, schema_id: i32) -> RbResult<Option<Value>> {
|
154
|
+
let schema = match self.table.borrow().metadata().schema_by_id(schema_id) {
|
155
|
+
Some(s) => Some(rb_schema(s)?),
|
156
|
+
None => None,
|
157
|
+
};
|
158
|
+
Ok(schema)
|
159
|
+
}
|
160
|
+
|
161
|
+
pub fn current_schema(&self) -> RbResult<Value> {
|
162
|
+
rb_schema(self.table.borrow().metadata().current_schema())
|
163
|
+
}
|
164
|
+
|
165
|
+
pub fn current_schema_id(&self) -> i32 {
|
166
|
+
self.table.borrow().metadata().current_schema_id()
|
167
|
+
}
|
168
|
+
|
169
|
+
pub fn partition_specs(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
170
|
+
let partition_specs = ruby.ary_new();
|
171
|
+
for s in rb_self.table.borrow().metadata().partition_specs_iter() {
|
172
|
+
partition_specs.push(rb_partition_spec(s)?)?;
|
173
|
+
}
|
174
|
+
Ok(partition_specs)
|
175
|
+
}
|
176
|
+
|
177
|
+
pub fn partition_spec_by_id(&self, partition_spec_id: i32) -> RbResult<Option<Value>> {
|
178
|
+
let partition_spec = match self
|
179
|
+
.table
|
180
|
+
.borrow()
|
181
|
+
.metadata()
|
182
|
+
.partition_spec_by_id(partition_spec_id)
|
183
|
+
{
|
184
|
+
Some(s) => Some(rb_partition_spec(s)?),
|
185
|
+
None => None,
|
186
|
+
};
|
187
|
+
Ok(partition_spec)
|
188
|
+
}
|
189
|
+
|
190
|
+
pub fn default_partition_spec(&self) -> RbResult<Value> {
|
191
|
+
rb_partition_spec(self.table.borrow().metadata().default_partition_spec())
|
192
|
+
}
|
193
|
+
|
194
|
+
pub fn default_partition_spec_id(&self) -> i32 {
|
195
|
+
self.table.borrow().metadata().default_partition_spec_id()
|
196
|
+
}
|
197
|
+
|
198
|
+
pub fn snapshots(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
199
|
+
let snapshots = ruby.ary_new();
|
200
|
+
for s in rb_self.table.borrow().metadata().snapshots() {
|
201
|
+
snapshots.push(rb_snapshot(s)?)?;
|
202
|
+
}
|
203
|
+
Ok(snapshots)
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn snapshot_by_id(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
|
207
|
+
let snapshot = match self.table.borrow().metadata().snapshot_by_id(snapshot_id) {
|
208
|
+
Some(s) => Some(rb_snapshot(s)?),
|
209
|
+
None => None,
|
210
|
+
};
|
211
|
+
Ok(snapshot)
|
212
|
+
}
|
213
|
+
|
214
|
+
pub fn history(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
215
|
+
let history = ruby.ary_new();
|
216
|
+
for s in rb_self.table.borrow().metadata().history() {
|
217
|
+
let snapshot_log = ruby.hash_new();
|
218
|
+
snapshot_log.aset(ruby.to_symbol("snapshot_id"), s.snapshot_id)?;
|
219
|
+
// TODO timestamp
|
220
|
+
history.push(snapshot_log)?;
|
221
|
+
}
|
222
|
+
Ok(history)
|
223
|
+
}
|
224
|
+
|
225
|
+
pub fn metadata_log(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
226
|
+
let metadata_logs = ruby.ary_new();
|
227
|
+
for s in rb_self.table.borrow().metadata().metadata_log() {
|
228
|
+
let metadata_log = ruby.hash_new();
|
229
|
+
metadata_log.aset(
|
230
|
+
ruby.to_symbol("metadata_file"),
|
231
|
+
ruby.str_new(&s.metadata_file),
|
232
|
+
)?;
|
233
|
+
// TODO timestamp
|
234
|
+
metadata_logs.push(metadata_log)?;
|
235
|
+
}
|
236
|
+
Ok(metadata_logs)
|
237
|
+
}
|
238
|
+
|
239
|
+
pub fn current_snapshot(&self) -> RbResult<Option<Value>> {
|
240
|
+
let snapshot = match self.table.borrow().metadata().current_snapshot() {
|
241
|
+
Some(s) => Some(rb_snapshot(s)?),
|
242
|
+
None => None,
|
243
|
+
};
|
244
|
+
Ok(snapshot)
|
245
|
+
}
|
246
|
+
|
247
|
+
pub fn current_snapshot_id(&self) -> Option<i64> {
|
248
|
+
self.table.borrow().metadata().current_snapshot_id()
|
249
|
+
}
|
250
|
+
|
251
|
+
pub fn snapshot_for_ref(&self, ref_name: String) -> RbResult<Option<Value>> {
|
252
|
+
let snapshot = match self.table.borrow().metadata().snapshot_for_ref(&ref_name) {
|
253
|
+
Some(s) => Some(rb_snapshot(s)?),
|
254
|
+
None => None,
|
255
|
+
};
|
256
|
+
Ok(snapshot)
|
257
|
+
}
|
258
|
+
|
259
|
+
pub fn sort_orders(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
260
|
+
let sort_orders = ruby.ary_new();
|
261
|
+
for s in rb_self.table.borrow().metadata().sort_orders_iter() {
|
262
|
+
sort_orders.push(rb_sort_order(s)?)?;
|
263
|
+
}
|
264
|
+
Ok(sort_orders)
|
265
|
+
}
|
266
|
+
|
267
|
+
pub fn sort_order_by_id(&self, sort_order_id: i64) -> RbResult<Option<Value>> {
|
268
|
+
let sort_order = match self
|
269
|
+
.table
|
270
|
+
.borrow()
|
271
|
+
.metadata()
|
272
|
+
.sort_order_by_id(sort_order_id)
|
273
|
+
{
|
274
|
+
Some(s) => Some(rb_sort_order(s)?),
|
275
|
+
None => None,
|
276
|
+
};
|
277
|
+
Ok(sort_order)
|
278
|
+
}
|
279
|
+
|
280
|
+
pub fn default_sort_order(&self) -> RbResult<Value> {
|
281
|
+
rb_sort_order(self.table.borrow().metadata().default_sort_order())
|
282
|
+
}
|
283
|
+
|
284
|
+
pub fn default_sort_order_id(&self) -> i64 {
|
285
|
+
self.table.borrow().metadata().default_sort_order_id()
|
286
|
+
}
|
287
|
+
|
288
|
+
pub fn properties(&self) -> HashMap<String, String> {
|
289
|
+
self.table.borrow().metadata().properties().clone()
|
290
|
+
}
|
291
|
+
|
292
|
+
pub fn statistics(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
293
|
+
let statistics = ruby.ary_new();
|
294
|
+
for s in rb_self.table.borrow().metadata().statistics_iter() {
|
295
|
+
statistics.push(rb_statistics_file(s)?)?;
|
296
|
+
}
|
297
|
+
Ok(statistics)
|
298
|
+
}
|
299
|
+
|
300
|
+
pub fn partition_statistics(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
301
|
+
let statistics = ruby.ary_new();
|
302
|
+
for s in rb_self
|
303
|
+
.table
|
304
|
+
.borrow()
|
305
|
+
.metadata()
|
306
|
+
.partition_statistics_iter()
|
307
|
+
{
|
308
|
+
statistics.push(rb_partition_statistics_file(s)?)?;
|
309
|
+
}
|
310
|
+
Ok(statistics)
|
311
|
+
}
|
312
|
+
|
313
|
+
pub fn statistics_for_snapshot(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
|
314
|
+
let statistics = match self
|
315
|
+
.table
|
316
|
+
.borrow()
|
317
|
+
.metadata()
|
318
|
+
.statistics_for_snapshot(snapshot_id)
|
319
|
+
{
|
320
|
+
Some(s) => Some(rb_statistics_file(s)?),
|
321
|
+
None => None,
|
322
|
+
};
|
323
|
+
Ok(statistics)
|
324
|
+
}
|
325
|
+
|
326
|
+
pub fn partition_statistics_for_snapshot(&self, snapshot_id: i64) -> RbResult<Option<Value>> {
|
327
|
+
let statistics = match self
|
328
|
+
.table
|
329
|
+
.borrow()
|
330
|
+
.metadata()
|
331
|
+
.partition_statistics_for_snapshot(snapshot_id)
|
332
|
+
{
|
333
|
+
Some(s) => Some(rb_partition_statistics_file(s)?),
|
334
|
+
None => None,
|
335
|
+
};
|
336
|
+
Ok(statistics)
|
337
|
+
}
|
338
|
+
|
339
|
+
pub fn encryption_keys(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
340
|
+
let encryption_keys = ruby.ary_new();
|
341
|
+
for (k, v) in rb_self.table.borrow().metadata().encryption_keys_iter() {
|
342
|
+
encryption_keys.push((ruby.str_new(k), ruby.str_new(v)))?;
|
343
|
+
}
|
344
|
+
Ok(encryption_keys)
|
345
|
+
}
|
346
|
+
|
347
|
+
pub fn encryption_key(&self, key_id: String) -> Option<String> {
|
348
|
+
self.table
|
349
|
+
.borrow()
|
350
|
+
.metadata()
|
351
|
+
.encryption_key(&key_id)
|
352
|
+
.cloned()
|
353
|
+
}
|
354
|
+
|
355
|
+
pub fn from_metadata_file(location: String) -> RbResult<Self> {
|
356
|
+
let file_io = FileIO::from_path(&location).unwrap().build().unwrap();
|
357
|
+
let table_ident = TableIdent::from_strs(["static-table", &location]).unwrap();
|
358
|
+
let static_table = runtime()
|
359
|
+
.block_on(StaticTable::from_metadata_file(
|
360
|
+
&location,
|
361
|
+
table_ident,
|
362
|
+
file_io,
|
363
|
+
))
|
364
|
+
.map_err(to_rb_err)?;
|
365
|
+
Ok(Self {
|
366
|
+
table: static_table.into_table().into(),
|
367
|
+
})
|
368
|
+
}
|
369
|
+
}
|
@@ -0,0 +1,245 @@
|
|
1
|
+
use iceberg::spec::{
|
2
|
+
Literal, NestedField, PartitionSpec, PartitionStatisticsFile, PrimitiveLiteral, PrimitiveType,
|
3
|
+
Schema, Snapshot, SortOrder, StatisticsFile, Type,
|
4
|
+
};
|
5
|
+
use iceberg::{NamespaceIdent, TableIdent};
|
6
|
+
use magnus::{
|
7
|
+
Error as RbErr, IntoValue, RArray, RClass, RHash, RModule, Ruby, TryConvert, Value, kwargs,
|
8
|
+
prelude::*,
|
9
|
+
};
|
10
|
+
|
11
|
+
use crate::RbResult;
|
12
|
+
use crate::error::to_rb_err;
|
13
|
+
|
14
|
+
/// Newtype wrapper that this file implements `TryConvert` on, for `NamespaceIdent`,
/// `TableIdent` and `Schema`.
// NOTE(review): presumably needed because those types and the trait are both foreign — confirm.
pub struct Wrap<T>(pub T);
|
15
|
+
|
16
|
+
impl TryConvert for Wrap<NamespaceIdent> {
|
17
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
18
|
+
let ident = if let Ok(vec) = Vec::<String>::try_convert(ob) {
|
19
|
+
// prevent dots due to buggy behavior with iceberg-catalog-{rest,sql} crates
|
20
|
+
if vec.iter().any(|v| v.contains(".")) {
|
21
|
+
return Err(RbErr::new(
|
22
|
+
Ruby::get_with(ob).exception_arg_error(),
|
23
|
+
"Unsupported namespace",
|
24
|
+
));
|
25
|
+
}
|
26
|
+
NamespaceIdent::from_vec(vec).map_err(to_rb_err)?
|
27
|
+
} else {
|
28
|
+
NamespaceIdent::from_strs(String::try_convert(ob)?.split(".")).map_err(to_rb_err)?
|
29
|
+
};
|
30
|
+
Ok(Wrap(ident))
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
impl TryConvert for Wrap<TableIdent> {
|
35
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
36
|
+
let ident = if let Ok(vec) = Vec::<String>::try_convert(ob) {
|
37
|
+
TableIdent::from_strs(vec.iter()).map_err(to_rb_err)?
|
38
|
+
} else {
|
39
|
+
TableIdent::from_strs(String::try_convert(ob)?.split(".")).map_err(to_rb_err)?
|
40
|
+
};
|
41
|
+
Ok(Wrap(ident))
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
impl TryConvert for Wrap<Schema> {
|
46
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
47
|
+
let ruby = Ruby::get_with(ob);
|
48
|
+
let mut fields = Vec::new();
|
49
|
+
let rb_fields: RArray = ob.funcall("fields", ())?;
|
50
|
+
for rb_field in rb_fields {
|
51
|
+
let rb_field = RHash::try_convert(rb_field)?;
|
52
|
+
let rb_type: Value = rb_field.aref(ruby.to_symbol("type"))?;
|
53
|
+
let field_type = if let Ok(s) = String::try_convert(rb_type) {
|
54
|
+
match s.as_str() {
|
55
|
+
"boolean" => Type::Primitive(PrimitiveType::Boolean),
|
56
|
+
"int" => Type::Primitive(PrimitiveType::Int),
|
57
|
+
"long" => Type::Primitive(PrimitiveType::Long),
|
58
|
+
"float" => Type::Primitive(PrimitiveType::Float),
|
59
|
+
"double" => Type::Primitive(PrimitiveType::Double),
|
60
|
+
// TODO PrimitiveType::Decimal
|
61
|
+
"date" => Type::Primitive(PrimitiveType::Date),
|
62
|
+
"time" => Type::Primitive(PrimitiveType::Time),
|
63
|
+
"timestamp" => Type::Primitive(PrimitiveType::Timestamp),
|
64
|
+
"timestamptz" => Type::Primitive(PrimitiveType::Timestamptz),
|
65
|
+
"timestamp_ns" => Type::Primitive(PrimitiveType::TimestampNs),
|
66
|
+
"timestamptz_ns" => Type::Primitive(PrimitiveType::TimestamptzNs),
|
67
|
+
"string" => Type::Primitive(PrimitiveType::String),
|
68
|
+
"uuid" => Type::Primitive(PrimitiveType::Uuid),
|
69
|
+
// TODO PrimitiveType::Fixed
|
70
|
+
"binary" => Type::Primitive(PrimitiveType::Binary),
|
71
|
+
_ => {
|
72
|
+
return Err(RbErr::new(
|
73
|
+
ruby.exception_arg_error(),
|
74
|
+
format!("Type not supported: {}", s),
|
75
|
+
));
|
76
|
+
}
|
77
|
+
}
|
78
|
+
} else {
|
79
|
+
let class_name = unsafe { rb_type.classname() }.to_string();
|
80
|
+
match class_name.as_str() {
|
81
|
+
"Polars::Boolean" => Type::Primitive(PrimitiveType::Boolean),
|
82
|
+
"Polars::Int32" => Type::Primitive(PrimitiveType::Int),
|
83
|
+
"Polars::Int64" => Type::Primitive(PrimitiveType::Long),
|
84
|
+
"Polars::Float32" => Type::Primitive(PrimitiveType::Float),
|
85
|
+
"Polars::Float64" => Type::Primitive(PrimitiveType::Double),
|
86
|
+
"Polars::Date" => Type::Primitive(PrimitiveType::Date),
|
87
|
+
"Polars::Time" => Type::Primitive(PrimitiveType::Time),
|
88
|
+
"Polars::String" => Type::Primitive(PrimitiveType::String),
|
89
|
+
"Polars::Binary" => Type::Primitive(PrimitiveType::Binary),
|
90
|
+
_ => {
|
91
|
+
return Err(RbErr::new(
|
92
|
+
ruby.exception_arg_error(),
|
93
|
+
format!("Type not supported: {}", class_name),
|
94
|
+
));
|
95
|
+
}
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
99
|
+
let initial_default = rb_field.aref(ruby.to_symbol("initial_default"))?;
|
100
|
+
let write_default = rb_field.aref(ruby.to_symbol("write_default"))?;
|
101
|
+
|
102
|
+
let initial_default = default_value(initial_default, &field_type)?;
|
103
|
+
let write_default = default_value(write_default, &field_type)?;
|
104
|
+
|
105
|
+
fields.push(
|
106
|
+
NestedField {
|
107
|
+
id: rb_field.aref(ruby.to_symbol("id"))?,
|
108
|
+
name: rb_field.aref(ruby.to_symbol("name"))?,
|
109
|
+
required: rb_field.aref(ruby.to_symbol("required"))?,
|
110
|
+
field_type: field_type.into(),
|
111
|
+
doc: rb_field.aref(ruby.to_symbol("doc"))?,
|
112
|
+
initial_default,
|
113
|
+
write_default,
|
114
|
+
}
|
115
|
+
.into(),
|
116
|
+
);
|
117
|
+
}
|
118
|
+
let schema = Schema::builder()
|
119
|
+
.with_fields(fields)
|
120
|
+
.build()
|
121
|
+
.map_err(to_rb_err)?;
|
122
|
+
Ok(Wrap(schema))
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
fn default_value(ob: Value, field_type: &Type) -> RbResult<Option<Literal>> {
|
127
|
+
if ob.is_nil() {
|
128
|
+
return Ok(None);
|
129
|
+
}
|
130
|
+
|
131
|
+
let lit = match field_type {
|
132
|
+
Type::Primitive(ty) => {
|
133
|
+
let pl = match ty {
|
134
|
+
PrimitiveType::Boolean => PrimitiveLiteral::Boolean(bool::try_convert(ob)?),
|
135
|
+
PrimitiveType::Int => PrimitiveLiteral::Int(i32::try_convert(ob)?),
|
136
|
+
PrimitiveType::Long => PrimitiveLiteral::Long(i64::try_convert(ob)?),
|
137
|
+
PrimitiveType::Float => PrimitiveLiteral::Float(f32::try_convert(ob)?.into()),
|
138
|
+
PrimitiveType::Double => PrimitiveLiteral::Double(f64::try_convert(ob)?.into()),
|
139
|
+
PrimitiveType::String => PrimitiveLiteral::String(String::try_convert(ob)?),
|
140
|
+
_ => todo!(),
|
141
|
+
};
|
142
|
+
Literal::Primitive(pl)
|
143
|
+
}
|
144
|
+
_ => todo!(),
|
145
|
+
};
|
146
|
+
Ok(Some(lit))
|
147
|
+
}
|
148
|
+
|
149
|
+
pub fn rb_schema(schema: &Schema) -> RbResult<Value> {
|
150
|
+
let ruby = Ruby::get().unwrap();
|
151
|
+
let fields = ruby.ary_new();
|
152
|
+
for f in schema.as_struct().fields() {
|
153
|
+
let field = ruby.hash_new();
|
154
|
+
field.aset(ruby.to_symbol("id"), f.id)?;
|
155
|
+
field.aset(ruby.to_symbol("name"), ruby.str_new(&f.name))?;
|
156
|
+
|
157
|
+
let field_type = match &*f.field_type {
|
158
|
+
Type::Primitive(ty) => match ty {
|
159
|
+
PrimitiveType::Boolean => "boolean",
|
160
|
+
PrimitiveType::Int => "int",
|
161
|
+
PrimitiveType::Long => "long",
|
162
|
+
PrimitiveType::Float => "float",
|
163
|
+
PrimitiveType::Double => "double",
|
164
|
+
PrimitiveType::Decimal {
|
165
|
+
precision: _,
|
166
|
+
scale: _,
|
167
|
+
} => todo!(),
|
168
|
+
PrimitiveType::Date => "date",
|
169
|
+
PrimitiveType::Time => "time",
|
170
|
+
PrimitiveType::Timestamp => "timestamp",
|
171
|
+
PrimitiveType::Timestamptz => "timestamptz",
|
172
|
+
PrimitiveType::TimestampNs => "timestamp_ns",
|
173
|
+
PrimitiveType::TimestamptzNs => "timestamptz_ns",
|
174
|
+
PrimitiveType::String => "string",
|
175
|
+
PrimitiveType::Uuid => "uuid",
|
176
|
+
PrimitiveType::Binary => "binary",
|
177
|
+
PrimitiveType::Fixed(_) => todo!(),
|
178
|
+
},
|
179
|
+
_ => todo!(),
|
180
|
+
};
|
181
|
+
field.aset(ruby.to_symbol("type"), field_type)?;
|
182
|
+
|
183
|
+
field.aset(ruby.to_symbol("required"), f.required)?;
|
184
|
+
|
185
|
+
let initial_default = f.initial_default.as_ref().map(rb_literal);
|
186
|
+
field.aset(ruby.to_symbol("initial_default"), initial_default)?;
|
187
|
+
|
188
|
+
let write_default = f.write_default.as_ref().map(rb_literal);
|
189
|
+
field.aset(ruby.to_symbol("write_default"), write_default)?;
|
190
|
+
|
191
|
+
field.aset(
|
192
|
+
ruby.to_symbol("doc"),
|
193
|
+
f.doc.as_ref().map(|v| ruby.str_new(v)),
|
194
|
+
)?;
|
195
|
+
|
196
|
+
fields.push(field)?;
|
197
|
+
}
|
198
|
+
let schema_id = schema.schema_id();
|
199
|
+
|
200
|
+
ruby.class_object()
|
201
|
+
.const_get::<_, RModule>("Iceberg")
|
202
|
+
.unwrap()
|
203
|
+
.const_get::<_, RClass>("Schema")
|
204
|
+
.unwrap()
|
205
|
+
.funcall("new", (fields, kwargs!("schema_id" => schema_id)))
|
206
|
+
}
|
207
|
+
|
208
|
+
pub fn rb_snapshot(_snapshot: &Snapshot) -> RbResult<Value> {
|
209
|
+
todo!();
|
210
|
+
}
|
211
|
+
|
212
|
+
pub fn rb_partition_spec(_partition_spec: &PartitionSpec) -> RbResult<Value> {
|
213
|
+
todo!();
|
214
|
+
}
|
215
|
+
|
216
|
+
pub fn rb_sort_order(_sort_order: &SortOrder) -> RbResult<Value> {
|
217
|
+
todo!();
|
218
|
+
}
|
219
|
+
|
220
|
+
pub fn rb_statistics_file(_statistics_file: &StatisticsFile) -> RbResult<Value> {
|
221
|
+
todo!();
|
222
|
+
}
|
223
|
+
|
224
|
+
pub fn rb_partition_statistics_file(
|
225
|
+
_partition_statistics_file: &PartitionStatisticsFile,
|
226
|
+
) -> RbResult<Value> {
|
227
|
+
todo!();
|
228
|
+
}
|
229
|
+
|
230
|
+
pub fn rb_literal(literal: &Literal) -> Value {
|
231
|
+
let ruby = Ruby::get().unwrap();
|
232
|
+
match literal {
|
233
|
+
Literal::Primitive(pl) => match pl {
|
234
|
+
PrimitiveLiteral::Boolean(v) => v.into_value_with(&ruby),
|
235
|
+
PrimitiveLiteral::Int(v) => v.into_value_with(&ruby),
|
236
|
+
PrimitiveLiteral::Long(v) => v.into_value_with(&ruby),
|
237
|
+
PrimitiveLiteral::Float(v) => v.into_value_with(&ruby),
|
238
|
+
PrimitiveLiteral::Double(v) => v.into_value_with(&ruby),
|
239
|
+
PrimitiveLiteral::String(v) => ruby.str_new(v).as_value(),
|
240
|
+
PrimitiveLiteral::Binary(v) => ruby.str_from_slice(v).as_value(),
|
241
|
+
_ => todo!(),
|
242
|
+
},
|
243
|
+
_ => todo!(),
|
244
|
+
}
|
245
|
+
}
|