iceberg 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +734 -790
- data/README.md +6 -6
- data/ext/iceberg/Cargo.toml +15 -12
- data/ext/iceberg/src/catalog.rs +62 -40
- data/ext/iceberg/src/lib.rs +4 -2
- data/ext/iceberg/src/table.rs +15 -10
- data/ext/iceberg/src/utils.rs +6 -2
- data/lib/iceberg/catalog.rb +10 -7
- data/lib/iceberg/glue_catalog.rb +0 -3
- data/lib/iceberg/s3_tables_catalog.rb +7 -0
- data/lib/iceberg/table.rb +8 -46
- data/lib/iceberg/table_scan.rb +18 -0
- data/lib/iceberg/version.rb +1 -1
- data/lib/iceberg.rb +2 -0
- metadata +4 -2
data/README.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
[Apache Iceberg](https://iceberg.apache.org/) for Ruby
|
|
4
4
|
|
|
5
|
+
:duck: Also check out [SeaDuck](https://github.com/ankane/seaduck)
|
|
6
|
+
|
|
5
7
|
[](https://github.com/ankane/iceberg-ruby/actions)
|
|
6
8
|
|
|
7
9
|
## Installation
|
|
@@ -35,7 +37,7 @@ catalog.create_table("main.events") do |t|
|
|
|
35
37
|
end
|
|
36
38
|
```
|
|
37
39
|
|
|
38
|
-
Or
|
|
40
|
+
Or with [Polars](https://github.com/ankane/ruby-polars)
|
|
39
41
|
|
|
40
42
|
```ruby
|
|
41
43
|
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
|
@@ -82,9 +84,7 @@ Iceberg::MemoryCatalog.new(
|
|
|
82
84
|
)
|
|
83
85
|
```
|
|
84
86
|
|
|
85
|
-
##
|
|
86
|
-
|
|
87
|
-
### Namespaces
|
|
87
|
+
## Namespaces
|
|
88
88
|
|
|
89
89
|
List namespaces
|
|
90
90
|
|
|
@@ -122,7 +122,7 @@ Drop a namespace
|
|
|
122
122
|
catalog.drop_namespace("main")
|
|
123
123
|
```
|
|
124
124
|
|
|
125
|
-
|
|
125
|
+
## Tables
|
|
126
126
|
|
|
127
127
|
List tables
|
|
128
128
|
|
|
@@ -169,7 +169,7 @@ Drop a table
|
|
|
169
169
|
catalog.drop_table("main.events")
|
|
170
170
|
```
|
|
171
171
|
|
|
172
|
-
|
|
172
|
+
## Static Tables
|
|
173
173
|
|
|
174
174
|
Load a static table
|
|
175
175
|
|
data/ext/iceberg/Cargo.toml
CHANGED
|
@@ -1,27 +1,29 @@
|
|
|
1
1
|
[package]
|
|
2
|
-
name = "iceberg"
|
|
3
|
-
version = "0.
|
|
2
|
+
name = "iceberg-ruby"
|
|
3
|
+
version = "0.11.0"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
|
6
6
|
edition = "2024"
|
|
7
|
-
rust-version = "1.
|
|
7
|
+
rust-version = "1.88"
|
|
8
8
|
publish = false
|
|
9
9
|
|
|
10
10
|
[lib]
|
|
11
|
+
name = "iceberg"
|
|
11
12
|
crate-type = ["cdylib"]
|
|
12
13
|
|
|
13
14
|
[dependencies]
|
|
14
|
-
arrow-array = { version = "
|
|
15
|
-
arrow-schema = "
|
|
16
|
-
datafusion = { version = "
|
|
15
|
+
arrow-array = { version = "57", features = ["ffi"] }
|
|
16
|
+
arrow-schema = "57"
|
|
17
|
+
datafusion = { version = "51", optional = true }
|
|
17
18
|
futures = "0.3"
|
|
18
|
-
iceberg = "0.
|
|
19
|
-
iceberg-catalog-glue = { version = "0.
|
|
20
|
-
iceberg-catalog-rest = { version = "0.
|
|
21
|
-
iceberg-catalog-
|
|
22
|
-
iceberg-
|
|
19
|
+
iceberg = "0.8"
|
|
20
|
+
iceberg-catalog-glue = { version = "0.8", optional = true }
|
|
21
|
+
iceberg-catalog-rest = { version = "0.8", optional = true }
|
|
22
|
+
iceberg-catalog-s3tables = { version = "0.8", optional = true }
|
|
23
|
+
iceberg-catalog-sql = { version = "0.8", optional = true }
|
|
24
|
+
iceberg-datafusion = { version = "0.8", optional = true }
|
|
23
25
|
magnus = "0.8"
|
|
24
|
-
parquet = "
|
|
26
|
+
parquet = "57"
|
|
25
27
|
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio", "sqlite"], default-features = false, optional = true }
|
|
26
28
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
|
27
29
|
uuid = { version = "1", features = ["v4"] }
|
|
@@ -31,4 +33,5 @@ default = ["rest", "sql"]
|
|
|
31
33
|
datafusion = ["dep:datafusion", "dep:iceberg-datafusion"]
|
|
32
34
|
glue = ["dep:iceberg-catalog-glue"]
|
|
33
35
|
rest = ["dep:iceberg-catalog-rest"]
|
|
36
|
+
s3tables = ["dep:iceberg-catalog-s3tables"]
|
|
34
37
|
sql = ["dep:iceberg-catalog-sql", "dep:sqlx"]
|
data/ext/iceberg/src/catalog.rs
CHANGED
|
@@ -1,14 +1,23 @@
|
|
|
1
1
|
#[cfg(feature = "datafusion")]
|
|
2
2
|
use datafusion::execution::context::SessionContext;
|
|
3
|
-
use iceberg::
|
|
3
|
+
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
|
|
4
4
|
use iceberg::spec::Schema;
|
|
5
|
-
use iceberg::{Catalog, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
|
|
5
|
+
use iceberg::{Catalog, CatalogBuilder, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
|
|
6
6
|
#[cfg(feature = "glue")]
|
|
7
|
-
use iceberg_catalog_glue::{GlueCatalog,
|
|
7
|
+
use iceberg_catalog_glue::{GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalogBuilder};
|
|
8
8
|
#[cfg(feature = "rest")]
|
|
9
|
-
use iceberg_catalog_rest::{
|
|
9
|
+
use iceberg_catalog_rest::{
|
|
10
|
+
REST_CATALOG_PROP_URI, REST_CATALOG_PROP_WAREHOUSE, RestCatalog, RestCatalogBuilder,
|
|
11
|
+
};
|
|
12
|
+
#[cfg(feature = "s3tables")]
|
|
13
|
+
use iceberg_catalog_s3tables::{
|
|
14
|
+
S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN, S3TablesCatalog, S3TablesCatalogBuilder,
|
|
15
|
+
};
|
|
10
16
|
#[cfg(feature = "sql")]
|
|
11
|
-
use iceberg_catalog_sql::{
|
|
17
|
+
use iceberg_catalog_sql::{
|
|
18
|
+
SQL_CATALOG_PROP_BIND_STYLE, SQL_CATALOG_PROP_URI, SQL_CATALOG_PROP_WAREHOUSE, SqlBindStyle,
|
|
19
|
+
SqlCatalog, SqlCatalogBuilder,
|
|
20
|
+
};
|
|
12
21
|
#[cfg(feature = "datafusion")]
|
|
13
22
|
use iceberg_datafusion::IcebergCatalogProvider;
|
|
14
23
|
use std::cell::RefCell;
|
|
@@ -26,6 +35,8 @@ pub enum RbCatalogType {
|
|
|
26
35
|
Memory(Arc<MemoryCatalog>),
|
|
27
36
|
#[cfg(feature = "rest")]
|
|
28
37
|
Rest(Arc<RestCatalog>),
|
|
38
|
+
#[cfg(feature = "s3tables")]
|
|
39
|
+
S3Tables(Arc<S3TablesCatalog>),
|
|
29
40
|
#[cfg(feature = "sql")]
|
|
30
41
|
Sql(Arc<SqlCatalog>),
|
|
31
42
|
}
|
|
@@ -38,6 +49,8 @@ impl RbCatalogType {
|
|
|
38
49
|
RbCatalogType::Memory(v) => v.as_ref(),
|
|
39
50
|
#[cfg(feature = "rest")]
|
|
40
51
|
RbCatalogType::Rest(v) => v.as_ref(),
|
|
52
|
+
#[cfg(feature = "s3tables")]
|
|
53
|
+
RbCatalogType::S3Tables(v) => v.as_ref(),
|
|
41
54
|
#[cfg(feature = "sql")]
|
|
42
55
|
RbCatalogType::Sql(v) => v.as_ref(),
|
|
43
56
|
}
|
|
@@ -51,6 +64,8 @@ impl RbCatalogType {
|
|
|
51
64
|
RbCatalogType::Memory(v) => v.clone(),
|
|
52
65
|
#[cfg(feature = "rest")]
|
|
53
66
|
RbCatalogType::Rest(v) => v.clone(),
|
|
67
|
+
#[cfg(feature = "s3tables")]
|
|
68
|
+
RbCatalogType::S3Tables(v) => v.clone(),
|
|
54
69
|
#[cfg(feature = "sql")]
|
|
55
70
|
RbCatalogType::Sql(v) => v.clone(),
|
|
56
71
|
}
|
|
@@ -65,9 +80,9 @@ pub struct RbCatalog {
|
|
|
65
80
|
impl RbCatalog {
|
|
66
81
|
#[cfg(feature = "glue")]
|
|
67
82
|
pub fn new_glue(warehouse: String) -> RbResult<Self> {
|
|
68
|
-
let
|
|
83
|
+
let props = HashMap::from([(GLUE_CATALOG_PROP_WAREHOUSE.to_string(), warehouse)]);
|
|
69
84
|
let catalog = runtime()
|
|
70
|
-
.block_on(
|
|
85
|
+
.block_on(GlueCatalogBuilder::default().load("glue", props))
|
|
71
86
|
.map_err(to_rb_err)?;
|
|
72
87
|
Ok(Self {
|
|
73
88
|
catalog: RbCatalogType::Glue(catalog.into()).into(),
|
|
@@ -75,14 +90,13 @@ impl RbCatalog {
|
|
|
75
90
|
}
|
|
76
91
|
|
|
77
92
|
pub fn new_memory(warehouse: Option<String>) -> RbResult<Self> {
|
|
78
|
-
let
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
let catalog = MemoryCatalog::new(file_io, warehouse);
|
|
93
|
+
let mut props = HashMap::new();
|
|
94
|
+
if let Some(v) = warehouse {
|
|
95
|
+
props.insert(MEMORY_CATALOG_WAREHOUSE.to_string(), v);
|
|
96
|
+
}
|
|
97
|
+
let catalog = runtime()
|
|
98
|
+
.block_on(MemoryCatalogBuilder::default().load("memory", props))
|
|
99
|
+
.map_err(to_rb_err)?;
|
|
86
100
|
Ok(Self {
|
|
87
101
|
catalog: RbCatalogType::Memory(catalog.into()).into(),
|
|
88
102
|
})
|
|
@@ -93,16 +107,30 @@ impl RbCatalog {
|
|
|
93
107
|
uri: String,
|
|
94
108
|
warehouse: Option<String>,
|
|
95
109
|
props: HashMap<String, String>,
|
|
96
|
-
) -> Self {
|
|
97
|
-
let
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
.
|
|
101
|
-
.build();
|
|
102
|
-
let catalog = RestCatalog::new(config);
|
|
103
|
-
Self {
|
|
104
|
-
catalog: RbCatalogType::Rest(catalog.into()).into(),
|
|
110
|
+
) -> RbResult<Self> {
|
|
111
|
+
let mut props = props;
|
|
112
|
+
props.insert(REST_CATALOG_PROP_URI.to_string(), uri);
|
|
113
|
+
if let Some(v) = warehouse {
|
|
114
|
+
props.insert(REST_CATALOG_PROP_WAREHOUSE.to_string(), v);
|
|
105
115
|
}
|
|
116
|
+
let catalog = runtime()
|
|
117
|
+
.block_on(RestCatalogBuilder::default().load("rest", props))
|
|
118
|
+
.map_err(to_rb_err)?;
|
|
119
|
+
Ok(Self {
|
|
120
|
+
catalog: RbCatalogType::Rest(catalog.into()).into(),
|
|
121
|
+
})
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
#[cfg(feature = "s3tables")]
|
|
125
|
+
pub fn new_s3tables(arn: String) -> RbResult<Self> {
|
|
126
|
+
let mut props = HashMap::new();
|
|
127
|
+
props.insert(S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN.to_string(), arn);
|
|
128
|
+
let catalog = runtime()
|
|
129
|
+
.block_on(S3TablesCatalogBuilder::default().load("s3tables", props))
|
|
130
|
+
.map_err(to_rb_err)?;
|
|
131
|
+
Ok(Self {
|
|
132
|
+
catalog: RbCatalogType::S3Tables(catalog.into()).into(),
|
|
133
|
+
})
|
|
106
134
|
}
|
|
107
135
|
|
|
108
136
|
#[cfg(feature = "sql")]
|
|
@@ -112,20 +140,15 @@ impl RbCatalog {
|
|
|
112
140
|
name: String,
|
|
113
141
|
props: HashMap<String, String>,
|
|
114
142
|
) -> RbResult<Self> {
|
|
115
|
-
let
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
.
|
|
121
|
-
|
|
122
|
-
.name(name)
|
|
123
|
-
.file_io(file_io)
|
|
124
|
-
.sql_bind_style(SqlBindStyle::DollarNumeric)
|
|
125
|
-
.props(props)
|
|
126
|
-
.build();
|
|
143
|
+
let mut props = props;
|
|
144
|
+
props.insert(SQL_CATALOG_PROP_URI.to_string(), uri);
|
|
145
|
+
props.insert(SQL_CATALOG_PROP_WAREHOUSE.to_string(), warehouse);
|
|
146
|
+
props.insert(
|
|
147
|
+
SQL_CATALOG_PROP_BIND_STYLE.to_string(),
|
|
148
|
+
SqlBindStyle::DollarNumeric.to_string(),
|
|
149
|
+
);
|
|
127
150
|
let catalog = runtime()
|
|
128
|
-
.block_on(
|
|
151
|
+
.block_on(SqlCatalogBuilder::default().load(name, props))
|
|
129
152
|
.map_err(to_rb_err)?;
|
|
130
153
|
Ok(Self {
|
|
131
154
|
catalog: RbCatalogType::Sql(catalog.into()).into(),
|
|
@@ -293,7 +316,7 @@ impl RbCatalog {
|
|
|
293
316
|
}
|
|
294
317
|
|
|
295
318
|
#[cfg(feature = "datafusion")]
|
|
296
|
-
pub fn
|
|
319
|
+
pub fn sql(&self, sql: String) -> RbResult<()> {
|
|
297
320
|
let runtime = runtime();
|
|
298
321
|
|
|
299
322
|
// TODO only create context once
|
|
@@ -307,8 +330,7 @@ impl RbCatalog {
|
|
|
307
330
|
let df = runtime.block_on(ctx.sql(&sql)).unwrap();
|
|
308
331
|
let _results = runtime.block_on(df.collect()).unwrap();
|
|
309
332
|
|
|
310
|
-
// println!("{:?}",
|
|
311
|
-
// println!("{:?}", results);
|
|
333
|
+
// println!("{:?}", _results);
|
|
312
334
|
|
|
313
335
|
Ok(())
|
|
314
336
|
}
|
data/ext/iceberg/src/lib.rs
CHANGED
|
@@ -14,7 +14,7 @@ use crate::table::RbTable;
|
|
|
14
14
|
|
|
15
15
|
type RbResult<T> = Result<T, RbErr>;
|
|
16
16
|
|
|
17
|
-
#[magnus::init]
|
|
17
|
+
#[magnus::init(name = "iceberg")]
|
|
18
18
|
fn init(ruby: &Ruby) -> RbResult<()> {
|
|
19
19
|
let module = ruby.define_module("Iceberg")?;
|
|
20
20
|
|
|
@@ -24,6 +24,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
24
24
|
class.define_singleton_method("new_memory", function!(RbCatalog::new_memory, 1))?;
|
|
25
25
|
#[cfg(feature = "rest")]
|
|
26
26
|
class.define_singleton_method("new_rest", function!(RbCatalog::new_rest, 3))?;
|
|
27
|
+
#[cfg(feature = "s3tables")]
|
|
28
|
+
class.define_singleton_method("new_s3tables", function!(RbCatalog::new_s3tables, 1))?;
|
|
27
29
|
#[cfg(feature = "sql")]
|
|
28
30
|
class.define_singleton_method("new_sql", function!(RbCatalog::new_sql, 4))?;
|
|
29
31
|
class.define_method("list_namespaces", method!(RbCatalog::list_namespaces, 1))?;
|
|
@@ -43,7 +45,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
43
45
|
class.define_method("rename_table", method!(RbCatalog::rename_table, 2))?;
|
|
44
46
|
class.define_method("register_table", method!(RbCatalog::register_table, 2))?;
|
|
45
47
|
#[cfg(feature = "datafusion")]
|
|
46
|
-
class.define_method("
|
|
48
|
+
class.define_method("sql", method!(RbCatalog::sql, 1))?;
|
|
47
49
|
|
|
48
50
|
let class = module.define_class("RbTable", ruby.class_object())?;
|
|
49
51
|
class.define_method("scan", method!(RbTable::scan, 1))?;
|
data/ext/iceberg/src/table.rs
CHANGED
|
@@ -9,6 +9,7 @@ use iceberg::writer::file_writer::ParquetWriterBuilder;
|
|
|
9
9
|
use iceberg::writer::file_writer::location_generator::{
|
|
10
10
|
DefaultFileNameGenerator, DefaultLocationGenerator,
|
|
11
11
|
};
|
|
12
|
+
use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder;
|
|
12
13
|
use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
|
|
13
14
|
use magnus::{Error as RbErr, RArray, Ruby, Value};
|
|
14
15
|
use parquet::file::properties::WriterProperties;
|
|
@@ -72,13 +73,16 @@ impl RbTable {
|
|
|
72
73
|
let parquet_writer_builder = ParquetWriterBuilder::new(
|
|
73
74
|
WriterProperties::default(),
|
|
74
75
|
table.metadata().current_schema().clone(),
|
|
76
|
+
);
|
|
77
|
+
let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size(
|
|
78
|
+
parquet_writer_builder,
|
|
75
79
|
table.file_io().clone(),
|
|
76
80
|
location_generator.clone(),
|
|
77
81
|
file_name_generator.clone(),
|
|
78
82
|
);
|
|
79
|
-
let data_file_writer_builder = DataFileWriterBuilder::new(
|
|
83
|
+
let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder);
|
|
80
84
|
let mut data_file_writer = runtime
|
|
81
|
-
.block_on(data_file_writer_builder.build())
|
|
85
|
+
.block_on(data_file_writer_builder.build(None))
|
|
82
86
|
.map_err(to_rb_err)?;
|
|
83
87
|
|
|
84
88
|
for batch in data.0 {
|
|
@@ -111,6 +115,7 @@ impl RbTable {
|
|
|
111
115
|
match self.table.borrow().metadata().format_version() {
|
|
112
116
|
FormatVersion::V1 => 1,
|
|
113
117
|
FormatVersion::V2 => 2,
|
|
118
|
+
FormatVersion::V3 => 3,
|
|
114
119
|
}
|
|
115
120
|
}
|
|
116
121
|
|
|
@@ -356,18 +361,18 @@ impl RbTable {
|
|
|
356
361
|
|
|
357
362
|
pub fn encryption_keys(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
|
358
363
|
let encryption_keys = ruby.ary_new();
|
|
359
|
-
for
|
|
360
|
-
encryption_keys.push((
|
|
364
|
+
for k in rb_self.table.borrow().metadata().encryption_keys_iter() {
|
|
365
|
+
encryption_keys.push(rb_encrypted_key(k)?)?;
|
|
361
366
|
}
|
|
362
367
|
Ok(encryption_keys)
|
|
363
368
|
}
|
|
364
369
|
|
|
365
|
-
pub fn encryption_key(&self, key_id: String) -> Option<
|
|
366
|
-
self.table
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
370
|
+
pub fn encryption_key(&self, key_id: String) -> RbResult<Option<Value>> {
|
|
371
|
+
let key = match self.table.borrow().metadata().encryption_key(&key_id) {
|
|
372
|
+
Some(k) => Some(rb_encrypted_key(k)?),
|
|
373
|
+
None => None,
|
|
374
|
+
};
|
|
375
|
+
Ok(key)
|
|
371
376
|
}
|
|
372
377
|
|
|
373
378
|
pub fn from_metadata_file(location: String) -> RbResult<Self> {
|
data/ext/iceberg/src/utils.rs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
use iceberg::spec::{
|
|
2
|
-
Literal, NestedField, PartitionSpec, PartitionStatisticsFile, PrimitiveLiteral,
|
|
3
|
-
Schema, Snapshot, SortOrder, StatisticsFile, Type,
|
|
2
|
+
EncryptedKey, Literal, NestedField, PartitionSpec, PartitionStatisticsFile, PrimitiveLiteral,
|
|
3
|
+
PrimitiveType, Schema, Snapshot, SortOrder, StatisticsFile, Type,
|
|
4
4
|
};
|
|
5
5
|
use iceberg::{NamespaceIdent, TableIdent};
|
|
6
6
|
use magnus::{
|
|
@@ -238,6 +238,10 @@ pub fn rb_partition_statistics_file(
|
|
|
238
238
|
todo!();
|
|
239
239
|
}
|
|
240
240
|
|
|
241
|
+
pub fn rb_encrypted_key(_encrypted_key: &EncryptedKey) -> RbResult<Value> {
|
|
242
|
+
todo!();
|
|
243
|
+
}
|
|
244
|
+
|
|
241
245
|
pub fn rb_literal(ruby: &Ruby, literal: &Literal) -> Value {
|
|
242
246
|
match literal {
|
|
243
247
|
Literal::Primitive(pl) => match pl {
|
data/lib/iceberg/catalog.rb
CHANGED
|
@@ -7,6 +7,7 @@ module Iceberg
|
|
|
7
7
|
def create_namespace(namespace, properties: {}, if_not_exists: nil)
|
|
8
8
|
@catalog.create_namespace(namespace, properties)
|
|
9
9
|
rescue Error => e
|
|
10
|
+
# ideally all catalogs would use NamespaceAlreadyExistsError
|
|
10
11
|
if !if_not_exists || (e.message != "Cannot create namespace" && !e.message.include?("already exists"))
|
|
11
12
|
raise e
|
|
12
13
|
end
|
|
@@ -28,7 +29,8 @@ module Iceberg
|
|
|
28
29
|
def drop_namespace(namespace, if_exists: nil)
|
|
29
30
|
@catalog.drop_namespace(namespace)
|
|
30
31
|
rescue Error => e
|
|
31
|
-
|
|
32
|
+
# ideally all catalogs would use NamespaceNotFoundError
|
|
33
|
+
if !if_exists || (e.message != "Tried to drop a namespace that does not exist" && !e.message.include?("No such namespace") && !e.message.include?("The specified namespace does not exist") && !e.message.include?("not found"))
|
|
32
34
|
raise e
|
|
33
35
|
end
|
|
34
36
|
nil
|
|
@@ -47,9 +49,9 @@ module Iceberg
|
|
|
47
49
|
table_definition = TableDefinition.new
|
|
48
50
|
yield table_definition
|
|
49
51
|
schema = Schema.new(table_definition.fields)
|
|
50
|
-
elsif schema.is_a?(Hash)
|
|
52
|
+
elsif schema.is_a?(Hash) || (defined?(Polars::Schema) && schema.is_a?(Polars::Schema))
|
|
51
53
|
fields =
|
|
52
|
-
schema.map.with_index do |(k, v), i|
|
|
54
|
+
schema.to_h.map.with_index do |(k, v), i|
|
|
53
55
|
{
|
|
54
56
|
id: i + 1,
|
|
55
57
|
name: k.is_a?(Symbol) ? k.to_s : k,
|
|
@@ -72,7 +74,8 @@ module Iceberg
|
|
|
72
74
|
def drop_table(table_name, if_exists: nil)
|
|
73
75
|
@catalog.drop_table(table_name)
|
|
74
76
|
rescue Error => e
|
|
75
|
-
|
|
77
|
+
# ideally all catalogs would use TableNotFoundError
|
|
78
|
+
if !if_exists || (e.message != "Tried to drop a table that does not exist" && !e.message.include?("No such table") && !e.message.include?("The specified table does not exist") && !e.message.include?("not found"))
|
|
76
79
|
raise e
|
|
77
80
|
end
|
|
78
81
|
nil
|
|
@@ -92,11 +95,11 @@ module Iceberg
|
|
|
92
95
|
@catalog.register_table(table_name, metadata_location)
|
|
93
96
|
end
|
|
94
97
|
|
|
95
|
-
def
|
|
98
|
+
def sql(sql)
|
|
96
99
|
# requires datafusion feature
|
|
97
|
-
raise Todo unless @catalog.respond_to?(:
|
|
100
|
+
raise Todo unless @catalog.respond_to?(:sql)
|
|
98
101
|
|
|
99
|
-
@catalog.
|
|
102
|
+
@catalog.sql(sql)
|
|
100
103
|
end
|
|
101
104
|
|
|
102
105
|
# hide internal state
|
data/lib/iceberg/glue_catalog.rb
CHANGED
|
@@ -2,9 +2,6 @@ module Iceberg
|
|
|
2
2
|
class GlueCatalog < Catalog
|
|
3
3
|
# warehouse is URI of S3 storage bucket
|
|
4
4
|
def initialize(warehouse:)
|
|
5
|
-
# requires glue feature
|
|
6
|
-
raise Error, "Feature not enabled" unless RbCatalog.respond_to?(:new_glue)
|
|
7
|
-
|
|
8
5
|
@catalog = RbCatalog.new_glue(warehouse)
|
|
9
6
|
end
|
|
10
7
|
end
|
data/lib/iceberg/table.rb
CHANGED
|
@@ -83,56 +83,18 @@ module Iceberg
|
|
|
83
83
|
@table.properties
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
def scan(snapshot_id: nil)
|
|
87
|
+
TableScan.new(@table.scan(snapshot_id), self)
|
|
88
|
+
end
|
|
89
|
+
|
|
86
90
|
def to_polars(snapshot_id: nil, storage_options: nil)
|
|
87
91
|
require "polars-df"
|
|
88
92
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# TODO improve
|
|
92
|
-
schema =
|
|
93
|
-
# TODO use schema from snapshot_id
|
|
94
|
-
current_schema.fields.to_h do |field|
|
|
95
|
-
dtype =
|
|
96
|
-
case field[:type]
|
|
97
|
-
when "int"
|
|
98
|
-
Polars::Int32
|
|
99
|
-
when "long"
|
|
100
|
-
Polars::Int64
|
|
101
|
-
when "double"
|
|
102
|
-
Polars::Float64
|
|
103
|
-
when "string"
|
|
104
|
-
Polars::String
|
|
105
|
-
when "timestamp"
|
|
106
|
-
Polars::Datetime
|
|
107
|
-
else
|
|
108
|
-
raise Todo
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
[field[:name], dtype]
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
Polars::LazyFrame.new(schema: schema)
|
|
115
|
-
else
|
|
116
|
-
sources = files.map { |v| v[:data_file_path] }
|
|
117
|
-
|
|
118
|
-
deletion_files = [
|
|
119
|
-
"iceberg-position-delete",
|
|
120
|
-
files.map.with_index
|
|
121
|
-
.select { |v, i| v[:deletes].any? }
|
|
122
|
-
.to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
|
|
123
|
-
]
|
|
124
|
-
|
|
125
|
-
Polars.scan_parquet(
|
|
126
|
-
sources,
|
|
127
|
-
storage_options: storage_options,
|
|
128
|
-
# TODO
|
|
129
|
-
# cast_options: Polars::ScanCastOptions._default_iceberg,
|
|
130
|
-
# allow_missing_columns: true,
|
|
131
|
-
# extra_columns: "ignore",
|
|
132
|
-
# _column_mapping: column_mapping,
|
|
133
|
-
_deletion_files: deletion_files
|
|
134
|
-
)
|
|
93
|
+
if Gem::Version.new(Polars::VERSION) < Gem::Version.new("0.23")
|
|
94
|
+
raise "Requires polars-df >= 0.23"
|
|
135
95
|
end
|
|
96
|
+
|
|
97
|
+
Polars.scan_iceberg(self, snapshot_id:, storage_options:)
|
|
136
98
|
end
|
|
137
99
|
|
|
138
100
|
def append(df)
|
data/lib/iceberg/version.rb
CHANGED
data/lib/iceberg.rb
CHANGED
|
@@ -9,6 +9,7 @@ end
|
|
|
9
9
|
require_relative "iceberg/catalog"
|
|
10
10
|
require_relative "iceberg/schema"
|
|
11
11
|
require_relative "iceberg/table"
|
|
12
|
+
require_relative "iceberg/table_scan"
|
|
12
13
|
require_relative "iceberg/static_table"
|
|
13
14
|
require_relative "iceberg/table_definition"
|
|
14
15
|
require_relative "iceberg/version"
|
|
@@ -17,6 +18,7 @@ require_relative "iceberg/version"
|
|
|
17
18
|
require_relative "iceberg/glue_catalog"
|
|
18
19
|
require_relative "iceberg/memory_catalog"
|
|
19
20
|
require_relative "iceberg/rest_catalog"
|
|
21
|
+
require_relative "iceberg/s3_tables_catalog"
|
|
20
22
|
require_relative "iceberg/sql_catalog"
|
|
21
23
|
|
|
22
24
|
module Iceberg
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iceberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.11.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -50,11 +50,13 @@ files:
|
|
|
50
50
|
- lib/iceberg/glue_catalog.rb
|
|
51
51
|
- lib/iceberg/memory_catalog.rb
|
|
52
52
|
- lib/iceberg/rest_catalog.rb
|
|
53
|
+
- lib/iceberg/s3_tables_catalog.rb
|
|
53
54
|
- lib/iceberg/schema.rb
|
|
54
55
|
- lib/iceberg/sql_catalog.rb
|
|
55
56
|
- lib/iceberg/static_table.rb
|
|
56
57
|
- lib/iceberg/table.rb
|
|
57
58
|
- lib/iceberg/table_definition.rb
|
|
59
|
+
- lib/iceberg/table_scan.rb
|
|
58
60
|
- lib/iceberg/version.rb
|
|
59
61
|
homepage: https://github.com/ankane/iceberg-ruby
|
|
60
62
|
licenses:
|
|
@@ -67,7 +69,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
67
69
|
requirements:
|
|
68
70
|
- - ">="
|
|
69
71
|
- !ruby/object:Gem::Version
|
|
70
|
-
version: '3.
|
|
72
|
+
version: '3.3'
|
|
71
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
72
74
|
requirements:
|
|
73
75
|
- - ">="
|