iceberg 0.10.3 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Cargo.lock +636 -548
- data/README.md +3 -1
- data/ext/iceberg/Cargo.toml +15 -12
- data/ext/iceberg/src/catalog.rs +36 -18
- data/ext/iceberg/src/lib.rs +4 -2
- data/ext/iceberg/src/table.rs +15 -11
- data/ext/iceberg/src/utils.rs +6 -2
- data/lib/iceberg/catalog.rb +10 -7
- data/lib/iceberg/glue_catalog.rb +0 -3
- data/lib/iceberg/s3_tables_catalog.rb +7 -0
- data/lib/iceberg/table.rb +4 -51
- data/lib/iceberg/version.rb +1 -1
- data/lib/iceberg.rb +1 -0
- metadata +3 -2
data/README.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
[Apache Iceberg](https://iceberg.apache.org/) for Ruby
|
|
4
4
|
|
|
5
|
+
:duck: Also check out [SeaDuck](https://github.com/ankane/seaduck)
|
|
6
|
+
|
|
5
7
|
[![Build Status](https://github.com/ankane/iceberg-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/iceberg-ruby/actions)
|
|
6
8
|
|
|
7
9
|
## Installation
|
|
@@ -35,7 +37,7 @@ catalog.create_table("main.events") do |t|
|
|
|
35
37
|
end
|
|
36
38
|
```
|
|
37
39
|
|
|
38
|
-
Or
|
|
40
|
+
Or with [Polars](https://github.com/ankane/ruby-polars)
|
|
39
41
|
|
|
40
42
|
```ruby
|
|
41
43
|
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
data/ext/iceberg/Cargo.toml
CHANGED
|
@@ -1,27 +1,29 @@
|
|
|
1
1
|
[package]
|
|
2
|
-
name = "iceberg"
|
|
3
|
-
version = "0.
|
|
2
|
+
name = "iceberg-ruby"
|
|
3
|
+
version = "0.11.0"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
|
6
6
|
edition = "2024"
|
|
7
|
-
rust-version = "1.
|
|
7
|
+
rust-version = "1.88"
|
|
8
8
|
publish = false
|
|
9
9
|
|
|
10
10
|
[lib]
|
|
11
|
+
name = "iceberg"
|
|
11
12
|
crate-type = ["cdylib"]
|
|
12
13
|
|
|
13
14
|
[dependencies]
|
|
14
|
-
arrow-array = { version = "
|
|
15
|
-
arrow-schema = "
|
|
16
|
-
datafusion = { version = "
|
|
15
|
+
arrow-array = { version = "57", features = ["ffi"] }
|
|
16
|
+
arrow-schema = "57"
|
|
17
|
+
datafusion = { version = "51", optional = true }
|
|
17
18
|
futures = "0.3"
|
|
18
|
-
iceberg = "0.
|
|
19
|
-
iceberg-catalog-glue = { version = "0.
|
|
20
|
-
iceberg-catalog-rest = { version = "0.
|
|
21
|
-
iceberg-catalog-
|
|
22
|
-
iceberg-
|
|
19
|
+
iceberg = "0.8"
|
|
20
|
+
iceberg-catalog-glue = { version = "0.8", optional = true }
|
|
21
|
+
iceberg-catalog-rest = { version = "0.8", optional = true }
|
|
22
|
+
iceberg-catalog-s3tables = { version = "0.8", optional = true }
|
|
23
|
+
iceberg-catalog-sql = { version = "0.8", optional = true }
|
|
24
|
+
iceberg-datafusion = { version = "0.8", optional = true }
|
|
23
25
|
magnus = "0.8"
|
|
24
|
-
parquet = "
|
|
26
|
+
parquet = "57"
|
|
25
27
|
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio", "sqlite"], default-features = false, optional = true }
|
|
26
28
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
|
27
29
|
uuid = { version = "1", features = ["v4"] }
|
|
@@ -31,4 +33,5 @@ default = ["rest", "sql"]
|
|
|
31
33
|
datafusion = ["dep:datafusion", "dep:iceberg-datafusion"]
|
|
32
34
|
glue = ["dep:iceberg-catalog-glue"]
|
|
33
35
|
rest = ["dep:iceberg-catalog-rest"]
|
|
36
|
+
s3tables = ["dep:iceberg-catalog-s3tables"]
|
|
34
37
|
sql = ["dep:iceberg-catalog-sql", "dep:sqlx"]
|
data/ext/iceberg/src/catalog.rs
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
#[cfg(feature = "datafusion")]
|
|
2
2
|
use datafusion::execution::context::SessionContext;
|
|
3
|
-
use iceberg::io::FileIO;
|
|
4
3
|
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
|
|
5
4
|
use iceberg::spec::Schema;
|
|
6
5
|
use iceberg::{Catalog, CatalogBuilder, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
|
|
@@ -10,8 +9,15 @@ use iceberg_catalog_glue::{GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalog
|
|
|
10
9
|
use iceberg_catalog_rest::{
|
|
11
10
|
REST_CATALOG_PROP_URI, REST_CATALOG_PROP_WAREHOUSE, RestCatalog, RestCatalogBuilder,
|
|
12
11
|
};
|
|
12
|
+
#[cfg(feature = "s3tables")]
|
|
13
|
+
use iceberg_catalog_s3tables::{
|
|
14
|
+
S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN, S3TablesCatalog, S3TablesCatalogBuilder,
|
|
15
|
+
};
|
|
13
16
|
#[cfg(feature = "sql")]
|
|
14
|
-
use iceberg_catalog_sql::{
|
|
17
|
+
use iceberg_catalog_sql::{
|
|
18
|
+
SQL_CATALOG_PROP_BIND_STYLE, SQL_CATALOG_PROP_URI, SQL_CATALOG_PROP_WAREHOUSE, SqlBindStyle,
|
|
19
|
+
SqlCatalog, SqlCatalogBuilder,
|
|
20
|
+
};
|
|
15
21
|
#[cfg(feature = "datafusion")]
|
|
16
22
|
use iceberg_datafusion::IcebergCatalogProvider;
|
|
17
23
|
use std::cell::RefCell;
|
|
@@ -29,6 +35,8 @@ pub enum RbCatalogType {
|
|
|
29
35
|
Memory(Arc<MemoryCatalog>),
|
|
30
36
|
#[cfg(feature = "rest")]
|
|
31
37
|
Rest(Arc<RestCatalog>),
|
|
38
|
+
#[cfg(feature = "s3tables")]
|
|
39
|
+
S3Tables(Arc<S3TablesCatalog>),
|
|
32
40
|
#[cfg(feature = "sql")]
|
|
33
41
|
Sql(Arc<SqlCatalog>),
|
|
34
42
|
}
|
|
@@ -41,6 +49,8 @@ impl RbCatalogType {
|
|
|
41
49
|
RbCatalogType::Memory(v) => v.as_ref(),
|
|
42
50
|
#[cfg(feature = "rest")]
|
|
43
51
|
RbCatalogType::Rest(v) => v.as_ref(),
|
|
52
|
+
#[cfg(feature = "s3tables")]
|
|
53
|
+
RbCatalogType::S3Tables(v) => v.as_ref(),
|
|
44
54
|
#[cfg(feature = "sql")]
|
|
45
55
|
RbCatalogType::Sql(v) => v.as_ref(),
|
|
46
56
|
}
|
|
@@ -54,6 +64,8 @@ impl RbCatalogType {
|
|
|
54
64
|
RbCatalogType::Memory(v) => v.clone(),
|
|
55
65
|
#[cfg(feature = "rest")]
|
|
56
66
|
RbCatalogType::Rest(v) => v.clone(),
|
|
67
|
+
#[cfg(feature = "s3tables")]
|
|
68
|
+
RbCatalogType::S3Tables(v) => v.clone(),
|
|
57
69
|
#[cfg(feature = "sql")]
|
|
58
70
|
RbCatalogType::Sql(v) => v.clone(),
|
|
59
71
|
}
|
|
@@ -109,6 +121,18 @@ impl RbCatalog {
|
|
|
109
121
|
})
|
|
110
122
|
}
|
|
111
123
|
|
|
124
|
+
#[cfg(feature = "s3tables")]
|
|
125
|
+
pub fn new_s3tables(arn: String) -> RbResult<Self> {
|
|
126
|
+
let mut props = HashMap::new();
|
|
127
|
+
props.insert(S3TABLES_CATALOG_PROP_TABLE_BUCKET_ARN.to_string(), arn);
|
|
128
|
+
let catalog = runtime()
|
|
129
|
+
.block_on(S3TablesCatalogBuilder::default().load("s3tables", props))
|
|
130
|
+
.map_err(to_rb_err)?;
|
|
131
|
+
Ok(Self {
|
|
132
|
+
catalog: RbCatalogType::S3Tables(catalog.into()).into(),
|
|
133
|
+
})
|
|
134
|
+
}
|
|
135
|
+
|
|
112
136
|
#[cfg(feature = "sql")]
|
|
113
137
|
pub fn new_sql(
|
|
114
138
|
uri: String,
|
|
@@ -116,20 +140,15 @@ impl RbCatalog {
|
|
|
116
140
|
name: String,
|
|
117
141
|
props: HashMap<String, String>,
|
|
118
142
|
) -> RbResult<Self> {
|
|
119
|
-
let
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
.
|
|
125
|
-
|
|
126
|
-
.name(name)
|
|
127
|
-
.file_io(file_io)
|
|
128
|
-
.sql_bind_style(SqlBindStyle::DollarNumeric)
|
|
129
|
-
.props(props)
|
|
130
|
-
.build();
|
|
143
|
+
let mut props = props;
|
|
144
|
+
props.insert(SQL_CATALOG_PROP_URI.to_string(), uri);
|
|
145
|
+
props.insert(SQL_CATALOG_PROP_WAREHOUSE.to_string(), warehouse);
|
|
146
|
+
props.insert(
|
|
147
|
+
SQL_CATALOG_PROP_BIND_STYLE.to_string(),
|
|
148
|
+
SqlBindStyle::DollarNumeric.to_string(),
|
|
149
|
+
);
|
|
131
150
|
let catalog = runtime()
|
|
132
|
-
.block_on(
|
|
151
|
+
.block_on(SqlCatalogBuilder::default().load(name, props))
|
|
133
152
|
.map_err(to_rb_err)?;
|
|
134
153
|
Ok(Self {
|
|
135
154
|
catalog: RbCatalogType::Sql(catalog.into()).into(),
|
|
@@ -297,7 +316,7 @@ impl RbCatalog {
|
|
|
297
316
|
}
|
|
298
317
|
|
|
299
318
|
#[cfg(feature = "datafusion")]
|
|
300
|
-
pub fn
|
|
319
|
+
pub fn sql(&self, sql: String) -> RbResult<()> {
|
|
301
320
|
let runtime = runtime();
|
|
302
321
|
|
|
303
322
|
// TODO only create context once
|
|
@@ -311,8 +330,7 @@ impl RbCatalog {
|
|
|
311
330
|
let df = runtime.block_on(ctx.sql(&sql)).unwrap();
|
|
312
331
|
let _results = runtime.block_on(df.collect()).unwrap();
|
|
313
332
|
|
|
314
|
-
// println!("{:?}",
|
|
315
|
-
// println!("{:?}", results);
|
|
333
|
+
// println!("{:?}", _results);
|
|
316
334
|
|
|
317
335
|
Ok(())
|
|
318
336
|
}
|
data/ext/iceberg/src/lib.rs
CHANGED
|
@@ -14,7 +14,7 @@ use crate::table::RbTable;
|
|
|
14
14
|
|
|
15
15
|
type RbResult<T> = Result<T, RbErr>;
|
|
16
16
|
|
|
17
|
-
#[magnus::init]
|
|
17
|
+
#[magnus::init(name = "iceberg")]
|
|
18
18
|
fn init(ruby: &Ruby) -> RbResult<()> {
|
|
19
19
|
let module = ruby.define_module("Iceberg")?;
|
|
20
20
|
|
|
@@ -24,6 +24,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
24
24
|
class.define_singleton_method("new_memory", function!(RbCatalog::new_memory, 1))?;
|
|
25
25
|
#[cfg(feature = "rest")]
|
|
26
26
|
class.define_singleton_method("new_rest", function!(RbCatalog::new_rest, 3))?;
|
|
27
|
+
#[cfg(feature = "s3tables")]
|
|
28
|
+
class.define_singleton_method("new_s3tables", function!(RbCatalog::new_s3tables, 1))?;
|
|
27
29
|
#[cfg(feature = "sql")]
|
|
28
30
|
class.define_singleton_method("new_sql", function!(RbCatalog::new_sql, 4))?;
|
|
29
31
|
class.define_method("list_namespaces", method!(RbCatalog::list_namespaces, 1))?;
|
|
@@ -43,7 +45,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
43
45
|
class.define_method("rename_table", method!(RbCatalog::rename_table, 2))?;
|
|
44
46
|
class.define_method("register_table", method!(RbCatalog::register_table, 2))?;
|
|
45
47
|
#[cfg(feature = "datafusion")]
|
|
46
|
-
class.define_method("
|
|
48
|
+
class.define_method("sql", method!(RbCatalog::sql, 1))?;
|
|
47
49
|
|
|
48
50
|
let class = module.define_class("RbTable", ruby.class_object())?;
|
|
49
51
|
class.define_method("scan", method!(RbTable::scan, 1))?;
|
data/ext/iceberg/src/table.rs
CHANGED
|
@@ -9,6 +9,7 @@ use iceberg::writer::file_writer::ParquetWriterBuilder;
|
|
|
9
9
|
use iceberg::writer::file_writer::location_generator::{
|
|
10
10
|
DefaultFileNameGenerator, DefaultLocationGenerator,
|
|
11
11
|
};
|
|
12
|
+
use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder;
|
|
12
13
|
use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
|
|
13
14
|
use magnus::{Error as RbErr, RArray, Ruby, Value};
|
|
14
15
|
use parquet::file::properties::WriterProperties;
|
|
@@ -72,14 +73,16 @@ impl RbTable {
|
|
|
72
73
|
let parquet_writer_builder = ParquetWriterBuilder::new(
|
|
73
74
|
WriterProperties::default(),
|
|
74
75
|
table.metadata().current_schema().clone(),
|
|
75
|
-
|
|
76
|
+
);
|
|
77
|
+
let rolling_file_writer_builder = RollingFileWriterBuilder::new_with_default_file_size(
|
|
78
|
+
parquet_writer_builder,
|
|
76
79
|
table.file_io().clone(),
|
|
77
80
|
location_generator.clone(),
|
|
78
81
|
file_name_generator.clone(),
|
|
79
82
|
);
|
|
80
|
-
let data_file_writer_builder = DataFileWriterBuilder::new(
|
|
83
|
+
let data_file_writer_builder = DataFileWriterBuilder::new(rolling_file_writer_builder);
|
|
81
84
|
let mut data_file_writer = runtime
|
|
82
|
-
.block_on(data_file_writer_builder.build())
|
|
85
|
+
.block_on(data_file_writer_builder.build(None))
|
|
83
86
|
.map_err(to_rb_err)?;
|
|
84
87
|
|
|
85
88
|
for batch in data.0 {
|
|
@@ -112,6 +115,7 @@ impl RbTable {
|
|
|
112
115
|
match self.table.borrow().metadata().format_version() {
|
|
113
116
|
FormatVersion::V1 => 1,
|
|
114
117
|
FormatVersion::V2 => 2,
|
|
118
|
+
FormatVersion::V3 => 3,
|
|
115
119
|
}
|
|
116
120
|
}
|
|
117
121
|
|
|
@@ -357,18 +361,18 @@ impl RbTable {
|
|
|
357
361
|
|
|
358
362
|
pub fn encryption_keys(ruby: &Ruby, rb_self: &Self) -> RbResult<RArray> {
|
|
359
363
|
let encryption_keys = ruby.ary_new();
|
|
360
|
-
for
|
|
361
|
-
encryption_keys.push((
|
|
364
|
+
for k in rb_self.table.borrow().metadata().encryption_keys_iter() {
|
|
365
|
+
encryption_keys.push(rb_encrypted_key(k)?)?;
|
|
362
366
|
}
|
|
363
367
|
Ok(encryption_keys)
|
|
364
368
|
}
|
|
365
369
|
|
|
366
|
-
pub fn encryption_key(&self, key_id: String) -> Option<
|
|
367
|
-
self.table
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
370
|
+
pub fn encryption_key(&self, key_id: String) -> RbResult<Option<Value>> {
|
|
371
|
+
let key = match self.table.borrow().metadata().encryption_key(&key_id) {
|
|
372
|
+
Some(k) => Some(rb_encrypted_key(k)?),
|
|
373
|
+
None => None,
|
|
374
|
+
};
|
|
375
|
+
Ok(key)
|
|
372
376
|
}
|
|
373
377
|
|
|
374
378
|
pub fn from_metadata_file(location: String) -> RbResult<Self> {
|
data/ext/iceberg/src/utils.rs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
use iceberg::spec::{
|
|
2
|
-
Literal, NestedField, PartitionSpec, PartitionStatisticsFile, PrimitiveLiteral,
|
|
3
|
-
Schema, Snapshot, SortOrder, StatisticsFile, Type,
|
|
2
|
+
EncryptedKey, Literal, NestedField, PartitionSpec, PartitionStatisticsFile, PrimitiveLiteral,
|
|
3
|
+
PrimitiveType, Schema, Snapshot, SortOrder, StatisticsFile, Type,
|
|
4
4
|
};
|
|
5
5
|
use iceberg::{NamespaceIdent, TableIdent};
|
|
6
6
|
use magnus::{
|
|
@@ -238,6 +238,10 @@ pub fn rb_partition_statistics_file(
|
|
|
238
238
|
todo!();
|
|
239
239
|
}
|
|
240
240
|
|
|
241
|
+
pub fn rb_encrypted_key(_encrypted_key: &EncryptedKey) -> RbResult<Value> {
|
|
242
|
+
todo!();
|
|
243
|
+
}
|
|
244
|
+
|
|
241
245
|
pub fn rb_literal(ruby: &Ruby, literal: &Literal) -> Value {
|
|
242
246
|
match literal {
|
|
243
247
|
Literal::Primitive(pl) => match pl {
|
data/lib/iceberg/catalog.rb
CHANGED
|
@@ -7,6 +7,7 @@ module Iceberg
|
|
|
7
7
|
def create_namespace(namespace, properties: {}, if_not_exists: nil)
|
|
8
8
|
@catalog.create_namespace(namespace, properties)
|
|
9
9
|
rescue Error => e
|
|
10
|
+
# ideally all catalogs would use NamespaceAlreadyExistsError
|
|
10
11
|
if !if_not_exists || (e.message != "Cannot create namespace" && !e.message.include?("already exists"))
|
|
11
12
|
raise e
|
|
12
13
|
end
|
|
@@ -28,7 +29,8 @@ module Iceberg
|
|
|
28
29
|
def drop_namespace(namespace, if_exists: nil)
|
|
29
30
|
@catalog.drop_namespace(namespace)
|
|
30
31
|
rescue Error => e
|
|
31
|
-
|
|
32
|
+
# ideally all catalogs would use NamespaceNotFoundError
|
|
33
|
+
if !if_exists || (e.message != "Tried to drop a namespace that does not exist" && !e.message.include?("No such namespace") && !e.message.include?("The specified namespace does not exist") && !e.message.include?("not found"))
|
|
32
34
|
raise e
|
|
33
35
|
end
|
|
34
36
|
nil
|
|
@@ -47,9 +49,9 @@ module Iceberg
|
|
|
47
49
|
table_definition = TableDefinition.new
|
|
48
50
|
yield table_definition
|
|
49
51
|
schema = Schema.new(table_definition.fields)
|
|
50
|
-
elsif schema.is_a?(Hash)
|
|
52
|
+
elsif schema.is_a?(Hash) || (defined?(Polars::Schema) && schema.is_a?(Polars::Schema))
|
|
51
53
|
fields =
|
|
52
|
-
schema.map.with_index do |(k, v), i|
|
|
54
|
+
schema.to_h.map.with_index do |(k, v), i|
|
|
53
55
|
{
|
|
54
56
|
id: i + 1,
|
|
55
57
|
name: k.is_a?(Symbol) ? k.to_s : k,
|
|
@@ -72,7 +74,8 @@ module Iceberg
|
|
|
72
74
|
def drop_table(table_name, if_exists: nil)
|
|
73
75
|
@catalog.drop_table(table_name)
|
|
74
76
|
rescue Error => e
|
|
75
|
-
|
|
77
|
+
# ideally all catalogs would use TableNotFoundError
|
|
78
|
+
if !if_exists || (e.message != "Tried to drop a table that does not exist" && !e.message.include?("No such table") && !e.message.include?("The specified table does not exist") && !e.message.include?("not found"))
|
|
76
79
|
raise e
|
|
77
80
|
end
|
|
78
81
|
nil
|
|
@@ -92,11 +95,11 @@ module Iceberg
|
|
|
92
95
|
@catalog.register_table(table_name, metadata_location)
|
|
93
96
|
end
|
|
94
97
|
|
|
95
|
-
def
|
|
98
|
+
def sql(sql)
|
|
96
99
|
# requires datafusion feature
|
|
97
|
-
raise Todo unless @catalog.respond_to?(:
|
|
100
|
+
raise Todo unless @catalog.respond_to?(:sql)
|
|
98
101
|
|
|
99
|
-
@catalog.
|
|
102
|
+
@catalog.sql(sql)
|
|
100
103
|
end
|
|
101
104
|
|
|
102
105
|
# hide internal state
|
data/lib/iceberg/glue_catalog.rb
CHANGED
|
@@ -2,9 +2,6 @@ module Iceberg
|
|
|
2
2
|
class GlueCatalog < Catalog
|
|
3
3
|
# warehouse is URI of S3 storage bucket
|
|
4
4
|
def initialize(warehouse:)
|
|
5
|
-
# requires glue feature
|
|
6
|
-
raise Error, "Feature not enabled" unless RbCatalog.respond_to?(:new_glue)
|
|
7
|
-
|
|
8
5
|
@catalog = RbCatalog.new_glue(warehouse)
|
|
9
6
|
end
|
|
10
7
|
end
|
data/lib/iceberg/table.rb
CHANGED
|
@@ -87,61 +87,14 @@ module Iceberg
|
|
|
87
87
|
TableScan.new(@table.scan(snapshot_id), self)
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
-
def to_polars(snapshot_id: nil, storage_options: nil
|
|
90
|
+
def to_polars(snapshot_id: nil, storage_options: nil)
|
|
91
91
|
require "polars-df"
|
|
92
92
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
return Polars.scan_iceberg(self, snapshot_id:, storage_options:)
|
|
93
|
+
if Gem::Version.new(Polars::VERSION) < Gem::Version.new("0.23")
|
|
94
|
+
raise "Requires polars-df >= 0.23"
|
|
96
95
|
end
|
|
97
96
|
|
|
98
|
-
|
|
99
|
-
files = scan.plan_files
|
|
100
|
-
|
|
101
|
-
if files.empty?
|
|
102
|
-
snapshot = scan.snapshot
|
|
103
|
-
scan_schema = snapshot ? schema_by_id(snapshot[:schema_id]) : current_schema
|
|
104
|
-
|
|
105
|
-
# TODO improve
|
|
106
|
-
schema =
|
|
107
|
-
scan_schema.fields.to_h do |field|
|
|
108
|
-
dtype =
|
|
109
|
-
case field[:type]
|
|
110
|
-
when "int"
|
|
111
|
-
Polars::Int32
|
|
112
|
-
when "long"
|
|
113
|
-
Polars::Int64
|
|
114
|
-
when "double"
|
|
115
|
-
Polars::Float64
|
|
116
|
-
when "string"
|
|
117
|
-
Polars::String
|
|
118
|
-
when "timestamp"
|
|
119
|
-
Polars::Datetime
|
|
120
|
-
else
|
|
121
|
-
raise Todo
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
[field[:name], dtype]
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
Polars::LazyFrame.new(schema: schema)
|
|
128
|
-
else
|
|
129
|
-
sources = files.map { |v| v[:data_file_path] }
|
|
130
|
-
|
|
131
|
-
deletion_files = [
|
|
132
|
-
"iceberg-position-delete",
|
|
133
|
-
files.map.with_index
|
|
134
|
-
.select { |v, i| v[:deletes].any? }
|
|
135
|
-
.to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
|
|
136
|
-
]
|
|
137
|
-
|
|
138
|
-
scan_options = {
|
|
139
|
-
storage_options: storage_options,
|
|
140
|
-
_deletion_files: deletion_files,
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
Polars.scan_parquet(sources, **scan_options)
|
|
144
|
-
end
|
|
97
|
+
Polars.scan_iceberg(self, snapshot_id:, storage_options:)
|
|
145
98
|
end
|
|
146
99
|
|
|
147
100
|
def append(df)
|
data/lib/iceberg/version.rb
CHANGED
data/lib/iceberg.rb
CHANGED
|
@@ -18,6 +18,7 @@ require_relative "iceberg/version"
|
|
|
18
18
|
require_relative "iceberg/glue_catalog"
|
|
19
19
|
require_relative "iceberg/memory_catalog"
|
|
20
20
|
require_relative "iceberg/rest_catalog"
|
|
21
|
+
require_relative "iceberg/s3_tables_catalog"
|
|
21
22
|
require_relative "iceberg/sql_catalog"
|
|
22
23
|
|
|
23
24
|
module Iceberg
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iceberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.3
|
|
4
|
+
version: 0.11.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -50,6 +50,7 @@ files:
|
|
|
50
50
|
- lib/iceberg/glue_catalog.rb
|
|
51
51
|
- lib/iceberg/memory_catalog.rb
|
|
52
52
|
- lib/iceberg/rest_catalog.rb
|
|
53
|
+
- lib/iceberg/s3_tables_catalog.rb
|
|
53
54
|
- lib/iceberg/schema.rb
|
|
54
55
|
- lib/iceberg/sql_catalog.rb
|
|
55
56
|
- lib/iceberg/static_table.rb
|
|
@@ -68,7 +69,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
68
69
|
requirements:
|
|
69
70
|
- - ">="
|
|
70
71
|
- !ruby/object:Gem::Version
|
|
71
|
-
version: '3.
|
|
72
|
+
version: '3.3'
|
|
72
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
74
|
requirements:
|
|
74
75
|
- - ">="
|