iceberg 0.10.2 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +188 -332
- data/README.md +3 -5
- data/ext/iceberg/Cargo.toml +8 -8
- data/ext/iceberg/src/catalog.rs +27 -23
- data/ext/iceberg/src/table.rs +1 -0
- data/lib/iceberg/table.rb +22 -13
- data/lib/iceberg/table_scan.rb +18 -0
- data/lib/iceberg/version.rb +1 -1
- data/lib/iceberg.rb +1 -0
- metadata +2 -1
data/README.md
CHANGED
|
@@ -82,9 +82,7 @@ Iceberg::MemoryCatalog.new(
|
|
|
82
82
|
)
|
|
83
83
|
```
|
|
84
84
|
|
|
85
|
-
##
|
|
86
|
-
|
|
87
|
-
### Namespaces
|
|
85
|
+
## Namespaces
|
|
88
86
|
|
|
89
87
|
List namespaces
|
|
90
88
|
|
|
@@ -122,7 +120,7 @@ Drop a namespace
|
|
|
122
120
|
catalog.drop_namespace("main")
|
|
123
121
|
```
|
|
124
122
|
|
|
125
|
-
|
|
123
|
+
## Tables
|
|
126
124
|
|
|
127
125
|
List tables
|
|
128
126
|
|
|
@@ -169,7 +167,7 @@ Drop a table
|
|
|
169
167
|
catalog.drop_table("main.events")
|
|
170
168
|
```
|
|
171
169
|
|
|
172
|
-
|
|
170
|
+
## Static Tables
|
|
173
171
|
|
|
174
172
|
Load a static table
|
|
175
173
|
|
data/ext/iceberg/Cargo.toml
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "iceberg"
|
|
3
|
-
version = "0.10.2"
|
|
3
|
+
version = "0.10.3"
|
|
4
4
|
license = "Apache-2.0"
|
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
|
6
6
|
edition = "2024"
|
|
7
|
-
rust-version = "1.
|
|
7
|
+
rust-version = "1.87"
|
|
8
8
|
publish = false
|
|
9
9
|
|
|
10
10
|
[lib]
|
|
@@ -13,13 +13,13 @@ crate-type = ["cdylib"]
|
|
|
13
13
|
[dependencies]
|
|
14
14
|
arrow-array = { version = "55", features = ["ffi"] }
|
|
15
15
|
arrow-schema = "55"
|
|
16
|
-
datafusion = { version = "
|
|
16
|
+
datafusion = { version = "48", optional = true }
|
|
17
17
|
futures = "0.3"
|
|
18
|
-
iceberg = "0.
|
|
19
|
-
iceberg-catalog-glue = { version = "0.
|
|
20
|
-
iceberg-catalog-rest = { version = "0.
|
|
21
|
-
iceberg-catalog-sql = { version = "0.
|
|
22
|
-
iceberg-datafusion = { version = "0.
|
|
18
|
+
iceberg = "0.7"
|
|
19
|
+
iceberg-catalog-glue = { version = "0.7", optional = true }
|
|
20
|
+
iceberg-catalog-rest = { version = "0.7", optional = true }
|
|
21
|
+
iceberg-catalog-sql = { version = "0.7", optional = true }
|
|
22
|
+
iceberg-datafusion = { version = "0.7", optional = true }
|
|
23
23
|
magnus = "0.8"
|
|
24
24
|
parquet = "55"
|
|
25
25
|
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio", "sqlite"], default-features = false, optional = true }
|
data/ext/iceberg/src/catalog.rs
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
#[cfg(feature = "datafusion")]
|
|
2
2
|
use datafusion::execution::context::SessionContext;
|
|
3
|
-
use iceberg::io::
|
|
3
|
+
use iceberg::io::FileIO;
|
|
4
|
+
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
|
|
4
5
|
use iceberg::spec::Schema;
|
|
5
|
-
use iceberg::{Catalog, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
|
|
6
|
+
use iceberg::{Catalog, CatalogBuilder, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
|
|
6
7
|
#[cfg(feature = "glue")]
|
|
7
|
-
use iceberg_catalog_glue::{GlueCatalog,
|
|
8
|
+
use iceberg_catalog_glue::{GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalogBuilder};
|
|
8
9
|
#[cfg(feature = "rest")]
|
|
9
|
-
use iceberg_catalog_rest::{
|
|
10
|
+
use iceberg_catalog_rest::{
|
|
11
|
+
REST_CATALOG_PROP_URI, REST_CATALOG_PROP_WAREHOUSE, RestCatalog, RestCatalogBuilder,
|
|
12
|
+
};
|
|
10
13
|
#[cfg(feature = "sql")]
|
|
11
14
|
use iceberg_catalog_sql::{SqlBindStyle, SqlCatalog, SqlCatalogConfig};
|
|
12
15
|
#[cfg(feature = "datafusion")]
|
|
@@ -65,9 +68,9 @@ pub struct RbCatalog {
|
|
|
65
68
|
impl RbCatalog {
|
|
66
69
|
#[cfg(feature = "glue")]
|
|
67
70
|
pub fn new_glue(warehouse: String) -> RbResult<Self> {
|
|
68
|
-
let
|
|
71
|
+
let props = HashMap::from([(GLUE_CATALOG_PROP_WAREHOUSE.to_string(), warehouse)]);
|
|
69
72
|
let catalog = runtime()
|
|
70
|
-
.block_on(
|
|
73
|
+
.block_on(GlueCatalogBuilder::default().load("glue", props))
|
|
71
74
|
.map_err(to_rb_err)?;
|
|
72
75
|
Ok(Self {
|
|
73
76
|
catalog: RbCatalogType::Glue(catalog.into()).into(),
|
|
@@ -75,14 +78,13 @@ impl RbCatalog {
|
|
|
75
78
|
}
|
|
76
79
|
|
|
77
80
|
pub fn new_memory(warehouse: Option<String>) -> RbResult<Self> {
|
|
78
|
-
let
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
let catalog = MemoryCatalog::new(file_io, warehouse);
|
|
81
|
+
let mut props = HashMap::new();
|
|
82
|
+
if let Some(v) = warehouse {
|
|
83
|
+
props.insert(MEMORY_CATALOG_WAREHOUSE.to_string(), v);
|
|
84
|
+
}
|
|
85
|
+
let catalog = runtime()
|
|
86
|
+
.block_on(MemoryCatalogBuilder::default().load("memory", props))
|
|
87
|
+
.map_err(to_rb_err)?;
|
|
86
88
|
Ok(Self {
|
|
87
89
|
catalog: RbCatalogType::Memory(catalog.into()).into(),
|
|
88
90
|
})
|
|
@@ -93,16 +95,18 @@ impl RbCatalog {
|
|
|
93
95
|
uri: String,
|
|
94
96
|
warehouse: Option<String>,
|
|
95
97
|
props: HashMap<String, String>,
|
|
96
|
-
) -> Self {
|
|
97
|
-
let
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
.
|
|
101
|
-
.build();
|
|
102
|
-
let catalog = RestCatalog::new(config);
|
|
103
|
-
Self {
|
|
104
|
-
catalog: RbCatalogType::Rest(catalog.into()).into(),
|
|
98
|
+
) -> RbResult<Self> {
|
|
99
|
+
let mut props = props;
|
|
100
|
+
props.insert(REST_CATALOG_PROP_URI.to_string(), uri);
|
|
101
|
+
if let Some(v) = warehouse {
|
|
102
|
+
props.insert(REST_CATALOG_PROP_WAREHOUSE.to_string(), v);
|
|
105
103
|
}
|
|
104
|
+
let catalog = runtime()
|
|
105
|
+
.block_on(RestCatalogBuilder::default().load("rest", props))
|
|
106
|
+
.map_err(to_rb_err)?;
|
|
107
|
+
Ok(Self {
|
|
108
|
+
catalog: RbCatalogType::Rest(catalog.into()).into(),
|
|
109
|
+
})
|
|
106
110
|
}
|
|
107
111
|
|
|
108
112
|
#[cfg(feature = "sql")]
|
data/ext/iceberg/src/table.rs
CHANGED
data/lib/iceberg/table.rb
CHANGED
|
@@ -83,15 +83,28 @@ module Iceberg
|
|
|
83
83
|
@table.properties
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
-
def
|
|
86
|
+
def scan(snapshot_id: nil)
|
|
87
|
+
TableScan.new(@table.scan(snapshot_id), self)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def to_polars(snapshot_id: nil, storage_options: nil, _schema_changes: false)
|
|
87
91
|
require "polars-df"
|
|
88
92
|
|
|
89
|
-
|
|
93
|
+
# TODO always take this path in 0.2.0
|
|
94
|
+
if _schema_changes
|
|
95
|
+
return Polars.scan_iceberg(self, snapshot_id:, storage_options:)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
scan = scan(snapshot_id:)
|
|
99
|
+
files = scan.plan_files
|
|
100
|
+
|
|
90
101
|
if files.empty?
|
|
102
|
+
snapshot = scan.snapshot
|
|
103
|
+
scan_schema = snapshot ? schema_by_id(snapshot[:schema_id]) : current_schema
|
|
104
|
+
|
|
91
105
|
# TODO improve
|
|
92
106
|
schema =
|
|
93
|
-
|
|
94
|
-
current_schema.fields.to_h do |field|
|
|
107
|
+
scan_schema.fields.to_h do |field|
|
|
95
108
|
dtype =
|
|
96
109
|
case field[:type]
|
|
97
110
|
when "int"
|
|
@@ -122,16 +135,12 @@ module Iceberg
|
|
|
122
135
|
.to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
|
|
123
136
|
]
|
|
124
137
|
|
|
125
|
-
|
|
126
|
-
sources,
|
|
138
|
+
scan_options = {
|
|
127
139
|
storage_options: storage_options,
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
# _column_mapping: column_mapping,
|
|
133
|
-
_deletion_files: deletion_files
|
|
134
|
-
)
|
|
140
|
+
_deletion_files: deletion_files,
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
Polars.scan_parquet(sources, **scan_options)
|
|
135
144
|
end
|
|
136
145
|
end
|
|
137
146
|
|
data/lib/iceberg/version.rb
CHANGED
data/lib/iceberg.rb
CHANGED
|
@@ -9,6 +9,7 @@ end
|
|
|
9
9
|
require_relative "iceberg/catalog"
|
|
10
10
|
require_relative "iceberg/schema"
|
|
11
11
|
require_relative "iceberg/table"
|
|
12
|
+
require_relative "iceberg/table_scan"
|
|
12
13
|
require_relative "iceberg/static_table"
|
|
13
14
|
require_relative "iceberg/table_definition"
|
|
14
15
|
require_relative "iceberg/version"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iceberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.2
|
|
4
|
+
version: 0.10.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -55,6 +55,7 @@ files:
|
|
|
55
55
|
- lib/iceberg/static_table.rb
|
|
56
56
|
- lib/iceberg/table.rb
|
|
57
57
|
- lib/iceberg/table_definition.rb
|
|
58
|
+
- lib/iceberg/table_scan.rb
|
|
58
59
|
- lib/iceberg/version.rb
|
|
59
60
|
homepage: https://github.com/ankane/iceberg-ruby
|
|
60
61
|
licenses:
|