iceberg 0.10.2 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -82,9 +82,7 @@ Iceberg::MemoryCatalog.new(
82
82
  )
83
83
  ```
84
84
 
85
- ## Reference
86
-
87
- ### Namespaces
85
+ ## Namespaces
88
86
 
89
87
  List namespaces
90
88
 
@@ -122,7 +120,7 @@ Drop a namespace
122
120
  catalog.drop_namespace("main")
123
121
  ```
124
122
 
125
- ### Tables
123
+ ## Tables
126
124
 
127
125
  List tables
128
126
 
@@ -169,7 +167,7 @@ Drop a table
169
167
  catalog.drop_table("main.events")
170
168
  ```
171
169
 
172
- ### Static Tables
170
+ ## Static Tables
173
171
 
174
172
  Load a static table
175
173
 
@@ -1,10 +1,10 @@
1
1
  [package]
2
2
  name = "iceberg"
3
- version = "0.10.2"
3
+ version = "0.10.3"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2024"
7
- rust-version = "1.86"
7
+ rust-version = "1.87"
8
8
  publish = false
9
9
 
10
10
  [lib]
@@ -13,13 +13,13 @@ crate-type = ["cdylib"]
13
13
  [dependencies]
14
14
  arrow-array = { version = "55", features = ["ffi"] }
15
15
  arrow-schema = "55"
16
- datafusion = { version = "47", optional = true }
16
+ datafusion = { version = "48", optional = true }
17
17
  futures = "0.3"
18
- iceberg = "0.6"
19
- iceberg-catalog-glue = { version = "0.6", optional = true }
20
- iceberg-catalog-rest = { version = "0.6", optional = true }
21
- iceberg-catalog-sql = { version = "0.6", optional = true }
22
- iceberg-datafusion = { version = "0.6", optional = true }
18
+ iceberg = "0.7"
19
+ iceberg-catalog-glue = { version = "0.7", optional = true }
20
+ iceberg-catalog-rest = { version = "0.7", optional = true }
21
+ iceberg-catalog-sql = { version = "0.7", optional = true }
22
+ iceberg-datafusion = { version = "0.7", optional = true }
23
23
  magnus = "0.8"
24
24
  parquet = "55"
25
25
  sqlx = { version = "0.8", features = ["postgres", "runtime-tokio", "sqlite"], default-features = false, optional = true }
@@ -1,12 +1,15 @@
1
1
  #[cfg(feature = "datafusion")]
2
2
  use datafusion::execution::context::SessionContext;
3
- use iceberg::io::{FileIO, FileIOBuilder};
3
+ use iceberg::io::FileIO;
4
+ use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
4
5
  use iceberg::spec::Schema;
5
- use iceberg::{Catalog, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
6
+ use iceberg::{Catalog, CatalogBuilder, MemoryCatalog, NamespaceIdent, TableCreation, TableIdent};
6
7
  #[cfg(feature = "glue")]
7
- use iceberg_catalog_glue::{GlueCatalog, GlueCatalogConfig};
8
+ use iceberg_catalog_glue::{GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalogBuilder};
8
9
  #[cfg(feature = "rest")]
9
- use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
10
+ use iceberg_catalog_rest::{
11
+ REST_CATALOG_PROP_URI, REST_CATALOG_PROP_WAREHOUSE, RestCatalog, RestCatalogBuilder,
12
+ };
10
13
  #[cfg(feature = "sql")]
11
14
  use iceberg_catalog_sql::{SqlBindStyle, SqlCatalog, SqlCatalogConfig};
12
15
  #[cfg(feature = "datafusion")]
@@ -65,9 +68,9 @@ pub struct RbCatalog {
65
68
  impl RbCatalog {
66
69
  #[cfg(feature = "glue")]
67
70
  pub fn new_glue(warehouse: String) -> RbResult<Self> {
68
- let config = GlueCatalogConfig::builder().warehouse(warehouse).build();
71
+ let props = HashMap::from([(GLUE_CATALOG_PROP_WAREHOUSE.to_string(), warehouse)]);
69
72
  let catalog = runtime()
70
- .block_on(GlueCatalog::new(config))
73
+ .block_on(GlueCatalogBuilder::default().load("glue", props))
71
74
  .map_err(to_rb_err)?;
72
75
  Ok(Self {
73
76
  catalog: RbCatalogType::Glue(catalog.into()).into(),
@@ -75,14 +78,13 @@ impl RbCatalog {
75
78
  }
76
79
 
77
80
  pub fn new_memory(warehouse: Option<String>) -> RbResult<Self> {
78
- let file_io = match warehouse {
79
- Some(ref v) => FileIO::from_path(v)
80
- .map_err(to_rb_err)?
81
- .build()
82
- .map_err(to_rb_err)?,
83
- None => FileIOBuilder::new_fs_io().build().map_err(to_rb_err)?,
84
- };
85
- let catalog = MemoryCatalog::new(file_io, warehouse);
81
+ let mut props = HashMap::new();
82
+ if let Some(v) = warehouse {
83
+ props.insert(MEMORY_CATALOG_WAREHOUSE.to_string(), v);
84
+ }
85
+ let catalog = runtime()
86
+ .block_on(MemoryCatalogBuilder::default().load("memory", props))
87
+ .map_err(to_rb_err)?;
86
88
  Ok(Self {
87
89
  catalog: RbCatalogType::Memory(catalog.into()).into(),
88
90
  })
@@ -93,16 +95,18 @@ impl RbCatalog {
93
95
  uri: String,
94
96
  warehouse: Option<String>,
95
97
  props: HashMap<String, String>,
96
- ) -> Self {
97
- let config = RestCatalogConfig::builder()
98
- .uri(uri)
99
- .warehouse_opt(warehouse)
100
- .props(props)
101
- .build();
102
- let catalog = RestCatalog::new(config);
103
- Self {
104
- catalog: RbCatalogType::Rest(catalog.into()).into(),
98
+ ) -> RbResult<Self> {
99
+ let mut props = props;
100
+ props.insert(REST_CATALOG_PROP_URI.to_string(), uri);
101
+ if let Some(v) = warehouse {
102
+ props.insert(REST_CATALOG_PROP_WAREHOUSE.to_string(), v);
105
103
  }
104
+ let catalog = runtime()
105
+ .block_on(RestCatalogBuilder::default().load("rest", props))
106
+ .map_err(to_rb_err)?;
107
+ Ok(Self {
108
+ catalog: RbCatalogType::Rest(catalog.into()).into(),
109
+ })
106
110
  }
107
111
 
108
112
  #[cfg(feature = "sql")]
@@ -72,6 +72,7 @@ impl RbTable {
72
72
  let parquet_writer_builder = ParquetWriterBuilder::new(
73
73
  WriterProperties::default(),
74
74
  table.metadata().current_schema().clone(),
75
+ None,
75
76
  table.file_io().clone(),
76
77
  location_generator.clone(),
77
78
  file_name_generator.clone(),
data/lib/iceberg/table.rb CHANGED
@@ -83,15 +83,28 @@ module Iceberg
83
83
  @table.properties
84
84
  end
85
85
 
86
- def to_polars(snapshot_id: nil, storage_options: nil)
86
+ def scan(snapshot_id: nil)
87
+ TableScan.new(@table.scan(snapshot_id), self)
88
+ end
89
+
90
+ def to_polars(snapshot_id: nil, storage_options: nil, _schema_changes: false)
87
91
  require "polars-df"
88
92
 
89
- files = @table.scan(snapshot_id).plan_files
93
+ # TODO always take this path in 0.2.0
94
+ if _schema_changes
95
+ return Polars.scan_iceberg(self, snapshot_id:, storage_options:)
96
+ end
97
+
98
+ scan = scan(snapshot_id:)
99
+ files = scan.plan_files
100
+
90
101
  if files.empty?
102
+ snapshot = scan.snapshot
103
+ scan_schema = snapshot ? schema_by_id(snapshot[:schema_id]) : current_schema
104
+
91
105
  # TODO improve
92
106
  schema =
93
- # TODO use schema from snapshot_id
94
- current_schema.fields.to_h do |field|
107
+ scan_schema.fields.to_h do |field|
95
108
  dtype =
96
109
  case field[:type]
97
110
  when "int"
@@ -122,16 +135,12 @@ module Iceberg
122
135
  .to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
123
136
  ]
124
137
 
125
- Polars.scan_parquet(
126
- sources,
138
+ scan_options = {
127
139
  storage_options: storage_options,
128
- # TODO
129
- # cast_options: Polars::ScanCastOptions._default_iceberg,
130
- # allow_missing_columns: true,
131
- # extra_columns: "ignore",
132
- # _column_mapping: column_mapping,
133
- _deletion_files: deletion_files
134
- )
140
+ _deletion_files: deletion_files,
141
+ }
142
+
143
+ Polars.scan_parquet(sources, **scan_options)
135
144
  end
136
145
  end
137
146
 
@@ -0,0 +1,18 @@
1
+ module Iceberg
2
+ class TableScan
3
+ attr_reader :table
4
+
5
+ def initialize(scan, table)
6
+ @scan = scan
7
+ @table = table
8
+ end
9
+
10
+ def plan_files
11
+ @scan.plan_files
12
+ end
13
+
14
+ def snapshot
15
+ @scan.snapshot
16
+ end
17
+ end
18
+ end
@@ -1,3 +1,3 @@
1
1
  module Iceberg
2
- VERSION = "0.10.2"
2
+ VERSION = "0.10.3"
3
3
  end
data/lib/iceberg.rb CHANGED
@@ -9,6 +9,7 @@ end
9
9
  require_relative "iceberg/catalog"
10
10
  require_relative "iceberg/schema"
11
11
  require_relative "iceberg/table"
12
+ require_relative "iceberg/table_scan"
12
13
  require_relative "iceberg/static_table"
13
14
  require_relative "iceberg/table_definition"
14
15
  require_relative "iceberg/version"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iceberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.2
4
+ version: 0.10.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -55,6 +55,7 @@ files:
55
55
  - lib/iceberg/static_table.rb
56
56
  - lib/iceberg/table.rb
57
57
  - lib/iceberg/table_definition.rb
58
+ - lib/iceberg/table_scan.rb
58
59
  - lib/iceberg/version.rb
59
60
  homepage: https://github.com/ankane/iceberg-ruby
60
61
  licenses: