iceberg 0.10.2-arm64-darwin → 0.11.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  [Apache Iceberg](https://iceberg.apache.org/) for Ruby
4
4
 
5
+ :duck: Also check out [SeaDuck](https://github.com/ankane/seaduck)
6
+
5
7
  [![Build Status](https://github.com/ankane/iceberg-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/iceberg-ruby/actions)
6
8
 
7
9
  ## Installation
@@ -35,7 +37,7 @@ catalog.create_table("main.events") do |t|
35
37
  end
36
38
  ```
37
39
 
38
- Or
40
+ Or with [Polars](https://github.com/ankane/ruby-polars)
39
41
 
40
42
  ```ruby
41
43
  df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
@@ -82,9 +84,7 @@ Iceberg::MemoryCatalog.new(
82
84
  )
83
85
  ```
84
86
 
85
- ## Reference
86
-
87
- ### Namespaces
87
+ ## Namespaces
88
88
 
89
89
  List namespaces
90
90
 
@@ -122,7 +122,7 @@ Drop a namespace
122
122
  catalog.drop_namespace("main")
123
123
  ```
124
124
 
125
- ### Tables
125
+ ## Tables
126
126
 
127
127
  List tables
128
128
 
@@ -169,7 +169,7 @@ Drop a table
169
169
  catalog.drop_table("main.events")
170
170
  ```
171
171
 
172
- ### Static Tables
172
+ ## Static Tables
173
173
 
174
174
  Load a static table
175
175
 
Binary file
Binary file
@@ -7,6 +7,7 @@ module Iceberg
7
7
  def create_namespace(namespace, properties: {}, if_not_exists: nil)
8
8
  @catalog.create_namespace(namespace, properties)
9
9
  rescue Error => e
10
+ # ideally all catalogs would use NamespaceAlreadyExistsError
10
11
  if !if_not_exists || (e.message != "Cannot create namespace" && !e.message.include?("already exists"))
11
12
  raise e
12
13
  end
@@ -28,7 +29,8 @@ module Iceberg
28
29
  def drop_namespace(namespace, if_exists: nil)
29
30
  @catalog.drop_namespace(namespace)
30
31
  rescue Error => e
31
- if !if_exists || (e.message != "Tried to drop a namespace that does not exist" && !e.message.include?("No such namespace"))
32
+ # ideally all catalogs would use NamespaceNotFoundError
33
+ if !if_exists || (e.message != "Tried to drop a namespace that does not exist" && !e.message.include?("No such namespace") && !e.message.include?("The specified namespace does not exist") && !e.message.include?("not found"))
32
34
  raise e
33
35
  end
34
36
  nil
@@ -47,9 +49,9 @@ module Iceberg
47
49
  table_definition = TableDefinition.new
48
50
  yield table_definition
49
51
  schema = Schema.new(table_definition.fields)
50
- elsif schema.is_a?(Hash)
52
+ elsif schema.is_a?(Hash) || (defined?(Polars::Schema) && schema.is_a?(Polars::Schema))
51
53
  fields =
52
- schema.map.with_index do |(k, v), i|
54
+ schema.to_h.map.with_index do |(k, v), i|
53
55
  {
54
56
  id: i + 1,
55
57
  name: k.is_a?(Symbol) ? k.to_s : k,
@@ -72,7 +74,8 @@ module Iceberg
72
74
  def drop_table(table_name, if_exists: nil)
73
75
  @catalog.drop_table(table_name)
74
76
  rescue Error => e
75
- if !if_exists || (e.message != "Tried to drop a table that does not exist" && !e.message.include?("No such table"))
77
+ # ideally all catalogs would use TableNotFoundError
78
+ if !if_exists || (e.message != "Tried to drop a table that does not exist" && !e.message.include?("No such table") && !e.message.include?("The specified table does not exist") && !e.message.include?("not found"))
76
79
  raise e
77
80
  end
78
81
  nil
@@ -92,11 +95,11 @@ module Iceberg
92
95
  @catalog.register_table(table_name, metadata_location)
93
96
  end
94
97
 
95
- def query(sql)
98
+ def sql(sql)
96
99
  # requires datafusion feature
97
- raise Todo unless @catalog.respond_to?(:query)
100
+ raise Todo unless @catalog.respond_to?(:sql)
98
101
 
99
- @catalog.query(sql)
102
+ @catalog.sql(sql)
100
103
  end
101
104
 
102
105
  # hide internal state
@@ -2,9 +2,6 @@ module Iceberg
2
2
  class GlueCatalog < Catalog
3
3
  # warehouse is URI of S3 storage bucket
4
4
  def initialize(warehouse:)
5
- # requires glue feature
6
- raise Error, "Feature not enabled" unless RbCatalog.respond_to?(:new_glue)
7
-
8
5
  @catalog = RbCatalog.new_glue(warehouse)
9
6
  end
10
7
  end
@@ -0,0 +1,7 @@
1
+ module Iceberg
2
+ class S3TablesCatalog < Catalog
3
+ def initialize(arn:)
4
+ @catalog = RbCatalog.new_s3tables(arn)
5
+ end
6
+ end
7
+ end
data/lib/iceberg/table.rb CHANGED
@@ -83,56 +83,18 @@ module Iceberg
83
83
  @table.properties
84
84
  end
85
85
 
86
+ def scan(snapshot_id: nil)
87
+ TableScan.new(@table.scan(snapshot_id), self)
88
+ end
89
+
86
90
  def to_polars(snapshot_id: nil, storage_options: nil)
87
91
  require "polars-df"
88
92
 
89
- files = @table.scan(snapshot_id).plan_files
90
- if files.empty?
91
- # TODO improve
92
- schema =
93
- # TODO use schema from snapshot_id
94
- current_schema.fields.to_h do |field|
95
- dtype =
96
- case field[:type]
97
- when "int"
98
- Polars::Int32
99
- when "long"
100
- Polars::Int64
101
- when "double"
102
- Polars::Float64
103
- when "string"
104
- Polars::String
105
- when "timestamp"
106
- Polars::Datetime
107
- else
108
- raise Todo
109
- end
110
-
111
- [field[:name], dtype]
112
- end
113
-
114
- Polars::LazyFrame.new(schema: schema)
115
- else
116
- sources = files.map { |v| v[:data_file_path] }
117
-
118
- deletion_files = [
119
- "iceberg-position-delete",
120
- files.map.with_index
121
- .select { |v, i| v[:deletes].any? }
122
- .to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
123
- ]
124
-
125
- Polars.scan_parquet(
126
- sources,
127
- storage_options: storage_options,
128
- # TODO
129
- # cast_options: Polars::ScanCastOptions._default_iceberg,
130
- # allow_missing_columns: true,
131
- # extra_columns: "ignore",
132
- # _column_mapping: column_mapping,
133
- _deletion_files: deletion_files
134
- )
93
+ if Gem::Version.new(Polars::VERSION) < Gem::Version.new("0.23")
94
+ raise "Requires polars-df >= 0.23"
135
95
  end
96
+
97
+ Polars.scan_iceberg(self, snapshot_id:, storage_options:)
136
98
  end
137
99
 
138
100
  def append(df)
@@ -0,0 +1,18 @@
1
+ module Iceberg
2
+ class TableScan
3
+ attr_reader :table
4
+
5
+ def initialize(scan, table)
6
+ @scan = scan
7
+ @table = table
8
+ end
9
+
10
+ def plan_files
11
+ @scan.plan_files
12
+ end
13
+
14
+ def snapshot
15
+ @scan.snapshot
16
+ end
17
+ end
18
+ end
@@ -1,3 +1,3 @@
1
1
  module Iceberg
2
- VERSION = "0.10.2"
2
+ VERSION = "0.11.0"
3
3
  end
data/lib/iceberg.rb CHANGED
@@ -9,6 +9,7 @@ end
9
9
  require_relative "iceberg/catalog"
10
10
  require_relative "iceberg/schema"
11
11
  require_relative "iceberg/table"
12
+ require_relative "iceberg/table_scan"
12
13
  require_relative "iceberg/static_table"
13
14
  require_relative "iceberg/table_definition"
14
15
  require_relative "iceberg/version"
@@ -17,6 +18,7 @@ require_relative "iceberg/version"
17
18
  require_relative "iceberg/glue_catalog"
18
19
  require_relative "iceberg/memory_catalog"
19
20
  require_relative "iceberg/rest_catalog"
21
+ require_relative "iceberg/s3_tables_catalog"
20
22
  require_relative "iceberg/sql_catalog"
21
23
 
22
24
  module Iceberg
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iceberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.2
4
+ version: 0.11.0
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-08-28 00:00:00.000000000 Z
11
+ date: 2026-01-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -24,18 +24,20 @@ files:
24
24
  - NOTICE.txt
25
25
  - README.md
26
26
  - lib/iceberg.rb
27
- - lib/iceberg/3.2/iceberg.bundle
28
27
  - lib/iceberg/3.3/iceberg.bundle
29
28
  - lib/iceberg/3.4/iceberg.bundle
29
+ - lib/iceberg/4.0/iceberg.bundle
30
30
  - lib/iceberg/catalog.rb
31
31
  - lib/iceberg/glue_catalog.rb
32
32
  - lib/iceberg/memory_catalog.rb
33
33
  - lib/iceberg/rest_catalog.rb
34
+ - lib/iceberg/s3_tables_catalog.rb
34
35
  - lib/iceberg/schema.rb
35
36
  - lib/iceberg/sql_catalog.rb
36
37
  - lib/iceberg/static_table.rb
37
38
  - lib/iceberg/table.rb
38
39
  - lib/iceberg/table_definition.rb
40
+ - lib/iceberg/table_scan.rb
39
41
  - lib/iceberg/version.rb
40
42
  homepage: https://github.com/ankane/iceberg-ruby
41
43
  licenses:
@@ -49,10 +51,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
49
51
  requirements:
50
52
  - - ">="
51
53
  - !ruby/object:Gem::Version
52
- version: '3.2'
54
+ version: '3.3'
53
55
  - - "<"
54
56
  - !ruby/object:Gem::Version
55
- version: 3.5.dev
57
+ version: 4.1.dev
56
58
  required_rubygems_version: !ruby/object:Gem::Requirement
57
59
  requirements:
58
60
  - - ">="