iceberg 0.10.3-x86_64-linux → 0.11.0-x86_64-linux
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Cargo.lock +636 -548
- data/LICENSE-THIRD-PARTY.txt +1244 -1275
- data/README.md +3 -1
- data/lib/iceberg/3.3/iceberg.so +0 -0
- data/lib/iceberg/3.4/iceberg.so +0 -0
- data/lib/iceberg/{3.2 → 4.0}/iceberg.so +0 -0
- data/lib/iceberg/catalog.rb +10 -7
- data/lib/iceberg/glue_catalog.rb +0 -3
- data/lib/iceberg/s3_tables_catalog.rb +7 -0
- data/lib/iceberg/table.rb +4 -51
- data/lib/iceberg/version.rb +1 -1
- data/lib/iceberg.rb +1 -0
- metadata +6 -5
data/README.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
[Apache Iceberg](https://iceberg.apache.org/) for Ruby
|
|
4
4
|
|
|
5
|
+
:duck: Also check out [SeaDuck](https://github.com/ankane/seaduck)
|
|
6
|
+
|
|
5
7
|
[Build Status](https://github.com/ankane/iceberg-ruby/actions)
|
|
6
8
|
|
|
7
9
|
## Installation
|
|
@@ -35,7 +37,7 @@ catalog.create_table("main.events") do |t|
|
|
|
35
37
|
end
|
|
36
38
|
```
|
|
37
39
|
|
|
38
|
-
Or
|
|
40
|
+
Or with [Polars](https://github.com/ankane/ruby-polars)
|
|
39
41
|
|
|
40
42
|
```ruby
|
|
41
43
|
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
data/lib/iceberg/3.3/iceberg.so
CHANGED
|
Binary file
|
data/lib/iceberg/3.4/iceberg.so
CHANGED
|
Binary file
|
|
Binary file
|
data/lib/iceberg/catalog.rb
CHANGED
|
@@ -7,6 +7,7 @@ module Iceberg
|
|
|
7
7
|
def create_namespace(namespace, properties: {}, if_not_exists: nil)
|
|
8
8
|
@catalog.create_namespace(namespace, properties)
|
|
9
9
|
rescue Error => e
|
|
10
|
+
# ideally all catalogs would use NamespaceAlreadyExistsError
|
|
10
11
|
if !if_not_exists || (e.message != "Cannot create namespace" && !e.message.include?("already exists"))
|
|
11
12
|
raise e
|
|
12
13
|
end
|
|
@@ -28,7 +29,8 @@ module Iceberg
|
|
|
28
29
|
def drop_namespace(namespace, if_exists: nil)
|
|
29
30
|
@catalog.drop_namespace(namespace)
|
|
30
31
|
rescue Error => e
|
|
31
|
-
|
|
32
|
+
# ideally all catalogs would use NamespaceNotFoundError
|
|
33
|
+
if !if_exists || (e.message != "Tried to drop a namespace that does not exist" && !e.message.include?("No such namespace") && !e.message.include?("The specified namespace does not exist") && !e.message.include?("not found"))
|
|
32
34
|
raise e
|
|
33
35
|
end
|
|
34
36
|
nil
|
|
@@ -47,9 +49,9 @@ module Iceberg
|
|
|
47
49
|
table_definition = TableDefinition.new
|
|
48
50
|
yield table_definition
|
|
49
51
|
schema = Schema.new(table_definition.fields)
|
|
50
|
-
elsif schema.is_a?(Hash)
|
|
52
|
+
elsif schema.is_a?(Hash) || (defined?(Polars::Schema) && schema.is_a?(Polars::Schema))
|
|
51
53
|
fields =
|
|
52
|
-
schema.map.with_index do |(k, v), i|
|
|
54
|
+
schema.to_h.map.with_index do |(k, v), i|
|
|
53
55
|
{
|
|
54
56
|
id: i + 1,
|
|
55
57
|
name: k.is_a?(Symbol) ? k.to_s : k,
|
|
@@ -72,7 +74,8 @@ module Iceberg
|
|
|
72
74
|
def drop_table(table_name, if_exists: nil)
|
|
73
75
|
@catalog.drop_table(table_name)
|
|
74
76
|
rescue Error => e
|
|
75
|
-
|
|
77
|
+
# ideally all catalogs would use TableNotFoundError
|
|
78
|
+
if !if_exists || (e.message != "Tried to drop a table that does not exist" && !e.message.include?("No such table") && !e.message.include?("The specified table does not exist") && !e.message.include?("not found"))
|
|
76
79
|
raise e
|
|
77
80
|
end
|
|
78
81
|
nil
|
|
@@ -92,11 +95,11 @@ module Iceberg
|
|
|
92
95
|
@catalog.register_table(table_name, metadata_location)
|
|
93
96
|
end
|
|
94
97
|
|
|
95
|
-
def
|
|
98
|
+
def sql(sql)
|
|
96
99
|
# requires datafusion feature
|
|
97
|
-
raise Todo unless @catalog.respond_to?(:
|
|
100
|
+
raise Todo unless @catalog.respond_to?(:sql)
|
|
98
101
|
|
|
99
|
-
@catalog.
|
|
102
|
+
@catalog.sql(sql)
|
|
100
103
|
end
|
|
101
104
|
|
|
102
105
|
# hide internal state
|
data/lib/iceberg/glue_catalog.rb
CHANGED
|
@@ -2,9 +2,6 @@ module Iceberg
|
|
|
2
2
|
class GlueCatalog < Catalog
|
|
3
3
|
# warehouse is URI of S3 storage bucket
|
|
4
4
|
def initialize(warehouse:)
|
|
5
|
-
# requires glue feature
|
|
6
|
-
raise Error, "Feature not enabled" unless RbCatalog.respond_to?(:new_glue)
|
|
7
|
-
|
|
8
5
|
@catalog = RbCatalog.new_glue(warehouse)
|
|
9
6
|
end
|
|
10
7
|
end
|
data/lib/iceberg/table.rb
CHANGED
|
@@ -87,61 +87,14 @@ module Iceberg
|
|
|
87
87
|
TableScan.new(@table.scan(snapshot_id), self)
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
-
def to_polars(snapshot_id: nil, storage_options: nil
|
|
90
|
+
def to_polars(snapshot_id: nil, storage_options: nil)
|
|
91
91
|
require "polars-df"
|
|
92
92
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
return Polars.scan_iceberg(self, snapshot_id:, storage_options:)
|
|
93
|
+
if Gem::Version.new(Polars::VERSION) < Gem::Version.new("0.23")
|
|
94
|
+
raise "Requires polars-df >= 0.23"
|
|
96
95
|
end
|
|
97
96
|
|
|
98
|
-
|
|
99
|
-
files = scan.plan_files
|
|
100
|
-
|
|
101
|
-
if files.empty?
|
|
102
|
-
snapshot = scan.snapshot
|
|
103
|
-
scan_schema = snapshot ? schema_by_id(snapshot[:schema_id]) : current_schema
|
|
104
|
-
|
|
105
|
-
# TODO improve
|
|
106
|
-
schema =
|
|
107
|
-
scan_schema.fields.to_h do |field|
|
|
108
|
-
dtype =
|
|
109
|
-
case field[:type]
|
|
110
|
-
when "int"
|
|
111
|
-
Polars::Int32
|
|
112
|
-
when "long"
|
|
113
|
-
Polars::Int64
|
|
114
|
-
when "double"
|
|
115
|
-
Polars::Float64
|
|
116
|
-
when "string"
|
|
117
|
-
Polars::String
|
|
118
|
-
when "timestamp"
|
|
119
|
-
Polars::Datetime
|
|
120
|
-
else
|
|
121
|
-
raise Todo
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
[field[:name], dtype]
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
Polars::LazyFrame.new(schema: schema)
|
|
128
|
-
else
|
|
129
|
-
sources = files.map { |v| v[:data_file_path] }
|
|
130
|
-
|
|
131
|
-
deletion_files = [
|
|
132
|
-
"iceberg-position-delete",
|
|
133
|
-
files.map.with_index
|
|
134
|
-
.select { |v, i| v[:deletes].any? }
|
|
135
|
-
.to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
|
|
136
|
-
]
|
|
137
|
-
|
|
138
|
-
scan_options = {
|
|
139
|
-
storage_options: storage_options,
|
|
140
|
-
_deletion_files: deletion_files,
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
Polars.scan_parquet(sources, **scan_options)
|
|
144
|
-
end
|
|
97
|
+
Polars.scan_iceberg(self, snapshot_id:, storage_options:)
|
|
145
98
|
end
|
|
146
99
|
|
|
147
100
|
def append(df)
|
data/lib/iceberg/version.rb
CHANGED
data/lib/iceberg.rb
CHANGED
|
@@ -18,6 +18,7 @@ require_relative "iceberg/version"
|
|
|
18
18
|
require_relative "iceberg/glue_catalog"
|
|
19
19
|
require_relative "iceberg/memory_catalog"
|
|
20
20
|
require_relative "iceberg/rest_catalog"
|
|
21
|
+
require_relative "iceberg/s3_tables_catalog"
|
|
21
22
|
require_relative "iceberg/sql_catalog"
|
|
22
23
|
|
|
23
24
|
module Iceberg
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iceberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.3
|
|
4
|
+
version: 0.11.0
|
|
5
5
|
platform: x86_64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-01-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description:
|
|
14
14
|
email: andrew@ankane.org
|
|
@@ -24,13 +24,14 @@ files:
|
|
|
24
24
|
- NOTICE.txt
|
|
25
25
|
- README.md
|
|
26
26
|
- lib/iceberg.rb
|
|
27
|
-
- lib/iceberg/3.2/iceberg.so
|
|
28
27
|
- lib/iceberg/3.3/iceberg.so
|
|
29
28
|
- lib/iceberg/3.4/iceberg.so
|
|
29
|
+
- lib/iceberg/4.0/iceberg.so
|
|
30
30
|
- lib/iceberg/catalog.rb
|
|
31
31
|
- lib/iceberg/glue_catalog.rb
|
|
32
32
|
- lib/iceberg/memory_catalog.rb
|
|
33
33
|
- lib/iceberg/rest_catalog.rb
|
|
34
|
+
- lib/iceberg/s3_tables_catalog.rb
|
|
34
35
|
- lib/iceberg/schema.rb
|
|
35
36
|
- lib/iceberg/sql_catalog.rb
|
|
36
37
|
- lib/iceberg/static_table.rb
|
|
@@ -50,10 +51,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
50
51
|
requirements:
|
|
51
52
|
- - ">="
|
|
52
53
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: '3.2'
|
|
54
|
+
version: '3.3'
|
|
54
55
|
- - "<"
|
|
55
56
|
- !ruby/object:Gem::Version
|
|
56
|
-
version: 3.5.dev
|
|
57
|
+
version: 4.1.dev
|
|
57
58
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
59
|
requirements:
|
|
59
60
|
- - ">="
|