iceberg 0.10.2-arm64-darwin → 0.11.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +734 -790
- data/LICENSE-THIRD-PARTY.txt +4720 -4637
- data/README.md +6 -6
- data/lib/iceberg/3.3/iceberg.bundle +0 -0
- data/lib/iceberg/3.4/iceberg.bundle +0 -0
- data/lib/iceberg/{3.2 → 4.0}/iceberg.bundle +0 -0
- data/lib/iceberg/catalog.rb +10 -7
- data/lib/iceberg/glue_catalog.rb +0 -3
- data/lib/iceberg/s3_tables_catalog.rb +7 -0
- data/lib/iceberg/table.rb +8 -46
- data/lib/iceberg/table_scan.rb +18 -0
- data/lib/iceberg/version.rb +1 -1
- data/lib/iceberg.rb +2 -0
- metadata +7 -5
data/README.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
[Apache Iceberg](https://iceberg.apache.org/) for Ruby
|
|
4
4
|
|
|
5
|
+
:duck: Also check out [SeaDuck](https://github.com/ankane/seaduck)
|
|
6
|
+
|
|
5
7
|
[Build Status](https://github.com/ankane/iceberg-ruby/actions)
|
|
6
8
|
|
|
7
9
|
## Installation
|
|
@@ -35,7 +37,7 @@ catalog.create_table("main.events") do |t|
|
|
|
35
37
|
end
|
|
36
38
|
```
|
|
37
39
|
|
|
38
|
-
Or
|
|
40
|
+
Or with [Polars](https://github.com/ankane/ruby-polars)
|
|
39
41
|
|
|
40
42
|
```ruby
|
|
41
43
|
df = Polars::DataFrame.new({"id" => [1, 2], "value" => [3.0, 4.0]})
|
|
@@ -82,9 +84,7 @@ Iceberg::MemoryCatalog.new(
|
|
|
82
84
|
)
|
|
83
85
|
```
|
|
84
86
|
|
|
85
|
-
##
|
|
86
|
-
|
|
87
|
-
### Namespaces
|
|
87
|
+
## Namespaces
|
|
88
88
|
|
|
89
89
|
List namespaces
|
|
90
90
|
|
|
@@ -122,7 +122,7 @@ Drop a namespace
|
|
|
122
122
|
catalog.drop_namespace("main")
|
|
123
123
|
```
|
|
124
124
|
|
|
125
|
-
|
|
125
|
+
## Tables
|
|
126
126
|
|
|
127
127
|
List tables
|
|
128
128
|
|
|
@@ -169,7 +169,7 @@ Drop a table
|
|
|
169
169
|
catalog.drop_table("main.events")
|
|
170
170
|
```
|
|
171
171
|
|
|
172
|
-
|
|
172
|
+
## Static Tables
|
|
173
173
|
|
|
174
174
|
Load a static table
|
|
175
175
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/iceberg/catalog.rb
CHANGED
|
@@ -7,6 +7,7 @@ module Iceberg
|
|
|
7
7
|
def create_namespace(namespace, properties: {}, if_not_exists: nil)
|
|
8
8
|
@catalog.create_namespace(namespace, properties)
|
|
9
9
|
rescue Error => e
|
|
10
|
+
# ideally all catalogs would use NamespaceAlreadyExistsError
|
|
10
11
|
if !if_not_exists || (e.message != "Cannot create namespace" && !e.message.include?("already exists"))
|
|
11
12
|
raise e
|
|
12
13
|
end
|
|
@@ -28,7 +29,8 @@ module Iceberg
|
|
|
28
29
|
def drop_namespace(namespace, if_exists: nil)
|
|
29
30
|
@catalog.drop_namespace(namespace)
|
|
30
31
|
rescue Error => e
|
|
31
|
-
|
|
32
|
+
# ideally all catalogs would use NamespaceNotFoundError
|
|
33
|
+
if !if_exists || (e.message != "Tried to drop a namespace that does not exist" && !e.message.include?("No such namespace") && !e.message.include?("The specified namespace does not exist") && !e.message.include?("not found"))
|
|
32
34
|
raise e
|
|
33
35
|
end
|
|
34
36
|
nil
|
|
@@ -47,9 +49,9 @@ module Iceberg
|
|
|
47
49
|
table_definition = TableDefinition.new
|
|
48
50
|
yield table_definition
|
|
49
51
|
schema = Schema.new(table_definition.fields)
|
|
50
|
-
elsif schema.is_a?(Hash)
|
|
52
|
+
elsif schema.is_a?(Hash) || (defined?(Polars::Schema) && schema.is_a?(Polars::Schema))
|
|
51
53
|
fields =
|
|
52
|
-
schema.map.with_index do |(k, v), i|
|
|
54
|
+
schema.to_h.map.with_index do |(k, v), i|
|
|
53
55
|
{
|
|
54
56
|
id: i + 1,
|
|
55
57
|
name: k.is_a?(Symbol) ? k.to_s : k,
|
|
@@ -72,7 +74,8 @@ module Iceberg
|
|
|
72
74
|
def drop_table(table_name, if_exists: nil)
|
|
73
75
|
@catalog.drop_table(table_name)
|
|
74
76
|
rescue Error => e
|
|
75
|
-
|
|
77
|
+
# ideally all catalogs would use TableNotFoundError
|
|
78
|
+
if !if_exists || (e.message != "Tried to drop a table that does not exist" && !e.message.include?("No such table") && !e.message.include?("The specified table does not exist") && !e.message.include?("not found"))
|
|
76
79
|
raise e
|
|
77
80
|
end
|
|
78
81
|
nil
|
|
@@ -92,11 +95,11 @@ module Iceberg
|
|
|
92
95
|
@catalog.register_table(table_name, metadata_location)
|
|
93
96
|
end
|
|
94
97
|
|
|
95
|
-
def
|
|
98
|
+
def sql(sql)
|
|
96
99
|
# requires datafusion feature
|
|
97
|
-
raise Todo unless @catalog.respond_to?(:
|
|
100
|
+
raise Todo unless @catalog.respond_to?(:sql)
|
|
98
101
|
|
|
99
|
-
@catalog.
|
|
102
|
+
@catalog.sql(sql)
|
|
100
103
|
end
|
|
101
104
|
|
|
102
105
|
# hide internal state
|
data/lib/iceberg/glue_catalog.rb
CHANGED
|
@@ -2,9 +2,6 @@ module Iceberg
|
|
|
2
2
|
class GlueCatalog < Catalog
|
|
3
3
|
# warehouse is URI of S3 storage bucket
|
|
4
4
|
def initialize(warehouse:)
|
|
5
|
-
# requires glue feature
|
|
6
|
-
raise Error, "Feature not enabled" unless RbCatalog.respond_to?(:new_glue)
|
|
7
|
-
|
|
8
5
|
@catalog = RbCatalog.new_glue(warehouse)
|
|
9
6
|
end
|
|
10
7
|
end
|
data/lib/iceberg/table.rb
CHANGED
|
@@ -83,56 +83,18 @@ module Iceberg
|
|
|
83
83
|
@table.properties
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
+
def scan(snapshot_id: nil)
|
|
87
|
+
TableScan.new(@table.scan(snapshot_id), self)
|
|
88
|
+
end
|
|
89
|
+
|
|
86
90
|
def to_polars(snapshot_id: nil, storage_options: nil)
|
|
87
91
|
require "polars-df"
|
|
88
92
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# TODO improve
|
|
92
|
-
schema =
|
|
93
|
-
# TODO use schema from snapshot_id
|
|
94
|
-
current_schema.fields.to_h do |field|
|
|
95
|
-
dtype =
|
|
96
|
-
case field[:type]
|
|
97
|
-
when "int"
|
|
98
|
-
Polars::Int32
|
|
99
|
-
when "long"
|
|
100
|
-
Polars::Int64
|
|
101
|
-
when "double"
|
|
102
|
-
Polars::Float64
|
|
103
|
-
when "string"
|
|
104
|
-
Polars::String
|
|
105
|
-
when "timestamp"
|
|
106
|
-
Polars::Datetime
|
|
107
|
-
else
|
|
108
|
-
raise Todo
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
[field[:name], dtype]
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
Polars::LazyFrame.new(schema: schema)
|
|
115
|
-
else
|
|
116
|
-
sources = files.map { |v| v[:data_file_path] }
|
|
117
|
-
|
|
118
|
-
deletion_files = [
|
|
119
|
-
"iceberg-position-delete",
|
|
120
|
-
files.map.with_index
|
|
121
|
-
.select { |v, i| v[:deletes].any? }
|
|
122
|
-
.to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
|
|
123
|
-
]
|
|
124
|
-
|
|
125
|
-
Polars.scan_parquet(
|
|
126
|
-
sources,
|
|
127
|
-
storage_options: storage_options,
|
|
128
|
-
# TODO
|
|
129
|
-
# cast_options: Polars::ScanCastOptions._default_iceberg,
|
|
130
|
-
# allow_missing_columns: true,
|
|
131
|
-
# extra_columns: "ignore",
|
|
132
|
-
# _column_mapping: column_mapping,
|
|
133
|
-
_deletion_files: deletion_files
|
|
134
|
-
)
|
|
93
|
+
if Gem::Version.new(Polars::VERSION) < Gem::Version.new("0.23")
|
|
94
|
+
raise "Requires polars-df >= 0.23"
|
|
135
95
|
end
|
|
96
|
+
|
|
97
|
+
Polars.scan_iceberg(self, snapshot_id:, storage_options:)
|
|
136
98
|
end
|
|
137
99
|
|
|
138
100
|
def append(df)
|
data/lib/iceberg/version.rb
CHANGED
data/lib/iceberg.rb
CHANGED
|
@@ -9,6 +9,7 @@ end
|
|
|
9
9
|
require_relative "iceberg/catalog"
|
|
10
10
|
require_relative "iceberg/schema"
|
|
11
11
|
require_relative "iceberg/table"
|
|
12
|
+
require_relative "iceberg/table_scan"
|
|
12
13
|
require_relative "iceberg/static_table"
|
|
13
14
|
require_relative "iceberg/table_definition"
|
|
14
15
|
require_relative "iceberg/version"
|
|
@@ -17,6 +18,7 @@ require_relative "iceberg/version"
|
|
|
17
18
|
require_relative "iceberg/glue_catalog"
|
|
18
19
|
require_relative "iceberg/memory_catalog"
|
|
19
20
|
require_relative "iceberg/rest_catalog"
|
|
21
|
+
require_relative "iceberg/s3_tables_catalog"
|
|
20
22
|
require_relative "iceberg/sql_catalog"
|
|
21
23
|
|
|
22
24
|
module Iceberg
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iceberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.2
|
|
4
|
+
version: 0.11.0
|
|
5
5
|
platform: arm64-darwin
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-01-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description:
|
|
14
14
|
email: andrew@ankane.org
|
|
@@ -24,18 +24,20 @@ files:
|
|
|
24
24
|
- NOTICE.txt
|
|
25
25
|
- README.md
|
|
26
26
|
- lib/iceberg.rb
|
|
27
|
-
- lib/iceberg/3.2/iceberg.bundle
|
|
28
27
|
- lib/iceberg/3.3/iceberg.bundle
|
|
29
28
|
- lib/iceberg/3.4/iceberg.bundle
|
|
29
|
+
- lib/iceberg/4.0/iceberg.bundle
|
|
30
30
|
- lib/iceberg/catalog.rb
|
|
31
31
|
- lib/iceberg/glue_catalog.rb
|
|
32
32
|
- lib/iceberg/memory_catalog.rb
|
|
33
33
|
- lib/iceberg/rest_catalog.rb
|
|
34
|
+
- lib/iceberg/s3_tables_catalog.rb
|
|
34
35
|
- lib/iceberg/schema.rb
|
|
35
36
|
- lib/iceberg/sql_catalog.rb
|
|
36
37
|
- lib/iceberg/static_table.rb
|
|
37
38
|
- lib/iceberg/table.rb
|
|
38
39
|
- lib/iceberg/table_definition.rb
|
|
40
|
+
- lib/iceberg/table_scan.rb
|
|
39
41
|
- lib/iceberg/version.rb
|
|
40
42
|
homepage: https://github.com/ankane/iceberg-ruby
|
|
41
43
|
licenses:
|
|
@@ -49,10 +51,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
49
51
|
requirements:
|
|
50
52
|
- - ">="
|
|
51
53
|
- !ruby/object:Gem::Version
|
|
52
|
-
version: '3.2'
|
|
54
|
+
version: '3.3'
|
|
53
55
|
- - "<"
|
|
54
56
|
- !ruby/object:Gem::Version
|
|
55
|
-
version: 3.5.dev
|
|
57
|
+
version: 4.1.dev
|
|
56
58
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
57
59
|
requirements:
|
|
58
60
|
- - ">="
|