iceberg 0.10.2-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +6494 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +67580 -0
- data/LICENSE.txt +202 -0
- data/NOTICE.txt +14 -0
- data/README.md +211 -0
- data/lib/iceberg/3.2/iceberg.so +0 -0
- data/lib/iceberg/3.3/iceberg.so +0 -0
- data/lib/iceberg/3.4/iceberg.so +0 -0
- data/lib/iceberg/catalog.rb +107 -0
- data/lib/iceberg/glue_catalog.rb +11 -0
- data/lib/iceberg/memory_catalog.rb +8 -0
- data/lib/iceberg/rest_catalog.rb +8 -0
- data/lib/iceberg/schema.rb +10 -0
- data/lib/iceberg/sql_catalog.rb +9 -0
- data/lib/iceberg/static_table.rb +8 -0
- data/lib/iceberg/table.rb +155 -0
- data/lib/iceberg/table_definition.rb +37 -0
- data/lib/iceberg/version.rb +3 -0
- data/lib/iceberg.rb +36 -0
- metadata +66 -0
@@ -0,0 +1,155 @@
|
|
1
|
+
module Iceberg
  # Wrapper around a lower-level table object (+@table+) and, optionally, the
  # catalog it belongs to (+@catalog+).
  #
  # Metadata readers forward straight to the wrapped object. Operations that
  # modify the table need a catalog; without one the table is read-only.
  class Table
    # @param table the wrapped table object every reader delegates to
    # @param catalog the catalog handle used for writes, or nil for a
    #   read-only table
    def initialize(table, catalog)
      @table = table
      @catalog = catalog
    end

    # --- metadata readers: each forwards to the wrapped table unchanged ---

    def format_version
      @table.format_version
    end

    def uuid
      @table.uuid
    end

    def location
      @table.location
    end

    def last_sequence_number
      @table.last_sequence_number
    end

    def next_sequence_number
      @table.next_sequence_number
    end

    def last_column_id
      @table.last_column_id
    end

    def last_partition_id
      @table.last_partition_id
    end

    def schemas
      @table.schemas
    end

    # @param schema_id identifier passed through to the wrapped table
    def schema_by_id(schema_id)
      @table.schema_by_id(schema_id)
    end

    def current_schema
      @table.current_schema
    end
    alias_method :schema, :current_schema

    def current_schema_id
      @table.current_schema_id
    end
    alias_method :schema_id, :current_schema_id

    def default_partition_spec_id
      @table.default_partition_spec_id
    end

    def snapshots
      @table.snapshots
    end

    # @param snapshot_id identifier passed through to the wrapped table
    def snapshot_by_id(snapshot_id)
      @table.snapshot_by_id(snapshot_id)
    end

    def history
      @table.history
    end

    def metadata_log
      @table.metadata_log
    end

    def current_snapshot
      @table.current_snapshot
    end

    def current_snapshot_id
      @table.current_snapshot_id
    end

    def properties
      @table.properties
    end

    # Builds a Polars query over the data files planned for a scan.
    #
    # When the scan plans no files, an empty frame is built from the current
    # schema instead; +storage_options+ is not used on that path.
    #
    # @param snapshot_id passed to the wrapped table's +scan+ (nil by default)
    # @param storage_options passed through to +Polars.scan_parquet+
    # @return the value of +Polars::LazyFrame.new+ (no files) or
    #   +Polars.scan_parquet+ (one or more files)
    # @raise [Todo] when there are no files and a column has a type with no
    #   dtype mapping yet
    def to_polars(snapshot_id: nil, storage_options: nil)
      # required here rather than at load time, so polars-df is only needed
      # by callers of this method
      require "polars-df"

      files = @table.scan(snapshot_id).plan_files
      # TODO improve
      return Polars::LazyFrame.new(schema: empty_polars_schema) if files.empty?

      sources = files.map { |file| file[:data_file_path] }

      # position of each data file in +sources+ => paths of its delete files;
      # data files without deletes are left out
      delete_paths = {}
      files.each_with_index do |file, index|
        next unless file[:deletes].any?

        delete_paths[index] = file[:deletes].map { |delete| delete[:file_path] }
      end
      deletion_files = ["iceberg-position-delete", delete_paths]

      Polars.scan_parquet(
        sources,
        storage_options: storage_options,
        # TODO
        # cast_options: Polars::ScanCastOptions._default_iceberg,
        # allow_missing_columns: true,
        # extra_columns: "ignore",
        # _column_mapping: column_mapping,
        _deletion_files: deletion_files
      )
    end

    # Appends the rows of +df+ to the table and keeps the table object
    # returned by the write as the new wrapped table.
    #
    # @param df object responding to +arrow_c_stream+
    # @return [nil]
    # @raise [Error] when the table has no catalog
    def append(df)
      check_catalog
      @table = @table.append(df.arrow_c_stream, @catalog)
      nil
    end

    # hide internal state
    def inspect
      to_s
    end

    private

    def check_catalog
      raise Error, "Read-only table" if @catalog.nil?
    end

    # Column name => Polars dtype for every field of the current schema.
    # Only called after polars-df has been required.
    def empty_polars_schema
      # TODO use schema from snapshot_id
      current_schema.fields.to_h do |field|
        [field[:name], polars_dtype(field[:type])]
      end
    end

    # Maps a primitive type name to a Polars dtype. Types that are not
    # mapped yet (for example "timestamptz" and "uuid") raise Todo.
    def polars_dtype(type)
      case type
      when "boolean"
        Polars::Boolean
      when "int"
        Polars::Int32
      when "long"
        Polars::Int64
      when "float"
        Polars::Float32
      when "double"
        Polars::Float64
      when "date"
        Polars::Date
      when "string"
        Polars::String
      when "binary"
        Polars::Binary
      when "timestamp"
        Polars::Datetime
      else
        raise Todo
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Iceberg
  # Accumulates column definitions as an ordered list of field hashes.
  #
  # Columns are added either with #column or with one of the per-type helper
  # methods generated below (for example +long+, +string+, +bigint+).
  class TableDefinition
    # Type names that get a same-named helper method.
    TYPES = %w[
      boolean int long float double date timestamp timestamptz string uuid binary
    ].freeze

    # Alternative spellings, rewritten to the type on the right when a
    # column is recorded.
    TYPE_ALIASES = {
      "integer" => "int",
      "bigint" => "long"
    }.freeze

    # @return [Array<Hash>] field hashes in the order they were added
    attr_reader :fields

    def initialize
      @fields = []
    end

    # One helper per type and per alias: +type(name, **options)+ is
    # shorthand for +column(name, type, **options)+.
    (TYPES + TYPE_ALIASES.keys).each do |type|
      define_method type do |name, **options|
        column(name, type, **options)
      end
    end

    # Records one column.
    #
    # Field ids are assigned sequentially starting at 1. The type is not
    # validated: anything that is not an alias is stored as given.
    #
    # @param name column name, stored as a String
    # @param type type name or alias (String or Symbol)
    # @param null [Boolean] whether the column is optional; stored inverted
    #   as +:required+
    # @param default stored as +:write_default+
    # @param comment stored as +:doc+
    # @return [Array<Hash>] the updated field list
    def column(name, type, null: true, default: nil, comment: nil)
      type = type.to_s
      @fields << {
        id: @fields.size + 1,
        name: name.to_s,
        type: TYPE_ALIASES.fetch(type, type),
        required: !null,
        doc: comment,
        # no need for initial default (and not supported until v3)
        write_default: default
      }
    end
  end
end
|
data/lib/iceberg.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# ext
|
2
|
+
# Try the extension shipped under lib/iceberg/<major.minor>/ for the running
# Ruby first; if that cannot be loaded, fall back to the unversioned path
# (presumably an extension compiled at install time).
begin
  # Take the "major.minor" prefix textually: going through a Float turns
  # "3.10.0" into 3.1 (and "3.20.0" into 3.2), which selects the wrong
  # directory.
  require "iceberg/#{RUBY_VERSION[/\d+\.\d+/]}/iceberg"
rescue LoadError
  require "iceberg/iceberg"
end
|
7
|
+
|
8
|
+
# modules
|
9
|
+
require_relative "iceberg/catalog"
|
10
|
+
require_relative "iceberg/schema"
|
11
|
+
require_relative "iceberg/table"
|
12
|
+
require_relative "iceberg/static_table"
|
13
|
+
require_relative "iceberg/table_definition"
|
14
|
+
require_relative "iceberg/version"
|
15
|
+
|
16
|
+
# catalogs
|
17
|
+
require_relative "iceberg/glue_catalog"
|
18
|
+
require_relative "iceberg/memory_catalog"
|
19
|
+
require_relative "iceberg/rest_catalog"
|
20
|
+
require_relative "iceberg/sql_catalog"
|
21
|
+
|
22
|
+
module Iceberg
  # Base class for every error defined by this library, so callers can
  # rescue Iceberg::Error to catch all of them.
  class Error < StandardError; end

  class InvalidDataError < Error; end
  class NamespaceAlreadyExistsError < Error; end
  class NamespaceNotFoundError < Error; end
  class TableAlreadyExistsError < Error; end
  class TableNotFoundError < Error; end
  class UnsupportedFeatureError < Error; end

  # Raised for functionality that has not been implemented yet.
  #
  # The default text is supplied through the constructor rather than by
  # overriding #message, so #message, #to_s and #inspect all agree and an
  # explicit message given to +raise Todo, "..."+ is kept.
  class Todo < Error
    # @param msg [String] error text; defaults to "not implemented yet"
    def initialize(msg = "not implemented yet")
      super
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iceberg
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.10.2
|
5
|
+
platform: aarch64-linux
|
6
|
+
authors:
|
7
|
+
- Andrew Kane
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-08-28 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description:
|
14
|
+
email: andrew@ankane.org
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- CHANGELOG.md
|
20
|
+
- Cargo.lock
|
21
|
+
- Cargo.toml
|
22
|
+
- LICENSE-THIRD-PARTY.txt
|
23
|
+
- LICENSE.txt
|
24
|
+
- NOTICE.txt
|
25
|
+
- README.md
|
26
|
+
- lib/iceberg.rb
|
27
|
+
- lib/iceberg/3.2/iceberg.so
|
28
|
+
- lib/iceberg/3.3/iceberg.so
|
29
|
+
- lib/iceberg/3.4/iceberg.so
|
30
|
+
- lib/iceberg/catalog.rb
|
31
|
+
- lib/iceberg/glue_catalog.rb
|
32
|
+
- lib/iceberg/memory_catalog.rb
|
33
|
+
- lib/iceberg/rest_catalog.rb
|
34
|
+
- lib/iceberg/schema.rb
|
35
|
+
- lib/iceberg/sql_catalog.rb
|
36
|
+
- lib/iceberg/static_table.rb
|
37
|
+
- lib/iceberg/table.rb
|
38
|
+
- lib/iceberg/table_definition.rb
|
39
|
+
- lib/iceberg/version.rb
|
40
|
+
homepage: https://github.com/ankane/iceberg-ruby
|
41
|
+
licenses:
|
42
|
+
- Apache-2.0
|
43
|
+
metadata: {}
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '3.2'
|
53
|
+
- - "<"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 3.5.dev
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
requirements: []
|
62
|
+
rubygems_version: 3.5.23
|
63
|
+
signing_key:
|
64
|
+
specification_version: 4
|
65
|
+
summary: Apache Iceberg for Ruby
|
66
|
+
test_files: []
|