iceberg 0.7.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ module Iceberg
2
+ class Catalog
3
+ def list_namespaces(parent = nil)
4
+ @catalog.list_namespaces(parent)
5
+ end
6
+
7
+ def create_namespace(namespace, properties: {}, if_not_exists: nil)
8
+ @catalog.create_namespace(namespace, properties)
9
+ rescue Error => e
10
+ if !if_not_exists || (e.message != "Cannot create namespace" && !e.message.include?("already exists"))
11
+ raise e
12
+ end
13
+ nil
14
+ end
15
+
16
+ def namespace_exists?(namespace)
17
+ @catalog.namespace_exists?(namespace)
18
+ end
19
+
20
+ def namespace_properties(namespace)
21
+ @catalog.namespace_properties(namespace)
22
+ end
23
+
24
+ def update_namespace(namespace, properties:)
25
+ @catalog.update_namespace(namespace, properties)
26
+ end
27
+
28
+ def drop_namespace(namespace, if_exists: nil)
29
+ @catalog.drop_namespace(namespace)
30
+ rescue Error => e
31
+ if !if_exists || (e.message != "Tried to drop a namespace that does not exist" && !e.message.include?("No such namespace"))
32
+ raise e
33
+ end
34
+ nil
35
+ end
36
+
37
+ def list_tables(namespace)
38
+ @catalog.list_tables(namespace)
39
+ end
40
+
41
+ def create_table(table_name, schema: nil, location: nil)
42
+ if !schema.nil? && block_given?
43
+ raise ArgumentError, "Must pass schema or block"
44
+ end
45
+
46
+ if block_given?
47
+ table_definition = TableDefinition.new
48
+ yield table_definition
49
+ schema = Schema.new(table_definition.fields)
50
+ elsif schema.is_a?(Hash)
51
+ fields =
52
+ schema.map.with_index do |(k, v), i|
53
+ {
54
+ id: i + 1,
55
+ name: k.is_a?(Symbol) ? k.to_s : k,
56
+ type: v,
57
+ required: false
58
+ }
59
+ end
60
+ schema = Schema.new(fields)
61
+ elsif schema.nil?
62
+ schema = Schema.new([])
63
+ end
64
+
65
+ Table.new(@catalog.create_table(table_name, schema, location), @catalog)
66
+ end
67
+
68
+ def load_table(table_name)
69
+ Table.new(@catalog.load_table(table_name), @catalog)
70
+ end
71
+
72
+ def drop_table(table_name, if_exists: nil)
73
+ @catalog.drop_table(table_name)
74
+ rescue Error => e
75
+ if !if_exists || (e.message != "Tried to drop a table that does not exist" && !e.message.include?("No such table"))
76
+ raise e
77
+ end
78
+ nil
79
+ end
80
+
81
+ def table_exists?(table_name)
82
+ @catalog.table_exists?(table_name)
83
+ rescue NamespaceNotFoundError
84
+ false
85
+ end
86
+
87
+ def rename_table(table_name, new_name)
88
+ @catalog.rename_table(table_name, new_name)
89
+ end
90
+
91
+ def register_table(table_name, metadata_location)
92
+ @catalog.register_table(table_name, metadata_location)
93
+ end
94
+
95
+ def query(sql)
96
+ # requires datafusion feature
97
+ raise Todo unless @catalog.respond_to?(:query)
98
+
99
+ @catalog.query(sql)
100
+ end
101
+
102
+ # hide internal state
103
+ def inspect
104
+ to_s
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,11 @@
1
+ module Iceberg
2
+ class GlueCatalog < Catalog
3
+ # warehouse is URI of S3 storage bucket
4
+ def initialize(warehouse:)
5
+ # requires glue feature
6
+ raise Error, "Feature not enabled" unless RbCatalog.respond_to?(:new_glue)
7
+
8
+ @catalog = RbCatalog.new_glue(warehouse)
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,8 @@
1
+ module Iceberg
2
+ class MemoryCatalog < Catalog
3
+ # warehouse is default storage location
4
+ def initialize(warehouse: nil)
5
+ @catalog = RbCatalog.new_memory(warehouse)
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Iceberg
2
+ class RestCatalog < Catalog
3
+ # warehouse is passed to REST server
4
+ def initialize(uri:, warehouse: nil, properties: {})
5
+ @catalog = RbCatalog.new_rest(uri, warehouse, properties)
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,10 @@
1
+ module Iceberg
2
+ class Schema
3
+ attr_reader :fields, :schema_id
4
+
5
+ def initialize(fields, schema_id: nil)
6
+ @fields = fields
7
+ @schema_id = schema_id
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,9 @@
1
+ module Iceberg
2
+ class SqlCatalog < Catalog
3
+ # warehouse is default storage location
4
+ # name is stored in SQL table
5
+ def initialize(uri:, warehouse:, name: "main", properties: {})
6
+ @catalog = RbCatalog.new_sql(uri, warehouse, name, properties)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,8 @@
1
+ module Iceberg
2
+ class StaticTable < Table
3
+ def initialize(metadata_location)
4
+ table = RbTable.from_metadata_file(metadata_location)
5
+ super(table, nil)
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,154 @@
1
+ module Iceberg
2
+ class Table
3
+ def initialize(table, catalog)
4
+ @table = table
5
+ @catalog = catalog
6
+ end
7
+
8
+ def format_version
9
+ @table.format_version
10
+ end
11
+
12
+ def uuid
13
+ @table.uuid
14
+ end
15
+
16
+ def location
17
+ @table.location
18
+ end
19
+
20
+ def last_sequence_number
21
+ @table.last_sequence_number
22
+ end
23
+
24
+ def next_sequence_number
25
+ @table.next_sequence_number
26
+ end
27
+
28
+ def last_column_id
29
+ @table.last_column_id
30
+ end
31
+
32
+ def last_partition_id
33
+ @table.last_partition_id
34
+ end
35
+
36
+ def schemas
37
+ @table.schemas
38
+ end
39
+
40
+ def schema_by_id(schema_id)
41
+ @table.schema_by_id(schema_id)
42
+ end
43
+
44
+ def current_schema
45
+ @table.current_schema
46
+ end
47
+ alias_method :schema, :current_schema
48
+
49
+ def current_schema_id
50
+ @table.current_schema_id
51
+ end
52
+ alias_method :schema_id, :current_schema_id
53
+
54
+ def default_partition_spec_id
55
+ @table.default_partition_spec_id
56
+ end
57
+
58
+ def snapshots
59
+ @table.snapshots
60
+ end
61
+
62
+ def snapshot_by_id(snapshot_id)
63
+ @table.snapshot_by_id(snapshot_id)
64
+ end
65
+
66
+ def history
67
+ @table.history
68
+ end
69
+
70
+ def metadata_log
71
+ @table.metadata_log
72
+ end
73
+
74
+ def current_snapshot
75
+ @table.current_snapshot
76
+ end
77
+
78
+ def current_snapshot_id
79
+ @table.current_snapshot_id
80
+ end
81
+
82
+ def properties
83
+ @table.properties
84
+ end
85
+
86
+ def to_polars(snapshot_id: nil, storage_options: nil)
87
+ require "polars-df"
88
+
89
+ files = @table.scan(snapshot_id).plan_files
90
+ if files.empty?
91
+ # TODO improve
92
+ schema =
93
+ current_schema.fields.to_h do |field|
94
+ dtype =
95
+ case field[:type]
96
+ when "int"
97
+ Polars::Int32
98
+ when "long"
99
+ Polars::Int64
100
+ when "double"
101
+ Polars::Float64
102
+ when "string"
103
+ Polars::String
104
+ when "timestamp"
105
+ Polars::Datetime
106
+ else
107
+ raise Todo
108
+ end
109
+
110
+ [field[:name], dtype]
111
+ end
112
+
113
+ Polars::LazyFrame.new(schema: schema)
114
+ else
115
+ sources = files.map { |v| v[:data_file_path] }
116
+
117
+ deletion_files = [
118
+ "iceberg-position-delete",
119
+ files.map.with_index
120
+ .select { |v, i| v[:deletes].any? }
121
+ .to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] }
122
+ ]
123
+
124
+ Polars.scan_parquet(
125
+ sources,
126
+ storage_options: storage_options,
127
+ # TODO
128
+ # cast_options: Polars::ScanCastOptions._default_iceberg,
129
+ # allow_missing_columns: true,
130
+ # extra_columns: "ignore",
131
+ # _column_mapping: column_mapping,
132
+ _deletion_files: deletion_files
133
+ )
134
+ end
135
+ end
136
+
137
+ def append(df)
138
+ check_catalog
139
+ @table = @table.append(df.arrow_c_stream, @catalog)
140
+ nil
141
+ end
142
+
143
+ # hide internal state
144
+ def inspect
145
+ to_s
146
+ end
147
+
148
+ private
149
+
150
+ def check_catalog
151
+ raise Error, "Read-only table" if @catalog.nil?
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,37 @@
1
+ module Iceberg
2
+ class TableDefinition
3
+ TYPES = %w[
4
+ boolean int long float double date timestamp timestamptz string uuid binary
5
+ ]
6
+
7
+ TYPE_ALIASES = {
8
+ "integer" => "int",
9
+ "bigint" => "long"
10
+ }
11
+
12
+ attr_reader :fields
13
+
14
+ def initialize
15
+ @fields = []
16
+ end
17
+
18
+ (TYPES + TYPE_ALIASES.keys).each do |type|
19
+ define_method type do |name, **options|
20
+ column(name, type, **options)
21
+ end
22
+ end
23
+
24
+ def column(name, type, null: true, default: nil, comment: nil)
25
+ type = type.to_s
26
+ @fields << {
27
+ id: @fields.size + 1,
28
+ name: name.to_s,
29
+ type: TYPE_ALIASES.fetch(type, type),
30
+ required: !null,
31
+ doc: comment,
32
+ # no need for initial default (and not supported until v3)
33
+ write_default: default
34
+ }
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,3 @@
1
+ module Iceberg
2
+ VERSION = "0.10.1"
3
+ end
data/lib/iceberg.rb CHANGED
@@ -0,0 +1,36 @@
1
+ # ext
2
+ begin
3
+ require "iceberg/#{RUBY_VERSION.to_f}/iceberg"
4
+ rescue LoadError
5
+ require "iceberg/iceberg"
6
+ end
7
+
8
+ # modules
9
+ require_relative "iceberg/catalog"
10
+ require_relative "iceberg/schema"
11
+ require_relative "iceberg/table"
12
+ require_relative "iceberg/static_table"
13
+ require_relative "iceberg/table_definition"
14
+ require_relative "iceberg/version"
15
+
16
+ # catalogs
17
+ require_relative "iceberg/glue_catalog"
18
+ require_relative "iceberg/memory_catalog"
19
+ require_relative "iceberg/rest_catalog"
20
+ require_relative "iceberg/sql_catalog"
21
+
22
+ module Iceberg
23
+ class Error < StandardError; end
24
+ class InvalidDataError < Error; end
25
+ class NamespaceAlreadyExistsError < Error; end
26
+ class NamespaceNotFoundError < Error; end
27
+ class TableAlreadyExistsError < Error; end
28
+ class TableNotFoundError < Error; end
29
+ class UnsupportedFeatureError < Error; end
30
+
31
+ class Todo < Error
32
+ def message
33
+ "not implemented yet"
34
+ end
35
+ end
36
+ end
metadata CHANGED
@@ -1,136 +1,80 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: iceberg
3
- version: !ruby/object:Gem::Version
4
- hash: 3
5
- prerelease:
6
- segments:
7
- - 0
8
- - 7
9
- - 0
10
- version: 0.7.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.10.1
11
5
  platform: ruby
12
- authors:
13
- - stellard
14
- autorequire:
6
+ authors:
7
+ - Andrew Kane
15
8
  bindir: bin
16
9
  cert_chain: []
17
-
18
- date: 2011-08-03 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- requirement: &id001 !ruby/object:Gem::Requirement
22
- none: false
23
- requirements:
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rb_sys
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
24
16
  - - ">="
25
- - !ruby/object:Gem::Version
26
- hash: 3
27
- segments:
28
- - 0
29
- version: "0"
30
- version_requirements: *id001
31
- type: :development
32
- name: shoulda
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
33
20
  prerelease: false
34
- - !ruby/object:Gem::Dependency
35
- requirement: &id002 !ruby/object:Gem::Requirement
36
- none: false
37
- requirements:
38
- - - ~>
39
- - !ruby/object:Gem::Version
40
- hash: 23
41
- segments:
42
- - 1
43
- - 0
44
- - 0
45
- version: 1.0.0
46
- version_requirements: *id002
47
- type: :development
48
- name: bundler
49
- prerelease: false
50
- - !ruby/object:Gem::Dependency
51
- requirement: &id003 !ruby/object:Gem::Requirement
52
- none: false
53
- requirements:
54
- - - ~>
55
- - !ruby/object:Gem::Version
56
- hash: 7
57
- segments:
58
- - 1
59
- - 5
60
- - 2
61
- version: 1.5.2
62
- version_requirements: *id003
63
- type: :development
64
- name: jeweler
65
- prerelease: false
66
- - !ruby/object:Gem::Dependency
67
- requirement: &id004 !ruby/object:Gem::Requirement
68
- none: false
69
- requirements:
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
70
23
  - - ">="
71
- - !ruby/object:Gem::Version
72
- hash: 3
73
- segments:
74
- - 0
75
- version: "0"
76
- version_requirements: *id004
77
- type: :development
78
- name: rcov
79
- prerelease: false
80
- description: just a placeholder for now until public release
81
- email: scott.ellard@gmail.com
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ email: andrew@ankane.org
82
27
  executables: []
83
-
84
- extensions: []
85
-
86
- extra_rdoc_files:
87
- - LICENSE.txt
88
- - README.rdoc
89
- files:
90
- - .document
91
- - Gemfile
92
- - Gemfile.lock
28
+ extensions:
29
+ - ext/iceberg/extconf.rb
30
+ extra_rdoc_files: []
31
+ files:
32
+ - CHANGELOG.md
33
+ - Cargo.lock
34
+ - Cargo.toml
93
35
  - LICENSE.txt
94
- - README.rdoc
95
- - Rakefile
96
- - VERSION
97
- - iceberg.gemspec
36
+ - NOTICE.txt
37
+ - README.md
38
+ - ext/iceberg/Cargo.toml
39
+ - ext/iceberg/extconf.rb
40
+ - ext/iceberg/src/arrow.rs
41
+ - ext/iceberg/src/catalog.rs
42
+ - ext/iceberg/src/error.rs
43
+ - ext/iceberg/src/lib.rs
44
+ - ext/iceberg/src/runtime.rs
45
+ - ext/iceberg/src/scan.rs
46
+ - ext/iceberg/src/table.rs
47
+ - ext/iceberg/src/utils.rs
98
48
  - lib/iceberg.rb
99
- - test/helper.rb
100
- - test/test_iceberg.rb
101
- homepage: http://github.com/stellard/iceberg
102
- licenses:
103
- - MIT
104
- post_install_message:
49
+ - lib/iceberg/catalog.rb
50
+ - lib/iceberg/glue_catalog.rb
51
+ - lib/iceberg/memory_catalog.rb
52
+ - lib/iceberg/rest_catalog.rb
53
+ - lib/iceberg/schema.rb
54
+ - lib/iceberg/sql_catalog.rb
55
+ - lib/iceberg/static_table.rb
56
+ - lib/iceberg/table.rb
57
+ - lib/iceberg/table_definition.rb
58
+ - lib/iceberg/version.rb
59
+ homepage: https://github.com/ankane/iceberg-ruby
60
+ licenses:
61
+ - Apache-2.0
62
+ metadata: {}
105
63
  rdoc_options: []
106
-
107
- require_paths:
64
+ require_paths:
108
65
  - lib
109
- required_ruby_version: !ruby/object:Gem::Requirement
110
- none: false
111
- requirements:
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
112
68
  - - ">="
113
- - !ruby/object:Gem::Version
114
- hash: 3
115
- segments:
116
- - 0
117
- version: "0"
118
- required_rubygems_version: !ruby/object:Gem::Requirement
119
- none: false
120
- requirements:
69
+ - !ruby/object:Gem::Version
70
+ version: '3.2'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
121
73
  - - ">="
122
- - !ruby/object:Gem::Version
123
- hash: 3
124
- segments:
125
- - 0
126
- version: "0"
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
127
76
  requirements: []
128
-
129
- rubyforge_project:
130
- rubygems_version: 1.8.5
131
- signing_key:
132
- specification_version: 3
133
- summary: just a placeholder for now until public release
134
- test_files:
135
- - test/helper.rb
136
- - test/test_iceberg.rb
77
+ rubygems_version: 3.6.9
78
+ specification_version: 4
79
+ summary: Apache Iceberg for Ruby
80
+ test_files: []
data/.document DELETED
@@ -1,5 +0,0 @@
1
- lib/**/*.rb
2
- bin/*
3
- -
4
- features/**/*.feature
5
- LICENSE.txt
data/Gemfile DELETED
@@ -1,13 +0,0 @@
1
- source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
-
6
- # Add dependencies to develop your gem here.
7
- # Include everything needed to run rake, tests, features, etc.
8
- group :development do
9
- gem "shoulda", ">= 0"
10
- gem "bundler", "~> 1.0.0"
11
- gem "jeweler", "~> 1.5.2"
12
- gem "rcov", ">= 0"
13
- end
data/Gemfile.lock DELETED
@@ -1,20 +0,0 @@
1
- GEM
2
- remote: http://rubygems.org/
3
- specs:
4
- git (1.2.5)
5
- jeweler (1.5.2)
6
- bundler (~> 1.0.0)
7
- git (>= 1.2.5)
8
- rake
9
- rake (0.9.2)
10
- rcov (0.9.9)
11
- shoulda (2.11.3)
12
-
13
- PLATFORMS
14
- ruby
15
-
16
- DEPENDENCIES
17
- bundler (~> 1.0.0)
18
- jeweler (~> 1.5.2)
19
- rcov
20
- shoulda
data/README.rdoc DELETED
@@ -1,19 +0,0 @@
1
- = iceberg
2
-
3
- Description goes here.
4
-
5
- == Contributing to iceberg
6
-
7
- * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
- * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
9
- * Fork the project
10
- * Start a feature/bugfix branch
11
- * Commit and push until you are happy with your contribution
12
- * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
- * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
14
-
15
- == Copyright
16
-
17
- Copyright (c) 2011 stellard. See LICENSE.txt for
18
- further details.
19
-