deltalake-rb 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +504 -337
- data/README.md +11 -11
- data/ext/deltalake/Cargo.toml +5 -4
- data/ext/deltalake/src/error.rs +62 -15
- data/ext/deltalake/src/features.rs +67 -0
- data/ext/deltalake/src/lib.rs +632 -61
- data/ext/deltalake/src/merge.rs +205 -0
- data/lib/deltalake/table.rb +77 -28
- data/lib/deltalake/table_alterer.rb +33 -0
- data/lib/deltalake/table_merger.rb +38 -0
- data/lib/deltalake/table_optimizer.rb +20 -4
- data/lib/deltalake/utils.rb +59 -0
- data/lib/deltalake/version.rb +1 -1
- data/lib/deltalake.rb +34 -59
- metadata +6 -2
data/lib/deltalake.rb
CHANGED
@@ -7,6 +7,7 @@ end
|
|
7
7
|
|
8
8
|
# stdlib
|
9
9
|
require "json"
|
10
|
+
require "time"
|
10
11
|
|
11
12
|
# modules
|
12
13
|
require_relative "deltalake/field"
|
@@ -14,7 +15,9 @@ require_relative "deltalake/metadata"
|
|
14
15
|
require_relative "deltalake/schema"
|
15
16
|
require_relative "deltalake/table"
|
16
17
|
require_relative "deltalake/table_alterer"
|
18
|
+
require_relative "deltalake/table_merger"
|
17
19
|
require_relative "deltalake/table_optimizer"
|
20
|
+
require_relative "deltalake/utils"
|
18
21
|
require_relative "deltalake/version"
|
19
22
|
|
20
23
|
module DeltaLake
|
@@ -38,6 +41,28 @@ module DeltaLake
|
|
38
41
|
:reader_features
|
39
42
|
)
|
40
43
|
|
44
|
+
CommitProperties =
|
45
|
+
Struct.new(
|
46
|
+
:custom_metadata,
|
47
|
+
:max_commit_retries,
|
48
|
+
# TODO
|
49
|
+
# :app_transactions,
|
50
|
+
keyword_init: true
|
51
|
+
)
|
52
|
+
|
53
|
+
PostCommitHookProperties =
|
54
|
+
Struct.new(
|
55
|
+
:create_checkpoint,
|
56
|
+
:cleanup_expired_logs,
|
57
|
+
keyword_init: true
|
58
|
+
)
|
59
|
+
|
60
|
+
class ArrowArrayStream
|
61
|
+
def arrow_c_stream
|
62
|
+
self
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
41
66
|
class << self
|
42
67
|
def write(
|
43
68
|
table_or_uri,
|
@@ -50,7 +75,10 @@ module DeltaLake
|
|
50
75
|
schema_mode: nil,
|
51
76
|
storage_options: nil,
|
52
77
|
predicate: nil,
|
53
|
-
target_file_size: nil
|
78
|
+
target_file_size: nil,
|
79
|
+
writer_properties: nil,
|
80
|
+
commit_properties: nil,
|
81
|
+
post_commithook_properties: nil
|
54
82
|
)
|
55
83
|
table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options)
|
56
84
|
|
@@ -62,7 +90,7 @@ module DeltaLake
|
|
62
90
|
return
|
63
91
|
end
|
64
92
|
|
65
|
-
data = convert_data(data)
|
93
|
+
data = Utils.convert_data(data)
|
66
94
|
|
67
95
|
write_deltalake_rust(
|
68
96
|
table_uri,
|
@@ -76,7 +104,10 @@ module DeltaLake
|
|
76
104
|
name,
|
77
105
|
description,
|
78
106
|
configuration,
|
79
|
-
storage_options
|
107
|
+
storage_options,
|
108
|
+
writer_properties,
|
109
|
+
commit_properties,
|
110
|
+
post_commithook_properties
|
80
111
|
)
|
81
112
|
|
82
113
|
if table
|
@@ -107,61 +138,5 @@ module DeltaLake
|
|
107
138
|
rescue TableNotFoundError
|
108
139
|
nil
|
109
140
|
end
|
110
|
-
|
111
|
-
def convert_data(data)
|
112
|
-
if data.respond_to?(:arrow_c_stream)
|
113
|
-
# TODO convert other object types
|
114
|
-
# should probably move logic to Rust
|
115
|
-
if defined?(Polars::DataFrame) && data.is_a?(Polars::DataFrame)
|
116
|
-
data = convert_polars_data(data)
|
117
|
-
end
|
118
|
-
|
119
|
-
data.arrow_c_stream
|
120
|
-
else
|
121
|
-
raise TypeError, "Only objects implementing the Arrow C stream interface are valid inputs for source."
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
# unsigned integers are not part of the protocol
|
126
|
-
# https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types
|
127
|
-
def convert_polars_data(data)
|
128
|
-
new_schema = {}
|
129
|
-
data.schema.each do |k, v|
|
130
|
-
new_type = convert_polars_type(v)
|
131
|
-
new_schema[k] = new_type if new_type
|
132
|
-
end
|
133
|
-
|
134
|
-
if new_schema.any?
|
135
|
-
data.cast(new_schema)
|
136
|
-
else
|
137
|
-
data
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
def convert_polars_type(t)
|
142
|
-
case t
|
143
|
-
when Polars::UInt8
|
144
|
-
Polars::Int8
|
145
|
-
when Polars::UInt16
|
146
|
-
Polars::Int16
|
147
|
-
when Polars::UInt32
|
148
|
-
Polars::Int32
|
149
|
-
when Polars::UInt64
|
150
|
-
Polars::Int64
|
151
|
-
when Polars::Datetime
|
152
|
-
Polars::Datetime.new("us", t.time_zone) if t.time_unit != "us"
|
153
|
-
when Polars::List
|
154
|
-
inner = convert_polars_type(t.inner)
|
155
|
-
Polars::List.new(inner) if inner
|
156
|
-
when Polars::Array
|
157
|
-
inner = convert_polars_type(t.inner)
|
158
|
-
Polars::Array.new(t.inner, t.width) if inner
|
159
|
-
when Polars::Struct
|
160
|
-
if t.fields.any? { |f| convert_polars_type(f.dtype) }
|
161
|
-
fields = t.fields.map { |f| Polars::Field.new(f.name, convert_polars_type(f.dtype) || f.dtype) }
|
162
|
-
Polars::Struct.new(fields)
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|
166
141
|
end
|
167
142
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deltalake-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -39,7 +39,9 @@ files:
|
|
39
39
|
- ext/deltalake/Cargo.toml
|
40
40
|
- ext/deltalake/extconf.rb
|
41
41
|
- ext/deltalake/src/error.rs
|
42
|
+
- ext/deltalake/src/features.rs
|
42
43
|
- ext/deltalake/src/lib.rs
|
44
|
+
- ext/deltalake/src/merge.rs
|
43
45
|
- ext/deltalake/src/schema.rs
|
44
46
|
- ext/deltalake/src/utils.rs
|
45
47
|
- lib/deltalake-rb.rb
|
@@ -49,7 +51,9 @@ files:
|
|
49
51
|
- lib/deltalake/schema.rb
|
50
52
|
- lib/deltalake/table.rb
|
51
53
|
- lib/deltalake/table_alterer.rb
|
54
|
+
- lib/deltalake/table_merger.rb
|
52
55
|
- lib/deltalake/table_optimizer.rb
|
56
|
+
- lib/deltalake/utils.rb
|
53
57
|
- lib/deltalake/version.rb
|
54
58
|
homepage: https://github.com/ankane/delta-ruby
|
55
59
|
licenses:
|