deltalake-rb 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +504 -337
- data/README.md +11 -11
- data/ext/deltalake/Cargo.toml +5 -4
- data/ext/deltalake/src/error.rs +62 -15
- data/ext/deltalake/src/features.rs +67 -0
- data/ext/deltalake/src/lib.rs +632 -61
- data/ext/deltalake/src/merge.rs +205 -0
- data/lib/deltalake/table.rb +77 -28
- data/lib/deltalake/table_alterer.rb +33 -0
- data/lib/deltalake/table_merger.rb +38 -0
- data/lib/deltalake/table_optimizer.rb +20 -4
- data/lib/deltalake/utils.rb +59 -0
- data/lib/deltalake/version.rb +1 -1
- data/lib/deltalake.rb +34 -59
- metadata +6 -2
data/lib/deltalake.rb
CHANGED
@@ -7,6 +7,7 @@ end
|
|
7
7
|
|
8
8
|
# stdlib
|
9
9
|
require "json"
|
10
|
+
require "time"
|
10
11
|
|
11
12
|
# modules
|
12
13
|
require_relative "deltalake/field"
|
@@ -14,7 +15,9 @@ require_relative "deltalake/metadata"
|
|
14
15
|
require_relative "deltalake/schema"
|
15
16
|
require_relative "deltalake/table"
|
16
17
|
require_relative "deltalake/table_alterer"
|
18
|
+
require_relative "deltalake/table_merger"
|
17
19
|
require_relative "deltalake/table_optimizer"
|
20
|
+
require_relative "deltalake/utils"
|
18
21
|
require_relative "deltalake/version"
|
19
22
|
|
20
23
|
module DeltaLake
|
@@ -38,6 +41,28 @@ module DeltaLake
|
|
38
41
|
:reader_features
|
39
42
|
)
|
40
43
|
|
44
|
+
CommitProperties =
|
45
|
+
Struct.new(
|
46
|
+
:custom_metadata,
|
47
|
+
:max_commit_retries,
|
48
|
+
# TODO
|
49
|
+
# :app_transactions,
|
50
|
+
keyword_init: true
|
51
|
+
)
|
52
|
+
|
53
|
+
PostCommitHookProperties =
|
54
|
+
Struct.new(
|
55
|
+
:create_checkpoint,
|
56
|
+
:cleanup_expired_logs,
|
57
|
+
keyword_init: true
|
58
|
+
)
|
59
|
+
|
60
|
+
class ArrowArrayStream
|
61
|
+
def arrow_c_stream
|
62
|
+
self
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
41
66
|
class << self
|
42
67
|
def write(
|
43
68
|
table_or_uri,
|
@@ -50,7 +75,10 @@ module DeltaLake
|
|
50
75
|
schema_mode: nil,
|
51
76
|
storage_options: nil,
|
52
77
|
predicate: nil,
|
53
|
-
target_file_size: nil
|
78
|
+
target_file_size: nil,
|
79
|
+
writer_properties: nil,
|
80
|
+
commit_properties: nil,
|
81
|
+
post_commithook_properties: nil
|
54
82
|
)
|
55
83
|
table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options)
|
56
84
|
|
@@ -62,7 +90,7 @@ module DeltaLake
|
|
62
90
|
return
|
63
91
|
end
|
64
92
|
|
65
|
-
data = convert_data(data)
|
93
|
+
data = Utils.convert_data(data)
|
66
94
|
|
67
95
|
write_deltalake_rust(
|
68
96
|
table_uri,
|
@@ -76,7 +104,10 @@ module DeltaLake
|
|
76
104
|
name,
|
77
105
|
description,
|
78
106
|
configuration,
|
79
|
-
storage_options
|
107
|
+
storage_options,
|
108
|
+
writer_properties,
|
109
|
+
commit_properties,
|
110
|
+
post_commithook_properties
|
80
111
|
)
|
81
112
|
|
82
113
|
if table
|
@@ -107,61 +138,5 @@ module DeltaLake
|
|
107
138
|
rescue TableNotFoundError
|
108
139
|
nil
|
109
140
|
end
|
110
|
-
|
111
|
-
def convert_data(data)
|
112
|
-
if data.respond_to?(:arrow_c_stream)
|
113
|
-
# TODO convert other object types
|
114
|
-
# should probably move logic to Rust
|
115
|
-
if defined?(Polars::DataFrame) && data.is_a?(Polars::DataFrame)
|
116
|
-
data = convert_polars_data(data)
|
117
|
-
end
|
118
|
-
|
119
|
-
data.arrow_c_stream
|
120
|
-
else
|
121
|
-
raise TypeError, "Only objects implementing the Arrow C stream interface are valid inputs for source."
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
# unsigned integers are not part of the protocol
|
126
|
-
# https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types
|
127
|
-
def convert_polars_data(data)
|
128
|
-
new_schema = {}
|
129
|
-
data.schema.each do |k, v|
|
130
|
-
new_type = convert_polars_type(v)
|
131
|
-
new_schema[k] = new_type if new_type
|
132
|
-
end
|
133
|
-
|
134
|
-
if new_schema.any?
|
135
|
-
data.cast(new_schema)
|
136
|
-
else
|
137
|
-
data
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
def convert_polars_type(t)
|
142
|
-
case t
|
143
|
-
when Polars::UInt8
|
144
|
-
Polars::Int8
|
145
|
-
when Polars::UInt16
|
146
|
-
Polars::Int16
|
147
|
-
when Polars::UInt32
|
148
|
-
Polars::Int32
|
149
|
-
when Polars::UInt64
|
150
|
-
Polars::Int64
|
151
|
-
when Polars::Datetime
|
152
|
-
Polars::Datetime.new("us", t.time_zone) if t.time_unit != "us"
|
153
|
-
when Polars::List
|
154
|
-
inner = convert_polars_type(t.inner)
|
155
|
-
Polars::List.new(inner) if inner
|
156
|
-
when Polars::Array
|
157
|
-
inner = convert_polars_type(t.inner)
|
158
|
-
Polars::Array.new(t.inner, t.width) if inner
|
159
|
-
when Polars::Struct
|
160
|
-
if t.fields.any? { |f| convert_polars_type(f.dtype) }
|
161
|
-
fields = t.fields.map { |f| Polars::Field.new(f.name, convert_polars_type(f.dtype) || f.dtype) }
|
162
|
-
Polars::Struct.new(fields)
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|
166
141
|
end
|
167
142
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deltalake-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -39,7 +39,9 @@ files:
|
|
39
39
|
- ext/deltalake/Cargo.toml
|
40
40
|
- ext/deltalake/extconf.rb
|
41
41
|
- ext/deltalake/src/error.rs
|
42
|
+
- ext/deltalake/src/features.rs
|
42
43
|
- ext/deltalake/src/lib.rs
|
44
|
+
- ext/deltalake/src/merge.rs
|
43
45
|
- ext/deltalake/src/schema.rs
|
44
46
|
- ext/deltalake/src/utils.rs
|
45
47
|
- lib/deltalake-rb.rb
|
@@ -49,7 +51,9 @@ files:
|
|
49
51
|
- lib/deltalake/schema.rb
|
50
52
|
- lib/deltalake/table.rb
|
51
53
|
- lib/deltalake/table_alterer.rb
|
54
|
+
- lib/deltalake/table_merger.rb
|
52
55
|
- lib/deltalake/table_optimizer.rb
|
56
|
+
- lib/deltalake/utils.rb
|
53
57
|
- lib/deltalake/version.rb
|
54
58
|
homepage: https://github.com/ankane/delta-ruby
|
55
59
|
licenses:
|