deltalake-rb 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/deltalake.rb CHANGED
@@ -7,6 +7,7 @@ end
7
7
 
8
8
  # stdlib
9
9
  require "json"
10
+ require "time"
10
11
 
11
12
  # modules
12
13
  require_relative "deltalake/field"
@@ -14,7 +15,9 @@ require_relative "deltalake/metadata"
14
15
  require_relative "deltalake/schema"
15
16
  require_relative "deltalake/table"
16
17
  require_relative "deltalake/table_alterer"
18
+ require_relative "deltalake/table_merger"
17
19
  require_relative "deltalake/table_optimizer"
20
+ require_relative "deltalake/utils"
18
21
  require_relative "deltalake/version"
19
22
 
20
23
  module DeltaLake
@@ -38,6 +41,28 @@ module DeltaLake
38
41
  :reader_features
39
42
  )
40
43
 
44
+ CommitProperties =
45
+ Struct.new(
46
+ :custom_metadata,
47
+ :max_commit_retries,
48
+ # TODO
49
+ # :app_transactions,
50
+ keyword_init: true
51
+ )
52
+
53
+ PostCommitHookProperties =
54
+ Struct.new(
55
+ :create_checkpoint,
56
+ :cleanup_expired_logs,
57
+ keyword_init: true
58
+ )
59
+
60
+ class ArrowArrayStream
61
+ def arrow_c_stream
62
+ self
63
+ end
64
+ end
65
+
41
66
  class << self
42
67
  def write(
43
68
  table_or_uri,
@@ -50,7 +75,10 @@ module DeltaLake
50
75
  schema_mode: nil,
51
76
  storage_options: nil,
52
77
  predicate: nil,
53
- target_file_size: nil
78
+ target_file_size: nil,
79
+ writer_properties: nil,
80
+ commit_properties: nil,
81
+ post_commithook_properties: nil
54
82
  )
55
83
  table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options)
56
84
 
@@ -62,7 +90,7 @@ module DeltaLake
62
90
  return
63
91
  end
64
92
 
65
- data = convert_data(data)
93
+ data = Utils.convert_data(data)
66
94
 
67
95
  write_deltalake_rust(
68
96
  table_uri,
@@ -76,7 +104,10 @@ module DeltaLake
76
104
  name,
77
105
  description,
78
106
  configuration,
79
- storage_options
107
+ storage_options,
108
+ writer_properties,
109
+ commit_properties,
110
+ post_commithook_properties
80
111
  )
81
112
 
82
113
  if table
@@ -107,61 +138,5 @@ module DeltaLake
107
138
  rescue TableNotFoundError
108
139
  nil
109
140
  end
110
-
111
- def convert_data(data)
112
- if data.respond_to?(:arrow_c_stream)
113
- # TODO convert other object types
114
- # should probably move logic to Rust
115
- if defined?(Polars::DataFrame) && data.is_a?(Polars::DataFrame)
116
- data = convert_polars_data(data)
117
- end
118
-
119
- data.arrow_c_stream
120
- else
121
- raise TypeError, "Only objects implementing the Arrow C stream interface are valid inputs for source."
122
- end
123
- end
124
-
125
- # unsigned integers are not part of the protocol
126
- # https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types
127
- def convert_polars_data(data)
128
- new_schema = {}
129
- data.schema.each do |k, v|
130
- new_type = convert_polars_type(v)
131
- new_schema[k] = new_type if new_type
132
- end
133
-
134
- if new_schema.any?
135
- data.cast(new_schema)
136
- else
137
- data
138
- end
139
- end
140
-
141
- def convert_polars_type(t)
142
- case t
143
- when Polars::UInt8
144
- Polars::Int8
145
- when Polars::UInt16
146
- Polars::Int16
147
- when Polars::UInt32
148
- Polars::Int32
149
- when Polars::UInt64
150
- Polars::Int64
151
- when Polars::Datetime
152
- Polars::Datetime.new("us", t.time_zone) if t.time_unit != "us"
153
- when Polars::List
154
- inner = convert_polars_type(t.inner)
155
- Polars::List.new(inner) if inner
156
- when Polars::Array
157
- inner = convert_polars_type(t.inner)
158
- Polars::Array.new(t.inner, t.width) if inner
159
- when Polars::Struct
160
- if t.fields.any? { |f| convert_polars_type(f.dtype) }
161
- fields = t.fields.map { |f| Polars::Field.new(f.name, convert_polars_type(f.dtype) || f.dtype) }
162
- Polars::Struct.new(fields)
163
- end
164
- end
165
- end
166
141
  end
167
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deltalake-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-23 00:00:00.000000000 Z
11
+ date: 2024-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -39,7 +39,9 @@ files:
39
39
  - ext/deltalake/Cargo.toml
40
40
  - ext/deltalake/extconf.rb
41
41
  - ext/deltalake/src/error.rs
42
+ - ext/deltalake/src/features.rs
42
43
  - ext/deltalake/src/lib.rs
44
+ - ext/deltalake/src/merge.rs
43
45
  - ext/deltalake/src/schema.rs
44
46
  - ext/deltalake/src/utils.rs
45
47
  - lib/deltalake-rb.rb
@@ -49,7 +51,9 @@ files:
49
51
  - lib/deltalake/schema.rb
50
52
  - lib/deltalake/table.rb
51
53
  - lib/deltalake/table_alterer.rb
54
+ - lib/deltalake/table_merger.rb
52
55
  - lib/deltalake/table_optimizer.rb
56
+ - lib/deltalake/utils.rb
53
57
  - lib/deltalake/version.rb
54
58
  homepage: https://github.com/ankane/delta-ruby
55
59
  licenses: