deltalake-rb 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/deltalake.rb CHANGED
@@ -7,6 +7,7 @@ end
7
7
 
8
8
  # stdlib
9
9
  require "json"
10
+ require "time"
10
11
 
11
12
  # modules
12
13
  require_relative "deltalake/field"
@@ -14,7 +15,9 @@ require_relative "deltalake/metadata"
14
15
  require_relative "deltalake/schema"
15
16
  require_relative "deltalake/table"
16
17
  require_relative "deltalake/table_alterer"
18
+ require_relative "deltalake/table_merger"
17
19
  require_relative "deltalake/table_optimizer"
20
+ require_relative "deltalake/utils"
18
21
  require_relative "deltalake/version"
19
22
 
20
23
  module DeltaLake
@@ -38,6 +41,28 @@ module DeltaLake
38
41
  :reader_features
39
42
  )
40
43
 
44
+ CommitProperties =
45
+ Struct.new(
46
+ :custom_metadata,
47
+ :max_commit_retries,
48
+ # TODO
49
+ # :app_transactions,
50
+ keyword_init: true
51
+ )
52
+
53
+ PostCommitHookProperties =
54
+ Struct.new(
55
+ :create_checkpoint,
56
+ :cleanup_expired_logs,
57
+ keyword_init: true
58
+ )
59
+
60
+ class ArrowArrayStream
61
+ def arrow_c_stream
62
+ self
63
+ end
64
+ end
65
+
41
66
  class << self
42
67
  def write(
43
68
  table_or_uri,
@@ -50,7 +75,10 @@ module DeltaLake
50
75
  schema_mode: nil,
51
76
  storage_options: nil,
52
77
  predicate: nil,
53
- target_file_size: nil
78
+ target_file_size: nil,
79
+ writer_properties: nil,
80
+ commit_properties: nil,
81
+ post_commithook_properties: nil
54
82
  )
55
83
  table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options)
56
84
 
@@ -62,7 +90,7 @@ module DeltaLake
62
90
  return
63
91
  end
64
92
 
65
- data = convert_data(data)
93
+ data = Utils.convert_data(data)
66
94
 
67
95
  write_deltalake_rust(
68
96
  table_uri,
@@ -76,7 +104,10 @@ module DeltaLake
76
104
  name,
77
105
  description,
78
106
  configuration,
79
- storage_options
107
+ storage_options,
108
+ writer_properties,
109
+ commit_properties,
110
+ post_commithook_properties
80
111
  )
81
112
 
82
113
  if table
@@ -107,61 +138,5 @@ module DeltaLake
107
138
  rescue TableNotFoundError
108
139
  nil
109
140
  end
110
-
111
- def convert_data(data)
112
- if data.respond_to?(:arrow_c_stream)
113
- # TODO convert other object types
114
- # should probably move logic to Rust
115
- if defined?(Polars::DataFrame) && data.is_a?(Polars::DataFrame)
116
- data = convert_polars_data(data)
117
- end
118
-
119
- data.arrow_c_stream
120
- else
121
- raise TypeError, "Only objects implementing the Arrow C stream interface are valid inputs for source."
122
- end
123
- end
124
-
125
- # unsigned integers are not part of the protocol
126
- # https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types
127
- def convert_polars_data(data)
128
- new_schema = {}
129
- data.schema.each do |k, v|
130
- new_type = convert_polars_type(v)
131
- new_schema[k] = new_type if new_type
132
- end
133
-
134
- if new_schema.any?
135
- data.cast(new_schema)
136
- else
137
- data
138
- end
139
- end
140
-
141
- def convert_polars_type(t)
142
- case t
143
- when Polars::UInt8
144
- Polars::Int8
145
- when Polars::UInt16
146
- Polars::Int16
147
- when Polars::UInt32
148
- Polars::Int32
149
- when Polars::UInt64
150
- Polars::Int64
151
- when Polars::Datetime
152
- Polars::Datetime.new("us", t.time_zone) if t.time_unit != "us"
153
- when Polars::List
154
- inner = convert_polars_type(t.inner)
155
- Polars::List.new(inner) if inner
156
- when Polars::Array
157
- inner = convert_polars_type(t.inner)
158
- Polars::Array.new(t.inner, t.width) if inner
159
- when Polars::Struct
160
- if t.fields.any? { |f| convert_polars_type(f.dtype) }
161
- fields = t.fields.map { |f| Polars::Field.new(f.name, convert_polars_type(f.dtype) || f.dtype) }
162
- Polars::Struct.new(fields)
163
- end
164
- end
165
- end
166
141
  end
167
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deltalake-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-11-23 00:00:00.000000000 Z
11
+ date: 2024-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -39,7 +39,9 @@ files:
39
39
  - ext/deltalake/Cargo.toml
40
40
  - ext/deltalake/extconf.rb
41
41
  - ext/deltalake/src/error.rs
42
+ - ext/deltalake/src/features.rs
42
43
  - ext/deltalake/src/lib.rs
44
+ - ext/deltalake/src/merge.rs
43
45
  - ext/deltalake/src/schema.rs
44
46
  - ext/deltalake/src/utils.rs
45
47
  - lib/deltalake-rb.rb
@@ -49,7 +51,9 @@ files:
49
51
  - lib/deltalake/schema.rb
50
52
  - lib/deltalake/table.rb
51
53
  - lib/deltalake/table_alterer.rb
54
+ - lib/deltalake/table_merger.rb
52
55
  - lib/deltalake/table_optimizer.rb
56
+ - lib/deltalake/utils.rb
53
57
  - lib/deltalake/version.rb
54
58
  homepage: https://github.com/ankane/delta-ruby
55
59
  licenses: