polars-df 0.4.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +447 -410
  4. data/Cargo.toml +0 -1
  5. data/README.md +6 -5
  6. data/ext/polars/Cargo.toml +10 -5
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +8 -3
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +269 -59
  12. data/ext/polars/src/dataframe.rs +38 -40
  13. data/ext/polars/src/error.rs +6 -2
  14. data/ext/polars/src/expr/array.rs +15 -0
  15. data/ext/polars/src/expr/binary.rs +69 -0
  16. data/ext/polars/src/expr/categorical.rs +10 -0
  17. data/ext/polars/src/expr/datetime.rs +223 -0
  18. data/ext/polars/src/expr/general.rs +963 -0
  19. data/ext/polars/src/expr/list.rs +151 -0
  20. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  21. data/ext/polars/src/expr/string.rs +314 -0
  22. data/ext/polars/src/expr/struct.rs +15 -0
  23. data/ext/polars/src/expr.rs +34 -0
  24. data/ext/polars/src/functions/eager.rs +93 -0
  25. data/ext/polars/src/functions/io.rs +34 -0
  26. data/ext/polars/src/functions/lazy.rs +249 -0
  27. data/ext/polars/src/functions/meta.rs +8 -0
  28. data/ext/polars/src/functions/mod.rs +5 -0
  29. data/ext/polars/src/functions/whenthen.rs +43 -0
  30. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +26 -35
  31. data/ext/polars/src/lazygroupby.rs +29 -0
  32. data/ext/polars/src/lib.rs +223 -316
  33. data/ext/polars/src/object.rs +1 -1
  34. data/ext/polars/src/rb_modules.rs +12 -0
  35. data/ext/polars/src/series/aggregation.rs +83 -0
  36. data/ext/polars/src/series/arithmetic.rs +88 -0
  37. data/ext/polars/src/series/comparison.rs +251 -0
  38. data/ext/polars/src/series/construction.rs +190 -0
  39. data/ext/polars/src/series.rs +151 -551
  40. data/lib/polars/array_expr.rb +84 -0
  41. data/lib/polars/array_name_space.rb +77 -0
  42. data/lib/polars/batched_csv_reader.rb +1 -1
  43. data/lib/polars/convert.rb +2 -2
  44. data/lib/polars/data_frame.rb +289 -96
  45. data/lib/polars/data_types.rb +169 -33
  46. data/lib/polars/date_time_expr.rb +142 -2
  47. data/lib/polars/date_time_name_space.rb +17 -3
  48. data/lib/polars/expr.rb +145 -78
  49. data/lib/polars/functions.rb +0 -1
  50. data/lib/polars/group_by.rb +1 -22
  51. data/lib/polars/lazy_frame.rb +84 -31
  52. data/lib/polars/lazy_functions.rb +71 -32
  53. data/lib/polars/list_expr.rb +94 -45
  54. data/lib/polars/list_name_space.rb +13 -13
  55. data/lib/polars/rolling_group_by.rb +4 -2
  56. data/lib/polars/series.rb +249 -87
  57. data/lib/polars/string_expr.rb +277 -45
  58. data/lib/polars/string_name_space.rb +137 -22
  59. data/lib/polars/struct_name_space.rb +32 -0
  60. data/lib/polars/utils.rb +138 -54
  61. data/lib/polars/version.rb +1 -1
  62. data/lib/polars.rb +5 -2
  63. metadata +29 -11
  64. data/ext/polars/src/lazy/dsl.rs +0 -1775
  65. data/ext/polars/src/lazy/mod.rs +0 -5
  66. data/ext/polars/src/lazy/utils.rs +0 -13
  67. data/ext/polars/src/list_construction.rs +0 -100
  68. data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  69. data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/lib/polars/utils.rb CHANGED
@@ -23,52 +23,98 @@ module Polars
23
23
  Polars.col(name)
24
24
  end
25
25
 
26
+ def self.arrlen(obj)
27
+ if obj.is_a?(Range)
28
+ # size only works for numeric ranges
29
+ obj.to_a.length
30
+ elsif obj.is_a?(String)
31
+ nil
32
+ else
33
+ obj.length
34
+ end
35
+ rescue
36
+ nil
37
+ end
38
+
26
39
  def self._timedelta_to_pl_duration(td)
27
40
  td
28
41
  end
29
42
 
30
- def self._datetime_to_pl_timestamp(dt, tu)
31
- if tu == "ns"
32
- (dt.to_datetime.utc.to_f * 1e9).to_i
33
- elsif tu == "us"
34
- (dt.to_datetime.utc.to_f * 1e6).to_i
35
- elsif tu == "ms"
36
- (dt.to_datetime.utc.to_f * 1e3).to_i
37
- elsif tu.nil?
38
- (dt.to_datetime.utc.to_f * 1e6).to_i
43
+ def self._datetime_to_pl_timestamp(dt, time_unit)
44
+ dt = dt.to_datetime.to_time
45
+ if time_unit == "ns"
46
+ nanos = dt.nsec
47
+ dt.to_i * 1_000_000_000 + nanos
48
+ elsif time_unit == "us"
49
+ micros = dt.usec
50
+ dt.to_i * 1_000_000 + micros
51
+ elsif time_unit == "ms"
52
+ millis = dt.usec / 1000
53
+ dt.to_i * 1_000 + millis
54
+ elsif time_unit.nil?
55
+ # Ruby has ns precision
56
+ nanos = dt.nsec
57
+ dt.to_i * 1_000_000_000 + nanos
58
+ else
59
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
60
+ end
61
+ end
62
+
63
+ def self._date_to_pl_date(d)
64
+ dt = d.to_datetime.to_time
65
+ dt.to_i / (3600 * 24)
66
+ end
67
+
68
+ def self._to_ruby_time(value)
69
+ if value == 0
70
+ ::Time.utc(2000, 1, 1)
39
71
  else
40
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
41
- end
42
- end
43
-
44
- def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
45
- if dtype == :date || dtype == Date
46
- # days to seconds
47
- # important to create from utc. Not doing this leads
48
- # to inconsistencies dependent on the timezone you are in.
49
- ::Time.at(value * 86400).utc.to_date
50
- # TODO fix dtype
51
- elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
52
- if tz.nil? || tz == ""
53
- if tu == "ns"
54
- raise Todo
55
- elsif tu == "us"
56
- dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
57
- elsif tu == "ms"
58
- raise Todo
59
- else
60
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
61
- end
72
+ seconds, nanoseconds = value.divmod(1_000_000_000)
73
+ minutes, seconds = seconds.divmod(60)
74
+ hours, minutes = minutes.divmod(60)
75
+ ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
76
+ end
77
+ end
78
+
79
+ def self._to_ruby_duration(value, time_unit = "ns")
80
+ if time_unit == "ns"
81
+ value / 1e9
82
+ elsif time_unit == "us"
83
+ value / 1e6
84
+ elsif time_unit == "ms"
85
+ value / 1e3
86
+ else
87
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
88
+ end
89
+ end
90
+
91
+ def self._to_ruby_date(value)
92
+ # days to seconds
93
+ # important to create from utc. Not doing this leads
94
+ # to inconsistencies dependent on the timezone you are in.
95
+ ::Time.at(value * 86400).utc.to_date
96
+ end
97
+
98
+ def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
+ if time_zone.nil? || time_zone == ""
100
+ if time_unit == "ns"
101
+ return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
+ elsif time_unit == "us"
103
+ return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
+ elsif time_unit == "ms"
105
+ return ::Time.at(value / 1000, value % 1000, :millisecond).utc
62
106
  else
63
- raise Todo
107
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
64
108
  end
65
-
66
- dt
67
109
  else
68
- raise NotImplementedError
110
+ raise Todo
69
111
  end
70
112
  end
71
113
 
114
+ def self._to_ruby_decimal(digits, scale)
115
+ BigDecimal("#{digits}e#{scale}")
116
+ end
117
+
72
118
  def self.selection_to_rbexpr_list(exprs)
73
119
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
120
  exprs = [exprs]
@@ -102,19 +148,34 @@ module Polars
102
148
  end
103
149
 
104
150
  # TODO fix
105
- def self.is_polars_dtype(data_type)
151
+ def self.is_polars_dtype(data_type, include_unknown: false)
152
+ if data_type == Unknown
153
+ return include_unknown
154
+ end
106
155
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
107
156
  end
108
157
 
109
- RB_TYPE_TO_DTYPE = {
110
- Float => :f64,
111
- Integer => :i64,
112
- String => :str,
113
- TrueClass => :bool,
114
- FalseClass => :bool,
115
- ::Date => :date,
116
- ::DateTime => :datetime
117
- }
158
+ def self.map_rb_type_to_dtype(ruby_dtype)
159
+ if ruby_dtype == Float
160
+ Float64
161
+ elsif ruby_dtype == Integer
162
+ Int64
163
+ elsif ruby_dtype == String
164
+ Utf8
165
+ elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
166
+ Boolean
167
+ elsif ruby_dtype == DateTime || ruby_dtype == ::Time || (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
168
+ Datetime.new("ns")
169
+ elsif ruby_dtype == ::Date
170
+ Date
171
+ elsif ruby_dtype == ::Array
172
+ List
173
+ elsif ruby_dtype == NilClass
174
+ Null
175
+ else
176
+ raise TypeError, "Invalid type"
177
+ end
178
+ end
118
179
 
119
180
  # TODO fix
120
181
  def self.rb_type_to_dtype(data_type)
@@ -124,8 +185,8 @@ module Polars
124
185
  end
125
186
 
126
187
  begin
127
- RB_TYPE_TO_DTYPE.fetch(data_type).to_s
128
- rescue KeyError
188
+ map_rb_type_to_dtype(data_type)
189
+ rescue TypeError
129
190
  raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
130
191
  end
131
192
  end
@@ -178,7 +239,7 @@ module Polars
178
239
  end
179
240
 
180
241
  def self.bool?(value)
181
- value == true || value == false
242
+ value.is_a?(TrueClass) || value.is_a?(FalseClass)
182
243
  end
183
244
 
184
245
  def self.strlike?(value)
@@ -194,35 +255,58 @@ module Polars
194
255
  end
195
256
 
196
257
  def self.is_bool_sequence(val)
197
- val.is_a?(Array) && val.all? { |x| x == true || x == false }
258
+ val.is_a?(::Array) && val.all? { |x| x == true || x == false }
198
259
  end
199
260
 
200
261
  def self.is_dtype_sequence(val)
201
- val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
262
+ val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
202
263
  end
203
264
 
204
265
  def self.is_int_sequence(val)
205
- val.is_a?(Array) && _is_iterable_of(val, Integer)
266
+ val.is_a?(::Array) && _is_iterable_of(val, Integer)
206
267
  end
207
268
 
208
269
  def self.is_expr_sequence(val)
209
- val.is_a?(Array) && _is_iterable_of(val, Expr)
270
+ val.is_a?(::Array) && _is_iterable_of(val, Expr)
210
271
  end
211
272
 
212
273
  def self.is_rbexpr_sequence(val)
213
- val.is_a?(Array) && _is_iterable_of(val, RbExpr)
274
+ val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
214
275
  end
215
276
 
216
277
  def self.is_str_sequence(val, allow_str: false)
217
278
  if allow_str == false && val.is_a?(String)
218
279
  false
219
280
  else
220
- val.is_a?(Array) && _is_iterable_of(val, String)
281
+ val.is_a?(::Array) && _is_iterable_of(val, String)
221
282
  end
222
283
  end
223
284
 
224
285
  def self.local_file?(file)
225
286
  Dir.glob(file).any?
226
287
  end
288
+
289
+ def self.parse_as_expression(input, str_as_lit: false, structify: false)
290
+ if input.is_a?(Expr)
291
+ expr = input
292
+ elsif input.is_a?(String) && !str_as_lit
293
+ expr = Polars.col(input)
294
+ structify = false
295
+ elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
296
+ expr = Polars.lit(input)
297
+ structify = false
298
+ elsif input.is_a?(Array)
299
+ expr = Polars.lit(Polars::Series.new("", [input]))
300
+ structify = false
301
+ else
302
+ raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with pl.lit or pl.col"
303
+ end
304
+
305
+ if structify
306
+ raise Todo
307
+ end
308
+
309
+ expr._rbexpr
310
+ end
227
311
  end
228
312
  end
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.4.0"
3
+ VERSION = "0.6.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,16 +1,19 @@
1
1
  # ext
2
2
  begin
3
- require_relative "polars/#{RUBY_VERSION.to_f}/polars"
3
+ require "polars/#{RUBY_VERSION.to_f}/polars"
4
4
  rescue LoadError
5
- require_relative "polars/polars"
5
+ require "polars/polars"
6
6
  end
7
7
 
8
8
  # stdlib
9
+ require "bigdecimal"
9
10
  require "date"
10
11
  require "stringio"
11
12
 
12
13
  # modules
13
14
  require_relative "polars/expr_dispatch"
15
+ require_relative "polars/array_expr"
16
+ require_relative "polars/array_name_space"
14
17
  require_relative "polars/batched_csv_reader"
15
18
  require_relative "polars/binary_expr"
16
19
  require_relative "polars/binary_name_space"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-01 00:00:00.000000000 Z
11
+ date: 2023-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -40,30 +40,48 @@ files:
40
40
  - ext/polars/Cargo.toml
41
41
  - ext/polars/extconf.rb
42
42
  - ext/polars/src/apply/dataframe.rs
43
+ - ext/polars/src/apply/lazy.rs
43
44
  - ext/polars/src/apply/mod.rs
44
45
  - ext/polars/src/apply/series.rs
45
46
  - ext/polars/src/batched_csv.rs
46
47
  - ext/polars/src/conversion.rs
47
48
  - ext/polars/src/dataframe.rs
48
49
  - ext/polars/src/error.rs
50
+ - ext/polars/src/expr.rs
51
+ - ext/polars/src/expr/array.rs
52
+ - ext/polars/src/expr/binary.rs
53
+ - ext/polars/src/expr/categorical.rs
54
+ - ext/polars/src/expr/datetime.rs
55
+ - ext/polars/src/expr/general.rs
56
+ - ext/polars/src/expr/list.rs
57
+ - ext/polars/src/expr/meta.rs
58
+ - ext/polars/src/expr/string.rs
59
+ - ext/polars/src/expr/struct.rs
49
60
  - ext/polars/src/file.rs
50
- - ext/polars/src/lazy/apply.rs
51
- - ext/polars/src/lazy/dataframe.rs
52
- - ext/polars/src/lazy/dsl.rs
53
- - ext/polars/src/lazy/meta.rs
54
- - ext/polars/src/lazy/mod.rs
55
- - ext/polars/src/lazy/utils.rs
61
+ - ext/polars/src/functions/eager.rs
62
+ - ext/polars/src/functions/io.rs
63
+ - ext/polars/src/functions/lazy.rs
64
+ - ext/polars/src/functions/meta.rs
65
+ - ext/polars/src/functions/mod.rs
66
+ - ext/polars/src/functions/whenthen.rs
67
+ - ext/polars/src/lazyframe.rs
68
+ - ext/polars/src/lazygroupby.rs
56
69
  - ext/polars/src/lib.rs
57
- - ext/polars/src/list_construction.rs
58
- - ext/polars/src/numo.rs
59
70
  - ext/polars/src/object.rs
60
71
  - ext/polars/src/prelude.rs
61
72
  - ext/polars/src/rb_modules.rs
62
73
  - ext/polars/src/series.rs
63
- - ext/polars/src/set.rs
74
+ - ext/polars/src/series/aggregation.rs
75
+ - ext/polars/src/series/arithmetic.rs
76
+ - ext/polars/src/series/comparison.rs
77
+ - ext/polars/src/series/construction.rs
78
+ - ext/polars/src/series/export.rs
79
+ - ext/polars/src/series/set_at_idx.rs
64
80
  - ext/polars/src/utils.rs
65
81
  - lib/polars-df.rb
66
82
  - lib/polars.rb
83
+ - lib/polars/array_expr.rb
84
+ - lib/polars/array_name_space.rb
67
85
  - lib/polars/batched_csv_reader.rb
68
86
  - lib/polars/binary_expr.rb
69
87
  - lib/polars/binary_name_space.rb