polars-df 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/Cargo.lock +337 -381
  4. data/README.md +4 -3
  5. data/ext/polars/Cargo.toml +5 -4
  6. data/ext/polars/src/apply/mod.rs +7 -3
  7. data/ext/polars/src/conversion.rs +171 -63
  8. data/ext/polars/src/dataframe.rs +19 -23
  9. data/ext/polars/src/error.rs +8 -0
  10. data/ext/polars/src/expr/array.rs +15 -0
  11. data/ext/polars/src/expr/general.rs +39 -9
  12. data/ext/polars/src/expr/list.rs +27 -22
  13. data/ext/polars/src/expr/string.rs +10 -9
  14. data/ext/polars/src/expr.rs +1 -0
  15. data/ext/polars/src/functions/lazy.rs +61 -21
  16. data/ext/polars/src/lazyframe.rs +14 -2
  17. data/ext/polars/src/lib.rs +25 -20
  18. data/ext/polars/src/object.rs +1 -1
  19. data/ext/polars/src/rb_modules.rs +4 -0
  20. data/ext/polars/src/series/construction.rs +28 -2
  21. data/ext/polars/src/series.rs +57 -17
  22. data/lib/polars/array_expr.rb +84 -0
  23. data/lib/polars/array_name_space.rb +77 -0
  24. data/lib/polars/batched_csv_reader.rb +1 -1
  25. data/lib/polars/data_frame.rb +91 -49
  26. data/lib/polars/data_types.rb +163 -29
  27. data/lib/polars/date_time_name_space.rb +17 -3
  28. data/lib/polars/expr.rb +76 -69
  29. data/lib/polars/functions.rb +0 -1
  30. data/lib/polars/group_by.rb +1 -22
  31. data/lib/polars/lazy_frame.rb +82 -30
  32. data/lib/polars/lazy_functions.rb +67 -31
  33. data/lib/polars/list_expr.rb +28 -28
  34. data/lib/polars/list_name_space.rb +13 -13
  35. data/lib/polars/rolling_group_by.rb +4 -2
  36. data/lib/polars/series.rb +70 -16
  37. data/lib/polars/string_expr.rb +137 -11
  38. data/lib/polars/string_name_space.rb +137 -22
  39. data/lib/polars/utils.rb +107 -57
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +3 -0
  42. metadata +5 -2
data/lib/polars/utils.rb CHANGED
@@ -40,17 +40,23 @@ module Polars
40
40
  td
41
41
  end
42
42
 
43
- def self._datetime_to_pl_timestamp(dt, tu)
44
- if tu == "ns"
45
- (dt.to_datetime.to_time.to_f * 1e9).to_i
46
- elsif tu == "us"
47
- (dt.to_datetime.to_time.to_f * 1e6).to_i
48
- elsif tu == "ms"
49
- (dt.to_datetime.to_time.to_f * 1e3).to_i
50
- elsif tu.nil?
51
- (dt.to_datetime.to_time.to_f * 1e6).to_i
43
+ def self._datetime_to_pl_timestamp(dt, time_unit)
44
+ dt = dt.to_datetime.to_time
45
+ if time_unit == "ns"
46
+ nanos = dt.nsec
47
+ dt.to_i * 1_000_000_000 + nanos
48
+ elsif time_unit == "us"
49
+ micros = dt.usec
50
+ dt.to_i * 1_000_000 + micros
51
+ elsif time_unit == "ms"
52
+ millis = dt.usec / 1000
53
+ dt.to_i * 1_000 + millis
54
+ elsif time_unit.nil?
55
+ # Ruby has ns precision
56
+ nanos = dt.nsec
57
+ dt.to_i * 1_000_000_000 + nanos
52
58
  else
53
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
59
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
54
60
  end
55
61
  end
56
62
 
@@ -59,46 +65,56 @@ module Polars
59
65
  dt.to_i / (3600 * 24)
60
66
  end
61
67
 
62
- def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
63
- if dtype == :date || dtype == Date
64
- # days to seconds
65
- # important to create from utc. Not doing this leads
66
- # to inconsistencies dependent on the timezone you are in.
67
- ::Time.at(value * 86400).utc.to_date
68
- # TODO fix dtype
69
- elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
70
- if tz.nil? || tz == ""
71
- if tu == "ns"
72
- raise Todo
73
- elsif tu == "us"
74
- dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
75
- elsif tu == "ms"
76
- raise Todo
77
- else
78
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
79
- end
80
- else
81
- raise Todo
82
- end
83
-
84
- dt
68
+ def self._to_ruby_time(value)
69
+ if value == 0
70
+ ::Time.utc(2000, 1, 1)
85
71
  else
86
- raise NotImplementedError
72
+ seconds, nanoseconds = value.divmod(1_000_000_000)
73
+ minutes, seconds = seconds.divmod(60)
74
+ hours, minutes = minutes.divmod(60)
75
+ ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
87
76
  end
88
77
  end
89
78
 
90
- def self._to_ruby_duration(value, tu = "ns")
91
- if tu == "ns"
79
+ def self._to_ruby_duration(value, time_unit = "ns")
80
+ if time_unit == "ns"
92
81
  value / 1e9
93
- elsif tu == "us"
82
+ elsif time_unit == "us"
94
83
  value / 1e6
95
- elsif tu == "ms"
84
+ elsif time_unit == "ms"
96
85
  value / 1e3
97
86
  else
98
- raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
87
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
99
88
  end
100
89
  end
101
90
 
91
+ def self._to_ruby_date(value)
92
+ # days to seconds
93
+ # important to create from utc. Not doing this leads
94
+ # to inconsistencies dependent on the timezone you are in.
95
+ ::Time.at(value * 86400).utc.to_date
96
+ end
97
+
98
+ def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
+ if time_zone.nil? || time_zone == ""
100
+ if time_unit == "ns"
101
+ return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
+ elsif time_unit == "us"
103
+ return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
+ elsif time_unit == "ms"
105
+ return ::Time.at(value / 1000, value % 1000, :millisecond).utc
106
+ else
107
+ raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
108
+ end
109
+ else
110
+ raise Todo
111
+ end
112
+ end
113
+
114
+ def self._to_ruby_decimal(digits, scale)
115
+ BigDecimal("#{digits}e#{scale}")
116
+ end
117
+
102
118
  def self.selection_to_rbexpr_list(exprs)
103
119
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
104
120
  exprs = [exprs]
@@ -139,16 +155,27 @@ module Polars
139
155
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
140
156
  end
141
157
 
142
- RB_TYPE_TO_DTYPE = {
143
- Float => :f64,
144
- Integer => :i64,
145
- String => :str,
146
- TrueClass => :bool,
147
- FalseClass => :bool,
148
- ::Date => :date,
149
- ::DateTime => :datetime,
150
- ::Time => :datetime
151
- }
158
+ def self.map_rb_type_to_dtype(ruby_dtype)
159
+ if ruby_dtype == Float
160
+ Float64
161
+ elsif ruby_dtype == Integer
162
+ Int64
163
+ elsif ruby_dtype == String
164
+ Utf8
165
+ elsif ruby_dtype == TrueClass || ruby_dtype == FalseClass
166
+ Boolean
167
+ elsif ruby_dtype == DateTime || ruby_dtype == ::Time || (defined?(ActiveSupport::TimeWithZone) && ruby_dtype == ActiveSupport::TimeWithZone)
168
+ Datetime.new("ns")
169
+ elsif ruby_dtype == ::Date
170
+ Date
171
+ elsif ruby_dtype == ::Array
172
+ List
173
+ elsif ruby_dtype == NilClass
174
+ Null
175
+ else
176
+ raise TypeError, "Invalid type"
177
+ end
178
+ end
152
179
 
153
180
  # TODO fix
154
181
  def self.rb_type_to_dtype(data_type)
@@ -158,8 +185,8 @@ module Polars
158
185
  end
159
186
 
160
187
  begin
161
- RB_TYPE_TO_DTYPE.fetch(data_type).to_s
162
- rescue KeyError
188
+ map_rb_type_to_dtype(data_type)
189
+ rescue TypeError
163
190
  raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
164
191
  end
165
192
  end
@@ -228,35 +255,58 @@ module Polars
228
255
  end
229
256
 
230
257
  def self.is_bool_sequence(val)
231
- val.is_a?(Array) && val.all? { |x| x == true || x == false }
258
+ val.is_a?(::Array) && val.all? { |x| x == true || x == false }
232
259
  end
233
260
 
234
261
  def self.is_dtype_sequence(val)
235
- val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
262
+ val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
236
263
  end
237
264
 
238
265
  def self.is_int_sequence(val)
239
- val.is_a?(Array) && _is_iterable_of(val, Integer)
266
+ val.is_a?(::Array) && _is_iterable_of(val, Integer)
240
267
  end
241
268
 
242
269
  def self.is_expr_sequence(val)
243
- val.is_a?(Array) && _is_iterable_of(val, Expr)
270
+ val.is_a?(::Array) && _is_iterable_of(val, Expr)
244
271
  end
245
272
 
246
273
  def self.is_rbexpr_sequence(val)
247
- val.is_a?(Array) && _is_iterable_of(val, RbExpr)
274
+ val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
248
275
  end
249
276
 
250
277
  def self.is_str_sequence(val, allow_str: false)
251
278
  if allow_str == false && val.is_a?(String)
252
279
  false
253
280
  else
254
- val.is_a?(Array) && _is_iterable_of(val, String)
281
+ val.is_a?(::Array) && _is_iterable_of(val, String)
255
282
  end
256
283
  end
257
284
 
258
285
  def self.local_file?(file)
259
286
  Dir.glob(file).any?
260
287
  end
288
+
289
+ def self.parse_as_expression(input, str_as_lit: false, structify: false)
290
+ if input.is_a?(Expr)
291
+ expr = input
292
+ elsif input.is_a?(String) && !str_as_lit
293
+ expr = Polars.col(input)
294
+ structify = false
295
+ elsif [Integer, Float, String, Series, ::Date, ::Time, ::DateTime].any? { |cls| input.is_a?(cls) } || input.nil?
296
+ expr = Polars.lit(input)
297
+ structify = false
298
+ elsif input.is_a?(::Array)
299
+ expr = Polars.lit(Polars::Series.new("", [input]))
300
+ structify = false
301
+ else
302
+ raise TypeError, "did not expect value #{input} of type #{input.class.name}, maybe disambiguate with pl.lit or pl.col"
303
+ end
304
+
305
+ if structify
306
+ raise Todo
307
+ end
308
+
309
+ expr._rbexpr
310
+ end
261
311
  end
262
312
  end
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.5.0"
3
+ VERSION = "0.6.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -6,11 +6,14 @@ rescue LoadError
6
6
  end
7
7
 
8
8
  # stdlib
9
+ require "bigdecimal"
9
10
  require "date"
10
11
  require "stringio"
11
12
 
12
13
  # modules
13
14
  require_relative "polars/expr_dispatch"
15
+ require_relative "polars/array_expr"
16
+ require_relative "polars/array_name_space"
14
17
  require_relative "polars/batched_csv_reader"
15
18
  require_relative "polars/binary_expr"
16
19
  require_relative "polars/binary_name_space"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-16 00:00:00.000000000 Z
11
+ date: 2023-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -48,6 +48,7 @@ files:
48
48
  - ext/polars/src/dataframe.rs
49
49
  - ext/polars/src/error.rs
50
50
  - ext/polars/src/expr.rs
51
+ - ext/polars/src/expr/array.rs
51
52
  - ext/polars/src/expr/binary.rs
52
53
  - ext/polars/src/expr/categorical.rs
53
54
  - ext/polars/src/expr/datetime.rs
@@ -79,6 +80,8 @@ files:
79
80
  - ext/polars/src/utils.rs
80
81
  - lib/polars-df.rb
81
82
  - lib/polars.rb
83
+ - lib/polars/array_expr.rb
84
+ - lib/polars/array_name_space.rb
82
85
  - lib/polars/batched_csv_reader.rb
83
86
  - lib/polars/binary_expr.rb
84
87
  - lib/polars/binary_name_space.rb