polars-df 0.11.0-x86_64-linux → 0.12.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +360 -361
  4. data/LICENSE-THIRD-PARTY.txt +1065 -878
  5. data/lib/polars/3.1/polars.so +0 -0
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +3 -3
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +5 -4
  35. data/lib/polars/lazy_frame.rb +120 -106
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +105 -189
  41. data/lib/polars/string_expr.rb +42 -67
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
data/lib/polars/utils.rb CHANGED
@@ -3,150 +3,6 @@ module Polars
3
3
  module Utils
4
4
  DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
5
5
 
6
- def self.wrap_s(s)
7
- Series._from_rbseries(s)
8
- end
9
-
10
- def self.wrap_df(df)
11
- DataFrame._from_rbdf(df)
12
- end
13
-
14
- def self.wrap_ldf(ldf)
15
- LazyFrame._from_rbldf(ldf)
16
- end
17
-
18
- def self.wrap_expr(rbexpr)
19
- Expr._from_rbexpr(rbexpr)
20
- end
21
-
22
- def self.col(name)
23
- Polars.col(name)
24
- end
25
-
26
- def self.arrlen(obj)
27
- if obj.is_a?(Range)
28
- # size only works for numeric ranges
29
- obj.to_a.length
30
- elsif obj.is_a?(::String)
31
- nil
32
- else
33
- obj.length
34
- end
35
- rescue
36
- nil
37
- end
38
-
39
- def self._timedelta_to_pl_duration(td)
40
- td
41
- end
42
-
43
- def self._datetime_to_pl_timestamp(dt, time_unit)
44
- dt = dt.to_datetime.to_time
45
- if time_unit == "ns"
46
- nanos = dt.nsec
47
- dt.to_i * 1_000_000_000 + nanos
48
- elsif time_unit == "us"
49
- micros = dt.usec
50
- dt.to_i * 1_000_000 + micros
51
- elsif time_unit == "ms"
52
- millis = dt.usec / 1000
53
- dt.to_i * 1_000 + millis
54
- elsif time_unit.nil?
55
- # Ruby has ns precision
56
- nanos = dt.nsec
57
- dt.to_i * 1_000_000_000 + nanos
58
- else
59
- raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
60
- end
61
- end
62
-
63
- def self._date_to_pl_date(d)
64
- dt = d.to_datetime.to_time
65
- dt.to_i / (3600 * 24)
66
- end
67
-
68
- def self._to_ruby_time(value)
69
- if value == 0
70
- ::Time.utc(2000, 1, 1)
71
- else
72
- seconds, nanoseconds = value.divmod(1_000_000_000)
73
- minutes, seconds = seconds.divmod(60)
74
- hours, minutes = minutes.divmod(60)
75
- ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
76
- end
77
- end
78
-
79
- def self._to_ruby_duration(value, time_unit = "ns")
80
- if time_unit == "ns"
81
- value / 1e9
82
- elsif time_unit == "us"
83
- value / 1e6
84
- elsif time_unit == "ms"
85
- value / 1e3
86
- else
87
- raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
88
- end
89
- end
90
-
91
- def self._to_ruby_date(value)
92
- # days to seconds
93
- # important to create from utc. Not doing this leads
94
- # to inconsistencies dependent on the timezone you are in.
95
- ::Time.at(value * 86400).utc.to_date
96
- end
97
-
98
- def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
- if time_zone.nil? || time_zone == ""
100
- if time_unit == "ns"
101
- return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
- elsif time_unit == "us"
103
- return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
- elsif time_unit == "ms"
105
- return ::Time.at(value / 1000, value % 1000, :millisecond).utc
106
- else
107
- raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
108
- end
109
- else
110
- raise Todo
111
- end
112
- end
113
-
114
- def self._to_ruby_decimal(digits, scale)
115
- BigDecimal("#{digits}e#{scale}")
116
- end
117
-
118
- def self.selection_to_rbexpr_list(exprs)
119
- if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
120
- exprs = [exprs]
121
- end
122
-
123
- exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
124
- end
125
-
126
- def self.expr_to_lit_or_expr(expr, str_to_lit: true)
127
- if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
128
- col(expr)
129
- elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
130
- lit(expr)
131
- elsif expr.is_a?(Expr)
132
- expr
133
- else
134
- raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
135
- end
136
- end
137
-
138
- def self.lit(value)
139
- Polars.lit(value)
140
- end
141
-
142
- def self.normalize_filepath(path, check_not_directory: true)
143
- path = File.expand_path(path)
144
- if check_not_directory && File.exist?(path) && Dir.exist?(path)
145
- raise ArgumentError, "Expected a file path; #{path} is a directory"
146
- end
147
- path
148
- end
149
-
150
6
  # TODO fix
151
7
  def self.is_polars_dtype(data_type, include_unknown: false)
152
8
  if data_type == Unknown
@@ -191,19 +47,11 @@ module Polars
191
47
  end
192
48
  end
193
49
 
194
- def self._process_null_values(null_values)
195
- if null_values.is_a?(Hash)
196
- null_values.to_a
197
- else
198
- null_values
199
- end
200
- end
201
-
202
- def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
203
- if !row_count_name.nil?
204
- [row_count_name, row_count_offset]
205
- else
50
+ def self.parse_row_index_args(row_index_name = nil, row_index_offset = 0)
51
+ if row_index_name.nil?
206
52
  nil
53
+ else
54
+ [row_index_name, row_index_offset]
207
55
  end
208
56
  end
209
57
 
@@ -223,21 +71,6 @@ module Polars
223
71
  [projection, columns]
224
72
  end
225
73
 
226
- def self.scale_bytes(sz, to:)
227
- scaling_factor = {
228
- "b" => 1,
229
- "k" => 1024,
230
- "m" => 1024 ** 2,
231
- "g" => 1024 ** 3,
232
- "t" => 1024 ** 4
233
- }[to[0]]
234
- if scaling_factor > 1
235
- sz / scaling_factor.to_f
236
- else
237
- sz
238
- end
239
- end
240
-
241
74
  def self.bool?(value)
242
75
  value.is_a?(TrueClass) || value.is_a?(FalseClass)
243
76
  end
@@ -250,108 +83,10 @@ module Polars
250
83
  value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
251
84
  end
252
85
 
253
- def self._is_iterable_of(val, eltype)
254
- val.all? { |x| x.is_a?(eltype) }
255
- end
256
-
257
- def self.is_bool_sequence(val)
258
- val.is_a?(::Array) && val.all? { |x| x == true || x == false }
259
- end
260
-
261
- def self.is_dtype_sequence(val)
262
- val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
263
- end
264
-
265
- def self.is_int_sequence(val)
266
- val.is_a?(::Array) && _is_iterable_of(val, Integer)
267
- end
268
-
269
- def self.is_expr_sequence(val)
270
- val.is_a?(::Array) && _is_iterable_of(val, Expr)
271
- end
272
-
273
- def self.is_rbexpr_sequence(val)
274
- val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
275
- end
276
-
277
- def self.is_str_sequence(val, allow_str: false)
278
- if allow_str == false && val.is_a?(::String)
279
- false
280
- else
281
- val.is_a?(::Array) && _is_iterable_of(val, ::String)
282
- end
283
- end
284
-
285
86
  def self.local_file?(file)
286
87
  Dir.glob(file).any?
287
88
  end
288
89
 
289
- def self.parse_as_list_of_expressions(*inputs, __structify: false, **named_inputs)
290
- exprs = _parse_positional_inputs(inputs, structify: __structify)
291
- if named_inputs.any?
292
- named_exprs = _parse_named_inputs(named_inputs, structify: __structify)
293
- exprs.concat(named_exprs)
294
- end
295
-
296
- exprs
297
- end
298
-
299
- def self._parse_positional_inputs(inputs, structify: false)
300
- inputs_iter = _parse_inputs_as_iterable(inputs)
301
- inputs_iter.map { |e| parse_as_expression(e, structify: structify) }
302
- end
303
-
304
- def self._parse_inputs_as_iterable(inputs)
305
- if inputs.empty?
306
- return []
307
- end
308
-
309
- if inputs.length == 1 && inputs[0].is_a?(::Array)
310
- return inputs[0]
311
- end
312
-
313
- inputs
314
- end
315
-
316
- def self._parse_named_inputs(named_inputs, structify: false)
317
- named_inputs.map do |name, input|
318
- parse_as_expression(input, structify: structify)._alias(name.to_s)
319
- end
320
- end
321
-
322
- def self.parse_as_expression(input, str_as_lit: false, list_as_lit: true, structify: false, dtype: nil)
323
- if input.is_a?(Expr)
324
- expr = input
325
- elsif input.is_a?(::String) && !str_as_lit
326
- expr = Polars.col(input)
327
- structify = false
328
- elsif input.is_a?(::Array) && !list_as_lit
329
- expr = Polars.lit(Series.new(input), dtype: dtype)
330
- structify = false
331
- else
332
- expr = Polars.lit(input, dtype: dtype)
333
- structify = false
334
- end
335
-
336
- if structify
337
- raise Todo
338
- end
339
-
340
- expr._rbexpr
341
- end
342
-
343
- USE_EARLIEST_TO_AMBIGUOUS = {
344
- true => "earliest",
345
- false => "latest"
346
- }
347
-
348
- def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
349
- unless use_earliest.nil?
350
- ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
351
- end
352
- ambiguous
353
- end
354
-
355
90
  def self._check_arg_is_1byte(arg_name, arg, can_be_empty = false)
356
91
  if arg.is_a?(::String)
357
92
  arg_byte_length = arg.bytesize
@@ -385,72 +120,11 @@ module Polars
385
120
  false
386
121
  end
387
122
 
388
- def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
389
- all_predicates = _parse_positional_inputs(predicates)
390
-
391
- if constraints.any?
392
- constraint_predicates = _parse_constraints(constraints)
393
- all_predicates.concat(constraint_predicates)
394
- end
395
-
396
- _combine_predicates(all_predicates)
397
- end
398
-
399
- def self._parse_constraints(constraints)
400
- constraints.map do |name, value|
401
- Polars.col(name).eq(value)._rbexpr
402
- end
403
- end
404
-
405
- def self._combine_predicates(predicates)
406
- if !predicates.any?
407
- msg = "at least one predicate or constraint must be provided"
408
- raise TypeError, msg
409
- end
410
-
411
- if predicates.length == 1
412
- return predicates[0]
413
- end
414
-
415
- Plr.all_horizontal(predicates)
416
- end
417
-
418
- def self.parse_when_inputs(*predicates, **constraints)
419
- parse_predicates_constraints_as_expression(*predicates, **constraints)
420
- end
421
-
422
123
  def self.parse_interval_argument(interval)
423
124
  if interval.include?(" ")
424
125
  interval = interval.gsub(" ", "")
425
126
  end
426
127
  interval.downcase
427
128
  end
428
-
429
- def self.validate_rolling_by_aggs_arguments(weights, center:)
430
- if !weights.nil?
431
- msg = "`weights` is not supported in `rolling_*(..., by=...)` expression"
432
- raise InvalidOperationError, msg
433
- end
434
- if center
435
- msg = "`center=True` is not supported in `rolling_*(..., by=...)` expression"
436
- raise InvalidOperationError, msg
437
- end
438
- end
439
-
440
- def self.validate_rolling_aggs_arguments(window_size, closed)
441
- if window_size.is_a?(::String)
442
- begin
443
- window_size = window_size.delete_suffix("i").to_i
444
- rescue
445
- msg = "Expected a string of the form 'ni', where `n` is a positive integer, got: #{window_size}"
446
- raise InvalidOperationError, msg
447
- end
448
- end
449
- if !closed.nil?
450
- msg = "`closed` is not supported in `rolling_*(...)` expression"
451
- raise InvalidOperationError, msg
452
- end
453
- window_size
454
- end
455
129
  end
456
130
  end
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.11.0"
3
+ VERSION = "0.12.0"
4
4
  end
data/lib/polars/whenthen.rb CHANGED
@@ -8,7 +8,7 @@ module Polars
8
8
  end
9
9
 
10
10
  def then(statement)
11
- statement_rbexpr = Utils.parse_as_expression(statement)
11
+ statement_rbexpr = Utils.parse_into_expression(statement)
12
12
  Then.new(_when.then(statement_rbexpr))
13
13
  end
14
14
  end
@@ -30,12 +30,12 @@ module Polars
30
30
  end
31
31
 
32
32
  def when(*predicates, **constraints)
33
- condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
33
+ condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
34
34
  ChainedWhen.new(_then.when(condition_rbexpr))
35
35
  end
36
36
 
37
37
  def otherwise(statement)
38
- statement_rbexpr = Utils.parse_as_expression(statement)
38
+ statement_rbexpr = Utils.parse_into_expression(statement)
39
39
  Utils.wrap_expr(_then.otherwise(statement_rbexpr))
40
40
  end
41
41
  end
@@ -49,7 +49,7 @@ module Polars
49
49
  end
50
50
 
51
51
  def then(statement)
52
- statement_rbexpr = Utils.parse_as_expression(statement)
52
+ statement_rbexpr = Utils.parse_into_expression(statement)
53
53
  ChainedThen.new(_chained_when.then(statement_rbexpr))
54
54
  end
55
55
  end
@@ -71,12 +71,12 @@ module Polars
71
71
  end
72
72
 
73
73
  def when(*predicates, **constraints)
74
- condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
74
+ condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
75
75
  ChainedWhen.new(_chained_then.when(condition_rbexpr))
76
76
  end
77
77
 
78
78
  def otherwise(statement)
79
- statement_rbexpr = Utils.parse_as_expression(statement)
79
+ statement_rbexpr = Utils.parse_into_expression(statement)
80
80
  Utils.wrap_expr(_chained_then.otherwise(statement_rbexpr))
81
81
  end
82
82
  end
data/lib/polars.rb CHANGED
@@ -70,6 +70,11 @@ require_relative "polars/struct_expr"
70
70
  require_relative "polars/struct_name_space"
71
71
  require_relative "polars/testing"
72
72
  require_relative "polars/utils"
73
+ require_relative "polars/utils/constants"
74
+ require_relative "polars/utils/convert"
75
+ require_relative "polars/utils/parse"
76
+ require_relative "polars/utils/various"
77
+ require_relative "polars/utils/wrap"
73
78
  require_relative "polars/version"
74
79
  require_relative "polars/whenthen"
75
80
 
@@ -77,4 +82,10 @@ module Polars
77
82
  extend Convert
78
83
  extend Functions
79
84
  extend IO
85
+
86
+ # @private
87
+ F = self
88
+
89
+ # @private
90
+ N_INFER_DEFAULT = 100
80
91
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-03 00:00:00.000000000 Z
11
+ date: 2024-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -102,6 +102,11 @@ files:
102
102
  - lib/polars/struct_name_space.rb
103
103
  - lib/polars/testing.rb
104
104
  - lib/polars/utils.rb
105
+ - lib/polars/utils/constants.rb
106
+ - lib/polars/utils/convert.rb
107
+ - lib/polars/utils/parse.rb
108
+ - lib/polars/utils/various.rb
109
+ - lib/polars/utils/wrap.rb
105
110
  - lib/polars/version.rb
106
111
  - lib/polars/whenthen.rb
107
112
  homepage: https://github.com/ankane/ruby-polars