polars-df 0.11.0-x86_64-linux-musl → 0.13.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +22 -0
  3. data/Cargo.lock +428 -450
  4. data/LICENSE-THIRD-PARTY.txt +2502 -2242
  5. data/lib/polars/3.1/polars.so +0 -0
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/array_expr.rb +4 -4
  9. data/lib/polars/batched_csv_reader.rb +2 -2
  10. data/lib/polars/cat_expr.rb +0 -36
  11. data/lib/polars/cat_name_space.rb +0 -37
  12. data/lib/polars/data_frame.rb +93 -101
  13. data/lib/polars/data_types.rb +1 -1
  14. data/lib/polars/date_time_expr.rb +525 -573
  15. data/lib/polars/date_time_name_space.rb +263 -464
  16. data/lib/polars/dynamic_group_by.rb +3 -3
  17. data/lib/polars/exceptions.rb +3 -0
  18. data/lib/polars/expr.rb +367 -330
  19. data/lib/polars/expr_dispatch.rb +1 -1
  20. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  21. data/lib/polars/functions/as_datatype.rb +63 -40
  22. data/lib/polars/functions/lazy.rb +63 -14
  23. data/lib/polars/functions/lit.rb +1 -1
  24. data/lib/polars/functions/range/date_range.rb +18 -77
  25. data/lib/polars/functions/range/datetime_range.rb +4 -4
  26. data/lib/polars/functions/range/int_range.rb +2 -2
  27. data/lib/polars/functions/range/time_range.rb +4 -4
  28. data/lib/polars/functions/repeat.rb +1 -1
  29. data/lib/polars/functions/whenthen.rb +1 -1
  30. data/lib/polars/io/csv.rb +8 -8
  31. data/lib/polars/io/ipc.rb +35 -7
  32. data/lib/polars/io/json.rb +13 -2
  33. data/lib/polars/io/ndjson.rb +15 -4
  34. data/lib/polars/io/parquet.rb +15 -8
  35. data/lib/polars/lazy_frame.rb +123 -105
  36. data/lib/polars/lazy_group_by.rb +1 -1
  37. data/lib/polars/list_expr.rb +11 -11
  38. data/lib/polars/list_name_space.rb +5 -1
  39. data/lib/polars/rolling_group_by.rb +5 -7
  40. data/lib/polars/series.rb +108 -191
  41. data/lib/polars/string_expr.rb +51 -76
  42. data/lib/polars/string_name_space.rb +5 -4
  43. data/lib/polars/testing.rb +2 -2
  44. data/lib/polars/utils/constants.rb +9 -0
  45. data/lib/polars/utils/convert.rb +97 -0
  46. data/lib/polars/utils/parse.rb +89 -0
  47. data/lib/polars/utils/various.rb +76 -0
  48. data/lib/polars/utils/wrap.rb +19 -0
  49. data/lib/polars/utils.rb +4 -330
  50. data/lib/polars/version.rb +1 -1
  51. data/lib/polars/whenthen.rb +6 -6
  52. data/lib/polars.rb +11 -0
  53. metadata +7 -2
data/lib/polars/utils.rb CHANGED
@@ -3,150 +3,6 @@ module Polars
3
3
  module Utils
4
4
  DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]
5
5
 
6
- def self.wrap_s(s)
7
- Series._from_rbseries(s)
8
- end
9
-
10
- def self.wrap_df(df)
11
- DataFrame._from_rbdf(df)
12
- end
13
-
14
- def self.wrap_ldf(ldf)
15
- LazyFrame._from_rbldf(ldf)
16
- end
17
-
18
- def self.wrap_expr(rbexpr)
19
- Expr._from_rbexpr(rbexpr)
20
- end
21
-
22
- def self.col(name)
23
- Polars.col(name)
24
- end
25
-
26
- def self.arrlen(obj)
27
- if obj.is_a?(Range)
28
- # size only works for numeric ranges
29
- obj.to_a.length
30
- elsif obj.is_a?(::String)
31
- nil
32
- else
33
- obj.length
34
- end
35
- rescue
36
- nil
37
- end
38
-
39
- def self._timedelta_to_pl_duration(td)
40
- td
41
- end
42
-
43
- def self._datetime_to_pl_timestamp(dt, time_unit)
44
- dt = dt.to_datetime.to_time
45
- if time_unit == "ns"
46
- nanos = dt.nsec
47
- dt.to_i * 1_000_000_000 + nanos
48
- elsif time_unit == "us"
49
- micros = dt.usec
50
- dt.to_i * 1_000_000 + micros
51
- elsif time_unit == "ms"
52
- millis = dt.usec / 1000
53
- dt.to_i * 1_000 + millis
54
- elsif time_unit.nil?
55
- # Ruby has ns precision
56
- nanos = dt.nsec
57
- dt.to_i * 1_000_000_000 + nanos
58
- else
59
- raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
60
- end
61
- end
62
-
63
- def self._date_to_pl_date(d)
64
- dt = d.to_datetime.to_time
65
- dt.to_i / (3600 * 24)
66
- end
67
-
68
- def self._to_ruby_time(value)
69
- if value == 0
70
- ::Time.utc(2000, 1, 1)
71
- else
72
- seconds, nanoseconds = value.divmod(1_000_000_000)
73
- minutes, seconds = seconds.divmod(60)
74
- hours, minutes = minutes.divmod(60)
75
- ::Time.utc(2000, 1, 1, hours, minutes, seconds, nanoseconds / 1000.0)
76
- end
77
- end
78
-
79
- def self._to_ruby_duration(value, time_unit = "ns")
80
- if time_unit == "ns"
81
- value / 1e9
82
- elsif time_unit == "us"
83
- value / 1e6
84
- elsif time_unit == "ms"
85
- value / 1e3
86
- else
87
- raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
88
- end
89
- end
90
-
91
- def self._to_ruby_date(value)
92
- # days to seconds
93
- # important to create from utc. Not doing this leads
94
- # to inconsistencies dependent on the timezone you are in.
95
- ::Time.at(value * 86400).utc.to_date
96
- end
97
-
98
- def self._to_ruby_datetime(value, time_unit = "ns", time_zone = nil)
99
- if time_zone.nil? || time_zone == ""
100
- if time_unit == "ns"
101
- return ::Time.at(value / 1000000000, value % 1000000000, :nsec).utc
102
- elsif time_unit == "us"
103
- return ::Time.at(value / 1000000, value % 1000000, :usec).utc
104
- elsif time_unit == "ms"
105
- return ::Time.at(value / 1000, value % 1000, :millisecond).utc
106
- else
107
- raise ArgumentError, "time_unit must be one of {{'ns', 'us', 'ms'}}, got #{time_unit}"
108
- end
109
- else
110
- raise Todo
111
- end
112
- end
113
-
114
- def self._to_ruby_decimal(digits, scale)
115
- BigDecimal("#{digits}e#{scale}")
116
- end
117
-
118
- def self.selection_to_rbexpr_list(exprs)
119
- if exprs.is_a?(::String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
120
- exprs = [exprs]
121
- end
122
-
123
- exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
124
- end
125
-
126
- def self.expr_to_lit_or_expr(expr, str_to_lit: true)
127
- if (expr.is_a?(::String) || expr.is_a?(Symbol)) && !str_to_lit
128
- col(expr)
129
- elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(::String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
130
- lit(expr)
131
- elsif expr.is_a?(Expr)
132
- expr
133
- else
134
- raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
135
- end
136
- end
137
-
138
- def self.lit(value)
139
- Polars.lit(value)
140
- end
141
-
142
- def self.normalize_filepath(path, check_not_directory: true)
143
- path = File.expand_path(path)
144
- if check_not_directory && File.exist?(path) && Dir.exist?(path)
145
- raise ArgumentError, "Expected a file path; #{path} is a directory"
146
- end
147
- path
148
- end
149
-
150
6
  # TODO fix
151
7
  def self.is_polars_dtype(data_type, include_unknown: false)
152
8
  if data_type == Unknown
@@ -191,19 +47,11 @@ module Polars
191
47
  end
192
48
  end
193
49
 
194
- def self._process_null_values(null_values)
195
- if null_values.is_a?(Hash)
196
- null_values.to_a
197
- else
198
- null_values
199
- end
200
- end
201
-
202
- def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
203
- if !row_count_name.nil?
204
- [row_count_name, row_count_offset]
205
- else
50
+ def self.parse_row_index_args(row_index_name = nil, row_index_offset = 0)
51
+ if row_index_name.nil?
206
52
  nil
53
+ else
54
+ [row_index_name, row_index_offset]
207
55
  end
208
56
  end
209
57
 
@@ -223,21 +71,6 @@ module Polars
223
71
  [projection, columns]
224
72
  end
225
73
 
226
- def self.scale_bytes(sz, to:)
227
- scaling_factor = {
228
- "b" => 1,
229
- "k" => 1024,
230
- "m" => 1024 ** 2,
231
- "g" => 1024 ** 3,
232
- "t" => 1024 ** 4
233
- }[to[0]]
234
- if scaling_factor > 1
235
- sz / scaling_factor.to_f
236
- else
237
- sz
238
- end
239
- end
240
-
241
74
  def self.bool?(value)
242
75
  value.is_a?(TrueClass) || value.is_a?(FalseClass)
243
76
  end
@@ -250,108 +83,10 @@ module Polars
250
83
  value.is_a?(::String) || (defined?(Pathname) && value.is_a?(Pathname))
251
84
  end
252
85
 
253
- def self._is_iterable_of(val, eltype)
254
- val.all? { |x| x.is_a?(eltype) }
255
- end
256
-
257
- def self.is_bool_sequence(val)
258
- val.is_a?(::Array) && val.all? { |x| x == true || x == false }
259
- end
260
-
261
- def self.is_dtype_sequence(val)
262
- val.is_a?(::Array) && val.all? { |x| is_polars_dtype(x) }
263
- end
264
-
265
- def self.is_int_sequence(val)
266
- val.is_a?(::Array) && _is_iterable_of(val, Integer)
267
- end
268
-
269
- def self.is_expr_sequence(val)
270
- val.is_a?(::Array) && _is_iterable_of(val, Expr)
271
- end
272
-
273
- def self.is_rbexpr_sequence(val)
274
- val.is_a?(::Array) && _is_iterable_of(val, RbExpr)
275
- end
276
-
277
- def self.is_str_sequence(val, allow_str: false)
278
- if allow_str == false && val.is_a?(::String)
279
- false
280
- else
281
- val.is_a?(::Array) && _is_iterable_of(val, ::String)
282
- end
283
- end
284
-
285
86
  def self.local_file?(file)
286
87
  Dir.glob(file).any?
287
88
  end
288
89
 
289
- def self.parse_as_list_of_expressions(*inputs, __structify: false, **named_inputs)
290
- exprs = _parse_positional_inputs(inputs, structify: __structify)
291
- if named_inputs.any?
292
- named_exprs = _parse_named_inputs(named_inputs, structify: __structify)
293
- exprs.concat(named_exprs)
294
- end
295
-
296
- exprs
297
- end
298
-
299
- def self._parse_positional_inputs(inputs, structify: false)
300
- inputs_iter = _parse_inputs_as_iterable(inputs)
301
- inputs_iter.map { |e| parse_as_expression(e, structify: structify) }
302
- end
303
-
304
- def self._parse_inputs_as_iterable(inputs)
305
- if inputs.empty?
306
- return []
307
- end
308
-
309
- if inputs.length == 1 && inputs[0].is_a?(::Array)
310
- return inputs[0]
311
- end
312
-
313
- inputs
314
- end
315
-
316
- def self._parse_named_inputs(named_inputs, structify: false)
317
- named_inputs.map do |name, input|
318
- parse_as_expression(input, structify: structify)._alias(name.to_s)
319
- end
320
- end
321
-
322
- def self.parse_as_expression(input, str_as_lit: false, list_as_lit: true, structify: false, dtype: nil)
323
- if input.is_a?(Expr)
324
- expr = input
325
- elsif input.is_a?(::String) && !str_as_lit
326
- expr = Polars.col(input)
327
- structify = false
328
- elsif input.is_a?(::Array) && !list_as_lit
329
- expr = Polars.lit(Series.new(input), dtype: dtype)
330
- structify = false
331
- else
332
- expr = Polars.lit(input, dtype: dtype)
333
- structify = false
334
- end
335
-
336
- if structify
337
- raise Todo
338
- end
339
-
340
- expr._rbexpr
341
- end
342
-
343
- USE_EARLIEST_TO_AMBIGUOUS = {
344
- true => "earliest",
345
- false => "latest"
346
- }
347
-
348
- def self.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
349
- unless use_earliest.nil?
350
- ambiguous = USE_EARLIEST_TO_AMBIGUOUS.fetch(use_earliest)
351
- end
352
- ambiguous
353
- end
354
-
355
90
  def self._check_arg_is_1byte(arg_name, arg, can_be_empty = false)
356
91
  if arg.is_a?(::String)
357
92
  arg_byte_length = arg.bytesize
@@ -385,72 +120,11 @@ module Polars
385
120
  false
386
121
  end
387
122
 
388
- def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
389
- all_predicates = _parse_positional_inputs(predicates)
390
-
391
- if constraints.any?
392
- constraint_predicates = _parse_constraints(constraints)
393
- all_predicates.concat(constraint_predicates)
394
- end
395
-
396
- _combine_predicates(all_predicates)
397
- end
398
-
399
- def self._parse_constraints(constraints)
400
- constraints.map do |name, value|
401
- Polars.col(name).eq(value)._rbexpr
402
- end
403
- end
404
-
405
- def self._combine_predicates(predicates)
406
- if !predicates.any?
407
- msg = "at least one predicate or constraint must be provided"
408
- raise TypeError, msg
409
- end
410
-
411
- if predicates.length == 1
412
- return predicates[0]
413
- end
414
-
415
- Plr.all_horizontal(predicates)
416
- end
417
-
418
- def self.parse_when_inputs(*predicates, **constraints)
419
- parse_predicates_constraints_as_expression(*predicates, **constraints)
420
- end
421
-
422
123
  def self.parse_interval_argument(interval)
423
124
  if interval.include?(" ")
424
125
  interval = interval.gsub(" ", "")
425
126
  end
426
127
  interval.downcase
427
128
  end
428
-
429
- def self.validate_rolling_by_aggs_arguments(weights, center:)
430
- if !weights.nil?
431
- msg = "`weights` is not supported in `rolling_*(..., by=...)` expression"
432
- raise InvalidOperationError, msg
433
- end
434
- if center
435
- msg = "`center=True` is not supported in `rolling_*(..., by=...)` expression"
436
- raise InvalidOperationError, msg
437
- end
438
- end
439
-
440
- def self.validate_rolling_aggs_arguments(window_size, closed)
441
- if window_size.is_a?(::String)
442
- begin
443
- window_size = window_size.delete_suffix("i").to_i
444
- rescue
445
- msg = "Expected a string of the form 'ni', where `n` is a positive integer, got: #{window_size}"
446
- raise InvalidOperationError, msg
447
- end
448
- end
449
- if !closed.nil?
450
- msg = "`closed` is not supported in `rolling_*(...)` expression"
451
- raise InvalidOperationError, msg
452
- end
453
- window_size
454
- end
455
129
  end
456
130
  end
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.11.0"
3
+ VERSION = "0.13.0"
4
4
  end
data/lib/polars/whenthen.rb CHANGED
@@ -8,7 +8,7 @@ module Polars
8
8
  end
9
9
 
10
10
  def then(statement)
11
- statement_rbexpr = Utils.parse_as_expression(statement)
11
+ statement_rbexpr = Utils.parse_into_expression(statement)
12
12
  Then.new(_when.then(statement_rbexpr))
13
13
  end
14
14
  end
@@ -30,12 +30,12 @@ module Polars
30
30
  end
31
31
 
32
32
  def when(*predicates, **constraints)
33
- condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
33
+ condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
34
34
  ChainedWhen.new(_then.when(condition_rbexpr))
35
35
  end
36
36
 
37
37
  def otherwise(statement)
38
- statement_rbexpr = Utils.parse_as_expression(statement)
38
+ statement_rbexpr = Utils.parse_into_expression(statement)
39
39
  Utils.wrap_expr(_then.otherwise(statement_rbexpr))
40
40
  end
41
41
  end
@@ -49,7 +49,7 @@ module Polars
49
49
  end
50
50
 
51
51
  def then(statement)
52
- statement_rbexpr = Utils.parse_as_expression(statement)
52
+ statement_rbexpr = Utils.parse_into_expression(statement)
53
53
  ChainedThen.new(_chained_when.then(statement_rbexpr))
54
54
  end
55
55
  end
@@ -71,12 +71,12 @@ module Polars
71
71
  end
72
72
 
73
73
  def when(*predicates, **constraints)
74
- condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
74
+ condition_rbexpr = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
75
75
  ChainedWhen.new(_chained_then.when(condition_rbexpr))
76
76
  end
77
77
 
78
78
  def otherwise(statement)
79
- statement_rbexpr = Utils.parse_as_expression(statement)
79
+ statement_rbexpr = Utils.parse_into_expression(statement)
80
80
  Utils.wrap_expr(_chained_then.otherwise(statement_rbexpr))
81
81
  end
82
82
  end
data/lib/polars.rb CHANGED
@@ -70,6 +70,11 @@ require_relative "polars/struct_expr"
70
70
  require_relative "polars/struct_name_space"
71
71
  require_relative "polars/testing"
72
72
  require_relative "polars/utils"
73
+ require_relative "polars/utils/constants"
74
+ require_relative "polars/utils/convert"
75
+ require_relative "polars/utils/parse"
76
+ require_relative "polars/utils/various"
77
+ require_relative "polars/utils/wrap"
73
78
  require_relative "polars/version"
74
79
  require_relative "polars/whenthen"
75
80
 
@@ -77,4 +82,10 @@ module Polars
77
82
  extend Convert
78
83
  extend Functions
79
84
  extend IO
85
+
86
+ # @private
87
+ F = self
88
+
89
+ # @private
90
+ N_INFER_DEFAULT = 100
80
91
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.13.0
5
5
  platform: x86_64-linux-musl
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-03 00:00:00.000000000 Z
11
+ date: 2024-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -102,6 +102,11 @@ files:
102
102
  - lib/polars/struct_name_space.rb
103
103
  - lib/polars/testing.rb
104
104
  - lib/polars/utils.rb
105
+ - lib/polars/utils/constants.rb
106
+ - lib/polars/utils/convert.rb
107
+ - lib/polars/utils/parse.rb
108
+ - lib/polars/utils/various.rb
109
+ - lib/polars/utils/wrap.rb
105
110
  - lib/polars/version.rb
106
111
  - lib/polars/whenthen.rb
107
112
  homepage: https://github.com/ankane/ruby-polars