polars-df 0.9.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Cargo.lock +144 -57
  4. data/README.md +7 -6
  5. data/ext/polars/Cargo.toml +10 -6
  6. data/ext/polars/src/batched_csv.rs +53 -50
  7. data/ext/polars/src/conversion/anyvalue.rs +3 -2
  8. data/ext/polars/src/conversion/mod.rs +31 -67
  9. data/ext/polars/src/dataframe/construction.rs +186 -0
  10. data/ext/polars/src/dataframe/export.rs +48 -0
  11. data/ext/polars/src/dataframe/general.rs +607 -0
  12. data/ext/polars/src/dataframe/io.rs +463 -0
  13. data/ext/polars/src/dataframe/mod.rs +26 -0
  14. data/ext/polars/src/expr/array.rs +6 -2
  15. data/ext/polars/src/expr/datetime.rs +13 -4
  16. data/ext/polars/src/expr/general.rs +50 -9
  17. data/ext/polars/src/expr/list.rs +6 -2
  18. data/ext/polars/src/expr/rolling.rs +185 -69
  19. data/ext/polars/src/expr/string.rs +12 -33
  20. data/ext/polars/src/file.rs +158 -11
  21. data/ext/polars/src/functions/lazy.rs +20 -3
  22. data/ext/polars/src/functions/range.rs +74 -0
  23. data/ext/polars/src/functions/whenthen.rs +47 -17
  24. data/ext/polars/src/interop/mod.rs +1 -0
  25. data/ext/polars/src/interop/numo/mod.rs +2 -0
  26. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  27. data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
  28. data/ext/polars/src/lazyframe/mod.rs +111 -56
  29. data/ext/polars/src/lib.rs +68 -34
  30. data/ext/polars/src/map/dataframe.rs +17 -9
  31. data/ext/polars/src/map/lazy.rs +5 -25
  32. data/ext/polars/src/map/series.rs +7 -1
  33. data/ext/polars/src/series/aggregation.rs +47 -30
  34. data/ext/polars/src/series/export.rs +131 -49
  35. data/ext/polars/src/series/mod.rs +13 -133
  36. data/lib/polars/array_expr.rb +6 -2
  37. data/lib/polars/batched_csv_reader.rb +11 -3
  38. data/lib/polars/convert.rb +6 -1
  39. data/lib/polars/data_frame.rb +225 -370
  40. data/lib/polars/date_time_expr.rb +11 -4
  41. data/lib/polars/date_time_name_space.rb +14 -4
  42. data/lib/polars/dynamic_group_by.rb +2 -2
  43. data/lib/polars/exceptions.rb +4 -0
  44. data/lib/polars/expr.rb +1171 -54
  45. data/lib/polars/functions/lazy.rb +3 -3
  46. data/lib/polars/functions/range/date_range.rb +92 -0
  47. data/lib/polars/functions/range/datetime_range.rb +149 -0
  48. data/lib/polars/functions/range/time_range.rb +141 -0
  49. data/lib/polars/functions/whenthen.rb +74 -5
  50. data/lib/polars/group_by.rb +88 -23
  51. data/lib/polars/io/avro.rb +24 -0
  52. data/lib/polars/{io.rb → io/csv.rb} +307 -489
  53. data/lib/polars/io/database.rb +73 -0
  54. data/lib/polars/io/ipc.rb +247 -0
  55. data/lib/polars/io/json.rb +18 -0
  56. data/lib/polars/io/ndjson.rb +69 -0
  57. data/lib/polars/io/parquet.rb +226 -0
  58. data/lib/polars/lazy_frame.rb +55 -195
  59. data/lib/polars/lazy_group_by.rb +100 -3
  60. data/lib/polars/list_expr.rb +6 -2
  61. data/lib/polars/rolling_group_by.rb +2 -2
  62. data/lib/polars/series.rb +14 -12
  63. data/lib/polars/string_expr.rb +38 -36
  64. data/lib/polars/utils.rb +89 -1
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars/whenthen.rb +83 -0
  67. data/lib/polars.rb +10 -3
  68. metadata +23 -8
  69. data/ext/polars/src/dataframe.rs +0 -1182
  70. data/lib/polars/when.rb +0 -16
  71. data/lib/polars/when_then.rb +0 -19
data/lib/polars/string_expr.rb CHANGED
@@ -840,6 +840,7 @@ module Polars
840
840
  # # │ true │
841
841
  # # └──────────┘
842
842
  def json_path_match(json_path)
843
+ json_path = Utils.parse_as_expression(json_path, str_as_lit: true)
843
844
  Utils.wrap_expr(_rbexpr.str_json_path_match(json_path))
844
845
  end
845
846
 
@@ -1018,15 +1019,15 @@ module Polars
1018
1019
  # )
1019
1020
  # # =>
1020
1021
  # # shape: (3, 3)
1021
- # # ┌───────────────────────────────────┬───────────────────────┬──────────┐
1022
- # # │ url ┆ captures ┆ name │
1023
- # # │ --- ┆ --- ┆ --- │
1024
- # # │ str ┆ struct[2] ┆ str │
1025
- # # ╞═══════════════════════════════════╪═══════════════════════╪══════════╡
1026
- # # │ http://vote.com/ballon_dor?candi… ┆ {"messi","python"} ┆ MESSI │
1027
- # # │ http://vote.com/ballon_dor?candi… ┆ {"weghorst","polars"} ┆ WEGHORST │
1028
- # # │ http://vote.com/ballon_dor?error… ┆ {null,null} ┆ null │
1029
- # # └───────────────────────────────────┴───────────────────────┴──────────┘
1022
+ # # ┌─────────────────────────────────┬───────────────────────┬──────────┐
1023
+ # # │ url ┆ captures ┆ name │
1024
+ # # │ --- ┆ --- ┆ --- │
1025
+ # # │ str ┆ struct[2] ┆ str │
1026
+ # # ╞═════════════════════════════════╪═══════════════════════╪══════════╡
1027
+ # # │ http://vote.com/ballon_dor?can… ┆ {"messi","python"} ┆ MESSI │
1028
+ # # │ http://vote.com/ballon_dor?can… ┆ {"weghorst","polars"} ┆ WEGHORST │
1029
+ # # │ http://vote.com/ballon_dor?err… ┆ {null,null} ┆ null │
1030
+ # # └─────────────────────────────────┴───────────────────────┴──────────┘
1030
1031
  def extract_groups(pattern)
1031
1032
  Utils.wrap_expr(_rbexpr.str_extract_groups(pattern))
1032
1033
  end
@@ -1354,6 +1355,7 @@ module Polars
1354
1355
  # # │ null ┆ null │
1355
1356
  # # └──────┴────────┘
1356
1357
  def to_integer(base: 10, strict: true)
1358
+ base = Utils.parse_as_expression(base, str_as_lit: false)
1357
1359
  Utils.wrap_expr(_rbexpr.str_to_integer(base, strict))
1358
1360
  end
1359
1361
 
@@ -1417,15 +1419,15 @@ module Polars
1417
1419
  # )
1418
1420
  # # =>
1419
1421
  # # shape: (3, 2)
1420
- # # ┌───────────────────────────────────┬──────────────┐
1421
- # # │ lyrics ┆ contains_any │
1422
- # # │ --- ┆ --- │
1423
- # # │ str ┆ bool │
1424
- # # ╞═══════════════════════════════════╪══════════════╡
1425
- # # │ Everybody wants to rule the worl… ┆ false │
1426
- # # │ Tell me what you want, what you … ┆ true │
1427
- # # │ Can you feel the love tonight ┆ true │
1428
- # # └───────────────────────────────────┴──────────────┘
1422
+ # # ┌─────────────────────────────────┬──────────────┐
1423
+ # # │ lyrics ┆ contains_any │
1424
+ # # │ --- ┆ --- │
1425
+ # # │ str ┆ bool │
1426
+ # # ╞═════════════════════════════════╪══════════════╡
1427
+ # # │ Everybody wants to rule the wo… ┆ false │
1428
+ # # │ Tell me what you want, what yo… ┆ true │
1429
+ # # │ Can you feel the love tonight ┆ true │
1430
+ # # └─────────────────────────────────┴──────────────┘
1429
1431
  def contains_any(patterns, ascii_case_insensitive: false)
1430
1432
  patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1431
1433
  Utils.wrap_expr(
@@ -1467,15 +1469,15 @@ module Polars
1467
1469
  # )
1468
1470
  # # =>
1469
1471
  # # shape: (3, 2)
1470
- # # ┌───────────────────────────────────┬───────────────────────────────────┐
1471
- # # │ lyrics ┆ removes_pronouns
1472
- # # │ --- ┆ ---
1473
- # # │ str ┆ str
1474
- # # ╞═══════════════════════════════════╪═══════════════════════════════════╡
1475
- # # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
1476
- # # │ Tell me what you want, what you … ┆ Tell what want, what really r… │
1477
- # # │ Can you feel the love tonight ┆ Can feel the love tonight
1478
- # # └───────────────────────────────────┴───────────────────────────────────┘
1472
+ # # ┌─────────────────────────────────┬─────────────────────────────────┐
1473
+ # # │ lyrics ┆ removes_pronouns
1474
+ # # │ --- ┆ ---
1475
+ # # │ str ┆ str
1476
+ # # ╞═════════════════════════════════╪═════════════════════════════════╡
1477
+ # # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
1478
+ # # │ Tell me what you want, what yo… ┆ Tell what want, what really… │
1479
+ # # │ Can you feel the love tonight ┆ Can feel the love tonight
1480
+ # # └─────────────────────────────────┴─────────────────────────────────┘
1479
1481
  #
1480
1482
  # @example
1481
1483
  # df.with_columns(
@@ -1488,15 +1490,15 @@ module Polars
1488
1490
  # )
1489
1491
  # # =>
1490
1492
  # # shape: (3, 2)
1491
- # # ┌───────────────────────────────────┬───────────────────────────────────┐
1492
- # # │ lyrics ┆ confusing
1493
- # # │ --- ┆ ---
1494
- # # │ str ┆ str
1495
- # # ╞═══════════════════════════════════╪═══════════════════════════════════╡
1496
- # # │ Everybody wants to rule the worl… ┆ Everybody wants to rule the worl… │
1497
- # # │ Tell me what you want, what you … ┆ Tell you what me want, what me r… │
1498
- # # │ Can you feel the love tonight ┆ Can me feel the love tonight
1499
- # # └───────────────────────────────────┴───────────────────────────────────┘
1493
+ # # ┌─────────────────────────────────┬─────────────────────────────────┐
1494
+ # # │ lyrics ┆ confusing
1495
+ # # │ --- ┆ ---
1496
+ # # │ str ┆ str
1497
+ # # ╞═════════════════════════════════╪═════════════════════════════════╡
1498
+ # # │ Everybody wants to rule the wo… ┆ Everybody wants to rule the wo… │
1499
+ # # │ Tell me what you want, what yo… ┆ Tell you what me want, what me… │
1500
+ # # │ Can you feel the love tonight ┆ Can me feel the love tonight
1501
+ # # └─────────────────────────────────┴─────────────────────────────────┘
1500
1502
  def replace_many(patterns, replace_with, ascii_case_insensitive: false)
1501
1503
  patterns = Utils.parse_as_expression(patterns, str_as_lit: false, list_as_lit: false)
1502
1504
  replace_with = Utils.parse_as_expression(
data/lib/polars/utils.rb CHANGED
@@ -139,7 +139,7 @@ module Polars
139
139
  Polars.lit(value)
140
140
  end
141
141
 
142
- def self.normalise_filepath(path, check_not_directory: true)
142
+ def self.normalize_filepath(path, check_not_directory: true)
143
143
  path = File.expand_path(path)
144
144
  if check_not_directory && File.exist?(path) && Dir.exist?(path)
145
145
  raise ArgumentError, "Expected a file path; #{path} is a directory"
@@ -364,5 +364,93 @@ module Polars
364
364
  end
365
365
  end
366
366
  end
367
+
368
+ def self._expand_selectors(frame, *items)
369
+ items_iter = _parse_inputs_as_iterable(items)
370
+
371
+ expanded = []
372
+ items_iter.each do |item|
373
+ if is_selector(item)
374
+ selector_cols = expand_selector(frame, item)
375
+ expanded.concat(selector_cols)
376
+ else
377
+ expanded << item
378
+ end
379
+ end
380
+ expanded
381
+ end
382
+
383
+ # TODO
384
+ def self.is_selector(obj)
385
+ false
386
+ end
387
+
388
+ def self.parse_predicates_constraints_as_expression(*predicates, **constraints)
389
+ all_predicates = _parse_positional_inputs(predicates)
390
+
391
+ if constraints.any?
392
+ constraint_predicates = _parse_constraints(constraints)
393
+ all_predicates.concat(constraint_predicates)
394
+ end
395
+
396
+ _combine_predicates(all_predicates)
397
+ end
398
+
399
+ def self._parse_constraints(constraints)
400
+ constraints.map do |name, value|
401
+ Polars.col(name).eq(value)._rbexpr
402
+ end
403
+ end
404
+
405
+ def self._combine_predicates(predicates)
406
+ if !predicates.any?
407
+ msg = "at least one predicate or constraint must be provided"
408
+ raise TypeError, msg
409
+ end
410
+
411
+ if predicates.length == 1
412
+ return predicates[0]
413
+ end
414
+
415
+ Plr.all_horizontal(predicates)
416
+ end
417
+
418
+ def self.parse_when_inputs(*predicates, **constraints)
419
+ parse_predicates_constraints_as_expression(*predicates, **constraints)
420
+ end
421
+
422
+ def self.parse_interval_argument(interval)
423
+ if interval.include?(" ")
424
+ interval = interval.gsub(" ", "")
425
+ end
426
+ interval.downcase
427
+ end
428
+
429
+ def self.validate_rolling_by_aggs_arguments(weights, center:)
430
+ if !weights.nil?
431
+ msg = "`weights` is not supported in `rolling_*(..., by=...)` expression"
432
+ raise InvalidOperationError, msg
433
+ end
434
+ if center
435
+ msg = "`center=True` is not supported in `rolling_*(..., by=...)` expression"
436
+ raise InvalidOperationError, msg
437
+ end
438
+ end
439
+
440
+ def self.validate_rolling_aggs_arguments(window_size, closed)
441
+ if window_size.is_a?(::String)
442
+ begin
443
+ window_size = window_size.delete_suffix("i").to_i
444
+ rescue
445
+ msg = "Expected a string of the form 'ni', where `n` is a positive integer, got: #{window_size}"
446
+ raise InvalidOperationError, msg
447
+ end
448
+ end
449
+ if !closed.nil?
450
+ msg = "`closed` is not supported in `rolling_*(...)` expression"
451
+ raise InvalidOperationError, msg
452
+ end
453
+ window_size
454
+ end
367
455
  end
368
456
  end
data/lib/polars/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.9.0"
3
+ VERSION = "0.11.0"
4
4
  end
data/lib/polars/whenthen.rb ADDED
@@ -0,0 +1,83 @@
1
+ module Polars
2
+ # @private
3
+ class When
4
+ attr_accessor :_when
5
+
6
+ def initialize(rbwhen)
7
+ self._when = rbwhen
8
+ end
9
+
10
+ def then(statement)
11
+ statement_rbexpr = Utils.parse_as_expression(statement)
12
+ Then.new(_when.then(statement_rbexpr))
13
+ end
14
+ end
15
+
16
+ # @private
17
+ class Then < Expr
18
+ attr_accessor :_then
19
+
20
+ def initialize(rbthen)
21
+ self._then = rbthen
22
+ end
23
+
24
+ def self._from_rbexpr(rbexpr)
25
+ Utils.wrap_expr(rbexpr)
26
+ end
27
+
28
+ def _rbexpr
29
+ _then.otherwise(Polars.lit(nil)._rbexpr)
30
+ end
31
+
32
+ def when(*predicates, **constraints)
33
+ condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
34
+ ChainedWhen.new(_then.when(condition_rbexpr))
35
+ end
36
+
37
+ def otherwise(statement)
38
+ statement_rbexpr = Utils.parse_as_expression(statement)
39
+ Utils.wrap_expr(_then.otherwise(statement_rbexpr))
40
+ end
41
+ end
42
+
43
+ # @private
44
+ class ChainedWhen
45
+ attr_accessor :_chained_when
46
+
47
+ def initialize(chained_when)
48
+ self._chained_when = chained_when
49
+ end
50
+
51
+ def then(statement)
52
+ statement_rbexpr = Utils.parse_as_expression(statement)
53
+ ChainedThen.new(_chained_when.then(statement_rbexpr))
54
+ end
55
+ end
56
+
57
+ # @private
58
+ class ChainedThen < Expr
59
+ attr_accessor :_chained_then
60
+
61
+ def initialize(chained_then)
62
+ self._chained_then = chained_then
63
+ end
64
+
65
+ def self._from_rbexpr(rbexpr)
66
+ Utils.wrap_expr(rbexpr)
67
+ end
68
+
69
+ def _rbexpr
70
+ _chained_then.otherwise(Polars.lit(nil)._rbexpr)
71
+ end
72
+
73
+ def when(*predicates, **constraints)
74
+ condition_rbexpr = Utils.parse_when_inputs(*predicates, **constraints)
75
+ ChainedWhen.new(_chained_then.when(condition_rbexpr))
76
+ end
77
+
78
+ def otherwise(statement)
79
+ statement_rbexpr = Utils.parse_as_expression(statement)
80
+ Utils.wrap_expr(_chained_then.otherwise(statement_rbexpr))
81
+ end
82
+ end
83
+ end
data/lib/polars.rb CHANGED
@@ -42,9 +42,17 @@ require_relative "polars/functions/whenthen"
42
42
  require_relative "polars/functions/aggregation/horizontal"
43
43
  require_relative "polars/functions/aggregation/vertical"
44
44
  require_relative "polars/functions/range/date_range"
45
+ require_relative "polars/functions/range/datetime_range"
45
46
  require_relative "polars/functions/range/int_range"
47
+ require_relative "polars/functions/range/time_range"
46
48
  require_relative "polars/group_by"
47
- require_relative "polars/io"
49
+ require_relative "polars/io/avro"
50
+ require_relative "polars/io/csv"
51
+ require_relative "polars/io/database"
52
+ require_relative "polars/io/ipc"
53
+ require_relative "polars/io/json"
54
+ require_relative "polars/io/ndjson"
55
+ require_relative "polars/io/parquet"
48
56
  require_relative "polars/lazy_frame"
49
57
  require_relative "polars/lazy_group_by"
50
58
  require_relative "polars/list_expr"
@@ -63,8 +71,7 @@ require_relative "polars/struct_name_space"
63
71
  require_relative "polars/testing"
64
72
  require_relative "polars/utils"
65
73
  require_relative "polars/version"
66
- require_relative "polars/when"
67
- require_relative "polars/when_then"
74
+ require_relative "polars/whenthen"
68
75
 
69
76
  module Polars
70
77
  extend Convert
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-03 00:00:00.000000000 Z
11
+ date: 2024-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -57,7 +57,11 @@ files:
57
57
  - ext/polars/src/conversion/anyvalue.rs
58
58
  - ext/polars/src/conversion/chunked_array.rs
59
59
  - ext/polars/src/conversion/mod.rs
60
- - ext/polars/src/dataframe.rs
60
+ - ext/polars/src/dataframe/construction.rs
61
+ - ext/polars/src/dataframe/export.rs
62
+ - ext/polars/src/dataframe/general.rs
63
+ - ext/polars/src/dataframe/io.rs
64
+ - ext/polars/src/dataframe/mod.rs
61
65
  - ext/polars/src/error.rs
62
66
  - ext/polars/src/expr/array.rs
63
67
  - ext/polars/src/expr/binary.rs
@@ -83,6 +87,10 @@ files:
83
87
  - ext/polars/src/functions/range.rs
84
88
  - ext/polars/src/functions/string_cache.rs
85
89
  - ext/polars/src/functions/whenthen.rs
90
+ - ext/polars/src/interop/mod.rs
91
+ - ext/polars/src/interop/numo/mod.rs
92
+ - ext/polars/src/interop/numo/to_numo_df.rs
93
+ - ext/polars/src/interop/numo/to_numo_series.rs
86
94
  - ext/polars/src/lazyframe/mod.rs
87
95
  - ext/polars/src/lazygroupby.rs
88
96
  - ext/polars/src/lib.rs
@@ -133,11 +141,19 @@ files:
133
141
  - lib/polars/functions/lit.rb
134
142
  - lib/polars/functions/random.rb
135
143
  - lib/polars/functions/range/date_range.rb
144
+ - lib/polars/functions/range/datetime_range.rb
136
145
  - lib/polars/functions/range/int_range.rb
146
+ - lib/polars/functions/range/time_range.rb
137
147
  - lib/polars/functions/repeat.rb
138
148
  - lib/polars/functions/whenthen.rb
139
149
  - lib/polars/group_by.rb
140
- - lib/polars/io.rb
150
+ - lib/polars/io/avro.rb
151
+ - lib/polars/io/csv.rb
152
+ - lib/polars/io/database.rb
153
+ - lib/polars/io/ipc.rb
154
+ - lib/polars/io/json.rb
155
+ - lib/polars/io/ndjson.rb
156
+ - lib/polars/io/parquet.rb
141
157
  - lib/polars/lazy_frame.rb
142
158
  - lib/polars/lazy_group_by.rb
143
159
  - lib/polars/list_expr.rb
@@ -157,9 +173,8 @@ files:
157
173
  - lib/polars/testing.rb
158
174
  - lib/polars/utils.rb
159
175
  - lib/polars/version.rb
160
- - lib/polars/when.rb
161
- - lib/polars/when_then.rb
162
- homepage: https://github.com/ankane/polars-ruby
176
+ - lib/polars/whenthen.rb
177
+ homepage: https://github.com/ankane/ruby-polars
163
178
  licenses:
164
179
  - MIT
165
180
  metadata: {}
@@ -178,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
193
  - !ruby/object:Gem::Version
179
194
  version: '0'
180
195
  requirements: []
181
- rubygems_version: 3.5.3
196
+ rubygems_version: 3.5.9
182
197
  signing_key:
183
198
  specification_version: 4
184
199
  summary: Blazingly fast DataFrames for Ruby