polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -77,7 +77,6 @@ module Polars
77
77
  def len
78
78
  super
79
79
  end
80
- alias_method :lengths, :len
81
80
 
82
81
  # Drop all null values in the list.
83
82
  #
@@ -259,6 +258,11 @@ module Polars
259
258
 
260
259
  # Sort the arrays in the list.
261
260
  #
261
+ # @param descending [Boolean]
262
+ # Sort in descending order.
263
+ # @param nulls_last [Boolean]
264
+ # Place null values last.
265
+ #
262
266
  # @return [Series]
263
267
  #
264
268
  # @example
@@ -273,7 +277,7 @@ module Polars
273
277
  # # ]
274
278
  #
275
279
  # @example
276
- # s.list.sort(reverse: true)
280
+ # s.list.sort(descending: true)
277
281
  # # =>
278
282
  # # shape: (2,)
279
283
  # # Series: 'a' [list[i64]]
@@ -281,7 +285,7 @@ module Polars
281
285
  # # [3, 2, 1]
282
286
  # # [9, 2, 1]
283
287
  # # ]
284
- def sort(reverse: false)
288
+ def sort(descending: false, nulls_last: false)
285
289
  super
286
290
  end
287
291
 
@@ -305,6 +309,9 @@ module Polars
305
309
 
306
310
  # Get the unique/distinct values in the list.
307
311
  #
312
+ # @param maintain_order [Boolean]
313
+ # Maintain order of data. This requires more work.
314
+ #
308
315
  # @return [Series]
309
316
  #
310
317
  # @example
@@ -317,7 +324,7 @@ module Polars
317
324
  # # [1, 2]
318
325
  # # [2, 3]
319
326
  # # ]
320
- def unique
327
+ def unique(maintain_order: false)
321
328
  super
322
329
  end
323
330
 
@@ -400,8 +407,8 @@ module Polars
400
407
  # Indices to return per sublist
401
408
  # @param null_on_oob [Boolean]
402
409
  # Behavior if an index is out of bounds:
403
- # True -> set as null
404
- # False -> raise an error
410
+ # true -> set as null
411
+ # false -> raise an error
405
412
  # Note that defaulting to raising an error is much cheaper
406
413
  #
407
414
  # @return [Series]
@@ -461,6 +468,11 @@ module Polars
461
468
  #
462
469
  # @param separator [String]
463
470
  # string to separate the items with
471
+ # @param ignore_nulls [Boolean]
472
+ # Ignore null values (default).
473
+ #
474
+ # If set to `false`, null values will be propagated.
475
+ # If the sub-list contains any null values, the output is `nil`.
464
476
  #
465
477
  # @return [Series]
466
478
  #
@@ -474,7 +486,7 @@ module Polars
474
486
  # # "foo-bar"
475
487
  # # "hello-world"
476
488
  # # ]
477
- def join(separator)
489
+ def join(separator, ignore_nulls: true)
478
490
  super
479
491
  end
480
492
 
@@ -516,10 +528,33 @@ module Polars
516
528
  super
517
529
  end
518
530
 
531
+ # Get the single value of the sublists.
532
+ #
533
+ # This errors if the sublist length is not exactly one.
534
+ #
535
+ # @return [Series]
536
+ #
537
+ # @example
538
+ # s = Polars::Series.new("a", [[1], [4], [6]])
539
+ # s.list.item
540
+ # # =>
541
+ # # shape: (3,)
542
+ # # Series: 'a' [i64]
543
+ # # [
544
+ # # 1
545
+ # # 4
546
+ # # 6
547
+ # # ]
548
+ def item
549
+ super
550
+ end
551
+
519
552
  # Check if sublists contain the given item.
520
553
  #
521
554
  # @param item [Object]
522
555
  # Item that will be checked for membership.
556
+ # @param nulls_equal [Boolean]
557
+ # If true, treat null as a distinct value. Null values will not propagate.
523
558
  #
524
559
  # @return [Series]
525
560
  #
@@ -534,7 +569,7 @@ module Polars
534
569
  # # false
535
570
  # # true
536
571
  # # ]
537
- def contains(item)
572
+ def contains(item, nulls_equal: true)
538
573
  super
539
574
  end
540
575
 
@@ -599,7 +634,7 @@ module Polars
599
634
 
600
635
  # Shift values by the given period.
601
636
  #
602
- # @param periods [Integer]
637
+ # @param n [Integer]
603
638
  # Number of places to shift (may be negative).
604
639
  #
605
640
  # @return [Series]
@@ -614,7 +649,7 @@ module Polars
614
649
  # # [null, 1, … 3]
615
650
  # # [null, 10, 2]
616
651
  # # ]
617
- def shift(periods = 1)
652
+ def shift(n = 1)
618
653
  super
619
654
  end
620
655
 
@@ -799,22 +834,51 @@ module Polars
799
834
  # @return [Series]
800
835
  #
801
836
  # @example
802
- # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
803
- # df.with_column(
804
- # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
805
- # )
837
+ # s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
838
+ # s.list.eval(Polars.element.rank)
806
839
  # # =>
807
- # # shape: (3, 3)
808
- # # ┌─────┬─────┬────────────┐
809
- # # │ a ┆ b ┆ rank │
810
- # # │ --- ┆ --- ┆ --- │
811
- # # │ i64 ┆ i64 ┆ list[f64] │
812
- # # ╞═════╪═════╪════════════╡
813
- # # │ 1 ┆ 4 ┆ [1.0, 2.0] │
814
- # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
815
- # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
816
- # # └─────┴─────┴────────────┘
840
+ # # shape: (3,)
841
+ # # Series: 'a' [list[f64]]
842
+ # # [
843
+ # # [1.0, 2.0]
844
+ # # [2.0, 1.0]
845
+ # # [2.0, 1.0]
846
+ # # ]
817
847
  def eval(expr)
848
+ s = Utils.wrap_s(_s)
849
+ s.to_frame.select(F.col(s.name).list.eval(expr)).to_series
850
+ end
851
+
852
+ # Run any polars aggregation expression against the list's elements.
853
+ #
854
+ # @param expr [Expr]
855
+ # Expression to run. Note that you can select an element with `Polars.element`.
856
+ #
857
+ # @return [Series]
858
+ #
859
+ # @example
860
+ # s = Polars::Series.new("a", [[1, nil], [42, 13], [nil, nil]])
861
+ # s.list.agg(Polars.element.null_count)
862
+ # # =>
863
+ # # shape: (3,)
864
+ # # Series: 'a' [u32]
865
+ # # [
866
+ # # 1
867
+ # # 0
868
+ # # 2
869
+ # # ]
870
+ #
871
+ # @example
872
+ # s.list.agg(Polars.element.drop_nulls)
873
+ # # =>
874
+ # # shape: (3,)
875
+ # # Series: 'a' [list[i64]]
876
+ # # [
877
+ # # [1]
878
+ # # [42, 13]
879
+ # # []
880
+ # # ]
881
+ def agg(expr)
818
882
  super
819
883
  end
820
884
 
@@ -180,8 +180,13 @@ module Polars
180
180
  # # => "foo"
181
181
  # Polars.len.meta.output_name
182
182
  # # => "len"
183
- def output_name
183
+ def output_name(raise_if_undetermined: true)
184
184
  _rbexpr.meta_output_name
185
+ rescue Polars::ComputeError
186
+ if !raise_if_undetermined
187
+ return nil
188
+ end
189
+ raise
185
190
  end
186
191
 
187
192
  # Pop the latest expression and return the input(s) of the popped expression.
@@ -292,5 +297,41 @@ module Polars
292
297
  nil
293
298
  end
294
299
  end
300
+
301
+ # Format the expression as a Graphviz graph.
302
+ #
303
+ # Note that Graphviz must be installed to render the visualization (if not
304
+ # already present, you can download it here: https://graphviz.org/download).
305
+ #
306
+ # @param show [Boolean]
307
+ # Show the figure.
308
+ # @param output_path [String]
309
+ # Write the figure to disk.
310
+ # @param raw_output [Boolean]
311
+ # Return dot syntax. This cannot be combined with `show` and/or `output_path`.
312
+ # @param figsize [Array]
313
+ # Passed to matplotlib if `show == true`.
314
+ #
315
+ # @return [Object]
316
+ #
317
+ # @example
318
+ # e = (Polars.col("foo") * Polars.col("bar")).sum.over(Polars.col("ham")) / 2
319
+ # e.meta.show_graph
320
+ def show_graph(
321
+ show: true,
322
+ output_path: nil,
323
+ raw_output: false,
324
+ figsize: [16.0, 12.0],
325
+ schema: nil
326
+ )
327
+ dot = _rbexpr.meta_show_graph(schema)
328
+ Utils.display_dot_graph(
329
+ dot: dot,
330
+ show: show,
331
+ output_path: output_path,
332
+ raw_output: raw_output,
333
+ figsize: figsize
334
+ )
335
+ end
295
336
  end
296
337
  end
@@ -77,8 +77,11 @@ module Polars
77
77
  # # │ 2 ┆ y ┆ 2 ┆ y │
78
78
  # # │ 1 ┆ x ┆ 3 ┆ z │
79
79
  # # └───────────┴───────────┴─────┴─────┘
80
- def map(&f)
81
- Utils.wrap_expr(_rbexpr.name_map(f))
80
+ def map(&function)
81
+ # need to mark function for GC
82
+ raise Todo
83
+
84
+ Utils.wrap_expr(_rbexpr.name_map(function))
82
85
  end
83
86
 
84
87
  # Add a prefix to the root column name of the expression.
@@ -208,11 +211,45 @@ module Polars
208
211
  # @example
209
212
  # df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
210
213
  # df.select(Polars.col("x").name.prefix_fields("prefix_")).schema
211
- # # => {"x"=>Polars::Struct({"prefix_a"=>Polars::Int64, "prefix_b"=>Polars::Int64})}
214
+ # # => Polars::Schema({"x"=>Polars::Struct({"prefix_a"=>Polars::Int64, "prefix_b"=>Polars::Int64})})
212
215
  def prefix_fields(prefix)
213
216
  Utils.wrap_expr(_rbexpr.name_prefix_fields(prefix))
214
217
  end
215
218
 
219
+ # Replace matching regex/literal substring in the name with a new value.
220
+ #
221
+ # @param pattern [String]
222
+ # A valid regular expression pattern, compatible with the [regex crate](https://docs.rs/regex/latest/regex/).
223
+ # @param value [String]
224
+ # String that will replace the matched substring.
225
+ # @param literal [Boolean]
226
+ # Treat `pattern` as a literal string, not a regex.
227
+ #
228
+ # @return [Expr]
229
+ #
230
+ # @example
231
+ # df = Polars::DataFrame.new(
232
+ # {
233
+ # "n_foo" => [1, 2, 3],
234
+ # "n_bar" => ["x", "y", "z"]
235
+ # }
236
+ # )
237
+ # df.select(Polars.all.name.replace("^n_", "col_"))
238
+ # # =>
239
+ # # shape: (3, 2)
240
+ # # ┌─────────┬─────────┐
241
+ # # │ col_foo ┆ col_bar │
242
+ # # │ --- ┆ --- │
243
+ # # │ i64 ┆ str │
244
+ # # ╞═════════╪═════════╡
245
+ # # │ 1 ┆ x │
246
+ # # │ 2 ┆ y │
247
+ # # │ 3 ┆ z │
248
+ # # └─────────┴─────────┘
249
+ def replace(pattern, value, literal: false)
250
+ Utils.wrap_expr(_rbexpr.name_replace(pattern, value, literal))
251
+ end
252
+
216
253
  # Add a suffix to all field names of a struct.
217
254
  #
218
255
  # @note
@@ -226,7 +263,7 @@ module Polars
226
263
  # @example
227
264
  # df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
228
265
  # df.select(Polars.col("x").name.suffix_fields("_suffix")).schema
229
- # # => {"x"=>Polars::Struct({"a_suffix"=>Polars::Int64, "b_suffix"=>Polars::Int64})}
266
+ # # => Polars::Schema({"x"=>Polars::Struct({"a_suffix"=>Polars::Int64, "b_suffix"=>Polars::Int64})})
230
267
  def suffix_fields(suffix)
231
268
  Utils.wrap_expr(_rbexpr.name_suffix_fields(suffix))
232
269
  end
@@ -5,6 +5,9 @@ module Polars
5
5
  # This functionality is considered **unstable**. It may be changed
6
6
  # at any point without it being considered a breaking change.
7
7
  class QueryOptFlags
8
+ # @private
9
+ attr_accessor :_rboptflags
10
+
8
11
  def initialize(
9
12
  predicate_pushdown: nil,
10
13
  projection_pushdown: nil,
@@ -17,7 +20,7 @@ module Polars
17
20
  check_order_observe: nil,
18
21
  fast_projection: nil
19
22
  )
20
- @_rboptflags = RbOptFlags.default
23
+ self._rboptflags = RbOptFlags.default
21
24
  update(
22
25
  predicate_pushdown: predicate_pushdown,
23
26
  projection_pushdown: projection_pushdown,
@@ -32,6 +35,35 @@ module Polars
32
35
  )
33
36
  end
34
37
 
38
+ # Create new empty set of optimizations.
39
+ def self.none(
40
+ predicate_pushdown: nil,
41
+ projection_pushdown: nil,
42
+ simplify_expression: nil,
43
+ slice_pushdown: nil,
44
+ comm_subplan_elim: nil,
45
+ comm_subexpr_elim: nil,
46
+ cluster_with_columns: nil,
47
+ collapse_joins: nil,
48
+ check_order_observe: nil,
49
+ fast_projection: nil
50
+ )
51
+ optflags = QueryOptFlags.new
52
+ optflags.no_optimizations
53
+ optflags.update(
54
+ predicate_pushdown: predicate_pushdown,
55
+ projection_pushdown: projection_pushdown,
56
+ simplify_expression: simplify_expression,
57
+ slice_pushdown: slice_pushdown,
58
+ comm_subplan_elim: comm_subplan_elim,
59
+ comm_subexpr_elim: comm_subexpr_elim,
60
+ cluster_with_columns: cluster_with_columns,
61
+ collapse_joins: collapse_joins,
62
+ check_order_observe: check_order_observe,
63
+ fast_projection: fast_projection
64
+ )
65
+ end
66
+
35
67
  def update(
36
68
  predicate_pushdown: nil,
37
69
  projection_pushdown: nil,
@@ -44,7 +76,171 @@ module Polars
44
76
  check_order_observe: nil,
45
77
  fast_projection: nil
46
78
  )
47
- raise Todo
79
+ if !predicate_pushdown.nil?
80
+ self.predicate_pushdown = predicate_pushdown
81
+ end
82
+ if !projection_pushdown.nil?
83
+ self.projection_pushdown = projection_pushdown
84
+ end
85
+ if !simplify_expression.nil?
86
+ self.simplify_expression = simplify_expression
87
+ end
88
+ if !slice_pushdown.nil?
89
+ self.slice_pushdown = slice_pushdown
90
+ end
91
+ if !comm_subplan_elim.nil?
92
+ self.comm_subplan_elim = comm_subplan_elim
93
+ end
94
+ if !comm_subexpr_elim.nil?
95
+ self.comm_subexpr_elim = comm_subexpr_elim
96
+ end
97
+ if !cluster_with_columns.nil?
98
+ self.cluster_with_columns = cluster_with_columns
99
+ end
100
+ if !collapse_joins.nil?
101
+ Utils.issue_deprecation_warning(
102
+ "the `collapse_joins` parameter for `QueryOptFlags` is deprecated. " +
103
+ "Use `predicate_pushdown` instead."
104
+ )
105
+ if !collapse_joins
106
+ self.predicate_pushdown = false
107
+ end
108
+ end
109
+ if !check_order_observe.nil?
110
+ self.check_order_observe = check_order_observe
111
+ end
112
+ if !fast_projection.nil?
113
+ self.fast_projection = fast_projection
114
+ end
115
+
116
+ self
117
+ end
118
+
119
+ # Create new empty set of optimizations.
120
+ def self._eager
121
+ optflags = QueryOptFlags.new
122
+ optflags.no_optimizations
123
+ optflags._rboptflags.eager = true
124
+ optflags.simplify_expression = true
125
+ optflags
126
+ end
127
+
128
+ # Remove selected optimizations.
129
+ def no_optimizations
130
+ _rboptflags.no_optimizations
131
+ end
132
+
133
+ # Only read columns that are used later in the query.
134
+ def projection_pushdown
135
+ _rboptflags.projection_pushdown
136
+ end
137
+
138
+ def projection_pushdown=(value)
139
+ _rboptflags.projection_pushdown = value
140
+ end
141
+
142
+ # Apply predicates/filters as early as possible.
143
+ def predicate_pushdown
144
+ _rboptflags.predicate_pushdown
145
+ end
146
+
147
+ def predicate_pushdown=(value)
148
+ _rboptflags.predicate_pushdown = value
149
+ end
150
+
151
+ # Cluster sequential `with_columns` calls to independent calls.
152
+ def cluster_with_columns
153
+ _rboptflags.cluster_with_columns
154
+ end
155
+
156
+ def cluster_with_columns=(value)
157
+ _rboptflags.cluster_with_columns = value
158
+ end
159
+
160
+ # Run many expression optimization rules until fixed point.
161
+ def simplify_expression
162
+ _rboptflags.simplify_expression
163
+ end
164
+
165
+ def simplify_expression=(value)
166
+ _rboptflags.simplify_expression = value
167
+ end
168
+
169
+ # Pushdown slices/limits.
170
+ def slice_pushdown
171
+ _rboptflags.slice_pushdown
172
+ end
173
+
174
+ def slice_pushdown=(value)
175
+ _rboptflags.slice_pushdown = value
176
+ end
177
+
178
+ # Elide duplicate plans and cache their outputs.
179
+ def comm_subplan_elim
180
+ _rboptflags.comm_subplan_elim
181
+ end
182
+
183
+ def comm_subplan_elim=(value)
184
+ _rboptflags.comm_subplan_elim = value
185
+ end
186
+
187
+ # Elide duplicate expressions and cache their outputs.
188
+ def comm_subexpr_elim
189
+ _rboptflags.comm_subexpr_elim
190
+ end
191
+
192
+ def comm_subexpr_elim=(value)
193
+ _rboptflags.comm_subexpr_elim = value
194
+ end
195
+
196
+ # Do not maintain order if the order would not be observed.
197
+ def check_order_observe
198
+ _rboptflags.check_order_observe
199
+ end
200
+
201
+ def check_order_observe=(value)
202
+ _rboptflags.check_order_observe = value
203
+ end
204
+
205
+ # Replace simple projections with a faster inlined projection that skips the expression engine.
206
+ def fast_projection
207
+ _rboptflags.fast_projection
208
+ end
209
+
210
+ def fast_projection=(value)
211
+ _rboptflags.fast_projection = value
212
+ end
213
+
214
+ def to_s
215
+ <<~STR
216
+ QueryOptFlags {
217
+ type_coercion: #{_rboptflags.type_coercion}
218
+ type_check: #{_rboptflags.type_check}
219
+
220
+ predicate_pushdown: #{predicate_pushdown}
221
+ projection_pushdown: #{projection_pushdown}
222
+ simplify_expression: #{simplify_expression}
223
+ slice_pushdown: #{slice_pushdown}
224
+ comm_subplan_elim: #{comm_subplan_elim}
225
+ comm_subexpr_elim: #{comm_subexpr_elim}
226
+ cluster_with_columns: #{cluster_with_columns}
227
+ check_order_observe: #{check_order_observe}
228
+ fast_projection: #{fast_projection}
229
+
230
+ eager: #{_rboptflags.eager}
231
+ streaming: #{_rboptflags.streaming}
232
+ }
233
+ STR
234
+ end
235
+ alias_method :inspect, :to_s
236
+
237
+ private
238
+
239
+ def initialize_copy(other)
240
+ super
241
+ self._rboptflags = _rboptflags.copy
48
242
  end
49
243
  end
244
+
245
+ DEFAULT_QUERY_OPT_FLAGS = QueryOptFlags.new
50
246
  end
@@ -25,11 +25,11 @@ module Polars
25
25
 
26
26
  def agg(*aggs, **named_aggs)
27
27
  @df.lazy
28
- .group_by_rolling(
29
- index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @group_by
28
+ .rolling(
29
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, group_by: @group_by
30
30
  )
31
31
  .agg(*aggs, **named_aggs)
32
- .collect(no_optimization: true, string_cache: false)
32
+ .collect(optimizations: QueryOptFlags.none)
33
33
  end
34
34
  end
35
35
  end
data/lib/polars/schema.rb CHANGED
@@ -1,13 +1,21 @@
1
1
  module Polars
2
2
  class Schema
3
+ include Enumerable
4
+
3
5
  # Ordered mapping of column names to their data type.
4
6
  #
5
7
  # @param schema [Object]
6
8
  # The schema definition given by column names and their associated
7
9
  # Polars data type. Accepts a mapping or an enumerable of arrays.
8
10
  def initialize(schema = nil, check_dtypes: true)
9
- input = schema || {}
10
11
  @schema = {}
12
+
13
+ if schema.respond_to?(:arrow_c_schema) && !schema.is_a?(Schema)
14
+ Plr.init_polars_schema_from_arrow_c_schema(@schema, schema)
15
+ return
16
+ end
17
+
18
+ input = schema || {}
11
19
  input.each do |name, tp|
12
20
  if !check_dtypes
13
21
  @schema[name] = tp
@@ -34,6 +42,11 @@ module Polars
34
42
  @schema[name] = dtype
35
43
  end
36
44
 
45
+ # @private
46
+ def arrow_c_schema
47
+ Plr.polars_schema_to_rbcapsule(self)
48
+ end
49
+
37
50
  # Get the column names of the schema.
38
51
  #
39
52
  # @return [Array]
@@ -78,14 +91,19 @@ module Polars
78
91
  end
79
92
  alias_method :inspect, :to_s
80
93
 
94
+ # @private
95
+ def each(&block)
96
+ @schema.each(&block)
97
+ end
98
+
81
99
  # @private
82
100
  def include?(name)
83
101
  @schema.include?(name)
84
102
  end
85
103
 
86
104
  # @private
87
- def to_h
88
- @schema.to_h
105
+ def to_h(&block)
106
+ @schema.to_h(&block)
89
107
  end
90
108
 
91
109
  private
@@ -127,14 +127,49 @@ module Polars
127
127
  # Only works after a wildcard or regex column selection, and you cannot provide
128
128
  # both string column names *and* dtypes (you may prefer to use selectors instead).
129
129
  #
130
- # @return [Selector]
131
- #
132
130
  # @param columns [Object]
133
131
  # The name or datatype of the column(s) to exclude. Accepts regular expression
134
132
  # input. Regular expressions should start with `^` and end with `$`.
135
133
  # @param more_columns [Array]
136
134
  # Additional names or datatypes of columns to exclude, specified as positional
137
135
  # arguments.
136
+ #
137
+ # @return [Selector]
138
+ #
139
+ # @example Exclude by column name(s):
140
+ # df = Polars::DataFrame.new(
141
+ # {
142
+ # "aa" => [1, 2, 3],
143
+ # "ba" => ["a", "b", nil],
144
+ # "cc" => [nil, 2.5, 1.5]
145
+ # }
146
+ # )
147
+ # df.select(Polars.cs.exclude("ba", "xx"))
148
+ # # =>
149
+ # # shape: (3, 2)
150
+ # # ┌─────┬──────┐
151
+ # # │ aa ┆ cc │
152
+ # # │ --- ┆ --- │
153
+ # # │ i64 ┆ f64 │
154
+ # # ╞═════╪══════╡
155
+ # # │ 1 ┆ null │
156
+ # # │ 2 ┆ 2.5 │
157
+ # # │ 3 ┆ 1.5 │
158
+ # # └─────┴──────┘
159
+ #
160
+ # @example Exclude using a column name, a selector, and a dtype:
161
+ # df.select(Polars.cs.exclude("aa", Polars.cs.string, Polars::UInt32))
162
+ # # =>
163
+ # # shape: (3, 1)
164
+ # # ┌──────┐
165
+ # # │ cc │
166
+ # # │ --- │
167
+ # # │ f64 │
168
+ # # ╞══════╡
169
+ # # │ null │
170
+ # # │ 2.5 │
171
+ # # │ 1.5 │
172
+ # # └──────┘
138
173
  def exclude(columns, *more_columns)
139
174
  exclude_cols = []
140
175
  exclude_dtypes = []