polars-df 0.20.0-x86_64-darwin → 0.21.1-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE-THIRD-PARTY.txt +1431 -1810
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/3.4/polars.bundle +0 -0
  9. data/lib/polars/array_expr.rb +382 -3
  10. data/lib/polars/array_name_space.rb +281 -0
  11. data/lib/polars/binary_expr.rb +67 -0
  12. data/lib/polars/binary_name_space.rb +43 -0
  13. data/lib/polars/cat_expr.rb +224 -0
  14. data/lib/polars/cat_name_space.rb +130 -32
  15. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  16. data/lib/polars/catalog/unity/column_info.rb +31 -0
  17. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  18. data/lib/polars/catalog/unity/table_info.rb +50 -0
  19. data/lib/polars/catalog.rb +448 -0
  20. data/lib/polars/config.rb +2 -2
  21. data/lib/polars/convert.rb +12 -2
  22. data/lib/polars/data_frame.rb +834 -48
  23. data/lib/polars/data_type_expr.rb +52 -0
  24. data/lib/polars/data_types.rb +61 -5
  25. data/lib/polars/date_time_expr.rb +251 -0
  26. data/lib/polars/date_time_name_space.rb +299 -0
  27. data/lib/polars/exceptions.rb +7 -2
  28. data/lib/polars/expr.rb +1247 -211
  29. data/lib/polars/functions/col.rb +6 -5
  30. data/lib/polars/functions/datatype.rb +21 -0
  31. data/lib/polars/functions/lazy.rb +127 -15
  32. data/lib/polars/functions/repeat.rb +4 -0
  33. data/lib/polars/io/csv.rb +19 -1
  34. data/lib/polars/io/json.rb +16 -0
  35. data/lib/polars/io/ndjson.rb +13 -0
  36. data/lib/polars/io/parquet.rb +70 -66
  37. data/lib/polars/io/scan_options.rb +47 -0
  38. data/lib/polars/lazy_frame.rb +1099 -95
  39. data/lib/polars/list_expr.rb +400 -11
  40. data/lib/polars/list_name_space.rb +321 -5
  41. data/lib/polars/meta_expr.rb +71 -22
  42. data/lib/polars/name_expr.rb +36 -0
  43. data/lib/polars/scan_cast_options.rb +64 -0
  44. data/lib/polars/schema.rb +84 -3
  45. data/lib/polars/selector.rb +210 -0
  46. data/lib/polars/selectors.rb +932 -203
  47. data/lib/polars/series.rb +1083 -63
  48. data/lib/polars/string_expr.rb +435 -9
  49. data/lib/polars/string_name_space.rb +729 -45
  50. data/lib/polars/struct_expr.rb +103 -0
  51. data/lib/polars/struct_name_space.rb +19 -1
  52. data/lib/polars/utils/parse.rb +40 -0
  53. data/lib/polars/utils/various.rb +18 -1
  54. data/lib/polars/utils.rb +9 -1
  55. data/lib/polars/version.rb +1 -1
  56. data/lib/polars.rb +10 -0
  57. metadata +12 -2
@@ -66,7 +66,7 @@ module Polars
66
66
  #
67
67
  # @example
68
68
  # s = Polars::Series.new([[1, 2, 3], [5]])
69
- # s.list.lengths
69
+ # s.list.len
70
70
  # # =>
71
71
  # # shape: (2,)
72
72
  # # Series: '' [u32]
@@ -74,9 +74,10 @@ module Polars
74
74
  # # 3
75
75
  # # 1
76
76
  # # ]
77
- def lengths
77
+ def len
78
78
  super
79
79
  end
80
+ alias_method :lengths, :len
80
81
 
81
82
  # Drop all null values in the list.
82
83
  #
@@ -123,7 +124,7 @@ module Polars
123
124
  # # shape: (2,)
124
125
  # # Series: 'values' [list[i64]]
125
126
  # # [
126
- # # [2, 1]
127
+ # # [2, 3]
127
128
  # # [5]
128
129
  # # ]
129
130
  def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
@@ -202,6 +203,60 @@ module Polars
202
203
  super
203
204
  end
204
205
 
206
+ # Compute the median value of the arrays in the list.
207
+ #
208
+ # @return [Series]
209
+ #
210
+ # @example
211
+ # s = Polars::Series.new("values", [[-1, 0, 1], [1, 10]])
212
+ # s.list.median
213
+ # # =>
214
+ # # shape: (2,)
215
+ # # Series: 'values' [f64]
216
+ # # [
217
+ # # 0.0
218
+ # # 5.5
219
+ # # ]
220
+ def median
221
+ super
222
+ end
223
+
224
+ # Compute the std value of the arrays in the list.
225
+ #
226
+ # @return [Series]
227
+ #
228
+ # @example
229
+ # s = Polars::Series.new("values", [[-1, 0, 1], [1, 10]])
230
+ # s.list.std
231
+ # # =>
232
+ # # shape: (2,)
233
+ # # Series: 'values' [f64]
234
+ # # [
235
+ # # 1.0
236
+ # # 6.363961
237
+ # # ]
238
+ def std(ddof: 1)
239
+ super
240
+ end
241
+
242
+ # Compute the var value of the arrays in the list.
243
+ #
244
+ # @return [Series]
245
+ #
246
+ # @example
247
+ # s = Polars::Series.new("values", [[-1, 0, 1], [1, 10]])
248
+ # s.list.var
249
+ # # =>
250
+ # # shape: (2,)
251
+ # # Series: 'values' [f64]
252
+ # # [
253
+ # # 1.0
254
+ # # 40.5
255
+ # # ]
256
+ def var(ddof: 1)
257
+ super
258
+ end
259
+
205
260
  # Sort the arrays in the list.
206
261
  #
207
262
  # @return [Series]
@@ -254,7 +309,7 @@ module Polars
254
309
  #
255
310
  # @example
256
311
  # s = Polars::Series.new("a", [[1, 1, 2], [2, 3, 3]])
257
- # s.list.unique()
312
+ # s.list.unique
258
313
  # # =>
259
314
  # # shape: (2,)
260
315
  # # Series: 'a' [list[i64]]
@@ -266,6 +321,24 @@ module Polars
266
321
  super
267
322
  end
268
323
 
324
+ # Count the number of unique values in every sublist.
325
+ #
326
+ # @return [Series]
327
+ #
328
+ # @example
329
+ # s = Polars::Series.new("a", [[1, 1, 2], [2, 3, 4]])
330
+ # s.list.n_unique
331
+ # # =>
332
+ # # shape: (2,)
333
+ # # Series: 'a' [u32]
334
+ # # [
335
+ # # 2
336
+ # # 3
337
+ # # ]
338
+ def n_unique
339
+ super
340
+ end
341
+
269
342
  # Concat the arrays in a Series dtype List in linear time.
270
343
  #
271
344
  # @param other [Object]
@@ -292,7 +365,7 @@ module Polars
292
365
  #
293
366
  # So index `0` would return the first item of every sublist
294
367
  # and index `-1` would return the last item of every sublist
295
- # if an index is out of bounds, it will return a `None`.
368
+ # if an index is out of bounds, it will return `nil`.
296
369
  #
297
370
  # @param index [Integer]
298
371
  # Index to return per sublist
@@ -318,6 +391,63 @@ module Polars
318
391
  super
319
392
  end
320
393
 
394
+ # Take sublists by multiple indices.
395
+ #
396
+ # The indices may be defined in a single column, or by sublists in another
397
+ # column of dtype `List`.
398
+ #
399
+ # @param indices [Object]
400
+ # Indices to return per sublist
401
+ # @param null_on_oob [Boolean]
402
+ # Behavior if an index is out of bounds:
403
+ # True -> set as null
404
+ # False -> raise an error
405
+ # Note that defaulting to raising an error is much cheaper
406
+ #
407
+ # @return [Series]
408
+ #
409
+ # @example
410
+ # s = Polars::Series.new("a", [[3, 2, 1], [], [1, 2]])
411
+ # s.list.gather([0, 2], null_on_oob: true)
412
+ # # =>
413
+ # # shape: (3,)
414
+ # # Series: 'a' [list[i64]]
415
+ # # [
416
+ # # [3, 1]
417
+ # # [null, null]
418
+ # # [1, null]
419
+ # # ]
420
+ def gather(
421
+ indices,
422
+ null_on_oob: false
423
+ )
424
+ super
425
+ end
426
+
427
+ # Take every n-th value starting from offset in sublists.
428
+ #
429
+ # @param n [Integer]
430
+ # Gather every n-th element.
431
+ # @param offset [Integer]
432
+ # Starting index.
433
+ #
434
+ # @return [Series]
435
+ #
436
+ # @example
437
+ # s = Polars::Series.new("a", [[1, 2, 3], [], [6, 7, 8, 9]])
438
+ # s.list.gather_every(2, 1)
439
+ # # =>
440
+ # # shape: (3,)
441
+ # # Series: 'a' [list[i64]]
442
+ # # [
443
+ # # [2]
444
+ # # []
445
+ # # [7, 9]
446
+ # # ]
447
+ def gather_every(n, offset = 0)
448
+ super
449
+ end
450
+
321
451
  # Get the value by index in the sublists.
322
452
  #
323
453
  # @return [Series]
@@ -554,6 +684,73 @@ module Polars
554
684
  super
555
685
  end
556
686
 
687
+ # Returns a column with a separate row for every list element.
688
+ #
689
+ # @return [Series]
690
+ #
691
+ # @example
692
+ # s = Polars::Series.new("a", [[1, 2, 3], [4, 5, 6]])
693
+ # s.list.explode
694
+ # # =>
695
+ # # shape: (6,)
696
+ # # Series: 'a' [i64]
697
+ # # [
698
+ # # 1
699
+ # # 2
700
+ # # 3
701
+ # # 4
702
+ # # 5
703
+ # # 6
704
+ # # ]
705
+ def explode
706
+ super
707
+ end
708
+
709
+ # Count how often the value produced by `element` occurs.
710
+ #
711
+ # @param element [Object]
712
+ # An expression that produces a single value
713
+ #
714
+ # @return [Series]
715
+ #
716
+ # @example
717
+ # s = Polars::Series.new("a", [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]])
718
+ # s.list.count_matches(1)
719
+ # # =>
720
+ # # shape: (5,)
721
+ # # Series: 'a' [u32]
722
+ # # [
723
+ # # 0
724
+ # # 1
725
+ # # 1
726
+ # # 2
727
+ # # 0
728
+ # # ]
729
+ def count_matches(element)
730
+ super
731
+ end
732
+
733
+ # Convert a List column into an Array column with the same inner data type.
734
+ #
735
+ # @param width [Integer]
736
+ # Width of the resulting Array column.
737
+ #
738
+ # @return [Series]
739
+ #
740
+ # @example
741
+ # s = Polars::Series.new([[1, 2], [3, 4]], dtype: Polars::List.new(Polars::Int8))
742
+ # s.list.to_array(2)
743
+ # # =>
744
+ # # shape: (2,)
745
+ # # Series: '' [array[i8, 2]]
746
+ # # [
747
+ # # [1, 2]
748
+ # # [3, 4]
749
+ # # ]
750
+ def to_array(width)
751
+ super
752
+ end
753
+
557
754
  # Convert the series of type `List` to a series of type `Struct`.
558
755
  #
559
756
  # @param n_field_strategy ["first_non_null", "max_width"]
@@ -608,5 +805,124 @@ module Polars
608
805
  def eval(expr)
609
806
  super
610
807
  end
808
+
809
+ # Filter elements in each list by a boolean expression, returning a new Series of lists.
810
+ #
811
+ # @param predicate [Object]
812
+ # A boolean expression evaluated on each list element.
813
+ # Use `Polars.element` to refer to the current element.
814
+ #
815
+ # @return [Series]
816
+ #
817
+ # @example
818
+ # s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
819
+ # s.list.filter(Polars.element % 2 == 0)
820
+ # # =>
821
+ # # shape: (3,)
822
+ # # Series: 'a' [list[i64]]
823
+ # # [
824
+ # # [4]
825
+ # # [8]
826
+ # # [2]
827
+ # # ]
828
+ def filter(predicate)
829
+ super
830
+ end
831
+
832
+ # Compute the SET UNION between the elements in this list and the elements of `other`.
833
+ #
834
+ # @param other [Object]
835
+ # Right hand side of the set operation.
836
+ #
837
+ # @return [Series]
838
+ #
839
+ # @example
840
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
841
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
842
+ # a.list.set_union(b)
843
+ # # =>
844
+ # # shape: (4,)
845
+ # # Series: '' [list[i64]]
846
+ # # [
847
+ # # [1, 2, … 4]
848
+ # # [3]
849
+ # # [null, 3, 4]
850
+ # # [5, 6, … 8]
851
+ # # ]
852
+ def set_union(other)
853
+ super
854
+ end
855
+
856
+ # Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
857
+ #
858
+ # @param other [Object]
859
+ # Right hand side of the set operation.
860
+ #
861
+ # @return [Series]
862
+ #
863
+ # @example
864
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
865
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
866
+ # a.list.set_difference(b)
867
+ # # =>
868
+ # # shape: (4,)
869
+ # # Series: '' [list[i64]]
870
+ # # [
871
+ # # [1]
872
+ # # []
873
+ # # []
874
+ # # [5, 7]
875
+ # # ]
876
+ def set_difference(other)
877
+ super
878
+ end
879
+
880
+ # Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
881
+ #
882
+ # @param other [Object]
883
+ # Right hand side of the set operation.
884
+ #
885
+ # @return [Series]
886
+ #
887
+ # @example
888
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
889
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
890
+ # a.list.set_intersection(b)
891
+ # # =>
892
+ # # shape: (4,)
893
+ # # Series: '' [list[i64]]
894
+ # # [
895
+ # # [2, 3]
896
+ # # []
897
+ # # [null, 3]
898
+ # # [6]
899
+ # # ]
900
+ def set_intersection(other)
901
+ super
902
+ end
903
+
904
+ # Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
905
+ #
906
+ # @param other [Object]
907
+ # Right hand side of the set operation.
908
+ #
909
+ # @return [Series]
910
+ #
911
+ # @example
912
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
913
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
914
+ # a.list.set_symmetric_difference(b)
915
+ # # =>
916
+ # # shape: (4,)
917
+ # # Series: '' [list[i64]]
918
+ # # [
919
+ # # [1, 4]
920
+ # # [3]
921
+ # # [4]
922
+ # # [5, 7, 8]
923
+ # # ]
924
+ def set_symmetric_difference(other)
925
+ super
926
+ end
611
927
  end
612
928
  end
@@ -97,6 +97,70 @@ module Polars
97
97
  _rbexpr.meta_is_regex_projection
98
98
  end
99
99
 
100
+ # Indicate if this expression only selects columns (optionally with aliasing).
101
+ #
102
+ # This can include bare columns, columns matched by regex or dtype, selectors
103
+ # and exclude ops, and (optionally) column/expression aliasing.
104
+ #
105
+ # @param allow_aliasing [Boolean]
106
+ # If false (default), any aliasing is not considered to be column selection.
107
+ # Set true to allow for column selection that also includes aliasing.
108
+ #
109
+ # @return [Boolean]
110
+ #
111
+ # @example
112
+ # e = Polars.col("foo")
113
+ # e.meta.is_column_selection
114
+ # # => true
115
+ #
116
+ # @example
117
+ # e = Polars.col("foo").alias("bar")
118
+ # e.meta.is_column_selection
119
+ # # => false
120
+ #
121
+ # @example
122
+ # e.meta.is_column_selection(allow_aliasing: true)
123
+ # # => true
124
+ #
125
+ # @example
126
+ # e = Polars.col("foo") * Polars.col("bar")
127
+ # e.meta.is_column_selection
128
+ # # => false
129
+ #
130
+ # @example
131
+ # e = Polars.cs.starts_with("foo")
132
+ # e.meta.is_column_selection
133
+ # # => true
134
+ #
135
+ # @example
136
+ # e = Polars.cs.starts_with("foo").exclude("foo!")
137
+ # e.meta.is_column_selection
138
+ # # => true
139
+ def is_column_selection(allow_aliasing: false)
140
+ _rbexpr.meta_is_column_selection(allow_aliasing)
141
+ end
142
+
143
+ # Indicate if this expression is a literal value (optionally aliased).
144
+ #
145
+ # @param allow_aliasing [Boolean]
146
+ # If false (default), only a bare literal will match.
147
+ # Set true to also allow for aliased literals.
148
+ #
149
+ # @return [Boolean]
150
+ #
151
+ # @example
152
+ # e = Polars.lit(123)
153
+ # e.meta.is_literal
154
+ # # => true
155
+ #
156
+ # @example
157
+ # e = Polars.lit(987.654321).alias("foo")
158
+ # e.meta.is_literal
159
+ # # => false
160
+ def is_literal(allow_aliasing: false)
161
+ _rbexpr.meta_is_literal(allow_aliasing)
162
+ end
163
+
100
164
  # Get the column name that this expression would produce.
101
165
  #
102
166
  # @return [String]
@@ -171,32 +235,17 @@ module Polars
171
235
  Utils.wrap_expr(_rbexpr.meta_undo_aliases)
172
236
  end
173
237
 
174
- # Turn this expression in a selector.
175
- #
176
- # @return [Expr]
177
- def _as_selector
178
- Utils.wrap_expr(_rbexpr._meta_as_selector)
179
- end
180
-
181
- # Add selectors.
238
+ # Try to turn this expression into a selector.
182
239
  #
183
- # @return [Expr]
184
- def _selector_add(other)
185
- Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
186
- end
187
-
188
- # Subtract selectors.
240
+ # Raises if the underlying expression is not a column or selector.
189
241
  #
190
242
  # @return [Expr]
191
- def _selector_sub(other)
192
- Utils.wrap_expr(_rbexpr._meta_selector_sub(other._rbexpr))
193
- end
194
-
195
- # & selectors.
196
243
  #
197
- # @return [Expr]
198
- def _selector_and(other)
199
- Utils.wrap_expr(_rbexpr._meta_selector_and(other._rbexpr))
244
+ # @note
245
+ # This functionality is considered **unstable**. It may be changed
246
+ # at any point without it being considered a breaking change.
247
+ def as_selector
248
+ Selector._from_rbselector(_rbexpr.into_selector)
200
249
  end
201
250
 
202
251
  # Format the expression as a tree.
@@ -194,5 +194,41 @@ module Polars
194
194
  def to_uppercase
195
195
  Utils.wrap_expr(_rbexpr.name_to_uppercase)
196
196
  end
197
+
198
+ # Add a prefix to all field names of a struct.
199
+ #
200
+ # @note
201
+ # This only takes effect for struct columns.
202
+ #
203
+ # @param prefix [String]
204
+ # Prefix to add to the field name.
205
+ #
206
+ # @return [Expr]
207
+ #
208
+ # @example
209
+ # df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
210
+ # df.select(Polars.col("x").name.prefix_fields("prefix_")).schema
211
+ # # => {"x"=>Polars::Struct({"prefix_a"=>Polars::Int64, "prefix_b"=>Polars::Int64})}
212
+ def prefix_fields(prefix)
213
+ Utils.wrap_expr(_rbexpr.name_prefix_fields(prefix))
214
+ end
215
+
216
+ # Add a suffix to all field names of a struct.
217
+ #
218
+ # @note
219
+ # This only takes effect for struct columns.
220
+ #
221
+ # @param suffix [String]
222
+ # Suffix to add to the field name.
223
+ #
224
+ # @return [Expr]
225
+ #
226
+ # @example
227
+ # df = Polars::DataFrame.new({"x" => {"a" => 1, "b" => 2}})
228
+ # df.select(Polars.col("x").name.suffix_fields("_suffix")).schema
229
+ # # => {"x"=>Polars::Struct({"a_suffix"=>Polars::Int64, "b_suffix"=>Polars::Int64})}
230
+ def suffix_fields(suffix)
231
+ Utils.wrap_expr(_rbexpr.name_suffix_fields(suffix))
232
+ end
197
233
  end
198
234
  end
@@ -0,0 +1,64 @@
1
+ module Polars
2
+ # Options for scanning files.
3
+ class ScanCastOptions
4
+ # Common configuration for scanning files.
5
+ #
6
+ # @note
7
+ # This functionality is considered **unstable**. It may be changed
8
+ # at any point without it being considered a breaking change.
9
+ #
10
+ # @param integer_cast ['upcast', 'forbid']
11
+ # Configuration for casting from integer types:
12
+ #
13
+ # * `upcast`: Allow lossless casting to wider integer types.
14
+ # * `forbid`: Raises an error if dtypes do not match.
15
+ #
16
+ # @param float_cast ['upcast', 'downcast', 'forbid']
17
+ # Configuration for casting from float types:
18
+ #
19
+ # * `upcast`: Allow casting to higher precision float types.
20
+ # * `downcast`: Allow casting to lower precision float types.
21
+ # * `forbid`: Raises an error if dtypes do not match.
22
+ #
23
+ # @param datetime_cast ['nanosecond-downcast', 'convert-timezone', 'forbid']
24
+ # Configuration for casting from datetime types:
25
+ #
26
+ # * `nanosecond-downcast`: Allow nanosecond precision datetime to be
27
+ # downcast to any lower precision. This has a similar effect to
28
+ # PyArrow's `coerce_int96_timestamp_unit`.
29
+ # * `convert-timezone`: Allow casting to a different timezone.
30
+ # * `forbid`: Raises an error if dtypes do not match.
31
+ #
32
+ # @param missing_struct_fields ['insert', 'raise']
33
+ # Configuration for behavior when struct fields defined in the schema
34
+ # are missing from the data:
35
+ #
36
+ # * `insert`: Inserts the missing fields.
37
+ # * `raise`: Raises an error.
38
+ #
39
+ # @param extra_struct_fields ['ignore', 'raise']
40
+ # Configuration for behavior when extra struct fields outside of the
41
+ # defined schema are encountered in the data:
42
+ #
43
+ # * `ignore`: Silently ignores.
44
+ # * `raise`: Raises an error.
45
+ def initialize(
46
+ integer_cast: "forbid",
47
+ float_cast: "forbid",
48
+ datetime_cast: "forbid",
49
+ missing_struct_fields: "raise",
50
+ extra_struct_fields: "raise",
51
+ _internal_call: false
52
+ )
53
+ @integer_cast = integer_cast
54
+ @float_cast = float_cast
55
+ @datetime_cast = datetime_cast
56
+ @missing_struct_fields = missing_struct_fields
57
+ @extra_struct_fields = extra_struct_fields
58
+ end
59
+
60
+ def self.default
61
+ new(_internal_call: true)
62
+ end
63
+ end
64
+ end