polars-df 0.19.0-x64-mingw-ucrt → 0.21.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +211 -320
  4. data/LICENSE-THIRD-PARTY.txt +1376 -2634
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.so +0 -0
  7. data/lib/polars/3.3/polars.so +0 -0
  8. data/lib/polars/3.4/polars.so +0 -0
  9. data/lib/polars/cat_name_space.rb +3 -43
  10. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  11. data/lib/polars/catalog/unity/column_info.rb +31 -0
  12. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  13. data/lib/polars/catalog/unity/table_info.rb +50 -0
  14. data/lib/polars/catalog.rb +448 -0
  15. data/lib/polars/convert.rb +10 -0
  16. data/lib/polars/data_frame.rb +151 -30
  17. data/lib/polars/data_types.rb +47 -3
  18. data/lib/polars/exceptions.rb +7 -2
  19. data/lib/polars/expr.rb +48 -39
  20. data/lib/polars/functions/col.rb +6 -5
  21. data/lib/polars/functions/eager.rb +1 -1
  22. data/lib/polars/functions/lazy.rb +114 -15
  23. data/lib/polars/functions/repeat.rb +4 -0
  24. data/lib/polars/io/csv.rb +18 -0
  25. data/lib/polars/io/json.rb +16 -0
  26. data/lib/polars/io/ndjson.rb +13 -0
  27. data/lib/polars/io/parquet.rb +45 -63
  28. data/lib/polars/io/scan_options.rb +47 -0
  29. data/lib/polars/lazy_frame.rb +163 -75
  30. data/lib/polars/list_expr.rb +213 -17
  31. data/lib/polars/list_name_space.rb +121 -8
  32. data/lib/polars/meta_expr.rb +14 -29
  33. data/lib/polars/scan_cast_options.rb +64 -0
  34. data/lib/polars/schema.rb +6 -1
  35. data/lib/polars/selector.rb +138 -0
  36. data/lib/polars/selectors.rb +931 -202
  37. data/lib/polars/series.rb +46 -19
  38. data/lib/polars/string_expr.rb +24 -3
  39. data/lib/polars/string_name_space.rb +12 -1
  40. data/lib/polars/utils/parse.rb +40 -0
  41. data/lib/polars/utils.rb +5 -1
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +8 -0
  44. metadata +10 -2
@@ -123,7 +123,7 @@ module Polars
123
123
  # # shape: (2,)
124
124
  # # Series: 'values' [list[i64]]
125
125
  # # [
126
- # # [2, 1]
126
+ # # [2, 3]
127
127
  # # [5]
128
128
  # # ]
129
129
  def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
@@ -586,12 +586,6 @@ module Polars
586
586
  # @param expr [Expr]
587
587
  # Expression to run. Note that you can select an element with `Polars.first`, or
588
588
  # `Polars.col`
589
- # @param parallel [Boolean]
590
- # Run all expression parallel. Don't activate this blindly.
591
- # Parallelism is worth it if there is enough work to do per thread.
592
- #
593
- # This likely should not be use in the group by context, because we already
594
- # parallel execution per group
595
589
  #
596
590
  # @return [Series]
597
591
  #
@@ -611,7 +605,126 @@ module Polars
611
605
  # # │ 8 ┆ 5 ┆ [2.0, 1.0] │
612
606
  # # │ 3 ┆ 2 ┆ [2.0, 1.0] │
613
607
  # # └─────┴─────┴────────────┘
614
- def eval(expr, parallel: false)
608
+ def eval(expr)
609
+ super
610
+ end
611
+
612
+ # Filter elements in each list by a boolean expression, returning a new Series of lists.
613
+ #
614
+ # @param predicate [Object]
615
+ # A boolean expression evaluated on each list element.
616
+ # Use `Polars.element` to refer to the current element.
617
+ #
618
+ # @return [Series]
619
+ #
620
+ # @example
621
+ # s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
622
+ # s.list.filter(Polars.element % 2 == 0)
623
+ # # =>
624
+ # # shape: (3,)
625
+ # # Series: 'a' [list[i64]]
626
+ # # [
627
+ # # [4]
628
+ # # [8]
629
+ # # [2]
630
+ # # ]
631
+ def filter(predicate)
632
+ super
633
+ end
634
+
635
+ # Compute the SET UNION between the elements in this list and the elements of `other`.
636
+ #
637
+ # @param other [Object]
638
+ # Right hand side of the set operation.
639
+ #
640
+ # @return [Series]
641
+ #
642
+ # @example
643
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
644
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
645
+ # a.list.set_union(b)
646
+ # # =>
647
+ # # shape: (4,)
648
+ # # Series: '' [list[i64]]
649
+ # # [
650
+ # # [1, 2, … 4]
651
+ # # [3]
652
+ # # [null, 3, 4]
653
+ # # [5, 6, … 8]
654
+ # # ]
655
+ def set_union(other)
656
+ super
657
+ end
658
+
659
+ # Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
660
+ #
661
+ # @param other [Object]
662
+ # Right hand side of the set operation.
663
+ #
664
+ # @return [Series]
665
+ #
666
+ # @example
667
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
668
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
669
+ # a.list.set_difference(b)
670
+ # # =>
671
+ # # shape: (4,)
672
+ # # Series: '' [list[i64]]
673
+ # # [
674
+ # # [1]
675
+ # # []
676
+ # # []
677
+ # # [5, 7]
678
+ # # ]
679
+ def set_difference(other)
680
+ super
681
+ end
682
+
683
+ # Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
684
+ #
685
+ # @param other [Object]
686
+ # Right hand side of the set operation.
687
+ #
688
+ # @return [Series]
689
+ #
690
+ # @example
691
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
692
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
693
+ # a.list.set_intersection(b)
694
+ # # =>
695
+ # # shape: (4,)
696
+ # # Series: '' [list[i64]]
697
+ # # [
698
+ # # [2, 3]
699
+ # # []
700
+ # # [null, 3]
701
+ # # [6]
702
+ # # ]
703
+ def set_intersection(other)
704
+ super
705
+ end
706
+
707
+ # Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
708
+ #
709
+ # @param other [Object]
710
+ # Right hand side of the set operation.
711
+ #
712
+ # @return [Series]
713
+ #
714
+ # @example
715
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
716
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
717
+ # a.list.set_symmetric_difference(b)
718
+ # # =>
719
+ # # shape: (4,)
720
+ # # Series: '' [list[i64]]
721
+ # # [
722
+ # # [1, 4]
723
+ # # [3]
724
+ # # [4]
725
+ # # [5, 7, 8]
726
+ # # ]
727
+ def set_symmetric_difference(other)
615
728
  super
616
729
  end
617
730
  end
@@ -125,14 +125,14 @@ module Polars
125
125
  # @return [Array]
126
126
  #
127
127
  # @example
128
- # e = Polars.col("foo").alias("bar")
128
+ # e = Polars.col("foo") + Polars.col("bar")
129
129
  # first = e.meta.pop[0]
130
- # _ = first.meta == Polars.col("foo")
131
- # # => true
132
130
  # _ = first.meta == Polars.col("bar")
131
+ # # => true
132
+ # _ = first.meta == Polars.col("foo")
133
133
  # # => false
134
- def pop
135
- _rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
134
+ def pop(schema: nil)
135
+ _rbexpr.meta_pop(schema).map { |e| Utils.wrap_expr(e) }
136
136
  end
137
137
 
138
138
  # Get a list with the root column name.
@@ -171,32 +171,17 @@ module Polars
171
171
  Utils.wrap_expr(_rbexpr.meta_undo_aliases)
172
172
  end
173
173
 
174
- # Turn this expression in a selector.
174
+ # Try to turn this expression into a selector.
175
175
  #
176
- # @return [Expr]
177
- def _as_selector
178
- Utils.wrap_expr(_rbexpr._meta_as_selector)
179
- end
180
-
181
- # Add selectors.
176
+ # Raises if the underlying expression is not a column or selector.
182
177
  #
183
178
  # @return [Expr]
184
- def _selector_add(other)
185
- Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
186
- end
187
-
188
- # Subtract selectors.
189
179
  #
190
- # @return [Expr]
191
- def _selector_sub(other)
192
- Utils.wrap_expr(_rbexpr._meta_selector_sub(other._rbexpr))
193
- end
194
-
195
- # & selectors.
196
- #
197
- # @return [Expr]
198
- def _selector_and(other)
199
- Utils.wrap_expr(_rbexpr._meta_selector_and(other._rbexpr))
180
+ # @note
181
+ # This functionality is considered **unstable**. It may be changed
182
+ # at any point without it being considered a breaking change.
183
+ def as_selector
184
+ Selector._from_rbselector(_rbexpr.into_selector)
200
185
  end
201
186
 
202
187
  # Format the expression as a tree.
@@ -209,8 +194,8 @@ module Polars
209
194
  # @example
210
195
  # e = (Polars.col("foo") * Polars.col("bar")).sum.over(Polars.col("ham")) / 2
211
196
  # e.meta.tree_format(return_as_string: true)
212
- def tree_format(return_as_string: false)
213
- s = _rbexpr.meta_tree_format
197
+ def tree_format(return_as_string: false, schema: nil)
198
+ s = _rbexpr.meta_tree_format(schema)
214
199
  if return_as_string
215
200
  s
216
201
  else
@@ -0,0 +1,64 @@
1
+ module Polars
2
+ # Options for scanning files.
3
+ class ScanCastOptions
4
+ # Common configuration for scanning files.
5
+ #
6
+ # @note
7
+ # This functionality is considered **unstable**. It may be changed
8
+ # at any point without it being considered a breaking change.
9
+ #
10
+ # @param integer_cast ['upcast', 'forbid']
11
+ # Configuration for casting from integer types:
12
+ #
13
+ # * `upcast`: Allow lossless casting to wider integer types.
14
+ # * `forbid`: Raises an error if dtypes do not match.
15
+ #
16
+ # @param float_cast ['upcast', 'downcast', 'forbid']
17
+ # Configuration for casting from float types:
18
+ #
19
+ # * `upcast`: Allow casting to higher precision float types.
20
+ # * `downcast`: Allow casting to lower precision float types.
21
+ # * `forbid`: Raises an error if dtypes do not match.
22
+ #
23
+ # @param datetime_cast ['nanosecond-downcast', 'convert-timezone', 'forbid']
24
+ # Configuration for casting from datetime types:
25
+ #
26
+ # * `nanosecond-downcast`: Allow nanosecond precision datetime to be
27
+ # downcast to any lower precision. This has a similar effect to
28
+ # PyArrow's `coerce_int96_timestamp_unit`.
29
+ # * `convert-timezone`: Allow casting to a different timezone.
30
+ # * `forbid`: Raises an error if dtypes do not match.
31
+ #
32
+ # @param missing_struct_fields ['insert', 'raise']
33
+ # Configuration for behavior when struct fields defined in the schema
34
+ # are missing from the data:
35
+ #
36
+ # * `insert`: Inserts the missing fields.
37
+ # * `raise`: Raises an error.
38
+ #
39
+ # @param extra_struct_fields ['ignore', 'raise']
40
+ # Configuration for behavior when extra struct fields outside of the
41
+ # defined schema are encountered in the data:
42
+ #
43
+ # * `ignore`: Silently ignores.
44
+ # * `raise`: Raises an error.
45
+ def initialize(
46
+ integer_cast: "forbid",
47
+ float_cast: "forbid",
48
+ datetime_cast: "forbid",
49
+ missing_struct_fields: "raise",
50
+ extra_struct_fields: "raise",
51
+ _internal_call: false
52
+ )
53
+ @integer_cast = integer_cast
54
+ @float_cast = float_cast
55
+ @datetime_cast = datetime_cast
56
+ @missing_struct_fields = missing_struct_fields
57
+ @extra_struct_fields = extra_struct_fields
58
+ end
59
+
60
+ def self.default
61
+ new(_internal_call: true)
62
+ end
63
+ end
64
+ end
data/lib/polars/schema.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module Polars
2
2
  class Schema
3
- def initialize(schema, check_dtypes: true)
3
+ def initialize(schema = nil, check_dtypes: true)
4
4
  raise Todo if check_dtypes
5
5
  @schema = schema.to_h
6
6
  end
@@ -9,6 +9,11 @@ module Polars
9
9
  @schema[key]
10
10
  end
11
11
 
12
+ def []=(name, dtype)
13
+ # TODO check dtype if needed
14
+ @schema[name] = dtype
15
+ end
16
+
12
17
  def names
13
18
  @schema.keys
14
19
  end
@@ -0,0 +1,138 @@
1
+ module Polars
2
+ # Base column selector expression/proxy.
3
+ class Selector < Expr
4
+ # @private
5
+ attr_accessor :_rbselector
6
+
7
+ # @private
8
+ def self._from_rbselector(rbselector)
9
+ slf = new
10
+ slf._rbselector = rbselector
11
+ slf._rbexpr = RbExpr.new_selector(rbselector)
12
+ slf
13
+ end
14
+
15
+ def inspect
16
+ Expr._from_rbexpr(_rbexpr).to_s
17
+ end
18
+
19
+ # @private
20
+ def self._by_dtype(dtypes)
21
+ selectors = []
22
+ concrete_dtypes = []
23
+ dtypes.each do |dt|
24
+ if Utils.is_polars_dtype(dt)
25
+ concrete_dtypes += [dt]
26
+ else
27
+ raise Todo
28
+ end
29
+ end
30
+
31
+ dtype_selector = _from_rbselector(RbSelector.by_dtype(concrete_dtypes))
32
+
33
+ if selectors.length == 0
34
+ return dtype_selector
35
+ end
36
+
37
+ selector = selectors[0]
38
+ selectors[1..].each do |s|
39
+ selector = selector | s
40
+ end
41
+ if concrete_dtypes.length == 0
42
+ selector
43
+ else
44
+ dtype_selector | selector
45
+ end
46
+ end
47
+
48
+ # @private
49
+ def self._by_name(names, strict:)
50
+ _from_rbselector(RbSelector.by_name(names, strict))
51
+ end
52
+
53
+ def ~
54
+ Selectors.all - self
55
+ end
56
+
57
+ def &(other)
58
+ if Utils.is_column(other)
59
+ colname = other.meta.output_name
60
+ other = by_name(colname)
61
+ end
62
+ if Utils.is_selector(other)
63
+ Selector._from_rbselector(
64
+ _rbselector.intersect(other._rbselector)
65
+ )
66
+ else
67
+ as_expr & other
68
+ end
69
+ end
70
+
71
+ def |(other)
72
+ if Utils.is_column(other)
73
+ other = by_name(other.meta.output_name)
74
+ end
75
+ if Utils.is_selector(other)
76
+ Selector._from_rbselector(
77
+ _rbselector.union(other._rbselector)
78
+ )
79
+ else
80
+ as_expr | other
81
+ end
82
+ end
83
+
84
+ def -(other)
85
+ if Utils.is_selector(other)
86
+ Selector._from_rbselector(
87
+ _rbselector.difference(other._rbselector)
88
+ )
89
+ else
90
+ as_expr - other
91
+ end
92
+ end
93
+
94
+ def ^(other)
95
+ if Utils.is_column(other)
96
+ other = by_name(other.meta.output_name)
97
+ end
98
+ if Utils.is_selector(other)
99
+ Selector._from_rbselector(
100
+ _rbselector.exclusive_or(other._rbselector)
101
+ )
102
+ else
103
+ as_expr ^ other
104
+ end
105
+ end
106
+
107
+ def exclude(columns, *more_columns)
108
+ exclude_cols = []
109
+ exclude_dtypes = []
110
+ ((columns.is_a?(::Array) ? columns : [columns]) + more_columns).each do |item|
111
+ if item.is_a?(::String)
112
+ exclude_cols << item
113
+ elsif Utils.is_polars_dtype(item)
114
+ exclude_dtypes << item
115
+ else
116
+ msg = (
117
+ "invalid input for `exclude`" +
118
+ "\n\nExpected one or more `str` or `DataType`; found #{item.inspect} instead."
119
+ )
120
+ raise TypeError, msg
121
+ end
122
+ end
123
+
124
+ if exclude_cols.any? && exclude_dtypes.any?
125
+ msg = "cannot exclude by both column name and dtype; use a selector instead"
126
+ raise TypeError, msg
127
+ elsif exclude_dtypes.any?
128
+ self - Selectors.by_dtype(exclude_dtypes)
129
+ else
130
+ self - Selectors.by_name(exclude_cols, require_all: false)
131
+ end
132
+ end
133
+
134
+ def as_expr
135
+ Expr._from_rbexpr(_rbexpr)
136
+ end
137
+ end
138
+ end