polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE-THIRD-PARTY.txt +1431 -1810
  5. data/LICENSE.txt +1 -1
  6. data/lib/polars/3.2/polars.bundle +0 -0
  7. data/lib/polars/3.3/polars.bundle +0 -0
  8. data/lib/polars/3.4/polars.bundle +0 -0
  9. data/lib/polars/cat_name_space.rb +3 -43
  10. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  11. data/lib/polars/catalog/unity/column_info.rb +31 -0
  12. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  13. data/lib/polars/catalog/unity/table_info.rb +50 -0
  14. data/lib/polars/catalog.rb +448 -0
  15. data/lib/polars/convert.rb +10 -0
  16. data/lib/polars/data_frame.rb +151 -30
  17. data/lib/polars/data_types.rb +47 -3
  18. data/lib/polars/exceptions.rb +7 -2
  19. data/lib/polars/expr.rb +34 -31
  20. data/lib/polars/functions/col.rb +6 -5
  21. data/lib/polars/functions/lazy.rb +114 -15
  22. data/lib/polars/functions/repeat.rb +4 -0
  23. data/lib/polars/io/csv.rb +18 -0
  24. data/lib/polars/io/json.rb +16 -0
  25. data/lib/polars/io/ndjson.rb +13 -0
  26. data/lib/polars/io/parquet.rb +45 -63
  27. data/lib/polars/io/scan_options.rb +47 -0
  28. data/lib/polars/lazy_frame.rb +163 -75
  29. data/lib/polars/list_expr.rb +204 -7
  30. data/lib/polars/list_name_space.rb +120 -1
  31. data/lib/polars/meta_expr.rb +7 -22
  32. data/lib/polars/scan_cast_options.rb +64 -0
  33. data/lib/polars/schema.rb +6 -1
  34. data/lib/polars/selector.rb +138 -0
  35. data/lib/polars/selectors.rb +931 -202
  36. data/lib/polars/series.rb +34 -11
  37. data/lib/polars/string_expr.rb +24 -3
  38. data/lib/polars/string_name_space.rb +11 -0
  39. data/lib/polars/utils/parse.rb +40 -0
  40. data/lib/polars/utils.rb +5 -1
  41. data/lib/polars/version.rb +1 -1
  42. data/lib/polars.rb +8 -0
  43. metadata +10 -2
@@ -123,7 +123,7 @@ module Polars
123
123
  # # shape: (2,)
124
124
  # # Series: 'values' [list[i64]]
125
125
  # # [
126
- # # [2, 1]
126
+ # # [2, 3]
127
127
  # # [5]
128
128
  # # ]
129
129
  def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
@@ -608,5 +608,124 @@ module Polars
608
608
  def eval(expr)
609
609
  super
610
610
  end
611
+
612
+ # Filter elements in each list by a boolean expression, returning a new Series of lists.
613
+ #
614
+ # @param predicate [Object]
615
+ # A boolean expression evaluated on each list element.
616
+ # Use `Polars.element` to refer to the current element.
617
+ #
618
+ # @return [Series]
619
+ #
620
+ # @example
621
+ # s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
622
+ # s.list.filter(Polars.element % 2 == 0)
623
+ # # =>
624
+ # # shape: (3,)
625
+ # # Series: 'a' [list[i64]]
626
+ # # [
627
+ # # [4]
628
+ # # [8]
629
+ # # [2]
630
+ # # ]
631
+ def filter(predicate)
632
+ super
633
+ end
634
+
635
+ # Compute the SET UNION between the elements in this list and the elements of `other`.
636
+ #
637
+ # @param other [Object]
638
+ # Right hand side of the set operation.
639
+ #
640
+ # @return [Series]
641
+ #
642
+ # @example
643
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
644
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
645
+ # a.list.set_union(b)
646
+ # # =>
647
+ # # shape: (4,)
648
+ # # Series: '' [list[i64]]
649
+ # # [
650
+ # # [1, 2, … 4]
651
+ # # [3]
652
+ # # [null, 3, 4]
653
+ # # [5, 6, … 8]
654
+ # # ]
655
+ def set_union(other)
656
+ super
657
+ end
658
+
659
+ # Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
660
+ #
661
+ # @param other [Object]
662
+ # Right hand side of the set operation.
663
+ #
664
+ # @return [Series]
665
+ #
666
+ # @example
667
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
668
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
669
+ # a.list.set_difference(b)
670
+ # # =>
671
+ # # shape: (4,)
672
+ # # Series: '' [list[i64]]
673
+ # # [
674
+ # # [1]
675
+ # # []
676
+ # # []
677
+ # # [5, 7]
678
+ # # ]
679
+ def set_difference(other)
680
+ super
681
+ end
682
+
683
+ # Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
684
+ #
685
+ # @param other [Object]
686
+ # Right hand side of the set operation.
687
+ #
688
+ # @return [Series]
689
+ #
690
+ # @example
691
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
692
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
693
+ # a.list.set_intersection(b)
694
+ # # =>
695
+ # # shape: (4,)
696
+ # # Series: '' [list[i64]]
697
+ # # [
698
+ # # [2, 3]
699
+ # # []
700
+ # # [null, 3]
701
+ # # [6]
702
+ # # ]
703
+ def set_intersection(other)
704
+ super
705
+ end
706
+
707
+ # Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
708
+ #
709
+ # @param other [Object]
710
+ # Right hand side of the set operation.
711
+ #
712
+ # @return [Series]
713
+ #
714
+ # @example
715
+ # a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
716
+ # b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
717
+ # a.list.set_symmetric_difference(b)
718
+ # # =>
719
+ # # shape: (4,)
720
+ # # Series: '' [list[i64]]
721
+ # # [
722
+ # # [1, 4]
723
+ # # [3]
724
+ # # [4]
725
+ # # [5, 7, 8]
726
+ # # ]
727
+ def set_symmetric_difference(other)
728
+ super
729
+ end
611
730
  end
612
731
  end
@@ -171,32 +171,17 @@ module Polars
171
171
  Utils.wrap_expr(_rbexpr.meta_undo_aliases)
172
172
  end
173
173
 
174
- # Turn this expression in a selector.
174
+ # Try to turn this expression into a selector.
175
175
  #
176
- # @return [Expr]
177
- def _as_selector
178
- Utils.wrap_expr(_rbexpr._meta_as_selector)
179
- end
180
-
181
- # Add selectors.
182
- #
183
- # @return [Expr]
184
- def _selector_add(other)
185
- Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
186
- end
187
-
188
- # Subtract selectors.
176
+ # Raises if the underlying expression is not a column or selector.
189
177
  #
190
178
  # @return [Expr]
191
- def _selector_sub(other)
192
- Utils.wrap_expr(_rbexpr._meta_selector_sub(other._rbexpr))
193
- end
194
-
195
- # & selectors.
196
179
  #
197
- # @return [Expr]
198
- def _selector_and(other)
199
- Utils.wrap_expr(_rbexpr._meta_selector_and(other._rbexpr))
180
+ # @note
181
+ # This functionality is considered **unstable**. It may be changed
182
+ # at any point without it being considered a breaking change.
183
+ def as_selector
184
+ Selector._from_rbselector(_rbexpr.into_selector)
200
185
  end
201
186
 
202
187
  # Format the expression as a tree.
@@ -0,0 +1,64 @@
1
+ module Polars
2
+ # Options for scanning files.
3
+ class ScanCastOptions
4
+ # Common configuration for scanning files.
5
+ #
6
+ # @note
7
+ # This functionality is considered **unstable**. It may be changed
8
+ # at any point without it being considered a breaking change.
9
+ #
10
+ # @param integer_cast ['upcast', 'forbid']
11
+ # Configuration for casting from integer types:
12
+ #
13
+ # * `upcast`: Allow lossless casting to wider integer types.
14
+ # * `forbid`: Raises an error if dtypes do not match.
15
+ #
16
+ # @param float_cast ['upcast', 'downcast', 'forbid']
17
+ # Configuration for casting from float types:
18
+ #
19
+ # * `upcast`: Allow casting to higher precision float types.
20
+ # * `downcast`: Allow casting to lower precision float types.
21
+ # * `forbid`: Raises an error if dtypes do not match.
22
+ #
23
+ # @param datetime_cast ['nanosecond-downcast', 'convert-timezone', 'forbid']
24
+ # Configuration for casting from datetime types:
25
+ #
26
+ # * `nanosecond-downcast`: Allow nanosecond precision datetime to be
27
+ # downcast to any lower precision. This has a similar effect to
28
+ # PyArrow's `coerce_int96_timestamp_unit`.
29
+ # * `convert-timezone`: Allow casting to a different timezone.
30
+ # * `forbid`: Raises an error if dtypes do not match.
31
+ #
32
+ # @param missing_struct_fields ['insert', 'raise']
33
+ # Configuration for behavior when struct fields defined in the schema
34
+ # are missing from the data:
35
+ #
36
+ # * `insert`: Inserts the missing fields.
37
+ # * `raise`: Raises an error.
38
+ #
39
+ # @param extra_struct_fields ['ignore', 'raise']
40
+ # Configuration for behavior when extra struct fields outside of the
41
+ # defined schema are encountered in the data:
42
+ #
43
+ # * `ignore`: Silently ignores.
44
+ # * `raise`: Raises an error.
45
+ def initialize(
46
+ integer_cast: "forbid",
47
+ float_cast: "forbid",
48
+ datetime_cast: "forbid",
49
+ missing_struct_fields: "raise",
50
+ extra_struct_fields: "raise",
51
+ _internal_call: false
52
+ )
53
+ @integer_cast = integer_cast
54
+ @float_cast = float_cast
55
+ @datetime_cast = datetime_cast
56
+ @missing_struct_fields = missing_struct_fields
57
+ @extra_struct_fields = extra_struct_fields
58
+ end
59
+
60
+ def self.default
61
+ new(_internal_call: true)
62
+ end
63
+ end
64
+ end
data/lib/polars/schema.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module Polars
2
2
  class Schema
3
- def initialize(schema, check_dtypes: true)
3
+ def initialize(schema = nil, check_dtypes: true)
4
4
  raise Todo if check_dtypes
5
5
  @schema = schema.to_h
6
6
  end
@@ -9,6 +9,11 @@ module Polars
9
9
  @schema[key]
10
10
  end
11
11
 
12
+ def []=(name, dtype)
13
+ # TODO check dtype if needed
14
+ @schema[name] = dtype
15
+ end
16
+
12
17
  def names
13
18
  @schema.keys
14
19
  end
@@ -0,0 +1,138 @@
1
+ module Polars
2
+ # Base column selector expression/proxy.
3
+ class Selector < Expr
4
+ # @private
5
+ attr_accessor :_rbselector
6
+
7
+ # @private
8
+ def self._from_rbselector(rbselector)
9
+ slf = new
10
+ slf._rbselector = rbselector
11
+ slf._rbexpr = RbExpr.new_selector(rbselector)
12
+ slf
13
+ end
14
+
15
+ def inspect
16
+ Expr._from_rbexpr(_rbexpr).to_s
17
+ end
18
+
19
+ # @private
20
+ def self._by_dtype(dtypes)
21
+ selectors = []
22
+ concrete_dtypes = []
23
+ dtypes.each do |dt|
24
+ if Utils.is_polars_dtype(dt)
25
+ concrete_dtypes += [dt]
26
+ else
27
+ raise Todo
28
+ end
29
+ end
30
+
31
+ dtype_selector = _from_rbselector(RbSelector.by_dtype(concrete_dtypes))
32
+
33
+ if selectors.length == 0
34
+ return dtype_selector
35
+ end
36
+
37
+ selector = selectors[0]
38
+ selectors[1..].each do |s|
39
+ selector = selector | s
40
+ end
41
+ if concrete_dtypes.length == 0
42
+ selector
43
+ else
44
+ dtype_selector | selector
45
+ end
46
+ end
47
+
48
+ # @private
49
+ def self._by_name(names, strict:)
50
+ _from_rbselector(RbSelector.by_name(names, strict))
51
+ end
52
+
53
+ def ~
54
+ Selectors.all - self
55
+ end
56
+
57
+ def &(other)
58
+ if Utils.is_column(other)
59
+ colname = other.meta.output_name
60
+ other = by_name(colname)
61
+ end
62
+ if Utils.is_selector(other)
63
+ Selector._from_rbselector(
64
+ _rbselector.intersect(other._rbselector)
65
+ )
66
+ else
67
+ as_expr & other
68
+ end
69
+ end
70
+
71
+ def |(other)
72
+ if Utils.is_column(other)
73
+ other = by_name(other.meta.output_name)
74
+ end
75
+ if Utils.is_selector(other)
76
+ Selector._from_rbselector(
77
+ _rbselector.union(other._rbselector)
78
+ )
79
+ else
80
+ as_expr | other
81
+ end
82
+ end
83
+
84
+ def -(other)
85
+ if Utils.is_selector(other)
86
+ Selector._from_rbselector(
87
+ _rbselector.difference(other._rbselector)
88
+ )
89
+ else
90
+ as_expr - other
91
+ end
92
+ end
93
+
94
+ def ^(other)
95
+ if Utils.is_column(other)
96
+ other = by_name(other.meta.output_name)
97
+ end
98
+ if Utils.is_selector(other)
99
+ Selector._from_rbselector(
100
+ _rbselector.exclusive_or(other._rbselector)
101
+ )
102
+ else
103
+ as_expr ^ other
104
+ end
105
+ end
106
+
107
+ def exclude(columns, *more_columns)
108
+ exclude_cols = []
109
+ exclude_dtypes = []
110
+ ((columns.is_a?(::Array) ? columns : [columns]) + more_columns).each do |item|
111
+ if item.is_a?(::String)
112
+ exclude_cols << item
113
+ elsif Utils.is_polars_dtype(item)
114
+ exclude_dtypes << item
115
+ else
116
+ msg = (
117
+ "invalid input for `exclude`" +
118
+ "\n\nExpected one or more `str` or `DataType`; found #{item.inspect} instead."
119
+ )
120
+ raise TypeError, msg
121
+ end
122
+ end
123
+
124
+ if exclude_cols.any? && exclude_dtypes.any?
125
+ msg = "cannot exclude by both column name and dtype; use a selector instead"
126
+ raise TypeError, msg
127
+ elsif exclude_dtypes.any?
128
+ self - Selectors.by_dtype(exclude_dtypes)
129
+ else
130
+ self - Selectors.by_name(exclude_cols, require_all: false)
131
+ end
132
+ end
133
+
134
+ def as_expr
135
+ Expr._from_rbexpr(_rbexpr)
136
+ end
137
+ end
138
+ end