polars-df 0.20.0-x86_64-darwin → 0.21.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +192 -186
- data/LICENSE-THIRD-PARTY.txt +1431 -1810
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +34 -31
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +204 -7
- data/lib/polars/list_name_space.rb +120 -1
- data/lib/polars/meta_expr.rb +7 -22
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +34 -11
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +11 -0
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
@@ -123,7 +123,7 @@ module Polars
|
|
123
123
|
# # shape: (2,)
|
124
124
|
# # Series: 'values' [list[i64]]
|
125
125
|
# # [
|
126
|
-
# # [2,
|
126
|
+
# # [2, 3]
|
127
127
|
# # [5]
|
128
128
|
# # ]
|
129
129
|
def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
|
@@ -608,5 +608,124 @@ module Polars
|
|
608
608
|
def eval(expr)
|
609
609
|
super
|
610
610
|
end
|
611
|
+
|
612
|
+
# Filter elements in each list by a boolean expression, returning a new Series of lists.
|
613
|
+
#
|
614
|
+
# @param predicate [Object]
|
615
|
+
# A boolean expression evaluated on each list element.
|
616
|
+
# Use `Polars.element` to refer to the current element.
|
617
|
+
#
|
618
|
+
# @return [Series]
|
619
|
+
#
|
620
|
+
# @example
|
621
|
+
# s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
|
622
|
+
# s.list.filter(Polars.element % 2 == 0)
|
623
|
+
# # =>
|
624
|
+
# # shape: (3,)
|
625
|
+
# # Series: 'a' [list[i64]]
|
626
|
+
# # [
|
627
|
+
# # [4]
|
628
|
+
# # [8]
|
629
|
+
# # [2]
|
630
|
+
# # ]
|
631
|
+
def filter(predicate)
|
632
|
+
super
|
633
|
+
end
|
634
|
+
|
635
|
+
# Compute the SET UNION between the elements in this list and the elements of `other`.
|
636
|
+
#
|
637
|
+
# @param other [Object]
|
638
|
+
# Right hand side of the set operation.
|
639
|
+
#
|
640
|
+
# @return [Series]
|
641
|
+
#
|
642
|
+
# @example
|
643
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
644
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
645
|
+
# a.list.set_union(b)
|
646
|
+
# # =>
|
647
|
+
# # shape: (4,)
|
648
|
+
# # Series: '' [list[i64]]
|
649
|
+
# # [
|
650
|
+
# # [1, 2, … 4]
|
651
|
+
# # [3]
|
652
|
+
# # [null, 3, 4]
|
653
|
+
# # [5, 6, … 8]
|
654
|
+
# # ]
|
655
|
+
def set_union(other)
|
656
|
+
super
|
657
|
+
end
|
658
|
+
|
659
|
+
# Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
|
660
|
+
#
|
661
|
+
# @param other [Object]
|
662
|
+
# Right hand side of the set operation.
|
663
|
+
#
|
664
|
+
# @return [Series]
|
665
|
+
#
|
666
|
+
# @example
|
667
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
668
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
669
|
+
# a.list.set_difference(b)
|
670
|
+
# # =>
|
671
|
+
# # shape: (4,)
|
672
|
+
# # Series: '' [list[i64]]
|
673
|
+
# # [
|
674
|
+
# # [1]
|
675
|
+
# # []
|
676
|
+
# # []
|
677
|
+
# # [5, 7]
|
678
|
+
# # ]
|
679
|
+
def set_difference(other)
|
680
|
+
super
|
681
|
+
end
|
682
|
+
|
683
|
+
# Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
|
684
|
+
#
|
685
|
+
# @param other [Object]
|
686
|
+
# Right hand side of the set operation.
|
687
|
+
#
|
688
|
+
# @return [Series]
|
689
|
+
#
|
690
|
+
# @example
|
691
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
692
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
693
|
+
# a.list.set_intersection(b)
|
694
|
+
# # =>
|
695
|
+
# # shape: (4,)
|
696
|
+
# # Series: '' [list[i64]]
|
697
|
+
# # [
|
698
|
+
# # [2, 3]
|
699
|
+
# # []
|
700
|
+
# # [null, 3]
|
701
|
+
# # [6]
|
702
|
+
# # ]
|
703
|
+
def set_intersection(other)
|
704
|
+
super
|
705
|
+
end
|
706
|
+
|
707
|
+
# Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
|
708
|
+
#
|
709
|
+
# @param other [Object]
|
710
|
+
# Right hand side of the set operation.
|
711
|
+
#
|
712
|
+
# @return [Series]
|
713
|
+
#
|
714
|
+
# @example
|
715
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
716
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
717
|
+
# a.list.set_symmetric_difference(b)
|
718
|
+
# # =>
|
719
|
+
# # shape: (4,)
|
720
|
+
# # Series: '' [list[i64]]
|
721
|
+
# # [
|
722
|
+
# # [1, 4]
|
723
|
+
# # [3]
|
724
|
+
# # [4]
|
725
|
+
# # [5, 7, 8]
|
726
|
+
# # ]
|
727
|
+
def set_symmetric_difference(other)
|
728
|
+
super
|
729
|
+
end
|
611
730
|
end
|
612
731
|
end
|
data/lib/polars/meta_expr.rb
CHANGED
@@ -171,32 +171,17 @@ module Polars
|
|
171
171
|
Utils.wrap_expr(_rbexpr.meta_undo_aliases)
|
172
172
|
end
|
173
173
|
|
174
|
-
#
|
174
|
+
# Try to turn this expression into a selector.
|
175
175
|
#
|
176
|
-
#
|
177
|
-
def _as_selector
|
178
|
-
Utils.wrap_expr(_rbexpr._meta_as_selector)
|
179
|
-
end
|
180
|
-
|
181
|
-
# Add selectors.
|
182
|
-
#
|
183
|
-
# @return [Expr]
|
184
|
-
def _selector_add(other)
|
185
|
-
Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
|
186
|
-
end
|
187
|
-
|
188
|
-
# Subtract selectors.
|
176
|
+
# Raises if the underlying expression is not a column or selector.
|
189
177
|
#
|
190
178
|
# @return [Expr]
|
191
|
-
def _selector_sub(other)
|
192
|
-
Utils.wrap_expr(_rbexpr._meta_selector_sub(other._rbexpr))
|
193
|
-
end
|
194
|
-
|
195
|
-
# & selectors.
|
196
179
|
#
|
197
|
-
# @
|
198
|
-
|
199
|
-
|
180
|
+
# @note
|
181
|
+
# This functionality is considered **unstable**. It may be changed
|
182
|
+
# at any point without it being considered a breaking change.
|
183
|
+
def as_selector
|
184
|
+
Selector._from_rbselector(_rbexpr.into_selector)
|
200
185
|
end
|
201
186
|
|
202
187
|
# Format the expression as a tree.
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Polars
  # Options for scanning files.
  class ScanCastOptions
    # The values passed to (or defaulted by) the constructor, exposed
    # read-only so a configured instance can be inspected.
    attr_reader :integer_cast,
                :float_cast,
                :datetime_cast,
                :missing_struct_fields,
                :extra_struct_fields

    # Common configuration for scanning files.
    #
    # @note
    #   This functionality is considered **unstable**. It may be changed
    #   at any point without it being considered a breaking change.
    #
    # @param integer_cast ['upcast', 'forbid']
    #   Configuration for casting from integer types:
    #
    #   * `upcast`: Allow lossless casting to wider integer types.
    #   * `forbid`: Raises an error if dtypes do not match.
    #
    # @param float_cast ['upcast', 'downcast', 'forbid']
    #   Configuration for casting from float types:
    #
    #   * `upcast`: Allow casting to higher precision float types.
    #   * `downcast`: Allow casting to lower precision float types.
    #   * `forbid`: Raises an error if dtypes do not match.
    #
    # @param datetime_cast ['nanosecond-downcast', 'convert-timezone', 'forbid']
    #   Configuration for casting from datetime types:
    #
    #   * `nanosecond-downcast`: Allow nanosecond precision datetime to be
    #     downcasted to any lower precision. This has a similar effect to
    #     PyArrow's `coerce_int96_timestamp_unit`.
    #   * `convert-timezone`: Allow casting to a different timezone.
    #   * `forbid`: Raises an error if dtypes do not match.
    #
    # @param missing_struct_fields ['insert', 'raise']
    #   Configuration for behavior when struct fields defined in the schema
    #   are missing from the data:
    #
    #   * `insert`: Inserts the missing fields.
    #   * `raise`: Raises an error.
    #
    # @param extra_struct_fields ['ignore', 'raise']
    #   Configuration for behavior when extra struct fields outside of the
    #   defined schema are encountered in the data:
    #
    #   * `ignore`: Silently ignores.
    #   * `raise`: Raises an error.
    #
    # @param _internal_call [Boolean]
    #   Accepted for internal callers (see {.default}); this constructor
    #   does not currently read it.
    def initialize(
      integer_cast: "forbid",
      float_cast: "forbid",
      datetime_cast: "forbid",
      missing_struct_fields: "raise",
      extra_struct_fields: "raise",
      _internal_call: false
    )
      # Values are stored as given; no validation is performed here.
      @integer_cast = integer_cast
      @float_cast = float_cast
      @datetime_cast = datetime_cast
      @missing_struct_fields = missing_struct_fields
      @extra_struct_fields = extra_struct_fields
    end

    # Build an instance in which every option keeps its default value.
    #
    # @return [ScanCastOptions]
    def self.default
      new(_internal_call: true)
    end
  end
end
|
data/lib/polars/schema.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Polars
|
2
2
|
class Schema
|
3
|
-
def initialize(schema, check_dtypes: true)
|
3
|
+
def initialize(schema = nil, check_dtypes: true)
|
4
4
|
raise Todo if check_dtypes
|
5
5
|
@schema = schema.to_h
|
6
6
|
end
|
@@ -9,6 +9,11 @@ module Polars
|
|
9
9
|
@schema[key]
|
10
10
|
end
|
11
11
|
|
12
|
+
def []=(name, dtype)
|
13
|
+
# TODO check dtype if needed
|
14
|
+
@schema[name] = dtype
|
15
|
+
end
|
16
|
+
|
12
17
|
def names
|
13
18
|
@schema.keys
|
14
19
|
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
module Polars
  # Base column selector expression/proxy.
  #
  # Holds a native selector (`_rbselector`) alongside the expression created
  # from it, and defines set-style operators that combine selectors. When the
  # other operand is not a selector, each operator falls back to the
  # corresponding plain expression operator via {#as_expr}.
  class Selector < Expr
    # @private
    attr_accessor :_rbselector

    # Wrap a native selector, deriving the backing expression from it.
    #
    # @private
    def self._from_rbselector(rbselector)
      slf = new
      slf._rbselector = rbselector
      slf._rbexpr = RbExpr.new_selector(rbselector)
      slf
    end

    # String form of the underlying expression.
    #
    # @return [String]
    def inspect
      as_expr.to_s
    end

    # Build a selector from a collection of Polars data types.
    #
    # Every entry must satisfy `Utils.is_polars_dtype`; any other kind of
    # input is not supported yet and raises `Todo`.
    #
    # @private
    def self._by_dtype(dtypes)
      concrete_dtypes = []
      dtypes.each do |dt|
        raise Todo unless Utils.is_polars_dtype(dt)

        concrete_dtypes << dt
      end

      _from_rbselector(RbSelector.by_dtype(concrete_dtypes))
    end

    # @private
    def self._by_name(names, strict:)
      _from_rbselector(RbSelector.by_name(names, strict))
    end

    # Invert the selection (computed as `Selectors.all - self`).
    #
    # @return [Selector]
    def ~
      Selectors.all - self
    end

    # Intersection with another selector; a plain column operand is first
    # converted to a by-name selector.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def &(other)
      other = _column_to_selector(other)
      return as_expr & other unless Utils.is_selector(other)

      Selector._from_rbselector(_rbselector.intersect(other._rbselector))
    end

    # Union with another selector; a plain column operand is first
    # converted to a by-name selector.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def |(other)
      other = _column_to_selector(other)
      return as_expr | other unless Utils.is_selector(other)

      Selector._from_rbselector(_rbselector.union(other._rbselector))
    end

    # Difference with another selector. Column operands are not converted
    # here; anything that is not a selector goes through expression `-`.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def -(other)
      return as_expr - other unless Utils.is_selector(other)

      Selector._from_rbselector(_rbselector.difference(other._rbselector))
    end

    # Exclusive-or with another selector; a plain column operand is first
    # converted to a by-name selector.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def ^(other)
      other = _column_to_selector(other)
      return as_expr ^ other unless Utils.is_selector(other)

      Selector._from_rbselector(_rbselector.exclusive_or(other._rbselector))
    end

    # Remove columns from this selection, either by name or by data type.
    #
    # @param columns [Object]
    #   A column name, a data type, or an array of either.
    # @param more_columns [Array]
    #   Additional names or data types, given positionally.
    #
    # @return [Selector]
    #
    # @raise [TypeError]
    #   If an item is neither a String nor a Polars data type, or if names
    #   and data types are mixed in one call.
    def exclude(columns, *more_columns)
      exclude_cols = []
      exclude_dtypes = []
      items = (columns.is_a?(::Array) ? columns : [columns]) + more_columns
      items.each do |item|
        if item.is_a?(::String)
          exclude_cols << item
        elsif Utils.is_polars_dtype(item)
          exclude_dtypes << item
        else
          raise TypeError,
            "invalid input for `exclude`\n\nExpected one or more `str` or `DataType`; found #{item.inspect} instead."
        end
      end

      if exclude_cols.any? && exclude_dtypes.any?
        raise TypeError, "cannot exclude by both column name and dtype; use a selector instead"
      end

      if exclude_dtypes.any?
        self - Selectors.by_dtype(exclude_dtypes)
      else
        # Also reached when nothing was given to exclude (empty name list).
        self - Selectors.by_name(exclude_cols, require_all: false)
      end
    end

    # A plain expression built from the same underlying native expression.
    #
    # @return [Expr]
    def as_expr
      Expr._from_rbexpr(_rbexpr)
    end

    private

    # Turn a plain column expression into a by-name selector; any other
    # operand is returned unchanged.
    def _column_to_selector(other)
      return other unless Utils.is_column(other)

      Selectors.by_name(other.meta.output_name)
    end
  end
end
|