polars-df 0.19.0-x86_64-darwin → 0.21.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE-THIRD-PARTY.txt +1256 -2131
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/3.4/polars.bundle +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
@@ -123,7 +123,7 @@ module Polars
|
|
123
123
|
# # shape: (2,)
|
124
124
|
# # Series: 'values' [list[i64]]
|
125
125
|
# # [
|
126
|
-
# # [2,
|
126
|
+
# # [2, 3]
|
127
127
|
# # [5]
|
128
128
|
# # ]
|
129
129
|
def sample(n: nil, fraction: nil, with_replacement: false, shuffle: false, seed: nil)
|
@@ -586,12 +586,6 @@ module Polars
|
|
586
586
|
# @param expr [Expr]
|
587
587
|
# Expression to run. Note that you can select an element with `Polars.first`, or
|
588
588
|
# `Polars.col`
|
589
|
-
# @param parallel [Boolean]
|
590
|
-
# Run all expression parallel. Don't activate this blindly.
|
591
|
-
# Parallelism is worth it if there is enough work to do per thread.
|
592
|
-
#
|
593
|
-
# This likely should not be use in the group by context, because we already
|
594
|
-
# parallel execution per group
|
595
589
|
#
|
596
590
|
# @return [Series]
|
597
591
|
#
|
@@ -611,7 +605,126 @@ module Polars
|
|
611
605
|
# # │ 8 ┆ 5 ┆ [2.0, 1.0] │
|
612
606
|
# # │ 3 ┆ 2 ┆ [2.0, 1.0] │
|
613
607
|
# # └─────┴─────┴────────────┘
|
614
|
-
def eval(expr
|
608
|
+
def eval(expr)
|
609
|
+
super
|
610
|
+
end
|
611
|
+
|
612
|
+
# Filter elements in each list by a boolean expression, returning a new Series of lists.
|
613
|
+
#
|
614
|
+
# @param predicate [Object]
|
615
|
+
# A boolean expression evaluated on each list element.
|
616
|
+
# Use `Polars.element` to refer to the current element.
|
617
|
+
#
|
618
|
+
# @return [Series]
|
619
|
+
#
|
620
|
+
# @example
|
621
|
+
# s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]])
|
622
|
+
# s.list.filter(Polars.element % 2 == 0)
|
623
|
+
# # =>
|
624
|
+
# # shape: (3,)
|
625
|
+
# # Series: 'a' [list[i64]]
|
626
|
+
# # [
|
627
|
+
# # [4]
|
628
|
+
# # [8]
|
629
|
+
# # [2]
|
630
|
+
# # ]
|
631
|
+
def filter(predicate)
|
632
|
+
super
|
633
|
+
end
|
634
|
+
|
635
|
+
# Compute the SET UNION between the elements in this list and the elements of `other`.
|
636
|
+
#
|
637
|
+
# @param other [Object]
|
638
|
+
# Right hand side of the set operation.
|
639
|
+
#
|
640
|
+
# @return [Series]
|
641
|
+
#
|
642
|
+
# @example
|
643
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
644
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
645
|
+
# a.list.set_union(b)
|
646
|
+
# # =>
|
647
|
+
# # shape: (4,)
|
648
|
+
# # Series: '' [list[i64]]
|
649
|
+
# # [
|
650
|
+
# # [1, 2, … 4]
|
651
|
+
# # [3]
|
652
|
+
# # [null, 3, 4]
|
653
|
+
# # [5, 6, … 8]
|
654
|
+
# # ]
|
655
|
+
def set_union(other)
|
656
|
+
super
|
657
|
+
end
|
658
|
+
|
659
|
+
# Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
|
660
|
+
#
|
661
|
+
# @param other [Object]
|
662
|
+
# Right hand side of the set operation.
|
663
|
+
#
|
664
|
+
# @return [Series]
|
665
|
+
#
|
666
|
+
# @example
|
667
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
668
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
669
|
+
# a.list.set_difference(b)
|
670
|
+
# # =>
|
671
|
+
# # shape: (4,)
|
672
|
+
# # Series: '' [list[i64]]
|
673
|
+
# # [
|
674
|
+
# # [1]
|
675
|
+
# # []
|
676
|
+
# # []
|
677
|
+
# # [5, 7]
|
678
|
+
# # ]
|
679
|
+
def set_difference(other)
|
680
|
+
super
|
681
|
+
end
|
682
|
+
|
683
|
+
# Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
|
684
|
+
#
|
685
|
+
# @param other [Object]
|
686
|
+
# Right hand side of the set operation.
|
687
|
+
#
|
688
|
+
# @return [Series]
|
689
|
+
#
|
690
|
+
# @example
|
691
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
692
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
693
|
+
# a.list.set_intersection(b)
|
694
|
+
# # =>
|
695
|
+
# # shape: (4,)
|
696
|
+
# # Series: '' [list[i64]]
|
697
|
+
# # [
|
698
|
+
# # [2, 3]
|
699
|
+
# # []
|
700
|
+
# # [null, 3]
|
701
|
+
# # [6]
|
702
|
+
# # ]
|
703
|
+
def set_intersection(other)
|
704
|
+
super
|
705
|
+
end
|
706
|
+
|
707
|
+
# Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
|
708
|
+
#
|
709
|
+
# @param other [Object]
|
710
|
+
# Right hand side of the set operation.
|
711
|
+
#
|
712
|
+
# @return [Series]
|
713
|
+
#
|
714
|
+
# @example
|
715
|
+
# a = Polars::Series.new([[1, 2, 3], [], [nil, 3], [5, 6, 7]])
|
716
|
+
# b = Polars::Series.new([[2, 3, 4], [3], [3, 4, nil], [6, 8]])
|
717
|
+
# a.list.set_symmetric_difference(b)
|
718
|
+
# # =>
|
719
|
+
# # shape: (4,)
|
720
|
+
# # Series: '' [list[i64]]
|
721
|
+
# # [
|
722
|
+
# # [1, 4]
|
723
|
+
# # [3]
|
724
|
+
# # [4]
|
725
|
+
# # [5, 7, 8]
|
726
|
+
# # ]
|
727
|
+
def set_symmetric_difference(other)
|
615
728
|
super
|
616
729
|
end
|
617
730
|
end
|
data/lib/polars/meta_expr.rb
CHANGED
@@ -125,14 +125,14 @@ module Polars
|
|
125
125
|
# @return [Array]
|
126
126
|
#
|
127
127
|
# @example
|
128
|
-
# e = Polars.col("foo").
|
128
|
+
# e = Polars.col("foo") + Polars.col("bar")
|
129
129
|
# first = e.meta.pop[0]
|
130
|
-
# _ = first.meta == Polars.col("foo")
|
131
|
-
# # => true
|
132
130
|
# _ = first.meta == Polars.col("bar")
|
131
|
+
# # => true
|
132
|
+
# _ = first.meta == Polars.col("foo")
|
133
133
|
# # => false
|
134
|
-
def pop
|
135
|
-
_rbexpr.meta_pop.map { |e| Utils.wrap_expr(e) }
|
134
|
+
def pop(schema: nil)
|
135
|
+
_rbexpr.meta_pop(schema).map { |e| Utils.wrap_expr(e) }
|
136
136
|
end
|
137
137
|
|
138
138
|
# Get a list with the root column name.
|
@@ -171,32 +171,17 @@ module Polars
|
|
171
171
|
Utils.wrap_expr(_rbexpr.meta_undo_aliases)
|
172
172
|
end
|
173
173
|
|
174
|
-
#
|
174
|
+
# Try to turn this expression into a selector.
|
175
175
|
#
|
176
|
-
#
|
177
|
-
def _as_selector
|
178
|
-
Utils.wrap_expr(_rbexpr._meta_as_selector)
|
179
|
-
end
|
180
|
-
|
181
|
-
# Add selectors.
|
176
|
+
# Raises if the underlying expression is not a column or selector.
|
182
177
|
#
|
183
178
|
# @return [Expr]
|
184
|
-
def _selector_add(other)
|
185
|
-
Utils.wrap_expr(_rbexpr._meta_selector_add(other._rbexpr))
|
186
|
-
end
|
187
|
-
|
188
|
-
# Subtract selectors.
|
189
179
|
#
|
190
|
-
# @
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
# & selectors.
|
196
|
-
#
|
197
|
-
# @return [Expr]
|
198
|
-
def _selector_and(other)
|
199
|
-
Utils.wrap_expr(_rbexpr._meta_selector_and(other._rbexpr))
|
180
|
+
# @note
|
181
|
+
# This functionality is considered **unstable**. It may be changed
|
182
|
+
# at any point without it being considered a breaking change.
|
183
|
+
def as_selector
|
184
|
+
Selector._from_rbselector(_rbexpr.into_selector)
|
200
185
|
end
|
201
186
|
|
202
187
|
# Format the expression as a tree.
|
@@ -209,8 +194,8 @@ module Polars
|
|
209
194
|
# @example
|
210
195
|
# e = (Polars.col("foo") * Polars.col("bar")).sum.over(Polars.col("ham")) / 2
|
211
196
|
# e.meta.tree_format(return_as_string: true)
|
212
|
-
def tree_format(return_as_string: false)
|
213
|
-
s = _rbexpr.meta_tree_format
|
197
|
+
def tree_format(return_as_string: false, schema: nil)
|
198
|
+
s = _rbexpr.meta_tree_format(schema)
|
214
199
|
if return_as_string
|
215
200
|
s
|
216
201
|
else
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module Polars
  # Options for scanning files.
  class ScanCastOptions
    # Read-only access to the configured policies, exactly as they were
    # passed to (or defaulted by) {#initialize}.
    attr_reader :integer_cast,
                :float_cast,
                :datetime_cast,
                :missing_struct_fields,
                :extra_struct_fields

    # Common configuration for scanning files.
    #
    # @note
    #   This functionality is considered **unstable**. It may be changed
    #   at any point without it being considered a breaking change.
    #
    # @param integer_cast ['upcast', 'forbid']
    #   Configuration for casting from integer types:
    #
    #   * `upcast`: Allow lossless casting to wider integer types.
    #   * `forbid`: Raises an error if dtypes do not match.
    #
    # @param float_cast ['upcast', 'downcast', 'forbid']
    #   Configuration for casting from float types:
    #
    #   * `upcast`: Allow casting to higher precision float types.
    #   * `downcast`: Allow casting to lower precision float types.
    #   * `forbid`: Raises an error if dtypes do not match.
    #
    # @param datetime_cast ['nanosecond-downcast', 'convert-timezone', 'forbid']
    #   Configuration for casting from datetime types:
    #
    #   * `nanosecond-downcast`: Allow nanosecond precision datetime to be
    #     downcasted to any lower precision. This has a similar effect to
    #     PyArrow's `coerce_int96_timestamp_unit`.
    #   * `convert-timezone`: Allow casting to a different timezone.
    #   * `forbid`: Raises an error if dtypes do not match.
    #
    # @param missing_struct_fields ['insert', 'raise']
    #   Configuration for behavior when struct fields defined in the schema
    #   are missing from the data:
    #
    #   * `insert`: Inserts the missing fields.
    #   * `raise`: Raises an error.
    #
    # @param extra_struct_fields ['ignore', 'raise']
    #   Configuration for behavior when extra struct fields outside of the
    #   defined schema are encountered in the data:
    #
    #   * `ignore`: Silently ignores.
    #   * `raise`: Raises an error.
    #
    # @param _internal_call [Boolean]
    #   Flag passed by internal callers such as {.default}. It is accepted
    #   for interface compatibility and is not used by this constructor.
    def initialize(
      integer_cast: "forbid",
      float_cast: "forbid",
      datetime_cast: "forbid",
      missing_struct_fields: "raise",
      extra_struct_fields: "raise",
      _internal_call: false
    )
      # Values are stored as given; no validation happens in this class.
      @integer_cast = integer_cast
      @float_cast = float_cast
      @datetime_cast = datetime_cast
      @missing_struct_fields = missing_struct_fields
      @extra_struct_fields = extra_struct_fields
    end

    # Build an instance with every option left at its default value.
    #
    # @return [ScanCastOptions]
    def self.default
      new(_internal_call: true)
    end
  end
end
|
data/lib/polars/schema.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Polars
|
2
2
|
class Schema
|
3
|
-
def initialize(schema, check_dtypes: true)
|
3
|
+
def initialize(schema = nil, check_dtypes: true)
|
4
4
|
raise Todo if check_dtypes
|
5
5
|
@schema = schema.to_h
|
6
6
|
end
|
@@ -9,6 +9,11 @@ module Polars
|
|
9
9
|
@schema[key]
|
10
10
|
end
|
11
11
|
|
12
|
+
def []=(name, dtype)
|
13
|
+
# TODO check dtype if needed
|
14
|
+
@schema[name] = dtype
|
15
|
+
end
|
16
|
+
|
12
17
|
def names
|
13
18
|
@schema.keys
|
14
19
|
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
module Polars
  # Base column selector expression/proxy.
  class Selector < Expr
    # @private
    attr_accessor :_rbselector

    # @private
    # Wrap a native selector object, storing both the selector itself and
    # the expression built from it via `RbExpr.new_selector`.
    def self._from_rbselector(rbselector)
      slf = new
      slf._rbselector = rbselector
      slf._rbexpr = RbExpr.new_selector(rbselector)
      slf
    end

    # Returns the string form of the selector's plain expression.
    #
    # @return [String]
    def inspect
      as_expr.to_s
    end

    # @private
    # Build a selector matching the given Polars data types.
    #
    # Only values accepted by `Utils.is_polars_dtype` are supported; anything
    # else raises `Todo`.
    def self._by_dtype(dtypes)
      concrete_dtypes = []
      dtypes.each do |dt|
        raise Todo unless Utils.is_polars_dtype(dt)

        concrete_dtypes << dt
      end

      _from_rbselector(RbSelector.by_dtype(concrete_dtypes))
    end

    # @private
    # Build a selector matching columns by name; `strict` is forwarded
    # unchanged to `RbSelector.by_name`.
    def self._by_name(names, strict:)
      _from_rbselector(RbSelector.by_name(names, strict))
    end

    # Complement of this selector, computed as `Selectors.all - self`.
    #
    # @return [Selector]
    def ~
      Selectors.all - self
    end

    # Intersection with another selector.
    #
    # A plain column is first converted to a by-name selector. Any operand
    # that is not a selector falls back to the expression-level `&`.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def &(other)
      other = _column_to_selector(other)
      if Utils.is_selector(other)
        Selector._from_rbselector(
          _rbselector.intersect(other._rbselector)
        )
      else
        as_expr & other
      end
    end

    # Union with another selector.
    #
    # A plain column is first converted to a by-name selector. Any operand
    # that is not a selector falls back to the expression-level `|`.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def |(other)
      other = _column_to_selector(other)
      if Utils.is_selector(other)
        Selector._from_rbselector(
          _rbselector.union(other._rbselector)
        )
      else
        as_expr | other
      end
    end

    # Difference with another selector.
    #
    # Any operand that is not a selector falls back to the expression-level
    # `-`. Plain columns are not converted here.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def -(other)
      if Utils.is_selector(other)
        Selector._from_rbselector(
          _rbselector.difference(other._rbselector)
        )
      else
        as_expr - other
      end
    end

    # Exclusive-or with another selector.
    #
    # A plain column is first converted to a by-name selector. Any operand
    # that is not a selector falls back to the expression-level `^`.
    #
    # @param other [Object]
    #
    # @return [Selector, Expr]
    def ^(other)
      other = _column_to_selector(other)
      if Utils.is_selector(other)
        Selector._from_rbselector(
          _rbselector.exclusive_or(other._rbselector)
        )
      else
        as_expr ^ other
      end
    end

    # Exclude columns from this selector, either by name or by data type.
    #
    # @param columns [Object]
    #   A column name or data type, or an array of them.
    # @param more_columns [Array]
    #   Additional names or data types, given as positional arguments.
    #
    # @return [Selector, Expr]
    #
    # @raise [TypeError]
    #   If an item is neither a String nor a Polars data type, or if names
    #   and data types are mixed in one call.
    def exclude(columns, *more_columns)
      exclude_cols = []
      exclude_dtypes = []
      ((columns.is_a?(::Array) ? columns : [columns]) + more_columns).each do |item|
        if item.is_a?(::String)
          exclude_cols << item
        elsif Utils.is_polars_dtype(item)
          exclude_dtypes << item
        else
          msg = (
            "invalid input for `exclude`" +
            "\n\nExpected one or more `str` or `DataType`; found #{item.inspect} instead."
          )
          raise TypeError, msg
        end
      end

      if exclude_cols.any? && exclude_dtypes.any?
        msg = "cannot exclude by both column name and dtype; use a selector instead"
        raise TypeError, msg
      elsif exclude_dtypes.any?
        self - Selectors.by_dtype(exclude_dtypes)
      else
        self - Selectors.by_name(exclude_cols, require_all: false)
      end
    end

    # Plain expression wrapping the same underlying `_rbexpr`.
    #
    # @return [Expr]
    def as_expr
      Expr._from_rbexpr(_rbexpr)
    end

    private

    # Convert a plain column operand into a by-name selector so that set
    # operations can be applied to it; any other operand is returned as is.
    #
    # The previous code called a bare `by_name`, which is not defined on this
    # class. NOTE(review): `strict: true` is assumed to match the default
    # behaviour of `Selectors.by_name` - confirm.
    def _column_to_selector(other)
      return other unless Utils.is_column(other)

      Selector._by_name([other.meta.output_name], strict: true)
    end
  end
end
|