polars-df 0.3.1-aarch64-linux → 0.5.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/LICENSE-THIRD-PARTY.txt +7353 -8473
- data/README.md +31 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +4 -2
data/lib/polars/string_expr.rb
CHANGED
@@ -11,8 +11,8 @@ module Polars
|
|
11
11
|
|
12
12
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
13
|
#
|
14
|
-
# @param
|
15
|
-
#
|
14
|
+
# @param dtype [Object]
|
15
|
+
# The data type to convert into. Can be either Date, Datetime, or Time.
|
16
16
|
# @param fmt [String]
|
17
17
|
# Format to use, refer to the
|
18
18
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
@@ -33,57 +33,56 @@ module Polars
|
|
33
33
|
# the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
|
34
34
|
# no fractional second component is found then the default is "us".
|
35
35
|
#
|
36
|
-
# @example
|
36
|
+
# @example Dealing with a consistent format:
|
37
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
38
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
39
|
+
# # =>
|
40
|
+
# # shape: (2,)
|
41
|
+
# # Series: '' [datetime[μs, +00:00]]
|
42
|
+
# # [
|
43
|
+
# # 2020-01-01 01:00:00 +00:00
|
44
|
+
# # 2020-01-01 02:00:00 +00:00
|
45
|
+
# # ]
|
46
|
+
#
|
47
|
+
# @example Dealing with different formats.
|
37
48
|
# s = Polars::Series.new(
|
38
49
|
# "date",
|
39
50
|
# [
|
40
51
|
# "2021-04-22",
|
41
52
|
# "2022-01-04 00:00:00",
|
42
53
|
# "01/31/22",
|
43
|
-
# "Sun Jul 8 00:34:60 2001"
|
54
|
+
# "Sun Jul 8 00:34:60 2001",
|
44
55
|
# ]
|
45
56
|
# )
|
46
|
-
# s.to_frame.
|
47
|
-
# Polars.
|
48
|
-
# .str.strptime(
|
49
|
-
# .
|
50
|
-
#
|
51
|
-
# )
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# )
|
57
|
+
# s.to_frame.select(
|
58
|
+
# Polars.coalesce(
|
59
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
60
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
61
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
62
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
63
|
+
# )
|
64
|
+
# ).to_series
|
55
65
|
# # =>
|
56
|
-
# # shape: (4,
|
57
|
-
# #
|
58
|
-
# #
|
59
|
-
# #
|
60
|
-
# #
|
61
|
-
# #
|
62
|
-
# #
|
63
|
-
# #
|
64
|
-
|
65
|
-
|
66
|
-
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
68
|
-
if !Utils.is_polars_dtype(datatype)
|
69
|
-
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
-
end
|
71
|
-
|
72
|
-
if datatype == :date
|
66
|
+
# # shape: (4,)
|
67
|
+
# # Series: 'date' [date]
|
68
|
+
# # [
|
69
|
+
# # 2021-04-22
|
70
|
+
# # 2022-01-04
|
71
|
+
# # 2022-01-31
|
72
|
+
# # 2001-07-08
|
73
|
+
# # ]
|
74
|
+
def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
75
|
+
if dtype == Date
|
73
76
|
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
|
74
|
-
elsif
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
else
|
81
|
-
dtcol.dt.cast_time_unit(tu)
|
82
|
-
end
|
83
|
-
elsif datatype == :time
|
77
|
+
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
78
|
+
dtype = Datetime.new if dtype == Datetime
|
79
|
+
time_unit = dtype.time_unit
|
80
|
+
time_zone = dtype.time_zone
|
81
|
+
Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
|
82
|
+
elsif dtype == Time
|
84
83
|
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
85
84
|
else
|
86
|
-
raise ArgumentError, "dtype should be of type
|
85
|
+
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
87
86
|
end
|
88
87
|
end
|
89
88
|
|
@@ -332,7 +331,7 @@ module Polars
|
|
332
331
|
# # │ -0001 │
|
333
332
|
# # │ 00000 │
|
334
333
|
# # │ 00001 │
|
335
|
-
# # │
|
334
|
+
# # │ … │
|
336
335
|
# # │ 10000 │
|
337
336
|
# # │ 100000 │
|
338
337
|
# # │ 1000000 │
|
@@ -521,6 +520,40 @@ module Polars
|
|
521
520
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
522
521
|
end
|
523
522
|
|
523
|
+
# Parse string values as JSON.
|
524
|
+
#
|
525
|
+
# Throw errors if encounter invalid JSON strings.
|
526
|
+
#
|
527
|
+
# @param dtype [Object]
|
528
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
529
|
+
# inferred from the JSON value.
|
530
|
+
#
|
531
|
+
# @return [Expr]
|
532
|
+
#
|
533
|
+
# @example
|
534
|
+
# df = Polars::DataFrame.new(
|
535
|
+
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
536
|
+
# )
|
537
|
+
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
538
|
+
# df.select(Polars.col("json").str.json_extract(dtype))
|
539
|
+
# # =>
|
540
|
+
# # shape: (3, 1)
|
541
|
+
# # ┌─────────────┐
|
542
|
+
# # │ json │
|
543
|
+
# # │ --- │
|
544
|
+
# # │ struct[2] │
|
545
|
+
# # ╞═════════════╡
|
546
|
+
# # │ {1,true} │
|
547
|
+
# # │ {null,null} │
|
548
|
+
# # │ {2,false} │
|
549
|
+
# # └─────────────┘
|
550
|
+
def json_extract(dtype = nil)
|
551
|
+
if !dtype.nil?
|
552
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
553
|
+
end
|
554
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
|
555
|
+
end
|
556
|
+
|
524
557
|
# Extract the first match of json string with provided JSONPath expression.
|
525
558
|
#
|
526
559
|
# Throw errors if encounter invalid json strings.
|
@@ -846,10 +879,10 @@ module Polars
|
|
846
879
|
# # │ 1 ┆ 123ABC │
|
847
880
|
# # │ 2 ┆ abc456 │
|
848
881
|
# # └─────┴────────┘
|
849
|
-
def replace(pattern, value, literal: false)
|
882
|
+
def replace(pattern, value, literal: false, n: 1)
|
850
883
|
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
|
851
884
|
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
|
852
|
-
Utils.wrap_expr(_rbexpr.
|
885
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
|
853
886
|
end
|
854
887
|
|
855
888
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -912,5 +945,78 @@ module Polars
|
|
912
945
|
def slice(offset, length = nil)
|
913
946
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
914
947
|
end
|
948
|
+
|
949
|
+
# Returns a column with a separate row for every string character.
|
950
|
+
#
|
951
|
+
# @return [Expr]
|
952
|
+
#
|
953
|
+
# @example
|
954
|
+
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
955
|
+
# df.select(Polars.col("a").str.explode)
|
956
|
+
# # =>
|
957
|
+
# # shape: (6, 1)
|
958
|
+
# # ┌─────┐
|
959
|
+
# # │ a │
|
960
|
+
# # │ --- │
|
961
|
+
# # │ str │
|
962
|
+
# # ╞═════╡
|
963
|
+
# # │ f │
|
964
|
+
# # │ o │
|
965
|
+
# # │ o │
|
966
|
+
# # │ b │
|
967
|
+
# # │ a │
|
968
|
+
# # │ r │
|
969
|
+
# # └─────┘
|
970
|
+
def explode
|
971
|
+
Utils.wrap_expr(_rbexpr.explode)
|
972
|
+
end
|
973
|
+
|
974
|
+
# Parse integers with base radix from strings.
|
975
|
+
#
|
976
|
+
# By default base 2. ParseError/Overflows become Nulls.
|
977
|
+
#
|
978
|
+
# @param radix [Integer]
|
979
|
+
# Positive integer which is the base of the string we are parsing.
|
980
|
+
# Default: 2.
|
981
|
+
# @param strict [Boolean]
|
982
|
+
# Bool, Default=true will raise any ParseError or overflow as ComputeError.
|
983
|
+
# False silently convert to Null.
|
984
|
+
#
|
985
|
+
# @return [Expr]
|
986
|
+
#
|
987
|
+
# @example
|
988
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
989
|
+
# df.select(Polars.col("bin").str.parse_int(2, strict: false))
|
990
|
+
# # =>
|
991
|
+
# # shape: (4, 1)
|
992
|
+
# # ┌──────┐
|
993
|
+
# # │ bin │
|
994
|
+
# # │ --- │
|
995
|
+
# # │ i32 │
|
996
|
+
# # ╞══════╡
|
997
|
+
# # │ 6 │
|
998
|
+
# # │ 5 │
|
999
|
+
# # │ 2 │
|
1000
|
+
# # │ null │
|
1001
|
+
# # └──────┘
|
1002
|
+
#
|
1003
|
+
# @example
|
1004
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1005
|
+
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (4, 1)
|
1008
|
+
# # ┌───────┐
|
1009
|
+
# # │ hex │
|
1010
|
+
# # │ --- │
|
1011
|
+
# # │ i32 │
|
1012
|
+
# # ╞═══════╡
|
1013
|
+
# # │ 64030 │
|
1014
|
+
# # │ 65280 │
|
1015
|
+
# # │ 51966 │
|
1016
|
+
# # │ null │
|
1017
|
+
# # └───────┘
|
1018
|
+
def parse_int(radix = 2, strict: true)
|
1019
|
+
Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
|
1020
|
+
end
|
915
1021
|
end
|
916
1022
|
end
|
@@ -38,12 +38,12 @@ module Polars
|
|
38
38
|
# )
|
39
39
|
# s.to_frame.with_column(
|
40
40
|
# Polars.col("date")
|
41
|
-
# .str.strptime(
|
41
|
+
# .str.strptime(Polars::Date, "%F", strict: false)
|
42
42
|
# .fill_null(
|
43
|
-
# Polars.col("date").str.strptime(
|
43
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
|
44
44
|
# )
|
45
|
-
# .fill_null(Polars.col("date").str.strptime(
|
46
|
-
# .fill_null(Polars.col("date").str.strptime(
|
45
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
|
46
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
|
47
47
|
# )
|
48
48
|
# # =>
|
49
49
|
# # shape: (4, 1)
|
@@ -60,5 +60,37 @@ module Polars
|
|
60
60
|
def rename_fields(names)
|
61
61
|
super
|
62
62
|
end
|
63
|
+
|
64
|
+
# Get the struct definition as a name/dtype schema dict.
|
65
|
+
#
|
66
|
+
# @return [Object]
|
67
|
+
def schema
|
68
|
+
if _s.nil?
|
69
|
+
{}
|
70
|
+
else
|
71
|
+
_s.dtype.to_schema
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Convert this struct Series to a DataFrame with a separate column for each field.
|
76
|
+
#
|
77
|
+
# @return [DataFrame]
|
78
|
+
#
|
79
|
+
# @example
|
80
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
81
|
+
# s.struct.unnest
|
82
|
+
# # =>
|
83
|
+
# # shape: (2, 2)
|
84
|
+
# # ┌─────┬─────┐
|
85
|
+
# # │ a ┆ b │
|
86
|
+
# # │ --- ┆ --- │
|
87
|
+
# # │ i64 ┆ i64 │
|
88
|
+
# # ╞═════╪═════╡
|
89
|
+
# # │ 1 ┆ 2 │
|
90
|
+
# # │ 3 ┆ 4 │
|
91
|
+
# # └─────┴─────┘
|
92
|
+
def unnest
|
93
|
+
Utils.wrap_df(_s.struct_unnest)
|
94
|
+
end
|
63
95
|
end
|
64
96
|
end
|
data/lib/polars/utils.rb
CHANGED
@@ -23,24 +23,42 @@ module Polars
|
|
23
23
|
Polars.col(name)
|
24
24
|
end
|
25
25
|
|
26
|
+
def self.arrlen(obj)
|
27
|
+
if obj.is_a?(Range)
|
28
|
+
# size only works for numeric ranges
|
29
|
+
obj.to_a.length
|
30
|
+
elsif obj.is_a?(String)
|
31
|
+
nil
|
32
|
+
else
|
33
|
+
obj.length
|
34
|
+
end
|
35
|
+
rescue
|
36
|
+
nil
|
37
|
+
end
|
38
|
+
|
26
39
|
def self._timedelta_to_pl_duration(td)
|
27
40
|
td
|
28
41
|
end
|
29
42
|
|
30
43
|
def self._datetime_to_pl_timestamp(dt, tu)
|
31
44
|
if tu == "ns"
|
32
|
-
(dt.to_datetime.
|
45
|
+
(dt.to_datetime.to_time.to_f * 1e9).to_i
|
33
46
|
elsif tu == "us"
|
34
|
-
(dt.to_datetime.
|
47
|
+
(dt.to_datetime.to_time.to_f * 1e6).to_i
|
35
48
|
elsif tu == "ms"
|
36
|
-
(dt.to_datetime.
|
49
|
+
(dt.to_datetime.to_time.to_f * 1e3).to_i
|
37
50
|
elsif tu.nil?
|
38
|
-
(dt.to_datetime.
|
51
|
+
(dt.to_datetime.to_time.to_f * 1e6).to_i
|
39
52
|
else
|
40
53
|
raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
41
54
|
end
|
42
55
|
end
|
43
56
|
|
57
|
+
def self._date_to_pl_date(d)
|
58
|
+
dt = d.to_datetime.to_time
|
59
|
+
dt.to_i / (3600 * 24)
|
60
|
+
end
|
61
|
+
|
44
62
|
def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
|
45
63
|
if dtype == :date || dtype == Date
|
46
64
|
# days to seconds
|
@@ -69,6 +87,18 @@ module Polars
|
|
69
87
|
end
|
70
88
|
end
|
71
89
|
|
90
|
+
def self._to_ruby_duration(value, tu = "ns")
|
91
|
+
if tu == "ns"
|
92
|
+
value / 1e9
|
93
|
+
elsif tu == "us"
|
94
|
+
value / 1e6
|
95
|
+
elsif tu == "ms"
|
96
|
+
value / 1e3
|
97
|
+
else
|
98
|
+
raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
72
102
|
def self.selection_to_rbexpr_list(exprs)
|
73
103
|
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
74
104
|
exprs = [exprs]
|
@@ -93,12 +123,19 @@ module Polars
|
|
93
123
|
Polars.lit(value)
|
94
124
|
end
|
95
125
|
|
96
|
-
def self.
|
97
|
-
File.expand_path(path)
|
126
|
+
def self.normalise_filepath(path, check_not_directory: true)
|
127
|
+
path = File.expand_path(path)
|
128
|
+
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
129
|
+
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
130
|
+
end
|
131
|
+
path
|
98
132
|
end
|
99
133
|
|
100
134
|
# TODO fix
|
101
|
-
def self.is_polars_dtype(data_type)
|
135
|
+
def self.is_polars_dtype(data_type, include_unknown: false)
|
136
|
+
if data_type == Unknown
|
137
|
+
return include_unknown
|
138
|
+
end
|
102
139
|
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
103
140
|
end
|
104
141
|
|
@@ -109,7 +146,8 @@ module Polars
|
|
109
146
|
TrueClass => :bool,
|
110
147
|
FalseClass => :bool,
|
111
148
|
::Date => :date,
|
112
|
-
::DateTime => :datetime
|
149
|
+
::DateTime => :datetime,
|
150
|
+
::Time => :datetime
|
113
151
|
}
|
114
152
|
|
115
153
|
# TODO fix
|
@@ -174,7 +212,7 @@ module Polars
|
|
174
212
|
end
|
175
213
|
|
176
214
|
def self.bool?(value)
|
177
|
-
value
|
215
|
+
value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
178
216
|
end
|
179
217
|
|
180
218
|
def self.strlike?(value)
|
@@ -216,5 +254,9 @@ module Polars
|
|
216
254
|
val.is_a?(Array) && _is_iterable_of(val, String)
|
217
255
|
end
|
218
256
|
end
|
257
|
+
|
258
|
+
def self.local_file?(file)
|
259
|
+
Dir.glob(file).any?
|
260
|
+
end
|
219
261
|
end
|
220
262
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# ext
|
2
2
|
begin
|
3
|
-
|
3
|
+
require "polars/#{RUBY_VERSION.to_f}/polars"
|
4
4
|
rescue LoadError
|
5
|
-
|
5
|
+
require "polars/polars"
|
6
6
|
end
|
7
7
|
|
8
8
|
# stdlib
|
@@ -12,6 +12,8 @@ require "stringio"
|
|
12
12
|
# modules
|
13
13
|
require_relative "polars/expr_dispatch"
|
14
14
|
require_relative "polars/batched_csv_reader"
|
15
|
+
require_relative "polars/binary_expr"
|
16
|
+
require_relative "polars/binary_name_space"
|
15
17
|
require_relative "polars/cat_expr"
|
16
18
|
require_relative "polars/cat_name_space"
|
17
19
|
require_relative "polars/convert"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: aarch64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -29,6 +29,8 @@ files:
|
|
29
29
|
- lib/polars/3.1/polars.so
|
30
30
|
- lib/polars/3.2/polars.so
|
31
31
|
- lib/polars/batched_csv_reader.rb
|
32
|
+
- lib/polars/binary_expr.rb
|
33
|
+
- lib/polars/binary_name_space.rb
|
32
34
|
- lib/polars/cat_expr.rb
|
33
35
|
- lib/polars/cat_name_space.rb
|
34
36
|
- lib/polars/convert.rb
|