polars-df 0.3.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/README.md +31 -2
- data/ext/polars/Cargo.toml +10 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +36 -19
- data/ext/polars/src/conversion.rs +159 -16
- data/ext/polars/src/dataframe.rs +51 -52
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +216 -300
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +103 -531
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +29 -12
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/lib/polars/string_expr.rb
CHANGED
@@ -11,8 +11,8 @@ module Polars
|
|
11
11
|
|
12
12
|
# Parse a Utf8 expression to a Date/Datetime/Time type.
|
13
13
|
#
|
14
|
-
# @param
|
15
|
-
#
|
14
|
+
# @param dtype [Object]
|
15
|
+
# The data type to convert into. Can be either Date, Datetime, or Time.
|
16
16
|
# @param fmt [String]
|
17
17
|
# Format to use, refer to the
|
18
18
|
# [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
|
@@ -33,57 +33,56 @@ module Polars
|
|
33
33
|
# the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
|
34
34
|
# no fractional second component is found then the default is "us".
|
35
35
|
#
|
36
|
-
# @example
|
36
|
+
# @example Dealing with a consistent format:
|
37
|
+
# s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
|
38
|
+
# s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
|
39
|
+
# # =>
|
40
|
+
# # shape: (2,)
|
41
|
+
# # Series: '' [datetime[μs, +00:00]]
|
42
|
+
# # [
|
43
|
+
# # 2020-01-01 01:00:00 +00:00
|
44
|
+
# # 2020-01-01 02:00:00 +00:00
|
45
|
+
# # ]
|
46
|
+
#
|
47
|
+
# @example Dealing with different formats.
|
37
48
|
# s = Polars::Series.new(
|
38
49
|
# "date",
|
39
50
|
# [
|
40
51
|
# "2021-04-22",
|
41
52
|
# "2022-01-04 00:00:00",
|
42
53
|
# "01/31/22",
|
43
|
-
# "Sun Jul 8 00:34:60 2001"
|
54
|
+
# "Sun Jul 8 00:34:60 2001",
|
44
55
|
# ]
|
45
56
|
# )
|
46
|
-
# s.to_frame.
|
47
|
-
# Polars.
|
48
|
-
# .str.strptime(
|
49
|
-
# .
|
50
|
-
#
|
51
|
-
# )
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# )
|
57
|
+
# s.to_frame.select(
|
58
|
+
# Polars.coalesce(
|
59
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
|
60
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
|
61
|
+
# Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
|
62
|
+
# Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
|
63
|
+
# )
|
64
|
+
# ).to_series
|
55
65
|
# # =>
|
56
|
-
# # shape: (4,
|
57
|
-
# #
|
58
|
-
# #
|
59
|
-
# #
|
60
|
-
# #
|
61
|
-
# #
|
62
|
-
# #
|
63
|
-
# #
|
64
|
-
|
65
|
-
|
66
|
-
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
68
|
-
if !Utils.is_polars_dtype(datatype)
|
69
|
-
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
|
-
end
|
71
|
-
|
72
|
-
if datatype == :date
|
66
|
+
# # shape: (4,)
|
67
|
+
# # Series: 'date' [date]
|
68
|
+
# # [
|
69
|
+
# # 2021-04-22
|
70
|
+
# # 2022-01-04
|
71
|
+
# # 2022-01-31
|
72
|
+
# # 2001-07-08
|
73
|
+
# # ]
|
74
|
+
def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
|
75
|
+
if dtype == Date
|
73
76
|
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
|
74
|
-
elsif
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
else
|
81
|
-
dtcol.dt.cast_time_unit(tu)
|
82
|
-
end
|
83
|
-
elsif datatype == :time
|
77
|
+
elsif dtype == Datetime || dtype.is_a?(Datetime)
|
78
|
+
dtype = Datetime.new if dtype == Datetime
|
79
|
+
time_unit = dtype.time_unit
|
80
|
+
time_zone = dtype.time_zone
|
81
|
+
Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
|
82
|
+
elsif dtype == Time
|
84
83
|
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
85
84
|
else
|
86
|
-
raise ArgumentError, "dtype should be of type
|
85
|
+
raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
|
87
86
|
end
|
88
87
|
end
|
89
88
|
|
@@ -332,7 +331,7 @@ module Polars
|
|
332
331
|
# # │ -0001 │
|
333
332
|
# # │ 00000 │
|
334
333
|
# # │ 00001 │
|
335
|
-
# # │
|
334
|
+
# # │ … │
|
336
335
|
# # │ 10000 │
|
337
336
|
# # │ 100000 │
|
338
337
|
# # │ 1000000 │
|
@@ -521,6 +520,40 @@ module Polars
|
|
521
520
|
Utils.wrap_expr(_rbexpr.str_starts_with(sub))
|
522
521
|
end
|
523
522
|
|
523
|
+
# Parse string values as JSON.
|
524
|
+
#
|
525
|
+
# Throw errors if encounter invalid JSON strings.
|
526
|
+
#
|
527
|
+
# @param dtype [Object]
|
528
|
+
# The dtype to cast the extracted value to. If nil, the dtype will be
|
529
|
+
# inferred from the JSON value.
|
530
|
+
#
|
531
|
+
# @return [Expr]
|
532
|
+
#
|
533
|
+
# @example
|
534
|
+
# df = Polars::DataFrame.new(
|
535
|
+
# {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
|
536
|
+
# )
|
537
|
+
# dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
|
538
|
+
# df.select(Polars.col("json").str.json_extract(dtype))
|
539
|
+
# # =>
|
540
|
+
# # shape: (3, 1)
|
541
|
+
# # ┌─────────────┐
|
542
|
+
# # │ json │
|
543
|
+
# # │ --- │
|
544
|
+
# # │ struct[2] │
|
545
|
+
# # ╞═════════════╡
|
546
|
+
# # │ {1,true} │
|
547
|
+
# # │ {null,null} │
|
548
|
+
# # │ {2,false} │
|
549
|
+
# # └─────────────┘
|
550
|
+
def json_extract(dtype = nil)
|
551
|
+
if !dtype.nil?
|
552
|
+
dtype = Utils.rb_type_to_dtype(dtype)
|
553
|
+
end
|
554
|
+
Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
|
555
|
+
end
|
556
|
+
|
524
557
|
# Extract the first match of json string with provided JSONPath expression.
|
525
558
|
#
|
526
559
|
# Throw errors if encounter invalid json strings.
|
@@ -846,10 +879,10 @@ module Polars
|
|
846
879
|
# # │ 1 ┆ 123ABC │
|
847
880
|
# # │ 2 ┆ abc456 │
|
848
881
|
# # └─────┴────────┘
|
849
|
-
def replace(pattern, value, literal: false)
|
882
|
+
def replace(pattern, value, literal: false, n: 1)
|
850
883
|
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
|
851
884
|
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
|
852
|
-
Utils.wrap_expr(_rbexpr.
|
885
|
+
Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
|
853
886
|
end
|
854
887
|
|
855
888
|
# Replace all matching regex/literal substrings with a new string value.
|
@@ -912,5 +945,78 @@ module Polars
|
|
912
945
|
def slice(offset, length = nil)
|
913
946
|
Utils.wrap_expr(_rbexpr.str_slice(offset, length))
|
914
947
|
end
|
948
|
+
|
949
|
+
# Returns a column with a separate row for every string character.
|
950
|
+
#
|
951
|
+
# @return [Expr]
|
952
|
+
#
|
953
|
+
# @example
|
954
|
+
# df = Polars::DataFrame.new({"a": ["foo", "bar"]})
|
955
|
+
# df.select(Polars.col("a").str.explode)
|
956
|
+
# # =>
|
957
|
+
# # shape: (6, 1)
|
958
|
+
# # ┌─────┐
|
959
|
+
# # │ a │
|
960
|
+
# # │ --- │
|
961
|
+
# # │ str │
|
962
|
+
# # ╞═════╡
|
963
|
+
# # │ f │
|
964
|
+
# # │ o │
|
965
|
+
# # │ o │
|
966
|
+
# # │ b │
|
967
|
+
# # │ a │
|
968
|
+
# # │ r │
|
969
|
+
# # └─────┘
|
970
|
+
def explode
|
971
|
+
Utils.wrap_expr(_rbexpr.explode)
|
972
|
+
end
|
973
|
+
|
974
|
+
# Parse integers with base radix from strings.
|
975
|
+
#
|
976
|
+
# By default base 2. ParseError/Overflows become Nulls.
|
977
|
+
#
|
978
|
+
# @param radix [Integer]
|
979
|
+
# Positive integer which is the base of the string we are parsing.
|
980
|
+
# Default: 2.
|
981
|
+
# @param strict [Boolean]
|
982
|
+
# Bool, Default=true will raise any ParseError or overflow as ComputeError.
|
983
|
+
# False silently convert to Null.
|
984
|
+
#
|
985
|
+
# @return [Expr]
|
986
|
+
#
|
987
|
+
# @example
|
988
|
+
# df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
|
989
|
+
# df.select(Polars.col("bin").str.parse_int(2, strict: false))
|
990
|
+
# # =>
|
991
|
+
# # shape: (4, 1)
|
992
|
+
# # ┌──────┐
|
993
|
+
# # │ bin │
|
994
|
+
# # │ --- │
|
995
|
+
# # │ i32 │
|
996
|
+
# # ╞══════╡
|
997
|
+
# # │ 6 │
|
998
|
+
# # │ 5 │
|
999
|
+
# # │ 2 │
|
1000
|
+
# # │ null │
|
1001
|
+
# # └──────┘
|
1002
|
+
#
|
1003
|
+
# @example
|
1004
|
+
# df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
|
1005
|
+
# df.select(Polars.col("hex").str.parse_int(16, strict: true))
|
1006
|
+
# # =>
|
1007
|
+
# # shape: (4, 1)
|
1008
|
+
# # ┌───────┐
|
1009
|
+
# # │ hex │
|
1010
|
+
# # │ --- │
|
1011
|
+
# # │ i32 │
|
1012
|
+
# # ╞═══════╡
|
1013
|
+
# # │ 64030 │
|
1014
|
+
# # │ 65280 │
|
1015
|
+
# # │ 51966 │
|
1016
|
+
# # │ null │
|
1017
|
+
# # └───────┘
|
1018
|
+
def parse_int(radix = 2, strict: true)
|
1019
|
+
Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
|
1020
|
+
end
|
915
1021
|
end
|
916
1022
|
end
|
@@ -38,12 +38,12 @@ module Polars
|
|
38
38
|
# )
|
39
39
|
# s.to_frame.with_column(
|
40
40
|
# Polars.col("date")
|
41
|
-
# .str.strptime(
|
41
|
+
# .str.strptime(Polars::Date, "%F", strict: false)
|
42
42
|
# .fill_null(
|
43
|
-
# Polars.col("date").str.strptime(
|
43
|
+
# Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
|
44
44
|
# )
|
45
|
-
# .fill_null(Polars.col("date").str.strptime(
|
46
|
-
# .fill_null(Polars.col("date").str.strptime(
|
45
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
|
46
|
+
# .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
|
47
47
|
# )
|
48
48
|
# # =>
|
49
49
|
# # shape: (4, 1)
|
@@ -60,5 +60,37 @@ module Polars
|
|
60
60
|
def rename_fields(names)
|
61
61
|
super
|
62
62
|
end
|
63
|
+
|
64
|
+
# Get the struct definition as a name/dtype schema dict.
|
65
|
+
#
|
66
|
+
# @return [Object]
|
67
|
+
def schema
|
68
|
+
if _s.nil?
|
69
|
+
{}
|
70
|
+
else
|
71
|
+
_s.dtype.to_schema
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Convert this struct Series to a DataFrame with a separate column for each field.
|
76
|
+
#
|
77
|
+
# @return [DataFrame]
|
78
|
+
#
|
79
|
+
# @example
|
80
|
+
# s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
|
81
|
+
# s.struct.unnest
|
82
|
+
# # =>
|
83
|
+
# # shape: (2, 2)
|
84
|
+
# # ┌─────┬─────┐
|
85
|
+
# # │ a ┆ b │
|
86
|
+
# # │ --- ┆ --- │
|
87
|
+
# # │ i64 ┆ i64 │
|
88
|
+
# # ╞═════╪═════╡
|
89
|
+
# # │ 1 ┆ 2 │
|
90
|
+
# # │ 3 ┆ 4 │
|
91
|
+
# # └─────┴─────┘
|
92
|
+
def unnest
|
93
|
+
Utils.wrap_df(_s.struct_unnest)
|
94
|
+
end
|
63
95
|
end
|
64
96
|
end
|
data/lib/polars/utils.rb
CHANGED
@@ -23,24 +23,42 @@ module Polars
|
|
23
23
|
Polars.col(name)
|
24
24
|
end
|
25
25
|
|
26
|
+
def self.arrlen(obj)
|
27
|
+
if obj.is_a?(Range)
|
28
|
+
# size only works for numeric ranges
|
29
|
+
obj.to_a.length
|
30
|
+
elsif obj.is_a?(String)
|
31
|
+
nil
|
32
|
+
else
|
33
|
+
obj.length
|
34
|
+
end
|
35
|
+
rescue
|
36
|
+
nil
|
37
|
+
end
|
38
|
+
|
26
39
|
def self._timedelta_to_pl_duration(td)
|
27
40
|
td
|
28
41
|
end
|
29
42
|
|
30
43
|
def self._datetime_to_pl_timestamp(dt, tu)
|
31
44
|
if tu == "ns"
|
32
|
-
(dt.to_datetime.
|
45
|
+
(dt.to_datetime.to_time.to_f * 1e9).to_i
|
33
46
|
elsif tu == "us"
|
34
|
-
(dt.to_datetime.
|
47
|
+
(dt.to_datetime.to_time.to_f * 1e6).to_i
|
35
48
|
elsif tu == "ms"
|
36
|
-
(dt.to_datetime.
|
49
|
+
(dt.to_datetime.to_time.to_f * 1e3).to_i
|
37
50
|
elsif tu.nil?
|
38
|
-
(dt.to_datetime.
|
51
|
+
(dt.to_datetime.to_time.to_f * 1e6).to_i
|
39
52
|
else
|
40
53
|
raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
41
54
|
end
|
42
55
|
end
|
43
56
|
|
57
|
+
def self._date_to_pl_date(d)
|
58
|
+
dt = d.to_datetime.to_time
|
59
|
+
dt.to_i / (3600 * 24)
|
60
|
+
end
|
61
|
+
|
44
62
|
def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
|
45
63
|
if dtype == :date || dtype == Date
|
46
64
|
# days to seconds
|
@@ -69,6 +87,18 @@ module Polars
|
|
69
87
|
end
|
70
88
|
end
|
71
89
|
|
90
|
+
def self._to_ruby_duration(value, tu = "ns")
|
91
|
+
if tu == "ns"
|
92
|
+
value / 1e9
|
93
|
+
elsif tu == "us"
|
94
|
+
value / 1e6
|
95
|
+
elsif tu == "ms"
|
96
|
+
value / 1e3
|
97
|
+
else
|
98
|
+
raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
72
102
|
def self.selection_to_rbexpr_list(exprs)
|
73
103
|
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
74
104
|
exprs = [exprs]
|
@@ -93,12 +123,19 @@ module Polars
|
|
93
123
|
Polars.lit(value)
|
94
124
|
end
|
95
125
|
|
96
|
-
def self.
|
97
|
-
File.expand_path(path)
|
126
|
+
def self.normalise_filepath(path, check_not_directory: true)
|
127
|
+
path = File.expand_path(path)
|
128
|
+
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
129
|
+
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
130
|
+
end
|
131
|
+
path
|
98
132
|
end
|
99
133
|
|
100
134
|
# TODO fix
|
101
|
-
def self.is_polars_dtype(data_type)
|
135
|
+
def self.is_polars_dtype(data_type, include_unknown: false)
|
136
|
+
if data_type == Unknown
|
137
|
+
return include_unknown
|
138
|
+
end
|
102
139
|
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
103
140
|
end
|
104
141
|
|
@@ -109,7 +146,8 @@ module Polars
|
|
109
146
|
TrueClass => :bool,
|
110
147
|
FalseClass => :bool,
|
111
148
|
::Date => :date,
|
112
|
-
::DateTime => :datetime
|
149
|
+
::DateTime => :datetime,
|
150
|
+
::Time => :datetime
|
113
151
|
}
|
114
152
|
|
115
153
|
# TODO fix
|
@@ -174,7 +212,7 @@ module Polars
|
|
174
212
|
end
|
175
213
|
|
176
214
|
def self.bool?(value)
|
177
|
-
value
|
215
|
+
value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
178
216
|
end
|
179
217
|
|
180
218
|
def self.strlike?(value)
|
@@ -216,5 +254,9 @@ module Polars
|
|
216
254
|
val.is_a?(Array) && _is_iterable_of(val, String)
|
217
255
|
end
|
218
256
|
end
|
257
|
+
|
258
|
+
def self.local_file?(file)
|
259
|
+
Dir.glob(file).any?
|
260
|
+
end
|
219
261
|
end
|
220
262
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# ext
|
2
2
|
begin
|
3
|
-
|
3
|
+
require "polars/#{RUBY_VERSION.to_f}/polars"
|
4
4
|
rescue LoadError
|
5
|
-
|
5
|
+
require "polars/polars"
|
6
6
|
end
|
7
7
|
|
8
8
|
# stdlib
|
@@ -12,6 +12,8 @@ require "stringio"
|
|
12
12
|
# modules
|
13
13
|
require_relative "polars/expr_dispatch"
|
14
14
|
require_relative "polars/batched_csv_reader"
|
15
|
+
require_relative "polars/binary_expr"
|
16
|
+
require_relative "polars/binary_name_space"
|
15
17
|
require_relative "polars/cat_expr"
|
16
18
|
require_relative "polars/cat_name_space"
|
17
19
|
require_relative "polars/convert"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -40,31 +40,48 @@ files:
|
|
40
40
|
- ext/polars/Cargo.toml
|
41
41
|
- ext/polars/extconf.rb
|
42
42
|
- ext/polars/src/apply/dataframe.rs
|
43
|
+
- ext/polars/src/apply/lazy.rs
|
43
44
|
- ext/polars/src/apply/mod.rs
|
44
45
|
- ext/polars/src/apply/series.rs
|
45
46
|
- ext/polars/src/batched_csv.rs
|
46
47
|
- ext/polars/src/conversion.rs
|
47
48
|
- ext/polars/src/dataframe.rs
|
48
49
|
- ext/polars/src/error.rs
|
50
|
+
- ext/polars/src/expr.rs
|
51
|
+
- ext/polars/src/expr/binary.rs
|
52
|
+
- ext/polars/src/expr/categorical.rs
|
53
|
+
- ext/polars/src/expr/datetime.rs
|
54
|
+
- ext/polars/src/expr/general.rs
|
55
|
+
- ext/polars/src/expr/list.rs
|
56
|
+
- ext/polars/src/expr/meta.rs
|
57
|
+
- ext/polars/src/expr/string.rs
|
58
|
+
- ext/polars/src/expr/struct.rs
|
49
59
|
- ext/polars/src/file.rs
|
50
|
-
- ext/polars/src/
|
51
|
-
- ext/polars/src/
|
52
|
-
- ext/polars/src/lazy
|
53
|
-
- ext/polars/src/
|
54
|
-
- ext/polars/src/
|
55
|
-
- ext/polars/src/
|
60
|
+
- ext/polars/src/functions/eager.rs
|
61
|
+
- ext/polars/src/functions/io.rs
|
62
|
+
- ext/polars/src/functions/lazy.rs
|
63
|
+
- ext/polars/src/functions/meta.rs
|
64
|
+
- ext/polars/src/functions/mod.rs
|
65
|
+
- ext/polars/src/functions/whenthen.rs
|
66
|
+
- ext/polars/src/lazyframe.rs
|
67
|
+
- ext/polars/src/lazygroupby.rs
|
56
68
|
- ext/polars/src/lib.rs
|
57
|
-
- ext/polars/src/list_construction.rs
|
58
|
-
- ext/polars/src/numo.rs
|
59
69
|
- ext/polars/src/object.rs
|
60
70
|
- ext/polars/src/prelude.rs
|
61
71
|
- ext/polars/src/rb_modules.rs
|
62
72
|
- ext/polars/src/series.rs
|
63
|
-
- ext/polars/src/
|
73
|
+
- ext/polars/src/series/aggregation.rs
|
74
|
+
- ext/polars/src/series/arithmetic.rs
|
75
|
+
- ext/polars/src/series/comparison.rs
|
76
|
+
- ext/polars/src/series/construction.rs
|
77
|
+
- ext/polars/src/series/export.rs
|
78
|
+
- ext/polars/src/series/set_at_idx.rs
|
64
79
|
- ext/polars/src/utils.rs
|
65
80
|
- lib/polars-df.rb
|
66
81
|
- lib/polars.rb
|
67
82
|
- lib/polars/batched_csv_reader.rb
|
83
|
+
- lib/polars/binary_expr.rb
|
84
|
+
- lib/polars/binary_name_space.rb
|
68
85
|
- lib/polars/cat_expr.rb
|
69
86
|
- lib/polars/cat_name_space.rb
|
70
87
|
- lib/polars/convert.rb
|
@@ -116,7 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
133
|
- !ruby/object:Gem::Version
|
117
134
|
version: '0'
|
118
135
|
requirements: []
|
119
|
-
rubygems_version: 3.4.
|
136
|
+
rubygems_version: 3.4.10
|
120
137
|
signing_key:
|
121
138
|
specification_version: 4
|
122
139
|
summary: Blazingly fast DataFrames for Ruby
|
data/ext/polars/src/lazy/mod.rs
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
use magnus::RArray;
|
2
|
-
use polars::lazy::dsl::Expr;
|
3
|
-
|
4
|
-
use crate::lazy::dsl::RbExpr;
|
5
|
-
use crate::RbResult;
|
6
|
-
|
7
|
-
pub fn rb_exprs_to_exprs(rb_exprs: RArray) -> RbResult<Vec<Expr>> {
|
8
|
-
let mut exprs = Vec::new();
|
9
|
-
for item in rb_exprs.each() {
|
10
|
-
exprs.push(item?.try_convert::<&RbExpr>()?.inner.clone());
|
11
|
-
}
|
12
|
-
Ok(exprs)
|
13
|
-
}
|
@@ -1,100 +0,0 @@
|
|
1
|
-
use magnus::Value;
|
2
|
-
use polars::prelude::*;
|
3
|
-
use polars_core::utils::CustomIterTools;
|
4
|
-
|
5
|
-
use crate::conversion::get_rbseq;
|
6
|
-
use crate::{RbPolarsErr, RbResult};
|
7
|
-
|
8
|
-
pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
|
9
|
-
let (seq, len) = get_rbseq(seq)?;
|
10
|
-
|
11
|
-
let s = match dtype {
|
12
|
-
DataType::Int64 => {
|
13
|
-
let mut builder =
|
14
|
-
ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
|
15
|
-
for sub_seq in seq.each() {
|
16
|
-
let sub_seq = sub_seq?;
|
17
|
-
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
18
|
-
|
19
|
-
// safety: we know the iterators len
|
20
|
-
let iter = unsafe {
|
21
|
-
sub_seq
|
22
|
-
.each()
|
23
|
-
.map(|v| {
|
24
|
-
let v = v.unwrap();
|
25
|
-
if v.is_nil() {
|
26
|
-
None
|
27
|
-
} else {
|
28
|
-
Some(v.try_convert::<i64>().unwrap())
|
29
|
-
}
|
30
|
-
})
|
31
|
-
.trust_my_length(len)
|
32
|
-
};
|
33
|
-
builder.append_iter(iter)
|
34
|
-
}
|
35
|
-
builder.finish().into_series()
|
36
|
-
}
|
37
|
-
DataType::Float64 => {
|
38
|
-
let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
|
39
|
-
name,
|
40
|
-
len,
|
41
|
-
len * 5,
|
42
|
-
DataType::Float64,
|
43
|
-
);
|
44
|
-
for sub_seq in seq.each() {
|
45
|
-
let sub_seq = sub_seq?;
|
46
|
-
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
47
|
-
// safety: we know the iterators len
|
48
|
-
let iter = unsafe {
|
49
|
-
sub_seq
|
50
|
-
.each()
|
51
|
-
.map(|v| {
|
52
|
-
let v = v.unwrap();
|
53
|
-
if v.is_nil() {
|
54
|
-
None
|
55
|
-
} else {
|
56
|
-
Some(v.try_convert::<f64>().unwrap())
|
57
|
-
}
|
58
|
-
})
|
59
|
-
.trust_my_length(len)
|
60
|
-
};
|
61
|
-
builder.append_iter(iter)
|
62
|
-
}
|
63
|
-
builder.finish().into_series()
|
64
|
-
}
|
65
|
-
DataType::Boolean => {
|
66
|
-
let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
|
67
|
-
for sub_seq in seq.each() {
|
68
|
-
let sub_seq = sub_seq?;
|
69
|
-
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
70
|
-
// safety: we know the iterators len
|
71
|
-
let iter = unsafe {
|
72
|
-
sub_seq
|
73
|
-
.each()
|
74
|
-
.map(|v| {
|
75
|
-
let v = v.unwrap();
|
76
|
-
if v.is_nil() {
|
77
|
-
None
|
78
|
-
} else {
|
79
|
-
Some(v.try_convert::<bool>().unwrap())
|
80
|
-
}
|
81
|
-
})
|
82
|
-
.trust_my_length(len)
|
83
|
-
};
|
84
|
-
builder.append_iter(iter)
|
85
|
-
}
|
86
|
-
builder.finish().into_series()
|
87
|
-
}
|
88
|
-
DataType::Utf8 => {
|
89
|
-
return Err(RbPolarsErr::todo());
|
90
|
-
}
|
91
|
-
dt => {
|
92
|
-
return Err(RbPolarsErr::other(format!(
|
93
|
-
"cannot create list array from {:?}",
|
94
|
-
dt
|
95
|
-
)));
|
96
|
-
}
|
97
|
-
};
|
98
|
-
|
99
|
-
Ok(s)
|
100
|
-
}
|
File without changes
|
File without changes
|