polars-df 0.8.0-aarch64-linux → 0.10.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +3112 -1613
  6. data/LICENSE.txt +1 -1
  7. data/README.md +3 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +453 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/batched_csv_reader.rb +4 -2
  14. data/lib/polars/cat_expr.rb +24 -0
  15. data/lib/polars/cat_name_space.rb +75 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/data_frame.rb +306 -96
  18. data/lib/polars/data_types.rb +191 -28
  19. data/lib/polars/date_time_expr.rb +41 -18
  20. data/lib/polars/date_time_name_space.rb +9 -3
  21. data/lib/polars/exceptions.rb +12 -1
  22. data/lib/polars/expr.rb +898 -215
  23. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  24. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  25. data/lib/polars/functions/as_datatype.rb +248 -0
  26. data/lib/polars/functions/col.rb +47 -0
  27. data/lib/polars/functions/eager.rb +182 -0
  28. data/lib/polars/functions/lazy.rb +1280 -0
  29. data/lib/polars/functions/len.rb +49 -0
  30. data/lib/polars/functions/lit.rb +35 -0
  31. data/lib/polars/functions/random.rb +16 -0
  32. data/lib/polars/functions/range/date_range.rb +103 -0
  33. data/lib/polars/functions/range/int_range.rb +51 -0
  34. data/lib/polars/functions/repeat.rb +144 -0
  35. data/lib/polars/functions/whenthen.rb +96 -0
  36. data/lib/polars/functions.rb +29 -416
  37. data/lib/polars/group_by.rb +2 -2
  38. data/lib/polars/io.rb +36 -31
  39. data/lib/polars/lazy_frame.rb +405 -88
  40. data/lib/polars/list_expr.rb +158 -8
  41. data/lib/polars/list_name_space.rb +102 -0
  42. data/lib/polars/meta_expr.rb +175 -7
  43. data/lib/polars/series.rb +282 -41
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +413 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/testing.rb +507 -0
  48. data/lib/polars/utils.rb +106 -8
  49. data/lib/polars/version.rb +1 -1
  50. data/lib/polars/whenthen.rb +83 -0
  51. data/lib/polars.rb +16 -4
  52. metadata +34 -6
  53. data/lib/polars/lazy_functions.rb +0 -1181
  54. data/lib/polars/when.rb +0 -16
  55. data/lib/polars/when_then.rb +0 -19
@@ -0,0 +1,49 @@
1
+ module Polars
2
+ module Functions
3
+ # Return the number of rows in the context.
4
+ #
5
+ # This is similar to `COUNT(*)` in SQL.
6
+ #
7
+ # @return [Expr]
8
+ #
9
+ # @example
10
+ # df = Polars::DataFrame.new(
11
+ # {
12
+ # "a" => [1, 2, nil],
13
+ # "b" => [3, nil, nil],
14
+ # "c" => ["foo", "bar", "foo"]
15
+ # }
16
+ # )
17
+ # df.select(Polars.len)
18
+ # # =>
19
+ # # shape: (1, 1)
20
+ # # ┌─────┐
21
+ # # │ len │
22
+ # # │ --- │
23
+ # # │ u32 │
24
+ # # ╞═════╡
25
+ # # │ 3 │
26
+ # # └─────┘
27
+ #
28
+ # @example Generate an index column by using `len` in conjunction with `int_range`.
29
+ # df.select([
30
+ # Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
31
+ # Polars.all
32
+ # ])
33
+ # # =>
34
+ # # shape: (3, 4)
35
+ # # ┌───────┬──────┬──────┬─────┐
36
+ # # │ index ┆ a ┆ b ┆ c │
37
+ # # │ --- ┆ --- ┆ --- ┆ --- │
38
+ # # │ u32 ┆ i64 ┆ i64 ┆ str │
39
+ # # ╞═══════╪══════╪══════╪═════╡
40
+ # # │ 0 ┆ 1 ┆ 3 ┆ foo │
41
+ # # │ 1 ┆ 2 ┆ null ┆ bar │
42
+ # # │ 2 ┆ null ┆ null ┆ foo │
43
+ # # └───────┴──────┴──────┴─────┘
44
def len
  # Ask `Plr` (presumably the native binding - confirm) for the row-count
  # expression, then hand it to `Utils.wrap_expr` so callers get the
  # wrapped expression object rather than the raw one.
  rbexpr = Plr.len
  Utils.wrap_expr(rbexpr)
end
47
+ alias_method :length, :len
48
+ end
49
+ end
@@ -0,0 +1,35 @@
1
+ module Polars
2
+ module Functions
3
+ # Return an expression representing a literal value.
4
+ #
5
+ # @return [Expr]
6
def lit(value, dtype: nil, allow_object: nil)
  # Builds a literal expression from a Ruby value.
  #
  # value        - the Ruby object to turn into a literal.
  # dtype        - optional target type. For Time/DateTime values its
  #                `time_unit` / `time_zone` are consulted; for plain values
  #                the literal is cast to it.
  # allow_object - forwarded untouched to `Plr.lit`.
  #
  # Returns whatever `Utils.wrap_expr` (or the chained expression methods)
  # return, i.e. an expression object.
  if value.is_a?(::Time) || value.is_a?(::DateTime)
    # Temporal values are encoded as an integer timestamp literal and then
    # cast to Datetime; the unit comes from `dtype` when given, else "ns".
    time_unit = dtype&.time_unit || "ns"
    # This has to be safe navigation (`&.`). The earlier `dtype.&time_zone`
    # invoked the `&` operator method on `dtype`, so the time zone of a
    # supplied dtype was never read.
    time_zone = dtype&.time_zone
    timestamp = Utils._datetime_to_pl_timestamp(value, time_unit)
    expr = lit(timestamp).cast(Datetime.new(time_unit))
    return time_zone ? expr.dt.replace_time_zone(time_zone.to_s) : expr
  end

  if value.is_a?(::Date)
    # Dates are routed through the Time branch at UTC midnight and then
    # cast back down to Date.
    return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
  end

  if value.is_a?(Polars::Series)
    series_name = value.name
    expr = Utils.wrap_expr(Plr.lit(value._s, allow_object))
    # An unnamed Series yields a bare literal; otherwise keep its name.
    return series_name == "" ? expr : expr.alias(series_name)
  end

  if (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
    # Array-likes are wrapped in an unnamed Series and handled by the
    # Series branch above.
    return lit(Series.new("", value))
  end

  expr = Utils.wrap_expr(Plr.lit(value, allow_object))
  dtype ? expr.cast(dtype) : expr
end
34
+ end
35
+ end
@@ -0,0 +1,16 @@
1
+ module Polars
2
+ module Functions
3
+ # Set the global random seed for Polars.
4
+ #
5
+ # This random seed is used to determine things such as shuffle ordering.
6
+ #
7
+ # @param seed [Integer]
8
+ # A non-negative integer < 2**64 used to seed the internal global
9
+ # random number generator.
10
+ #
11
+ # @return [nil]
12
# Pure delegation: `seed` is passed unchanged to `Plr.set_random_seed` and
# that call's result is returned as-is.
def set_random_seed(seed) = Plr.set_random_seed(seed)
15
+ end
16
+ end
@@ -0,0 +1,103 @@
1
+ module Polars
2
+ module Functions
3
+ # Create a range of type `Datetime` (or `Date`).
4
+ #
5
+ # @param start [Object]
6
+ # Lower bound of the date range.
7
+ # @param stop [Object]
8
+ # Upper bound of the date range.
9
+ # @param interval [Object]
10
+ # Interval periods. It can be a polars duration string, such as `3d12h4m25s`
11
+ # representing 3 days, 12 hours, 4 minutes, and 25 seconds.
12
+ # @param lazy [Boolean]
13
+ # Return an expression.
14
+ # @param closed ["both", "left", "right", "none"]
15
+ # Define whether the temporal window interval is closed or not.
16
+ # @param name [String]
17
+ # Name of the output Series.
18
+ # @param time_unit [nil, "ns", "us", "ms"]
19
+ # Set the time unit.
20
+ # @param time_zone [String]
21
+ # Optional timezone
22
+ #
23
+ # @return [Object]
24
+ #
25
+ # @note
26
+ # If both `start` and `stop` are passed as date types (not datetime), and the
27
+ # interval granularity is no finer than 1d, the returned range is also of
28
+ # type date. All other permutations return a datetime Series.
29
+ #
30
+ # @example Using polars duration string to specify the interval
31
+ # Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", name: "drange")
32
+ # # =>
33
+ # # shape: (3,)
34
+ # # Series: 'drange' [date]
35
+ # # [
36
+ # # 2022-01-01
37
+ # # 2022-02-01
38
+ # # 2022-03-01
39
+ # # ]
40
+ #
41
+ # @example Using a compound duration string to specify the interval:
42
+ # Polars.date_range(
43
+ # DateTime.new(1985, 1, 1),
44
+ # DateTime.new(1985, 1, 10),
45
+ # "1d12h",
46
+ # time_unit: "ms"
47
+ # )
48
+ # # =>
49
+ # # shape: (7,)
50
+ # # Series: '' [datetime[ms]]
51
+ # # [
52
+ # # 1985-01-01 00:00:00
53
+ # # 1985-01-02 12:00:00
54
+ # # 1985-01-04 00:00:00
55
+ # # 1985-01-05 12:00:00
56
+ # # 1985-01-07 00:00:00
57
+ # # 1985-01-08 12:00:00
58
+ # # 1985-01-10 00:00:00
59
+ # # ]
60
def date_range(
  start,
  stop,
  interval,
  lazy: false,
  closed: "both",
  name: nil,
  time_unit: nil,
  time_zone: nil
)
  # ActiveSupport durations are detected but not handled yet.
  raise Todo if defined?(ActiveSupport::Duration) && interval.is_a?(ActiveSupport::Duration)

  # Work with the string form of the interval, with any spaces stripped
  # (e.g. "1d 12h" becomes "1d12h").
  interval = interval.to_s.delete(" ")

  # Without an explicit unit, pick "ns" only when the interval itself
  # mentions nanoseconds; everything else uses "us".
  time_unit = (interval.include?("ns") ? "ns" : "us") if time_unit.nil?

  lower = Utils.parse_as_expression(start)
  upper = Utils.parse_as_expression(stop)
  raw_range = Plr.date_range(lower, upper, interval, closed, time_unit, time_zone)

  # The alias is always applied; a nil `name` becomes the empty string.
  range_expr = Utils.wrap_expr(raw_range).alias(name.to_s)

  # Lazy callers get the expression; otherwise evaluate it into a Series.
  lazy ? range_expr : select(range_expr).to_series
end
102
+ end
103
+ end
@@ -0,0 +1,51 @@
1
+ module Polars
2
+ module Functions
3
+ # Create a range expression (or Series).
4
+ #
5
+ # This can be used in a `select`, `with_column`, etc. Be sure that the resulting
6
+ # range size is equal to the length of the DataFrame you are collecting.
7
+ #
8
+ # @param start [Integer, Expr, Series]
9
+ # Lower bound of range.
10
+ # @param stop [Integer, Expr, Series]
11
+ # Upper bound of range.
12
+ # @param step [Integer]
13
+ # Step size of the range.
14
+ # @param eager [Boolean]
15
+ # If `true`, a Series is returned instead of an Expr.
16
+ # @param dtype [Symbol]
17
+ # Apply an explicit integer dtype to the resulting expression (default is `Int64`).
18
+ #
19
+ # @return [Expr, Series]
20
+ #
21
+ # @example
22
+ # Polars.arange(0, 3, eager: true)
23
+ # # =>
24
+ # # shape: (3,)
25
+ # # Series: 'arange' [i64]
26
+ # # [
27
+ # # 0
28
+ # # 1
29
+ # # 2
30
+ # # ]
31
def int_range(start, stop = nil, step: 1, eager: false, dtype: nil)
  # Single-argument form: the lone value is the upper bound and the range
  # starts at 0.
  start, stop = 0, start if stop.nil?

  lower = Utils.parse_as_expression(start)
  upper = Utils.parse_as_expression(stop)

  # Fall back to Int64 when no dtype is given; Symbol dtypes are passed on
  # in their string form.
  dtype = Int64 unless dtype
  dtype = dtype.to_s if dtype.is_a?(Symbol)

  range_expr = Utils.wrap_expr(Plr.int_range(lower, upper, step, dtype)).alias("arange")

  # Eager callers get a Series, everyone else the expression itself.
  eager ? select(range_expr).to_series : range_expr
end
49
+ alias_method :arange, :int_range
50
+ end
51
+ end
@@ -0,0 +1,144 @@
1
+ module Polars
2
+ module Functions
3
+ # Repeat a single value n times.
4
+ #
5
+ # @param value [Object]
6
+ # Value to repeat.
7
+ # @param n [Integer]
8
+ # Repeat `n` times.
9
+ # @param eager [Boolean]
10
+ # Run eagerly and collect into a `Series`.
11
+ # @param name [String]
12
+ # Deprecated; a warning is printed when given. Use `alias` on the result instead.
13
+ #
14
+ # @return [Object]
15
+ #
16
+ # @example Construct a column with a repeated value in a lazy context.
17
+ # Polars.select(Polars.repeat("z", 3)).to_series
18
+ # # =>
19
+ # # shape: (3,)
20
+ # # Series: 'repeat' [str]
21
+ # # [
22
+ # # "z"
23
+ # # "z"
24
+ # # "z"
25
+ # # ]
26
+ #
27
+ # @example Generate a Series directly by setting `eager: true`.
28
+ # Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
29
+ # # =>
30
+ # # shape: (3,)
31
+ # # Series: 'repeat' [i8]
32
+ # # [
33
+ # # 3
34
+ # # 3
35
+ # # 3
36
+ # # ]
37
def repeat(value, n, dtype: nil, eager: false, name: nil)
  # `name` still works but is deprecated: warn up front, apply it below.
  named = !name.nil?
  warn "the `name` argument is deprecated. Use the `alias` method instead." if named

  # A plain Integer count is promoted to a literal expression so that
  # `_rbexpr` can be read from it like from any other expression.
  n = lit(n) if n.is_a?(Integer)

  value = Utils.parse_as_expression(value, str_as_lit: true)
  repeated = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype))
  repeated = repeated.alias(name) if named

  # Eager mode evaluates the expression into a Series right away.
  eager ? select(repeated).to_series : repeated
end
56
+
57
+ # Construct a column of length `n` filled with ones.
58
+ #
59
+ # This is syntactic sugar for the `repeat` function.
60
+ #
61
+ # @param n [Integer]
62
+ # Length of the resulting column.
63
+ # @param dtype [Object]
64
+ # Data type of the resulting column. Defaults to Float64.
65
+ # @param eager [Boolean]
66
+ # Evaluate immediately and return a `Series`. If set to `false`,
67
+ # return an expression instead.
68
+ #
69
+ # @return [Object]
70
+ #
71
+ # @example
72
+ # Polars.ones(3, dtype: Polars::Int8, eager: true)
73
+ # # =>
74
+ # # shape: (3,)
75
+ # # Series: 'ones' [i8]
76
+ # # [
77
+ # # 1
78
+ # # 1
79
+ # # 1
80
+ # # ]
81
def ones(n, dtype: nil, eager: true)
  # Builds a column of `n` ones by delegating to `repeat` and naming the
  # result "ones".
  #
  # The documented default dtype is Float64. Without this fallback a nil
  # dtype reached `_one_or_zero_by_dtype`, which calls `dtype.integer?`
  # and therefore failed with NoMethodError.
  dtype ||= Float64

  # The helper returns nil for dtypes it cannot express a "one" for.
  one = _one_or_zero_by_dtype(1, dtype)
  raise TypeError, "invalid dtype for `ones`; found #{dtype}" if one.nil?

  repeat(one, n, dtype: dtype, eager: eager).alias("ones")
end
89
+
90
+ # Construct a column of length `n` filled with zeros.
91
+ #
92
+ # This is syntactic sugar for the `repeat` function.
93
+ #
94
+ # @param n [Integer]
95
+ # Length of the resulting column.
96
+ # @param dtype [Object]
97
+ # Data type of the resulting column. Defaults to Float64.
98
+ # @param eager [Boolean]
99
+ # Evaluate immediately and return a `Series`. If set to `false`,
100
+ # return an expression instead.
101
+ #
102
+ # @return [Object]
103
+ #
104
+ # @example
105
+ # Polars.zeros(3, dtype: Polars::Int8, eager: true)
106
+ # # =>
107
+ # # shape: (3,)
108
+ # # Series: 'zeros' [i8]
109
+ # # [
110
+ # # 0
111
+ # # 0
112
+ # # 0
113
+ # # ]
114
def zeros(n, dtype: nil, eager: true)
  # Builds a column of `n` zeros by delegating to `repeat` and naming the
  # result "zeros".
  #
  # The documented default dtype is Float64. Without this fallback a nil
  # dtype reached `_one_or_zero_by_dtype`, which calls `dtype.integer?`
  # and therefore failed with NoMethodError.
  dtype ||= Float64

  # The helper returns nil for dtypes it cannot express a "zero" for.
  zero = _one_or_zero_by_dtype(0, dtype)
  raise TypeError, "invalid dtype for `zeros`; found #{dtype}" if zero.nil?

  repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
end
122
+
123
+ private
124
+
125
def _one_or_zero_by_dtype(value, dtype)
  # Maps the integer `value` (callers in this file pass 1 or 0) to the
  # representation matching `dtype`; returns nil for unsupported dtypes.
  return value if dtype.integer?
  return value.to_f if dtype.float?
  return value != 0 if dtype == Boolean
  return value.to_s if dtype == Utf8
  # NOTE(review): `Decimal(...)` is a method call, and no method of that
  # name is visible in this file - this branch looks like it would raise
  # NoMethodError. Confirm the intended constructor.
  return Decimal(value.to_s) if dtype == Decimal
  return nil unless [List, Array].include?(dtype)

  # Nested types: build one inner element and repeat it `width` times,
  # or once when the dtype exposes no width.
  arr_width = dtype.respond_to?(:width) ? dtype.width : 1
  [_one_or_zero_by_dtype(value, dtype.inner)] * arr_width
end
143
+ end
144
+ end
@@ -0,0 +1,96 @@
1
+ module Polars
2
+ module Functions
3
+ # Start a "when, then, otherwise" expression.
4
+ #
5
+ # @return [When]
6
+ #
7
+ # @example Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.
8
+ # df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
9
+ # df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
10
+ # # =>
11
+ # # shape: (3, 3)
12
+ # # ┌─────┬─────┬─────────┐
13
+ # # │ foo ┆ bar ┆ literal │
14
+ # # │ --- ┆ --- ┆ --- │
15
+ # # │ i64 ┆ i64 ┆ i32 │
16
+ # # ╞═════╪═════╪═════════╡
17
+ # # │ 1 ┆ 3 ┆ -1 │
18
+ # # │ 3 ┆ 4 ┆ 1 │
19
+ # # │ 4 ┆ 0 ┆ 1 │
20
+ # # └─────┴─────┴─────────┘
21
+ #
22
+ # @example Or with multiple when-then operations chained:
23
+ # df.with_columns(
24
+ # Polars.when(Polars.col("foo") > 2)
25
+ # .then(1)
26
+ # .when(Polars.col("bar") > 2)
27
+ # .then(4)
28
+ # .otherwise(-1)
29
+ # .alias("val")
30
+ # )
31
+ # # =>
32
+ # # shape: (3, 3)
33
+ # # ┌─────┬─────┬─────┐
34
+ # # │ foo ┆ bar ┆ val │
35
+ # # │ --- ┆ --- ┆ --- │
36
+ # # │ i64 ┆ i64 ┆ i32 │
37
+ # # ╞═════╪═════╪═════╡
38
+ # # │ 1 ┆ 3 ┆ 4 │
39
+ # # │ 3 ┆ 4 ┆ 1 │
40
+ # # │ 4 ┆ 0 ┆ 1 │
41
+ # # └─────┴─────┴─────┘
42
+ #
43
+ # @example The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to True, are set to `null`:
44
+ # df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
45
+ # # =>
46
+ # # shape: (3, 3)
47
+ # # ┌─────┬─────┬──────┐
48
+ # # │ foo ┆ bar ┆ val │
49
+ # # │ --- ┆ --- ┆ --- │
50
+ # # │ i64 ┆ i64 ┆ i32 │
51
+ # # ╞═════╪═════╪══════╡
52
+ # # │ 1 ┆ 3 ┆ null │
53
+ # # │ 3 ┆ 4 ┆ 1 │
54
+ # # │ 4 ┆ 0 ┆ 1 │
55
+ # # └─────┴─────┴──────┘
56
+ #
57
+ # @example Pass multiple predicates, each of which must be met:
58
+ # df.with_columns(
59
+ # val: Polars.when(
60
+ # Polars.col("bar") > 0,
61
+ # Polars.col("foo") % 2 != 0
62
+ # )
63
+ # .then(99)
64
+ # .otherwise(-1)
65
+ # )
66
+ # # =>
67
+ # # shape: (3, 3)
68
+ # # ┌─────┬─────┬─────┐
69
+ # # │ foo ┆ bar ┆ val │
70
+ # # │ --- ┆ --- ┆ --- │
71
+ # # │ i64 ┆ i64 ┆ i32 │
72
+ # # ╞═════╪═════╪═════╡
73
+ # # │ 1 ┆ 3 ┆ 99 │
74
+ # # │ 3 ┆ 4 ┆ 99 │
75
+ # # │ 4 ┆ 0 ┆ -1 │
76
+ # # └─────┴─────┴─────┘
77
+ #
78
+ # @example Pass conditions as keyword arguments:
79
+ # df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
80
+ # # =>
81
+ # # shape: (3, 3)
82
+ # # ┌─────┬─────┬─────┐
83
+ # # │ foo ┆ bar ┆ val │
84
+ # # │ --- ┆ --- ┆ --- │
85
+ # # │ i64 ┆ i64 ┆ i32 │
86
+ # # ╞═════╪═════╪═════╡
87
+ # # │ 1 ┆ 3 ┆ -1 │
88
+ # # │ 3 ┆ 4 ┆ -1 │
89
+ # # │ 4 ┆ 0 ┆ 99 │
90
+ # # └─────┴─────┴─────┘
91
def when(*predicates, **constraints)
  # `Utils.parse_when_inputs` turns the positional predicates and keyword
  # constraints into the single condition handed to `Plr.when`; the result
  # is wrapped in a `When` so `.then` / `.otherwise` can be chained.
  rbwhen = Plr.when(Utils.parse_when_inputs(*predicates, **constraints))
  When.new(rbwhen)
end
95
+ end
96
+ end