polars-df 0.10.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,198 @@
module Polars
  # Namespace for expressions that operate on expression names.
  class NameExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Keep the original root name of the expression.
    #
    # @note
    #   Due to implementation constraints, this method can only be called as the last
    #   expression in a chain.
    #
    # @return [Expr]
    #
    # @example Prevent errors due to potential duplicate column names.
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2],
    #       "b" => [3, 4]
    #     }
    #   )
    #   df.select((Polars.lit(10) / Polars.all).name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────┬──────────┐
    #   # │ a    ┆ b        │
    #   # │ ---  ┆ ---      │
    #   # │ f64  ┆ f64      │
    #   # ╞══════╪══════════╡
    #   # │ 10.0 ┆ 3.333333 │
    #   # │ 5.0  ┆ 2.5      │
    #   # └──────┴──────────┘
    #
    # @example Undo an alias operation.
    #   df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 9   ┆ 3   │
    #   # │ 18  ┆ 4   │
    #   # └─────┴─────┘
    def keep
      Utils.wrap_expr(_rbexpr.name_keep)
    end

    # Rename the output of an expression by mapping a function over the root name.
    #
    # @yieldparam name [Object]
    #   The root name to transform (a string in the example below).
    # @yieldreturn [Object]
    #   The new name.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If no block is given.
    #
    # @example Remove a common suffix and convert to lower case.
    #   df = Polars::DataFrame.new(
    #     {
    #       "A_reverse" => [3, 2, 1],
    #       "B_reverse" => ["z", "y", "x"]
    #     }
    #   )
    #   df.with_columns(
    #     Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
    #   )
    #   # =>
    #   # shape: (3, 4)
    #   # ┌───────────┬───────────┬─────┬─────┐
    #   # │ A_reverse ┆ B_reverse ┆ a   ┆ b   │
    #   # │ ---       ┆ ---       ┆ --- ┆ --- │
    #   # │ i64       ┆ str       ┆ i64 ┆ str │
    #   # ╞═══════════╪═══════════╪═════╪═════╡
    #   # │ 3         ┆ z         ┆ 1   ┆ x   │
    #   # │ 2         ┆ y         ┆ 2   ┆ y   │
    #   # │ 1         ┆ x         ┆ 3   ┆ z   │
    #   # └───────────┴───────────┴─────┴─────┘
    def map(&f)
      # Without a block `f` is nil; fail here with a clear message instead of
      # forwarding nil as the renaming function.
      raise ArgumentError, "no block given" unless f

      Utils.wrap_expr(_rbexpr.name_map(f))
    end

    # Add a prefix to the root column name of the expression.
    #
    # @param prefix [Object]
    #   Prefix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ reverse_a ┆ reverse_b │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def prefix(prefix)
      Utils.wrap_expr(_rbexpr.name_prefix(prefix))
    end

    # Add a suffix to the root column name of the expression.
    #
    # @param suffix [Object]
    #   Suffix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ a_reverse ┆ b_reverse │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def suffix(suffix)
      Utils.wrap_expr(_rbexpr.name_suffix(suffix))
    end

    # Make the root column name lowercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_lowercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ colx ┆ coly │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_lowercase
      Utils.wrap_expr(_rbexpr.name_to_lowercase)
    end

    # Make the root column name uppercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_uppercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ COLX ┆ COLY │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_uppercase
      Utils.wrap_expr(_rbexpr.name_to_uppercase)
    end
  end
end
@@ -0,0 +1,109 @@
module Polars
  module Plot
    # Plot data.
    #
    # Builds a Vega-Lite chart from two columns of the receiver (plus an optional
    # grouping column).
    #
    # @param x [Object]
    #   Name of the column used for the x values. Defaults to the first column
    #   when the receiver has exactly two columns.
    # @param y [Object]
    #   Name of the column used for the y values. Defaults to the second column
    #   when the receiver has exactly two columns.
    # @param type [String, Symbol, nil]
    #   Chart type: `"line"`, `"area"`, `"pie"`, `"column"`, `"bar"`, or
    #   `"scatter"` (symbols are accepted too). When omitted it is inferred:
    #   numeric x and y give `"scatter"`, a `utf8?` x with numeric y gives
    #   `"column"`, and a Date/Datetime x with numeric y gives `"line"`.
    # @param group [Object]
    #   Name of a column used for the color encoding. Not allowed for pie charts.
    # @param stacked [Boolean]
    #   For `"column"` and `"bar"` charts with a group, a truthy value omits the
    #   per-group `xOffset`/`yOffset` encoding.
    #
    # @return [Vega::LiteChart]
    #
    # @raise [ArgumentError]
    #   If `x`/`y` are missing and the receiver does not have exactly two columns,
    #   if `group` is used with a pie chart, or if `type` is not a supported type.
    # @raise [RuntimeError]
    #   If `type` is omitted and cannot be inferred from the column dtypes.
    def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
      # Required on call rather than at file load.
      require "vega"

      raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
      x ||= columns[0]
      y ||= columns[1]
      type ||= begin
        if self[x].numeric? && self[y].numeric?
          "scatter"
        elsif self[x].utf8? && self[y].numeric?
          "column"
        elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
          "line"
        else
          raise "Cannot determine type. Use the type option."
        end
      end
      # The branches below match on strings; normalize so `type: :line` works
      # instead of failing with the misleading "Invalid type: line".
      type = type.to_s

      # Keep only the columns the chart needs (deduplicated by name).
      df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
      data = df.rows(named: true)

      case type
      when "line", "area"
        x_type =
          if df[x].numeric?
            "quantitative"
          elsif df[x].datelike?
            "temporal"
          else
            "nominal"
          end

        scale = x_type == "temporal" ? {type: "utc"} : {}
        encoding = {
          x: {field: x, type: x_type, scale: scale},
          y: {field: y, type: "quantitative"}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "pie"
        raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?

        Vega.lite
          .data(data)
          .mark(type: "arc", tooltip: true)
          .encoding(
            color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
            theta: {field: y, type: "quantitative"}
          )
          .view(stroke: nil)
      when "column"
        encoding = {
          x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          y: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:xOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "bar"
        # Same as "column" with the axes swapped: categories on y, values on x.
        encoding = {
          # TODO determine label angle
          y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          x: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:yOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "scatter"
        encoding = {
          x: {field: x, type: "quantitative", scale: {zero: false}},
          y: {field: y, type: "quantitative", scale: {zero: false}},
          size: {value: 60}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: "circle", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      else
        raise ArgumentError, "Invalid type: #{type}"
      end
    end
  end
end
@@ -0,0 +1,37 @@
module Polars
  # A rolling grouper.
  #
  # Exposes an `.agg` method for running polars expressions in a
  # group by context.
  class RollingGroupBy
    # Stores the frame and the rolling-window options; `period` and `offset`
    # are passed through `Utils._timedelta_to_pl_duration` before being stored.
    def initialize(df, index_column, period, offset, closed, by, check_sorted)
      @period = Utils._timedelta_to_pl_duration(period)
      @offset = Utils._timedelta_to_pl_duration(offset)
      @df = df
      @time_column = index_column
      @closed = closed
      @by = by
      @check_sorted = check_sorted
    end

    # Runs `aggs` over the rolling groups on the lazy frame and collects the
    # result with `no_optimization: true` and `string_cache: false`.
    def agg(aggs)
      rolling_options = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @by,
        check_sorted: @check_sorted
      }

      grouped = @df.lazy.group_by_rolling(**rolling_options)
      grouped.agg(aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end
+ end