polars-df 0.13.0-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,198 @@
1
module Polars
  # Namespace for expressions that operate on expression names.
  class NameExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Keep the original root name of the expression.
    #
    # @note
    #   Due to implementation constraints, this method can only be called as the last
    #   expression in a chain.
    #
    # @return [Expr]
    #
    # @example Prevent errors due to potential duplicate column names.
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2],
    #       "b" => [3, 4]
    #     }
    #   )
    #   df.select((Polars.lit(10) / Polars.all).name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────┬──────────┐
    #   # │ a    ┆ b        │
    #   # │ ---  ┆ ---      │
    #   # │ f64  ┆ f64      │
    #   # ╞══════╪══════════╡
    #   # │ 10.0 ┆ 3.333333 │
    #   # │ 5.0  ┆ 2.5      │
    #   # └──────┴──────────┘
    #
    # @example Undo an alias operation.
    #   df.with_columns((Polars.col("a") * 9).alias("c").name.keep)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 9   ┆ 3   │
    #   # │ 18  ┆ 4   │
    #   # └─────┴─────┘
    def keep
      Utils.wrap_expr(_rbexpr.name_keep)
    end

    # Rename the output of an expression by mapping a function over the root name.
    #
    # @yieldparam name [String]
    #   The root column name.
    # @yieldreturn [String]
    #   The new column name.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If no block is given.
    #
    # @example Remove a common suffix and convert to lower case.
    #   df = Polars::DataFrame.new(
    #     {
    #       "A_reverse" => [3, 2, 1],
    #       "B_reverse" => ["z", "y", "x"]
    #     }
    #   )
    #   df.with_columns(
    #     Polars.all.reverse.name.map { |c| c.delete_suffix("_reverse").downcase }
    #   )
    #   # =>
    #   # shape: (3, 4)
    #   # ┌───────────┬───────────┬─────┬─────┐
    #   # │ A_reverse ┆ B_reverse ┆ a   ┆ b   │
    #   # │ ---       ┆ ---       ┆ --- ┆ --- │
    #   # │ i64       ┆ str       ┆ i64 ┆ str │
    #   # ╞═══════════╪═══════════╪═════╪═════╡
    #   # │ 3         ┆ z         ┆ 1   ┆ x   │
    #   # │ 2         ┆ y         ┆ 2   ┆ y   │
    #   # │ 1         ┆ x         ┆ 3   ┆ z   │
    #   # └───────────┴───────────┴─────┴─────┘
    def map(&f)
      # Fail fast with a clear message instead of handing nil to the native layer.
      raise ArgumentError, "block required" unless f

      Utils.wrap_expr(_rbexpr.name_map(f))
    end

    # Add a prefix to the root column name of the expression.
    #
    # @param prefix [Object]
    #   Prefix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.prefix("reverse_"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ reverse_a ┆ reverse_b │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def prefix(prefix)
      Utils.wrap_expr(_rbexpr.name_prefix(prefix))
    end

    # Add a suffix to the root column name of the expression.
    #
    # @param suffix [Object]
    #   Suffix to add to the root column name.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [1, 2, 3],
    #       "b" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.reverse.name.suffix("_reverse"))
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬───────────┬───────────┐
    #   # │ a   ┆ b   ┆ a_reverse ┆ b_reverse │
    #   # │ --- ┆ --- ┆ ---       ┆ ---       │
    #   # │ i64 ┆ str ┆ i64       ┆ str       │
    #   # ╞═════╪═════╪═══════════╪═══════════╡
    #   # │ 1   ┆ x   ┆ 3         ┆ z         │
    #   # │ 2   ┆ y   ┆ 2         ┆ y         │
    #   # │ 3   ┆ z   ┆ 1         ┆ x         │
    #   # └─────┴─────┴───────────┴───────────┘
    def suffix(suffix)
      Utils.wrap_expr(_rbexpr.name_suffix(suffix))
    end

    # Make the root column name lowercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"],
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_lowercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ colx ┆ coly │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_lowercase
      Utils.wrap_expr(_rbexpr.name_to_lowercase)
    end

    # Make the root column name uppercase.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "ColX" => [1, 2, 3],
    #       "ColY" => ["x", "y", "z"]
    #     }
    #   )
    #   df.with_columns(Polars.all.name.to_uppercase)
    #   # =>
    #   # shape: (3, 4)
    #   # ┌──────┬──────┬──────┬──────┐
    #   # │ ColX ┆ ColY ┆ COLX ┆ COLY │
    #   # │ ---  ┆ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ str  ┆ i64  ┆ str  │
    #   # ╞══════╪══════╪══════╪══════╡
    #   # │ 1    ┆ x    ┆ 1    ┆ x    │
    #   # │ 2    ┆ y    ┆ 2    ┆ y    │
    #   # │ 3    ┆ z    ┆ 3    ┆ z    │
    #   # └──────┴──────┴──────┴──────┘
    def to_uppercase
      Utils.wrap_expr(_rbexpr.name_to_uppercase)
    end
  end
end
@@ -0,0 +1,109 @@
1
module Polars
  # Chart-building mixin. The including object must respond to `columns`,
  # `[]` (single column and column-list selection) and `rows(named: true)`.
  module Plot
    # Plot data.
    #
    # Builds a Vega-Lite chart from an x column and a y column.
    #
    # @param x [Object]
    #   Column for the x values. May only be omitted when the frame has exactly
    #   two columns, in which case the first column is used.
    # @param y [Object]
    #   Column for the y values. May only be omitted when the frame has exactly
    #   two columns, in which case the second column is used.
    # @param type [String, Symbol, nil]
    #   One of `"line"`, `"area"`, `"pie"`, `"column"`, `"bar"` or `"scatter"`.
    #   When omitted it is inferred from the column types: numeric/numeric gives
    #   `"scatter"`, string/numeric gives `"column"`, date or datetime/numeric
    #   gives `"line"`.
    # @param group [Object, nil]
    #   Column used for the color encoding. Not allowed for pie charts.
    # @param stacked [Boolean, nil]
    #   For grouped `"column"` and `"bar"` charts: when truthy, no per-group
    #   offset encoding is added (presumably so Vega-Lite stacks the bars).
    #
    # @return [Vega::LiteChart]
    #
    # @raise [ArgumentError]
    #   If the columns cannot be defaulted, `type` is not recognised, or `group`
    #   is used with a pie chart.
    # @raise [RuntimeError]
    #   If `type` is omitted and cannot be inferred.
    def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
      # Loaded on demand so the vega gem is only needed when plotting.
      require "vega"

      raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
      x ||= columns[0]
      y ||= columns[1]
      type =
        if type
          # Accept symbols (type: :line) as well as strings.
          type.to_s
        elsif self[x].numeric? && self[y].numeric?
          "scatter"
        elsif self[x].utf8? && self[y].numeric?
          "column"
        elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
          "line"
        else
          raise "Cannot determine type. Use the type option."
        end

      # Only ship the columns the chart actually references.
      df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
      data = df.rows(named: true)

      case type
      when "line", "area"
        x_type =
          if df[x].numeric?
            "quantitative"
          elsif df[x].datelike?
            "temporal"
          else
            "nominal"
          end

        scale = x_type == "temporal" ? {type: "utc"} : {}
        encoding = {
          x: {field: x, type: x_type, scale: scale},
          y: {field: y, type: "quantitative"}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "pie"
        raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?

        Vega.lite
          .data(data)
          .mark(type: "arc", tooltip: true)
          .encoding(
            color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
            theta: {field: y, type: "quantitative"}
          )
          .view(stroke: nil)
      when "column"
        encoding = {
          x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          y: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:xOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "bar"
        # Horizontal bars: the category column goes on the y axis.
        encoding = {
          # TODO determine label angle
          y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
          x: {field: y, type: "quantitative"}
        }
        if group
          encoding[:color] = {field: group}
          encoding[:yOffset] = {field: group} unless stacked
        end

        Vega.lite
          .data(data)
          .mark(type: "bar", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      when "scatter"
        encoding = {
          x: {field: x, type: "quantitative", scale: {zero: false}},
          y: {field: y, type: "quantitative", scale: {zero: false}},
          size: {value: 60}
        }
        encoding[:color] = {field: group} if group

        Vega.lite
          .data(data)
          .mark(type: "circle", tooltip: true)
          .encoding(encoding)
          .config(axis: {labelFontSize: 12})
      else
        raise ArgumentError, "Invalid type: #{type}"
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module Polars
  # Grouper for rolling-window aggregations.
  #
  # Holds the frame and window settings; call `.agg` to evaluate polars
  # expressions in the resulting group by context.
  class RollingGroupBy
    # Stores the frame and window settings. `period` and `offset` are passed
    # through `Utils.parse_as_duration_string` before being kept.
    def initialize(df, index_column, period, offset, closed, group_by)
      @df = df
      @time_column = index_column
      @period = Utils.parse_as_duration_string(period)
      @offset = Utils.parse_as_duration_string(offset)
      @closed = closed
      @group_by = group_by
    end

    # Runs the given aggregations over the rolling groups via the lazy API and
    # returns the collected result.
    def agg(*aggs, **named_aggs)
      window_options = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @group_by
      }
      rolling = @df.lazy.group_by_rolling(**window_options)
      aggregated = rolling.agg(*aggs, **named_aggs)
      aggregated.collect(no_optimization: true, string_cache: false)
    end
  end
end