polars-df 0.2.0-x86_64-darwin → 0.2.1-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +1 -1
- data/README.md +263 -2
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/data_frame.rb +18 -4
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/io.rb +25 -2
- data/lib/polars/series.rb +7 -0
- data/lib/polars/utils.rb +3 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee93a61f42e0acf492693c5516582437239b66b29a411d49a61e43ee2c640bf0
|
4
|
+
data.tar.gz: e79a6954afbf9e66bb1b73410abdc750fd5ee7bdca289ae7aa31607608c120f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6679b43359387e0f6688c3d96337ffb7408e58296f16524708748feb4c57ff396b439d3a24a79f0c913dd252f920fb3f42be1bc5a0be364ee6aa66c094107ccf
|
7
|
+
data.tar.gz: 80f69ce23c42d4dd3345bd8364f689af571284ba95770d32be4760a99a9b0eb7cb7b21892c326321c0db33b6bffc57e4a19c989b8044878ef7e42aeffe3d7f68
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/README.md
CHANGED
@@ -25,7 +25,13 @@ Polars.read_csv("iris.csv")
|
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
|
+
|
30
|
+
## Reference
|
31
|
+
|
32
|
+
- [Series](https://www.rubydoc.info/gems/polars-df/Polars/Series)
|
33
|
+
- [DataFrame](https://www.rubydoc.info/gems/polars-df/Polars/DataFrame)
|
34
|
+
- [LazyFrame](https://www.rubydoc.info/gems/polars-df/Polars/LazyFrame)
|
29
35
|
|
30
36
|
## Examples
|
31
37
|
|
@@ -46,7 +52,7 @@ Polars.read_parquet("file.parquet")
|
|
46
52
|
From Active Record
|
47
53
|
|
48
54
|
```ruby
|
49
|
-
Polars::DataFrame.new(User.all)
|
55
|
+
Polars.read_sql(User.all)
|
50
56
|
```
|
51
57
|
|
52
58
|
From a hash
|
@@ -67,6 +73,261 @@ Polars::DataFrame.new([
|
|
67
73
|
])
|
68
74
|
```
|
69
75
|
|
76
|
+
## Attributes
|
77
|
+
|
78
|
+
Get number of rows
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
df.height
|
82
|
+
```
|
83
|
+
|
84
|
+
Get column names
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
df.columns
|
88
|
+
```
|
89
|
+
|
90
|
+
Check if a column exists
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
df.include?(name)
|
94
|
+
```
|
95
|
+
|
96
|
+
## Selecting Data
|
97
|
+
|
98
|
+
Select a column
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
df["a"]
|
102
|
+
```
|
103
|
+
|
104
|
+
Select multiple columns
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
df[["a", "b"]]
|
108
|
+
```
|
109
|
+
|
110
|
+
Select first rows
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
df.head
|
114
|
+
```
|
115
|
+
|
116
|
+
Select last rows
|
117
|
+
|
118
|
+
```ruby
|
119
|
+
df.tail
|
120
|
+
```
|
121
|
+
|
122
|
+
## Filtering
|
123
|
+
|
124
|
+
Filter on a condition
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
df[Polars.col("a") == 2]
|
128
|
+
df[Polars.col("a") != 2]
|
129
|
+
df[Polars.col("a") > 2]
|
130
|
+
df[Polars.col("a") >= 2]
|
131
|
+
df[Polars.col("a") < 2]
|
132
|
+
df[Polars.col("a") <= 2]
|
133
|
+
```
|
134
|
+
|
135
|
+
And, or, and exclusive or
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
|
139
|
+
df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
|
140
|
+
df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
|
141
|
+
```
|
142
|
+
|
143
|
+
## Operations
|
144
|
+
|
145
|
+
Basic operations
|
146
|
+
|
147
|
+
```ruby
|
148
|
+
df["a"] + 5
|
149
|
+
df["a"] - 5
|
150
|
+
df["a"] * 5
|
151
|
+
df["a"] / 5
|
152
|
+
df["a"] % 5
|
153
|
+
df["a"] ** 2
|
154
|
+
df["a"].sqrt
|
155
|
+
df["a"].abs
|
156
|
+
```
|
157
|
+
|
158
|
+
Rounding
|
159
|
+
|
160
|
+
```ruby
|
161
|
+
df["a"].round(2)
|
162
|
+
df["a"].ceil
|
163
|
+
df["a"].floor
|
164
|
+
```
|
165
|
+
|
166
|
+
Logarithm
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
df["a"].log # natural log
|
170
|
+
df["a"].log(10)
|
171
|
+
```
|
172
|
+
|
173
|
+
Exponentiation
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
df["a"].exp
|
177
|
+
```
|
178
|
+
|
179
|
+
Trigonometric functions
|
180
|
+
|
181
|
+
```ruby
|
182
|
+
df["a"].sin
|
183
|
+
df["a"].cos
|
184
|
+
df["a"].tan
|
185
|
+
df["a"].asin
|
186
|
+
df["a"].acos
|
187
|
+
df["a"].atan
|
188
|
+
```
|
189
|
+
|
190
|
+
Hyperbolic functions
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
df["a"].sinh
|
194
|
+
df["a"].cosh
|
195
|
+
df["a"].tanh
|
196
|
+
df["a"].asinh
|
197
|
+
df["a"].acosh
|
198
|
+
df["a"].atanh
|
199
|
+
```
|
200
|
+
|
201
|
+
Summary statistics
|
202
|
+
|
203
|
+
```ruby
|
204
|
+
df["a"].sum
|
205
|
+
df["a"].mean
|
206
|
+
df["a"].median
|
207
|
+
df["a"].quantile(0.90)
|
208
|
+
df["a"].min
|
209
|
+
df["a"].max
|
210
|
+
df["a"].std
|
211
|
+
df["a"].var
|
212
|
+
```
|
213
|
+
|
214
|
+
## Grouping
|
215
|
+
|
216
|
+
Group
|
217
|
+
|
218
|
+
```ruby
|
219
|
+
df.groupby("a").count
|
220
|
+
```
|
221
|
+
|
222
|
+
Works with all summary statistics
|
223
|
+
|
224
|
+
```ruby
|
225
|
+
df.groupby("a").max
|
226
|
+
```
|
227
|
+
|
228
|
+
Multiple groups
|
229
|
+
|
230
|
+
```ruby
|
231
|
+
df.groupby(["a", "b"]).count
|
232
|
+
```
|
233
|
+
|
234
|
+
## Combining Data Frames
|
235
|
+
|
236
|
+
Add rows
|
237
|
+
|
238
|
+
```ruby
|
239
|
+
df.vstack(other_df)
|
240
|
+
```
|
241
|
+
|
242
|
+
Add columns
|
243
|
+
|
244
|
+
```ruby
|
245
|
+
df.hstack(other_df)
|
246
|
+
```
|
247
|
+
|
248
|
+
Inner join
|
249
|
+
|
250
|
+
```ruby
|
251
|
+
df.join(other_df, on: "a")
|
252
|
+
```
|
253
|
+
|
254
|
+
Left join
|
255
|
+
|
256
|
+
```ruby
|
257
|
+
df.join(other_df, on: "a", how: "left")
|
258
|
+
```
|
259
|
+
|
260
|
+
## Encoding
|
261
|
+
|
262
|
+
One-hot encoding
|
263
|
+
|
264
|
+
```ruby
|
265
|
+
df.to_dummies
|
266
|
+
```
|
267
|
+
|
268
|
+
## Conversion
|
269
|
+
|
270
|
+
Array of rows
|
271
|
+
|
272
|
+
```ruby
|
273
|
+
df.rows
|
274
|
+
```
|
275
|
+
|
276
|
+
Hash of series
|
277
|
+
|
278
|
+
```ruby
|
279
|
+
df.to_h
|
280
|
+
```
|
281
|
+
|
282
|
+
CSV
|
283
|
+
|
284
|
+
```ruby
|
285
|
+
df.to_csv
|
286
|
+
# or
|
287
|
+
df.write_csv("data.csv")
|
288
|
+
```
|
289
|
+
|
290
|
+
Parquet
|
291
|
+
|
292
|
+
```ruby
|
293
|
+
df.write_parquet("data.parquet")
|
294
|
+
```
|
295
|
+
|
296
|
+
## Types
|
297
|
+
|
298
|
+
You can specify column types when creating a data frame
|
299
|
+
|
300
|
+
```ruby
|
301
|
+
Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
|
302
|
+
```
|
303
|
+
|
304
|
+
Supported types are:
|
305
|
+
|
306
|
+
- boolean - `Boolean`
|
307
|
+
- float - `Float64`, `Float32`
|
308
|
+
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
309
|
+
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
310
|
+
- string - `Utf8`, `Categorical`
|
311
|
+
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
312
|
+
|
313
|
+
Get column types
|
314
|
+
|
315
|
+
```ruby
|
316
|
+
df.schema
|
317
|
+
```
|
318
|
+
|
319
|
+
For a specific column
|
320
|
+
|
321
|
+
```ruby
|
322
|
+
df["a"].dtype
|
323
|
+
```
|
324
|
+
|
325
|
+
Cast a column
|
326
|
+
|
327
|
+
```ruby
|
328
|
+
df["a"].cast(Polars::Int32)
|
329
|
+
```
|
330
|
+
|
70
331
|
## History
|
71
332
|
|
72
333
|
View the [changelog](CHANGELOG.md)
|
Binary file
|
Binary file
|
Binary file
|
data/lib/polars/data_frame.rb
CHANGED
@@ -17,6 +17,7 @@ module Polars
|
|
17
17
|
# the orientation is inferred by matching the columns and data dimensions. If
|
18
18
|
# this does not yield conclusive results, column orientation is used.
|
19
19
|
def initialize(data = nil, columns: nil, orient: nil)
|
20
|
+
# TODO deprecate in favor of read_sql
|
20
21
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
21
22
|
result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
|
22
23
|
data = {}
|
@@ -275,6 +276,7 @@ module Polars
|
|
275
276
|
def height
|
276
277
|
_df.height
|
277
278
|
end
|
279
|
+
alias_method :count, :height
|
278
280
|
|
279
281
|
# Get the width of the DataFrame.
|
280
282
|
#
|
@@ -521,13 +523,13 @@ module Polars
|
|
521
523
|
return df.slice(row_selection, 1)
|
522
524
|
end
|
523
525
|
# df[2, "a"]
|
524
|
-
if col_selection.is_a?(String)
|
526
|
+
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
525
527
|
return self[col_selection][row_selection]
|
526
528
|
end
|
527
529
|
end
|
528
530
|
|
529
531
|
# column selection can be "a" and ["a", "b"]
|
530
|
-
if col_selection.is_a?(String)
|
532
|
+
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
531
533
|
col_selection = [col_selection]
|
532
534
|
end
|
533
535
|
|
@@ -553,8 +555,8 @@ module Polars
|
|
553
555
|
|
554
556
|
# select single column
|
555
557
|
# df["foo"]
|
556
|
-
if item.is_a?(String)
|
557
|
-
return Utils.wrap_s(_df.column(item))
|
558
|
+
if item.is_a?(String) || item.is_a?(Symbol)
|
559
|
+
return Utils.wrap_s(_df.column(item.to_s))
|
558
560
|
end
|
559
561
|
|
560
562
|
# df[idx]
|
@@ -574,6 +576,11 @@ module Polars
|
|
574
576
|
end
|
575
577
|
end
|
576
578
|
|
579
|
+
# Ruby-specific
|
580
|
+
if item.is_a?(Expr)
|
581
|
+
return filter(item)
|
582
|
+
end
|
583
|
+
|
577
584
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
578
585
|
end
|
579
586
|
|
@@ -797,6 +804,13 @@ module Polars
|
|
797
804
|
nil
|
798
805
|
end
|
799
806
|
|
807
|
+
# Write to comma-separated values (CSV) string.
|
808
|
+
#
|
809
|
+
# @return [String]
|
810
|
+
def to_csv(**options)
|
811
|
+
write_csv(**options)
|
812
|
+
end
|
813
|
+
|
800
814
|
# Write to Apache Avro file.
|
801
815
|
#
|
802
816
|
# @param file [String]
|
data/lib/polars/data_types.rb
CHANGED
data/lib/polars/io.rb
CHANGED
@@ -590,8 +590,31 @@ module Polars
|
|
590
590
|
DataFrame._read_ndjson(file)
|
591
591
|
end
|
592
592
|
|
593
|
-
#
|
594
|
-
#
|
593
|
+
# Read a SQL query into a DataFrame.
|
594
|
+
#
|
595
|
+
# @param sql [Object]
|
596
|
+
# ActiveRecord::Relation or ActiveRecord::Result.
|
597
|
+
#
|
598
|
+
# @return [DataFrame]
|
599
|
+
def read_sql(sql)
|
600
|
+
if !defined?(ActiveRecord)
|
601
|
+
raise Error, "Active Record not available"
|
602
|
+
end
|
603
|
+
|
604
|
+
result =
|
605
|
+
if sql.is_a?(ActiveRecord::Result)
|
606
|
+
sql
|
607
|
+
elsif sql.is_a?(ActiveRecord::Relation)
|
608
|
+
sql.connection.select_all(sql.to_sql)
|
609
|
+
else
|
610
|
+
raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
|
611
|
+
end
|
612
|
+
data = {}
|
613
|
+
result.columns.each_with_index do |k, i|
|
614
|
+
data[k] = result.rows.map { |r| r[i] }
|
615
|
+
end
|
616
|
+
DataFrame.new(data)
|
617
|
+
end
|
595
618
|
|
596
619
|
# def read_excel
|
597
620
|
# end
|
data/lib/polars/series.rb
CHANGED
@@ -1647,6 +1647,7 @@ module Polars
|
|
1647
1647
|
def len
|
1648
1648
|
_s.len
|
1649
1649
|
end
|
1650
|
+
alias_method :count, :len
|
1650
1651
|
alias_method :length, :len
|
1651
1652
|
|
1652
1653
|
# Cast between data types.
|
@@ -2183,6 +2184,7 @@ module Polars
|
|
2183
2184
|
def arcsin
|
2184
2185
|
super
|
2185
2186
|
end
|
2187
|
+
alias_method :asin, :arcsin
|
2186
2188
|
|
2187
2189
|
# Compute the element-wise value for the inverse cosine.
|
2188
2190
|
#
|
@@ -2202,6 +2204,7 @@ module Polars
|
|
2202
2204
|
def arccos
|
2203
2205
|
super
|
2204
2206
|
end
|
2207
|
+
alias_method :acos, :arccos
|
2205
2208
|
|
2206
2209
|
# Compute the element-wise value for the inverse tangent.
|
2207
2210
|
#
|
@@ -2221,6 +2224,7 @@ module Polars
|
|
2221
2224
|
def arctan
|
2222
2225
|
super
|
2223
2226
|
end
|
2227
|
+
alias_method :atan, :arctan
|
2224
2228
|
|
2225
2229
|
# Compute the element-wise value for the inverse hyperbolic sine.
|
2226
2230
|
#
|
@@ -2240,6 +2244,7 @@ module Polars
|
|
2240
2244
|
def arcsinh
|
2241
2245
|
super
|
2242
2246
|
end
|
2247
|
+
alias_method :asinh, :arcsinh
|
2243
2248
|
|
2244
2249
|
# Compute the element-wise value for the inverse hyperbolic cosine.
|
2245
2250
|
#
|
@@ -2260,6 +2265,7 @@ module Polars
|
|
2260
2265
|
def arccosh
|
2261
2266
|
super
|
2262
2267
|
end
|
2268
|
+
alias_method :acosh, :arccosh
|
2263
2269
|
|
2264
2270
|
# Compute the element-wise value for the inverse hyperbolic tangent.
|
2265
2271
|
#
|
@@ -2283,6 +2289,7 @@ module Polars
|
|
2283
2289
|
def arctanh
|
2284
2290
|
super
|
2285
2291
|
end
|
2292
|
+
alias_method :atanh, :arctanh
|
2286
2293
|
|
2287
2294
|
# Compute the element-wise value for the hyperbolic sine.
|
2288
2295
|
#
|
data/lib/polars/utils.rb
CHANGED
@@ -70,7 +70,7 @@ module Polars
|
|
70
70
|
end
|
71
71
|
|
72
72
|
def self.selection_to_rbexpr_list(exprs)
|
73
|
-
if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
73
|
+
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
74
74
|
exprs = [exprs]
|
75
75
|
end
|
76
76
|
|
@@ -78,9 +78,9 @@ module Polars
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
81
|
-
if expr.is_a?(String) && !str_to_lit
|
81
|
+
if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
|
82
82
|
col(expr)
|
83
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
|
83
|
+
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
84
84
|
lit(expr)
|
85
85
|
elsif expr.is_a?(Expr)
|
86
86
|
expr
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|