polars-df 0.2.0-aarch64-linux → 0.2.1-aarch64-linux
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +1 -1
- data/README.md +263 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/data_frame.rb +18 -4
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/io.rb +25 -2
- data/lib/polars/series.rb +7 -0
- data/lib/polars/utils.rb +3 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a2fa900afe8e04c20c6987d5e3a7b24227557822708bba44267bfb55990fffe2
|
4
|
+
data.tar.gz: f5952e875fdaae53d37c4849aca9edab4565e4176b2d182b6ccd27c1b334cfb3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 360617b816aba6e5807118c20b3b041bdd5d699a99b85f34fba7e58699b1f78e26976dd93f1c10f47e71a2c88882984799ab914ae1a7be3c9a2a8d322858e966
|
7
|
+
data.tar.gz: 839ca7bd6aa6a2ca70dd8f5d4c3cec389c174551919287dc7317afc945828f6d05f9fee8c036ae0ce0b4ecac35bf1a35c32dc772faf01ef0625c931289f7300f
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/README.md
CHANGED
@@ -25,7 +25,13 @@ Polars.read_csv("iris.csv")
|
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
|
+
|
30
|
+
## Reference
|
31
|
+
|
32
|
+
- [Series](https://www.rubydoc.info/gems/polars-df/Polars/Series)
|
33
|
+
- [DataFrame](https://www.rubydoc.info/gems/polars-df/Polars/DataFrame)
|
34
|
+
- [LazyFrame](https://www.rubydoc.info/gems/polars-df/Polars/LazyFrame)
|
29
35
|
|
30
36
|
## Examples
|
31
37
|
|
@@ -46,7 +52,7 @@ Polars.read_parquet("file.parquet")
|
|
46
52
|
From Active Record
|
47
53
|
|
48
54
|
```ruby
|
49
|
-
Polars::DataFrame.new(User.all)
|
55
|
+
Polars.read_sql(User.all)
|
50
56
|
```
|
51
57
|
|
52
58
|
From a hash
|
@@ -67,6 +73,261 @@ Polars::DataFrame.new([
|
|
67
73
|
])
|
68
74
|
```
|
69
75
|
|
76
|
+
## Attributes
|
77
|
+
|
78
|
+
Get number of rows
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
df.height
|
82
|
+
```
|
83
|
+
|
84
|
+
Get column names
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
df.columns
|
88
|
+
```
|
89
|
+
|
90
|
+
Check if a column exists
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
df.include?(name)
|
94
|
+
```
|
95
|
+
|
96
|
+
## Selecting Data
|
97
|
+
|
98
|
+
Select a column
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
df["a"]
|
102
|
+
```
|
103
|
+
|
104
|
+
Select multiple columns
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
df[["a", "b"]]
|
108
|
+
```
|
109
|
+
|
110
|
+
Select first rows
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
df.head
|
114
|
+
```
|
115
|
+
|
116
|
+
Select last rows
|
117
|
+
|
118
|
+
```ruby
|
119
|
+
df.tail
|
120
|
+
```
|
121
|
+
|
122
|
+
## Filtering
|
123
|
+
|
124
|
+
Filter on a condition
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
df[Polars.col("a") == 2]
|
128
|
+
df[Polars.col("a") != 2]
|
129
|
+
df[Polars.col("a") > 2]
|
130
|
+
df[Polars.col("a") >= 2]
|
131
|
+
df[Polars.col("a") < 2]
|
132
|
+
df[Polars.col("a") <= 2]
|
133
|
+
```
|
134
|
+
|
135
|
+
And, or, and exclusive or
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
|
139
|
+
df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
|
140
|
+
df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
|
141
|
+
```
|
142
|
+
|
143
|
+
## Operations
|
144
|
+
|
145
|
+
Basic operations
|
146
|
+
|
147
|
+
```ruby
|
148
|
+
df["a"] + 5
|
149
|
+
df["a"] - 5
|
150
|
+
df["a"] * 5
|
151
|
+
df["a"] / 5
|
152
|
+
df["a"] % 5
|
153
|
+
df["a"] ** 2
|
154
|
+
df["a"].sqrt
|
155
|
+
df["a"].abs
|
156
|
+
```
|
157
|
+
|
158
|
+
Rounding
|
159
|
+
|
160
|
+
```ruby
|
161
|
+
df["a"].round(2)
|
162
|
+
df["a"].ceil
|
163
|
+
df["a"].floor
|
164
|
+
```
|
165
|
+
|
166
|
+
Logarithm
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
df["a"].log # natural log
|
170
|
+
df["a"].log(10)
|
171
|
+
```
|
172
|
+
|
173
|
+
Exponentiation
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
df["a"].exp
|
177
|
+
```
|
178
|
+
|
179
|
+
Trigonometric functions
|
180
|
+
|
181
|
+
```ruby
|
182
|
+
df["a"].sin
|
183
|
+
df["a"].cos
|
184
|
+
df["a"].tan
|
185
|
+
df["a"].asin
|
186
|
+
df["a"].acos
|
187
|
+
df["a"].atan
|
188
|
+
```
|
189
|
+
|
190
|
+
Hyperbolic functions
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
df["a"].sinh
|
194
|
+
df["a"].cosh
|
195
|
+
df["a"].tanh
|
196
|
+
df["a"].asinh
|
197
|
+
df["a"].acosh
|
198
|
+
df["a"].atanh
|
199
|
+
```
|
200
|
+
|
201
|
+
Summary statistics
|
202
|
+
|
203
|
+
```ruby
|
204
|
+
df["a"].sum
|
205
|
+
df["a"].mean
|
206
|
+
df["a"].median
|
207
|
+
df["a"].quantile(0.90)
|
208
|
+
df["a"].min
|
209
|
+
df["a"].max
|
210
|
+
df["a"].std
|
211
|
+
df["a"].var
|
212
|
+
```
|
213
|
+
|
214
|
+
## Grouping
|
215
|
+
|
216
|
+
Group
|
217
|
+
|
218
|
+
```ruby
|
219
|
+
df.groupby("a").count
|
220
|
+
```
|
221
|
+
|
222
|
+
Works with all summary statistics
|
223
|
+
|
224
|
+
```ruby
|
225
|
+
df.groupby("a").max
|
226
|
+
```
|
227
|
+
|
228
|
+
Multiple groups
|
229
|
+
|
230
|
+
```ruby
|
231
|
+
df.groupby(["a", "b"]).count
|
232
|
+
```
|
233
|
+
|
234
|
+
## Combining Data Frames
|
235
|
+
|
236
|
+
Add rows
|
237
|
+
|
238
|
+
```ruby
|
239
|
+
df.vstack(other_df)
|
240
|
+
```
|
241
|
+
|
242
|
+
Add columns
|
243
|
+
|
244
|
+
```ruby
|
245
|
+
df.hstack(other_df)
|
246
|
+
```
|
247
|
+
|
248
|
+
Inner join
|
249
|
+
|
250
|
+
```ruby
|
251
|
+
df.join(other_df, on: "a")
|
252
|
+
```
|
253
|
+
|
254
|
+
Left join
|
255
|
+
|
256
|
+
```ruby
|
257
|
+
df.join(other_df, on: "a", how: "left")
|
258
|
+
```
|
259
|
+
|
260
|
+
## Encoding
|
261
|
+
|
262
|
+
One-hot encoding
|
263
|
+
|
264
|
+
```ruby
|
265
|
+
df.to_dummies
|
266
|
+
```
|
267
|
+
|
268
|
+
## Conversion
|
269
|
+
|
270
|
+
Array of rows
|
271
|
+
|
272
|
+
```ruby
|
273
|
+
df.rows
|
274
|
+
```
|
275
|
+
|
276
|
+
Hash of series
|
277
|
+
|
278
|
+
```ruby
|
279
|
+
df.to_h
|
280
|
+
```
|
281
|
+
|
282
|
+
CSV
|
283
|
+
|
284
|
+
```ruby
|
285
|
+
df.to_csv
|
286
|
+
# or
|
287
|
+
df.write_csv("data.csv")
|
288
|
+
```
|
289
|
+
|
290
|
+
Parquet
|
291
|
+
|
292
|
+
```ruby
|
293
|
+
df.write_parquet("data.parquet")
|
294
|
+
```
|
295
|
+
|
296
|
+
## Types
|
297
|
+
|
298
|
+
You can specify column types when creating a data frame
|
299
|
+
|
300
|
+
```ruby
|
301
|
+
Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
|
302
|
+
```
|
303
|
+
|
304
|
+
Supported types are:
|
305
|
+
|
306
|
+
- boolean - `Boolean`
|
307
|
+
- float - `Float64`, `Float32`
|
308
|
+
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
309
|
+
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
310
|
+
- string - `Utf8`, `Categorical`
|
311
|
+
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
312
|
+
|
313
|
+
Get column types
|
314
|
+
|
315
|
+
```ruby
|
316
|
+
df.schema
|
317
|
+
```
|
318
|
+
|
319
|
+
For a specific column
|
320
|
+
|
321
|
+
```ruby
|
322
|
+
df["a"].dtype
|
323
|
+
```
|
324
|
+
|
325
|
+
Cast a column
|
326
|
+
|
327
|
+
```ruby
|
328
|
+
df["a"].cast(Polars::Int32)
|
329
|
+
```
|
330
|
+
|
70
331
|
## History
|
71
332
|
|
72
333
|
View the [changelog](CHANGELOG.md)
|
data/lib/polars/3.0/polars.so
CHANGED
Binary file
|
data/lib/polars/3.1/polars.so
CHANGED
Binary file
|
data/lib/polars/3.2/polars.so
CHANGED
Binary file
|
data/lib/polars/data_frame.rb
CHANGED
@@ -17,6 +17,7 @@ module Polars
|
|
17
17
|
# the orientation is inferred by matching the columns and data dimensions. If
|
18
18
|
# this does not yield conclusive results, column orientation is used.
|
19
19
|
def initialize(data = nil, columns: nil, orient: nil)
|
20
|
+
# TODO deprecate in favor of read_sql
|
20
21
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
21
22
|
result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
|
22
23
|
data = {}
|
@@ -275,6 +276,7 @@ module Polars
|
|
275
276
|
def height
|
276
277
|
_df.height
|
277
278
|
end
|
279
|
+
alias_method :count, :height
|
278
280
|
|
279
281
|
# Get the width of the DataFrame.
|
280
282
|
#
|
@@ -521,13 +523,13 @@ module Polars
|
|
521
523
|
return df.slice(row_selection, 1)
|
522
524
|
end
|
523
525
|
# df[2, "a"]
|
524
|
-
if col_selection.is_a?(String)
|
526
|
+
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
525
527
|
return self[col_selection][row_selection]
|
526
528
|
end
|
527
529
|
end
|
528
530
|
|
529
531
|
# column selection can be "a" and ["a", "b"]
|
530
|
-
if col_selection.is_a?(String)
|
532
|
+
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
531
533
|
col_selection = [col_selection]
|
532
534
|
end
|
533
535
|
|
@@ -553,8 +555,8 @@ module Polars
|
|
553
555
|
|
554
556
|
# select single column
|
555
557
|
# df["foo"]
|
556
|
-
if item.is_a?(String)
|
557
|
-
return Utils.wrap_s(_df.column(item))
|
558
|
+
if item.is_a?(String) || item.is_a?(Symbol)
|
559
|
+
return Utils.wrap_s(_df.column(item.to_s))
|
558
560
|
end
|
559
561
|
|
560
562
|
# df[idx]
|
@@ -574,6 +576,11 @@ module Polars
|
|
574
576
|
end
|
575
577
|
end
|
576
578
|
|
579
|
+
# Ruby-specific
|
580
|
+
if item.is_a?(Expr)
|
581
|
+
return filter(item)
|
582
|
+
end
|
583
|
+
|
577
584
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
578
585
|
end
|
579
586
|
|
@@ -797,6 +804,13 @@ module Polars
|
|
797
804
|
nil
|
798
805
|
end
|
799
806
|
|
807
|
+
# Write to comma-separated values (CSV) string.
|
808
|
+
#
|
809
|
+
# @return [String]
|
810
|
+
def to_csv(**options)
|
811
|
+
write_csv(**options)
|
812
|
+
end
|
813
|
+
|
800
814
|
# Write to Apache Avro file.
|
801
815
|
#
|
802
816
|
# @param file [String]
|
data/lib/polars/data_types.rb
CHANGED
data/lib/polars/io.rb
CHANGED
@@ -590,8 +590,31 @@ module Polars
|
|
590
590
|
DataFrame._read_ndjson(file)
|
591
591
|
end
|
592
592
|
|
593
|
-
#
|
594
|
-
#
|
593
|
+
# Read a SQL query into a DataFrame.
|
594
|
+
#
|
595
|
+
# @param sql [Object]
|
596
|
+
# ActiveRecord::Relation or ActiveRecord::Result.
|
597
|
+
#
|
598
|
+
# @return [DataFrame]
|
599
|
+
def read_sql(sql)
|
600
|
+
if !defined?(ActiveRecord)
|
601
|
+
raise Error, "Active Record not available"
|
602
|
+
end
|
603
|
+
|
604
|
+
result =
|
605
|
+
if sql.is_a?(ActiveRecord::Result)
|
606
|
+
sql
|
607
|
+
elsif sql.is_a?(ActiveRecord::Relation)
|
608
|
+
sql.connection.select_all(sql.to_sql)
|
609
|
+
else
|
610
|
+
raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
|
611
|
+
end
|
612
|
+
data = {}
|
613
|
+
result.columns.each_with_index do |k, i|
|
614
|
+
data[k] = result.rows.map { |r| r[i] }
|
615
|
+
end
|
616
|
+
DataFrame.new(data)
|
617
|
+
end
|
595
618
|
|
596
619
|
# def read_excel
|
597
620
|
# end
|
data/lib/polars/series.rb
CHANGED
@@ -1647,6 +1647,7 @@ module Polars
|
|
1647
1647
|
def len
|
1648
1648
|
_s.len
|
1649
1649
|
end
|
1650
|
+
alias_method :count, :len
|
1650
1651
|
alias_method :length, :len
|
1651
1652
|
|
1652
1653
|
# Cast between data types.
|
@@ -2183,6 +2184,7 @@ module Polars
|
|
2183
2184
|
def arcsin
|
2184
2185
|
super
|
2185
2186
|
end
|
2187
|
+
alias_method :asin, :arcsin
|
2186
2188
|
|
2187
2189
|
# Compute the element-wise value for the inverse cosine.
|
2188
2190
|
#
|
@@ -2202,6 +2204,7 @@ module Polars
|
|
2202
2204
|
def arccos
|
2203
2205
|
super
|
2204
2206
|
end
|
2207
|
+
alias_method :acos, :arccos
|
2205
2208
|
|
2206
2209
|
# Compute the element-wise value for the inverse tangent.
|
2207
2210
|
#
|
@@ -2221,6 +2224,7 @@ module Polars
|
|
2221
2224
|
def arctan
|
2222
2225
|
super
|
2223
2226
|
end
|
2227
|
+
alias_method :atan, :arctan
|
2224
2228
|
|
2225
2229
|
# Compute the element-wise value for the inverse hyperbolic sine.
|
2226
2230
|
#
|
@@ -2240,6 +2244,7 @@ module Polars
|
|
2240
2244
|
def arcsinh
|
2241
2245
|
super
|
2242
2246
|
end
|
2247
|
+
alias_method :asinh, :arcsinh
|
2243
2248
|
|
2244
2249
|
# Compute the element-wise value for the inverse hyperbolic cosine.
|
2245
2250
|
#
|
@@ -2260,6 +2265,7 @@ module Polars
|
|
2260
2265
|
def arccosh
|
2261
2266
|
super
|
2262
2267
|
end
|
2268
|
+
alias_method :acosh, :arccosh
|
2263
2269
|
|
2264
2270
|
# Compute the element-wise value for the inverse hyperbolic tangent.
|
2265
2271
|
#
|
@@ -2283,6 +2289,7 @@ module Polars
|
|
2283
2289
|
def arctanh
|
2284
2290
|
super
|
2285
2291
|
end
|
2292
|
+
alias_method :atanh, :arctanh
|
2286
2293
|
|
2287
2294
|
# Compute the element-wise value for the hyperbolic sine.
|
2288
2295
|
#
|
data/lib/polars/utils.rb
CHANGED
@@ -70,7 +70,7 @@ module Polars
|
|
70
70
|
end
|
71
71
|
|
72
72
|
def self.selection_to_rbexpr_list(exprs)
|
73
|
-
if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
73
|
+
if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
|
74
74
|
exprs = [exprs]
|
75
75
|
end
|
76
76
|
|
@@ -78,9 +78,9 @@ module Polars
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def self.expr_to_lit_or_expr(expr, str_to_lit: true)
|
81
|
-
if expr.is_a?(String) && !str_to_lit
|
81
|
+
if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
|
82
82
|
col(expr)
|
83
|
-
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
|
83
|
+
elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
|
84
84
|
lit(expr)
|
85
85
|
elsif expr.is_a?(Expr)
|
86
86
|
expr
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: aarch64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|