polars-df 0.2.0-x86_64-linux → 0.2.1-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b29f236908ce6ba564c391104421f045d9bf9fd1fcdd84d8fbd30e9999f66e54
4
- data.tar.gz: 9511bce38ac472c491596c5c929e781e608ab87edd81538312fd390c318aa61a
3
+ metadata.gz: f743e9640f5f15c0e569a023bf2866c3f273ae309c085168cc4d38647bf532d4
4
+ data.tar.gz: cfda09672859e7408a641a17b4d31532fd3d59552c0897639ecbbc8ec3e91ca2
5
5
  SHA512:
6
- metadata.gz: a92fda54be50f6153635943e7613d8fb2554f4b07c5318661dbd02cde6ee81eb710d5df51a0a9f951c9d57f1248726b5870149827b7b611cc5620a2df90bc6d5
7
- data.tar.gz: 8a0b8ae93b04e155f2d0c391ffae27f9e9749170e892dc4b10e7f4693d8dc2d7f0a5a5501ea301c4bcfadc306ba6839e907941fc58b2d8217c43f369f4a247ee
6
+ metadata.gz: 4bfa89bab471e2559e6952607064459562ca4d4a2f6f1101c54ce65dda5febd0d51a9acc65a701934b71270d67c207b6f4807b425e59c2ccd949f7eabc09b0f3
7
+ data.tar.gz: d8a0322152aa207c3e1a512c54b1bcc93f3ddb672e485f61d8fdcce49873551ae62dac7a131cc47c01d2dea8a766ac3913f54df7def66307132302ad2dd86392
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.1 (2023-01-18)
2
+
3
+ - Added `read_sql` method
4
+ - Added `to_csv` method
5
+ - Added support for symbol keys
6
+
1
7
  ## 0.2.0 (2023-01-14)
2
8
 
3
9
  - Updated Polars to 0.26.1
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.0"
1370
+ version = "0.2.1"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
data/README.md CHANGED
@@ -25,7 +25,13 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
+
30
+ ## Reference
31
+
32
+ - [Series](https://www.rubydoc.info/gems/polars-df/Polars/Series)
33
+ - [DataFrame](https://www.rubydoc.info/gems/polars-df/Polars/DataFrame)
34
+ - [LazyFrame](https://www.rubydoc.info/gems/polars-df/Polars/LazyFrame)
29
35
 
30
36
  ## Examples
31
37
 
@@ -46,7 +52,7 @@ Polars.read_parquet("file.parquet")
46
52
  From Active Record
47
53
 
48
54
  ```ruby
49
- Polars::DataFrame.new(User.all)
55
+ Polars.read_sql(User.all)
50
56
  ```
51
57
 
52
58
  From a hash
@@ -67,6 +73,261 @@ Polars::DataFrame.new([
67
73
  ])
68
74
  ```
69
75
 
76
+ ## Attributes
77
+
78
+ Get number of rows
79
+
80
+ ```ruby
81
+ df.height
82
+ ```
83
+
84
+ Get column names
85
+
86
+ ```ruby
87
+ df.columns
88
+ ```
89
+
90
+ Check if a column exists
91
+
92
+ ```ruby
93
+ df.include?(name)
94
+ ```
95
+
96
+ ## Selecting Data
97
+
98
+ Select a column
99
+
100
+ ```ruby
101
+ df["a"]
102
+ ```
103
+
104
+ Select multiple columns
105
+
106
+ ```ruby
107
+ df[["a", "b"]]
108
+ ```
109
+
110
+ Select first rows
111
+
112
+ ```ruby
113
+ df.head
114
+ ```
115
+
116
+ Select last rows
117
+
118
+ ```ruby
119
+ df.tail
120
+ ```
121
+
122
+ ## Filtering
123
+
124
+ Filter on a condition
125
+
126
+ ```ruby
127
+ df[Polars.col("a") == 2]
128
+ df[Polars.col("a") != 2]
129
+ df[Polars.col("a") > 2]
130
+ df[Polars.col("a") >= 2]
131
+ df[Polars.col("a") < 2]
132
+ df[Polars.col("a") <= 2]
133
+ ```
134
+
135
+ And, or, and exclusive or
136
+
137
+ ```ruby
138
+ df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
139
+ df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
140
+ df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
141
+ ```
142
+
143
+ ## Operations
144
+
145
+ Basic operations
146
+
147
+ ```ruby
148
+ df["a"] + 5
149
+ df["a"] - 5
150
+ df["a"] * 5
151
+ df["a"] / 5
152
+ df["a"] % 5
153
+ df["a"] ** 2
154
+ df["a"].sqrt
155
+ df["a"].abs
156
+ ```
157
+
158
+ Rounding
159
+
160
+ ```ruby
161
+ df["a"].round(2)
162
+ df["a"].ceil
163
+ df["a"].floor
164
+ ```
165
+
166
+ Logarithm
167
+
168
+ ```ruby
169
+ df["a"].log # natural log
170
+ df["a"].log(10)
171
+ ```
172
+
173
+ Exponentiation
174
+
175
+ ```ruby
176
+ df["a"].exp
177
+ ```
178
+
179
+ Trigonometric functions
180
+
181
+ ```ruby
182
+ df["a"].sin
183
+ df["a"].cos
184
+ df["a"].tan
185
+ df["a"].asin
186
+ df["a"].acos
187
+ df["a"].atan
188
+ ```
189
+
190
+ Hyperbolic functions
191
+
192
+ ```ruby
193
+ df["a"].sinh
194
+ df["a"].cosh
195
+ df["a"].tanh
196
+ df["a"].asinh
197
+ df["a"].acosh
198
+ df["a"].atanh
199
+ ```
200
+
201
+ Summary statistics
202
+
203
+ ```ruby
204
+ df["a"].sum
205
+ df["a"].mean
206
+ df["a"].median
207
+ df["a"].quantile(0.90)
208
+ df["a"].min
209
+ df["a"].max
210
+ df["a"].std
211
+ df["a"].var
212
+ ```
213
+
214
+ ## Grouping
215
+
216
+ Group
217
+
218
+ ```ruby
219
+ df.groupby("a").count
220
+ ```
221
+
222
+ Works with all summary statistics
223
+
224
+ ```ruby
225
+ df.groupby("a").max
226
+ ```
227
+
228
+ Multiple groups
229
+
230
+ ```ruby
231
+ df.groupby(["a", "b"]).count
232
+ ```
233
+
234
+ ## Combining Data Frames
235
+
236
+ Add rows
237
+
238
+ ```ruby
239
+ df.vstack(other_df)
240
+ ```
241
+
242
+ Add columns
243
+
244
+ ```ruby
245
+ df.hstack(other_df)
246
+ ```
247
+
248
+ Inner join
249
+
250
+ ```ruby
251
+ df.join(other_df, on: "a")
252
+ ```
253
+
254
+ Left join
255
+
256
+ ```ruby
257
+ df.join(other_df, on: "a", how: "left")
258
+ ```
259
+
260
+ ## Encoding
261
+
262
+ One-hot encoding
263
+
264
+ ```ruby
265
+ df.to_dummies
266
+ ```
267
+
268
+ ## Conversion
269
+
270
+ Array of rows
271
+
272
+ ```ruby
273
+ df.rows
274
+ ```
275
+
276
+ Hash of series
277
+
278
+ ```ruby
279
+ df.to_h
280
+ ```
281
+
282
+ CSV
283
+
284
+ ```ruby
285
+ df.to_csv
286
+ # or
287
+ df.write_csv("data.csv")
288
+ ```
289
+
290
+ Parquet
291
+
292
+ ```ruby
293
+ df.write_parquet("data.parquet")
294
+ ```
295
+
296
+ ## Types
297
+
298
+ You can specify column types when creating a data frame
299
+
300
+ ```ruby
301
+ Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
302
+ ```
303
+
304
+ Supported types are:
305
+
306
+ - boolean - `Boolean`
307
+ - float - `Float64`, `Float32`
308
+ - integer - `Int64`, `Int32`, `Int16`, `Int8`
309
+ - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
310
+ - string - `Utf8`, `Categorical`
311
+ - temporal - `Date`, `Datetime`, `Time`, `Duration`
312
+
313
+ Get column types
314
+
315
+ ```ruby
316
+ df.schema
317
+ ```
318
+
319
+ For a specific column
320
+
321
+ ```ruby
322
+ df["a"].dtype
323
+ ```
324
+
325
+ Cast a column
326
+
327
+ ```ruby
328
+ df["a"].cast(Polars::Int32)
329
+ ```
330
+
70
331
  ## History
71
332
 
72
333
  View the [changelog](CHANGELOG.md)
Binary file
Binary file
Binary file
@@ -17,6 +17,7 @@ module Polars
17
17
  # the orientation is inferred by matching the columns and data dimensions. If
18
18
  # this does not yield conclusive results, column orientation is used.
19
19
  def initialize(data = nil, columns: nil, orient: nil)
20
+ # TODO deprecate in favor of read_sql
20
21
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
21
22
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
22
23
  data = {}
@@ -275,6 +276,7 @@ module Polars
275
276
  def height
276
277
  _df.height
277
278
  end
279
+ alias_method :count, :height
278
280
 
279
281
  # Get the width of the DataFrame.
280
282
  #
@@ -521,13 +523,13 @@ module Polars
521
523
  return df.slice(row_selection, 1)
522
524
  end
523
525
  # df[2, "a"]
524
- if col_selection.is_a?(String)
526
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
525
527
  return self[col_selection][row_selection]
526
528
  end
527
529
  end
528
530
 
529
531
  # column selection can be "a" and ["a", "b"]
530
- if col_selection.is_a?(String)
532
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
531
533
  col_selection = [col_selection]
532
534
  end
533
535
 
@@ -553,8 +555,8 @@ module Polars
553
555
 
554
556
  # select single column
555
557
  # df["foo"]
556
- if item.is_a?(String)
557
- return Utils.wrap_s(_df.column(item))
558
+ if item.is_a?(String) || item.is_a?(Symbol)
559
+ return Utils.wrap_s(_df.column(item.to_s))
558
560
  end
559
561
 
560
562
  # df[idx]
@@ -574,6 +576,11 @@ module Polars
574
576
  end
575
577
  end
576
578
 
579
+ # Ruby-specific
580
+ if item.is_a?(Expr)
581
+ return filter(item)
582
+ end
583
+
577
584
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
578
585
  end
579
586
 
@@ -797,6 +804,13 @@ module Polars
797
804
  nil
798
805
  end
799
806
 
807
+ # Write to comma-separated values (CSV) string.
808
+ #
809
+ # @return [String]
810
+ def to_csv(**options)
811
+ write_csv(**options)
812
+ end
813
+
800
814
  # Write to Apache Avro file.
801
815
  #
802
816
  # @param file [String]
@@ -93,7 +93,7 @@ module Polars
93
93
  class Time < DataType
94
94
  end
95
95
 
96
- # Type for wrapping arbitrary Python objects.
96
+ # Type for wrapping arbitrary Ruby objects.
97
97
  class Object < DataType
98
98
  end
99
99
 
data/lib/polars/io.rb CHANGED
@@ -590,8 +590,31 @@ module Polars
590
590
  DataFrame._read_ndjson(file)
591
591
  end
592
592
 
593
- # def read_sql
594
- # end
593
+ # Read a SQL query into a DataFrame.
594
+ #
595
+ # @param sql [Object]
596
+ # ActiveRecord::Relation or ActiveRecord::Result.
597
+ #
598
+ # @return [DataFrame]
599
+ def read_sql(sql)
600
+ if !defined?(ActiveRecord)
601
+ raise Error, "Active Record not available"
602
+ end
603
+
604
+ result =
605
+ if sql.is_a?(ActiveRecord::Result)
606
+ sql
607
+ elsif sql.is_a?(ActiveRecord::Relation)
608
+ sql.connection.select_all(sql.to_sql)
609
+ else
610
+ raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
611
+ end
612
+ data = {}
613
+ result.columns.each_with_index do |k, i|
614
+ data[k] = result.rows.map { |r| r[i] }
615
+ end
616
+ DataFrame.new(data)
617
+ end
595
618
 
596
619
  # def read_excel
597
620
  # end
data/lib/polars/series.rb CHANGED
@@ -1647,6 +1647,7 @@ module Polars
1647
1647
  def len
1648
1648
  _s.len
1649
1649
  end
1650
+ alias_method :count, :len
1650
1651
  alias_method :length, :len
1651
1652
 
1652
1653
  # Cast between data types.
@@ -2183,6 +2184,7 @@ module Polars
2183
2184
  def arcsin
2184
2185
  super
2185
2186
  end
2187
+ alias_method :asin, :arcsin
2186
2188
 
2187
2189
  # Compute the element-wise value for the inverse cosine.
2188
2190
  #
@@ -2202,6 +2204,7 @@ module Polars
2202
2204
  def arccos
2203
2205
  super
2204
2206
  end
2207
+ alias_method :acos, :arccos
2205
2208
 
2206
2209
  # Compute the element-wise value for the inverse tangent.
2207
2210
  #
@@ -2221,6 +2224,7 @@ module Polars
2221
2224
  def arctan
2222
2225
  super
2223
2226
  end
2227
+ alias_method :atan, :arctan
2224
2228
 
2225
2229
  # Compute the element-wise value for the inverse hyperbolic sine.
2226
2230
  #
@@ -2240,6 +2244,7 @@ module Polars
2240
2244
  def arcsinh
2241
2245
  super
2242
2246
  end
2247
+ alias_method :asinh, :arcsinh
2243
2248
 
2244
2249
  # Compute the element-wise value for the inverse hyperbolic cosine.
2245
2250
  #
@@ -2260,6 +2265,7 @@ module Polars
2260
2265
  def arccosh
2261
2266
  super
2262
2267
  end
2268
+ alias_method :acosh, :arccosh
2263
2269
 
2264
2270
  # Compute the element-wise value for the inverse hyperbolic tangent.
2265
2271
  #
@@ -2283,6 +2289,7 @@ module Polars
2283
2289
  def arctanh
2284
2290
  super
2285
2291
  end
2292
+ alias_method :atanh, :arctanh
2286
2293
 
2287
2294
  # Compute the element-wise value for the hyperbolic sine.
2288
2295
  #
data/lib/polars/utils.rb CHANGED
@@ -70,7 +70,7 @@ module Polars
70
70
  end
71
71
 
72
72
  def self.selection_to_rbexpr_list(exprs)
73
- if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
73
+ if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
74
  exprs = [exprs]
75
75
  end
76
76
 
@@ -78,9 +78,9 @@ module Polars
78
78
  end
79
79
 
80
80
  def self.expr_to_lit_or_expr(expr, str_to_lit: true)
81
- if expr.is_a?(String) && !str_to_lit
81
+ if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
82
82
  col(expr)
83
- elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
83
+ elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
84
84
  lit(expr)
85
85
  elsif expr.is_a?(Expr)
86
86
  expr
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.1"
4
4
  end
data/lib/polars.rb CHANGED
@@ -7,6 +7,7 @@ end
7
7
 
8
8
  # stdlib
9
9
  require "date"
10
+ require "stringio"
10
11
 
11
12
  # modules
12
13
  require_relative "polars/expr_dispatch"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-15 00:00:00.000000000 Z
11
+ date: 2023-01-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org