polars-df 0.2.0-arm64-darwin → 0.2.1-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0f6637d79970b4107c0c67e57d34a12ce12a59044a790d1df8401d66d93d6bb
4
- data.tar.gz: 681510a7b5b2b0fc62ae441b1208bff040b752dd901d2552834790debeb54dc4
3
+ metadata.gz: cf332966a5778ee2fa3b2586e22e22ff473ba1f8b90391d40672a7760451d37f
4
+ data.tar.gz: 94de770922b33f95c38d72a4ffa5b22e7f8944636a2a2f1b86aa1c75028d2485
5
5
  SHA512:
6
- metadata.gz: c2cdcd59fb0998d0d4422a3b72e602485be29dbca26fca862629aa382aa908a6bd415d31c5b72262cc5d2856d65ecf95a08696542a83c2e2c17db08e83e5b5a8
7
- data.tar.gz: e883089dd37a2616740577355a11cfba8ebad332345ea68a91a1d6f6b803538eaf103c161869344acedc1c02b874c2d41c4bc6c5aec7155362ebffff51735059
6
+ metadata.gz: 941e03500880702501b7f91deb0f7a108e02d011a7f112a843a715e4ce63133b570b37d38002087ef0093f8d29381ec393b310f5128e6d86718ee633ba836d46
7
+ data.tar.gz: 5ba4822fee712a8e83d1eb1a2838e052bbfd2c852922e94b4780b5aa6302a8e2febc2ffb3d96b01b63623932ff6f4d31fef3f8938df05e25d431b757a2e81ca0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## 0.2.1 (2023-01-18)
2
+
3
+ - Added `read_sql` method
4
+ - Added `to_csv` method
5
+ - Added support for symbol keys
6
+
1
7
  ## 0.2.0 (2023-01-14)
2
8
 
3
9
  - Updated Polars to 0.26.1
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.0"
1370
+ version = "0.2.1"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
data/README.md CHANGED
@@ -25,7 +25,13 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
+
30
+ ## Reference
31
+
32
+ - [Series](https://www.rubydoc.info/gems/polars-df/Polars/Series)
33
+ - [DataFrame](https://www.rubydoc.info/gems/polars-df/Polars/DataFrame)
34
+ - [LazyFrame](https://www.rubydoc.info/gems/polars-df/Polars/LazyFrame)
29
35
 
30
36
  ## Examples
31
37
 
@@ -46,7 +52,7 @@ Polars.read_parquet("file.parquet")
46
52
  From Active Record
47
53
 
48
54
  ```ruby
49
- Polars::DataFrame.new(User.all)
55
+ Polars.read_sql(User.all)
50
56
  ```
51
57
 
52
58
  From a hash
@@ -67,6 +73,261 @@ Polars::DataFrame.new([
67
73
  ])
68
74
  ```
69
75
 
76
+ ## Attributes
77
+
78
+ Get number of rows
79
+
80
+ ```ruby
81
+ df.height
82
+ ```
83
+
84
+ Get column names
85
+
86
+ ```ruby
87
+ df.columns
88
+ ```
89
+
90
+ Check if a column exists
91
+
92
+ ```ruby
93
+ df.include?(name)
94
+ ```
95
+
96
+ ## Selecting Data
97
+
98
+ Select a column
99
+
100
+ ```ruby
101
+ df["a"]
102
+ ```
103
+
104
+ Select multiple columns
105
+
106
+ ```ruby
107
+ df[["a", "b"]]
108
+ ```
109
+
110
+ Select first rows
111
+
112
+ ```ruby
113
+ df.head
114
+ ```
115
+
116
+ Select last rows
117
+
118
+ ```ruby
119
+ df.tail
120
+ ```
121
+
122
+ ## Filtering
123
+
124
+ Filter on a condition
125
+
126
+ ```ruby
127
+ df[Polars.col("a") == 2]
128
+ df[Polars.col("a") != 2]
129
+ df[Polars.col("a") > 2]
130
+ df[Polars.col("a") >= 2]
131
+ df[Polars.col("a") < 2]
132
+ df[Polars.col("a") <= 2]
133
+ ```
134
+
135
+ And, or, and exclusive or
136
+
137
+ ```ruby
138
+ df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
139
+ df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
140
+ df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
141
+ ```
142
+
143
+ ## Operations
144
+
145
+ Basic operations
146
+
147
+ ```ruby
148
+ df["a"] + 5
149
+ df["a"] - 5
150
+ df["a"] * 5
151
+ df["a"] / 5
152
+ df["a"] % 5
153
+ df["a"] ** 2
154
+ df["a"].sqrt
155
+ df["a"].abs
156
+ ```
157
+
158
+ Rounding
159
+
160
+ ```ruby
161
+ df["a"].round(2)
162
+ df["a"].ceil
163
+ df["a"].floor
164
+ ```
165
+
166
+ Logarithm
167
+
168
+ ```ruby
169
+ df["a"].log # natural log
170
+ df["a"].log(10)
171
+ ```
172
+
173
+ Exponentiation
174
+
175
+ ```ruby
176
+ df["a"].exp
177
+ ```
178
+
179
+ Trigonometric functions
180
+
181
+ ```ruby
182
+ df["a"].sin
183
+ df["a"].cos
184
+ df["a"].tan
185
+ df["a"].asin
186
+ df["a"].acos
187
+ df["a"].atan
188
+ ```
189
+
190
+ Hyperbolic functions
191
+
192
+ ```ruby
193
+ df["a"].sinh
194
+ df["a"].cosh
195
+ df["a"].tanh
196
+ df["a"].asinh
197
+ df["a"].acosh
198
+ df["a"].atanh
199
+ ```
200
+
201
+ Summary statistics
202
+
203
+ ```ruby
204
+ df["a"].sum
205
+ df["a"].mean
206
+ df["a"].median
207
+ df["a"].quantile(0.90)
208
+ df["a"].min
209
+ df["a"].max
210
+ df["a"].std
211
+ df["a"].var
212
+ ```
213
+
214
+ ## Grouping
215
+
216
+ Group
217
+
218
+ ```ruby
219
+ df.groupby("a").count
220
+ ```
221
+
222
+ Works with all summary statistics
223
+
224
+ ```ruby
225
+ df.groupby("a").max
226
+ ```
227
+
228
+ Multiple groups
229
+
230
+ ```ruby
231
+ df.groupby(["a", "b"]).count
232
+ ```
233
+
234
+ ## Combining Data Frames
235
+
236
+ Add rows
237
+
238
+ ```ruby
239
+ df.vstack(other_df)
240
+ ```
241
+
242
+ Add columns
243
+
244
+ ```ruby
245
+ df.hstack(other_df)
246
+ ```
247
+
248
+ Inner join
249
+
250
+ ```ruby
251
+ df.join(other_df, on: "a")
252
+ ```
253
+
254
+ Left join
255
+
256
+ ```ruby
257
+ df.join(other_df, on: "a", how: "left")
258
+ ```
259
+
260
+ ## Encoding
261
+
262
+ One-hot encoding
263
+
264
+ ```ruby
265
+ df.to_dummies
266
+ ```
267
+
268
+ ## Conversion
269
+
270
+ Array of rows
271
+
272
+ ```ruby
273
+ df.rows
274
+ ```
275
+
276
+ Hash of series
277
+
278
+ ```ruby
279
+ df.to_h
280
+ ```
281
+
282
+ CSV
283
+
284
+ ```ruby
285
+ df.to_csv
286
+ # or
287
+ df.write_csv("data.csv")
288
+ ```
289
+
290
+ Parquet
291
+
292
+ ```ruby
293
+ df.write_parquet("data.parquet")
294
+ ```
295
+
296
+ ## Types
297
+
298
+ You can specify column types when creating a data frame
299
+
300
+ ```ruby
301
+ Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
302
+ ```
303
+
304
+ Supported types are:
305
+
306
+ - boolean - `Boolean`
307
+ - float - `Float64`, `Float32`
308
+ - integer - `Int64`, `Int32`, `Int16`, `Int8`
309
+ - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
310
+ - string - `Utf8`, `Categorical`
311
+ - temporal - `Date`, `Datetime`, `Time`, `Duration`
312
+
313
+ Get column types
314
+
315
+ ```ruby
316
+ df.schema
317
+ ```
318
+
319
+ For a specific column
320
+
321
+ ```ruby
322
+ df["a"].dtype
323
+ ```
324
+
325
+ Cast a column
326
+
327
+ ```ruby
328
+ df["a"].cast(Polars::Int32)
329
+ ```
330
+
70
331
  ## History
71
332
 
72
333
  View the [changelog](CHANGELOG.md)
Binary file
Binary file
Binary file
@@ -17,6 +17,7 @@ module Polars
17
17
  # the orientation is inferred by matching the columns and data dimensions. If
18
18
  # this does not yield conclusive results, column orientation is used.
19
19
  def initialize(data = nil, columns: nil, orient: nil)
20
+ # TODO deprecate in favor of read_sql
20
21
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
21
22
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
22
23
  data = {}
@@ -275,6 +276,7 @@ module Polars
275
276
  def height
276
277
  _df.height
277
278
  end
279
+ alias_method :count, :height
278
280
 
279
281
  # Get the width of the DataFrame.
280
282
  #
@@ -521,13 +523,13 @@ module Polars
521
523
  return df.slice(row_selection, 1)
522
524
  end
523
525
  # df[2, "a"]
524
- if col_selection.is_a?(String)
526
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
525
527
  return self[col_selection][row_selection]
526
528
  end
527
529
  end
528
530
 
529
531
  # column selection can be "a" and ["a", "b"]
530
- if col_selection.is_a?(String)
532
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
531
533
  col_selection = [col_selection]
532
534
  end
533
535
 
@@ -553,8 +555,8 @@ module Polars
553
555
 
554
556
  # select single column
555
557
  # df["foo"]
556
- if item.is_a?(String)
557
- return Utils.wrap_s(_df.column(item))
558
+ if item.is_a?(String) || item.is_a?(Symbol)
559
+ return Utils.wrap_s(_df.column(item.to_s))
558
560
  end
559
561
 
560
562
  # df[idx]
@@ -574,6 +576,11 @@ module Polars
574
576
  end
575
577
  end
576
578
 
579
+ # Ruby-specific
580
+ if item.is_a?(Expr)
581
+ return filter(item)
582
+ end
583
+
577
584
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
578
585
  end
579
586
 
@@ -797,6 +804,13 @@ module Polars
797
804
  nil
798
805
  end
799
806
 
807
+ # Write to comma-separated values (CSV) string.
808
+ #
809
+ # @return [String]
810
+ def to_csv(**options)
811
+ write_csv(**options)
812
+ end
813
+
800
814
  # Write to Apache Avro file.
801
815
  #
802
816
  # @param file [String]
@@ -93,7 +93,7 @@ module Polars
93
93
  class Time < DataType
94
94
  end
95
95
 
96
- # Type for wrapping arbitrary Python objects.
96
+ # Type for wrapping arbitrary Ruby objects.
97
97
  class Object < DataType
98
98
  end
99
99
 
data/lib/polars/io.rb CHANGED
@@ -590,8 +590,31 @@ module Polars
590
590
  DataFrame._read_ndjson(file)
591
591
  end
592
592
 
593
- # def read_sql
594
- # end
593
+ # Read a SQL query into a DataFrame.
594
+ #
595
+ # @param sql [Object]
596
+ # ActiveRecord::Relation or ActiveRecord::Result.
597
+ #
598
+ # @return [DataFrame]
599
+ def read_sql(sql)
600
+ if !defined?(ActiveRecord)
601
+ raise Error, "Active Record not available"
602
+ end
603
+
604
+ result =
605
+ if sql.is_a?(ActiveRecord::Result)
606
+ sql
607
+ elsif sql.is_a?(ActiveRecord::Relation)
608
+ sql.connection.select_all(sql.to_sql)
609
+ else
610
+ raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
611
+ end
612
+ data = {}
613
+ result.columns.each_with_index do |k, i|
614
+ data[k] = result.rows.map { |r| r[i] }
615
+ end
616
+ DataFrame.new(data)
617
+ end
595
618
 
596
619
  # def read_excel
597
620
  # end
data/lib/polars/series.rb CHANGED
@@ -1647,6 +1647,7 @@ module Polars
1647
1647
  def len
1648
1648
  _s.len
1649
1649
  end
1650
+ alias_method :count, :len
1650
1651
  alias_method :length, :len
1651
1652
 
1652
1653
  # Cast between data types.
@@ -2183,6 +2184,7 @@ module Polars
2183
2184
  def arcsin
2184
2185
  super
2185
2186
  end
2187
+ alias_method :asin, :arcsin
2186
2188
 
2187
2189
  # Compute the element-wise value for the inverse cosine.
2188
2190
  #
@@ -2202,6 +2204,7 @@ module Polars
2202
2204
  def arccos
2203
2205
  super
2204
2206
  end
2207
+ alias_method :acos, :arccos
2205
2208
 
2206
2209
  # Compute the element-wise value for the inverse tangent.
2207
2210
  #
@@ -2221,6 +2224,7 @@ module Polars
2221
2224
  def arctan
2222
2225
  super
2223
2226
  end
2227
+ alias_method :atan, :arctan
2224
2228
 
2225
2229
  # Compute the element-wise value for the inverse hyperbolic sine.
2226
2230
  #
@@ -2240,6 +2244,7 @@ module Polars
2240
2244
  def arcsinh
2241
2245
  super
2242
2246
  end
2247
+ alias_method :asinh, :arcsinh
2243
2248
 
2244
2249
  # Compute the element-wise value for the inverse hyperbolic cosine.
2245
2250
  #
@@ -2260,6 +2265,7 @@ module Polars
2260
2265
  def arccosh
2261
2266
  super
2262
2267
  end
2268
+ alias_method :acosh, :arccosh
2263
2269
 
2264
2270
  # Compute the element-wise value for the inverse hyperbolic tangent.
2265
2271
  #
@@ -2283,6 +2289,7 @@ module Polars
2283
2289
  def arctanh
2284
2290
  super
2285
2291
  end
2292
+ alias_method :atanh, :arctanh
2286
2293
 
2287
2294
  # Compute the element-wise value for the hyperbolic sine.
2288
2295
  #
data/lib/polars/utils.rb CHANGED
@@ -70,7 +70,7 @@ module Polars
70
70
  end
71
71
 
72
72
  def self.selection_to_rbexpr_list(exprs)
73
- if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
73
+ if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
74
  exprs = [exprs]
75
75
  end
76
76
 
@@ -78,9 +78,9 @@ module Polars
78
78
  end
79
79
 
80
80
  def self.expr_to_lit_or_expr(expr, str_to_lit: true)
81
- if expr.is_a?(String) && !str_to_lit
81
+ if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
82
82
  col(expr)
83
- elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
83
+ elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
84
84
  lit(expr)
85
85
  elsif expr.is_a?(Expr)
86
86
  expr
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.1"
4
4
  end
data/lib/polars.rb CHANGED
@@ -7,6 +7,7 @@ end
7
7
 
8
8
  # stdlib
9
9
  require "date"
10
+ require "stringio"
10
11
 
11
12
  # modules
12
13
  require_relative "polars/expr_dispatch"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-15 00:00:00.000000000 Z
11
+ date: 2023-01-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org