polars-df 0.2.0-x86_64-linux → 0.2.2-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b29f236908ce6ba564c391104421f045d9bf9fd1fcdd84d8fbd30e9999f66e54
4
- data.tar.gz: 9511bce38ac472c491596c5c929e781e608ab87edd81538312fd390c318aa61a
3
+ metadata.gz: 6cc0fce7d6a3a5a5e6aad0422b6d6fc5f74894ca27881de1ea363c8c5ac77290
4
+ data.tar.gz: bf2921df3c70489b0be71e4e8043fa0cff4b9f366f1f4744c261bfc8dacc1c76
5
5
  SHA512:
6
- metadata.gz: a92fda54be50f6153635943e7613d8fb2554f4b07c5318661dbd02cde6ee81eb710d5df51a0a9f951c9d57f1248726b5870149827b7b611cc5620a2df90bc6d5
7
- data.tar.gz: 8a0b8ae93b04e155f2d0c391ffae27f9e9749170e892dc4b10e7f4693d8dc2d7f0a5a5501ea301c4bcfadc306ba6839e907941fc58b2d8217c43f369f4a247ee
6
+ metadata.gz: 9670316d6dc4483de1c16e934a507399b332d48ef779ccb11b19ae35761f56bcb34bba87b2a77855bad788cddbb5f76f0afb1eb8ec593bef5eff99d69e769437
7
+ data.tar.gz: 231985664f2e28e965751aa6a6b0c518d206caf084c2ce464dfbf4230420df07beb5465a49e0538f5004d805f9a269883980cf8a999858488be3dce11f24bee2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## 0.2.2 (2023-01-20)
2
+
3
+ - Added support for strings to `read_sql` method
4
+ - Improved indexing
5
+ - Fixed error with precompiled gem on Mac ARM
6
+
7
+ ## 0.2.1 (2023-01-18)
8
+
9
+ - Added `read_sql` method
10
+ - Added `to_csv` method
11
+ - Added support for symbol keys
12
+
1
13
  ## 0.2.0 (2023-01-14)
2
14
 
3
15
  - Updated Polars to 0.26.1
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.0"
1370
+ version = "0.2.2"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
@@ -1640,6 +1640,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
1640
1640
  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1641
1641
  DEALINGS IN THE SOFTWARE.
1642
1642
 
1643
+ ================================================================================
1644
+ array-init-cursor LICENSE-MIT
1645
+ ================================================================================
1646
+
1647
+ MIT License
1648
+
1649
+ Copyright (c) 2021 The Planus Project Developers
1650
+
1651
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1652
+ of this software and associated documentation files (the "Software"), to deal
1653
+ in the Software without restriction, including without limitation the rights
1654
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1655
+ copies of the Software, and to permit persons to whom the Software is
1656
+ furnished to do so, subject to the following conditions:
1657
+
1658
+ The above copyright notice and this permission notice shall be included in all
1659
+ copies or substantial portions of the Software.
1660
+
1661
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1662
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1663
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1664
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1665
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1666
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1667
+ SOFTWARE.
1668
+
1669
+
1643
1670
  ================================================================================
1644
1671
  array-init-cursor LICENSE-APACHE
1645
1672
  ================================================================================
@@ -1822,33 +1849,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1822
1849
  END OF TERMS AND CONDITIONS
1823
1850
 
1824
1851
 
1825
- ================================================================================
1826
- array-init-cursor LICENSE-MIT
1827
- ================================================================================
1828
-
1829
- MIT License
1830
-
1831
- Copyright (c) 2021 The Planus Project Developers
1832
-
1833
- Permission is hereby granted, free of charge, to any person obtaining a copy
1834
- of this software and associated documentation files (the "Software"), to deal
1835
- in the Software without restriction, including without limitation the rights
1836
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1837
- copies of the Software, and to permit persons to whom the Software is
1838
- furnished to do so, subject to the following conditions:
1839
-
1840
- The above copyright notice and this permission notice shall be included in all
1841
- copies or substantial portions of the Software.
1842
-
1843
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1844
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1845
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1846
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1847
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1848
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1849
- SOFTWARE.
1850
-
1851
-
1852
1852
  ================================================================================
1853
1853
  arrow-format LICENSE
1854
1854
  ================================================================================
@@ -14571,6 +14571,36 @@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
14571
14571
  ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
14572
14572
  --------------------------------------------------------------------------------
14573
14573
 
14574
+ ================================================================================
14575
+ jemallocator LICENSE-MIT
14576
+ ================================================================================
14577
+
14578
+ Copyright (c) 2014 Alex Crichton
14579
+
14580
+ Permission is hereby granted, free of charge, to any
14581
+ person obtaining a copy of this software and associated
14582
+ documentation files (the "Software"), to deal in the
14583
+ Software without restriction, including without
14584
+ limitation the rights to use, copy, modify, merge,
14585
+ publish, distribute, sublicense, and/or sell copies of
14586
+ the Software, and to permit persons to whom the Software
14587
+ is furnished to do so, subject to the following
14588
+ conditions:
14589
+
14590
+ The above copyright notice and this permission notice
14591
+ shall be included in all copies or substantial portions
14592
+ of the Software.
14593
+
14594
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
14595
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
14596
+ TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14597
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
14598
+ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
14599
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14600
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
14601
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
14602
+ DEALINGS IN THE SOFTWARE.
14603
+
14574
14604
  ================================================================================
14575
14605
  jemallocator LICENSE-APACHE
14576
14606
  ================================================================================
@@ -14777,36 +14807,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14777
14807
  See the License for the specific language governing permissions and
14778
14808
  limitations under the License.
14779
14809
 
14780
- ================================================================================
14781
- jemallocator LICENSE-MIT
14782
- ================================================================================
14783
-
14784
- Copyright (c) 2014 Alex Crichton
14785
-
14786
- Permission is hereby granted, free of charge, to any
14787
- person obtaining a copy of this software and associated
14788
- documentation files (the "Software"), to deal in the
14789
- Software without restriction, including without
14790
- limitation the rights to use, copy, modify, merge,
14791
- publish, distribute, sublicense, and/or sell copies of
14792
- the Software, and to permit persons to whom the Software
14793
- is furnished to do so, subject to the following
14794
- conditions:
14795
-
14796
- The above copyright notice and this permission notice
14797
- shall be included in all copies or substantial portions
14798
- of the Software.
14799
-
14800
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
14801
- ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
14802
- TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14803
- PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
14804
- SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
14805
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14806
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
14807
- IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
14808
- DEALINGS IN THE SOFTWARE.
14809
-
14810
14810
  ================================================================================
14811
14811
  jobserver LICENSE-APACHE
14812
14812
  ================================================================================
@@ -27075,6 +27075,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
27075
27075
  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27076
27076
  DEALINGS IN THE SOFTWARE.
27077
27077
 
27078
+ ================================================================================
27079
+ planus LICENSE-MIT
27080
+ ================================================================================
27081
+
27082
+ MIT License
27083
+
27084
+ Copyright (c) 2021 The Planus Project Developers
27085
+
27086
+ Permission is hereby granted, free of charge, to any person obtaining a copy
27087
+ of this software and associated documentation files (the "Software"), to deal
27088
+ in the Software without restriction, including without limitation the rights
27089
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27090
+ copies of the Software, and to permit persons to whom the Software is
27091
+ furnished to do so, subject to the following conditions:
27092
+
27093
+ The above copyright notice and this permission notice shall be included in all
27094
+ copies or substantial portions of the Software.
27095
+
27096
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27097
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27098
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27099
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27100
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27101
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27102
+ SOFTWARE.
27103
+
27104
+
27078
27105
  ================================================================================
27079
27106
  planus LICENSE-APACHE
27080
27107
  ================================================================================
@@ -27257,33 +27284,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
27257
27284
  END OF TERMS AND CONDITIONS
27258
27285
 
27259
27286
 
27260
- ================================================================================
27261
- planus LICENSE-MIT
27262
- ================================================================================
27263
-
27264
- MIT License
27265
-
27266
- Copyright (c) 2021 The Planus Project Developers
27267
-
27268
- Permission is hereby granted, free of charge, to any person obtaining a copy
27269
- of this software and associated documentation files (the "Software"), to deal
27270
- in the Software without restriction, including without limitation the rights
27271
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27272
- copies of the Software, and to permit persons to whom the Software is
27273
- furnished to do so, subject to the following conditions:
27274
-
27275
- The above copyright notice and this permission notice shall be included in all
27276
- copies or substantial portions of the Software.
27277
-
27278
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27279
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27280
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27281
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27282
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27283
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27284
- SOFTWARE.
27285
-
27286
-
27287
27287
  ================================================================================
27288
27288
  polars LICENSE
27289
27289
  ================================================================================
@@ -30036,6 +30036,32 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30036
30036
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30037
30037
  SOFTWARE.
30038
30038
 
30039
+ ================================================================================
30040
+ rb-sys-env LICENSE-MIT
30041
+ ================================================================================
30042
+
30043
+ The MIT License (MIT)
30044
+
30045
+ Copyright (c) 2021-2022 Ian Ker-Seymer
30046
+
30047
+ Permission is hereby granted, free of charge, to any person obtaining a copy
30048
+ of this software and associated documentation files (the "Software"), to deal
30049
+ in the Software without restriction, including without limitation the rights
30050
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30051
+ copies of the Software, and to permit persons to whom the Software is
30052
+ furnished to do so, subject to the following conditions:
30053
+
30054
+ The above copyright notice and this permission notice shall be included in all
30055
+ copies or substantial portions of the Software.
30056
+
30057
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30058
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30059
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30060
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30061
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30062
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30063
+ SOFTWARE.
30064
+
30039
30065
  ================================================================================
30040
30066
  rb-sys-env LICENSE-APACHE
30041
30067
  ================================================================================
@@ -30231,32 +30257,6 @@ rb-sys-env LICENSE-APACHE
30231
30257
  See the License for the specific language governing permissions and
30232
30258
  limitations under the License.
30233
30259
 
30234
- ================================================================================
30235
- rb-sys-env LICENSE-MIT
30236
- ================================================================================
30237
-
30238
- The MIT License (MIT)
30239
-
30240
- Copyright (c) 2021-2022 Ian Ker-Seymer
30241
-
30242
- Permission is hereby granted, free of charge, to any person obtaining a copy
30243
- of this software and associated documentation files (the "Software"), to deal
30244
- in the Software without restriction, including without limitation the rights
30245
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30246
- copies of the Software, and to permit persons to whom the Software is
30247
- furnished to do so, subject to the following conditions:
30248
-
30249
- The above copyright notice and this permission notice shall be included in all
30250
- copies or substantial portions of the Software.
30251
-
30252
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30253
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30254
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30255
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30256
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30257
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30258
- SOFTWARE.
30259
-
30260
30260
  ================================================================================
30261
30261
  regex LICENSE-APACHE
30262
30262
  ================================================================================
data/README.md CHANGED
@@ -25,7 +25,13 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
+
30
+ ## Reference
31
+
32
+ - [Series](https://www.rubydoc.info/gems/polars-df/Polars/Series)
33
+ - [DataFrame](https://www.rubydoc.info/gems/polars-df/Polars/DataFrame)
34
+ - [LazyFrame](https://www.rubydoc.info/gems/polars-df/Polars/LazyFrame)
29
35
 
30
36
  ## Examples
31
37
 
@@ -35,6 +41,9 @@ From a CSV
35
41
 
36
42
  ```ruby
37
43
  Polars.read_csv("file.csv")
44
+
45
+ # or lazily with
46
+ Polars.scan_csv("file.csv")
38
47
  ```
39
48
 
40
49
  From Parquet
@@ -46,7 +55,7 @@ Polars.read_parquet("file.parquet")
46
55
  From Active Record
47
56
 
48
57
  ```ruby
49
- Polars::DataFrame.new(User.all)
58
+ Polars.read_sql(User.all)
50
59
  ```
51
60
 
52
61
  From a hash
@@ -67,6 +76,261 @@ Polars::DataFrame.new([
67
76
  ])
68
77
  ```
69
78
 
79
+ ## Attributes
80
+
81
+ Get number of rows
82
+
83
+ ```ruby
84
+ df.height
85
+ ```
86
+
87
+ Get column names
88
+
89
+ ```ruby
90
+ df.columns
91
+ ```
92
+
93
+ Check if a column exists
94
+
95
+ ```ruby
96
+ df.include?(name)
97
+ ```
98
+
99
+ ## Selecting Data
100
+
101
+ Select a column
102
+
103
+ ```ruby
104
+ df["a"]
105
+ ```
106
+
107
+ Select multiple columns
108
+
109
+ ```ruby
110
+ df[["a", "b"]]
111
+ ```
112
+
113
+ Select first rows
114
+
115
+ ```ruby
116
+ df.head
117
+ ```
118
+
119
+ Select last rows
120
+
121
+ ```ruby
122
+ df.tail
123
+ ```
124
+
125
+ ## Filtering
126
+
127
+ Filter on a condition
128
+
129
+ ```ruby
130
+ df[Polars.col("a") == 2]
131
+ df[Polars.col("a") != 2]
132
+ df[Polars.col("a") > 2]
133
+ df[Polars.col("a") >= 2]
134
+ df[Polars.col("a") < 2]
135
+ df[Polars.col("a") <= 2]
136
+ ```
137
+
138
+ And, or, and exclusive or
139
+
140
+ ```ruby
141
+ df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
142
+ df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
143
+ df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
144
+ ```
145
+
146
+ ## Operations
147
+
148
+ Basic operations
149
+
150
+ ```ruby
151
+ df["a"] + 5
152
+ df["a"] - 5
153
+ df["a"] * 5
154
+ df["a"] / 5
155
+ df["a"] % 5
156
+ df["a"] ** 2
157
+ df["a"].sqrt
158
+ df["a"].abs
159
+ ```
160
+
161
+ Rounding
162
+
163
+ ```ruby
164
+ df["a"].round(2)
165
+ df["a"].ceil
166
+ df["a"].floor
167
+ ```
168
+
169
+ Logarithm
170
+
171
+ ```ruby
172
+ df["a"].log # natural log
173
+ df["a"].log(10)
174
+ ```
175
+
176
+ Exponentiation
177
+
178
+ ```ruby
179
+ df["a"].exp
180
+ ```
181
+
182
+ Trigonometric functions
183
+
184
+ ```ruby
185
+ df["a"].sin
186
+ df["a"].cos
187
+ df["a"].tan
188
+ df["a"].asin
189
+ df["a"].acos
190
+ df["a"].atan
191
+ ```
192
+
193
+ Hyperbolic functions
194
+
195
+ ```ruby
196
+ df["a"].sinh
197
+ df["a"].cosh
198
+ df["a"].tanh
199
+ df["a"].asinh
200
+ df["a"].acosh
201
+ df["a"].atanh
202
+ ```
203
+
204
+ Summary statistics
205
+
206
+ ```ruby
207
+ df["a"].sum
208
+ df["a"].mean
209
+ df["a"].median
210
+ df["a"].quantile(0.90)
211
+ df["a"].min
212
+ df["a"].max
213
+ df["a"].std
214
+ df["a"].var
215
+ ```
216
+
217
+ ## Grouping
218
+
219
+ Group
220
+
221
+ ```ruby
222
+ df.groupby("a").count
223
+ ```
224
+
225
+ Works with all summary statistics
226
+
227
+ ```ruby
228
+ df.groupby("a").max
229
+ ```
230
+
231
+ Multiple groups
232
+
233
+ ```ruby
234
+ df.groupby(["a", "b"]).count
235
+ ```
236
+
237
+ ## Combining Data Frames
238
+
239
+ Add rows
240
+
241
+ ```ruby
242
+ df.vstack(other_df)
243
+ ```
244
+
245
+ Add columns
246
+
247
+ ```ruby
248
+ df.hstack(other_df)
249
+ ```
250
+
251
+ Inner join
252
+
253
+ ```ruby
254
+ df.join(other_df, on: "a")
255
+ ```
256
+
257
+ Left join
258
+
259
+ ```ruby
260
+ df.join(other_df, on: "a", how: "left")
261
+ ```
262
+
263
+ ## Encoding
264
+
265
+ One-hot encoding
266
+
267
+ ```ruby
268
+ df.to_dummies
269
+ ```
270
+
271
+ ## Conversion
272
+
273
+ Array of rows
274
+
275
+ ```ruby
276
+ df.rows
277
+ ```
278
+
279
+ Hash of series
280
+
281
+ ```ruby
282
+ df.to_h
283
+ ```
284
+
285
+ CSV
286
+
287
+ ```ruby
288
+ df.to_csv
289
+ # or
290
+ df.write_csv("data.csv")
291
+ ```
292
+
293
+ Parquet
294
+
295
+ ```ruby
296
+ df.write_parquet("data.parquet")
297
+ ```
298
+
299
+ ## Types
300
+
301
+ You can specify column types when creating a data frame
302
+
303
+ ```ruby
304
+ Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
305
+ ```
306
+
307
+ Supported types are:
308
+
309
+ - boolean - `Boolean`
310
+ - float - `Float64`, `Float32`
311
+ - integer - `Int64`, `Int32`, `Int16`, `Int8`
312
+ - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
313
+ - string - `Utf8`, `Categorical`
314
+ - temporal - `Date`, `Datetime`, `Time`, `Duration`
315
+
316
+ Get column types
317
+
318
+ ```ruby
319
+ df.schema
320
+ ```
321
+
322
+ For a specific column
323
+
324
+ ```ruby
325
+ df["a"].dtype
326
+ ```
327
+
328
+ Cast a column
329
+
330
+ ```ruby
331
+ df["a"].cast(Polars::Int32)
332
+ ```
333
+
70
334
  ## History
71
335
 
72
336
  View the [changelog](CHANGELOG.md)
Binary file
Binary file
Binary file
@@ -17,6 +17,7 @@ module Polars
17
17
  # the orientation is inferred by matching the columns and data dimensions. If
18
18
  # this does not yield conclusive results, column orientation is used.
19
19
  def initialize(data = nil, columns: nil, orient: nil)
20
+ # TODO deprecate in favor of read_sql
20
21
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
21
22
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
22
23
  data = {}
@@ -275,6 +276,8 @@ module Polars
275
276
  def height
276
277
  _df.height
277
278
  end
279
+ alias_method :count, :height
280
+ alias_method :length, :height
278
281
 
279
282
  # Get the width of the DataFrame.
280
283
  #
@@ -521,13 +524,13 @@ module Polars
521
524
  return df.slice(row_selection, 1)
522
525
  end
523
526
  # df[2, "a"]
524
- if col_selection.is_a?(String)
527
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
525
528
  return self[col_selection][row_selection]
526
529
  end
527
530
  end
528
531
 
529
532
  # column selection can be "a" and ["a", "b"]
530
- if col_selection.is_a?(String)
533
+ if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
531
534
  col_selection = [col_selection]
532
535
  end
533
536
 
@@ -539,7 +542,7 @@ module Polars
539
542
 
540
543
  if col_selection.is_a?(Array)
541
544
  # df[.., [1, 2]]
542
- if is_int_sequence(col_selection)
545
+ if Utils.is_int_sequence(col_selection)
543
546
  series_list = col_selection.map { |i| to_series(i) }
544
547
  df = self.class.new(series_list)
545
548
  return df[row_selection]
@@ -553,8 +556,8 @@ module Polars
553
556
 
554
557
  # select single column
555
558
  # df["foo"]
556
- if item.is_a?(String)
557
- return Utils.wrap_s(_df.column(item))
559
+ if item.is_a?(String) || item.is_a?(Symbol)
560
+ return Utils.wrap_s(_df.column(item.to_s))
558
561
  end
559
562
 
560
563
  # df[idx]
@@ -572,6 +575,28 @@ module Polars
572
575
  # df[["foo", "bar"]]
573
576
  return _from_rbdf(_df.select(item))
574
577
  end
578
+
579
+ if Utils.is_int_sequence(item)
580
+ item = Series.new("", item)
581
+ end
582
+
583
+ if item.is_a?(Series)
584
+ dtype = item.dtype
585
+ if dtype == Utf8
586
+ return _from_rbdf(_df.select(item))
587
+ elsif dtype == UInt32
588
+ return _from_rbdf(_df.take_with_series(item._s))
589
+ elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
590
+ return _from_rbdf(
591
+ _df.take_with_series(_pos_idxs(item, 0)._s)
592
+ )
593
+ end
594
+ end
595
+ end
596
+
597
+ # Ruby-specific
598
+ if item.is_a?(Expr)
599
+ return filter(item)
575
600
  end
576
601
 
577
602
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
@@ -797,6 +822,13 @@ module Polars
797
822
  nil
798
823
  end
799
824
 
825
+ # Write to comma-separated values (CSV) string.
826
+ #
827
+ # @return [String]
828
+ def to_csv(**options)
829
+ write_csv(**options)
830
+ end
831
+
800
832
  # Write to Apache Avro file.
801
833
  #
802
834
  # @param file [String]
@@ -4648,8 +4680,53 @@ module Polars
4648
4680
  end
4649
4681
  end
4650
4682
 
4651
- # def _pos_idxs
4652
- # end
4683
+ def _pos_idxs(idxs, dim)
4684
+ idx_type = Polars._get_idx_type
4685
+
4686
+ if idxs.is_a?(Series)
4687
+ if idxs.dtype == idx_type
4688
+ return idxs
4689
+ end
4690
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
4691
+ if idx_type == UInt32
4692
+ if [Int64, UInt64].include?(idxs.dtype)
4693
+ if idxs.max >= 2**32
4694
+ raise ArgumentError, "Index positions should be smaller than 2^32."
4695
+ end
4696
+ end
4697
+ if idxs.dtype == Int64
4698
+ if idxs.min < -(2**32)
4699
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
4700
+ end
4701
+ end
4702
+ end
4703
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
4704
+ if idxs.min < 0
4705
+ if idx_type == UInt32
4706
+ if [Int8, Int16].include?(idxs.dtype)
4707
+ idxs = idxs.cast(Int32)
4708
+ end
4709
+ else
4710
+ if [Int8, Int16, Int32].include?(idxs.dtype)
4711
+ idxs = idxs.cast(Int64)
4712
+ end
4713
+ end
4714
+
4715
+ idxs =
4716
+ Polars.select(
4717
+ Polars.when(Polars.lit(idxs) < 0)
4718
+ .then(shape[dim] + Polars.lit(idxs))
4719
+ .otherwise(Polars.lit(idxs))
4720
+ ).to_series
4721
+ end
4722
+ end
4723
+
4724
+ return idxs.cast(idx_type)
4725
+ end
4726
+ end
4727
+
4728
+ raise ArgumentError, "Unsupported idxs datatype."
4729
+ end
4653
4730
 
4654
4731
  # @private
4655
4732
  def self.hash_to_rbdf(data, columns: nil)
@@ -93,7 +93,7 @@ module Polars
93
93
  class Time < DataType
94
94
  end
95
95
 
96
- # Type for wrapping arbitrary Python objects.
96
+ # Type for wrapping arbitrary Ruby objects.
97
97
  class Object < DataType
98
98
  end
99
99
 
data/lib/polars/io.rb CHANGED
@@ -590,8 +590,33 @@ module Polars
590
590
  DataFrame._read_ndjson(file)
591
591
  end
592
592
 
593
- # def read_sql
594
- # end
593
+ # Read a SQL query into a DataFrame.
594
+ #
595
+ # @param sql [Object]
596
+ # ActiveRecord::Relation, ActiveRecord::Result, or String (SQL query).
597
+ #
598
+ # @return [DataFrame]
599
+ def read_sql(sql)
600
+ if !defined?(ActiveRecord)
601
+ raise Error, "Active Record not available"
602
+ end
603
+
604
+ result =
605
+ if sql.is_a?(ActiveRecord::Result)
606
+ sql
607
+ elsif sql.is_a?(ActiveRecord::Relation)
608
+ sql.connection.select_all(sql.to_sql)
609
+ elsif sql.is_a?(String)
610
+ ActiveRecord::Base.connection.select_all(sql)
611
+ else
612
+ raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
613
+ end
614
+ data = {}
615
+ result.columns.each_with_index do |k, i|
616
+ data[k] = result.rows.map { |r| r[i] }
617
+ end
618
+ DataFrame.new(data)
619
+ end
595
620
 
596
621
  # def read_excel
597
622
  # end
data/lib/polars/series.rb CHANGED
@@ -263,6 +263,10 @@ module Polars
263
263
  #
264
264
  # @return [Object]
265
265
  def [](item)
266
+ if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
267
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
268
+ end
269
+
266
270
  if item.is_a?(Integer)
267
271
  return _s.get_idx(item)
268
272
  end
@@ -271,6 +275,10 @@ module Polars
271
275
  return Slice.new(self).apply(item)
272
276
  end
273
277
 
278
+ if Utils.is_int_sequence(item)
279
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
280
+ end
281
+
274
282
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
275
283
  end
276
284
 
@@ -287,24 +295,23 @@ module Polars
287
295
  end
288
296
 
289
297
  if key.is_a?(Series)
290
- if key.dtype == :bool
298
+ if key.dtype == Boolean
291
299
  self._s = set(key, value)._s
292
- elsif key.dtype == :u64
293
- self._s = set_at_idx(key.cast(:u32), value)._s
294
- elsif key.dtype == :u32
300
+ elsif key.dtype == UInt64
301
+ self._s = set_at_idx(key.cast(UInt32), value)._s
302
+ elsif key.dtype == UInt32
295
303
  self._s = set_at_idx(key, value)._s
296
304
  else
297
305
  raise Todo
298
306
  end
299
- end
300
-
301
- if key.is_a?(Array)
302
- s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
307
+ elsif key.is_a?(Array)
308
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
309
+ self[s] = value
310
+ elsif key.is_a?(Range)
311
+ s = Series.new("", key, dtype: UInt32)
303
312
  self[s] = value
304
313
  elsif key.is_a?(Integer)
305
- # TODO fix
306
- # self[[key]] = value
307
- set_at_idx(key, value)
314
+ self[[key]] = value
308
315
  else
309
316
  raise ArgumentError, "cannot use #{key} for indexing"
310
317
  end
@@ -1647,6 +1654,7 @@ module Polars
1647
1654
  def len
1648
1655
  _s.len
1649
1656
  end
1657
+ alias_method :count, :len
1650
1658
  alias_method :length, :len
1651
1659
 
1652
1660
  # Cast between data types.
@@ -2183,6 +2191,7 @@ module Polars
2183
2191
  def arcsin
2184
2192
  super
2185
2193
  end
2194
+ alias_method :asin, :arcsin
2186
2195
 
2187
2196
  # Compute the element-wise value for the inverse cosine.
2188
2197
  #
@@ -2202,6 +2211,7 @@ module Polars
2202
2211
  def arccos
2203
2212
  super
2204
2213
  end
2214
+ alias_method :acos, :arccos
2205
2215
 
2206
2216
  # Compute the element-wise value for the inverse tangent.
2207
2217
  #
@@ -2221,6 +2231,7 @@ module Polars
2221
2231
  def arctan
2222
2232
  super
2223
2233
  end
2234
+ alias_method :atan, :arctan
2224
2235
 
2225
2236
  # Compute the element-wise value for the inverse hyperbolic sine.
2226
2237
  #
@@ -2240,6 +2251,7 @@ module Polars
2240
2251
  def arcsinh
2241
2252
  super
2242
2253
  end
2254
+ alias_method :asinh, :arcsinh
2243
2255
 
2244
2256
  # Compute the element-wise value for the inverse hyperbolic cosine.
2245
2257
  #
@@ -2260,6 +2272,7 @@ module Polars
2260
2272
  def arccosh
2261
2273
  super
2262
2274
  end
2275
+ alias_method :acosh, :arccosh
2263
2276
 
2264
2277
  # Compute the element-wise value for the inverse hyperbolic tangent.
2265
2278
  #
@@ -2283,6 +2296,7 @@ module Polars
2283
2296
  def arctanh
2284
2297
  super
2285
2298
  end
2299
+ alias_method :atanh, :arctanh
2286
2300
 
2287
2301
  # Compute the element-wise value for the hyperbolic sine.
2288
2302
  #
@@ -3520,6 +3534,59 @@ module Polars
3520
3534
  end
3521
3535
  end
3522
3536
 
3537
+ def _pos_idxs(idxs)
3538
+ idx_type = Polars._get_idx_type
3539
+
3540
+ if idxs.is_a?(Series)
3541
+ if idxs.dtype == idx_type
3542
+ return idxs
3543
+ end
3544
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
3545
+ if idx_type == UInt32
3546
+ if [Int64, UInt64].include?(idxs.dtype)
3547
+ if idxs.max >= 2**32
3548
+ raise ArgumentError, "Index positions should be smaller than 2^32."
3549
+ end
3550
+ end
3551
+ if idxs.dtype == Int64
3552
+ if idxs.min < -(2**32)
3553
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
3554
+ end
3555
+ end
3556
+ end
3557
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
3558
+ if idxs.min < 0
3559
+ if idx_type == UInt32
3560
+ if [Int8, Int16].include?(idxs.dtype)
3561
+ idxs = idxs.cast(Int32)
3562
+ end
3563
+ else
3564
+ if [Int8, Int16, Int32].include?(idxs.dtype)
3565
+ idxs = idxs.cast(Int64)
3566
+ end
3567
+ end
3568
+
3569
+ # Update negative indexes to absolute indexes.
3570
+ return (
3571
+ idxs.to_frame
3572
+ .select(
3573
+ Polars.when(Polars.col(idxs.name) < 0)
3574
+ .then(len + Polars.col(idxs.name))
3575
+ .otherwise(Polars.col(idxs.name))
3576
+ .cast(idx_type)
3577
+ )
3578
+ .to_series(0)
3579
+ )
3580
+ end
3581
+ end
3582
+
3583
+ return idxs.cast(idx_type)
3584
+ end
3585
+ end
3586
+
3587
+ raise ArgumentError, "Unsupported idxs datatype."
3588
+ end
3589
+
3523
3590
  def _comp(other, op)
3524
3591
  if other.is_a?(Series)
3525
3592
  return Utils.wrap_s(_s.send(op, other._s))
data/lib/polars/slice.rb CHANGED
@@ -56,7 +56,7 @@ module Polars
56
56
  # Normalize slice bounds, identify unbounded and/or zero-length slices.
57
57
  def _slice_setup(s)
58
58
  # can normalize slice indices as we know object size
59
- obj_len = @obj.len
59
+ obj_len = @obj.length
60
60
  start = if s.begin
61
61
  if s.begin < 0
62
62
  [s.begin + obj_len, 0].max
data/lib/polars/utils.rb CHANGED
@@ -70,7 +70,7 @@ module Polars
70
70
  end
71
71
 
72
72
  def self.selection_to_rbexpr_list(exprs)
73
- if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
73
+ if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
74
  exprs = [exprs]
75
75
  end
76
76
 
@@ -78,9 +78,9 @@ module Polars
78
78
  end
79
79
 
80
80
  def self.expr_to_lit_or_expr(expr, str_to_lit: true)
81
- if expr.is_a?(String) && !str_to_lit
81
+ if (expr.is_a?(String) || expr.is_a?(Symbol)) && !str_to_lit
82
82
  col(expr)
83
- elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Series) || expr.nil?
83
+ elsif expr.is_a?(Integer) || expr.is_a?(Float) || expr.is_a?(String) || expr.is_a?(Symbol) || expr.is_a?(Series) || expr.nil?
84
84
  lit(expr)
85
85
  elsif expr.is_a?(Expr)
86
86
  expr
@@ -181,6 +181,26 @@ module Polars
181
181
  val.all? { |x| x.is_a?(eltype) }
182
182
  end
183
183
 
184
+ def self.is_bool_sequence(val)
185
+ val.is_a?(Array) && val.all? { |x| x == true || x == false }
186
+ end
187
+
188
+ def self.is_dtype_sequence(val)
189
+ val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
190
+ end
191
+
192
+ def self.is_int_sequence(val)
193
+ val.is_a?(Array) && _is_iterable_of(val, Integer)
194
+ end
195
+
196
+ def self.is_expr_sequence(val)
197
+ val.is_a?(Array) && _is_iterable_of(val, Expr)
198
+ end
199
+
200
+ def self.is_rbexpr_sequence(val)
201
+ val.is_a?(Array) && _is_iterable_of(val, RbExpr)
202
+ end
203
+
184
204
  def self.is_str_sequence(val, allow_str: false)
185
205
  if allow_str == false && val.is_a?(String)
186
206
  false
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.0"
3
+ VERSION = "0.2.2"
4
4
  end
data/lib/polars.rb CHANGED
@@ -7,6 +7,7 @@ end
7
7
 
8
8
  # stdlib
9
9
  require "date"
10
+ require "stringio"
10
11
 
11
12
  # modules
12
13
  require_relative "polars/expr_dispatch"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-15 00:00:00.000000000 Z
11
+ date: 2023-01-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
84
  requirements: []
85
- rubygems_version: 3.4.3
85
+ rubygems_version: 3.4.4
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: Blazingly fast DataFrames for Ruby