polars-df 0.2.1-x86_64-darwin → 0.2.3-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +1 -1
- data/LICENSE-THIRD-PARTY.txt +80 -80
- data/README.md +10 -5
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/data_frame.rb +123 -5
- data/lib/polars/data_types.rb +67 -29
- data/lib/polars/io.rb +3 -1
- data/lib/polars/series.rb +76 -11
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/utils.rb +25 -5
- data/lib/polars/version.rb +1 -1
- data/lib/polars-df.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a67aba6106276247072fe14bcd35b3381e890844ea50321c443cbcc815b84ca3
|
4
|
+
data.tar.gz: c5c3388ed010e8946903b7d8ed30fc24ddbf2a34b9eba23d1c178b476faebc7a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1343d81460555ed4baa13962ff7316814b64e3c7c6c363c6d3d7b01221bd83abd9ad62b9bd8dffb4bd7dbb30716bae5d5fc20d7830e157631767ac186638bd24
|
7
|
+
data.tar.gz: f02495aea654767675444e57ea764cb44e5e34123049cdf541c3b2b47df9d7ddc1dd9bb0499ddda6ee360e3983b0184f35bc6e5fcc28f93e49af3476960770eb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.2.3 (2023-01-22)
|
2
|
+
|
3
|
+
- Fixed error with precompiled gem on Mac ARM
|
4
|
+
- Fixed issue with structs
|
5
|
+
|
6
|
+
## 0.2.2 (2023-01-20)
|
7
|
+
|
8
|
+
- Added support for strings to `read_sql` method
|
9
|
+
- Improved indexing
|
10
|
+
- Fixed error with precompiled gem on Mac ARM
|
11
|
+
|
1
12
|
## 0.2.1 (2023-01-18)
|
2
13
|
|
3
14
|
- Added `read_sql` method
|
data/Cargo.lock
CHANGED
data/LICENSE-THIRD-PARTY.txt
CHANGED
@@ -1640,6 +1640,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
|
1640
1640
|
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
1641
1641
|
DEALINGS IN THE SOFTWARE.
|
1642
1642
|
|
1643
|
+
================================================================================
|
1644
|
+
array-init-cursor LICENSE-MIT
|
1645
|
+
================================================================================
|
1646
|
+
|
1647
|
+
MIT License
|
1648
|
+
|
1649
|
+
Copyright (c) 2021 The Planus Project Developers
|
1650
|
+
|
1651
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
1652
|
+
of this software and associated documentation files (the "Software"), to deal
|
1653
|
+
in the Software without restriction, including without limitation the rights
|
1654
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
1655
|
+
copies of the Software, and to permit persons to whom the Software is
|
1656
|
+
furnished to do so, subject to the following conditions:
|
1657
|
+
|
1658
|
+
The above copyright notice and this permission notice shall be included in all
|
1659
|
+
copies or substantial portions of the Software.
|
1660
|
+
|
1661
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
1662
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
1663
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
1664
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
1665
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
1666
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
1667
|
+
SOFTWARE.
|
1668
|
+
|
1669
|
+
|
1643
1670
|
================================================================================
|
1644
1671
|
array-init-cursor LICENSE-APACHE
|
1645
1672
|
================================================================================
|
@@ -1822,33 +1849,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
1822
1849
|
END OF TERMS AND CONDITIONS
|
1823
1850
|
|
1824
1851
|
|
1825
|
-
================================================================================
|
1826
|
-
array-init-cursor LICENSE-MIT
|
1827
|
-
================================================================================
|
1828
|
-
|
1829
|
-
MIT License
|
1830
|
-
|
1831
|
-
Copyright (c) 2021 The Planus Project Developers
|
1832
|
-
|
1833
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
1834
|
-
of this software and associated documentation files (the "Software"), to deal
|
1835
|
-
in the Software without restriction, including without limitation the rights
|
1836
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
1837
|
-
copies of the Software, and to permit persons to whom the Software is
|
1838
|
-
furnished to do so, subject to the following conditions:
|
1839
|
-
|
1840
|
-
The above copyright notice and this permission notice shall be included in all
|
1841
|
-
copies or substantial portions of the Software.
|
1842
|
-
|
1843
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
1844
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
1845
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
1846
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
1847
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
1848
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
1849
|
-
SOFTWARE.
|
1850
|
-
|
1851
|
-
|
1852
1852
|
================================================================================
|
1853
1853
|
arrow-format LICENSE
|
1854
1854
|
================================================================================
|
@@ -27103,6 +27103,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
|
27103
27103
|
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
27104
27104
|
DEALINGS IN THE SOFTWARE.
|
27105
27105
|
|
27106
|
+
================================================================================
|
27107
|
+
planus LICENSE-MIT
|
27108
|
+
================================================================================
|
27109
|
+
|
27110
|
+
MIT License
|
27111
|
+
|
27112
|
+
Copyright (c) 2021 The Planus Project Developers
|
27113
|
+
|
27114
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
27115
|
+
of this software and associated documentation files (the "Software"), to deal
|
27116
|
+
in the Software without restriction, including without limitation the rights
|
27117
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
27118
|
+
copies of the Software, and to permit persons to whom the Software is
|
27119
|
+
furnished to do so, subject to the following conditions:
|
27120
|
+
|
27121
|
+
The above copyright notice and this permission notice shall be included in all
|
27122
|
+
copies or substantial portions of the Software.
|
27123
|
+
|
27124
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
27125
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
27126
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
27127
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
27128
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27129
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
27130
|
+
SOFTWARE.
|
27131
|
+
|
27132
|
+
|
27106
27133
|
================================================================================
|
27107
27134
|
planus LICENSE-APACHE
|
27108
27135
|
================================================================================
|
@@ -27285,33 +27312,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
27285
27312
|
END OF TERMS AND CONDITIONS
|
27286
27313
|
|
27287
27314
|
|
27288
|
-
================================================================================
|
27289
|
-
planus LICENSE-MIT
|
27290
|
-
================================================================================
|
27291
|
-
|
27292
|
-
MIT License
|
27293
|
-
|
27294
|
-
Copyright (c) 2021 The Planus Project Developers
|
27295
|
-
|
27296
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
27297
|
-
of this software and associated documentation files (the "Software"), to deal
|
27298
|
-
in the Software without restriction, including without limitation the rights
|
27299
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
27300
|
-
copies of the Software, and to permit persons to whom the Software is
|
27301
|
-
furnished to do so, subject to the following conditions:
|
27302
|
-
|
27303
|
-
The above copyright notice and this permission notice shall be included in all
|
27304
|
-
copies or substantial portions of the Software.
|
27305
|
-
|
27306
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
27307
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
27308
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
27309
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
27310
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27311
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
27312
|
-
SOFTWARE.
|
27313
|
-
|
27314
|
-
|
27315
27315
|
================================================================================
|
27316
27316
|
polars LICENSE
|
27317
27317
|
================================================================================
|
@@ -30064,6 +30064,32 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
30064
30064
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
30065
30065
|
SOFTWARE.
|
30066
30066
|
|
30067
|
+
================================================================================
|
30068
|
+
rb-sys-env LICENSE-MIT
|
30069
|
+
================================================================================
|
30070
|
+
|
30071
|
+
The MIT License (MIT)
|
30072
|
+
|
30073
|
+
Copyright (c) 2021-2022 Ian Ker-Seymer
|
30074
|
+
|
30075
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
30076
|
+
of this software and associated documentation files (the "Software"), to deal
|
30077
|
+
in the Software without restriction, including without limitation the rights
|
30078
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
30079
|
+
copies of the Software, and to permit persons to whom the Software is
|
30080
|
+
furnished to do so, subject to the following conditions:
|
30081
|
+
|
30082
|
+
The above copyright notice and this permission notice shall be included in all
|
30083
|
+
copies or substantial portions of the Software.
|
30084
|
+
|
30085
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
30086
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
30087
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
30088
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
30089
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
30090
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
30091
|
+
SOFTWARE.
|
30092
|
+
|
30067
30093
|
================================================================================
|
30068
30094
|
rb-sys-env LICENSE-APACHE
|
30069
30095
|
================================================================================
|
@@ -30259,32 +30285,6 @@ rb-sys-env LICENSE-APACHE
|
|
30259
30285
|
See the License for the specific language governing permissions and
|
30260
30286
|
limitations under the License.
|
30261
30287
|
|
30262
|
-
================================================================================
|
30263
|
-
rb-sys-env LICENSE-MIT
|
30264
|
-
================================================================================
|
30265
|
-
|
30266
|
-
The MIT License (MIT)
|
30267
|
-
|
30268
|
-
Copyright (c) 2021-2022 Ian Ker-Seymer
|
30269
|
-
|
30270
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
30271
|
-
of this software and associated documentation files (the "Software"), to deal
|
30272
|
-
in the Software without restriction, including without limitation the rights
|
30273
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
30274
|
-
copies of the Software, and to permit persons to whom the Software is
|
30275
|
-
furnished to do so, subject to the following conditions:
|
30276
|
-
|
30277
|
-
The above copyright notice and this permission notice shall be included in all
|
30278
|
-
copies or substantial portions of the Software.
|
30279
|
-
|
30280
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
30281
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
30282
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
30283
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
30284
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
30285
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
30286
|
-
SOFTWARE.
|
30287
|
-
|
30288
30288
|
================================================================================
|
30289
30289
|
regex LICENSE-APACHE
|
30290
30290
|
================================================================================
|
data/README.md
CHANGED
@@ -41,6 +41,9 @@ From a CSV
|
|
41
41
|
|
42
42
|
```ruby
|
43
43
|
Polars.read_csv("file.csv")
|
44
|
+
|
45
|
+
# or lazily with
|
46
|
+
Polars.scan_csv("file.csv")
|
44
47
|
```
|
45
48
|
|
46
49
|
From Parquet
|
@@ -53,6 +56,8 @@ From Active Record
|
|
53
56
|
|
54
57
|
```ruby
|
55
58
|
Polars.read_sql(User.all)
|
59
|
+
# or
|
60
|
+
Polars.read_sql("SELECT * FROM users")
|
56
61
|
```
|
57
62
|
|
58
63
|
From a hash
|
@@ -135,9 +140,9 @@ df[Polars.col("a") <= 2]
|
|
135
140
|
And, or, and exclusive or
|
136
141
|
|
137
142
|
```ruby
|
138
|
-
df[(Polars.col("a") >
|
139
|
-
df[(Polars.col("a") >
|
140
|
-
df[(Polars.col("a") >
|
143
|
+
df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
|
144
|
+
df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
|
145
|
+
df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
|
141
146
|
```
|
142
147
|
|
143
148
|
## Operations
|
@@ -284,13 +289,13 @@ CSV
|
|
284
289
|
```ruby
|
285
290
|
df.to_csv
|
286
291
|
# or
|
287
|
-
df.write_csv("
|
292
|
+
df.write_csv("file.csv")
|
288
293
|
```
|
289
294
|
|
290
295
|
Parquet
|
291
296
|
|
292
297
|
```ruby
|
293
|
-
df.write_parquet("
|
298
|
+
df.write_parquet("file.parquet")
|
294
299
|
```
|
295
300
|
|
296
301
|
## Types
|
Binary file
|
Binary file
|
Binary file
|
data/lib/polars/data_frame.rb
CHANGED
@@ -277,6 +277,7 @@ module Polars
|
|
277
277
|
_df.height
|
278
278
|
end
|
279
279
|
alias_method :count, :height
|
280
|
+
alias_method :length, :height
|
280
281
|
|
281
282
|
# Get the width of the DataFrame.
|
282
283
|
#
|
@@ -541,7 +542,7 @@ module Polars
|
|
541
542
|
|
542
543
|
if col_selection.is_a?(Array)
|
543
544
|
# df[.., [1, 2]]
|
544
|
-
if is_int_sequence(col_selection)
|
545
|
+
if Utils.is_int_sequence(col_selection)
|
545
546
|
series_list = col_selection.map { |i| to_series(i) }
|
546
547
|
df = self.class.new(series_list)
|
547
548
|
return df[row_selection]
|
@@ -574,6 +575,23 @@ module Polars
|
|
574
575
|
# df[["foo", "bar"]]
|
575
576
|
return _from_rbdf(_df.select(item))
|
576
577
|
end
|
578
|
+
|
579
|
+
if Utils.is_int_sequence(item)
|
580
|
+
item = Series.new("", item)
|
581
|
+
end
|
582
|
+
|
583
|
+
if item.is_a?(Series)
|
584
|
+
dtype = item.dtype
|
585
|
+
if dtype == Utf8
|
586
|
+
return _from_rbdf(_df.select(item))
|
587
|
+
elsif dtype == UInt32
|
588
|
+
return _from_rbdf(_df.take_with_series(item._s))
|
589
|
+
elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
|
590
|
+
return _from_rbdf(
|
591
|
+
_df.take_with_series(_pos_idxs(item, 0)._s)
|
592
|
+
)
|
593
|
+
end
|
594
|
+
end
|
577
595
|
end
|
578
596
|
|
579
597
|
# Ruby-specific
|
@@ -4662,8 +4680,53 @@ module Polars
|
|
4662
4680
|
end
|
4663
4681
|
end
|
4664
4682
|
|
4665
|
-
|
4666
|
-
|
4683
|
+
def _pos_idxs(idxs, dim)
|
4684
|
+
idx_type = Polars._get_idx_type
|
4685
|
+
|
4686
|
+
if idxs.is_a?(Series)
|
4687
|
+
if idxs.dtype == idx_type
|
4688
|
+
return idxs
|
4689
|
+
end
|
4690
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
4691
|
+
if idx_type == UInt32
|
4692
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
4693
|
+
if idxs.max >= 2**32
|
4694
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
4695
|
+
end
|
4696
|
+
end
|
4697
|
+
if idxs.dtype == Int64
|
4698
|
+
if idxs.min < -(2**32)
|
4699
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
4700
|
+
end
|
4701
|
+
end
|
4702
|
+
end
|
4703
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
4704
|
+
if idxs.min < 0
|
4705
|
+
if idx_type == UInt32
|
4706
|
+
if [Int8, Int16].include?(idxs.dtype)
|
4707
|
+
idxs = idxs.cast(Int32)
|
4708
|
+
end
|
4709
|
+
else
|
4710
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
4711
|
+
idxs = idxs.cast(Int64)
|
4712
|
+
end
|
4713
|
+
end
|
4714
|
+
|
4715
|
+
idxs =
|
4716
|
+
Polars.select(
|
4717
|
+
Polars.when(Polars.lit(idxs) < 0)
|
4718
|
+
.then(shape[dim] + Polars.lit(idxs))
|
4719
|
+
.otherwise(Polars.lit(idxs))
|
4720
|
+
).to_series
|
4721
|
+
end
|
4722
|
+
end
|
4723
|
+
|
4724
|
+
return idxs.cast(idx_type)
|
4725
|
+
end
|
4726
|
+
end
|
4727
|
+
|
4728
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
4729
|
+
end
|
4667
4730
|
|
4668
4731
|
# @private
|
4669
4732
|
def self.hash_to_rbdf(data, columns: nil)
|
@@ -4683,7 +4746,14 @@ module Polars
|
|
4683
4746
|
end
|
4684
4747
|
|
4685
4748
|
# @private
|
4686
|
-
def self.
|
4749
|
+
def self.include_unknowns(schema, cols)
|
4750
|
+
cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
|
4751
|
+
end
|
4752
|
+
|
4753
|
+
# @private
|
4754
|
+
def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
|
4755
|
+
raise Todo if schema_overrides
|
4756
|
+
|
4687
4757
|
if columns.is_a?(Hash)
|
4688
4758
|
columns = columns.to_a
|
4689
4759
|
end
|
@@ -4727,8 +4797,48 @@ module Polars
|
|
4727
4797
|
end
|
4728
4798
|
end
|
4729
4799
|
|
4800
|
+
def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
|
4801
|
+
rbdf_columns = rbdf.columns
|
4802
|
+
rbdf_dtypes = rbdf.dtypes
|
4803
|
+
columns, dtypes = _unpack_columns(
|
4804
|
+
(columns || rbdf_columns), schema_overrides: schema_overrides
|
4805
|
+
)
|
4806
|
+
column_subset = []
|
4807
|
+
if columns != rbdf_columns
|
4808
|
+
if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
|
4809
|
+
column_subset = columns
|
4810
|
+
else
|
4811
|
+
rbdf.set_column_names(columns)
|
4812
|
+
end
|
4813
|
+
end
|
4814
|
+
|
4815
|
+
column_casts = []
|
4816
|
+
columns.each do |col, i|
|
4817
|
+
if dtypes[col] == Categorical # != rbdf_dtypes[i]
|
4818
|
+
column_casts << Polars.col(col).cast(Categorical)._rbexpr
|
4819
|
+
elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
|
4820
|
+
column_casts << Polars.col(col).cast(structs[col])._rbexpr
|
4821
|
+
elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
|
4822
|
+
column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
|
4823
|
+
end
|
4824
|
+
end
|
4825
|
+
|
4826
|
+
if column_casts.any? || column_subset.any?
|
4827
|
+
rbdf = rbdf.lazy
|
4828
|
+
if column_casts.any?
|
4829
|
+
rbdf = rbdf.with_columns(column_casts)
|
4830
|
+
end
|
4831
|
+
if column_subset.any?
|
4832
|
+
rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
|
4833
|
+
end
|
4834
|
+
rbdf = rbdf.collect
|
4835
|
+
end
|
4836
|
+
|
4837
|
+
rbdf
|
4838
|
+
end
|
4839
|
+
|
4730
4840
|
# @private
|
4731
|
-
def self.sequence_to_rbdf(data, columns: nil, orient: nil)
|
4841
|
+
def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
|
4732
4842
|
if data.length == 0
|
4733
4843
|
return hash_to_rbdf({}, columns: columns)
|
4734
4844
|
end
|
@@ -4740,6 +4850,14 @@ module Polars
|
|
4740
4850
|
data.each do |s|
|
4741
4851
|
data_series << s._s
|
4742
4852
|
end
|
4853
|
+
elsif data[0].is_a?(Hash)
|
4854
|
+
column_names, dtypes = _unpack_columns(columns)
|
4855
|
+
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
4856
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
|
4857
|
+
if column_names
|
4858
|
+
rbdf = _post_apply_columns(rbdf, column_names)
|
4859
|
+
end
|
4860
|
+
return rbdf
|
4743
4861
|
elsif data[0].is_a?(Array)
|
4744
4862
|
if orient.nil? && !columns.nil?
|
4745
4863
|
orient = columns.length == data.length ? "col" : "row"
|
data/lib/polars/data_types.rb
CHANGED
@@ -3,44 +3,64 @@ module Polars
|
|
3
3
|
class DataType
|
4
4
|
end
|
5
5
|
|
6
|
+
# Base class for numeric data types.
|
7
|
+
class NumericType < DataType
|
8
|
+
end
|
9
|
+
|
10
|
+
# Base class for integral data types.
|
11
|
+
class IntegralType < NumericType
|
12
|
+
end
|
13
|
+
|
14
|
+
# Base class for fractional data types.
|
15
|
+
class FractionalType < NumericType
|
16
|
+
end
|
17
|
+
|
18
|
+
# Base class for temporal data types.
|
19
|
+
class TemporalType < DataType
|
20
|
+
end
|
21
|
+
|
22
|
+
# Base class for nested data types.
|
23
|
+
class NestedType < DataType
|
24
|
+
end
|
25
|
+
|
6
26
|
# 8-bit signed integer type.
|
7
|
-
class Int8 <
|
27
|
+
class Int8 < IntegralType
|
8
28
|
end
|
9
29
|
|
10
30
|
# 16-bit signed integer type.
|
11
|
-
class Int16 <
|
31
|
+
class Int16 < IntegralType
|
12
32
|
end
|
13
33
|
|
14
34
|
# 32-bit signed integer type.
|
15
|
-
class Int32 <
|
35
|
+
class Int32 < IntegralType
|
16
36
|
end
|
17
37
|
|
18
38
|
# 64-bit signed integer type.
|
19
|
-
class Int64 <
|
39
|
+
class Int64 < IntegralType
|
20
40
|
end
|
21
41
|
|
22
42
|
# 8-bit unsigned integer type.
|
23
|
-
class UInt8 <
|
43
|
+
class UInt8 < IntegralType
|
24
44
|
end
|
25
45
|
|
26
46
|
# 16-bit unsigned integer type.
|
27
|
-
class UInt16 <
|
47
|
+
class UInt16 < IntegralType
|
28
48
|
end
|
29
49
|
|
30
50
|
# 32-bit unsigned integer type.
|
31
|
-
class UInt32 <
|
51
|
+
class UInt32 < IntegralType
|
32
52
|
end
|
33
53
|
|
34
54
|
# 64-bit unsigned integer type.
|
35
|
-
class UInt64 <
|
55
|
+
class UInt64 < IntegralType
|
36
56
|
end
|
37
57
|
|
38
58
|
# 32-bit floating point type.
|
39
|
-
class Float32 <
|
59
|
+
class Float32 < FractionalType
|
40
60
|
end
|
41
61
|
|
42
62
|
# 64-bit floating point type.
|
43
|
-
class Float64 <
|
63
|
+
class Float64 < FractionalType
|
44
64
|
end
|
45
65
|
|
46
66
|
# Boolean type.
|
@@ -51,31 +71,19 @@ module Polars
|
|
51
71
|
class Utf8 < DataType
|
52
72
|
end
|
53
73
|
|
54
|
-
# Binary type.
|
55
|
-
class Binary < DataType
|
56
|
-
end
|
57
|
-
|
58
|
-
# Type representing Null / None values.
|
59
|
-
class Null < DataType
|
60
|
-
end
|
61
|
-
|
62
|
-
# Type representing Datatype values that could not be determined statically.
|
63
|
-
class Unknown < DataType
|
64
|
-
end
|
65
|
-
|
66
74
|
# Nested list/array type.
|
67
|
-
class List <
|
75
|
+
class List < NestedType
|
68
76
|
def initialize(inner)
|
69
77
|
@inner = Utils.rb_type_to_dtype(inner)
|
70
78
|
end
|
71
79
|
end
|
72
80
|
|
73
81
|
# Calendar date type.
|
74
|
-
class Date <
|
82
|
+
class Date < TemporalType
|
75
83
|
end
|
76
84
|
|
77
85
|
# Calendar date and time type.
|
78
|
-
class Datetime <
|
86
|
+
class Datetime < TemporalType
|
79
87
|
def initialize(time_unit = "us", time_zone = nil)
|
80
88
|
@tu = time_unit || "us"
|
81
89
|
@time_zone = time_zone
|
@@ -83,14 +91,14 @@ module Polars
|
|
83
91
|
end
|
84
92
|
|
85
93
|
# Time duration/delta type.
|
86
|
-
class Duration <
|
94
|
+
class Duration < TemporalType
|
87
95
|
def initialize(time_unit = "us")
|
88
96
|
@tu = time_unit
|
89
97
|
end
|
90
98
|
end
|
91
99
|
|
92
100
|
# Time of day type.
|
93
|
-
class Time <
|
101
|
+
class Time < TemporalType
|
94
102
|
end
|
95
103
|
|
96
104
|
# Type for wrapping arbitrary Ruby objects.
|
@@ -102,15 +110,24 @@ module Polars
|
|
102
110
|
end
|
103
111
|
|
104
112
|
# Definition of a single field within a `Struct` DataType.
|
105
|
-
class Field
|
113
|
+
class Field
|
114
|
+
attr_reader :name, :dtype
|
115
|
+
|
106
116
|
def initialize(name, dtype)
|
107
117
|
@name = name
|
108
118
|
@dtype = Utils.rb_type_to_dtype(dtype)
|
109
119
|
end
|
120
|
+
|
121
|
+
def inspect
|
122
|
+
class_name = self.class.name
|
123
|
+
"#{class_name}(#{@name}: #{@dtype})"
|
124
|
+
end
|
110
125
|
end
|
111
126
|
|
112
127
|
# Struct composite type.
|
113
|
-
class Struct <
|
128
|
+
class Struct < NestedType
|
129
|
+
attr_reader :fields
|
130
|
+
|
114
131
|
def initialize(fields)
|
115
132
|
if fields.is_a?(Hash)
|
116
133
|
@fields = fields.map { |n, d| Field.new(n, d) }
|
@@ -118,5 +135,26 @@ module Polars
|
|
118
135
|
@fields = fields
|
119
136
|
end
|
120
137
|
end
|
138
|
+
|
139
|
+
def inspect
|
140
|
+
class_name = self.class.name
|
141
|
+
"#{class_name}(#{@fields})"
|
142
|
+
end
|
143
|
+
|
144
|
+
def to_schema
|
145
|
+
@fields.to_h { |f| [f.name, f.dtype] }
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# Binary type.
|
150
|
+
class Binary < DataType
|
151
|
+
end
|
152
|
+
|
153
|
+
# Type representing Null / None values.
|
154
|
+
class Null < DataType
|
155
|
+
end
|
156
|
+
|
157
|
+
# Type representing Datatype values that could not be determined statically.
|
158
|
+
class Unknown < DataType
|
121
159
|
end
|
122
160
|
end
|
data/lib/polars/io.rb
CHANGED
@@ -606,8 +606,10 @@ module Polars
|
|
606
606
|
sql
|
607
607
|
elsif sql.is_a?(ActiveRecord::Relation)
|
608
608
|
sql.connection.select_all(sql.to_sql)
|
609
|
+
elsif sql.is_a?(String)
|
610
|
+
ActiveRecord::Base.connection.select_all(sql)
|
609
611
|
else
|
610
|
-
raise ArgumentError, "Expected ActiveRecord::Relation
|
612
|
+
raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
|
611
613
|
end
|
612
614
|
data = {}
|
613
615
|
result.columns.each_with_index do |k, i|
|
data/lib/polars/series.rb
CHANGED
@@ -263,6 +263,10 @@ module Polars
|
|
263
263
|
#
|
264
264
|
# @return [Object]
|
265
265
|
def [](item)
|
266
|
+
if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
|
267
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
268
|
+
end
|
269
|
+
|
266
270
|
if item.is_a?(Integer)
|
267
271
|
return _s.get_idx(item)
|
268
272
|
end
|
@@ -271,6 +275,10 @@ module Polars
|
|
271
275
|
return Slice.new(self).apply(item)
|
272
276
|
end
|
273
277
|
|
278
|
+
if Utils.is_int_sequence(item)
|
279
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
|
280
|
+
end
|
281
|
+
|
274
282
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
275
283
|
end
|
276
284
|
|
@@ -287,24 +295,23 @@ module Polars
|
|
287
295
|
end
|
288
296
|
|
289
297
|
if key.is_a?(Series)
|
290
|
-
if key.dtype ==
|
298
|
+
if key.dtype == Boolean
|
291
299
|
self._s = set(key, value)._s
|
292
|
-
elsif key.dtype ==
|
293
|
-
self._s = set_at_idx(key.cast(
|
294
|
-
elsif key.dtype ==
|
300
|
+
elsif key.dtype == UInt64
|
301
|
+
self._s = set_at_idx(key.cast(UInt32), value)._s
|
302
|
+
elsif key.dtype == UInt32
|
295
303
|
self._s = set_at_idx(key, value)._s
|
296
304
|
else
|
297
305
|
raise Todo
|
298
306
|
end
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
307
|
+
elsif key.is_a?(Array)
|
308
|
+
s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
|
309
|
+
self[s] = value
|
310
|
+
elsif key.is_a?(Range)
|
311
|
+
s = Series.new("", key, dtype: UInt32)
|
303
312
|
self[s] = value
|
304
313
|
elsif key.is_a?(Integer)
|
305
|
-
|
306
|
-
# self[[key]] = value
|
307
|
-
set_at_idx(key, value)
|
314
|
+
self[[key]] = value
|
308
315
|
else
|
309
316
|
raise ArgumentError, "cannot use #{key} for indexing"
|
310
317
|
end
|
@@ -3527,6 +3534,59 @@ module Polars
|
|
3527
3534
|
end
|
3528
3535
|
end
|
3529
3536
|
|
3537
|
+
def _pos_idxs(idxs)
|
3538
|
+
idx_type = Polars._get_idx_type
|
3539
|
+
|
3540
|
+
if idxs.is_a?(Series)
|
3541
|
+
if idxs.dtype == idx_type
|
3542
|
+
return idxs
|
3543
|
+
end
|
3544
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
3545
|
+
if idx_type == UInt32
|
3546
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
3547
|
+
if idxs.max >= 2**32
|
3548
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
3549
|
+
end
|
3550
|
+
end
|
3551
|
+
if idxs.dtype == Int64
|
3552
|
+
if idxs.min < -(2**32)
|
3553
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
3554
|
+
end
|
3555
|
+
end
|
3556
|
+
end
|
3557
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
3558
|
+
if idxs.min < 0
|
3559
|
+
if idx_type == UInt32
|
3560
|
+
if [Int8, Int16].include?(idxs.dtype)
|
3561
|
+
idxs = idxs.cast(Int32)
|
3562
|
+
end
|
3563
|
+
else
|
3564
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
3565
|
+
idxs = idxs.cast(Int64)
|
3566
|
+
end
|
3567
|
+
end
|
3568
|
+
|
3569
|
+
# Update negative indexes to absolute indexes.
|
3570
|
+
return (
|
3571
|
+
idxs.to_frame
|
3572
|
+
.select(
|
3573
|
+
Polars.when(Polars.col(idxs.name) < 0)
|
3574
|
+
.then(len + Polars.col(idxs.name))
|
3575
|
+
.otherwise(Polars.col(idxs.name))
|
3576
|
+
.cast(idx_type)
|
3577
|
+
)
|
3578
|
+
.to_series(0)
|
3579
|
+
)
|
3580
|
+
end
|
3581
|
+
end
|
3582
|
+
|
3583
|
+
return idxs.cast(idx_type)
|
3584
|
+
end
|
3585
|
+
end
|
3586
|
+
|
3587
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
3588
|
+
end
|
3589
|
+
|
3530
3590
|
def _comp(other, op)
|
3531
3591
|
if other.is_a?(Series)
|
3532
3592
|
return Utils.wrap_s(_s.send(op, other._s))
|
@@ -3607,6 +3667,11 @@ module Polars
|
|
3607
3667
|
rb_temporal_types << ::Time if defined?(::Time)
|
3608
3668
|
|
3609
3669
|
value = _get_first_non_none(values)
|
3670
|
+
if !value.nil?
|
3671
|
+
if value.is_a?(Hash)
|
3672
|
+
return DataFrame.new(values).to_struct(name)._s
|
3673
|
+
end
|
3674
|
+
end
|
3610
3675
|
|
3611
3676
|
if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
3612
3677
|
constructor = polars_type_to_constructor(dtype)
|
data/lib/polars/slice.rb
CHANGED
@@ -56,7 +56,7 @@ module Polars
|
|
56
56
|
# Normalize slice bounds, identify unbounded and/or zero-length slices.
|
57
57
|
def _slice_setup(s)
|
58
58
|
# can normalize slice indices as we know object size
|
59
|
-
obj_len = @obj.
|
59
|
+
obj_len = @obj.length
|
60
60
|
start = if s.begin
|
61
61
|
if s.begin < 0
|
62
62
|
[s.begin + obj_len, 0].max
|
data/lib/polars/utils.rb
CHANGED
@@ -160,11 +160,11 @@ module Polars
|
|
160
160
|
|
161
161
|
def self.scale_bytes(sz, to:)
|
162
162
|
scaling_factor = {
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
163
|
+
"b" => 1,
|
164
|
+
"k" => 1024,
|
165
|
+
"m" => 1024 ** 2,
|
166
|
+
"g" => 1024 ** 3,
|
167
|
+
"t" => 1024 ** 4
|
168
168
|
}[to[0]]
|
169
169
|
if scaling_factor > 1
|
170
170
|
sz / scaling_factor.to_f
|
@@ -181,6 +181,26 @@ module Polars
|
|
181
181
|
val.all? { |x| x.is_a?(eltype) }
|
182
182
|
end
|
183
183
|
|
184
|
+
def self.is_bool_sequence(val)
|
185
|
+
val.is_a?(Array) && val.all? { |x| x == true || x == false }
|
186
|
+
end
|
187
|
+
|
188
|
+
def self.is_dtype_sequence(val)
|
189
|
+
val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
|
190
|
+
end
|
191
|
+
|
192
|
+
def self.is_int_sequence(val)
|
193
|
+
val.is_a?(Array) && _is_iterable_of(val, Integer)
|
194
|
+
end
|
195
|
+
|
196
|
+
def self.is_expr_sequence(val)
|
197
|
+
val.is_a?(Array) && _is_iterable_of(val, Expr)
|
198
|
+
end
|
199
|
+
|
200
|
+
def self.is_rbexpr_sequence(val)
|
201
|
+
val.is_a?(Array) && _is_iterable_of(val, RbExpr)
|
202
|
+
end
|
203
|
+
|
184
204
|
def self.is_str_sequence(val, allow_str: false)
|
185
205
|
if allow_str == false && val.is_a?(String)
|
186
206
|
false
|
data/lib/polars/version.rb
CHANGED
data/lib/polars-df.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
require_relative "polars"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
version: '0'
|
84
84
|
requirements: []
|
85
|
-
rubygems_version: 3.4.
|
85
|
+
rubygems_version: 3.4.4
|
86
86
|
signing_key:
|
87
87
|
specification_version: 4
|
88
88
|
summary: Blazingly fast DataFrames for Ruby
|