polars-df 0.2.1-x86_64-darwin → 0.2.3-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +1 -1
- data/LICENSE-THIRD-PARTY.txt +80 -80
- data/README.md +10 -5
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/data_frame.rb +123 -5
- data/lib/polars/data_types.rb +67 -29
- data/lib/polars/io.rb +3 -1
- data/lib/polars/series.rb +76 -11
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/utils.rb +25 -5
- data/lib/polars/version.rb +1 -1
- data/lib/polars-df.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a67aba6106276247072fe14bcd35b3381e890844ea50321c443cbcc815b84ca3
|
4
|
+
data.tar.gz: c5c3388ed010e8946903b7d8ed30fc24ddbf2a34b9eba23d1c178b476faebc7a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1343d81460555ed4baa13962ff7316814b64e3c7c6c363c6d3d7b01221bd83abd9ad62b9bd8dffb4bd7dbb30716bae5d5fc20d7830e157631767ac186638bd24
|
7
|
+
data.tar.gz: f02495aea654767675444e57ea764cb44e5e34123049cdf541c3b2b47df9d7ddc1dd9bb0499ddda6ee360e3983b0184f35bc6e5fcc28f93e49af3476960770eb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
## 0.2.3 (2023-01-22)
|
2
|
+
|
3
|
+
- Fixed error with precompiled gem on Mac ARM
|
4
|
+
- Fixed issue with structs
|
5
|
+
|
6
|
+
## 0.2.2 (2023-01-20)
|
7
|
+
|
8
|
+
- Added support for strings to `read_sql` method
|
9
|
+
- Improved indexing
|
10
|
+
- Fixed error with precompiled gem on Mac ARM
|
11
|
+
|
1
12
|
## 0.2.1 (2023-01-18)
|
2
13
|
|
3
14
|
- Added `read_sql` method
|
data/Cargo.lock
CHANGED
data/LICENSE-THIRD-PARTY.txt
CHANGED
@@ -1640,6 +1640,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
|
1640
1640
|
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
1641
1641
|
DEALINGS IN THE SOFTWARE.
|
1642
1642
|
|
1643
|
+
================================================================================
|
1644
|
+
array-init-cursor LICENSE-MIT
|
1645
|
+
================================================================================
|
1646
|
+
|
1647
|
+
MIT License
|
1648
|
+
|
1649
|
+
Copyright (c) 2021 The Planus Project Developers
|
1650
|
+
|
1651
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
1652
|
+
of this software and associated documentation files (the "Software"), to deal
|
1653
|
+
in the Software without restriction, including without limitation the rights
|
1654
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
1655
|
+
copies of the Software, and to permit persons to whom the Software is
|
1656
|
+
furnished to do so, subject to the following conditions:
|
1657
|
+
|
1658
|
+
The above copyright notice and this permission notice shall be included in all
|
1659
|
+
copies or substantial portions of the Software.
|
1660
|
+
|
1661
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
1662
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
1663
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
1664
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
1665
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
1666
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
1667
|
+
SOFTWARE.
|
1668
|
+
|
1669
|
+
|
1643
1670
|
================================================================================
|
1644
1671
|
array-init-cursor LICENSE-APACHE
|
1645
1672
|
================================================================================
|
@@ -1822,33 +1849,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
1822
1849
|
END OF TERMS AND CONDITIONS
|
1823
1850
|
|
1824
1851
|
|
1825
|
-
================================================================================
|
1826
|
-
array-init-cursor LICENSE-MIT
|
1827
|
-
================================================================================
|
1828
|
-
|
1829
|
-
MIT License
|
1830
|
-
|
1831
|
-
Copyright (c) 2021 The Planus Project Developers
|
1832
|
-
|
1833
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
1834
|
-
of this software and associated documentation files (the "Software"), to deal
|
1835
|
-
in the Software without restriction, including without limitation the rights
|
1836
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
1837
|
-
copies of the Software, and to permit persons to whom the Software is
|
1838
|
-
furnished to do so, subject to the following conditions:
|
1839
|
-
|
1840
|
-
The above copyright notice and this permission notice shall be included in all
|
1841
|
-
copies or substantial portions of the Software.
|
1842
|
-
|
1843
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
1844
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
1845
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
1846
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
1847
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
1848
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
1849
|
-
SOFTWARE.
|
1850
|
-
|
1851
|
-
|
1852
1852
|
================================================================================
|
1853
1853
|
arrow-format LICENSE
|
1854
1854
|
================================================================================
|
@@ -27103,6 +27103,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
|
27103
27103
|
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
27104
27104
|
DEALINGS IN THE SOFTWARE.
|
27105
27105
|
|
27106
|
+
================================================================================
|
27107
|
+
planus LICENSE-MIT
|
27108
|
+
================================================================================
|
27109
|
+
|
27110
|
+
MIT License
|
27111
|
+
|
27112
|
+
Copyright (c) 2021 The Planus Project Developers
|
27113
|
+
|
27114
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
27115
|
+
of this software and associated documentation files (the "Software"), to deal
|
27116
|
+
in the Software without restriction, including without limitation the rights
|
27117
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
27118
|
+
copies of the Software, and to permit persons to whom the Software is
|
27119
|
+
furnished to do so, subject to the following conditions:
|
27120
|
+
|
27121
|
+
The above copyright notice and this permission notice shall be included in all
|
27122
|
+
copies or substantial portions of the Software.
|
27123
|
+
|
27124
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
27125
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
27126
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
27127
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
27128
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27129
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
27130
|
+
SOFTWARE.
|
27131
|
+
|
27132
|
+
|
27106
27133
|
================================================================================
|
27107
27134
|
planus LICENSE-APACHE
|
27108
27135
|
================================================================================
|
@@ -27285,33 +27312,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
27285
27312
|
END OF TERMS AND CONDITIONS
|
27286
27313
|
|
27287
27314
|
|
27288
|
-
================================================================================
|
27289
|
-
planus LICENSE-MIT
|
27290
|
-
================================================================================
|
27291
|
-
|
27292
|
-
MIT License
|
27293
|
-
|
27294
|
-
Copyright (c) 2021 The Planus Project Developers
|
27295
|
-
|
27296
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
27297
|
-
of this software and associated documentation files (the "Software"), to deal
|
27298
|
-
in the Software without restriction, including without limitation the rights
|
27299
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
27300
|
-
copies of the Software, and to permit persons to whom the Software is
|
27301
|
-
furnished to do so, subject to the following conditions:
|
27302
|
-
|
27303
|
-
The above copyright notice and this permission notice shall be included in all
|
27304
|
-
copies or substantial portions of the Software.
|
27305
|
-
|
27306
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
27307
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
27308
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
27309
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
27310
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
27311
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
27312
|
-
SOFTWARE.
|
27313
|
-
|
27314
|
-
|
27315
27315
|
================================================================================
|
27316
27316
|
polars LICENSE
|
27317
27317
|
================================================================================
|
@@ -30064,6 +30064,32 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
30064
30064
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
30065
30065
|
SOFTWARE.
|
30066
30066
|
|
30067
|
+
================================================================================
|
30068
|
+
rb-sys-env LICENSE-MIT
|
30069
|
+
================================================================================
|
30070
|
+
|
30071
|
+
The MIT License (MIT)
|
30072
|
+
|
30073
|
+
Copyright (c) 2021-2022 Ian Ker-Seymer
|
30074
|
+
|
30075
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
30076
|
+
of this software and associated documentation files (the "Software"), to deal
|
30077
|
+
in the Software without restriction, including without limitation the rights
|
30078
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
30079
|
+
copies of the Software, and to permit persons to whom the Software is
|
30080
|
+
furnished to do so, subject to the following conditions:
|
30081
|
+
|
30082
|
+
The above copyright notice and this permission notice shall be included in all
|
30083
|
+
copies or substantial portions of the Software.
|
30084
|
+
|
30085
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
30086
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
30087
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
30088
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
30089
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
30090
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
30091
|
+
SOFTWARE.
|
30092
|
+
|
30067
30093
|
================================================================================
|
30068
30094
|
rb-sys-env LICENSE-APACHE
|
30069
30095
|
================================================================================
|
@@ -30259,32 +30285,6 @@ rb-sys-env LICENSE-APACHE
|
|
30259
30285
|
See the License for the specific language governing permissions and
|
30260
30286
|
limitations under the License.
|
30261
30287
|
|
30262
|
-
================================================================================
|
30263
|
-
rb-sys-env LICENSE-MIT
|
30264
|
-
================================================================================
|
30265
|
-
|
30266
|
-
The MIT License (MIT)
|
30267
|
-
|
30268
|
-
Copyright (c) 2021-2022 Ian Ker-Seymer
|
30269
|
-
|
30270
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
30271
|
-
of this software and associated documentation files (the "Software"), to deal
|
30272
|
-
in the Software without restriction, including without limitation the rights
|
30273
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
30274
|
-
copies of the Software, and to permit persons to whom the Software is
|
30275
|
-
furnished to do so, subject to the following conditions:
|
30276
|
-
|
30277
|
-
The above copyright notice and this permission notice shall be included in all
|
30278
|
-
copies or substantial portions of the Software.
|
30279
|
-
|
30280
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
30281
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
30282
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
30283
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
30284
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
30285
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
30286
|
-
SOFTWARE.
|
30287
|
-
|
30288
30288
|
================================================================================
|
30289
30289
|
regex LICENSE-APACHE
|
30290
30290
|
================================================================================
|
data/README.md
CHANGED
@@ -41,6 +41,9 @@ From a CSV
|
|
41
41
|
|
42
42
|
```ruby
|
43
43
|
Polars.read_csv("file.csv")
|
44
|
+
|
45
|
+
# or lazily with
|
46
|
+
Polars.scan_csv("file.csv")
|
44
47
|
```
|
45
48
|
|
46
49
|
From Parquet
|
@@ -53,6 +56,8 @@ From Active Record
|
|
53
56
|
|
54
57
|
```ruby
|
55
58
|
Polars.read_sql(User.all)
|
59
|
+
# or
|
60
|
+
Polars.read_sql("SELECT * FROM users")
|
56
61
|
```
|
57
62
|
|
58
63
|
From a hash
|
@@ -135,9 +140,9 @@ df[Polars.col("a") <= 2]
|
|
135
140
|
And, or, and exclusive or
|
136
141
|
|
137
142
|
```ruby
|
138
|
-
df[(Polars.col("a") >
|
139
|
-
df[(Polars.col("a") >
|
140
|
-
df[(Polars.col("a") >
|
143
|
+
df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
|
144
|
+
df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
|
145
|
+
df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
|
141
146
|
```
|
142
147
|
|
143
148
|
## Operations
|
@@ -284,13 +289,13 @@ CSV
|
|
284
289
|
```ruby
|
285
290
|
df.to_csv
|
286
291
|
# or
|
287
|
-
df.write_csv("
|
292
|
+
df.write_csv("file.csv")
|
288
293
|
```
|
289
294
|
|
290
295
|
Parquet
|
291
296
|
|
292
297
|
```ruby
|
293
|
-
df.write_parquet("
|
298
|
+
df.write_parquet("file.parquet")
|
294
299
|
```
|
295
300
|
|
296
301
|
## Types
|
Binary file
|
Binary file
|
Binary file
|
data/lib/polars/data_frame.rb
CHANGED
@@ -277,6 +277,7 @@ module Polars
|
|
277
277
|
_df.height
|
278
278
|
end
|
279
279
|
alias_method :count, :height
|
280
|
+
alias_method :length, :height
|
280
281
|
|
281
282
|
# Get the width of the DataFrame.
|
282
283
|
#
|
@@ -541,7 +542,7 @@ module Polars
|
|
541
542
|
|
542
543
|
if col_selection.is_a?(Array)
|
543
544
|
# df[.., [1, 2]]
|
544
|
-
if is_int_sequence(col_selection)
|
545
|
+
if Utils.is_int_sequence(col_selection)
|
545
546
|
series_list = col_selection.map { |i| to_series(i) }
|
546
547
|
df = self.class.new(series_list)
|
547
548
|
return df[row_selection]
|
@@ -574,6 +575,23 @@ module Polars
|
|
574
575
|
# df[["foo", "bar"]]
|
575
576
|
return _from_rbdf(_df.select(item))
|
576
577
|
end
|
578
|
+
|
579
|
+
if Utils.is_int_sequence(item)
|
580
|
+
item = Series.new("", item)
|
581
|
+
end
|
582
|
+
|
583
|
+
if item.is_a?(Series)
|
584
|
+
dtype = item.dtype
|
585
|
+
if dtype == Utf8
|
586
|
+
return _from_rbdf(_df.select(item))
|
587
|
+
elsif dtype == UInt32
|
588
|
+
return _from_rbdf(_df.take_with_series(item._s))
|
589
|
+
elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
|
590
|
+
return _from_rbdf(
|
591
|
+
_df.take_with_series(_pos_idxs(item, 0)._s)
|
592
|
+
)
|
593
|
+
end
|
594
|
+
end
|
577
595
|
end
|
578
596
|
|
579
597
|
# Ruby-specific
|
@@ -4662,8 +4680,53 @@ module Polars
|
|
4662
4680
|
end
|
4663
4681
|
end
|
4664
4682
|
|
4665
|
-
|
4666
|
-
|
4683
|
+
def _pos_idxs(idxs, dim)
|
4684
|
+
idx_type = Polars._get_idx_type
|
4685
|
+
|
4686
|
+
if idxs.is_a?(Series)
|
4687
|
+
if idxs.dtype == idx_type
|
4688
|
+
return idxs
|
4689
|
+
end
|
4690
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
4691
|
+
if idx_type == UInt32
|
4692
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
4693
|
+
if idxs.max >= 2**32
|
4694
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
4695
|
+
end
|
4696
|
+
end
|
4697
|
+
if idxs.dtype == Int64
|
4698
|
+
if idxs.min < -(2**32)
|
4699
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
4700
|
+
end
|
4701
|
+
end
|
4702
|
+
end
|
4703
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
4704
|
+
if idxs.min < 0
|
4705
|
+
if idx_type == UInt32
|
4706
|
+
if [Int8, Int16].include?(idxs.dtype)
|
4707
|
+
idxs = idxs.cast(Int32)
|
4708
|
+
end
|
4709
|
+
else
|
4710
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
4711
|
+
idxs = idxs.cast(Int64)
|
4712
|
+
end
|
4713
|
+
end
|
4714
|
+
|
4715
|
+
idxs =
|
4716
|
+
Polars.select(
|
4717
|
+
Polars.when(Polars.lit(idxs) < 0)
|
4718
|
+
.then(shape[dim] + Polars.lit(idxs))
|
4719
|
+
.otherwise(Polars.lit(idxs))
|
4720
|
+
).to_series
|
4721
|
+
end
|
4722
|
+
end
|
4723
|
+
|
4724
|
+
return idxs.cast(idx_type)
|
4725
|
+
end
|
4726
|
+
end
|
4727
|
+
|
4728
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
4729
|
+
end
|
4667
4730
|
|
4668
4731
|
# @private
|
4669
4732
|
def self.hash_to_rbdf(data, columns: nil)
|
@@ -4683,7 +4746,14 @@ module Polars
|
|
4683
4746
|
end
|
4684
4747
|
|
4685
4748
|
# @private
|
4686
|
-
def self.
|
4749
|
+
def self.include_unknowns(schema, cols)
|
4750
|
+
cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
|
4751
|
+
end
|
4752
|
+
|
4753
|
+
# @private
|
4754
|
+
def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
|
4755
|
+
raise Todo if schema_overrides
|
4756
|
+
|
4687
4757
|
if columns.is_a?(Hash)
|
4688
4758
|
columns = columns.to_a
|
4689
4759
|
end
|
@@ -4727,8 +4797,48 @@ module Polars
|
|
4727
4797
|
end
|
4728
4798
|
end
|
4729
4799
|
|
4800
|
+
def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
|
4801
|
+
rbdf_columns = rbdf.columns
|
4802
|
+
rbdf_dtypes = rbdf.dtypes
|
4803
|
+
columns, dtypes = _unpack_columns(
|
4804
|
+
(columns || rbdf_columns), schema_overrides: schema_overrides
|
4805
|
+
)
|
4806
|
+
column_subset = []
|
4807
|
+
if columns != rbdf_columns
|
4808
|
+
if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
|
4809
|
+
column_subset = columns
|
4810
|
+
else
|
4811
|
+
rbdf.set_column_names(columns)
|
4812
|
+
end
|
4813
|
+
end
|
4814
|
+
|
4815
|
+
column_casts = []
|
4816
|
+
columns.each do |col, i|
|
4817
|
+
if dtypes[col] == Categorical # != rbdf_dtypes[i]
|
4818
|
+
column_casts << Polars.col(col).cast(Categorical)._rbexpr
|
4819
|
+
elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
|
4820
|
+
column_casts << Polars.col(col).cast(structs[col])._rbexpr
|
4821
|
+
elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
|
4822
|
+
column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
|
4823
|
+
end
|
4824
|
+
end
|
4825
|
+
|
4826
|
+
if column_casts.any? || column_subset.any?
|
4827
|
+
rbdf = rbdf.lazy
|
4828
|
+
if column_casts.any?
|
4829
|
+
rbdf = rbdf.with_columns(column_casts)
|
4830
|
+
end
|
4831
|
+
if column_subset.any?
|
4832
|
+
rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
|
4833
|
+
end
|
4834
|
+
rbdf = rbdf.collect
|
4835
|
+
end
|
4836
|
+
|
4837
|
+
rbdf
|
4838
|
+
end
|
4839
|
+
|
4730
4840
|
# @private
|
4731
|
-
def self.sequence_to_rbdf(data, columns: nil, orient: nil)
|
4841
|
+
def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
|
4732
4842
|
if data.length == 0
|
4733
4843
|
return hash_to_rbdf({}, columns: columns)
|
4734
4844
|
end
|
@@ -4740,6 +4850,14 @@ module Polars
|
|
4740
4850
|
data.each do |s|
|
4741
4851
|
data_series << s._s
|
4742
4852
|
end
|
4853
|
+
elsif data[0].is_a?(Hash)
|
4854
|
+
column_names, dtypes = _unpack_columns(columns)
|
4855
|
+
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
4856
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
|
4857
|
+
if column_names
|
4858
|
+
rbdf = _post_apply_columns(rbdf, column_names)
|
4859
|
+
end
|
4860
|
+
return rbdf
|
4743
4861
|
elsif data[0].is_a?(Array)
|
4744
4862
|
if orient.nil? && !columns.nil?
|
4745
4863
|
orient = columns.length == data.length ? "col" : "row"
|
data/lib/polars/data_types.rb
CHANGED
@@ -3,44 +3,64 @@ module Polars
|
|
3
3
|
class DataType
|
4
4
|
end
|
5
5
|
|
6
|
+
# Base class for numeric data types.
|
7
|
+
class NumericType < DataType
|
8
|
+
end
|
9
|
+
|
10
|
+
# Base class for integral data types.
|
11
|
+
class IntegralType < NumericType
|
12
|
+
end
|
13
|
+
|
14
|
+
# Base class for fractional data types.
|
15
|
+
class FractionalType < NumericType
|
16
|
+
end
|
17
|
+
|
18
|
+
# Base class for temporal data types.
|
19
|
+
class TemporalType < DataType
|
20
|
+
end
|
21
|
+
|
22
|
+
# Base class for nested data types.
|
23
|
+
class NestedType < DataType
|
24
|
+
end
|
25
|
+
|
6
26
|
# 8-bit signed integer type.
|
7
|
-
class Int8 <
|
27
|
+
class Int8 < IntegralType
|
8
28
|
end
|
9
29
|
|
10
30
|
# 16-bit signed integer type.
|
11
|
-
class Int16 <
|
31
|
+
class Int16 < IntegralType
|
12
32
|
end
|
13
33
|
|
14
34
|
# 32-bit signed integer type.
|
15
|
-
class Int32 <
|
35
|
+
class Int32 < IntegralType
|
16
36
|
end
|
17
37
|
|
18
38
|
# 64-bit signed integer type.
|
19
|
-
class Int64 <
|
39
|
+
class Int64 < IntegralType
|
20
40
|
end
|
21
41
|
|
22
42
|
# 8-bit unsigned integer type.
|
23
|
-
class UInt8 <
|
43
|
+
class UInt8 < IntegralType
|
24
44
|
end
|
25
45
|
|
26
46
|
# 16-bit unsigned integer type.
|
27
|
-
class UInt16 <
|
47
|
+
class UInt16 < IntegralType
|
28
48
|
end
|
29
49
|
|
30
50
|
# 32-bit unsigned integer type.
|
31
|
-
class UInt32 <
|
51
|
+
class UInt32 < IntegralType
|
32
52
|
end
|
33
53
|
|
34
54
|
# 64-bit unsigned integer type.
|
35
|
-
class UInt64 <
|
55
|
+
class UInt64 < IntegralType
|
36
56
|
end
|
37
57
|
|
38
58
|
# 32-bit floating point type.
|
39
|
-
class Float32 <
|
59
|
+
class Float32 < FractionalType
|
40
60
|
end
|
41
61
|
|
42
62
|
# 64-bit floating point type.
|
43
|
-
class Float64 <
|
63
|
+
class Float64 < FractionalType
|
44
64
|
end
|
45
65
|
|
46
66
|
# Boolean type.
|
@@ -51,31 +71,19 @@ module Polars
|
|
51
71
|
class Utf8 < DataType
|
52
72
|
end
|
53
73
|
|
54
|
-
# Binary type.
|
55
|
-
class Binary < DataType
|
56
|
-
end
|
57
|
-
|
58
|
-
# Type representing Null / None values.
|
59
|
-
class Null < DataType
|
60
|
-
end
|
61
|
-
|
62
|
-
# Type representing Datatype values that could not be determined statically.
|
63
|
-
class Unknown < DataType
|
64
|
-
end
|
65
|
-
|
66
74
|
# Nested list/array type.
|
67
|
-
class List <
|
75
|
+
class List < NestedType
|
68
76
|
def initialize(inner)
|
69
77
|
@inner = Utils.rb_type_to_dtype(inner)
|
70
78
|
end
|
71
79
|
end
|
72
80
|
|
73
81
|
# Calendar date type.
|
74
|
-
class Date <
|
82
|
+
class Date < TemporalType
|
75
83
|
end
|
76
84
|
|
77
85
|
# Calendar date and time type.
|
78
|
-
class Datetime <
|
86
|
+
class Datetime < TemporalType
|
79
87
|
def initialize(time_unit = "us", time_zone = nil)
|
80
88
|
@tu = time_unit || "us"
|
81
89
|
@time_zone = time_zone
|
@@ -83,14 +91,14 @@ module Polars
|
|
83
91
|
end
|
84
92
|
|
85
93
|
# Time duration/delta type.
|
86
|
-
class Duration <
|
94
|
+
class Duration < TemporalType
|
87
95
|
def initialize(time_unit = "us")
|
88
96
|
@tu = time_unit
|
89
97
|
end
|
90
98
|
end
|
91
99
|
|
92
100
|
# Time of day type.
|
93
|
-
class Time <
|
101
|
+
class Time < TemporalType
|
94
102
|
end
|
95
103
|
|
96
104
|
# Type for wrapping arbitrary Ruby objects.
|
@@ -102,15 +110,24 @@ module Polars
|
|
102
110
|
end
|
103
111
|
|
104
112
|
# Definition of a single field within a `Struct` DataType.
|
105
|
-
class Field
|
113
|
+
class Field
|
114
|
+
attr_reader :name, :dtype
|
115
|
+
|
106
116
|
def initialize(name, dtype)
|
107
117
|
@name = name
|
108
118
|
@dtype = Utils.rb_type_to_dtype(dtype)
|
109
119
|
end
|
120
|
+
|
121
|
+
def inspect
|
122
|
+
class_name = self.class.name
|
123
|
+
"#{class_name}(#{@name}: #{@dtype})"
|
124
|
+
end
|
110
125
|
end
|
111
126
|
|
112
127
|
# Struct composite type.
|
113
|
-
class Struct <
|
128
|
+
class Struct < NestedType
|
129
|
+
attr_reader :fields
|
130
|
+
|
114
131
|
def initialize(fields)
|
115
132
|
if fields.is_a?(Hash)
|
116
133
|
@fields = fields.map { |n, d| Field.new(n, d) }
|
@@ -118,5 +135,26 @@ module Polars
|
|
118
135
|
@fields = fields
|
119
136
|
end
|
120
137
|
end
|
138
|
+
|
139
|
+
def inspect
|
140
|
+
class_name = self.class.name
|
141
|
+
"#{class_name}(#{@fields})"
|
142
|
+
end
|
143
|
+
|
144
|
+
def to_schema
|
145
|
+
@fields.to_h { |f| [f.name, f.dtype] }
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# Binary type.
|
150
|
+
class Binary < DataType
|
151
|
+
end
|
152
|
+
|
153
|
+
# Type representing Null / None values.
|
154
|
+
class Null < DataType
|
155
|
+
end
|
156
|
+
|
157
|
+
# Type representing Datatype values that could not be determined statically.
|
158
|
+
class Unknown < DataType
|
121
159
|
end
|
122
160
|
end
|
data/lib/polars/io.rb
CHANGED
@@ -606,8 +606,10 @@ module Polars
|
|
606
606
|
sql
|
607
607
|
elsif sql.is_a?(ActiveRecord::Relation)
|
608
608
|
sql.connection.select_all(sql.to_sql)
|
609
|
+
elsif sql.is_a?(String)
|
610
|
+
ActiveRecord::Base.connection.select_all(sql)
|
609
611
|
else
|
610
|
-
raise ArgumentError, "Expected ActiveRecord::Relation
|
612
|
+
raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
|
611
613
|
end
|
612
614
|
data = {}
|
613
615
|
result.columns.each_with_index do |k, i|
|
data/lib/polars/series.rb
CHANGED
@@ -263,6 +263,10 @@ module Polars
|
|
263
263
|
#
|
264
264
|
# @return [Object]
|
265
265
|
def [](item)
|
266
|
+
if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
|
267
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
|
268
|
+
end
|
269
|
+
|
266
270
|
if item.is_a?(Integer)
|
267
271
|
return _s.get_idx(item)
|
268
272
|
end
|
@@ -271,6 +275,10 @@ module Polars
|
|
271
275
|
return Slice.new(self).apply(item)
|
272
276
|
end
|
273
277
|
|
278
|
+
if Utils.is_int_sequence(item)
|
279
|
+
return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
|
280
|
+
end
|
281
|
+
|
274
282
|
raise ArgumentError, "Cannot get item of type: #{item.class.name}"
|
275
283
|
end
|
276
284
|
|
@@ -287,24 +295,23 @@ module Polars
|
|
287
295
|
end
|
288
296
|
|
289
297
|
if key.is_a?(Series)
|
290
|
-
if key.dtype ==
|
298
|
+
if key.dtype == Boolean
|
291
299
|
self._s = set(key, value)._s
|
292
|
-
elsif key.dtype ==
|
293
|
-
self._s = set_at_idx(key.cast(
|
294
|
-
elsif key.dtype ==
|
300
|
+
elsif key.dtype == UInt64
|
301
|
+
self._s = set_at_idx(key.cast(UInt32), value)._s
|
302
|
+
elsif key.dtype == UInt32
|
295
303
|
self._s = set_at_idx(key, value)._s
|
296
304
|
else
|
297
305
|
raise Todo
|
298
306
|
end
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
307
|
+
elsif key.is_a?(Array)
|
308
|
+
s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
|
309
|
+
self[s] = value
|
310
|
+
elsif key.is_a?(Range)
|
311
|
+
s = Series.new("", key, dtype: UInt32)
|
303
312
|
self[s] = value
|
304
313
|
elsif key.is_a?(Integer)
|
305
|
-
|
306
|
-
# self[[key]] = value
|
307
|
-
set_at_idx(key, value)
|
314
|
+
self[[key]] = value
|
308
315
|
else
|
309
316
|
raise ArgumentError, "cannot use #{key} for indexing"
|
310
317
|
end
|
@@ -3527,6 +3534,59 @@ module Polars
|
|
3527
3534
|
end
|
3528
3535
|
end
|
3529
3536
|
|
3537
|
+
def _pos_idxs(idxs)
|
3538
|
+
idx_type = Polars._get_idx_type
|
3539
|
+
|
3540
|
+
if idxs.is_a?(Series)
|
3541
|
+
if idxs.dtype == idx_type
|
3542
|
+
return idxs
|
3543
|
+
end
|
3544
|
+
if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
3545
|
+
if idx_type == UInt32
|
3546
|
+
if [Int64, UInt64].include?(idxs.dtype)
|
3547
|
+
if idxs.max >= 2**32
|
3548
|
+
raise ArgumentError, "Index positions should be smaller than 2^32."
|
3549
|
+
end
|
3550
|
+
end
|
3551
|
+
if idxs.dtype == Int64
|
3552
|
+
if idxs.min < -(2**32)
|
3553
|
+
raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
|
3554
|
+
end
|
3555
|
+
end
|
3556
|
+
end
|
3557
|
+
if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
|
3558
|
+
if idxs.min < 0
|
3559
|
+
if idx_type == UInt32
|
3560
|
+
if [Int8, Int16].include?(idxs.dtype)
|
3561
|
+
idxs = idxs.cast(Int32)
|
3562
|
+
end
|
3563
|
+
else
|
3564
|
+
if [Int8, Int16, Int32].include?(idxs.dtype)
|
3565
|
+
idxs = idxs.cast(Int64)
|
3566
|
+
end
|
3567
|
+
end
|
3568
|
+
|
3569
|
+
# Update negative indexes to absolute indexes.
|
3570
|
+
return (
|
3571
|
+
idxs.to_frame
|
3572
|
+
.select(
|
3573
|
+
Polars.when(Polars.col(idxs.name) < 0)
|
3574
|
+
.then(len + Polars.col(idxs.name))
|
3575
|
+
.otherwise(Polars.col(idxs.name))
|
3576
|
+
.cast(idx_type)
|
3577
|
+
)
|
3578
|
+
.to_series(0)
|
3579
|
+
)
|
3580
|
+
end
|
3581
|
+
end
|
3582
|
+
|
3583
|
+
return idxs.cast(idx_type)
|
3584
|
+
end
|
3585
|
+
end
|
3586
|
+
|
3587
|
+
raise ArgumentError, "Unsupported idxs datatype."
|
3588
|
+
end
|
3589
|
+
|
3530
3590
|
def _comp(other, op)
|
3531
3591
|
if other.is_a?(Series)
|
3532
3592
|
return Utils.wrap_s(_s.send(op, other._s))
|
@@ -3607,6 +3667,11 @@ module Polars
|
|
3607
3667
|
rb_temporal_types << ::Time if defined?(::Time)
|
3608
3668
|
|
3609
3669
|
value = _get_first_non_none(values)
|
3670
|
+
if !value.nil?
|
3671
|
+
if value.is_a?(Hash)
|
3672
|
+
return DataFrame.new(values).to_struct(name)._s
|
3673
|
+
end
|
3674
|
+
end
|
3610
3675
|
|
3611
3676
|
if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
3612
3677
|
constructor = polars_type_to_constructor(dtype)
|
data/lib/polars/slice.rb
CHANGED
@@ -56,7 +56,7 @@ module Polars
|
|
56
56
|
# Normalize slice bounds, identify unbounded and/or zero-length slices.
|
57
57
|
def _slice_setup(s)
|
58
58
|
# can normalize slice indices as we know object size
|
59
|
-
obj_len = @obj.
|
59
|
+
obj_len = @obj.length
|
60
60
|
start = if s.begin
|
61
61
|
if s.begin < 0
|
62
62
|
[s.begin + obj_len, 0].max
|
data/lib/polars/utils.rb
CHANGED
@@ -160,11 +160,11 @@ module Polars
|
|
160
160
|
|
161
161
|
def self.scale_bytes(sz, to:)
|
162
162
|
scaling_factor = {
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
163
|
+
"b" => 1,
|
164
|
+
"k" => 1024,
|
165
|
+
"m" => 1024 ** 2,
|
166
|
+
"g" => 1024 ** 3,
|
167
|
+
"t" => 1024 ** 4
|
168
168
|
}[to[0]]
|
169
169
|
if scaling_factor > 1
|
170
170
|
sz / scaling_factor.to_f
|
@@ -181,6 +181,26 @@ module Polars
|
|
181
181
|
val.all? { |x| x.is_a?(eltype) }
|
182
182
|
end
|
183
183
|
|
184
|
+
def self.is_bool_sequence(val)
|
185
|
+
val.is_a?(Array) && val.all? { |x| x == true || x == false }
|
186
|
+
end
|
187
|
+
|
188
|
+
def self.is_dtype_sequence(val)
|
189
|
+
val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
|
190
|
+
end
|
191
|
+
|
192
|
+
def self.is_int_sequence(val)
|
193
|
+
val.is_a?(Array) && _is_iterable_of(val, Integer)
|
194
|
+
end
|
195
|
+
|
196
|
+
def self.is_expr_sequence(val)
|
197
|
+
val.is_a?(Array) && _is_iterable_of(val, Expr)
|
198
|
+
end
|
199
|
+
|
200
|
+
def self.is_rbexpr_sequence(val)
|
201
|
+
val.is_a?(Array) && _is_iterable_of(val, RbExpr)
|
202
|
+
end
|
203
|
+
|
184
204
|
def self.is_str_sequence(val, allow_str: false)
|
185
205
|
if allow_str == false && val.is_a?(String)
|
186
206
|
false
|
data/lib/polars/version.rb
CHANGED
data/lib/polars-df.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
require_relative "polars"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
version: '0'
|
84
84
|
requirements: []
|
85
|
-
rubygems_version: 3.4.
|
85
|
+
rubygems_version: 3.4.4
|
86
86
|
signing_key:
|
87
87
|
specification_version: 4
|
88
88
|
summary: Blazingly fast DataFrames for Ruby
|