polars-df 0.2.1-x86_64-linux → 0.2.3-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f743e9640f5f15c0e569a023bf2866c3f273ae309c085168cc4d38647bf532d4
4
- data.tar.gz: cfda09672859e7408a641a17b4d31532fd3d59552c0897639ecbbc8ec3e91ca2
3
+ metadata.gz: c6b813d5f533d15a6cdef5ea4aadd85789b61caafc36f51f278dc5701aea3614
4
+ data.tar.gz: 446e2e6fdfa6b62278dabd7546eed235294b07aef56da5517a1b8679a2478534
5
5
  SHA512:
6
- metadata.gz: 4bfa89bab471e2559e6952607064459562ca4d4a2f6f1101c54ce65dda5febd0d51a9acc65a701934b71270d67c207b6f4807b425e59c2ccd949f7eabc09b0f3
7
- data.tar.gz: d8a0322152aa207c3e1a512c54b1bcc93f3ddb672e485f61d8fdcce49873551ae62dac7a131cc47c01d2dea8a766ac3913f54df7def66307132302ad2dd86392
6
+ metadata.gz: 546c36032d2ffd3519a17850d420097f471e92e434640cf0a7a28d9323b7ea6fb02a86d9539a5651529879b8abbc642b6f0be1a5aa89686fbdf5ffaf8b5df191
7
+ data.tar.gz: d63bf89dfd1d76354582a220ede4ceb478ab596c340573e5a4088fd56f866317b96c986a8bc293e574ec2df6ece32e9d018611e091e2677a96562e65b058541e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.2.3 (2023-01-22)
2
+
3
+ - Fixed error with precompiled gem on Mac ARM
4
+ - Fixed issue with structs
5
+
6
+ ## 0.2.2 (2023-01-20)
7
+
8
+ - Added support for strings to `read_sql` method
9
+ - Improved indexing
10
+ - Fixed error with precompiled gem on Mac ARM
11
+
1
12
  ## 0.2.1 (2023-01-18)
2
13
 
3
14
  - Added `read_sql` method
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.1"
1370
+ version = "0.2.3"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
@@ -1640,6 +1640,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
1640
1640
  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1641
1641
  DEALINGS IN THE SOFTWARE.
1642
1642
 
1643
+ ================================================================================
1644
+ array-init-cursor LICENSE-MIT
1645
+ ================================================================================
1646
+
1647
+ MIT License
1648
+
1649
+ Copyright (c) 2021 The Planus Project Developers
1650
+
1651
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1652
+ of this software and associated documentation files (the "Software"), to deal
1653
+ in the Software without restriction, including without limitation the rights
1654
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1655
+ copies of the Software, and to permit persons to whom the Software is
1656
+ furnished to do so, subject to the following conditions:
1657
+
1658
+ The above copyright notice and this permission notice shall be included in all
1659
+ copies or substantial portions of the Software.
1660
+
1661
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1662
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1663
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1664
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1665
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1666
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1667
+ SOFTWARE.
1668
+
1669
+
1643
1670
  ================================================================================
1644
1671
  array-init-cursor LICENSE-APACHE
1645
1672
  ================================================================================
@@ -1822,33 +1849,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1822
1849
  END OF TERMS AND CONDITIONS
1823
1850
 
1824
1851
 
1825
- ================================================================================
1826
- array-init-cursor LICENSE-MIT
1827
- ================================================================================
1828
-
1829
- MIT License
1830
-
1831
- Copyright (c) 2021 The Planus Project Developers
1832
-
1833
- Permission is hereby granted, free of charge, to any person obtaining a copy
1834
- of this software and associated documentation files (the "Software"), to deal
1835
- in the Software without restriction, including without limitation the rights
1836
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1837
- copies of the Software, and to permit persons to whom the Software is
1838
- furnished to do so, subject to the following conditions:
1839
-
1840
- The above copyright notice and this permission notice shall be included in all
1841
- copies or substantial portions of the Software.
1842
-
1843
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1844
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1845
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1846
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1847
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1848
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1849
- SOFTWARE.
1850
-
1851
-
1852
1852
  ================================================================================
1853
1853
  arrow-format LICENSE
1854
1854
  ================================================================================
@@ -14571,6 +14571,36 @@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
14571
14571
  ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
14572
14572
  --------------------------------------------------------------------------------
14573
14573
 
14574
+ ================================================================================
14575
+ jemallocator LICENSE-MIT
14576
+ ================================================================================
14577
+
14578
+ Copyright (c) 2014 Alex Crichton
14579
+
14580
+ Permission is hereby granted, free of charge, to any
14581
+ person obtaining a copy of this software and associated
14582
+ documentation files (the "Software"), to deal in the
14583
+ Software without restriction, including without
14584
+ limitation the rights to use, copy, modify, merge,
14585
+ publish, distribute, sublicense, and/or sell copies of
14586
+ the Software, and to permit persons to whom the Software
14587
+ is furnished to do so, subject to the following
14588
+ conditions:
14589
+
14590
+ The above copyright notice and this permission notice
14591
+ shall be included in all copies or substantial portions
14592
+ of the Software.
14593
+
14594
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
14595
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
14596
+ TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14597
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
14598
+ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
14599
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14600
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
14601
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
14602
+ DEALINGS IN THE SOFTWARE.
14603
+
14574
14604
  ================================================================================
14575
14605
  jemallocator LICENSE-APACHE
14576
14606
  ================================================================================
@@ -14777,36 +14807,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14777
14807
  See the License for the specific language governing permissions and
14778
14808
  limitations under the License.
14779
14809
 
14780
- ================================================================================
14781
- jemallocator LICENSE-MIT
14782
- ================================================================================
14783
-
14784
- Copyright (c) 2014 Alex Crichton
14785
-
14786
- Permission is hereby granted, free of charge, to any
14787
- person obtaining a copy of this software and associated
14788
- documentation files (the "Software"), to deal in the
14789
- Software without restriction, including without
14790
- limitation the rights to use, copy, modify, merge,
14791
- publish, distribute, sublicense, and/or sell copies of
14792
- the Software, and to permit persons to whom the Software
14793
- is furnished to do so, subject to the following
14794
- conditions:
14795
-
14796
- The above copyright notice and this permission notice
14797
- shall be included in all copies or substantial portions
14798
- of the Software.
14799
-
14800
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
14801
- ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
14802
- TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14803
- PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
14804
- SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
14805
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14806
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
14807
- IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
14808
- DEALINGS IN THE SOFTWARE.
14809
-
14810
14810
  ================================================================================
14811
14811
  jobserver LICENSE-APACHE
14812
14812
  ================================================================================
@@ -27075,6 +27075,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
27075
27075
  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27076
27076
  DEALINGS IN THE SOFTWARE.
27077
27077
 
27078
+ ================================================================================
27079
+ planus LICENSE-MIT
27080
+ ================================================================================
27081
+
27082
+ MIT License
27083
+
27084
+ Copyright (c) 2021 The Planus Project Developers
27085
+
27086
+ Permission is hereby granted, free of charge, to any person obtaining a copy
27087
+ of this software and associated documentation files (the "Software"), to deal
27088
+ in the Software without restriction, including without limitation the rights
27089
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27090
+ copies of the Software, and to permit persons to whom the Software is
27091
+ furnished to do so, subject to the following conditions:
27092
+
27093
+ The above copyright notice and this permission notice shall be included in all
27094
+ copies or substantial portions of the Software.
27095
+
27096
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27097
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27098
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27099
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27100
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27101
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27102
+ SOFTWARE.
27103
+
27104
+
27078
27105
  ================================================================================
27079
27106
  planus LICENSE-APACHE
27080
27107
  ================================================================================
@@ -27257,33 +27284,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
27257
27284
  END OF TERMS AND CONDITIONS
27258
27285
 
27259
27286
 
27260
- ================================================================================
27261
- planus LICENSE-MIT
27262
- ================================================================================
27263
-
27264
- MIT License
27265
-
27266
- Copyright (c) 2021 The Planus Project Developers
27267
-
27268
- Permission is hereby granted, free of charge, to any person obtaining a copy
27269
- of this software and associated documentation files (the "Software"), to deal
27270
- in the Software without restriction, including without limitation the rights
27271
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27272
- copies of the Software, and to permit persons to whom the Software is
27273
- furnished to do so, subject to the following conditions:
27274
-
27275
- The above copyright notice and this permission notice shall be included in all
27276
- copies or substantial portions of the Software.
27277
-
27278
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27279
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27280
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27281
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27282
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27283
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27284
- SOFTWARE.
27285
-
27286
-
27287
27287
  ================================================================================
27288
27288
  polars LICENSE
27289
27289
  ================================================================================
@@ -30036,6 +30036,32 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30036
30036
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30037
30037
  SOFTWARE.
30038
30038
 
30039
+ ================================================================================
30040
+ rb-sys-env LICENSE-MIT
30041
+ ================================================================================
30042
+
30043
+ The MIT License (MIT)
30044
+
30045
+ Copyright (c) 2021-2022 Ian Ker-Seymer
30046
+
30047
+ Permission is hereby granted, free of charge, to any person obtaining a copy
30048
+ of this software and associated documentation files (the "Software"), to deal
30049
+ in the Software without restriction, including without limitation the rights
30050
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30051
+ copies of the Software, and to permit persons to whom the Software is
30052
+ furnished to do so, subject to the following conditions:
30053
+
30054
+ The above copyright notice and this permission notice shall be included in all
30055
+ copies or substantial portions of the Software.
30056
+
30057
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30058
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30059
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30060
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30061
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30062
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30063
+ SOFTWARE.
30064
+
30039
30065
  ================================================================================
30040
30066
  rb-sys-env LICENSE-APACHE
30041
30067
  ================================================================================
@@ -30231,32 +30257,6 @@ rb-sys-env LICENSE-APACHE
30231
30257
  See the License for the specific language governing permissions and
30232
30258
  limitations under the License.
30233
30259
 
30234
- ================================================================================
30235
- rb-sys-env LICENSE-MIT
30236
- ================================================================================
30237
-
30238
- The MIT License (MIT)
30239
-
30240
- Copyright (c) 2021-2022 Ian Ker-Seymer
30241
-
30242
- Permission is hereby granted, free of charge, to any person obtaining a copy
30243
- of this software and associated documentation files (the "Software"), to deal
30244
- in the Software without restriction, including without limitation the rights
30245
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30246
- copies of the Software, and to permit persons to whom the Software is
30247
- furnished to do so, subject to the following conditions:
30248
-
30249
- The above copyright notice and this permission notice shall be included in all
30250
- copies or substantial portions of the Software.
30251
-
30252
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30253
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30254
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30255
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30256
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30257
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30258
- SOFTWARE.
30259
-
30260
30260
  ================================================================================
30261
30261
  regex LICENSE-APACHE
30262
30262
  ================================================================================
data/README.md CHANGED
@@ -41,6 +41,9 @@ From a CSV
41
41
 
42
42
  ```ruby
43
43
  Polars.read_csv("file.csv")
44
+
45
+ # or lazily with
46
+ Polars.scan_csv("file.csv")
44
47
  ```
45
48
 
46
49
  From Parquet
@@ -53,6 +56,8 @@ From Active Record
53
56
 
54
57
  ```ruby
55
58
  Polars.read_sql(User.all)
59
+ # or
60
+ Polars.read_sql("SELECT * FROM users")
56
61
  ```
57
62
 
58
63
  From a hash
@@ -135,9 +140,9 @@ df[Polars.col("a") <= 2]
135
140
  And, or, and exclusive or
136
141
 
137
142
  ```ruby
138
- df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
139
- df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
140
- df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
143
+ df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
144
+ df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
145
+ df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
141
146
  ```
142
147
 
143
148
  ## Operations
@@ -284,13 +289,13 @@ CSV
284
289
  ```ruby
285
290
  df.to_csv
286
291
  # or
287
- df.write_csv("data.csv")
292
+ df.write_csv("file.csv")
288
293
  ```
289
294
 
290
295
  Parquet
291
296
 
292
297
  ```ruby
293
- df.write_parquet("data.parquet")
298
+ df.write_parquet("file.parquet")
294
299
  ```
295
300
 
296
301
  ## Types
Binary file
Binary file
Binary file
@@ -277,6 +277,7 @@ module Polars
277
277
  _df.height
278
278
  end
279
279
  alias_method :count, :height
280
+ alias_method :length, :height
280
281
 
281
282
  # Get the width of the DataFrame.
282
283
  #
@@ -541,7 +542,7 @@ module Polars
541
542
 
542
543
  if col_selection.is_a?(Array)
543
544
  # df[.., [1, 2]]
544
- if is_int_sequence(col_selection)
545
+ if Utils.is_int_sequence(col_selection)
545
546
  series_list = col_selection.map { |i| to_series(i) }
546
547
  df = self.class.new(series_list)
547
548
  return df[row_selection]
@@ -574,6 +575,23 @@ module Polars
574
575
  # df[["foo", "bar"]]
575
576
  return _from_rbdf(_df.select(item))
576
577
  end
578
+
579
+ if Utils.is_int_sequence(item)
580
+ item = Series.new("", item)
581
+ end
582
+
583
+ if item.is_a?(Series)
584
+ dtype = item.dtype
585
+ if dtype == Utf8
586
+ return _from_rbdf(_df.select(item))
587
+ elsif dtype == UInt32
588
+ return _from_rbdf(_df.take_with_series(item._s))
589
+ elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
590
+ return _from_rbdf(
591
+ _df.take_with_series(_pos_idxs(item, 0)._s)
592
+ )
593
+ end
594
+ end
577
595
  end
578
596
 
579
597
  # Ruby-specific
@@ -4662,8 +4680,53 @@ module Polars
4662
4680
  end
4663
4681
  end
4664
4682
 
4665
- # def _pos_idxs
4666
- # end
4683
+ def _pos_idxs(idxs, dim)
4684
+ idx_type = Polars._get_idx_type
4685
+
4686
+ if idxs.is_a?(Series)
4687
+ if idxs.dtype == idx_type
4688
+ return idxs
4689
+ end
4690
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
4691
+ if idx_type == UInt32
4692
+ if [Int64, UInt64].include?(idxs.dtype)
4693
+ if idxs.max >= 2**32
4694
+ raise ArgumentError, "Index positions should be smaller than 2^32."
4695
+ end
4696
+ end
4697
+ if idxs.dtype == Int64
4698
+ if idxs.min < -(2**32)
4699
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
4700
+ end
4701
+ end
4702
+ end
4703
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
4704
+ if idxs.min < 0
4705
+ if idx_type == UInt32
4706
+ if [Int8, Int16].include?(idxs.dtype)
4707
+ idxs = idxs.cast(Int32)
4708
+ end
4709
+ else
4710
+ if [Int8, Int16, Int32].include?(idxs.dtype)
4711
+ idxs = idxs.cast(Int64)
4712
+ end
4713
+ end
4714
+
4715
+ idxs =
4716
+ Polars.select(
4717
+ Polars.when(Polars.lit(idxs) < 0)
4718
+ .then(shape[dim] + Polars.lit(idxs))
4719
+ .otherwise(Polars.lit(idxs))
4720
+ ).to_series
4721
+ end
4722
+ end
4723
+
4724
+ return idxs.cast(idx_type)
4725
+ end
4726
+ end
4727
+
4728
+ raise ArgumentError, "Unsupported idxs datatype."
4729
+ end
4667
4730
 
4668
4731
  # @private
4669
4732
  def self.hash_to_rbdf(data, columns: nil)
@@ -4683,7 +4746,14 @@ module Polars
4683
4746
  end
4684
4747
 
4685
4748
  # @private
4686
- def self._unpack_columns(columns, lookup_names: nil, n_expected: nil)
4749
+ def self.include_unknowns(schema, cols)
4750
+ cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
4751
+ end
4752
+
4753
+ # @private
4754
+ def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4755
+ raise Todo if schema_overrides
4756
+
4687
4757
  if columns.is_a?(Hash)
4688
4758
  columns = columns.to_a
4689
4759
  end
@@ -4727,8 +4797,48 @@ module Polars
4727
4797
  end
4728
4798
  end
4729
4799
 
4800
+ def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4801
+ rbdf_columns = rbdf.columns
4802
+ rbdf_dtypes = rbdf.dtypes
4803
+ columns, dtypes = _unpack_columns(
4804
+ (columns || rbdf_columns), schema_overrides: schema_overrides
4805
+ )
4806
+ column_subset = []
4807
+ if columns != rbdf_columns
4808
+ if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
4809
+ column_subset = columns
4810
+ else
4811
+ rbdf.set_column_names(columns)
4812
+ end
4813
+ end
4814
+
4815
+ column_casts = []
4816
+ columns.each do |col, i|
4817
+ if dtypes[col] == Categorical # != rbdf_dtypes[i]
4818
+ column_casts << Polars.col(col).cast(Categorical)._rbexpr
4819
+ elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
4820
+ column_casts << Polars.col(col).cast(structs[col])._rbexpr
4821
+ elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
4822
+ column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
4823
+ end
4824
+ end
4825
+
4826
+ if column_casts.any? || column_subset.any?
4827
+ rbdf = rbdf.lazy
4828
+ if column_casts.any?
4829
+ rbdf = rbdf.with_columns(column_casts)
4830
+ end
4831
+ if column_subset.any?
4832
+ rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
4833
+ end
4834
+ rbdf = rbdf.collect
4835
+ end
4836
+
4837
+ rbdf
4838
+ end
4839
+
4730
4840
  # @private
4731
- def self.sequence_to_rbdf(data, columns: nil, orient: nil)
4841
+ def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4732
4842
  if data.length == 0
4733
4843
  return hash_to_rbdf({}, columns: columns)
4734
4844
  end
@@ -4740,6 +4850,14 @@ module Polars
4740
4850
  data.each do |s|
4741
4851
  data_series << s._s
4742
4852
  end
4853
+ elsif data[0].is_a?(Hash)
4854
+ column_names, dtypes = _unpack_columns(columns)
4855
+ schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4856
+ rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4857
+ if column_names
4858
+ rbdf = _post_apply_columns(rbdf, column_names)
4859
+ end
4860
+ return rbdf
4743
4861
  elsif data[0].is_a?(Array)
4744
4862
  if orient.nil? && !columns.nil?
4745
4863
  orient = columns.length == data.length ? "col" : "row"
@@ -3,44 +3,64 @@ module Polars
3
3
  class DataType
4
4
  end
5
5
 
6
+ # Base class for numeric data types.
7
+ class NumericType < DataType
8
+ end
9
+
10
+ # Base class for integral data types.
11
+ class IntegralType < NumericType
12
+ end
13
+
14
+ # Base class for fractional data types.
15
+ class FractionalType < NumericType
16
+ end
17
+
18
+ # Base class for temporal data types.
19
+ class TemporalType < DataType
20
+ end
21
+
22
+ # Base class for nested data types.
23
+ class NestedType < DataType
24
+ end
25
+
6
26
  # 8-bit signed integer type.
7
- class Int8 < DataType
27
+ class Int8 < IntegralType
8
28
  end
9
29
 
10
30
  # 16-bit signed integer type.
11
- class Int16 < DataType
31
+ class Int16 < IntegralType
12
32
  end
13
33
 
14
34
  # 32-bit signed integer type.
15
- class Int32 < DataType
35
+ class Int32 < IntegralType
16
36
  end
17
37
 
18
38
  # 64-bit signed integer type.
19
- class Int64 < DataType
39
+ class Int64 < IntegralType
20
40
  end
21
41
 
22
42
  # 8-bit unsigned integer type.
23
- class UInt8 < DataType
43
+ class UInt8 < IntegralType
24
44
  end
25
45
 
26
46
  # 16-bit unsigned integer type.
27
- class UInt16 < DataType
47
+ class UInt16 < IntegralType
28
48
  end
29
49
 
30
50
  # 32-bit unsigned integer type.
31
- class UInt32 < DataType
51
+ class UInt32 < IntegralType
32
52
  end
33
53
 
34
54
  # 64-bit unsigned integer type.
35
- class UInt64 < DataType
55
+ class UInt64 < IntegralType
36
56
  end
37
57
 
38
58
  # 32-bit floating point type.
39
- class Float32 < DataType
59
+ class Float32 < FractionalType
40
60
  end
41
61
 
42
62
  # 64-bit floating point type.
43
- class Float64 < DataType
63
+ class Float64 < FractionalType
44
64
  end
45
65
 
46
66
  # Boolean type.
@@ -51,31 +71,19 @@ module Polars
51
71
  class Utf8 < DataType
52
72
  end
53
73
 
54
- # Binary type.
55
- class Binary < DataType
56
- end
57
-
58
- # Type representing Null / None values.
59
- class Null < DataType
60
- end
61
-
62
- # Type representing Datatype values that could not be determined statically.
63
- class Unknown < DataType
64
- end
65
-
66
74
  # Nested list/array type.
67
- class List < DataType
75
+ class List < NestedType
68
76
  def initialize(inner)
69
77
  @inner = Utils.rb_type_to_dtype(inner)
70
78
  end
71
79
  end
72
80
 
73
81
  # Calendar date type.
74
- class Date < DataType
82
+ class Date < TemporalType
75
83
  end
76
84
 
77
85
  # Calendar date and time type.
78
- class Datetime < DataType
86
+ class Datetime < TemporalType
79
87
  def initialize(time_unit = "us", time_zone = nil)
80
88
  @tu = time_unit || "us"
81
89
  @time_zone = time_zone
@@ -83,14 +91,14 @@ module Polars
83
91
  end
84
92
 
85
93
  # Time duration/delta type.
86
- class Duration < DataType
94
+ class Duration < TemporalType
87
95
  def initialize(time_unit = "us")
88
96
  @tu = time_unit
89
97
  end
90
98
  end
91
99
 
92
100
  # Time of day type.
93
- class Time < DataType
101
+ class Time < TemporalType
94
102
  end
95
103
 
96
104
  # Type for wrapping arbitrary Ruby objects.
@@ -102,15 +110,24 @@ module Polars
102
110
  end
103
111
 
104
112
  # Definition of a single field within a `Struct` DataType.
105
- class Field < DataType
113
+ class Field
114
+ attr_reader :name, :dtype
115
+
106
116
  def initialize(name, dtype)
107
117
  @name = name
108
118
  @dtype = Utils.rb_type_to_dtype(dtype)
109
119
  end
120
+
121
+ def inspect
122
+ class_name = self.class.name
123
+ "#{class_name}(#{@name}: #{@dtype})"
124
+ end
110
125
  end
111
126
 
112
127
  # Struct composite type.
113
- class Struct < DataType
128
+ class Struct < NestedType
129
+ attr_reader :fields
130
+
114
131
  def initialize(fields)
115
132
  if fields.is_a?(Hash)
116
133
  @fields = fields.map { |n, d| Field.new(n, d) }
@@ -118,5 +135,26 @@ module Polars
118
135
  @fields = fields
119
136
  end
120
137
  end
138
+
139
+ def inspect
140
+ class_name = self.class.name
141
+ "#{class_name}(#{@fields})"
142
+ end
143
+
144
+ def to_schema
145
+ @fields.to_h { |f| [f.name, f.dtype] }
146
+ end
147
+ end
148
+
149
+ # Binary type.
150
+ class Binary < DataType
151
+ end
152
+
153
+ # Type representing Null / None values.
154
+ class Null < DataType
155
+ end
156
+
157
+ # Type representing Datatype values that could not be determined statically.
158
+ class Unknown < DataType
121
159
  end
122
160
  end
data/lib/polars/io.rb CHANGED
@@ -606,8 +606,10 @@ module Polars
606
606
  sql
607
607
  elsif sql.is_a?(ActiveRecord::Relation)
608
608
  sql.connection.select_all(sql.to_sql)
609
+ elsif sql.is_a?(String)
610
+ ActiveRecord::Base.connection.select_all(sql)
609
611
  else
610
- raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
612
+ raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
611
613
  end
612
614
  data = {}
613
615
  result.columns.each_with_index do |k, i|
data/lib/polars/series.rb CHANGED
@@ -263,6 +263,10 @@ module Polars
263
263
  #
264
264
  # @return [Object]
265
265
  def [](item)
266
+ if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
267
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
268
+ end
269
+
266
270
  if item.is_a?(Integer)
267
271
  return _s.get_idx(item)
268
272
  end
@@ -271,6 +275,10 @@ module Polars
271
275
  return Slice.new(self).apply(item)
272
276
  end
273
277
 
278
+ if Utils.is_int_sequence(item)
279
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
280
+ end
281
+
274
282
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
275
283
  end
276
284
 
@@ -287,24 +295,23 @@ module Polars
287
295
  end
288
296
 
289
297
  if key.is_a?(Series)
290
- if key.dtype == :bool
298
+ if key.dtype == Boolean
291
299
  self._s = set(key, value)._s
292
- elsif key.dtype == :u64
293
- self._s = set_at_idx(key.cast(:u32), value)._s
294
- elsif key.dtype == :u32
300
+ elsif key.dtype == UInt64
301
+ self._s = set_at_idx(key.cast(UInt32), value)._s
302
+ elsif key.dtype == UInt32
295
303
  self._s = set_at_idx(key, value)._s
296
304
  else
297
305
  raise Todo
298
306
  end
299
- end
300
-
301
- if key.is_a?(Array)
302
- s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
307
+ elsif key.is_a?(Array)
308
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
309
+ self[s] = value
310
+ elsif key.is_a?(Range)
311
+ s = Series.new("", key, dtype: UInt32)
303
312
  self[s] = value
304
313
  elsif key.is_a?(Integer)
305
- # TODO fix
306
- # self[[key]] = value
307
- set_at_idx(key, value)
314
+ self[[key]] = value
308
315
  else
309
316
  raise ArgumentError, "cannot use #{key} for indexing"
310
317
  end
@@ -3527,6 +3534,59 @@ module Polars
3527
3534
  end
3528
3535
  end
3529
3536
 
3537
+ def _pos_idxs(idxs)
3538
+ idx_type = Polars._get_idx_type
3539
+
3540
+ if idxs.is_a?(Series)
3541
+ if idxs.dtype == idx_type
3542
+ return idxs
3543
+ end
3544
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
3545
+ if idx_type == UInt32
3546
+ if [Int64, UInt64].include?(idxs.dtype)
3547
+ if idxs.max >= 2**32
3548
+ raise ArgumentError, "Index positions should be smaller than 2^32."
3549
+ end
3550
+ end
3551
+ if idxs.dtype == Int64
3552
+ if idxs.min < -(2**32)
3553
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
3554
+ end
3555
+ end
3556
+ end
3557
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
3558
+ if idxs.min < 0
3559
+ if idx_type == UInt32
3560
+ if [Int8, Int16].include?(idxs.dtype)
3561
+ idxs = idxs.cast(Int32)
3562
+ end
3563
+ else
3564
+ if [Int8, Int16, Int32].include?(idxs.dtype)
3565
+ idxs = idxs.cast(Int64)
3566
+ end
3567
+ end
3568
+
3569
+ # Update negative indexes to absolute indexes.
3570
+ return (
3571
+ idxs.to_frame
3572
+ .select(
3573
+ Polars.when(Polars.col(idxs.name) < 0)
3574
+ .then(len + Polars.col(idxs.name))
3575
+ .otherwise(Polars.col(idxs.name))
3576
+ .cast(idx_type)
3577
+ )
3578
+ .to_series(0)
3579
+ )
3580
+ end
3581
+ end
3582
+
3583
+ return idxs.cast(idx_type)
3584
+ end
3585
+ end
3586
+
3587
+ raise ArgumentError, "Unsupported idxs datatype."
3588
+ end
3589
+
3530
3590
  def _comp(other, op)
3531
3591
  if other.is_a?(Series)
3532
3592
  return Utils.wrap_s(_s.send(op, other._s))
@@ -3607,6 +3667,11 @@ module Polars
3607
3667
  rb_temporal_types << ::Time if defined?(::Time)
3608
3668
 
3609
3669
  value = _get_first_non_none(values)
3670
+ if !value.nil?
3671
+ if value.is_a?(Hash)
3672
+ return DataFrame.new(values).to_struct(name)._s
3673
+ end
3674
+ end
3610
3675
 
3611
3676
  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3612
3677
  constructor = polars_type_to_constructor(dtype)
data/lib/polars/slice.rb CHANGED
@@ -56,7 +56,7 @@ module Polars
56
56
  # Normalize slice bounds, identify unbounded and/or zero-length slices.
57
57
  def _slice_setup(s)
58
58
  # can normalize slice indices as we know object size
59
- obj_len = @obj.len
59
+ obj_len = @obj.length
60
60
  start = if s.begin
61
61
  if s.begin < 0
62
62
  [s.begin + obj_len, 0].max
data/lib/polars/utils.rb CHANGED
@@ -160,11 +160,11 @@ module Polars
160
160
 
161
161
  def self.scale_bytes(sz, to:)
162
162
  scaling_factor = {
163
- "b" => 1,
164
- "k" => 1024,
165
- "m" => 1024 ** 2,
166
- "g" => 1024 ** 3,
167
- "t" => 1024 ** 4,
163
+ "b" => 1,
164
+ "k" => 1024,
165
+ "m" => 1024 ** 2,
166
+ "g" => 1024 ** 3,
167
+ "t" => 1024 ** 4
168
168
  }[to[0]]
169
169
  if scaling_factor > 1
170
170
  sz / scaling_factor.to_f
@@ -181,6 +181,26 @@ module Polars
181
181
  val.all? { |x| x.is_a?(eltype) }
182
182
  end
183
183
 
184
+ def self.is_bool_sequence(val)
185
+ val.is_a?(Array) && val.all? { |x| x == true || x == false }
186
+ end
187
+
188
+ def self.is_dtype_sequence(val)
189
+ val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
190
+ end
191
+
192
+ def self.is_int_sequence(val)
193
+ val.is_a?(Array) && _is_iterable_of(val, Integer)
194
+ end
195
+
196
+ def self.is_expr_sequence(val)
197
+ val.is_a?(Array) && _is_iterable_of(val, Expr)
198
+ end
199
+
200
+ def self.is_rbexpr_sequence(val)
201
+ val.is_a?(Array) && _is_iterable_of(val, RbExpr)
202
+ end
203
+
184
204
  def self.is_str_sequence(val, allow_str: false)
185
205
  if allow_str == false && val.is_a?(String)
186
206
  false
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.1"
3
+ VERSION = "0.2.3"
4
4
  end
data/lib/polars-df.rb CHANGED
@@ -1 +1 @@
1
- require "polars"
1
+ require_relative "polars"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-18 00:00:00.000000000 Z
11
+ date: 2023-01-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
84
  requirements: []
85
- rubygems_version: 3.4.3
85
+ rubygems_version: 3.4.4
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: Blazingly fast DataFrames for Ruby