polars-df 0.2.1-x86_64-darwin → 0.2.3-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ee93a61f42e0acf492693c5516582437239b66b29a411d49a61e43ee2c640bf0
4
- data.tar.gz: e79a6954afbf9e66bb1b73410abdc750fd5ee7bdca289ae7aa31607608c120f1
3
+ metadata.gz: a67aba6106276247072fe14bcd35b3381e890844ea50321c443cbcc815b84ca3
4
+ data.tar.gz: c5c3388ed010e8946903b7d8ed30fc24ddbf2a34b9eba23d1c178b476faebc7a
5
5
  SHA512:
6
- metadata.gz: 6679b43359387e0f6688c3d96337ffb7408e58296f16524708748feb4c57ff396b439d3a24a79f0c913dd252f920fb3f42be1bc5a0be364ee6aa66c094107ccf
7
- data.tar.gz: 80f69ce23c42d4dd3345bd8364f689af571284ba95770d32be4760a99a9b0eb7cb7b21892c326321c0db33b6bffc57e4a19c989b8044878ef7e42aeffe3d7f68
6
+ metadata.gz: 1343d81460555ed4baa13962ff7316814b64e3c7c6c363c6d3d7b01221bd83abd9ad62b9bd8dffb4bd7dbb30716bae5d5fc20d7830e157631767ac186638bd24
7
+ data.tar.gz: f02495aea654767675444e57ea764cb44e5e34123049cdf541c3b2b47df9d7ddc1dd9bb0499ddda6ee360e3983b0184f35bc6e5fcc28f93e49af3476960770eb
data/CHANGELOG.md CHANGED
@@ -1,3 +1,14 @@
1
+ ## 0.2.3 (2023-01-22)
2
+
3
+ - Fixed error with precompiled gem on Mac ARM
4
+ - Fixed issue with structs
5
+
6
+ ## 0.2.2 (2023-01-20)
7
+
8
+ - Added support for strings to `read_sql` method
9
+ - Improved indexing
10
+ - Fixed error with precompiled gem on Mac ARM
11
+
1
12
  ## 0.2.1 (2023-01-18)
2
13
 
3
14
  - Added `read_sql` method
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.1"
1370
+ version = "0.2.3"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
@@ -1640,6 +1640,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
1640
1640
  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1641
1641
  DEALINGS IN THE SOFTWARE.
1642
1642
 
1643
+ ================================================================================
1644
+ array-init-cursor LICENSE-MIT
1645
+ ================================================================================
1646
+
1647
+ MIT License
1648
+
1649
+ Copyright (c) 2021 The Planus Project Developers
1650
+
1651
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1652
+ of this software and associated documentation files (the "Software"), to deal
1653
+ in the Software without restriction, including without limitation the rights
1654
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1655
+ copies of the Software, and to permit persons to whom the Software is
1656
+ furnished to do so, subject to the following conditions:
1657
+
1658
+ The above copyright notice and this permission notice shall be included in all
1659
+ copies or substantial portions of the Software.
1660
+
1661
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1662
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1663
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1664
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1665
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1666
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1667
+ SOFTWARE.
1668
+
1669
+
1643
1670
  ================================================================================
1644
1671
  array-init-cursor LICENSE-APACHE
1645
1672
  ================================================================================
@@ -1822,33 +1849,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1822
1849
  END OF TERMS AND CONDITIONS
1823
1850
 
1824
1851
 
1825
- ================================================================================
1826
- array-init-cursor LICENSE-MIT
1827
- ================================================================================
1828
-
1829
- MIT License
1830
-
1831
- Copyright (c) 2021 The Planus Project Developers
1832
-
1833
- Permission is hereby granted, free of charge, to any person obtaining a copy
1834
- of this software and associated documentation files (the "Software"), to deal
1835
- in the Software without restriction, including without limitation the rights
1836
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1837
- copies of the Software, and to permit persons to whom the Software is
1838
- furnished to do so, subject to the following conditions:
1839
-
1840
- The above copyright notice and this permission notice shall be included in all
1841
- copies or substantial portions of the Software.
1842
-
1843
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1844
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1845
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1846
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1847
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1848
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1849
- SOFTWARE.
1850
-
1851
-
1852
1852
  ================================================================================
1853
1853
  arrow-format LICENSE
1854
1854
  ================================================================================
@@ -27103,6 +27103,33 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
27103
27103
  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27104
27104
  DEALINGS IN THE SOFTWARE.
27105
27105
 
27106
+ ================================================================================
27107
+ planus LICENSE-MIT
27108
+ ================================================================================
27109
+
27110
+ MIT License
27111
+
27112
+ Copyright (c) 2021 The Planus Project Developers
27113
+
27114
+ Permission is hereby granted, free of charge, to any person obtaining a copy
27115
+ of this software and associated documentation files (the "Software"), to deal
27116
+ in the Software without restriction, including without limitation the rights
27117
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27118
+ copies of the Software, and to permit persons to whom the Software is
27119
+ furnished to do so, subject to the following conditions:
27120
+
27121
+ The above copyright notice and this permission notice shall be included in all
27122
+ copies or substantial portions of the Software.
27123
+
27124
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27125
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27126
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27127
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27128
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27129
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27130
+ SOFTWARE.
27131
+
27132
+
27106
27133
  ================================================================================
27107
27134
  planus LICENSE-APACHE
27108
27135
  ================================================================================
@@ -27285,33 +27312,6 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
27285
27312
  END OF TERMS AND CONDITIONS
27286
27313
 
27287
27314
 
27288
- ================================================================================
27289
- planus LICENSE-MIT
27290
- ================================================================================
27291
-
27292
- MIT License
27293
-
27294
- Copyright (c) 2021 The Planus Project Developers
27295
-
27296
- Permission is hereby granted, free of charge, to any person obtaining a copy
27297
- of this software and associated documentation files (the "Software"), to deal
27298
- in the Software without restriction, including without limitation the rights
27299
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27300
- copies of the Software, and to permit persons to whom the Software is
27301
- furnished to do so, subject to the following conditions:
27302
-
27303
- The above copyright notice and this permission notice shall be included in all
27304
- copies or substantial portions of the Software.
27305
-
27306
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27307
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27308
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27309
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27310
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27311
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27312
- SOFTWARE.
27313
-
27314
-
27315
27315
  ================================================================================
27316
27316
  polars LICENSE
27317
27317
  ================================================================================
@@ -30064,6 +30064,32 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30064
30064
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30065
30065
  SOFTWARE.
30066
30066
 
30067
+ ================================================================================
30068
+ rb-sys-env LICENSE-MIT
30069
+ ================================================================================
30070
+
30071
+ The MIT License (MIT)
30072
+
30073
+ Copyright (c) 2021-2022 Ian Ker-Seymer
30074
+
30075
+ Permission is hereby granted, free of charge, to any person obtaining a copy
30076
+ of this software and associated documentation files (the "Software"), to deal
30077
+ in the Software without restriction, including without limitation the rights
30078
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30079
+ copies of the Software, and to permit persons to whom the Software is
30080
+ furnished to do so, subject to the following conditions:
30081
+
30082
+ The above copyright notice and this permission notice shall be included in all
30083
+ copies or substantial portions of the Software.
30084
+
30085
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30086
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30087
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30088
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30089
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30090
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30091
+ SOFTWARE.
30092
+
30067
30093
  ================================================================================
30068
30094
  rb-sys-env LICENSE-APACHE
30069
30095
  ================================================================================
@@ -30259,32 +30285,6 @@ rb-sys-env LICENSE-APACHE
30259
30285
  See the License for the specific language governing permissions and
30260
30286
  limitations under the License.
30261
30287
 
30262
- ================================================================================
30263
- rb-sys-env LICENSE-MIT
30264
- ================================================================================
30265
-
30266
- The MIT License (MIT)
30267
-
30268
- Copyright (c) 2021-2022 Ian Ker-Seymer
30269
-
30270
- Permission is hereby granted, free of charge, to any person obtaining a copy
30271
- of this software and associated documentation files (the "Software"), to deal
30272
- in the Software without restriction, including without limitation the rights
30273
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30274
- copies of the Software, and to permit persons to whom the Software is
30275
- furnished to do so, subject to the following conditions:
30276
-
30277
- The above copyright notice and this permission notice shall be included in all
30278
- copies or substantial portions of the Software.
30279
-
30280
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30281
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30282
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30283
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30284
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30285
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30286
- SOFTWARE.
30287
-
30288
30288
  ================================================================================
30289
30289
  regex LICENSE-APACHE
30290
30290
  ================================================================================
data/README.md CHANGED
@@ -41,6 +41,9 @@ From a CSV
41
41
 
42
42
  ```ruby
43
43
  Polars.read_csv("file.csv")
44
+
45
+ # or lazily with
46
+ Polars.scan_csv("file.csv")
44
47
  ```
45
48
 
46
49
  From Parquet
@@ -53,6 +56,8 @@ From Active Record
53
56
 
54
57
  ```ruby
55
58
  Polars.read_sql(User.all)
59
+ # or
60
+ Polars.read_sql("SELECT * FROM users")
56
61
  ```
57
62
 
58
63
  From a hash
@@ -135,9 +140,9 @@ df[Polars.col("a") <= 2]
135
140
  And, or, and exclusive or
136
141
 
137
142
  ```ruby
138
- df[(Polars.col("a") > 100) & (Polars.col("b") == "one")] # and
139
- df[(Polars.col("a") > 100) | (Polars.col("b") == "one")] # or
140
- df[(Polars.col("a") > 100) ^ (Polars.col("b") == "one")] # xor
143
+ df[(Polars.col("a") > 1) & (Polars.col("b") == "two")] # and
144
+ df[(Polars.col("a") > 1) | (Polars.col("b") == "two")] # or
145
+ df[(Polars.col("a") > 1) ^ (Polars.col("b") == "two")] # xor
141
146
  ```
142
147
 
143
148
  ## Operations
@@ -284,13 +289,13 @@ CSV
284
289
  ```ruby
285
290
  df.to_csv
286
291
  # or
287
- df.write_csv("data.csv")
292
+ df.write_csv("file.csv")
288
293
  ```
289
294
 
290
295
  Parquet
291
296
 
292
297
  ```ruby
293
- df.write_parquet("data.parquet")
298
+ df.write_parquet("file.parquet")
294
299
  ```
295
300
 
296
301
  ## Types
Binary file
Binary file
Binary file
@@ -277,6 +277,7 @@ module Polars
277
277
  _df.height
278
278
  end
279
279
  alias_method :count, :height
280
+ alias_method :length, :height
280
281
 
281
282
  # Get the width of the DataFrame.
282
283
  #
@@ -541,7 +542,7 @@ module Polars
541
542
 
542
543
  if col_selection.is_a?(Array)
543
544
  # df[.., [1, 2]]
544
- if is_int_sequence(col_selection)
545
+ if Utils.is_int_sequence(col_selection)
545
546
  series_list = col_selection.map { |i| to_series(i) }
546
547
  df = self.class.new(series_list)
547
548
  return df[row_selection]
@@ -574,6 +575,23 @@ module Polars
574
575
  # df[["foo", "bar"]]
575
576
  return _from_rbdf(_df.select(item))
576
577
  end
578
+
579
+ if Utils.is_int_sequence(item)
580
+ item = Series.new("", item)
581
+ end
582
+
583
+ if item.is_a?(Series)
584
+ dtype = item.dtype
585
+ if dtype == Utf8
586
+ return _from_rbdf(_df.select(item))
587
+ elsif dtype == UInt32
588
+ return _from_rbdf(_df.take_with_series(item._s))
589
+ elsif [UInt8, UInt16, UInt64, Int8, Int16, Int32, Int64].include?(dtype)
590
+ return _from_rbdf(
591
+ _df.take_with_series(_pos_idxs(item, 0)._s)
592
+ )
593
+ end
594
+ end
577
595
  end
578
596
 
579
597
  # Ruby-specific
@@ -4662,8 +4680,53 @@ module Polars
4662
4680
  end
4663
4681
  end
4664
4682
 
4665
- # def _pos_idxs
4666
- # end
4683
+ def _pos_idxs(idxs, dim)
4684
+ idx_type = Polars._get_idx_type
4685
+
4686
+ if idxs.is_a?(Series)
4687
+ if idxs.dtype == idx_type
4688
+ return idxs
4689
+ end
4690
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
4691
+ if idx_type == UInt32
4692
+ if [Int64, UInt64].include?(idxs.dtype)
4693
+ if idxs.max >= 2**32
4694
+ raise ArgumentError, "Index positions should be smaller than 2^32."
4695
+ end
4696
+ end
4697
+ if idxs.dtype == Int64
4698
+ if idxs.min < -(2**32)
4699
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
4700
+ end
4701
+ end
4702
+ end
4703
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
4704
+ if idxs.min < 0
4705
+ if idx_type == UInt32
4706
+ if [Int8, Int16].include?(idxs.dtype)
4707
+ idxs = idxs.cast(Int32)
4708
+ end
4709
+ else
4710
+ if [Int8, Int16, Int32].include?(idxs.dtype)
4711
+ idxs = idxs.cast(Int64)
4712
+ end
4713
+ end
4714
+
4715
+ idxs =
4716
+ Polars.select(
4717
+ Polars.when(Polars.lit(idxs) < 0)
4718
+ .then(shape[dim] + Polars.lit(idxs))
4719
+ .otherwise(Polars.lit(idxs))
4720
+ ).to_series
4721
+ end
4722
+ end
4723
+
4724
+ return idxs.cast(idx_type)
4725
+ end
4726
+ end
4727
+
4728
+ raise ArgumentError, "Unsupported idxs datatype."
4729
+ end
4667
4730
 
4668
4731
  # @private
4669
4732
  def self.hash_to_rbdf(data, columns: nil)
@@ -4683,7 +4746,14 @@ module Polars
4683
4746
  end
4684
4747
 
4685
4748
  # @private
4686
- def self._unpack_columns(columns, lookup_names: nil, n_expected: nil)
4749
+ def self.include_unknowns(schema, cols)
4750
+ cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
4751
+ end
4752
+
4753
+ # @private
4754
+ def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4755
+ raise Todo if schema_overrides
4756
+
4687
4757
  if columns.is_a?(Hash)
4688
4758
  columns = columns.to_a
4689
4759
  end
@@ -4727,8 +4797,48 @@ module Polars
4727
4797
  end
4728
4798
  end
4729
4799
 
4800
+ def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4801
+ rbdf_columns = rbdf.columns
4802
+ rbdf_dtypes = rbdf.dtypes
4803
+ columns, dtypes = _unpack_columns(
4804
+ (columns || rbdf_columns), schema_overrides: schema_overrides
4805
+ )
4806
+ column_subset = []
4807
+ if columns != rbdf_columns
4808
+ if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
4809
+ column_subset = columns
4810
+ else
4811
+ rbdf.set_column_names(columns)
4812
+ end
4813
+ end
4814
+
4815
+ column_casts = []
4816
+ columns.each do |col, i|
4817
+ if dtypes[col] == Categorical # != rbdf_dtypes[i]
4818
+ column_casts << Polars.col(col).cast(Categorical)._rbexpr
4819
+ elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
4820
+ column_casts << Polars.col(col).cast(structs[col])._rbexpr
4821
+ elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
4822
+ column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
4823
+ end
4824
+ end
4825
+
4826
+ if column_casts.any? || column_subset.any?
4827
+ rbdf = rbdf.lazy
4828
+ if column_casts.any?
4829
+ rbdf = rbdf.with_columns(column_casts)
4830
+ end
4831
+ if column_subset.any?
4832
+ rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
4833
+ end
4834
+ rbdf = rbdf.collect
4835
+ end
4836
+
4837
+ rbdf
4838
+ end
4839
+
4730
4840
  # @private
4731
- def self.sequence_to_rbdf(data, columns: nil, orient: nil)
4841
+ def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4732
4842
  if data.length == 0
4733
4843
  return hash_to_rbdf({}, columns: columns)
4734
4844
  end
@@ -4740,6 +4850,14 @@ module Polars
4740
4850
  data.each do |s|
4741
4851
  data_series << s._s
4742
4852
  end
4853
+ elsif data[0].is_a?(Hash)
4854
+ column_names, dtypes = _unpack_columns(columns)
4855
+ schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4856
+ rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4857
+ if column_names
4858
+ rbdf = _post_apply_columns(rbdf, column_names)
4859
+ end
4860
+ return rbdf
4743
4861
  elsif data[0].is_a?(Array)
4744
4862
  if orient.nil? && !columns.nil?
4745
4863
  orient = columns.length == data.length ? "col" : "row"
@@ -3,44 +3,64 @@ module Polars
3
3
  class DataType
4
4
  end
5
5
 
6
+ # Base class for numeric data types.
7
+ class NumericType < DataType
8
+ end
9
+
10
+ # Base class for integral data types.
11
+ class IntegralType < NumericType
12
+ end
13
+
14
+ # Base class for fractional data types.
15
+ class FractionalType < NumericType
16
+ end
17
+
18
+ # Base class for temporal data types.
19
+ class TemporalType < DataType
20
+ end
21
+
22
+ # Base class for nested data types.
23
+ class NestedType < DataType
24
+ end
25
+
6
26
  # 8-bit signed integer type.
7
- class Int8 < DataType
27
+ class Int8 < IntegralType
8
28
  end
9
29
 
10
30
  # 16-bit signed integer type.
11
- class Int16 < DataType
31
+ class Int16 < IntegralType
12
32
  end
13
33
 
14
34
  # 32-bit signed integer type.
15
- class Int32 < DataType
35
+ class Int32 < IntegralType
16
36
  end
17
37
 
18
38
  # 64-bit signed integer type.
19
- class Int64 < DataType
39
+ class Int64 < IntegralType
20
40
  end
21
41
 
22
42
  # 8-bit unsigned integer type.
23
- class UInt8 < DataType
43
+ class UInt8 < IntegralType
24
44
  end
25
45
 
26
46
  # 16-bit unsigned integer type.
27
- class UInt16 < DataType
47
+ class UInt16 < IntegralType
28
48
  end
29
49
 
30
50
  # 32-bit unsigned integer type.
31
- class UInt32 < DataType
51
+ class UInt32 < IntegralType
32
52
  end
33
53
 
34
54
  # 64-bit unsigned integer type.
35
- class UInt64 < DataType
55
+ class UInt64 < IntegralType
36
56
  end
37
57
 
38
58
  # 32-bit floating point type.
39
- class Float32 < DataType
59
+ class Float32 < FractionalType
40
60
  end
41
61
 
42
62
  # 64-bit floating point type.
43
- class Float64 < DataType
63
+ class Float64 < FractionalType
44
64
  end
45
65
 
46
66
  # Boolean type.
@@ -51,31 +71,19 @@ module Polars
51
71
  class Utf8 < DataType
52
72
  end
53
73
 
54
- # Binary type.
55
- class Binary < DataType
56
- end
57
-
58
- # Type representing Null / None values.
59
- class Null < DataType
60
- end
61
-
62
- # Type representing Datatype values that could not be determined statically.
63
- class Unknown < DataType
64
- end
65
-
66
74
  # Nested list/array type.
67
- class List < DataType
75
+ class List < NestedType
68
76
  def initialize(inner)
69
77
  @inner = Utils.rb_type_to_dtype(inner)
70
78
  end
71
79
  end
72
80
 
73
81
  # Calendar date type.
74
- class Date < DataType
82
+ class Date < TemporalType
75
83
  end
76
84
 
77
85
  # Calendar date and time type.
78
- class Datetime < DataType
86
+ class Datetime < TemporalType
79
87
  def initialize(time_unit = "us", time_zone = nil)
80
88
  @tu = time_unit || "us"
81
89
  @time_zone = time_zone
@@ -83,14 +91,14 @@ module Polars
83
91
  end
84
92
 
85
93
  # Time duration/delta type.
86
- class Duration < DataType
94
+ class Duration < TemporalType
87
95
  def initialize(time_unit = "us")
88
96
  @tu = time_unit
89
97
  end
90
98
  end
91
99
 
92
100
  # Time of day type.
93
- class Time < DataType
101
+ class Time < TemporalType
94
102
  end
95
103
 
96
104
  # Type for wrapping arbitrary Ruby objects.
@@ -102,15 +110,24 @@ module Polars
102
110
  end
103
111
 
104
112
  # Definition of a single field within a `Struct` DataType.
105
- class Field < DataType
113
+ class Field
114
+ attr_reader :name, :dtype
115
+
106
116
  def initialize(name, dtype)
107
117
  @name = name
108
118
  @dtype = Utils.rb_type_to_dtype(dtype)
109
119
  end
120
+
121
+ def inspect
122
+ class_name = self.class.name
123
+ "#{class_name}(#{@name}: #{@dtype})"
124
+ end
110
125
  end
111
126
 
112
127
  # Struct composite type.
113
- class Struct < DataType
128
+ class Struct < NestedType
129
+ attr_reader :fields
130
+
114
131
  def initialize(fields)
115
132
  if fields.is_a?(Hash)
116
133
  @fields = fields.map { |n, d| Field.new(n, d) }
@@ -118,5 +135,26 @@ module Polars
118
135
  @fields = fields
119
136
  end
120
137
  end
138
+
139
+ def inspect
140
+ class_name = self.class.name
141
+ "#{class_name}(#{@fields})"
142
+ end
143
+
144
+ def to_schema
145
+ @fields.to_h { |f| [f.name, f.dtype] }
146
+ end
147
+ end
148
+
149
+ # Binary type.
150
+ class Binary < DataType
151
+ end
152
+
153
+ # Type representing Null / None values.
154
+ class Null < DataType
155
+ end
156
+
157
+ # Type representing Datatype values that could not be determined statically.
158
+ class Unknown < DataType
121
159
  end
122
160
  end
data/lib/polars/io.rb CHANGED
@@ -606,8 +606,10 @@ module Polars
606
606
  sql
607
607
  elsif sql.is_a?(ActiveRecord::Relation)
608
608
  sql.connection.select_all(sql.to_sql)
609
+ elsif sql.is_a?(String)
610
+ ActiveRecord::Base.connection.select_all(sql)
609
611
  else
610
- raise ArgumentError, "Expected ActiveRecord::Relation or ActiveRecord::Result"
612
+ raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
611
613
  end
612
614
  data = {}
613
615
  result.columns.each_with_index do |k, i|
data/lib/polars/series.rb CHANGED
@@ -263,6 +263,10 @@ module Polars
263
263
  #
264
264
  # @return [Object]
265
265
  def [](item)
266
+ if item.is_a?(Series) && [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64].include?(item.dtype)
267
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(item)._s))
268
+ end
269
+
266
270
  if item.is_a?(Integer)
267
271
  return _s.get_idx(item)
268
272
  end
@@ -271,6 +275,10 @@ module Polars
271
275
  return Slice.new(self).apply(item)
272
276
  end
273
277
 
278
+ if Utils.is_int_sequence(item)
279
+ return Utils.wrap_s(_s.take_with_series(_pos_idxs(Series.new("", item))._s))
280
+ end
281
+
274
282
  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
275
283
  end
276
284
 
@@ -287,24 +295,23 @@ module Polars
287
295
  end
288
296
 
289
297
  if key.is_a?(Series)
290
- if key.dtype == :bool
298
+ if key.dtype == Boolean
291
299
  self._s = set(key, value)._s
292
- elsif key.dtype == :u64
293
- self._s = set_at_idx(key.cast(:u32), value)._s
294
- elsif key.dtype == :u32
300
+ elsif key.dtype == UInt64
301
+ self._s = set_at_idx(key.cast(UInt32), value)._s
302
+ elsif key.dtype == UInt32
295
303
  self._s = set_at_idx(key, value)._s
296
304
  else
297
305
  raise Todo
298
306
  end
299
- end
300
-
301
- if key.is_a?(Array)
302
- s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
307
+ elsif key.is_a?(Array)
308
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
309
+ self[s] = value
310
+ elsif key.is_a?(Range)
311
+ s = Series.new("", key, dtype: UInt32)
303
312
  self[s] = value
304
313
  elsif key.is_a?(Integer)
305
- # TODO fix
306
- # self[[key]] = value
307
- set_at_idx(key, value)
314
+ self[[key]] = value
308
315
  else
309
316
  raise ArgumentError, "cannot use #{key} for indexing"
310
317
  end
@@ -3527,6 +3534,59 @@ module Polars
3527
3534
  end
3528
3535
  end
3529
3536
 
3537
+ def _pos_idxs(idxs)
3538
+ idx_type = Polars._get_idx_type
3539
+
3540
+ if idxs.is_a?(Series)
3541
+ if idxs.dtype == idx_type
3542
+ return idxs
3543
+ end
3544
+ if [UInt8, UInt16, idx_type == UInt32 ? UInt64 : UInt32, Int8, Int16, Int32, Int64].include?(idxs.dtype)
3545
+ if idx_type == UInt32
3546
+ if [Int64, UInt64].include?(idxs.dtype)
3547
+ if idxs.max >= 2**32
3548
+ raise ArgumentError, "Index positions should be smaller than 2^32."
3549
+ end
3550
+ end
3551
+ if idxs.dtype == Int64
3552
+ if idxs.min < -(2**32)
3553
+ raise ArgumentError, "Index positions should be bigger than -2^32 + 1."
3554
+ end
3555
+ end
3556
+ end
3557
+ if [Int8, Int16, Int32, Int64].include?(idxs.dtype)
3558
+ if idxs.min < 0
3559
+ if idx_type == UInt32
3560
+ if [Int8, Int16].include?(idxs.dtype)
3561
+ idxs = idxs.cast(Int32)
3562
+ end
3563
+ else
3564
+ if [Int8, Int16, Int32].include?(idxs.dtype)
3565
+ idxs = idxs.cast(Int64)
3566
+ end
3567
+ end
3568
+
3569
+ # Update negative indexes to absolute indexes.
3570
+ return (
3571
+ idxs.to_frame
3572
+ .select(
3573
+ Polars.when(Polars.col(idxs.name) < 0)
3574
+ .then(len + Polars.col(idxs.name))
3575
+ .otherwise(Polars.col(idxs.name))
3576
+ .cast(idx_type)
3577
+ )
3578
+ .to_series(0)
3579
+ )
3580
+ end
3581
+ end
3582
+
3583
+ return idxs.cast(idx_type)
3584
+ end
3585
+ end
3586
+
3587
+ raise ArgumentError, "Unsupported idxs datatype."
3588
+ end
3589
+
3530
3590
  def _comp(other, op)
3531
3591
  if other.is_a?(Series)
3532
3592
  return Utils.wrap_s(_s.send(op, other._s))
@@ -3607,6 +3667,11 @@ module Polars
3607
3667
  rb_temporal_types << ::Time if defined?(::Time)
3608
3668
 
3609
3669
  value = _get_first_non_none(values)
3670
+ if !value.nil?
3671
+ if value.is_a?(Hash)
3672
+ return DataFrame.new(values).to_struct(name)._s
3673
+ end
3674
+ end
3610
3675
 
3611
3676
  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3612
3677
  constructor = polars_type_to_constructor(dtype)
data/lib/polars/slice.rb CHANGED
@@ -56,7 +56,7 @@ module Polars
56
56
  # Normalize slice bounds, identify unbounded and/or zero-length slices.
57
57
  def _slice_setup(s)
58
58
  # can normalize slice indices as we know object size
59
- obj_len = @obj.len
59
+ obj_len = @obj.length
60
60
  start = if s.begin
61
61
  if s.begin < 0
62
62
  [s.begin + obj_len, 0].max
data/lib/polars/utils.rb CHANGED
@@ -160,11 +160,11 @@ module Polars
160
160
 
161
161
  def self.scale_bytes(sz, to:)
162
162
  scaling_factor = {
163
- "b" => 1,
164
- "k" => 1024,
165
- "m" => 1024 ** 2,
166
- "g" => 1024 ** 3,
167
- "t" => 1024 ** 4,
163
+ "b" => 1,
164
+ "k" => 1024,
165
+ "m" => 1024 ** 2,
166
+ "g" => 1024 ** 3,
167
+ "t" => 1024 ** 4
168
168
  }[to[0]]
169
169
  if scaling_factor > 1
170
170
  sz / scaling_factor.to_f
@@ -181,6 +181,26 @@ module Polars
181
181
  val.all? { |x| x.is_a?(eltype) }
182
182
  end
183
183
 
184
+ def self.is_bool_sequence(val)
185
+ val.is_a?(Array) && val.all? { |x| x == true || x == false }
186
+ end
187
+
188
+ def self.is_dtype_sequence(val)
189
+ val.is_a?(Array) && val.all? { |x| is_polars_dtype(x) }
190
+ end
191
+
192
+ def self.is_int_sequence(val)
193
+ val.is_a?(Array) && _is_iterable_of(val, Integer)
194
+ end
195
+
196
+ def self.is_expr_sequence(val)
197
+ val.is_a?(Array) && _is_iterable_of(val, Expr)
198
+ end
199
+
200
+ def self.is_rbexpr_sequence(val)
201
+ val.is_a?(Array) && _is_iterable_of(val, RbExpr)
202
+ end
203
+
184
204
  def self.is_str_sequence(val, allow_str: false)
185
205
  if allow_str == false && val.is_a?(String)
186
206
  false
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.1"
3
+ VERSION = "0.2.3"
4
4
  end
data/lib/polars-df.rb CHANGED
@@ -1 +1 @@
1
- require "polars"
1
+ require_relative "polars"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-18 00:00:00.000000000 Z
11
+ date: 2023-01-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
84
  requirements: []
85
- rubygems_version: 3.4.3
85
+ rubygems_version: 3.4.4
86
86
  signing_key:
87
87
  specification_version: 4
88
88
  summary: Blazingly fast DataFrames for Ruby