polars-df 0.2.2-x86_64-linux → 0.2.4-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6cc0fce7d6a3a5a5e6aad0422b6d6fc5f74894ca27881de1ea363c8c5ac77290
4
- data.tar.gz: bf2921df3c70489b0be71e4e8043fa0cff4b9f366f1f4744c261bfc8dacc1c76
3
+ metadata.gz: fe562be02f1336fcbf58709c43602a6779f36f0e1cf39db78f1c40e9f4833000
4
+ data.tar.gz: 3fa635450e132ad73b593ec6b7e8ed93fca249cb8762ba70a6e453d0fac33b50
5
5
  SHA512:
6
- metadata.gz: 9670316d6dc4483de1c16e934a507399b332d48ef779ccb11b19ae35761f56bcb34bba87b2a77855bad788cddbb5f76f0afb1eb8ec593bef5eff99d69e769437
7
- data.tar.gz: 231985664f2e28e965751aa6a6b0c518d206caf084c2ce464dfbf4230420df07beb5465a49e0538f5004d805f9a269883980cf8a999858488be3dce11f24bee2
6
+ metadata.gz: 6c085151acab410060f8a0cf7a3fa768b6daf4ca6584293fd54677415850e15b7e7622dfa00a9fca7f3647bf1a5d035d37057859b31edee28401af4b37eab5ea
7
+ data.tar.gz: 8c5d6720af0f7b8a23e74e3c7686a60e1f477b282e0b02c112fe5acf365bc5c9bf9b9a96aa06279674304c16152fb147ef7203031a542b61e5b4c1c8f59ccdb2
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.2.4 (2023-01-29)
2
+
3
+ - Added support for more types when creating a data frame from an array of hashes
4
+
5
+ ## 0.2.3 (2023-01-22)
6
+
7
+ - Fixed error with precompiled gem on Mac ARM
8
+ - Fixed issue with structs
9
+
1
10
  ## 0.2.2 (2023-01-20)
2
11
 
3
12
  - Added support for strings to `read_sql` method
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.2"
1370
+ version = "0.2.4"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
@@ -1653,18 +1653,18 @@ dependencies = [
1653
1653
 
1654
1654
  [[package]]
1655
1655
  name = "rb-sys"
1656
- version = "0.9.56"
1656
+ version = "0.9.58"
1657
1657
  source = "registry+https://github.com/rust-lang/crates.io-index"
1658
- checksum = "ef82428221475c6f9e7893fe30b88d45ac86bdb12e58e7c92055ba4bceb78a69"
1658
+ checksum = "0158f5115e1ad04a2ee231f597e86306af96f36a8b93ac0c01f8852d0ba89278"
1659
1659
  dependencies = [
1660
1660
  "rb-sys-build",
1661
1661
  ]
1662
1662
 
1663
1663
  [[package]]
1664
1664
  name = "rb-sys-build"
1665
- version = "0.9.56"
1665
+ version = "0.9.58"
1666
1666
  source = "registry+https://github.com/rust-lang/crates.io-index"
1667
- checksum = "950bfc239d2e7704576abe4d37b008876bbfd70a99196a188c5caeae2ba7344a"
1667
+ checksum = "6c27b779db4a2863db74ddad0011f0d0c55c528e9601126d4613ad688063bc05"
1668
1668
  dependencies = [
1669
1669
  "bindgen",
1670
1670
  "regex",
@@ -1673,9 +1673,9 @@ dependencies = [
1673
1673
 
1674
1674
  [[package]]
1675
1675
  name = "rb-sys-env"
1676
- version = "0.1.1"
1676
+ version = "0.1.2"
1677
1677
  source = "registry+https://github.com/rust-lang/crates.io-index"
1678
- checksum = "74c38752410925faeb82c400c06ba2fd9ee6aa8f719dd33994c9e53f5242d25f"
1678
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
1679
1679
 
1680
1680
  [[package]]
1681
1681
  name = "redox_syscall"
@@ -586,15 +586,15 @@ rayon-core v1.10.1
586
586
  https://github.com/rayon-rs/rayon
587
587
  MIT OR Apache-2.0
588
588
 
589
- rb-sys v0.9.56
589
+ rb-sys v0.9.58
590
590
  https://github.com/oxidize-rb/rb-sys
591
591
  MIT OR Apache-2.0
592
592
 
593
- rb-sys-build v0.9.56
593
+ rb-sys-build v0.9.58
594
594
  https://github.com/oxidize-rb/rb-sys
595
595
  MIT OR Apache-2.0
596
596
 
597
- rb-sys-env v0.1.1
597
+ rb-sys-env v0.1.2
598
598
  https://github.com/oxidize-rb/rb-sys
599
599
  MIT OR Apache-2.0
600
600
 
@@ -30036,32 +30036,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30036
30036
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30037
30037
  SOFTWARE.
30038
30038
 
30039
- ================================================================================
30040
- rb-sys-env LICENSE-MIT
30041
- ================================================================================
30042
-
30043
- The MIT License (MIT)
30044
-
30045
- Copyright (c) 2021-2022 Ian Ker-Seymer
30046
-
30047
- Permission is hereby granted, free of charge, to any person obtaining a copy
30048
- of this software and associated documentation files (the "Software"), to deal
30049
- in the Software without restriction, including without limitation the rights
30050
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30051
- copies of the Software, and to permit persons to whom the Software is
30052
- furnished to do so, subject to the following conditions:
30053
-
30054
- The above copyright notice and this permission notice shall be included in all
30055
- copies or substantial portions of the Software.
30056
-
30057
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30058
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30059
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30060
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30061
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30062
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30063
- SOFTWARE.
30064
-
30065
30039
  ================================================================================
30066
30040
  rb-sys-env LICENSE-APACHE
30067
30041
  ================================================================================
@@ -30257,6 +30231,32 @@ rb-sys-env LICENSE-APACHE
30257
30231
  See the License for the specific language governing permissions and
30258
30232
  limitations under the License.
30259
30233
 
30234
+ ================================================================================
30235
+ rb-sys-env LICENSE-MIT
30236
+ ================================================================================
30237
+
30238
+ The MIT License (MIT)
30239
+
30240
+ Copyright (c) 2021-2022 Ian Ker-Seymer
30241
+
30242
+ Permission is hereby granted, free of charge, to any person obtaining a copy
30243
+ of this software and associated documentation files (the "Software"), to deal
30244
+ in the Software without restriction, including without limitation the rights
30245
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
30246
+ copies of the Software, and to permit persons to whom the Software is
30247
+ furnished to do so, subject to the following conditions:
30248
+
30249
+ The above copyright notice and this permission notice shall be included in all
30250
+ copies or substantial portions of the Software.
30251
+
30252
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30253
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30254
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30255
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30256
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30257
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30258
+ SOFTWARE.
30259
+
30260
30260
  ================================================================================
30261
30261
  regex LICENSE-APACHE
30262
30262
  ================================================================================
data/README.md CHANGED
@@ -56,6 +56,8 @@ From Active Record
56
56
 
57
57
  ```ruby
58
58
  Polars.read_sql(User.all)
59
+ # or
60
+ Polars.read_sql("SELECT * FROM users")
59
61
  ```
60
62
 
61
63
  From a hash
@@ -67,6 +69,16 @@ Polars::DataFrame.new({
67
69
  })
68
70
  ```
69
71
 
72
+ From an array of hashes
73
+
74
+ ```ruby
75
+ Polars::DataFrame.new([
76
+ {"a" => 1, "b" => "one"},
77
+ {"a" => 2, "b" => "two"},
78
+ {"a" => 3, "b" => "three"}
79
+ ])
80
+ ```
81
+
70
82
  From an array of series
71
83
 
72
84
  ```ruby
@@ -287,13 +299,13 @@ CSV
287
299
  ```ruby
288
300
  df.to_csv
289
301
  # or
290
- df.write_csv("data.csv")
302
+ df.write_csv("file.csv")
291
303
  ```
292
304
 
293
305
  Parquet
294
306
 
295
307
  ```ruby
296
- df.write_parquet("data.parquet")
308
+ df.write_parquet("file.parquet")
297
309
  ```
298
310
 
299
311
  ## Types
Binary file
Binary file
Binary file
@@ -4746,7 +4746,14 @@ module Polars
4746
4746
  end
4747
4747
 
4748
4748
  # @private
4749
- def self._unpack_columns(columns, lookup_names: nil, n_expected: nil)
4749
+ def self.include_unknowns(schema, cols)
4750
+ cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
4751
+ end
4752
+
4753
+ # @private
4754
+ def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4755
+ raise Todo if schema_overrides
4756
+
4750
4757
  if columns.is_a?(Hash)
4751
4758
  columns = columns.to_a
4752
4759
  end
@@ -4790,8 +4797,48 @@ module Polars
4790
4797
  end
4791
4798
  end
4792
4799
 
4800
+ def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4801
+ rbdf_columns = rbdf.columns
4802
+ rbdf_dtypes = rbdf.dtypes
4803
+ columns, dtypes = _unpack_columns(
4804
+ (columns || rbdf_columns), schema_overrides: schema_overrides
4805
+ )
4806
+ column_subset = []
4807
+ if columns != rbdf_columns
4808
+ if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
4809
+ column_subset = columns
4810
+ else
4811
+ rbdf.set_column_names(columns)
4812
+ end
4813
+ end
4814
+
4815
+ column_casts = []
4816
+ columns.each do |col, i|
4817
+ if dtypes[col] == Categorical # != rbdf_dtypes[i]
4818
+ column_casts << Polars.col(col).cast(Categorical)._rbexpr
4819
+ elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
4820
+ column_casts << Polars.col(col).cast(structs[col])._rbexpr
4821
+ elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
4822
+ column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
4823
+ end
4824
+ end
4825
+
4826
+ if column_casts.any? || column_subset.any?
4827
+ rbdf = rbdf.lazy
4828
+ if column_casts.any?
4829
+ rbdf = rbdf.with_columns(column_casts)
4830
+ end
4831
+ if column_subset.any?
4832
+ rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
4833
+ end
4834
+ rbdf = rbdf.collect
4835
+ end
4836
+
4837
+ rbdf
4838
+ end
4839
+
4793
4840
  # @private
4794
- def self.sequence_to_rbdf(data, columns: nil, orient: nil)
4841
+ def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4795
4842
  if data.length == 0
4796
4843
  return hash_to_rbdf({}, columns: columns)
4797
4844
  end
@@ -4803,6 +4850,14 @@ module Polars
4803
4850
  data.each do |s|
4804
4851
  data_series << s._s
4805
4852
  end
4853
+ elsif data[0].is_a?(Hash)
4854
+ column_names, dtypes = _unpack_columns(columns)
4855
+ schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4856
+ rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4857
+ if column_names
4858
+ rbdf = _post_apply_columns(rbdf, column_names)
4859
+ end
4860
+ return rbdf
4806
4861
  elsif data[0].is_a?(Array)
4807
4862
  if orient.nil? && !columns.nil?
4808
4863
  orient = columns.length == data.length ? "col" : "row"
@@ -3,44 +3,64 @@ module Polars
3
3
  class DataType
4
4
  end
5
5
 
6
+ # Base class for numeric data types.
7
+ class NumericType < DataType
8
+ end
9
+
10
+ # Base class for integral data types.
11
+ class IntegralType < NumericType
12
+ end
13
+
14
+ # Base class for fractional data types.
15
+ class FractionalType < NumericType
16
+ end
17
+
18
+ # Base class for temporal data types.
19
+ class TemporalType < DataType
20
+ end
21
+
22
+ # Base class for nested data types.
23
+ class NestedType < DataType
24
+ end
25
+
6
26
  # 8-bit signed integer type.
7
- class Int8 < DataType
27
+ class Int8 < IntegralType
8
28
  end
9
29
 
10
30
  # 16-bit signed integer type.
11
- class Int16 < DataType
31
+ class Int16 < IntegralType
12
32
  end
13
33
 
14
34
  # 32-bit signed integer type.
15
- class Int32 < DataType
35
+ class Int32 < IntegralType
16
36
  end
17
37
 
18
38
  # 64-bit signed integer type.
19
- class Int64 < DataType
39
+ class Int64 < IntegralType
20
40
  end
21
41
 
22
42
  # 8-bit unsigned integer type.
23
- class UInt8 < DataType
43
+ class UInt8 < IntegralType
24
44
  end
25
45
 
26
46
  # 16-bit unsigned integer type.
27
- class UInt16 < DataType
47
+ class UInt16 < IntegralType
28
48
  end
29
49
 
30
50
  # 32-bit unsigned integer type.
31
- class UInt32 < DataType
51
+ class UInt32 < IntegralType
32
52
  end
33
53
 
34
54
  # 64-bit unsigned integer type.
35
- class UInt64 < DataType
55
+ class UInt64 < IntegralType
36
56
  end
37
57
 
38
58
  # 32-bit floating point type.
39
- class Float32 < DataType
59
+ class Float32 < FractionalType
40
60
  end
41
61
 
42
62
  # 64-bit floating point type.
43
- class Float64 < DataType
63
+ class Float64 < FractionalType
44
64
  end
45
65
 
46
66
  # Boolean type.
@@ -51,31 +71,19 @@ module Polars
51
71
  class Utf8 < DataType
52
72
  end
53
73
 
54
- # Binary type.
55
- class Binary < DataType
56
- end
57
-
58
- # Type representing Null / None values.
59
- class Null < DataType
60
- end
61
-
62
- # Type representing Datatype values that could not be determined statically.
63
- class Unknown < DataType
64
- end
65
-
66
74
  # Nested list/array type.
67
- class List < DataType
75
+ class List < NestedType
68
76
  def initialize(inner)
69
77
  @inner = Utils.rb_type_to_dtype(inner)
70
78
  end
71
79
  end
72
80
 
73
81
  # Calendar date type.
74
- class Date < DataType
82
+ class Date < TemporalType
75
83
  end
76
84
 
77
85
  # Calendar date and time type.
78
- class Datetime < DataType
86
+ class Datetime < TemporalType
79
87
  def initialize(time_unit = "us", time_zone = nil)
80
88
  @tu = time_unit || "us"
81
89
  @time_zone = time_zone
@@ -83,14 +91,14 @@ module Polars
83
91
  end
84
92
 
85
93
  # Time duration/delta type.
86
- class Duration < DataType
94
+ class Duration < TemporalType
87
95
  def initialize(time_unit = "us")
88
96
  @tu = time_unit
89
97
  end
90
98
  end
91
99
 
92
100
  # Time of day type.
93
- class Time < DataType
101
+ class Time < TemporalType
94
102
  end
95
103
 
96
104
  # Type for wrapping arbitrary Ruby objects.
@@ -102,15 +110,24 @@ module Polars
102
110
  end
103
111
 
104
112
  # Definition of a single field within a `Struct` DataType.
105
- class Field < DataType
113
+ class Field
114
+ attr_reader :name, :dtype
115
+
106
116
  def initialize(name, dtype)
107
117
  @name = name
108
118
  @dtype = Utils.rb_type_to_dtype(dtype)
109
119
  end
120
+
121
+ def inspect
122
+ class_name = self.class.name
123
+ "#{class_name}(#{@name}: #{@dtype})"
124
+ end
110
125
  end
111
126
 
112
127
  # Struct composite type.
113
- class Struct < DataType
128
+ class Struct < NestedType
129
+ attr_reader :fields
130
+
114
131
  def initialize(fields)
115
132
  if fields.is_a?(Hash)
116
133
  @fields = fields.map { |n, d| Field.new(n, d) }
@@ -118,5 +135,26 @@ module Polars
118
135
  @fields = fields
119
136
  end
120
137
  end
138
+
139
+ def inspect
140
+ class_name = self.class.name
141
+ "#{class_name}(#{@fields})"
142
+ end
143
+
144
+ def to_schema
145
+ @fields.to_h { |f| [f.name, f.dtype] }
146
+ end
147
+ end
148
+
149
+ # Binary type.
150
+ class Binary < DataType
151
+ end
152
+
153
+ # Type representing Null / None values.
154
+ class Null < DataType
155
+ end
156
+
157
+ # Type representing Datatype values that could not be determined statically.
158
+ class Unknown < DataType
121
159
  end
122
160
  end
data/lib/polars/series.rb CHANGED
@@ -3667,6 +3667,11 @@ module Polars
3667
3667
  rb_temporal_types << ::Time if defined?(::Time)
3668
3668
 
3669
3669
  value = _get_first_non_none(values)
3670
+ if !value.nil?
3671
+ if value.is_a?(Hash)
3672
+ return DataFrame.new(values).to_struct(name)._s
3673
+ end
3674
+ end
3670
3675
 
3671
3676
  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3672
3677
  constructor = polars_type_to_constructor(dtype)
data/lib/polars/utils.rb CHANGED
@@ -160,11 +160,11 @@ module Polars
160
160
 
161
161
  def self.scale_bytes(sz, to:)
162
162
  scaling_factor = {
163
- "b" => 1,
164
- "k" => 1024,
165
- "m" => 1024 ** 2,
166
- "g" => 1024 ** 3,
167
- "t" => 1024 ** 4,
163
+ "b" => 1,
164
+ "k" => 1024,
165
+ "m" => 1024 ** 2,
166
+ "g" => 1024 ** 3,
167
+ "t" => 1024 ** 4
168
168
  }[to[0]]
169
169
  if scaling_factor > 1
170
170
  sz / scaling_factor.to_f
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.2"
3
+ VERSION = "0.2.4"
4
4
  end
data/lib/polars-df.rb CHANGED
@@ -1 +1 @@
1
- require "polars"
1
+ require_relative "polars"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.4
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-20 00:00:00.000000000 Z
11
+ date: 2023-01-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org