polars-df 0.2.2-x86_64-linux → 0.2.3-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6cc0fce7d6a3a5a5e6aad0422b6d6fc5f74894ca27881de1ea363c8c5ac77290
4
- data.tar.gz: bf2921df3c70489b0be71e4e8043fa0cff4b9f366f1f4744c261bfc8dacc1c76
3
+ metadata.gz: c6b813d5f533d15a6cdef5ea4aadd85789b61caafc36f51f278dc5701aea3614
4
+ data.tar.gz: 446e2e6fdfa6b62278dabd7546eed235294b07aef56da5517a1b8679a2478534
5
5
  SHA512:
6
- metadata.gz: 9670316d6dc4483de1c16e934a507399b332d48ef779ccb11b19ae35761f56bcb34bba87b2a77855bad788cddbb5f76f0afb1eb8ec593bef5eff99d69e769437
7
- data.tar.gz: 231985664f2e28e965751aa6a6b0c518d206caf084c2ce464dfbf4230420df07beb5465a49e0538f5004d805f9a269883980cf8a999858488be3dce11f24bee2
6
+ metadata.gz: 546c36032d2ffd3519a17850d420097f471e92e434640cf0a7a28d9323b7ea6fb02a86d9539a5651529879b8abbc642b6f0be1a5aa89686fbdf5ffaf8b5df191
7
+ data.tar.gz: d63bf89dfd1d76354582a220ede4ceb478ab596c340573e5a4088fd56f866317b96c986a8bc293e574ec2df6ece32e9d018611e091e2677a96562e65b058541e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.3 (2023-01-22)
2
+
3
+ - Fixed error with precompiled gem on Mac ARM
4
+ - Fixed issue with structs
5
+
1
6
  ## 0.2.2 (2023-01-20)
2
7
 
3
8
  - Added support for strings to `read_sql` method
data/Cargo.lock CHANGED
@@ -1367,7 +1367,7 @@ dependencies = [
1367
1367
 
1368
1368
  [[package]]
1369
1369
  name = "polars"
1370
- version = "0.2.2"
1370
+ version = "0.2.3"
1371
1371
  dependencies = [
1372
1372
  "ahash",
1373
1373
  "jemallocator",
data/README.md CHANGED
@@ -56,6 +56,8 @@ From Active Record
56
56
 
57
57
  ```ruby
58
58
  Polars.read_sql(User.all)
59
+ # or
60
+ Polars.read_sql("SELECT * FROM users")
59
61
  ```
60
62
 
61
63
  From a hash
@@ -287,13 +289,13 @@ CSV
287
289
  ```ruby
288
290
  df.to_csv
289
291
  # or
290
- df.write_csv("data.csv")
292
+ df.write_csv("file.csv")
291
293
  ```
292
294
 
293
295
  Parquet
294
296
 
295
297
  ```ruby
296
- df.write_parquet("data.parquet")
298
+ df.write_parquet("file.parquet")
297
299
  ```
298
300
 
299
301
  ## Types
Binary file
Binary file
Binary file
@@ -4746,7 +4746,14 @@ module Polars
4746
4746
  end
4747
4747
 
4748
4748
  # @private
4749
- def self._unpack_columns(columns, lookup_names: nil, n_expected: nil)
4749
+ def self.include_unknowns(schema, cols)
4750
+ cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
4751
+ end
4752
+
4753
+ # @private
4754
+ def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4755
+ raise Todo if schema_overrides
4756
+
4750
4757
  if columns.is_a?(Hash)
4751
4758
  columns = columns.to_a
4752
4759
  end
@@ -4790,8 +4797,48 @@ module Polars
4790
4797
  end
4791
4798
  end
4792
4799
 
4800
+ def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4801
+ rbdf_columns = rbdf.columns
4802
+ rbdf_dtypes = rbdf.dtypes
4803
+ columns, dtypes = _unpack_columns(
4804
+ (columns || rbdf_columns), schema_overrides: schema_overrides
4805
+ )
4806
+ column_subset = []
4807
+ if columns != rbdf_columns
4808
+ if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
4809
+ column_subset = columns
4810
+ else
4811
+ rbdf.set_column_names(columns)
4812
+ end
4813
+ end
4814
+
4815
+ column_casts = []
4816
+ columns.each do |col, i|
4817
+ if dtypes[col] == Categorical # != rbdf_dtypes[i]
4818
+ column_casts << Polars.col(col).cast(Categorical)._rbexpr
4819
+ elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
4820
+ column_casts << Polars.col(col).cast(structs[col])._rbexpr
4821
+ elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
4822
+ column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
4823
+ end
4824
+ end
4825
+
4826
+ if column_casts.any? || column_subset.any?
4827
+ rbdf = rbdf.lazy
4828
+ if column_casts.any?
4829
+ rbdf = rbdf.with_columns(column_casts)
4830
+ end
4831
+ if column_subset.any?
4832
+ rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
4833
+ end
4834
+ rbdf = rbdf.collect
4835
+ end
4836
+
4837
+ rbdf
4838
+ end
4839
+
4793
4840
  # @private
4794
- def self.sequence_to_rbdf(data, columns: nil, orient: nil)
4841
+ def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4795
4842
  if data.length == 0
4796
4843
  return hash_to_rbdf({}, columns: columns)
4797
4844
  end
@@ -4803,6 +4850,14 @@ module Polars
4803
4850
  data.each do |s|
4804
4851
  data_series << s._s
4805
4852
  end
4853
+ elsif data[0].is_a?(Hash)
4854
+ column_names, dtypes = _unpack_columns(columns)
4855
+ schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4856
+ rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4857
+ if column_names
4858
+ rbdf = _post_apply_columns(rbdf, column_names)
4859
+ end
4860
+ return rbdf
4806
4861
  elsif data[0].is_a?(Array)
4807
4862
  if orient.nil? && !columns.nil?
4808
4863
  orient = columns.length == data.length ? "col" : "row"
@@ -3,44 +3,64 @@ module Polars
3
3
  class DataType
4
4
  end
5
5
 
6
+ # Base class for numeric data types.
7
+ class NumericType < DataType
8
+ end
9
+
10
+ # Base class for integral data types.
11
+ class IntegralType < NumericType
12
+ end
13
+
14
+ # Base class for fractional data types.
15
+ class FractionalType < NumericType
16
+ end
17
+
18
+ # Base class for temporal data types.
19
+ class TemporalType < DataType
20
+ end
21
+
22
+ # Base class for nested data types.
23
+ class NestedType < DataType
24
+ end
25
+
6
26
  # 8-bit signed integer type.
7
- class Int8 < DataType
27
+ class Int8 < IntegralType
8
28
  end
9
29
 
10
30
  # 16-bit signed integer type.
11
- class Int16 < DataType
31
+ class Int16 < IntegralType
12
32
  end
13
33
 
14
34
  # 32-bit signed integer type.
15
- class Int32 < DataType
35
+ class Int32 < IntegralType
16
36
  end
17
37
 
18
38
  # 64-bit signed integer type.
19
- class Int64 < DataType
39
+ class Int64 < IntegralType
20
40
  end
21
41
 
22
42
  # 8-bit unsigned integer type.
23
- class UInt8 < DataType
43
+ class UInt8 < IntegralType
24
44
  end
25
45
 
26
46
  # 16-bit unsigned integer type.
27
- class UInt16 < DataType
47
+ class UInt16 < IntegralType
28
48
  end
29
49
 
30
50
  # 32-bit unsigned integer type.
31
- class UInt32 < DataType
51
+ class UInt32 < IntegralType
32
52
  end
33
53
 
34
54
  # 64-bit unsigned integer type.
35
- class UInt64 < DataType
55
+ class UInt64 < IntegralType
36
56
  end
37
57
 
38
58
  # 32-bit floating point type.
39
- class Float32 < DataType
59
+ class Float32 < FractionalType
40
60
  end
41
61
 
42
62
  # 64-bit floating point type.
43
- class Float64 < DataType
63
+ class Float64 < FractionalType
44
64
  end
45
65
 
46
66
  # Boolean type.
@@ -51,31 +71,19 @@ module Polars
51
71
  class Utf8 < DataType
52
72
  end
53
73
 
54
- # Binary type.
55
- class Binary < DataType
56
- end
57
-
58
- # Type representing Null / None values.
59
- class Null < DataType
60
- end
61
-
62
- # Type representing Datatype values that could not be determined statically.
63
- class Unknown < DataType
64
- end
65
-
66
74
  # Nested list/array type.
67
- class List < DataType
75
+ class List < NestedType
68
76
  def initialize(inner)
69
77
  @inner = Utils.rb_type_to_dtype(inner)
70
78
  end
71
79
  end
72
80
 
73
81
  # Calendar date type.
74
- class Date < DataType
82
+ class Date < TemporalType
75
83
  end
76
84
 
77
85
  # Calendar date and time type.
78
- class Datetime < DataType
86
+ class Datetime < TemporalType
79
87
  def initialize(time_unit = "us", time_zone = nil)
80
88
  @tu = time_unit || "us"
81
89
  @time_zone = time_zone
@@ -83,14 +91,14 @@ module Polars
83
91
  end
84
92
 
85
93
  # Time duration/delta type.
86
- class Duration < DataType
94
+ class Duration < TemporalType
87
95
  def initialize(time_unit = "us")
88
96
  @tu = time_unit
89
97
  end
90
98
  end
91
99
 
92
100
  # Time of day type.
93
- class Time < DataType
101
+ class Time < TemporalType
94
102
  end
95
103
 
96
104
  # Type for wrapping arbitrary Ruby objects.
@@ -102,15 +110,24 @@ module Polars
102
110
  end
103
111
 
104
112
  # Definition of a single field within a `Struct` DataType.
105
- class Field < DataType
113
+ class Field
114
+ attr_reader :name, :dtype
115
+
106
116
  def initialize(name, dtype)
107
117
  @name = name
108
118
  @dtype = Utils.rb_type_to_dtype(dtype)
109
119
  end
120
+
121
+ def inspect
122
+ class_name = self.class.name
123
+ "#{class_name}(#{@name}: #{@dtype})"
124
+ end
110
125
  end
111
126
 
112
127
  # Struct composite type.
113
- class Struct < DataType
128
+ class Struct < NestedType
129
+ attr_reader :fields
130
+
114
131
  def initialize(fields)
115
132
  if fields.is_a?(Hash)
116
133
  @fields = fields.map { |n, d| Field.new(n, d) }
@@ -118,5 +135,26 @@ module Polars
118
135
  @fields = fields
119
136
  end
120
137
  end
138
+
139
+ def inspect
140
+ class_name = self.class.name
141
+ "#{class_name}(#{@fields})"
142
+ end
143
+
144
+ def to_schema
145
+ @fields.to_h { |f| [f.name, f.dtype] }
146
+ end
147
+ end
148
+
149
+ # Binary type.
150
+ class Binary < DataType
151
+ end
152
+
153
+ # Type representing Null / None values.
154
+ class Null < DataType
155
+ end
156
+
157
+ # Type representing Datatype values that could not be determined statically.
158
+ class Unknown < DataType
121
159
  end
122
160
  end
data/lib/polars/series.rb CHANGED
@@ -3667,6 +3667,11 @@ module Polars
3667
3667
  rb_temporal_types << ::Time if defined?(::Time)
3668
3668
 
3669
3669
  value = _get_first_non_none(values)
3670
+ if !value.nil?
3671
+ if value.is_a?(Hash)
3672
+ return DataFrame.new(values).to_struct(name)._s
3673
+ end
3674
+ end
3670
3675
 
3671
3676
  if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3672
3677
  constructor = polars_type_to_constructor(dtype)
data/lib/polars/utils.rb CHANGED
@@ -160,11 +160,11 @@ module Polars
160
160
 
161
161
  def self.scale_bytes(sz, to:)
162
162
  scaling_factor = {
163
- "b" => 1,
164
- "k" => 1024,
165
- "m" => 1024 ** 2,
166
- "g" => 1024 ** 3,
167
- "t" => 1024 ** 4,
163
+ "b" => 1,
164
+ "k" => 1024,
165
+ "m" => 1024 ** 2,
166
+ "g" => 1024 ** 3,
167
+ "t" => 1024 ** 4
168
168
  }[to[0]]
169
169
  if scaling_factor > 1
170
170
  sz / scaling_factor.to_f
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.2"
3
+ VERSION = "0.2.3"
4
4
  end
data/lib/polars-df.rb CHANGED
@@ -1 +1 @@
1
- require "polars"
1
+ require_relative "polars"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-20 00:00:00.000000000 Z
11
+ date: 2023-01-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org