polars-df 0.2.2-x86_64-darwin → 0.2.3-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +1 -1
- data/README.md +4 -2
- data/lib/polars/3.0/polars.bundle +0 -0
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/data_frame.rb +57 -2
- data/lib/polars/data_types.rb +67 -29
- data/lib/polars/series.rb +5 -0
- data/lib/polars/utils.rb +5 -5
- data/lib/polars/version.rb +1 -1
- data/lib/polars-df.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a67aba6106276247072fe14bcd35b3381e890844ea50321c443cbcc815b84ca3
|
4
|
+
data.tar.gz: c5c3388ed010e8946903b7d8ed30fc24ddbf2a34b9eba23d1c178b476faebc7a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1343d81460555ed4baa13962ff7316814b64e3c7c6c363c6d3d7b01221bd83abd9ad62b9bd8dffb4bd7dbb30716bae5d5fc20d7830e157631767ac186638bd24
|
7
|
+
data.tar.gz: f02495aea654767675444e57ea764cb44e5e34123049cdf541c3b2b47df9d7ddc1dd9bb0499ddda6ee360e3983b0184f35bc6e5fcc28f93e49af3476960770eb
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/README.md
CHANGED
@@ -56,6 +56,8 @@ From Active Record
|
|
56
56
|
|
57
57
|
```ruby
|
58
58
|
Polars.read_sql(User.all)
|
59
|
+
# or
|
60
|
+
Polars.read_sql("SELECT * FROM users")
|
59
61
|
```
|
60
62
|
|
61
63
|
From a hash
|
@@ -287,13 +289,13 @@ CSV
|
|
287
289
|
```ruby
|
288
290
|
df.to_csv
|
289
291
|
# or
|
290
|
-
df.write_csv("
|
292
|
+
df.write_csv("file.csv")
|
291
293
|
```
|
292
294
|
|
293
295
|
Parquet
|
294
296
|
|
295
297
|
```ruby
|
296
|
-
df.write_parquet("
|
298
|
+
df.write_parquet("file.parquet")
|
297
299
|
```
|
298
300
|
|
299
301
|
## Types
|
Binary file
|
Binary file
|
Binary file
|
data/lib/polars/data_frame.rb
CHANGED
@@ -4746,7 +4746,14 @@ module Polars
|
|
4746
4746
|
end
|
4747
4747
|
|
4748
4748
|
# @private
|
4749
|
-
def self.
|
4749
|
+
def self.include_unknowns(schema, cols)
|
4750
|
+
cols.to_h { |col| [col, schema.fetch(col, Unknown)] }
|
4751
|
+
end
|
4752
|
+
|
4753
|
+
# @private
|
4754
|
+
def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
|
4755
|
+
raise Todo if schema_overrides
|
4756
|
+
|
4750
4757
|
if columns.is_a?(Hash)
|
4751
4758
|
columns = columns.to_a
|
4752
4759
|
end
|
@@ -4790,8 +4797,48 @@ module Polars
|
|
4790
4797
|
end
|
4791
4798
|
end
|
4792
4799
|
|
4800
|
+
def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
|
4801
|
+
rbdf_columns = rbdf.columns
|
4802
|
+
rbdf_dtypes = rbdf.dtypes
|
4803
|
+
columns, dtypes = _unpack_columns(
|
4804
|
+
(columns || rbdf_columns), schema_overrides: schema_overrides
|
4805
|
+
)
|
4806
|
+
column_subset = []
|
4807
|
+
if columns != rbdf_columns
|
4808
|
+
if columns.length < rbdf_columns.length && columns == rbdf_columns.first(columns.length)
|
4809
|
+
column_subset = columns
|
4810
|
+
else
|
4811
|
+
rbdf.set_column_names(columns)
|
4812
|
+
end
|
4813
|
+
end
|
4814
|
+
|
4815
|
+
column_casts = []
|
4816
|
+
columns.each do |col, i|
|
4817
|
+
if dtypes[col] == Categorical # != rbdf_dtypes[i]
|
4818
|
+
column_casts << Polars.col(col).cast(Categorical)._rbexpr
|
4819
|
+
elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
|
4820
|
+
column_casts << Polars.col(col).cast(structs[col])._rbexpr
|
4821
|
+
elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
|
4822
|
+
column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
|
4823
|
+
end
|
4824
|
+
end
|
4825
|
+
|
4826
|
+
if column_casts.any? || column_subset.any?
|
4827
|
+
rbdf = rbdf.lazy
|
4828
|
+
if column_casts.any?
|
4829
|
+
rbdf = rbdf.with_columns(column_casts)
|
4830
|
+
end
|
4831
|
+
if column_subset.any?
|
4832
|
+
rbdf = rbdf.select(column_subset.map { |col| Polars.col(col)._rbexpr })
|
4833
|
+
end
|
4834
|
+
rbdf = rbdf.collect
|
4835
|
+
end
|
4836
|
+
|
4837
|
+
rbdf
|
4838
|
+
end
|
4839
|
+
|
4793
4840
|
# @private
|
4794
|
-
def self.sequence_to_rbdf(data, columns: nil, orient: nil)
|
4841
|
+
def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
|
4795
4842
|
if data.length == 0
|
4796
4843
|
return hash_to_rbdf({}, columns: columns)
|
4797
4844
|
end
|
@@ -4803,6 +4850,14 @@ module Polars
|
|
4803
4850
|
data.each do |s|
|
4804
4851
|
data_series << s._s
|
4805
4852
|
end
|
4853
|
+
elsif data[0].is_a?(Hash)
|
4854
|
+
column_names, dtypes = _unpack_columns(columns)
|
4855
|
+
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
4856
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
|
4857
|
+
if column_names
|
4858
|
+
rbdf = _post_apply_columns(rbdf, column_names)
|
4859
|
+
end
|
4860
|
+
return rbdf
|
4806
4861
|
elsif data[0].is_a?(Array)
|
4807
4862
|
if orient.nil? && !columns.nil?
|
4808
4863
|
orient = columns.length == data.length ? "col" : "row"
|
data/lib/polars/data_types.rb
CHANGED
@@ -3,44 +3,64 @@ module Polars
|
|
3
3
|
class DataType
|
4
4
|
end
|
5
5
|
|
6
|
+
# Base class for numeric data types.
|
7
|
+
class NumericType < DataType
|
8
|
+
end
|
9
|
+
|
10
|
+
# Base class for integral data types.
|
11
|
+
class IntegralType < NumericType
|
12
|
+
end
|
13
|
+
|
14
|
+
# Base class for fractional data types.
|
15
|
+
class FractionalType < NumericType
|
16
|
+
end
|
17
|
+
|
18
|
+
# Base class for temporal data types.
|
19
|
+
class TemporalType < DataType
|
20
|
+
end
|
21
|
+
|
22
|
+
# Base class for nested data types.
|
23
|
+
class NestedType < DataType
|
24
|
+
end
|
25
|
+
|
6
26
|
# 8-bit signed integer type.
|
7
|
-
class Int8 <
|
27
|
+
class Int8 < IntegralType
|
8
28
|
end
|
9
29
|
|
10
30
|
# 16-bit signed integer type.
|
11
|
-
class Int16 <
|
31
|
+
class Int16 < IntegralType
|
12
32
|
end
|
13
33
|
|
14
34
|
# 32-bit signed integer type.
|
15
|
-
class Int32 <
|
35
|
+
class Int32 < IntegralType
|
16
36
|
end
|
17
37
|
|
18
38
|
# 64-bit signed integer type.
|
19
|
-
class Int64 <
|
39
|
+
class Int64 < IntegralType
|
20
40
|
end
|
21
41
|
|
22
42
|
# 8-bit unsigned integer type.
|
23
|
-
class UInt8 <
|
43
|
+
class UInt8 < IntegralType
|
24
44
|
end
|
25
45
|
|
26
46
|
# 16-bit unsigned integer type.
|
27
|
-
class UInt16 <
|
47
|
+
class UInt16 < IntegralType
|
28
48
|
end
|
29
49
|
|
30
50
|
# 32-bit unsigned integer type.
|
31
|
-
class UInt32 <
|
51
|
+
class UInt32 < IntegralType
|
32
52
|
end
|
33
53
|
|
34
54
|
# 64-bit unsigned integer type.
|
35
|
-
class UInt64 <
|
55
|
+
class UInt64 < IntegralType
|
36
56
|
end
|
37
57
|
|
38
58
|
# 32-bit floating point type.
|
39
|
-
class Float32 <
|
59
|
+
class Float32 < FractionalType
|
40
60
|
end
|
41
61
|
|
42
62
|
# 64-bit floating point type.
|
43
|
-
class Float64 <
|
63
|
+
class Float64 < FractionalType
|
44
64
|
end
|
45
65
|
|
46
66
|
# Boolean type.
|
@@ -51,31 +71,19 @@ module Polars
|
|
51
71
|
class Utf8 < DataType
|
52
72
|
end
|
53
73
|
|
54
|
-
# Binary type.
|
55
|
-
class Binary < DataType
|
56
|
-
end
|
57
|
-
|
58
|
-
# Type representing Null / None values.
|
59
|
-
class Null < DataType
|
60
|
-
end
|
61
|
-
|
62
|
-
# Type representing Datatype values that could not be determined statically.
|
63
|
-
class Unknown < DataType
|
64
|
-
end
|
65
|
-
|
66
74
|
# Nested list/array type.
|
67
|
-
class List <
|
75
|
+
class List < NestedType
|
68
76
|
def initialize(inner)
|
69
77
|
@inner = Utils.rb_type_to_dtype(inner)
|
70
78
|
end
|
71
79
|
end
|
72
80
|
|
73
81
|
# Calendar date type.
|
74
|
-
class Date <
|
82
|
+
class Date < TemporalType
|
75
83
|
end
|
76
84
|
|
77
85
|
# Calendar date and time type.
|
78
|
-
class Datetime <
|
86
|
+
class Datetime < TemporalType
|
79
87
|
def initialize(time_unit = "us", time_zone = nil)
|
80
88
|
@tu = time_unit || "us"
|
81
89
|
@time_zone = time_zone
|
@@ -83,14 +91,14 @@ module Polars
|
|
83
91
|
end
|
84
92
|
|
85
93
|
# Time duration/delta type.
|
86
|
-
class Duration <
|
94
|
+
class Duration < TemporalType
|
87
95
|
def initialize(time_unit = "us")
|
88
96
|
@tu = time_unit
|
89
97
|
end
|
90
98
|
end
|
91
99
|
|
92
100
|
# Time of day type.
|
93
|
-
class Time <
|
101
|
+
class Time < TemporalType
|
94
102
|
end
|
95
103
|
|
96
104
|
# Type for wrapping arbitrary Ruby objects.
|
@@ -102,15 +110,24 @@ module Polars
|
|
102
110
|
end
|
103
111
|
|
104
112
|
# Definition of a single field within a `Struct` DataType.
|
105
|
-
class Field
|
113
|
+
class Field
|
114
|
+
attr_reader :name, :dtype
|
115
|
+
|
106
116
|
def initialize(name, dtype)
|
107
117
|
@name = name
|
108
118
|
@dtype = Utils.rb_type_to_dtype(dtype)
|
109
119
|
end
|
120
|
+
|
121
|
+
def inspect
|
122
|
+
class_name = self.class.name
|
123
|
+
"#{class_name}(#{@name}: #{@dtype})"
|
124
|
+
end
|
110
125
|
end
|
111
126
|
|
112
127
|
# Struct composite type.
|
113
|
-
class Struct <
|
128
|
+
class Struct < NestedType
|
129
|
+
attr_reader :fields
|
130
|
+
|
114
131
|
def initialize(fields)
|
115
132
|
if fields.is_a?(Hash)
|
116
133
|
@fields = fields.map { |n, d| Field.new(n, d) }
|
@@ -118,5 +135,26 @@ module Polars
|
|
118
135
|
@fields = fields
|
119
136
|
end
|
120
137
|
end
|
138
|
+
|
139
|
+
def inspect
|
140
|
+
class_name = self.class.name
|
141
|
+
"#{class_name}(#{@fields})"
|
142
|
+
end
|
143
|
+
|
144
|
+
def to_schema
|
145
|
+
@fields.to_h { |f| [f.name, f.dtype] }
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# Binary type.
|
150
|
+
class Binary < DataType
|
151
|
+
end
|
152
|
+
|
153
|
+
# Type representing Null / None values.
|
154
|
+
class Null < DataType
|
155
|
+
end
|
156
|
+
|
157
|
+
# Type representing Datatype values that could not be determined statically.
|
158
|
+
class Unknown < DataType
|
121
159
|
end
|
122
160
|
end
|
data/lib/polars/series.rb
CHANGED
@@ -3667,6 +3667,11 @@ module Polars
|
|
3667
3667
|
rb_temporal_types << ::Time if defined?(::Time)
|
3668
3668
|
|
3669
3669
|
value = _get_first_non_none(values)
|
3670
|
+
if !value.nil?
|
3671
|
+
if value.is_a?(Hash)
|
3672
|
+
return DataFrame.new(values).to_struct(name)._s
|
3673
|
+
end
|
3674
|
+
end
|
3670
3675
|
|
3671
3676
|
if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
|
3672
3677
|
constructor = polars_type_to_constructor(dtype)
|
data/lib/polars/utils.rb
CHANGED
@@ -160,11 +160,11 @@ module Polars
|
|
160
160
|
|
161
161
|
def self.scale_bytes(sz, to:)
|
162
162
|
scaling_factor = {
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
163
|
+
"b" => 1,
|
164
|
+
"k" => 1024,
|
165
|
+
"m" => 1024 ** 2,
|
166
|
+
"g" => 1024 ** 3,
|
167
|
+
"t" => 1024 ** 4
|
168
168
|
}[to[0]]
|
169
169
|
if scaling_factor > 1
|
170
170
|
sz / scaling_factor.to_f
|
data/lib/polars/version.rb
CHANGED
data/lib/polars-df.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
require_relative "polars"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|