polars-df 0.10.0-x86_64-linux → 0.11.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/LICENSE-THIRD-PARTY.txt +152 -79
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +12 -4
data/lib/polars/data_frame.rb
CHANGED
@@ -46,271 +46,6 @@ module Polars
|
|
46
46
|
df
|
47
47
|
end
|
48
48
|
|
49
|
-
# @private
|
50
|
-
def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
|
51
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
52
|
-
_from_rbdf(rbdf)
|
53
|
-
end
|
54
|
-
|
55
|
-
# @private
|
56
|
-
def self._from_hash(data, schema: nil, schema_overrides: nil)
|
57
|
-
_from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
|
58
|
-
end
|
59
|
-
|
60
|
-
# def self._from_records
|
61
|
-
# end
|
62
|
-
|
63
|
-
# def self._from_numo
|
64
|
-
# end
|
65
|
-
|
66
|
-
# no self._from_arrow
|
67
|
-
|
68
|
-
# no self._from_pandas
|
69
|
-
|
70
|
-
# @private
|
71
|
-
def self._read_csv(
|
72
|
-
file,
|
73
|
-
has_header: true,
|
74
|
-
columns: nil,
|
75
|
-
sep: str = ",",
|
76
|
-
comment_char: nil,
|
77
|
-
quote_char: '"',
|
78
|
-
skip_rows: 0,
|
79
|
-
dtypes: nil,
|
80
|
-
null_values: nil,
|
81
|
-
ignore_errors: false,
|
82
|
-
parse_dates: false,
|
83
|
-
n_threads: nil,
|
84
|
-
infer_schema_length: 100,
|
85
|
-
batch_size: 8192,
|
86
|
-
n_rows: nil,
|
87
|
-
encoding: "utf8",
|
88
|
-
low_memory: false,
|
89
|
-
rechunk: true,
|
90
|
-
skip_rows_after_header: 0,
|
91
|
-
row_count_name: nil,
|
92
|
-
row_count_offset: 0,
|
93
|
-
sample_size: 1024,
|
94
|
-
eol_char: "\n",
|
95
|
-
truncate_ragged_lines: false
|
96
|
-
)
|
97
|
-
if Utils.pathlike?(file)
|
98
|
-
path = Utils.normalise_filepath(file)
|
99
|
-
else
|
100
|
-
path = nil
|
101
|
-
# if defined?(StringIO) && file.is_a?(StringIO)
|
102
|
-
# file = file.string
|
103
|
-
# end
|
104
|
-
end
|
105
|
-
|
106
|
-
dtype_list = nil
|
107
|
-
dtype_slice = nil
|
108
|
-
if !dtypes.nil?
|
109
|
-
if dtypes.is_a?(Hash)
|
110
|
-
dtype_list = []
|
111
|
-
dtypes.each do|k, v|
|
112
|
-
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
113
|
-
end
|
114
|
-
elsif dtypes.is_a?(::Array)
|
115
|
-
dtype_slice = dtypes
|
116
|
-
else
|
117
|
-
raise ArgumentError, "dtype arg should be list or dict"
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
processed_null_values = Utils._process_null_values(null_values)
|
122
|
-
|
123
|
-
if columns.is_a?(::String)
|
124
|
-
columns = [columns]
|
125
|
-
end
|
126
|
-
if file.is_a?(::String) && file.include?("*")
|
127
|
-
dtypes_dict = nil
|
128
|
-
if !dtype_list.nil?
|
129
|
-
dtypes_dict = dtype_list.to_h
|
130
|
-
end
|
131
|
-
if !dtype_slice.nil?
|
132
|
-
raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
|
133
|
-
end
|
134
|
-
scan = Polars.scan_csv(
|
135
|
-
file,
|
136
|
-
has_header: has_header,
|
137
|
-
sep: sep,
|
138
|
-
comment_char: comment_char,
|
139
|
-
quote_char: quote_char,
|
140
|
-
skip_rows: skip_rows,
|
141
|
-
dtypes: dtypes_dict,
|
142
|
-
null_values: null_values,
|
143
|
-
ignore_errors: ignore_errors,
|
144
|
-
infer_schema_length: infer_schema_length,
|
145
|
-
n_rows: n_rows,
|
146
|
-
low_memory: low_memory,
|
147
|
-
rechunk: rechunk,
|
148
|
-
skip_rows_after_header: skip_rows_after_header,
|
149
|
-
row_count_name: row_count_name,
|
150
|
-
row_count_offset: row_count_offset,
|
151
|
-
eol_char: eol_char,
|
152
|
-
truncate_ragged_lines: truncate_ragged_lines
|
153
|
-
)
|
154
|
-
if columns.nil?
|
155
|
-
return _from_rbdf(scan.collect._df)
|
156
|
-
elsif is_str_sequence(columns, allow_str: false)
|
157
|
-
return _from_rbdf(scan.select(columns).collect._df)
|
158
|
-
else
|
159
|
-
raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
164
|
-
|
165
|
-
_from_rbdf(
|
166
|
-
RbDataFrame.read_csv(
|
167
|
-
file,
|
168
|
-
infer_schema_length,
|
169
|
-
batch_size,
|
170
|
-
has_header,
|
171
|
-
ignore_errors,
|
172
|
-
n_rows,
|
173
|
-
skip_rows,
|
174
|
-
projection,
|
175
|
-
sep,
|
176
|
-
rechunk,
|
177
|
-
columns,
|
178
|
-
encoding,
|
179
|
-
n_threads,
|
180
|
-
path,
|
181
|
-
dtype_list,
|
182
|
-
dtype_slice,
|
183
|
-
low_memory,
|
184
|
-
comment_char,
|
185
|
-
quote_char,
|
186
|
-
processed_null_values,
|
187
|
-
parse_dates,
|
188
|
-
skip_rows_after_header,
|
189
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
190
|
-
sample_size,
|
191
|
-
eol_char,
|
192
|
-
truncate_ragged_lines
|
193
|
-
)
|
194
|
-
)
|
195
|
-
end
|
196
|
-
|
197
|
-
# @private
|
198
|
-
def self._read_parquet(
|
199
|
-
source,
|
200
|
-
columns: nil,
|
201
|
-
n_rows: nil,
|
202
|
-
parallel: "auto",
|
203
|
-
row_count_name: nil,
|
204
|
-
row_count_offset: 0,
|
205
|
-
low_memory: false,
|
206
|
-
use_statistics: true,
|
207
|
-
rechunk: true
|
208
|
-
)
|
209
|
-
if Utils.pathlike?(source)
|
210
|
-
source = Utils.normalise_filepath(source)
|
211
|
-
end
|
212
|
-
if columns.is_a?(::String)
|
213
|
-
columns = [columns]
|
214
|
-
end
|
215
|
-
|
216
|
-
if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
|
217
|
-
scan =
|
218
|
-
Polars.scan_parquet(
|
219
|
-
source,
|
220
|
-
n_rows: n_rows,
|
221
|
-
rechunk: true,
|
222
|
-
parallel: parallel,
|
223
|
-
row_count_name: row_count_name,
|
224
|
-
row_count_offset: row_count_offset,
|
225
|
-
low_memory: low_memory
|
226
|
-
)
|
227
|
-
|
228
|
-
if columns.nil?
|
229
|
-
return self._from_rbdf(scan.collect._df)
|
230
|
-
elsif Utils.is_str_sequence(columns, allow_str: false)
|
231
|
-
return self._from_rbdf(scan.select(columns).collect._df)
|
232
|
-
else
|
233
|
-
raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
|
234
|
-
end
|
235
|
-
end
|
236
|
-
|
237
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
238
|
-
_from_rbdf(
|
239
|
-
RbDataFrame.read_parquet(
|
240
|
-
source,
|
241
|
-
columns,
|
242
|
-
projection,
|
243
|
-
n_rows,
|
244
|
-
parallel,
|
245
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
246
|
-
low_memory,
|
247
|
-
use_statistics,
|
248
|
-
rechunk
|
249
|
-
)
|
250
|
-
)
|
251
|
-
end
|
252
|
-
|
253
|
-
# @private
|
254
|
-
def self._read_avro(file, columns: nil, n_rows: nil)
|
255
|
-
if Utils.pathlike?(file)
|
256
|
-
file = Utils.normalise_filepath(file)
|
257
|
-
end
|
258
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
259
|
-
_from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
|
260
|
-
end
|
261
|
-
|
262
|
-
# @private
|
263
|
-
def self._read_ipc(
|
264
|
-
file,
|
265
|
-
columns: nil,
|
266
|
-
n_rows: nil,
|
267
|
-
row_count_name: nil,
|
268
|
-
row_count_offset: 0,
|
269
|
-
rechunk: true,
|
270
|
-
memory_map: true
|
271
|
-
)
|
272
|
-
if Utils.pathlike?(file)
|
273
|
-
file = Utils.normalise_filepath(file)
|
274
|
-
end
|
275
|
-
if columns.is_a?(::String)
|
276
|
-
columns = [columns]
|
277
|
-
end
|
278
|
-
|
279
|
-
if file.is_a?(::String) && file.include?("*")
|
280
|
-
raise Todo
|
281
|
-
end
|
282
|
-
|
283
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
284
|
-
_from_rbdf(
|
285
|
-
RbDataFrame.read_ipc(
|
286
|
-
file,
|
287
|
-
columns,
|
288
|
-
projection,
|
289
|
-
n_rows,
|
290
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
291
|
-
memory_map
|
292
|
-
)
|
293
|
-
)
|
294
|
-
end
|
295
|
-
|
296
|
-
# @private
|
297
|
-
def self._read_json(file)
|
298
|
-
if Utils.pathlike?(file)
|
299
|
-
file = Utils.normalise_filepath(file)
|
300
|
-
end
|
301
|
-
|
302
|
-
_from_rbdf(RbDataFrame.read_json(file))
|
303
|
-
end
|
304
|
-
|
305
|
-
# @private
|
306
|
-
def self._read_ndjson(file)
|
307
|
-
if Utils.pathlike?(file)
|
308
|
-
file = Utils.normalise_filepath(file)
|
309
|
-
end
|
310
|
-
|
311
|
-
_from_rbdf(RbDataFrame.read_ndjson(file))
|
312
|
-
end
|
313
|
-
|
314
49
|
# Get the shape of the DataFrame.
|
315
50
|
#
|
316
51
|
# @return [Array]
|
@@ -419,6 +154,13 @@ module Polars
|
|
419
154
|
_df.dtypes
|
420
155
|
end
|
421
156
|
|
157
|
+
# Get flags that are set on the columns of this DataFrame.
|
158
|
+
#
|
159
|
+
# @return [Hash]
|
160
|
+
def flags
|
161
|
+
columns.to_h { |name| [name, self[name].flags] }
|
162
|
+
end
|
163
|
+
|
422
164
|
# Get the schema.
|
423
165
|
#
|
424
166
|
# @return [Hash]
|
@@ -845,7 +587,7 @@ module Polars
|
|
845
587
|
row_oriented: false
|
846
588
|
)
|
847
589
|
if Utils.pathlike?(file)
|
848
|
-
file = Utils.normalise_filepath(file)
|
590
|
+
file = Utils.normalize_filepath(file)
|
849
591
|
end
|
850
592
|
to_string_io = !file.nil? && file.is_a?(StringIO)
|
851
593
|
if file.nil? || to_string_io
|
@@ -884,7 +626,7 @@ module Polars
|
|
884
626
|
# # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
|
885
627
|
def write_ndjson(file = nil)
|
886
628
|
if Utils.pathlike?(file)
|
887
|
-
file = Utils.normalise_filepath(file)
|
629
|
+
file = Utils.normalize_filepath(file)
|
888
630
|
end
|
889
631
|
to_string_io = !file.nil? && file.is_a?(StringIO)
|
890
632
|
if file.nil? || to_string_io
|
@@ -991,7 +733,7 @@ module Polars
|
|
991
733
|
end
|
992
734
|
|
993
735
|
if Utils.pathlike?(file)
|
994
|
-
file = Utils.normalise_filepath(file)
|
736
|
+
file = Utils.normalize_filepath(file)
|
995
737
|
end
|
996
738
|
|
997
739
|
_df.write_csv(
|
@@ -1029,7 +771,7 @@ module Polars
|
|
1029
771
|
compression = "uncompressed"
|
1030
772
|
end
|
1031
773
|
if Utils.pathlike?(file)
|
1032
|
-
file = Utils.normalise_filepath(file)
|
774
|
+
file = Utils.normalize_filepath(file)
|
1033
775
|
end
|
1034
776
|
|
1035
777
|
_df.write_avro(file, compression)
|
@@ -1050,7 +792,7 @@ module Polars
|
|
1050
792
|
file.set_encoding(Encoding::BINARY)
|
1051
793
|
end
|
1052
794
|
if Utils.pathlike?(file)
|
1053
|
-
file = Utils.normalise_filepath(file)
|
795
|
+
file = Utils.normalize_filepath(file)
|
1054
796
|
end
|
1055
797
|
|
1056
798
|
if compression.nil?
|
@@ -1061,6 +803,47 @@ module Polars
|
|
1061
803
|
return_bytes ? file.string : nil
|
1062
804
|
end
|
1063
805
|
|
806
|
+
# Write to Arrow IPC record batch stream.
|
807
|
+
#
|
808
|
+
# See "Streaming format" in https://arrow.apache.org/docs/python/ipc.html.
|
809
|
+
#
|
810
|
+
# @param file [Object]
|
811
|
+
# Path or writable file-like object to which the IPC record batch data will
|
812
|
+
# be written. If set to `nil`, the output is returned as a binary String.
|
813
|
+
# @param compression ['uncompressed', 'lz4', 'zstd']
|
814
|
+
# Compression method. Defaults to "uncompressed".
|
815
|
+
#
|
816
|
+
# @return [Object]
|
817
|
+
#
|
818
|
+
# @example
|
819
|
+
# df = Polars::DataFrame.new(
|
820
|
+
# {
|
821
|
+
# "foo" => [1, 2, 3, 4, 5],
|
822
|
+
# "bar" => [6, 7, 8, 9, 10],
|
823
|
+
# "ham" => ["a", "b", "c", "d", "e"]
|
824
|
+
# }
|
825
|
+
# )
|
826
|
+
# df.write_ipc_stream("new_file.arrow")
|
827
|
+
def write_ipc_stream(
|
828
|
+
file,
|
829
|
+
compression: "uncompressed"
|
830
|
+
)
|
831
|
+
return_bytes = file.nil?
|
832
|
+
if return_bytes
|
833
|
+
file = StringIO.new
|
834
|
+
file.set_encoding(Encoding::BINARY)
|
835
|
+
elsif Utils.pathlike?(file)
|
836
|
+
file = Utils.normalize_filepath(file)
|
837
|
+
end
|
838
|
+
|
839
|
+
if compression.nil?
|
840
|
+
compression = "uncompressed"
|
841
|
+
end
|
842
|
+
|
843
|
+
_df.write_ipc_stream(file, compression)
|
844
|
+
return_bytes ? file.string : nil
|
845
|
+
end
|
846
|
+
|
1064
847
|
# Write to Apache Parquet file.
|
1065
848
|
#
|
1066
849
|
# @param file [String, Pathname, StringIO]
|
@@ -1097,7 +880,7 @@ module Polars
|
|
1097
880
|
compression = "uncompressed"
|
1098
881
|
end
|
1099
882
|
if Utils.pathlike?(file)
|
1100
|
-
file = Utils.normalise_filepath(file)
|
883
|
+
file = Utils.normalize_filepath(file)
|
1101
884
|
end
|
1102
885
|
|
1103
886
|
_df.write_parquet(
|
@@ -1773,10 +1556,7 @@ module Polars
|
|
1773
1556
|
# # │ 3 ┆ 8 ┆ c │
|
1774
1557
|
# # └─────┴─────┴─────┘
|
1775
1558
|
def drop_nulls(subset: nil)
|
1776
|
-
|
1777
|
-
subset = [subset]
|
1778
|
-
end
|
1779
|
-
_from_rbdf(_df.drop_nulls(subset))
|
1559
|
+
lazy.drop_nulls(subset: subset).collect(_eager: true)
|
1780
1560
|
end
|
1781
1561
|
|
1782
1562
|
# Offers a structured way to apply a sequence of user-defined functions (UDFs).
|
@@ -1838,16 +1618,16 @@ module Polars
|
|
1838
1618
|
# df.with_row_index
|
1839
1619
|
# # =>
|
1840
1620
|
# # shape: (3, 3)
|
1841
|
-
# #
|
1842
|
-
# # │
|
1843
|
-
# # │ ---
|
1844
|
-
# # │ u32
|
1845
|
-
# #
|
1846
|
-
# # │ 0
|
1847
|
-
# # │ 1
|
1848
|
-
# # │ 2
|
1849
|
-
# #
|
1850
|
-
def with_row_index(name: "
|
1621
|
+
# # ┌───────┬─────┬─────┐
|
1622
|
+
# # │ index ┆ a ┆ b │
|
1623
|
+
# # │ --- ┆ --- ┆ --- │
|
1624
|
+
# # │ u32 ┆ i64 ┆ i64 │
|
1625
|
+
# # ╞═══════╪═════╪═════╡
|
1626
|
+
# # │ 0 ┆ 1 ┆ 2 │
|
1627
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
1628
|
+
# # │ 2 ┆ 5 ┆ 6 │
|
1629
|
+
# # └───────┴─────┴─────┘
|
1630
|
+
def with_row_index(name: "index", offset: 0)
|
1851
1631
|
_from_rbdf(_df.with_row_index(name, offset))
|
1852
1632
|
end
|
1853
1633
|
alias_method :with_row_count, :with_row_index
|
@@ -2136,16 +1916,16 @@ module Polars
|
|
2136
1916
|
# )
|
2137
1917
|
# # =>
|
2138
1918
|
# # shape: (4, 3)
|
2139
|
-
# #
|
2140
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
2141
|
-
# # │ --- ┆ --- ┆ ---
|
2142
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
2143
|
-
# #
|
2144
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12
|
2145
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12
|
2146
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12
|
2147
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
2148
|
-
# #
|
1919
|
+
# # ┌─────────────────────┬────────────┬─────────────────────────────────┐
|
1920
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1921
|
+
# # │ --- ┆ --- ┆ --- │
|
1922
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1923
|
+
# # ╞═════════════════════╪════════════╪═════════════════════════════════╡
|
1924
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-… │
|
1925
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-… │
|
1926
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-… │
|
1927
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1928
|
+
# # └─────────────────────┴────────────┴─────────────────────────────────┘
|
2149
1929
|
#
|
2150
1930
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
2151
1931
|
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -2620,7 +2400,7 @@ module Polars
|
|
2620
2400
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [-1, 5, 8]})
|
2621
2401
|
#
|
2622
2402
|
# @example Return a DataFrame by mapping each row to a tuple:
|
2623
|
-
# df.apply { |t| [t[0] * 2, t[1] * 3] }
|
2403
|
+
# df.map_rows { |t| [t[0] * 2, t[1] * 3] }
|
2624
2404
|
# # =>
|
2625
2405
|
# # shape: (3, 2)
|
2626
2406
|
# # ┌──────────┬──────────┐
|
@@ -2634,7 +2414,7 @@ module Polars
|
|
2634
2414
|
# # └──────────┴──────────┘
|
2635
2415
|
#
|
2636
2416
|
# @example Return a Series by mapping each row to a scalar:
|
2637
|
-
# df.apply { |t| t[0] * 2 + t[1] }
|
2417
|
+
# df.map_rows { |t| t[0] * 2 + t[1] }
|
2638
2418
|
# # =>
|
2639
2419
|
# # shape: (3, 1)
|
2640
2420
|
# # ┌───────┐
|
@@ -2646,14 +2426,15 @@ module Polars
|
|
2646
2426
|
# # │ 9 │
|
2647
2427
|
# # │ 14 │
|
2648
2428
|
# # └───────┘
|
2649
|
-
def apply(return_dtype: nil, inference_size: 256, &f)
|
2650
|
-
out, is_df = _df.apply(f, return_dtype, inference_size)
|
2429
|
+
def map_rows(return_dtype: nil, inference_size: 256, &f)
|
2430
|
+
out, is_df = _df.map_rows(f, return_dtype, inference_size)
|
2651
2431
|
if is_df
|
2652
2432
|
_from_rbdf(out)
|
2653
2433
|
else
|
2654
2434
|
_from_rbdf(Utils.wrap_s(out).to_frame._df)
|
2655
2435
|
end
|
2656
2436
|
end
|
2437
|
+
alias_method :apply, :map_rows
|
2657
2438
|
|
2658
2439
|
# Return a new DataFrame with the column added or replaced.
|
2659
2440
|
#
|
@@ -3774,7 +3555,7 @@ module Polars
|
|
3774
3555
|
# # ┌─────────┐
|
3775
3556
|
# # │ literal │
|
3776
3557
|
# # │ --- │
|
3777
|
-
# # │
|
3558
|
+
# # │ i32 │
|
3778
3559
|
# # ╞═════════╡
|
3779
3560
|
# # │ 0 │
|
3780
3561
|
# # │ 0 │
|
@@ -5255,7 +5036,7 @@ module Polars
|
|
5255
5036
|
elsif data[0].is_a?(Hash)
|
5256
5037
|
column_names, dtypes = _unpack_schema(columns)
|
5257
5038
|
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
5258
|
-
rbdf = RbDataFrame.
|
5039
|
+
rbdf = RbDataFrame.from_hashes(data, schema, schema_overrides, false, infer_schema_length)
|
5259
5040
|
if column_names
|
5260
5041
|
rbdf = _post_apply_columns(rbdf, column_names)
|
5261
5042
|
end
|
@@ -5289,7 +5070,7 @@ module Polars
|
|
5289
5070
|
if unpack_nested
|
5290
5071
|
raise Todo
|
5291
5072
|
else
|
5292
|
-
rbdf = RbDataFrame.
|
5073
|
+
rbdf = RbDataFrame.from_rows(
|
5293
5074
|
data,
|
5294
5075
|
infer_schema_length,
|
5295
5076
|
local_schema_override.any? ? local_schema_override : nil
|
@@ -66,6 +66,8 @@ module Polars
|
|
66
66
|
if !out.nil?
|
67
67
|
if s.dtype == Date
|
68
68
|
return Utils._to_ruby_date(out.to_i)
|
69
|
+
elsif [Datetime, Duration, Time].include?(s.dtype)
|
70
|
+
return out
|
69
71
|
else
|
70
72
|
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
71
73
|
end
|
@@ -93,10 +95,12 @@ module Polars
|
|
93
95
|
# # => 2001-01-02 00:00:00 UTC
|
94
96
|
def mean
|
95
97
|
s = Utils.wrap_s(_s)
|
96
|
-
out = s.mean
|
98
|
+
out = s.mean
|
97
99
|
if !out.nil?
|
98
100
|
if s.dtype == Date
|
99
101
|
return Utils._to_ruby_date(out.to_i)
|
102
|
+
elsif [Datetime, Duration, Time].include?(s.dtype)
|
103
|
+
return out
|
100
104
|
else
|
101
105
|
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
102
106
|
end
|
@@ -32,7 +32,7 @@ module Polars
|
|
32
32
|
@start_by = start_by
|
33
33
|
end
|
34
34
|
|
35
|
-
def agg(aggs)
|
35
|
+
def agg(*aggs, **named_aggs)
|
36
36
|
@df.lazy
|
37
37
|
.group_by_dynamic(
|
38
38
|
@time_column,
|
@@ -45,7 +45,7 @@ module Polars
|
|
45
45
|
by: @by,
|
46
46
|
start_by: @start_by
|
47
47
|
)
|
48
|
-
.agg(aggs)
|
48
|
+
.agg(*aggs, **named_aggs)
|
49
49
|
.collect(no_optimization: true, string_cache: false)
|
50
50
|
end
|
51
51
|
end
|
data/lib/polars/exceptions.rb
CHANGED
@@ -3,6 +3,10 @@ module Polars
|
|
3
3
|
# Base class for all Polars errors.
|
4
4
|
class Error < StandardError; end
|
5
5
|
|
6
|
+
# @private
|
7
|
+
# Exception raised when an operation is not allowed (or possible) against a given object or data structure.
|
8
|
+
class InvalidOperationError < Error; end
|
9
|
+
|
6
10
|
# @private
|
7
11
|
# Exception raised when an unsupported testing assert is made.
|
8
12
|
class InvalidAssert < Error; end
|