polars-df 0.2.4-x86_64-linux → 0.2.5-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe562be02f1336fcbf58709c43602a6779f36f0e1cf39db78f1c40e9f4833000
4
- data.tar.gz: 3fa635450e132ad73b593ec6b7e8ed93fca249cb8762ba70a6e453d0fac33b50
3
+ metadata.gz: de084de2d9c7e6a8f79b6ede4388324aeb6c02b46230a23e5552ff31ecc772f3
4
+ data.tar.gz: bde2d21b47378f62769f9ae2545303690b146c27cd1a5f10afa131732f90b9ef
5
5
  SHA512:
6
- metadata.gz: 6c085151acab410060f8a0cf7a3fa768b6daf4ca6584293fd54677415850e15b7e7622dfa00a9fca7f3647bf1a5d035d37057859b31edee28401af4b37eab5ea
7
- data.tar.gz: 8c5d6720af0f7b8a23e74e3c7686a60e1f477b282e0b02c112fe5acf365bc5c9bf9b9a96aa06279674304c16152fb147ef7203031a542b61e5b4c1c8f59ccdb2
6
+ metadata.gz: b7575704a8ca22948393acd8fd12b624fd78215ab3601364a4eab577b378f268c79c1f6fd60abfdedd80440931702f6138d05f9e94caf49ace2706d119318939
7
+ data.tar.gz: d02b0d987bca0115010d8a61d68f5dfcffaa3266fcf51c72db27334cc4b4822178d430f6e06a50020398b66656d796b583d6e21a2793dd767991d468ae1e87d4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.5 (2023-02-01)
2
+
3
+ - Added support for glob patterns to `read_csv` method
4
+ - Added support for symbols to more methods
5
+
1
6
  ## 0.2.4 (2023-01-29)
2
7
 
3
8
  - Added support for more types when creating a data frame from an array of hashes
data/README.md CHANGED
@@ -73,9 +73,9 @@ From an array of hashes
73
73
 
74
74
  ```ruby
75
75
  Polars::DataFrame.new([
76
- {"a" => 1, "b" => "one"},
77
- {"a" => 2, "b" => "two"},
78
- {"a" => 3, "b" => "three"}
76
+ {a: 1, b: "one"},
77
+ {a: 2, b: "two"},
78
+ {a: 3, b: "three"}
79
79
  ])
80
80
  ```
81
81
 
Binary file
Binary file
Binary file
@@ -29,7 +29,7 @@ module Polars
29
29
  eol_char: "\n",
30
30
  new_columns: nil
31
31
  )
32
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
32
+ if Utils.pathlike?(file)
33
33
  path = Utils.format_path(file)
34
34
  end
35
35
 
@@ -94,7 +94,7 @@ module Polars
94
94
  sample_size: 1024,
95
95
  eol_char: "\n"
96
96
  )
97
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
97
+ if Utils.pathlike?(file)
98
98
  path = Utils.format_path(file)
99
99
  else
100
100
  path = nil
@@ -124,7 +124,39 @@ module Polars
124
124
  columns = [columns]
125
125
  end
126
126
  if file.is_a?(String) && file.include?("*")
127
- raise Todo
127
+ dtypes_dict = nil
128
+ if !dtype_list.nil?
129
+ dtypes_dict = dtype_list.to_h
130
+ end
131
+ if !dtype_slice.nil?
132
+ raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
133
+ end
134
+ scan = Polars.scan_csv(
135
+ file,
136
+ has_header: has_header,
137
+ sep: sep,
138
+ comment_char: comment_char,
139
+ quote_char: quote_char,
140
+ skip_rows: skip_rows,
141
+ dtypes: dtypes_dict,
142
+ null_values: null_values,
143
+ ignore_errors: ignore_errors,
144
+ infer_schema_length: infer_schema_length,
145
+ n_rows: n_rows,
146
+ low_memory: low_memory,
147
+ rechunk: rechunk,
148
+ skip_rows_after_header: skip_rows_after_header,
149
+ row_count_name: row_count_name,
150
+ row_count_offset: row_count_offset,
151
+ eol_char: eol_char
152
+ )
153
+ if columns.nil?
154
+ return _from_rbdf(scan.collect._df)
155
+ elsif is_str_sequence(columns, allow_str: false)
156
+ return _from_rbdf(scan.select(columns).collect._df)
157
+ else
158
+ raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
159
+ end
128
160
  end
129
161
 
130
162
  projection, columns = Utils.handle_projection_columns(columns)
@@ -170,7 +202,7 @@ module Polars
170
202
  row_count_offset: 0,
171
203
  low_memory: false
172
204
  )
173
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
205
+ if Utils.pathlike?(file)
174
206
  file = Utils.format_path(file)
175
207
  end
176
208
 
@@ -194,7 +226,7 @@ module Polars
194
226
 
195
227
  # @private
196
228
  def self._read_avro(file, columns: nil, n_rows: nil)
197
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
229
+ if Utils.pathlike?(file)
198
230
  file = Utils.format_path(file)
199
231
  end
200
232
  projection, columns = Utils.handle_projection_columns(columns)
@@ -211,7 +243,7 @@ module Polars
211
243
  rechunk: true,
212
244
  memory_map: true
213
245
  )
214
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
246
+ if Utils.pathlike?(file)
215
247
  file = Utils.format_path(file)
216
248
  end
217
249
  if columns.is_a?(String)
@@ -237,7 +269,7 @@ module Polars
237
269
 
238
270
  # @private
239
271
  def self._read_json(file)
240
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
272
+ if Utils.pathlike?(file)
241
273
  file = Utils.format_path(file)
242
274
  end
243
275
 
@@ -246,7 +278,7 @@ module Polars
246
278
 
247
279
  # @private
248
280
  def self._read_ndjson(file)
249
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
281
+ if Utils.pathlike?(file)
250
282
  file = Utils.format_path(file)
251
283
  end
252
284
 
@@ -698,7 +730,7 @@ module Polars
698
730
  pretty: false,
699
731
  row_oriented: false
700
732
  )
701
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
733
+ if Utils.pathlike?(file)
702
734
  file = Utils.format_path(file)
703
735
  end
704
736
 
@@ -713,7 +745,7 @@ module Polars
713
745
  #
714
746
  # @return [nil]
715
747
  def write_ndjson(file)
716
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
748
+ if Utils.pathlike?(file)
717
749
  file = Utils.format_path(file)
718
750
  end
719
751
 
@@ -803,7 +835,7 @@ module Polars
803
835
  return buffer.string.force_encoding(Encoding::UTF_8)
804
836
  end
805
837
 
806
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
838
+ if Utils.pathlike?(file)
807
839
  file = Utils.format_path(file)
808
840
  end
809
841
 
@@ -841,7 +873,7 @@ module Polars
841
873
  if compression.nil?
842
874
  compression = "uncompressed"
843
875
  end
844
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
876
+ if Utils.pathlike?(file)
845
877
  file = Utils.format_path(file)
846
878
  end
847
879
 
@@ -860,7 +892,7 @@ module Polars
860
892
  if compression.nil?
861
893
  compression = "uncompressed"
862
894
  end
863
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
895
+ if Utils.pathlike?(file)
864
896
  file = Utils.format_path(file)
865
897
  end
866
898
 
@@ -902,7 +934,7 @@ module Polars
902
934
  if compression.nil?
903
935
  compression = "uncompressed"
904
936
  end
905
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
937
+ if Utils.pathlike?(file)
906
938
  file = Utils.format_path(file)
907
939
  end
908
940
 
data/lib/polars/io.rb CHANGED
@@ -268,7 +268,7 @@ module Polars
268
268
  _check_arg_is_1byte("comment_char", comment_char, false)
269
269
  _check_arg_is_1byte("quote_char", quote_char, true)
270
270
 
271
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
271
+ if Utils.pathlike?(file)
272
272
  file = Utils.format_path(file)
273
273
  end
274
274
 
@@ -384,7 +384,7 @@ module Polars
384
384
  storage_options: nil,
385
385
  low_memory: false
386
386
  )
387
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
387
+ if Utils.pathlike?(file)
388
388
  file = Utils.format_path(file)
389
389
  end
390
390
 
@@ -435,7 +435,7 @@ module Polars
435
435
  row_count_name: nil,
436
436
  row_count_offset: 0
437
437
  )
438
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
438
+ if Utils.pathlike?(file)
439
439
  file = Utils.format_path(file)
440
440
  end
441
441
 
@@ -463,7 +463,7 @@ module Polars
463
463
  #
464
464
  # @return [DataFrame]
465
465
  def read_avro(file, columns: nil, n_rows: nil)
466
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
466
+ if Utils.pathlike?(file)
467
467
  file = Utils.format_path(file)
468
468
  end
469
469
 
@@ -786,7 +786,7 @@ module Polars
786
786
  #
787
787
  # @return [Hash]
788
788
  def read_ipc_schema(file)
789
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
789
+ if Utils.pathlike?(file)
790
790
  file = Utils.format_path(file)
791
791
  end
792
792
 
@@ -800,7 +800,7 @@ module Polars
800
800
  #
801
801
  # @return [Hash]
802
802
  def read_parquet_schema(file)
803
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
803
+ if Utils.pathlike?(file)
804
804
  file = Utils.format_path(file)
805
805
  end
806
806
 
@@ -106,7 +106,7 @@ module Polars
106
106
  storage_options: nil,
107
107
  memory_map: true
108
108
  )
109
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
109
+ if Utils.pathlike?(file)
110
110
  file = Utils.format_path(file)
111
111
  end
112
112
 
@@ -156,7 +156,7 @@ module Polars
156
156
  #
157
157
  # @return [LazyFrame]
158
158
  def self.read_json(file)
159
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
159
+ if Utils.pathlike?(file)
160
160
  file = Utils.format_path(file)
161
161
  end
162
162
 
@@ -263,7 +263,7 @@ module Polars
263
263
  #
264
264
  # @return [nil]
265
265
  def write_json(file)
266
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
266
+ if Utils.pathlike?(file)
267
267
  file = Utils.format_path(file)
268
268
  end
269
269
  _ldf.write_json(file)
@@ -15,7 +15,7 @@ module Polars
15
15
  if name.is_a?(DataType)
16
16
  Utils.wrap_expr(_dtype_cols([name]))
17
17
  elsif name.is_a?(Array)
18
- if name.length == 0 || name[0].is_a?(String) || name[0].is_a?(Symbol)
18
+ if name.length == 0 || Utils.strlike?(name[0])
19
19
  name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
20
20
  Utils.wrap_expr(RbExpr.cols(name))
21
21
  elsif Utils.is_polars_dtype(name[0])
@@ -119,7 +119,7 @@ module Polars
119
119
  def max(column)
120
120
  if column.is_a?(Series)
121
121
  column.max
122
- elsif column.is_a?(String) || column.is_a?(Symbol)
122
+ elsif Utils.strlike?(column)
123
123
  col(column).max
124
124
  else
125
125
  exprs = Utils.selection_to_rbexpr_list(column)
@@ -141,7 +141,7 @@ module Polars
141
141
  def min(column)
142
142
  if column.is_a?(Series)
143
143
  column.min
144
- elsif column.is_a?(String) || column.is_a?(Symbol)
144
+ elsif Utils.strlike?(column)
145
145
  col(column).min
146
146
  else
147
147
  exprs = Utils.selection_to_rbexpr_list(column)
@@ -156,7 +156,7 @@ module Polars
156
156
  def sum(column)
157
157
  if column.is_a?(Series)
158
158
  column.sum
159
- elsif column.is_a?(String) || column.is_a?(Symbol)
159
+ elsif Utils.strlike?(column)
160
160
  col(column.to_s).sum
161
161
  elsif column.is_a?(Array)
162
162
  exprs = Utils.selection_to_rbexpr_list(column)
@@ -356,7 +356,7 @@ module Polars
356
356
  def cumsum(column)
357
357
  if column.is_a?(Series)
358
358
  column.cumsum
359
- elsif column.is_a?(String)
359
+ elsif Utils.strlike?(column)
360
360
  col(column).cumsum
361
361
  else
362
362
  cumfold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("cumsum")
@@ -380,10 +380,10 @@ module Polars
380
380
  #
381
381
  # @return [Expr]
382
382
  def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
383
- if a.is_a?(String)
383
+ if Utils.strlike?(a)
384
384
  a = col(a)
385
385
  end
386
- if b.is_a?(String)
386
+ if Utils.strlike?(b)
387
387
  b = col(b)
388
388
  end
389
389
  Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
@@ -400,10 +400,10 @@ module Polars
400
400
  #
401
401
  # @return [Expr]
402
402
  def pearson_corr(a, b, ddof: 1)
403
- if a.is_a?(String)
403
+ if Utils.strlike?(a)
404
404
  a = col(a)
405
405
  end
406
- if b.is_a?(String)
406
+ if Utils.strlike?(b)
407
407
  b = col(b)
408
408
  end
409
409
  Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
@@ -418,10 +418,10 @@ module Polars
418
418
  #
419
419
  # @return [Expr]
420
420
  def cov(a, b)
421
- if a.is_a?(String)
421
+ if Utils.strlike?(a)
422
422
  a = col(a)
423
423
  end
424
- if b.is_a?(String)
424
+ if Utils.strlike?(b)
425
425
  b = col(b)
426
426
  end
427
427
  Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
@@ -486,7 +486,7 @@ module Polars
486
486
  #
487
487
  # @return [Expr]
488
488
  def any(name)
489
- if name.is_a?(String)
489
+ if Utils.strlike?(name)
490
490
  col(name).any
491
491
  else
492
492
  fold(lit(false), ->(a, b) { a.cast(:bool) | b.cast(:bool) }, name).alias("any")
@@ -589,7 +589,7 @@ module Polars
589
589
  def all(name = nil)
590
590
  if name.nil?
591
591
  col("*")
592
- elsif name.is_a?(String) || name.is_a?(Symbol)
592
+ elsif Utils.strlike?(name)
593
593
  col(name).all
594
594
  else
595
595
  raise Todo
@@ -1137,7 +1137,7 @@ module Polars
1137
1137
  # # │ 2022-10-25 07:31:39 │
1138
1138
  # # └─────────────────────┘
1139
1139
  def from_epoch(column, unit: "s", eager: false)
1140
- if column.is_a?(String)
1140
+ if Utils.strlike?(column)
1141
1141
  column = col(column)
1142
1142
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
1143
1143
  column = Series.new(column)
data/lib/polars/utils.rb CHANGED
@@ -177,6 +177,14 @@ module Polars
177
177
  value == true || value == false
178
178
  end
179
179
 
180
+ def self.strlike?(value)
181
+ value.is_a?(String) || value.is_a?(Symbol)
182
+ end
183
+
184
+ def self.pathlike?(value)
185
+ value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
186
+ end
187
+
180
188
  def self._is_iterable_of(val, eltype)
181
189
  val.all? { |x| x.is_a?(eltype) }
182
190
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.4"
3
+ VERSION = "0.2.5"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-30 00:00:00.000000000 Z
11
+ date: 2023-02-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org