polars-df 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5afa804963fc59154e8adde6034e07d1a2a90c077b8e0b72d2ad3ca49af34e8e
4
- data.tar.gz: b2291436080973ad43595ef25559cdc81669b07986a6779c85c4c372e220e39a
3
+ metadata.gz: 122f88f6a702c0252a98404b8ecccddcd0195cadd79b7062f451cb23f5d3b8e3
4
+ data.tar.gz: 67588b6a0aa9829af179c8e3d5339b125b50dd6941e02cea4fc29aedd2e29217
5
5
  SHA512:
6
- metadata.gz: 7d7aa73d12e9a322de83db5f69b4ab0f5e9605b737776a83476f917cf32ad1ef6770c79e80a810be68a9144206fef6c3931af3285763741232b8ba87871d0a06
7
- data.tar.gz: 2f9daceaba3edd0671a650ab1a2da6ddc6591f768e52a4c8a8f01d7e3fd573320c4464d0081762daeb75d41475bb4e87e298b426b507ef6c7bb9d3164149e7e6
6
+ metadata.gz: 6393dcab24e4001c7d1b9149ef441317d8ba76571850071ed829b569e055c9ad4d4ffcbdb256ede8225922b56378984ebb113570051d27b2761a50f1b23fa168
7
+ data.tar.gz: 4356270d7f15d964eedd84bf37c05054323a19128fa5840563829af88f46107e1277534ae976ea078808807b7c80433196eb351f7160118d1251d1dd4ddbbc4b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.5 (2023-02-01)
2
+
3
+ - Added support for glob patterns to `read_csv` method
4
+ - Added support for symbols to more methods
5
+
1
6
  ## 0.2.4 (2023-01-29)
2
7
 
3
8
  - Added support for more types when creating a data frame from an array of hashes
data/README.md CHANGED
@@ -73,9 +73,9 @@ From an array of hashes
73
73
 
74
74
  ```ruby
75
75
  Polars::DataFrame.new([
76
- {"a" => 1, "b" => "one"},
77
- {"a" => 2, "b" => "two"},
78
- {"a" => 3, "b" => "three"}
76
+ {a: 1, b: "one"},
77
+ {a: 2, b: "two"},
78
+ {a: 3, b: "three"}
79
79
  ])
80
80
  ```
81
81
 
@@ -29,7 +29,7 @@ module Polars
29
29
  eol_char: "\n",
30
30
  new_columns: nil
31
31
  )
32
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
32
+ if Utils.pathlike?(file)
33
33
  path = Utils.format_path(file)
34
34
  end
35
35
 
@@ -94,7 +94,7 @@ module Polars
94
94
  sample_size: 1024,
95
95
  eol_char: "\n"
96
96
  )
97
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
97
+ if Utils.pathlike?(file)
98
98
  path = Utils.format_path(file)
99
99
  else
100
100
  path = nil
@@ -124,7 +124,39 @@ module Polars
124
124
  columns = [columns]
125
125
  end
126
126
  if file.is_a?(String) && file.include?("*")
127
- raise Todo
127
+ dtypes_dict = nil
128
+ if !dtype_list.nil?
129
+ dtypes_dict = dtype_list.to_h
130
+ end
131
+ if !dtype_slice.nil?
132
+ raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
133
+ end
134
+ scan = Polars.scan_csv(
135
+ file,
136
+ has_header: has_header,
137
+ sep: sep,
138
+ comment_char: comment_char,
139
+ quote_char: quote_char,
140
+ skip_rows: skip_rows,
141
+ dtypes: dtypes_dict,
142
+ null_values: null_values,
143
+ ignore_errors: ignore_errors,
144
+ infer_schema_length: infer_schema_length,
145
+ n_rows: n_rows,
146
+ low_memory: low_memory,
147
+ rechunk: rechunk,
148
+ skip_rows_after_header: skip_rows_after_header,
149
+ row_count_name: row_count_name,
150
+ row_count_offset: row_count_offset,
151
+ eol_char: eol_char
152
+ )
153
+ if columns.nil?
154
+ return _from_rbdf(scan.collect._df)
155
+ elsif is_str_sequence(columns, allow_str: false)
156
+ return _from_rbdf(scan.select(columns).collect._df)
157
+ else
158
+ raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
159
+ end
128
160
  end
129
161
 
130
162
  projection, columns = Utils.handle_projection_columns(columns)
@@ -170,7 +202,7 @@ module Polars
170
202
  row_count_offset: 0,
171
203
  low_memory: false
172
204
  )
173
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
205
+ if Utils.pathlike?(file)
174
206
  file = Utils.format_path(file)
175
207
  end
176
208
 
@@ -194,7 +226,7 @@ module Polars
194
226
 
195
227
  # @private
196
228
  def self._read_avro(file, columns: nil, n_rows: nil)
197
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
229
+ if Utils.pathlike?(file)
198
230
  file = Utils.format_path(file)
199
231
  end
200
232
  projection, columns = Utils.handle_projection_columns(columns)
@@ -211,7 +243,7 @@ module Polars
211
243
  rechunk: true,
212
244
  memory_map: true
213
245
  )
214
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
246
+ if Utils.pathlike?(file)
215
247
  file = Utils.format_path(file)
216
248
  end
217
249
  if columns.is_a?(String)
@@ -237,7 +269,7 @@ module Polars
237
269
 
238
270
  # @private
239
271
  def self._read_json(file)
240
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
272
+ if Utils.pathlike?(file)
241
273
  file = Utils.format_path(file)
242
274
  end
243
275
 
@@ -246,7 +278,7 @@ module Polars
246
278
 
247
279
  # @private
248
280
  def self._read_ndjson(file)
249
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
281
+ if Utils.pathlike?(file)
250
282
  file = Utils.format_path(file)
251
283
  end
252
284
 
@@ -698,7 +730,7 @@ module Polars
698
730
  pretty: false,
699
731
  row_oriented: false
700
732
  )
701
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
733
+ if Utils.pathlike?(file)
702
734
  file = Utils.format_path(file)
703
735
  end
704
736
 
@@ -713,7 +745,7 @@ module Polars
713
745
  #
714
746
  # @return [nil]
715
747
  def write_ndjson(file)
716
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
748
+ if Utils.pathlike?(file)
717
749
  file = Utils.format_path(file)
718
750
  end
719
751
 
@@ -803,7 +835,7 @@ module Polars
803
835
  return buffer.string.force_encoding(Encoding::UTF_8)
804
836
  end
805
837
 
806
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
838
+ if Utils.pathlike?(file)
807
839
  file = Utils.format_path(file)
808
840
  end
809
841
 
@@ -841,7 +873,7 @@ module Polars
841
873
  if compression.nil?
842
874
  compression = "uncompressed"
843
875
  end
844
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
876
+ if Utils.pathlike?(file)
845
877
  file = Utils.format_path(file)
846
878
  end
847
879
 
@@ -860,7 +892,7 @@ module Polars
860
892
  if compression.nil?
861
893
  compression = "uncompressed"
862
894
  end
863
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
895
+ if Utils.pathlike?(file)
864
896
  file = Utils.format_path(file)
865
897
  end
866
898
 
@@ -902,7 +934,7 @@ module Polars
902
934
  if compression.nil?
903
935
  compression = "uncompressed"
904
936
  end
905
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
937
+ if Utils.pathlike?(file)
906
938
  file = Utils.format_path(file)
907
939
  end
908
940
 
data/lib/polars/io.rb CHANGED
@@ -268,7 +268,7 @@ module Polars
268
268
  _check_arg_is_1byte("comment_char", comment_char, false)
269
269
  _check_arg_is_1byte("quote_char", quote_char, true)
270
270
 
271
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
271
+ if Utils.pathlike?(file)
272
272
  file = Utils.format_path(file)
273
273
  end
274
274
 
@@ -384,7 +384,7 @@ module Polars
384
384
  storage_options: nil,
385
385
  low_memory: false
386
386
  )
387
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
387
+ if Utils.pathlike?(file)
388
388
  file = Utils.format_path(file)
389
389
  end
390
390
 
@@ -435,7 +435,7 @@ module Polars
435
435
  row_count_name: nil,
436
436
  row_count_offset: 0
437
437
  )
438
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
438
+ if Utils.pathlike?(file)
439
439
  file = Utils.format_path(file)
440
440
  end
441
441
 
@@ -463,7 +463,7 @@ module Polars
463
463
  #
464
464
  # @return [DataFrame]
465
465
  def read_avro(file, columns: nil, n_rows: nil)
466
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
466
+ if Utils.pathlike?(file)
467
467
  file = Utils.format_path(file)
468
468
  end
469
469
 
@@ -786,7 +786,7 @@ module Polars
786
786
  #
787
787
  # @return [Hash]
788
788
  def read_ipc_schema(file)
789
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
789
+ if Utils.pathlike?(file)
790
790
  file = Utils.format_path(file)
791
791
  end
792
792
 
@@ -800,7 +800,7 @@ module Polars
800
800
  #
801
801
  # @return [Hash]
802
802
  def read_parquet_schema(file)
803
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
803
+ if Utils.pathlike?(file)
804
804
  file = Utils.format_path(file)
805
805
  end
806
806
 
@@ -106,7 +106,7 @@ module Polars
106
106
  storage_options: nil,
107
107
  memory_map: true
108
108
  )
109
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
109
+ if Utils.pathlike?(file)
110
110
  file = Utils.format_path(file)
111
111
  end
112
112
 
@@ -156,7 +156,7 @@ module Polars
156
156
  #
157
157
  # @return [LazyFrame]
158
158
  def self.read_json(file)
159
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
159
+ if Utils.pathlike?(file)
160
160
  file = Utils.format_path(file)
161
161
  end
162
162
 
@@ -263,7 +263,7 @@ module Polars
263
263
  #
264
264
  # @return [nil]
265
265
  def write_json(file)
266
- if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
266
+ if Utils.pathlike?(file)
267
267
  file = Utils.format_path(file)
268
268
  end
269
269
  _ldf.write_json(file)
@@ -15,7 +15,7 @@ module Polars
15
15
  if name.is_a?(DataType)
16
16
  Utils.wrap_expr(_dtype_cols([name]))
17
17
  elsif name.is_a?(Array)
18
- if name.length == 0 || name[0].is_a?(String) || name[0].is_a?(Symbol)
18
+ if name.length == 0 || Utils.strlike?(name[0])
19
19
  name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
20
20
  Utils.wrap_expr(RbExpr.cols(name))
21
21
  elsif Utils.is_polars_dtype(name[0])
@@ -119,7 +119,7 @@ module Polars
119
119
  def max(column)
120
120
  if column.is_a?(Series)
121
121
  column.max
122
- elsif column.is_a?(String) || column.is_a?(Symbol)
122
+ elsif Utils.strlike?(column)
123
123
  col(column).max
124
124
  else
125
125
  exprs = Utils.selection_to_rbexpr_list(column)
@@ -141,7 +141,7 @@ module Polars
141
141
  def min(column)
142
142
  if column.is_a?(Series)
143
143
  column.min
144
- elsif column.is_a?(String) || column.is_a?(Symbol)
144
+ elsif Utils.strlike?(column)
145
145
  col(column).min
146
146
  else
147
147
  exprs = Utils.selection_to_rbexpr_list(column)
@@ -156,7 +156,7 @@ module Polars
156
156
  def sum(column)
157
157
  if column.is_a?(Series)
158
158
  column.sum
159
- elsif column.is_a?(String) || column.is_a?(Symbol)
159
+ elsif Utils.strlike?(column)
160
160
  col(column.to_s).sum
161
161
  elsif column.is_a?(Array)
162
162
  exprs = Utils.selection_to_rbexpr_list(column)
@@ -356,7 +356,7 @@ module Polars
356
356
  def cumsum(column)
357
357
  if column.is_a?(Series)
358
358
  column.cumsum
359
- elsif column.is_a?(String)
359
+ elsif Utils.strlike?(column)
360
360
  col(column).cumsum
361
361
  else
362
362
  cumfold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("cumsum")
@@ -380,10 +380,10 @@ module Polars
380
380
  #
381
381
  # @return [Expr]
382
382
  def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
383
- if a.is_a?(String)
383
+ if Utils.strlike?(a)
384
384
  a = col(a)
385
385
  end
386
- if b.is_a?(String)
386
+ if Utils.strlike?(b)
387
387
  b = col(b)
388
388
  end
389
389
  Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
@@ -400,10 +400,10 @@ module Polars
400
400
  #
401
401
  # @return [Expr]
402
402
  def pearson_corr(a, b, ddof: 1)
403
- if a.is_a?(String)
403
+ if Utils.strlike?(a)
404
404
  a = col(a)
405
405
  end
406
- if b.is_a?(String)
406
+ if Utils.strlike?(b)
407
407
  b = col(b)
408
408
  end
409
409
  Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
@@ -418,10 +418,10 @@ module Polars
418
418
  #
419
419
  # @return [Expr]
420
420
  def cov(a, b)
421
- if a.is_a?(String)
421
+ if Utils.strlike?(a)
422
422
  a = col(a)
423
423
  end
424
- if b.is_a?(String)
424
+ if Utils.strlike?(b)
425
425
  b = col(b)
426
426
  end
427
427
  Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
@@ -486,7 +486,7 @@ module Polars
486
486
  #
487
487
  # @return [Expr]
488
488
  def any(name)
489
- if name.is_a?(String)
489
+ if Utils.strlike?(name)
490
490
  col(name).any
491
491
  else
492
492
  fold(lit(false), ->(a, b) { a.cast(:bool) | b.cast(:bool) }, name).alias("any")
@@ -589,7 +589,7 @@ module Polars
589
589
  def all(name = nil)
590
590
  if name.nil?
591
591
  col("*")
592
- elsif name.is_a?(String) || name.is_a?(Symbol)
592
+ elsif Utils.strlike?(name)
593
593
  col(name).all
594
594
  else
595
595
  raise Todo
@@ -1137,7 +1137,7 @@ module Polars
1137
1137
  # # │ 2022-10-25 07:31:39 │
1138
1138
  # # └─────────────────────┘
1139
1139
  def from_epoch(column, unit: "s", eager: false)
1140
- if column.is_a?(String)
1140
+ if Utils.strlike?(column)
1141
1141
  column = col(column)
1142
1142
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
1143
1143
  column = Series.new(column)
data/lib/polars/utils.rb CHANGED
@@ -177,6 +177,14 @@ module Polars
177
177
  value == true || value == false
178
178
  end
179
179
 
180
+ def self.strlike?(value)
181
+ value.is_a?(String) || value.is_a?(Symbol)
182
+ end
183
+
184
+ def self.pathlike?(value)
185
+ value.is_a?(String) || (defined?(Pathname) && value.is_a?(Pathname))
186
+ end
187
+
180
188
  def self._is_iterable_of(val, eltype)
181
189
  val.all? { |x| x.is_a?(eltype) }
182
190
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.2.4"
3
+ VERSION = "0.2.5"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-30 00:00:00.000000000 Z
11
+ date: 2023-02-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys