gtfs_df 0.4.0 → 0.4.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 127f50187dd26c4824abd94a6a44f64242715bf9092072276cdf6c811e0c28a6
4
- data.tar.gz: d5a6bd9b25830d0574c6c315712d1ec4e0b14959d25f6a1cf12f2e13eeb738a2
3
+ metadata.gz: 205ec058b41c5bd1d2b01ff3950d4cd2ebb20f304d02d5bcd3dd0c447b4e0a6e
4
+ data.tar.gz: 54cca637de421c26d2144df100f8430f5dddb639b3a37d1160dfbd9704630e33
5
5
  SHA512:
6
- metadata.gz: 9312f173960b069a31bb2bb37368bd65ad5345d527d5a29818a7485c731bcb6f6c5fb74639403a3bcbd5d5704ae2bbf629ed5cdfe3f5f629f36ea719663356da
7
- data.tar.gz: 19563c256e1cbe52a34eef47c31f6b50806a0cb07b773bacb015ad40730c6d804941aef1c936aaeb6c17ea49719ab7db64d1144eeb05c7c4bd1d83ebcd87e12f
6
+ metadata.gz: ef853b504ee701e77911259352d0057f6e327db8022370a1fa0dbaa597bfadcac7ac1c8fbe746c04ec4669d86384279a259ac35d4126f1483286937812cb7cce
7
+ data.tar.gz: 728d684dd02b653cc779aa5e8152766f5dccafcb097e8fca5810b90f994c442645119f32efb62c84d8fbf1645109f579a3c74f6eb88c9e21dd03faa72193cdd0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## [0.4.1] - 2025-12-05
2
+
3
+ ### Added
4
+
5
+ - handle extra whitespace in csvs
6
+
7
+ ### Maintenance
8
+
9
+ - remove unused initializer format
1
10
  ## [0.4.0] - 2025-12-04
2
11
 
3
12
  ### Added
@@ -10,20 +10,21 @@ module GtfsDf
10
10
  if input.is_a?(Polars::DataFrame)
11
11
  input
12
12
  elsif input.is_a?(String)
13
- # We need to account for extra columns due to: https://github.com/ankane/ruby-polars/issues/125
14
- all_columns = Polars.scan_csv(input).columns
15
- default_schema = all_columns.map { |c| [c, Polars::String] }.to_h
16
- dtypes = default_schema.merge(self.class::SCHEMA)
17
- Polars.read_csv(input, null_values: [""], dtypes:)
18
- elsif input.is_a?(Array)
19
- head, *body = input
20
- df_input = body.each_with_object({}) do |row, acc|
21
- head.each_with_index do |column, index|
22
- acc[column] ||= []
23
- acc[column] << row[index]
24
- end
25
- end
26
- Polars::DataFrame.new(df_input, schema_overrides: self.class::SCHEMA, strict: false)
13
+ # TODO: use `infer_schema: false` instead of `infer_schema_length` after polars release:
14
+ # https://github.com/ankane/ruby-polars/blob/master/CHANGELOG.md#100-unreleased
15
+ df = Polars.read_csv(input, infer_schema_length: 0)
16
+ dtypes = self.class::SCHEMA.slice(*df.columns)
17
+
18
+ df
19
+ .with_columns(dtypes.keys.map do |col|
20
+ stripped = Polars.col(col).str.strip
21
+ Polars.when(stripped.str.len_chars.gt(0))
22
+ .then(stripped)
23
+ .otherwise(Polars.lit(nil))
24
+ end)
25
+ .with_columns(dtypes.map do |name, type|
26
+ Polars.col(name).cast(type)
27
+ end)
27
28
  else
28
29
  throw GtfsDf::Error, "Unrecognized input"
29
30
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GtfsDf
4
- VERSION = "0.4.0"
4
+ VERSION = "0.4.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gtfs_df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Mejorado