gn_crossmap 2.1.3 → 2.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d9e9c42628d2d1a8a5d8aeea1b0b1614ffe5c7a9
4
- data.tar.gz: ed427b1beb9cc204a1bdf622b1521645336af96e
3
+ metadata.gz: af88cbf0b3ca5ea98ebf8cbb37b41797ba696294
4
+ data.tar.gz: 75bc41909f7c00eee0311a254e1390aa6ae8307c
5
5
  SHA512:
6
- metadata.gz: 5f61004fb4c0a2885f46fb1f16274694ee926d4d7c2ca3637f61e462e7ccf1b1c9eb7ac8ce77aad9cf46ad37760f72ecd46c61f52040c0807592de0b904d28e1
7
- data.tar.gz: f6ae3e266a4d5738feb623d07176c6641e49edeacecb78378de746adf7addce82edd9d0d501e5efe962c37d99ae55194c31d60169a9a974e656fae09c055bd0f
6
+ metadata.gz: a784f2f6bab3b46fc4f4d7f3ef0aa69116d528589ff116ad85f050d28d430882672bdaef42e2c2fe21aa526bbaac5d27e55939ec9df4e0049b44f98311c9fc1f
7
+ data.tar.gz: 2b054ace46de4c2f11b15ab52a47a8371c126a4274c4be439115f1a6e37ce77e6f2148bfc38034f096a6f7d21807c58b7101764a4f0e169589140767a485e38c
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 2.2.3
4
+
5
+ * @dimus - make CSV processing more permissive: continue
6
+ processing after encountering malformed CSV rows. Add an "errors"
7
+ field to the status report.
8
+
3
9
  ## 2.1.3
4
10
 
5
11
  * @dimus - fix nil situation in headers' fields
data/README.md CHANGED
@@ -154,6 +154,7 @@ end
154
154
  |resolved_records |number of names already processed |
155
155
  |last_batches_time |time required to process the last batch of names |
156
156
  |matches |Distribution of processed data by match type (see below) |
157
+ |errors |Up to the first 10 errors encountered while processing the CSV file |
157
158
 
158
159
  #### Match types
159
160
 
@@ -13,6 +13,7 @@ module GnCrossmap
13
13
  def process_row(row)
14
14
  @row = row
15
15
  @fields ? collect_data : init_fields_collector
16
+ true
16
17
  end
17
18
 
18
19
  private
@@ -9,6 +9,7 @@ module GnCrossmap
9
9
  @alt_headers = alt_headers
10
10
  @csv_io = csv_io
11
11
  @col_sep = col_sep
12
+ @quote_char = quote_char(@col_sep)
12
13
  @original_fields = nil
13
14
  @input_name = input_name
14
15
  @skip_original = skip_original
@@ -30,9 +31,13 @@ module GnCrossmap
30
31
  [";", ",", "\t"].map { |s| [line.count(s), s] }.sort.last.last
31
32
  end
32
33
 
34
+ def quote_char(col_sep)
35
+ col_sep == "\t" ? "\x00" : '"'
36
+ end
37
+
33
38
  def parse_input
34
39
  dc = Collector.new(@skip_original)
35
- csv = CSV.new(@csv_io, col_sep: col_sep)
40
+ csv = CSV.new(@csv_io, col_sep: @col_sep, quote_char: @quote_char)
36
41
  block_given? ? process(csv, dc, &Proc.new) : process(csv, dc)
37
42
  wrap_up
38
43
  yield @stats.stats if block_given?
@@ -40,13 +45,24 @@ module GnCrossmap
40
45
  end
41
46
 
42
47
  def process(csv, data_collector)
43
- csv.each_with_index do |row, i|
44
- row = process_headers(row) if @original_fields.nil?
45
- yield @stats.stats if log_progress(i) && block_given?
46
- data_collector.process_row(row)
48
+ counter = 0
49
+ loop do
50
+ yield @stats.stats if log_progress(counter) && block_given?
51
+ rl = read_line(csv, data_collector)
52
+ break unless rl
53
+ counter += 1
47
54
  end && @csv_io.close
48
55
  end
49
56
 
57
+ def read_line(csv, data_collector)
58
+ row = csv.readline
59
+ return false if row.nil?
60
+ row = process_headers(row) if @original_fields.nil?
61
+ data_collector.process_row(row)
62
+ rescue CSV::MalformedCSVError => e
63
+ @stats.stats[:errors] << e.message if @stats.stats[:errors].size < 10
64
+ end
65
+
50
66
  def wrap_up
51
67
  @stats.stats[:ingested_records] = @stats.stats[:total_records]
52
68
  @stats.stats[:ingestion_span] = Time.now - @stats.stats[:ingestion_start]
@@ -10,7 +10,8 @@ module GnCrossmap
10
10
  resolved_records: 0, ingestion_span: nil,
11
11
  resolution_span: nil, ingestion_start: nil,
12
12
  resolution_start: nil, resolution_stop: nil,
13
- last_batches_time: [], matches: match_types }
13
+ last_batches_time: [], matches: match_types,
14
+ errors: [] }
14
15
  end
15
16
 
16
17
  private
@@ -1,6 +1,6 @@
1
1
  # Namespace module for crossmapping checklists to GN sources
2
2
  module GnCrossmap
3
- VERSION = "2.1.3".freeze
3
+ VERSION = "2.2.3".freeze
4
4
 
5
5
  def self.version
6
6
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.3
4
+ version: 2.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-03 00:00:00.000000000 Z
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: trollop