gn_crossmap 2.1.3 → 2.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d9e9c42628d2d1a8a5d8aeea1b0b1614ffe5c7a9
4
- data.tar.gz: ed427b1beb9cc204a1bdf622b1521645336af96e
3
+ metadata.gz: af88cbf0b3ca5ea98ebf8cbb37b41797ba696294
4
+ data.tar.gz: 75bc41909f7c00eee0311a254e1390aa6ae8307c
5
5
  SHA512:
6
- metadata.gz: 5f61004fb4c0a2885f46fb1f16274694ee926d4d7c2ca3637f61e462e7ccf1b1c9eb7ac8ce77aad9cf46ad37760f72ecd46c61f52040c0807592de0b904d28e1
7
- data.tar.gz: f6ae3e266a4d5738feb623d07176c6641e49edeacecb78378de746adf7addce82edd9d0d501e5efe962c37d99ae55194c31d60169a9a974e656fae09c055bd0f
6
+ metadata.gz: a784f2f6bab3b46fc4f4d7f3ef0aa69116d528589ff116ad85f050d28d430882672bdaef42e2c2fe21aa526bbaac5d27e55939ec9df4e0049b44f98311c9fc1f
7
+ data.tar.gz: 2b054ace46de4c2f11b15ab52a47a8371c126a4274c4be439115f1a6e37ce77e6f2148bfc38034f096a6f7d21807c58b7101764a4f0e169589140767a485e38c
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 2.2.3
4
+
5
+ * @dimus - make CSV processing more permissive: continue
6
+ processing after encountering malformed CSV rows. Add an "errors"
7
+ field to the status report.
8
+
3
9
  ## 2.1.3
4
10
 
5
11
  * @dimus - fix nil situation in headers' fields
data/README.md CHANGED
@@ -154,6 +154,7 @@ end
154
154
  |resolved_records |number of names already processed |
155
155
  |last_batches_time |time required to process the last batch of names |
156
156
  |matches |Distribution of processed data by match type (see below) |
157
+ |errors |Up to the first 10 errors encountered while processing the CSV file |
157
158
 
158
159
  #### Match types
159
160
 
@@ -13,6 +13,7 @@ module GnCrossmap
13
13
  def process_row(row)
14
14
  @row = row
15
15
  @fields ? collect_data : init_fields_collector
16
+ true
16
17
  end
17
18
 
18
19
  private
@@ -9,6 +9,7 @@ module GnCrossmap
9
9
  @alt_headers = alt_headers
10
10
  @csv_io = csv_io
11
11
  @col_sep = col_sep
12
+ @quote_char = quote_char(@col_sep)
12
13
  @original_fields = nil
13
14
  @input_name = input_name
14
15
  @skip_original = skip_original
@@ -30,9 +31,13 @@ module GnCrossmap
30
31
  [";", ",", "\t"].map { |s| [line.count(s), s] }.sort.last.last
31
32
  end
32
33
 
34
+ def quote_char(col_sep)
35
+ col_sep == "\t" ? "\x00" : '"'
36
+ end
37
+
33
38
  def parse_input
34
39
  dc = Collector.new(@skip_original)
35
- csv = CSV.new(@csv_io, col_sep: col_sep)
40
+ csv = CSV.new(@csv_io, col_sep: @col_sep, quote_char: @quote_char)
36
41
  block_given? ? process(csv, dc, &Proc.new) : process(csv, dc)
37
42
  wrap_up
38
43
  yield @stats.stats if block_given?
@@ -40,13 +45,24 @@ module GnCrossmap
40
45
  end
41
46
 
42
47
  def process(csv, data_collector)
43
- csv.each_with_index do |row, i|
44
- row = process_headers(row) if @original_fields.nil?
45
- yield @stats.stats if log_progress(i) && block_given?
46
- data_collector.process_row(row)
48
+ counter = 0
49
+ loop do
50
+ yield @stats.stats if log_progress(counter) && block_given?
51
+ rl = read_line(csv, data_collector)
52
+ break unless rl
53
+ counter += 1
47
54
  end && @csv_io.close
48
55
  end
49
56
 
57
+ def read_line(csv, data_collector)
58
+ row = csv.readline
59
+ return false if row.nil?
60
+ row = process_headers(row) if @original_fields.nil?
61
+ data_collector.process_row(row)
62
+ rescue CSV::MalformedCSVError => e
63
+ @stats.stats[:errors] << e.message if @stats.stats[:errors].size < 10
64
+ end
65
+
50
66
  def wrap_up
51
67
  @stats.stats[:ingested_records] = @stats.stats[:total_records]
52
68
  @stats.stats[:ingestion_span] = Time.now - @stats.stats[:ingestion_start]
@@ -10,7 +10,8 @@ module GnCrossmap
10
10
  resolved_records: 0, ingestion_span: nil,
11
11
  resolution_span: nil, ingestion_start: nil,
12
12
  resolution_start: nil, resolution_stop: nil,
13
- last_batches_time: [], matches: match_types }
13
+ last_batches_time: [], matches: match_types,
14
+ errors: [] }
14
15
  end
15
16
 
16
17
  private
@@ -1,6 +1,6 @@
1
1
  # Namespace module for crossmapping checklists to GN sources
2
2
  module GnCrossmap
3
- VERSION = "2.1.3".freeze
3
+ VERSION = "2.2.3".freeze
4
4
 
5
5
  def self.version
6
6
  VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.3
4
+ version: 2.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-03 00:00:00.000000000 Z
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: trollop