gn_crossmap 2.1.3 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -0
- data/lib/gn_crossmap/collector.rb +1 -0
- data/lib/gn_crossmap/reader.rb +21 -5
- data/lib/gn_crossmap/stats.rb +2 -1
- data/lib/gn_crossmap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af88cbf0b3ca5ea98ebf8cbb37b41797ba696294
|
4
|
+
data.tar.gz: 75bc41909f7c00eee0311a254e1390aa6ae8307c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a784f2f6bab3b46fc4f4d7f3ef0aa69116d528589ff116ad85f050d28d430882672bdaef42e2c2fe21aa526bbaac5d27e55939ec9df4e0049b44f98311c9fc1f
|
7
|
+
data.tar.gz: 2b054ace46de4c2f11b15ab52a47a8371c126a4274c4be439115f1a6e37ce77e6f2148bfc38034f096a6f7d21807c58b7101764a4f0e169589140767a485e38c
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -154,6 +154,7 @@ end
|
|
154
154
|
|resolved_records |number of names already processed |
|
155
155
|
|last_batches_time |time required to process the last batch of names |
|
156
156
|
|matches |Distribution of processed data by match type (see below) |
|
157
|
+
|errors |First 0-10 errors found during the csv file processing |
|
157
158
|
|
158
159
|
#### Match types
|
159
160
|
|
data/lib/gn_crossmap/reader.rb
CHANGED
@@ -9,6 +9,7 @@ module GnCrossmap
|
|
9
9
|
@alt_headers = alt_headers
|
10
10
|
@csv_io = csv_io
|
11
11
|
@col_sep = col_sep
|
12
|
+
@quote_char = quote_char(@col_sep)
|
12
13
|
@original_fields = nil
|
13
14
|
@input_name = input_name
|
14
15
|
@skip_original = skip_original
|
@@ -30,9 +31,13 @@ module GnCrossmap
|
|
30
31
|
[";", ",", "\t"].map { |s| [line.count(s), s] }.sort.last.last
|
31
32
|
end
|
32
33
|
|
34
|
+
def quote_char(col_sep)
|
35
|
+
col_sep == "\t" ? "\x00" : '"'
|
36
|
+
end
|
37
|
+
|
33
38
|
def parse_input
|
34
39
|
dc = Collector.new(@skip_original)
|
35
|
-
csv = CSV.new(@csv_io, col_sep: col_sep)
|
40
|
+
csv = CSV.new(@csv_io, col_sep: @col_sep, quote_char: @quote_char)
|
36
41
|
block_given? ? process(csv, dc, &Proc.new) : process(csv, dc)
|
37
42
|
wrap_up
|
38
43
|
yield @stats.stats if block_given?
|
@@ -40,13 +45,24 @@ module GnCrossmap
|
|
40
45
|
end
|
41
46
|
|
42
47
|
def process(csv, data_collector)
|
43
|
-
|
44
|
-
|
45
|
-
yield @stats.stats if log_progress(
|
46
|
-
data_collector
|
48
|
+
counter = 0
|
49
|
+
loop do
|
50
|
+
yield @stats.stats if log_progress(counter) && block_given?
|
51
|
+
rl = read_line(csv, data_collector)
|
52
|
+
break unless rl
|
53
|
+
counter += 1
|
47
54
|
end && @csv_io.close
|
48
55
|
end
|
49
56
|
|
57
|
+
def read_line(csv, data_collector)
|
58
|
+
row = csv.readline
|
59
|
+
return false if row.nil?
|
60
|
+
row = process_headers(row) if @original_fields.nil?
|
61
|
+
data_collector.process_row(row)
|
62
|
+
rescue CSV::MalformedCSVError => e
|
63
|
+
@stats.stats[:errors] << e.message if @stats.stats[:errors].size < 10
|
64
|
+
end
|
65
|
+
|
50
66
|
def wrap_up
|
51
67
|
@stats.stats[:ingested_records] = @stats.stats[:total_records]
|
52
68
|
@stats.stats[:ingestion_span] = Time.now - @stats.stats[:ingestion_start]
|
data/lib/gn_crossmap/stats.rb
CHANGED
@@ -10,7 +10,8 @@ module GnCrossmap
|
|
10
10
|
resolved_records: 0, ingestion_span: nil,
|
11
11
|
resolution_span: nil, ingestion_start: nil,
|
12
12
|
resolution_start: nil, resolution_stop: nil,
|
13
|
-
last_batches_time: [], matches: match_types
|
13
|
+
last_batches_time: [], matches: match_types,
|
14
|
+
errors: [] }
|
14
15
|
end
|
15
16
|
|
16
17
|
private
|
data/lib/gn_crossmap/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.3
|
4
|
+
version: 2.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|