gn_crossmap 2.1.3 → 2.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -0
- data/lib/gn_crossmap/collector.rb +1 -0
- data/lib/gn_crossmap/reader.rb +21 -5
- data/lib/gn_crossmap/stats.rb +2 -1
- data/lib/gn_crossmap/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af88cbf0b3ca5ea98ebf8cbb37b41797ba696294
|
4
|
+
data.tar.gz: 75bc41909f7c00eee0311a254e1390aa6ae8307c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a784f2f6bab3b46fc4f4d7f3ef0aa69116d528589ff116ad85f050d28d430882672bdaef42e2c2fe21aa526bbaac5d27e55939ec9df4e0049b44f98311c9fc1f
|
7
|
+
data.tar.gz: 2b054ace46de4c2f11b15ab52a47a8371c126a4274c4be439115f1a6e37ce77e6f2148bfc38034f096a6f7d21807c58b7101764a4f0e169589140767a485e38c
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -154,6 +154,7 @@ end
|
|
154
154
|
|resolved_records |number of names already processed |
|
155
155
|
|last_batches_time |time required to process the last batch of names |
|
156
156
|
|matches |Distribution of processed data by match type (see below) |
|
157
|
+
|errors |First 0-10 errors found during the csv file processing |
|
157
158
|
|
158
159
|
#### Match types
|
159
160
|
|
data/lib/gn_crossmap/reader.rb
CHANGED
@@ -9,6 +9,7 @@ module GnCrossmap
|
|
9
9
|
@alt_headers = alt_headers
|
10
10
|
@csv_io = csv_io
|
11
11
|
@col_sep = col_sep
|
12
|
+
@quote_char = quote_char(@col_sep)
|
12
13
|
@original_fields = nil
|
13
14
|
@input_name = input_name
|
14
15
|
@skip_original = skip_original
|
@@ -30,9 +31,13 @@ module GnCrossmap
|
|
30
31
|
[";", ",", "\t"].map { |s| [line.count(s), s] }.sort.last.last
|
31
32
|
end
|
32
33
|
|
34
|
+
def quote_char(col_sep)
|
35
|
+
col_sep == "\t" ? "\x00" : '"'
|
36
|
+
end
|
37
|
+
|
33
38
|
def parse_input
|
34
39
|
dc = Collector.new(@skip_original)
|
35
|
-
csv = CSV.new(@csv_io, col_sep: col_sep)
|
40
|
+
csv = CSV.new(@csv_io, col_sep: @col_sep, quote_char: @quote_char)
|
36
41
|
block_given? ? process(csv, dc, &Proc.new) : process(csv, dc)
|
37
42
|
wrap_up
|
38
43
|
yield @stats.stats if block_given?
|
@@ -40,13 +45,24 @@ module GnCrossmap
|
|
40
45
|
end
|
41
46
|
|
42
47
|
def process(csv, data_collector)
|
43
|
-
|
44
|
-
|
45
|
-
yield @stats.stats if log_progress(
|
46
|
-
data_collector
|
48
|
+
counter = 0
|
49
|
+
loop do
|
50
|
+
yield @stats.stats if log_progress(counter) && block_given?
|
51
|
+
rl = read_line(csv, data_collector)
|
52
|
+
break unless rl
|
53
|
+
counter += 1
|
47
54
|
end && @csv_io.close
|
48
55
|
end
|
49
56
|
|
57
|
+
def read_line(csv, data_collector)
|
58
|
+
row = csv.readline
|
59
|
+
return false if row.nil?
|
60
|
+
row = process_headers(row) if @original_fields.nil?
|
61
|
+
data_collector.process_row(row)
|
62
|
+
rescue CSV::MalformedCSVError => e
|
63
|
+
@stats.stats[:errors] << e.message if @stats.stats[:errors].size < 10
|
64
|
+
end
|
65
|
+
|
50
66
|
def wrap_up
|
51
67
|
@stats.stats[:ingested_records] = @stats.stats[:total_records]
|
52
68
|
@stats.stats[:ingestion_span] = Time.now - @stats.stats[:ingestion_start]
|
data/lib/gn_crossmap/stats.rb
CHANGED
@@ -10,7 +10,8 @@ module GnCrossmap
|
|
10
10
|
resolved_records: 0, ingestion_span: nil,
|
11
11
|
resolution_span: nil, ingestion_start: nil,
|
12
12
|
resolution_start: nil, resolution_stop: nil,
|
13
|
-
last_batches_time: [], matches: match_types
|
13
|
+
last_batches_time: [], matches: match_types,
|
14
|
+
errors: [] }
|
14
15
|
end
|
15
16
|
|
16
17
|
private
|
data/lib/gn_crossmap/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.3
|
4
|
+
version: 2.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|