gn_crossmap 1.2.2 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -1
- data/lib/gn_crossmap/reader.rb +9 -2
- data/lib/gn_crossmap/version.rb +1 -1
- data/lib/gn_crossmap.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47ec0f7527076236262e11f44ad1740c730e05a4
|
4
|
+
data.tar.gz: 8656e9ce121a47a533f72fe76c23ea312b46e509
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cae7ecb4df507bef1fe64c6b9078772a3078d960f2e9af9a1d30ae7e233df85949fe2754c8830bd54dafd0ee2ec36b0788c0a61f7e0f7e8378dd69898465e487
|
7
|
+
data.tar.gz: 0ab13bc741d0f80b3bbc32d16a4e813a9db7f4954f476be23f1f0ef322bd7f1da782fbf7768fbf4c791f0544521ec9c82de1927d3c04cc22e524eb8b05033df9
|
data/README.md
CHANGED
@@ -37,6 +37,10 @@ gem install gn_crossmap
|
|
37
37
|
|
38
38
|
## Usage
|
39
39
|
|
40
|
+
### Usage as a Web Application
|
41
|
+
|
42
|
+
see the [gn_crossmap_web] project
|
43
|
+
|
40
44
|
### Usage from command line
|
41
45
|
|
42
46
|
```bash
|
@@ -64,7 +68,7 @@ Compares an input list to a data source from [GN Resolver][resolver] and
|
|
64
68
|
writes result into an output file.
|
65
69
|
|
66
70
|
```ruby
|
67
|
-
GnCrossmap.run(input, output, data_source_id, skip_original)
|
71
|
+
GnCrossmap.run(input, output, data_source_id, skip_original, alt_headers)
|
68
72
|
```
|
69
73
|
|
70
74
|
``input``
|
@@ -81,6 +85,10 @@ designates `STDIN`
|
|
81
85
|
: (boolean) if true only `taxonID` is preserved from original data. Otherwise
|
82
86
|
all original data is preserved
|
83
87
|
|
88
|
+
``alt_headers``
|
89
|
+
: (array) empty array by default. If `alt_headers` is not empty, its values are used
|
90
|
+
instead of the headers supplied with the file
|
91
|
+
|
84
92
|
#### `GnCrossmap.logger=`
|
85
93
|
|
86
94
|
Allows setting a custom logger (by default, log output goes to `STDERR`)
|
@@ -91,9 +99,15 @@ Allows to set logger to a custom logger (default is `STDERR`)
|
|
91
99
|
require "gn_crossmap"
|
92
100
|
|
93
101
|
# If you want to change logger -- default Logging is to standard error
|
102
|
+
|
94
103
|
GnCrossmap.logger = MyCustomLogger.new
|
95
104
|
|
96
105
|
GnCrossmap.run("path/to/input.csv", "path/to/output.csv", 5, true)
|
106
|
+
|
107
|
+
# if you want to use alternative headers instead of ones supplied in a file
|
108
|
+
|
109
|
+
alt_headers = %w(taxonId scientificName rank)
|
110
|
+
GnCrossmap.run("path/to/input.csv", "path/to/output.csv", 5, true, alt_headers)
|
97
111
|
```
|
98
112
|
|
99
113
|
If you want to get intermediate statistics for each resolution cycle use a
|
@@ -268,3 +282,4 @@ See [LICENSE][license] for details.
|
|
268
282
|
[terms]: http://rs.tdwg.org/dwc/terms
|
269
283
|
[files]: https://github.com/GlobalNamesArchitecture/gn_crossmap/tree/master/spec/files
|
270
284
|
[output]: https://github.com/GlobalNamesArchitecture/gn_crossmap/tree/master/spec/files/output-example.csv
|
285
|
+
[gn_crossmap_web]: https://github.com/GlobalNamesArchitecture/gn_crossmap_web
|
data/lib/gn_crossmap/reader.rb
CHANGED
@@ -4,8 +4,9 @@ module GnCrossmap
|
|
4
4
|
class Reader
|
5
5
|
attr_reader :original_fields
|
6
6
|
|
7
|
-
def initialize(csv_io, input_name, skip_original, stats)
|
7
|
+
def initialize(csv_io, input_name, skip_original, alt_headers, stats)
|
8
8
|
@stats = stats
|
9
|
+
@alt_headers = alt_headers
|
9
10
|
@csv_io = csv_io
|
10
11
|
@col_sep = col_sep
|
11
12
|
@original_fields = nil
|
@@ -33,13 +34,19 @@ module GnCrossmap
|
|
33
34
|
dc = Collector.new(@skip_original)
|
34
35
|
csv = CSV.new(@csv_io, col_sep: col_sep)
|
35
36
|
csv.each_with_index do |row, i|
|
36
|
-
|
37
|
+
row = process_headers(row) if @original_fields.nil?
|
37
38
|
yield @stats.stats if log_progress(i) && block_given?
|
38
39
|
dc.process_row(row)
|
39
40
|
end && @csv_io.close
|
40
41
|
dc.data
|
41
42
|
end
|
42
43
|
|
44
|
+
def process_headers(row)
|
45
|
+
@original_fields = headers(row)
|
46
|
+
row = @alt_headers unless @alt_headers.empty?
|
47
|
+
row
|
48
|
+
end
|
49
|
+
|
43
50
|
def log_progress(count)
|
44
51
|
return false unless (count % 10_000).zero?
|
45
52
|
GnCrossmap.log("Ingesting csv row #{count + 1}")
|
data/lib/gn_crossmap/version.rb
CHANGED
data/lib/gn_crossmap.rb
CHANGED
@@ -35,10 +35,11 @@ module GnCrossmap
|
|
35
35
|
|
36
36
|
# rubocop:disable Metrics/AbcSize
|
37
37
|
|
38
|
-
def run(input, output, data_source_id, skip_original)
|
38
|
+
def run(input, output, data_source_id, skip_original, alt_headers = [])
|
39
39
|
stats = Stats.new
|
40
40
|
input_io, output_io = io(input, output)
|
41
|
-
reader = Reader.new(input_io, input_name(input),
|
41
|
+
reader = Reader.new(input_io, input_name(input),
|
42
|
+
skip_original, alt_headers, stats)
|
42
43
|
data = block_given? ? reader.read(&Proc.new) : reader.read
|
43
44
|
writer = Writer.new(output_io, reader.original_fields,
|
44
45
|
output_name(output))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.2
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|