gn_crossmap 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -1
- data/lib/gn_crossmap/reader.rb +9 -2
- data/lib/gn_crossmap/version.rb +1 -1
- data/lib/gn_crossmap.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47ec0f7527076236262e11f44ad1740c730e05a4
|
4
|
+
data.tar.gz: 8656e9ce121a47a533f72fe76c23ea312b46e509
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cae7ecb4df507bef1fe64c6b9078772a3078d960f2e9af9a1d30ae7e233df85949fe2754c8830bd54dafd0ee2ec36b0788c0a61f7e0f7e8378dd69898465e487
|
7
|
+
data.tar.gz: 0ab13bc741d0f80b3bbc32d16a4e813a9db7f4954f476be23f1f0ef322bd7f1da782fbf7768fbf4c791f0544521ec9c82de1927d3c04cc22e524eb8b05033df9
|
data/README.md
CHANGED
@@ -37,6 +37,10 @@ gem install gn_crossmap
|
|
37
37
|
|
38
38
|
## Usage
|
39
39
|
|
40
|
+
### Usage as a Web Application
|
41
|
+
|
42
|
+
see [gn\_crossmap\_web] project
|
43
|
+
|
40
44
|
### Usage from command line
|
41
45
|
|
42
46
|
```bash
|
@@ -64,7 +68,7 @@ Compares an input list to a data source from [GN Resolver][resolver] and
|
|
64
68
|
writes result into an output file.
|
65
69
|
|
66
70
|
```ruby
|
67
|
-
GnCrossmap.run(input, output, data_source_id, skip_original)
|
71
|
+
GnCrossmap.run(input, output, data_source_id, skip_original, alt_headers)
|
68
72
|
```
|
69
73
|
|
70
74
|
``input``
|
@@ -81,6 +85,10 @@ designates `STDIN`
|
|
81
85
|
: (boolean) if true only `taxonID` is preserved from original data. Otherwise
|
82
86
|
all original data is preserved
|
83
87
|
|
88
|
+
``alt_headers``
|
89
|
+
: (array) empty array by default. If `alt_headers` is not empty, its values are used
|
90
|
+
instead of the headers supplied with the file
|
91
|
+
|
84
92
|
#### `GnCrossmap.logger=`
|
85
93
|
|
86
94
|
Allows to set logger to a custom logger (default is `STDERR`)
|
@@ -91,9 +99,15 @@ Allows to set logger to a custom logger (default is `STDERR`)
|
|
91
99
|
require "gn_crossmap"
|
92
100
|
|
93
101
|
# If you want to change logger -- default Logging is to standard error
|
102
|
+
|
94
103
|
GnCrossmap.logger = MyCustomLogger.new
|
95
104
|
|
96
105
|
GnCrossmap.run("path/to/input.csv", "path/to/output.csv", 5, true)
|
106
|
+
|
107
|
+
# if you want to use alternative headers instead of ones supplied in a file
|
108
|
+
|
109
|
+
alt_headers = %w(taxonId scientificName rank)
|
110
|
+
GnCrossmap.run("path/to/input.csv", "path/to/output.csv", 5, true, alt_headers)
|
97
111
|
```
|
98
112
|
|
99
113
|
If you want to get intermediate statistics for each resolution cycle use a
|
@@ -268,3 +282,4 @@ See [LICENSE][license] for details.
|
|
268
282
|
[terms]: http://rs.tdwg.org/dwc/terms
|
269
283
|
[files]: https://github.com/GlobalNamesArchitecture/gn_crossmap/tree/master/spec/files
|
270
284
|
[output]: https://github.com/GlobalNamesArchitecture/gn_crossmap/tree/master/spec/files/output-example.csv
|
285
|
+
[gn_crossmap_web]: https://github.com/GlobalNamesArchitecture/gn_crossmap_web
|
data/lib/gn_crossmap/reader.rb
CHANGED
@@ -4,8 +4,9 @@ module GnCrossmap
|
|
4
4
|
class Reader
|
5
5
|
attr_reader :original_fields
|
6
6
|
|
7
|
-
def initialize(csv_io, input_name, skip_original, stats)
|
7
|
+
def initialize(csv_io, input_name, skip_original, alt_headers, stats)
|
8
8
|
@stats = stats
|
9
|
+
@alt_headers = alt_headers
|
9
10
|
@csv_io = csv_io
|
10
11
|
@col_sep = col_sep
|
11
12
|
@original_fields = nil
|
@@ -33,13 +34,19 @@ module GnCrossmap
|
|
33
34
|
dc = Collector.new(@skip_original)
|
34
35
|
csv = CSV.new(@csv_io, col_sep: col_sep)
|
35
36
|
csv.each_with_index do |row, i|
|
36
|
-
|
37
|
+
row = process_headers(row) if @original_fields.nil?
|
37
38
|
yield @stats.stats if log_progress(i) && block_given?
|
38
39
|
dc.process_row(row)
|
39
40
|
end && @csv_io.close
|
40
41
|
dc.data
|
41
42
|
end
|
42
43
|
|
44
|
+
def process_headers(row)
|
45
|
+
@original_fields = headers(row)
|
46
|
+
row = @alt_headers unless @alt_headers.empty?
|
47
|
+
row
|
48
|
+
end
|
49
|
+
|
43
50
|
def log_progress(count)
|
44
51
|
return false unless (count % 10_000).zero?
|
45
52
|
GnCrossmap.log("Ingesting csv row #{count + 1}")
|
data/lib/gn_crossmap/version.rb
CHANGED
data/lib/gn_crossmap.rb
CHANGED
@@ -35,10 +35,11 @@ module GnCrossmap
|
|
35
35
|
|
36
36
|
# rubocop:disable Metrics/AbcSize
|
37
37
|
|
38
|
-
def run(input, output, data_source_id, skip_original)
|
38
|
+
def run(input, output, data_source_id, skip_original, alt_headers = [])
|
39
39
|
stats = Stats.new
|
40
40
|
input_io, output_io = io(input, output)
|
41
|
-
reader = Reader.new(input_io, input_name(input),
|
41
|
+
reader = Reader.new(input_io, input_name(input),
|
42
|
+
skip_original, alt_headers, stats)
|
42
43
|
data = block_given? ? reader.read(&Proc.new) : reader.read
|
43
44
|
writer = Writer.new(output_io, reader.original_fields,
|
44
45
|
output_name(output))
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.2
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|