toccatore 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +50 -0
  3. data/.travis.yml +19 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +3 -0
  6. data/Gemfile.lock +110 -0
  7. data/LICENSE.md +21 -0
  8. data/README.md +28 -0
  9. data/bin/toccatore +5 -0
  10. data/lib/toccatore.rb +1 -0
  11. data/lib/toccatore/base.rb +361 -0
  12. data/lib/toccatore/cli.rb +38 -0
  13. data/lib/toccatore/orcid_update.rb +56 -0
  14. data/lib/toccatore/version.rb +3 -0
  15. data/spec/cli_spec.rb +29 -0
  16. data/spec/fixtures/orcid_update.json +1015 -0
  17. data/spec/fixtures/orcid_update_nil.json +12 -0
  18. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_fail.yml +149 -0
  19. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed.yml +3453 -0
  20. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +38 -0
  21. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +149 -0
  22. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_no_works.yml +38 -0
  23. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_works.yml +38 -0
  24. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +3307 -0
  25. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +38 -0
  26. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +236 -0
  27. data/spec/orcid_update_spec.rb +120 -0
  28. data/spec/spec_helper.rb +91 -0
  29. data/toccatore.gemspec +37 -0
  30. metadata +314 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 13870f2c02d23ff37a9f4de7af84bfa353af388d
4
+ data.tar.gz: a356318891ee353f9dab772c9f74b1629a21b6dc
5
+ SHA512:
6
+ metadata.gz: 63cd648459e66ce0bda3434ec311e35b12fd66d617882cb59c78a1c9091a6448c9ff2f43c0bd187d1b27c8e0cfabfd861f11325d73de8245be8419f3f79a6086
7
+ data.tar.gz: c0a6a5c484535dace3a5a677e73c97858f3f27bcab2383917d9d7ea631242abd869ed85b6d5eca275a87ab55ac072938b2a6c96093e50cdc13d3b259fea2573c
@@ -0,0 +1,50 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
@@ -0,0 +1,19 @@
1
+ language: ruby
2
+ cache: bundler
3
+ sudo: false
4
+ rvm:
5
+ - 2.3.1
6
+ script:
7
+ - bundle exec rspec
8
+ - bundle exec codeclimate-test-reporter
9
+ notifications:
10
+ slack: datacite:Wt8En0ALoTA6Kjc5EOKNDWxN
11
+ email: false
12
+ deploy:
13
+ provider: rubygems
14
+ api_key:
15
+ secure: eLWrKUf5as7RNOJpm4/viPR97yOpYjj7yiwblXf1axtBr/nJCi/ZZ3UyakHbsM88DI+sF52A56cum+0KkWTQuzBVR6TXB3u/UclC0z1pyjv6QClm3qLi5/lx6f//7K9FmdB130CuAUbJgUBiIyDElPJK4bE+teBUWft/Pb49Yy1/5M5F0VV/lZrOQ/O6js9cdmxxmp8DfC+UMmw1I982VGJ1xTW6vhWlZ3pA+PLi7KkdxzA5f3/SQLIC8ij6i9FLFXz37qs5ynumzDKiyshKoVZ7mVeR0SjmGAteAXDqkwmknJPMJTHxc2dvxDpZjB1KguBw6Ohs/Bv+R14bzyXepkBaZ8Mo++Ro0EqRdP9tdLbhhtJJ4+MrVPqYVL+JakAcJY3Y58e/j4ZOvbjrWFJ3oMljDpxzSUNvMvpWCQ8NlEDAhOEG3b4jbERl+vEhjYlcDVeSxBrxA02wXCoWTrZxpkRZY8qdgA3O21W+pcixEGIYT/Ox0jTfWdQUhqjJM16qcN13i6SMzeC1FaihXkA1AltUtEIgXA+uJA9aMrhHGYktMc2XkUqO4blna6ExzBwvafzZgor47oCOh1VLxpas+5Hui3YKEnmHn8sxKa26WuQJomnXgXfhB8n+eB/KWunWOPiBvJZDMU2C6AvP6N1MDTKvQWp2bwU5Jamt9vNjTtM=
16
+ gem: toccatore
17
+ on:
18
+ tags: true
19
+ repo: datacite/toccatore
@@ -0,0 +1,5 @@
1
+ ## v.0.1 (January 30, 2017)
2
+
3
+ [toccatore 0.1](https://github.com/datacite/toccatore/releases/tag/v.0.1) was released on January 30, 2017:
4
+
5
+ * initial release
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,110 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ toccatore (0.1)
5
+ activesupport (~> 4.2, >= 4.2.5)
6
+ dotenv (~> 2.1, >= 2.1.1)
7
+ gender_detector (~> 1.0)
8
+ maremma (~> 3.1)
9
+ namae (~> 0.11.0)
10
+ thor (~> 0.19)
11
+
12
+ GEM
13
+ remote: https://rubygems.org/
14
+ specs:
15
+ activesupport (4.2.7.1)
16
+ i18n (~> 0.7)
17
+ json (~> 1.7, >= 1.7.7)
18
+ minitest (~> 5.1)
19
+ thread_safe (~> 0.3, >= 0.3.4)
20
+ tzinfo (~> 1.1)
21
+ addressable (2.5.0)
22
+ public_suffix (~> 2.0, >= 2.0.2)
23
+ builder (3.2.3)
24
+ codeclimate-test-reporter (1.0.5)
25
+ simplecov
26
+ crack (0.4.3)
27
+ safe_yaml (~> 1.0.0)
28
+ diff-lcs (1.3)
29
+ docile (1.1.5)
30
+ dotenv (2.2.0)
31
+ excon (0.45.4)
32
+ faraday (0.9.2)
33
+ multipart-post (>= 1.2, < 3)
34
+ faraday-encoding (0.0.4)
35
+ faraday
36
+ faraday_middleware (0.10.1)
37
+ faraday (>= 0.7.4, < 1.0)
38
+ gender_detector (1.0.0)
39
+ hashdiff (0.3.2)
40
+ i18n (0.7.0)
41
+ json (1.8.6)
42
+ maremma (3.1.2)
43
+ activesupport (~> 4.2, >= 4.2.5)
44
+ addressable (~> 2.5)
45
+ builder (~> 3.2, >= 3.2.2)
46
+ excon (~> 0.45.0)
47
+ faraday (~> 0.9.2)
48
+ faraday-encoding (~> 0.0.1)
49
+ faraday_middleware (~> 0.10.0)
50
+ multi_json (~> 1.11.2)
51
+ nokogiri (~> 1.6.7)
52
+ oj (~> 2.13.1)
53
+ mini_portile2 (2.1.0)
54
+ minitest (5.10.1)
55
+ multi_json (1.11.3)
56
+ multipart-post (2.0.0)
57
+ namae (0.11.3)
58
+ nokogiri (1.6.8.1)
59
+ mini_portile2 (~> 2.1.0)
60
+ oj (2.13.1)
61
+ public_suffix (2.0.5)
62
+ rack (2.0.1)
63
+ rack-test (0.6.3)
64
+ rack (>= 1.0)
65
+ rake (12.0.0)
66
+ rspec (3.5.0)
67
+ rspec-core (~> 3.5.0)
68
+ rspec-expectations (~> 3.5.0)
69
+ rspec-mocks (~> 3.5.0)
70
+ rspec-core (3.5.4)
71
+ rspec-support (~> 3.5.0)
72
+ rspec-expectations (3.5.0)
73
+ diff-lcs (>= 1.2.0, < 2.0)
74
+ rspec-support (~> 3.5.0)
75
+ rspec-mocks (3.5.0)
76
+ diff-lcs (>= 1.2.0, < 2.0)
77
+ rspec-support (~> 3.5.0)
78
+ rspec-support (3.5.0)
79
+ safe_yaml (1.0.4)
80
+ simplecov (0.12.0)
81
+ docile (~> 1.1.0)
82
+ json (>= 1.8, < 3)
83
+ simplecov-html (~> 0.10.0)
84
+ simplecov-html (0.10.0)
85
+ thor (0.19.4)
86
+ thread_safe (0.3.5)
87
+ tzinfo (1.2.2)
88
+ thread_safe (~> 0.1)
89
+ vcr (3.0.3)
90
+ webmock (1.24.6)
91
+ addressable (>= 2.3.6)
92
+ crack (>= 0.3.2)
93
+ hashdiff
94
+
95
+ PLATFORMS
96
+ ruby
97
+
98
+ DEPENDENCIES
99
+ bundler (~> 1.0)
100
+ codeclimate-test-reporter (~> 1.0, >= 1.0.0)
101
+ rack-test (~> 0)
102
+ rake (~> 12.0)
103
+ rspec (~> 3.4)
104
+ simplecov (~> 0.12.0)
105
+ toccatore!
106
+ vcr (~> 3.0, >= 3.0.3)
107
+ webmock (~> 1.22, >= 1.22.3)
108
+
109
+ BUNDLED WITH
110
+ 1.12.5
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 DataCite
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,28 @@
1
+ # Toccatore
2
+
3
+ [![Build Status](https://travis-ci.org/datacite/toccatore.svg?branch=master)](https://travis-ci.org/datacite/toccatore)
4
+ [![Code Climate](https://codeclimate.com/github/datacite/toccatore/badges/gpa.svg)](https://codeclimate.com/github/datacite/toccatore)
5
+ [![Test Coverage](https://codeclimate.com/github/datacite/toccatore/badges/coverage.svg)](https://codeclimate.com/github/datacite/toccatore/coverage)
6
+
7
+ Command-line client for finding ORCID IDs in DataCite metadata.
8
+
9
+ ## Development
10
+
11
+ We use rspec for unit testing:
12
+
13
+ ```
14
+ bundle exec rspec
15
+ ```
16
+
17
+ Follow along via [GitHub Issues](https://github.com/datacite/toccatore/issues).
18
+
19
+ ### Note on Patches/Pull Requests
20
+
21
+ * Fork the project
22
+ * Write tests for your new feature or a test that reproduces a bug
23
+ * Implement your feature or make a bug fix
24
+ * Do not mess with Rakefile, version or history
25
+ * Commit, push and make a pull request. Bonus points for topical branches.
26
+
27
+ ## License
28
+ **toccatore** is released under the [MIT License](https://github.com/datacite/toccatore/blob/master/LICENSE.md).
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# resolve the library entry point relative to this executable and load it,
# then start the command-line interface
library_path = File.expand_path("../../lib/toccatore", __FILE__)
require library_path

Toccatore::CLI.start
@@ -0,0 +1 @@
1
+ require "toccatore/orcid_update"
@@ -0,0 +1,361 @@
1
+ require 'namae'
2
+ require 'gender_detector'
3
+
4
+ module Toccatore
5
+ class Base
6
# load ENV variables from a .env file three directory levels above this
# file, if it exists
env_file = File.expand_path("../../../.env", __FILE__)
if File.exist?(env_file)
  require 'dotenv'
  Dotenv.load! env_file
end

# load ENV variables from container environment if json file exists
# see https://github.com/phusion/baseimage-docker#envvar_dumps
env_json_file = "/etc/container_environment.json"
if File.exist?(env_json_file)
  # JSON is needed here and is not required anywhere else in this file
  require 'json'
  env_vars = JSON.parse(File.read(env_json_file))
  # ENV only accepts String (or nil) values, so numbers and booleans coming
  # from the JSON dump are stringified; nil is passed through unchanged
  env_vars.each { |k, v| ENV[k] = v.nil? ? nil : v.to_s }
end
20
+
21
# build the search API URL for one page of results
#
# options - :offset (start row, converted with to_i),
#           :rows (page size, defaults to job_batch_size),
#           :from_date / :until_date (default to yesterday / today)
#
# returns url with the form-encoded query parameters appended
def get_query_url(options={})
  first_day = options[:from_date].presence || (Time.now.to_date - 1.day).iso8601
  last_day = options[:until_date].presence || Time.now.to_date.iso8601

  # only works updated inside the date window that have metadata and are active
  filter = "updated:[#{first_day}T00:00:00Z TO #{last_day}T23:59:59Z]" \
           " AND has_metadata:true AND is_active:true"

  fields = %w[doi creator title publisher publicationYear resourceTypeGeneral
              datacentre_symbol relatedIdentifier nameIdentifier xml minted updated]

  query = { q: q,
            start: options[:offset].to_i,
            rows: options[:rows].presence || job_batch_size,
            fl: fields.join(","),
            fq: filter,
            wt: "json" }

  url + URI.encode_www_form(query)
end
38
+
39
# ask the search API how many works match the query, without fetching any
# rows (rows is forced to 0)
#
# returns the "numFound" value of the response, or 0 when it is missing
def get_total(options={})
  count_url = get_query_url(options.merge(rows: 0))
  response = Maremma.get(count_url, options)

  data = response.body.fetch("data", {})
  data.fetch("response", {}).fetch("numFound", 0)
end
44
+
45
# walk through all result pages for the query described by options and hand
# each page to process_data
#
# options - query options as understood by get_query_url; :rows, when it
#           converts to a positive integer, is used as the page size instead
#           of job_batch_size
#
# returns the total number of works reported by get_total
def queue_jobs(options={})
  total = get_total(options)

  if total > 0
    # get_query_url honours options[:rows], so page with that same size;
    # otherwise offsets would advance by job_batch_size while each request
    # returned fewer rows, skipping results
    rows = options[:rows].to_i
    page_size = rows > 0 ? rows : job_batch_size
    total_pages = (total.to_f / page_size).ceil

    (0...total_pages).each do |page|
      # merge instead of assigning, so the caller's hash is left untouched
      process_data(options.merge(offset: page * page_size))
    end
  end

  # return number of works queued
  total
end
61
+
62
# fetch one page of results, parse it and push the outcome to the deposit API
#
# options - query options; timeout and source_id are added for the fetch,
#           source_id alone for parsing
#
# returns whatever push_data returns
# NOTE(review): parse_data returns the "errors" array when the response has
# one, and that array is then passed to push_data as well - confirm this is
# intended
def process_data(options = {})
  fetch_options = options.merge(timeout: timeout, source_id: source_id)
  response = get_data(fetch_options)

  parse_options = options.merge(source_id: source_id)
  parsed = parse_data(response, parse_options)

  push_data(parsed, options)
end
70
+
71
# fetch one page of results from the search API
#
# options - passed both to get_query_url and to Maremma.get
#
# returns the Maremma.get response
def get_data(options={})
  Maremma.get(get_query_url(options), options)
end
75
+
76
# turn a search API response into a list of relations
#
# result  - response object whose body is a Hash (as returned by get_data)
# options - currently unused
#
# returns the "errors" value of the body when one is present, otherwise the
# relations built by get_relations_with_related_works from the returned docs
def parse_data(result, options={})
  body = result.body

  errors = body.fetch("errors", nil)
  return errors if errors.present?

  # read the docs from the response body, the same place the errors were
  # looked up (and the same place get_total reads numFound from)
  items = body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
  get_relations_with_related_works(items)
end
82
+
83
# push to Lagotto deposit API if no error and we have collected works
#
# items   - array of hashes with the optional keys :relation, :subj, :obj,
#           :message_type and :prefix; nil and a single hash are accepted too
# options - currently unused
#
# returns an array with one Maremma.post result per item ([] for no items)
def push_data(items, options={})
  # Array(hash) would split a hash into key/value pairs, so wrap it instead
  items = items.is_a?(Hash) ? [items] : Array(items)

  items.map do |item|
    relation = item.fetch(:relation, {})
    deposit = { "deposit" => { "subj_id" => relation.fetch("subj_id", nil),
                               "obj_id" => relation.fetch("obj_id", nil),
                               "relation_type_id" => relation.fetch("relation_type_id", nil),
                               "source_id" => relation.fetch("source_id", nil),
                               "publisher_id" => relation.fetch("publisher_id", nil),
                               "subj" => item.fetch(:subj, {}),
                               "obj" => item.fetch(:obj, {}),
                               "message_type" => item.fetch(:message_type, "relation"),
                               "prefix" => item.fetch(:prefix, nil),
                               "source_token" => uuid } }

    Maremma.post push_url, data: deposit.to_json, content_type: 'json', token: access_token
  end
end
103
+
104
# build the list of relations for every doc returned by the search API
#
# items - array of doc hashes with string keys ("doi", "xml",
#         "relatedIdentifier", "nameIdentifier", ...); nil is treated as empty
#
# returns one flat array holding, per doc, its DOI relations, its GitHub
# relations, its ORCID contributions and - only when source_id is
# "datacite_import" - an entry for the work itself
def get_relations_with_related_works(items)
  Array(items).each_with_object([]) do |item, relations|
    doi = item.fetch("doi", nil)
    prefix = doi[/^10\.\d{4,5}/]
    pid = doi_as_url(doi)

    # the creators are read from the base64-encoded metadata xml; the default
    # value decodes to an empty <hsh></hsh> document
    metadata = Base64.decode64(item.fetch('xml', "PGhzaD48L2hzaD4=\n"))
    resource = Hash.from_xml(metadata).fetch("resource", {})
    creators = resource.fetch("creators", {}).fetch("creator", [])
    # a single creator is parsed as a hash rather than an array of hashes
    creators = [creators] if creators.is_a?(Hash)

    subj = { "pid" => pid,
             "DOI" => doi,
             "author" => get_hashed_authors(creators),
             "title" => item.fetch("title", []).first,
             "container-title" => item.fetch("publisher", nil),
             "published" => item.fetch("publicationYear", nil),
             "issued" => item.fetch("minted", nil),
             "publisher_id" => item.fetch("datacentre_symbol", nil),
             "registration_agency_id" => "datacite",
             "tracked" => true,
             "type" => item.fetch("resourceTypeGeneral", nil) }

    related_identifiers = item.fetch('relatedIdentifier', [])

    doi_identifiers = related_identifiers.select { |id| id =~ /:DOI:.+/ }
    relations.concat(get_doi_relations(subj, doi_identifiers))

    github_identifiers = related_identifiers.select { |id| id =~ /:URL:https:\/\/github.com.+/ }
    relations.concat(get_github_relations(subj, github_identifiers))

    orcid_identifiers = item.fetch('nameIdentifier', []).select { |id| id =~ /^ORCID:.+/ }
    relations.concat(get_contributions(subj, orcid_identifiers))

    next unless source_id == "datacite_import"

    relations << { prefix: prefix,
                   relation: { "subj_id" => subj["pid"],
                               "source_id" => source_id,
                               "publisher_id" => subj["publisher_id"],
                               "occurred_at" => subj["issued"] },
                   subj: subj }
  end
end
150
+
151
# build relations between a work and the GitHub URLs it refers to
#
# subj  - hash describing the work ("DOI", "pid", "publisher_id", "issued")
# items - related identifiers of the form "RelationType:URL:https://github.com/..."
#
# returns, per identifier: the work-to-URL relation, an "is_part_of" relation
# from the URL to its repository when the two differ, and a contribution
# linking the repository owner to the repository
def get_github_relations(subj, items)
  prefix = subj["DOI"][/^10\.\d{4,5}/]

  Array(items).each_with_object([]) do |item, relations|
    relation_type, _identifier_type, github_url = item.split(':', 3)

    # get parent repo
    # code from https://github.com/octokit/octokit.rb/blob/master/lib/octokit/repository.rb
    github_url = PostRank::URI.clean(github_url)
    github_hash = github_from_url(github_url)
    owner_url = github_as_owner_url(github_hash)
    repo_url = github_as_repo_url(github_hash)

    relations << { prefix: prefix,
                   relation: { "subj_id" => subj["pid"],
                               "obj_id" => github_url,
                               "relation_type_id" => relation_type.underscore,
                               "source_id" => source_id,
                               "publisher_id" => subj["publisher_id"],
                               "registration_agency_id" => "github",
                               "occurred_at" => subj["issued"] },
                   subj: subj }

    # if relatedIdentifier is release URL rather than repo URL
    unless github_url == repo_url
      relations << { relation: { "subj_id" => github_url,
                                 "obj_id" => repo_url,
                                 "relation_type_id" => "is_part_of",
                                 "source_id" => source_id,
                                 "publisher_id" => "github",
                                 "registration_agency_id" => "github" } }
    end

    relations << { message_type: "contribution",
                   relation: { "subj_id" => owner_url,
                               "obj_id" => repo_url,
                               "source_id" => "github_contributor",
                               "registration_agency_id" => "github" } }
  end
end
191
+
192
# build relations between a work and the DOIs it refers to
#
# subj  - hash describing the work ("DOI", "pid", "publisher_id", "issued")
# items - related identifiers of the form "RelationType:DOI:10.xxxx/..."
#
# returns one relation per identifier; identifiers registered with datacite
# are left out when source_id is "datacite_crossref"
def get_doi_relations(subj, items)
  prefix = subj["DOI"][/^10\.\d{4,5}/]

  Array(items).each_with_object([]) do |item, relations|
    relation_type, _identifier_type, related_identifier = item.split(':', 3)
    related_doi = related_identifier.strip.upcase
    registration_agency = get_doi_ra(related_doi)

    next if source_id == "datacite_crossref" && registration_agency == "datacite"

    # the relation's source depends on where the related DOI is registered
    relation_source = if registration_agency == "crossref"
                        "datacite_crossref"
                      else
                        "datacite_related"
                      end
    related_pid = doi_as_url(related_doi)

    relations << { prefix: prefix,
                   relation: { "subj_id" => subj["pid"],
                               "obj_id" => related_pid,
                               "relation_type_id" => relation_type.underscore,
                               "source_id" => relation_source,
                               "publisher_id" => subj["publisher_id"],
                               "registration_agency_id" => registration_agency,
                               "occurred_at" => subj["issued"] },
                   subj: subj }
  end
end
218
+
219
# build contributions linking ORCID IDs to a work
# we are flipping subj and obj for contributions
#
# obj   - hash describing the work ("DOI", "pid", "publisher_id", "issued")
# items - name identifiers of the form "ORCID:<identifier>"
#
# returns one contribution per identifier accepted by validate_orcid
def get_contributions(obj, items)
  prefix = obj["DOI"][/^10\.\d{4,5}/]

  Array(items).each_with_object([]) do |item, sum|
    orcid = validate_orcid(item.split(':', 2).last)

    # skip only this identifier; a return here would leave the whole method
    # and drop every identifier that follows an invalid one
    next if orcid.nil?

    sum << { prefix: prefix,
             message_type: "contribution",
             relation: { "subj_id" => orcid_as_url(orcid),
                         "obj_id" => obj["pid"],
                         "relation_type_id" => nil,
                         "source_id" => source_id,
                         "publisher_id" => obj["publisher_id"],
                         "registration_agency_id" => "datacite",
                         "occurred_at" => obj["issued"] },
             obj: obj }
  end
end
241
+
242
# returns the symbols :url, :push_url and :access_token
# presumably the settings an agent can be configured with - confirm against callers
def config_fields
  %i[url push_url access_token]
end
245
+
246
# base URL of the search API; it ends in "?" because get_query_url appends
# the encoded query string directly to it
def url
  'https://search.datacite.org/api?'
end
249
+
250
# timeout that process_data passes along as the :timeout option when
# fetching data (presumably in seconds - confirm against Maremma)
def timeout
  120
end
253
+
254
# default number of rows requested per page (see get_query_url) and the
# page size queue_jobs uses to walk through the results
def job_batch_size
  1_000
end
257
+
258
# remove non-printing whitespace: every zero-width space (U+200B) is
# deleted from the DOI, all other characters are kept
def clean_doi(doi)
  doi.delete("\u200B")
end
262
+
263
# extract an upper-cased DOI from a doi.org / dx.doi.org URL or from a
# "doi:" prefixed string
#
# returns nil when url has neither form
def doi_from_url(url)
  resolver_url = /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/

  # for resolver URLs the DOI is the URL path without its leading slash
  return Addressable::URI.parse(url).path[1..-1].upcase if resolver_url.match(url)

  # otherwise drop the four characters of the "doi:" prefix
  return url[4..-1].upcase if url.start_with?("doi:")

  nil
end
271
+
272
# turn a DOI into an encoded https://doi.org/ URL, after clean_doi has been
# applied to it
#
# returns nil when doi is not present
def doi_as_url(doi)
  return unless doi.present?

  Addressable::URI.encode("https://doi.org/#{clean_doi(doi)}")
end
275
+
276
# extract everything after "http://orcid.org/" from a URL starting with it
#
# returns nil when url does not start with that prefix
def orcid_from_url(url)
  match = /\Ahttp:\/\/orcid\.org\/(.+)/.match(url)
  match && match[1]
end
279
+
280
# prefix an ORCID ID with "http://orcid.org/"
#
# returns nil when orcid is not present
def orcid_as_url(orcid)
  return unless orcid.present?

  "http://orcid.org/#{orcid}"
end
283
+
284
# check that a string is an ORCID ID, optionally written as an orcid.org URL
# (http or https)
#
# an ORCID ID is four dash-separated groups of four characters; the last
# character is a digit or "X". Exactly one such character is accepted, so
# over-long identifiers are rejected.
#
# returns the bare identifier, or nil when orcid does not match
def validate_orcid(orcid)
  Array(/\A(?:https?:\/\/orcid\.org\/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])\z/.match(orcid)).last
end
287
+
288
# parse author string into CSL format
# only assume personal name when using sort-order: "Turing, Alan"
#
# author  - name string; nil is treated like an empty string, so a creator
#           without a name does not raise
# options - currently unused
#
# returns { "literal" => "" } for a blank name, { "literal" => name } when the
# name cannot be parsed or is not recognised as a personal name, otherwise a
# hash with the "family" and "given" parts that are not nil
def get_one_author(author, options = {})
  author = author.to_s
  return { "literal" => "" } if author.strip.blank?

  author = cleanup_author(author)
  names = Namae.parse(author)

  return { "literal" => author } if names.blank? || is_personal_name?(author).blank?

  name = names.first
  { "family" => name.family,
    "given" => name.given }.compact
end
305
+
306
# normalize an author string before it is parsed
def cleanup_author(author)
  # detect pattern "Smith J.", but not "Smith, John K."
  unless author.include?(",")
    author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2')
  end

  # titleize strings
  titleized = author.my_titleize

  # remove non-standard space characters
  titleized.gsub(/[[:space:]]/, ' ')
end
315
+
316
# decide whether an author string is a personal name: true when it contains
# a comma, otherwise the result of looking up its first word as a given name
def is_personal_name?(author)
  author.include?(",") || name_detector.name_exists?(author.split.first)
end
322
+
323
# parse array of author strings into CSL format
# each string is handed to get_one_author together with options
def get_authors(authors, options={})
  Array(authors).map do |name|
    get_one_author(name, options)
  end
end
327
+
328
# parse array of author hashes into CSL format
# each hash is handed to get_one_hashed_author
def get_hashed_authors(authors)
  Array(authors).map do |creator|
    get_one_hashed_author(creator)
  end
end
332
+
333
# convert one author hash into CSL format
#
# the "creatorName" value is parsed by get_one_author and the result of
# get_name_identifier is added under "ORCID"; entries with nil values are
# dropped
def get_one_hashed_author(author)
  csl_author = get_one_author(author.fetch("creatorName", nil))
  csl_author.merge("ORCID" => get_name_identifier(author)).compact
end
340
+
341
# return the ORCID URL of an author hash
#
# the "nameIdentifierScheme" value (default "orcid") is compared
# case-insensitively; returns nil when the scheme is not orcid or the
# "nameIdentifier" value is rejected by validate_orcid
def get_name_identifier(author)
  identifier = author.fetch("nameIdentifier", nil)
  scheme = author.fetch("nameIdentifierScheme", "orcid").downcase
  return unless scheme == "orcid"

  orcid = validate_orcid(identifier)
  "http://orcid.org/#{orcid}" if orcid
end
350
+
351
# detector used by is_personal_name? to look up given names
#
# the instance is created on first use and then reused, instead of building
# a new GenderDetector for every lookup
# NOTE(review): assumes lookups do not modify the detector - confirm
def name_detector
  @name_detector ||= GenderDetector.new
end
354
+ end
355
+ end
356
+
357
class String
  # upcase the character that follows every word boundary or underscore,
  # leaving all other characters as they are
  def my_titleize
    gsub(/(\b|_)(.)/) { Regexp.last_match(1) + Regexp.last_match(2).upcase }
  end
end