toccatore 0.2.3 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Dockerfile +6 -3
  3. data/Gemfile.lock +11 -12
  4. data/README.md +1 -1
  5. data/lib/toccatore.rb +2 -0
  6. data/lib/toccatore/base.rb +26 -184
  7. data/lib/toccatore/cli.rb +13 -1
  8. data/lib/toccatore/datacite_related.rb +77 -0
  9. data/lib/toccatore/version.rb +1 -1
  10. data/spec/base_spec.rb +49 -0
  11. data/spec/cli_spec.rb +58 -11
  12. data/spec/datacite_related_spec.rb +129 -0
  13. data/spec/fixtures/datacite_related.json +14792 -0
  14. data/spec/fixtures/datacite_related_nil.json +12 -0
  15. data/spec/fixtures/orcid_update_is_previous.json +24 -1
  16. data/spec/fixtures/vcr_cassettes/Toccatore_Base/get_doi_ra/crossref.yml +72 -0
  17. data/spec/fixtures/vcr_cassettes/Toccatore_Base/get_doi_ra/datacite.yml +74 -0
  18. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_fail.yml +1408 -0
  19. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_query_by_DOI.yml +1145 -0
  20. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_query_by_related_identifier.yml +252 -0
  21. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_succeed.yml +6348 -0
  22. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_succeed_with_no_works.yml +38 -0
  23. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_delete.yml +47 -49
  24. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_fail.yml +21 -132
  25. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_DOI.yml +731 -1016
  26. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_ORCID_ID.yml +1880 -1889
  27. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed.yml +1576 -1591
  28. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed_with_no_works.yml +11 -11
  29. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_allow_queries_by_DOI_of_the_Datacite_Metadata_Search_API.yml +38 -0
  30. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_allow_queries_by_related_identifier_of_the_Datacite_Metadata_Search_API.yml +38 -0
  31. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +38 -0
  32. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +40 -0
  33. data/spec/fixtures/vcr_cassettes/{Toccatore_OrcidUpdate/get_data/should_catch_errors_with_the_Datacite_Metadata_Search_API.yml → Toccatore_DataciteRelated/get_total/with_no_works.yml} +10 -10
  34. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_total/with_works.yml +38 -0
  35. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/parse_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +1326 -0
  36. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +6274 -0
  37. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_allow_queries_by_DOI_of_the_Datacite_Metadata_Search_API.yml +13 -458
  38. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_allow_queries_by_ORCID_ID_of_the_Datacite_Metadata_Search_API.yml +10 -143
  39. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +9 -9
  40. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +10 -121
  41. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_no_works.yml +10 -10
  42. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_works.yml +10 -10
  43. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/push_data/should_delete_claims.yml +27 -25
  44. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +1506 -1404
  45. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +9 -9
  46. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +21 -182
  47. data/spec/orcid_update_spec.rb +8 -14
  48. data/spec/spec_helper.rb +1 -0
  49. data/toccatore.gemspec +1 -1
  50. metadata +24 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18794a290d5b37831e228d170849338b67e48cc9
4
- data.tar.gz: d7a8d75e76ac3611375895cfa5baf552daf1579c
3
+ metadata.gz: e67c0fc653ac9fc750cd7a75ecd0056c72dcdfec
4
+ data.tar.gz: cf181d9073928728a382e6740912a982cb02fd61
5
5
  SHA512:
6
- metadata.gz: 394448d66f7e3673579d8ac0d0c93403ccd3d5e9549106dd50e4248c51d9a2f120842f4a5e6402e68c5d85283f3041667511e9273a4d1a4e90b53059c6b7c7e4
7
- data.tar.gz: 4e3b4fa820d446eb2a7558adcf314a371a2bac1af8f2ed1f08aef1a93f27cffa5f1f8ebf74264124d4db3ad86bab1bde53c2b3d00ff7525fb0303f5ab3ce9775
6
+ metadata.gz: 4a1c3d00575f9289cc7938028829b49a1cddbb397419e17483360e4a6586566fbf3e20a1f760727e8a90df20b4dcfce06097ac87e2a96b09ffc12c567f555e16
7
+ data.tar.gz: 43ae2875879cf644783244d554d52a6bde83c615bd5dbea12a441ac9291c61d65b2bc7fb8bc03a597f880676499d76c19c908d43cd490e8fbeb1ac52705d177d
data/Dockerfile CHANGED
@@ -1,7 +1,10 @@
1
- FROM phusion/passenger-full:0.9.19
1
+ FROM phusion/passenger-full:0.9.20
2
2
  MAINTAINER Martin Fenner "mfenner@datacite.org"
3
3
 
4
- ENV PATH="/usr/local/rvm/gems/ruby-2.3.1/bin:${PATH}"
4
+ # Install Ruby 2.3.3
5
+ RUN bash -lc 'rvm --default use ruby-2.3.3'
6
+
7
+ ENV PATH="/usr/local/rvm/gems/ruby-2.3.3/bin:${PATH}"
5
8
 
6
9
  # Update installed APT packages, clean up APT when done.
7
10
  RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" && \
@@ -10,4 +13,4 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
10
13
  # Install toccatore gem
11
14
  RUN /sbin/setuser app gem install toccatore
12
15
 
13
- CMD toccatore orcid_update --push_url $VOLPINO_URL --access_token $VOLPINO_TOKEN --from_date $FROM_DATE --until_date $UNTIL_DATE
16
+ CMD toccatore --version
data/Gemfile.lock CHANGED
@@ -1,27 +1,26 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- toccatore (0.2.3)
4
+ toccatore (0.3)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  dotenv (~> 2.1, >= 2.1.1)
7
7
  gender_detector (~> 1.0)
8
- maremma (~> 3.1)
8
+ maremma (~> 3.5)
9
9
  namae (~> 0.11.0)
10
10
  thor (~> 0.19)
11
11
 
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- activesupport (4.2.7.1)
15
+ activesupport (4.2.8)
16
16
  i18n (~> 0.7)
17
- json (~> 1.7, >= 1.7.7)
18
17
  minitest (~> 5.1)
19
18
  thread_safe (~> 0.3, >= 0.3.4)
20
19
  tzinfo (~> 1.1)
21
20
  addressable (2.5.0)
22
21
  public_suffix (~> 2.0, >= 2.0.2)
23
22
  builder (3.2.3)
24
- codeclimate-test-reporter (1.0.5)
23
+ codeclimate-test-reporter (1.0.7)
25
24
  simplecov
26
25
  crack (0.4.3)
27
26
  safe_yaml (~> 1.0.0)
@@ -37,11 +36,11 @@ GEM
37
36
  faraday (>= 0.7.4, < 1.0)
38
37
  gender_detector (1.0.0)
39
38
  hashdiff (0.3.2)
40
- i18n (0.8.0)
41
- json (1.8.6)
42
- maremma (3.1.2)
39
+ i18n (0.8.1)
40
+ json (2.0.3)
41
+ maremma (3.5.1)
43
42
  activesupport (~> 4.2, >= 4.2.5)
44
- addressable (~> 2.5)
43
+ addressable (>= 2.3.6)
45
44
  builder (~> 3.2, >= 3.2.2)
46
45
  excon (~> 0.45.0)
47
46
  faraday (~> 0.9.2)
@@ -49,7 +48,7 @@ GEM
49
48
  faraday_middleware (~> 0.10.0)
50
49
  multi_json (~> 1.11.2)
51
50
  nokogiri (~> 1.6.7)
52
- oj (~> 2.13.1)
51
+ oj (~> 2.18, >= 2.18.1)
53
52
  mini_portile2 (2.1.0)
54
53
  minitest (5.10.1)
55
54
  multi_json (1.11.3)
@@ -57,7 +56,7 @@ GEM
57
56
  namae (0.11.3)
58
57
  nokogiri (1.6.8.1)
59
58
  mini_portile2 (~> 2.1.0)
60
- oj (2.13.1)
59
+ oj (2.18.2)
61
60
  public_suffix (2.0.5)
62
61
  rack (2.0.1)
63
62
  rack-test (0.6.3)
@@ -83,7 +82,7 @@ GEM
83
82
  simplecov-html (~> 0.10.0)
84
83
  simplecov-html (0.10.0)
85
84
  thor (0.19.4)
86
- thread_safe (0.3.5)
85
+ thread_safe (0.3.6)
87
86
  tzinfo (1.2.2)
88
87
  thread_safe (~> 0.1)
89
88
  vcr (3.0.3)
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Code Climate](https://codeclimate.com/github/datacite/toccatore/badges/gpa.svg)](https://codeclimate.com/github/datacite/toccatore)
5
5
  [![Test Coverage](https://codeclimate.com/github/datacite/toccatore/badges/coverage.svg)](https://codeclimate.com/github/datacite/toccatore/coverage)
6
6
 
7
- Command-line client for finding ORCID IDs in DataCite metadata.
7
+ Command-line client for finding ORCID IDs and related identifiers in DataCite metadata.
8
8
 
9
9
  ## Development
10
10
 
data/lib/toccatore.rb CHANGED
@@ -1,2 +1,4 @@
1
1
  require "toccatore/orcid_update"
2
+ require "toccatore/datacite_related"
2
3
  require "toccatore/cli"
4
+ require "toccatore/version"
data/lib/toccatore/base.rb CHANGED
@@ -28,6 +28,8 @@ module Toccatore
28
28
  q = "doi:#{options[:doi]}"
29
29
  elsif options[:orcid].present?
30
30
  q = "nameIdentifier:ORCID\\:#{options[:orcid]}"
31
+ elsif options[:related_identifier].present?
32
+ q = "relatedIdentifier:DOI\\:#{options[:related_identifier]}"
31
33
  elsif options[:query].present?
32
34
  q = options[:query]
33
35
  else
@@ -37,7 +39,7 @@ module Toccatore
37
39
  params = { q: q,
38
40
  start: options[:offset],
39
41
  rows: options[:rows],
40
- fl: "doi,creator,title,publisher,publicationYear,resourceTypeGeneral,datacentre_symbol,relatedIdentifier,nameIdentifier,xml,minted,updated",
42
+ fl: "doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated",
41
43
  fq: fq,
42
44
  wt: "json" }
43
45
  url + URI.encode_www_form(params)
@@ -87,206 +89,46 @@ module Toccatore
87
89
  Maremma.get(query_url, options)
88
90
  end
89
91
 
90
- def parse_data(result, options={})
91
- return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
92
-
93
- items = result.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
94
- get_relations_with_related_works(items)
95
- end
96
-
97
- # push to Lagotto deposit API if no error and we have collected works
98
- def push_data(items, options={})
99
- if items.empty?
100
- puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
101
- else
102
- Array(items).map do |item|
103
- relation = item.fetch(:relation, {})
104
- deposit = { "deposit" => { "subj_id" => relation.fetch("subj_id", nil),
105
- "obj_id" => relation.fetch("obj_id", nil),
106
- "relation_type_id" => relation.fetch("relation_type_id", nil),
107
- "source_id" => relation.fetch("source_id", nil),
108
- "publisher_id" => relation.fetch("publisher_id", nil),
109
- "subj" => item.fetch(:subj, {}),
110
- "obj" => item.fetch(:obj, {}),
111
- "message_type" => item.fetch(:message_type, "relation"),
112
- "prefix" => item.fetch(:prefix, nil),
113
- "source_token" => uuid } }
114
-
115
- Maremma.post push_url, data: deposit.to_json, content_type: 'json', token: access_token
116
- end
117
- end
92
+ def url
93
+ "https://search.datacite.org/api?"
118
94
  end
119
95
 
120
- def get_relations_with_related_works(items)
121
- Array(items).reduce([]) do |sum, item|
122
- doi = item.fetch("doi", nil)
123
- prefix = doi[/^10\.\d{4,5}/]
124
- pid = doi_as_url(doi)
125
- type = item.fetch("resourceTypeGeneral", nil)
126
- publisher_id = item.fetch("datacentre_symbol", nil)
127
-
128
- xml = Base64.decode64(item.fetch('xml', "PGhzaD48L2hzaD4=\n"))
129
- xml = Hash.from_xml(xml).fetch("resource", {})
130
- authors = xml.fetch("creators", {}).fetch("creator", [])
131
- authors = [authors] if authors.is_a?(Hash)
132
-
133
- subj = { "pid" => pid,
134
- "DOI" => doi,
135
- "author" => get_hashed_authors(authors),
136
- "title" => item.fetch("title", []).first,
137
- "container-title" => item.fetch("publisher", nil),
138
- "published" => item.fetch("publicationYear", nil),
139
- "issued" => item.fetch("minted", nil),
140
- "publisher_id" => publisher_id,
141
- "registration_agency_id" => "datacite",
142
- "tracked" => true,
143
- "type" => type }
144
-
145
- related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
146
- sum += get_doi_relations(subj, related_doi_identifiers)
147
-
148
- related_github_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:URL:https:\/\/github.com.+/ }
149
- sum += get_github_relations(subj, related_github_identifiers)
150
-
151
- name_identifiers = item.fetch('nameIdentifier', []).select { |id| id =~ /^ORCID:.+/ }
152
- sum += get_contributions(subj, name_identifiers)
153
-
154
- if source_id == "datacite_import"
155
- sum += [{ prefix: prefix,
156
- relation: { "subj_id" => subj["pid"],
157
- "source_id" => source_id,
158
- "publisher_id" => subj["publisher_id"],
159
- "occurred_at" => subj["issued"] },
160
- subj: subj }]
161
- end
162
-
163
- sum
164
- end
96
+ def timeout
97
+ 120
165
98
  end
166
99
 
167
- def get_github_relations(subj, items)
168
- prefix = subj["DOI"][/^10\.\d{4,5}/]
169
-
170
- Array(items).reduce([]) do |sum, item|
171
- raw_relation_type, _related_identifier_type, related_identifier = item.split(':', 3)
172
-
173
- # get parent repo
174
- # code from https://github.com/octokit/octokit.rb/blob/master/lib/octokit/repository.rb
175
- related_identifier = PostRank::URI.clean(related_identifier)
176
- github_hash = github_from_url(related_identifier)
177
- owner_url = github_as_owner_url(github_hash)
178
- repo_url = github_as_repo_url(github_hash)
179
-
180
- sum << { prefix: prefix,
181
- relation: { "subj_id" => subj["pid"],
182
- "obj_id" => related_identifier,
183
- "relation_type_id" => raw_relation_type.underscore,
184
- "source_id" => source_id,
185
- "publisher_id" => subj["publisher_id"],
186
- "registration_agency_id" => "github",
187
- "occurred_at" => subj["issued"] },
188
- subj: subj }
189
-
190
- # if relatedIdentifier is release URL rather than repo URL
191
- if related_identifier != repo_url
192
- sum << { relation: { "subj_id" => related_identifier,
193
- "obj_id" => repo_url,
194
- "relation_type_id" => "is_part_of",
195
- "source_id" => source_id,
196
- "publisher_id" => "github",
197
- "registration_agency_id" => "github" } }
198
- end
199
-
200
- sum << { message_type: "contribution",
201
- relation: { "subj_id" => owner_url,
202
- "obj_id" => repo_url,
203
- "source_id" => "github_contributor",
204
- "registration_agency_id" => "github" }}
205
- end
100
+ def job_batch_size
101
+ 1000
206
102
  end
207
103
 
208
- def get_doi_relations(subj, items)
209
- prefix = subj["DOI"][/^10\.\d{4,5}/]
210
-
211
- Array(items).reduce([]) do |sum, item|
212
- raw_relation_type, _related_identifier_type, related_identifier = item.split(':', 3)
213
- doi = related_identifier.strip.upcase
214
- registration_agency = get_doi_ra(doi)
215
-
216
- if source_id == "datacite_crossref" && registration_agency == "datacite"
217
- sum
218
- else
219
- _source_id = registration_agency == "crossref" ? "datacite_crossref" : "datacite_related"
220
- pid = doi_as_url(doi)
221
-
222
- sum << { prefix: prefix,
223
- relation: { "subj_id" => subj["pid"],
224
- "obj_id" => pid,
225
- "relation_type_id" => raw_relation_type.underscore,
226
- "source_id" => _source_id,
227
- "publisher_id" => subj["publisher_id"],
228
- "registration_agency_id" => registration_agency,
229
- "occurred_at" => subj["issued"] },
230
- subj: subj }
231
- end
232
- end
233
- end
104
+ def get_doi_ra(prefix)
105
+ return nil if prefix.blank?
234
106
 
235
- # we are flipping subj and obj for contributions
236
- def get_contributions(obj, items)
237
- prefix = obj["DOI"][/^10\.\d{4,5}/]
238
-
239
- Array(items).reduce([]) do |sum, item|
240
- orcid = item.split(':', 2).last
241
- orcid = validate_orcid(orcid)
242
-
243
- return sum if orcid.nil?
244
-
245
- sum << { prefix: prefix,
246
- message_type: "contribution",
247
- relation: { "subj_id" => orcid_as_url(orcid),
248
- "obj_id" => obj["pid"],
249
- "relation_type_id" => nil,
250
- "source_id" => source_id,
251
- "publisher_id" => obj["publisher_id"],
252
- "registration_agency_id" => "datacite",
253
- "occurred_at" => obj["issued"] },
254
- obj: obj }
255
- end
256
- end
107
+ url = "https://api.datacite.org/prefixes/#{prefix}"
108
+ result = Maremma.get(url)
257
109
 
258
- def config_fields
259
- [:url, :push_url, :access_token]
260
- end
110
+ return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
261
111
 
262
- def url
263
- "https://search.datacite.org/api?"
112
+ result.body.fetch("data", {}).fetch('attributes', {}).fetch('registration-agency', nil)
264
113
  end
265
114
 
266
- def timeout
267
- 120
115
+ def validate_doi(doi)
116
+ Array(/\A(?:(http|https):\/\/(dx\.)?doi.org\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
268
117
  end
269
118
 
270
- def job_batch_size
271
- 1000
119
+ def validate_prefix(doi)
120
+ Array(/\A(?:(http|https):\/\/(dx\.)?doi.org\/)?(doi:)?(10\.\d{4,5})\/.+\z/.match(doi)).last
272
121
  end
273
122
 
274
- # remove non-printing whitespace
275
- def clean_doi(doi)
276
- doi.gsub(/\u200B/, '')
277
- end
123
+ def normalize_doi(doi)
124
+ doi = validate_doi(doi)
125
+ return nil unless doi.present?
278
126
 
279
- def doi_from_url(url)
280
- if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(url)
281
- uri = Addressable::URI.parse(url)
282
- uri.path[1..-1].upcase
283
- elsif url.starts_with?("doi:")
284
- url[4..-1].upcase
285
- end
286
- end
127
+ # remove non-printing whitespace and downcase
128
+ doi = doi.delete("\u200B").downcase
287
129
 
288
- def doi_as_url(doi)
289
- Addressable::URI.encode("https://doi.org/#{clean_doi(doi)}") if doi.present?
130
+ # turn DOI into URL, escape unsafe characters
131
+ "https://doi.org/" + Addressable::URI.encode(doi)
290
132
  end
291
133
 
292
134
  def orcid_from_url(url)
data/lib/toccatore/cli.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  require "thor"
4
- require_relative 'orcid_update'
5
4
 
6
5
  module Toccatore
7
6
  class CLI < Thor
@@ -30,5 +29,18 @@ module Toccatore
30
29
  orcid_update = Toccatore::OrcidUpdate.new
31
30
  orcid_update.queue_jobs(orcid_update.unfreeze(options))
32
31
  end
32
+
33
+ desc "datacite_related", "push non-DataCite DOIs from DataCite MDS to Event Data"
34
+ method_option :access_token, type: :string, required: true
35
+ method_option :push_url, type: :string
36
+ method_option :from_date, type: :string, default: (Time.now.to_date - 1.day).iso8601
37
+ method_option :until_date, type: :string, default: Time.now.to_date.iso8601
38
+ method_option :q, type: :string
39
+ method_option :related_identifier, type: :string
40
+ method_option :doi, type: :string
41
+ def datacite_related
42
+ datacite_related = Toccatore::DataciteRelated.new
43
+ datacite_related.queue_jobs(datacite_related.unfreeze(options))
44
+ end
33
45
  end
34
46
  end
data/lib/toccatore/datacite_related.rb ADDED
@@ -0,0 +1,77 @@
1
+ require_relative 'base'
2
+
3
+ module Toccatore
4
+ class DataciteRelated < Base
5
+ def source_id
6
+ "datacite_related"
7
+ end
8
+
9
+ def query
10
+ "relatedIdentifier:DOI\\:*"
11
+ end
12
+
13
+ def parse_data(result, options={})
14
+ return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
15
+
16
+ items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
17
+ registration_agencies = {}
18
+
19
+ Array.wrap(items).reduce([]) do |sum, item|
20
+ doi = item.fetch("doi")
21
+ pid = normalize_doi(doi)
22
+ related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
23
+
24
+ sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
25
+ raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
26
+ related_identifier = related_identifier.strip.downcase
27
+ prefix = validate_prefix(related_identifier)
28
+ registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
29
+
30
+ # check whether this is a DataCite DOI
31
+ if registration_agencies[prefix] == "DataCite"
32
+ ssum
33
+ else
34
+ ssum << { "id" => SecureRandom.uuid,
35
+ "message_action" => "create",
36
+ "subj_id" => pid,
37
+ "obj_id" => normalize_doi(related_identifier),
38
+ "relation_type_id" => raw_relation_type.underscore,
39
+ "source_id" => "datacite",
40
+ "occurred_at" => item.fetch("minted") }
41
+ end
42
+ end
43
+
44
+ sum
45
+ end
46
+ end
47
+
48
+ # push to Event Data API if no error and we have collected works
49
+ def push_data(items, options={})
50
+ if items.empty?
51
+ puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
52
+ elsif options[:access_token].blank?
53
+ puts "An error occured: Access token missing."
54
+ else
55
+ Array(items).each { |item| push_item(item, options) }
56
+ end
57
+ end
58
+
59
+ def push_item(item, options={})
60
+ return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
61
+
62
+ host = options[:push_url].presence || "https://bus.eventdata.crossref.org"
63
+ push_url = host + "/events"
64
+
65
+ response = Maremma.post(push_url, data: item.to_json,
66
+ bearer: options[:access_token],
67
+ content_type: 'json',
68
+ host: host)
69
+
70
+ if response.status == 201
71
+ puts "#{item['subj_id']} #{item['relation_type_id']} #{item['obj_id']} pushed to Event Data service."
72
+ elsif response.body["errors"].present?
73
+ puts "An error occured: #{response.body['errors'].first['title']}"
74
+ end
75
+ end
76
+ end
77
+ end