toccatore 0.2.3 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/Dockerfile +6 -3
  3. data/Gemfile.lock +11 -12
  4. data/README.md +1 -1
  5. data/lib/toccatore.rb +2 -0
  6. data/lib/toccatore/base.rb +26 -184
  7. data/lib/toccatore/cli.rb +13 -1
  8. data/lib/toccatore/datacite_related.rb +77 -0
  9. data/lib/toccatore/version.rb +1 -1
  10. data/spec/base_spec.rb +49 -0
  11. data/spec/cli_spec.rb +58 -11
  12. data/spec/datacite_related_spec.rb +129 -0
  13. data/spec/fixtures/datacite_related.json +14792 -0
  14. data/spec/fixtures/datacite_related_nil.json +12 -0
  15. data/spec/fixtures/orcid_update_is_previous.json +24 -1
  16. data/spec/fixtures/vcr_cassettes/Toccatore_Base/get_doi_ra/crossref.yml +72 -0
  17. data/spec/fixtures/vcr_cassettes/Toccatore_Base/get_doi_ra/datacite.yml +74 -0
  18. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_fail.yml +1408 -0
  19. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_query_by_DOI.yml +1145 -0
  20. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_query_by_related_identifier.yml +252 -0
  21. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_succeed.yml +6348 -0
  22. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/datacite_related/should_succeed_with_no_works.yml +38 -0
  23. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_delete.yml +47 -49
  24. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_fail.yml +21 -132
  25. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_DOI.yml +731 -1016
  26. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_query_by_ORCID_ID.yml +1880 -1889
  27. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed.yml +1576 -1591
  28. data/spec/fixtures/vcr_cassettes/Toccatore_CLI/orcid_update/should_succeed_with_no_works.yml +11 -11
  29. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_allow_queries_by_DOI_of_the_Datacite_Metadata_Search_API.yml +38 -0
  30. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_allow_queries_by_related_identifier_of_the_Datacite_Metadata_Search_API.yml +38 -0
  31. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +38 -0
  32. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +40 -0
  33. data/spec/fixtures/vcr_cassettes/{Toccatore_OrcidUpdate/get_data/should_catch_errors_with_the_Datacite_Metadata_Search_API.yml → Toccatore_DataciteRelated/get_total/with_no_works.yml} +10 -10
  34. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/get_total/with_works.yml +38 -0
  35. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/parse_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +1326 -0
  36. data/spec/fixtures/vcr_cassettes/Toccatore_DataciteRelated/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +6274 -0
  37. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_allow_queries_by_DOI_of_the_Datacite_Metadata_Search_API.yml +13 -458
  38. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_allow_queries_by_ORCID_ID_of_the_Datacite_Metadata_Search_API.yml +10 -143
  39. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +9 -9
  40. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +10 -121
  41. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_no_works.yml +10 -10
  42. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/get_total/with_works.yml +10 -10
  43. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/push_data/should_delete_claims.yml +27 -25
  44. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/push_data/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +1506 -1404
  45. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_no_works_returned_by_the_Datacite_Metadata_Search_API.yml +9 -9
  46. data/spec/fixtures/vcr_cassettes/Toccatore_OrcidUpdate/queue_jobs/should_report_if_there_are_works_returned_by_the_Datacite_Metadata_Search_API.yml +21 -182
  47. data/spec/orcid_update_spec.rb +8 -14
  48. data/spec/spec_helper.rb +1 -0
  49. data/toccatore.gemspec +1 -1
  50. metadata +24 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18794a290d5b37831e228d170849338b67e48cc9
4
- data.tar.gz: d7a8d75e76ac3611375895cfa5baf552daf1579c
3
+ metadata.gz: e67c0fc653ac9fc750cd7a75ecd0056c72dcdfec
4
+ data.tar.gz: cf181d9073928728a382e6740912a982cb02fd61
5
5
  SHA512:
6
- metadata.gz: 394448d66f7e3673579d8ac0d0c93403ccd3d5e9549106dd50e4248c51d9a2f120842f4a5e6402e68c5d85283f3041667511e9273a4d1a4e90b53059c6b7c7e4
7
- data.tar.gz: 4e3b4fa820d446eb2a7558adcf314a371a2bac1af8f2ed1f08aef1a93f27cffa5f1f8ebf74264124d4db3ad86bab1bde53c2b3d00ff7525fb0303f5ab3ce9775
6
+ metadata.gz: 4a1c3d00575f9289cc7938028829b49a1cddbb397419e17483360e4a6586566fbf3e20a1f760727e8a90df20b4dcfce06097ac87e2a96b09ffc12c567f555e16
7
+ data.tar.gz: 43ae2875879cf644783244d554d52a6bde83c615bd5dbea12a441ac9291c61d65b2bc7fb8bc03a597f880676499d76c19c908d43cd490e8fbeb1ac52705d177d
data/Dockerfile CHANGED
@@ -1,7 +1,10 @@
1
- FROM phusion/passenger-full:0.9.19
1
+ FROM phusion/passenger-full:0.9.20
2
2
  MAINTAINER Martin Fenner "mfenner@datacite.org"
3
3
 
4
- ENV PATH="/usr/local/rvm/gems/ruby-2.3.1/bin:${PATH}"
4
+ # Install Ruby 2.3.3
5
+ RUN bash -lc 'rvm --default use ruby-2.3.3'
6
+
7
+ ENV PATH="/usr/local/rvm/gems/ruby-2.3.3/bin:${PATH}"
5
8
 
6
9
  # Update installed APT packages, clean up APT when done.
7
10
  RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" && \
@@ -10,4 +13,4 @@ RUN apt-get update && apt-get upgrade -y -o Dpkg::Options::="--force-confold" &&
10
13
  # Install toccatore gem
11
14
  RUN /sbin/setuser app gem install toccatore
12
15
 
13
- CMD toccatore orcid_update --push_url $VOLPINO_URL --access_token $VOLPINO_TOKEN --from_date $FROM_DATE --until_date $UNTIL_DATE
16
+ CMD toccatore --version
data/Gemfile.lock CHANGED
@@ -1,27 +1,26 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- toccatore (0.2.3)
4
+ toccatore (0.3)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  dotenv (~> 2.1, >= 2.1.1)
7
7
  gender_detector (~> 1.0)
8
- maremma (~> 3.1)
8
+ maremma (~> 3.5)
9
9
  namae (~> 0.11.0)
10
10
  thor (~> 0.19)
11
11
 
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- activesupport (4.2.7.1)
15
+ activesupport (4.2.8)
16
16
  i18n (~> 0.7)
17
- json (~> 1.7, >= 1.7.7)
18
17
  minitest (~> 5.1)
19
18
  thread_safe (~> 0.3, >= 0.3.4)
20
19
  tzinfo (~> 1.1)
21
20
  addressable (2.5.0)
22
21
  public_suffix (~> 2.0, >= 2.0.2)
23
22
  builder (3.2.3)
24
- codeclimate-test-reporter (1.0.5)
23
+ codeclimate-test-reporter (1.0.7)
25
24
  simplecov
26
25
  crack (0.4.3)
27
26
  safe_yaml (~> 1.0.0)
@@ -37,11 +36,11 @@ GEM
37
36
  faraday (>= 0.7.4, < 1.0)
38
37
  gender_detector (1.0.0)
39
38
  hashdiff (0.3.2)
40
- i18n (0.8.0)
41
- json (1.8.6)
42
- maremma (3.1.2)
39
+ i18n (0.8.1)
40
+ json (2.0.3)
41
+ maremma (3.5.1)
43
42
  activesupport (~> 4.2, >= 4.2.5)
44
- addressable (~> 2.5)
43
+ addressable (>= 2.3.6)
45
44
  builder (~> 3.2, >= 3.2.2)
46
45
  excon (~> 0.45.0)
47
46
  faraday (~> 0.9.2)
@@ -49,7 +48,7 @@ GEM
49
48
  faraday_middleware (~> 0.10.0)
50
49
  multi_json (~> 1.11.2)
51
50
  nokogiri (~> 1.6.7)
52
- oj (~> 2.13.1)
51
+ oj (~> 2.18, >= 2.18.1)
53
52
  mini_portile2 (2.1.0)
54
53
  minitest (5.10.1)
55
54
  multi_json (1.11.3)
@@ -57,7 +56,7 @@ GEM
57
56
  namae (0.11.3)
58
57
  nokogiri (1.6.8.1)
59
58
  mini_portile2 (~> 2.1.0)
60
- oj (2.13.1)
59
+ oj (2.18.2)
61
60
  public_suffix (2.0.5)
62
61
  rack (2.0.1)
63
62
  rack-test (0.6.3)
@@ -83,7 +82,7 @@ GEM
83
82
  simplecov-html (~> 0.10.0)
84
83
  simplecov-html (0.10.0)
85
84
  thor (0.19.4)
86
- thread_safe (0.3.5)
85
+ thread_safe (0.3.6)
87
86
  tzinfo (1.2.2)
88
87
  thread_safe (~> 0.1)
89
88
  vcr (3.0.3)
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Code Climate](https://codeclimate.com/github/datacite/toccatore/badges/gpa.svg)](https://codeclimate.com/github/datacite/toccatore)
5
5
  [![Test Coverage](https://codeclimate.com/github/datacite/toccatore/badges/coverage.svg)](https://codeclimate.com/github/datacite/toccatore/coverage)
6
6
 
7
- Command-line client for finding ORCID IDs in DataCite metadata.
7
+ Command-line client for finding ORCID IDs and related identifiers in DataCite metadata.
8
8
 
9
9
  ## Development
10
10
 
data/lib/toccatore.rb CHANGED
@@ -1,2 +1,4 @@
1
1
  require "toccatore/orcid_update"
2
+ require "toccatore/datacite_related"
2
3
  require "toccatore/cli"
4
+ require "toccatore/version"
data/lib/toccatore/base.rb CHANGED
@@ -28,6 +28,8 @@ module Toccatore
28
28
  q = "doi:#{options[:doi]}"
29
29
  elsif options[:orcid].present?
30
30
  q = "nameIdentifier:ORCID\\:#{options[:orcid]}"
31
+ elsif options[:related_identifier].present?
32
+ q = "relatedIdentifier:DOI\\:#{options[:related_identifier]}"
31
33
  elsif options[:query].present?
32
34
  q = options[:query]
33
35
  else
@@ -37,7 +39,7 @@ module Toccatore
37
39
  params = { q: q,
38
40
  start: options[:offset],
39
41
  rows: options[:rows],
40
- fl: "doi,creator,title,publisher,publicationYear,resourceTypeGeneral,datacentre_symbol,relatedIdentifier,nameIdentifier,xml,minted,updated",
42
+ fl: "doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated",
41
43
  fq: fq,
42
44
  wt: "json" }
43
45
  url + URI.encode_www_form(params)
@@ -87,206 +89,46 @@ module Toccatore
87
89
  Maremma.get(query_url, options)
88
90
  end
89
91
 
90
- def parse_data(result, options={})
91
- return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
92
-
93
- items = result.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
94
- get_relations_with_related_works(items)
95
- end
96
-
97
- # push to Lagotto deposit API if no error and we have collected works
98
- def push_data(items, options={})
99
- if items.empty?
100
- puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
101
- else
102
- Array(items).map do |item|
103
- relation = item.fetch(:relation, {})
104
- deposit = { "deposit" => { "subj_id" => relation.fetch("subj_id", nil),
105
- "obj_id" => relation.fetch("obj_id", nil),
106
- "relation_type_id" => relation.fetch("relation_type_id", nil),
107
- "source_id" => relation.fetch("source_id", nil),
108
- "publisher_id" => relation.fetch("publisher_id", nil),
109
- "subj" => item.fetch(:subj, {}),
110
- "obj" => item.fetch(:obj, {}),
111
- "message_type" => item.fetch(:message_type, "relation"),
112
- "prefix" => item.fetch(:prefix, nil),
113
- "source_token" => uuid } }
114
-
115
- Maremma.post push_url, data: deposit.to_json, content_type: 'json', token: access_token
116
- end
117
- end
92
+ def url
93
+ "https://search.datacite.org/api?"
118
94
  end
119
95
 
120
- def get_relations_with_related_works(items)
121
- Array(items).reduce([]) do |sum, item|
122
- doi = item.fetch("doi", nil)
123
- prefix = doi[/^10\.\d{4,5}/]
124
- pid = doi_as_url(doi)
125
- type = item.fetch("resourceTypeGeneral", nil)
126
- publisher_id = item.fetch("datacentre_symbol", nil)
127
-
128
- xml = Base64.decode64(item.fetch('xml', "PGhzaD48L2hzaD4=\n"))
129
- xml = Hash.from_xml(xml).fetch("resource", {})
130
- authors = xml.fetch("creators", {}).fetch("creator", [])
131
- authors = [authors] if authors.is_a?(Hash)
132
-
133
- subj = { "pid" => pid,
134
- "DOI" => doi,
135
- "author" => get_hashed_authors(authors),
136
- "title" => item.fetch("title", []).first,
137
- "container-title" => item.fetch("publisher", nil),
138
- "published" => item.fetch("publicationYear", nil),
139
- "issued" => item.fetch("minted", nil),
140
- "publisher_id" => publisher_id,
141
- "registration_agency_id" => "datacite",
142
- "tracked" => true,
143
- "type" => type }
144
-
145
- related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
146
- sum += get_doi_relations(subj, related_doi_identifiers)
147
-
148
- related_github_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:URL:https:\/\/github.com.+/ }
149
- sum += get_github_relations(subj, related_github_identifiers)
150
-
151
- name_identifiers = item.fetch('nameIdentifier', []).select { |id| id =~ /^ORCID:.+/ }
152
- sum += get_contributions(subj, name_identifiers)
153
-
154
- if source_id == "datacite_import"
155
- sum += [{ prefix: prefix,
156
- relation: { "subj_id" => subj["pid"],
157
- "source_id" => source_id,
158
- "publisher_id" => subj["publisher_id"],
159
- "occurred_at" => subj["issued"] },
160
- subj: subj }]
161
- end
162
-
163
- sum
164
- end
96
+ def timeout
97
+ 120
165
98
  end
166
99
 
167
- def get_github_relations(subj, items)
168
- prefix = subj["DOI"][/^10\.\d{4,5}/]
169
-
170
- Array(items).reduce([]) do |sum, item|
171
- raw_relation_type, _related_identifier_type, related_identifier = item.split(':', 3)
172
-
173
- # get parent repo
174
- # code from https://github.com/octokit/octokit.rb/blob/master/lib/octokit/repository.rb
175
- related_identifier = PostRank::URI.clean(related_identifier)
176
- github_hash = github_from_url(related_identifier)
177
- owner_url = github_as_owner_url(github_hash)
178
- repo_url = github_as_repo_url(github_hash)
179
-
180
- sum << { prefix: prefix,
181
- relation: { "subj_id" => subj["pid"],
182
- "obj_id" => related_identifier,
183
- "relation_type_id" => raw_relation_type.underscore,
184
- "source_id" => source_id,
185
- "publisher_id" => subj["publisher_id"],
186
- "registration_agency_id" => "github",
187
- "occurred_at" => subj["issued"] },
188
- subj: subj }
189
-
190
- # if relatedIdentifier is release URL rather than repo URL
191
- if related_identifier != repo_url
192
- sum << { relation: { "subj_id" => related_identifier,
193
- "obj_id" => repo_url,
194
- "relation_type_id" => "is_part_of",
195
- "source_id" => source_id,
196
- "publisher_id" => "github",
197
- "registration_agency_id" => "github" } }
198
- end
199
-
200
- sum << { message_type: "contribution",
201
- relation: { "subj_id" => owner_url,
202
- "obj_id" => repo_url,
203
- "source_id" => "github_contributor",
204
- "registration_agency_id" => "github" }}
205
- end
100
+ def job_batch_size
101
+ 1000
206
102
  end
207
103
 
208
- def get_doi_relations(subj, items)
209
- prefix = subj["DOI"][/^10\.\d{4,5}/]
210
-
211
- Array(items).reduce([]) do |sum, item|
212
- raw_relation_type, _related_identifier_type, related_identifier = item.split(':', 3)
213
- doi = related_identifier.strip.upcase
214
- registration_agency = get_doi_ra(doi)
215
-
216
- if source_id == "datacite_crossref" && registration_agency == "datacite"
217
- sum
218
- else
219
- _source_id = registration_agency == "crossref" ? "datacite_crossref" : "datacite_related"
220
- pid = doi_as_url(doi)
221
-
222
- sum << { prefix: prefix,
223
- relation: { "subj_id" => subj["pid"],
224
- "obj_id" => pid,
225
- "relation_type_id" => raw_relation_type.underscore,
226
- "source_id" => _source_id,
227
- "publisher_id" => subj["publisher_id"],
228
- "registration_agency_id" => registration_agency,
229
- "occurred_at" => subj["issued"] },
230
- subj: subj }
231
- end
232
- end
233
- end
104
+ def get_doi_ra(prefix)
105
+ return nil if prefix.blank?
234
106
 
235
- # we are flipping subj and obj for contributions
236
- def get_contributions(obj, items)
237
- prefix = obj["DOI"][/^10\.\d{4,5}/]
238
-
239
- Array(items).reduce([]) do |sum, item|
240
- orcid = item.split(':', 2).last
241
- orcid = validate_orcid(orcid)
242
-
243
- return sum if orcid.nil?
244
-
245
- sum << { prefix: prefix,
246
- message_type: "contribution",
247
- relation: { "subj_id" => orcid_as_url(orcid),
248
- "obj_id" => obj["pid"],
249
- "relation_type_id" => nil,
250
- "source_id" => source_id,
251
- "publisher_id" => obj["publisher_id"],
252
- "registration_agency_id" => "datacite",
253
- "occurred_at" => obj["issued"] },
254
- obj: obj }
255
- end
256
- end
107
+ url = "https://api.datacite.org/prefixes/#{prefix}"
108
+ result = Maremma.get(url)
257
109
 
258
- def config_fields
259
- [:url, :push_url, :access_token]
260
- end
110
+ return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
261
111
 
262
- def url
263
- "https://search.datacite.org/api?"
112
+ result.body.fetch("data", {}).fetch('attributes', {}).fetch('registration-agency', nil)
264
113
  end
265
114
 
266
- def timeout
267
- 120
115
+ def validate_doi(doi)
116
+ Array(/\A(?:(http|https):\/\/(dx\.)?doi.org\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(doi)).last
268
117
  end
269
118
 
270
- def job_batch_size
271
- 1000
119
+ def validate_prefix(doi)
120
+ Array(/\A(?:(http|https):\/\/(dx\.)?doi.org\/)?(doi:)?(10\.\d{4,5})\/.+\z/.match(doi)).last
272
121
  end
273
122
 
274
- # remove non-printing whitespace
275
- def clean_doi(doi)
276
- doi.gsub(/\u200B/, '')
277
- end
123
+ def normalize_doi(doi)
124
+ doi = validate_doi(doi)
125
+ return nil unless doi.present?
278
126
 
279
- def doi_from_url(url)
280
- if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(url)
281
- uri = Addressable::URI.parse(url)
282
- uri.path[1..-1].upcase
283
- elsif url.starts_with?("doi:")
284
- url[4..-1].upcase
285
- end
286
- end
127
+ # remove non-printing whitespace and downcase
128
+ doi = doi.delete("\u200B").downcase
287
129
 
288
- def doi_as_url(doi)
289
- Addressable::URI.encode("https://doi.org/#{clean_doi(doi)}") if doi.present?
130
+ # turn DOI into URL, escape unsafe characters
131
+ "https://doi.org/" + Addressable::URI.encode(doi)
290
132
  end
291
133
 
292
134
  def orcid_from_url(url)
data/lib/toccatore/cli.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  require "thor"
4
- require_relative 'orcid_update'
5
4
 
6
5
  module Toccatore
7
6
  class CLI < Thor
@@ -30,5 +29,18 @@ module Toccatore
30
29
  orcid_update = Toccatore::OrcidUpdate.new
31
30
  orcid_update.queue_jobs(orcid_update.unfreeze(options))
32
31
  end
32
+
33
+ desc "datacite_related", "push non-DataCite DOIs from DataCite MDS to Event Data"
34
+ method_option :access_token, type: :string, required: true
35
+ method_option :push_url, type: :string
36
+ method_option :from_date, type: :string, default: (Time.now.to_date - 1.day).iso8601
37
+ method_option :until_date, type: :string, default: Time.now.to_date.iso8601
38
+ method_option :q, type: :string
39
+ method_option :related_identifier, type: :string
40
+ method_option :doi, type: :string
41
+ def datacite_related
42
+ datacite_related = Toccatore::DataciteRelated.new
43
+ datacite_related.queue_jobs(datacite_related.unfreeze(options))
44
+ end
33
45
  end
34
46
  end
data/lib/toccatore/datacite_related.rb ADDED
@@ -0,0 +1,77 @@
1
+ require_relative 'base'
2
+
3
+ module Toccatore
4
+ class DataciteRelated < Base
5
+ def source_id
6
+ "datacite_related"
7
+ end
8
+
9
+ def query
10
+ "relatedIdentifier:DOI\\:*"
11
+ end
12
+
13
+ def parse_data(result, options={})
14
+ return result.body.fetch("errors") if result.body.fetch("errors", nil).present?
15
+
16
+ items = result.body.fetch("data", {}).fetch('response', {}).fetch('docs', nil)
17
+ registration_agencies = {}
18
+
19
+ Array.wrap(items).reduce([]) do |sum, item|
20
+ doi = item.fetch("doi")
21
+ pid = normalize_doi(doi)
22
+ related_doi_identifiers = item.fetch('relatedIdentifier', []).select { |id| id =~ /:DOI:.+/ }
23
+
24
+ sum += Array(related_doi_identifiers).reduce([]) do |ssum, iitem|
25
+ raw_relation_type, _related_identifier_type, related_identifier = iitem.split(':', 3)
26
+ related_identifier = related_identifier.strip.downcase
27
+ prefix = validate_prefix(related_identifier)
28
+ registration_agencies[prefix] = get_doi_ra(prefix) unless registration_agencies[prefix]
29
+
30
+ # check whether this is a DataCite DOI
31
+ if registration_agencies[prefix] == "DataCite"
32
+ ssum
33
+ else
34
+ ssum << { "id" => SecureRandom.uuid,
35
+ "message_action" => "create",
36
+ "subj_id" => pid,
37
+ "obj_id" => normalize_doi(related_identifier),
38
+ "relation_type_id" => raw_relation_type.underscore,
39
+ "source_id" => "datacite",
40
+ "occurred_at" => item.fetch("minted") }
41
+ end
42
+ end
43
+
44
+ sum
45
+ end
46
+ end
47
+
48
+ # push to Event Data API if no error and we have collected works
49
+ def push_data(items, options={})
50
+ if items.empty?
51
+ puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
52
+ elsif options[:access_token].blank?
53
+ puts "An error occured: Access token missing."
54
+ else
55
+ Array(items).each { |item| push_item(item, options) }
56
+ end
57
+ end
58
+
59
+ def push_item(item, options={})
60
+ return OpenStruct.new(body: { "errors" => [{ "title" => "Access token missing." }] }) if options[:access_token].blank?
61
+
62
+ host = options[:push_url].presence || "https://bus.eventdata.crossref.org"
63
+ push_url = host + "/events"
64
+
65
+ response = Maremma.post(push_url, data: item.to_json,
66
+ bearer: options[:access_token],
67
+ content_type: 'json',
68
+ host: host)
69
+
70
+ if response.status == 201
71
+ puts "#{item['subj_id']} #{item['relation_type_id']} #{item['obj_id']} pushed to Event Data service."
72
+ elsif response.body["errors"].present?
73
+ puts "An error occured: #{response.body['errors'].first['title']}"
74
+ end
75
+ end
76
+ end
77
+ end