cirneco 0.7.4 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -4
  3. data/lib/cirneco/api.rb +8 -8
  4. data/lib/cirneco/base.rb +1 -1
  5. data/lib/cirneco/doi.rb +20 -9
  6. data/lib/cirneco/utils.rb +204 -128
  7. data/lib/cirneco/version.rb +1 -1
  8. data/lib/cirneco/work.rb +19 -7
  9. data/spec/api_spec.rb +10 -10
  10. data/spec/doi_spec.rb +29 -27
  11. data/spec/fixtures/cool-dois-minted.html +404 -0
  12. data/spec/fixtures/cool-dois-minted.html.md +99 -0
  13. data/spec/fixtures/cool-dois-missing-metadata.html +356 -0
  14. data/spec/fixtures/cool-dois-no-json-ld.html +352 -0
  15. data/spec/fixtures/cool-dois.html +404 -0
  16. data/spec/fixtures/cool-dois.html.md +1 -0
  17. data/spec/fixtures/index.html +271 -0
  18. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +499 -4
  19. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +44 -0
  20. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +44 -0
  21. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +8 -4
  22. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +130 -0
  23. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +130 -0
  24. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +130 -0
  25. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_url.yml +130 -0
  26. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +25 -20
  27. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +499 -4
  28. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +8 -4
  29. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +8 -4
  30. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +44 -0
  31. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +44 -0
  32. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_a_doi.yml +130 -0
  33. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +130 -0
  34. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +130 -0
  35. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +130 -0
  36. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +499 -4
  37. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +8 -4
  38. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +8 -4
  39. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +8 -4
  40. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +8 -4
  41. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +8 -4
  42. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +10 -5
  43. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +11 -6
  44. data/spec/utils_spec.rb +105 -48
  45. data/spec/work_spec.rb +4 -4
  46. metadata +20 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a82c5b7e10ab13eabc1913ab86f856bf1d28ee68
4
- data.tar.gz: 38c61337cd19f6623ad835182baccaf2607f260f
3
+ metadata.gz: 0b25496604d7050477dbcd531561118874f76705
4
+ data.tar.gz: f3e38823524d96cd5382a5a54d3838daaf8c3b42
5
5
  SHA512:
6
- metadata.gz: 2dbc92ffc86523a271cb1c69e2af2778f56bf2b0ca4c61714f3825fcbc84ee52ce7af2b42cd9edd1ead906e9a2620feda76de8dfec622698b74b2ab8a7910cf1
7
- data.tar.gz: 00c3050b139c651521e9e1d0c0a454002f0fff2ff5f5b1a018a0e8aefcc4a258a009c2c7a03c332c1bb762dbc3176a941d8bdce961b7680a056f5c2d6b8967e1
6
+ metadata.gz: 8b521bfb2aceebdb52d8248dccd6de030976413720d392748358620e7527c33398224a4a33aea7d7a4dca234c8506da408aceb00f5ac1a47e82177e034392c70
7
+ data.tar.gz: ada9b59941a9c2f3182736cb3ba54b2c44d3b16226a20abc51be56dc0a5b6f0e6949311c6b5ca9a20e396fe624fd872c2d152abe6fd99f823d0f164d27f46b1c
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cirneco (0.7.4)
4
+ cirneco (0.8.1)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  base32-crockford-checksum (~> 0.2.2)
7
7
  bergamasco (~> 0.3)
@@ -22,7 +22,7 @@ GEM
22
22
  tzinfo (~> 1.1)
23
23
  addressable (2.3.8)
24
24
  base32-crockford-checksum (0.2.3)
25
- bergamasco (0.3.4)
25
+ bergamasco (0.3.5)
26
26
  activesupport (~> 4.2, >= 4.2.5)
27
27
  addressable (~> 2.3.8)
28
28
  builder (~> 3.2, >= 3.2.2)
@@ -33,7 +33,7 @@ GEM
33
33
  pandoc-ruby (~> 2.0, >= 2.0.0)
34
34
  safe_yaml (~> 1.0, >= 1.0.4)
35
35
  builder (3.2.2)
36
- codeclimate-test-reporter (1.0.3)
36
+ codeclimate-test-reporter (1.0.4)
37
37
  simplecov
38
38
  crack (0.4.3)
39
39
  safe_yaml (~> 1.0.0)
@@ -47,7 +47,7 @@ GEM
47
47
  faraday
48
48
  faraday_middleware (0.10.1)
49
49
  faraday (>= 0.7.4, < 1.0)
50
- hashdiff (0.3.1)
50
+ hashdiff (0.3.2)
51
51
  i18n (0.7.0)
52
52
  json (1.8.3)
53
53
  loofah (2.0.3)
data/lib/cirneco/api.rb CHANGED
@@ -6,7 +6,7 @@ module Cirneco
6
6
  def post_metadata(data, options={})
7
7
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
8
8
 
9
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
9
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
10
10
 
11
11
  url = "#{mds_url}/metadata"
12
12
  Maremma.post(url, content_type: 'application/xml;charset=UTF-8', data: data, username: options[:username], password: options[:password])
@@ -15,7 +15,7 @@ module Cirneco
15
15
  def get_metadata(doi, options={})
16
16
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
17
17
 
18
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
18
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
19
19
 
20
20
  url = "#{mds_url}/metadata/#{doi}"
21
21
  Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password], raw: true)
@@ -24,7 +24,7 @@ module Cirneco
24
24
  def delete_metadata(doi, options={})
25
25
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
26
26
 
27
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
27
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
28
28
 
29
29
  url = "#{mds_url}/metadata/#{doi}"
30
30
  Maremma.delete(url, username: options[:username], password: options[:password])
@@ -35,7 +35,7 @@ module Cirneco
35
35
 
36
36
  payload = "doi=#{doi}\nurl=#{options[:url]}"
37
37
 
38
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
38
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
39
39
 
40
40
  url = "#{mds_url}/doi/#{doi}"
41
41
  Maremma.put(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
@@ -44,7 +44,7 @@ module Cirneco
44
44
  def get_doi(doi, options={})
45
45
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
46
46
 
47
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
47
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
48
48
 
49
49
  url = "#{mds_url}/doi/#{doi}"
50
50
  Maremma.get(url, username: options[:username], password: options[:password])
@@ -53,7 +53,7 @@ module Cirneco
53
53
  def get_dois(options={})
54
54
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
55
55
 
56
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
56
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
57
57
 
58
58
  url = "#{mds_url}/doi"
59
59
  response = Maremma.get(url, username: options[:username], password: options[:password])
@@ -66,7 +66,7 @@ module Cirneco
66
66
 
67
67
  payload = options[:raw] ? options[:media] : options[:media].map { |m| "#{m[:mime_type]}=#{m[:url]}" }.join("\n")
68
68
 
69
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
69
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
70
70
 
71
71
  url = "#{mds_url}/media/#{doi}"
72
72
  Maremma.post(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
@@ -75,7 +75,7 @@ module Cirneco
75
75
  def get_media(doi, options={})
76
76
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
77
77
 
78
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
78
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
79
79
 
80
80
  url = "#{mds_url}/media/#{doi}"
81
81
  response = Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password])
data/lib/cirneco/base.rb CHANGED
@@ -16,6 +16,6 @@ module Cirneco
16
16
  end
17
17
 
18
18
  # default values for some ENV variables
19
- ENV['MDS_URL'] ||= "https://mds.test.datacite.org"
19
+ ENV['MDS_URL'] ||= "https://mds-sandbox.datacite.org"
20
20
  end
21
21
  end
data/lib/cirneco/doi.rb CHANGED
@@ -14,6 +14,7 @@ module Cirneco
14
14
  method_option :username, :default => ENV['MDS_USERNAME']
15
15
  method_option :password, :default => ENV['MDS_PASSWORD']
16
16
  method_option :sandbox, :type => :boolean, :force => false
17
+ method_option :limit, :type => :numeric, :default => 25
17
18
  def get(doi)
18
19
  if doi == "all"
19
20
  response = get_dois(options)
@@ -23,6 +24,8 @@ module Cirneco
23
24
 
24
25
  if response.body["errors"]
25
26
  puts "Error: " + response.body["errors"].first.fetch("title", "An error occured")
27
+ elsif doi == "all"
28
+ puts response.body["data"][0...options[:limit]]
26
29
  else
27
30
  puts response.body["data"]
28
31
  end
@@ -54,6 +57,14 @@ module Cirneco
54
57
  end
55
58
  end
56
59
 
60
+ desc "generate DOI", "generate a DOI name"
61
+ method_option :lower_limit, :type => :numeric, :default => 0
62
+ method_option :namespace, :default => 'MS-'
63
+ method_option :number, :type => :numeric, :aliases => '-n'
64
+ def accession_number
65
+ puts generate_accession_number(options)
66
+ end
67
+
57
68
  desc "decode DOI", "decode DOI encoded using Crockford base32 algorithm"
58
69
  def decode(doi)
59
70
  number = decode_doi(doi)
@@ -86,10 +97,10 @@ module Cirneco
86
97
  method_option :sandbox, :type => :boolean, :force => false
87
98
  def mint(filepath)
88
99
 
89
- if File.directory?(filepath)
90
- response = mint_dois_for_all_files(filepath, options)
100
+ if filepath.is_a?(Array)
101
+ response = mint_dois_for_all_urls(filepath, options)
91
102
  else
92
- response = mint_doi_for_file(filepath, options)
103
+ response = mint_doi_for_url(filepath, options)
93
104
  end
94
105
 
95
106
  puts response
@@ -107,10 +118,10 @@ module Cirneco
107
118
  method_option :sandbox, :type => :boolean, :force => false
108
119
  def mint_and_hide(filepath)
109
120
 
110
- if File.directory?(filepath)
111
- response = mint_and_hide_dois_for_all_files(filepath, options)
121
+ if filepath.is_a?(Array)
122
+ response = mint_and_hide_dois_for_all_urls(filepath, options)
112
123
  else
113
- response = mint_and_hide_doi_for_file(filepath, options)
124
+ response = mint_and_hide_doi_for_url(filepath, options)
114
125
  end
115
126
 
116
127
  puts response
@@ -127,10 +138,10 @@ module Cirneco
127
138
  method_option :sandbox, :type => :boolean, :force => false
128
139
  def hide(filepath)
129
140
 
130
- if File.directory?(filepath)
131
- response = hide_dois_for_all_files(filepath, options)
141
+ if filepath.is_a?(Array)
142
+ response = hide_dois_for_all_urls(filepath, options)
132
143
  else
133
- response = hide_doi_for_file(filepath, options)
144
+ response = hide_doi_for_url(filepath, options)
134
145
  end
135
146
 
136
147
  puts response
data/lib/cirneco/utils.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'base32/crockford'
2
2
  require 'securerandom'
3
3
  require 'bergamasco'
4
+ require 'open-uri'
4
5
  require 'time'
5
6
 
6
7
  module Cirneco
@@ -8,6 +9,11 @@ module Cirneco
8
9
  # "ZZZZZZZ" decoded as number
9
10
  UPPER_LIMIT = 34359738367
10
11
 
12
+ LICENSES = {
13
+ "https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
14
+ "https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
15
+ }
16
+
11
17
  def get_dois_by_prefix(prefix, options={})
12
18
  response = get_dois(options)
13
19
 
@@ -23,193 +29,207 @@ module Cirneco
23
29
  end
24
30
 
25
31
  def encode_doi(prefix, options={})
26
- number = options[:number] || SecureRandom.random_number(UPPER_LIMIT)
32
+ number = options[:number].to_s.scan(/\d+/).first.to_i
33
+ number = SecureRandom.random_number(UPPER_LIMIT) unless number > 0
27
34
  prefix.to_s + "/" + Base32::Crockford.encode(number, split: 4, length: 8, checksum: true)
28
35
  end
29
36
 
30
- # currently only supports markdown files with YAML header
31
- def mint_doi_for_file(filepath, options={})
37
+ def generate_accession_number(options={})
38
+ lower_limit = options[:lower_limit] || 0
39
+ namespace = options[:namespace] || 'MS-'
40
+ registered_numbers = options[:registered_numbers] || []
41
+
42
+ if options[:number]
43
+ number = options[:number].to_s
44
+ else
45
+ begin
46
+ number = SecureRandom.random_number(1000000) + lower_limit
47
+ end while registered_numbers.include? number
48
+ number = number.to_s
49
+ end
50
+
51
+ number = number.to_s.rjust(options[:length], '0') if options[:length]
52
+
53
+ if options[:split]
54
+ number = number.reverse
55
+ number = number.scan(/.{1,#{options[:split]}}/).map { |x| x.reverse }
56
+ number = number.reverse.join("-")
57
+ end
58
+
59
+ namespace + number
60
+ end
61
+
62
+ def get_accession_number(filepath)
63
+ metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
64
+ metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
65
+ end
66
+
67
+ def get_all_accession_numbers(folderpath)
68
+ Dir.glob("#{folderpath}/*.md").map do |filepath|
69
+ get_accession_number(filepath)
70
+ end.select { |a| a > 0 }.sort
71
+ end
72
+
73
+ def update_accession_number(filepath, options={})
32
74
  filename = File.basename(filepath)
33
75
  return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
34
76
 
35
77
  old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
36
- return "DOI #{old_metadata["doi"]} not changed for #{filename}" if old_metadata["doi"] && old_metadata["published"]
78
+ return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]
79
+
80
+ if old_metadata["doi"].present?
81
+ number = decode_doi(old_metadata["doi"])
82
+ options[:number] = number if number > 0
83
+ else
84
+ folderpath = File.dirname(filepath)
85
+ options[:registered_numbers] = get_all_accession_numbers(folderpath)
86
+ end
87
+
88
+ accession_number = generate_accession_number(options)
89
+
90
+ new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
91
+ "Accession number #{new_metadata["accession_number"]} generated for #{filename}"
92
+ end
93
+
94
+ def update_all_accession_numbers(folderpath)
95
+ Dir.glob("#{folderpath}/*.md").map do |filepath|
96
+ update_accession_number(filepath)
97
+ end
98
+ end
99
+
100
+ # fetch schema.org metadata in JSON-LD format to mint DOI
101
+ def mint_doi_for_url(url, options={})
102
+ filename = File.basename(url)
103
+ source_path = options[:source_path] || "/"
104
+ filepath = Dir.pwd + source_path + filename + ".md"
37
105
 
38
- metadata = generate_metadata_for_work(filepath, options)
39
- work = post_metadata_for_work(metadata, options)
106
+ metadata = generate_metadata_for_work(url, options)
107
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
40
108
 
41
- # return "Errors for DOI #{metadata["doi"]}:\n#{work.validation_errors}" if work.validation_errors.present?
109
+ response = post_metadata_for_work(metadata, options)
110
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
42
111
 
43
112
  new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => true)
44
113
  "DOI #{new_metadata["doi"]} minted for #{filename}"
45
114
  end
46
115
 
47
- # currently only supports markdown files with YAML header
48
- def mint_and_hide_doi_for_file(filepath, options={})
49
- filename = File.basename(filepath)
50
- return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
51
-
52
- old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
53
- return "DOI #{old_metadata["doi"]} not changed for #{filename}" if old_metadata["doi"] && old_metadata["published"]
116
+ # fetch schema.org metadata in JSON-LD format to mint DOI
117
+ def mint_and_hide_doi_for_url(url, options={})
118
+ filename = File.basename(url)
119
+ source_path = options[:source_path] || "/"
120
+ filepath = Dir.pwd + source_path + filename + ".md"
54
121
 
55
- metadata = generate_metadata_for_work(filepath, options)
56
- work = post_and_hide_metadata_for_work(metadata, options)
122
+ metadata = generate_metadata_for_work(url, options)
123
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
57
124
 
58
- # return "Errors for DOI #{metadata["doi"]}:\n#{work.validation_errors}" if work.validation_errors.present?
125
+ response = post_metadata_for_work(metadata, options)
126
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
59
127
 
60
128
  new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => false)
61
129
  "DOI #{new_metadata["doi"]} minted and hidden for #{filename}"
62
130
  end
63
131
 
64
- # currently only supports markdown files with YAML header
132
+ # fetch schema.org metadata in JSON-LD format to mint DOI
65
133
  # DOIs are never deleted, but we can remove the metadata from the DataCite index
66
- def hide_doi_for_file(filepath, options={})
67
- filename = File.basename(filepath)
68
- return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
69
-
70
- old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
71
- return "DOI #{old_metadata["doi"]} not changed for #{filename}" unless old_metadata["doi"] && old_metadata["published"]
134
+ def hide_doi_for_url(url, options={})
135
+ filename = File.basename(url)
136
+ source_path = options[:source_path] || "/"
137
+ filepath = Dir.pwd + source_path + filename + ".md"
72
138
 
73
- metadata = generate_metadata_for_work(filepath, options)
74
- work = hide_metadata_for_work(metadata, options)
139
+ metadata = generate_metadata_for_work(url, options)
140
+ return "DOI #{metadata["doi"]} not changed for #{filename}" unless metadata["doi"] && metadata["date_issued"]
75
141
 
76
- # return "Errors for DOI #{old_metadata["doi"]}:\n#{work.validation_errors}" if work.validation_errors.present?
142
+ response = hide_metadata_for_work(metadata, options)
143
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
77
144
 
78
145
  new_metadata = Bergamasco::Markdown.update_file(filepath, "published" => false)
79
- "DOI #{old_metadata["doi"]} hidden for #{filename}"
146
+ "DOI #{metadata["doi"]} hidden for #{filename}"
80
147
  end
81
148
 
82
- def mint_dois_for_all_files(folderpath, options={})
83
- Dir.glob("#{folderpath}/*.md").map do |filepath|
84
- mint_doi_for_file(filepath, options)
149
+ def mint_dois_for_all_urls(urls, options={})
150
+ urls.map do |url|
151
+ mint_doi_for_url(url, options)
85
152
  end.join("\n")
86
153
  end
87
154
 
88
- def mint_and_hide_dois_for_all_files(folderpath, options={})
89
- Dir.glob("#{folderpath}/*.md").map do |filepath|
90
- mint_and_hide_doi_for_file(filepath, options)
155
+ def mint_and_hide_dois_for_all_urls(urls, options={})
156
+ urls.map do |url|
157
+ mint_and_hide_doi_for_url(url, options)
91
158
  end.join("\n")
92
159
  end
93
160
 
94
- def hide_dois_for_all_files(folderpath, options={})
95
- Dir.glob("#{folderpath}/*.md").map do |filepath|
96
- hide_doi_for_file(filepath, options)
161
+ def hide_dois_for_all_urls(urls, options={})
162
+ urls.map do |url|
163
+ hide_doi_for_url(url, options)
97
164
  end.join("\n")
98
165
  end
99
166
 
100
- def generate_metadata_for_work(filepath, options={})
101
- sitepath = options[:sitepath] || ENV['SITE_SITEPATH'] || "data/site.yml"
102
- authorpath = options[:authorpath] || ENV['SITE_AUTHORPATH'] || "data/authors.yml"
103
- referencespath = options[:referencespath] || ENV['SITE_REFERENCESPATH'] || "data/references.yaml"
104
- csl = options[:csl] || ENV['SITE_CSLPATH'] || "styles/apa.csl"
105
- options = options.merge(csl: csl, bibliography: referencespath)
167
+ def generate_metadata_for_work(url, options={})
168
+ doc = Nokogiri::HTML(open(url))
169
+ json = doc.at_xpath("//script[@type='application/ld+json']")
170
+ return "Error: no schema.org metadata found" unless json.present?
106
171
 
107
- metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath, options.except(:number))
172
+ metadata = ActiveSupport::JSON.decode(json.text)
108
173
 
109
- return "Error: required metadata missing" unless ["author", "title", "date", "summary"].all? { |k| metadata.key? k }
110
-
111
- # read in optional yaml configuration files for site, author and references
112
- site_options = Bergamasco::Markdown.read_yaml(sitepath) || {}
113
- author_options = Bergamasco::Markdown.read_yaml(authorpath) || {}
114
- references = Bergamasco::Markdown.read_yaml(referencespath) || {}
174
+ return "Error: required metadata missing" unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
115
175
 
116
176
  # required metadata
117
- prefix = options[:prefix] || site_options["prefix"] || ENV['PREFIX']
118
- metadata["doi"] ||= encode_doi(prefix, options)
119
-
120
- site_url = site_options["site_url"] || ENV['SITE_URL']
121
- metadata["url"] ||= url_from_path(site_url, filepath)
122
-
123
- metadata["creators"] = Array(metadata["author"]).map do |a|
124
- author = author_options.fetch(a, {})
125
- if author.present?
126
- { given_name: author["given"],
127
- family_name: author["family"],
128
- orcid: author["orcid"] }
129
- else
130
- { literal: a }
131
- end
177
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
178
+ uri = Addressable::URI.parse(metadata["@id"])
179
+ metadata["doi"] = uri.path[1..-1].upcase
132
180
  end
133
181
 
134
- metadata["publisher"] = site_options["site_title"] || ENV['SITE_TITLE']
135
- metadata["publication_year"] = metadata["date"][0..3].to_i
182
+ metadata["title"] = metadata["name"]
183
+
184
+ metadata["creators"] = format_authors(metadata["author"])
185
+
186
+ metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
187
+ metadata["publication_year"] = metadata.fetch("datePublished", "")[0..3].to_i
136
188
 
137
- metadata["type"] ||= site_options["default_type"] || ENV['SITE_DEFAULT_TYPE'] || "BlogPosting"
138
- resource_type_general = metadata["type"] == "Dataset" ? "Dataset" : "Text"
189
+ resource_type_general = metadata["@type"] == "Dataset" ? "Dataset" : "Text"
139
190
 
140
- metadata["resource_type"] = { value: metadata["type"],
191
+ metadata["resource_type"] = { value: metadata["@type"],
141
192
  resource_type_general: resource_type_general }
142
193
 
143
194
  # recommended metadata
144
- metadata["descriptions"] = [{ value: metadata["summary"],
145
- description_type: "Abstract" }]
195
+
196
+ # use alternate_identifier to generate DOI
197
+ metadata["alternate_identifier"] = metadata["alternateName"]
198
+
199
+ if metadata["description"].present?
200
+ metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
201
+ end
146
202
 
147
203
  # use default version 1.0
148
204
  metadata["version"] ||= "1.0"
149
205
 
150
206
  # fetch reference metadata if available
151
- metadata["related_identifiers"] = Array(metadata["references"]).map do |r|
152
- reference = references.fetch(r, {})
153
- if reference.present?
154
- if reference["DOI"].present?
155
- value = reference["DOI"].upcase
156
- type = "DOI"
157
- elsif /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(reference["URL"])
158
- uri = Addressable::URI.parse(reference["URL"])
159
- value = uri.path[1..-1].upcase
160
- type = "DOI"
161
- elsif reference["URL"].present?
162
- value = reference["URL"]
163
- type = "URL"
164
- else
165
- type = nil
166
- end
167
- else
168
- if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(r)
169
- uri = Addressable::URI.parse(r)
170
- value = uri.path[1..-1].upcase
171
- type = "DOI"
172
- elsif /(http|https):\/\//.match(r)
173
- uri = Addressable::URI.parse(r)
174
- value = uri.normalize.to_s
175
- type = "URL"
176
- else
177
- type = nil
178
- end
179
- end
180
-
181
- {
182
- value: value,
183
- related_identifier_type: type,
184
- relation_type: "References"
185
- }
186
- end.select { |t| t[:related_identifier_type].present? }
187
-
188
- metadata["license_name"] = site_options.fetch("license", {}).fetch("name", nil) || ENV['SITE_LICENCE_NAME'] || "Creative Commons Attribution"
189
- metadata["license_url"] = site_options.fetch("license", {}).fetch("url", nil) || ENV['SITE_LICENCE_URL'] || "https://creativecommons.org/licenses/by/4.0/"
190
- metadata["rights_list"] = [{ value: metadata["license_name"], rights_uri: metadata["license_url"] }]
207
+ metadata["related_identifiers"] = get_related_identifiers(metadata)
191
208
 
192
- metadata["subjects"] = Array(metadata["tags"]).select { |t| t != "featured" }
209
+ if metadata["license"].present?
210
+ value = LICENSES.fetch(metadata["license_url"], nil)
211
+ metadata["rights_list"] = [{ value: value, rights_uri: metadata["license"] }] if value.present?
212
+ end
193
213
 
194
- contributor = site_options["institution"] || ENV['SITE_INSTITUTION']
195
- metadata["contributors"] = [{ literal: contributor, contributor_type: "HostingInstitution" }]
214
+ if metadata["keywords"].present?
215
+ metadata["subjects"] = Array(metadata["keywords"].split(", ")).select { |k| k != "featured" }
216
+ end
196
217
 
197
- metadata["date_issued"] = metadata["date"]
198
- metadata["publication_month"] = metadata["date"][5..6]
199
- metadata["publication_day"] = metadata["date"][8..9]
218
+ metadata["date_created"] = metadata["dateCreated"]
219
+ metadata["date_issued"] = metadata["datePublished"]
220
+ metadata["date_updated"] = metadata["dateModified"]
200
221
 
201
- metadata = metadata.extract!(*%w(doi url creators title publisher
202
- publication_year publication_month publication_day resource_type descriptions version license_name license_url rights_list subjects contributors
203
- date_issued related_identifiers))
204
- end
205
-
206
- def url_from_path(site_url, filepath)
207
- site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
222
+ metadata = metadata.extract!(*%w(doi alternate_identifier url creators title
223
+ publisher publication_year resource_type descriptions version rights_list
224
+ subjects date_issued date_created date_updated related_identifiers))
208
225
  end
209
226
 
210
227
  def post_metadata_for_work(metadata, options={})
228
+ prefix = options[:prefix] || ENV['PREFIX']
229
+ metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
230
+
211
231
  work = Cirneco::Work.new(metadata)
212
- return work.validation_errors if work.validation_errors.present?
232
+ return work.validation_errors if work.validation_errors.body["errors"].present?
213
233
 
214
234
  response = work.post_metadata(work.data, options)
215
235
  return response unless response.status == 201
@@ -218,8 +238,11 @@ module Cirneco
218
238
  end
219
239
 
220
240
  def post_and_hide_metadata_for_work(metadata, options={})
241
+ prefix = options[:prefix] || ENV['PREFIX']
242
+ metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
243
+
221
244
  work = Cirneco::Work.new(metadata)
222
- return work.validation_errors if work.validation_errors.present?
245
+ return work.validation_errors if work.validation_errors.body["errors"].present?
223
246
 
224
247
  response = work.post_metadata(work.data, options)
225
248
  return response unless response.status == 201
@@ -231,8 +254,11 @@ module Cirneco
231
254
  end
232
255
 
233
256
  def hide_metadata_for_work(metadata, options={})
257
+ prefix = options[:prefix] || ENV['PREFIX']
258
+ metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
259
+
234
260
  work = Cirneco::Work.new(metadata)
235
- return work.validation_errors if work.validation_errors.present?
261
+ return work.validation_errors if work.validation_errors.body["errors"].present?
236
262
 
237
263
  work.delete_metadata(metadata["doi"], options)
238
264
  end
@@ -240,5 +266,55 @@ module Cirneco
240
266
  def generate_jats(filepath, options={})
241
267
  Bergamasco::Pandoc.write_jats(filepath, options)
242
268
  end
269
+
270
+ def url_from_path(site_url, filepath)
271
+ site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
272
+ end
273
+
274
+ def format_authors(authors)
275
+ Array(authors).map do |author|
276
+ orcid = orcid_from_url(author["@id"])
277
+ name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
278
+
279
+ { given_name: author["givenName"],
280
+ family_name: author["familyName"],
281
+ name: name,
282
+ orcid: orcid }.compact
283
+ end
284
+ end
285
+
286
+ def get_related_identifiers(metadata)
287
+ citations = Array(metadata["citation"])
288
+ parts = Array(metadata["IsPartOf"]).map { |r| r["relation_type"] = "IsPartOf" }
289
+
290
+ (citations + parts).map do |r|
291
+ id = r.fetch("@id", "")
292
+ relation_type = r.fetch("relation_type", "References")
293
+
294
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
295
+ uri = Addressable::URI.parse(id)
296
+ value = uri.path[1..-1].upcase
297
+ type = "DOI"
298
+ elsif /(http|https):\/\//.match(id)
299
+ uri = Addressable::URI.parse(id)
300
+ value = uri.normalize.to_s
301
+ type = "URL"
302
+ else
303
+ type = nil
304
+ end
305
+
306
+ {
307
+ value: value,
308
+ related_identifier_type: type,
309
+ relation_type: relation_type
310
+ }
311
+ end.select { |t| t[:related_identifier_type].present? }
312
+ end
313
+
314
+ def orcid_from_url(url)
315
+ return nil unless url.present?
316
+
317
+ Array(/\Ahttp:\/\/orcid\.org\/(.+)/.match(url)).last
318
+ end
243
319
  end
244
320
  end
@@ -1,3 +1,3 @@
1
1
  module Cirneco
2
- VERSION = "0.7.4"
2
+ VERSION = "0.8.1"
3
3
  end