cirneco 0.7.4 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +4 -4
  3. data/lib/cirneco/api.rb +8 -8
  4. data/lib/cirneco/base.rb +1 -1
  5. data/lib/cirneco/doi.rb +20 -9
  6. data/lib/cirneco/utils.rb +204 -128
  7. data/lib/cirneco/version.rb +1 -1
  8. data/lib/cirneco/work.rb +19 -7
  9. data/spec/api_spec.rb +10 -10
  10. data/spec/doi_spec.rb +29 -27
  11. data/spec/fixtures/cool-dois-minted.html +404 -0
  12. data/spec/fixtures/cool-dois-minted.html.md +99 -0
  13. data/spec/fixtures/cool-dois-missing-metadata.html +356 -0
  14. data/spec/fixtures/cool-dois-no-json-ld.html +352 -0
  15. data/spec/fixtures/cool-dois.html +404 -0
  16. data/spec/fixtures/cool-dois.html.md +1 -0
  17. data/spec/fixtures/index.html +271 -0
  18. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/get/should_get_all_dois_by_prefix.yml +499 -4
  19. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_all_urls.yml +44 -0
  20. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_for_url.yml +44 -0
  21. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_hide_metadata_for_work.yml +8 -4
  22. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_all_urls.yml +130 -0
  23. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_and_hide_for_url.yml +130 -0
  24. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_all_urls.yml +130 -0
  25. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_mint_for_url.yml +130 -0
  26. data/spec/fixtures/vcr_cassettes/Cirneco_DataCenter/mint_and_hide_DOIs/should_post_metadata_for_work.yml +25 -20
  27. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_all_dois.yml +499 -4
  28. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/get/should_get_doi.yml +8 -4
  29. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/MDS_DOI_API/put/should_put_doi.yml +8 -4
  30. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_a_doi.yml +44 -0
  31. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/hides_dois_for_list_of_urls.yml +44 -0
  32. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_a_doi.yml +130 -0
  33. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_a_doi.yml +130 -0
  34. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_and_hides_dois_for_list_of_urls.yml +130 -0
  35. data/spec/fixtures/vcr_cassettes/Cirneco_Doi/mint_and_hide_DOIs/mints_dois_for_list_of_urls.yml +130 -0
  36. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_all_dois.yml +499 -4
  37. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/get/should_get_doi.yml +8 -4
  38. data/spec/fixtures/vcr_cassettes/Cirneco_Work/DOI_API/put/should_put_doi.yml +8 -4
  39. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/get/should_get_media.yml +8 -4
  40. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Media_API/post/should_post_media.yml +8 -4
  41. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/delete/should_delete_metadata.yml +8 -4
  42. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/get/should_get_metadata.yml +10 -5
  43. data/spec/fixtures/vcr_cassettes/Cirneco_Work/Metadata_API/post/should_post_metadata.yml +11 -6
  44. data/spec/utils_spec.rb +105 -48
  45. data/spec/work_spec.rb +4 -4
  46. metadata +20 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a82c5b7e10ab13eabc1913ab86f856bf1d28ee68
4
- data.tar.gz: 38c61337cd19f6623ad835182baccaf2607f260f
3
+ metadata.gz: 0b25496604d7050477dbcd531561118874f76705
4
+ data.tar.gz: f3e38823524d96cd5382a5a54d3838daaf8c3b42
5
5
  SHA512:
6
- metadata.gz: 2dbc92ffc86523a271cb1c69e2af2778f56bf2b0ca4c61714f3825fcbc84ee52ce7af2b42cd9edd1ead906e9a2620feda76de8dfec622698b74b2ab8a7910cf1
7
- data.tar.gz: 00c3050b139c651521e9e1d0c0a454002f0fff2ff5f5b1a018a0e8aefcc4a258a009c2c7a03c332c1bb762dbc3176a941d8bdce961b7680a056f5c2d6b8967e1
6
+ metadata.gz: 8b521bfb2aceebdb52d8248dccd6de030976413720d392748358620e7527c33398224a4a33aea7d7a4dca234c8506da408aceb00f5ac1a47e82177e034392c70
7
+ data.tar.gz: ada9b59941a9c2f3182736cb3ba54b2c44d3b16226a20abc51be56dc0a5b6f0e6949311c6b5ca9a20e396fe624fd872c2d152abe6fd99f823d0f164d27f46b1c
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cirneco (0.7.4)
4
+ cirneco (0.8.1)
5
5
  activesupport (~> 4.2, >= 4.2.5)
6
6
  base32-crockford-checksum (~> 0.2.2)
7
7
  bergamasco (~> 0.3)
@@ -22,7 +22,7 @@ GEM
22
22
  tzinfo (~> 1.1)
23
23
  addressable (2.3.8)
24
24
  base32-crockford-checksum (0.2.3)
25
- bergamasco (0.3.4)
25
+ bergamasco (0.3.5)
26
26
  activesupport (~> 4.2, >= 4.2.5)
27
27
  addressable (~> 2.3.8)
28
28
  builder (~> 3.2, >= 3.2.2)
@@ -33,7 +33,7 @@ GEM
33
33
  pandoc-ruby (~> 2.0, >= 2.0.0)
34
34
  safe_yaml (~> 1.0, >= 1.0.4)
35
35
  builder (3.2.2)
36
- codeclimate-test-reporter (1.0.3)
36
+ codeclimate-test-reporter (1.0.4)
37
37
  simplecov
38
38
  crack (0.4.3)
39
39
  safe_yaml (~> 1.0.0)
@@ -47,7 +47,7 @@ GEM
47
47
  faraday
48
48
  faraday_middleware (0.10.1)
49
49
  faraday (>= 0.7.4, < 1.0)
50
- hashdiff (0.3.1)
50
+ hashdiff (0.3.2)
51
51
  i18n (0.7.0)
52
52
  json (1.8.3)
53
53
  loofah (2.0.3)
data/lib/cirneco/api.rb CHANGED
@@ -6,7 +6,7 @@ module Cirneco
6
6
  def post_metadata(data, options={})
7
7
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
8
8
 
9
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
9
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
10
10
 
11
11
  url = "#{mds_url}/metadata"
12
12
  Maremma.post(url, content_type: 'application/xml;charset=UTF-8', data: data, username: options[:username], password: options[:password])
@@ -15,7 +15,7 @@ module Cirneco
15
15
  def get_metadata(doi, options={})
16
16
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
17
17
 
18
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
18
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
19
19
 
20
20
  url = "#{mds_url}/metadata/#{doi}"
21
21
  Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password], raw: true)
@@ -24,7 +24,7 @@ module Cirneco
24
24
  def delete_metadata(doi, options={})
25
25
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
26
26
 
27
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
27
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
28
28
 
29
29
  url = "#{mds_url}/metadata/#{doi}"
30
30
  Maremma.delete(url, username: options[:username], password: options[:password])
@@ -35,7 +35,7 @@ module Cirneco
35
35
 
36
36
  payload = "doi=#{doi}\nurl=#{options[:url]}"
37
37
 
38
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
38
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
39
39
 
40
40
  url = "#{mds_url}/doi/#{doi}"
41
41
  Maremma.put(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
@@ -44,7 +44,7 @@ module Cirneco
44
44
  def get_doi(doi, options={})
45
45
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
46
46
 
47
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
47
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
48
48
 
49
49
  url = "#{mds_url}/doi/#{doi}"
50
50
  Maremma.get(url, username: options[:username], password: options[:password])
@@ -53,7 +53,7 @@ module Cirneco
53
53
  def get_dois(options={})
54
54
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
55
55
 
56
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
56
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
57
57
 
58
58
  url = "#{mds_url}/doi"
59
59
  response = Maremma.get(url, username: options[:username], password: options[:password])
@@ -66,7 +66,7 @@ module Cirneco
66
66
 
67
67
  payload = options[:raw] ? options[:media] : options[:media].map { |m| "#{m[:mime_type]}=#{m[:url]}" }.join("\n")
68
68
 
69
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
69
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
70
70
 
71
71
  url = "#{mds_url}/media/#{doi}"
72
72
  Maremma.post(url, content_type: 'text/plain;charset=UTF-8', data: payload, username: options[:username], password: options[:password])
@@ -75,7 +75,7 @@ module Cirneco
75
75
  def get_media(doi, options={})
76
76
  return OpenStruct.new(body: { "errors" => [{ "title" => "Username or password missing" }] }) unless options[:username].present? && options[:password].present?
77
77
 
78
- mds_url = options[:sandbox] ? 'https://mds.test.datacite.org' : 'https://mds.datacite.org'
78
+ mds_url = options[:sandbox] ? 'https://mds-sandbox.datacite.org' : 'https://mds.datacite.org'
79
79
 
80
80
  url = "#{mds_url}/media/#{doi}"
81
81
  response = Maremma.get(url, accept: 'application/xml', username: options[:username], password: options[:password])
data/lib/cirneco/base.rb CHANGED
@@ -16,6 +16,6 @@ module Cirneco
16
16
  end
17
17
 
18
18
  # default values for some ENV variables
19
- ENV['MDS_URL'] ||= "https://mds.test.datacite.org"
19
+ ENV['MDS_URL'] ||= "https://mds-sandbox.datacite.org"
20
20
  end
21
21
  end
data/lib/cirneco/doi.rb CHANGED
@@ -14,6 +14,7 @@ module Cirneco
14
14
  method_option :username, :default => ENV['MDS_USERNAME']
15
15
  method_option :password, :default => ENV['MDS_PASSWORD']
16
16
  method_option :sandbox, :type => :boolean, :force => false
17
+ method_option :limit, :type => :numeric, :default => 25
17
18
  def get(doi)
18
19
  if doi == "all"
19
20
  response = get_dois(options)
@@ -23,6 +24,8 @@ module Cirneco
23
24
 
24
25
  if response.body["errors"]
25
26
  puts "Error: " + response.body["errors"].first.fetch("title", "An error occured")
27
+ elsif doi == "all"
28
+ puts response.body["data"][0...options[:limit]]
26
29
  else
27
30
  puts response.body["data"]
28
31
  end
@@ -54,6 +57,14 @@ module Cirneco
54
57
  end
55
58
  end
56
59
 
60
+ desc "generate DOI", "generate a DOI name"
61
+ method_option :lower_limit, :type => :numeric, :default => 0
62
+ method_option :namespace, :default => 'MS-'
63
+ method_option :number, :type => :numeric, :aliases => '-n'
64
+ def accession_number
65
+ puts generate_accession_number(options)
66
+ end
67
+
57
68
  desc "decode DOI", "decode DOI encoded using Crockford base32 algorithm"
58
69
  def decode(doi)
59
70
  number = decode_doi(doi)
@@ -86,10 +97,10 @@ module Cirneco
86
97
  method_option :sandbox, :type => :boolean, :force => false
87
98
  def mint(filepath)
88
99
 
89
- if File.directory?(filepath)
90
- response = mint_dois_for_all_files(filepath, options)
100
+ if filepath.is_a?(Array)
101
+ response = mint_dois_for_all_urls(filepath, options)
91
102
  else
92
- response = mint_doi_for_file(filepath, options)
103
+ response = mint_doi_for_url(filepath, options)
93
104
  end
94
105
 
95
106
  puts response
@@ -107,10 +118,10 @@ module Cirneco
107
118
  method_option :sandbox, :type => :boolean, :force => false
108
119
  def mint_and_hide(filepath)
109
120
 
110
- if File.directory?(filepath)
111
- response = mint_and_hide_dois_for_all_files(filepath, options)
121
+ if filepath.is_a?(Array)
122
+ response = mint_and_hide_dois_for_all_urls(filepath, options)
112
123
  else
113
- response = mint_and_hide_doi_for_file(filepath, options)
124
+ response = mint_and_hide_doi_for_url(filepath, options)
114
125
  end
115
126
 
116
127
  puts response
@@ -127,10 +138,10 @@ module Cirneco
127
138
  method_option :sandbox, :type => :boolean, :force => false
128
139
  def hide(filepath)
129
140
 
130
- if File.directory?(filepath)
131
- response = hide_dois_for_all_files(filepath, options)
141
+ if filepath.is_a?(Array)
142
+ response = hide_dois_for_all_urls(filepath, options)
132
143
  else
133
- response = hide_doi_for_file(filepath, options)
144
+ response = hide_doi_for_url(filepath, options)
134
145
  end
135
146
 
136
147
  puts response
data/lib/cirneco/utils.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'base32/crockford'
2
2
  require 'securerandom'
3
3
  require 'bergamasco'
4
+ require 'open-uri'
4
5
  require 'time'
5
6
 
6
7
  module Cirneco
@@ -8,6 +9,11 @@ module Cirneco
8
9
  # "ZZZZZZZ" decoded as number
9
10
  UPPER_LIMIT = 34359738367
10
11
 
12
+ LICENSES = {
13
+ "https://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution (CC-BY 4.0)",
14
+ "https://creativecommons.org/publicdomain/zero/1.0/" => "Creative Commons Public Domain Dedication (CC0 1.0)"
15
+ }
16
+
11
17
  def get_dois_by_prefix(prefix, options={})
12
18
  response = get_dois(options)
13
19
 
@@ -23,193 +29,207 @@ module Cirneco
23
29
  end
24
30
 
25
31
  def encode_doi(prefix, options={})
26
- number = options[:number] || SecureRandom.random_number(UPPER_LIMIT)
32
+ number = options[:number].to_s.scan(/\d+/).first.to_i
33
+ number = SecureRandom.random_number(UPPER_LIMIT) unless number > 0
27
34
  prefix.to_s + "/" + Base32::Crockford.encode(number, split: 4, length: 8, checksum: true)
28
35
  end
29
36
 
30
- # currently only supports markdown files with YAML header
31
- def mint_doi_for_file(filepath, options={})
37
+ def generate_accession_number(options={})
38
+ lower_limit = options[:lower_limit] || 0
39
+ namespace = options[:namespace] || 'MS-'
40
+ registered_numbers = options[:registered_numbers] || []
41
+
42
+ if options[:number]
43
+ number = options[:number].to_s
44
+ else
45
+ begin
46
+ number = SecureRandom.random_number(1000000) + lower_limit
47
+ end while registered_numbers.include? number
48
+ number = number.to_s
49
+ end
50
+
51
+ number = number.to_s.rjust(options[:length], '0') if options[:length]
52
+
53
+ if options[:split]
54
+ number = number.reverse
55
+ number = number.scan(/.{1,#{options[:split]}}/).map { |x| x.reverse }
56
+ number = number.reverse.join("-")
57
+ end
58
+
59
+ namespace + number
60
+ end
61
+
62
+ def get_accession_number(filepath)
63
+ metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
64
+ metadata.fetch("accession_number", nil).to_s.scan(/\d+/).first.to_i
65
+ end
66
+
67
+ def get_all_accession_numbers(folderpath)
68
+ Dir.glob("#{folderpath}/*.md").map do |filepath|
69
+ get_accession_number(filepath)
70
+ end.select { |a| a > 0 }.sort
71
+ end
72
+
73
+ def update_accession_number(filepath, options={})
32
74
  filename = File.basename(filepath)
33
75
  return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
34
76
 
35
77
  old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
36
- return "DOI #{old_metadata["doi"]} not changed for #{filename}" if old_metadata["doi"] && old_metadata["published"]
78
+ return "Accession number #{old_metadata["accession_number"]} not changed for #{filename}" if old_metadata["accession_number"]
79
+
80
+ if old_metadata["doi"].present?
81
+ number = decode_doi(old_metadata["doi"])
82
+ options[:number] = number if number > 0
83
+ else
84
+ folderpath = File.dirname(filepath)
85
+ options[:registered_numbers] = get_all_accession_numbers(folderpath)
86
+ end
87
+
88
+ accession_number = generate_accession_number(options)
89
+
90
+ new_metadata = Bergamasco::Markdown.update_file(filepath, "accession_number" => accession_number)
91
+ "Accession number #{new_metadata["accession_number"]} generated for #{filename}"
92
+ end
93
+
94
+ def update_all_accession_numbers(folderpath)
95
+ Dir.glob("#{folderpath}/*.md").map do |filepath|
96
+ update_accession_number(filepath)
97
+ end
98
+ end
99
+
100
+ # fetch schema.org metadata in JSON-LD format to mint DOI
101
+ def mint_doi_for_url(url, options={})
102
+ filename = File.basename(url)
103
+ source_path = options[:source_path] || "/"
104
+ filepath = Dir.pwd + source_path + filename + ".md"
37
105
 
38
- metadata = generate_metadata_for_work(filepath, options)
39
- work = post_metadata_for_work(metadata, options)
106
+ metadata = generate_metadata_for_work(url, options)
107
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
40
108
 
41
- # return "Errors for DOI #{metadata["doi"]}:\n#{work.validation_errors}" if work.validation_errors.present?
109
+ response = post_metadata_for_work(metadata, options)
110
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
42
111
 
43
112
  new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => true)
44
113
  "DOI #{new_metadata["doi"]} minted for #{filename}"
45
114
  end
46
115
 
47
- # currently only supports markdown files with YAML header
48
- def mint_and_hide_doi_for_file(filepath, options={})
49
- filename = File.basename(filepath)
50
- return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
51
-
52
- old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
53
- return "DOI #{old_metadata["doi"]} not changed for #{filename}" if old_metadata["doi"] && old_metadata["published"]
116
+ # fetch schema.org metadata in JSON-LD format to mint DOI
117
+ def mint_and_hide_doi_for_url(url, options={})
118
+ filename = File.basename(url)
119
+ source_path = options[:source_path] || "/"
120
+ filepath = Dir.pwd + source_path + filename + ".md"
54
121
 
55
- metadata = generate_metadata_for_work(filepath, options)
56
- work = post_and_hide_metadata_for_work(metadata, options)
122
+ metadata = generate_metadata_for_work(url, options)
123
+ return "DOI #{metadata["doi"]} not changed for #{filename}" if metadata["doi"] && metadata["date_issued"]
57
124
 
58
- # return "Errors for DOI #{metadata["doi"]}:\n#{work.validation_errors}" if work.validation_errors.present?
125
+ response = post_metadata_for_work(metadata, options)
126
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
59
127
 
60
128
  new_metadata = Bergamasco::Markdown.update_file(filepath, "doi" => metadata["doi"], "published" => false)
61
129
  "DOI #{new_metadata["doi"]} minted and hidden for #{filename}"
62
130
  end
63
131
 
64
- # currently only supports markdown files with YAML header
132
+ # fetch schema.org metadata in JSON-LD format to mint DOI
65
133
  # DOIs are never deleted, but we can remove the metadata from the DataCite index
66
- def hide_doi_for_file(filepath, options={})
67
- filename = File.basename(filepath)
68
- return "File #{filename} ignored: not a markdown file" unless File.extname(filepath) == ".md"
69
-
70
- old_metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath)
71
- return "DOI #{old_metadata["doi"]} not changed for #{filename}" unless old_metadata["doi"] && old_metadata["published"]
134
+ def hide_doi_for_url(url, options={})
135
+ filename = File.basename(url)
136
+ source_path = options[:source_path] || "/"
137
+ filepath = Dir.pwd + source_path + filename + ".md"
72
138
 
73
- metadata = generate_metadata_for_work(filepath, options)
74
- work = hide_metadata_for_work(metadata, options)
139
+ metadata = generate_metadata_for_work(url, options)
140
+ return "DOI #{metadata["doi"]} not changed for #{filename}" unless metadata["doi"] && metadata["date_issued"]
75
141
 
76
- # return "Errors for DOI #{old_metadata["doi"]}:\n#{work.validation_errors}" if work.validation_errors.present?
142
+ response = hide_metadata_for_work(metadata, options)
143
+ return "Errors for DOI #{metadata["doi"]}: #{response.body['errors'].first['title']}\n" if response.body['errors']
77
144
 
78
145
  new_metadata = Bergamasco::Markdown.update_file(filepath, "published" => false)
79
- "DOI #{old_metadata["doi"]} hidden for #{filename}"
146
+ "DOI #{metadata["doi"]} hidden for #{filename}"
80
147
  end
81
148
 
82
- def mint_dois_for_all_files(folderpath, options={})
83
- Dir.glob("#{folderpath}/*.md").map do |filepath|
84
- mint_doi_for_file(filepath, options)
149
+ def mint_dois_for_all_urls(urls, options={})
150
+ urls.map do |url|
151
+ mint_doi_for_url(url, options)
85
152
  end.join("\n")
86
153
  end
87
154
 
88
- def mint_and_hide_dois_for_all_files(folderpath, options={})
89
- Dir.glob("#{folderpath}/*.md").map do |filepath|
90
- mint_and_hide_doi_for_file(filepath, options)
155
+ def mint_and_hide_dois_for_all_urls(urls, options={})
156
+ urls.map do |url|
157
+ mint_and_hide_doi_for_url(url, options)
91
158
  end.join("\n")
92
159
  end
93
160
 
94
- def hide_dois_for_all_files(folderpath, options={})
95
- Dir.glob("#{folderpath}/*.md").map do |filepath|
96
- hide_doi_for_file(filepath, options)
161
+ def hide_dois_for_all_urls(urls, options={})
162
+ urls.map do |url|
163
+ hide_doi_for_url(url, options)
97
164
  end.join("\n")
98
165
  end
99
166
 
100
- def generate_metadata_for_work(filepath, options={})
101
- sitepath = options[:sitepath] || ENV['SITE_SITEPATH'] || "data/site.yml"
102
- authorpath = options[:authorpath] || ENV['SITE_AUTHORPATH'] || "data/authors.yml"
103
- referencespath = options[:referencespath] || ENV['SITE_REFERENCESPATH'] || "data/references.yaml"
104
- csl = options[:csl] || ENV['SITE_CSLPATH'] || "styles/apa.csl"
105
- options = options.merge(csl: csl, bibliography: referencespath)
167
+ def generate_metadata_for_work(url, options={})
168
+ doc = Nokogiri::HTML(open(url))
169
+ json = doc.at_xpath("//script[@type='application/ld+json']")
170
+ return "Error: no schema.org metadata found" unless json.present?
106
171
 
107
- metadata = Bergamasco::Markdown.read_yaml_for_doi_metadata(filepath, options.except(:number))
172
+ metadata = ActiveSupport::JSON.decode(json.text)
108
173
 
109
- return "Error: required metadata missing" unless ["author", "title", "date", "summary"].all? { |k| metadata.key? k }
110
-
111
- # read in optional yaml configuration files for site, author and references
112
- site_options = Bergamasco::Markdown.read_yaml(sitepath) || {}
113
- author_options = Bergamasco::Markdown.read_yaml(authorpath) || {}
114
- references = Bergamasco::Markdown.read_yaml(referencespath) || {}
174
+ return "Error: required metadata missing" unless ["name", "author", "publisher", "datePublished", "@type"].all? { |k| metadata.key? k }
115
175
 
116
176
  # required metadata
117
- prefix = options[:prefix] || site_options["prefix"] || ENV['PREFIX']
118
- metadata["doi"] ||= encode_doi(prefix, options)
119
-
120
- site_url = site_options["site_url"] || ENV['SITE_URL']
121
- metadata["url"] ||= url_from_path(site_url, filepath)
122
-
123
- metadata["creators"] = Array(metadata["author"]).map do |a|
124
- author = author_options.fetch(a, {})
125
- if author.present?
126
- { given_name: author["given"],
127
- family_name: author["family"],
128
- orcid: author["orcid"] }
129
- else
130
- { literal: a }
131
- end
177
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(metadata["@id"])
178
+ uri = Addressable::URI.parse(metadata["@id"])
179
+ metadata["doi"] = uri.path[1..-1].upcase
132
180
  end
133
181
 
134
- metadata["publisher"] = site_options["site_title"] || ENV['SITE_TITLE']
135
- metadata["publication_year"] = metadata["date"][0..3].to_i
182
+ metadata["title"] = metadata["name"]
183
+
184
+ metadata["creators"] = format_authors(metadata["author"])
185
+
186
+ metadata["publisher"] = metadata.fetch("publisher", {}).fetch("name", nil)
187
+ metadata["publication_year"] = metadata.fetch("datePublished", "")[0..3].to_i
136
188
 
137
- metadata["type"] ||= site_options["default_type"] || ENV['SITE_DEFAULT_TYPE'] || "BlogPosting"
138
- resource_type_general = metadata["type"] == "Dataset" ? "Dataset" : "Text"
189
+ resource_type_general = metadata["@type"] == "Dataset" ? "Dataset" : "Text"
139
190
 
140
- metadata["resource_type"] = { value: metadata["type"],
191
+ metadata["resource_type"] = { value: metadata["@type"],
141
192
  resource_type_general: resource_type_general }
142
193
 
143
194
  # recommended metadata
144
- metadata["descriptions"] = [{ value: metadata["summary"],
145
- description_type: "Abstract" }]
195
+
196
+ # use alternate_identifier to generate DOI
197
+ metadata["alternate_identifier"] = metadata["alternateName"]
198
+
199
+ if metadata["description"].present?
200
+ metadata["descriptions"] = [{ value: metadata["description"], description_type: "Abstract" }]
201
+ end
146
202
 
147
203
  # use default version 1.0
148
204
  metadata["version"] ||= "1.0"
149
205
 
150
206
  # fetch reference metadata if available
151
- metadata["related_identifiers"] = Array(metadata["references"]).map do |r|
152
- reference = references.fetch(r, {})
153
- if reference.present?
154
- if reference["DOI"].present?
155
- value = reference["DOI"].upcase
156
- type = "DOI"
157
- elsif /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(reference["URL"])
158
- uri = Addressable::URI.parse(reference["URL"])
159
- value = uri.path[1..-1].upcase
160
- type = "DOI"
161
- elsif reference["URL"].present?
162
- value = reference["URL"]
163
- type = "URL"
164
- else
165
- type = nil
166
- end
167
- else
168
- if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(r)
169
- uri = Addressable::URI.parse(r)
170
- value = uri.path[1..-1].upcase
171
- type = "DOI"
172
- elsif /(http|https):\/\//.match(r)
173
- uri = Addressable::URI.parse(r)
174
- value = uri.normalize.to_s
175
- type = "URL"
176
- else
177
- type = nil
178
- end
179
- end
180
-
181
- {
182
- value: value,
183
- related_identifier_type: type,
184
- relation_type: "References"
185
- }
186
- end.select { |t| t[:related_identifier_type].present? }
187
-
188
- metadata["license_name"] = site_options.fetch("license", {}).fetch("name", nil) || ENV['SITE_LICENCE_NAME'] || "Creative Commons Attribution"
189
- metadata["license_url"] = site_options.fetch("license", {}).fetch("url", nil) || ENV['SITE_LICENCE_URL'] || "https://creativecommons.org/licenses/by/4.0/"
190
- metadata["rights_list"] = [{ value: metadata["license_name"], rights_uri: metadata["license_url"] }]
207
+ metadata["related_identifiers"] = get_related_identifiers(metadata)
191
208
 
192
- metadata["subjects"] = Array(metadata["tags"]).select { |t| t != "featured" }
209
+ if metadata["license"].present?
210
+ value = LICENSES.fetch(metadata["license_url"], nil)
211
+ metadata["rights_list"] = [{ value: value, rights_uri: metadata["license"] }] if value.present?
212
+ end
193
213
 
194
- contributor = site_options["institution"] || ENV['SITE_INSTITUTION']
195
- metadata["contributors"] = [{ literal: contributor, contributor_type: "HostingInstitution" }]
214
+ if metadata["keywords"].present?
215
+ metadata["subjects"] = Array(metadata["keywords"].split(", ")).select { |k| k != "featured" }
216
+ end
196
217
 
197
- metadata["date_issued"] = metadata["date"]
198
- metadata["publication_month"] = metadata["date"][5..6]
199
- metadata["publication_day"] = metadata["date"][8..9]
218
+ metadata["date_created"] = metadata["dateCreated"]
219
+ metadata["date_issued"] = metadata["datePublished"]
220
+ metadata["date_updated"] = metadata["dateModified"]
200
221
 
201
- metadata = metadata.extract!(*%w(doi url creators title publisher
202
- publication_year publication_month publication_day resource_type descriptions version license_name license_url rights_list subjects contributors
203
- date_issued related_identifiers))
204
- end
205
-
206
- def url_from_path(site_url, filepath)
207
- site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
222
+ metadata = metadata.extract!(*%w(doi alternate_identifier url creators title
223
+ publisher publication_year resource_type descriptions version rights_list
224
+ subjects date_issued date_created date_updated related_identifiers))
208
225
  end
209
226
 
210
227
  def post_metadata_for_work(metadata, options={})
228
+ prefix = options[:prefix] || ENV['PREFIX']
229
+ metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
230
+
211
231
  work = Cirneco::Work.new(metadata)
212
- return work.validation_errors if work.validation_errors.present?
232
+ return work.validation_errors if work.validation_errors.body["errors"].present?
213
233
 
214
234
  response = work.post_metadata(work.data, options)
215
235
  return response unless response.status == 201
@@ -218,8 +238,11 @@ module Cirneco
218
238
  end
219
239
 
220
240
  def post_and_hide_metadata_for_work(metadata, options={})
241
+ prefix = options[:prefix] || ENV['PREFIX']
242
+ metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
243
+
221
244
  work = Cirneco::Work.new(metadata)
222
- return work.validation_errors if work.validation_errors.present?
245
+ return work.validation_errors if work.validation_errors.body["errors"].present?
223
246
 
224
247
  response = work.post_metadata(work.data, options)
225
248
  return response unless response.status == 201
@@ -231,8 +254,11 @@ module Cirneco
231
254
  end
232
255
 
233
256
  def hide_metadata_for_work(metadata, options={})
257
+ prefix = options[:prefix] || ENV['PREFIX']
258
+ metadata["doi"] ||= encode_doi(prefix, number: metadata["alternate_identifier"])
259
+
234
260
  work = Cirneco::Work.new(metadata)
235
- return work.validation_errors if work.validation_errors.present?
261
+ return work.validation_errors if work.validation_errors.body["errors"].present?
236
262
 
237
263
  work.delete_metadata(metadata["doi"], options)
238
264
  end
@@ -240,5 +266,55 @@ module Cirneco
240
266
  def generate_jats(filepath, options={})
241
267
  Bergamasco::Pandoc.write_jats(filepath, options)
242
268
  end
269
+
270
+ def url_from_path(site_url, filepath)
271
+ site_url.to_s.chomp("\\") + "/" + File.basename(filepath)[0..-9] + "/"
272
+ end
273
+
274
+ def format_authors(authors)
275
+ Array(authors).map do |author|
276
+ orcid = orcid_from_url(author["@id"])
277
+ name = (author["givenName"].present? || author["familyName"].present?) ? nil : author["name"]
278
+
279
+ { given_name: author["givenName"],
280
+ family_name: author["familyName"],
281
+ name: name,
282
+ orcid: orcid }.compact
283
+ end
284
+ end
285
+
286
+ def get_related_identifiers(metadata)
287
+ citations = Array(metadata["citation"])
288
+ parts = Array(metadata["IsPartOf"]).map { |r| r["relation_type"] = "IsPartOf" }
289
+
290
+ (citations + parts).map do |r|
291
+ id = r.fetch("@id", "")
292
+ relation_type = r.fetch("relation_type", "References")
293
+
294
+ if /(http|https):\/\/(dx\.)?doi\.org\/(\w+)/.match(id)
295
+ uri = Addressable::URI.parse(id)
296
+ value = uri.path[1..-1].upcase
297
+ type = "DOI"
298
+ elsif /(http|https):\/\//.match(id)
299
+ uri = Addressable::URI.parse(id)
300
+ value = uri.normalize.to_s
301
+ type = "URL"
302
+ else
303
+ type = nil
304
+ end
305
+
306
+ {
307
+ value: value,
308
+ related_identifier_type: type,
309
+ relation_type: relation_type
310
+ }
311
+ end.select { |t| t[:related_identifier_type].present? }
312
+ end
313
+
314
+ def orcid_from_url(url)
315
+ return nil unless url.present?
316
+
317
+ Array(/\Ahttp:\/\/orcid\.org\/(.+)/.match(url)).last
318
+ end
243
319
  end
244
320
  end
data/lib/cirneco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cirneco
2
- VERSION = "0.7.4"
2
+ VERSION = "0.8.1"
3
3
  end