commonmeta-ruby 3.2.5 → 3.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +16 -2
- data/commonmeta.gemspec +1 -0
- data/lib/commonmeta/author_utils.rb +20 -3
- data/lib/commonmeta/cli.rb +17 -21
- data/lib/commonmeta/crossref_utils.rb +22 -18
- data/lib/commonmeta/readers/json_feed_reader.rb +31 -11
- data/lib/commonmeta/utils.rb +13 -3
- data/lib/commonmeta/version.rb +1 -1
- data/resources/crossref/common5.3.1.xsd +43 -35
- data/spec/author_utils_spec.rb +38 -0
- data/spec/cli_spec.rb +5 -7
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed/blog_post_uuid.yml +980 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed_unregistered/blog_post_uuid.yml +49 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/unregistered_posts.yml +215 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blogger_post.yml +20 -11
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_author_name_suffix.yml +215 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_doi.yml +14 -9
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_without_doi.yml +9 -8
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post.yml +15 -10
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/syldavia_gazette_post_with_references.yml +328 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/upstream_post_with_references.yml +824 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/wordpress_post.yml +14 -9
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/wordpress_post_with_references.yml +390 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/{json_feed_url → json_feed_unregistered_url}/all_posts.yml +11 -11
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/another_schema_org_from_front-matter.yml +103 -105
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/journal_article.yml +5 -5
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/journal_article_from_datacite.yml +4 -4
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_doi.yml +60 -9
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_upstream_blog.yml +55 -7
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_with_references.yml +824 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/posted_content.yml +17 -17
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/schema_org_from_another_science_blog.yml +12 -8
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/schema_org_from_front_matter.yml +178 -181
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/schema_org_from_upstream_blog.yml +104 -92
- data/spec/readers/json_feed_reader_spec.rb +106 -30
- data/spec/utils_spec.rb +23 -11
- data/spec/writers/crossref_xml_writer_spec.rb +31 -3
- metadata +25 -8
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/json_feed_url/front-matter_blog.yml +0 -221
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_item_from_rogue_scholar_with_doi.yml +0 -163
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_item_from_upstream_blog.yml +0 -243
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_post_from_rogue_scholar_with_doi.yml +0 -210
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_post_from_upstream_blog.yml +0 -290
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7a50ad8ff25b9ff753b60cc9a34f578ae064cedc2417a5db146898e8202f8dc3
|
|
4
|
+
data.tar.gz: cc7f32e49a6d0621a30884a8226a9dda04cd5b13ed6545aefc28fd8f19386acb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 16d052d4e0dda3aed49235f43914f3d88e46627741500edb987cbf6dad00091234aa2304541f8ba6d713b05c9b158717d2c5632aa024e60dc694f7126752dda1
|
|
7
|
+
data.tar.gz: d6a7044f3b3722dc71f22f8c98e12d3f5c5e79b82bdf1aff745ccb74ecbd4febd2d53926924b635b97334c30df233dd793802dabfe0ea038f387488acf0ae4ef
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
commonmeta-ruby (3.2.
|
|
4
|
+
commonmeta-ruby (3.2.7)
|
|
5
5
|
activesupport (>= 4.2.5, < 8.0)
|
|
6
6
|
addressable (~> 2.8.1, < 2.8.2)
|
|
7
7
|
base32-url (>= 0.5.0, < 1)
|
|
@@ -10,6 +10,7 @@ PATH
|
|
|
10
10
|
citeproc-ruby (~> 2.0)
|
|
11
11
|
csl-styles (~> 2.0)
|
|
12
12
|
edtf (~> 3.0, >= 3.0.4)
|
|
13
|
+
feedparser (~> 2.2)
|
|
13
14
|
gender_detector (~> 2.0)
|
|
14
15
|
http (~> 5.1, >= 5.1.1)
|
|
15
16
|
json-ld-preloaded (~> 3.2, >= 3.2.2)
|
|
@@ -57,7 +58,7 @@ GEM
|
|
|
57
58
|
docile (1.4.0)
|
|
58
59
|
domain_name (0.5.20190701)
|
|
59
60
|
unf (>= 0.0.5, < 1.0.0)
|
|
60
|
-
ebnf (2.3.
|
|
61
|
+
ebnf (2.3.4)
|
|
61
62
|
htmlentities (~> 4.3)
|
|
62
63
|
rdf (~> 3.2)
|
|
63
64
|
scanf (~> 1.0)
|
|
@@ -65,6 +66,9 @@ GEM
|
|
|
65
66
|
unicode-types (~> 1.8)
|
|
66
67
|
edtf (3.1.1)
|
|
67
68
|
activesupport (>= 3.0, < 8.0)
|
|
69
|
+
feedparser (2.2.0)
|
|
70
|
+
logutils (>= 0.6.1)
|
|
71
|
+
textutils (>= 1.0.0)
|
|
68
72
|
ffi (1.15.5)
|
|
69
73
|
ffi-compiler (1.0.1)
|
|
70
74
|
ffi (>= 1.0.0)
|
|
@@ -86,6 +90,7 @@ GEM
|
|
|
86
90
|
http-form_data (2.3.0)
|
|
87
91
|
i18n (1.14.1)
|
|
88
92
|
concurrent-ruby (~> 1.0)
|
|
93
|
+
iniparser (1.0.1)
|
|
89
94
|
json (2.6.3)
|
|
90
95
|
json-canonicalization (0.3.2)
|
|
91
96
|
json-ld (3.2.5)
|
|
@@ -110,6 +115,7 @@ GEM
|
|
|
110
115
|
llhttp-ffi (0.4.0)
|
|
111
116
|
ffi-compiler (~> 1.0)
|
|
112
117
|
rake (~> 13.0)
|
|
118
|
+
logutils (0.6.1)
|
|
113
119
|
loofah (2.21.3)
|
|
114
120
|
crass (~> 1.0.2)
|
|
115
121
|
nokogiri (>= 1.12.0)
|
|
@@ -129,6 +135,8 @@ GEM
|
|
|
129
135
|
addressable (>= 2.4.0)
|
|
130
136
|
nokogiri (>= 1.8.0)
|
|
131
137
|
public_suffix (>= 4.0.0, < 5)
|
|
138
|
+
props (1.2.0)
|
|
139
|
+
iniparser (>= 0.1.0)
|
|
132
140
|
public_suffix (4.0.7)
|
|
133
141
|
racc (1.7.0)
|
|
134
142
|
rack (3.0.7)
|
|
@@ -205,6 +213,7 @@ GEM
|
|
|
205
213
|
rubocop-capybara (~> 2.17)
|
|
206
214
|
rubocop-factory_bot (~> 2.22)
|
|
207
215
|
ruby-progressbar (1.13.0)
|
|
216
|
+
rubyzip (2.3.2)
|
|
208
217
|
scanf (1.0.0)
|
|
209
218
|
simplecov (0.22.0)
|
|
210
219
|
docile (~> 1.1)
|
|
@@ -218,6 +227,11 @@ GEM
|
|
|
218
227
|
matrix (~> 0.4)
|
|
219
228
|
rdf (~> 3.2)
|
|
220
229
|
temple (0.10.2)
|
|
230
|
+
textutils (1.4.0)
|
|
231
|
+
activesupport
|
|
232
|
+
logutils (>= 0.6.1)
|
|
233
|
+
props (>= 1.1.2)
|
|
234
|
+
rubyzip (>= 1.0.0)
|
|
221
235
|
thor (1.2.2)
|
|
222
236
|
tilt (2.2.0)
|
|
223
237
|
tzinfo (2.0.6)
|
data/commonmeta.gemspec
CHANGED
|
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
|
|
|
26
26
|
s.add_dependency 'citeproc-ruby', '~> 2.0'
|
|
27
27
|
s.add_dependency 'csl-styles', '~> 2.0'
|
|
28
28
|
s.add_dependency 'edtf', '~> 3.0', '>= 3.0.4'
|
|
29
|
+
s.add_dependency 'feedparser', '~> 2.2'
|
|
29
30
|
s.add_dependency 'gender_detector', '~> 2.0'
|
|
30
31
|
s.add_dependency 'http', '~> 5.1', '>= 5.1.1'
|
|
31
32
|
s.add_dependency 'json-ld-preloaded', '~> 3.2', '>= 3.2.2'
|
|
@@ -24,6 +24,8 @@ module Commonmeta
|
|
|
24
24
|
family_name = parse_attributes(author.fetch('familyName', nil)) ||
|
|
25
25
|
parse_attributes(author.fetch('family', nil))
|
|
26
26
|
|
|
27
|
+
name = cleanup_author(name)
|
|
28
|
+
|
|
27
29
|
# parse author identifier
|
|
28
30
|
id = parse_attributes(author.fetch('id', nil), first: true) ||
|
|
29
31
|
parse_attributes(author.fetch('identifier', nil), first: true) ||
|
|
@@ -62,8 +64,6 @@ module Commonmeta
|
|
|
62
64
|
# parse author contributor role
|
|
63
65
|
contributor_type = parse_attributes(author.fetch('contributorType', nil))
|
|
64
66
|
|
|
65
|
-
name = cleanup_author(name)
|
|
66
|
-
|
|
67
67
|
# split name for type Person into given/family name if not already provided
|
|
68
68
|
if type == 'Person' && given_name.blank? && family_name.blank?
|
|
69
69
|
Namae.options[:include_particle_in_family] = true
|
|
@@ -98,6 +98,19 @@ module Commonmeta
|
|
|
98
98
|
author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2')
|
|
99
99
|
end
|
|
100
100
|
|
|
101
|
+
# strip suffixes, e.g. "John Smith, MD" as the named parser doesn't handle them
|
|
102
|
+
author = author.split(',').first if %w[MD PhD].include? author.split(', ').last
|
|
103
|
+
|
|
104
|
+
# remove email addresses
|
|
105
|
+
email = validate_email(author)
|
|
106
|
+
author = author.gsub(email, '') if email.present?
|
|
107
|
+
|
|
108
|
+
# strip spaces at the beginning and end of string
|
|
109
|
+
author = author.strip
|
|
110
|
+
|
|
111
|
+
# remove parentheses around names
|
|
112
|
+
author = author[1..-2] if author[0] == '(' && author[-1] == ')'
|
|
113
|
+
|
|
101
114
|
# remove spaces around hyphens
|
|
102
115
|
author = author.gsub(' - ', '-')
|
|
103
116
|
|
|
@@ -113,12 +126,16 @@ module Commonmeta
|
|
|
113
126
|
# check if a name has only one word, e.g. "FamousOrganization"
|
|
114
127
|
return false if name.to_s.split(' ').size == 1
|
|
115
128
|
|
|
129
|
+
# check for suffixes, e.g. "John Smith, MD"
|
|
130
|
+
return true if %w[MD PhD].include? name.split(', ').last
|
|
131
|
+
|
|
116
132
|
# check of name can be parsed into given/family name
|
|
117
133
|
Namae.options[:include_particle_in_family] = true
|
|
118
134
|
names = Namae.parse(name)
|
|
135
|
+
|
|
119
136
|
parsed_name = names.first
|
|
120
137
|
return true if parsed_name && parsed_name.given
|
|
121
|
-
|
|
138
|
+
|
|
122
139
|
false
|
|
123
140
|
end
|
|
124
141
|
|
data/lib/commonmeta/cli.rb
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
3
|
+
require "thor"
|
|
4
4
|
|
|
5
|
-
require_relative
|
|
6
|
-
require_relative
|
|
5
|
+
require_relative "doi_utils"
|
|
6
|
+
require_relative "utils"
|
|
7
7
|
|
|
8
8
|
module Commonmeta
|
|
9
9
|
class CLI < Thor
|
|
@@ -18,18 +18,18 @@ module Commonmeta
|
|
|
18
18
|
# from http://stackoverflow.com/questions/22809972/adding-a-version-option-to-a-ruby-thor-cli
|
|
19
19
|
map %w[--version -v] => :__print_version
|
|
20
20
|
|
|
21
|
-
desc
|
|
21
|
+
desc "--version, -v", "print the version"
|
|
22
22
|
|
|
23
23
|
def __print_version
|
|
24
24
|
puts Commonmeta::VERSION
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
-
desc
|
|
28
|
-
method_option :from, aliases:
|
|
29
|
-
method_option :to, aliases:
|
|
27
|
+
desc "", "convert metadata"
|
|
28
|
+
method_option :from, aliases: "-f"
|
|
29
|
+
method_option :to, aliases: "-t", default: "schema_org"
|
|
30
30
|
method_option :regenerate, type: :boolean, force: false
|
|
31
|
-
method_option :style, aliases:
|
|
32
|
-
method_option :locale, aliases:
|
|
31
|
+
method_option :style, aliases: "-s", default: "apa"
|
|
32
|
+
method_option :locale, aliases: "-l", default: "en-US"
|
|
33
33
|
method_option :show_errors, type: :boolean, force: false
|
|
34
34
|
method_option :doi
|
|
35
35
|
method_option :depositor
|
|
@@ -47,7 +47,7 @@ module Commonmeta
|
|
|
47
47
|
depositor: options[:depositor],
|
|
48
48
|
email: options[:email],
|
|
49
49
|
registrant: options[:registrant])
|
|
50
|
-
to = options[:to] ||
|
|
50
|
+
to = options[:to] || "schema_org"
|
|
51
51
|
|
|
52
52
|
if options[:show_errors] && !metadata.valid?
|
|
53
53
|
warn metadata.errors
|
|
@@ -56,38 +56,34 @@ module Commonmeta
|
|
|
56
56
|
end
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
desc
|
|
59
|
+
desc "", "encode"
|
|
60
60
|
|
|
61
61
|
def encode(prefix)
|
|
62
62
|
puts encode_doi(prefix)
|
|
63
63
|
end
|
|
64
64
|
|
|
65
|
-
desc
|
|
65
|
+
desc "", "encode_id"
|
|
66
66
|
|
|
67
67
|
def encode_id
|
|
68
68
|
puts encode_container_id
|
|
69
69
|
end
|
|
70
70
|
|
|
71
|
-
desc
|
|
71
|
+
desc "", "decode"
|
|
72
72
|
|
|
73
73
|
def decode(doi)
|
|
74
74
|
puts decode_doi(doi)
|
|
75
75
|
end
|
|
76
76
|
|
|
77
|
-
desc
|
|
77
|
+
desc "", "decode_id"
|
|
78
78
|
|
|
79
79
|
def decode_id(id)
|
|
80
80
|
puts decode_container_id(id)
|
|
81
81
|
end
|
|
82
82
|
|
|
83
|
-
desc
|
|
83
|
+
desc "", "json_feed"
|
|
84
84
|
|
|
85
|
-
def
|
|
86
|
-
|
|
87
|
-
puts get_json_feed(id)
|
|
88
|
-
else
|
|
89
|
-
puts get_json_feed
|
|
90
|
-
end
|
|
85
|
+
def json_feed_unregistered
|
|
86
|
+
puts get_json_feed_unregistered
|
|
91
87
|
end
|
|
92
88
|
|
|
93
89
|
default_task :convert
|
|
@@ -61,7 +61,6 @@ module Commonmeta
|
|
|
61
61
|
insert_crossref_publication_date(xml)
|
|
62
62
|
insert_crossref_abstract(xml)
|
|
63
63
|
insert_crossref_issn(xml)
|
|
64
|
-
insert_crossref_alternate_identifiers(xml)
|
|
65
64
|
insert_crossref_access_indicators(xml)
|
|
66
65
|
insert_doi_data(xml)
|
|
67
66
|
insert_citation_list(xml)
|
|
@@ -78,8 +77,8 @@ module Commonmeta
|
|
|
78
77
|
insert_crossref_titles(xml)
|
|
79
78
|
insert_posted_date(xml)
|
|
80
79
|
insert_institution(xml)
|
|
80
|
+
insert_publisher_item(xml)
|
|
81
81
|
insert_crossref_abstract(xml)
|
|
82
|
-
insert_crossref_alternate_identifiers(xml)
|
|
83
82
|
insert_crossref_access_indicators(xml)
|
|
84
83
|
insert_doi_data(xml)
|
|
85
84
|
insert_citation_list(xml)
|
|
@@ -132,13 +131,14 @@ module Commonmeta
|
|
|
132
131
|
xml.citation_list do
|
|
133
132
|
Array.wrap(references).each do |ref|
|
|
134
133
|
xml.citation('key' => ref['key']) do
|
|
135
|
-
xml.journal_article(ref['journal_title'])
|
|
136
|
-
xml.author(ref['author'])
|
|
137
|
-
xml.volume(ref['volume'])
|
|
138
|
-
xml.first_page(ref['first_page'])
|
|
139
|
-
xml.cYear(ref['cYear'])
|
|
140
|
-
xml.article_title(ref['article_title'])
|
|
134
|
+
xml.journal_article(ref['journal_title']) if ref['journal_title'].present?
|
|
135
|
+
xml.author(ref['author']) if ref['author'].present?
|
|
136
|
+
xml.volume(ref['volume']) if ref['volume'].present?
|
|
137
|
+
xml.first_page(ref['first_page']) if ref['first_page'].present?
|
|
138
|
+
xml.cYear(ref['cYear']) if ref['cYear'].present?
|
|
139
|
+
xml.article_title(ref['article_title']) if ref['article_title'].present?
|
|
141
140
|
xml.doi(ref['doi']) if ref['doi'].present?
|
|
141
|
+
xml.unstructured_citation(ref['url']) if ref['url'].present?
|
|
142
142
|
end
|
|
143
143
|
end
|
|
144
144
|
end
|
|
@@ -159,16 +159,6 @@ module Commonmeta
|
|
|
159
159
|
# 'resourceTypeGeneral' => types["resourceTypeGeneral"] || Metadata::SO_TO_DC_TRANSLATIONS[types["schemaOrg"]] || "Other")
|
|
160
160
|
# end
|
|
161
161
|
|
|
162
|
-
def insert_crossref_alternate_identifiers(xml)
|
|
163
|
-
alternate_identifier = Array.wrap(alternate_identifiers).reject do |r|
|
|
164
|
-
%w[DOI, URL].include?(r['alternate_identifier_type'])
|
|
165
|
-
end.first
|
|
166
|
-
return xml if alternate_identifier.blank?
|
|
167
|
-
|
|
168
|
-
xml.item_number(alternate_identifier['alternateIdentifier'],
|
|
169
|
-
'item_number_type' => alternate_identifier['alternateIdentifierType'])
|
|
170
|
-
end
|
|
171
|
-
|
|
172
162
|
def insert_crossref_access_indicators(xml)
|
|
173
163
|
return xml if license.blank?
|
|
174
164
|
|
|
@@ -265,6 +255,20 @@ module Commonmeta
|
|
|
265
255
|
end
|
|
266
256
|
end
|
|
267
257
|
|
|
258
|
+
def insert_publisher_item(xml)
|
|
259
|
+
return xml if alternate_identifiers.blank?
|
|
260
|
+
|
|
261
|
+
xml.publisher_item do
|
|
262
|
+
alternate_identifiers.each do |alternate_identifier|
|
|
263
|
+
attributes = {
|
|
264
|
+
'item_number_type' => alternate_identifier['alternateIdentifierType'] ? alternate_identifier['alternateIdentifierType'].downcase : nil
|
|
265
|
+
}.compact
|
|
266
|
+
|
|
267
|
+
xml.item_number(alternate_identifier['alternateIdentifier'], attributes)
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
|
|
268
272
|
def insert_doi_data(xml)
|
|
269
273
|
return xml if doi_from_url(id).blank? || url.blank?
|
|
270
274
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
require "uri"
|
|
2
3
|
|
|
3
4
|
module Commonmeta
|
|
4
5
|
module Readers
|
|
@@ -9,7 +10,7 @@ module Commonmeta
|
|
|
9
10
|
url = normalize_id(id)
|
|
10
11
|
response = HTTP.get(url)
|
|
11
12
|
return { "string" => nil, "state" => "not_found" } unless response.status.success?
|
|
12
|
-
|
|
13
|
+
|
|
13
14
|
{ "string" => response.body.to_s }
|
|
14
15
|
end
|
|
15
16
|
|
|
@@ -58,6 +59,8 @@ module Commonmeta
|
|
|
58
59
|
|
|
59
60
|
sum
|
|
60
61
|
end
|
|
62
|
+
references = get_references(meta)
|
|
63
|
+
alternate_identifiers = [{ "alternateIdentifier" => meta["uuid"], "alternateIdentifierType" => "UUID" }]
|
|
61
64
|
|
|
62
65
|
{ "id" => id,
|
|
63
66
|
"type" => type,
|
|
@@ -71,23 +74,40 @@ module Commonmeta
|
|
|
71
74
|
"descriptions" => descriptions,
|
|
72
75
|
"license" => license,
|
|
73
76
|
"subjects" => subjects.presence,
|
|
77
|
+
"references" => references.presence,
|
|
78
|
+
"alternate_identifiers" => alternate_identifiers,
|
|
74
79
|
"state" => state }.compact.merge(read_options)
|
|
75
80
|
end
|
|
76
81
|
|
|
77
|
-
def
|
|
82
|
+
def get_references(meta)
|
|
83
|
+
# check that references resolve
|
|
84
|
+
Array.wrap(meta["references"]).reduce([]) do |sum, reference|
|
|
85
|
+
sum << reference if [200, 301, 302].include? HTTP.head(reference["doi"] || reference["url"]).status
|
|
86
|
+
|
|
87
|
+
sum
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def get_json_feed_unregistered
|
|
78
92
|
# get JSON Feed items not registered as DOIs
|
|
79
|
-
|
|
80
|
-
url =
|
|
93
|
+
|
|
94
|
+
url = json_feed_unregistered_url
|
|
81
95
|
response = HTTP.get(url)
|
|
82
96
|
return { "string" => nil, "state" => "not_found" } unless response.status.success?
|
|
83
97
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
98
|
+
posts = JSON.parse(response.body.to_s)
|
|
99
|
+
posts.map { |post| post["uuid"] }.first
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def get_json_feed_updated
|
|
103
|
+
# get JSON Feed items updated since last check
|
|
104
|
+
|
|
105
|
+
url = json_feed_unregistered_url
|
|
106
|
+
response = HTTP.get(url)
|
|
107
|
+
return { "string" => nil, "state" => "not_found" } unless response.status.success?
|
|
108
|
+
|
|
109
|
+
posts = JSON.parse(response.body.to_s)
|
|
110
|
+
posts.map { |post| post["uuid"] }.first
|
|
91
111
|
end
|
|
92
112
|
end
|
|
93
113
|
end
|
data/lib/commonmeta/utils.rb
CHANGED
|
@@ -557,6 +557,12 @@ module Commonmeta
|
|
|
557
557
|
end
|
|
558
558
|
end
|
|
559
559
|
|
|
560
|
+
def validate_email(str)
|
|
561
|
+
email_regex = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b/i
|
|
562
|
+
parsed_emails = str.scan(email_regex)
|
|
563
|
+
parsed_emails.first
|
|
564
|
+
end
|
|
565
|
+
|
|
560
566
|
def parse_attributes(element, options = {})
|
|
561
567
|
content = options[:content] || "__content__"
|
|
562
568
|
|
|
@@ -604,6 +610,12 @@ module Commonmeta
|
|
|
604
610
|
# clean up URL
|
|
605
611
|
uri.path = PostRank::URI.clean(uri.path)
|
|
606
612
|
|
|
613
|
+
# optionally remove query and fragment
|
|
614
|
+
if options[:remove_query_string]
|
|
615
|
+
uri.query = nil
|
|
616
|
+
uri.fragment = nil
|
|
617
|
+
end
|
|
618
|
+
|
|
607
619
|
uri.to_s
|
|
608
620
|
rescue Addressable::URI::InvalidURIError
|
|
609
621
|
nil
|
|
@@ -1381,9 +1393,7 @@ module Commonmeta
|
|
|
1381
1393
|
Base32::URL.decode(id)
|
|
1382
1394
|
end
|
|
1383
1395
|
|
|
1384
|
-
def
|
|
1385
|
-
return "https://rogue-scholar.org/api/blogs/#{id}" if id.present?
|
|
1386
|
-
|
|
1396
|
+
def json_feed_unregistered_url
|
|
1387
1397
|
"https://rogue-scholar.org/api/posts/unregistered"
|
|
1388
1398
|
end
|
|
1389
1399
|
end
|
data/lib/commonmeta/version.rb
CHANGED