montague 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/CHANGELOG.md +17 -0
- data/Gemfile +4 -0
- data/README.md +138 -0
- data/Rakefile +2 -0
- data/lib/montague.rb +9 -0
- data/lib/montague/api/api.rb +12 -0
- data/lib/montague/api/base.rb +27 -0
- data/lib/montague/api/client.rb +27 -0
- data/lib/montague/api/journal.rb +52 -0
- data/lib/montague/api/publisher.rb +52 -0
- data/lib/montague/model/archiving.rb +26 -0
- data/lib/montague/model/copyright_link.rb +26 -0
- data/lib/montague/model/funder.rb +26 -0
- data/lib/montague/model/header.rb +22 -0
- data/lib/montague/model/journal.rb +26 -0
- data/lib/montague/model/journal_report.rb +15 -0
- data/lib/montague/model/journals_report.rb +15 -0
- data/lib/montague/model/mandate.rb +44 -0
- data/lib/montague/model/model.rb +22 -0
- data/lib/montague/model/paid_access.rb +34 -0
- data/lib/montague/model/publisher.rb +46 -0
- data/lib/montague/model/publisher_report.rb +12 -0
- data/lib/montague/model/publishers_report.rb +14 -0
- data/lib/montague/model/report_header_mixin.rb +12 -0
- data/lib/montague/model/report_http_response_mixin.rb +12 -0
- data/lib/montague/model/report_publisher_mixin.rb +12 -0
- data/lib/montague/model/structure.rb +18 -0
- data/lib/montague/reporter/journal.rb +26 -0
- data/lib/montague/reporter/journals.rb +26 -0
- data/lib/montague/reporter/publisher.rb +24 -0
- data/lib/montague/reporter/publishers.rb +24 -0
- data/lib/montague/reporter/reporter.rb +11 -0
- data/lib/montague/version.rb +5 -0
- data/lib/montague/xml_extractor/base.rb +58 -0
- data/lib/montague/xml_extractor/header.rb +43 -0
- data/lib/montague/xml_extractor/journal.rb +31 -0
- data/lib/montague/xml_extractor/publisher.rb +164 -0
- data/lib/montague/xml_extractor/xml_extractor.rb +13 -0
- data/montague.gemspec +22 -0
- data/test/test_helper.rb +102 -0
- data/test/test_search_by_journal.rb +90 -0
- data/test/test_search_by_publisher_api.rb +36 -0
- metadata +131 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
module Montague
  module XMLExtractor
    # Shared behaviour for the XML extractors.
    #
    # Parses the supplied XML into a Nokogiri document with namespaces
    # removed, and offers private XPath helpers plus the element paths the
    # subclasses query.
    #
    class Base

      # @param xml the XML handed to Nokogiri::XML for parsing
      def initialize(xml)
        make_doc(xml)
      end

      private

      # Parses the XML and strips namespaces so that XPath expressions can
      # use bare element names.
      def make_doc(xml)
        @doc = Nokogiri::XML(xml)
        @doc.remove_namespaces!
      end

      # XPath search for a single value, at a given path.
      #
      # @param path [String] XPath expression
      # @return [String, nil] text of the matching nodes, or nil when that
      #   text is empty
      def xpath_query_for_single_value(path)
        text = @doc.xpath(path).text
        text unless text.empty?
      end

      # XPath search for multiple values, at a given path.
      #
      # @param path [String] XPath expression
      # @return [Array<String>] stripped text of each matching node, with
      #   duplicates removed
      def xpath_query_for_multi_value(path)
        @doc.xpath(path).map { |node| node.text.strip }.uniq
      end

      # Runs an XPath query against the parsed document.
      #
      # @param path [String] XPath expression
      # @return the result of Nokogiri's +xpath+ for the given path
      def xpath_query(path)
        @doc.xpath(path)
      end

      # @return [String] path of the root element
      def root_path
        '/romeoapi'
      end

      # @return [String] path of the header element
      def header_path
        File.join(root_path, 'header')
      end

      # @return [String] path of the journal elements
      def journal_path
        File.join(root_path, 'journals/journal')
      end

      # @return [String] path of the publisher elements
      def publisher_path
        File.join(root_path, 'publishers/publisher')
      end

    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Montague
  module XMLExtractor
    # Header extractor
    #
    # Reads the values found beneath the header element of the parsed
    # document. The constructor is inherited unchanged from Base.
    #
    class Header < Montague::XMLExtractor::Base

      # @return [String, nil] text of the +apicontrol+ header element
      def api_control
        header_value 'apicontrol'
      end

      # @return [Integer] value of the +numhits+ header element; 0 when the
      #   element is missing or its text does not start with a number
      #   (the result of +to_i+ on nil or on such a string)
      def hits
        header_value('numhits').to_i
      end

      # @return [String, nil] text of the +message+ header element
      def message
        header_value 'message'
      end

      # @return [String, nil] text of the +outcome+ header element
      def outcome
        header_value 'outcome'
      end

      # @return [Montague::Model::Header] model populated from the header
      def model
        Montague::Model::Header.new.tap do |m|
          m.api_control = api_control
          m.hits = hits
          m.message = message
          m.outcome = outcome
        end
      end

      private

      # Looks up a single value directly beneath the header element.
      #
      # @param element [String] name of the child element
      # @return [String, nil]
      def header_value(element)
        xpath_query_for_single_value(File.join(header_path, element))
      end

    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Montague
  module XMLExtractor
    # Journal XML extractor
    #
    # Builds journal models from the journal elements of the parsed
    # document. The constructor is inherited unchanged from Base.
    #
    class Journal < Montague::XMLExtractor::Base

      # Builds one model per journal element, taking the title from
      # +jtitle+ and the ISSN from +issn+ (both stripped of surrounding
      # whitespace).
      #
      # @return [Array<Montague::Model::Journal>]
      def models
        xpath_query(journal_path).map do |node|
          Montague::Model::Journal.new.tap do |journal|
            journal.title = node.xpath('jtitle').text.strip
            journal.issn = node.xpath('issn').text.strip
          end
        end
      end

      # First journal in the document.
      #
      # The list is built once only; +first+ already yields nil for an
      # empty list, so no separate emptiness check is needed.
      #
      # @return [Montague::Model::Journal, nil]
      def model
        models.first
      end

    end
  end
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
module Montague
  module XMLExtractor
    # Publisher XML extractor
    #
    # The attribute readers use paths rooted at +/publisher+. {#models}
    # creates a separate extractor from the XML of each publisher element,
    # and reads the attributes through that extractor. The constructor is
    # inherited unchanged from Base.
    #
    class Publisher < Montague::XMLExtractor::Base

      # @return [String, nil]
      def name_alias
        xpath_query_for_single_value '/publisher/alias'
      end

      # @return [Array<String>]
      def conditions
        xpath_query_for_multi_value '/publisher/conditions/condition'
      end

      # Copyright links for which the model reports data.
      #
      # @return [Array<Montague::Model::CopyrightLink>]
      def copyright_links
        links = xpath_query('/publisher/copyrightlinks/copyrightlink').map do |node|
          Montague::Model::CopyrightLink.new.tap do |link|
            link.text = node.xpath('copyrightlinktext').text.strip
            link.url = node.xpath('copyrightlinkurl').text.strip
          end
        end
        links.select(&:data?)
      end

      # @return [String] Allows for dummy ID string when there is no RoMEO entry with numeric ID
      #   (an empty string when the attribute is absent)
      def id
        xpath_query_for_single_value('/publisher/@id').to_s
      end

      # Publisher compliance with the open access mandates of research funding agencies
      #
      # Only mandates for which the model reports data are returned; a
      # funder is attached only when the funder model reports data.
      #
      # @return [Array<Montague::Model::Mandate>]
      def mandates
        xpath_query('/publisher/mandates/mandate').each_with_object([]) do |node, data|
          funder = Montague::Model::Funder.new
          funder.name = node.xpath('funder/fundername').text.strip
          funder.acronym = node.xpath('funder/funderacronym').text.strip

          mandate = Montague::Model::Mandate.new
          mandate.funder = funder if funder.data?
          mandate.publisher_complies = node.xpath('publishercomplies').text.strip
          mandate.compliance_type = node.xpath('compliancetype').text.strip
          mandate.selected_titles = node.xpath('selectedtitles').text.strip
          data << mandate if mandate.data?
        end
      end

      # @return [String, nil]
      def name
        xpath_query_for_single_value '/publisher/name'
      end

      # @return [Montague::Model::PaidAccess, nil] nil when there is no
      #   +paidaccess+ element
      def paid_access
        nodes = xpath_query '/publisher/paidaccess'
        return if nodes.empty?

        Montague::Model::PaidAccess.new.tap do |access|
          access.url = nodes.xpath('paidaccessurl').text.strip
          access.name = nodes.xpath('paidaccessname').text.strip
          access.notes = nodes.xpath('paidaccessnotes').text.strip
        end
      end

      # @return (see #prints)
      def pre_prints
        prints archiving: 'preprints',
               permission: 'prearchiving',
               restrictions: 'prerestrictions'
      end

      # @return (see #prints)
      def post_prints
        prints archiving: 'postprints',
               permission: 'postarchiving',
               restrictions: 'postrestrictions'
      end

      # @return (see #prints)
      def pdf_version
        prints archiving: 'pdfversion',
               permission: 'pdfarchiving',
               restrictions: 'pdfrestrictions'
      end

      # @return [String, nil]
      def romeo_colour
        xpath_query_for_single_value '/publisher/romeocolour'
      end

      # Builds one model per publisher element. Each element is serialised
      # and handed to its own extractor, which packages the model.
      #
      # @return [Array<Montague::Model::Publisher>]
      def models
        xpath_query(publisher_path).map do |node|
          Montague::XMLExtractor::Publisher.new(node.to_s).package_model
        end
      end

      # First publisher in the document.
      #
      # The list is built once only, so the XML of each publisher is not
      # parsed a second time; +first+ already yields nil for an empty list.
      #
      # @return [Montague::Model::Publisher, nil]
      def model
        models.first
      end

      # Packages the attributes read by this extractor into a model.
      #
      # @return [Montague::Model::Publisher]
      def package_model
        Montague::Model::Publisher.new.tap do |m|
          m.alias = name_alias
          m.conditions = conditions
          m.copyright_links = copyright_links
          m.id = id
          m.mandates = mandates
          m.name = name
          m.paid_access = paid_access
          m.pdf_version = pdf_version
          m.pre_prints = pre_prints
          m.post_prints = post_prints
          m.romeo_colour = romeo_colour
        end
      end

      private

      # Builds an archiving model from the element named by
      # +paths[:archiving]+.
      #
      # @param paths [Hash{Symbol => String}] element names under the keys
      #   +:archiving+, +:permission+ and +:restrictions+
      # @return [Montague::Model::Archiving, nil] nil when the archiving
      #   element is absent
      def prints(paths)
        nodes = xpath_query "/publisher/#{paths[:archiving]}"
        return if nodes.empty?

        archiving = Montague::Model::Archiving.new
        archiving.permission = nodes.xpath(paths[:permission]).text.strip
        # Strip before testing for emptiness so that whitespace-only
        # restrictions are dropped rather than stored as empty strings.
        archiving.restrictions = nodes.xpath(paths[:restrictions])
                                      .map { |node| node.text.strip }
                                      .reject(&:empty?)
        archiving
      end

    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'montague/xml_extractor/base'
|
3
|
+
require 'montague/xml_extractor/publisher'
|
4
|
+
require 'montague/xml_extractor/header'
|
5
|
+
require 'montague/xml_extractor/journal'
|
6
|
+
|
7
|
+
module Montague
  # Namespace for the XML extractors, which manage the extraction of
  # metadata from XML into Ruby data structures.
  #
  module XMLExtractor; end
end
|
data/montague.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8

# Put the gem's lib directory on the load path so that the version
# constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'montague/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name     = 'montague'
  gem.version  = Montague::VERSION
  gem.authors  = 'Adrian Albin-Clark'
  gem.email    = 'a.albin-clark@lancaster.ac.uk'
  gem.summary  = 'Wrapper for the SHERPA/RoMEO API.'
  gem.homepage = 'https://github.com/lulibrary/montague'
  gem.license  = 'MIT'

  # Contents: every file tracked by git, with executables and test files
  # picked out of that list by directory.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Requirements
  gem.required_ruby_version = '~> 2.1'
  gem.add_runtime_dependency('http', '~> 2.0')
  gem.add_runtime_dependency('nokogiri', '~> 1.6')
  gem.add_development_dependency('minitest-reporters', '~> 1.1')
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'minitest/autorun'
require 'minitest/reporters'
Minitest::Reporters.use!

# Put the gem's lib directory at the front of the load path before
# requiring the library under test.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))

require 'montague'
|
9
|
+
|
10
|
+
# Returns a hash holding the API key read from the environment.
#
# @return [Hash{Symbol => String, nil}] +:api_key+ is the value of the
#   MONTAGUE_API_KEY environment variable, nil when it is not set
def config
  api_key = ENV['MONTAGUE_API_KEY']
  { api_key: api_key }
end
|
15
|
+
|
16
|
+
# Asserts the type of each attribute of a publisher model, and that the
# name, archiving attributes and RoMEO colour carry data.
#
# @param x [Montague::Model::Publisher]
def asserts_publisher(x)
  assert_instance_of(Montague::Model::Publisher, x)

  # The alias is optional, so it is only type-checked when present.
  assert_instance_of(String, x.alias) if x.alias

  assert_instance_of(Array, x.conditions)
  x.conditions.each { |condition| assert_instance_of(String, condition) }

  assert_instance_of(Array, x.copyright_links)
  x.copyright_links.each do |link|
    assert_instance_of(Montague::Model::CopyrightLink, link)
  end

  assert_instance_of(String, x.id)

  assert_instance_of(Array, x.mandates)

  assert_instance_of(String, x.name)
  refute_empty(x.name)

  # Paid access is optional, so it is only type-checked when present.
  assert_instance_of(Montague::Model::PaidAccess, x.paid_access) if x.paid_access
  # assert_equal true, x.paid_access.data?

  # Each archiving attribute must be present and report data.
  %i[pdf_version post_prints pre_prints].each do |attribute|
    assert_instance_of(Montague::Model::Archiving, x.public_send(attribute))
    assert_equal(true, x.public_send(attribute).data?)
  end

  assert_instance_of(String, x.romeo_colour)
  refute_empty(x.romeo_colour)
end
|
52
|
+
|
53
|
+
# Asserts the attribute types of a journal model and that it has a title.
#
# @param x [Montague::Model::Journal]
def asserts_journal(x)
  assert_instance_of(Montague::Model::Journal, x)

  # The ISSN is only type-checked when present.
  issn_present = x.issn
  assert_instance_of(String, x.issn) if issn_present

  assert_instance_of(String, x.title)
  refute_empty(x.title)
end
|
61
|
+
|
62
|
+
# Asserts the attribute types of a header model and that it has an outcome.
#
# @param x [Montague::Model::Header]
def asserts_header(x)
  assert_instance_of Montague::Model::Header, x

  # Fixnum was deprecated in Ruby 2.4 and removed in Ruby 3.2. A kind-of
  # check against Integer passes on every version, including older Rubies
  # where small integers are instances of the Fixnum subclass.
  assert_kind_of Integer, x.hits

  # The message is only type-checked when present.
  assert_instance_of String, x.message if x.message

  assert_instance_of String, x.outcome
  refute_empty x.outcome
end
|
72
|
+
|
73
|
+
# Asserts the shape of a single-journal report: its type, HTTP response,
# header and, when present, its journal.
#
# @param x [Montague::Model::JournalReport]
def asserts_journal_report(x)
  assert_instance_of(Montague::Model::JournalReport, x)
  assert_instance_of(HTTP::Response, x.http_response)
  asserts_header(x.header)

  journal_present = x.journal
  asserts_journal(x.journal) if journal_present
end
|
79
|
+
|
80
|
+
# Asserts the shape of a multi-journal report: its type, HTTP response,
# header and every journal it lists.
#
# @param x [Montague::Model::JournalsReport]
def asserts_journals_report(x)
  assert_instance_of(Montague::Model::JournalsReport, x)
  assert_instance_of(HTTP::Response, x.http_response)
  asserts_header(x.header)
  x.journals.each do |journal|
    asserts_journal(journal)
  end
end
|
86
|
+
|
87
|
+
# Asserts every publisher listed in a multi-publisher report, then its
# HTTP response and header.
#
# @param x report object responding to +publishers+, +http_response+ and
#   +header+
def asserts_publishers_report(x)
  x.publishers.each do |publisher|
    asserts_publisher(publisher)
  end
  assert_instance_of(HTTP::Response, x.http_response)
  asserts_header(x.header)
end
|
92
|
+
|
93
|
+
# Asserts the publisher of a single-publisher report, then its HTTP
# response and header.
#
# @param x report object responding to +publisher+, +http_response+ and
#   +header+
def asserts_publisher_report(x)
  asserts_publisher(x.publisher)
  assert_instance_of(HTTP::Response, x.http_response)
  asserts_header(x.header)
end
|
98
|
+
|
99
|
+
# Asserts that the given object exposes a publisher and that it is not nil.
#
# @param x object expected to respond to +publisher+
def asserts_publisher_found(x)
  exposes_publisher = x.respond_to?(:publisher)
  assert_equal(true, exposes_publisher)
  refute_nil(x.publisher)
end
|