puree 2.7.0 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +1 -0
- data/lib/puree/model/person.rb +2 -0
- data/lib/puree/version.rb +1 -1
- data/lib/puree/xml_extractor/base.rb +2 -2
- data/lib/puree/xml_extractor/collection.rb +5 -4
- data/lib/puree/xml_extractor/dataset.rb +3 -3
- data/lib/puree/xml_extractor/external_organisation.rb +1 -1
- data/lib/puree/xml_extractor/journal_article.rb +2 -1
- data/lib/puree/xml_extractor/mixins/abstract_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/description_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/event_mixin.rb +2 -1
- data/lib/puree/xml_extractor/mixins/person_mixin.rb +2 -1
- data/lib/puree/xml_extractor/mixins/project_mixin.rb +4 -3
- data/lib/puree/xml_extractor/mixins/publisher_mixin.rb +4 -2
- data/lib/puree/xml_extractor/mixins/research_output_mixin.rb +4 -2
- data/lib/puree/xml_extractor/mixins/title_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/type_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/workflow_mixin.rb +1 -1
- data/lib/puree/xml_extractor/organisational_unit.rb +3 -3
- data/lib/puree/xml_extractor/person.rb +16 -0
- data/lib/puree/xml_extractor/project.rb +1 -1
- data/lib/puree/xml_extractor/research_output.rb +7 -7
- data/lib/puree/xml_extractor/shared.rb +8 -4
- data/lib/puree/xml_extractor/thesis.rb +1 -1
- data/puree.gemspec +2 -2
- data/test/xml_extractor/xml_extractor_person_test.rb +7 -0
- data/test/xml_extractor/xml_extractor_research_output_test.rb +6 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b91d7f85a6432c1233be1361c96669c94d88b17c32d5399bc8fcd0fc55c2c9b
|
4
|
+
data.tar.gz: 55eb39c43d50a7cf50e5297099ddcee1cd3471fa3679e3703063bfc6cb3215d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91f7b09a0beaf0dc2badaea61ae558703ade237e906ec964a85ccec6bb8af45ed1ede6f8dd3fde74323d34749fbffa9dac7a02e8c1893c56eaacc970394fd377
|
7
|
+
data.tar.gz: 3102b9f34b06fa8ec08afd3dc47a1f0b914c0bbb6193fa7ce6f75c258e10f0454bb0cc6ada40fa60871da66f2adbf5f5ecc094b50a544bd9671415759f0a1250
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,13 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 2.8.0 2019-07-04
|
6
|
+
### Changed
|
7
|
+
- For Pure API 514.
|
8
|
+
|
9
|
+
### Added
|
10
|
+
- Person - other_names.
|
11
|
+
|
5
12
|
## 2.7.0 2019-06-06
|
6
13
|
### Changed
|
7
14
|
- For Pure API 513.
|
data/README.md
CHANGED
data/lib/puree/model/person.rb
CHANGED
data/lib/puree/version.rb
CHANGED
@@ -14,8 +14,8 @@ module Puree
|
|
14
14
|
#
|
15
15
|
# @return [String, nil]
|
16
16
|
def xpath_query_for_single_value(path)
|
17
|
-
xpath_result = xpath_query(path)
|
18
|
-
xpath_result.empty? ? nil : xpath_result
|
17
|
+
xpath_result = xpath_query(path)
|
18
|
+
xpath_result.empty? ? nil : xpath_result.first.text.strip
|
19
19
|
end
|
20
20
|
|
21
21
|
# XPath search for multiple values, at a given path.
|
@@ -75,7 +75,7 @@ module Puree
|
|
75
75
|
# @param xml [String]
|
76
76
|
# @return [Hash{Symbol => Array<Puree::Model::ResearchOutput class/subclass>}]
|
77
77
|
def self.research_outputs(xml)
|
78
|
-
path_from_root = File.join 'result', '/*'
|
78
|
+
path_from_root = File.join 'result/items', '/*'
|
79
79
|
doc = Nokogiri::XML xml
|
80
80
|
doc.remove_namespaces!
|
81
81
|
xpath_result = doc.xpath path_from_root
|
@@ -86,8 +86,9 @@ module Puree
|
|
86
86
|
other: []
|
87
87
|
}
|
88
88
|
xpath_result.each do |research_output|
|
89
|
-
|
90
|
-
unless
|
89
|
+
xpath_result_type = research_output.xpath('types/type')
|
90
|
+
type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
91
|
+
if type
|
91
92
|
case type
|
92
93
|
when 'Journal article'
|
93
94
|
extractor = Puree::XMLExtractor::JournalArticle.new research_output.to_s
|
@@ -127,7 +128,7 @@ module Puree
|
|
127
128
|
def self.models(resource_type, xml, xpath_root)
|
128
129
|
doc = Nokogiri::XML xml
|
129
130
|
doc.remove_namespaces!
|
130
|
-
path_from_root = File.join 'result', xpath_root
|
131
|
+
path_from_root = File.join 'result/items', xpath_root
|
131
132
|
xpath_result = doc.xpath path_from_root
|
132
133
|
data = []
|
133
134
|
xpath_result.each do |i|
|
@@ -46,8 +46,8 @@ module Puree
|
|
46
46
|
# doc['createdDate'] = d.xpath('createdDate').text.strip
|
47
47
|
# doc['visibleOnPortalDate'] = d.xpath('visibleOnPortalDate').text.strip
|
48
48
|
# doc['limitedVisibility'] = d.xpath('limitedVisibility').text.strip
|
49
|
-
document_license = d.xpath('documentLicense')
|
50
|
-
if
|
49
|
+
document_license = d.xpath('documentLicenses/documentLicense').first
|
50
|
+
if document_license
|
51
51
|
license = Puree::Model::CopyrightLicense.new
|
52
52
|
license.name = document_license.text.strip
|
53
53
|
# license.name = document_license.xpath('term/localizedString').text.strip
|
@@ -88,7 +88,7 @@ module Puree
|
|
88
88
|
# @return [Array<String>]
|
89
89
|
def spatial_places
|
90
90
|
# Data from free-form text box
|
91
|
-
xpath_result = xpath_query '/geographicalCoverage'
|
91
|
+
xpath_result = xpath_query '/geographicalCoverages/geographicalCoverage'
|
92
92
|
data = []
|
93
93
|
xpath_result.each do |i|
|
94
94
|
data << i.text.strip
|
@@ -26,7 +26,8 @@ module Puree
|
|
26
26
|
header = Puree::Model::JournalHeader.new
|
27
27
|
header.title = xpath_result.xpath('title').text.strip
|
28
28
|
journal = xpath_result.xpath('journal')
|
29
|
-
|
29
|
+
xpath_result_type = journal.xpath('types/type')
|
30
|
+
header.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
30
31
|
header.uuid = journal.attr('uuid').text.strip
|
31
32
|
header
|
32
33
|
end
|
@@ -12,7 +12,8 @@ module Puree
|
|
12
12
|
if !xpath_result.empty?
|
13
13
|
header = Puree::Model::EventHeader.new
|
14
14
|
header.uuid = xpath_result.xpath('@uuid').text.strip
|
15
|
-
|
15
|
+
xpath_result_name = xpath_result.xpath('names/name')
|
16
|
+
header.title = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
16
17
|
return header if header.data?
|
17
18
|
end
|
18
19
|
nil
|
@@ -32,7 +32,8 @@ module Puree
|
|
32
32
|
name.first = i.xpath('name/firstName').text.strip
|
33
33
|
name.last = i.xpath('name/lastName').text.strip
|
34
34
|
person.name = name if name.data?
|
35
|
-
|
35
|
+
xpath_result_role = i.xpath('personRoles/personRole')
|
36
|
+
person.role = xpath_result_role.first.text.strip unless xpath_result_role.empty?
|
36
37
|
arr << person if person.data?
|
37
38
|
end
|
38
39
|
end
|
@@ -10,13 +10,14 @@ module Puree
|
|
10
10
|
# @return [Array<Puree::Model::RelatedContentHeader>]
|
11
11
|
def projects
|
12
12
|
xpath_result = xpath_query '/relatedProjects/relatedProject'
|
13
|
-
|
14
13
|
data_arr = []
|
15
14
|
xpath_result.each { |i|
|
16
15
|
related = Puree::Model::RelatedContentHeader.new
|
17
|
-
related.type = i.xpath('type').text.strip
|
18
|
-
related.title = i.xpath('name').text.strip
|
19
16
|
related.uuid = i.attr('uuid').strip
|
17
|
+
xpath_result_name = i.xpath('names/name')
|
18
|
+
related.title = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
19
|
+
xpath_result_type = i.xpath('types/type')
|
20
|
+
related.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
20
21
|
data_arr << related
|
21
22
|
}
|
22
23
|
data_arr.uniq { |d| d.uuid }
|
@@ -11,8 +11,10 @@ module Puree
|
|
11
11
|
xpath_result = xpath_query '/publisher'
|
12
12
|
h = Puree::Model::PublisherHeader.new
|
13
13
|
h.uuid = xpath_result.xpath('@uuid').text.strip
|
14
|
-
|
15
|
-
h.
|
14
|
+
xpath_result_name = xpath_result.xpath('names/name')
|
15
|
+
h.name = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
16
|
+
xpath_result_type = xpath_result.xpath('types/type')
|
17
|
+
h.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
16
18
|
h.data? ? h : nil
|
17
19
|
end
|
18
20
|
|
@@ -13,9 +13,11 @@ module Puree
|
|
13
13
|
data_arr = []
|
14
14
|
xpath_result.each { |i|
|
15
15
|
related = Puree::Model::RelatedContentHeader.new
|
16
|
-
related.type = i.xpath('type').text.strip
|
17
|
-
related.title = i.xpath('name').text.strip
|
18
16
|
related.uuid = i.attr('uuid').strip
|
17
|
+
xpath_result_name = i.xpath('names/name')
|
18
|
+
related.title = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
19
|
+
xpath_result_type = i.xpath('types/type')
|
20
|
+
related.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
19
21
|
data_arr << related
|
20
22
|
}
|
21
23
|
data_arr.uniq { |d| d.uuid }
|
@@ -25,8 +25,8 @@ module Puree
|
|
25
25
|
a.postcode = postcode unless building.empty?
|
26
26
|
city = xpath_result.xpath('city').text.strip
|
27
27
|
a.city = city unless city.empty?
|
28
|
-
country = xpath_result.xpath('country')
|
29
|
-
a.country = country unless country.empty?
|
28
|
+
country = xpath_result.xpath('countries/country')
|
29
|
+
a.country = country.first.text.strip unless country.empty?
|
30
30
|
a
|
31
31
|
end
|
32
32
|
end
|
@@ -38,7 +38,7 @@ module Puree
|
|
38
38
|
|
39
39
|
# @return [String, nil]
|
40
40
|
def name
|
41
|
-
xpath_query_for_single_value '/name'
|
41
|
+
xpath_query_for_single_value '/names/name'
|
42
42
|
end
|
43
43
|
|
44
44
|
# First parent
|
@@ -57,6 +57,21 @@ module Puree
|
|
57
57
|
xpath_query_for_single_value '/orcid'
|
58
58
|
end
|
59
59
|
|
60
|
+
# @return [Array<Model::PersonName>]
|
61
|
+
def other_names
|
62
|
+
xpath_result = xpath_query '/nameVariants/nameVariant/name'
|
63
|
+
data = []
|
64
|
+
xpath_result.each do |d|
|
65
|
+
first = xpath_result.xpath('firstName').text.strip
|
66
|
+
last = xpath_result.xpath('lastName').text.strip
|
67
|
+
model = Puree::Model::PersonName.new
|
68
|
+
model.first = first unless first.empty?
|
69
|
+
model.last = last unless last.empty?
|
70
|
+
data << model
|
71
|
+
end
|
72
|
+
data.uniq
|
73
|
+
end
|
74
|
+
|
60
75
|
private
|
61
76
|
|
62
77
|
def xpath_root
|
@@ -72,6 +87,7 @@ module Puree
|
|
72
87
|
@model.keywords = keywords
|
73
88
|
@model.name = name
|
74
89
|
@model.orcid = orcid
|
90
|
+
@model.other_names = other_names
|
75
91
|
@model
|
76
92
|
end
|
77
93
|
|
@@ -23,12 +23,12 @@ module Puree
|
|
23
23
|
|
24
24
|
# @return [String, nil]
|
25
25
|
def bibliographical_note
|
26
|
-
xpath_query_for_single_value
|
26
|
+
xpath_query_for_single_value '/bibliographicalNotes/bibliographicalNote'
|
27
27
|
end
|
28
28
|
|
29
29
|
# @return [String, nil]
|
30
30
|
def category
|
31
|
-
xpath_query_for_single_value '/category'
|
31
|
+
xpath_query_for_single_value '/categories/category'
|
32
32
|
end
|
33
33
|
|
34
34
|
# Digital Object Identifier (first one, if many)
|
@@ -73,7 +73,7 @@ module Puree
|
|
73
73
|
|
74
74
|
# @return [String, nil]
|
75
75
|
def language
|
76
|
-
xpath_query_for_single_value '/language'
|
76
|
+
xpath_query_for_single_value '/languages/language'
|
77
77
|
end
|
78
78
|
|
79
79
|
# @return [Array<String>, nil]
|
@@ -83,7 +83,7 @@ module Puree
|
|
83
83
|
|
84
84
|
# @return [String, nil]
|
85
85
|
def open_access_permission
|
86
|
-
xpath_query_for_single_value '/openAccessPermission'
|
86
|
+
xpath_query_for_single_value '/openAccessPermissions/openAccessPermission'
|
87
87
|
end
|
88
88
|
|
89
89
|
# @return [Array<Puree::Model::EndeavourPerson>]
|
@@ -107,7 +107,7 @@ module Puree
|
|
107
107
|
data = []
|
108
108
|
xpath_result.each do |i|
|
109
109
|
s = Puree::Model::PublicationStatus.new
|
110
|
-
s.stage = i.xpath('publicationStatus').text.strip
|
110
|
+
s.stage = i.xpath('publicationStatuses/publicationStatus').text.strip
|
111
111
|
|
112
112
|
ymd = {}
|
113
113
|
ymd['year'] = i.xpath('publicationDate/year').text.strip
|
@@ -156,12 +156,12 @@ module Puree
|
|
156
156
|
|
157
157
|
# @return [String, nil]
|
158
158
|
def translated_subtitle
|
159
|
-
xpath_query_for_single_value '/translatedSubTitle'
|
159
|
+
xpath_query_for_single_value '/translatedSubTitles/translatedSubTitle'
|
160
160
|
end
|
161
161
|
|
162
162
|
# @return [String, nil]
|
163
163
|
def translated_title
|
164
|
-
xpath_query_for_single_value '/translatedTitle'
|
164
|
+
xpath_query_for_single_value '/translatedTitles/translatedTitle'
|
165
165
|
end
|
166
166
|
|
167
167
|
private
|
@@ -10,8 +10,10 @@ module Puree
|
|
10
10
|
def self.external_organisation_header(nokogiri_xml_element)
|
11
11
|
h = Puree::Model::ExternalOrganisationHeader.new
|
12
12
|
h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
|
13
|
-
|
14
|
-
h.
|
13
|
+
xpath_result_name = nokogiri_xml_element.xpath('names/name')
|
14
|
+
h.name = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
15
|
+
xpath_result_type = nokogiri_xml_element.xpath('types/type')
|
16
|
+
h.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
15
17
|
h.data? ? h : nil
|
16
18
|
end
|
17
19
|
|
@@ -29,8 +31,10 @@ module Puree
|
|
29
31
|
def self.organisation_header(nokogiri_xml_element)
|
30
32
|
h = Puree::Model::OrganisationalUnitHeader.new
|
31
33
|
h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
|
32
|
-
|
33
|
-
h.
|
34
|
+
xpath_result_name = nokogiri_xml_element.xpath('names/name')
|
35
|
+
h.name = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
36
|
+
xpath_result_type = nokogiri_xml_element.xpath('types/type')
|
37
|
+
h.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
34
38
|
h.data? ? h : nil
|
35
39
|
end
|
36
40
|
|
data/puree.gemspec
CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = 'a.albin-clark@lancaster.ac.uk'
|
11
11
|
spec.summary = %q{Metadata extraction from the Pure Research Information System.}
|
12
12
|
spec.metadata = {
|
13
|
-
'source_code_uri' =>
|
14
|
-
"documentation_uri" => "https://www.rubydoc.info/gems
|
13
|
+
'source_code_uri' => "https://github.com/lulibrary/#{spec.name}",
|
14
|
+
"documentation_uri" => "https://www.rubydoc.info/gems/#{spec.name}/#{spec.version}",
|
15
15
|
}
|
16
16
|
spec.license = 'MIT'
|
17
17
|
spec.files = `git ls-files -z`.split("\x0")
|
@@ -51,6 +51,10 @@ class TestXMLExtractorPerson < Minitest::Test
|
|
51
51
|
|
52
52
|
assert_instance_of String, x.orcid
|
53
53
|
refute_empty x.orcid
|
54
|
+
|
55
|
+
assert_instance_of Array, x.other_names
|
56
|
+
assert_instance_of Puree::Model::PersonName, x.other_names.first
|
57
|
+
assert x.other_names.first.data?
|
54
58
|
end
|
55
59
|
|
56
60
|
# def test_scopus_id
|
@@ -80,6 +84,9 @@ class TestXMLExtractorPerson < Minitest::Test
|
|
80
84
|
assert_nil x.name
|
81
85
|
|
82
86
|
assert_nil x.orcid
|
87
|
+
|
88
|
+
assert_instance_of Array, x.other_names
|
89
|
+
assert_empty x.other_names
|
83
90
|
end
|
84
91
|
|
85
92
|
def test_model
|
@@ -289,8 +289,12 @@ class TestXMLExtractorResearchOutput < Minitest::Test
|
|
289
289
|
'<contributionToJournal>
|
290
290
|
<relatedProjects>
|
291
291
|
<relatedProject uuid="fe8aebdf-a926-4e7b-adf1-082425e50330">
|
292
|
-
<
|
293
|
-
|
292
|
+
<names>
|
293
|
+
<name>The Language Bases of Reading Comprehension</name>
|
294
|
+
</names>
|
295
|
+
<types>
|
296
|
+
<type uri="/dk/atira/pure/upmproject/upmprojecttypes/upmproject/research">Research</type>
|
297
|
+
</types>
|
294
298
|
</relatedProject>
|
295
299
|
</relatedProjects>
|
296
300
|
</contributionToJournal>'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.7.0
|
4
|
+
version: 2.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -236,7 +236,7 @@ licenses:
|
|
236
236
|
- MIT
|
237
237
|
metadata:
|
238
238
|
source_code_uri: https://github.com/lulibrary/puree
|
239
|
-
documentation_uri: https://www.rubydoc.info/gems/puree/2.7.0
|
239
|
+
documentation_uri: https://www.rubydoc.info/gems/puree/2.8.0
|
240
240
|
post_install_message:
|
241
241
|
rdoc_options: []
|
242
242
|
require_paths:
|