puree 2.7.0 → 2.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +1 -0
- data/lib/puree/model/person.rb +2 -0
- data/lib/puree/version.rb +1 -1
- data/lib/puree/xml_extractor/base.rb +2 -2
- data/lib/puree/xml_extractor/collection.rb +5 -4
- data/lib/puree/xml_extractor/dataset.rb +3 -3
- data/lib/puree/xml_extractor/external_organisation.rb +1 -1
- data/lib/puree/xml_extractor/journal_article.rb +2 -1
- data/lib/puree/xml_extractor/mixins/abstract_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/description_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/event_mixin.rb +2 -1
- data/lib/puree/xml_extractor/mixins/person_mixin.rb +2 -1
- data/lib/puree/xml_extractor/mixins/project_mixin.rb +4 -3
- data/lib/puree/xml_extractor/mixins/publisher_mixin.rb +4 -2
- data/lib/puree/xml_extractor/mixins/research_output_mixin.rb +4 -2
- data/lib/puree/xml_extractor/mixins/title_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/type_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/workflow_mixin.rb +1 -1
- data/lib/puree/xml_extractor/organisational_unit.rb +3 -3
- data/lib/puree/xml_extractor/person.rb +16 -0
- data/lib/puree/xml_extractor/project.rb +1 -1
- data/lib/puree/xml_extractor/research_output.rb +7 -7
- data/lib/puree/xml_extractor/shared.rb +8 -4
- data/lib/puree/xml_extractor/thesis.rb +1 -1
- data/puree.gemspec +2 -2
- data/test/xml_extractor/xml_extractor_person_test.rb +7 -0
- data/test/xml_extractor/xml_extractor_research_output_test.rb +6 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b91d7f85a6432c1233be1361c96669c94d88b17c32d5399bc8fcd0fc55c2c9b
|
4
|
+
data.tar.gz: 55eb39c43d50a7cf50e5297099ddcee1cd3471fa3679e3703063bfc6cb3215d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91f7b09a0beaf0dc2badaea61ae558703ade237e906ec964a85ccec6bb8af45ed1ede6f8dd3fde74323d34749fbffa9dac7a02e8c1893c56eaacc970394fd377
|
7
|
+
data.tar.gz: 3102b9f34b06fa8ec08afd3dc47a1f0b914c0bbb6193fa7ce6f75c258e10f0454bb0cc6ada40fa60871da66f2adbf5f5ecc094b50a544bd9671415759f0a1250
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,13 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
+
## 2.8.0 2019-07-04
|
6
|
+
### Changed
|
7
|
+
- For Pure API 514.
|
8
|
+
|
9
|
+
### Added
|
10
|
+
- Person - other_names.
|
11
|
+
|
5
12
|
## 2.7.0 2019-06-06
|
6
13
|
### Changed
|
7
14
|
- For Pure API 513.
|
data/README.md
CHANGED
data/lib/puree/model/person.rb
CHANGED
data/lib/puree/version.rb
CHANGED
@@ -14,8 +14,8 @@ module Puree
|
|
14
14
|
#
|
15
15
|
# @return [String, nil]
|
16
16
|
def xpath_query_for_single_value(path)
|
17
|
-
xpath_result = xpath_query(path)
|
18
|
-
xpath_result.empty? ? nil : xpath_result
|
17
|
+
xpath_result = xpath_query(path)
|
18
|
+
xpath_result.empty? ? nil : xpath_result.first.text.strip
|
19
19
|
end
|
20
20
|
|
21
21
|
# XPath search for multiple values, at a given path.
|
@@ -75,7 +75,7 @@ module Puree
|
|
75
75
|
# @param xml [String]
|
76
76
|
# @return [Hash{Symbol => Array<Puree::Model::ResearchOutput class/subclass>}]
|
77
77
|
def self.research_outputs(xml)
|
78
|
-
path_from_root = File.join 'result', '/*'
|
78
|
+
path_from_root = File.join 'result/items', '/*'
|
79
79
|
doc = Nokogiri::XML xml
|
80
80
|
doc.remove_namespaces!
|
81
81
|
xpath_result = doc.xpath path_from_root
|
@@ -86,8 +86,9 @@ module Puree
|
|
86
86
|
other: []
|
87
87
|
}
|
88
88
|
xpath_result.each do |research_output|
|
89
|
-
|
90
|
-
unless
|
89
|
+
xpath_result_type = research_output.xpath('types/type')
|
90
|
+
type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
91
|
+
if type
|
91
92
|
case type
|
92
93
|
when 'Journal article'
|
93
94
|
extractor = Puree::XMLExtractor::JournalArticle.new research_output.to_s
|
@@ -127,7 +128,7 @@ module Puree
|
|
127
128
|
def self.models(resource_type, xml, xpath_root)
|
128
129
|
doc = Nokogiri::XML xml
|
129
130
|
doc.remove_namespaces!
|
130
|
-
path_from_root = File.join 'result', xpath_root
|
131
|
+
path_from_root = File.join 'result/items', xpath_root
|
131
132
|
xpath_result = doc.xpath path_from_root
|
132
133
|
data = []
|
133
134
|
xpath_result.each do |i|
|
@@ -46,8 +46,8 @@ module Puree
|
|
46
46
|
# doc['createdDate'] = d.xpath('createdDate').text.strip
|
47
47
|
# doc['visibleOnPortalDate'] = d.xpath('visibleOnPortalDate').text.strip
|
48
48
|
# doc['limitedVisibility'] = d.xpath('limitedVisibility').text.strip
|
49
|
-
document_license = d.xpath('documentLicense')
|
50
|
-
if
|
49
|
+
document_license = d.xpath('documentLicenses/documentLicense').first
|
50
|
+
if document_license
|
51
51
|
license = Puree::Model::CopyrightLicense.new
|
52
52
|
license.name = document_license.text.strip
|
53
53
|
# license.name = document_license.xpath('term/localizedString').text.strip
|
@@ -88,7 +88,7 @@ module Puree
|
|
88
88
|
# @return [Array<String>]
|
89
89
|
def spatial_places
|
90
90
|
# Data from free-form text box
|
91
|
-
xpath_result = xpath_query '/geographicalCoverage'
|
91
|
+
xpath_result = xpath_query '/geographicalCoverages/geographicalCoverage'
|
92
92
|
data = []
|
93
93
|
xpath_result.each do |i|
|
94
94
|
data << i.text.strip
|
@@ -26,7 +26,8 @@ module Puree
|
|
26
26
|
header = Puree::Model::JournalHeader.new
|
27
27
|
header.title = xpath_result.xpath('title').text.strip
|
28
28
|
journal = xpath_result.xpath('journal')
|
29
|
-
|
29
|
+
xpath_result_type = journal.xpath('types/type')
|
30
|
+
header.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
30
31
|
header.uuid = journal.attr('uuid').text.strip
|
31
32
|
header
|
32
33
|
end
|
@@ -12,7 +12,8 @@ module Puree
|
|
12
12
|
if !xpath_result.empty?
|
13
13
|
header = Puree::Model::EventHeader.new
|
14
14
|
header.uuid = xpath_result.xpath('@uuid').text.strip
|
15
|
-
|
15
|
+
xpath_result_name = xpath_result.xpath('names/name')
|
16
|
+
header.title = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
16
17
|
return header if header.data?
|
17
18
|
end
|
18
19
|
nil
|
@@ -32,7 +32,8 @@ module Puree
|
|
32
32
|
name.first = i.xpath('name/firstName').text.strip
|
33
33
|
name.last = i.xpath('name/lastName').text.strip
|
34
34
|
person.name = name if name.data?
|
35
|
-
|
35
|
+
xpath_result_role = i.xpath('personRoles/personRole')
|
36
|
+
person.role = xpath_result_role.first.text.strip unless xpath_result_role.empty?
|
36
37
|
arr << person if person.data?
|
37
38
|
end
|
38
39
|
end
|
@@ -10,13 +10,14 @@ module Puree
|
|
10
10
|
# @return [Array<Puree::Model::RelatedContentHeader>]
|
11
11
|
def projects
|
12
12
|
xpath_result = xpath_query '/relatedProjects/relatedProject'
|
13
|
-
|
14
13
|
data_arr = []
|
15
14
|
xpath_result.each { |i|
|
16
15
|
related = Puree::Model::RelatedContentHeader.new
|
17
|
-
related.type = i.xpath('type').text.strip
|
18
|
-
related.title = i.xpath('name').text.strip
|
19
16
|
related.uuid = i.attr('uuid').strip
|
17
|
+
xpath_result_name = i.xpath('names/name')
|
18
|
+
related.title = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
19
|
+
xpath_result_type = i.xpath('types/type')
|
20
|
+
related.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
20
21
|
data_arr << related
|
21
22
|
}
|
22
23
|
data_arr.uniq { |d| d.uuid }
|
@@ -11,8 +11,10 @@ module Puree
|
|
11
11
|
xpath_result = xpath_query '/publisher'
|
12
12
|
h = Puree::Model::PublisherHeader.new
|
13
13
|
h.uuid = xpath_result.xpath('@uuid').text.strip
|
14
|
-
|
15
|
-
h.
|
14
|
+
xpath_result_name = xpath_result.xpath('names/name')
|
15
|
+
h.name = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
16
|
+
xpath_result_type = xpath_result.xpath('types/type')
|
17
|
+
h.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
16
18
|
h.data? ? h : nil
|
17
19
|
end
|
18
20
|
|
@@ -13,9 +13,11 @@ module Puree
|
|
13
13
|
data_arr = []
|
14
14
|
xpath_result.each { |i|
|
15
15
|
related = Puree::Model::RelatedContentHeader.new
|
16
|
-
related.type = i.xpath('type').text.strip
|
17
|
-
related.title = i.xpath('name').text.strip
|
18
16
|
related.uuid = i.attr('uuid').strip
|
17
|
+
xpath_result_name = i.xpath('names/name')
|
18
|
+
related.title = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
19
|
+
xpath_result_type = i.xpath('types/type')
|
20
|
+
related.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
19
21
|
data_arr << related
|
20
22
|
}
|
21
23
|
data_arr.uniq { |d| d.uuid }
|
@@ -25,8 +25,8 @@ module Puree
|
|
25
25
|
a.postcode = postcode unless building.empty?
|
26
26
|
city = xpath_result.xpath('city').text.strip
|
27
27
|
a.city = city unless city.empty?
|
28
|
-
country = xpath_result.xpath('country')
|
29
|
-
a.country = country unless country.empty?
|
28
|
+
country = xpath_result.xpath('countries/country')
|
29
|
+
a.country = country.first.text.strip unless country.empty?
|
30
30
|
a
|
31
31
|
end
|
32
32
|
end
|
@@ -38,7 +38,7 @@ module Puree
|
|
38
38
|
|
39
39
|
# @return [String, nil]
|
40
40
|
def name
|
41
|
-
xpath_query_for_single_value '/name'
|
41
|
+
xpath_query_for_single_value '/names/name'
|
42
42
|
end
|
43
43
|
|
44
44
|
# First parent
|
@@ -57,6 +57,21 @@ module Puree
|
|
57
57
|
xpath_query_for_single_value '/orcid'
|
58
58
|
end
|
59
59
|
|
60
|
+
# @return [Array<Model::PersonName>]
|
61
|
+
def other_names
|
62
|
+
xpath_result = xpath_query '/nameVariants/nameVariant/name'
|
63
|
+
data = []
|
64
|
+
xpath_result.each do |d|
|
65
|
+
first = xpath_result.xpath('firstName').text.strip
|
66
|
+
last = xpath_result.xpath('lastName').text.strip
|
67
|
+
model = Puree::Model::PersonName.new
|
68
|
+
model.first = first unless first.empty?
|
69
|
+
model.last = last unless last.empty?
|
70
|
+
data << model
|
71
|
+
end
|
72
|
+
data.uniq
|
73
|
+
end
|
74
|
+
|
60
75
|
private
|
61
76
|
|
62
77
|
def xpath_root
|
@@ -72,6 +87,7 @@ module Puree
|
|
72
87
|
@model.keywords = keywords
|
73
88
|
@model.name = name
|
74
89
|
@model.orcid = orcid
|
90
|
+
@model.other_names = other_names
|
75
91
|
@model
|
76
92
|
end
|
77
93
|
|
@@ -23,12 +23,12 @@ module Puree
|
|
23
23
|
|
24
24
|
# @return [String, nil]
|
25
25
|
def bibliographical_note
|
26
|
-
xpath_query_for_single_value
|
26
|
+
xpath_query_for_single_value '/bibliographicalNotes/bibliographicalNote'
|
27
27
|
end
|
28
28
|
|
29
29
|
# @return [String, nil]
|
30
30
|
def category
|
31
|
-
xpath_query_for_single_value '/category'
|
31
|
+
xpath_query_for_single_value '/categories/category'
|
32
32
|
end
|
33
33
|
|
34
34
|
# Digital Object Identifier (first one, if many)
|
@@ -73,7 +73,7 @@ module Puree
|
|
73
73
|
|
74
74
|
# @return [String, nil]
|
75
75
|
def language
|
76
|
-
xpath_query_for_single_value '/language'
|
76
|
+
xpath_query_for_single_value '/languages/language'
|
77
77
|
end
|
78
78
|
|
79
79
|
# @return [Array<String>, nil]
|
@@ -83,7 +83,7 @@ module Puree
|
|
83
83
|
|
84
84
|
# @return [String, nil]
|
85
85
|
def open_access_permission
|
86
|
-
xpath_query_for_single_value '/openAccessPermission'
|
86
|
+
xpath_query_for_single_value '/openAccessPermissions/openAccessPermission'
|
87
87
|
end
|
88
88
|
|
89
89
|
# @return [Array<Puree::Model::EndeavourPerson>]
|
@@ -107,7 +107,7 @@ module Puree
|
|
107
107
|
data = []
|
108
108
|
xpath_result.each do |i|
|
109
109
|
s = Puree::Model::PublicationStatus.new
|
110
|
-
s.stage = i.xpath('publicationStatus').text.strip
|
110
|
+
s.stage = i.xpath('publicationStatuses/publicationStatus').text.strip
|
111
111
|
|
112
112
|
ymd = {}
|
113
113
|
ymd['year'] = i.xpath('publicationDate/year').text.strip
|
@@ -156,12 +156,12 @@ module Puree
|
|
156
156
|
|
157
157
|
# @return [String, nil]
|
158
158
|
def translated_subtitle
|
159
|
-
xpath_query_for_single_value '/translatedSubTitle'
|
159
|
+
xpath_query_for_single_value '/translatedSubTitles/translatedSubTitle'
|
160
160
|
end
|
161
161
|
|
162
162
|
# @return [String, nil]
|
163
163
|
def translated_title
|
164
|
-
xpath_query_for_single_value '/translatedTitle'
|
164
|
+
xpath_query_for_single_value '/translatedTitles/translatedTitle'
|
165
165
|
end
|
166
166
|
|
167
167
|
private
|
@@ -10,8 +10,10 @@ module Puree
|
|
10
10
|
def self.external_organisation_header(nokogiri_xml_element)
|
11
11
|
h = Puree::Model::ExternalOrganisationHeader.new
|
12
12
|
h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
|
13
|
-
|
14
|
-
h.
|
13
|
+
xpath_result_name = nokogiri_xml_element.xpath('names/name')
|
14
|
+
h.name = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
15
|
+
xpath_result_type = nokogiri_xml_element.xpath('types/type')
|
16
|
+
h.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
15
17
|
h.data? ? h : nil
|
16
18
|
end
|
17
19
|
|
@@ -29,8 +31,10 @@ module Puree
|
|
29
31
|
def self.organisation_header(nokogiri_xml_element)
|
30
32
|
h = Puree::Model::OrganisationalUnitHeader.new
|
31
33
|
h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
|
32
|
-
|
33
|
-
h.
|
34
|
+
xpath_result_name = nokogiri_xml_element.xpath('names/name')
|
35
|
+
h.name = xpath_result_name.first.text.strip unless xpath_result_name.empty?
|
36
|
+
xpath_result_type = nokogiri_xml_element.xpath('types/type')
|
37
|
+
h.type = xpath_result_type.first.text.strip unless xpath_result_type.empty?
|
34
38
|
h.data? ? h : nil
|
35
39
|
end
|
36
40
|
|
data/puree.gemspec
CHANGED
@@ -10,8 +10,8 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = 'a.albin-clark@lancaster.ac.uk'
|
11
11
|
spec.summary = %q{Metadata extraction from the Pure Research Information System.}
|
12
12
|
spec.metadata = {
|
13
|
-
'source_code_uri' =>
|
14
|
-
"documentation_uri" => "https://www.rubydoc.info/gems
|
13
|
+
'source_code_uri' => "https://github.com/lulibrary/#{spec.name}",
|
14
|
+
"documentation_uri" => "https://www.rubydoc.info/gems/#{spec.name}/#{spec.version}",
|
15
15
|
}
|
16
16
|
spec.license = 'MIT'
|
17
17
|
spec.files = `git ls-files -z`.split("\x0")
|
@@ -51,6 +51,10 @@ class TestXMLExtractorPerson < Minitest::Test
|
|
51
51
|
|
52
52
|
assert_instance_of String, x.orcid
|
53
53
|
refute_empty x.orcid
|
54
|
+
|
55
|
+
assert_instance_of Array, x.other_names
|
56
|
+
assert_instance_of Puree::Model::PersonName, x.other_names.first
|
57
|
+
assert x.other_names.first.data?
|
54
58
|
end
|
55
59
|
|
56
60
|
# def test_scopus_id
|
@@ -80,6 +84,9 @@ class TestXMLExtractorPerson < Minitest::Test
|
|
80
84
|
assert_nil x.name
|
81
85
|
|
82
86
|
assert_nil x.orcid
|
87
|
+
|
88
|
+
assert_instance_of Array, x.other_names
|
89
|
+
assert_empty x.other_names
|
83
90
|
end
|
84
91
|
|
85
92
|
def test_model
|
@@ -289,8 +289,12 @@ class TestXMLExtractorResearchOutput < Minitest::Test
|
|
289
289
|
'<contributionToJournal>
|
290
290
|
<relatedProjects>
|
291
291
|
<relatedProject uuid="fe8aebdf-a926-4e7b-adf1-082425e50330">
|
292
|
-
<
|
293
|
-
|
292
|
+
<names>
|
293
|
+
<name>The Language Bases of Reading Comprehension</name>
|
294
|
+
</names>
|
295
|
+
<types>
|
296
|
+
<type uri="/dk/atira/pure/upmproject/upmprojecttypes/upmproject/research">Research</type>
|
297
|
+
</types>
|
294
298
|
</relatedProject>
|
295
299
|
</relatedProjects>
|
296
300
|
</contributionToJournal>'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: puree
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.7.0
|
4
|
+
version: 2.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adrian Albin-Clark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -236,7 +236,7 @@ licenses:
|
|
236
236
|
- MIT
|
237
237
|
metadata:
|
238
238
|
source_code_uri: https://github.com/lulibrary/puree
|
239
|
-
documentation_uri: https://www.rubydoc.info/gems/puree/2.7.0
|
239
|
+
documentation_uri: https://www.rubydoc.info/gems/puree/2.8.0
|
240
240
|
post_install_message:
|
241
241
|
rdoc_options: []
|
242
242
|
require_paths:
|