puree 1.9.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -12
- data/Gemfile +1 -1
- data/README.md +106 -75
- data/lib/puree/extractor/conference_paper.rb +6 -14
- data/lib/puree/extractor/dataset.rb +5 -41
- data/lib/puree/extractor/doctoral_thesis.rb +5 -6
- data/lib/puree/extractor/event.rb +6 -14
- data/lib/puree/extractor/external_organisation.rb +5 -8
- data/lib/puree/extractor/extractor.rb +19 -0
- data/lib/puree/extractor/journal.rb +5 -9
- data/lib/puree/extractor/journal_article.rb +6 -15
- data/lib/puree/extractor/masters_thesis.rb +5 -6
- data/lib/puree/extractor/organisational_unit.rb +25 -0
- data/lib/puree/extractor/paper.rb +6 -11
- data/lib/puree/extractor/person.rb +4 -53
- data/lib/puree/extractor/project.rb +5 -28
- data/lib/puree/extractor/publisher.rb +5 -12
- data/lib/puree/extractor/research_output.rb +20 -0
- data/lib/puree/extractor/resource.rb +22 -58
- data/lib/puree/extractor/thesis.rb +6 -17
- data/lib/puree/model/conference_paper.rb +1 -1
- data/lib/puree/model/dataset.rb +8 -25
- data/lib/puree/model/event.rb +0 -9
- data/lib/puree/model/external_organisation.rb +1 -1
- data/lib/puree/model/external_organisation_header.rb +2 -19
- data/lib/puree/model/identifier.rb +26 -0
- data/lib/puree/model/journal.rb +1 -1
- data/lib/puree/model/journal_article.rb +1 -4
- data/lib/puree/model/model.rb +43 -0
- data/lib/puree/model/{organisation_header.rb → organisation_header_base.rb} +2 -2
- data/lib/puree/model/{organisation.rb → organisational_unit.rb} +3 -6
- data/lib/puree/model/organisational_unit_header.rb +9 -0
- data/lib/puree/model/paper.rb +10 -1
- data/lib/puree/model/person.rb +3 -9
- data/lib/puree/model/project.rb +7 -14
- data/lib/puree/model/publication_status.rb +1 -1
- data/lib/puree/model/publisher.rb +3 -0
- data/lib/puree/model/publisher_header.rb +9 -0
- data/lib/puree/model/{publication.rb → research_output.rb} +18 -20
- data/lib/puree/model/research_output_scopus_metric.rb +26 -0
- data/lib/puree/model/resource.rb +19 -11
- data/lib/puree/model/thesis.rb +6 -3
- data/lib/puree/rest/activity.rb +24 -0
- data/lib/puree/rest/application.rb +24 -0
- data/lib/puree/rest/base.rb +131 -0
- data/lib/puree/rest/classification_scheme.rb +24 -0
- data/lib/puree/rest/client.rb +105 -0
- data/lib/puree/rest/curricula_vitae.rb +24 -0
- data/lib/puree/rest/dataset.rb +24 -0
- data/lib/puree/rest/equipment.rb +24 -0
- data/lib/puree/rest/event.rb +24 -0
- data/lib/puree/rest/external_organisation.rb +24 -0
- data/lib/puree/rest/external_person.rb +24 -0
- data/lib/puree/rest/impact.rb +24 -0
- data/lib/puree/rest/journal.rb +24 -0
- data/lib/puree/rest/mixins/active_mixin.rb +14 -0
- data/lib/puree/rest/mixins/activity_mixin.rb +14 -0
- data/lib/puree/rest/mixins/application_mixin.rb +15 -0
- data/lib/puree/rest/mixins/award_mixin.rb +15 -0
- data/lib/puree/rest/mixins/dataset_mixin.rb +15 -0
- data/lib/puree/rest/mixins/former_mixin.rb +14 -0
- data/lib/puree/rest/mixins/impact_mixin.rb +15 -0
- data/lib/puree/rest/mixins/person_mixin.rb +15 -0
- data/lib/puree/rest/mixins/press_media_mixin.rb +15 -0
- data/lib/puree/rest/mixins/prize_mixin.rb +15 -0
- data/lib/puree/rest/mixins/project_mixin.rb +15 -0
- data/lib/puree/rest/mixins/research_output_mixin.rb +15 -0
- data/lib/puree/rest/mixins/student_thesis_mixin.rb +15 -0
- data/lib/puree/rest/organisational_unit.rb +51 -0
- data/lib/puree/rest/person.rb +58 -0
- data/lib/puree/rest/press_media.rb +24 -0
- data/lib/puree/rest/prize.rb +24 -0
- data/lib/puree/rest/project.rb +27 -0
- data/lib/puree/rest/publisher.rb +31 -0
- data/lib/puree/rest/research_output.rb +24 -0
- data/lib/puree/rest/rest.rb +30 -0
- data/lib/puree/util/util.rb +3 -0
- data/lib/puree/version.rb +1 -1
- data/lib/puree/xml_extractor/base.rb +6 -6
- data/lib/puree/xml_extractor/collection.rb +112 -19
- data/lib/puree/xml_extractor/conference_paper.rb +9 -2
- data/lib/puree/xml_extractor/dataset.rb +56 -166
- data/lib/puree/xml_extractor/doctoral_thesis.rb +1 -1
- data/lib/puree/xml_extractor/event.rb +16 -19
- data/lib/puree/xml_extractor/external_organisation.rb +14 -5
- data/lib/puree/xml_extractor/journal.rb +18 -8
- data/lib/puree/xml_extractor/journal_article.rb +24 -11
- data/lib/puree/xml_extractor/masters_thesis.rb +1 -1
- data/lib/puree/xml_extractor/mixins/abstract_mixin.rb +17 -0
- data/lib/puree/xml_extractor/mixins/description_mixin.rb +17 -0
- data/lib/puree/xml_extractor/mixins/{external_organisations_mixin.rb → external_organisation_mixin.rb} +2 -2
- data/lib/puree/xml_extractor/mixins/identifier_mixin.rb +25 -0
- data/lib/puree/xml_extractor/mixins/keyword_mixin.rb +21 -0
- data/lib/puree/xml_extractor/mixins/organisational_unit_mixin.rb +18 -0
- data/lib/puree/xml_extractor/mixins/owner_mixin.rb +18 -0
- data/lib/puree/xml_extractor/mixins/peer_reviewed_mixin.rb +1 -1
- data/lib/puree/xml_extractor/mixins/person_mixin.rb +45 -0
- data/lib/puree/xml_extractor/mixins/publisher_mixin.rb +22 -0
- data/lib/puree/xml_extractor/mixins/{associated_mixin.rb → research_output_mixin.rb} +7 -7
- data/lib/puree/xml_extractor/mixins/title_mixin.rb +17 -0
- data/lib/puree/xml_extractor/mixins/type_mixin.rb +17 -0
- data/lib/puree/xml_extractor/mixins/workflow_mixin.rb +17 -0
- data/lib/puree/xml_extractor/organisational_unit.rb +82 -0
- data/lib/puree/xml_extractor/paper.rb +17 -3
- data/lib/puree/xml_extractor/person.rb +30 -35
- data/lib/puree/xml_extractor/project.rb +39 -75
- data/lib/puree/xml_extractor/publisher.rb +15 -6
- data/lib/puree/xml_extractor/research_output.rb +189 -0
- data/lib/puree/xml_extractor/resource.rb +28 -36
- data/lib/puree/xml_extractor/shared.rb +12 -9
- data/lib/puree/xml_extractor/thesis.rb +29 -15
- data/lib/puree/xml_extractor/xml_extractor.rb +43 -0
- data/lib/puree.rb +5 -114
- data/puree.gemspec +1 -1
- data/test/extractor/resource_test.rb +103 -0
- data/test/rest/base_test.rb +45 -0
- data/test/rest/common_test.rb +44 -0
- data/test/test_extractor_helper.rb +1 -0
- data/test/test_helper.rb +17 -0
- data/test/test_rest_helper.rb +82 -0
- data/test/test_xml_extractor_helper.rb +17 -0
- data/test/xml_extractor/xml_extractor_collection_test.rb +120 -0
- data/test/xml_extractor/xml_extractor_conference_paper_test.rb +69 -0
- data/test/xml_extractor/xml_extractor_dataset_test.rb +156 -0
- data/test/xml_extractor/xml_extractor_event_test.rb +58 -0
- data/test/xml_extractor/xml_extractor_external_organisation_test.rb +49 -0
- data/test/xml_extractor/xml_extractor_journal_article_test.rb +66 -0
- data/test/xml_extractor/xml_extractor_journal_test.rb +53 -0
- data/test/xml_extractor/xml_extractor_organisation_test.rb +80 -0
- data/test/xml_extractor/xml_extractor_person_test.rb +88 -0
- data/test/xml_extractor/xml_extractor_project_test.rb +136 -0
- data/test/xml_extractor/xml_extractor_publisher_test.rb +49 -0
- data/test/xml_extractor/xml_extractor_research_output_test.rb +214 -0
- data/test/xml_extractor/xml_extractor_thesis_test.rb +80 -0
- metadata +105 -68
- data/lib/puree/api/api.rb +0 -9
- data/lib/puree/api/authentication.rb +0 -33
- data/lib/puree/api/configuration.rb +0 -43
- data/lib/puree/api/map.rb +0 -80
- data/lib/puree/api/person_request.rb +0 -64
- data/lib/puree/api/request.rb +0 -119
- data/lib/puree/extractor/collection.rb +0 -131
- data/lib/puree/extractor/download.rb +0 -71
- data/lib/puree/extractor/organisation.rb +0 -34
- data/lib/puree/extractor/paper_base.rb +0 -28
- data/lib/puree/extractor/publication.rb +0 -53
- data/lib/puree/extractor/server.rb +0 -56
- data/lib/puree/model/download_header.rb +0 -21
- data/lib/puree/model/paper_base.rb +0 -19
- data/lib/puree/model/server.rb +0 -13
- data/lib/puree/query/funding.rb +0 -54
- data/lib/puree/query/person.rb +0 -121
- data/lib/puree/query/query.rb +0 -6
- data/lib/puree/xml_extractor/download.rb +0 -42
- data/lib/puree/xml_extractor/mixins/workflow_state_mixin.rb +0 -18
- data/lib/puree/xml_extractor/organisation.rb +0 -75
- data/lib/puree/xml_extractor/paper_base.rb +0 -17
- data/lib/puree/xml_extractor/publication.rb +0 -257
- data/lib/puree/xml_extractor/server.rb +0 -32
- data/spec/download_http_spec.rb +0 -31
- data/spec/open_api_dataset_http_spec.rb +0 -15
- data/spec/query/funding_http_spec.rb +0 -29
- data/spec/query/person_http_spec.rb +0 -52
- data/spec/resource/collection_all_http_spec.rb +0 -77
- data/spec/resource/collection_http_spec.rb +0 -65
- data/spec/resource/dataset_http_spec.rb +0 -112
- data/spec/resource/event_http_spec.rb +0 -52
- data/spec/resource/journal_http_spec.rb +0 -36
- data/spec/resource/organisation_http_spec.rb +0 -52
- data/spec/resource/person_http_spec.rb +0 -60
- data/spec/resource/project_http_spec.rb +0 -89
- data/spec/resource/publication_http_spec.rb +0 -126
- data/spec/resource/publisher_http_spec.rb +0 -26
- data/spec/server_http_spec.rb +0 -26
- data/spec/spec_helper.rb +0 -159
@@ -0,0 +1,189 @@
|
|
1
|
+
module Puree

  module XMLExtractor

    # Research output XML extractor.
    #
    # Reads research output metadata out of an XML document and populates a
    # research output model with it.
    #
    class ResearchOutput < Puree::XMLExtractor::Resource
      include Puree::XMLExtractor::AbstractMixin
      include Puree::XMLExtractor::KeywordMixin
      include Puree::XMLExtractor::OrganisationalUnitMixin
      include Puree::XMLExtractor::OwnerMixin
      include Puree::XMLExtractor::PersonMixin
      include Puree::XMLExtractor::ResearchOutputMixin
      include Puree::XMLExtractor::WorkflowMixin
      include Puree::XMLExtractor::TitleMixin
      include Puree::XMLExtractor::TypeMixin

      # @param xml [String] passed through unchanged to the superclass
      def initialize(xml)
        super
        setup_model :research_output
      end

      # @return [String, nil]
      def bibliographical_note
        xpath_query_for_single_value('/bibliographicalNote')
      end

      # @return [String, nil]
      def category
        xpath_query_for_single_value '/category'
      end

      # Digital Object Identifier
      # @return [String, nil]
      def doi
        xpath_query_for_single_value '/electronicVersions/electronicVersion[@type="wsElectronicVersionDoiAssociation"]/doi'
      end

      # One file model per file-type electronic version. Entries sharing a URL
      # are collapsed; the first occurrence is kept.
      # @return [Array<Puree::Model::File>]
      def files
        xpath_result = xpath_query '/electronicVersions/electronicVersion[@type="wsElectronicVersionFileAssociation"]'
        docs = xpath_result.map do |d|
          model = Puree::Model::File.new
          model.name = d.xpath('file/fileName').text.strip
          model.mime = d.xpath('file/mimeType').text.strip
          model.size = d.xpath('file/size').text.strip.to_i
          model.url = d.xpath('file/URL').text.strip
          # License extraction is currently disabled:
          # document_license = d.xpath('licenseType')
          # if !document_license.empty?
          #   license = Puree::Model::CopyrightLicense.new
          #   license.name = document_license.xpath('term/localizedString').text.strip
          #   license.url = document_license.xpath('description/localizedString').text.strip
          #   model.license = license if license.data?
          # end
          model
        end
        docs.uniq(&:url)
      end

      # @return [Array<String>]
      def keywords
        keyword_group 'keywordContainers'
      end

      # @return [String, nil]
      def language
        xpath_query_for_single_value '/language'
      end

      # Links from link-type electronic versions.
      # @return [Array<String>, nil]
      def links
        xpath_query_for_multi_value '/electronicVersions/electronicVersion[@type="wsElectronicVersionLinkAssociation"]/link'
      end

      # @return [String, nil]
      def open_access_permission
        xpath_query_for_single_value '/openAccessPermission'
      end

      # @return [Array<Puree::Model::EndeavourPerson>]
      def persons_internal
        persons 'internal', '/personAssociations/personAssociation'
      end

      # @return [Array<Puree::Model::EndeavourPerson>]
      def persons_external
        persons 'external', '/personAssociations/personAssociation'
      end

      # @return [Array<Puree::Model::EndeavourPerson>]
      def persons_other
        persons 'other', '/personAssociations/personAssociation'
      end

      # One status per publicationStatus element. Entries sharing a stage are
      # collapsed; the first occurrence is kept.
      # @return [Array<Puree::Model::PublicationStatus>]
      def publication_statuses
        xpath_result = xpath_query '/publicationStatuses/publicationStatus'
        data = xpath_result.map do |i|
          s = Puree::Model::PublicationStatus.new
          s.stage = i.xpath('publicationStatus').text.strip

          # Date parts are collected as stripped strings and converted by
          # Puree::Util::Date.hash_to_time.
          ymd = {
            'year' => i.xpath('publicationDate/year').text.strip,
            'month' => i.xpath('publicationDate/month').text.strip,
            'day' => i.xpath('publicationDate/day').text.strip
          }
          s.date = Puree::Util::Date.hash_to_time ymd

          s
        end
        data.uniq(&:stage)
      end

      # @return [Integer, nil]
      def scopus_citations_count
        xpath_result = xpath_query_for_single_value '/totalScopusCitations'
        xpath_result.to_i if xpath_result
      end

      # One metric per scopusMetric element; value and year are assigned as
      # stripped text.
      # @return [Array<Puree::Model::ResearchOutputScopusMetric>]
      def scopus_metrics
        xpath_result = xpath_query '/scopusMetrics/scopusMetric'
        xpath_result.map do |i|
          s = Puree::Model::ResearchOutputScopusMetric.new
          s.value = i.xpath('value').text.strip
          s.year = i.xpath('year').text.strip
          s
        end
      end

      # @return [String, nil]
      def subtitle
        xpath_query_for_single_value '/subTitle'
      end

      # @return [String, nil]
      def translated_subtitle
        xpath_query_for_single_value '/translatedSubTitle'
      end

      # @return [String, nil]
      def translated_title
        xpath_query_for_single_value '/translatedTitle'
      end

      private

      # Matches whatever the document's root element is.
      def xpath_root
        '/*'
      end

      # Populates the model with the superclass metadata plus every research
      # output field, then returns the model.
      def combine_metadata
        super
        @model.bibliographical_note = bibliographical_note
        @model.category = category
        @model.description = description
        @model.doi = doi
        @model.files = files
        @model.keywords = keywords
        @model.language = language
        @model.links = links
        @model.open_access_permission = open_access_permission
        @model.organisations = organisational_units
        @model.owner = owner
        @model.persons_internal = persons_internal
        @model.persons_external = persons_external
        @model.persons_other = persons_other
        @model.publication_statuses = publication_statuses
        @model.research_outputs = research_outputs
        @model.scopus_citations_count = scopus_citations_count
        @model.scopus_metrics = scopus_metrics
        @model.subtitle = subtitle
        @model.title = title
        @model.translated_subtitle = translated_subtitle
        @model.translated_title = translated_title
        @model.type = type
        @model.workflow = workflow
        @model
      end

    end

  end

end
|
@@ -6,34 +6,33 @@ module Puree
|
|
6
6
|
#
|
7
7
|
class Resource < Puree::XMLExtractor::Base
|
8
8
|
|
9
|
-
def initialize(xml
|
9
|
+
def initialize(xml)
|
10
10
|
super
|
11
11
|
end
|
12
12
|
|
13
|
-
#
|
14
|
-
def
|
15
|
-
|
16
|
-
@doc.xpath path_from_root
|
13
|
+
# @return [Puree::Model::Resource subclass]
|
14
|
+
def model
|
15
|
+
combine_metadata
|
17
16
|
end
|
18
17
|
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
path = service_xpath_count
|
23
|
-
xpath_result = @doc.xpath path
|
24
|
-
xpath_result.text.strip === '1' ? true : false
|
18
|
+
# @return [String, nil]
|
19
|
+
def created_by
|
20
|
+
xpath_query_for_single_value('/info/createdBy')
|
25
21
|
end
|
26
22
|
|
27
23
|
# @return [Time, nil]
|
28
|
-
def
|
29
|
-
|
30
|
-
|
24
|
+
def created_at
  # Time.parse raises TypeError when given nil, so only parse when the
  # element is present; otherwise return nil as documented.
  xpath_result = xpath_query_for_single_value('/info/createdDate')
  Time.parse xpath_result if xpath_result
end
|
27
|
+
|
28
|
+
# @return [String, nil]
|
29
|
+
def modified_by
|
30
|
+
xpath_query_for_single_value('/info/modifiedBy')
|
31
31
|
end
|
32
32
|
|
33
33
|
# @return [Time, nil]
|
34
|
-
def
|
35
|
-
|
36
|
-
Time.parse xpath_result if xpath_result
|
34
|
+
def modified_at
  # Time.parse raises TypeError when given nil, so only parse when the
  # element is present; otherwise return nil as documented.
  xpath_result = xpath_query_for_single_value('/info/modifiedDate')
  Time.parse xpath_result if xpath_result
end
|
38
37
|
|
39
38
|
# @return [String, nil]
|
@@ -41,29 +40,22 @@ module Puree
|
|
41
40
|
xpath_query_for_single_value '/@uuid'
|
42
41
|
end
|
43
42
|
|
44
|
-
# Locale (e.g. en-GB)
|
45
|
-
# @return [String, nil]
|
46
|
-
def locale
|
47
|
-
str = xpath_query_for_single_value '/@locale'
|
48
|
-
str.tr('_','-') if str
|
49
|
-
end
|
50
|
-
|
51
43
|
private
|
52
44
|
|
53
|
-
def
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
def service_xpath_base
|
58
|
-
service_response_name + '/result/content'
|
59
|
-
end
|
60
|
-
|
61
|
-
def service_xpath_count
|
62
|
-
service_response_name + '/count'
|
45
|
+
def xpath_query(path)
|
46
|
+
path_from_root = File.join xpath_root, path
|
47
|
+
@doc.xpath path_from_root
|
63
48
|
end
|
64
49
|
|
65
|
-
|
66
|
-
|
50
|
+
# Populates the model with the metadata common to all resources.
# @raise [RuntimeError] if no model has been set up
# @return [Puree::Model::Resource subclass]
def combine_metadata
  raise 'No model to populate' unless @model
  @model.uuid = uuid
  @model.created_by = created_by
  @model.created_at = created_at
  @model.modified_by = modified_by
  @model.modified_at = modified_at
  # Return the model itself rather than the last assigned value.
  @model
end
|
68
60
|
|
69
61
|
end
|
@@ -11,32 +11,35 @@ module Puree
|
|
11
11
|
h = Puree::Model::ExternalOrganisationHeader.new
|
12
12
|
h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
|
13
13
|
h.name = nokogiri_xml_element.xpath('name').text.strip
|
14
|
-
h
|
14
|
+
h.type = nokogiri_xml_element.xpath('type').text.strip
|
15
|
+
h.data? ? h : nil
|
15
16
|
end
|
16
17
|
|
17
18
|
# @return [Array<Puree::Model::ExternalOrganisationHeader>]
|
18
19
|
def self.external_organisation_multi_header(nokogiri_xml_nodeset)
|
19
20
|
data = []
|
20
21
|
nokogiri_xml_nodeset.each do |i|
|
21
|
-
|
22
|
+
header = external_organisation_header(i)
|
23
|
+
data << header if header
|
22
24
|
end
|
23
25
|
data.uniq { |d| d.uuid }
|
24
26
|
end
|
25
27
|
|
26
|
-
# @return [Puree::Model::
|
28
|
+
# @return [Puree::Model::OrganisationalUnitHeader]
|
27
29
|
def self.organisation_header(nokogiri_xml_element)
|
28
|
-
h = Puree::Model::
|
30
|
+
h = Puree::Model::OrganisationalUnitHeader.new
|
29
31
|
h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
|
30
|
-
h.name = nokogiri_xml_element.xpath('name
|
31
|
-
h.type = nokogiri_xml_element.xpath('
|
32
|
-
h
|
32
|
+
h.name = nokogiri_xml_element.xpath('name').text.strip
|
33
|
+
h.type = nokogiri_xml_element.xpath('type').text.strip
|
34
|
+
h.data? ? h : nil
|
33
35
|
end
|
34
36
|
|
35
|
-
# @return [Array<Puree::Model::
|
37
|
+
# @return [Array<Puree::Model::OrganisationalUnitHeader>]
|
36
38
|
def self.organisation_multi_header(nokogiri_xml_nodeset)
|
37
39
|
data = []
|
38
40
|
nokogiri_xml_nodeset.each do |i|
|
39
|
-
|
41
|
+
header = organisation_header(i)
|
42
|
+
data << header if header
|
40
43
|
end
|
41
44
|
data.uniq { |d| d.uuid }
|
42
45
|
end
|
@@ -3,42 +3,56 @@ module Puree
|
|
3
3
|
|
4
4
|
# Thesis XML extractor.
|
5
5
|
#
|
6
|
-
class Thesis < Puree::XMLExtractor::
|
6
|
+
class Thesis < Puree::XMLExtractor::ResearchOutput
|
7
7
|
include Puree::XMLExtractor::DoiMixin
|
8
8
|
include Puree::XMLExtractor::PagesMixin
|
9
|
+
include Puree::XMLExtractor::PublisherMixin
|
9
10
|
|
10
|
-
def initialize(xml
|
11
|
+
def initialize(xml)
|
11
12
|
super
|
13
|
+
setup_model :thesis
|
12
14
|
end
|
13
15
|
|
14
16
|
# @return [Time, nil]
|
15
17
|
def award_date
|
16
|
-
xpath_result = xpath_query_for_single_value('/
|
18
|
+
xpath_result = xpath_query_for_single_value('/awardedDate')
|
17
19
|
Time.parse xpath_result if xpath_result
|
18
20
|
end
|
19
21
|
|
20
|
-
# @return [Puree::Model::
|
22
|
+
# @return [Puree::Model::ExternalOrganisationHeader, nil]
|
21
23
|
def awarding_institution
|
22
|
-
xpath_result = xpath_query '/awardingInstitution/
|
23
|
-
Puree::XMLExtractor::Shared.
|
24
|
+
xpath_result = xpath_query '/awardingInstitutions/awardingInstitution/externalOrganisationalUnit'
|
25
|
+
Puree::XMLExtractor::Shared.external_organisation_header xpath_result if xpath_result
|
24
26
|
end
|
25
27
|
|
26
28
|
# @return [String, nil]
|
27
29
|
def qualification
|
28
|
-
|
29
|
-
'/dk/atira/pure/thesis/qualification/mphil' => 'MPhil',
|
30
|
-
'/dk/atira/pure/thesis/qualification/phd' => 'PhD',
|
31
|
-
'/dk/atira/pure/thesis/qualification/masters_by_research' => 'Masters by Research'
|
32
|
-
}
|
33
|
-
xpath_result = xpath_query_for_single_value '/qualification/uri'
|
34
|
-
types[xpath_result]
|
30
|
+
xpath_query_for_single_value '/qualification'
|
35
31
|
end
|
36
32
|
|
37
|
-
# @return [Array<
|
33
|
+
# @return [Array<Puree::Model::ExternalOrganisationHeader>]
|
38
34
|
def sponsors
|
39
|
-
|
35
|
+
xpath_result = xpath_query '/sponsors/sponsor'
|
36
|
+
Puree::XMLExtractor::Shared.external_organisation_multi_header xpath_result if xpath_result
|
40
37
|
end
|
41
38
|
|
39
|
+
private
|
40
|
+
|
41
|
+
def xpath_root
|
42
|
+
'/thesis'
|
43
|
+
end
|
44
|
+
|
45
|
+
def combine_metadata
|
46
|
+
super
|
47
|
+
@model.award_date = award_date
|
48
|
+
@model.awarding_institution = awarding_institution
|
49
|
+
@model.doi = doi
|
50
|
+
@model.pages = pages
|
51
|
+
@model.publisher = publisher
|
52
|
+
@model.qualification = qualification
|
53
|
+
@model.sponsors = sponsors
|
54
|
+
@model
|
55
|
+
end
|
42
56
|
end
|
43
57
|
end
|
44
58
|
end
|
@@ -1,3 +1,46 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
require 'puree/xml_extractor/mixins/abstract_mixin'
|
4
|
+
require 'puree/xml_extractor/mixins/description_mixin'
|
5
|
+
require 'puree/xml_extractor/mixins/doi_mixin'
|
6
|
+
require 'puree/xml_extractor/mixins/event_mixin'
|
7
|
+
require 'puree/xml_extractor/mixins/external_organisation_mixin'
|
8
|
+
require 'puree/xml_extractor/mixins/identifier_mixin'
|
9
|
+
require 'puree/xml_extractor/mixins/keyword_mixin'
|
10
|
+
require 'puree/xml_extractor/mixins/organisational_unit_mixin'
|
11
|
+
require 'puree/xml_extractor/mixins/owner_mixin'
|
12
|
+
require 'puree/xml_extractor/mixins/pages_mixin'
|
13
|
+
require 'puree/xml_extractor/mixins/person_mixin'
|
14
|
+
require 'puree/xml_extractor/mixins/page_range_mixin'
|
15
|
+
require 'puree/xml_extractor/mixins/peer_reviewed_mixin'
|
16
|
+
require 'puree/xml_extractor/mixins/publisher_mixin'
|
17
|
+
require 'puree/xml_extractor/mixins/research_output_mixin'
|
18
|
+
require 'puree/xml_extractor/mixins/workflow_mixin'
|
19
|
+
require 'puree/xml_extractor/mixins/title_mixin'
|
20
|
+
require 'puree/xml_extractor/mixins/type_mixin'
|
21
|
+
|
22
|
+
require 'puree/xml_extractor/shared'
|
23
|
+
require 'puree/xml_extractor/base'
|
24
|
+
require 'puree/xml_extractor/resource'
|
25
|
+
require 'puree/xml_extractor/dataset'
|
26
|
+
require 'puree/xml_extractor/event'
|
27
|
+
require 'puree/xml_extractor/external_organisation'
|
28
|
+
require 'puree/xml_extractor/journal'
|
29
|
+
require 'puree/xml_extractor/organisational_unit'
|
30
|
+
require 'puree/xml_extractor/person'
|
31
|
+
require 'puree/xml_extractor/project'
|
32
|
+
require 'puree/xml_extractor/publisher'
|
33
|
+
|
34
|
+
require 'puree/xml_extractor/research_output'
|
35
|
+
require 'puree/xml_extractor/thesis'
|
36
|
+
require 'puree/xml_extractor/doctoral_thesis'
|
37
|
+
require 'puree/xml_extractor/masters_thesis'
|
38
|
+
require 'puree/xml_extractor/journal_article'
|
39
|
+
require 'puree/xml_extractor/paper'
|
40
|
+
require 'puree/xml_extractor/conference_paper'
|
41
|
+
|
42
|
+
require 'puree/xml_extractor/collection'
|
43
|
+
|
1
44
|
module Puree
|
2
45
|
|
3
46
|
# An XMLExtractor manages the extraction of metadata from XML into Ruby
|
data/lib/puree.rb
CHANGED
@@ -1,119 +1,10 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
|
1
|
+
require 'puree/extractor/extractor'
|
2
|
+
require 'puree/model/model'
|
3
|
+
require 'puree/rest/rest'
|
4
|
+
require 'puree/util/util'
|
5
|
+
require 'puree/xml_extractor/xml_extractor'
|
4
6
|
require 'puree/version'
|
5
7
|
|
6
|
-
require 'puree/util/date'
|
7
|
-
require 'puree/util/string'
|
8
|
-
|
9
|
-
require 'puree/xml_extractor/mixins/associated_mixin'
|
10
|
-
require 'puree/xml_extractor/mixins/doi_mixin'
|
11
|
-
require 'puree/xml_extractor/mixins/event_mixin'
|
12
|
-
require 'puree/xml_extractor/mixins/external_organisations_mixin'
|
13
|
-
require 'puree/xml_extractor/mixins/pages_mixin'
|
14
|
-
require 'puree/xml_extractor/mixins/page_range_mixin'
|
15
|
-
require 'puree/xml_extractor/mixins/peer_reviewed_mixin'
|
16
|
-
require 'puree/xml_extractor/mixins/workflow_state_mixin'
|
17
|
-
|
18
|
-
require 'puree/xml_extractor/shared'
|
19
|
-
require 'puree/xml_extractor/base'
|
20
|
-
require 'puree/xml_extractor/collection'
|
21
|
-
require 'puree/xml_extractor/resource'
|
22
|
-
require 'puree/xml_extractor/dataset'
|
23
|
-
require 'puree/xml_extractor/download'
|
24
|
-
require 'puree/xml_extractor/event'
|
25
|
-
require 'puree/xml_extractor/journal'
|
26
|
-
require 'puree/xml_extractor/organisation'
|
27
|
-
require 'puree/xml_extractor/external_organisation'
|
28
|
-
require 'puree/xml_extractor/person'
|
29
|
-
require 'puree/xml_extractor/project'
|
30
|
-
|
31
|
-
require 'puree/xml_extractor/publication'
|
32
|
-
require 'puree/xml_extractor/thesis'
|
33
|
-
require 'puree/xml_extractor/doctoral_thesis'
|
34
|
-
require 'puree/xml_extractor/masters_thesis'
|
35
|
-
require 'puree/xml_extractor/journal_article'
|
36
|
-
require 'puree/xml_extractor/paper_base'
|
37
|
-
require 'puree/xml_extractor/conference_paper'
|
38
|
-
require 'puree/xml_extractor/paper'
|
39
|
-
|
40
|
-
require 'puree/xml_extractor/publisher'
|
41
|
-
require 'puree/xml_extractor/server'
|
42
|
-
|
43
|
-
require 'puree/api/map'
|
44
|
-
require 'puree/api/request'
|
45
|
-
require 'puree/api/person_request'
|
46
|
-
require 'puree/api/configuration'
|
47
|
-
require 'puree/api/authentication'
|
48
|
-
|
49
|
-
require 'puree/model/helper/validation'
|
50
|
-
require 'puree/model/structure'
|
51
|
-
|
52
|
-
require 'puree/model/resource'
|
53
|
-
require 'puree/model/dataset'
|
54
|
-
require 'puree/model/download_header'
|
55
|
-
require 'puree/model/event'
|
56
|
-
require 'puree/model/event_header'
|
57
|
-
require 'puree/model/external_organisation'
|
58
|
-
require 'puree/model/journal'
|
59
|
-
require 'puree/model/journal_header'
|
60
|
-
require 'puree/model/link'
|
61
|
-
require 'puree/model/organisation'
|
62
|
-
require 'puree/model/person'
|
63
|
-
require 'puree/model/project'
|
64
|
-
|
65
|
-
require 'puree/model/publication'
|
66
|
-
require 'puree/model/thesis'
|
67
|
-
require 'puree/model/doctoral_thesis'
|
68
|
-
require 'puree/model/masters_thesis'
|
69
|
-
require 'puree/model/journal_article'
|
70
|
-
require 'puree/model/paper_base'
|
71
|
-
require 'puree/model/conference_paper'
|
72
|
-
require 'puree/model/paper'
|
73
|
-
|
74
|
-
require 'puree/model/publisher'
|
75
|
-
require 'puree/model/related_content_header'
|
76
|
-
require 'puree/model/spatial_point'
|
77
|
-
|
78
|
-
require 'puree/model/address'
|
79
|
-
require 'puree/model/copyright_license'
|
80
|
-
require 'puree/model/event_header'
|
81
|
-
require 'puree/model/external_organisation_header'
|
82
|
-
require 'puree/model/file'
|
83
|
-
require 'puree/model/legal_condition'
|
84
|
-
require 'puree/model/organisation_header'
|
85
|
-
require 'puree/model/endeavour_person'
|
86
|
-
require 'puree/model/person_name'
|
87
|
-
require 'puree/model/publication_status'
|
88
|
-
require 'puree/model/server'
|
89
|
-
require 'puree/model/temporal_range'
|
90
|
-
|
91
|
-
require 'puree/extractor/resource'
|
92
|
-
require 'puree/extractor/dataset'
|
93
|
-
require 'puree/extractor/event'
|
94
|
-
require 'puree/extractor/external_organisation'
|
95
|
-
require 'puree/extractor/journal'
|
96
|
-
require 'puree/extractor/organisation'
|
97
|
-
require 'puree/extractor/person'
|
98
|
-
require 'puree/extractor/project'
|
99
|
-
|
100
|
-
require 'puree/extractor/publication'
|
101
|
-
require 'puree/extractor/thesis'
|
102
|
-
require 'puree/extractor/doctoral_thesis'
|
103
|
-
require 'puree/extractor/masters_thesis'
|
104
|
-
require 'puree/extractor/journal_article'
|
105
|
-
require 'puree/extractor/paper_base'
|
106
|
-
require 'puree/extractor/conference_paper'
|
107
|
-
require 'puree/extractor/paper'
|
108
|
-
|
109
|
-
require 'puree/extractor/publisher'
|
110
|
-
require 'puree/extractor/collection'
|
111
|
-
require 'puree/extractor/download'
|
112
|
-
require 'puree/extractor/server'
|
113
|
-
|
114
|
-
require 'puree/query/funding'
|
115
|
-
require 'puree/query/person'
|
116
|
-
|
117
8
|
# Metadata extraction from the Pure Research Information System.
|
118
9
|
#
|
119
10
|
module Puree
|
data/puree.gemspec
CHANGED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# For each resource type, looks up a known record by id and checks that the
# extractor returns a model of the matching class.
#
class TestResourceFind < Minitest::Test

  def test_conference_paper
    # A Negative Effect of Evaluation Upon Analogical Problem Solving
    assert_found Puree::Extractor::ConferencePaper,
                 Puree::Model::ConferencePaper,
                 '96e1495e-70a2-4529-9721-33b2cb62df8d'
  end

  def test_dataset
    # The 2014 Ebola virus disease outbreak in West Africa
    assert_found Puree::Extractor::Dataset,
                 Puree::Model::Dataset,
                 'b050f4b5-e272-4914-8cac-3bdc1e673c58'
  end

  def test_event
    # 31st Annual European Meeting on Atmospheric Studies by Optical Methods and 1st International Riometer Workshop
    assert_found Puree::Extractor::Event,
                 Puree::Model::Event,
                 'cd2bf302-4629-4f71-9c02-2dfe50a384bf'
  end

  def test_external_organisation
    # STFC
    assert_found Puree::Extractor::ExternalOrganisation,
                 Puree::Model::ExternalOrganisation,
                 '2ea6bbc4-c957-4a07-a1e7-604a2d944c20'
  end

  def test_journal
    # Chemical Geology
    assert_found Puree::Extractor::Journal,
                 Puree::Model::Journal,
                 '95e40a10-1799-4e74-9a70-8b03f27d9acb'
  end

  def test_journal_article
    # A theoretical framework for estimation of AUCs in complete and incomplete sampling designs
    assert_found Puree::Extractor::JournalArticle,
                 Puree::Model::JournalArticle,
                 'a7c104d0-e243-463e-a2a4-b4e07bcfde3f'
  end

  def test_organisation
    # Faculty of Health and Medicine
    assert_found Puree::Extractor::OrganisationalUnit,
                 Puree::Model::OrganisationalUnit,
                 '8a58c4ad-2d5a-463a-841a-38839ff73a63'
  end

  def test_person
    # Peter Diggle
    assert_found Puree::Extractor::Person,
                 Puree::Model::Person,
                 '811d7fc3-047a-40d2-89e6-c85d14a97fb8'
  end

  def test_project
    # The Channel Scheme - Preston
    assert_found Puree::Extractor::Project,
                 Puree::Model::Project,
                 '2af1fb5c-ac04-40f3-9cb4-073fb92fcf96'
  end

  def test_research_output
    # A theoretical framework for estimation of AUCs in complete and incomplete sampling designs
    assert_found Puree::Extractor::ResearchOutput,
                 Puree::Model::ResearchOutput,
                 'a7c104d0-e243-463e-a2a4-b4e07bcfde3f'
  end

  def test_thesis
    # Multimodalita e 'city branding'
    assert_found Puree::Extractor::Thesis,
                 Puree::Model::Thesis,
                 '376173c0-fd7a-4d63-93d3-3f2e58e8dc01'
  end

  private

  # Builds the extractor from the shared config, finds the record with the
  # given id and asserts the result is an instance of model_class.
  def assert_found(extractor_class, model_class, id)
    found = extractor_class.new(config).find(id)
    assert_instance_of model_class, found
  end
end
|