puree 1.9.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -12
  3. data/Gemfile +1 -1
  4. data/README.md +106 -75
  5. data/lib/puree/extractor/conference_paper.rb +6 -14
  6. data/lib/puree/extractor/dataset.rb +5 -41
  7. data/lib/puree/extractor/doctoral_thesis.rb +5 -6
  8. data/lib/puree/extractor/event.rb +6 -14
  9. data/lib/puree/extractor/external_organisation.rb +5 -8
  10. data/lib/puree/extractor/extractor.rb +19 -0
  11. data/lib/puree/extractor/journal.rb +5 -9
  12. data/lib/puree/extractor/journal_article.rb +6 -15
  13. data/lib/puree/extractor/masters_thesis.rb +5 -6
  14. data/lib/puree/extractor/organisational_unit.rb +25 -0
  15. data/lib/puree/extractor/paper.rb +6 -11
  16. data/lib/puree/extractor/person.rb +4 -53
  17. data/lib/puree/extractor/project.rb +5 -28
  18. data/lib/puree/extractor/publisher.rb +5 -12
  19. data/lib/puree/extractor/research_output.rb +20 -0
  20. data/lib/puree/extractor/resource.rb +22 -58
  21. data/lib/puree/extractor/thesis.rb +6 -17
  22. data/lib/puree/model/conference_paper.rb +1 -1
  23. data/lib/puree/model/dataset.rb +8 -25
  24. data/lib/puree/model/event.rb +0 -9
  25. data/lib/puree/model/external_organisation.rb +1 -1
  26. data/lib/puree/model/external_organisation_header.rb +2 -19
  27. data/lib/puree/model/identifier.rb +26 -0
  28. data/lib/puree/model/journal.rb +1 -1
  29. data/lib/puree/model/journal_article.rb +1 -4
  30. data/lib/puree/model/model.rb +43 -0
  31. data/lib/puree/model/{organisation_header.rb → organisation_header_base.rb} +2 -2
  32. data/lib/puree/model/{organisation.rb → organisational_unit.rb} +3 -6
  33. data/lib/puree/model/organisational_unit_header.rb +9 -0
  34. data/lib/puree/model/paper.rb +10 -1
  35. data/lib/puree/model/person.rb +3 -9
  36. data/lib/puree/model/project.rb +7 -14
  37. data/lib/puree/model/publication_status.rb +1 -1
  38. data/lib/puree/model/publisher.rb +3 -0
  39. data/lib/puree/model/publisher_header.rb +9 -0
  40. data/lib/puree/model/{publication.rb → research_output.rb} +18 -20
  41. data/lib/puree/model/research_output_scopus_metric.rb +26 -0
  42. data/lib/puree/model/resource.rb +19 -11
  43. data/lib/puree/model/thesis.rb +6 -3
  44. data/lib/puree/rest/activity.rb +24 -0
  45. data/lib/puree/rest/application.rb +24 -0
  46. data/lib/puree/rest/base.rb +131 -0
  47. data/lib/puree/rest/classification_scheme.rb +24 -0
  48. data/lib/puree/rest/client.rb +105 -0
  49. data/lib/puree/rest/curricula_vitae.rb +24 -0
  50. data/lib/puree/rest/dataset.rb +24 -0
  51. data/lib/puree/rest/equipment.rb +24 -0
  52. data/lib/puree/rest/event.rb +24 -0
  53. data/lib/puree/rest/external_organisation.rb +24 -0
  54. data/lib/puree/rest/external_person.rb +24 -0
  55. data/lib/puree/rest/impact.rb +24 -0
  56. data/lib/puree/rest/journal.rb +24 -0
  57. data/lib/puree/rest/mixins/active_mixin.rb +14 -0
  58. data/lib/puree/rest/mixins/activity_mixin.rb +14 -0
  59. data/lib/puree/rest/mixins/application_mixin.rb +15 -0
  60. data/lib/puree/rest/mixins/award_mixin.rb +15 -0
  61. data/lib/puree/rest/mixins/dataset_mixin.rb +15 -0
  62. data/lib/puree/rest/mixins/former_mixin.rb +14 -0
  63. data/lib/puree/rest/mixins/impact_mixin.rb +15 -0
  64. data/lib/puree/rest/mixins/person_mixin.rb +15 -0
  65. data/lib/puree/rest/mixins/press_media_mixin.rb +15 -0
  66. data/lib/puree/rest/mixins/prize_mixin.rb +15 -0
  67. data/lib/puree/rest/mixins/project_mixin.rb +15 -0
  68. data/lib/puree/rest/mixins/research_output_mixin.rb +15 -0
  69. data/lib/puree/rest/mixins/student_thesis_mixin.rb +15 -0
  70. data/lib/puree/rest/organisational_unit.rb +51 -0
  71. data/lib/puree/rest/person.rb +58 -0
  72. data/lib/puree/rest/press_media.rb +24 -0
  73. data/lib/puree/rest/prize.rb +24 -0
  74. data/lib/puree/rest/project.rb +27 -0
  75. data/lib/puree/rest/publisher.rb +31 -0
  76. data/lib/puree/rest/research_output.rb +24 -0
  77. data/lib/puree/rest/rest.rb +30 -0
  78. data/lib/puree/util/util.rb +3 -0
  79. data/lib/puree/version.rb +1 -1
  80. data/lib/puree/xml_extractor/base.rb +6 -6
  81. data/lib/puree/xml_extractor/collection.rb +112 -19
  82. data/lib/puree/xml_extractor/conference_paper.rb +9 -2
  83. data/lib/puree/xml_extractor/dataset.rb +56 -166
  84. data/lib/puree/xml_extractor/doctoral_thesis.rb +1 -1
  85. data/lib/puree/xml_extractor/event.rb +16 -19
  86. data/lib/puree/xml_extractor/external_organisation.rb +14 -5
  87. data/lib/puree/xml_extractor/journal.rb +18 -8
  88. data/lib/puree/xml_extractor/journal_article.rb +24 -11
  89. data/lib/puree/xml_extractor/masters_thesis.rb +1 -1
  90. data/lib/puree/xml_extractor/mixins/abstract_mixin.rb +17 -0
  91. data/lib/puree/xml_extractor/mixins/description_mixin.rb +17 -0
  92. data/lib/puree/xml_extractor/mixins/{external_organisations_mixin.rb → external_organisation_mixin.rb} +2 -2
  93. data/lib/puree/xml_extractor/mixins/identifier_mixin.rb +25 -0
  94. data/lib/puree/xml_extractor/mixins/keyword_mixin.rb +21 -0
  95. data/lib/puree/xml_extractor/mixins/organisational_unit_mixin.rb +18 -0
  96. data/lib/puree/xml_extractor/mixins/owner_mixin.rb +18 -0
  97. data/lib/puree/xml_extractor/mixins/peer_reviewed_mixin.rb +1 -1
  98. data/lib/puree/xml_extractor/mixins/person_mixin.rb +45 -0
  99. data/lib/puree/xml_extractor/mixins/publisher_mixin.rb +22 -0
  100. data/lib/puree/xml_extractor/mixins/{associated_mixin.rb → research_output_mixin.rb} +7 -7
  101. data/lib/puree/xml_extractor/mixins/title_mixin.rb +17 -0
  102. data/lib/puree/xml_extractor/mixins/type_mixin.rb +17 -0
  103. data/lib/puree/xml_extractor/mixins/workflow_mixin.rb +17 -0
  104. data/lib/puree/xml_extractor/organisational_unit.rb +82 -0
  105. data/lib/puree/xml_extractor/paper.rb +17 -3
  106. data/lib/puree/xml_extractor/person.rb +30 -35
  107. data/lib/puree/xml_extractor/project.rb +39 -75
  108. data/lib/puree/xml_extractor/publisher.rb +15 -6
  109. data/lib/puree/xml_extractor/research_output.rb +189 -0
  110. data/lib/puree/xml_extractor/resource.rb +28 -36
  111. data/lib/puree/xml_extractor/shared.rb +12 -9
  112. data/lib/puree/xml_extractor/thesis.rb +29 -15
  113. data/lib/puree/xml_extractor/xml_extractor.rb +43 -0
  114. data/lib/puree.rb +5 -114
  115. data/puree.gemspec +1 -1
  116. data/test/extractor/resource_test.rb +103 -0
  117. data/test/rest/base_test.rb +45 -0
  118. data/test/rest/common_test.rb +44 -0
  119. data/test/test_extractor_helper.rb +1 -0
  120. data/test/test_helper.rb +17 -0
  121. data/test/test_rest_helper.rb +82 -0
  122. data/test/test_xml_extractor_helper.rb +17 -0
  123. data/test/xml_extractor/xml_extractor_collection_test.rb +120 -0
  124. data/test/xml_extractor/xml_extractor_conference_paper_test.rb +69 -0
  125. data/test/xml_extractor/xml_extractor_dataset_test.rb +156 -0
  126. data/test/xml_extractor/xml_extractor_event_test.rb +58 -0
  127. data/test/xml_extractor/xml_extractor_external_organisation_test.rb +49 -0
  128. data/test/xml_extractor/xml_extractor_journal_article_test.rb +66 -0
  129. data/test/xml_extractor/xml_extractor_journal_test.rb +53 -0
  130. data/test/xml_extractor/xml_extractor_organisation_test.rb +80 -0
  131. data/test/xml_extractor/xml_extractor_person_test.rb +88 -0
  132. data/test/xml_extractor/xml_extractor_project_test.rb +136 -0
  133. data/test/xml_extractor/xml_extractor_publisher_test.rb +49 -0
  134. data/test/xml_extractor/xml_extractor_research_output_test.rb +214 -0
  135. data/test/xml_extractor/xml_extractor_thesis_test.rb +80 -0
  136. metadata +105 -68
  137. data/lib/puree/api/api.rb +0 -9
  138. data/lib/puree/api/authentication.rb +0 -33
  139. data/lib/puree/api/configuration.rb +0 -43
  140. data/lib/puree/api/map.rb +0 -80
  141. data/lib/puree/api/person_request.rb +0 -64
  142. data/lib/puree/api/request.rb +0 -119
  143. data/lib/puree/extractor/collection.rb +0 -131
  144. data/lib/puree/extractor/download.rb +0 -71
  145. data/lib/puree/extractor/organisation.rb +0 -34
  146. data/lib/puree/extractor/paper_base.rb +0 -28
  147. data/lib/puree/extractor/publication.rb +0 -53
  148. data/lib/puree/extractor/server.rb +0 -56
  149. data/lib/puree/model/download_header.rb +0 -21
  150. data/lib/puree/model/paper_base.rb +0 -19
  151. data/lib/puree/model/server.rb +0 -13
  152. data/lib/puree/query/funding.rb +0 -54
  153. data/lib/puree/query/person.rb +0 -121
  154. data/lib/puree/query/query.rb +0 -6
  155. data/lib/puree/xml_extractor/download.rb +0 -42
  156. data/lib/puree/xml_extractor/mixins/workflow_state_mixin.rb +0 -18
  157. data/lib/puree/xml_extractor/organisation.rb +0 -75
  158. data/lib/puree/xml_extractor/paper_base.rb +0 -17
  159. data/lib/puree/xml_extractor/publication.rb +0 -257
  160. data/lib/puree/xml_extractor/server.rb +0 -32
  161. data/spec/download_http_spec.rb +0 -31
  162. data/spec/open_api_dataset_http_spec.rb +0 -15
  163. data/spec/query/funding_http_spec.rb +0 -29
  164. data/spec/query/person_http_spec.rb +0 -52
  165. data/spec/resource/collection_all_http_spec.rb +0 -77
  166. data/spec/resource/collection_http_spec.rb +0 -65
  167. data/spec/resource/dataset_http_spec.rb +0 -112
  168. data/spec/resource/event_http_spec.rb +0 -52
  169. data/spec/resource/journal_http_spec.rb +0 -36
  170. data/spec/resource/organisation_http_spec.rb +0 -52
  171. data/spec/resource/person_http_spec.rb +0 -60
  172. data/spec/resource/project_http_spec.rb +0 -89
  173. data/spec/resource/publication_http_spec.rb +0 -126
  174. data/spec/resource/publisher_http_spec.rb +0 -26
  175. data/spec/server_http_spec.rb +0 -26
  176. data/spec/spec_helper.rb +0 -159
@@ -0,0 +1,189 @@
1
module Puree

  module XMLExtractor

    # XML extractor for research output records.
    #
    # Each public reader pulls one piece of metadata out of the XML document;
    # combine_metadata copies all of them onto the model.
    #
    class ResearchOutput < Puree::XMLExtractor::Resource
      include Puree::XMLExtractor::AbstractMixin
      include Puree::XMLExtractor::KeywordMixin
      include Puree::XMLExtractor::OrganisationalUnitMixin
      include Puree::XMLExtractor::OwnerMixin
      include Puree::XMLExtractor::PersonMixin
      include Puree::XMLExtractor::ResearchOutputMixin
      include Puree::XMLExtractor::WorkflowMixin
      include Puree::XMLExtractor::TitleMixin
      include Puree::XMLExtractor::TypeMixin

      # @param xml [String] passed unchanged to the parent initializer
      def initialize(xml)
        super
        setup_model :research_output
      end

      # @return [String, nil]
      def bibliographical_note
        xpath_query_for_single_value '/bibliographicalNote'
      end

      # @return [String, nil]
      def category
        xpath_query_for_single_value('/category')
      end

      # Digital Object Identifier, read from the DOI electronic version.
      # @return [String, nil]
      def doi
        xpath_query_for_single_value(
          '/electronicVersions/electronicVersion[@type="wsElectronicVersionDoiAssociation"]/doi'
        )
      end

      # One file model per file-type electronic version, de-duplicated by URL.
      # @return [Array<Puree::Model::File>]
      def files
        versions = xpath_query(
          '/electronicVersions/electronicVersion[@type="wsElectronicVersionFileAssociation"]'
        )
        file_models = versions.map do |version|
          file = Puree::Model::File.new
          file.name = version.xpath('file/fileName').text.strip
          file.mime = version.xpath('file/mimeType').text.strip
          file.size = version.xpath('file/size').text.strip.to_i
          file.url = version.xpath('file/URL').text.strip
          # License extraction is disabled in this version:
          # document_license = version.xpath('licenseType')
          # if !document_license.empty?
          #   license = Puree::Model::CopyrightLicense.new
          #   license.name = document_license.xpath('term/localizedString').text.strip
          #   license.url = document_license.xpath('description/localizedString').text.strip
          #   file.license = license if license.data?
          # end
          file
        end
        file_models.uniq(&:url)
      end

      # @return [Array<String>]
      def keywords
        keyword_group('keywordContainers')
      end

      # @return [String, nil]
      def language
        xpath_query_for_single_value('/language')
      end

      # Values of the link-type electronic versions.
      # @return [Array<String>, nil]
      def links
        xpath_query_for_multi_value(
          '/electronicVersions/electronicVersion[@type="wsElectronicVersionLinkAssociation"]/link'
        )
      end

      # @return [String, nil]
      def open_access_permission
        xpath_query_for_single_value('/openAccessPermission')
      end

      # @return [Array<Puree::Model::EndeavourPerson>]
      def persons_internal
        persons('internal', '/personAssociations/personAssociation')
      end

      # @return [Array<Puree::Model::EndeavourPerson>]
      def persons_external
        persons('external', '/personAssociations/personAssociation')
      end

      # @return [Array<Puree::Model::EndeavourPerson>]
      def persons_other
        persons('other', '/personAssociations/personAssociation')
      end

      # One status per publicationStatus element, de-duplicated by stage.
      # @return [Array<Puree::Model::PublicationStatus>]
      def publication_statuses
        status_nodes = xpath_query('/publicationStatuses/publicationStatus')
        statuses = status_nodes.map do |node|
          status = Puree::Model::PublicationStatus.new
          status.stage = node.xpath('publicationStatus').text.strip

          # The date arrives as separate year/month/day elements.
          date_parts = {
            'year' => node.xpath('publicationDate/year').text.strip,
            'month' => node.xpath('publicationDate/month').text.strip,
            'day' => node.xpath('publicationDate/day').text.strip
          }
          status.date = Puree::Util::Date.hash_to_time(date_parts)

          status
        end
        statuses.uniq(&:stage)
      end

      # @return [Integer, nil]
      def scopus_citations_count
        citations = xpath_query_for_single_value('/totalScopusCitations')
        citations.to_i if citations
      end

      # One metric per scopusMetric element; value and year are kept as the
      # stripped element text.
      # @return [Array<Puree::Model::ResearchOutputScopusMetric>]
      def scopus_metrics
        xpath_query('/scopusMetrics/scopusMetric').map do |node|
          metric = Puree::Model::ResearchOutputScopusMetric.new
          metric.value = node.xpath('value').text.strip
          metric.year = node.xpath('year').text.strip
          metric
        end
      end

      # @return [String, nil]
      def subtitle
        xpath_query_for_single_value('/subTitle')
      end

      # @return [String, nil]
      def translated_subtitle
        xpath_query_for_single_value('/translatedSubTitle')
      end

      # @return [String, nil]
      def translated_title
        xpath_query_for_single_value('/translatedTitle')
      end

      private

      # Matches any root element, whatever its name.
      def xpath_root
        '/*'
      end

      # Copies every extracted value onto the model, after the parent class
      # has set its own attributes, and returns the model.
      def combine_metadata
        super
        @model.bibliographical_note = bibliographical_note
        @model.category = category
        @model.description = description
        @model.doi = doi
        @model.files = files
        @model.keywords = keywords
        @model.language = language
        @model.links = links
        @model.open_access_permission = open_access_permission
        @model.organisations = organisational_units
        @model.owner = owner
        @model.persons_internal = persons_internal
        @model.persons_external = persons_external
        @model.persons_other = persons_other
        @model.publication_statuses = publication_statuses
        @model.research_outputs = research_outputs
        @model.scopus_citations_count = scopus_citations_count
        @model.scopus_metrics = scopus_metrics
        @model.subtitle = subtitle
        @model.title = title
        @model.translated_subtitle = translated_subtitle
        @model.translated_title = translated_title
        @model.type = type
        @model.workflow = workflow
        @model
      end

    end

  end

end
@@ -6,34 +6,33 @@ module Puree
6
6
  #
7
7
  class Resource < Puree::XMLExtractor::Base
8
8
 
9
- def initialize(xml:)
9
+ def initialize(xml)
10
10
  super
11
11
  end
12
12
 
13
- # content based
14
- def xpath_query(path)
15
- path_from_root = service_xpath path
16
- @doc.xpath path_from_root
13
# Populates the model from the XML and hands back whatever
# combine_metadata returns.
#
# @return [Puree::Model::Resource subclass]
def model
  combine_metadata
end
18
17
 
19
- # Is there any data after get? For a response that provides a count of the results.
20
- # @return [Boolean]
21
- def get_data?
22
- path = service_xpath_count
23
- xpath_result = @doc.xpath path
24
- xpath_result.text.strip === '1' ? true : false
18
# Value found at info/createdBy.
#
# @return [String, nil]
def created_by
  xpath_query_for_single_value '/info/createdBy'
end
26
22
 
27
23
  # @return [Time, nil]
28
- def created
29
- xpath_result = xpath_query_for_single_value('/created')
30
- Time.parse xpath_result if xpath_result
24
# Creation timestamp read from info/createdDate.
#
# Time.parse raises TypeError when given nil, so the lookup result is
# checked first and nil is returned when the element is missing.
#
# @return [Time, nil]
def created_at
  created_date = xpath_query_for_single_value('/info/createdDate')
  Time.parse created_date if created_date
end
27
+
28
# Value found at info/modifiedBy.
#
# @return [String, nil]
def modified_by
  xpath_query_for_single_value '/info/modifiedBy'
end
32
32
 
33
33
  # @return [Time, nil]
34
- def modified
35
- xpath_result = xpath_query_for_single_value('/modified')
36
- Time.parse xpath_result if xpath_result
34
# Last-modified timestamp read from info/modifiedDate.
#
# Time.parse raises TypeError when given nil, so the lookup result is
# checked first and nil is returned when the element is missing.
#
# @return [Time, nil]
def modified_at
  modified_date = xpath_query_for_single_value('/info/modifiedDate')
  Time.parse modified_date if modified_date
end
38
37
 
39
38
  # @return [String, nil]
@@ -41,29 +40,22 @@ module Puree
41
40
  xpath_query_for_single_value '/@uuid'
42
41
  end
43
42
 
44
- # Locale (e.g. en-GB)
45
- # @return [String, nil]
46
- def locale
47
- str = xpath_query_for_single_value '/@locale'
48
- str.tr('_','-') if str
49
- end
50
-
51
43
  private
52
44
 
53
- def service_response_name
54
- @api_map[:resource_type][@resource_type][:response]
55
- end
56
-
57
- def service_xpath_base
58
- service_response_name + '/result/content'
59
- end
60
-
61
- def service_xpath_count
62
- service_response_name + '/count'
45
# Runs an XPath query against the document, with the path resolved
# relative to xpath_root.
#
# @param path [String] path below the root, e.g. '/info/createdBy'
# @return the result of @doc.xpath for the joined path
def xpath_query(path)
  @doc.xpath(File.join(xpath_root, path))
end
64
49
 
65
- def service_xpath(str_to_find)
66
- service_xpath_base + str_to_find
50
# Copies the attributes shared by every resource onto the model.
#
# Returns the model itself, so that #model yields the populated model
# even when this method is not overridden.
#
# @raise [RuntimeError] if no model has been set up
# @return [Object] the populated model
def combine_metadata
  raise 'No model to populate' unless @model

  @model.uuid = uuid
  @model.created_by = created_by
  @model.created_at = created_at
  @model.modified_by = modified_by
  @model.modified_at = modified_at
  @model
end
68
60
 
69
61
  end
@@ -11,32 +11,35 @@ module Puree
11
11
  h = Puree::Model::ExternalOrganisationHeader.new
12
12
  h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
13
13
  h.name = nokogiri_xml_element.xpath('name').text.strip
14
- h
14
+ h.type = nokogiri_xml_element.xpath('type').text.strip
15
+ h.data? ? h : nil
15
16
  end
16
17
 
17
18
  # @return [Array<Puree::Model::ExternalOrganisationHeader>]
18
19
  def self.external_organisation_multi_header(nokogiri_xml_nodeset)
19
20
  data = []
20
21
  nokogiri_xml_nodeset.each do |i|
21
- data << external_organisation_header(i)
22
+ header = external_organisation_header(i)
23
+ data << header if header
22
24
  end
23
25
  data.uniq { |d| d.uuid }
24
26
  end
25
27
 
26
- # @return [Puree::Model::OrganisationHeader]
28
+ # @return [Puree::Model::OrganisationalUnitHeader]
27
29
  def self.organisation_header(nokogiri_xml_element)
28
- h = Puree::Model::OrganisationHeader.new
30
+ h = Puree::Model::OrganisationalUnitHeader.new
29
31
  h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
30
- h.name = nokogiri_xml_element.xpath('name/localizedString').text.strip
31
- h.type = nokogiri_xml_element.xpath('typeClassification/term/localizedString').text.strip
32
- h
32
+ h.name = nokogiri_xml_element.xpath('name').text.strip
33
+ h.type = nokogiri_xml_element.xpath('type').text.strip
34
+ h.data? ? h : nil
33
35
  end
34
36
 
35
- # @return [Array<Puree::Model::OrganisationHeader>]
37
+ # @return [Array<Puree::Model::OrganisationalUnitHeader>]
36
38
  def self.organisation_multi_header(nokogiri_xml_nodeset)
37
39
  data = []
38
40
  nokogiri_xml_nodeset.each do |i|
39
- data << organisation_header(i)
41
+ header = organisation_header(i)
42
+ data << header if header
40
43
  end
41
44
  data.uniq { |d| d.uuid }
42
45
  end
@@ -3,42 +3,56 @@ module Puree
3
3
 
4
4
  # Thesis XML extractor.
5
5
  #
6
- class Thesis < Puree::XMLExtractor::Publication
6
+ class Thesis < Puree::XMLExtractor::ResearchOutput
7
7
  include Puree::XMLExtractor::DoiMixin
8
8
  include Puree::XMLExtractor::PagesMixin
9
+ include Puree::XMLExtractor::PublisherMixin
9
10
 
10
- def initialize(xml:)
11
+ def initialize(xml)
11
12
  super
13
+ setup_model :thesis
12
14
  end
13
15
 
14
16
  # @return [Time, nil]
15
17
  def award_date
16
- xpath_result = xpath_query_for_single_value('/awardDate')
18
+ xpath_result = xpath_query_for_single_value('/awardedDate')
17
19
  Time.parse xpath_result if xpath_result
18
20
  end
19
21
 
20
- # @return [Puree::Model::OrganisationHeader, nil]
22
+ # @return [Puree::Model::ExternalOrganisationHeader, nil]
21
23
  def awarding_institution
22
- xpath_result = xpath_query '/awardingInstitution/internalExternalOrganisationAssociation/organisation'
23
- Puree::XMLExtractor::Shared.organisation_header xpath_result
24
+ xpath_result = xpath_query '/awardingInstitutions/awardingInstitution/externalOrganisationalUnit'
25
+ Puree::XMLExtractor::Shared.external_organisation_header xpath_result if xpath_result
24
26
  end
25
27
 
26
28
  # @return [String, nil]
27
29
  def qualification
28
- types = {
29
- '/dk/atira/pure/thesis/qualification/mphil' => 'MPhil',
30
- '/dk/atira/pure/thesis/qualification/phd' => 'PhD',
31
- '/dk/atira/pure/thesis/qualification/masters_by_research' => 'Masters by Research'
32
- }
33
- xpath_result = xpath_query_for_single_value '/qualification/uri'
34
- types[xpath_result]
30
+ xpath_query_for_single_value '/qualification'
35
31
  end
36
32
 
37
- # @return [Array<String>]
33
+ # @return [Array<Puree::Model::ExternalOrganisationHeader>]
38
34
  def sponsors
39
- xpath_query_for_multi_value '/sponsors/externalOrganisation/name'
35
+ xpath_result = xpath_query '/sponsors/sponsor'
36
+ Puree::XMLExtractor::Shared.external_organisation_multi_header xpath_result if xpath_result
40
37
  end
41
38
 
39
+ private
40
+
41
# Root element path that thesis queries are resolved against.
#
# @return [String]
def xpath_root
  '/thesis'
end
44
+
45
# Adds the thesis-specific attributes to the model after the parent
# class has populated its own, then returns the model.
def combine_metadata
  super
  # Each attribute is read from the same-named extractor method and
  # written through the model's setter, in this order.
  %i[award_date awarding_institution doi pages publisher qualification sponsors].each do |attribute|
    @model.public_send("#{attribute}=", send(attribute))
  end
  @model
end
42
56
  end
43
57
  end
44
58
  end
@@ -1,3 +1,46 @@
1
+ require 'nokogiri'
2
+
3
+ require 'puree/xml_extractor/mixins/abstract_mixin'
4
+ require 'puree/xml_extractor/mixins/description_mixin'
5
+ require 'puree/xml_extractor/mixins/doi_mixin'
6
+ require 'puree/xml_extractor/mixins/event_mixin'
7
+ require 'puree/xml_extractor/mixins/external_organisation_mixin'
8
+ require 'puree/xml_extractor/mixins/identifier_mixin'
9
+ require 'puree/xml_extractor/mixins/keyword_mixin'
10
+ require 'puree/xml_extractor/mixins/organisational_unit_mixin'
11
+ require 'puree/xml_extractor/mixins/owner_mixin'
12
+ require 'puree/xml_extractor/mixins/pages_mixin'
13
+ require 'puree/xml_extractor/mixins/person_mixin'
14
+ require 'puree/xml_extractor/mixins/page_range_mixin'
15
+ require 'puree/xml_extractor/mixins/peer_reviewed_mixin'
16
+ require 'puree/xml_extractor/mixins/publisher_mixin'
17
+ require 'puree/xml_extractor/mixins/research_output_mixin'
18
+ require 'puree/xml_extractor/mixins/workflow_mixin'
19
+ require 'puree/xml_extractor/mixins/title_mixin'
20
+ require 'puree/xml_extractor/mixins/type_mixin'
21
+
22
+ require 'puree/xml_extractor/shared'
23
+ require 'puree/xml_extractor/base'
24
+ require 'puree/xml_extractor/resource'
25
+ require 'puree/xml_extractor/dataset'
26
+ require 'puree/xml_extractor/event'
27
+ require 'puree/xml_extractor/external_organisation'
28
+ require 'puree/xml_extractor/journal'
29
+ require 'puree/xml_extractor/organisational_unit'
30
+ require 'puree/xml_extractor/person'
31
+ require 'puree/xml_extractor/project'
32
+ require 'puree/xml_extractor/publisher'
33
+
34
+ require 'puree/xml_extractor/research_output'
35
+ require 'puree/xml_extractor/thesis'
36
+ require 'puree/xml_extractor/doctoral_thesis'
37
+ require 'puree/xml_extractor/masters_thesis'
38
+ require 'puree/xml_extractor/journal_article'
39
+ require 'puree/xml_extractor/paper'
40
+ require 'puree/xml_extractor/conference_paper'
41
+
42
+ require 'puree/xml_extractor/collection'
43
+
1
44
  module Puree
2
45
 
3
46
  # An XMLExtractor manages the extraction of metadata from XML into Ruby
data/lib/puree.rb CHANGED
@@ -1,119 +1,10 @@
1
- require 'http'
2
- require 'nokogiri'
3
-
1
+ require 'puree/extractor/extractor'
2
+ require 'puree/model/model'
3
+ require 'puree/rest/rest'
4
+ require 'puree/util/util'
5
+ require 'puree/xml_extractor/xml_extractor'
4
6
  require 'puree/version'
5
7
 
6
- require 'puree/util/date'
7
- require 'puree/util/string'
8
-
9
- require 'puree/xml_extractor/mixins/associated_mixin'
10
- require 'puree/xml_extractor/mixins/doi_mixin'
11
- require 'puree/xml_extractor/mixins/event_mixin'
12
- require 'puree/xml_extractor/mixins/external_organisations_mixin'
13
- require 'puree/xml_extractor/mixins/pages_mixin'
14
- require 'puree/xml_extractor/mixins/page_range_mixin'
15
- require 'puree/xml_extractor/mixins/peer_reviewed_mixin'
16
- require 'puree/xml_extractor/mixins/workflow_state_mixin'
17
-
18
- require 'puree/xml_extractor/shared'
19
- require 'puree/xml_extractor/base'
20
- require 'puree/xml_extractor/collection'
21
- require 'puree/xml_extractor/resource'
22
- require 'puree/xml_extractor/dataset'
23
- require 'puree/xml_extractor/download'
24
- require 'puree/xml_extractor/event'
25
- require 'puree/xml_extractor/journal'
26
- require 'puree/xml_extractor/organisation'
27
- require 'puree/xml_extractor/external_organisation'
28
- require 'puree/xml_extractor/person'
29
- require 'puree/xml_extractor/project'
30
-
31
- require 'puree/xml_extractor/publication'
32
- require 'puree/xml_extractor/thesis'
33
- require 'puree/xml_extractor/doctoral_thesis'
34
- require 'puree/xml_extractor/masters_thesis'
35
- require 'puree/xml_extractor/journal_article'
36
- require 'puree/xml_extractor/paper_base'
37
- require 'puree/xml_extractor/conference_paper'
38
- require 'puree/xml_extractor/paper'
39
-
40
- require 'puree/xml_extractor/publisher'
41
- require 'puree/xml_extractor/server'
42
-
43
- require 'puree/api/map'
44
- require 'puree/api/request'
45
- require 'puree/api/person_request'
46
- require 'puree/api/configuration'
47
- require 'puree/api/authentication'
48
-
49
- require 'puree/model/helper/validation'
50
- require 'puree/model/structure'
51
-
52
- require 'puree/model/resource'
53
- require 'puree/model/dataset'
54
- require 'puree/model/download_header'
55
- require 'puree/model/event'
56
- require 'puree/model/event_header'
57
- require 'puree/model/external_organisation'
58
- require 'puree/model/journal'
59
- require 'puree/model/journal_header'
60
- require 'puree/model/link'
61
- require 'puree/model/organisation'
62
- require 'puree/model/person'
63
- require 'puree/model/project'
64
-
65
- require 'puree/model/publication'
66
- require 'puree/model/thesis'
67
- require 'puree/model/doctoral_thesis'
68
- require 'puree/model/masters_thesis'
69
- require 'puree/model/journal_article'
70
- require 'puree/model/paper_base'
71
- require 'puree/model/conference_paper'
72
- require 'puree/model/paper'
73
-
74
- require 'puree/model/publisher'
75
- require 'puree/model/related_content_header'
76
- require 'puree/model/spatial_point'
77
-
78
- require 'puree/model/address'
79
- require 'puree/model/copyright_license'
80
- require 'puree/model/event_header'
81
- require 'puree/model/external_organisation_header'
82
- require 'puree/model/file'
83
- require 'puree/model/legal_condition'
84
- require 'puree/model/organisation_header'
85
- require 'puree/model/endeavour_person'
86
- require 'puree/model/person_name'
87
- require 'puree/model/publication_status'
88
- require 'puree/model/server'
89
- require 'puree/model/temporal_range'
90
-
91
- require 'puree/extractor/resource'
92
- require 'puree/extractor/dataset'
93
- require 'puree/extractor/event'
94
- require 'puree/extractor/external_organisation'
95
- require 'puree/extractor/journal'
96
- require 'puree/extractor/organisation'
97
- require 'puree/extractor/person'
98
- require 'puree/extractor/project'
99
-
100
- require 'puree/extractor/publication'
101
- require 'puree/extractor/thesis'
102
- require 'puree/extractor/doctoral_thesis'
103
- require 'puree/extractor/masters_thesis'
104
- require 'puree/extractor/journal_article'
105
- require 'puree/extractor/paper_base'
106
- require 'puree/extractor/conference_paper'
107
- require 'puree/extractor/paper'
108
-
109
- require 'puree/extractor/publisher'
110
- require 'puree/extractor/collection'
111
- require 'puree/extractor/download'
112
- require 'puree/extractor/server'
113
-
114
- require 'puree/query/funding'
115
- require 'puree/query/person'
116
-
117
8
  # Metadata extraction from the Pure Research Information System.
118
9
  #
119
10
  module Puree
data/puree.gemspec CHANGED
@@ -21,5 +21,5 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency 'http', '~> 2.0'
22
22
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
23
23
 
24
- spec.add_development_dependency 'rspec'
24
+ spec.add_development_dependency 'minitest-reporters', '~> 1.1'
25
25
  end
@@ -0,0 +1,103 @@
1
+ require 'test_helper'
2
+
3
# Checks that each extractor's find returns an instance of the matching
# model class for a known record id.
class TestResourceFind < Minitest::Test

  def test_conference_paper
    # A Negative Effect of Evaluation Upon Analogical Problem Solving
    assert_find_returns Puree::Extractor::ConferencePaper,
                        Puree::Model::ConferencePaper,
                        '96e1495e-70a2-4529-9721-33b2cb62df8d'
  end

  def test_dataset
    # The 2014 Ebola virus disease outbreak in West Africa
    assert_find_returns Puree::Extractor::Dataset,
                        Puree::Model::Dataset,
                        'b050f4b5-e272-4914-8cac-3bdc1e673c58'
  end

  def test_event
    # 31st Annual European Meeting on Atmospheric Studies by Optical Methods and 1st International Riometer Workshop
    assert_find_returns Puree::Extractor::Event,
                        Puree::Model::Event,
                        'cd2bf302-4629-4f71-9c02-2dfe50a384bf'
  end

  def test_external_organisation
    # STFC
    assert_find_returns Puree::Extractor::ExternalOrganisation,
                        Puree::Model::ExternalOrganisation,
                        '2ea6bbc4-c957-4a07-a1e7-604a2d944c20'
  end

  def test_journal
    # Chemical Geology
    assert_find_returns Puree::Extractor::Journal,
                        Puree::Model::Journal,
                        '95e40a10-1799-4e74-9a70-8b03f27d9acb'
  end

  def test_journal_article
    # A theoretical framework for estimation of AUCs in complete and incomplete sampling designs
    assert_find_returns Puree::Extractor::JournalArticle,
                        Puree::Model::JournalArticle,
                        'a7c104d0-e243-463e-a2a4-b4e07bcfde3f'
  end

  def test_organisation
    # Faculty of Health and Medicine
    assert_find_returns Puree::Extractor::OrganisationalUnit,
                        Puree::Model::OrganisationalUnit,
                        '8a58c4ad-2d5a-463a-841a-38839ff73a63'
  end

  def test_person
    # Peter Diggle
    assert_find_returns Puree::Extractor::Person,
                        Puree::Model::Person,
                        '811d7fc3-047a-40d2-89e6-c85d14a97fb8'
  end

  def test_project
    # The Channel Scheme - Preston
    assert_find_returns Puree::Extractor::Project,
                        Puree::Model::Project,
                        '2af1fb5c-ac04-40f3-9cb4-073fb92fcf96'
  end

  def test_research_output
    # A theoretical framework for estimation of AUCs in complete and incomplete sampling designs
    assert_find_returns Puree::Extractor::ResearchOutput,
                        Puree::Model::ResearchOutput,
                        'a7c104d0-e243-463e-a2a4-b4e07bcfde3f'
  end

  def test_thesis
    # Multimodalita e 'city branding'
    assert_find_returns Puree::Extractor::Thesis,
                        Puree::Model::Thesis,
                        '376173c0-fd7a-4d63-93d3-3f2e58e8dc01'
  end

  private

  # Builds the extractor with the shared config, looks up the record by id
  # and asserts the exact class of the result.
  def assert_find_returns(extractor_class, model_class, id)
    extractor = extractor_class.new config
    assert_instance_of model_class, extractor.find(id)
  end
end