puree 0.20.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -2
  3. data/PITCHME.md +43 -21
  4. data/README.md +72 -18
  5. data/lib/puree.rb +66 -21
  6. data/lib/puree/api/api.rb +9 -0
  7. data/lib/puree/api/authentication.rb +33 -0
  8. data/lib/puree/api/configuration.rb +43 -0
  9. data/lib/puree/api/map.rb +76 -0
  10. data/lib/puree/api/request.rb +116 -0
  11. data/lib/puree/extractor/collection.rb +131 -0
  12. data/lib/puree/extractor/dataset.rb +48 -0
  13. data/lib/puree/extractor/download.rb +71 -0
  14. data/lib/puree/extractor/event.rb +33 -0
  15. data/lib/puree/extractor/extractor.rb +10 -0
  16. data/lib/puree/extractor/journal.rb +29 -0
  17. data/lib/puree/extractor/organisation.rb +34 -0
  18. data/lib/puree/extractor/person.rb +32 -0
  19. data/lib/puree/extractor/project.rb +40 -0
  20. data/lib/puree/extractor/publication.rb +40 -0
  21. data/lib/puree/extractor/publisher.rb +27 -0
  22. data/lib/puree/extractor/resource.rb +69 -0
  23. data/lib/puree/extractor/server.rb +56 -0
  24. data/lib/puree/model/address.rb +50 -0
  25. data/lib/puree/model/copyright_license.rb +26 -0
  26. data/lib/puree/model/dataset.rb +84 -0
  27. data/lib/puree/model/download_header.rb +21 -0
  28. data/lib/puree/model/endeavour_person.rb +34 -0
  29. data/lib/puree/model/event.rb +31 -0
  30. data/lib/puree/model/event_header.rb +26 -0
  31. data/lib/puree/model/file.rb +45 -0
  32. data/lib/puree/model/helper/validation.rb +15 -0
  33. data/lib/puree/model/journal.rb +20 -0
  34. data/lib/puree/model/legal_condition.rb +26 -0
  35. data/lib/puree/model/link.rb +26 -0
  36. data/lib/puree/model/model.rb +7 -0
  37. data/lib/puree/model/organisation.rb +34 -0
  38. data/lib/puree/model/organisation_header.rb +34 -0
  39. data/lib/puree/model/person.rb +28 -0
  40. data/lib/puree/model/person_name.rb +52 -0
  41. data/lib/puree/model/project.rb +49 -0
  42. data/lib/puree/model/publication.rb +53 -0
  43. data/lib/puree/model/publication_status.rb +21 -0
  44. data/lib/puree/model/publisher.rb +13 -0
  45. data/lib/puree/model/related_content_header.rb +34 -0
  46. data/lib/puree/model/resource.rb +42 -0
  47. data/lib/puree/model/server.rb +13 -0
  48. data/lib/puree/model/spatial_point.rb +16 -0
  49. data/lib/puree/model/structure.rb +18 -0
  50. data/lib/puree/model/temporal_range.rb +15 -0
  51. data/lib/puree/util/date.rb +86 -0
  52. data/lib/puree/util/util.rb +8 -0
  53. data/lib/puree/version.rb +1 -1
  54. data/lib/puree/xml_extractor/base.rb +47 -0
  55. data/lib/puree/xml_extractor/collection.rb +40 -0
  56. data/lib/puree/xml_extractor/dataset.rb +305 -0
  57. data/lib/puree/xml_extractor/download.rb +42 -0
  58. data/lib/puree/xml_extractor/event.rb +63 -0
  59. data/lib/puree/xml_extractor/journal.rb +33 -0
  60. data/lib/puree/xml_extractor/organisation.rb +75 -0
  61. data/lib/puree/xml_extractor/person.rb +57 -0
  62. data/lib/puree/xml_extractor/project.rb +135 -0
  63. data/lib/puree/xml_extractor/publication.rb +189 -0
  64. data/lib/puree/xml_extractor/publisher.rb +28 -0
  65. data/lib/puree/xml_extractor/resource.rb +71 -0
  66. data/lib/puree/xml_extractor/server.rb +32 -0
  67. data/lib/puree/xml_extractor/shared.rb +31 -0
  68. data/lib/puree/xml_extractor/xml_extractor.rb +10 -0
  69. data/puree.gemspec +11 -8
  70. data/spec/download_http_spec.rb +31 -0
  71. data/spec/open_api_dataset_http_spec.rb +15 -0
  72. data/spec/resource/collection_all_http_spec.rb +77 -0
  73. data/spec/resource/collection_http_spec.rb +65 -0
  74. data/spec/resource/dataset_http_spec.rb +104 -0
  75. data/spec/resource/event_http_spec.rb +52 -0
  76. data/spec/resource/journal_http_spec.rb +36 -0
  77. data/spec/resource/organisation_http_spec.rb +52 -0
  78. data/spec/resource/person_http_spec.rb +48 -0
  79. data/spec/resource/project_http_spec.rb +76 -0
  80. data/spec/resource/publication_http_spec.rb +78 -0
  81. data/spec/resource/publisher_http_spec.rb +26 -0
  82. data/spec/server_http_spec.rb +26 -0
  83. data/spec/spec_helper.rb +106 -21
  84. metadata +110 -46
  85. data/lib/puree/collection.rb +0 -285
  86. data/lib/puree/configuration.rb +0 -15
  87. data/lib/puree/dataset.rb +0 -483
  88. data/lib/puree/date.rb +0 -63
  89. data/lib/puree/download.rb +0 -189
  90. data/lib/puree/event.rb +0 -133
  91. data/lib/puree/journal.rb +0 -75
  92. data/lib/puree/map.rb +0 -68
  93. data/lib/puree/organisation.rb +0 -177
  94. data/lib/puree/person.rb +0 -136
  95. data/lib/puree/project.rb +0 -231
  96. data/lib/puree/publication.rb +0 -258
  97. data/lib/puree/publisher.rb +0 -64
  98. data/lib/puree/resource.rb +0 -261
  99. data/lib/puree/server.rb +0 -156
  100. data/spec/collection_spec.rb +0 -62
  101. data/spec/dataset_spec.rb +0 -148
  102. data/spec/download_spec.rb +0 -33
  103. data/spec/event_spec.rb +0 -108
  104. data/spec/journal_spec.rb +0 -92
  105. data/spec/organisation_spec.rb +0 -112
  106. data/spec/person_spec.rb +0 -104
  107. data/spec/project_spec.rb +0 -120
  108. data/spec/publication_spec.rb +0 -128
  109. data/spec/publisher_spec.rb +0 -89
  110. data/spec/server_spec.rb +0 -36
@@ -0,0 +1,28 @@
1
+ module Puree
2
+
3
+ module XMLExtractor
4
+
5
+ # Publisher XML extractor.
6
+ # Tags itself as the :publisher resource type and exposes the publisher name.
7
+ class Publisher < Puree::XMLExtractor::Resource
8
+
9
+ def initialize(xml:)
10
+ super
11
+ @resource_type = :publisher
12
+ end
13
+
14
+ # @return [String, nil] single value found at the '/name' path
15
+ def name
16
+ xpath_query_for_single_value '/name'
17
+ end
18
+
19
+ # Deliberately disabled: adds no value, as the value is Publisher.
20
+ # def type
21
+ # xpath_query_for_single_value '/typeClassification/term/localizedString'
22
+ # end
23
+
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,71 @@
1
+ module Puree
2
+
3
+ module XMLExtractor
4
+
5
+ # Resource XML extractor.
6
+ #
7
+ class Resource < Puree::XMLExtractor::Base
8
+
9
+ def initialize(xml:)
10
+ super
11
+ end
12
+
13
+ # content based
14
+ def xpath_query(path)
15
+ path_from_root = service_xpath path
16
+ @doc.xpath path_from_root
17
+ end
18
+
19
+ # Is there any data after get? For a response that provides a count of the results.
20
+ # @return [Boolean]
21
+ def get_data?
22
+ path = service_xpath_count
23
+ xpath_result = @doc.xpath path
24
+ xpath_result.text.strip === '1' ? true : false
25
+ end
26
+
27
+ # @return [Time, nil]
28
+ def created
29
+ Time.parse xpath_query_for_single_value('/created')
30
+ end
31
+
32
+ # @return [Time, nil]
33
+ def modified
34
+ Time.parse xpath_query_for_single_value('/modified')
35
+ end
36
+
37
+ # @return [String, nil]
38
+ def uuid
39
+ xpath_query_for_single_value '/@uuid'
40
+ end
41
+
42
+ # Locale (e.g. en-GB)
43
+ # @return [String, nil]
44
+ def locale
45
+ str = xpath_query_for_single_value '/@locale'
46
+ str.tr('_','-') if str
47
+ end
48
+
49
+ private
50
+
51
+ def service_response_name
52
+ @api_map[:resource_type][@resource_type][:response]
53
+ end
54
+
55
+ def service_xpath_base
56
+ service_response_name + '/result/content'
57
+ end
58
+
59
+ def service_xpath_count
60
+ service_response_name + '/count'
61
+ end
62
+
63
+ def service_xpath(str_to_find)
64
+ service_xpath_base + str_to_find
65
+ end
66
+
67
+ end
68
+
69
+ end
70
+
71
+ end
@@ -0,0 +1,32 @@
1
+ module Puree
2
+
3
+ module XMLExtractor
4
+
5
+ # Server XML extractor.
6
+ #
7
+ class Server < Puree::XMLExtractor::Base
8
+
9
+ def initialize(xml:)
10
+ @resource_type = :server
11
+ super
12
+ end
13
+
14
+ # @return [String]
15
+ def version
16
+ path = "#{service_response_name}/baseVersion"
17
+ @doc.xpath(path).text.strip
18
+ end
19
+
20
+ # Is there any data after get?
21
+ #
22
+ # @return [Boolean]
23
+ def get_data?
24
+ # n.b. arbitrary element existence check
25
+ version.empty? ? false : true
26
+ end
27
+
28
+ end
29
+
30
+ end
31
+
32
+ end
@@ -0,0 +1,31 @@
1
+ module Puree
2
+
3
+ module XMLExtractor
4
+
5
+ # Shared XML extractor.
6
+ #
7
+ module Shared
8
+
9
+ # @return [Puree::Model::OrganisationHeader]
10
+ def self.organisation_header(nokogiri_xml_element)
11
+ h = Puree::Model::OrganisationHeader.new
12
+ h.uuid = nokogiri_xml_element.xpath('@uuid').text.strip
13
+ h.name = nokogiri_xml_element.xpath('name/localizedString').text.strip
14
+ h.type = nokogiri_xml_element.xpath('typeClassification/term/localizedString').text.strip
15
+ h
16
+ end
17
+
18
+ # @return [Array<Puree::Model::OrganisationHeader>]
19
+ def self.organisation_multi_header(nokogiri_xml_nodeset)
20
+ data = []
21
+ nokogiri_xml_nodeset.each do |i|
22
+ data << organisation_header(i)
23
+ end
24
+ data.uniq { |d| d.uuid }
25
+ end
26
+
27
+ end
28
+
29
+ end
30
+
31
+ end
@@ -0,0 +1,10 @@
1
+ module Puree
2
+
3
+ # An XMLExtractor manages the extraction of metadata from XML into Ruby
4
+ # data structures.
5
+ #
6
+ module XMLExtractor
7
+
8
+ end
9
+
10
+ end
data/puree.gemspec CHANGED
@@ -4,21 +4,24 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'puree/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "puree"
7
+ spec.name = 'puree'
8
8
  spec.version = Puree::VERSION
9
- spec.authors = ["Adrian Albin-Clark"]
10
- spec.email = ["a.albin-clark@lancaster.ac.uk"]
11
- spec.summary = %q{A client for the Pure Research Information System API.}
12
- spec.description = %q{Consumes the Pure Research Information System API and puts the metadata into simple data structures.}
13
- spec.homepage = "https://aalbinclark.gitbooks.io/puree"
14
- spec.license = "MIT"
9
+ spec.authors = 'Adrian Albin-Clark'
10
+ spec.email = 'a.albin-clark@lancaster.ac.uk'
11
+ spec.summary = %q{Metadata extraction from the Pure Research Information System.}
12
+ spec.description = %q{Fetches metadata from the Pure Research Information System and
13
+ extracts it into Ruby data models.}
14
+ spec.homepage = 'https://github.com/lulibrary/puree'
15
+ spec.license = 'MIT'
15
16
  spec.files = `git ls-files -z`.split("\x0")
16
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
- spec.require_paths = ["lib"]
19
+ spec.require_paths = ['lib']
19
20
 
20
21
  spec.required_ruby_version = '~> 2.1'
21
22
 
22
23
  spec.add_runtime_dependency 'http', '~> 2.0'
23
24
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
25
+
26
+ spec.add_development_dependency 'rspec'
24
27
  end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Download' do
4
+
5
+ before(:all) do
6
+
7
+ end
8
+
9
+ it '#new' do
10
+ p = Puree::Extractor::Download.new config
11
+ expect(p).to be_a Puree::Extractor::Download
12
+ end
13
+
14
+ describe 'data retrieval' do
15
+ before(:all) do
16
+ @p = Puree::Extractor::Download.new config
17
+ @metadata = @p.find resource: :dataset,
18
+ limit: 10
19
+ end
20
+
21
+ it '#find' do
22
+ expect(@metadata).to all( be_a Puree::Model::DownloadHeader )
23
+ end
24
+
25
+ it '#find' do
26
+ expect(@metadata).not_to be_empty
27
+ end
28
+
29
+ end
30
+
31
+ end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Open API' do
4
+
5
+ describe 'dataset retrieval' do
6
+ before(:all) do
7
+ resource = :dataset
8
+ request_open resource
9
+ end
10
+
11
+ resource_header
12
+
13
+ end
14
+
15
+ end
@@ -0,0 +1,77 @@
1
+ require 'spec_helper'
2
+
3
+ def new
4
+ @p = Puree::Extractor::Collection.new resource: @resource_type,
5
+ config: config
6
+ end
7
+
8
+ def go resource_type
9
+ @resource_type = resource_type
10
+ new
11
+ fetch
12
+ end
13
+
14
+ def fetch
15
+ count = @p.count
16
+ (0..count-1).each do |i|
17
+ resource = @p.find limit: 1,
18
+ offset: i
19
+ expect(resource[0]).to be_a resource_class
20
+ puts "#{i+1} of #{count} #{@resource_type}s"
21
+ sleep 1
22
+ system 'clear'
23
+ end
24
+ end
25
+
26
+ def resource_class
27
+ str = "Puree::Model::#{@resource_type.to_s.capitalize}"
28
+ Object.const_get(str)
29
+ end
30
+
31
+ describe 'dataset' do
32
+ it 'get all, one at a time' do
33
+ go :dataset
34
+ end
35
+ end
36
+
37
+ describe 'event' do
38
+ it 'get all, one at a time' do
39
+ go :event
40
+ end
41
+ end
42
+
43
+ describe 'journal' do
44
+ it 'get all, one at a time' do
45
+ go :journal
46
+ end
47
+ end
48
+
49
+ describe 'organisation' do
50
+ it 'get all, one at a time' do
51
+ go :organisation
52
+ end
53
+ end
54
+
55
+ describe 'person' do
56
+ it 'get all, one at a time' do
57
+ go :person
58
+ end
59
+ end
60
+
61
+ describe 'project' do
62
+ it 'get all, one at a time' do
63
+ go :project
64
+ end
65
+ end
66
+
67
+ describe 'publication' do
68
+ it 'get all, one at a time' do
69
+ go :publication
70
+ end
71
+ end
72
+
73
+ describe 'publisher' do
74
+ it 'get all, one at a time' do
75
+ go :publisher
76
+ end
77
+ end
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Collection of datasets' do
4
+
5
+ def new
6
+ @p = Puree::Extractor::Collection.new resource: :dataset,
7
+ config: config
8
+ end
9
+
10
+ def setup
11
+ new
12
+ end
13
+
14
+ it '#new' do
15
+ new
16
+ expect(@p).to be_a(Puree::Extractor::Collection)
17
+ end
18
+
19
+ describe 'data retrieval' do
20
+ before(:all) do
21
+ setup
22
+ @metadata = @p.find limit: 5
23
+ end
24
+
25
+ it 'collection' do
26
+ expect(@metadata).to be_a(Array)
27
+ end
28
+
29
+ end
30
+
31
+ describe 'data retrieval instance' do
32
+ before(:all) do
33
+ setup
34
+ @metadata = @p.find limit: 5
35
+ end
36
+
37
+ it 'collection' do
38
+ expect(@metadata).to be_a(Array)
39
+ end
40
+
41
+ it 'collection item' do
42
+ expect(@metadata).to all( be_a Puree::Model::Dataset )
43
+ end
44
+
45
+ end
46
+
47
+ describe 'data retrieval count' do
48
+ before(:all) do
49
+ setup
50
+ @p.find limit: 0
51
+ end
52
+
53
+ it '#count' do
54
+ expect(@p.count).to be_a(Fixnum)
55
+ end
56
+
57
+ end
58
+
59
+ it '#random_resource' do
60
+ setup
61
+ metadata = @p.random_resource
62
+ expect(metadata).to be_a Puree::Model::Dataset
63
+ end
64
+
65
+ end
@@ -0,0 +1,104 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Dataset' do
4
+
5
+ it '#new' do
6
+ p = Puree::Extractor::Dataset.new config
7
+ expect(p).to be_a Puree::Extractor::Dataset
8
+ end
9
+
10
+ before(:all) do
11
+ request :dataset
12
+ end
13
+
14
+ describe 'data retrieval' do
15
+
16
+ resource_header
17
+
18
+ it 'data structure' do
19
+ expect(@p).to be_a Puree::Model::Dataset
20
+ end
21
+
22
+ it '#access' do
23
+ expect(@p.access).to be_a String if @p.access
24
+ end
25
+
26
+ it '#associated' do
27
+ expect(@p.associated).to all( be_a Puree::Model::RelatedContentHeader )
28
+ end
29
+
30
+ it '#available' do
31
+ expect(@p.available).to be_a Time if @p.available
32
+ end
33
+
34
+ it '#description' do
35
+ expect(@p.description).to be_a String if @p.description
36
+ end
37
+
38
+ it '#doi' do
39
+ expect(@p.doi).to be_a String if @p.doi
40
+ end
41
+
42
+ it '#files' do
43
+ expect(@p.files).to all( be_a Puree::Model::File )
44
+ end
45
+
46
+ it '#keywords' do
47
+ expect(@p.keywords).to all( be_a String )
48
+ end
49
+
50
+ it '#legal_conditions' do
51
+ expect(@p.legal_conditions).to all( be_a Puree::Model::LegalCondition )
52
+ end
53
+
54
+ it '#links' do
55
+ expect(@p.links).to all( be_a Puree::Model::Link )
56
+ end
57
+
58
+ it '#persons_internal' do
59
+ expect(@p.persons_internal).to all( be_a Puree::Model::EndeavourPerson )
60
+ end
61
+
62
+ it '#persons_external' do
63
+ expect(@p.persons_external).to all( be_a Puree::Model::EndeavourPerson )
64
+ end
65
+
66
+ it '#persons_other' do
67
+ expect(@p.persons_other).to all( be_a Puree::Model::EndeavourPerson )
68
+ end
69
+
70
+ it '#production' do
71
+ expect(@p.production).to be_a Puree::Model::TemporalRange if @p.production
72
+ end
73
+
74
+ it '#projects' do
75
+ expect(@p.projects).to all( be_a Puree::Model::RelatedContentHeader )
76
+ end
77
+
78
+ it '#publications' do
79
+ expect(@p.publications).to all( be_a Puree::Model::RelatedContentHeader )
80
+ end
81
+
82
+ it '#publisher' do
83
+ expect(@p.publisher).to be_a String if @p.publisher
84
+ end
85
+
86
+ it '#spatial_places' do
87
+ expect(@p.spatial_places).to all( be_a String ) if @p.spatial_places
88
+ end
89
+
90
+ it '#spatial_point' do
91
+ expect(@p.spatial_point).to be_a Puree::Model::SpatialPoint if @p.spatial_point
92
+ end
93
+
94
+ it '#temporal' do
95
+ expect(@p.temporal).to be_a Puree::Model::TemporalRange if @p.temporal
96
+ end
97
+
98
+ it '#title' do
99
+ expect(@p.title).to be_a String if @p.title
100
+ end
101
+
102
+ end
103
+
104
+ end