marketplace_opportunity_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,78 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://www.digitalmarketplace.service.gov.uk/digital-outcomes-and-specialists/opportunities?q=&statusOpenClosed=open
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip,deflate,identity
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Mechanize/2.7.6 Ruby/2.6.0p0 (http://github.com/sparklemotion/mechanize/)
16
+ Accept-Charset:
17
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
18
+ Accept-Language:
19
+ - en-us,en;q=0.5
20
+ Cookie:
21
+ - dm_session=eyJfZnJlc2giOmZhbHNlLCJfcGVybWFuZW50Ijp0cnVlLCJjc3JmX3Rva2VuIjoiZGQ5ZjY0MDliYTViMjU5MDdjNGY1NWExNjZkMzk5MTU1YmMyMmU2ZiJ9.D1G4Gw.VeWMKksXQnT6I6IViiHZMmZyQ_o
22
+ Host:
23
+ - www.digitalmarketplace.service.gov.uk
24
+ Connection:
25
+ - keep-alive
26
+ Keep-Alive:
27
+ - '300'
28
+ response:
29
+ status:
30
+ code: 200
31
+ message: OK
32
+ headers:
33
+ Content-Type:
34
+ - text/html; charset=utf-8
35
+ Transfer-Encoding:
36
+ - chunked
37
+ Connection:
38
+ - keep-alive
39
+ Content-Encoding:
40
+ - gzip
41
+ Date:
42
+ - Fri, 22 Feb 2019 16:43:08 GMT
43
+ Dm-Request-Id:
44
+ - 6eecab819821edb3
45
+ Server:
46
+ - nginx
47
+ Set-Cookie:
48
+ - dm_session=eyJfZnJlc2giOmZhbHNlLCJfcGVybWFuZW50Ijp0cnVlLCJjc3JmX3Rva2VuIjoiZGQ5ZjY0MDliYTViMjU5MDdjNGY1NWExNjZkMzk5MTU1YmMyMmU2ZiJ9.D1G4HA.lxoGJn-mnfuLCH1nNJa8le0lqwg;
49
+ Expires=Fri, 22-Feb-2019 17:43:08 GMT; Secure; HttpOnly; Path=/
50
+ Strict-Transport-Security:
51
+ - max-age=31536000; includeSubdomains
52
+ Vary:
53
+ - Cookie
54
+ X-B3-Spanid:
55
+ - 6eecab819821edb3
56
+ X-B3-Traceid:
57
+ - 6eecab819821edb3
58
+ X-Content-Type-Options:
59
+ - nosniff
60
+ X-Frame-Options:
61
+ - DENY
62
+ X-Vcap-Request-Id:
63
+ - 5ca8ae7b-61a1-4cca-7918-91493c50cb17
64
+ X-Xss-Protection:
65
+ - 1; mode=block
66
+ X-Cache:
67
+ - Miss from cloudfront
68
+ Via:
69
+ - 1.1 d887f5d446ae9c64e0365e1b394dac21.cloudfront.net (CloudFront)
70
+ X-Amz-Cf-Id:
71
+ - cNY_8sFnyTMT2_Neos0xZ1x00g_LQWvOceY06RdoW0UINFqQVqaDkQ==
72
+ body:
73
+ encoding: ASCII-8BIT
74
+ string: !binary |-
75
+ 
76
+ http_version:
77
+ recorded_at: Fri, 22 Feb 2019 16:43:08 GMT
78
+ recorded_with: VCR 4.0.0
@@ -0,0 +1,76 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://www.digitalmarketplace.service.gov.uk/digital-outcomes-and-specialists/opportunities?q=&statusOpenClosed=open
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip,deflate,identity
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Mechanize/2.7.6 Ruby/2.6.0p0 (http://github.com/sparklemotion/mechanize/)
16
+ Accept-Charset:
17
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
18
+ Accept-Language:
19
+ - en-us,en;q=0.5
20
+ Host:
21
+ - www.digitalmarketplace.service.gov.uk
22
+ Connection:
23
+ - keep-alive
24
+ Keep-Alive:
25
+ - '300'
26
+ response:
27
+ status:
28
+ code: 200
29
+ message: OK
30
+ headers:
31
+ Content-Type:
32
+ - text/html; charset=utf-8
33
+ Transfer-Encoding:
34
+ - chunked
35
+ Connection:
36
+ - keep-alive
37
+ Content-Encoding:
38
+ - gzip
39
+ Date:
40
+ - Fri, 22 Feb 2019 16:43:07 GMT
41
+ Dm-Request-Id:
42
+ - 35b0582261fa0cab
43
+ Server:
44
+ - nginx
45
+ Set-Cookie:
46
+ - dm_session=eyJfZnJlc2giOmZhbHNlLCJfcGVybWFuZW50Ijp0cnVlLCJjc3JmX3Rva2VuIjoiZGQ5ZjY0MDliYTViMjU5MDdjNGY1NWExNjZkMzk5MTU1YmMyMmU2ZiJ9.D1G4Gw.VeWMKksXQnT6I6IViiHZMmZyQ_o;
47
+ Expires=Fri, 22-Feb-2019 17:43:07 GMT; Secure; HttpOnly; Path=/
48
+ Strict-Transport-Security:
49
+ - max-age=31536000; includeSubdomains
50
+ Vary:
51
+ - Cookie
52
+ X-B3-Spanid:
53
+ - 35b0582261fa0cab
54
+ X-B3-Traceid:
55
+ - 35b0582261fa0cab
56
+ X-Content-Type-Options:
57
+ - nosniff
58
+ X-Frame-Options:
59
+ - DENY
60
+ X-Vcap-Request-Id:
61
+ - 9db3b66f-2222-4888-6e1c-25a3837e66a9
62
+ X-Xss-Protection:
63
+ - 1; mode=block
64
+ X-Cache:
65
+ - Miss from cloudfront
66
+ Via:
67
+ - 1.1 2297ae65d06c070d63c49ed7259f5fbb.cloudfront.net (CloudFront)
68
+ X-Amz-Cf-Id:
69
+ - Jo4zLZR82phSznxxNxgsjP4ciUyUhDVfm42c32VHfMANHYEI3g0bAg==
70
+ body:
71
+ encoding: ASCII-8BIT
72
+ string: !binary |-
73
+ 
74
+ http_version:
75
+ recorded_at: Fri, 22 Feb 2019 16:43:07 GMT
76
+ recorded_with: VCR 4.0.0
@@ -0,0 +1,76 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://www.digitalmarketplace.service.gov.uk/digital-outcomes-and-specialists/opportunities/9142
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip,deflate,identity
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Mechanize/2.7.6 Ruby/2.6.0p0 (http://github.com/sparklemotion/mechanize/)
16
+ Accept-Charset:
17
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
18
+ Accept-Language:
19
+ - en-us,en;q=0.5
20
+ Host:
21
+ - www.digitalmarketplace.service.gov.uk
22
+ Connection:
23
+ - keep-alive
24
+ Keep-Alive:
25
+ - '300'
26
+ response:
27
+ status:
28
+ code: 200
29
+ message: OK
30
+ headers:
31
+ Content-Type:
32
+ - text/html; charset=utf-8
33
+ Transfer-Encoding:
34
+ - chunked
35
+ Connection:
36
+ - keep-alive
37
+ Content-Encoding:
38
+ - gzip
39
+ Date:
40
+ - Sun, 24 Feb 2019 08:09:18 GMT
41
+ Dm-Request-Id:
42
+ - 0d5cdd214268d0c6
43
+ Server:
44
+ - nginx
45
+ Set-Cookie:
46
+ - dm_session=eyJfZnJlc2giOmZhbHNlLCJfcGVybWFuZW50Ijp0cnVlLCJjc3JmX3Rva2VuIjoiNzgyZmRlMDE1NGUwZmY3OWVmOGI1MTIwN2MyOWM4MzE1MjhlZmM5MSJ9.D1Pirg.Y_4n2SGe90SPjWTczxQLRLljXt8;
47
+ Expires=Sun, 24-Feb-2019 09:09:18 GMT; Secure; HttpOnly; Path=/
48
+ Strict-Transport-Security:
49
+ - max-age=31536000; includeSubdomains
50
+ Vary:
51
+ - Cookie
52
+ X-B3-Spanid:
53
+ - 0d5cdd214268d0c6
54
+ X-B3-Traceid:
55
+ - 0d5cdd214268d0c6
56
+ X-Content-Type-Options:
57
+ - nosniff
58
+ X-Frame-Options:
59
+ - DENY
60
+ X-Vcap-Request-Id:
61
+ - 072e2e60-e839-4a61-4627-c705677c106f
62
+ X-Xss-Protection:
63
+ - 1; mode=block
64
+ X-Cache:
65
+ - Miss from cloudfront
66
+ Via:
67
+ - 1.1 838e90f138fb32ccb28a128273765f44.cloudfront.net (CloudFront)
68
+ X-Amz-Cf-Id:
69
+ - EvvA6UQ2d3_ndeab1nDD6N6SuEunokWQfRPS_vU0QOXoaMkcD-VNVA==
70
+ body:
71
+ encoding: ASCII-8BIT
72
+ string: !binary |-
73
+ 
74
+ http_version:
75
+ recorded_at: Sun, 24 Feb 2019 08:09:18 GMT
76
+ recorded_with: VCR 4.0.0
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mechanize'
4
+
5
+ require 'marketplace_opportunity_scraper/opportunity'
6
+
7
# Top-level namespace for the gem. This entry-point file only declares the
# namespace; the Opportunity class is pulled in by the require above.
module MarketplaceOpportunityScraper
end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
# Date.parse is used below; require it explicitly rather than relying on
# another library having loaded it first.
require 'date'

module MarketplaceOpportunityScraper
  # One opportunity scraped from the Digital Marketplace website.
  #
  # Instances are built either from a search-result element (see .all) or from
  # an opportunity's own page (see .find). The fields named in ATTRIBUTES are
  # exposed as readers; #budget and #skills are read from the opportunity's
  # page, which is fetched lazily if it was not supplied at construction.
  class Opportunity
    BASE_URL = 'https://www.digitalmarketplace.service.gov.uk'
    # Path shared by the listing page and the individual opportunity pages.
    OPPORTUNITIES_PATH = '/digital-outcomes-and-specialists/opportunities'
    ATTRIBUTES = %i[
      id
      url
      title
      buyer
      location
      published
      question_deadline
      closing
      description
    ].freeze

    # Parenthesised splat avoids Ruby's "ambiguous first argument" warning.
    attr_reader(*ATTRIBUTES)

    # @param attrs [Hash] values keyed by the symbols in ATTRIBUTES; missing
    #   keys leave the attribute nil. An optional :page entry supplies an
    #   already-fetched page so that no request is needed later.
    def initialize(attrs)
      ATTRIBUTES.each do |a|
        instance_variable_set("@#{a}", attrs[a])
      end
      @page = attrs[:page]
    end

    # @return [String] stripped text of the 'Budget range' row on the page.
    def budget
      text_from_label('Budget range')
    end

    # @return [Array<String>] stripped text of each <li> found in the
    #   'Essential skills and experience' row on the page.
    def skills
      list = find_by_label('Essential skills and experience').search('li')
      list.map { |li| li.text.strip }
    end

    # Fetches the open-opportunities listing and builds one Opportunity per
    # '.search-result' element on that page.
    #
    # @return [Array<Opportunity>]
    def self.all
      url = "#{BASE_URL}#{OPPORTUNITIES_PATH}?q=&statusOpenClosed=open"
      page = mechanize.get(url)
      opportunities = page.search('.search-result')

      opportunities.map { |o| opportunity_from_search_result(o) }
    end

    # Fetches a single opportunity's page and builds an Opportunity from it.
    #
    # @param id [#to_s] identifier appended to the opportunities URL
    # @return [Opportunity]
    def self.find(id)
      opportunity_from_id(id)
    end

    # @return [Mechanize] the agent shared by every request, created on first
    #   use.
    def self.mechanize
      @@mechanize ||= Mechanize.new
    end

    # NOTE: the class-level helpers below are internal, but `private` has no
    # effect on `def self.` methods, so they have always been publicly
    # callable. Their visibility is deliberately left unchanged here.

    # Parses the date that follows the last ':' in an element's text.
    #
    # @param date [#text] element whose text ends with a parseable date
    # @return [Date]
    def self.get_date(date)
      Date.parse date.text.split(':').last
    end

    # Builds an Opportunity from the page at the opportunities URL for +id+.
    # The fetched page is handed to the instance so #budget and #skills do not
    # trigger a second request.
    def self.opportunity_from_id(id)
      url = "#{BASE_URL}#{OPPORTUNITIES_PATH}/#{id}"
      page = mechanize.get(url)

      title = page.at('h1')

      attrs = {
        page: page,
        id: id,
        title: title.text.strip,
        url: url,
        buyer: page.at('.context').text,
        location: text_from_label(page, 'Location'),
        published: Date.parse(text_from_label(page, 'Published')),
        question_deadline: Date.parse(text_from_label(page, 'Deadline for asking questions')),
        closing: Date.parse(text_from_label(page, 'Closing date for applications')),
        description: text_from_label(page, 'Summary of the work')
      }

      new(attrs)
    end

    # Builds an Opportunity from one search-result element. No page is
    # attached, so the instance fetches its own page on first use.
    def self.opportunity_from_search_result(element)
      title = element.at('.search-result-title')
      important_metadata = element.search('ul.search-result-important-metadata li')
      # The second 'ul.search-result-metadata' list is the one read for dates.
      dates = element.search('ul.search-result-metadata')[1].search('li')
      url = BASE_URL + title.at('a').attributes['href'].value

      attrs = {
        # The id is the integer value of the last path segment of the link.
        id: url.split('/').last.to_i,
        title: title.text.strip,
        url: url,
        buyer: important_metadata[0].text.strip,
        location: important_metadata[1].text.strip,
        published: get_date(dates[0]),
        question_deadline: get_date(dates[1]),
        closing: get_date(dates[2]),
        description: element.at('.search-result-excerpt').text.strip
      }

      new(attrs)
    end

    # @return [String] stripped text of the summary cell for +label+.
    def self.text_from_label(page, label)
      find_by_label(page, label).text.strip
    end

    # Finds the 'summary-item-field' cell(s) in the same table row as the
    # 'summary-item-field-first' cell whose <span> text equals +label+.
    # +label+ is interpolated into the XPath unescaped, so it must not contain
    # a single quote.
    def self.find_by_label(page, label)
      selector = "//td[@class='summary-item-field-first']/span[text()='#{label}']/../../td[@class='summary-item-field']"
      page.search(selector)
    end

    private

    # Instance-level shortcut for the class helper, using this opportunity's
    # own page.
    def find_by_label(label)
      self.class.find_by_label(page, label)
    end

    # Instance-level shortcut for the class helper, using this opportunity's
    # own page.
    def text_from_label(label)
      self.class.text_from_label(page, label)
    end

    # The opportunity's page, fetched from @url on first use when none was
    # supplied to the constructor. Goes through the memoising class accessor
    # rather than reading @@mechanize directly: the class variable is unset
    # until .mechanize has been called once, and reading it unset raises
    # NameError.
    def page
      @page ||= self.class.mechanize.get(@url)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module MarketplaceOpportunityScraper
  # Release number of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'marketplace_opportunity_scraper/version'
6
+
7
# Gem specification for marketplace_opportunity_scraper.
Gem::Specification.new do |spec|
  spec.name = 'marketplace_opportunity_scraper'
  spec.version = MarketplaceOpportunityScraper::VERSION
  spec.authors = ['Stuart Harrison']
  spec.email = ['stuart@dxw.com']

  # No trailing whitespace: the summary is copied verbatim into the built
  # gem's metadata.
  spec.summary = 'A Ruby gem that fetches the latest opportunities from the Gov.uk Digital Marketplace (https://www.digitalmarketplace.service.gov.uk/)'
  spec.homepage = 'https://github.com/dxw/marketplace_opportunity_scraper'
  spec.license = 'MIT'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # Test-only paths are left out of the package. That includes fixtures/,
  # which holds recorded HTTP cassettes (with session cookies) that are of no
  # use to consumers of the gem.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|fixtures)/}) }
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 2.0'
  spec.add_development_dependency 'pry', '~> 0.12.0'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'rubocop', '~> 0.63'
  spec.add_development_dependency 'vcr', '~> 4.0'
  spec.add_development_dependency 'webmock', '~> 3.5'

  # The only runtime dependency: used for fetching and parsing pages.
  spec.add_dependency 'mechanize', '~> 2.7'
end
metadata ADDED
@@ -0,0 +1,176 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: marketplace_opportunity_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Stuart Harrison
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-03-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.12.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.12.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.63'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.63'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '4.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '4.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.5'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.5'
111
+ - !ruby/object:Gem::Dependency
112
+ name: mechanize
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.7'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.7'
125
+ description:
126
+ email:
127
+ - stuart@dxw.com
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - ".circleci/config.yml"
133
+ - ".circleci/setup-rubygems.sh"
134
+ - ".rspec"
135
+ - ".rubocop.yml"
136
+ - ".rubocop_todo.yml"
137
+ - ".ruby-version"
138
+ - CODE_OF_CONDUCT.md
139
+ - Gemfile
140
+ - Gemfile.lock
141
+ - LICENSE.txt
142
+ - README.md
143
+ - Rakefile
144
+ - fixtures/cassettes/MarketplaceOpportunityScraper_Opportunity/_all/gets_data_that_is_not_on_the_homepage.yml
145
+ - fixtures/cassettes/MarketplaceOpportunityScraper_Opportunity/_all/gets_the_correct_opportunity_data.yml
146
+ - fixtures/cassettes/MarketplaceOpportunityScraper_Opportunity/_all/returns_all_open_opportunities.yml
147
+ - fixtures/cassettes/MarketplaceOpportunityScraper_Opportunity/_find/gets_the_correct_opportunity_data.yml
148
+ - lib/marketplace_opportunity_scraper.rb
149
+ - lib/marketplace_opportunity_scraper/opportunity.rb
150
+ - lib/marketplace_opportunity_scraper/version.rb
151
+ - marketplace_opportunity_scraper.gemspec
152
+ homepage: https://github.com/dxw/marketplace_opportunity_scraper
153
+ licenses:
154
+ - MIT
155
+ metadata: {}
156
+ post_install_message:
157
+ rdoc_options: []
158
+ require_paths:
159
+ - lib
160
+ required_ruby_version: !ruby/object:Gem::Requirement
161
+ requirements:
162
+ - - ">="
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ required_rubygems_version: !ruby/object:Gem::Requirement
166
+ requirements:
167
+ - - ">="
168
+ - !ruby/object:Gem::Version
169
+ version: '0'
170
+ requirements: []
171
+ rubygems_version: 3.0.1
172
+ signing_key:
173
+ specification_version: 4
174
+ summary: A Ruby gem that fetches the latest opportunities from the Gov.uk Digital
175
+ Marketplace (https://www.digitalmarketplace.service.gov.uk/)
176
+ test_files: []