cricos_scrape 2.0 → 2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/bin/cricos_scrape +40 -0
- data/cricos_scrape.gemspec +7 -9
- data/lib/cricos_scrape/bulk_import_courses.rb +2 -2
- data/lib/cricos_scrape/bulk_import_institutions.rb +2 -2
- data/lib/cricos_scrape/entities/address.rb +4 -0
- data/lib/cricos_scrape/entities/contact.rb +4 -0
- data/lib/cricos_scrape/entities/contact_officer.rb +4 -0
- data/lib/cricos_scrape/entities/course.rb +4 -0
- data/lib/cricos_scrape/entities/institution.rb +4 -0
- data/lib/cricos_scrape/entities/location.rb +4 -0
- data/lib/cricos_scrape/import_contacts.rb +2 -2
- data/lib/cricos_scrape/importer/contact_importer.rb +120 -0
- data/lib/cricos_scrape/importer/course_importer.rb +291 -0
- data/lib/cricos_scrape/importer/institution_importer.rb +279 -0
- data/lib/cricos_scrape/version.rb +1 -1
- data/lib/cricos_scrape.rb +4 -5
- metadata +16 -44
- data/CONTRIBUTING.md +0 -51
- data/Gemfile +0 -2
- data/Gemfile.lock +0 -64
- data/Procfile +0 -3
- data/Rakefile +0 -13
- data/spec/contact_importer_spec.rb +0 -76
- data/spec/course_importer_spec.rb +0 -71
- data/spec/fixtures/contact_details_of_state_act_uri.html +0 -546
- data/spec/fixtures/contact_details_of_state_wa_uri.html +0 -546
- data/spec/fixtures/course_details_with_contact_officers_table_grid.html +0 -467
- data/spec/fixtures/course_details_without_pagination_uri.html +0 -470
- data/spec/fixtures/courses_list_by_location_id_uri.html +0 -174
- data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +0 -406
- data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +0 -358
- data/spec/fixtures/institution_details_with_po_box_postal_address.html +0 -240
- data/spec/fixtures/institution_details_with_trading_name.html +0 -322
- data/spec/fixtures/institution_details_without_locations_details_uri.html +0 -151
- data/spec/fixtures/institution_details_without_pagination_location_uri.html +0 -299
- data/spec/fixtures/not_found_course_details_uri.html +0 -837
- data/spec/fixtures/not_found_institution_details.html +0 -36
- data/spec/institution_importer_spec.rb +0 -138
- data/spec/spec_helper.rb +0 -67
@@ -0,0 +1,279 @@
|
|
1
|
+
require 'cricos_scrape/entities/institution'
|
2
|
+
require 'cricos_scrape/entities/location'
|
3
|
+
require 'cricos_scrape/entities/contact_officer'
|
4
|
+
|
5
|
+
module CricosScrape
  # Scrapes one institution's details page on the CRICOS website and returns
  # the result as an Institution entity.
  #
  # The page is fetched once in the constructor. #run then reads the fields
  # out of that page and, when the location grid is paginated, walks every
  # grid page by posting the page's hidden form back to the server.
  class InstitutionImporter

    INSTITUTION_URL = 'http://cricos.education.gov.au/Institution/InstitutionDetailsOnePage.aspx'.freeze

    # Maximum number of times a grid postback is submitted before the
    # Mechanize::ResponseCodeError is re-raised to the caller.
    MAX_SUBMIT_ATTEMPTS = 5

    # Seconds to wait after a failed postback before trying again.
    RETRY_DELAY_SECONDS = 5

    # @param agent [#get] HTTP agent used to fetch the details page
    #   (presumably a Mechanize agent; only #get is called on it here)
    # @param params [Hash] must contain :provider_id
    # @raise [KeyError] when :provider_id is missing
    def initialize(agent, **params)
      @agent = agent
      @provider_id = params.fetch(:provider_id)
      @page = agent.get(url)
    end

    # Builds the Institution for the provider id given to the constructor.
    #
    # @return [Institution, nil] nil when the site reports the provider id
    #   as invalid
    def run
      return if institution_not_found?

      institution = Institution.new
      institution.provider_id      = provider_id
      institution.provider_code    = find_provider_code
      institution.trading_name     = find_trading_name
      institution.name             = find_name
      institution.type             = find_type
      institution.total_capacity   = find_total_capacity
      institution.website          = find_website
      institution.postal_address   = find_postal_address
      # Locations must be read before the contact officers: paging through
      # the location grid replaces @page with the last grid page.
      institution.locations        = find_location if location_found?
      institution.contact_officers = find_contact_officers

      institution
    end

    private

    attr_reader :agent, :provider_id, :page

    def url
      "#{INSTITUTION_URL}?ProviderID=#{provider_id}"
    end

    # @return [String, nil] stripped text of +field+, or nil when the node
    #   is missing
    def find_value_of_field(field)
      return if field.nil?

      field.text.strip
    end

    # Stripped text of the first node matching +selector+ on the current
    # page, or nil when nothing matches.
    def text_at(selector)
      find_value_of_field(@page.at(selector))
    end

    def find_provider_code
      text_at('#institutionDetails_lblProviderCode')
    end

    def find_trading_name
      text_at('#institutionDetails_lblInstitutionTradingName')
    end

    def find_name
      text_at('#institutionDetails_lblInstitutionName')
    end

    def find_type
      text_at('#institutionDetails_lblInstitutionType')
    end

    # @return [Integer, nil] the capacity as an integer, or nil when the
    #   field is missing or is not a whole number. Thousands separators
    #   ("1,200") are accepted.
    def find_total_capacity
      raw = text_at('#institutionDetails_lblLocationCapacity')
      return if raw.nil?

      digits = raw.delete(',')
      is_number?(digits) ? digits.to_i : nil
    end

    # @return [Boolean] true only when +text+ consists entirely of digits
    def is_number?(text)
      !(/\A\d+\z/ =~ text.to_s).nil?
    end

    def find_website
      text_at('#institutionDetails_hplInstitutionWebAddress')
    end

    # @return [String, nil] the direct text children of the postal address
    #   node joined with newlines, or nil when the node is missing
    def find_postal_address
      address_node = @page.at('#institutionDetails_lblInstitutionPostalAddress')
      return if address_node.nil?

      text_nodes = address_node.children.select { |child| child.is_a?(Nokogiri::XML::Text) }
      text_nodes.map { |child| find_value_of_field(child) }.join("\n")
    end

    # there is no record not found page
    # instead a search page is returned
    def institution_not_found?
      @page.body.include?('The Provider ID entered is invalid - please try another.')
    end

    def location_found?
      !@page.body.include?('No locations were found for the selected institution.')
    end

    # Collects the locations from every page of the location grid.
    #
    # @return [Array<Location>]
    def find_location
      return fetch_locations_from_current_page unless location_results_paginated?

      (1..total_pages).flat_map do |page_number|
        jump_to_page(page_number)
        fetch_locations_from_current_page
      end
    end

    def pagination
      @page.at('#locationList_gridSearchResults .gridPager')
    end

    def location_results_paginated?
      !pagination.nil?
    end

    # Text of the pager's second child node, e.g. "Page 1 of 3".
    def pagination_label
      pagination.children[1].text.strip
    end

    def total_pages
      pagination_label[/^Page [0-9]+ of ([0-9]+).*/, 1].to_i
    end

    def current_pagination_page
      pagination_label[/^Page ([0-9]+) of [0-9]+.*/, 1].to_i
    end

    # Posts the hidden form 'Form1' with the given location-grid event
    # argument and returns the resulting page.
    #
    # A Mechanize::ResponseCodeError is retried after RETRY_DELAY_SECONDS,
    # up to MAX_SUBMIT_ATTEMPTS submissions in total; the last error is then
    # re-raised instead of retrying forever.
    #
    # @param argument [String] value for the __EVENTARGUMENT field
    # @param action [String] value passed to Form#submit under the 'action'
    #   key of its second argument
    def submit_grid_event(argument, action)
      attempts = 0

      begin
        attempts += 1
        hidden_form = @page.form_with id: 'Form1'
        hidden_form['__EVENTTARGET'] = 'locationList$gridSearchResults'
        hidden_form['__EVENTARGUMENT'] = argument
        hidden_form.submit(nil, {'action' => action})
      rescue Mechanize::ResponseCodeError
        raise if attempts >= MAX_SUBMIT_ATTEMPTS

        sleep RETRY_DELAY_SECONDS
        retry
      end
    end

    # Makes +page_number+ of the location grid the current page.
    #
    # @return the current page object
    def jump_to_page(page_number)
      return @page if page_number == current_pagination_page

      @page = submit_grid_event("Page$#{page_number}", 'change-location-page')
    end

    # Resolves the location id for a grid row by submitting the row's click
    # event and reading LocationID from the URI of the page that comes back.
    # The current page (@page) is left untouched.
    #
    # @param row_index [Integer] child index of the row in the results table
    #   (data rows start at child index 3, so the click event is 0-based)
    # @return [String, nil] the id, or nil when the URI carries no LocationID
    def get_location_id(row_index)
      course_page = submit_grid_event("click-#{row_index - 3}", 'get-location-id')
      course_page.uri.to_s[/LocationID=([0-9]+)/, 1]
    end

    # @return [Array<Location>] locations listed on the current grid page;
    #   empty when the page has no results table
    def fetch_locations_from_current_page
      results_table = @page.at('#locationList_gridSearchResults')
      return [] if results_table.nil?

      # location_list is table contains locations in current page
      location_list = results_table.children

      # Child nodes after the last data row; a paginated table has one more.
      excess_row_at_the_end_table = location_results_paginated? ? 3 : 2
      start_location_row = 3
      end_location_row = location_list.count - excess_row_at_the_end_table

      (start_location_row..end_location_row).map do |row_index|
        cells = location_list[row_index].children

        location = Location.new
        location.location_id       = get_location_id(row_index)
        location.name              = find_value_of_field(cells[1])
        location.state             = find_value_of_field(cells[2])
        location.number_of_courses = find_value_of_field(cells[3])
        location
      end
    end

    # Reads every contact panel (div whose id starts with
    # "contactDetails_pnl"). A panel holds either a grid with one officer
    # per row or a single officer laid out as label/value rows.
    #
    # @return [Array<ContactOfficer>]
    def find_contact_officers
      panels = @page.search('//div[starts-with(@id, "contactDetails_pnl")]')

      panels.each_with_object([]) do |panel, officers|
        # The helpers below read the panel being processed from these ivars.
        @contact_officer_area = panel
        @contact_officer_table = @contact_officer_area.at('table').children

        if contains_contact_details_grid?
          officers.concat(find_contact_officer_grid)
        else
          officers << find_contact_officer
        end
      end
    end

    # @return [Array<ContactOfficer>] one officer per data row of the
    #   current panel's grid (the grid has no title column)
    def find_contact_officer_grid
      excess_row_at_the_end_table = 2
      data_row_start = 3
      data_row_end = @contact_officer_table.count - excess_row_at_the_end_table

      (data_row_start..data_row_end).map do |row_index|
        cells = @contact_officer_table[row_index].children

        contact = ContactOfficer.new
        contact.role  = find_contact_officer_role
        contact.name  = find_value_of_field(cells[1])
        contact.phone = find_value_of_field(cells[2])
        contact.fax   = find_value_of_field(cells[3])
        contact.email = find_value_of_field(cells[4])
        contact
      end
    end

    # @return [ContactOfficer] the single officer of the current panel
    def find_contact_officer
      contact = ContactOfficer.new
      contact.role  = find_contact_officer_role
      contact.name  = find_contact_officer_name
      contact.title = find_contact_officer_title
      contact.phone = find_contact_officer_phone
      contact.fax   = find_contact_officer_fax
      contact.email = find_contact_officer_email

      contact
    end

    # @return [String, nil] the panel heading without its first colon, or
    #   nil when the heading node is missing
    def find_contact_officer_role
      heading = find_value_of_field(@contact_officer_area.children[1])
      heading.nil? ? nil : heading.sub(':', '')
    end

    # Value cell (child index 3) of the row at +row_index+ in the current
    # panel's table, or nil when that row does not exist.
    def contact_table_value(row_index)
      row = @contact_officer_table[row_index]
      return if row.nil?

      find_value_of_field(row.children[3])
    end

    def find_contact_officer_name
      contact_table_value(1)
    end

    def find_contact_officer_title
      contact_table_value(3)
    end

    def find_contact_officer_phone
      contact_table_value(5)
    end

    def find_contact_officer_fax
      contact_table_value(7)
    end

    def find_contact_officer_email
      contact_table_value(9)
    end

    # @return [Boolean] true when the current panel contains a table whose
    #   id starts with "contactDetails_grid"
    def contains_contact_details_grid?
      contact_officer_area_css_id = @contact_officer_area.attributes['id'].text
      @page.search("//*[@id='#{contact_officer_area_css_id}']/div/table[starts-with(@id, 'contactDetails_grid')]").any?
    end

  end
end
|
data/lib/cricos_scrape.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'json'
|
3
3
|
require 'json/add/core'
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
require_relative 'cricos_scrape/import_contacts'
|
4
|
+
require 'cricos_scrape/json_struct'
|
5
|
+
require 'cricos_scrape/bulk_import_institutions'
|
6
|
+
require 'cricos_scrape/bulk_import_courses'
|
7
|
+
require 'cricos_scrape/import_contacts'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cricos_scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '2.
|
4
|
+
version: '2.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Trung Lê
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-10-
|
12
|
+
date: 2015-10-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -109,42 +109,31 @@ description: Scrape Institutions, Courses, Contacts from CRICOS
|
|
109
109
|
email:
|
110
110
|
- trung.le@ruby-journal.com
|
111
111
|
- ktoanlba@gmail.com
|
112
|
-
executables:
|
112
|
+
executables:
|
113
|
+
- cricos_scrape
|
113
114
|
extensions: []
|
114
115
|
extra_rdoc_files: []
|
115
116
|
files:
|
116
|
-
- CONTRIBUTING.md
|
117
|
-
- Gemfile
|
118
|
-
- Gemfile.lock
|
119
117
|
- LICENSE.md
|
120
|
-
- Procfile
|
121
118
|
- README.md
|
122
|
-
-
|
119
|
+
- bin/cricos_scrape
|
123
120
|
- cricos_scrape.gemspec
|
124
121
|
- lib/cricos_scrape.rb
|
125
122
|
- lib/cricos_scrape/agent.rb
|
126
123
|
- lib/cricos_scrape/bulk_import_courses.rb
|
127
124
|
- lib/cricos_scrape/bulk_import_institutions.rb
|
125
|
+
- lib/cricos_scrape/entities/address.rb
|
126
|
+
- lib/cricos_scrape/entities/contact.rb
|
127
|
+
- lib/cricos_scrape/entities/contact_officer.rb
|
128
|
+
- lib/cricos_scrape/entities/course.rb
|
129
|
+
- lib/cricos_scrape/entities/institution.rb
|
130
|
+
- lib/cricos_scrape/entities/location.rb
|
128
131
|
- lib/cricos_scrape/import_contacts.rb
|
132
|
+
- lib/cricos_scrape/importer/contact_importer.rb
|
133
|
+
- lib/cricos_scrape/importer/course_importer.rb
|
134
|
+
- lib/cricos_scrape/importer/institution_importer.rb
|
129
135
|
- lib/cricos_scrape/json_struct.rb
|
130
136
|
- lib/cricos_scrape/version.rb
|
131
|
-
- spec/contact_importer_spec.rb
|
132
|
-
- spec/course_importer_spec.rb
|
133
|
-
- spec/fixtures/contact_details_of_state_act_uri.html
|
134
|
-
- spec/fixtures/contact_details_of_state_wa_uri.html
|
135
|
-
- spec/fixtures/course_details_with_contact_officers_table_grid.html
|
136
|
-
- spec/fixtures/course_details_without_pagination_uri.html
|
137
|
-
- spec/fixtures/courses_list_by_location_id_uri.html
|
138
|
-
- spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
|
139
|
-
- spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
|
140
|
-
- spec/fixtures/institution_details_with_po_box_postal_address.html
|
141
|
-
- spec/fixtures/institution_details_with_trading_name.html
|
142
|
-
- spec/fixtures/institution_details_without_locations_details_uri.html
|
143
|
-
- spec/fixtures/institution_details_without_pagination_location_uri.html
|
144
|
-
- spec/fixtures/not_found_course_details_uri.html
|
145
|
-
- spec/fixtures/not_found_institution_details.html
|
146
|
-
- spec/institution_importer_spec.rb
|
147
|
-
- spec/spec_helper.rb
|
148
137
|
homepage: https://github.com/ruby-journal/cricos_scrape.rb
|
149
138
|
licenses:
|
150
139
|
- MIT
|
@@ -157,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
157
146
|
requirements:
|
158
147
|
- - ">="
|
159
148
|
- !ruby/object:Gem::Version
|
160
|
-
version: 2.
|
149
|
+
version: 2.0.0
|
161
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
151
|
requirements:
|
163
152
|
- - ">="
|
@@ -169,22 +158,5 @@ rubygems_version: 2.4.5.1
|
|
169
158
|
signing_key:
|
170
159
|
specification_version: 4
|
171
160
|
summary: CRICOS Scrape
|
172
|
-
test_files:
|
173
|
-
- spec/contact_importer_spec.rb
|
174
|
-
- spec/course_importer_spec.rb
|
175
|
-
- spec/fixtures/contact_details_of_state_act_uri.html
|
176
|
-
- spec/fixtures/contact_details_of_state_wa_uri.html
|
177
|
-
- spec/fixtures/course_details_with_contact_officers_table_grid.html
|
178
|
-
- spec/fixtures/course_details_without_pagination_uri.html
|
179
|
-
- spec/fixtures/courses_list_by_location_id_uri.html
|
180
|
-
- spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
|
181
|
-
- spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
|
182
|
-
- spec/fixtures/institution_details_with_po_box_postal_address.html
|
183
|
-
- spec/fixtures/institution_details_with_trading_name.html
|
184
|
-
- spec/fixtures/institution_details_without_locations_details_uri.html
|
185
|
-
- spec/fixtures/institution_details_without_pagination_location_uri.html
|
186
|
-
- spec/fixtures/not_found_course_details_uri.html
|
187
|
-
- spec/fixtures/not_found_institution_details.html
|
188
|
-
- spec/institution_importer_spec.rb
|
189
|
-
- spec/spec_helper.rb
|
161
|
+
test_files: []
|
190
162
|
has_rdoc:
|
data/CONTRIBUTING.md
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
cricos_scrape is an open source project and we would love you to help us make it better.
|
2
|
-
|
3
|
-
## Reporting Issues
|
4
|
-
|
5
|
-
A well formatted issue is appreciated, and goes a long way in helping us help you.
|
6
|
-
|
7
|
-
* Make sure you have a [GitHub account](https://github.com/signup/free)
|
8
|
-
* Submit a [GitHub issue](./issues) by:
|
9
|
-
* Clearly describing the issue
|
10
|
-
* Provide a descriptive summary
|
11
|
-
* Explain the expected behavior
|
12
|
-
* Explain the actual behavior
|
13
|
-
* Provide steps to reproduce the actual behavior
|
14
|
-
* Provide your application's complete `Gemfile.lock` as text (in a [Gist](https://gist.github.com) for bonus points)
|
15
|
-
* Any relevant stack traces
|
16
|
-
|
17
|
-
If you provide code, make sure it is formatted with triple backticks (\`\`\`).
|
18
|
-
|
19
|
-
At this point, we'd love to tell you how long it will take for us to respond,
|
20
|
-
but we just don't know.
|
21
|
-
|
22
|
-
## Pull requests
|
23
|
-
|
24
|
-
We accept pull requests to cricos_scrape for:
|
25
|
-
|
26
|
-
* Adding documentation
|
27
|
-
* Fixing bugs
|
28
|
-
* Adding new features
|
29
|
-
|
30
|
-
Not all features proposed will be added but we are open to having a conversation
|
31
|
-
about a feature you are championing.
|
32
|
-
|
33
|
-
Here's a quick guide:
|
34
|
-
|
35
|
-
1. Fork the repo.
|
36
|
-
|
37
|
-
2. Run the tests. This is to make sure your starting point works. Tests can be
|
38
|
-
run via `rake`
|
39
|
-
|
40
|
-
3. Create a new branch and make your changes. This includes tests for features!
|
41
|
-
|
42
|
-
4. Push to your fork and submit a pull request. For more information, see
|
43
|
-
[GitHub's pull request help section](https://help.github.com/articles/using-pull-requests/).
|
44
|
-
|
45
|
-
At this point you're waiting on us. Expect a conversation regarding your pull
|
46
|
-
request: questions, clarifications, and so on.
|
47
|
-
|
48
|
-
Some things that will increase the chance that your pull request is accepted:
|
49
|
-
|
50
|
-
* Include tests that fail without your code, and pass with it
|
51
|
-
* Update the documentation
|
data/Gemfile
DELETED
data/Gemfile.lock
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
cricos_scrape (2.0)
|
5
|
-
mechanize (~> 2.7, >= 2.7.2)
|
6
|
-
slop (~> 4.2.0, >= 4.2.0)
|
7
|
-
|
8
|
-
GEM
|
9
|
-
remote: https://rubygems.org/
|
10
|
-
specs:
|
11
|
-
diff-lcs (1.2.5)
|
12
|
-
domain_name (0.5.25)
|
13
|
-
unf (>= 0.0.5, < 1.0.0)
|
14
|
-
http-cookie (1.0.2)
|
15
|
-
domain_name (~> 0.5)
|
16
|
-
mechanize (2.7.3)
|
17
|
-
domain_name (~> 0.5, >= 0.5.1)
|
18
|
-
http-cookie (~> 1.0)
|
19
|
-
mime-types (~> 2.0)
|
20
|
-
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
21
|
-
net-http-persistent (~> 2.5, >= 2.5.2)
|
22
|
-
nokogiri (~> 1.4)
|
23
|
-
ntlm-http (~> 0.1, >= 0.1.1)
|
24
|
-
webrobots (>= 0.0.9, < 0.2)
|
25
|
-
mime-types (2.6.2)
|
26
|
-
mini_portile (0.6.2)
|
27
|
-
net-http-digest_auth (1.4)
|
28
|
-
net-http-persistent (2.9.4)
|
29
|
-
nokogiri (1.6.6.2)
|
30
|
-
mini_portile (~> 0.6.0)
|
31
|
-
ntlm-http (0.1.1)
|
32
|
-
rspec (3.3.0)
|
33
|
-
rspec-core (~> 3.3.0)
|
34
|
-
rspec-expectations (~> 3.3.0)
|
35
|
-
rspec-mocks (~> 3.3.0)
|
36
|
-
rspec-core (3.3.2)
|
37
|
-
rspec-support (~> 3.3.0)
|
38
|
-
rspec-expectations (3.3.1)
|
39
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
40
|
-
rspec-support (~> 3.3.0)
|
41
|
-
rspec-its (1.2.0)
|
42
|
-
rspec-core (>= 3.0.0)
|
43
|
-
rspec-expectations (>= 3.0.0)
|
44
|
-
rspec-mocks (3.3.2)
|
45
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
46
|
-
rspec-support (~> 3.3.0)
|
47
|
-
rspec-support (3.3.0)
|
48
|
-
slop (4.2.0)
|
49
|
-
unf (0.1.4)
|
50
|
-
unf_ext
|
51
|
-
unf_ext (0.0.7.1)
|
52
|
-
webrobots (0.1.1)
|
53
|
-
|
54
|
-
PLATFORMS
|
55
|
-
ruby
|
56
|
-
|
57
|
-
DEPENDENCIES
|
58
|
-
bundler (~> 1.6)
|
59
|
-
cricos_scrape!
|
60
|
-
rspec (~> 3.3.0, >= 3.3.0)
|
61
|
-
rspec-its (~> 1.2.0, >= 1.2.0)
|
62
|
-
|
63
|
-
BUNDLED WITH
|
64
|
-
1.10.6
|
data/Procfile
DELETED
data/Rakefile
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
require 'rspec/core/rake_task'
|
2
|
-
RSpec::Core::RakeTask.new
|
3
|
-
|
4
|
-
require_relative 'lib/cricos_scrape'
|
5
|
-
namespace :import do
  # Runs CricosScrape::BulkImportContacts against the file named by the
  # OUTPUT_FILE environment variable (default 'contacts.json'); OVERWRITE is
  # passed through unchanged as the second constructor argument.
  task :contacts do
    destination = ENV.fetch('OUTPUT_FILE', 'contacts.json')
    bulk_import = CricosScrape::BulkImportContacts.new(destination, ENV['OVERWRITE'])
    bulk_import.perform
  end
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe CricosScrape::ContactImporter do

  describe '#run' do
    let(:agent) { CricosScrape.agent }
    let(:importer) { CricosScrape::ContactImporter.new(agent) }

    # Regulators that appear, with identical details, under both states.
    let(:asqa_contact) do
      CricosScrape::Contact.new(
        'Vocational Courses (and ELICOS courses offered by an RTO or remaining ‘stand-alone’ ELICOS provider)',
        'ASQA Info Line',
        'Australian Skills Quality Authority',
        CricosScrape::Address.new('PO Box 9928', nil, 'Melbourne', 'VIC', '3001'),
        '1300701801',
        '',
        'enquiries@asqa.gov.au'
      )
    end

    let(:teqsa_contact) do
      CricosScrape::Contact.new(
        'Higher Education Courses (and ELICOS and Foundation Programs where delivered in a pathway arrangement with a Higher Education Provider)',
        'Tertiary Education Quality and Standards Agency',
        'Tertiary Education Quality and Standards Agency',
        CricosScrape::Address.new('GPO Box 1672', nil, 'Melbourne', 'VIC', '3001'),
        '1300739585',
        '1300739586',
        'enquiries@teqsa.gov.au'
      )
    end

    # State-specific school contacts.
    let(:act_school_contact) do
      CricosScrape::Contact.new(
        'School Courses (and ELICOS and Foundation Programs where delivered by a school)',
        'Ms Rebecca Hughes',
        'ACT Education and Training Directorate',
        CricosScrape::Address.new('GPO Box 158', nil, 'CANBERRA', 'ACT', '2601'),
        '0262059299',
        '',
        'etd.contactus@act.gov.au'
      )
    end

    let(:wa_school_contact) do
      CricosScrape::Contact.new(
        'School Courses (and ELICOS and Foundation Programs where delivered by a school)',
        'Mr Steve Page Senior Registration and Policy Officer',
        'Department of Education Services, Non-Government & International Education Directorate',
        CricosScrape::Address.new('PO Box 1766', nil, 'OSBORNE PARK', 'WA', '6916'),
        '0894411962',
        '0894411901',
        'ngs@des.wa.gov.au'
      )
    end

    # Contacts in the order the importer is expected to return them:
    # the ACT page first, then the WA page.
    let(:expected_contacts) do
      [
        # contacts of ACT
        act_school_contact, asqa_contact, teqsa_contact,
        # contacts of WA
        asqa_contact, wa_school_contact, teqsa_contact
      ]
    end

    subject(:contacts) { importer.run }

    before do
      # Restrict the run to two states and point each at its fixture.
      stub_const('CricosScrape::ContactImporter::STATES_CODE', ['ACT', 'WA'])

      allow(importer).to receive(:url_for).with('ACT').and_return(contact_details_of_state_act_uri)
      allow(importer).to receive(:url_for).with('WA').and_return(contact_details_of_state_wa_uri)
    end

    context 'when the response body contains with states ACT and WA' do
      it 'returns array contacts array' do
        expect(contacts).to eq expected_contacts
      end
    end
  end
end
|
@@ -1,71 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe CricosScrape::CourseImporter do

  describe '#run' do
    let(:agent) { CricosScrape.agent }
    subject(:course) { CricosScrape::CourseImporter.new(agent, course_id: 1).run }

    before do
      # Serve the fixture selected by each context instead of the live site.
      allow_any_instance_of(CricosScrape::CourseImporter).to receive(:url).and_return(uri)

      # Successive 'get-location-id' submissions yield pages whose URIs
      # carry LocationID=123 and then LocationID=456.
      location_pages = %w[123 456].map { |location_id| agent.get("#{uri}?LocationID=#{location_id}") }
      allow_any_instance_of(Mechanize::Form).to receive(:submit)
        .with(nil, {'action' => 'get-location-id'})
        .and_return(*location_pages)
    end

    context 'when there is no course found' do
      let(:uri) { not_found_course_details_uri }

      it 'does not import' do
        expect(course).to be_nil
      end
    end

    context 'when the details course is found' do
      let(:uri) { course_details_without_pagination_uri }

      # Scalar attributes and the value each is expected to hold.
      {
        course_id: 1,
        course_name: 'Primary Yrs K-6',
        course_code: '012395K',
        dual_qualification: 'No',
        field_of_education: '',
        broad_field: '12 - Mixed Field Programmes',
        narrow_field: '1201 - General Education Programmes',
        detailed_field: '120101 - General Primary and Secondary Education Programmes',
        course_level: 'Primary School Studies',
        foundation_studies: 'No',
        work_component: 'No',
        course_language: 'English',
        duration: '364',
        total_cost: '66,500'
      }.each do |attribute, expected_value|
        its(attribute) { is_expected.to eq expected_value }
      end

      its(:contact_officers) do
        is_expected.to eq [
          CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Nicole King', 'Manager', '0262056998', '62059239', nil),
          CricosScrape::ContactOfficer.new('International Student Contact', 'PAUL Wang', 'Study Tour Coordinator', '62077293', '', 'paul.wang@act.gov.au'),
        ]
      end
    end

    context 'when the response body not contains pagination location' do
      let(:uri) { course_details_without_pagination_uri }

      its(:location_ids) { is_expected.to eq ["123", "456"] }
    end

    context 'when the contact officers contains table grid' do
      let(:uri) { course_details_with_contact_officers_table_grid }
      let(:data) do
        [
          CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Andrew Vann', 'Vice-Chancellor', '02 6338 4209', '02 6338 4809', nil),
          CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 63657537', '02 63657590', 'mevans@csu.edu.au'),
          CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 6365 7537', '02 6365 7590', 'mevans@csu.edu.au')
        ]
      end

      its(:contact_officers) { is_expected.to eq data }
    end
  end
end
|