cricos_scrape 2.0 → 2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/bin/cricos_scrape +40 -0
- data/cricos_scrape.gemspec +7 -9
- data/lib/cricos_scrape/bulk_import_courses.rb +2 -2
- data/lib/cricos_scrape/bulk_import_institutions.rb +2 -2
- data/lib/cricos_scrape/entities/address.rb +4 -0
- data/lib/cricos_scrape/entities/contact.rb +4 -0
- data/lib/cricos_scrape/entities/contact_officer.rb +4 -0
- data/lib/cricos_scrape/entities/course.rb +4 -0
- data/lib/cricos_scrape/entities/institution.rb +4 -0
- data/lib/cricos_scrape/entities/location.rb +4 -0
- data/lib/cricos_scrape/import_contacts.rb +2 -2
- data/lib/cricos_scrape/importer/contact_importer.rb +120 -0
- data/lib/cricos_scrape/importer/course_importer.rb +291 -0
- data/lib/cricos_scrape/importer/institution_importer.rb +279 -0
- data/lib/cricos_scrape/version.rb +1 -1
- data/lib/cricos_scrape.rb +4 -5
- metadata +16 -44
- data/CONTRIBUTING.md +0 -51
- data/Gemfile +0 -2
- data/Gemfile.lock +0 -64
- data/Procfile +0 -3
- data/Rakefile +0 -13
- data/spec/contact_importer_spec.rb +0 -76
- data/spec/course_importer_spec.rb +0 -71
- data/spec/fixtures/contact_details_of_state_act_uri.html +0 -546
- data/spec/fixtures/contact_details_of_state_wa_uri.html +0 -546
- data/spec/fixtures/course_details_with_contact_officers_table_grid.html +0 -467
- data/spec/fixtures/course_details_without_pagination_uri.html +0 -470
- data/spec/fixtures/courses_list_by_location_id_uri.html +0 -174
- data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +0 -406
- data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +0 -358
- data/spec/fixtures/institution_details_with_po_box_postal_address.html +0 -240
- data/spec/fixtures/institution_details_with_trading_name.html +0 -322
- data/spec/fixtures/institution_details_without_locations_details_uri.html +0 -151
- data/spec/fixtures/institution_details_without_pagination_location_uri.html +0 -299
- data/spec/fixtures/not_found_course_details_uri.html +0 -837
- data/spec/fixtures/not_found_institution_details.html +0 -36
- data/spec/institution_importer_spec.rb +0 -138
- data/spec/spec_helper.rb +0 -67
@@ -0,0 +1,279 @@
|
|
1
|
+
require 'cricos_scrape/entities/institution'
|
2
|
+
require 'cricos_scrape/entities/location'
|
3
|
+
require 'cricos_scrape/entities/contact_officer'
|
4
|
+
|
5
|
+
module CricosScrape
  # Fetches the CRICOS "institution details" page for a single provider ID and
  # maps what it finds onto an Institution entity (with its Location and
  # ContactOfficer entities).
  #
  # The page is requested once, in the constructor; #run then reads values out
  # of it and, for the location grid, submits the page's hidden form to change
  # grid pages and to resolve each row's location ID.
  class InstitutionImporter

    INSTITUTION_URL = 'http://cricos.education.gov.au/Institution/InstitutionDetailsOnePage.aspx'

    # Seconds to wait before re-submitting the form after an HTTP error response.
    RETRY_DELAY = 5

    # CSS selector of the table that lists the institution's locations.
    LOCATION_GRID = '#locationList_gridSearchResults'

    # Child index of the first data row in the location and contact grids.
    # NOTE(review): the offset presumably skips header rows and whitespace
    # text nodes - confirm against the live markup.
    FIRST_DATA_ROW = 3

    # agent  - object responding to #get(url) and returning the fetched page
    #          (a Mechanize agent in production).
    # params - keyword options; :provider_id is required (KeyError if missing).
    def initialize(agent, **params)
      @agent = agent
      @provider_id = params.fetch(:provider_id)
      @page = agent.get(url)
    end

    # Builds the Institution for the requested provider.
    #
    # Returns the populated Institution, or nil when the site reports that the
    # provider ID is invalid. Locations are only looked up when the page does
    # not state that the institution has none.
    def run
      return if institution_not_found?

      institution = Institution.new
      institution.provider_id = provider_id
      institution.provider_code = find_provider_code
      institution.trading_name = find_trading_name
      institution.name = find_name
      institution.type = find_type
      institution.total_capacity = find_total_capacity
      institution.website = find_website
      institution.postal_address = find_postal_address
      institution.locations = find_location if location_found?
      institution.contact_officers = find_contact_officers

      institution
    end

    private

    attr_reader :agent, :provider_id, :page

    # Details-page URL for the configured provider.
    def url
      "#{INSTITUTION_URL}?ProviderID=#{provider_id}"
    end

    # Stripped text of a node, or nil when the node is missing.
    def find_value_of_field(field)
      field && field.text.strip
    end

    def find_provider_code
      find_value_of_field(@page.at('#institutionDetails_lblProviderCode'))
    end

    def find_trading_name
      find_value_of_field(@page.at('#institutionDetails_lblInstitutionTradingName'))
    end

    def find_name
      find_value_of_field(@page.at('#institutionDetails_lblInstitutionName'))
    end

    def find_type
      find_value_of_field(@page.at('#institutionDetails_lblInstitutionType'))
    end

    # Total capacity as an Integer, or nil when the field is missing or does
    # not start with a number.
    #
    # Thousands separators are removed before conversion; without that,
    # String#to_i stops at the first comma and "1,500" would be read as 1.
    def find_total_capacity
      capacity = find_value_of_field(@page.at('#institutionDetails_lblLocationCapacity'))
      return nil if capacity.nil?

      digits = capacity.delete(',')
      is_number?(digits) ? digits.to_i : nil
    end

    # True when the text starts with a digit, i.e. String#to_i will yield the
    # number that is actually written there rather than a fallback 0.
    def is_number?(text)
      !!(text =~ /\A\d/)
    end

    def find_website
      find_value_of_field(@page.at('#institutionDetails_hplInstitutionWebAddress'))
    end

    # Postal address as newline-separated lines, or nil when the node is
    # missing. Only the direct text children are used, so child elements
    # (presumably <br> separators - confirm against markup) are dropped.
    def find_postal_address
      address_node = @page.at('#institutionDetails_lblInstitutionPostalAddress')
      return nil unless address_node

      text_nodes = address_node.children.select { |node| node.is_a?(Nokogiri::XML::Text) }
      text_nodes.map { |node| find_value_of_field(node) }.join("\n")
    end

    # there is no record not found page
    # instead a search page is returned
    def institution_not_found?
      @page.body.include?('The Provider ID entered is invalid - please try another.')
    end

    def location_found?
      !@page.body.include?('No locations were found for the selected institution.')
    end

    # All locations of the institution, walking every grid page when the
    # location grid is paginated.
    def find_location
      return fetch_locations_from_current_page unless location_results_paginated?

      (1..total_pages).flat_map do |page_number|
        jump_to_page(page_number)
        fetch_locations_from_current_page
      end
    end

    # Pager node of the location grid, or nil when the grid has a single page.
    def pagination
      @page.at("#{LOCATION_GRID} .gridPager")
    end

    def location_results_paginated?
      !!pagination
    end

    # Pager caption; the regexes below expect it to read "Page X of Y".
    def pager_text
      pagination.children[1].text.strip
    end

    def total_pages
      pager_text[/^Page [0-9]+ of ([0-9]+).*/, 1].to_i
    end

    def current_pagination_page
      pager_text[/^Page ([0-9]+) of [0-9]+.*/, 1].to_i
    end

    # Switches @page to the requested page of the location grid; a no-op when
    # that page is already the current one.
    def jump_to_page(page_number)
      return @page if page_number == current_pagination_page

      @page = submit_grid_event("Page$#{page_number}", 'change-location-page')
    end

    # Resolves the location ID of a grid row by submitting the row's click
    # event and reading LocationID from the URI of the page that comes back.
    #
    # row_index - child index of the row inside the grid; the event argument
    #             is that index counted from the first data row.
    #
    # Returns the ID as a String, or nil when the URI carries no LocationID.
    def get_location_id(row_index)
      course_page = submit_grid_event("click-#{row_index - FIRST_DATA_ROW}", 'get-location-id')
      course_page.uri.to_s[/LocationID=([0-9]+)/, 1]
    end

    # Submits the page's hidden form with a location-grid event and returns
    # the resulting page.
    #
    # The same submission is repeated after RETRY_DELAY seconds whenever the
    # server answers with an error status. This uses `retry`, so the value of
    # the successful attempt is what gets returned and the call stack does not
    # grow with each failure.
    #
    # NOTE(review): retries are unbounded, as before; a permanently failing
    # server blocks forever. Consider a retry limit once callers are known to
    # handle Mechanize::ResponseCodeError.
    #
    # argument - value for __EVENTARGUMENT.
    # action   - value sent as the 'action' request header
    #            (presumably only used to tell the two request kinds apart,
    #            e.g. when stubbing - confirm).
    def submit_grid_event(argument, action)
      hidden_form = @page.form_with id: 'Form1'
      hidden_form['__EVENTTARGET'] = 'locationList$gridSearchResults'
      hidden_form['__EVENTARGUMENT'] = argument

      begin
        hidden_form.submit(nil, { 'action' => action })
      rescue Mechanize::ResponseCodeError
        sleep RETRY_DELAY
        retry
      end
    end

    # Locations listed on the grid page currently held in @page; an empty
    # array when the page has no location grid.
    def fetch_locations_from_current_page
      grid = @page.at(LOCATION_GRID)
      return [] unless grid

      rows = grid.children
      # Trailing children that are not data rows: one more when a pager row
      # is present.
      trailing_rows = location_results_paginated? ? 3 : 2
      last_data_row = rows.count - trailing_rows

      (FIRST_DATA_ROW..last_data_row).map do |row_index|
        cells = rows[row_index].children

        location = Location.new
        location.location_id = get_location_id(row_index)
        location.name = find_value_of_field(cells[1])
        location.state = find_value_of_field(cells[2])
        location.number_of_courses = find_value_of_field(cells[3])
        location
      end
    end

    # Contact officers from every "contactDetails_pnl*" panel on the page.
    # A panel holds either a grid with one officer per row or a single
    # officer laid out as label/value rows.
    def find_contact_officers
      panels = @page.search('//div[starts-with(@id, "contactDetails_pnl")]')

      panels.each_with_object([]) do |panel, officers|
        table_rows = panel.at('table').children

        if contains_contact_details_grid?(panel)
          officers.concat(find_contact_officer_grid(panel, table_rows))
        else
          officers << find_contact_officer(panel, table_rows)
        end
      end
    end

    # Officers of a grid-style panel, one per data row. The title is not
    # available in this layout and is left unset.
    def find_contact_officer_grid(panel, table_rows)
      trailing_rows = 2
      last_data_row = table_rows.count - trailing_rows

      (FIRST_DATA_ROW..last_data_row).map do |row_index|
        cells = table_rows[row_index].children

        contact = ContactOfficer.new
        contact.role = find_contact_officer_role(panel)
        contact.name = find_value_of_field(cells[1])
        contact.phone = find_value_of_field(cells[2])
        contact.fax = find_value_of_field(cells[3])
        contact.email = find_value_of_field(cells[4])
        contact
      end
    end

    # Officer of a single-contact panel. The values sit in every second
    # child of the table, starting at index 1.
    def find_contact_officer(panel, table_rows)
      contact = ContactOfficer.new
      contact.role = find_contact_officer_role(panel)
      contact.name = find_contact_detail(table_rows, 1)
      contact.title = find_contact_detail(table_rows, 3)
      contact.phone = find_contact_detail(table_rows, 5)
      contact.fax = find_contact_detail(table_rows, 7)
      contact.email = find_contact_detail(table_rows, 9)
      contact
    end

    # Role heading of a panel with the first colon removed, or nil when the
    # heading node is missing.
    def find_contact_officer_role(panel)
      role = find_value_of_field(panel.children[1])
      role && role.sub(':', '')
    end

    # Value cell (child index 3) of the given table row, or nil when the row
    # or the cell does not exist.
    def find_contact_detail(table_rows, row_index)
      row = table_rows[row_index]
      row && find_value_of_field(row.children[3])
    end

    # Whether the panel contains a "contactDetails_grid*" table.
    def contains_contact_details_grid?(panel)
      panel_id = panel.attributes['id'].text
      @page.search("//*[@id='#{panel_id}']/div/table[starts-with(@id, 'contactDetails_grid')]").any?
    end

  end
end
|
data/lib/cricos_scrape.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'json'
|
3
3
|
require 'json/add/core'
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
require_relative 'cricos_scrape/import_contacts'
|
4
|
+
require 'cricos_scrape/json_struct'
|
5
|
+
require 'cricos_scrape/bulk_import_institutions'
|
6
|
+
require 'cricos_scrape/bulk_import_courses'
|
7
|
+
require 'cricos_scrape/import_contacts'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cricos_scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '2.0'
|
4
|
+
version: '2.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Trung Lê
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-10-
|
12
|
+
date: 2015-10-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -109,42 +109,31 @@ description: Scrape Institutions, Courses, Contacts from CRICOS
|
|
109
109
|
email:
|
110
110
|
- trung.le@ruby-journal.com
|
111
111
|
- ktoanlba@gmail.com
|
112
|
-
executables:
|
112
|
+
executables:
|
113
|
+
- cricos_scrape
|
113
114
|
extensions: []
|
114
115
|
extra_rdoc_files: []
|
115
116
|
files:
|
116
|
-
- CONTRIBUTING.md
|
117
|
-
- Gemfile
|
118
|
-
- Gemfile.lock
|
119
117
|
- LICENSE.md
|
120
|
-
- Procfile
|
121
118
|
- README.md
|
122
|
-
- Rakefile
|
119
|
+
- bin/cricos_scrape
|
123
120
|
- cricos_scrape.gemspec
|
124
121
|
- lib/cricos_scrape.rb
|
125
122
|
- lib/cricos_scrape/agent.rb
|
126
123
|
- lib/cricos_scrape/bulk_import_courses.rb
|
127
124
|
- lib/cricos_scrape/bulk_import_institutions.rb
|
125
|
+
- lib/cricos_scrape/entities/address.rb
|
126
|
+
- lib/cricos_scrape/entities/contact.rb
|
127
|
+
- lib/cricos_scrape/entities/contact_officer.rb
|
128
|
+
- lib/cricos_scrape/entities/course.rb
|
129
|
+
- lib/cricos_scrape/entities/institution.rb
|
130
|
+
- lib/cricos_scrape/entities/location.rb
|
128
131
|
- lib/cricos_scrape/import_contacts.rb
|
132
|
+
- lib/cricos_scrape/importer/contact_importer.rb
|
133
|
+
- lib/cricos_scrape/importer/course_importer.rb
|
134
|
+
- lib/cricos_scrape/importer/institution_importer.rb
|
129
135
|
- lib/cricos_scrape/json_struct.rb
|
130
136
|
- lib/cricos_scrape/version.rb
|
131
|
-
- spec/contact_importer_spec.rb
|
132
|
-
- spec/course_importer_spec.rb
|
133
|
-
- spec/fixtures/contact_details_of_state_act_uri.html
|
134
|
-
- spec/fixtures/contact_details_of_state_wa_uri.html
|
135
|
-
- spec/fixtures/course_details_with_contact_officers_table_grid.html
|
136
|
-
- spec/fixtures/course_details_without_pagination_uri.html
|
137
|
-
- spec/fixtures/courses_list_by_location_id_uri.html
|
138
|
-
- spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
|
139
|
-
- spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
|
140
|
-
- spec/fixtures/institution_details_with_po_box_postal_address.html
|
141
|
-
- spec/fixtures/institution_details_with_trading_name.html
|
142
|
-
- spec/fixtures/institution_details_without_locations_details_uri.html
|
143
|
-
- spec/fixtures/institution_details_without_pagination_location_uri.html
|
144
|
-
- spec/fixtures/not_found_course_details_uri.html
|
145
|
-
- spec/fixtures/not_found_institution_details.html
|
146
|
-
- spec/institution_importer_spec.rb
|
147
|
-
- spec/spec_helper.rb
|
148
137
|
homepage: https://github.com/ruby-journal/cricos_scrape.rb
|
149
138
|
licenses:
|
150
139
|
- MIT
|
@@ -157,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
157
146
|
requirements:
|
158
147
|
- - ">="
|
159
148
|
- !ruby/object:Gem::Version
|
160
|
-
version: 2.
|
149
|
+
version: 2.0.0
|
161
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
151
|
requirements:
|
163
152
|
- - ">="
|
@@ -169,22 +158,5 @@ rubygems_version: 2.4.5.1
|
|
169
158
|
signing_key:
|
170
159
|
specification_version: 4
|
171
160
|
summary: CRICOS Scrape
|
172
|
-
test_files:
|
173
|
-
- spec/contact_importer_spec.rb
|
174
|
-
- spec/course_importer_spec.rb
|
175
|
-
- spec/fixtures/contact_details_of_state_act_uri.html
|
176
|
-
- spec/fixtures/contact_details_of_state_wa_uri.html
|
177
|
-
- spec/fixtures/course_details_with_contact_officers_table_grid.html
|
178
|
-
- spec/fixtures/course_details_without_pagination_uri.html
|
179
|
-
- spec/fixtures/courses_list_by_location_id_uri.html
|
180
|
-
- spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
|
181
|
-
- spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
|
182
|
-
- spec/fixtures/institution_details_with_po_box_postal_address.html
|
183
|
-
- spec/fixtures/institution_details_with_trading_name.html
|
184
|
-
- spec/fixtures/institution_details_without_locations_details_uri.html
|
185
|
-
- spec/fixtures/institution_details_without_pagination_location_uri.html
|
186
|
-
- spec/fixtures/not_found_course_details_uri.html
|
187
|
-
- spec/fixtures/not_found_institution_details.html
|
188
|
-
- spec/institution_importer_spec.rb
|
189
|
-
- spec/spec_helper.rb
|
161
|
+
test_files: []
|
190
162
|
has_rdoc:
|
data/CONTRIBUTING.md
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
Lotus is an open source project and we would love you to help us make it better.
|
2
|
-
|
3
|
-
## Reporting Issues
|
4
|
-
|
5
|
-
A well formatted issue is appreciated, and goes a long way in helping us help you.
|
6
|
-
|
7
|
-
* Make sure you have a [GitHub account](https://github.com/signup/free)
|
8
|
-
* Submit a [Github issue](./issues) by:
|
9
|
-
* Clearly describing the issue
|
10
|
-
* Provide a descriptive summary
|
11
|
-
* Explain the expected behavior
|
12
|
-
* Explain the actual behavior
|
13
|
-
* Provide steps to reproduce the actual behavior
|
14
|
-
* Provide your application's complete `Gemfile.lock` as text (in a [Gist](https://gist.github.com) for bonus points)
|
15
|
-
* Any relevant stack traces
|
16
|
-
|
17
|
-
If you provide code, make sure it is formatted with the triple backticks (\`).
|
18
|
-
|
19
|
-
At this point, we'd love to tell you how long it will take for us to respond,
|
20
|
-
but we just don't know.
|
21
|
-
|
22
|
-
## Pull requests
|
23
|
-
|
24
|
-
We accept pull requests to Lotus for:
|
25
|
-
|
26
|
-
* Adding documentation
|
27
|
-
* Fixing bugs
|
28
|
-
* Adding new features
|
29
|
-
|
30
|
-
Not all features proposed will be added but we are open to having a conversation
|
31
|
-
about a feature you are championing.
|
32
|
-
|
33
|
-
Here's a quick guide:
|
34
|
-
|
35
|
-
1. Fork the repo.
|
36
|
-
|
37
|
-
2. Run the tests. This is to make sure your starting point works. Tests can be
|
38
|
-
run via `rake`
|
39
|
-
|
40
|
-
3. Create a new branch and make your changes. This includes tests for features!
|
41
|
-
|
42
|
-
4. Push to your fork and submit a pull request. For more information, see
|
43
|
-
[Github's pull request help section](https://help.github.com/articles/using-pull-requests/).
|
44
|
-
|
45
|
-
At this point you're waiting on us. Expect a conversation regarding your pull
|
46
|
-
request; Questions, clarifications, and so on.
|
47
|
-
|
48
|
-
Some things that will increase the chance that your pull request is accepted:
|
49
|
-
|
50
|
-
* Include tests that fail without your code, and pass with it
|
51
|
-
* Update the documentation
|
data/Gemfile
DELETED
data/Gemfile.lock
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
PATH
|
2
|
-
remote: .
|
3
|
-
specs:
|
4
|
-
cricos_scrape (2.0)
|
5
|
-
mechanize (~> 2.7, >= 2.7.2)
|
6
|
-
slop (~> 4.2.0, >= 4.2.0)
|
7
|
-
|
8
|
-
GEM
|
9
|
-
remote: https://rubygems.org/
|
10
|
-
specs:
|
11
|
-
diff-lcs (1.2.5)
|
12
|
-
domain_name (0.5.25)
|
13
|
-
unf (>= 0.0.5, < 1.0.0)
|
14
|
-
http-cookie (1.0.2)
|
15
|
-
domain_name (~> 0.5)
|
16
|
-
mechanize (2.7.3)
|
17
|
-
domain_name (~> 0.5, >= 0.5.1)
|
18
|
-
http-cookie (~> 1.0)
|
19
|
-
mime-types (~> 2.0)
|
20
|
-
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
21
|
-
net-http-persistent (~> 2.5, >= 2.5.2)
|
22
|
-
nokogiri (~> 1.4)
|
23
|
-
ntlm-http (~> 0.1, >= 0.1.1)
|
24
|
-
webrobots (>= 0.0.9, < 0.2)
|
25
|
-
mime-types (2.6.2)
|
26
|
-
mini_portile (0.6.2)
|
27
|
-
net-http-digest_auth (1.4)
|
28
|
-
net-http-persistent (2.9.4)
|
29
|
-
nokogiri (1.6.6.2)
|
30
|
-
mini_portile (~> 0.6.0)
|
31
|
-
ntlm-http (0.1.1)
|
32
|
-
rspec (3.3.0)
|
33
|
-
rspec-core (~> 3.3.0)
|
34
|
-
rspec-expectations (~> 3.3.0)
|
35
|
-
rspec-mocks (~> 3.3.0)
|
36
|
-
rspec-core (3.3.2)
|
37
|
-
rspec-support (~> 3.3.0)
|
38
|
-
rspec-expectations (3.3.1)
|
39
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
40
|
-
rspec-support (~> 3.3.0)
|
41
|
-
rspec-its (1.2.0)
|
42
|
-
rspec-core (>= 3.0.0)
|
43
|
-
rspec-expectations (>= 3.0.0)
|
44
|
-
rspec-mocks (3.3.2)
|
45
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
46
|
-
rspec-support (~> 3.3.0)
|
47
|
-
rspec-support (3.3.0)
|
48
|
-
slop (4.2.0)
|
49
|
-
unf (0.1.4)
|
50
|
-
unf_ext
|
51
|
-
unf_ext (0.0.7.1)
|
52
|
-
webrobots (0.1.1)
|
53
|
-
|
54
|
-
PLATFORMS
|
55
|
-
ruby
|
56
|
-
|
57
|
-
DEPENDENCIES
|
58
|
-
bundler (~> 1.6)
|
59
|
-
cricos_scrape!
|
60
|
-
rspec (~> 3.3.0, >= 3.3.0)
|
61
|
-
rspec-its (~> 1.2.0, >= 1.2.0)
|
62
|
-
|
63
|
-
BUNDLED WITH
|
64
|
-
1.10.6
|
data/Procfile
DELETED
data/Rakefile
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
require 'rspec/core/rake_task'
|
2
|
-
RSpec::Core::RakeTask.new
|
3
|
-
|
4
|
-
require_relative 'lib/cricos_scrape'
|
5
|
-
namespace :import do
|
6
|
-
|
7
|
-
|
8
|
-
task :contacts do
|
9
|
-
output_file = ENV['OUTPUT_FILE'] || 'contacts.json'
|
10
|
-
CricosScrape::BulkImportContacts::new(output_file, ENV['OVERWRITE']).perform
|
11
|
-
end
|
12
|
-
|
13
|
-
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe CricosScrape::ContactImporter do
|
4
|
-
|
5
|
-
describe '#run' do
|
6
|
-
let(:agent) { CricosScrape.agent }
|
7
|
-
let(:importer) { CricosScrape::ContactImporter.new(agent) }
|
8
|
-
before do
|
9
|
-
stub_const('CricosScrape::ContactImporter::STATES_CODE', ['ACT', 'WA'])
|
10
|
-
|
11
|
-
allow(importer).to receive(:url_for).with('ACT').and_return(contact_details_of_state_act_uri)
|
12
|
-
allow(importer).to receive(:url_for).with('WA').and_return(contact_details_of_state_wa_uri)
|
13
|
-
|
14
|
-
@contacts = importer.run
|
15
|
-
end
|
16
|
-
|
17
|
-
context 'when the response body contains with states ACT and WA' do
|
18
|
-
it 'returns array contacts array' do
|
19
|
-
data = [
|
20
|
-
#contacts of ACT
|
21
|
-
CricosScrape::Contact.new('School Courses (and ELICOS and Foundation Programs where delivered by a school)',
|
22
|
-
'Ms Rebecca Hughes',
|
23
|
-
'ACT Education and Training Directorate',
|
24
|
-
CricosScrape::Address.new('GPO Box 158', nil, 'CANBERRA', 'ACT', '2601'),
|
25
|
-
'0262059299',
|
26
|
-
'',
|
27
|
-
'etd.contactus@act.gov.au'
|
28
|
-
),
|
29
|
-
CricosScrape::Contact.new('Vocational Courses (and ELICOS courses offered by an RTO or remaining ‘stand-alone’ ELICOS provider)',
|
30
|
-
'ASQA Info Line',
|
31
|
-
'Australian Skills Quality Authority',
|
32
|
-
CricosScrape::Address.new('PO Box 9928', nil, 'Melbourne', 'VIC', '3001'),
|
33
|
-
'1300701801',
|
34
|
-
'',
|
35
|
-
'enquiries@asqa.gov.au'
|
36
|
-
),
|
37
|
-
CricosScrape::Contact.new('Higher Education Courses (and ELICOS and Foundation Programs where delivered in a pathway arrangement with a Higher Education Provider)',
|
38
|
-
'Tertiary Education Quality and Standards Agency',
|
39
|
-
'Tertiary Education Quality and Standards Agency',
|
40
|
-
CricosScrape::Address.new('GPO Box 1672', nil, 'Melbourne', 'VIC', '3001'),
|
41
|
-
'1300739585',
|
42
|
-
'1300739586',
|
43
|
-
'enquiries@teqsa.gov.au'
|
44
|
-
),
|
45
|
-
#contacts of WA
|
46
|
-
CricosScrape::Contact.new('Vocational Courses (and ELICOS courses offered by an RTO or remaining ‘stand-alone’ ELICOS provider)',
|
47
|
-
'ASQA Info Line',
|
48
|
-
'Australian Skills Quality Authority',
|
49
|
-
CricosScrape::Address.new('PO Box 9928', nil, 'Melbourne', 'VIC', '3001'),
|
50
|
-
'1300701801',
|
51
|
-
'',
|
52
|
-
'enquiries@asqa.gov.au'
|
53
|
-
),
|
54
|
-
CricosScrape::Contact.new('School Courses (and ELICOS and Foundation Programs where delivered by a school)',
|
55
|
-
'Mr Steve Page Senior Registration and Policy Officer',
|
56
|
-
'Department of Education Services, Non-Government & International Education Directorate',
|
57
|
-
CricosScrape::Address.new('PO Box 1766', nil, 'OSBORNE PARK', 'WA', '6916'),
|
58
|
-
'0894411962',
|
59
|
-
'0894411901',
|
60
|
-
'ngs@des.wa.gov.au'
|
61
|
-
),
|
62
|
-
CricosScrape::Contact.new('Higher Education Courses (and ELICOS and Foundation Programs where delivered in a pathway arrangement with a Higher Education Provider)',
|
63
|
-
'Tertiary Education Quality and Standards Agency',
|
64
|
-
'Tertiary Education Quality and Standards Agency',
|
65
|
-
CricosScrape::Address.new('GPO Box 1672', nil, 'Melbourne', 'VIC', '3001'),
|
66
|
-
'1300739585',
|
67
|
-
'1300739586',
|
68
|
-
'enquiries@teqsa.gov.au'
|
69
|
-
),
|
70
|
-
]
|
71
|
-
|
72
|
-
expect(@contacts).to eq data
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
@@ -1,71 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe CricosScrape::CourseImporter do
|
4
|
-
|
5
|
-
describe '#run' do
|
6
|
-
let(:agent) { CricosScrape.agent }
|
7
|
-
subject(:course) { CricosScrape::CourseImporter.new(agent, course_id: 1).run }
|
8
|
-
|
9
|
-
before do
|
10
|
-
allow_any_instance_of(CricosScrape::CourseImporter).to receive(:url).and_return(uri)
|
11
|
-
course_list_page_1 = agent.get("#{uri}?LocationID=123")
|
12
|
-
course_list_page_2 = agent.get("#{uri}?LocationID=456")
|
13
|
-
allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'get-location-id'}).and_return(course_list_page_1, course_list_page_2)
|
14
|
-
end
|
15
|
-
|
16
|
-
context 'when there is no course found' do
|
17
|
-
let(:uri) { not_found_course_details_uri }
|
18
|
-
|
19
|
-
it 'does not import' do
|
20
|
-
expect(course).to be_nil
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
context 'when the details course is found' do
|
25
|
-
let(:uri) { course_details_without_pagination_uri }
|
26
|
-
|
27
|
-
its(:course_id) { is_expected.to eq 1 }
|
28
|
-
its(:course_name) { is_expected.to eq 'Primary Yrs K-6' }
|
29
|
-
its(:course_code) { is_expected.to eq '012395K' }
|
30
|
-
its(:dual_qualification) { is_expected.to eq 'No' }
|
31
|
-
its(:field_of_education) { is_expected.to eq '' }
|
32
|
-
its(:broad_field) { is_expected.to eq '12 - Mixed Field Programmes' }
|
33
|
-
its(:narrow_field) { is_expected.to eq '1201 - General Education Programmes' }
|
34
|
-
its(:detailed_field) { is_expected.to eq '120101 - General Primary and Secondary Education Programmes' }
|
35
|
-
its(:course_level) { is_expected.to eq 'Primary School Studies' }
|
36
|
-
its(:foundation_studies) { is_expected.to eq 'No' }
|
37
|
-
its(:work_component) { is_expected.to eq 'No' }
|
38
|
-
its(:course_language) { is_expected.to eq 'English' }
|
39
|
-
its(:duration) { is_expected.to eq '364' }
|
40
|
-
its(:total_cost) { is_expected.to eq '66,500' }
|
41
|
-
its(:contact_officers) do
|
42
|
-
contact_officers = [
|
43
|
-
CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Nicole King', 'Manager', '0262056998', '62059239', nil),
|
44
|
-
CricosScrape::ContactOfficer.new('International Student Contact', 'PAUL Wang', 'Study Tour Coordinator', '62077293', '', 'paul.wang@act.gov.au'),
|
45
|
-
]
|
46
|
-
|
47
|
-
is_expected.to eq contact_officers
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
context 'when the response body not contains pagination location' do
|
52
|
-
let(:uri) { course_details_without_pagination_uri }
|
53
|
-
|
54
|
-
its(:location_ids) do
|
55
|
-
location_ids = ["123", "456"]
|
56
|
-
is_expected.to eq location_ids
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
context 'when the contact officers contains table grid' do
|
61
|
-
let(:uri) { course_details_with_contact_officers_table_grid }
|
62
|
-
let(:data) { [CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Andrew Vann', 'Vice-Chancellor', '02 6338 4209', '02 6338 4809', nil),
|
63
|
-
CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 63657537', '02 63657590', 'mevans@csu.edu.au'),
|
64
|
-
CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 6365 7537', '02 6365 7590', 'mevans@csu.edu.au')] }
|
65
|
-
|
66
|
-
its(:contact_officers) do
|
67
|
-
is_expected.to eq data
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|