cricos_scrape 2.0 → 2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/bin/cricos_scrape +40 -0
  4. data/cricos_scrape.gemspec +7 -9
  5. data/lib/cricos_scrape/bulk_import_courses.rb +2 -2
  6. data/lib/cricos_scrape/bulk_import_institutions.rb +2 -2
  7. data/lib/cricos_scrape/entities/address.rb +4 -0
  8. data/lib/cricos_scrape/entities/contact.rb +4 -0
  9. data/lib/cricos_scrape/entities/contact_officer.rb +4 -0
  10. data/lib/cricos_scrape/entities/course.rb +4 -0
  11. data/lib/cricos_scrape/entities/institution.rb +4 -0
  12. data/lib/cricos_scrape/entities/location.rb +4 -0
  13. data/lib/cricos_scrape/import_contacts.rb +2 -2
  14. data/lib/cricos_scrape/importer/contact_importer.rb +120 -0
  15. data/lib/cricos_scrape/importer/course_importer.rb +291 -0
  16. data/lib/cricos_scrape/importer/institution_importer.rb +279 -0
  17. data/lib/cricos_scrape/version.rb +1 -1
  18. data/lib/cricos_scrape.rb +4 -5
  19. metadata +16 -44
  20. data/CONTRIBUTING.md +0 -51
  21. data/Gemfile +0 -2
  22. data/Gemfile.lock +0 -64
  23. data/Procfile +0 -3
  24. data/Rakefile +0 -13
  25. data/spec/contact_importer_spec.rb +0 -76
  26. data/spec/course_importer_spec.rb +0 -71
  27. data/spec/fixtures/contact_details_of_state_act_uri.html +0 -546
  28. data/spec/fixtures/contact_details_of_state_wa_uri.html +0 -546
  29. data/spec/fixtures/course_details_with_contact_officers_table_grid.html +0 -467
  30. data/spec/fixtures/course_details_without_pagination_uri.html +0 -470
  31. data/spec/fixtures/courses_list_by_location_id_uri.html +0 -174
  32. data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +0 -406
  33. data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +0 -358
  34. data/spec/fixtures/institution_details_with_po_box_postal_address.html +0 -240
  35. data/spec/fixtures/institution_details_with_trading_name.html +0 -322
  36. data/spec/fixtures/institution_details_without_locations_details_uri.html +0 -151
  37. data/spec/fixtures/institution_details_without_pagination_location_uri.html +0 -299
  38. data/spec/fixtures/not_found_course_details_uri.html +0 -837
  39. data/spec/fixtures/not_found_institution_details.html +0 -36
  40. data/spec/institution_importer_spec.rb +0 -138
  41. data/spec/spec_helper.rb +0 -67
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2cb41efd407804298343699766a9d530e4d51caf
4
- data.tar.gz: 8ec6ecfb82d9ce2c2171297b8ef3026008e0eaaa
3
+ metadata.gz: 3961f2418e9bc173a4412c6d16a83408dfe2c280
4
+ data.tar.gz: fdf7c3c46869cd7e805a05e246b36649cacfaa19
5
5
  SHA512:
6
- metadata.gz: 47b66ae22e51f5846aa2aaac0bb4e93f4ceb1ce94d4ae81dab0179b0eaa95ed399d64bdf551c6a1416fac8059119fbd78292f9e963a9caf7364b5dcf5a090844
7
- data.tar.gz: e2c8f522a4444ae73feff3d6ccc40c8c55017c1aa64ceec48cf12176c56e7876597f679cc6bcdfe8ba9a2a29182ad8746d7b2254b90321f09a975f3af4d6eecf
6
+ metadata.gz: 698d1a4564472abee988d847e06c93f93f718102036b5eb3ddd68a795c86b6827302b21da9db2490b7a647922227d38da9551a76430ff593ffc9c7eed6a848e6
7
+ data.tar.gz: c4a81a6e2d005d3047f2170a85e66c4c10bdaf73196b068e5ddeb2a3fe0bfd14f1fe4a9a5e7d263aad08beb314c2532d41e4b16524aee47c3e5813a755a7f772
data/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
  CRICOS lacks an API for data retrieval (as do many government-based services). This gem
9
9
  helps scrape data from [http://cricos.education.gov.au](http://cricos.education.gov.au).
10
10
 
11
- This gem supports Ruby 2.2.3+ only.
11
+ This gem supports MRI Ruby 2.0.0+.
12
12
 
13
13
  # Features
14
14
 
data/bin/cricos_scrape ADDED
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cricos_scrape'
4
+ require 'cricos_scrape/version'
5
+ require 'commander/import'
6
+
7
+ program :version, CricosScrape::VERSION.to_s
8
+ program :description, 'Scrape data from CRICOS website'
9
+
10
+ command :scrape do |c|
11
+ c.syntax = 'cricos_scrape scrape [institutions|courses|contacts] [min_id:1] [max_id:10000]'
12
+ c.summary = 'Scrape entities from CRICOS'
13
+ c.description = c.summary
14
+ c.example 'Import institutions', 'cricos_scrape scrape institutions'
15
+ c.example 'Import institutions and persist to a file', 'cricos_scrape scrape institutions >> institutions.json'
16
+ c.example 'Import institutions with specified ID range [1-200]', 'cricos_scrape scrape institutions 1 200'
17
+ c.example 'Import courses', 'cricos_scrape scrape courses'
18
+ c.example 'Import courses and persist to a file', 'cricos_scrape scrape courses >> courses.json'
19
+ c.example 'Import courses with specified ID range [1-200]', 'cricos_scrape scrape courses 1 200'
20
+ c.example 'Import contacts', 'cricos_scrape scrape contacts. NOTE: ID range option does not apply'
21
+ c.example 'Import contacts and persist to a file', 'cricos_scrape scrape contacts >> contacts.json'
22
+
23
+ c.action do |args, options|
24
+ entity = args[0]
25
+ min_id = args[1] || 1
26
+ max_id = args[2] || 10000
27
+
28
+ case entity
29
+ when 'institutions'
30
+ CricosScrape::BulkImportInstitutions.new(min_id, max_id).perform
31
+ when 'courses'
32
+ CricosScrape::BulkImportCourses.new(min_id, max_id).perform
33
+ when 'contacts'
34
+ CricosScrape::ImportContacts.new.perform
35
+ else
36
+ STDERR.puts "[ERROR] Invalid entity. Please see `cricos_scrape scrape -h` for more details"
37
+ exit 1
38
+ end
39
+ end
40
+ end
@@ -1,4 +1,7 @@
1
- require './lib/cricos_scrape/version'
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cricos_scrape/version'
2
5
 
3
6
  Gem::Specification.new do |spec|
4
7
  spec.name = 'cricos_scrape'
@@ -10,15 +13,10 @@ Gem::Specification.new do |spec|
10
13
  spec.homepage = 'https://github.com/ruby-journal/cricos_scrape.rb'
11
14
  spec.license = 'MIT'
12
15
 
13
- spec.files = Dir['[A-Z]*',
14
- 'lib/*.rb',
15
- 'lib/cricos_scrape/*.rb',
16
- 'spec/*.rb',
17
- 'spec/fixtures/*.html']
18
-
19
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
16
+ spec.files = `git ls-files -z -- lib/* bin/* LICENSE.md README.md cricos_scrape.gemspec`.split("\x0")
17
+ spec.executables = ['cricos_scrape']
20
18
  spec.test_files = spec.files.grep(%r{^(spec)/})
21
- spec.required_ruby_version = '>= 2.2.2'
19
+ spec.required_ruby_version = '>= 2.0.0'
22
20
 
23
21
  spec.require_paths = ['lib']
24
22
 
@@ -1,5 +1,5 @@
1
- require_relative './importer/course_importer'
2
- require_relative './agent'
1
+ require 'cricos_scrape/importer/course_importer'
2
+ require 'cricos_scrape/agent'
3
3
 
4
4
  module CricosScrape
5
5
  class BulkImportCourses
@@ -1,5 +1,5 @@
1
- require_relative './importer/institution_importer'
2
- require_relative './agent'
1
+ require 'cricos_scrape/importer/institution_importer'
2
+ require 'cricos_scrape/agent'
3
3
 
4
4
  module CricosScrape
5
5
  class BulkImportInstitutions
@@ -0,0 +1,4 @@
1
+ module CricosScrape
2
+ class Address < Struct.new(:address_line_1, :address_line_2, :suburb, :state, :postcode)
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module CricosScrape
2
+ class Contact < Struct.new(:type_of_course, :name, :organisation, :postal_address, :telephone, :facsimile, :email)
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module CricosScrape
2
+ class ContactOfficer < Struct.new(:role, :name, :title, :phone, :fax, :email)
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module CricosScrape
2
+ class Course < Struct.new(:course_id, :course_name, :course_code, :dual_qualification, :field_of_education, :broad_field, :narrow_field, :detailed_field, :course_level, :foundation_studies, :work_component, :course_language, :duration, :total_cost, :contact_officers, :location_ids)
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module CricosScrape
2
+ class Institution < Struct.new(:provider_id, :provider_code, :trading_name, :name, :type, :total_capacity, :postal_address, :website, :locations, :contact_officers)
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module CricosScrape
2
+ class Location < Struct.new(:location_id, :name, :state, :number_of_courses)
3
+ end
4
+ end
@@ -1,5 +1,5 @@
1
- require_relative './importer/contact_importer'
2
- require_relative './agent'
1
+ require 'cricos_scrape/importer/contact_importer'
2
+ require 'cricos_scrape/agent'
3
3
 
4
4
  module CricosScrape
5
5
  class ImportContacts
@@ -0,0 +1,120 @@
1
+ require 'cricos_scrape/entities/contact'
2
+ require 'cricos_scrape/entities/address'
3
+
4
+ module CricosScrape
5
+ class ContactImporter
6
+
7
+ CONTACT_URL = 'http://cricos.education.gov.au/Contacts/CRICOSContacts.aspx'
8
+ STATES_CODE = ['ACT', 'NSW', 'NT', 'QLD', 'SA', 'TAS', 'VIC', 'WA']
9
+
10
+ def initialize(agent)
11
+ @agent = agent
12
+ end
13
+
14
+ def run
15
+ contacts = []
16
+
17
+ for state in STATES_CODE
18
+ @page = agent.get(url_for(state))
19
+ if exist_contacts_of_state?
20
+ @table_contains_contact = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetDetails_cricosContactDetails_pnlContactLists table').children
21
+
22
+ number_of_rows_per_contact = 18
23
+ start_contact_row = 3
24
+ end_contact_row = @table_contains_contact.count - number_of_rows_per_contact
25
+
26
+ for i in (start_contact_row..end_contact_row).step(number_of_rows_per_contact)
27
+ @row_index = i
28
+
29
+ contact = Contact.new
30
+ contact.type_of_course = find_type_of_course
31
+ contact.name = find_name
32
+ contact.organisation = find_organisation
33
+ contact.postal_address = find_postal_address
34
+ contact.telephone = find_telephone
35
+ contact.facsimile = find_facsimile
36
+ contact.email = find_email
37
+
38
+ contacts << contact
39
+ end
40
+ end
41
+ end
42
+
43
+ contacts
44
+ end
45
+
46
+ private
47
+
48
+ attr_reader :agent, :page
49
+
50
+ def url_for(state_code)
51
+ "#{CONTACT_URL}?StateCode=#{state_code}"
52
+ end
53
+
54
+ def exist_contacts_of_state?
55
+ !!@page.at('#__tab_ctl00_cphDefaultPage_tabContainer_sheetDetails')
56
+ end
57
+
58
+ def find_value_of_field(field)
59
+ field.nil? ? nil : field.text.strip
60
+ end
61
+
62
+ def find_type_of_course
63
+ find_value_of_field(@table_contains_contact[@row_index])
64
+ end
65
+
66
+ def find_name
67
+ name_row = @table_contains_contact[@row_index+4].children
68
+ find_value_of_field(name_row[3]).empty? ? find_value_of_field(name_row[2]) : find_value_of_field(name_row[3])
69
+ end
70
+
71
+ def find_organisation
72
+ organisation_row = @table_contains_contact[@row_index+6].children
73
+ find_value_of_field(organisation_row[3])
74
+ end
75
+
76
+ def find_postal_address
77
+ address = Address.new
78
+
79
+ address_row = @table_contains_contact[@row_index+8].children
80
+ postal_address_cell = address_row[3].children
81
+
82
+ # drop the <br> nodes so that only the address text lines remain
83
+ lines = postal_address_cell - postal_address_cell.css('br')
84
+ address.address_line_1 = find_value_of_field(lines[0])
85
+
86
+ if line2 = find_value_of_field(lines[1])
87
+ address.suburb, address.state, address.postcode = extract_suburb_and_state_and_postcode_from(line2)
88
+ end
89
+
90
+ address
91
+ end
92
+
93
+ def extract_suburb_and_state_and_postcode_from(line)
94
+ line.scan(/^(.*)\s(#{australia_states_code_regex})\s(#{australia_postcode_regex})$/).first
95
+ end
96
+
97
+ def australia_states_code_regex
98
+ 'ACT|NSW|NT|QLD|SA|TAS|VIC|WA'
99
+ end
100
+
101
+ def australia_postcode_regex
102
+ '\d{4}'
103
+ end
104
+
105
+ def find_telephone
106
+ telephone_row = @table_contains_contact[@row_index+10].children
107
+ find_value_of_field(telephone_row[3])
108
+ end
109
+
110
+ def find_facsimile
111
+ facsimile_row = @table_contains_contact[@row_index+12].children
112
+ find_value_of_field(facsimile_row[3])
113
+ end
114
+
115
+ def find_email
116
+ email_row = @table_contains_contact[@row_index+14].children
117
+ find_value_of_field(email_row[3])
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,291 @@
1
+ require 'cricos_scrape/entities/course'
2
+ require 'cricos_scrape/entities/contact_officer'
3
+
4
+ module CricosScrape
5
+ class CourseImporter
6
+
7
+ COURSE_URL = 'http://cricos.education.gov.au/Course/CourseDetails.aspx'
8
+
9
+ def initialize(agent, **params)
10
+ @agent = agent
11
+ @course_id = params.fetch(:course_id)
12
+ @page = agent.get(url)
13
+ end
14
+
15
+ def run
16
+ return if course_not_found?
17
+
18
+ course = Course.new
19
+ course.course_id = course_id
20
+ course.course_name = find_course_name
21
+ course.course_code = find_course_code
22
+ course.dual_qualification = find_dual_qualification
23
+ course.field_of_education = find_field_of_education
24
+ course.broad_field = find_education_broad_field
25
+ course.narrow_field = find_education_narrow_field
26
+ course.detailed_field = find_education_detailed_field
27
+ course.course_level = find_course_level
28
+ course.foundation_studies = find_foundation_studies
29
+ course.work_component = find_work_component
30
+ course.course_language = find_course_language
31
+ course.duration = find_duration
32
+ course.total_cost = find_total_cost
33
+
34
+ course.contact_officers = find_contact_officers
35
+ course.location_ids = find_course_location
36
+
37
+ course
38
+ end
39
+
40
+ private
41
+
42
+ attr_reader :agent, :course_id, :page
43
+
44
+ def url
45
+ "#{COURSE_URL}?CourseID=#{course_id}"
46
+ end
47
+
48
+ # there is no dedicated "record not found" page;
49
+ # instead, the course search page is returned
50
+ def course_not_found?
51
+ @page.at('#contentBody h1').text == "Course Search"
52
+ end
53
+
54
+ def find_value_of_field(field)
55
+ field.text.strip unless field.nil?
56
+ end
57
+
58
+ def find_course_name
59
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblCourseName')
60
+ find_value_of_field(field)
61
+ end
62
+
63
+ def find_course_code
64
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblCourseCode')
65
+ find_value_of_field(field)
66
+ end
67
+
68
+ def find_dual_qualification
69
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblDualQualification')
70
+ find_value_of_field(field)
71
+ end
72
+
73
+ def find_field_of_education
74
+ row = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_trFofEHeader').children
75
+ # NOTE: If the field's text starts with a non-breaking space (code point 160), it is treated as blank and an empty string is returned
76
+ find_value_of_field(row[3]).ord == 160 ? '' : find_value_of_field(row[3])
77
+ end
78
+
79
+ def find_education_broad_field
80
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblFieldOfEducationBroad1')
81
+ find_value_of_field(field)
82
+ end
83
+
84
+ def find_education_narrow_field
85
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblFieldOfEducationNarrow1')
86
+ find_value_of_field(field)
87
+ end
88
+
89
+ def find_education_detailed_field
90
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblFieldOfEducationDetailed1')
91
+ find_value_of_field(field)
92
+ end
93
+
94
+ def find_course_level
95
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblCourseLevel')
96
+ find_value_of_field(field)
97
+ end
98
+
99
+ def find_foundation_studies
100
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblFoundationStudies')
101
+ find_value_of_field(field)
102
+ end
103
+
104
+ def find_work_component
105
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblWorkComponent')
106
+ find_value_of_field(field)
107
+ end
108
+
109
+ def find_course_language
110
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblCourseLanguage')
111
+ find_value_of_field(field)
112
+ end
113
+
114
+ def find_duration
115
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblDuration')
116
+ find_value_of_field(field)
117
+ end
118
+
119
+ def find_total_cost
120
+ field = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseDetail_lblTotalCourseCost')
121
+ find_value_of_field(field)
122
+ end
123
+
124
+ def find_contact_officers
125
+ contact_officers = []
126
+
127
+ contact_officers_list = @page.search('//div[starts-with(@id, "ctl00_cphDefaultPage_tabContainer_sheetContactDetail_contactDetail_pnl")]')
128
+
129
+ contact_officers_list.each do |contact_officer|
130
+ @contact_officer_area = contact_officer
131
+ @contact_officer_table = @contact_officer_area.at('table').children
132
+
133
+ if contains_contact_details_grid?
134
+ contact_officers += find_contact_officer_grid
135
+ else
136
+ contact_officers << find_contact_officer
137
+ end
138
+ end
139
+
140
+ contact_officers
141
+ end
142
+
143
+ def find_contact_officer_grid
144
+ contact_officers = []
145
+
146
+ excess_row_at_the_end_table = 2
147
+ data_row_start = 3
148
+ data_row_end = @contact_officer_table.count - excess_row_at_the_end_table
149
+
150
+ for i in data_row_start..data_row_end
151
+ contact_row = @contact_officer_table[i].children
152
+
153
+ contact = ContactOfficer.new
154
+ contact.role = find_contact_officer_role
155
+ contact.name = find_value_of_field(contact_row[1])
156
+ contact.phone = find_value_of_field(contact_row[2])
157
+ contact.fax = find_value_of_field(contact_row[3])
158
+ contact.email = find_value_of_field(contact_row[4])
159
+
160
+ contact_officers << contact
161
+ end
162
+
163
+ contact_officers
164
+ end
165
+
166
+ def find_contact_officer
167
+ contact = ContactOfficer.new
168
+ contact.role = find_contact_officer_role
169
+ contact.name = find_contact_officer_name
170
+ contact.title = find_contact_officer_title
171
+ contact.phone = find_contact_officer_phone
172
+ contact.fax = find_contact_officer_fax
173
+ contact.email = find_contact_officer_email
174
+
175
+ contact
176
+ end
177
+
178
+ def find_contact_officer_role
179
+ row = @contact_officer_area.children
180
+ find_value_of_field(row[1]).sub(':', '')
181
+ end
182
+
183
+ def find_contact_officer_name
184
+ row = @contact_officer_table[1].children
185
+ find_value_of_field(row[3])
186
+ end
187
+
188
+ def find_contact_officer_title
189
+ row = @contact_officer_table[3].children
190
+ find_value_of_field(row[3])
191
+ end
192
+
193
+ def find_contact_officer_phone
194
+ row = @contact_officer_table[5].children
195
+ find_value_of_field(row[3])
196
+ end
197
+
198
+ def find_contact_officer_fax
199
+ row = @contact_officer_table[7].children
200
+ find_value_of_field(row[3])
201
+ end
202
+
203
+ def find_contact_officer_email
204
+ row = @contact_officer_table[9]
205
+ find_value_of_field(row.children[3]) unless row.nil?
206
+ end
207
+
208
+ def contains_contact_details_grid?
209
+ contact_officer_area_css_id = @contact_officer_area.attributes['id'].text
210
+ @page.search("//*[@id='#{contact_officer_area_css_id}']/div/table[starts-with(@id, 'ctl00_cphDefaultPage_tabContainer_sheetContactDetail_contactDetail_grid')]").any?
211
+ end
212
+
213
+ # Collect the IDs of all locations of the course
214
+ def find_course_location
215
+ location_ids = []
216
+
217
+ if location_results_paginated?
218
+ for page_number in 1..total_pages
219
+ jump_to_page(page_number)
220
+ location_ids += fetch_location_ids_from_current_page
221
+ end
222
+ else
223
+ location_ids += fetch_location_ids_from_current_page
224
+ end
225
+
226
+ location_ids
227
+ end
228
+
229
+ def pagination
230
+ @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseLocationList_gridSearchResults .gridPager')
231
+ end
232
+
233
+ def location_results_paginated?
234
+ !!pagination
235
+ end
236
+
237
+ def total_pages
238
+ pagination.children[1].text.strip[/^Page [0-9]+ of ([0-9]+).*/, 1].to_i
239
+ end
240
+
241
+ def current_pagination_page
242
+ pagination.children[1].text.strip[/^Page ([0-9]+) of [0-9]+.*/, 1].to_i
243
+ end
244
+
245
+ def jump_to_page(page_number)
246
+ return @page if page_number == current_pagination_page
247
+
248
+ hidden_form = @page.form_with :id => "aspnetForm"
249
+ hidden_form['__EVENTTARGET'] = 'ctl00$cphDefaultPage$tabContainer$sheetCourseDetail$courseLocationList$gridSearchResults'
250
+ hidden_form['__EVENTARGUMENT'] = "Page$#{page_number}"
251
+ begin
252
+ @page = hidden_form.submit(nil, {'action' => 'change-page'})
253
+ rescue Mechanize::ResponseCodeError
254
+ sleep 5
255
+ scrape_course(course_id)
256
+ end
257
+ end
258
+
259
+ def get_location_id(row_index)
260
+ hidden_form = @page.form_with :id => "aspnetForm"
261
+ hidden_form['__EVENTTARGET'] = 'ctl00$cphDefaultPage$tabContainer$sheetCourseDetail$courseLocationList$gridSearchResults'
262
+ hidden_form['__EVENTARGUMENT'] = "click-#{row_index-3}"
263
+ begin
264
+ course_page = hidden_form.submit(nil, {'action' => 'get-location-id'})
265
+ rescue Mechanize::ResponseCodeError
266
+ sleep 5
267
+ scrape_course(course_id)
268
+ end
269
+
270
+ course_page.uri.to_s[/LocationID=([0-9]+)/, 1]
271
+ end
272
+
273
+ def fetch_location_ids_from_current_page
274
+ location_ids = []
275
+
276
+ # location_list holds the child nodes of the table that lists the locations on the current page
277
+ location_list = @page.at('#ctl00_cphDefaultPage_tabContainer_sheetCourseDetail_courseLocationList_gridSearchResults').children
278
+
279
+ excess_row_at_the_end_table = location_results_paginated? ? 3 : 2
280
+ start_location_row = 3
281
+ end_location_row = location_list.count - excess_row_at_the_end_table
282
+
283
+ for i in start_location_row..end_location_row
284
+ location_ids << get_location_id(i)
285
+ end
286
+
287
+ location_ids
288
+ end
289
+
290
+ end
291
+ end