cricos_scrape 2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +51 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +64 -0
  5. data/LICENSE.md +22 -0
  6. data/Procfile +3 -0
  7. data/README.md +40 -0
  8. data/Rakefile +13 -0
  9. data/cricos_scrape.gemspec +31 -0
  10. data/lib/cricos_scrape/agent.rb +9 -0
  11. data/lib/cricos_scrape/bulk_import_courses.rb +31 -0
  12. data/lib/cricos_scrape/bulk_import_institutions.rb +31 -0
  13. data/lib/cricos_scrape/import_contacts.rb +22 -0
  14. data/lib/cricos_scrape/json_struct.rb +11 -0
  15. data/lib/cricos_scrape/version.rb +3 -0
  16. data/lib/cricos_scrape.rb +8 -0
  17. data/spec/contact_importer_spec.rb +76 -0
  18. data/spec/course_importer_spec.rb +71 -0
  19. data/spec/fixtures/contact_details_of_state_act_uri.html +546 -0
  20. data/spec/fixtures/contact_details_of_state_wa_uri.html +546 -0
  21. data/spec/fixtures/course_details_with_contact_officers_table_grid.html +467 -0
  22. data/spec/fixtures/course_details_without_pagination_uri.html +470 -0
  23. data/spec/fixtures/courses_list_by_location_id_uri.html +174 -0
  24. data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +406 -0
  25. data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +358 -0
  26. data/spec/fixtures/institution_details_with_po_box_postal_address.html +240 -0
  27. data/spec/fixtures/institution_details_with_trading_name.html +322 -0
  28. data/spec/fixtures/institution_details_without_locations_details_uri.html +151 -0
  29. data/spec/fixtures/institution_details_without_pagination_location_uri.html +299 -0
  30. data/spec/fixtures/not_found_course_details_uri.html +837 -0
  31. data/spec/fixtures/not_found_institution_details.html +36 -0
  32. data/spec/institution_importer_spec.rb +138 -0
  33. data/spec/spec_helper.rb +67 -0
  34. metadata +190 -0
@@ -0,0 +1,36 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+ <head><title>
4
+ Institution Details
5
+ </title><meta name="robots" content="noindex" /><link href="../Common/Styles/Styles.css" rel="stylesheet" type="text/css" /><link href="../App_Themes/Theme1/Theme1.css" type="text/css" rel="stylesheet" /></head>
6
+ <body>
7
+ <form method="post" action="InstitutionDetailsOnePage.aspx" id="Form1">
8
+ <div class="aspNetHidden">
9
+ <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwULLTE1NjUwNjY5NDEPZBYCAgMPZBYCAgMPDxYCHgdWaXNpYmxlaGQWBgIDD2QWBAIBDw8WAh8AaGRkAgUPZBYCZg9kFgQCAQ8PFgIfAGhkZAIDD2QWAgIBDzwrABECARAWABYAFgAMFCsAAGQCBQ9kFgICAg88KwARAgEQFgAWABYADBQrAABkAgcPZBYCAgMPZBYCAgEPPCsAEQIBEBYAFgAWAAwUKwAAZBgDBRxjb3Vyc2VMaXN0JGdyaWRTZWFyY2hSZXN1bHRzDzwrAAwCBhUBCENvdXJzZUlkCAL/////D2QFHmxvY2F0aW9uTGlzdCRncmlkU2VhcmNoUmVzdWx0cw88KwAMAgYVAQpMb2NhdGlvbklkCAL/////D2QFHmNvbnRhY3REZXRhaWxzJGdyaWRJU0NDb250YWN0cw88KwAMAgYVAQtDb250YWN0TmFtZQgC/////w9k" />
10
+ </div>
11
+
12
+ <table role="presentation" border="0" cellpadding="0" cellspacing="0" class="tblHeader" id="AutoNumber1">
13
+ <tr>
14
+ <td>
15
+ <img src="../images/Dept-Education_Inline_rev-optimal.png" id="Img1" alt="Australian Government - Department of Education" style="margin-left: 7px; margin-top: 7px; margin-bottom: 7px;" height="65" width="261" />
16
+ <td align="right">
17
+ <img src="../images/cricos.gif" alt="CRICOS - Commonwealth Register of Institutions and Courses for Overseas Students" style="margin: 0px 0px" width="414" height="75" /></td>
18
+ </tr>
19
+ </table>
20
+ <br />
21
+
22
+ <div id="pnlErrorMessage">
23
+
24
+ <table role="presentation" style="font-family: verdana; font-size: 12pt">
25
+ <tr>
26
+ <td valign="top"><img src="../images/exclaim.gif" alt="exclaim" /></td>
27
+ <td valign="middle">
28
+ The Provider ID entered is invalid - please try another.
29
+ </td>
30
+ </tr>
31
+ </table><br/>
32
+
33
+ </div>
34
+ </form>
35
+ </body>
36
+ </html>
@@ -0,0 +1,138 @@
1
+ require 'spec_helper'
2
+
3
+ describe CricosScrape::InstitutionImporter do
4
+
5
+ describe '#run' do
6
+ let(:agent) { CricosScrape.agent }
7
+
8
+ subject(:institution) { CricosScrape::InstitutionImporter.new(agent, provider_id: 1).run }
9
+
10
+ before do
11
+ allow_any_instance_of(CricosScrape::InstitutionImporter).to receive(:url).and_return(uri)
12
+ courses_list_page = agent.get(institution_details_with_pagination_location_page_1_uri+"?LocationID=456")
13
+ allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'get-location-id'}).and_return(courses_list_page)
14
+ end
15
+
16
+ context 'when there is no institution found' do
17
+ let(:uri) { not_found_institution_details_uri }
18
+
19
+ it 'does not import' do
20
+ expect(institution).to be_nil
21
+ end
22
+ end
23
+
24
+ context 'when the response body contains Institution Trading Name' do
25
+ let(:uri) { institution_details_with_trading_name_uri }
26
+
27
+ its(:provider_id) { is_expected.to eq 1 }
28
+ its(:provider_code) { is_expected.to eq '00873F' }
29
+ its(:trading_name) { is_expected.to eq 'Australian Catholic University Limited' }
30
+ its(:name) { is_expected.to eq 'Australian Catholic University Limited' }
31
+ its(:type) { is_expected.to eq 'Government' }
32
+ its(:total_capacity) { is_expected.to eq 50 }
33
+ its(:website) { is_expected.to eq 'www.acu.edu.au' }
34
+ its(:postal_address) do
35
+ is_expected.to eq "International Education Office\nPO Box 968\nNORTH SYDNEY\nNew South Wales  2059"
36
+ end
37
+ end
38
+
39
+ context 'when the response body does not contains Address Line 2' do
40
+ let(:uri) { institution_details_with_po_box_postal_address_uri }
41
+
42
+ its(:provider_id) { is_expected.to eq 1 }
43
+ its(:provider_code) { is_expected.to eq '00780M' }
44
+ its(:trading_name) { is_expected.to be_nil }
45
+ its(:name) { is_expected.to eq 'Department of Education' }
46
+ its(:type) { is_expected.to eq 'Government' }
47
+ its(:total_capacity) { is_expected.to eq 500 }
48
+ its(:website) { is_expected.to be_nil }
49
+ its(:postal_address) do
50
+ is_expected.to eq "GPO Box 4821\nDARWIN\nNorthern Territory  0801"
51
+ end
52
+ end
53
+
54
+ context 'when the response body contains both Principal Executive Officer and International Student Contact' do
55
+ let(:uri) { institution_details_without_pagination_location_uri }
56
+
57
+ its(:contact_officers) do
58
+ data = [
59
+ CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Matthew Green', 'Principal', '0889506400', '0889524607', nil),
60
+ CricosScrape::ContactOfficer.new('International Student Contact', 'ROCHELLE Marshall', 'Secretary', '0889506400', '0889524607', 'rochelle.marshall@nt.catholic.edu.au')
61
+ ]
62
+ is_expected.to eq data
63
+ end
64
+ end
65
+
66
+ context 'when the response body only contains Principal Executive Officer' do
67
+ let(:uri) { institution_details_with_po_box_postal_address_uri }
68
+
69
+ its(:contact_officers) do
70
+ is_expected.to eq [CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Rachael Shanahan', 'Director, Education Services', '0889011336', '0889995788', nil)]
71
+ end
72
+ end
73
+
74
+ context 'when the response body not contains pagination location' do
75
+ let(:uri) { institution_details_without_pagination_location_uri }
76
+
77
+ its(:locations) do
78
+ locations = [
79
+ CricosScrape::Location.new("456", 'Bath Street Campus', 'NT', '1'),
80
+ CricosScrape::Location.new("456", 'Sadadeen Campus', 'NT', '2'),
81
+ CricosScrape::Location.new("456", 'Traeger Campus', 'NT', '2') ,
82
+ ]
83
+ is_expected.to eq locations
84
+ end
85
+ end
86
+
87
+ context 'when the response body not contains location details' do
88
+ let(:uri) { institution_details_without_locations_details_uri }
89
+
90
+ its(:locations) do
91
+ is_expected.to eq nil
92
+ end
93
+ end
94
+
95
+ context 'when the response body contains pagination location' do
96
+ let(:uri) { institution_details_with_pagination_location_page_1_uri }
97
+
98
+ before do
99
+ # jump_to_page does not jump to the current page (page 1), so with total_pages=2 the form is submitted only once
100
+ locations_list_page_2 = agent.get(institution_details_with_pagination_location_page_2_uri)
101
+ allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'change-location-page'}).and_return(locations_list_page_2)
102
+ end
103
+
104
+ its(:locations) do
105
+ locations = [
106
+ # Locations on page 1
107
+ CricosScrape::Location.new("456", "Albury", "NSW", "51"),
108
+ CricosScrape::Location.new("456", "Bathurst", "NSW", "60"),
109
+ CricosScrape::Location.new("456", "Canberra Institute of Technology - City Campus", "ACT", "2"),
110
+ CricosScrape::Location.new("456", "CSU Study Centre Melbourne", "VIC", "22"),
111
+ CricosScrape::Location.new("456", "CSU Study Centre Sydney", "NSW", "21"),
112
+ CricosScrape::Location.new("456", "Dubbo", "NSW", "29"),
113
+ CricosScrape::Location.new("456", "Holmesglen Institute of TAFE", "VIC", "3"),
114
+ CricosScrape::Location.new("456", "Orange", "NSW", "41"),
115
+ CricosScrape::Location.new("456", "Ryde", "NSW", "1"),
116
+ CricosScrape::Location.new("456", "St Marks Theological Centre", "ACT", "12"),
117
+
118
+ # Locations on page 2
119
+ CricosScrape::Location.new("456", "United Theological College", "NSW", "11"),
120
+ CricosScrape::Location.new("456", "Wagga Wagga", "NSW", "105"),
121
+ ]
122
+ is_expected.to eq locations
123
+ end
124
+
125
+ context 'when the contact officers contains table grid' do
126
+ its(:contact_officers) do
127
+ data = [
128
+ CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Andrew Vann', 'Vice-Chancellor', '02 6338 4209', '02 6338 4809', nil),
129
+ CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 63657537', '02 63657590', 'mevans@csu.edu.au'),
130
+ CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 6365 7537', '02 6365 7590', 'mevans@csu.edu.au')
131
+ ]
132
+ is_expected.to eq data
133
+ end
134
+ end
135
+ end
136
+ end
137
+
138
+ end
@@ -0,0 +1,67 @@
1
+ require_relative '../lib/cricos_scrape'
2
+ require 'rspec/its'
3
+
4
+ def institution_details_with_po_box_postal_address_uri
5
+ file = File.expand_path("../fixtures/institution_details_with_po_box_postal_address.html", __FILE__)
6
+ "file://#{file}"
7
+ end
8
+
9
+ def institution_details_with_trading_name_uri
10
+ file = File.expand_path("../fixtures/institution_details_with_trading_name.html", __FILE__)
11
+ "file://#{file}"
12
+ end
13
+
14
+ def not_found_institution_details_uri
15
+ file = File.expand_path("../fixtures/not_found_institution_details.html", __FILE__)
16
+ "file://#{file}"
17
+ end
18
+
19
+ def institution_details_without_pagination_location_uri
20
+ file = File.expand_path("../fixtures/institution_details_without_pagination_location_uri.html", __FILE__)
21
+ "file://#{file}"
22
+ end
23
+
24
+ def institution_details_with_pagination_location_page_1_uri
25
+ file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_1_uri.html", __FILE__)
26
+ "file://#{file}"
27
+ end
28
+
29
+ def institution_details_with_pagination_location_page_2_uri
30
+ file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_2_uri.html", __FILE__)
31
+ "file://#{file}"
32
+ end
33
+
34
+ def institution_details_without_locations_details_uri
35
+ file = File.expand_path("../fixtures/institution_details_without_locations_details_uri.html", __FILE__)
36
+ "file://#{file}"
37
+ end
38
+
39
+ def course_details_with_contact_officers_table_grid
40
+ file = File.expand_path("../fixtures/course_details_with_contact_officers_table_grid.html", __FILE__)
41
+ "file://#{file}"
42
+ end
43
+
44
+ def courses_list_by_location_id_uri
45
+ file = File.expand_path("../fixtures/courses_list_by_location_id_uri.html", __FILE__)
46
+ "file://#{file}"
47
+ end
48
+
49
+ def contact_details_of_state_act_uri
50
+ file = File.expand_path("../fixtures/contact_details_of_state_act_uri.html", __FILE__)
51
+ "file://#{file}"
52
+ end
53
+
54
+ def contact_details_of_state_wa_uri
55
+ file = File.expand_path("../fixtures/contact_details_of_state_wa_uri.html", __FILE__)
56
+ "file://#{file}"
57
+ end
58
+
59
+ def not_found_course_details_uri
60
+ file = File.expand_path("../fixtures/not_found_course_details_uri.html", __FILE__)
61
+ "file://#{file}"
62
+ end
63
+
64
+ def course_details_without_pagination_uri
65
+ file = File.expand_path("../fixtures/course_details_without_pagination_uri.html", __FILE__)
66
+ "file://#{file}"
67
+ end
metadata ADDED
@@ -0,0 +1,190 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cricos_scrape
3
+ version: !ruby/object:Gem::Version
4
+ version: '2.0'
5
+ platform: ruby
6
+ authors:
7
+ - Trung Lê
8
+ - Toàn Lê
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-10-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.6'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.6'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rspec
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: 3.3.0
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: 3.3.0
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - "~>"
43
+ - !ruby/object:Gem::Version
44
+ version: 3.3.0
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.3.0
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec-its
50
+ requirement: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.2.0
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 1.2.0
58
+ type: :development
59
+ prerelease: false
60
+ version_requirements: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - "~>"
63
+ - !ruby/object:Gem::Version
64
+ version: 1.2.0
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 1.2.0
68
+ - !ruby/object:Gem::Dependency
69
+ name: mechanize
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.7'
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: 2.7.2
78
+ type: :runtime
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - "~>"
83
+ - !ruby/object:Gem::Version
84
+ version: '2.7'
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: 2.7.2
88
+ - !ruby/object:Gem::Dependency
89
+ name: slop
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - "~>"
93
+ - !ruby/object:Gem::Version
94
+ version: 4.2.0
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: 4.2.0
98
+ type: :runtime
99
+ prerelease: false
100
+ version_requirements: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: 4.2.0
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: 4.2.0
108
+ description: Scrape Institutions, Courses, Contacts from CRICOS
109
+ email:
110
+ - trung.le@ruby-journal.com
111
+ - ktoanlba@gmail.com
112
+ executables: []
113
+ extensions: []
114
+ extra_rdoc_files: []
115
+ files:
116
+ - CONTRIBUTING.md
117
+ - Gemfile
118
+ - Gemfile.lock
119
+ - LICENSE.md
120
+ - Procfile
121
+ - README.md
122
+ - Rakefile
123
+ - cricos_scrape.gemspec
124
+ - lib/cricos_scrape.rb
125
+ - lib/cricos_scrape/agent.rb
126
+ - lib/cricos_scrape/bulk_import_courses.rb
127
+ - lib/cricos_scrape/bulk_import_institutions.rb
128
+ - lib/cricos_scrape/import_contacts.rb
129
+ - lib/cricos_scrape/json_struct.rb
130
+ - lib/cricos_scrape/version.rb
131
+ - spec/contact_importer_spec.rb
132
+ - spec/course_importer_spec.rb
133
+ - spec/fixtures/contact_details_of_state_act_uri.html
134
+ - spec/fixtures/contact_details_of_state_wa_uri.html
135
+ - spec/fixtures/course_details_with_contact_officers_table_grid.html
136
+ - spec/fixtures/course_details_without_pagination_uri.html
137
+ - spec/fixtures/courses_list_by_location_id_uri.html
138
+ - spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
139
+ - spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
140
+ - spec/fixtures/institution_details_with_po_box_postal_address.html
141
+ - spec/fixtures/institution_details_with_trading_name.html
142
+ - spec/fixtures/institution_details_without_locations_details_uri.html
143
+ - spec/fixtures/institution_details_without_pagination_location_uri.html
144
+ - spec/fixtures/not_found_course_details_uri.html
145
+ - spec/fixtures/not_found_institution_details.html
146
+ - spec/institution_importer_spec.rb
147
+ - spec/spec_helper.rb
148
+ homepage: https://github.com/ruby-journal/cricos_scrape.rb
149
+ licenses:
150
+ - MIT
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: 2.2.2
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 2.4.5.1
169
+ signing_key:
170
+ specification_version: 4
171
+ summary: CRICOS Scrape
172
+ test_files:
173
+ - spec/contact_importer_spec.rb
174
+ - spec/course_importer_spec.rb
175
+ - spec/fixtures/contact_details_of_state_act_uri.html
176
+ - spec/fixtures/contact_details_of_state_wa_uri.html
177
+ - spec/fixtures/course_details_with_contact_officers_table_grid.html
178
+ - spec/fixtures/course_details_without_pagination_uri.html
179
+ - spec/fixtures/courses_list_by_location_id_uri.html
180
+ - spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
181
+ - spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
182
+ - spec/fixtures/institution_details_with_po_box_postal_address.html
183
+ - spec/fixtures/institution_details_with_trading_name.html
184
+ - spec/fixtures/institution_details_without_locations_details_uri.html
185
+ - spec/fixtures/institution_details_without_pagination_location_uri.html
186
+ - spec/fixtures/not_found_course_details_uri.html
187
+ - spec/fixtures/not_found_institution_details.html
188
+ - spec/institution_importer_spec.rb
189
+ - spec/spec_helper.rb
190
+ has_rdoc: