cricos_scrape 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +51 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +64 -0
  5. data/LICENSE.md +22 -0
  6. data/Procfile +3 -0
  7. data/README.md +40 -0
  8. data/Rakefile +13 -0
  9. data/cricos_scrape.gemspec +31 -0
  10. data/lib/cricos_scrape/agent.rb +9 -0
  11. data/lib/cricos_scrape/bulk_import_courses.rb +31 -0
  12. data/lib/cricos_scrape/bulk_import_institutions.rb +31 -0
  13. data/lib/cricos_scrape/import_contacts.rb +22 -0
  14. data/lib/cricos_scrape/json_struct.rb +11 -0
  15. data/lib/cricos_scrape/version.rb +3 -0
  16. data/lib/cricos_scrape.rb +8 -0
  17. data/spec/contact_importer_spec.rb +76 -0
  18. data/spec/course_importer_spec.rb +71 -0
  19. data/spec/fixtures/contact_details_of_state_act_uri.html +546 -0
  20. data/spec/fixtures/contact_details_of_state_wa_uri.html +546 -0
  21. data/spec/fixtures/course_details_with_contact_officers_table_grid.html +467 -0
  22. data/spec/fixtures/course_details_without_pagination_uri.html +470 -0
  23. data/spec/fixtures/courses_list_by_location_id_uri.html +174 -0
  24. data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +406 -0
  25. data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +358 -0
  26. data/spec/fixtures/institution_details_with_po_box_postal_address.html +240 -0
  27. data/spec/fixtures/institution_details_with_trading_name.html +322 -0
  28. data/spec/fixtures/institution_details_without_locations_details_uri.html +151 -0
  29. data/spec/fixtures/institution_details_without_pagination_location_uri.html +299 -0
  30. data/spec/fixtures/not_found_course_details_uri.html +837 -0
  31. data/spec/fixtures/not_found_institution_details.html +36 -0
  32. data/spec/institution_importer_spec.rb +138 -0
  33. data/spec/spec_helper.rb +67 -0
  34. metadata +190 -0
@@ -0,0 +1,36 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+ <head><title>
4
+ Institution Details
5
+ </title><meta name="robots" content="noindex" /><link href="../Common/Styles/Styles.css" rel="stylesheet" type="text/css" /><link href="../App_Themes/Theme1/Theme1.css" type="text/css" rel="stylesheet" /></head>
6
+ <body>
7
+ <form method="post" action="InstitutionDetailsOnePage.aspx" id="Form1">
8
+ <div class="aspNetHidden">
9
+ <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwULLTE1NjUwNjY5NDEPZBYCAgMPZBYCAgMPDxYCHgdWaXNpYmxlaGQWBgIDD2QWBAIBDw8WAh8AaGRkAgUPZBYCZg9kFgQCAQ8PFgIfAGhkZAIDD2QWAgIBDzwrABECARAWABYAFgAMFCsAAGQCBQ9kFgICAg88KwARAgEQFgAWABYADBQrAABkAgcPZBYCAgMPZBYCAgEPPCsAEQIBEBYAFgAWAAwUKwAAZBgDBRxjb3Vyc2VMaXN0JGdyaWRTZWFyY2hSZXN1bHRzDzwrAAwCBhUBCENvdXJzZUlkCAL/////D2QFHmxvY2F0aW9uTGlzdCRncmlkU2VhcmNoUmVzdWx0cw88KwAMAgYVAQpMb2NhdGlvbklkCAL/////D2QFHmNvbnRhY3REZXRhaWxzJGdyaWRJU0NDb250YWN0cw88KwAMAgYVAQtDb250YWN0TmFtZQgC/////w9k" />
10
+ </div>
11
+
12
+ <table role="presentation" border="0" cellpadding="0" cellspacing="0" class="tblHeader" id="AutoNumber1">
13
+ <tr>
14
+ <td>
15
+ <img src="../images/Dept-Education_Inline_rev-optimal.png" id="Img1" alt="Australian Government - Department of Education" style="margin-left: 7px; margin-top: 7px; margin-bottom: 7px;" height="65" width="261" />
16
+ <td align="right">
17
+ <img src="../images/cricos.gif" alt="CRICOS - Commonwealth Register of Institutions and Courses for Overseas Students" style="margin: 0px 0px" width="414" height="75" /></td>
18
+ </tr>
19
+ </table>
20
+ <br />
21
+
22
+ <div id="pnlErrorMessage">
23
+
24
+ <table role="presentation" style="font-family: verdana; font-size: 12pt">
25
+ <tr>
26
+ <td valign="top"><img src="../images/exclaim.gif" alt="exclaim" /></td>
27
+ <td valign="middle">
28
+ The Provider ID entered is invalid - please try another.
29
+ </td>
30
+ </tr>
31
+ </table><br/>
32
+
33
+ </div>
34
+ </form>
35
+ </body>
36
+ </html>
@@ -0,0 +1,138 @@
1
+ require 'spec_helper'
2
+
3
+ describe CricosScrape::InstitutionImporter do
4
+
5
+ describe '#run' do
6
+ let(:agent) { CricosScrape.agent }
7
+
8
+ subject(:institution) { CricosScrape::InstitutionImporter.new(agent, provider_id: 1).run }
9
+
10
+ before do
11
+ allow_any_instance_of(CricosScrape::InstitutionImporter).to receive(:url).and_return(uri)
12
+ courses_list_page = agent.get(institution_details_with_pagination_location_page_1_uri+"?LocationID=456")
13
+ allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'get-location-id'}).and_return(courses_list_page)
14
+ end
15
+
16
+ context 'when there is no institution found' do
17
+ let(:uri) { not_found_institution_details_uri }
18
+
19
+ it 'does not import' do
20
+ expect(institution).to be_nil
21
+ end
22
+ end
23
+
24
+ context 'when the response body contains Institution Trading Name' do
25
+ let(:uri) { institution_details_with_trading_name_uri }
26
+
27
+ its(:provider_id) { is_expected.to eq 1 }
28
+ its(:provider_code) { is_expected.to eq '00873F' }
29
+ its(:trading_name) { is_expected.to eq 'Australian Catholic University Limited' }
30
+ its(:name) { is_expected.to eq 'Australian Catholic University Limited' }
31
+ its(:type) { is_expected.to eq 'Government' }
32
+ its(:total_capacity) { is_expected.to eq 50 }
33
+ its(:website) { is_expected.to eq 'www.acu.edu.au' }
34
+ its(:postal_address) do
35
+ is_expected.to eq "International Education Office\nPO Box 968\nNORTH SYDNEY\nNew South Wales  2059"
36
+ end
37
+ end
38
+
39
+ context 'when the response body does not contains Address Line 2' do
40
+ let(:uri) { institution_details_with_po_box_postal_address_uri }
41
+
42
+ its(:provider_id) { is_expected.to eq 1 }
43
+ its(:provider_code) { is_expected.to eq '00780M' }
44
+ its(:trading_name) { is_expected.to be_nil }
45
+ its(:name) { is_expected.to eq 'Department of Education' }
46
+ its(:type) { is_expected.to eq 'Government' }
47
+ its(:total_capacity) { is_expected.to eq 500 }
48
+ its(:website) { is_expected.to be_nil }
49
+ its(:postal_address) do
50
+ is_expected.to eq "GPO Box 4821\nDARWIN\nNorthern Territory  0801"
51
+ end
52
+ end
53
+
54
+ context 'when the response body contains both Principal Executive Officer and International Student Contact' do
55
+ let(:uri) { institution_details_without_pagination_location_uri }
56
+
57
+ its(:contact_officers) do
58
+ data = [
59
+ CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Matthew Green', 'Principal', '0889506400', '0889524607', nil),
60
+ CricosScrape::ContactOfficer.new('International Student Contact', 'ROCHELLE Marshall', 'Secretary', '0889506400', '0889524607', 'rochelle.marshall@nt.catholic.edu.au')
61
+ ]
62
+ is_expected.to eq data
63
+ end
64
+ end
65
+
66
+ context 'when the response body only contains Principal Executive Officer' do
67
+ let(:uri) { institution_details_with_po_box_postal_address_uri }
68
+
69
+ its(:contact_officers) do
70
+ is_expected.to eq [CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Rachael Shanahan', 'Director, Education Services', '0889011336', '0889995788', nil)]
71
+ end
72
+ end
73
+
74
+ context 'when the response body not contains pagination location' do
75
+ let(:uri) { institution_details_without_pagination_location_uri }
76
+
77
+ its(:locations) do
78
+ locations = [
79
+ CricosScrape::Location.new("456", 'Bath Street Campus', 'NT', '1'),
80
+ CricosScrape::Location.new("456", 'Sadadeen Campus', 'NT', '2'),
81
+ CricosScrape::Location.new("456", 'Traeger Campus', 'NT', '2') ,
82
+ ]
83
+ is_expected.to eq locations
84
+ end
85
+ end
86
+
87
+ context 'when the response body not contains location details' do
88
+ let(:uri) { institution_details_without_locations_details_uri }
89
+
90
+ its(:locations) do
91
+ is_expected.to eq nil
92
+ end
93
+ end
94
+
95
+ context 'when the response body contains pagination location' do
96
+ let(:uri) { institution_details_with_pagination_location_page_1_uri }
97
+
98
+ before do
99
+ # jump_to_page does not jump to the current page (page 1), so with total_pages=2 the form is submitted only once.
100
+ locations_list_page_2 = agent.get(institution_details_with_pagination_location_page_2_uri)
101
+ allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'change-location-page'}).and_return(locations_list_page_2)
102
+ end
103
+
104
+ its(:locations) do
105
+ locations = [
106
+ # Locations on page 1
107
+ CricosScrape::Location.new("456", "Albury", "NSW", "51"),
108
+ CricosScrape::Location.new("456", "Bathurst", "NSW", "60"),
109
+ CricosScrape::Location.new("456", "Canberra Institute of Technology - City Campus", "ACT", "2"),
110
+ CricosScrape::Location.new("456", "CSU Study Centre Melbourne", "VIC", "22"),
111
+ CricosScrape::Location.new("456", "CSU Study Centre Sydney", "NSW", "21"),
112
+ CricosScrape::Location.new("456", "Dubbo", "NSW", "29"),
113
+ CricosScrape::Location.new("456", "Holmesglen Institute of TAFE", "VIC", "3"),
114
+ CricosScrape::Location.new("456", "Orange", "NSW", "41"),
115
+ CricosScrape::Location.new("456", "Ryde", "NSW", "1"),
116
+ CricosScrape::Location.new("456", "St Marks Theological Centre", "ACT", "12"),
117
+
118
+ # Locations on page 2
119
+ CricosScrape::Location.new("456", "United Theological College", "NSW", "11"),
120
+ CricosScrape::Location.new("456", "Wagga Wagga", "NSW", "105"),
121
+ ]
122
+ is_expected.to eq locations
123
+ end
124
+
125
+ context 'when the contact officers contains table grid' do
126
+ its(:contact_officers) do
127
+ data = [
128
+ CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Andrew Vann', 'Vice-Chancellor', '02 6338 4209', '02 6338 4809', nil),
129
+ CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 63657537', '02 63657590', 'mevans@csu.edu.au'),
130
+ CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 6365 7537', '02 6365 7590', 'mevans@csu.edu.au')
131
+ ]
132
+ is_expected.to eq data
133
+ end
134
+ end
135
+ end
136
+ end
137
+
138
+ end
@@ -0,0 +1,67 @@
1
+ require_relative '../lib/cricos_scrape'
2
+ require 'rspec/its'
3
+
4
+ def institution_details_with_po_box_postal_address_uri
5
+ file = File.expand_path("../fixtures/institution_details_with_po_box_postal_address.html", __FILE__)
6
+ "file://#{file}"
7
+ end
8
+
9
+ def institution_details_with_trading_name_uri
10
+ file = File.expand_path("../fixtures/institution_details_with_trading_name.html", __FILE__)
11
+ "file://#{file}"
12
+ end
13
+
14
+ def not_found_institution_details_uri
15
+ file = File.expand_path("../fixtures/not_found_institution_details.html", __FILE__)
16
+ "file://#{file}"
17
+ end
18
+
19
+ def institution_details_without_pagination_location_uri
20
+ file = File.expand_path("../fixtures/institution_details_without_pagination_location_uri.html", __FILE__)
21
+ "file://#{file}"
22
+ end
23
+
24
+ def institution_details_with_pagination_location_page_1_uri
25
+ file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_1_uri.html", __FILE__)
26
+ "file://#{file}"
27
+ end
28
+
29
+ def institution_details_with_pagination_location_page_2_uri
30
+ file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_2_uri.html", __FILE__)
31
+ "file://#{file}"
32
+ end
33
+
34
+ def institution_details_without_locations_details_uri
35
+ file = File.expand_path("../fixtures/institution_details_without_locations_details_uri.html", __FILE__)
36
+ "file://#{file}"
37
+ end
38
+
39
+ def course_details_with_contact_officers_table_grid
40
+ file = File.expand_path("../fixtures/course_details_with_contact_officers_table_grid.html", __FILE__)
41
+ "file://#{file}"
42
+ end
43
+
44
+ def courses_list_by_location_id_uri
45
+ file = File.expand_path("../fixtures/courses_list_by_location_id_uri.html", __FILE__)
46
+ "file://#{file}"
47
+ end
48
+
49
+ def contact_details_of_state_act_uri
50
+ file = File.expand_path("../fixtures/contact_details_of_state_act_uri.html", __FILE__)
51
+ "file://#{file}"
52
+ end
53
+
54
+ def contact_details_of_state_wa_uri
55
+ file = File.expand_path("../fixtures/contact_details_of_state_wa_uri.html", __FILE__)
56
+ "file://#{file}"
57
+ end
58
+
59
+ def not_found_course_details_uri
60
+ file = File.expand_path("../fixtures/not_found_course_details_uri.html", __FILE__)
61
+ "file://#{file}"
62
+ end
63
+
64
+ def course_details_without_pagination_uri
65
+ file = File.expand_path("../fixtures/course_details_without_pagination_uri.html", __FILE__)
66
+ "file://#{file}"
67
+ end
metadata ADDED
@@ -0,0 +1,190 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cricos_scrape
3
+ version: !ruby/object:Gem::Version
4
+ version: '2.0'
5
+ platform: ruby
6
+ authors:
7
+ - Trung Lê
8
+ - Toàn Lê
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-10-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.6'
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.6'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rspec
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: 3.3.0
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: 3.3.0
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - "~>"
43
+ - !ruby/object:Gem::Version
44
+ version: 3.3.0
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 3.3.0
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec-its
50
+ requirement: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.2.0
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: 1.2.0
58
+ type: :development
59
+ prerelease: false
60
+ version_requirements: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - "~>"
63
+ - !ruby/object:Gem::Version
64
+ version: 1.2.0
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 1.2.0
68
+ - !ruby/object:Gem::Dependency
69
+ name: mechanize
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.7'
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: 2.7.2
78
+ type: :runtime
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - "~>"
83
+ - !ruby/object:Gem::Version
84
+ version: '2.7'
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: 2.7.2
88
+ - !ruby/object:Gem::Dependency
89
+ name: slop
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - "~>"
93
+ - !ruby/object:Gem::Version
94
+ version: 4.2.0
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: 4.2.0
98
+ type: :runtime
99
+ prerelease: false
100
+ version_requirements: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: 4.2.0
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: 4.2.0
108
+ description: Scrape Institutions, Courses, Contacts from CRICOS
109
+ email:
110
+ - trung.le@ruby-journal.com
111
+ - ktoanlba@gmail.com
112
+ executables: []
113
+ extensions: []
114
+ extra_rdoc_files: []
115
+ files:
116
+ - CONTRIBUTING.md
117
+ - Gemfile
118
+ - Gemfile.lock
119
+ - LICENSE.md
120
+ - Procfile
121
+ - README.md
122
+ - Rakefile
123
+ - cricos_scrape.gemspec
124
+ - lib/cricos_scrape.rb
125
+ - lib/cricos_scrape/agent.rb
126
+ - lib/cricos_scrape/bulk_import_courses.rb
127
+ - lib/cricos_scrape/bulk_import_institutions.rb
128
+ - lib/cricos_scrape/import_contacts.rb
129
+ - lib/cricos_scrape/json_struct.rb
130
+ - lib/cricos_scrape/version.rb
131
+ - spec/contact_importer_spec.rb
132
+ - spec/course_importer_spec.rb
133
+ - spec/fixtures/contact_details_of_state_act_uri.html
134
+ - spec/fixtures/contact_details_of_state_wa_uri.html
135
+ - spec/fixtures/course_details_with_contact_officers_table_grid.html
136
+ - spec/fixtures/course_details_without_pagination_uri.html
137
+ - spec/fixtures/courses_list_by_location_id_uri.html
138
+ - spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
139
+ - spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
140
+ - spec/fixtures/institution_details_with_po_box_postal_address.html
141
+ - spec/fixtures/institution_details_with_trading_name.html
142
+ - spec/fixtures/institution_details_without_locations_details_uri.html
143
+ - spec/fixtures/institution_details_without_pagination_location_uri.html
144
+ - spec/fixtures/not_found_course_details_uri.html
145
+ - spec/fixtures/not_found_institution_details.html
146
+ - spec/institution_importer_spec.rb
147
+ - spec/spec_helper.rb
148
+ homepage: https://github.com/ruby-journal/cricos_scrape.rb
149
+ licenses:
150
+ - MIT
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: 2.2.2
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 2.4.5.1
169
+ signing_key:
170
+ specification_version: 4
171
+ summary: CRICOS Scrape
172
+ test_files:
173
+ - spec/contact_importer_spec.rb
174
+ - spec/course_importer_spec.rb
175
+ - spec/fixtures/contact_details_of_state_act_uri.html
176
+ - spec/fixtures/contact_details_of_state_wa_uri.html
177
+ - spec/fixtures/course_details_with_contact_officers_table_grid.html
178
+ - spec/fixtures/course_details_without_pagination_uri.html
179
+ - spec/fixtures/courses_list_by_location_id_uri.html
180
+ - spec/fixtures/institution_details_with_pagination_location_page_1_uri.html
181
+ - spec/fixtures/institution_details_with_pagination_location_page_2_uri.html
182
+ - spec/fixtures/institution_details_with_po_box_postal_address.html
183
+ - spec/fixtures/institution_details_with_trading_name.html
184
+ - spec/fixtures/institution_details_without_locations_details_uri.html
185
+ - spec/fixtures/institution_details_without_pagination_location_uri.html
186
+ - spec/fixtures/not_found_course_details_uri.html
187
+ - spec/fixtures/not_found_institution_details.html
188
+ - spec/institution_importer_spec.rb
189
+ - spec/spec_helper.rb
190
+ has_rdoc: