cricos_scrape 2.0 → 2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/bin/cricos_scrape +40 -0
- data/cricos_scrape.gemspec +7 -9
- data/lib/cricos_scrape/bulk_import_courses.rb +2 -2
- data/lib/cricos_scrape/bulk_import_institutions.rb +2 -2
- data/lib/cricos_scrape/entities/address.rb +4 -0
- data/lib/cricos_scrape/entities/contact.rb +4 -0
- data/lib/cricos_scrape/entities/contact_officer.rb +4 -0
- data/lib/cricos_scrape/entities/course.rb +4 -0
- data/lib/cricos_scrape/entities/institution.rb +4 -0
- data/lib/cricos_scrape/entities/location.rb +4 -0
- data/lib/cricos_scrape/import_contacts.rb +2 -2
- data/lib/cricos_scrape/importer/contact_importer.rb +120 -0
- data/lib/cricos_scrape/importer/course_importer.rb +291 -0
- data/lib/cricos_scrape/importer/institution_importer.rb +279 -0
- data/lib/cricos_scrape/version.rb +1 -1
- data/lib/cricos_scrape.rb +4 -5
- metadata +16 -44
- data/CONTRIBUTING.md +0 -51
- data/Gemfile +0 -2
- data/Gemfile.lock +0 -64
- data/Procfile +0 -3
- data/Rakefile +0 -13
- data/spec/contact_importer_spec.rb +0 -76
- data/spec/course_importer_spec.rb +0 -71
- data/spec/fixtures/contact_details_of_state_act_uri.html +0 -546
- data/spec/fixtures/contact_details_of_state_wa_uri.html +0 -546
- data/spec/fixtures/course_details_with_contact_officers_table_grid.html +0 -467
- data/spec/fixtures/course_details_without_pagination_uri.html +0 -470
- data/spec/fixtures/courses_list_by_location_id_uri.html +0 -174
- data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +0 -406
- data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +0 -358
- data/spec/fixtures/institution_details_with_po_box_postal_address.html +0 -240
- data/spec/fixtures/institution_details_with_trading_name.html +0 -322
- data/spec/fixtures/institution_details_without_locations_details_uri.html +0 -151
- data/spec/fixtures/institution_details_without_pagination_location_uri.html +0 -299
- data/spec/fixtures/not_found_course_details_uri.html +0 -837
- data/spec/fixtures/not_found_institution_details.html +0 -36
- data/spec/institution_importer_spec.rb +0 -138
- data/spec/spec_helper.rb +0 -67
@@ -1,36 +0,0 @@
|
|
1
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
2
|
-
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
3
|
-
<head><title>
|
4
|
-
Institution Details
|
5
|
-
</title><meta name="robots" content="noindex" /><link href="../Common/Styles/Styles.css" rel="stylesheet" type="text/css" /><link href="../App_Themes/Theme1/Theme1.css" type="text/css" rel="stylesheet" /></head>
|
6
|
-
<body>
|
7
|
-
<form method="post" action="InstitutionDetailsOnePage.aspx" id="Form1">
|
8
|
-
<div class="aspNetHidden">
|
9
|
-
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwULLTE1NjUwNjY5NDEPZBYCAgMPZBYCAgMPDxYCHgdWaXNpYmxlaGQWBgIDD2QWBAIBDw8WAh8AaGRkAgUPZBYCZg9kFgQCAQ8PFgIfAGhkZAIDD2QWAgIBDzwrABECARAWABYAFgAMFCsAAGQCBQ9kFgICAg88KwARAgEQFgAWABYADBQrAABkAgcPZBYCAgMPZBYCAgEPPCsAEQIBEBYAFgAWAAwUKwAAZBgDBRxjb3Vyc2VMaXN0JGdyaWRTZWFyY2hSZXN1bHRzDzwrAAwCBhUBCENvdXJzZUlkCAL/////D2QFHmxvY2F0aW9uTGlzdCRncmlkU2VhcmNoUmVzdWx0cw88KwAMAgYVAQpMb2NhdGlvbklkCAL/////D2QFHmNvbnRhY3REZXRhaWxzJGdyaWRJU0NDb250YWN0cw88KwAMAgYVAQtDb250YWN0TmFtZQgC/////w9k" />
|
10
|
-
</div>
|
11
|
-
|
12
|
-
<table role="presentation" border="0" cellpadding="0" cellspacing="0" class="tblHeader" id="AutoNumber1">
|
13
|
-
<tr>
|
14
|
-
<td>
|
15
|
-
<img src="../images/Dept-Education_Inline_rev-optimal.png" id="Img1" alt="Australian Government - Department of Education" style="margin-left: 7px; margin-top: 7px; margin-bottom: 7px;" height="65" width="261" />
|
16
|
-
<td align="right">
|
17
|
-
<img src="../images/cricos.gif" alt="CRICOS - Commonwealth Register of Institutions and Courses for Overseas Students" style="margin: 0px 0px" width="414" height="75" /></td>
|
18
|
-
</tr>
|
19
|
-
</table>
|
20
|
-
<br />
|
21
|
-
|
22
|
-
<div id="pnlErrorMessage">
|
23
|
-
|
24
|
-
<table role="presentation" style="font-family: verdana; font-size: 12pt">
|
25
|
-
<tr>
|
26
|
-
<td valign="top"><img src="../images/exclaim.gif" alt="exclaim" /></td>
|
27
|
-
<td valign="middle">
|
28
|
-
The Provider ID entered is invalid - please try another.
|
29
|
-
</td>
|
30
|
-
</tr>
|
31
|
-
</table><br/>
|
32
|
-
|
33
|
-
</div>
|
34
|
-
</form>
|
35
|
-
</body>
|
36
|
-
</html>
|
@@ -1,138 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe CricosScrape::InstitutionImporter do
|
4
|
-
|
5
|
-
describe '#run' do
|
6
|
-
let(:agent) { CricosScrape.agent }
|
7
|
-
|
8
|
-
subject(:institution) { CricosScrape::InstitutionImporter.new(agent, provider_id: 1).run }
|
9
|
-
|
10
|
-
before do
|
11
|
-
allow_any_instance_of(CricosScrape::InstitutionImporter).to receive(:url).and_return(uri)
|
12
|
-
courses_list_page = agent.get(institution_details_with_pagination_location_page_1_uri+"?LocationID=456")
|
13
|
-
allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'get-location-id'}).and_return(courses_list_page)
|
14
|
-
end
|
15
|
-
|
16
|
-
context 'when there is no institution found' do
|
17
|
-
let(:uri) { not_found_institution_details_uri }
|
18
|
-
|
19
|
-
it 'does not import' do
|
20
|
-
expect(institution).to be_nil
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
context 'when the response body contains Institution Trading Name' do
|
25
|
-
let(:uri) { institution_details_with_trading_name_uri }
|
26
|
-
|
27
|
-
its(:provider_id) { is_expected.to eq 1 }
|
28
|
-
its(:provider_code) { is_expected.to eq '00873F' }
|
29
|
-
its(:trading_name) { is_expected.to eq 'Australian Catholic University Limited' }
|
30
|
-
its(:name) { is_expected.to eq 'Australian Catholic University Limited' }
|
31
|
-
its(:type) { is_expected.to eq 'Government' }
|
32
|
-
its(:total_capacity) { is_expected.to eq 50 }
|
33
|
-
its(:website) { is_expected.to eq 'www.acu.edu.au' }
|
34
|
-
its(:postal_address) do
|
35
|
-
is_expected.to eq "International Education Office\nPO Box 968\nNORTH SYDNEY\nNew South Wales 2059"
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
context 'when the response body does not contains Address Line 2' do
|
40
|
-
let(:uri) { institution_details_with_po_box_postal_address_uri }
|
41
|
-
|
42
|
-
its(:provider_id) { is_expected.to eq 1 }
|
43
|
-
its(:provider_code) { is_expected.to eq '00780M' }
|
44
|
-
its(:trading_name) { is_expected.to be_nil }
|
45
|
-
its(:name) { is_expected.to eq 'Department of Education' }
|
46
|
-
its(:type) { is_expected.to eq 'Government' }
|
47
|
-
its(:total_capacity) { is_expected.to eq 500 }
|
48
|
-
its(:website) { is_expected.to be_nil }
|
49
|
-
its(:postal_address) do
|
50
|
-
is_expected.to eq "GPO Box 4821\nDARWIN\nNorthern Territory 0801"
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
context 'when the response body contains both Principal Executive Officer and International Student Contact' do
|
55
|
-
let(:uri) { institution_details_without_pagination_location_uri }
|
56
|
-
|
57
|
-
its(:contact_officers) do
|
58
|
-
data = [
|
59
|
-
CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Matthew Green', 'Principal', '0889506400', '0889524607', nil),
|
60
|
-
CricosScrape::ContactOfficer.new('International Student Contact', 'ROCHELLE Marshall', 'Secretary', '0889506400', '0889524607', 'rochelle.marshall@nt.catholic.edu.au')
|
61
|
-
]
|
62
|
-
is_expected.to eq data
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
context 'when the response body only contains Principal Executive Officer' do
|
67
|
-
let(:uri) { institution_details_with_po_box_postal_address_uri }
|
68
|
-
|
69
|
-
its(:contact_officers) do
|
70
|
-
is_expected.to eq [CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Rachael Shanahan', 'Director, Education Services', '0889011336', '0889995788', nil)]
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
context 'when the response body not contains pagination location' do
|
75
|
-
let(:uri) { institution_details_without_pagination_location_uri }
|
76
|
-
|
77
|
-
its(:locations) do
|
78
|
-
locations = [
|
79
|
-
CricosScrape::Location.new("456", 'Bath Street Campus', 'NT', '1'),
|
80
|
-
CricosScrape::Location.new("456", 'Sadadeen Campus', 'NT', '2'),
|
81
|
-
CricosScrape::Location.new("456", 'Traeger Campus', 'NT', '2') ,
|
82
|
-
]
|
83
|
-
is_expected.to eq locations
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
context 'when the response body not contains location details' do
|
88
|
-
let(:uri) { institution_details_without_locations_details_uri }
|
89
|
-
|
90
|
-
its(:locations) do
|
91
|
-
is_expected.to eq nil
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
context 'when the response body contains pagination location' do
|
96
|
-
let(:uri) { institution_details_with_pagination_location_page_1_uri }
|
97
|
-
|
98
|
-
before do
|
99
|
-
# Method jump_to_page don't jump to current page (page 1). with total_pages=2, form will submit once
|
100
|
-
locations_list_page_2 = agent.get(institution_details_with_pagination_location_page_2_uri)
|
101
|
-
allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'change-location-page'}).and_return(locations_list_page_2)
|
102
|
-
end
|
103
|
-
|
104
|
-
its(:locations) do
|
105
|
-
locations = [
|
106
|
-
#Locations on page 1
|
107
|
-
CricosScrape::Location.new("456", "Albury", "NSW", "51"),
|
108
|
-
CricosScrape::Location.new("456", "Bathurst", "NSW", "60"),
|
109
|
-
CricosScrape::Location.new("456", "Canberra Institute of Technology - City Campus", "ACT", "2"),
|
110
|
-
CricosScrape::Location.new("456", "CSU Study Centre Melbourne", "VIC", "22"),
|
111
|
-
CricosScrape::Location.new("456", "CSU Study Centre Sydney", "NSW", "21"),
|
112
|
-
CricosScrape::Location.new("456", "Dubbo", "NSW", "29"),
|
113
|
-
CricosScrape::Location.new("456", "Holmesglen Institute of TAFE", "VIC", "3"),
|
114
|
-
CricosScrape::Location.new("456", "Orange", "NSW", "41"),
|
115
|
-
CricosScrape::Location.new("456", "Ryde", "NSW", "1"),
|
116
|
-
CricosScrape::Location.new("456", "St Marks Theological Centre", "ACT", "12"),
|
117
|
-
|
118
|
-
#Locations on page 2
|
119
|
-
CricosScrape::Location.new("456", "United Theological College", "NSW", "11"),
|
120
|
-
CricosScrape::Location.new("456", "Wagga Wagga", "NSW", "105"),
|
121
|
-
]
|
122
|
-
is_expected.to eq locations
|
123
|
-
end
|
124
|
-
|
125
|
-
context 'when the contact officers contains table grid' do
|
126
|
-
its(:contact_officers) do
|
127
|
-
data = [
|
128
|
-
CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Andrew Vann', 'Vice-Chancellor', '02 6338 4209', '02 6338 4809', nil),
|
129
|
-
CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 63657537', '02 63657590', 'mevans@csu.edu.au'),
|
130
|
-
CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 6365 7537', '02 6365 7590', 'mevans@csu.edu.au')
|
131
|
-
]
|
132
|
-
is_expected.to eq data
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
data/spec/spec_helper.rb
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
require_relative '../lib/cricos_scrape'
|
2
|
-
require 'rspec/its'
|
3
|
-
|
4
|
-
def institution_details_with_po_box_postal_address_uri
|
5
|
-
file = File.expand_path("../fixtures/institution_details_with_po_box_postal_address.html", __FILE__)
|
6
|
-
"file://#{file}"
|
7
|
-
end
|
8
|
-
|
9
|
-
def institution_details_with_trading_name_uri
|
10
|
-
file = File.expand_path("../fixtures/institution_details_with_trading_name.html", __FILE__)
|
11
|
-
"file://#{file}"
|
12
|
-
end
|
13
|
-
|
14
|
-
def not_found_institution_details_uri
|
15
|
-
file = File.expand_path("../fixtures/not_found_institution_details.html", __FILE__)
|
16
|
-
"file://#{file}"
|
17
|
-
end
|
18
|
-
|
19
|
-
def institution_details_without_pagination_location_uri
|
20
|
-
file = File.expand_path("../fixtures/institution_details_without_pagination_location_uri.html", __FILE__)
|
21
|
-
"file://#{file}"
|
22
|
-
end
|
23
|
-
|
24
|
-
def institution_details_with_pagination_location_page_1_uri
|
25
|
-
file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_1_uri.html", __FILE__)
|
26
|
-
"file://#{file}"
|
27
|
-
end
|
28
|
-
|
29
|
-
def institution_details_with_pagination_location_page_2_uri
|
30
|
-
file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_2_uri.html", __FILE__)
|
31
|
-
"file://#{file}"
|
32
|
-
end
|
33
|
-
|
34
|
-
def institution_details_without_locations_details_uri
|
35
|
-
file = File.expand_path("../fixtures/institution_details_without_locations_details_uri.html", __FILE__)
|
36
|
-
"file://#{file}"
|
37
|
-
end
|
38
|
-
|
39
|
-
def course_details_with_contact_officers_table_grid
|
40
|
-
file = File.expand_path("../fixtures/course_details_with_contact_officers_table_grid.html", __FILE__)
|
41
|
-
"file://#{file}"
|
42
|
-
end
|
43
|
-
|
44
|
-
def courses_list_by_location_id_uri
|
45
|
-
file = File.expand_path("../fixtures/courses_list_by_location_id_uri.html", __FILE__)
|
46
|
-
"file://#{file}"
|
47
|
-
end
|
48
|
-
|
49
|
-
def contact_details_of_state_act_uri
|
50
|
-
file = File.expand_path("../fixtures/contact_details_of_state_act_uri.html", __FILE__)
|
51
|
-
"file://#{file}"
|
52
|
-
end
|
53
|
-
|
54
|
-
def contact_details_of_state_wa_uri
|
55
|
-
file = File.expand_path("../fixtures/contact_details_of_state_wa_uri.html", __FILE__)
|
56
|
-
"file://#{file}"
|
57
|
-
end
|
58
|
-
|
59
|
-
def not_found_course_details_uri
|
60
|
-
file = File.expand_path("../fixtures/not_found_course_details_uri.html", __FILE__)
|
61
|
-
"file://#{file}"
|
62
|
-
end
|
63
|
-
|
64
|
-
def course_details_without_pagination_uri
|
65
|
-
file = File.expand_path("../fixtures/course_details_without_pagination_uri.html", __FILE__)
|
66
|
-
"file://#{file}"
|
67
|
-
end
|