cricos_scrape 2.0 → 2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/bin/cricos_scrape +40 -0
  4. data/cricos_scrape.gemspec +7 -9
  5. data/lib/cricos_scrape/bulk_import_courses.rb +2 -2
  6. data/lib/cricos_scrape/bulk_import_institutions.rb +2 -2
  7. data/lib/cricos_scrape/entities/address.rb +4 -0
  8. data/lib/cricos_scrape/entities/contact.rb +4 -0
  9. data/lib/cricos_scrape/entities/contact_officer.rb +4 -0
  10. data/lib/cricos_scrape/entities/course.rb +4 -0
  11. data/lib/cricos_scrape/entities/institution.rb +4 -0
  12. data/lib/cricos_scrape/entities/location.rb +4 -0
  13. data/lib/cricos_scrape/import_contacts.rb +2 -2
  14. data/lib/cricos_scrape/importer/contact_importer.rb +120 -0
  15. data/lib/cricos_scrape/importer/course_importer.rb +291 -0
  16. data/lib/cricos_scrape/importer/institution_importer.rb +279 -0
  17. data/lib/cricos_scrape/version.rb +1 -1
  18. data/lib/cricos_scrape.rb +4 -5
  19. metadata +16 -44
  20. data/CONTRIBUTING.md +0 -51
  21. data/Gemfile +0 -2
  22. data/Gemfile.lock +0 -64
  23. data/Procfile +0 -3
  24. data/Rakefile +0 -13
  25. data/spec/contact_importer_spec.rb +0 -76
  26. data/spec/course_importer_spec.rb +0 -71
  27. data/spec/fixtures/contact_details_of_state_act_uri.html +0 -546
  28. data/spec/fixtures/contact_details_of_state_wa_uri.html +0 -546
  29. data/spec/fixtures/course_details_with_contact_officers_table_grid.html +0 -467
  30. data/spec/fixtures/course_details_without_pagination_uri.html +0 -470
  31. data/spec/fixtures/courses_list_by_location_id_uri.html +0 -174
  32. data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +0 -406
  33. data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +0 -358
  34. data/spec/fixtures/institution_details_with_po_box_postal_address.html +0 -240
  35. data/spec/fixtures/institution_details_with_trading_name.html +0 -322
  36. data/spec/fixtures/institution_details_without_locations_details_uri.html +0 -151
  37. data/spec/fixtures/institution_details_without_pagination_location_uri.html +0 -299
  38. data/spec/fixtures/not_found_course_details_uri.html +0 -837
  39. data/spec/fixtures/not_found_institution_details.html +0 -36
  40. data/spec/institution_importer_spec.rb +0 -138
  41. data/spec/spec_helper.rb +0 -67
@@ -1,36 +0,0 @@
1
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
- <head><title>
4
- Institution Details
5
- </title><meta name="robots" content="noindex" /><link href="../Common/Styles/Styles.css" rel="stylesheet" type="text/css" /><link href="../App_Themes/Theme1/Theme1.css" type="text/css" rel="stylesheet" /></head>
6
- <body>
7
- <form method="post" action="InstitutionDetailsOnePage.aspx" id="Form1">
8
- <div class="aspNetHidden">
9
- <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwULLTE1NjUwNjY5NDEPZBYCAgMPZBYCAgMPDxYCHgdWaXNpYmxlaGQWBgIDD2QWBAIBDw8WAh8AaGRkAgUPZBYCZg9kFgQCAQ8PFgIfAGhkZAIDD2QWAgIBDzwrABECARAWABYAFgAMFCsAAGQCBQ9kFgICAg88KwARAgEQFgAWABYADBQrAABkAgcPZBYCAgMPZBYCAgEPPCsAEQIBEBYAFgAWAAwUKwAAZBgDBRxjb3Vyc2VMaXN0JGdyaWRTZWFyY2hSZXN1bHRzDzwrAAwCBhUBCENvdXJzZUlkCAL/////D2QFHmxvY2F0aW9uTGlzdCRncmlkU2VhcmNoUmVzdWx0cw88KwAMAgYVAQpMb2NhdGlvbklkCAL/////D2QFHmNvbnRhY3REZXRhaWxzJGdyaWRJU0NDb250YWN0cw88KwAMAgYVAQtDb250YWN0TmFtZQgC/////w9k" />
10
- </div>
11
-
12
- <table role="presentation" border="0" cellpadding="0" cellspacing="0" class="tblHeader" id="AutoNumber1">
13
- <tr>
14
- <td>
15
- <img src="../images/Dept-Education_Inline_rev-optimal.png" id="Img1" alt="Australian Government - Department of Education" style="margin-left: 7px; margin-top: 7px; margin-bottom: 7px;" height="65" width="261" />
16
- <td align="right">
17
- <img src="../images/cricos.gif" alt="CRICOS - Commonwealth Register of Institutions and Courses for Overseas Students" style="margin: 0px 0px" width="414" height="75" /></td>
18
- </tr>
19
- </table>
20
- <br />
21
-
22
- <div id="pnlErrorMessage">
23
-
24
- <table role="presentation" style="font-family: verdana; font-size: 12pt">
25
- <tr>
26
- <td valign="top"><img src="../images/exclaim.gif" alt="exclaim" /></td>
27
- <td valign="middle">
28
- The Provider ID entered is invalid - please try another.
29
- </td>
30
- </tr>
31
- </table><br/>
32
-
33
- </div>
34
- </form>
35
- </body>
36
- </html>
@@ -1,138 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe CricosScrape::InstitutionImporter do
4
-
5
- describe '#run' do
6
- let(:agent) { CricosScrape.agent }
7
-
8
- subject(:institution) { CricosScrape::InstitutionImporter.new(agent, provider_id: 1).run }
9
-
10
- before do
11
- allow_any_instance_of(CricosScrape::InstitutionImporter).to receive(:url).and_return(uri)
12
- courses_list_page = agent.get(institution_details_with_pagination_location_page_1_uri+"?LocationID=456")
13
- allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'get-location-id'}).and_return(courses_list_page)
14
- end
15
-
16
- context 'when there is no institution found' do
17
- let(:uri) { not_found_institution_details_uri }
18
-
19
- it 'does not import' do
20
- expect(institution).to be_nil
21
- end
22
- end
23
-
24
- context 'when the response body contains Institution Trading Name' do
25
- let(:uri) { institution_details_with_trading_name_uri }
26
-
27
- its(:provider_id) { is_expected.to eq 1 }
28
- its(:provider_code) { is_expected.to eq '00873F' }
29
- its(:trading_name) { is_expected.to eq 'Australian Catholic University Limited' }
30
- its(:name) { is_expected.to eq 'Australian Catholic University Limited' }
31
- its(:type) { is_expected.to eq 'Government' }
32
- its(:total_capacity) { is_expected.to eq 50 }
33
- its(:website) { is_expected.to eq 'www.acu.edu.au' }
34
- its(:postal_address) do
35
- is_expected.to eq "International Education Office\nPO Box 968\nNORTH SYDNEY\nNew South Wales  2059"
36
- end
37
- end
38
-
39
- context 'when the response body does not contains Address Line 2' do
40
- let(:uri) { institution_details_with_po_box_postal_address_uri }
41
-
42
- its(:provider_id) { is_expected.to eq 1 }
43
- its(:provider_code) { is_expected.to eq '00780M' }
44
- its(:trading_name) { is_expected.to be_nil }
45
- its(:name) { is_expected.to eq 'Department of Education' }
46
- its(:type) { is_expected.to eq 'Government' }
47
- its(:total_capacity) { is_expected.to eq 500 }
48
- its(:website) { is_expected.to be_nil }
49
- its(:postal_address) do
50
- is_expected.to eq "GPO Box 4821\nDARWIN\nNorthern Territory  0801"
51
- end
52
- end
53
-
54
- context 'when the response body contains both Principal Executive Officer and International Student Contact' do
55
- let(:uri) { institution_details_without_pagination_location_uri }
56
-
57
- its(:contact_officers) do
58
- data = [
59
- CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Matthew Green', 'Principal', '0889506400', '0889524607', nil),
60
- CricosScrape::ContactOfficer.new('International Student Contact', 'ROCHELLE Marshall', 'Secretary', '0889506400', '0889524607', 'rochelle.marshall@nt.catholic.edu.au')
61
- ]
62
- is_expected.to eq data
63
- end
64
- end
65
-
66
- context 'when the response body only contains Principal Executive Officer' do
67
- let(:uri) { institution_details_with_po_box_postal_address_uri }
68
-
69
- its(:contact_officers) do
70
- is_expected.to eq [CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Rachael Shanahan', 'Director, Education Services', '0889011336', '0889995788', nil)]
71
- end
72
- end
73
-
74
- context 'when the response body not contains pagination location' do
75
- let(:uri) { institution_details_without_pagination_location_uri }
76
-
77
- its(:locations) do
78
- locations = [
79
- CricosScrape::Location.new("456", 'Bath Street Campus', 'NT', '1'),
80
- CricosScrape::Location.new("456", 'Sadadeen Campus', 'NT', '2'),
81
- CricosScrape::Location.new("456", 'Traeger Campus', 'NT', '2') ,
82
- ]
83
- is_expected.to eq locations
84
- end
85
- end
86
-
87
- context 'when the response body not contains location details' do
88
- let(:uri) { institution_details_without_locations_details_uri }
89
-
90
- its(:locations) do
91
- is_expected.to eq nil
92
- end
93
- end
94
-
95
- context 'when the response body contains pagination location' do
96
- let(:uri) { institution_details_with_pagination_location_page_1_uri }
97
-
98
- before do
99
- # Method jump_to_page don't jump to current page (page 1). with total_pages=2, form will submit once
100
- locations_list_page_2 = agent.get(institution_details_with_pagination_location_page_2_uri)
101
- allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'change-location-page'}).and_return(locations_list_page_2)
102
- end
103
-
104
- its(:locations) do
105
- locations = [
106
- #Locations on page 1
107
- CricosScrape::Location.new("456", "Albury", "NSW", "51"),
108
- CricosScrape::Location.new("456", "Bathurst", "NSW", "60"),
109
- CricosScrape::Location.new("456", "Canberra Institute of Technology - City Campus", "ACT", "2"),
110
- CricosScrape::Location.new("456", "CSU Study Centre Melbourne", "VIC", "22"),
111
- CricosScrape::Location.new("456", "CSU Study Centre Sydney", "NSW", "21"),
112
- CricosScrape::Location.new("456", "Dubbo", "NSW", "29"),
113
- CricosScrape::Location.new("456", "Holmesglen Institute of TAFE", "VIC", "3"),
114
- CricosScrape::Location.new("456", "Orange", "NSW", "41"),
115
- CricosScrape::Location.new("456", "Ryde", "NSW", "1"),
116
- CricosScrape::Location.new("456", "St Marks Theological Centre", "ACT", "12"),
117
-
118
- #Locations on page 2
119
- CricosScrape::Location.new("456", "United Theological College", "NSW", "11"),
120
- CricosScrape::Location.new("456", "Wagga Wagga", "NSW", "105"),
121
- ]
122
- is_expected.to eq locations
123
- end
124
-
125
- context 'when the contact officers contains table grid' do
126
- its(:contact_officers) do
127
- data = [
128
- CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Andrew Vann', 'Vice-Chancellor', '02 6338 4209', '02 6338 4809', nil),
129
- CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 63657537', '02 63657590', 'mevans@csu.edu.au'),
130
- CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 6365 7537', '02 6365 7590', 'mevans@csu.edu.au')
131
- ]
132
- is_expected.to eq data
133
- end
134
- end
135
- end
136
- end
137
-
138
- end
data/spec/spec_helper.rb DELETED
@@ -1,67 +0,0 @@
1
- require_relative '../lib/cricos_scrape'
2
- require 'rspec/its'
3
-
4
- def institution_details_with_po_box_postal_address_uri
5
- file = File.expand_path("../fixtures/institution_details_with_po_box_postal_address.html", __FILE__)
6
- "file://#{file}"
7
- end
8
-
9
- def institution_details_with_trading_name_uri
10
- file = File.expand_path("../fixtures/institution_details_with_trading_name.html", __FILE__)
11
- "file://#{file}"
12
- end
13
-
14
- def not_found_institution_details_uri
15
- file = File.expand_path("../fixtures/not_found_institution_details.html", __FILE__)
16
- "file://#{file}"
17
- end
18
-
19
- def institution_details_without_pagination_location_uri
20
- file = File.expand_path("../fixtures/institution_details_without_pagination_location_uri.html", __FILE__)
21
- "file://#{file}"
22
- end
23
-
24
- def institution_details_with_pagination_location_page_1_uri
25
- file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_1_uri.html", __FILE__)
26
- "file://#{file}"
27
- end
28
-
29
- def institution_details_with_pagination_location_page_2_uri
30
- file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_2_uri.html", __FILE__)
31
- "file://#{file}"
32
- end
33
-
34
- def institution_details_without_locations_details_uri
35
- file = File.expand_path("../fixtures/institution_details_without_locations_details_uri.html", __FILE__)
36
- "file://#{file}"
37
- end
38
-
39
- def course_details_with_contact_officers_table_grid
40
- file = File.expand_path("../fixtures/course_details_with_contact_officers_table_grid.html", __FILE__)
41
- "file://#{file}"
42
- end
43
-
44
- def courses_list_by_location_id_uri
45
- file = File.expand_path("../fixtures/courses_list_by_location_id_uri.html", __FILE__)
46
- "file://#{file}"
47
- end
48
-
49
- def contact_details_of_state_act_uri
50
- file = File.expand_path("../fixtures/contact_details_of_state_act_uri.html", __FILE__)
51
- "file://#{file}"
52
- end
53
-
54
- def contact_details_of_state_wa_uri
55
- file = File.expand_path("../fixtures/contact_details_of_state_wa_uri.html", __FILE__)
56
- "file://#{file}"
57
- end
58
-
59
- def not_found_course_details_uri
60
- file = File.expand_path("../fixtures/not_found_course_details_uri.html", __FILE__)
61
- "file://#{file}"
62
- end
63
-
64
- def course_details_without_pagination_uri
65
- file = File.expand_path("../fixtures/course_details_without_pagination_uri.html", __FILE__)
66
- "file://#{file}"
67
- end