cricos_scrape 2.0 → 2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/bin/cricos_scrape +40 -0
  4. data/cricos_scrape.gemspec +7 -9
  5. data/lib/cricos_scrape/bulk_import_courses.rb +2 -2
  6. data/lib/cricos_scrape/bulk_import_institutions.rb +2 -2
  7. data/lib/cricos_scrape/entities/address.rb +4 -0
  8. data/lib/cricos_scrape/entities/contact.rb +4 -0
  9. data/lib/cricos_scrape/entities/contact_officer.rb +4 -0
  10. data/lib/cricos_scrape/entities/course.rb +4 -0
  11. data/lib/cricos_scrape/entities/institution.rb +4 -0
  12. data/lib/cricos_scrape/entities/location.rb +4 -0
  13. data/lib/cricos_scrape/import_contacts.rb +2 -2
  14. data/lib/cricos_scrape/importer/contact_importer.rb +120 -0
  15. data/lib/cricos_scrape/importer/course_importer.rb +291 -0
  16. data/lib/cricos_scrape/importer/institution_importer.rb +279 -0
  17. data/lib/cricos_scrape/version.rb +1 -1
  18. data/lib/cricos_scrape.rb +4 -5
  19. metadata +16 -44
  20. data/CONTRIBUTING.md +0 -51
  21. data/Gemfile +0 -2
  22. data/Gemfile.lock +0 -64
  23. data/Procfile +0 -3
  24. data/Rakefile +0 -13
  25. data/spec/contact_importer_spec.rb +0 -76
  26. data/spec/course_importer_spec.rb +0 -71
  27. data/spec/fixtures/contact_details_of_state_act_uri.html +0 -546
  28. data/spec/fixtures/contact_details_of_state_wa_uri.html +0 -546
  29. data/spec/fixtures/course_details_with_contact_officers_table_grid.html +0 -467
  30. data/spec/fixtures/course_details_without_pagination_uri.html +0 -470
  31. data/spec/fixtures/courses_list_by_location_id_uri.html +0 -174
  32. data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +0 -406
  33. data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +0 -358
  34. data/spec/fixtures/institution_details_with_po_box_postal_address.html +0 -240
  35. data/spec/fixtures/institution_details_with_trading_name.html +0 -322
  36. data/spec/fixtures/institution_details_without_locations_details_uri.html +0 -151
  37. data/spec/fixtures/institution_details_without_pagination_location_uri.html +0 -299
  38. data/spec/fixtures/not_found_course_details_uri.html +0 -837
  39. data/spec/fixtures/not_found_institution_details.html +0 -36
  40. data/spec/institution_importer_spec.rb +0 -138
  41. data/spec/spec_helper.rb +0 -67
@@ -1,36 +0,0 @@
1
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
- <head><title>
4
- Institution Details
5
- </title><meta name="robots" content="noindex" /><link href="../Common/Styles/Styles.css" rel="stylesheet" type="text/css" /><link href="../App_Themes/Theme1/Theme1.css" type="text/css" rel="stylesheet" /></head>
6
- <body>
7
- <form method="post" action="InstitutionDetailsOnePage.aspx" id="Form1">
8
- <div class="aspNetHidden">
9
- <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwULLTE1NjUwNjY5NDEPZBYCAgMPZBYCAgMPDxYCHgdWaXNpYmxlaGQWBgIDD2QWBAIBDw8WAh8AaGRkAgUPZBYCZg9kFgQCAQ8PFgIfAGhkZAIDD2QWAgIBDzwrABECARAWABYAFgAMFCsAAGQCBQ9kFgICAg88KwARAgEQFgAWABYADBQrAABkAgcPZBYCAgMPZBYCAgEPPCsAEQIBEBYAFgAWAAwUKwAAZBgDBRxjb3Vyc2VMaXN0JGdyaWRTZWFyY2hSZXN1bHRzDzwrAAwCBhUBCENvdXJzZUlkCAL/////D2QFHmxvY2F0aW9uTGlzdCRncmlkU2VhcmNoUmVzdWx0cw88KwAMAgYVAQpMb2NhdGlvbklkCAL/////D2QFHmNvbnRhY3REZXRhaWxzJGdyaWRJU0NDb250YWN0cw88KwAMAgYVAQtDb250YWN0TmFtZQgC/////w9k" />
10
- </div>
11
-
12
- <table role="presentation" border="0" cellpadding="0" cellspacing="0" class="tblHeader" id="AutoNumber1">
13
- <tr>
14
- <td>
15
- <img src="../images/Dept-Education_Inline_rev-optimal.png" id="Img1" alt="Australian Government - Department of Education" style="margin-left: 7px; margin-top: 7px; margin-bottom: 7px;" height="65" width="261" />
16
- <td align="right">
17
- <img src="../images/cricos.gif" alt="CRICOS - Commonwealth Register of Institutions and Courses for Overseas Students" style="margin: 0px 0px" width="414" height="75" /></td>
18
- </tr>
19
- </table>
20
- <br />
21
-
22
- <div id="pnlErrorMessage">
23
-
24
- <table role="presentation" style="font-family: verdana; font-size: 12pt">
25
- <tr>
26
- <td valign="top"><img src="../images/exclaim.gif" alt="exclaim" /></td>
27
- <td valign="middle">
28
- The Provider ID entered is invalid - please try another.
29
- </td>
30
- </tr>
31
- </table><br/>
32
-
33
- </div>
34
- </form>
35
- </body>
36
- </html>
@@ -1,138 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe CricosScrape::InstitutionImporter do
4
-
5
- describe '#run' do
6
- let(:agent) { CricosScrape.agent }
7
-
8
- subject(:institution) { CricosScrape::InstitutionImporter.new(agent, provider_id: 1).run }
9
-
10
- before do
11
- allow_any_instance_of(CricosScrape::InstitutionImporter).to receive(:url).and_return(uri)
12
- courses_list_page = agent.get(institution_details_with_pagination_location_page_1_uri+"?LocationID=456")
13
- allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'get-location-id'}).and_return(courses_list_page)
14
- end
15
-
16
- context 'when there is no institution found' do
17
- let(:uri) { not_found_institution_details_uri }
18
-
19
- it 'does not import' do
20
- expect(institution).to be_nil
21
- end
22
- end
23
-
24
- context 'when the response body contains Institution Trading Name' do
25
- let(:uri) { institution_details_with_trading_name_uri }
26
-
27
- its(:provider_id) { is_expected.to eq 1 }
28
- its(:provider_code) { is_expected.to eq '00873F' }
29
- its(:trading_name) { is_expected.to eq 'Australian Catholic University Limited' }
30
- its(:name) { is_expected.to eq 'Australian Catholic University Limited' }
31
- its(:type) { is_expected.to eq 'Government' }
32
- its(:total_capacity) { is_expected.to eq 50 }
33
- its(:website) { is_expected.to eq 'www.acu.edu.au' }
34
- its(:postal_address) do
35
- is_expected.to eq "International Education Office\nPO Box 968\nNORTH SYDNEY\nNew South Wales  2059"
36
- end
37
- end
38
-
39
- context 'when the response body does not contains Address Line 2' do
40
- let(:uri) { institution_details_with_po_box_postal_address_uri }
41
-
42
- its(:provider_id) { is_expected.to eq 1 }
43
- its(:provider_code) { is_expected.to eq '00780M' }
44
- its(:trading_name) { is_expected.to be_nil }
45
- its(:name) { is_expected.to eq 'Department of Education' }
46
- its(:type) { is_expected.to eq 'Government' }
47
- its(:total_capacity) { is_expected.to eq 500 }
48
- its(:website) { is_expected.to be_nil }
49
- its(:postal_address) do
50
- is_expected.to eq "GPO Box 4821\nDARWIN\nNorthern Territory  0801"
51
- end
52
- end
53
-
54
- context 'when the response body contains both Principal Executive Officer and International Student Contact' do
55
- let(:uri) { institution_details_without_pagination_location_uri }
56
-
57
- its(:contact_officers) do
58
- data = [
59
- CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Matthew Green', 'Principal', '0889506400', '0889524607', nil),
60
- CricosScrape::ContactOfficer.new('International Student Contact', 'ROCHELLE Marshall', 'Secretary', '0889506400', '0889524607', 'rochelle.marshall@nt.catholic.edu.au')
61
- ]
62
- is_expected.to eq data
63
- end
64
- end
65
-
66
- context 'when the response body only contains Principal Executive Officer' do
67
- let(:uri) { institution_details_with_po_box_postal_address_uri }
68
-
69
- its(:contact_officers) do
70
- is_expected.to eq [CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Rachael Shanahan', 'Director, Education Services', '0889011336', '0889995788', nil)]
71
- end
72
- end
73
-
74
- context 'when the response body not contains pagination location' do
75
- let(:uri) { institution_details_without_pagination_location_uri }
76
-
77
- its(:locations) do
78
- locations = [
79
- CricosScrape::Location.new("456", 'Bath Street Campus', 'NT', '1'),
80
- CricosScrape::Location.new("456", 'Sadadeen Campus', 'NT', '2'),
81
- CricosScrape::Location.new("456", 'Traeger Campus', 'NT', '2') ,
82
- ]
83
- is_expected.to eq locations
84
- end
85
- end
86
-
87
- context 'when the response body not contains location details' do
88
- let(:uri) { institution_details_without_locations_details_uri }
89
-
90
- its(:locations) do
91
- is_expected.to eq nil
92
- end
93
- end
94
-
95
- context 'when the response body contains pagination location' do
96
- let(:uri) { institution_details_with_pagination_location_page_1_uri }
97
-
98
- before do
99
- # Method jump_to_page don't jump to current page (page 1). with total_pages=2, form will submit once
100
- locations_list_page_2 = agent.get(institution_details_with_pagination_location_page_2_uri)
101
- allow_any_instance_of(Mechanize::Form).to receive(:submit).with(nil, {'action' => 'change-location-page'}).and_return(locations_list_page_2)
102
- end
103
-
104
- its(:locations) do
105
- locations = [
106
- #Locations on page 1
107
- CricosScrape::Location.new("456", "Albury", "NSW", "51"),
108
- CricosScrape::Location.new("456", "Bathurst", "NSW", "60"),
109
- CricosScrape::Location.new("456", "Canberra Institute of Technology - City Campus", "ACT", "2"),
110
- CricosScrape::Location.new("456", "CSU Study Centre Melbourne", "VIC", "22"),
111
- CricosScrape::Location.new("456", "CSU Study Centre Sydney", "NSW", "21"),
112
- CricosScrape::Location.new("456", "Dubbo", "NSW", "29"),
113
- CricosScrape::Location.new("456", "Holmesglen Institute of TAFE", "VIC", "3"),
114
- CricosScrape::Location.new("456", "Orange", "NSW", "41"),
115
- CricosScrape::Location.new("456", "Ryde", "NSW", "1"),
116
- CricosScrape::Location.new("456", "St Marks Theological Centre", "ACT", "12"),
117
-
118
- #Locations on page 2
119
- CricosScrape::Location.new("456", "United Theological College", "NSW", "11"),
120
- CricosScrape::Location.new("456", "Wagga Wagga", "NSW", "105"),
121
- ]
122
- is_expected.to eq locations
123
- end
124
-
125
- context 'when the contact officers contains table grid' do
126
- its(:contact_officers) do
127
- data = [
128
- CricosScrape::ContactOfficer.new('Principal Executive Officer', 'Andrew Vann', 'Vice-Chancellor', '02 6338 4209', '02 6338 4809', nil),
129
- CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 63657537', '02 63657590', 'mevans@csu.edu.au'),
130
- CricosScrape::ContactOfficer.new('International Student Contact', 'Matthew Evans', nil, '02 6365 7537', '02 6365 7590', 'mevans@csu.edu.au')
131
- ]
132
- is_expected.to eq data
133
- end
134
- end
135
- end
136
- end
137
-
138
- end
data/spec/spec_helper.rb DELETED
@@ -1,67 +0,0 @@
1
- require_relative '../lib/cricos_scrape'
2
- require 'rspec/its'
3
-
4
- def institution_details_with_po_box_postal_address_uri
5
- file = File.expand_path("../fixtures/institution_details_with_po_box_postal_address.html", __FILE__)
6
- "file://#{file}"
7
- end
8
-
9
- def institution_details_with_trading_name_uri
10
- file = File.expand_path("../fixtures/institution_details_with_trading_name.html", __FILE__)
11
- "file://#{file}"
12
- end
13
-
14
- def not_found_institution_details_uri
15
- file = File.expand_path("../fixtures/not_found_institution_details.html", __FILE__)
16
- "file://#{file}"
17
- end
18
-
19
- def institution_details_without_pagination_location_uri
20
- file = File.expand_path("../fixtures/institution_details_without_pagination_location_uri.html", __FILE__)
21
- "file://#{file}"
22
- end
23
-
24
- def institution_details_with_pagination_location_page_1_uri
25
- file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_1_uri.html", __FILE__)
26
- "file://#{file}"
27
- end
28
-
29
- def institution_details_with_pagination_location_page_2_uri
30
- file = File.expand_path("../fixtures/institution_details_with_pagination_location_page_2_uri.html", __FILE__)
31
- "file://#{file}"
32
- end
33
-
34
- def institution_details_without_locations_details_uri
35
- file = File.expand_path("../fixtures/institution_details_without_locations_details_uri.html", __FILE__)
36
- "file://#{file}"
37
- end
38
-
39
- def course_details_with_contact_officers_table_grid
40
- file = File.expand_path("../fixtures/course_details_with_contact_officers_table_grid.html", __FILE__)
41
- "file://#{file}"
42
- end
43
-
44
- def courses_list_by_location_id_uri
45
- file = File.expand_path("../fixtures/courses_list_by_location_id_uri.html", __FILE__)
46
- "file://#{file}"
47
- end
48
-
49
- def contact_details_of_state_act_uri
50
- file = File.expand_path("../fixtures/contact_details_of_state_act_uri.html", __FILE__)
51
- "file://#{file}"
52
- end
53
-
54
- def contact_details_of_state_wa_uri
55
- file = File.expand_path("../fixtures/contact_details_of_state_wa_uri.html", __FILE__)
56
- "file://#{file}"
57
- end
58
-
59
- def not_found_course_details_uri
60
- file = File.expand_path("../fixtures/not_found_course_details_uri.html", __FILE__)
61
- "file://#{file}"
62
- end
63
-
64
- def course_details_without_pagination_uri
65
- file = File.expand_path("../fixtures/course_details_without_pagination_uri.html", __FILE__)
66
- "file://#{file}"
67
- end