cricos_scrape 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +51 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +64 -0
  5. data/LICENSE.md +22 -0
  6. data/Procfile +3 -0
  7. data/README.md +40 -0
  8. data/Rakefile +13 -0
  9. data/cricos_scrape.gemspec +31 -0
  10. data/lib/cricos_scrape/agent.rb +9 -0
  11. data/lib/cricos_scrape/bulk_import_courses.rb +31 -0
  12. data/lib/cricos_scrape/bulk_import_institutions.rb +31 -0
  13. data/lib/cricos_scrape/import_contacts.rb +22 -0
  14. data/lib/cricos_scrape/json_struct.rb +11 -0
  15. data/lib/cricos_scrape/version.rb +3 -0
  16. data/lib/cricos_scrape.rb +8 -0
  17. data/spec/contact_importer_spec.rb +76 -0
  18. data/spec/course_importer_spec.rb +71 -0
  19. data/spec/fixtures/contact_details_of_state_act_uri.html +546 -0
  20. data/spec/fixtures/contact_details_of_state_wa_uri.html +546 -0
  21. data/spec/fixtures/course_details_with_contact_officers_table_grid.html +467 -0
  22. data/spec/fixtures/course_details_without_pagination_uri.html +470 -0
  23. data/spec/fixtures/courses_list_by_location_id_uri.html +174 -0
  24. data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +406 -0
  25. data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +358 -0
  26. data/spec/fixtures/institution_details_with_po_box_postal_address.html +240 -0
  27. data/spec/fixtures/institution_details_with_trading_name.html +322 -0
  28. data/spec/fixtures/institution_details_without_locations_details_uri.html +151 -0
  29. data/spec/fixtures/institution_details_without_pagination_location_uri.html +299 -0
  30. data/spec/fixtures/not_found_course_details_uri.html +837 -0
  31. data/spec/fixtures/not_found_institution_details.html +36 -0
  32. data/spec/institution_importer_spec.rb +138 -0
  33. data/spec/spec_helper.rb +67 -0
  34. metadata +190 -0
@@ -0,0 +1,174 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+ <head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>
4
+ Courses at Location - ACT - Amaroo School
5
+ </title><meta name="Title" content="The Australian Commonwealth Register of Institutions and Courses for Overseas Students (CRICOS)" /><meta name="Description" content="This is the official Australian Government website that lists all Australian education providers and registered courses for overseas students" /><meta name="Keywords" content="Australian Education providers,Course Search,Institution Search,CRICOS Contacts,Study In Australia,CRICOS, Commonwealth Register of Institutions and Courses for Overseas Students" /><link href="../Common/Styles/Styles.css?v=20130114" rel="stylesheet" type="text/css" /><link type="text/css" href="../Common/Styles/jquery-ui-1.8.9.custom.css" rel="Stylesheet" />
6
+ <script type="text/javascript">
7
+
8
+ var _gaq = _gaq || []; // Reuse an already-defined _gaq queue, otherwise start with an empty array.
9
+ _gaq.push(['_setAccount', 'UA-42418309-1']); // Queue the '_setAccount' command with the tracking ID.
10
+ _gaq.push(['_trackPageview']); // Queue the '_trackPageview' command.
11
+
12
+ (function () { // Immediately-invoked function: ga and s stay local to it.
13
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; // Create an async <script> element.
14
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; // Use the ssl host on https pages and the www host otherwise.
15
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); // Insert the new element before the first <script> in the document.
16
+ })();
17
+ </script>
18
+ <link href="../App_Themes/Theme1/Theme1.css" type="text/css" rel="stylesheet" /></head>
19
+ <body>
20
+ <form method="post" action="CourseList.aspx?LocationID=5141" id="aspnetForm">
21
+ <div class="aspNetHidden">
22
+ <input type="hidden" name="__EVENTTARGET" id="__EVENTTARGET" value="" />
23
+ <input type="hidden" name="__EVENTARGUMENT" id="__EVENTARGUMENT" value="" />
24
+ <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKLTg5MjE0OTU1MQ9kFgJmD2QWAgIDD2QWBAIRD2QWCAIBDxYCHgRUZXh0BSZBQ1QgRWR1Y2F0aW9uIGFuZCBUcmFpbmluZyBEaXJlY3RvcmF0ZWQCAw8WAh8ABT1BQ1QgLSBBbWFyb28gU2Nob29sIC0gTG9jYXRpb24gb3duZWQgYW5kIG9wZXJhdGVkIGJ5IHByb3ZpZGVyZAIFDw8WAh8ABVtUbyBkaXNwbGF5IGEgcGFydGljdWxhciBjb3Vyc2UncyBkZXRhaWxzIHBsZWFzZSBjbGljayBvbiB0aGUgY291cnNlIG5hbWUgaW4gdGhlIGxpc3QgYmVsb3cuZGQCBw88KwARAwAPFgQeC18hRGF0YUJvdW5kZx4LXyFJdGVtQ291bnQCAWQBEBYAFgAWAAwUKwAAFgJmD2QWBgIBDw9kFgIeB29uY2xpY2sFhgFqYXZhc2NyaXB0OmlmIChncmlkc0FjdGl2ZSkgeyBncmlkc0FjdGl2ZT1mYWxzZTsgX19kb1Bvc3RCYWNrKCdjdGwwMCRjcGhEZWZhdWx0UGFnZSRncmlkU2VhcmNoUmVzdWx0cycsJ2NsaWNrLTAnKTsgZ3JpZHNBY3RpdmU9dHJ1ZTsgfRYIZg8PFgIfAAUlU2Vjb25kYXJ5IEp1bmlvciB0byBTZW5pb3IgWWVhcnMgNy0xMmRkAgEPDxYCHwAFGEp1bmlvciBTZWNvbmRhcnkgU3R1ZGllc2RkAgIPDxYCHwAFAzMxMmRkAgMPZBYCZg8VBCVTZWNvbmRhcnkgSnVuaW9yIHRvIFNlbmlvciBZZWFycyA3LTEyGEp1bmlvciBTZWNvbmRhcnkgU3R1ZGllcxhKdW5pb3IgU2Vjb25kYXJ5IFN0dWRpZXMDMzEyZAICDw8WAh4HVmlzaWJsZWhkZAIDDw8WAh8EaGRkAhMPFgIfBGgWBgIBDw8WAh8ABQc0LjEuMC4wZGQCAw8PFgIfAAUKUFJPRFVDVElPTmRkAgUPDxYCHwAFCklQQVBQMDA0UzFkZBgBBSZjdGwwMCRjcGhEZWZhdWx0UGFnZSRncmlkU2VhcmNoUmVzdWx0cw88KwAMAwYVAQhDb3Vyc2VJZAcUKwABFCsAAQKPJQgCAWQ=" />
25
+ </div>
26
+
27
+ <script type="text/javascript">
28
+ //<![CDATA[
29
+ var theForm = document.forms['aspnetForm']; // Look the page form up by name in document.forms.
30
+ if (!theForm) { // Fall back to the document.aspnetForm property when the lookup above found nothing.
31
+ theForm = document.aspnetForm;
32
+ }
33
+ function __doPostBack(eventTarget, eventArgument) { // Stores eventTarget/eventArgument in the form's __EVENTTARGET/__EVENTARGUMENT fields and submits the form.
34
+ if (!theForm.onsubmit || (theForm.onsubmit() != false)) { // Proceed when there is no onsubmit handler, or when calling it does not return false.
35
+ theForm.__EVENTTARGET.value = eventTarget;
36
+ theForm.__EVENTARGUMENT.value = eventArgument;
37
+ theForm.submit();
38
+ }
39
+ }
40
+ //]]>
41
+ </script>
42
+
43
+
44
+ <script src="/WebResource.axd?d=pynGkmcFUV13He1Qd6_TZKXb13Ai-C3xLAuRjAntVKr1ay1w0d8eD1ml3tVl9sUrzWACpQ2&amp;t=634773866700000000" type="text/javascript"></script>
45
+
46
+
47
+ <script src="../Common/ClientScript/jquery-1.9.1.min.js" type="text/javascript"></script>
48
+ <script src="../Common/ClientScript/jquery-ui-1.10.3.min.js" type="text/javascript"></script>
49
+ <script src="../Common/ClientScript/json.js" type="text/javascript"></script>
50
+ <script src="../Common/ClientScript/common.js" type="text/javascript"></script>
51
+ <script type='text/javascript' language='javascript'>
52
+ // <![CDATA[
53
+ var gridsActive=true; // Flag read by both grid handlers below; the grid row's onclick sets it to false around its __doPostBack call and then back to true.
54
+ function HandleGridMouseOver(source) // Applies a highlight background and a pointer cursor to source.style.
55
+ {
56
+ if (gridsActive) // Do nothing while gridsActive is false.
57
+ {
58
+ if (typeof gridFromCRICOSSite != 'undefined') // One highlight colour when gridFromCRICOSSite has been defined, another when it has not.
59
+ {
60
+ source.style.backgroundColor='#F1E6E7';
61
+ source.style.cursor='pointer';
62
+
63
+ }
64
+ else
65
+ {
66
+ source.style.backgroundColor='#d9e6ed';
67
+ source.style.cursor='pointer';
68
+ }
69
+ }
70
+ }
71
+ function HandleGridMouseOut(source) // Resets the inline background colour and cursor on source.style.
72
+ {
73
+ if (gridsActive) // Do nothing while gridsActive is false.
74
+ {
75
+ source.style.backgroundColor=''; // An empty string clears the inline background colour.
76
+ source.style.cursor='default';
77
+ }
78
+ }
79
+ // ]]>
80
+ </script>
81
+
82
+ <script src="/ScriptResource.axd?d=qph9tUZ6hGPLbkznkRkqTbRNRCEAymCZ5hOwYZYDR0UwCXAqD5zBXrVsKVnk7lTxw1CAcGTrY19qqsiwqouqaoU4FUWPHdMZetetysSPxbVn1QpH0&amp;t=ffffffffdc32f924" type="text/javascript"></script>
83
+ <script src="/ScriptResource.axd?d=TvpD2YGOOsCm1yWcLkKnBYs9LjwccYFLM7eXTjm-Q7kkbkgYcHoslw8QWgDGZiPzvk38rzhhDvz-UVdfBs9h5-2aeU0thnEZnvpVv0QuP5m6tW1S0&amp;t=ffffffffdc32f924" type="text/javascript"></script>
84
+ <script type="text/javascript">
85
+ //<![CDATA[
86
+ Sys.WebForms.PageRequestManager._initialize('ctl00$ScriptManager', 'aspnetForm', [], [], [], 90, 'ctl00');
87
+ //]]>
88
+ </script>
89
+
90
+
91
+ <a id="top"></a>
92
+ <div>
93
+ <a id="skipNav" href="#Content" tabindex="1">Skip to main content</a>
94
+ </div>
95
+ <table role="presentation" border="0" cellpadding="0" cellspacing="0" class="tblHeader" id="AutoNumber1">
96
+ <tr>
97
+ <td>
98
+ <img src="../images/Dept-Education_Inline_rev-optimal.png" alt="Australian Government - Department of Education" style="margin-left: 7px; margin-top: 7px; margin-bottom: 7px;" height="65" width="261" />
99
+ </td>
100
+ <td style="text-align:right;">
101
+ <a href="../default.aspx" style="border: 0">
102
+ <img src="../images/cricos.gif" alt="CRICOS - Commonwealth Register of Institutions and Courses for Overseas Students" style="border-style: none; margin: 0px 0px;" width="414" height="75" /></a>
103
+ </td>
104
+ </tr>
105
+ </table>
106
+ <div id="divMenu">
107
+ <ul>
108
+ <li><a href="../default.aspx">Home</a></li>
109
+ <li><a href="CourseSearch.aspx">Course Search</a></li>
110
+ <li><a href="../Institution/InstitutionSearch.aspx">Institution Search</a></li>
111
+ <li><a href="../Contacts/CRICOSContacts.aspx">CRICOS Contacts</a></li>
112
+ <li><a href="http://studyinaustralia.gov.au" target="_blank">Study In Australia</a></li>
113
+ </ul>
114
+ </div>
115
+
116
+ <div id="Content" tabindex="-1"></div>
117
+
118
+ <div id="contentBody">
119
+
120
+ <script type="text/javascript" language="javascript">
121
+ // <![CDATA[
122
+ var gridFromCRICOSSite = true;
123
+ // ]]>
124
+ </script>
125
+ <h1>ACT Education and Training Directorate</h1>
126
+ <p>Courses at Location: ACT - Amaroo School - Location owned and operated by provider</p>
127
+ <span id="ctl00_cphDefaultPage_lblResultsSummary" class="clsSmallText">To display a particular course's details please click on the course name in the list below.</span>
128
+ <table role="presentation" cellspacing="0" cellpadding="0" width="100%" border="0">
129
+ <tr>
130
+ <td valign="top">
131
+ <div>
132
+ <table class="grid" cellspacing="0" rules="rows" summary="This table shows the courses offered at this location." border="1" id="ctl00_cphDefaultPage_gridSearchResults" style="width:100%;border-collapse:collapse;">
133
+ <caption>
134
+ Course List
135
+ </caption><tr class="gridHeader" align="left">
136
+ <th scope="col"><a href="javascript:__doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;Sort$CourseName&#39;)">Course Name</a><span style="cursor:default;padding-left:2px;"><img src='/Images/downarrow.png' alt='Sorted by Ascending' title='Sorted by Ascending' /></span></th><th scope="col"><a href="javascript:__doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;Sort$CourseLevel&#39;)">Level</a></th><th align="right" scope="col"><a href="javascript:__doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;Sort$CourseDuration&#39;)">Duration(Wks)</a></th><th scope="col">&nbsp;</th>
137
+ </tr><tr class="gridRow" onclick="javascript:if (gridsActive) { gridsActive=false; __doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;click-0&#39;); gridsActive=true; }">
138
+ <td>Secondary Junior to Senior Years 7-12</td><td>Junior Secondary Studies</td><td align="right">312</td><td class="navCol">
139
+ <a class="readers" href="#" onclick=" return false; " onfocus=" javascript:HandleGridMouseOver(this.parentElement.parentElement); " onblur=" javascript:HandleGridMouseOut(this.parentElement.parentElement); ">Secondary Junior to Senior Years 7-12 - Junior Secondary Studies - Junior Secondary Studies, 312</a>
140
+ </td>
141
+ </tr>
142
+ </table>
143
+ </div>
144
+ </td>
145
+ </tr>
146
+ </table>
147
+
148
+ </div>
149
+ <hr style="border: 0; color: #d7e0f6; background-color: #d7e0f6;" />
150
+ <table role="presentation" border="0" cellpadding="10" cellspacing="0" style="border-collapse: collapse;"
151
+ width="100%" id="Table1">
152
+ <tr>
153
+ <td style="font-size: xx-small; color: #606060; cursor: default; line-height: 11px; text-align: center;">
154
+
155
+ <br />
156
+ <a href="http://education.gov.au" target="_blank">Department of Education</a> |
157
+ <a href="mailto:prisms@education.gov.au" target="_blank">Webmaster</a>
158
+ <br />
159
+ <a href="http://education.gov.au/copyright" target="_blank">Copyright &copy; Commonwealth of Australia</a>&nbsp;|
160
+ <a href="http://education.gov.au/privacy" target="_blank">Department of Education Web Site Privacy Statement</a>&nbsp;|
161
+ <a href="http://education.gov.au/disclaimer" target="_blank">Disclaimer</a>
162
+ </td>
163
+ <td style="text-align: left;">
164
+ <a href="#top">
165
+ <img src="../images/TopOfPage.gif" alt="Top" style="border-style: none;" width="27" height="23" />
166
+ </a>
167
+ </td>
168
+ </tr>
169
+ </table>
170
+ <input name="ctl00$tabState" type="hidden" id="ctl00_tabState" />
171
+ </form>
172
+ <input type="button" id="btnDummy" name="btnDummy" value="Dummy" style="display: none" />
173
+ </body>
174
+ </html>