cricos_scrape 2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/CONTRIBUTING.md +51 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +64 -0
  5. data/LICENSE.md +22 -0
  6. data/Procfile +3 -0
  7. data/README.md +40 -0
  8. data/Rakefile +13 -0
  9. data/cricos_scrape.gemspec +31 -0
  10. data/lib/cricos_scrape/agent.rb +9 -0
  11. data/lib/cricos_scrape/bulk_import_courses.rb +31 -0
  12. data/lib/cricos_scrape/bulk_import_institutions.rb +31 -0
  13. data/lib/cricos_scrape/import_contacts.rb +22 -0
  14. data/lib/cricos_scrape/json_struct.rb +11 -0
  15. data/lib/cricos_scrape/version.rb +3 -0
  16. data/lib/cricos_scrape.rb +8 -0
  17. data/spec/contact_importer_spec.rb +76 -0
  18. data/spec/course_importer_spec.rb +71 -0
  19. data/spec/fixtures/contact_details_of_state_act_uri.html +546 -0
  20. data/spec/fixtures/contact_details_of_state_wa_uri.html +546 -0
  21. data/spec/fixtures/course_details_with_contact_officers_table_grid.html +467 -0
  22. data/spec/fixtures/course_details_without_pagination_uri.html +470 -0
  23. data/spec/fixtures/courses_list_by_location_id_uri.html +174 -0
  24. data/spec/fixtures/institution_details_with_pagination_location_page_1_uri.html +406 -0
  25. data/spec/fixtures/institution_details_with_pagination_location_page_2_uri.html +358 -0
  26. data/spec/fixtures/institution_details_with_po_box_postal_address.html +240 -0
  27. data/spec/fixtures/institution_details_with_trading_name.html +322 -0
  28. data/spec/fixtures/institution_details_without_locations_details_uri.html +151 -0
  29. data/spec/fixtures/institution_details_without_pagination_location_uri.html +299 -0
  30. data/spec/fixtures/not_found_course_details_uri.html +837 -0
  31. data/spec/fixtures/not_found_institution_details.html +36 -0
  32. data/spec/institution_importer_spec.rb +138 -0
  33. data/spec/spec_helper.rb +67 -0
  34. metadata +190 -0
@@ -0,0 +1,174 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3
+ <head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>
4
+ Courses at Location - ACT - Amaroo School
5
+ </title><meta name="Title" content="The Australian Commonwealth Register of Institutions and Courses for Overseas Students (CRICOS)" /><meta name="Description" content="This is the official Australian Government website that lists all Australian education providers and registered courses for overseas students" /><meta name="Keywords" content="Australian Education providers,Course Search,Institution Search,CRICOS Contacts,Study In Australia,CRICOS, Commonwealth Register of Institutions and Courses for Overseas Students" /><link href="../Common/Styles/Styles.css?v=20130114" rel="stylesheet" type="text/css" /><link type="text/css" href="../Common/Styles/jquery-ui-1.8.9.custom.css" rel="Stylesheet" />
6
+ <script type="text/javascript">
7
+
8
+ var _gaq = _gaq || [];
9
+ _gaq.push(['_setAccount', 'UA-42418309-1']);
10
+ _gaq.push(['_trackPageview']);
11
+
12
+ (function () {
13
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
14
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
15
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
16
+ })();
17
+ </script>
18
+ <link href="../App_Themes/Theme1/Theme1.css" type="text/css" rel="stylesheet" /></head>
19
+ <body>
20
+ <form method="post" action="CourseList.aspx?LocationID=5141" id="aspnetForm">
21
+ <div class="aspNetHidden">
22
+ <input type="hidden" name="__EVENTTARGET" id="__EVENTTARGET" value="" />
23
+ <input type="hidden" name="__EVENTARGUMENT" id="__EVENTARGUMENT" value="" />
24
+ <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKLTg5MjE0OTU1MQ9kFgJmD2QWAgIDD2QWBAIRD2QWCAIBDxYCHgRUZXh0BSZBQ1QgRWR1Y2F0aW9uIGFuZCBUcmFpbmluZyBEaXJlY3RvcmF0ZWQCAw8WAh8ABT1BQ1QgLSBBbWFyb28gU2Nob29sIC0gTG9jYXRpb24gb3duZWQgYW5kIG9wZXJhdGVkIGJ5IHByb3ZpZGVyZAIFDw8WAh8ABVtUbyBkaXNwbGF5IGEgcGFydGljdWxhciBjb3Vyc2UncyBkZXRhaWxzIHBsZWFzZSBjbGljayBvbiB0aGUgY291cnNlIG5hbWUgaW4gdGhlIGxpc3QgYmVsb3cuZGQCBw88KwARAwAPFgQeC18hRGF0YUJvdW5kZx4LXyFJdGVtQ291bnQCAWQBEBYAFgAWAAwUKwAAFgJmD2QWBgIBDw9kFgIeB29uY2xpY2sFhgFqYXZhc2NyaXB0OmlmIChncmlkc0FjdGl2ZSkgeyBncmlkc0FjdGl2ZT1mYWxzZTsgX19kb1Bvc3RCYWNrKCdjdGwwMCRjcGhEZWZhdWx0UGFnZSRncmlkU2VhcmNoUmVzdWx0cycsJ2NsaWNrLTAnKTsgZ3JpZHNBY3RpdmU9dHJ1ZTsgfRYIZg8PFgIfAAUlU2Vjb25kYXJ5IEp1bmlvciB0byBTZW5pb3IgWWVhcnMgNy0xMmRkAgEPDxYCHwAFGEp1bmlvciBTZWNvbmRhcnkgU3R1ZGllc2RkAgIPDxYCHwAFAzMxMmRkAgMPZBYCZg8VBCVTZWNvbmRhcnkgSnVuaW9yIHRvIFNlbmlvciBZZWFycyA3LTEyGEp1bmlvciBTZWNvbmRhcnkgU3R1ZGllcxhKdW5pb3IgU2Vjb25kYXJ5IFN0dWRpZXMDMzEyZAICDw8WAh4HVmlzaWJsZWhkZAIDDw8WAh8EaGRkAhMPFgIfBGgWBgIBDw8WAh8ABQc0LjEuMC4wZGQCAw8PFgIfAAUKUFJPRFVDVElPTmRkAgUPDxYCHwAFCklQQVBQMDA0UzFkZBgBBSZjdGwwMCRjcGhEZWZhdWx0UGFnZSRncmlkU2VhcmNoUmVzdWx0cw88KwAMAwYVAQhDb3Vyc2VJZAcUKwABFCsAAQKPJQgCAWQ=" />
25
+ </div>
26
+
27
+ <script type="text/javascript">
28
+ //<![CDATA[
29
+ var theForm = document.forms['aspnetForm'];
30
+ if (!theForm) {
31
+ theForm = document.aspnetForm;
32
+ }
33
+ function __doPostBack(eventTarget, eventArgument) {
34
+ if (!theForm.onsubmit || (theForm.onsubmit() != false)) {
35
+ theForm.__EVENTTARGET.value = eventTarget;
36
+ theForm.__EVENTARGUMENT.value = eventArgument;
37
+ theForm.submit();
38
+ }
39
+ }
40
+ //]]>
41
+ </script>
42
+
43
+
44
+ <script src="/WebResource.axd?d=pynGkmcFUV13He1Qd6_TZKXb13Ai-C3xLAuRjAntVKr1ay1w0d8eD1ml3tVl9sUrzWACpQ2&amp;t=634773866700000000" type="text/javascript"></script>
45
+
46
+
47
+ <script src="../Common/ClientScript/jquery-1.9.1.min.js" type="text/javascript"></script>
48
+ <script src="../Common/ClientScript/jquery-ui-1.10.3.min.js" type="text/javascript"></script>
49
+ <script src="../Common/ClientScript/json.js" type="text/javascript"></script>
50
+ <script src="../Common/ClientScript/common.js" type="text/javascript"></script>
51
+ <script type='text/javascript' language='javascript'>
52
+ // <![CDATA[
53
+ var gridsActive=true;
54
+ function HandleGridMouseOver(source)
55
+ {
56
+ if (gridsActive)
57
+ {
58
+ if (typeof gridFromCRICOSSite != 'undefined')
59
+ {
60
+ source.style.backgroundColor='#F1E6E7';
61
+ source.style.cursor='pointer';
62
+
63
+ }
64
+ else
65
+ {
66
+ source.style.backgroundColor='#d9e6ed';
67
+ source.style.cursor='pointer';
68
+ }
69
+ }
70
+ }
71
+ function HandleGridMouseOut(source)
72
+ {
73
+ if (gridsActive)
74
+ {
75
+ source.style.backgroundColor='';
76
+ source.style.cursor='default';
77
+ }
78
+ }
79
+ // ]] >
80
+ </script>
81
+
82
+ <script src="/ScriptResource.axd?d=qph9tUZ6hGPLbkznkRkqTbRNRCEAymCZ5hOwYZYDR0UwCXAqD5zBXrVsKVnk7lTxw1CAcGTrY19qqsiwqouqaoU4FUWPHdMZetetysSPxbVn1QpH0&amp;t=ffffffffdc32f924" type="text/javascript"></script>
83
+ <script src="/ScriptResource.axd?d=TvpD2YGOOsCm1yWcLkKnBYs9LjwccYFLM7eXTjm-Q7kkbkgYcHoslw8QWgDGZiPzvk38rzhhDvz-UVdfBs9h5-2aeU0thnEZnvpVv0QuP5m6tW1S0&amp;t=ffffffffdc32f924" type="text/javascript"></script>
84
+ <script type="text/javascript">
85
+ //<![CDATA[
86
+ Sys.WebForms.PageRequestManager._initialize('ctl00$ScriptManager', 'aspnetForm', [], [], [], 90, 'ctl00');
87
+ //]]>
88
+ </script>
89
+
90
+
91
+ <a id="top"></a>
92
+ <div>
93
+ <a id="skipNav" href="#Content" tabindex="1">Skip to main content</a>
94
+ </div>
95
+ <table role="presentation" border="0" cellpadding="0" cellspacing="0" class="tblHeader" id="AutoNumber1">
96
+ <tr>
97
+ <td>
98
+ <img src="../images/Dept-Education_Inline_rev-optimal.png" alt="Australian Government - Department of Education" style="margin-left: 7px; margin-top: 7px; margin-bottom: 7px;" height="65" width="261" />
99
+ </td>
100
+ <td style="text-align:right;">
101
+ <a href="../default.aspx" style="border: 0">
102
+ <img src="../images/cricos.gif" alt="CRICOS - Commonwealth Register of Institutions and Courses for Overseas Students" style="border-style: none; margin: 0px 0px;" width="414" height="75" /></a>
103
+ </td>
104
+ </tr>
105
+ </table>
106
+ <div id="divMenu">
107
+ <ul>
108
+ <li><a href="../default.aspx">Home</a></li>
109
+ <li><a href="CourseSearch.aspx">Course Search</a></li>
110
+ <li><a href="../Institution/InstitutionSearch.aspx">Institution Search</a></li>
111
+ <li><a href="../Contacts/CRICOSContacts.aspx">CRICOS Contacts</a></li>
112
+ <li><a href="http://studyinaustralia.gov.au" target="_blank">Study In Australia</a></li>
113
+ </ul>
114
+ </div>
115
+
116
+ <div id="Content" tabindex="-1"></div>
117
+
118
+ <div id="contentBody">
119
+
120
+ <script type="text/javascript" language="javascript">
121
+ // <![CDATA[
122
+ var gridFromCRICOSSite = true;
123
+ // ]]>
124
+ </script>
125
+ <h1>ACT Education and Training Directorate</h1>
126
+ <p>Courses at Location: ACT - Amaroo School - Location owned and operated by provider</p>
127
+ <span id="ctl00_cphDefaultPage_lblResultsSummary" class="clsSmallText">To display a particular course's details please click on the course name in the list below.</span>
128
+ <table role="presentation" cellspacing="0" cellpadding="0" width="100%" border="0">
129
+ <tr>
130
+ <td valign="top">
131
+ <div>
132
+ <table class="grid" cellspacing="0" rules="rows" summary="This table shows the courses offered at this location." border="1" id="ctl00_cphDefaultPage_gridSearchResults" style="width:100%;border-collapse:collapse;">
133
+ <caption>
134
+ Course List
135
+ </caption><tr class="gridHeader" align="left">
136
+ <th scope="col"><a href="javascript:__doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;Sort$CourseName&#39;)">Course Name</a><span style="cursor:default;padding-left:2px;"><img src='/Images/downarrow.png' alt='Sorted by Ascending' title='Sorted by Ascending' /></span></th><th scope="col"><a href="javascript:__doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;Sort$CourseLevel&#39;)">Level</a></th><th align="right" scope="col"><a href="javascript:__doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;Sort$CourseDuration&#39;)">Duration(Wks)</a></th><th scope="col">&nbsp;</th>
137
+ </tr><tr class="gridRow" onclick="javascript:if (gridsActive) { gridsActive=false; __doPostBack(&#39;ctl00$cphDefaultPage$gridSearchResults&#39;,&#39;click-0&#39;); gridsActive=true; }">
138
+ <td>Secondary Junior to Senior Years 7-12</td><td>Junior Secondary Studies</td><td align="right">312</td><td class="navCol">
139
+ <a class="readers" href="#" onclick=" return false; " onfocus=" javascript:HandleGridMouseOver(this.parentElement.parentElement); " onblur=" javascript:HandleGridMouseOut(this.parentElement.parentElement); ">Secondary Junior to Senior Years 7-12 - Junior Secondary Studies - Junior Secondary Studies, 312</a>
140
+ </td>
141
+ </tr>
142
+ </table>
143
+ </div>
144
+ </td>
145
+ </tr>
146
+ </table>
147
+
148
+ </div>
149
+ <hr style="border: 0; color: #d7e0f6; background-color: #d7e0f6;" />
150
+ <table role="presentation" border="0" cellpadding="10" cellspacing="0" style="border-collapse: collapse;"
151
+ width="100%" id="Table1">
152
+ <tr>
153
+ <td style="font-size: xx-small; color: #606060; cursor: default; line-height: 11px; text-align: center;">
154
+
155
+ <br />
156
+ <a href="http://education.gov.au" target="_blank">Department of Education</a> |
157
+ <a href="mailto:prisms@education.gov.au" target="_blank">Webmaster</a>
158
+ <br />
159
+ <a href="http://education.gov.au/copyright" target="_blank">Copyright &copy; Commonwealth of Australia</a>&nbsp;|
160
+ <a href="http://education.gov.au/privacy" target="_blank">Department of Education Web Site Privacy Statement</a>&nbsp;|
161
+ <a href="http://education.gov.au/disclaimer" target="_blank">Disclaimer</a>
162
+ </td>
163
+ <td style="text-align: left;">
164
+ <a href="#top">
165
+ <img src="../images/TopOfPage.gif" alt="Top" style="border-style: none;" width="27" height="23" />
166
+ </a>
167
+ </td>
168
+ </tr>
169
+ </table>
170
+ <input name="ctl00$tabState" type="hidden" id="ctl00_tabState" />
171
+ </form>
172
+ <input type="button" id="btnDummy" name="btnDummy" value="Dummy" style="display: none" />
173
+ </body>
174
+ </html>