mechanize_content 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/.rvmrc +1 -0
  2. data/Rakefile +5 -0
  3. data/lib/mechanize_content/image.rb +61 -0
  4. data/lib/mechanize_content/page.rb +115 -0
  5. data/lib/mechanize_content/util.rb +4 -31
  6. data/lib/mechanize_content/version.rb +1 -1
  7. data/lib/mechanize_content.rb +8 -168
  8. data/mechanize_content.gemspec +2 -1
  9. data/spec/cassettes/MechanizeContent.yml +33742 -0
  10. data/spec/cassettes/MechanizeContent_Image.yml +279 -0
  11. data/spec/mechanize_content/image_spec.rb +39 -0
  12. data/spec/mechanize_content/page_spec.rb +90 -0
  13. data/spec/mechanize_content_spec.rb +149 -0
  14. data/spec/spec_helper.rb +11 -1
  15. metadata +29 -45
  16. data/spec/fixtures/a-fistful-of-red-dead-redemption-ps3-for-a-few-dollars-less-on.html +0 -754
  17. data/spec/fixtures/another-world-15th-anniversary-edition-now-on-gog-com.html +0 -2416
  18. data/spec/fixtures/another_world_15th_anniversary_edition.html +0 -805
  19. data/spec/fixtures/cmp.html +0 -333
  20. data/spec/fixtures/episodes_from_liberty_city_now_coming_to_playstation_3_and_pc_this_april.html +0 -1593
  21. data/spec/fixtures/gdc_2010_rounds_off_indie_cove.html +0 -698
  22. data/spec/fixtures/google.html +0 -42
  23. data/spec/fixtures/gta-iv-episodes-from-liberty-city-sees-slight-delay-on-pc-and-ps3.html +0 -1012
  24. data/spec/fixtures/johnny.jpg +0 -0
  25. data/spec/fixtures/joystiq-xbox-usb-support-580.jpg +0 -0
  26. data/spec/fixtures/mutiny.html +0 -264
  27. data/spec/fixtures/nuff-said-good-old-games-gets-another-world-168150.html +0 -5492
  28. data/spec/fixtures/rock-band-3-out-this-holiday-will-revolutionize-genre.html +0 -1157
  29. data/spec/fixtures/rockband_facebook.html +0 -93
  30. data/spec/fixtures/spartan.html +0 -391
  31. data/spec/fixtures/techmeme.html +0 -2216
  32. data/spec/fixtures/time-warner-retail-egm.html +0 -49
  33. data/spec/fixtures/witcher.html +0 -458
  34. data/spec/fixtures/xbox-360-gaining-usb-storage-support-in-2010-update.html +0 -2462
  35. data/spec/mechanize-content_spec.rb +0 -202
Binary file
@@ -1,264 +0,0 @@
1
- HTTP/1.1 200 OK
2
- Date: Fri, 23 Jul 2010 19:25:47 GMT
3
- Server: Apache
4
- Expires: Thu, 19 Nov 1981 08:52:00 GMT
5
- Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
6
- Pragma: no-cache
7
- Set-Cookie: PHPSESSID=39eqn5qsttllq9oj3edv332723kbu3ge; path=/
8
- Vary: Accept-Encoding
9
- Transfer-Encoding: chunked
10
- Content-Type: text/html
11
-
12
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
13
- <html xmlns="http://www.w3.org/1999/xhtml">
14
- <head>
15
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><meta name="description" content="An overview of the HTML base element and how it can be harnessed with server-side scripting to create smoother web applications." /><meta name="author" content="Mutiny Design - Web Design and Development - http://www.mutinydesign.co.uk" /><title>HTML Base Tag | Mutiny Design</title><base href="http://www.mutinydesign.co.uk/" /><link type="text/css" rel="stylesheet" href="http://static.mutinydesign.co.uk/assets/css/main.css" media="all" /><!--[if lte IE 7]><link type="text/css" rel="stylesheet" href="assets/css/ie.css" media="all" /><![endif]--><!--[if IE 6]><link type="text/css" rel="stylesheet" href="assets/css/ie6.css" media="all" /><![endif]--><link type="text/css" rel="stylesheet" href="http://static.mutinydesign.co.uk/assets/css/slimbox.css" media="all" /><script type="text/javascript" src="http://static.mutinydesign.co.uk/assets/js/mootools-core.js"></script><script type="text/javascript" src="http://static.mutinydesign.co.uk/assets/js/site.js"></script><script type="text/javascript" src="http://static.mutinydesign.co.uk/assets/js/lighter/Lighter.js"></script><script type="text/javascript" src="http://static.mutinydesign.co.uk/assets/js/slimbox.js"></script><script type="text/javascript">
16
- var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
17
- document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
18
- </script><script type="text/javascript">
19
- try {
20
- var pageTracker = _gat._getTracker("UA-4488157-4");
21
- pageTracker._trackPageview();
22
- } catch(err) {}</script></head>
23
- <body>
24
- <div id="pg">
25
- <div id="hd">
26
- <div id="hdBar">
27
- <a href="." id="hdLogo">
28
- <img src="http://static.mutinydesign.co.uk/assets/img/bgLogo.png" alt="Welcome to Mutiny Web Design" title="Welcome to Mutiny Web Design" />
29
- </a>
30
- <div id="hdNav">
31
- <div class="navc">
32
- <a href="about/" class="top">
33
- <span class="t">About Us</span>
34
- </a>
35
- <p class="sub">
36
- <a href="contact/">Contact Us</a>
37
- <a href="about/">Mutiny Design</a>
38
- <a href="price-promise/">Price Promise</a>
39
- <a href="quote-guarantee/">Guarantee</a>
40
- <a href="testimonials/">Testimonials</a>
41
- <a href="privacy-policy/">Privacy Policy</a>
42
- </p>
43
- </div>
44
- <div class="navc">
45
- <a href="portfolio/" class="top">
46
- <span class="t">Portfolio</span>
47
- </a>
48
- <p class="sub"></p>
49
- </div>
50
- <div class="navc">
51
- <a class="top">
52
- <span class="t">Resources</span>
53
- </a>
54
- <p class="sub">
55
- <a href="web-design-resources/">Web design resources</a>
56
- <a href="scripts/">Technical resources</a>
57
- <a href="free-scripts/">Free scripts</a>
58
- <a href="seo-tools-and-resources/">SEO resources</a>
59
- <a href="free-stock-images/">Free stock images</a>
60
- </p>
61
- </div>
62
- <div class="navc" id="navQuote">
63
- <a href="quote/" class="top">
64
- <span class="t">Quote Now</span>
65
- </a>
66
- </div>
67
- </div>
68
- </div>
69
- <div id="hdTitle"></div>
70
- </div>
71
- <div id="bd">
72
- <h1 id="titleBar"><span>HTML</span> Base Tag<div class="social"><a target="_blank" rel="nofollow" href="http://www.facebook.com/sharer.php?u=http%3A%2F%2Fwww.mutinydesign.co.uk%2Fscripts%2Fhtml-base-tag---1%2F" class=""><img src="assets/img/ico/facebook.png" /></a><a target="_blank" rel="nofollow" href="http://digg.com/submit?url=http%3A%2F%2Fwww.mutinydesign.co.uk%2Fscripts%2Fhtml-base-tag---1%2F&amp;amp;title=HTML+Base+Tag" class=""><img src="assets/img/ico/digg.png" /></a><a target="_blank" rel="nofollow" href="http://del.icio.us/post?url=http%3A%2F%2Fwww.mutinydesign.co.uk%2Fscripts%2Fhtml-base-tag---1%2F&amp;amp;title=HTML+Base+Tag" class=""><img src="assets/img/ico/delicious.png" /></a><a target="_blank" rel="nofollow" href="http://reddit.com/submit?url=http%3A%2F%2Fwww.mutinydesign.co.uk%2Fscripts%2Fhtml-base-tag---1%2F&amp;amp;title=HTML+Base+Tag" class=""><img src="assets/img/ico/reddit.png" /></a><a target="_blank" rel="nofollow" href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fwww.mutinydesign.co.uk%2Fscripts%2Fhtml-base-tag---1%2F&amp;amp;title=HTML+Base+Tag" class=""><img src="assets/img/ico/stumbleupon.png" /></a><a target="_blank" rel="nofollow" href="http://twitter.com/home?status=HTML+Base+Tag by Mutiny Design: http://is.gd/aVE1W" class=""><img src="assets/img/ico/twitter.png" /></a></div></h1>
73
- <div id="content">
74
- <div id="column_right" class="column col_resources_right">
75
- <div class="box res_articles">
76
- <h2><span>Other</span> articles</h2>
77
- <ul class="folders">
78
- <li>
79
- <a href="http://www.mutinydesign.co.uk/scripts/center-a-web-page-in-css/">Center a web page in CSS »
80
- </a>
81
- </li>
82
- <li>
83
- <a href="http://www.mutinydesign.co.uk/scripts/basics-of-database-optimisation/">Basics of Database optimisation »
84
- </a>
85
- </li>
86
- <li>
87
- <a href="http://www.mutinydesign.co.uk/scripts/how-to-configure-outlook-express-to-receive-emails/">How to configure Outlook Express to receive emails »
88
- </a>
89
- </li>
90
- <li>
91
- <a href="http://www.mutinydesign.co.uk/scripts/how-to-configure-outlook-2007-to-receive-emails/">How to configure Outlook 2007 to receive emails »
92
- </a>
93
- </li>
94
- <li>
95
- <a href="http://www.mutinydesign.co.uk/scripts/html-base-tag---1/">HTML Base Tag »
96
- </a>
97
- </li>
98
- <li>
99
- <a href="http://www.mutinydesign.co.uk/scripts/scaleable-html-and-css-guide-part-1--introduction---4/">Scaleable HTML and CSS Guide, Part 1 - Introduction »
100
- </a>
101
- </li>
102
- <li>
103
- <a href="http://www.mutinydesign.co.uk/scripts/problems-encountered-with-php-dom-functions---3/">Problems Encountered With PHP DOM Functions »
104
- </a>
105
- </li>
106
- <li>
107
- <a href="http://www.mutinydesign.co.uk/scripts/refactoring-code-with-find-xargs-and-sed/">Refactoring code with find, xargs and sed »
108
- </a>
109
- </li>
110
- <li>
111
- <a href="http://www.mutinydesign.co.uk/scripts/what-do-webdesigners-need-from-clients/">What do webdesigners need from clients »
112
- </a>
113
- </li>
114
- <li>
115
- <a href="http://www.mutinydesign.co.uk/scripts/using-any-font-on-a-website/">Using any font on a website »
116
- </a>
117
- </li>
118
- </ul>
119
- </div>
120
- </div>
121
- <div id="column_left" class="column col_resources_left">
122
- <div class="box text">
123
- <div class="content"><p>This is not really much of an extravaganza to get things going, but a few of the more interesting things (scripts) that will be posted here are still in their beta stages.</p>
124
- <p>This first article is about probably the most undervalued of all HTML tags - the <code>base</code> element. The base element could be considered the most obscure HTML tag. It is not even one of the tags people are starting to use now more people are actually hand-coding HTML. This tag is so obscure and underused that several seasoned developers I have asked about the tag had never heard of it and when informed of its existence their first question was always 'Does it work in IE6?' As a long-standing member of the HTML family, support for the <code>base</code> tag is good. We have tested it on IE 3 - 7, Firefox, Opera and Konqueror without any issues. The only time I have ever seen this element used in Microsoft Outlook Web Access.</p>
125
- <p>Anyway, enough of the guff. The base element is a self-closing HTML element that allows HTML documents to have a universal root path for all external media. Below is a brief demonstration on the <code>base</code> tag in use:</p>
126
- <fieldset>
127
- <legend>Code Snippet</legend>
128
- <textarea id="code_snippet_1" class="codepress html" rows="16" style="width: 100%;">
129
- <html>
130
- <head>
131
- <title>Document</title>
132
- <base href="http://www.mutinydesign.co.uk/" />
133
- <link type="text/css" rel="stylesheet" media="screen" href="main.css" />
134
- <script type="text/javascript" src="mootools.js"></script>
135
- </head>
136
- <body>
137
- <a href="scripts/">Scripts</a>
138
- <img src="logo.png" alt="MD Logo" />
139
- </body>
140
- </html>
141
- </textarea>
142
- <p>In the above instance we are setting the base path of external media (href & src) to http://www.mutinydesign.co.uk/. So the css file included will actually be http://www.mutinydesign.co.uk/main.css, the JavaScript file being includes will actually be http://www.mutinydesign.co.uk/mootools.js and the hyperlink will point to http://www.mutinydesign.co.uk/scripts/ and so on.</p>
143
- </fieldset>
144
- <p>Other then the href attribute, the only other attribute supported by the <code>base</code> tag is the target attribute. This is handled the same as the target attribute on a hyperlink. So if you really want to you could force all hyperlinks to open in a new window. The only elements that take precedence over the base element's path are the object and depreciated applet elements.</p>
145
- <h2>A Few Other Notes on the Base Element</h2>
146
- <ul>
147
- <li>It must be includes in the docuemnt's head, although IE6 < allows you to place it anywhere</li>
148
- <li>It support relative paths, e.g. href="../scripts"</li>
149
- <li>All URL's beginging with http:// or other protocols are ignored by base</li>
150
- <li>If you have a bandwith hungry site, using the base element could cut a kilobyte or two of each document</li>
151
- </ul>
152
- <h2>Adding A Bit of Server-side Scripting</h2>
153
- <p>If you are making a small web site there may be little use for the base element, but it can still be beneficial. The real benefit of base comes when you are creating a large site that has a lot external media - particularly if the site uses mod_rewrite or another URL re-writing engine. Anyone who is familiar with URL re-writing will probably know some of the problems you can run into; particularly if you are just slopping the site together. The main problem you will run into is the server being confused as to what directory it is in, so either your server wont be able to find the server-side scripts or it will be trying to access no existent external media (images, JavaScript etc.). You can use some scripting to overcome this, but it tends to be a bit bulky and may cause a few problems in future development. The simplest solution for URL re-writing issues is to use the HTML base element. Simply add the root path of your site in the base element and now you always know what path you access external media - no more ./../../../images/products/chairs/12_43_hfd.jpg! Below is how we structure our globals file for scaleable development using the Don't Repeat Yourself (DRY) principle.</p>
154
- <fieldset>
155
- <legend>Code Snippet</legend>
156
- <textarea id="code_snippet_2" class="codepress php" rows="15" style="width: 100%;">
157
- if( $_SERVER['SERVER_NAME'] != '123.0.0.0' )
158
- define( 'ROOT_PATH', 'http://localhost/re_route/' );
159
- else
160
- define( 'ROOT_PATH', 'http://192.0.0.0:81/' );
161
- define( 'CLASSES_PATH', 'classes/' );
162
- define( 'LANG_PATH', 'lang/' );
163
- define( 'TPL_PATH', 'templates/' );
164
- define( 'JS_PATH', 'js/' );
165
- define( 'CSS_PATH', 'css/' );
166
- define( 'IMAGES_PATH', 'images/' );
167
- define( 'MD_PATH', CLASSES_PATH.'md/' );
168
- define( 'MD_HTMLMIMEMAIL_PATH', CLASSES_PATH.'EMail/' );
169
- </textarea>
170
- <p>In the above instance, we first of all check what server we are on. This saves those pesky 'I forgot to change the globals' problems when you upload your site to the web. Depending on what server we are on a root path is defined, which is written to HTML with a templating engine. We also define the path where all programming and external media is stored, which saves having to go through every file to change paths if you ever need to change paths.</p>
171
- </fieldset>
172
- <h2>Advantages of the Base Element</h2>
173
- <ul>
174
- <li>Provides global access to the root path through HTML</li>
175
- <li>Can change all paths with one edit</li>
176
- <li>Results in reduced HTML</li>
177
- <li>Better facilitates URL re-writing</li>
178
- </ul>
179
- <h2>Disadvantages of the Base Element</h2>
180
- <p>When used in conjunction with Microsoft's Alpha Image Loader in external style sheets you will need to declare the source with the root path. If you use a relative path the image will not be loaded when outside the root, because the Alpha Image Loader ignores the base element's declaration. Cheers Bill!</p></div>
181
- </div>
182
- <a name="comments" id="comments"></a>
183
- <div class="box page_comments">
184
- <h2>Comments</h2>
185
- <div class="comment even">
186
- <p class="author">Roman</p>
187
- <p class="date">13th June, 2009 at 2:45am</p>
188
- <p class="body">The base element supports only absolute paths. This is stated in http://www.w3.org/TR/REC-html40/struct/links.html#edef-BASE and supported by my tests. Using relative paths failed in both Firefox 3.5 beta 4 and IE 8.</p>
189
- </div>
190
- </div>
191
- <div class="box page_leavecomment">
192
- <h2><span>Leave</span> a comment</h2>
193
- <form action="" method="post" id="page_leavecomment">
194
- <div class="npt ">
195
- <input type="text" name="comment_name" id="comment_name" value="" class="text" />
196
- <label for="comment_name">Name:</label>
197
- <div class="clr"></div>
198
- </div>
199
- <div class="npt ">
200
- <input type="text" name="comment_url" id="comment_url" value="" class="text" />
201
- <label for="comment_url">URL:</label>
202
- <div class="clr"></div>
203
- </div>
204
- <div class="txt">
205
- <textarea name="comment"></textarea>
206
- <label for="comment">Comment:</label>
207
- <div class="clr"></div>
208
- </div>
209
- <div class="cpt">
210
- <label for="comment_captcha">Antispam:</label>
211
- <img src="captcha" alt="Antispam code" id="captchaImage" />
212
- <input type="button" value="Generate New Image" class="button" id="reloadCaptcha" />
213
- <p>Enter the text you see to the left</p>
214
- <input type="text" class="text" name="comment_captcha" id="" value="" />
215
- <div class="clr"></div>
216
- </div>
217
- <div class="npt ">
218
- <input type="submit" name="add_comment" id="add_comment" value="Post comment" class="submit" />
219
- <div class="clr"></div>
220
- </div>
221
- </form>
222
- </div>
223
- </div>
224
- <div class="clr"></div>
225
- </div>
226
- </div>
227
- <div id="clrFoot"></div>
228
- </div>
229
- <div id="ft">
230
- <div class="in">
231
- <div id="ftSect">
232
- <p>
233
- <a href="web-design-resources/">Design resources</a>
234
- </p>
235
- <p>
236
- <a href="scripts/">Technical resources</a>
237
- </p>
238
- <p>
239
- <a href="seo-tools-and-resources/">SEO resources</a>
240
- </p>
241
- <p>
242
- <a href="free-scripts/">Free scripts</a>
243
- </p>
244
- </div>
245
- <div id="ftBlog">
246
- <p>update: <a href="web-design-resources/outside-europe/">Outside Europe</a></p>
247
- <p>update: <a href="scripts/center-a-web-page-in-css/">Center a web page in CSS</a></p>
248
- <p>update: <a href="seo-tools-and-resources/submitting-your-web-site-to-css-galleries/">Submitting your web site to CSS galleries</a></p>
249
- <p>update: <a href="free-scripts/using-sifr-font-replacement/">Using sIFR font replacement</a></p>
250
- </div>
251
- <div id="ftOffice">
252
- <p><a href="web-design-shrewsbury/">Web Design Shrewsbury</a><span> telephone</span> 08000 805401</p>
253
- <p><a href="web-design-manchester/">Web Design Manchester</a><span> telephone</span> 0161 7440075</p>
254
- <p><a href="web-design-birmingham/">Web Design Birmingham</a><span> telephone</span> 0121 7750085</p>
255
- <p><a href="web-design-london/">Web Design London</a><span> telephone</span> 0207 1250044</p>
256
- </div>
257
- </div>
258
- <p id="ftCopy"><a href="http://validator.w3.org/check?uri=referer" target="_blank" id="ftValid" rel="nofollow"><img src="http://static.mutinydesign.co.uk/assets/img/btnValid.png" alt="Valid XHTML/CSS" /></a>
259
- © Mutiny Design - Website Design and Development - Network House, Badgers Way, Oxon Business Park, Shrewsbury, Shropshire SY3 5AB
260
- </p>
261
- </div>
262
- <div id="ftBg"></div>
263
- </body>
264
- </html>