youtubescraper 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +20 -0
- data/MIT-LICENSE +20 -0
- data/README +23 -0
- data/lib/youtube/browsescraper.rb +260 -0
- data/lib/youtube/searchresultscraper.rb +263 -0
- data/lib/youtube/searchresultscraper.rb~ +263 -0
- data/lib/youtube/video.rb +62 -0
- data/test/html/dataN_noMsgN.htm +387 -0
- data/test/html/dataY_noMsgY.htm +1507 -0
- data/test/html/scraping_error.html +1503 -0
- data/test/youtube_scraper_test.rb +89 -0
- data/test/youtube_scraper_test.rb~ +92 -0
- metadata +69 -0
@@ -0,0 +1,387 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
<html>
|
7
|
+
|
8
|
+
<!-- machid: 99 -->
|
9
|
+
<head>
|
10
|
+
|
11
|
+
<title>YouTube - Broadcast Yourself.</title>
|
12
|
+
|
13
|
+
<link rel="stylesheet" href="/css/styles_yts1164775696.css" type="text/css">
|
14
|
+
<link rel="stylesheet" href="/css/base_yts1165878295.css" type="text/css">
|
15
|
+
<link rel="icon" href="/favicon.ico" type="image/x-icon">
|
16
|
+
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
|
17
|
+
|
18
|
+
|
19
|
+
<meta name="keywords" content="video,sharing,camera phone,video phone">
|
20
|
+
|
21
|
+
<link rel="alternate" title="YouTube - [RSS]" href="/rssls">
|
22
|
+
|
23
|
+
<script type="text/javascript" src="/js/ui_yts1164777409.js"></script>
|
24
|
+
<script type="text/javascript" src="/js/AJAX_yts1161839869.js"></script>
|
25
|
+
<script type="text/javascript" src="/js/watch_queue_yts1161839869.js"></script>
|
26
|
+
<script language="javascript" type="text/javascript">
|
27
|
+
onLoadFunctionList = new Array();
|
28
|
+
function performOnLoadFunctions()
|
29
|
+
{
|
30
|
+
for (var i in onLoadFunctionList)
|
31
|
+
{
|
32
|
+
onLoadFunctionList[i]();
|
33
|
+
}
|
34
|
+
}
|
35
|
+
</script>
|
36
|
+
|
37
|
+
<script language="javascript" type="text/javascript"> function _hbLink (a,b) { return false; } </script>
|
38
|
+
|
39
|
+
|
40
|
+
</head>
|
41
|
+
|
42
|
+
|
43
|
+
<body onLoad="performOnLoadFunctions();">
|
44
|
+
|
45
|
+
<div id="baseDiv">
|
46
|
+
<div id="logoTagDiv">
|
47
|
+
<a href="/" name="&lid=Logo&lpos=GlobalNav" title="Home"><img src="/img/pic_youtubelogo_123x63.gif" alt="Home" width="123" height="63" border="0" onmouseover="showDiv('logoHomeTip');" onmouseout="hideDiv('logoHomeTip');" /></a>
|
48
|
+
</div>
|
49
|
+
<div id="logoHomeTip" style="display: none;">
|
50
|
+
Home
|
51
|
+
</div>
|
52
|
+
|
53
|
+
<div id="utilDiv">
|
54
|
+
|
55
|
+
<div style="float:right; margin-top:5px;">
|
56
|
+
<span class="utilDelim">|</span>
|
57
|
+
<a href="/recently_watched" onclick="_hbLink('ViewingHistory','UtilityLinks');">History</a>
|
58
|
+
<span class="utilDelim">|</span>
|
59
|
+
<a href="/watch_queue?all" onclick="_hbLink('QuickList','UtilityLinks');">QuickList</a>
|
60
|
+
(<span id="quicklist_numb"><a href="/watch_queue?all"><script type="text/javascript">var quicklist_count=0;document.write(quicklist_count);</script></a></span>)
|
61
|
+
<span class="utilDelim">|</span>
|
62
|
+
<a href="/t/help_center">Help</a>
|
63
|
+
<span class="utilDelim">|</span>
|
64
|
+
|
65
|
+
<a href="#" onClick="document.logoutForm.submit()">Log Out</a>
|
66
|
+
</div>
|
67
|
+
|
68
|
+
<div class="myAccountContainer" style="margin: 5px 5px 0px 3px;">
|
69
|
+
<a href="/my_account" onclick="_hbLink('MyAccount','UtilityLinks');">My Account</a>
|
70
|
+
|
71
|
+
</div>
|
72
|
+
|
73
|
+
<div id="utilNavLeftContainer">
|
74
|
+
<b>Hello, <a href="/profile?user=rubypythonjava" onclick="_hbLink('ChannelProfile','UtilityLinks');">rubypythonjava</a></b>
|
75
|
+
<a href="/my_messages"><img src="/img/icn_nomail_21x17.gif" valign="bottom" border="0" id="iconMail"></a> (<a class="headerLink" href="/my_messages">0</a>)
|
76
|
+
<span class="utilDelim">|</span>
|
77
|
+
</div>
|
78
|
+
|
79
|
+
<form name="logoutForm" method="post" action="/index">
|
80
|
+
<input type="hidden" name="action_logout" value="1">
|
81
|
+
</form>
|
82
|
+
</div>
|
83
|
+
|
84
|
+
|
85
|
+
<div id="searchDiv">
|
86
|
+
<form name="searchForm" id="searchForm" method="get" action="/results">
|
87
|
+
<input tabindex="10000" type="text" name="search_query" maxlength="128" class="searchField" value="doraemon vs fakfj da">
|
88
|
+
|
89
|
+
<input type="submit" name="search" value="Search">
|
90
|
+
</form>
|
91
|
+
|
92
|
+
</div>
|
93
|
+
|
94
|
+
<div id="gNavDiv">
|
95
|
+
|
96
|
+
|
97
|
+
<div id="upload"><a href="/my_videos_upload"><img src="/img/pic_upload_130x28.gif" width="130" height="28" alt="upload" border="0" /></a></div>
|
98
|
+
|
99
|
+
<div class="tab">
|
100
|
+
<a href="/community"><img src="/img/tab_community_118x28.gif" width="118" height="28" border="0" alt="community" /></a></div>
|
101
|
+
<div class="tab">
|
102
|
+
<a href="/members"><img src="/img/tab_channels_118x28.gif" width="118" height="28" border="0" alt="channels" /></a></div>
|
103
|
+
<div class="tab">
|
104
|
+
<a href="/categories"><img src="/img/tab_categories_118x28.gif" width="118" height="28" border="0" alt="categories" /></a></div>
|
105
|
+
<div class="tab">
|
106
|
+
<a href="/browse?s=mp"><img src="/img/tab_videos_118x28.gif" width="118" height="28" border="0" alt="videos" /></a></div>
|
107
|
+
</div>
|
108
|
+
<!-- end gNavDiv -->
|
109
|
+
<div id="gNavBottom"> </div>
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
<div id="leaderboardAd">
|
123
|
+
<!-- google_ad_section_start -->
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
<!-- begin ad tag -->
|
131
|
+
<script type="text/javascript">
|
132
|
+
ord=Math.random()*10000000000000000 + 1;
|
133
|
+
document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/you.results/_default;sz=728x90;kch=1600166264;kbg=FFFFFF;ksearch=doraemon%20vs%20fakfj%20da;kgender=m;kage=26;ord=' + ord + '?" type="text/javascript"><\/script>');
|
134
|
+
</script>
|
135
|
+
<noscript><a
|
136
|
+
href="http://ad.doubleclick.net/jump/you.results/_default;sz=728x90;ord=123456789?" target="_blank"><img
|
137
|
+
src="http://ad.doubleclick.net/ad/you.results/_default;sz=728x90;ord=123456789?" width="728" height="90" border="0" alt=""></a>
|
138
|
+
</noscript>
|
139
|
+
<!-- End ad tag -->
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
</div>
|
145
|
+
|
146
|
+
<div id="sideContent">
|
147
|
+
<div>
|
148
|
+
<!-- google_ad_section_start -->
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
<!-- begin ad tag -->
|
156
|
+
<script type="text/javascript">
|
157
|
+
ord=Math.random()*10000000000000000 + 2;
|
158
|
+
document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/you.results/_default;sz=160x600;kch=1187796739;kbg=FFFFFF;ksearch=doraemon%20vs%20fakfj%20da;kgender=m;kage=26;ord=' + ord + '?" type="text/javascript"><\/script>');
|
159
|
+
</script>
|
160
|
+
<noscript><a
|
161
|
+
href="http://ad.doubleclick.net/jump/you.results/_default;sz=160x600;ord=123456789?" target="_blank"><img
|
162
|
+
src="http://ad.doubleclick.net/ad/you.results/_default;sz=160x600;ord=123456789?" width="160" height="600" border="0" alt=""></a>
|
163
|
+
</noscript>
|
164
|
+
<!-- End ad tag -->
|
165
|
+
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
</div>
|
170
|
+
|
171
|
+
<div class="spOffersDiv">
|
172
|
+
<h4 class="label">New on YouTube</h4>
|
173
|
+
<div class="spOffersEntry">
|
174
|
+
Do you know how not to?
|
175
|
+
<a href="/contest/hownotto">Enter for a chance to win</a>!
|
176
|
+
</div>
|
177
|
+
|
178
|
+
<div class="spOffersEntry">
|
179
|
+
There’s a new way play.
|
180
|
+
<a href="/profile?user=wii">Wii from Nintendo</a>.
|
181
|
+
</div>
|
182
|
+
|
183
|
+
<div class="spOffersEntry">
|
184
|
+
Real Drama all the time.
|
185
|
+
<a href="/profile?user=TheBadGirlsClub">Check out the Bad Girls Club</a>.
|
186
|
+
</div>
|
187
|
+
|
188
|
+
<div class="spOffersEntry">
|
189
|
+
The Dark Side of Fame.
|
190
|
+
<a href="/profile?user=FXDirt">Dirt on FX</a>.
|
191
|
+
</div>
|
192
|
+
|
193
|
+
<div class="spOffersEntry">
|
194
|
+
Show Us Your Undeniable Power.
|
195
|
+
<a href="/undeniabletv">Enter for a chance to win</a> a Panasonic Plasma TV.
|
196
|
+
</div>
|
197
|
+
|
198
|
+
</div>
|
199
|
+
</div> <!-- end sideContent -->
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
<div id="mainContent">
|
204
|
+
|
205
|
+
<div id="sectionHeader" class="searchColor">
|
206
|
+
<div class="name">Search</div>
|
207
|
+
<span class="title"> Video <span class="normalText">results for</span>
|
208
|
+
'doraemon vs fakfj da'
|
209
|
+
</span>
|
210
|
+
</div>
|
211
|
+
|
212
|
+
|
213
|
+
<div id="sideNav">
|
214
|
+
<div class="navHead searchColor">Search In</div>
|
215
|
+
<div class="navBody12">
|
216
|
+
<div class="label"><img src="/img/pic_selected_dot_9x9.gif" alt="selected" /> Videos</div>
|
217
|
+
<a href="/results?search_type=search_users&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=0">Channels</a><br/>
|
218
|
+
<a href="/results?search_type=search_groups&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=0">Groups</a><br/>
|
219
|
+
<a href="/results?search_type=search_playlists&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=0">Playlists</a><br/>
|
220
|
+
</div>
|
221
|
+
|
222
|
+
|
223
|
+
<div class="navHead searchColor">Sort By</div>
|
224
|
+
<div class="navBody11">
|
225
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=relevance&search_category=0">Relevance</a><br/>
|
226
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=video_date_uploaded&search_category=0">Date Added</a><br/>
|
227
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=video_view_count&search_category=0">View Count</a><br/>
|
228
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=video_avg_rating&search_category=0">Rating</a><br/>
|
229
|
+
</div>
|
230
|
+
|
231
|
+
|
232
|
+
<div class="navHead searchColor">Refine by Category</div>
|
233
|
+
<div class="navBody11">
|
234
|
+
<div class="label"><img src="/img/pic_selected_dot_9x9.gif" alt="selected" /> All</div>
|
235
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=1">Arts & Animation</a><br/>
|
236
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=2">Autos & Vehicles</a><br/>
|
237
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=23">Comedy</a><br/>
|
238
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=24">Entertainment</a><br/>
|
239
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=10">Music</a><br/>
|
240
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=25">News & Blogs</a><br/>
|
241
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=22">People</a><br/>
|
242
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=15">Pets & Animals</a><br/>
|
243
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=26">Science & Technology</a><br/>
|
244
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=17">Sports</a><br/>
|
245
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=19">Travel & Places</a><br/>
|
246
|
+
<a href="/results?search_type=search_videos&search_query=doraemon%20vs%20fakfj%20da&search_sort=&search_category=20">Video Games</a><br/>
|
247
|
+
</div>
|
248
|
+
|
249
|
+
|
250
|
+
<div id="bottomAdDiv" style="text-align: left;">
|
251
|
+
<a href="/wishcast"><img src="/img/ad_cokewishcast_120x90.jpg" width="120" height="90" border="0" alt="Send a Holiday Wishcast"></a>
|
252
|
+
</div>
|
253
|
+
|
254
|
+
</div> <!-- end sideNav -->
|
255
|
+
|
256
|
+
|
257
|
+
|
258
|
+
|
259
|
+
<div id="mainContentWithNav">
|
260
|
+
|
261
|
+
|
262
|
+
<div class="footerBox">
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
|
267
|
+
|
268
|
+
</div>
|
269
|
+
|
270
|
+
</div> <!-- end mainContentWithNav -->
|
271
|
+
</div> <!-- end mainContent -->
|
272
|
+
|
273
|
+
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
|
280
|
+
|
281
|
+
|
282
|
+
|
283
|
+
|
284
|
+
|
285
|
+
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
|
290
|
+
|
291
|
+
<div class="spacer"> </div>
|
292
|
+
<div id="footerDiv">
|
293
|
+
<div id="footerContent">
|
294
|
+
<div id="footerSearch">
|
295
|
+
<form name="searchFormFooter" id="searchFormFooter" method="get" action="/results">
|
296
|
+
<input type="text" name="search_query" maxlength="128" class="searchField" value="">
|
297
|
+
|
298
|
+
<input type="submit" name="search" value="Search">
|
299
|
+
</form>
|
300
|
+
</div> <!-- end footerSearch -->
|
301
|
+
|
302
|
+
|
303
|
+
<div id="footerLinks">
|
304
|
+
|
305
|
+
<table border="0" cellpadding="0" cellspacing="0" width="100%" align="center"><tr valign="top">
|
306
|
+
|
307
|
+
<td>
|
308
|
+
<div class="footColumnLeft">
|
309
|
+
<div class="footLabel">Your Account</div>
|
310
|
+
<div class="footValues">
|
311
|
+
<div class="column">
|
312
|
+
<a href="/my_videos">Videos</a><br/>
|
313
|
+
<a href="/my_favorites">Favorites</a><br/>
|
314
|
+
</div>
|
315
|
+
<div class="column">
|
316
|
+
<a href="/my_playlists">Playlists</a><br/>
|
317
|
+
<a href="/my_messages">Inbox</a><br/>
|
318
|
+
</div>
|
319
|
+
<div class="column">
|
320
|
+
<a href="/subscription_center">Subscriptions</a><br/>
|
321
|
+
<a href="/my_account">more...</a><br/>
|
322
|
+
</div>
|
323
|
+
</div>
|
324
|
+
</div>
|
325
|
+
</td>
|
326
|
+
|
327
|
+
<td>
|
328
|
+
<div class="footColumnMid">
|
329
|
+
<div class="footLabel">Help & Info</div>
|
330
|
+
<div class="footValues">
|
331
|
+
<div class="column">
|
332
|
+
<a href="/t/help_center">Help Center</a><br/>
|
333
|
+
<a href="/t/video_toolbox">Video Toolbox</a><br/>
|
334
|
+
</div>
|
335
|
+
<div class="column">
|
336
|
+
<a href="/dev">Developer APIs</a><br/>
|
337
|
+
<a href="/t/safety">Safety Tips</a><br/>
|
338
|
+
</div>
|
339
|
+
<div class="column">
|
340
|
+
<a href="/t/dmca_policy">Copyright FAQ</a><br/>
|
341
|
+
<a href="/t/community_guidelines">Code of Conduct</a><br/>
|
342
|
+
</div>
|
343
|
+
</div>
|
344
|
+
</div>
|
345
|
+
</td>
|
346
|
+
|
347
|
+
|
348
|
+
<td>
|
349
|
+
<div class="footColumnRight">
|
350
|
+
<div class="footLabel">YouTube</div>
|
351
|
+
<div class="footValues">
|
352
|
+
<div class="column">
|
353
|
+
<a href="/t/about">Company Info</a><br/>
|
354
|
+
<a href="/testtube">Test Tube</a><br/>
|
355
|
+
</div>
|
356
|
+
<div class="column">
|
357
|
+
<a href="/t/terms">Terms of Use</a><br/>
|
358
|
+
<a href="/t/privacy">Privacy Policy</a><br/>
|
359
|
+
</div>
|
360
|
+
<div class="column">
|
361
|
+
<a href="/advertise">Advertising</a><br/>
|
362
|
+
<a href="/contact">Contact</a><br/>
|
363
|
+
</div>
|
364
|
+
<div class="column">
|
365
|
+
<a href="/press_room">Press</a><br/>
|
366
|
+
<a href="http://www.pcrecruiter.net/pcrbin/regmenu.exe?uid=youtube.youtube">Jobs</a><br/>
|
367
|
+
</div>
|
368
|
+
</div>
|
369
|
+
</div>
|
370
|
+
</td>
|
371
|
+
|
372
|
+
</tr></table>
|
373
|
+
|
374
|
+
</div> <!-- end footerLinks -->
|
375
|
+
</div> <!-- end footerContent -->
|
376
|
+
|
377
|
+
|
378
|
+
<div id="footerCopyright">
|
379
|
+
Copyright © 2006 YouTube, Inc.
|
380
|
+
</div> <!-- end footerCopyright -->
|
381
|
+
|
382
|
+
</div> <!-- end footerDiv -->
|
383
|
+
|
384
|
+
</div> <!-- end baseDiv -->
|
385
|
+
</body>
|
386
|
+
|
387
|
+
</html>
|