ruby-youtube-dl 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +1 -0
- data/bin/ruby-youtube-dl +3 -0
- data/bin/youtube-dl.py +4055 -0
- data/lib/ruby-youtube-dl.rb +5 -0
- data/lib/ruby-youtube-dl/downloader.rb +11 -0
- data/lib/ruby-youtube-dl/version.rb +3 -0
- data/ruby-youtube-dl.gemspec +24 -0
- metadata +77 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/ruby-youtube-dl
ADDED
data/bin/youtube-dl.py
ADDED
@@ -0,0 +1,4055 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
__author__ = (
|
5
|
+
'Ricardo Garcia Gonzalez',
|
6
|
+
'Danny Colligan',
|
7
|
+
'Benjamin Johnson',
|
8
|
+
'Vasyl\' Vavrychuk',
|
9
|
+
'Witold Baryluk',
|
10
|
+
'Paweł Paprota',
|
11
|
+
'Gergely Imreh',
|
12
|
+
'Rogério Brito',
|
13
|
+
'Philipp Hagemeister',
|
14
|
+
'Sören Schulze',
|
15
|
+
)
|
16
|
+
|
17
|
+
__license__ = 'Public Domain'
|
18
|
+
__version__ = '2011.10.19'
|
19
|
+
|
20
|
+
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
|
21
|
+
|
22
|
+
import cookielib
|
23
|
+
import datetime
|
24
|
+
import gzip
|
25
|
+
import htmlentitydefs
|
26
|
+
import HTMLParser
|
27
|
+
import httplib
|
28
|
+
import locale
|
29
|
+
import math
|
30
|
+
import netrc
|
31
|
+
import os
|
32
|
+
import os.path
|
33
|
+
import re
|
34
|
+
import socket
|
35
|
+
import string
|
36
|
+
import subprocess
|
37
|
+
import sys
|
38
|
+
import time
|
39
|
+
import urllib
|
40
|
+
import urllib2
|
41
|
+
import warnings
|
42
|
+
import zlib
|
43
|
+
|
44
|
+
if os.name == 'nt':
|
45
|
+
import ctypes
|
46
|
+
|
47
|
+
try:
|
48
|
+
import email.utils
|
49
|
+
except ImportError: # Python 2.4
|
50
|
+
import email.Utils
|
51
|
+
try:
|
52
|
+
import cStringIO as StringIO
|
53
|
+
except ImportError:
|
54
|
+
import StringIO
|
55
|
+
|
56
|
+
# parse_qs was moved from the cgi module to the urlparse module recently.
|
57
|
+
try:
|
58
|
+
from urlparse import parse_qs
|
59
|
+
except ImportError:
|
60
|
+
from cgi import parse_qs
|
61
|
+
|
62
|
+
try:
|
63
|
+
import lxml.etree
|
64
|
+
except ImportError:
|
65
|
+
pass # Handled below
|
66
|
+
|
67
|
+
try:
|
68
|
+
import xml.etree.ElementTree
|
69
|
+
except ImportError: # Python<2.5: Not officially supported, but let it slip
|
70
|
+
warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
|
71
|
+
|
72
|
+
# Default headers that YoutubeDLHandler.http_request() forces onto every
# outgoing HTTP request, replacing any header of the same name.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}

# ASCII letters followed by ASCII digits, as a unicode string (the u'' prefix
# promotes the concatenation to unicode).
simple_title_chars = u'' + string.ascii_letters + string.digits
|
81
|
+
|
82
|
+
try:
    import json
except ImportError:  # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
    import re

    class json(object):
        """Fallback used when the stdlib json module is missing; only loads() exists."""

        @staticmethod
        def loads(s):
            """Decode the UTF-8 encoded JSON document s into Python objects.

            Syntax problems detected by the parser are reported as ValueError,
            as is any non-whitespace data following the first value.
            """
            s = s.decode('UTF-8')

            # Single-character escapes and the characters they stand for.
            static_escapes = {
                '"': '"',
                '\\': '\\',
                '/': '/',
                'b': unichr(0x8),
                'f': unichr(0xc),
                'n': '\n',
                'r': '\r',
                't': '\t',
            }
            # After a backslash: a surrogate pair, a single \uXXXX, any other
            # character, or the end of the string.
            escape_re = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')

            def raiseError(msg, i):
                raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))

            def skipSpace(i, expectMore=True):
                # Advance past whitespace; optionally insist that input remains.
                while i < len(s) and s[i] in ' \t\r\n':
                    i += 1
                if expectMore and i >= len(s):
                    raiseError('Premature end', i)
                return i

            def decodeEscape(match):
                esc = match.group(1)
                if esc in static_escapes:
                    return static_escapes[esc]
                if esc[0] == 'u':
                    if len(esc) == 1+4:
                        return unichr(int(esc[1:5], 16))
                    if len(esc) == 5+6 and esc[5:7] == '\\u':
                        # Combine a UTF-16 surrogate pair into one code point.
                        hi = int(esc[1:5], 16)
                        low = int(esc[7:11], 16)
                        return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
                raise ValueError('Unknown escape ' + str(esc))

            def parseString(i):
                start = i + 1
                end = start
                while True:
                    end = s.index('"', end)
                    # A quote preceded by an odd number of backslashes is
                    # escaped and does not terminate the string.
                    backslashes = 0
                    while s[end - backslashes - 1] == '\\':
                        backslashes += 1
                    if backslashes % 2 == 0:
                        break
                    end += 1
                text = escape_re.sub(decodeEscape, s[start:end])
                return (end + 1, text)

            def parseObj(i):
                i += 1
                res = {}
                i = skipSpace(i)
                if s[i] == '}':  # Empty dictionary
                    return (i+1, res)
                while True:
                    if s[i] != '"':
                        raiseError('Expected a string object key', i)
                    i, key = parseString(i)
                    i = skipSpace(i)
                    if i >= len(s) or s[i] != ':':
                        raiseError('Expected a colon', i)
                    i, val = parse(i+1)
                    res[key] = val
                    i = skipSpace(i)
                    if s[i] == '}':
                        return (i+1, res)
                    if s[i] != ',':
                        raiseError('Expected comma or closing curly brace', i)
                    i = skipSpace(i+1)

            def parseArray(i):
                res = []
                i = skipSpace(i+1)
                if s[i] == ']':  # Empty array
                    return (i+1, res)
                while True:
                    i, val = parse(i)
                    res.append(val)
                    i = skipSpace(i)  # Raise exception if premature end
                    if s[i] == ']':
                        return (i+1, res)
                    if s[i] != ',':
                        raiseError('Expected a comma or closing bracket', i)
                    i = skipSpace(i+1)

            def parseDiscrete(i):
                for word, value in (('true', True), ('false', False), ('null', None)):
                    if s.startswith(word, i):
                        return (i + len(word), value)
                raiseError('Not a boolean (or null)', i)

            def parseNumber(i):
                mobj = re.match(r'^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
                if mobj is None:
                    raiseError('Not a number', i)
                nums = mobj.group(1)
                # A fraction or exponent makes it a float, otherwise an integer.
                if '.' in nums or 'e' in nums or 'E' in nums:
                    return (i+len(nums), float(nums))
                return (i+len(nums), int(nums))

            # Dispatch on the first character of a value; anything not listed
            # is attempted as a number.
            CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}

            def parse(i):
                i = skipSpace(i)
                i, res = CHARMAP.get(s[i], parseNumber)(i)
                i = skipSpace(i, False)
                return (i, res)

            i, res = parse(0)
            if i < len(s):
                raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
            return res
|
194
|
+
|
195
|
+
def preferredencoding():
    """Get preferred encoding.

    Returns the encoding name reported by locale.getpreferredencoding(),
    provided a unicode string can actually be encoded with it. If the
    lookup or the trial encoding fails for any reason, 'UTF-8' is returned
    instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here.
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (locale.Error, LookupError, TypeError, ...) but
        # no longer swallows KeyboardInterrupt/SystemExit on Python >= 2.5.
        pref = 'UTF-8'
    return pref
|
210
|
+
|
211
|
+
|
212
|
+
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a Unicode character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the surrounding '&' and ';'.

    Named entities are looked up in htmlentitydefs.name2codepoint. Numeric
    references are accepted in decimal ('#233') and hexadecimal ('#xe9')
    form. Anything else, including numeric references whose code point
    cannot be represented, is returned unchanged as u'&<entity>;'.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in htmlentitydefs.name2codepoint:
        return unichr(htmlentitydefs.name2codepoint[entity])

    # Numeric character reference. The hexadecimal branch must accept the
    # digits a-f as well; matching only \d mis-decodes or rejects them.
    mobj = re.match(u'(?u)#([xX][0-9a-fA-F]+|\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr[0] in u'xX':
            codepoint = int(numstr[1:], 16)
        else:
            codepoint = int(numstr, 10)
        try:
            return unichr(codepoint)
        except (ValueError, OverflowError):
            # Code point outside the range unichr() supports on this build;
            # fall through and keep the reference verbatim.
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
|
237
|
+
|
238
|
+
|
239
|
+
def sanitize_title(utitle):
    """Return utitle made usable as part of a filename.

    HTML entities are expanded through htmlentity_transform, and every
    occurrence of the platform path separator is replaced with '%'.
    """
    decoded = re.sub(u'(?u)&(.+?);', htmlentity_transform, utitle)
    separator = unicode(os.sep)
    return decoded.replace(separator, u'%')
|
243
|
+
|
244
|
+
|
245
|
+
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a tweaked name if that fails.

    The name u'-' selects sys.stdout (switched to binary mode on win32).
    Otherwise the file is opened as given; if that raises IOError or
    OSError, the characters / < > : " | ? * are replaced with '#' and the
    open is attempted again. A failure of the second attempt propagates to
    the caller, like the standard open() function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != u'-':
            return (open(filename, open_mode), filename)
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout, filename)
    except (IOError, OSError):
        # In case of error, try to remove win32 forbidden chars
        cleaned = re.sub(u'[/<>:"\\|\\?\\*]', u'#', filename)

        # An exception here should be caught in the caller
        return (open(cleaned, open_mode), cleaned)
|
270
|
+
|
271
|
+
|
272
|
+
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when email.utils.parsedate_tz() cannot parse timestr.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
|
279
|
+
|
280
|
+
|
281
|
+
class DownloadError(Exception):
    """Download Error exception.

    Raised by FileDownloader.trouble() when the downloader is not
    configured to ignore errors; it is constructed with the error message
    that was reported.
    """
|
289
|
+
|
290
|
+
|
291
|
+
class SameFileError(Exception):
    """Same File exception.

    Raised by FileDownloader.download() when several URLs are requested but
    the output template is fixed, so every download would be written to the
    same file on disk.
    """
|
298
|
+
|
299
|
+
|
300
|
+
class PostProcessingError(Exception):
    """Post Processing exception.

    Signals an error in a postprocessing task. FileDownloader.process_info()
    catches it around the postprocessing chain and reports it as a download
    problem.
    """
|
307
|
+
|
308
|
+
|
309
|
+
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Signals that a video was requested in a format that is not available
    for it. FileDownloader.process_info() also raises it when the actual
    download fails with an OSError or IOError.
    """
|
316
|
+
|
317
|
+
|
318
|
+
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.

    Attributes:
        downloaded: number of bytes actually received.
        expected: number of bytes the server announced.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        # Initialise the base class so str(err), err.args and tracebacks
        # carry a meaningful message instead of being empty.
        Exception.__init__(
            self,
            'content too short: downloaded %s bytes, expected %s bytes'
            % (downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
|
332
|
+
|
333
|
+
|
334
|
+
class YoutubeDLHandler(urllib2.HTTPHandler):
    """Handler for HTTP requests and responses.

    Installed in an OpenerDirector, this handler forces the headers from
    std_headers onto every HTTP request and transparently decodes gzipped
    and deflated responses. A request that must not be compressed only has
    to carry the HTTP header "Youtubedl-No-Compression"; that marker and
    the Accept-Encoding header are removed before the real request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data, trying a raw deflate stream before a zlib one."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl carrying code, whichever constructor exists."""
        if not hasattr(urllib2.addinfourl, 'getcode'):
            # This addinfourl does not accept a status code; attach it by hand.
            wrapped = urllib2.addinfourl(stream, headers, url)
            wrapped.code = code
            return wrapped
        return urllib2.addinfourl(stream, headers, url, code)

    def http_request(self, req):
        """Force the standard headers onto req and honour the opt-out marker."""
        for name, value in std_headers.items():
            req.headers.pop(name, None)
            req.add_header(name, value)
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        return req

    def http_response(self, req, resp):
        """Return resp, re-wrapped around decoded data if it was compressed."""
        original = resp
        encoding = resp.headers.get('Content-encoding', '')
        if encoding == 'gzip':
            body = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
        elif encoding == 'deflate':
            body = StringIO.StringIO(self.deflate(resp.read()))
        else:
            return resp
        resp = self.addinfourl_wrapper(body, original.headers, original.url, original.code)
        resp.msg = original.msg
        return resp
|
391
|
+
|
392
|
+
|
393
|
+
class FileDownloader(object):
|
394
|
+
"""File Downloader class.
|
395
|
+
|
396
|
+
File downloader objects are the ones responsible of downloading the
|
397
|
+
actual video file and writing it to disk if the user has requested
|
398
|
+
it, among some other tasks. In most cases there should be one per
|
399
|
+
program. As, given a video URL, the downloader doesn't know how to
|
400
|
+
extract all the needed information, task that InfoExtractors do, it
|
401
|
+
has to pass the URL to one of them.
|
402
|
+
|
403
|
+
For this, file downloader objects have a method that allows
|
404
|
+
InfoExtractors to be registered in a given order. When it is passed
|
405
|
+
a URL, the file downloader handles it to the first InfoExtractor it
|
406
|
+
finds that reports being able to handle it. The InfoExtractor extracts
|
407
|
+
all the information about the video or videos the URL refers to, and
|
408
|
+
asks the FileDownloader to process the video information, possibly
|
409
|
+
downloading the video.
|
410
|
+
|
411
|
+
File downloaders accept a lot of parameters. In order not to saturate
|
412
|
+
the object constructor with arguments, it receives a dictionary of
|
413
|
+
options instead. These options are available through the params
|
414
|
+
attribute for the InfoExtractors to use. The FileDownloader also
|
415
|
+
registers itself as the downloader in charge for the InfoExtractors
|
416
|
+
that are added to it, so this is a "mutual registration".
|
417
|
+
|
418
|
+
Available options:
|
419
|
+
|
420
|
+
username: Username for authentication purposes.
|
421
|
+
password: Password for authentication purposes.
|
422
|
+
usenetrc: Use netrc for authentication instead.
|
423
|
+
quiet: Do not print messages to stdout.
|
424
|
+
forceurl: Force printing final URL.
|
425
|
+
forcetitle: Force printing title.
|
426
|
+
forcethumbnail: Force printing thumbnail URL.
|
427
|
+
forcedescription: Force printing description.
|
428
|
+
forcefilename: Force printing final filename.
|
429
|
+
simulate: Do not download the video files.
|
430
|
+
format: Video format code.
|
431
|
+
format_limit: Highest quality format to try.
|
432
|
+
outtmpl: Template for output names.
|
433
|
+
ignoreerrors: Do not stop on download errors.
|
434
|
+
ratelimit: Download speed limit, in bytes/sec.
|
435
|
+
nooverwrites: Prevent overwriting files.
|
436
|
+
retries: Number of times to retry for HTTP error 5xx
|
437
|
+
continuedl: Try to continue downloads if possible.
|
438
|
+
noprogress: Do not print the progress bar.
|
439
|
+
playliststart: Playlist item to start at.
|
440
|
+
playlistend: Playlist item to end at.
|
441
|
+
matchtitle: Download only matching titles.
|
442
|
+
rejecttitle: Reject downloads for matching titles.
|
443
|
+
logtostderr: Log messages to stderr instead of stdout.
|
444
|
+
consoletitle: Display progress in console window's titlebar.
|
445
|
+
nopart: Do not use temporary .part files.
|
446
|
+
updatetime: Use the Last-modified header to set output file timestamps.
|
447
|
+
writedescription: Write the video description to a .description file
|
448
|
+
writeinfojson: Write the video description to a .info.json file
|
449
|
+
"""
|
450
|
+
|
451
|
+
params = None
|
452
|
+
_ies = []
|
453
|
+
_pps = []
|
454
|
+
_download_retcode = None
|
455
|
+
_num_downloads = None
|
456
|
+
_screen_file = None
|
457
|
+
|
458
|
+
def __init__(self, params):
|
459
|
+
"""Create a FileDownloader object with the given options."""
|
460
|
+
self._ies = []
|
461
|
+
self._pps = []
|
462
|
+
self._download_retcode = 0
|
463
|
+
self._num_downloads = 0
|
464
|
+
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
465
|
+
self.params = params
|
466
|
+
|
467
|
+
@staticmethod
|
468
|
+
def format_bytes(bytes):
    """Format a byte quantity as a short human-readable string.

    bytes may be None (yields 'N/A'), a number, or a str holding a number.
    The result has two decimals followed by one suffix from 'bkMGTPEZY',
    each step being a factor of 1024.

    The suffix index is clamped to the table: values below one byte are
    shown in 'b' rather than wrapping around to 'Y' through a negative
    index, and values beyond the largest unit are shown in 'Y' instead of
    raising IndexError.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = 'bkMGTPEZY'
    exponent = 0
    if bytes > 0:
        exponent = int(math.log(bytes, 1024.0))
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
|
480
|
+
|
481
|
+
@staticmethod
|
482
|
+
def calc_percent(byte_counter, data_len):
    """Return the download progress as a right-aligned percentage string.

    byte_counter is the amount downloaded so far and data_len the total
    size; both are converted with float(). When the total is unknown
    (None) or zero, no ratio can be computed and the placeholder '---.-%'
    is returned instead of raising ZeroDivisionError.
    """
    placeholder = '---.-%'
    if data_len is None:
        return placeholder
    total = float(data_len)
    if total == 0.0:
        return placeholder
    return '%6s' % ('%3.1f%%' % (float(byte_counter) / total * 100.0))
|
486
|
+
|
487
|
+
@staticmethod
|
488
|
+
def calc_eta(start, now, total, current):
    """Return the estimated remaining time as 'MM:SS'.

    The average rate is current / (now - start). The placeholder '--:--'
    is returned when total is None, when nothing has been transferred yet,
    when less than a millisecond has elapsed, or when the estimate exceeds
    99 minutes.
    """
    unknown = '--:--'
    if total is None:
        return unknown
    elapsed = now - start
    if current == 0 or elapsed < 0.001:  # One millisecond
        return unknown
    speed = float(current) / elapsed
    remaining = long((float(total) - float(current)) / speed)
    minutes, seconds = divmod(remaining, 60)
    if minutes > 99:
        return unknown
    return '%02d:%02d' % (minutes, seconds)
|
500
|
+
|
501
|
+
@staticmethod
|
502
|
+
def calc_speed(start, now, bytes):
    """Return the average transfer speed as a 10-character wide string.

    The speed is bytes / (now - start), rendered by
    FileDownloader.format_bytes with '/s' appended. '---b/s' is used when
    no bytes were transferred or less than a millisecond has elapsed.
    """
    elapsed = now - start
    if bytes == 0 or elapsed < 0.001:  # One millisecond
        text = '---b/s'
    else:
        text = '%s/s' % FileDownloader.format_bytes(float(bytes) / elapsed)
    return '%10s' % text
|
507
|
+
|
508
|
+
@staticmethod
|
509
|
+
def best_block_size(elapsed_time, bytes):
    """Pick the size of the next block to read.

    bytes is the size of the block just read and elapsed_time how long the
    read took. The result is the observed rate (bytes per unit of
    elapsed_time), held between half and double the previous block size;
    the upper bound never exceeds 4 MB. If the read took under a
    millisecond, the upper bound is returned.
    """
    lower = max(bytes / 2.0, 1.0)
    upper = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
    if elapsed_time < 0.001:
        return long(upper)
    rate = bytes / elapsed_time
    # The upper bound is tested first, exactly as before.
    if rate > upper:
        chosen = upper
    elif rate < lower:
        chosen = lower
    else:
        chosen = rate
    return long(chosen)
|
520
|
+
|
521
|
+
@staticmethod
|
522
|
+
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into a long integer.

    Accepts a non-negative decimal number optionally followed by one of
    the suffixes k, M, G, T, P, E, Z, Y (case-insensitive), each a further
    factor of 1024. Returns None when bytestr has any other form.
    """
    parsed = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
    if parsed is None:
        return None
    digits, suffix = parsed.groups()
    # An empty suffix is found at index 0, i.e. a multiplier of 1.
    exponent = 'bkmgtpezy'.index(suffix.lower())
    return long(round(float(digits) * 1024.0 ** exponent))
|
530
|
+
|
531
|
+
def add_info_extractor(self, ie):
    """Append ie to the list of InfoExtractors and register self with it."""
    extractors = self._ies
    extractors.append(ie)
    ie.set_downloader(self)
|
535
|
+
|
536
|
+
def add_post_processor(self, pp):
    """Append pp to the postprocessing chain and register self with it."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
|
540
|
+
|
541
|
+
def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
    """Print message to the screen file if not in quiet mode.

    A newline is appended unless skip_eol is true. The text is encoded
    with preferredencoding(); a UnicodeEncodeError is re-raised unless
    ignore_encoding_errors is true.
    """
    try:
        if not self.params.get('quiet', False):
            terminator = [u'\n', u''][skip_eol]
            output = u'%s%s' % (message, terminator)
            # The trailing comma keeps print from adding its own newline.
            print >>self._screen_file, output.encode(preferredencoding()),
        self._screen_file.flush()
    except UnicodeEncodeError:
        if not ignore_encoding_errors:
            raise
|
551
|
+
|
552
|
+
def to_stderr(self, message):
    """Print message, encoded with preferredencoding(), to stderr."""
    encoded = message.encode(preferredencoding())
    print >>sys.stderr, encoded
|
555
|
+
|
556
|
+
def to_cons_title(self, message):
    """Set console/terminal window title to message.

    Does nothing unless the 'consoletitle' option is set. On Windows with
    a console window the title is set through kernel32; otherwise, when
    TERM is in the environment, a title escape sequence is written to
    stderr.
    """
    enabled = self.params.get('consoletitle', False)
    if not enabled:
        return
    if os.name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
            return
    if 'TERM' in os.environ:
        sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
|
566
|
+
|
567
|
+
def fixed_template(self):
    """Return True if the output template contains no %(...)s field."""
    template = self.params['outtmpl']
    placeholder = re.search(u'(?u)%\\(.+?\\)s', template)
    return placeholder is None
|
570
|
+
|
571
|
+
def trouble(self, message=None):
    """Determine action to take when a download problem appears.

    The message, if given, is printed to stderr first. With the
    'ignoreerrors' option set, the downloader's return code is set to 1;
    otherwise DownloadError(message) is raised.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return
    raise DownloadError(message)
|
583
|
+
|
584
|
+
def slow_down(self, start_time, byte_counter):
    """Sleep if the download speed is over the rate limit.

    The limit is read from the 'ratelimit' option. Nothing happens when no
    limit is set, when no bytes have been counted, or when no time has
    elapsed since start_time.
    """
    rate_limit = self.params.get('ratelimit', None)
    if rate_limit is None or byte_counter == 0:
        return
    elapsed = time.time() - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > rate_limit:
        # Sleep for as long as the bytes received beyond the allowance
        # would have taken at the limit rate.
        excess = byte_counter - rate_limit * elapsed
        time.sleep(excess / rate_limit)
|
596
|
+
|
597
|
+
def temp_name(self, filename):
    """Returns a temporary filename for the given filename.

    The name is returned unchanged when the 'nopart' option is set, when
    it is u'-', or when it exists but is not a regular file; otherwise
    u'.part' is appended.
    """
    if self.params.get('nopart', False):
        return filename
    if filename == u'-':
        return filename
    if os.path.exists(filename) and not os.path.isfile(filename):
        return filename
    return filename + u'.part'
|
603
|
+
|
604
|
+
def undo_temp_name(self, filename):
    """Return filename with a trailing u'.part' removed, if it has one."""
    suffix = u'.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
|
608
|
+
|
609
|
+
def try_rename(self, old_filename, new_filename):
    """Rename old_filename to new_filename, reporting failure via trouble().

    Nothing is done when both names are equal.
    """
    if old_filename == new_filename:
        return
    try:
        os.rename(old_filename, new_filename)
    except (IOError, OSError):
        self.trouble(u'ERROR: unable to rename file')
|
616
|
+
|
617
|
+
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    last_modified_hdr is a date string that timeconvert() is asked to
    parse. Returns the resulting timestamp, or None when the header is
    missing, filename is not a regular file, or the header cannot be
    parsed. Failure to actually change the file's times is ignored.
    """
    if last_modified_hdr is None:
        return None
    if not os.path.isfile(filename):
        return None
    filetime = timeconvert(last_modified_hdr)
    if filetime is None:
        return None
    try:
        # Access time becomes "now"; modification time comes from the header.
        os.utime(filename, (time.time(), filetime))
    except Exception:
        # Best effort only: a file whose times cannot be set is still a
        # successful download. KeyboardInterrupt/SystemExit are no longer
        # swallowed on Python >= 2.5.
        pass
    return filetime
|
634
|
+
|
635
|
+
def report_writedescription(self, descfn):
    """Report the path the video description is being written to."""
    notice = u'[info] Writing video description to: %s' % descfn
    self.to_screen(notice, ignore_encoding_errors=True)
|
638
|
+
|
639
|
+
def report_writeinfojson(self, infofn):
    """Report the path the JSON metadata file is being written to."""
    notice = u'[info] Video description metadata as JSON to: %s' % infofn
    self.to_screen(notice, ignore_encoding_errors=True)
|
642
|
+
|
643
|
+
def report_destination(self, filename):
    """Report the destination filename of the download."""
    notice = u'[download] Destination: %s' % filename
    self.to_screen(notice, ignore_encoding_errors=True)
|
646
|
+
|
647
|
+
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
    """Report download progress on the screen and in the console title.

    Does nothing when the 'noprogress' option is set.
    """
    if self.params.get('noprogress', False):
        return
    fields = (percent_str, data_len_str, speed_str, eta_str)
    # The leading carriage return plus skip_eol rewrite the same line.
    self.to_screen(u'\r[download] %s of %s at %s ETA %s' % fields, skip_eol=True)
    stripped = tuple([field.strip() for field in fields])
    self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % stripped)
|
655
|
+
|
656
|
+
def report_resuming_byte(self, resume_len):
    """Report that the download is being resumed at byte resume_len."""
    notice = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(notice)
|
659
|
+
|
660
|
+
def report_retry(self, count, retries):
    """Report a retry (attempt count of retries) after an HTTP server error."""
    attempt = (count, retries)
    self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % attempt)
|
663
|
+
|
664
|
+
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded.

    If the name cannot be encoded for the screen, a generic message
    without the name is printed instead.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
|
670
|
+
|
671
|
+
def report_unable_to_resume(self):
    """Report that the download could not be resumed."""
    notice = u'[download] Unable to resume'
    self.to_screen(notice)
|
674
|
+
|
675
|
+
def report_finish(self):
    """Report that the download finished.

    With the 'noprogress' option a completion message is printed;
    otherwise only an empty message, which ends the progress line.
    """
    if self.params.get('noprogress', False):
        notice = u'[download] Download completed'
    else:
        notice = u''
    self.to_screen(notice)
|
681
|
+
|
682
|
+
def increment_downloads(self):
    """Advance the counter from which each file's ordinal number is taken."""
    self._num_downloads = self._num_downloads + 1
|
685
|
+
|
686
|
+
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option is interpolated with a copy of info_dict extended
    by 'epoch' (the current time in whole seconds) and 'autonumber' (the
    download counter, zero-padded to five digits). On ValueError or
    KeyError the problem is reported through trouble() and None is
    returned.
    """
    try:
        fields = dict(info_dict)
        fields.update(
            epoch=unicode(long(time.time())),
            autonumber=unicode('%05d' % self._num_downloads))
        return self.params['outtmpl'] % fields
    except (ValueError, KeyError):
        self.trouble(u'ERROR: invalid system charset or erroneous output template')
        return None
|
697
|
+
|
698
|
+
def process_info(self, info_dict):
|
699
|
+
"""Process a single dictionary returned by an InfoExtractor."""
|
700
|
+
filename = self.prepare_filename(info_dict)
|
701
|
+
|
702
|
+
# Forced printings
|
703
|
+
if self.params.get('forcetitle', False):
|
704
|
+
print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
|
705
|
+
if self.params.get('forceurl', False):
|
706
|
+
print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
|
707
|
+
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
|
708
|
+
print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
|
709
|
+
if self.params.get('forcedescription', False) and 'description' in info_dict:
|
710
|
+
print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
|
711
|
+
if self.params.get('forcefilename', False) and filename is not None:
|
712
|
+
print filename.encode(preferredencoding(), 'xmlcharrefreplace')
|
713
|
+
if self.params.get('forceformat', False):
|
714
|
+
print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
|
715
|
+
|
716
|
+
# Do nothing else if in simulate mode
|
717
|
+
if self.params.get('simulate', False):
|
718
|
+
return
|
719
|
+
|
720
|
+
if filename is None:
|
721
|
+
return
|
722
|
+
|
723
|
+
matchtitle=self.params.get('matchtitle',False)
|
724
|
+
rejecttitle=self.params.get('rejecttitle',False)
|
725
|
+
title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
|
726
|
+
if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
|
727
|
+
self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
|
728
|
+
return
|
729
|
+
if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
|
730
|
+
self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
|
731
|
+
return
|
732
|
+
|
733
|
+
if self.params.get('nooverwrites', False) and os.path.exists(filename):
|
734
|
+
self.to_stderr(u'WARNING: file exists and will be skipped')
|
735
|
+
return
|
736
|
+
|
737
|
+
try:
|
738
|
+
dn = os.path.dirname(filename)
|
739
|
+
if dn != '' and not os.path.exists(dn):
|
740
|
+
os.makedirs(dn)
|
741
|
+
except (OSError, IOError), err:
|
742
|
+
self.trouble(u'ERROR: unable to create directory ' + unicode(err))
|
743
|
+
return
|
744
|
+
|
745
|
+
if self.params.get('writedescription', False):
|
746
|
+
try:
|
747
|
+
descfn = filename + '.description'
|
748
|
+
self.report_writedescription(descfn)
|
749
|
+
descfile = open(descfn, 'wb')
|
750
|
+
try:
|
751
|
+
descfile.write(info_dict['description'].encode('utf-8'))
|
752
|
+
finally:
|
753
|
+
descfile.close()
|
754
|
+
except (OSError, IOError):
|
755
|
+
self.trouble(u'ERROR: Cannot write description file ' + descfn)
|
756
|
+
return
|
757
|
+
|
758
|
+
if self.params.get('writeinfojson', False):
|
759
|
+
infofn = filename + '.info.json'
|
760
|
+
self.report_writeinfojson(infofn)
|
761
|
+
try:
|
762
|
+
json.dump
|
763
|
+
except (NameError,AttributeError):
|
764
|
+
self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
|
765
|
+
return
|
766
|
+
try:
|
767
|
+
infof = open(infofn, 'wb')
|
768
|
+
try:
|
769
|
+
json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
|
770
|
+
json.dump(json_info_dict, infof)
|
771
|
+
finally:
|
772
|
+
infof.close()
|
773
|
+
except (OSError, IOError):
|
774
|
+
self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
|
775
|
+
return
|
776
|
+
|
777
|
+
if not self.params.get('skip_download', False):
|
778
|
+
try:
|
779
|
+
success = self._do_download(filename, info_dict)
|
780
|
+
except (OSError, IOError), err:
|
781
|
+
raise UnavailableVideoError
|
782
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
783
|
+
self.trouble(u'ERROR: unable to download video data: %s' % str(err))
|
784
|
+
return
|
785
|
+
except (ContentTooShortError, ), err:
|
786
|
+
self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
|
787
|
+
return
|
788
|
+
|
789
|
+
if success:
|
790
|
+
try:
|
791
|
+
self.post_process(filename, info_dict)
|
792
|
+
except (PostProcessingError), err:
|
793
|
+
self.trouble(u'ERROR: postprocessing: %s' % str(err))
|
794
|
+
return
|
795
|
+
|
796
|
+
def download(self, url_list):
    """Hand every URL in url_list to the first extractor that accepts it.

    Raises SameFileError (carrying the output template) when more than
    one URL is given and self.fixed_template() is true. A URL that none
    of the registered extractors considers suitable is reported through
    self.trouble(). Returns self._download_retcode.
    """
    if len(url_list) > 1 and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        for ie in self._ies:
            if ie.suitable(url):
                # Only the first suitable InfoExtractor handles the URL;
                # it extracts the information and processes it.
                ie.extract(url)
                break
        else:
            # The extractor list was exhausted without a match.
            self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

    return self._download_retcode
|
821
|
+
|
822
|
+
def post_process(self, filename, ie_info):
    """Feed a downloaded file through the registered postprocessors.

    A copy of ie_info, extended with a 'filepath' entry set to filename,
    is passed to the run() method of each object in self._pps in turn;
    whatever run() returns becomes the input of the next one. The chain
    ends early as soon as a run() call returns None. ie_info itself is
    not modified.
    """
    current = dict(ie_info, filepath=filename)
    for processor in self._pps:
        current = processor.run(current)
        if current is None:
            # A postprocessor asked for the chain to stop.
            return
|
830
|
+
|
831
|
+
def _download_with_rtmpdump(self, filename, url, player_url):
    """Download an RTMP stream by running the external rtmpdump program.

    filename is the final destination; data is written to
    self.temp_name(filename) and renamed on success. url is the RTMP
    URL. player_url, when not None, is passed to rtmpdump with -W.

    Returns True when rtmpdump finally exits with code 0 (or when a
    stalled download is accepted as complete, see below), False when
    rtmpdump cannot be run or ends with any other exit code; both
    failures are also reported through self.trouble().
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    # Check for rtmpdump first
    try:
        devnull = open(os.path.devnull, 'w')
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        finally:
            # Close the handle explicitly so it is not leaked on every call.
            devnull.close()
    except (OSError, IOError):
        self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
        return False

    # Download using rtmpdump. rtmpdump returns exit code 2 when
    # the connection was interrupted and resuming appears to be
    # possible. This is part of rtmpdump's normal usage, AFAIK.
    basic_args = ['rtmpdump', '-q']
    if player_url is not None:
        basic_args += ['-W', player_url]
    basic_args += ['-r', url, '-o', tmpfilename]

    first_args = list(basic_args)
    if self.params.get('continuedl', False):
        first_args += ['-e', '-k', '1']
    retval = subprocess.call(first_args)

    while retval == 2 or retval == 1:
        prevsize = os.path.getsize(tmpfilename)
        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
        time.sleep(5.0) # This seems to be needed

        # Resume; after an exit code of 1 the '-k 1' option is added too.
        resume_args = basic_args + ['-e']
        if retval == 1:
            resume_args += ['-k', '1']
        retval = subprocess.call(resume_args)

        cursize = os.path.getsize(tmpfilename)
        if prevsize == cursize and retval == 1:
            # No progress and still failing: give up.
            break
        # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
        if prevsize == cursize and retval == 2 and cursize > 1024:
            self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
            retval = 0
            break

    if retval == 0:
        self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
        self.try_rename(tmpfilename, filename)
        return True
    else:
        self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
        return False
|
867
|
+
|
868
|
+
def _do_download(self, filename, info_dict):
    """Download info_dict['url'] to filename, resuming when possible.

    URLs starting with 'rtmp' are delegated to _download_with_rtmpdump().
    Everything else is fetched with urllib2 into a temporary file
    (self.temp_name(filename)) which is renamed to filename at the end.

    Returns True on success (including "file already downloaded") and
    False after an error that was reported through self.trouble().
    Raises ContentTooShortError when fewer/more bytes than announced by
    Content-length were received, and re-raises urllib2.HTTPError for
    HTTP status codes outside 5xx (other than 416).

    When the 'updatetime' param is true (the default), the result of
    self.try_utime() is stored in info_dict['filetime'].
    """
    url = info_dict['url']
    player_url = info_dict.get('player_url', None)

    # Check file already present
    if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
        self.report_file_already_downloaded(filename)
        return True

    # Attempt to download using rtmpdump
    if url.startswith('rtmp'):
        return self._download_with_rtmpdump(filename, url, player_url)

    tmpfilename = self.temp_name(filename)
    stream = None

    # Do not include the Accept-Encoding header
    # NOTE(review): presumably this marker header is consumed by a custom
    # urllib2 handler defined elsewhere in the file -- confirm.
    headers = {'Youtubedl-no-compression': 'True'}
    # basic_request never gets a Range header; it is used to restart from
    # scratch after a 416 response.
    basic_request = urllib2.Request(url, None, headers)
    request = urllib2.Request(url, None, headers)

    # Establish possible resume length
    if os.path.isfile(tmpfilename):
        resume_len = os.path.getsize(tmpfilename)
    else:
        resume_len = 0

    open_mode = 'wb'
    if resume_len != 0:
        if self.params.get('continuedl', False):
            self.report_resuming_byte(resume_len)
            request.add_header('Range','bytes=%d-' % resume_len)
            open_mode = 'ab'
        else:
            # Partial file exists but resuming was not requested: overwrite.
            resume_len = 0

    count = 0
    retries = self.params.get('retries', 0)
    while count <= retries:
        # Establish connection
        try:
            # NOTE(review): the handle taken from info_dict['urlhandle'] is
            # overwritten by the urlopen() call on the very next statement,
            # so it is never actually read from -- confirm whether an
            # "else" was intended.
            if count == 0 and 'urlhandle' in info_dict:
                data = info_dict['urlhandle']
            data = urllib2.urlopen(request)
            break
        except (urllib2.HTTPError, ), err:
            if (err.code < 500 or err.code >= 600) and err.code != 416:
                # Unexpected HTTP error
                raise
            elif err.code == 416:
                # Unable to resume (requested range not satisfiable)
                try:
                    # Open the connection again without the range header
                    data = urllib2.urlopen(basic_request)
                    content_length = data.info()['Content-Length']
                except (urllib2.HTTPError, ), err:
                    # 5xx errors fall through to the retry logic below.
                    if err.code < 500 or err.code >= 600:
                        raise
                else:
                    # Examine the reported length
                    if (content_length is not None and
                            (resume_len - 100 < long(content_length) < resume_len + 100)):
                        # The file had already been fully downloaded.
                        # Explanation to the above condition: in issue #175 it was revealed that
                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                        # changing the file size slightly and causing problems for some users. So
                        # I decided to implement a suggested change and consider the file
                        # completely downloaded if the file size differs less than 100 bytes from
                        # the one in the hard drive.
                        self.report_file_already_downloaded(filename)
                        self.try_rename(tmpfilename, filename)
                        return True
                    else:
                        # The length does not match, we start the download over
                        self.report_unable_to_resume()
                        open_mode = 'wb'
                        break
        # Retry
        count += 1
        if count <= retries:
            self.report_retry(count, retries)

    # The loop above only ends without "break" when every attempt failed.
    if count > retries:
        self.trouble(u'ERROR: giving up after %s retries' % retries)
        return False

    data_len = data.info().get('Content-length', None)
    if data_len is not None:
        # Total expected size: bytes still to come plus those already on disk.
        data_len = long(data_len) + resume_len
    data_len_str = self.format_bytes(data_len)
    byte_counter = 0 + resume_len
    block_size = 1024
    start = time.time()
    while True:
        # Download and write
        before = time.time()
        data_block = data.read(block_size)
        after = time.time()
        if len(data_block) == 0:
            break
        byte_counter += len(data_block)

        # Open file just in time
        if stream is None:
            try:
                # sanitize_open() may return a different name than requested,
                # so both the temporary and the final name are refreshed.
                (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                assert stream is not None
                filename = self.undo_temp_name(tmpfilename)
                self.report_destination(filename)
            except (OSError, IOError), err:
                self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
                return False
        try:
            stream.write(data_block)
        except (IOError, OSError), err:
            self.trouble(u'\nERROR: unable to write data: %s' % str(err))
            return False
        # Pick the next read size from how long this block took to arrive.
        block_size = self.best_block_size(after - before, len(data_block))

        # Progress message
        speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
        if data_len is None:
            self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
        else:
            percent_str = self.calc_percent(byte_counter, data_len)
            eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent_str, data_len_str, speed_str, eta_str)

        # Apply rate limit
        self.slow_down(start, byte_counter - resume_len)

    # stream is only opened once the first non-empty block has arrived.
    if stream is None:
        self.trouble(u'\nERROR: Did not get any data blocks')
        return False
    stream.close()
    self.report_finish()
    if data_len is not None and byte_counter != data_len:
        raise ContentTooShortError(byte_counter, long(data_len))
    self.try_rename(tmpfilename, filename)

    # Update file modification time
    if self.params.get('updatetime', True):
        info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

    return True
|
1013
|
+
|
1014
|
+
|
1015
|
+
class InfoExtractor(object):
    """Base class for information extractors.

    An information extractor takes a URL and works out the details of
    the video (or videos) behind it: the real video URL, the title and
    simplified title, the author and so on. These details are collected
    in a dictionary that is handed to the FileDownloader, which then
    processes it, possibly downloading the video to the file system,
    among other possible outcomes. Every dictionary must provide:

    id:         Video identifier.
    url:        Final video URL.
    uploader:   Nickname of the video uploader.
    title:      Literal title.
    stitle:     Simplified title.
    ext:        Video filename extension.
    format:     Video format.
    player_url: SWF Player URL (may be None).

    The fields below are optional. They exist mainly so youtube-dl can
    act as the backend of a video search function, such as the one in
    youtube2mp3, and are only read when the matching forced printing
    functions are called:

    thumbnail:   Full URL to a video thumbnail image.
    description: One-line video description.

    A subclass is expected to override _real_initialize() and
    _real_extract() and to define a _VALID_URL regexp. It should
    probably be added to the list of extractors as well.
    """

    _ready = False
    _downloader = None

    def __init__(self, downloader=None):
        """Create the extractor, optionally attaching a downloader."""
        self._ready = False
        self.set_downloader(downloader)

    def suitable(self, url):
        """Tell whether url matches this extractor's _VALID_URL."""
        matched = re.match(self._VALID_URL, url)
        return matched is not None

    def initialize(self):
        """Run _real_initialize() once (authentication, etc)."""
        if self._ready:
            return
        self._real_initialize()
        # Only flagged as ready after the real initialization returned.
        self._ready = True

    def extract(self, url):
        """Initialize if needed, then return _real_extract(url)."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Attach the downloader this extractor reports to."""
        self._downloader = downloader

    def _real_initialize(self):
        """Initialization hook; does nothing here. Override in subclasses."""
        return None

    def _real_extract(self, url):
        """Extraction hook; does nothing here. Override in subclasses."""
        return None
|
1083
|
+
|
1084
|
+
|
1085
|
+
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com.

    _real_initialize() sets the site language and optionally logs in and
    confirms age; _real_extract() downloads the watch page and the
    get_video_info page, picks the format(s) to fetch and passes one
    information dictionary per selected format to the downloader's
    process_info().
    """

    # Group 2 of this pattern is the video id (see _real_extract).
    _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in the user's .netrc file.
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality
    _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
    # Format code -> file extension; codes missing here fall back to 'flv'.
    _video_extensions = {
        '13': '3gp',
        '17': 'mp4',
        '18': 'mp4',
        '22': 'mp4',
        '37': 'mp4',
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
    }
    # Format code -> dimensions string, only used by _print_formats().
    # NOTE(review): values look like HEIGHTxWIDTH -- confirm.
    _video_dimensions = {
        '5': '240x400',
        '6': '???',
        '13': '???',
        '17': '144x176',
        '18': '360x640',
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
        '44': '480x854',
        '45': '720x1280',
    }
    IE_NAME = u'youtube'

    def report_lang(self):
        """Report attempt to set language."""
        self._downloader.to_screen(u'[youtube] Setting language')

    def report_login(self):
        """Report attempt to log in."""
        self._downloader.to_screen(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[youtube] Confirming age')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the given format is not available for the video."""
        self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self._downloader.to_screen(u'[youtube] RTMP download detected')

    def _print_formats(self, formats):
        """Print each format code with its extension and dimensions to stdout."""
        print 'Available formats:'
        for x in formats:
            print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))

    def _real_initialize(self):
        """Set the site language and, given credentials, log in and confirm age.

        Does nothing when no downloader is attached. Credentials come from
        the downloader params 'username'/'password', or from .netrc when
        'usenetrc' is set. A .netrc problem, a failed language request or
        a failed login is reported as a warning on stderr and ends the
        initialization early; a failed age confirmation is reported
        through the downloader's trouble().
        """
        if self._downloader is None:
            return

        username = None
        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    username = info[0]
                    password = info[2]
                else:
                    # Raised here so the missing entry is reported by the
                    # handler just below.
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                return

        # Set language
        request = urllib2.Request(self._LANG_URL)
        try:
            self.report_lang()
            urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
            return

        # No authentication to be performed
        if username is None:
            return

        # Log in
        login_form = {
                'current_form': 'loginForm',
                'next': '/',
                'action_login': 'Log In',
                'username': username,
                'password': password,
                }
        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
        try:
            self.report_login()
            login_results = urllib2.urlopen(request).read()
            # Getting the login form back again is treated as a failed login.
            if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                return
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
            return

        # Confirm age
        age_form = {
                'next_url': '/',
                'action_confirm': 'Confirm',
                }
        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
        try:
            self.report_age_confirmation()
            age_results = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Extract the video information for url and pass it to the downloader.

        Every failure is reported through the downloader's trouble() and
        ends the extraction; nothing is returned. With the 'listformats'
        param set, the available formats are printed and nothing is
        downloaded. Otherwise process_info() is called once per selected
        format.
        """
        # Extract video id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return
        video_id = mobj.group(2)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
        try:
            video_webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
            return

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # Drop the backslash escapes from the matched URL.
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        self.report_video_info_webpage_download(video_id)
        # Try each "el" variant in turn until a response carries a 'token'.
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            request = urllib2.Request(video_info_url)
            try:
                video_info_webpage = urllib2.urlopen(request).read()
                video_info = parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                return
        if 'token' not in video_info:
            if 'reason' in video_info:
                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
            else:
                self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
            return

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = urllib.unquote_plus(video_info['author'][0])

        # title
        if 'title' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            return
        video_title = urllib.unquote_plus(video_info['title'][0])
        video_title = video_title.decode('utf-8')
        video_title = sanitize_title(video_title)

        # simplified title
        # Every run of characters outside simple_title_chars becomes one '_'.
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
        simple_title = simple_title.strip(ur'_')

        # thumbnail image
        if 'thumbnail_url' not in video_info:
            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
            video_thumbnail = ''
        else:	# don't panic if we can't find it
            video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = u'NA'
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            # Turn '/', ',' and '-' into spaces and collapse whitespace.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
            for expression in format_expressions:
                try:
                    upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
                except:
                    # NOTE(review): bare except -- any error from a
                    # non-matching date format is silently ignored.
                    pass

        # description
        try:
            # Probe whether the optional lxml module is available.
            lxml.etree
        except NameError:
            video_description = u'No description available.'
            if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
                mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
                if mobj is not None:
                    video_description = mobj.group(1).decode('utf-8')
        else:
            html_parser = lxml.etree.HTMLParser(encoding='utf-8')
            vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
            video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
            # TODO use another parser

        # token
        video_token = urllib.unquote_plus(video_info['token'][0])

        # Decide which formats to download
        req_format = self._downloader.params.get('format', None)

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            # Build a format code (itag) -> URL map from the stream map.
            url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
            url_data = [parse_qs(uds) for uds in url_data_strs]
            url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
            url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)

            format_limit = self._downloader.params.get('format_limit', None)
            if format_limit is not None and format_limit in self._available_formats:
                # Only consider formats from the limit downwards in quality.
                format_list = self._available_formats[self._available_formats.index(format_limit):]
            else:
                format_list = self._available_formats
            existing_formats = [x for x in format_list if x in url_map]
            if len(existing_formats) == 0:
                self._downloader.trouble(u'ERROR: no known formats available for video')
                return
            if self._downloader.params.get('listformats', None):
                self._print_formats(existing_formats)
                return
            if req_format is None or req_format == 'best':
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == 'worst':
                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
            elif req_format in ('-1', 'all'):
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            else:
                # Specific formats. We pick the first in a slash-delimited sequence.
                # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
                req_formats = req_format.split('/')
                video_url_list = None
                for rf in req_formats:
                    if rf in url_map:
                        video_url_list = [(rf, url_map[rf])]
                        break
                if video_url_list is None:
                    self._downloader.trouble(u'ERROR: requested format not available')
                    return
        else:
            self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
            return

        for format_param, video_real_url in video_url_list:
            # At this point we have a new video
            self._downloader.increment_downloads()

            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            try:
                # Process video information
                self._downloader.process_info({
                    'id':		video_id.decode('utf-8'),
                    'url':		video_real_url.decode('utf-8'),
                    'uploader':	video_uploader.decode('utf-8'),
                    'upload_date':	upload_date,
                    'title':	video_title,
                    'stitle':	simple_title,
                    'ext':		video_extension.decode('utf-8'),
                    'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
                    'thumbnail':	video_thumbnail.decode('utf-8'),
                    'description':	video_description,
                    'player_url':	player_url,
                })
            except UnavailableVideoError, err:
                self._downloader.trouble(u'\nERROR: unable to download video')
|
1402
|
+
|
1403
|
+
|
1404
|
+
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com.

    Videos whose id starts with 'yt-' are handed over to the YoutubeIE
    instance given to the constructor; all others are extracted from the
    Metacafe watch page.
    """

    # Group 1 is the video id, group 2 the simplified title (see _real_extract).
    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    _youtube_ie = None
    IE_NAME = u'metacafe'

    def __init__(self, youtube_ie, downloader=None):
        """Store the YouTube extractor used for 'yt-' videos and the downloader."""
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        """Fetch the family-filter disclaimer, then post the age confirmation.

        A failure of either request is reported through the downloader's
        trouble() and ends the initialization.
        """
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER)
        try:
            self.report_disclaimer()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
            return

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
            }
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
        try:
            self.report_age_confirmation()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Extract the video information for url and pass it to the downloader.

        Every failure is reported through the downloader's trouble() and
        ends the extraction; nothing is returned.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
            return

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = urllib.unquote(mobj.group(1))
            # The last three characters of the media URL are used as extension.
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            if mobj is None:
                video_url = mediaURL
            else:
                gdaKey = mobj.group(1)
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        else:
            # No plain mediaURL parameter: fall back to the "flashvars"
            # value and the mediaData entry inside it.
            mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            vardict = parse_qs(mobj.group(1))
            if 'mediaData' not in vardict:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            # Undo the escaped slashes in the matched URL.
            mediaURL = mobj.group(1).replace('\\/', '/')
            video_extension = mediaURL[-3:]
            video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = mobj.group(1)

        try:
            # Process video information
            self._downloader.process_info({
                'id':		video_id.decode('utf-8'),
                'url':		video_url.decode('utf-8'),
                'uploader':	video_uploader.decode('utf-8'),
                'upload_date':	u'NA',
                'title':	video_title,
                'stitle':	simple_title,
                'ext':		video_extension.decode('utf-8'),
                'format':	u'NA',
                'player_url':	None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
|
1544
|
+
|
1545
|
+
|
1546
|
+
class DailymotionIE(InfoExtractor):
|
1547
|
+
"""Information Extractor for Dailymotion"""
|
1548
|
+
|
1549
|
+
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
|
1550
|
+
IE_NAME = u'dailymotion'
|
1551
|
+
|
1552
|
+
def __init__(self, downloader=None):
|
1553
|
+
InfoExtractor.__init__(self, downloader)
|
1554
|
+
|
1555
|
+
def report_download_webpage(self, video_id):
|
1556
|
+
"""Report webpage download."""
|
1557
|
+
self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
|
1558
|
+
|
1559
|
+
def report_extraction(self, video_id):
|
1560
|
+
"""Report information extraction."""
|
1561
|
+
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
|
1562
|
+
|
1563
|
+
def _real_initialize(self):
|
1564
|
+
return
|
1565
|
+
|
1566
|
+
def _real_extract(self, url):
|
1567
|
+
# Extract id and simplified title from URL
|
1568
|
+
mobj = re.match(self._VALID_URL, url)
|
1569
|
+
if mobj is None:
|
1570
|
+
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
1571
|
+
return
|
1572
|
+
|
1573
|
+
# At this point we have a new video
|
1574
|
+
self._downloader.increment_downloads()
|
1575
|
+
video_id = mobj.group(1)
|
1576
|
+
|
1577
|
+
simple_title = mobj.group(2).decode('utf-8')
|
1578
|
+
video_extension = 'flv'
|
1579
|
+
|
1580
|
+
# Retrieve video webpage to extract further information
|
1581
|
+
request = urllib2.Request(url)
|
1582
|
+
request.add_header('Cookie', 'family_filter=off')
|
1583
|
+
try:
|
1584
|
+
self.report_download_webpage(video_id)
|
1585
|
+
webpage = urllib2.urlopen(request).read()
|
1586
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
1587
|
+
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
|
1588
|
+
return
|
1589
|
+
|
1590
|
+
# Extract URL, uploader and title from webpage
|
1591
|
+
self.report_extraction(video_id)
|
1592
|
+
mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
|
1593
|
+
if mobj is None:
|
1594
|
+
self._downloader.trouble(u'ERROR: unable to extract media URL')
|
1595
|
+
return
|
1596
|
+
sequence = urllib.unquote(mobj.group(1))
|
1597
|
+
mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
|
1598
|
+
if mobj is None:
|
1599
|
+
self._downloader.trouble(u'ERROR: unable to extract media URL')
|
1600
|
+
return
|
1601
|
+
mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
|
1602
|
+
|
1603
|
+
# if needed add http://www.dailymotion.com/ if relative URL
|
1604
|
+
|
1605
|
+
video_url = mediaURL
|
1606
|
+
|
1607
|
+
mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
|
1608
|
+
if mobj is None:
|
1609
|
+
self._downloader.trouble(u'ERROR: unable to extract title')
|
1610
|
+
return
|
1611
|
+
video_title = mobj.group(1).decode('utf-8')
|
1612
|
+
video_title = sanitize_title(video_title)
|
1613
|
+
|
1614
|
+
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
|
1615
|
+
if mobj is None:
|
1616
|
+
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
|
1617
|
+
return
|
1618
|
+
video_uploader = mobj.group(1)
|
1619
|
+
|
1620
|
+
try:
|
1621
|
+
# Process video information
|
1622
|
+
self._downloader.process_info({
|
1623
|
+
'id': video_id.decode('utf-8'),
|
1624
|
+
'url': video_url.decode('utf-8'),
|
1625
|
+
'uploader': video_uploader.decode('utf-8'),
|
1626
|
+
'upload_date': u'NA',
|
1627
|
+
'title': video_title,
|
1628
|
+
'stitle': simple_title,
|
1629
|
+
'ext': video_extension.decode('utf-8'),
|
1630
|
+
'format': u'NA',
|
1631
|
+
'player_url': None,
|
1632
|
+
})
|
1633
|
+
except UnavailableVideoError:
|
1634
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
1635
|
+
|
1636
|
+
|
1637
|
+
class GoogleIE(InfoExtractor):
|
1638
|
+
"""Information extractor for video.google.com."""
|
1639
|
+
|
1640
|
+
_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
|
1641
|
+
IE_NAME = u'video.google'
|
1642
|
+
|
1643
|
+
def __init__(self, downloader=None):
|
1644
|
+
InfoExtractor.__init__(self, downloader)
|
1645
|
+
|
1646
|
+
def report_download_webpage(self, video_id):
|
1647
|
+
"""Report webpage download."""
|
1648
|
+
self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
|
1649
|
+
|
1650
|
+
def report_extraction(self, video_id):
|
1651
|
+
"""Report information extraction."""
|
1652
|
+
self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
|
1653
|
+
|
1654
|
+
def _real_initialize(self):
|
1655
|
+
return
|
1656
|
+
|
1657
|
+
def _real_extract(self, url):
|
1658
|
+
# Extract id from URL
|
1659
|
+
mobj = re.match(self._VALID_URL, url)
|
1660
|
+
if mobj is None:
|
1661
|
+
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
1662
|
+
return
|
1663
|
+
|
1664
|
+
# At this point we have a new video
|
1665
|
+
self._downloader.increment_downloads()
|
1666
|
+
video_id = mobj.group(1)
|
1667
|
+
|
1668
|
+
video_extension = 'mp4'
|
1669
|
+
|
1670
|
+
# Retrieve video webpage to extract further information
|
1671
|
+
request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
|
1672
|
+
try:
|
1673
|
+
self.report_download_webpage(video_id)
|
1674
|
+
webpage = urllib2.urlopen(request).read()
|
1675
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
1676
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
1677
|
+
return
|
1678
|
+
|
1679
|
+
# Extract URL, uploader, and title from webpage
|
1680
|
+
self.report_extraction(video_id)
|
1681
|
+
mobj = re.search(r"download_url:'([^']+)'", webpage)
|
1682
|
+
if mobj is None:
|
1683
|
+
video_extension = 'flv'
|
1684
|
+
mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
|
1685
|
+
if mobj is None:
|
1686
|
+
self._downloader.trouble(u'ERROR: unable to extract media URL')
|
1687
|
+
return
|
1688
|
+
mediaURL = urllib.unquote(mobj.group(1))
|
1689
|
+
mediaURL = mediaURL.replace('\\x3d', '\x3d')
|
1690
|
+
mediaURL = mediaURL.replace('\\x26', '\x26')
|
1691
|
+
|
1692
|
+
video_url = mediaURL
|
1693
|
+
|
1694
|
+
mobj = re.search(r'<title>(.*)</title>', webpage)
|
1695
|
+
if mobj is None:
|
1696
|
+
self._downloader.trouble(u'ERROR: unable to extract title')
|
1697
|
+
return
|
1698
|
+
video_title = mobj.group(1).decode('utf-8')
|
1699
|
+
video_title = sanitize_title(video_title)
|
1700
|
+
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
1701
|
+
|
1702
|
+
# Extract video description
|
1703
|
+
mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
|
1704
|
+
if mobj is None:
|
1705
|
+
self._downloader.trouble(u'ERROR: unable to extract video description')
|
1706
|
+
return
|
1707
|
+
video_description = mobj.group(1).decode('utf-8')
|
1708
|
+
if not video_description:
|
1709
|
+
video_description = 'No description available.'
|
1710
|
+
|
1711
|
+
# Extract video thumbnail
|
1712
|
+
if self._downloader.params.get('forcethumbnail', False):
|
1713
|
+
request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
|
1714
|
+
try:
|
1715
|
+
webpage = urllib2.urlopen(request).read()
|
1716
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
1717
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
1718
|
+
return
|
1719
|
+
mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
|
1720
|
+
if mobj is None:
|
1721
|
+
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
|
1722
|
+
return
|
1723
|
+
video_thumbnail = mobj.group(1)
|
1724
|
+
else: # we need something to pass to process_info
|
1725
|
+
video_thumbnail = ''
|
1726
|
+
|
1727
|
+
try:
|
1728
|
+
# Process video information
|
1729
|
+
self._downloader.process_info({
|
1730
|
+
'id': video_id.decode('utf-8'),
|
1731
|
+
'url': video_url.decode('utf-8'),
|
1732
|
+
'uploader': u'NA',
|
1733
|
+
'upload_date': u'NA',
|
1734
|
+
'title': video_title,
|
1735
|
+
'stitle': simple_title,
|
1736
|
+
'ext': video_extension.decode('utf-8'),
|
1737
|
+
'format': u'NA',
|
1738
|
+
'player_url': None,
|
1739
|
+
})
|
1740
|
+
except UnavailableVideoError:
|
1741
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
1742
|
+
|
1743
|
+
|
1744
|
+
class PhotobucketIE(InfoExtractor):
|
1745
|
+
"""Information extractor for photobucket.com."""
|
1746
|
+
|
1747
|
+
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
|
1748
|
+
IE_NAME = u'photobucket'
|
1749
|
+
|
1750
|
+
def __init__(self, downloader=None):
|
1751
|
+
InfoExtractor.__init__(self, downloader)
|
1752
|
+
|
1753
|
+
def report_download_webpage(self, video_id):
|
1754
|
+
"""Report webpage download."""
|
1755
|
+
self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
|
1756
|
+
|
1757
|
+
def report_extraction(self, video_id):
|
1758
|
+
"""Report information extraction."""
|
1759
|
+
self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
|
1760
|
+
|
1761
|
+
def _real_initialize(self):
|
1762
|
+
return
|
1763
|
+
|
1764
|
+
def _real_extract(self, url):
|
1765
|
+
# Extract id from URL
|
1766
|
+
mobj = re.match(self._VALID_URL, url)
|
1767
|
+
if mobj is None:
|
1768
|
+
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
1769
|
+
return
|
1770
|
+
|
1771
|
+
# At this point we have a new video
|
1772
|
+
self._downloader.increment_downloads()
|
1773
|
+
video_id = mobj.group(1)
|
1774
|
+
|
1775
|
+
video_extension = 'flv'
|
1776
|
+
|
1777
|
+
# Retrieve video webpage to extract further information
|
1778
|
+
request = urllib2.Request(url)
|
1779
|
+
try:
|
1780
|
+
self.report_download_webpage(video_id)
|
1781
|
+
webpage = urllib2.urlopen(request).read()
|
1782
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
1783
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
1784
|
+
return
|
1785
|
+
|
1786
|
+
# Extract URL, uploader, and title from webpage
|
1787
|
+
self.report_extraction(video_id)
|
1788
|
+
mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
|
1789
|
+
if mobj is None:
|
1790
|
+
self._downloader.trouble(u'ERROR: unable to extract media URL')
|
1791
|
+
return
|
1792
|
+
mediaURL = urllib.unquote(mobj.group(1))
|
1793
|
+
|
1794
|
+
video_url = mediaURL
|
1795
|
+
|
1796
|
+
mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
|
1797
|
+
if mobj is None:
|
1798
|
+
self._downloader.trouble(u'ERROR: unable to extract title')
|
1799
|
+
return
|
1800
|
+
video_title = mobj.group(1).decode('utf-8')
|
1801
|
+
video_title = sanitize_title(video_title)
|
1802
|
+
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
1803
|
+
|
1804
|
+
video_uploader = mobj.group(2).decode('utf-8')
|
1805
|
+
|
1806
|
+
try:
|
1807
|
+
# Process video information
|
1808
|
+
self._downloader.process_info({
|
1809
|
+
'id': video_id.decode('utf-8'),
|
1810
|
+
'url': video_url.decode('utf-8'),
|
1811
|
+
'uploader': video_uploader,
|
1812
|
+
'upload_date': u'NA',
|
1813
|
+
'title': video_title,
|
1814
|
+
'stitle': simple_title,
|
1815
|
+
'ext': video_extension.decode('utf-8'),
|
1816
|
+
'format': u'NA',
|
1817
|
+
'player_url': None,
|
1818
|
+
})
|
1819
|
+
except UnavailableVideoError:
|
1820
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
1821
|
+
|
1822
|
+
|
1823
|
+
class YahooIE(InfoExtractor):
|
1824
|
+
"""Information extractor for video.yahoo.com."""
|
1825
|
+
|
1826
|
+
# _VALID_URL matches all Yahoo! Video URLs
|
1827
|
+
# _VPAGE_URL matches only the extractable '/watch/' URLs
|
1828
|
+
_VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
|
1829
|
+
_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
|
1830
|
+
IE_NAME = u'video.yahoo'
|
1831
|
+
|
1832
|
+
def __init__(self, downloader=None):
|
1833
|
+
InfoExtractor.__init__(self, downloader)
|
1834
|
+
|
1835
|
+
def report_download_webpage(self, video_id):
|
1836
|
+
"""Report webpage download."""
|
1837
|
+
self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
|
1838
|
+
|
1839
|
+
def report_extraction(self, video_id):
|
1840
|
+
"""Report information extraction."""
|
1841
|
+
self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
|
1842
|
+
|
1843
|
+
def _real_initialize(self):
|
1844
|
+
return
|
1845
|
+
|
1846
|
+
def _real_extract(self, url, new_video=True):
|
1847
|
+
# Extract ID from URL
|
1848
|
+
mobj = re.match(self._VALID_URL, url)
|
1849
|
+
if mobj is None:
|
1850
|
+
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
1851
|
+
return
|
1852
|
+
|
1853
|
+
# At this point we have a new video
|
1854
|
+
self._downloader.increment_downloads()
|
1855
|
+
video_id = mobj.group(2)
|
1856
|
+
video_extension = 'flv'
|
1857
|
+
|
1858
|
+
# Rewrite valid but non-extractable URLs as
|
1859
|
+
# extractable English language /watch/ URLs
|
1860
|
+
if re.match(self._VPAGE_URL, url) is None:
|
1861
|
+
request = urllib2.Request(url)
|
1862
|
+
try:
|
1863
|
+
webpage = urllib2.urlopen(request).read()
|
1864
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
1865
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
1866
|
+
return
|
1867
|
+
|
1868
|
+
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
|
1869
|
+
if mobj is None:
|
1870
|
+
self._downloader.trouble(u'ERROR: Unable to extract id field')
|
1871
|
+
return
|
1872
|
+
yahoo_id = mobj.group(1)
|
1873
|
+
|
1874
|
+
mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
|
1875
|
+
if mobj is None:
|
1876
|
+
self._downloader.trouble(u'ERROR: Unable to extract vid field')
|
1877
|
+
return
|
1878
|
+
yahoo_vid = mobj.group(1)
|
1879
|
+
|
1880
|
+
url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
|
1881
|
+
return self._real_extract(url, new_video=False)
|
1882
|
+
|
1883
|
+
# Retrieve video webpage to extract further information
|
1884
|
+
request = urllib2.Request(url)
|
1885
|
+
try:
|
1886
|
+
self.report_download_webpage(video_id)
|
1887
|
+
webpage = urllib2.urlopen(request).read()
|
1888
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
1889
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
1890
|
+
return
|
1891
|
+
|
1892
|
+
# Extract uploader and title from webpage
|
1893
|
+
self.report_extraction(video_id)
|
1894
|
+
mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
|
1895
|
+
if mobj is None:
|
1896
|
+
self._downloader.trouble(u'ERROR: unable to extract video title')
|
1897
|
+
return
|
1898
|
+
video_title = mobj.group(1).decode('utf-8')
|
1899
|
+
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
1900
|
+
|
1901
|
+
mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
|
1902
|
+
if mobj is None:
|
1903
|
+
self._downloader.trouble(u'ERROR: unable to extract video uploader')
|
1904
|
+
return
|
1905
|
+
video_uploader = mobj.group(1).decode('utf-8')
|
1906
|
+
|
1907
|
+
# Extract video thumbnail
|
1908
|
+
mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
|
1909
|
+
if mobj is None:
|
1910
|
+
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
|
1911
|
+
return
|
1912
|
+
video_thumbnail = mobj.group(1).decode('utf-8')
|
1913
|
+
|
1914
|
+
# Extract video description
|
1915
|
+
mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
|
1916
|
+
if mobj is None:
|
1917
|
+
self._downloader.trouble(u'ERROR: unable to extract video description')
|
1918
|
+
return
|
1919
|
+
video_description = mobj.group(1).decode('utf-8')
|
1920
|
+
if not video_description:
|
1921
|
+
video_description = 'No description available.'
|
1922
|
+
|
1923
|
+
# Extract video height and width
|
1924
|
+
mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
|
1925
|
+
if mobj is None:
|
1926
|
+
self._downloader.trouble(u'ERROR: unable to extract video height')
|
1927
|
+
return
|
1928
|
+
yv_video_height = mobj.group(1)
|
1929
|
+
|
1930
|
+
mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
|
1931
|
+
if mobj is None:
|
1932
|
+
self._downloader.trouble(u'ERROR: unable to extract video width')
|
1933
|
+
return
|
1934
|
+
yv_video_width = mobj.group(1)
|
1935
|
+
|
1936
|
+
# Retrieve video playlist to extract media URL
|
1937
|
+
# I'm not completely sure what all these options are, but we
|
1938
|
+
# seem to need most of them, otherwise the server sends a 401.
|
1939
|
+
yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
|
1940
|
+
yv_bitrate = '700' # according to Wikipedia this is hard-coded
|
1941
|
+
request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
|
1942
|
+
'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
|
1943
|
+
'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
|
1944
|
+
try:
|
1945
|
+
self.report_download_webpage(video_id)
|
1946
|
+
webpage = urllib2.urlopen(request).read()
|
1947
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
1948
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
1949
|
+
return
|
1950
|
+
|
1951
|
+
# Extract media URL from playlist XML
|
1952
|
+
mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
|
1953
|
+
if mobj is None:
|
1954
|
+
self._downloader.trouble(u'ERROR: Unable to extract media URL')
|
1955
|
+
return
|
1956
|
+
video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
|
1957
|
+
video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
|
1958
|
+
|
1959
|
+
try:
|
1960
|
+
# Process video information
|
1961
|
+
self._downloader.process_info({
|
1962
|
+
'id': video_id.decode('utf-8'),
|
1963
|
+
'url': video_url,
|
1964
|
+
'uploader': video_uploader,
|
1965
|
+
'upload_date': u'NA',
|
1966
|
+
'title': video_title,
|
1967
|
+
'stitle': simple_title,
|
1968
|
+
'ext': video_extension.decode('utf-8'),
|
1969
|
+
'thumbnail': video_thumbnail.decode('utf-8'),
|
1970
|
+
'description': video_description,
|
1971
|
+
'thumbnail': video_thumbnail,
|
1972
|
+
'player_url': None,
|
1973
|
+
})
|
1974
|
+
except UnavailableVideoError:
|
1975
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
1976
|
+
|
1977
|
+
|
1978
|
+
class VimeoIE(InfoExtractor):
|
1979
|
+
"""Information extractor for vimeo.com."""
|
1980
|
+
|
1981
|
+
# _VALID_URL matches Vimeo URLs
|
1982
|
+
_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
|
1983
|
+
IE_NAME = u'vimeo'
|
1984
|
+
|
1985
|
+
def __init__(self, downloader=None):
|
1986
|
+
InfoExtractor.__init__(self, downloader)
|
1987
|
+
|
1988
|
+
def report_download_webpage(self, video_id):
|
1989
|
+
"""Report webpage download."""
|
1990
|
+
self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
|
1991
|
+
|
1992
|
+
def report_extraction(self, video_id):
|
1993
|
+
"""Report information extraction."""
|
1994
|
+
self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
|
1995
|
+
|
1996
|
+
def _real_initialize(self):
|
1997
|
+
return
|
1998
|
+
|
1999
|
+
def _real_extract(self, url, new_video=True):
|
2000
|
+
# Extract ID from URL
|
2001
|
+
mobj = re.match(self._VALID_URL, url)
|
2002
|
+
if mobj is None:
|
2003
|
+
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
2004
|
+
return
|
2005
|
+
|
2006
|
+
# At this point we have a new video
|
2007
|
+
self._downloader.increment_downloads()
|
2008
|
+
video_id = mobj.group(1)
|
2009
|
+
|
2010
|
+
# Retrieve video webpage to extract further information
|
2011
|
+
request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
|
2012
|
+
try:
|
2013
|
+
self.report_download_webpage(video_id)
|
2014
|
+
webpage = urllib2.urlopen(request).read()
|
2015
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2016
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
2017
|
+
return
|
2018
|
+
|
2019
|
+
# Now we begin extracting as much information as we can from what we
|
2020
|
+
# retrieved. First we extract the information common to all extractors,
|
2021
|
+
# and latter we extract those that are Vimeo specific.
|
2022
|
+
self.report_extraction(video_id)
|
2023
|
+
|
2024
|
+
# Extract title
|
2025
|
+
mobj = re.search(r'<caption>(.*?)</caption>', webpage)
|
2026
|
+
if mobj is None:
|
2027
|
+
self._downloader.trouble(u'ERROR: unable to extract video title')
|
2028
|
+
return
|
2029
|
+
video_title = mobj.group(1).decode('utf-8')
|
2030
|
+
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
2031
|
+
|
2032
|
+
# Extract uploader
|
2033
|
+
mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
|
2034
|
+
if mobj is None:
|
2035
|
+
self._downloader.trouble(u'ERROR: unable to extract video uploader')
|
2036
|
+
return
|
2037
|
+
video_uploader = mobj.group(1).decode('utf-8')
|
2038
|
+
|
2039
|
+
# Extract video thumbnail
|
2040
|
+
mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
|
2041
|
+
if mobj is None:
|
2042
|
+
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
|
2043
|
+
return
|
2044
|
+
video_thumbnail = mobj.group(1).decode('utf-8')
|
2045
|
+
|
2046
|
+
# # Extract video description
|
2047
|
+
# mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
|
2048
|
+
# if mobj is None:
|
2049
|
+
# self._downloader.trouble(u'ERROR: unable to extract video description')
|
2050
|
+
# return
|
2051
|
+
# video_description = mobj.group(1).decode('utf-8')
|
2052
|
+
# if not video_description: video_description = 'No description available.'
|
2053
|
+
video_description = 'Foo.'
|
2054
|
+
|
2055
|
+
# Vimeo specific: extract request signature
|
2056
|
+
mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
|
2057
|
+
if mobj is None:
|
2058
|
+
self._downloader.trouble(u'ERROR: unable to extract request signature')
|
2059
|
+
return
|
2060
|
+
sig = mobj.group(1).decode('utf-8')
|
2061
|
+
|
2062
|
+
# Vimeo specific: extract video quality information
|
2063
|
+
mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
|
2064
|
+
if mobj is None:
|
2065
|
+
self._downloader.trouble(u'ERROR: unable to extract video quality information')
|
2066
|
+
return
|
2067
|
+
quality = mobj.group(1).decode('utf-8')
|
2068
|
+
|
2069
|
+
if int(quality) == 1:
|
2070
|
+
quality = 'hd'
|
2071
|
+
else:
|
2072
|
+
quality = 'sd'
|
2073
|
+
|
2074
|
+
# Vimeo specific: Extract request signature expiration
|
2075
|
+
mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
|
2076
|
+
if mobj is None:
|
2077
|
+
self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
|
2078
|
+
return
|
2079
|
+
sig_exp = mobj.group(1).decode('utf-8')
|
2080
|
+
|
2081
|
+
video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
|
2082
|
+
|
2083
|
+
try:
|
2084
|
+
# Process video information
|
2085
|
+
self._downloader.process_info({
|
2086
|
+
'id': video_id.decode('utf-8'),
|
2087
|
+
'url': video_url,
|
2088
|
+
'uploader': video_uploader,
|
2089
|
+
'upload_date': u'NA',
|
2090
|
+
'title': video_title,
|
2091
|
+
'stitle': simple_title,
|
2092
|
+
'ext': u'mp4',
|
2093
|
+
'thumbnail': video_thumbnail.decode('utf-8'),
|
2094
|
+
'description': video_description,
|
2095
|
+
'thumbnail': video_thumbnail,
|
2096
|
+
'description': video_description,
|
2097
|
+
'player_url': None,
|
2098
|
+
})
|
2099
|
+
except UnavailableVideoError:
|
2100
|
+
self._downloader.trouble(u'ERROR: unable to download video')
|
2101
|
+
|
2102
|
+
|
2103
|
+
class GenericIE(InfoExtractor):
|
2104
|
+
"""Generic last-resort information extractor."""
|
2105
|
+
|
2106
|
+
_VALID_URL = r'.*'
|
2107
|
+
IE_NAME = u'generic'
|
2108
|
+
|
2109
|
+
def __init__(self, downloader=None):
|
2110
|
+
InfoExtractor.__init__(self, downloader)
|
2111
|
+
|
2112
|
+
def report_download_webpage(self, video_id):
|
2113
|
+
"""Report webpage download."""
|
2114
|
+
self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
|
2115
|
+
self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
|
2116
|
+
|
2117
|
+
def report_extraction(self, video_id):
|
2118
|
+
"""Report information extraction."""
|
2119
|
+
self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
|
2120
|
+
|
2121
|
+
def _real_initialize(self):
|
2122
|
+
return
|
2123
|
+
|
2124
|
+
def _real_extract(self, url):
|
2125
|
+
# At this point we have a new video
|
2126
|
+
self._downloader.increment_downloads()
|
2127
|
+
|
2128
|
+
video_id = url.split('/')[-1]
|
2129
|
+
request = urllib2.Request(url)
|
2130
|
+
try:
|
2131
|
+
self.report_download_webpage(video_id)
|
2132
|
+
webpage = urllib2.urlopen(request).read()
|
2133
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2134
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
2135
|
+
return
|
2136
|
+
except ValueError, err:
|
2137
|
+
# since this is the last-resort InfoExtractor, if
|
2138
|
+
# this error is thrown, it'll be thrown here
|
2139
|
+
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
2140
|
+
return
|
2141
|
+
|
2142
|
+
self.report_extraction(video_id)
|
2143
|
+
# Start with something easy: JW Player in SWFObject
|
2144
|
+
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
2145
|
+
if mobj is None:
|
2146
|
+
# Broaden the search a little bit
|
2147
|
+
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
2148
|
+
if mobj is None:
|
2149
|
+
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
2150
|
+
return
|
2151
|
+
|
2152
|
+
# It's possible that one of the regexes
|
2153
|
+
# matched, but returned an empty group:
|
2154
|
+
if mobj.group(1) is None:
|
2155
|
+
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
|
2156
|
+
return
|
2157
|
+
|
2158
|
+
video_url = urllib.unquote(mobj.group(1))
|
2159
|
+
video_id = os.path.basename(video_url)
|
2160
|
+
|
2161
|
+
# here's a fun little line of code for you:
|
2162
|
+
video_extension = os.path.splitext(video_id)[1][1:]
|
2163
|
+
video_id = os.path.splitext(video_id)[0]
|
2164
|
+
|
2165
|
+
# it's tempting to parse this further, but you would
|
2166
|
+
# have to take into account all the variations like
|
2167
|
+
# Video Title - Site Name
|
2168
|
+
# Site Name | Video Title
|
2169
|
+
# Video Title - Tagline | Site Name
|
2170
|
+
# and so on and so forth; it's just not practical
|
2171
|
+
mobj = re.search(r'<title>(.*)</title>', webpage)
|
2172
|
+
if mobj is None:
|
2173
|
+
self._downloader.trouble(u'ERROR: unable to extract title')
|
2174
|
+
return
|
2175
|
+
video_title = mobj.group(1).decode('utf-8')
|
2176
|
+
video_title = sanitize_title(video_title)
|
2177
|
+
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
2178
|
+
|
2179
|
+
# video uploader is domain name
|
2180
|
+
mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
|
2181
|
+
if mobj is None:
|
2182
|
+
self._downloader.trouble(u'ERROR: unable to extract title')
|
2183
|
+
return
|
2184
|
+
video_uploader = mobj.group(1).decode('utf-8')
|
2185
|
+
|
2186
|
+
try:
|
2187
|
+
# Process video information
|
2188
|
+
self._downloader.process_info({
|
2189
|
+
'id': video_id.decode('utf-8'),
|
2190
|
+
'url': video_url.decode('utf-8'),
|
2191
|
+
'uploader': video_uploader,
|
2192
|
+
'upload_date': u'NA',
|
2193
|
+
'title': video_title,
|
2194
|
+
'stitle': simple_title,
|
2195
|
+
'ext': video_extension.decode('utf-8'),
|
2196
|
+
'format': u'NA',
|
2197
|
+
'player_url': None,
|
2198
|
+
})
|
2199
|
+
except UnavailableVideoError, err:
|
2200
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
2201
|
+
|
2202
|
+
|
2203
|
+
class YoutubeSearchIE(InfoExtractor):
|
2204
|
+
"""Information Extractor for YouTube search queries."""
|
2205
|
+
_VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
|
2206
|
+
_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
|
2207
|
+
_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
|
2208
|
+
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
|
2209
|
+
_youtube_ie = None
|
2210
|
+
_max_youtube_results = 1000
|
2211
|
+
IE_NAME = u'youtube:search'
|
2212
|
+
|
2213
|
+
def __init__(self, youtube_ie, downloader=None):
|
2214
|
+
InfoExtractor.__init__(self, downloader)
|
2215
|
+
self._youtube_ie = youtube_ie
|
2216
|
+
|
2217
|
+
def report_download_page(self, query, pagenum):
|
2218
|
+
"""Report attempt to download playlist page with given number."""
|
2219
|
+
query = query.decode(preferredencoding())
|
2220
|
+
self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
|
2221
|
+
|
2222
|
+
def _real_initialize(self):
|
2223
|
+
self._youtube_ie.initialize()
|
2224
|
+
|
2225
|
+
def _real_extract(self, query):
|
2226
|
+
mobj = re.match(self._VALID_URL, query)
|
2227
|
+
if mobj is None:
|
2228
|
+
self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
|
2229
|
+
return
|
2230
|
+
|
2231
|
+
prefix, query = query.split(':')
|
2232
|
+
prefix = prefix[8:]
|
2233
|
+
query = query.encode('utf-8')
|
2234
|
+
if prefix == '':
|
2235
|
+
self._download_n_results(query, 1)
|
2236
|
+
return
|
2237
|
+
elif prefix == 'all':
|
2238
|
+
self._download_n_results(query, self._max_youtube_results)
|
2239
|
+
return
|
2240
|
+
else:
|
2241
|
+
try:
|
2242
|
+
n = long(prefix)
|
2243
|
+
if n <= 0:
|
2244
|
+
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
|
2245
|
+
return
|
2246
|
+
elif n > self._max_youtube_results:
|
2247
|
+
self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
|
2248
|
+
n = self._max_youtube_results
|
2249
|
+
self._download_n_results(query, n)
|
2250
|
+
return
|
2251
|
+
except ValueError: # parsing prefix as integer fails
|
2252
|
+
self._download_n_results(query, 1)
|
2253
|
+
return
|
2254
|
+
|
2255
|
+
def _download_n_results(self, query, n):
|
2256
|
+
"""Downloads a specified number of results for a query"""
|
2257
|
+
|
2258
|
+
video_ids = []
|
2259
|
+
already_seen = set()
|
2260
|
+
pagenum = 1
|
2261
|
+
|
2262
|
+
while True:
|
2263
|
+
self.report_download_page(query, pagenum)
|
2264
|
+
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
|
2265
|
+
request = urllib2.Request(result_url)
|
2266
|
+
try:
|
2267
|
+
page = urllib2.urlopen(request).read()
|
2268
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2269
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
|
2270
|
+
return
|
2271
|
+
|
2272
|
+
# Extract video identifiers
|
2273
|
+
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
|
2274
|
+
video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
|
2275
|
+
if video_id not in already_seen:
|
2276
|
+
video_ids.append(video_id)
|
2277
|
+
already_seen.add(video_id)
|
2278
|
+
if len(video_ids) == n:
|
2279
|
+
# Specified n videos reached
|
2280
|
+
for id in video_ids:
|
2281
|
+
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
|
2282
|
+
return
|
2283
|
+
|
2284
|
+
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
2285
|
+
for id in video_ids:
|
2286
|
+
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
|
2287
|
+
return
|
2288
|
+
|
2289
|
+
pagenum = pagenum + 1
|
2290
|
+
|
2291
|
+
|
2292
|
+
class GoogleSearchIE(InfoExtractor):
|
2293
|
+
"""Information Extractor for Google Video search queries."""
|
2294
|
+
_VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
|
2295
|
+
_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
|
2296
|
+
_VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
|
2297
|
+
_MORE_PAGES_INDICATOR = r'<span>Next</span>'
|
2298
|
+
_google_ie = None
|
2299
|
+
_max_google_results = 1000
|
2300
|
+
IE_NAME = u'video.google:search'
|
2301
|
+
|
2302
|
+
def __init__(self, google_ie, downloader=None):
|
2303
|
+
InfoExtractor.__init__(self, downloader)
|
2304
|
+
self._google_ie = google_ie
|
2305
|
+
|
2306
|
+
def report_download_page(self, query, pagenum):
|
2307
|
+
"""Report attempt to download playlist page with given number."""
|
2308
|
+
query = query.decode(preferredencoding())
|
2309
|
+
self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
|
2310
|
+
|
2311
|
+
def _real_initialize(self):
|
2312
|
+
self._google_ie.initialize()
|
2313
|
+
|
2314
|
+
def _real_extract(self, query):
|
2315
|
+
mobj = re.match(self._VALID_URL, query)
|
2316
|
+
if mobj is None:
|
2317
|
+
self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
|
2318
|
+
return
|
2319
|
+
|
2320
|
+
prefix, query = query.split(':')
|
2321
|
+
prefix = prefix[8:]
|
2322
|
+
query = query.encode('utf-8')
|
2323
|
+
if prefix == '':
|
2324
|
+
self._download_n_results(query, 1)
|
2325
|
+
return
|
2326
|
+
elif prefix == 'all':
|
2327
|
+
self._download_n_results(query, self._max_google_results)
|
2328
|
+
return
|
2329
|
+
else:
|
2330
|
+
try:
|
2331
|
+
n = long(prefix)
|
2332
|
+
if n <= 0:
|
2333
|
+
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
|
2334
|
+
return
|
2335
|
+
elif n > self._max_google_results:
|
2336
|
+
self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
|
2337
|
+
n = self._max_google_results
|
2338
|
+
self._download_n_results(query, n)
|
2339
|
+
return
|
2340
|
+
except ValueError: # parsing prefix as integer fails
|
2341
|
+
self._download_n_results(query, 1)
|
2342
|
+
return
|
2343
|
+
|
2344
|
+
def _download_n_results(self, query, n):
|
2345
|
+
"""Downloads a specified number of results for a query"""
|
2346
|
+
|
2347
|
+
video_ids = []
|
2348
|
+
already_seen = set()
|
2349
|
+
pagenum = 1
|
2350
|
+
|
2351
|
+
while True:
|
2352
|
+
self.report_download_page(query, pagenum)
|
2353
|
+
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
|
2354
|
+
request = urllib2.Request(result_url)
|
2355
|
+
try:
|
2356
|
+
page = urllib2.urlopen(request).read()
|
2357
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2358
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
|
2359
|
+
return
|
2360
|
+
|
2361
|
+
# Extract video identifiers
|
2362
|
+
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
|
2363
|
+
video_id = mobj.group(1)
|
2364
|
+
if video_id not in already_seen:
|
2365
|
+
video_ids.append(video_id)
|
2366
|
+
already_seen.add(video_id)
|
2367
|
+
if len(video_ids) == n:
|
2368
|
+
# Specified n videos reached
|
2369
|
+
for id in video_ids:
|
2370
|
+
self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
|
2371
|
+
return
|
2372
|
+
|
2373
|
+
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
2374
|
+
for id in video_ids:
|
2375
|
+
self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
|
2376
|
+
return
|
2377
|
+
|
2378
|
+
pagenum = pagenum + 1
|
2379
|
+
|
2380
|
+
|
2381
|
+
class YahooSearchIE(InfoExtractor):
|
2382
|
+
"""Information Extractor for Yahoo! Video search queries."""
|
2383
|
+
_VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
|
2384
|
+
_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
|
2385
|
+
_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
|
2386
|
+
_MORE_PAGES_INDICATOR = r'\s*Next'
|
2387
|
+
_yahoo_ie = None
|
2388
|
+
_max_yahoo_results = 1000
|
2389
|
+
IE_NAME = u'video.yahoo:search'
|
2390
|
+
|
2391
|
+
def __init__(self, yahoo_ie, downloader=None):
|
2392
|
+
InfoExtractor.__init__(self, downloader)
|
2393
|
+
self._yahoo_ie = yahoo_ie
|
2394
|
+
|
2395
|
+
def report_download_page(self, query, pagenum):
|
2396
|
+
"""Report attempt to download playlist page with given number."""
|
2397
|
+
query = query.decode(preferredencoding())
|
2398
|
+
self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
|
2399
|
+
|
2400
|
+
def _real_initialize(self):
|
2401
|
+
self._yahoo_ie.initialize()
|
2402
|
+
|
2403
|
+
def _real_extract(self, query):
|
2404
|
+
mobj = re.match(self._VALID_URL, query)
|
2405
|
+
if mobj is None:
|
2406
|
+
self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
|
2407
|
+
return
|
2408
|
+
|
2409
|
+
prefix, query = query.split(':')
|
2410
|
+
prefix = prefix[8:]
|
2411
|
+
query = query.encode('utf-8')
|
2412
|
+
if prefix == '':
|
2413
|
+
self._download_n_results(query, 1)
|
2414
|
+
return
|
2415
|
+
elif prefix == 'all':
|
2416
|
+
self._download_n_results(query, self._max_yahoo_results)
|
2417
|
+
return
|
2418
|
+
else:
|
2419
|
+
try:
|
2420
|
+
n = long(prefix)
|
2421
|
+
if n <= 0:
|
2422
|
+
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
|
2423
|
+
return
|
2424
|
+
elif n > self._max_yahoo_results:
|
2425
|
+
self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
|
2426
|
+
n = self._max_yahoo_results
|
2427
|
+
self._download_n_results(query, n)
|
2428
|
+
return
|
2429
|
+
except ValueError: # parsing prefix as integer fails
|
2430
|
+
self._download_n_results(query, 1)
|
2431
|
+
return
|
2432
|
+
|
2433
|
+
def _download_n_results(self, query, n):
|
2434
|
+
"""Downloads a specified number of results for a query"""
|
2435
|
+
|
2436
|
+
video_ids = []
|
2437
|
+
already_seen = set()
|
2438
|
+
pagenum = 1
|
2439
|
+
|
2440
|
+
while True:
|
2441
|
+
self.report_download_page(query, pagenum)
|
2442
|
+
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
|
2443
|
+
request = urllib2.Request(result_url)
|
2444
|
+
try:
|
2445
|
+
page = urllib2.urlopen(request).read()
|
2446
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2447
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
|
2448
|
+
return
|
2449
|
+
|
2450
|
+
# Extract video identifiers
|
2451
|
+
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
|
2452
|
+
video_id = mobj.group(1)
|
2453
|
+
if video_id not in already_seen:
|
2454
|
+
video_ids.append(video_id)
|
2455
|
+
already_seen.add(video_id)
|
2456
|
+
if len(video_ids) == n:
|
2457
|
+
# Specified n videos reached
|
2458
|
+
for id in video_ids:
|
2459
|
+
self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
|
2460
|
+
return
|
2461
|
+
|
2462
|
+
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
2463
|
+
for id in video_ids:
|
2464
|
+
self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
|
2465
|
+
return
|
2466
|
+
|
2467
|
+
pagenum = pagenum + 1
|
2468
|
+
|
2469
|
+
|
2470
|
+
class YoutubePlaylistIE(InfoExtractor):
|
2471
|
+
"""Information Extractor for YouTube playlists."""
|
2472
|
+
|
2473
|
+
_VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
|
2474
|
+
_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
|
2475
|
+
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
|
2476
|
+
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
|
2477
|
+
_youtube_ie = None
|
2478
|
+
IE_NAME = u'youtube:playlist'
|
2479
|
+
|
2480
|
+
def __init__(self, youtube_ie, downloader=None):
|
2481
|
+
InfoExtractor.__init__(self, downloader)
|
2482
|
+
self._youtube_ie = youtube_ie
|
2483
|
+
|
2484
|
+
def report_download_page(self, playlist_id, pagenum):
|
2485
|
+
"""Report attempt to download playlist page with given number."""
|
2486
|
+
self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
|
2487
|
+
|
2488
|
+
def _real_initialize(self):
|
2489
|
+
self._youtube_ie.initialize()
|
2490
|
+
|
2491
|
+
def _real_extract(self, url):
|
2492
|
+
# Extract playlist id
|
2493
|
+
mobj = re.match(self._VALID_URL, url)
|
2494
|
+
if mobj is None:
|
2495
|
+
self._downloader.trouble(u'ERROR: invalid url: %s' % url)
|
2496
|
+
return
|
2497
|
+
|
2498
|
+
# Single video case
|
2499
|
+
if mobj.group(3) is not None:
|
2500
|
+
self._youtube_ie.extract(mobj.group(3))
|
2501
|
+
return
|
2502
|
+
|
2503
|
+
# Download playlist pages
|
2504
|
+
# prefix is 'p' as default for playlists but there are other types that need extra care
|
2505
|
+
playlist_prefix = mobj.group(1)
|
2506
|
+
if playlist_prefix == 'a':
|
2507
|
+
playlist_access = 'artist'
|
2508
|
+
else:
|
2509
|
+
playlist_prefix = 'p'
|
2510
|
+
playlist_access = 'view_play_list'
|
2511
|
+
playlist_id = mobj.group(2)
|
2512
|
+
video_ids = []
|
2513
|
+
pagenum = 1
|
2514
|
+
|
2515
|
+
while True:
|
2516
|
+
self.report_download_page(playlist_id, pagenum)
|
2517
|
+
request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
|
2518
|
+
try:
|
2519
|
+
page = urllib2.urlopen(request).read()
|
2520
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2521
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
|
2522
|
+
return
|
2523
|
+
|
2524
|
+
# Extract video identifiers
|
2525
|
+
ids_in_page = []
|
2526
|
+
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
|
2527
|
+
if mobj.group(1) not in ids_in_page:
|
2528
|
+
ids_in_page.append(mobj.group(1))
|
2529
|
+
video_ids.extend(ids_in_page)
|
2530
|
+
|
2531
|
+
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
2532
|
+
break
|
2533
|
+
pagenum = pagenum + 1
|
2534
|
+
|
2535
|
+
playliststart = self._downloader.params.get('playliststart', 1) - 1
|
2536
|
+
playlistend = self._downloader.params.get('playlistend', -1)
|
2537
|
+
video_ids = video_ids[playliststart:playlistend]
|
2538
|
+
|
2539
|
+
for id in video_ids:
|
2540
|
+
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
|
2541
|
+
return
|
2542
|
+
|
2543
|
+
|
2544
|
+
class YoutubeUserIE(InfoExtractor):
|
2545
|
+
"""Information Extractor for YouTube users."""
|
2546
|
+
|
2547
|
+
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
|
2548
|
+
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
|
2549
|
+
_GDATA_PAGE_SIZE = 50
|
2550
|
+
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
|
2551
|
+
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
|
2552
|
+
_youtube_ie = None
|
2553
|
+
IE_NAME = u'youtube:user'
|
2554
|
+
|
2555
|
+
def __init__(self, youtube_ie, downloader=None):
|
2556
|
+
InfoExtractor.__init__(self, downloader)
|
2557
|
+
self._youtube_ie = youtube_ie
|
2558
|
+
|
2559
|
+
def report_download_page(self, username, start_index):
|
2560
|
+
"""Report attempt to download user page."""
|
2561
|
+
self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
|
2562
|
+
(username, start_index, start_index + self._GDATA_PAGE_SIZE))
|
2563
|
+
|
2564
|
+
def _real_initialize(self):
|
2565
|
+
self._youtube_ie.initialize()
|
2566
|
+
|
2567
|
+
def _real_extract(self, url):
|
2568
|
+
# Extract username
|
2569
|
+
mobj = re.match(self._VALID_URL, url)
|
2570
|
+
if mobj is None:
|
2571
|
+
self._downloader.trouble(u'ERROR: invalid url: %s' % url)
|
2572
|
+
return
|
2573
|
+
|
2574
|
+
username = mobj.group(1)
|
2575
|
+
|
2576
|
+
# Download video ids using YouTube Data API. Result size per
|
2577
|
+
# query is limited (currently to 50 videos) so we need to query
|
2578
|
+
# page by page until there are no video ids - it means we got
|
2579
|
+
# all of them.
|
2580
|
+
|
2581
|
+
video_ids = []
|
2582
|
+
pagenum = 0
|
2583
|
+
|
2584
|
+
while True:
|
2585
|
+
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
|
2586
|
+
self.report_download_page(username, start_index)
|
2587
|
+
|
2588
|
+
request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
|
2589
|
+
|
2590
|
+
try:
|
2591
|
+
page = urllib2.urlopen(request).read()
|
2592
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2593
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
|
2594
|
+
return
|
2595
|
+
|
2596
|
+
# Extract video identifiers
|
2597
|
+
ids_in_page = []
|
2598
|
+
|
2599
|
+
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
|
2600
|
+
if mobj.group(1) not in ids_in_page:
|
2601
|
+
ids_in_page.append(mobj.group(1))
|
2602
|
+
|
2603
|
+
video_ids.extend(ids_in_page)
|
2604
|
+
|
2605
|
+
# A little optimization - if current page is not
|
2606
|
+
# "full", ie. does not contain PAGE_SIZE video ids then
|
2607
|
+
# we can assume that this page is the last one - there
|
2608
|
+
# are no more ids on further pages - no need to query
|
2609
|
+
# again.
|
2610
|
+
|
2611
|
+
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
2612
|
+
break
|
2613
|
+
|
2614
|
+
pagenum += 1
|
2615
|
+
|
2616
|
+
all_ids_count = len(video_ids)
|
2617
|
+
playliststart = self._downloader.params.get('playliststart', 1) - 1
|
2618
|
+
playlistend = self._downloader.params.get('playlistend', -1)
|
2619
|
+
|
2620
|
+
if playlistend == -1:
|
2621
|
+
video_ids = video_ids[playliststart:]
|
2622
|
+
else:
|
2623
|
+
video_ids = video_ids[playliststart:playlistend]
|
2624
|
+
|
2625
|
+
self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
|
2626
|
+
(username, all_ids_count, len(video_ids)))
|
2627
|
+
|
2628
|
+
for video_id in video_ids:
|
2629
|
+
self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
|
2630
|
+
|
2631
|
+
|
2632
|
+
class DepositFilesIE(InfoExtractor):
|
2633
|
+
"""Information extractor for depositfiles.com"""
|
2634
|
+
|
2635
|
+
_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
|
2636
|
+
IE_NAME = u'DepositFiles'
|
2637
|
+
|
2638
|
+
def __init__(self, downloader=None):
|
2639
|
+
InfoExtractor.__init__(self, downloader)
|
2640
|
+
|
2641
|
+
def report_download_webpage(self, file_id):
|
2642
|
+
"""Report webpage download."""
|
2643
|
+
self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
|
2644
|
+
|
2645
|
+
def report_extraction(self, file_id):
|
2646
|
+
"""Report information extraction."""
|
2647
|
+
self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
|
2648
|
+
|
2649
|
+
def _real_initialize(self):
|
2650
|
+
return
|
2651
|
+
|
2652
|
+
def _real_extract(self, url):
|
2653
|
+
# At this point we have a new file
|
2654
|
+
self._downloader.increment_downloads()
|
2655
|
+
|
2656
|
+
file_id = url.split('/')[-1]
|
2657
|
+
# Rebuild url in english locale
|
2658
|
+
url = 'http://depositfiles.com/en/files/' + file_id
|
2659
|
+
|
2660
|
+
# Retrieve file webpage with 'Free download' button pressed
|
2661
|
+
free_download_indication = { 'gateway_result' : '1' }
|
2662
|
+
request = urllib2.Request(url, urllib.urlencode(free_download_indication))
|
2663
|
+
try:
|
2664
|
+
self.report_download_webpage(file_id)
|
2665
|
+
webpage = urllib2.urlopen(request).read()
|
2666
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2667
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
|
2668
|
+
return
|
2669
|
+
|
2670
|
+
# Search for the real file URL
|
2671
|
+
mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
|
2672
|
+
if (mobj is None) or (mobj.group(1) is None):
|
2673
|
+
# Try to figure out reason of the error.
|
2674
|
+
mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
|
2675
|
+
if (mobj is not None) and (mobj.group(1) is not None):
|
2676
|
+
restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
|
2677
|
+
self._downloader.trouble(u'ERROR: %s' % restriction_message)
|
2678
|
+
else:
|
2679
|
+
self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
|
2680
|
+
return
|
2681
|
+
|
2682
|
+
file_url = mobj.group(1)
|
2683
|
+
file_extension = os.path.splitext(file_url)[1][1:]
|
2684
|
+
|
2685
|
+
# Search for file title
|
2686
|
+
mobj = re.search(r'<b title="(.*?)">', webpage)
|
2687
|
+
if mobj is None:
|
2688
|
+
self._downloader.trouble(u'ERROR: unable to extract title')
|
2689
|
+
return
|
2690
|
+
file_title = mobj.group(1).decode('utf-8')
|
2691
|
+
|
2692
|
+
try:
|
2693
|
+
# Process file information
|
2694
|
+
self._downloader.process_info({
|
2695
|
+
'id': file_id.decode('utf-8'),
|
2696
|
+
'url': file_url.decode('utf-8'),
|
2697
|
+
'uploader': u'NA',
|
2698
|
+
'upload_date': u'NA',
|
2699
|
+
'title': file_title,
|
2700
|
+
'stitle': file_title,
|
2701
|
+
'ext': file_extension.decode('utf-8'),
|
2702
|
+
'format': u'NA',
|
2703
|
+
'player_url': None,
|
2704
|
+
})
|
2705
|
+
except UnavailableVideoError, err:
|
2706
|
+
self._downloader.trouble(u'ERROR: unable to download file')
|
2707
|
+
|
2708
|
+
|
2709
|
+
class FacebookIE(InfoExtractor):
|
2710
|
+
"""Information Extractor for Facebook"""
|
2711
|
+
|
2712
|
+
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/video/video\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
2713
|
+
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
|
2714
|
+
_NETRC_MACHINE = 'facebook'
|
2715
|
+
_available_formats = ['highqual', 'lowqual']
|
2716
|
+
_video_extensions = {
|
2717
|
+
'highqual': 'mp4',
|
2718
|
+
'lowqual': 'mp4',
|
2719
|
+
}
|
2720
|
+
IE_NAME = u'facebook'
|
2721
|
+
|
2722
|
+
def __init__(self, downloader=None):
|
2723
|
+
InfoExtractor.__init__(self, downloader)
|
2724
|
+
|
2725
|
+
def _reporter(self, message):
|
2726
|
+
"""Add header and report message."""
|
2727
|
+
self._downloader.to_screen(u'[facebook] %s' % message)
|
2728
|
+
|
2729
|
+
def report_login(self):
|
2730
|
+
"""Report attempt to log in."""
|
2731
|
+
self._reporter(u'Logging in')
|
2732
|
+
|
2733
|
+
def report_video_webpage_download(self, video_id):
|
2734
|
+
"""Report attempt to download video webpage."""
|
2735
|
+
self._reporter(u'%s: Downloading video webpage' % video_id)
|
2736
|
+
|
2737
|
+
def report_information_extraction(self, video_id):
|
2738
|
+
"""Report attempt to extract video information."""
|
2739
|
+
self._reporter(u'%s: Extracting video information' % video_id)
|
2740
|
+
|
2741
|
+
def _parse_page(self, video_webpage):
|
2742
|
+
"""Extract video information from page"""
|
2743
|
+
# General data
|
2744
|
+
data = {'title': r'class="video_title datawrap">(.*?)</',
|
2745
|
+
'description': r'<div class="datawrap">(.*?)</div>',
|
2746
|
+
'owner': r'\("video_owner_name", "(.*?)"\)',
|
2747
|
+
'upload_date': r'data-date="(.*?)"',
|
2748
|
+
'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
|
2749
|
+
}
|
2750
|
+
video_info = {}
|
2751
|
+
for piece in data.keys():
|
2752
|
+
mobj = re.search(data[piece], video_webpage)
|
2753
|
+
if mobj is not None:
|
2754
|
+
video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
|
2755
|
+
|
2756
|
+
# Video urls
|
2757
|
+
video_urls = {}
|
2758
|
+
for fmt in self._available_formats:
|
2759
|
+
mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
|
2760
|
+
if mobj is not None:
|
2761
|
+
# URL is in a Javascript segment inside an escaped Unicode format within
|
2762
|
+
# the generally utf-8 page
|
2763
|
+
video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
|
2764
|
+
video_info['video_urls'] = video_urls
|
2765
|
+
|
2766
|
+
return video_info
|
2767
|
+
|
2768
|
+
def _real_initialize(self):
|
2769
|
+
if self._downloader is None:
|
2770
|
+
return
|
2771
|
+
|
2772
|
+
useremail = None
|
2773
|
+
password = None
|
2774
|
+
downloader_params = self._downloader.params
|
2775
|
+
|
2776
|
+
# Attempt to use provided username and password or .netrc data
|
2777
|
+
if downloader_params.get('username', None) is not None:
|
2778
|
+
useremail = downloader_params['username']
|
2779
|
+
password = downloader_params['password']
|
2780
|
+
elif downloader_params.get('usenetrc', False):
|
2781
|
+
try:
|
2782
|
+
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
2783
|
+
if info is not None:
|
2784
|
+
useremail = info[0]
|
2785
|
+
password = info[2]
|
2786
|
+
else:
|
2787
|
+
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
2788
|
+
except (IOError, netrc.NetrcParseError), err:
|
2789
|
+
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
|
2790
|
+
return
|
2791
|
+
|
2792
|
+
if useremail is None:
|
2793
|
+
return
|
2794
|
+
|
2795
|
+
# Log in
|
2796
|
+
login_form = {
|
2797
|
+
'email': useremail,
|
2798
|
+
'pass': password,
|
2799
|
+
'login': 'Log+In'
|
2800
|
+
}
|
2801
|
+
request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
|
2802
|
+
try:
|
2803
|
+
self.report_login()
|
2804
|
+
login_results = urllib2.urlopen(request).read()
|
2805
|
+
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
2806
|
+
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
2807
|
+
return
|
2808
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2809
|
+
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
|
2810
|
+
return
|
2811
|
+
|
2812
|
+
def _real_extract(self, url):
|
2813
|
+
mobj = re.match(self._VALID_URL, url)
|
2814
|
+
if mobj is None:
|
2815
|
+
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
2816
|
+
return
|
2817
|
+
video_id = mobj.group('ID')
|
2818
|
+
|
2819
|
+
# Get video webpage
|
2820
|
+
self.report_video_webpage_download(video_id)
|
2821
|
+
request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
|
2822
|
+
try:
|
2823
|
+
page = urllib2.urlopen(request)
|
2824
|
+
video_webpage = page.read()
|
2825
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2826
|
+
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
|
2827
|
+
return
|
2828
|
+
|
2829
|
+
# Start extracting information
|
2830
|
+
self.report_information_extraction(video_id)
|
2831
|
+
|
2832
|
+
# Extract information
|
2833
|
+
video_info = self._parse_page(video_webpage)
|
2834
|
+
|
2835
|
+
# uploader
|
2836
|
+
if 'owner' not in video_info:
|
2837
|
+
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
|
2838
|
+
return
|
2839
|
+
video_uploader = video_info['owner']
|
2840
|
+
|
2841
|
+
# title
|
2842
|
+
if 'title' not in video_info:
|
2843
|
+
self._downloader.trouble(u'ERROR: unable to extract video title')
|
2844
|
+
return
|
2845
|
+
video_title = video_info['title']
|
2846
|
+
video_title = video_title.decode('utf-8')
|
2847
|
+
video_title = sanitize_title(video_title)
|
2848
|
+
|
2849
|
+
# simplified title
|
2850
|
+
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
|
2851
|
+
simple_title = simple_title.strip(ur'_')
|
2852
|
+
|
2853
|
+
# thumbnail image
|
2854
|
+
if 'thumbnail' not in video_info:
|
2855
|
+
self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
|
2856
|
+
video_thumbnail = ''
|
2857
|
+
else:
|
2858
|
+
video_thumbnail = video_info['thumbnail']
|
2859
|
+
|
2860
|
+
# upload date
|
2861
|
+
upload_date = u'NA'
|
2862
|
+
if 'upload_date' in video_info:
|
2863
|
+
upload_time = video_info['upload_date']
|
2864
|
+
timetuple = email.utils.parsedate_tz(upload_time)
|
2865
|
+
if timetuple is not None:
|
2866
|
+
try:
|
2867
|
+
upload_date = time.strftime('%Y%m%d', timetuple[0:9])
|
2868
|
+
except:
|
2869
|
+
pass
|
2870
|
+
|
2871
|
+
# description
|
2872
|
+
video_description = video_info.get('description', 'No description available.')
|
2873
|
+
|
2874
|
+
url_map = video_info['video_urls']
|
2875
|
+
if len(url_map.keys()) > 0:
|
2876
|
+
# Decide which formats to download
|
2877
|
+
req_format = self._downloader.params.get('format', None)
|
2878
|
+
format_limit = self._downloader.params.get('format_limit', None)
|
2879
|
+
|
2880
|
+
if format_limit is not None and format_limit in self._available_formats:
|
2881
|
+
format_list = self._available_formats[self._available_formats.index(format_limit):]
|
2882
|
+
else:
|
2883
|
+
format_list = self._available_formats
|
2884
|
+
existing_formats = [x for x in format_list if x in url_map]
|
2885
|
+
if len(existing_formats) == 0:
|
2886
|
+
self._downloader.trouble(u'ERROR: no known formats available for video')
|
2887
|
+
return
|
2888
|
+
if req_format is None:
|
2889
|
+
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
2890
|
+
elif req_format == 'worst':
|
2891
|
+
video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
|
2892
|
+
elif req_format == '-1':
|
2893
|
+
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
2894
|
+
else:
|
2895
|
+
# Specific format
|
2896
|
+
if req_format not in url_map:
|
2897
|
+
self._downloader.trouble(u'ERROR: requested format not available')
|
2898
|
+
return
|
2899
|
+
video_url_list = [(req_format, url_map[req_format])] # Specific format
|
2900
|
+
|
2901
|
+
for format_param, video_real_url in video_url_list:
|
2902
|
+
|
2903
|
+
# At this point we have a new video
|
2904
|
+
self._downloader.increment_downloads()
|
2905
|
+
|
2906
|
+
# Extension
|
2907
|
+
video_extension = self._video_extensions.get(format_param, 'mp4')
|
2908
|
+
|
2909
|
+
try:
|
2910
|
+
# Process video information
|
2911
|
+
self._downloader.process_info({
|
2912
|
+
'id': video_id.decode('utf-8'),
|
2913
|
+
'url': video_real_url.decode('utf-8'),
|
2914
|
+
'uploader': video_uploader.decode('utf-8'),
|
2915
|
+
'upload_date': upload_date,
|
2916
|
+
'title': video_title,
|
2917
|
+
'stitle': simple_title,
|
2918
|
+
'ext': video_extension.decode('utf-8'),
|
2919
|
+
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
|
2920
|
+
'thumbnail': video_thumbnail.decode('utf-8'),
|
2921
|
+
'description': video_description.decode('utf-8'),
|
2922
|
+
'player_url': None,
|
2923
|
+
})
|
2924
|
+
except UnavailableVideoError, err:
|
2925
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
2926
|
+
|
2927
|
+
class BlipTVIE(InfoExtractor):
|
2928
|
+
"""Information extractor for blip.tv"""
|
2929
|
+
|
2930
|
+
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
|
2931
|
+
_URL_EXT = r'^.*\.([a-z0-9]+)$'
|
2932
|
+
IE_NAME = u'blip.tv'
|
2933
|
+
|
2934
|
+
def report_extraction(self, file_id):
|
2935
|
+
"""Report information extraction."""
|
2936
|
+
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
|
2937
|
+
|
2938
|
+
def report_direct_download(self, title):
|
2939
|
+
"""Report information extraction."""
|
2940
|
+
self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
|
2941
|
+
|
2942
|
+
def _simplify_title(self, title):
|
2943
|
+
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
2944
|
+
res = res.strip(ur'_')
|
2945
|
+
return res
|
2946
|
+
|
2947
|
+
def _real_extract(self, url):
|
2948
|
+
mobj = re.match(self._VALID_URL, url)
|
2949
|
+
if mobj is None:
|
2950
|
+
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
2951
|
+
return
|
2952
|
+
|
2953
|
+
if '?' in url:
|
2954
|
+
cchar = '&'
|
2955
|
+
else:
|
2956
|
+
cchar = '?'
|
2957
|
+
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
2958
|
+
request = urllib2.Request(json_url)
|
2959
|
+
self.report_extraction(mobj.group(1))
|
2960
|
+
info = None
|
2961
|
+
try:
|
2962
|
+
urlh = urllib2.urlopen(request)
|
2963
|
+
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
2964
|
+
basename = url.split('/')[-1]
|
2965
|
+
title,ext = os.path.splitext(basename)
|
2966
|
+
ext = ext.replace('.', '')
|
2967
|
+
self.report_direct_download(title)
|
2968
|
+
info = {
|
2969
|
+
'id': title,
|
2970
|
+
'url': url,
|
2971
|
+
'title': title,
|
2972
|
+
'stitle': self._simplify_title(title),
|
2973
|
+
'ext': ext,
|
2974
|
+
'urlhandle': urlh
|
2975
|
+
}
|
2976
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2977
|
+
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
|
2978
|
+
return
|
2979
|
+
if info is None: # Regular URL
|
2980
|
+
try:
|
2981
|
+
json_code = urlh.read()
|
2982
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
2983
|
+
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
|
2984
|
+
return
|
2985
|
+
|
2986
|
+
try:
|
2987
|
+
json_data = json.loads(json_code)
|
2988
|
+
if 'Post' in json_data:
|
2989
|
+
data = json_data['Post']
|
2990
|
+
else:
|
2991
|
+
data = json_data
|
2992
|
+
|
2993
|
+
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
2994
|
+
video_url = data['media']['url']
|
2995
|
+
umobj = re.match(self._URL_EXT, video_url)
|
2996
|
+
if umobj is None:
|
2997
|
+
raise ValueError('Can not determine filename extension')
|
2998
|
+
ext = umobj.group(1)
|
2999
|
+
|
3000
|
+
info = {
|
3001
|
+
'id': data['item_id'],
|
3002
|
+
'url': video_url,
|
3003
|
+
'uploader': data['display_name'],
|
3004
|
+
'upload_date': upload_date,
|
3005
|
+
'title': data['title'],
|
3006
|
+
'stitle': self._simplify_title(data['title']),
|
3007
|
+
'ext': ext,
|
3008
|
+
'format': data['media']['mimeType'],
|
3009
|
+
'thumbnail': data['thumbnailUrl'],
|
3010
|
+
'description': data['description'],
|
3011
|
+
'player_url': data['embedUrl']
|
3012
|
+
}
|
3013
|
+
except (ValueError,KeyError), err:
|
3014
|
+
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
|
3015
|
+
return
|
3016
|
+
|
3017
|
+
self._downloader.increment_downloads()
|
3018
|
+
|
3019
|
+
try:
|
3020
|
+
self._downloader.process_info(info)
|
3021
|
+
except UnavailableVideoError, err:
|
3022
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
3023
|
+
|
3024
|
+
|
3025
|
+
class MyVideoIE(InfoExtractor):
|
3026
|
+
"""Information Extractor for myvideo.de."""
|
3027
|
+
|
3028
|
+
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
|
3029
|
+
IE_NAME = u'myvideo'
|
3030
|
+
|
3031
|
+
def __init__(self, downloader=None):
|
3032
|
+
InfoExtractor.__init__(self, downloader)
|
3033
|
+
|
3034
|
+
def report_download_webpage(self, video_id):
|
3035
|
+
"""Report webpage download."""
|
3036
|
+
self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
|
3037
|
+
|
3038
|
+
def report_extraction(self, video_id):
|
3039
|
+
"""Report information extraction."""
|
3040
|
+
self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
|
3041
|
+
|
3042
|
+
def _real_initialize(self):
|
3043
|
+
return
|
3044
|
+
|
3045
|
+
def _real_extract(self,url):
|
3046
|
+
mobj = re.match(self._VALID_URL, url)
|
3047
|
+
if mobj is None:
|
3048
|
+
self._download.trouble(u'ERROR: invalid URL: %s' % url)
|
3049
|
+
return
|
3050
|
+
|
3051
|
+
video_id = mobj.group(1)
|
3052
|
+
simple_title = mobj.group(2).decode('utf-8')
|
3053
|
+
# should actually not be necessary
|
3054
|
+
simple_title = sanitize_title(simple_title)
|
3055
|
+
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title)
|
3056
|
+
|
3057
|
+
# Get video webpage
|
3058
|
+
request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
|
3059
|
+
try:
|
3060
|
+
self.report_download_webpage(video_id)
|
3061
|
+
webpage = urllib2.urlopen(request).read()
|
3062
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3063
|
+
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
|
3064
|
+
return
|
3065
|
+
|
3066
|
+
self.report_extraction(video_id)
|
3067
|
+
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
|
3068
|
+
webpage)
|
3069
|
+
if mobj is None:
|
3070
|
+
self._downloader.trouble(u'ERROR: unable to extract media URL')
|
3071
|
+
return
|
3072
|
+
video_url = mobj.group(1) + ('/%s.flv' % video_id)
|
3073
|
+
|
3074
|
+
mobj = re.search('<title>([^<]+)</title>', webpage)
|
3075
|
+
if mobj is None:
|
3076
|
+
self._downloader.trouble(u'ERROR: unable to extract title')
|
3077
|
+
return
|
3078
|
+
|
3079
|
+
video_title = mobj.group(1)
|
3080
|
+
video_title = sanitize_title(video_title)
|
3081
|
+
|
3082
|
+
try:
|
3083
|
+
self._downloader.process_info({
|
3084
|
+
'id': video_id,
|
3085
|
+
'url': video_url,
|
3086
|
+
'uploader': u'NA',
|
3087
|
+
'upload_date': u'NA',
|
3088
|
+
'title': video_title,
|
3089
|
+
'stitle': simple_title,
|
3090
|
+
'ext': u'flv',
|
3091
|
+
'format': u'NA',
|
3092
|
+
'player_url': None,
|
3093
|
+
})
|
3094
|
+
except UnavailableVideoError:
|
3095
|
+
self._downloader.trouble(u'\nERROR: Unable to download video')
|
3096
|
+
|
3097
|
+
class ComedyCentralIE(InfoExtractor):
|
3098
|
+
"""Information extractor for The Daily Show and Colbert Report """
|
3099
|
+
|
3100
|
+
_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
|
3101
|
+
IE_NAME = u'comedycentral'
|
3102
|
+
|
3103
|
+
def report_extraction(self, episode_id):
|
3104
|
+
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
|
3105
|
+
|
3106
|
+
def report_config_download(self, episode_id):
|
3107
|
+
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
|
3108
|
+
|
3109
|
+
def report_index_download(self, episode_id):
|
3110
|
+
self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
|
3111
|
+
|
3112
|
+
def report_player_url(self, episode_id):
|
3113
|
+
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
|
3114
|
+
|
3115
|
+
def _simplify_title(self, title):
|
3116
|
+
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
3117
|
+
res = res.strip(ur'_')
|
3118
|
+
return res
|
3119
|
+
|
3120
|
+
def _real_extract(self, url):
|
3121
|
+
mobj = re.match(self._VALID_URL, url)
|
3122
|
+
if mobj is None:
|
3123
|
+
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
3124
|
+
return
|
3125
|
+
|
3126
|
+
if mobj.group('shortname'):
|
3127
|
+
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
3128
|
+
url = 'http://www.thedailyshow.com/full-episodes/'
|
3129
|
+
else:
|
3130
|
+
url = 'http://www.colbertnation.com/full-episodes/'
|
3131
|
+
mobj = re.match(self._VALID_URL, url)
|
3132
|
+
assert mobj is not None
|
3133
|
+
|
3134
|
+
dlNewest = not mobj.group('episode')
|
3135
|
+
if dlNewest:
|
3136
|
+
epTitle = mobj.group('showname')
|
3137
|
+
else:
|
3138
|
+
epTitle = mobj.group('episode')
|
3139
|
+
|
3140
|
+
req = urllib2.Request(url)
|
3141
|
+
self.report_extraction(epTitle)
|
3142
|
+
try:
|
3143
|
+
htmlHandle = urllib2.urlopen(req)
|
3144
|
+
html = htmlHandle.read()
|
3145
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3146
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
|
3147
|
+
return
|
3148
|
+
if dlNewest:
|
3149
|
+
url = htmlHandle.geturl()
|
3150
|
+
mobj = re.match(self._VALID_URL, url)
|
3151
|
+
if mobj is None:
|
3152
|
+
self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
|
3153
|
+
return
|
3154
|
+
if mobj.group('episode') == '':
|
3155
|
+
self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
|
3156
|
+
return
|
3157
|
+
epTitle = mobj.group('episode')
|
3158
|
+
|
3159
|
+
mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"/>', html)
|
3160
|
+
if len(mMovieParams) == 0:
|
3161
|
+
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
|
3162
|
+
return
|
3163
|
+
|
3164
|
+
playerUrl_raw = mMovieParams[0][0]
|
3165
|
+
self.report_player_url(epTitle)
|
3166
|
+
try:
|
3167
|
+
urlHandle = urllib2.urlopen(playerUrl_raw)
|
3168
|
+
playerUrl = urlHandle.geturl()
|
3169
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3170
|
+
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
|
3171
|
+
return
|
3172
|
+
|
3173
|
+
uri = mMovieParams[0][1]
|
3174
|
+
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
|
3175
|
+
self.report_index_download(epTitle)
|
3176
|
+
try:
|
3177
|
+
indexXml = urllib2.urlopen(indexUrl).read()
|
3178
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3179
|
+
self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
|
3180
|
+
return
|
3181
|
+
|
3182
|
+
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
3183
|
+
itemEls = idoc.findall('.//item')
|
3184
|
+
for itemEl in itemEls:
|
3185
|
+
mediaId = itemEl.findall('./guid')[0].text
|
3186
|
+
shortMediaId = mediaId.split(':')[-1]
|
3187
|
+
showId = mediaId.split(':')[-2].replace('.com', '')
|
3188
|
+
officialTitle = itemEl.findall('./title')[0].text
|
3189
|
+
officialDate = itemEl.findall('./pubDate')[0].text
|
3190
|
+
|
3191
|
+
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
3192
|
+
urllib.urlencode({'uri': mediaId}))
|
3193
|
+
configReq = urllib2.Request(configUrl)
|
3194
|
+
self.report_config_download(epTitle)
|
3195
|
+
try:
|
3196
|
+
configXml = urllib2.urlopen(configReq).read()
|
3197
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3198
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
|
3199
|
+
return
|
3200
|
+
|
3201
|
+
cdoc = xml.etree.ElementTree.fromstring(configXml)
|
3202
|
+
turls = []
|
3203
|
+
for rendition in cdoc.findall('.//rendition'):
|
3204
|
+
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
3205
|
+
turls.append(finfo)
|
3206
|
+
|
3207
|
+
if len(turls) == 0:
|
3208
|
+
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
|
3209
|
+
continue
|
3210
|
+
|
3211
|
+
# For now, just pick the highest bitrate
|
3212
|
+
format,video_url = turls[-1]
|
3213
|
+
|
3214
|
+
self._downloader.increment_downloads()
|
3215
|
+
|
3216
|
+
effTitle = showId + '-' + epTitle
|
3217
|
+
info = {
|
3218
|
+
'id': shortMediaId,
|
3219
|
+
'url': video_url,
|
3220
|
+
'uploader': showId,
|
3221
|
+
'upload_date': officialDate,
|
3222
|
+
'title': effTitle,
|
3223
|
+
'stitle': self._simplify_title(effTitle),
|
3224
|
+
'ext': 'mp4',
|
3225
|
+
'format': format,
|
3226
|
+
'thumbnail': None,
|
3227
|
+
'description': officialTitle,
|
3228
|
+
'player_url': playerUrl
|
3229
|
+
}
|
3230
|
+
|
3231
|
+
try:
|
3232
|
+
self._downloader.process_info(info)
|
3233
|
+
except UnavailableVideoError, err:
|
3234
|
+
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
|
3235
|
+
continue
|
3236
|
+
|
3237
|
+
|
3238
|
+
class EscapistIE(InfoExtractor):
|
3239
|
+
"""Information extractor for The Escapist """
|
3240
|
+
|
3241
|
+
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
3242
|
+
IE_NAME = u'escapist'
|
3243
|
+
|
3244
|
+
def report_extraction(self, showName):
|
3245
|
+
self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
|
3246
|
+
|
3247
|
+
def report_config_download(self, showName):
|
3248
|
+
self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
|
3249
|
+
|
3250
|
+
def _simplify_title(self, title):
|
3251
|
+
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
3252
|
+
res = res.strip(ur'_')
|
3253
|
+
return res
|
3254
|
+
|
3255
|
+
def _real_extract(self, url):
|
3256
|
+
htmlParser = HTMLParser.HTMLParser()
|
3257
|
+
|
3258
|
+
mobj = re.match(self._VALID_URL, url)
|
3259
|
+
if mobj is None:
|
3260
|
+
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
3261
|
+
return
|
3262
|
+
showName = mobj.group('showname')
|
3263
|
+
videoId = mobj.group('episode')
|
3264
|
+
|
3265
|
+
self.report_extraction(showName)
|
3266
|
+
try:
|
3267
|
+
webPage = urllib2.urlopen(url).read()
|
3268
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3269
|
+
self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
|
3270
|
+
return
|
3271
|
+
|
3272
|
+
descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
|
3273
|
+
description = htmlParser.unescape(descMatch.group(1))
|
3274
|
+
imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
|
3275
|
+
imgUrl = htmlParser.unescape(imgMatch.group(1))
|
3276
|
+
playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
|
3277
|
+
playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
|
3278
|
+
configUrlMatch = re.search('config=(.*)$', playerUrl)
|
3279
|
+
configUrl = urllib2.unquote(configUrlMatch.group(1))
|
3280
|
+
|
3281
|
+
self.report_config_download(showName)
|
3282
|
+
try:
|
3283
|
+
configJSON = urllib2.urlopen(configUrl).read()
|
3284
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3285
|
+
self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
|
3286
|
+
return
|
3287
|
+
|
3288
|
+
# Technically, it's JavaScript, not JSON
|
3289
|
+
configJSON = configJSON.replace("'", '"')
|
3290
|
+
|
3291
|
+
try:
|
3292
|
+
config = json.loads(configJSON)
|
3293
|
+
except (ValueError,), err:
|
3294
|
+
self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
|
3295
|
+
return
|
3296
|
+
|
3297
|
+
playlist = config['playlist']
|
3298
|
+
videoUrl = playlist[1]['url']
|
3299
|
+
|
3300
|
+
self._downloader.increment_downloads()
|
3301
|
+
info = {
|
3302
|
+
'id': videoId,
|
3303
|
+
'url': videoUrl,
|
3304
|
+
'uploader': showName,
|
3305
|
+
'upload_date': None,
|
3306
|
+
'title': showName,
|
3307
|
+
'stitle': self._simplify_title(showName),
|
3308
|
+
'ext': 'flv',
|
3309
|
+
'format': 'flv',
|
3310
|
+
'thumbnail': imgUrl,
|
3311
|
+
'description': description,
|
3312
|
+
'player_url': playerUrl,
|
3313
|
+
}
|
3314
|
+
|
3315
|
+
try:
|
3316
|
+
self._downloader.process_info(info)
|
3317
|
+
except UnavailableVideoError, err:
|
3318
|
+
self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
|
3319
|
+
|
3320
|
+
|
3321
|
+
class CollegeHumorIE(InfoExtractor):
|
3322
|
+
"""Information extractor for collegehumor.com"""
|
3323
|
+
|
3324
|
+
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
3325
|
+
IE_NAME = u'collegehumor'
|
3326
|
+
|
3327
|
+
def report_webpage(self, video_id):
|
3328
|
+
"""Report information extraction."""
|
3329
|
+
self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
|
3330
|
+
|
3331
|
+
def report_extraction(self, video_id):
|
3332
|
+
"""Report information extraction."""
|
3333
|
+
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
|
3334
|
+
|
3335
|
+
def _simplify_title(self, title):
|
3336
|
+
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
3337
|
+
res = res.strip(ur'_')
|
3338
|
+
return res
|
3339
|
+
|
3340
|
+
def _real_extract(self, url):
|
3341
|
+
htmlParser = HTMLParser.HTMLParser()
|
3342
|
+
|
3343
|
+
mobj = re.match(self._VALID_URL, url)
|
3344
|
+
if mobj is None:
|
3345
|
+
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
3346
|
+
return
|
3347
|
+
video_id = mobj.group('videoid')
|
3348
|
+
|
3349
|
+
self.report_webpage(video_id)
|
3350
|
+
request = urllib2.Request(url)
|
3351
|
+
try:
|
3352
|
+
webpage = urllib2.urlopen(request).read()
|
3353
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3354
|
+
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
|
3355
|
+
return
|
3356
|
+
|
3357
|
+
m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
|
3358
|
+
if m is None:
|
3359
|
+
self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
|
3360
|
+
return
|
3361
|
+
internal_video_id = m.group('internalvideoid')
|
3362
|
+
|
3363
|
+
info = {
|
3364
|
+
'id': video_id,
|
3365
|
+
'internal_id': internal_video_id,
|
3366
|
+
}
|
3367
|
+
|
3368
|
+
self.report_extraction(video_id)
|
3369
|
+
xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
|
3370
|
+
try:
|
3371
|
+
metaXml = urllib2.urlopen(xmlUrl).read()
|
3372
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3373
|
+
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
|
3374
|
+
return
|
3375
|
+
|
3376
|
+
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
3377
|
+
try:
|
3378
|
+
videoNode = mdoc.findall('./video')[0]
|
3379
|
+
info['description'] = videoNode.findall('./description')[0].text
|
3380
|
+
info['title'] = videoNode.findall('./caption')[0].text
|
3381
|
+
info['stitle'] = self._simplify_title(info['title'])
|
3382
|
+
info['url'] = videoNode.findall('./file')[0].text
|
3383
|
+
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
3384
|
+
info['ext'] = info['url'].rpartition('.')[2]
|
3385
|
+
info['format'] = info['ext']
|
3386
|
+
except IndexError:
|
3387
|
+
self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
|
3388
|
+
return
|
3389
|
+
|
3390
|
+
self._downloader.increment_downloads()
|
3391
|
+
|
3392
|
+
try:
|
3393
|
+
self._downloader.process_info(info)
|
3394
|
+
except UnavailableVideoError, err:
|
3395
|
+
self._downloader.trouble(u'\nERROR: unable to download video')
|
3396
|
+
|
3397
|
+
|
3398
|
+
class XVideosIE(InfoExtractor):
|
3399
|
+
"""Information extractor for xvideos.com"""
|
3400
|
+
|
3401
|
+
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
|
3402
|
+
IE_NAME = u'xvideos'
|
3403
|
+
|
3404
|
+
def report_webpage(self, video_id):
|
3405
|
+
"""Report information extraction."""
|
3406
|
+
self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
|
3407
|
+
|
3408
|
+
def report_extraction(self, video_id):
|
3409
|
+
"""Report information extraction."""
|
3410
|
+
self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
|
3411
|
+
|
3412
|
+
def _simplify_title(self, title):
|
3413
|
+
res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title)
|
3414
|
+
res = res.strip(ur'_')
|
3415
|
+
return res
|
3416
|
+
|
3417
|
+
def _real_extract(self, url):
|
3418
|
+
htmlParser = HTMLParser.HTMLParser()
|
3419
|
+
|
3420
|
+
mobj = re.match(self._VALID_URL, url)
|
3421
|
+
if mobj is None:
|
3422
|
+
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
3423
|
+
return
|
3424
|
+
video_id = mobj.group(1).decode('utf-8')
|
3425
|
+
|
3426
|
+
self.report_webpage(video_id)
|
3427
|
+
|
3428
|
+
request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
|
3429
|
+
try:
|
3430
|
+
webpage = urllib2.urlopen(request).read()
|
3431
|
+
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
|
3432
|
+
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
|
3433
|
+
return
|
3434
|
+
|
3435
|
+
self.report_extraction(video_id)
|
3436
|
+
|
3437
|
+
|
3438
|
+
# Extract video URL
|
3439
|
+
mobj = re.search(r'flv_url=(.+?)&', webpage)
|
3440
|
+
if mobj is None:
|
3441
|
+
self._downloader.trouble(u'ERROR: unable to extract video url')
|
3442
|
+
return
|
3443
|
+
video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
|
3444
|
+
|
3445
|
+
|
3446
|
+
# Extract title
|
3447
|
+
mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
|
3448
|
+
if mobj is None:
|
3449
|
+
self._downloader.trouble(u'ERROR: unable to extract video title')
|
3450
|
+
return
|
3451
|
+
video_title = mobj.group(1).decode('utf-8')
|
3452
|
+
|
3453
|
+
|
3454
|
+
# Extract video thumbnail
|
3455
|
+
mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
|
3456
|
+
if mobj is None:
|
3457
|
+
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
|
3458
|
+
return
|
3459
|
+
video_thumbnail = mobj.group(1).decode('utf-8')
|
3460
|
+
|
3461
|
+
|
3462
|
+
|
3463
|
+
self._downloader.increment_downloads()
|
3464
|
+
info = {
|
3465
|
+
'id': video_id,
|
3466
|
+
'url': video_url,
|
3467
|
+
'uploader': None,
|
3468
|
+
'upload_date': None,
|
3469
|
+
'title': video_title,
|
3470
|
+
'stitle': self._simplify_title(video_title),
|
3471
|
+
'ext': 'flv',
|
3472
|
+
'format': 'flv',
|
3473
|
+
'thumbnail': video_thumbnail,
|
3474
|
+
'description': None,
|
3475
|
+
'player_url': None,
|
3476
|
+
}
|
3477
|
+
|
3478
|
+
try:
|
3479
|
+
self._downloader.process_info(info)
|
3480
|
+
except UnavailableVideoError, err:
|
3481
|
+
self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
|
3482
|
+
|
3483
|
+
|
3484
|
+
class PostProcessor(object):
    """Base class for post processors.

    A downloader keeps a chain of PostProcessor objects, registered via
    its add_post_processor() method. After a successful download it calls
    run() on each of them in turn: the first one receives the initial
    information dictionary, every later one the value returned by its
    predecessor.

    The chain ends when a run() call returns None or when the last
    PostProcessor has been executed.

    Like InfoExtractor objects, PostProcessors take part in a "mutual
    registration" with their downloader.
    """

    # Downloader this post processor is attached to (None until set).
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach this PP to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Execute the post processing step.

        "information" is a dictionary like those built by InfoExtractors,
        with one additional field, "filepath", that points to the
        downloaded file.

        Returning None stops the post-processing chain. Returning an
        information dictionary (possibly the received one with some
        fields changed) passes it on to the next object in the chain.

        A PostProcessingError may be raised; the calling downloader
        takes it into account.
        """
        # The base implementation passes the information through untouched.
        return information
|
3529
|
+
|
3530
|
+
|
3531
|
+
class FFmpegExtractAudioPP(PostProcessor):
    """Post processor that extracts the audio track of a download.

    ffprobe is used to detect the audio codec of the downloaded file and
    ffmpeg to copy or re-encode the audio into a separate file next to it.

    preferredcodec   -- 'best' (default when None), or a codec name; the
                        conversion table in run() knows 'mp3', 'aac' and
                        'vorbis'.
    preferredquality -- value passed to ffmpeg's -ab option when
                        re-encoding, or None to leave it unset.
    keepvideo        -- when False, the downloaded file is removed after
                        a successful extraction.
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
        PostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._keepvideo = keepvideo

    @staticmethod
    def get_audio_codec(path):
        """Return the audio codec name ffprobe reports for path, or None.

        None is returned when ffprobe cannot be run, exits with a
        non-zero status, or lists no audio stream.
        """
        try:
            # Close the devnull handle explicitly instead of leaking it.
            devnull = open(os.path.devnull, 'w')
            try:
                cmd = ['ffprobe', '-show_streams', '--', path]
                handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
                output = handle.communicate()[0]
            finally:
                devnull.close()
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        # Remember the most recent codec_name and return it once a
        # codec_type=audio line follows.
        audio_codec = None
        for line in output.split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    @staticmethod
    def run_ffmpeg(path, out_path, codec, more_opts):
        """Run ffmpeg to write the audio of path to out_path.

        Returns True when ffmpeg exits with status 0, False when it fails
        or cannot be started.
        """
        try:
            # Close the devnull handle explicitly instead of leaking it.
            devnull = open(os.path.devnull, 'w')
            try:
                cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
                ret = subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)
            finally:
                devnull.close()
            return (ret == 0)
        except (IOError, OSError):
            return False

    def run(self, information):
        """Extract the audio of information['filepath'].

        Returns the information dictionary with 'filepath' pointing to
        the audio file, or None (stopping the post-processing chain) when
        the codec cannot be determined, ffmpeg fails, or the downloaded
        file cannot be removed.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
            return None

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
            if filecodec in ['aac', 'mp3', 'vorbis']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = []
                if self._preferredquality is not None:
                    more_opts += ['-ab', self._preferredquality]
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = []
            if self._preferredquality is not None:
                more_opts += ['-ab', self._preferredquality]
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'

        (prefix, ext) = os.path.splitext(path)
        new_path = prefix + '.' + extension

        # When the download already carries the target extension, input
        # and output are the same file: ffmpeg would read and overwrite
        # it at once, and the cleanup below would delete the only copy.
        same_file = (new_path == path)
        if same_file and acodec == 'copy':
            # Nothing to convert: the file is already the wanted audio.
            self._downloader.to_screen(u'[ffmpeg] Audio already in target format: %s' % path)
            return information

        self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
        status = self.run_ffmpeg(path, new_path, acodec, more_opts)

        if not status:
            self._downloader.to_stderr(u'WARNING: error running ffmpeg')
            return None

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(new_path, (time.time(), information['filetime']))
            except Exception:
                # Best effort only; no longer a bare except, so
                # SystemExit is not swallowed here.
                self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

        if not self._keepvideo and not same_file:
            try:
                os.remove(path)
            except (IOError, OSError):
                self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
                return None

        information['filepath'] = new_path
        return information
|
3630
|
+
|
3631
|
+
|
3632
|
+
def updateSelf(downloader, filename):
    ''' Update the program file with the latest version from the repository

    downloader -- only used for its to_screen() output method.
    filename   -- path of the program file to overwrite.

    Exits the program via sys.exit() with an error message when filename
    is not writable, the download fails, or the file cannot be written.
    Returns without touching the file when the downloaded script carries
    the same __version__ as the running one.
    '''
    # Note: downloader only used for options
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen('Updating to latest version...')

    try:
        # Open outside the try/finally: if urlopen() itself fails there
        # is no handle to close, and closing an unbound name used to
        # raise a NameError that bypassed the handler below.
        urlh = urllib.urlopen(UPDATE_URL)
        try:
            newcontent = urlh.read()
        finally:
            urlh.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to download latest version')

    vmatch = re.search("__version__ = '([^']+)'", newcontent)
    if vmatch is not None and vmatch.group(1) == __version__:
        downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
        return

    try:
        outf = open(filename, 'wb')
        try:
            outf.write(newcontent)
        finally:
            outf.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to overwrite current version')

    downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
|
3664
|
+
|
3665
|
+
def parseOpts():
|
3666
|
+
# Deferred imports
|
3667
|
+
import getpass
|
3668
|
+
import optparse
|
3669
|
+
|
3670
|
+
def _format_option_string(option):
|
3671
|
+
''' ('-o', '--option') -> -o, --format METAVAR'''
|
3672
|
+
|
3673
|
+
opts = []
|
3674
|
+
|
3675
|
+
if option._short_opts: opts.append(option._short_opts[0])
|
3676
|
+
if option._long_opts: opts.append(option._long_opts[0])
|
3677
|
+
if len(opts) > 1: opts.insert(1, ', ')
|
3678
|
+
|
3679
|
+
if option.takes_value(): opts.append(' %s' % option.metavar)
|
3680
|
+
|
3681
|
+
return "".join(opts)
|
3682
|
+
|
3683
|
+
def _find_term_columns():
|
3684
|
+
columns = os.environ.get('COLUMNS', None)
|
3685
|
+
if columns:
|
3686
|
+
return int(columns)
|
3687
|
+
|
3688
|
+
try:
|
3689
|
+
sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
3690
|
+
out,err = sp.communicate()
|
3691
|
+
return int(out.split()[1])
|
3692
|
+
except:
|
3693
|
+
pass
|
3694
|
+
return None
|
3695
|
+
|
3696
|
+
max_width = 80
|
3697
|
+
max_help_position = 80
|
3698
|
+
|
3699
|
+
# No need to wrap help messages if we're on a wide console
|
3700
|
+
columns = _find_term_columns()
|
3701
|
+
if columns: max_width = columns
|
3702
|
+
|
3703
|
+
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
|
3704
|
+
fmt.format_option_strings = _format_option_string
|
3705
|
+
|
3706
|
+
kw = {
|
3707
|
+
'version' : __version__,
|
3708
|
+
'formatter' : fmt,
|
3709
|
+
'usage' : '%prog [options] url [url...]',
|
3710
|
+
'conflict_handler' : 'resolve',
|
3711
|
+
}
|
3712
|
+
|
3713
|
+
parser = optparse.OptionParser(**kw)
|
3714
|
+
|
3715
|
+
# option groups
|
3716
|
+
general = optparse.OptionGroup(parser, 'General Options')
|
3717
|
+
selection = optparse.OptionGroup(parser, 'Video Selection')
|
3718
|
+
authentication = optparse.OptionGroup(parser, 'Authentication Options')
|
3719
|
+
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
3720
|
+
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
3721
|
+
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
3722
|
+
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
3723
|
+
|
3724
|
+
general.add_option('-h', '--help',
|
3725
|
+
action='help', help='print this help text and exit')
|
3726
|
+
general.add_option('-v', '--version',
|
3727
|
+
action='version', help='print program version and exit')
|
3728
|
+
general.add_option('-U', '--update',
|
3729
|
+
action='store_true', dest='update_self', help='update this program to latest version')
|
3730
|
+
general.add_option('-i', '--ignore-errors',
|
3731
|
+
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
|
3732
|
+
general.add_option('-r', '--rate-limit',
|
3733
|
+
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
|
3734
|
+
general.add_option('-R', '--retries',
|
3735
|
+
dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
|
3736
|
+
general.add_option('--dump-user-agent',
|
3737
|
+
action='store_true', dest='dump_user_agent',
|
3738
|
+
help='display the current browser identification', default=False)
|
3739
|
+
general.add_option('--list-extractors',
|
3740
|
+
action='store_true', dest='list_extractors',
|
3741
|
+
help='List all supported extractors and the URLs they would handle', default=False)
|
3742
|
+
|
3743
|
+
selection.add_option('--playlist-start',
|
3744
|
+
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
|
3745
|
+
selection.add_option('--playlist-end',
|
3746
|
+
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
3747
|
+
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
3748
|
+
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
3749
|
+
|
3750
|
+
authentication.add_option('-u', '--username',
|
3751
|
+
dest='username', metavar='USERNAME', help='account username')
|
3752
|
+
authentication.add_option('-p', '--password',
|
3753
|
+
dest='password', metavar='PASSWORD', help='account password')
|
3754
|
+
authentication.add_option('-n', '--netrc',
|
3755
|
+
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
|
3756
|
+
|
3757
|
+
|
3758
|
+
video_format.add_option('-f', '--format',
|
3759
|
+
action='store', dest='format', metavar='FORMAT', help='video format code')
|
3760
|
+
video_format.add_option('--all-formats',
|
3761
|
+
action='store_const', dest='format', help='download all available video formats', const='all')
|
3762
|
+
video_format.add_option('--max-quality',
|
3763
|
+
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
|
3764
|
+
video_format.add_option('-F', '--list-formats',
|
3765
|
+
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
|
3766
|
+
|
3767
|
+
|
3768
|
+
verbosity.add_option('-q', '--quiet',
|
3769
|
+
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
3770
|
+
verbosity.add_option('-s', '--simulate',
|
3771
|
+
action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
|
3772
|
+
verbosity.add_option('--skip-download',
|
3773
|
+
action='store_true', dest='skip_download', help='do not download the video', default=False)
|
3774
|
+
verbosity.add_option('-g', '--get-url',
|
3775
|
+
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
|
3776
|
+
verbosity.add_option('-e', '--get-title',
|
3777
|
+
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
|
3778
|
+
verbosity.add_option('--get-thumbnail',
|
3779
|
+
action='store_true', dest='getthumbnail',
|
3780
|
+
help='simulate, quiet but print thumbnail URL', default=False)
|
3781
|
+
verbosity.add_option('--get-description',
|
3782
|
+
action='store_true', dest='getdescription',
|
3783
|
+
help='simulate, quiet but print video description', default=False)
|
3784
|
+
verbosity.add_option('--get-filename',
|
3785
|
+
action='store_true', dest='getfilename',
|
3786
|
+
help='simulate, quiet but print output filename', default=False)
|
3787
|
+
verbosity.add_option('--get-format',
|
3788
|
+
action='store_true', dest='getformat',
|
3789
|
+
help='simulate, quiet but print output format', default=False)
|
3790
|
+
verbosity.add_option('--no-progress',
|
3791
|
+
action='store_true', dest='noprogress', help='do not print progress bar', default=False)
|
3792
|
+
verbosity.add_option('--console-title',
|
3793
|
+
action='store_true', dest='consoletitle',
|
3794
|
+
help='display progress in console titlebar', default=False)
|
3795
|
+
|
3796
|
+
|
3797
|
+
filesystem.add_option('-t', '--title',
|
3798
|
+
action='store_true', dest='usetitle', help='use title in file name', default=False)
|
3799
|
+
filesystem.add_option('-l', '--literal',
|
3800
|
+
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
|
3801
|
+
filesystem.add_option('-A', '--auto-number',
|
3802
|
+
action='store_true', dest='autonumber',
|
3803
|
+
help='number downloaded files starting from 00000', default=False)
|
3804
|
+
filesystem.add_option('-o', '--output',
|
3805
|
+
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, and %% for a literal percent')
|
3806
|
+
filesystem.add_option('-a', '--batch-file',
|
3807
|
+
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
3808
|
+
filesystem.add_option('-w', '--no-overwrites',
|
3809
|
+
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
3810
|
+
filesystem.add_option('-c', '--continue',
|
3811
|
+
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
|
3812
|
+
filesystem.add_option('--no-continue',
|
3813
|
+
action='store_false', dest='continue_dl',
|
3814
|
+
help='do not resume partially downloaded files (restart from beginning)')
|
3815
|
+
filesystem.add_option('--cookies',
|
3816
|
+
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
3817
|
+
filesystem.add_option('--no-part',
|
3818
|
+
action='store_true', dest='nopart', help='do not use .part files', default=False)
|
3819
|
+
filesystem.add_option('--no-mtime',
|
3820
|
+
action='store_false', dest='updatetime',
|
3821
|
+
help='do not use the Last-modified header to set the file modification time', default=True)
|
3822
|
+
filesystem.add_option('--write-description',
|
3823
|
+
action='store_true', dest='writedescription',
|
3824
|
+
help='write video description to a .description file', default=False)
|
3825
|
+
filesystem.add_option('--write-info-json',
|
3826
|
+
action='store_true', dest='writeinfojson',
|
3827
|
+
help='write video metadata to a .info.json file', default=False)
|
3828
|
+
|
3829
|
+
|
3830
|
+
postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
|
3831
|
+
help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
|
3832
|
+
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
3833
|
+
help='"best", "aac", "vorbis" or "mp3"; best by default')
|
3834
|
+
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
|
3835
|
+
help='ffmpeg audio bitrate specification, 128k by default')
|
3836
|
+
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
|
3837
|
+
help='keeps the video file on disk after the post-processing; the video is erased by default')
|
3838
|
+
|
3839
|
+
|
3840
|
+
parser.add_option_group(general)
|
3841
|
+
parser.add_option_group(selection)
|
3842
|
+
parser.add_option_group(filesystem)
|
3843
|
+
parser.add_option_group(verbosity)
|
3844
|
+
parser.add_option_group(video_format)
|
3845
|
+
parser.add_option_group(authentication)
|
3846
|
+
parser.add_option_group(postproc)
|
3847
|
+
|
3848
|
+
opts, args = parser.parse_args()
|
3849
|
+
|
3850
|
+
return parser, opts, args
|
3851
|
+
|
3852
|
+
def gen_extractors():
	"""Instantiate every supported information extractor.

	Returns a list containing one instance of each extractor. The position
	in the list is significant: the first extractor that matches a URL is
	the one that handles it.
	"""
	# These three instances appear in the list themselves and are also
	# passed to the constructors of other extractors below.
	yt_ie = YoutubeIE()
	goog_ie = GoogleIE()
	yhoo_ie = YahooIE()

	# YouTube family (and Metacafe, which is handed the YouTube extractor).
	ies = [
		YoutubePlaylistIE(yt_ie),
		YoutubeUserIE(yt_ie),
		YoutubeSearchIE(yt_ie),
		yt_ie,
		MetacafeIE(yt_ie),
	]

	# Remaining site-specific extractors, in matching order.
	ies += [
		DailymotionIE(),
		goog_ie,
		GoogleSearchIE(goog_ie),
		PhotobucketIE(),
		yhoo_ie,
		YahooSearchIE(yhoo_ie),
		DepositFilesIE(),
		FacebookIE(),
		BlipTVIE(),
		VimeoIE(),
		MyVideoIE(),
		ComedyCentralIE(),
		EscapistIE(),
		CollegeHumorIE(),
		XVideosIE(),
	]

	# GenericIE stays at the very end of the list
	# (presumably the catch-all fallback -- confirm against GenericIE.suitable).
	ies.append(GenericIE())
	return ies
|
3883
|
+
|
3884
|
+
def main():
	"""Command-line entry point.

	Parses the options, prepares the cookie jar and the global urllib2
	opener, validates the option combination, builds a FileDownloader with
	every extractor registered, downloads all requested URLs and finally
	calls sys.exit() with the value returned by FileDownloader.download().

	This function always terminates through sys.exit() or parser.error()
	(the latter is expected to abort the program). Exceptions raised while
	downloading are not caught here; the ``__main__`` guard of the script
	deals with DownloadError, SameFileError and KeyboardInterrupt.
	"""
	# Imported locally: getpass is not part of the module-level imports, so
	# the password prompt below would otherwise fail with a NameError.
	import getpass

	parser, opts, args = parseOpts()

	# Open appropriate CookieJar
	if opts.cookiefile is None:
		jar = cookielib.CookieJar()
	else:
		try:
			jar = cookielib.MozillaCookieJar(opts.cookiefile)
			# Only load when the file already exists and is readable; a
			# missing file is fine, it gets written by jar.save() at the end.
			if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
				jar.load()
		except (IOError, OSError):
			sys.exit(u'ERROR: unable to open cookie file')

	# Dump user agent
	if opts.dump_user_agent:
		print(std_headers['User-Agent'])
		sys.exit(0)

	# Batch file verification
	batchurls = []
	if opts.batchfile is not None:
		try:
			if opts.batchfile == '-':
				batchfd = sys.stdin
			else:
				batchfd = open(opts.batchfile, 'r')
			try:
				batchurls = [x.strip() for x in batchfd.readlines()]
			finally:
				# Release the handle we opened, but never close stdin.
				if batchfd is not sys.stdin:
					batchfd.close()
			# Drop blank lines and lines starting with '#', '/' or ';'.
			batchurls = [x for x in batchurls if x and not re.search(r'^[#/;]', x)]
		except IOError:
			sys.exit(u'ERROR: batch file could not be read')
	all_urls = batchurls + args

	# General configuration
	cookie_processor = urllib2.HTTPCookieProcessor(jar)
	opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
	urllib2.install_opener(opener)
	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

	extractors = gen_extractors()

	if opts.list_extractors:
		for ie in extractors:
			print(ie.IE_NAME)
			# Show each URL under the first extractor that accepts it, then
			# remove it so later extractors do not list it again.
			matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
			all_urls = filter(lambda url: url not in matchedUrls, all_urls)
			for mu in matchedUrls:
				print(u' ' + mu)
		sys.exit(0)

	# Conflicting, missing and erroneous options
	if opts.usenetrc and (opts.username is not None or opts.password is not None):
		parser.error(u'using .netrc conflicts with giving username/password')
	if opts.password is not None and opts.username is None:
		parser.error(u'account username missing')
	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
		parser.error(u'using output template conflicts with using title, literal title or auto number')
	if opts.usetitle and opts.useliteral:
		parser.error(u'using title conflicts with using literal title')
	if opts.username is not None and opts.password is None:
		# Username without password: ask for it interactively.
		opts.password = getpass.getpass(u'Type account password and press return:')
	if opts.ratelimit is not None:
		numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
		if numeric_limit is None:
			parser.error(u'invalid rate limit specified')
		opts.ratelimit = numeric_limit
	if opts.retries is not None:
		try:
			opts.retries = long(opts.retries)
		except (TypeError, ValueError):
			parser.error(u'invalid retry count specified')
	try:
		opts.playliststart = int(opts.playliststart)
		if opts.playliststart <= 0:
			raise ValueError(u'Playlist start must be positive')
	except (TypeError, ValueError):
		parser.error(u'invalid playlist start number specified')
	try:
		opts.playlistend = int(opts.playlistend)
		# -1 is accepted as a special value and bypasses the range check.
		if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
			raise ValueError(u'Playlist end must be greater than playlist start')
	except (TypeError, ValueError):
		parser.error(u'invalid playlist end number specified')
	if opts.extractaudio:
		if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']:
			parser.error(u'invalid audio format specified')

	# True when any of the "print this piece of information" options was
	# given; those options imply both quiet mode and skipping the download.
	print_only = (opts.geturl or opts.gettitle or opts.getthumbnail
		or opts.getdescription or opts.getfilename or opts.getformat)

	# File downloader
	fd = FileDownloader({
		'usenetrc': opts.usenetrc,
		'username': opts.username,
		'password': opts.password,
		'quiet': (opts.quiet or print_only),
		'forceurl': opts.geturl,
		'forcetitle': opts.gettitle,
		'forcethumbnail': opts.getthumbnail,
		'forcedescription': opts.getdescription,
		'forcefilename': opts.getfilename,
		'forceformat': opts.getformat,
		'simulate': opts.simulate,
		'skip_download': (opts.skip_download or opts.simulate or print_only),
		'format': opts.format,
		'format_limit': opts.format_limit,
		'listformats': opts.listformats,
		# The first truthy alternative wins: an explicit -o template, then
		# the templates implied by the format/title/literal/autonumber
		# options, and finally the plain "<id>.<ext>" default.
		'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
			or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
			or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
			or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
			or u'%(id)s.%(ext)s'),
		'ignoreerrors': opts.ignoreerrors,
		'ratelimit': opts.ratelimit,
		'nooverwrites': opts.nooverwrites,
		'retries': opts.retries,
		'continuedl': opts.continue_dl,
		'noprogress': opts.noprogress,
		'playliststart': opts.playliststart,
		'playlistend': opts.playlistend,
		'logtostderr': opts.outtmpl == '-',
		'consoletitle': opts.consoletitle,
		'nopart': opts.nopart,
		'updatetime': opts.updatetime,
		'writedescription': opts.writedescription,
		'writeinfojson': opts.writeinfojson,
		'matchtitle': opts.matchtitle,
		'rejecttitle': opts.rejecttitle,
		})
	for extractor in extractors:
		fd.add_info_extractor(extractor)

	# PostProcessors
	if opts.extractaudio:
		fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

	# Update version
	if opts.update_self:
		updateSelf(fd, sys.argv[0])

	# Maybe do nothing
	if not all_urls:
		# Having no URLs is only acceptable when a self-update was requested.
		if not opts.update_self:
			parser.error(u'you must provide at least one URL')
		else:
			sys.exit()
	retcode = fd.download(all_urls)

	# Dump cookie jar if requested
	if opts.cookiefile is not None:
		try:
			jar.save()
		except (IOError, OSError):
			sys.exit(u'ERROR: unable to save cookie jar')

	sys.exit(retcode)
|
4043
|
+
|
4044
|
+
|
4045
|
+
if __name__ == '__main__':
	# Script entry point: run main() and translate the failures handled
	# here into a process exit status or a message passed to sys.exit().
	try:
		main()
	except DownloadError:
		# Exit with status 1; no additional message is printed here.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		# Leading newline keeps the message off the line being printed
		# when the user interrupted the program.
		sys.exit(u'\nERROR: Interrupted by user')
|
4054
|
+
|
4055
|
+
# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python:
|