hlspider 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +0 -2
- data/.rbenv-version +1 -0
- data/Gemfile +4 -4
- data/Gemfile.lock +28 -0
- data/README.md +42 -20
- data/Rakefile +10 -2
- data/bin/hlspider +43 -1
- data/hlspider.gemspec +12 -11
- data/lib/hlspider/async_download.rb +48 -0
- data/lib/hlspider/playlist.rb +121 -21
- data/lib/hlspider/playlist_line.rb +95 -0
- data/lib/hlspider/spider.rb +108 -69
- data/lib/hlspider/version.rb +2 -2
- data/lib/hlspider.rb +9 -19
- data/spec/hlspider/playlist_line_spec.rb +58 -0
- data/spec/hlspider/spider_spec.rb +16 -0
- data/spec/hlspider_spec.rb +12 -0
- data/spec/spec_helper.rb +4 -0
- metadata +97 -64
- data/lib/hlspider/line.rb +0 -27
data/.gitignore
CHANGED
data/.rbenv-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.8.7-p352
|
data/Gemfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in hlspider.gemspec
|
4
|
+
gemspec
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
hlspider (0.2.0)
|
5
|
+
em-http-request
|
6
|
+
eventmachine
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
addressable (2.2.6)
|
12
|
+
em-http-request (1.0.0)
|
13
|
+
addressable (>= 2.2.3)
|
14
|
+
em-socksify
|
15
|
+
eventmachine (>= 1.0.0.beta.3)
|
16
|
+
http_parser.rb (>= 0.5.2)
|
17
|
+
em-socksify (0.1.0)
|
18
|
+
eventmachine
|
19
|
+
eventmachine (1.0.0.beta.4)
|
20
|
+
http_parser.rb (0.5.3)
|
21
|
+
minitest (2.7.0)
|
22
|
+
|
23
|
+
PLATFORMS
|
24
|
+
ruby
|
25
|
+
|
26
|
+
DEPENDENCIES
|
27
|
+
hlspider!
|
28
|
+
minitest
|
data/README.md
CHANGED
@@ -1,26 +1,48 @@
|
|
1
|
-
HLSpider
|
2
|
-
|
3
|
-
ASYNC .m3u8 downloader. Downloads .m3u8 playlist files and confirms their segments are properly aligned.
|
1
|
+
# HLSpider - the HTTP Live Streaming Spider
|
2
|
+
Asynchronously downloads .m3u8 playlists and reports back on whether or not the playlists are aligned in time.
|
4
3
|
|
5
|
-
|
6
|
-
-----
|
7
|
-
hlspider crawl --playlists=http://site.tld/playlist1.m3u8 http://site.tld/playlist2.m3u8 http://site.tld/playlist3.m3u8
|
4
|
+
## Purpose
|
8
5
|
|
9
|
-
|
10
|
-
hlspider
|
6
|
+
Apple's HTTP Live Streaming (HLS) is used to deliver content with varying bit rate streams so a 3G connected cellphone can watch a video without buffering while a laptop can watch that same content in full 1080p. HLS uses .m3u8 playlist files (each bit rate having its own) which contain links to download the next video segment. It is very important that these different playlists are all at the same point in time so switching between bit rates is a seamless experience.
|
11
7
|
|
12
|
-
|
13
|
-
hlspider crawl --playlists=one two three # Crawl the specified playlists and make sure their segments align
|
14
|
-
hlspider help [TASK] # Describe available tasks or one specific task
|
15
|
-
|
16
|
-
<br />
|
8
|
+
Point HLSpider at multiple playlists and it will report back on whether or not these playlists all end on the same numbered segment.
|
17
9
|
|
18
|
-
|
10
|
+
## Usage
|
19
11
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
12
|
+
```
|
13
|
+
# Point the spider at multiple playlists
|
14
|
+
playlists = ["http://host.com/video1/playlist1.m3u8", "http://host.com/video1/playlist2.m3u8", "http://host.com/video1/playlist3.m3u8"]
|
15
|
+
spider = HLSpider.new(playlists)
|
16
|
+
```
|
25
17
|
|
26
|
-
|
18
|
+
OR
|
19
|
+
|
20
|
+
```
|
21
|
+
# The parent multi bit rate playlist
|
22
|
+
parent_url = "http://host.com/video1/all_bitrates_playlist.m3u8"
|
23
|
+
spider = HLSpider.new(parent_url)
|
24
|
+
```
|
25
|
+
|
26
|
+
```
|
27
|
+
spider.aligned?
|
28
|
+
spider.invalid_playlists
|
29
|
+
|
30
|
+
playlist = spider.playlists[0]
|
31
|
+
playlist.valid?
|
32
|
+
playlist.segments
|
33
|
+
playlist.url
|
34
|
+
playlist.file
|
35
|
+
playlist.target_duration
|
36
|
+
```
|
37
|
+
|
38
|
+
HLSpider is also available from the command line
|
39
|
+
|
40
|
+
```
|
41
|
+
hlspider crawl --playlists=http://host.com/video1/playlist1.m3u8,http://host.com/video1/playlist2.m3u8,http://host.com/video1/playlist3.m3u8
|
42
|
+
```
|
43
|
+
|
44
|
+
OR
|
45
|
+
|
46
|
+
```
|
47
|
+
hlspider crawl --playlists=http://host.com/video1/all_bitrates_playlist.m3u8
|
48
|
+
```
|
data/Rakefile
CHANGED
data/bin/hlspider
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
2
3
|
|
3
4
|
begin
|
4
5
|
require 'hlspider'
|
@@ -7,4 +8,45 @@ rescue LoadError
|
|
7
8
|
require 'hlspider'
|
8
9
|
end
|
9
10
|
|
10
|
-
|
11
|
+
# Command-line entry point: parse the options, build a Spider for the given
# playlist URLs and report whether the playlists are aligned.

# Parsed command-line options. :loop defaults to a single crawl.
options = { :loop => 1 }

opts_parser = OptionParser.new do |opts|
  opts.banner = 'Downloads m3u8 playlists and confirms their segments are aligned.'

  # Array option: OptionParser splits the argument on commas.
  opts.on('-p', '--playlists PLAYLISTS', Array, 'URL(s) to playlist(s)') do |playlists|
    options[:playlists] = playlists
  end

  opts.on('-l', '--loop TIMES', Integer) do |l|
    options[:loop] = l || 5
  end

  opts.on( '-h', '--help', 'Display this screen' ) do
    puts opts
    exit
  end
end
opts_parser.parse!

spider = HLSpider::Spider.new(options[:playlists])

# Crawl up to options[:loop] times, stopping at the first failed crawl.
options[:loop].times do
  if spider.crawl
    if spider.aligned?
      puts "--- Aligned at segment : #{spider.last_segments[0]} ---"
    else
      puts "--- Unaligned with segments : #{spider.last_segments.join(', ')} ---"
    end
  else
    # The errors live on the spider. A top-level @errors is always nil here,
    # so reading it would raise NoMethodError instead of printing the errors.
    spider.errors.each do |err|
      puts "--- #{err} ---"
    end

    break
  end
end
|
data/hlspider.gemspec
CHANGED
@@ -4,13 +4,13 @@ require "hlspider/version"
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = "hlspider"
|
7
|
-
s.version =
|
8
|
-
s.
|
9
|
-
s.
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
|
7
|
+
s.version = HLSpider::VERSION
|
8
|
+
s.authors = ["brookemckim"]
|
9
|
+
s.email = ["brooke.mckim@gmail.com"]
|
10
|
+
s.homepage = "http://www.github.com/brookemckim/hlspider"
|
11
|
+
s.summary = %q{Asynchronously download and parse .m3u8 playlists.}
|
12
|
+
s.description = %q{Asynchronously downloads .m3u8 playlists and reports back on whether or not the playlists are aligned in time.}
|
13
|
+
|
14
14
|
|
15
15
|
s.rubyforge_project = "hlspider"
|
16
16
|
|
@@ -18,9 +18,10 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
20
|
s.require_paths = ["lib"]
|
21
|
+
|
22
|
+
# specify any dependencies here
|
23
|
+
s.add_runtime_dependency 'eventmachine', '~> 1.0.0.beta.4'
|
24
|
+
s.add_runtime_dependency 'em-http-request', '~> 1.0.0'
|
21
25
|
|
22
|
-
s.
|
23
|
-
s.add_dependency('em-synchrony', '>= 0.3.0.beta.1')
|
24
|
-
s.add_dependency('eventmachine', '>= 1.0.0.beta.3')
|
25
|
-
s.add_dependency('em-http-request', '>= 1.0.0.beta.3')
|
26
|
+
s.add_development_dependency 'minitest', '~> 2.7.0'
|
26
27
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Internal: Asynchronously downloads urls and returns Array of responses.
|
2
|
+
require 'eventmachine'
|
3
|
+
require 'em-http-request'
|
4
|
+
|
5
|
+
module HLSpider
  # Internal: Mixin that downloads several URLs concurrently through
  # EventMachine and em-http-request.
  module AsyncDownload
    # Internal: Asynchronously download given URLs.
    #
    # urls - An Array of String URLs or a single String URL.
    #
    # Examples
    #
    #   async_download(["http://www.google.com", "http://www.yahoo.com"])
    #
    #   async_download("http://www.bing.com")
    #
    # Returns an Array with one completed request per URL, in the same order
    #   as urls. Returns an empty Array when no URLs were given. Returns nil
    #   (after printing "Connection Error") when not every request completed
    #   successfully.
    def async_download(urls)
      # Array() wraps a lone String and maps nil to []; Array.new(String)
      # raises TypeError.
      urls = Array(urls)

      # Nothing to fetch: skip the reactor entirely.
      # NOTE(review): with zero requests the multi callback presumably never
      # fires, which would leave EventMachine.run blocked - confirm.
      return [] if urls.empty?

      responses = nil
      EventMachine.run {
        multi = EventMachine::MultiRequest.new

        urls.each_with_index do |url, idx|
          http = EventMachine::HttpRequest.new(url, :connect_timeout => 10)
          req  = http.get
          multi.add idx, req
        end

        multi.callback do
          responses = multi.responses
          EventMachine.stop
        end
      }

      # Guard against the reactor stopping before the callback ran.
      completed = responses ? responses[:callback] : {}

      if completed.size == urls.size
        # Requests were registered under their index in urls; sort on it so
        # the result order does not depend on completion or hash order.
        completed.sort_by { |idx, _req| idx }.map { |_idx, req| req }
      else
        puts "Connection Error"
        #raise ConnectError
        nil
      end
    end
  end
end
|
data/lib/hlspider/playlist.rb
CHANGED
@@ -1,62 +1,162 @@
|
|
1
|
+
# Internal: Parses out and exposes the parts of M3U8 playlist files.
|
2
|
+
#
|
3
|
+
# M3U8 References:
|
4
|
+
# http://developer.apple.com/library/ios/#documentation/networkinginternet/conceptual/streamingmediaguide/HTTPStreamingArchitecture/HTTPStreamingArchitecture.html
|
5
|
+
#
|
6
|
+
# Examples
|
7
|
+
#
|
8
|
+
# p = Playlist.new(File.read("/path/to/playlist.m3u8"), "http://url.tld/where/playlist/was/downloaded/from")
|
9
|
+
# # =>
|
10
|
+
# <HLSpider::Playlist:0x10c801a80
|
11
|
+
# @variable_playlist=false,
|
12
|
+
# @segments=["media_88868.ts", "media_88869.ts"],
|
13
|
+
# @valid=true,
|
14
|
+
# @file="#EXTM3U\n#EXT-X-ALLOW-CACHE:NO\n#EXT-X-TARGETDURATION:10\n#EXT-X-MEDIA-SEQUENCE:88868\n#EXTINF:10,
|
15
|
+
# \nmedia_88868.ts\n#EXTINF:10,\nmedia_88869.ts",
|
16
|
+
# @target_duration="10",
|
17
|
+
# @playlists=[],
|
18
|
+
# @source="http://url.tld/where/playlist/was/downloaded/from",
|
19
|
+
# @segment_playlist=true
|
20
|
+
# >
|
21
|
+
|
1
22
|
module HLSpider
|
2
23
|
class Playlist
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
24
|
+
# Public: Gets/Sets the raw M3U8 Playlist File.
|
25
|
+
attr_accessor :file
|
26
|
+
|
27
|
+
# Public: Gets/Sets Optional source of playlist file. Used only for reference.
|
28
|
+
attr_accessor :source
|
29
|
+
|
30
|
+
# Public: Gets sub-playlists if the playlist has child playlists (Variable Rate Playlist).
|
31
|
+
attr_reader :playlists
|
32
|
+
|
33
|
+
# Public: Gets segments contained in the playlist.
|
34
|
+
attr_reader :segments
|
35
|
+
|
36
|
+
# Public: Gets the target duration if available.
|
37
|
+
attr_reader :target_duration
|
38
|
+
|
39
|
+
# Internal: Initialize a Playlist.
|
40
|
+
#
|
41
|
+
# file - A String containing an .m3u8 playlist file.
|
42
|
+
# source - A String source of where the playlist was downloaded from. (optional)
|
43
|
+
def initialize(file, source = nil)
|
7
44
|
@file = file
|
8
|
-
@source = source
|
9
|
-
|
10
|
-
|
45
|
+
@source = source
|
46
|
+
@valid = false
|
47
|
+
|
11
48
|
@variable_playlist = false
|
12
49
|
@segment_playlist = false
|
13
50
|
|
14
51
|
@playlists = []
|
15
52
|
@segments = []
|
16
53
|
|
17
|
-
parse
|
54
|
+
parse(@file)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Internal: Set the m3u8 file.
|
58
|
+
#
|
59
|
+
# file - The String of the m3u8 file.
|
60
|
+
#
|
61
|
+
# Examples
|
62
|
+
#
|
63
|
+
# file( File.read('/path/to/playlist.m3u8') )
|
64
|
+
# # => '#EXTM3U\n#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=713245\n
|
65
|
+
# http://hls.telvue.com/brightstar/2-1/playlist.m3u8?wowzasessionid=268983957'
|
66
|
+
#
|
67
|
+
# Returns the file String.
|
68
|
+
def file=(file)
|
69
|
+
@file = file
|
70
|
+
parse(@file)
|
18
71
|
end
|
19
72
|
|
73
|
+
# Public: Check whether the playlist is a variable playlist or not.
|
74
|
+
#
|
75
|
+
#
|
76
|
+
# Examples
|
77
|
+
#
|
78
|
+
# variable_playlist?
|
79
|
+
# # => true
|
80
|
+
#
|
81
|
+
# Returns Boolean variable_playlist.
|
20
82
|
def variable_playlist?
|
21
83
|
@variable_playlist
|
22
84
|
end
|
23
85
|
|
86
|
+
# Public: Check whether the playlist is a segment playlist or not.
|
87
|
+
#
|
88
|
+
#
|
89
|
+
# Examples
|
90
|
+
#
|
91
|
+
# segment_playlist?
|
92
|
+
# # => false
|
93
|
+
#
|
94
|
+
# Returns Boolean segment_playlist.
|
24
95
|
def segment_playlist?
|
25
96
|
@segment_playlist
|
26
97
|
end
|
27
|
-
|
98
|
+
|
99
|
+
# Public: Check whether the playlist is valid (either a segment or variable playlist).
|
100
|
+
#
|
101
|
+
#
|
102
|
+
# Examples
|
103
|
+
#
|
104
|
+
# valid?
|
105
|
+
# # => true
|
106
|
+
#
|
107
|
+
# Returns Boolean valid.
|
28
108
|
def valid?
|
29
109
|
@valid
|
30
110
|
end
|
31
111
|
|
112
|
+
# Public: Prints contents of @file.
|
113
|
+
#
|
114
|
+
#
|
115
|
+
# Examples
|
116
|
+
#
|
117
|
+
# to_s
|
118
|
+
# #=> '#EXTM3U\n#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=713245\n
|
119
|
+
# http://hls.telvue.com/brightstar/2-1/playlist.m3u8?wowzasessionid=268983957'
|
120
|
+
#
|
121
|
+
# Returns String file.
|
32
122
|
# Public: The raw playlist contents.
#
# The former `alias_method :to_s, :inspect` that followed this definition
# replaced it with #inspect, so the method never returned @file. The alias
# is dropped so the definition below is the one in effect.
#
# Returns String file.
def to_s
  @file
end
|
35
126
|
|
36
127
|
private
|
37
|
-
|
128
|
+
include PlaylistLine
|
129
|
+
|
130
|
+
# Internal: Parses @file and sets @variable_playlist, @segment_playlist, and @valid.
|
131
|
+
#
|
132
|
+
#
|
133
|
+
# Examples
|
134
|
+
#
|
135
|
+
# parse(playlist_file)
|
136
|
+
#
|
137
|
+
# Returns nothing.
|
138
|
+
def parse(file)
|
38
139
|
@valid = true if /#EXTM3U/.match(@file)
|
140
|
+
|
39
141
|
if has_playlist?(@file) && !has_segment?(@file)
|
40
142
|
@variable_playlist = true
|
143
|
+
|
41
144
|
@file.each_line do |line|
|
42
145
|
@playlists << line.strip if has_playlist?(line)
|
43
146
|
end
|
44
147
|
elsif has_segment?(@file) && !has_playlist?(@file)
|
45
148
|
@segment_playlist = true
|
46
|
-
|
47
|
-
|
48
|
-
|
149
|
+
|
150
|
+
@file.each_line do |line|
|
151
|
+
if has_segment?(line)
|
152
|
+
@segments << filename(line.strip)
|
153
|
+
elsif duration_line?(line)
|
154
|
+
@target_duration = parse_duration(line.strip)
|
155
|
+
end
|
156
|
+
end
|
49
157
|
else
|
50
158
|
@valid = false
|
51
159
|
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def has_segment?(str)
|
55
|
-
true if /#EXT-X-MEDIA-SEQUENCE/.match(str)
|
56
|
-
end
|
57
|
-
|
58
|
-
def has_playlist?(str)
|
59
|
-
true if /.m3u8/.match(str)
|
60
160
|
end
|
61
161
|
end
|
62
162
|
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
# Internal: A set of methods for examining individual lines of m3u8 playlists.
|
2
|
+
module HLSpider
  # Internal: Helpers for classifying and parsing pieces of m3u8 playlists.
  # Every method takes a String (a single line or a whole file) and keeps no
  # state, so the module can be included or extended anywhere.
  module PlaylistLine
    # Internal: Checks if String str references a .ts video segment, i.e.
    # contains a literal ".ts" that ends a line (trailing whitespace allowed)
    # or is followed by a query string.
    #
    # str - String to be checked
    #
    # Examples
    #
    #   has_segment?("video_01.ts")
    #   #=> true
    #
    #   has_segment?("video_01.ts?session=1\n")
    #   #=> true
    #
    #   has_segment?("arandomstring")
    #   #=> false
    #
    # Returns Boolean.
    def has_segment?(str)
      # The dot is escaped so "robots" is not mistaken for a segment, and the
      # match is anchored per line ($) so newline-terminated lines qualify.
      !!/\.ts(\?|\s*$)/.match(str)
    end

    # Internal: Checks if String str contains links to .m3u8 file extensions.
    #
    # str - String to be checked
    #
    # Examples
    #
    #   has_playlist?("playlist.m3u8")
    #   #=> true
    #
    #   has_playlist?("arandomstring")
    #   #=> false
    #
    # Returns Boolean.
    def has_playlist?(str)
      # Escaped dot: require a literal ".m3u8", not any character + "m3u8".
      !!/\.m3u8/.match(str)
    end

    # Internal: Checks if String str contains 'EXT-X-TARGETDURATION'.
    #
    # str - String to be checked
    #
    # Examples
    #
    #   duration_line?("EXT-X-TARGETDURATION:10")
    #   #=> true
    #
    #   duration_line?("arandomstring")
    #   #=> false
    #
    # Returns Boolean.
    def duration_line?(str)
      !!/EXT-X-TARGETDURATION/.match(str)
    end

    # Internal: Parses Integer target duration out of String str.
    #
    # str - String to be parsed
    #
    # Examples
    #
    #   parse_duration("EXT-X-TARGETDURATION:10")
    #   #=> 10
    #
    #   parse_duration("EXT-X-TARGETDURATION:")
    #   #=> nil
    #
    #   parse_duration("arandomstring")
    #   #=> nil
    #
    # Returns Integer or nil.
    def parse_duration(str)
      # \d+ (not \d*) so a tag without a value yields nil instead of 0.
      # Surrounding whitespace is tolerated so unstripped lines parse too.
      found = /EXT-X-TARGETDURATION:\s*(\d+)\s*\z/.match(str)
      found && found[1].to_i
    end

    # Internal: Parses String video segment filename out of String str.
    # Only word characters (letters, digits, underscore) directly before
    # ".ts" are treated as part of the name.
    #
    # str - String to be parsed
    #
    # Examples
    #
    #   filename("/media/video_01.ts?query_string=22")
    #   #=> 'video_01.ts'
    #
    #   filename("arandomstring")
    #   #=> nil
    #
    # Returns String or nil.
    def filename(str)
      # Escaped dot so names merely ending in "ts" are not reported.
      str.slice(/\w+\.ts/)
    end
  end
end
|
data/lib/hlspider/spider.rb
CHANGED
@@ -1,93 +1,132 @@
|
|
1
|
+
# Public: Asynchronously downloads .m3u8 playlist files from specified URLs.
|
2
|
+
#
|
3
|
+
#
|
4
|
+
# Examples
|
5
|
+
#
|
6
|
+
# Spider.new(["http://host.tld/video1/playlist_1.m3u8", "http://host.tld/video1/playlist_2.m3u8"])
|
7
|
+
# # => #<HLSpider::Spider:0x10cab12d0>
|
8
|
+
#
|
9
|
+
# Spider.new("http://host.tld/video1/parent_playlist.m3u8")
|
10
|
+
# # => #<HLSpider::Spider:0x10cab12d0>
|
11
|
+
|
1
12
|
require 'rubygems'
|
2
|
-
require '
|
3
|
-
require 'em-
|
4
|
-
require 'logger'
|
5
|
-
require_relative 'playlist'
|
13
|
+
require 'eventmachine'
|
14
|
+
require 'em-http-request'
|
6
15
|
|
7
16
|
module HLSpider
|
8
|
-
class Spider
|
9
|
-
|
10
|
-
|
11
|
-
|
17
|
+
class Spider
|
18
|
+
# Public: Gets Array of urls.
|
19
|
+
attr_reader :urls
|
20
|
+
# Public: Gets Array of valid playlists.
|
21
|
+
attr_reader :playlists
|
22
|
+
# Public: Gets Array of invalid playlists.
|
23
|
+
attr_reader :invalid_playlists
|
24
|
+
# Public: Gets Array of errors.
|
25
|
+
attr_reader :errors
|
26
|
+
|
27
|
+
# Public: Initialize a Playlist Spider.
|
28
|
+
#
|
29
|
+
# urls - An Array containing multiple String urls to playlist files.
|
30
|
+
# Also accepts single String url that points to parent playlist.
|
31
|
+
def initialize(urls)
|
32
|
+
@urls = [urls].flatten
|
33
|
+
|
34
|
+
@invalid_playlists = []
|
35
|
+
@errors = []
|
12
36
|
end
|
13
|
-
|
37
|
+
|
38
|
+
# Public: Starts the download of Array urls
|
39
|
+
#
|
40
|
+
#
|
41
|
+
# Examples
|
42
|
+
#
|
43
|
+
# crawl
|
44
|
+
# # => true
|
45
|
+
#
|
46
|
+
# Returns Boolean.
|
14
47
|
def crawl
|
15
|
-
playlists = dive(@
|
16
|
-
|
17
|
-
segments = []
|
18
|
-
playlists.each { |p| segments << p.segments.first }
|
48
|
+
@playlists = dive(@urls)
|
19
49
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
log "**********"
|
27
|
-
log segments.inspect
|
28
|
-
log "**********"
|
29
|
-
elsif segments.size == 1
|
30
|
-
p "All Good. at #{segments[0]}"
|
31
|
-
log "^^^^^^^^^^^"
|
32
|
-
log segments.inspect
|
33
|
-
log "^^^^^^^^^^^"
|
34
|
-
else
|
35
|
-
p "No segements found - #{@time}"
|
36
|
-
log "~~~~~~~~~~~"
|
37
|
-
log "No segments!"
|
38
|
-
log "~~~~~~~~~~~"
|
39
|
-
end
|
40
|
-
end
|
50
|
+
if @errors.empty?
|
51
|
+
true
|
52
|
+
else
|
53
|
+
false
|
54
|
+
end
|
55
|
+
end
|
41
56
|
|
42
|
-
|
57
|
+
# Public: Checks if playlists' segments are aligned.
|
58
|
+
#
|
59
|
+
#
|
60
|
+
# Examples
|
61
|
+
#
|
62
|
+
# aligned?
|
63
|
+
# # => true
|
64
|
+
#
|
65
|
+
# Returns Boolean.
|
66
|
+
def aligned?
|
67
|
+
last_segments.uniq.size == 1
|
68
|
+
end
|
43
69
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
70
|
+
# Public: playlist getter.
|
71
|
+
#
|
72
|
+
#
|
73
|
+
# Examples
|
74
|
+
#
|
75
|
+
# playlists
|
76
|
+
# # => [#<HLSpider::Playlist:0x10ca9bef8>, #<HLSpider::Playlist:0x10ca9bef9>]
|
77
|
+
#
|
78
|
+
# Returns Array of Playlists
|
79
|
+
def playlists
|
80
|
+
crawl if @playlists.nil?
|
81
|
+
@playlists
|
82
|
+
end
|
57
83
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
84
|
+
# Public: Get Array of last segments across playlists.
|
85
|
+
#
|
86
|
+
#
|
87
|
+
# Examples
|
88
|
+
#
|
89
|
+
# last_segments
|
90
|
+
# # => ['video_05.ts', 'video_05.ts', 'video_05.ts']
|
91
|
+
#
|
92
|
+
# Returns Array of Strings
|
93
|
+
def last_segments
|
94
|
+
playlists.collect { |p| p.segments.last }
|
62
95
|
end
|
63
96
|
|
97
|
+
private
|
98
|
+
|
99
|
+
include AsyncDownload
|
100
|
+
|
101
|
+
# Internal: Download playlists from Array urls.
|
102
|
+
#
|
103
|
+
#
|
104
|
+
# Examples
|
105
|
+
#
|
106
|
+
# dive(["http://host.tld/video1/playlist_1.m3u8", "http://host.tld/video1/playlist_2.m3u8"])
|
107
|
+
# # => [#<HLSpider::Playlist:0x10ca9bef8>, #<HLSpider::Playlist:0x10ca9bef9>]
|
108
|
+
#
|
109
|
+
# Returns Array of Playlists.
|
110
|
+
# Raises Error if invalid playlists were downloaded or there was trouble downloading them.
|
64
111
|
def dive(urls = [])
|
65
112
|
playlists = []
|
66
113
|
|
67
|
-
|
68
|
-
|
69
|
-
p = Playlist.new(
|
70
|
-
|
114
|
+
responses = async_download(urls)
|
115
|
+
responses.each do |resp|
|
116
|
+
p = Playlist.new(resp.response, resp.req.uri.to_s)
|
117
|
+
|
71
118
|
if p.valid?
|
72
119
|
if p.variable_playlist?
|
73
|
-
playlists
|
120
|
+
playlists << dive(p.playlists)
|
74
121
|
else
|
75
122
|
playlists << p
|
76
123
|
end
|
124
|
+
else
|
125
|
+
@invalid_playlists << p
|
77
126
|
end
|
78
127
|
end
|
79
128
|
|
80
|
-
return playlists
|
81
|
-
end
|
82
|
-
|
83
|
-
def log(str)
|
84
|
-
#eval "@log.#{type} \"#{str}\"" if @log
|
85
|
-
@log.info str if @log
|
86
|
-
end
|
87
|
-
|
88
|
-
def put_and_log(str)
|
89
|
-
log(str)
|
90
|
-
puts str
|
91
|
-
end
|
129
|
+
return playlists.flatten
|
130
|
+
end
|
92
131
|
end
|
93
132
|
end
|
data/lib/hlspider/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module
|
2
|
-
VERSION = "0.
|
1
|
+
module HLSpider
|
2
|
+
VERSION = "0.2.0"
|
3
3
|
end
|
data/lib/hlspider.rb
CHANGED
@@ -1,23 +1,13 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
$:.push File.dirname(__FILE__)
|
2
|
+
|
3
|
+
require 'hlspider/version'
|
4
|
+
require 'hlspider/async_download'
|
5
|
+
require 'hlspider/playlist_line'
|
6
|
+
require 'hlspider/playlist'
|
7
|
+
require 'hlspider/spider'
|
3
8
|
|
4
9
|
module HLSpider
|
5
|
-
|
6
|
-
|
7
|
-
desc "crawl", "Crawl the specified playlists and make sure their segments align"
|
8
|
-
method_option :playlists, :type => :array, :default => [], :required => true
|
9
|
-
method_option :sleep, :type => :numeric, :default => 5
|
10
|
-
method_option :log, :type => :string, :default => ""
|
11
|
-
def crawl
|
12
|
-
while true
|
13
|
-
Spider.new(options[:playlists], options[:log]).crawl
|
14
|
-
sleep options[:sleep]
|
15
|
-
end
|
16
|
-
end
|
10
|
+
def self.new(*args)
|
11
|
+
HLSpider::Spider.new(*args)
|
17
12
|
end
|
18
|
-
end
|
19
|
-
|
20
|
-
path = File.expand_path(File.dirname(__FILE__))
|
21
|
-
['spider', 'playlist'].each do |file|
|
22
|
-
require File.join(path, 'hlspider', file)
|
23
13
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe HLSpider::PlaylistLine do
|
4
|
+
before do
|
5
|
+
class PlaylistLine; extend HLSpider::PlaylistLine; end
|
6
|
+
|
7
|
+
@segment_line = "http://host.tld/video1/video_123123023030.ts?session=12391239"
|
8
|
+
@playlist_line = "http://host.told/video1/playlist_123213.m3u8"
|
9
|
+
@duration_line = "EXT-X-TARGETDURATION:55"
|
10
|
+
end
|
11
|
+
|
12
|
+
describe "#has_segment?" do
|
13
|
+
it "returns true on String with video segment" do
|
14
|
+
PlaylistLine.has_segment?(@segment_line).must_equal(true)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "returns false on String without video segment" do
|
18
|
+
PlaylistLine.has_segment?(@playlist_line).must_equal(false)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe "#has_playlist?" do
|
23
|
+
it "returns true on String with playlist" do
|
24
|
+
PlaylistLine.has_playlist?(@playlist_line).must_equal(true)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "returns false on String without playlist" do
|
28
|
+
PlaylistLine.has_playlist?(@segment_line).must_equal(false)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "#duration_line?" do
|
33
|
+
it "returns true on String with playlist duration" do
|
34
|
+
PlaylistLine.duration_line?(@duration_line).must_equal(true)
|
35
|
+
end
|
36
|
+
|
37
|
+
it "returns false on String without playlist duration" do
|
38
|
+
PlaylistLine.duration_line?(@playlist_line).must_equal(false)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe "#parse_duration" do
|
43
|
+
it "returns Integer duration on String with duration" do
|
44
|
+
PlaylistLine.parse_duration(@duration_line).must_equal(55)
|
45
|
+
end
|
46
|
+
|
47
|
+
it "returns nil on String without duration" do
|
48
|
+
PlaylistLine.parse_duration(@segment_line).must_equal(nil)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "#filename" do
|
53
|
+
it "returns String with filename on String with filename" do
|
54
|
+
PlaylistLine.filename(@segment_line).must_equal("video_123123023030.ts")
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe HLSpider::Spider do
|
4
|
+
before do
|
5
|
+
@playlist = "http://host.com/playlist.m3u8"
|
6
|
+
@playlists = ["http://host.com/playlist.m3u8", "http://host.com/playlist2.m3u8"]
|
7
|
+
end
|
8
|
+
|
9
|
+
it "can be created with a String" do
|
10
|
+
HLSpider::Spider.new(@playlist).must_be_instance_of(HLSpider::Spider)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "can be created with an Array" do
|
14
|
+
HLSpider::Spider.new(@playlists).must_be_instance_of(HLSpider::Spider)
|
15
|
+
end
|
16
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
CHANGED
@@ -1,104 +1,137 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: hlspider
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
8
|
-
-
|
12
|
+
authors:
|
13
|
+
- brookemckim
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
17
|
+
|
18
|
+
date: 2011-11-17 00:00:00 -05:00
|
13
19
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
17
|
-
requirement: &2152994560 !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
|
-
requirements:
|
20
|
-
- - ! '>='
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 0.14.0
|
23
|
-
type: :runtime
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: eventmachine
|
24
23
|
prerelease: false
|
25
|
-
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: em-synchrony
|
28
|
-
requirement: &2152993020 !ruby/object:Gem::Requirement
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
29
25
|
none: false
|
30
|
-
requirements:
|
31
|
-
- -
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 62196363
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 0
|
33
|
+
- 0
|
34
|
+
- beta
|
35
|
+
- 4
|
36
|
+
version: 1.0.0.beta.4
|
34
37
|
type: :runtime
|
38
|
+
version_requirements: *id001
|
39
|
+
- !ruby/object:Gem::Dependency
|
40
|
+
name: em-http-request
|
35
41
|
prerelease: false
|
36
|
-
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: eventmachine
|
39
|
-
requirement: &2152983760 !ruby/object:Gem::Requirement
|
42
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
43
|
none: false
|
41
|
-
requirements:
|
42
|
-
- -
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
hash: 23
|
48
|
+
segments:
|
49
|
+
- 1
|
50
|
+
- 0
|
51
|
+
- 0
|
52
|
+
version: 1.0.0
|
45
53
|
type: :runtime
|
54
|
+
version_requirements: *id002
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
46
57
|
prerelease: false
|
47
|
-
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
name: em-http-request
|
50
|
-
requirement: &2152981200 !ruby/object:Gem::Requirement
|
58
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
51
59
|
none: false
|
52
|
-
requirements:
|
53
|
-
- -
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
60
|
+
requirements:
|
61
|
+
- - ~>
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
hash: 19
|
64
|
+
segments:
|
65
|
+
- 2
|
66
|
+
- 7
|
67
|
+
- 0
|
68
|
+
version: 2.7.0
|
69
|
+
type: :development
|
70
|
+
version_requirements: *id003
|
71
|
+
description: Asynchronously downloads .m3u8 playlists and reports back on whether or not the playlists are aligned in time.
|
72
|
+
email:
|
73
|
+
- brooke.mckim@gmail.com
|
74
|
+
executables:
|
64
75
|
- hlspider
|
65
76
|
extensions: []
|
77
|
+
|
66
78
|
extra_rdoc_files: []
|
67
|
-
|
79
|
+
|
80
|
+
files:
|
68
81
|
- .gitignore
|
82
|
+
- .rbenv-version
|
69
83
|
- Gemfile
|
84
|
+
- Gemfile.lock
|
70
85
|
- README.md
|
71
86
|
- Rakefile
|
72
87
|
- bin/hlspider
|
73
88
|
- hlspider.gemspec
|
74
89
|
- lib/hlspider.rb
|
75
|
-
- lib/hlspider/
|
90
|
+
- lib/hlspider/async_download.rb
|
76
91
|
- lib/hlspider/playlist.rb
|
92
|
+
- lib/hlspider/playlist_line.rb
|
77
93
|
- lib/hlspider/spider.rb
|
78
94
|
- lib/hlspider/version.rb
|
95
|
+
- spec/hlspider/playlist_line_spec.rb
|
96
|
+
- spec/hlspider/spider_spec.rb
|
97
|
+
- spec/hlspider_spec.rb
|
98
|
+
- spec/spec_helper.rb
|
79
99
|
has_rdoc: true
|
80
|
-
homepage: http://github.com/
|
100
|
+
homepage: http://www.github.com/brookemckim/hlspider
|
81
101
|
licenses: []
|
102
|
+
|
82
103
|
post_install_message:
|
83
104
|
rdoc_options: []
|
84
|
-
|
105
|
+
|
106
|
+
require_paths:
|
85
107
|
- lib
|
86
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
109
|
none: false
|
88
|
-
requirements:
|
89
|
-
- -
|
90
|
-
- !ruby/object:Gem::Version
|
91
|
-
|
92
|
-
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
hash: 3
|
114
|
+
segments:
|
115
|
+
- 0
|
116
|
+
version: "0"
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
118
|
none: false
|
94
|
-
requirements:
|
95
|
-
- -
|
96
|
-
- !ruby/object:Gem::Version
|
97
|
-
|
119
|
+
requirements:
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
hash: 3
|
123
|
+
segments:
|
124
|
+
- 0
|
125
|
+
version: "0"
|
98
126
|
requirements: []
|
127
|
+
|
99
128
|
rubyforge_project: hlspider
|
100
129
|
rubygems_version: 1.6.2
|
101
130
|
signing_key:
|
102
131
|
specification_version: 3
|
103
|
-
summary:
|
104
|
-
test_files:
|
132
|
+
summary: Asynchronously download and parse .m3u8 playlists.
|
133
|
+
test_files:
|
134
|
+
- spec/hlspider/playlist_line_spec.rb
|
135
|
+
- spec/hlspider/spider_spec.rb
|
136
|
+
- spec/hlspider_spec.rb
|
137
|
+
- spec/spec_helper.rb
|
data/lib/hlspider/line.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
module HLSpider
|
2
|
-
module Line
|
3
|
-
def strip_query_params(line)
|
4
|
-
line.sub(/[?].{1,}/, '')
|
5
|
-
end
|
6
|
-
|
7
|
-
def get_filename(line)
|
8
|
-
line.slice(/\w{1,}(.ts)/)
|
9
|
-
end
|
10
|
-
|
11
|
-
def strip_all_but_file(line)
|
12
|
-
line.slice(/\w{1,}(.ts)/)
|
13
|
-
end
|
14
|
-
|
15
|
-
def strip_file(line)
|
16
|
-
line.sub(/\/\w{1,}(.)\w{1,}$/, "")
|
17
|
-
end
|
18
|
-
|
19
|
-
def relative_path?(line)
|
20
|
-
true if !line.match('http://')
|
21
|
-
end
|
22
|
-
|
23
|
-
def absolute_path?(line)
|
24
|
-
true if line.match('http://')
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|