hlspider 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/hlspider +9 -15
- data/lib/hlspider/{async_download.rb → downloader.rb} +8 -6
- data/lib/hlspider/playlist_line.rb +10 -14
- data/lib/hlspider/spider.rb +15 -26
- data/lib/hlspider/version.rb +1 -1
- data/lib/hlspider.rb +1 -1
- metadata +5 -5
data/bin/hlspider
CHANGED
@@ -1,6 +1,4 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
require 'optparse'
|
3
|
-
|
4
2
|
begin
|
5
3
|
require 'hlspider'
|
6
4
|
rescue LoadError
|
@@ -8,6 +6,8 @@ rescue LoadError
|
|
8
6
|
require 'hlspider'
|
9
7
|
end
|
10
8
|
|
9
|
+
require 'optparse'
|
10
|
+
|
11
11
|
options = {}
|
12
12
|
|
13
13
|
opts_parser = OptionParser.new do |opts|
|
@@ -44,20 +44,14 @@ else
|
|
44
44
|
end
|
45
45
|
|
46
46
|
while x <= options[:loop] do
|
47
|
-
|
48
|
-
if spider.aligned?
|
49
|
-
puts "--- Aligned at segment : #{spider.last_segments[0]} ---"
|
50
|
-
else
|
51
|
-
puts "--- Unaligned with segments : #{spider.last_segments.join(', ')} ---"
|
52
|
-
end
|
53
|
-
else
|
54
|
-
@errors.each do |err|
|
55
|
-
puts "--- #{err} ---"
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
break if @errors
|
47
|
+
spider.crawl!
|
60
48
|
|
49
|
+
if spider.aligned?
|
50
|
+
puts "--- Aligned at segment : #{spider.last_segments[0]} ---"
|
51
|
+
else
|
52
|
+
puts "--- Unaligned with segments : #{spider.last_segments.join(', ')} ---"
|
53
|
+
end
|
54
|
+
|
61
55
|
x += 1 unless options[:loop] == 0
|
62
56
|
|
63
57
|
sleep(options[:sleep])
|
@@ -3,7 +3,9 @@ require 'eventmachine'
|
|
3
3
|
require 'em-http-request'
|
4
4
|
|
5
5
|
module HLSpider
|
6
|
-
module
|
6
|
+
module Downloader
|
7
|
+
class ConnectionError < StandardError; end;
|
8
|
+
|
7
9
|
# Internal: Asynchronously download given URLs.
|
8
10
|
#
|
9
11
|
# urls - An Array of strings or a single string of URL(s)
|
@@ -17,9 +19,10 @@ module HLSpider
|
|
17
19
|
# # =>
|
18
20
|
#
|
19
21
|
# Returns the Array of responses.
|
20
|
-
# Raises
|
21
|
-
|
22
|
-
|
22
|
+
# Raises HLSpider::Downloader::ConnectionError if there was a problem
|
23
|
+
# downloading all urls.
|
24
|
+
def download(urls)
|
25
|
+
urls = Array(urls)
|
23
26
|
|
24
27
|
responses = nil
|
25
28
|
EventMachine.run {
|
@@ -40,8 +43,7 @@ module HLSpider
|
|
40
43
|
if responses[:callback].size == urls.size
|
41
44
|
responses[:callback].collect { |k,v| v }
|
42
45
|
else
|
43
|
-
|
44
|
-
#raise ConnectError
|
46
|
+
raise ConnectionError, "No all urls returned responses."
|
45
47
|
end
|
46
48
|
end
|
47
49
|
end
|
@@ -15,7 +15,7 @@ module HLSpider
|
|
15
15
|
#
|
16
16
|
# Returns Boolean.
|
17
17
|
def has_segment?(str)
|
18
|
-
|
18
|
+
!!( str[/.*.ts(\z|\?|$)/] )
|
19
19
|
end
|
20
20
|
|
21
21
|
# Internal: Checks if String str contains links to .m3u8 file extensions.
|
@@ -32,7 +32,7 @@ module HLSpider
|
|
32
32
|
#
|
33
33
|
# Returns Boolean.
|
34
34
|
def has_playlist?(str)
|
35
|
-
|
35
|
+
!!( str[/.m3u8/] )
|
36
36
|
end
|
37
37
|
|
38
38
|
# Internal: Checks if String str contains 'EXT-X-TARGETDURATION'.
|
@@ -49,7 +49,7 @@ module HLSpider
|
|
49
49
|
#
|
50
50
|
# Returns Boolean.
|
51
51
|
def duration_line?(str)
|
52
|
-
|
52
|
+
!!( str[/EXT-X-TARGETDURATION/] )
|
53
53
|
end
|
54
54
|
|
55
55
|
# Internal: Parses Integer target duration out of String str
|
@@ -66,10 +66,8 @@ module HLSpider
|
|
66
66
|
#
|
67
67
|
# Returns Integer or nil.
|
68
68
|
def parse_duration(str)
|
69
|
-
/EXT-X-TARGETDURATION:(\d*)\z/.match(str)
|
70
|
-
|
71
|
-
if dur = Regexp.last_match(1)
|
72
|
-
dur.to_i
|
69
|
+
if dur = /EXT-X-TARGETDURATION:(\d*)\z/.match(str)
|
70
|
+
dur[1].to_i
|
73
71
|
else
|
74
72
|
nil
|
75
73
|
end
|
@@ -104,9 +102,9 @@ module HLSpider
|
|
104
102
|
# absolute_url?("http://www.site.tld/file.m3u8")
|
105
103
|
# #=> true
|
106
104
|
#
|
107
|
-
# Returns
|
105
|
+
# Returns Boolean.
|
108
106
|
def absolute_url?(str)
|
109
|
-
str
|
107
|
+
!!( str[/\Ahtt(ps|p)\:\/\//] )
|
110
108
|
end
|
111
109
|
|
112
110
|
# Internal: Parses string and returns whether or not it is a media sequence line.
|
@@ -123,7 +121,7 @@ module HLSpider
|
|
123
121
|
#
|
124
122
|
# Returns Boolean.
|
125
123
|
def media_sequence_line?(str)
|
126
|
-
|
124
|
+
!!( str[/EXT-X-MEDIA-SEQUENCE/] )
|
127
125
|
end
|
128
126
|
|
129
127
|
# Internal: Parses string and returns media sequence number.
|
@@ -137,10 +135,8 @@ module HLSpider
|
|
137
135
|
#
|
138
136
|
# Returns Integer or nil.
|
139
137
|
def parse_sequence(line)
|
140
|
-
/#EXT-X-MEDIA-SEQUENCE:\s*(\d*)/.match(line)
|
141
|
-
|
142
|
-
if sequence = Regexp.last_match(1)
|
143
|
-
sequence.to_i
|
138
|
+
if sequence = /#EXT-X-MEDIA-SEQUENCE:\s*(\d*)/.match(line)
|
139
|
+
sequence[1].to_i
|
144
140
|
else
|
145
141
|
nil
|
146
142
|
end
|
data/lib/hlspider/spider.rb
CHANGED
@@ -15,24 +15,20 @@ require 'em-http-request'
|
|
15
15
|
|
16
16
|
module HLSpider
|
17
17
|
class Spider
|
18
|
+
class InvalidPlaylist < StandardError; end;
|
19
|
+
|
18
20
|
# Public: Gets Array of urls.
|
19
21
|
attr_reader :urls
|
22
|
+
|
20
23
|
# Public: Gets Array of valid playlists.
|
21
24
|
attr_reader :playlists
|
22
|
-
|
23
|
-
attr_reader :invalid_playlists
|
24
|
-
# Public: Gets Array of errors.
|
25
|
-
attr_reader :errors
|
26
|
-
|
25
|
+
|
27
26
|
# Public: Initialize a Playlist Spider.
|
28
27
|
#
|
29
28
|
# urls - An Array containing multiple String urls to playlist files.
|
30
29
|
# Also accepts single String url that points to parent playlist.
|
31
30
|
def initialize(urls)
|
32
|
-
@urls
|
33
|
-
|
34
|
-
@invalid_playlists = []
|
35
|
-
@errors = []
|
31
|
+
@urls = Array(urls)
|
36
32
|
end
|
37
33
|
|
38
34
|
# Public: Starts the download of Array urls
|
@@ -41,17 +37,11 @@ module HLSpider
|
|
41
37
|
# Examples
|
42
38
|
#
|
43
39
|
# crawl
|
44
|
-
# # =>
|
40
|
+
# # => [#<HLSpider::Playlist:0x10ca9bef8>, #<HLSpider::Playlist:0x10ca9bef9>]
|
45
41
|
#
|
46
|
-
# Returns
|
47
|
-
def crawl
|
48
|
-
|
49
|
-
|
50
|
-
if @errors.empty?
|
51
|
-
true
|
52
|
-
else
|
53
|
-
false
|
54
|
-
end
|
42
|
+
# Returns Array of Playlists
|
43
|
+
def crawl!
|
44
|
+
self.playlists = dive(@urls)
|
55
45
|
end
|
56
46
|
|
57
47
|
# Public: Checks if playlists' segments are aligned.
|
@@ -77,8 +67,7 @@ module HLSpider
|
|
77
67
|
#
|
78
68
|
# Returns Array of Playlists
|
79
69
|
def playlists
|
80
|
-
|
81
|
-
@playlists
|
70
|
+
@playlists ||= crawl!
|
82
71
|
end
|
83
72
|
|
84
73
|
# Public: Get Array of last segments across playlists.
|
@@ -96,7 +85,7 @@ module HLSpider
|
|
96
85
|
|
97
86
|
private
|
98
87
|
|
99
|
-
include
|
88
|
+
include Downloader
|
100
89
|
|
101
90
|
# Internal: Download playlists from Array urls.
|
102
91
|
#
|
@@ -107,11 +96,11 @@ module HLSpider
|
|
107
96
|
# # => [#<HLSpider::Playlist:0x10ca9bef8>, #<HLSpider::Playlist:0x10ca9bef9>]
|
108
97
|
#
|
109
98
|
# Returns Array of Playlists.
|
110
|
-
# Raises
|
99
|
+
# Raises HLSpider::Spider::InvalidPlaylist if an invalid playlist is downloaded.
|
111
100
|
def dive(urls = [])
|
112
101
|
playlists = []
|
113
102
|
|
114
|
-
responses =
|
103
|
+
responses = download(urls)
|
115
104
|
responses.each do |resp|
|
116
105
|
p = Playlist.new(resp.response, resp.req.uri.to_s)
|
117
106
|
|
@@ -122,11 +111,11 @@ module HLSpider
|
|
122
111
|
playlists << p
|
123
112
|
end
|
124
113
|
else
|
125
|
-
|
114
|
+
raise InvalidPlaylist, "#{p.source} was an invalid playlist."
|
126
115
|
end
|
127
116
|
end
|
128
117
|
|
129
|
-
|
118
|
+
playlists.flatten
|
130
119
|
end
|
131
120
|
end
|
132
121
|
end
|
data/lib/hlspider/version.rb
CHANGED
data/lib/hlspider.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hlspider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 1
|
10
|
-
version: 0.3.1
|
9
|
+
- 2
|
10
|
+
version: 0.3.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- brookemckim
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-07-06 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: eventmachine
|
@@ -86,7 +86,7 @@ files:
|
|
86
86
|
- bin/hlspider
|
87
87
|
- hlspider.gemspec
|
88
88
|
- lib/hlspider.rb
|
89
|
-
- lib/hlspider/async_download.rb
|
89
|
+
- lib/hlspider/downloader.rb
|
90
90
|
- lib/hlspider/playlist.rb
|
91
91
|
- lib/hlspider/playlist_line.rb
|
92
92
|
- lib/hlspider/spider.rb
|