rubyretriever 1.0.3 → 1.1.0
This diff shows the changes between publicly released package versions, as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/bin/rr +41 -41
- data/lib/retriever/cli.rb +20 -26
- data/lib/retriever/fetch.rb +209 -186
- data/lib/retriever/fetchfiles.rb +65 -60
- data/lib/retriever/fetchseo.rb +20 -18
- data/lib/retriever/fetchsitemap.rb +37 -32
- data/lib/retriever/link.rb +6 -2
- data/lib/retriever/openuri-redirect-patch.rb +3 -2
- data/lib/retriever/page.rb +20 -21
- data/lib/retriever/target.rb +22 -30
- data/lib/retriever/version.rb +1 -1
- data/lib/retriever.rb +2 -2
- data/readme.md +5 -4
- data/spec/page_spec.rb +6 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 02c2b2530b3b83eb12325443c3c9214d977d8a56
+  data.tar.gz: f1a8b163ae3c3caed750eacb7724c2c38693ccd2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 68c56e76fff7cee17b2e48251413df199cbe46df84bb8b51f0333510d9507627f59714bd7c4fb4a502796cefa76d362b9dbe912ea1a4d419b356f0de403e606a
+  data.tar.gz: 2f149643ba65999c783bf17bbcbffc92648eec3979f610df2fdd101c55a5a58607c2087f47911d65457d41567f5f574b79ad0a25cfac82e61c7255c1170e3e64
data/bin/rr
CHANGED
@@ -4,73 +4,73 @@ require 'retriever'
 require 'optparse'

 options = {}
-optparse = OptionParser.new do|opts|
+optparse = OptionParser.new do |opts|
   # Set a banner, displayed at the top
   # of the help screen.
-  opts.banner =
-  [not captured]
-  opts.on(
-  options[
+  opts.banner = 'Usage: rr [MODE FLAG] [options] Target_URL'
+  options['sitemap'] = false
+  opts.on('-s', '--sitemap [FORMAT]', 'MODE FLAG: Sitemap mode') do |output_type|
+    options['sitemap'] = output_type || ''
   end
-  [not captured]
-  opts.on(
-  options[
+  options['fileharvest'] = false
+  opts.on('-f', '--files FILETYPE', 'MODE FLAG: Fileharvest mode') do |file_ext|
+    options['fileharvest'] = file_ext
   end
-  options[
-  opts.on(
-  options[
+  options['seo'] = false
+  opts.on('-e', '--seo', 'MODE FLAG: SEO mode') do
+    options['seo'] = true
   end
-  [not captured]
-  opts.on(
-  options[
+  options['filename'] = nil
+  opts.on('-o', '--out FILENAME', 'Dump output to selected filename') do |filename|
+    options['filename'] = filename
   end
   # Define the options, and what they do
-  options[
-  opts.on(
-  options[
+  options['verbose'] = false
+  opts.on('-v', '--verbose', 'Output more information') do
+    options['verbose'] = true
   end
-  options[
-  opts.on(
-  options[
+  options['progress'] = false
+  opts.on('-p', '--progress', 'Output progress bar') do
+    options['progress'] = true
   end
-  [not captured]
-  opts.on(
+  options['maxpages'] = false
+  opts.on('-l', '--limit PAGE_LIMIT_#', 'set a max on the total number of crawled pages') do |maxpages|
     options[:maxpages] = maxpages
   end
-  options[
-  opts.on(
+  options['autodown'] = false
+  opts.on('-a', '--auto', 'Automatically download all files of filetype located') do
     options[:autodown] = true
   end
   # This displays the help screen, all programs are
   # assumed to have this option.
-  opts.on(
+  opts.on('-h', '--help', 'Display this screen') do
     puts opts
     exit
   end
 end
-  [old lines 51-52 not captured]
+
+optparse.parse!
 if ARGV[0].nil?
-  [not captured]
+  abort('###Missing Required Argument\nUsage: rr [mode] [options] Target_URL')
 end

 ARGV.each do|q|
   if options[:verbose]
-    puts
-    puts
-    puts
-    puts "### Outputting in format: #{options[
-    puts
-    puts "### Searching for file extension: #{options[
-    puts
-    puts "### Writing output to filename: #{options[
-    puts
-    puts "### Stopping after #{options[
+    puts '###############################'
+    puts '### [RubyRetriever]'
+    puts '### Creating Sitemap' if options['sitemap']
+    puts "### Outputting in format: #{options['sitemap']}" if options['sitemap']
+    puts '### Performing File Harvest' if options['fileharvest']
+    puts "### Searching for file extension: #{options['fileharvest']} pages" if options['fileharvest']
+    puts '### Performing SEO Scrape' if options['seo']
+    puts "### Writing output to filename: #{options['filename']}" if options['filename']
+    puts '### Being verbose'
+    puts "### Stopping after #{options['maxpages']} pages"
   end
-  puts
+  puts '###############################'
   puts "### [RubyRetriever] go fetch #{q}"
   Retriever::CLI.new(q, options)
-  puts
-  puts
+  puts '### [RubyRetriever] is done.'
+  puts '###############################'
   puts
 end
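A note on this hunk: the option defaults switch from symbol keys to string keys, but the `-l` and `-a` handlers still assign to `options[:maxpages]` and `options[:autodown]` (symbol keys), as the context lines show. A minimal standalone sketch (not code from the gem) of how the new string-key parsing behaves:

```ruby
# Minimal sketch of the rewritten option-parsing style (string keys).
# The argv array is illustrative; `rr` normally parses real ARGV.
require 'optparse'

options = {}
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: rr [MODE FLAG] [options] Target_URL'
  options['sitemap'] = false
  opts.on('-s', '--sitemap [FORMAT]', 'MODE FLAG: Sitemap mode') do |output_type|
    # optional FORMAT: falls back to '' when the flag is given bare
    options['sitemap'] = output_type || ''
  end
end
parser.parse!(['--sitemap', 'xml'])
p options # expected: {"sitemap"=>"xml"}
```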
data/lib/retriever/cli.rb
CHANGED
@@ -1,27 +1,21 @@
 module Retriever
-  [old lines 2-22 not captured]
-    #sitemap only
-    @fetch.gen_xml if /XML/i =~ options[:sitemap].to_s
-    end
-  end
-end
+  #
+  class CLI
+    def initialize(url, options)
+      # kick off the fetch mode of choice
+      if options['fileharvest']
+        @fetch = Retriever::FetchFiles.new(url, options)
+      elsif options['sitemap']
+        @fetch = Retriever::FetchSitemap.new(url, options)
+      elsif options['seo']
+        @fetch = Retriever::FetchSEO.new(url, options)
+      else
+        fail '### Error: No Mode Selected'
+      end
+      @fetch.dump
+      @fetch.write if options['filename']
+      @fetch.autodownload if options['autodown'] && options['fileharvest']
+      @fetch.gen_xml if /XML/i =~ options['sitemap'].to_s
+    end
+  end
+end
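A hypothetical direct use of the new CLI class from Ruby, rather than through the `rr` binary (which builds the options hash for you); the URL and values are illustrative:

```ruby
# Sketch: invoking Retriever::CLI programmatically with the new string keys.
require 'retriever'

options = {
  'sitemap'  => 'xml', # selects FetchSitemap; the /XML/i match also triggers gen_xml
  'maxpages' => '50',
  'verbose'  => true
}
Retriever::CLI.new('http://www.cnet.com', options)
```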
data/lib/retriever/fetch.rb
CHANGED
@@ -7,191 +7,214 @@ require 'csv'
 require 'bloomfilter-rb'

 module Retriever
-  [old lines 10-11 not captured]
+  #
+  class Fetch
+    attr_reader :max_pages, :t
+    # given target URL and RR options, creates a fetch object.
+    # There is no direct output
+    # this is a parent class that the other fetch classes build off of.
+    def initialize(url, options)
+      @connection_tally = {
+        :success => 0,
+        :error => 0,
+        :error_client => 0,
+        :error_server => 0
+      }
+      # OPTIONS
+      @prgrss = options['progress']
+      @max_pages = options['maxpages'] ? options['maxpages'].to_i : 100
+      @v = options['verbose']
+      @output = options['filename']
+      @fh = options['fileharvest']
+      @file_ext = @fh.to_s
+      @s = options['sitemap']
+      @seo = options['seo']
+      @autodown = options['autodown']
+      #
+      if @fh
+        temp_ext_str = '.' + @file_ext + '\z'
+        @file_re = Regexp.new(temp_ext_str).freeze
+      else
+        # when FH is not true, and autodown is true
+        errlog('Cannot AUTODOWNLOAD when not in FILEHARVEST MODE') if @autodown
+      end
+      if @prgrss
+        # verbose & progressbar conflict
+        errlog('CANNOT RUN VERBOSE & PROGRESSBAR AT SAME TIME, CHOOSE ONE, -v or -p') if @v
+        prgress_vars = {
+          :title => 'Pages',
+          :starting_at => 1,
+          :total => @max_pages,
+          :format => '%a |%b>%i| %c/%C %t'
+        }
+        @progressbar = ProgressBar.create(prgress_vars)
+      end
+      @t = Retriever::Target.new(url, @file_re)
+      @output = "rr-#{@t.host.split('.')[1]}" if @fh && !@output
+      @already_crawled = BloomFilter::Native.new(
+        :size => 1_000_000,
+        :hashes => 5,
+        :seed => 1,
+        :bucket => 8,
+        :raise => false
+      )
+      @already_crawled.insert(@t.target)
+    end

-  [old lines 13-15 not captured]
-      :error => 0,
-      :error_client => 0,
-      :error_server => 0
-    }
-    #OPTIONS
-    @prgrss = options[:progress] ? options[:progress] : false
-    @maxPages = options[:maxpages] ? options[:maxpages].to_i : 100
-    @v= options[:verbose] ? true : false
-    @output=options[:filename] ? options[:filename] : false
-    @fh = options[:fileharvest] ? options[:fileharvest] : false
-    @file_ext = @fh.to_s
-    @s = options[:sitemap] ? options[:sitemap] : false
-    @seo = options[:seo] ? true : false
-    @autodown = options[:autodown] ? true : false
-    #
-    if @fh
-      tempExtStr = "."+@file_ext+'\z'
-      @file_re = Regexp.new(tempExtStr).freeze
-    else
-      errlog("Cannot AUTODOWNLOAD when not in FILEHARVEST MODE") if @autodown #when FH is not true, and autodown is true
-    end
-    if @prgrss
-      errlog("CANNOT RUN VERBOSE & PROGRESSBAR AT SAME TIME, CHOOSE ONE, -v or -p") if @v #verbose & progressbar conflict
-      prgressVars = {
-        :title => "Pages Crawled",
-        :starting_at => 1,
-        :total => @maxPages,
-        :format => '%a |%b>%i| %c/%C %t',
-      }
-      @progressbar = ProgressBar.create(prgressVars)
-    end
-    @t = Retriever::Target.new(url,@file_re)
-    @already_crawled = BloomFilter::Native.new(:size => 1000000, :hashes => 5, :seed => 1, :bucket => 8, :raise => false)
-    @already_crawled.insert(@t.target)
-    if (@fh && !@output)
-      @output = "rr-#{@t.host.split('.')[1]}"
-    end
-    fail "bad page source on target -- try HTTPS?" if !@t.source
-  end
-  def errlog(msg)
-    raise "ERROR: #{msg}"
-  end
-  def lg(msg)
-    puts "### #{msg}" if @v
-  end
-  def dump #prints current data collection to STDOUT, meant for CLI use.
-    puts "###############################"
-    if @v
-      puts "Connection Tally:"
-      puts @connection_tally.to_s
-      puts "###############################"
-    end
-    if @s
-      puts "#{@t.target} Sitemap"
-      puts "Page Count: #{@data.size}"
-    elsif @fh
-      puts "Target URL: #{@t.target}"
-      puts "Filetype: #{@file_ext}"
-      puts "File Count: #{@data.size}"
-    elsif @seo
-      puts "#{@t.target} SEO Metrics"
-      puts "Page Count: #{@data.size}"
-    else
-      fail "ERROR - Cannot dump - Mode Not Found"
-    end
-    puts "###############################"
-    @data.each do |line|
-      puts line
-    end
-    puts "###############################"
-    puts
-  end
-  def write #writes current data collection out to CSV in current directory
-    if @output
-      i = 0
-      CSV.open("#{@output}.csv", "w") do |csv|
-        if ((i == 0) && @seo)
-          csv << ['URL','Page Title','Meta Description','H1','H2']
-          i +=1
-        end
-        @data.each do |entry|
-          csv << entry
-        end
-      end
-      puts "###############################"
-      puts "File Created: #{@output}.csv"
-      puts "Object Count: #{@data.size}"
-      puts "###############################"
-      puts
-    end
-  end
-  def async_crawl_and_collect() #iterates over the excisting @linkStack, running asyncGetWave on it until we reach the @maxPages value.
-    while (@already_crawled.size < @maxPages)
-      if @linkStack.empty?
-        if @prgrss
-          @progressbar.log("Can't find any more links. Site might be completely mapped.")
-        else
-          lg("Can't find any more links. Site might be completely mapped.")
-        end
-        break;
-      end
-      new_links_arr = self.asyncGetWave()
-      next if (new_links_arr.nil? || new_links_arr.empty?)
-      new_link_arr = new_links_arr-@linkStack #set operations to see are these in our previous visited pages arr?
-      @linkStack.concat(new_links_arr).uniq!
-      @data.concat(new_links_arr) if @s
-    end
-    @progressbar.finish if @prgrss #if we are done, let's make sure progress bar says we are done
-  end
-  def good_response?(resp, url) #returns true is resp is ok to continue process, false is we need to 'next' it
-    return false if !resp
-    if resp.response_header.redirection? #we got redirected
-      loc = resp.response_header.location
-      lg("#{url} Redirected to #{loc}")
-      if t.host_re =~ loc #if being redirected to same host, let's add to linkstack
-        @linkStack.push(loc) if !@already_crawled.include?(loc) #but only if we haven't already crawled it
-        lg("--Added to linkStack for later")
-        return false
-      end
-      lg("Redirection outside of target host. No - go. #{loc}")
-      return false
-    end
-    if (!resp.response_header.successful?) #if webpage is not text/html, let's not continue and lets also make sure we dont re-queue it
-      lg("UNSUCCESSFUL CONNECTION -- #{url}")
-      @connection_tally[:error] += 1
-      @connection_tally[:error_server] += 1 if resp.response_header.server_error?
-      @connection_tally[:error_client] += 1 if resp.response_header.client_error?
-      return false
-    end
-    if (!(resp.response_header['CONTENT_TYPE'].include?("text/html"))) #if webpage is not text/html, let's not continue and lets also make sure we dont re-queue it
-      @already_crawled.insert(url)
-      @linkStack.delete(url)
-      lg("Page Not text/html -- #{url}")
-      return false
-    end
-    @connection_tally[:success] += 1
-    return true
-  end
+    def errlog(msg)
+      fail "ERROR: #{msg}"
+    end

-  [old lines 155-197 not captured]
+    def lg(msg)
+      puts "### #{msg}" if @v
+    end
+
+    # prints current data collection to STDOUT
+    def dump
+      puts '###############################'
+      if @v
+        puts 'Connection Tally:'
+        puts @connection_tally.to_s
+        puts '###############################'
+      end
+      if @s
+        puts "#{@t.target} Sitemap"
+        puts "Page Count: #{@data.size}"
+      elsif @fh
+        puts "Target URL: #{@t.target}"
+        puts "Filetype: #{@file_ext}"
+        puts "File Count: #{@data.size}"
+      elsif @seo
+        puts "#{@t.target} SEO Metrics"
+        puts "Page Count: #{@data.size}"
+      else
+        fail 'ERROR - Cannot dump - Mode Not Found'
+      end
+      puts '###############################'
+      @data.each do |line|
+        puts line
+      end
+      puts '###############################'
+      puts
+    end
+
+    # writes current data collection out to CSV in current directory
+    def write
+      return false unless @output
+      i = 0
+      CSV.open("#{@output}.csv", 'w') do |csv|
+        if (i == 0) && @seo
+          csv << ['URL', 'Page Title', 'Meta Description', 'H1', 'H2']
+          i += 1
+        end
+        @data.each do |entry|
+          csv << entry
+        end
+      end
+      puts '###############################'
+      puts "File Created: #{@output}.csv"
+      puts "Object Count: #{@data.size}"
+      puts '###############################'
+      puts
+    end
+
+    # iterates over the existing @link_stack
+    # running until we reach the @max_pages value.
+    def async_crawl_and_collect
+      while @already_crawled.size < @max_pages
+        if @link_stack.empty?
+          if @prgrss
+            @progressbar.log("Can't find any more links.")
+          else
+            lg("Can't find any more links.")
+          end
+          break
+        end
+        new_links_arr = process_link_stack
+        next if new_links_arr.nil? || new_links_arr.empty?
+        # set operations to see are these in our previous visited pages arr
+        new_links_arr -= @link_stack
+        @link_stack.concat(new_links_arr).uniq!
+        @data.concat(new_links_arr) if @s
+      end
+      # done, make sure progress bar says we are done
+      @progressbar.finish if @prgrss
+    end
+
+    # returns true is resp is ok to continue
+    def good_response?(resp, url)
+      return false unless resp
+      hdr = resp.response_header
+      if hdr.redirection?
+        loc = hdr.location
+        lg("#{url} Redirected to #{loc}")
+        if t.host_re =~ loc
+          @link_stack.push(loc) unless @already_crawled.include?(loc)
+          lg('--Added to linkStack for later')
+          return false
+        end
+        lg("Redirection outside of target host. No - go. #{loc}")
+        return false
+      end
+      # lets not continue if unsuccessful connection
+      unless hdr.successful?
+        lg("UNSUCCESSFUL CONNECTION -- #{url}")
+        @connection_tally[:error] += 1
+        @connection_tally[:error_server] += 1 if hdr.server_error?
+        @connection_tally[:error_client] += 1 if hdr.client_error?
+        return false
+      end
+      # let's not continue if not text/html
+      unless hdr['CONTENT_TYPE'].include?('text/html')
+        @already_crawled.insert(url)
+        @link_stack.delete(url)
+        lg("Page Not text/html -- #{url}")
+        return false
+      end
+      @connection_tally[:success] += 1
+      true
+    end
+
+    # send a new wave of GET requests, using current @link_stack
+    def process_link_stack
+      new_stuff = []
+      EM.synchrony do
+        concurrency = 10
+        EM::Synchrony::FiberIterator.new(@link_stack, concurrency).each do |url|
+          next if @already_crawled.size >= @max_pages
+          next if @already_crawled.include?(url)
+          resp = EventMachine::HttpRequest.new(url).get
+          next unless good_response?(resp, url)
+          lg("Page Fetched: #{url}")
+          @already_crawled.insert(url)
+          new_page = Retriever::Page.new(resp.response, @t)
+          if @prgrss
+            @progressbar.increment if @already_crawled.size < @max_pages
+          end
+          if @seo
+            seos = [url]
+            seos.concat(new_page.parse_seo)
+            @data.push(seos)
+            lg('--page SEO scraped')
+          end
+          next if new_page.links.size == 0
+          lg("--#{new_page.links.size} links found")
+          internal_links_arr = new_page.parse_internal_visitable
+          new_stuff.push(internal_links_arr)
+          if @fh
+            filez = new_page.parse_files
+            @data.concat(filez) unless filez.empty?
+            lg("--#{filez.size} files found")
+          end
+        end
+        new_stuff = new_stuff.flatten # all completed requests
+        EventMachine.stop
+      end
+      new_stuff.uniq!
+    end
+  end
+end
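The visited set above is a Bloom filter rather than an array or hash. A standalone sketch of the same bloomfilter-rb calls, using the parameters from the hunk:

```ruby
# Sketch of the visited-URL tracking used by Fetch: constant memory with a
# small false-positive rate -- the worst case is skipping a page that was
# never actually crawled, which is acceptable for a crawler.
require 'bloomfilter-rb'

already_crawled = BloomFilter::Native.new(
  :size   => 1_000_000,
  :hashes => 5,
  :seed   => 1,
  :bucket => 8,
  :raise  => false
)
already_crawled.insert('http://www.cnet.com/')
already_crawled.include?('http://www.cnet.com/')      # => true
already_crawled.include?('http://www.cnet.com/news/') # => false (with high probability)
```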
data/lib/retriever/fetchfiles.rb
CHANGED
@@ -1,65 +1,70 @@
 module Retriever
-  [old lines 2-9 not captured]
+  # recieves target url and RR options
+  # returns an array of all unique files (based on given filetype)
+  # found on the target site
+  class FetchFiles < Fetch
+    def initialize(url, options)
+      super
+      @data = []
+      page_one = Retriever::Page.new(@t.source, @t)
+      @link_stack = page_one.parse_internal_visitable
+      lg("URL Crawled: #{@t.target}")
+      lg("#{@link_stack.size - 1} new links found")

-  [old lines 11-14 not captured]
+      temp_file_collection = page_one.parse_files
+      @data.concat(tempFileCollection) if temp_file_collection.size > 0
+      lg("#{@data.size} new files found")
+      errlog("Bad URL -- #{@t.target}") unless @link_stack
+      @link_stack.delete(@t.target)

-  [not captured]
-      @linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
+      async_crawl_and_collect

-  [not captured]
+      @data.sort_by! { |x| x.length }
+      @data.uniq!
+    end

-  [old lines 21-64 not captured]
-  end
+    def download_file(path)
+      # given valid url, downloads file to current directory in /rr-downloads/
+      arr = path.split('/')
+      shortname = arr.pop
+      puts "Initiating Download to: '/rr-downloads/' + #{shortname}"
+      File.open(shortname, 'wb') do |saved_file|
+        open(path) do |read_file|
+          saved_file.write(read_file.read)
+        end
+      end
+      puts ' SUCCESS: Download Complete'
+    end
+
+    def autodownload
+      # go through the fetched file URL collection and download each one.
+      lenny = @data.count
+      puts '###################'
+      puts '### Initiating Autodownload...'
+      puts '###################'
+      puts "#{lenny} - #{@file_ext}'s Located"
+      puts '###################'
+      if File.directory?('rr-downloads')
+        Dir.chdir('rr-downloads')
+      else
+        puts 'creating rr-downloads Directory'
+        Dir.mkdir('rr-downloads')
+        Dir.chdir('rr-downloads')
+      end
+      file_counter = 0
+      @data.each do |entry|
+        begin
+          download_file(entry)
+          file_counter += 1
+          lg(' File [#{file_counter} of #{lenny}]')
+          puts
+        rescue StandardError => e
+          puts 'ERROR: failed to download - #{entry}'
+          puts e.message
+          puts
+        end
+      end
+      Dir.chdir('..')
+    end
+  end
+end
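Note that the hunk carries a naming mismatch over from the snake_case rename: `parse_files` is assigned to `temp_file_collection`, but the next line concatenates `tempFileCollection`. A hypothetical file-harvest run, sketched as direct Ruby calls (equivalent to `rr -f pdf -a URL`; the URL and filetype are illustrative):

```ruby
# Sketch: file-harvest mode driven from Ruby instead of the rr binary.
require 'retriever'

fetch = Retriever::FetchFiles.new('http://www.cnet.com',
                                  'fileharvest' => 'pdf',
                                  'autodown'    => true)
fetch.dump         # print every .pdf URL found during the crawl
fetch.autodownload # save each one under ./rr-downloads/
```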
data/lib/retriever/fetchseo.rb
CHANGED
@@ -1,23 +1,25 @@
 module Retriever
-  [old lines 2-9 not captured]
+  #
+  class FetchSEO < Fetch
+    # recieves target url and RR options
+    # returns an array of onpage SEO related fields
+    # on all unique pages found on the site
+    def initialize(url, options)
+      super
+      @data = []
+      page_one = Retriever::Page.new(@t.source, @t)
+      lg("URL Crawled: #{@t.target}")

-  [old lines 11-12 not captured]
+      @link_stack = page_one.parse_internal_visitable
+      errlog("Bad URL -- #{@t.target}") unless @link_stack
+      lg("#{@link_stack.size - 1} links found")
+      @link_stack.delete(@t.target)

-  [not captured]
-      @linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
+      @data.push(page_one.parse_seo)

-  [not captured]
+      async_crawl_and_collect

-  [old lines 20-22 not captured]
-    end
+      @data.sort_by! { |x| x[0].length }
+    end
+  end
+end
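A hypothetical SEO-mode run ending in the CSV dump: with `'filename'` set, the parent class's `write` adds the header row shown in the fetch.rb hunk, then one row per crawled page (values here are illustrative):

```ruby
# Sketch: SEO mode with CSV output.
require 'retriever'

fetch = Retriever::FetchSEO.new('http://www.cnet.com',
                                'seo'      => true,
                                'filename' => 'cnet-seo')
fetch.dump
fetch.write # creates ./cnet-seo.csv with URL / Page Title / Meta Description / H1 / H2
```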
data/lib/retriever/fetchsitemap.rb
CHANGED
@@ -1,36 +1,41 @@
 module Retriever
-  [old lines 2-10 not captured]
+  #
+  class FetchSitemap < Fetch
+    # recieves target URL and RR options
+    # returns an array of all unique pages found on the site
+    def initialize(url, options)
+      super
+      @data = [@t.target]
+      page_one = Retriever::Page.new(@t.source, @t)
+      lg("URL Crawled: #{@t.target}")
+      @link_stack = page_one.parse_internal_visitable
+      errlog("Bad URL -- #{@t.target}") unless @link_stack
+      lg("#{@link_stack.size - 1} links found")

-  [old lines 12-13 not captured]
-      @data.concat(@linkStack)
+      @link_stack.delete(@t.target)
+      @data.concat(@link_stack)

-  [not captured]
+      async_crawl_and_collect

-  [old lines 18-36 not captured]
+      @data.sort_by! { |x| x.length } if @data.size > 1
+      @data.uniq!
+    end
+
+    # produces valid XML sitemap based on page collection fetched.
+    # Writes to current directory.
+    def gen_xml
+      f = File.open("sitemap-#{@t.host.split('.')[1]}.xml", 'w+')
+      f << "<?xml version='1.0' encoding='UTF-8'?><urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>"
+      @data.each do |url|
+        f << "<url><loc>#{url}</loc></url>"
+      end
+      f << '</urlset>'
+      f.close
+      puts '###############################'
+      puts "File Created: sitemap-#{@t.host.split('.')[1]}.xml"
+      puts "Object Count: #{@data.size}"
+      puts '###############################'
+      puts
+    end
+  end
+end
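A hypothetical sitemap-mode run; per the hunk, `gen_xml` names the output file after the second dot-separated host segment, so `www.cnet.com` would yield `sitemap-cnet.xml` (URL illustrative):

```ruby
# Sketch: sitemap mode end-to-end.
require 'retriever'

fetch = Retriever::FetchSitemap.new('http://www.cnet.com', 'sitemap' => 'xml')
fetch.dump    # print the collected page list
fetch.gen_xml # write <url><loc>...</loc></url> entries to sitemap-cnet.xml
```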
data/lib/retriever/link.rb
CHANGED
@@ -1,4 +1,5 @@
 module Retriever
+  #
   class Link
     HTTP_RE = Regexp.new(/^http/i).freeze
     SINGLE_SLASH_RE = Regexp.new(/^\/{1}[^\/]/).freeze
@@ -18,12 +19,15 @@ module Retriever

       return "http://#{host}#{link}" if SINGLE_SLASH_RE =~ link

-  [not captured]
+      # link begins with '//'
+      return "http:#{link}" if DOUBLE_SLASH_RE =~ link

-  [not captured]
+      # link uses relative path with no slashes at all
+      return "http://#{host}/#{link}" if NO_SLASH_PAGE_RE =~ link
     end

     private
+
     attr_reader :host, :link
   end
 end
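Worked examples of the normalization branches above. The constructor arguments follow the `Link.new(@t.host, link).path` call in page.rb; the paths are illustrative, and the last line assumes `NO_SLASH_PAGE_RE` (not shown in this hunk) matches bare page names like `about.html`:

```ruby
# Sketch: how each link form resolves to an absolute http URL.
require 'retriever'

host = 'www.cnet.com'
Retriever::Link.new(host, '/reviews/').path           # => "http://www.cnet.com/reviews/"
Retriever::Link.new(host, '//cdn.cnet.com/a.js').path # => "http://cdn.cnet.com/a.js"
Retriever::Link.new(host, 'about.html').path          # => "http://www.cnet.com/about.html" (assumed)
```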
data/lib/retriever/openuri-redirect-patch.rb
CHANGED
@@ -1,6 +1,7 @@
 module OpenURI
-  [not captured]
+  # nesc patch otherwise OPENURI blocks redirects to and from https
+  def OpenURI.redirectable?(uri1, uri2)
     uri1.scheme.downcase == uri2.scheme.downcase ||
       (/\A(?:http|https)\z/i =~ uri1.scheme && /\A(?:http|https)\z/i =~ uri2.scheme)
   end
-end
+end
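The effect of the patch: stock OpenURI refuses redirects that cross schemes, while the patched predicate treats http and https as interchangeable. A sketch (assuming the gem's top-level require loads this patch, as its lib layout suggests):

```ruby
# Sketch: the patched predicate in isolation.
require 'retriever' # loads the OpenURI patch

OpenURI.redirectable?(URI('http://cnet.com'), URI('https://cnet.com')) # => true  (http <-> https allowed)
OpenURI.redirectable?(URI('https://cnet.com'), URI('ftp://cnet.com'))  # => false (other schemes still blocked)
```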
data/lib/retriever/page.rb
CHANGED
@@ -1,7 +1,6 @@
 module Retriever
-  [not captured]
+  #
   class Page
-  [not captured]
     HREF_CONTENTS_RE = Regexp.new(/\shref=['|"]([^\s][a-z0-9\.\/\:\-\%\+\?\!\=\&\,\:\;\~\_]+)['|"][\s|\W]/ix).freeze
     NONPAGE_EXT_RE = Regexp.new(/\.(?:css|js|png|gif|jpg|mp4|wmv|flv|mp3|wav|doc|txt|ico|xml)/ix).freeze
     HTTP_RE = Regexp.new(/^http/i).freeze
@@ -14,55 +13,55 @@ module Retriever

     attr_reader :links, :source, :t

-    def initialize(source,t)
+    def initialize(source, t)
       @t = t
       @source = source.encode('UTF-8', :invalid => :replace, :undef => :replace)
       @links = nil
     end

-    #recieves page source as string
-    #returns array of unique href links
+    # recieves page source as string
+    # returns array of unique href links
     def links
       return @links if @links
-      return false
-      @links = @source.scan(HREF_CONTENTS_RE).map do |match|
+      return false unless @source
+      @links = @source.scan(HREF_CONTENTS_RE).map do |match|
+        # filter some malformed URLS that come in
+        # meant to be a loose filter to catch all reasonable HREF attributes.
         link = match[0]
         Link.new(@t.host, link).path
       end.uniq
     end

-    def
-      links.select{ |linky| (@t.host_re =~ linky) }
+    def parse_internal
+      links.select { |linky| (@t.host_re =~ linky) }
     end

-    def
-  [not captured]
+    def parse_internal_visitable
+      parse_internal.select { |linky| (!(NONPAGE_EXT_RE =~ linky)) }
     end

-    def
-      links.select{ |linky| (@t.file_re =~ linky)}
+    def parse_files
+      links.select { |linky| (@t.file_re =~ linky) }
     end

     def title
-      TITLE_RE =~ @source ? @source.match(TITLE_RE)[1] :
+      TITLE_RE =~ @source ? @source.match(TITLE_RE)[1] : ''
     end

     def desc
-      DESC_RE =~ @source ? @source.match(DESC_RE)[1] :
+      DESC_RE =~ @source ? @source.match(DESC_RE)[1] : ''
     end

     def h1
-      H1_RE =~ @source ? @source.match(H1_RE)[1] :
+      H1_RE =~ @source ? @source.match(H1_RE)[1] : ''
     end

     def h2
-      H2_RE =~ @source ? @source.match(H2_RE)[1] :
+      H2_RE =~ @source ? @source.match(H2_RE)[1] : ''
     end

-    def
-  [not captured]
+    def parse_seo
+      [title, desc, h1, h2]
     end
-
   end
-
 end
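A sketch of the renamed `parse_*` API on a hand-made page. The exact captures depend on `TITLE_RE` / `H1_RE`, which this hunk does not show, so the return values below are assumptions:

```ruby
# Sketch: feeding raw HTML into Page and pulling the SEO fields.
require 'retriever'

t    = Retriever::Target.new('http://www.cnet.com') # no fetch yet; source is lazy
page = Retriever::Page.new("<title>Test</title><h1>Heading</h1>", t)
page.title     # => "Test" (assumed capture)
page.parse_seo # => [title, desc, h1, h2], e.g. ["Test", "", "Heading", ""] (assumed)
```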
data/lib/retriever/target.rb
CHANGED
@@ -1,52 +1,44 @@
 require 'open-uri'

 module Retriever
-  [not captured]
+  #
   class Target
-  [not captured]
     HTTP_RE = Regexp.new(/^http/i).freeze
     DUB_DUB_DUB_DOT_RE = Regexp.new(/^www\./i).freeze
-  [not captured]
+
     attr_reader :host, :target, :host_re, :source, :file_re

-    def initialize(url,file_re=nil)
-      url = "http://#{url}"
-      fail
+    def initialize(url, file_re = nil)
+      url = "http://#{url}" unless HTTP_RE =~ url
+      fail 'Bad URL' unless /\./ =~ url
       new_uri = URI(url)
       @target = new_uri.to_s
       @host = new_uri.host
-      @host_re = Regexp.new(@host.sub('www.',''))
+      @host_re = Regexp.new(@host.sub('www.', ''))
       @file_re ||= file_re
     end

     def source
-      resp =
-      begin
-        resp = open(@target)
-      rescue StandardError => e
-        trap("ABRT"){
-          puts "#{@target} failed SSL Certification Verification"
-        }
-        return false
-      end
+      resp = open(@target)
       resp_url = resp.base_uri.to_s
-      if
-  [old lines 34-36 not captured]
-        @host = new_t.host
-        return new_t.source
-      end
-      fail "Domain redirecting to new host: #{resp.base_uri.to_s}" #if it's not same host, we want to fail
+      if @target != resp_url
+        fail "Domain redirecting: #{resp_url}" unless @host_re =~ resp_url
+        # if redirect URL is same host, we want to re-sync @target
+        return resync_target_and_return_source(resp_url)
       end
       resp = resp.read
-  [old lines 43-47 not captured]
+      #
+      fail 'Domain is not working. Try the non-WWW version.' if resp == ''
+      fail 'Domain not working. Try HTTPS???' unless resp
+      # consider using scrub from ruby 2.1? this misses some things
+      resp.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace)
     end

+    def resync_target_and_return_source(url)
+      new_t = Retriever::Target.new(url)
+      @target = new_t.target
+      @host = new_t.host
+      new_t.source
+    end
   end
-  [not captured]
 end
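A sketch of Target in isolation (URL illustrative). Note that `source` performs the HTTP GET: on a same-host redirect it re-syncs `@target`/`@host` via `resync_target_and_return_source`, while a cross-host redirect still raises:

```ruby
# Sketch: constructing a Target and fetching its source.
require 'retriever'

t = Retriever::Target.new('cnet.com') # scheme is prepended automatically
t.target        # => "http://cnet.com"
html = t.source # raises "Domain redirecting: ..." if the redirect leaves the host
```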
data/lib/retriever/version.rb
CHANGED
data/lib/retriever.rb
CHANGED
data/readme.md
CHANGED
@@ -4,13 +4,14 @@

 By Joe Norton

-RubyRetriever is a Web Crawler, Site Mapper, File Harvester & Autodownloader
+RubyRetriever is a Web Crawler, Site Mapper, File Harvester & Autodownloader.

-RubyRetriever uses
+RubyRetriever (RR) uses asynchronous HTTP requests, thanks to [Eventmachine](https://github.com/eventmachine/eventmachine) & [Synchrony](https://github.com/igrigorik/em-synchrony), to crawl webpages *very quickly*. Another neat thing about RR, is it uses a ruby implementation of the [bloomfilter](https://github.com/igrigorik/bloomfilter-rb) in order to keep track of page's it has already crawled.

-  [not captured]
+**Use at Own Risk**
+RR does NOT respect robots.txt, and RR currently - by default - launches up to 10 parallel GET requests at once. This is a feature, do not abuse it.

-v1.0 Update 6/07/2014 - Includes major code changes, a lot of bug fixes. Much better in dealing with redirects, and issues with the host changing, etc. Also, added the SEO mode -- which grabs a number of key SEO components from every page on a site. Lastly, this
+**v1.0 Update (6/07/2014)** - Includes major code changes, a lot of bug fixes. Much better in dealing with redirects, and issues with the host changing, etc. Also, added the SEO mode -- which grabs a number of key SEO components from every page on a site. Lastly, this update was so extensive that I could not ensure backward compatibility -- and thus, this was update 1.0!


 getting started
data/spec/page_spec.rb
CHANGED
@@ -20,8 +20,8 @@ SOURCE
     end
   end

-  describe "#
-    let (:links){Retriever::Page.new(@source,t).
+  describe "#parse_internal" do
+    let (:links){Retriever::Page.new(@source,t).parse_internal}
     it "filters links by host" do
       @source = (<<SOURCE).strip
 <a href='http://www.cnet.com/'>download</a>
@@ -32,8 +32,8 @@ SOURCE
     end
   end

-  describe "#
-    let (:links){Retriever::Page.new(@source,t).
+  describe "#parse_internal_visitable" do
+    let (:links){Retriever::Page.new(@source,t).parse_internal_visitable}
     it "filters out 'unvisitable' URLS like JS, Stylesheets, Images" do
       @source = (<<SOURCE).strip
 <link rel='stylesheet' id='gforms_reset_css-css' href='http://www.cnet.com/wp-content/plugins/gravityforms/css/formreset.css?ver=1.7.12' type='text/css' media='all' />
@@ -43,7 +43,7 @@ SOURCE
   end

   describe "#parseFiles" do
-    let (:links){Retriever::Page.new(@source,t).
+    let (:links){Retriever::Page.new(@source,t).parse_files}
     it "filters links by filetype" do
       @source = (<<SOURCE).strip
 <a href='www.cnet.com/download.exe'>download</a>
@@ -90,5 +90,4 @@ SOURCE
       expect(page.h2).to eq(' test 4 ')
     end
   end
-
-end
+end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rubyretriever
 version: !ruby/object:Gem::Version
-  version: 1.0.3
+  version: 1.1.0
 platform: ruby
 authors:
 - Joe Norton
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-
+date: 2014-06-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: em-synchrony