macaron 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +15 -2
- data/bin/macaron +38 -9
- data/lib/macaron/spawner.rb +3 -3
- data/lib/macaron/version.rb +3 -0
- metadata +4 -2
data/README.md
CHANGED
@@ -2,11 +2,24 @@
|
|
2
2
|
Macaron is a simple web scraper implemented in Ruby. It's used to test whether a service is alive.
|
3
3
|
|
4
4
|
## Install
|
5
|
-
gem install
|
5
|
+
gem install macaron
|
6
6
|
|
7
7
|
## Example
|
8
8
|
```ruby
|
9
|
-
macaron
|
9
|
+
require 'macaron'
|
10
|
+
spawner = Spawner.new()
|
11
|
+
|
12
|
+
# 1st argument is the start URL; 2nd (optional) is the depth to dig
|
13
|
+
spawner.dig("http://www.google.com/", 2)
|
14
|
+
```
|
15
|
+
|
16
|
+
## CLI (Command Line Interface)
|
17
|
+
```
|
18
|
+
Usage: macaron [options] URL
|
19
|
+
-d, --debug Show debug output
|
20
|
+
-n, --depth N Set the digging depth N
|
21
|
+
-j, --javascript Open javascript support mode
|
22
|
+
-h, --help Show this message
|
10
23
|
```
|
11
24
|
|
12
25
|
## License
|
data/bin/macaron
CHANGED
@@ -1,13 +1,42 @@
|
|
1
|
-
#!/
|
2
|
-
|
1
|
+
#!/usr/bin/ruby
|
3
2
|
require File.dirname(__FILE__) + '/../lib/macaron'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'optparse'
|
5
|
+
require 'macaron/version'
|
6
|
+
|
7
|
+
options = {:depth => 2}
|
8
|
+
options_parser = OptionParser.new do |opts|
|
9
|
+
opts.banner = "Usage: #{File.basename($0)} [options] URL"
|
10
|
+
|
11
|
+
opts.on("-d", "--debug", "Show debug output") do |v|
|
12
|
+
options[:debug] = v
|
13
|
+
end
|
14
|
+
|
15
|
+
opts.on("-n N", "--depth N", Integer, "Set the digging depth N") do |n|
|
16
|
+
options[:depth] = n
|
17
|
+
end
|
18
|
+
|
19
|
+
opts.on("-j", "--javascript", "Open javascript support mode") do |j|
|
20
|
+
options[:waltir] = j
|
21
|
+
end
|
22
|
+
|
23
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
24
|
+
puts opts
|
25
|
+
exit
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
options_parser.parse!
|
30
|
+
|
31
|
+
if ARGV.length != 1
|
32
|
+
STDERR.puts options_parser
|
33
|
+
exit 1
|
34
|
+
end
|
35
|
+
|
36
|
+
url = ARGV.first
|
37
|
+
puts "Starting at #{url}"
|
4
38
|
|
5
|
-
mother = Spawner.new(
|
6
|
-
|
7
|
-
:in_site_crawling => true,
|
8
|
-
:with_waltir => false
|
9
|
-
})
|
10
|
-
mother.dig('http://rubyconf.tw/2012/', 2)
|
11
|
-
# mother.dig('http://www.sakura.idv.tw/', 2) # url, depth
|
39
|
+
mother = Spawner.new(options)
|
40
|
+
mother.dig(url, options[:depth])
|
12
41
|
puts "Success times: #{mother.success_times}"
|
13
42
|
puts "Fail times: #{mother.fail_times}"
|
data/lib/macaron/spawner.rb
CHANGED
@@ -39,7 +39,7 @@ module Macaron
|
|
39
39
|
@@fail_times
|
40
40
|
end
|
41
41
|
|
42
|
-
def dig(url, init_depth)
|
42
|
+
def dig(url, init_depth=3)
|
43
43
|
@@task_map = @@task_map.put(url, init_depth)
|
44
44
|
loop do
|
45
45
|
@@task_map = @@task_map.remove {|url, depth|
|
@@ -54,7 +54,7 @@ module Macaron
|
|
54
54
|
}
|
55
55
|
|
56
56
|
break if @threadpool.busy_workers_count == 0 && @@task_map.empty?
|
57
|
-
|
57
|
+
|
58
58
|
if @@success_times > @@options[:pages]
|
59
59
|
print "Fetched pages exceeds the limit #{@@options[:pages]}\n"
|
60
60
|
break
|
@@ -63,7 +63,7 @@ module Macaron
|
|
63
63
|
|
64
64
|
@bot.close unless @bot.nil?
|
65
65
|
|
66
|
-
|
66
|
+
puts "result: #{@@result.size}, #{@@result.keys}" if @@options[:debug]
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macaron
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,8 @@ date: 2012-11-17 00:00:00.000000000 Z
|
|
13
13
|
dependencies: []
|
14
14
|
description:
|
15
15
|
email: dalema22@gmail.com
|
16
|
-
executables: []
|
16
|
+
executables:
|
17
|
+
- macaron
|
17
18
|
extensions: []
|
18
19
|
extra_rdoc_files: []
|
19
20
|
files:
|
@@ -21,6 +22,7 @@ files:
|
|
21
22
|
- lib/macaron/processor.rb
|
22
23
|
- lib/macaron/scraper.rb
|
23
24
|
- lib/macaron/spawner.rb
|
25
|
+
- lib/macaron/version.rb
|
24
26
|
- lib/macaron.rb
|
25
27
|
- LICENSE
|
26
28
|
- README.md
|