macaron 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +15 -2
- data/bin/macaron +38 -9
- data/lib/macaron/spawner.rb +3 -3
- data/lib/macaron/version.rb +3 -0
- metadata +4 -2
data/README.md
CHANGED
@@ -2,11 +2,24 @@
|
|
2
2
|
Macaron is a simple web scraper implemented in Ruby. It's used for testing whether a service is alive.
|
3
3
|
|
4
4
|
## Install
|
5
|
-
gem install
|
5
|
+
gem install macaron
|
6
6
|
|
7
7
|
## Example
|
8
8
|
```ruby
|
9
|
-
macaron
|
9
|
+
require 'macaron'
|
10
|
+
spawner = Spawner.new()
|
11
|
+
|
12
|
+
# 1st argument is for start url, 2nd is for the depth to dig (optional)
|
13
|
+
spawner.dig("http://www.google.com/", 2)
|
14
|
+
```
|
15
|
+
|
16
|
+
## CLI (Command Line Interface)
|
17
|
+
```
|
18
|
+
Usage: macaron [options] URL
|
19
|
+
-d, --debug Show debug output
|
20
|
+
-n, --depth N Set the digging depth N
|
21
|
+
-j, --javascript Open javascript support mode
|
22
|
+
-h, --help Show this message
|
10
23
|
```
|
11
24
|
|
12
25
|
## License
|
data/bin/macaron
CHANGED
@@ -1,13 +1,42 @@
|
|
1
|
-
#!/
|
2
|
-
|
1
|
+
#!/usr/bin/ruby
|
3
2
|
require File.dirname(__FILE__) + '/../lib/macaron'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'optparse'
|
5
|
+
require 'macaron/version'
|
6
|
+
|
7
|
+
options = {:depth => 2}
|
8
|
+
options_parser = OptionParser.new do |opts|
|
9
|
+
opts.banner = "Usage: #{File.basename($0)} [options] URL"
|
10
|
+
|
11
|
+
opts.on("-d", "--debug", "Show debug output") do |v|
|
12
|
+
options[:debug] = v
|
13
|
+
end
|
14
|
+
|
15
|
+
opts.on("-n N", "--depth N", Integer, "Set the digging depth N") do |n|
|
16
|
+
options[:depth] = n
|
17
|
+
end
|
18
|
+
|
19
|
+
opts.on("-j", "--javascript", "Open javascript support mode") do |j|
|
20
|
+
options[:waltir] = j
|
21
|
+
end
|
22
|
+
|
23
|
+
opts.on_tail("-h", "--help", "Show this message") do
|
24
|
+
puts opts
|
25
|
+
exit
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
options_parser.parse!
|
30
|
+
|
31
|
+
if ARGV.length != 1
|
32
|
+
STDERR.puts options_parser
|
33
|
+
exit 1
|
34
|
+
end
|
35
|
+
|
36
|
+
url = ARGV.first
|
37
|
+
puts "Starting at #{url}"
|
4
38
|
|
5
|
-
mother = Spawner.new(
|
6
|
-
|
7
|
-
:in_site_crawling => true,
|
8
|
-
:with_waltir => false
|
9
|
-
})
|
10
|
-
mother.dig('http://rubyconf.tw/2012/', 2)
|
11
|
-
# mother.dig('http://www.sakura.idv.tw/', 2) # url, depth
|
39
|
+
mother = Spawner.new(options)
|
40
|
+
mother.dig(url, options[:depth])
|
12
41
|
puts "Success times: #{mother.success_times}"
|
13
42
|
puts "Fail times: #{mother.fail_times}"
|
data/lib/macaron/spawner.rb
CHANGED
@@ -39,7 +39,7 @@ module Macaron
|
|
39
39
|
@@fail_times
|
40
40
|
end
|
41
41
|
|
42
|
-
def dig(url, init_depth)
|
42
|
+
def dig(url, init_depth=3)
|
43
43
|
@@task_map = @@task_map.put(url, init_depth)
|
44
44
|
loop do
|
45
45
|
@@task_map = @@task_map.remove {|url, depth|
|
@@ -54,7 +54,7 @@ module Macaron
|
|
54
54
|
}
|
55
55
|
|
56
56
|
break if @threadpool.busy_workers_count == 0 && @@task_map.empty?
|
57
|
-
|
57
|
+
|
58
58
|
if @@success_times > @@options[:pages]
|
59
59
|
print "Fetched pages exceeds the limit #{@@options[:pages]}\n"
|
60
60
|
break
|
@@ -63,7 +63,7 @@ module Macaron
|
|
63
63
|
|
64
64
|
@bot.close unless @bot.nil?
|
65
65
|
|
66
|
-
|
66
|
+
puts "result: #{@@result.size}, #{@@result.keys}" if @@options[:debug]
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: macaron
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,8 @@ date: 2012-11-17 00:00:00.000000000 Z
|
|
13
13
|
dependencies: []
|
14
14
|
description:
|
15
15
|
email: dalema22@gmail.com
|
16
|
-
executables: []
|
16
|
+
executables:
|
17
|
+
- macaron
|
17
18
|
extensions: []
|
18
19
|
extra_rdoc_files: []
|
19
20
|
files:
|
@@ -21,6 +22,7 @@ files:
|
|
21
22
|
- lib/macaron/processor.rb
|
22
23
|
- lib/macaron/scraper.rb
|
23
24
|
- lib/macaron/spawner.rb
|
25
|
+
- lib/macaron/version.rb
|
24
26
|
- lib/macaron.rb
|
25
27
|
- LICENSE
|
26
28
|
- README.md
|