macaron 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -2,11 +2,24 @@
2
2
  Macaron is a simple web scraper implemented in Ruby. It's used to test whether a service is alive.
3
3
 
4
4
  ## Install
5
- gem install e-macaron
5
+ gem install macaron
6
6
 
7
7
  ## Example
8
8
  ```ruby
9
- macaron http://www.google.com/
9
+ require 'macaron'
10
+ spawner = Spawner.new()
11
+
12
+ # 1st argument is the start URL; 2nd is the depth to dig (optional, defaults to 3)
13
+ spawner.dig("http://www.google.com/", 2)
14
+ ```
15
+
16
+ ## CLI (Command Line Interface)
17
+ ```
18
+ Usage: macaron [options] URL
19
+ -d, --debug Show debug output
20
+ -n, --depth N Set the digging depth N
21
+ -j, --javascript Open javascript support mode
22
+ -h, --help Show this message
10
23
  ```
11
24
 
12
25
  ## License
data/bin/macaron CHANGED
@@ -1,13 +1,42 @@
1
- #!/user/bin/ruby
2
-
1
+ #!/usr/bin/ruby
3
2
  require File.dirname(__FILE__) + '/../lib/macaron'
3
+ require 'rubygems'
4
+ require 'optparse'
5
+ require 'macaron/version'
6
+
7
+ options = {:depth => 2}
8
+ options_parser = OptionParser.new do |opts|
9
+ opts.banner = "Usage: #{File.basename($0)} [options] URL"
10
+
11
+ opts.on("-d", "--debug", "Show debug output") do |v|
12
+ options[:debug] = v
13
+ end
14
+
15
+ opts.on("-n N", "--depth N", Integer, "Set the digging depth N") do |n|
16
+ options[:depth] = n
17
+ end
18
+
19
+ opts.on("-j", "--javascript", "Open javascript support mode") do |j|
20
+ options[:waltir] = j
21
+ end
22
+
23
+ opts.on_tail("-h", "--help", "Show this message") do
24
+ puts opts
25
+ exit
26
+ end
27
+ end
28
+
29
+ options_parser.parse!
30
+
31
+ if ARGV.length != 1
32
+ STDERR.puts options_parser
33
+ exit 1
34
+ end
35
+
36
+ url = ARGV.first
37
+ puts "Starting at #{url}"
4
38
 
5
- mother = Spawner.new({
6
- :thread_timeout_seconds => 999,
7
- :in_site_crawling => true,
8
- :with_waltir => false
9
- })
10
- mother.dig('http://rubyconf.tw/2012/', 2)
11
- # mother.dig('http://www.sakura.idv.tw/', 2) # url, depth
39
+ mother = Spawner.new(options)
40
+ mother.dig(url, options[:depth])
12
41
  puts "Success times: #{mother.success_times}"
13
42
  puts "Fail times: #{mother.fail_times}"
@@ -39,7 +39,7 @@ module Macaron
39
39
  @@fail_times
40
40
  end
41
41
 
42
- def dig(url, init_depth)
42
+ def dig(url, init_depth=3)
43
43
  @@task_map = @@task_map.put(url, init_depth)
44
44
  loop do
45
45
  @@task_map = @@task_map.remove {|url, depth|
@@ -54,7 +54,7 @@ module Macaron
54
54
  }
55
55
 
56
56
  break if @threadpool.busy_workers_count == 0 && @@task_map.empty?
57
-
57
+
58
58
  if @@success_times > @@options[:pages]
59
59
  print "Fetched pages exceeds the limit #{@@options[:pages]}\n"
60
60
  break
@@ -63,7 +63,7 @@ module Macaron
63
63
 
64
64
  @bot.close unless @bot.nil?
65
65
 
66
- # puts "result: #{@@result.size}, #{@@result.keys}"
66
+ puts "result: #{@@result.size}, #{@@result.keys}" if @@options[:debug]
67
67
  end
68
68
 
69
69
  private
@@ -0,0 +1,3 @@
1
+ module Macaron
2
+ Version = "1.0.1"
3
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macaron
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,8 @@ date: 2012-11-17 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description:
15
15
  email: dalema22@gmail.com
16
- executables: []
16
+ executables:
17
+ - macaron
17
18
  extensions: []
18
19
  extra_rdoc_files: []
19
20
  files:
@@ -21,6 +22,7 @@ files:
21
22
  - lib/macaron/processor.rb
22
23
  - lib/macaron/scraper.rb
23
24
  - lib/macaron/spawner.rb
25
+ - lib/macaron/version.rb
24
26
  - lib/macaron.rb
25
27
  - LICENSE
26
28
  - README.md