macaron 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,11 +2,24 @@
2
2
  Macaron is a simple web scraper implemented in ruby. It's used for service alive testing.
3
3
 
4
4
  ## Install
5
- gem install e-macaron
5
+ gem install macaron
6
6
 
7
7
  ## Example
8
8
  ```ruby
9
- macaron http://www.google.com/
9
+ require 'macaron'
10
+ spawner = Spawner.new()
11
+
12
+ # 1st argument is for start url, 2nd is for the depth to dig (optional)
13
+ spawner.dig("http://www.google.com/", 2)
14
+ ```
15
+
16
+ ## CLI (Command Line Interface)
17
+ ```
18
+ Usage: macaron [options] URL
19
+ -d, --debug Show debug output
20
+ -n, --depth N Set the digging depth N
21
+ -j, --javascript Open javascript support mode
22
+ -h, --help Show this message
10
23
  ```
11
24
 
12
25
  ## License
data/bin/macaron CHANGED
@@ -1,13 +1,42 @@
1
- #!/user/bin/ruby
2
-
1
+ #!/usr/bin/ruby
3
2
  require File.dirname(__FILE__) + '/../lib/macaron'
3
+ require 'rubygems'
4
+ require 'optparse'
5
+ require 'macaron/version'
6
+
7
+ options = {:depth => 2}
8
+ options_parser = OptionParser.new do |opts|
9
+ opts.banner = "Usage: #{File.basename($0)} [options] URL"
10
+
11
+ opts.on("-d", "--debug", "Show debug output") do |v|
12
+ options[:debug] = v
13
+ end
14
+
15
+ opts.on("-n N", "--depth N", Integer, "Set the digging depth N") do |n|
16
+ options[:depth] = n
17
+ end
18
+
19
+ opts.on("-j", "--javascript", "Open javascript support mode") do |j|
20
+ options[:waltir] = j
21
+ end
22
+
23
+ opts.on_tail("-h", "--help", "Show this message") do
24
+ puts opts
25
+ exit
26
+ end
27
+ end
28
+
29
+ options_parser.parse!
30
+
31
+ if ARGV.length != 1
32
+ STDERR.puts options_parser
33
+ exit 1
34
+ end
35
+
36
+ url = ARGV.first
37
+ puts "Starting at #{url}"
4
38
 
5
- mother = Spawner.new({
6
- :thread_timeout_seconds => 999,
7
- :in_site_crawling => true,
8
- :with_waltir => false
9
- })
10
- mother.dig('http://rubyconf.tw/2012/', 2)
11
- # mother.dig('http://www.sakura.idv.tw/', 2) # url, depth
39
+ mother = Spawner.new(options)
40
+ mother.dig(url, options[:depth])
12
41
  puts "Success times: #{mother.success_times}"
13
42
  puts "Fail times: #{mother.fail_times}"
@@ -39,7 +39,7 @@ module Macaron
39
39
  @@fail_times
40
40
  end
41
41
 
42
- def dig(url, init_depth)
42
+ def dig(url, init_depth=3)
43
43
  @@task_map = @@task_map.put(url, init_depth)
44
44
  loop do
45
45
  @@task_map = @@task_map.remove {|url, depth|
@@ -54,7 +54,7 @@ module Macaron
54
54
  }
55
55
 
56
56
  break if @threadpool.busy_workers_count == 0 && @@task_map.empty?
57
-
57
+
58
58
  if @@success_times > @@options[:pages]
59
59
  print "Fetched pages exceeds the limit #{@@options[:pages]}\n"
60
60
  break
@@ -63,7 +63,7 @@ module Macaron
63
63
 
64
64
  @bot.close unless @bot.nil?
65
65
 
66
- # puts "result: #{@@result.size}, #{@@result.keys}"
66
+ puts "result: #{@@result.size}, #{@@result.keys}" if @@options[:debug]
67
67
  end
68
68
 
69
69
  private
@@ -0,0 +1,3 @@
1
+ module Macaron
2
+ Version = "1.0.1"
3
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: macaron
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,8 @@ date: 2012-11-17 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description:
15
15
  email: dalema22@gmail.com
16
- executables: []
16
+ executables:
17
+ - macaron
17
18
  extensions: []
18
19
  extra_rdoc_files: []
19
20
  files:
@@ -21,6 +22,7 @@ files:
21
22
  - lib/macaron/processor.rb
22
23
  - lib/macaron/scraper.rb
23
24
  - lib/macaron/spawner.rb
25
+ - lib/macaron/version.rb
24
26
  - lib/macaron.rb
25
27
  - LICENSE
26
28
  - README.md