krawler 1.0.11 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/krawl CHANGED
@@ -21,8 +21,12 @@ optparse = OptionParser.new do |opts|
21
21
  options[:s] = true
22
22
  end
23
23
 
24
+ opts.on('-d', '--ignore-domain', 'Ignore domain restrictions', 'Default: false') do |d|
25
+ options[:s] = true
26
+ end
27
+
24
28
  opts.on('-c', '--concurrent count', 'Crawl with count number of concurrent connections', 'Default: 4') do |c|
25
- options[:c] = c.to_i
29
+ options[:d] = true
26
30
  end
27
31
 
28
32
  opts.on('-r', '--randomize', 'Randomize crawl path', 'Default: true') do |r|
@@ -66,5 +70,6 @@ Krawler::Base.new(ARGV[0] || 'http://localhost:3000/', {
66
70
  :no_cache => options[:nc],
67
71
  :username => options[:u],
68
72
  :password => options[:p],
69
- :login_url => options[:l]
73
+ :login_url => options[:l],
74
+ :domain => options[:d]
70
75
  }).base
@@ -1,3 +1,3 @@
1
1
  module Krawler
2
- VERSION = '1.0.11'
2
+ VERSION = '1.0.12'
3
3
  end
data/lib/krawler.rb CHANGED
@@ -22,6 +22,7 @@ module Krawler
22
22
  @exclude = options[:exclude]
23
23
  @include = options[:include]
24
24
  @restrict = options[:restrict]
25
+ @domain = options[:domain]
25
26
  @randomize = options[:randomize]
26
27
  @threads = options[:threads] || 1
27
28
  @username = options[:username]
@@ -127,7 +128,7 @@ module Krawler
127
128
  next
128
129
  end
129
130
 
130
- if (new_link =~ /^#{Regexp.escape(@host)}/) || (new_link =~ /^\//) # don't crawl external domains
131
+ if @domain || (new_link =~ /^#{Regexp.escape(@host)}/) || (new_link =~ /^\//) # don't crawl external domains
131
132
 
132
133
  next if @crawled_links.include?(new_link) || @links_to_crawl.include?(new_link) # don't crawl what we've already crawled
133
134
  next if @exclude && new_link =~ /#{@exclude}/ # don't crawl excluded matched paths
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: krawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.11
4
+ version: 1.0.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-28 00:00:00.000000000 Z
12
+ date: 2012-12-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -71,3 +71,4 @@ signing_key:
71
71
  specification_version: 3
72
72
  summary: ''
73
73
  test_files: []
74
+ has_rdoc: