krawler 1.0.11 → 1.0.12
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/krawl +7 -2
- data/lib/krawler/version.rb +1 -1
- data/lib/krawler.rb +2 -1
- metadata +3 -2
data/bin/krawl
CHANGED
@@ -21,8 +21,12 @@ optparse = OptionParser.new do |opts|
|
|
21
21
|
options[:s] = true
|
22
22
|
end
|
23
23
|
|
24
|
+
opts.on('-d', '--ignore-domain', 'Ignore domain restrictions', 'Default: false') do |d|
|
25
|
+
options[:s] = true
|
26
|
+
end
|
27
|
+
|
24
28
|
opts.on('-c', '--concurrent count', 'Crawl with count number of concurrent connections', 'Default: 4') do |c|
|
25
|
-
options[:
|
29
|
+
options[:d] = true
|
26
30
|
end
|
27
31
|
|
28
32
|
opts.on('-r', '--randomize', 'Randomize crawl path', 'Default: true') do |r|
|
@@ -66,5 +70,6 @@ Krawler::Base.new(ARGV[0] || 'http://localhost:3000/', {
|
|
66
70
|
:no_cache => options[:nc],
|
67
71
|
:username => options[:u],
|
68
72
|
:password => options[:p],
|
69
|
-
:login_url => options[:l]
|
73
|
+
:login_url => options[:l],
|
74
|
+
:domain => options[:d]
|
70
75
|
}).base
|
data/lib/krawler/version.rb
CHANGED
data/lib/krawler.rb
CHANGED
@@ -22,6 +22,7 @@ module Krawler
|
|
22
22
|
@exclude = options[:exclude]
|
23
23
|
@include = options[:include]
|
24
24
|
@restrict = options[:restrict]
|
25
|
+
@domain = options[:domain]
|
25
26
|
@randomize = options[:randomize]
|
26
27
|
@threads = options[:threads] || 1
|
27
28
|
@username = options[:username]
|
@@ -127,7 +128,7 @@ module Krawler
|
|
127
128
|
next
|
128
129
|
end
|
129
130
|
|
130
|
-
if (new_link =~ /^#{Regexp.escape(@host)}/) || (new_link =~ /^\//) # don't crawl external domains
|
131
|
+
if @domain || (new_link =~ /^#{Regexp.escape(@host)}/) || (new_link =~ /^\//) # don't crawl external domains
|
131
132
|
|
132
133
|
next if @crawled_links.include?(new_link) || @links_to_crawl.include?(new_link) # don't crawl what we've already crawled
|
133
134
|
next if @exclude && new_link =~ /#{@exclude}/ # don't crawl excluded matched paths
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: krawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.11
|
4
|
+
version: 1.0.12
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-12-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -71,3 +71,4 @@ signing_key:
|
|
71
71
|
specification_version: 3
|
72
72
|
summary: ''
|
73
73
|
test_files: []
|
74
|
+
has_rdoc:
|