crawl 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ # Crawl
2
+
3
+ Crawl pages within a domain, reporting any page that returns a bad response code
4
+
5
+ Usage:
6
+
7
+ >crawl [options] domain
8
+
9
+ -s, --start /home,/about Starting path(s), defaults to /
10
+ -u, --username username Basic auth username
11
+ -p, --password password Basic auth password
12
+ -c, --ci Output files for CI integration
13
+ -v, --verbose Give details when crawling
14
+ -m, --markup Validate HTML markup
15
+ -h, --help Show this message
data/bin/crawl CHANGED
@@ -10,6 +10,7 @@ optparse = OptionParser.new do |opts|
10
10
  opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
11
11
  opts.on('-c', '--ci', 'Output files for CI integration') { |o| options[:ci] = o }
12
12
  opts.on('-v', '--verbose', 'Give details when crawling') { |o| options[:verbose] = o }
13
+ opts.on('-m', '--markup', 'Validate markup') { |o| options[:markup] = o }
13
14
  opts.on_tail("-h", "--help", "Show this message") { |o| puts opts; exit }
14
15
  end.parse!
15
16
 
data/lib/crawl/engine.rb CHANGED
@@ -33,6 +33,7 @@ class Crawl::Engine
33
33
  @verbose = options[:verbose] || ENV['VERBOSE']
34
34
  @number_of_dots = 0
35
35
  @report_manager = CI::Reporter::ReportManager.new("crawler") if options[:ci]
36
+ @validate_markup = options[:markup]
36
37
  end
37
38
 
38
39
  def run
@@ -43,7 +44,7 @@ class Crawl::Engine
43
44
  next unless response.headers[:content_type] =~ %r{text/html}
44
45
  @visited_documents << link
45
46
  @found_links += links = find_links(link, response.to_str)
46
- # validate(link, response.body_str)
47
+ validate(link, response.body) if @validate_markup
47
48
  end
48
49
  end
49
50
  end
@@ -87,7 +88,6 @@ private
87
88
  error_messages = messages.select { |message| message['type'] != 'info' }
88
89
 
89
90
  if error_messages.empty?
90
- handle_success
91
91
  true
92
92
  else
93
93
  response = error_messages.map do |message|
@@ -97,7 +97,6 @@ private
97
97
  end.join("\n\n")
98
98
 
99
99
  @errors << Result.new(link, response)
100
- handle_error('I')
101
100
  false
102
101
  end
103
102
  rescue RestClient::ServiceUnavailable
@@ -132,7 +131,7 @@ private
132
131
  test_suite.finish
133
132
  @report_manager.write_report(test_suite) if options[:ci]
134
133
  return response
135
- rescue RestClient::InternalServerError, RestClient::ResourceNotFound => e
134
+ rescue RestClient::InternalServerError, RestClient::ResourceNotFound, RestClient::Unauthorized => e
136
135
  @errors << Result.new(link, "Error whilst retrieving page: #{e.message}")
137
136
  @invalid_links << link
138
137
  return nil
data/lib/crawl/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module Crawl
3
- VERSION = "0.0.4"
3
+ VERSION = "0.0.5"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-11-10 00:00:00.000000000 Z
12
+ date: 2012-02-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70243809291280 !ruby/object:Gem::Requirement
16
+ requirement: &70216317741600 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70243809291280
24
+ version_requirements: *70216317741600
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rest-client
27
- requirement: &70243809290740 !ruby/object:Gem::Requirement
27
+ requirement: &70216317740600 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70243809290740
35
+ version_requirements: *70216317740600
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ci_reporter
38
- requirement: &70243809290140 !ruby/object:Gem::Requirement
38
+ requirement: &70216317739980 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70243809290140
46
+ version_requirements: *70216317739980
47
47
  description: Crawl all pages on a domain, checking for errors
48
48
  email:
49
49
  - tor@alphasights.com
@@ -54,7 +54,7 @@ extra_rdoc_files: []
54
54
  files:
55
55
  - .gitignore
56
56
  - Gemfile
57
- - README
57
+ - README.md
58
58
  - Rakefile
59
59
  - bin/crawl
60
60
  - crawl.gemspec
data/README DELETED
@@ -1,8 +0,0 @@
1
- Crawl pages within a domain, reporting any page that returns a bad response code
2
- Usage: crawl [options] domain
3
- -s, --start /home,/about Starting path(s), defaults to /
4
- -u, --username username Basic auth username
5
- -p, --password password Basic auth password
6
- -c, --ci Output files for CI integration
7
- -v, --verbose Give details when crawling
8
- -h, --help Show this message