crawl 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d31f6ad78385f65e06abb7837358453330394baf
4
- data.tar.gz: 7a8f0781ae652f1500b9a5fde7a08380627ce1de
3
+ metadata.gz: fe74cc42a5729f8b4a6773e6c7e2326abddefc7a
4
+ data.tar.gz: 22677615aa464e1549a34b99273631337bb11cc1
5
5
  SHA512:
6
- metadata.gz: 071573a7d5bd59416b2d6a582a51a0a56a30000184391ae05688ed894b40b50e410661dfef2ac4554fd29f2dae117504bccdda00ef9144888c8ca46210c9f980
7
- data.tar.gz: 7954d33220e05baf7667590b6fcb082800fbbda753016d2014331a4afec02ef8f8951f68256a90ea4baaee4024abadde53541e256d59b2171b9f61f629323d71
6
+ metadata.gz: 4fe1ac4cd0bf951d62a2b0c6851818b5317f8dfd469ce4133478a1a179a08e9566d627a5b2ba02b4301fa48bc511686ba02e71edeaf7f41f4e6125643b563105
7
+ data.tar.gz: 1f69f772212400ea07ad8cb27d2c6daefb5549805d3cb1e2aa632e7ac63b8fa5b801a926d8341721a4e35d94d55e0c54daa71e3393da96b593f17954416298f6
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 AlphaSights
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -10,7 +10,7 @@ Usage:
10
10
  -s, --start /home,/about Starting path(s), defaults to /
11
11
  -u, --username username Basic auth username
12
12
  -p, --password password Basic auth password
13
- -c, --connections count Max number of parallel connections to use. The default is 20.
13
+ -c, --connections count Max number of parallel connections to use. The default is 5.
14
14
  -v, --verbose Give details when crawling
15
15
  -h, --help Show this message
16
16
  --version Print version
@@ -34,3 +34,8 @@ Example:
34
34
 
35
35
  5 pages crawled without errors.
36
36
 
37
+ ### Copyright and License
38
+
39
+ Copyright AlphaSights and Contributors, 2015
40
+
41
+ [MIT Licence](LICENSE.txt)
data/bin/crawl CHANGED
@@ -23,6 +23,8 @@ unless options[:domain]
23
23
  exit -1
24
24
  end
25
25
 
26
+ options[:domain] = "http://#{options[:domain]}" unless options[:domain].include?('://')
27
+
26
28
  crawler = Crawl::Engine.new(options)
27
29
 
28
30
  trap("SIGINT") do
data/lib/crawl.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require('nokogiri')
3
3
  require('rest_client')
4
- require 'ci/reporter/core'
5
4
  require 'eventmachine'
6
5
  require 'em-http-request'
7
6
  require 'base64'
data/lib/crawl/engine.rb CHANGED
@@ -59,21 +59,21 @@ private
59
59
  def retrieve(page)
60
60
  puts "Fetching #{page.url} ..." if $verbose
61
61
 
62
- full_url = options[:domain] + page.url
62
+ absolute_url = options[:domain] + page.relative_url
63
63
 
64
- http = EventMachine::HttpRequest.new(full_url)
64
+ http = EventMachine::HttpRequest.new(absolute_url)
65
65
  req = http.get :redirects => MAX_REDIRECTS, :head => {'authorization' => [options[:username], options[:password]]}
66
66
  req.timeout(15)
67
67
 
68
68
  req.errback do
69
69
  if req.nil?
70
- page.intermittent("Req is nil. WAT?")
70
+ page.intermittent("Req is nil. WAT?")
71
71
  elsif msg = req.error
72
- page.intermittent(msg)
72
+ page.intermittent(msg)
73
73
  elsif req.response.nil? || req.response.empty?
74
- page.intermittent('Timeout?')
74
+ page.intermittent('Timeout?')
75
75
  else
76
- page.intermittent('Partial response: Server Broke Connection?')
76
+ page.intermittent('Partial response: Server Broke Connection?')
77
77
  end
78
78
  process_next
79
79
  end
data/lib/crawl/page.rb CHANGED
@@ -13,6 +13,14 @@ class Page
13
13
  @errors = nil
14
14
  end
15
15
 
16
+ def relative_url
17
+ if url.start_with?('/')
18
+ url
19
+ else
20
+ "#{source}/#{url}"
21
+ end
22
+ end
23
+
16
24
  def <=>(other)
17
25
  url <=> other.url
18
26
  end
data/lib/crawl/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module Crawl
3
- VERSION = "1.1.1"
3
+ VERSION = "1.1.2"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tor Erik Linnerud
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-12 00:00:00.000000000 Z
11
+ date: 2015-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -76,6 +76,7 @@ extra_rdoc_files: []
76
76
  files:
77
77
  - ".gitignore"
78
78
  - Gemfile
79
+ - LICENSE.txt
79
80
  - README.md
80
81
  - Rakefile
81
82
  - bin/crawl
@@ -107,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
107
108
  version: '0'
108
109
  requirements: []
109
110
  rubyforge_project:
110
- rubygems_version: 2.2.2
111
+ rubygems_version: 2.4.5.1
111
112
  signing_key:
112
113
  specification_version: 4
113
114
  summary: Crawl pages within a domain, reporting any page that returns a bad response