cohesion 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MzAyNzU3ZmQ0MTZhMmUxYWQ4ZjkyYjViMzY1ZjZlYjc1ODBmYTdjMw==
5
+ data.tar.gz: !binary |-
6
+ NjgwZmU5Yzc3ZDBlNTFiZmJlZmFjNmY2NDlhNGJjOTIxZjU1MmEzNA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZTk5YWVjMDU0ODA0ZjVhNGVmMjczYjE4MzA5NGE4NDQ1NGEzNzNiOWFmMjQ2
10
+ YWUxZGNjMjA5Y2NlNDQ0MWU4MmExMDg5M2QxNTExN2YxNzQzZTIzNjhiOThk
11
+ ZjNiMzIyZGE0NjU2MzAyODg2OGVkYjZiZmE1ZWUyZWFiZDg2NGQ=
12
+ data.tar.gz: !binary |-
13
+ NWQwMjZjYjRlMjU1YjA5ZGQ3MGZkY2U3NzkxN2M2ZjczMjAyN2I2OWE3MzIy
14
+ OTc5MjU3ZTc5Y2IxODY5N2E5MTEzOThmYjYwOTc0ZWIxZjdhNzA2MWExZjdj
15
+ MmJmNDQwZTE4MDA3N2IxODIxY2M5NDQ5OTJiMGYwYWYyZjU1N2I=
data/.gitignore CHANGED
@@ -14,3 +14,4 @@ spec/reports
14
14
  test/tmp
15
15
  test/version_tmp
16
16
  tmp
17
+ .DS_Store
@@ -0,0 +1 @@
1
+ cohesion
@@ -0,0 +1 @@
1
+ ruby-1.9.3-p392
data/Gemfile CHANGED
@@ -3,10 +3,17 @@ source 'https://rubygems.org'
3
3
  # Specify your gem's dependencies in cohesion.gemspec
4
4
  gemspec
5
5
 
6
- gem "cobweb", ">= 1.0.10"
6
+ gem "cobweb", ">= 1.0.12"
7
7
 
8
8
  gem 'rspec'
9
+ gem 'resque'
9
10
  gem 'awesome_print'
10
11
  gem 'sqlite3'
11
12
  gem 'ptools'
12
- gem "slop"
13
+ gem "slop"
14
+ gem 'sidekiq'
15
+ gem "utf8cleaner"
16
+
17
+ group :test do
18
+ gem 'coveralls', require: false
19
+ end
@@ -1,16 +1,18 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cohesion (0.0.5)
4
+ cohesion (0.0.6)
5
5
  cobweb
6
6
  ptools
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- addressable (2.3.2)
11
+ addressable (2.3.5)
12
12
  awesome_print (1.1.0)
13
- cobweb (1.0.10)
13
+ celluloid (0.14.1)
14
+ timers (>= 1.0.0)
15
+ cobweb (1.0.12)
14
16
  addressable
15
17
  awesome_print
16
18
  haml
@@ -18,51 +20,79 @@ GEM
18
20
  namespaced_redis
19
21
  nokogiri
20
22
  redis
21
- resque
22
23
  rspec
23
24
  sinatra
25
+ slop
24
26
  thin
27
+ colorize (0.5.8)
28
+ connection_pool (1.1.0)
29
+ coveralls (0.6.7)
30
+ colorize
31
+ multi_json (~> 1.3)
32
+ rest-client
33
+ simplecov (>= 0.7)
34
+ thor
25
35
  daemons (1.1.9)
26
- diff-lcs (1.1.3)
27
- eventmachine (1.0.0)
28
- haml (4.0.0)
36
+ diff-lcs (1.2.4)
37
+ eventmachine (1.0.3)
38
+ haml (4.0.3)
29
39
  tilt
30
- json (1.7.7)
31
- multi_json (1.6.0)
40
+ json (1.8.0)
41
+ mime-types (1.24)
42
+ mini_portile (0.5.1)
43
+ mono_logger (1.1.0)
44
+ multi_json (1.7.9)
32
45
  namespaced_redis (1.0.4)
33
46
  redis
34
- nokogiri (1.5.6)
47
+ nokogiri (1.6.0)
48
+ mini_portile (~> 0.5.0)
35
49
  ptools (1.2.2)
36
50
  rack (1.5.2)
37
- rack-protection (1.3.2)
51
+ rack-protection (1.5.0)
38
52
  rack
39
- redis (3.0.2)
40
- redis-namespace (1.2.1)
53
+ redis (3.0.4)
54
+ redis-namespace (1.3.1)
41
55
  redis (~> 3.0.0)
42
- resque (1.23.0)
56
+ resque (1.24.1)
57
+ mono_logger (~> 1.0)
43
58
  multi_json (~> 1.0)
44
- redis-namespace (~> 1.0)
59
+ redis-namespace (~> 1.2)
45
60
  sinatra (>= 0.9.2)
46
61
  vegas (~> 0.1.2)
47
- rspec (2.12.0)
48
- rspec-core (~> 2.12.0)
49
- rspec-expectations (~> 2.12.0)
50
- rspec-mocks (~> 2.12.0)
51
- rspec-core (2.12.2)
52
- rspec-expectations (2.12.1)
53
- diff-lcs (~> 1.1.3)
54
- rspec-mocks (2.12.2)
55
- sinatra (1.3.4)
62
+ rest-client (1.6.7)
63
+ mime-types (>= 1.16)
64
+ rspec (2.14.1)
65
+ rspec-core (~> 2.14.0)
66
+ rspec-expectations (~> 2.14.0)
67
+ rspec-mocks (~> 2.14.0)
68
+ rspec-core (2.14.5)
69
+ rspec-expectations (2.14.2)
70
+ diff-lcs (>= 1.1.3, < 2.0)
71
+ rspec-mocks (2.14.3)
72
+ sidekiq (2.13.1)
73
+ celluloid (>= 0.14.1)
74
+ connection_pool (>= 1.0.0)
75
+ json
76
+ redis (>= 3.0)
77
+ redis-namespace
78
+ simplecov (0.7.1)
79
+ multi_json (~> 1.0)
80
+ simplecov-html (~> 0.7.1)
81
+ simplecov-html (0.7.1)
82
+ sinatra (1.4.3)
56
83
  rack (~> 1.4)
57
- rack-protection (~> 1.3)
58
- tilt (~> 1.3, >= 1.3.3)
59
- slop (3.4.3)
60
- sqlite3 (1.3.6)
61
- thin (1.5.0)
84
+ rack-protection (~> 1.4)
85
+ tilt (~> 1.3, >= 1.3.4)
86
+ slop (3.4.6)
87
+ sqlite3 (1.3.7)
88
+ thin (1.5.1)
62
89
  daemons (>= 1.0.9)
63
90
  eventmachine (>= 0.12.6)
64
91
  rack (>= 1.0.0)
65
- tilt (1.3.3)
92
+ thor (0.18.1)
93
+ tilt (1.4.1)
94
+ timers (1.1.0)
95
+ utf8cleaner (0.0.1)
66
96
  vegas (0.1.11)
67
97
  rack (>= 1.0.0)
68
98
 
@@ -71,9 +101,13 @@ PLATFORMS
71
101
 
72
102
  DEPENDENCIES
73
103
  awesome_print
74
- cobweb (>= 1.0.10)
104
+ cobweb (>= 1.0.12)
75
105
  cohesion!
106
+ coveralls
76
107
  ptools
108
+ resque
77
109
  rspec
110
+ sidekiq
78
111
  slop
79
112
  sqlite3
113
+ utf8cleaner
@@ -1,6 +1,11 @@
1
1
 
2
2
  h1. Cohesion
3
3
 
4
+ !https://badge.fury.io/rb/cohesion.png!:http://badge.fury.io/rb/cohesion
5
+ !https://gemnasium.com/stewartmckee/coffee_table.png!
6
+ !https://coveralls.io/repos/stewartmckee/cohesion/badge.png?branch=master(Coverage Status)!:https://coveralls.io/r/stewartmckee/cohesion
7
+
8
+
4
9
  h2. Intro
5
10
 
6
11
  Cohesion is a group of tasks that allow you to check link cohesion in a site.
@@ -35,15 +40,16 @@ Cohesion allows you to control the crawl through various command line options.
35
40
  * --seed_urls <seed_url1,seed_url2> Seed urls
36
41
  * --crawl_limit <number_of_urls> Limit the crawl to a number of urls
37
42
  * --thread_count <number_of_threads> Set the number of threads used
43
+ * --cache <number_of_seconds> Sets the timeout for the cache; leave blank for no cache
38
44
  * --timeout <timeout_in_seconds> Sets the timeout for http requests
39
45
  * --output <filename> Path to output data to
46
+ * --output_format <format_for_output_file> Output format, csv or json
40
47
 
41
48
  * -v, --verbose Display crawl information
42
49
  * -d, --debug Display debug information
43
50
  * -w, --web_statistics Start web stats server
44
51
 
45
52
 
46
-
47
53
  h4. Help
48
54
 
49
55
  Help with available options is available with --help
@@ -5,6 +5,7 @@ $LOAD_PATH.unshift(lib) if File.directory?(lib) && !$LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'cohesion'
7
7
  require 'slop'
8
+ require 'csv'
8
9
 
9
10
  opts = Slop.parse(:help => true) do
10
11
  banner 'Usage: cohesion [options]'
@@ -16,8 +17,10 @@ opts = Slop.parse(:help => true) do
16
17
  on 'crawl_limit=', 'Limit the crawl to a number of urls', :as => Integer
17
18
  on 'thread_count=', "Set the number of threads used", :as => Integer
18
19
  on 'timeout=', "Sets the timeout for http requests", :as => Integer
20
+ on 'cache=', "Sets the timeout for the cache, leave blank for no cache"
19
21
 
20
22
  on 'output=', 'Path to output data to'
23
+ on 'output_format=', "Output format, csv or json"
21
24
 
22
25
  on 'v', 'verbose', 'Display crawl information'
23
26
  on 'd', 'debug', 'Display debug information'
@@ -27,15 +30,31 @@ end
27
30
  if opts[:url]
28
31
 
29
32
  options = opts.to_hash.delete_if { |k, v| v.nil? || k == :url}
30
- ap options
31
33
 
32
34
  failures = Cohesion::Check.site(opts[:url], options)
33
35
  if failures.count == 0
34
36
  exit(true)
35
37
  else
36
38
  if opts[:output]
37
- File.open(opts[:output], 'w') do |f|
38
- f.write failures.to_json
39
+ output = []
40
+ failures.each do |failure|
41
+ output << {:error_page => failure[:issue][:url], :inbound_links => failure[:inbound]}
42
+ end
43
+
44
+ opts[:output_format] = "json" unless opts[:output_format]
45
+ if opts[:output_format] == "json"
46
+ File.open(opts[:output], 'w') do |f|
47
+ f.write output.to_json
48
+ end
49
+ elsif opts[:output_format] == "csv"
50
+ CSV.open(opts[:output], "wb") do |csv|
51
+ csv << ["404 Url", "Page that contains link"]
52
+ output.each do |line|
53
+ line[:inbound_links].each do |link|
54
+ csv << [line[:error_page], link]
55
+ end
56
+ end
57
+ end
39
58
  end
40
59
  end
41
60
  exit(false)
@@ -1,5 +1,5 @@
1
1
  require "cohesion/version"
2
- require 'cobweb'
2
+ require '../cobweb/lib/cobweb'
3
3
  require 'ptools'
4
4
 
5
5
  require 'cohesion/railtie' if defined?(Rails)
@@ -68,8 +68,16 @@ module Cohesion
68
68
  def self.site(url, options={})
69
69
  errors = []
70
70
  failures = []
71
- statistics = CobwebCrawler.new({:cache => 3600, :cache_type => :full, :crawl_linked_external => true, :store_refered_url => true}.merge(options)).crawl(url) do |page|
71
+
72
+ options[:cache] = options[:cache].to_i if options[:cache]
73
+ crawler_options = {:cache_type => :full, :crawl_linked_external => true, :store_inbound_links => true}.merge(options)
74
+ puts crawler_options
75
+
76
+ statistics = CobwebCrawler.new(crawler_options).crawl(url) do |page|
72
77
  print page[:url]
78
+ if page[:status_code] == 404
79
+ page = Cobweb.new(crawler_options.merge(:cache => nil)).get(page[:url])
80
+ end
73
81
  if page[:status_code] > 399
74
82
  puts " [#{page[:status_code]}] \e[31m\u2717\e[0m"
75
83
  failures << page
@@ -78,6 +86,12 @@ module Cohesion
78
86
  end
79
87
  end
80
88
 
89
+ puts statistics.redis.namespace
90
+ puts statistics.get_statistics
91
+
92
+ total_inbound_failures = 0
93
+ total_failures = 0
94
+
81
95
  issues = []
82
96
  if failures.count == 0
83
97
  puts "All links working!"
@@ -86,12 +100,20 @@ module Cohesion
86
100
  failures.each do |f|
87
101
  inbound_links = statistics.inbound_links_for(f[:url])
88
102
  issues << {:issue => f, :inbound => inbound_links}
103
+
104
+ total_inbound_failures += inbound_links.count
105
+ total_failures += 1
106
+
89
107
  puts ""
90
108
  puts "#{f[:url]} [ #{f[:status_code]} ]"
91
109
  inbound_links.each do |inbound_link|
92
110
  puts " - #{inbound_link}"
93
111
  end
94
112
  end
113
+
114
+ puts ""
115
+ puts "Total Failed URLs: #{total_failures}"
116
+ puts "Total Inbound Failures (Pages linking to a 404): #{total_inbound_failures}"
95
117
  puts ""
96
118
  end
97
119
  puts
@@ -1,3 +1,3 @@
1
1
  module Cohesion
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -13,6 +13,21 @@ describe Cohesion do
13
13
  context "site" do
14
14
  it "should crawl the site"
15
15
  it "should return error pages"
16
+ it "should exclude urls given as external"
17
+ it "should default to accept one level of external"
18
+ it "should not allow one level external for excluded urls"
19
+ it "should detect 400 level errors"
20
+ it "should detect 500 level errors"
21
+ it "should write valid json file"
22
+ it "should not write to file if not requested"
23
+ it "should start the web stats when requested"
24
+ it "should return data when --help requested"
25
+ it "should return help when invalid params are passed"
26
+ it "should return help when no params are passed"
27
+ it "should print out success links with tick"
28
+ it "should print out failed links with cross"
29
+ it "should print out report at end of crawl"
30
+ it "should default to full cache"
16
31
  end
17
32
 
18
33
  context "rails" do
@@ -3,6 +3,9 @@
3
3
  ENVIRONMENT = "test"
4
4
  APP_ROOT = File.expand_path(File.dirname(__FILE__) + '/../')
5
5
 
6
+ require 'coveralls'
7
+ Coveralls.wear!
8
+
6
9
  require "#{APP_ROOT}/lib/cohesion"
7
10
  require 'rake'
8
11
 
metadata CHANGED
@@ -1,38 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cohesion
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
5
- prerelease:
4
+ version: 0.0.7
6
5
  platform: ruby
7
6
  authors:
8
7
  - Stewart McKee
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-02-14 00:00:00.000000000 Z
11
+ date: 2013-08-15 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: cobweb
16
- requirement: &70099176390280 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
- version_requirements: *70099176390280
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
25
27
  - !ruby/object:Gem::Dependency
26
28
  name: ptools
27
- requirement: &70099176389260 !ruby/object:Gem::Requirement
28
- none: false
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
31
  - - ! '>='
31
32
  - !ruby/object:Gem::Version
32
33
  version: '0'
33
34
  type: :runtime
34
35
  prerelease: false
35
- version_requirements: *70099176389260
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
36
41
  description: Gem to test the cohesion of links within a rails site. The gem crawls
37
42
  the site and checks that external and internal links are valid
38
43
  email:
@@ -44,7 +49,8 @@ extra_rdoc_files: []
44
49
  files:
45
50
  - .gitignore
46
51
  - .rspec
47
- - .rvmrc
52
+ - .ruby-gemset
53
+ - .ruby-version
48
54
  - Gemfile
49
55
  - Gemfile.lock
50
56
  - LICENSE.txt
@@ -61,27 +67,26 @@ files:
61
67
  - spec/spec_helper.rb
62
68
  homepage: http://github.com/stewartmckee/cohesion
63
69
  licenses: []
70
+ metadata: {}
64
71
  post_install_message:
65
72
  rdoc_options: []
66
73
  require_paths:
67
74
  - lib
68
75
  required_ruby_version: !ruby/object:Gem::Requirement
69
- none: false
70
76
  requirements:
71
77
  - - ! '>='
72
78
  - !ruby/object:Gem::Version
73
79
  version: '0'
74
80
  required_rubygems_version: !ruby/object:Gem::Requirement
75
- none: false
76
81
  requirements:
77
82
  - - ! '>='
78
83
  - !ruby/object:Gem::Version
79
84
  version: '0'
80
85
  requirements: []
81
86
  rubyforge_project:
82
- rubygems_version: 1.8.10
87
+ rubygems_version: 2.0.3
83
88
  signing_key:
84
- specification_version: 3
89
+ specification_version: 4
85
90
  summary: Gem to test the cohesion of links within a rails site.
86
91
  test_files:
87
92
  - spec/lib/cohesion_spec.rb
data/.rvmrc DELETED
@@ -1,52 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
- # development environment upon cd'ing into the directory
5
-
6
- # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
- # Only full ruby name is supported here, for short names use:
8
- # echo "rvm use 1.9.3" > .rvmrc
9
- environment_id="ruby-1.9.3@cohesion"
10
-
11
- # Uncomment the following lines if you want to verify rvm version per project
12
- # rvmrc_rvm_version="1.10.3" # 1.10.1 seems as a safe start
13
- # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
- # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
- # return 1
16
- # }
17
-
18
- # First we attempt to load the desired environment directly from the environment
19
- # file. This is very fast and efficient compared to running through the entire
20
- # CLI and selector. If you want feedback on which environment was used then
21
- # insert the word 'use' after --create as this triggers verbose mode.
22
- if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
- && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
- then
25
- \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
- [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
- \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
- if [[ $- == *i* ]] # check for interactive shells
29
- then echo "Using: $(tput setaf 2)$GEM_HOME$(tput sgr0)" # show the user the ruby and gemset they are using in green
30
- else echo "Using: $GEM_HOME" # don't use colors in non-interactive shells
31
- fi
32
- else
33
- # If the environment file has not yet been created, use the RVM CLI to select.
34
- rvm --create use "$environment_id" || {
35
- echo "Failed to create RVM environment '${environment_id}'."
36
- return 1
37
- }
38
- fi
39
-
40
- # If you use bundler, this might be useful to you:
41
- # if [[ -s Gemfile ]] && {
42
- # ! builtin command -v bundle >/dev/null ||
43
- # builtin command -v bundle | grep $rvm_path/bin/bundle >/dev/null
44
- # }
45
- # then
46
- # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
47
- # gem install bundler
48
- # fi
49
- # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
50
- # then
51
- # bundle install | grep -vE '^Using|Your bundle is complete'
52
- # fi