site_analyzer 0.3.12 → 0.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +40 -1
- data/Guardfile +70 -0
- data/Rakefile +2 -2
- data/SiteAnalyzer.iml +9 -0
- data/bin/console +3 -3
- data/lib/site_analyzer.rb +3 -1
- data/lib/site_analyzer/page.rb +29 -15
- data/lib/site_analyzer/report.rb +6 -15
- data/lib/site_analyzer/site.rb +55 -14
- data/lib/site_analyzer/version.rb +1 -1
- data/site_analyzer.gemspec +2 -0
- metadata +30 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1808537ffb31d17a123c4494015333f4569246a
|
4
|
+
data.tar.gz: 7e742cfaf72ff0173b58467dec770a4cc1d7db68
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83cd277f8e22a695b5a5ae6cd6ef3711ad39a2cf1e60efbc9497232b68ab2610ec8874d5ce816a9e383b1bf73c40612f2ad8065876db3abb870333e7a64b177c
|
7
|
+
data.tar.gz: 795456151c0b3c9256d020837e5c0294d77a06504d8b60a9126a406cfc7e0baa0bd103e0693daa8696f6cdf5652b5aa4975ea266e6698b37724eca7db4f4867c
|
data/Gemfile.lock
CHANGED
@@ -1,21 +1,55 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
site_analyzer (0.3.
|
4
|
+
site_analyzer (0.3.12)
|
5
5
|
addressable (~> 2.3)
|
6
6
|
nokogiri (~> 1.6)
|
7
7
|
robotstxt (~> 0.5)
|
8
|
+
stringex (~> 2.5)
|
8
9
|
terminal-table (~> 1.5)
|
9
10
|
|
10
11
|
GEM
|
11
12
|
remote: https://rubygems.org/
|
12
13
|
specs:
|
13
14
|
addressable (2.3.8)
|
15
|
+
coderay (1.1.0)
|
14
16
|
diff-lcs (1.2.5)
|
17
|
+
ffi (1.9.10)
|
18
|
+
formatador (0.2.5)
|
19
|
+
guard (2.13.0)
|
20
|
+
formatador (>= 0.2.4)
|
21
|
+
listen (>= 2.7, <= 4.0)
|
22
|
+
lumberjack (~> 1.0)
|
23
|
+
nenv (~> 0.1)
|
24
|
+
notiffany (~> 0.0)
|
25
|
+
pry (>= 0.9.12)
|
26
|
+
shellany (~> 0.0)
|
27
|
+
thor (>= 0.18.1)
|
28
|
+
guard-compat (1.2.1)
|
29
|
+
guard-rspec (4.6.3)
|
30
|
+
guard (~> 2.1)
|
31
|
+
guard-compat (~> 1.1)
|
32
|
+
rspec (>= 2.99.0, < 4.0)
|
33
|
+
listen (3.0.3)
|
34
|
+
rb-fsevent (>= 0.9.3)
|
35
|
+
rb-inotify (>= 0.9)
|
36
|
+
lumberjack (1.0.9)
|
37
|
+
method_source (0.8.2)
|
15
38
|
mini_portile (0.6.2)
|
39
|
+
nenv (0.2.0)
|
16
40
|
nokogiri (1.6.6.2)
|
17
41
|
mini_portile (~> 0.6.0)
|
42
|
+
notiffany (0.0.7)
|
43
|
+
nenv (~> 0.1)
|
44
|
+
shellany (~> 0.0)
|
45
|
+
pry (0.10.1)
|
46
|
+
coderay (~> 1.1.0)
|
47
|
+
method_source (~> 0.8.1)
|
48
|
+
slop (~> 3.4)
|
18
49
|
rake (10.4.2)
|
50
|
+
rb-fsevent (0.9.5)
|
51
|
+
rb-inotify (0.9.5)
|
52
|
+
ffi (>= 0.5.0)
|
19
53
|
robotstxt (0.5.4)
|
20
54
|
rspec (3.3.0)
|
21
55
|
rspec-core (~> 3.3.0)
|
@@ -30,13 +64,18 @@ GEM
|
|
30
64
|
diff-lcs (>= 1.2.0, < 2.0)
|
31
65
|
rspec-support (~> 3.3.0)
|
32
66
|
rspec-support (3.3.0)
|
67
|
+
shellany (0.0.1)
|
68
|
+
slop (3.6.0)
|
69
|
+
stringex (2.5.2)
|
33
70
|
terminal-table (1.5.2)
|
71
|
+
thor (0.19.1)
|
34
72
|
|
35
73
|
PLATFORMS
|
36
74
|
ruby
|
37
75
|
|
38
76
|
DEPENDENCIES
|
39
77
|
bundler (~> 1.10)
|
78
|
+
guard-rspec (~> 4.6)
|
40
79
|
rake (~> 10.4)
|
41
80
|
rspec (~> 3.3)
|
42
81
|
site_analyzer!
|
data/Guardfile
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
## Uncomment and set this to only include directories you want to watch
|
5
|
+
# directories %w(app lib config test spec features) \
|
6
|
+
# .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
|
7
|
+
|
8
|
+
## Note: if you are using the `directories` clause above and you are not
|
9
|
+
## watching the project directory ('.'), then you will want to move
|
10
|
+
## the Guardfile to a watched dir and symlink it back, e.g.
|
11
|
+
#
|
12
|
+
# $ mkdir config
|
13
|
+
# $ mv Guardfile config/
|
14
|
+
# $ ln -s config/Guardfile .
|
15
|
+
#
|
16
|
+
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"
|
17
|
+
|
18
|
+
# Note: The cmd option is now required due to the increasing number of ways
|
19
|
+
# rspec may be run, below are examples of the most common uses.
|
20
|
+
# * bundler: 'bundle exec rspec'
|
21
|
+
# * bundler binstubs: 'bin/rspec'
|
22
|
+
# * spring: 'bin/rspec' (This will use spring if running and you have
|
23
|
+
# installed the spring binstubs per the docs)
|
24
|
+
# * zeus: 'zeus rspec' (requires the server to be started separately)
|
25
|
+
# * 'just' rspec: 'rspec'
|
26
|
+
|
27
|
+
guard :rspec, cmd: 'bundle exec rspec' do
|
28
|
+
require 'guard/rspec/dsl'
|
29
|
+
dsl = Guard::RSpec::Dsl.new
|
30
|
+
|
31
|
+
# Feel free to open issues for suggestions and improvements
|
32
|
+
|
33
|
+
# RSpec files
|
34
|
+
rspec = dsl.rspec
|
35
|
+
watch(rspec.spec_helper) { rspec.spec_dir }
|
36
|
+
watch(rspec.spec_support) { rspec.spec_dir }
|
37
|
+
watch(rspec.spec_files)
|
38
|
+
|
39
|
+
# Ruby files
|
40
|
+
ruby = dsl.ruby
|
41
|
+
dsl.watch_spec_files_for(ruby.lib_files)
|
42
|
+
|
43
|
+
# Rails files
|
44
|
+
rails = dsl.rails(view_extensions: %w(erb haml slim))
|
45
|
+
dsl.watch_spec_files_for(rails.app_files)
|
46
|
+
dsl.watch_spec_files_for(rails.views)
|
47
|
+
|
48
|
+
watch(rails.controllers) do |m|
|
49
|
+
[
|
50
|
+
rspec.spec.("routing/#{m[1]}_routing"),
|
51
|
+
rspec.spec.("controllers/#{m[1]}_controller"),
|
52
|
+
rspec.spec.("acceptance/#{m[1]}")
|
53
|
+
]
|
54
|
+
end
|
55
|
+
|
56
|
+
# Rails config changes
|
57
|
+
watch(rails.spec_helper) { rspec.spec_dir }
|
58
|
+
watch(rails.routes) { "#{rspec.spec_dir}/routing" }
|
59
|
+
watch(rails.app_controller) { "#{rspec.spec_dir}/controllers" }
|
60
|
+
|
61
|
+
# Capybara features specs
|
62
|
+
watch(rails.view_dirs) { |m| rspec.spec.("features/#{m[1]}") }
|
63
|
+
watch(rails.layouts) { |m| rspec.spec.("features/#{m[1]}") }
|
64
|
+
|
65
|
+
# Turnip features and steps
|
66
|
+
watch(%r{^spec/acceptance/(.+)\.feature$})
|
67
|
+
watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m|
|
68
|
+
Dir[File.join("**/#{m[1]}.feature")][0] || 'spec/acceptance'
|
69
|
+
end
|
70
|
+
end
|
data/Rakefile
CHANGED
data/SiteAnalyzer.iml
CHANGED
@@ -8,15 +8,24 @@
|
|
8
8
|
<orderEntry type="library" scope="PROVIDED" name="addressable (v2.3.8, rbenv: 2.2.2) [gem]" level="application" />
|
9
9
|
<orderEntry type="library" scope="PROVIDED" name="bundler (v1.10.5, rbenv: 2.2.2) [gem]" level="application" />
|
10
10
|
<orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.2.5, rbenv: 2.2.2) [gem]" level="application" />
|
11
|
+
<orderEntry type="library" scope="PROVIDED" name="ffi (v1.9.10, rbenv: 2.2.2) [gem]" level="application" />
|
12
|
+
<orderEntry type="library" scope="PROVIDED" name="formatador (v0.2.5, rbenv: 2.2.2) [gem]" level="application" />
|
13
|
+
<orderEntry type="library" scope="PROVIDED" name="guard-compat (v1.2.1, rbenv: 2.2.2) [gem]" level="application" />
|
11
14
|
<orderEntry type="library" scope="PROVIDED" name="mini_portile (v0.6.2, rbenv: 2.2.2) [gem]" level="application" />
|
15
|
+
<orderEntry type="library" scope="PROVIDED" name="nenv (v0.2.0, rbenv: 2.2.2) [gem]" level="application" />
|
12
16
|
<orderEntry type="library" scope="PROVIDED" name="nokogiri (v1.6.6.2, rbenv: 2.2.2) [gem]" level="application" />
|
13
17
|
<orderEntry type="library" scope="PROVIDED" name="rake (v10.4.2, rbenv: 2.2.2) [gem]" level="application" />
|
18
|
+
<orderEntry type="library" scope="PROVIDED" name="rb-inotify (v0.9.5, rbenv: 2.2.2) [gem]" level="application" />
|
14
19
|
<orderEntry type="library" scope="PROVIDED" name="robotstxt (v0.5.4, rbenv: 2.2.2) [gem]" level="application" />
|
15
20
|
<orderEntry type="library" scope="PROVIDED" name="rspec (v3.3.0, rbenv: 2.2.2) [gem]" level="application" />
|
16
21
|
<orderEntry type="library" scope="PROVIDED" name="rspec-core (v3.3.2, rbenv: 2.2.2) [gem]" level="application" />
|
17
22
|
<orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v3.3.1, rbenv: 2.2.2) [gem]" level="application" />
|
18
23
|
<orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v3.3.2, rbenv: 2.2.2) [gem]" level="application" />
|
19
24
|
<orderEntry type="library" scope="PROVIDED" name="rspec-support (v3.3.0, rbenv: 2.2.2) [gem]" level="application" />
|
25
|
+
<orderEntry type="library" scope="PROVIDED" name="shellany (v0.0.1, rbenv: 2.2.2) [gem]" level="application" />
|
26
|
+
<orderEntry type="library" scope="PROVIDED" name="slop (v3.6.0, rbenv: 2.2.2) [gem]" level="application" />
|
27
|
+
<orderEntry type="library" scope="PROVIDED" name="stringex (v2.5.2, rbenv: 2.2.2) [gem]" level="application" />
|
20
28
|
<orderEntry type="library" scope="PROVIDED" name="terminal-table (v1.5.2, rbenv: 2.2.2) [gem]" level="application" />
|
29
|
+
<orderEntry type="library" scope="PROVIDED" name="thor (v0.19.1, rbenv: 2.2.2) [gem]" level="application" />
|
21
30
|
</component>
|
22
31
|
</module>
|
data/bin/console
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require "bundler/setup"
|
4
|
-
require "site_analizer"
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'site_analizer'
|
5
5
|
|
6
6
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +10,5 @@ require "site_analizer"
|
|
10
10
|
# require "pry"
|
11
11
|
# Pry.start
|
12
12
|
|
13
|
-
require "irb"
|
13
|
+
require 'irb'
|
14
14
|
IRB.start
|
data/lib/site_analyzer.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Main class for program
|
2
2
|
module SiteAnalyzer
|
3
|
-
|
3
|
+
require 'stringex_lite'
|
4
|
+
%w(open-uri-patching page report site version).each do |file|
|
4
5
|
require "site_analyzer/#{file}"
|
5
6
|
end
|
7
|
+
Stringex::Localization.default_locale = :en
|
6
8
|
end
|
data/lib/site_analyzer/page.rb
CHANGED
@@ -3,31 +3,41 @@ module SiteAnalyzer
|
|
3
3
|
require 'nokogiri'
|
4
4
|
require 'addressable/uri'
|
5
5
|
require 'timeout'
|
6
|
+
require 'stringex_lite'
|
7
|
+
require 'open-uri'
|
6
8
|
class Page
|
7
|
-
attr_reader :page_url, :titles, :page
|
9
|
+
attr_reader :page_url, :titles, :page, :page_path, :site_domain
|
8
10
|
def initialize(url)
|
9
11
|
@page_url = url
|
10
|
-
@page =
|
11
|
-
@
|
12
|
+
@page = []
|
13
|
+
@site_domain = ''
|
14
|
+
@page_path = ''
|
15
|
+
@titles = []
|
16
|
+
get_page(url)
|
17
|
+
fill_data_field!
|
18
|
+
end
|
19
|
+
|
20
|
+
def fill_data_field!
|
12
21
|
@titles = all_titles
|
13
22
|
end
|
14
23
|
|
15
24
|
def to_s
|
16
|
-
"Page url: #{@page_url} Site url: #{@
|
25
|
+
"Page url: #{@page_url} Site url: #{@site_domain}"
|
17
26
|
end
|
18
27
|
|
19
28
|
def get_page(url)
|
20
29
|
begin
|
21
|
-
timeout(30)
|
30
|
+
timeout(30) do
|
31
|
+
page = open(url)
|
32
|
+
@site_domain = page.base_uri.host
|
33
|
+
@page_path = page.base_uri.request_uri
|
34
|
+
@page = Nokogiri::HTML(page)
|
35
|
+
end
|
22
36
|
rescue Timeout::Error, EOFError, OpenURI::HTTPError, Errno::ENOENT
|
23
37
|
return nil
|
24
38
|
end
|
25
39
|
end
|
26
40
|
|
27
|
-
def get_domain(url)
|
28
|
-
timeout(30) { Addressable::URI.parse(url).host } rescue nil
|
29
|
-
end
|
30
|
-
|
31
41
|
def title_good?
|
32
42
|
@page.css('title').size == 1 && @page.css('title').text.size < 70 if @page
|
33
43
|
end
|
@@ -104,9 +114,9 @@ module SiteAnalyzer
|
|
104
114
|
if @page
|
105
115
|
home_a = []
|
106
116
|
all_a_tags_href.uniq.each do |link|
|
107
|
-
|
108
|
-
|
109
|
-
home_a << link if
|
117
|
+
uri = URI(link.to_ascii) rescue nil #TODO: write additional logic for link to image
|
118
|
+
if uri && @site_domain
|
119
|
+
home_a << link if uri.host == @site_domain
|
110
120
|
end
|
111
121
|
end
|
112
122
|
home_a
|
@@ -117,9 +127,9 @@ module SiteAnalyzer
|
|
117
127
|
if @page
|
118
128
|
remote_a = []
|
119
129
|
all_a_tags_href.uniq.each do |link|
|
120
|
-
|
121
|
-
|
122
|
-
remote_a << link unless
|
130
|
+
uri = URI(link.to_ascii)
|
131
|
+
if uri && @site_domain
|
132
|
+
remote_a << link unless uri.host == @site_domain
|
123
133
|
end
|
124
134
|
end
|
125
135
|
remote_a
|
@@ -179,5 +189,9 @@ module SiteAnalyzer
|
|
179
189
|
h2s
|
180
190
|
end
|
181
191
|
end
|
192
|
+
|
193
|
+
def bad_url
|
194
|
+
@page_url unless @page_path.size <= 1 && @page_path =~ /^[\w.\-\/]+$/
|
195
|
+
end
|
182
196
|
end
|
183
197
|
end
|
data/lib/site_analyzer/report.rb
CHANGED
@@ -6,10 +6,10 @@ module SiteAnalyzer
|
|
6
6
|
class Report
|
7
7
|
attr_reader :site, :report
|
8
8
|
def initialize(site_url, max_pages = 10, use_robot = false)
|
9
|
-
@
|
9
|
+
@site_domain = site_url
|
10
10
|
@max_pages = max_pages
|
11
11
|
@use_robot = use_robot
|
12
|
-
@site = Site.new(@
|
12
|
+
@site = Site.new(@site_domain, @max_pages, @use_robot)
|
13
13
|
end
|
14
14
|
|
15
15
|
def self.create(options)
|
@@ -25,7 +25,7 @@ module SiteAnalyzer
|
|
25
25
|
@report = {}
|
26
26
|
@report[:title_more_then_70_symbols] = check_titles_text_less_than_70
|
27
27
|
@report[:title_and_h1_have_doubles] = check_title_and_h1_for_doubles
|
28
|
-
@report[:meta_description_more_than_200] =
|
28
|
+
@report[:meta_description_more_than_200] = check_meta_description
|
29
29
|
@report[:meta_keywords_tags_more_than_600] = check_meta_keywords_tags
|
30
30
|
@report[:dont_have_h2_tags] = check_h2
|
31
31
|
@report[:pages_size_with_url] = pages_size
|
@@ -40,7 +40,7 @@ module SiteAnalyzer
|
|
40
40
|
|
41
41
|
def to_s
|
42
42
|
return 'Report is empty' if @report.nil? || @report.empty?
|
43
|
-
header = Terminal::Table.new title: "Report for #{@
|
43
|
+
header = Terminal::Table.new title: "Report for #{@site_domain} with #{@max_pages} pages max_pages and robot check is #{@use_robot}"
|
44
44
|
puts header
|
45
45
|
@report.each_pair do |key, value|
|
46
46
|
rows = []
|
@@ -70,7 +70,7 @@ module SiteAnalyzer
|
|
70
70
|
result
|
71
71
|
end
|
72
72
|
|
73
|
-
def
|
73
|
+
def check_meta_description
|
74
74
|
result = []
|
75
75
|
@site.pages.each do |page|
|
76
76
|
result << page.page_url unless page.metadescription_good?
|
@@ -115,16 +115,7 @@ module SiteAnalyzer
|
|
115
115
|
end
|
116
116
|
|
117
117
|
def bad_url
|
118
|
-
|
119
|
-
a_tag_array.each do |url|
|
120
|
-
begin
|
121
|
-
uri = URI(url[1])
|
122
|
-
result << url if (uri.scheme == 'http' || uri.scheme == 'https' ) unless uri.path && uri.path =~ /^[\w\-\/\+\.]+$/
|
123
|
-
rescue URI::InvalidURIError
|
124
|
-
result << url
|
125
|
-
end
|
126
|
-
end
|
127
|
-
result
|
118
|
+
@site.bad_urls
|
128
119
|
end
|
129
120
|
|
130
121
|
def title_doubles
|
data/lib/site_analyzer/site.rb
CHANGED
@@ -4,16 +4,16 @@ module SiteAnalyzer
|
|
4
4
|
require 'timeout'
|
5
5
|
# Create site object with all scans
|
6
6
|
class Site
|
7
|
-
attr_reader :main_url, :pages, :
|
7
|
+
attr_reader :main_url, :pages, :pages_for_scan, :max_pages, :scanned_pages
|
8
8
|
def initialize(url, max_pages = 10, use_robot_txt = false)
|
9
|
+
Stringex::Localization.default_locale = :en
|
9
10
|
@main_url = url
|
10
11
|
@pages = []
|
11
|
-
@max_pages = max_pages
|
12
|
-
@domain = Addressable::URI.parse(url).host
|
13
12
|
@use_robot_txt = use_robot_txt
|
14
13
|
@scanned_pages = []
|
15
14
|
@pages_for_scan = []
|
16
|
-
|
15
|
+
@max_pages = max_pages - 1
|
16
|
+
@pages << Page.new(convert_to_valid(@main_url))
|
17
17
|
scan_site!
|
18
18
|
end
|
19
19
|
|
@@ -28,11 +28,14 @@ module SiteAnalyzer
|
|
28
28
|
def scan_site!
|
29
29
|
add_pages_for_scan!
|
30
30
|
while @pages_for_scan.size > 0
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
page = convert_to_valid @pages_for_scan.pop
|
32
|
+
if page
|
33
|
+
@max_pages -= 1
|
34
|
+
add_page convert_to_valid(page)
|
35
|
+
return if @max_pages <= 0
|
36
|
+
add_pages_for_scan!
|
37
|
+
optimize_scan!
|
38
|
+
end
|
36
39
|
end
|
37
40
|
end
|
38
41
|
|
@@ -43,7 +46,7 @@ module SiteAnalyzer
|
|
43
46
|
@bad_pages << page.page_url unless page.page
|
44
47
|
if page.page
|
45
48
|
page.home_a.each do |link|
|
46
|
-
@pages_for_scan << link
|
49
|
+
@pages_for_scan << link
|
47
50
|
end
|
48
51
|
end
|
49
52
|
end
|
@@ -56,7 +59,6 @@ module SiteAnalyzer
|
|
56
59
|
end
|
57
60
|
page = Page.new(url)
|
58
61
|
@pages << page
|
59
|
-
@max_pages -= 1
|
60
62
|
@scanned_pages << url
|
61
63
|
end
|
62
64
|
|
@@ -83,7 +85,7 @@ module SiteAnalyzer
|
|
83
85
|
def all_h2
|
84
86
|
result = []
|
85
87
|
@pages.each do |page|
|
86
|
-
|
88
|
+
unless page.page
|
87
89
|
result << [page.page_url, page.h2]
|
88
90
|
end
|
89
91
|
end
|
@@ -105,10 +107,49 @@ module SiteAnalyzer
|
|
105
107
|
result.compact
|
106
108
|
end
|
107
109
|
|
110
|
+
def pages_url
|
111
|
+
result = []
|
112
|
+
@pages.each do |page|
|
113
|
+
result << page.page_url if page.page
|
114
|
+
end
|
115
|
+
result
|
116
|
+
end
|
117
|
+
|
118
|
+
def bad_urls
|
119
|
+
result = []
|
120
|
+
@pages.each do |page|
|
121
|
+
result << page.bad_url
|
122
|
+
end
|
123
|
+
result.compact!
|
124
|
+
end
|
125
|
+
|
108
126
|
def optimize_scan!
|
109
|
-
@pages_for_scan.uniq
|
110
|
-
@scanned_pages.uniq
|
127
|
+
@pages_for_scan = @pages_for_scan.compact.uniq
|
128
|
+
@scanned_pages = @scanned_pages.compact.uniq
|
111
129
|
@pages_for_scan = @pages_for_scan - @scanned_pages
|
112
130
|
end
|
131
|
+
|
132
|
+
def convert_to_valid(url)
|
133
|
+
return nil if url =~ /.jpg$/i
|
134
|
+
url.insert(0, @main_url.first(5)) if url.start_with? '//'
|
135
|
+
link = URI(url)
|
136
|
+
main_page = URI(@main_url)
|
137
|
+
if link && link.scheme && link.scheme.empty?
|
138
|
+
link.scheme = main_page.scheme
|
139
|
+
elsif link.nil?
|
140
|
+
return nil
|
141
|
+
end
|
142
|
+
if link.scheme =~ /^http/
|
143
|
+
request = link.scheme + '://' + link.host
|
144
|
+
if link.request_uri
|
145
|
+
request += link.request_uri
|
146
|
+
end
|
147
|
+
else
|
148
|
+
request = nil
|
149
|
+
end
|
150
|
+
request
|
151
|
+
rescue
|
152
|
+
link
|
153
|
+
end
|
113
154
|
end
|
114
155
|
end
|
data/site_analyzer.gemspec
CHANGED
@@ -21,9 +21,11 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_development_dependency 'bundler', '~> 1.10'
|
22
22
|
spec.add_development_dependency 'rake', '~> 10.4'
|
23
23
|
spec.add_development_dependency 'rspec', '~> 3.3'
|
24
|
+
spec.add_development_dependency 'guard-rspec', '~> 3.3'
|
24
25
|
spec.add_runtime_dependency 'nokogiri', '~> 1.6'
|
25
26
|
spec.add_runtime_dependency 'addressable', '~> 2.3'
|
26
27
|
spec.add_runtime_dependency 'robotstxt', '~> 0.5'
|
27
28
|
spec.add_runtime_dependency 'terminal-table', '~> 1.5'
|
29
|
+
spec.add_runtime_dependency 'stringex', '~> 2.5'
|
28
30
|
spec.required_ruby_version = '~> 2.2'
|
29
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: site_analyzer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.12
|
4
|
+
version: 0.3.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Denis Savchuk
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: guard-rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.3'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: nokogiri
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,20 @@ dependencies:
|
|
108
122
|
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: '1.5'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: stringex
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '2.5'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '2.5'
|
111
139
|
description: Create site report for SEO many options.
|
112
140
|
email:
|
113
141
|
- mordorreal@gmail.com
|
@@ -121,6 +149,7 @@ files:
|
|
121
149
|
- CODE_OF_CONDUCT.md
|
122
150
|
- Gemfile
|
123
151
|
- Gemfile.lock
|
152
|
+
- Guardfile
|
124
153
|
- LICENSE
|
125
154
|
- README.md
|
126
155
|
- Rakefile
|