site_analyzer 0.3.16 → 0.3.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c0e7966cf9a63a7dae34d25472924438852f4275
4
- data.tar.gz: f96f6d1781e13fe7e98da60349d2db4ad1df423e
3
+ metadata.gz: cc00445497ff1b19d012ade1eef41f9ec4ddad99
4
+ data.tar.gz: 21a5e0ecfe14c07069ff8fb35a2d56d60d3062ea
5
5
  SHA512:
6
- metadata.gz: 31cc2b27a8773526df07f5f50fa39b5af970db45282df38e5af34f32035970a9c5a99541dc70a19085bdae8972ee640f68e096da08a710293eab5243ea15dfe2
7
- data.tar.gz: 3fe3b59534526a07207459747013762bc137a1771cbf1e271dbfa61ac4cf536bf631c790e2ab13e8b362e06eee6225269b939395702c9157c34b0e78a3420412
6
+ metadata.gz: 13edaa969a406d7eb64c5965b037caefad6361bf3704e4e288e7b595f21bf1cd3743f95c407fe6d993b0054714cbcbe8164322b440f7f9752f438353dce1e27a
7
+ data.tar.gz: 9ce13e82b5bb231cd4d84dd14e14a3aee42dd2b3675b64eba53a4f933e230951e980341d8fa766849fc88c3d1e9d51a320a47e29b2390f69463829b3708d6557
data/.gitignore CHANGED
@@ -1,4 +1,7 @@
1
- /.idea/
1
+ .idea/*
2
2
  SiteAnalyzer.iml
3
3
  /log
4
- *.gem
4
+ *.gem
5
+ .gems
6
+ log
7
+ tmp
data/.rbenv-gemsets ADDED
@@ -0,0 +1 @@
1
+ .gems
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- site_analyzer (0.3.12)
4
+ site_analyzer (0.3.16)
5
5
  addressable (~> 2.3)
6
6
  nokogiri (~> 1.6)
7
7
  robotstxt (~> 0.5)
@@ -26,7 +26,7 @@ GEM
26
26
  shellany (~> 0.0)
27
27
  thor (>= 0.18.1)
28
28
  guard-compat (1.2.1)
29
- guard-rspec (4.6.3)
29
+ guard-rspec (4.6.4)
30
30
  guard (~> 2.1)
31
31
  guard-compat (~> 1.1)
32
32
  rspec (>= 2.99.0, < 4.0)
@@ -81,4 +81,4 @@ DEPENDENCIES
81
81
  site_analyzer!
82
82
 
83
83
  BUNDLED WITH
84
- 1.10.5
84
+ 1.10.6
data/Guardfile CHANGED
@@ -47,9 +47,9 @@ guard :rspec, cmd: 'bundle exec rspec' do
47
47
 
48
48
  watch(rails.controllers) do |m|
49
49
  [
50
- rspec.spec.("routing/#{m[1]}_routing"),
51
- rspec.spec.("controllers/#{m[1]}_controller"),
52
- rspec.spec.("acceptance/#{m[1]}")
50
+ rspec.spec.call("routing/#{m[1]}_routing"),
51
+ rspec.spec.call("controllers/#{m[1]}_controller"),
52
+ rspec.spec.call("acceptance/#{m[1]}")
53
53
  ]
54
54
  end
55
55
 
@@ -59,8 +59,8 @@ guard :rspec, cmd: 'bundle exec rspec' do
59
59
  watch(rails.app_controller) { "#{rspec.spec_dir}/controllers" }
60
60
 
61
61
  # Capybara features specs
62
- watch(rails.view_dirs) { |m| rspec.spec.("features/#{m[1]}") }
63
- watch(rails.layouts) { |m| rspec.spec.("features/#{m[1]}") }
62
+ watch(rails.view_dirs) { |m| rspec.spec.call("features/#{m[1]}") }
63
+ watch(rails.layouts) { |m| rspec.spec.call("features/#{m[1]}") }
64
64
 
65
65
  # Turnip features and steps
66
66
  watch(%r{^spec/acceptance/(.+)\.feature$})
data/Rakefile CHANGED
@@ -3,4 +3,4 @@ require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
@@ -1,6 +1,5 @@
1
1
  module OpenURI
2
-
3
- def OpenURI.redirectable?(uri1, uri2) # :nodoc:
2
+ def self.redirectable?(uri1, uri2) # :nodoc:
4
3
  # This test is intended to forbid a redirection from http://... to
5
4
  # file:///etc/passwd, file:///dev/zero, etc. CVE-2011-1521
6
5
  # https to http redirect is also forbidden intentionally.
@@ -8,6 +7,6 @@ module OpenURI
8
7
  # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
9
8
  # However this is ad hoc. It should be extensible/configurable.
10
9
  uri1.scheme.downcase == uri2.scheme.downcase ||
11
- (/\A(?:http|ftp|https)\z/i =~ uri1.scheme && /\A(?:http|ftp|https)\z/i =~ uri2.scheme)
10
+ (/\A(?:http|ftp|https)\z/i =~ uri1.scheme && /\A(?:http|ftp|https)\z/i =~ uri2.scheme)
12
11
  end
13
- end
12
+ end
@@ -1,5 +1,5 @@
1
+ # Get site page and provide data for future analyse
1
2
  module SiteAnalyzer
2
- # Get site page and provide data for future analyse
3
3
  require 'nokogiri'
4
4
  require 'addressable/uri'
5
5
  require 'timeout'
@@ -25,29 +25,25 @@ module SiteAnalyzer
25
25
  end
26
26
  # get all home URLs (those on this site) found on the page
27
27
  def home_a
28
- if @page_a_tags
29
- home_a = []
30
- @page_a_tags.uniq.each do |link|
31
- uri = URI(link[0].to_ascii) rescue nil #TODO: write additional logic for link to image
32
- if uri && @site_domain
33
- home_a << link[0] if uri.host == @site_domain
34
- end
35
- end
36
- home_a
28
+ return unless @page_a_tags
29
+ home_a = []
30
+ @page_a_tags.uniq.each do |link|
31
+ uri = URI(link[0].to_ascii) rescue nil # TODO: write additional logic for link to image
32
+ home_a << link[0] if uri.host == @site_domain if uri && @site_domain
37
33
  end
34
+ home_a
38
35
  end
39
36
  # get all remote link on page
40
37
  def remote_a
41
- if @page_a_tags
42
- remote_a = []
43
- @page_a_tags.uniq.each do |link|
44
- uri = URI(link[0].to_ascii)
45
- if uri && @site_domain
46
- remote_a << link[0] unless uri.host == @site_domain
47
- end
38
+ return unless @page_a_tags
39
+ remote_a = []
40
+ @page_a_tags.uniq.each do |link|
41
+ uri = URI(link[0].to_ascii)
42
+ if uri && @site_domain
43
+ remote_a << link[0] unless uri.host == @site_domain
48
44
  end
49
- remote_a
50
45
  end
46
+ remote_a
51
47
  end
52
48
 
53
49
  private
@@ -72,16 +68,14 @@ module SiteAnalyzer
72
68
  end
73
69
  # get page with open-uri, then parse it with Nokogiri. Get site domain and path from URI
74
70
  def get_page(url)
75
- begin
76
- timeout(30) do
77
- page = open(url)
78
- @site_domain = page.base_uri.host
79
- @page_path = page.base_uri.request_uri
80
- @page = Nokogiri::HTML(page)
81
- end
82
- rescue Timeout::Error, EOFError, OpenURI::HTTPError, Errno::ENOENT, TypeError
83
- return nil
71
+ timeout(30) do
72
+ page = open(url)
73
+ @site_domain = page.base_uri.host
74
+ @page_path = page.base_uri.request_uri
75
+ @page = Nokogiri::HTML(page)
84
76
  end
77
+ rescue Timeout::Error, EOFError, OpenURI::HTTPError, Errno::ENOENT, TypeError
78
+ return nil
85
79
  end
86
80
  # check that there is exactly one title and it is less than 70 symbols
87
81
  def title_good?
@@ -89,48 +83,44 @@ module SiteAnalyzer
89
83
  end
90
84
  # true if title and h1 have no duplicates
91
85
  def title_and_h1_good?
92
- if @page
93
- arr = []
94
- @page.css('h1').each { |node| arr << node.text }
95
- @page.css('title').size == 1 && arr.uniq.size == arr.size
96
- end
86
+ return unless @page
87
+ arr = []
88
+ @page.css('h1').each { |node| arr << node.text }
89
+ @page.css('title').size == 1 && arr.uniq.size == arr.size
97
90
  end
98
91
  # true if metadescription is less than 200 symbols
99
92
  def metadescription_good?
100
- if @page
101
- tags = @page.css("meta[name='description']")
102
- return false if tags.size == 0
103
- tags.each do |t|
104
- unless t['value'].nil?
105
- return false if t['content'].size == 0 || t['content'].size > 200
106
- end
93
+ return unless @page
94
+ tags = @page.css("meta[name='description']")
95
+ return false if tags.size == 0
96
+ tags.each do |t|
97
+ unless t['value'].nil?
98
+ return false if t['content'].size == 0 || t['content'].size > 200
107
99
  end
108
- true
109
100
  end
101
+ true
110
102
  end
111
103
  # true if keywords are less than 600 symbols
112
104
  def keywords_good?
113
- if @page
114
- tags = @page.css("meta[name='keywords']")
115
- return false if tags.size == 0
116
- tags.each do |t|
117
- unless t['value'].nil?
118
- return false if t['content'].size == 0 || t['content'].size > 600
119
- end
105
+ return unless @page
106
+ tags = @page.css("meta[name='keywords']")
107
+ return false if tags.size == 0
108
+ tags.each do |t|
109
+ unless t['value'].nil?
110
+ return false if t['content'].size == 0 || t['content'].size > 600
120
111
  end
121
- true
122
112
  end
113
+ true
123
114
  end
124
115
  # true if the page has less code than text
125
116
  def code_less?
126
- if @page
127
- sum = 0
128
- page_text = @page.text.size
129
- @page.css('script').each do |tag|
130
- sum += tag.text.size
131
- end
132
- sum < page_text / 2
117
+ return unless @page
118
+ sum = 0
119
+ page_text = @page.text.size
120
+ @page.css('script').each do |tag|
121
+ sum += tag.text.size
133
122
  end
123
+ sum < page_text / 2
134
124
  end
135
125
  # collect meta tags for future report
136
126
  def collect_metadates
@@ -140,22 +130,20 @@ module SiteAnalyzer
140
130
  end
141
131
  # check meta and title tags duplicates
142
132
  def metadates_good?
143
- if @page
144
- return false if @all_titles.size > 1 || @meta_data.empty?
145
- node_names = []
146
- @meta_data.each { |node| node_names << node['name'] }
147
- node_names.compact!
148
- node_names.uniq.size == node_names.size unless node_names.nil? || node_names.size < 1
149
- end
133
+ return unless @page
134
+ return false if @all_titles.size > 1 || @meta_data.empty?
135
+ node_names = []
136
+ @meta_data.each { |node| node_names << node['name'] }
137
+ node_names.compact!
138
+ node_names.uniq.size == node_names.size unless node_names.nil? || node_names.size < 1
150
139
  end
151
140
  # return hash with all titles, h1 and h2
152
141
  def all_titles_h1_h2
153
- if @page
154
- out = []
155
- out << @page.css('title').text << { @page_url => @page.css('h1').text }
156
- out << { @page_url => @page.css('h2').text }
157
- out
158
- end
142
+ return unless @page
143
+ out = []
144
+ out << @page.css('title').text << { @page_url => @page.css('h1').text }
145
+ out << { @page_url => @page.css('h2').text }
146
+ out
159
147
  end
160
148
  # check if page have h2 tags
161
149
  def h2?
@@ -167,43 +155,39 @@ module SiteAnalyzer
167
155
  end
168
156
  # get all a tags
169
157
  def all_a_tags
170
- if @page
171
- tags = []
172
- @page.css('a').each do |node|
173
- tags << [node['href'], node['target'], node['rel']]
174
- end
175
- tags.compact
158
+ return unless @page
159
+ tags = []
160
+ @page.css('a').each do |node|
161
+ tags << [node['href'], node['target'], node['rel']]
176
162
  end
163
+ tags.compact
177
164
  end
178
165
  # return all page titles
179
166
  def titles
180
- if @page
181
- titles = []
182
- @page.css('title').each { |tag| titles << tag.text }
183
- titles
184
- end
167
+ return unless @page
168
+ titles = []
169
+ @page.css('title').each { |tag| titles << tag.text }
170
+ titles
185
171
  end
186
172
  # return all meta description content
187
173
  def all_meta_description_content
188
- if @page
189
- tags = []
190
- @page.css("meta[name='description']").each do |t|
191
- tags << t['content']
192
- end
193
- tags
174
+ return unless @page
175
+ tags = []
176
+ @page.css("meta[name='description']").each do |t|
177
+ tags << t['content']
194
178
  end
179
+ tags
195
180
  end
196
181
  # return all h2 tags text
197
182
  def h2
198
- if @page
199
- h2s = []
200
- @page.css('h2').each { |tag| h2s << tag.text }
201
- h2s
202
- end
183
+ return unless @page
184
+ h2s = []
185
+ @page.css('h2').each { |tag| h2s << tag.text }
186
+ h2s
203
187
  end
204
188
  # check url of page that is must be HLU
205
189
  def bad_url
206
- @page_url if @page_path.size > 1 unless @page_path =~ /^[\w.\-\/]+$/i
190
+ @page_url if @page_path.size > 1 unless @page_path =~ %r(/^[\w.\-\/]+$/i)
207
191
  end
208
192
  # clear unneeded information from the page
209
193
  def clear!
@@ -97,7 +97,7 @@ module SiteAnalyzer
97
97
  def pages_size
98
98
  result = []
99
99
  @site.pages.each do |page|
100
- result << [page.page_url , page.page_text_size]
100
+ result << [page.page_url, page.page_text_size]
101
101
  end
102
102
  result
103
103
  end
@@ -144,10 +144,9 @@ module SiteAnalyzer
144
144
  counter = {}
145
145
  result = []
146
146
  in_array.compact.each do |url_desc_cont|
147
- if url_desc_cont[1][0]
148
- url_desc_cont[1][0].scan(/\w+/).each do |word|
149
- all_words << word
150
- end
147
+ next unless url_desc_cont[1][0]
148
+ url_desc_cont[1][0].scan(/\w+/).each do |word|
149
+ all_words << word
151
150
  end
152
151
  end
153
152
  all_words.each do |word|
@@ -29,13 +29,12 @@ module SiteAnalyzer
29
29
  add_pages_for_scan!
30
30
  while @pages_for_scan.size > 0
31
31
  page = convert_to_valid @pages_for_scan.pop
32
- if page
33
- @max_pages -= 1
34
- add_page convert_to_valid(page)
35
- return if @max_pages <= 0
36
- add_pages_for_scan!
37
- optimize_scan!
38
- end
32
+ next unless page
33
+ @max_pages -= 1
34
+ add_page convert_to_valid(page)
35
+ return if @max_pages <= 0
36
+ add_pages_for_scan!
37
+ optimize_scan!
39
38
  end
40
39
  end
41
40
  # add pages for scan array, also add bad pages to bad_pages array
@@ -44,10 +43,9 @@ module SiteAnalyzer
44
43
  @bad_pages = []
45
44
  @pages.each do |page|
46
45
  @bad_pages << page.page_url unless page.page_a_tags
47
- if page.page_a_tags
48
- page.home_a.each do |link|
49
- @pages_for_scan << link
50
- end
46
+ next unless page.page_a_tags
47
+ page.home_a.each do |link|
48
+ @pages_for_scan << link
51
49
  end
52
50
  end
53
51
  end
@@ -65,9 +63,7 @@ module SiteAnalyzer
65
63
  def all_titles
66
64
  result = []
67
65
  @pages.each do |page|
68
- if page.page_a_tags
69
- result << [page.page_url, page.all_titles]
70
- end
66
+ result << [page.page_url, page.all_titles] if page.page_a_tags
71
67
  end
72
68
  result
73
69
  end
@@ -75,9 +71,7 @@ module SiteAnalyzer
75
71
  def all_descriptions
76
72
  result = []
77
73
  @pages.each do |page|
78
- if page.page_a_tags
79
- result << [page.page_url, page.meta_desc_content]
80
- end
74
+ result << [page.page_url, page.meta_desc_content] if page.page_a_tags
81
75
  end
82
76
  result
83
77
  end
@@ -85,9 +79,7 @@ module SiteAnalyzer
85
79
  def all_h2
86
80
  result = []
87
81
  @pages.each do |page|
88
- unless page.page_a_tags
89
- result << [page.page_url, page.h2_text]
90
- end
82
+ result << [page.page_url, page.h2_text] unless page.page_a_tags
91
83
  end
92
84
  result
93
85
  end
@@ -95,13 +87,12 @@ module SiteAnalyzer
95
87
  def all_a
96
88
  result = []
97
89
  @pages.each do |page|
98
- if page.page_a_tags
99
- page.page_a_tags.compact.each do |tag|
100
- tag[0] = '-' unless tag[0]
101
- tag[1] = '-' unless tag[1]
102
- tag[2] = '-' unless tag[2]
103
- result << [page.page_url, tag[0], tag[1], tag[2]]
104
- end
90
+ next unless page.page_a_tags
91
+ page.page_a_tags.compact.each do |tag|
92
+ tag[0] = '-' unless tag[0]
93
+ tag[1] = '-' unless tag[1]
94
+ tag[2] = '-' unless tag[2]
95
+ result << [page.page_url, tag[0], tag[1], tag[2]]
105
96
  end
106
97
  end
107
98
  result.compact
@@ -118,7 +109,7 @@ module SiteAnalyzer
118
109
  def optimize_scan!
119
110
  @pages_for_scan = @pages_for_scan.compact.uniq
120
111
  @scanned_pages = @scanned_pages.compact.uniq
121
- @pages_for_scan = @pages_for_scan - @scanned_pages
112
+ @pages_for_scan -= @scanned_pages
122
113
  end
123
114
  # check url and try to convert it to valid, remove .jpg links, add scheme to url
124
115
  def convert_to_valid(url)
@@ -1,3 +1,3 @@
1
1
  module SiteAnalyzer
2
- VERSION = '0.3.16'
2
+ VERSION = '0.3.17'
3
3
  end
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ['Denis Savchuk']
10
10
  spec.email = ['mordorreal@gmail.com']
11
11
  spec.date = '2015-07-01'
12
- spec.summary = %q{Make report for SEO. Analyse site like SEOs like. }
13
- spec.description = %q{Create site report for SEO many options.}
12
+ spec.summary = 'Make report for SEO. Analyse site like SEOs like. '
13
+ spec.description = 'Create site report for SEO many options.'
14
14
  spec.homepage = 'https://github.com/Mordorreal/SiteAnalyzer'
15
15
  spec.license = 'MIT'
16
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency 'bundler', '~> 1.10'
22
22
  spec.add_development_dependency 'rake', '~> 10.4'
23
23
  spec.add_development_dependency 'rspec', '~> 3.3'
24
- spec.add_development_dependency 'guard-rspec', '~> 3.3'
24
+ spec.add_development_dependency 'guard-rspec', '~> 4.6'
25
25
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
26
26
  spec.add_runtime_dependency 'addressable', '~> 2.3'
27
27
  spec.add_runtime_dependency 'robotstxt', '~> 0.5'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.16
4
+ version: 0.3.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Denis Savchuk
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '3.3'
61
+ version: '4.6'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '3.3'
68
+ version: '4.6'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: nokogiri
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -144,6 +144,7 @@ extensions: []
144
144
  extra_rdoc_files: []
145
145
  files:
146
146
  - ".gitignore"
147
+ - ".rbenv-gemsets"
147
148
  - ".rspec"
148
149
  - ".travis.yml"
149
150
  - CODE_OF_CONDUCT.md