site_analyzer 0.3.16 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c0e7966cf9a63a7dae34d25472924438852f4275
-  data.tar.gz: f96f6d1781e13fe7e98da60349d2db4ad1df423e
+  metadata.gz: cc00445497ff1b19d012ade1eef41f9ec4ddad99
+  data.tar.gz: 21a5e0ecfe14c07069ff8fb35a2d56d60d3062ea
 SHA512:
-  metadata.gz: 31cc2b27a8773526df07f5f50fa39b5af970db45282df38e5af34f32035970a9c5a99541dc70a19085bdae8972ee640f68e096da08a710293eab5243ea15dfe2
-  data.tar.gz: 3fe3b59534526a07207459747013762bc137a1771cbf1e271dbfa61ac4cf536bf631c790e2ab13e8b362e06eee6225269b939395702c9157c34b0e78a3420412
+  metadata.gz: 13edaa969a406d7eb64c5965b037caefad6361bf3704e4e288e7b595f21bf1cd3743f95c407fe6d993b0054714cbcbe8164322b440f7f9752f438353dce1e27a
+  data.tar.gz: 9ce13e82b5bb231cd4d84dd14e14a3aee42dd2b3675b64eba53a4f933e230951e980341d8fa766849fc88c3d1e9d51a320a47e29b2390f69463829b3708d6557
data/.gitignore CHANGED
@@ -1,4 +1,7 @@
-/.idea/
+.idea/*
 SiteAnalyzer.iml
 /log
-*.gem
+*.gem
+.gems
+log
+tmp
data/.rbenv-gemsets ADDED
@@ -0,0 +1 @@
+.gems
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    site_analyzer (0.3.12)
+    site_analyzer (0.3.16)
       addressable (~> 2.3)
       nokogiri (~> 1.6)
       robotstxt (~> 0.5)
@@ -26,7 +26,7 @@ GEM
       shellany (~> 0.0)
       thor (>= 0.18.1)
     guard-compat (1.2.1)
-    guard-rspec (4.6.3)
+    guard-rspec (4.6.4)
       guard (~> 2.1)
       guard-compat (~> 1.1)
       rspec (>= 2.99.0, < 4.0)
@@ -81,4 +81,4 @@ DEPENDENCIES
   site_analyzer!
 
 BUNDLED WITH
-   1.10.5
+   1.10.6
data/Guardfile CHANGED
@@ -47,9 +47,9 @@ guard :rspec, cmd: 'bundle exec rspec' do
 
   watch(rails.controllers) do |m|
     [
-      rspec.spec.("routing/#{m[1]}_routing"),
-      rspec.spec.("controllers/#{m[1]}_controller"),
-      rspec.spec.("acceptance/#{m[1]}")
+      rspec.spec.call("routing/#{m[1]}_routing"),
+      rspec.spec.call("controllers/#{m[1]}_controller"),
+      rspec.spec.call("acceptance/#{m[1]}")
     ]
   end
 
@@ -59,8 +59,8 @@ guard :rspec, cmd: 'bundle exec rspec' do
   watch(rails.app_controller) { "#{rspec.spec_dir}/controllers" }
 
   # Capybara features specs
-  watch(rails.view_dirs) { |m| rspec.spec.("features/#{m[1]}") }
-  watch(rails.layouts) { |m| rspec.spec.("features/#{m[1]}") }
+  watch(rails.view_dirs) { |m| rspec.spec.call("features/#{m[1]}") }
+  watch(rails.layouts) { |m| rspec.spec.call("features/#{m[1]}") }
 
   # Turnip features and steps
   watch(%r{^spec/acceptance/(.+)\.feature$})
data/Rakefile CHANGED
@@ -3,4 +3,4 @@ require 'rspec/core/rake_task'
 
 RSpec::Core::RakeTask.new(:spec)
 
-task :default => :spec
+task default: :spec
@@ -1,6 +1,5 @@
 module OpenURI
-
-  def OpenURI.redirectable?(uri1, uri2) # :nodoc:
+  def self.redirectable?(uri1, uri2) # :nodoc:
     # This test is intended to forbid a redirection from http://... to
     # file:///etc/passwd, file:///dev/zero, etc. CVE-2011-1521
     # https to http redirect is also forbidden intentionally.
@@ -8,6 +7,6 @@ module OpenURI
     # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
     # However this is ad hoc. It should be extensible/configurable.
     uri1.scheme.downcase == uri2.scheme.downcase ||
-      (/\A(?:http|ftp|https)\z/i =~ uri1.scheme && /\A(?:http|ftp|https)\z/i =~ uri2.scheme)
+      (/\A(?:http|ftp|https)\z/i =~ uri1.scheme && /\A(?:http|ftp|https)\z/i =~ uri2.scheme)
   end
-end
+end
@@ -1,5 +1,5 @@
+# Get site page and provide data for future analyse
 module SiteAnalyzer
-  # Get site page and provide data for future analyse
   require 'nokogiri'
   require 'addressable/uri'
   require 'timeout'
@@ -25,29 +25,25 @@ module SiteAnalyzer
     end
     # get all home (that on this site) url on page
     def home_a
-      if @page_a_tags
-        home_a = []
-        @page_a_tags.uniq.each do |link|
-          uri = URI(link[0].to_ascii) rescue nil #TODO: write additional logic for link to image
-          if uri && @site_domain
-            home_a << link[0] if uri.host == @site_domain
-          end
-        end
-        home_a
+      return unless @page_a_tags
+      home_a = []
+      @page_a_tags.uniq.each do |link|
+        uri = URI(link[0].to_ascii) rescue nil # TODO: write additional logic for link to image
+        home_a << link[0] if uri.host == @site_domain if uri && @site_domain
       end
+      home_a
     end
     # get all remote link on page
     def remote_a
-      if @page_a_tags
-        remote_a = []
-        @page_a_tags.uniq.each do |link|
-          uri = URI(link[0].to_ascii)
-          if uri && @site_domain
-            remote_a << link[0] unless uri.host == @site_domain
-          end
+      return unless @page_a_tags
+      remote_a = []
+      @page_a_tags.uniq.each do |link|
+        uri = URI(link[0].to_ascii)
+        if uri && @site_domain
+          remote_a << link[0] unless uri.host == @site_domain
         end
-        remote_a
       end
+      remote_a
     end
 
     private
@@ -72,16 +68,14 @@ module SiteAnalyzer
     end
     # get page with open-uri, then parse it with Nokogiri. Get site domain and path from URI
     def get_page(url)
-      begin
-        timeout(30) do
-          page = open(url)
-          @site_domain = page.base_uri.host
-          @page_path = page.base_uri.request_uri
-          @page = Nokogiri::HTML(page)
-        end
-      rescue Timeout::Error, EOFError, OpenURI::HTTPError, Errno::ENOENT, TypeError
-        return nil
+      timeout(30) do
+        page = open(url)
+        @site_domain = page.base_uri.host
+        @page_path = page.base_uri.request_uri
+        @page = Nokogiri::HTML(page)
       end
+    rescue Timeout::Error, EOFError, OpenURI::HTTPError, Errno::ENOENT, TypeError
+      return nil
     end
     # check that title is one and less then 70 symbols
     def title_good?
@@ -89,48 +83,44 @@ module SiteAnalyzer
     end
     # true if title and h1 have no duplicates
     def title_and_h1_good?
-      if @page
-        arr = []
-        @page.css('h1').each { |node| arr << node.text }
-        @page.css('title').size == 1 && arr.uniq.size == arr.size
-      end
+      return unless @page
+      arr = []
+      @page.css('h1').each { |node| arr << node.text }
+      @page.css('title').size == 1 && arr.uniq.size == arr.size
     end
     # true if metadescription less then 200 symbols
     def metadescription_good?
-      if @page
-        tags = @page.css("meta[name='description']")
-        return false if tags.size == 0
-        tags.each do |t|
-          unless t['value'].nil?
-            return false if t['content'].size == 0 || t['content'].size > 200
-          end
+      return unless @page
+      tags = @page.css("meta[name='description']")
+      return false if tags.size == 0
+      tags.each do |t|
+        unless t['value'].nil?
+          return false if t['content'].size == 0 || t['content'].size > 200
         end
-        true
       end
+      true
     end
     # true if keywords less then 600 symbols
     def keywords_good?
-      if @page
-        tags = @page.css("meta[name='keywords']")
-        return false if tags.size == 0
-        tags.each do |t|
-          unless t['value'].nil?
-            return false if t['content'].size == 0 || t['content'].size > 600
-          end
+      return unless @page
+      tags = @page.css("meta[name='keywords']")
+      return false if tags.size == 0
+      tags.each do |t|
+        unless t['value'].nil?
+          return false if t['content'].size == 0 || t['content'].size > 600
         end
-        true
       end
+      true
     end
     # true if code of page less then text on it
     def code_less?
-      if @page
-        sum = 0
-        page_text = @page.text.size
-        @page.css('script').each do |tag|
-          sum += tag.text.size
-        end
-        sum < page_text / 2
+      return unless @page
+      sum = 0
+      page_text = @page.text.size
+      @page.css('script').each do |tag|
+        sum += tag.text.size
       end
+      sum < page_text / 2
     end
     # collect meta tags for future report
     def collect_metadates
@@ -140,22 +130,20 @@ module SiteAnalyzer
     end
     # check meta and title tags duplicates
     def metadates_good?
-      if @page
-        return false if @all_titles.size > 1 || @meta_data.empty?
-        node_names = []
-        @meta_data.each { |node| node_names << node['name'] }
-        node_names.compact!
-        node_names.uniq.size == node_names.size unless node_names.nil? || node_names.size < 1
-      end
+      return unless @page
+      return false if @all_titles.size > 1 || @meta_data.empty?
+      node_names = []
+      @meta_data.each { |node| node_names << node['name'] }
+      node_names.compact!
+      node_names.uniq.size == node_names.size unless node_names.nil? || node_names.size < 1
     end
     # return hash with all titles, h1 and h2
     def all_titles_h1_h2
-      if @page
-        out = []
-        out << @page.css('title').text << { @page_url => @page.css('h1').text }
-        out << { @page_url => @page.css('h2').text }
-        out
-      end
+      return unless @page
+      out = []
+      out << @page.css('title').text << { @page_url => @page.css('h1').text }
+      out << { @page_url => @page.css('h2').text }
+      out
     end
     # check if page have h2 tags
     def h2?
@@ -167,43 +155,39 @@ module SiteAnalyzer
     end
     # get all a tags
     def all_a_tags
-      if @page
-        tags = []
-        @page.css('a').each do |node|
-          tags << [node['href'], node['target'], node['rel']]
-        end
-        tags.compact
+      return unless @page
+      tags = []
+      @page.css('a').each do |node|
+        tags << [node['href'], node['target'], node['rel']]
       end
+      tags.compact
     end
     # return all page titles
     def titles
-      if @page
-        titles = []
-        @page.css('title').each { |tag| titles << tag.text }
-        titles
-      end
+      return unless @page
+      titles = []
+      @page.css('title').each { |tag| titles << tag.text }
+      titles
     end
     # return all meta description content
     def all_meta_description_content
-      if @page
-        tags = []
-        @page.css("meta[name='description']").each do |t|
-          tags << t['content']
-        end
-        tags
+      return unless @page
+      tags = []
+      @page.css("meta[name='description']").each do |t|
+        tags << t['content']
       end
+      tags
     end
     # return all h2 tags text
     def h2
-      if @page
-        h2s = []
-        @page.css('h2').each { |tag| h2s << tag.text }
-        h2s
-      end
+      return unless @page
+      h2s = []
+      @page.css('h2').each { |tag| h2s << tag.text }
+      h2s
     end
     # check url of page that is must be HLU
     def bad_url
-      @page_url if @page_path.size > 1 unless @page_path =~ /^[\w.\-\/]+$/i
+      @page_url if @page_path.size > 1 unless @page_path =~ %r(/^[\w.\-\/]+$/i)
     end
     # clear page from don't needed information
     def clear!
@@ -97,7 +97,7 @@ module SiteAnalyzer
     def pages_size
       result = []
       @site.pages.each do |page|
-        result << [page.page_url , page.page_text_size]
+        result << [page.page_url, page.page_text_size]
       end
       result
     end
@@ -144,10 +144,9 @@ module SiteAnalyzer
       counter = {}
       result = []
       in_array.compact.each do |url_desc_cont|
-        if url_desc_cont[1][0]
-          url_desc_cont[1][0].scan(/\w+/).each do |word|
-            all_words << word
-          end
+        next unless url_desc_cont[1][0]
+        url_desc_cont[1][0].scan(/\w+/).each do |word|
+          all_words << word
         end
       end
       all_words.each do |word|
@@ -29,13 +29,12 @@ module SiteAnalyzer
       add_pages_for_scan!
       while @pages_for_scan.size > 0
         page = convert_to_valid @pages_for_scan.pop
-        if page
-          @max_pages -= 1
-          add_page convert_to_valid(page)
-          return if @max_pages <= 0
-          add_pages_for_scan!
-          optimize_scan!
-        end
+        next unless page
+        @max_pages -= 1
+        add_page convert_to_valid(page)
+        return if @max_pages <= 0
+        add_pages_for_scan!
+        optimize_scan!
       end
     end
     # add pages for scan array, also add bad pages to bad_pages array
@@ -44,10 +43,9 @@ module SiteAnalyzer
       @bad_pages = []
       @pages.each do |page|
         @bad_pages << page.page_url unless page.page_a_tags
-        if page.page_a_tags
-          page.home_a.each do |link|
-            @pages_for_scan << link
-          end
+        next unless page.page_a_tags
+        page.home_a.each do |link|
+          @pages_for_scan << link
         end
       end
     end
@@ -65,9 +63,7 @@ module SiteAnalyzer
     def all_titles
       result = []
       @pages.each do |page|
-        if page.page_a_tags
-          result << [page.page_url, page.all_titles]
-        end
+        result << [page.page_url, page.all_titles] if page.page_a_tags
       end
       result
     end
@@ -75,9 +71,7 @@ module SiteAnalyzer
     def all_descriptions
       result = []
       @pages.each do |page|
-        if page.page_a_tags
-          result << [page.page_url, page.meta_desc_content]
-        end
+        result << [page.page_url, page.meta_desc_content] if page.page_a_tags
       end
       result
     end
@@ -85,9 +79,7 @@ module SiteAnalyzer
     def all_h2
       result = []
       @pages.each do |page|
-        unless page.page_a_tags
-          result << [page.page_url, page.h2_text]
-        end
+        result << [page.page_url, page.h2_text] unless page.page_a_tags
       end
       result
     end
@@ -95,13 +87,12 @@ module SiteAnalyzer
     def all_a
       result = []
       @pages.each do |page|
-        if page.page_a_tags
-          page.page_a_tags.compact.each do |tag|
-            tag[0] = '-' unless tag[0]
-            tag[1] = '-' unless tag[1]
-            tag[2] = '-' unless tag[2]
-            result << [page.page_url, tag[0], tag[1], tag[2]]
-          end
+        next unless page.page_a_tags
+        page.page_a_tags.compact.each do |tag|
+          tag[0] = '-' unless tag[0]
+          tag[1] = '-' unless tag[1]
+          tag[2] = '-' unless tag[2]
+          result << [page.page_url, tag[0], tag[1], tag[2]]
         end
       end
       result.compact
@@ -118,7 +109,7 @@ module SiteAnalyzer
     def optimize_scan!
       @pages_for_scan = @pages_for_scan.compact.uniq
       @scanned_pages = @scanned_pages.compact.uniq
-      @pages_for_scan = @pages_for_scan - @scanned_pages
+      @pages_for_scan -= @scanned_pages
     end
     # check url and try to convert it to valid, remove .jpg links, add scheme to url
     def convert_to_valid(url)
@@ -1,3 +1,3 @@
 module SiteAnalyzer
-  VERSION = '0.3.16'
+  VERSION = '0.3.17'
 end
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
   spec.authors = ['Denis Savchuk']
   spec.email = ['mordorreal@gmail.com']
   spec.date = '2015-07-01'
-  spec.summary = %q{Make report for SEO. Analyse site like SEOs like. }
-  spec.description = %q{Create site report for SEO many options.}
+  spec.summary = 'Make report for SEO. Analyse site like SEOs like. '
+  spec.description = 'Create site report for SEO many options.'
   spec.homepage = 'https://github.com/Mordorreal/SiteAnalyzer'
   spec.license = 'MIT'
   spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency 'bundler', '~> 1.10'
   spec.add_development_dependency 'rake', '~> 10.4'
   spec.add_development_dependency 'rspec', '~> 3.3'
-  spec.add_development_dependency 'guard-rspec', '~> 3.3'
+  spec.add_development_dependency 'guard-rspec', '~> 4.6'
   spec.add_runtime_dependency 'nokogiri', '~> 1.6'
   spec.add_runtime_dependency 'addressable', '~> 2.3'
   spec.add_runtime_dependency 'robotstxt', '~> 0.5'
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: site_analyzer
 version: !ruby/object:Gem::Version
-  version: 0.3.16
+  version: 0.3.17
 platform: ruby
 authors:
 - Denis Savchuk
@@ -58,14 +58,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '4.6'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '4.6'
 - !ruby/object:Gem::Dependency
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement
@@ -144,6 +144,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- ".rbenv-gemsets"
 - ".rspec"
 - ".travis.yml"
 - CODE_OF_CONDUCT.md