site_analyzer 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c6718b5591b49460b14466007203bb65c3471b7
4
- data.tar.gz: f5ca34a57a400647859935f706c576df74d85941
3
+ metadata.gz: d800ec488fdf5882e08d325ceb904af2b8559e0d
4
+ data.tar.gz: 7e963e3c8f71905e0e1dafadabd8d81c43bcb2a2
5
5
  SHA512:
6
- metadata.gz: d29b19234afe6f5f93e43f8283bce187298435fc5e46a6254ca32928f55efab8a29e49e2349cf817bd9d320c04ca4cad0681a3c0d9eef2e426ea36466e2a95ab
7
- data.tar.gz: bfce8893fc7918c6837be128a3f5efd21c269bb38be65c1be1316548aeab99cc5e5900d8c2c76ede67dc8e483668bca7446b413bc9995981bd2eed5d5a1af6d7
6
+ metadata.gz: 6bdb11a2a9c98b982c4f901e62e9a8000cdd4750f986a976414d8474065d695db8482f7ba8742237a28aa0d3d41e3e9fa233bb402695ad4b709c348675666fdb
7
+ data.tar.gz: d5c457238252264ae2722a1b1f361e8ab3f6be2210295542a347a468cbe16c807dd0a0d9a97264856b4bd347b3f4862494d2dc982ba4b50e4793853590ba981f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- site_analyzer (0.3.8)
4
+ site_analyzer (0.3.9)
5
5
  addressable (~> 2.3)
6
6
  nokogiri (~> 1.6)
7
7
  robotstxt (~> 0.5)
@@ -7,7 +7,7 @@ module SiteAnalyzer
7
7
  attr_reader :page_url, :titles, :page
8
8
  def initialize(url)
9
9
  @page_url = url
10
- @page = get_page url
10
+ @page = get_page(url)
11
11
  @site_url = get_domain url
12
12
  @titles = all_titles
13
13
  end
@@ -17,142 +17,171 @@ module SiteAnalyzer
17
17
  end
18
18
 
19
19
  def get_page(url)
20
- timeout(30) { Nokogiri::HTML(open(url)) }
21
- rescue Timeout::Error
22
- ['Timeout exception']
20
+ begin
21
+ timeout(30) { Nokogiri::HTML(open(url)) }
22
+ rescue Timeout::Error, EOFError, OpenURI::HTTPError
23
+ return nil
24
+ end
23
25
  end
24
26
 
25
27
  def get_domain(url)
26
- timeout(30) { Addressable::URI.parse(url).host }
27
- rescue
28
- 'Error with parsing by Addressable'
28
+ timeout(30) { Addressable::URI.parse(url).host } rescue nil
29
29
  end
30
30
 
31
31
  def title_good?
32
- @page.css('title').size == 1 && @page.css('title').text.size < 70
32
+ @page.css('title').size == 1 && @page.css('title').text.size < 70 if @page
33
33
  end
34
34
  # true if title and h1 have no duplicates
35
35
  def title_and_h1_good?
36
- arr = []
37
- @page.css('h1').each { |node| arr << node.text }
38
- @page.css('title').size == 1 && arr.uniq.size == arr.size
36
+ if @page
37
+ arr = []
38
+ @page.css('h1').each { |node| arr << node.text }
39
+ @page.css('title').size == 1 && arr.uniq.size == arr.size
40
+ end
39
41
  end
40
42
  # true if metadescription is less than 200 symbols
41
43
  def metadescription_good?
42
- tags = @page.css("meta[name='description']")
43
- return false if tags.size == 0
44
- tags.each do |t|
45
- unless t['value'].nil?
46
- return false if t['content'].size == 0 || t['content'].size > 200
44
+ if @page
45
+ tags = @page.css("meta[name='description']")
46
+ return false if tags.size == 0
47
+ tags.each do |t|
48
+ unless t['value'].nil?
49
+ return false if t['content'].size == 0 || t['content'].size > 200
50
+ end
47
51
  end
52
+ true
48
53
  end
49
- true
50
54
  end
51
55
  # true if keywords are less than 600 symbols
52
56
  def keywords_good?
53
- tags = @page.css("meta[name='keywords']")
54
- return false if tags.size == 0
55
- tags.each do |t|
56
- unless t['value'].nil?
57
- return false if t['content'].size == 0 || t['content'].size > 600
57
+ if @page
58
+ tags = @page.css("meta[name='keywords']")
59
+ return false if tags.size == 0
60
+ tags.each do |t|
61
+ unless t['value'].nil?
62
+ return false if t['content'].size == 0 || t['content'].size > 600
63
+ end
58
64
  end
65
+ true
59
66
  end
60
- true
61
67
  end
62
68
  # true if code is less than text
63
69
  def code_less?
64
- sum = 0
65
- page_text = @page.text.size
66
- @page.css('script').each do |tag|
67
- sum += tag.text.size
70
+ if @page
71
+ sum = 0
72
+ page_text = @page.text.size
73
+ @page.css('script').each do |tag|
74
+ sum += tag.text.size
75
+ end
76
+ sum < page_text / 2
68
77
  end
69
- sum < page_text / 2
70
78
  end
71
79
 
72
80
  def collect_metadates
73
- @page.css('meta')
81
+ @page.css('meta') if @page
74
82
  end
75
83
 
76
84
  def metadates_good?
77
- meta_tags = collect_metadates
78
- return false if @page.css('title').size > 1 || meta_tags.nil?
79
- node_names = []
80
- meta_tags.each { |node| node_names << node['name'] }
81
- return false if node_names.compact!.size < 1
82
- node_names.uniq.size == node_names.size
85
+ if @page
86
+ meta_tags = collect_metadates
87
+ return false if @page.css('title').size > 1 || meta_tags.nil?
88
+ node_names = []
89
+ meta_tags.each { |node| node_names << node['name'] }
90
+ return false if node_names.compact!.size < 1
91
+ node_names.uniq.size == node_names.size
92
+ end
83
93
  end
84
94
  # returns an array with the title text plus h1 and h2 hashes keyed by page URL
85
95
  def all_titles_h1_h2
86
- out = []
87
- out << @page.css('title').text << { @page_url => @page.css('h1').text }
88
- out << { @page_url => @page.css('h2').text }
96
+ if @page
97
+ out = []
98
+ out << @page.css('title').text << { @page_url => @page.css('h1').text }
99
+ out << { @page_url => @page.css('h2').text }
100
+ end
89
101
  end
90
102
 
91
103
  def home_a
92
- home_a = []
93
- all_a_tags_href.each do |link|
94
- home_a << link if link.include? @site_url
104
+ if @page
105
+ home_a = []
106
+ all_a_tags_href.each do |link|
107
+ home_a << link if link.include? @site_url
108
+ end
109
+ home_a
95
110
  end
96
- home_a
97
111
  end
98
112
 
99
113
  def remote_a
100
- remote_a = []
101
- all_a_tags_href.uniq.each do |link|
102
- remote_a << link unless link.include? @site_url
114
+ if @page
115
+ remote_a = []
116
+ all_a_tags_href.uniq.each do |link|
117
+ remote_a << link unless link.include? @site_url
118
+ end
119
+ remote_a
103
120
  end
104
- remote_a
105
121
  end
106
122
 
107
123
  def all_a_tags_href
108
- tags = []
109
- @page.css('a').each do |node|
110
- tags << node['href']
124
+ if @page
125
+ tags = []
126
+ @page.css('a').each do |node|
127
+ tags << node['href']
128
+ end
129
+ tags.compact
111
130
  end
112
- tags.compact
113
131
  end
114
132
 
115
133
  def wrong_a
116
- wrong_a = []
117
- all_a_tags_href.each do |link|
118
- wrong_a << link if link.include? '?meta='
134
+ if @page
135
+ wrong_a = []
136
+ all_a_tags_href.each do |link|
137
+ wrong_a << link if link.include? '?meta='
138
+ end
139
+ wrong_a
119
140
  end
120
- wrong_a
121
141
  end
122
142
 
123
143
  def h2?
124
- @page.css('h2').size > 0
144
+ @page.css('h2').size > 0 if @page
125
145
  end
126
146
 
127
147
  def page_text_size
128
- @page.text.size
148
+ @page.text.size if @page
129
149
  end
130
150
 
131
151
  def all_a_tags
132
- tags = []
133
- @page.css('a').each do |node|
134
- tags << [node['href'], node['target'], node['rel']]
152
+ if @page
153
+ tags = []
154
+ @page.css('a').each do |node|
155
+ tags << [node['href'], node['target'], node['rel']]
156
+ end
157
+ tags.compact
135
158
  end
136
- tags.compact
137
159
  end
138
160
 
139
161
  def all_titles
140
- titles = []
141
- @page.css('title').each { |tag| titles << tag.text }
142
- titles
162
+ if @page
163
+ titles = []
164
+ @page.css('title').each { |tag| titles << tag.text }
165
+ titles
166
+ end
143
167
  end
144
168
 
145
169
  def all_meta_description_content
146
- tags = []
147
- @page.css("meta[name='description']").each do |t|
148
- tags << t['content']
170
+ if @page
171
+ tags = []
172
+ @page.css("meta[name='description']").each do |t|
173
+ tags << t['content']
174
+ end
175
+ tags
149
176
  end
150
- tags
151
177
  end
178
+
152
179
  def h2
153
- h2s = []
154
- @page.css('h2').each { |tag| h2s << tag.text }
155
- h2s
180
+ if @page
181
+ h2s = []
182
+ @page.css('h2').each { |tag| h2s << tag.text }
183
+ h2s
184
+ end
156
185
  end
157
186
  end
158
187
  end
@@ -36,9 +36,13 @@ module SiteAnalyzer
36
36
 
37
37
  def add_pages_for_scan!
38
38
  @pages_for_scan = []
39
+ @bad_pages = []
39
40
  @pages.each do |page|
40
- page.home_a.each do |link|
41
- @pages_for_scan << link unless link.nil? || @scanned_pages.include?(link) || link.include?('mailto:') || link.end_with?('.jpg')
41
+ @bad_pages << page.page_url unless page.page
42
+ if page.page
43
+ page.home_a.each do |link|
44
+ @pages_for_scan << link unless link.nil? || @scanned_pages.include?(link) || link.start_with?('mailto:') || link.start_with?('skype:') || link.end_with?('.jpg')
45
+ end
42
46
  end
43
47
  end
44
48
  @pages_for_scan.clear if @pages_for_scan.size == 0
@@ -58,7 +62,9 @@ module SiteAnalyzer
58
62
  def all_titles
59
63
  result = []
60
64
  @pages.each do |page|
61
- result << [page.page_url, page.titles]
65
+ if page.page
66
+ result << [page.page_url, page.titles]
67
+ end
62
68
  end
63
69
  result
64
70
  end
@@ -66,7 +72,9 @@ module SiteAnalyzer
66
72
  def all_descriptions
67
73
  result = []
68
74
  @pages.each do |page|
69
- result << [page.page_url, page.all_meta_description_content]
75
+ if page.page
76
+ result << [page.page_url, page.all_meta_description_content]
77
+ end
70
78
  end
71
79
  result
72
80
  end
@@ -74,7 +82,9 @@ module SiteAnalyzer
74
82
  def all_h2
75
83
  result = []
76
84
  @pages.each do |page|
77
- result << [page.page_url, page.h2]
85
+ if page.page
86
+ result << [page.page_url, page.h2]
87
+ end
78
88
  end
79
89
  result
80
90
  end
@@ -82,11 +92,13 @@ module SiteAnalyzer
82
92
  def all_a
83
93
  result = []
84
94
  @pages.each do |page|
85
- page.all_a_tags.compact.each do |tag|
86
- tag[0] = '-' unless tag[0]
87
- tag[1] = '-' unless tag[1]
88
- tag[2] = '-' unless tag[2]
89
- result << [page.page_url, tag[0], tag[1], tag[2]]
95
+ if page.page
96
+ page.all_a_tags.compact.each do |tag|
97
+ tag[0] = '-' unless tag[0]
98
+ tag[1] = '-' unless tag[1]
99
+ tag[2] = '-' unless tag[2]
100
+ result << [page.page_url, tag[0], tag[1], tag[2]]
101
+ end
90
102
  end
91
103
  end
92
104
  result.compact
@@ -1,3 +1,3 @@
1
1
  module SiteAnalyzer
2
- VERSION = "0.3.9"
2
+ VERSION = "0.3.10"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Denis Savchuk