site_analyzer 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c255cfcd2adaa25b9b6950f7a056a284a74691e
4
- data.tar.gz: c955e17a59b20e2dfeada49f53107f0c99e98c59
3
+ metadata.gz: fe1d0ebb0469f94188c31f5111d4493337eb0c01
4
+ data.tar.gz: 0f4e260c964d757cfe17ba1f0a24c6ced3ccc004
5
5
  SHA512:
6
- metadata.gz: 11aaaf9260357f86ff6ab1e00ee82004d91f9d97ee44cd62d15e0250fd96b13506d963afee6a262ec6b20782d330e9fb94ca4b6cd37410ed4116a3fe6e8f13fb
7
- data.tar.gz: 171266f6d54af493231cc560ef150ed36cc47545e2e8a4389b35dcc2f1dad83dfed31e59799c3f0ed10735823d95ef3c03142b2d401293fcd59a15e6ea2962fc
6
+ metadata.gz: 0cf6c7e6ffa7819b39bb2bd9acab7e461056206f4d7de38d45a719287c67e361f0d92471c2209aea308b10d8b795e8fe68aa91a45e375535122c753f29876497
7
+ data.tar.gz: 705126a665745b90106c274cea37d2c550ca2b8de2b4d941d1d62a4a89800f3618059dfaa35596ee7fd2f040b6b9dfe89ca62cd672b847c2489522b808987f3a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- site_analyzer (0.3.10)
4
+ site_analyzer (0.3.11)
5
5
  addressable (~> 2.3)
6
6
  nokogiri (~> 1.6)
7
7
  robotstxt (~> 0.5)
@@ -103,8 +103,11 @@ module SiteAnalyzer
103
103
  def home_a
104
104
  if @page
105
105
  home_a = []
106
- all_a_tags_href.each do |link|
107
- home_a << link if get_domain(link) && get_domain(link).include?(@site_url)
106
+ all_a_tags_href.uniq.each do |link|
107
+ if get_domain(link) && @site_url
108
+ domain = get_domain(link)
109
+ home_a << link if domain == @site_url
110
+ end
108
111
  end
109
112
  home_a
110
113
  end
@@ -114,7 +117,10 @@ module SiteAnalyzer
114
117
  if @page
115
118
  remote_a = []
116
119
  all_a_tags_href.uniq.each do |link|
117
- remote_a << link unless get_domain(link) && get_domain(link).include?(@site_url)
120
+ if get_domain(link) && @site_url
121
+ domain = get_domain(link)
122
+ remote_a << link unless domain == @site_url
123
+ end
118
124
  end
119
125
  remote_a
120
126
  end
@@ -31,6 +31,8 @@ module SiteAnalyzer
31
31
  add_page @pages_for_scan.pop
32
32
  return if @max_pages <= 0
33
33
  add_pages_for_scan!
34
+ optimize_scan!
35
+ return if @pages_for_scan.size == 0
34
36
  end
35
37
  end
36
38
 
@@ -41,11 +43,10 @@ module SiteAnalyzer
41
43
  @bad_pages << page.page_url unless page.page
42
44
  if page.page
43
45
  page.home_a.each do |link|
44
- @pages_for_scan << link unless link.nil? || @scanned_pages.include?(link) || link.start_with?('mailto:') || link.start_with?('skype:') || link.end_with?('.jpg')
46
+ @pages_for_scan << link unless link.nil? || link.start_with?('mailto:') || link.start_with?('skype:') || link.end_with?('.jpg')
45
47
  end
46
48
  end
47
49
  end
48
- @pages_for_scan.clear if @pages_for_scan.size == 0
49
50
  end
50
51
 
51
52
  def add_page(url)
@@ -103,5 +104,11 @@ module SiteAnalyzer
103
104
  end
104
105
  result.compact
105
106
  end
107
+
108
# Tidies the crawl bookkeeping between scan iterations.
#
# Drops nil entries and duplicates from both @scanned_pages and
# @pages_for_scan, then removes every already-scanned URL from the pending
# list so the same page is not queued again.
#
# @return [Array] the new contents of @pages_for_scan
def optimize_scan!
  # `uniq` returns a copy, so chaining `.uniq.compact!` would leave the
  # original array untouched. The bang methods also return nil when nothing
  # changed, so each is called as a separate statement, never chained.
  @scanned_pages.compact!
  @scanned_pages.uniq!
  @pages_for_scan = @pages_for_scan.compact.uniq - @scanned_pages
end
106
113
  end
107
114
  end
@@ -1,3 +1,3 @@
1
1
  module SiteAnalyzer
2
- VERSION = "0.3.11"
2
+ VERSION = "0.3.12"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_analyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Denis Savchuk