site_analyzer 0.3.9 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Gemfile.lock +1 -1
 - data/lib/site_analyzer/page.rb +99 -70
 - data/lib/site_analyzer/site.rb +22 -10
 - data/lib/site_analyzer/version.rb +1 -1
 - metadata +1 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: d800ec488fdf5882e08d325ceb904af2b8559e0d
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 7e963e3c8f71905e0e1dafadabd8d81c43bcb2a2
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 6bdb11a2a9c98b982c4f901e62e9a8000cdd4750f986a976414d8474065d695db8482f7ba8742237a28aa0d3d41e3e9fa233bb402695ad4b709c348675666fdb
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: d5c457238252264ae2722a1b1f361e8ab3f6be2210295542a347a468cbe16c807dd0a0d9a97264856b4bd347b3f4862494d2dc982ba4b50e4793853590ba981f
         
     | 
    
        data/Gemfile.lock
    CHANGED
    
    
    
        data/lib/site_analyzer/page.rb
    CHANGED
    
    | 
         @@ -7,7 +7,7 @@ module SiteAnalyzer 
     | 
|
| 
       7 
7 
     | 
    
         
             
                attr_reader :page_url, :titles, :page
         
     | 
| 
       8 
8 
     | 
    
         
             
                def initialize(url)
         
     | 
| 
       9 
9 
     | 
    
         
             
                  @page_url = url
         
     | 
| 
       10 
     | 
    
         
            -
                  @page = get_page 
     | 
| 
      
 10 
     | 
    
         
            +
                  @page = get_page(url)
         
     | 
| 
       11 
11 
     | 
    
         
             
                  @site_url = get_domain url
         
     | 
| 
       12 
12 
     | 
    
         
             
                  @titles = all_titles
         
     | 
| 
       13 
13 
     | 
    
         
             
                end
         
     | 
| 
         @@ -17,142 +17,171 @@ module SiteAnalyzer 
     | 
|
| 
       17 
17 
     | 
    
         
             
                end
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
19 
     | 
    
         
             
                def get_page(url)
         
     | 
| 
       20 
     | 
    
         
            -
                   
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
                   
     | 
| 
      
 20 
     | 
    
         
            +
                  begin
         
     | 
| 
      
 21 
     | 
    
         
            +
                    timeout(30) { Nokogiri::HTML(open(url)) }
         
     | 
| 
      
 22 
     | 
    
         
            +
                  rescue Timeout::Error, EOFError, OpenURI::HTTPError
         
     | 
| 
      
 23 
     | 
    
         
            +
                    return nil
         
     | 
| 
      
 24 
     | 
    
         
            +
                  end
         
     | 
| 
       23 
25 
     | 
    
         
             
                end
         
     | 
| 
       24 
26 
     | 
    
         | 
| 
       25 
27 
     | 
    
         
             
                def get_domain(url)
         
     | 
| 
       26 
     | 
    
         
            -
                  timeout(30) { Addressable::URI.parse(url).host }
         
     | 
| 
       27 
     | 
    
         
            -
                rescue
         
     | 
| 
       28 
     | 
    
         
            -
                  'Error with parsing by Addressable'
         
     | 
| 
      
 28 
     | 
    
         
            +
                  timeout(30) { Addressable::URI.parse(url).host } rescue nil
         
     | 
| 
       29 
29 
     | 
    
         
             
                end
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
       31 
31 
     | 
    
         
             
                def title_good?
         
     | 
| 
       32 
     | 
    
         
            -
                  @page.css('title').size == 1 && 
     | 
| 
      
 32 
     | 
    
         
            +
                  @page.css('title').size == 1 && @page.css('title').text.size < 70 if @page
         
     | 
| 
       33 
33 
     | 
    
         
             
                end
         
     | 
| 
       34 
34 
     | 
    
         
             
                # true if title and h1 have no duplicates
         
     | 
| 
       35 
35 
     | 
    
         
             
                def title_and_h1_good?
         
     | 
| 
       36 
     | 
    
         
            -
                   
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
      
 36 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 37 
     | 
    
         
            +
                    arr = []
         
     | 
| 
      
 38 
     | 
    
         
            +
                    @page.css('h1').each { |node| arr << node.text }
         
     | 
| 
      
 39 
     | 
    
         
            +
                    @page.css('title').size == 1 && arr.uniq.size == arr.size
         
     | 
| 
      
 40 
     | 
    
         
            +
                  end
         
     | 
| 
       39 
41 
     | 
    
         
             
                end
         
     | 
| 
       40 
42 
     | 
    
         
             
                # true if metadescription less than 200 symbols
         
     | 
| 
       41 
43 
     | 
    
         
             
                def metadescription_good?
         
     | 
| 
       42 
     | 
    
         
            -
                   
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
                     
     | 
| 
       46 
     | 
    
         
            -
                       
     | 
| 
      
 44 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 45 
     | 
    
         
            +
                    tags = @page.css("meta[name='description']")
         
     | 
| 
      
 46 
     | 
    
         
            +
                    return false if tags.size == 0
         
     | 
| 
      
 47 
     | 
    
         
            +
                    tags.each do |t|
         
     | 
| 
      
 48 
     | 
    
         
            +
                      unless t['value'].nil?
         
     | 
| 
      
 49 
     | 
    
         
            +
                        return false if t['content'].size == 0 || t['content'].size > 200
         
     | 
| 
      
 50 
     | 
    
         
            +
                      end
         
     | 
| 
       47 
51 
     | 
    
         
             
                    end
         
     | 
| 
      
 52 
     | 
    
         
            +
                    true
         
     | 
| 
       48 
53 
     | 
    
         
             
                  end
         
     | 
| 
       49 
     | 
    
         
            -
                  true
         
     | 
| 
       50 
54 
     | 
    
         
             
                end
         
     | 
| 
       51 
55 
     | 
    
         
             
                # true if keywords less than 600 symbols
         
     | 
| 
       52 
56 
     | 
    
         
             
                def keywords_good?
         
     | 
| 
       53 
     | 
    
         
            -
                   
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                     
     | 
| 
       57 
     | 
    
         
            -
                       
     | 
| 
      
 57 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 58 
     | 
    
         
            +
                    tags = @page.css("meta[name='keywords']")
         
     | 
| 
      
 59 
     | 
    
         
            +
                    return false if tags.size == 0
         
     | 
| 
      
 60 
     | 
    
         
            +
                    tags.each do |t|
         
     | 
| 
      
 61 
     | 
    
         
            +
                      unless t['value'].nil?
         
     | 
| 
      
 62 
     | 
    
         
            +
                        return false if t['content'].size == 0 || t['content'].size > 600
         
     | 
| 
      
 63 
     | 
    
         
            +
                      end
         
     | 
| 
       58 
64 
     | 
    
         
             
                    end
         
     | 
| 
      
 65 
     | 
    
         
            +
                    true
         
     | 
| 
       59 
66 
     | 
    
         
             
                  end
         
     | 
| 
       60 
     | 
    
         
            -
                  true
         
     | 
| 
       61 
67 
     | 
    
         
             
                end
         
     | 
| 
       62 
68 
     | 
    
         
             
                # true if code less than text
         
     | 
| 
       63 
69 
     | 
    
         
             
                def code_less?
         
     | 
| 
       64 
     | 
    
         
            -
                   
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
                     
     | 
| 
      
 70 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 71 
     | 
    
         
            +
                    sum = 0
         
     | 
| 
      
 72 
     | 
    
         
            +
                    page_text = @page.text.size
         
     | 
| 
      
 73 
     | 
    
         
            +
                    @page.css('script').each do |tag|
         
     | 
| 
      
 74 
     | 
    
         
            +
                      sum += tag.text.size
         
     | 
| 
      
 75 
     | 
    
         
            +
                    end
         
     | 
| 
      
 76 
     | 
    
         
            +
                    sum < page_text / 2
         
     | 
| 
       68 
77 
     | 
    
         
             
                  end
         
     | 
| 
       69 
     | 
    
         
            -
                  sum < page_text / 2
         
     | 
| 
       70 
78 
     | 
    
         
             
                end
         
     | 
| 
       71 
79 
     | 
    
         | 
| 
       72 
80 
     | 
    
         
             
                def collect_metadates
         
     | 
| 
       73 
     | 
    
         
            -
                  @page.css('meta')
         
     | 
| 
      
 81 
     | 
    
         
            +
                  @page.css('meta') if @page
         
     | 
| 
       74 
82 
     | 
    
         
             
                end
         
     | 
| 
       75 
83 
     | 
    
         | 
| 
       76 
84 
     | 
    
         
             
                def metadates_good?
         
     | 
| 
       77 
     | 
    
         
            -
                   
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
      
 85 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 86 
     | 
    
         
            +
                    meta_tags = collect_metadates
         
     | 
| 
      
 87 
     | 
    
         
            +
                    return false if @page.css('title').size > 1 || meta_tags.nil?
         
     | 
| 
      
 88 
     | 
    
         
            +
                    node_names = []
         
     | 
| 
      
 89 
     | 
    
         
            +
                    meta_tags.each { |node| node_names << node['name'] }
         
     | 
| 
      
 90 
     | 
    
         
            +
                    return false if node_names.compact!.size < 1
         
     | 
| 
      
 91 
     | 
    
         
            +
                    node_names.uniq.size == node_names.size
         
     | 
| 
      
 92 
     | 
    
         
            +
                  end
         
     | 
| 
       83 
93 
     | 
    
         
             
                end
         
     | 
| 
       84 
94 
     | 
    
         
             
                # return hash with all titles, h1 and h2
         
     | 
| 
       85 
95 
     | 
    
         
             
                def all_titles_h1_h2
         
     | 
| 
       86 
     | 
    
         
            -
                   
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
      
 96 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 97 
     | 
    
         
            +
                    out = []
         
     | 
| 
      
 98 
     | 
    
         
            +
                    out << @page.css('title').text << { @page_url => @page.css('h1').text }
         
     | 
| 
      
 99 
     | 
    
         
            +
                    out << { @page_url => @page.css('h2').text }
         
     | 
| 
      
 100 
     | 
    
         
            +
                  end
         
     | 
| 
       89 
101 
     | 
    
         
             
                end
         
     | 
| 
       90 
102 
     | 
    
         | 
| 
       91 
103 
     | 
    
         
             
                def home_a
         
     | 
| 
       92 
     | 
    
         
            -
                   
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
                     
     | 
| 
      
 104 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 105 
     | 
    
         
            +
                    home_a = []
         
     | 
| 
      
 106 
     | 
    
         
            +
                    all_a_tags_href.each do |link|
         
     | 
| 
      
 107 
     | 
    
         
            +
                      home_a << link if link.include? @site_url
         
     | 
| 
      
 108 
     | 
    
         
            +
                    end
         
     | 
| 
      
 109 
     | 
    
         
            +
                    home_a
         
     | 
| 
       95 
110 
     | 
    
         
             
                  end
         
     | 
| 
       96 
     | 
    
         
            -
                  home_a
         
     | 
| 
       97 
111 
     | 
    
         
             
                end
         
     | 
| 
       98 
112 
     | 
    
         | 
| 
       99 
113 
     | 
    
         
             
                def remote_a
         
     | 
| 
       100 
     | 
    
         
            -
                   
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
       102 
     | 
    
         
            -
                     
     | 
| 
      
 114 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 115 
     | 
    
         
            +
                    remote_a = []
         
     | 
| 
      
 116 
     | 
    
         
            +
                    all_a_tags_href.uniq.each do |link|
         
     | 
| 
      
 117 
     | 
    
         
            +
                      remote_a << link unless link.include? @site_url
         
     | 
| 
      
 118 
     | 
    
         
            +
                    end
         
     | 
| 
      
 119 
     | 
    
         
            +
                    remote_a
         
     | 
| 
       103 
120 
     | 
    
         
             
                  end
         
     | 
| 
       104 
     | 
    
         
            -
                  remote_a
         
     | 
| 
       105 
121 
     | 
    
         
             
                end
         
     | 
| 
       106 
122 
     | 
    
         | 
| 
       107 
123 
     | 
    
         
             
                def all_a_tags_href
         
     | 
| 
       108 
     | 
    
         
            -
                   
     | 
| 
       109 
     | 
    
         
            -
             
     | 
| 
       110 
     | 
    
         
            -
             
     | 
| 
      
 124 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 125 
     | 
    
         
            +
                    tags = []
         
     | 
| 
      
 126 
     | 
    
         
            +
                      @page.css('a').each do |node|
         
     | 
| 
      
 127 
     | 
    
         
            +
                        tags << node['href']
         
     | 
| 
      
 128 
     | 
    
         
            +
                      end
         
     | 
| 
      
 129 
     | 
    
         
            +
                    tags.compact
         
     | 
| 
       111 
130 
     | 
    
         
             
                  end
         
     | 
| 
       112 
     | 
    
         
            -
                  tags.compact
         
     | 
| 
       113 
131 
     | 
    
         
             
                end
         
     | 
| 
       114 
132 
     | 
    
         | 
| 
       115 
133 
     | 
    
         
             
                def wrong_a
         
     | 
| 
       116 
     | 
    
         
            -
                   
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
       118 
     | 
    
         
            -
                     
     | 
| 
      
 134 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 135 
     | 
    
         
            +
                    wrong_a = []
         
     | 
| 
      
 136 
     | 
    
         
            +
                    all_a_tags_href.each do |link|
         
     | 
| 
      
 137 
     | 
    
         
            +
                      wrong_a << link if link.include? '?meta='
         
     | 
| 
      
 138 
     | 
    
         
            +
                    end
         
     | 
| 
      
 139 
     | 
    
         
            +
                    wrong_a
         
     | 
| 
       119 
140 
     | 
    
         
             
                  end
         
     | 
| 
       120 
     | 
    
         
            -
                  wrong_a
         
     | 
| 
       121 
141 
     | 
    
         
             
                end
         
     | 
| 
       122 
142 
     | 
    
         | 
| 
       123 
143 
     | 
    
         
             
                def h2?
         
     | 
| 
       124 
     | 
    
         
            -
                  @page.css('h2').size > 0
         
     | 
| 
      
 144 
     | 
    
         
            +
                  @page.css('h2').size > 0 if @page
         
     | 
| 
       125 
145 
     | 
    
         
             
                end
         
     | 
| 
       126 
146 
     | 
    
         | 
| 
       127 
147 
     | 
    
         
             
                def page_text_size
         
     | 
| 
       128 
     | 
    
         
            -
                  @page.text.size
         
     | 
| 
      
 148 
     | 
    
         
            +
                  @page.text.size if @page
         
     | 
| 
       129 
149 
     | 
    
         
             
                end
         
     | 
| 
       130 
150 
     | 
    
         | 
| 
       131 
151 
     | 
    
         
             
                def all_a_tags
         
     | 
| 
       132 
     | 
    
         
            -
                   
     | 
| 
       133 
     | 
    
         
            -
             
     | 
| 
       134 
     | 
    
         
            -
                     
     | 
| 
      
 152 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 153 
     | 
    
         
            +
                    tags = []
         
     | 
| 
      
 154 
     | 
    
         
            +
                    @page.css('a').each do |node|
         
     | 
| 
      
 155 
     | 
    
         
            +
                      tags << [node['href'], node['target'], node['rel']]
         
     | 
| 
      
 156 
     | 
    
         
            +
                    end
         
     | 
| 
      
 157 
     | 
    
         
            +
                    tags.compact
         
     | 
| 
       135 
158 
     | 
    
         
             
                  end
         
     | 
| 
       136 
     | 
    
         
            -
                  tags.compact
         
     | 
| 
       137 
159 
     | 
    
         
             
                end
         
     | 
| 
       138 
160 
     | 
    
         | 
| 
       139 
161 
     | 
    
         
             
                def all_titles
         
     | 
| 
       140 
     | 
    
         
            -
                   
     | 
| 
       141 
     | 
    
         
            -
             
     | 
| 
       142 
     | 
    
         
            -
             
     | 
| 
      
 162 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 163 
     | 
    
         
            +
                    titles = []
         
     | 
| 
      
 164 
     | 
    
         
            +
                    @page.css('title').each { |tag| titles << tag.text }
         
     | 
| 
      
 165 
     | 
    
         
            +
                    titles
         
     | 
| 
      
 166 
     | 
    
         
            +
                  end
         
     | 
| 
       143 
167 
     | 
    
         
             
                end
         
     | 
| 
       144 
168 
     | 
    
         | 
| 
       145 
169 
     | 
    
         
             
                def all_meta_description_content
         
     | 
| 
       146 
     | 
    
         
            -
                   
     | 
| 
       147 
     | 
    
         
            -
             
     | 
| 
       148 
     | 
    
         
            -
                     
     | 
| 
      
 170 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 171 
     | 
    
         
            +
                    tags = []
         
     | 
| 
      
 172 
     | 
    
         
            +
                    @page.css("meta[name='description']").each do |t|
         
     | 
| 
      
 173 
     | 
    
         
            +
                      tags << t['content']
         
     | 
| 
      
 174 
     | 
    
         
            +
                    end
         
     | 
| 
      
 175 
     | 
    
         
            +
                    tags
         
     | 
| 
       149 
176 
     | 
    
         
             
                  end
         
     | 
| 
       150 
     | 
    
         
            -
                  tags
         
     | 
| 
       151 
177 
     | 
    
         
             
                end
         
     | 
| 
      
 178 
     | 
    
         
            +
             
     | 
| 
       152 
179 
     | 
    
         
             
                def h2
         
     | 
| 
       153 
     | 
    
         
            -
                   
     | 
| 
       154 
     | 
    
         
            -
             
     | 
| 
       155 
     | 
    
         
            -
             
     | 
| 
      
 180 
     | 
    
         
            +
                  if @page
         
     | 
| 
      
 181 
     | 
    
         
            +
                    h2s = []
         
     | 
| 
      
 182 
     | 
    
         
            +
                    @page.css('h2').each { |tag| h2s << tag.text }
         
     | 
| 
      
 183 
     | 
    
         
            +
                    h2s
         
     | 
| 
      
 184 
     | 
    
         
            +
                  end
         
     | 
| 
       156 
185 
     | 
    
         
             
                end
         
     | 
| 
       157 
186 
     | 
    
         
             
              end
         
     | 
| 
       158 
187 
     | 
    
         
             
            end
         
     | 
    
        data/lib/site_analyzer/site.rb
    CHANGED
    
    | 
         @@ -36,9 +36,13 @@ module SiteAnalyzer 
     | 
|
| 
       36 
36 
     | 
    
         | 
| 
       37 
37 
     | 
    
         
             
                def add_pages_for_scan!
         
     | 
| 
       38 
38 
     | 
    
         
             
                  @pages_for_scan = []
         
     | 
| 
      
 39 
     | 
    
         
            +
                  @bad_pages = []
         
     | 
| 
       39 
40 
     | 
    
         
             
                  @pages.each do |page|
         
     | 
| 
       40 
     | 
    
         
            -
                    page. 
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
      
 41 
     | 
    
         
            +
                    @bad_pages << page.page_url unless page.page
         
     | 
| 
      
 42 
     | 
    
         
            +
                    if page.page
         
     | 
| 
      
 43 
     | 
    
         
            +
                      page.home_a.each do |link|
         
     | 
| 
      
 44 
     | 
    
         
            +
                        @pages_for_scan << link unless link.nil? || @scanned_pages.include?(link) || link.start_with?('mailto:') || link.start_with?('skype:') || link.end_with?('.jpg')
         
     | 
| 
      
 45 
     | 
    
         
            +
                      end
         
     | 
| 
       42 
46 
     | 
    
         
             
                    end
         
     | 
| 
       43 
47 
     | 
    
         
             
                  end
         
     | 
| 
       44 
48 
     | 
    
         
             
                  @pages_for_scan.clear if @pages_for_scan.size == 0
         
     | 
| 
         @@ -58,7 +62,9 @@ module SiteAnalyzer 
     | 
|
| 
       58 
62 
     | 
    
         
             
                def all_titles
         
     | 
| 
       59 
63 
     | 
    
         
             
                  result = []
         
     | 
| 
       60 
64 
     | 
    
         
             
                  @pages.each do |page|
         
     | 
| 
       61 
     | 
    
         
            -
                     
     | 
| 
      
 65 
     | 
    
         
            +
                    if page.page
         
     | 
| 
      
 66 
     | 
    
         
            +
                      result << [page.page_url, page.titles]
         
     | 
| 
      
 67 
     | 
    
         
            +
                    end
         
     | 
| 
       62 
68 
     | 
    
         
             
                  end
         
     | 
| 
       63 
69 
     | 
    
         
             
                  result
         
     | 
| 
       64 
70 
     | 
    
         
             
                end
         
     | 
| 
         @@ -66,7 +72,9 @@ module SiteAnalyzer 
     | 
|
| 
       66 
72 
     | 
    
         
             
                def all_descriptions
         
     | 
| 
       67 
73 
     | 
    
         
             
                  result = []
         
     | 
| 
       68 
74 
     | 
    
         
             
                  @pages.each do |page|
         
     | 
| 
       69 
     | 
    
         
            -
                     
     | 
| 
      
 75 
     | 
    
         
            +
                    if page.page
         
     | 
| 
      
 76 
     | 
    
         
            +
                      result << [page.page_url, page.all_meta_description_content]
         
     | 
| 
      
 77 
     | 
    
         
            +
                    end
         
     | 
| 
       70 
78 
     | 
    
         
             
                  end
         
     | 
| 
       71 
79 
     | 
    
         
             
                  result
         
     | 
| 
       72 
80 
     | 
    
         
             
                end
         
     | 
| 
         @@ -74,7 +82,9 @@ module SiteAnalyzer 
     | 
|
| 
       74 
82 
     | 
    
         
             
                def all_h2
         
     | 
| 
       75 
83 
     | 
    
         
             
                  result = []
         
     | 
| 
       76 
84 
     | 
    
         
             
                  @pages.each do |page|
         
     | 
| 
       77 
     | 
    
         
            -
                     
     | 
| 
      
 85 
     | 
    
         
            +
                    if page.page
         
     | 
| 
      
 86 
     | 
    
         
            +
                      result << [page.page_url, page.h2]
         
     | 
| 
      
 87 
     | 
    
         
            +
                    end
         
     | 
| 
       78 
88 
     | 
    
         
             
                  end
         
     | 
| 
       79 
89 
     | 
    
         
             
                  result
         
     | 
| 
       80 
90 
     | 
    
         
             
                end
         
     | 
| 
         @@ -82,11 +92,13 @@ module SiteAnalyzer 
     | 
|
| 
       82 
92 
     | 
    
         
             
                def all_a
         
     | 
| 
       83 
93 
     | 
    
         
             
                  result = []
         
     | 
| 
       84 
94 
     | 
    
         
             
                  @pages.each do |page|
         
     | 
| 
       85 
     | 
    
         
            -
                    page. 
     | 
| 
       86 
     | 
    
         
            -
                       
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
      
 95 
     | 
    
         
            +
                    if page.page
         
     | 
| 
      
 96 
     | 
    
         
            +
                      page.all_a_tags.compact.each do |tag|
         
     | 
| 
      
 97 
     | 
    
         
            +
                        tag[0] = '-' unless tag[0]
         
     | 
| 
      
 98 
     | 
    
         
            +
                        tag[1] = '-' unless tag[1]
         
     | 
| 
      
 99 
     | 
    
         
            +
                        tag[2] = '-' unless tag[2]
         
     | 
| 
      
 100 
     | 
    
         
            +
                        result << [page.page_url, tag[0], tag[1], tag[2]]
         
     | 
| 
      
 101 
     | 
    
         
            +
                      end
         
     | 
| 
       90 
102 
     | 
    
         
             
                    end
         
     | 
| 
       91 
103 
     | 
    
         
             
                  end
         
     | 
| 
       92 
104 
     | 
    
         
             
                  result.compact
         
     |