wgit 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wgit/crawler.rb +13 -5
- data/lib/wgit/document.rb +40 -44
- data/lib/wgit/url.rb +84 -24
- data/lib/wgit/version.rb +1 -1
- metadata +46 -17
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: c4dc572e7d48a95d423e175ad2d6a791be52bf56f6c391e9152c075f45672ee8
         | 
| 4 | 
            +
              data.tar.gz: fd6a9c9d1e38906f500543ae92169f7bcb9e64de1567f4f29ec24f7ca74c60d8
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: f4fc425aa1b25254dba343151794893ff26a3682e58dd08bdc918c180da89ecb08cf0f1013837e41527f564d36bb0784c6ecd204e53c1276cb5b32401c88ffab
         | 
| 7 | 
            +
              data.tar.gz: 2ce1250ad7312257bc021e7414f164ec174e07e469a6956478cccaa7b05f159981a7a69ef6b713db5454233c8a03ea8487f14184c1e26dcab28ed8e81250507d
         | 
    
        data/lib/wgit/crawler.rb
    CHANGED
    
    | @@ -106,7 +106,8 @@ module Wgit | |
| 106 106 | 
             
                  doc = crawl_url(base_url, &block)
         | 
| 107 107 | 
             
                  return nil if doc.nil?
         | 
| 108 108 |  | 
| 109 | 
            -
                   | 
| 109 | 
            +
                  path = base_url.path.empty? ? '/' : base_url.path
         | 
| 110 | 
            +
                  crawled_urls  = [path]
         | 
| 110 111 | 
             
                  external_urls = doc.external_links
         | 
| 111 112 | 
             
                  internal_urls = doc.internal_links
         | 
| 112 113 |  | 
| @@ -149,7 +150,10 @@ module Wgit | |
| 149 150 | 
             
                def fetch(url)
         | 
| 150 151 | 
             
                  response = resolve(url)
         | 
| 151 152 | 
             
                  response.body.empty? ? nil : response.body
         | 
| 152 | 
            -
                rescue
         | 
| 153 | 
            +
                rescue Exception => ex
         | 
| 154 | 
            +
                  Wgit.logger.debug(
         | 
| 155 | 
            +
                    "Wgit::Crawler#fetch('#{url}') exception: #{ex.message}"
         | 
| 156 | 
            +
                  )
         | 
| 153 157 | 
             
                  nil
         | 
| 154 158 | 
             
                end
         | 
| 155 159 |  | 
| @@ -158,12 +162,16 @@ module Wgit | |
| 158 162 | 
             
                # an exception. Redirects can be disabled by setting `redirect_limit: 1`.
         | 
| 159 163 | 
             
                # The Net::HTTPResponse will be returned.
         | 
| 160 164 | 
             
                def resolve(url, redirect_limit: 5)
         | 
| 161 | 
            -
                  redirect_count =  | 
| 165 | 
            +
                  redirect_count = -1
         | 
| 162 166 | 
             
                  begin
         | 
| 163 167 | 
             
                    raise "Too many redirects" if redirect_count >= redirect_limit
         | 
| 164 | 
            -
                    response = Net::HTTP.get_response(URI.parse(url))
         | 
| 165 | 
            -
                    url = response['location']
         | 
| 166 168 | 
             
                    redirect_count += 1
         | 
| 169 | 
            +
             | 
| 170 | 
            +
                    response = Net::HTTP.get_response(URI(url))
         | 
| 171 | 
            +
                    location = Wgit::Url.new(response.fetch('location', ''))
         | 
| 172 | 
            +
                    if not location.empty?
         | 
| 173 | 
            +
                      url = location.is_relative? ? url.to_base.concat(location) : location
         | 
| 174 | 
            +
                    end
         | 
| 167 175 | 
             
                  end while response.is_a?(Net::HTTPRedirection)
         | 
| 168 176 | 
             
                  response
         | 
| 169 177 | 
             
                end
         | 
    
        data/lib/wgit/document.rb
    CHANGED
    
    | @@ -62,6 +62,8 @@ module Wgit | |
| 62 62 | 
             
                    @html = html ||= ""
         | 
| 63 63 | 
             
                    @doc = init_nokogiri
         | 
| 64 64 | 
             
                    @score = 0.0
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                    process_url_and_html
         | 
| 65 67 |  | 
| 66 68 | 
             
                    # Dynamically run the init_*_from_html methods.
         | 
| 67 69 | 
             
                    Document.private_instance_methods(false).each do |method|
         | 
| @@ -80,7 +82,9 @@ module Wgit | |
| 80 82 | 
             
                    @html = obj.fetch("html", "")
         | 
| 81 83 | 
             
                    @doc = init_nokogiri
         | 
| 82 84 | 
             
                    @score = obj.fetch("score", 0.0)
         | 
| 83 | 
            -
             | 
| 85 | 
            +
             | 
| 86 | 
            +
                    process_url_and_html
         | 
| 87 | 
            +
             | 
| 84 88 | 
             
                    # Dynamically run the init_*_from_object methods.
         | 
| 85 89 | 
             
                    Document.private_instance_methods(false).each do |method|
         | 
| 86 90 | 
             
                      if method.to_s.start_with?("init_") && 
         | 
| @@ -174,7 +178,7 @@ module Wgit | |
| 174 178 | 
             
                # @return [Boolean] True if @html is nil/empty, false otherwise.
         | 
| 175 179 | 
             
                def empty?
         | 
| 176 180 | 
             
                  return true if @html.nil?
         | 
| 177 | 
            -
                  @html. | 
| 181 | 
            +
                  @html.empty?
         | 
| 178 182 | 
             
                end
         | 
| 179 183 |  | 
| 180 184 | 
             
                # Uses Nokogiri's xpath method to search the doc's html and return the 
         | 
| @@ -194,47 +198,54 @@ module Wgit | |
| 194 198 | 
             
                def css(selector)
         | 
| 195 199 | 
             
                  @doc.css(selector)
         | 
| 196 200 | 
             
                end
         | 
| 197 | 
            -
             | 
| 198 | 
            -
                # Get all internal links of this Document.
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                # Get all internal links of this Document in relative form. Internal
         | 
| 203 | 
            +
                # meaning a link to another page on this website. Also see
         | 
| 204 | 
            +
                # Wgit::Document#internal_full_links.
         | 
| 199 205 | 
             
                #
         | 
| 200 206 | 
             
                # @return [Array<Wgit::Url>] self's internal/relative URL's.
         | 
| 201 207 | 
             
                def internal_links
         | 
| 202 208 | 
             
                  return [] if @links.empty?
         | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 209 | 
            +
             | 
| 210 | 
            +
                  links = @links.
         | 
| 211 | 
            +
                    reject do |link|
         | 
| 212 | 
            +
                      not link.relative_link?(base: @url.to_base)
         | 
| 206 213 | 
             
                    rescue
         | 
| 207 214 | 
             
                      true
         | 
| 208 | 
            -
                    end
         | 
| 209 | 
            -
             | 
| 215 | 
            +
                    end.
         | 
| 216 | 
            +
                    map(&:to_path)
         | 
| 217 | 
            +
             | 
| 218 | 
            +
                  process_arr(links)
         | 
| 210 219 | 
             
                end
         | 
| 211 220 |  | 
| 212 221 | 
             
                # Get all internal links of this Document and append them to this
         | 
| 213 | 
            -
                # Document's base URL.
         | 
| 222 | 
            +
                # Document's base URL making them absolute. Also see
         | 
| 223 | 
            +
                # Wgit::Document#internal_links.
         | 
| 214 224 | 
             
                #
         | 
| 215 225 | 
             
                # @return [Array<Wgit::Url>] self's internal/relative URL's in absolute
         | 
| 216 226 | 
             
                #   form.
         | 
| 217 227 | 
             
                def internal_full_links
         | 
| 218 228 | 
             
                  in_links = internal_links
         | 
| 219 229 | 
             
                  return [] if in_links.empty?
         | 
| 220 | 
            -
                  in_links.map  | 
| 221 | 
            -
                    link.replace("/" + link) unless link.start_with?("/")
         | 
| 222 | 
            -
                    Wgit::Url.new(@url.to_base + link)
         | 
| 223 | 
            -
                  end
         | 
| 230 | 
            +
                  in_links.map { |link| @url.to_base.concat(link) }
         | 
| 224 231 | 
             
                end
         | 
| 225 232 |  | 
| 226 | 
            -
                # Get all external links of this Document.
         | 
| 233 | 
            +
                # Get all external links of this Document. External meaning a link to
         | 
| 234 | 
            +
                # another website.
         | 
| 227 235 | 
             
                #
         | 
| 228 236 | 
             
                # @return [Array<Wgit::Url>] self's external/absolute URL's.
         | 
| 229 237 | 
             
                def external_links
         | 
| 230 238 | 
             
                  return [] if @links.empty?
         | 
| 231 | 
            -
             | 
| 232 | 
            -
             | 
| 233 | 
            -
             | 
| 239 | 
            +
             | 
| 240 | 
            +
                  links = @links.
         | 
| 241 | 
            +
                    reject do |link|
         | 
| 242 | 
            +
                      link.relative_link?(base: @url.to_base)
         | 
| 234 243 | 
             
                    rescue
         | 
| 235 244 | 
             
                      true
         | 
| 236 | 
            -
                    end
         | 
| 237 | 
            -
             | 
| 245 | 
            +
                    end.
         | 
| 246 | 
            +
                    map { |link| link.end_with?('/') ? link.chop : link }
         | 
| 247 | 
            +
             | 
| 248 | 
            +
                  process_arr(links)
         | 
| 238 249 | 
             
                end
         | 
| 239 250 |  | 
| 240 251 | 
             
                # Searches against the @text for the given search query.
         | 
| @@ -253,8 +264,8 @@ module Wgit | |
| 253 264 | 
             
                #   sentence.
         | 
| 254 265 | 
             
                # @return [Array<String>] Representing the search results.
         | 
| 255 266 | 
             
                def search(query, sentence_limit = 80)
         | 
| 256 | 
            -
                  raise "A search  | 
| 257 | 
            -
                  raise "The  | 
| 267 | 
            +
                  raise "A search query must be provided" if query.empty?
         | 
| 268 | 
            +
                  raise "The sentence_limit value must be even" if sentence_limit.odd?
         | 
| 258 269 |  | 
| 259 270 | 
             
                  results = {}
         | 
| 260 271 | 
             
                  regex = Regexp.new(query, Regexp::IGNORECASE)
         | 
| @@ -469,29 +480,16 @@ module Wgit | |
| 469 480 | 
             
                  if array.is_a?(Array)
         | 
| 470 481 | 
             
                    array.map! { |str| process_str(str) }
         | 
| 471 482 | 
             
                    array.reject! { |str| str.is_a?(String) ? str.empty? : false }
         | 
| 483 | 
            +
                    array.compact!
         | 
| 472 484 | 
             
                    array.uniq!
         | 
| 473 485 | 
             
                  end
         | 
| 474 486 | 
             
                  array
         | 
| 475 487 | 
             
                end
         | 
| 476 | 
            -
             | 
| 477 | 
            -
                #  | 
| 478 | 
            -
                 | 
| 479 | 
            -
             | 
| 480 | 
            -
             | 
| 481 | 
            -
                def process_internal_links(links)
         | 
| 482 | 
            -
                  links.map! do |link|
         | 
| 483 | 
            -
                    host_or_base =  if link.start_with?("http")
         | 
| 484 | 
            -
                                      @url.base
         | 
| 485 | 
            -
                                    else
         | 
| 486 | 
            -
                                      @url.host
         | 
| 487 | 
            -
                                    end
         | 
| 488 | 
            -
                    if link.start_with?(host_or_base)
         | 
| 489 | 
            -
                      link.sub!(host_or_base, "")
         | 
| 490 | 
            -
                      link.replace(link[1..-1]) if link.start_with?("/")
         | 
| 491 | 
            -
                      link.strip!
         | 
| 492 | 
            -
                    end
         | 
| 493 | 
            -
                    link
         | 
| 494 | 
            -
                  end
         | 
| 488 | 
            +
             | 
| 489 | 
            +
                # Ensure the @url and @html Strings are correctly encoded etc.
         | 
| 490 | 
            +
                def process_url_and_html
         | 
| 491 | 
            +
                  @url = process_str(@url)
         | 
| 492 | 
            +
                  @html = process_str(@html)
         | 
| 495 493 | 
             
                end
         | 
| 496 494 |  | 
| 497 495 | 
             
                ### Default init_* (Document extension) methods. ###
         | 
| @@ -547,7 +545,6 @@ module Wgit | |
| 547 545 | 
             
                  xpath = "//a/@href"
         | 
| 548 546 | 
             
                  result = find_in_html(xpath, singleton: false) do |links|
         | 
| 549 547 | 
             
                    if links
         | 
| 550 | 
            -
                      links.reject! { |link| link == "/" }
         | 
| 551 548 | 
             
                      links.map! do |link|
         | 
| 552 549 | 
             
                        begin
         | 
| 553 550 | 
             
                          Wgit::Url.new(link)
         | 
| @@ -555,8 +552,7 @@ module Wgit | |
| 555 552 | 
             
                          nil
         | 
| 556 553 | 
             
                        end
         | 
| 557 554 | 
             
                      end
         | 
| 558 | 
            -
                      links. | 
| 559 | 
            -
                      process_internal_links(links)
         | 
| 555 | 
            +
                      links.compact!
         | 
| 560 556 | 
             
                    end
         | 
| 561 557 | 
             
                    links
         | 
| 562 558 | 
             
                  end
         | 
    
        data/lib/wgit/url.rb
    CHANGED
    
    | @@ -95,21 +95,29 @@ module Wgit | |
| 95 95 | 
             
                  url
         | 
| 96 96 | 
             
                end
         | 
| 97 97 |  | 
| 98 | 
            -
                # Returns if link is a relative or absolute Url. | 
| 99 | 
            -
                #  | 
| 100 | 
            -
                #  | 
| 101 | 
            -
                #  | 
| 102 | 
            -
                #  | 
| 103 | 
            -
                # internal link (regardless of whether it is valid or not).
         | 
| 98 | 
            +
                # Returns if link is a relative or absolute Url.
         | 
| 99 | 
            +
                # All external links in a page are expected to have a protocol prefix e.g.
         | 
| 100 | 
            +
                # "http://", otherwise the link is treated as an internal link (regardless
         | 
| 101 | 
            +
                # of whether it is valid or not). The only exception is if base is provided
         | 
| 102 | 
            +
                # and link is a page within that site; then the link is relative.
         | 
| 104 103 | 
             
                #
         | 
| 105 104 | 
             
                # @param link [Wgit::Url, String] The url to test if relative or not.
         | 
| 105 | 
            +
                # @param base [String] The Url base e.g. http://www.google.co.uk.
         | 
| 106 106 | 
             
                # @return [Boolean] True if relative, false if absolute.
         | 
| 107 107 | 
             
                # @raise [RuntimeError] If the link is invalid.
         | 
| 108 | 
            -
                def self.relative_link?(link)
         | 
| 109 | 
            -
                   | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 112 | 
            -
                   | 
| 108 | 
            +
                def self.relative_link?(link, base: nil)
         | 
| 109 | 
            +
                  if base and URI(base).host.nil?
         | 
| 110 | 
            +
                    raise "Invalid base, must contain protocol prefix: #{base}"
         | 
| 111 | 
            +
                  end
         | 
| 112 | 
            +
                  
         | 
| 113 | 
            +
                  uri = URI(link)
         | 
| 114 | 
            +
                  if not uri.host.nil? and not uri.host.empty?
         | 
| 115 | 
            +
                    if base
         | 
| 116 | 
            +
                      uri.host == URI(base).host
         | 
| 117 | 
            +
                    else
         | 
| 118 | 
            +
                      false
         | 
| 119 | 
            +
                    end
         | 
| 120 | 
            +
                  elsif not uri.path.nil? and not uri.path.empty?
         | 
| 113 121 | 
             
                    true
         | 
| 114 122 | 
             
                  else
         | 
| 115 123 | 
             
                    raise "Invalid link: #{link}"
         | 
| @@ -128,11 +136,14 @@ module Wgit | |
| 128 136 | 
             
                  Wgit::Url.new(url + "/" + link)
         | 
| 129 137 | 
             
                end
         | 
| 130 138 |  | 
| 131 | 
            -
                # Returns if self is a relative or absolute Url.
         | 
| 139 | 
            +
                # Returns if self is a relative or absolute Url. If base is provided and
         | 
| 140 | 
            +
                # self is a page within that site then the link is relative.
         | 
| 141 | 
            +
                # See Wgit.relative_link? for more information.
         | 
| 142 | 
            +
                #
         | 
| 132 143 | 
             
                # @return [Boolean] True if relative, false if absolute.
         | 
| 133 144 | 
             
                # @raise [RuntimeError] If the link is invalid.
         | 
| 134 | 
            -
                def relative_link?
         | 
| 135 | 
            -
                  Wgit::Url.relative_link?(self)
         | 
| 145 | 
            +
                def relative_link?(base: nil)
         | 
| 146 | 
            +
                  Wgit::Url.relative_link?(self, base: base)
         | 
| 136 147 | 
             
                end
         | 
| 137 148 |  | 
| 138 149 | 
             
                # Determines if self is a valid Url or not.
         | 
| @@ -142,7 +153,7 @@ module Wgit | |
| 142 153 | 
             
                  Wgit::Url.valid?(self)
         | 
| 143 154 | 
             
                end
         | 
| 144 155 |  | 
| 145 | 
            -
                # Concats self  | 
| 156 | 
            +
                # Concats self and the link.
         | 
| 146 157 | 
             
                #
         | 
| 147 158 | 
             
                # @param link [Wgit::Url, String] The link to concat with self.
         | 
| 148 159 | 
             
                # @return [Wgit::Url] self + "/" + link
         | 
| @@ -172,6 +183,14 @@ module Wgit | |
| 172 183 | 
             
                def to_url
         | 
| 173 184 | 
             
                  self
         | 
| 174 185 | 
             
                end
         | 
| 186 | 
            +
             | 
| 187 | 
            +
                # Returns a new Wgit::Url containing just the scheme/protocol of this URL
         | 
| 188 | 
            +
                # e.g. Given http://www.google.co.uk, http is returned.
         | 
| 189 | 
            +
                #
         | 
| 190 | 
            +
                # @return [Wgit::Url] Containing just the scheme/protocol.
         | 
| 191 | 
            +
                def to_scheme
         | 
| 192 | 
            +
                  Wgit::Url.new(@uri.scheme)
         | 
| 193 | 
            +
                end
         | 
| 175 194 |  | 
| 176 195 | 
             
                # Returns a new Wgit::Url containing just the host of this URL e.g.
         | 
| 177 196 | 
             
                # Given http://www.google.co.uk/about.html, www.google.co.uk is returned.
         | 
| @@ -181,24 +200,54 @@ module Wgit | |
| 181 200 | 
             
                  Wgit::Url.new(@uri.host)
         | 
| 182 201 | 
             
                end
         | 
| 183 202 |  | 
| 184 | 
            -
                # Returns the base of this URL e.g. the protocol and host combined.
         | 
| 185 | 
            -
                # How it works:
         | 
| 186 | 
            -
                # URI.split("http://www.google.co.uk/about.html") returns the following:
         | 
| 187 | 
            -
                # array[0]: "http://", array[2]: "www.google.co.uk", which we use.
         | 
| 203 | 
            +
                # Returns only the base of this URL e.g. the protocol and host combined.
         | 
| 188 204 | 
             
                #
         | 
| 189 | 
            -
                # @return [Wgit::Url] Base of self  | 
| 205 | 
            +
                # @return [Wgit::Url] Base of self e.g. http://www.google.co.uk.
         | 
| 190 206 | 
             
                def to_base
         | 
| 191 207 | 
             
                  if Wgit::Url.relative_link?(self)
         | 
| 192 208 | 
             
                    raise "A relative link doesn't have a base URL: #{self}"
         | 
| 193 209 | 
             
                  end
         | 
| 194 | 
            -
                   | 
| 195 | 
            -
                  if url_segs[0].nil? or url_segs[2].nil? or url_segs[2].empty?
         | 
| 210 | 
            +
                  if @uri.scheme.nil? or @uri.host.nil? or @uri.host.empty?
         | 
| 196 211 | 
             
                    raise "Both a protocol and host are needed: #{self}"
         | 
| 197 212 | 
             
                  end
         | 
| 198 | 
            -
                  base = "#{ | 
| 213 | 
            +
                  base = "#{@uri.scheme}://#{@uri.host}"
         | 
| 199 214 | 
             
                  Wgit::Url.new(base)
         | 
| 200 215 | 
             
                end
         | 
| 201 | 
            -
             | 
| 216 | 
            +
             | 
| 217 | 
            +
                # Returns the path of this URL e.g. the bit after the host without slashes.
         | 
| 218 | 
            +
                # For example:
         | 
| 219 | 
            +
                # Wgit::Url.new("http://www.google.co.uk/about.html/").to_path returns
         | 
| 220 | 
            +
                # "about.html". See Wgit::Url#to_endpoint if you want the slashes.
         | 
| 221 | 
            +
                #
         | 
| 222 | 
            +
                # @return [Wgit::Url] Path of self e.g. about.html.
         | 
| 223 | 
            +
                def to_path
         | 
| 224 | 
            +
                  path = @uri.path
         | 
| 225 | 
            +
                  return Wgit::Url.new(path) if path == '/'
         | 
| 226 | 
            +
                  path = path[1..-1] if path.start_with?('/')
         | 
| 227 | 
            +
                  path.chop! if path.end_with?('/')
         | 
| 228 | 
            +
                  Wgit::Url.new(path)
         | 
| 229 | 
            +
                end
         | 
| 230 | 
            +
             | 
| 231 | 
            +
                # Returns the endpoint of this URL e.g. the bit after the host with any
         | 
| 232 | 
            +
                # slashes included. For example:
         | 
| 233 | 
            +
                # Wgit::Url.new("http://www.google.co.uk/about.html/").to_endpoint returns
         | 
| 234 | 
            +
                # "/about.html/". See Wgit::Url#to_path if you don't want the slashes.
         | 
| 235 | 
            +
                #
         | 
| 236 | 
            +
                # @return [Wgit::Url] Endpoint of self e.g. /about.html/.
         | 
| 237 | 
            +
                def to_endpoint
         | 
| 238 | 
            +
                  endpoint = @uri.path
         | 
| 239 | 
            +
                  endpoint = '/' + endpoint unless endpoint.start_with?('/')
         | 
| 240 | 
            +
                  Wgit::Url.new(endpoint)
         | 
| 241 | 
            +
                end
         | 
| 242 | 
            +
             | 
| 243 | 
            +
                # Returns a new Wgit::Url containing just the query string of this URL
         | 
| 244 | 
            +
                # e.g. Given http://google.com?q=ruby, ruby is returned.
         | 
| 245 | 
            +
                #
         | 
| 246 | 
            +
                # @return [Wgit::Url] Containing just the query string.
         | 
| 247 | 
            +
                def to_query_string
         | 
| 248 | 
            +
                  Wgit::Url.new(@uri.query)
         | 
| 249 | 
            +
                end
         | 
| 250 | 
            +
             | 
| 202 251 | 
             
                # Returns a Hash containing this Url's instance vars excluding @uri.
         | 
| 203 252 | 
             
                # Used when storing the URL in a Database e.g. MongoDB etc.
         | 
| 204 253 | 
             
                #
         | 
| @@ -210,9 +259,20 @@ module Wgit | |
| 210 259 | 
             
                end
         | 
| 211 260 |  | 
| 212 261 | 
             
                alias :to_hash :to_h
         | 
| 262 | 
            +
                alias :uri :to_uri
         | 
| 263 | 
            +
                alias :url :to_url
         | 
| 264 | 
            +
                alias :scheme :to_scheme
         | 
| 265 | 
            +
                alias :to_protocol :to_scheme
         | 
| 266 | 
            +
                alias :protocol :to_scheme
         | 
| 213 267 | 
             
                alias :host :to_host
         | 
| 214 268 | 
             
                alias :base :to_base
         | 
| 269 | 
            +
                alias :path :to_path
         | 
| 270 | 
            +
                alias :endpoint :to_endpoint
         | 
| 271 | 
            +
                alias :query_string :to_query_string
         | 
| 272 | 
            +
                alias :query :to_query_string
         | 
| 215 273 | 
             
                alias :internal_link? :relative_link?
         | 
| 274 | 
            +
                alias :is_relative? :relative_link?
         | 
| 275 | 
            +
                alias :is_internal? :relative_link?
         | 
| 216 276 | 
             
                alias :crawled? :crawled
         | 
| 217 277 | 
             
              end
         | 
| 218 278 | 
             
            end
         | 
    
        data/lib/wgit/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: wgit
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.10
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Michael Telford
         | 
| @@ -28,16 +28,16 @@ dependencies: | |
| 28 28 | 
             
              name: yard
         | 
| 29 29 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 30 | 
             
                requirements:
         | 
| 31 | 
            -
                - - " | 
| 31 | 
            +
                - - ">="
         | 
| 32 32 | 
             
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            -
                    version:  | 
| 33 | 
            +
                    version: 0.9.20
         | 
| 34 34 | 
             
              type: :development
         | 
| 35 35 | 
             
              prerelease: false
         | 
| 36 36 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 37 | 
             
                requirements:
         | 
| 38 | 
            -
                - - " | 
| 38 | 
            +
                - - ">="
         | 
| 39 39 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            -
                    version:  | 
| 40 | 
            +
                    version: 0.9.20
         | 
| 41 41 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 42 42 | 
             
              name: byebug
         | 
| 43 43 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -108,6 +108,34 @@ dependencies: | |
| 108 108 | 
             
                - - "~>"
         | 
| 109 109 | 
             
                  - !ruby/object:Gem::Version
         | 
| 110 110 | 
             
                    version: '1.3'
         | 
| 111 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 112 | 
            +
              name: webmock
         | 
| 113 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 114 | 
            +
                requirements:
         | 
| 115 | 
            +
                - - "~>"
         | 
| 116 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 117 | 
            +
                    version: '3.6'
         | 
| 118 | 
            +
              type: :development
         | 
| 119 | 
            +
              prerelease: false
         | 
| 120 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 121 | 
            +
                requirements:
         | 
| 122 | 
            +
                - - "~>"
         | 
| 123 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 124 | 
            +
                    version: '3.6'
         | 
| 125 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 126 | 
            +
              name: rack
         | 
| 127 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 128 | 
            +
                requirements:
         | 
| 129 | 
            +
                - - "~>"
         | 
| 130 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 131 | 
            +
                    version: '2.0'
         | 
| 132 | 
            +
              type: :development
         | 
| 133 | 
            +
              prerelease: false
         | 
| 134 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 135 | 
            +
                requirements:
         | 
| 136 | 
            +
                - - "~>"
         | 
| 137 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 138 | 
            +
                    version: '2.0'
         | 
| 111 139 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 112 140 | 
             
              name: nokogiri
         | 
| 113 141 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -128,21 +156,21 @@ dependencies: | |
| 128 156 | 
             
                requirements:
         | 
| 129 157 | 
             
                - - "~>"
         | 
| 130 158 | 
             
                  - !ruby/object:Gem::Version
         | 
| 131 | 
            -
                    version:  | 
| 159 | 
            +
                    version: 2.8.0
         | 
| 132 160 | 
             
              type: :runtime
         | 
| 133 161 | 
             
              prerelease: false
         | 
| 134 162 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 135 163 | 
             
                requirements:
         | 
| 136 164 | 
             
                - - "~>"
         | 
| 137 165 | 
             
                  - !ruby/object:Gem::Version
         | 
| 138 | 
            -
                    version:  | 
| 139 | 
            -
            description: Wgit is a WWW indexer/scraper which crawls URL's, retrieves | 
| 140 | 
            -
              their page contents for later use. You can use Wgit to copy entire | 
| 141 | 
            -
              Wgit also provides a means to search indexed documents stored | 
| 142 | 
            -
              this library provides the main components of a WWW search | 
| 143 | 
            -
              easily extended allowing you to pull out the parts of a | 
| 144 | 
            -
              to you, the code snippets or images for example. As Wgit | 
| 145 | 
            -
               | 
| 166 | 
            +
                    version: 2.8.0
         | 
| 167 | 
            +
            description: Fundamentally, Wgit is a WWW indexer/scraper which crawls URL's, retrieves
         | 
| 168 | 
            +
              and serialises their page contents for later use. You can use Wgit to copy entire
         | 
| 169 | 
            +
              websites if required. Wgit also provides a means to search indexed documents stored
         | 
| 170 | 
            +
              in a database. Therefore, this library provides the main components of a WWW search
         | 
| 171 | 
            +
              engine. The Wgit API is easily extended allowing you to pull out the parts of a
         | 
| 172 | 
            +
              webpage that are important to you, the code snippets or images for example. As Wgit
         | 
| 173 | 
            +
              is a library, it has uses in many different application types.
         | 
| 146 174 | 
             
            email: michael.telford@live.com
         | 
| 147 175 | 
             
            executables: []
         | 
| 148 176 | 
             
            extensions: []
         | 
| @@ -166,6 +194,7 @@ licenses: | |
| 166 194 | 
             
            - MIT
         | 
| 167 195 | 
             
            metadata:
         | 
| 168 196 | 
             
              source_code_uri: https://github.com/michaeltelford/wgit
         | 
| 197 | 
            +
              yard.run: yri
         | 
| 169 198 | 
             
              allowed_push_host: https://rubygems.org
         | 
| 170 199 | 
             
            post_install_message: 
         | 
| 171 200 | 
             
            rdoc_options: []
         | 
| @@ -183,9 +212,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 183 212 | 
             
                  version: '0'
         | 
| 184 213 | 
             
            requirements: []
         | 
| 185 214 | 
             
            rubyforge_project: 
         | 
| 186 | 
            -
            rubygems_version: 2.7. | 
| 215 | 
            +
            rubygems_version: 2.7.6
         | 
| 187 216 | 
             
            signing_key: 
         | 
| 188 217 | 
             
            specification_version: 4
         | 
| 189 | 
            -
            summary: Wgit is  | 
| 190 | 
            -
              web scraping, indexing and searching.
         | 
| 218 | 
            +
            summary: Wgit is a Ruby gem similar in nature to GNU's `wget`. It provides an easy
         | 
| 219 | 
            +
              to use API for programmatic web scraping, indexing and searching.
         | 
| 191 220 | 
             
            test_files: []
         |