crawl 1.1.4 → 1.1.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 103da22dba2987ec6ec5bbf700473cb243170fc5
4
- data.tar.gz: 8197231448106e4fe0b198b53e734110eb1748a9
3
+ metadata.gz: cd091ac6b2679c9dbe5336a73dfd7acab9e9ee65
4
+ data.tar.gz: 6457eced0c6e538809ad22fc5045615d37ce8a78
5
5
  SHA512:
6
- metadata.gz: 206ff34e285a074b7dad8ebb68453295df14d67b95b3a4755d823a0d4bd2a3bae92859ff02fe52dee6b134c02966c7f4d9c61e52e1938c3815d56fc7debf7387
7
- data.tar.gz: 2f8f655862a28cd4772317866b72ca31c392c7c63997226bac8c2ceccfe15fca203501189a4a0b9db0ebe002555fc9932265a016cd63c247b85b355398434592
6
+ metadata.gz: 5538098c9d5b5f6785524e1378564a7121443dca24c63eeb4ffb9f12721afbd9d09db08e3bbea22be560917468405bb519e1d09f24c683316e085d2ddf3e17a2
7
+ data.tar.gz: 3f53020b8432926125b338f5114bdcdeb35560b7060d6d123855f5d69671a8398710d0b9a6a3d1e9ae333ba6635d03570218ac0c2339804c0d1109063bc48b1b
@@ -21,7 +21,7 @@ class Crawl::Engine
21
21
  @authorization = Base64.encode64("#{options[:username]}:#{options[:password]}")
22
22
  @register = Crawl::Register.new
23
23
 
24
- start_pages = options[:start].to_a.map{|page| Page.new(@register, page, 'the command line')}
24
+ start_pages = options[:start].to_a.map{|page| Page.new(@register, page, '/')}
25
25
 
26
26
  @register.add(start_pages)
27
27
  end
@@ -109,4 +109,4 @@ private
109
109
  raw_links.delete_if{|link| IGNORE.any?{|pattern| link =~ pattern}}
110
110
  raw_links.map{ |url| Page.new(@register, url, page.url) }
111
111
  end
112
- end
112
+ end
@@ -1,3 +1,5 @@
1
+ require 'uri'
2
+
1
3
  class Page
2
4
  include Comparable
3
5
 
@@ -14,27 +16,19 @@ class Page
14
16
  end
15
17
 
16
18
  def relative_url
17
- if url.start_with?('/')
18
- url
19
- else
20
- "#{source_directory}/#{url}"
21
- end
22
- end
23
-
24
- def source_directory
25
- File.split(source).first.sub(/^\./, '').sub(/\/$/, '')
19
+ @relative_url ||= URI.join('http://example.com', source, url).path
26
20
  end
27
21
 
28
22
  def <=>(other)
29
- url <=> other.url
23
+ relative_url <=> other.relative_url
30
24
  end
31
25
 
32
26
  def eql?(other)
33
- url.eql?(other.url)
27
+ relative_url.eql?(other.relative_url)
34
28
  end
35
29
 
36
30
  def hash
37
- url.hash
31
+ relative_url.hash
38
32
  end
39
33
 
40
34
  def success
@@ -1,4 +1,4 @@
1
1
  # encoding: utf-8
2
2
  module Crawl
3
- VERSION = "1.1.4"
3
+ VERSION = "1.1.6"
4
4
  end
@@ -3,9 +3,11 @@ require './lib/crawl/page'
3
3
  RSpec.describe Page do
4
4
  describe "#relative_url" do
5
5
  specify { expect(Page.new(:register, "/", "/").relative_url).to eq "/" }
6
+ specify { expect(Page.new(:register, "./", "/").relative_url).to eq "/" }
6
7
  specify { expect(Page.new(:register, "page.html", "").relative_url).to eq "/page.html" }
7
8
  specify { expect(Page.new(:register, "/interview", "/").relative_url).to eq "/interview" }
8
9
  specify { expect(Page.new(:register, "overview.html", "/").relative_url).to eq "/overview.html" }
9
10
  specify { expect(Page.new(:register, "post-5.html", "/posts/index.html").relative_url).to eq "/posts/post-5.html" }
11
+ specify { expect(Page.new(:register, "https://staging.alphasights.com/careers/meet-us", "/posts/foo").relative_url).to eq "/careers/meet-us" }
10
12
  end
11
13
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.4
4
+ version: 1.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tor Erik Linnerud