coolCrawler 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8c2944180ee7c5d7f1241fd62bf5e89973a2fca742c51567e1550f14f95f70a
4
- data.tar.gz: 6635f39af2babaead932e94cae419738d7bd61e0ecc2f23216cb6102607904bf
3
+ metadata.gz: 79059c4a0ef1c026082b3ef2b25ee13c2310984a2049be4345f0345b226d2e99
4
+ data.tar.gz: 32d02d444ef3553df02c2764073b1d9f5cbdabea272ee1d2b3eb9e80a5a29434
5
5
  SHA512:
6
- metadata.gz: 4f56da51ab47060e7d58b3ac469ca15501b2b457a936b634b361afa0f832a27130c74169d7c1da4f3a188f56db58335c59325de261990ee02bbdbd1431ad24da
7
- data.tar.gz: 77dc4e5cdb5d0098ba8f91279bc4d658c653130313fbb49de52c3e2c2b707c4501fcf77a31f2a6a54b0b48ca12e54ab674423ab377e22e043a6e6cce0093a6df
6
+ metadata.gz: 56d89149672219eb082eb188d2d79fde2d97931efe8295983719a5693c5b96c07e607a309d03690e8a739a2a3018f73c7d0fbd18da6003cbafa7969633890b2e
7
+ data.tar.gz: f37a6061b4b318cb19423ab4de961a98b625d1e17e6f85eb78524794a967a2f5dda4bceaa5c5414b2df33a359c9c0fd5a8e8a7bfe22c4f872e41a9e9c5f46d48
data/lib/cool_crawler/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CoolCrawler
4
- VERSION = "0.4.3"
4
+ VERSION = "0.4.4"
5
5
  end
data/lib/cool_crawler.rb CHANGED
@@ -31,7 +31,7 @@ module CoolCrawler
31
31
  end
32
32
 
33
33
  def run
34
- until queue.empty?
34
+ until queue.empty? || @visited_pages >= @max_pages
35
35
  send_crawlers
36
36
  sleep(delay)
37
37
  end
@@ -43,7 +43,10 @@ module CoolCrawler
43
43
 
44
44
  def send_crawlers
45
45
  pages = []
46
- pages << queue.pop until queue.empty? || pages.size >= max_connections
46
+ until queue.empty? || pages.size >= max_connections || @visited_pages >= @max_pages
47
+ pages << queue.pop
48
+ @visited_pages += 1
49
+ end
47
50
  Async do
48
51
  internet = Async::HTTP::Internet.new
49
52
  barrier = Async::Barrier.new
@@ -76,9 +79,7 @@ module CoolCrawler
76
79
  next if a["href"].nil?
77
80
  uri_a = URI(a["href"].strip.split('#')[0].sub(/\\|(\s+$)/, ""))
78
81
  begin
79
- if @visited_pages <= @max_pages
80
- links << URI.join(page, uri_a).path if (uri_a.host == uri.host || uri_a.host.nil?) && uri_a.path
81
- end
82
+ links << URI.join(page, uri_a).path if (uri_a.host == uri.host || uri_a.host.nil?) && uri_a.path
82
83
  rescue
83
84
  # do nothing
84
85
  end
@@ -104,7 +105,6 @@ module CoolCrawler
104
105
  else
105
106
  visited[path] = 1
106
107
  end
107
- @visited_pages += 1
108
108
  end
109
109
 
110
110
  def sorted_visited
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coolCrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - William Wright