coolCrawler 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a8c2944180ee7c5d7f1241fd62bf5e89973a2fca742c51567e1550f14f95f70a
4
- data.tar.gz: 6635f39af2babaead932e94cae419738d7bd61e0ecc2f23216cb6102607904bf
3
+ metadata.gz: 79059c4a0ef1c026082b3ef2b25ee13c2310984a2049be4345f0345b226d2e99
4
+ data.tar.gz: 32d02d444ef3553df02c2764073b1d9f5cbdabea272ee1d2b3eb9e80a5a29434
5
5
  SHA512:
6
- metadata.gz: 4f56da51ab47060e7d58b3ac469ca15501b2b457a936b634b361afa0f832a27130c74169d7c1da4f3a188f56db58335c59325de261990ee02bbdbd1431ad24da
7
- data.tar.gz: 77dc4e5cdb5d0098ba8f91279bc4d658c653130313fbb49de52c3e2c2b707c4501fcf77a31f2a6a54b0b48ca12e54ab674423ab377e22e043a6e6cce0093a6df
6
+ metadata.gz: 56d89149672219eb082eb188d2d79fde2d97931efe8295983719a5693c5b96c07e607a309d03690e8a739a2a3018f73c7d0fbd18da6003cbafa7969633890b2e
7
+ data.tar.gz: f37a6061b4b318cb19423ab4de961a98b625d1e17e6f85eb78524794a967a2f5dda4bceaa5c5414b2df33a359c9c0fd5a8e8a7bfe22c4f872e41a9e9c5f46d48
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CoolCrawler
4
- VERSION = "0.4.3"
4
+ VERSION = "0.4.4"
5
5
  end
data/lib/cool_crawler.rb CHANGED
@@ -31,7 +31,7 @@ module CoolCrawler
31
31
  end
32
32
 
33
33
  def run
34
- until queue.empty?
34
+ until queue.empty? || @visited_pages >= @max_pages
35
35
  send_crawlers
36
36
  sleep(delay)
37
37
  end
@@ -43,7 +43,10 @@ module CoolCrawler
43
43
 
44
44
  def send_crawlers
45
45
  pages = []
46
- pages << queue.pop until queue.empty? || pages.size >= max_connections
46
+ until queue.empty? || pages.size >= max_connections || @visited_pages >= @max_pages
47
+ pages << queue.pop
48
+ @visited_pages += 1
49
+ end
47
50
  Async do
48
51
  internet = Async::HTTP::Internet.new
49
52
  barrier = Async::Barrier.new
@@ -76,9 +79,7 @@ module CoolCrawler
76
79
  next if a["href"].nil?
77
80
  uri_a = URI(a["href"].strip.split('#')[0].sub(/\\|(\s+$)/, ""))
78
81
  begin
79
- if @visited_pages <= @max_pages
80
- links << URI.join(page, uri_a).path if (uri_a.host == uri.host || uri_a.host.nil?) && uri_a.path
81
- end
82
+ links << URI.join(page, uri_a).path if (uri_a.host == uri.host || uri_a.host.nil?) && uri_a.path
82
83
  rescue
83
84
  # do nothing
84
85
  end
@@ -104,7 +105,6 @@ module CoolCrawler
104
105
  else
105
106
  visited[path] = 1
106
107
  end
107
- @visited_pages += 1
108
108
  end
109
109
 
110
110
  def sorted_visited
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coolCrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - William Wright