coolCrawler 0.4.0 → 0.4.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported public registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in that registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7aecaddb63af3eb7a621f2cd75472291e0376ca57ef30ec229edcbe274bc704b
4
- data.tar.gz: f42a0b9230a6886169cc7dd35f25f03dcb8b1f6d53ec5515c32ed647e2249ee5
3
+ metadata.gz: 035e8dbe27d4648d16243fea96c692b4a9bde8973d13df02b66aad3aa742afc3
4
+ data.tar.gz: 687985a3d0e391aedc610ecae0329299a36c8edc90704b9f9d92118532d3d7cc
5
5
  SHA512:
6
- metadata.gz: 36442036c64c324adf090c7ccb4b6cc474875471ac1dc54ead21604e9de8209036c608e18d0b83bf97e37281720c66cf5b4a8084fb97ed7d81aa7aa1f58e7387
7
- data.tar.gz: 38a40361a3ee2d1b34efbf21e482fa4663f89d20731490e1ebd2e6e014cd66537cd95e0da9fdef14ec33d02f59d0aa054fcca383e158e2ed9fbe544501771ce6
6
+ metadata.gz: 224908b5e8f495063ddc81c178f10e276b2744c8d8b71b16538b3ac2eed70978e686cecc1cbf900d3380a1919ec5e438dd0a1dc559b7d6abbde30e9bd44f819e
7
+ data.tar.gz: 0ee4aa741315bdd8a2774ee28b762a521f12489356621efe6cf38fab66898a2210313b7e8f07eb74ebd95492b20b80e6548d84a0333f10ac8c09ef6e96410b53
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module CoolCrawler
4
- VERSION = "0.4.0"
4
+ VERSION = "0.4.2"
5
5
  end
data/lib/cool_crawler.rb CHANGED
@@ -13,11 +13,13 @@ module CoolCrawler
13
13
  # This is the class that handles the queue and async requests
14
14
  class CrawlerPool
15
15
 
16
- def initialize(start, max_connections, delay)
16
+ def initialize(start, max_connections, delay, max_pages=50)
17
17
  @uri = URI(start)
18
+ @max_pages = max_pages
18
19
  @site = "#{uri.scheme}://#{uri.host}"
19
20
  @max_connections = max_connections
20
21
  @delay = delay
22
+ @visited_pages = 0
21
23
  visited[uri.path] = 1
22
24
  queue << uri.path
23
25
  end
@@ -54,6 +56,7 @@ module CoolCrawler
54
56
  after(page, links, body)
55
57
  links.each do |link|
56
58
  enqueue(link)
59
+ @visited_pages += 1
57
60
  add_to_visited(link)
58
61
  end
59
62
  end
@@ -104,7 +107,18 @@ module CoolCrawler
104
107
  end
105
108
 
106
109
  def enqueue(path)
107
- queue << path unless visited.include?(path)
110
+ unless visited.include?(path) or @visited_pages > @max_pages
111
+ queue << path
112
+ p queue.size
113
+ end
114
+ end
115
+
116
+ def sum_pages
117
+ sum = 0
118
+ visited.each do |_k, v|
119
+ sum += v
120
+ end
121
+ sum
108
122
  end
109
123
  end
110
124
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: coolCrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - William Wright
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-10-14 00:00:00.000000000 Z
11
+ date: 2022-10-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec