grucrawler 0.0.2 → 0.0.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5723aa43bafd68af0aa0499c9f748bfca57a580
4
- data.tar.gz: 3e7046a91e6594c17e987bfb8d934402ee2826dc
3
+ metadata.gz: ed98643e2c2212bb044726cf3dda3af19a06ba8d
4
+ data.tar.gz: 70fadbce9333a32831ba5c5150916cecfeda5bf6
5
5
  SHA512:
6
- metadata.gz: b17e8fdd06816a6e4a1294a9dbf384d95519d1f4046d2d1c4bbf2e4c5821b4839f26e4290e4d655431a1f23beb9fb918c729ae34c6d56c4a6e71e0daf67a6520
7
- data.tar.gz: 1f253a2fe81b1ae537a0c4de4ed9626261f8f2b72567959839550b883cf41b0c952e164ca1b4ff7a8d1a8378cb7a76a6acfcee9573da48219164069659da67df
6
+ metadata.gz: 778c8ae783b46a8cb82448156da1741c4780d10148a673702071fe53ff896e587106dc66c54df52c642b8f29d5d69d16060cd20e2f6df466c6deece3cf0d8a0f
7
+ data.tar.gz: fcfb89156687d864a66f955d9c309b5ccbc0b2cd0a88a4b94802a1d5cab52d34399f73071f1783ca930f4df510e5dba617d25e30272d5d257c4e8dc522eb2f87
data/README.md CHANGED
@@ -10,7 +10,8 @@ class ItalianCrawler
10
10
  visit_urls_only_once: true,
11
11
  follow_redirects: true,
12
12
  concurrency: 5,
13
- domain_wait: 20 # seconds between visits to the same domain
13
+ domain_wait: 20, # seconds between visits to the same domain
14
+ max_page_size: 1000000
14
15
  }
15
16
  end
16
17
 
@@ -1,3 +1,3 @@
1
1
  class GruCrawler
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/grucrawler.rb CHANGED
@@ -56,6 +56,11 @@ class GruCrawler
56
56
 
57
57
  crawl_more()
58
58
 
59
+ if response.body.length > (@options[:max_page_size] || 1000*1000*1000)
60
+ @crawler.debug("URL response size too big: #{response.body.length} from #{response.request.url}")
61
+ return
62
+ end
63
+
59
64
  nokogiri = Nokogiri::HTML(response.body)
60
65
 
61
66
  begin
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grucrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Slava Vishnyakov