sutch-anemone 0.7.2 → 0.7.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWExNTI1ZGRiMjA0NTZhNzk3Y2YyMDYzNDcxMDQwNTk4NDJkMGI2NQ==
4
+ OGEwMGMxZjNmMDdkNmUxNmU3ZGM2ZTNiNDAzMmRhZDlhNzQ1MmIyYg==
5
5
  data.tar.gz: !binary |-
6
- YWE2MDg1OTQzNTEyOTZiZTJjMTUzNWZiMzgxNTBjMDJmMzBkYjYzZQ==
6
+ ZTcxMGJlZTgyNmEzY2U4OTA3MmUxYTY1MjQwYjRmYWViNzhmODhiMA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- YWY1MjliMGJjMzRhZGQ1OTVmYWVlYzI3YmU3YTI1ZGVjMDk5NjAwZGEwZmJh
10
- MzA0Y2Q4ZWMwODM3MzgyMTU5ZTk2NTE3ZDFhNDc4MDBmOWZjNWViOWQ1Nzlk
11
- ZDU4OTA4Y2VkNWI1MDA1ZjUyNWQ2YzJkMDA3YmJiNGQwZTczMGM=
9
+ NTFhOWRjMzk5NjE4MmU4Zjk5ODYyNmQzN2U2OWRiZGQ0OWI0YTE0NmNiMzhi
10
+ MTQ5MmQwOWUyYWJjMDkxYTBmY2RiN2QwOTAwYmI3YzY1NjM2YWI0ZTU2ZDAy
11
+ OGI2YjJmMjUwYmRhZjNlYWI3ZjYxMzAxNzdlOWQzYzQwNDZiZjA=
12
12
  data.tar.gz: !binary |-
13
- MjYzODE4ZTIwYjM5OTljMDY2NTdkNzRiY2FlOWJkOGMxNmZjNzE4M2JkN2Fk
14
- YjQ4MjE5NjM2NjllZmJhODc4M2UzYjYwYTZhY2ZhZWRiYzgwZjk4MDZmYzEy
15
- N2FkMjZiZjdiMmI2NWI2N2I3MDUyNWM2YmI0YTIyODAzZmQ1Yzg=
13
+ M2ZkNzkyZmNmMTNkMDJkYjc2ODIwNWNlOGM3OTdlOWFkZjJmMjg1ZTJiYzkz
14
+ YmVlYTYzZGNlZDA0M2I5MzA4MTk5NjVmYmI5ZDBhOTg0MGVlODE2ZjYyNmRm
15
+ MDAyZTVhNDc0NWU2M2I0YzM1ZmRhZWFkOTMxNjZjNmNmZmQwOWU=
data/CHANGELOG.rdoc CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  == sutch's branch
4
4
 
5
+ * Changed SQLite3 timeout to allow for other clients to read from database
6
+ * Changed for wmonk: allow restarting of spider by not checking starting URLs
5
7
  * Added Anemone::Resource to provide for spidering of resources other than HTML pages
6
8
 
7
9
  == 0.7.2 / 2012-05-30
data/README.rdoc CHANGED
@@ -6,6 +6,8 @@ write your own specialized spider tasks quickly and easily.
6
6
 
7
7
  See http://anemone.rubyforge.org for more information.
8
8
 
9
+ This branch of Anemone, sutch-anemone, has been enhanced for {wmonk}[https://github.com/sutch/wmonk].
10
+
9
11
  == Features
10
12
  * Multi-threaded design for high performance
11
13
  * Tracks 301 HTTP redirects
data/lib/anemone/core.rb CHANGED
@@ -168,8 +168,9 @@ module Anemone
168
168
  def run
169
169
  process_options
170
170
 
171
- @urls.delete_if { |url| !visit_link?(url) }
172
- return if @urls.empty?
171
+ # trust that we're provided with URLs that have not yet been requested
172
+ #@urls.delete_if { |url| !visit_link?(url) }
173
+ #return if @urls.empty?
173
174
 
174
175
  link_queue = build_queue(@opts[:link_queue_size_limit])
175
176
  page_queue = build_queue(@opts[:page_queue_size_limit])
data/lib/anemone/storage/sqlite3.rb CHANGED
@@ -11,6 +11,7 @@ module Anemone
11
11
 
12
12
  def initialize(file)
13
13
  @db = ::SQLite3::Database.new(file)
14
+ @db.busy_timeout = 4000
14
15
  create_schema
15
16
  end
16
17
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sutch-anemone
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.7.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Kite (Dennis Sutch's fork)
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-01 00:00:00.000000000 Z
11
+ date: 2013-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri