sutch-anemone 0.7.2 → 0.7.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.rdoc +2 -0
- data/README.rdoc +2 -0
- data/lib/anemone/core.rb +3 -2
- data/lib/anemone/storage/sqlite3.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OGEwMGMxZjNmMDdkNmUxNmU3ZGM2ZTNiNDAzMmRhZDlhNzQ1MmIyYg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZTcxMGJlZTgyNmEzY2U4OTA3MmUxYTY1MjQwYjRmYWViNzhmODhiMA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NTFhOWRjMzk5NjE4MmU4Zjk5ODYyNmQzN2U2OWRiZGQ0OWI0YTE0NmNiMzhi
|
10
|
+
MTQ5MmQwOWUyYWJjMDkxYTBmY2RiN2QwOTAwYmI3YzY1NjM2YWI0ZTU2ZDAy
|
11
|
+
OGI2YjJmMjUwYmRhZjNlYWI3ZjYxMzAxNzdlOWQzYzQwNDZiZjA=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
M2ZkNzkyZmNmMTNkMDJkYjc2ODIwNWNlOGM3OTdlOWFkZjJmMjg1ZTJiYzkz
|
14
|
+
YmVlYTYzZGNlZDA0M2I5MzA4MTk5NjVmYmI5ZDBhOTg0MGVlODE2ZjYyNmRm
|
15
|
+
MDAyZTVhNDc0NWU2M2I0YzM1ZmRhZWFkOTMxNjZjNmNmZmQwOWU=
|
data/CHANGELOG.rdoc
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
== sutch's branch
|
4
4
|
|
5
|
+
* Changed the SQLite3 timeout to allow other clients to read from the database
|
6
|
+
* Changed for wmonk: allow restarting of spider by not checking starting URLs
|
5
7
|
* Added Anemone::Resource to provide for spidering of resources other than HTML pages
|
6
8
|
|
7
9
|
== 0.7.2 / 2012-05-30
|
data/README.rdoc
CHANGED
@@ -6,6 +6,8 @@ write your own specialized spider tasks quickly and easily.
|
|
6
6
|
|
7
7
|
See http://anemone.rubyforge.org for more information.
|
8
8
|
|
9
|
+
This branch of Anemone, sutch-anemone, has been enhanced for {wmonk}[https://github.com/sutch/wmonk].
|
10
|
+
|
9
11
|
== Features
|
10
12
|
* Multi-threaded design for high performance
|
11
13
|
* Tracks 301 HTTP redirects
|
data/lib/anemone/core.rb
CHANGED
@@ -168,8 +168,9 @@ module Anemone
|
|
168
168
|
def run
|
169
169
|
process_options
|
170
170
|
|
171
|
-
|
172
|
-
|
171
|
+
# trust that we're provided with URLs that have not yet been requested
|
172
|
+
#@urls.delete_if { |url| !visit_link?(url) }
|
173
|
+
#return if @urls.empty?
|
173
174
|
|
174
175
|
link_queue = build_queue(@opts[:link_queue_size_limit])
|
175
176
|
page_queue = build_queue(@opts[:page_queue_size_limit])
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sutch-anemone
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.2
|
4
|
+
version: 0.7.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Kite (Dennis Sutch's fork)
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|