speed_spider 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -5
- data/bin/{speed_spider → spider} +0 -0
- data/lib/speed_spider/cli.rb +1 -1
- data/lib/speed_spider/version.rb +1 -1
- data/speed_spider.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86a431c73577bdaa232e9b9d0139f908ed64c0fe
|
4
|
+
data.tar.gz: 4dbdb508c96cad39a4c9da65ed2175135dcdd4c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 624c2c4ad8693a99dffe6637410918d3144dd4217d9cd6b81852f17ac7bd0ac7e8eafa1dfdabb5fdba3f895036edc3e8c2477e08573f39478d826b433713114f
|
7
|
+
data.tar.gz: c2d2285bd6eab018ea4d1f7a48a708bbb7e1db49f7f047da57b5a42a5e5de3f1cec5c2e3ee0240d08a6f5888093c00c596a1617797a37582d0bbb10b47256f67
|
data/README.md
CHANGED
@@ -9,7 +9,7 @@ SpeedSpider is based on ruby spider framework [Anemone][1], it's easy to use and
|
|
9
9
|
### links in html pages
|
10
10
|
|
11
11
|
* link, xpath: `//a[@href]`
|
12
|
-
* stylesheet, xpath: `//link[@
|
12
|
+
* stylesheet, xpath: `//link[@href]`
|
13
13
|
* javascript, xpath: `//script[@src]`
|
14
14
|
* iframe file, xpath: `//iframe[@src]`
|
15
15
|
* image file, xpath: `//img[@src]`
|
@@ -25,7 +25,7 @@ install it with rubygem:
|
|
25
25
|
gem install 'speed_spider'
|
26
26
|
|
27
27
|
## Usage
|
28
|
-
Usage: speed_spider [options] start_url
|
28
|
+
Usage: spider [options] start_url
|
29
29
|
|
30
30
|
options:
|
31
31
|
-S, --slient slient output
|
@@ -46,13 +46,13 @@ install it with rubygem:
|
|
46
46
|
|
47
47
|
## Example
|
48
48
|
|
49
|
-
|
49
|
+
spider http://twitter.github.io/bootstrap/
|
50
50
|
|
51
51
|
It will download all files within the same domain as `twitter.github.io`, and save to `download/twitter.github.io/`.
|
52
52
|
|
53
|
-
|
53
|
+
spider -b http://ruby-doc.org/core-2.0/ http://ruby-doc.org/core-2.0/
|
54
54
|
|
55
|
-
It will only download urls start with http://ruby-doc.org/core-2.0
|
55
|
+
It will only download urls start with `http://ruby-doc.org/core-2.0/`, notice `assets` files like image, css, js, font will not obey `base_url` rule.
|
56
56
|
|
57
57
|
## Contributing
|
58
58
|
|
data/bin/{speed_spider → spider}
RENAMED
File without changes
|
data/lib/speed_spider/cli.rb
CHANGED
data/lib/speed_spider/version.rb
CHANGED
data/speed_spider.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = SpeedSpider::VERSION
|
9
9
|
spec.authors = ["Ryan Wang"]
|
10
10
|
spec.email = ["wongyouth@gmail.com"]
|
11
|
-
spec.description = %q{A simple web spider
|
11
|
+
spec.description = %q{A simple and speedy web spider for pages downloading}
|
12
12
|
spec.summary = %q{A simple web spider tool for download pages from a base url including css js html and iframe source files}
|
13
13
|
spec.homepage = ""
|
14
14
|
spec.license = "MIT"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: speed_spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Wang
|
@@ -52,11 +52,11 @@ dependencies:
|
|
52
52
|
- - '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description: A simple web spider
|
55
|
+
description: A simple and speedy web spider for pages downloading
|
56
56
|
email:
|
57
57
|
- wongyouth@gmail.com
|
58
58
|
executables:
|
59
|
-
- speed_spider
|
59
|
+
- spider
|
60
60
|
extensions: []
|
61
61
|
extra_rdoc_files: []
|
62
62
|
files:
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- LICENSE.txt
|
66
66
|
- README.md
|
67
67
|
- Rakefile
|
68
|
-
- bin/speed_spider
|
68
|
+
- bin/spider
|
69
69
|
- lib/speed_spider.rb
|
70
70
|
- lib/speed_spider/anemone_hack.rb
|
71
71
|
- lib/speed_spider/cli.rb
|