spidermech 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -5
- data/lib/spidermech.rb +0 -1
- data/spidermech.gemspec +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 470adf10493e1607b18798221f1d43a83ddfb06b
|
4
|
+
data.tar.gz: b4fed7921f1a540c49f11c405fe3c404ece38978
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb6ffec034eeb8fb1e019dd53553aabfacb5a1bf5111d7d308fe340986ee39eb8ea7558046e634c017d3b91641702085ceb47cc6cd30861ecd2f5fe70ec579f5
|
7
|
+
data.tar.gz: 5c752359c2b958aacf2d3d39fdfca31e0224f816024f6360ca352c6f80a9d2ba7d8ed416fb5e607af19867de45eda7ace5af437af49ca42e59b730f218a1d8e9
|
data/README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
#
|
1
|
+
# SpiderMech
|
2
2
|
|
3
|
-
|
3
|
+
SpiderMech crawls a given domain and reports, for each URL it visits, the pages that URL links to and the assets that page depends on.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
|
-
gem '
|
9
|
+
gem 'spidermech'
|
10
10
|
|
11
11
|
And then execute:
|
12
12
|
|
@@ -18,16 +18,34 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Gem Usage
|
20
20
|
|
21
|
-
|
21
|
+
require 'spidermech'
|
22
|
+
spider = SpiderMech.new 'http://google.com'
|
23
|
+
spider.run # returns the sitemap hash
|
24
|
+
spider.save_json # saves the sitemap hash as google.com.json
|
22
25
|
|
23
26
|
## Command Line Usage
|
24
27
|
|
25
28
|
The gem provides a command line tool. You can invoke it via
|
26
29
|
|
27
|
-
bundle exec
|
30
|
+
bundle exec spidermech http://google.com
|
28
31
|
|
29
32
|
It will crawl the page and give you the appropriate output.
|
30
33
|
|
34
|
+
## Sample Output
|
35
|
+
|
36
|
+
[{:url=>"http://localhost:8321",
|
37
|
+
:assets=>
|
38
|
+
{:scripts=>["https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js", "http://getbootstrap.com/dist/js/bootstrap.min.js"],
|
39
|
+
|
40
|
+
:images=>[],
|
41
|
+
|
42
|
+
:css=>
|
43
|
+
["http://getbootstrap.com/dist/css/bootstrap.min.css", "http://getbootstrap.com/examples/starter-template/starter-template.css"]},
|
44
|
+
|
45
|
+
:links
|
46
|
+
=>["/", "/about.html", "/contact.html"]},
|
47
|
+
]
|
48
|
+
|
31
49
|
## Contributing
|
32
50
|
|
33
51
|
1. Fork it ( http://github.com/<my-github-username>/crawler/fork )
|
data/lib/spidermech.rb
CHANGED
data/spidermech.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "spidermech"
|
7
|
-
spec.version = '0.0.1'
|
7
|
+
spec.version = '0.0.2'
|
8
8
|
spec.authors = ["Caleb Albritton"]
|
9
9
|
spec.email = ["ithinkincode@gmail.com"]
|
10
10
|
spec.summary = "Single URL crawler."
|