spidermech 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +37 -0
- data/Rakefile +8 -0
- data/bin/spidermech +15 -0
- data/lib/spidermech.rb +134 -0
- data/spec/spec_helper.rb +28 -0
- data/spec/spidermech_spec.rb +66 -0
- data/spidermech.gemspec +27 -0
- data/test_site/about.html +67 -0
- data/test_site/contact.html +67 -0
- data/test_site/css_only.html +1 -0
- data/test_site/empty.html +0 -0
- data/test_site/image_only.html +1 -0
- data/test_site/index.html +67 -0
- data/test_site/script_only.html +1 -0
- metadata +150 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 637a38fca20a36c523b57ae807ffa30b1b9d63f3
|
4
|
+
data.tar.gz: 9b417e4ce15fea26b72127c4b4b6624f0ac85847
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 11c1e177153ab63db942a826ab877242997d9d22e2e8971cf87d391fc7969e00d82475f504724054c4722bbc981c8257150f4c75ba19d9bd59013b38f6bff6b3
|
7
|
+
data.tar.gz: fed4c3d45c6949e190239e435eab66aa74ae5c13e613be34a72edf3eb16d669bf495095d0804c7c07d4ed94470d5db2228cc7806c3e3dfcb1e89d12e43f8a58f
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.0
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Caleb Albritton
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# SpiderMech
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'spidermech'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install spidermech
|
18
|
+
|
19
|
+
## Gem Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Command Line Usage
|
24
|
+
|
25
|
+
The gem provides a command line tool. You can invoke it via
|
26
|
+
|
27
|
+
bundle exec spidermech http://google.com
|
28
|
+
|
29
|
+
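It will crawl the site starting at that URL, write the collected page data to `<host>.json` in the current directory, and log its progress to `spidermech.log`.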
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
1. Fork it ( http://github.com/<my-github-username>/spidermech/fork )
|
34
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
35
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
36
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
37
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/spidermech
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line entry point: crawls the site whose URL is given as the first
# argument and saves the collected data through SpiderMech#save_json.

# Locate lib/ relative to this script instead of the current working
# directory, so the executable works regardless of where it is invoked from.
lib = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'uri'
require 'json'
require 'spidermech'

raise ArgumentError, 'You must provide a url to crawl.' if ARGV.empty?

url = ARGV[0]

spider = SpiderMech.new url
spider.run
spider.save_json
|
data/lib/spidermech.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'logger'
|
3
|
+
require 'json'
|
4
|
+
require 'pry'
|
5
|
+
|
6
|
+
# Crawls a site starting from a single URL. URLs are processed in FIFO order;
# for every HTML page fetched it records the script sources, image sources and
# <link> hrefs found on the page, plus the links that stay on the same site.
class SpiderMech
  # URLs still waiting to be processed (may contain duplicates and URLs that
  # have already been crawled; those are skipped when they are dequeued).
  attr_reader :queue
  # URLs of the HTML pages that have been fetched and recorded, in crawl order.
  attr_reader :crawled
  # One hash per crawled page:
  #   { :url => String,
  #     :assets => { :scripts => [...], :images => [...], :css => [...] },
  #     :links => [...] }
  attr_reader :data

  # @param start_page [String] URL the crawl starts from. Absolute links are
  #   only followed when they begin with this exact string.
  def initialize(start_page)
    @logger = Logger.new 'spidermech.log'
    @start_page = start_page

    @queue = [@start_page]
    @crawled = []
    @data = []

    @bot = Mechanize.new
  end

  # @return [Integer] number of queue entries that have not been crawled yet
  #   (duplicate entries are counted individually).
  def left_in_queue
    @queue.count { |link| !@crawled.include?(link) }
  end

  # Writes #data as JSON to "<host of start page>.json" in the current
  # working directory.
  def save_json
    filename = "#{URI.parse(@start_page).host}.json"
    @logger.info "Writing sitemap data to #{filename}"
    File.open(filename, 'w') { |f| f.write @data.to_json }
  end

  # Crawls until the queue is exhausted.
  #
  # @return [Array<Hash>] the collected page data (same object as #data).
  def run
    crawl until @queue.empty?

    @data
  end

  # Takes the next URL off the queue, fetches it and, when it is an HTML page
  # that has not been crawled before, records its assets and same-site links
  # and enqueues those links.
  def crawl
    url = @queue.shift
    return if url.nil? || @crawled.include?(url)

    @logger.info "Crawling #{url}"
    @logger.info "Left in Queue: #{left_in_queue}"

    page = @bot.get url

    # Mechanize returns other classes for non-HTML responses; they have no
    # links or assets to extract.
    unless page.is_a? Mechanize::Page
      @logger.info "File crawling is not supported."
      return
    end

    @crawled << url

    links = same_site_links(page)
    @queue.concat links

    @data << {
      :url => url,
      :assets => {
        :scripts => find_scripts(page),
        :images => find_images(page),
        :css => find_css(page)
      },
      :links => links
    }
  end

  # @return [Array<String>] src of every <script> element that has one
  #   (inline scripts are skipped).
  def find_scripts(page)
    attribute_values(page, 'script', 'src')
  end

  # @return [Array<String>] src of every <img> element that has one.
  def find_images(page)
    attribute_values(page, 'img', 'src')
  end

  # @return [Array<String>] href of every <link> element that has one.
  #   NOTE(review): this is not restricted to rel="stylesheet" links.
  def find_css(page)
    attribute_values(page, 'link', 'href')
  end

  private

  # Hrefs of the links on +page+ that should be followed, in document order.
  def same_site_links(page)
    page.links.map(&:href).select { |href| same_site?(href) }
  end

  # A link is followed when it is root-relative ("/...") or starts with the
  # start page URL. Links without an href are ignored.
  def same_site?(href)
    return false if href.nil?

    href.start_with?('/', @start_page)
  end

  # Values of +attribute+ for all elements matching +selector+, skipping
  # elements that do not carry the attribute.
  def attribute_values(page, selector, attribute)
    page.search(selector).map { |node| node[attribute] }.compact
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spidermech'
require 'thread'
require 'webrick'

# Serve the static fixture site in test_site/ on port 8321 so the specs have
# something to crawl. The document root is resolved relative to this file so
# the specs do not depend on the directory rspec is started from.
server_ready = Queue.new

web_server = WEBrick::HTTPServer.new(
  :Port => 8321,
  :DocumentRoot => File.expand_path('../../test_site', __FILE__),
  :AccessLog => [],
  :StartCallback => proc { server_ready << true }
)

Thread.new { web_server.start }

# Block until WEBrick signals that it is running instead of sleeping for a
# fixed amount of time.
server_ready.pop

# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# Require this file using `require "spec_helper"` to ensure that it is only
# loaded once.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = 'random'
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
require 'fileutils'

# End-to-end specs: crawl the fixture site served by spec_helper on
# http://localhost:8321 and compare the collected data.
describe SpiderMech do

  before(:all) do
    # index.html, about.html and contact.html share the same assets and the
    # same navigation links, so every crawled page yields the same record
    # apart from its :url.
    assets = {
      :scripts => [
        "https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js",
        "http://getbootstrap.com/dist/js/bootstrap.min.js"
      ],
      :images => [],
      :css => [
        "http://getbootstrap.com/dist/css/bootstrap.min.css",
        "http://getbootstrap.com/examples/starter-template/starter-template.css"
      ]
    }
    links = ["/", "/about.html", "/contact.html"]

    # The start URL is crawled first, followed by the links it contains.
    @example_dataset = (["http://localhost:8321"] + links).map do |url|
      { :url => url, :assets => assets, :links => links }
    end

    @empty_dataset = [{:url=>"http://localhost:8321/empty.html", :assets=>{:scripts=>[], :images=>[], :css=>[]}, :links=>[]}]
  end

  after(:all) do # cleanup
    # rm_f does not raise when a file was never created (e.g. when the JSON
    # spec was filtered out or failed before writing).
    FileUtils.rm_f %w[spidermech.log localhost.json]
  end

  it 'should crawl a site' do
    spider = SpiderMech.new 'http://localhost:8321'
    data = spider.run
    data.should eq(@example_dataset)
  end

  it 'should save a json file' do
    spider = SpiderMech.new 'http://localhost:8321'
    spider.run
    spider.save_json

    File.exist?('localhost.json').should eq(true)

    # File.read closes the file once it has been read.
    data = JSON.parse File.read('localhost.json')
    data.should eq(JSON.parse(@example_dataset.to_json)) # quick way to change symbols to quotes to test equality
  end

  it 'should return a hash with empty assets for empty page' do
    spider = SpiderMech.new 'http://localhost:8321/empty.html'
    data = spider.run
    data.length.should eq(1) # should only have one result as it contains links to no other pages
    data.should eq(@empty_dataset)
  end

  it 'should only have one image' do
    spider = SpiderMech.new 'http://localhost:8321/image_only.html'
    data = spider.run
    data.length.should eq(1)
    data.first[:assets][:images].should eq(['test.png'])
  end

  it 'should only have one script' do
    spider = SpiderMech.new 'http://localhost:8321/script_only.html'
    data = spider.run
    data.length.should eq(1)
    data.first[:assets][:scripts].should eq(['stuffs.js'])
  end

  it 'should only have one css file' do
    spider = SpiderMech.new 'http://localhost:8321/css_only.html'
    data = spider.run
    data.length.should eq(1)
    data.first[:assets][:css].should eq(['style.css'])
  end
end
|
data/spidermech.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for spidermech.

# __FILE__ is the gemspec itself, so '../lib' is the lib directory next to it.
# ('./lib' would resolve to the non-existent path "<gemspec file>/lib".)
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "spidermech"
  spec.version       = '0.0.1'
  spec.authors       = ["Caleb Albritton"]
  spec.email         = ["ithinkincode@gmail.com"]
  spec.summary       = "Single URL crawler."
  spec.description   = "Does things"
  spec.homepage      = "http://github.com/C0deMaver1ck/spidermech"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency 'mechanize', "~> 2.7"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "pry", "~> 0.9"
  spec.add_development_dependency "rake", "~> 10.2"
  spec.add_development_dependency "rspec", "~> 2.14"
  spec.add_development_dependency "webrick", "~> 1.3"
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
7
|
+
<meta name="description" content="">
|
8
|
+
<meta name="author" content="">
|
9
|
+
|
10
|
+
<title>About</title>
|
11
|
+
|
12
|
+
<!-- Bootstrap core CSS -->
|
13
|
+
<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet">
|
14
|
+
|
15
|
+
<!-- Custom styles for this template -->
|
16
|
+
<link href="http://getbootstrap.com/examples/starter-template/starter-template.css" rel="stylesheet">
|
17
|
+
|
18
|
+
<!-- Just for debugging purposes. Don't actually copy this line! -->
|
19
|
+
<!--[if lt IE 9]><script src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
|
20
|
+
|
21
|
+
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
|
22
|
+
<!--[if lt IE 9]>
|
23
|
+
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
|
24
|
+
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
|
25
|
+
<![endif]-->
|
26
|
+
</head>
|
27
|
+
|
28
|
+
<body>
|
29
|
+
|
30
|
+
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
|
31
|
+
<div class="container">
|
32
|
+
<div class="navbar-header">
|
33
|
+
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
34
|
+
<span class="sr-only">Toggle navigation</span>
|
35
|
+
<span class="icon-bar"></span>
|
36
|
+
<span class="icon-bar"></span>
|
37
|
+
<span class="icon-bar"></span>
|
38
|
+
</button>
|
39
|
+
<a class="navbar-brand" href="#">Project name</a>
|
40
|
+
</div>
|
41
|
+
<div class="collapse navbar-collapse">
|
42
|
+
<ul class="nav navbar-nav">
|
43
|
+
<li class="active"><a href="/">Home</a></li>
|
44
|
+
<li><a href="/about.html">About</a></li>
|
45
|
+
<li><a href="/contact.html">Contact</a></li>
|
46
|
+
</ul>
|
47
|
+
</div><!--/.nav-collapse -->
|
48
|
+
</div>
|
49
|
+
</div>
|
50
|
+
|
51
|
+
<div class="container">
|
52
|
+
|
53
|
+
<div class="starter-template">
|
54
|
+
<h1>Bootstrap starter template</h1>
|
55
|
+
<p class="lead">Use this document as a way to quickly start any new project.<br> All you get is this text and a mostly barebones HTML document.</p>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
</div><!-- /.container -->
|
59
|
+
|
60
|
+
|
61
|
+
<!-- Bootstrap core JavaScript
|
62
|
+
================================================== -->
|
63
|
+
<!-- Placed at the end of the document so the pages load faster -->
|
64
|
+
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
65
|
+
<script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>
|
66
|
+
</body>
|
67
|
+
</html>
|
@@ -0,0 +1,67 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
7
|
+
<meta name="description" content="">
|
8
|
+
<meta name="author" content="">
|
9
|
+
|
10
|
+
<title>Contact</title>
|
11
|
+
|
12
|
+
<!-- Bootstrap core CSS -->
|
13
|
+
<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet">
|
14
|
+
|
15
|
+
<!-- Custom styles for this template -->
|
16
|
+
<link href="http://getbootstrap.com/examples/starter-template/starter-template.css" rel="stylesheet">
|
17
|
+
|
18
|
+
<!-- Just for debugging purposes. Don't actually copy this line! -->
|
19
|
+
<!--[if lt IE 9]><script src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
|
20
|
+
|
21
|
+
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
|
22
|
+
<!--[if lt IE 9]>
|
23
|
+
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
|
24
|
+
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
|
25
|
+
<![endif]-->
|
26
|
+
</head>
|
27
|
+
|
28
|
+
<body>
|
29
|
+
|
30
|
+
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
|
31
|
+
<div class="container">
|
32
|
+
<div class="navbar-header">
|
33
|
+
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
34
|
+
<span class="sr-only">Toggle navigation</span>
|
35
|
+
<span class="icon-bar"></span>
|
36
|
+
<span class="icon-bar"></span>
|
37
|
+
<span class="icon-bar"></span>
|
38
|
+
</button>
|
39
|
+
<a class="navbar-brand" href="#">Project name</a>
|
40
|
+
</div>
|
41
|
+
<div class="collapse navbar-collapse">
|
42
|
+
<ul class="nav navbar-nav">
|
43
|
+
<li class="active"><a href="/">Home</a></li>
|
44
|
+
<li><a href="/about.html">About</a></li>
|
45
|
+
<li><a href="/contact.html">Contact</a></li>
|
46
|
+
</ul>
|
47
|
+
</div><!--/.nav-collapse -->
|
48
|
+
</div>
|
49
|
+
</div>
|
50
|
+
|
51
|
+
<div class="container">
|
52
|
+
|
53
|
+
<div class="starter-template">
|
54
|
+
<h1>Bootstrap starter template</h1>
|
55
|
+
<p class="lead">Use this document as a way to quickly start any new project.<br> All you get is this text and a mostly barebones HTML document.</p>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
</div><!-- /.container -->
|
59
|
+
|
60
|
+
|
61
|
+
<!-- Bootstrap core JavaScript
|
62
|
+
================================================== -->
|
63
|
+
<!-- Placed at the end of the document so the pages load faster -->
|
64
|
+
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
65
|
+
<script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>
|
66
|
+
</body>
|
67
|
+
</html>
|
@@ -0,0 +1 @@
|
|
1
|
+
<link rel="stylesheet" type="text/css" href="style.css">
|
File without changes
|
@@ -0,0 +1 @@
|
|
1
|
+
<img src='test.png'>
|
@@ -0,0 +1,67 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
7
|
+
<meta name="description" content="">
|
8
|
+
<meta name="author" content="">
|
9
|
+
|
10
|
+
<title>Home</title>
|
11
|
+
|
12
|
+
<!-- Bootstrap core CSS -->
|
13
|
+
<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet">
|
14
|
+
|
15
|
+
<!-- Custom styles for this template -->
|
16
|
+
<link href="http://getbootstrap.com/examples/starter-template/starter-template.css" rel="stylesheet">
|
17
|
+
|
18
|
+
<!-- Just for debugging purposes. Don't actually copy this line! -->
|
19
|
+
<!--[if lt IE 9]><script src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
|
20
|
+
|
21
|
+
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
|
22
|
+
<!--[if lt IE 9]>
|
23
|
+
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
|
24
|
+
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
|
25
|
+
<![endif]-->
|
26
|
+
</head>
|
27
|
+
|
28
|
+
<body>
|
29
|
+
|
30
|
+
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
|
31
|
+
<div class="container">
|
32
|
+
<div class="navbar-header">
|
33
|
+
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
34
|
+
<span class="sr-only">Toggle navigation</span>
|
35
|
+
<span class="icon-bar"></span>
|
36
|
+
<span class="icon-bar"></span>
|
37
|
+
<span class="icon-bar"></span>
|
38
|
+
</button>
|
39
|
+
<a class="navbar-brand" href="#">Project name</a>
|
40
|
+
</div>
|
41
|
+
<div class="collapse navbar-collapse">
|
42
|
+
<ul class="nav navbar-nav">
|
43
|
+
<li class="active"><a href="/">Home</a></li>
|
44
|
+
<li><a href="/about.html">About</a></li>
|
45
|
+
<li><a href="/contact.html">Contact</a></li>
|
46
|
+
</ul>
|
47
|
+
</div><!--/.nav-collapse -->
|
48
|
+
</div>
|
49
|
+
</div>
|
50
|
+
|
51
|
+
<div class="container">
|
52
|
+
|
53
|
+
<div class="starter-template">
|
54
|
+
<h1>Bootstrap starter template</h1>
|
55
|
+
<p class="lead">Use this document as a way to quickly start any new project.<br> All you get is this text and a mostly barebones HTML document.</p>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
</div><!-- /.container -->
|
59
|
+
|
60
|
+
|
61
|
+
<!-- Bootstrap core JavaScript
|
62
|
+
================================================== -->
|
63
|
+
<!-- Placed at the end of the document so the pages load faster -->
|
64
|
+
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
65
|
+
<script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>
|
66
|
+
</body>
|
67
|
+
</html>
|
@@ -0,0 +1 @@
|
|
1
|
+
<script src='stuffs.js'></script>
|
metadata
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: spidermech
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Caleb Albritton
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-04-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.5'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.9'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.9'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.2'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.2'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.14'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.14'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: webrick
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.3'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.3'
|
97
|
+
description: Does things
|
98
|
+
email:
|
99
|
+
- ithinkincode@gmail.com
|
100
|
+
executables:
|
101
|
+
- spidermech
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- ".ruby-version"
|
108
|
+
- Gemfile
|
109
|
+
- LICENSE.txt
|
110
|
+
- README.md
|
111
|
+
- Rakefile
|
112
|
+
- bin/spidermech
|
113
|
+
- lib/spidermech.rb
|
114
|
+
- spec/spec_helper.rb
|
115
|
+
- spec/spidermech_spec.rb
|
116
|
+
- spidermech.gemspec
|
117
|
+
- test_site/about.html
|
118
|
+
- test_site/contact.html
|
119
|
+
- test_site/css_only.html
|
120
|
+
- test_site/empty.html
|
121
|
+
- test_site/image_only.html
|
122
|
+
- test_site/index.html
|
123
|
+
- test_site/script_only.html
|
124
|
+
homepage: http://github.com/C0deMaver1ck/spidermech
|
125
|
+
licenses:
|
126
|
+
- MIT
|
127
|
+
metadata: {}
|
128
|
+
post_install_message:
|
129
|
+
rdoc_options: []
|
130
|
+
require_paths:
|
131
|
+
- lib
|
132
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
|
+
requirements:
|
139
|
+
- - ">="
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
requirements: []
|
143
|
+
rubyforge_project:
|
144
|
+
rubygems_version: 2.2.0
|
145
|
+
signing_key:
|
146
|
+
specification_version: 4
|
147
|
+
summary: Single URL crawler.
|
148
|
+
test_files:
|
149
|
+
- spec/spec_helper.rb
|
150
|
+
- spec/spidermech_spec.rb
|