spidermech 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +37 -0
- data/Rakefile +8 -0
- data/bin/spidermech +15 -0
- data/lib/spidermech.rb +134 -0
- data/spec/spec_helper.rb +28 -0
- data/spec/spidermech_spec.rb +66 -0
- data/spidermech.gemspec +27 -0
- data/test_site/about.html +67 -0
- data/test_site/contact.html +67 -0
- data/test_site/css_only.html +1 -0
- data/test_site/empty.html +0 -0
- data/test_site/image_only.html +1 -0
- data/test_site/index.html +67 -0
- data/test_site/script_only.html +1 -0
- metadata +150 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 637a38fca20a36c523b57ae807ffa30b1b9d63f3
|
4
|
+
data.tar.gz: 9b417e4ce15fea26b72127c4b4b6624f0ac85847
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 11c1e177153ab63db942a826ab877242997d9d22e2e8971cf87d391fc7969e00d82475f504724054c4722bbc981c8257150f4c75ba19d9bd59013b38f6bff6b3
|
7
|
+
data.tar.gz: fed4c3d45c6949e190239e435eab66aa74ae5c13e613be34a72edf3eb16d669bf495095d0804c7c07d4ed94470d5db2228cc7806c3e3dfcb1e89d12e43f8a58f
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.0
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Caleb Albritton
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# SpiderMech
|
2
|
+
|
3
|
+
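SpiderMech crawls a website starting from a single URL and records, for every page it visits, the scripts, images, stylesheets and same-site links found on that page.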
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'spidermech'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install spidermech
|
18
|
+
|
19
|
+
## Gem Usage
|
20
|
+
|
21
|
+
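Create a crawler with `SpiderMech.new(url)`, call `run` to crawl the site (it returns the collected data), then call `save_json` to write the results to a JSON file.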
|
22
|
+
|
23
|
+
## Command Line Usage
|
24
|
+
|
25
|
+
The gem provides a command line tool. You can invoke it via
|
26
|
+
|
27
|
+
bundle exec spidermech http://google.com
|
28
|
+
|
29
|
+
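It crawls the site starting at that URL, writes the results to `<host>.json` in the current directory (for example `google.com.json`), and logs its progress to `spidermech.log`.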
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
1. Fork it ( http://github.com/C0deMaver1ck/spidermech/fork )
|
34
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
35
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
36
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
37
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bin/spidermech
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line entry point for SpiderMech.
#
# Usage: spidermech URL
#
# Crawls the site starting at URL and writes the collected data to
# "<host>.json" in the current directory.

# Resolve lib/ relative to this script (not the current working directory)
# so the tool can be started from anywhere.
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

require 'uri'
require 'json'
require 'spidermech'

raise ArgumentError, 'You must provide a url to crawl.' if ARGV.empty?

url = ARGV[0]

spider = SpiderMech.new url
spider.run
spider.save_json
|
data/lib/spidermech.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'logger'
|
3
|
+
require 'json'
|
4
|
+
require 'pry'
|
5
|
+
|
6
|
+
# Crawls a single website with Mechanize. For every HTML page it visits it
# records the scripts, images and <link> targets the page references,
# together with the links that stay on the same site, and queues those links
# for crawling.
class SpiderMech
  # URLs waiting to be crawled. May contain duplicates and URLs that have
  # already been crawled; those are skipped when they reach the front.
  attr_reader :queue
  # URLs that have been fetched and recorded.
  attr_reader :crawled
  # One hash per crawled page:
  #   { :url => String,
  #     :assets => { :scripts => [...], :images => [...], :css => [...] },
  #     :links => [...] }
  attr_reader :data

  # @param start_page [String] URL the crawl starts from; absolute links are
  #   only followed when they begin with this string.
  def initialize(start_page)
    @logger = Logger.new 'spidermech.log'
    @start_page = start_page

    @queue = [@start_page]
    @crawled = []
    @data = []

    @bot = Mechanize.new
  end

  # @return [Integer] number of queued URLs that have not been crawled yet
  #   (duplicates in the queue are counted individually).
  def left_in_queue
    @queue.count { |link| !@crawled.include?(link) }
  end

  # Writes the collected data as JSON to "<host of start page>.json" in the
  # current working directory.
  def save_json
    filename = "#{URI.parse(@start_page).host}.json"
    @logger.info "Writing sitemap data to #{filename}"
    File.open(filename, 'w') { |f| f.write @data.to_json }
  end

  # Crawls until the queue is exhausted.
  #
  # @return [Array<Hash>] the collected page data (same object as #data).
  def run
    crawl until @queue.empty?

    @data
  end

  # Takes the next URL off the queue, fetches it and records its assets and
  # same-site links. URLs that were already crawled, and responses that are
  # not HTML pages, are skipped.
  def crawl
    url = @queue.shift
    return if @crawled.include? url

    @logger.info "Crawling #{url}"
    @logger.info "Left in Queue: #{left_in_queue}"

    page = @bot.get url

    unless page.is_a?(Mechanize::Page)
      @logger.info "File crawling is not supported."
      return
    end

    @crawled << url

    links = internal_links(page)
    @queue.concat links

    @data << {
      :url => url,
      :assets => {
        :scripts => find_scripts(page),
        :images => find_images(page),
        :css => find_css(page)
      },
      :links => links
    }
  end

  # @return [Array<String>] the src of every <script> tag that has one
  #   (inline scripts are omitted).
  def find_scripts(page)
    attribute_values(page, 'script', 'src')
  end

  # @return [Array<String>] the src of every <img> tag that has one.
  def find_images(page)
    attribute_values(page, 'img', 'src')
  end

  # @return [Array<String>] the href of every <link> tag that has one. All
  #   <link> tags are included, not only rel="stylesheet".
  def find_css(page)
    attribute_values(page, 'link', 'href')
  end

  private

  # Collects the given attribute from every node matching the selector,
  # dropping nodes that do not carry the attribute.
  def attribute_values(page, selector, attribute)
    page.search(selector).map { |node| node[attribute] }.compact
  end

  # Hrefs of the page's links that belong to the site being crawled, in
  # document order.
  def internal_links(page)
    page.links.map(&:href).select { |href| in_scope?(href) }
  end

  # A link is in scope when it is root-relative ("/...") or begins with the
  # start page URL. This is a plain prefix comparison; no URL normalization
  # is performed. Links without an href are never in scope.
  def in_scope?(href)
    href.is_a?(String) && href.start_with?('/', @start_page)
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spidermech'
require 'webrick'
require 'thread'

# Serve the static fixture site that the specs crawl. The document root is
# resolved relative to this file so the specs do not depend on the directory
# rspec is started from.
test_site = File.expand_path('../../test_site', __FILE__)
server_ready = Queue.new

web_server = WEBrick::HTTPServer.new(
  :Port => 8321,
  :DocumentRoot => test_site,
  :AccessLog => [],
  :StartCallback => proc { server_ready << true } # signals that the server is running
)

server_thread = Thread.new do
  web_server.start
end

server_ready.pop # block until the server has started instead of sleeping a fixed time

# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# Require this file using `require "spec_helper"` to ensure that it is only
# loaded once.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = 'random'
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'

# Exercises SpiderMech against the static fixture site served by spec_helper
# on http://localhost:8321.
describe SpiderMech do

  before(:all) do
    # Every page of the fixture site uses the same template, so each crawled
    # page reports the same assets and the same navigation links.
    assets = {
      :scripts => [
        "https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js",
        "http://getbootstrap.com/dist/js/bootstrap.min.js"
      ],
      :images => [],
      :css => [
        "http://getbootstrap.com/dist/css/bootstrap.min.css",
        "http://getbootstrap.com/examples/starter-template/starter-template.css"
      ]
    }
    links = ["/", "/about.html", "/contact.html"]
    urls = ["http://localhost:8321", "/", "/about.html", "/contact.html"]

    @example_dataset = urls.map do |url|
      {:url => url, :assets => assets, :links => links}
    end

    @empty_dataset = [{:url=>"http://localhost:8321/empty.html", :assets=>{:scripts=>[], :images=>[], :css=>[]}, :links=>[]}]
  end

  after(:all) do # cleanup; tolerate files that were never created
    ['spidermech.log', 'localhost.json'].each do |path|
      File.delete path if File.exist? path
    end
  end

  it 'should crawl a site' do
    spider = SpiderMech.new 'http://localhost:8321'
    data = spider.run
    data.should eq(@example_dataset)
  end

  it 'should save a json file' do
    spider = SpiderMech.new 'http://localhost:8321'
    spider.run
    spider.save_json

    File.exist?('localhost.json').should eq(true)

    # File.read closes the file again, unlike a bare File.open
    data = JSON.parse File.read('localhost.json')
    data.should eq(JSON.parse(@example_dataset.to_json)) # quick way to change symbols to quotes to test equality
  end

  it 'should return a hash with empty assets for empty page' do
    spider = SpiderMech.new 'http://localhost:8321/empty.html'
    data = spider.run
    data.length.should eq(1) # should only have one result as it contains links to no other pages
    data.should eq(@empty_dataset)
  end

  it 'should only have one image' do
    spider = SpiderMech.new 'http://localhost:8321/image_only.html'
    data = spider.run
    data.length.should eq(1)
    data.first[:assets][:images].first.should eq('test.png')
  end

  it 'should only have one script' do
    spider = SpiderMech.new 'http://localhost:8321/script_only.html'
    data = spider.run
    data.length.should eq(1)
    data.first[:assets][:scripts].first.should eq('stuffs.js')
  end

  it 'should only have one css file' do
    spider = SpiderMech.new 'http://localhost:8321/css_only.html'
    data = spider.run
    data.length.should eq(1)
    data.first[:assets][:css].first.should eq('style.css')
  end
end
|
data/spidermech.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Put this gem's lib directory on the load path. The path is resolved from
# the directory containing this gemspec ('..' steps from the file itself up
# to its directory).
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "spidermech"
  spec.version       = '0.0.1'
  spec.authors       = ["Caleb Albritton"]
  spec.email         = ["ithinkincode@gmail.com"]
  spec.summary       = "Single URL crawler."
  spec.description   = "Does things"
  spec.homepage      = "http://github.com/C0deMaver1ck/spidermech"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency 'mechanize', "~> 2.7"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "pry", "~> 0.9"
  spec.add_development_dependency "rake", "~> 10.2"
  spec.add_development_dependency "rspec", "~> 2.14"
  spec.add_development_dependency "webrick", "~> 1.3"
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
7
|
+
<meta name="description" content="">
|
8
|
+
<meta name="author" content="">
|
9
|
+
|
10
|
+
<title>About</title>
|
11
|
+
|
12
|
+
<!-- Bootstrap core CSS -->
|
13
|
+
<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet">
|
14
|
+
|
15
|
+
<!-- Custom styles for this template -->
|
16
|
+
<link href="http://getbootstrap.com/examples/starter-template/starter-template.css" rel="stylesheet">
|
17
|
+
|
18
|
+
<!-- Just for debugging purposes. Don't actually copy this line! -->
|
19
|
+
<!--[if lt IE 9]><script src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
|
20
|
+
|
21
|
+
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
|
22
|
+
<!--[if lt IE 9]>
|
23
|
+
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
|
24
|
+
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
|
25
|
+
<![endif]-->
|
26
|
+
</head>
|
27
|
+
|
28
|
+
<body>
|
29
|
+
|
30
|
+
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
|
31
|
+
<div class="container">
|
32
|
+
<div class="navbar-header">
|
33
|
+
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
34
|
+
<span class="sr-only">Toggle navigation</span>
|
35
|
+
<span class="icon-bar"></span>
|
36
|
+
<span class="icon-bar"></span>
|
37
|
+
<span class="icon-bar"></span>
|
38
|
+
</button>
|
39
|
+
<a class="navbar-brand" href="#">Project name</a>
|
40
|
+
</div>
|
41
|
+
<div class="collapse navbar-collapse">
|
42
|
+
<ul class="nav navbar-nav">
|
43
|
+
<li class="active"><a href="/">Home</a></li>
|
44
|
+
<li><a href="/about.html">About</a></li>
|
45
|
+
<li><a href="/contact.html">Contact</a></li>
|
46
|
+
</ul>
|
47
|
+
</div><!--/.nav-collapse -->
|
48
|
+
</div>
|
49
|
+
</div>
|
50
|
+
|
51
|
+
<div class="container">
|
52
|
+
|
53
|
+
<div class="starter-template">
|
54
|
+
<h1>Bootstrap starter template</h1>
|
55
|
+
<p class="lead">Use this document as a way to quickly start any new project.<br> All you get is this text and a mostly barebones HTML document.</p>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
</div><!-- /.container -->
|
59
|
+
|
60
|
+
|
61
|
+
<!-- Bootstrap core JavaScript
|
62
|
+
================================================== -->
|
63
|
+
<!-- Placed at the end of the document so the pages load faster -->
|
64
|
+
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
65
|
+
<script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>
|
66
|
+
</body>
|
67
|
+
</html>
|
@@ -0,0 +1,67 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
7
|
+
<meta name="description" content="">
|
8
|
+
<meta name="author" content="">
|
9
|
+
|
10
|
+
<title>Contact</title>
|
11
|
+
|
12
|
+
<!-- Bootstrap core CSS -->
|
13
|
+
<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet">
|
14
|
+
|
15
|
+
<!-- Custom styles for this template -->
|
16
|
+
<link href="http://getbootstrap.com/examples/starter-template/starter-template.css" rel="stylesheet">
|
17
|
+
|
18
|
+
<!-- Just for debugging purposes. Don't actually copy this line! -->
|
19
|
+
<!--[if lt IE 9]><script src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
|
20
|
+
|
21
|
+
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
|
22
|
+
<!--[if lt IE 9]>
|
23
|
+
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
|
24
|
+
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
|
25
|
+
<![endif]-->
|
26
|
+
</head>
|
27
|
+
|
28
|
+
<body>
|
29
|
+
|
30
|
+
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
|
31
|
+
<div class="container">
|
32
|
+
<div class="navbar-header">
|
33
|
+
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
34
|
+
<span class="sr-only">Toggle navigation</span>
|
35
|
+
<span class="icon-bar"></span>
|
36
|
+
<span class="icon-bar"></span>
|
37
|
+
<span class="icon-bar"></span>
|
38
|
+
</button>
|
39
|
+
<a class="navbar-brand" href="#">Project name</a>
|
40
|
+
</div>
|
41
|
+
<div class="collapse navbar-collapse">
|
42
|
+
<ul class="nav navbar-nav">
|
43
|
+
<li class="active"><a href="/">Home</a></li>
|
44
|
+
<li><a href="/about.html">About</a></li>
|
45
|
+
<li><a href="/contact.html">Contact</a></li>
|
46
|
+
</ul>
|
47
|
+
</div><!--/.nav-collapse -->
|
48
|
+
</div>
|
49
|
+
</div>
|
50
|
+
|
51
|
+
<div class="container">
|
52
|
+
|
53
|
+
<div class="starter-template">
|
54
|
+
<h1>Bootstrap starter template</h1>
|
55
|
+
<p class="lead">Use this document as a way to quickly start any new project.<br> All you get is this text and a mostly barebones HTML document.</p>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
</div><!-- /.container -->
|
59
|
+
|
60
|
+
|
61
|
+
<!-- Bootstrap core JavaScript
|
62
|
+
================================================== -->
|
63
|
+
<!-- Placed at the end of the document so the pages load faster -->
|
64
|
+
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
65
|
+
<script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>
|
66
|
+
</body>
|
67
|
+
</html>
|
@@ -0,0 +1 @@
|
|
1
|
+
<link rel="stylesheet" type="text/css" href="style.css">
|
File without changes
|
@@ -0,0 +1 @@
|
|
1
|
+
<img src='test.png'>
|
@@ -0,0 +1,67 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
<head>
|
4
|
+
<meta charset="utf-8">
|
5
|
+
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
7
|
+
<meta name="description" content="">
|
8
|
+
<meta name="author" content="">
|
9
|
+
|
10
|
+
<title>Home</title>
|
11
|
+
|
12
|
+
<!-- Bootstrap core CSS -->
|
13
|
+
<link href="http://getbootstrap.com/dist/css/bootstrap.min.css" rel="stylesheet">
|
14
|
+
|
15
|
+
<!-- Custom styles for this template -->
|
16
|
+
<link href="http://getbootstrap.com/examples/starter-template/starter-template.css" rel="stylesheet">
|
17
|
+
|
18
|
+
<!-- Just for debugging purposes. Don't actually copy this line! -->
|
19
|
+
<!--[if lt IE 9]><script src="../../assets/js/ie8-responsive-file-warning.js"></script><![endif]-->
|
20
|
+
|
21
|
+
<!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
|
22
|
+
<!--[if lt IE 9]>
|
23
|
+
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
|
24
|
+
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
|
25
|
+
<![endif]-->
|
26
|
+
</head>
|
27
|
+
|
28
|
+
<body>
|
29
|
+
|
30
|
+
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
|
31
|
+
<div class="container">
|
32
|
+
<div class="navbar-header">
|
33
|
+
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
34
|
+
<span class="sr-only">Toggle navigation</span>
|
35
|
+
<span class="icon-bar"></span>
|
36
|
+
<span class="icon-bar"></span>
|
37
|
+
<span class="icon-bar"></span>
|
38
|
+
</button>
|
39
|
+
<a class="navbar-brand" href="#">Project name</a>
|
40
|
+
</div>
|
41
|
+
<div class="collapse navbar-collapse">
|
42
|
+
<ul class="nav navbar-nav">
|
43
|
+
<li class="active"><a href="/">Home</a></li>
|
44
|
+
<li><a href="/about.html">About</a></li>
|
45
|
+
<li><a href="/contact.html">Contact</a></li>
|
46
|
+
</ul>
|
47
|
+
</div><!--/.nav-collapse -->
|
48
|
+
</div>
|
49
|
+
</div>
|
50
|
+
|
51
|
+
<div class="container">
|
52
|
+
|
53
|
+
<div class="starter-template">
|
54
|
+
<h1>Bootstrap starter template</h1>
|
55
|
+
<p class="lead">Use this document as a way to quickly start any new project.<br> All you get is this text and a mostly barebones HTML document.</p>
|
56
|
+
</div>
|
57
|
+
|
58
|
+
</div><!-- /.container -->
|
59
|
+
|
60
|
+
|
61
|
+
<!-- Bootstrap core JavaScript
|
62
|
+
================================================== -->
|
63
|
+
<!-- Placed at the end of the document so the pages load faster -->
|
64
|
+
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
|
65
|
+
<script src="http://getbootstrap.com/dist/js/bootstrap.min.js"></script>
|
66
|
+
</body>
|
67
|
+
</html>
|
@@ -0,0 +1 @@
|
|
1
|
+
<script src='stuffs.js'></script>
|
metadata
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: spidermech
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Caleb Albritton
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-04-04 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.5'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.5'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.9'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.9'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.2'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.2'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '2.14'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '2.14'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: webrick
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.3'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.3'
|
97
|
+
description: Does things
|
98
|
+
email:
|
99
|
+
- ithinkincode@gmail.com
|
100
|
+
executables:
|
101
|
+
- spidermech
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- ".ruby-version"
|
108
|
+
- Gemfile
|
109
|
+
- LICENSE.txt
|
110
|
+
- README.md
|
111
|
+
- Rakefile
|
112
|
+
- bin/spidermech
|
113
|
+
- lib/spidermech.rb
|
114
|
+
- spec/spec_helper.rb
|
115
|
+
- spec/spidermech_spec.rb
|
116
|
+
- spidermech.gemspec
|
117
|
+
- test_site/about.html
|
118
|
+
- test_site/contact.html
|
119
|
+
- test_site/css_only.html
|
120
|
+
- test_site/empty.html
|
121
|
+
- test_site/image_only.html
|
122
|
+
- test_site/index.html
|
123
|
+
- test_site/script_only.html
|
124
|
+
homepage: http://github.com/C0deMaver1ck/spidermech
|
125
|
+
licenses:
|
126
|
+
- MIT
|
127
|
+
metadata: {}
|
128
|
+
post_install_message:
|
129
|
+
rdoc_options: []
|
130
|
+
require_paths:
|
131
|
+
- lib
|
132
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
133
|
+
requirements:
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
|
+
requirements:
|
139
|
+
- - ">="
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
requirements: []
|
143
|
+
rubyforge_project:
|
144
|
+
rubygems_version: 2.2.0
|
145
|
+
signing_key:
|
146
|
+
specification_version: 4
|
147
|
+
summary: Single URL crawler.
|
148
|
+
test_files:
|
149
|
+
- spec/spec_helper.rb
|
150
|
+
- spec/spidermech_spec.rb
|