anaximander 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rspec +1 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +22 -0
- data/README.md +47 -0
- data/Rakefile +2 -0
- data/anaximander.gemspec +29 -0
- data/bin/mapgen +5 -0
- data/lib/anaximander/cli.rb +17 -0
- data/lib/anaximander/crawler.rb +31 -0
- data/lib/anaximander/discovery/assets.rb +39 -0
- data/lib/anaximander/discovery/links.rb +76 -0
- data/lib/anaximander/page.rb +73 -0
- data/lib/anaximander/renderer.rb +83 -0
- data/lib/anaximander/url.rb +38 -0
- data/lib/anaximander/version.rb +3 -0
- data/lib/anaximander.rb +24 -0
- data/spec/anaximander/crawler_spec.rb +64 -0
- data/spec/anaximander/discovery/assets_spec.rb +12 -0
- data/spec/anaximander/discovery/links_spec.rb +33 -0
- data/spec/anaximander/page_spec.rb +24 -0
- data/spec/anaximander/renderer_spec.rb +64 -0
- data/spec/anaximander/url_spec.rb +45 -0
- data/spec/data/page.html +17 -0
- data/spec/data/site/config.ru +6 -0
- data/spec/data/site/public/features.html +14 -0
- data/spec/data/site/public/index.html +14 -0
- data/spec/data/site/public/pricing/high/sortof_high.html +15 -0
- data/spec/data/site/public/pricing/high/super_high.html +16 -0
- data/spec/data/site/public/pricing/high.html +19 -0
- data/spec/data/site/public/pricing/low.html +16 -0
- data/spec/data/site/public/pricing/medium.html +16 -0
- data/spec/data/site/public/pricing.html +21 -0
- data/spec/spec_helper.rb +40 -0
- metadata +208 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 009aaa84665dcd8497a59b9e47acef29008613ea
|
4
|
+
data.tar.gz: 956c8d4a45e84443af161c5b4c78c4c33f8a54fb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: af827976b24b5f734391e3229a2c6fbd2a05ea012b7766517367d547beca19964bacfd59e96c3a857d12a2ae18d071c6c34f70fe2a7e6354d639b6baab0aa865
|
7
|
+
data.tar.gz: 4e49ba0bb2b67931ac4826f3e2e1913c3ae32885af892226c563488ff069e044c3f36de3c799885381a2581f53f96bb81cb421563874134195f6b9ae328313cd
|
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Matte Noble
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Anaximander
|
2
|
+
|
3
|
+
Anaximander is a small library for crawling a website and rendering the
|
4
|
+
resulting site map to the console.
|
5
|
+
|
6
|
+
## Installation & Usage
|
7
|
+
|
8
|
+
```sh
|
9
|
+
gem install anaximander
|
10
|
+
```
|
11
|
+
|
12
|
+
```sh
|
13
|
+
mapgen <url>
|
14
|
+
```
|
15
|
+
|
16
|
+
## Running Tests
|
17
|
+
|
18
|
+
```sh
|
19
|
+
bundle install
|
20
|
+
bundle exec rspec spec
|
21
|
+
```
|
22
|
+
|
23
|
+
### End to End Tests
|
24
|
+
|
25
|
+
There are two tests marked with the tag `endtoend`. These tests start up
|
26
|
+
a Rack app which serves a simple website and run against that server
|
27
|
+
like the library would in "production". Think of them as the Acceptance
|
28
|
+
tests for a library.
|
29
|
+
|
30
|
+
The end to end tests are run by default. To exclude them:
|
31
|
+
|
32
|
+
```sh
|
33
|
+
bundle exec rspec spec --tag ~endtoend
|
34
|
+
```
|
35
|
+
|
36
|
+
## What does Anaximander mean?
|
37
|
+
|
38
|
+
Anaximander was a Greek cartographer who was the first person to try to
|
39
|
+
map the entire world.
|
40
|
+
|
41
|
+
## Contributing
|
42
|
+
|
43
|
+
1. Fork it ( https://github.com/[my-github-username]/anaximander/fork )
|
44
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
45
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
46
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
47
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/anaximander.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'anaximander/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "anaximander"
|
8
|
+
spec.version = Anaximander::VERSION
|
9
|
+
spec.authors = ["Matte Noble"]
|
10
|
+
spec.email = ["me@mattenoble.com"]
|
11
|
+
spec.summary = %q{Web scraper that collects assets and links.}
|
12
|
+
spec.description = %q{Web scraper that collects assets and links.}
|
13
|
+
spec.license = "MIT"
|
14
|
+
|
15
|
+
spec.files = `git ls-files -z`.split("\x0")
|
16
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
|
+
spec.require_paths = ["lib"]
|
19
|
+
|
20
|
+
spec.add_dependency "nokogiri"
|
21
|
+
spec.add_dependency "colorize"
|
22
|
+
|
23
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
24
|
+
spec.add_development_dependency "rake"
|
25
|
+
spec.add_development_dependency "rack"
|
26
|
+
spec.add_development_dependency "rspec"
|
27
|
+
spec.add_development_dependency "rspec-autotest"
|
28
|
+
spec.add_development_dependency "fakeout"
|
29
|
+
end
|
data/bin/mapgen
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Anaximander
  # Command line front end: crawls the site rooted at the given URL and
  # draws the resulting site map.
  #
  # == Example
  #
  #   Anaximander::CLI.start                          # reads the URL from ARGV
  #   Anaximander::CLI.start(["http://example.com"])  # explicit arguments
  #
  class CLI
    USAGE = "Usage: mapgen <url>".freeze

    # Builds a CLI from the first command line argument and runs it.
    #
    # == Parameters
    #
    #   [Array<String>] argv  Command line arguments. Defaults to `ARGV`.
    #
    # Aborts (exit status 1) with a usage message when no URL was given,
    # rather than failing later while the crawler tries to use a nil URL.
    #
    def self.start(argv=ARGV)
      url = argv.first.to_s.strip
      abort(USAGE) if url.empty?

      new(url).start
    end

    # Parameters
    #
    #   [String] url  URL of the page the crawl starts from.
    #
    def initialize(url)
      @crawler = Crawler.new(url)
      @renderer = Renderer.new
    end

    # Crawls the whole site, then draws the tree starting at the root page.
    #
    def start
      @crawler.crawl
      @renderer.draw(@crawler.root)
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Anaximander
  # Crawls a website starting from a root URL and builds a tree of pages,
  # connected through each page's `children`.
  #
  # Every URL is visited at most once. URLs are compared with their trailing
  # slash removed, so "http://example.com" and "http://example.com/" count as
  # the same page.
  #
  # == Example
  #
  #   crawler = Anaximander::Crawler.new("http://example.com")
  #   crawler.crawl
  #   crawler.root.children # => pages linked to from the root page
  #
  class Crawler
    attr_reader :url, :root

    # Parameters
    #
    #   [String] url  URL the crawl starts from.
    #
    # The root page and the visited set are both built from the normalized
    # (slash-less) URL. Using the raw URL here would let a later link to the
    # root slip past the visited check whenever the URL was given with a
    # trailing slash.
    #
    def initialize(url)
      @url = url.chomp("/")
      @root = Page.new(@url)
      # Hash used as a set: constant-time membership checks.
      @visited = { @url => true }
    end

    # Assigns the not-yet-visited links of `page` as its children, then
    # repeats the process for each child.
    #
    # All links of a page are claimed before descending, so a page shows up
    # under the first parent that links to it.
    #
    def crawl(page=self.root)
      page.children = page.links.map { |link| visit(link.chomp("/")) }.compact
      page.children.each { |child| crawl(child) }
    end

    # Returns a new Page for `link`, or nil when the link was already
    # visited or the page could not be fetched.
    #
    def visit(link)
      return if @visited.key?(link)

      logger.debug(link)
      # Recorded before fetching so that a failing page is not retried.
      @visited[link] = true

      Page.new(link)
    rescue Anaximander::PageNotAccessibleError
      nil
    end

    def logger
      Anaximander.logger
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Anaximander
  module Discovery

    # Collection of the CSS and Javascript assets referenced by a page.
    #
    # CSS assets are the `href` of every `<link>` whose `rel` contains
    # "stylesheet"; Javascript assets are the `src` of every `<script>`.
    # Other `<link>` elements (icons, canonical URLs, feeds) are not
    # stylesheets and are ignored. Inline scripts (no `src`) are skipped.
    #
    # == Example
    #
    #   page = Nokogiri::HTML(open("http://example.com"))
    #
    #   Anaximander::Discovery::Assets.new(page)
    #   # => ["/main.css", "/default.js"]
    #
    class Assets
      include Enumerable
      include Comparable
      extend Forwardable
      def_delegators :assets, :size, :inspect, :to_a

      # Parameters
      #
      #   page [Nokogiri::HTML] Parsed html of the page.
      #
      def initialize(page)
        @page = page
      end

      def each(&block)
        assets.each(&block)
      end

      # Two collections compare by their asset lists.
      #
      def <=>(other)
        to_a <=> other.to_a
      end

      private

      attr_reader :page

      # Stylesheets first, then scripts, each in document order.
      #
      def assets
        css + javascript
      end

      def css
        stylesheets = page.css("link").select do |link|
          # `rel` is a space separated, case-insensitive token list.
          link[:rel].to_s.downcase.split.include?("stylesheet")
        end

        stylesheets.map { |link| link[:href] }.compact
      end

      def javascript
        page.css("script").map { |script| script[:src] }.compact
      end
    end
  end
end
|
39
|
+
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Anaximander
  module Discovery

    # Collection of internal links on the given page.
    #
    # == Relative Paths
    #
    # `Anaximander::Discovery::Links` converts all relative paths into absolute
    # paths using the base URL (scheme, host and port) of the page being
    # crawled.
    #
    #   # http://example.com
    #   <a href="/contact">Contact</a>
    #
    #   page = Nokogiri::HTML(open("http://example.com"))
    #   Anaximander::Discovery::Links.new(page, "http://example.com")
    #   # => ["http://example.com/contact"]
    #
    # == Exclusions
    #
    #   - External links (ones outside the domain of the page)
    #   - Hash links (Javascript style links with href of "#"); the fragment
    #     is removed, so they collapse into the URL they decorate
    #   - Anchors without an `href`, or with a blank one
    #   - Links whose `href` is not a valid URI
    #
    # == Example
    #
    #   page = Nokogiri::HTML(open("http://example.com"))
    #
    #   Anaximander::Discovery::Links.new(page, "http://example.com")
    #   # => ["http://example.com/domains/example"]
    #
    class Links
      include Enumerable
      include Comparable
      extend Forwardable
      def_delegators :links, :size, :inspect, :to_a

      # Parameters
      #
      #   page [Nokogiri::HTML] Parsed html of the page.
      #   url  [String|URI]     URL of the page to discover.
      #
      def initialize(page, url)
        @page = page
        @url = Url.new(url)
      end

      def each(&block)
        links.each(&block)
      end

      # Two collections compare by their list of link strings.
      #
      def <=>(other)
        to_a <=> other.to_a
      end

      private

      attr_reader :page

      # Internal links as plain strings.
      #
      def links
        internal_links.map(&:to_s)
      end

      # Links that share scheme, host and port with the page itself.
      #
      def internal_links
        all_links.select { |link| @url.base == link.base }
      end

      # Every unique, parseable link on the page, as absolute Urls.
      #
      # Anchors without a usable `href` are skipped: resolving an empty
      # reference would yield the site root, a link the page never contained.
      # Surrounding whitespace is stripped, as browsers do, so that
      # `href=" /foo "` is not rejected as an invalid URI.
      #
      def all_links
        hrefs = page.css("a").map { |a| a[:href].to_s.strip }.reject(&:empty?)
        hrefs.map { |href| absolute(href) }.compact.uniq
      end

      # Absolute, fragment-less Url for `link`; nil when it cannot be parsed.
      #
      # NOTE(review): relative paths are resolved against the site base, not
      # the page URL, so a document-relative href such as "foo.html" on
      # "/dir/page.html" becomes "/foo.html". Left unchanged here since the
      # existing specs only cover root-relative paths — confirm intent.
      #
      def absolute(link)
        Url.new(link).absolute(@url.base).without_fragment
      rescue URI::InvalidURIError
        nil
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Anaximander
  class Error < StandardError; end

  # Raised when a page cannot be fetched.
  #
  class PageNotAccessibleError < Error; end

  # Represents a single page of a website being crawled. Exposes the assets and
  # links on the page.
  #
  # == Errors
  #
  # `Anaximander::Page` will raise a `PageNotAccessibleError` when the page cannot
  # be fetched for some reason. This is often due to it not existing (404),
  # the connection failing, or infinite redirect loops.
  #
  # == Example
  #
  #   page = Page.new("http://example.com")
  #   page.links  # => ["http://example.com/domains/example"]
  #   page.assets # => ["/main.css", "/default.js"]
  #
  class Page
    include Comparable

    # Absolute url of the page.
    #
    attr_reader :url

    # Parsed Nokogiri HTML document.
    #
    attr_reader :html

    # Collection of `Page` objects that are linked
    # to from the current page. Empty until a crawler fills it in.
    #
    attr_accessor :children

    # Parameters
    #
    #   [String] url URL to discover.
    #
    # OpenURI raises a generic RuntimeError when it cannot fetch a
    # page, for a variety of reasons (e.g. redirect loops), an
    # `OpenURI::HTTPError` for HTTP failures such as 404s, and the
    # connection itself can fail with a `SystemCallError` (connection
    # refused, reset, ...).
    #
    # raises `PageNotAccessibleError` when OpenURI fails to fetch the
    # page, for any of those reasons. The message names the URL and the
    # original error.
    #
    def initialize(url)
      @url = url
      # Start with no children so an un-crawled page can still be rendered.
      @children = []
      @html = Nokogiri::HTML(open(url))
    rescue RuntimeError, OpenURI::HTTPError, SystemCallError => error
      raise PageNotAccessibleError, "#{url} is not accessible (#{error.class}: #{error.message})"
    end

    # Internal links found on the page.
    #
    def links
      Discovery::Links.new(html, url)
    end

    # CSS and Javascript assets referenced by the page.
    #
    def assets
      Discovery::Assets.new(html)
    end

    # Pages compare by URL. Returns nil for objects that have no `url`.
    #
    def <=>(other)
      return nil unless other.respond_to?(:url)

      self.url <=> other.url
    end

    def inspect
      %(#<Anaximander::Page:#{object_id} url="#{url}">)
    end

    private

    # Fetches `url`.
    #
    # `Kernel#open` stopped handling URLs in Ruby 3.0, so `URI.open` is
    # preferred whenever OpenURI provides it; older Rubies fall back to
    # `Kernel#open`. Kept as an instance method named `open` so callers and
    # specs that stub `open` on a page keep working.
    #
    def open(url)
      URI.respond_to?(:open) ? URI.open(url) : super
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "colorize"
|
2
|
+
|
3
|
+
module Anaximander
  # Draws the crawled tree of URLs and assets, generated by `Anaximander::Crawler`.
  #
  # == Output
  #
  #   └── <url> [assets]
  #       ├── <url> [assets]
  #       │   └── <url> [assets]
  #       └── <url> [assets]
  #
  # == Example
  #
  #   root
  #   # => #<Anaximander::Page url="http://example.com"/>
  #
  #   root.children
  #   # => [#<Anaximander::Page url="http://example.com/foo"/>, #<Anaximander::Page url="http://example.com/bar"/>]
  #
  #   renderer = Anaximander::Renderer.new
  #   renderer.draw(root)
  #
  #   # => └── http://example.com ["main.css"]
  #   # =>     ├── http://example.com/foo ["main.css", "foo.js"]
  #   # =>     └── http://example.com/bar ["main.css", "bar.js"]
  #
  class Renderer
    # Every pipe is four columns wide so nested tiers line up.
    VERTICAL_PIPE = "│   "
    MEMBER_PIPE   = "├── "
    TAIL_PIPE     = "└── "
    SPACE_PIPE    = "    "

    # Page drawn by `draw` when it is called without arguments.
    #
    attr_reader :root

    # == Parameters
    #
    #   [Hash] options
    #     :color [Boolean] Colorize the asset list. Defaults to true.
    #     :root  [Page]    Optional default page for `draw`.
    #
    def initialize(options={})
      @color = options.fetch(:color, true)
      @root = options[:root]
    end

    # Draws a page URL, its assets and recursively does the same for
    # all of its children.
    #
    # == Parameters
    #
    #   [Anaximander::Page] page   The page to render. Defaults to `root`.
    #   [String]            prefix A string that should precede the actual URL
    #                              and asset information.
    #   [Boolean]           tail   Is this node the last one in the collection.
    #
    # raises `ArgumentError` when there is no page to draw.
    #
    def draw(page=self.root, prefix="", tail=true)
      raise ArgumentError, "no page given to draw" if page.nil?

      pipe = tail ? TAIL_PIPE : MEMBER_PIPE

      assets = page.assets.to_a.inspect
      assets = assets.colorize(:light_black) if @color

      puts "#{prefix}#{pipe}#{page.url} #{assets}"

      # A page that was never crawled may have no children collection.
      children = Array(page.children)
      children[0...-1].each { |child| draw_child(child, prefix, tail) }
      draw_tail(children.last, prefix, tail) unless children.empty?
    end

    # Draws a child node, with the appropriate "connecting pipe".
    #
    # The "connecting pipe" is the character at the beginning of this line,
    # which connects this to the previous tier of the tree.
    #
    def draw_child(page, prefix, parent_is_tail)
      draw(page, child_prefix(prefix, parent_is_tail), false)
    end

    # Draws a leaf node, with the appropriate "connecting pipe".
    #
    # See `draw_child` for "connecting pipe" definition.
    #
    def draw_tail(page, prefix, parent_is_tail)
      draw(page, child_prefix(prefix, parent_is_tail), true)
    end

    private

    # Prefix for nodes one tier below a node drawn with `prefix`. A tail
    # parent has nothing left to connect to, so blank space is used instead
    # of a vertical pipe.
    #
    def child_prefix(prefix, parent_is_tail)
      "#{prefix}#{parent_is_tail ? SPACE_PIPE : VERTICAL_PIPE}"
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Anaximander
  # Thin wrapper around a `URI` that adds the helpers the crawler needs.
  # Everything not defined here is delegated to the wrapped URI.
  #
  # == Example
  #
  #   url = Anaximander::Url.new("http://example.com/foo#bar")
  #   url.base             # => "http://example.com"
  #   url.without_fragment # => http://example.com/foo
  #   url == "http://example.com/foo#bar" # => true
  #
  class Url < SimpleDelegator
    include Comparable

    # The wrapped `URI`.
    #
    attr_reader :uri

    # Parameters
    #
    #   [String|URI|Url] uri  Anything whose `to_s` is a parseable URI.
    #
    # raises `URI::InvalidURIError` when the string cannot be parsed.
    #
    def initialize(uri)
      @uri = URI(uri.to_s)
      super(@uri)
    end

    # Scheme and host of the URL, plus the port when it is not the default
    # port of the scheme (80 for http, 443 for https, ...).
    #
    def base
      domain = "#{scheme}://#{host}"
      domain += ":#{port}" unless port == default_port
      domain
    end

    # Resolves `url` (a String or Url) against this URL.
    #
    def join(url)
      self.class.new(URI.join(self.uri, url.to_s))
    end

    # Returns self when already absolute, otherwise this URL resolved
    # against `base`.
    #
    def absolute(base)
      absolute? ? self : Url.new(base).join(self)
    end

    # Copy of this URL with the fragment ("#...") removed.
    #
    def without_fragment
      self.class.new(self).tap { |url| url.fragment = nil }
    end

    # Urls are equal when their URIs are equal; a Url also compares with a
    # plain string. Unequal Urls are ordered by their string form, which
    # makes collections of Urls sortable.
    #
    def <=>(other)
      return self.uri.to_s <=> other unless other.respond_to?(:uri)

      order = self.uri <=> other.uri
      order.nil? ? self.uri.to_s <=> other.uri.to_s : order
    end

    # Hash-key equality (used by `uniq` and `Hash`); never true for objects
    # that do not wrap a URI.
    #
    def eql?(other)
      other.respond_to?(:uri) && self.uri.eql?(other.uri)
    end

    # Kept consistent with `eql?`.
    #
    def hash
      self.uri.hash
    end
  end
end
|
data/lib/anaximander.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "open-uri"
|
2
|
+
require "nokogiri"
|
3
|
+
require "forwardable"
|
4
|
+
require "delegate"
|
5
|
+
require "uri"
|
6
|
+
require "logger"
|
7
|
+
|
8
|
+
require "anaximander/version"
|
9
|
+
require "anaximander/url"
|
10
|
+
require "anaximander/page"
|
11
|
+
require "anaximander/crawler"
|
12
|
+
require "anaximander/renderer"
|
13
|
+
require "anaximander/discovery/links"
|
14
|
+
require "anaximander/discovery/assets"
|
15
|
+
|
16
|
+
module Anaximander
  # Sets where crawl progress is logged.
  #
  # == Parameters
  #
  #   out  Either a ready-made logger (anything that responds to `debug`),
  #        which is used as is, or a log device accepted by `Logger.new`
  #        (an IO, a filename, or nil/false to silence logging).
  #
  # Wrapping an existing logger in `Logger.new` would treat it as a filename
  # and fail, hence the check.
  #
  def self.logger=(out)
    @logger = out.respond_to?(:debug) ? out : Logger.new(out)
  end

  # The current logger. Logs to STDOUT unless one was assigned.
  #
  def self.logger
    @logger ||= Logger.new(STDOUT)
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Anaximander::Crawler do
|
4
|
+
let(:link) { "http://example.com" }
|
5
|
+
subject!(:crawler) { described_class.new(link) }
|
6
|
+
|
7
|
+
it "does not visit pages that have already been visited" do
|
8
|
+
crawler.visit(link)
|
9
|
+
expect(crawler.visit(link)).to be_nil
|
10
|
+
end
|
11
|
+
|
12
|
+
it "does not visit pages that do not exist" do
|
13
|
+
not_a_page = "http://example.com/definitelynotapage"
|
14
|
+
expect(crawler.visit(not_a_page)).to be_nil
|
15
|
+
end
|
16
|
+
|
17
|
+
it "visits links breadth-first" do
|
18
|
+
allow_any_instance_of(Anaximander::Page).to receive(:open).and_return("")
|
19
|
+
|
20
|
+
root = Anaximander::Page.new("http://example.com")
|
21
|
+
pricing = Anaximander::Page.new("http://example.com/pricing")
|
22
|
+
features = Anaximander::Page.new("http://example.com/features")
|
23
|
+
|
24
|
+
allow(Anaximander::Page).to receive(:new).with("http://example.com/pricing").and_return(pricing)
|
25
|
+
allow(Anaximander::Page).to receive(:new).with("http://example.com/features").and_return(features)
|
26
|
+
|
27
|
+
allow(root).to receive_messages(links: ["http://example.com/pricing", "http://example.com/features"])
|
28
|
+
allow(pricing).to receive_messages(links: ["http://example.com/features"])
|
29
|
+
|
30
|
+
crawler.crawl(root)
|
31
|
+
expect(root.children).to eq [pricing, features]
|
32
|
+
end
|
33
|
+
|
34
|
+
it "crawls a multi-tiered website", :endtoend do
|
35
|
+
# See `spec/data/site` for the website hierarchy; it matches
|
36
|
+
# the directory structure.
|
37
|
+
|
38
|
+
crawler = described_class.new("http://localhost:#{@port}/index.html")
|
39
|
+
crawler.crawl
|
40
|
+
|
41
|
+
root = crawler.root
|
42
|
+
|
43
|
+
pricing = root.children[0]
|
44
|
+
features = root.children[1]
|
45
|
+
|
46
|
+
pricing_low = pricing.children[0]
|
47
|
+
pricing_med = pricing.children[1]
|
48
|
+
pricing_high = pricing.children[2]
|
49
|
+
|
50
|
+
sortof_high = pricing_high.children[0]
|
51
|
+
super_high = pricing_high.children[1]
|
52
|
+
|
53
|
+
expect(root.children.size).to eq 2
|
54
|
+
expect(root.children).to eq [pricing, features]
|
55
|
+
|
56
|
+
expect(pricing.children.size).to eq 3
|
57
|
+
expect(pricing.children).to eq [pricing_low, pricing_med, pricing_high]
|
58
|
+
|
59
|
+
expect(pricing_high.children.size).to eq 2
|
60
|
+
expect(pricing_high.children).to eq [sortof_high, super_high]
|
61
|
+
|
62
|
+
expect(features.children.size).to eq 0
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Anaximander::Discovery::Assets do
|
4
|
+
let(:html) { Nokogiri::HTML(open("spec/data/page.html")) }
|
5
|
+
let(:assets) { described_class.new(html) }
|
6
|
+
|
7
|
+
# See `spec/data/page.html` for the HTML used in this test.
|
8
|
+
|
9
|
+
it "collects all CSS and Javascript assets" do
|
10
|
+
expect(assets.to_a).to eq(["main.css", "other.css", "allthethings.js"])
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Anaximander::Discovery::Links do
|
4
|
+
let(:html) { Nokogiri::HTML(open("spec/data/page.html")) }
|
5
|
+
let(:links) { described_class.new(html, "http://example.com/") }
|
6
|
+
|
7
|
+
# See `spec/data/page.html` for the HTML used in this test.
|
8
|
+
|
9
|
+
it "collects each unique link" do
|
10
|
+
expect(links.size).to eq(2)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "only collects links within the same domain" do
|
14
|
+
expect(links).to_not include "http://example.net"
|
15
|
+
end
|
16
|
+
|
17
|
+
it "removes trailing slashes from URLs" do
|
18
|
+
expect(links).to_not include "http://example.com/"
|
19
|
+
end
|
20
|
+
|
21
|
+
it "disgards hash links" do
|
22
|
+
links.each { |link| expect(link).to_not include "#" }
|
23
|
+
end
|
24
|
+
|
25
|
+
it "expands relative paths to absolute paths" do
|
26
|
+
expect(links).to include "http://example.com/google"
|
27
|
+
end
|
28
|
+
|
29
|
+
it "expands relative paths to absolute paths using the base url of the page" do
|
30
|
+
links = described_class.new(html, "http://example.com/some/nested/page/")
|
31
|
+
expect(links).to_not include "http://example.com/some/nested/page/google"
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Anaximander::Page do
|
4
|
+
let(:html) { File.read(File.expand_path("../../data/page.html", __FILE__)) }
|
5
|
+
let(:page) { described_class.new("http://example.com") }
|
6
|
+
|
7
|
+
before do
|
8
|
+
expect_any_instance_of(described_class).to receive(:open).and_return(html)
|
9
|
+
end
|
10
|
+
|
11
|
+
it "has unique links" do
|
12
|
+
expect(Anaximander::Discovery::Links).to receive(:new).with(an_instance_of(Nokogiri::HTML::Document), "http://example.com")
|
13
|
+
page.links
|
14
|
+
end
|
15
|
+
|
16
|
+
it "has assets" do
|
17
|
+
expect(Anaximander::Discovery::Assets).to receive(:new).with(an_instance_of(Nokogiri::HTML::Document))
|
18
|
+
page.assets
|
19
|
+
end
|
20
|
+
|
21
|
+
it "is comparable by URL" do
|
22
|
+
expect(page).to eq(page.clone)
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Anaximander::Renderer do
|
4
|
+
include Fakeout::SpecHelpers
|
5
|
+
|
6
|
+
let(:child) { double(url: "http://example.com/foo", assets: ["/root.css", "/child.js"], children: []) }
|
7
|
+
let(:root) { double(url: "http://example.com", assets: ["/root.css"], children: [child]) }
|
8
|
+
|
9
|
+
subject(:renderer) { described_class.new(color: false) }
|
10
|
+
|
11
|
+
before :all do
|
12
|
+
Fakeout.activate!
|
13
|
+
end
|
14
|
+
|
15
|
+
after :all do
|
16
|
+
Fakeout.deactivate!
|
17
|
+
end
|
18
|
+
|
19
|
+
it "draws a root node" do
|
20
|
+
renderer.draw(root)
|
21
|
+
expect(stdout).to include %(└── http://example.com ["/root.css"])
|
22
|
+
end
|
23
|
+
|
24
|
+
it "draws a child node whose parent is not a tail" do
|
25
|
+
renderer.draw_child(child, "", false)
|
26
|
+
expect(stdout).to include %(│ ├── http://example.com/foo ["/root.css", "/child.js"])
|
27
|
+
end
|
28
|
+
|
29
|
+
it "draws a child node whose parent is a tail" do
|
30
|
+
renderer.draw_child(child, "", true)
|
31
|
+
expect(stdout).to include %( ├── http://example.com/foo ["/root.css", "/child.js"])
|
32
|
+
end
|
33
|
+
|
34
|
+
it "draws a tail node whose parent is not a tail node" do
|
35
|
+
renderer.draw_tail(child, "", false)
|
36
|
+
expect(stdout).to include %(│ └── http://example.com/foo ["/root.css", "/child.js"])
|
37
|
+
end
|
38
|
+
|
39
|
+
it "draws a tail node whose parent is also a tail" do
|
40
|
+
renderer.draw_tail(child, "", true)
|
41
|
+
expect(stdout).to include %( └── http://example.com/foo ["/root.css", "/child.js"])
|
42
|
+
end
|
43
|
+
|
44
|
+
it "draws an entire tree", :endtoend do
|
45
|
+
tree = Anaximander::Crawler.new("http://localhost:#{@port}/index.html")
|
46
|
+
tree.crawl
|
47
|
+
|
48
|
+
domain = Anaximander::Url.new(tree.root.url).base
|
49
|
+
|
50
|
+
renderer = Anaximander::Renderer.new(color: false)
|
51
|
+
renderer.draw(tree.root)
|
52
|
+
|
53
|
+
expect(stdout).to eq <<-TREE
|
54
|
+
└── #{domain}/index.html ["/main.css", "/application.js"]
|
55
|
+
├── #{domain}/pricing.html ["/main.css", "/application.js"]
|
56
|
+
│ ├── #{domain}/pricing/low.html ["/main.css"]
|
57
|
+
│ ├── #{domain}/pricing/medium.html ["/main.css"]
|
58
|
+
│ └── #{domain}/pricing/high.html ["/main.css"]
|
59
|
+
│ ├── #{domain}/pricing/high/sortof_high.html ["/main.css"]
|
60
|
+
│ └── #{domain}/pricing/high/super_high.html ["/main.css"]
|
61
|
+
└── #{domain}/features.html ["/main.css", "/application.js"]
|
62
|
+
TREE
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe Anaximander::Url do
|
4
|
+
it "exposes the scheme and host as base" do
|
5
|
+
base = described_class.new("http://example.com/foo/bar?baz=1").base
|
6
|
+
expect(base).to eq "http://example.com"
|
7
|
+
end
|
8
|
+
|
9
|
+
it "joins other Urls" do
|
10
|
+
url = described_class.new("http://example.com").join(described_class.new("/foo"))
|
11
|
+
expect(url).to eq described_class.new("http://example.com/foo")
|
12
|
+
end
|
13
|
+
|
14
|
+
it "joins with a String url" do
|
15
|
+
url = described_class.new("http://example.com").join("/foo")
|
16
|
+
expect(url).to eq described_class.new("http://example.com/foo")
|
17
|
+
end
|
18
|
+
|
19
|
+
it "creates the absolute path when representing a relative path" do
|
20
|
+
url = described_class.new("/foo")
|
21
|
+
expect(url.absolute("http://example.com")).to eq described_class.new("http://example.com/foo")
|
22
|
+
end
|
23
|
+
|
24
|
+
it "does not create the absolute path if already representing one" do
|
25
|
+
url = described_class.new("http://example.com/foo")
|
26
|
+
expect(url.absolute("http://example.net")).to eq described_class.new("http://example.com/foo")
|
27
|
+
end
|
28
|
+
|
29
|
+
it "removes the fragment" do
|
30
|
+
url = described_class.new("http://example.com/#one-page-app")
|
31
|
+
expect(url.without_fragment).to eq described_class.new("http://example.com/")
|
32
|
+
end
|
33
|
+
|
34
|
+
it "is comparable to other Urls" do
|
35
|
+
url1 = described_class.new("http://example.com")
|
36
|
+
url2 = described_class.new("http://example.com")
|
37
|
+
expect(url1).to eq(url2)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "is comparable to a string URL" do
|
41
|
+
url1 = described_class.new("http://example.com")
|
42
|
+
url2 = "http://example.com"
|
43
|
+
expect(url1 == url2).to eq true
|
44
|
+
end
|
45
|
+
end
|
data/spec/data/page.html
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="main.css" type="text/css">
|
5
|
+
<link rel="stylesheet" href="other.css" type="text/css">
|
6
|
+
<script type="text/javascript" src="allthethings.js"></script>
|
7
|
+
</head>
|
8
|
+
<body>
|
9
|
+
<a href="http://example.com">Example</a>
|
10
|
+
<a href="/google">Google</a>
|
11
|
+
<blink>OMG A BLINK TAG</blink>
|
12
|
+
<a href="http://example.com">Duplicate Example</a>
|
13
|
+
<a href="http://example.net">Different Domain</a>
|
14
|
+
<a href="#">Some javascript thing</a>
|
15
|
+
<marquee>TODAY, IN NEWS!</marquee>
|
16
|
+
</body>
|
17
|
+
</html>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<script type="text/javascript" src="/application.js"></script>
|
13
|
+
</body>
|
14
|
+
</html>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<script type="text/javascript" src="/application.js"></script>
|
13
|
+
</body>
|
14
|
+
</html>
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<h1>SORTOF HIGH</h1>
|
13
|
+
</body>
|
14
|
+
</html>
|
15
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<h1>SUPER HIGH</h1>
|
13
|
+
</body>
|
14
|
+
</html>
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<h1>High</h1>
|
13
|
+
|
14
|
+
<a href="/pricing/high/sortof_high.html">Sortof High</a>
|
15
|
+
<a href="/pricing/high/super_high.html">Super High</a>
|
16
|
+
</body>
|
17
|
+
</html>
|
18
|
+
|
19
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<h1>Low</h1>
|
13
|
+
</body>
|
14
|
+
</html>
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<h1>Medium</h1>
|
13
|
+
</body>
|
14
|
+
</html>
|
15
|
+
|
16
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<link rel="stylesheet" href="/main.css" type="text/css" media="screen" charset="utf-8">
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<nav>
|
8
|
+
<a href="/pricing.html">Pricing</a>
|
9
|
+
<a href="/features.html">Features</a>
|
10
|
+
</nav>
|
11
|
+
|
12
|
+
<ul>
|
13
|
+
<li><a href="/pricing/low.html">$0</a>
|
14
|
+
<li><a href="/pricing/medium.html">$10</a>
|
15
|
+
<li><a href="/pricing/high.html">$20</a>
|
16
|
+
</ul>
|
17
|
+
|
18
|
+
<script type="text/javascript" src="/application.js"></script>
|
19
|
+
</body>
|
20
|
+
</html>
|
21
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require "socket"
|
2
|
+
require "net/http"
|
3
|
+
require "fakeout/safe"
|
4
|
+
require "anaximander"
|
5
|
+
|
6
|
+
RSpec.configure do |c|
|
7
|
+
c.before do
|
8
|
+
Anaximander.logger = false
|
9
|
+
end
|
10
|
+
|
11
|
+
# Boot the fixture site with rackup on a free port before each example
# tagged :endtoend, and wait until it answers HTTP requests.
c.before :each, :endtoend do
  @port = obtain_port
  @pid = Process.spawn({}, "rackup -p #{@port}", chdir: File.expand_path("../data/site", __FILE__), out: "/dev/null", err: "/dev/null")

  # Poll for readiness, but give up after a bounded wait so a server that
  # never starts fails the example instead of hanging the whole suite.
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 10
  until server_running?
    if Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
      raise "test site did not start on port #{@port} within 10 seconds"
    end
    sleep 0.1
  end
end
|
16
|
+
|
17
|
+
# Stop the fixture site started for an :endtoend example.
c.after :each, :endtoend do
  # @pid is unset if the before hook failed before spawning the server.
  if @pid
    begin
      Process.kill("INT", @pid)
      # Reap the child so it does not linger as a zombie, and so the server
      # has fully exited before the next example starts.
      Process.wait(@pid)
    rescue Errno::ESRCH, Errno::ECHILD
      # The process is already gone or already reaped; nothing left to do.
    end
  end
end
|
20
|
+
|
21
|
+
# Ask the OS for a free TCP port on 127.0.0.1.
#
# A TCPServer bound to port 0 is assigned a random available port by the
# OS. The listener is closed again before returning so the caller can
# start its own server on that port.
#
# @return [Integer] the port number the OS assigned
def obtain_port
  server = TCPServer.new("127.0.0.1", 0)
  server.addr[1]
ensure
  # Release the socket even if reading the address raised.
  server.close if server
end
|
30
|
+
|
31
|
+
# Report whether the test site answers GET /index.html with 200 OK.
#
# Connects to "localhost" on the port stored in @port. Failures that are
# expected while the server is still booting (connection refused, reset,
# closed early, or timed out) are reported as "not running" rather than
# raised, so callers can keep polling.
#
# @return [Boolean] true only when the response is a Net::HTTPOK
def server_running?
  http = Net::HTTP.new("localhost", @port)
  # Keep each probe short; a half-started server should not stall the poll.
  http.open_timeout = 1
  http.read_timeout = 1
  req = Net::HTTP::Get.new("/index.html")
  http.request(req).is_a?(Net::HTTPOK)
rescue Errno::ECONNREFUSED, Errno::ECONNRESET, EOFError, Timeout::Error
  false
end
|
40
|
+
end
|
metadata
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: anaximander
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matte Noble
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-06-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: colorize
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.6'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.6'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rack
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec-autotest
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: fakeout
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
description: Web scraper that collects assets and links.
|
126
|
+
email:
|
127
|
+
- me@mattenoble.com
|
128
|
+
executables:
|
129
|
+
- mapgen
|
130
|
+
extensions: []
|
131
|
+
extra_rdoc_files: []
|
132
|
+
files:
|
133
|
+
- ".gitignore"
|
134
|
+
- ".rspec"
|
135
|
+
- Gemfile
|
136
|
+
- LICENSE.txt
|
137
|
+
- README.md
|
138
|
+
- Rakefile
|
139
|
+
- anaximander.gemspec
|
140
|
+
- bin/mapgen
|
141
|
+
- lib/anaximander.rb
|
142
|
+
- lib/anaximander/cli.rb
|
143
|
+
- lib/anaximander/crawler.rb
|
144
|
+
- lib/anaximander/discovery/assets.rb
|
145
|
+
- lib/anaximander/discovery/links.rb
|
146
|
+
- lib/anaximander/page.rb
|
147
|
+
- lib/anaximander/renderer.rb
|
148
|
+
- lib/anaximander/url.rb
|
149
|
+
- lib/anaximander/version.rb
|
150
|
+
- spec/anaximander/crawler_spec.rb
|
151
|
+
- spec/anaximander/discovery/assets_spec.rb
|
152
|
+
- spec/anaximander/discovery/links_spec.rb
|
153
|
+
- spec/anaximander/page_spec.rb
|
154
|
+
- spec/anaximander/renderer_spec.rb
|
155
|
+
- spec/anaximander/url_spec.rb
|
156
|
+
- spec/data/page.html
|
157
|
+
- spec/data/site/config.ru
|
158
|
+
- spec/data/site/public/features.html
|
159
|
+
- spec/data/site/public/index.html
|
160
|
+
- spec/data/site/public/pricing.html
|
161
|
+
- spec/data/site/public/pricing/high.html
|
162
|
+
- spec/data/site/public/pricing/high/sortof_high.html
|
163
|
+
- spec/data/site/public/pricing/high/super_high.html
|
164
|
+
- spec/data/site/public/pricing/low.html
|
165
|
+
- spec/data/site/public/pricing/medium.html
|
166
|
+
- spec/spec_helper.rb
|
167
|
+
homepage:
|
168
|
+
licenses:
|
169
|
+
- MIT
|
170
|
+
metadata: {}
|
171
|
+
post_install_message:
|
172
|
+
rdoc_options: []
|
173
|
+
require_paths:
|
174
|
+
- lib
|
175
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - ">="
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0'
|
180
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - ">="
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '0'
|
185
|
+
requirements: []
|
186
|
+
rubyforge_project:
|
187
|
+
rubygems_version: 2.2.2
|
188
|
+
signing_key:
|
189
|
+
specification_version: 4
|
190
|
+
summary: Web scraper that collects assets and links.
|
191
|
+
test_files:
|
192
|
+
- spec/anaximander/crawler_spec.rb
|
193
|
+
- spec/anaximander/discovery/assets_spec.rb
|
194
|
+
- spec/anaximander/discovery/links_spec.rb
|
195
|
+
- spec/anaximander/page_spec.rb
|
196
|
+
- spec/anaximander/renderer_spec.rb
|
197
|
+
- spec/anaximander/url_spec.rb
|
198
|
+
- spec/data/page.html
|
199
|
+
- spec/data/site/config.ru
|
200
|
+
- spec/data/site/public/features.html
|
201
|
+
- spec/data/site/public/index.html
|
202
|
+
- spec/data/site/public/pricing.html
|
203
|
+
- spec/data/site/public/pricing/high.html
|
204
|
+
- spec/data/site/public/pricing/high/sortof_high.html
|
205
|
+
- spec/data/site/public/pricing/high/super_high.html
|
206
|
+
- spec/data/site/public/pricing/low.html
|
207
|
+
- spec/data/site/public/pricing/medium.html
|
208
|
+
- spec/spec_helper.rb
|