basilisk 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
data/HISTORY ADDED
@@ -0,0 +1,3 @@
1
+ === 1.0.0 / 2008-12-16
2
+
3
+ * First release
data/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+ Copyright (c) 2009, Kyle Banker, Alexander Interactive, Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ Except as contained in this notice, the name(s) of the above copyright holders
14
+ shall not be used in advertising or otherwise to promote the sale, use or other
15
+ dealings in this Software without prior written authorization.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
+ THE SOFTWARE.
@@ -0,0 +1,36 @@
1
+ = basilisk
2
+
3
+ a command-line front-end for the anemone web-crawler (http://github.com/chriskite/anemone). basilisk produces useful reports for qa-ing websites. It also features an extensible page processor class for writing your own page processors.
4
+
5
+ Included page processors:
6
+
7
+ - seo: generates a csv with the following columns: url, title, description, keywords, h1s, h2s
8
+ - sitemap: generates an xml sitemap
9
+ - image: generates a list of broken images and images lacking an alt tag.
10
+ - error: generates a csv of urls returning HTTP response codes other than success and redirect.
11
+
12
+ See the generated yml config file for even more options.
13
+
14
+ == install
15
+
16
+ sudo gem install basilisk
17
+
18
+ == usage
19
+
20
+ To create a new search:
21
+ basil create [search_name] [url]
22
+
23
+ - Creates a search config file ([search_name].yml), which you may edit to change the default options, specify which page processors you want to run, any regex and css terms for searching across the site, and regexes for skipping urls.
24
+
25
+ To run the search:
26
+ basil run [search_name]
27
+
28
+ - Runs the specified search. Note: you must create a search before running it. Files generated by the page processors will reside in a folder called [search_name].
29
+
30
+ == author & license
31
+
32
+ basilisk is licensed under a modified MIT license. See the LICENSE file.
33
+
34
+ basilisk was written by Kyle Banker, largely dependent on the anemone web-crawler by Chris Kite.
35
+
36
+ Copyright 2009 Alexander Interactive, Inc.
@@ -0,0 +1,47 @@
1
+ #! /usr/bin/env ruby
2
+ # == Synopsis
3
+ # Creates basilisk searches and runs them. A run crawls a site starting at the search's URL and writes the reports of the configured page processors.
4
+ #
5
+ # == Usage
6
+ # basil create [search_name] [url]
7
+ # basil run [search_name]
8
+ #
9
+ # == Author
10
+ # Kyle Banker
11
+
12
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
13
+
14
+ require 'basilisk'
15
+
16
# Prints the command-line help for the basil executable to standard output.
def usage
  help_text = <<END


basil(isk): a front-end for the anemone web crawler.
Usage:

To create a new search:
basil create [search_name] [url]

- This will create a search config file, which you may edit to change the default options.

To run the search:
basil run [search_name]

- Runs the specified search. Note: you must create a search before running it.
END
  puts help_text
end
34
+
35
# Dispatch on the sub-command given on the command line:
#
#   basil create [search_name] [url]
#   basil run [search_name]
#
# Any other invocation -- including a missing or unparseable url for
# "create" -- prints the usage text and exits.
begin
  command, search_name, url = ARGV

  case command
  when "create"
    raise BasiliskArgumentError if search_name.nil? || url.nil?
    begin
      URI.parse(url)
    rescue URI::InvalidURIError
      # A malformed url is a usage error, not a reason to dump a stack trace.
      raise BasiliskArgumentError
    end
    Basilisk.create(search_name, url)
  when "run"
    raise BasiliskArgumentError if search_name.nil?
    Basilisk.run(File.join(Dir.pwd, search_name))
  else
    raise BasiliskArgumentError
  end

rescue BasiliskArgumentError
  usage
  Process.exit
end
@@ -0,0 +1,37 @@
1
+ require 'ostruct'
2
+ require 'yaml'
3
+
4
+ require 'rubygems'
5
+ require 'anemone'
6
+ require 'fastercsv'
7
+
8
+ require 'basilisk/core'
9
+ require 'basilisk/parser'
10
+ require 'basilisk/processor'
11
+ require 'basilisk/template'
12
+
13
+ $:.unshift File.join(File.dirname(__FILE__), 'basilisk', 'processors')
14
+ require 'seo_processor'
15
+ require 'sitemap_processor'
16
+ require 'error_processor'
17
+ require 'terms_processor'
18
+ require 'image_processor'
19
+
20
+ BASILISK_ROOT = File.join(File.dirname(__FILE__), "..")
21
+
22
# Root of the basilisk exception hierarchy.
class BasiliskError < StandardError
end

# Signals an invalid command-line invocation.
class BasiliskArgumentError < BasiliskError
end

# Signals a problem with an image while processing a page.
class BasiliskImageError < BasiliskError
end
25
+
26
# Top-level entry points; both simply delegate to Basilisk::Core.
module Basilisk
  extend self

  # Creates a new search called +search_name+ that starts at +url+.
  def create(search_name, url)
    Basilisk::Core.create(search_name, url)
  end

  # Reads the search options from +opt_file+ and runs the search they describe.
  def run(opt_file)
    search_opts = Basilisk::Parser.get_options(opt_file)
    Basilisk::Core.run(search_opts)
  end

end
@@ -0,0 +1,69 @@
1
+ require 'basilisk/processor'
2
+
3
module Basilisk
  # Creates new searches on disk and runs existing ones through the anemone crawler.
  module Core
    extend self

    # Takes search options and runs the crawler with any processors.
    #
    # search_opts must respond to url, user_agent, skip_patterns and
    # processor_instances (see Basilisk::Parser.get_options).
    def run(search_opts)
      processors = search_opts.processor_instances

      # We need to close the processors if user presses ctrl-c.
      trap("INT") do
        puts "\n**Interrupt received**\n***Closing processors...\n"
        close_processors(processors)
        Process.exit
      end

      Anemone.crawl(search_opts.url, :user_agent => search_opts.user_agent, :verbose => true) do |anemone|
        anemone.skip_links_like(search_opts.skip_patterns || [])

        # Hand every crawled page to every processor.
        anemone.on_every_page do |page|
          processors.each do |processor|
            processor.process_page(page, anemone.pages)
          end
        end

        # Close callback on all processors.
        anemone.after_crawl do |pages|
          close_processors(processors)
        end
      end
    end

    # Calls close_file on each processor in +instances+.
    def close_processors(instances)
      instances.each do |processor|
        processor.close_file
      end
    end

    # Create a folder for the processor results,
    # and a default yaml config file in the current directory.
    #
    # Any error is reported on stdout rather than raised.
    def create(search_name, url)
      # Create the folder first: when a search of this name already exists,
      # Dir.mkdir fails before the existing config file can be overwritten.
      foldername = create_results_folder(search_name)
      filename = create_config_file(search_name, url)

      Basilisk::Template.output_instructions(search_name, filename, foldername)
    rescue => e
      puts "Error: Could not create config file or folder."
      puts "Please make sure that a folder of the same name doesn't already exist.\n"
      puts "(#{e})"
    end

    # Writes the default config for the search to [search_name].yml in the
    # current directory and returns the file's path.
    #
    # The third parameter is ignored; it is accepted only so that existing
    # three-argument callers keep working.
    def create_config_file(search_name, url, filename = nil)
      filename = File.join(Dir.pwd, search_name + ".yml")
      # Block form closes the file even if rendering the template raises.
      File.open(filename, "w") do |file|
        file.write(Basilisk::Template.default(:name => search_name, :url => url))
      end
      filename
    end

    # Creates the results folder [search_name] in the current directory and
    # returns its path. Raises if the folder cannot be created.
    def create_results_folder(search_name)
      foldername = File.join(Dir.pwd, search_name)
      Dir.mkdir(foldername)
      foldername
    end

  end
end
@@ -0,0 +1,69 @@
1
module Basilisk
  # Parses YAML config file and instantiates specified processor classes.
  module Parser
    extend self

    # Reads the config file +opt_file+ (the ".yml" extension is optional) and
    # returns an OpenStruct with name, url, user_agent, skip_patterns,
    # processor_instances, regex_search_terms and css_search_terms.
    def get_options(opt_file)
      yaml_opts = open_yaml_file(opt_file)
      search_opts = assign_options(yaml_opts)
      validate_options(search_opts)
      search_opts
    end

    private

    # Parses the YAML file and returns the parsed document.
    # ".yml" is appended unless the name already ends with it.
    def open_yaml_file(filename)
      filename += ".yml" unless filename =~ /\.yml\z/
      # Block form so the file handle is closed once parsing is done.
      File.open(filename) { |file| YAML.parse(file) }
    end

    # Copies the settings under the top-level "basilisk" key into an OpenStruct.
    def assign_options(yaml_opts)
      search_opts = OpenStruct.new
      search_opts.name = option_value(yaml_opts, 'name')
      search_opts.url = option_value(yaml_opts, 'url')
      search_opts.user_agent = option_value(yaml_opts, 'user_agent')
      search_opts.skip_patterns = get_patterns(option_value(yaml_opts, 'skip_url_patterns'))
      search_opts.processor_instances =
        instantiate_processors(option_value(yaml_opts, 'processors'), search_opts.name)

      search_opts.regex_search_terms = get_patterns(option_value(yaml_opts, 'regex_search_terms'))
      search_opts.css_search_terms = non_blank_items(option_value(yaml_opts, 'css_search_terms'), ";")
      search_opts.processor_instances << init_term_processor(search_opts) if search_has_terms?(search_opts)
      search_opts
    end

    # Looks up one setting under the "basilisk" key.
    # NOTE(review): relies on parsed YAML nodes supporting [] and value;
    # confirm this against the YAML engine in use.
    def option_value(yaml_opts, key)
      yaml_opts['basilisk'][key].value
    end

    # Placeholder: currently accepts every set of options.
    def validate_options(search_opts)
      true
    end

    # Builds one processor per non-blank name in the semicolon-separated
    # +processors+ string.
    def instantiate_processors(processors, search_name)
      non_blank_items(processors, ";").map do |name|
        get_processor_class(name).new(search_name)
      end
    end

    # Returns an array of case-insensitive regexps.
    def get_patterns(pattern_string)
      non_blank_items(pattern_string, ";").map { |name| Regexp.new(name, true) }
    end

    # Splits +collection+ on +separator+ and strips each item.
    def split_and_strip(collection, separator)
      collection.split(separator).map { |item| item.strip }
    end

    # Like split_and_strip, but drops items that are empty after stripping,
    # so "seo; " or "a; ;b" do not yield empty names or selectors.
    def non_blank_items(collection, separator)
      split_and_strip(collection, separator).reject { |item| item.empty? }
    end

    # Maps a processor name such as "seo" to Basilisk::SeoProcessor.
    def get_processor_class(name)
      Module.const_get("Basilisk").const_get(name.capitalize + "Processor")
    end

    def search_has_terms?(opts)
      !opts.regex_search_terms.empty? || !opts.css_search_terms.empty?
    end

    def init_term_processor(opts)
      Basilisk::TermsProcessor.new(opts.name, opts.regex_search_terms, opts.css_search_terms)
    end

  end
end
@@ -0,0 +1,43 @@
1
module Basilisk
  # Base class for page processors.
  #
  # Subclasses override process_page (called for each crawled page) and,
  # if needed, close_file and filename_for_output.
  class Processor

    # search_name names the results folder, which is resolved against the
    # current working directory at construction time.
    def initialize(search_name)
      @search_name = search_name
      @base_folder = Dir.pwd
      @output_folder = File.join(@base_folder, search_name)
    end

    # Hook called for each crawled page; does nothing by default.
    def process_page(page, page_hash)
    end

    # Called when the crawl is completed.
    def close_file
    end

    protected

    # Default output path: the class name without the "Basilisk::" prefix and
    # "Processor" suffix, downcased, plus ".csv", inside the results folder.
    def filename_for_output
      basename = self.class.name.sub("Processor", "").sub("Basilisk::", "").downcase
      File.join(@output_folder, basename + ".csv")
    end

    # Opens the output file in append mode and yields it.
    # The file is closed when the block returns, even if the block raises.
    def write_file(&block)
      File.open(filename_for_output, "a") do |file|
        yield file
      end
    end

  end

  # Processors that output a csv should inherit from this class.
  class CSVProcessor < Processor

    # Opens the output file for appending with FasterCSV and yields the csv writer.
    def write_file(&block)
      FasterCSV.open(filename_for_output, "a") do |csv|
        yield csv
      end
    end

  end
end
@@ -0,0 +1,33 @@
1
module Basilisk
  # Writes a csv row for every page whose response code is not 200
  # and which is not a redirect.
  class ErrorProcessor < Basilisk::CSVProcessor

    def initialize(search_name)
      super
      save_header_row
    end

    # Records +page+ when it is neither a success nor a redirect.
    def process_page(page, page_hash)
      return if page.code == 200
      return if page.redirect?
      write_row(page, page_hash)
    end

    private

    # Output goes to [search_name]-errors.csv in the results folder.
    def filename_for_output
      File.join(@output_folder, "#{@search_name}-errors.csv")
    end

    def save_header_row
      write_file { |csv| csv << ["URL", "Error"] }
    end

    # Appends the page's url and response code.
    def write_row(page, page_hash)
      write_file { |csv| csv << [page.url, page.code] }
    end

  end
end
@@ -0,0 +1,93 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
4
module Basilisk
  # Generates a report for broken images and images missing alt text.
  class ImageProcessor < Basilisk::CSVProcessor

    def initialize(search_name)
      super
      save_header_row
      # Absolute image urls (as strings) that have already been checked.
      @image_url_cache = {}
    end

    # Checks every <img> on the page once per distinct absolute url.
    # Pages without a parsed document are skipped.
    def process_page(page, page_hash)
      return unless page.doc
      page.doc.css('img').each do |image|
        begin
          absolute_image_url = image_url(page, image['src'])
          cache_key = absolute_image_url.to_s
          next if @image_url_cache[cache_key]
          @image_url_cache[cache_key] = true

          check_for_broken_image(page, absolute_image_url)
          check_for_missing_alt_tag(page, image, absolute_image_url)
        rescue BasiliskImageError => e
          write_row(page, image['src'], e.message)
        end
      end
    end

    private

    # Reports the image unless a HEAD request for it answers 200.
    def check_for_broken_image(page, absolute_image_src)
      http_status = image_http_status(absolute_image_src)
      if http_status != "200"
        write_row(page, absolute_image_src, "Image broken (#{http_status})")
      end
    end

    # Reports the image when its alt attribute is absent or blank.
    def check_for_missing_alt_tag(page, image, absolute_image_src)
      image_alt = image['alt']
      if image_alt.nil? || image_alt.strip == ""
        write_row(page, absolute_image_src, "Alt tag missing")
      end
    end

    # Perform a head request on the image so we won't have to download it.
    # Returns the response code as a string; any failure to make the request
    # is reported as "500" (deliberate best effort).
    def image_http_status(uri)
      puts "Requesting Image: #{uri}"
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true if uri.scheme == "https"
      # request_uri keeps the query string and is "/" for an empty path.
      response = http.head(uri.request_uri)
      response.code
    rescue
      "500"
    end

    # Construct the image's absolute url, resolving a relative src against
    # the url of the page it appears on.
    # Raises BasiliskImageError when the src is missing, blank or unparseable.
    def image_url(page, image_src)
      src = image_src.to_s.strip
      raise BasiliskImageError, "Could not parse image src." if src == ""
      URI.join(page.url.to_s, src)
    rescue
      raise BasiliskImageError, "Could not parse image src."
    end

    # Output goes to [search_name]-images.csv in the results folder.
    def filename_for_output
      File.join @output_folder, @search_name + "-images.csv"
    end

    def save_header_row
      write_file do |csv|
        csv << ["Page URL", "Image URL", "Message"]
      end
    end

    def write_row(page, image_url, message)
      write_file do |file|
        file << [page.url, image_url, message]
      end
    end

  end
end
@@ -0,0 +1,74 @@
1
module Basilisk
  # Write a csv containing important seo fields: title, h1, h2, description, and keywords.
  class SeoProcessor < Basilisk::CSVProcessor
    HTMLTags = ['title', 'h1', 'h2']
    MetaTags = ['description', 'keywords']

    def initialize(search_name)
      super
      save_header_row
    end

    # Writes one row for the page. Pages without a parsed document are skipped.
    def process_page(page, page_hash)
      return unless page.doc
      @tags = Hash.new("")
      HTMLTags.each { |tag_name| check_html_element(tag_name, page.doc) }
      MetaTags.each { |meta_name| check_meta_element(meta_name, page.doc) }
      save_tag_row(page)
    end

    private

    # Takes a tag name ('h1', 'title') and the page's parsed document.
    # Stores the number of occurrences of the element
    # along with its text.
    def check_html_element(name, doc)
      elements = doc.css(name)
      process_tag(name, elements, "text")
    end

    # Takes a meta name ('description', 'keywords') and the page's parsed document.
    # Stores the number of occurrences of the element
    # along with its content attribute.
    def check_meta_element(name, doc)
      elements = doc.css("meta[@name='#{name}']")
      process_tag(name, elements, ["[]", "content"])
    end

    # Formats the matched elements and saves the result in the @tags hash:
    # "MISSING" when there are none, "BLANK" for an element with no content,
    # and "(n): (first) (second) ..." when there is more than one.
    # content_method is the message (plus arguments) sent to each element
    # to read its content.
    def process_tag(name, elements, content_method)
      if elements.empty?
        @tags[name] += "MISSING"
        return
      end

      multiple = elements.size > 1
      @tags[name] += "(#{elements.size}): " if multiple
      elements.each do |element|
        # to_s so that an absent attribute (nil) counts as blank too.
        content = element.send(*content_method).to_s
        text_to_add = content == "" ? "BLANK" : content
        text_to_add = add_parentheses(text_to_add) if multiple
        @tags[name] += text_to_add
      end
      @tags[name].strip!
    end

    def save_header_row
      write_file do |csv|
        csv << ["URL", HTMLTags, MetaTags].flatten
      end
    end

    # Appends the page url followed by the collected value for each tag.
    def save_tag_row(page)
      row = [page.url.to_s]
      [HTMLTags, MetaTags].flatten.each do |tag_key|
        row << @tags[tag_key]
      end

      write_file do |csv|
        csv << row
      end
    end

    def add_parentheses(text)
      "(#{text}) "
    end

  end
end
@@ -0,0 +1,52 @@
1
module Basilisk
  # Build a google-compatible xml sitemap for the crawled site.
  class SitemapProcessor < Basilisk::Processor

    def initialize(search_name)
      super
      # Every entry gets the crawl date as its lastmod.
      @date = Time.now.strftime("%Y-%m-%d")
      save_header
    end

    # Adds a <url> entry for the page.
    def process_page(page, page_hash)
      write_url(page)
    end

    # Closes the <urlset> element once the crawl is over.
    def close_file
      write_file do |file|
        file.write "</urlset>\n"
      end
    end

    private

    # Output goes to [search_name]-sitemap.xml in the results folder.
    def filename_for_output
      File.join @output_folder, @search_name + "-sitemap.xml"
    end

    def save_header
      write_file do |file|
        file.write "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        file.write "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"
      end
    end

    def write_url(page)
      write_file do |file|
        file.write "<url>\n"
        file.write " <loc>#{xml_escape(page.url)}</loc>\n"
        file.write " <lastmod>#{@date}</lastmod>\n"
        file.write " <changefreq>monthly</changefreq>\n"
        file.write " <priority>#{priority(page.url)}</priority>\n"
        file.write "</url>\n"
      end
    end

    # Assigns a default priority of 1.0 to 0.1 based on page depth.
    # Depth is the number of "/"-separated segments after scheme and host.
    def priority(url)
      level = 1.0 - ((url.to_s.split("/").size - 3) / 10.0)
      level = 0.1 if level < 0.1
      # Round to one decimal so float error never leaks into the xml
      # (1.0 - 0.7 would otherwise print as 0.30000000000000004).
      (level * 10).round / 10.0
    end

    # Entity-escapes the characters that are not allowed raw in sitemap values.
    def xml_escape(value)
      value.to_s.
        gsub("&", "&amp;").
        gsub("<", "&lt;").
        gsub(">", "&gt;").
        gsub("\"", "&quot;").
        gsub("'", "&apos;")
    end

  end
end
@@ -0,0 +1,49 @@
1
module Basilisk
  # Records which of the configured regex and css search terms occur on each page.
  class TermsProcessor < Basilisk::CSVProcessor

    # regex_terms is an array of Regexps, css_terms an array of css selector strings.
    def initialize(search_name, regex_terms, css_terms)
      super(search_name)
      @regex_terms = regex_terms
      @css_terms = css_terms
      save_header_row
    end

    # Writes a row for the page when at least one term matches.
    # Pages without a parsed document are skipped.
    def process_page(page, page_hash)
      return unless page.doc
      regexes = match_regexes(page.doc)
      css_terms = match_css_terms(page.doc)
      write_row(page, regexes, css_terms) if !regexes.empty? || !css_terms.empty?
    end

    private

    # Output goes to [search_name]-terms.csv in the results folder.
    def filename_for_output
      File.join @output_folder, @search_name + "-terms.csv"
    end

    def save_header_row
      write_file do |csv|
        csv << ["URL", "Regex Terms", "CSS Terms"]
      end
    end

    # Appends the page url and the matching terms, each list joined with ";".
    def write_row(page, regexes, css_terms)
      write_file do |csv|
        csv << [page.url, regexes.map {|r| r.source }.join(';'), css_terms.join(';')]
      end
    end

    # Returns the regex terms that match the document's serialized markup.
    def match_regexes(doc)
      markup = doc.to_s
      @regex_terms.select do |term|
        markup =~ term
      end
    end

    # Returns the css terms that select at least one element.
    # The emptiness test matters: the result of doc.css is truthy even when
    # nothing matched.
    def match_css_terms(doc)
      @css_terms.select do |term|
        !doc.css(term).empty?
      end
    end

  end
end
@@ -0,0 +1,54 @@
1
module Basilisk
  # Text templates: the default search config file and the instructions
  # printed after a search has been created.
  module Template
    extend self

    # Returns the text of a default yaml config file.
    # options[:name] and options[:url] are interpolated into the "basilisk" section.
    #
    # Backslashes in the regex examples are doubled because the heredoc
    # processes escapes; a single "\s" would be emitted as a space.
    def default(options={})
      yaml = <<-CONFIG
# This is a basilisk config file.
# Available processors include the following:
# seo: generates a csv with the following columns: url, title, description, keywords, h1s, h2s
# sitemap: generates an xml sitemap
# image: generates a list of broken images and images lacking an alt tag.
# error: generates a csv of urls returning html response codes other than success and redirect.
#
# Separate processors with a semi-colon:
# processors: "seo; sitemap; error"
# Separate regex terms with a semi-colon:
# regex_search_terms: "error\\w+;invalid\\w+"
# Separate css terms with a semi-colon:
# css_search_terms: "#error_message; .error"
# Regex patterns separated with semi-colons
# skip_url_patterns: "[0-9]+;some silly expression\\s+;"
# Optionally specify a user agent:
# user_agent: "sneaky-crawler"

basilisk:
  name: "#{options[:name]}"
  url: "#{options[:url]}"
  processors: "seo; sitemap; error"
  regex_search_terms: ""
  css_search_terms: ""
  skip_url_patterns: ""
  user_agent: "anemone-basilisk"
      CONFIG
      yaml
    end

    # Prints what was created and how to run the new search.
    def output_instructions(search_name, filename, foldername)
      instruction = <<-INSTRUCTIONS

You just created the following search: #{search_name}

If you'd like to change the default options, edit the file #{filename}

To run your search:

basil run #{search_name}

Your search results will appear in #{foldername}

      INSTRUCTIONS
      puts instruction
    end

  end
end
File without changes
File without changes
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: basilisk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.5
5
+ platform: ruby
6
+ authors:
7
+ - Kyle Banker
8
+ - Alexander Interactive, Inc.
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-08-24 00:00:00 -04:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: anemone
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: 0.1.2
25
+ version:
26
+ - !ruby/object:Gem::Dependency
27
+ name: nokogiri
28
+ type: :runtime
29
+ version_requirement:
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: 1.3.0
35
+ version:
36
+ - !ruby/object:Gem::Dependency
37
+ name: fastercsv
38
+ type: :runtime
39
+ version_requirement:
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 1.5.0
45
+ version:
46
+ description:
47
+ email: knb@alexanderinteractive.com
48
+ executables:
49
+ - basil
50
+ extensions: []
51
+
52
+ extra_rdoc_files:
53
+ - README.rdoc
54
+ files:
55
+ - HISTORY
56
+ - LICENSE
57
+ - README.rdoc
58
+ - bin/basil
59
+ - lib/basilisk.rb
60
+ - lib/basilisk/core.rb
61
+ - lib/basilisk/parser.rb
62
+ - lib/basilisk/processor.rb
63
+ - lib/basilisk/template.rb
64
+ - lib/basilisk/processors/error_processor.rb
65
+ - lib/basilisk/processors/seo_processor.rb
66
+ - lib/basilisk/processors/terms_processor.rb
67
+ - lib/basilisk/processors/sitemap_processor.rb
68
+ - lib/basilisk/processors/image_processor.rb
69
+ - test/basilisk_test.rb
70
+ - test/test_helper.rb
71
+ has_rdoc: true
72
+ homepage: http://github.com/aiaio/basilisk
73
+ post_install_message:
74
+ rdoc_options:
75
+ - --main
76
+ - README.rdoc
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: "0"
84
+ version:
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ version:
91
+ requirements: []
92
+
93
+ rubyforge_project: basilisk
94
+ rubygems_version: 1.3.1
95
+ signing_key:
96
+ specification_version: 2
97
+ summary: A command-line front-end for the anemone web-spider. Generates reports for seo, http errors and an xml sitemap. Extensible page handler.
98
+ test_files:
99
+ - test/basilisk_test.rb
100
+ - test/test_helper.rb