RubyGems - ronin-web - Versions diffs - 0.1.0 - Mend

ronin-web 0.1.0

Files changed (26) hide show

data/History.txt +15 -0
data/Manifest.txt +25 -0
data/README.txt +74 -0
data/Rakefile +21 -0
data/lib/ronin/sessions/web.rb +80 -0
data/lib/ronin/web/extensions/nokogiri/xml/attr.rb +13 -0
data/lib/ronin/web/extensions/nokogiri/xml/document.rb +18 -0
data/lib/ronin/web/extensions/nokogiri/xml/element.rb +23 -0
data/lib/ronin/web/extensions/nokogiri/xml/node.rb +47 -0
data/lib/ronin/web/extensions/nokogiri/xml/text.rb +13 -0
data/lib/ronin/web/extensions/nokogiri/xml.rb +5 -0
data/lib/ronin/web/extensions/nokogiri.rb +24 -0
data/lib/ronin/web/extensions.rb +24 -0
data/lib/ronin/web/server.rb +511 -0
data/lib/ronin/web/spider.rb +89 -0
data/lib/ronin/web/version.rb +29 -0
data/lib/ronin/web/web.rb +305 -0
data/lib/ronin/web.rb +25 -0
data/spec/spec_helper.rb +7 -0
data/spec/web/extensions/nokogiri_spec.rb +38 -0
data/spec/web/helpers/root/index.html +1 -0
data/spec/web/helpers/root/test.txt +1 -0
data/spec/web/helpers/server.rb +2 -0
data/spec/web/server_spec.rb +142 -0
data/tasks/spec.rb +9 -0
metadata +141 -0

data/lib/ronin/web/web.rb ADDED Viewed

@@ -0,0 +1,305 @@
+#
+#--
+# Ronin Web - A Ruby library for Ronin that provides support for web
+# scraping and spidering functionality.
+#
+# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+#
+require 'ronin/network/http'
+require 'uri/http'
+require 'nokogiri'
+require 'mechanize'
+require 'open-uri'
+module Ronin
+  module Web
+    #
+    # Returns a Nokogiri::HTML::Document object for the specified _body_
+    # of html.
+    #
+    def Web.html(body)
+      # Hands _body_ unchanged to Nokogiri's HTML() parser entry point.
+      Nokogiri::HTML(body)
+    end
+    #
+    # Returns a Nokogiri::XML::Document object for the specified _body_
+    # of xml.
+    #
+    def Web.xml(body)
+      # Hands _body_ unchanged to Nokogiri's XML() parser entry point.
+      Nokogiri::XML(body)
+    end
+    #
+    # Returns the default Ronin Web proxy port.
+    #
+    def Web.default_proxy_port
+      # Web stores no port of its own; the value is read from Network::HTTP.
+      Network::HTTP.default_proxy_port
+    end
+    #
+    # Sets the default Ronin Web proxy port to the specified _port_.
+    #
+    def Web.default_proxy_port=(port)
+      # The setting is written to Network::HTTP, so it is shared with it.
+      Network::HTTP.default_proxy_port = port
+    end
+    #
+    # Returns the +Hash+ of the Ronin Web proxy information.
+    #
+    def Web.proxy
+      # Returns whatever Network::HTTP.proxy returns; elsewhere in this file
+      # it is indexed with :host, :port, :user and :password.
+      Network::HTTP.proxy
+    end
+    #
+    # Resets the Web proxy settings.
+    #
+    def Web.disable_proxy
+      # Pure delegation: the reset itself is performed by Network::HTTP.
+      Network::HTTP.disable_proxy
+    end
+    #
+    # Creates an HTTP URI from the given _proxy_info_ hash. The
+    # _proxy_info_ hash defaults to Web.proxy, if not given.
+    #
+    def Web.proxy_url(proxy_info=Web.proxy)
+      if Web.proxy[:host]
+        userinfo = nil
+        if (Web.proxy[:user] || Web.proxy[:password])
+          userinfo = "#{Web.proxy[:user]}:#{Web.proxy[:password]}"
+        end
+        return URI::HTTP.build(
+          :host => Web.proxy[:host],
+          :port => Web.proxy[:port],
+          :userinfo => userinfo,
+          :path => '/'
+        )
+      end
+    end
+    #
+    # Returns the supported Web User-Agent Aliases.
+    #
+    def Web.user_agent_aliases
+      # Exposes Mechanize's AGENT_ALIASES constant itself, not a copy.
+      WWW::Mechanize::AGENT_ALIASES
+    end
+    #
+    # Returns the Ronin Web User-Agent
+    #
+    def Web.user_agent
+      # The User-Agent is stored on Network::HTTP and only read through here.
+      Network::HTTP.user_agent
+    end
+    #
+    # Sets the Ronin Web User-Agent to the specified _new_agent_.
+    #
+    def Web.user_agent=(new_agent)
+      # Written to Network::HTTP, so the change is visible through it too.
+      Network::HTTP.user_agent = new_agent
+    end
+    #
+    # Sets the Ronin Web User-Agent to the specified user agent alias
+    # _name_.
+    #
+    def Web.user_agent_alias=(name)
+      # _name_ is converted with to_s before the lookup, so Symbols work.
+      # NOTE(review): an unknown alias presumably yields nil here, which
+      # would set the User-Agent to nil -- confirm that is acceptable.
+      Network::HTTP.user_agent = Web.user_agent_aliases[name.to_s]
+    end
+    #
+    # Opens the _url_ with the given _options_. The contents of the _url_
+    # will be returned.
+    #
+    # _options_ may contain the following keys:
+    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
+    # <tt>:user_agent</tt>:: The User-Agent string to use.
+    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
+    # <tt>:user</tt>:: The HTTP Basic Authentication user name.
+    # <tt>:password</tt>:: The HTTP Basic Authentication password.
+    # <tt>:content_length_proc</tt>:: A callback which will be passed the
+    #                                 content-length of the HTTP response.
+    # <tt>:progress_proc</tt>:: A callback which will be passed the size
+    #                           of each fragment, once received from the
+    #                           server.
+    #
+    #   Web.open('http://www.hackety.org/')
+    #
+    #   Web.open('http://tenderlovemaking.com/',
+    #     :user_agent_alias => 'Linux Mozilla')
+    #
+    #   Web.open('http://www.wired.com/', :user_agent => 'the future')
+    #
+    def Web.open(url,options={})
+      user_agent_alias = options.delete(:user_agent_alias)
+      proxy = (options.delete(:proxy) || Web.proxy)
+      user = options.delete(:user)
+      password = options.delete(:password)
+      content_length_proc = options.delete(:content_length_proc)
+      progress_proc = options.delete(:progress_proc)
+      headers = Network::HTTP.headers(options)
+      if user_agent_alias
+        headers['User-Agent'] = Web.user_agent_aliases[user_agent_alias]
+      end
+      if proxy[:host]
+        headers[:proxy] = Web.proxy_url(proxy)
+      end
+      if user
+        headers[:http_basic_authentication] = [user, password]
+      end
+      if content_length_proc
+        headers[:content_length_proc] = content_length_proc
+      end
+      if progress_proc
+        headers[:progress_proc] = progress_proc
+      end
+      return Kernel.open(url,headers)
+    end
+    #
+    # Creates a new Mechanize agent with the given _options_.
+    #
+    # _options_ may contain the following keys:
+    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
+    # <tt>:user_agent</tt>:: The User-Agent string to use.
+    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
+    #
+    #   Web.agent
+    #   Web.agent(:user_agent_alias => 'Linux Mozilla')
+    #   Web.agent(:user_agent => 'wooden pants')
+    #
+    def Web.agent(options={},&block)
+      agent = WWW::Mechanize.new
+      if options[:user_agent_alias]
+        agent.user_agent_alias = options[:user_agent_alias]
+      elsif options[:user_agent]
+        agent.user_agent = options[:user_agent]
+      elsif Web.user_agent
+        agent.user_agent = Web.user_agent
+      end
+      proxy = (options[:proxy] || Web.proxy)
+      if proxy[:host]
+        agent.set_proxy(proxy[:host],proxy[:port],proxy[:user],proxy[:password])
+      end
+      block.call(agent) if block
+      return agent
+    end
+    #
+    # Gets the specified _url_ with the given _options_. If a _block_ is
+    # given, it will be passed the retrieved page.
+    #
+    # _options_ may contain the following keys:
+    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
+    # <tt>:user_agent</tt>:: The User-Agent string to use.
+    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
+    #
+    #   Web.get('http://www.0x000000.com') # => WWW::Mechanize::Page
+    #
+    #   Web.get('http://www.rubyinside.com') do |page|
+    #     page.search('div.post/h2/a').each do |title|
+    #       puts title.inner_text
+    #     end
+    #   end
+    #
+    def Web.get(url,options={},&block)
+      page = Web.agent(options).get(url)
+      block.call(page) if block
+      return page
+    end
+    #
+    # Gets the specified _url_ with the given _options_, returning the body
+    # of the requested page. If a _block_ is given, it will be passed the
+    # body of the retrieved page.
+    #
+    # _options_ may contain the following keys:
+    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
+    # <tt>:user_agent</tt>:: The User-Agent string to use.
+    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
+    #
+    #   Web.get_body('http://www.rubyinside.com') # => String
+    #
+    #   Web.get_body('http://www.rubyinside.com') do |body|
+    #     puts body
+    #   end
+    #
+    def Web.get_body(url,options={},&block)
+      body = Web.get(url,options).body
+      block.call(body) if block
+      return body
+    end
+    #
+    # Posts the specified _url_ with the given _options_. If a _block_ is
+    # given, it will be passed the posted page.
+    #
+    # _options_ may contain the following keys:
+    # <tt>:query</tt>:: The query parameters to post to the specified _url_.
+    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
+    # <tt>:user_agent</tt>:: The User-Agent string to use.
+    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
+    #
+    #   Web.post('http://www.rubyinside.com') # => WWW::Mechanize::Page
+    #
+    def Web.post(url,options={},&block)
+      query = (options[:query] || {})
+      page = Web.agent(options).post(url,query)
+      block.call(page) if block
+      return page
+    end
+    #
+    # Posts the specified _url_ with the given _options_, returning the body
+    # of the posted page. If a _block_ is given, it will be passed the
+    # body of the posted page.
+    #
+    # _options_ may contain the following keys:
+    # <tt>:query</tt>:: The query parameters to post to the specified _url_.
+    # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
+    # <tt>:user_agent</tt>:: The User-Agent string to use.
+    # <tt>:proxy</tt>:: A +Hash+ of the proxy information to use.
+    #
+    #   Web.post_body('http://www.rubyinside.com') # => String
+    #
+    #   Web.post_body('http://www.rubyinside.com') do |body|
+    #     puts body
+    #   end
+    #
+    def Web.post_body(url,options={},&block)
+      body = Web.post(url,options).body
+      block.call(body) if block
+      return body
+    end
+  end
+end

data/lib/ronin/web.rb ADDED Viewed

@@ -0,0 +1,25 @@
+#
+#--
+# Ronin Web - A Ruby library for Ronin that provides support for web
+# scraping and spidering functionality.
+#
+# Copyright (c) 2006-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+#
+require 'ronin/web/extensions'
+require 'ronin/web/web'

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,7 @@
+require 'rubygems'
+gem 'rspec', '>=1.1.3'
+require 'spec'
+require 'ronin/web/version'
+include Ronin

data/spec/web/extensions/nokogiri_spec.rb ADDED Viewed

@@ -0,0 +1,38 @@
+require 'ronin/web/extensions/nokogiri'
+require 'spec_helper'
+require 'nokogiri'
+# Exercises the methods called below (similar?, traverse_text and
+# total_children) on parsed HTML documents.
+describe Nokogiri::HTML do
+  # Two fixtures that differ only in the <i> element: @doc wraps "page" in
+  # <i></i>, while @edited_doc leaves it as plain text.
+  # NOTE(review): both fixtures contain a closing </div> that has no opening
+  # tag -- unclear from here whether the malformed markup is intentional.
+  before(:all) do
+    @doc = Nokogiri::HTML(%{<html><head><title>test</title></head><body><p><b>This is a test</b> html <i>page</i>.</p></div></body></html>})
+    @edited_doc = Nokogiri::HTML(%{<html><head><title>test</title></head><body><p><b>This is a test</b> html page.</p></div></body></html>})
+  end
+  # The <b> element is the same in both fixtures.
+  it "should be able to test if two elements are similar" do
+    elem1 = @doc.at('b')
+    elem2 = @edited_doc.at('b')
+    elem1.similar?(elem2).should == true
+  end
+  # Compares the last child node of <p> in @doc against a <b> element.
+  it "should be able to test if two elements are not similar" do
+    elem1 = @doc.at('p').children.last
+    elem2 = @edited_doc.at('b')
+    elem1.similar?(elem2).should == false
+  end
+  # The expected list is the text content of @doc in document order.
+  it "should be able to traverse over every text node" do
+    text = []
+    @doc.traverse_text { |node| text << node.content }
+    text.should == ['test', 'This is a test', ' html ', 'page', '.']
+  end
+  # NOTE(review): the expected count of 12 depends on how the parser builds
+  # the tree for the fixture above -- re-check if the fixture changes.
+  it "should provide a count of all sub-children" do
+    @doc.total_children.should == 12
+  end
+end

data/spec/web/helpers/root/index.html ADDED Viewed

	@@ -0,0 +1 @@
1	+ Index of files.

data/spec/web/helpers/root/test.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ This is a test.

data/spec/web/helpers/server.rb ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # Web Server root directory
2	+ WEB_SERVER_ROOT = File.expand_path(File.join(File.dirname(__FILE__),'root'))

data/spec/web/server_spec.rb ADDED Viewed

@@ -0,0 +1,142 @@
+require 'ronin/web/server'
+require 'spec_helper'
+require 'web/helpers/server'
+# Exercises Web::Server routing: default, bound, pattern, mapped, file and
+# mounted paths, plus virtual hosts matched by name and by pattern.
+describe Web::Server do
+  before(:all) do
+    # One server carrying every kind of route that the examples request.
+    @server = Web::Server.new do
+      default do |env|
+        response('This is default.')
+      end
+      bind('/test/bind.xml') do |env|
+        response('<secret/>', :content_type => 'text/xml')
+      end
+      paths_like(/path_patterns\/secret\./) do |env|
+        response('No secrets here.')
+      end
+      map('/test/map') do |env|
+        response('mapped')
+      end
+      # WEB_SERVER_ROOT comes from web/helpers/server (required above).
+      file('/test/file.txt',File.join(WEB_SERVER_ROOT,'test.txt'))
+      mount('/test/mount/',WEB_SERVER_ROOT)
+    end
+    # NOTE(review): @virtual_host is assigned here but is not referenced by
+    # any example in this describe block.
+    @virtual_host = Web::Server.new do
+      bind('/test/virtual_host.xml') do |env|
+        response('<virtual/>', :content_type => 'text/xml')
+      end
+    end
+    # Virtual host registered under an exact host name.
+    @server.host('virtual.host.com') do
+      bind('/test/virtual_host.xml') do |env|
+        response('<virtual/>', :content_type => 'text/xml')
+      end
+    end
+    # Virtual host registered under a host-name pattern.
+    @server.hosts_like(/^virtual[0-9]\./) do
+      bind('/test/virtual_host_patterns.xml') do |env|
+        response('<virtual-patterns/>', :content_type => 'text/xml')
+      end
+    end
+  end
+  it "should have a default host to listen on" do
+    Web::Server.default_host.should_not be_nil
+  end
+  it "should have a default port to listen on" do
+    Web::Server.default_port.should_not be_nil
+  end
+  it "should have built-in content types" do
+    Web::Server.content_types.should_not be_empty
+  end
+  it "should map file extensions to content-types" do
+    @server.content_type('html').should == 'text/html'
+  end
+  it "should have a default content-type for unknown files" do
+    @server.content_type('lol').should == 'application/x-unknown-content-type'
+  end
+  it "should find the index file for a directory" do
+    dir = WEB_SERVER_ROOT
+    @server.index_of(dir).should == File.join(dir,'index.html')
+  end
+  # No route is registered for this path, so the default block answers.
+  it "should have a default response for un-matched paths" do
+    path = '/test/default'
+    @server.route_path(path).body.should == ['This is default.']
+  end
+  it "should bind a path to a certain response" do
+    path = '/test/bind.xml'
+    @server.route_path(path).body.should == ['<secret/>']
+  end
+  it "should match paths with patterns" do
+    path = '/test/path_patterns/secret.pdf'
+    @server.route_path(path).body.should == ['No secrets here.']
+  end
+  # The requested path lies below the mapped '/test/map' prefix.
+  it "should match paths to sub-directories" do
+    path = '/test/map/impossible.html'
+    @server.route_path(path).body.should == ['mapped']
+  end
+  it "should return a response for a file" do
+    path = '/test/file.txt'
+    @server.route_path(path).body.should == ["This is a test.\n"]
+  end
+  it "should return files from mounted directories" do
+    path = '/test/mount/test.txt'
+    @server.route_path(path).body.should == ["This is a test.\n"]
+  end
+  it "should return the index file for a mounted directory" do
+    path = '/test/mount/'
+    @server.route_path(path).body.should == ["Index of files.\n"]
+  end
+  it "should match virtual hosts" do
+    url = 'http://virtual.host.com/test/virtual_host.xml'
+    @server.route(url).body.should == ['<virtual/>']
+  end
+  it "should match virtual hosts with patterns" do
+    url = 'http://virtual0.host.com/test/virtual_host_patterns.xml'
+    @server.route(url).body.should == ['<virtual-patterns/>']
+  end
+  it "should provide access to servers via their host-names" do
+    virtual_host = @server.virtual_host('virtual.host.com')
+    url = 'http://virtual.host.com/test/virtual_host.xml'
+    virtual_host.route(url).body.should == ['<virtual/>']
+  end
+  # NOTE(review): the server is looked up as 'virtual1.host.com' while the
+  # routed URL uses 'virtual0.host.com'; both match /^virtual[0-9]\./ --
+  # confirm that the differing host names are intended.
+  it "should provide access to servers via their host-names that match virtual host patterns" do
+    virtual_host = @server.virtual_host('virtual1.host.com')
+    url = 'http://virtual0.host.com/test/virtual_host_patterns.xml'
+    virtual_host.route(url).body.should == ['<virtual-patterns/>']
+  end
+end

data/tasks/spec.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'spec/rake/spectask'
+# Defines the :spec Rake task and makes it the default task.
+desc "Run all specifications"
+Spec::Rake::SpecTask.new(:spec) do |t|
+  # Add lib/ and spec/ to the task's library list.
+  t.libs += ['lib', 'spec']
+  # Options passed to the spec runner: coloured output, "specdoc" format.
+  t.spec_opts = ['--colour', '--format', 'specdoc']
+end
+# Running rake without a task name runs :spec.
+task :default => :spec

metadata ADDED Viewed

@@ -0,0 +1,141 @@
+--- !ruby/object:Gem::Specification
+name: ronin-web
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Postmodern
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2009-01-22 00:00:00 -08:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.1.0
+    version:
+- !ruby/object:Gem::Dependency
+  name: mechanize
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.9.0
+    version:
+- !ruby/object:Gem::Dependency
+  name: spidr
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.1.3
+    version:
+- !ruby/object:Gem::Dependency
+  name: rack
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.9.1
+    version:
+- !ruby/object:Gem::Dependency
+  name: ronin
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.1.4
+    version:
+- !ruby/object:Gem::Dependency
+  name: hoe
+  type: :development
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.8.3
+    version:
+description: Ronin Web is a Ruby library for Ronin that provides support for web scraping and spidering functionality.  Ronin is a Ruby platform designed for information security and data exploration tasks. Ronin allows for the rapid development and distribution of code over many of the common Source-Code-Management (SCM) systems.
+email:
+- postmodern.mod3@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files:
+- History.txt
+- Manifest.txt
+- README.txt
+- spec/web/helpers/root/test.txt
+files:
+- History.txt
+- Manifest.txt
+- README.txt
+- Rakefile
+- lib/ronin/sessions/web.rb
+- lib/ronin/web.rb
+- lib/ronin/web/extensions.rb
+- lib/ronin/web/extensions/nokogiri.rb
+- lib/ronin/web/extensions/nokogiri/xml.rb
+- lib/ronin/web/extensions/nokogiri/xml/node.rb
+- lib/ronin/web/extensions/nokogiri/xml/text.rb
+- lib/ronin/web/extensions/nokogiri/xml/attr.rb
+- lib/ronin/web/extensions/nokogiri/xml/element.rb
+- lib/ronin/web/extensions/nokogiri/xml/document.rb
+- lib/ronin/web/server.rb
+- lib/ronin/web/spider.rb
+- lib/ronin/web/web.rb
+- lib/ronin/web/version.rb
+- tasks/spec.rb
+- spec/spec_helper.rb
+- spec/web/helpers/server.rb
+- spec/web/helpers/root/index.html
+- spec/web/helpers/root/test.txt
+- spec/web/extensions/nokogiri_spec.rb
+- spec/web/server_spec.rb
+has_rdoc: true
+homepage: http://ronin.rubyforge.org/web/
+post_install_message:
+rdoc_options:
+- --main
+- README.txt
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project: ronin
+rubygems_version: 1.3.1
+signing_key:
+specification_version: 2
+summary: Ronin Web is a Ruby library for Ronin that provides support for web scraping and spidering functionality
+test_files: []