RubyGems - scrappy - Versions diffs - 0.2.1 → 0.3.0 - Mend

scrappy 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

data/History.txt +5 -0
data/Manifest +1 -3
data/README.rdoc +11 -50
data/bin/scrappy +9 -19
data/lib/scrappy.rb +2 -1
data/lib/scrappy/agent/agent.rb +5 -14
data/lib/scrappy/agent/blind_agent.rb +2 -5
data/lib/scrappy/agent/extractor.rb +2 -2
data/lib/scrappy/agent/formats.rb +10 -2
data/lib/scrappy/selectors/root.rb +1 -1
data/lib/scrappy/server/errors.rb +13 -0
data/lib/scrappy/server/server.rb +3 -1
data/public/stylesheets/application.css +1 -1
data/scrappy.gemspec +6 -6
data/views/kb.haml +1 -1
metadata +30 -11
data/lib/scrappy/agent/visual_agent.rb +0 -101
data/lib/scrappy/shell.rb +0 -87
data/lib/scrappy/webkit/webkit.rb +0 -18

data/History.txt CHANGED

@@ -1,3 +1,8 @@
+=== 0.3.0 2011-03-11
+* Removed unused features: shell, browser, scripting
+* Correction in RootSelector
 === 0.2.1 2011-03-11
 * Added a web admin interface mode

data/Manifest CHANGED

@@ -12,7 +12,6 @@ lib/scrappy/agent/dumper.rb
 lib/scrappy/agent/extractor.rb
 lib/scrappy/agent/formats.rb
 lib/scrappy/agent/map_reduce.rb
-lib/scrappy/agent/visual_agent.rb
 lib/scrappy/repository.rb
 lib/scrappy/selectors/base_uri.rb
 lib/scrappy/selectors/css.rb
@@ -24,11 +23,10 @@ lib/scrappy/selectors/uri.rb
 lib/scrappy/selectors/uri_pattern.rb
 lib/scrappy/selectors/xpath.rb
 lib/scrappy/server/admin.rb
+lib/scrappy/server/errors.rb
 lib/scrappy/server/helpers.rb
 lib/scrappy/server/server.rb
-lib/scrappy/shell.rb
 lib/scrappy/support.rb
-lib/scrappy/webkit/webkit.rb
 public/favicon.ico
 public/images/logo.png
 public/images/logo_tiny.png

data/README.rdoc CHANGED

@@ -58,62 +58,23 @@ scrappy offers many different interfaces to get RDF data from a web page:
 * Command-line interface:
-    $ scrappy -g elmundo.es
-* Interactive shell:
-    $ scrappy -i
-    Launching scrappy Shell...
-    $ get elmundo.es
-    dc: http://purl.org/dc/elements/1.1/
-    owl: http://www.w3.org/2002/07/owl#
-    rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
-    sc: http://lab.gsi.dit.upm.es/scraping.rdf#
-    rdfs: http://www.w3.org/2000/01/rdf-schema#
-    http://www.elmundo.es/elmundo/2010/10/05/gentes/1286310993.html:
-      dc:description: "Las vacaciones del n\u00famero uno"
-      dc:title:
-        "Una suite de 5.000 euros para Nadal en Tailandia"
-        "Una suite de 5.000 euros para Nadal"
-      rdf:type: http://rdfs.org/sioc/ns#Post
-      dc:creator: "Fernando Domingo | John Bali (V\u00eddeo)"
-      http://www.daml.org/experiment/ontology/location-ont#location:
-        *:
-          rdf:label: "Bangkok"
-          rdf:type: http://www.daml.org/experiment/ontology/location-ont#Location
-      dc:date: "mi\u00e9rcoles 06/10/2010"
-    ...
-    http://www.elmundo.es$
+    $ scrappy -g example.com
-* Web Service interface:
+* Web Admin interface:
-    $ scrappy -s
-    Launching scrappy Web Server...
-    ** Starting Mongrel on localhost:3434
+    $ scrappy -a
+    Launching Scrappy Web Admin (browse http://localhost:3434)...
+    == Sinatra/1.1.3 has taken the stage on 3434 for production with backup from Thin
   Then point your browser to http://localhost:3434 for additional directions.
-* Web Proxy interface:
-    $ scrappy -S
-    Launching scrappy Web Proxy...
-    ** Starting Mongrel on localhost:3434
-  Then configure your browser's HTTP proxy to http://localhost:3434 and browse http://www.elmundo.es
-* Scripting (experimental):
-  You can create scripts that retrieve many web pages and run them using scrappy.
-    #!/usr/bin/scrappy
-    get elmundo.es
-    get google.com/search?q=testing
+* Web Service interface:
-  Then you can run your script from the command line just as any other bash script.
+    $ scrappy -s
+    Launching Scrappy Web Server...
+    == Sinatra/1.1.3 has taken the stage on 3434 for production with backup from Thin
-  We plan to enable complex operations such as posting forms and definining a useful language
-  with variables to enable flow control in order to build web service mashups.
+  Then use the service in the same way as the Web Admin but for read-only operations.
 * Ruby interface:
@@ -129,7 +90,7 @@ scrappy offers many different interfaces to get RDF data from a web page:
     Scrappy::Agent::Options.kb = kb
     # Create an agent
-    agent = Scrappy::Agent.create
+    agent = Scrappy::Agent.new
     # Get RDF output
     output = agent.request :method=>:get, :uri=>'http://www.elmundo.es'

data/bin/scrappy CHANGED

@@ -38,17 +38,14 @@ module Scrappy
         opts.on('-p URI', '--post URI')         { |uri| Options.uri = uri; Options.http_method=:post }
         opts.on('-D', '--dump')                 { Agent::Options.dump = true; Agent::Options.format = :rdf }
         opts.on('-u', '--debug')                { Agent::Options.debug = true }
-        opts.on('-i', '--interactive')          { Options.shell = true; Agent::Options.format_header = false }
         opts.on('-s', '--server [BASE_URI]')    { |uri| Options.server = true; Options.base_uri = uri }
         opts.on('-a', '--admin [BASE_URI]')     { |uri| Options.admin = true; Options.base_uri = uri }
         opts.on('-P P', '--port P')             { |p| Options.port = p }
         opts.on('-c C', '--concurrence C')      { |c| Agent::Options.workers = c.to_i }
         opts.on('-d D', '--delay D')            { |d| Agent::Options.delay = d; Agent::Options.workers = 1 }
         opts.on('-l L', '--levels L')           { |l| Agent::Options.depth = l.to_i }
-        opts.on('-V', '--visual')               { Agent::Options.agent = :visual }
         opts.on('-r', '--reference')            { Agent::Options.referenceable = :minimum }
         opts.on('-R', '--reference-all')        { Agent::Options.referenceable = :dump }
-        opts.on('-w', '--window')               { Agent::Options.window = true }
         opts.on('-f FORMAT', '--format FORMAT') { |f| Agent::Options.format = f.to_sym }
         opts.on('-t TIME', '--time TIME')       { |t| Agent::Options.time = t.to_i*60 } # converts minutes to seconds
         opts.on('-o URIs', '--observe URIs')    { |uris| Options.observe = uris.split(',') }
@@ -60,11 +57,11 @@ module Scrappy
       onload
       if Options.uri
         Options.quiet = true
-        puts Agent.create.proxy(:http_method=>:get, :uri=>Options.uri).output
+        puts Agent.new.proxy(:http_method=>:get, :uri=>Options.uri).output
       elsif Options.observe
-        Agent.create.observe(Options.observe)
+        Agent.new.observe(Options.observe)
       elsif Options.admin
-        puts "Launching Scrappy Admin Web Server (browse http://localhost:#{Options.port})..."
+        puts "Launching Scrappy Web Admin (browse http://localhost:#{Options.port})..."
         require 'scrappy/server/server'
         Thin::Logging.silent = true
         Scrappy::Server.register Scrappy::Admin
@@ -76,14 +73,10 @@ module Scrappy
         Thin::Logging.silent = true
         Scrappy::Server.run! :host => 'localhost', :port => Options.port, :environment => :production,
                              :base_uri => Options.base_uri
-      elsif Options.shell
-        puts "Launching Scrappy Shell..."
-        require 'scrappy/shell'
-        Shell.new.run
       else
-        require 'scrappy/shell'
-        Options.quiet = true
-        Shell.new(@file).run
+        output_version
+        puts 'To get help use: scrappy -h'
+        exit 0
       end
       Scrappy::App.quit
     end
@@ -106,7 +99,7 @@ Usage
 Options
   -h, --help               Displays help message
   -v, --version            Display the version, then exit
-  -f, --format             Picks output format (json, ejson, rdfxml, ntriples, png)
+  -f, --format             Picks output format (json, ejson, rdf, ntriples, png)
   -g, --get URL            Gets requested URL
   -p, --post URL           Posts requested URL
   -c, --concurrence VALUE  Sets number of concurrent connections for crawling (default is 10)
@@ -114,16 +107,13 @@ Options
   -d, --delay VALUE        Sets delay (in ms) between requests (default is 0)
   -D, --dump               Dumps RDF data to disk
   -u, --debug              Shows debugging traces
-  -i, --interactive        Runs interactive shell
   -o, --observe URLs       Observes the specified URLs storing their data into the repository
   -s, --server [ROOT]      Runs web server (optionally specify server's root url)
-  -S, --proxy-server       Runs web proxy
+  -a, --admin [ROOT]       Runs admin web server (optionally specify server's root url)
   -P, --port PORT          Selects port number (default is 3434)
-  -V, --visual             Uses visual agent (slow)
-  -t, --time DAYS          Returns repository data from the last given minutes
+  -t, --time TIME          Returns repository data from the last given minutes
   -r, --reference          Outputs referenceable data
   -R, --reference-all      Outputs all HTML referenceable data
-  -w, --window             Shows browser window (requires -v)
 Authors
   José Ignacio Fernández, Alberto Mardomingo, Jacobo Blasco

data/lib/scrappy.rb CHANGED

@@ -17,12 +17,13 @@ require 'scrappy/agent/map_reduce'
 require 'scrappy/agent/cache'
 require 'scrappy/agent/dumper'
 require 'scrappy/agent/formats'
+require 'scrappy/agent/blind_agent'
 require 'scrappy/agent/agent'
 Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
 module Scrappy
-  VERSION = '0.2.1'
+  VERSION = '0.3.0'
 end
 # Require selectors

data/lib/scrappy/agent/agent.rb CHANGED

@@ -4,8 +4,9 @@ module Scrappy
     include Extractor
     include MapReduce
     include Cached
+    include BlindAgent
-    Options = OpenStruct.new :format=>:yarf, :format_header=>true, :depth=>0, :agent=>:blind, :delay=>0, :workers=>10
+    Options = OpenStruct.new :format=>:yarf, :format_header=>true, :depth=>0, :delay=>0, :workers=>10
     ContentTypes = { :png => 'image/png', :rdfxml => 'application/rdf+xml',
                      :rdf => 'application/rdf+xml' }
@@ -13,17 +14,7 @@ module Scrappy
       @pool ||= {}
     end
     def self.[] id
-      pool[id] || Agent.create(:id=>id)
-    end
-    def self.create args={}
-      if (args[:agent] || Options.agent) == :visual
-        require 'scrappy/agent/visual_agent'
-        VisualAgent.new args
-      else
-        require 'scrappy/agent/blind_agent'
-        BlindAgent.new args
-      end
+      pool[id] || Agent.new(:id=>id)
     end
     attr_accessor :id, :options, :kb
@@ -160,7 +151,8 @@ module Scrappy
     end
     def clean triples
-      triples.uniq.select { |s,p,o| p!=Node('rdf:type') or ![Node('sc:Index'), Node('sc:Page')].include?(o) }
+      triples.uniq.select { |s,p,o| p!=ID('rdf:type') or ![ID('sc:Index'), ID('sc:Page')].include?(o) }.
+                   select { |s,p,o| p!=Node('rdf:type') or ![Node('sc:Index'), Node('sc:Page')].include?(o) }
     end
     # Do the extraction using RDF repository
@@ -231,7 +223,6 @@ module Scrappy
       puts 'done!' if options.debug
       if self.html_data?
-        add_visual_data! if options.referenceable               # Adds tags including visual information
         triples = extract(self.uri, html, options.referenceable) # Extract data
         Dumper.dump self.uri, clean(triples), options.format if options.dump # Dump results to disk
         triples

data/lib/scrappy/agent/blind_agent.rb CHANGED

@@ -1,7 +1,7 @@
 module Scrappy
-  class BlindAgent < Agent
+  module BlindAgent
     def initialize args={}
-      super
+      super()
       @mechanize = Mechanize.new
       @mechanize.max_history = 20
     end
@@ -36,8 +36,5 @@ module Scrappy
     def html
       @mechanize.current_page.root.to_html :encoding=>'UTF-8'
     end
-    def add_visual_data!
-    end
   end
 end

data/lib/scrappy/agent/extractor.rb CHANGED

@@ -96,7 +96,7 @@ module Scrappy
     end
     def filter selector, doc
-      if !selector.sc::debug.empty? and options.debug
+      if selector.sc::debug.first=="true" and options.debug
         puts '== DEBUG'
         puts '== Selector:'
         puts selector.serialize(:yarf, false)
@@ -109,7 +109,7 @@ module Scrappy
       # Process selector
       results = selector_pool(selector).filter doc
-      if !selector.sc::debug.empty? and options.debug
+      if selector.sc::debug.first=="true" and options.debug
         puts "== No results" if results.empty?
         results.each_with_index do |result, i|
           puts "== Result ##{i}:"

data/lib/scrappy/agent/formats.rb CHANGED

@@ -25,9 +25,17 @@ module Scrappy
         doc.search("p").each {|n| n.replace(Nokogiri::XML::Text.new("#{n.text.strip}\n", n.document)) }
         doc.text.strip
       when Node('sc:Html') then
-        node.to_html
+        if node.respond_to? :to_html
+          node.to_html
+        else
+          node.to_s
+        end
       else
-        node.text
+        if node.respond_to? :text
+          node.text
+        else
+          node.to_s
+        end
       end
     end

data/lib/scrappy/selectors/root.rb CHANGED

@@ -8,7 +8,7 @@ module Sc
         # Select node's attribute if given
         sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content][attribute] } }
       else
-        [ { :uri=>doc[:uri], :content=>doc[:content], :value=>format(doc[:content], sc::format, doc[:uri]) } ]
+        [ { :uri=>doc[:uri], :content=>doc[:content], :value=>format(doc[:value], sc::format, doc[:uri]) } ]
       end
     end
   end

data/lib/scrappy/server/errors.rb ADDED

@@ -0,0 +1,13 @@
+module Scrappy
+  module Errors
+    def self.registered app
+      app.error do
+        "Internal error"
+      end
+      app.not_found do
+        "Resource not found"
+      end
+    end
+  end
+end

data/lib/scrappy/server/server.rb CHANGED

@@ -3,10 +3,12 @@ require 'thin'
 require 'haml'
 require 'scrappy/server/helpers'
 require 'scrappy/server/admin'
+require 'scrappy/server/errors'
 module Scrappy
   class Server < Sinatra::Base
     helpers JavaScriptHelpers
+    register Errors
     enable :sessions
     set    :root,   File.join(File.dirname(__FILE__), '..', '..', '..')
@@ -40,7 +42,7 @@ module Scrappy
       return @agent if @agent
       if session[:agent].nil? || session[:token] != SESSION_TOKEN
         session[:token] = SESSION_TOKEN
-        session[:agent] = Scrappy::Agent.create.id
+        session[:agent] = Scrappy::Agent.new.id
       end
       @agent = Scrappy::Agent[session[:agent]]
     end

data/public/stylesheets/application.css CHANGED

@@ -150,7 +150,7 @@ ul.detail li span {
   display: inline-block;
 }
 ul.detail li span.name {
-  width: 600px;
+  width: 550px;
   overflow-x: hidden;
   font-family: monospace;
   font-size: 12px;

data/scrappy.gemspec CHANGED

@@ -2,7 +2,7 @@
 Gem::Specification.new do |s|
   s.name = %q{scrappy}
-  s.version = "0.2.1"
+  s.version = "0.3.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
   s.authors = ["Jose Ignacio"]
@@ -11,22 +11,22 @@ Gem::Specification.new do |s|
   s.description = %q{RDF web scraper}
   s.email = %q{joseignacio.fernandez@gmail.com}
   s.executables = ["scrappy"]
-  s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb"]
-  s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/scrappy.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/help.haml", "views/home.haml", "views/kb.haml", "views/layout.haml", "scrappy.gemspec"]
+  s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb"]
+  s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/scrappy.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/help.haml", "views/home.haml", "views/kb.haml", "views/layout.haml", "scrappy.gemspec"]
   s.homepage = %q{http://github.com/josei/scrappy}
   s.post_install_message = %q{**(Optional) Remember to install rbwebkitgtk for visual parsing features**}
   s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
   s.require_paths = ["lib"]
   s.rubyforge_project = %q{scrappy}
-  s.rubygems_version = %q{1.3.6}
+  s.rubygems_version = %q{1.3.7}
   s.summary = %q{Web scraper that allows producing RDF data out of plain web pages}
-  s.test_files = ["test/test_scrappy.rb", "test/test_helper.rb"]
+  s.test_files = ["test/test_helper.rb", "test/test_scrappy.rb"]
   if s.respond_to? :specification_version then
     current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
     s.specification_version = 3
-    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
       s.add_runtime_dependency(%q<activesupport>, [">= 2.3.5"])
       s.add_runtime_dependency(%q<sinatra>, [">= 1.1.2"])
       s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])

data/views/kb.haml CHANGED

@@ -10,6 +10,6 @@
             -else
               =uri
           -if !uri.include?('*')
-            -[['RDF', :rdf], ['JSON', :ejson], ['YARF', :yarf], ['PNG', :png]].reverse.each do |format, format_code|
+            -[['RDF', :rdf], ['JSON', :ejson], ['YARF', :yarf], ['nTriples', :ntriples], ['PNG', :png]].reverse.each do |format, format_code|
               %span.format
                 %a{:href=>"#{settings.base_uri}/#{format_code}/#{uri}"}=format

metadata CHANGED

@@ -1,12 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: scrappy
 version: !ruby/object:Gem::Version
+  hash: 19
   prerelease: false
   segments:
   - 0
-  - 2
-  - 1
-  version: 0.2.1
+  - 3
+  - 0
+  version: 0.3.0
 platform: ruby
 authors:
 - Jose Ignacio
@@ -21,9 +22,11 @@ dependencies:
   name: activesupport
   prerelease: false
   requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 9
         segments:
         - 2
         - 3
@@ -35,9 +38,11 @@ dependencies:
   name: sinatra
   prerelease: false
   requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 23
         segments:
         - 1
         - 1
@@ -49,9 +54,11 @@ dependencies:
   name: thin
   prerelease: false
   requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 17
         segments:
         - 1
         - 2
@@ -63,9 +70,11 @@ dependencies:
   name: nokogiri
   prerelease: false
   requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 5
         segments:
         - 1
         - 4
@@ -77,9 +86,11 @@ dependencies:
   name: mechanize
   prerelease: false
   requirement: &id005 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 23
         segments:
         - 1
         - 0
@@ -91,9 +102,11 @@ dependencies:
   name: lightrdf
   prerelease: false
   requirement: &id006 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 21
         segments:
         - 0
         - 2
@@ -105,9 +118,11 @@ dependencies:
   name: i18n
   prerelease: false
   requirement: &id007 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 11
         segments:
         - 0
         - 4
@@ -119,9 +134,11 @@ dependencies:
   name: rest-client
   prerelease: false
   requirement: &id008 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 13
         segments:
         - 1
         - 6
@@ -133,9 +150,11 @@ dependencies:
   name: haml
   prerelease: false
   requirement: &id009 !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
+        hash: 55
         segments:
         - 3
         - 0
@@ -160,7 +179,6 @@ extra_rdoc_files:
 - lib/scrappy/agent/extractor.rb
 - lib/scrappy/agent/formats.rb
 - lib/scrappy/agent/map_reduce.rb
-- lib/scrappy/agent/visual_agent.rb
 - lib/scrappy/repository.rb
 - lib/scrappy/selectors/base_uri.rb
 - lib/scrappy/selectors/css.rb
@@ -172,11 +190,10 @@ extra_rdoc_files:
 - lib/scrappy/selectors/uri_pattern.rb
 - lib/scrappy/selectors/xpath.rb
 - lib/scrappy/server/admin.rb
+- lib/scrappy/server/errors.rb
 - lib/scrappy/server/helpers.rb
 - lib/scrappy/server/server.rb
-- lib/scrappy/shell.rb
 - lib/scrappy/support.rb
-- lib/scrappy/webkit/webkit.rb
 files:
 - History.txt
 - Manifest
@@ -192,7 +209,6 @@ files:
 - lib/scrappy/agent/extractor.rb
 - lib/scrappy/agent/formats.rb
 - lib/scrappy/agent/map_reduce.rb
-- lib/scrappy/agent/visual_agent.rb
 - lib/scrappy/repository.rb
 - lib/scrappy/selectors/base_uri.rb
 - lib/scrappy/selectors/css.rb
@@ -204,11 +220,10 @@ files:
 - lib/scrappy/selectors/uri_pattern.rb
 - lib/scrappy/selectors/xpath.rb
 - lib/scrappy/server/admin.rb
+- lib/scrappy/server/errors.rb
 - lib/scrappy/server/helpers.rb
 - lib/scrappy/server/server.rb
-- lib/scrappy/shell.rb
 - lib/scrappy/support.rb
-- lib/scrappy/webkit/webkit.rb
 - public/favicon.ico
 - public/images/logo.png
 - public/images/logo_tiny.png
@@ -236,16 +251,20 @@ rdoc_options:
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 3
       segments:
       - 0
       version: "0"
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      hash: 11
       segments:
       - 1
       - 2
@@ -253,10 +272,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project: scrappy
-rubygems_version: 1.3.6
+rubygems_version: 1.3.7
 signing_key:
 specification_version: 3
 summary: Web scraper that allows producing RDF data out of plain web pages
 test_files:
-- test/test_scrappy.rb
 - test/test_helper.rb
+- test/test_scrappy.rb

data/lib/scrappy/agent/visual_agent.rb DELETED

@@ -1,101 +0,0 @@
-# Hack to hide annoying gtk debug messages
-old_stderr = $stderr.clone
-$stderr.reopen '/dev/null'
-require 'scrappy/webkit/webkit'
-$stderr = old_stderr
-module Scrappy
-  class VisualAgent < Agent
-    attr_reader :visible
-    def initialize args={}
-      super
-      @cv = new_cond
-      @webview = Gtk::WebKit::WebView.new
-      @webview.signal_connect("load_finished") { synchronize { @cv.signal } }
-      @window = Gtk::Window.new
-      @window.signal_connect("destroy") { Gtk.main_quit }
-      @window.add(@webview)
-      @window.set_size_request(1024, 600)
-      if args[:window] or (args[:window].nil? and Agent::Options.window)
-        @window.show_all
-        @visible = true
-      end
-      @mechanize = Mechanize.new
-    end
-    def uri
-      @uri
-    end
-    def uri= uri
-      # First, check if the requested uri is a valid HTML page
-      valid = begin
-        @mechanize.get(uri).is_a?(Mechanize::Page)
-      rescue
-        false
-      end
-      # Open the page in the browser if it's an HTML page
-      if valid
-        synchronize do
-          @webview.open uri.to_s
-          @cv.wait(60) # 1 minute to open the page
-          @uri = @webview.uri
-        end
-      else
-        @uri = nil
-      end
-    end
-    def html_data?
-      uri.to_s != ""
-    end
-    def html
-      js "document.documentElement.outerHTML"
-    end
-    def add_visual_data!
-      js """var items = document.documentElement.getElementsByTagName('*');
-            var i=0;
-            for(var i=0; i<items.length; i++) {
-              var item = items[i];
-              item.setAttribute('vx', item.offsetLeft);
-              item.setAttribute('vy', item.offsetTop);
-              item.setAttribute('vw', item.offsetWidth);
-              item.setAttribute('vh', item.offsetHeight);
-              item.setAttribute('vsize', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-size'));
-              var weight = document.defaultView.getComputedStyle(item, null).getPropertyValue('font-weight');
-              if (weight == 'normal') weight = 400;
-              if (weight == 'bold')   weight = 700;
-              item.setAttribute('vweight', weight);
-              item.setAttribute('vcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('color'));
-              item.setAttribute('vbcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('background-color'));
-            }"""
-    end
-    def js code
-      old_title = @webview.title
-      @webview.execute_script("document.title = JSON.stringify(eval(#{ActiveSupport::JSON.encode(code)}))")
-      title = ActiveSupport::JSON.decode(@webview.title)
-      @webview.execute_script("document.title = #{ActiveSupport::JSON.encode(old_title)}")
-      title
-    end
-    def load_js url
-      function = """function include(destination) {
-          var e=window.document.createElement('script');
-          e.setAttribute('src',destination);
-          window.document.body.appendChild(e);
-        }"""
-      js function
-      js "include('#{url}')"
-    end
-  end
-end
-Thread.new { Gtk.main }

data/lib/scrappy/shell.rb DELETED

@@ -1,87 +0,0 @@
-require 'readline'
-module Scrappy
-  class Shell
-    def initialize file=nil
-      @agent = Agent.create
-      @file = file
-    end
-    def run
-      commands = ['get', 'quit', 'help', 'annotate', 'html']
-      Readline.completion_append_character = " "
-      Readline.completer_word_break_characters = ""
-      Readline.completion_proc = proc { |line| commands.grep(/^#{Regexp.escape(line)}/).sort }
-      if @file
-        open(@file, 'r').lines.each do |line|
-          break if process(line) == :quit
-        end
-      else
-        begin
-          line = Readline.readline(bash, true)
-          code = process line.nil? ? (puts 'quit' unless Options.quiet; 'quit') : line
-        end while code != :quit
-      end
-    end
-    protected
-    def process raw_command
-      command = raw_command.strip
-      code = if command =~ /\Aget\W(.*)\Z/
-        puts @agent.proxy(:uri=>$1).output
-        puts
-      elsif command == 'help'
-        puts 'Available commands:'
-        puts '  get URL: Visit the specified URL'
-        puts '  html: Show HTML code of the current URL'
-        puts '  annotate: Start the annotation tool that helps building extractors'
-        puts '  help: Show this information'
-        puts '  quit: Exit scrappy shell'
-        puts
-      elsif command == 'annotate'
-        if @agent.class.to_s == 'Scrappy::VisualAgent' and @agent.visible
-          @agent.load_js "http://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
-          @agent.load_js "http://github.com/josei/scrappy/raw/master/lib/js/annotator.js"
-          puts "Use the browser's window to annotate resources"
-          puts
-        else
-          puts 'ERROR: Scrappy must be run with -v and -w options to use this feature'
-          puts
-        end
-      elsif command == 'html'
-        puts @agent.html
-        puts
-      elsif command == 'quit'
-        :quit
-      elsif command == '' or command[0..0] == '#'
-        nil
-      else
-        puts "ERROR: Unknown command '#{command}'"
-        puts
-      end
-      code
-    end
-    def bash
-      return '' if Options.quiet
-      location = if @agent.uri
-        uri = URI::parse(@agent.uri)
-        path = uri.path.to_s
-        path = path[0..0] + "..." + path[-16..-1] if path.size > 20
-        if uri.query
-          query = "?" + uri.query
-          query = "?..." + query[-10..-1] if query.size > 13
-        else
-          query = ""
-        end
-        "#{uri.base}#{path}#{query}"
-      else
-        ''
-      end
-      "#{location}$ "
-    end
-  end
-end

data/lib/scrappy/webkit/webkit.rb DELETED

@@ -1,18 +0,0 @@
-require 'gtk2'
-module Gtk
-  module WebKit
-  end
-end
-require 'rbwebkitgtk.so'
-class Gtk::WebKit::WebView
-  alias :load_html_string_no_defaults :load_html_string
-  def load_html_string(content, base_uri=nil)
-    load_html_string_no_defaults(content, base_uri)
-  end
-  def mark_text_matches(test, case_sensitive=false, limit=0)
-    mark_text_matches_with_limit(test, case_sensitive, limit)
-  end
-end