RubyGems - scrappy - Versions diffs - 0.1 → 0.1.1 - Mend

scrappy 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

data/History.txt +6 -0
data/{Manifest.txt → Manifest} +2 -1
data/README.rdoc +9 -9
data/Rakefile +16 -14
data/bin/scrappy +2 -1
data/lib/js/annotator.js +44 -0
data/lib/scrappy.rb +1 -1
data/lib/scrappy/agent/extractor.rb +14 -3
data/lib/scrappy/agent/visual_agent.rb +34 -9
data/lib/scrappy/shell.rb +19 -4
data/scrappy.gemspec +55 -0
metadata +40 -76

data/History.txt CHANGED

@@ -1,3 +1,9 @@
+=== 0.1.1 2010-09-30
+* Extremely basic annotator tool
+* Visual agent forces no concurrency to prevent memory leaks
+* Minor fixes
 === 0.1 2010-09-30
 * Initial release

data/{Manifest.txt → Manifest} RENAMED

@@ -1,9 +1,9 @@
 History.txt
-Manifest.txt
 README.rdoc
 Rakefile
 bin/scrappy
 kb/elmundo.yarf
+lib/js/annotator.js
 lib/scrappy.rb
 lib/scrappy/agent/agent.rb
 lib/scrappy/agent/blind_agent.rb
@@ -17,3 +17,4 @@ lib/scrappy/support.rb
 lib/scrappy/webkit/webkit.rb
 test/test_helper.rb
 test/test_scrappy.rb
+Manifest

data/README.rdoc CHANGED

@@ -1,10 +1,10 @@
-= Scrappy
+= scrappy
 * http://github.com/josei/scrappy
 == DESCRIPTION:
-Scrappy is a tool that allows extracting information from web pages and producing RDF data.
+scrappy is a tool that allows extracting information from web pages and producing RDF data.
 It uses the scraping ontology to define the mappings between HTML contents and RDF data.
 An example of mapping is shown next, which allows extracting all titles from http://www.elmundo.es:
@@ -54,7 +54,7 @@ tool by typing:
   $ scrappy --help
-Scrappy offers many different interfaces to get RDF data from a web page:
+scrappy offers many different interfaces to get RDF data from a web page:
 * Command-line interface:
@@ -63,7 +63,7 @@ Scrappy offers many different interfaces to get RDF data from a web page:
 * Interactive shell:
     $ scrappy -i
-    Launching Scrappy Shell...
+    Launching scrappy Shell...
     $ get elmundo.es
     dc: http://purl.org/dc/elements/1.1/
     owl: http://www.w3.org/2002/07/owl#
@@ -89,7 +89,7 @@ Scrappy offers many different interfaces to get RDF data from a web page:
 * Web Service interface:
     $ scrappy -s
-    Launching Scrappy Web Server...
+    Launching scrappy Web Server...
     ** Starting Mongrel on localhost:3434
   Then point your browser to http://localhost:3434 for additional directions.
@@ -97,7 +97,7 @@ Scrappy offers many different interfaces to get RDF data from a web page:
 * Web Proxy interface:
     $ scrappy -S
-    Launching Scrappy Web Proxy...
+    Launching scrappy Web Proxy...
     ** Starting Mongrel on localhost:3434
   Then configure your browser's HTTP proxy to http://localhost:3434 and browse http://www.elmundo.es
@@ -117,7 +117,7 @@ Scrappy offers many different interfaces to get RDF data from a web page:
 * Ruby interface:
-  You can use Scrappy in a Ruby program by requiring the gem:
+  You can use scrappy in a Ruby program by requiring the gem:
     require 'rubygems'
     require 'scrappy'
@@ -126,13 +126,13 @@ Scrappy offers many different interfaces to get RDF data from a web page:
     kb = RDF::Parser.parse(:rdf, open("kb.rdf").read)
     # Create an agent
-    agent = Scrappy::Agent.create :kb=>kb
+    agent = scrappy::Agent.create :kb=>kb
     # Get RDF output
     output = agent.request :get, 'http://www.example.com'
     # Output all titles from the web page
-    titles = output.find(Node('http://www.example.com'), Node('dc:title'), nil)
+    titles = output.find([], Node('dc:title'), nil)
     titles.each { |title| puts title }
 == INSTALL:

data/Rakefile CHANGED

@@ -1,20 +1,22 @@
 require 'rubygems'
-gem 'hoe', '>= 2.1.0'
-require 'hoe'
-require 'fileutils'
+require 'rake'
+require 'echoe'
 require './lib/scrappy'
-Hoe.plugin :newgem
+Echoe.new('scrappy', Scrappy::VERSION) do |p|
+  p.description    = "RDF web scraper"
+  p.summary        = "Web scraper that allows producing RDF data out of plain web pages"
+  p.url            = "http://github.com/josei/scrappy"
+  p.author         = "Jose Ignacio"
+  p.email          = "joseignacio.fernandez@gmail.com"
+  p.install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
+  p.ignore_pattern = ["pkg/*"]
+  p.development_dependencies = [['activesupport','>= 2.3.5'], ['markaby', '>= 0.7.1'], ['camping', '= 2.0'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1'], ['mongrel', '>= 1.1.5']]
+end
-# Generate all the Rake tasks
-# Run 'rake -T' to see list of generated tasks (from gem root directory)
-$hoe = Hoe.spec 'scrappy' do
-  self.developer 'Jose Ignacio', 'joseignacio.fernandez@gmail.com'
-  self.summary = "Web scraper that allows producing RDF data out of plain web pages"
-  self.post_install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
-  self.rubyforge_name       = self.name
-  self.extra_deps         = [['activesupport','>= 2.3.5'], ['markaby', '>= 0.7.1'], ['camping', '= 2.0'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1']]
+Rake::RDocTask.new(:rdoc) do |rdoc|
+  rdoc.rdoc_files.include('README.rdoc').include('lib/**/*.rb')
+  rdoc.main = "README.rdoc"
 end
-require 'newgem/tasks'
-Dir['tasks/**/*.rake'].each { |t| load t }
+Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each

data/bin/scrappy CHANGED

@@ -1,4 +1,5 @@
 #!/usr/bin/ruby
+# encoding: UTF-8
 stty_save = `stty -g`.chomp
 trap('INT') { system('stty', stty_save); Scrappy::App.quit }
@@ -42,7 +43,7 @@ module Scrappy
         opts.on('-c C', '--concurrence C')      { |c| Options.concurrence = c.to_i }
         opts.on('-d D', '--delay D')            { |d| Agent::Options.delay = d; Options.concurrence = 1 }
         opts.on('-l L', '--levels L')           { |l| Agent::Options.depth = l.to_i }
-        opts.on('-v', '--visual')               { Agent::Options.agent = :visual }
+        opts.on('-v', '--visual')               { Agent::Options.agent = :visual; Options.concurrence = 1 }
         opts.on('-r', '--reference')            { Agent::Options.referenceable = :minimum }
         opts.on('-R', '--reference-all')        { Agent::Options.referenceable = :dump }
         opts.on('-w', '--window')               { Agent::Options.window = true }

data/lib/js/annotator.js ADDED

@@ -0,0 +1,44 @@
+$(document).ready(function(){
+  $("body").append("<div id='myTrees'></div>")
+	$("#page > *").bind('mouseover', function(e){
+	e.stopPropagation();
+		$(this).addClass("changeBg");
+		})
+		.mouseout(function(){
+		$(this).removeClass("changeBg");
+		});
+});
+$(document).ready(function(){
+  $("*").bind('click', function(e){
+    e.stopPropagation();
+    var element = $(e.target).closest(this.tagName).get(0).tagName;
+    var parents = $(this).parents();
+    var string = element.toString();
+    for(j=0;j<parents.length;j++) {
+      string = string + " " + parents[j].tagName;
+    }
+    var tree = [];
+    var treeString = "";
+    for(h=parents.length-1; h>=0; h-- ) {
+      tree.push(parents[h].tagName);
+      if( treeString == "" ) {
+        treeString = treeString + parents[h].tagName;
+      } else {
+        treeString = treeString + " > " + parents[h].tagName;
+      }
+    }
+    tree.push(element);
+    treeString = treeString + " > " + element;
+    var myTrees = document.getElementById("myTrees");
+    var ul = document.createElement("ul");
+    var li = document.createElement("li");
+    myTrees.appendChild(ul);
+    li.innerHTML = treeString;
+    myTrees.appendChild(li);
+  });
+});

data/lib/scrappy.rb CHANGED

@@ -18,5 +18,5 @@ require 'scrappy/agent/agent'
 Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
 module Scrappy
-  VERSION = '0.1'
+  VERSION = '0.1.1'
 end

data/lib/scrappy/agent/extractor.rb CHANGED

@@ -1,3 +1,5 @@
+require 'digest/md5'
 module Scrappy
   module Extractor
     def extract uri, html, referenceable=nil
@@ -39,6 +41,7 @@ module Scrappy
               bnode = Node(nil)
               bnode.rdf::value = value
               bnode.rdf::type = Node('rdf:Literal')
+              options[:triples].push *bnode.triples
               bnode
             else
               value
@@ -53,7 +56,7 @@ module Scrappy
           # Add referenceable data if requested
           if options[:referenceable]
-            source = Node("_:#{doc[:uri]}|#{doc[:content].path}")
+            source = Node(node_hash(doc[:uri], doc[:content].path))
             options[:triples] << [ object, Node("sc:source"), source ]
             fragment.sc::type.each { |t| options[:triples] << [ source, Node("sc:type"), t ] }
             fragment.sc::relation.each { |relation| options[:triples] << [ source, Node("sc:relation"), relation ] }
@@ -115,12 +118,13 @@ module Scrappy
     def add_referenceable_data content, triples, referenceable
       resources = triples.map{|s,p,o| [[s],[o]]}.flatten
-      fragment = Node("_:#{uri}|/")
+      fragment = Node(node_hash(uri, '/'))
       selector = Node(nil)
       presentation = Node(nil)
       selector.rdf::type = Node('sc:UnivocalSelector')
       selector.sc::path = '/'
+      selector.sc::children = content.search('*').size.to_s
       selector.sc::uri = uri
       fragment.sc::selector = selector
@@ -128,7 +132,7 @@ module Scrappy
       triples.push(*fragment.graph.merge(presentation.graph).merge(selector.graph).triples) if referenceable==:dump or resources.include?(fragment)
       content.search('*').each do |node|
-        fragment = Node("_:#{uri}|#{node.path}")
+        fragment = Node(node_hash(uri, node.path))
         if referenceable == :dump or resources.include?(fragment)
           selector = Node(nil)
@@ -147,6 +151,8 @@ module Scrappy
           presentation.sc::font_weight = node[:vweight].to_s if node[:vweight]
           presentation.sc::color = node[:vcolor].to_s if node[:vcolor]
           presentation.sc::background_color = node[:vbcolor].to_s if node[:vbcolor]
+          presentation.sc::text = node.text.strip
+          presentation.sc::children_count = node.search('*').size.to_s
           fragment.sc::selector = selector
           fragment.sc::presentation = presentation unless presentation.empty?
@@ -155,5 +161,10 @@ module Scrappy
         end
       end
     end
+    def node_hash uri, path
+      digest = Digest::MD5.hexdigest("#{uri} #{path}")
+      "_:bnode#{digest}"
+    end
   end
 end

data/lib/scrappy/agent/visual_agent.rb CHANGED

@@ -6,6 +6,8 @@ $stderr = old_stderr
 module Scrappy
   class VisualAgent < Agent
+    attr_reader :visible
     def initialize args={}
       super
@@ -18,18 +20,33 @@ module Scrappy
       @window.signal_connect("destroy") { Gtk.main_quit }
       @window.add(@webview)
       @window.set_size_request(1024, 600)
-      @window.show_all if args[:window] or (args[:window].nil? and Agent::Options.window)
+      if args[:window] or (args[:window].nil? and Agent::Options.window)
+        @window.show_all
+        @visible = true
+      end
     end
     def uri
-      @webview.uri
+      @uri
     end
     def uri= uri
-      synchronize do
-        @webview.open uri.to_s
-        @cv.wait(60) # 1 minute to open the page
-        sleep(1) while !Nokogiri::HTML(html).search("head").empty? and Nokogiri::HTML(html).search("body").empty?
+      # First, check if the requested uri is a valid HTML page
+      valid = begin
+        Mechanize.new.get(uri).is_a?(Mechanize::Page)
+      rescue
+        false
+      end
+      # Open the page in the browser if it's an HTML page
+      if valid
+        synchronize do
+          @webview.open uri.to_s
+          @cv.wait(60) # 1 minute to open the page
+          @uri = @webview.uri
+        end
+      else
+        @uri = nil
       end
     end
@@ -40,7 +57,7 @@ module Scrappy
     def html
       js "document.documentElement.outerHTML"
     end
     def add_visual_data!
       js """var items = document.documentElement.getElementsByTagName('*');
             var i=0;
@@ -57,8 +74,6 @@ module Scrappy
             }"""
     end
-    private
     def js code
       old_title = @webview.title
       @webview.execute_script("document.title = JSON.stringify(eval(#{ActiveSupport::JSON.encode(code)}))")
@@ -66,6 +81,16 @@ module Scrappy
       @webview.execute_script("document.title = #{ActiveSupport::JSON.encode(old_title)}")
       title
     end
+    def load_js url
+      function = """function include(destination) {
+          var e=window.document.createElement('script');
+          e.setAttribute('src',destination);
+          window.document.body.appendChild(e);
+        }"""
+      js function
+      js "include('#{url}')"
+    end
   end
 end

data/lib/scrappy/shell.rb CHANGED

@@ -6,7 +6,7 @@ module Scrappy
     end
     def run
-      commands = ['get', 'put', 'help']
+      commands = ['get', 'quit', 'help', 'annotate', 'html']
       Readline.completion_append_character = " "
       Readline.completer_word_break_characters = ""
@@ -30,20 +30,35 @@ module Scrappy
       code = if command =~ /\Aget\W(.*)\Z/
         puts @agent.proxy :get, $1
-        puts ''
+        puts
       elsif command == 'help'
         puts 'Available commands:'
         puts '  get URL: Visit the specified URL'
+        puts '  html: Show HTML code of the current URL'
+        puts '  annotate: Start the annotation tool that helps building extractors'
         puts '  help: Show this information'
         puts '  quit: Exit scrappy shell'
-        puts ''
+        puts
+      elsif command == 'annotate'
+        if @agent.class.to_s == 'Scrappy::VisualAgent' and @agent.visible
+          @agent.load_js "http://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
+          @agent.load_js "http://github.com/josei/scrappy/raw/master/lib/js/annotator.js"
+          puts "Use the browser's window to annotate resources"
+          puts
+        else
+          puts 'ERROR: Scrappy must be run with -v and -w options to use this feature'
+          puts
+        end
+      elsif command == 'html'
+        puts @agent.html
+        puts
       elsif command == 'quit'
         :quit
       elsif command == '' or command[0..0] == '#'
         nil
       else
         puts "ERROR: Unknown command '#{command}'"
-        puts ''
+        puts
       end
       code
     end

data/scrappy.gemspec ADDED

@@ -0,0 +1,55 @@
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name = %q{scrappy}
+  s.version = "0.1.1"
+  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
+  s.authors = ["Jose Ignacio"]
+  s.date = %q{2010-10-29}
+  s.default_executable = %q{scrappy}
+  s.description = %q{RDF web scraper}
+  s.email = %q{joseignacio.fernandez@gmail.com}
+  s.executables = ["scrappy"]
+  s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/js/annotator.js", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cluster.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/proxy.rb", "lib/scrappy/server.rb", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb"]
+  s.files = ["History.txt", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/js/annotator.js", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cluster.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/proxy.rb", "lib/scrappy/server.rb", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb", "test/test_helper.rb", "test/test_scrappy.rb", "Manifest", "scrappy.gemspec"]
+  s.homepage = %q{http://github.com/josei/scrappy}
+  s.post_install_message = %q{**(Optional) Remember to install rbwebkitgtk for visual parsing features**}
+  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
+  s.require_paths = ["lib"]
+  s.rubyforge_project = %q{scrappy}
+  s.rubygems_version = %q{1.3.6}
+  s.summary = %q{Web scraper that allows producing RDF data out of plain web pages}
+  s.test_files = ["test/test_scrappy.rb", "test/test_helper.rb"]
+  if s.respond_to? :specification_version then
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+    s.specification_version = 3
+    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+      s.add_development_dependency(%q<activesupport>, [">= 2.3.5"])
+      s.add_development_dependency(%q<markaby>, [">= 0.7.1"])
+      s.add_development_dependency(%q<camping>, ["= 2.0"])
+      s.add_development_dependency(%q<nokogiri>, [">= 1.4.1"])
+      s.add_development_dependency(%q<mechanize>, [">= 1.0.0"])
+      s.add_development_dependency(%q<lightrdf>, [">= 0.1"])
+      s.add_development_dependency(%q<mongrel>, [">= 1.1.5"])
+    else
+      s.add_dependency(%q<activesupport>, [">= 2.3.5"])
+      s.add_dependency(%q<markaby>, [">= 0.7.1"])
+      s.add_dependency(%q<camping>, ["= 2.0"])
+      s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
+      s.add_dependency(%q<mechanize>, [">= 1.0.0"])
+      s.add_dependency(%q<lightrdf>, [">= 0.1"])
+      s.add_dependency(%q<mongrel>, [">= 1.1.5"])
+    end
+  else
+    s.add_dependency(%q<activesupport>, [">= 2.3.5"])
+    s.add_dependency(%q<markaby>, [">= 0.7.1"])
+    s.add_dependency(%q<camping>, ["= 2.0"])
+    s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
+    s.add_dependency(%q<mechanize>, [">= 1.0.0"])
+    s.add_dependency(%q<lightrdf>, [">= 0.1"])
+    s.add_dependency(%q<mongrel>, [">= 1.1.5"])
+  end
+end

metadata CHANGED

@@ -5,7 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 1
-  version: "0.1"
+  - 1
+  version: 0.1.1
 platform: ruby
 authors:
 - Jose Ignacio
@@ -13,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-10-07 00:00:00 +02:00
+date: 2010-10-29 00:00:00 +02:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -28,7 +29,7 @@ dependencies:
         - 3
         - 5
         version: 2.3.5
-  type: :runtime
+  type: :development
   version_requirements: *id001
 - !ruby/object:Gem::Dependency
   name: markaby
@@ -42,7 +43,7 @@ dependencies:
         - 7
         - 1
         version: 0.7.1
-  type: :runtime
+  type: :development
   version_requirements: *id002
 - !ruby/object:Gem::Dependency
   name: camping
@@ -55,7 +56,7 @@ dependencies:
         - 2
         - 0
         version: "2.0"
-  type: :runtime
+  type: :development
   version_requirements: *id003
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -69,7 +70,7 @@ dependencies:
         - 4
         - 1
         version: 1.4.1
-  type: :runtime
+  type: :development
   version_requirements: *id004
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -83,7 +84,7 @@ dependencies:
         - 0
         - 0
         version: 1.0.0
-  type: :runtime
+  type: :development
   version_requirements: *id005
 - !ruby/object:Gem::Dependency
   name: lightrdf
@@ -96,94 +97,50 @@ dependencies:
         - 0
         - 1
         version: "0.1"
-  type: :runtime
+  type: :development
   version_requirements: *id006
 - !ruby/object:Gem::Dependency
-  name: rubyforge
+  name: mongrel
   prerelease: false
   requirement: &id007 !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
         segments:
-        - 2
-        - 0
-        - 4
-        version: 2.0.4
+        - 1
+        - 1
+        - 5
+        version: 1.1.5
   type: :development
   version_requirements: *id007
-- !ruby/object:Gem::Dependency
-  name: hoe
-  prerelease: false
-  requirement: &id008 !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        segments:
-        - 2
-        - 6
-        - 0
-        version: 2.6.0
-  type: :development
-  version_requirements: *id008
-description: |-
-  Scrappy is a tool that allows extracting information from web pages and producing RDF data.
-  It uses the scraping ontology to define the mappings between HTML contents and RDF data.
-  An example of mapping is shown next, which allows extracting all titles from http://www.elmundo.es:
-    dc: http://purl.org/dc/elements/1.1/
-    rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
-    sioc: http://rdfs.org/sioc/ns#
-    sc: http://lab.gsi.dit.upm.es/scraping.rdf#
-    *:
-      rdf:type: sc:Fragment
-      sc:selector:
-        *:
-          rdf:type: sc:UriSelector
-          rdf:value: "http://www.elmundo.es/"
-      sc:identifier:
-        *:
-          rdf:type: sc:BaseUriSelector
-      sc:subfragment:
-        *:
-          sc:type: sioc:Post
-          sc:selector:
-            *:
-              rdf:type: sc:CssSelector
-              rdf:value: ".noticia h2, .noticia h3, .noticia h4"
-          sc:identifier:
-            *:
-              rdf:type: sc:CssSelector
-              rdf:value: "a"
-              sc:attribute: "href"
-          sc:subfragment:
-            *:
-              sc:type:     rdf:Literal
-              sc:relation: dc:title
-              sc:selector:
-                *:
-                  rdf:type:  sc:CssSelector
-                  rdf:value: "a"
-  (The above code is serialized using YARF format, supported by LightRDF gem, as well as
-  RDFXML, JSON, NTriples formats, which can also be used to define the mappings).
-email:
-- joseignacio.fernandez@gmail.com
+description: RDF web scraper
+email: joseignacio.fernandez@gmail.com
 executables:
 - scrappy
 extensions: []
 extra_rdoc_files:
-- History.txt
-- Manifest.txt
+- README.rdoc
+- bin/scrappy
+- lib/js/annotator.js
+- lib/scrappy.rb
+- lib/scrappy/agent/agent.rb
+- lib/scrappy/agent/blind_agent.rb
+- lib/scrappy/agent/cluster.rb
+- lib/scrappy/agent/extractor.rb
+- lib/scrappy/agent/visual_agent.rb
+- lib/scrappy/proxy.rb
+- lib/scrappy/server.rb
+- lib/scrappy/shell.rb
+- lib/scrappy/support.rb
+- lib/scrappy/webkit/webkit.rb
 files:
 - History.txt
-- Manifest.txt
 - README.rdoc
 - Rakefile
 - bin/scrappy
 - kb/elmundo.yarf
+- lib/js/annotator.js
 - lib/scrappy.rb
 - lib/scrappy/agent/agent.rb
 - lib/scrappy/agent/blind_agent.rb
@@ -197,12 +154,18 @@ files:
 - lib/scrappy/webkit/webkit.rb
 - test/test_helper.rb
 - test/test_scrappy.rb
+- Manifest
+- scrappy.gemspec
 has_rdoc: true
 homepage: http://github.com/josei/scrappy
 licenses: []
 post_install_message: "**(Optional) Remember to install rbwebkitgtk for visual parsing features**"
 rdoc_options:
+- --line-numbers
+- --inline-source
+- --title
+- Scrappy
 - --main
 - README.rdoc
 require_paths:
@@ -219,8 +182,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - - ">="
     - !ruby/object:Gem::Version
       segments:
-      - 0
-      version: "0"
+      - 1
+      - 2
+      version: "1.2"
 requirements: []
 rubyforge_project: scrappy