scrappy 0.1.24 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest +1 -0
- data/README.rdoc +42 -2
- data/Rakefile +1 -1
- data/bin/scrappy +40 -12
- data/lib/scrappy.rb +2 -1
- data/lib/scrappy/agent/agent.rb +112 -35
- data/lib/scrappy/agent/extractor.rb +28 -24
- data/lib/scrappy/repository.rb +34 -0
- data/scrappy.gemspec +10 -7
- metadata +24 -8
data/History.txt
CHANGED
data/Manifest
CHANGED
@@ -30,6 +30,7 @@ lib/scrappy/server/public/images/logo_small.png
|
|
30
30
|
lib/scrappy/server/public/stylesheets/application.css
|
31
31
|
lib/scrappy/server/views/home.haml
|
32
32
|
lib/scrappy/server/views/help.haml
|
33
|
+
lib/scrappy/repository.rb
|
33
34
|
lib/scrappy/shell.rb
|
34
35
|
lib/scrappy/support.rb
|
35
36
|
lib/scrappy/webkit/webkit.rb
|
data/README.rdoc
CHANGED
@@ -138,8 +138,43 @@ scrappy offers many different interfaces to get RDF data from a web page:
|
|
138
138
|
titles = output.find([], Node('dc:title'), nil)
|
139
139
|
titles.each { |title| puts title }
|
140
140
|
|
141
|
-
|
141
|
+
* RDF repository:
|
142
|
+
|
143
|
+
Sesame functionality has been included in Scrappy. You can configure
|
144
|
+
the repository options by editing the file config.yml placed in the folder .scrappy, in your home dir.
|
145
|
+
An example of this file can be found at the end of this README.
|
146
|
+
|
147
|
+
You can get the data for a certain period of time by using the time (-t, --time) option:
|
148
|
+
|
149
|
+
$ scrappy -g example.org -t 3
|
150
|
+
|
151
|
+
This would get all the data stored in Sesame for example.org in the last 3 minutes.
|
152
|
+
|
153
|
+
* Sample config.yml
|
142
154
|
|
155
|
+
# This is a sample configuration file, with the options to communicate with Sesame using Scrappy
|
156
|
+
repository:
|
157
|
+
# The host where Sesame is. Do not add the trailing '/'
|
158
|
+
host: http://localhost
|
159
|
+
|
160
|
+
# The port for the connection
|
161
|
+
port: 8080
|
162
|
+
|
163
|
+
# The time to consider the data in the repository valid, in minutes
|
164
|
+
time: 15
|
165
|
+
|
166
|
+
# The name of the repository
|
167
|
+
repository: memory
|
168
|
+
|
169
|
+
# The format to communicate with the repository
|
170
|
+
format: ntriples
|
171
|
+
|
172
|
+
# You can use any of the following formats:
|
173
|
+
# rdfxml, ntriples, turtle, n3, trix, trig
|
174
|
+
|
175
|
+
|
176
|
+
== INSTALL:
|
177
|
+
|
143
178
|
Install it as any other gem:
|
144
179
|
|
145
180
|
$ gem install scrappy
|
@@ -153,10 +188,15 @@ Additionally, some extra libraries are needed for certain features:
|
|
153
188
|
|
154
189
|
* PNG output of RDF graphs requires Graphviz (in Debian systems: sudo aptitude install graphviz).
|
155
190
|
|
191
|
+
In order to use Sesame, you will need to install it. Further instructions can be found
|
192
|
+
in the openRDF website, more precisely, in http://www.openrdf.org/doc/sesame2/users/ch06.html .
|
193
|
+
|
156
194
|
== CONTRIBUTORS:
|
157
195
|
|
158
196
|
* José Ignacio Fernández
|
159
197
|
|
198
|
+
* Alberto Mardomingo
|
199
|
+
|
160
200
|
* Jacobo Blasco
|
161
201
|
|
162
202
|
== LICENSE:
|
@@ -182,4 +222,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
182
222
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
183
223
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
184
224
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
185
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
225
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
CHANGED
@@ -11,7 +11,7 @@ Echoe.new('scrappy', Scrappy::VERSION) do |p|
|
|
11
11
|
p.email = "joseignacio.fernandez@gmail.com"
|
12
12
|
p.install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
|
13
13
|
p.ignore_pattern = ["pkg/*"]
|
14
|
-
p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.2.
|
14
|
+
p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.2.1'], ['i18n', '>= 0.4.2'], ['rest-client', '>=1.6.1'], ['haml', '>= 3.0.24']]
|
15
15
|
end
|
16
16
|
|
17
17
|
Rake::RDocTask.new(:rdoc) do |rdoc|
|
data/bin/scrappy
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
# encoding: UTF-8
|
3
3
|
|
4
|
-
|
4
|
+
require 'rbconfig'
|
5
|
+
WINDOWS_PLATFORM = Config::CONFIG['host_os'] =~ /mswin|mingw/
|
6
|
+
|
7
|
+
if !WINDOWS_PLATFORM
|
5
8
|
stty_save = `stty -g`.chomp
|
6
9
|
trap('INT') { system('stty', stty_save); Scrappy::App.quit }
|
7
10
|
end
|
@@ -31,8 +34,8 @@ module Scrappy
|
|
31
34
|
OptionParser.new do |opts|
|
32
35
|
opts.on('-v', '--version') { output_version; exit 0 }
|
33
36
|
opts.on('-h', '--help') { output_help; exit 0 }
|
34
|
-
opts.on('-g
|
35
|
-
opts.on('-p
|
37
|
+
opts.on('-g URI', '--get URI') { |uri| Options.uri = uri; Options.http_method=:get }
|
38
|
+
opts.on('-p URI', '--post URI') { |uri| Options.uri = uri; Options.http_method=:post }
|
36
39
|
opts.on('-D', '--dump') { Agent::Options.dump = true; Agent::Options.format = :rdf }
|
37
40
|
opts.on('-u', '--debug') { Agent::Options.debug = true }
|
38
41
|
opts.on('-i', '--interactive') { Options.shell = true; Agent::Options.format_header = false }
|
@@ -47,15 +50,19 @@ module Scrappy
|
|
47
50
|
opts.on('-R', '--reference-all') { Agent::Options.referenceable = :dump }
|
48
51
|
opts.on('-w', '--window') { Agent::Options.window = true }
|
49
52
|
opts.on('-f FORMAT', '--format FORMAT') { |f| Agent::Options.format = f.to_sym }
|
53
|
+
opts.on('-t TIME', '--time TIME') { |t| Agent::Options.time = t.to_i*60 } # converts minutes to seconds
|
54
|
+
opts.on('-o URIs', '--observe URIs') { |uris| Options.observe = uris.split(',') }
|
50
55
|
end.parse!(args)
|
51
56
|
@file = args.shift
|
52
57
|
end
|
53
58
|
|
54
59
|
def run
|
55
60
|
onload
|
56
|
-
if Options.
|
61
|
+
if Options.uri
|
57
62
|
Options.quiet = true
|
58
|
-
puts Agent.create.proxy(:http_method=>:get, :uri=>Options.
|
63
|
+
puts Agent.create.proxy(:http_method=>:get, :uri=>Options.uri).output
|
64
|
+
elsif Options.observe
|
65
|
+
Agent.create.observe(Options.observe)
|
59
66
|
elsif Options.proxy
|
60
67
|
puts "Launching Scrappy Web Proxy (set http://localhost:#{Options.port} as proxy)..."
|
61
68
|
require 'scrappy/server/proxy'
|
@@ -105,10 +112,12 @@ Options
|
|
105
112
|
-D, --dump Dumps RDF data to disk
|
106
113
|
-u, --debug Shows debugging traces
|
107
114
|
-i, --interactive Runs interactive shell
|
115
|
+
-o, --observe URLs Observes the specified URLs storing their data into the repository
|
108
116
|
-s, --server [ROOT] Runs web server (optionally specify server's root url)
|
109
117
|
-S, --proxy-server Runs web proxy
|
110
118
|
-P, --port PORT Selects port number (default is 3434)
|
111
119
|
-V, --visual Uses visual agent (slow)
|
120
|
+
-t, --time MINUTES   Returns repository data from the last given minutes
|
112
121
|
-r, --reference Outputs referenceable data
|
113
122
|
-R, --reference-all Outputs all HTML referenceable data
|
114
123
|
-w, --window Shows browser window (requires -v)
|
@@ -127,15 +136,23 @@ Copyright
|
|
127
136
|
|
128
137
|
def onload
|
129
138
|
# Check local or global knowledge base
|
130
|
-
home =
|
139
|
+
home = WINDOWS_PLATFORM ? "#{ENV['HOME']}/scrappy" : "#{ENV['HOME']}/.scrappy"
|
140
|
+
|
141
|
+
data_dirname = "kb"
|
142
|
+
cache_dirname = "cache"
|
143
|
+
cache_filename = "scrappy-#{Scrappy::VERSION}.kb"
|
144
|
+
config_filename = "config.yml"
|
131
145
|
|
132
|
-
if File.exists?(
|
133
|
-
data_folder
|
134
|
-
|
146
|
+
if File.exists?(File.join(home, data_dirname))
|
147
|
+
data_folder = File.join home, data_dirname
|
148
|
+
cache_folder = File.join home, cache_dirname
|
135
149
|
else
|
136
|
-
data_folder
|
137
|
-
|
150
|
+
data_folder = File.join Scrappy::Root, data_dirname
|
151
|
+
cache_folder = Dir.tmpdir
|
138
152
|
end
|
153
|
+
Dir.mkdir cache_folder if Dir[cache_folder].empty?
|
154
|
+
cache_file = File.join cache_folder, cache_filename
|
155
|
+
config_file = File.join home, config_filename
|
139
156
|
|
140
157
|
# Load knowledge base
|
141
158
|
Agent::Options.kb = if File.exists?(cache_file) and File.mtime(cache_file) >= Dir["#{data_folder}/*", data_folder].map{ |f| File.mtime(f) }.max
|
@@ -143,7 +160,7 @@ Copyright
|
|
143
160
|
open(cache_file) { |f| Marshal.load(f) }
|
144
161
|
else
|
145
162
|
# Load YARF files and cache kb
|
146
|
-
data = Dir[
|
163
|
+
data = Dir[File.join(data_folder, "*")].inject(RDF::Graph.new) do |kb, file|
|
147
164
|
extension = file.split('.').last.to_sym
|
148
165
|
graph = RDF::Parser.parse(extension, open(file).read)
|
149
166
|
kb.ns.merge! graph.ns
|
@@ -153,6 +170,17 @@ Copyright
|
|
153
170
|
open(cache_file, "w") { |f| Marshal.dump(data, f) }
|
154
171
|
data
|
155
172
|
end
|
173
|
+
|
174
|
+
# Looks for a configuration file. If it does not exist, Scrappy does not use Sesame
|
175
|
+
# It looks for it in the home .scrappy dir
|
176
|
+
if File.exist?(config_file)
|
177
|
+
config = YAML::load_file(config_file)["repository"]
|
178
|
+
# Convert the strings from the YAML file into symbols
|
179
|
+
repository_options = {}
|
180
|
+
config.each { |k,v| repository_options[k.to_sym] = v }
|
181
|
+
Agent::Options.repository = Repository.new repository_options
|
182
|
+
end
|
183
|
+
|
156
184
|
RDF::ID.ns.merge! Agent::Options.kb.ns
|
157
185
|
end
|
158
186
|
end
|
data/lib/scrappy.rb
CHANGED
@@ -10,6 +10,7 @@ require 'tmpdir'
|
|
10
10
|
require 'lightrdf'
|
11
11
|
|
12
12
|
require 'scrappy/support'
|
13
|
+
require 'scrappy/repository'
|
13
14
|
|
14
15
|
require 'scrappy/agent/extractor'
|
15
16
|
require 'scrappy/agent/map_reduce'
|
@@ -21,7 +22,7 @@ require 'scrappy/agent/agent'
|
|
21
22
|
Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
|
22
23
|
|
23
24
|
module Scrappy
|
24
|
-
VERSION = '0.
|
25
|
+
VERSION = '0.2.0'
|
25
26
|
end
|
26
27
|
|
27
28
|
# Require selectors
|
data/lib/scrappy/agent/agent.rb
CHANGED
@@ -38,6 +38,7 @@ module Scrappy
|
|
38
38
|
Agent.pool[@id] = self
|
39
39
|
@kb = args[:kb] || Options.kb
|
40
40
|
@options = Options.clone
|
41
|
+
@repository = args[:repository] || Options.repository
|
41
42
|
end
|
42
43
|
|
43
44
|
def map args, queue=nil
|
@@ -52,51 +53,35 @@ module Scrappy
|
|
52
53
|
puts "Retrieving cached #{request[:uri]}...done!" if options.debug
|
53
54
|
|
54
55
|
cache[request][:response]
|
56
|
+
elsif @repository
|
57
|
+
# Extracts from the repository
|
58
|
+
request_from_repository(request)
|
55
59
|
else
|
56
60
|
# Perform the request
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
if options.debug
|
61
|
-
print "Opening #{request[:uri]}..."; $stdout.flush
|
62
|
-
end
|
63
|
-
|
64
|
-
if request[:method] == :get
|
65
|
-
self.uri = request[:uri]
|
66
|
-
else
|
67
|
-
raise Exception, 'POST requests not supported yet'
|
68
|
-
end
|
69
|
-
|
70
|
-
puts 'done!' if options.debug
|
71
|
-
|
72
|
-
response = if self.html_data?
|
73
|
-
add_visual_data! if options.referenceable # Adds tags including visual information
|
74
|
-
extraction = extract self.uri, html, options.referenceable # Extract data
|
75
|
-
Dumper.dump self.uri, clean(extraction), options.format if options.dump # Dump results to disk
|
76
|
-
extraction
|
77
|
-
else
|
78
|
-
[]
|
79
|
-
end
|
61
|
+
request_uncached(request)
|
62
|
+
end
|
80
63
|
|
64
|
+
# If previous cache exists, do not cache it again
|
65
|
+
unless cache[request]
|
81
66
|
# Cache the request
|
82
|
-
cache[request] = { :time=>Time.now, :response=>
|
83
|
-
cache[request.merge(:uri=>self.uri)] = { :time=>Time.now, :response=>
|
84
|
-
|
85
|
-
response
|
67
|
+
cache[request] = { :time=>Time.now, :response=>triples }
|
68
|
+
cache[request.merge(:uri=>self.uri)] = { :time=>Time.now, :response=>triples } if self.uri
|
86
69
|
end
|
87
70
|
|
88
71
|
# Enqueue subresources
|
89
72
|
# Pages are enqueued without reducing depth
|
90
|
-
pages = triples.select { |s,p,o| p==
|
73
|
+
pages = triples.select { |s,p,o| p==ID("rdf:type") and o==ID("sc:Page") }.map{|s,p,o| s}.select{|n| n.is_a?(Symbol)}
|
91
74
|
|
92
75
|
# All other URIS are enqueued with depth reduced
|
93
76
|
uris = if depth != 0
|
94
|
-
(triples.map { |s, p, o| [s,o] }.flatten - [
|
77
|
+
(triples.map { |s, p, o| [s,o] }.flatten - [ID(self.uri)] - pages).select{|n| n.is_a?(Symbol)}
|
95
78
|
else
|
96
79
|
[]
|
97
80
|
end
|
98
81
|
|
99
|
-
items = (pages.map { |uri| {:uri=>uri.to_s, :depth=>[-1, depth].max} } +
|
82
|
+
items = ( pages.map { |uri| {:uri=>uri.to_s, :depth=>[-1, depth].max} } +
|
83
|
+
uris.map { |uri| {:uri=>uri.to_s, :depth=>[-1, depth-1].max} } ).
|
84
|
+
uniq.select{ |item| !RDF::ID.bnode?(item[:uri]) }
|
100
85
|
|
101
86
|
items.each { |item| puts "Enqueuing (depth = #{item[:depth]}): #{item[:uri]}" } if options.debug
|
102
87
|
|
@@ -120,7 +105,7 @@ module Scrappy
|
|
120
105
|
|
121
106
|
puts 'done!'if options.debug
|
122
107
|
|
123
|
-
triples
|
108
|
+
triples.uniq
|
124
109
|
end
|
125
110
|
|
126
111
|
def request args={}
|
@@ -139,7 +124,7 @@ module Scrappy
|
|
139
124
|
print "Serializing..."; $stdout.flush
|
140
125
|
end
|
141
126
|
|
142
|
-
output = response.serialize request[:format],
|
127
|
+
output = response.serialize request[:format], options.format_header
|
143
128
|
|
144
129
|
puts 'done!'if options.debug
|
145
130
|
|
@@ -152,14 +137,106 @@ module Scrappy
|
|
152
137
|
:status => self.html_data? ? (self.uri == request[:uri] ? :ok : :redirect) : :error
|
153
138
|
end
|
154
139
|
|
140
|
+
# Method to observe several websites, and extract the data periodically
|
141
|
+
def observe uris
|
142
|
+
while true
|
143
|
+
time_init = Time.now.to_i
|
144
|
+
uris.each do |uri|
|
145
|
+
puts "Pinging #{uri}..."
|
146
|
+
request :uri=>uri
|
147
|
+
end
|
148
|
+
time = options.repository.time * 60 - (Time.now.to_i - time_init)
|
149
|
+
puts "Sleeping until #{Time.now + time}..."
|
150
|
+
sleep time
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
private
|
155
155
|
def complete_uri uri
|
156
156
|
uri = "#{uri}.com" if uri =~ /\A\w+\Z/
|
157
|
-
uri = "http://#{uri}"
|
157
|
+
uri = "http://#{uri}" unless uri =~ /\A\w*:/
|
158
158
|
uri
|
159
159
|
end
|
160
|
-
|
160
|
+
|
161
161
|
def clean triples
|
162
|
-
triples.uniq.select { |s,p,o| p!=Node('rdf:type') or ![Node('sc:Index'), Node('sc:Page')].include?(o) }
|
162
|
+
triples.uniq.select { |s,p,o| p!=Node('rdf:type') or ![Node('sc:Index'), Node('sc:Page')].include?(o) }
|
163
|
+
end
|
164
|
+
|
165
|
+
# Do the extraction using RDF repository
|
166
|
+
def request_from_repository request
|
167
|
+
triples = []
|
168
|
+
|
169
|
+
# Checks if there is any previous extraction within the last 15 minutes
|
170
|
+
contexts = if Options.time
|
171
|
+
@repository.recent_contexts(request[:uri], Options.time)
|
172
|
+
else
|
173
|
+
@repository.recent_contexts(request[:uri])
|
174
|
+
end
|
175
|
+
|
176
|
+
if contexts.empty?
|
177
|
+
# Extracts data from the uri
|
178
|
+
triples = request_uncached request
|
179
|
+
|
180
|
+
if options.debug
|
181
|
+
print "Storing into repository #{request[:uri]}..."; $stdout.flush
|
182
|
+
end
|
183
|
+
|
184
|
+
# Checks if the extraction returned something
|
185
|
+
graph = if triples.empty?
|
186
|
+
# Creates a triple to indicate that nothing was extracted from the uri
|
187
|
+
# This is done because otherwise the context wouldn't be stored
|
188
|
+
RDF::Graph.new [ [ID(request[:uri]), ID("sc:extraction"), ID("sc:Empty")] ]
|
189
|
+
else
|
190
|
+
RDF::Graph.new triples.uniq
|
191
|
+
end
|
192
|
+
|
193
|
+
# Adds data to sesame
|
194
|
+
@repository.data = graph, "#{request[:uri]}:#{Time.now.to_i}"
|
195
|
+
@repository.data = graph, "#{self.uri}:#{Time.now.to_i}" if self.uri
|
196
|
+
|
197
|
+
puts 'done!' if options.debug
|
198
|
+
|
199
|
+
triples
|
200
|
+
else
|
201
|
+
# Data found in repository. Asking for it
|
202
|
+
triples = []
|
203
|
+
if options.debug
|
204
|
+
print "Retrieving from repository #{request[:uri]}..."; $stdout.flush
|
205
|
+
end
|
206
|
+
contexts.each do |context|
|
207
|
+
graph = @repository.data(context)
|
208
|
+
triples += graph.triples.select{|s,p,o| p!=ID("sc:extraction")}
|
209
|
+
end
|
210
|
+
puts 'done!' if options.debug
|
211
|
+
|
212
|
+
triples
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
# Extracts from the uri
|
217
|
+
def request_uncached request
|
218
|
+
sleep 0.001 * options.delay.to_f # Sleep if requested
|
219
|
+
|
220
|
+
if options.debug
|
221
|
+
print "Opening #{request[:uri]}..."; $stdout.flush
|
222
|
+
end
|
223
|
+
|
224
|
+
if request[:method] == :get
|
225
|
+
self.uri = request[:uri]
|
226
|
+
else
|
227
|
+
raise Exception, 'POST requests not supported yet'
|
228
|
+
end
|
229
|
+
|
230
|
+
puts 'done!' if options.debug
|
231
|
+
|
232
|
+
if self.html_data?
|
233
|
+
add_visual_data! if options.referenceable # Adds tags including visual information
|
234
|
+
triples = extract(self.uri, html, options.referenceable) # Extract data
|
235
|
+
Dumper.dump self.uri, clean(triples), options.format if options.dump # Dump results to disk
|
236
|
+
triples
|
237
|
+
else
|
238
|
+
[]
|
239
|
+
end
|
163
240
|
end
|
164
241
|
end
|
165
242
|
end
|
@@ -6,7 +6,7 @@ module Scrappy
|
|
6
6
|
if options.debug
|
7
7
|
print "Extracting #{uri}..."; $stdout.flush
|
8
8
|
end
|
9
|
-
|
9
|
+
|
10
10
|
@selector_pool ||= {}
|
11
11
|
triples = []
|
12
12
|
content = Nokogiri::HTML(html, nil, 'utf-8')
|
@@ -27,7 +27,11 @@ module Scrappy
|
|
27
27
|
|
28
28
|
puts "done!" if options.debug
|
29
29
|
|
30
|
-
triples
|
30
|
+
triples.map do |s,p,o|
|
31
|
+
[ s.is_a?(RDF::Node) ? s.id : s,
|
32
|
+
p.is_a?(RDF::Node) ? p.id : p,
|
33
|
+
o.is_a?(RDF::Node) ? o.id : o ]
|
34
|
+
end
|
31
35
|
end
|
32
36
|
|
33
37
|
private
|
@@ -150,32 +154,32 @@ module Scrappy
|
|
150
154
|
triples.push(*fragment.graph.merge(presentation.graph).merge(selector.graph).triples) if referenceable==:dump or resources.include?(fragment)
|
151
155
|
|
152
156
|
content.search('*').each do |node|
|
157
|
+
next if node.text?
|
158
|
+
|
153
159
|
fragment = Node(node_hash(uri, node.path))
|
154
|
-
|
160
|
+
|
155
161
|
if referenceable == :dump or resources[fragment]
|
156
|
-
selector
|
162
|
+
selector = Node(nil)
|
157
163
|
presentation = Node(nil)
|
158
164
|
|
159
|
-
selector
|
160
|
-
selector
|
161
|
-
selector
|
162
|
-
selector
|
163
|
-
|
164
|
-
presentation
|
165
|
-
presentation
|
166
|
-
presentation
|
167
|
-
presentation
|
168
|
-
presentation
|
169
|
-
presentation
|
170
|
-
presentation
|
171
|
-
presentation
|
172
|
-
presentation
|
173
|
-
presentation
|
174
|
-
|
175
|
-
fragment
|
176
|
-
fragment
|
177
|
-
|
178
|
-
triples.push(*fragment.graph.merge(presentation.graph).merge(selector.graph).triples)
|
165
|
+
triples << [selector, ID('rdf:type'), ID('sc:UnivocalSelector')]
|
166
|
+
triples << [selector, ID('sc:path'), node.path.to_s]
|
167
|
+
triples << [selector, ID('sc:tag'), node.name.to_s]
|
168
|
+
triples << [selector, ID('sc:document'), uri]
|
169
|
+
|
170
|
+
triples << [presentation, ID('sc:x'), node[:vx].to_s] if node[:vx]
|
171
|
+
triples << [presentation, ID('sc:y'), node[:vy].to_s] if node[:vy]
|
172
|
+
triples << [presentation, ID('sc:width'), node[:vw].to_s] if node[:vw]
|
173
|
+
triples << [presentation, ID('sc:height'), node[:vh].to_s] if node[:vh]
|
174
|
+
triples << [presentation, ID('sc:font_size'), node[:vsize].gsub("px","").to_s] if node[:vsize]
|
175
|
+
triples << [presentation, ID('sc:font_weight'), node[:vweight].to_s] if node[:vweight]
|
176
|
+
triples << [presentation, ID('sc:color'), node[:vcolor].to_s] if node[:vcolor]
|
177
|
+
triples << [presentation, ID('sc:background_color'), node[:vbcolor].to_s] if node[:vbcolor]
|
178
|
+
triples << [presentation, ID('sc:text'), node.text.strip]
|
179
|
+
triples << [presentation, ID('sc:children_count'), node.children.select{|n| !n.text?}.size.to_s]
|
180
|
+
|
181
|
+
triples << [fragment, ID('sc:selector'), selector]
|
182
|
+
triples << [fragment, ID('sc:presentation'), presentation]
|
179
183
|
end
|
180
184
|
end
|
181
185
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Scrappy
|
2
|
+
class Repository < RDF::Repository
|
3
|
+
# Processes the list of context, checks if there is any extraction
|
4
|
+
# from the last X minutes, and returns an array with them.
|
5
|
+
# If there is not any extraction, returns an empty array
|
6
|
+
def recent_contexts uri, seconds=@options[:time].to_i*60
|
7
|
+
return [] unless uri
|
8
|
+
contexts.select do |context|
|
9
|
+
date = context_date(context)
|
10
|
+
date and check_date(date, seconds) and context_uri(context) == uri
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def time
|
15
|
+
@options[:time]
|
16
|
+
end
|
17
|
+
|
18
|
+
protected
|
19
|
+
# Checks if the context date is within the indicated time
|
20
|
+
def check_date date, seconds
|
21
|
+
(Time.now.to_i - date) <= seconds
|
22
|
+
end
|
23
|
+
|
24
|
+
# Returns an integer with the date of a given context
|
25
|
+
def context_date context
|
26
|
+
$1.to_i if context =~ /:(\d+)\Z/
|
27
|
+
end
|
28
|
+
|
29
|
+
# Returns the URI of a context
|
30
|
+
def context_uri context
|
31
|
+
$1 if context =~ /\A(.*):(\d+)\Z/
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/scrappy.gemspec
CHANGED
@@ -2,17 +2,17 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{scrappy}
|
5
|
-
s.version = "0.
|
5
|
+
s.version = "0.2.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Jose Ignacio"]
|
9
|
-
s.date = %q{2011-03-
|
9
|
+
s.date = %q{2011-03-09}
|
10
10
|
s.default_executable = %q{scrappy}
|
11
11
|
s.description = %q{RDF web scraper}
|
12
12
|
s.email = %q{joseignacio.fernandez@gmail.com}
|
13
13
|
s.executables = ["scrappy"]
|
14
|
-
s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/js/annotator.js", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/proxy.rb", "lib/scrappy/server/server.rb", "lib/scrappy/server/public/images/logo.png", "lib/scrappy/server/public/images/logo_small.png", "lib/scrappy/server/public/stylesheets/application.css", "lib/scrappy/server/views/home.haml", "lib/scrappy/server/views/help.haml", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb"]
|
15
|
-
s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/js/annotator.js", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/proxy.rb", "lib/scrappy/server/server.rb", "lib/scrappy/server/public/images/logo.png", "lib/scrappy/server/public/images/logo_small.png", "lib/scrappy/server/public/stylesheets/application.css", "lib/scrappy/server/views/home.haml", "lib/scrappy/server/views/help.haml", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb", "test/test_helper.rb", "test/test_scrappy.rb", "scrappy.gemspec"]
|
14
|
+
s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/js/annotator.js", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/proxy.rb", "lib/scrappy/server/server.rb", "lib/scrappy/server/public/images/logo.png", "lib/scrappy/server/public/images/logo_small.png", "lib/scrappy/server/public/stylesheets/application.css", "lib/scrappy/server/views/home.haml", "lib/scrappy/server/views/help.haml", "lib/scrappy/repository.rb", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb"]
|
15
|
+
s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/js/annotator.js", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/visual_agent.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/proxy.rb", "lib/scrappy/server/server.rb", "lib/scrappy/server/public/images/logo.png", "lib/scrappy/server/public/images/logo_small.png", "lib/scrappy/server/public/stylesheets/application.css", "lib/scrappy/server/views/home.haml", "lib/scrappy/server/views/help.haml", "lib/scrappy/repository.rb", "lib/scrappy/shell.rb", "lib/scrappy/support.rb", "lib/scrappy/webkit/webkit.rb", "test/test_helper.rb", "test/test_scrappy.rb", "scrappy.gemspec"]
|
16
16
|
s.homepage = %q{http://github.com/josei/scrappy}
|
17
17
|
s.post_install_message = %q{**(Optional) Remember to install rbwebkitgtk for visual parsing features**}
|
18
18
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
|
@@ -32,8 +32,9 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
|
33
33
|
s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
|
34
34
|
s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
|
35
|
-
s.add_runtime_dependency(%q<lightrdf>, [">= 0.2.
|
35
|
+
s.add_runtime_dependency(%q<lightrdf>, [">= 0.2.1"])
|
36
36
|
s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
|
37
|
+
s.add_runtime_dependency(%q<rest-client>, [">= 1.6.1"])
|
37
38
|
s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
|
38
39
|
else
|
39
40
|
s.add_dependency(%q<activesupport>, [">= 2.3.5"])
|
@@ -41,8 +42,9 @@ Gem::Specification.new do |s|
|
|
41
42
|
s.add_dependency(%q<thin>, [">= 1.2.7"])
|
42
43
|
s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
|
43
44
|
s.add_dependency(%q<mechanize>, [">= 1.0.0"])
|
44
|
-
s.add_dependency(%q<lightrdf>, [">= 0.2.
|
45
|
+
s.add_dependency(%q<lightrdf>, [">= 0.2.1"])
|
45
46
|
s.add_dependency(%q<i18n>, [">= 0.4.2"])
|
47
|
+
s.add_dependency(%q<rest-client>, [">= 1.6.1"])
|
46
48
|
s.add_dependency(%q<haml>, [">= 3.0.24"])
|
47
49
|
end
|
48
50
|
else
|
@@ -51,8 +53,9 @@ Gem::Specification.new do |s|
|
|
51
53
|
s.add_dependency(%q<thin>, [">= 1.2.7"])
|
52
54
|
s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
|
53
55
|
s.add_dependency(%q<mechanize>, [">= 1.0.0"])
|
54
|
-
s.add_dependency(%q<lightrdf>, [">= 0.2.
|
56
|
+
s.add_dependency(%q<lightrdf>, [">= 0.2.1"])
|
55
57
|
s.add_dependency(%q<i18n>, [">= 0.4.2"])
|
58
|
+
s.add_dependency(%q<rest-client>, [">= 1.6.1"])
|
56
59
|
s.add_dependency(%q<haml>, [">= 3.0.24"])
|
57
60
|
end
|
58
61
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
7
|
+
- 2
|
8
|
+
- 0
|
9
|
+
version: 0.2.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jose Ignacio
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-03-
|
17
|
+
date: 2011-03-09 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -97,8 +97,8 @@ dependencies:
|
|
97
97
|
segments:
|
98
98
|
- 0
|
99
99
|
- 2
|
100
|
-
-
|
101
|
-
version: 0.2.
|
100
|
+
- 1
|
101
|
+
version: 0.2.1
|
102
102
|
type: :runtime
|
103
103
|
version_requirements: *id006
|
104
104
|
- !ruby/object:Gem::Dependency
|
@@ -116,9 +116,23 @@ dependencies:
|
|
116
116
|
type: :runtime
|
117
117
|
version_requirements: *id007
|
118
118
|
- !ruby/object:Gem::Dependency
|
119
|
-
name:
|
119
|
+
name: rest-client
|
120
120
|
prerelease: false
|
121
121
|
requirement: &id008 !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
segments:
|
126
|
+
- 1
|
127
|
+
- 6
|
128
|
+
- 1
|
129
|
+
version: 1.6.1
|
130
|
+
type: :runtime
|
131
|
+
version_requirements: *id008
|
132
|
+
- !ruby/object:Gem::Dependency
|
133
|
+
name: haml
|
134
|
+
prerelease: false
|
135
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
122
136
|
requirements:
|
123
137
|
- - ">="
|
124
138
|
- !ruby/object:Gem::Version
|
@@ -128,7 +142,7 @@ dependencies:
|
|
128
142
|
- 24
|
129
143
|
version: 3.0.24
|
130
144
|
type: :runtime
|
131
|
-
version_requirements: *
|
145
|
+
version_requirements: *id009
|
132
146
|
description: RDF web scraper
|
133
147
|
email: joseignacio.fernandez@gmail.com
|
134
148
|
executables:
|
@@ -164,6 +178,7 @@ extra_rdoc_files:
|
|
164
178
|
- lib/scrappy/server/public/stylesheets/application.css
|
165
179
|
- lib/scrappy/server/views/home.haml
|
166
180
|
- lib/scrappy/server/views/help.haml
|
181
|
+
- lib/scrappy/repository.rb
|
167
182
|
- lib/scrappy/shell.rb
|
168
183
|
- lib/scrappy/support.rb
|
169
184
|
- lib/scrappy/webkit/webkit.rb
|
@@ -200,6 +215,7 @@ files:
|
|
200
215
|
- lib/scrappy/server/public/stylesheets/application.css
|
201
216
|
- lib/scrappy/server/views/home.haml
|
202
217
|
- lib/scrappy/server/views/help.haml
|
218
|
+
- lib/scrappy/repository.rb
|
203
219
|
- lib/scrappy/shell.rb
|
204
220
|
- lib/scrappy/support.rb
|
205
221
|
- lib/scrappy/webkit/webkit.rb
|