RubyGems - scrapify - Versions diffs - 0.0.1 - Mend

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

data/.gitignore ADDED Viewed

@@ -0,0 +1,4 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*

data/Gemfile ADDED Viewed

@@ -0,0 +1,10 @@
+source "http://rubygems.org"
+# Specify your gem's dependencies in apify.gemspec
+gemspec
+gem 'rake'
+gem 'rspec'
+gem 'mocha'
+gem 'fakeweb'
+gem 'active_support'
+gem 'nokogiri'

data/README.md ADDED Viewed

@@ -0,0 +1,36 @@
+## ScrApify
+ScrApify is a library for building APIs by scraping static sites, with an ActiveRecord-like querying interface.
+### Installation
+```
+$ gem install scrapify
+```
+### Usage
+Define the HTML URL and declare attributes using XPath or CSS selectors.
+Every Scrapify class must define a key attribute.
+```
+class Pizza
+  include Scrapify::Base
+  html "http://www.dominos.co.in/menuDetails_ajx.php?catgId=1"
+  attribute :name, css: ".menu_lft li a"
+  attribute :image_url, xpath: "//li//input//@value"
+  key :name
+end
+```
+Now you can use finder methods to extract data from a static site
+```
+> Pizza.all
+> pizza = Pizza.find('mushroom')
+> pizza.name
+> pizza.image_url
+```

data/Rakefile ADDED Viewed

@@ -0,0 +1,6 @@
+require "bundler/gem_tasks"
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new('spec')
+task :default => :spec

data/lib/meta_define.rb ADDED Viewed

@@ -0,0 +1,5 @@
+class Object # http://whytheluckystiff.net/articles/seeingMetaclassesClearly.html
+  def meta_define name, &blk
+    (class << self; self; end).instance_eval { define_method name, &blk }
+  end
+end

data/lib/scrapify/base.rb ADDED Viewed

@@ -0,0 +1,76 @@
+module Scrapify
+  module Base
+    def self.included(klass)
+      klass.extend ClassMethods
+      klass.cattr_accessor :url, :doc, :attribute_names
+    end
+    module ClassMethods
+      def html(url)
+        self.url = url
+        define_finders
+      end
+      def attribute(name, options={})
+        add_attribute(name)
+        parser = options[:xpath] ? :xpath : :css
+        selector = options[parser]
+        meta_define "#{name}_values" do
+          self.doc ||= parse_html
+          self.doc.send(parser, selector).map &:content
+        end
+      end
+      def key(attribute)
+        define_find_by_id attribute
+        define_count attribute
+      end
+      private
+      def add_attribute(name)
+        self.attribute_names ||= []
+        self.attribute_names << name
+      end
+      def parse_html
+        Nokogiri::HTML(open(url))
+      end
+      def define_finders
+        meta_define :all do
+          count.times.map do |index|
+            find_by_index index
+          end
+        end
+        meta_define :first do
+          find_by_index 0
+        end
+        meta_define :last do
+          find_by_index count - 1
+        end
+        meta_define :find_by_index do |index|
+          return if index.nil? or index < 0
+          attributes = Hash[attribute_names.map {|attribute| [attribute, send("#{attribute}_values")[index]]}]
+          OpenStruct.new(attributes)
+        end
+      end
+      def define_count(key_attribute)
+        meta_define :count do
+          send("#{key_attribute}_values").size
+        end
+      end
+      def define_find_by_id(key_attribute)
+        meta_define :find do |key_value|
+          index = send("#{key_attribute}_values").index(key_value)
+          find_by_index index
+        end
+      end
+    end
+  end
+end

data/lib/scrapify/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Scrapify
+  VERSION = "0.0.1"
+end

data/lib/scrapify.rb ADDED Viewed

@@ -0,0 +1,7 @@
+require 'scrapify/version'
+require 'active_support/core_ext/class/attribute_accessors'
+require 'nokogiri'
+require 'open-uri'
+require 'meta_define'
+require 'ostruct'
+require 'scrapify/base'

data/scrapify.gemspec ADDED Viewed

@@ -0,0 +1,26 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "scrapify/version"
+Gem::Specification.new do |s|
+  s.name        = "scrapify"
+  s.version     = Scrapify::VERSION
+  s.authors     = ["Sathish & Shakiel"]
+  s.email       = ["sathish316@gmail.com"]
+  s.homepage    = "http://www.github.com/sathish316/scrapify"
+  s.summary     = %q{ScrApify scraps static html sites to scraESTlike APIs}
+  s.description = %q{ScrApify scraps static html sites to RESTlike APIs}
+  s.rubyforge_project = "scrapify"
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  # specify any dependencies here; for example:
+  s.add_development_dependency "rspec"
+  s.add_development_dependency "mocha"
+  s.add_development_dependency "fakeweb"
+  # s.add_runtime_dependency "nokogiri"
+end

data/spec/pizza.rb ADDED Viewed

@@ -0,0 +1,9 @@
+# Sample model used by the specs: declares the page to scrape, two attributes
+# and the key used for lookups.
+class Pizza
+  include Scrapify::Base
+  html "http://www.dominos.co.in/menuDetails_ajx.php?catgId=1"
+  # Content of each link inside the .menu_lft list
+  attribute :name, css: ".menu_lft li a"
+  # value attribute of each <input> nested in a list item
+  attribute :image_url, xpath: "//li//input//@value"
+  # Pizza.find looks records up by name
+  key :name
+end

data/spec/scrapify_spec.rb ADDED Viewed

@@ -0,0 +1,79 @@
+require 'spec_helper'
+require 'test_models'
+# Exercises the class-level API the Pizza model gets from Scrapify::Base,
+# against a canned copy of the menu page.
+describe Scrapify do
+  before do
+    @pizza_url = "http://www.dominos.co.in/menuDetails_ajx.php?catgId=1"
+    # Register a fixed three-item menu as the response for the model's URL.
+    FakeWeb.register_uri :get, @pizza_url, :body => <<-HTML
+      <ul class="menu_lft">
+        <li><a>chicken supreme</a><input value="chicken.jpg"></li>
+        <li><a>veg supreme</a><input value="veg.jpg"></li>
+        <li><a>pepperoni</a><input value="pepperoni.jpg"></li>
+      </ul>
+    HTML
+  end
+  it "should return attribute names" do
+    ::Pizza.attribute_names.should == [:name, :image_url]
+  end
+  describe "html" do
+    it "should store url" do
+      ::Pizza.url.should == @pizza_url
+    end
+    it "should parse html and fetch attributes using css" do
+      ::Pizza.name_values.should == ['chicken supreme', 'veg supreme', 'pepperoni']
+    end
+    it "should parse html and fetch attributes using xpath" do
+      ::Pizza.image_url_values.should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg']
+    end
+  end
+  describe "find" do
+    it "should find element by key" do
+      pizza = ::Pizza.find('pepperoni')
+      pizza.should_not be_nil
+      pizza.name.should == 'pepperoni'
+      pizza.image_url.should == 'pepperoni.jpg'
+    end
+    # 'mushroom' is not in the fixture above, so the lookup must miss.
+    it "should be nil if element does not exist" do
+      pizza = ::Pizza.find('mushroom')
+      pizza.should be_nil
+    end
+  end
+  describe "first" do
+    it "should fetch first matching element" do
+      first_pizza = ::Pizza.first
+      first_pizza.name.should == 'chicken supreme'
+      first_pizza.image_url.should == 'chicken.jpg'
+    end
+  end
+  describe "last" do
+    it "should fetch last matching element" do
+      last_pizza = ::Pizza.last
+      last_pizza.name.should == 'pepperoni'
+      last_pizza.image_url.should == 'pepperoni.jpg'
+    end
+  end
+  describe "all" do
+    it "should fetch all objects" do
+      pizzas = ::Pizza.all
+      pizzas.size.should == 3
+      pizzas.map(&:name).should == ['chicken supreme', 'veg supreme', 'pepperoni']
+      pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg']
+    end
+  end
+  describe "count" do
+    it "should return number of matching elements" do
+      ::Pizza.count.should == 3
+    end
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,10 @@
+# Shared spec setup: loads the bundle, FakeWeb and the library under test,
+# then selects Mocha as RSpec's mocking framework.
+require 'rubygems'
+require 'bundler/setup'
+require 'rspec/mocks'
+require 'fakeweb'
+require 'scrapify'
+RSpec.configure do |config|
+  config.mock_with :mocha
+end

data/spec/test_models.rb ADDED Viewed

@@ -0,0 +1 @@
+require 'pizza'

metadata ADDED Viewed

@@ -0,0 +1,95 @@
+--- !ruby/object:Gem::Specification
+name: scrapify
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+  prerelease:
+platform: ruby
+authors:
+- Sathish & Shakiel
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-05-25 00:00:00.000000000Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: &70282396705740 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *70282396705740
+- !ruby/object:Gem::Dependency
+  name: mocha
+  requirement: &70282396705260 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *70282396705260
+- !ruby/object:Gem::Dependency
+  name: fakeweb
+  requirement: &70282396704820 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *70282396704820
+description: ScrApify scraps static html sites to RESTlike APIs
+email:
+- sathish316@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- README.md
+- Rakefile
+- lib/meta_define.rb
+- lib/scrapify.rb
+- lib/scrapify/base.rb
+- lib/scrapify/version.rb
+- scrapify.gemspec
+- spec/pizza.rb
+- spec/scrapify_spec.rb
+- spec/spec_helper.rb
+- spec/test_models.rb
+homepage: http://www.github.com/sathish316/scrapify
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project: scrapify
+rubygems_version: 1.8.10
+signing_key:
+specification_version: 3
+summary: ScrApify scraps static html sites to RESTlike APIs
+test_files:
+- spec/pizza.rb
+- spec/scrapify_spec.rb
+- spec/spec_helper.rb
+- spec/test_models.rb

scrapify 0.0.1