proto 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in proto.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Kevin Curtin
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # Proto
2
+
3
+ Proto lets you create highly malleable, disposable value objects. Create a `Proto::Scraper` with a URL, then pass it the name of the class you want back and a hash that maps each attribute name to the CSS selector whose text should fill it. The objects you get back are instances of an OpenStruct subclass defined under the `Proto` namespace, so they are very flexible.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'proto'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install proto
18
+
19
+ ## Usage
20
+
21
+ ```ruby
22
+
23
+ proto = Proto::Scraper.new('http://twitter.com/kcurtin')
24
+
25
+ @tweets = proto.fetch_and_create!('Tweet', {:name => 'strong.fullname',
26
+ :content => 'p.js-tweet-text',
27
+ :created_at => 'small.time'})
28
+
29
+ # Proto::Scraper returns at most 10 objects per call
30
+
31
+ @tweets.inspect
32
+ #<Proto::Tweet name="Kevin Curtin", content="@cawebs06 just a tad over my head... You guys are smart :)", created_at="11h">
33
+ #<Proto::Tweet name="Kevin Curtin", content="@garybernhardt awesome, thanks. any plans to be in nyc soon? @FlatironSchool would love to have you stop by. we love DAS", created_at="12h">...
34
+
35
+ ```
36
+
37
+ ## Contributing
38
+
39
+ 1. Fork it
40
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
41
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
42
+ 4. Push to the branch (`git push origin my-new-feature`)
43
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
require "bundler/gem_tasks"
require 'rspec/core/rake_task'

# Defines the `spec` task.
RSpec::Core::RakeTask.new('spec')

# Make a bare `rake` invocation run the `spec` task.
task :default => :spec
@@ -0,0 +1,41 @@
1
module Proto
  # Pulls text out of an HTML document with CSS selectors and wraps each row
  # of results in a value object.
  #
  # A Scraper is built from a URL (or local path). #fetch_and_create! then
  # takes a class name and a hash of attribute name => CSS selector and
  # returns instances of an OpenStruct subclass defined under Proto.
  class Scraper
    # Maximum number of matches read per selector, and therefore the maximum
    # number of objects returned by one #fetch_and_create! call.
    MAX_RESULTS = 10

    # The parsed document the selectors are run against. Must respond to
    # #css(selector).
    attr_accessor :doc

    # Opens +url+ and parses it into #doc.
    #
    # @param url [String] address or local path of the HTML document
    def initialize(url)
      # Kernel#open stopped fetching URLs in Ruby 3.0; URI.open (open-uri,
      # Ruby 2.5+) is its replacement. Fall back to Kernel#open on older
      # Rubies where URI.open does not exist.
      opener = defined?(URI) && URI.respond_to?(:open) ? URI.method(:open) : method(:open)
      # Block form so the underlying handle is closed once parsing is done.
      @doc = opener.call(url) { |io| Nokogiri::HTML(io) }
    end

    # Scrapes the document and builds one object per row of matches.
    #
    # @param name [String] constant name of the class to build under Proto
    #   (defaults to 'Type' when only the hash is given)
    # @param args [Hash] attribute name => CSS selector
    # @return [Array<OpenStruct>] at most MAX_RESULTS instances of Proto::<name>
    # @raise [ArgumentError] if Proto::<name> already exists and is not an
    #   OpenStruct subclass (e.g. 'Scraper' or 'VERSION')
    # @raise [NameError] if +name+ is not a valid constant name
    def fetch_and_create!(name='Type', args)
      create_return_objects(name, scrape_attribute_data(args))
    end

    private

    # Runs every selector and zips the results into one hash per row.
    #
    # Row i holds the i-th match of each selector. When selectors match a
    # different number of nodes, the shorter columns are padded with nil
    # instead of raising.
    #
    # @param attributes [Hash] attribute name => CSS selector
    # @return [Array<Hash>] one hash of attribute name => stripped text per row
    def scrape_attribute_data(attributes)
      keys = attributes.keys
      columns = attributes.values.map do |selector|
        # first(n) starts at the first match and returns [] when nothing
        # matches, unlike slice(1..10) which skipped index 0 and could
        # return nil.
        doc.css(selector).first(MAX_RESULTS).map { |el| el.text.strip }
      end

      row_count = columns.map(&:length).max || 0
      Array.new(row_count) do |row|
        Hash[keys.zip(columns.map { |column| column[row] })]
      end
    end

    # Instantiates one Proto::<name> object per attribute hash.
    #
    # @param name [String] constant name of the value class
    # @param attributes [Array<Hash>] attribute hashes, one per object
    # @return [Array<OpenStruct>]
    def create_return_objects(name, attributes)
      klass = value_class(name)
      attributes.map { |hash| klass.new(hash) }
    end

    # Finds or defines the OpenStruct subclass Proto::<name>.
    #
    # A class generated by an earlier call is reused, so repeated calls do
    # not re-assign the constant. Any other existing constant is left alone
    # and reported, rather than being silently overwritten.
    def value_class(name)
      unless Proto.const_defined?(name, false)
        return Proto.const_set(name, Class.new(OpenStruct))
      end

      existing = Proto.const_get(name, false)
      return existing if existing.is_a?(Class) && existing < OpenStruct

      raise ArgumentError, "Proto::#{name} is already defined and is not a Proto value class"
    end
  end
end
@@ -0,0 +1,3 @@
1
module Proto
  # Version of the gem.
  VERSION = "0.0.1"

  # Human-readable version banner.
  #
  # @return [String] e.g. "Proto version 0.0.1"
  def self.version_string
    "Proto version #{VERSION}"
  end
end
data/lib/proto.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'proto/version'
2
+ require 'open-uri'
3
+ require 'ostruct'
4
+ require 'nokogiri'
5
+
6
# Top-level namespace of the gem.
module Proto
  # Register Proto::Scraper for lazy loading: 'proto/scraper' is required the
  # first time the constant is referenced.
  autoload(:Scraper, 'proto/scraper')
end
data/proto.gemspec ADDED
@@ -0,0 +1,23 @@
1
# -*- encoding: utf-8 -*-
# Put lib/ on the load path so the version constant can be read from the source.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'proto/version'

Gem::Specification.new do |gem|
  gem.name          = "proto"
  gem.version       = Proto::VERSION
  gem.authors       = ["Kevin Curtin"]
  gem.email         = ["kevincurtin88@gmail.com"]
  gem.description   = %q{Highly malleable, disposable value objects}
  gem.summary       = %q{Highly malleable, disposable value objects}
  gem.homepage      = "https://github.com/kcurtin/proto"

  # lib/proto.rb requires nokogiri at load time, so it must be a runtime
  # dependency; as a development dependency it is not installed with the gem.
  gem.add_dependency 'nokogiri'
  gem.add_development_dependency 'rspec'

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  # gem.test_files = Dir.glob("spec/**/*.rb")
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,66 @@
1
require File.dirname(__FILE__) + '/../spec_helper'
require 'tempfile'

describe Proto::Scraper do
  # Builds a page with twelve tweet-shaped entries, all by the same author.
  # Twelve is more than the ten objects the scraper returns, so the example
  # below exercises the cap without touching the network.
  def fixture_html
    entries = (1..12).map do |n|
      %(<div class="tweet">) +
        %(<strong class="fullname"> Kevin Curtin </strong>) +
        %(<p class="js-tweet-text">Tweet number #{n}</p>) +
        %(<small class="time">#{n}h</small>) +
        %(</div>)
    end
    "<html><body>#{entries.join}</body></html>"
  end

  # Writes the fixture to a temporary file and yields its path; the file is
  # removed when the block returns.
  def with_fixture_page
    Tempfile.open(['proto_scraper', '.html']) do |file|
      file.write(fixture_html)
      file.flush
      yield file.path
    end
  end

  before(:each) do
    # Nokogiri::HTML.stub!(:open).and_return("doc")
    # Nokogiri::HTML::Document.stub!(:parse)
    # @scrape = Proto::Scraper.new('http://example.com')
    # @scrape.stub_chain(:doc, :css, :each).and_return('STUBBED OUT')
  end

  it 'returns my objects!' do
    with_fixture_page do |path|
      obj = Proto::Scraper.new(path)
      obj_collection = obj.fetch_and_create!('Tweet', { :name => 'strong.fullname',
        :content => 'p.js-tweet-text', :created_at => 'small.time' })
      obj_collection.length.should == 10
      obj_collection.first.class.to_s.should == 'Proto::Tweet'
      obj_collection.first.name.should == 'Kevin Curtin'
    end
  end

  it "sets its doc attr to a nokogiri doc based on url" do
    expect {
      Proto::Scraper.new('blah_url')
    }.to raise_error(Errno::ENOENT)
  end

  # Disabled examples below; they depend on the commented-out @scrape setup
  # in the before block above.
  # context ".fetch_and_create!" do
  #   it "the default class name is 'Proto::Type'" do
  #     our_obj = @scrape.fetch_and_create!({})
  #     our_obj.class.to_s.should == 'Proto::Type'
  #   end

  #   it "accepts only a hash and sets default class name" do
  #     our_obj = @scrape.fetch_and_create!({:name => 'default const'})
  #     our_obj.class.to_s.should == 'Proto::Type'
  #   end

  #   it "returns a Proto object with attributes set" do
  #     our_obj = @scrape.fetch_and_create!('Sample', {:name => "Kevin", :title => "Developer"})
  #     our_obj.name.should == "STUBBED OUT"
  #     our_obj.title.should == "STUBBED OUT"
  #     our_obj.class.to_s.should == "Proto::Sample"
  #   end
  # end

  # context 'private methods' do
  #   context ".create_return_objects" do
  #     it "accepts a custom class name" do
  #       our_obj = @scrape.send(:create_return_objects, 'Kevin', {})
  #       our_obj.first.class.to_s.should == 'Proto::Kevin'
  #     end

  #     it "accepts a hash and name and sets custom attrs" do
  #       our_obj = @scrape.send(:create_return_objects, 'Test', [{:name => 'Kevin'},{:title => "Title"}])
  #       our_obj.first.name.should == 'Kevin'
  #       our_obj.last.title.should == 'Title'
  #       our_obj.length.should == 2
  #     end
  #   end

  #   context ".scrape_attribute_data" do
  #     it "returns a hash of stuff" do
  #       rh = @scrape.send(:scrape_attribute_data, {:title => "h2 a"})
  #       rh.should == [{:title => 'STUBBED OUT'}]
  #     end
  #   end
  # end
end
@@ -0,0 +1,7 @@
1
+ require 'spec_helper'
2
+
3
# Checks the human-readable version banner exposed by the Proto module.
describe Proto do
  it 'should return correct version string' do
    expected_banner = "Proto version #{Proto::VERSION}"
    Proto.version_string.should == expected_banner
  end
end