wombat 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,83 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ wombat (0.0.1)
5
+ nightcrawler
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ activemodel (3.0.11)
11
+ activesupport (= 3.0.11)
12
+ builder (~> 2.1.2)
13
+ i18n (~> 0.5.0)
14
+ activerecord (3.0.11)
15
+ activemodel (= 3.0.11)
16
+ activesupport (= 3.0.11)
17
+ arel (~> 2.0.10)
18
+ tzinfo (~> 0.3.23)
19
+ activesupport (3.0.11)
20
+ arel (2.0.10)
21
+ builder (2.1.2)
22
+ defined (0.0.2)
23
+ diff-lcs (1.1.3)
24
+ ffi (1.0.11)
25
+ git (1.2.5)
26
+ growl_notify (0.0.3)
27
+ rb-appscript
28
+ guard (0.9.4)
29
+ ffi (>= 0.5.0)
30
+ thor (~> 0.14.6)
31
+ guard-bundler (0.1.3)
32
+ bundler (>= 1.0.0)
33
+ guard (>= 0.2.2)
34
+ guard-rspec (0.5.10)
35
+ guard (>= 0.8.4)
36
+ i18n (0.5.0)
37
+ jeweler (1.6.4)
38
+ bundler (~> 1.0)
39
+ git (>= 1.2.5)
40
+ rake
41
+ nightcrawler (0.0.2)
42
+ activerecord (~> 3.0.5)
43
+ defined (~> 0.0.1)
44
+ rake (0.9.2.2)
45
+ rb-appscript (0.6.1)
46
+ rb-fchange (0.0.5)
47
+ ffi
48
+ rb-fsevent (0.4.3.1)
49
+ rb-inotify (0.8.8)
50
+ ffi (>= 0.5.0)
51
+ rcov (0.9.11)
52
+ rspec (2.7.0)
53
+ rspec-core (~> 2.7.0)
54
+ rspec-expectations (~> 2.7.0)
55
+ rspec-mocks (~> 2.7.0)
56
+ rspec-core (2.7.1)
57
+ rspec-expectations (2.7.0)
58
+ diff-lcs (~> 1.1.2)
59
+ rspec-mocks (2.7.0)
60
+ shoulda (2.11.3)
61
+ thor (0.14.6)
62
+ tzinfo (0.3.31)
63
+ yard (0.7.4)
64
+
65
+ PLATFORMS
66
+ ruby
67
+
68
+ DEPENDENCIES
69
+ activesupport
70
+ bundler (~> 1.0.0)
71
+ growl_notify
72
+ guard
73
+ guard-bundler
74
+ guard-rspec
75
+ jeweler (~> 1.6.4)
76
+ rb-fchange
77
+ rb-fsevent
78
+ rb-inotify
79
+ rcov
80
+ rspec
81
+ shoulda
82
+ wombat!
83
+ yard
data/Guardfile ADDED
@@ -0,0 +1,16 @@
1
+ require 'guard'
2
+
3
+ guard 'rspec', :version => 2, :cli => '--color', :all_on_start => false, :all_after_pass => false do
4
+ watch('spec/spec_helper.rb') { "spec" }
5
+
6
+ watch(%r{(?:^|\/)spec/.+_spec\.rb$})
7
+ watch(%r{(?:^|\/)spec/helpers/(.+)\.rb$})
8
+ watch(%r{(?:^|\/)app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
9
+ watch(%r{(?:^|\/)lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
10
+
11
+ watch(%r{^spec/factories/(.+)\.rb$})
12
+ end
13
+
14
+ guard 'bundler' do
15
+ watch('Gemfile')
16
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Felipe Lima
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,20 @@
1
+ = Wombat
2
+
3
+ Generic Web crawler with a DSL that parses event-related data from web pages.
4
+ Still under development, it is being rewritten from scratch as a gem from an already existing project.
5
+
6
+ == Contributing to Wombat
7
+
8
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
9
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
10
+ * Fork the project
11
+ * Start a feature/bugfix branch
12
+ * Commit and push until you are happy with your contribution
13
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
14
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
15
+
16
+ == Copyright
17
+
18
+ Copyright (c) 2011 Felipe Lima. See LICENSE.txt for
19
+ further details.
20
+
data/Rakefile ADDED
@@ -0,0 +1,29 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ require 'rake'
6
+ require 'jeweler'
7
+ require 'rspec/core/rake_task'
8
+ require 'yard'
9
+
10
+ Jeweler::Tasks.new do |gem|
11
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
12
+ gem.name = "wombat"
13
+ gem.homepage = "http://github.com/felipecsl/nightcrawler"
14
+ gem.license = "MIT"
15
+ gem.summary = %Q{Generic web crawler for event-related data}
16
+ gem.description = %Q{Generic Web crawler with a DSL that parses event-related data from web pages}
17
+ gem.email = "felipe.lima@gmail.com"
18
+ gem.authors = ["Felipe Lima"]
19
+ # dependencies defined in Gemfile
20
+ end
21
+
22
+ Jeweler::RubygemsDotOrgTasks.new
23
+
24
+ RSpec::Core::RakeTask.new(:spec)
25
+
26
+ task :test => :spec
27
+ task :default => :spec
28
+
29
+ YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
@@ -0,0 +1,53 @@
1
+ #coding: utf-8
2
+ require 'wombat/properties'
3
+ require 'wombat/metadata'
4
+ require 'wombat/parser'
5
+ require 'active_support'
6
+ require 'date'
7
+
8
module Wombat
  # Mixin that turns a class into a crawler definition.
  #
  # Including it extends the class with a small class-level DSL (+event+,
  # +venue+, +location+, +with_details_page+ and arbitrary "name value"
  # settings) that fills a per-class Metadata object, and gives instances a
  # #crawl method that hands that metadata to a parser.
  module Crawler
    extend ActiveSupport::Concern

    # Instance methods are defined directly in the concern body instead of in
    # a nested InstanceMethods module, so they no longer depend on
    # ActiveSupport::Concern auto-including that module (a behaviour that
    # later ActiveSupport releases deprecated and removed).

    # Passes the metadata declared on the including class to the parser.
    #
    # Returns whatever the parser's #parse returns.
    def crawl
      # +metadata+ is private on the class, hence the explicit send.
      parser.parse(self.class.send(:metadata))
    end

    # Placeholder with no implementation yet; always returns nil.
    def supports_city?
    end

    # Returns the parser used by #crawl, creating a Parser on first use.
    def parser
      @parser ||= Parser.new
    end

    # Replaces the parser used by #crawl (e.g. with a test double).
    def parser=(parser)
      @parser = parser
    end

    # Class-level DSL made available on every class that includes Crawler.
    module ClassMethods
      # Defines +event+, +venue+ and +location+. Each one calls the given
      # block with the matching entry of the class metadata
      # (:event_props, :venue_props, :location_props) and does nothing when
      # no block is given.
      [:event, :venue, :location].each do |m|
        define_method(m) do |&block|
          block.call(metadata["#{m}_props".to_sym]) if block
        end
      end

      # Stores any other class-level call as plain metadata: the method name
      # becomes the key and the first argument the value.
      #
      # NOTE(review): this also captures misspelled DSL calls and calls
      # without arguments (stored as nil). respond_to_missing? is deliberately
      # not overridden, so respond_to? keeps answering only for real methods.
      def method_missing(method, *args, &block)
        metadata[method] = args.first
      end

      # Yields the class metadata to the block, if one is given.
      def with_details_page
        yield metadata if block_given?
      end

      # Placeholder with no implementation yet; always returns nil.
      def supported_cities
      end

      private

      # Lazily created Metadata instance, one per including class.
      def metadata
        @metadata ||= Metadata.new
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ #coding: utf-8
2
module Wombat
  # Hash of crawler settings with method-style access.
  #
  # A new instance always contains three Properties collections under the
  # keys :event_props, :venue_props and :location_props. Any other key can be
  # read as +metadata.some_key+ and written as +metadata.some_key = value+.
  class Metadata < Hash
    def initialize
      super()
      self[:event_props] = Properties.new
      self[:venue_props] = Properties.new
      self[:location_props] = Properties.new
    end

    # Defines #event, #venue and #location as shortcuts for the
    # corresponding "<name>_props" entries.
    [:event, :venue, :location].each do |m|
      define_method(m) do
        self["#{m}_props".to_sym]
      end
    end

    # Maps unknown methods onto hash access.
    #
    # method - Symbol name of the missing method. A trailing "=" marks a
    #          writer; anything else is treated as a reader.
    # args   - for writers, the first argument is the value to store.
    #
    # Returns the stored value for writers, or the value found under the key
    # (nil when absent) for readers.
    def method_missing(method, *args, &block)
      name = method.to_s
      if name.end_with?('=')
        # Strip the "=" so the value lands under the key the reader branch
        # looks up (:foo, not :"foo=").
        self[name.chomp('=').to_sym] = args.first
      else
        self[method]
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ #coding: utf-8
2
+ require 'wombat/property_locator'
3
+ require 'mechanize'
4
+
5
module Wombat
  # Fetches the event list page described by a metadata object, resolves every
  # declared property against it and fires the properties' callbacks.
  class Parser
    include PropertyLocator
    attr_accessor :mechanize, :context

    def initialize
      @mechanize = Mechanize.new
    end

    # Downloads "<base_url><event_list_page>", keeps the page's parser as the
    # lookup context, runs #locate on the metadata and then calls each
    # property's callback (when it has one) with that property's result.
    #
    # metadata - object answering base_url, event_list_page, event_props,
    #            venue_props and location_props.
    def parse(metadata)
      url = "#{metadata.base_url}#{metadata.event_list_page}"
      @context = @mechanize.get(url).parser

      locate metadata

      groups = [metadata.event_props, metadata.venue_props, metadata.location_props]
      groups.flat_map { |group| group.all_properties }.each do |property|
        next unless property.callback

        property.callback.call(property.result)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ #coding: utf-8
2
+ require 'wombat/property'
3
+
4
module Wombat
  # Ordered collection of Property definitions built through a free-form DSL:
  # calling any undefined method records a property named after that method.
  #
  #   props.title "xpath=//h1", :text, namespaces { |value| ... }
  class Properties
    def initialize
      @properties = []
    end

    # Records a new Property.
    #
    # method - becomes the property name (stored as a String).
    # args   - [selector, format, namespaces]; missing entries are nil.
    # block  - optional callback stored on the property.
    #
    # respond_to_missing? is deliberately not overridden: claiming to respond
    # to every message would invite duck-typing probes to call in here and
    # record bogus properties.
    def method_missing(method, *args, &block)
      @properties << Property.new(
        name: method.to_s,
        selector: args.first,
        format: args[1],
        namespaces: args[2],
        callback: block)
    end

    # Defined explicitly (returning nil, i.e. "not array-like") so that an
    # implicit array-conversion probe is answered here instead of falling
    # into method_missing, which would record a property named "to_ary".
    def to_ary
    end

    # Returns the first property with the given name, or nil.
    #
    # name - String or Symbol; compared against the stored String name.
    def get_property(name)
      wanted = name.to_s
      @properties.find { |p| p.name == wanted }
    end

    # Returns the array of recorded properties in declaration order.
    def all_properties
      @properties
    end
  end
end
@@ -0,0 +1,13 @@
1
module Wombat
  # Plain value object describing one piece of data to extract.
  #
  # Attributes (all readable and writable):
  #   name       - identifier of the property
  #   selector   - where the value is looked up
  #   format     - requested output format
  #   namespaces - extra lookup argument stored alongside the selector
  #   callback   - optional callable
  #   result     - not set by the constructor; starts as nil and is meant to
  #                be assigned later
  class Property
    attr_accessor :name, :selector, :format, :namespaces, :callback, :result

    # options - Hash with the optional keys :name, :selector, :format,
    #           :namespaces and :callback. Defaults to an empty Hash, in
    #           which case every attribute is nil.
    def initialize(options = {})
      @name = options[:name]
      @selector = options[:selector]
      @format = options[:format]
      @namespaces = options[:namespaces]
      @callback = options[:callback]
      @result = nil
    end
  end
end
@@ -0,0 +1,25 @@
1
+ #coding: utf-8
2
+
3
module Wombat
  # Resolves property selectors against a document.
  #
  # The including object must provide a +context+ method returning an object
  # that answers +xpath(expression, namespaces)+ and +css(expression)+.
  module PropertyLocator
    # Looks up every property in the metadata's event, venue and location
    # groups and stores the first match (or nil when nothing matched) in the
    # property's +result+.
    def locate(metadata)
      [metadata.event_props, metadata.venue_props, metadata.location_props].flat_map { |p| p.all_properties }.each do |p|
        p.result = locate_property(p).first
      end
    end

    private

    # Returns an Array of stripped Strings for the property's selector.
    # With format :html each match contributes its inner_html; otherwise
    # String matches are used as they are and other matches contribute their
    # inner_text.
    def locate_property(property)
      matches = locate_selector(property.selector, property.namespaces)
      # Non-mutating map: the lookup result is left untouched and does not
      # need to be an Array (only Enumerable).
      matches = matches.map { |m| m.inner_html } if property.format == :html
      matches.map { |m| match_text(m).strip }
    end

    # Text for a single match: inner_text when the match provides it,
    # otherwise its to_s (Strings are returned unchanged by to_s).
    def match_text(match)
      match.respond_to?(:inner_text) ? match.inner_text : match.to_s
    end

    # Dispatches on the selector:
    #   Symbol       - used literally (as a one-element Array of its String form)
    #   "xpath=EXPR" - context.xpath(EXPR, namespaces)
    #   "css=EXPR"   - context.css(EXPR)
    # Anything else (non-String values, unknown prefixes) yields an empty
    # Array, so the property simply ends up with a nil result instead of
    # raising NoMethodError further down.
    def locate_selector(selector, namespaces = nil)
      return [selector.to_s] if selector.is_a?(Symbol)
      return [] unless selector.respond_to?(:start_with?)
      return context.xpath(selector[6..-1], namespaces) if selector.start_with?("xpath=")
      return context.css(selector[4..-1]) if selector.start_with?("css=")

      []
    end
  end
end
data/lib/wombat.rb ADDED
@@ -0,0 +1,6 @@
1
+ #coding: utf-8
2
+
3
+ require 'wombat/crawler'
4
+
5
+ module Wombat
6
+ end
@@ -0,0 +1,72 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wombat::Crawler do
4
+ before(:each) do
5
+ @crawler = Class.new
6
+ @parser = Wombat::Parser.new
7
+ @crawler.send(:include, Wombat::Crawler)
8
+ @crawler_instance = @crawler.new
9
+ @crawler_instance.parser = @parser
10
+ end
11
+
12
+ it 'should call the provided block' do
13
+ event_called = false
14
+
15
+ @crawler.event { event_called = true }
16
+
17
+ event_called.should be_true
18
+ end
19
+
20
+ it 'should provide metadata to yielded block' do
21
+ @crawler.event do |e|
22
+ e.should_not be_nil
23
+ end
24
+ end
25
+
26
it 'should store assigned metadata information' do
  time = Time.now

  @crawler.event do |e|
    e.title 'Fulltronic Dezembro'
    # Pass the captured timestamp; a second Time.now could land in the next
    # second and make the to_s comparison below fail intermittently.
    e.time time
  end

  @crawler.venue { |v| v.name "Scooba" }
  # Parentheses avoid the "ambiguous first argument" parse of a negative literal.
  @crawler.location { |v| v.latitude(-50.2323) }

  @parser.should_receive(:parse) do |arg|
    arg.event_props.get_property("title").selector.should == "Fulltronic Dezembro"
    arg.event_props.get_property("time").selector.to_s.should == time.to_s
    arg.venue_props.get_property("name").selector.should == "Scooba"
    arg.location_props.get_property("latitude").selector.should == -50.2323
  end

  @crawler_instance.crawl
end
46
+
47
+ it 'should isolate metadata between different instances' do
48
+ another_parser = Wombat::Parser.new
49
+ another_crawler = Class.new
50
+ another_crawler.send(:include, Wombat::Crawler)
51
+ another_crawler_instance = another_crawler.new
52
+ another_crawler_instance.parser = another_parser
53
+
54
+ another_crawler.event { |e| e.title 'Ibiza' }
55
+ another_parser.should_receive(:parse) { |arg| arg.event_props.get_property("title").selector.should == "Ibiza" }
56
+ another_crawler_instance.crawl
57
+
58
+ @crawler.event { |e| e.title 'Fulltronic Dezembro' }
59
+ @parser.should_receive(:parse) { |arg| arg.event_props.get_property("title").selector.should == "Fulltronic Dezembro" }
60
+ @crawler_instance.crawl
61
+ end
62
+
63
+ it 'should be able to assign arbitrary plain text metadata' do
64
+ @crawler.some_data "/event/list"
65
+ @parser.should_receive(:parse) { |arg| arg.some_data.should == "/event/list" }
66
+ @crawler_instance.crawl
67
+ end
68
+
69
+ it 'should not explode if no block given' do
70
+ @crawler.event
71
+ end
72
+ end
@@ -0,0 +1,17 @@
1
+ #coding: utf-8
2
+ require 'wombat'
3
+
4
+ class SampleCrawler
5
+ include Wombat::Crawler
6
+
7
+ event do |e|
8
+ e.title "Sample Event"
9
+ e.description "This event's description"
10
+ e.date DateTime.now.to_date
11
+ end
12
+
13
+ venue do |v|
14
+ v.name "Cafe de La Musique"
15
+ v.address "324 Dom Pedro II Street"
16
+ end
17
+ end
@@ -0,0 +1,21 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wombat::Metadata do
4
+ it 'should have basic structure' do
5
+ metadata = Wombat::Metadata.new
6
+
7
+ metadata[:event_props].class.should == Wombat::Properties
8
+ metadata[:venue_props].class.should == Wombat::Properties
9
+ metadata[:location_props].class.should == Wombat::Properties
10
+
11
+ metadata.event_props.should == metadata[:event_props]
12
+ metadata.venue_props.should == metadata[:venue_props]
13
+ metadata.location_props.should == metadata[:location_props]
14
+ end
15
+
16
+ it 'should be able to get hash key like a method' do
17
+ m = Wombat::Metadata.new
18
+ m[:some_data] = "yeah"
19
+ m.some_data.should == "yeah"
20
+ end
21
+ end
@@ -0,0 +1,119 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wombat::Parser do
4
+ before(:each) do
5
+ @parser = Wombat::Parser.new
6
+ @metadata = Wombat::Metadata.new
7
+ end
8
+
9
+ it 'should request page document with correct url' do
10
+ @metadata[:base_url] = "http://www.google.com"
11
+ @metadata[:event_list_page] = "/search"
12
+ fake_document = double :document
13
+ fake_parser = double :parser
14
+ fake_document.should_receive(:parser).and_return(fake_parser)
15
+ @parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document
16
+
17
+ @parser.parse @metadata
18
+ end
19
+
20
+ it 'should send correct data to locate method' do
21
+ fake_document = double :document
22
+ fake_parser = double :parser
23
+ fake_document.should_receive(:parser).and_return(fake_parser)
24
+ @parser.mechanize.stub(:get).and_return fake_document
25
+ @parser.should_receive(:locate).with(@metadata)
26
+ @parser.parse @metadata
27
+ end
28
+
29
+ it 'should invoke event callbacks' do
30
+ fake_document = double :document
31
+ fake_parser = double :parser
32
+ property = double :property
33
+ properties = double :properties
34
+ block_called = false
35
+ block = lambda { |p| block_called = true }
36
+
37
+ property.stub(:result)
38
+ fake_document.should_receive(:parser).and_return(fake_parser)
39
+ property.should_receive(:callback).twice.and_return(block)
40
+ properties.should_receive(:all_properties).and_return [property]
41
+
42
+ @parser.mechanize.stub(:get).and_return fake_document
43
+ @parser.should_receive(:locate).with(@metadata)
44
+ @metadata.should_receive(:event_props).and_return properties
45
+
46
+ @parser.parse @metadata
47
+
48
+ block_called.should be_true
49
+ end
50
+
51
+ it 'should invoke venue callbacks' do
52
+ fake_document = double :document
53
+ fake_parser = double :parser
54
+ property = double :property
55
+ properties = double :properties
56
+ block_called = false
57
+ block = lambda { |p| block_called = true }
58
+
59
+ property.stub(:result)
60
+ fake_document.should_receive(:parser).and_return(fake_parser)
61
+ property.should_receive(:callback).twice.and_return(block)
62
+ properties.should_receive(:all_properties).and_return [property]
63
+
64
+ @parser.mechanize.stub(:get).and_return fake_document
65
+ @parser.should_receive(:locate).with(@metadata)
66
+ @metadata.should_receive(:venue_props).and_return properties
67
+
68
+ @parser.parse @metadata
69
+
70
+ block_called.should be_true
71
+ end
72
+
73
+ it 'should invoke location callbacks' do
74
+ fake_document = double :document
75
+ fake_parser = double :parser
76
+ property = double :property
77
+ properties = double :properties
78
+ block_called = false
79
+ block = lambda { |p| block_called = true }
80
+
81
+ property.stub(:result)
82
+ fake_document.should_receive(:parser).and_return(fake_parser)
83
+ property.should_receive(:callback).twice.and_return(block)
84
+ properties.should_receive(:all_properties).and_return [property]
85
+
86
+ @parser.mechanize.stub(:get).and_return fake_document
87
+ @parser.should_receive(:locate).with(@metadata)
88
+ @metadata.should_receive(:venue_props).and_return properties
89
+
90
+ @parser.parse @metadata
91
+
92
+ block_called.should be_true
93
+ end
94
+
95
+ it 'should invoke callback with parsed data' do
96
+ fake_document = double :document
97
+ fake_parser = double :parser
98
+ property = double :property
99
+ properties = double :properties
100
+ block_called = false
101
+ block = lambda { |p|
102
+ block_called = true
103
+ p.should == "blah"
104
+ }
105
+
106
+ property.should_receive(:result).and_return("blah")
107
+ fake_document.should_receive(:parser).and_return(fake_parser)
108
+ property.should_receive(:callback).twice.and_return(block)
109
+ properties.should_receive(:all_properties).and_return [property]
110
+
111
+ @parser.mechanize.stub(:get).and_return fake_document
112
+ @parser.should_receive(:locate).with(@metadata)
113
+ @metadata.should_receive(:event_props).and_return properties
114
+
115
+ @parser.parse @metadata
116
+
117
+ block_called.should be_true
118
+ end
119
+ end
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wombat::Properties do
4
+ before(:each) do
5
+ @props = Wombat::Properties.new
6
+ end
7
+
8
+ it 'should store event properties' do
9
+ block_executed = false
10
+ @props.title "/my/custom/selector", :text, { xmlns: "http://whatwg.org/xmlns" } do |x|
11
+ block_executed = true
12
+ end
13
+
14
+ title = @props.get_property "title"
15
+
16
+ title.name.should == "title"
17
+ title.selector.should == "/my/custom/selector"
18
+ title.format.should == :text
19
+ title.namespaces.should == { xmlns: "http://whatwg.org/xmlns" }
20
+ title.callback.should_not be_nil
21
+ title.callback.call
22
+ block_executed.should be_true
23
+ end
24
+
25
+ it 'should return all stored properties' do
26
+ @props.name "something"
27
+ @props.date DateTime.now
28
+
29
+ @props.all_properties.size.should == 2
30
+ end
31
+ end
@@ -0,0 +1,56 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wombat::PropertyLocator do
4
+ before(:each) do
5
+ @locator = Class.new
6
+ @locator.send(:include, Wombat::PropertyLocator)
7
+ @locator_instance = @locator.new
8
+ @metadata = Wombat::Metadata.new
9
+ end
10
+
11
+ it 'should locate metadata properties' do
12
+ context = double :context
13
+ context.stub(:xpath).with("/abc", nil).and_return(["Something cool"])
14
+ context.stub(:css).with("/ghi").and_return(["Another stuff"])
15
+
16
+ @metadata.event_props.data1 "xpath=/abc"
17
+ @metadata.venue_props.data2 :farms
18
+ @metadata.location_props.data3 "css=/ghi"
19
+
20
+ @locator_instance.stub(:context).and_return context
21
+
22
+ @locator_instance.locate @metadata
23
+
24
+ @metadata.event_props.get_property("data1").result.should == "Something cool"
25
+ @metadata.venue_props.get_property("data2").result.should == "farms"
26
+ @metadata.location_props.get_property("data3").result.should == "Another stuff"
27
+ end
28
+
29
+ it 'should support properties with html format' do
30
+ context = double :context
31
+ html_info = double :html_info
32
+
33
+ html_info.should_receive(:inner_html).and_return("some another info ")
34
+ context.should_receive(:xpath).with("/anotherData", nil).and_return([html_info])
35
+
36
+ @locator_instance.stub(:context).and_return context
37
+
38
+ @metadata.event_props.another_info "xpath=/anotherData", :html
39
+
40
+ @locator_instance.locate @metadata
41
+
42
+ @metadata.event_props.get_property("another_info").result.should == "some another info"
43
+ end
44
+
45
+ it 'should trim property contents and use namespaces if present' do
46
+ context = double :context
47
+ context.should_receive(:xpath).with("/event/some/description", "blah").and_return([" awesome event "])
48
+
49
+ @locator_instance.stub(:context).and_return context
50
+ @metadata.event_props.description "xpath=/event/some/description", :text, "blah"
51
+
52
+ @locator_instance.locate @metadata
53
+
54
+ @metadata.event_props.get_property("description").result.should == "awesome event"
55
+ end
56
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
+ describe Wombat::Property do
4
it 'should store property data' do
  # Keep a reference to the callback: two separate `lambda {}` literals are
  # different objects, so comparing against a fresh one cannot prove that the
  # given callback was stored.
  callback = lambda {}

  property = Wombat::Property.new(
    name: "title",
    selector: "/some/selector",
    format: :html,
    callback: callback)

  property.name.should == "title"
  property.selector.should == "/some/selector"
  property.format.should == :html
  property.callback.should equal(callback)
end
16
+ end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+ require 'helpers/sample_crawler'
3
+
4
+ describe SampleCrawler do
5
+ before(:each) do
6
+ @sample_crawler = SampleCrawler.new
7
+ @sample_crawler.parser = Wombat::Parser.new
8
+ end
9
+
10
+ it 'should correctly assign event metadata' do
11
+ @sample_crawler.parser.should_receive(:parse) do |args|
12
+ args.event_props.get_property("title").selector.should == "Sample Event"
13
+ args.event_props.get_property("description").selector.should == "This event's description"
14
+ args.event_props.get_property("date").selector.should == DateTime.now.to_date
15
+
16
+ args.venue_props.get_property("name").selector.should == "Cafe de La Musique"
17
+ args.venue_props.get_property("address").selector.should == "324 Dom Pedro II Street"
18
+ end
19
+
20
+ @sample_crawler.crawl
21
+ end
22
+ end
@@ -0,0 +1,3 @@
1
+ require 'wombat'
2
+ require 'rspec'
3
+ require 'rspec-expectations'
data/wombat.gemspec ADDED
@@ -0,0 +1,116 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "wombat"
8
+ s.version = "0.1.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Felipe Lima"]
12
+ s.date = "2011-12-27"
13
+ s.description = "Generic Web crawler with a DSL that parses event-related data from web pages"
14
+ s.email = "felipe.lima@gmail.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "Guardfile",
25
+ "LICENSE.txt",
26
+ "README.rdoc",
27
+ "Rakefile",
28
+ "VERSION",
29
+ "lib/wombat.rb",
30
+ "lib/wombat/crawler.rb",
31
+ "lib/wombat/metadata.rb",
32
+ "lib/wombat/parser.rb",
33
+ "lib/wombat/properties.rb",
34
+ "lib/wombat/property.rb",
35
+ "lib/wombat/property_locator.rb",
36
+ "spec/crawler_spec.rb",
37
+ "spec/helpers/sample_crawler.rb",
38
+ "spec/metadata_spec.rb",
39
+ "spec/parser_spec.rb",
40
+ "spec/properties_spec.rb",
41
+ "spec/property_locator_spec.rb",
42
+ "spec/property_spec.rb",
43
+ "spec/sample_crawler_spec.rb",
44
+ "spec/spec_helper.rb",
45
+ "wombat.gemspec"
46
+ ]
47
+ s.homepage = "http://github.com/felipecsl/nightcrawler"
48
+ s.licenses = ["MIT"]
49
+ s.require_paths = ["lib"]
50
+ s.rubygems_version = "1.8.11"
51
+ s.summary = "Generic web crawler for event-related data"
52
+
53
+ if s.respond_to? :specification_version then
54
+ s.specification_version = 3
55
+
56
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
57
+ s.add_runtime_dependency(%q<wombat>, [">= 0"])
58
+ s.add_development_dependency(%q<rspec>, [">= 0"])
59
+ s.add_development_dependency(%q<guard>, [">= 0"])
60
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
61
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
62
+ s.add_development_dependency(%q<rcov>, [">= 0"])
63
+ s.add_development_dependency(%q<yard>, [">= 0"])
64
+ s.add_development_dependency(%q<guard-rspec>, [">= 0"])
65
+ s.add_development_dependency(%q<guard-bundler>, [">= 0"])
66
+ s.add_development_dependency(%q<growl_notify>, [">= 0"])
67
+ s.add_development_dependency(%q<rb-inotify>, [">= 0"])
68
+ s.add_development_dependency(%q<rb-fsevent>, [">= 0"])
69
+ s.add_development_dependency(%q<rb-fchange>, [">= 0"])
70
+ s.add_development_dependency(%q<activesupport>, [">= 0"])
71
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
72
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
73
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
74
+ s.add_development_dependency(%q<rcov>, [">= 0"])
75
+ else
76
+ s.add_dependency(%q<wombat>, [">= 0"])
77
+ s.add_dependency(%q<rspec>, [">= 0"])
78
+ s.add_dependency(%q<guard>, [">= 0"])
79
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
80
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
81
+ s.add_dependency(%q<rcov>, [">= 0"])
82
+ s.add_dependency(%q<yard>, [">= 0"])
83
+ s.add_dependency(%q<guard-rspec>, [">= 0"])
84
+ s.add_dependency(%q<guard-bundler>, [">= 0"])
85
+ s.add_dependency(%q<growl_notify>, [">= 0"])
86
+ s.add_dependency(%q<rb-inotify>, [">= 0"])
87
+ s.add_dependency(%q<rb-fsevent>, [">= 0"])
88
+ s.add_dependency(%q<rb-fchange>, [">= 0"])
89
+ s.add_dependency(%q<activesupport>, [">= 0"])
90
+ s.add_dependency(%q<shoulda>, [">= 0"])
91
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
92
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
93
+ s.add_dependency(%q<rcov>, [">= 0"])
94
+ end
95
+ else
96
+ s.add_dependency(%q<wombat>, [">= 0"])
97
+ s.add_dependency(%q<rspec>, [">= 0"])
98
+ s.add_dependency(%q<guard>, [">= 0"])
99
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
100
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
101
+ s.add_dependency(%q<rcov>, [">= 0"])
102
+ s.add_dependency(%q<yard>, [">= 0"])
103
+ s.add_dependency(%q<guard-rspec>, [">= 0"])
104
+ s.add_dependency(%q<guard-bundler>, [">= 0"])
105
+ s.add_dependency(%q<growl_notify>, [">= 0"])
106
+ s.add_dependency(%q<rb-inotify>, [">= 0"])
107
+ s.add_dependency(%q<rb-fsevent>, [">= 0"])
108
+ s.add_dependency(%q<rb-fchange>, [">= 0"])
109
+ s.add_dependency(%q<activesupport>, [">= 0"])
110
+ s.add_dependency(%q<shoulda>, [">= 0"])
111
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
112
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
113
+ s.add_dependency(%q<rcov>, [">= 0"])
114
+ end
115
+ end
116
+
metadata ADDED
@@ -0,0 +1,272 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wombat
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Felipe Lima
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-27 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: wombat
16
+ requirement: &70274763497680 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70274763497680
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &70274760260820 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *70274760260820
36
+ - !ruby/object:Gem::Dependency
37
+ name: guard
38
+ requirement: &70274760150780 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *70274760150780
47
+ - !ruby/object:Gem::Dependency
48
+ name: bundler
49
+ requirement: &70274759481420 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.0.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *70274759481420
58
+ - !ruby/object:Gem::Dependency
59
+ name: jeweler
60
+ requirement: &70274758627840 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: 1.6.4
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *70274758627840
69
+ - !ruby/object:Gem::Dependency
70
+ name: rcov
71
+ requirement: &70274758883060 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *70274758883060
80
+ - !ruby/object:Gem::Dependency
81
+ name: yard
82
+ requirement: &70274763457680 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *70274763457680
91
+ - !ruby/object:Gem::Dependency
92
+ name: guard-rspec
93
+ requirement: &70274762681960 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ type: :development
100
+ prerelease: false
101
+ version_requirements: *70274762681960
102
+ - !ruby/object:Gem::Dependency
103
+ name: guard-bundler
104
+ requirement: &70274758275240 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: *70274758275240
113
+ - !ruby/object:Gem::Dependency
114
+ name: growl_notify
115
+ requirement: &70274758286120 !ruby/object:Gem::Requirement
116
+ none: false
117
+ requirements:
118
+ - - ! '>='
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
121
+ type: :development
122
+ prerelease: false
123
+ version_requirements: *70274758286120
124
+ - !ruby/object:Gem::Dependency
125
+ name: rb-inotify
126
+ requirement: &70274758283180 !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: *70274758283180
135
+ - !ruby/object:Gem::Dependency
136
+ name: rb-fsevent
137
+ requirement: &70274758281620 !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ type: :development
144
+ prerelease: false
145
+ version_requirements: *70274758281620
146
+ - !ruby/object:Gem::Dependency
147
+ name: rb-fchange
148
+ requirement: &70274758296380 !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ type: :development
155
+ prerelease: false
156
+ version_requirements: *70274758296380
157
+ - !ruby/object:Gem::Dependency
158
+ name: activesupport
159
+ requirement: &70274758295580 !ruby/object:Gem::Requirement
160
+ none: false
161
+ requirements:
162
+ - - ! '>='
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ type: :development
166
+ prerelease: false
167
+ version_requirements: *70274758295580
168
+ - !ruby/object:Gem::Dependency
169
+ name: shoulda
170
+ requirement: &70274758294740 !ruby/object:Gem::Requirement
171
+ none: false
172
+ requirements:
173
+ - - ! '>='
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ type: :development
177
+ prerelease: false
178
+ version_requirements: *70274758294740
179
+ - !ruby/object:Gem::Dependency
180
+ name: bundler
181
+ requirement: &70274758293620 !ruby/object:Gem::Requirement
182
+ none: false
183
+ requirements:
184
+ - - ~>
185
+ - !ruby/object:Gem::Version
186
+ version: 1.0.0
187
+ type: :development
188
+ prerelease: false
189
+ version_requirements: *70274758293620
190
+ - !ruby/object:Gem::Dependency
191
+ name: jeweler
192
+ requirement: &70274758292980 !ruby/object:Gem::Requirement
193
+ none: false
194
+ requirements:
195
+ - - ~>
196
+ - !ruby/object:Gem::Version
197
+ version: 1.6.4
198
+ type: :development
199
+ prerelease: false
200
+ version_requirements: *70274758292980
201
+ - !ruby/object:Gem::Dependency
202
+ name: rcov
203
+ requirement: &70274758291340 !ruby/object:Gem::Requirement
204
+ none: false
205
+ requirements:
206
+ - - ! '>='
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ type: :development
210
+ prerelease: false
211
+ version_requirements: *70274758291340
212
+ description: Generic Web crawler with a DSL that parses event-related data from web
213
+ pages
214
+ email: felipe.lima@gmail.com
215
+ executables: []
216
+ extensions: []
217
+ extra_rdoc_files:
218
+ - LICENSE.txt
219
+ - README.rdoc
220
+ files:
221
+ - .document
222
+ - .rspec
223
+ - Gemfile
224
+ - Gemfile.lock
225
+ - Guardfile
226
+ - LICENSE.txt
227
+ - README.rdoc
228
+ - Rakefile
229
+ - VERSION
230
+ - lib/wombat.rb
231
+ - lib/wombat/crawler.rb
232
+ - lib/wombat/metadata.rb
233
+ - lib/wombat/parser.rb
234
+ - lib/wombat/properties.rb
235
+ - lib/wombat/property.rb
236
+ - lib/wombat/property_locator.rb
237
+ - spec/crawler_spec.rb
238
+ - spec/helpers/sample_crawler.rb
239
+ - spec/metadata_spec.rb
240
+ - spec/parser_spec.rb
241
+ - spec/properties_spec.rb
242
+ - spec/property_locator_spec.rb
243
+ - spec/property_spec.rb
244
+ - spec/sample_crawler_spec.rb
245
+ - spec/spec_helper.rb
246
+ - wombat.gemspec
247
+ homepage: http://github.com/felipecsl/nightcrawler
248
+ licenses:
249
+ - MIT
250
+ post_install_message:
251
+ rdoc_options: []
252
+ require_paths:
253
+ - lib
254
+ required_ruby_version: !ruby/object:Gem::Requirement
255
+ none: false
256
+ requirements:
257
+ - - ! '>='
258
+ - !ruby/object:Gem::Version
259
+ version: '0'
260
+ required_rubygems_version: !ruby/object:Gem::Requirement
261
+ none: false
262
+ requirements:
263
+ - - ! '>='
264
+ - !ruby/object:Gem::Version
265
+ version: '0'
266
+ requirements: []
267
+ rubyforge_project:
268
+ rubygems_version: 1.8.11
269
+ signing_key:
270
+ specification_version: 3
271
+ summary: Generic web crawler for event-related data
272
+ test_files: []