wombat 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +83 -0
- data/Guardfile +16 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +20 -0
- data/Rakefile +29 -0
- data/VERSION +1 -0
- data/lib/wombat/crawler.rb +53 -0
- data/lib/wombat/metadata.rb +24 -0
- data/lib/wombat/parser.rb +24 -0
- data/lib/wombat/properties.rb +31 -0
- data/lib/wombat/property.rb +13 -0
- data/lib/wombat/property_locator.rb +25 -0
- data/lib/wombat.rb +6 -0
- data/spec/crawler_spec.rb +72 -0
- data/spec/helpers/sample_crawler.rb +17 -0
- data/spec/metadata_spec.rb +21 -0
- data/spec/parser_spec.rb +119 -0
- data/spec/properties_spec.rb +31 -0
- data/spec/property_locator_spec.rb +56 -0
- data/spec/property_spec.rb +16 -0
- data/spec/sample_crawler_spec.rb +22 -0
- data/spec/spec_helper.rb +3 -0
- data/wombat.gemspec +116 -0
- metadata +272 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
wombat (0.0.1)
|
5
|
+
nightcrawler
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activemodel (3.0.11)
|
11
|
+
activesupport (= 3.0.11)
|
12
|
+
builder (~> 2.1.2)
|
13
|
+
i18n (~> 0.5.0)
|
14
|
+
activerecord (3.0.11)
|
15
|
+
activemodel (= 3.0.11)
|
16
|
+
activesupport (= 3.0.11)
|
17
|
+
arel (~> 2.0.10)
|
18
|
+
tzinfo (~> 0.3.23)
|
19
|
+
activesupport (3.0.11)
|
20
|
+
arel (2.0.10)
|
21
|
+
builder (2.1.2)
|
22
|
+
defined (0.0.2)
|
23
|
+
diff-lcs (1.1.3)
|
24
|
+
ffi (1.0.11)
|
25
|
+
git (1.2.5)
|
26
|
+
growl_notify (0.0.3)
|
27
|
+
rb-appscript
|
28
|
+
guard (0.9.4)
|
29
|
+
ffi (>= 0.5.0)
|
30
|
+
thor (~> 0.14.6)
|
31
|
+
guard-bundler (0.1.3)
|
32
|
+
bundler (>= 1.0.0)
|
33
|
+
guard (>= 0.2.2)
|
34
|
+
guard-rspec (0.5.10)
|
35
|
+
guard (>= 0.8.4)
|
36
|
+
i18n (0.5.0)
|
37
|
+
jeweler (1.6.4)
|
38
|
+
bundler (~> 1.0)
|
39
|
+
git (>= 1.2.5)
|
40
|
+
rake
|
41
|
+
nightcrawler (0.0.2)
|
42
|
+
activerecord (~> 3.0.5)
|
43
|
+
defined (~> 0.0.1)
|
44
|
+
rake (0.9.2.2)
|
45
|
+
rb-appscript (0.6.1)
|
46
|
+
rb-fchange (0.0.5)
|
47
|
+
ffi
|
48
|
+
rb-fsevent (0.4.3.1)
|
49
|
+
rb-inotify (0.8.8)
|
50
|
+
ffi (>= 0.5.0)
|
51
|
+
rcov (0.9.11)
|
52
|
+
rspec (2.7.0)
|
53
|
+
rspec-core (~> 2.7.0)
|
54
|
+
rspec-expectations (~> 2.7.0)
|
55
|
+
rspec-mocks (~> 2.7.0)
|
56
|
+
rspec-core (2.7.1)
|
57
|
+
rspec-expectations (2.7.0)
|
58
|
+
diff-lcs (~> 1.1.2)
|
59
|
+
rspec-mocks (2.7.0)
|
60
|
+
shoulda (2.11.3)
|
61
|
+
thor (0.14.6)
|
62
|
+
tzinfo (0.3.31)
|
63
|
+
yard (0.7.4)
|
64
|
+
|
65
|
+
PLATFORMS
|
66
|
+
ruby
|
67
|
+
|
68
|
+
DEPENDENCIES
|
69
|
+
activesupport
|
70
|
+
bundler (~> 1.0.0)
|
71
|
+
growl_notify
|
72
|
+
guard
|
73
|
+
guard-bundler
|
74
|
+
guard-rspec
|
75
|
+
jeweler (~> 1.6.4)
|
76
|
+
rb-fchange
|
77
|
+
rb-fsevent
|
78
|
+
rb-inotify
|
79
|
+
rcov
|
80
|
+
rspec
|
81
|
+
shoulda
|
82
|
+
wombat!
|
83
|
+
yard
|
data/Guardfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Guard configuration: re-runs the relevant specs (and bundler) when watched files change.
require 'guard'

guard 'rspec', :version => 2, :cli => '--color', :all_on_start => false, :all_after_pass => false do
  # The shared helper affects every example, so a change to it runs the whole suite.
  watch('spec/spec_helper.rb') { "spec" }

  watch(%r{(?:^|\/)spec/.+_spec\.rb$})
  watch(%r{(?:^|\/)spec/helpers/(.+)\.rb$})
  watch(%r{(?:^|\/)app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
  # Specs sit directly under spec/ (spec/crawler_spec.rb covers lib/wombat/crawler.rb),
  # so the "wombat/" directory must not be carried over into the spec path.
  watch(%r{(?:^|\/)lib/wombat/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
  # The top-level entry file has no spec of its own; run everything when it changes.
  watch(%r{(?:^|\/)lib/wombat\.rb$}) { "spec" }

  watch(%r{^spec/factories/(.+)\.rb$})
end

guard 'bundler' do
  watch('Gemfile')
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Felipe Lima
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
= Wombat
|
2
|
+
|
3
|
+
Generic Web crawler with a DSL that parses event-related data from web pages.
|
4
|
+
Still under development, it is being rewritten from scratch as a gem from an already existing project.
|
5
|
+
|
6
|
+
== Contributing to Wombat
|
7
|
+
|
8
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
9
|
+
* Check out the issue tracker to make sure no one has already requested it and/or contributed it
|
10
|
+
* Fork the project
|
11
|
+
* Start a feature/bugfix branch
|
12
|
+
* Commit and push until you are happy with your contribution
|
13
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
14
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
15
|
+
|
16
|
+
== Copyright
|
17
|
+
|
18
|
+
Copyright (c) 2011 Felipe Lima. See LICENSE.txt for
|
19
|
+
further details.
|
20
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
require 'rake'
|
6
|
+
require 'jeweler'
|
7
|
+
require 'rspec/core/rake_task'
|
8
|
+
require 'yard'
|
9
|
+
|
10
|
+
# Gem packaging tasks are generated by Jeweler from the specification below.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "wombat"
  # NOTE(review): the homepage points at the nightcrawler repository although the
  # gem is named wombat -- confirm this is intended.
  gem.homepage = "http://github.com/felipecsl/nightcrawler"
  gem.license = "MIT"
  gem.summary = %Q{Generic web crawler for event-related data}
  gem.description = %Q{Generic Web crawler with a DSL that parses event-related data from web pages}
  gem.email = "felipe.lima@gmail.com"
  gem.authors = ["Felipe Lima"]
  # dependencies defined in Gemfile
end

Jeweler::RubygemsDotOrgTasks.new

RSpec::Core::RakeTask.new(:spec)

# Both `rake test` and a bare `rake` run the spec suite.
task :test => :spec
task :default => :spec

YARD::Rake::YardocTask.new
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.1
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require 'wombat/properties'
|
3
|
+
require 'wombat/metadata'
|
4
|
+
require 'wombat/parser'
|
5
|
+
require 'active_support'
|
6
|
+
require 'date'
|
7
|
+
|
8
|
+
module Wombat
  # Mixin that turns a class into a crawler definition.
  #
  # Including it adds a class-level DSL (+event+, +venue+, +location+,
  # +with_details_page+ and arbitrary <tt>key value</tt> settings) that fills a
  # per-class Metadata object, plus an instance-level +crawl+ that hands that
  # metadata to a Parser.
  module Crawler
    extend ActiveSupport::Concern

    # Instance methods are defined directly on the concern. Newer ActiveSupport
    # releases no longer auto-include a nested InstanceMethods module, so
    # relying on it would leave +crawl+ undefined there.

    # Runs the parser against the metadata declared on the including class.
    def crawl
      parser.parse self.class.send(:metadata)
    end

    # Placeholder: not implemented yet, always returns nil.
    def supports_city?
    end

    # Parser used by +crawl+; created lazily and replaceable via +parser=+.
    def parser
      @parser ||= Parser.new
    end

    attr_writer :parser

    module ClassMethods
      # Defines +event+, +venue+ and +location+. Each yields the matching
      # Properties collection (metadata[:event_props], ...) to the given block
      # and is a no-op when called without a block.
      [:event, :venue, :location].each do |section|
        define_method(section) do |&block|
          block.call(metadata[:"#{section}_props"]) if block
        end
      end

      # Stores <tt>some_key value</tt> style declarations as plain metadata.
      #
      # Calls without an argument are not declarations (typos, implicit
      # conversion probes such as +to_ary+), so they fall through to the
      # default NoMethodError instead of silently adding a nil entry.
      def method_missing(method, *args, &block)
        return super if args.empty?

        metadata[method] = args.first
      end

      # Yields the class metadata so details-page settings can be declared.
      def with_details_page
        yield metadata if block_given?
      end

      # Placeholder: not implemented yet, always returns nil.
      def supported_cities
      end

      private

      # Per-class metadata store, created on first use.
      def metadata
        @metadata ||= Metadata.new
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
module Wombat
  # Hash of crawler settings with method-style access.
  #
  # Always contains three Properties collections under :event_props,
  # :venue_props and :location_props. Any other key can be read as
  # +metadata.key+ and written as +metadata.key = value+.
  class Metadata < Hash
    def initialize
      super()
      self[:event_props] = Properties.new
      self[:venue_props] = Properties.new
      self[:location_props] = Properties.new
    end

    # Shortcuts: +event+, +venue+ and +location+ return the matching *_props entry.
    [:event, :venue, :location].each do |section|
      define_method(section) do
        self[:"#{section}_props"]
      end
    end

    # Reads or writes a key by method name.
    #
    # +metadata.foo+ returns self[:foo] (nil when unset); +metadata.foo = 1+
    # stores under :foo. The trailing "=" is stripped so a later
    # +metadata.foo+ finds the value again.
    def method_missing(method, *args, &block)
      name = method.to_s
      if name.end_with?('=')
        self[name.chomp('=').to_sym] = args.first
      else
        self[method]
      end
    end

    # Reports setters and already-stored keys as supported dynamic methods.
    def respond_to_missing?(method, include_private = false)
      method.to_s.end_with?('=') || key?(method) || super
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require 'wombat/property_locator'
|
3
|
+
require 'mechanize'
|
4
|
+
|
5
|
+
module Wombat
  # Fetches the event list page and resolves every declared property on it.
  class Parser
    include PropertyLocator
    # mechanize: the HTTP agent used to fetch pages.
    # context:   the parsed document of the last fetched page, read by PropertyLocator.
    attr_accessor :mechanize, :context

    def initialize
      @mechanize = Mechanize.new
    end

    # Downloads "#{base_url}#{event_list_page}", locates all properties in the
    # resulting document and then calls each property's callback (if any) with
    # the located result.
    def parse(metadata)
      @context = @mechanize.get("#{metadata.base_url}#{metadata.event_list_page}").parser

      locate metadata

      sections = [metadata.event_props, metadata.venue_props, metadata.location_props]
      sections.flat_map { |section| section.all_properties }.each do |property|
        property.callback.call(property.result) if property.callback
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require 'wombat/property'
|
3
|
+
|
4
|
+
module Wombat
  # Ordered collection of Property definitions built through a DSL:
  #
  #   props.title "xpath=//h1", :text, namespaces { |value| ... }
  #
  # Every unknown method call registers a property named after the method.
  class Properties
    def initialize
      @properties = []
    end

    # Registers a property named after +method+.
    #
    # Positional arguments are, in order: selector, format, namespaces.
    # An optional block becomes the property's callback.
    def method_missing(method, *args, &block)
      @properties << Property.new(
        name: method.to_s,
        selector: args.first,
        format: args[1],
        namespaces: args[2],
        callback: block)
    end

    # Explicitly "not array-like". Ruby probes +to_ary+ during implicit array
    # conversion (e.g. Array#flatten); without this definition the probe would
    # reach method_missing and register a bogus "to_ary" property.
    def to_ary
      nil
    end

    # Returns the first property registered under +name+ (String or Symbol),
    # or nil when there is none.
    def get_property(name)
      wanted = name.to_s
      @properties.find { |property| property.name == wanted }
    end

    # All registered properties, in declaration order.
    def all_properties
      @properties
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Wombat
  # A single piece of data to extract from a page.
  #
  # name       - identifier of the property (a String when built by Properties).
  # selector   - where to find the value ("xpath=...", "css=..." or a Symbol literal).
  # format     - :html to keep markup; any other value is treated as text.
  # namespaces - namespaces passed along to XPath lookups.
  # callback   - optional callable invoked with the located result.
  # result     - value filled in after the property has been located.
  class Property
    attr_accessor :name, :selector, :format, :namespaces, :callback, :result

    # options - Hash with any of :name, :selector, :format, :namespaces,
    #           :callback. Missing keys leave the attribute nil.
    def initialize(options = {})
      @name = options[:name]
      @selector = options[:selector]
      @format = options[:format]
      @namespaces = options[:namespaces]
      @callback = options[:callback]
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
|
3
|
+
module Wombat
  # Resolves property selectors against a parsed document.
  #
  # The including class must provide +context+, an object answering
  # +xpath(path, namespaces)+ and +css(selector)+.
  module PropertyLocator
    XPATH_PREFIX = "xpath=".freeze
    CSS_PREFIX = "css=".freeze

    # Fills in +result+ for every event, venue and location property with the
    # first value its selector matches (nil when nothing matches).
    def locate(metadata)
      sections = [metadata.event_props, metadata.venue_props, metadata.location_props]
      sections.flat_map { |section| section.all_properties }.each do |property|
        property.result = locate_property(property).first
      end
    end

    private

    # Returns the stripped string values matched by the property's selector.
    # With format :html the matched nodes' inner HTML is used, otherwise their
    # text; values that are already plain strings are used as they are.
    def locate_property(property)
      reader = property.format == :html ? :inner_html : :inner_text
      matches = locate_selector(property.selector, property.namespaces)
      # Build a fresh array: the match set is not guaranteed to be an Array,
      # so in-place mutation is avoided.
      matches.map do |match|
        value = match.respond_to?(reader) ? match.public_send(reader) : match.to_s
        value.strip
      end
    end

    # Returns the raw matches for +selector+:
    # * a Symbol is a literal value and yields its string form;
    # * "xpath=..." and "css=..." strings are looked up in +context+;
    # * anything else matches nothing, so an empty list is returned (never nil).
    def locate_selector(selector, namespaces = nil)
      return [selector.to_s] if selector.is_a?(Symbol)
      return [] unless selector.is_a?(String)
      return context.xpath(selector[XPATH_PREFIX.size..-1], namespaces) if selector.start_with?(XPATH_PREFIX)
      return context.css(selector[CSS_PREFIX.size..-1]) if selector.start_with?(CSS_PREFIX)

      []
    end
  end
end
|
data/lib/wombat.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Exercises the class-level DSL added by Wombat::Crawler and the hand-off to the parser.
describe Wombat::Crawler do
  before(:each) do
    @crawler = Class.new
    @parser = Wombat::Parser.new
    @crawler.send(:include, Wombat::Crawler)
    @crawler_instance = @crawler.new
    @crawler_instance.parser = @parser
  end

  it 'should call the provided block' do
    event_called = false

    @crawler.event { event_called = true }

    event_called.should be_true
  end

  it 'should provide metadata to yielded block' do
    @crawler.event do |e|
      e.should_not be_nil
    end
  end

  it 'should store assigned metadata information' do
    time = Time.now

    @crawler.event do |e|
      e.title 'Fulltronic Dezembro'
      # Reuse the captured timestamp: a second Time.now could land in the next
      # second and make the to_s comparison below fail intermittently.
      e.time time
    end

    @crawler.venue { |v| v.name "Scooba" }
    # Parentheses keep the negative literal from being parsed as a subtraction.
    @crawler.location { |v| v.latitude(-50.2323) }

    @parser.should_receive(:parse) do |arg|
      arg.event_props.get_property("title").selector.should == "Fulltronic Dezembro"
      arg.event_props.get_property("time").selector.to_s.should == time.to_s
      arg.venue_props.get_property("name").selector.should == "Scooba"
      arg.location_props.get_property("latitude").selector.should == -50.2323
    end

    @crawler_instance.crawl
  end

  it 'should isolate metadata between different instances' do
    another_parser = Wombat::Parser.new
    another_crawler = Class.new
    another_crawler.send(:include, Wombat::Crawler)
    another_crawler_instance = another_crawler.new
    another_crawler_instance.parser = another_parser

    another_crawler.event { |e| e.title 'Ibiza' }
    another_parser.should_receive(:parse) { |arg| arg.event_props.get_property("title").selector.should == "Ibiza" }
    another_crawler_instance.crawl

    @crawler.event { |e| e.title 'Fulltronic Dezembro' }
    @parser.should_receive(:parse) { |arg| arg.event_props.get_property("title").selector.should == "Fulltronic Dezembro" }
    @crawler_instance.crawl
  end

  it 'should be able to assign arbitrary plain text metadata' do
    @crawler.some_data "/event/list"
    @parser.should_receive(:parse) { |arg| arg.some_data.should == "/event/list" }
    @crawler_instance.crawl
  end

  it 'should not explode if no block given' do
    @crawler.event
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require 'wombat'
|
3
|
+
|
4
|
+
# Minimal crawler definition used by the specs to exercise the Wombat::Crawler DSL.
class SampleCrawler
  include Wombat::Crawler

  event do |e|
    e.title "Sample Event"
    e.description "This event's description"
    # Evaluated once, when this class body is loaded.
    e.date Date.today
  end

  venue do |v|
    v.name "Cafe de La Musique"
    v.address "324 Dom Pedro II Street"
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks the default layout of Wombat::Metadata and its method-style key access.
describe Wombat::Metadata do
  it 'should have basic structure' do
    metadata = Wombat::Metadata.new

    metadata[:event_props].class.should == Wombat::Properties
    metadata[:venue_props].class.should == Wombat::Properties
    metadata[:location_props].class.should == Wombat::Properties

    metadata.event_props.should == metadata[:event_props]
    metadata.venue_props.should == metadata[:venue_props]
    metadata.location_props.should == metadata[:location_props]
  end

  it 'should be able to get hash key like a method' do
    m = Wombat::Metadata.new
    m[:some_data] = "yeah"
    m.some_data.should == "yeah"
  end
end
|
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Verifies that Wombat::Parser fetches the right page, delegates to #locate and
# fires property callbacks for every metadata section.
describe Wombat::Parser do
  before(:each) do
    @parser = Wombat::Parser.new
    @metadata = Wombat::Metadata.new
  end

  it 'should request page document with correct url' do
    @metadata[:base_url] = "http://www.google.com"
    @metadata[:event_list_page] = "/search"
    fake_document = double :document
    fake_parser = double :parser
    fake_document.should_receive(:parser).and_return(fake_parser)
    @parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document

    @parser.parse @metadata
  end

  it 'should send correct data to locate method' do
    fake_document = double :document
    fake_parser = double :parser
    fake_document.should_receive(:parser).and_return(fake_parser)
    @parser.mechanize.stub(:get).and_return fake_document
    @parser.should_receive(:locate).with(@metadata)
    @parser.parse @metadata
  end

  # One example per metadata section. Generating them from a table keeps each
  # example bound to its own accessor (the hand-copied "location" example used
  # to stub :venue_props, so location callbacks were never actually tested).
  {
    'event' => :event_props,
    'venue' => :venue_props,
    'location' => :location_props
  }.each do |section, accessor|
    it "should invoke #{section} callbacks" do
      fake_document = double :document
      fake_parser = double :parser
      property = double :property
      properties = double :properties
      block_called = false
      block = lambda { |p| block_called = true }

      property.stub(:result)
      fake_document.should_receive(:parser).and_return(fake_parser)
      property.should_receive(:callback).twice.and_return(block)
      properties.should_receive(:all_properties).and_return [property]

      @parser.mechanize.stub(:get).and_return fake_document
      @parser.should_receive(:locate).with(@metadata)
      @metadata.should_receive(accessor).and_return properties

      @parser.parse @metadata

      block_called.should be_true
    end
  end

  it 'should invoke callback with parsed data' do
    fake_document = double :document
    fake_parser = double :parser
    property = double :property
    properties = double :properties
    block_called = false
    block = lambda { |p|
      block_called = true
      p.should == "blah"
    }

    property.should_receive(:result).and_return("blah")
    fake_document.should_receive(:parser).and_return(fake_parser)
    property.should_receive(:callback).twice.and_return(block)
    properties.should_receive(:all_properties).and_return [property]

    @parser.mechanize.stub(:get).and_return fake_document
    @parser.should_receive(:locate).with(@metadata)
    @metadata.should_receive(:event_props).and_return properties

    @parser.parse @metadata

    block_called.should be_true
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks that Wombat::Properties records DSL calls as Property objects.
describe Wombat::Properties do
  before(:each) do
    @props = Wombat::Properties.new
  end

  it 'should store event properties' do
    block_executed = false
    @props.title "/my/custom/selector", :text, { xmlns: "http://whatwg.org/xmlns" } do |x|
      block_executed = true
    end

    title = @props.get_property "title"

    title.name.should == "title"
    title.selector.should == "/my/custom/selector"
    title.format.should == :text
    title.namespaces.should == { xmlns: "http://whatwg.org/xmlns" }
    title.callback.should_not be_nil
    title.callback.call
    block_executed.should be_true
  end

  it 'should return all stored properties' do
    @props.name "something"
    @props.date DateTime.now

    @props.all_properties.size.should == 2
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Exercises Wombat::PropertyLocator through an anonymous class that mixes it in
# and a stubbed document context.
describe Wombat::PropertyLocator do
  before(:each) do
    @locator = Class.new
    @locator.send(:include, Wombat::PropertyLocator)
    @locator_instance = @locator.new
    @metadata = Wombat::Metadata.new
  end

  it 'should locate metadata properties' do
    context = double :context
    context.stub(:xpath).with("/abc", nil).and_return(["Something cool"])
    context.stub(:css).with("/ghi").and_return(["Another stuff"])

    @metadata.event_props.data1 "xpath=/abc"
    @metadata.venue_props.data2 :farms
    @metadata.location_props.data3 "css=/ghi"

    @locator_instance.stub(:context).and_return context

    @locator_instance.locate @metadata

    @metadata.event_props.get_property("data1").result.should == "Something cool"
    @metadata.venue_props.get_property("data2").result.should == "farms"
    @metadata.location_props.get_property("data3").result.should == "Another stuff"
  end

  it 'should support properties with html format' do
    context = double :context
    html_info = double :html_info

    html_info.should_receive(:inner_html).and_return("some another info ")
    context.should_receive(:xpath).with("/anotherData", nil).and_return([html_info])

    @locator_instance.stub(:context).and_return context

    @metadata.event_props.another_info "xpath=/anotherData", :html

    @locator_instance.locate @metadata

    @metadata.event_props.get_property("another_info").result.should == "some another info"
  end

  it 'should trim property contents and use namespaces if present' do
    context = double :context
    context.should_receive(:xpath).with("/event/some/description", "blah").and_return([" awesome event "])

    @locator_instance.stub(:context).and_return context
    @metadata.event_props.description "xpath=/event/some/description", :text, "blah"

    @locator_instance.locate @metadata

    @metadata.event_props.get_property("description").result.should == "awesome event"
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks that Wombat::Property exposes the options it was built with.
describe Wombat::Property do
  it 'should store property data' do
    # Keep a reference to the callback: a second `lambda {}` literal is a
    # different object and does not reliably compare equal to the first.
    callback = lambda {}

    property = Wombat::Property.new(
      name: "title",
      selector: "/some/selector",
      format: :html,
      callback: callback)

    property.name.should == "title"
    property.selector.should == "/some/selector"
    property.format.should == :html
    property.callback.should == callback
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'helpers/sample_crawler'
|
3
|
+
|
4
|
+
# End-to-end check that the SampleCrawler DSL declarations reach the parser.
describe SampleCrawler do
  before(:each) do
    @sample_crawler = SampleCrawler.new
    @sample_crawler.parser = Wombat::Parser.new
  end

  it 'should correctly assign event metadata' do
    @sample_crawler.parser.should_receive(:parse) do |args|
      args.event_props.get_property("title").selector.should == "Sample Event"
      args.event_props.get_property("description").selector.should == "This event's description"
      # NOTE(review): the crawler's date is fixed when the helper class is
      # loaded, so this comparison can fail if the suite runs across midnight.
      args.event_props.get_property("date").selector.should == Date.today

      args.venue_props.get_property("name").selector.should == "Cafe de La Musique"
      args.venue_props.get_property("address").selector.should == "324 Dom Pedro II Street"
    end

    @sample_crawler.crawl
  end
end
|
data/spec/spec_helper.rb
ADDED
data/wombat.gemspec
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "wombat"
|
8
|
+
s.version = "0.1.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Felipe Lima"]
|
12
|
+
s.date = "2011-12-27"
|
13
|
+
s.description = "Generic Web crawler with a DSL that parses event-related data from web pages"
|
14
|
+
s.email = "felipe.lima@gmail.com"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
"Gemfile",
|
23
|
+
"Gemfile.lock",
|
24
|
+
"Guardfile",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.rdoc",
|
27
|
+
"Rakefile",
|
28
|
+
"VERSION",
|
29
|
+
"lib/wombat.rb",
|
30
|
+
"lib/wombat/crawler.rb",
|
31
|
+
"lib/wombat/metadata.rb",
|
32
|
+
"lib/wombat/parser.rb",
|
33
|
+
"lib/wombat/properties.rb",
|
34
|
+
"lib/wombat/property.rb",
|
35
|
+
"lib/wombat/property_locator.rb",
|
36
|
+
"spec/crawler_spec.rb",
|
37
|
+
"spec/helpers/sample_crawler.rb",
|
38
|
+
"spec/metadata_spec.rb",
|
39
|
+
"spec/parser_spec.rb",
|
40
|
+
"spec/properties_spec.rb",
|
41
|
+
"spec/property_locator_spec.rb",
|
42
|
+
"spec/property_spec.rb",
|
43
|
+
"spec/sample_crawler_spec.rb",
|
44
|
+
"spec/spec_helper.rb",
|
45
|
+
"wombat.gemspec"
|
46
|
+
]
|
47
|
+
s.homepage = "http://github.com/felipecsl/nightcrawler"
|
48
|
+
s.licenses = ["MIT"]
|
49
|
+
s.require_paths = ["lib"]
|
50
|
+
s.rubygems_version = "1.8.11"
|
51
|
+
s.summary = "Generic web crawler for event-related data"
|
52
|
+
|
53
|
+
if s.respond_to? :specification_version then
|
54
|
+
s.specification_version = 3
|
55
|
+
|
56
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
57
|
+
s.add_runtime_dependency(%q<wombat>, [">= 0"])
|
58
|
+
s.add_development_dependency(%q<rspec>, [">= 0"])
|
59
|
+
s.add_development_dependency(%q<guard>, [">= 0"])
|
60
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
61
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
62
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
63
|
+
s.add_development_dependency(%q<yard>, [">= 0"])
|
64
|
+
s.add_development_dependency(%q<guard-rspec>, [">= 0"])
|
65
|
+
s.add_development_dependency(%q<guard-bundler>, [">= 0"])
|
66
|
+
s.add_development_dependency(%q<growl_notify>, [">= 0"])
|
67
|
+
s.add_development_dependency(%q<rb-inotify>, [">= 0"])
|
68
|
+
s.add_development_dependency(%q<rb-fsevent>, [">= 0"])
|
69
|
+
s.add_development_dependency(%q<rb-fchange>, [">= 0"])
|
70
|
+
s.add_development_dependency(%q<activesupport>, [">= 0"])
|
71
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
72
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
73
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
74
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
75
|
+
else
|
76
|
+
s.add_dependency(%q<wombat>, [">= 0"])
|
77
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
78
|
+
s.add_dependency(%q<guard>, [">= 0"])
|
79
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
80
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
81
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
82
|
+
s.add_dependency(%q<yard>, [">= 0"])
|
83
|
+
s.add_dependency(%q<guard-rspec>, [">= 0"])
|
84
|
+
s.add_dependency(%q<guard-bundler>, [">= 0"])
|
85
|
+
s.add_dependency(%q<growl_notify>, [">= 0"])
|
86
|
+
s.add_dependency(%q<rb-inotify>, [">= 0"])
|
87
|
+
s.add_dependency(%q<rb-fsevent>, [">= 0"])
|
88
|
+
s.add_dependency(%q<rb-fchange>, [">= 0"])
|
89
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
90
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
91
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
92
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
93
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
94
|
+
end
|
95
|
+
else
|
96
|
+
s.add_dependency(%q<wombat>, [">= 0"])
|
97
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
98
|
+
s.add_dependency(%q<guard>, [">= 0"])
|
99
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
100
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
101
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
102
|
+
s.add_dependency(%q<yard>, [">= 0"])
|
103
|
+
s.add_dependency(%q<guard-rspec>, [">= 0"])
|
104
|
+
s.add_dependency(%q<guard-bundler>, [">= 0"])
|
105
|
+
s.add_dependency(%q<growl_notify>, [">= 0"])
|
106
|
+
s.add_dependency(%q<rb-inotify>, [">= 0"])
|
107
|
+
s.add_dependency(%q<rb-fsevent>, [">= 0"])
|
108
|
+
s.add_dependency(%q<rb-fchange>, [">= 0"])
|
109
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
110
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
111
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
112
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
113
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
metadata
ADDED
@@ -0,0 +1,272 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wombat
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Felipe Lima
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-12-27 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: wombat
|
16
|
+
requirement: &70274763497680 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70274763497680
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &70274760260820 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70274760260820
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: guard
|
38
|
+
requirement: &70274760150780 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70274760150780
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: bundler
|
49
|
+
requirement: &70274759481420 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.0
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70274759481420
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: jeweler
|
60
|
+
requirement: &70274758627840 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ~>
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 1.6.4
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *70274758627840
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rcov
|
71
|
+
requirement: &70274758883060 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *70274758883060
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: yard
|
82
|
+
requirement: &70274763457680 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *70274763457680
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: guard-rspec
|
93
|
+
requirement: &70274762681960 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
type: :development
|
100
|
+
prerelease: false
|
101
|
+
version_requirements: *70274762681960
|
102
|
+
- !ruby/object:Gem::Dependency
|
103
|
+
name: guard-bundler
|
104
|
+
requirement: &70274758275240 !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: *70274758275240
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: growl_notify
|
115
|
+
requirement: &70274758286120 !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ! '>='
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
type: :development
|
122
|
+
prerelease: false
|
123
|
+
version_requirements: *70274758286120
|
124
|
+
- !ruby/object:Gem::Dependency
|
125
|
+
name: rb-inotify
|
126
|
+
requirement: &70274758283180 !ruby/object:Gem::Requirement
|
127
|
+
none: false
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: *70274758283180
|
135
|
+
- !ruby/object:Gem::Dependency
|
136
|
+
name: rb-fsevent
|
137
|
+
requirement: &70274758281620 !ruby/object:Gem::Requirement
|
138
|
+
none: false
|
139
|
+
requirements:
|
140
|
+
- - ! '>='
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: '0'
|
143
|
+
type: :development
|
144
|
+
prerelease: false
|
145
|
+
version_requirements: *70274758281620
|
146
|
+
- !ruby/object:Gem::Dependency
|
147
|
+
name: rb-fchange
|
148
|
+
requirement: &70274758296380 !ruby/object:Gem::Requirement
|
149
|
+
none: false
|
150
|
+
requirements:
|
151
|
+
- - ! '>='
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
type: :development
|
155
|
+
prerelease: false
|
156
|
+
version_requirements: *70274758296380
|
157
|
+
- !ruby/object:Gem::Dependency
|
158
|
+
name: activesupport
|
159
|
+
requirement: &70274758295580 !ruby/object:Gem::Requirement
|
160
|
+
none: false
|
161
|
+
requirements:
|
162
|
+
- - ! '>='
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: '0'
|
165
|
+
type: :development
|
166
|
+
prerelease: false
|
167
|
+
version_requirements: *70274758295580
|
168
|
+
- !ruby/object:Gem::Dependency
|
169
|
+
name: shoulda
|
170
|
+
requirement: &70274758294740 !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
type: :development
|
177
|
+
prerelease: false
|
178
|
+
version_requirements: *70274758294740
|
179
|
+
- !ruby/object:Gem::Dependency
|
180
|
+
name: bundler
|
181
|
+
requirement: &70274758293620 !ruby/object:Gem::Requirement
|
182
|
+
none: false
|
183
|
+
requirements:
|
184
|
+
- - ~>
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: 1.0.0
|
187
|
+
type: :development
|
188
|
+
prerelease: false
|
189
|
+
version_requirements: *70274758293620
|
190
|
+
- !ruby/object:Gem::Dependency
|
191
|
+
name: jeweler
|
192
|
+
requirement: &70274758292980 !ruby/object:Gem::Requirement
|
193
|
+
none: false
|
194
|
+
requirements:
|
195
|
+
- - ~>
|
196
|
+
- !ruby/object:Gem::Version
|
197
|
+
version: 1.6.4
|
198
|
+
type: :development
|
199
|
+
prerelease: false
|
200
|
+
version_requirements: *70274758292980
|
201
|
+
- !ruby/object:Gem::Dependency
|
202
|
+
name: rcov
|
203
|
+
requirement: &70274758291340 !ruby/object:Gem::Requirement
|
204
|
+
none: false
|
205
|
+
requirements:
|
206
|
+
- - ! '>='
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: '0'
|
209
|
+
type: :development
|
210
|
+
prerelease: false
|
211
|
+
version_requirements: *70274758291340
|
212
|
+
description: Generic Web crawler with a DSL that parses event-related data from web
|
213
|
+
pages
|
214
|
+
email: felipe.lima@gmail.com
|
215
|
+
executables: []
|
216
|
+
extensions: []
|
217
|
+
extra_rdoc_files:
|
218
|
+
- LICENSE.txt
|
219
|
+
- README.rdoc
|
220
|
+
files:
|
221
|
+
- .document
|
222
|
+
- .rspec
|
223
|
+
- Gemfile
|
224
|
+
- Gemfile.lock
|
225
|
+
- Guardfile
|
226
|
+
- LICENSE.txt
|
227
|
+
- README.rdoc
|
228
|
+
- Rakefile
|
229
|
+
- VERSION
|
230
|
+
- lib/wombat.rb
|
231
|
+
- lib/wombat/crawler.rb
|
232
|
+
- lib/wombat/metadata.rb
|
233
|
+
- lib/wombat/parser.rb
|
234
|
+
- lib/wombat/properties.rb
|
235
|
+
- lib/wombat/property.rb
|
236
|
+
- lib/wombat/property_locator.rb
|
237
|
+
- spec/crawler_spec.rb
|
238
|
+
- spec/helpers/sample_crawler.rb
|
239
|
+
- spec/metadata_spec.rb
|
240
|
+
- spec/parser_spec.rb
|
241
|
+
- spec/properties_spec.rb
|
242
|
+
- spec/property_locator_spec.rb
|
243
|
+
- spec/property_spec.rb
|
244
|
+
- spec/sample_crawler_spec.rb
|
245
|
+
- spec/spec_helper.rb
|
246
|
+
- wombat.gemspec
|
247
|
+
homepage: http://github.com/felipecsl/nightcrawler
|
248
|
+
licenses:
|
249
|
+
- MIT
|
250
|
+
post_install_message:
|
251
|
+
rdoc_options: []
|
252
|
+
require_paths:
|
253
|
+
- lib
|
254
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
255
|
+
none: false
|
256
|
+
requirements:
|
257
|
+
- - ! '>='
|
258
|
+
- !ruby/object:Gem::Version
|
259
|
+
version: '0'
|
260
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
261
|
+
none: false
|
262
|
+
requirements:
|
263
|
+
- - ! '>='
|
264
|
+
- !ruby/object:Gem::Version
|
265
|
+
version: '0'
|
266
|
+
requirements: []
|
267
|
+
rubyforge_project:
|
268
|
+
rubygems_version: 1.8.11
|
269
|
+
signing_key:
|
270
|
+
specification_version: 3
|
271
|
+
summary: Generic web crawler for event-related data
|
272
|
+
test_files: []
|