horsefield 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -5
- data/.rspec +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +22 -4
- data/horsefield.gemspec +5 -7
- data/lib/horsefield.rb +2 -5
- data/lib/horsefield/node.rb +12 -0
- data/lib/horsefield/node_set.rb +51 -0
- data/lib/horsefield/scraper.rb +30 -10
- data/lib/horsefield/version.rb +1 -1
- data/spec/fixtures/monster.html +2311 -0
- data/spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml +7105 -0
- data/spec/horsefield_spec.rb +4 -0
- data/spec/scraper_spec.rb +21 -0
- data/spec/spec_helper.rb +11 -11
- metadata +33 -65
- data/Guardfile +0 -24
- data/lib/horsefield/diggable.rb +0 -34
- data/lib/horsefield/nokogiri.rb +0 -12
- data/spec/fixtures/linkedin_biggs.html +0 -652
- data/spec/fixtures/vcr_cassettes/linkedin_lunarmobiscuit.yml +0 -880
- data/spec/lib/horsefield/scraper_spec.rb +0 -42
@@ -1,42 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'horsefield/scraper'
|
3
|
-
|
4
|
-
describe Horsefield::Scraper do
|
5
|
-
let(:scraper) { Horsefield::Scraper.new('http://www.linkedin.com/in/lunarmobiscuit') }
|
6
|
-
|
7
|
-
it 'should accept HTML or a URL as argument' do
|
8
|
-
VCR.use_cassette('linkedin_lunarmobiscuit') do
|
9
|
-
scraper.html.should match('<!DOCTYPE html>')
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
describe '#scrape' do
|
14
|
-
it 'should take a block with attributes to scrape' do
|
15
|
-
data = scraper.scrape do
|
16
|
-
scope '.profile-header' do
|
17
|
-
one :name, '//span[@class="full-name"]'
|
18
|
-
end
|
19
|
-
|
20
|
-
many :skills, '#skills-list li'
|
21
|
-
|
22
|
-
many :experiences, '#profile-experience .position' do
|
23
|
-
one :headline, 'span.title'
|
24
|
-
|
25
|
-
one :start_year, '.period .dtstart' do
|
26
|
-
attr('title').split('-').first.to_i
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
one :company do
|
31
|
-
one :name, '.postitle h4'
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
p data[:company]
|
36
|
-
|
37
|
-
data[:name].should == "Michael 'Luni' Libes"
|
38
|
-
data[:experiences].first[:headline].should == 'Founder and Managing Director'
|
39
|
-
data[:skills].should == ["Entrepreneurship", "Business Planning", "Fundraising", "Team Building", "Start-ups", "Venture Capital", "Social Entrepreneurship", "Strategic Planning", "Strategic Partnerships", "Strategy", "Business Development", "Management Consulting", "Marketing Strategy", "Sustainability", "Executive Management", "Marketing", "New Business Development", "Competitive Analysis", "Go-to-market Strategy", "Thought Leadership", "Corporate Development", "Software Development", "Business Strategy", "Mobile Devices", "Wireless", "Management", "Cloud Computing", "Nonprofits", "Business Modeling", "SaaS", "Mobile Applications", "Product Marketing", "Program Management", "Analytics", "E-commerce", "Leadership", "Enterprise Software", "Consulting", "Strategic Consulting"]
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|