horsefield 0.3.14 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/spec/scraper_spec.rb DELETED
@@ -1,70 +0,0 @@
1
- require 'spec_helper'
2
- require 'horsefield/scraper'
3
- require 'watir-webdriver'
4
-
5
- describe Horsefield::Scraper do
6
- describe 'with HTML' do
7
- before do
8
- @html = IO.read File.join(__dir__, 'fixtures/monster.html')
9
- end
10
-
11
- it 'should scrape' do
12
- result = Horsefield::Scraper.new.scrape html: @html do
13
- many :jobs, '.listingsTable .odd, .listingsTable .even' do
14
- one :title, '.jobTitleContainer'
15
- one :company, '.companyContainer'
16
- end
17
- end
18
-
19
- expect(result[:jobs].length).to eq(9)
20
- end
21
-
22
- it 'should be able to scrape in scope' do
23
- result = Horsefield::Scraper.new.scrape html: @html do
24
- scope '#primaryResults' do
25
- many :jobs, '.listingsTable .odd, .listingsTable .even' do
26
- one :title, '.jobTitleContainer'
27
- one :company, '.companyContainer'
28
- end
29
- end
30
- end
31
-
32
- expect(result[:jobs].length).to eq(9)
33
- end
34
-
35
- it 'should return nil for selectors that are not found' do
36
- result = Horsefield::Scraper.new.scrape html: @html do
37
- one :job, '.listingsTable .odd, .listingsTable .even' do
38
- one :title, '.jobTitleContainer'
39
- one :company, '.companyContainer'
40
- one :missing, '.doesNotExist'
41
- end
42
- end
43
-
44
- expect(result[:job][:missing]).to be_nil
45
- end
46
-
47
- it 'can return HTML instead of text' do
48
- result = Horsefield::Scraper.new.scrape html: @html do
49
- one :job, '.listingsTable .odd, .listingsTable .even' do
50
- one :title, '.jobTitleContainer', :html
51
- one :company, '.companyContainer'
52
- one :missing, '.doesNotExist'
53
- end
54
- end
55
-
56
- expect(result[:job][:title]).to match(/<a id=\"ctl00_ctl00_ctl00_body_body_wacCenterStage_ctl02_rptResults_ctl00_linkJobTitle\"/)
57
- end
58
-
59
- it 'works with Watir' do
60
- browser = Watir::Browser.new :phantomjs
61
-
62
- result = Horsefield::Scraper.new(browser).scrape 'https://github.com/cowboy' do
63
- one :email, 'a.email'
64
- many :organizations, '//div[@class="orgs"]//@original-title'
65
- end
66
-
67
- p result
68
- end
69
- end
70
- end
data/spec/spec_helper.rb DELETED
@@ -1,29 +0,0 @@
1
- require 'pry'
2
- #require 'vcr'
3
- #require 'webmock'
4
-
5
- #VCR.configure do |config|
6
- # config.cassette_library_dir = 'spec/fixtures/vcr_cassettes'
7
- # config.hook_into :webmock
8
- # config.configure_rspec_metadata!
9
- # config.allow_http_connections_when_no_cassette = true
10
- #end
11
-
12
- # This file was generated by the `rspec --init` command. Conventionally, all
13
- # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
14
- # Require this file using `require "spec_helper"` to ensure that it is only
15
- # loaded once.
16
- #
17
- # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
18
- RSpec.configure do |config|
19
- config.treat_symbols_as_metadata_keys_with_true_values = true
20
- config.run_all_when_everything_filtered = true
21
- config.filter_run :focus
22
- config.treat_symbols_as_metadata_keys_with_true_values = true
23
-
24
- # Run specs in random order to surface order dependencies. If you find an
25
- # order dependency and want to debug it, you can fix the order by providing
26
- # the seed, which is printed after each run.
27
- # --seed 1234
28
- config.order = 'random'
29
- end