horsefield 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ require 'horsefield'
2
+
3
+ describe Horsefield do
4
+ end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+ require 'horsefield/scraper'
3
+
4
+ VCR.configure do |c|
5
+ c.cassette_library_dir = 'spec/fixtures/vcr_cassettes'
6
+ c.hook_into :webmock
7
+ end
8
+
9
+ describe Horsefield::Scraper do
10
+ describe 'with HTML' do
11
+ before do
12
+ html = IO.read File.join(__dir__, 'fixtures/monster.html')
13
+ @scraper = Horsefield::Scraper.new(html: html)
14
+ end
15
+
16
+ it 'should scrape' do
17
+ result = @scraper.scrape do
18
+ many :jobs, '.listingsTable .odd, .listingsTable .even' do
19
+ one :title, '.jobTitleContainer'
20
+ one :company, '.companyContainer'
21
+ end
22
+ end
23
+
24
+ result[:jobs].should have(9).items
25
+ end
26
+ end
27
+
28
+ describe 'with URL' do
29
+ before do
30
+ @scraper = Horsefield::Scraper.new('https://www.facebook.com/johnny.qiu1/info?_fb_noscript=1')
31
+ end
32
+
33
+ it 'should scrape' do
34
+ VCR.use_cassette 'facebook/johnny_qiu1' do
35
+ result = @scraper.scrape do
36
+ one :name, '._8_2'
37
+
38
+ many :employers, '//table[@class="mal _5e7- profileInfoTable _3stn"]//*[text() = "Employers"]' do
39
+ end
40
+
41
+ many :educations, '//table[@class="mal _5e7- profileInfoTable _3stn"]//*[text() = "University" or text() = "Secondary school"]' do
42
+ end
43
+ end
44
+
45
+ result[:name].should == 'Johnny Qiu (邱博瀚)'
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,21 @@
1
+ require 'pry'
2
+ require 'vcr'
3
+ require 'webmock'
4
+
5
+ # This file was generated by the `rspec --init` command. Conventionally, all
6
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
7
+ # Require this file using `require "spec_helper"` to ensure that it is only
8
+ # loaded once.
9
+ #
10
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
11
+ RSpec.configure do |config|
12
+ config.treat_symbols_as_metadata_keys_with_true_values = true
13
+ config.run_all_when_everything_filtered = true
14
+ config.filter_run :focus
15
+
16
+ # Run specs in random order to surface order dependencies. If you find an
17
+ # order dependency and want to debug it, you can fix the order by providing
18
+ # the seed, which is printed after each run.
19
+ # --seed 1234
20
+ config.order = 'random'
21
+ end
metadata ADDED
@@ -0,0 +1,168 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: horsefield
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Erik Strömberg
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '='
102
+ - !ruby/object:Gem::Version
103
+ version: 1.12.0
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.12.0
111
+ description: It's a scraper
112
+ email:
113
+ - erik.stromberg@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - .gitignore
119
+ - .rspec
120
+ - Gemfile
121
+ - LICENSE.txt
122
+ - README.md
123
+ - Rakefile
124
+ - horsefield.gemspec
125
+ - lib/horsefield.rb
126
+ - lib/horsefield/node.rb
127
+ - lib/horsefield/node_set.rb
128
+ - lib/horsefield/scraper.rb
129
+ - lib/horsefield/version.rb
130
+ - spec/fixtures/monster.html
131
+ - spec/fixtures/vcr_cassettes/facebook/frontpage.yml
132
+ - spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml
133
+ - spec/fixtures/vcr_cassettes/facebook/login.yml
134
+ - spec/horsefield_spec.rb
135
+ - spec/scraper_spec.rb
136
+ - spec/spec_helper.rb
137
+ homepage: ''
138
+ licenses:
139
+ - MIT
140
+ metadata: {}
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - '>='
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ required_rubygems_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - '>='
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ requirements: []
156
+ rubyforge_project:
157
+ rubygems_version: 2.1.0.rc.1
158
+ signing_key:
159
+ specification_version: 4
160
+ summary: It's a scraper
161
+ test_files:
162
+ - spec/fixtures/monster.html
163
+ - spec/fixtures/vcr_cassettes/facebook/frontpage.yml
164
+ - spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml
165
+ - spec/fixtures/vcr_cassettes/facebook/login.yml
166
+ - spec/horsefield_spec.rb
167
+ - spec/scraper_spec.rb
168
+ - spec/spec_helper.rb