horsefield 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ require 'horsefield'
2
+
3
+ describe Horsefield do
4
+ end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+ require 'horsefield/scraper'
3
+
4
+ VCR.configure do |c|
5
+ c.cassette_library_dir = 'spec/fixtures/vcr_cassettes'
6
+ c.hook_into :webmock
7
+ end
8
+
9
+ describe Horsefield::Scraper do
10
+ describe 'with HTML' do
11
+ before do
12
+ html = IO.read File.join(__dir__, 'fixtures/monster.html')
13
+ @scraper = Horsefield::Scraper.new(html: html)
14
+ end
15
+
16
+ it 'should scrape' do
17
+ result = @scraper.scrape do
18
+ many :jobs, '.listingsTable .odd, .listingsTable .even' do
19
+ one :title, '.jobTitleContainer'
20
+ one :company, '.companyContainer'
21
+ end
22
+ end
23
+
24
+ result[:jobs].should have(9).items
25
+ end
26
+ end
27
+
28
+ describe 'with URL' do
29
+ before do
30
+ @scraper = Horsefield::Scraper.new('https://www.facebook.com/johnny.qiu1/info?_fb_noscript=1')
31
+ end
32
+
33
+ it 'should scrape' do
34
+ VCR.use_cassette 'facebook/johnny_qiu1' do
35
+ result = @scraper.scrape do
36
+ one :name, '._8_2'
37
+
38
+ many :employers, '//table[@class="mal _5e7- profileInfoTable _3stn"]//*[text() = "Employers"]' do
39
+ end
40
+
41
+ many :educations, '//table[@class="mal _5e7- profileInfoTable _3stn"]//*[text() = "University" or text() = "Secondary school"]' do
42
+ end
43
+ end
44
+
45
+ result[:name].should == 'Johnny Qiu (邱博瀚)'
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,21 @@
1
+ require 'pry'
2
+ require 'vcr'
3
+ require 'webmock'
4
+
5
+ # This file was generated by the `rspec --init` command. Conventionally, all
6
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
7
+ # Require this file using `require "spec_helper"` to ensure that it is only
8
+ # loaded once.
9
+ #
10
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
11
+ RSpec.configure do |config|
12
+ config.treat_symbols_as_metadata_keys_with_true_values = true
13
+ config.run_all_when_everything_filtered = true
14
+ config.filter_run :focus
15
+
16
+ # Run specs in random order to surface order dependencies. If you find an
17
+ # order dependency and want to debug it, you can fix the order by providing
18
+ # the seed, which is printed after each run.
19
+ # --seed 1234
20
+ config.order = 'random'
21
+ end
metadata ADDED
@@ -0,0 +1,168 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: horsefield
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Erik Strömberg
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '='
102
+ - !ruby/object:Gem::Version
103
+ version: 1.12.0
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.12.0
111
+ description: It's a scraper
112
+ email:
113
+ - erik.stromberg@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - .gitignore
119
+ - .rspec
120
+ - Gemfile
121
+ - LICENSE.txt
122
+ - README.md
123
+ - Rakefile
124
+ - horsefield.gemspec
125
+ - lib/horsefield.rb
126
+ - lib/horsefield/node.rb
127
+ - lib/horsefield/node_set.rb
128
+ - lib/horsefield/scraper.rb
129
+ - lib/horsefield/version.rb
130
+ - spec/fixtures/monster.html
131
+ - spec/fixtures/vcr_cassettes/facebook/frontpage.yml
132
+ - spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml
133
+ - spec/fixtures/vcr_cassettes/facebook/login.yml
134
+ - spec/horsefield_spec.rb
135
+ - spec/scraper_spec.rb
136
+ - spec/spec_helper.rb
137
+ homepage: ''
138
+ licenses:
139
+ - MIT
140
+ metadata: {}
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - '>='
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ required_rubygems_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - '>='
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ requirements: []
156
+ rubyforge_project:
157
+ rubygems_version: 2.1.0.rc.1
158
+ signing_key:
159
+ specification_version: 4
160
+ summary: It's a scraper
161
+ test_files:
162
+ - spec/fixtures/monster.html
163
+ - spec/fixtures/vcr_cassettes/facebook/frontpage.yml
164
+ - spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml
165
+ - spec/fixtures/vcr_cassettes/facebook/login.yml
166
+ - spec/horsefield_spec.rb
167
+ - spec/scraper_spec.rb
168
+ - spec/spec_helper.rb