horsefield 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +47 -0
- data/Rakefile +6 -0
- data/horsefield.gemspec +29 -0
- data/lib/horsefield.rb +5 -0
- data/lib/horsefield/node.rb +12 -0
- data/lib/horsefield/node_set.rb +48 -0
- data/lib/horsefield/scraper.rb +32 -0
- data/lib/horsefield/version.rb +3 -0
- data/spec/fixtures/monster.html +2311 -0
- data/spec/fixtures/vcr_cassettes/facebook/frontpage.yml +948 -0
- data/spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml +7105 -0
- data/spec/fixtures/vcr_cassettes/facebook/login.yml +11872 -0
- data/spec/horsefield_spec.rb +4 -0
- data/spec/scraper_spec.rb +49 -0
- data/spec/spec_helper.rb +21 -0
- metadata +168 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'horsefield/scraper'
|
3
|
+
|
4
|
+
VCR.configure do |c| # VCR setup used by the 'with URL' example below.
|
5
|
+
c.cassette_library_dir = 'spec/fixtures/vcr_cassettes' # Cassette files are resolved under this directory (path is relative, so presumably relative to the directory specs are run from).
|
6
|
+
c.hook_into :webmock # Use WebMock as the HTTP interception layer for VCR.
|
7
|
+
end
|
8
|
+
|
9
|
+
describe Horsefield::Scraper do # Specs for Horsefield::Scraper: once built from an HTML string, once built from a URL.
|
10
|
+
describe 'with HTML' do # Scraper constructed from in-memory HTML (no HTTP involved in this group).
|
11
|
+
before do
|
12
|
+
html = IO.read File.join(__dir__, 'fixtures/monster.html') # Load the fixture relative to this spec file's directory.
|
13
|
+
@scraper = Horsefield::Scraper.new(html: html) # Pass the markup through the `html:` option.
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should scrape' do
|
17
|
+
result = @scraper.scrape do # The block is the scraping DSL; its return value is indexed by field name below.
|
18
|
+
many :jobs, '.listingsTable .odd, .listingsTable .even' do # CSS selector covering both odd and even rows; presumably yields one entry per matched node - confirm in lib/horsefield.
|
19
|
+
one :title, '.jobTitleContainer' # Nested field, presumably scoped to the current job row.
|
20
|
+
one :company, '.companyContainer' # Nested field, presumably scoped to the current job row.
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
result[:jobs].should have(9).items # The fixture is expected to contain exactly 9 matching rows.
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'with URL' do # Scraper constructed from a URL passed as a positional argument.
|
29
|
+
before do
|
30
|
+
@scraper = Horsefield::Scraper.new('https://www.facebook.com/johnny.qiu1/info?_fb_noscript=1')
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'should scrape' do
|
34
|
+
VCR.use_cassette 'facebook/johnny_qiu1' do # Run the scrape inside the 'facebook/johnny_qiu1' VCR cassette.
|
35
|
+
result = @scraper.scrape do
|
36
|
+
one :name, '._8_2' # CSS class selector.
|
37
|
+
|
38
|
+
many :employers, '//table[@class="mal _5e7- profileInfoTable _3stn"]//*[text() = "Employers"]' do # XPath selector with an empty block: no nested fields are declared.
|
39
|
+
end
|
40
|
+
|
41
|
+
many :educations, '//table[@class="mal _5e7- profileInfoTable _3stn"]//*[text() = "University" or text() = "Secondary school"]' do # XPath selector with an empty block: no nested fields are declared.
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
result[:name].should == 'Johnny Qiu (邱博瀚)' # NOTE(review): only :name is asserted; :employers and :educations are declared above but never checked.
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'pry'
|
2
|
+
require 'vcr'
|
3
|
+
require 'webmock'
|
4
|
+
|
5
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
6
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
7
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
8
|
+
# loaded once.
|
9
|
+
#
|
10
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
11
|
+
RSpec.configure do |config| # Suite-wide RSpec settings: focus filtering and random ordering.
|
12
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true # Lets a bare symbol such as :focus act as metadata `focus: true` (RSpec 2-era setting).
|
13
|
+
config.run_all_when_everything_filtered = true # If the filter below matches nothing, run the whole suite instead of zero examples.
|
14
|
+
config.filter_run :focus # Restrict the run to examples/groups tagged :focus when any exist.
|
15
|
+
|
16
|
+
# Run specs in random order to surface order dependencies. If you find an
|
17
|
+
# order dependency and want to debug it, you can fix the order by providing
|
18
|
+
# the seed, which is printed after each run.
|
19
|
+
# --seed 1234
|
20
|
+
config.order = 'random'
|
21
|
+
end
|
metadata
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: horsefield
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Erik Strömberg
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-08-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.3'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: vcr
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: webmock
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 1.12.0
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 1.12.0
|
111
|
+
description: It's a scraper
|
112
|
+
email:
|
113
|
+
- erik.stromberg@gmail.com
|
114
|
+
executables: []
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- .gitignore
|
119
|
+
- .rspec
|
120
|
+
- Gemfile
|
121
|
+
- LICENSE.txt
|
122
|
+
- README.md
|
123
|
+
- Rakefile
|
124
|
+
- horsefield.gemspec
|
125
|
+
- lib/horsefield.rb
|
126
|
+
- lib/horsefield/node.rb
|
127
|
+
- lib/horsefield/node_set.rb
|
128
|
+
- lib/horsefield/scraper.rb
|
129
|
+
- lib/horsefield/version.rb
|
130
|
+
- spec/fixtures/monster.html
|
131
|
+
- spec/fixtures/vcr_cassettes/facebook/frontpage.yml
|
132
|
+
- spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml
|
133
|
+
- spec/fixtures/vcr_cassettes/facebook/login.yml
|
134
|
+
- spec/horsefield_spec.rb
|
135
|
+
- spec/scraper_spec.rb
|
136
|
+
- spec/spec_helper.rb
|
137
|
+
homepage: ''
|
138
|
+
licenses:
|
139
|
+
- MIT
|
140
|
+
metadata: {}
|
141
|
+
post_install_message:
|
142
|
+
rdoc_options: []
|
143
|
+
require_paths:
|
144
|
+
- lib
|
145
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
146
|
+
requirements:
|
147
|
+
- - '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: '0'
|
150
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - '>='
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
requirements: []
|
156
|
+
rubyforge_project:
|
157
|
+
rubygems_version: 2.1.0.rc.1
|
158
|
+
signing_key:
|
159
|
+
specification_version: 4
|
160
|
+
summary: It's a scraper
|
161
|
+
test_files:
|
162
|
+
- spec/fixtures/monster.html
|
163
|
+
- spec/fixtures/vcr_cassettes/facebook/frontpage.yml
|
164
|
+
- spec/fixtures/vcr_cassettes/facebook/johnny_qiu1.yml
|
165
|
+
- spec/fixtures/vcr_cassettes/facebook/login.yml
|
166
|
+
- spec/horsefield_spec.rb
|
167
|
+
- spec/scraper_spec.rb
|
168
|
+
- spec/spec_helper.rb
|