horsefield 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2cfb5049fcbda71817b68f0ba6359a6a2794afdc
4
- data.tar.gz: 2d3c26b593f8a05e6510522f805c3185815a59fc
3
+ metadata.gz: 8b8f8255739340596e630908fdf232ad5cf3ac97
4
+ data.tar.gz: 7f5e63af192760467df38b2788efadee28926da4
5
5
  SHA512:
6
- metadata.gz: d3edbb36a1e8fd05e3306e14f0f7ca54f9ab9d715afcb64c4440507a0808e721f5790343fb23d16f61d343f183cc37da06ea0bc62f04261fa7ee6bf680e76ffd
7
- data.tar.gz: e60f5aef998f9228cc3ddc493d3206c408b32fa3b7221199d6e2ad80bb0658bcb7145c09cbf0615a32f39e948548e2861f2efed95e7fc2e4da7f1f2633d9c84b
6
+ metadata.gz: b493efb36209a2b52527ffb5291314f4b3eeb1a2e80588f5f20a5bd627aa91456acf51d7d4c088a1ef67806467ff4c86099e6ec8f489c9571df605bea60f47e9
7
+ data.tar.gz: bed0f9129ef6e737577644709d6d8b2a8e33db76a1b5ccb6e3f0adbbd69d5be3f76f0a4cd78675c8d6d5ab195d72afbe98b91bd6ba86b5b6275db723d35c0197
@@ -8,7 +8,11 @@ module Horsefield
8
8
  def process
9
9
  case @html
10
10
  when Nokogiri::XML::Element then
11
- @html.search('text()').to_s.split.join ' '
11
+ if @type == :text
12
+ @html.search('text()').to_s.split.join ' '
13
+ elsif @type == :html
14
+ @html.to_s
15
+ end
12
16
  when Nokogiri::XML::Attr then
13
17
  @html.value
14
18
  end
@@ -16,15 +16,15 @@ module Horsefield
16
16
  def browse(*)
17
17
  end
18
18
 
19
- def one(name, selector, &block)
20
- @nodes[name] = dig_deeper selector, &block
19
+ def one(name, selector, type = :text, &block)
20
+ @nodes[name] = dig_deeper selector, false, type, &block
21
21
  end
22
22
 
23
23
  def many(name, selector, &block)
24
24
  @nodes[name] = dig_deeper selector, true, &block
25
25
  end
26
26
 
27
- def dig_deeper(selector, many = false, &block)
27
+ def dig_deeper(selector, many = false, type = :text, &block)
28
28
  return nil if base_elements(selector).empty?
29
29
 
30
30
  if block
@@ -38,10 +38,10 @@ module Horsefield
38
38
  else
39
39
  if many
40
40
  base_elements(selector).map do |e|
41
- Horsefield::Node.new(e).process(&block)
41
+ Horsefield::Node.new(e, type).process(&block)
42
42
  end
43
43
  else
44
- Horsefield::Node.new(base_elements(selector).first).process(&block)
44
+ Horsefield::Node.new(base_elements(selector).first, type).process(&block)
45
45
  end
46
46
  end
47
47
  end
@@ -1,3 +1,3 @@
1
1
  module Horsefield
2
- VERSION = "0.2.4"
2
+ VERSION = "0.2.5"
3
3
  end
data/spec/scraper_spec.rb CHANGED
@@ -31,6 +31,18 @@ describe Horsefield::Scraper do
31
31
  result[:job][:missing].should be_nil
32
32
  end
33
33
 
34
+ it 'can return HTML instead of text' do
35
+ result = Horsefield::Scraper.new.scrape html: @html do
36
+ one :job, '.listingsTable .odd, .listingsTable .even' do
37
+ one :title, '.jobTitleContainer', :html
38
+ one :company, '.companyContainer'
39
+ one :missing, '.doesNotExist'
40
+ end
41
+ end
42
+
43
+ result[:job][:title].should match(/<div class=\"jobTitleContainer\">/)
44
+ end
45
+
34
46
  it 'works with Watir' do
35
47
  browser = Watir::Browser.new :phantomjs
36
48
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: horsefield
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erik Strömberg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-03 00:00:00.000000000 Z
11
+ date: 2013-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -166,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
166
  version: '0'
167
167
  requirements: []
168
168
  rubyforge_project:
169
- rubygems_version: 2.0.0
169
+ rubygems_version: 2.1.9
170
170
  signing_key:
171
171
  specification_version: 4
172
172
  summary: It's a scraper