web-page-parser 0.21 → 0.22

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  $:.unshift File.join(File.dirname(__FILE__), '../../lib')
2
3
  require 'spec/base_parser_spec'
3
4
  require 'web-page-parser'
@@ -69,6 +70,18 @@ describe BbcNewsPageParserV4 do
69
70
  @pa.content.size.should == 18
70
71
  end
71
72
 
73
+ it "should parse the content of an article with market data" do
74
+ @pa = BbcNewsPageParserV4.new(:page => File.read('spec/fixtures/bbc_news/13293006.html'))
75
+ @pa.content.to_s.should_not =~ /Market Data/
76
+ @pa.content.to_s.should_not =~ /Last updated at/
77
+ @pa.content.size.should == 13
78
+ end
79
+
80
+ it "should ignore embedded-hyper content" do
81
+ @pa = BbcNewsPageParserV4.new(:page => File.read('spec/fixtures/bbc_news/12921632.html'))
82
+ @pa.content.to_s.should_not =~ /Fake and real quotes/
83
+ end
84
+
72
85
  end
73
86
 
74
87
 
@@ -98,8 +111,6 @@ describe BbcNewsPageParserV3 do
98
111
  @pa.content[1].should == "These include an £80m loan to Sheffield Forgemasters and new programmes for the young unemployed, Chief Secretary to the Treasury Danny Alexander told MPs."
99
112
  @pa.content[2].should == 'Mr Alexander said the cuts were necessary to tackle the budget deficit and would be done in a "fair" way.'
100
113
  end
101
-
102
-
103
114
 
104
115
  end
105
116
 
data/spec/spec.opts CHANGED
@@ -1,4 +1,2 @@
1
1
  --colour
2
2
  --format s
3
- --loadby mtime
4
- --reverse
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web-page-parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 33
4
+ hash: 39
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 21
9
- version: "0.21"
8
+ - 22
9
+ version: "0.22"
10
10
  platform: ruby
11
11
  authors:
12
12
  - John Leach
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-08-30 00:00:00 +01:00
17
+ date: 2011-05-08 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -75,6 +75,8 @@ files:
75
75
  - spec/fixtures/bbc_news/8063681.stm.html
76
76
  - spec/fixtures/bbc_news/10249066.stm.html
77
77
  - spec/fixtures/bbc_news/8011268.stm.html
78
+ - spec/fixtures/bbc_news/12921632.html
79
+ - spec/fixtures/bbc_news/13293006.html
78
80
  - spec/fixtures/bbc_news/11125504.html
79
81
  - spec/fixtures/bbc_news/6072486.stm.html
80
82
  - spec/spec.opts
@@ -122,6 +124,8 @@ test_files:
122
124
  - spec/fixtures/bbc_news/8063681.stm.html
123
125
  - spec/fixtures/bbc_news/10249066.stm.html
124
126
  - spec/fixtures/bbc_news/8011268.stm.html
127
+ - spec/fixtures/bbc_news/12921632.html
128
+ - spec/fixtures/bbc_news/13293006.html
125
129
  - spec/fixtures/bbc_news/11125504.html
126
130
  - spec/fixtures/bbc_news/6072486.stm.html
127
131
  - spec/spec.opts