web-page-parser 0.21 → 0.22
Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
$:.unshift File.join(File.dirname(__FILE__), '../../lib')
|
2
3
|
require 'spec/base_parser_spec'
|
3
4
|
require 'web-page-parser'
|
@@ -69,6 +70,18 @@ describe BbcNewsPageParserV4 do
|
|
69
70
|
@pa.content.size.should == 18
|
70
71
|
end
|
71
72
|
|
73
|
+
it "should parse the content of an article with market data" do
|
74
|
+
@pa = BbcNewsPageParserV4.new(:page => File.read('spec/fixtures/bbc_news/13293006.html'))
|
75
|
+
@pa.content.to_s.should_not =~ /Market Data/
|
76
|
+
@pa.content.to_s.should_not =~ /Last updated at/
|
77
|
+
@pa.content.size.should == 13
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should ignore embedded-hyper content" do
|
81
|
+
@pa = BbcNewsPageParserV4.new(:page => File.read('spec/fixtures/bbc_news/12921632.html'))
|
82
|
+
@pa.content.to_s.should_not =~ /Fake and real quotes/
|
83
|
+
end
|
84
|
+
|
72
85
|
end
|
73
86
|
|
74
87
|
|
@@ -98,8 +111,6 @@ describe BbcNewsPageParserV3 do
|
|
98
111
|
@pa.content[1].should == "These include an £80m loan to Sheffield Forgemasters and new programmes for the young unemployed, Chief Secretary to the Treasury Danny Alexander told MPs."
|
99
112
|
@pa.content[2].should == 'Mr Alexander said the cuts were necessary to tackle the budget deficit and would be done in a "fair" way.'
|
100
113
|
end
|
101
|
-
|
102
|
-
|
103
114
|
|
104
115
|
end
|
105
116
|
|
data/spec/spec.opts
CHANGED
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web-page-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 39
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 21
|
9
|
-
version: "0.21"
|
8
|
+
- 22
|
9
|
+
version: "0.22"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John Leach
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date:
|
17
|
+
date: 2011-05-08 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -75,6 +75,8 @@ files:
|
|
75
75
|
- spec/fixtures/bbc_news/8063681.stm.html
|
76
76
|
- spec/fixtures/bbc_news/10249066.stm.html
|
77
77
|
- spec/fixtures/bbc_news/8011268.stm.html
|
78
|
+
- spec/fixtures/bbc_news/12921632.html
|
79
|
+
- spec/fixtures/bbc_news/13293006.html
|
78
80
|
- spec/fixtures/bbc_news/11125504.html
|
79
81
|
- spec/fixtures/bbc_news/6072486.stm.html
|
80
82
|
- spec/spec.opts
|
@@ -122,6 +124,8 @@ test_files:
|
|
122
124
|
- spec/fixtures/bbc_news/8063681.stm.html
|
123
125
|
- spec/fixtures/bbc_news/10249066.stm.html
|
124
126
|
- spec/fixtures/bbc_news/8011268.stm.html
|
127
|
+
- spec/fixtures/bbc_news/12921632.html
|
128
|
+
- spec/fixtures/bbc_news/13293006.html
|
125
129
|
- spec/fixtures/bbc_news/11125504.html
|
126
130
|
- spec/fixtures/bbc_news/6072486.stm.html
|
127
131
|
- spec/spec.opts
|