web-page-parser 0.22 → 0.23
Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@
|
|
2
2
|
module WebPageParser
|
3
3
|
|
4
4
|
class BbcNewsPageParserFactory < WebPageParser::ParserFactory
|
5
|
-
URL_RE = ORegexp.new("(www|news)\.bbc\.co\.uk/.+/([a-z]+-)?[0-9]+(\.stm)?$")
|
5
|
+
URL_RE = ORegexp.new("(www|news)\.bbc\.co\.uk/.+/([a-z-]+-)?[0-9]+(\.stm)?$")
|
6
6
|
INVALID_URL_RE = ORegexp.new("in_pictures|pop_ups|sport1")
|
7
7
|
|
8
8
|
def self.can_parse?(options)
|
@@ -13,7 +13,8 @@ describe BbcNewsPageParserFactory do
|
|
13
13
|
"http://news.bbc.co.uk/1/hi/england/derbyshire/7996494.stm",
|
14
14
|
"http://news.bbc.co.uk/2/low/uk_news/england/devon/7996447.stm",
|
15
15
|
"http://www.bbc.co.uk/news/business-11125504",
|
16
|
-
"http://www.bbc.co.uk/news/10604897"
|
16
|
+
"http://www.bbc.co.uk/news/10604897",
|
17
|
+
"http://www.bbc.co.uk/news/world-middle-east-13373006"
|
17
18
|
]
|
18
19
|
@invalid_urls = [
|
19
20
|
"http://news.bbc.co.uk/2/hi/health/default.stm",
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: web-page-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 37
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 22
|
9
|
-
version: "0.22"
|
8
|
+
- 23
|
9
|
+
version: "0.23"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John Leach
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-05-
|
17
|
+
date: 2011-05-15 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|