web-page-parser 0.22 → 0.23

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@
2
2
  module WebPageParser
3
3
 
4
4
  class BbcNewsPageParserFactory < WebPageParser::ParserFactory
5
- URL_RE = ORegexp.new("(www|news)\.bbc\.co\.uk/.+/([a-z]+-)?[0-9]+(\.stm)?$")
5
+ URL_RE = ORegexp.new("(www|news)\.bbc\.co\.uk/.+/([a-z-]+-)?[0-9]+(\.stm)?$")
6
6
  INVALID_URL_RE = ORegexp.new("in_pictures|pop_ups|sport1")
7
7
 
8
8
  def self.can_parse?(options)
@@ -13,7 +13,8 @@ describe BbcNewsPageParserFactory do
13
13
  "http://news.bbc.co.uk/1/hi/england/derbyshire/7996494.stm",
14
14
  "http://news.bbc.co.uk/2/low/uk_news/england/devon/7996447.stm",
15
15
  "http://www.bbc.co.uk/news/business-11125504",
16
- "http://www.bbc.co.uk/news/10604897"
16
+ "http://www.bbc.co.uk/news/10604897",
17
+ "http://www.bbc.co.uk/news/world-middle-east-13373006"
17
18
  ]
18
19
  @invalid_urls = [
19
20
  "http://news.bbc.co.uk/2/hi/health/default.stm",
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: web-page-parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 39
4
+ hash: 37
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 22
9
- version: "0.22"
8
+ - 23
9
+ version: "0.23"
10
10
  platform: ruby
11
11
  authors:
12
12
  - John Leach
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-08 00:00:00 +01:00
17
+ date: 2011-05-15 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency