newly 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6) hide show
  1. data/README.rdoc +4 -3
  2. data/VERSION +1 -1
  3. data/lib/newly.rb +5 -15
  4. data/newly.gemspec +2 -2
  5. data/spec/newly_spec.rb +18 -12
  6. metadata +15 -15
data/README.rdoc CHANGED
@@ -1,17 +1,18 @@
1
1
  = newly
2
2
 
3
- Fetching breaking news from websites
3
+ ==Fetching breaking news from websites
4
+ * Based on Nokogiri https://github.com/sparklemotion/nokogiri
4
5
 
5
6
  == SYNOPSIS:
6
7
  # Fetching breaking news from some website
7
8
  reader = Newly.new('http://g1.globo.com/bahia/')
8
9
  news = reader.highlights(
9
10
  selector: '#ultimas-regiao div, #ultimas-regiao ul li',
10
- url: 'a',
11
+ href: 'a',
11
12
  date: '.data-hora',
12
13
  title: '.titulo',
13
14
  subtitle: '.subtitulo',
14
- image: 'img')
15
+ img: 'img')
15
16
 
16
17
  # Presentation News
17
18
  news.each do |n|
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.0
1
+ 1.1.0
data/lib/newly.rb CHANGED
@@ -6,13 +6,9 @@ class Newly
6
6
 
7
7
  attr_reader :title, :selector, :url
8
8
 
9
- def initialize(url, html_file=nil)
9
+ def initialize(url, selector=Nokogiri::HTML(open(url)))
10
10
  @url = url
11
- if (html_file)
12
- @selector = Nokogiri::HTML.parse(File.read(html_file))
13
- else
14
- @selector = Nokogiri::HTML(open(url))
15
- end
11
+ @selector = selector
16
12
  @title = @selector.at_css("title").text
17
13
  end
18
14
 
@@ -20,25 +16,19 @@ class Newly
20
16
  news = Array.new
21
17
  @selector.css(args[:selector]).each do |item|
22
18
  if (item)
23
- url = item.css(args[:url]).map { |doc| doc['href'] }.first if args[:url]
24
-
25
- # doc = Nokogiri::HTML(open(url))
26
- # keywords = doc.xpath("//meta[@name='Keywords']/@content") if doc
27
- keywords = nil
28
-
19
+ href = item.css(args[:href]).map { |doc| doc['href'] }.first if args[:href]
29
20
  date = item.css(args[:date]).text if args[:date]
30
21
  title = item.css(args[:title]).text if args[:title]
31
22
  subtitle = item.css(args[:subtitle]).text if args[:subtitle]
32
- image = item.css(args[:image]).map { |doc| doc['src'] }.first if args[:image]
23
+ img = item.css(args[:img]).map { |doc| doc['src'] }.first if args[:img]
33
24
  if (args[:host])
34
25
  host = args[:host]
35
26
  url = "#{host}/#{url}".gsub('../', '') if url
36
27
  image = "#{host}/#{image}".gsub('../', '') if image && image.include?('../')
37
28
  end
38
- news << News.new(url: url, keywords: keywords, date: date, title: title, subtitle: subtitle, image: image)
29
+ news << News.new(url: href, keywords: keywords, date: date, title: title, subtitle: subtitle, image: img)
39
30
  end
40
31
  end
41
32
  news
42
33
  end
43
-
44
34
  end
data/newly.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "newly"
8
- s.version = "1.0.0"
8
+ s.version = "1.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Alab\u{ea} Duarte"]
12
- s.date = "2012-08-13"
12
+ s.date = "2012-08-14"
13
13
  s.description = "Fetching breaking news from websites"
14
14
  s.email = "alabeduarte@gmail.com"
15
15
  s.extra_rdoc_files = [
data/spec/newly_spec.rb CHANGED
@@ -2,10 +2,11 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
  # require 'spec_helper'
3
3
 
4
4
  describe Newly do
5
- let(:ec_bahia) { ec_bahia = Newly.new('http://www.ecbahia.com', 'spec/html/ecbahia.html') }
6
- let(:g1) { g1_bahia = Newly.new('http://g1.globo.com', 'spec/html/g1.html') }
7
- let(:g1_bahia) { g1_bahia = Newly.new('http://g1.globo.com/bahia/', 'spec/html/g1_bahia.html') }
8
- let(:metro1) { g1_bahia = Newly.new('http://www.metro1.com.br/portal/?varSession=noticia&varEditoria=cidade', 'spec/html/metro1_cidade.html') }
5
+ let(:selector) { Nokogiri::HTML }
6
+ let(:ec_bahia) { Newly.new('http://www.ecbahia.com', parse('spec/html/ecbahia.html')) }
7
+ let(:g1) { Newly.new('http://g1.globo.com', parse('spec/html/g1.html')) }
8
+ let(:g1_bahia) { Newly.new('http://g1.globo.com/bahia/', parse('spec/html/g1_bahia.html')) }
9
+ let(:metro1) { Newly.new('http://www.metro1.com.br/portal/?varSession=noticia&varEditoria=cidade', parse('spec/html/metro1_cidade.html')) }
9
10
 
10
11
  it "should fetch ecbahia title" do
11
12
  ec_bahia.title.should == "ecbahia.com - \u00e9 goleada tricolor na internet! (ecbahia, ecbahia.com, ecbahia.com.br, Esporte Clube Bahia)"
@@ -13,11 +14,11 @@ describe Newly do
13
14
 
14
15
  it "should fetch highlights from http://g1.globo.com/bahia" do
15
16
  highlights = g1_bahia.highlights( selector: '#ultimas-regiao div, #ultimas-regiao ul li',
16
- url: 'a',
17
+ href: 'a',
17
18
  date: '.data-hora',
18
19
  title: '.titulo',
19
20
  subtitle: '.subtitulo',
20
- image: 'img'
21
+ img: 'img'
21
22
  )
22
23
  highlights.should_not be_empty
23
24
  end
@@ -25,10 +26,10 @@ describe Newly do
25
26
  context "fetching news from http://g1.globo.com" do
26
27
  it "should fetch highlights news" do
27
28
  highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
28
- url: 'a',
29
+ href: 'a',
29
30
  title: '.chapeu',
30
31
  subtitle: '.subtitulo',
31
- image: '.foto a img'
32
+ img: '.foto a img'
32
33
  )
33
34
  highlights.should_not be_empty
34
35
  highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
@@ -43,10 +44,10 @@ describe Newly do
43
44
 
44
45
  xit "should fetch keywords" do
45
46
  highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
46
- url: 'a',
47
+ href: 'a',
47
48
  title: '.chapeu',
48
49
  subtitle: '.subtitulo',
49
- image: '.foto a img'
50
+ img: '.foto a img'
50
51
  )
51
52
  highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
52
53
  highlights[0].keywords.should == 'noticias, noticia, Mundo'
@@ -55,13 +56,18 @@ describe Newly do
55
56
 
56
57
  it "should fetch highlights from http://www.metro1.com.br" do
57
58
  highlights = metro1.highlights( selector: '#lista-de-resultados .resultado',
58
- url: 'a',
59
+ href: 'a',
59
60
  date: '.resultado-data',
60
61
  title: '.resultado-titulo',
61
62
  subtitle: '.resultado-texto',
62
- image: 'a img.img-resultado',
63
+ img: 'a img.img-resultado',
63
64
  host: 'http://www.metro1.com.br'
64
65
  )
65
66
  highlights.should_not be_empty
66
67
  end
68
+
69
+ private
70
+ def parse(path)
71
+ selector.parse(File.read(path))
72
+ end
67
73
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: newly
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-13 00:00:00.000000000 Z
12
+ date: 2012-08-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70275429701820 !ruby/object:Gem::Requirement
16
+ requirement: &70132223688900 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70275429701820
24
+ version_requirements: *70132223688900
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &70275429699520 !ruby/object:Gem::Requirement
27
+ requirement: &70132223687300 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 2.8.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70275429699520
35
+ version_requirements: *70132223687300
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rdoc
38
- requirement: &70275429697780 !ruby/object:Gem::Requirement
38
+ requirement: &70132223684620 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '3.12'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70275429697780
46
+ version_requirements: *70132223684620
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
- requirement: &70275429696240 !ruby/object:Gem::Requirement
49
+ requirement: &70132223715700 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.1.5
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70275429696240
57
+ version_requirements: *70132223715700
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: jeweler
60
- requirement: &70275429694540 !ruby/object:Gem::Requirement
60
+ requirement: &70132223714340 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.8.4
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70275429694540
68
+ version_requirements: *70132223714340
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: simplecov
71
- requirement: &70275429735780 !ruby/object:Gem::Requirement
71
+ requirement: &70132223712120 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70275429735780
79
+ version_requirements: *70132223712120
80
80
  description: Fetching breaking news from websites
81
81
  email: alabeduarte@gmail.com
82
82
  executables: []
@@ -119,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
119
119
  version: '0'
120
120
  segments:
121
121
  - 0
122
- hash: -2351285680129146534
122
+ hash: 4551793471179022495
123
123
  required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  none: false
125
125
  requirements: