newly 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -11
- data/Gemfile.lock +53 -26
- data/README.md +38 -0
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/newly/feed.rb +19 -0
- data/lib/newly/news.rb +16 -0
- data/lib/newly/news_crawler.rb +42 -0
- data/lib/newly/page_crawler.rb +51 -0
- data/lib/newly/selector.rb +17 -0
- data/lib/newly.rb +2 -34
- data/newly.gemspec +26 -30
- data/spec/html/page_spec.html +51 -0
- data/spec/newly/news_crawler_spec.rb +99 -0
- data/spec/newly/page_crawler_spec.rb +52 -0
- data/spec/spec_helper.rb +5 -9
- metadata +42 -42
- data/README.rdoc +0 -39
- data/lib/news.rb +0 -12
- data/spec/html/ecbahia.html +0 -780
- data/spec/html/g1.html +0 -4988
- data/spec/html/g1_bahia.html +0 -4481
- data/spec/html/metro1_cidade.html +0 -2404
- data/spec/newly_spec.rb +0 -73
data/spec/newly_spec.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
-
# require 'spec_helper'
|
3
|
-
|
4
|
-
describe Newly do
|
5
|
-
let(:selector) { Nokogiri::HTML }
|
6
|
-
let(:ec_bahia) { Newly.new('http://www.ecbahia.com', parse('spec/html/ecbahia.html')) }
|
7
|
-
let(:g1) { Newly.new('http://g1.globo.com', parse('spec/html/g1.html')) }
|
8
|
-
let(:g1_bahia) { Newly.new('http://g1.globo.com/bahia/', parse('spec/html/g1_bahia.html')) }
|
9
|
-
let(:metro1) { Newly.new('http://www.metro1.com.br/portal/?varSession=noticia&varEditoria=cidade', parse('spec/html/metro1_cidade.html')) }
|
10
|
-
|
11
|
-
it "should fetch ecbahia title" do
|
12
|
-
ec_bahia.title.should == "ecbahia.com - \u00e9 goleada tricolor na internet! (ecbahia, ecbahia.com, ecbahia.com.br, Esporte Clube Bahia)"
|
13
|
-
end
|
14
|
-
|
15
|
-
it "should fetch highlights from http://g1.globo.com/bahia" do
|
16
|
-
highlights = g1_bahia.highlights( selector: '#ultimas-regiao div, #ultimas-regiao ul li',
|
17
|
-
href: 'a',
|
18
|
-
date: '.data-hora',
|
19
|
-
title: '.titulo',
|
20
|
-
subtitle: '.subtitulo',
|
21
|
-
img: 'img'
|
22
|
-
)
|
23
|
-
highlights.should_not be_empty
|
24
|
-
end
|
25
|
-
|
26
|
-
context "fetching news from http://g1.globo.com" do
|
27
|
-
it "should fetch highlights news" do
|
28
|
-
highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
|
29
|
-
href: 'a',
|
30
|
-
title: '.chapeu',
|
31
|
-
subtitle: '.subtitulo',
|
32
|
-
img: '.foto a img'
|
33
|
-
)
|
34
|
-
highlights.should_not be_empty
|
35
|
-
highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
|
36
|
-
highlights[0].subtitle.should == 'Tremores deixaram 1.380 pessoas feridas.'
|
37
|
-
|
38
|
-
highlights[1].url.should == 'http://g1.globo.com/politica/mensalao/noticia/2012/08/historias-de-togas-e-becas-alimentam-folclore-de-tribunais-veja-algumas.html'
|
39
|
-
highlights[1].title.should == 'julgamento no stf'
|
40
|
-
|
41
|
-
highlights[2].url.should == 'http://g1.globo.com/concursos-e-emprego/noticia/2012/08/fazenda-e-9-orgaos-abrem-inscricoes-para-12-mil-vagas-na-segunda.html'
|
42
|
-
highlights[2].title.should == 'a partir de amanha'
|
43
|
-
end
|
44
|
-
|
45
|
-
xit "should fetch keywords" do
|
46
|
-
highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
|
47
|
-
href: 'a',
|
48
|
-
title: '.chapeu',
|
49
|
-
subtitle: '.subtitulo',
|
50
|
-
img: '.foto a img'
|
51
|
-
)
|
52
|
-
highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
|
53
|
-
highlights[0].keywords.should == 'noticias, noticia, Mundo'
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
it "should fetch highlights from http://www.metro1.com.br" do
|
58
|
-
highlights = metro1.highlights( selector: '#lista-de-resultados .resultado',
|
59
|
-
href: 'a',
|
60
|
-
date: '.resultado-data',
|
61
|
-
title: '.resultado-titulo',
|
62
|
-
subtitle: '.resultado-texto',
|
63
|
-
img: 'a img.img-resultado',
|
64
|
-
host: 'http://www.metro1.com.br'
|
65
|
-
)
|
66
|
-
highlights.should_not be_empty
|
67
|
-
end
|
68
|
-
|
69
|
-
private
|
70
|
-
def parse(path)
|
71
|
-
selector.parse(File.read(path))
|
72
|
-
end
|
73
|
-
end
|