newly 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +4 -3
- data/VERSION +1 -1
- data/lib/newly.rb +5 -15
- data/newly.gemspec +2 -2
- data/spec/newly_spec.rb +18 -12
- metadata +15 -15
data/README.rdoc
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
= newly
|
2
2
|
|
3
|
-
Fetching breaking news from websites
|
3
|
+
==Fetching breaking news from websites
|
4
|
+
* Based on Nokogiri https://github.com/sparklemotion/nokogiri
|
4
5
|
|
5
6
|
== SYNOPSIS:
|
6
7
|
# Fetching breaking news from some website
|
7
8
|
reader = Newly.new('http://g1.globo.com/bahia/')
|
8
9
|
news = reader.highlights(
|
9
10
|
selector: '#ultimas-regiao div, #ultimas-regiao ul li',
|
10
|
-
|
11
|
+
href: 'a',
|
11
12
|
date: '.data-hora',
|
12
13
|
title: '.titulo',
|
13
14
|
subtitle: '.subtitulo',
|
14
|
-
|
15
|
+
img: 'img')
|
15
16
|
|
16
17
|
# Presentation News
|
17
18
|
news.each do |n|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.0
|
1
|
+
1.1.0
|
data/lib/newly.rb
CHANGED
@@ -6,13 +6,9 @@ class Newly
|
|
6
6
|
|
7
7
|
attr_reader :title, :selector, :url
|
8
8
|
|
9
|
-
def initialize(url,
|
9
|
+
def initialize(url, selector=Nokogiri::HTML(open(url)))
|
10
10
|
@url = url
|
11
|
-
|
12
|
-
@selector = Nokogiri::HTML.parse(File.read(html_file))
|
13
|
-
else
|
14
|
-
@selector = Nokogiri::HTML(open(url))
|
15
|
-
end
|
11
|
+
@selector = selector
|
16
12
|
@title = @selector.at_css("title").text
|
17
13
|
end
|
18
14
|
|
@@ -20,25 +16,19 @@ class Newly
|
|
20
16
|
news = Array.new
|
21
17
|
@selector.css(args[:selector]).each do |item|
|
22
18
|
if (item)
|
23
|
-
|
24
|
-
|
25
|
-
# doc = Nokogiri::HTML(open(url))
|
26
|
-
# keywords = doc.xpath("//meta[@name='Keywords']/@content") if doc
|
27
|
-
keywords = nil
|
28
|
-
|
19
|
+
href = item.css(args[:href]).map { |doc| doc['href'] }.first if args[:href]
|
29
20
|
date = item.css(args[:date]).text if args[:date]
|
30
21
|
title = item.css(args[:title]).text if args[:title]
|
31
22
|
subtitle = item.css(args[:subtitle]).text if args[:subtitle]
|
32
|
-
|
23
|
+
img = item.css(args[:img]).map { |doc| doc['src'] }.first if args[:img]
|
33
24
|
if (args[:host])
|
34
25
|
host = args[:host]
|
35
26
|
url = "#{host}/#{url}".gsub('../', '') if url
|
36
27
|
image = "#{host}/#{image}".gsub('../', '') if image && image.include?('../')
|
37
28
|
end
|
38
|
-
news << News.new(url:
|
29
|
+
news << News.new(url: href, keywords: keywords, date: date, title: title, subtitle: subtitle, image: img)
|
39
30
|
end
|
40
31
|
end
|
41
32
|
news
|
42
33
|
end
|
43
|
-
|
44
34
|
end
|
data/newly.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "newly"
|
8
|
-
s.version = "1.0.0"
|
8
|
+
s.version = "1.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Alab\u{ea} Duarte"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-14"
|
13
13
|
s.description = "Fetching breaking news from websites"
|
14
14
|
s.email = "alabeduarte@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/newly_spec.rb
CHANGED
@@ -2,10 +2,11 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
# require 'spec_helper'
|
3
3
|
|
4
4
|
describe Newly do
|
5
|
-
let(:
|
6
|
-
let(:
|
7
|
-
let(:
|
8
|
-
let(:
|
5
|
+
let(:selector) { Nokogiri::HTML }
|
6
|
+
let(:ec_bahia) { Newly.new('http://www.ecbahia.com', parse('spec/html/ecbahia.html')) }
|
7
|
+
let(:g1) { Newly.new('http://g1.globo.com', parse('spec/html/g1.html')) }
|
8
|
+
let(:g1_bahia) { Newly.new('http://g1.globo.com/bahia/', parse('spec/html/g1_bahia.html')) }
|
9
|
+
let(:metro1) { Newly.new('http://www.metro1.com.br/portal/?varSession=noticia&varEditoria=cidade', parse('spec/html/metro1_cidade.html')) }
|
9
10
|
|
10
11
|
it "should fetch ecbahia title" do
|
11
12
|
ec_bahia.title.should == "ecbahia.com - \u00e9 goleada tricolor na internet! (ecbahia, ecbahia.com, ecbahia.com.br, Esporte Clube Bahia)"
|
@@ -13,11 +14,11 @@ describe Newly do
|
|
13
14
|
|
14
15
|
it "should fetch highlights from http://g1.globo.com/bahia" do
|
15
16
|
highlights = g1_bahia.highlights( selector: '#ultimas-regiao div, #ultimas-regiao ul li',
|
16
|
-
|
17
|
+
href: 'a',
|
17
18
|
date: '.data-hora',
|
18
19
|
title: '.titulo',
|
19
20
|
subtitle: '.subtitulo',
|
20
|
-
|
21
|
+
img: 'img'
|
21
22
|
)
|
22
23
|
highlights.should_not be_empty
|
23
24
|
end
|
@@ -25,10 +26,10 @@ describe Newly do
|
|
25
26
|
context "fetching news from http://g1.globo.com" do
|
26
27
|
it "should fetch highlights news" do
|
27
28
|
highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
|
28
|
-
|
29
|
+
href: 'a',
|
29
30
|
title: '.chapeu',
|
30
31
|
subtitle: '.subtitulo',
|
31
|
-
|
32
|
+
img: '.foto a img'
|
32
33
|
)
|
33
34
|
highlights.should_not be_empty
|
34
35
|
highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
|
@@ -43,10 +44,10 @@ describe Newly do
|
|
43
44
|
|
44
45
|
xit "should fetch keywords" do
|
45
46
|
highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
|
46
|
-
|
47
|
+
href: 'a',
|
47
48
|
title: '.chapeu',
|
48
49
|
subtitle: '.subtitulo',
|
49
|
-
|
50
|
+
img: '.foto a img'
|
50
51
|
)
|
51
52
|
highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
|
52
53
|
highlights[0].keywords.should == 'noticias, noticia, Mundo'
|
@@ -55,13 +56,18 @@ describe Newly do
|
|
55
56
|
|
56
57
|
it "should fetch highlights from http://www.metro1.com.br" do
|
57
58
|
highlights = metro1.highlights( selector: '#lista-de-resultados .resultado',
|
58
|
-
|
59
|
+
href: 'a',
|
59
60
|
date: '.resultado-data',
|
60
61
|
title: '.resultado-titulo',
|
61
62
|
subtitle: '.resultado-texto',
|
62
|
-
|
63
|
+
img: 'a img.img-resultado',
|
63
64
|
host: 'http://www.metro1.com.br'
|
64
65
|
)
|
65
66
|
highlights.should_not be_empty
|
66
67
|
end
|
68
|
+
|
69
|
+
private
|
70
|
+
def parse(path)
|
71
|
+
selector.parse(File.read(path))
|
72
|
+
end
|
67
73
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: newly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70132223688900 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70132223688900
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &70132223687300 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.8.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70132223687300
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rdoc
|
38
|
-
requirement: &
|
38
|
+
requirement: &70132223684620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '3.12'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70132223684620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
|
-
requirement: &
|
49
|
+
requirement: &70132223715700 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.1.5
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70132223715700
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: jeweler
|
60
|
-
requirement: &
|
60
|
+
requirement: &70132223714340 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.8.4
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70132223714340
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: simplecov
|
71
|
-
requirement: &
|
71
|
+
requirement: &70132223712120 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70132223712120
|
80
80
|
description: Fetching breaking news from websites
|
81
81
|
email: alabeduarte@gmail.com
|
82
82
|
executables: []
|
@@ -119,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
119
|
version: '0'
|
120
120
|
segments:
|
121
121
|
- 0
|
122
|
-
hash:
|
122
|
+
hash: 4551793471179022495
|
123
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
124
|
none: false
|
125
125
|
requirements:
|