newly 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +4 -3
- data/VERSION +1 -1
- data/lib/newly.rb +5 -15
- data/newly.gemspec +2 -2
- data/spec/newly_spec.rb +18 -12
- metadata +15 -15
data/README.rdoc
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
= newly
|
2
2
|
|
3
|
-
Fetching breaking news from websites
|
3
|
+
==Fetching breaking news from websites
|
4
|
+
* Based on Nokogiri https://github.com/sparklemotion/nokogiri
|
4
5
|
|
5
6
|
== SYNOPSIS:
|
6
7
|
# Fetching breaking news from some website
|
7
8
|
reader = Newly.new('http://g1.globo.com/bahia/')
|
8
9
|
news = reader.highlights(
|
9
10
|
selector: '#ultimas-regiao div, #ultimas-regiao ul li',
|
10
|
-
|
11
|
+
href: 'a',
|
11
12
|
date: '.data-hora',
|
12
13
|
title: '.titulo',
|
13
14
|
subtitle: '.subtitulo',
|
14
|
-
|
15
|
+
img: 'img')
|
15
16
|
|
16
17
|
# Presentation News
|
17
18
|
news.each do |n|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.1.0
|
data/lib/newly.rb
CHANGED
@@ -6,13 +6,9 @@ class Newly
|
|
6
6
|
|
7
7
|
attr_reader :title, :selector, :url
|
8
8
|
|
9
|
-
def initialize(url,
|
9
|
+
def initialize(url, selector=Nokogiri::HTML(open(url)))
|
10
10
|
@url = url
|
11
|
-
|
12
|
-
@selector = Nokogiri::HTML.parse(File.read(html_file))
|
13
|
-
else
|
14
|
-
@selector = Nokogiri::HTML(open(url))
|
15
|
-
end
|
11
|
+
@selector = selector
|
16
12
|
@title = @selector.at_css("title").text
|
17
13
|
end
|
18
14
|
|
@@ -20,25 +16,19 @@ class Newly
|
|
20
16
|
news = Array.new
|
21
17
|
@selector.css(args[:selector]).each do |item|
|
22
18
|
if (item)
|
23
|
-
|
24
|
-
|
25
|
-
# doc = Nokogiri::HTML(open(url))
|
26
|
-
# keywords = doc.xpath("//meta[@name='Keywords']/@content") if doc
|
27
|
-
keywords = nil
|
28
|
-
|
19
|
+
href = item.css(args[:href]).map { |doc| doc['href'] }.first if args[:href]
|
29
20
|
date = item.css(args[:date]).text if args[:date]
|
30
21
|
title = item.css(args[:title]).text if args[:title]
|
31
22
|
subtitle = item.css(args[:subtitle]).text if args[:subtitle]
|
32
|
-
|
23
|
+
img = item.css(args[:img]).map { |doc| doc['src'] }.first if args[:img]
|
33
24
|
if (args[:host])
|
34
25
|
host = args[:host]
|
35
26
|
url = "#{host}/#{url}".gsub('../', '') if url
|
36
27
|
image = "#{host}/#{image}".gsub('../', '') if image && image.include?('../')
|
37
28
|
end
|
38
|
-
news << News.new(url:
|
29
|
+
news << News.new(url: href, keywords: keywords, date: date, title: title, subtitle: subtitle, image: img)
|
39
30
|
end
|
40
31
|
end
|
41
32
|
news
|
42
33
|
end
|
43
|
-
|
44
34
|
end
|
data/newly.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "newly"
|
8
|
-
s.version = "1.
|
8
|
+
s.version = "1.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Alab\u{ea} Duarte"]
|
12
|
-
s.date = "2012-08-
|
12
|
+
s.date = "2012-08-14"
|
13
13
|
s.description = "Fetching breaking news from websites"
|
14
14
|
s.email = "alabeduarte@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/newly_spec.rb
CHANGED
@@ -2,10 +2,11 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
# require 'spec_helper'
|
3
3
|
|
4
4
|
describe Newly do
|
5
|
-
let(:
|
6
|
-
let(:
|
7
|
-
let(:
|
8
|
-
let(:
|
5
|
+
let(:selector) { Nokogiri::HTML }
|
6
|
+
let(:ec_bahia) { Newly.new('http://www.ecbahia.com', parse('spec/html/ecbahia.html')) }
|
7
|
+
let(:g1) { Newly.new('http://g1.globo.com', parse('spec/html/g1.html')) }
|
8
|
+
let(:g1_bahia) { Newly.new('http://g1.globo.com/bahia/', parse('spec/html/g1_bahia.html')) }
|
9
|
+
let(:metro1) { Newly.new('http://www.metro1.com.br/portal/?varSession=noticia&varEditoria=cidade', parse('spec/html/metro1_cidade.html')) }
|
9
10
|
|
10
11
|
it "should fetch ecbahia title" do
|
11
12
|
ec_bahia.title.should == "ecbahia.com - \u00e9 goleada tricolor na internet! (ecbahia, ecbahia.com, ecbahia.com.br, Esporte Clube Bahia)"
|
@@ -13,11 +14,11 @@ describe Newly do
|
|
13
14
|
|
14
15
|
it "should fetch highlights from http://g1.globo.com/bahia" do
|
15
16
|
highlights = g1_bahia.highlights( selector: '#ultimas-regiao div, #ultimas-regiao ul li',
|
16
|
-
|
17
|
+
href: 'a',
|
17
18
|
date: '.data-hora',
|
18
19
|
title: '.titulo',
|
19
20
|
subtitle: '.subtitulo',
|
20
|
-
|
21
|
+
img: 'img'
|
21
22
|
)
|
22
23
|
highlights.should_not be_empty
|
23
24
|
end
|
@@ -25,10 +26,10 @@ describe Newly do
|
|
25
26
|
context "fetching news from http://g1.globo.com" do
|
26
27
|
it "should fetch highlights news" do
|
27
28
|
highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
|
28
|
-
|
29
|
+
href: 'a',
|
29
30
|
title: '.chapeu',
|
30
31
|
subtitle: '.subtitulo',
|
31
|
-
|
32
|
+
img: '.foto a img'
|
32
33
|
)
|
33
34
|
highlights.should_not be_empty
|
34
35
|
highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
|
@@ -43,10 +44,10 @@ describe Newly do
|
|
43
44
|
|
44
45
|
xit "should fetch keywords" do
|
45
46
|
highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
|
46
|
-
|
47
|
+
href: 'a',
|
47
48
|
title: '.chapeu',
|
48
49
|
subtitle: '.subtitulo',
|
49
|
-
|
50
|
+
img: '.foto a img'
|
50
51
|
)
|
51
52
|
highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
|
52
53
|
highlights[0].keywords.should == 'noticias, noticia, Mundo'
|
@@ -55,13 +56,18 @@ describe Newly do
|
|
55
56
|
|
56
57
|
it "should fetch highlights from http://www.metro1.com.br" do
|
57
58
|
highlights = metro1.highlights( selector: '#lista-de-resultados .resultado',
|
58
|
-
|
59
|
+
href: 'a',
|
59
60
|
date: '.resultado-data',
|
60
61
|
title: '.resultado-titulo',
|
61
62
|
subtitle: '.resultado-texto',
|
62
|
-
|
63
|
+
img: 'a img.img-resultado',
|
63
64
|
host: 'http://www.metro1.com.br'
|
64
65
|
)
|
65
66
|
highlights.should_not be_empty
|
66
67
|
end
|
68
|
+
|
69
|
+
private
|
70
|
+
def parse(path)
|
71
|
+
selector.parse(File.read(path))
|
72
|
+
end
|
67
73
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: newly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70132223688900 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70132223688900
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &70132223687300 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 2.8.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70132223687300
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rdoc
|
38
|
-
requirement: &
|
38
|
+
requirement: &70132223684620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '3.12'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70132223684620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
|
-
requirement: &
|
49
|
+
requirement: &70132223715700 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 1.1.5
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70132223715700
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: jeweler
|
60
|
-
requirement: &
|
60
|
+
requirement: &70132223714340 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 1.8.4
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70132223714340
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: simplecov
|
71
|
-
requirement: &
|
71
|
+
requirement: &70132223712120 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70132223712120
|
80
80
|
description: Fetching breaking news from websites
|
81
81
|
email: alabeduarte@gmail.com
|
82
82
|
executables: []
|
@@ -119,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
119
|
version: '0'
|
120
120
|
segments:
|
121
121
|
- 0
|
122
|
-
hash:
|
122
|
+
hash: 4551793471179022495
|
123
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
124
|
none: false
|
125
125
|
requirements:
|