newly 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. data/README.rdoc +4 -3
  2. data/VERSION +1 -1
  3. data/lib/newly.rb +5 -15
  4. data/newly.gemspec +2 -2
  5. data/spec/newly_spec.rb +18 -12
  6. metadata +15 -15
data/README.rdoc CHANGED
@@ -1,17 +1,18 @@
1
1
  = newly
2
2
 
3
- Fetching breaking news from websites
3
+ ==Fetching breaking news from websites
4
+ * Based on Nokogiri https://github.com/sparklemotion/nokogiri
4
5
 
5
6
  == SYNOPSIS:
6
7
  # Fetching breaking news from some website
7
8
  reader = Newly.new('http://g1.globo.com/bahia/')
8
9
  news = reader.highlights(
9
10
  selector: '#ultimas-regiao div, #ultimas-regiao ul li',
10
- url: 'a',
11
+ href: 'a',
11
12
  date: '.data-hora',
12
13
  title: '.titulo',
13
14
  subtitle: '.subtitulo',
14
- image: 'img')
15
+ img: 'img')
15
16
 
16
17
  # Presentation News
17
18
  news.each do |n|
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.0
1
+ 1.1.0
data/lib/newly.rb CHANGED
@@ -6,13 +6,9 @@ class Newly
6
6
 
7
7
  attr_reader :title, :selector, :url
8
8
 
9
- def initialize(url, html_file=nil)
9
+ def initialize(url, selector=Nokogiri::HTML(open(url)))
10
10
  @url = url
11
- if (html_file)
12
- @selector = Nokogiri::HTML.parse(File.read(html_file))
13
- else
14
- @selector = Nokogiri::HTML(open(url))
15
- end
11
+ @selector = selector
16
12
  @title = @selector.at_css("title").text
17
13
  end
18
14
 
@@ -20,25 +16,19 @@ class Newly
20
16
  news = Array.new
21
17
  @selector.css(args[:selector]).each do |item|
22
18
  if (item)
23
- url = item.css(args[:url]).map { |doc| doc['href'] }.first if args[:url]
24
-
25
- # doc = Nokogiri::HTML(open(url))
26
- # keywords = doc.xpath("//meta[@name='Keywords']/@content") if doc
27
- keywords = nil
28
-
19
+ href = item.css(args[:href]).map { |doc| doc['href'] }.first if args[:href]
29
20
  date = item.css(args[:date]).text if args[:date]
30
21
  title = item.css(args[:title]).text if args[:title]
31
22
  subtitle = item.css(args[:subtitle]).text if args[:subtitle]
32
- image = item.css(args[:image]).map { |doc| doc['src'] }.first if args[:image]
23
+ img = item.css(args[:img]).map { |doc| doc['src'] }.first if args[:img]
33
24
  if (args[:host])
34
25
  host = args[:host]
35
26
  url = "#{host}/#{url}".gsub('../', '') if url
36
27
  image = "#{host}/#{image}".gsub('../', '') if image && image.include?('../')
37
28
  end
38
- news << News.new(url: url, keywords: keywords, date: date, title: title, subtitle: subtitle, image: image)
29
+ news << News.new(url: href, keywords: keywords, date: date, title: title, subtitle: subtitle, image: img)
39
30
  end
40
31
  end
41
32
  news
42
33
  end
43
-
44
34
  end
data/newly.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "newly"
8
- s.version = "1.0.0"
8
+ s.version = "1.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Alab\u{ea} Duarte"]
12
- s.date = "2012-08-13"
12
+ s.date = "2012-08-14"
13
13
  s.description = "Fetching breaking news from websites"
14
14
  s.email = "alabeduarte@gmail.com"
15
15
  s.extra_rdoc_files = [
data/spec/newly_spec.rb CHANGED
@@ -2,10 +2,11 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
  # require 'spec_helper'
3
3
 
4
4
  describe Newly do
5
- let(:ec_bahia) { ec_bahia = Newly.new('http://www.ecbahia.com', 'spec/html/ecbahia.html') }
6
- let(:g1) { g1_bahia = Newly.new('http://g1.globo.com', 'spec/html/g1.html') }
7
- let(:g1_bahia) { g1_bahia = Newly.new('http://g1.globo.com/bahia/', 'spec/html/g1_bahia.html') }
8
- let(:metro1) { g1_bahia = Newly.new('http://www.metro1.com.br/portal/?varSession=noticia&varEditoria=cidade', 'spec/html/metro1_cidade.html') }
5
+ let(:selector) { Nokogiri::HTML }
6
+ let(:ec_bahia) { Newly.new('http://www.ecbahia.com', parse('spec/html/ecbahia.html')) }
7
+ let(:g1) { Newly.new('http://g1.globo.com', parse('spec/html/g1.html')) }
8
+ let(:g1_bahia) { Newly.new('http://g1.globo.com/bahia/', parse('spec/html/g1_bahia.html')) }
9
+ let(:metro1) { Newly.new('http://www.metro1.com.br/portal/?varSession=noticia&varEditoria=cidade', parse('spec/html/metro1_cidade.html')) }
9
10
 
10
11
  it "should fetch ecbahia title" do
11
12
  ec_bahia.title.should == "ecbahia.com - \u00e9 goleada tricolor na internet! (ecbahia, ecbahia.com, ecbahia.com.br, Esporte Clube Bahia)"
@@ -13,11 +14,11 @@ describe Newly do
13
14
 
14
15
  it "should fetch highlights from http://g1.globo.com/bahia" do
15
16
  highlights = g1_bahia.highlights( selector: '#ultimas-regiao div, #ultimas-regiao ul li',
16
- url: 'a',
17
+ href: 'a',
17
18
  date: '.data-hora',
18
19
  title: '.titulo',
19
20
  subtitle: '.subtitulo',
20
- image: 'img'
21
+ img: 'img'
21
22
  )
22
23
  highlights.should_not be_empty
23
24
  end
@@ -25,10 +26,10 @@ describe Newly do
25
26
  context "fetching news from http://g1.globo.com" do
26
27
  it "should fetch highlights news" do
27
28
  highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
28
- url: 'a',
29
+ href: 'a',
29
30
  title: '.chapeu',
30
31
  subtitle: '.subtitulo',
31
- image: '.foto a img'
32
+ img: '.foto a img'
32
33
  )
33
34
  highlights.should_not be_empty
34
35
  highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
@@ -43,10 +44,10 @@ describe Newly do
43
44
 
44
45
  xit "should fetch keywords" do
45
46
  highlights = g1.highlights( selector: '#glb-corpo .glb-area .chamada-principal',
46
- url: 'a',
47
+ href: 'a',
47
48
  title: '.chapeu',
48
49
  subtitle: '.subtitulo',
49
- image: '.foto a img'
50
+ img: '.foto a img'
50
51
  )
51
52
  highlights[0].url.should == 'http://g1.globo.com/mundo/noticia/2012/08/ira-encerra-resgate-apos-terremotos-e-revisa-mortos-para-227-diz-tv-estatal.html'
52
53
  highlights[0].keywords.should == 'noticias, noticia, Mundo'
@@ -55,13 +56,18 @@ describe Newly do
55
56
 
56
57
  it "should fetch highlights from http://www.metro1.com.br" do
57
58
  highlights = metro1.highlights( selector: '#lista-de-resultados .resultado',
58
- url: 'a',
59
+ href: 'a',
59
60
  date: '.resultado-data',
60
61
  title: '.resultado-titulo',
61
62
  subtitle: '.resultado-texto',
62
- image: 'a img.img-resultado',
63
+ img: 'a img.img-resultado',
63
64
  host: 'http://www.metro1.com.br'
64
65
  )
65
66
  highlights.should_not be_empty
66
67
  end
68
+
69
+ private
70
+ def parse(path)
71
+ selector.parse(File.read(path))
72
+ end
67
73
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: newly
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-13 00:00:00.000000000 Z
12
+ date: 2012-08-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70275429701820 !ruby/object:Gem::Requirement
16
+ requirement: &70132223688900 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70275429701820
24
+ version_requirements: *70132223688900
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &70275429699520 !ruby/object:Gem::Requirement
27
+ requirement: &70132223687300 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 2.8.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70275429699520
35
+ version_requirements: *70132223687300
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rdoc
38
- requirement: &70275429697780 !ruby/object:Gem::Requirement
38
+ requirement: &70132223684620 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '3.12'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70275429697780
46
+ version_requirements: *70132223684620
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
- requirement: &70275429696240 !ruby/object:Gem::Requirement
49
+ requirement: &70132223715700 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.1.5
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70275429696240
57
+ version_requirements: *70132223715700
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: jeweler
60
- requirement: &70275429694540 !ruby/object:Gem::Requirement
60
+ requirement: &70132223714340 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.8.4
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70275429694540
68
+ version_requirements: *70132223714340
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: simplecov
71
- requirement: &70275429735780 !ruby/object:Gem::Requirement
71
+ requirement: &70132223712120 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70275429735780
79
+ version_requirements: *70132223712120
80
80
  description: Fetching breaking news from websites
81
81
  email: alabeduarte@gmail.com
82
82
  executables: []
@@ -119,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
119
119
  version: '0'
120
120
  segments:
121
121
  - 0
122
- hash: -2351285680129146534
122
+ hash: 4551793471179022495
123
123
  required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  none: false
125
125
  requirements: