newly 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.ruby-version +1 -0
- data/README.md +16 -1
- data/VERSION +1 -1
- data/lib/newly/news.rb +11 -1
- data/newly.gemspec +4 -4
- data/spec/html/page_spec.html +1 -0
- data/spec/newly/news_spec.rb +35 -0
- metadata +5 -5
- data/.DS_Store +0 -0
- data/spec/.DS_Store +0 -0
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.0
|
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# newly
|
2
2
|
|
3
|
-
|
3
|
+
[![Build Status](https://snap-ci.com/alabeduarte/newly/branch/master/build_image)](https://snap-ci.com/alabeduarte/newly/branch/master)
|
4
|
+
|
5
|
+
|
6
|
+
DSL that helps scrapping news given a feed definition with url and selectors
|
4
7
|
|
5
8
|
## SYNOPSIS:
|
6
9
|
|
@@ -17,6 +20,10 @@ my_feed = Newly::Feed.new(
|
|
17
20
|
image_source: 'img')
|
18
21
|
|
19
22
|
news = Newly::NewsCrawler.new(url: 'http://g1.globo.com/bahia/', feed: my_feed).fetch
|
23
|
+
```
|
24
|
+
Getting data:
|
25
|
+
|
26
|
+
```ruby
|
20
27
|
news.each do |n|
|
21
28
|
puts n.url # news href url
|
22
29
|
puts n.title # news title
|
@@ -24,6 +31,14 @@ news.each do |n|
|
|
24
31
|
puts n.image # news image src
|
25
32
|
end
|
26
33
|
```
|
34
|
+
or
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
news.each do |n|
|
38
|
+
puts n.to_hash # { url: '...', title: '...' }
|
39
|
+
end
|
40
|
+
```
|
41
|
+
|
27
42
|
|
28
43
|
## Contributing to newly
|
29
44
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.0.0
|
1
|
+
2.0.1
|
data/lib/newly/news.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Newly
|
2
2
|
class News
|
3
|
-
attr_reader :url, :
|
3
|
+
attr_reader :url, :title, :subtitle, :image, :feed_url
|
4
4
|
|
5
5
|
def initialize(args)
|
6
6
|
page_crawler = args[:page_crawler]
|
@@ -12,5 +12,15 @@ module Newly
|
|
12
12
|
@subtitle = page_crawler.titleize feed.subtitle
|
13
13
|
@image = page_crawler.image feed.image_source
|
14
14
|
end
|
15
|
+
|
16
|
+
def to_hash
|
17
|
+
{
|
18
|
+
url: @url,
|
19
|
+
title: @title,
|
20
|
+
subtitle: @subtitle,
|
21
|
+
image: @image,
|
22
|
+
feed_url: @feed_url
|
23
|
+
}
|
24
|
+
end
|
15
25
|
end
|
16
26
|
end
|
data/newly.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "newly"
|
8
|
-
s.version = "2.0.0"
|
8
|
+
s.version = "2.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Alab\u{ea} Duarte"]
|
12
|
-
s.date = "2014-07-
|
12
|
+
s.date = "2014-07-24"
|
13
13
|
s.description = "DSL that helps scrapping news given a feed definition with url and selectors"
|
14
14
|
s.email = "alabeduarte@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -17,9 +17,9 @@ Gem::Specification.new do |s|
|
|
17
17
|
"README.md"
|
18
18
|
]
|
19
19
|
s.files = [
|
20
|
-
".DS_Store",
|
21
20
|
".document",
|
22
21
|
".rspec",
|
22
|
+
".ruby-version",
|
23
23
|
"Gemfile",
|
24
24
|
"Gemfile.lock",
|
25
25
|
"LICENSE.txt",
|
@@ -33,9 +33,9 @@ Gem::Specification.new do |s|
|
|
33
33
|
"lib/newly/page_crawler.rb",
|
34
34
|
"lib/newly/selector.rb",
|
35
35
|
"newly.gemspec",
|
36
|
-
"spec/.DS_Store",
|
37
36
|
"spec/html/page_spec.html",
|
38
37
|
"spec/newly/news_crawler_spec.rb",
|
38
|
+
"spec/newly/news_spec.rb",
|
39
39
|
"spec/newly/page_crawler_spec.rb",
|
40
40
|
"spec/spec_helper.rb"
|
41
41
|
]
|
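data/spec/html/page_spec.html
CHANGED
(one line added; the hunk body was not captured in this extract)
data/spec/newly/news_spec.rb
ADDED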
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Newly::News do
|
4
|
+
describe "exposing attributes" do
|
5
|
+
it "should expose attribute values as hash" do
|
6
|
+
news = build_reader.fetch.first
|
7
|
+
|
8
|
+
expect(news.to_hash).to eq({
|
9
|
+
url: 'http://esporte.uol.com.br/ultimas-noticias/reuters/2012/09/08/jackie-stewart-aconselha-hamilton-a-continuar-na-mclaren.htm',
|
10
|
+
title: 'Jackie Stewart aconselha Hamilton a continuar na McLaren',
|
11
|
+
subtitle: 'MONZA, 8 Set (Reuters) - Tricampeao de Formula 1, Jackie Stewart aconselhou Lewis Hamilton neste sabado a...',
|
12
|
+
feed_url: 'http://noticias.uol.com.br/noticias',
|
13
|
+
image: 'http://s.glbimg.com/en/ho/f/original/2012/09/29/exobeso.jpg',
|
14
|
+
})
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
def build_reader
|
20
|
+
url = 'http://noticias.uol.com.br/noticias'
|
21
|
+
feed = Newly::Feed.new(
|
22
|
+
container: 'div.geral section article.news',
|
23
|
+
url_pattern: 'h1 a',
|
24
|
+
title: 'h1 a span',
|
25
|
+
subtitle: 'p',
|
26
|
+
image_source: 'img'
|
27
|
+
)
|
28
|
+
|
29
|
+
Newly::NewsCrawler.new(selector: fake_selector, url: url, feed: feed)
|
30
|
+
end
|
31
|
+
def fake_selector
|
32
|
+
parsed_html = Nokogiri::HTML.parse(File.read 'spec/html/page_spec.html')
|
33
|
+
Newly::Selector.new parsed_html
|
34
|
+
end
|
35
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: newly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -83,9 +83,9 @@ extra_rdoc_files:
|
|
83
83
|
- LICENSE.txt
|
84
84
|
- README.md
|
85
85
|
files:
|
86
|
-
- .DS_Store
|
87
86
|
- .document
|
88
87
|
- .rspec
|
88
|
+
- .ruby-version
|
89
89
|
- Gemfile
|
90
90
|
- Gemfile.lock
|
91
91
|
- LICENSE.txt
|
@@ -99,9 +99,9 @@ files:
|
|
99
99
|
- lib/newly/page_crawler.rb
|
100
100
|
- lib/newly/selector.rb
|
101
101
|
- newly.gemspec
|
102
|
-
- spec/.DS_Store
|
103
102
|
- spec/html/page_spec.html
|
104
103
|
- spec/newly/news_crawler_spec.rb
|
104
|
+
- spec/newly/news_spec.rb
|
105
105
|
- spec/newly/page_crawler_spec.rb
|
106
106
|
- spec/spec_helper.rb
|
107
107
|
homepage: http://github.com/alabeduarte/newly
|
@@ -119,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
119
|
version: '0'
|
120
120
|
segments:
|
121
121
|
- 0
|
122
|
-
hash:
|
122
|
+
hash: 3098867371429986061
|
123
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
124
|
none: false
|
125
125
|
requirements:
|
data/.DS_Store
DELETED
Binary file
|
data/spec/.DS_Store
DELETED
Binary file
|