newly 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -11
- data/Gemfile.lock +53 -26
- data/README.md +38 -0
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/newly/feed.rb +19 -0
- data/lib/newly/news.rb +16 -0
- data/lib/newly/news_crawler.rb +42 -0
- data/lib/newly/page_crawler.rb +51 -0
- data/lib/newly/selector.rb +17 -0
- data/lib/newly.rb +2 -34
- data/newly.gemspec +26 -30
- data/spec/html/page_spec.html +51 -0
- data/spec/newly/news_crawler_spec.rb +99 -0
- data/spec/newly/page_crawler_spec.rb +52 -0
- data/spec/spec_helper.rb +5 -9
- metadata +42 -42
- data/README.rdoc +0 -39
- data/lib/news.rb +0 -12
- data/spec/html/ecbahia.html +0 -780
- data/spec/html/g1.html +0 -4988
- data/spec/html/g1_bahia.html +0 -4481
- data/spec/html/metro1_cidade.html +0 -2404
- data/spec/newly_spec.rb +0 -73
data/spec/spec_helper.rb
CHANGED
@@ -1,13 +1,9 @@
|
|
1
|
-
|
2
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
-
require 'rspec'
|
4
|
-
require 'newly'
|
1
|
+
Dir['./lib/**/*.rb'].each {|file| require file }
|
5
2
|
|
6
|
-
|
7
|
-
# in ./support/ and its subdirectories.
|
8
|
-
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
3
|
+
require 'rspec/collection_matchers'
|
9
4
|
|
10
5
|
RSpec.configure do |config|
|
11
|
-
config.
|
6
|
+
config.color = true
|
12
7
|
config.formatter = :documentation
|
13
|
-
|
8
|
+
config.raise_errors_for_deprecations!
|
9
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: newly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,81 +9,79 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-07-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ~>
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: '
|
21
|
+
version: '1.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.5'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: rspec
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ~>
|
31
36
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
37
|
+
version: '3.0'
|
33
38
|
type: :development
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: rdoc
|
38
|
-
requirement: &70132223684620 !ruby/object:Gem::Requirement
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
41
|
none: false
|
40
42
|
requirements:
|
41
43
|
- - ~>
|
42
44
|
- !ruby/object:Gem::Version
|
43
|
-
version: '3.
|
44
|
-
type: :development
|
45
|
-
prerelease: false
|
46
|
-
version_requirements: *70132223684620
|
45
|
+
version: '3.0'
|
47
46
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
49
|
-
requirement:
|
47
|
+
name: rspec-collection_matchers
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
50
49
|
none: false
|
51
50
|
requirements:
|
52
51
|
- - ~>
|
53
52
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.
|
53
|
+
version: '1.0'
|
55
54
|
type: :development
|
56
55
|
prerelease: false
|
57
|
-
version_requirements:
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.0'
|
58
62
|
- !ruby/object:Gem::Dependency
|
59
63
|
name: jeweler
|
60
|
-
requirement:
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
61
65
|
none: false
|
62
66
|
requirements:
|
63
67
|
- - ~>
|
64
68
|
- !ruby/object:Gem::Version
|
65
|
-
version: 1.8
|
69
|
+
version: '1.8'
|
66
70
|
type: :development
|
67
71
|
prerelease: false
|
68
|
-
version_requirements:
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: simplecov
|
71
|
-
requirement: &70132223712120 !ruby/object:Gem::Requirement
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
72
73
|
none: false
|
73
74
|
requirements:
|
74
|
-
- -
|
75
|
+
- - ~>
|
75
76
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
77
|
-
|
78
|
-
prerelease: false
|
79
|
-
version_requirements: *70132223712120
|
80
|
-
description: Fetching breaking news from websites
|
77
|
+
version: '1.8'
|
78
|
+
description: DSL that helps scrapping news given a feed definition with url and selectors
|
81
79
|
email: alabeduarte@gmail.com
|
82
80
|
executables: []
|
83
81
|
extensions: []
|
84
82
|
extra_rdoc_files:
|
85
83
|
- LICENSE.txt
|
86
|
-
- README.
|
84
|
+
- README.md
|
87
85
|
files:
|
88
86
|
- .DS_Store
|
89
87
|
- .document
|
@@ -91,18 +89,20 @@ files:
|
|
91
89
|
- Gemfile
|
92
90
|
- Gemfile.lock
|
93
91
|
- LICENSE.txt
|
94
|
-
- README.
|
92
|
+
- README.md
|
95
93
|
- Rakefile
|
96
94
|
- VERSION
|
97
95
|
- lib/newly.rb
|
98
|
-
- lib/
|
96
|
+
- lib/newly/feed.rb
|
97
|
+
- lib/newly/news.rb
|
98
|
+
- lib/newly/news_crawler.rb
|
99
|
+
- lib/newly/page_crawler.rb
|
100
|
+
- lib/newly/selector.rb
|
99
101
|
- newly.gemspec
|
100
102
|
- spec/.DS_Store
|
101
|
-
- spec/html/
|
102
|
-
- spec/
|
103
|
-
- spec/
|
104
|
-
- spec/html/metro1_cidade.html
|
105
|
-
- spec/newly_spec.rb
|
103
|
+
- spec/html/page_spec.html
|
104
|
+
- spec/newly/news_crawler_spec.rb
|
105
|
+
- spec/newly/page_crawler_spec.rb
|
106
106
|
- spec/spec_helper.rb
|
107
107
|
homepage: http://github.com/alabeduarte/newly
|
108
108
|
licenses:
|
@@ -119,7 +119,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
119
|
version: '0'
|
120
120
|
segments:
|
121
121
|
- 0
|
122
|
-
hash:
|
122
|
+
hash: -2273980581556510551
|
123
123
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
124
|
none: false
|
125
125
|
requirements:
|
@@ -128,7 +128,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
128
|
version: '0'
|
129
129
|
requirements: []
|
130
130
|
rubyforge_project:
|
131
|
-
rubygems_version: 1.8.
|
131
|
+
rubygems_version: 1.8.21
|
132
132
|
signing_key:
|
133
133
|
specification_version: 3
|
134
134
|
summary: Fetching breaking news from websites
|
data/README.rdoc
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
= newly
|
2
|
-
|
3
|
-
==Fetching breaking news from websites
|
4
|
-
* Based on Nokogiri https://github.com/sparklemotion/nokogiri
|
5
|
-
|
6
|
-
== SYNOPSIS:
|
7
|
-
# Fecthing breaking news from some website
|
8
|
-
reader = Newly.new('http://g1.globo.com/bahia/')
|
9
|
-
news = reader.highlights(
|
10
|
-
selector: '#ultimas-regiao div, #ultimas-regiao ul li',
|
11
|
-
href: 'a',
|
12
|
-
date: '.data-hora',
|
13
|
-
title: '.titulo',
|
14
|
-
subtitle: '.subtitulo',
|
15
|
-
img: 'img')
|
16
|
-
|
17
|
-
# Presentation News
|
18
|
-
news.each |n| do
|
19
|
-
puts n.url # news href url
|
20
|
-
puts n.title # news title
|
21
|
-
puts n.subtitle # news subtitle
|
22
|
-
puts n.image # news image src
|
23
|
-
end
|
24
|
-
|
25
|
-
== Contributing to newly
|
26
|
-
|
27
|
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
28
|
-
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
29
|
-
* Fork the project.
|
30
|
-
* Start a feature/bugfix branch.
|
31
|
-
* Commit and push until you are happy with your contribution.
|
32
|
-
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
33
|
-
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
34
|
-
|
35
|
-
== Copyright
|
36
|
-
|
37
|
-
Copyright (c) 2012 Alabê Duarte. See LICENSE.txt for
|
38
|
-
further details.
|
39
|
-
|
data/lib/news.rb
DELETED