thenewslensapi 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/Gemfile +7 -0
- data/README.md +34 -0
- data/Rakefile +5 -0
- data/lib/.DS_Store +0 -0
- data/lib/thenewslensapi.rb +2 -0
- data/lib/thenewslensapi/newsdigest.rb +10 -0
- data/lib/thenewslensapi/thenewslensapi.rb +43 -0
- data/lib/thenewslensapi/version.rb +6 -0
- data/spec/minitest.rb +30 -0
- data/thenewslensapi.gemspec +21 -0
- metadata +14 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b07888bf0f303ce611813890035366ff38b4d16c
|
4
|
+
data.tar.gz: 571a51ce11a78f1d70461762863a092c7d595d8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 20743c51188e50cb5a6633b00581ff3da721bb70b10ae758f9c11e348edebe0c587ef68299ea90d9c10061c5d2becffca6dbcc98e210fe7034dd86f0db50caca
|
7
|
+
data.tar.gz: 0e7bf3725e8a0c99be652391906d02c3140ecbd326832ccd1c374182e8bf35895136469ff5788a86905b1519f30787fea95dfcb1d7ddfb0b95b4aa12d0631808
|
data/.DS_Store
ADDED
Binary file
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
## The-Newslens_HW - An RSS reader written by ourselves
|
3
|
+
|
4
|
+
Authors : LinAnita, peyruchao(peggy) and ethancychen
|
5
|
+
|
6
|
+
### The website: The News Lens ( http://www.thenewslens.com )
|
7
|
+
> This website is a platform that offers multiple, differing points of view on news stories and incidents.
|
8
|
+
It lets users discuss the news and post comments.
|
9
|
+
|
10
|
+
### What do we want to scrape?
|
11
|
+
|
12
|
+
> The title, author and launch time of each news item on NewsLens.com's main page.
|
13
|
+
|
14
|
+
### Usage
|
15
|
+
> In command line just type in
|
16
|
+
```ruby newsdigests.rb```
|
17
|
+
and our program will write a new file (test.txt) in the directory. The file lists the title, author and launch time of each news item on NewsLens.com's main page. No parameters are needed.
|
18
|
+
For example, if we type ```ruby newsdigests.rb``` in the command line, it will produce a file called "test.txt". The contents of the file will look like this:
|
19
|
+
> <pre>
|
20
|
+
---
|
21
|
+
title: 習近平點名批評央視「大褲衩」,北京此後不再興建奇怪建築
|
22
|
+
author: KaChun
|
23
|
+
date: 2014-10-18
|
24
|
+
---
|
25
|
+
title: 教師的多元發聲:第二個全國教師工會「全教產」成立
|
26
|
+
author: TNL 編輯
|
27
|
+
date: 2014-10-18
|
28
|
+
</pre>
|
29
|
+
|
30
|
+
> Install it with the following command:
|
31
|
+
```gem install thenewslensapi```
|
32
|
+
|
33
|
+
> Then in your file:
|
34
|
+
```require "thenewslensapi"```
|
data/Rakefile
ADDED
data/lib/.DS_Store
ADDED
Binary file
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'yaml'
|
4
|
+
require 'date'
|
5
|
+
module Thenewslensapi
  # Scraper for the main page of The News Lens (see LINK).
  #
  # Every method is a class method. `gets_news` is the entry point and chains
  # the other steps: download -> select entry nodes -> extract text -> build
  # one Hash per entry.
  class NewsLens
    # URL of the page that is scraped.
    LINK = 'http://www.thenewslens.com'.freeze

    # XPath selecting one node per news entry inside the main content area.
    NEWS_XPATH = "//main[@class='HolyGrail-content']" \
                 "//div[@class='post-list-item']".freeze

    # Keys of the hashes built by `to_yaml`, in the order the corresponding
    # lines appear in an entry's text. The spelling 'somthing' is kept as-is
    # because callers may already read that key.
    COLUMN_NAMES = %w[title author date somthing].freeze

    # Downloads LINK and returns the parsed news entries.
    #
    # @return [Array<Hash>] one hash per entry, keyed by COLUMN_NAMES
    def self.gets_news
      doc = gets_html(LINK)
      content = gets_content(doc)
      news = gets_titles(content)
      to_yaml(news)
    end

    # Fetches +url+ and parses the response body as HTML.
    #
    # Uses OpenURI's URI#open instead of Kernel#open: Kernel#open no longer
    # opens URLs on Ruby 3.0+, and it would execute a string starting with "|"
    # as a shell command. The block form closes the handle once it is parsed.
    #
    # @param url [String] absolute http/https/ftp URL
    # @return [Nokogiri::HTML::Document]
    def self.gets_html(url)
      URI.parse(url).open { |page| Nokogiri::HTML(page) }
    end

    # Selects the news entry nodes (NEWS_XPATH) from a parsed document.
    #
    # @param doc [#xpath] document returned by `gets_html`
    # @return the node set produced by `doc.xpath`
    def self.gets_content(doc)
      doc.xpath(NEWS_XPATH)
    end

    # Returns the text of each node.
    #
    # @param news [Enumerable<#text>]
    # @return [Array<String>]
    def self.gets_titles(news)
      news.map(&:text)
    end

    # Converts raw entry texts into hashes keyed by COLUMN_NAMES.
    #
    # Despite its name, this returns Ruby hashes, not a YAML string.
    # Each text is stripped, runs of newlines plus following whitespace are
    # collapsed to a single newline, and the resulting lines are paired with
    # COLUMN_NAMES in order (extra lines are dropped, missing ones are nil).
    # The third line is parsed into a Date; it becomes nil when the line is
    # missing or is not a recognisable date.
    #
    # The input strings are not modified, so frozen strings are accepted.
    #
    # @param item [Enumerable<String>] raw text of each news entry
    # @return [Array<Hash{String => String, Date, nil}>]
    def self.to_yaml(item)
      item.map do |raw|
        lines = raw.strip.gsub(/\n+\s+/, "\n").split("\n")
        lines[2] = parse_date(lines[2])
        Hash[COLUMN_NAMES.zip(lines)]
      end
    end

    # Parses +text+ as a date, returning nil instead of raising when +text+
    # is nil or cannot be parsed, so that one malformed entry does not abort
    # the whole scrape.
    #
    # @param text [String, nil]
    # @return [Date, nil]
    def self.parse_date(text)
      Date.parse(text) if text
    rescue ArgumentError
      nil
    end
    private_class_method :parse_date
  end
end
|
data/spec/minitest.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'minitest/spec'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require './newsdigest.rb'
|
4
|
+
|
5
|
+
# Integration spec: every example runs against the result of a live
# Thenewslensapi::NewsLens.gets_news call (performed again before each example).
describe 'the_newslens_test' do
  before do
    @newsfound = Thenewslensapi::NewsLens.gets_news
  end

  # Without this check the per-item examples below pass vacuously when the
  # scrape returns nothing.
  it 'finds at least one news item' do
    _(@newsfound).wont_be_empty
  end

  it 'they are not nil' do
    @newsfound.each do |x|
      _(x['title']).wont_be_nil
      _(x['author']).wont_be_nil
      _(x['date']).wont_be_nil
    end
  end

  it 'has a Date object as date' do
    @newsfound.each do |x|
      _(x['date']).must_be_instance_of Date
    end
  end

  # Compared against the current year rather than a fixed one, so the spec
  # does not start failing as time passes.
  it 'is dated within two years of today' do
    this_year = Date.today.year
    @newsfound.each do |x|
      _(x['date'].year).must_be_close_to this_year, 2
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require 'thenewslensapi/version'
|
3
|
+
|
4
|
+
# Gem specification for thenewslensapi.
Gem::Specification.new do |s|
  s.name        = 'thenewslensapi'
  s.version     = Thenewslensapi::VERSION
  s.executables << 'thenewslensapi'
  s.date        = '2014-11-27'
  s.summary     = 'Grab the news information from theNewsLens'
  s.description = 'Automatically give you the titles of latest news.'
  s.authors     = ['Anita Lin', 'Peggy Chao', 'Ethan Chen']
  s.email       = 'gatheringbc@gmail.com'
  # Package every git-tracked file except macOS Finder metadata (.DS_Store),
  # which should not ship inside a released gem.
  s.files       = `git ls-files`.split("\n").reject { |path| File.basename(path) == '.DS_Store' }
  s.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.homepage    = 'https://github.com/SOAgroup3/theNewsLensApi'
  s.license     = 'MIT'

  s.add_development_dependency 'minitest'
  s.add_development_dependency 'minitest-rg'
  # NOTE(review): the previous comment here said "v.1.6.2 has documented
  # problems", yet this constraint still admits 1.6.2 - confirm the intended
  # minimum version.
  s.add_runtime_dependency 'nokogiri', '>= 1.6.2'
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: thenewslensapi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anita Lin
|
@@ -61,7 +61,18 @@ executables:
|
|
61
61
|
extensions: []
|
62
62
|
extra_rdoc_files: []
|
63
63
|
files:
|
64
|
+
- ".DS_Store"
|
65
|
+
- Gemfile
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
64
68
|
- bin/thenewslensapi
|
69
|
+
- lib/.DS_Store
|
70
|
+
- lib/thenewslensapi.rb
|
71
|
+
- lib/thenewslensapi/newsdigest.rb
|
72
|
+
- lib/thenewslensapi/thenewslensapi.rb
|
73
|
+
- lib/thenewslensapi/version.rb
|
74
|
+
- spec/minitest.rb
|
75
|
+
- thenewslensapi.gemspec
|
65
76
|
homepage: https://github.com/SOAgroup3/theNewsLensApi
|
66
77
|
licenses:
|
67
78
|
- MIT
|
@@ -86,4 +97,5 @@ rubygems_version: 2.2.2
|
|
86
97
|
signing_key:
|
87
98
|
specification_version: 4
|
88
99
|
summary: Grab the news information from theNewsLens
|
89
|
-
test_files:
|
100
|
+
test_files:
|
101
|
+
- spec/minitest.rb
|