taiwanese_news_parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +23 -0
- data/Rakefile +4 -0
- data/g0v.json +37 -0
- data/lib/taiwanese_news_parser/parser/apple_daily.rb +69 -0
- data/lib/taiwanese_news_parser/parser/china_times.rb +76 -0
- data/lib/taiwanese_news_parser/parser/cna.rb +59 -0
- data/lib/taiwanese_news_parser/parser/cts.rb +52 -0
- data/lib/taiwanese_news_parser/parser/ettoday.rb +53 -0
- data/lib/taiwanese_news_parser/parser/liberty_times.rb +66 -0
- data/lib/taiwanese_news_parser/parser/liberty_times_big5.rb +51 -0
- data/lib/taiwanese_news_parser/parser/now_news.rb +53 -0
- data/lib/taiwanese_news_parser/parser/tvbs.rb +46 -0
- data/lib/taiwanese_news_parser/parser/udn.rb +43 -0
- data/lib/taiwanese_news_parser/parser.rb +57 -0
- data/lib/taiwanese_news_parser/url_cleaner.rb +19 -0
- data/lib/taiwanese_news_parser/version.rb +3 -0
- data/lib/taiwanese_news_parser.rb +15 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/taiwanese_news_parser/parser/apple_daily_s1.html +484 -0
- data/spec/taiwanese_news_parser/parser/apple_daily_s2.html +333 -0
- data/spec/taiwanese_news_parser/parser/apple_daily_s3.html +334 -0
- data/spec/taiwanese_news_parser/parser/apple_daily_spec.rb +57 -0
- data/spec/taiwanese_news_parser/parser/china_times_s1.html +513 -0
- data/spec/taiwanese_news_parser/parser/china_times_s2.html +538 -0
- data/spec/taiwanese_news_parser/parser/china_times_s3.html +893 -0
- data/spec/taiwanese_news_parser/parser/china_times_s4.html +1045 -0
- data/spec/taiwanese_news_parser/parser/china_times_spec.rb +63 -0
- data/spec/taiwanese_news_parser/parser/cna_s1.html +1616 -0
- data/spec/taiwanese_news_parser/parser/cna_spec.rb +33 -0
- data/spec/taiwanese_news_parser/parser/cts_s1.html +672 -0
- data/spec/taiwanese_news_parser/parser/cts_s2.html +672 -0
- data/spec/taiwanese_news_parser/parser/cts_spec.rb +36 -0
- data/spec/taiwanese_news_parser/parser/ettoday_s1.html +1817 -0
- data/spec/taiwanese_news_parser/parser/ettoday_s2.html +1822 -0
- data/spec/taiwanese_news_parser/parser/ettoday_spec.rb +35 -0
- data/spec/taiwanese_news_parser/parser/liberty_times_big5_s1.html +213 -0
- data/spec/taiwanese_news_parser/parser/liberty_times_big5_spec.rb +31 -0
- data/spec/taiwanese_news_parser/parser/liberty_times_s1.html +145 -0
- data/spec/taiwanese_news_parser/parser/liberty_times_spec.rb +29 -0
- data/spec/taiwanese_news_parser/parser/now_news_s1.html +968 -0
- data/spec/taiwanese_news_parser/parser/now_news_s2.html +986 -0
- data/spec/taiwanese_news_parser/parser/now_news_spec.rb +31 -0
- data/spec/taiwanese_news_parser/parser/tvbs_s1.html +734 -0
- data/spec/taiwanese_news_parser/parser/tvbs_s2.html +739 -0
- data/spec/taiwanese_news_parser/parser/tvbs_spec.rb +36 -0
- data/spec/taiwanese_news_parser/parser/udn_s1.html +1678 -0
- data/spec/taiwanese_news_parser/parser/udn_spec.rb +42 -0
- data/taiwanese_news_parser.gemspec +30 -0
- metadata +237 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the CTS parser: article parsing (#parse) against two saved HTML
# samples, and extraction of the article id from a URL (.parse_url_id).
describe TaiwaneseNewsParser::Parser::Cts do
  describe '#parse' do
    # The clock is pinned for every example in this group.
    # NOTE(review): presumably the parser resolves some dates against the
    # current time -- confirm against the parser implementation.
    before do
      Timecop.freeze(Time.local(2013, 6, 29, 9, 13))
    end

    # Undo the freeze so the pinned clock does not leak into examples that
    # run after this group.
    after do
      Timecop.return
    end

    it 'parses an article published under the /cts/ path' do
      url = 'http://news.cts.com.tw/cts/politics/201403/201403191393958.html'
      # Serve the saved sample instead of hitting the network.
      # NOTE(review): `sample` is defined outside this file (presumably in
      # spec_helper) -- confirm.
      FakeWeb.register_uri(:get, url, body: sample(__FILE__, 'cts_s1.html'))

      article = described_class.new(url).parse

      article[:title].should == '為何反服貿? 抗議學生搞不清'
      article[:content].should include('因為反黑箱服貿,學生霸佔國會')
      article[:company_name].should == '華視'
      article[:reporter_name].should == '彭佳芸 黃翊真'
      article[:published_at].should == Time.new(2014, 3, 19, 18, 58)
    end

    it 'parses an article published under the /nownews/ path, which has no reporter' do
      url = 'http://news.cts.com.tw/nownews/politics/201403/201403221395428.html'
      FakeWeb.register_uri(:get, url, body: sample(__FILE__, 'cts_s2.html'))

      article = described_class.new(url).parse

      article[:title].should == '學生要求馬英九對話 總統府:不會接受'
      article[:content].should include('但是學生用這種方式要求對話,府方不會接受。')
      article[:company_name].should == '今日新聞'
      article[:reporter_name].should be_nil
      article[:published_at].should == Time.new(2014, 3, 22, 10, 9)
    end
  end

  describe '#parse_url_id' do
    it 'returns the numeric file name of the article URL' do
      url = 'http://news.cts.com.tw/cts/politics/201403/201403191393958.html'

      described_class.parse_url_id(url).should == '201403191393958'
    end
  end
end
|