taiwanese_news_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +23 -0
  6. data/Rakefile +4 -0
  7. data/g0v.json +37 -0
  8. data/lib/taiwanese_news_parser/parser/apple_daily.rb +69 -0
  9. data/lib/taiwanese_news_parser/parser/china_times.rb +76 -0
  10. data/lib/taiwanese_news_parser/parser/cna.rb +59 -0
  11. data/lib/taiwanese_news_parser/parser/cts.rb +52 -0
  12. data/lib/taiwanese_news_parser/parser/ettoday.rb +53 -0
  13. data/lib/taiwanese_news_parser/parser/liberty_times.rb +66 -0
  14. data/lib/taiwanese_news_parser/parser/liberty_times_big5.rb +51 -0
  15. data/lib/taiwanese_news_parser/parser/now_news.rb +53 -0
  16. data/lib/taiwanese_news_parser/parser/tvbs.rb +46 -0
  17. data/lib/taiwanese_news_parser/parser/udn.rb +43 -0
  18. data/lib/taiwanese_news_parser/parser.rb +57 -0
  19. data/lib/taiwanese_news_parser/url_cleaner.rb +19 -0
  20. data/lib/taiwanese_news_parser/version.rb +3 -0
  21. data/lib/taiwanese_news_parser.rb +15 -0
  22. data/spec/spec_helper.rb +9 -0
  23. data/spec/taiwanese_news_parser/parser/apple_daily_s1.html +484 -0
  24. data/spec/taiwanese_news_parser/parser/apple_daily_s2.html +333 -0
  25. data/spec/taiwanese_news_parser/parser/apple_daily_s3.html +334 -0
  26. data/spec/taiwanese_news_parser/parser/apple_daily_spec.rb +57 -0
  27. data/spec/taiwanese_news_parser/parser/china_times_s1.html +513 -0
  28. data/spec/taiwanese_news_parser/parser/china_times_s2.html +538 -0
  29. data/spec/taiwanese_news_parser/parser/china_times_s3.html +893 -0
  30. data/spec/taiwanese_news_parser/parser/china_times_s4.html +1045 -0
  31. data/spec/taiwanese_news_parser/parser/china_times_spec.rb +63 -0
  32. data/spec/taiwanese_news_parser/parser/cna_s1.html +1616 -0
  33. data/spec/taiwanese_news_parser/parser/cna_spec.rb +33 -0
  34. data/spec/taiwanese_news_parser/parser/cts_s1.html +672 -0
  35. data/spec/taiwanese_news_parser/parser/cts_s2.html +672 -0
  36. data/spec/taiwanese_news_parser/parser/cts_spec.rb +36 -0
  37. data/spec/taiwanese_news_parser/parser/ettoday_s1.html +1817 -0
  38. data/spec/taiwanese_news_parser/parser/ettoday_s2.html +1822 -0
  39. data/spec/taiwanese_news_parser/parser/ettoday_spec.rb +35 -0
  40. data/spec/taiwanese_news_parser/parser/liberty_times_big5_s1.html +213 -0
  41. data/spec/taiwanese_news_parser/parser/liberty_times_big5_spec.rb +31 -0
  42. data/spec/taiwanese_news_parser/parser/liberty_times_s1.html +145 -0
  43. data/spec/taiwanese_news_parser/parser/liberty_times_spec.rb +29 -0
  44. data/spec/taiwanese_news_parser/parser/now_news_s1.html +968 -0
  45. data/spec/taiwanese_news_parser/parser/now_news_s2.html +986 -0
  46. data/spec/taiwanese_news_parser/parser/now_news_spec.rb +31 -0
  47. data/spec/taiwanese_news_parser/parser/tvbs_s1.html +734 -0
  48. data/spec/taiwanese_news_parser/parser/tvbs_s2.html +739 -0
  49. data/spec/taiwanese_news_parser/parser/tvbs_spec.rb +36 -0
  50. data/spec/taiwanese_news_parser/parser/udn_s1.html +1678 -0
  51. data/spec/taiwanese_news_parser/parser/udn_spec.rb +42 -0
  52. data/taiwanese_news_parser.gemspec +30 -0
  53. metadata +237 -0
@@ -0,0 +1,33 @@
1
+ require 'spec_helper'
2
+
3
+ describe TaiwaneseNewsParser::Parser::Cna do
4
+ describe '#parse' do
5
+ it do
6
+ url = 'http://www.cna.com.tw/News/FirstNews/201306290064-1.aspx'
7
+ FakeWeb.register_uri(:get, url, body:sample(__FILE__,'cna_s1.html'))
8
+ article = described_class.new(url).parse
9
+ article[:title].should == '電競亞室運 中華隊奪牌希望濃'
10
+ article[:content].should include('中華代表隊選手楊家正(Sen),今天在韓國仁川進行的2013亞洲室內暨武藝運動會中的電競比賽「星海爭霸II」預賽')
11
+ article[:company_name].should == '中央社'
12
+ #TODO article[:reporter_name].should == '姜遠珍'
13
+ article[:published_at].should == Time.new(2013,6,29,19,3)
14
+ end
15
+ end
16
+
17
+ describe '#parse_url_id' do
18
+ it 'handle different versions of same news' do
19
+ url = 'http://www.cna.com.tw/News/FirstNews/201308140023.aspx'
20
+ described_class.parse_url_id(url).should == '201308140023'
21
+
22
+ url = 'http://www.cna.com.tw/News/FirstNews/201308160018-1.aspx'
23
+ described_class.parse_url_id(url).should == '201308160018'
24
+
25
+ url = 'http://www.cna.com.tw/News/FirstNews/201308110023-3.aspx'
26
+ described_class.parse_url_id(url).should == '201308110023'
27
+ end
28
+ it 'handle grouped news url' do
29
+ url = 'http://www.cna.com.tw/Topic/Popular/3912-1/201308140022-1.aspx'
30
+ described_class.parse_url_id(url).should == '201308140022'
31
+ end
32
+ end
33
+ end