taiwanese_news_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +23 -0
  6. data/Rakefile +4 -0
  7. data/g0v.json +37 -0
  8. data/lib/taiwanese_news_parser/parser/apple_daily.rb +69 -0
  9. data/lib/taiwanese_news_parser/parser/china_times.rb +76 -0
  10. data/lib/taiwanese_news_parser/parser/cna.rb +59 -0
  11. data/lib/taiwanese_news_parser/parser/cts.rb +52 -0
  12. data/lib/taiwanese_news_parser/parser/ettoday.rb +53 -0
  13. data/lib/taiwanese_news_parser/parser/liberty_times.rb +66 -0
  14. data/lib/taiwanese_news_parser/parser/liberty_times_big5.rb +51 -0
  15. data/lib/taiwanese_news_parser/parser/now_news.rb +53 -0
  16. data/lib/taiwanese_news_parser/parser/tvbs.rb +46 -0
  17. data/lib/taiwanese_news_parser/parser/udn.rb +43 -0
  18. data/lib/taiwanese_news_parser/parser.rb +57 -0
  19. data/lib/taiwanese_news_parser/url_cleaner.rb +19 -0
  20. data/lib/taiwanese_news_parser/version.rb +3 -0
  21. data/lib/taiwanese_news_parser.rb +15 -0
  22. data/spec/spec_helper.rb +9 -0
  23. data/spec/taiwanese_news_parser/parser/apple_daily_s1.html +484 -0
  24. data/spec/taiwanese_news_parser/parser/apple_daily_s2.html +333 -0
  25. data/spec/taiwanese_news_parser/parser/apple_daily_s3.html +334 -0
  26. data/spec/taiwanese_news_parser/parser/apple_daily_spec.rb +57 -0
  27. data/spec/taiwanese_news_parser/parser/china_times_s1.html +513 -0
  28. data/spec/taiwanese_news_parser/parser/china_times_s2.html +538 -0
  29. data/spec/taiwanese_news_parser/parser/china_times_s3.html +893 -0
  30. data/spec/taiwanese_news_parser/parser/china_times_s4.html +1045 -0
  31. data/spec/taiwanese_news_parser/parser/china_times_spec.rb +63 -0
  32. data/spec/taiwanese_news_parser/parser/cna_s1.html +1616 -0
  33. data/spec/taiwanese_news_parser/parser/cna_spec.rb +33 -0
  34. data/spec/taiwanese_news_parser/parser/cts_s1.html +672 -0
  35. data/spec/taiwanese_news_parser/parser/cts_s2.html +672 -0
  36. data/spec/taiwanese_news_parser/parser/cts_spec.rb +36 -0
  37. data/spec/taiwanese_news_parser/parser/ettoday_s1.html +1817 -0
  38. data/spec/taiwanese_news_parser/parser/ettoday_s2.html +1822 -0
  39. data/spec/taiwanese_news_parser/parser/ettoday_spec.rb +35 -0
  40. data/spec/taiwanese_news_parser/parser/liberty_times_big5_s1.html +213 -0
  41. data/spec/taiwanese_news_parser/parser/liberty_times_big5_spec.rb +31 -0
  42. data/spec/taiwanese_news_parser/parser/liberty_times_s1.html +145 -0
  43. data/spec/taiwanese_news_parser/parser/liberty_times_spec.rb +29 -0
  44. data/spec/taiwanese_news_parser/parser/now_news_s1.html +968 -0
  45. data/spec/taiwanese_news_parser/parser/now_news_s2.html +986 -0
  46. data/spec/taiwanese_news_parser/parser/now_news_spec.rb +31 -0
  47. data/spec/taiwanese_news_parser/parser/tvbs_s1.html +734 -0
  48. data/spec/taiwanese_news_parser/parser/tvbs_s2.html +739 -0
  49. data/spec/taiwanese_news_parser/parser/tvbs_spec.rb +36 -0
  50. data/spec/taiwanese_news_parser/parser/udn_s1.html +1678 -0
  51. data/spec/taiwanese_news_parser/parser/udn_spec.rb +42 -0
  52. data/taiwanese_news_parser.gemspec +30 -0
  53. metadata +237 -0
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ describe TaiwaneseNewsParser::Parser::Ettoday do
4
+ describe '#parse' do
5
+ it do
6
+ url = 'http://www.ettoday.net/news/20131129/302433.htm'
7
+ FakeWeb.register_uri(:get, url, body:sample(__FILE__,'ettoday_s1.html'))
8
+ article = described_class.new(url).parse
9
+ article[:title].should == '檢方都諭令「不得騷擾」了 王貴芬30日赴長庚道歉'
10
+ article[:content].should include('桃園檢方訊後將她以10萬元交保並限制住居及出境')
11
+ article[:content].should include('否則可能會違反檢方諭令')
12
+ article[:content].should_not include('活動衝動倒數')
13
+ article[:content].should_not include('酒店妹自殺')
14
+ article[:company_name].should == '東森'
15
+ article[:reporter_name].should == nil
16
+ article[:published_at].should == Time.new(2013,11,29,21,19)
17
+ end
18
+ it do
19
+ url = 'http://www.ettoday.net/news/20130128/158005.htm'
20
+ FakeWeb.register_uri(:get, url, body:sample(__FILE__,'ettoday_s2.html'))
21
+ article = described_class.new(url).parse
22
+ article[:title].should == '軍公教18%調9%? 民進黨不支持:18%結構複雜'
23
+ article[:content].should include('軍公教18%優存利率可望調降為9%')
24
+ article[:content].should_not include('看見台灣列環教片')
25
+ article[:company_name].should == '東森'
26
+ article[:reporter_name].should == '王文萱'
27
+ article[:published_at].should == Time.new(2013,1,28,15,23)
28
+ end
29
+ end
30
+ describe '.parse_url_id' do
31
+ it do
32
+ described_class.parse_url_id('http://www.ettoday.net/news/20131129/302031.htm').should == '20131129/302031'
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,213 @@
1
+ <html xmlns="http://www.w3.org/1999/xhtml">
2
+ <head>
3
+ <meta http-equiv="Content-Type" content="text/html; charset=big5" />
4
+ <title>�ۥѹq�l�� - �G�ġH��s�H���� �u�����v�C�֦~</title>
5
+ <meta http-equiv="Content-Language" content="zh-tw" />
6
+ <meta http-equiv="Pragma" content="no-cache" />
7
+ <meta name="keywords" content="�ۥѮɳ�,�ۥѹq�l��,The Liberty Times" />
8
+ <meta name="robots" content="index,follow" />
9
+ <meta property="fb:app_id" content="140490219413038">
10
+ <link href="/css/index/style7.css" rel="stylesheet" type="text/css" />
11
+ <link rel="canonical" href="http://www.libertytimes.com.tw/2013/new/jun/29/today-t2.htm" />
12
+ <script type="text/javascript">
13
+ var _gaq = _gaq || [];
14
+ _gaq.push(['_setAccount', 'UA-2887146-7']);
15
+ _gaq.push(['_trackPageview']);
16
+ (function() {
17
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
18
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
19
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
20
+ })();
21
+ </script><script type="text/javascript">
22
+ <!--
23
+ function MM_preloadImages() { //v3.0
24
+ var d=document; if(d.images){ if(!d.MM_p) d.MM_p=new Array();
25
+ var i,j=d.MM_p.length,a=MM_preloadImages.arguments; for(i=0; i<a.length; i++)
26
+ if (a[i].indexOf("#")!=0){ d.MM_p[j]=new Image; d.MM_p[j++].src=a[i];}}
27
+ }
28
+ function MM_reloadPage(init) { //reloads the window if Nav4 resized
29
+ if (init==true) with (navigator) {if ((appName=="Netscape")&&(parseInt(appVersion)==4)) {
30
+ document.MM_pgW=innerWidth; document.MM_pgH=innerHeight; onresize=MM_reloadPage; }}
31
+ else if (innerWidth!=document.MM_pgW || innerHeight!=document.MM_pgH) location.reload();
32
+ }
33
+ MM_reloadPage(true);
34
+ function AD_openWindow(ano,target) { //v2.0
35
+ window.open('http://iservice.libertytimes.com.tw/IService2/ad.php?ano='+ano+'&target='+target+'&source='+document.URL);
36
+ }
37
+ function AD_openWindow(ano,target) { //v2.0
38
+ window.open('http://iservice.libertytimes.com.tw/IService2/ad.php?ano='+ano+'&target='+target+'&source='+document.URL);
39
+ }
40
+
41
+ var newsid = 'newsc692404';
42
+ //-->
43
+ </script>
44
+ <script type="text/javascript" src="/js/Print.js"></script>
45
+ </head>
46
+ <body leftmargin="0" topmargin="0" marginwidth="0" marginheight="0" onResize="leftMargin=getLeftMargin()" class="bgBody">
47
+ <!--Head start-->
48
+ <div id="header" > <div id="topnav"><ul><li>&nbsp;</li>
49
+ <li><a href="http://www.yes123.com.tw/admin/index.asp" target="_blank">yes123�D¾��</a></li><li><a href="http://iservice.libertytimes.com.tw/inform/complain.php" target="_blank">�z�Ƨ�D</a></li>
50
+ <li><a href="http://www.libertytimes.com.tw/Service/ad.htm" target="_blank">�s�i�Z�n</a></li>
51
+ <li><a href="http://www.libertytimes.com.tw/Service/Order.htm" target="_blank">�q��</a></li>
52
+ <li><a href="http://www.libertytimes.com.tw/Service/rss.htm" target="_blank">RSS</a></li>
53
+ <li><a href="http://iservice.libertytimes.com.tw/inform/complain_1.php?type=3" target="_blank">�p���ڭ�</a></li></ul>
54
+ </div>
55
+ <div id="clear"></div>
56
+ <div class="logo"><a href="http://www.libertytimes.com.tw/"><img src="../../../../2008/images/img_auto/005/logo_new.gif" alt="" width="220" height="70" border="0"></a></div>
57
+ <div class="title"><img src="../../../images/img_auto/005/top_title1.gif" alt=""></div>
58
+ <div class="ad"><script src="../../../../js/ajs/BindexA03.js"></script></div>
59
+ <div id="clear"></div>
60
+ <!--channel start-->
61
+ <div id="navcontainer_frame"> <div id="navcontainer"> <ul id="navlist" class='float-left'>
62
+ <li><a href="/" id="current">�ۥѷs�D</a></li>
63
+ <li><a href="http://video.libertytimes.com.tw/">�v���T��</a></li><li><a href="http://iservice.libertytimes.com.tw/3c/">3C ���</a></li>
64
+ <li><a href="http://iservice.libertytimes.com.tw/inform/news.php">Ū�̶�a</a></li>
65
+
66
+ <li><a href="http://cheap.libertytimes.com.tw/">�n�d����</a></li>
67
+ <li><a href="http://www.taipeitimes.com/News" target="_blank"> TAIPEI TIMES </a></li>
68
+ <li><a href="http://blog.libertytimes.com.tw/"> Blog </a></li>
69
+ <li><a href="http://m.ltn.com.tw/"> ����� </a></li>
70
+ </ul>
71
+ </div>
72
+ <div id="navcontainer_right">
73
+ <form name="form1" method="post" action="http://iservice.libertytimes.com.tw/IService2/search.php" target="_blank" style="margin:0">
74
+ <input type="hidden" name="flag" value="2">
75
+ �s�D�d�ߡG
76
+ <input type="text" name="InputKey2" size="20">
77
+ <a href="javascript:form1.submit()"><img src="../../../images/img_auto/005/go-search1.gif" width="29" height="16" align="absmiddle" border="0"></a>
78
+ </form>
79
+ </div>
80
+ </div>
81
+ <div id="clear"></div>
82
+
83
+ </div>
84
+ <a name="top" id="top"></a><!--main start-->
85
+ <div class="MainDiv">
86
+ <!--left start-->
87
+ <div class="LeftDiv">
88
+ <div class="menutop" style="font-size:3px;"></div>
89
+ <!--Left Menu start-->
90
+ <script src="menu2.js?635080815006445000"></script>
91
+ <script src="../../../../gamebase.js"></script>
92
+ <div class="VideoMenu"><a href="http://video.libertytimes.com.tw/channel.php?ch=4" target="_blank" onFocus="this.blur()"><img src="../../../images/trailer2.gif" border="0"></a></div>
93
+
94
+ <table width="130" cellpadding="0" cellspacing="0" bgcolor="#FCF0CB">
95
+ <tr>
96
+ <td background="../../../images/img_auto/005/tb3.gif"><div align="left">
97
+ <img src="../../../images/img_auto/005/dot2.gif" align="absmiddle" />
98
+ <span class="bmtitle">�A�ȱM��</span></div>
99
+ </td>
100
+ </tr>
101
+ <tr>
102
+ <td> <table width="100%" border="0" cellpadding="0" cellspacing="1" bgcolor="#E3C9AE">
103
+ <tr>
104
+ <td bgcolor="#FCF0CB"><table width="100%" align="center" cellpadding="2" cellspacing="0" class="style3">
105
+ <tr>
106
+ <td align="center" bgcolor="#FCF0CB"><a class="style2" href="http://iservice.libertytimes.com.tw/inform/complain.php" target="_blank">�ڭn�z��</a>
107
+ �D<a class="style2" href="http://www.libertytimes.com.tw/Service/recruit.htm" target="_blank">�ۥѼx�~</a></td>
108
+ </tr>
109
+ <tr>
110
+ <td align="center" bgcolor="#FCF0CB"><a class="style3" href="http://www.libertytimes.com.tw/Service/ad.htm" target="_blank">�s�i�Z�n</a>
111
+ �D<a class="style3" href="http://www.libertytimes.com.tw/Service/Order.htm" target="_blank">�q���A��</a></td>
112
+ </tr>
113
+ <tr>
114
+ <td align="center" bgcolor="#FCF0CB"><a class="style3" href="http://cheap.libertytimes.com.tw/free_list.php?type=%E6%B4%BB%E5%8B%95" target="_blank">���ʥZ�n</a>
115
+ �D<a class="style3" href="http://www.libertytimes.com.tw/Service/climbing.htm" target="_blank">�n�s����</a></td>
116
+ </tr>
117
+ <tr>
118
+ <td align="center" bgcolor="#FCF0CB"><a class="style3" href="http://www.taiwanlottery.com.tw/" target="_blank">�ֳz�m��</a>
119
+ �D<a class="style3" href="http://invoice.etax.nat.gov.tw/" target="_blank">�Τ@�o��</a></td>
120
+ </tr>
121
+ </table></td>
122
+ </tr>
123
+ </table></td>
124
+ </tr>
125
+ </table>
126
+ <div style="margin-top:12px"><script src="/js/ajs/BindexM01.js"></script></div>
127
+ </div>
128
+ <!--left end-->
129
+ <!--Main content start-->
130
+ <div class="CenterDiv">
131
+ <div class="newstopbar"><a href="http://www.libertytimes.com.tw/">����</a> > �Y���s�D</div>
132
+ <table class="newsuse" border="0" cellspacing="2" cellpadding="2" align="center" >
133
+ <tr>
134
+ <td width="70" align="left" valign="bottom" id="date" class="style7">2013-6-29</td>
135
+ <td align="right" valign="bottom"><font size="2">�r���G</font><img src="../../../images/img_auto/005/font+.gif" align="absmiddle" id="adjustBig" class="adjustFontShow" onClick="adjustFont('big')"> <img src="../../../images/img_auto/005/font-.gif" align="absmiddle" id="adjustSmall" class="adjustFontHide" onClick="adjustFont('small')"> ��<a href="#comments" class="WeaFont">�o��</a>��<a href="#" OnClick="printScreen(newsid)" class="WeaFont">�C�L</a>��<a href="#" onClick="open('http://iservice.libertytimes.com.tw/IService3/forward.php?newsNo=692404','','width=600,height=470,resizable=yes')" class="WeaFont">��H</a></td>
136
+ </tr>
137
+ </table>
138
+ <script type="text/javascript" src="/js/community2.js?b"></script>
139
+ <table width="95%" border="0" cellspacing="1" cellpadding="1" align="center">
140
+ <tr valign="top">
141
+ <td class="content" id="newsContent">
142
+ <!-- replace -->
143
+ <span class="insubject1" id="newtitle">�G�ġH��s�H���� �u�����v�C�֦~</span>
144
+ <div id="K3"><script src="/js/ajs/BindexK03.js"></script></div>
145
+
146
+ <span id="newsc692404"><table class=picture><tr><td><a href="http://iservice.libertytimes.com.tw/IService3/newspic.php?pic=http://www.libertytimes.com.tw/2013/new/jun/29/images/bigPic/144.jpg" ><img src=images/144.jpg border=0></a></td></tr><tr><td style="padding:5 0 5 0">�˥��ҥߩe���ɼ�Q�|��O�̷|�A���X�t�ӱ��X���G��s�o���M���Х�ĵ�y�A�ɭP���G��s�P�G�Ķ��ƵL�k����Ϲj���ѡA�����ܰs�~�ּh�V�U�������t�������C�]�O�̼B�H�w��^�����s�L�q�A���`���d/�����~�ФŶ��s��</td></tr></table><p><span class=boldtitle>���G�f����s ĵ�y�ܼҽk</span></p><p>�e�O�̬I����B�d�G���B���z�ӡ��x�_���ɡf�����W�X�{�U�ӷU�h�H���G�f�������I����s�A�Ʀܩ�X�d�q�H��Hello Kitty���ŶǡA�è�N�ҽk�u�����~�ФŶ��s�v��ĵ�y�ХܡA�˥��ҥߩe���ɼ�Q�ѽ�áA�o�ǰs�~�s�i�ΦP�޻��C�֦~�ܰs�A�D�޾����o�@�L�@���C</p><p>���ɼ���ܡA�s�ӧ䰸���N���w�g�ΦP�ܬ۶ʤƳܰs�~���ơA�p���A�HHello Kitty�@���]�ˡA���H�����M���檺�s���O�G���٬O�s�F�Ʀܳs���窺�x�W�Ұs���q���G��sĵ�y�Хܤ]�P����Ӭ۪�A���H���ѡA�L���O�ɭP�ܰs�~�ּh�u�V�U�����v���t�������C</p><p><span class=boldtitle>�s��@�׸��C �l�ަ~���H</span></p><p>�ثe������G�f������s�A�q�`�s��@�׳��b�G�D���H�ܤT�H�A�۸��s��@�פ��H�ܤC�H���@���s�A�@�׸��C�F�[�W�t�ӥt�~�K�[���G�����A�f�P���P��@���s�A�l�ޤ��֦~���H���աC</p><p>�x�_����q�j�����ܡA�H���ˤ��G��s�ӻ��A�魫�C�Q���窺���~�H�ܥ|�줭���A�s���ȴN�W�L���D�@���F�ܤW�K���A�N�|�W�L���@�M�I�o���зǡ��D�G���C</p><p>�]�F����w�p�Ƹp�����������ܡA�s�~�Z�Oĵ�y�Хܤ��M�A���|�n�D�t�ӭ����ﵽ�A�_�h�̡u�s���Хܺ޲z��k�v�B�@�Q�U�ܤ��Q�U���@��F�]�F���|�b���������M�׬d�ְs�~�]�˱��ΡC</p><p>���������A�ϥΥi�R�d�q�Ϯװ���P���@�w�u�O�w�良���~�H�A�n�ڦ��{�w�޻��C�֦~���s�A���ȷ|���x���A���i�H��~�̶i���U���C�]�F����e�w���X�u�Ұs�޲z�k�ץ���סv�A���w�s���s�i�B�P�P���o�H�ൣ�B�֦~���D�D��H�C</p><p>���ɼ���ܱN���׭ץ��Ұs�޲z�k�A�n�D�s���e��������Хܰs�~�W�١B�s��@�פ�ĵ�y�A�ӥB�s�i�ΫP�P���o�t�ܶ��ΰs����W�i�������Y�B�M�~�a��Φ��\�C</p><p><span class=boldtitle>��s�����~�H �ߩe�P���@</span></p><p>���~���~�̳c��s�~���C�֦~�A�o�ٱN���׭ץ��u�ൣ�Τ֦~�֧Q�P�v�q�O�٪k�v�[���@�d�A�@��Ѳ{��T�d���H�W�B�@�U���d���H�U�A�������T�U���H�W�B�Q�U���H�U�A�ä����H�k�~�̦W��C���~�A�F�������Y��d�r�c��s���ӫ~�������~�����a�A�ó]�����|�M�u�C</p><p>�t�~�A��F�|�H�Ʀ�F�`�B�Q���|��󳡷|ij�A�w�綠�ȭ��s�r�q�X��F�@�h�A���ȤH���p���s�r�欰�A�s���W�L�k�w�зǪ̡A�g�B�O�L�@���ܰO�@�j�L�F���F�s���зǪ̡A�g�B�ӻ|�⦸�F�Y���~�����ĤG���H�W�s�r�欰�̡A�̱��`���H�O�L�@���ܰO�@�j�L�g�B�F�Y�s�r�F�ơA���Y���|�Q�B�H��j�L�K¾�C���ȤH���Y�s�r�gĵ����l�A���D�ʧi���A�Ⱦ����H�Ƴ��A�_�h�N�t���g�B�C</p><p>�ƤH�ƪ��i�������A�ܩ�s�r���ȭ����D�ެO�_��ӭxĵ�s���B���A�ѦU�����ۦ�M�w�C</p><p><span class=boldtitle>�����s�L�q�A���`���d/�����~�ФŶ��s��</span></p></span>
147
+ <!-- replace --> </td>
148
+ </tr>
149
+ <tr>
150
+ <td><br>
151
+ <div class="rate"></div>
152
+
153
+ <script src="/js/ajs/BindexK02.js"></script>
154
+ <script src="/js/ajs/BindexK01.js"></script>
155
+ <p class="gotop"><a href="#top" class="gotop">��TOP</a></p>
156
+ </td>
157
+ </tr>
158
+ </table>
159
+ </div>
160
+ <!--Main content end-->
161
+ <!--Right Menu start-->
162
+ <div class="RightDiv">
163
+ <table width="240" align="center" cellpadding="0" cellspacing="0">
164
+ <tr>
165
+ <td><div><script src="/js/ajs/BindexH06.js"></script></div>
166
+ <div><script src="/js/ajs/BindexH01.js"></script></div>
167
+ <div class="Ad"><script src="/js/ajs/BindexH02.js"></script></div>
168
+ <div class="Ad"><script src="/js/ajs/BindexH03.js"></script></div>
169
+ <div class="Ad"><script src="/js/ajs/BindexH05.js"></script></div>
170
+ <div class="Ad"><script src="/js/ajs/BindexH04.js"></script></div>
171
+ <table width="100%" border="0" cellspacing="0" cellpadding="0" class="Ad">
172
+ <tr>
173
+ <td width="26" height="25"><img src="../../../images/img_auto/005/mt-1.gif" width="26" height="25" /></td>
174
+ <td height="25" background="../../../images/img_auto/005/mt-0.gif" class="style4">�Y���s�D</td>
175
+ <td width="18" height="25"><img src="../../../images/img_auto/005/mt-2.gif" width="18" height="25" /></td>
176
+ </tr>
177
+ </table>
178
+ </td>
179
+ </tr>
180
+ </table>
181
+ <table width=100% cellpadding=3 cellspacing=0 class=inpct>
182
+ <tr>
183
+ <td valign=top width=12><img src=../../../images/img_auto/005/point.gif /></td>
184
+ <td width=233 valign=top><a class=related href=today-t1.htm>����ij���G�A�T������ �F���藍�_�x�W�H</a></td>
185
+ </tr>
186
+ <tr>
187
+ <td valign=top width=12><img src=../../../images/img_auto/005/point.gif /></td>
188
+ <td width=233 valign=top><span class=focusNews>�G�ġH��s�H���� �u�����v�C�֦~</span></td>
189
+ </tr>
190
+ <tr>
191
+ <td valign=top width=12><img src=../../../images/img_auto/005/point.gif /></td>
192
+ <td width=233 valign=top><a class=related href=today-t3.htm>�u�Τ@�O���M�v����F�B����x</a></td>
193
+ </tr></table>
194
+ </div>
195
+ <!--Right Menu end-->
196
+ <div id="clear"></div>
197
+ </div>
198
+ <!--main end-->
199
+ <!--footer start-->
200
+ <table width="960" align="center" cellpadding="3" cellspacing="1" bgcolor="#999999">
201
+ <tr>
202
+ <td align="center" bgcolor="#ECDFB7" class="style3"><a href="http://www.libertytimes.com.tw/">�^����</a>�U
203
+ <a class="style3" href="http://www.libertytimes.com.tw/Service/publish.htm" target="_blank">�ڭn��Z</a>�U
204
+ <a class="style3" href="http://www.libertytimes.com.tw/Service/about.htm" target="_blank">����ڭ�</a>�U
205
+ <a class="style3" href="http://www.libertytimes.com.tw/Service/declaration.htm" target="_blank">���p�v�F���n��</a> <br />
206
+ <font color="#999999" size="2">�ۥѹq�l�� ���v�Ҧ� ���o��� &copy; 2013 The Liberty Times. All Rights Reserved.</font>
207
+ </td>
208
+ </tr>
209
+ </table>
210
+
211
+ <!--footer end-->
212
+ </body>
213
+ </html>
@@ -0,0 +1,31 @@
1
+ require 'spec_helper'
2
+
3
+ describe TaiwaneseNewsParser::Parser::LibertyTimesBig5 do
4
+ describe '#parse' do
5
+ it do
6
+ url = 'http://www.libertytimes.com.tw/2013/new/jun/29/today-t2.htm?Slots=T'
7
+ FakeWeb.register_uri(:get, url, body:sample(__FILE__,'liberty_times_big5_s1.html'))
8
+ article = described_class.new(url).parse
9
+ article[:title].should == '果汁?啤酒?難分 「飲誘」青少年'
10
+ article[:content].should include('市面上出現愈來愈多以水果口味為賣點的啤酒')
11
+ article[:company_name].should == '自由時報'
12
+ article[:reporter_name].should == '施曉光、吳亮儀、陳慧萍'
13
+ article[:published_at].should == Time.new(2013,6,29)
14
+ end
15
+ end
16
+
17
+ describe '#parse_url_id' do
18
+ it do
19
+ url = 'http://www.libertytimes.com.tw/2013/new/jun/29/today-sp1-3.htm'
20
+ described_class.parse_url_id(url).should == '2013/new/jun/29/today-sp1-3'
21
+ end
22
+ it do
23
+ url = 'http://www.libertytimes.com.tw/2013/new/jun/29/today-so10.htm'
24
+ described_class.parse_url_id(url).should == '2013/new/jun/29/today-so10'
25
+ end
26
+ it do
27
+ url = 'http://www.libertytimes.com.tw/2013/new/jun/29/today-int4.htm'
28
+ described_class.parse_url_id(url).should == '2013/new/jun/29/today-int4'
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,145 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
5
+ <title>又傳酒駕釀禍 少婦被撞陷昏迷 - 自由電子報 即時新聞</title>
6
+ <meta name="description" content="〔本報訊〕又傳酒駕釀禍!花蓮縣壽豐鄉一名男子昨天(28日)在友人家喝得爛醉,晚間開車返家經過壽豐路二段時,撞上一名年輕少婦,由於撞擊力道過大,少婦當場被撞飛2公" />
7
+ <link rel="stylesheet" href="liveNews3.css?326">
8
+ <link rel="canonical" href="http://iservice.libertytimes.com.tw/liveNews/news.php?no=829851&type=%E7%A4%BE%E6%9C%83" />
9
+ <link rel="image_src" type="image/jpeg" href="http://iservice.libertytimes.com.tw/Upload/liveNews/phpkbEvMi.jpg" />
10
+ <link rel="image_src" type="image/jpeg" href="http://www.libertytimes.com.tw/images/backimg90.gif" />
11
+ <meta property="fb:app_id" content="140490219413038"/>
12
+ <script type="text/javascript" src="/Js/jquery/jquery-1.3.2.min.js"></script>
13
+ <script type="text/javascript" src="/Js/adopen.js"></script>
14
+ <script type="text/javascript" src="/Js/ga/UA9.js"></script><script>no=829851;</script>
15
+ <script type="text/javascript" src="news.js?326"></script>
16
+
17
+ </head>
18
+ <body>
19
+ <div class="main" role="banner">
20
+ <div id="header">
21
+ <a href="http://www.libertytimes.com.tw" accesskey="1" title="返回首頁" tabindex="1"><img src="images/logo01.gif" id="logo01" alt="自由時報電子報" /></a>
22
+ <div id="nav" role="menubar"><a href="http://iservice.libertytimes.com.tw/inform/complain_1.php?type=3" target="_blank">建議我們</a>│ <a href="http://www.libertytimes.com.tw/Service/ad.htm" target="_blank">廣告刊登</a>│ <a href="http://www.libertytimes.com.tw/2007/new/other/service/order.htm" target="_blank">訂報服務</a>│ <a href="http://www.libertytimes.com.tw/Service/recruit.htm" target="_blank">自由徵才</a>│ <a href="http://blog.libertytimes.com.tw/" target="_blank">Blog</a></div>
23
+ <a class="at" href="#startcontent" accesskey="2" title="跳過導覽列" tabindex="2">跳過導覽列</a>
24
+ <div class="ad"><script src="http://www.libertytimes.com.tw/js/ajs/BindexA03.js"></script></div>
25
+ <hr />
26
+
27
+ </div>
28
+
29
+ <div id="news_block" role="main"> <div id="breadcrumb" role="region">
30
+ <span style="float:left"><a href="http://www.libertytimes.com.tw">自由電子報</a> &gt; <a href="./">即時新聞總覽</a> &gt; <a href="list.php?type=%E7%A4%BE%E6%9C%83">社會</a></span>
31
+ <span style="float:right">2013年6月29日‧星期六</span>
32
+ </div>
33
+ <div id="nav_live" role="menu">
34
+ <div class="type_link"><a href="list.php?type=%E5%8D%B3%E6%99%82%E6%96%B0%E8%81%9E" class="type_link1" role="menuitem" title="即時新聞">即時新聞</a><a href="list.php?type=%E6%94%BF%E6%B2%BB" role="menuitem" title="政治新聞列表">政治</a><a href="list.php?type=%E7%A4%BE%E6%9C%83" role="menuitem" title="社會新聞列表">社會</a><a href="list.php?type=%E7%A7%91%E6%8A%80" role="menuitem" title="科技新聞列表">科技</a><a href="list.php?type=%E5%9C%8B%E9%9A%9B" role="menuitem" title="國際新聞列表">國際</a><a href="list.php?type=%E8%B2%A1%E7%B6%93" role="menuitem" title="財經新聞列表">財經</a><a href="list.php?type=%E7%94%9F%E6%B4%BB" role="menuitem" title="生活新聞列表">生活</a><a href="list.php?type=%E9%AB%94%E8%82%B2" role="menuitem" title="體育新聞列表">體育</a><a href="list.php?type=%E5%BD%B1%E5%8A%87" role="menuitem" title="影劇新聞列表">影劇</a><a href="list.php?type=%E8%B6%A3%E8%81%9E" role="menuitem" title="趣聞新聞列表">趣聞</a></div>
35
+ <div class="AAA">&nbsp;</div>
36
+ </div>
37
+ <table cellpadding="0" cellspacing="0" class="newsTb" role="presentation">
38
+ <thead>
39
+ <tr>
40
+ <th class="news_kind">社會</th>
41
+ <td class="font_size" valign="middle">字型:
42
+ <img src="images/font+.gif" name="adjustBig" alt="大字" id="adjustBig" onClick="adjustFont('big')"><img src="images/font-.gif" name="adjustSmall" alt="小字" id="adjustSmall" onClick="adjustFont('small')">∣<a href="javascript://" onclick="printScreen()">列印</a></td>
43
+ </tr>
44
+ </thead>
45
+ <script type="text/javascript">web = 'http://www.libertytimes.com.tw';</script>
46
+ <script type="text/javascript" src="/Js/community.js?0103"></script>
47
+ <tbody>
48
+ <tr>
49
+ <td class="body" id="pr_cont" colspan="2"><a name="startcontent" id="startcontent"></a>
50
+ <h2 id="newsti" class="news_title" role="heading">又傳酒駕釀禍 少婦被撞陷昏迷 <span class="conttime">【17:52】</span></h2>
51
+ <div id="LB3" class="googlead"></div>
52
+ <div class="pic_area"><img src="../Upload/liveNews/phpkbEvMi.jpg" style="margin:5px" alt="新聞圖片"><div class="pic_text">花蓮縣壽豐鄉壽豐路二段與壽文路口28日發生車禍,又是酒後駕車闖禍。(記者王錦義翻攝)</div></div> <div id="newsc" style="display:none">
53
+ <a href="http://iservice.libertytimes.com.tw/liveNews/news.php?type=%E7%A4%BE%E6%9C%83&no=829851">…詳全文</a><br />
54
+ <a href="http://iservice.libertytimes.com.tw/liveNews/list.php?type=%E5%8D%B3%E6%99%82%E6%96%B0%E8%81%9E">[更多即時]</a>
55
+ <div id="channel">
56
+ <dl><dt><img align="absmiddle" alt="影音" src="http://www.libertytimes.com.tw/images/pic/t9.gif">
57
+ <a target="_blank" href="http://video.libertytimes.com.tw/">[更多影音]</a></dt>
58
+ <dd class="ch_img"><a href="http://video.libertytimes.com.tw/video.php?id=18881" target="_blank"><img src="../Upload/temp/18881.jpg" width="80" height="80" border="0" alt="我們的交換日記/電影預告"></a></dd>
59
+ <dd>.<a href="http://video.libertytimes.com.tw/video.php?id=18881" target="_blank" title="我們的交換日記/電影預告">我們的交換日記/電...</a></dd><dd>.<a href="http://video.libertytimes.com.tw/video.php?id=18879" target="_blank" title="賈伯斯/電影預告">賈伯斯/電影預告</a></dd><dd>.<a href="http://video.libertytimes.com.tw/video.php?id=18878" target="_blank" title="如虹音樂會》趙詠華暗戀周華健 昇華成兄妹情">如虹音樂會》趙詠華...</a></dd><dd>.<a href="http://video.libertytimes.com.tw/video.php?id=18877" target="_blank" title="如虹音樂會》趙詠華爆料 黃小琥私底下很溫柔?">如虹音樂會》趙詠華...</a></dd></dl>
60
+ <dl><dt><img align="absmiddle" alt="旅遊" src="http://www.libertytimes.com.tw/images/pic/t11.gif">
61
+ <a target="_blank" href="http://travel.libertytimes.com.tw/report/">[更多旅遊]</a></dt>
62
+ <dd class="ch_img"><a href="http://travel.libertytimes.com.tw/report/32822/" target="_blank"><img src="../Upload/temp/32822.jpeg" width="80" height="80" border="0" alt="日本鎌倉》青春微感傷"></a></dd>
63
+ <dd>.<a href="http://travel.libertytimes.com.tw/report/32822/" target="_blank" title="日本鎌倉》青春微感傷">日本鎌倉》青春微感傷</a></dd><dd>.<a href="http://travel.libertytimes.com.tw/report/32655/" target="_blank" title="到神奈川快樂上學》日本教育旅行">到神奈川快樂上學》...</a></dd><dd>.<a href="http://travel.libertytimes.com.tw/report/32441/" target="_blank" title="關島夏日盛會》奔向亮麗晴空">關島夏日盛會》奔向...</a></dd><dd>.<a href="http://travel.libertytimes.com.tw/report/32244/" target="_blank" title="泰舒適! 曼谷旅店巡禮》Dusit Thani飯店">泰舒適! 曼谷旅店...</a></dd></dl>
64
+ <dl><dt><img align="absmiddle" alt="好康" src="http://www.libertytimes.com.tw/images/pic/t10.gif">
65
+ <a target="_blank" href="http://cheap.libertytimes.com.tw/">[更多好康]</a></dt>
66
+ <dd class="ch_img"><a href="http://cheap.libertytimes.com.tw/goodnews-2.php?no=11701" target="_blank"><img src="../Upload/temp/phpTJVKiX.jpg" width="80" height="80" border="0" alt="羅技觸控滑鼠T400 傳統與觸控的結合 具備專用觸控區與熟悉的滑鼠功能"></a></dd>
67
+ <dd>.<a href="http://cheap.libertytimes.com.tw/goodnews-2.php?no=11701" target="_blank" title="羅技觸控滑鼠T400 傳統與觸控的結合 具備專用觸控區與熟悉的滑鼠功能">羅技觸控滑鼠T400 ...</a></dd><dd>.<a href="http://cheap.libertytimes.com.tw/goodnews-2.php?no=11700" target="_blank" title="PQI再推出BlueAnt「Ribbon」新型藍牙耳機 及「Embrace」頭戴式立體聲耳機 如真高音質 隨時享受音律間的美感">PQI再推出BlueAnt「...</a></dd><dd>.<a href="http://cheap.libertytimes.com.tw/goodnews-2.php?no=11699" target="_blank" title="Logitech UE 6000 完美音準 主動隔音 展現純正音樂內涵">Logitech UE 6000 ...</a></dd><dd>.<a href="http://cheap.libertytimes.com.tw/goodnews-2.php?no=11698" target="_blank" title="Logitech UE 4000 聲色兼具 專為質男靚女人打造之音樂潮品">Logitech UE 4000 ...</a></dd></dl>
68
+ <dl><dt><img align="absmiddle" alt="Blog" src="http://www.libertytimes.com.tw/images/pic/t12.gif">
69
+ <a target="_blank" href="http://blog.libertytimes.com.tw/">[更多Blog]</a></dt>
70
+ <dd class="ch_img"><a href="http://blog.libertytimes.com.tw/helen0618/2013/03/11/137905" target="_blank"><img src="../Upload/temp/240-22261.jpg" width="80" height="80" border="0" alt="我在西斯汀教堂 Sistine Chapel"></a></dd>
71
+ <dd>.<a href="http://blog.libertytimes.com.tw/helen0618/2013/03/11/137905" target="_blank" title="我在西斯汀教堂 Sistine Chapel">我在西斯汀教堂 Sis...</a></dd><dd>.<a href="http://blog.libertytimes.com.tw/loveformosa/2013/03/09/137835" target="_blank" title="台日戰況激烈凍未條 近百人急救">台日戰況激烈凍未條...</a></dd><dd>.<a href="http://blog.libertytimes.com.tw/wendylin/2013/03/07/137667" target="_blank" title="言論自由聯盟一屆二次大會召開,蕭主席期待勞工團結護權益">言論自由聯盟一屆二...</a></dd><dd>.<a href="http://blog.libertytimes.com.tw/karl6406/2013/03/11/137906" target="_blank" title="反核素人:真正的台灣之光">反核素人:真正的台...</a></dd></dl>
72
+ </div> </div>
73
+ <div id="newsc" class="news_content" role="article">
74
+ 〔本報訊〕又傳酒駕釀禍!花蓮縣壽豐鄉一名男子昨天(28日)在友人家喝得爛醉,晚間開車返家經過壽豐路二段時,撞上一名年輕少婦,由於撞擊力道過大,少婦當場被撞飛2公尺高後,後腦勺著地重傷,送醫陷入昏迷,幸經搶救後情況漸有好轉,現已轉入加護病房觀察。<br />
75
+ <br />
76
+  警方調查發現,45歲無業蘇姓男子昨天下午在友人家喝酒,晚間6時主動表示要載朋友到火車站搭車,返家行經壽豐路二段、要左轉進入壽文路口時,疑因車速過快,當場撞上一名22歲江姓少婦,江姓少婦被撞飛2公尺高後落地,由於頭部先接觸地面,傷勢嚴重,陷入昏迷,現仍在醫院救治。<br />
77
+ <br />
78
+ <b>上月才酒後騎車遭罰 警大罵:罰不怕</b><br />
79
+ <br />
80
+  蘇男發現釀禍,第一時間還藉故要打電話,刻意愈走愈遠,離開現場,救護員眼尖,喝令蘇男站住,詢問蘇男要去哪,蘇男才尷尬表示,「我受傷,我要去看醫生啦!」但當救護員將其送醫後,蘇又辯稱沒有受傷,不願掛號。<br />
81
+ <br />
82
+  警方測得蘇男酒測值達0.80毫克,因其到案時仍陷入酒醉狀態,警方只好先把人送入拘留所,今天中午他酒醒後仍辯稱不曉得撞到人,直到警方出示監視畫面為證,他才俯首認罪,今天(29日)訊後依公共危險罪嫌移送法辦。<br />
83
+ <br />
84
+  據了解,蘇姓男子上個月底才因酒後騎車被罰2萬2500元,這個月酒駕新制上路,沒想到蘇男不知悔改又再度觸法,處理事故的員警正巧就是當時開單告發的員警,讓員警氣得大罵「真是罰不怕」! </div><img src="../IService3/LivePage.php?no=829851&nd=1372509265">
85
+ </td>
86
+ </tr>
87
+ </tbody>
88
+ </table>
89
+ <script type="text/javascript" src="http://www.libertytimes.com.tw/js/ajs/BindexLB03.js"></script>
90
+ <div class="fb-comments" data-href="http://iservice.libertytimes.com.tw/liveNews/news.php?no=829851&type=%E7%A4%BE%E6%9C%83" data-width="635" data-num-posts="2"></div>
91
+ <div id="LB2"><script type="text/javascript" src="http://www.libertytimes.com.tw/js/ajs/BindexLB02.js"></script></div>
92
+ <script type="text/javascript" src="http://www.libertytimes.com.tw/js/ajs/BindexLB01.js"></script>
93
+ <div class="top"><a href="#top"><img src="images/top_button.gif" border="0" alt="top" /></a></div>
94
+ </div>
95
+
96
+ <div id="adc">
97
+
98
+
99
+ <!-- LTN-001-31-news-imm-300X250-PIC-DFP -->
100
+ <div id='div-gpt-ad-1326807770912-0' style='width:300px; height:250px;' class='adc'>
101
+ <script type='text/javascript'>
102
+ googletag.cmd.push(function() { googletag.display('div-gpt-ad-1326807770912-0'); });
103
+ </script>
104
+ </div>
105
+
106
+ <!-- yes123 -->
107
+ <div id='div-gpt-ad-1344502371448-0' style='width:300px; height:150px; margin-bottom:9px'>
108
+ <script type='text/javascript'>
109
+ googletag.cmd.push(function() { googletag.display('div-gpt-ad-1344502371448-0'); });
110
+ </script>
111
+ </div>
112
+
113
+ <div id='div-gpt-ad-1346672285358-0' style='width:300px; height:100px;' class='adc'>
114
+ <script type='text/javascript'>
115
+ googletag.cmd.push(function() { googletag.display('div-gpt-ad-1346672285358-0'); });
116
+ </script>
117
+ </div>
118
+
119
+ <div id='div-gpt-ad-1346672190025-0' style='width:300px; height:250px;' class='adc'>
120
+ <script type='text/javascript'>
121
+ googletag.cmd.push(function() { googletag.display('div-gpt-ad-1346672190025-0'); });
122
+ </script>
123
+ </div>
124
+
125
+ <div id='div-gpt-ad-1346672558307-0' style='width:300px; height:100px;' class='adc'>
126
+ <script type='text/javascript'>
127
+ googletag.cmd.push(function() { googletag.display('div-gpt-ad-1346672558307-0'); });
128
+ </script>
129
+ </div>
130
+
131
+ <div id='div-gpt-ad-1352374863627-0' style='width:300px; height:100px;' class='adc'>
132
+ <script type='text/javascript'>
133
+ googletag.cmd.push(function() { googletag.display('div-gpt-ad-1352374863627-0'); });
134
+ </script>
135
+ </div> <div id="fans"><div class="fb-like-box" data-href="http://www.facebook.com/m.ltn.tw" data-width="300" data-show-faces="true" data-stream="true" data-header="false"></div></div>
136
+ <div id="fb-act"><div class="fb-activity" data-site="http://www.libertytimes.com.tw" data-app-id="140490219413038" data-width="300" data-height="280" data-header="false" data-recommendations="false"></div></div>
137
+ </div>
138
+
139
+ <div id="footer" role="contentinfo"><a href="http://iservice.libertytimes.com.tw/inform/complain_1.php?type=3" accesskey="9" title="連絡我們">連絡我們</a> | 爆料傳真 0809-006-667 | 自由電子報 版權所有 © 2013 The Liberty Times. All Rights Reserved.
140
+ </div>
141
+
142
+ </div>
143
+
144
+ </body>
145
+ </html>
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
+ describe TaiwaneseNewsParser::Parser::LibertyTimes do
4
+ describe '#parse' do
5
+ before do
6
+ Timecop.freeze(Time.local(2013,6,29,9,13))
7
+ end
8
+ it do
9
+ url = 'http://iservice.libertytimes.com.tw/liveNews/news.php?no=829851&type=%E7%A4%BE%E6%9C%83&Slots=Live'
10
+ FakeWeb.register_uri(:get, url, body:sample(__FILE__,'liberty_times_s1.html'))
11
+ article = described_class.new(url).parse
12
+ article[:title].should == '又傳酒駕釀禍 少婦被撞陷昏迷'
13
+ article[:content].should include('又傳酒駕釀禍!花蓮縣壽豐鄉一名男子昨天(28日)在友人家喝得爛醉')
14
+ article[:company_name].should == '自由時報'
15
+ article[:reporter_name].should == nil
16
+ article[:published_at].should == Time.new(2013,6,29,17,52)
17
+ end
18
+ end
19
+
20
+ describe '#parse_url_id' do
21
+ it do
22
+ url = 'http://iservice.libertytimes.com.tw/liveNews/news.php?no=854755&Slots=Live'
23
+ described_class.parse_url_id(url).should == '854755'
24
+
25
+ url = 'http://iservice.libertytimes.com.tw/liveNews/news.php?no=854838&type=%E5%9C%8B%E9%9A%9B'
26
+ described_class.parse_url_id(url).should == '854838'
27
+ end
28
+ end
29
+ end