taiwanese_news_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +23 -0
  6. data/Rakefile +4 -0
  7. data/g0v.json +37 -0
  8. data/lib/taiwanese_news_parser/parser/apple_daily.rb +69 -0
  9. data/lib/taiwanese_news_parser/parser/china_times.rb +76 -0
  10. data/lib/taiwanese_news_parser/parser/cna.rb +59 -0
  11. data/lib/taiwanese_news_parser/parser/cts.rb +52 -0
  12. data/lib/taiwanese_news_parser/parser/ettoday.rb +53 -0
  13. data/lib/taiwanese_news_parser/parser/liberty_times.rb +66 -0
  14. data/lib/taiwanese_news_parser/parser/liberty_times_big5.rb +51 -0
  15. data/lib/taiwanese_news_parser/parser/now_news.rb +53 -0
  16. data/lib/taiwanese_news_parser/parser/tvbs.rb +46 -0
  17. data/lib/taiwanese_news_parser/parser/udn.rb +43 -0
  18. data/lib/taiwanese_news_parser/parser.rb +57 -0
  19. data/lib/taiwanese_news_parser/url_cleaner.rb +19 -0
  20. data/lib/taiwanese_news_parser/version.rb +3 -0
  21. data/lib/taiwanese_news_parser.rb +15 -0
  22. data/spec/spec_helper.rb +9 -0
  23. data/spec/taiwanese_news_parser/parser/apple_daily_s1.html +484 -0
  24. data/spec/taiwanese_news_parser/parser/apple_daily_s2.html +333 -0
  25. data/spec/taiwanese_news_parser/parser/apple_daily_s3.html +334 -0
  26. data/spec/taiwanese_news_parser/parser/apple_daily_spec.rb +57 -0
  27. data/spec/taiwanese_news_parser/parser/china_times_s1.html +513 -0
  28. data/spec/taiwanese_news_parser/parser/china_times_s2.html +538 -0
  29. data/spec/taiwanese_news_parser/parser/china_times_s3.html +893 -0
  30. data/spec/taiwanese_news_parser/parser/china_times_s4.html +1045 -0
  31. data/spec/taiwanese_news_parser/parser/china_times_spec.rb +63 -0
  32. data/spec/taiwanese_news_parser/parser/cna_s1.html +1616 -0
  33. data/spec/taiwanese_news_parser/parser/cna_spec.rb +33 -0
  34. data/spec/taiwanese_news_parser/parser/cts_s1.html +672 -0
  35. data/spec/taiwanese_news_parser/parser/cts_s2.html +672 -0
  36. data/spec/taiwanese_news_parser/parser/cts_spec.rb +36 -0
  37. data/spec/taiwanese_news_parser/parser/ettoday_s1.html +1817 -0
  38. data/spec/taiwanese_news_parser/parser/ettoday_s2.html +1822 -0
  39. data/spec/taiwanese_news_parser/parser/ettoday_spec.rb +35 -0
  40. data/spec/taiwanese_news_parser/parser/liberty_times_big5_s1.html +213 -0
  41. data/spec/taiwanese_news_parser/parser/liberty_times_big5_spec.rb +31 -0
  42. data/spec/taiwanese_news_parser/parser/liberty_times_s1.html +145 -0
  43. data/spec/taiwanese_news_parser/parser/liberty_times_spec.rb +29 -0
  44. data/spec/taiwanese_news_parser/parser/now_news_s1.html +968 -0
  45. data/spec/taiwanese_news_parser/parser/now_news_s2.html +986 -0
  46. data/spec/taiwanese_news_parser/parser/now_news_spec.rb +31 -0
  47. data/spec/taiwanese_news_parser/parser/tvbs_s1.html +734 -0
  48. data/spec/taiwanese_news_parser/parser/tvbs_s2.html +739 -0
  49. data/spec/taiwanese_news_parser/parser/tvbs_spec.rb +36 -0
  50. data/spec/taiwanese_news_parser/parser/udn_s1.html +1678 -0
  51. data/spec/taiwanese_news_parser/parser/udn_spec.rb +42 -0
  52. data/taiwanese_news_parser.gemspec +30 -0
  53. metadata +237 -0
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+
3
+ describe TaiwaneseNewsParser::Parser::Udn do
4
+ describe '#parse' do
5
+ it do
6
+ url = 'http://udn.com/NEWS/NATIONAL/NAT4/7996060.shtml'
7
+ FakeWeb.register_uri(:get, url, body:sample(__FILE__,'udn_s1.html'))
8
+ article = described_class.new(url).parse
9
+ article[:title].should == '國光免費去宜蘭第二天 零星衝突'
10
+ article[:content].should include('國光客運新開北宜高國5三條新路線,今天開放免費試乘第二天')
11
+ article[:company_name].should == '聯合晚報'
12
+ article[:reporter_name].should == '邱瓊平'
13
+ article[:published_at].should == Time.new(2013,6,29,16,17)
14
+ end
15
+ end
16
+ describe '#parse_reporter_name' do
17
+ it do
18
+ subject = described_class.new('http://udn.com/NEWS/NATIONAL/NAT2/8040540.shtml')
19
+ subject.stub(:get_company_name_and_reporter_name){'中央社╱桃園20日電'}
20
+
21
+ expect{ subject.parse_reporter_name }.to_not raise_error
22
+ end
23
+ end
24
+ describe '#reproduced?' do
25
+ it do
26
+ subject = described_class.new('http://udn.com/NEWS/NATIONAL/NAT2/8040540.shtml')
27
+ subject.stub(:get_company_name_and_reporter_name){'中央社╱桃園20日電'}
28
+ subject.reproduced?.should == true
29
+ end
30
+ end
31
+ describe '#parse_url_id' do
32
+ it do
33
+ url = 'http://udn.com/news/national/nats4/8099187.shtml'
34
+ described_class.parse_url_id(url).should == '8099187'
35
+ end
36
+ it 'breaking news' do
37
+ url = 'http://udn.com/NEWS/BREAKINGNEWS/BREAKINGNEWS1/8101247.shtml'
38
+ described_class.parse_url_id(url).should == '8101247'
39
+ end
40
+ end
41
+ end
42
+
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'taiwanese_news_parser/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "taiwanese_news_parser"
8
+ spec.version = TaiwaneseNewsParser::VERSION
9
+ spec.authors = ["lulalala"]
10
+ spec.email = ["mark@goodlife.tw"]
11
+ spec.description = %q{台灣各新聞網站新聞解析器}
12
+ spec.summary = %q{Parser for various news agency websites in Taiwan}
13
+ spec.homepage = "https://github.com/lulalala/taiwanese_news_parser"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec", "~> 2.6"
24
+ spec.add_development_dependency "fakeweb", "~> 1.3"
25
+ spec.add_development_dependency "timecop", "~> 0.6.1"
26
+
27
+ spec.add_dependency 'addressable', '~> 2.0'
28
+ spec.add_dependency 'nokogiri', '~> 1.5'
29
+ spec.add_dependency 'memoist', '~> 0.9'
30
+ end
metadata ADDED
@@ -0,0 +1,237 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: taiwanese_news_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - lulalala
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '2.6'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '2.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: fakeweb
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: timecop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 0.6.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 0.6.1
83
+ - !ruby/object:Gem::Dependency
84
+ name: addressable
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '2.0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ~>
95
+ - !ruby/object:Gem::Version
96
+ version: '2.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: nokogiri
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ version: '1.5'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ~>
109
+ - !ruby/object:Gem::Version
110
+ version: '1.5'
111
+ - !ruby/object:Gem::Dependency
112
+ name: memoist
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '0.9'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ~>
123
+ - !ruby/object:Gem::Version
124
+ version: '0.9'
125
+ description: 台灣各新聞網站新聞解析器
126
+ email:
127
+ - mark@goodlife.tw
128
+ executables: []
129
+ extensions: []
130
+ extra_rdoc_files: []
131
+ files:
132
+ - .gitignore
133
+ - Gemfile
134
+ - LICENSE.txt
135
+ - README.md
136
+ - Rakefile
137
+ - g0v.json
138
+ - lib/taiwanese_news_parser.rb
139
+ - lib/taiwanese_news_parser/parser.rb
140
+ - lib/taiwanese_news_parser/parser/apple_daily.rb
141
+ - lib/taiwanese_news_parser/parser/china_times.rb
142
+ - lib/taiwanese_news_parser/parser/cna.rb
143
+ - lib/taiwanese_news_parser/parser/cts.rb
144
+ - lib/taiwanese_news_parser/parser/ettoday.rb
145
+ - lib/taiwanese_news_parser/parser/liberty_times.rb
146
+ - lib/taiwanese_news_parser/parser/liberty_times_big5.rb
147
+ - lib/taiwanese_news_parser/parser/now_news.rb
148
+ - lib/taiwanese_news_parser/parser/tvbs.rb
149
+ - lib/taiwanese_news_parser/parser/udn.rb
150
+ - lib/taiwanese_news_parser/url_cleaner.rb
151
+ - lib/taiwanese_news_parser/version.rb
152
+ - spec/spec_helper.rb
153
+ - spec/taiwanese_news_parser/parser/apple_daily_s1.html
154
+ - spec/taiwanese_news_parser/parser/apple_daily_s2.html
155
+ - spec/taiwanese_news_parser/parser/apple_daily_s3.html
156
+ - spec/taiwanese_news_parser/parser/apple_daily_spec.rb
157
+ - spec/taiwanese_news_parser/parser/china_times_s1.html
158
+ - spec/taiwanese_news_parser/parser/china_times_s2.html
159
+ - spec/taiwanese_news_parser/parser/china_times_s3.html
160
+ - spec/taiwanese_news_parser/parser/china_times_s4.html
161
+ - spec/taiwanese_news_parser/parser/china_times_spec.rb
162
+ - spec/taiwanese_news_parser/parser/cna_s1.html
163
+ - spec/taiwanese_news_parser/parser/cna_spec.rb
164
+ - spec/taiwanese_news_parser/parser/cts_s1.html
165
+ - spec/taiwanese_news_parser/parser/cts_s2.html
166
+ - spec/taiwanese_news_parser/parser/cts_spec.rb
167
+ - spec/taiwanese_news_parser/parser/ettoday_s1.html
168
+ - spec/taiwanese_news_parser/parser/ettoday_s2.html
169
+ - spec/taiwanese_news_parser/parser/ettoday_spec.rb
170
+ - spec/taiwanese_news_parser/parser/liberty_times_big5_s1.html
171
+ - spec/taiwanese_news_parser/parser/liberty_times_big5_spec.rb
172
+ - spec/taiwanese_news_parser/parser/liberty_times_s1.html
173
+ - spec/taiwanese_news_parser/parser/liberty_times_spec.rb
174
+ - spec/taiwanese_news_parser/parser/now_news_s1.html
175
+ - spec/taiwanese_news_parser/parser/now_news_s2.html
176
+ - spec/taiwanese_news_parser/parser/now_news_spec.rb
177
+ - spec/taiwanese_news_parser/parser/tvbs_s1.html
178
+ - spec/taiwanese_news_parser/parser/tvbs_s2.html
179
+ - spec/taiwanese_news_parser/parser/tvbs_spec.rb
180
+ - spec/taiwanese_news_parser/parser/udn_s1.html
181
+ - spec/taiwanese_news_parser/parser/udn_spec.rb
182
+ - taiwanese_news_parser.gemspec
183
+ homepage: https://github.com/lulalala/taiwanese_news_parser
184
+ licenses:
185
+ - MIT
186
+ metadata: {}
187
+ post_install_message:
188
+ rdoc_options: []
189
+ require_paths:
190
+ - lib
191
+ required_ruby_version: !ruby/object:Gem::Requirement
192
+ requirements:
193
+ - - '>='
194
+ - !ruby/object:Gem::Version
195
+ version: '0'
196
+ required_rubygems_version: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - '>='
199
+ - !ruby/object:Gem::Version
200
+ version: '0'
201
+ requirements: []
202
+ rubyforge_project:
203
+ rubygems_version: 2.0.14
204
+ signing_key:
205
+ specification_version: 4
206
+ summary: Parser for various news agency websites in Taiwan
207
+ test_files:
208
+ - spec/spec_helper.rb
209
+ - spec/taiwanese_news_parser/parser/apple_daily_s1.html
210
+ - spec/taiwanese_news_parser/parser/apple_daily_s2.html
211
+ - spec/taiwanese_news_parser/parser/apple_daily_s3.html
212
+ - spec/taiwanese_news_parser/parser/apple_daily_spec.rb
213
+ - spec/taiwanese_news_parser/parser/china_times_s1.html
214
+ - spec/taiwanese_news_parser/parser/china_times_s2.html
215
+ - spec/taiwanese_news_parser/parser/china_times_s3.html
216
+ - spec/taiwanese_news_parser/parser/china_times_s4.html
217
+ - spec/taiwanese_news_parser/parser/china_times_spec.rb
218
+ - spec/taiwanese_news_parser/parser/cna_s1.html
219
+ - spec/taiwanese_news_parser/parser/cna_spec.rb
220
+ - spec/taiwanese_news_parser/parser/cts_s1.html
221
+ - spec/taiwanese_news_parser/parser/cts_s2.html
222
+ - spec/taiwanese_news_parser/parser/cts_spec.rb
223
+ - spec/taiwanese_news_parser/parser/ettoday_s1.html
224
+ - spec/taiwanese_news_parser/parser/ettoday_s2.html
225
+ - spec/taiwanese_news_parser/parser/ettoday_spec.rb
226
+ - spec/taiwanese_news_parser/parser/liberty_times_big5_s1.html
227
+ - spec/taiwanese_news_parser/parser/liberty_times_big5_spec.rb
228
+ - spec/taiwanese_news_parser/parser/liberty_times_s1.html
229
+ - spec/taiwanese_news_parser/parser/liberty_times_spec.rb
230
+ - spec/taiwanese_news_parser/parser/now_news_s1.html
231
+ - spec/taiwanese_news_parser/parser/now_news_s2.html
232
+ - spec/taiwanese_news_parser/parser/now_news_spec.rb
233
+ - spec/taiwanese_news_parser/parser/tvbs_s1.html
234
+ - spec/taiwanese_news_parser/parser/tvbs_s2.html
235
+ - spec/taiwanese_news_parser/parser/tvbs_spec.rb
236
+ - spec/taiwanese_news_parser/parser/udn_s1.html
237
+ - spec/taiwanese_news_parser/parser/udn_spec.rb