consadole_aggregator 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,88 +1,95 @@
1
1
  # -*- coding: utf-8 -*-
2
- require 'logger'
3
2
  require 'uri'
3
+ require 'net/http'
4
4
  require 'kconv'
5
5
  require 'nokogiri'
6
- require 'net/http'
7
- require_relative 'live/timeline.rb'
6
+ require 'eventmachine'
8
7
 
9
8
  module ConsadoleAggregator
10
- module Live
11
-
12
- def self.reserve reservation_time=nil, opt ={}
13
- Live.new(reservation_time, opt)
14
- end
15
-
16
- class Live
17
- include Aggregatable
18
- attr_reader :reservation_time, :posted, :times, :wait_sec
9
+ class Live
10
+ class Runner
11
+ attr_writer :length_of_a_game, :interval, :daemonize
19
12
 
20
- def initialize reservation_time=nil, opt ={}
21
- @reservation_time = reservation_time
22
- @posted = []
23
- @wait_sec = opt[:wait_sec] || 30
24
- @times = opt[:times] || (60/@wait_sec)*120 # サッカーは120分あれば終わる
25
- @logger = opt[:logger] || Logger.new(File.expand_path(File.dirname(__FILE__) + '/../../log/live.log'))
13
+ def initialize starting_time
14
+ @starting_time = starting_time
15
+ @length_of_a_game = 120 * 60
16
+ @interval = 60
17
+ @daemonize = true
26
18
  end
27
19
 
28
- def execute &block
29
- be_daemonize
30
- wait_initial
31
- @logger.info 'start of loop'
32
- @times.times do |i|
33
- @logger.debug "#{i} times"
34
- update &block rescue @logger.error $!
35
- wait_interval
36
- end
37
- @logger.info 'end of loop'
38
- end
20
+ def run &block
21
+ Process.daemon if @daemonize
39
22
 
40
- def wait_initial
41
- return unless @reservation_time
42
- diff_sec = @reservation_time - Time.now
43
- wait_sec = diff_sec > 0 ? diff_sec : 0
44
- @logger.info "initial wait #{wait_sec} seconds"
45
- sleep wait_sec
46
- end
23
+ starting = @starting_time - Time.now
24
+ ending = starting + @length_of_a_game
25
+ live = Live.new
47
26
 
48
- def wait_interval
49
- sleep @wait_sec
50
- end
27
+ EM.run do
28
+ EM.add_timer(starting) do
29
+ EM.next_tick do
30
+ EM.add_periodic_timer(@interval) do
31
+ live.update &block
32
+ end
33
+ end
34
+ end
51
35
 
52
- def update
53
- new_timeline = ConsadoleAggregator::Live.parse - @posted
54
- new_timeline.each do |timeline|
55
- @logger.debug timeline
56
- yield timeline if block_given?
57
- @posted << timeline
36
+ EM.add_timer(ending) do
37
+ EM.stop
38
+ end
58
39
  end
59
40
  end
41
+ end
60
42
 
61
- def self.get_resource
62
- ->{ Net::HTTP.get(URI.parse('http://www.consadole-sapporo.jp/view/s674.html')).toutf8 }
63
- end
43
+ def self.run starting_time, opt = {}
44
+ runner = Runner.new starting_time
45
+ runner.length_of_a_game = opt[:length_of_a_game] if opt[:length_of_a_game]
46
+ runner.interval = opt[:interval] if opt[:interval]
47
+ runner.daemonize = opt[:daemonize] unless opt[:daemonize].nil?
48
+ runner.run &opt[:writer]
49
+ end
64
50
 
65
- def self.parse_list
66
- ->(list){
67
- live_block = Nokogiri::HTML::parse(list)
68
- .search("hr + p")
69
- .last
70
- .inner_html
71
- lines = live_block
72
- .split(/<br>|\n/)
73
- .delete_if{ |line| line.empty? || line =~ /&lt;前|後半&gt;/ }
74
- lines.map{ |line| line.sub(/ +$/, "") }.reverse
75
- }
76
- end
51
+ attr_reader :posted
77
52
 
78
- def self.parse_article
79
- ->(article){ Timeline.parse article }
80
- end
53
+ def initialize
54
+ @posted = []
55
+ end
81
56
 
82
- private
83
- def be_daemonize
84
- Process.daemon
85
- end
57
+ def update
58
+ self.class
59
+ .fetch
60
+ .reject(&@posted.method(:include?))
61
+ .each_with_object(@posted) { |post, posted|
62
+ begin
63
+ yield({ title: post }) if block_given?
64
+ posted << post
65
+ ConsadoleAggregator.logger.info post
66
+ rescue
67
+ ConsadoleAggregator.logger.error $!
68
+ end
69
+ }
70
+ end
71
+
72
+ private
73
+ def self.parse document
74
+ Nokogiri
75
+ .parse(document)
76
+ .search('p:last-of-type')
77
+ .children
78
+ .grep(Nokogiri::XML::Text)
79
+ .map(&:text)
80
+ .map(&:toutf8)
81
+ .map { |t| t.tr(' ', ' ') }
82
+ .map(&:strip)
83
+ .reject(&/<前|後半>/.method(:match))
84
+ .reverse
85
+ end
86
+
87
+ def self.fetch
88
+ uri = URI.parse('http://www.consadole-sapporo.jp/view/s674.html')
89
+ doc = Net::HTTP.get(uri).force_encoding('SJIS')
90
+ parse(doc)
91
+ rescue
92
+ []
86
93
  end
87
94
  end
88
95
  end
@@ -1,117 +1,182 @@
1
1
  # -*- coding: utf-8 -*-
2
- require 'logger'
3
2
  require 'rss'
4
- require 'uri'
5
- require 'kconv'
6
- require 'net/http'
7
- require 'net/https'
3
+ require 'pstore'
8
4
  require 'nokogiri'
5
+ require 'httpclient'
9
6
 
10
7
  module ConsadoleAggregator
11
8
  module News
12
- def self.get_resource(url_path)
13
- Net::HTTP.get(URI.parse(url_path)).toutf8
9
+ class Register
10
+ def initialize
11
+ @sites = []
12
+ end
13
+
14
+ def name name
15
+ builder = SiteBuilder.new name
16
+ yield builder if block_given?
17
+ @sites << builder.build
18
+ end
19
+
20
+ def register!
21
+ News.const_set(:Sites, []) unless News.const_defined?(:Sites)
22
+ News.const_get(:Sites).concat @sites
23
+ end
14
24
  end
15
25
 
16
- def self.trace(url_path, limit=10)
17
- raise ArgumentError, 'http redirect too deep' if limit == 0
26
+ class SiteBuilder
27
+ def initialize name
28
+ @name = name
29
+ end
18
30
 
19
- case response = Net::HTTP.get_response(URI.parse(url_path))
20
- when Net::HTTPSuccess then url_path
21
- when Net::HTTPRedirection then trace(response['Location'], limit - 1)
22
- else
23
- response.error!
31
+ def resource &block; @resource = block; end
32
+ def parse_list &block; @parse_list = block; end
33
+ def filter_article &block; @filter_article = block; end
34
+ def parse_article &block; @parse_article = block; end
35
+
36
+ def build
37
+ site = Site.new @name
38
+ site.resource = @resource if @resource
39
+ site.list_parser = @parse_list if @parse_list
40
+ site.article_filter = @filter_article if @filter_article
41
+ site.article_parser = @parse_article if @parse_article
42
+ site
24
43
  end
25
44
  end
26
45
 
27
- {
28
- Nikkansports:
29
- [
30
- ->{ get_resource('http://www.nikkansports.com/rss/soccer/jleague/consadole.rdf') },
31
- ->(list){ RSS::Parser.parse(list, false).items.map{ |e| { url:e.link, title:e.title } }.reverse },
32
- ->(article){ article }
33
- ],
34
- Hochiyomiuri:
35
- [
36
- ->{ get_resource('http://hochi.yomiuri.co.jp/hokkaido/soccer/index.htm') },
37
- ->(list){ Nokogiri::HTML(list).search('div.list1 > ul > li a').reverse },
38
- ->(article){ { url:"http://hochi.yomiuri.co.jp#{article['href']}", title:article.text } if article.text =~ /…札幌$/ }
39
- ],
40
- Asahi:
41
- [
42
- ->{ get_resource('http://mytown.asahi.com/hokkaido/newslist.php?d_id=0100019') },
43
- ->(list){ Nokogiri::HTML(list).search('ul.list > li a').reverse },
44
- ->(article){ { url:"http://mytown.asahi.com/hokkaido/#{article['href']}", title:article.text } }
45
- ],
46
- Forzaconsadole:
47
- [
48
- ->{ get_resource('http://www.hokkaido-np.co.jp/news/e_index/?g=consadole') },
49
- ->(list){ Nokogiri::HTML(list).search('ul.iSwBox > li > a').reverse },
50
- ->(article){ { url:article['href'], title:article.text } }
51
- ],
52
- Consaburn:
53
- [
54
- ->{ get_resource('http://www.hokkaido-np.co.jp/cont/consa-burn/index.html') },
55
- ->(list){ Nokogiri::HTML(list).search('ul#news_list > li > a').reverse },
56
- ->(article){ { url:article['href'], title:article.text } }
57
- ],
58
- Consaclub:
59
- [
60
- ->{ get_resource('http://www.hokkaido-np.co.jp/cont/consa-club/index.html') },
61
- ->(list){ Nokogiri::HTML(list).search('ul#news_list > li > a').reverse },
62
- ->(article){ { url:article['href'], title:article.text } }
63
- ],
64
- Consadolenews:
65
- [
66
- ->{ get_resource('http://www.consadole-sapporo.jp/news/atom.xml') },
67
- ->(list){ Nokogiri::XML(list).search('entry').reverse },
68
- ->(article){ { title:article.at('title').text, url:article.at('link')['href'] } }
69
- ],
70
- Consadolephotos:
71
- [
72
- ->{ get_resource('http://www.consadole-sapporo.jp/') },
73
- ->(list){ Nokogiri::HTML(list).search('div.anythingSlider img').reverse },
74
- ->(article){ { url:article['src'], title:article['alt'] } }
75
- ],
76
- Jsgoalnews:
77
- [
78
- ->{ get_resource('http://feeds.feedburner.com/jsgoal/jsgoal?format=xml') },
79
- ->(list){
80
- RSS::Parser.parse(list, false).items.each_with_object([]){ |e, memo|
81
- memo << { url:trace(e.link), title:e.title } if e.title.include?('札幌')
82
- }.reverse },
83
- ->(article){ article }
84
- ],
85
- Jsgoalphotos:
86
- [
87
- ->{ get_resource('http://feeds.feedburner.com/jsgoal/photo?format=xml') },
88
- ->(list){
89
- RSS::Parser.parse(list, false).items.each_with_object([]){ |e, memo|
90
- memo << { url:trace(e.link), title:e.title } if e.title.include?('札幌')
91
- }.reverse },
92
- ->(article){ article }
93
- ],
94
- Clubconsadole:
95
- [
96
- ->{
97
- uri = URI.parse('https://www.finn.ne.jp/user.cgi?fanclub_id=67&actmode=NewsArticleSummary')
98
- https = Net::HTTP.new(uri.host, uri.port)
99
- https.use_ssl = true
100
- https.verify_mode = OpenSSL::SSL::VERIFY_NONE
101
- https.start { https.get(uri.request_uri).body.toutf8 }
102
- },
103
- ->(list){ Nokogiri::HTML(list).search('dl dd').reverse },
104
- ->(article){ { url:'https://www.finn.ne.jp/' + article.at('a')['href'], title: article.text } }
105
- ],
106
- }.each do |k,v|
107
- klass = Class.new do
108
- include Aggregatable
109
- @get_resource, @parse_list, @parse_article = *v
110
- def initialize logger=nil
111
- @logger = logger || Logger.new(File.expand_path(File.dirname(__FILE__) + '/../../log/news.log'))
46
+ class Site
47
+ class << self
48
+ STORE_NAME = 'updated.pstore'
49
+ def store
50
+ @store ||= PStore.new(File.join(ConsadoleAggregator.root_dir, STORE_NAME))
112
51
  end
113
52
  end
114
- const_set(k, klass)
53
+
54
+ STORAGE_ELEMENT_SIZE = 200
55
+
56
+ attr_writer :resource, :list_parser, :article_filter, :article_parser
57
+ attr_reader :name
58
+
59
+ def initialize name
60
+ @name = name
61
+ end
62
+
63
+ def updated
64
+ @updated ||= self.class.store.transaction { |s| s.fetch(name, []) }
65
+ end
66
+
67
+ def update
68
+ resource = @resource.call
69
+ list = @list_parser.call(resource).to_a
70
+ list.select! &@article_filter if @article_filter
71
+ list.map! &@article_parser if @article_parser
72
+ list.reject! { |article| updated.include?(article) }
73
+ list.each { |article|
74
+ begin
75
+ yield article if block_given?
76
+ add_updated article
77
+ ConsadoleAggregator.logger.info article
78
+ rescue
79
+ ConsadoleAggregator.logger.error $!
80
+ end
81
+ }
82
+ rescue
83
+ ConsadoleAggregator.logger.error $!
84
+ end
85
+
86
+ private
87
+ def add_updated article
88
+ @updated << article
89
+ self.class.store.transaction { |s| s[name] = @updated.last(STORAGE_ELEMENT_SIZE) }
90
+ end
91
+ end
92
+
93
+ def self.run &block
94
+ Sites.each { |site| site.update &block }
95
+ end
96
+
97
+ def self.register!
98
+ register = Register.new
99
+ yield register if block_given?
100
+ register.register!
101
+ end
102
+
103
+ register! do |sites|
104
+ sites.name(:nikkansports) do |site|
105
+ site.resource { HTTPClient.new.get_content('http://www.nikkansports.com/rss/soccer/jleague/consadole.rdf').force_encoding('UTF-8') }
106
+ site.parse_list { |list| RSS::Parser.parse(list, false).items.reverse }
107
+ site.parse_article { |article| { url: article.link, title: article.title } }
108
+ end
109
+
110
+ sites.name(:hochiyomiuri) do |site|
111
+ site.resource { HTTPClient.new.get_content('http://hochi.yomiuri.co.jp/hokkaido/soccer/index.htm').force_encoding('UTF-8') }
112
+ site.parse_list { |list| Nokogiri::HTML(list).search('div.list1 > ul > li a').reverse }
113
+ site.filter_article { |article| article.text =~ /札幌|コンサ/ }
114
+ site.parse_article { |article| { url:"http://hochi.yomiuri.co.jp#{article['href']}", title:article.text } }
115
+ end
116
+
117
+ sites.name(:asahi) do |site|
118
+ site.resource { HTTPClient.new.get_content('http://mytown.asahi.com/hokkaido/newslist.php?d_id=0100019').encode('UTF-8') }
119
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul.list > li a').reverse }
120
+ site.parse_article { |article| { url: "http://mytown.asahi.com/hokkaido/#{article['href']}", title:article.text } }
121
+ end
122
+
123
+ sites.name(:forzaconsadole) do |site|
124
+ site.resource { HTTPClient.new.get_content('http://www.hokkaido-np.co.jp/news/e_index/?g=consadole').encode('UTF-8') }
125
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul.iSwBox > li > a').reverse }
126
+ site.parse_article { |article| { url: article['href'], title: article.text } }
127
+ end
128
+
129
+ sites.name(:consaburn) do |site|
130
+ site.resource { HTTPClient.new.get_content('http://www.hokkaido-np.co.jp/cont/consa-burn/index.html').encode('UTF-8') }
131
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul#news_list > li > a').reverse }
132
+ site.parse_article { |article| { url: article['href'], title: article.text } }
133
+ end
134
+
135
+ sites.name(:consaclub) do |site|
136
+ site.resource { HTTPClient.new.get_content('http://www.hokkaido-np.co.jp/cont/consa-club/index.html').encode('UTF-8') }
137
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul#news_list > li > a').reverse }
138
+ site.parse_article { |article| { url: article['href'], title: article.text } }
139
+ end
140
+
141
+ sites.name(:consadolenews) do |site|
142
+ site.resource { HTTPClient.new.get_content('http://www.consadole-sapporo.jp/news/atom.xml').force_encoding('UTF-8') }
143
+ site.parse_list { |list| Nokogiri::XML(list).search('entry').reverse }
144
+ site.parse_article { |article| { title: article.at('title').text, url: article.at('link')['href'] } }
145
+ end
146
+
147
+ sites.name(:consadolephotos) do |site|
148
+ site.resource { HTTPClient.new.get_content('http://www.consadole-sapporo.jp/').force_encoding('UTF-8') }
149
+ site.parse_list { |list| Nokogiri::HTML(list).search('div.anythingSlider img').reverse }
150
+ site.parse_article { |article| { url: article['src'], title: article['alt'] } }
151
+ end
152
+
153
+ sites.name(:jsgoalnews) do |site|
154
+ site.resource { HTTPClient.new.get_content('http://feeds.feedburner.com/jsgoal/jsgoal?format=xml').encode('UTF-8') }
155
+ site.parse_list { |list| RSS::Parser.parse(list, false).items.reverse }
156
+ site.filter_article { |article| article.title =~ /札幌/ }
157
+ site.parse_article { |article|
158
+ c = HTTPClient.new.get(article.link)
159
+ c = HTTPClient.new.get(c.header['location'].first) while c.status == 301
160
+ { url: c.header['location'].first, title: article.title }
161
+ }
162
+ end
163
+
164
+ sites.name(:jsgoalphotos) do |site|
165
+ site.resource { HTTPClient.new.get_content('http://feeds.feedburner.com/jsgoal/photo?format=xml').encode('UTF-8') }
166
+ site.parse_list { |list| RSS::Parser.parse(list, false).items.reverse }
167
+ site.filter_article { |article| article.title =~ /札幌/ }
168
+ site.parse_article { |article|
169
+ c = HTTPClient.new.get(article.link)
170
+ c = HTTPClient.new.get(c.header['location'].first) while c.status == 301
171
+ { url: c.header['location'].first, title: article.title }
172
+ }
173
+ end
174
+
175
+ sites.name(:clubconsadole) do |site|
176
+ site.resource { HTTPClient.new.get_content('http://club-consadole.jp/news/index.php?page=1').encode('UTF-8') }
177
+ site.parse_list { |list| Nokogiri::HTML(list).search('li.news a').reverse }
178
+ site.parse_article { |article| { url: article['href'], title: article.text.strip } }
179
+ end
115
180
  end
116
181
  end
117
182
  end
@@ -0,0 +1,3 @@
1
+ module ConsadoleAggregator
2
+ VERSION = "0.2.0"
3
+ end
@@ -1,8 +1,30 @@
1
+ require "consadole_aggregator/version"
2
+ require 'consadole_aggregator/helper.rb'
3
+ require 'consadole_aggregator/live.rb'
4
+ require 'consadole_aggregator/news.rb'
5
+
1
6
  require 'logger'
2
- require_relative 'consadole_aggregator/helper.rb'
3
- require_relative 'consadole_aggregator/aggregatable.rb'
4
- require_relative 'consadole_aggregator/live.rb'
5
- require_relative 'consadole_aggregator/news.rb'
7
+ require 'fileutils'
6
8
 
7
9
  module ConsadoleAggregator
10
+ class << self
11
+ DEFAULT_ROOT_DIR = File.expand_path('~/.consadole_aggregator')
12
+ DEFAULT_LOG_NAME = 'consadole_aggregator.log'
13
+
14
+ def root_dir
15
+ return @root_dir if @root_dir
16
+ FileUtils.mkdir_p DEFAULT_ROOT_DIR unless File.exist? DEFAULT_ROOT_DIR
17
+ @root_dir = DEFAULT_ROOT_DIR
18
+ end
19
+
20
+ def root_dir= root_dir
21
+ FileUtils.mkdir_p root_dir unless File.exist? root_dir
22
+ @logger &&= Logger.new(File.join(root_dir, DEFAULT_LOG_NAME))
23
+ @root_dir = root_dir
24
+ end
25
+
26
+ def logger
27
+ @logger ||= Logger.new(File.join(root_dir, DEFAULT_LOG_NAME))
28
+ end
29
+ end
8
30
  end
@@ -1,27 +1,32 @@
1
1
  # -*- coding: utf-8 -*-
2
- require_relative '../spec_helper'
2
+ require 'spec_helper'
3
3
 
4
4
  describe ConsadoleAggregator::Helper do
5
- describe :concatenate do
5
+ describe '#truncate_for_twitter' do
6
6
  context 'with argument 140 chars' do
7
- subject { ConsadoleAggregator::Helper.concat('い' * 140) }
7
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('い' * 140) }
8
8
  it { should have(140).item }
9
9
  it { should == 'い' * 140 }
10
10
  end
11
11
  context 'with argument 140 chars and 18 chars url' do
12
- subject { ConsadoleAggregator::Helper.concat('ろ' * 140, url:'http://example.jp/') }
13
- it { should have(140).item }
12
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('ろ' * 140, url:'http://example.jp/') }
13
+ it { should have(138).item }
14
14
  it { should be_end_with 'ろ... http://example.jp/' }
15
15
  end
16
16
  context 'with argument 140 chars and 10 chars hashtag' do
17
- subject { ConsadoleAggregator::Helper.concat('は' * 140, hashtag:'#consadole') }
17
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('は' * 140, hashtag:'#consadole') }
18
18
  it { should have(140).item }
19
19
  it { should be_end_with 'は... #consadole' }
20
20
  end
21
21
  context 'with argument 140 chars and 18 chars url and 10 chars hashtag' do
22
- subject { ConsadoleAggregator::Helper.concat('に' * 140, url:'http://example.jp/', hashtag:'#consadole') }
23
- it { should have(140).item }
22
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('に' * 140, url:'http://example.jp/', hashtag:'#consadole') }
23
+ it { should have(138).item }
24
24
  it { should be_end_with 'に... http://example.jp/ #consadole' }
25
25
  end
26
+ context 'with argument 140 chars and 29 chars url and 10 chars hashtag' do
27
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('に' * 140, url:'http://example.jp/foo/bar/baz', hashtag:'#consadole') }
28
+ it { should have(149).item }
29
+ it { should be_end_with 'に... http://example.jp/foo/bar/baz #consadole' }
30
+ end
26
31
  end
27
32
  end