consadole_aggregator 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,88 +1,95 @@
1
1
  # -*- coding: utf-8 -*-
2
- require 'logger'
3
2
  require 'uri'
3
+ require 'net/http'
4
4
  require 'kconv'
5
5
  require 'nokogiri'
6
- require 'net/http'
7
- require_relative 'live/timeline.rb'
6
+ require 'eventmachine'
8
7
 
9
8
  module ConsadoleAggregator
10
- module Live
11
-
12
- def self.reserve reservation_time=nil, opt ={}
13
- Live.new(reservation_time, opt)
14
- end
15
-
16
- class Live
17
- include Aggregatable
18
- attr_reader :reservation_time, :posted, :times, :wait_sec
9
+ class Live
10
+ class Runner
11
+ attr_writer :length_of_a_game, :interval, :daemonize
19
12
 
20
- def initialize reservation_time=nil, opt ={}
21
- @reservation_time = reservation_time
22
- @posted = []
23
- @wait_sec = opt[:wait_sec] || 30
24
- @times = opt[:times] || (60/@wait_sec)*120 # サッカーは120分あれば終わる
25
- @logger = opt[:logger] || Logger.new(File.expand_path(File.dirname(__FILE__) + '/../../log/live.log'))
13
+ def initialize starting_time
14
+ @starting_time = starting_time
15
+ @length_of_a_game = 120 * 60
16
+ @interval = 60
17
+ @daemonize = true
26
18
  end
27
19
 
28
- def execute &block
29
- be_daemonize
30
- wait_initial
31
- @logger.info 'start of loop'
32
- @times.times do |i|
33
- @logger.debug "#{i} times"
34
- update &block rescue @logger.error $!
35
- wait_interval
36
- end
37
- @logger.info 'end of loop'
38
- end
20
+ def run &block
21
+ Process.daemon if @daemonize
39
22
 
40
- def wait_initial
41
- return unless @reservation_time
42
- diff_sec = @reservation_time - Time.now
43
- wait_sec = diff_sec > 0 ? diff_sec : 0
44
- @logger.info "initial wait #{wait_sec} seconds"
45
- sleep wait_sec
46
- end
23
+ starting = @starting_time - Time.now
24
+ ending = starting + @length_of_a_game
25
+ live = Live.new
47
26
 
48
- def wait_interval
49
- sleep @wait_sec
50
- end
27
+ EM.run do
28
+ EM.add_timer(starting) do
29
+ EM.next_tick do
30
+ EM.add_periodic_timer(@interval) do
31
+ live.update &block
32
+ end
33
+ end
34
+ end
51
35
 
52
- def update
53
- new_timeline = ConsadoleAggregator::Live.parse - @posted
54
- new_timeline.each do |timeline|
55
- @logger.debug timeline
56
- yield timeline if block_given?
57
- @posted << timeline
36
+ EM.add_timer(ending) do
37
+ EM.stop
38
+ end
58
39
  end
59
40
  end
41
+ end
60
42
 
61
- def self.get_resource
62
- ->{ Net::HTTP.get(URI.parse('http://www.consadole-sapporo.jp/view/s674.html')).toutf8 }
63
- end
43
+ def self.run starting_time, opt = {}
44
+ runner = Runner.new starting_time
45
+ runner.length_of_a_game = opt[:length_of_a_game] if opt[:length_of_a_game]
46
+ runner.interval = opt[:interval] if opt[:interval]
47
+ runner.daemonize = opt[:daemonize] unless opt[:daemonize].nil?
48
+ runner.run &opt[:writer]
49
+ end
64
50
 
65
- def self.parse_list
66
- ->(list){
67
- live_block = Nokogiri::HTML::parse(list)
68
- .search("hr + p")
69
- .last
70
- .inner_html
71
- lines = live_block
72
- .split(/<br>|\n/)
73
- .delete_if{ |line| line.empty? || line =~ /&lt;前|後半&gt;/ }
74
- lines.map{ |line| line.sub(/ +$/, "") }.reverse
75
- }
76
- end
51
+ attr_reader :posted
77
52
 
78
- def self.parse_article
79
- ->(article){ Timeline.parse article }
80
- end
53
+ def initialize
54
+ @posted = []
55
+ end
81
56
 
82
- private
83
- def be_daemonize
84
- Process.daemon
85
- end
57
+ def update
58
+ self.class
59
+ .fetch
60
+ .reject(&@posted.method(:include?))
61
+ .each_with_object(@posted) { |post, posted|
62
+ begin
63
+ yield({ title: post }) if block_given?
64
+ posted << post
65
+ ConsadoleAggregator.logger.info post
66
+ rescue
67
+ ConsadoleAggregator.logger.error $!
68
+ end
69
+ }
70
+ end
71
+
72
+ private
73
+ def self.parse document
74
+ Nokogiri
75
+ .parse(document)
76
+ .search('p:last-of-type')
77
+ .children
78
+ .grep(Nokogiri::XML::Text)
79
+ .map(&:text)
80
+ .map(&:toutf8)
81
+ .map { |t| t.tr(' ', ' ') }
82
+ .map(&:strip)
83
+ .reject(&/<前|後半>/.method(:match))
84
+ .reverse
85
+ end
86
+
87
+ def self.fetch
88
+ uri = URI.parse('http://www.consadole-sapporo.jp/view/s674.html')
89
+ doc = Net::HTTP.get(uri).force_encoding('SJIS')
90
+ parse(doc)
91
+ rescue
92
+ []
86
93
  end
87
94
  end
88
95
  end
@@ -1,117 +1,182 @@
1
1
  # -*- coding: utf-8 -*-
2
- require 'logger'
3
2
  require 'rss'
4
- require 'uri'
5
- require 'kconv'
6
- require 'net/http'
7
- require 'net/https'
3
+ require 'pstore'
8
4
  require 'nokogiri'
5
+ require 'httpclient'
9
6
 
10
7
  module ConsadoleAggregator
11
8
  module News
12
- def self.get_resource(url_path)
13
- Net::HTTP.get(URI.parse(url_path)).toutf8
9
+ class Register
10
+ def initialize
11
+ @sites = []
12
+ end
13
+
14
+ def name name
15
+ builder = SiteBuilder.new name
16
+ yield builder if block_given?
17
+ @sites << builder.build
18
+ end
19
+
20
+ def register!
21
+ News.const_set(:Sites, []) unless News.const_defined?(:Sites)
22
+ News.const_get(:Sites).concat @sites
23
+ end
14
24
  end
15
25
 
16
- def self.trace(url_path, limit=10)
17
- raise ArgumentError, 'http redirect too deep' if limit == 0
26
+ class SiteBuilder
27
+ def initialize name
28
+ @name = name
29
+ end
18
30
 
19
- case response = Net::HTTP.get_response(URI.parse(url_path))
20
- when Net::HTTPSuccess then url_path
21
- when Net::HTTPRedirection then trace(response['Location'], limit - 1)
22
- else
23
- response.error!
31
+ def resource &block; @resource = block; end
32
+ def parse_list &block; @parse_list = block; end
33
+ def filter_article &block; @filter_article = block; end
34
+ def parse_article &block; @parse_article = block; end
35
+
36
+ def build
37
+ site = Site.new @name
38
+ site.resource = @resource if @resource
39
+ site.list_parser = @parse_list if @parse_list
40
+ site.article_filter = @filter_article if @filter_article
41
+ site.article_parser = @parse_article if @parse_article
42
+ site
24
43
  end
25
44
  end
26
45
 
27
- {
28
- Nikkansports:
29
- [
30
- ->{ get_resource('http://www.nikkansports.com/rss/soccer/jleague/consadole.rdf') },
31
- ->(list){ RSS::Parser.parse(list, false).items.map{ |e| { url:e.link, title:e.title } }.reverse },
32
- ->(article){ article }
33
- ],
34
- Hochiyomiuri:
35
- [
36
- ->{ get_resource('http://hochi.yomiuri.co.jp/hokkaido/soccer/index.htm') },
37
- ->(list){ Nokogiri::HTML(list).search('div.list1 > ul > li a').reverse },
38
- ->(article){ { url:"http://hochi.yomiuri.co.jp#{article['href']}", title:article.text } if article.text =~ /…札幌$/ }
39
- ],
40
- Asahi:
41
- [
42
- ->{ get_resource('http://mytown.asahi.com/hokkaido/newslist.php?d_id=0100019') },
43
- ->(list){ Nokogiri::HTML(list).search('ul.list > li a').reverse },
44
- ->(article){ { url:"http://mytown.asahi.com/hokkaido/#{article['href']}", title:article.text } }
45
- ],
46
- Forzaconsadole:
47
- [
48
- ->{ get_resource('http://www.hokkaido-np.co.jp/news/e_index/?g=consadole') },
49
- ->(list){ Nokogiri::HTML(list).search('ul.iSwBox > li > a').reverse },
50
- ->(article){ { url:article['href'], title:article.text } }
51
- ],
52
- Consaburn:
53
- [
54
- ->{ get_resource('http://www.hokkaido-np.co.jp/cont/consa-burn/index.html') },
55
- ->(list){ Nokogiri::HTML(list).search('ul#news_list > li > a').reverse },
56
- ->(article){ { url:article['href'], title:article.text } }
57
- ],
58
- Consaclub:
59
- [
60
- ->{ get_resource('http://www.hokkaido-np.co.jp/cont/consa-club/index.html') },
61
- ->(list){ Nokogiri::HTML(list).search('ul#news_list > li > a').reverse },
62
- ->(article){ { url:article['href'], title:article.text } }
63
- ],
64
- Consadolenews:
65
- [
66
- ->{ get_resource('http://www.consadole-sapporo.jp/news/atom.xml') },
67
- ->(list){ Nokogiri::XML(list).search('entry').reverse },
68
- ->(article){ { title:article.at('title').text, url:article.at('link')['href'] } }
69
- ],
70
- Consadolephotos:
71
- [
72
- ->{ get_resource('http://www.consadole-sapporo.jp/') },
73
- ->(list){ Nokogiri::HTML(list).search('div.anythingSlider img').reverse },
74
- ->(article){ { url:article['src'], title:article['alt'] } }
75
- ],
76
- Jsgoalnews:
77
- [
78
- ->{ get_resource('http://feeds.feedburner.com/jsgoal/jsgoal?format=xml') },
79
- ->(list){
80
- RSS::Parser.parse(list, false).items.each_with_object([]){ |e, memo|
81
- memo << { url:trace(e.link), title:e.title } if e.title.include?('札幌')
82
- }.reverse },
83
- ->(article){ article }
84
- ],
85
- Jsgoalphotos:
86
- [
87
- ->{ get_resource('http://feeds.feedburner.com/jsgoal/photo?format=xml') },
88
- ->(list){
89
- RSS::Parser.parse(list, false).items.each_with_object([]){ |e, memo|
90
- memo << { url:trace(e.link), title:e.title } if e.title.include?('札幌')
91
- }.reverse },
92
- ->(article){ article }
93
- ],
94
- Clubconsadole:
95
- [
96
- ->{
97
- uri = URI.parse('https://www.finn.ne.jp/user.cgi?fanclub_id=67&actmode=NewsArticleSummary')
98
- https = Net::HTTP.new(uri.host, uri.port)
99
- https.use_ssl = true
100
- https.verify_mode = OpenSSL::SSL::VERIFY_NONE
101
- https.start { https.get(uri.request_uri).body.toutf8 }
102
- },
103
- ->(list){ Nokogiri::HTML(list).search('dl dd').reverse },
104
- ->(article){ { url:'https://www.finn.ne.jp/' + article.at('a')['href'], title: article.text } }
105
- ],
106
- }.each do |k,v|
107
- klass = Class.new do
108
- include Aggregatable
109
- @get_resource, @parse_list, @parse_article = *v
110
- def initialize logger=nil
111
- @logger = logger || Logger.new(File.expand_path(File.dirname(__FILE__) + '/../../log/news.log'))
46
+ class Site
47
+ class << self
48
+ STORE_NAME = 'updated.pstore'
49
+ def store
50
+ @store ||= PStore.new(File.join(ConsadoleAggregator.root_dir, STORE_NAME))
112
51
  end
113
52
  end
114
- const_set(k, klass)
53
+
54
+ STORAGE_ELEMENT_SIZE = 200
55
+
56
+ attr_writer :resource, :list_parser, :article_filter, :article_parser
57
+ attr_reader :name
58
+
59
+ def initialize name
60
+ @name = name
61
+ end
62
+
63
+ def updated
64
+ @updated ||= self.class.store.transaction { |s| s.fetch(name, []) }
65
+ end
66
+
67
+ def update
68
+ resource = @resource.call
69
+ list = @list_parser.call(resource).to_a
70
+ list.select! &@article_filter if @article_filter
71
+ list.map! &@article_parser if @article_parser
72
+ list.reject! { |article| updated.include?(article) }
73
+ list.each { |article|
74
+ begin
75
+ yield article if block_given?
76
+ add_updated article
77
+ ConsadoleAggregator.logger.info article
78
+ rescue
79
+ ConsadoleAggregator.logger.error $!
80
+ end
81
+ }
82
+ rescue
83
+ ConsadoleAggregator.logger.error $!
84
+ end
85
+
86
+ private
87
+ def add_updated article
88
+ @updated << article
89
+ self.class.store.transaction { |s| s[name] = @updated.last(STORAGE_ELEMENT_SIZE) }
90
+ end
91
+ end
92
+
93
+ def self.run &block
94
+ Sites.each { |site| site.update &block }
95
+ end
96
+
97
+ def self.register!
98
+ register = Register.new
99
+ yield register if block_given?
100
+ register.register!
101
+ end
102
+
103
+ register! do |sites|
104
+ sites.name(:nikkansports) do |site|
105
+ site.resource { HTTPClient.new.get_content('http://www.nikkansports.com/rss/soccer/jleague/consadole.rdf').force_encoding('UTF-8') }
106
+ site.parse_list { |list| RSS::Parser.parse(list, false).items.reverse }
107
+ site.parse_article { |article| { url: article.link, title: article.title } }
108
+ end
109
+
110
+ sites.name(:hochiyomiuri) do |site|
111
+ site.resource { HTTPClient.new.get_content('http://hochi.yomiuri.co.jp/hokkaido/soccer/index.htm').force_encoding('UTF-8') }
112
+ site.parse_list { |list| Nokogiri::HTML(list).search('div.list1 > ul > li a').reverse }
113
+ site.filter_article { |article| article.text =~ /札幌|コンサ/ }
114
+ site.parse_article { |article| { url:"http://hochi.yomiuri.co.jp#{article['href']}", title:article.text } }
115
+ end
116
+
117
+ sites.name(:asahi) do |site|
118
+ site.resource { HTTPClient.new.get_content('http://mytown.asahi.com/hokkaido/newslist.php?d_id=0100019').encode('UTF-8') }
119
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul.list > li a').reverse }
120
+ site.parse_article { |article| { url: "http://mytown.asahi.com/hokkaido/#{article['href']}", title:article.text } }
121
+ end
122
+
123
+ sites.name(:forzaconsadole) do |site|
124
+ site.resource { HTTPClient.new.get_content('http://www.hokkaido-np.co.jp/news/e_index/?g=consadole').encode('UTF-8') }
125
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul.iSwBox > li > a').reverse }
126
+ site.parse_article { |article| { url: article['href'], title: article.text } }
127
+ end
128
+
129
+ sites.name(:consaburn) do |site|
130
+ site.resource { HTTPClient.new.get_content('http://www.hokkaido-np.co.jp/cont/consa-burn/index.html').encode('UTF-8') }
131
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul#news_list > li > a').reverse }
132
+ site.parse_article { |article| { url: article['href'], title: article.text } }
133
+ end
134
+
135
+ sites.name(:consaclub) do |site|
136
+ site.resource { HTTPClient.new.get_content('http://www.hokkaido-np.co.jp/cont/consa-club/index.html').encode('UTF-8') }
137
+ site.parse_list { |list| Nokogiri::HTML(list).search('ul#news_list > li > a').reverse }
138
+ site.parse_article { |article| { url: article['href'], title: article.text } }
139
+ end
140
+
141
+ sites.name(:consadolenews) do |site|
142
+ site.resource { HTTPClient.new.get_content('http://www.consadole-sapporo.jp/news/atom.xml').force_encoding('UTF-8') }
143
+ site.parse_list { |list| Nokogiri::XML(list).search('entry').reverse }
144
+ site.parse_article { |article| { title: article.at('title').text, url: article.at('link')['href'] } }
145
+ end
146
+
147
+ sites.name(:consadolephotos) do |site|
148
+ site.resource { HTTPClient.new.get_content('http://www.consadole-sapporo.jp/').force_encoding('UTF-8') }
149
+ site.parse_list { |list| Nokogiri::HTML(list).search('div.anythingSlider img').reverse }
150
+ site.parse_article { |article| { url: article['src'], title: article['alt'] } }
151
+ end
152
+
153
+ sites.name(:jsgoalnews) do |site|
154
+ site.resource { HTTPClient.new.get_content('http://feeds.feedburner.com/jsgoal/jsgoal?format=xml').encode('UTF-8') }
155
+ site.parse_list { |list| RSS::Parser.parse(list, false).items.reverse }
156
+ site.filter_article { |article| article.title =~ /札幌/ }
157
+ site.parse_article { |article|
158
+ c = HTTPClient.new.get(article.link)
159
+ c = HTTPClient.new.get(c.header['location'].first) while c.status == 301
160
+ { url: c.header['location'].first, title: article.title }
161
+ }
162
+ end
163
+
164
+ sites.name(:jsgoalphotos) do |site|
165
+ site.resource { HTTPClient.new.get_content('http://feeds.feedburner.com/jsgoal/photo?format=xml').encode('UTF-8') }
166
+ site.parse_list { |list| RSS::Parser.parse(list, false).items.reverse }
167
+ site.filter_article { |article| article.title =~ /札幌/ }
168
+ site.parse_article { |article|
169
+ c = HTTPClient.new.get(article.link)
170
+ c = HTTPClient.new.get(c.header['location'].first) while c.status == 301
171
+ { url: c.header['location'].first, title: article.title }
172
+ }
173
+ end
174
+
175
+ sites.name(:clubconsadole) do |site|
176
+ site.resource { HTTPClient.new.get_content('http://club-consadole.jp/news/index.php?page=1').encode('UTF-8') }
177
+ site.parse_list { |list| Nokogiri::HTML(list).search('li.news a').reverse }
178
+ site.parse_article { |article| { url: article['href'], title: article.text.strip } }
179
+ end
115
180
  end
116
181
  end
117
182
  end
@@ -0,0 +1,3 @@
1
+ module ConsadoleAggregator
2
+ VERSION = "0.2.0"
3
+ end
@@ -1,8 +1,30 @@
1
+ require "consadole_aggregator/version"
2
+ require 'consadole_aggregator/helper.rb'
3
+ require 'consadole_aggregator/live.rb'
4
+ require 'consadole_aggregator/news.rb'
5
+
1
6
  require 'logger'
2
- require_relative 'consadole_aggregator/helper.rb'
3
- require_relative 'consadole_aggregator/aggregatable.rb'
4
- require_relative 'consadole_aggregator/live.rb'
5
- require_relative 'consadole_aggregator/news.rb'
7
+ require 'fileutils'
6
8
 
7
9
  module ConsadoleAggregator
10
+ class << self
11
+ DEFAULT_ROOT_DIR = File.expand_path('~/.consadole_aggregator')
12
+ DEFAULT_LOG_NAME = 'consadole_aggregator.log'
13
+
14
+ def root_dir
15
+ return @root_dir if @root_dir
16
+ FileUtils.mkdir_p DEFAULT_ROOT_DIR unless File.exist? DEFAULT_ROOT_DIR
17
+ @root_dir = DEFAULT_ROOT_DIR
18
+ end
19
+
20
+ def root_dir= root_dir
21
+ FileUtils.mkdir_p root_dir unless File.exist? root_dir
22
+ @logger &&= Logger.new(File.join(root_dir, DEFAULT_LOG_NAME))
23
+ @root_dir = root_dir
24
+ end
25
+
26
+ def logger
27
+ @logger ||= Logger.new(File.join(root_dir, DEFAULT_LOG_NAME))
28
+ end
29
+ end
8
30
  end
@@ -1,27 +1,32 @@
1
1
  # -*- coding: utf-8 -*-
2
- require_relative '../spec_helper'
2
+ require 'spec_helper'
3
3
 
4
4
  describe ConsadoleAggregator::Helper do
5
- describe :concatenate do
5
+ describe '#truncate_for_twitter' do
6
6
  context 'with argument 140 chars' do
7
- subject { ConsadoleAggregator::Helper.concat('い' * 140) }
7
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('い' * 140) }
8
8
  it { should have(140).item }
9
9
  it { should == 'い' * 140 }
10
10
  end
11
11
  context 'with argument 140 chars and 18 chars url' do
12
- subject { ConsadoleAggregator::Helper.concat('ろ' * 140, url:'http://example.jp/') }
13
- it { should have(140).item }
12
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('ろ' * 140, url:'http://example.jp/') }
13
+ it { should have(138).item }
14
14
  it { should be_end_with 'ろ... http://example.jp/' }
15
15
  end
16
16
  context 'with argument 140 chars and 10 chars hashtag' do
17
- subject { ConsadoleAggregator::Helper.concat('は' * 140, hashtag:'#consadole') }
17
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('は' * 140, hashtag:'#consadole') }
18
18
  it { should have(140).item }
19
19
  it { should be_end_with 'は... #consadole' }
20
20
  end
21
21
  context 'with argument 140 chars and 18 chars url and 10 chars hashtag' do
22
- subject { ConsadoleAggregator::Helper.concat('に' * 140, url:'http://example.jp/', hashtag:'#consadole') }
23
- it { should have(140).item }
22
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('に' * 140, url:'http://example.jp/', hashtag:'#consadole') }
23
+ it { should have(138).item }
24
24
  it { should be_end_with 'に... http://example.jp/ #consadole' }
25
25
  end
26
+ context 'with argument 140 chars and 29 chars url and 10 chars hashtag' do
27
+ subject { ConsadoleAggregator::Helper.truncate_for_twitter('に' * 140, url:'http://example.jp/foo/bar/baz', hashtag:'#consadole') }
28
+ it { should have(149).item }
29
+ it { should be_end_with 'に... http://example.jp/foo/bar/baz #consadole' }
30
+ end
26
31
  end
27
32
  end