aikatsu_calendar 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in aikatsu_calendar.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 furugomu
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ `AikatsuCalendar::Scraper.scrape()`
@@ -0,0 +1,3 @@
1
# Rake entry point for the gem.
require "bundler/gem_tasks"
require 'rspec/core/rake_task'

# Defines the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Without a default task a bare `rake` aborts; make it run the specs.
task :default => :spec
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Gem specification for aikatsu_calendar.

# Put ./lib on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'aikatsu_calendar/version'

Gem::Specification.new do |spec|
  spec.name          = "aikatsu_calendar"
  spec.version       = AikatsuCalendar::VERSION
  spec.authors       = ["furugomu"]
  spec.email         = ["furugomu@gmail.com"]
  spec.description   = %q{Aikatsu calendar}
  spec.summary       = %q{Ai! Katsu!}
  spec.homepage      = "https://github.com/furugomu/aikatsu_calendar"
  spec.license       = "MIT"

  # NUL-separated listing: a path containing a newline or other unusual
  # character is not split or quoted by git.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "nokogiri"

  # ">= 1.3" rather than "~> 1.3": the pessimistic constraint rejects
  # Bundler 2.x, which makes `bundle install` fail on current toolchains.
  spec.add_development_dependency "bundler", ">= 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
@@ -0,0 +1,6 @@
1
+ require "aikatsu_calendar/version"
2
+ require "aikatsu_calendar/scraper"
3
+
4
# Top-level namespace of the gem.
module AikatsuCalendar
  # Address of the calendar page; Scraper.scrape uses it as its default
  # argument. Frozen so the shared constant cannot be mutated in place.
  # NOTE(review): the spelling "calender" presumably mirrors the site's own
  # path -- confirm before "correcting" it.
  URL = 'http://www.aikatsu.com/calender/'.freeze
end
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- encoding: UTF-8 -*-
3
+
4
+ require 'nokogiri'
5
+ require 'uri'
6
+ require 'json'
7
+ require 'open-uri'
8
+
9
module AikatsuCalendar
  # Turns the schedule tables of the calendar page into an array of
  # schedule hashes (see #parse_item for the hash layout).
  #
  # The scraper is stateful: #feed_table records the year and month of the
  # table being read and #feed_row records the day, so that #parse_item can
  # fall back on them for items that do not spell out their own date range.
  class Scraper
    # Raised when the document lacks the schedule container, or when a
    # table header / day cell does not contain the expected date text.
    class ParseError < StandardError; end

    # Base against which relative links inside schedule items are resolved.
    BASE_URL = 'http://www.aikatsu.com/calender/'.freeze

    # An optional " ※" marker followed by "Y年M月D日~[Y年]M月D日".
    # Captures: 1-3 = start year/month/day, 4 = end year (may be absent),
    # 5-6 = end month/day.
    DATE_RANGE = /(?: ※)?(\d+)年(\d+)月(\d+)日~(?:(\d+)年)?(\d+)月(\d+)日/

    attr_accessor :year, :month, :day, :schedules

    # Fetches and parses the calendar, returning the collected schedules.
    #
    # path - URL (or anything else `open` accepts) of the calendar page.
    #        NOTE(review): the value is handed to `open` unchanged, so only
    #        pass trusted values.
    #
    # Returns an Array of schedule hashes, de-duplicated on type, content,
    # start date and end date (the link is not part of the key).
    # Raises ParseError when the page does not have the expected structure.
    def self.scrape(path=AikatsuCalendar::URL)
      scraper = new
      # open-uri no longer patches Kernel#open to fetch URLs on Ruby 3.0+;
      # URI.open is the replacement. Older Rubies lack URI.open, so fall
      # back to Kernel.open there.
      opener = URI.respond_to?(:open) ? URI : Kernel
      doc = opener.open(path) { |f| Nokogiri::HTML.parse(f) }
      scraper.feed(doc)
      scraper.schedules.uniq do |item|
        [item[:type], item[:content], item[:date_from], item[:date_until]]
      end
    end

    def initialize
      @schedules = []
    end

    # Feeds every table inside the ".info-schedule" element of doc.
    #
    # Raises ParseError if the container element is missing.
    def feed(doc)
      container = doc.at_css(".info-schedule")
      raise ParseError, 'no ".info-schedule" element in document' unless container

      container.css('table').each do |table|
        feed_table(table)
      end
    end

    # Reads year and month from the table's first <th>, then feeds every
    # row after the first (the first row holds that header).
    #
    # Raises ParseError if the header does not contain "<year>年<month>月".
    def feed_table(table)
      header = table.at_css('th')
      text = header ? header.text : ''
      m = text.match(/(\d+)年(\d+)月/)
      raise ParseError, "no year/month in table header: #{text.inspect}" unless m

      @year = m[1].to_i
      @month = m[2].to_i

      # drop(1) instead of [1..-1]: the latter is nil for an empty row list.
      table.css('tr').drop(1).each do |tr|
        feed_row(tr)
      end
    end

    # Reads the day from the row's first <td>, then feeds each <p> in the row.
    #
    # Raises ParseError if the cell does not contain "<day>日".
    def feed_row(tr)
      cell = tr.at_css('td')
      text = cell ? cell.text : ''
      m = text.match(/(\d+)日/)
      raise ParseError, "no day in row: #{text.inspect}" unless m

      @day = m[1].to_i

      tr.css('p').each do |p|
        feed_item(p)
      end
    end

    # Parses one <p> element and appends the result to #schedules.
    def feed_item(p)
      @schedules << parse_item(p)
    end

    # Converts one <p> element into a schedule hash.
    #
    # Returns a Hash with:
    #   :type       - the "xxx" of a "schedule-xxx" CSS class, or nil
    #   :date_from  - Time, start of the schedule
    #   :date_until - Time, end of the schedule
    #   :content    - element text with the date range removed, stripped
    #   :link       - absolute URL String of the first <a href>, or nil
    #
    # When the text carries no explicit date range, both dates are the
    # year/month/day currently held by the scraper.
    def parse_item(p)
      text = p.text
      m = text.match(DATE_RANGE)
      if m
        # A missing end year means the range ends in the start year.
        year_until = (m[4] || m[1]).to_i
        date_from = Time.local(m[1].to_i, m[2].to_i, m[3].to_i)
        date_until = Time.local(year_until, m[5].to_i, m[6].to_i)
      else
        date_from = date_until = Time.local(@year, @month, @day)
      end

      anchor = p.at_css('a[href]')
      url = anchor ? URI.join(BASE_URL, anchor.attr(:href)).to_s : nil

      {
        type: class_to_type(p.attr(:class)),
        date_from: date_from,
        date_until: date_until,
        # Strip the date range out of the visible text.
        content: text.sub(DATE_RANGE, '').strip,
        link: url,
      }
    end

    # Extracts "xxx" from a class attribute containing "schedule-xxx".
    #
    # Returns the String, or nil when s is nil or has no such class.
    def class_to_type(s)
      s.to_s[/schedule-(\w+)/, 1]
    end

    # Serializes the collected schedules as JSON.
    #
    # pretty - when truthy, use JSON.pretty_generate; otherwise JSON.dump.
    def to_json(pretty=false)
      if pretty
        JSON.pretty_generate(@schedules)
      else
        JSON.dump(@schedules)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module AikatsuCalendar
  # Version of the gem; read by the gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "0.1.1".freeze
end
@@ -0,0 +1,120 @@
1
+ #!ruby
2
+ # -*- encoding: UTF-8 -*-
3
+
4
+ require 'aikatsu_calendar'
5
+
6
# Spec helper: parses an HTML snippet (surrounding whitespace stripped) as a
# document fragment and returns the fragment's first child node.
def html(s)
  fragment = Nokogiri::HTML::DocumentFragment.parse(s.strip)
  fragment.children.first
end
9
+
10
# Specs for AikatsuCalendar::Scraper.
#
# Written with `it` + `expect(...).to eq`, which RSpec 2.11+ and RSpec 3
# both accept; `its` is not part of RSpec 3 core and `should` is the legacy
# syntax.
describe AikatsuCalendar::Scraper do
  let(:aical) { AikatsuCalendar::Scraper.new }
  subject { aical }

  # Seed the date state that parse_item falls back on when an item carries
  # no explicit date range.
  before do
    aical.year = 2000
    aical.month = 10
    aical.day = 10
  end

  describe 'feed_table' do
    let(:table) do
      html(<<-HTML)
        <table cellspacing="0" class="apr">
        <tbody>
        <tr>
        <th colspan="3">2013年4月</th>
        </tr>
        </tbody>
        </table>
      HTML
    end

    before do
      aical.feed_table(table)
    end

    it 'year' do
      expect(aical.year).to eq(2013)
    end

    it 'month' do
      expect(aical.month).to eq(4)
    end
  end

  describe 'feed_row' do
    let(:tr) do
      html(<<-HTML)
        <tr>
        <td class="schedule-day"><span>03日</span><span class="schedule-week">水</span></td>
        <td><p>x</p></td>
        </tr>
      HTML
    end

    before do
      aical.feed_row(tr)
    end

    it 'day' do
      expect(aical.day).to eq(3)
    end
  end

  describe 'parse_item' do
    let(:p) { html('<p class="schedule-game clearfix">いちごちゃん</p>') }
    subject { aical.parse_item(p) }

    it 'content' do
      expect(subject[:content]).to eq(p.text)
    end

    it 'date_from' do
      expect(subject[:date_from]).to eq(Time.local(aical.year, aical.month, aical.day))
    end

    it 'date_until' do
      expect(subject[:date_until]).to eq(Time.local(aical.year, aical.month, aical.day))
    end

    it 'type' do
      expect(subject[:type]).to eq('game')
    end

    # Date range written inside the text, introduced by the "※" marker.
    context '中に日付が書かれている(※あり)' do
      let(:p) { html('<p class="schedule-game">あおいちゃん ※2013年2月14日~2014年3月31日</p>') }

      it 'content' do
        expect(subject[:content]).to eq('あおいちゃん')
      end

      it 'date_from' do
        expect(subject[:date_from]).to eq(Time.local(2013, 2, 14))
      end

      it 'date_until' do
        expect(subject[:date_until]).to eq(Time.local(2014, 3, 31))
      end
    end

    # Date range written inside the text, with no year on the end date.
    context '中に日付が書かれている(終了年が無い)' do
      let(:p) { html('<p class="schedule-game">蘭ちゃん ※2013年2月14日~3月31日</p>') }

      it 'content' do
        expect(subject[:content]).to eq('蘭ちゃん')
      end

      it 'date_from' do
        expect(subject[:date_from]).to eq(Time.local(2013, 2, 14))
      end

      it 'date_until' do
        expect(subject[:date_until]).to eq(Time.local(2013, 3, 31))
      end
    end

    # Date range written inside the text, without the "※" marker.
    context '中に日付が書かれている(※なし)' do
      let(:p) do
        html(<<-HTML)
          <p class="schedule-game">
          <span>2013年2月14日~2014年3月31日</span>
          おとめちゃん</p>
        HTML
      end

      it 'content' do
        expect(subject[:content]).to eq('おとめちゃん')
      end

      it 'date_from' do
        expect(subject[:date_from]).to eq(Time.local(2013, 2, 14))
      end

      it 'date_until' do
        expect(subject[:date_until]).to eq(Time.local(2014, 3, 31))
      end
    end

    # Item containing a link.
    context 'リンクがある' do
      let(:p) { html('<p class="schedule-magazine"><a href="../magazine/magazine15.html">ちゃお4月号</a></p>') }

      it 'link' do
        expect(subject[:link]).to eq('http://www.aikatsu.com/magazine/magazine15.html')
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: aikatsu_calendar
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - furugomu
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: Aikatsu calendar
79
+ email:
80
+ - furugomu@gmail.com
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - .gitignore
86
+ - .rspec
87
+ - Gemfile
88
+ - LICENSE.txt
89
+ - README.md
90
+ - Rakefile
91
+ - aikatsu_calendar.gemspec
92
+ - lib/aikatsu_calendar.rb
93
+ - lib/aikatsu_calendar/scraper.rb
94
+ - lib/aikatsu_calendar/version.rb
95
+ - spec/aikatsu_calendar/scraper_spec.rb
96
+ homepage: https://github.com/furugomu/aikatsu_calendar
97
+ licenses:
98
+ - MIT
99
+ post_install_message:
100
+ rdoc_options: []
101
+ require_paths:
102
+ - lib
103
+ required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
105
+ requirements:
106
+ - - ! '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ none: false
111
+ requirements:
112
+ - - ! '>='
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 1.8.23
118
+ signing_key:
119
+ specification_version: 3
120
+ summary: Ai! Katsu!
121
+ test_files:
122
+ - spec/aikatsu_calendar/scraper_spec.rb