afr_load 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +13 -13
- data/lib/afr_load/parser.rb +45 -0
- data/lib/afr_load/tv_program.rb +36 -0
- data/lib/afr_load/version.rb +1 -1
- data/lib/afr_load.rb +39 -1
- metadata +6 -5
- data/lib/afr_load/afr_load.rb +0 -73
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf37db32d065fd3c466e56015f1ab244f6deb163
|
4
|
+
data.tar.gz: 764b419232276898922f34f932784b7c8e4c75cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cbd76e0148290f6eeb086d2f73546d879fc960b011edb9121086d10b79cac38439a28bfddcd2e8fee269487c49de24d140b23b37fdf81dbd02e47bd51fe9e50
|
7
|
+
data.tar.gz: 2b0b09e1f490156ec28482e04f0cdf92eb3bb7958c8d91e7b39916e4826355884e26550eb7fd04d9aae37a8d43a485024d7097bdb29025636ff82b32af9ba8fa
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -10,24 +10,24 @@ Add this line to your application's Gemfile:
|
|
10
10
|
gem 'afr_load'
|
11
11
|
```
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
$ bundle
|
16
|
-
|
17
|
-
Or install it yourself as:
|
13
|
+
install it yourself as:
|
18
14
|
|
19
15
|
$ gem install afr_load
|
20
16
|
|
21
17
|
## Usage
|
22
18
|
|
23
|
-
AfrLoad::AfrLoad.get_schedule()
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
programs = AfrLoad::AfrLoad.get_schedule()
|
20
|
+
programs.each do |program|
|
21
|
+
program.show()
|
22
|
+
# program.on_air_date #=> 放送日
|
23
|
+
# program.title_ja #=> 邦題
|
24
|
+
# program.title #=> 原題
|
25
|
+
# program.released_year #=> 公開年
|
26
|
+
# program.released_country #=> 公開国(製作国?)
|
27
|
+
# program.leading_actor #=> 主演俳優
|
28
|
+
# program.leading_actor #=> 主演俳優
|
29
|
+
# program.supporting_actor #=> 助演俳優
|
30
|
+
end
|
31
31
|
|
32
32
|
## Contributing
|
33
33
|
|
data/lib/afr_load/parser.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module AfrLoad
  # Turns a parsed schedule page into TvProgram objects.
  #
  # Every method only relies on the node interface used below (`xpath`,
  # `at_xpath`, `attribute`, `text`), which is what the document handed over
  # by the caller is expected to provide.
  module Parser
    class << self
      # Parses the whole schedule document.
      #
      # @param document [#xpath] parsed schedule page
      # @return [Array<Array<TvProgram::TvProgram>>] one inner array per month
      #   container, in document order (callers flatten it when they want a
      #   single list)
      def parse(document)
        get_month_lineup(document).map { |lineup| parse_month_lineup(lineup) }
      end

      # Selects the direct children of <div id="contents"> that hold a
      # month's lineup.
      #
      # @param document [#xpath] parsed schedule page
      # @return [Array] the month container nodes
      def get_month_lineup(document)
        document.xpath("//div[@id='contents']/div").select do |contents_child|
          is_month_lineup(contents_child)
        end
      end

      # Tells whether a node is a month container, i.e. whether its id
      # attribute contains six consecutive digits.
      #
      # @param contents_child [#attribute] candidate node
      # @return [Boolean] false when the node has no id attribute at all
      def is_month_lineup(contents_child)
        id_attribute = contents_child.attribute("id")
        return false if id_attribute.nil?

        !(id_attribute.value =~ /[0-9]{6}/).nil?
      end

      # Builds one TvProgram per "gogo_item" element of a single month.
      #
      # The query is deliberately relative to +contents_child+. A leading
      # "//" is an absolute location path that starts at the document root,
      # so it returned every item of the page for every month and produced
      # duplicated programs once more than one month was listed.
      # NOTE(review): assumes the items are nested inside their month
      # container - confirm against the live page.
      #
      # @param contents_child [#xpath] a month container node
      # @return [Array<TvProgram::TvProgram>]
      def parse_month_lineup(contents_child)
        contents_child.xpath("descendant-or-self::div/div[@class='gogo_item']").map do |movie_node|
          build_program(movie_node)
        end
      end

      private

      # Reads the fields of one movie entry. Elements that are missing from
      # the markup yield nil fields instead of aborting the whole parse.
      def build_program(movie_node)
        data_block = movie_node.at_xpath("div[contains(@class, 'g_data_block')]")
        # The element text is "<year>◆<country>"; a missing element gives [].
        year_country = text_at(data_block, "div/span[@class='g_country_year']").to_s.split("◆")

        TvProgram::TvProgram.new(
          on_air_date: text_at(movie_node, "span[contains(@class, 'g_day')]"),
          title_ja: text_at(data_block, "h3/span[@class='jp']"),
          title: text_at(data_block, "h3/span[contains(@class, 'en')]"),
          released_year: year_country[0],
          released_country: year_country[1],
          leading_actor: data_block && data_block.xpath("div/div/div[1]/span[2]").text,
          supporting_actor: data_block && data_block.xpath("div/div/div[2]/span[2]").text
        )
      end

      # Text of the first node matching +path+ below +node+, or nil when
      # either the node or the match is absent.
      def text_at(node, path)
        found = node && node.at_xpath(path)
        found && found.text
      end
    end
  end
end
|
data/lib/afr_load/tv_program.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module AfrLoad
  module TvProgram
    # One film entry of the broadcast schedule.
    #
    # Release 0.1.0 handed each entry back as a Hash with string keys
    # ("title", "on_air_date", ...). #to_h and #[] keep that style of access
    # available on the object that replaced the Hash.
    class TvProgram
      # Attribute names, in the order used by #to_s and #to_h.
      FIELDS = [
        :on_air_date,
        :title_ja, :title,
        :released_year, :released_country,
        :leading_actor, :supporting_actor
      ].freeze

      attr_reader(*FIELDS)

      # All fields are required keywords; the values are stored as given.
      def initialize(on_air_date:, title_ja:, title:,
                     released_year:, released_country:,
                     leading_actor:, supporting_actor:)
        @on_air_date = on_air_date
        @title_ja = title_ja
        @title = title
        @released_year = released_year
        @released_country = released_country
        @leading_actor = leading_actor
        @supporting_actor = supporting_actor
      end

      # @return [Hash{Symbol => Object}] every field keyed by attribute name
      def to_h
        FIELDS.each_with_object({}) { |name, fields| fields[name] = public_send(name) }
      end

      # Hash-style read access with either a String or a Symbol key.
      #
      # @param key [String, Symbol] attribute name
      # @return [Object, nil] the field value, nil for an unknown key
      def [](key)
        # Look the key up in the whitelist instead of converting caller input
        # to a symbol and sending it blindly.
        name = FIELDS.find { |field| field.to_s == key.to_s }
        name && public_send(name)
      end

      # @return [String] "name: value" pairs for every field, joined by ", "
      #   on a single line
      def to_s
        FIELDS.map { |name| "#{name}: #{public_send(name)}" }.join(", ")
      end

      # Prints the single-line summary built by #to_s to standard output.
      #
      # @return [nil]
      def show()
        puts to_s
      end
    end
  end
end
|
data/lib/afr_load/version.rb
CHANGED
data/lib/afr_load.rb
CHANGED
@@ -1,7 +1,45 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require "oga"
|
4
|
+
require "httpclient"
|
5
|
+
|
3
6
|
require "afr_load/version"
|
4
|
-
require "afr_load/afr_load"
|
7
|
+
require "afr_load/tv_program"
|
8
|
+
require "afr_load/parser.rb"
|
5
9
|
|
6
10
|
module AfrLoad
  # Fetches the schedule page and exposes the programs parsed from it.
  class AfrLoad
    # url      - address the schedule is fetched from
    # programs - result of the last #get_schedule call (empty before it)
    # document - document parsed by the last #get_schedule call
    attr_reader :url, :programs, :document

    AFR_LOAD_URL = "http://www.tv-tokyo.co.jp/telecine/oa_afr_load/"

    # @param url [String] schedule page address, AFR_LOAD_URL by default
    def initialize(url = AFR_LOAD_URL)
      @programs = []
      @url = url
    end

    # Downloads the page at #url, parses it and remembers both the document
    # and the resulting programs.
    #
    # @return [Array] the flattened list returned by Parser.parse
    def get_schedule()
      @document = self.class.fetch_schedule(@url)
      @programs = Parser.parse(@document).flatten
    end

    # Convenience wrapper: builds an instance for +url+ and returns its
    # schedule.
    #
    # @param url [String] schedule page address
    # @return [Array] see #get_schedule
    def self.get_schedule(url = AFR_LOAD_URL)
      new(url).get_schedule()
    end

    # Parses a schedule page stored on disk.
    #
    # The block form of File.open closes the handle as soon as parsing is
    # done; the file used to be left open.
    #
    # @param file_path [String] path of the saved HTML page
    # @return [Object] the document returned by Oga.parse_html (the parsed
    #   page itself, not a list of programs)
    def self.get_schedule_from_file(file_path)
      File.open(file_path) { |handler| Oga.parse_html(handler) }
    end

    # Downloads +url+ and parses the response body as HTML.
    #
    # The body is handed to the parser chunk by chunk through an Enumerator,
    # each chunk tagged as UTF-8 first.
    #
    # @param url [String] schedule page address
    # @return [Object] the document returned by Oga.parse_html
    def self.fetch_schedule(url = AFR_LOAD_URL)
      chunks = Enumerator.new do |yielder|
        HTTPClient.get(url) do |chunk|
          yielder << chunk.force_encoding("utf-8")
        end
      end
      Oga.parse_html(chunks)
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: afr_load
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- iaia
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,7 +80,7 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
description:
|
83
|
+
description: 午後ローのスケジュールから、放送日、タイトル、映画に関する情報をハッシュ形式で取得します
|
84
84
|
email:
|
85
85
|
- iaia72160@gmail.com
|
86
86
|
executables: []
|
@@ -98,7 +98,8 @@ files:
|
|
98
98
|
- bin/console
|
99
99
|
- bin/setup
|
100
100
|
- lib/afr_load.rb
|
101
|
-
- lib/afr_load/afr_load.rb
|
101
|
+
- lib/afr_load/parser.rb
|
102
|
+
- lib/afr_load/tv_program.rb
|
102
103
|
- lib/afr_load/version.rb
|
103
104
|
homepage: ''
|
104
105
|
licenses:
|
@@ -120,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
120
121
|
version: '0'
|
121
122
|
requirements: []
|
122
123
|
rubyforge_project:
|
123
|
-
rubygems_version: 2.
|
124
|
+
rubygems_version: 2.5.2
|
124
125
|
signing_key:
|
125
126
|
specification_version: 4
|
126
127
|
summary: Web Scraping for 午後のロードショー
|
data/lib/afr_load/afr_load.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require "oga"
|
4
|
-
require "httpclient"
|
5
|
-
|
6
|
-
module AfrLoad
|
7
|
-
class AfrLoad
|
8
|
-
attr_reader :programs
|
9
|
-
|
10
|
-
AFR_LOAD_URL = "http://www.tv-tokyo.co.jp/telecine/oa_afr_load/"
|
11
|
-
|
12
|
-
def initialize()
|
13
|
-
@programs = Array.new()
|
14
|
-
end
|
15
|
-
def self.get_schedule()
|
16
|
-
afr = self.new()
|
17
|
-
afr.get_schedule()
|
18
|
-
return afr.programs
|
19
|
-
end
|
20
|
-
def afr_load(file_path)
|
21
|
-
if file.blank?
|
22
|
-
document = get_schedule()
|
23
|
-
else
|
24
|
-
document = get_schedule_from_file(file_path)
|
25
|
-
end
|
26
|
-
parse(document)
|
27
|
-
end
|
28
|
-
def get_schedule()
|
29
|
-
enum = Enumerator.new do |yielder|
|
30
|
-
HTTPClient.get(AFR_LOAD_URL) do |chunk|
|
31
|
-
yielder << chunk.force_encoding("utf-8")
|
32
|
-
end
|
33
|
-
end
|
34
|
-
document = Oga.parse_html(enum)
|
35
|
-
parse(document)
|
36
|
-
return document
|
37
|
-
end
|
38
|
-
def get_schedule_from_file(file_path)
|
39
|
-
handler = File.open(file_path)
|
40
|
-
document = Oga.parse_html(handler)
|
41
|
-
parse(document)
|
42
|
-
return document
|
43
|
-
end
|
44
|
-
def parse(document)
|
45
|
-
document.xpath("//div[@id='contents']/div").each do |contents_child|
|
46
|
-
if contents_child.attribute("id") == nil
|
47
|
-
next
|
48
|
-
end
|
49
|
-
if not contents_child.attribute("id").value =~ /[0-9]{6}/
|
50
|
-
next
|
51
|
-
end
|
52
|
-
# gogo_item
|
53
|
-
contents_child.xpath("//div/div[@class='gogo_item']").each do |movie_node|
|
54
|
-
movie = Hash.new()
|
55
|
-
movie.store("on_air_date", movie_node.at_xpath("span[contains(@class, 'g_day')]").text)
|
56
|
-
data_block = movie_node.at_xpath("div[contains(@class, 'g_data_block')]")
|
57
|
-
|
58
|
-
movie.store("title_ja", data_block.at_xpath("h3/span[@class='jp']").text)
|
59
|
-
movie.store("title", data_block.at_xpath("h3/span[contains(@class, 'en')]").text)
|
60
|
-
|
61
|
-
year_country = data_block.at_xpath("div/span[@class='g_country_year']").text.split("◆")
|
62
|
-
movie.store("released_year", year_country[0])
|
63
|
-
movie.store("released_country", year_country[1])
|
64
|
-
|
65
|
-
movie.store("leading_actor", data_block.xpath("div/div/div[1]/span[2]").text)
|
66
|
-
movie.store("supporting_actor", data_block.xpath("div/div/div[2]/span[2]").text)
|
67
|
-
@programs.push(movie)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|