afr_load 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +13 -13
- data/lib/afr_load/parser.rb +45 -0
- data/lib/afr_load/tv_program.rb +36 -0
- data/lib/afr_load/version.rb +1 -1
- data/lib/afr_load.rb +39 -1
- metadata +6 -5
- data/lib/afr_load/afr_load.rb +0 -73
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf37db32d065fd3c466e56015f1ab244f6deb163
|
4
|
+
data.tar.gz: 764b419232276898922f34f932784b7c8e4c75cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cbd76e0148290f6eeb086d2f73546d879fc960b011edb9121086d10b79cac38439a28bfddcd2e8fee269487c49de24d140b23b37fdf81dbd02e47bd51fe9e50
|
7
|
+
data.tar.gz: 2b0b09e1f490156ec28482e04f0cdf92eb3bb7958c8d91e7b39916e4826355884e26550eb7fd04d9aae37a8d43a485024d7097bdb29025636ff82b32af9ba8fa
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -10,24 +10,24 @@ Add this line to your application's Gemfile:
|
|
10
10
|
gem 'afr_load'
|
11
11
|
```
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
$ bundle
|
16
|
-
|
17
|
-
Or install it yourself as:
|
13
|
+
install it yourself as:
|
18
14
|
|
19
15
|
$ gem install afr_load
|
20
16
|
|
21
17
|
## Usage
|
22
18
|
|
23
|
-
AfrLoad::AfrLoad.get_schedule()
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
programs = AfrLoad::AfrLoad.get_schedule()
|
20
|
+
programs.each do |program|
|
21
|
+
program.show()
|
22
|
+
# program.on_air_date #=> 放送日
|
23
|
+
# program.title_ja #=> 邦題
|
24
|
+
# program.title #=> 原題
|
25
|
+
# program.released_year #=> 公開年
|
26
|
+
# program.released_country #=> 公開国(製作国?)
|
27
|
+
# program.leading_actor #=> 主演俳優
|
28
|
+
# program.leading_actor #=> 主演俳優
|
29
|
+
# program.supporting_actor #=> 助演俳優
|
30
|
+
end
|
31
31
|
|
32
32
|
## Contributing
|
33
33
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module AfrLoad
|
4
|
+
module Parser
|
5
|
+
class << self
|
6
|
+
def parse(document)
|
7
|
+
month_lineup_doc = get_month_lineup(document)
|
8
|
+
month_lineup_doc.map do |lineup|
|
9
|
+
parse_month_lineup(lineup)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def get_month_lineup(document)
|
14
|
+
document.xpath("//div[@id='contents']/div").select do |contents_child|
|
15
|
+
is_month_lineup(contents_child)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def is_month_lineup(contents_child)
|
20
|
+
return false if contents_child.attribute("id") == nil
|
21
|
+
if contents_child.attribute("id").value =~ /[0-9]{6}/
|
22
|
+
true
|
23
|
+
else
|
24
|
+
false
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def parse_month_lineup(contents_child)
|
29
|
+
contents_child.xpath("//div/div[@class='gogo_item']").map do |movie_node|
|
30
|
+
data_block = movie_node.at_xpath("div[contains(@class, 'g_data_block')]")
|
31
|
+
year_country = data_block.at_xpath("div/span[@class='g_country_year']").text.split("◆")
|
32
|
+
tv_program = TvProgram::TvProgram.new(
|
33
|
+
on_air_date: movie_node.at_xpath("span[contains(@class, 'g_day')]").text,
|
34
|
+
title_ja: data_block.at_xpath("h3/span[@class='jp']").text,
|
35
|
+
title: data_block.at_xpath("h3/span[contains(@class, 'en')]").text ,
|
36
|
+
released_year: year_country[0],
|
37
|
+
released_country: year_country[1],
|
38
|
+
leading_actor: data_block.xpath("div/div/div[1]/span[2]").text,
|
39
|
+
supporting_actor: data_block.xpath("div/div/div[2]/span[2]").text
|
40
|
+
)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module AfrLoad
|
4
|
+
module TvProgram
|
5
|
+
class TvProgram
|
6
|
+
attr_reader :on_air_date,
|
7
|
+
:title_ja, :title,
|
8
|
+
:released_year, :released_country,
|
9
|
+
:leading_actor, :supporting_actor
|
10
|
+
|
11
|
+
def initialize(on_air_date:, title_ja:, title:,
|
12
|
+
released_year:, released_country:,
|
13
|
+
leading_actor:, supporting_actor:)
|
14
|
+
@on_air_date = on_air_date
|
15
|
+
@title_ja = title_ja
|
16
|
+
@title = title
|
17
|
+
@released_year = released_year
|
18
|
+
@released_country = released_country
|
19
|
+
@leading_actor = leading_actor
|
20
|
+
@supporting_actor = supporting_actor
|
21
|
+
end
|
22
|
+
|
23
|
+
def show()
|
24
|
+
puts <<"EOS"
|
25
|
+
on_air_date: #{@on_air_date}, \
|
26
|
+
title_ja: #{@title_ja}, \
|
27
|
+
title: #{@title}, \
|
28
|
+
released_year: #{@released_year}, \
|
29
|
+
released_country: #{@released_country}, \
|
30
|
+
leading_actor: #{@leading_actor}, \
|
31
|
+
supporting_actor: #{@supporting_actor}
|
32
|
+
EOS
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/afr_load/version.rb
CHANGED
data/lib/afr_load.rb
CHANGED
@@ -1,7 +1,45 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require "oga"
|
4
|
+
require "httpclient"
|
5
|
+
|
3
6
|
require "afr_load/version"
|
4
|
-
require "afr_load/afr_load"
|
7
|
+
require "afr_load/tv_program"
|
8
|
+
require "afr_load/parser.rb"
|
5
9
|
|
6
10
|
module AfrLoad
|
11
|
+
class AfrLoad
|
12
|
+
attr_reader :url, :programs, :document
|
13
|
+
|
14
|
+
AFR_LOAD_URL = "http://www.tv-tokyo.co.jp/telecine/oa_afr_load/"
|
15
|
+
|
16
|
+
def initialize(url = AFR_LOAD_URL)
|
17
|
+
@programs = Array.new()
|
18
|
+
@url = url
|
19
|
+
end
|
20
|
+
|
21
|
+
def get_schedule()
|
22
|
+
@document = self.class.fetch_schedule(@url)
|
23
|
+
@programs = Parser.parse(@document).flatten
|
24
|
+
@programs
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.get_schedule(url = AFR_LOAD_URL)
|
28
|
+
afr = self.new(url)
|
29
|
+
afr.get_schedule()
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.get_schedule_from_file(file_path)
|
33
|
+
handler = File.open(file_path)
|
34
|
+
document = Oga.parse_html(handler)
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.fetch_schedule(url = AFR_LOAD_URL)
|
38
|
+
Oga.parse_html(Enumerator.new do |yielder|
|
39
|
+
HTTPClient.get(url) do |chunk|
|
40
|
+
yielder << chunk.force_encoding("utf-8")
|
41
|
+
end
|
42
|
+
end)
|
43
|
+
end
|
44
|
+
end
|
7
45
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: afr_load
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- iaia
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,7 +80,7 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
description:
|
83
|
+
description: 午後ローのスケジュールから、放送日、タイトル、映画に関する情報をハッシュ形式で取得します
|
84
84
|
email:
|
85
85
|
- iaia72160@gmail.com
|
86
86
|
executables: []
|
@@ -98,7 +98,8 @@ files:
|
|
98
98
|
- bin/console
|
99
99
|
- bin/setup
|
100
100
|
- lib/afr_load.rb
|
101
|
-
- lib/afr_load/afr_load.rb
|
101
|
+
- lib/afr_load/parser.rb
|
102
|
+
- lib/afr_load/tv_program.rb
|
102
103
|
- lib/afr_load/version.rb
|
103
104
|
homepage: ''
|
104
105
|
licenses:
|
@@ -120,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
120
121
|
version: '0'
|
121
122
|
requirements: []
|
122
123
|
rubyforge_project:
|
123
|
-
rubygems_version: 2.
|
124
|
+
rubygems_version: 2.5.2
|
124
125
|
signing_key:
|
125
126
|
specification_version: 4
|
126
127
|
summary: Web Scraping for 午後のロードショー
|
data/lib/afr_load/afr_load.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require "oga"
|
4
|
-
require "httpclient"
|
5
|
-
|
6
|
-
module AfrLoad
|
7
|
-
class AfrLoad
|
8
|
-
attr_reader :programs
|
9
|
-
|
10
|
-
AFR_LOAD_URL = "http://www.tv-tokyo.co.jp/telecine/oa_afr_load/"
|
11
|
-
|
12
|
-
def initialize()
|
13
|
-
@programs = Array.new()
|
14
|
-
end
|
15
|
-
def self.get_schedule()
|
16
|
-
afr = self.new()
|
17
|
-
afr.get_schedule()
|
18
|
-
return afr.programs
|
19
|
-
end
|
20
|
-
def afr_load(file_path)
|
21
|
-
if file.blank?
|
22
|
-
document = get_schedule()
|
23
|
-
else
|
24
|
-
document = get_schedule_from_file(file_path)
|
25
|
-
end
|
26
|
-
parse(document)
|
27
|
-
end
|
28
|
-
def get_schedule()
|
29
|
-
enum = Enumerator.new do |yielder|
|
30
|
-
HTTPClient.get(AFR_LOAD_URL) do |chunk|
|
31
|
-
yielder << chunk.force_encoding("utf-8")
|
32
|
-
end
|
33
|
-
end
|
34
|
-
document = Oga.parse_html(enum)
|
35
|
-
parse(document)
|
36
|
-
return document
|
37
|
-
end
|
38
|
-
def get_schedule_from_file(file_path)
|
39
|
-
handler = File.open(file_path)
|
40
|
-
document = Oga.parse_html(handler)
|
41
|
-
parse(document)
|
42
|
-
return document
|
43
|
-
end
|
44
|
-
def parse(document)
|
45
|
-
document.xpath("//div[@id='contents']/div").each do |contents_child|
|
46
|
-
if contents_child.attribute("id") == nil
|
47
|
-
next
|
48
|
-
end
|
49
|
-
if not contents_child.attribute("id").value =~ /[0-9]{6}/
|
50
|
-
next
|
51
|
-
end
|
52
|
-
# gogo_item
|
53
|
-
contents_child.xpath("//div/div[@class='gogo_item']").each do |movie_node|
|
54
|
-
movie = Hash.new()
|
55
|
-
movie.store("on_air_date", movie_node.at_xpath("span[contains(@class, 'g_day')]").text)
|
56
|
-
data_block = movie_node.at_xpath("div[contains(@class, 'g_data_block')]")
|
57
|
-
|
58
|
-
movie.store("title_ja", data_block.at_xpath("h3/span[@class='jp']").text)
|
59
|
-
movie.store("title", data_block.at_xpath("h3/span[contains(@class, 'en')]").text)
|
60
|
-
|
61
|
-
year_country = data_block.at_xpath("div/span[@class='g_country_year']").text.split("◆")
|
62
|
-
movie.store("released_year", year_country[0])
|
63
|
-
movie.store("released_country", year_country[1])
|
64
|
-
|
65
|
-
movie.store("leading_actor", data_block.xpath("div/div/div[1]/span[2]").text)
|
66
|
-
movie.store("supporting_actor", data_block.xpath("div/div/div[2]/span[2]").text)
|
67
|
-
@programs.push(movie)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|