movie_crawler 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +6 -0
- data/Gemfile +5 -2
- data/README.md +24 -16
- data/bin/app +17 -6
- data/lib/movie_crawler/crawler.rb +10 -13
- data/lib/movie_crawler/version.rb +1 -1
- data/movie_crawler-0.1.0.gem +0 -0
- data/movie_crawler.gemspec +1 -1
- data/spec/movies_spec.rb +3 -2
- data/spec/rank_spec.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f6ebe9cba57dd662d4f04a78fc0891ad3c1f77f
|
4
|
+
data.tar.gz: 4e63f3330a19ebae78af35c75dc64c201e2ad72d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5843495e2bb5b0a9b5bad3ad57e995484d62e843f46ff0787c773ba9cb3320a028277032ad438c2bc59396a5484492691e06d43769a1981982cb3ec70d897285
|
7
|
+
data.tar.gz: 5e2eb53601932cc76f510770dd4c834f24f0afe1fa76543f1372a3dce82e3dc765e6e2f13a13826f4456b3b4474c043d2c4b8ec4b5849af7db32495c9adefd46
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,34 +1,42 @@
|
|
1
|
-
|
1
|
+
MovieInfo
|
2
2
|
===============
|
3
|
+
[![Build Status](https://api.travis-ci.org/ChenLiZhan/SOA-Crawler.svg?branch=master)]()
|
3
4
|
|
4
|
-
|
5
|
+
home: [https://github.com/ChenLiZhan/SOA-Crawler](https://github.com/ChenLiZhan/SOA-Crawler)
|
6
|
+
|
7
|
+
MovieInfo tries to grab some information from [**@movies**](http://www.atmovies.com.tw/home/)
|
5
8
|
|
6
9
|
## About
|
7
10
|
|
8
|
-
|
11
|
+
If you want to check the movie schedule in Taiwan and choose which film to see at the weekend, this gem provides the current films, including the first-round, second-round and recommended movie lists. It also lets you check the description and ranking lists for a specific movie.
|
9
12
|
|
10
13
|
## Usage
|
14
|
+
This gem could be used as a command line utility or called from code
|
11
15
|
|
12
|
-
|
16
|
+
### CLI:
|
17
|
+
movie_crawler
|
13
18
|
|
14
|
-
|
15
|
-
````
|
16
|
-
$ git clone git@github.com:ChenLiZhan/SOA-Crawler.git
|
17
|
-
````
|
19
|
+
### code example:
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
require 'movie_crawler'
|
22
|
+
|
23
|
+
movie_list = MovieCrawler::MovieInfo.movies('LATEST') # or 'SECOND_ROUND'
|
24
|
+
puts movie_list
|
25
|
+
|
26
|
+
dvd_rank = MovieCrawler::MovieInfo.dvd_rank
|
27
|
+
puts dvd_rank
|
23
28
|
|
24
29
|
## Format
|
25
30
|
|
26
31
|
*Ex :*
|
27
32
|
|
28
|
-
**title:**
|
33
|
+
**title:** "黑魔女:沉睡魔咒Maleficent"
|
34
|
+
|
35
|
+
**story:** "改編自經典童話《睡美人》,由曾因《阿凡達》、《魔境夢遊》獲得奧斯卡肯定的視覺特效羅伯特史東博格執導,安潔莉娜裘莉與艾兒芬妮分別飾演反派女巫與奧蘿拉公主。"
|
36
|
+
|
37
|
+
**date:** 5/30/14
|
29
38
|
|
30
|
-
**
|
39
|
+
**runtime(minutes):** '96'
|
31
40
|
|
32
|
-
**
|
41
|
+
**trailer:** http://app.atmovies.com.tw/movie/movie.cfm?action=trailer&film_id=fmus01587310
|
33
42
|
|
34
|
-
**link:** http://plan.niceday.tw/trip/view/id/20973
|
data/bin/app
CHANGED
@@ -1,9 +1,20 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
require 'movie_crawler'
|
3
3
|
|
4
|
-
puts
|
5
|
-
puts MovieInfo.
|
6
|
-
|
7
|
-
puts
|
8
|
-
puts MovieInfo.
|
9
|
-
|
4
|
+
puts "\ndvd_rank:"
|
5
|
+
puts MovieCrawler::MovieInfo.dvd_rank
|
6
|
+
|
7
|
+
puts "\nus_weekend_rank:"
|
8
|
+
puts MovieCrawler::MovieInfo.us_weekend
|
9
|
+
|
10
|
+
puts "\ntaipei_weekend_rank:"
|
11
|
+
puts MovieCrawler::MovieInfo.taipei_weekend
|
12
|
+
|
13
|
+
puts "\nlastest_movie_list:"
|
14
|
+
puts MovieCrawler::MovieInfo.movies('LATEST')
|
15
|
+
|
16
|
+
puts "\nfirst_round_movie_list_:"
|
17
|
+
puts MovieCrawler::MovieInfo.movies('FIRST_ROUND')
|
18
|
+
|
19
|
+
puts "\nsecond_round_movie_list:"
|
20
|
+
puts MovieCrawler::MovieInfo.movies('SECOND_ROUND')
|
@@ -6,7 +6,13 @@ require 'iconv'
|
|
6
6
|
module MovieCrawler
|
7
7
|
# get the info from atmovies
|
8
8
|
class MovieInfo
|
9
|
-
|
9
|
+
|
10
|
+
URL_LIST = {
|
11
|
+
'LATEST' => 'http://www.atmovies.com.tw/movie/movie_new.html',
|
12
|
+
'SECOND_ROUND' => 'http://www.atmovies.com.tw/movie/movie_now2-1.html'
|
13
|
+
# first_round is unnecessary; the result is the same as latest.
|
14
|
+
}
|
15
|
+
|
10
16
|
MOVIE_BASE_URL = 'http://www.atmovies.com.tw/movie/'
|
11
17
|
WHOLE_MOVIEWS_TITLES = "//div[@class = 'title']/a"
|
12
18
|
WHOLE_MOVIEWS_STORIES = "//div[@class = 'story']"
|
@@ -19,8 +25,6 @@ module MovieCrawler
|
|
19
25
|
REFLECTION_SAID = '&said='
|
20
26
|
REFLECTION_NAME = "//span[@class = 'at21b']"
|
21
27
|
TRAILER_URL = 'http://app.atmovies.com.tw/movie/movie.cfm?action=trailer&film_id='
|
22
|
-
FIRST_ROUND = 'http://www.atmovies.com.tw/movie/movie_now-1.html'
|
23
|
-
SECOND_ROUND = 'http://www.atmovies.com.tw/movie/movie_now2-1.html'
|
24
28
|
ATMOVIES_MAIN_URL = 'http://www.atmovies.com.tw/home/movie_homepage.html'
|
25
29
|
|
26
30
|
# add three rank parser
|
@@ -60,20 +64,13 @@ module MovieCrawler
|
|
60
64
|
|
61
65
|
# switch to different url accordingly
|
62
66
|
def self.movies(category = 'LATEST')
|
63
|
-
|
64
|
-
when 'LATEST'
|
65
|
-
url = LATEST
|
66
|
-
when 'FIRST_ROUND'
|
67
|
-
url = FIRST_ROUND
|
68
|
-
when 'SECOND_ROUND'
|
69
|
-
url = SECOND_ROUND
|
70
|
-
end
|
71
|
-
result = movies_parser(url)
|
67
|
+
result = movies_parser(category)
|
72
68
|
to_yaml(result)
|
73
69
|
end
|
74
70
|
|
75
71
|
# parse the movies accordingly
|
76
|
-
def self.movies_parser(
|
72
|
+
def self.movies_parser(category)
|
73
|
+
url = URL_LIST[category.upcase]
|
77
74
|
document = open_html(url)
|
78
75
|
titles = get_titles(document)
|
79
76
|
stories = get_stories(document)
|
Binary file
|
data/movie_crawler.gemspec
CHANGED
@@ -4,7 +4,7 @@ Gem::Specification.new do |s|
|
|
4
4
|
s.name = 'movie_crawler'
|
5
5
|
s.version = MovieCrawler::VERSION
|
6
6
|
s.executables << 'app'
|
7
|
-
s.date = '2014-10-
|
7
|
+
s.date = '2014-10-30'
|
8
8
|
s.summary = 'Grab the movies information from the atmovies.com'
|
9
9
|
s.description = 'Grab the movies information from the atmovies.com'
|
10
10
|
s.authors = ['Lee Chen', 'Chen Hung Tu', 'David Yang']
|
data/spec/movies_spec.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
|
-
require
|
2
|
+
require 'minitest/rg'
|
3
|
+
require File.expand_path('../../lib/movie_crawler', __FILE__)
|
3
4
|
|
4
5
|
LATEST = 'http://www.atmovies.com.tw/movie/movie_new.html'
|
5
6
|
SECOND_ROUND = 'http://www.atmovies.com.tw/movie/movie_now2-1.html'
|
@@ -7,7 +8,7 @@ TRAILER_URL = 'http://app.atmovies.com.tw/movie/movie.cfm?action=trailer&film_id
|
|
7
8
|
|
8
9
|
# generate a random number to test either latest or second_round
|
9
10
|
rand < 0.5 ? url = LATEST : url = SECOND_ROUND
|
10
|
-
sample = MovieInfo.movies_parser(url)
|
11
|
+
sample = MovieCrawler::MovieInfo.movies_parser(url)
|
11
12
|
|
12
13
|
describe 'movies_parser should involve' do
|
13
14
|
|
data/spec/rank_spec.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: movie_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lee Chen
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-10-
|
13
|
+
date: 2014-10-30 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: minitest
|
@@ -75,6 +75,8 @@ executables:
|
|
75
75
|
extensions: []
|
76
76
|
extra_rdoc_files: []
|
77
77
|
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- ".travis.yml"
|
78
80
|
- Gemfile
|
79
81
|
- README.md
|
80
82
|
- Rakefile
|
@@ -82,6 +84,7 @@ files:
|
|
82
84
|
- lib/movie_crawler.rb
|
83
85
|
- lib/movie_crawler/crawler.rb
|
84
86
|
- lib/movie_crawler/version.rb
|
87
|
+
- movie_crawler-0.1.0.gem
|
85
88
|
- movie_crawler.gemspec
|
86
89
|
- spec/movies_spec.rb
|
87
90
|
- spec/rank_spec.rb
|