movie_spider 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5f678d1d4e3479f2fa48b889671b7bc2d915208c
4
+ data.tar.gz: bdf47ebf540ed0cb9374e46038d5f2913b236dd0
5
+ SHA512:
6
+ metadata.gz: fd34d70d9da3c88b3d38f2692949d2e07162d2a7fe8c860c98a580362ddf9c3670782a185f4dbbaf5f9ccabae310cc1f8af369a7bec01549e30225379a62e508
7
+ data.tar.gz: c75f433426d2f5425259a17d06238e8e3155ad2933d1a1ac37123c6ffd6e36a5a6a0b7cb82d9560112c40b87479d9f8e80a2c04d545ab5856e903d8c94ed1a4f
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in movie_spider.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 hzlu
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # MovieSpider
2
+
3
+ Fetches movie posters, stills, and ratings from Mtime and Douban.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'movie_spider'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install movie_spider
20
+
21
+ ## Usage
22
+
23
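+ Call the module-level helpers with a Douban movie id and a source name, for example `MovieSpider.fetch_posters(douban_id, "all")`, `MovieSpider.fetch_stills(douban_id, "mtime")` or `MovieSpider.fetch_rating(douban_id, "douban")`. The source is `douban` or `mtime`; stills and posters also accept `all`.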
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/movie_spider/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,58 @@
1
+ # coding: utf-8
2
+
3
module MovieSpider
  # Client for the Douban v2 movie API and the Douban photo gallery pages.
  #
  # API calls go through RestClient and gallery pages are parsed with
  # Nokogiri; both are expected to be loaded by the time a method is called.
  class DoubanMovie
    UrlPrefix = "http://api.douban.com/v2/"
    # NOTE(review): the API key is committed in source; consider moving it to
    # configuration.
    Key = "0c89712b2297db4e259c538167c791ea"

    # Fetches the subject (movie detail) record for a Douban movie id.
    #
    # @param movie_id [String, Integer] Douban subject id
    # @return [Hash, nil] parsed JSON, or nil when the request timed out or
    #   the response was not valid JSON
    def get_subject(movie_id)
      api_get("movie/subject/#{movie_id}?apikey=#{Key}")
    end

    # Searches Douban for movies by name. Any "[...]" tag in the name is
    # removed before searching.
    #
    # @param movie_name [String]
    # @return [Array, nil] the "subjects" entry of the response; an empty
    #   array when the request failed
    def douban_search_movie(movie_name)
      keyword = movie_name.gsub(/\[.+\]/, "")
      response = api_get("movie/search?q=#{keyword}&apikey=#{Key}")
      response.is_a?(Hash) ? response["subjects"] : []
    end

    # Performs a GET against the Douban API and parses the JSON body.
    #
    # @param path [String] path and query string relative to UrlPrefix
    # @return [Object, nil] parsed JSON, or nil after a timeout or a JSON
    #   parse failure (both are reported, not raised)
    def api_get(path)
      # URI.encode was removed in Ruby 3.0; the default parser's #escape
      # applies the same escaping rules.
      url = URI::DEFAULT_PARSER.escape(UrlPrefix + path)
      JSON.parse(RestClient.get(url))
    rescue Timeout::Error => e
      report_api_failure(e, "获取豆瓣API: #{UrlPrefix}#{path}超时出错...")
    rescue JSON::JSONError => e
      report_api_failure(e, "获取豆瓣API: #{UrlPrefix}#{path}数据JSON.parse出错...")
    end

    # @param douban_id [String, Integer] Douban subject id
    # @return [Array<String>] photo URLs scraped from the stills gallery
    def douban_movie_stills(douban_id)
      fetch_img("http://movie.douban.com/subject/#{douban_id}/photos?type=S&start=0&sortby=vote&size=a&subtype=o")
    end

    # @param douban_id [String, Integer] Douban subject id
    # @return [Array<String>] photo URLs scraped from the posters gallery
    def douban_movie_posters(douban_id)
      fetch_img("http://movie.douban.com/subject/#{douban_id}/photos?type=R&start=0&sortby=vote&size=a&subtype=a")
    end

    # Collects photo URLs from a gallery page and every page reachable through
    # its "next" link. Thumbnail URLs are rewritten by replacing the first
    # "thumb" with "photo".
    #
    # @param url [String] URL of the first gallery page
    # @return [Array<String>] unique photo URLs; empty when the first page
    #   cannot be loaded
    def fetch_img(url)
      photos = []
      visited = {}
      # Track visited pages so a "next" link that loops back cannot recurse
      # forever.
      while url && !visited[url]
        visited[url] = true
        doc = load_gallery_page(url)
        break unless doc

        doc.css("ul li div.cover a").each do |link|
          image = link.css("img").first
          src = image && image["src"]
          photos << src.sub("thumb", "photo") if src
        end

        next_link = doc.css("span.next a").first
        url = next_link && next_link["href"]
      end
      photos.uniq
    end

    private

    # Downloads and parses one gallery page; returns nil on any failure.
    # URI#open is used instead of Kernel#open so the scraped URL is never
    # interpreted as a file path or a "|command" string.
    def load_gallery_page(url)
      URI.parse(url).open { |io| Nokogiri::HTML(io) }
    rescue StandardError
      nil
    end

    # Reports an API failure through ExceptionNotifier and the Rails logger
    # when the host application provides them, otherwise writes the message
    # to stderr. Always returns nil so it can be a rescue branch's value.
    def report_api_failure(error, message)
      if defined?(ExceptionNotifier::Notifier)
        ExceptionNotifier::Notifier.background_exception_notification(error).deliver
      end
      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger.error(message)
      else
        warn(message)
      end
      nil
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # coding: utf-8
2
module MovieSpider
  # Scrapes stills, posters and ratings from Mtime.
  #
  # Poster pages are parsed with Nokogiri, which is expected to be loaded by
  # the time a method is called.
  class MtimeMovie
    # Image-list keys that together make up "all posters".
    POSTER_TYPES = %w[generalposter forecastposter roleposter].freeze

    # Fetches official stills.
    # The original note lists img_200, img_235 and img_1000 as the available
    # sizes, warns that the 235 size may fail to fetch, and recommends 220 or
    # 1000. NOTE(review): the note mentions both img_200 and 220 -- confirm
    # the real key names.
    #
    # @param movie_title [String]
    # @param img_size [String] key of the image size to extract
    # @return [Array] image URLs; empty on any failure
    def get_mtime_stills(movie_title, img_size="img_1000")
      parse_mtime(movie_title, "officialstageimage", img_size)
    end

    # Fetches the general (official release) posters.
    def get_general_posters(movie_title, img_size="img_1000")
      parse_mtime(movie_title, "generalposter", img_size)
    end

    # Fetches the forecast (teaser) posters.
    def get_forecast_posters(movie_title, img_size="img_1000")
      parse_mtime(movie_title, "forecastposter", img_size)
    end

    # Fetches the role (character) posters.
    def get_role_posters(movie_title, img_size="img_1000")
      parse_mtime(movie_title, "roleposter", img_size)
    end

    # Fetches general, forecast and role posters, in that order, with nil and
    # empty entries removed. The poster page is downloaded once and reused for
    # all three lists.
    #
    # @return [Array<String>]
    def get_mtime_posters(movie_title, img_size="img_1000")
      script = poster_script(movie_title)
      posters = POSTER_TYPES.flat_map { |type| extract_images(script, type, img_size) }
      posters.reject { |image| image.nil? || image == "" }
    end

    # Fetches the Mtime rating of the best search match for a title.
    #
    # @return [Object] the "movieRating" value of the match, or a
    #   human-readable message when there is no rating or the search response
    #   could not be parsed
    def get_mtime_rating(movie_title)
      fetch_result = parse_search(movie_title)
      return "解析出错,暂无评分" unless fetch_result.is_a?(Hash)

      movie = pick_movie(fetch_result)
      rating = movie && movie['movieRating']
      rating.nil? ? "影片暂无评分" : rating
    end

    private

    # Calls the Mtime search API and parses the JSON object embedded in the
    # response. Returns the parsed Hash, or "" when the request fails or no
    # parsable JSON object is present.
    def parse_search(movie_title)
      body = read_url("http://service.channel.mtime.com/Search.api?Ajax_CallBack=true&Ajax_CallBackType=Mtime.Channel.Services&Ajax_CallBackMethod=GetSearchResult&Ajax_CrossDomain=1&Ajax_CallBackArgument0=#{movie_title}")
      payload = body && body[/\{.*\}/]
      return "" unless payload

      JSON.parse(payload)
    rescue JSON::ParserError
      ""
    end

    # Resolves a title to an Mtime movie id; returns "" when nothing matches.
    def get_mtime_movie_id_by_title(movie_title)
      movie = pick_movie(parse_search(movie_title))
      movie_id = movie && movie['movieId']
      movie_id.nil? ? "" : movie_id
    end

    # Extracts the image list of the given type from the JavaScript variables
    # on the movie's poster page. Returns [] on any failure.
    def parse_mtime(movie_title, type, img_size)
      extract_images(poster_script(movie_title), type, img_size)
    end

    # Picks the movie entry to use from a parsed search response: the direct
    # hit when present, otherwise the first entry of "moreMovies". Returns nil
    # when the response has neither.
    def pick_movie(search_result)
      value = search_result.is_a?(Hash) ? search_result['value'] : nil
      movie_result = value.is_a?(Hash) ? value['movieResult'] : nil
      return nil unless movie_result.is_a?(Hash)

      direct = movie_result['directMovie']
      return direct if direct.is_a?(Hash) && !direct.empty?

      candidate = Array(movie_result['moreMovies']).first
      candidate.is_a?(Hash) ? candidate : nil
    end

    # Downloads the poster page for the title and returns the text of the
    # second <script> element inside <body>; returns "" on any failure.
    def poster_script(movie_title)
      movie_id = get_mtime_movie_id_by_title(movie_title)
      return "" if movie_id.to_s.empty?

      page_url = "http://movie.mtime.com/#{movie_id}/posters_and_images/posters/hot.html"
      html_doc = URI.parse(page_url).open { |io| Nokogiri::HTML(io) }
      html_doc.css("body").search("script")[1].text
    rescue StandardError
      ""
    end

    # Finds the last JSON object in the script whose first key is +type+ and
    # returns the +img_size+ entry of each item in its list. Returns [] when
    # the object is missing or malformed.
    def extract_images(script, type, img_size)
      payload = script.to_s.scan(/\{"#{Regexp.escape(type)}".*?\}\]\}/).last
      return [] unless payload

      Array(JSON.parse(payload)[type]).map { |item| item[img_size] }
    rescue StandardError
      []
    end

    # Escapes and downloads a URL, returning the body, or nil on any failure.
    # URI.encode was removed in Ruby 3.0, and Kernel#open stopped opening URLs
    # in Ruby 3.0, so the default parser's #escape and URI#open are used.
    def read_url(raw_url)
      URI.parse(URI::DEFAULT_PARSER.escape(raw_url)).open(&:read)
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module MovieSpider
  # Gem version string, read by the gemspec. Frozen so it cannot be mutated
  # at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,56 @@
1
+ require "movie_spider/version"
2
+ require "movie_spider/douban_fetcher"
3
+ require "movie_spider/mtime_fetcher"
4
+ require 'json'
5
+ require 'open-uri'
6
+
7
# Module-level convenience API over DoubanMovie and MtimeMovie.
#
# Movies are identified by their Douban id. Mtime lookups first resolve the
# id to a title through Douban and then search Mtime by that title.
module MovieSpider
  # Looks up a movie's title on Douban.
  #
  # @param douban_id [String, Integer]
  # @return [String, nil] the title, or nil when the subject lookup failed
  def self.get_title_from_douban_id(douban_id)
    subject = DoubanMovie.new.get_subject(douban_id)
    subject.is_a?(Hash) ? subject["title"] : nil
  end

  # Fetches still image URLs.
  #
  # @param douban_id [String, Integer]
  # @param source [String] matched against /douban/, /mtime/ and /all/, in
  #   that order
  # @return [Array, nil] image URLs; nil when source matches none of them
  def self.fetch_stills(douban_id,source)
    case source
    when /douban/
      # The title is only needed for Mtime, so no title lookup happens here.
      DoubanMovie.new.douban_movie_stills(douban_id)
    when /mtime/
      with_title(douban_id) { |title| MtimeMovie.new.get_mtime_stills(title) }
    when /all/
      DoubanMovie.new.douban_movie_stills(douban_id) +
        with_title(douban_id) { |title| MtimeMovie.new.get_mtime_stills(title) }
    end
  end

  # Fetches poster image URLs.
  #
  # @param douban_id [String, Integer]
  # @param source [String] matched against /douban/, /mtime/ and /all/, in
  #   that order
  # @return [Array, nil] image URLs; nil when source matches none of them
  def self.fetch_posters(douban_id,source)
    case source
    when /douban/
      DoubanMovie.new.douban_movie_posters(douban_id)
    when /mtime/
      with_title(douban_id) { |title| MtimeMovie.new.get_mtime_posters(title) }
    when /all/
      DoubanMovie.new.douban_movie_posters(douban_id) +
        with_title(douban_id) { |title| MtimeMovie.new.get_mtime_posters(title) }
    end
  end

  # Fetches a movie's rating from one site.
  #
  # @param douban_id [String, Integer]
  # @param source [String] matched against /douban/ then /mtime/
  # @return [Object] the Douban average rating (nil when the lookup failed),
  #   the Mtime rating or message, or an error message for an unknown source
  def self.fetch_rating(douban_id, source)
    if source =~ /douban/
      # One subject request provides the rating; no separate title lookup.
      subject = DoubanMovie.new.get_subject(douban_id)
      rating = subject.is_a?(Hash) ? subject["rating"] : nil
      rating.is_a?(Hash) ? rating["average"] : nil
    elsif source =~ /mtime/
      title = get_title_from_douban_id(douban_id)
      # Without a title the Mtime search would match an arbitrary movie.
      title.nil? ? "解析出错,暂无评分" : MtimeMovie.new.get_mtime_rating(title)
    else
      "评分网站参数错误[douban|mtime]"
    end
  end

  # @see DoubanMovie#get_subject
  def self.get_subject(douban_id)
    DoubanMovie.new.get_subject(douban_id)
  end

  # @see DoubanMovie#douban_search_movie
  def self.douban_search_movie(movie_name)
    DoubanMovie.new.douban_search_movie(movie_name)
  end

  # Yields the Douban title for the id, or returns [] when it cannot be
  # resolved, so Mtime image lookups degrade to "no images".
  def self.with_title(douban_id)
    title = get_title_from_douban_id(douban_id)
    title.nil? ? [] : yield(title)
  end
  private_class_method :with_title
end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read before the gem is
# installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'movie_spider/version'

Gem::Specification.new do |spec|
  spec.name          = "movie_spider"
  spec.version       = MovieSpider::VERSION
  spec.authors       = ["hzlu"]
  spec.email         = ["hzlu2010@163.com"]
  spec.summary       = %q{fetch movies' information}
  spec.description   = %q{fetch Mtime and Douban, movie posters, stills, and rating.}
  spec.homepage      = "http://www.dan-che.com"
  spec.license       = "MIT"

  # The two fetcher files are listed explicitly in addition to the git file
  # list; uniq keeps them from appearing twice once git also reports them.
  spec.files         = (`git ls-files -z`.split("\x0") + ["lib/movie_spider/douban_fetcher.rb", "lib/movie_spider/mtime_fetcher.rb"]).uniq
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Nokogiri and RestClient are called by the library code itself, so they
  # are runtime dependencies. Nokogiri is released as 1.x; the previous
  # "~> 2.0" constraint could not be satisfied.
  spec.add_dependency "nokogiri", "~> 1.6"
  spec.add_dependency "rest-client", ">= 1.7", "< 3.0"

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: movie_spider
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - hzlu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '2.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '2.0'
55
+ description: fetch Mtime and Douban, movie posters, stills, and rating.
56
+ email:
57
+ - hzlu2010@163.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - lib/movie_spider.rb
68
+ - lib/movie_spider/douban_fetcher.rb
69
+ - lib/movie_spider/mtime_fetcher.rb
70
+ - lib/movie_spider/version.rb
71
+ - movie_spider.gemspec
72
+ homepage: http://www.dan-che.com
73
+ licenses:
74
+ - MIT
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.4.5
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: fetch movies' infomation
96
+ test_files: []