crawler-movie-imdb 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 80ae48bcce3f954c65f34ff2656db0bdb1e9c7cf365517f3a9a2f4dce474d1bb
4
- data.tar.gz: 226b463a5513e82530a60d2c58a21b60f73321b5914bf15572aa647421aec839
3
+ metadata.gz: a261dfb1bad0cc3e1a762facb6167e2a67a907a592958300dbc4aaa81dfe7e84
4
+ data.tar.gz: b48fa5312d3c8a0cb44117fb91673b5cb35427f9bb21b13f4e3c4dca59091694
5
5
  SHA512:
6
- metadata.gz: 5909892bf56d86b7d775d98f17df57f1c2e67dd3d94d1621e533fff9017ef80da51aa9170313c7f48f29b9810575eacac5418c61a614f0edf7b081a948880ecf
7
- data.tar.gz: 764d83e319530d632ebe8cb0f49f64f51e674a32ef8e43b12d6728051526f1b841bf0dbd0d88472f976b11d6f9493c67bbbb96e814710127a028d042b2007264
6
+ metadata.gz: 268656d0ec96de093ce2deace8b106599ee5a0db4e0353d94f47ac256c1bfecc5acab27213a6055a3e3102c32946eba0bb4e7e0f3808c3cd254faf4cbddf6768
7
+ data.tar.gz: 772e2dff81f70c34df0f6cb6fa9068fefe0f3b9829a7c865927744816d4a42417190ca04aa62964ec305ccf9c36b2de4cb5b59ebf2c5320bab862b0fe900a98b
@@ -30,4 +30,5 @@ Gem::Specification.new do |spec|
30
30
  spec.add_development_dependency 'rake', '~> 10.0'
31
31
  spec.add_runtime_dependency 'faraday', '>= 0.15'
32
32
  spec.add_runtime_dependency 'nokogiri', '>= 1.4'
33
+ spec.add_runtime_dependency 'activesupport', '>= 3.0'
33
34
  end
@@ -2,11 +2,50 @@ require 'crawler/movie'
2
2
  require 'faraday'
3
3
  require 'nokogiri'
4
4
  require 'date'
5
+ require 'active_support/core_ext/object/blank'
6
+ require 'uri'
7
+ require 'time'
5
8
 
6
9
  module Crawler
7
10
  module Movie
8
11
  module Providers
9
12
  module Imdb
13
+ def self.find(id)
14
+ response = Faraday.get("https://www.imdb.com/title/#{id}/")
15
+
16
+ return if !response.success? || !response.body
17
+
18
+ html = Nokogiri::HTML(response.body)
19
+ widget = html.css('#title-overview-widget')
20
+ title = widget.css('.title_wrapper h1 > text()').text.tr("\u00A0", '').strip
21
+ original_title = widget.css('.title_wrapper .originalTitle > text()').text.presence
22
+ genres = (html.css('#titleStoryLine .see-more:contains("Genres:") a') || []).map { |genre| genre.text.strip }
23
+ poster = widget.css('.poster img')
24
+ poster_url = poster.present? ? "#{poster.attr('src').value.split('@').first}@._V1_.jpg" : nil
25
+ overview = widget.css('.summary_text').text.sub(/See full (synopsis|summary) »/, '').strip
26
+ overview = nil if overview == 'Add a Plot »'
27
+ release_date_matches = html.css('#titleDetails .txt-block:contains("Release Date:") > text()').text.match(/(?<release_date>\d{1,2} \w+ \d{4})/)
28
+ release_date = release_date_matches && Date.parse(release_date_matches[:release_date])
29
+ original_languages = (html.css('#titleDetails .txt-block:contains("Language:") a') || []).map do |lang|
30
+ uri = URI(lang.attr('href'))
31
+ params = URI.decode_www_form(uri.query).to_h
32
+ params['primary_language']
33
+ end
34
+
35
+ {
36
+ id: id,
37
+ source: 'internet-movie-database',
38
+ title: title,
39
+ poster_url: poster_url,
40
+ backdrop_url: nil,
41
+ original_languages: original_languages,
42
+ original_titles: original_title,
43
+ genres: genres,
44
+ overview: overview,
45
+ release_date: release_date
46
+ }
47
+ end
48
+
10
49
  def self.search(query)
11
50
  movies = []
12
51
  current_start = 1
@@ -29,11 +68,15 @@ module Crawler
29
68
  overview = item_content.css('.text-muted')[2].text.sub(/See full (synopsis|summary) »/, '').strip
30
69
  overview = nil if overview == 'Add a Plot'
31
70
  year_matches = item_content.css('.lister-item-year').text.match(/\((?<year>\d+)\)/)
71
+ release_date = year_matches && Date.parse("#{year_matches[:year]}-1-1")
32
72
  item_image = movie.css('.lister-item-image img')
33
73
  id = item_image.attr('data-tconst').value
34
74
  poster_url = item_image.attr('loadlate').value.split('@').first
35
75
  poster_url = nil if poster_url.match?(%r{/nopicture/})
36
76
  poster_url += '@._V1_.jpg' if poster_url
77
+ details = find(id)
78
+
79
+ next details if details
37
80
 
38
81
  {
39
82
  id: id,
@@ -45,7 +88,7 @@ module Crawler
45
88
  original_title: nil,
46
89
  genres: genres,
47
90
  overview: overview,
48
- release_date: year_matches && Date.parse("#{year_matches[:year]}-1-1")
91
+ release_date: release_date
49
92
  }
50
93
  end
51
94
 
@@ -2,7 +2,7 @@ module Crawler
2
2
  module Movie
3
3
  module Providers
4
4
  module Imdb
5
- VERSION = '0.1.0'
5
+ VERSION = '0.2.0'
6
6
  end
7
7
  end
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawler-movie-imdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan PHILIPPE
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: activesupport
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
69
83
  description: ''
70
84
  email:
71
85
  - jonathan@cinema.paris