picturehouse_uk 1.0.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +2 -1
  3. data/CHANGELOG.md +18 -0
  4. data/README.md +7 -10
  5. data/Rakefile +20 -7
  6. data/lib/picturehouse_uk.rb +4 -1
  7. data/lib/picturehouse_uk/cinema.rb +51 -100
  8. data/lib/picturehouse_uk/film.rb +22 -8
  9. data/lib/picturehouse_uk/internal/address_parser.rb +72 -0
  10. data/lib/picturehouse_uk/internal/cinema_page.rb +35 -0
  11. data/lib/picturehouse_uk/internal/film_with_screenings_parser.rb +87 -37
  12. data/lib/picturehouse_uk/internal/title_sanitizer.rb +49 -0
  13. data/lib/picturehouse_uk/internal/website.rb +39 -0
  14. data/lib/picturehouse_uk/screening.rb +63 -19
  15. data/lib/picturehouse_uk/version.rb +2 -2
  16. data/picturehouse_uk.gemspec +11 -11
  17. data/test/fixture_updater.rb +43 -0
  18. data/test/fixtures/address-fragments/duke-of-yorks.html +39 -0
  19. data/test/fixtures/address-fragments/hackney-picturehouse.html +12 -0
  20. data/test/fixtures/cinema/Duke_Of_Yorks.html +4370 -0
  21. data/test/fixtures/cinema/Duke_Of_Yorks/film_last.html +45 -0
  22. data/test/fixtures/cinema/Duke_Of_Yorks/film_second.html +37 -0
  23. data/test/fixtures/{abbeygate-contact-us.html → contact_us/Duke_Of_Yorks.html} +635 -156
  24. data/test/fixtures/{dukes-at-komedia-contact-us.html → contact_us/Dukes_At_Komedia.html} +582 -98
  25. data/test/fixtures/{picturehouses-homepage.html → home.html} +642 -146
  26. data/test/lib/picturehouse_uk/cinema_test.rb +127 -307
  27. data/test/lib/picturehouse_uk/film_test.rb +65 -16
  28. data/test/lib/picturehouse_uk/internal/address_parser_test.rb +55 -0
  29. data/test/lib/picturehouse_uk/internal/cinema_page_test.rb +51 -0
  30. data/test/lib/picturehouse_uk/internal/film_with_screenings_parser_test.rb +44 -151
  31. data/test/lib/picturehouse_uk/internal/title_sanitizer_test.rb +131 -0
  32. data/test/lib/picturehouse_uk/internal/website_test.rb +64 -0
  33. data/test/lib/picturehouse_uk/screening_test.rb +149 -21
  34. data/test/live/integration_test.rb +68 -0
  35. data/test/test_helper.rb +3 -1
  36. metadata +40 -43
  37. data/test/fixtures/dukes-at-komedia-cinema.html +0 -7148
  38. data/test/fixtures/film_node/blue-jasmine-done.html +0 -53
  39. data/test/fixtures/film_node/blue-jasmine-future.html +0 -55
  40. data/test/fixtures/film_node/bolshoi-spartacus.html +0 -26
  41. data/test/fixtures/film_node/captain-phillips-with-silver-screen-and-subtitles.html +0 -103
  42. data/test/fixtures/film_node/fifth-estate-with-big-scream.html +0 -73
  43. data/test/fixtures/film_node/london-film-festival-with-toddler-time.html +0 -46
  44. data/test/fixtures/film_node/met-encore-rusalka-as-live.html +0 -26
  45. data/test/fixtures/film_node/nt-encore-hamlet.html +0 -26
  46. data/test/fixtures/film_node/planes-with-kids-club.html +0 -77
  47. data/test/fixtures/film_node/royal-opera-house-don-quixote.html +0 -26
  48. data/test/fixtures/film_node/rsc-encore-richard-ii.html +0 -28
  49. data/test/fixtures/film_node/rsc-live-richard-ii.html +0 -41
  50. data/test/fixtures/film_node/rsc-live-the-two-gentlemen-of-verona-zero-cert.html +0 -19
  51. data/test/fixtures/hackney-contact-us.html +0 -998
@@ -0,0 +1,35 @@
1
+ module PicturehouseUk
2
+ # @api private
3
+ module Internal
4
+ # Fetches a cinema's page and splits it into html chunks, one per film
5
+ class CinemaPage
6
+ # css for a film + screenings
7
+ FILM_CSS = '#events .largelist .item'
8
+
9
+ # @param [String] cinema_id picturehouse cinema id, e.g. 'Duke_Of_Yorks'
10
+ def initialize(cinema_id)
11
+ @cinema_id = cinema_id
12
+ end
13
+
14
+ # break up the page into individual chunks for each film
15
+ # @return [Array<String>] html chunks for a film and its screenings
16
+ def film_html
17
+ film_nodes.map { |n| n.to_s.gsub(/^\s+/, '') }
18
+ end
19
+
20
+ private
21
+
22
+ def cinema
23
+ @cinema ||= PicturehouseUk::Internal::Website.new.cinema(@cinema_id)
24
+ end
25
+
26
+ def cinema_doc
27
+ @cinema_doc ||= Nokogiri::HTML(cinema)
28
+ end
29
+
30
+ def film_nodes
31
+ cinema_doc.css(FILM_CSS)
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,59 +1,109 @@
1
1
  module PicturehouseUk
2
-
3
- # Internal utility classes: Do not use
4
2
  # @api private
5
3
  module Internal
6
-
7
4
  # Parses a chunk of HTML to derive movie showing data
8
5
  class FilmWithScreeningsParser
6
+ # film name css
7
+ FILM_NAME_CSS = '.movielink'
8
+ # showings css
9
+ SCREENING_CSS = 'a[epoch]'
9
10
 
10
- # @param [String] film_html a chunk of html
11
- def initialize(film_html)
12
- @nokogiri_html = Nokogiri::HTML(film_html)
11
+ # @param [String] html a chunk of html for a film and its screenings
12
+ def initialize(html)
13
+ @html = html
13
14
  end
14
15
 
15
16
  # The film name
16
17
  # @return [String]
17
18
  def film_name
18
- name = @nokogiri_html.css('.movielink').children.first.to_s
19
+ TitleSanitizer.new(raw_film_name).sanitized
20
+ end
21
+
22
+ # Showings hashes
23
+ # @return [Array<Hash>]
24
+ def to_a
25
+ return [] unless screenings?
26
+ screening_nodes.map do |node|
27
+ {
28
+ film_name: film_name,
29
+ dimension: dimension
30
+ }.merge(ScreeningParser.new(node).to_hash)
31
+ end
32
+ end
33
+
34
+ private
35
+
36
+ def dimension
37
+ raw_film_name.match(/3d/i) ? '3d' : '2d'
38
+ end
39
+
40
+ def doc
41
+ @doc ||= Nokogiri::HTML(@html)
42
+ end
43
+
44
+ def raw_film_name
45
+ @raw_film_name ||= doc.css(FILM_NAME_CSS).children.first.to_s
46
+ end
19
47
 
20
- # screening types
21
- name = name.gsub /\s\[(AS LIVE: )?[ACPGU1258]+\]/, '' # remove certificate
22
- name = name.gsub /\s\[NO CERT\]/, '' # remove no certificate
23
- name = name.gsub /\s\[\]/, '' # remove no certificate
24
- name = name.gsub /\s+[23][dD]/, '' # remove 2d or 3d from title
48
+ def screening_nodes
49
+ @screening_nodes ||= doc.css(SCREENING_CSS)
50
+ end
25
51
 
26
- # special screenings
27
- name = name.gsub 'ROH. Live:', 'Royal Opera House:' # fill out Royal Opera House
28
- name = name.gsub 'Met. Encore:', 'Met Opera:' # fill out Met Opera
29
- name = name.gsub 'NT Encore:', 'National Theatre:' # National theatre
30
- name = name.gsub 'RSC Live:', 'Royal Shakespeare Company:' # RSC
31
- name = name.gsub 'RSC Encore:', 'Royal Shakespeare Company:' # RSC
52
+ def screenings?
53
+ !!screening_nodes
54
+ end
55
+ end
32
56
 
33
- name = name.squeeze(' ') # spaces compressed
34
- name = name.gsub /\A\s+/, '' # remove leading spaces
35
- name = name.gsub /\s+\z/, '' # remove trailing spaces
57
+ # parse an individual screening node
58
+ class ScreeningParser
59
+ # @param [Nokogiri::XML::Node] node a node with a film screening
60
+ def initialize(node)
61
+ @node = node
36
62
  end
37
63
 
38
- # Showings
64
+ # is the screening bookable?
65
+ # @return [Boolean]
66
+ def bookable?
67
+ !!booking_url
68
+ end
69
+
70
+ # the attributes of a single screening
39
71
  # @return [Hash]
40
72
  # @example
41
- # {
42
- # "2D" => [Time.utc, Time.utc]
43
- # }
44
- def showings
45
- @nokogiri_html.css('a[epoch]').inject({}) do |result, link|
46
- key = case link['class']
47
- when /big_scream/ then 'baby'
48
- when /kids_club|toddler_time/ then 'kids'
49
- when /silver_screen/ then 'silver'
50
- when /subtitled_cinema/ then 'subtitled'
51
- else '2d'
52
- end
53
- # this is a hack because Time.at() only uses local time
54
- time = Time.utc(1970)+link['epoch'].to_i
73
+ # PicturehouseUk::Internal::ScreeningParser.new(node).to_hash
74
+ # => {
75
+ # booking_url: 'http://...',
76
+ # time: <Time>,
77
+ # variant: ['baby']
78
+ # }
79
+ def to_hash
80
+ {
81
+ booking_url: "http://www.picturehouses.co.uk#{booking_url}",
82
+ time: time,
83
+ variant: variant
84
+ }
85
+ end
55
86
 
56
- result.merge(key => (result[key] || []) << time)
87
+ private
88
+
89
+ def booking_url
90
+ @booking_url ||= @node['html']
91
+ end
92
+
93
+ def time
94
+ @time ||= Time.utc(1970) + @node['epoch'].to_i
95
+ end
96
+
97
+ def variant
98
+ @variant ||= begin
99
+ case @node['class']
100
+ when /big_scream/ then ['baby']
101
+ when /kids_club|toddler_time/ then ['kids']
102
+ when /silver_screen/ then ['silver']
103
+ when /subtitled_cinema/ then ['subtitled']
104
+ else
105
+ []
106
+ end
57
107
  end
58
108
  end
59
109
  end
@@ -0,0 +1,49 @@
1
+ module PicturehouseUk
2
+ # @api private
3
+ module Internal
4
+ # Sanitize and standardize film titles
5
+ class TitleSanitizer
6
+ # strings and regex to be removed
7
+ REMOVE = [
8
+ /\s\[(AS LIVE: )?[ACPGU1258]+\]/, # regular certificate
9
+ /\s+[23][dD]/, # 2d or 3d from title
10
+ /\s\[NO CERT\]/, # no certificate
11
+ /\s\[\]/, # blank certificate
12
+ /ourscreen\: /, # ourscreen
13
+ /\s\(Re(\: \d{0,4})?\)/i, # Re-release
14
+ /\s\[CERT TBC\]/, # certificate TBC
15
+ ]
16
+
17
+ # regexes and their replacements
18
+ REPLACE = {
19
+ /Met\.? Encore: (.*)/ => 'Met Opera:',
20
+ /Met\.? Opera: (.*)/ => 'Met Opera: ',
21
+ /NT Encore: (.*)/ => 'National Theatre:',
22
+ /NT Live: (.*)/ => 'National Theatre:',
23
+ /ROH\.? Live: (.*)/ => 'Royal Opera House:',
24
+ /RSC\.? Live: (.*)/ => 'Royal Shakespeare Company:',
25
+ /RSC\.? Encore: (.*)/ => 'Royal Shakespeare Company:'
26
+ }
27
+
28
+ # @param [String] title a film title
29
+ def initialize(title)
30
+ @title = title
31
+ end
32
+
33
+ # sanitized and standardized title
34
+ # @return [String] title
35
+ def sanitized
36
+ @sanitzed ||= begin
37
+ sanitized = @title
38
+ REMOVE.each do |pattern|
39
+ sanitized.gsub! pattern, ''
40
+ end
41
+ REPLACE.each do |pattern, prefix|
42
+ sanitized.gsub!(pattern) { |_| prefix + $1 }
43
+ end
44
+ sanitized.squeeze(' ').strip
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,39 @@
1
+ require 'open-uri'
2
+
3
+ module PicturehouseUk
4
+ # @api private
5
+ module Internal
6
+ # fetches pages from the picturehouses.co.uk website
7
+ class Website
8
+ # get the cinema page with showings for passed id
9
+ # @return [String]
10
+ def cinema(id)
11
+ get("cinema/#{id}/")
12
+ end
13
+
14
+ # get the cinema contact information page for passed id
15
+ # @return [String]
16
+ def contact_us(id)
17
+ get("cinema/#{id}/Hires_Info/Contact_Us/")
18
+ end
19
+
20
+ # get the home page
21
+ # @return [String]
22
+ def home
23
+ get(nil)
24
+ end
25
+
26
+ # get the cinema page containing all upcoming films and screenings
27
+ # @return [String]
28
+ def whatson(id)
29
+ get("whatson?cinema=#{id}")
30
+ end
31
+
32
+ private
33
+
34
+ def get(path)
35
+ open("http://www.picturehouses.co.uk/#{path}").read
36
+ end
37
+ end
38
+ end
39
+ end
@@ -1,36 +1,80 @@
1
1
  module PicturehouseUk
2
-
3
2
  # A single screening of a film on the Picturehouse UK website
4
3
  class Screening
5
-
4
+ # @return [String] the booking URL on the cinema website
5
+ attr_reader :booking_url
6
6
  # @return [String] the cinema name
7
7
  attr_reader :cinema_name
8
+ # @return [String] 2d or 3d
9
+ attr_reader :dimension
8
10
  # @return [String] the film name
9
11
  attr_reader :film_name
10
- # @return [Time] the UTC time of the screening
11
- attr_reader :when
12
- # @return [String] the type of screening (2D, 3D, IMAX...)
13
- attr_reader :variant
14
12
 
15
- # @param [String] film_name the film name
16
- # @param [String] cinema_name the cinema name
17
- # @param [Time] time datetime of the screening (UTC preferred)
18
- # @param [String] variant the type of showing (e.g. 3d/baby/live)
19
- def initialize(film_name, cinema_name, time, variant=nil)
20
- @cinema_name, @film_name, @variant = cinema_name, film_name, variant
21
- @when = time.utc? ? time : TZInfo::Timezone.get('Europe/London').local_to_utc(time)
13
+ # @param [Hash] options
14
+ def initialize(options)
15
+ @booking_url = options.fetch(:booking_url, nil)
16
+ @cinema_name = options.fetch(:cinema_name)
17
+ @cinema_id = options.fetch(:cinema_id)
18
+ @dimension = options.fetch(:dimension, '2d')
19
+ @film_name = options.fetch(:film_name)
20
+ @time = options.fetch(:time)
21
+ @variant = options.fetch(:variant, [])
22
+ end
23
+
24
+ # Screenings at a single cinema
25
+ # @param [String] cinema_id the id of the cinema
26
+ # @return [Array<PicturehouseUk::Screening>]
27
+ def self.at(cinema_id)
28
+ cinema_page(cinema_id).film_html.map do |html|
29
+ create_for_single_film(html, cinema_id)
30
+ end.flatten
31
+ end
32
+
33
+ # The UTC time of the screening
34
+ # @return [Time]
35
+ def showing_at
36
+ @showing_at ||= begin
37
+ if @time.utc?
38
+ @time
39
+ else
40
+ TZInfo::Timezone.get('Europe/London').local_to_utc(@time)
41
+ end
42
+ end
22
43
  end
23
44
 
24
45
  # The date of the screening
25
46
  # @return [Date]
26
- def date
27
- @when.to_date
47
+ def showing_on
48
+ showing_at.to_date
49
+ end
50
+
51
+ # The kinds of screening
52
+ # @return [Array<String>]
53
+ def variant
54
+ @variant.map(&:downcase).sort
55
+ end
56
+
57
+ private
58
+
59
+ def self.cinema_hash(cinema_id)
60
+ {
61
+ cinema_id: cinema_id,
62
+ cinema_name: PicturehouseUk::Cinema.find(cinema_id).name
63
+ }
64
+ end
65
+
66
+ def self.cinema_page(cinema_id)
67
+ PicturehouseUk::Internal::CinemaPage.new(cinema_id)
68
+ end
69
+
70
+ def self.create_for_single_film(html, cinema_id)
71
+ screenings_parser(html).to_a.map do |attributes|
72
+ new cinema_hash(cinema_id).merge(attributes)
73
+ end
28
74
  end
29
75
 
30
- # @deprecated Please use {#variant} instead, I can't spell
31
- def varient
32
- warn "Please use #variant instead, I can't spell"
33
- variant
76
+ def self.screenings_parser(html)
77
+ PicturehouseUk::Internal::FilmWithScreeningsParser.new(html)
34
78
  end
35
79
  end
36
80
  end
@@ -1,6 +1,6 @@
1
1
  # Ruby interface for http://www.picturehouses.co.uk
2
- # @version 1.0.2
2
+ # @version 2.0.0
3
3
  module PicturehouseUk
4
4
  # Gem version
5
- VERSION = "1.0.2"
5
+ VERSION = '2.0.0'
6
6
  end
@@ -4,25 +4,25 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'picturehouse_uk/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "picturehouse_uk"
7
+ spec.name = 'picturehouse_uk'
8
8
  spec.version = PicturehouseUk::VERSION
9
- spec.authors = ["Andy Croll"]
10
- spec.email = ["andy@goodscary.com"]
11
- spec.description = %q{An API to pull movie information from the picturehouse.co.uk website}
12
- spec.summary = %q{It's a scraper, but a nice one}
13
- spec.homepage = ""
14
- spec.license = "MIT"
9
+ spec.authors = ['Andy Croll']
10
+ spec.email = ['andy@goodscary.com']
11
+ spec.description = 'An API to pull movie information from the picturehouse.co.uk website'
12
+ spec.summary = "It's a scraper, but a nice one"
13
+ spec.homepage = ''
14
+ spec.license = 'MIT'
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
17
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ["lib"]
19
+ spec.require_paths = ['lib']
20
20
 
21
- spec.add_development_dependency "bundler", "~> 1.3"
22
- spec.add_development_dependency "rake"
21
+ spec.add_development_dependency 'bundler', '~> 1.3'
22
+ spec.add_development_dependency 'minitest-reporters'
23
+ spec.add_development_dependency 'rake'
23
24
  spec.add_development_dependency 'webmock'
24
25
 
25
- spec.add_runtime_dependency 'httparty'
26
26
  spec.add_runtime_dependency 'nokogiri'
27
27
  spec.add_runtime_dependency 'tzinfo'
28
28
  spec.add_runtime_dependency 'tzinfo-data'
@@ -0,0 +1,43 @@
1
+ require File.expand_path('../../lib/picturehouse_uk.rb', __FILE__)
2
+
3
+ def fixture(name)
4
+ File.expand_path("../fixtures/#{name}.html", __FILE__)
5
+ end
6
+
7
+ File.open(fixture('home'), 'w') do |file|
8
+ puts '* Homepage'
9
+ file.write PicturehouseUk::Internal::Website.new.home
10
+ end
11
+
12
+ # DUKE OF YORKS
13
+
14
+ File.open(fixture('cinema/Duke_Of_Yorks'), 'w') do |file|
15
+ puts '* Duke of Yorks'
16
+ file.write PicturehouseUk::Internal::Website.new.cinema('Duke_Of_Yorks')
17
+ end
18
+
19
+ File.open(fixture('contact_us/Duke_Of_Yorks'), 'w') do |file|
20
+ puts '* Duke of Yorks Information'
21
+ file.write PicturehouseUk::Internal::Website.new.contact_us('Duke_Of_Yorks')
22
+ end
23
+
24
+ # KOMEDIA
25
+
26
+ File.open(fixture('contact_us/Dukes_At_Komedia'), 'w') do |file|
27
+ puts '* Dukes at Komedia Information'
28
+ file.write PicturehouseUk::Internal::Website.new.contact_us('Dukes_At_Komedia')
29
+ end
30
+
31
+ # FILMS
32
+
33
+ page = PicturehouseUk::Internal::CinemaPage.new('Duke_Of_Yorks')
34
+
35
+ File.open(fixture('cinema/Duke_Of_Yorks/film_second'), 'w') do |file|
36
+ puts '* Duke of Yorks Second Film'
37
+ file.write page.film_html[1]
38
+ end
39
+
40
+ File.open(fixture('cinema/Duke_Of_Yorks/film_last'), 'w') do |file|
41
+ puts '* Duke of Yorks Last Film'
42
+ file.write page.film_html[-1]
43
+ end