picturehouse_uk 2.0.5 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -6
- data/README.md +9 -1
- data/lib/picturehouse_uk.rb +2 -3
- data/lib/picturehouse_uk/cinema.rb +24 -12
- data/lib/picturehouse_uk/film.rb +3 -5
- data/lib/picturehouse_uk/internal/parser/address.rb +37 -0
- data/lib/picturehouse_uk/internal/parser/screenings.rb +117 -0
- data/lib/picturehouse_uk/internal/title_sanitizer.rb +2 -0
- data/lib/picturehouse_uk/internal/website.rb +4 -10
- data/lib/picturehouse_uk/screening.rb +5 -15
- data/lib/picturehouse_uk/version.rb +2 -2
- data/test/fixture_updater.rb +16 -13
- data/test/fixtures/cinema/Duke_Of_Yorks.html +2048 -4341
- data/test/fixtures/cinema/Dukes_At_Komedia.html +3780 -0
- data/test/fixtures/cinema/Phoenix_Picturehouse.html +3465 -0
- data/test/fixtures/home.html +487 -1932
- data/test/fixtures/info/Duke_Of_Yorks.html +450 -0
- data/test/fixtures/info/Dukes_At_Komedia.html +439 -0
- data/test/fixtures/info/Phoenix_Picturehouse.html +458 -0
- data/test/lib/picturehouse_uk/cinema_test.rb +53 -16
- data/test/lib/picturehouse_uk/internal/parser/screenings_test.rb +45 -0
- data/test/lib/picturehouse_uk/internal/title_sanitizer_test.rb +16 -0
- data/test/lib/picturehouse_uk/internal/website_test.rb +6 -6
- data/test/lib/picturehouse_uk/screening_test.rb +9 -1
- metadata +16 -25
- data/lib/picturehouse_uk/internal/address_parser.rb +0 -72
- data/lib/picturehouse_uk/internal/cinema_page.rb +0 -35
- data/lib/picturehouse_uk/internal/film_with_screenings_parser.rb +0 -116
- data/test/fixtures/address-fragments/duke-of-yorks.html +0 -39
- data/test/fixtures/address-fragments/hackney-picturehouse.html +0 -12
- data/test/fixtures/cinema/Duke_Of_Yorks/film_last.html +0 -45
- data/test/fixtures/cinema/Duke_Of_Yorks/film_second.html +0 -37
- data/test/fixtures/cinema/York_Picturehouse/basement.html +0 -19
- data/test/fixtures/contact_us/Duke_Of_Yorks.html +0 -1505
- data/test/fixtures/contact_us/Dukes_At_Komedia.html +0 -1503
- data/test/lib/picturehouse_uk/internal/address_parser_test.rb +0 -55
- data/test/lib/picturehouse_uk/internal/cinema_page_test.rb +0 -51
- data/test/lib/picturehouse_uk/internal/film_with_screenings_parser_test.rb +0 -95
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 400c5d45b2f84ec815b86ff05d36ba60864aef9e
|
4
|
+
data.tar.gz: 682bc113b527cf7f3ad011a65c048118658a58c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59b2fb923ff3edea3a4d837cd3615f08e995c0b63f06e3ba114684d32c691c630b16e1fdfc692dbe71047570c0f783b1d6ee11d639bdbf63af755cbdfe60f0fb
|
7
|
+
data.tar.gz: ad018b472fbc9680f36aafc708be483f61572e986a1e3477068a1f0d17d71847285863e49db6af0512a3e1c6988ebe1238f411ed5ce8704fd75931de3746b1ae
|
data/CHANGELOG.md
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
# Change Log
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
|
4
|
-
##
|
4
|
+
## [3.0.0] - 2015-02-15
|
5
|
+
|
6
|
+
### Fixed
|
7
|
+
- the whole damn parsing, we now look at picturehouses.com
|
8
|
+
|
9
|
+
## [2.0.5] - 2015-01-03
|
5
10
|
|
6
11
|
### Added
|
7
12
|
- parents and babies screening title
|
8
13
|
- subtitled screening
|
9
14
|
|
10
|
-
##
|
15
|
+
## [2.0.4] - 2015-01-01
|
11
16
|
|
12
17
|
### Added
|
13
18
|
- remove imax from film title
|
@@ -15,14 +20,14 @@ All notable changes to this project will be documented in this file.
|
|
15
20
|
- remove 'toddler time' from film title
|
16
21
|
- remove 'singalong' from film title
|
17
22
|
|
18
|
-
##
|
23
|
+
## [2.0.3] - 2014-10-19
|
19
24
|
|
20
25
|
### Added
|
21
26
|
- Better Q&A/panel removal from titles
|
22
27
|
- deal with various title edge cases
|
23
28
|
- remove years from titles
|
24
29
|
|
25
|
-
##
|
30
|
+
## [2.0.2] - 2014-10-19
|
26
31
|
|
27
32
|
### Added
|
28
33
|
- No screenings in York for 'basement events'
|
@@ -31,7 +36,7 @@ All notable changes to this project will be documented in this file.
|
|
31
36
|
- deal with ROH Encore
|
32
37
|
- better spacing on 'event' titles
|
33
38
|
|
34
|
-
##
|
39
|
+
## [2.0.1] - 2014-10-19
|
35
40
|
|
36
41
|
### Added
|
37
42
|
- Better parsing of ampersands in film titles
|
@@ -41,7 +46,7 @@ All notable changes to this project will be documented in this file.
|
|
41
46
|
- Better parsing of rogue screening types
|
42
47
|
- Better parsing of free screening types
|
43
48
|
|
44
|
-
## 2.0.0 - 2014-10-16
|
49
|
+
## [2.0.0] - 2014-10-16
|
45
50
|
|
46
51
|
### Added
|
47
52
|
- Live testing rake task
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# PicturehouseUk
|
2
2
|
|
3
|
-
A simple gem to parse the [Picturehouse Cinemas UK website](http://picturehouses.
|
3
|
+
A simple gem to parse the [Picturehouse Cinemas UK website](http://picturehouses.com) and spit out useful formatted info.
|
4
4
|
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/picturehouse_uk.png)](http://badge.fury.io/rb/picturehouse_uk)
|
6
6
|
[![Code Climate](https://codeclimate.com/github/andycroll/picturehouse_uk.png)](https://codeclimate.com/github/andycroll/picturehouse_uk)
|
@@ -53,6 +53,14 @@ cinema.screenings
|
|
53
53
|
|
54
54
|
```
|
55
55
|
|
56
|
+
## Running Tests
|
57
|
+
|
58
|
+
To redownload the test URLs run
|
59
|
+
|
60
|
+
```
|
61
|
+
ruby test/fixture_updater.rb
|
62
|
+
```
|
63
|
+
|
56
64
|
## Contributing
|
57
65
|
|
58
66
|
1. Fork it
|
data/lib/picturehouse_uk.rb
CHANGED
@@ -4,9 +4,8 @@ require 'tzinfo/data'
|
|
4
4
|
|
5
5
|
require_relative './picturehouse_uk/version'
|
6
6
|
|
7
|
-
require_relative './picturehouse_uk/internal/
|
8
|
-
require_relative './picturehouse_uk/internal/
|
9
|
-
require_relative './picturehouse_uk/internal/film_with_screenings_parser'
|
7
|
+
require_relative './picturehouse_uk/internal/parser/address'
|
8
|
+
require_relative './picturehouse_uk/internal/parser/screenings'
|
10
9
|
require_relative './picturehouse_uk/internal/title_sanitizer'
|
11
10
|
require_relative './picturehouse_uk/internal/website'
|
12
11
|
|
@@ -2,9 +2,9 @@ module PicturehouseUk
|
|
2
2
|
# The object representing a cinema on the Picturehouse UK website
|
3
3
|
class Cinema
|
4
4
|
# address css
|
5
|
-
ADDRESS_CSS = '.
|
5
|
+
ADDRESS_CSS = '.static-content #contact-us + p:first'
|
6
6
|
# cinema link css
|
7
|
-
CINEMA_LINKS_CSS = '
|
7
|
+
CINEMA_LINKS_CSS = '.footer .col-sm-3 option + option'
|
8
8
|
|
9
9
|
# @return [String] the brand of the cinema
|
10
10
|
attr_reader :brand
|
@@ -25,7 +25,7 @@ module PicturehouseUk
|
|
25
25
|
@name = options[:name]
|
26
26
|
@slug = @name.downcase.gsub(/[^0-9a-z ]/, '').gsub(/\s+/, '-')
|
27
27
|
@url = if options[:url][0] == '/'
|
28
|
-
"http://www.picturehouses.
|
28
|
+
"http://www.picturehouses.com#{options[:url]}"
|
29
29
|
else
|
30
30
|
options[:url]
|
31
31
|
end
|
@@ -59,6 +59,7 @@ module PicturehouseUk
|
|
59
59
|
# street_address: '44-47 Gardner Street',
|
60
60
|
# extended_address: 'North Laine',
|
61
61
|
# locality: 'Brighton',
|
62
|
+
# region: 'East Sussex',
|
62
63
|
# postal_code: 'BN1 1UN',
|
63
64
|
# country_name: 'United Kingdom'
|
64
65
|
# }
|
@@ -121,6 +122,17 @@ module PicturehouseUk
|
|
121
122
|
address[:postal_code]
|
122
123
|
end
|
123
124
|
|
125
|
+
# The region (county) of the cinema
|
126
|
+
# @return [String]
|
127
|
+
# @example
|
128
|
+
# cinema = PicturehouseUk::Cinema.find('Dukes_At_Komedia')
|
129
|
+
# cinema.region
|
130
|
+
# #=> 'East Sussex'
|
131
|
+
# @note Uses the standard method naming as at http://microformats.org/wiki/adr
|
132
|
+
def region
|
133
|
+
address[:region]
|
134
|
+
end
|
135
|
+
|
124
136
|
# All planned screenings
|
125
137
|
# @return [Array<PicturehouseUk::Screening>]
|
126
138
|
# @example
|
@@ -157,20 +169,20 @@ module PicturehouseUk
|
|
157
169
|
end
|
158
170
|
|
159
171
|
def self.new_from_link(link)
|
160
|
-
url =
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
172
|
+
url = link.get_attribute('data-href')
|
173
|
+
name = link.children.first.to_s.split(' — ')[1]
|
174
|
+
|
175
|
+
new(id: url.match(%r{/cinema/(.+)$})[1],
|
176
|
+
name: name,
|
177
|
+
url: url)
|
166
178
|
end
|
167
179
|
|
168
180
|
def address_node
|
169
|
-
@address_node ||=
|
181
|
+
@address_node ||= info_doc.css(ADDRESS_CSS)
|
170
182
|
end
|
171
183
|
|
172
|
-
def
|
173
|
-
@
|
184
|
+
def info_doc
|
185
|
+
@info_doc ||= Nokogiri::HTML(self.class.website.info(id))
|
174
186
|
end
|
175
187
|
end
|
176
188
|
end
|
data/lib/picturehouse_uk/film.rb
CHANGED
@@ -19,9 +19,7 @@ module PicturehouseUk
|
|
19
19
|
# @param [String] cinema_id the id of the cinema
|
20
20
|
# @return [Array<PicturehouseUk::Film>]
|
21
21
|
def self.at(cinema_id)
|
22
|
-
|
23
|
-
new(Internal::FilmWithScreeningsParser.new(html).film_name)
|
24
|
-
end.uniq
|
22
|
+
screenings(cinema_id).map { |hash| new hash[:film_name] }.uniq
|
25
23
|
end
|
26
24
|
|
27
25
|
# Allows sort on objects
|
@@ -54,8 +52,8 @@ module PicturehouseUk
|
|
54
52
|
|
55
53
|
private
|
56
54
|
|
57
|
-
def self.
|
58
|
-
|
55
|
+
def self.screenings(cinema_id)
|
56
|
+
PicturehouseUk::Internal::Parser::Screenings.new(cinema_id).to_a
|
59
57
|
end
|
60
58
|
end
|
61
59
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module PicturehouseUk
|
2
|
+
# @api private
|
3
|
+
module Internal
|
4
|
+
# Parses a chunk of HTML to derive address
|
5
|
+
class AddressParser
  # Matches the common spellings of an HTML line break: <br>, <br/>, <br />
  LINE_BREAK = %r{<br\s*/?>}i

  # @param [String] html the HTML fragment to parse into an address;
  #   nil is treated like an empty string
  # @return [PicturehouseUk::Internal::AddressParser]
  def initialize(html)
    @html = html
  end

  # Builds the address from the line-break separated parts of the fragment.
  # The first part is read as "<something>, <town>", the second as the
  # street, and the last as the postcode. Fields that cannot be derived
  # (including everything but :country for empty input) are nil.
  # @return [Hash] contains :street_address, :extended_address, :locality,
  #   :region, :postal_code, :country
  # @note Uses the address naming from http://microformats.org/wiki/adr
  def address
    {
      street_address: array[1],
      # a third line only counts as an extended address on longer addresses
      extended_address: array.length > 5 ? array[2] : nil,
      locality: town,
      # when the penultimate line just repeats the town there is no region
      region: array[-2] == town ? nil : array[-2],
      postal_code: array[-1],
      country: 'United Kingdom'
    }
  end

  private

  # The town is whatever follows the last ', ' on the first line.
  # `to_s` keeps empty input from raising on a missing first line.
  def town
    @town ||= array[0].to_s.split(', ')[-1]
  end

  # The fragment with paragraph tags removed, split on line breaks, with
  # surrounding whitespace trimmed and blank parts dropped.
  def array
    @array ||= @html.to_s
                    .gsub(/\<.?p.?\>/, '')
                    .split(LINE_BREAK)
                    .map(&:strip)
                    .reject(&:empty?)
  end
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
module PicturehouseUk
|
2
|
+
# @api private
|
3
|
+
module Internal
|
4
|
+
# @api private
|
5
|
+
module Parser
|
6
|
+
# Parses screenings page into an array of hashes for an individual cinema
|
7
|
+
class Screenings < Struct.new(:cinema_id)
  # selector for every list item in the "this week" and "further ahead"
  # listings
  LISTINGS = '#this-week .listings > li, #further-ahead .listings > li'

  # Walks the listing items in document order. An item carrying a `title`
  # attribute sets the date for the items that follow it; every other item
  # is handed to FilmWithShowtimes together with the most recent date.
  # @return [Array<Hash>] screening attributes for the whole cinema page
  def to_a
    current_date = nil
    doc.css(LISTINGS).flat_map do |item|
      title = item.attribute('title')
      if title.nil?
        FilmWithShowtimes.new(item, current_date).to_a
      else
        current_date = Date.parse(title.value)
        [] # a date item contributes no screenings itself
      end
    end
  end

  private

  # raw HTML of the cinema page, fetched once per instance
  def page
    @page ||= PicturehouseUk::Internal::Website.new.cinema(cinema_id)
  end

  # parsed form of #page, memoized
  def doc
    @doc ||= Nokogiri::HTML(page)
  end
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# Turns one film's listing node, plus the date it applies to, into
# screening attribute hashes
class FilmWithShowtimes < Struct.new(:node, :date)
  # selector for the link holding the film name
  NAME = '.top-mg-sm a'
  # selector for each block of showtimes within the film node
  VARIANTS = '.film-times .col-xs-10'

  # The film name after it has been through TitleSanitizer
  # @return [String]
  def name
    TitleSanitizer.new(raw_name).sanitized
  end

  # One hash per showtime: :film_name and :dimension first, followed by
  # whatever keys Variant#to_a supplies for that showtime.
  # @return [Array<Hash>]
  def to_a
    Array(node.css(VARIANTS)).flat_map do |variant_node|
      showings = Variant.new(variant_node, date).to_a
      showings.map do |showing|
        { film_name: name, dimension: dimension }.merge(showing)
      end
    end
  end

  private

  # '3d' when the unsanitized name contains "3d" in any case, else '2d'
  def dimension
    raw_name =~ /3d/i ? '3d' : '2d'
  end

  # first child of the name link rendered as a string, memoized
  def raw_name
    @raw_name ||= begin
      link_children = node.css(NAME).children
      link_children.first.to_s
    end
  end
end
|
71
|
+
|
72
|
+
# Gathers the showtime links found inside one node and tags each resulting
# hash with a :variant key
class Variant < Struct.new(:node, :date)
  # selector for the individual showtime links
  SHOWTIMES = 'a'
  # NOTE(review): not referenced anywhere in this class as visible here, and
  # the name looks like a misspelling of VARIANT - confirm before renaming
  VARIENT = '.film-type-desc'

  # @return [Array<Hash>] one hash per showtime link, :variant key first
  def to_a
    node.css(SHOWTIMES).map do |link|
      showtime = Showtime.new(link, date).to_hash
      { variant: variant }.merge(showtime)
    end
  end

  private

  # NOTE(review): always evaluates to nil - nothing in this class assigns a
  # value; presumably variant parsing is still to be written
  def variant
    @variant ||= nil
  end
end
|
88
|
+
|
89
|
+
# parse an individual screening node
|
90
|
+
class Showtime < Struct.new(:node, :date)
  # Separators accepted between the hour and minute of a showtime
  TIME_SEPARATOR = /[.:]/

  # the attributes of a single screening
  # @return [Hash] the link's href and the start time of the screening
  # @example
  #   Showtime.new(link_node, date).to_hash
  #   # => {
  #   #      booking_url: 'http://...',
  #   #      time: <Time>
  #   #    }
  def to_hash
    {
      booking_url: node['href'],
      time: time
    }
  end

  private

  # Start time in the process's local zone, built from the date plus the
  # "HH.MM" text of the link.
  #
  # The time is constructed from wall-clock components rather than by adding
  # seconds to local midnight: on a day when the clocks change, midnight plus
  # N hours of elapsed time is not N o'clock.
  # A missing minutes part is read as 0.
  def time
    @time ||= begin
      hour, min = node.text.strip.split(TIME_SEPARATOR).map(&:to_i)
      Time.local(date.year, date.month, date.day, hour.to_i, min.to_i)
    end
  end
end
|
116
|
+
end
|
117
|
+
end
|
@@ -9,6 +9,7 @@ module PicturehouseUk
|
|
9
9
|
/\s+\(?[23][dD]\)?/, # 2d or 3d from title
|
10
10
|
/\bIMAX\b/i, # imax from title
|
11
11
|
/\s\[NO CERT\]/, # no certificate
|
12
|
+
/\s\[TBC\]/, # tbc
|
12
13
|
/\s\[\]/, # blank certificate
|
13
14
|
/ourscreen\: /, # ourscreen
|
14
15
|
/\s\(Re(\: \d{0,4})?\)/i, # Re-release
|
@@ -32,6 +33,7 @@ module PicturehouseUk
|
|
32
33
|
/\bsingalong\b/i, # singalong
|
33
34
|
/\s+\-?\s*Parents \& Babies/i, # parents and babies
|
34
35
|
/Subtitled\:*\s*/i, # subtitled
|
36
|
+
/\(?live\)?\z/i, # live
|
35
37
|
]
|
36
38
|
|
37
39
|
# regexes and their replacements
|
@@ -8,13 +8,13 @@ module PicturehouseUk
|
|
8
8
|
# get the cinema page with showings for passed id
|
9
9
|
# @return [String]
|
10
10
|
def cinema(id)
|
11
|
-
get("cinema/#{id}
|
11
|
+
get("cinema/#{id}")
|
12
12
|
end
|
13
13
|
|
14
14
|
# get the cinema contact information page for passed id
|
15
15
|
# @return [String]
|
16
|
-
def
|
17
|
-
get("cinema/#{id}
|
16
|
+
def info(id)
|
17
|
+
get("cinema/info/#{id}")
|
18
18
|
end
|
19
19
|
|
20
20
|
# get the home page
|
@@ -23,16 +23,10 @@ module PicturehouseUk
|
|
23
23
|
get(nil)
|
24
24
|
end
|
25
25
|
|
26
|
-
# get the cinema page containing all upcoming films and screenings
|
27
|
-
# @return [String]
|
28
|
-
def whatson(id)
|
29
|
-
get("whatson?cinema=#{id}")
|
30
|
-
end
|
31
|
-
|
32
26
|
private
|
33
27
|
|
34
28
|
def get(path)
|
35
|
-
open("http://www.picturehouses.
|
29
|
+
open("http://www.picturehouses.com/#{path}").read
|
36
30
|
end
|
37
31
|
end
|
38
32
|
end
|
@@ -25,9 +25,9 @@ module PicturehouseUk
|
|
25
25
|
# @param [String] cinema_id the id of the cinema
|
26
26
|
# @return [Array<PicturehouseUk::Screening>]
|
27
27
|
def self.at(cinema_id)
|
28
|
-
|
29
|
-
|
30
|
-
end.
|
28
|
+
screenings(cinema_id).map do |attributes|
|
29
|
+
new cinema_hash(cinema_id).merge(attributes)
|
30
|
+
end.uniq
|
31
31
|
end
|
32
32
|
|
33
33
|
# The UTC time of the screening
|
@@ -63,18 +63,8 @@ module PicturehouseUk
|
|
63
63
|
}
|
64
64
|
end
|
65
65
|
|
66
|
-
def self.
|
67
|
-
PicturehouseUk::Internal::
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.create_for_single_film(html, cinema_id)
|
71
|
-
screenings_parser(html).to_a.map do |attributes|
|
72
|
-
new cinema_hash(cinema_id).merge(attributes)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.screenings_parser(html)
|
77
|
-
PicturehouseUk::Internal::FilmWithScreeningsParser.new(html)
|
66
|
+
def self.screenings(cinema_id)
|
67
|
+
PicturehouseUk::Internal::Parser::Screenings.new(cinema_id).to_a
|
78
68
|
end
|
79
69
|
end
|
80
70
|
end
|