cineworld_uk 2.0.5 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/cineworld_uk.rb +1 -2
- data/lib/cineworld_uk/film.rb +1 -1
- data/lib/cineworld_uk/internal/film_with_screenings_parser.rb +18 -12
- data/lib/cineworld_uk/internal/title_sanitizer.rb +57 -0
- data/lib/cineworld_uk/screening.rb +6 -3
- data/lib/cineworld_uk/version.rb +2 -2
- data/test/lib/cineworld_uk/internal/film_with_screenings_parser_test.rb +0 -28
- data/test/lib/cineworld_uk/internal/title_sanitizer_test.rb +121 -0
- metadata +5 -8
- data/lib/cineworld_uk/internal/name_parser.rb +0 -119
- data/lib/cineworld_uk/internal/titleize.rb +0 -87
- data/test/lib/cineworld_uk/internal/name_parser_test.rb +0 -42
- data/test/lib/cineworld_uk/internal/titleize_test.rb +0 -50
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e98514372d03e542f9340c9a2b685ea612aa184
|
4
|
+
data.tar.gz: 9371c1887c3a6576ffd7c6f2d5b2dd5990bcab77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fbed5e389afa12f6109ed6eef16a15d7d7c8cab657cff26198e756ae1c7ad172c3613b2b52af007568a3310957eee4b20589734ba1e18015d3842484222433b9
|
7
|
+
data.tar.gz: f44deed7653cf28f48c7b900b14a624cf444ea95aa3ae3df01eacfd23d691c5acf7108c0a503b4c41cf6462cb11cd6f3542fd8cd16fbcf88bae67c04a9965e70
|
data/CHANGELOG.md
CHANGED
data/lib/cineworld_uk.rb
CHANGED
@@ -5,9 +5,8 @@ require 'tzinfo/data'
|
|
5
5
|
require_relative './cineworld_uk/version'
|
6
6
|
|
7
7
|
require_relative './cineworld_uk/internal/film_with_screenings_parser'
|
8
|
-
require_relative './cineworld_uk/internal/name_parser'
|
9
|
-
require_relative './cineworld_uk/internal/titleize'
|
10
8
|
require_relative './cineworld_uk/internal/screening_parser'
|
9
|
+
require_relative './cineworld_uk/internal/title_sanitizer'
|
11
10
|
require_relative './cineworld_uk/internal/whatson_parser'
|
12
11
|
require_relative './cineworld_uk/internal/website'
|
13
12
|
|
data/lib/cineworld_uk/film.rb
CHANGED
@@ -4,8 +4,8 @@ module CineworldUk
|
|
4
4
|
module Internal
|
5
5
|
# Parses a chunk of HTML to derive movie showing data
|
6
6
|
class FilmWithScreeningsParser
|
7
|
-
# css selector for film name
|
8
|
-
FILM_NAME_CSS = 'h3.h1
|
7
|
+
# css selector for film name link
|
8
|
+
FILM_NAME_CSS = 'h3.h1'
|
9
9
|
# css selector for performances
|
10
10
|
PERFORMANCES_CSS = '.schedule .performances > li'
|
11
11
|
|
@@ -14,16 +14,10 @@ module CineworldUk
|
|
14
14
|
@film_html = film_html.to_s
|
15
15
|
end
|
16
16
|
|
17
|
-
# The cinema id
|
18
|
-
# @return [String]
|
19
|
-
def cinema_id
|
20
|
-
name_doc.to_s.match(/cinema=(\d+)/)[1].to_i
|
21
|
-
end
|
22
|
-
|
23
17
|
# The film name
|
24
18
|
# @return [String]
|
25
19
|
def film_name
|
26
|
-
|
20
|
+
title_sanitizer(film_name_text.children[0].to_s)
|
27
21
|
end
|
28
22
|
|
29
23
|
# attributes of all the screenings
|
@@ -42,11 +36,19 @@ module CineworldUk
|
|
42
36
|
end
|
43
37
|
|
44
38
|
def film_hash
|
45
|
-
@film_hash ||= {
|
39
|
+
@film_hash ||= { film_name: film_name }
|
40
|
+
end
|
41
|
+
|
42
|
+
def film_link
|
43
|
+
@film_link ||= film_name_doc.css('a[href*=whatson]')
|
46
44
|
end
|
47
45
|
|
48
|
-
def
|
49
|
-
|
46
|
+
def film_name_text
|
47
|
+
film_link.empty? ? name_doc : film_link
|
48
|
+
end
|
49
|
+
|
50
|
+
def film_name_doc
|
51
|
+
@film_name_doc ||= doc.css(FILM_NAME_CSS)
|
50
52
|
end
|
51
53
|
|
52
54
|
def performances_doc
|
@@ -56,6 +58,10 @@ module CineworldUk
|
|
56
58
|
def screening_parser_hash(node)
|
57
59
|
ScreeningParser.new(node).to_hash
|
58
60
|
end
|
61
|
+
|
62
|
+
def title_sanitizer(title)
|
63
|
+
TitleSanitizer.new(title).sanitized
|
64
|
+
end
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module CineworldUk
|
2
|
+
# Internal utility classes: Do not use
|
3
|
+
# @api private
|
4
|
+
module Internal
|
5
|
+
# Sanitize and standardize film titles
|
6
|
+
class TitleSanitizer
|
7
|
+
# strings and regex to be removed
|
8
|
+
REMOVE = [
|
9
|
+
/\s+[23]d/i, # dimension
|
10
|
+
/\(Hindi\)/i, # Indian Language
|
11
|
+
/\(Malayalam\)/i, # Indian Language
|
12
|
+
/\(Tamil\)/i, # Indian Language
|
13
|
+
%r(-? \d{1,2}/\d{1,2}/\d{2,4}), # date
|
14
|
+
/\n/, # newlines
|
15
|
+
/\- Encore/, # encore
|
16
|
+
'Autism Friendly Screening:', # autism screening
|
17
|
+
'- Unlimited Screening', # unlimited screening
|
18
|
+
/LFF Opening Night Live/, # london film festival
|
19
|
+
'- Special Performance', # special performance
|
20
|
+
/\ATake 2 -/, # take 2
|
21
|
+
' - Movies for Juniors', # movies for juniors
|
22
|
+
]
|
23
|
+
|
24
|
+
# regexes and their replacements
|
25
|
+
REPLACE = {
|
26
|
+
/Bolshoi Ballet: (.*)/ => 'Bolshoi: ',
|
27
|
+
/Guardian Live - (.*)/ => 'The Guardian: ',
|
28
|
+
/Met Opera - (.*)/i => 'Met Opera: ',
|
29
|
+
/NT Live: (.*)/ => 'National Theatre: ',
|
30
|
+
/NT Live Encore: (.*)/ => 'National Theatre: ',
|
31
|
+
/ROH - (.*)/ => 'Royal Opera House: ',
|
32
|
+
/RSC Live: (.*)/ => 'Royal Shakespeare Company: ',
|
33
|
+
/The Royal Ballet - (.*)/ => 'The Royal Ballet: '
|
34
|
+
}
|
35
|
+
|
36
|
+
# @param [String] title a film title
|
37
|
+
def initialize(title)
|
38
|
+
@title = title
|
39
|
+
end
|
40
|
+
|
41
|
+
# sanitized and standardized title
|
42
|
+
# @return [String] title
|
43
|
+
def sanitized
|
44
|
+
@sanitzed ||= begin
|
45
|
+
sanitized = @title
|
46
|
+
REMOVE.each do |pattern|
|
47
|
+
sanitized.gsub! pattern, ''
|
48
|
+
end
|
49
|
+
REPLACE.each do |pattern, prefix|
|
50
|
+
sanitized.gsub!(pattern) { |_| prefix + $1 }
|
51
|
+
end
|
52
|
+
sanitized.squeeze(' ').strip
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -63,13 +63,16 @@ module CineworldUk
|
|
63
63
|
|
64
64
|
private
|
65
65
|
|
66
|
-
def self.
|
67
|
-
{
|
66
|
+
def self.cinema_hash(cinema_id)
|
67
|
+
{
|
68
|
+
cinema_id: cinema_id,
|
69
|
+
cinema_name: CineworldUk::Cinema.find(cinema_id).name
|
70
|
+
}
|
68
71
|
end
|
69
72
|
|
70
73
|
def self.create_for_single_film(html, cinema_id)
|
71
74
|
screenings_parser(html).to_a.map do |attributes|
|
72
|
-
new
|
75
|
+
new cinema_hash(cinema_id).merge(attributes)
|
73
76
|
end
|
74
77
|
end
|
75
78
|
|
data/lib/cineworld_uk/version.rb
CHANGED
@@ -3,34 +3,6 @@ require_relative '../../../test_helper'
|
|
3
3
|
describe CineworldUk::Internal::FilmWithScreeningsParser do
|
4
4
|
let(:described_class) { CineworldUk::Internal::FilmWithScreeningsParser }
|
5
5
|
|
6
|
-
describe '#cinema_id' do
|
7
|
-
subject { described_class.new(film_html).cinema_id }
|
8
|
-
|
9
|
-
describe 'passed film html from top of page' do
|
10
|
-
let(:film_html) { read_film_html('brighton/film_first') }
|
11
|
-
|
12
|
-
it 'returns the id' do
|
13
|
-
subject.must_equal(3)
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
describe 'passed second html from page' do
|
18
|
-
let(:film_html) { read_film_html('brighton/film_second') }
|
19
|
-
|
20
|
-
it 'returns the id' do
|
21
|
-
subject.must_equal(3)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
describe 'passed last html from end of page' do
|
26
|
-
let(:film_html) { read_film_html('brighton/film_last') }
|
27
|
-
|
28
|
-
it 'returns the id' do
|
29
|
-
subject.must_equal(3)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
6
|
describe '#film_name' do
|
35
7
|
subject { described_class.new(film_html).film_name }
|
36
8
|
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require_relative '../../../test_helper'
|
2
|
+
|
3
|
+
describe CineworldUk::Internal::TitleSanitizer do
|
4
|
+
let(:described_class) { CineworldUk::Internal::TitleSanitizer }
|
5
|
+
|
6
|
+
describe '#sanitized' do
|
7
|
+
subject { described_class.new(title).sanitized }
|
8
|
+
|
9
|
+
describe 'with 2d in title' do
|
10
|
+
let(:title) { 'Iron Man 3 2D' }
|
11
|
+
|
12
|
+
it 'removes dimension' do
|
13
|
+
subject.must_equal('Iron Man 3')
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
describe 'with 3d in title' do
|
18
|
+
let(:title) { 'Iron Man 3 3d' }
|
19
|
+
|
20
|
+
it 'removes dimension' do
|
21
|
+
subject.must_equal('Iron Man 3')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'in hindi' do
|
26
|
+
let(:title) { 'Bang Bang! (Hindi)' }
|
27
|
+
|
28
|
+
it 'removes language' do
|
29
|
+
subject.must_equal('Bang Bang!')
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'Unlimited Screening' do
|
34
|
+
let(:title) { 'Nightcrawler - Unlimited Screening' }
|
35
|
+
|
36
|
+
it 'removes prefix' do
|
37
|
+
subject.must_equal('Nightcrawler')
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe 'Austism screening' do
|
42
|
+
let(:title) { 'Autism Friendly Screening: Dolphin Tale 2' }
|
43
|
+
|
44
|
+
it 'removes prefix' do
|
45
|
+
subject.must_equal('Dolphin Tale 2')
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe 'Bolshoi screeening' do
|
50
|
+
let(:title) { 'Bolshoi Ballet: The Legend of Love' }
|
51
|
+
|
52
|
+
it 'removes prefix' do
|
53
|
+
subject.must_equal('Bolshoi: The Legend of Love')
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe 'Met Opera screeening' do
|
58
|
+
let(:title) { 'MET Opera - Le Nozze Di Figaro' }
|
59
|
+
|
60
|
+
it 'removes prefix' do
|
61
|
+
subject.must_equal('Met Opera: Le Nozze Di Figaro')
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe 'NT Live screeening' do
|
66
|
+
let(:title) { 'NT Live Encore: Frankenstein (starring Benedict Cumberbatch)' }
|
67
|
+
|
68
|
+
it 'removes prefix' do
|
69
|
+
subject.must_equal('National Theatre: Frankenstein (starring Benedict Cumberbatch)')
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
describe 'National Theatre screeening' do
|
74
|
+
let(:title) { 'NT Live: Treasure Island' }
|
75
|
+
|
76
|
+
it 'removes prefix' do
|
77
|
+
subject.must_equal('National Theatre: Treasure Island')
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
describe 'National Theatre Encore screeening' do
|
82
|
+
let(:title) { 'NT Live: A Streetcar Named Desire (Young Vic) - Encore' }
|
83
|
+
|
84
|
+
it 'removes prefix' do
|
85
|
+
subject.must_equal('National Theatre: A Streetcar Named Desire (Young Vic)')
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe 'ROH screeening' do
|
90
|
+
let(:title) { 'ROH - I Due Foscari' }
|
91
|
+
|
92
|
+
it 'removes prefix' do
|
93
|
+
subject.must_equal('Royal Opera House: I Due Foscari')
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe 'RSC screeening' do
|
98
|
+
let(:title) { "RSC Live: Love's Labour's Lost" }
|
99
|
+
|
100
|
+
it 'removes prefix' do
|
101
|
+
subject.must_equal("Royal Shakespeare Company: Love's Labour's Lost")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
describe 'special screeening' do
|
106
|
+
let(:title) { 'Billy Elliot The Musical Live - Special Performance' }
|
107
|
+
|
108
|
+
it 'removes prefix' do
|
109
|
+
subject.must_equal('Billy Elliot The Musical Live')
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
describe 'Take 2 screeening' do
|
114
|
+
let(:title) { 'Take 2 - Boyhood' }
|
115
|
+
|
116
|
+
it 'removes prefix' do
|
117
|
+
subject.must_equal('Boyhood')
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cineworld_uk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.5
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Croll
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -127,9 +127,8 @@ files:
|
|
127
127
|
- lib/cineworld_uk/cinema.rb
|
128
128
|
- lib/cineworld_uk/film.rb
|
129
129
|
- lib/cineworld_uk/internal/film_with_screenings_parser.rb
|
130
|
-
- lib/cineworld_uk/internal/name_parser.rb
|
131
130
|
- lib/cineworld_uk/internal/screening_parser.rb
|
132
|
-
- lib/cineworld_uk/internal/titleize.rb
|
131
|
+
- lib/cineworld_uk/internal/title_sanitizer.rb
|
133
132
|
- lib/cineworld_uk/internal/website.rb
|
134
133
|
- lib/cineworld_uk/internal/whatson_parser.rb
|
135
134
|
- lib/cineworld_uk/screening.rb
|
@@ -147,8 +146,7 @@ files:
|
|
147
146
|
- test/lib/cineworld_uk/cinema_test.rb
|
148
147
|
- test/lib/cineworld_uk/film_test.rb
|
149
148
|
- test/lib/cineworld_uk/internal/film_with_screenings_parser_test.rb
|
150
|
-
- test/lib/cineworld_uk/internal/name_parser_test.rb
|
151
|
-
- test/lib/cineworld_uk/internal/titleize_test.rb
|
149
|
+
- test/lib/cineworld_uk/internal/title_sanitizer_test.rb
|
152
150
|
- test/lib/cineworld_uk/internal/website_test.rb
|
153
151
|
- test/lib/cineworld_uk/internal/whatson_parser_test.rb
|
154
152
|
- test/lib/cineworld_uk/screening_test.rb
|
@@ -192,8 +190,7 @@ test_files:
|
|
192
190
|
- test/lib/cineworld_uk/cinema_test.rb
|
193
191
|
- test/lib/cineworld_uk/film_test.rb
|
194
192
|
- test/lib/cineworld_uk/internal/film_with_screenings_parser_test.rb
|
195
|
-
- test/lib/cineworld_uk/internal/name_parser_test.rb
|
196
|
-
- test/lib/cineworld_uk/internal/titleize_test.rb
|
193
|
+
- test/lib/cineworld_uk/internal/title_sanitizer_test.rb
|
197
194
|
- test/lib/cineworld_uk/internal/website_test.rb
|
198
195
|
- test/lib/cineworld_uk/internal/whatson_parser_test.rb
|
199
196
|
- test/lib/cineworld_uk/screening_test.rb
|
@@ -1,119 +0,0 @@
|
|
1
|
-
module CineworldUk
|
2
|
-
# Internal utility classes: Do not use
|
3
|
-
# @api private
|
4
|
-
module Internal
|
5
|
-
# Parses a string to derive a standardized movie title
|
6
|
-
class NameParser
|
7
|
-
# @return [String] the original name
|
8
|
-
attr_reader :original_name
|
9
|
-
|
10
|
-
# @param [String] name original film name
|
11
|
-
def initialize(name)
|
12
|
-
@original_name = name
|
13
|
-
@name = name
|
14
|
-
end
|
15
|
-
|
16
|
-
# Process the name and return the final string
|
17
|
-
# @return [String]
|
18
|
-
def standardize
|
19
|
-
strip_and_squeeze
|
20
|
-
.ampersands_into_text
|
21
|
-
.into_ampersand_if_second_to_last
|
22
|
-
.remove_indian_languages
|
23
|
-
.remove_screening_details
|
24
|
-
.replace_non_film_prefix
|
25
|
-
.remove_newlines
|
26
|
-
.remove_dates
|
27
|
-
.title_case
|
28
|
-
to_s
|
29
|
-
end
|
30
|
-
|
31
|
-
# The processed name
|
32
|
-
# @return [String]
|
33
|
-
def to_s
|
34
|
-
@name
|
35
|
-
end
|
36
|
-
|
37
|
-
protected
|
38
|
-
|
39
|
-
def ampersands_into_text
|
40
|
-
_replace(/\s(\&|\&amp;)\s/, ' and ')
|
41
|
-
self
|
42
|
-
end
|
43
|
-
|
44
|
-
def into_ampersand_if_second_to_last
|
45
|
-
_replace(/\s(and)\s(\w+)\z/, ' & \2')
|
46
|
-
self
|
47
|
-
end
|
48
|
-
|
49
|
-
def remove_indian_languages
|
50
|
-
languages = %w(Malayalam Tamil)
|
51
|
-
|
52
|
-
_remove(/\((#{languages * '|'})\)/i)
|
53
|
-
self
|
54
|
-
end
|
55
|
-
|
56
|
-
def remove_screening_details
|
57
|
-
_remove 'Take 2 Thursday - '
|
58
|
-
_remove 'Autism Friendly Screening: '
|
59
|
-
self
|
60
|
-
end
|
61
|
-
|
62
|
-
def remove_dates
|
63
|
-
_remove(%r(-? \d{1,2}/\d{1,2}/\d{2,4}))
|
64
|
-
self
|
65
|
-
end
|
66
|
-
|
67
|
-
def remove_newlines
|
68
|
-
_remove(/\n/)
|
69
|
-
self
|
70
|
-
end
|
71
|
-
|
72
|
-
def replace_non_film_prefix
|
73
|
-
_replace 'Bolshoi Ballet Live -', 'Bolshoi:'
|
74
|
-
|
75
|
-
if @name.match(/\- NT .+ encore/)
|
76
|
-
@name = 'National Theatre: ' + @name.gsub(/\- NT .+ encore/, '')
|
77
|
-
end
|
78
|
-
|
79
|
-
_replace 'NT Live:', 'National Theatre:'
|
80
|
-
|
81
|
-
_replace 'MET Opera -', 'Met Opera:'
|
82
|
-
_replace 'Royal Ballet Live:', 'Royal Ballet:'
|
83
|
-
|
84
|
-
# fill out Royal Opera House
|
85
|
-
@name.match(/Royal Opera Live\: (.+) \-.+/) do |match|
|
86
|
-
@name = 'Royal Opera House: ' + match[1]
|
87
|
-
end
|
88
|
-
_replace 'Royal Opera Live:', 'Royal Opera House:'
|
89
|
-
|
90
|
-
_replace 'RSC Live:', 'Royal Shakespeare Company:'
|
91
|
-
_remove '(Encore Performance)' # remove rsc-style encore
|
92
|
-
|
93
|
-
_remove ' Theatre Series' # West End
|
94
|
-
|
95
|
-
self
|
96
|
-
end
|
97
|
-
|
98
|
-
def strip_and_squeeze
|
99
|
-
@name = @name.strip.squeeze(' ')
|
100
|
-
self
|
101
|
-
end
|
102
|
-
|
103
|
-
def title_case
|
104
|
-
@name = CineworldUk::Internal::Titleize.titleize(@name)
|
105
|
-
self
|
106
|
-
end
|
107
|
-
|
108
|
-
private
|
109
|
-
|
110
|
-
def _remove(match)
|
111
|
-
@name = @name.gsub(match, '')
|
112
|
-
end
|
113
|
-
|
114
|
-
def _replace(match, replacement)
|
115
|
-
@name = @name.gsub(match, replacement)
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
@@ -1,87 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
module CineworldUk
|
3
|
-
# Internal utility classes: Do not use
|
4
|
-
# @api private
|
5
|
-
module Internal
|
6
|
-
# @note Modified from titleize gem
|
7
|
-
# https://github.com/granth/titleize
|
8
|
-
module Titleize
|
9
|
-
# List of words not to capitalize unless they lead a phrase
|
10
|
-
WORDS = %w(a an and as at but by en for if in of on or the to via vs vs.)
|
11
|
-
|
12
|
-
extend self
|
13
|
-
|
14
|
-
# Capitalizes most words to create a nicer looking title string.
|
15
|
-
#
|
16
|
-
# The list of "small words" which are not capped comes from
|
17
|
-
# the New York Times Manual of Style, plus 'vs'.
|
18
|
-
#
|
19
|
-
# Also capitalises roman numerals
|
20
|
-
#
|
21
|
-
# "notes on a scandal" # => "Notes on a Scandal"
|
22
|
-
# "ghostbusters ii" # => "Ghostbusters II"
|
23
|
-
#
|
24
|
-
# @param [String] title a chunk of html
|
25
|
-
# @return [String]
|
26
|
-
def titleize(title)
|
27
|
-
title = title.dup
|
28
|
-
title.downcase! unless title[/[[:lower:]]/] # assume all-caps fixing
|
29
|
-
|
30
|
-
phrases(title).map do |phrase|
|
31
|
-
words = phrase.split
|
32
|
-
words.map do |word|
|
33
|
-
def word.capitalize
|
34
|
-
# like String#capitalize, but it starts with the first letter
|
35
|
-
sub(/[[:alpha:]].*/) { |subword| subword.capitalize }
|
36
|
-
end
|
37
|
-
|
38
|
-
case word
|
39
|
-
when /[[:alpha:]]\.[[:alpha:]]/ # words with dots in
|
40
|
-
word
|
41
|
-
when /[-‑]/ # hyphenated word (regular and non-breaking)
|
42
|
-
word.split(/([-‑])/).map do |part|
|
43
|
-
WORDS.include?(part) ? part : part.capitalize
|
44
|
-
end.join
|
45
|
-
when /^[[:alpha:]].*[[:upper:]]/ # non-first letter capitalized
|
46
|
-
word
|
47
|
-
when /^[[:digit:]]/ # first character is a number
|
48
|
-
word
|
49
|
-
when /^(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i
|
50
|
-
word.upcase
|
51
|
-
when words.first, words.last
|
52
|
-
word.capitalize
|
53
|
-
when *(WORDS + WORDS.map { |small| small.capitalize })
|
54
|
-
word.downcase
|
55
|
-
else
|
56
|
-
word.capitalize
|
57
|
-
end
|
58
|
-
end.join(' ')
|
59
|
-
end.join(' ')
|
60
|
-
end
|
61
|
-
|
62
|
-
# Splits a title into an array based on punctuation.
|
63
|
-
# @param [String] title Film title
|
64
|
-
# @return [Array<String>]
|
65
|
-
#
|
66
|
-
# Titleize.phrases("simple title")
|
67
|
-
# #=> ["simple title"]
|
68
|
-
# Titleize.phrases("more complicated: titling")
|
69
|
-
# #=> ["more complicated:", "titling"]
|
70
|
-
# Titleize.phrases("even more: complicated - titling")
|
71
|
-
# #=> ["even more:", "complicated -", "titling"]
|
72
|
-
def phrases(title)
|
73
|
-
phrases = title.scan(/.+?(?:[-:.;?!] |$)/).map { |phrase| phrase.strip }
|
74
|
-
|
75
|
-
# rejoin phrases that were split on the '.' from a small word
|
76
|
-
if phrases.size > 1
|
77
|
-
phrases[0..-2].each_with_index do |phrase, index|
|
78
|
-
next unless WORDS.include?(phrase.split.last.downcase)
|
79
|
-
phrases[index] << ' ' + phrases.slice!(index + 1)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
phrases
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
@@ -1,42 +0,0 @@
|
|
1
|
-
require_relative '../../../test_helper'
|
2
|
-
|
3
|
-
describe CineworldUk::Internal::NameParser do
|
4
|
-
|
5
|
-
describe '#standardize' do
|
6
|
-
subject { CineworldUk::Internal::NameParser.new(film_name).standardize }
|
7
|
-
|
8
|
-
[
|
9
|
-
['Rita, Sue and Bob Too', 'Rita, Sue and Bob Too', 'words with "and"'],
|
10
|
-
['Rita, Sue & Bob Too', 'Rita, Sue and Bob Too', 'words with "&"'],
|
11
|
-
['Rita, Sue &amp; Bob Too', 'Rita, Sue and Bob Too', 'words with HTML "&"'],
|
12
|
-
['Cowboys and Aliens', 'Cowboys & Aliens', '"and" as the last but one word'],
|
13
|
-
['Cowboys &amp; Aliens', 'Cowboys & Aliens', 'HTML "&" as the last but one word'],
|
14
|
-
['star wars: episode IV - A new hope', 'Star Wars: Episode IV - A New Hope', 'titleize'],
|
15
|
-
['star wars: episode v - the empire strikes back', 'Star Wars: Episode V - The Empire Strikes Back', 'titleize'],
|
16
|
-
['2 fast 2 furious', '2 Fast 2 Furious', 'titleize'],
|
17
|
-
['Geethanjali (Malayalam)', 'Geethanjali', 'Indian language removal'],
|
18
|
-
['Arrambam (Tamil)', 'Arrambam', 'Indian language removal'],
|
19
|
-
['Take 2 Thursday - About Time', 'About Time', 'remove "Take 2" prefix'],
|
20
|
-
['Autism Friendly Screening: Cloudy With A Chance Of Meatballs 2', 'Cloudy With a Chance of Meatballs 2', 'autism friendly'],
|
21
|
-
['Bolshoi Ballet Live - Lost Illusions', 'Bolshoi: Lost Illusions', 'bolshoi'],
|
22
|
-
['NT Live: War Horse', 'National Theatre: War Horse', 'NT'],
|
23
|
-
['Frankenstein (with Jonny Lee Miller as the Creature) - NT 50th Anniversary encore', 'National Theatre: Frankenstein (With Jonny Lee Miller as the Creature)', 'NT 50th'],
|
24
|
-
['MET Opera - Falstaff - 14/12/2013', 'Met Opera: Falstaff', 'Met Opera with date'],
|
25
|
-
['Royal Ballet Live: The Sleeping Beauty - 19/03/14', 'Royal Ballet: The Sleeping Beauty', 'royal ballet'],
|
26
|
-
['Royal Opera Live: Parsifal - Wednesday 18 Dec 2013', 'Royal Opera House: Parsifal', 'royal opera'],
|
27
|
-
['RSC Live: Richard II (Encore Performance)', 'Royal Shakespeare Company: Richard II', 'rsc'],
|
28
|
-
["West End Theatre Series: Noel Coward's Private Lives", "West End: Noel Coward's Private Lives", 'west end'],
|
29
|
-
["Raiders of\n the Lost Ark", 'Raiders of the Lost Ark', 'New lines']
|
30
|
-
|
31
|
-
].each do |test_case|
|
32
|
-
|
33
|
-
describe test_case[2] do
|
34
|
-
let(:film_name) { test_case[0] }
|
35
|
-
it 'returns standardized title' do
|
36
|
-
subject.must_equal test_case[1]
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,50 +0,0 @@
|
|
1
|
-
require_relative '../../../test_helper'
|
2
|
-
|
3
|
-
describe CineworldUk::Internal::Titleize do
|
4
|
-
|
5
|
-
describe '#titleize(name)' do
|
6
|
-
subject { CineworldUk::Internal::Titleize.titleize(string) }
|
7
|
-
|
8
|
-
[
|
9
|
-
[
|
10
|
-
'star wars: episode iv - a new hope',
|
11
|
-
'Star Wars: Episode IV - A New Hope'
|
12
|
-
],
|
13
|
-
[
|
14
|
-
'star wars: episode v - the empire strikes back',
|
15
|
-
'Star Wars: Episode V - The Empire Strikes Back'
|
16
|
-
],
|
17
|
-
['2 fast 2 furious', '2 Fast 2 Furious'],
|
18
|
-
['saw iv', 'Saw IV'],
|
19
|
-
['fast & Furious 6', 'Fast & Furious 6'],
|
20
|
-
['fast & Furious vi', 'Fast & Furious VI']
|
21
|
-
].each do |test_case|
|
22
|
-
|
23
|
-
describe test_case[2] do
|
24
|
-
let(:string) { test_case[0] }
|
25
|
-
it 'returns titlecase' do
|
26
|
-
subject.must_equal test_case[1]
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
describe '#phrases(name)' do
|
33
|
-
subject { CineworldUk::Internal::Titleize.phrases(string) }
|
34
|
-
|
35
|
-
[
|
36
|
-
[
|
37
|
-
'star wars: episode iv - a new hope',
|
38
|
-
['star wars:', 'episode iv -', 'a new hope']
|
39
|
-
]
|
40
|
-
].each do |test_case|
|
41
|
-
|
42
|
-
describe test_case[0] do
|
43
|
-
let(:string) { test_case[0] }
|
44
|
-
it 'splits the name' do
|
45
|
-
subject.must_equal test_case[1]
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|