cineworld_uk 2.0.5 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/cineworld_uk.rb +1 -2
- data/lib/cineworld_uk/film.rb +1 -1
- data/lib/cineworld_uk/internal/film_with_screenings_parser.rb +18 -12
- data/lib/cineworld_uk/internal/title_sanitizer.rb +57 -0
- data/lib/cineworld_uk/screening.rb +6 -3
- data/lib/cineworld_uk/version.rb +2 -2
- data/test/lib/cineworld_uk/internal/film_with_screenings_parser_test.rb +0 -28
- data/test/lib/cineworld_uk/internal/title_sanitizer_test.rb +121 -0
- metadata +5 -8
- data/lib/cineworld_uk/internal/name_parser.rb +0 -119
- data/lib/cineworld_uk/internal/titleize.rb +0 -87
- data/test/lib/cineworld_uk/internal/name_parser_test.rb +0 -42
- data/test/lib/cineworld_uk/internal/titleize_test.rb +0 -50
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e98514372d03e542f9340c9a2b685ea612aa184
|
4
|
+
data.tar.gz: 9371c1887c3a6576ffd7c6f2d5b2dd5990bcab77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fbed5e389afa12f6109ed6eef16a15d7d7c8cab657cff26198e756ae1c7ad172c3613b2b52af007568a3310957eee4b20589734ba1e18015d3842484222433b9
|
7
|
+
data.tar.gz: f44deed7653cf28f48c7b900b14a624cf444ea95aa3ae3df01eacfd23d691c5acf7108c0a503b4c41cf6462cb11cd6f3542fd8cd16fbcf88bae67c04a9965e70
|
data/CHANGELOG.md
CHANGED
data/lib/cineworld_uk.rb
CHANGED
@@ -5,9 +5,8 @@ require 'tzinfo/data'
|
|
5
5
|
require_relative './cineworld_uk/version'
|
6
6
|
|
7
7
|
require_relative './cineworld_uk/internal/film_with_screenings_parser'
|
8
|
-
require_relative './cineworld_uk/internal/name_parser'
|
9
|
-
require_relative './cineworld_uk/internal/titleize'
|
10
8
|
require_relative './cineworld_uk/internal/screening_parser'
|
9
|
+
require_relative './cineworld_uk/internal/title_sanitizer'
|
11
10
|
require_relative './cineworld_uk/internal/whatson_parser'
|
12
11
|
require_relative './cineworld_uk/internal/website'
|
13
12
|
|
data/lib/cineworld_uk/film.rb
CHANGED
@@ -4,8 +4,8 @@ module CineworldUk
|
|
4
4
|
module Internal
|
5
5
|
# Parses a chunk of HTML to derive movie showing data
|
6
6
|
class FilmWithScreeningsParser
|
7
|
-
# css selector for film name
|
8
|
-
FILM_NAME_CSS = 'h3.h1
|
7
|
+
# css selector for film name link
|
8
|
+
FILM_NAME_CSS = 'h3.h1'
|
9
9
|
# css selector for performances
|
10
10
|
PERFORMANCES_CSS = '.schedule .performances > li'
|
11
11
|
|
@@ -14,16 +14,10 @@ module CineworldUk
|
|
14
14
|
@film_html = film_html.to_s
|
15
15
|
end
|
16
16
|
|
17
|
-
# The cinema id
|
18
|
-
# @return [String]
|
19
|
-
def cinema_id
|
20
|
-
name_doc.to_s.match(/cinema=(\d+)/)[1].to_i
|
21
|
-
end
|
22
|
-
|
23
17
|
# The film name
|
24
18
|
# @return [String]
|
25
19
|
def film_name
|
26
|
-
|
20
|
+
title_sanitizer(film_name_text.children[0].to_s)
|
27
21
|
end
|
28
22
|
|
29
23
|
# attributes of all the screenings
|
@@ -42,11 +36,19 @@ module CineworldUk
|
|
42
36
|
end
|
43
37
|
|
44
38
|
def film_hash
|
45
|
-
@film_hash ||= {
|
39
|
+
@film_hash ||= { film_name: film_name }
|
40
|
+
end
|
41
|
+
|
42
|
+
def film_link
|
43
|
+
@film_link ||= film_name_doc.css('a[href*=whatson]')
|
46
44
|
end
|
47
45
|
|
48
|
-
def
|
49
|
-
|
46
|
+
def film_name_text
|
47
|
+
film_link.empty? ? name_doc : film_link
|
48
|
+
end
|
49
|
+
|
50
|
+
def film_name_doc
|
51
|
+
@film_name_doc ||= doc.css(FILM_NAME_CSS)
|
50
52
|
end
|
51
53
|
|
52
54
|
def performances_doc
|
@@ -56,6 +58,10 @@ module CineworldUk
|
|
56
58
|
def screening_parser_hash(node)
|
57
59
|
ScreeningParser.new(node).to_hash
|
58
60
|
end
|
61
|
+
|
62
|
+
def title_sanitizer(title)
|
63
|
+
TitleSanitizer.new(title).sanitized
|
64
|
+
end
|
59
65
|
end
|
60
66
|
end
|
61
67
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module CineworldUk
|
2
|
+
# Internal utility classes: Do not use
|
3
|
+
# @api private
|
4
|
+
module Internal
|
5
|
+
# Sanitize and standardize film titles
|
6
|
+
class TitleSanitizer
|
7
|
+
# strings and regex to be removed
|
8
|
+
REMOVE = [
|
9
|
+
/\s+[23]d/i, # dimension
|
10
|
+
/\(Hindi\)/i, # Indian Language
|
11
|
+
/\(Malayalam\)/i, # Indian Language
|
12
|
+
/\(Tamil\)/i, # Indian Language
|
13
|
+
%r(-? \d{1,2}/\d{1,2}/\d{2,4}), # date
|
14
|
+
/\n/, # newlines
|
15
|
+
/\- Encore/, # encore
|
16
|
+
'Autism Friendly Screening:', # autism screening
|
17
|
+
'- Unlimited Screening', # unlimited screening
|
18
|
+
/LFF Opening Night Live/, # london film festival
|
19
|
+
'- Special Performance', # special performance
|
20
|
+
/\ATake 2 -/, # take 2
|
21
|
+
' - Movies for Juniors', # movies for juniors
|
22
|
+
]
|
23
|
+
|
24
|
+
# regexes and their replacements
|
25
|
+
REPLACE = {
|
26
|
+
/Bolshoi Ballet: (.*)/ => 'Bolshoi: ',
|
27
|
+
/Guardian Live - (.*)/ => 'The Guardian: ',
|
28
|
+
/Met Opera - (.*)/i => 'Met Opera: ',
|
29
|
+
/NT Live: (.*)/ => 'National Theatre: ',
|
30
|
+
/NT Live Encore: (.*)/ => 'National Theatre: ',
|
31
|
+
/ROH - (.*)/ => 'Royal Opera House: ',
|
32
|
+
/RSC Live: (.*)/ => 'Royal Shakespeare Company: ',
|
33
|
+
/The Royal Ballet - (.*)/ => 'The Royal Ballet: '
|
34
|
+
}
|
35
|
+
|
36
|
+
# @param [String] title a film title
|
37
|
+
def initialize(title)
|
38
|
+
@title = title
|
39
|
+
end
|
40
|
+
|
41
|
+
# sanitized and standardized title
|
42
|
+
# @return [String] title
|
43
|
+
def sanitized
|
44
|
+
@sanitzed ||= begin
|
45
|
+
sanitized = @title
|
46
|
+
REMOVE.each do |pattern|
|
47
|
+
sanitized.gsub! pattern, ''
|
48
|
+
end
|
49
|
+
REPLACE.each do |pattern, prefix|
|
50
|
+
sanitized.gsub!(pattern) { |_| prefix + $1 }
|
51
|
+
end
|
52
|
+
sanitized.squeeze(' ').strip
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -63,13 +63,16 @@ module CineworldUk
|
|
63
63
|
|
64
64
|
private
|
65
65
|
|
66
|
-
def self.
|
67
|
-
{
|
66
|
+
def self.cinema_hash(cinema_id)
|
67
|
+
{
|
68
|
+
cinema_id: cinema_id,
|
69
|
+
cinema_name: CineworldUk::Cinema.find(cinema_id).name
|
70
|
+
}
|
68
71
|
end
|
69
72
|
|
70
73
|
def self.create_for_single_film(html, cinema_id)
|
71
74
|
screenings_parser(html).to_a.map do |attributes|
|
72
|
-
new
|
75
|
+
new cinema_hash(cinema_id).merge(attributes)
|
73
76
|
end
|
74
77
|
end
|
75
78
|
|
data/lib/cineworld_uk/version.rb
CHANGED
@@ -3,34 +3,6 @@ require_relative '../../../test_helper'
|
|
3
3
|
describe CineworldUk::Internal::FilmWithScreeningsParser do
|
4
4
|
let(:described_class) { CineworldUk::Internal::FilmWithScreeningsParser }
|
5
5
|
|
6
|
-
describe '#cinema_id' do
|
7
|
-
subject { described_class.new(film_html).cinema_id }
|
8
|
-
|
9
|
-
describe 'passed film html from top of page' do
|
10
|
-
let(:film_html) { read_film_html('brighton/film_first') }
|
11
|
-
|
12
|
-
it 'returns the id' do
|
13
|
-
subject.must_equal(3)
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
describe 'passed second html from page' do
|
18
|
-
let(:film_html) { read_film_html('brighton/film_second') }
|
19
|
-
|
20
|
-
it 'returns the id' do
|
21
|
-
subject.must_equal(3)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
describe 'passed last html from end of page' do
|
26
|
-
let(:film_html) { read_film_html('brighton/film_last') }
|
27
|
-
|
28
|
-
it 'returns the id' do
|
29
|
-
subject.must_equal(3)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
6
|
describe '#film_name' do
|
35
7
|
subject { described_class.new(film_html).film_name }
|
36
8
|
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require_relative '../../../test_helper'
|
2
|
+
|
3
|
+
describe CineworldUk::Internal::TitleSanitizer do
|
4
|
+
let(:described_class) { CineworldUk::Internal::TitleSanitizer }
|
5
|
+
|
6
|
+
describe '#sanitized' do
|
7
|
+
subject { described_class.new(title).sanitized }
|
8
|
+
|
9
|
+
describe 'with 2d in title' do
|
10
|
+
let(:title) { 'Iron Man 3 2D' }
|
11
|
+
|
12
|
+
it 'removes dimension' do
|
13
|
+
subject.must_equal('Iron Man 3')
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
describe 'with 3d in title' do
|
18
|
+
let(:title) { 'Iron Man 3 3d' }
|
19
|
+
|
20
|
+
it 'removes dimension' do
|
21
|
+
subject.must_equal('Iron Man 3')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'in hindi' do
|
26
|
+
let(:title) { 'Bang Bang! (Hindi)' }
|
27
|
+
|
28
|
+
it 'removes language' do
|
29
|
+
subject.must_equal('Bang Bang!')
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'Unlimited Screening' do
|
34
|
+
let(:title) { 'Nightcrawler - Unlimited Screening' }
|
35
|
+
|
36
|
+
it 'removes prefix' do
|
37
|
+
subject.must_equal('Nightcrawler')
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe 'Austism screening' do
|
42
|
+
let(:title) { 'Autism Friendly Screening: Dolphin Tale 2' }
|
43
|
+
|
44
|
+
it 'removes prefix' do
|
45
|
+
subject.must_equal('Dolphin Tale 2')
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe 'Bolshoi screeening' do
|
50
|
+
let(:title) { 'Bolshoi Ballet: The Legend of Love' }
|
51
|
+
|
52
|
+
it 'removes prefix' do
|
53
|
+
subject.must_equal('Bolshoi: The Legend of Love')
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe 'Met Opera screeening' do
|
58
|
+
let(:title) { 'MET Opera - Le Nozze Di Figaro' }
|
59
|
+
|
60
|
+
it 'removes prefix' do
|
61
|
+
subject.must_equal('Met Opera: Le Nozze Di Figaro')
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe 'NT Live screeening' do
|
66
|
+
let(:title) { 'NT Live Encore: Frankenstein (starring Benedict Cumberbatch)' }
|
67
|
+
|
68
|
+
it 'removes prefix' do
|
69
|
+
subject.must_equal('National Theatre: Frankenstein (starring Benedict Cumberbatch)')
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
describe 'National Theatre screeening' do
|
74
|
+
let(:title) { 'NT Live: Treasure Island' }
|
75
|
+
|
76
|
+
it 'removes prefix' do
|
77
|
+
subject.must_equal('National Theatre: Treasure Island')
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
describe 'National Theatre Encore screeening' do
|
82
|
+
let(:title) { 'NT Live: A Streetcar Named Desire (Young Vic) - Encore' }
|
83
|
+
|
84
|
+
it 'removes prefix' do
|
85
|
+
subject.must_equal('National Theatre: A Streetcar Named Desire (Young Vic)')
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe 'ROH screeening' do
|
90
|
+
let(:title) { 'ROH - I Due Foscari' }
|
91
|
+
|
92
|
+
it 'removes prefix' do
|
93
|
+
subject.must_equal('Royal Opera House: I Due Foscari')
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
describe 'RSC screeening' do
|
98
|
+
let(:title) { "RSC Live: Love's Labour's Lost" }
|
99
|
+
|
100
|
+
it 'removes prefix' do
|
101
|
+
subject.must_equal("Royal Shakespeare Company: Love's Labour's Lost")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
describe 'special screeening' do
|
106
|
+
let(:title) { 'Billy Elliot The Musical Live - Special Performance' }
|
107
|
+
|
108
|
+
it 'removes prefix' do
|
109
|
+
subject.must_equal('Billy Elliot The Musical Live')
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
describe 'Take 2 screeening' do
|
114
|
+
let(:title) { 'Take 2 - Boyhood' }
|
115
|
+
|
116
|
+
it 'removes prefix' do
|
117
|
+
subject.must_equal('Boyhood')
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cineworld_uk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Croll
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -127,9 +127,8 @@ files:
|
|
127
127
|
- lib/cineworld_uk/cinema.rb
|
128
128
|
- lib/cineworld_uk/film.rb
|
129
129
|
- lib/cineworld_uk/internal/film_with_screenings_parser.rb
|
130
|
-
- lib/cineworld_uk/internal/name_parser.rb
|
131
130
|
- lib/cineworld_uk/internal/screening_parser.rb
|
132
|
-
- lib/cineworld_uk/internal/
|
131
|
+
- lib/cineworld_uk/internal/title_sanitizer.rb
|
133
132
|
- lib/cineworld_uk/internal/website.rb
|
134
133
|
- lib/cineworld_uk/internal/whatson_parser.rb
|
135
134
|
- lib/cineworld_uk/screening.rb
|
@@ -147,8 +146,7 @@ files:
|
|
147
146
|
- test/lib/cineworld_uk/cinema_test.rb
|
148
147
|
- test/lib/cineworld_uk/film_test.rb
|
149
148
|
- test/lib/cineworld_uk/internal/film_with_screenings_parser_test.rb
|
150
|
-
- test/lib/cineworld_uk/internal/
|
151
|
-
- test/lib/cineworld_uk/internal/titleize_test.rb
|
149
|
+
- test/lib/cineworld_uk/internal/title_sanitizer_test.rb
|
152
150
|
- test/lib/cineworld_uk/internal/website_test.rb
|
153
151
|
- test/lib/cineworld_uk/internal/whatson_parser_test.rb
|
154
152
|
- test/lib/cineworld_uk/screening_test.rb
|
@@ -192,8 +190,7 @@ test_files:
|
|
192
190
|
- test/lib/cineworld_uk/cinema_test.rb
|
193
191
|
- test/lib/cineworld_uk/film_test.rb
|
194
192
|
- test/lib/cineworld_uk/internal/film_with_screenings_parser_test.rb
|
195
|
-
- test/lib/cineworld_uk/internal/
|
196
|
-
- test/lib/cineworld_uk/internal/titleize_test.rb
|
193
|
+
- test/lib/cineworld_uk/internal/title_sanitizer_test.rb
|
197
194
|
- test/lib/cineworld_uk/internal/website_test.rb
|
198
195
|
- test/lib/cineworld_uk/internal/whatson_parser_test.rb
|
199
196
|
- test/lib/cineworld_uk/screening_test.rb
|
@@ -1,119 +0,0 @@
|
|
1
|
-
module CineworldUk
|
2
|
-
# Internal utility classes: Do not use
|
3
|
-
# @api private
|
4
|
-
module Internal
|
5
|
-
# Parses a string to derive a standardized movie title
|
6
|
-
class NameParser
|
7
|
-
# @return [String] the original name
|
8
|
-
attr_reader :original_name
|
9
|
-
|
10
|
-
# @param [String] name original film name
|
11
|
-
def initialize(name)
|
12
|
-
@original_name = name
|
13
|
-
@name = name
|
14
|
-
end
|
15
|
-
|
16
|
-
# Process the name and return the final string
|
17
|
-
# @return [String]
|
18
|
-
def standardize
|
19
|
-
strip_and_squeeze
|
20
|
-
.ampersands_into_text
|
21
|
-
.into_ampersand_if_second_to_last
|
22
|
-
.remove_indian_languages
|
23
|
-
.remove_screening_details
|
24
|
-
.replace_non_film_prefix
|
25
|
-
.remove_newlines
|
26
|
-
.remove_dates
|
27
|
-
.title_case
|
28
|
-
to_s
|
29
|
-
end
|
30
|
-
|
31
|
-
# The processed name
|
32
|
-
# @return [String]
|
33
|
-
def to_s
|
34
|
-
@name
|
35
|
-
end
|
36
|
-
|
37
|
-
protected
|
38
|
-
|
39
|
-
def ampersands_into_text
|
40
|
-
_replace(/\s(\&|\&)\s/, ' and ')
|
41
|
-
self
|
42
|
-
end
|
43
|
-
|
44
|
-
def into_ampersand_if_second_to_last
|
45
|
-
_replace(/\s(and)\s(\w+)\z/, ' & \2')
|
46
|
-
self
|
47
|
-
end
|
48
|
-
|
49
|
-
def remove_indian_languages
|
50
|
-
languages = %w(Malayalam Tamil)
|
51
|
-
|
52
|
-
_remove(/\((#{languages * '|'})\)/i)
|
53
|
-
self
|
54
|
-
end
|
55
|
-
|
56
|
-
def remove_screening_details
|
57
|
-
_remove 'Take 2 Thursday - '
|
58
|
-
_remove 'Autism Friendly Screening: '
|
59
|
-
self
|
60
|
-
end
|
61
|
-
|
62
|
-
def remove_dates
|
63
|
-
_remove(%r(-? \d{1,2}/\d{1,2}/\d{2,4}))
|
64
|
-
self
|
65
|
-
end
|
66
|
-
|
67
|
-
def remove_newlines
|
68
|
-
_remove(/\n/)
|
69
|
-
self
|
70
|
-
end
|
71
|
-
|
72
|
-
def replace_non_film_prefix
|
73
|
-
_replace 'Bolshoi Ballet Live -', 'Bolshoi:'
|
74
|
-
|
75
|
-
if @name.match(/\- NT .+ encore/)
|
76
|
-
@name = 'National Theatre: ' + @name.gsub(/\- NT .+ encore/, '')
|
77
|
-
end
|
78
|
-
|
79
|
-
_replace 'NT Live:', 'National Theatre:'
|
80
|
-
|
81
|
-
_replace 'MET Opera -', 'Met Opera:'
|
82
|
-
_replace 'Royal Ballet Live:', 'Royal Ballet:'
|
83
|
-
|
84
|
-
# fill out Royal Opera House
|
85
|
-
@name.match(/Royal Opera Live\: (.+) \-.+/) do |match|
|
86
|
-
@name = 'Royal Opera House: ' + match[1]
|
87
|
-
end
|
88
|
-
_replace 'Royal Opera Live:', 'Royal Opera House:'
|
89
|
-
|
90
|
-
_replace 'RSC Live:', 'Royal Shakespeare Company:'
|
91
|
-
_remove '(Encore Performance)' # remove rsc-style encore
|
92
|
-
|
93
|
-
_remove ' Theatre Series' # West End
|
94
|
-
|
95
|
-
self
|
96
|
-
end
|
97
|
-
|
98
|
-
def strip_and_squeeze
|
99
|
-
@name = @name.strip.squeeze(' ')
|
100
|
-
self
|
101
|
-
end
|
102
|
-
|
103
|
-
def title_case
|
104
|
-
@name = CineworldUk::Internal::Titleize.titleize(@name)
|
105
|
-
self
|
106
|
-
end
|
107
|
-
|
108
|
-
private
|
109
|
-
|
110
|
-
def _remove(match)
|
111
|
-
@name = @name.gsub(match, '')
|
112
|
-
end
|
113
|
-
|
114
|
-
def _replace(match, replacement)
|
115
|
-
@name = @name.gsub(match, replacement)
|
116
|
-
end
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
@@ -1,87 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
module CineworldUk
|
3
|
-
# Internal utility classes: Do not use
|
4
|
-
# @api private
|
5
|
-
module Internal
|
6
|
-
# @note Modified from titleize gem
|
7
|
-
# https://github.com/granth/titleize
|
8
|
-
module Titleize
|
9
|
-
# List of words not to capitalize unless they lead a phrase
|
10
|
-
WORDS = %w(a an and as at but by en for if in of on or the to via vs vs.)
|
11
|
-
|
12
|
-
extend self
|
13
|
-
|
14
|
-
# Capitalizes most words to create a nicer looking title string.
|
15
|
-
#
|
16
|
-
# The list of "small words" which are not capped comes from
|
17
|
-
# the New York Times Manual of Style, plus 'vs'.
|
18
|
-
#
|
19
|
-
# Also capitalises roman numerals
|
20
|
-
#
|
21
|
-
# "notes on a scandal" # => "Notes on a Scandal"
|
22
|
-
# "ghostbusters ii" # => "Ghostbusters II"
|
23
|
-
#
|
24
|
-
# @param [String] title a chunk of html
|
25
|
-
# @return [String]
|
26
|
-
def titleize(title)
|
27
|
-
title = title.dup
|
28
|
-
title.downcase! unless title[/[[:lower:]]/] # assume all-caps fixing
|
29
|
-
|
30
|
-
phrases(title).map do |phrase|
|
31
|
-
words = phrase.split
|
32
|
-
words.map do |word|
|
33
|
-
def word.capitalize
|
34
|
-
# like String#capitalize, but it starts with the first letter
|
35
|
-
sub(/[[:alpha:]].*/) { |subword| subword.capitalize }
|
36
|
-
end
|
37
|
-
|
38
|
-
case word
|
39
|
-
when /[[:alpha:]]\.[[:alpha:]]/ # words with dots in
|
40
|
-
word
|
41
|
-
when /[-‑]/ # hyphenated word (regular and non-breaking)
|
42
|
-
word.split(/([-‑])/).map do |part|
|
43
|
-
WORDS.include?(part) ? part : part.capitalize
|
44
|
-
end.join
|
45
|
-
when /^[[:alpha:]].*[[:upper:]]/ # non-first letter capitalized
|
46
|
-
word
|
47
|
-
when /^[[:digit:]]/ # first character is a number
|
48
|
-
word
|
49
|
-
when /^(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i
|
50
|
-
word.upcase
|
51
|
-
when words.first, words.last
|
52
|
-
word.capitalize
|
53
|
-
when *(WORDS + WORDS.map { |small| small.capitalize })
|
54
|
-
word.downcase
|
55
|
-
else
|
56
|
-
word.capitalize
|
57
|
-
end
|
58
|
-
end.join(' ')
|
59
|
-
end.join(' ')
|
60
|
-
end
|
61
|
-
|
62
|
-
# Splits a title into an array based on punctuation.
|
63
|
-
# @param [String] title Film title
|
64
|
-
# @return [Array<String>]
|
65
|
-
#
|
66
|
-
# Titleize.phrases("simple title")
|
67
|
-
# #=> ["simple title"]
|
68
|
-
# Titleize.phrases("more complicated: titling")
|
69
|
-
# #=> ["more complicated:", "titling"]
|
70
|
-
# Titleize.phrases("even more: complicated - titling")
|
71
|
-
# #=> ["even more:", "complicated -", "titling"]
|
72
|
-
def phrases(title)
|
73
|
-
phrases = title.scan(/.+?(?:[-:.;?!] |$)/).map { |phrase| phrase.strip }
|
74
|
-
|
75
|
-
# rejoin phrases that were split on the '.' from a small word
|
76
|
-
if phrases.size > 1
|
77
|
-
phrases[0..-2].each_with_index do |phrase, index|
|
78
|
-
next unless WORDS.include?(phrase.split.last.downcase)
|
79
|
-
phrases[index] << ' ' + phrases.slice!(index + 1)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
phrases
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
@@ -1,42 +0,0 @@
|
|
1
|
-
require_relative '../../../test_helper'
|
2
|
-
|
3
|
-
describe CineworldUk::Internal::NameParser do
|
4
|
-
|
5
|
-
describe '#standardize' do
|
6
|
-
subject { CineworldUk::Internal::NameParser.new(film_name).standardize }
|
7
|
-
|
8
|
-
[
|
9
|
-
['Rita, Sue and Bob Too', 'Rita, Sue and Bob Too', 'words with "and"'],
|
10
|
-
['Rita, Sue & Bob Too', 'Rita, Sue and Bob Too', 'words with "&"'],
|
11
|
-
['Rita, Sue & Bob Too', 'Rita, Sue and Bob Too', 'words with HTML "&"'],
|
12
|
-
['Cowboys and Aliens', 'Cowboys & Aliens', '"and" as the last but one word'],
|
13
|
-
['Cowboys & Aliens', 'Cowboys & Aliens', 'HTML "&" as the last but one word'],
|
14
|
-
['star wars: episode IV - A new hope', 'Star Wars: Episode IV - A New Hope', 'titleize'],
|
15
|
-
['star wars: episode v - the empire strikes back', 'Star Wars: Episode V - The Empire Strikes Back', 'titleize'],
|
16
|
-
['2 fast 2 furious', '2 Fast 2 Furious', 'titleize'],
|
17
|
-
['Geethanjali (Malayalam)', 'Geethanjali', 'Indian language removal'],
|
18
|
-
['Arrambam (Tamil)', 'Arrambam', 'Indian language removal'],
|
19
|
-
['Take 2 Thursday - About Time', 'About Time', 'remove "Take 2" prefix'],
|
20
|
-
['Autism Friendly Screening: Cloudy With A Chance Of Meatballs 2', 'Cloudy With a Chance of Meatballs 2', 'autism friendly'],
|
21
|
-
['Bolshoi Ballet Live - Lost Illusions', 'Bolshoi: Lost Illusions', 'bolshoi'],
|
22
|
-
['NT Live: War Horse', 'National Theatre: War Horse', 'NT'],
|
23
|
-
['Frankenstein (with Jonny Lee Miller as the Creature) - NT 50th Anniversary encore', 'National Theatre: Frankenstein (With Jonny Lee Miller as the Creature)', 'NT 50th'],
|
24
|
-
['MET Opera - Falstaff - 14/12/2013', 'Met Opera: Falstaff', 'Met Opera with date'],
|
25
|
-
['Royal Ballet Live: The Sleeping Beauty - 19/03/14', 'Royal Ballet: The Sleeping Beauty', 'royal ballet'],
|
26
|
-
['Royal Opera Live: Parsifal - Wednesday 18 Dec 2013', 'Royal Opera House: Parsifal', 'royal opera'],
|
27
|
-
['RSC Live: Richard II (Encore Performance)', 'Royal Shakespeare Company: Richard II', 'rsc'],
|
28
|
-
["West End Theatre Series: Noel Coward's Private Lives", "West End: Noel Coward's Private Lives", 'west end'],
|
29
|
-
["Raiders of\n the Lost Ark", 'Raiders of the Lost Ark', 'New lines']
|
30
|
-
|
31
|
-
].each do |test_case|
|
32
|
-
|
33
|
-
describe test_case[2] do
|
34
|
-
let(:film_name) { test_case[0] }
|
35
|
-
it 'returns standardized title' do
|
36
|
-
subject.must_equal test_case[1]
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,50 +0,0 @@
|
|
1
|
-
require_relative '../../../test_helper'
|
2
|
-
|
3
|
-
describe CineworldUk::Internal::Titleize do
|
4
|
-
|
5
|
-
describe '#titleize(name)' do
|
6
|
-
subject { CineworldUk::Internal::Titleize.titleize(string) }
|
7
|
-
|
8
|
-
[
|
9
|
-
[
|
10
|
-
'star wars: episode iv - a new hope',
|
11
|
-
'Star Wars: Episode IV - A New Hope'
|
12
|
-
],
|
13
|
-
[
|
14
|
-
'star wars: episode v - the empire strikes back',
|
15
|
-
'Star Wars: Episode V - The Empire Strikes Back'
|
16
|
-
],
|
17
|
-
['2 fast 2 furious', '2 Fast 2 Furious'],
|
18
|
-
['saw iv', 'Saw IV'],
|
19
|
-
['fast & Furious 6', 'Fast & Furious 6'],
|
20
|
-
['fast & Furious vi', 'Fast & Furious VI']
|
21
|
-
].each do |test_case|
|
22
|
-
|
23
|
-
describe test_case[2] do
|
24
|
-
let(:string) { test_case[0] }
|
25
|
-
it 'returns titlecase' do
|
26
|
-
subject.must_equal test_case[1]
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
describe '#phrases(name)' do
|
33
|
-
subject { CineworldUk::Internal::Titleize.phrases(string) }
|
34
|
-
|
35
|
-
[
|
36
|
-
[
|
37
|
-
'star wars: episode iv - a new hope',
|
38
|
-
['star wars:', 'episode iv -', 'a new hope']
|
39
|
-
]
|
40
|
-
].each do |test_case|
|
41
|
-
|
42
|
-
describe test_case[0] do
|
43
|
-
let(:string) { test_case[0] }
|
44
|
-
it 'splits the name' do
|
45
|
-
subject.must_equal test_case[1]
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|