RubyGems - cineworld_uk - Versions diffs - 1.0.3 → 1.0.4 - Mend

cineworld_uk 1.0.3 → 1.0.4

Files changed (24) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 965975013c473f998ec09dc724265a7e96d88e62
+  data.tar.gz: d9c259cf94a9b63e7c412e6fb1ee32eead4b7f87
+SHA512:
+  metadata.gz: 3f8c6957d9f6caf687d1a13867565c7d8983cfcc5130264bc0d2d38f56e43249ff181fea4185e9369a2abd60ecadb888ba091ca89d118b145ece2726a6c8c962
+  data.tar.gz: 6ebb510ad7628f1289daa05f768c48ea0c53eda60ea6ab6377073a073112c743d59c065afee69c6022cde5e98b0a295bb419fa0f3e64ade42fad8dfb53918f44

data/.travis.yml CHANGED Viewed

@@ -2,3 +2,4 @@ language: ruby
 rvm:
   - 1.9.3
   - 2.0.0
+  - 2.1.0

data/Rakefile CHANGED Viewed

@@ -12,4 +12,13 @@ Rake::TestTask.new do |t|
   t.verbose = true
 end
+# http://erniemiller.org/2014/02/05/7-lines-every-gems-rakefile-should-have/
+task :console do
+  require 'irb'
+  require 'irb/completion'
+  require 'cineworld_uk'
+  ARGV.clear
+  IRB.start
+end
 task :default => :test

data/lib/cineworld_uk/internal/film_with_screenings_parser.rb CHANGED Viewed

@@ -15,40 +15,7 @@ module CineworldUk
       # The film name
       # @return [String]
       def film_name
-        name = original_name
-        # screening types
-        name = name.gsub 'Take 2 Thursday - ', '' # take 2 thursday
-        name = name.gsub 'Autism Friendly Screening: ', '' # remove autism friendly
-        # bollywood - remove language from film name
-        name = name.gsub ' (Malayalam)', ''
-        name = name.gsub ' (Tamil)', ''
-        # special screenings
-        name = name.gsub 'Bolshoi Ballet Live -', 'Bolshoi:' # bolshoi ballet
-        if name.match /\- NT .+ encore/
-          name = 'National Theatre: ' + name.gsub(/\- NT .+ encore/, '')
-        end
-        name = name.gsub 'MET Opera -', 'Met Opera:' # fill out Met Opera
-        name = name.gsub 'NT Live:', 'National Theatre:' # National theatre
-        name = name.gsub 'Royal Ballet Live:', 'Royal Ballet:' # Royal Ballet
-        # fill out Royal Opera House
-        if pure_name_match = name.match(/Royal Opera Live\: (.+) \-.+/)
-          name = 'Royal Opera House: ' + pure_name_match[1]
-        end
-        name = name.gsub 'Royal Opera Live:', 'Royal Opera House:'
-        name = name.gsub '(Encore Performance)', '' # remove rsc-style encore
-        name = name.gsub 'RSC Live:', 'Royal Shakespeare Company:' # globe
-        name = name.gsub /\- \d{1,2}\/\d{1,2}\/\d{2,4}/, '' # remove dates
-        name = name.gsub /\- \d{1,2}\/\d{1,2}\/\d{2,4}/, '' # remove dates
-        name = name.gsub /\n/, '' # remove newlines
-        name = name.gsub /\A\s+/, '' # remove leading spaces
-        name = name.gsub /\s+\z/, '' # remove trailing spaces
-        name = name.squeeze(' ') # spaces compressed
+        NameParser.new(original_name).standardize
       end
       # Showings

data/lib/cineworld_uk/internal/name_parser.rb ADDED Viewed

@@ -0,0 +1,119 @@
+require 'titleize'
+module CineworldUk
+  # Internal utility classes: Do not use
+  # @api private
+  module Internal
+    # Parses a string to derive a standardized movie title
+    class NameParser
+      attr_reader :original_name
+      def initialize(name)
+        @original_name = name
+        @name = name
+      end
+      # Process the name and return the final string
+      # @return [String]
+      def standardize
+        strip_and_squeeze.
+          ampersands_into_text.
+          into_ampersand_if_second_to_last.
+          remove_indian_languages.
+          remove_screening_details.
+          replace_non_film_prefix.
+          remove_newlines.
+          remove_dates.
+          title_case
+        to_s
+      end
+      # The processed name
+      # @return [String]
+      def to_s
+        @name
+      end
+      protected
+      def ampersands_into_text
+        _replace(/\s(\&amp;|\&)\s/, ' and ')
+        self
+      end
+      def into_ampersand_if_second_to_last
+        _replace(/\s(and)\s(\w+)\z/, ' & \2')
+        self
+      end
+      def remove_indian_languages
+        languages = ['Malayalam', 'Tamil']
+        _remove(/\((#{languages*'|'})\)/i)
+        self
+      end
+      def remove_screening_details
+        _remove 'Take 2 Thursday - '
+        _remove 'Autism Friendly Screening: '
+        self
+      end
+      def remove_dates
+        _remove(/\-? \d{1,2}\/\d{1,2}\/\d{2,4}/)
+        self
+      end
+      def remove_newlines
+        _remove(/\n/)
+        self
+      end
+      def replace_non_film_prefix
+        _replace 'Bolshoi Ballet Live -', 'Bolshoi:'
+        @name = 'National Theatre: ' + @name.gsub(/\- NT .+ encore/, '') if @name.match /\- NT .+ encore/
+        _replace 'NT Live:', 'National Theatre:'
+        _replace 'MET Opera -', 'Met Opera:'
+        _replace 'Royal Ballet Live:', 'Royal Ballet:'
+        # fill out Royal Opera House
+        if pure_name_match = @name.match(/Royal Opera Live\: (.+) \-.+/)
+          @name = 'Royal Opera House: ' + pure_name_match[1]
+        end
+        _replace 'Royal Opera Live:', 'Royal Opera House:'
+        _replace 'RSC Live:', 'Royal Shakespeare Company:'
+        _remove '(Encore Performance)' # remove rsc-style encore
+        _remove ' Theatre Series' # West End
+        self
+      end
+      def strip_and_squeeze
+        @name = @name.strip.squeeze(' ')
+        self
+      end
+      def title_case
+        @name = CineworldUk::Internal::Titleize.titleize(@name)
+        self
+      end
+      private
+      def _remove(match)
+        @name = @name.gsub(match, '')
+      end
+      def _replace(match, replacement)
+        @name = @name.gsub(match, replacement)
+      end
+    end
+  end
+end

data/lib/cineworld_uk/internal/titleize.rb ADDED Viewed

@@ -0,0 +1,87 @@
+module CineworldUk
+  # Internal utility classes: Do not use
+  # @api private
+  module Internal
+    # @note Modified from titleize gem
+    #   https://github.com/granth/titleize
+    module Titleize
+      # List of words not to capitalize unless they lead a phrase
+      SMALL_WORDS = %w{a an and as at but by en for if in of on or the to via vs vs.}
+      extend self
+      # Capitalizes most words to create a nicer looking title string.
+      #
+      # The list of "small words" which are not capped comes from
+      # the New York Times Manual of Style, plus 'vs'.
+      #
+      # Also upper-cases Roman numerals
+      #
+      #   "notes on a scandal" # => "Notes on a Scandal"
+      #   "ghostbusters ii"    # => "Ghostbusters II"
+      #
+      # @param [String] title the title text to convert to title case
+      # @return [String]
+      def titleize(title)
+        title = title.dup
+        title.downcase! unless title[/[[:lower:]]/]  # assume all-caps need fixing
+        phrases(title).map do |phrase|
+          words = phrase.split
+          words.map do |word|
+            def word.capitalize
+              # like String#capitalize, but it starts with the first letter
+              self.sub(/[[:alpha:]].*/) {|subword| subword.capitalize}
+            end
+            case word
+            when /[[:alpha:]]\.[[:alpha:]]/  # words with dots in, like "example.com"
+              word
+            when /[-‑]/  # hyphenated word (regular and non-breaking)
+              word.split(/([-‑])/).map do |part|
+                SMALL_WORDS.include?(part) ? part : part.capitalize
+              end.join
+            when /^[[:alpha:]].*[[:upper:]]/ # non-first letter capitalized already
+              word
+            when /^[[:digit:]]/  # first character is a number
+              word
+            when /^(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i
+              word.upcase
+            when words.first, words.last
+              word.capitalize
+            when *(SMALL_WORDS + SMALL_WORDS.map {|small| small.capitalize })
+              word.downcase
+            else
+              word.capitalize
+            end
+          end.join(" ")
+        end.join(" ")
+      end
+      # Splits a title into an array based on punctuation.
+      # @param [String] title Film title
+      # @return [Array<String>]
+      #
+      #   "simple title"                     # => ["simple title"]
+      #   "more complicated: titling"        # => ["more complicated:", "titling"]
+      #   "even more: complicated - titling" # => ["even more:", "complicated -", "titling"]
+      def phrases(title)
+        phrases = title.scan(/.+?(?:[-:.;?!] |$)/).map {|phrase| phrase.strip }
+        # rejoin phrases that were split on the '.' from a small word
+        if phrases.size > 1
+          phrases[0..-2].each_with_index do |phrase, index|
+            if SMALL_WORDS.include?(phrase.split.last.downcase)
+              phrases[index] << " " + phrases.slice!(index + 1)
+            end
+          end
+        end
+        phrases
+      end
+    end
+  end
+end

data/lib/cineworld_uk/version.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 # Ruby interface for http://www.cineworld.co.uk
-# @version 1.0.3
+# @version 1.0.4
 module CineworldUk
   # Gem version
-  VERSION = "1.0.3"
+  VERSION = "1.0.4"
 end

data/lib/cineworld_uk.rb CHANGED Viewed

@@ -6,6 +6,8 @@ require 'tzinfo/data'
 require_relative './cineworld_uk/version'
 require_relative './cineworld_uk/internal/film_with_screenings_parser'
+require_relative './cineworld_uk/internal/name_parser'
+require_relative './cineworld_uk/internal/titleize'
 require_relative './cineworld_uk/cinema'
 require_relative './cineworld_uk/film'

data/test/lib/cineworld_uk/internal/film_with_screenings_parser_test.rb CHANGED Viewed

@@ -12,102 +12,6 @@ describe CineworldUk::Internal::FilmWithScreeningsParser do
         subject.must_equal('Gravity')
       end
     end
-    describe 'passed valid film html with take 2 name prefix' do
-      let(:film_html) { read_film_html('brighton/take-2-thursday-about-time') }
-      it 'returns the film name' do
-        subject.must_equal('About Time')
-      end
-    end
-    describe 'passed valid film html with autism friendly name prefix' do
-      let(:film_html) { read_film_html('brighton/autism-friendly-cloudy-2') }
-      it 'returns the film name' do
-        subject.must_equal('Cloudy With A Chance Of Meatballs 2')
-      end
-    end
-    describe 'passed valid film html with malayalam language suffix' do
-      let(:film_html) { read_film_html('brighton/geethanjali-malayalam') }
-      it 'returns the film name' do
-        subject.must_equal('Geethanjali')
-      end
-    end
-    describe 'passed valid film html with tamil language suffix' do
-      let(:film_html) { read_film_html('wandsworth/arrambam-tamil') }
-      it 'returns the film name' do
-        subject.must_equal('Arrambam')
-      end
-    end
-    describe 'passed valid film html with bolshoi live' do
-      let(:film_html) { read_film_html('wandsworth/bolshoi-ballet-live-lost-illusions') }
-      it 'returns the film name' do
-        subject.must_equal('Bolshoi: Lost Illusions')
-      end
-    end
-    describe 'passed valid film html with NT 50th encore' do
-      let(:film_html) { read_film_html('wandsworth/frankenstein-nt-50th') }
-      it 'returns the film name' do
-        subject.must_equal('National Theatre: Frankenstein (with Jonny Lee Miller as the Creature)')
-      end
-    end
-    describe 'passed valid film html with met opera and date' do
-      let(:film_html) { read_film_html('wandsworth/met-opera-falstaff') }
-      it 'returns the film name' do
-        subject.must_equal('Met Opera: Falstaff')
-      end
-    end
-    describe 'passed valid film html with nt live' do
-      let(:film_html) { read_film_html('wandsworth/nt-live-war-horse') }
-      it 'returns the film name' do
-        subject.must_equal('National Theatre: War Horse')
-      end
-    end
-    describe 'passed valid film html with ballet live' do
-      let(:film_html) { read_film_html('wandsworth/royal-ballet-live-the-sleeping-beauty') }
-      it 'returns the film name' do
-        subject.must_equal('Royal Ballet: The Sleeping Beauty')
-      end
-    end
-    describe 'passed valid film html with royal opera house and weird date' do
-      let(:film_html) { read_film_html('wandsworth/royal-opera-live-parsifal-weird-date') }
-      it 'returns the film name' do
-        subject.must_equal('Royal Opera House: Parsifal')
-      end
-    end
-    describe 'passed valid film html with RSC and encore' do
-      let(:film_html) { read_film_html('wandsworth/rsc-live-richard-ii-encore') }
-      it 'returns the film name' do
-        subject.must_equal('Royal Shakespeare Company: Richard II')
-      end
-    end
-    describe 'passed valid film html with West End Theatre' do
-      let(:film_html) { read_film_html('wandsworth/west-end-theatre-series-private-lives') }
-      it 'returns the film name' do
-        subject.must_equal("West End Theatre Series: Noel Coward's Private Lives")
-      end
-    end
   end
   describe '#showings' do

data/test/lib/cineworld_uk/internal/name_parser_test.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require_relative '../../../test_helper'
+describe CineworldUk::Internal::NameParser do
+  describe '#standardize' do
+    subject { CineworldUk::Internal::NameParser.new(film_name).standardize }
+    [
+      ['Rita, Sue and Bob Too', 'Rita, Sue and Bob Too', 'words with "and"'],
+      ['Rita, Sue & Bob Too', 'Rita, Sue and Bob Too', 'words with "&"'],
+      ['Rita, Sue &amp; Bob Too', 'Rita, Sue and Bob Too', 'words with HTML "&"'],
+      ['Cowboys and Aliens', 'Cowboys & Aliens', '"and" as the last but one word'],
+      ['Cowboys &amp; Aliens', 'Cowboys & Aliens', 'HTML "&" as the last but one word'],
+      ['star wars: episode IV - A new hope', 'Star Wars: Episode IV - A New Hope', 'titleize'],
+      ['star wars: episode v - the empire strikes back', 'Star Wars: Episode V - The Empire Strikes Back', 'titleize'],
+      ['2 fast 2 furious', '2 Fast 2 Furious', 'titleize'],
+      ['Geethanjali (Malayalam)', 'Geethanjali', 'Indian language removal'],
+      ['Arrambam (Tamil)', 'Arrambam', 'Indian language removal'],
+      ['Take 2 Thursday - About Time', 'About Time', 'remove "Take 2" prefix'],
+      ['Autism Friendly Screening: Cloudy With A Chance Of Meatballs 2', 'Cloudy With a Chance of Meatballs 2', 'autism friendly'],
+      ['Bolshoi Ballet Live - Lost Illusions', 'Bolshoi: Lost Illusions', 'bolshoi'],
+      ['NT Live: War Horse', 'National Theatre: War Horse', 'NT'],
+      ['Frankenstein (with Jonny Lee Miller as the Creature) - NT 50th Anniversary encore', 'National Theatre: Frankenstein (With Jonny Lee Miller as the Creature)', 'NT 50th'],
+      ['MET Opera - Falstaff - 14/12/2013', 'Met Opera: Falstaff', 'Met Opera with date'],
+      ['Royal Ballet Live: The Sleeping Beauty - 19/03/14', 'Royal Ballet: The Sleeping Beauty', 'royal ballet'],
+      ['Royal Opera Live: Parsifal - Wednesday 18 Dec 2013', 'Royal Opera House: Parsifal', 'royal opera'],
+      ['RSC Live: Richard II (Encore Performance)', 'Royal Shakespeare Company: Richard II', 'rsc'],
+      ["West End Theatre Series: Noel Coward's Private Lives", "West End: Noel Coward's Private Lives", 'west end'],
+      ["Raiders of\n the Lost Ark", 'Raiders of the Lost Ark', 'New lines']
+    ].each do |test_case|
+      describe test_case[2] do
+        let(:film_name) { test_case[0] }
+        it 'returns standardized title' do
+          subject.must_equal test_case[1]
+        end
+      end
+    end
+  end
+end

data/test/lib/cineworld_uk/internal/titleize_test.rb ADDED Viewed

@@ -0,0 +1,43 @@
+require_relative '../../../test_helper'
+describe CineworldUk::Internal::Titleize do
+  describe '#titleize(name)' do
+    subject { CineworldUk::Internal::Titleize.titleize(string) }
+    [
+      ['star wars: episode iv - a new hope', 'Star Wars: Episode IV - A New Hope'],
+      ['star wars: episode v - the empire strikes back', 'Star Wars: Episode V - The Empire Strikes Back'],
+      ['2 fast 2 furious', '2 Fast 2 Furious'],
+      ['saw iv', 'Saw IV'],
+      ['fast & Furious 6', 'Fast & Furious 6'],
+      ['fast & Furious vi', 'Fast & Furious VI']
+    ].each do |test_case|
+      describe test_case[2] do
+        let(:string) { test_case[0] }
+        it 'returns titlecase' do
+          subject.must_equal test_case[1]
+        end
+      end
+    end
+  end
+  describe '#phrases(name)' do
+    subject { CineworldUk::Internal::Titleize.phrases(string) }
+    [
+      ['star wars: episode iv - a new hope',['star wars:','episode iv -','a new hope']],
+    ].each do |test_case|
+      describe test_case[0] do
+        let(:string) { test_case[0] }
+        it 'splits the name' do
+          subject.must_equal test_case[1]
+        end
+      end
+    end
+  end
+end