google_movies47 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in movie_show_times.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Christian Hein
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,85 @@
1
+ # MovieShowTimes
2
+
3
+ Get Movies Show Times for all theaters near a given location.
4
+
5
+ The gem crawls and parses Google Movies pages.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'google_movies47'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install google_movies47
20
+
21
+ ## Usage
22
+
23
+ * <tt>:city</tt> (required string) name of city, town, location
24
+ * <tt>:state</tt> (required string) name of the state if the location is inside the USA, or name of the country if it is outside the USA
25
+ * <tt>:language</tt> (optional string) language code, like en, es, de, fr.
26
+ * <tt>:days\_ahead</tt> (optional integer) number of days after today for which to get show times
27
+
28
+ A location is required to initialize the crawling. By default, it gets show times for the current date in English.
29
+
30
+ movieShowTimes = GoogleMovies47::Crawler.new({ :city => 'Buenos Aires', :state => 'Argentina' })
31
+
32
+ theater = movieShowTimes.theaters['Monumental']
33
+ puts theater # => { :name => 'Monumental', :info => 'Lavalle 780, Buenos Aires, Argentina - 0-11-4393-9008',
34
+ :movies => [ ... ]
35
+ }
36
+
37
+ puts theater[:movies][0] # => { :name => 'Titanic 3D',
38
+ :info => { :duration => 10814,
39
+ :language => 'English',
40
+ :genre => 'Action/Adventure/Drama'
41
+ }
42
+ :times => ['1:30pm', '5:30pm', '9:30pm', '1:00am']
43
+ }
44
+
45
+
46
+ You can also retrieve show times for the following days. How far ahead show times are published is not defined, so use this option with caution: asking for a date too far in the future may return no show times at all.
47
+
48
+ movieShowTimes = GoogleMovies47::Crawler.new({ :city => 'Buenos Aires', :state => 'Argentina',
49
+ :days_ahead => 2
50
+ })
51
+
52
+ Getting show times info in Spanish:
53
+
54
+ movieShowTimes = GoogleMovies47::Crawler.new({
55
+ :city => 'Buenos Aires',
56
+ :state => 'Argentina',
57
+ :language => 'es'
58
+ })
59
+
60
+ puts theater[:movies][0] # => { :name => 'Titanic 3D',
61
+ :info => {
62
+ :duration => 10814,
63
+ :language => 'English',
64
+ :genre => 'Acción/Aventura/Drama'
65
+ }
66
+ :times => ['13:30', '17:30', '21:30', '01:00']
67
+ }
68
+
69
+ ## TODO
70
+
71
+ Improve API
72
+
73
+ Include more movie info from other data sources
74
+
75
+ ## Contributing
76
+
77
+ 1. Fork it
78
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
79
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
80
+ 4. Push to the branch (`git push origin my-new-feature`)
81
+ 5. Create new Pull Request
82
+
83
+ ## License
84
+
85
+ Copyright © 2012 Christian Hein, released under the MIT license
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new('spec')
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/google_movies47/version', __FILE__)
3
+
4
# Gem specification for google_movies47.
Gem::Specification.new do |spec|
  # Identity
  spec.name    = 'google_movies47'
  spec.version = GoogleMovies47::VERSION

  # Authorship and description
  spec.authors     = ['Christian Hein', 'Jacob Williams']
  spec.email       = ['chrishein@gmail.com', 'ponyboy47@gmail.com']
  spec.summary     = 'Google Movies crawler and parser'
  spec.description = 'Get Movie Show Times by Location'
  spec.homepage    = 'https://github.com/Ponyboy47/movie_show_times'

  # Packaged files are whatever git tracks; executables and test files are
  # derived from that list, so `files` has to be assigned first.
  tracked_files = `git ls-files`.split($\)
  spec.files         = tracked_files
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependencies
  %w[mechanize nokogiri chronic_duration].each do |dependency|
    spec.add_dependency dependency
  end

  # Development-only dependencies
  %w[rspec rake fakeweb].each do |dependency|
    spec.add_development_dependency dependency
  end
end
@@ -0,0 +1,9 @@
1
+ require "google_movies47/version"
2
+ require 'google_movies47/crawler'
3
+ require 'google_movies47/parser'
4
+ require 'google_movies47/language_parser'
5
+ require 'google_movies47/genre_parser'
6
+
7
+ module GoogleMovies47
8
+
9
+ end
@@ -0,0 +1,57 @@
1
+ # encoding: UTF-8
2
+ require 'mechanize'
3
+ require 'cgi'
4
+
5
module GoogleMovies47
  # Fetches the Google Movies result pages for a location and hands each page
  # to a GoogleMovies47::Parser, whose results are exposed through #movies and
  # #theaters.
  class Crawler

    # Raised when :city or :state is missing from the options.
    MissingLocationArgument = Class.new(StandardError)
    # Raised when :days_ahead is not a non-negative Integer.
    WrongDaysAheadArgument = Class.new(StandardError)

    # Builds the search URL, downloads every result page and parses them.
    #
    # options - Hash of settings:
    #           :city       - required, used in the "near" query parameter.
    #           :state      - required, used in the "near" query parameter.
    #           :language   - optional language code, defaults to 'en'.
    #           :days_ahead - optional Integer >= 0, defaults to 0 (today).
    #
    # Raises MissingLocationArgument when :city or :state is absent.
    # Raises WrongDaysAheadArgument when :days_ahead is not an Integer >= 0.
    def initialize(options = {})
      raise MissingLocationArgument unless options[:city] && options[:state]

      language = options[:language] || 'en'

      days_ahead = options[:days_ahead] || 0
      # The comparison used to be written `0 >= days_ahead`, which rejected
      # every future day and accepted negative offsets.
      raise WrongDaysAheadArgument unless days_ahead.is_a?(Integer) && days_ahead >= 0

      @parser = GoogleMovies47::Parser.new(language)

      search_url = "http://www.google.com/movies?hl=#{language}" \
                   "&near=#{CGI.escape(options[:city])}+#{CGI.escape(options[:state])}&date=#{days_ahead}"

      @agent = Mechanize.new
      crawl_result_pages(@agent.get(search_url))
    end

    # Returns whatever the parser collected as movies.
    def movies
      @parser.movies
    end

    # Returns whatever the parser collected as theaters.
    def theaters
      @parser.theaters
    end

    private

    # Collects the first result page plus every page linked from its
    # navigation bar, then feeds them to the parser in that order.
    #
    # page - the fetched first result page; must respond to #parser.
    def crawl_result_pages(page)
      first_doc = page.parser

      # Navigation cells without class "b" hold the links to the other pages.
      page_links = first_doc.xpath("//div[@id='navbar']/table//tr/td[not(@class='b')]/a")

      # All pages are downloaded before any parsing starts, as before.
      result_pages = [first_doc]
      page_links.each do |link|
        result_pages << @agent.get(link['href']).parser
      end

      result_pages.each { |doc| @parser.parse_show_times(doc) }
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+
3
module GoogleMovies47
  # Finds the genre segment of a movie info line, using the genre terms listed
  # one per line in genres/<language>.txt next to this file.
  class GenreParser

    # language - language code naming the terms file (default 'en').
    #
    # When the terms file is missing or contains no terms, no pattern is built
    # and #parse always returns nil.
    def initialize(language = 'en')
      @language = language
      @regular_expression = nil

      terms = load_terms(language)
      return if terms.empty?

      # Terms are escaped so they are matched literally.
      alternatives = terms.map { |term| Regexp.escape(term) }.join('|')
      @regular_expression = Regexp.new("(#{alternatives})", Regexp::IGNORECASE)
    end

    # Splits info_line on ' - ' and returns the first segment containing a
    # known genre term (case-insensitive), or nil when none does.
    #
    # info_line - String such as '1hr 45min - Action/Adventure - English'.
    def parse(info_line)
      return nil unless @regular_expression

      info_line.split(' - ').find { |part| @regular_expression.match(part) }
    end

    private

    # Reads the genre terms for language, one per line. Blank lines are
    # dropped because an empty alternative would match every string.
    # File.readlines closes the file itself. Returns [] when the file is
    # missing.
    def load_terms(language)
      path = File.join(File.dirname(__FILE__), 'genres', "#{language}.txt")
      File.readlines(path).map(&:strip).reject(&:empty?)
    rescue Errno::ENOENT
      []
    end
  end

end
32
+
33
+ =begin
34
+ i.match("-+[^-]+-")
35
+
36
+ =end
@@ -0,0 +1,28 @@
1
+ Action
2
+ Adventure
3
+ Animation
4
+ Biography
5
+ Comedy
6
+ Crime
7
+ Documentary
8
+ Drama
9
+ Family
10
+ Fantasy
11
+ Film-Noir
12
+ Game-Show
13
+ History
14
+ Horror
15
+ Music
16
+ Musical
17
+ Mystery
18
+ News
19
+ Reality-TV
20
+ Romance
21
+ Sci-Fi
22
+ Scifi
23
+ Sport
24
+ Suspense
25
+ Talk-Show
26
+ Thriller
27
+ War
28
+ Western
@@ -0,0 +1,26 @@
1
+ acción
2
+ aventura
3
+ animación
4
+ biografía
5
+ comedia
6
+ crimen
7
+ documental
8
+ drama
9
+ familia
10
+ fantasía
11
+ Cine negro
12
+ historia
13
+ horror
14
+ música
15
+ musical
16
+ misterio
17
+ noticias
18
+ reality
19
+ romance
20
+ ciencia ficción
21
+ scifi
22
+ deporte
23
+ suspenso
24
+ Talk-Show
25
+ guerra
26
+ western
@@ -0,0 +1,26 @@
1
+ # encoding: UTF-8
2
+
3
module GoogleMovies47
  # Extracts a language name from a movie info line, using the names listed
  # one per line in languages/<language>.txt next to this file.
  class LanguageParser

    # language - language code naming the names file (default 'en').
    #
    # When the names file is missing or contains no names, no pattern is built
    # and #parse always returns nil.
    def initialize(language = 'en')
      @language = language
      @language_regular_expression = nil

      names = load_language_names(language)
      return if names.empty?

      # Names are escaped so they are matched literally.
      alternatives = names.map { |name| Regexp.escape(name) }.join('|')
      @language_regular_expression = Regexp.new("(#{alternatives})", Regexp::IGNORECASE)
    end

    # Returns the first known language name found in info_line
    # (case-insensitive), passed through String#capitalize, or nil when no
    # name is found.
    #
    # info_line - String such as '1hr 45min - Drama - English'.
    def parse(info_line)
      return nil unless @language_regular_expression

      found = @language_regular_expression.match(info_line)
      found && found[0].capitalize
    end

    private

    # Reads the language names for language, one per line. Blank lines are
    # dropped because an empty alternative would match every string.
    # File.readlines closes the file itself. Returns [] when the file is
    # missing.
    def load_language_names(language)
      path = File.join(File.dirname(__FILE__), 'languages', "#{language}.txt")
      File.readlines(path).map(&:strip).reject(&:empty?)
    rescue Errno::ENOENT
      []
    end
  end
end
@@ -0,0 +1,63 @@
1
+ Afrikaans
2
+ Albanisch
3
+ Arabisch
4
+ Azerbaijani
5
+ Baskische
6
+ Bengali
7
+ belarussischen
8
+ Bulgarisch
9
+ Katalanisch
10
+ Chinesisch
11
+ kroatisch
12
+ Tschechisch
13
+ Dänisch
14
+ Holländer
15
+ Englisch
16
+ Esperanto
17
+ Estnisch
18
+ Filipino
19
+ Finnisch
20
+ Französisch
21
+ galizischen
22
+ Georgier
23
+ Deutsch
24
+ Griechisch
25
+ Gujarati
26
+ Haitian Creole
27
+ Hebräisch
28
+ Hindi
29
+ Ungarisch
30
+ Isländisch
31
+ Indonesier
32
+ Irisch
33
+ Italienisch
34
+ Japanisch
35
+ Kannada
36
+ Koreanisch
37
+ Latein
38
+ Lettisch
39
+ Litauisch
40
+ Mazedonier
41
+ malaiisch
42
+ Malteser
43
+ Norwegisch
44
+ Persisch
45
+ polnisch
46
+ Portugiesisch
47
+ Rumänisch
48
+ Russisch
49
+ serbisch
50
+ Slowakisch
51
+ Slowenisch
52
+ Spanisch
53
+ Swahili
54
+ Schwedisch
55
+ Tamilisch
56
+ Telugu
57
+ thailändisch
58
+ Türkisch
59
+ Ukrainisch
60
+ Urdu
61
+ Vietnamesisch
62
+ Walisisch
63
+ Jiddisch
@@ -0,0 +1,63 @@
1
+ Afrikaans
2
+ Albanian
3
+ Arabic
4
+ Azerbaijani
5
+ Basque
6
+ Bengali
7
+ Belarusian
8
+ Bulgarian
9
+ Catalan
10
+ Chinese
11
+ Croatian
12
+ Czech
13
+ Danish
14
+ Dutch
15
+ English
16
+ Esperanto
17
+ Estonian
18
+ Filipino
19
+ Finnish
20
+ French
21
+ Galician
22
+ Georgian
23
+ German
24
+ Greek
25
+ Gujarati
26
+ Haitian Creole
27
+ Hebrew
28
+ Hindi
29
+ Hungarian
30
+ Icelandic
31
+ Indonesian
32
+ Irish
33
+ Italian
34
+ Japanese
35
+ Kannada
36
+ Korean
37
+ Latin
38
+ Latvian
39
+ Lithuanian
40
+ Macedonian
41
+ Malay
42
+ Maltese
43
+ Norwegian
44
+ Persian
45
+ Polish
46
+ Portuguese
47
+ Romanian
48
+ Russian
49
+ Serbian
50
+ Slovak
51
+ Slovenian
52
+ Spanish
53
+ Swahili
54
+ Swedish
55
+ Tamil
56
+ Telugu
57
+ Thai
58
+ Turkish
59
+ Ukrainian
60
+ Urdu
61
+ Vietnamese
62
+ Welsh
63
+ Yiddish