google_movies47 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +85 -0
- data/Rakefile +5 -0
- data/google_movies47.gemspec +25 -0
- data/lib/google_movies47.rb +9 -0
- data/lib/google_movies47/crawler.rb +57 -0
- data/lib/google_movies47/genre_parser.rb +36 -0
- data/lib/google_movies47/genres/en.txt +28 -0
- data/lib/google_movies47/genres/es.txt +26 -0
- data/lib/google_movies47/language_parser.rb +26 -0
- data/lib/google_movies47/languages/de.txt +63 -0
- data/lib/google_movies47/languages/en.txt +63 -0
- data/lib/google_movies47/languages/es.txt +76 -0
- data/lib/google_movies47/languages/fr.txt +63 -0
- data/lib/google_movies47/languages/it.txt +63 -0
- data/lib/google_movies47/languages/pt.txt +63 -0
- data/lib/google_movies47/parser.rb +62 -0
- data/lib/google_movies47/version.rb +3 -0
- data/spec/fixtures/movies_bsas.html +21 -0
- data/spec/fixtures/movies_bsas_1.txt +33 -0
- data/spec/fixtures/movies_bsas_2.txt +33 -0
- data/spec/fixtures/movies_bsas_3.txt +33 -0
- data/spec/fixtures/movies_bsas_4.txt +33 -0
- data/spec/fixtures/movies_bsas_5.txt +33 -0
- data/spec/fixtures/movies_bsas_6.txt +33 -0
- data/spec/genre_parser_spec.rb +38 -0
- data/spec/google_movies47_spec.rb +139 -0
- data/spec/language_parser_spec.rb +34 -0
- data/spec/spec_helper.rb +7 -0
- metadata +185 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Christian Hein
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
# MovieShowTimes
|
2
|
+
|
3
|
+
Get Movies Show Times for all theaters near a given location.
|
4
|
+
|
5
|
+
The gem crawls and parses Google Movies pages.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'google_movies47'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install google_movies47
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
* <tt>:city</tt> (required string) name of city, town, location
|
24
|
+
* <tt>:state</tt> (required string) name of state or country (use the state if inside the USA, the country if outside the USA)
|
25
|
+
* <tt>:language</tt> (optional string) language code, like en, es, de, fr.
|
26
|
+
* <tt>:days\_ahead</tt> (optional integer) number of days after today for which to get show times
|
27
|
+
|
28
|
+
A location is required to initialize the crawling. By default, it gets show times for the current date in English.
|
29
|
+
|
30
|
+
movieShowTimes = GoogleMovies47::Crawler.new({ :city => 'Buenos Aires', :state => 'Argentina' })
|
31
|
+
|
32
|
+
theater = movieShowTimes.theaters['Monumental']
|
33
|
+
puts theater # => { :name => 'Monumental', :info => 'Lavalle 780, Buenos Aires, Argentina - 0-11-4393-9008',
|
34
|
+
:movies => [ ... ]
|
35
|
+
}
|
36
|
+
|
37
|
+
puts theater[:movies][0] # => { :name => 'Titanic 3D',
|
38
|
+
:info => { :duration => 10814,
|
39
|
+
:language => 'English',
|
40
|
+
:genre => 'Action/Adventure/Drama'
|
41
|
+
}
|
42
|
+
:times => ['1:30pm', '5:30pm', '9:30pm', '1:00am']
|
43
|
+
}
|
44
|
+
|
45
|
+
|
46
|
+
You can retrieve movie show times for the following days. How far into the future this information is available is not defined, so use this option with caution: asking for a date too far ahead may return no show times at all.
|
47
|
+
|
48
|
+
movieShowTimes = GoogleMovies47::Crawler.new({ :city => 'Buenos Aires', :state => 'Argentina',
|
49
|
+
:days_ahead => 2
|
50
|
+
})
|
51
|
+
|
52
|
+
Getting show times info in Spanish:
|
53
|
+
|
54
|
+
movieShowTimes = GoogleMovies47::Crawler.new({
|
55
|
+
:city => 'Buenos Aires',
|
56
|
+
:state => 'Argentina',
|
57
|
+
:language => 'es'
|
58
|
+
})
|
59
|
+
|
60
|
+
puts theater[:movies][0] # => { :name => 'Titanic 3D',
|
61
|
+
:info => {
|
62
|
+
:duration => 10814,
|
63
|
+
:language => 'English',
|
64
|
+
:genre => 'Acción/Aventura/Drama'
|
65
|
+
}
|
66
|
+
:times => ['13:30', '17:30', '21:30', '01:00']
|
67
|
+
}
|
68
|
+
|
69
|
+
## TODO
|
70
|
+
|
71
|
+
Improve API
|
72
|
+
|
73
|
+
Include more movie info from other data sources
|
74
|
+
|
75
|
+
## Contributing
|
76
|
+
|
77
|
+
1. Fork it
|
78
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
79
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
80
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
81
|
+
5. Create new Pull Request
|
82
|
+
|
83
|
+
## License
|
84
|
+
|
85
|
+
Copyright © 2012 Christian Hein, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/google_movies47/version', __FILE__)
|
3
|
+
|
4
|
+
# Packaging metadata for the google_movies47 gem.
# The file list is taken from `git ls-files`, so the gem must be built from a
# git checkout; executables and test files are derived from that list.
Gem::Specification.new do |spec|
  spec.name          = 'google_movies47'
  spec.version       = GoogleMovies47::VERSION
  spec.authors       = ['Christian Hein', 'Jacob Williams']
  spec.email         = ['chrishein@gmail.com', 'ponyboy47@gmail.com']
  spec.summary       = 'Google Movies crawler and parser'
  spec.description   = 'Get Movie Show Times by Location'
  spec.homepage      = 'https://github.com/Ponyboy47/movie_show_times'

  # Must be assigned before executables/test_files, which filter this list.
  spec.files         = `git ls-files`.split($\)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependencies.
  %w[mechanize nokogiri chronic_duration].each do |runtime_gem|
    spec.add_dependency runtime_gem
  end

  # Development-only dependencies.
  %w[rspec rake fakeweb].each do |dev_gem|
    spec.add_development_dependency dev_gem
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'mechanize'
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
module GoogleMovies47
  # Fetches the Google Movies result pages for a location and hands each page
  # to a GoogleMovies47::Parser, whose results are exposed through #movies and
  # #theaters.
  class Crawler

    # Raised when :city or :state is missing from the options.
    MissingLocationArgument = Class.new(StandardError)
    # Raised when :days_ahead is not a non-negative Integer.
    WrongDaysAheadArgument = Class.new(StandardError)

    # Builds the search URL, downloads the first result page and crawls the
    # remaining result pages linked from its navigation bar.
    #
    # options - Hash of:
    #           :city       - required String, name of the city/town.
    #           :state      - required String, state or country.
    #           :language   - optional String language code (default 'en').
    #           :days_ahead - optional Integer >= 0, days after today
    #                         (default 0).
    #
    # Raises MissingLocationArgument when :city or :state is absent.
    # Raises WrongDaysAheadArgument when :days_ahead is not an Integer >= 0.
    def initialize(options = {})
      raise MissingLocationArgument unless options[:city] && options[:state]

      language = options[:language] || 'en'
      days_ahead = options[:days_ahead] || 0

      # The comparison used to be inverted (0 >= days_ahead), which rejected
      # every positive value and accepted negative ones.
      unless days_ahead.kind_of?(Integer) && days_ahead >= 0
        raise WrongDaysAheadArgument
      end

      @parser = GoogleMovies47::Parser.new(language)

      search_url = "http://www.google.com/movies?hl=#{language}" \
        "&near=#{CGI.escape(options[:city])}+#{CGI.escape(options[:state])}&date=#{days_ahead}"

      @agent = Mechanize.new
      crawl_result_pages(@agent.get(search_url))
    end

    # Returns whatever the underlying parser collected as movies.
    def movies
      @parser.movies
    end

    # Returns whatever the underlying parser collected as theaters.
    def theaters
      @parser.theaters
    end

    private

    # Downloads every additional result page linked from the navigation bar
    # of +page+, then feeds the first page followed by the linked pages to the
    # parser. All pages are fetched before any of them is parsed.
    def crawl_result_pages(page)
      first_doc = page.parser
      # Navigation cells with class 'b' are skipped; only the other cells'
      # links are followed.
      links = first_doc.xpath("//div[@id='navbar']/table//tr/td[not(@class='b')]/a")

      result_pages = [first_doc]
      links.each { |link| result_pages << @agent.get(link['href']).parser }

      result_pages.each { |doc| @parser.parse_show_times(doc) }
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module GoogleMovies47
  # Picks the genre section out of a movie info line by matching it against
  # the list of genre terms stored in genres/<language>.txt next to this file.
  class GenreParser

    # language - String language code naming the term file (default 'en').
    #
    # When no term file exists for the language (or it contains no terms) the
    # parser is left without a pattern and #parse always returns nil.
    def initialize(language = 'en')
      @language = language
      terms = load_terms(language)
      unless terms.empty?
        @regular_expression = Regexp.new("(#{terms.join('|')})", Regexp::IGNORECASE)
      end
    end

    # Splits +info_line+ on ' - ' and returns the first section that contains
    # a known genre term (case-insensitive), or nil when nothing matches.
    def parse(info_line)
      return nil unless @regular_expression

      info_line.split(' - ').find { |part| @regular_expression.match(part) }
    end

    private

    # Reads the genre terms for +language+, one per line, and returns them
    # regex-escaped. Blank lines are dropped so they cannot become an empty
    # alternative that matches everything. Returns [] when the file is missing.
    def load_terms(language)
      path = File.join(File.dirname(__FILE__), 'genres', "#{language}.txt")
      # File.readlines closes the file itself, unlike File.open(...).readlines.
      File.readlines(path).map(&:strip).reject(&:empty?).map { |term| Regexp.escape(term) }
    rescue Errno::ENOENT
      []
    end
  end

end
|
32
|
+
|
33
|
+
=begin
|
34
|
+
i.match("-+[^-]+-")
|
35
|
+
|
36
|
+
=end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
Action
|
2
|
+
Adventure
|
3
|
+
Animation
|
4
|
+
Biography
|
5
|
+
Comedy
|
6
|
+
Crime
|
7
|
+
Documentary
|
8
|
+
Drama
|
9
|
+
Family
|
10
|
+
Fantasy
|
11
|
+
Film-Noir
|
12
|
+
Game-Show
|
13
|
+
History
|
14
|
+
Horror
|
15
|
+
Music
|
16
|
+
Musical
|
17
|
+
Mystery
|
18
|
+
News
|
19
|
+
Reality-TV
|
20
|
+
Romance
|
21
|
+
Sci-Fi
|
22
|
+
Scifi
|
23
|
+
Sport
|
24
|
+
Suspense
|
25
|
+
Talk-Show
|
26
|
+
Thriller
|
27
|
+
War
|
28
|
+
Western
|
@@ -0,0 +1,26 @@
|
|
1
|
+
acción
|
2
|
+
aventura
|
3
|
+
animación
|
4
|
+
biografía
|
5
|
+
comedia
|
6
|
+
crimen
|
7
|
+
documental
|
8
|
+
drama
|
9
|
+
familia
|
10
|
+
fantasía
|
11
|
+
Cine negro
|
12
|
+
historia
|
13
|
+
horror
|
14
|
+
música
|
15
|
+
musical
|
16
|
+
misterio
|
17
|
+
noticias
|
18
|
+
reality
|
19
|
+
romance
|
20
|
+
ciencia ficción
|
21
|
+
scifi
|
22
|
+
deporte
|
23
|
+
suspenso
|
24
|
+
Talk-Show
|
25
|
+
guerra
|
26
|
+
western
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module GoogleMovies47
  # Extracts the spoken-language name from a movie info line by matching it
  # against the names stored in languages/<language>.txt next to this file.
  class LanguageParser

    # language - String language code naming the names file (default 'en').
    #
    # When no names file exists for the language (or it contains no names) the
    # parser is left without a pattern and #parse always returns nil.
    def initialize(language = 'en')
      @language = language
      names = load_language_names(language)
      unless names.empty?
        @language_regular_expression = Regexp.new("(#{names.join('|')})", Regexp::IGNORECASE)
      end
    end

    # Returns the first known language name found in +info_line+
    # (case-insensitive), passed through String#capitalize, or nil when no
    # name matches.
    def parse(info_line)
      return nil unless @language_regular_expression

      matches = @language_regular_expression.match(info_line)
      matches && matches[0].capitalize
    end

    private

    # Reads the language names for +language+, one per line, and returns them
    # regex-escaped. Blank lines are dropped so they cannot become an empty
    # alternative that matches everything. Returns [] when the file is missing.
    def load_language_names(language)
      path = File.join(File.dirname(__FILE__), 'languages', "#{language}.txt")
      # File.readlines closes the file itself, unlike File.open(...).readlines.
      File.readlines(path).map(&:strip).reject(&:empty?).map { |name| Regexp.escape(name) }
    rescue Errno::ENOENT
      []
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
Afrikaans
|
2
|
+
Albanisch
|
3
|
+
Arabisch
|
4
|
+
Azerbaijani
|
5
|
+
Baskische
|
6
|
+
Bengali
|
7
|
+
belarussischen
|
8
|
+
Bulgarisch
|
9
|
+
Katalanisch
|
10
|
+
Chinesisch
|
11
|
+
kroatisch
|
12
|
+
Tschechisch
|
13
|
+
Dänisch
|
14
|
+
Holländer
|
15
|
+
Englisch
|
16
|
+
Esperanto
|
17
|
+
Estnisch
|
18
|
+
Filipino
|
19
|
+
Finnisch
|
20
|
+
Französisch
|
21
|
+
galizischen
|
22
|
+
Georgier
|
23
|
+
Deutsch
|
24
|
+
Griechisch
|
25
|
+
Gujarati
|
26
|
+
Haitian Creole
|
27
|
+
Hebräisch
|
28
|
+
Hindi
|
29
|
+
Ungarisch
|
30
|
+
Isländisch
|
31
|
+
Indonesier
|
32
|
+
Irisch
|
33
|
+
Italienisch
|
34
|
+
Japanisch
|
35
|
+
Kannada
|
36
|
+
Koreanisch
|
37
|
+
Latein
|
38
|
+
Lettisch
|
39
|
+
Litauisch
|
40
|
+
Mazedonier
|
41
|
+
malaiisch
|
42
|
+
Malteser
|
43
|
+
Norwegisch
|
44
|
+
Persisch
|
45
|
+
polnisch
|
46
|
+
Portugiesisch
|
47
|
+
Rumänisch
|
48
|
+
Russisch
|
49
|
+
serbisch
|
50
|
+
Slowakisch
|
51
|
+
Slowenisch
|
52
|
+
Spanisch
|
53
|
+
Swahili
|
54
|
+
Schwedisch
|
55
|
+
Tamilisch
|
56
|
+
Telugu
|
57
|
+
thailändisch
|
58
|
+
Türkisch
|
59
|
+
Ukrainisch
|
60
|
+
Urdu
|
61
|
+
Vietnamesisch
|
62
|
+
Walisisch
|
63
|
+
Jiddisch
|
@@ -0,0 +1,63 @@
|
|
1
|
+
Afrikaans
|
2
|
+
Albanian
|
3
|
+
Arabic
|
4
|
+
Azerbaijani
|
5
|
+
Basque
|
6
|
+
Bengali
|
7
|
+
Belarusian
|
8
|
+
Bulgarian
|
9
|
+
Catalan
|
10
|
+
Chinese
|
11
|
+
Croatian
|
12
|
+
Czech
|
13
|
+
Danish
|
14
|
+
Dutch
|
15
|
+
English
|
16
|
+
Esperanto
|
17
|
+
Estonian
|
18
|
+
Filipino
|
19
|
+
Finnish
|
20
|
+
French
|
21
|
+
Galician
|
22
|
+
Georgian
|
23
|
+
German
|
24
|
+
Greek
|
25
|
+
Gujarati
|
26
|
+
Haitian Creole
|
27
|
+
Hebrew
|
28
|
+
Hindi
|
29
|
+
Hungarian
|
30
|
+
Icelandic
|
31
|
+
Indonesian
|
32
|
+
Irish
|
33
|
+
Italian
|
34
|
+
Japanese
|
35
|
+
Kannada
|
36
|
+
Korean
|
37
|
+
Latin
|
38
|
+
Latvian
|
39
|
+
Lithuanian
|
40
|
+
Macedonian
|
41
|
+
Malay
|
42
|
+
Maltese
|
43
|
+
Norwegian
|
44
|
+
Persian
|
45
|
+
Polish
|
46
|
+
Portuguese
|
47
|
+
Romanian
|
48
|
+
Russian
|
49
|
+
Serbian
|
50
|
+
Slovak
|
51
|
+
Slovenian
|
52
|
+
Spanish
|
53
|
+
Swahili
|
54
|
+
Swedish
|
55
|
+
Tamil
|
56
|
+
Telugu
|
57
|
+
Thai
|
58
|
+
Turkish
|
59
|
+
Ukrainian
|
60
|
+
Urdu
|
61
|
+
Vietnamese
|
62
|
+
Welsh
|
63
|
+
Yiddish
|