omelete 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
1
# Plain data holder for a movie scraped from Omelete.
#
# Only the Omelete id and the name are required at construction time;
# every other attribute starts out as nil and is filled in by callers.
class Movie
  attr_accessor :omelete_id, :name, :runtime, :genre, :age_rating, :image,
                :cast, :directed_by, :release_date, :synopsis

  # omelete_id - identifier of the movie on Omelete
  # name       - movie title
  def initialize(omelete_id, name)
    @omelete_id = omelete_id
    @name = name
  end
end
@@ -0,0 +1,7 @@
1
# Plain data holder for a movie theater (cinema) scraped from Omelete.
class MovieTheater
  attr_accessor :omelete_id, :name, :values

  # omelete_id - identifier of the movie theater on Omelete
  # name       - display name of the movie theater
  # values     - stored as given (MovieTheaterAgent passes the text of the
  #              "ingressos" tab here)
  def initialize(omelete_id, name, values)
    @omelete_id = omelete_id
    @name = name
    @values = values
  end
end
@@ -0,0 +1,17 @@
1
# A single screening entry scraped from a movie theater page.
class Showtime
  attr_accessor :omelete_movie_id, :obs, :kind, :time, :theater

  # omelete_movie_id - id of the movie this screening belongs to
  # obs              - free-text remark; stored after going through #pretty_obs
  # kind, time, theater - stored exactly as given
  def initialize(omelete_movie_id, obs, kind, time, theater)
    @omelete_movie_id = omelete_movie_id
    @obs = pretty_obs(obs)
    @kind = kind
    @time = time
    @theater = theater
  end

  # Breaks a run-together remark into lines.
  #
  # A newline is inserted after every "." and after every "feriado" that is
  # immediately followed by an uppercase ASCII letter, e.g.
  # "Dub.Leg" becomes "Dub.\nLeg".
  #
  # The previous implementation guarded its loop with a regex containing
  # literal spaces (/\.[A-Z] | feriado[A-Z]/), so boundaries without the
  # surrounding space were silently skipped. It also edited the caller's
  # string in place, which fails on frozen strings.
  #
  # obs - the remark String, or nil/false
  #
  # Returns a new String with the line breaks added, or obs itself when it
  # is nil/false.
  def pretty_obs(obs)
    return obs unless obs

    # The lookahead keeps the uppercase letter out of the match so that only
    # the newline is added.
    obs.gsub(/(\.|feriado)(?=[A-Z])/, "\\1\n")
  end
end
@@ -0,0 +1,7 @@
1
# Plain data holder for a screening room inside a movie theater.
class Theater
  attr_accessor :name

  # name - label of the room (TheaterAgent passes the digits-only cell text)
  def initialize(name)
    @name = name
  end
end
@@ -0,0 +1,39 @@
1
+ # -*- encoding : utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require File.expand_path("../../../lib/omelete", __FILE__)
6
+ require File.expand_path("../../../lib/model/movie", __FILE__)
7
+
8
module Omelete
  # Scrapes the movies listed on a movie theater page ("cinema/<id>").
  class MovieAgent
    # omelete_id - id of the movie theater whose page is fetched
    def initialize(omelete_id)
      @client = Client.new
      @page_doc = @client.page_doc("cinema/#{omelete_id}")
    end

    # Builds one Movie per "programacao_filme" block found on the page.
    #
    # Returns an Array of Movie with omelete_id, name, genre, runtime,
    # age_rating and image filled in (each nil when absent from the markup).
    def movies
      @page_doc.css("div[class='programacao_filme']").map { |node| build_movie(node) }
    end

    private

    # Extracts a single Movie from one "programacao_filme" node.
    def build_movie(node)
      title_link = node.css("div[class='programacao_filme_desc'] h2").css("a")[0]
      href = title_link && title_link["href"]
      name = node.css("div[class='programacao_filme_desc'] h2 a").text
      movie = Movie.new(href ? href[/\d+/] : nil, name)

      # The h4 reads "<genre> - <runtime> - <age rating>"; parse it once.
      genre, runtime, age_rating = node.css("div[class='programacao_filme_desc'] h4").text.split(" - ")
      # genre used to be parsed but never stored on the movie.
      movie.genre = genre
      movie.runtime = runtime
      movie.age_rating = age_rating ? age_rating[/\d+/] : nil

      poster_link = node.css("div[class='programacao_filme_poster']").css("a")[0]
      movie.image = poster_link ? poster_link["href"] : nil

      movie
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding : utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require File.expand_path("../../../lib/omelete", __FILE__)
6
+ require File.expand_path("../../../lib/model/movie", __FILE__)
7
+
8
module Omelete
  # Reads synopsis, director and cast from a movie's detail page
  # ("filme/<id>").
  class MovieDetailsAgent
    # movie_id - id of the movie whose page is fetched
    def initialize(movie_id)
      @client = Client.new
      @id = movie_id
      @page_doc = @client.page_doc("filme/#{movie_id}")
    end

    # Returns a Movie carrying the id given to the constructor, a nil name,
    # the synopsis text and, when present in the "ficha" definition list,
    # directed_by and cast with CR/LF characters removed.
    def movie
      details = Movie.new(@id, nil)
      details.synopsis = @page_doc.css("div#tab_#{@id}_sinopse blockquote").text

      @page_doc.css("div#tab_#{@id}_ficha dl").children.each do |node|
        # Only <dd> values that follow a label element are of interest.
        next unless node.node_name == "dd" && node.previous_element

        case node.previous_element.content
        when "Direção:" then details.directed_by = node.content.delete("\r\n")
        when "Elenco:" then details.cast = node.content.delete("\r\n")
        end
      end

      details
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding : utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require File.expand_path("../../../lib/omelete", __FILE__)
6
+ require File.expand_path("../../../lib/model/movie_theater", __FILE__)
7
+
8
module Omelete
  # Reads the name and ticket information of a movie theater from its
  # page ("cinema/<id>").
  class MovieTheaterAgent
    # omelete_id - id of the movie theater whose page is fetched
    def initialize(omelete_id)
      @id = omelete_id
      @page_doc = Client.new.page_doc("cinema/#{@id}")
    end

    # Returns a MovieTheater built from the page heading and the text of
    # the "ingressos" tab; the instance is also kept in @movie_theater.
    def movie_theater
      @movie_theater = MovieTheater.new(
        @id,
        @page_doc.css("div[class='programacao_cinema'] h2 a").text,
        @page_doc.css("div#tab_#{@id}_ingressos dl dd").text
      )
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # -*- encoding : utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require File.expand_path("../../../lib/omelete", __FILE__)
6
+ require File.expand_path("../../../lib/model/showtime", __FILE__)
7
+
8
module Omelete
  # Scrapes showtimes from a movie theater page ("cinema/<id>").
  class ShowtimeAgent
    # omelete_id - id of the movie theater whose page is fetched
    def initialize(omelete_id)
      client = Client.new
      @id = omelete_id
      @page_doc = client.page_doc("cinema/#{@id}")
    end

    # Builds one Showtime per "programacao_horarios" block on the page.
    #
    # Returns an Array of Showtime.
    def showtimes
      @page_doc.css("div[class='programacao_horarios']").map { |div| build_showtime(div) }
    end

    private

    # Extracts a Showtime from a single "programacao_horarios" node.
    #
    # All fields start as nil for every block. Previously they were shared
    # across blocks, so a block lacking a cell silently inherited the value
    # scraped for the preceding movie.
    def build_showtime(div)
      link = div.css("p a")[0]
      href = link && link["href"]
      # nil instead of NoMethodError when the block has no movie link.
      omelete_movie_id = href ? href[/\d+/] : nil

      obs = kind = time = theater = nil
      div.css("table tr td").each_with_index do |td, i|
        if td.attribute("colspan")
          # A cell spanning columns carries the free-text remark.
          obs = td.text
        else
          case i
          when 0 then theater = td.text
          when 1 then kind = td.text
          when 2 then time = td.text
          end
        end
      end

      Showtime.new(omelete_movie_id, obs, kind, time, theater)
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # -*- encoding : utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require File.expand_path("../../../lib/omelete", __FILE__)
6
+ require File.expand_path("../../../lib/model/theater", __FILE__)
7
+
8
module Omelete
  # Collects the screening rooms mentioned on a movie theater page
  # ("cinema/<id>").
  class TheaterAgent
    # omelete_id - id of the movie theater whose page is fetched
    def initialize(omelete_id)
      @id = omelete_id
      @page_doc = Client.new.page_doc("cinema/#{@id}")
    end

    # Returns an Array with one Theater per schedule-table cell whose text
    # matches the digits-only pattern; the last one built stays in @theater.
    def theaters
      cells = @page_doc.css("div[class='programacao_horarios'] table tr td")
      cells.each_with_object([]) do |cell, found|
        next unless cell.text.match(/^[0-9]{1,}$/)

        @theater = Theater.new(cell.text)
        found << @theater
      end
    end
  end
end
data/lib/omelete.rb CHANGED
@@ -2,21 +2,21 @@
2
2
  #!/usr/bin/env ruby
3
3
  require 'nokogiri'
4
4
  require 'open-uri'
5
- require File.expand_path('../omelete/movie_page', __FILE__)
6
-
7
5
 
8
6
  module Omelete
9
7
  class Client
10
8
 
11
- def initialize(state,city)
12
- @state=state
13
- @city=city
14
- @movie_page = MoviePage.new(@state,@city)
15
- end
9
+ HOME_URL = "http://omelete.uol.com.br/filmes-em-cartaz/"
10
+
11
# Downloads and parses the Omelete page located at HOME_URL + complement.
#
# complement - path appended to HOME_URL (the agents pass values such as
#              "cinema/<id>" or "filme/<id>"); the full URL is
#              percent-escaped before the request is made.
#
# Returns the parsed Nokogiri HTML document, which is also stored in
# @page_doc.
# Raises RuntimeError carrying the SocketError message when a SocketError
# occurs while fetching the page.
def page_doc(complement)
  # URI.escape and open-uri's Kernel#open URL support were removed in
  # Ruby 3.0. The two calls below are what they delegated to, and they are
  # available on older Rubies as well.
  uri = URI::DEFAULT_PARSER.escape(HOME_URL + complement)
  @page_doc = Nokogiri::HTML(URI.parse(uri).open)
rescue SocketError => e
  raise e.message
end
16
20
 
17
- def movies
18
- @movie_page.movies if @movie_page
19
- end
20
-
21
21
  end
22
22
  end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Omelete
2
- VERSION = "1.0.0"
2
+ VERSION = "2.0.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omelete
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-24 00:00:00.000000000 Z
12
+ date: 2012-10-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -28,14 +28,14 @@ dependencies:
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
30
  - !ruby/object:Gem::Dependency
31
- name: mechanize
31
+ name: rspec
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  none: false
34
34
  requirements:
35
35
  - - ! '>='
36
36
  - !ruby/object:Gem::Version
37
37
  version: '0'
38
- type: :runtime
38
+ type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  none: false
@@ -44,21 +44,21 @@ dependencies:
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
46
  - !ruby/object:Gem::Dependency
47
- name: rspec
47
+ name: mocha
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  none: false
50
50
  requirements:
51
- - - ~>
51
+ - - ! '>='
52
52
  - !ruby/object:Gem::Version
53
- version: '2.7'
53
+ version: '0'
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  none: false
58
58
  requirements:
59
- - - ~>
59
+ - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
- version: '2.7'
61
+ version: '0'
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: vcr
64
64
  requirement: !ruby/object:Gem::Requirement
@@ -80,17 +80,17 @@ dependencies:
80
80
  requirement: !ruby/object:Gem::Requirement
81
81
  none: false
82
82
  requirements:
83
- - - ~>
83
+ - - ! '>='
84
84
  - !ruby/object:Gem::Version
85
- version: 1.8.0
85
+ version: '0'
86
86
  type: :development
87
87
  prerelease: false
88
88
  version_requirements: !ruby/object:Gem::Requirement
89
89
  none: false
90
90
  requirements:
91
- - - ~>
91
+ - - ! '>='
92
92
  - !ruby/object:Gem::Version
93
- version: 1.8.0
93
+ version: '0'
94
94
  - !ruby/object:Gem::Dependency
95
95
  name: rake
96
96
  requirement: !ruby/object:Gem::Requirement
@@ -114,10 +114,15 @@ executables: []
114
114
  extensions: []
115
115
  extra_rdoc_files: []
116
116
  files:
117
- - lib/omelete/detailed_page.rb
118
- - lib/omelete/models/movie.rb
119
- - lib/omelete/models/showtime.rb
120
- - lib/omelete/movie_page.rb
117
+ - lib/model/movie.rb
118
+ - lib/model/movie_theater.rb
119
+ - lib/model/showtime.rb
120
+ - lib/model/theater.rb
121
+ - lib/omelete/movie_agent.rb
122
+ - lib/omelete/movie_details_agent.rb
123
+ - lib/omelete/movie_theater_agent.rb
124
+ - lib/omelete/showtime_agent.rb
125
+ - lib/omelete/theater_agent.rb
121
126
  - lib/omelete.rb
122
127
  - lib/version.rb
123
128
  homepage: http://github.com/mvoto/omelete
@@ -140,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
145
  version: '0'
141
146
  requirements: []
142
147
  rubyforge_project:
143
- rubygems_version: 1.8.23
148
+ rubygems_version: 1.8.24
144
149
  signing_key:
145
150
  specification_version: 3
146
151
  summary: Ruby web crawler to access omelete informations
@@ -1,107 +0,0 @@
1
- # -*- encoding : utf-8 -*-
2
- #!/usr/bin/env ruby
3
- $:.unshift File.expand_path('../models', __FILE__)
4
- require 'rubygems'
5
- require 'open-uri'
6
- require 'nokogiri'
7
- require 'mechanize'
8
- require 'movie'
9
- require 'showtime'
10
-
11
- module Omelete
12
- class DetailedPage
13
-
14
- def initialize(page,link)
15
- @page = page.link_with(:href => link).click
16
- end
17
-
18
- def movie_poi(movie)
19
- movie.movie_theaters = movie_theaters(movie) unless @page.search('//div[@class = "programacao_cinema"]').first.nil?
20
- movie.showtimes = showtimes(movie) unless @page.search('//div[@class = "programacao_cinema"]').first.nil?
21
- movie.synopsis = synopsis(movie) unless @page.search("//div[@id = \"tab_#{movie.id}_sinopse\"]/blockquote").first.nil?
22
- unless @page.search("//div[@id = \"tab_#{movie.id}_ficha\"]/dl/dd").first.nil?
23
- movie.cast = cast(movie)
24
- movie.directed_by = directed_by(movie)
25
- end
26
- movie
27
- end
28
-
29
- def synopsis(movie)
30
- @page.search("//div[@id = \"tab_#{movie.id}_sinopse\"]/blockquote").first.content
31
- end
32
-
33
- def cast(movie)
34
- @page.search("//div[@id = \"tab_#{movie.id}_ficha\"]/dl/dd").first.content.delete("\r\n")
35
- end
36
-
37
- def directed_by(movie)
38
- directed_by = ""
39
- @page.search("//div[@id = \"tab_#{movie.id}_ficha\"]/dl").first.children.each do |child|
40
- directed_by = child.content if child.node_name == "dd" && child.previous_element && child.previous_element.content == "Direção:"
41
- end
42
- directed_by.delete("\r\n")
43
- end
44
-
45
- def movie_theaters(movie)
46
- movie_theaters = []
47
- @page.search('//div[@class = "programacao_cinema"]/h2/a').each do |movie_theater|
48
- movie_theaters << movie_theater.content if movie_theater
49
- end
50
- movie_theaters
51
- end
52
-
53
- def showtimes(movie)
54
- showtimes = []
55
- mt = ""
56
- obs = ""
57
- @page.parser.xpath('//div[@id="content-left"]/div[@class="grid_8"]/div').each do |div_child|
58
- # if div_child.node_name == "div"
59
- mt = div_child.search('h2').first.text.gsub("\n", "").strip if div_child.attribute("class").value == "programacao_cinema"
60
- if div_child.attribute("class").value == "programacao_horarios"
61
- div_child.search('tr').each do |tr_doc|
62
- unless tr_doc.content.include?("Sala")
63
- obs = div_child.search('td[@colspan="3"]').first.content if div_child.search('td[@colspan="3"]').first
64
- sat = ShowtimeAndTheater.new tr_doc
65
- showtimes << sat.create_showtime_with(mt,movie,obs)
66
- end
67
- end
68
- end
69
- # end
70
- end
71
- showtimes
72
- end
73
-
74
- end
75
-
76
- class ShowtimeAndTheater
77
-
78
- def initialize(doc)
79
- @doc = doc
80
- end
81
-
82
- def create_showtime_with(movie_theater,movie,observ)
83
- unless @doc.search('td').first.next_element.nil?
84
- showtime = Showtime.new(nil,nil,nil,movie,nil,movie_theater)
85
- showtime.time = time
86
- showtime.theater = theater
87
- showtime.kind = kind
88
- showtime.obs = observ
89
- # p "#{showtime.movie_theater} - #{showtime.theater} - #{showtime.movie.name} - #{showtime.time} - #{showtime.obs}"
90
- end
91
- showtime
92
- end
93
-
94
- def time
95
- @doc.search('td').last.content
96
- end
97
-
98
- def kind
99
- @doc.search('td').first.next_element.content
100
- end
101
-
102
- def theater
103
- @doc.search('td').first.content
104
- end
105
-
106
- end
107
- end
@@ -1,13 +0,0 @@
1
- # -*- encoding : utf-8 -*-
2
- module Omelete
3
- class Movie
4
-
5
- attr_accessor :id, :name, :status, :runtime, :cast, :genre, :directed_by, :city, :state
6
- attr_accessor :age_rating, :synopsis, :image, :movie_theaters, :showtimes
7
-
8
- def initialize(id, name)
9
- @id = id
10
- @name = name
11
- end
12
- end
13
- end
@@ -1,16 +0,0 @@
1
- # -*- encoding : utf-8 -*-
2
- module Omelete
3
- class Showtime
4
-
5
- attr_accessor :time, :obs, :movie, :theater, :kind, :movie_theater
6
-
7
- def initialize(time, obs, kind, movie, theater, movie_theater)
8
- @time = time
9
- @obs = obs
10
- @kind = kind
11
- @movie = movie
12
- @theater = theater
13
- @movie_theater = movie_theater
14
- end
15
- end
16
- end
@@ -1,50 +0,0 @@
1
- # -*- encoding : utf-8 -*-
2
- #!/usr/bin/env ruby
3
- $:.unshift File.expand_path('../', __FILE__)
4
- $:.unshift File.expand_path('../models', __FILE__)
5
- require 'rubygems'
6
- require 'open-uri'
7
- require 'nokogiri'
8
- require 'mechanize'
9
- require 'movie'
10
- require 'detailed_page'
11
-
12
- module Omelete
13
- class MoviePage
14
- URL = "http://omelete.uol.com.br/filmes-em-cartaz"
15
- def initialize(state,city)
16
- @state = state
17
- @city = city
18
- @agent = Mechanize.new{|agent| agent.user_agent_alias = 'Mac Safari'}
19
- url=URL + "?uf=#{@state}&cidade=#{@city}"
20
- @agent.get(URI.escape(url)){ |page| @page = page }
21
- end
22
-
23
- def movies
24
- @movies = []
25
- @page.search('//div[@class = "programacao_filme"]').each do |movie_doc|
26
- link = movie_doc.search('p').first.search('a').first.attr('href')
27
- id = id_from(link)
28
- return @movies if movie_doc.search('h2').first.search('a').first.nil?
29
- name = movie_doc.search('h2').first.search('a').first.content.strip
30
- movie = Movie.new(id,name)
31
-
32
- return @movies if movie_doc.search('h4').first.nil?
33
- info = movie_doc.search('h4').first.content.strip.split('-')
34
- movie.genre = info[0]
35
- movie.runtime = info[1]
36
- movie.age_rating = info[2]
37
- movie.image = movie_doc.search('div').first.search('a').first.attr('href').strip unless movie_doc.search('div').first.search('a').first.nil?
38
-
39
- dp = DetailedPage.new(@page, link)
40
- @movies << dp.movie_poi(movie)
41
- end
42
- @movies
43
- end
44
-
45
- def id_from(link)
46
- link.match(/\d+/)[0]
47
- end
48
-
49
- end
50
- end