concurso_hub 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module Application
  module Ports
    # Abstract port through which the use cases read contest ("concurso") data.
    #
    # Every method here raises NotImplementedError naming the receiving class
    # and the method, so a concrete adapter must override each one it serves.
    class ConcursoRepository
      # Fetches the open contests.
      #
      # @raise [NotImplementedError] always, in this base class
      def fetch_abertos
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches closed contests for a search term.
      #
      # @param busca [Object] search term forwarded by the caller
      # @raise [NotImplementedError] always, in this base class
      def fetch_encerrados(busca)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches a single edital.
      #
      # @param url [Object] location of the edital page
      # @raise [NotImplementedError] always, in this base class
      def fetch_edital(url)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches the listing of exams ("provas").
      #
      # @param provas_url [Object] location of the listing page
      # @raise [NotImplementedError] always, in this base class
      def fetch_provas_listing(provas_url)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches the PDFs offered by one exam download page.
      #
      # @param download_url [Object] location of the download page
      # @raise [NotImplementedError] always, in this base class
      def fetch_prova_pdfs(download_url)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Application
  module Ports
    # Abstract port for saving a remote resource to a local path.
    #
    # The base implementation only raises; a concrete downloader is expected
    # to override #download.
    class FileDownloader
      # @param url [Object] location of the resource to fetch
      # @param dest_path [Object] local path the resource should be written to
      # @raise [NotImplementedError] always, in this base class
      def download(url, dest_path)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module Application
  module Ports
    # Abstract output port used by the use cases to report progress, results
    # and errors.
    #
    # Each method raises NotImplementedError naming the receiving class and
    # the method; concrete presenters override the ones they support.
    class Presenter
      # @raise [NotImplementedError] always, in this base class
      def show_loading
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # @param concursos [Object] the contests to display
      # @param metadata [Hash] extra information about the listing
      # @raise [NotImplementedError] always, in this base class
      def show(concursos, metadata: {})
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # @param message [Object] the error text to display
      # @raise [NotImplementedError] always, in this base class
      def error(message)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # @param edital [Object] the edital to display
      # @raise [NotImplementedError] always, in this base class
      def show_edital(edital)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # @param titulo [Object] label of the item being processed
      # @param index [Object] position of the item within the batch
      # @param total [Object] size of the batch
      # @raise [NotImplementedError] always, in this base class
      def show_download_start(titulo, index, total)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # @param paths [Object] the destination paths that were downloaded
      # @raise [NotImplementedError] always, in this base class
      def show_download_done(paths)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # @param provas [Object] the exams to display
      # @raise [NotImplementedError] always, in this base class
      def show_provas(provas)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+ require_relative '../baixar_edital_request'
5
+ require_relative '../ports/concurso_repository'
6
+ require_relative '../ports/file_downloader'
7
+ require_relative '../ports/presenter'
8
+
9
require 'fileutils'

module Application
  module UseCases
    # Use case: download every PDF attached to an edital into a directory.
    #
    # Collaborators are injected and only need to respond to the messages
    # used here: `repository.fetch_edital`, `downloader.download` and the
    # presenter's `error`, `show_download_start` and `show_download_done`.
    class BaixarEdital
      # @param repository [#fetch_edital] source of the edital
      # @param downloader [#download] writes a URL to a local path
      # @param presenter [#error, #show_download_start, #show_download_done]
      def initialize(repository:, downloader:, presenter:)
        @repository = repository
        @downloader = downloader
        @presenter = presenter
      end

      # Downloads the edital's PDFs, reporting progress through the presenter.
      #
      # When the edital has no PDFs an error is presented and nothing is
      # downloaded. Files are named after the last path segment of each PDF
      # URL and written to `request.dest_dir`, or to `<cwd>/editais` when no
      # directory was requested.
      #
      # @param request [#url, #dest_dir] what to download and where
      # @return [Object, nil] nil when there is nothing to download, otherwise
      #   whatever the presenter's `show_download_done` returns
      def execute(request)
        edital = @repository.fetch_edital(request.url)

        if edital.pdfs.empty?
          @presenter.error("Nenhum PDF encontrado para este edital.")
          return
        end

        dest_dir = request.dest_dir || File.join(Dir.pwd, 'editais')
        # mkdir_p creates missing parent directories and is a no-op when the
        # directory already exists, so nested destinations work.
        FileUtils.mkdir_p(dest_dir)

        total = edital.pdfs.size
        downloaded = edital.pdfs.each_with_index.map do |pdf, index|
          filename = File.basename(URI.parse(pdf[:url]).path)
          dest_path = File.join(dest_dir, filename)

          @presenter.show_download_start(pdf[:titulo], index + 1, total)
          @downloader.download(pdf[:url], dest_path)
          dest_path
        end

        @presenter.show_download_done(downloaded)
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+ require_relative '../baixar_provas_request'
5
+ require_relative '../ports/concurso_repository'
6
+ require_relative '../ports/file_downloader'
7
+ require_relative '../ports/presenter'
8
+
9
require 'fileutils'

module Application
  module UseCases
    # Use case: download every exam PDF reachable from a "provas" listing.
    #
    # The listing is fetched first, then each listed entry's download page is
    # queried for PDFs, and finally every PDF found is downloaded.
    class BaixarProvas
      # @param repository [#fetch_provas_listing, #fetch_prova_pdfs]
      # @param downloader [#download] writes a URL to a local path
      # @param presenter [#error, #show_download_start, #show_download_done]
      def initialize(repository:, downloader:, presenter:)
        @repository = repository
        @downloader = downloader
        @presenter = presenter
      end

      # Runs the download, reporting progress through the presenter.
      #
      # An error is presented (and nil returned) when the listing is empty or
      # when no entry yields any PDF. Files are written to `request.dest_dir`,
      # or to `<cwd>/editais` when no directory was requested.
      #
      # @param request [#url, #dest_dir] what to download and where
      # @return [Object, nil] nil on the error paths, otherwise whatever the
      #   presenter's `show_download_done` returns
      def execute(request)
        provas = @repository.fetch_provas_listing(request.url)

        if provas.empty?
          @presenter.error("Nenhuma prova encontrada em: #{request.url}")
          return
        end

        todos_pdfs = collect_pdfs(provas)

        if todos_pdfs.empty?
          @presenter.error("Nenhum PDF de prova encontrado.")
          return
        end

        dest_dir = request.dest_dir || File.join(Dir.pwd, 'editais')
        # mkdir_p creates missing parent directories and is a no-op when the
        # directory already exists, so nested destinations work.
        FileUtils.mkdir_p(dest_dir)

        @presenter.show_download_done(download_all(todos_pdfs, dest_dir))
      end

      private

      # Queries each listed entry for its PDFs and tags every PDF hash with
      # the entry's :cargo (a :cargo key already present in the PDF hash wins).
      def collect_pdfs(provas)
        provas.each_with_index.flat_map do |prova, i|
          @presenter.show_download_start(
            "Buscando provas de: #{prova[:cargo]}", i + 1, provas.size
          )
          @repository.fetch_prova_pdfs(prova[:download_url])
                     .map { |pdf| { cargo: prova[:cargo], **pdf } }
        end
      end

      # Downloads every PDF into dest_dir and returns the destination paths
      # in download order.
      def download_all(pdfs, dest_dir)
        pdfs.each_with_index.map do |pdf, i|
          filename = File.basename(URI.parse(pdf[:url]).path)
          dest_path = File.join(dest_dir, filename)

          @presenter.show_download_start(
            "#{pdf[:cargo]} — #{pdf[:titulo]}", i + 1, pdfs.size
          )
          @downloader.download(pdf[:url], dest_path)
          dest_path
        end
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../filtros_concurso'
4
+ require_relative '../ports/concurso_repository'
5
+ require_relative '../ports/presenter'
6
+
7
module Application
  module UseCases
    # Use case: list contests (open or closed), narrowed by the given filters,
    # and hand the result to the presenter.
    class ListarConcursos
      # @param repository [#fetch_abertos, #fetch_encerrados]
      # @param presenter [#show, #error]
      def initialize(repository:, presenter:)
        @presenter = presenter
        @repository = repository
      end

      # Dispatches to the closed or open listing according to the filters.
      #
      # @param filtros [#encerrados?, #estado, #nivel, #busca, #ano, #limite]
      # @return [Object, nil] the result of the chosen listing
      def execute(filtros = FiltrosConcurso.new)
        return executar_encerrados(filtros) if filtros.encerrados?

        executar_abertos(filtros)
      end

      private

      # Lists open contests. All filters, including the text search, are
      # applied locally to what the repository returned.
      def executar_abertos(filtros)
        todos, metadata = @repository.fetch_abertos
        metadata.update(total_scraped: todos.size, modo: :abertos)

        apresentar(aplicar_filtros(todos, filtros, incluir_busca: true), filtros, metadata)
      end

      # Lists closed contests. The search term is mandatory and is sent to
      # the repository, so the local text filter is skipped.
      def executar_encerrados(filtros)
        if filtros.busca
          todos, metadata = @repository.fetch_encerrados(filtros.busca)
          metadata.update(total_scraped: todos.size, modo: :encerrados, busca: filtros.busca)

          return apresentar(aplicar_filtros(todos, filtros, incluir_busca: false), filtros, metadata)
        end

        @presenter.error("--encerrados requer --busca TEXTO (ex: ruby main.rb --encerrados --busca policia)")
        nil
      end

      # Truncates the list to the requested limit (when one is set) and
      # shows it together with its metadata.
      def apresentar(concursos, filtros, metadata)
        limite = filtros.limite
        visiveis = limite ? concursos.first(limite) : concursos
        @presenter.show(visiveis, metadata: metadata)
      end

      # Applies the state, level, optional text and year filters in sequence.
      def aplicar_filtros(concursos, filtros, incluir_busca: true)
        restantes = filtrar_por_estado(concursos, filtros.estado)
        restantes = filtrar_por_nivel(restantes, filtros.nivel)
        restantes = filtrar_por_busca(restantes, filtros.busca) if incluir_busca
        filtrar_por_ano(restantes, filtros.ano)
      end

      # Keeps contests whose state equals +estado+ exactly.
      def filtrar_por_estado(concursos, estado)
        estado ? concursos.select { |item| item.estado == estado } : concursos
      end

      # Keeps contests whose level contains +nivel+, ignoring case.
      def filtrar_por_nivel(concursos, nivel)
        return concursos unless nivel

        alvo = nivel.downcase
        concursos.select { |item| item.nivel.downcase.include?(alvo) }
      end

      # Keeps contests whose institution or positions contain +busca+,
      # ignoring case.
      def filtrar_por_busca(concursos, busca)
        return concursos unless busca

        alvo = busca.downcase
        concursos.select do |item|
          next true if item.instituicao.downcase.include?(alvo)

          item.cargos.downcase.include?(alvo)
        end
      end

      # Keeps contests whose deadline text contains the year.
      def filtrar_por_ano(concursos, ano)
        return concursos unless ano

        concursos.select { |item| item.prazo.include?(ano.to_s) }
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ver_edital_request'
4
+ require_relative '../ports/concurso_repository'
5
+ require_relative '../ports/presenter'
6
+
7
module Application
  module UseCases
    # Use case: list the exams found at a URL together with the PDFs each one
    # offers, and hand the result to the presenter.
    class ListarProvas
      # @param repository [#fetch_provas_listing, #fetch_prova_pdfs]
      # @param presenter [#show_provas, #error]
      def initialize(repository:, presenter:)
        @presenter = presenter
        @repository = repository
      end

      # Builds one `{ cargo:, pdfs: }` hash per listed exam and shows them.
      # When the listing is empty an error is presented instead.
      #
      # @param request [#url] where the listing lives
      # @return [Object, nil] nil on the empty-listing path, otherwise whatever
      #   the presenter's `show_provas` returns
      def execute(request)
        provas = @repository.fetch_provas_listing(request.url)

        unless provas.empty?
          detalhadas = provas.each_with_object([]) do |item, acumulado|
            arquivos = @repository.fetch_prova_pdfs(item[:download_url])
            acumulado << { cargo: item[:cargo], pdfs: arquivos }
          end

          return @presenter.show_provas(detalhadas)
        end

        @presenter.error("Nenhuma prova encontrada em: #{request.url}")
        nil
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ver_edital_request'
4
+ require_relative '../ports/concurso_repository'
5
+ require_relative '../ports/presenter'
6
+
7
module Application
  module UseCases
    # Use case: fetch one edital and hand it to the presenter.
    class VerEdital
      # @param repository [#fetch_edital]
      # @param presenter [#show_edital]
      def initialize(repository:, presenter:)
        @presenter = presenter
        @repository = repository
      end

      # @param request [#url] which edital to fetch
      # @return [Object] whatever the presenter's `show_edital` returns
      def execute(request)
        @presenter.show_edital(@repository.fetch_edital(request.url))
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Application
  # Request object carrying the URL of the page to fetch.
  # Built with keyword arguments: `VerEditalRequest.new(url: "...")`.
  VerEditalRequest = Struct.new(
    :url,
    keyword_init: true
  )
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module Domain
  module Entities
    # A contest listing entry. Instances are frozen at the end of
    # construction, so their attributes cannot be reassigned afterwards.
    class Concurso
      attr_reader :instituicao,
                  :estado,
                  :vagas,
                  :salario,
                  :cargos,
                  :nivel,
                  :prazo,
                  :url

      # All keywords are required; values are stored as given.
      def initialize(instituicao:, estado:, vagas:, salario:,
                     cargos:, nivel:, prazo:, url:)
        @instituicao, @estado, @vagas, @salario = instituicao, estado, vagas, salario
        @cargos, @nivel, @prazo, @url = cargos, nivel, prazo, url

        freeze
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Domain
  module Entities
    # An edital (contest notice) with its content blocks and attached PDFs.
    #
    # Instances are frozen at the end of construction. The `blocos` and `pdfs`
    # collections are stored as frozen shallow copies, so the arrays passed in
    # by the caller are left untouched and later changes to them do not leak
    # into the entity. Elements inside the collections are not frozen.
    class Edital
      attr_reader :titulo, :descricao, :data_publicacao, :blocos, :pdfs, :provas_url, :url

      # @param titulo [Object] title of the notice
      # @param descricao [Object] short description
      # @param data_publicacao [Object] publication date
      # @param blocos [Array, nil] content blocks; copied and frozen
      # @param pdfs [Array, nil] attached PDFs; copied and frozen
      # @param provas_url [Object, nil] link to the related exams, if any
      # @param url [Object] address of the notice itself
      def initialize(titulo:, descricao:, data_publicacao:, blocos:, pdfs: [], provas_url: nil, url:)
        @titulo = titulo
        @descricao = descricao
        @data_publicacao = data_publicacao
        # dup before freeze: freezing the argument itself would freeze the
        # caller's own array as a side effect.
        @blocos = blocos.dup.freeze
        @pdfs = pdfs.dup.freeze
        @provas_url = provas_url
        @url = url

        freeze
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'uri'
5
+
6
module Infrastructure
  module Http
    # Minimal HTTP GET client built on Net::HTTP that follows redirects and
    # returns the response body tagged as UTF-8.
    class HttpClient
      USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
                   '(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'

      # Performs a GET request and returns the body of the final response.
      #
      # @param url [String] absolute http or https URL
      # @param redirect_limit [Integer] how many more requests may be made
      #   while following redirects
      # @return [String] response body, with its encoding forced to UTF-8
      #   (bytes are not transcoded); empty when the response has no body
      # @raise [RuntimeError] when the redirect budget is exhausted, when a
      #   redirect carries no Location header, or on any non-success status
      def get(url, redirect_limit: 5)
        # `positive?` rather than `zero?` so a negative limit cannot bypass
        # the guard.
        raise 'Muitos redirecionamentos' unless redirect_limit.positive?

        uri = URI.parse(url)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = (uri.scheme == 'https')
        http.open_timeout = 15
        http.read_timeout = 30

        request = Net::HTTP::Get.new(uri.request_uri)
        request['User-Agent'] = USER_AGENT
        request['Accept'] = 'text/html,application/xhtml+xml'
        request['Accept-Language'] = 'pt-BR,pt;q=0.9'

        response = http.request(request)

        case response
        when Net::HTTPSuccess
          # A success response may carry no body at all.
          body = response.body || String.new
          body.force_encoding('UTF-8')
        when Net::HTTPRedirection
          get(redirect_target(url, response), redirect_limit: redirect_limit - 1)
        else
          raise "Erro HTTP: #{response.code} #{response.message}"
        end
      end

      private

      # Resolves the Location header against the URL that produced it, so
      # both absolute and relative redirect targets work.
      def redirect_target(base_url, response)
        location = response['location']
        if location.nil? || location.empty?
          raise "Redirecionamento sem cabeçalho Location: #{base_url}"
        end

        URI.join(base_url, location).to_s
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'uri'
5
+ require_relative '../../application/ports/file_downloader'
6
+
7
module Infrastructure
  module Http
    # FileDownloader implementation that streams an HTTP(S) resource to a
    # local file with Net::HTTP, following redirects.
    class HttpFileDownloader < Application::Ports::FileDownloader
      USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
                   '(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'

      # Downloads +url+ into +dest_path+, writing the body chunk by chunk.
      #
      # @param url [String] absolute http or https URL
      # @param dest_path [String] file to create or overwrite
      # @param redirect_limit [Integer] how many more requests may be made
      #   while following redirects
      # @raise [RuntimeError] when the redirect budget is exhausted, when a
      #   redirect carries no Location header, or on any non-success status
      def download(url, dest_path, redirect_limit: 5)
        # `positive?` rather than `zero?` so a negative limit cannot bypass
        # the guard.
        raise 'Muitos redirecionamentos' unless redirect_limit.positive?

        uri = URI.parse(url)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = (uri.scheme == 'https')
        http.open_timeout = 15
        http.read_timeout = 120

        request = Net::HTTP::Get.new(uri.request_uri)
        request['User-Agent'] = USER_AGENT

        http.start do |h|
          h.request(request) do |response|
            case response
            when Net::HTTPSuccess
              write_body(response, dest_path)
            when Net::HTTPRedirection
              download(redirect_target(url, response), dest_path, redirect_limit: redirect_limit - 1)
            else
              raise "Erro HTTP: #{response.code} #{response.message}"
            end
          end
        end
      end

      private

      # Streams the response body into dest_path. If streaming fails after
      # the file was opened, the partial file is removed and the error is
      # re-raised, so a truncated download is not left on disk.
      def write_body(response, dest_path)
        opened = false
        File.open(dest_path, 'wb') do |file|
          opened = true
          response.read_body { |chunk| file.write(chunk) }
        end
      rescue StandardError
        File.delete(dest_path) if opened && File.exist?(dest_path)
        raise
      end

      # Resolves the Location header against the URL that produced it.
      # URI.join returns absolute targets as they are and resolves relative
      # ones, so no prefix sniffing is needed.
      def redirect_target(base_url, response)
        location = response['location']
        if location.nil? || location.empty?
          raise "Redirecionamento sem cabeçalho Location: #{base_url}"
        end

        URI.join(base_url, location).to_s
      end
    end
  end
end
@@ -0,0 +1,208 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require_relative '../../domain/entities/concurso'
5
+ require_relative '../../domain/entities/edital'
6
+
7
module Infrastructure
  module Parsers
    # Parses HTML pages with Nokogiri into domain entities (Concurso, Edital)
    # and plain hashes, using the CSS/XPath selectors hard-coded below.
    class PciHtmlParser
      # Parses the open-contests page.
      #
      # @param html [String] page markup
      # @return [Array(Array<Domain::Entities::Concurso>, Hash)] the contests
      #   and a metadata hash with :total_vagas
      def parse_abertos(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        [extract_abertos(doc), { total_vagas: extract_total_vagas(doc) }]
      end

      # Parses a closed-contests result page.
      #
      # @param html [String] page markup
      # @return [Array(Array<Domain::Entities::Concurso>, Hash)] the contests
      #   and a metadata hash whose :total_vagas is always an empty string
      def parse_encerrados(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        concursos = doc.css('div.ea').filter_map { |el| build_concurso_encerrado(el) }
        [concursos, { total_vagas: '' }]
      end

      # Parses an edital page.
      #
      # @param html [String] page markup
      # @param url [String] address of the page, stored on the entity
      # @return [Domain::Entities::Edital]
      # @raise [RuntimeError] when the page has no `article#noticia` element
      def parse_edital(html, url)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        article = doc.css('article#noticia').first

        raise "Edital não encontrado na página: #{url}" unless article

        titulo = article.css('h1[itemprop="headline"]').first&.text&.strip || ''
        descricao = article.css('div.description').first&.text&.strip || ''
        data_raw = article.css('abbr.published').first&.[]('title') || ''
        data_pub = format_date(data_raw)

        body_node = article.css('div[itemprop="articleBody"]').first
        blocos = body_node ? extract_body_blocks(body_node) : []

        Domain::Entities::Edital.new(
          titulo: titulo,
          descricao: descricao,
          data_publicacao: data_pub,
          blocos: blocos,
          pdfs: extract_pdfs(doc),
          provas_url: extract_provas_url(doc),
          url: url
        )
      end

      # Parses an exams listing page.
      #
      # @param html [String] page markup
      # @return [Array<Hash>] one `{ cargo:, download_url: }` per
      #   `a.prova_download` link; cargo is built from the link's direct text
      #   nodes only
      def parse_provas_listing(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        doc.css('a.prova_download').map do |a|
          cargo = a.children.select(&:text?).map(&:text).join.strip
          { cargo: cargo, download_url: a['href'] }
        end
      end

      # Parses an exam download page.
      #
      # Only links whose trimmed text starts with "Baixar" are kept; that
      # prefix is removed from the title. The text is trimmed before the
      # prefix is removed, so surrounding whitespace in the markup cannot
      # leave "Baixar" in the title.
      #
      # @param html [String] page markup
      # @return [Array<Hash>] one `{ titulo:, url: }` per PDF link
      def parse_prova_download_page(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        doc.css('div#download a.item-link[href$=".pdf"]').filter_map do |a|
          label = a.text.strip
          next unless label.start_with?('Baixar')

          { titulo: label.sub(/\ABaixar\s+/i, '').strip, url: a['href'] }
        end
      end

      private

      # Returns the "<number> Vaga(s)" fragment of the first h1, or ''.
      def extract_total_vagas(doc)
        doc.css('h1').first&.text&.match(/[\d.]+\s*Vagas?/i)&.[](0) || ''
      end

      # Walks the siblings of the #NACIONAL element in document order:
      # elements with class "ua" switch the current state (to their id),
      # elements with class "da" or "na" become contests of that state.
      def extract_abertos(doc)
        nacional = doc.css('#NACIONAL').first
        return [] unless nacional

        concursos = []
        current_state = 'NACIONAL'

        nacional.parent.children.each do |child|
          next unless child.element?

          case child['class']&.strip
          when 'ua'
            current_state = child['id'] || 'NACIONAL'
          when 'da', 'na'
            entry = build_concurso(child, current_state)
            concursos << entry if entry
          end
        end

        concursos
      end

      # Builds a contest from a closed-contest entry; the state is read from
      # the entry's own `div.cc` instead of the surrounding section.
      def build_concurso_encerrado(el)
        state = el.css('div.cc').first&.text&.strip || ''
        build_concurso(el, state)
      end

      # Builds a contest from one entry element, or nil when the entry has no
      # `div.ca > a` link.
      def build_concurso(el, state)
        link = el.css('div.ca > a').first
        return nil unless link

        vagas, salario = parse_vagas_salario(el)
        cargos, nivel = parse_cargo_nivel(el)

        Domain::Entities::Concurso.new(
          instituicao: link.text.strip,
          estado: state,
          vagas: vagas,
          salario: salario,
          cargos: cargos,
          nivel: nivel,
          prazo: parse_prazo(el),
          url: link['href']
        )
      end

      # Splits the first text node of `div.cd` into [vagas, salario] at the
      # "até R$" marker; without the marker the whole text is the vagas part.
      def parse_vagas_salario(el)
        cd = el.css('div.cd').first
        return ['', ''] unless cd

        text = cd.xpath('text()[1]').text.strip
        match = text.match(/^(.+?)\s+(até R\$.+)$/)
        return [text, ''] unless match

        [match[1].strip, match[2].strip]
      end

      # Reads [cargos, nivel] from the first span inside `div.cd`: cargos is
      # that span's first text node, nivel the text of its first child span.
      def parse_cargo_nivel(el)
        cd = el.css('div.cd').first
        return ['', ''] unless cd

        outer = cd.children.find { |c| c.element? && c.name == 'span' }
        return ['', ''] unless outer

        cargos = outer.xpath('text()[1]').text.strip
        nivel = outer.children
                     .find { |c| c.element? && c.name == 'span' }
                     &.text&.strip || ''
        [cargos, nivel]
      end

      # Returns the deadline text of `div.ce span`, with <br> turned into a
      # space and runs of whitespace collapsed.
      def parse_prazo(el)
        span = el.css('div.ce span').first
        return '' unless span

        span.children
            .map { |c| c.element? && c.name == 'br' ? ' ' : c.text }
            .join.gsub(/\s+/, ' ').strip
      end

      # Lists `{ titulo:, url: }` for each PDF link in the links sidebar.
      def extract_pdfs(doc)
        doc.css('aside#links li.pdf a').map do |a|
          { titulo: a.text.strip, url: a['href'] }
        end
      end

      # Returns the href of the exams link in the links sidebar, or nil.
      def extract_provas_url(doc)
        doc.css('aside#links li.li_provas a').first&.[]('href')
      end

      # Converts the leading "YYYY-MM-DD" of a string into "DD/MM/YYYY".
      # Returns '' for nil/empty input and the input unchanged when its first
      # ten characters do not split into three dash-separated parts.
      def format_date(iso_str)
        return '' if iso_str.nil? || iso_str.empty?

        parts = iso_str[0..9].split('-')
        return iso_str unless parts.length == 3

        parts.reverse.join('/')
      end

      # Converts the child nodes of an articleBody into structured blocks.
      # Each block: { tipo: :secao | :paragrafo | :item, texto: String }
      # <div> children are descended into recursively; other tags are ignored.
      def extract_body_blocks(node)
        blocos = []
        node.children.each do |child|
          next unless child.element?

          case child.name
          when 'p'
            text = child.text.strip.gsub(/\s+/, ' ')
            blocos << { tipo: :paragrafo, texto: text } unless text.empty?
          when 'h2', 'h3', 'h4'
            text = child.text.strip
            blocos << { tipo: :secao, texto: text } unless text.empty?
          when 'ul', 'ol'
            child.css('li').each do |li|
              text = li.text.strip.gsub(/\s+/, ' ')
              blocos << { tipo: :item, texto: text } unless text.empty?
            end
          when 'div'
            blocos.concat(extract_body_blocks(child))
          end
        end
        blocos
      end
    end
  end
end