concurso_hub 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +511 -0
- data/lib/concurso_hub/version.rb +5 -0
- data/lib/concurso_hub.rb +143 -0
- data/src/application/baixar_edital_request.rb +5 -0
- data/src/application/baixar_provas_request.rb +5 -0
- data/src/application/filtros_concurso.rb +16 -0
- data/src/application/ports/concurso_repository.rb +27 -0
- data/src/application/ports/file_downloader.rb +11 -0
- data/src/application/ports/presenter.rb +35 -0
- data/src/application/use_cases/baixar_edital.rb +43 -0
- data/src/application/use_cases/baixar_provas.rb +59 -0
- data/src/application/use_cases/listar_concursos.rb +89 -0
- data/src/application/use_cases/listar_provas.rb +32 -0
- data/src/application/use_cases/ver_edital.rb +21 -0
- data/src/application/ver_edital_request.rb +5 -0
- data/src/domain/entities/concurso.rb +24 -0
- data/src/domain/entities/edital.rb +21 -0
- data/src/infrastructure/http/http_client.rb +41 -0
- data/src/infrastructure/http/http_file_downloader.rb +44 -0
- data/src/infrastructure/parsers/pci_html_parser.rb +208 -0
- data/src/infrastructure/repositories/pci_concurso_repository.rb +49 -0
- data/src/presentation/cli/cli_controller.rb +35 -0
- data/src/presentation/cli/cli_options_parser.rb +121 -0
- data/src/presentation/formatters/terminal_presenter.rb +203 -0
- metadata +81 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Application
  module Ports
    # Abstract repository port for concurso data.
    #
    # Every method here raises NotImplementedError; a concrete repository is
    # expected to subclass this port and override all of them.
    class ConcursoRepository
      # Fetches the currently open concursos.
      # @raise [NotImplementedError] always, unless overridden
      def fetch_abertos
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches closed concursos for a search term.
      # @param busca search term (unused by this abstract version)
      # @raise [NotImplementedError] always, unless overridden
      def fetch_encerrados(busca)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches a single edital.
      # @param url location of the edital (unused by this abstract version)
      # @raise [NotImplementedError] always, unless overridden
      def fetch_edital(url)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches the listing of provas.
      # @param provas_url location of the listing (unused by this abstract version)
      # @raise [NotImplementedError] always, unless overridden
      def fetch_provas_listing(provas_url)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Fetches the PDFs available for one prova.
      # @param download_url location of the download page (unused by this abstract version)
      # @raise [NotImplementedError] always, unless overridden
      def fetch_prova_pdfs(download_url)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Application
  module Ports
    # Abstract presenter port.
    #
    # Every method here raises NotImplementedError; a concrete presenter is
    # expected to subclass this port and override all of them.
    class Presenter
      # Signals that work is in progress.
      # @raise [NotImplementedError] always, unless overridden
      def show_loading
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Displays a list of concursos.
      # @param concursos the entries to display (unused by this abstract version)
      # @param metadata [Hash] extra information about the listing
      # @raise [NotImplementedError] always, unless overridden
      def show(concursos, metadata: {})
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Displays an error message.
      # @param message the text to display (unused by this abstract version)
      # @raise [NotImplementedError] always, unless overridden
      def error(message)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Displays one edital.
      # @raise [NotImplementedError] always, unless overridden
      def show_edital(edital)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Announces that item `index` of `total` is starting.
      # @raise [NotImplementedError] always, unless overridden
      def show_download_start(titulo, index, total)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Announces that downloads finished.
      # @param paths the written file paths (unused by this abstract version)
      # @raise [NotImplementedError] always, unless overridden
      def show_download_done(paths)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end

      # Displays the provas that were found.
      # @raise [NotImplementedError] always, unless overridden
      def show_provas(provas)
        raise NotImplementedError, "#{self.class}##{__method__} deve ser implementado"
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'uri'
|
|
4
|
+
require_relative '../baixar_edital_request'
|
|
5
|
+
require_relative '../ports/concurso_repository'
|
|
6
|
+
require_relative '../ports/file_downloader'
|
|
7
|
+
require_relative '../ports/presenter'
|
|
8
|
+
|
|
9
|
+
require 'fileutils'

module Application
  module UseCases
    # Downloads every PDF attached to an edital into a local directory.
    #
    # Collaborators are injected: the repository supplies the edital, the
    # downloader writes each file, and the presenter reports progress/errors.
    class BaixarEdital
      def initialize(repository:, downloader:, presenter:)
        @repository = repository
        @downloader = downloader
        @presenter = presenter
      end

      # Fetches the edital at `request.url` and downloads each of its PDFs.
      #
      # @param request object responding to `url` and `dest_dir`; when
      #   `dest_dir` is nil the files go to `<current dir>/editais`
      # @return the presenter's `show_download_done` result, or nil when the
      #   edital has no PDFs (an error is reported through the presenter)
      def execute(request)
        edital = @repository.fetch_edital(request.url)
        pdfs = edital.pdfs

        if pdfs.empty?
          @presenter.error("Nenhum PDF encontrado para este edital.")
          return
        end

        dest_dir = request.dest_dir || File.join(Dir.pwd, 'editais')
        # mkdir_p also creates missing parent directories and is a no-op when
        # the directory already exists (Dir.mkdir raised on nested paths).
        FileUtils.mkdir_p(dest_dir)

        downloaded = pdfs.each_with_index.map do |pdf, index|
          dest_path = File.join(dest_dir, filename_for(pdf[:url], index))

          @presenter.show_download_start(pdf[:titulo], index + 1, pdfs.size)
          @downloader.download(pdf[:url], dest_path)
          dest_path
        end

        @presenter.show_download_done(downloaded)
      end

      private

      # Derives a local file name from the last path segment of `url`.
      # Falls back to a numbered name when the URL has no usable segment
      # (empty path, "/", "." or ".."), which would otherwise produce a path
      # pointing at a directory instead of a file.
      def filename_for(url, index)
        name = File.basename(URI.parse(url).path.to_s)
        return "arquivo_#{index + 1}.pdf" if name.empty? || %w[/ . ..].include?(name)

        name
      end
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'uri'
|
|
4
|
+
require_relative '../baixar_provas_request'
|
|
5
|
+
require_relative '../ports/concurso_repository'
|
|
6
|
+
require_relative '../ports/file_downloader'
|
|
7
|
+
require_relative '../ports/presenter'
|
|
8
|
+
|
|
9
|
+
require 'fileutils'

module Application
  module UseCases
    # Downloads the PDFs of every prova listed on a provas page.
    #
    # Works in two passes: first it asks the repository for the PDFs of each
    # listed prova, then it downloads all collected PDFs into one directory.
    class BaixarProvas
      def initialize(repository:, downloader:, presenter:)
        @repository = repository
        @downloader = downloader
        @presenter = presenter
      end

      # @param request object responding to `url` and `dest_dir`; when
      #   `dest_dir` is nil the files go to `<current dir>/editais`
      # @return the presenter's `show_download_done` result, or nil when no
      #   prova or no PDF was found (an error is reported through the presenter)
      def execute(request)
        provas = @repository.fetch_provas_listing(request.url)

        if provas.empty?
          @presenter.error("Nenhuma prova encontrada em: #{request.url}")
          return
        end

        todos_pdfs = collect_pdfs(provas)

        if todos_pdfs.empty?
          @presenter.error("Nenhum PDF de prova encontrado.")
          return
        end

        dest_dir = request.dest_dir || File.join(Dir.pwd, 'editais')
        # mkdir_p also creates missing parent directories and is a no-op when
        # the directory already exists (Dir.mkdir raised on nested paths).
        FileUtils.mkdir_p(dest_dir)

        downloaded = todos_pdfs.each_with_index.map do |pdf, i|
          dest_path = File.join(dest_dir, filename_for(pdf[:url], i))

          @presenter.show_download_start(
            "#{pdf[:cargo]} — #{pdf[:titulo]}", i + 1, todos_pdfs.size
          )
          @downloader.download(pdf[:url], dest_path)
          dest_path
        end

        @presenter.show_download_done(downloaded)
      end

      private

      # Looks up the PDFs of each prova, reporting progress per prova, and
      # returns one flat list of PDF hashes tagged with the prova's cargo.
      def collect_pdfs(provas)
        provas.each_with_index.flat_map do |prova, i|
          @presenter.show_download_start(
            "Buscando provas de: #{prova[:cargo]}", i + 1, provas.size
          )
          @repository.fetch_prova_pdfs(prova[:download_url]).map do |pdf|
            { cargo: prova[:cargo], **pdf }
          end
        end
      end

      # Derives a local file name from the last path segment of `url`.
      # Falls back to a numbered name when the URL has no usable segment
      # (empty path, "/", "." or ".."), which would otherwise produce a path
      # pointing at a directory instead of a file.
      def filename_for(url, index)
        name = File.basename(URI.parse(url).path.to_s)
        return "arquivo_#{index + 1}.pdf" if name.empty? || %w[/ . ..].include?(name)

        name
      end
    end
  end
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../filtros_concurso'
|
|
4
|
+
require_relative '../ports/concurso_repository'
|
|
5
|
+
require_relative '../ports/presenter'
|
|
6
|
+
|
|
7
|
+
module Application
  module UseCases
    # Lists concursos (open or closed), applies the user's filters and hands
    # the result to the presenter.
    class ListarConcursos
      def initialize(repository:, presenter:)
        @repository = repository
        @presenter = presenter
      end

      # @param filtros object responding to `encerrados?`, `estado`, `nivel`,
      #   `busca`, `ano` and `limite`; a nil filter value means "do not filter"
      # @return whatever the presenter returns, or nil when the closed-mode
      #   precondition (a search term) is not met
      def execute(filtros = FiltrosConcurso.new)
        if filtros.encerrados?
          executar_encerrados(filtros)
        else
          executar_abertos(filtros)
        end
      end

      private

      # Open mode: the search term is applied locally as a filter.
      def executar_abertos(filtros)
        concursos, metadata = @repository.fetch_abertos
        apresentar(concursos, metadata, filtros, modo: :abertos, incluir_busca: true)
      end

      # Closed mode: the search term is sent to the repository, so it is
      # mandatory and is not re-applied as a local filter.
      def executar_encerrados(filtros)
        # A blank term ("" or only whitespace) is as unusable as a missing one.
        if filtros.busca.to_s.strip.empty?
          @presenter.error("--encerrados requer --busca TEXTO (ex: ruby main.rb --encerrados --busca policia)")
          return
        end

        concursos, metadata = @repository.fetch_encerrados(filtros.busca)
        apresentar(concursos, metadata, filtros,
                   modo: :encerrados, incluir_busca: false, busca: filtros.busca)
      end

      # Shared tail of both modes: annotates the repository's metadata hash in
      # place, filters, truncates to the limit and shows the result.
      def apresentar(concursos, metadata, filtros, modo:, incluir_busca:, **extras)
        metadata[:total_scraped] = concursos.size # count before any filtering
        metadata[:modo] = modo
        metadata.merge!(extras)

        concursos = aplicar_filtros(concursos, filtros, incluir_busca: incluir_busca)
        concursos = concursos.first(filtros.limite) if filtros.limite
        @presenter.show(concursos, metadata: metadata)
      end

      def aplicar_filtros(concursos, filtros, incluir_busca: true)
        concursos = filtrar_por_estado(concursos, filtros.estado)
        concursos = filtrar_por_nivel(concursos, filtros.nivel)
        concursos = filtrar_por_busca(concursos, filtros.busca) if incluir_busca
        filtrar_por_ano(concursos, filtros.ano)
      end

      # Exact match on the state, ignoring case (consistent with the nivel and
      # busca filters, which are also case-insensitive).
      def filtrar_por_estado(concursos, estado)
        return concursos unless estado

        wanted = estado.to_s
        concursos.select { |c| c.estado.to_s.casecmp?(wanted) }
      end

      # Case-insensitive substring match on the education level.
      def filtrar_por_nivel(concursos, nivel)
        return concursos unless nivel

        term = nivel.downcase
        concursos.select { |c| c.nivel.downcase.include?(term) }
      end

      # Case-insensitive substring match on institution name or cargos.
      def filtrar_por_busca(concursos, busca)
        return concursos unless busca

        term = busca.downcase
        concursos.select do |c|
          c.instituicao.downcase.include?(term) || c.cargos.downcase.include?(term)
        end
      end

      # Keeps entries whose prazo text contains the year.
      def filtrar_por_ano(concursos, ano)
        return concursos unless ano

        year = ano.to_s
        concursos.select { |c| c.prazo.include?(year) }
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ver_edital_request'
|
|
4
|
+
require_relative '../ports/concurso_repository'
|
|
5
|
+
require_relative '../ports/presenter'
|
|
6
|
+
|
|
7
|
+
module Application
  module UseCases
    # Lists the provas found at a URL together with the PDFs of each one.
    class ListarProvas
      def initialize(repository:, presenter:)
        @repository = repository
        @presenter = presenter
      end

      # Looks up the provas listing at `request.url`, resolves the PDFs of
      # every entry and passes `[{ cargo:, pdfs: }, ...]` to the presenter.
      # Reports an error through the presenter (and returns nil) when the
      # listing is empty.
      #
      # @param request object responding to `url`
      def execute(request)
        provas = @repository.fetch_provas_listing(request.url)

        unless provas.empty?
          detalhadas = provas.each_with_object([]) do |item, acc|
            arquivos = @repository.fetch_prova_pdfs(item[:download_url])
            acc << { cargo: item[:cargo], pdfs: arquivos }
          end
          return @presenter.show_provas(detalhadas)
        end

        @presenter.error("Nenhuma prova encontrada em: #{request.url}")
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ver_edital_request'
|
|
4
|
+
require_relative '../ports/concurso_repository'
|
|
5
|
+
require_relative '../ports/presenter'
|
|
6
|
+
|
|
7
|
+
module Application
  module UseCases
    # Fetches one edital and hands it to the presenter for display.
    class VerEdital
      def initialize(repository:, presenter:)
        @repository = repository
        @presenter = presenter
      end

      # @param request object responding to `url`
      # @return whatever the presenter's `show_edital` returns
      def execute(request)
        @presenter.show_edital(@repository.fetch_edital(request.url))
      end
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Domain
  module Entities
    # One concurso entry. All attributes are required keywords and the
    # instance is frozen at the end of construction, so it cannot be changed
    # afterwards (the attribute values themselves are not frozen here).
    class Concurso
      attr_reader :instituicao, :estado
      attr_reader :vagas, :salario
      attr_reader :cargos, :nivel
      attr_reader :prazo, :url

      def initialize(instituicao:, estado:, vagas:, salario:, cargos:, nivel:, prazo:, url:)
        @instituicao, @estado, @vagas, @salario = instituicao, estado, vagas, salario
        @cargos, @nivel, @prazo, @url = cargos, nivel, prazo, url

        freeze
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Domain
  module Entities
    # One edital: title, description, publication date, body blocks, attached
    # PDFs and an optional link to a provas page. The instance is frozen at
    # the end of construction.
    class Edital
      attr_reader :titulo, :descricao, :data_publicacao, :blocos, :pdfs, :provas_url, :url

      # @param blocos [Array] body blocks; copied, the caller's array is left untouched
      # @param pdfs [Array] attached PDFs; copied, the caller's array is left untouched
      # @param provas_url [String, nil] link to the provas page, when there is one
      def initialize(titulo:, descricao:, data_publicacao:, blocos:, pdfs: [], provas_url: nil, url:)
        @titulo = titulo
        @descricao = descricao
        @data_publicacao = data_publicacao
        # Freeze private copies: freezing the arguments themselves would make
        # the caller's arrays immutable as a side effect, and keeping a
        # reference would let later caller-side changes leak into the entity.
        @blocos = blocos.dup.freeze
        @pdfs = pdfs.dup.freeze
        @provas_url = provas_url
        @url = url

        freeze
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'net/http'
|
|
4
|
+
require 'uri'
|
|
5
|
+
|
|
6
|
+
module Infrastructure
  module Http
    # Minimal HTTP GET client built on Net::HTTP that follows redirects and
    # returns the response body tagged as UTF-8.
    class HttpClient
      USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
                   '(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'

      # Performs a GET request and returns the body.
      #
      # @param url [String] absolute http(s) URL
      # @param redirect_limit [Integer] how many more requests may be made
      #   while following redirects
      # @return [String] the body, with its encoding forced to UTF-8 (bytes
      #   are not transcoded)
      # @raise [RuntimeError] when the redirect budget is exhausted, a
      #   redirect has no Location header, or the status is neither 2xx nor 3xx
      def get(url, redirect_limit: 5)
        # `positive?` instead of `zero?` so a negative limit cannot recurse forever.
        raise 'Muitos redirecionamentos' unless redirect_limit.positive?

        uri = URI.parse(url)
        response = connection_for(uri).request(request_for(uri))

        case response
        when Net::HTTPSuccess
          # A success response without a body yields nil; return "" instead.
          (response.body || String.new).force_encoding('UTF-8')
        when Net::HTTPRedirection
          get(resolve_redirect(url, response['location']), redirect_limit: redirect_limit - 1)
        else
          raise "Erro HTTP: #{response.code} #{response.message}"
        end
      end

      private

      # Builds a Net::HTTP connection (not yet started) for the URI's host.
      def connection_for(uri)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = (uri.scheme == 'https')
        http.open_timeout = 15
        http.read_timeout = 30
        http
      end

      # Builds the GET request with browser-like headers.
      def request_for(uri)
        request = Net::HTTP::Get.new(uri.request_uri)
        request['User-Agent'] = USER_AGENT
        request['Accept'] = 'text/html,application/xhtml+xml'
        request['Accept-Language'] = 'pt-BR,pt;q=0.9'
        request
      end

      # Turns a Location header into an absolute URL. Location may be
      # relative ("/path" or "page.html"), in which case it is resolved
      # against the URL that produced the redirect.
      def resolve_redirect(base_url, location)
        raise 'Redirecionamento sem cabeçalho Location' if location.nil? || location.strip.empty?

        URI.join(base_url, location.strip).to_s
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'net/http'
|
|
4
|
+
require 'uri'
|
|
5
|
+
require_relative '../../application/ports/file_downloader'
|
|
6
|
+
|
|
7
|
+
module Infrastructure
|
|
8
|
+
module Http
|
|
9
|
+
class HttpFileDownloader < Application::Ports::FileDownloader
|
|
10
|
+
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
|
11
|
+
'(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
|
|
12
|
+
|
|
13
|
+
# Streams the resource at `url` into the file `dest_path`, following
# redirects.
#
# @param url [String] absolute http(s) URL
# @param dest_path [String] file to write; its directory must already exist
# @param redirect_limit [Integer] how many more requests may be made while
#   following redirects
# @return the Net::HTTP response of the request that finally delivered the file
# @raise [RuntimeError] when the redirect budget is exhausted, a redirect
#   has no Location header, or the status is neither 2xx nor 3xx
def download(url, dest_path, redirect_limit: 5)
  # `positive?` instead of `zero?` so a negative limit cannot recurse forever.
  raise 'Muitos redirecionamentos' unless redirect_limit.positive?

  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == 'https')
  http.open_timeout = 15
  http.read_timeout = 120

  request = Net::HTTP::Get.new(uri.request_uri)
  request['User-Agent'] = USER_AGENT

  redirect_to = nil
  response = http.start do |h|
    h.request(request) do |res|
      case res
      when Net::HTTPSuccess
        write_body_atomically(res, dest_path)
      when Net::HTTPRedirection
        # Only remember the target here; it is followed after this
        # connection has been closed.
        redirect_to = resolve_redirect(url, res['location'])
      else
        raise "Erro HTTP: #{res.code} #{res.message}"
      end
    end
  end

  return download(redirect_to, dest_path, redirect_limit: redirect_limit - 1) if redirect_to

  response
end

# Turns a Location header into an absolute URL, resolving relative values
# against the URL that produced the redirect.
private def resolve_redirect(base_url, location)
  raise 'Redirecionamento sem cabeçalho Location' if location.nil? || location.strip.empty?

  URI.join(base_url, location.strip).to_s
end

# Writes the response body chunk by chunk to "<dest_path>.part" and renames
# it to `dest_path` only after the whole body arrived, so an interrupted
# transfer never leaves a truncated file under the final name.
private def write_body_atomically(response, dest_path)
  partial = "#{dest_path}.part"
  File.open(partial, 'wb') do |file|
    response.read_body { |chunk| file.write(chunk) }
  end
  File.rename(partial, dest_path)
rescue StandardError
  File.delete(partial) if partial && File.exist?(partial)
  raise
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require_relative '../../domain/entities/concurso'
|
|
5
|
+
require_relative '../../domain/entities/edital'
|
|
6
|
+
|
|
7
|
+
module Infrastructure
  module Parsers
    # Extracts concurso listings, edital details and prova download links
    # from HTML pages, using Nokogiri and the CSS selectors hard-coded below.
    class PciHtmlParser
      # Parses the page listing open concursos.
      # @param html [String]
      # @return [Array] two elements: the list of Concurso entities and a
      #   metadata hash with `:total_vagas`
      def parse_abertos(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        [extract_abertos(doc), { total_vagas: extract_total_vagas(doc) }]
      end

      # Parses a page of closed concursos (one `div.ea` per entry).
      # @param html [String]
      # @return [Array] two elements: the list of Concurso entities and a
      #   metadata hash whose `:total_vagas` is always ''
      def parse_encerrados(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        concursos = doc.css('div.ea').filter_map { |el| build_concurso_encerrado(el) }
        [concursos, { total_vagas: '' }]
      end

      # Parses an edital page into an Edital entity.
      # @param html [String]
      # @param url [String] address the page came from; stored on the entity
      # @raise [RuntimeError] when the page has no `article#noticia`
      def parse_edital(html, url)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        article = doc.css('article#noticia').first

        raise "Edital não encontrado na página: #{url}" unless article

        titulo = article.css('h1[itemprop="headline"]').first&.text&.strip || ''
        descricao = article.css('div.description').first&.text&.strip || ''
        data_raw = article.css('abbr.published').first&.[]('title') || ''

        body_node = article.css('div[itemprop="articleBody"]').first
        blocos = body_node ? extract_body_blocks(body_node) : []

        Domain::Entities::Edital.new(
          titulo: titulo,
          descricao: descricao,
          data_publicacao: format_date(data_raw),
          blocos: blocos,
          pdfs: extract_pdfs(doc),
          provas_url: extract_provas_url(doc),
          url: url
        )
      end

      # Parses a provas listing page.
      # @return [Array<Hash>] one `{ cargo:, download_url: }` per
      #   `a.prova_download` link that has an href
      def parse_provas_listing(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        doc.css('a.prova_download').filter_map do |a|
          href = a['href']
          # A link without href cannot be followed; skip it instead of
          # handing a nil URL to the caller.
          next if href.nil? || href.strip.empty?

          # Only the anchor's own text nodes, ignoring nested elements.
          cargo = a.children.select(&:text?).map(&:text).join.strip
          { cargo: cargo, download_url: href }
        end
      end

      # Parses a prova download page.
      # @return [Array<Hash>] one `{ titulo:, url: }` per PDF link whose text
      #   starts with "Baixar" (that prefix is removed from the title)
      def parse_prova_download_page(html)
        doc = Nokogiri::HTML(html, nil, 'UTF-8')
        doc.css('div#download a.item-link[href$=".pdf"]')
           .select { |a| a.text.strip.start_with?('Baixar') }
           .map { |a| { titulo: a.text.sub(/\ABaixar\s+/i, '').strip, url: a['href'] } }
      end

      private

      # Pulls the "<number> Vaga(s)" fragment out of the first h1, or ''.
      def extract_total_vagas(doc)
        doc.css('h1').first&.text&.match(/[\d.]+\s*Vagas?/i)&.[](0) || ''
      end

      # Walks the siblings of the `#NACIONAL` element in document order:
      # an element with class "ua" switches the current state to its id, and
      # elements with class "da"/"na" are entries belonging to that state.
      def extract_abertos(doc)
        nacional = doc.css('#NACIONAL').first
        return [] unless nacional

        concursos = []
        current_state = 'NACIONAL'

        nacional.parent.children.each do |child|
          next unless child.element?

          case child['class']&.strip
          when 'ua'
            current_state = child['id'] || 'NACIONAL'
          when 'da', 'na'
            entry = build_concurso(child, current_state)
            concursos << entry if entry
          end
        end

        concursos
      end

      # Same as build_concurso, with the state read from the entry's `div.cc`.
      def build_concurso_encerrado(el)
        build_concurso(el, el.css('div.cc').first&.text&.strip || '')
      end

      # Builds a Concurso from one listing entry.
      # @return [Domain::Entities::Concurso, nil] nil when the entry has no
      #   `div.ca > a` link
      def build_concurso(el, state)
        link = el.css('div.ca > a').first
        return nil unless link

        vagas, salario = parse_vagas_salario(el)
        cargos, nivel = parse_cargo_nivel(el)

        Domain::Entities::Concurso.new(
          instituicao: link.text.strip,
          estado: state,
          vagas: vagas,
          salario: salario,
          cargos: cargos,
          nivel: nivel,
          prazo: parse_prazo(el),
          url: link['href']
        )
      end

      # Splits the first text node of `div.cd` into [vagas, salario] at the
      # "até R$" marker; without the marker the whole text is vagas.
      def parse_vagas_salario(el)
        cd = el.css('div.cd').first
        return ['', ''] unless cd

        text = cd.xpath('text()[1]').text.strip
        match = text.match(/^(.+?)\s+(até R\$.+)$/)
        return [text, ''] unless match

        [match[1].strip, match[2].strip]
      end

      # Reads [cargos, nivel] from the first span inside `div.cd`: cargos is
      # that span's first text node, nivel the text of its first nested span.
      def parse_cargo_nivel(el)
        cd = el.css('div.cd').first
        return ['', ''] unless cd

        outer = cd.children.find { |c| c.element? && c.name == 'span' }
        return ['', ''] unless outer

        cargos = outer.xpath('text()[1]').text.strip
        inner = outer.children.find { |c| c.element? && c.name == 'span' }
        [cargos, inner&.text&.strip || '']
      end

      # Returns the deadline text of `div.ce span` on one line: <br> becomes
      # a space and runs of whitespace are collapsed.
      def parse_prazo(el)
        span = el.css('div.ce span').first
        return '' unless span

        pieces = span.children.map { |c| c.element? && c.name == 'br' ? ' ' : c.text }
        pieces.join.gsub(/\s+/, ' ').strip
      end

      # Lists the PDF links of the edital's side box, skipping anchors that
      # have no href (a nil URL could not be downloaded later).
      def extract_pdfs(doc)
        doc.css('aside#links li.pdf a').filter_map do |a|
          href = a['href']
          next if href.nil? || href.strip.empty?

          { titulo: a.text.strip, url: href }
        end
      end

      # Href of the first provas link in the side box, or nil.
      def extract_provas_url(doc)
        doc.css('aside#links li.li_provas a').first&.[]('href')
      end

      # Converts the leading "YYYY-MM-DD" of a string to "DD/MM/YYYY".
      # Returns '' for nil/empty input, and the input unchanged when its
      # first ten characters do not split into three '-'-separated parts.
      def format_date(iso_str)
        return '' if iso_str.nil? || iso_str.empty?

        parts = iso_str[0..9].split('-')
        return iso_str unless parts.length == 3

        parts.reverse.join('/')
      end

      # Converts the child nodes of an articleBody into structured blocks.
      # Each block: { tipo: :secao | :paragrafo | :item, texto: String }
      # (p -> :paragrafo, h2/h3/h4 -> :secao, li inside ul/ol -> :item);
      # div children are descended into and empty texts are dropped.
      def extract_body_blocks(node)
        blocos = []
        node.children.each do |child|
          next unless child.element?

          case child.name
          when 'p'
            text = child.text.strip.gsub(/\s+/, ' ')
            blocos << { tipo: :paragrafo, texto: text } unless text.empty?
          when 'h2', 'h3', 'h4'
            text = child.text.strip
            blocos << { tipo: :secao, texto: text } unless text.empty?
          when 'ul', 'ol'
            child.css('li').each do |li|
              text = li.text.strip.gsub(/\s+/, ' ')
              blocos << { tipo: :item, texto: text } unless text.empty?
            end
          when 'div'
            blocos.concat(extract_body_blocks(child))
          end
        end
        blocos
      end
    end
  end
end
|