xupa_emec 1.0.2 → 1.0.3

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/bin/xupa_emec CHANGED
@@ -13,15 +13,20 @@ as opções são:
13
13
  EOS
14
14
  opt :entrada, "Arquivo fonte com lista de faculdades exportadas pelo emec", :short => 'i', :default => 'in.xls'
15
15
  opt :saida, "Arquivo csv que será gerado", :short => 'o', :default => 'out.csv'
16
+ opt :quebraemail, "Gera uma linha por email", :short => 'q'
17
+ opt :buscacursos, "Busca lista de cursos das IES (demora mais)", :short => 'c'
16
18
  end
17
19
 
18
- crawler = XupaEmec::Crawler.new
20
+ crawler = XupaEmec::Crawler.new(:search_courses => opts[:buscacursos])
21
+
22
+ headers = ['nome', 'sigla', 'nome_limpo', 'tipo', 'cidade', 'tel', 'site', 'email', 'mantenedora', 'representante_nome', 'representante_primeiro_nome', 'representante_cargo']
23
+ headers << 'num_cursos' << 'lista_cursos' if opts[:buscacursos]
19
24
 
20
25
  File.open(opts[:entrada], "r") do |input|
21
26
 
22
27
  FasterCSV.open(opts[:saida], "w",
23
28
  :write_headers => true,
24
- :headers => ['nome', 'tipo', 'cidade', 'tel', 'site', 'email', 'mantenedora', 'representante_nome', 'representante_primeiro_nome', 'representante_cargo']) do |out_csv|
29
+ :headers => headers) do |out_csv|
25
30
 
26
31
  in_html = doc = Nokogiri::HTML(input)
27
32
  iess_to_search = in_html.css('table:nth-child(2) tbody tr')
@@ -37,8 +42,18 @@ File.open(opts[:entrada], "r") do |input|
37
42
  puts
38
43
  puts "#{index+1} - Buscando nome da instituição '#{ies_search_name}'..."
39
44
 
40
- out_csv << crawler.crawl(ies_search_name)
41
45
 
46
+ if opts[:quebraemail]
47
+ ies_hash = crawler.crawl(ies_search_name)
48
+ ies_hash['email'].split(',').each do |email|
49
+ new_hash = ies_hash.clone
50
+ new_hash['email'] = email
51
+ out_csv << new_hash
52
+ end
53
+ else
54
+ out_csv << crawler.crawl(ies_search_name)
55
+ end
56
+
42
57
  end
43
58
 
44
59
  end
@@ -1,7 +1,8 @@
1
1
  module XupaEmec
2
2
  class Crawler
3
- def initialize(agent = Mechanize.new)
4
- @agent = agent
3
+ def initialize(options={})
4
+ @search_courses = options[:search_courses]
5
+ @agent = options[:agent] || Mechanize.new
5
6
  end
6
7
 
7
8
  attr_reader :agent
@@ -36,6 +37,10 @@ module XupaEmec
36
37
 
37
38
  ies_info['nome'] = ies_data.search("table.tab_paleta > tr:nth-child(4) tr:nth-child(1) > td:nth-child(2)").first.text.strip
38
39
 
40
+ ies_info['sigla'] = ies_info['nome'].split(' - ')[1..-1].join('-')
41
+
42
+ ies_info['nome_limpo'] = ies_info['nome'].split(' - ')[0].mb_chars.titleize
43
+
39
44
  ies_info['cidade'] = ies_data.search("table.tab_paleta > tr:nth-child(4) tr:nth-child(5) > td:nth-child(2)").first.text.strip
40
45
 
41
46
  ies_info['tel'] = ies_data.search("table.tab_paleta > tr:nth-child(4) tr:nth-child(6) > td:nth-child(2)").first.text.strip
@@ -44,7 +49,13 @@ module XupaEmec
44
49
 
45
50
  ies_info['site'] = ies_data.search("table.tab_paleta > tr:nth-child(4) tr:nth-child(7) > td:nth-child(4)").first.text.strip
46
51
 
47
- ies_info['email'] = ies_data.search("table.tab_paleta > tr:nth-child(4) tr:nth-child(8) > td:nth-child(2)").first.text.strip
52
+ ies_info['email'] = ies_data.search("table.tab_paleta > tr:nth-child(4) tr:nth-child(8) > td:nth-child(2)").first.text.strip.split(/\s*[\s,;\/\\]\s*/).join(',')
53
+
54
+ if @search_courses
55
+ courses_page= agent.get("http://emec.mec.gov.br/emec/consulta-ies/listar-curso-agrupado/#{ies_url}/page/1/list/1000")
56
+ ies_info['num_cursos'] = courses_page.search("div.campform > div:first-child").text.match(/Registro\(s\)\: 1 a \d+ de (\d+)/)[1]
57
+ ies_info['lista_cursos'] = courses_page.search("table#listar-ies-cadastro > tbody > tr").map{|l| l.search('td').first.text.gsub('&nbsp;', '').strip}.join(', ')
58
+ end
48
59
 
49
60
  puts "Informação processada para '#{ies_search_name}' :"
50
61
  puts ies_info.to_yaml
@@ -2,7 +2,7 @@ module XupaEmec
2
2
  module Version
3
3
  MAJOR = 1
4
4
  MINOR = 0
5
- PATCH = 2
5
+ PATCH = 3
6
6
  STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
7
7
  end
8
8
  end
data/xupa_emec.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{xupa_emec}
8
- s.version = "1.0.2"
8
+ s.version = "1.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Bernardo de P\303\241dua"]
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xupa_emec
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 2
10
- version: 1.0.2
9
+ - 3
10
+ version: 1.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - "Bernardo de P\xC3\xA1dua"