congress-scrapper 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  module Congress
2
2
  module Scrapper
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
@@ -3,6 +3,7 @@
3
3
  require "congress-scrapper/version"
4
4
  require "mechanize"
5
5
  require "progressbar"
6
+ require_relative "proposer"
6
7
 
7
8
  module Congress
8
9
  module Scrapper
@@ -33,21 +34,20 @@ module Congress
33
34
 
34
35
  commission_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Comisión competente:')]/following-sibling::*[@class='texto']"))
35
36
 
36
- proposer = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Autor:')]/following-sibling::*[@class='texto']"))
37
+ proposer_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Autor:')]/following-sibling::*[@class='texto']"))
37
38
 
38
39
  proposed_at_text = text_for("//*[@class='texto' and contains(normalize-space(text()),'Presentado el')]")
39
40
  proposed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if proposed_at_text && proposed_at_text.match(/Presentado\s+el\s+(\d\d)\/(\d\d)\/(\d\d\d\d)/)
40
41
 
41
42
  closed_at_text = text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Tramitación seguida por la iniciativa:')]/following-sibling::*[@class='texto']")
42
43
  closed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if closed_at_text && closed_at_text.match(/Concluido\s+.+\s+desde (\d\d)\/(\d\d)\/(\d\d\d\d)/)
43
-
44
44
  proposal = {:title => clean_text(title.content),
45
45
  :official_url => "http://www.congreso.es" + title[:href],
46
46
  :proposal_type => proposal_type,
47
47
  :closed_at => closed_at,
48
48
  :official_resolution => resolution,
49
- :commission_name => commission_name,
50
- :proposer => proposer,
49
+ :category_name => category(commission_name),
50
+ :proposer_name => proposer(proposer_name),
51
51
  :proposed_at => proposed_at}
52
52
 
53
53
  progress.inc
@@ -75,5 +75,19 @@ module Congress
75
75
  return unless text
76
76
  text.gsub(/\s+/,' ').gsub(/\s*\.\s*$/, '').strip
77
77
  end
78
+
79
# Turns a commission name into a category label by removing every
# "Comisión", "Comisión Mixta", "Comisión de/del" or "Comisión para la/las"
# prefix (with its trailing space) and capitalising the first character of
# what is left.
#
# name - the commission name String, or nil.
#
# Returns the category String, or nil when no name is given.
def category(name)
  if name
    without_prefix = name.gsub(/Comisión( Mixta)?( del?| para las?)? /, "")
    upcase_first(without_prefix)
  end
end
83
+
84
# Upcases only the first character of a string, leaving the rest untouched.
#
# string - the String to capitalise.
#
# Returns a new String. An empty input yields an empty String.
def upcase_first(string)
  # String#[] with a range starting past the end returns nil (the case for
  # an empty string), so the tail is coerced with to_s to avoid a TypeError.
  string[0, 1].upcase + string[1..-1].to_s
end
87
+
88
# Resolves the display name for a proposer by delegating to Proposer#name.
#
# string - the proposer text String, or nil.
#
# Returns whatever Proposer#name returns, or nil when no text is given.
def proposer(string)
  Proposer.new(string).name if string
end
78
92
  end
79
- end
93
+ end
data/lib/proposer.rb ADDED
@@ -0,0 +1,49 @@
1
+ # coding: utf-8
2
# Maps the long proposer names listed in MAPPING to short display names.
# Names that are not in the table are passed through unchanged.
class Proposer
  # Pairs of [short name, long name]. Several long names may share one
  # short name. Built once and frozen instead of being re-allocated on
  # every lookup.
  MAPPING = [
    ["PSOE", "Grupo Parlamentario Socialista"],
    ["PP", "Grupo Parlamentario Popular en el Congreso"],
    ["Convergència i Unió", "Grupo Parlamentario Catalán (Convergència i Unió)"],
    ["PNV", "Grupo Parlamentario Vasco (EAJ-PNV)"],
    ["Izquierda Unida", "Grupo Parlamentario de Esquerra Republicana-Izquierda Unida-Iniciativa per Catalunya Verds"],
    ["Grupo Mixto", "Grupo Parlamentario Mixto"],
    ["PSOE", "Senado Grupo Parlamentario Socialista"],
    ["PP", "Senado Grupo Parlamentario Popular en el Senado"],
    ["Convergència i Unió", "Senado Grupo Parlamentario Catalán en el Senado de Convergencia i Unió"],
    ["PNV", "Senado Grupo Parlamentario de Senadores Nacionalistas"],
    ["Izquierda Unida", "Senado Grupo Parlamentario de Entesa Catalana de Progrés"],
    ["Grupo Mixto", "Senado Grupo Parlamentario Mixto"],
    ["Andalucía", "Comunidad Autónoma de Andalucía-Parlamento"],
    ["Aragón", "Comunidad Autónoma de Aragón-Cortes"],
    ["Canarias", "Comunidad Autónoma de Canarias - Parlamento"],
    ["Castilla y León", "Comunidad Autónoma de Castilla y León - Cortes"],
    ["Castilla-La Mancha", "Comunidad Autónoma de Castilla-La Mancha - Cortes"],
    ["Cataluña", "Comunidad Autónoma de Cataluña - Parlamento"],
    ["Extremadura", "Comunidad Autónoma de Extremadura - Asamblea"],
    ["Galicia", "Comunidad Autónoma de Galicia - Parlamento"],
    ["Murcia", "Comunidad Autónoma de la Región de Murcia - Asamblea Regional"],
    ["La Rioja", "Comunidad Autónoma de La Rioja - Diputación General"],
    ["Baleares", "Comunidad Autónoma de las Illes Balears - Gobierno"],
    ["País Vasco", "Comunidad Autónoma del País Vasco - Gobierno"],
    ["País Vasco", "Comunidad Autónoma del País Vasco - Parlamento"]
  ].each(&:freeze).freeze

  # Only the writer is generated: the reader is hand-written below, so an
  # attr_accessor would define a getter that is immediately overridden.
  attr_writer :name

  # string - the raw proposer name.
  def initialize(string)
    @name = string
  end

  # Returns the short name when the raw name is a known long name,
  # otherwise the raw name unchanged.
  def name
    index = full_name
    index ? short_name(index) : @name
  end

  # Returns the Integer position in the mapping of the first entry whose
  # long name equals the raw name, or nil when there is none. (Despite the
  # method name, the result is an index, not a name.)
  def full_name
    mapping.index { |_short, long| long == @name }
  end

  # index - Integer position in the mapping.
  #
  # Returns the short name stored at that position.
  def short_name(index)
    mapping[index].first
  end

  # Returns the frozen table of [short name, long name] pairs.
  def mapping
    MAPPING
  end
end
@@ -45,8 +45,8 @@ describe Congress::Scrapper do
45
45
  proposal[:closed_at].should be_nil
46
46
  proposal[:official_resolution].should be_nil
47
47
  proposal[:proposed_at].should == Date.new(2010, 4, 9)
48
- proposal[:commission_name].should == "Comisión de Medio Ambiente, Agricultura y Pesca"
49
- proposal[:proposer].should == "Grupo Parlamentario Socialista"
48
+ proposal[:category_name].should == "Medio Ambiente, Agricultura y Pesca"
49
+ proposal[:proposer_name].should == "PSOE"
50
50
  end
51
51
 
52
52
  it "should populate closed proposals info" do
@@ -56,8 +56,8 @@ describe Congress::Scrapper do
56
56
  proposal[:proposal_type].should == "Proyecto de ley"
57
57
  proposal[:closed_at].should == Date.new(2009, 6, 24)
58
58
  proposal[:official_resolution].should == "Aprobado sin modificaciones"
59
- proposal[:commission_name].should == "Comisión de Economía y Hacienda"
60
- proposal[:proposer].should == "Gobierno"
59
+ proposal[:category_name].should == "Economía y Hacienda"
60
+ proposal[:proposer_name].should == "Gobierno"
61
61
  end
62
62
  end
63
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: congress-scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2011-10-22 00:00:00.000000000Z
14
+ date: 2012-12-14 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec
18
- requirement: &2152233780 !ruby/object:Gem::Requirement
18
+ requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,15 @@ dependencies:
23
23
  version: '0'
24
24
  type: :development
25
25
  prerelease: false
26
- version_requirements: *2152233780
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ! '>='
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
27
32
  - !ruby/object:Gem::Dependency
28
33
  name: webmock
29
- requirement: &2152233100 !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
30
35
  none: false
31
36
  requirements:
32
37
  - - ! '>='
@@ -34,10 +39,15 @@ dependencies:
34
39
  version: '0'
35
40
  type: :development
36
41
  prerelease: false
37
- version_requirements: *2152233100
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
38
48
  - !ruby/object:Gem::Dependency
39
49
  name: progressbar
40
- requirement: &2152232560 !ruby/object:Gem::Requirement
50
+ requirement: !ruby/object:Gem::Requirement
41
51
  none: false
42
52
  requirements:
43
53
  - - ! '>='
@@ -45,10 +55,15 @@ dependencies:
45
55
  version: '0'
46
56
  type: :runtime
47
57
  prerelease: false
48
- version_requirements: *2152232560
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
49
64
  - !ruby/object:Gem::Dependency
50
65
  name: mechanize
51
- requirement: &2152232140 !ruby/object:Gem::Requirement
66
+ requirement: !ruby/object:Gem::Requirement
52
67
  none: false
53
68
  requirements:
54
69
  - - ! '>='
@@ -56,7 +71,12 @@ dependencies:
56
71
  version: '0'
57
72
  type: :runtime
58
73
  prerelease: false
59
- version_requirements: *2152232140
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
60
80
  description: Scrapper to get proposals from Spanish Congress
61
81
  email:
62
82
  - voodoorai2000 at gmail
@@ -71,6 +91,7 @@ files:
71
91
  - congress-scrapper.gemspec
72
92
  - lib/congress-scrapper.rb
73
93
  - lib/congress-scrapper/version.rb
94
+ - lib/proposer.rb
74
95
  - spec/fixtures/closed_proposal_page.html
75
96
  - spec/fixtures/open_proposal_page.html
76
97
  - spec/fixtures/proposers.yml
@@ -100,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
121
  version: '0'
101
122
  requirements: []
102
123
  rubyforge_project: congress-scrapper
103
- rubygems_version: 1.8.10
124
+ rubygems_version: 1.8.24
104
125
  signing_key:
105
126
  specification_version: 3
106
127
  summary: Scrapper to get proposals from Spanish Congress