congress-scrapper 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  module Congress
2
2
  module Scrapper
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
@@ -3,6 +3,7 @@
3
3
  require "congress-scrapper/version"
4
4
  require "mechanize"
5
5
  require "progressbar"
6
+ require_relative "proposer"
6
7
 
7
8
  module Congress
8
9
  module Scrapper
@@ -33,21 +34,20 @@ module Congress
33
34
 
34
35
  commission_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Comisión competente:')]/following-sibling::*[@class='texto']"))
35
36
 
36
- proposer = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Autor:')]/following-sibling::*[@class='texto']"))
37
+ proposer_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Autor:')]/following-sibling::*[@class='texto']"))
37
38
 
38
39
  proposed_at_text = text_for("//*[@class='texto' and contains(normalize-space(text()),'Presentado el')]")
39
40
  proposed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if proposed_at_text && proposed_at_text.match(/Presentado\s+el\s+(\d\d)\/(\d\d)\/(\d\d\d\d)/)
40
41
 
41
42
  closed_at_text = text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Tramitación seguida por la iniciativa:')]/following-sibling::*[@class='texto']")
42
43
  closed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if closed_at_text && closed_at_text.match(/Concluido\s+.+\s+desde (\d\d)\/(\d\d)\/(\d\d\d\d)/)
43
-
44
44
  proposal = {:title => clean_text(title.content),
45
45
  :official_url => "http://www.congreso.es" + title[:href],
46
46
  :proposal_type => proposal_type,
47
47
  :closed_at => closed_at,
48
48
  :official_resolution => resolution,
49
- :commission_name => commission_name,
50
- :proposer => proposer,
49
+ :category_name => category(commission_name),
50
+ :proposer_name => proposer(proposer_name),
51
51
  :proposed_at => proposed_at}
52
52
 
53
53
  progress.inc
@@ -75,5 +75,19 @@ module Congress
75
75
  return unless text
76
76
  text.gsub(/\s+/,' ').gsub(/\s*\.\s*$/, '').strip
77
77
  end
78
+
79
# Derives a category label from a commission name.
#
# Every occurrence of "Comisión ", optionally followed by "Mixta" and by
# one of the connectors "de", "del", "para la" or "para las", is removed;
# the first character of what is left is then upper-cased via upcase_first.
#
# name - the commission name, or nil/false when none is available.
#
# Returns the category String, or nil when name is nil/false.
def category(name)
  if name
    without_prefix = name.gsub(/Comisión( Mixta)?( del?| para las?)? /, "")
    upcase_first(without_prefix)
  end
end
83
+
84
# Returns a copy of string with its first character upper-cased and the
# rest left untouched.
#
# string - the String to transform; may be empty.
#
# Returns a new String. An empty input yields "".
def upcase_first(string)
  # string[1..-1] is nil for an empty string, so coerce it with to_s;
  # otherwise String#+ raises TypeError on "".
  string[0, 1].upcase + string[1..-1].to_s
end
87
+
88
# Converts a raw proposer name into the value reported by Proposer#name.
#
# string - the proposer name, or nil/false when none is available.
#
# Returns whatever Proposer.new(string).name returns, or nil when string
# is nil/false.
def proposer(string)
  Proposer.new(string).name if string
end
78
92
  end
79
- end
93
+ end
data/lib/proposer.rb ADDED
@@ -0,0 +1,49 @@
1
+ # coding: utf-8
2
# Translates the long, official name of a proposer into a short label.
#
# The table below covers names starting with "Grupo Parlamentario",
# "Senado Grupo Parlamentario" and "Comunidad Autónoma". Any name that is
# not in the table is reported unchanged.
class Proposer
  # [short label, official name] pairs. Built once and frozen instead of
  # being re-allocated on every lookup.
  MAPPING = [
    ["PSOE", "Grupo Parlamentario Socialista"],
    ["PP", "Grupo Parlamentario Popular en el Congreso"],
    ["Convergència i Unió", "Grupo Parlamentario Catalán (Convergència i Unió)"],
    ["PNV", "Grupo Parlamentario Vasco (EAJ-PNV)"],
    ["Izquierda Unida", "Grupo Parlamentario de Esquerra Republicana-Izquierda Unida-Iniciativa per Catalunya Verds"],
    ["Grupo Mixto", "Grupo Parlamentario Mixto"],
    ["PSOE", "Senado Grupo Parlamentario Socialista"],
    ["PP", "Senado Grupo Parlamentario Popular en el Senado"],
    ["Convergència i Unió", "Senado Grupo Parlamentario Catalán en el Senado de Convergencia i Unió"],
    ["PNV", "Senado Grupo Parlamentario de Senadores Nacionalistas"],
    ["Izquierda Unida", "Senado Grupo Parlamentario de Entesa Catalana de Progrés"],
    ["Grupo Mixto", "Senado Grupo Parlamentario Mixto"],
    ["Andalucía", "Comunidad Autónoma de Andalucía-Parlamento"],
    ["Aragón", "Comunidad Autónoma de Aragón-Cortes"],
    ["Canarias", "Comunidad Autónoma de Canarias - Parlamento"],
    ["Castilla y León", "Comunidad Autónoma de Castilla y León - Cortes"],
    ["Castilla-La Mancha", "Comunidad Autónoma de Castilla-La Mancha - Cortes"],
    ["Cataluña", "Comunidad Autónoma de Cataluña - Parlamento"],
    ["Extremadura", "Comunidad Autónoma de Extremadura - Asamblea"],
    ["Galicia", "Comunidad Autónoma de Galicia - Parlamento"],
    ["Murcia", "Comunidad Autónoma de la Región de Murcia - Asamblea Regional"],
    ["La Rioja", "Comunidad Autónoma de La Rioja - Diputación General"],
    ["Baleares", "Comunidad Autónoma de las Illes Balears - Gobierno"],
    ["País Vasco", "Comunidad Autónoma del País Vasco - Gobierno"],
    ["País Vasco", "Comunidad Autónoma del País Vasco - Parlamento"]
  ].freeze

  # Only the writer is generated: the reader is the custom #name below, and
  # generating both would trigger a "method redefined" warning.
  attr_writer :name

  # string - the official proposer name as received from the caller.
  def initialize(string)
    @name = string
  end

  # Returns the short label when the stored name is in the table,
  # otherwise the stored name itself.
  def name
    # Index 0 is truthy in Ruby, so the first table row is handled correctly.
    index = full_name
    index ? short_name(index) : @name
  end

  # Despite its name, returns the row index (Integer) of the stored name in
  # the table, or nil when the name is not listed.
  def full_name
    MAPPING.index { |_short, official| official == @name }
  end

  # Returns the short label stored in table row `index`.
  def short_name(index)
    MAPPING[index].first
  end

  # Returns the (frozen) table of [short label, official name] pairs.
  def mapping
    MAPPING
  end
end
@@ -45,8 +45,8 @@ describe Congress::Scrapper do
45
45
  proposal[:closed_at].should be_nil
46
46
  proposal[:official_resolution].should be_nil
47
47
  proposal[:proposed_at].should == Date.new(2010, 4, 9)
48
- proposal[:commission_name].should == "Comisión de Medio Ambiente, Agricultura y Pesca"
49
- proposal[:proposer].should == "Grupo Parlamentario Socialista"
48
+ proposal[:category_name].should == "Medio Ambiente, Agricultura y Pesca"
49
+ proposal[:proposer_name].should == "PSOE"
50
50
  end
51
51
 
52
52
  it "should populate closed proposals info" do
@@ -56,8 +56,8 @@ describe Congress::Scrapper do
56
56
  proposal[:proposal_type].should == "Proyecto de ley"
57
57
  proposal[:closed_at].should == Date.new(2009, 6, 24)
58
58
  proposal[:official_resolution].should == "Aprobado sin modificaciones"
59
- proposal[:commission_name].should == "Comisión de Economía y Hacienda"
60
- proposal[:proposer].should == "Gobierno"
59
+ proposal[:category_name].should == "Economía y Hacienda"
60
+ proposal[:proposer_name].should == "Gobierno"
61
61
  end
62
62
  end
63
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: congress-scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,11 +11,11 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2011-10-22 00:00:00.000000000Z
14
+ date: 2012-12-14 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec
18
- requirement: &2152233780 !ruby/object:Gem::Requirement
18
+ requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,15 @@ dependencies:
23
23
  version: '0'
24
24
  type: :development
25
25
  prerelease: false
26
- version_requirements: *2152233780
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ! '>='
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
27
32
  - !ruby/object:Gem::Dependency
28
33
  name: webmock
29
- requirement: &2152233100 !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
30
35
  none: false
31
36
  requirements:
32
37
  - - ! '>='
@@ -34,10 +39,15 @@ dependencies:
34
39
  version: '0'
35
40
  type: :development
36
41
  prerelease: false
37
- version_requirements: *2152233100
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
38
48
  - !ruby/object:Gem::Dependency
39
49
  name: progressbar
40
- requirement: &2152232560 !ruby/object:Gem::Requirement
50
+ requirement: !ruby/object:Gem::Requirement
41
51
  none: false
42
52
  requirements:
43
53
  - - ! '>='
@@ -45,10 +55,15 @@ dependencies:
45
55
  version: '0'
46
56
  type: :runtime
47
57
  prerelease: false
48
- version_requirements: *2152232560
58
+ version_requirements: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
49
64
  - !ruby/object:Gem::Dependency
50
65
  name: mechanize
51
- requirement: &2152232140 !ruby/object:Gem::Requirement
66
+ requirement: !ruby/object:Gem::Requirement
52
67
  none: false
53
68
  requirements:
54
69
  - - ! '>='
@@ -56,7 +71,12 @@ dependencies:
56
71
  version: '0'
57
72
  type: :runtime
58
73
  prerelease: false
59
- version_requirements: *2152232140
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
60
80
  description: Scrapper to get proposals from Spanish Congress
61
81
  email:
62
82
  - voodoorai2000 at gmail
@@ -71,6 +91,7 @@ files:
71
91
  - congress-scrapper.gemspec
72
92
  - lib/congress-scrapper.rb
73
93
  - lib/congress-scrapper/version.rb
94
+ - lib/proposer.rb
74
95
  - spec/fixtures/closed_proposal_page.html
75
96
  - spec/fixtures/open_proposal_page.html
76
97
  - spec/fixtures/proposers.yml
@@ -100,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
121
  version: '0'
101
122
  requirements: []
102
123
  rubyforge_project: congress-scrapper
103
- rubygems_version: 1.8.10
124
+ rubygems_version: 1.8.24
104
125
  signing_key:
105
126
  specification_version: 3
106
127
  summary: Scrapper to get proposals from Spanish Congress