congress-scrapper 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/congress-scrapper/version.rb +1 -1
- data/lib/congress-scrapper.rb +19 -5
- data/lib/proposer.rb +49 -0
- data/spec/lib/scrapper_spec.rb +4 -4
- metadata +32 -11
data/lib/congress-scrapper.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require "congress-scrapper/version"
|
4
4
|
require "mechanize"
|
5
5
|
require "progressbar"
|
6
|
+
require_relative "proposer"
|
6
7
|
|
7
8
|
module Congress
|
8
9
|
module Scrapper
|
@@ -33,21 +34,20 @@ module Congress
|
|
33
34
|
|
34
35
|
commission_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Comisión competente:')]/following-sibling::*[@class='texto']"))
|
35
36
|
|
36
|
-
|
37
|
+
proposer_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Autor:')]/following-sibling::*[@class='texto']"))
|
37
38
|
|
38
39
|
proposed_at_text = text_for("//*[@class='texto' and contains(normalize-space(text()),'Presentado el')]")
|
39
40
|
proposed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if proposed_at_text && proposed_at_text.match(/Presentado\s+el\s+(\d\d)\/(\d\d)\/(\d\d\d\d)/)
|
40
41
|
|
41
42
|
closed_at_text = text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Tramitación seguida por la iniciativa:')]/following-sibling::*[@class='texto']")
|
42
43
|
closed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if closed_at_text && closed_at_text.match(/Concluido\s+.+\s+desde (\d\d)\/(\d\d)\/(\d\d\d\d)/)
|
43
|
-
|
44
44
|
proposal = {:title => clean_text(title.content),
|
45
45
|
:official_url => "http://www.congreso.es" + title[:href],
|
46
46
|
:proposal_type => proposal_type,
|
47
47
|
:closed_at => closed_at,
|
48
48
|
:official_resolution => resolution,
|
49
|
-
:
|
50
|
-
:
|
49
|
+
:category_name => category(commission_name),
|
50
|
+
:proposer_name => proposer(proposer_name),
|
51
51
|
:proposed_at => proposed_at}
|
52
52
|
|
53
53
|
progress.inc
|
@@ -75,5 +75,19 @@ module Congress
|
|
75
75
|
return unless text
|
76
76
|
text.gsub(/\s+/,' ').gsub(/\s*\.\s*$/, '').strip
|
77
77
|
end
|
78
|
+
|
79
|
+
def category(name)
|
80
|
+
return unless name
|
81
|
+
upcase_first(name.gsub(/Comisión( Mixta)?( del?| para las?)? /, ""))
|
82
|
+
end
|
83
|
+
|
84
|
+
def upcase_first(string)
|
85
|
+
string[0..0].upcase + string[1..-1]
|
86
|
+
end
|
87
|
+
|
88
|
+
def proposer(string)
|
89
|
+
return unless string
|
90
|
+
Proposer.new(string).name
|
91
|
+
end
|
78
92
|
end
|
79
|
-
end
|
93
|
+
end
|
data/lib/proposer.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
class Proposer
|
3
|
+
|
4
|
+
attr_accessor :name
|
5
|
+
|
6
|
+
def initialize(string)
|
7
|
+
@name = string
|
8
|
+
end
|
9
|
+
|
10
|
+
def name
|
11
|
+
full_name ? short_name(full_name) : @name
|
12
|
+
end
|
13
|
+
|
14
|
+
def full_name
|
15
|
+
mapping.map(&:last).index(@name)
|
16
|
+
end
|
17
|
+
|
18
|
+
def short_name(index)
|
19
|
+
mapping[index].first
|
20
|
+
end
|
21
|
+
|
22
|
+
def mapping
|
23
|
+
[["PSOE", "Grupo Parlamentario Socialista"],
|
24
|
+
["PP", "Grupo Parlamentario Popular en el Congreso"],
|
25
|
+
["Convergència i Unió", "Grupo Parlamentario Catalán (Convergència i Unió)"],
|
26
|
+
["PNV", "Grupo Parlamentario Vasco (EAJ-PNV)"],
|
27
|
+
["Izquierda Unida", "Grupo Parlamentario de Esquerra Republicana-Izquierda Unida-Iniciativa per Catalunya Verds"],
|
28
|
+
["Grupo Mixto", "Grupo Parlamentario Mixto"],
|
29
|
+
["PSOE", "Senado Grupo Parlamentario Socialista"],
|
30
|
+
["PP", "Senado Grupo Parlamentario Popular en el Senado"],
|
31
|
+
["Convergència i Unió", "Senado Grupo Parlamentario Catalán en el Senado de Convergencia i Unió"],
|
32
|
+
["PNV", "Senado Grupo Parlamentario de Senadores Nacionalistas"],
|
33
|
+
["Izquierda Unida", "Senado Grupo Parlamentario de Entesa Catalana de Progrés"],
|
34
|
+
["Grupo Mixto", "Senado Grupo Parlamentario Mixto"],
|
35
|
+
["Andalucía", "Comunidad Autónoma de Andalucía-Parlamento"],
|
36
|
+
["Aragón", "Comunidad Autónoma de Aragón-Cortes"],
|
37
|
+
["Canarias", "Comunidad Autónoma de Canarias - Parlamento"],
|
38
|
+
["Castilla y León", "Comunidad Autónoma de Castilla y León - Cortes"],
|
39
|
+
["Castilla-La Mancha", "Comunidad Autónoma de Castilla-La Mancha - Cortes"],
|
40
|
+
["Cataluña", "Comunidad Autónoma de Cataluña - Parlamento"],
|
41
|
+
["Extremadura", "Comunidad Autónoma de Extremadura - Asamblea"],
|
42
|
+
["Galicia", "Comunidad Autónoma de Galicia - Parlamento"],
|
43
|
+
["Murcia", "Comunidad Autónoma de la Región de Murcia - Asamblea Regional"],
|
44
|
+
["La Rioja", "Comunidad Autónoma de La Rioja - Diputación General"],
|
45
|
+
["Baleares", "Comunidad Autónoma de las Illes Balears - Gobierno"],
|
46
|
+
["País Vasco", "Comunidad Autónoma del País Vasco - Gobierno"],
|
47
|
+
["País Vasco", "Comunidad Autónoma del País Vasco - Parlamento"]]
|
48
|
+
end
|
49
|
+
end
|
data/spec/lib/scrapper_spec.rb
CHANGED
@@ -45,8 +45,8 @@ describe Congress::Scrapper do
|
|
45
45
|
proposal[:closed_at].should be_nil
|
46
46
|
proposal[:official_resolution].should be_nil
|
47
47
|
proposal[:proposed_at].should == Date.new(2010, 4, 9)
|
48
|
-
proposal[:
|
49
|
-
proposal[:
|
48
|
+
proposal[:category_name].should == "Medio Ambiente, Agricultura y Pesca"
|
49
|
+
proposal[:proposer_name].should == "PSOE"
|
50
50
|
end
|
51
51
|
|
52
52
|
it "should populate closed proposals info" do
|
@@ -56,8 +56,8 @@ describe Congress::Scrapper do
|
|
56
56
|
proposal[:proposal_type].should == "Proyecto de ley"
|
57
57
|
proposal[:closed_at].should == Date.new(2009, 6, 24)
|
58
58
|
proposal[:official_resolution].should == "Aprobado sin modificaciones"
|
59
|
-
proposal[:
|
60
|
-
proposal[:
|
59
|
+
proposal[:category_name].should == "Economía y Hacienda"
|
60
|
+
proposal[:proposer_name].should == "Gobierno"
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: congress-scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,11 +11,11 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2012-12-14 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|
18
|
-
requirement:
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,10 +23,15 @@ dependencies:
|
|
23
23
|
version: '0'
|
24
24
|
type: :development
|
25
25
|
prerelease: false
|
26
|
-
version_requirements:
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ! '>='
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: '0'
|
27
32
|
- !ruby/object:Gem::Dependency
|
28
33
|
name: webmock
|
29
|
-
requirement:
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
30
35
|
none: false
|
31
36
|
requirements:
|
32
37
|
- - ! '>='
|
@@ -34,10 +39,15 @@ dependencies:
|
|
34
39
|
version: '0'
|
35
40
|
type: :development
|
36
41
|
prerelease: false
|
37
|
-
version_requirements:
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
38
48
|
- !ruby/object:Gem::Dependency
|
39
49
|
name: progressbar
|
40
|
-
requirement:
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
41
51
|
none: false
|
42
52
|
requirements:
|
43
53
|
- - ! '>='
|
@@ -45,10 +55,15 @@ dependencies:
|
|
45
55
|
version: '0'
|
46
56
|
type: :runtime
|
47
57
|
prerelease: false
|
48
|
-
version_requirements:
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
49
64
|
- !ruby/object:Gem::Dependency
|
50
65
|
name: mechanize
|
51
|
-
requirement:
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
52
67
|
none: false
|
53
68
|
requirements:
|
54
69
|
- - ! '>='
|
@@ -56,7 +71,12 @@ dependencies:
|
|
56
71
|
version: '0'
|
57
72
|
type: :runtime
|
58
73
|
prerelease: false
|
59
|
-
version_requirements:
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
60
80
|
description: Scrapper to get proposals from Spanish Congress
|
61
81
|
email:
|
62
82
|
- voodoorai2000 at gmail
|
@@ -71,6 +91,7 @@ files:
|
|
71
91
|
- congress-scrapper.gemspec
|
72
92
|
- lib/congress-scrapper.rb
|
73
93
|
- lib/congress-scrapper/version.rb
|
94
|
+
- lib/proposer.rb
|
74
95
|
- spec/fixtures/closed_proposal_page.html
|
75
96
|
- spec/fixtures/open_proposal_page.html
|
76
97
|
- spec/fixtures/proposers.yml
|
@@ -100,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
121
|
version: '0'
|
101
122
|
requirements: []
|
102
123
|
rubyforge_project: congress-scrapper
|
103
|
-
rubygems_version: 1.8.
|
124
|
+
rubygems_version: 1.8.24
|
104
125
|
signing_key:
|
105
126
|
specification_version: 3
|
106
127
|
summary: Scrapper to get proposals from Spanish Congress
|