congress-scrapper 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/congress-scrapper/version.rb +1 -1
- data/lib/congress-scrapper.rb +19 -5
- data/lib/proposer.rb +49 -0
- data/spec/lib/scrapper_spec.rb +4 -4
- metadata +32 -11
data/lib/congress-scrapper.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require "congress-scrapper/version"
|
4
4
|
require "mechanize"
|
5
5
|
require "progressbar"
|
6
|
+
require_relative "proposer"
|
6
7
|
|
7
8
|
module Congress
|
8
9
|
module Scrapper
|
@@ -33,21 +34,20 @@ module Congress
|
|
33
34
|
|
34
35
|
commission_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Comisión competente:')]/following-sibling::*[@class='texto']"))
|
35
36
|
|
36
|
-
|
37
|
+
proposer_name = clean_text(text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Autor:')]/following-sibling::*[@class='texto']"))
|
37
38
|
|
38
39
|
proposed_at_text = text_for("//*[@class='texto' and contains(normalize-space(text()),'Presentado el')]")
|
39
40
|
proposed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if proposed_at_text && proposed_at_text.match(/Presentado\s+el\s+(\d\d)\/(\d\d)\/(\d\d\d\d)/)
|
40
41
|
|
41
42
|
closed_at_text = text_for("//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'Tramitación seguida por la iniciativa:')]/following-sibling::*[@class='texto']")
|
42
43
|
closed_at = Date.new($3.to_i, $2.to_i, $1.to_i) if closed_at_text && closed_at_text.match(/Concluido\s+.+\s+desde (\d\d)\/(\d\d)\/(\d\d\d\d)/)
|
43
|
-
|
44
44
|
proposal = {:title => clean_text(title.content),
|
45
45
|
:official_url => "http://www.congreso.es" + title[:href],
|
46
46
|
:proposal_type => proposal_type,
|
47
47
|
:closed_at => closed_at,
|
48
48
|
:official_resolution => resolution,
|
49
|
-
:
|
50
|
-
:
|
49
|
+
:category_name => category(commission_name),
|
50
|
+
:proposer_name => proposer(proposer_name),
|
51
51
|
:proposed_at => proposed_at}
|
52
52
|
|
53
53
|
progress.inc
|
@@ -75,5 +75,19 @@ module Congress
|
|
75
75
|
return unless text
|
76
76
|
text.gsub(/\s+/,' ').gsub(/\s*\.\s*$/, '').strip
|
77
77
|
end
|
78
|
+
|
79
|
+
def category(name)
|
80
|
+
return unless name
|
81
|
+
upcase_first(name.gsub(/Comisión( Mixta)?( del?| para las?)? /, ""))
|
82
|
+
end
|
83
|
+
|
84
|
+
def upcase_first(string)
|
85
|
+
string[0..0].upcase + string[1..-1]
|
86
|
+
end
|
87
|
+
|
88
|
+
def proposer(string)
|
89
|
+
return unless string
|
90
|
+
Proposer.new(string).name
|
91
|
+
end
|
78
92
|
end
|
79
|
-
end
|
93
|
+
end
|
data/lib/proposer.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
class Proposer
|
3
|
+
|
4
|
+
attr_accessor :name
|
5
|
+
|
6
|
+
def initialize(string)
|
7
|
+
@name = string
|
8
|
+
end
|
9
|
+
|
10
|
+
def name
|
11
|
+
full_name ? short_name(full_name) : @name
|
12
|
+
end
|
13
|
+
|
14
|
+
def full_name
|
15
|
+
mapping.map(&:last).index(@name)
|
16
|
+
end
|
17
|
+
|
18
|
+
def short_name(index)
|
19
|
+
mapping[index].first
|
20
|
+
end
|
21
|
+
|
22
|
+
def mapping
|
23
|
+
[["PSOE", "Grupo Parlamentario Socialista"],
|
24
|
+
["PP", "Grupo Parlamentario Popular en el Congreso"],
|
25
|
+
["Convergència i Unió", "Grupo Parlamentario Catalán (Convergència i Unió)"],
|
26
|
+
["PNV", "Grupo Parlamentario Vasco (EAJ-PNV)"],
|
27
|
+
["Izquierda Unida", "Grupo Parlamentario de Esquerra Republicana-Izquierda Unida-Iniciativa per Catalunya Verds"],
|
28
|
+
["Grupo Mixto", "Grupo Parlamentario Mixto"],
|
29
|
+
["PSOE", "Senado Grupo Parlamentario Socialista"],
|
30
|
+
["PP", "Senado Grupo Parlamentario Popular en el Senado"],
|
31
|
+
["Convergència i Unió", "Senado Grupo Parlamentario Catalán en el Senado de Convergencia i Unió"],
|
32
|
+
["PNV", "Senado Grupo Parlamentario de Senadores Nacionalistas"],
|
33
|
+
["Izquierda Unida", "Senado Grupo Parlamentario de Entesa Catalana de Progrés"],
|
34
|
+
["Grupo Mixto", "Senado Grupo Parlamentario Mixto"],
|
35
|
+
["Andalucía", "Comunidad Autónoma de Andalucía-Parlamento"],
|
36
|
+
["Aragón", "Comunidad Autónoma de Aragón-Cortes"],
|
37
|
+
["Canarias", "Comunidad Autónoma de Canarias - Parlamento"],
|
38
|
+
["Castilla y León", "Comunidad Autónoma de Castilla y León - Cortes"],
|
39
|
+
["Castilla-La Mancha", "Comunidad Autónoma de Castilla-La Mancha - Cortes"],
|
40
|
+
["Cataluña", "Comunidad Autónoma de Cataluña - Parlamento"],
|
41
|
+
["Extremadura", "Comunidad Autónoma de Extremadura - Asamblea"],
|
42
|
+
["Galicia", "Comunidad Autónoma de Galicia - Parlamento"],
|
43
|
+
["Murcia", "Comunidad Autónoma de la Región de Murcia - Asamblea Regional"],
|
44
|
+
["La Rioja", "Comunidad Autónoma de La Rioja - Diputación General"],
|
45
|
+
["Baleares", "Comunidad Autónoma de las Illes Balears - Gobierno"],
|
46
|
+
["País Vasco", "Comunidad Autónoma del País Vasco - Gobierno"],
|
47
|
+
["País Vasco", "Comunidad Autónoma del País Vasco - Parlamento"]]
|
48
|
+
end
|
49
|
+
end
|
data/spec/lib/scrapper_spec.rb
CHANGED
@@ -45,8 +45,8 @@ describe Congress::Scrapper do
|
|
45
45
|
proposal[:closed_at].should be_nil
|
46
46
|
proposal[:official_resolution].should be_nil
|
47
47
|
proposal[:proposed_at].should == Date.new(2010, 4, 9)
|
48
|
-
proposal[:
|
49
|
-
proposal[:
|
48
|
+
proposal[:category_name].should == "Medio Ambiente, Agricultura y Pesca"
|
49
|
+
proposal[:proposer_name].should == "PSOE"
|
50
50
|
end
|
51
51
|
|
52
52
|
it "should populate closed proposals info" do
|
@@ -56,8 +56,8 @@ describe Congress::Scrapper do
|
|
56
56
|
proposal[:proposal_type].should == "Proyecto de ley"
|
57
57
|
proposal[:closed_at].should == Date.new(2009, 6, 24)
|
58
58
|
proposal[:official_resolution].should == "Aprobado sin modificaciones"
|
59
|
-
proposal[:
|
60
|
-
proposal[:
|
59
|
+
proposal[:category_name].should == "Economía y Hacienda"
|
60
|
+
proposal[:proposer_name].should == "Gobierno"
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: congress-scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,11 +11,11 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2012-12-14 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|
18
|
-
requirement:
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,10 +23,15 @@ dependencies:
|
|
23
23
|
version: '0'
|
24
24
|
type: :development
|
25
25
|
prerelease: false
|
26
|
-
version_requirements:
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - ! '>='
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: '0'
|
27
32
|
- !ruby/object:Gem::Dependency
|
28
33
|
name: webmock
|
29
|
-
requirement:
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
30
35
|
none: false
|
31
36
|
requirements:
|
32
37
|
- - ! '>='
|
@@ -34,10 +39,15 @@ dependencies:
|
|
34
39
|
version: '0'
|
35
40
|
type: :development
|
36
41
|
prerelease: false
|
37
|
-
version_requirements:
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
38
48
|
- !ruby/object:Gem::Dependency
|
39
49
|
name: progressbar
|
40
|
-
requirement:
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
41
51
|
none: false
|
42
52
|
requirements:
|
43
53
|
- - ! '>='
|
@@ -45,10 +55,15 @@ dependencies:
|
|
45
55
|
version: '0'
|
46
56
|
type: :runtime
|
47
57
|
prerelease: false
|
48
|
-
version_requirements:
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
49
64
|
- !ruby/object:Gem::Dependency
|
50
65
|
name: mechanize
|
51
|
-
requirement:
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
52
67
|
none: false
|
53
68
|
requirements:
|
54
69
|
- - ! '>='
|
@@ -56,7 +71,12 @@ dependencies:
|
|
56
71
|
version: '0'
|
57
72
|
type: :runtime
|
58
73
|
prerelease: false
|
59
|
-
version_requirements:
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
60
80
|
description: Scrapper to get proposals from Spanish Congress
|
61
81
|
email:
|
62
82
|
- voodoorai2000 at gmail
|
@@ -71,6 +91,7 @@ files:
|
|
71
91
|
- congress-scrapper.gemspec
|
72
92
|
- lib/congress-scrapper.rb
|
73
93
|
- lib/congress-scrapper/version.rb
|
94
|
+
- lib/proposer.rb
|
74
95
|
- spec/fixtures/closed_proposal_page.html
|
75
96
|
- spec/fixtures/open_proposal_page.html
|
76
97
|
- spec/fixtures/proposers.yml
|
@@ -100,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
121
|
version: '0'
|
101
122
|
requirements: []
|
102
123
|
rubyforge_project: congress-scrapper
|
103
|
-
rubygems_version: 1.8.
|
124
|
+
rubygems_version: 1.8.24
|
104
125
|
signing_key:
|
105
126
|
specification_version: 3
|
106
127
|
summary: Scrapper to get proposals from Spanish Congress
|