congress-scrapper 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/congress-scrapper.rb +59 -40
- data/lib/congress-scrapper/version.rb +1 -1
- data/spec/fixtures/full_proposal_text.html +4837 -0
- data/spec/fixtures/proposal_with_law_draft.html +3448 -0
- data/spec/lib/scrapper_spec.rb +15 -16
- data/spec/lib/scrapper_spec_helper.rb +4 -0
- metadata +6 -2
data/spec/lib/scrapper_spec.rb
CHANGED
@@ -12,28 +12,20 @@ describe Congress::Scrapper do
|
|
12
12
|
stub_request(:get, search_results_next_page).to_return(:body => fixture(:search_results_page2), :headers => { 'Content-Type' => 'text/html' })
|
13
13
|
stub_request(:get, proposal_page1).to_return(:body => fixture(:open_proposal_page), :headers => { 'Content-Type' => 'text/html' })
|
14
14
|
stub_request(:get, proposal_page2).to_return(:body => fixture(:closed_proposal_with_changes_page), :headers => { 'Content-Type' => 'text/html' })
|
15
|
-
stub_request(:get, proposal_page3).to_return(:body =>
|
15
|
+
stub_request(:get, proposal_page3).to_return(:body => fixture(:proposal_with_law_draft), :headers => { 'Content-Type' => 'text/html' })
|
16
16
|
stub_request(:get, proposal_page4).to_return(:body => fixture(:closed_proposal_page), :headers => { 'Content-Type' => 'text/html' })
|
17
|
+
|
18
|
+
stub_request(:get, /#{full_proposal_text}/).to_return(:body => fixture(:full_proposal_text), :headers => { 'Content-Type' => 'text/html' })
|
17
19
|
end
|
18
20
|
|
19
21
|
it "should go to the proposal search form" do
|
20
22
|
Congress::Scrapper.scrape
|
21
|
-
a_request(:get, search_page).should have_been_made
|
23
|
+
a_request(:get, search_page).should have_been_made.times(2)
|
22
24
|
end
|
23
25
|
|
24
26
|
it "should search the proposals we're interested in" do
|
25
27
|
Congress::Scrapper.scrape
|
26
|
-
a_request(:post, search_results_page).with{|r| r.body =~ /TPTR=Competencia\+Legislativa\+Plena/}.should have_been_made
|
27
|
-
end
|
28
|
-
|
29
|
-
it "should create one proposal for record found" do
|
30
|
-
proposals = Congress::Scrapper.scrape
|
31
|
-
proposals.collect { |p| p[:title] }.should == [
|
32
|
-
"Proyecto de Ley de almacenamiento geológico de dióxido de carbono",
|
33
|
-
"Proyecto de Ley del régimen de cesión de tributos del Estado a la Comunitat Valenciana y de fijación del alcance y condiciones de dicha cesión",
|
34
|
-
"Proyecto de Ley de Reforma del Sistema de Apoyo Financiero a la Internacionalización de la empresa española",
|
35
|
-
"Proyecto de Ley por la que se modifica el Estatuto Legal del Consorcio de Compensación de Seguros, aprobado por el Real Decreto Legislativo 7/2004, de 29 de octubre, para suprimir las funciones del Consorcio de Compensación de Seguros en relación con los seguros obligatorios de viajeros y del cazador y reducir el recargo destinado a financiar las funciones de liquidación de entidades aseguradoras, y el texto refundido de la Ley de Ordenación y Supervisión de los Seguros Privados, aprobado por el Real Decreto Legislativo 6/2004, de 29 de octubre"
|
36
|
-
]
|
28
|
+
a_request(:post, search_results_page).with{|r| r.body =~ /TPTR=Competencia\+Legislativa\+Plena/}.should have_been_made.times(2)
|
37
29
|
end
|
38
30
|
|
39
31
|
it "should populate open proposals info" do
|
@@ -42,7 +34,7 @@ describe Congress::Scrapper do
|
|
42
34
|
proposal[:official_url].should == proposal_page1
|
43
35
|
proposal[:proposal_type].should == "Proyecto de ley"
|
44
36
|
proposal[:closed_at].should be_nil
|
45
|
-
proposal[:
|
37
|
+
proposal[:status].should == "Comisión de Medio Ambiente, Agricultura y Pesca Enmiendas"
|
46
38
|
proposal[:proposed_at].should == Date.new(2010, 4, 9)
|
47
39
|
proposal[:category_name].should == "Medio Ambiente, Agricultura y Pesca"
|
48
40
|
proposal[:proposer_name].should == "PSOE"
|
@@ -54,7 +46,7 @@ describe Congress::Scrapper do
|
|
54
46
|
proposal[:official_url].should == proposal_page2
|
55
47
|
proposal[:proposal_type].should == "Proyecto de ley"
|
56
48
|
proposal[:closed_at].should == Date.new(2012, 10, 24)
|
57
|
-
proposal[:
|
49
|
+
proposal[:status].should == "Concluido - (Aprobado con modificaciones)"
|
58
50
|
proposal[:category_name].should == "Hacienda y Administraciones Públicas"
|
59
51
|
proposal[:proposer_name].should == "Gobierno"
|
60
52
|
end
|
@@ -65,10 +57,17 @@ describe Congress::Scrapper do
|
|
65
57
|
proposal[:official_url].should == proposal_page4
|
66
58
|
proposal[:proposal_type].should == "Proyecto de ley"
|
67
59
|
proposal[:closed_at].should == Date.new(2009, 6, 24)
|
68
|
-
proposal[:
|
60
|
+
proposal[:status].should == "Aprobado sin modificaciones"
|
69
61
|
proposal[:category_name].should == "Economía y Hacienda"
|
70
62
|
proposal[:proposer_name].should == "Gobierno"
|
71
63
|
end
|
64
|
+
|
65
|
+
it "should populate the full proposal text" do
|
66
|
+
proposals = Congress::Scrapper.scrape
|
67
|
+
proposal = proposals[2]
|
68
|
+
proposal[:official_url].should == proposal_page3
|
69
|
+
proposal[:body].should =~ /En cumplimiento de lo dispuesto en el artículo 86.2/
|
70
|
+
end
|
72
71
|
|
73
72
|
end
|
74
73
|
end
|
@@ -32,6 +32,10 @@ module CongressWebSitePaths
|
|
32
32
|
def proposal_page5
|
33
33
|
"http://www.congreso.es/portal/page/portal/Congreso/Congreso/Iniciativas/Busqueda%20Avanzada?_piref73_1335465_73_1335464_1335464.next_page=/wc/servidorCGI&CMD=VERLST&BASE=IW10&PIECE=IWA0&FMT=INITXD1S.fmt&FORM1=INITXLTS.fmt&DOCS=13-13&QUERY=%28I%29.ACIN1.+%26+%28%22COMPETENCIA+LEGISLATIVA+PLENA%22%29.TPTR."
|
34
34
|
end
|
35
|
+
|
36
|
+
def full_proposal_text
|
37
|
+
"http://www.congreso.es/portal/page/portal/Congreso/PopUpCGI"
|
38
|
+
end
|
35
39
|
|
36
40
|
end
|
37
41
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: congress-scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2013-07-21 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|
@@ -94,7 +94,9 @@ files:
|
|
94
94
|
- lib/proposer.rb
|
95
95
|
- spec/fixtures/closed_proposal_page.html
|
96
96
|
- spec/fixtures/closed_proposal_with_changes_page.html
|
97
|
+
- spec/fixtures/full_proposal_text.html
|
97
98
|
- spec/fixtures/open_proposal_page.html
|
99
|
+
- spec/fixtures/proposal_with_law_draft.html
|
98
100
|
- spec/fixtures/proposers.yml
|
99
101
|
- spec/fixtures/search_page.html
|
100
102
|
- spec/fixtures/search_results_page1.html
|
@@ -129,7 +131,9 @@ summary: Scrapper to get proposals from Spanish Congress
|
|
129
131
|
test_files:
|
130
132
|
- spec/fixtures/closed_proposal_page.html
|
131
133
|
- spec/fixtures/closed_proposal_with_changes_page.html
|
134
|
+
- spec/fixtures/full_proposal_text.html
|
132
135
|
- spec/fixtures/open_proposal_page.html
|
136
|
+
- spec/fixtures/proposal_with_law_draft.html
|
133
137
|
- spec/fixtures/proposers.yml
|
134
138
|
- spec/fixtures/search_page.html
|
135
139
|
- spec/fixtures/search_results_page1.html
|