lobbyliste 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/lobbyliste +17 -1
- data/lib/lobbyliste/downloader.rb +15 -2
- data/lib/lobbyliste/factories/person_factory.rb +1 -1
- data/lib/lobbyliste/person.rb +6 -2
- data/lib/lobbyliste/version.rb +1 -1
- data/lib/lobbyliste.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 760a270ddbd57ea8ccf471e37383f550f1abbd7d
|
4
|
+
data.tar.gz: 2da7a0cce32f6e6defde7c69ea937052c84dc770
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dacbb13d833b08291962f514da796039e74949737597ab02ff8f53080fe5343e262423696782827cd2d99544d577ab085d3dee548c177d32499bde63026fadea
|
7
|
+
data.tar.gz: cc9fa586a5ec1b7cdea7b5c4b7dcfb2ed238254cf6fbdf3d551991df0c06c8fa2cc1e242decc1ef455fe87767bc9376609014b720df62c11d18fa04793de563f
|
data/bin/lobbyliste
CHANGED
@@ -2,6 +2,22 @@
|
|
2
2
|
|
3
3
|
require "bundler/setup"
|
4
4
|
require "lobbyliste"
|
5
|
+
require 'optparse'
|
5
6
|
|
6
|
-
|
7
|
+
$url = nil
|
8
|
+
|
9
|
+
OptionParser.new do |opts|
|
10
|
+
opts.banner = "Usage: lobbyliste [options]"
|
11
|
+
|
12
|
+
opts.on("-u","--url URL","Specify link to Lobbyliste pdf") do |url|
|
13
|
+
$url = url
|
14
|
+
end
|
15
|
+
|
16
|
+
opts.on("-h", "--help", "Prints this help") do
|
17
|
+
puts opts
|
18
|
+
exit
|
19
|
+
end
|
20
|
+
end.parse!
|
21
|
+
|
22
|
+
list = Lobbyliste.fetch_and_parse($url)
|
7
23
|
puts list.to_json
|
data/lib/lobbyliste/downloader.rb
CHANGED
@@ -6,6 +6,12 @@ module Lobbyliste
|
|
6
6
|
# This class finds the lobbyliste pdf on the Bundestag website, downloads it and extracts the pdf content
|
7
7
|
class Downloader
|
8
8
|
|
9
|
+
# Creates a new Downloader
|
10
|
+
# @param [String] link that will be used to fetch the lobbylist pdf, defaults to nil
|
11
|
+
def initialize(pdf_link=nil)
|
12
|
+
@pdf_link = pdf_link
|
13
|
+
end
|
14
|
+
|
9
15
|
# @return [String] raw content of pdf file
|
10
16
|
def pdf_data
|
11
17
|
retrieve_pdf unless @pdf_data
|
@@ -25,18 +31,25 @@ module Lobbyliste
|
|
25
31
|
@html_data
|
26
32
|
end
|
27
33
|
|
34
|
+
# @return [String] link to Lobbyliste pdf
|
35
|
+
def pdf_link
|
36
|
+
fetch_pdf_link unless @pdf_link
|
37
|
+
@pdf_link
|
38
|
+
end
|
39
|
+
|
28
40
|
private
|
29
41
|
|
30
42
|
# Since this link changes with every new version we download the Lobbyliste website and try to extract the link
|
31
43
|
# @return [String] the link to the Lobbyliste pdf
|
32
|
-
def pdf_link
|
44
|
+
def fetch_pdf_link
|
33
45
|
website = Nokogiri::HTML(open("https://www.bundestag.de/dokumente/lobbyliste"))
|
34
46
|
link = website.css(".inhalt a[title^='Aktuelle Fassung']").first
|
35
47
|
|
36
48
|
raise "Could no find PDF link on the website!" unless link
|
37
|
-
"https://bundestag.de#{link['href']}"
|
49
|
+
@pdf_link = "https://bundestag.de#{link['href']}"
|
38
50
|
end
|
39
51
|
|
52
|
+
|
40
53
|
def retrieve_pdf
|
41
54
|
@pdf_data = open(pdf_link) {|f| f.read}
|
42
55
|
end
|
data/lib/lobbyliste/factories/person_factory.rb
CHANGED
@@ -33,7 +33,7 @@ module Lobbyliste
|
|
33
33
|
# @return [Lobbylist::Person] builds a new person, might be nil if the line does not represent a person
|
34
34
|
def self.build(raw_data)
|
35
35
|
factory = new(raw_data)
|
36
|
-
factory.is_person? ? ::Lobbyliste::Person.new(factory.name,factory.titles) : nil
|
36
|
+
factory.is_person? ? ::Lobbyliste::Person.new(factory.name,factory.titles,raw_data) : nil
|
37
37
|
end
|
38
38
|
|
39
39
|
def initialize(raw_data)
|
data/lib/lobbyliste/person.rb
CHANGED
@@ -8,13 +8,17 @@ module Lobbyliste
|
|
8
8
|
# @return [Array] list of all titles (job, academic, positions)
|
9
9
|
attr_reader :titles
|
10
10
|
|
11
|
-
|
11
|
+
# @return [String] the original name with titles as stated in the document
|
12
|
+
attr_reader :original_name
|
13
|
+
|
14
|
+
def initialize(name, titles, original_name)
|
12
15
|
@name = name
|
13
16
|
@titles = titles
|
17
|
+
@original_name = original_name
|
14
18
|
end
|
15
19
|
|
16
20
|
def ==(other)
|
17
|
-
|
21
|
+
original_name==other.original_name
|
18
22
|
end
|
19
23
|
|
20
24
|
def to_json(*a)
|
data/lib/lobbyliste/version.rb
CHANGED
data/lib/lobbyliste.rb
CHANGED
@@ -11,9 +11,10 @@ require 'json'
|
|
11
11
|
module Lobbyliste
|
12
12
|
|
13
13
|
# Download the PDF and parse it
|
14
|
+
# @param [String] link to Lobbyliste pdf, if left out pdf link is retrieved automatically from Bundestag website
|
14
15
|
# @return [Lobbyliste::Liste]
|
15
|
-
def self.fetch_and_parse
|
16
|
-
downloader = Lobbyliste::Downloader.new
|
16
|
+
def self.fetch_and_parse(pdf_link=nil)
|
17
|
+
downloader = Lobbyliste::Downloader.new(pdf_link)
|
17
18
|
Lobbyliste::Factories::ListFactory.build(downloader.text_data,downloader.html_data)
|
18
19
|
end
|
19
20
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lobbyliste
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- DarthMax
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|