lobbyliste 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/lobbyliste +17 -1
- data/lib/lobbyliste/downloader.rb +15 -2
- data/lib/lobbyliste/factories/person_factory.rb +1 -1
- data/lib/lobbyliste/person.rb +6 -2
- data/lib/lobbyliste/version.rb +1 -1
- data/lib/lobbyliste.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 760a270ddbd57ea8ccf471e37383f550f1abbd7d
|
4
|
+
data.tar.gz: 2da7a0cce32f6e6defde7c69ea937052c84dc770
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dacbb13d833b08291962f514da796039e74949737597ab02ff8f53080fe5343e262423696782827cd2d99544d577ab085d3dee548c177d32499bde63026fadea
|
7
|
+
data.tar.gz: cc9fa586a5ec1b7cdea7b5c4b7dcfb2ed238254cf6fbdf3d551991df0c06c8fa2cc1e242decc1ef455fe87767bc9376609014b720df62c11d18fa04793de563f
|
data/bin/lobbyliste
CHANGED
@@ -2,6 +2,22 @@
|
|
2
2
|
|
3
3
|
require "bundler/setup"
|
4
4
|
require "lobbyliste"
|
5
|
+
require 'optparse'
|
5
6
|
|
6
|
-
|
7
|
+
$url = nil
|
8
|
+
|
9
|
+
OptionParser.new do |opts|
|
10
|
+
opts.banner = "Usage: lobbyliste [options]"
|
11
|
+
|
12
|
+
opts.on("-u","--url URL","Specify link to Lobbyliste pdf") do |url|
|
13
|
+
$url = url
|
14
|
+
end
|
15
|
+
|
16
|
+
opts.on("-h", "--help", "Prints this help") do
|
17
|
+
puts opts
|
18
|
+
exit
|
19
|
+
end
|
20
|
+
end.parse!
|
21
|
+
|
22
|
+
list = Lobbyliste.fetch_and_parse($url)
|
7
23
|
puts list.to_json
|
data/lib/lobbyliste/downloader.rb
CHANGED
@@ -6,6 +6,12 @@ module Lobbyliste
|
|
6
6
|
# This class finds the lobbyliste pdf on the Bundestag website, downloads it and extracts the pdf content
|
7
7
|
class Downloader
|
8
8
|
|
9
|
+
# Creates a new Downloader
|
10
|
+
# @param [String] link that will be used to fetch the lobbylist pdf, defaults to nil
|
11
|
+
def initialize(pdf_link=nil)
|
12
|
+
@pdf_link = pdf_link
|
13
|
+
end
|
14
|
+
|
9
15
|
# @return [String] raw content of pdf file
|
10
16
|
def pdf_data
|
11
17
|
retrieve_pdf unless @pdf_data
|
@@ -25,18 +31,25 @@ module Lobbyliste
|
|
25
31
|
@html_data
|
26
32
|
end
|
27
33
|
|
34
|
+
# @return [String] link to Lobbyliste pdf
|
35
|
+
def pdf_link
|
36
|
+
fetch_pdf_link unless @pdf_link
|
37
|
+
@pdf_link
|
38
|
+
end
|
39
|
+
|
28
40
|
private
|
29
41
|
|
30
42
|
# Since this link changes with every new version we download the Lobbyliste website and try to extract the link
|
31
43
|
# @return [String] the link to the Lobbyliste pdf
|
32
|
-
def pdf_link
|
44
|
+
def fetch_pdf_link
|
33
45
|
website = Nokogiri::HTML(open("https://www.bundestag.de/dokumente/lobbyliste"))
|
34
46
|
link = website.css(".inhalt a[title^='Aktuelle Fassung']").first
|
35
47
|
|
36
48
|
raise "Could no find PDF link on the website!" unless link
|
37
|
-
"https://bundestag.de#{link['href']}"
|
49
|
+
@pdf_link = "https://bundestag.de#{link['href']}"
|
38
50
|
end
|
39
51
|
|
52
|
+
|
40
53
|
def retrieve_pdf
|
41
54
|
@pdf_data = open(pdf_link) {|f| f.read}
|
42
55
|
end
|
data/lib/lobbyliste/factories/person_factory.rb
CHANGED
@@ -33,7 +33,7 @@ module Lobbyliste
|
|
33
33
|
# @return [Lobbylist::Person] builds a new person, might be nil if the line does not represent a person
|
34
34
|
def self.build(raw_data)
|
35
35
|
factory = new(raw_data)
|
36
|
-
factory.is_person? ? ::Lobbyliste::Person.new(factory.name,factory.titles) : nil
|
36
|
+
factory.is_person? ? ::Lobbyliste::Person.new(factory.name,factory.titles,raw_data) : nil
|
37
37
|
end
|
38
38
|
|
39
39
|
def initialize(raw_data)
|
data/lib/lobbyliste/person.rb
CHANGED
@@ -8,13 +8,17 @@ module Lobbyliste
|
|
8
8
|
# @return [Array] list of all titles (job, academic, positions)
|
9
9
|
attr_reader :titles
|
10
10
|
|
11
|
-
|
11
|
+
# @return [String] the original name with titles as stated in the document
|
12
|
+
attr_reader :original_name
|
13
|
+
|
14
|
+
def initialize(name, titles, original_name)
|
12
15
|
@name = name
|
13
16
|
@titles = titles
|
17
|
+
@original_name = original_name
|
14
18
|
end
|
15
19
|
|
16
20
|
def ==(other)
|
17
|
-
|
21
|
+
original_name==other.original_name
|
18
22
|
end
|
19
23
|
|
20
24
|
def to_json(*a)
|
data/lib/lobbyliste/version.rb
CHANGED
data/lib/lobbyliste.rb
CHANGED
@@ -11,9 +11,10 @@ require 'json'
|
|
11
11
|
module Lobbyliste
|
12
12
|
|
13
13
|
# Download the PDF and parse it
|
14
|
+
# @param [String] link to Lobbyliste pdf, if left out pdf link is retrieved automatically from Bundestag website
|
14
15
|
# @return [Lobbyliste::Liste]
|
15
|
-
def self.fetch_and_parse
|
16
|
-
downloader = Lobbyliste::Downloader.new
|
16
|
+
def self.fetch_and_parse(pdf_link=nil)
|
17
|
+
downloader = Lobbyliste::Downloader.new(pdf_link)
|
17
18
|
Lobbyliste::Factories::ListFactory.build(downloader.text_data,downloader.html_data)
|
18
19
|
end
|
19
20
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lobbyliste
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- DarthMax
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-06-
|
11
|
+
date: 2016-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|