justiz 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/justiz.gemspec +1 -1
- data/lib/justiz/contact.rb +29 -2
- data/lib/justiz/scraper/courts.rb +12 -5
- data/lib/justiz/version.rb +1 -1
- data/spec/lib/contact_spec.rb +12 -0
- data/spec/lib/scraper/courts_spec.rb +13 -6
- data/spec/vcr/courts/zvg_nrw.yml +3009 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2956b90485c84af57cd711724282b36debf63a3c
|
4
|
+
data.tar.gz: 09412689d63cb8fd117e9f06b7bb9f7a3fcc49b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf3287ebafd6d08a41a00e0871eea16e4fbd1e0f3931730177d3e0ed995fd6a4c67a32ddc3949554f5dd06859807f2e309ba1e00c236d51e2e7fbf4f885e1d28
|
7
|
+
data.tar.gz: 3c93f26946552ad11888666f775c3e2658d44b330bda2e4057f21dda5ab3d657dcbbafb19a5151423e5484da8065226a841671413dd223b2860c25e418ac688d
|
data/justiz.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["mikep@quake.net"]
|
11
11
|
spec.description = %q{Extracts contact data.}
|
12
12
|
spec.summary = %q{Extract contact data from http://www.justizadressen.nrw.de/}
|
13
|
-
spec.homepage = ""
|
13
|
+
spec.homepage = "https://github.com/mike-park/justiz"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
16
16
|
spec.files = `git ls-files`.split($/)
|
data/lib/justiz/contact.rb
CHANGED
@@ -6,6 +6,16 @@ module Justiz
|
|
6
6
|
FIELDS = [:court, :location, :post, :phone, :fax, :justiz_id, :url, :email]
|
7
7
|
attr_accessor :attributes, *FIELDS
|
8
8
|
|
9
|
+
# remove from court if present
|
10
|
+
COURT_NOISE = [
|
11
|
+
' - Insolvenzabteilung -',
|
12
|
+
' - Handels- und Genossenschaftsregister -',
|
13
|
+
' - Vereinsregister -',
|
14
|
+
' - Partnerschaftsregister -',
|
15
|
+
' - Zwangsversteigerung/-verwaltung -',
|
16
|
+
' - Familiengericht -'
|
17
|
+
]
|
18
|
+
|
9
19
|
def initialize(attributes = {})
|
10
20
|
self.attributes = attributes
|
11
21
|
end
|
@@ -16,11 +26,16 @@ module Justiz
|
|
16
26
|
end
|
17
27
|
end
|
18
28
|
|
29
|
+
def court=(name)
|
30
|
+
@court = remove_court_noise(name)
|
31
|
+
end
|
32
|
+
|
19
33
|
def id
|
20
34
|
# too many duplicates
|
21
35
|
#[court, justiz_id].compact.join("")
|
22
|
-
#
|
23
|
-
[court, email].compact.join("")
|
36
|
+
# with noise removed also too many duplicates
|
37
|
+
#[court, email].compact.join("")
|
38
|
+
digest
|
24
39
|
end
|
25
40
|
|
26
41
|
def location_address
|
@@ -38,5 +53,17 @@ module Justiz
|
|
38
53
|
end
|
39
54
|
Digest.hexencode(sha256.digest)
|
40
55
|
end
|
56
|
+
|
57
|
+
def to_s
|
58
|
+
inspect
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
def remove_court_noise(court)
|
64
|
+
court = court.dup
|
65
|
+
COURT_NOISE.each { |noise| court.sub!(/#{noise}$/, '') }
|
66
|
+
court
|
67
|
+
end
|
41
68
|
end
|
42
69
|
end
|
data/lib/justiz/scraper/courts.rb
CHANGED
@@ -11,9 +11,10 @@ module Justiz
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def contacts
|
14
|
-
states.keys.map do |state|
|
14
|
+
contacts = states.keys.map do |state|
|
15
15
|
contacts_for(state)
|
16
16
|
end.flatten.compact
|
17
|
+
uniq_contacts(contacts)
|
17
18
|
end
|
18
19
|
|
19
20
|
def contacts_for(state)
|
@@ -21,17 +22,23 @@ module Justiz
|
|
21
22
|
return page.contacts unless page.limit_warning?
|
22
23
|
|
23
24
|
# do each type separately hoping to avoid limit warning
|
24
|
-
court_types.keys.map do |court_type|
|
25
|
+
contacts = court_types.keys.map do |court_type|
|
25
26
|
contacts_of_type(court_type, state)
|
26
|
-
end.flatten.compact
|
27
|
+
end.flatten.compact
|
28
|
+
uniq_contacts(contacts)
|
27
29
|
end
|
28
30
|
|
29
31
|
def contacts_of_type(type, state)
|
30
|
-
load_page(type, state, with_warning: true).contacts
|
32
|
+
contacts = load_page(type, state, with_warning: true).contacts
|
33
|
+
uniq_contacts(contacts)
|
31
34
|
end
|
32
35
|
|
33
36
|
private
|
34
37
|
|
38
|
+
def uniq_contacts(contacts)
|
39
|
+
contacts.uniq {|contact| contact.digest }
|
40
|
+
end
|
41
|
+
|
35
42
|
def home_page
|
36
43
|
@home_page ||= Page.new(agent.get('http://www.justizadressen.nrw.de/og.php?MD=nrw'))
|
37
44
|
end
|
@@ -69,7 +76,7 @@ module Justiz
|
|
69
76
|
end
|
70
77
|
|
71
78
|
def contacts
|
72
|
-
@contacts ||= parse_contacts
|
79
|
+
@contacts ||= parse_contacts.uniq
|
73
80
|
end
|
74
81
|
|
75
82
|
def parse_contacts
|
data/lib/justiz/version.rb
CHANGED
data/spec/lib/contact_spec.rb
CHANGED
@@ -34,4 +34,16 @@ describe Justiz::Contact do
|
|
34
34
|
digest = "76ef09d0c7d0078015df7a948cf0352c00f6451dab354389b21895a50d89a4a8"
|
35
35
|
expect(contact.digest).to eq(digest)
|
36
36
|
end
|
37
|
+
|
38
|
+
it "should remove - xxx - from court" do
|
39
|
+
court = "Something#{Justiz::Contact::COURT_NOISE.first}"
|
40
|
+
contact = Justiz::Contact.new(court: court)
|
41
|
+
expect(contact.court).to eq('Something')
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should not remove - xxx - from court" do
|
45
|
+
court = 'Something - Not be removed -'
|
46
|
+
contact = Justiz::Contact.new(court: court)
|
47
|
+
expect(contact.court).to eq(court)
|
48
|
+
end
|
37
49
|
end
|
data/spec/lib/scraper/courts_spec.rb
CHANGED
@@ -73,7 +73,7 @@ describe Justiz::Scraper::Courts do
|
|
73
73
|
VCR.use_cassette 'courts/all_nrw' do
|
74
74
|
contacts = subject.contacts_for('NRW')
|
75
75
|
#ap contacts
|
76
|
-
expect(contacts.count).to eq(
|
76
|
+
expect(contacts.count).to eq(377)
|
77
77
|
end
|
78
78
|
end
|
79
79
|
end
|
@@ -83,16 +83,16 @@ describe Justiz::Scraper::Courts do
|
|
83
83
|
VCR.use_cassette 'courts/all_all' do
|
84
84
|
original = {
|
85
85
|
"BRD" => "Bundesgerichte/-behörden 12",
|
86
|
-
"BW" => "Baden-Württemberg
|
87
|
-
"BAY" => "Bayern
|
86
|
+
"BW" => "Baden-Württemberg 284",
|
87
|
+
"BAY" => "Bayern 263",
|
88
88
|
"B" => "Berlin 38",
|
89
89
|
"BRA" => "Brandenburg 64",
|
90
90
|
"BRE" => "Bremen 19",
|
91
91
|
"HH" => "Hamburg 35",
|
92
92
|
"HES" => "Hessen 115",
|
93
93
|
"MV" => "Mecklenburg-Vorpommern 55",
|
94
|
-
"NS" => "Niedersachsen
|
95
|
-
"NRW" => "Nordrhein-Westfalen
|
94
|
+
"NS" => "Niedersachsen 262",
|
95
|
+
"NRW" => "Nordrhein-Westfalen 377",
|
96
96
|
"RPF" => "Rheinland-Pfalz 101",
|
97
97
|
"SAA" => "Saarland 32",
|
98
98
|
"SAC" => "Sachsen 79",
|
@@ -108,7 +108,7 @@ describe Justiz::Scraper::Courts do
|
|
108
108
|
total += count
|
109
109
|
end
|
110
110
|
expect(states).to eq(original)
|
111
|
-
expect(total).to eq(
|
111
|
+
expect(total).to eq(1926)
|
112
112
|
end
|
113
113
|
end
|
114
114
|
end
|
@@ -200,6 +200,13 @@ describe Justiz::Scraper::Courts do
|
|
200
200
|
expect(contact.post_address).to be_a(Justiz::Address)
|
201
201
|
end
|
202
202
|
end
|
203
|
+
|
204
|
+
it "should rename court" do
|
205
|
+
VCR.use_cassette 'courts/zvg_nrw' do
|
206
|
+
contact = subject.contacts_of_type('ZVG', 'NRW').first
|
207
|
+
expect(contact.court).to_not match(/Zwangsversteigerung/)
|
208
|
+
end
|
209
|
+
end
|
203
210
|
end
|
204
211
|
end
|
205
212
|
|