justiz 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0d196d5dbc4f9c497f5a6340ddd7377cf416dff6
4
- data.tar.gz: 84a09a957eb241e514241357e52aee7376093ac3
3
+ metadata.gz: 2956b90485c84af57cd711724282b36debf63a3c
4
+ data.tar.gz: 09412689d63cb8fd117e9f06b7bb9f7a3fcc49b6
5
5
  SHA512:
6
- metadata.gz: 244790715df8b1b00ec551430edbdf7a1872d6a0588a73240ff6bbd83d29494a7afe751416907690cc1188368f2947b827c5c8a760158a355924485ff175efd6
7
- data.tar.gz: 3090fd312cedff09deb3600fe326a4c1b0973df6ec0b144ebfec38672d08ad6e2bc855268503a27f47c6c9c6868c1a6067541a242b8faf54868b82ff180939e3
6
+ metadata.gz: bf3287ebafd6d08a41a00e0871eea16e4fbd1e0f3931730177d3e0ed995fd6a4c67a32ddc3949554f5dd06859807f2e309ba1e00c236d51e2e7fbf4f885e1d28
7
+ data.tar.gz: 3c93f26946552ad11888666f775c3e2658d44b330bda2e4057f21dda5ab3d657dcbbafb19a5151423e5484da8065226a841671413dd223b2860c25e418ac688d
data/justiz.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["mikep@quake.net"]
11
11
  spec.description = %q{Extracts contact data.}
12
12
  spec.summary = %q{Extract contact data from http://www.justizadressen.nrw.de/}
13
- spec.homepage = ""
13
+ spec.homepage = "https://github.com/mike-park/justiz"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -6,6 +6,16 @@ module Justiz
6
6
  FIELDS = [:court, :location, :post, :phone, :fax, :justiz_id, :url, :email]
7
7
  attr_accessor :attributes, *FIELDS
8
8
 
9
+ # remove from court if present
10
+ COURT_NOISE = [
11
+ ' - Insolvenzabteilung -',
12
+ ' - Handels- und Genossenschaftsregister -',
13
+ ' - Vereinsregister -',
14
+ ' - Partnerschaftsregister -',
15
+ ' - Zwangsversteigerung/-verwaltung -',
16
+ ' - Familiengericht -'
17
+ ]
18
+
9
19
  def initialize(attributes = {})
10
20
  self.attributes = attributes
11
21
  end
@@ -16,11 +26,16 @@ module Justiz
16
26
  end
17
27
  end
18
28
 
29
+ def court=(name)
30
+ @court = remove_court_noise(name)
31
+ end
32
+
19
33
  def id
20
34
  # too many duplicates
21
35
  #[court, justiz_id].compact.join("")
22
- # currently no duplicates
23
- [court, email].compact.join("")
36
+ # with noise removed also too many duplicates
37
+ #[court, email].compact.join("")
38
+ digest
24
39
  end
25
40
 
26
41
  def location_address
@@ -38,5 +53,17 @@ module Justiz
38
53
  end
39
54
  Digest.hexencode(sha256.digest)
40
55
  end
56
+
57
+ def to_s
58
+ inspect
59
+ end
60
+
61
+ private
62
+
63
+ def remove_court_noise(court)
64
+ court = court.dup
65
+ COURT_NOISE.each { |noise| court.sub!(/#{noise}$/, '') }
66
+ court
67
+ end
41
68
  end
42
69
  end
@@ -11,9 +11,10 @@ module Justiz
11
11
  end
12
12
 
13
13
  def contacts
14
- states.keys.map do |state|
14
+ contacts = states.keys.map do |state|
15
15
  contacts_for(state)
16
16
  end.flatten.compact
17
+ uniq_contacts(contacts)
17
18
  end
18
19
 
19
20
  def contacts_for(state)
@@ -21,17 +22,23 @@ module Justiz
21
22
  return page.contacts unless page.limit_warning?
22
23
 
23
24
  # do each type separately hoping to avoid limit warning
24
- court_types.keys.map do |court_type|
25
+ contacts = court_types.keys.map do |court_type|
25
26
  contacts_of_type(court_type, state)
26
- end.flatten.compact.uniq
27
+ end.flatten.compact
28
+ uniq_contacts(contacts)
27
29
  end
28
30
 
29
31
  def contacts_of_type(type, state)
30
- load_page(type, state, with_warning: true).contacts
32
+ contacts = load_page(type, state, with_warning: true).contacts
33
+ uniq_contacts(contacts)
31
34
  end
32
35
 
33
36
  private
34
37
 
38
+ def uniq_contacts(contacts)
39
+ contacts.uniq {|contact| contact.digest }
40
+ end
41
+
35
42
  def home_page
36
43
  @home_page ||= Page.new(agent.get('http://www.justizadressen.nrw.de/og.php?MD=nrw'))
37
44
  end
@@ -69,7 +76,7 @@ module Justiz
69
76
  end
70
77
 
71
78
  def contacts
72
- @contacts ||= parse_contacts
79
+ @contacts ||= parse_contacts.uniq
73
80
  end
74
81
 
75
82
  def parse_contacts
@@ -1,3 +1,3 @@
1
1
  module Justiz
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -34,4 +34,16 @@ describe Justiz::Contact do
34
34
  digest = "76ef09d0c7d0078015df7a948cf0352c00f6451dab354389b21895a50d89a4a8"
35
35
  expect(contact.digest).to eq(digest)
36
36
  end
37
+
38
+ it "should remove - xxx - from court" do
39
+ court = "Something#{Justiz::Contact::COURT_NOISE.first}"
40
+ contact = Justiz::Contact.new(court: court)
41
+ expect(contact.court).to eq('Something')
42
+ end
43
+
44
+ it "should not remove - xxx - from court" do
45
+ court = 'Something - Not be removed -'
46
+ contact = Justiz::Contact.new(court: court)
47
+ expect(contact.court).to eq(court)
48
+ end
37
49
  end
@@ -73,7 +73,7 @@ describe Justiz::Scraper::Courts do
73
73
  VCR.use_cassette 'courts/all_nrw' do
74
74
  contacts = subject.contacts_for('NRW')
75
75
  #ap contacts
76
- expect(contacts.count).to eq(513)
76
+ expect(contacts.count).to eq(377)
77
77
  end
78
78
  end
79
79
  end
@@ -83,16 +83,16 @@ describe Justiz::Scraper::Courts do
83
83
  VCR.use_cassette 'courts/all_all' do
84
84
  original = {
85
85
  "BRD" => "Bundesgerichte/-behörden 12",
86
- "BW" => "Baden-Württemberg 430",
87
- "BAY" => "Bayern 348",
86
+ "BW" => "Baden-Württemberg 284",
87
+ "BAY" => "Bayern 263",
88
88
  "B" => "Berlin 38",
89
89
  "BRA" => "Brandenburg 64",
90
90
  "BRE" => "Bremen 19",
91
91
  "HH" => "Hamburg 35",
92
92
  "HES" => "Hessen 115",
93
93
  "MV" => "Mecklenburg-Vorpommern 55",
94
- "NS" => "Niedersachsen 305",
95
- "NRW" => "Nordrhein-Westfalen 513",
94
+ "NS" => "Niedersachsen 262",
95
+ "NRW" => "Nordrhein-Westfalen 377",
96
96
  "RPF" => "Rheinland-Pfalz 101",
97
97
  "SAA" => "Saarland 32",
98
98
  "SAC" => "Sachsen 79",
@@ -108,7 +108,7 @@ describe Justiz::Scraper::Courts do
108
108
  total += count
109
109
  end
110
110
  expect(states).to eq(original)
111
- expect(total).to eq(2336)
111
+ expect(total).to eq(1926)
112
112
  end
113
113
  end
114
114
  end
@@ -200,6 +200,13 @@ describe Justiz::Scraper::Courts do
200
200
  expect(contact.post_address).to be_a(Justiz::Address)
201
201
  end
202
202
  end
203
+
204
+ it "should rename court" do
205
+ VCR.use_cassette 'courts/zvg_nrw' do
206
+ contact = subject.contacts_of_type('ZVG', 'NRW').first
207
+ expect(contact.court).to_not match(/Zwangsversteigerung/)
208
+ end
209
+ end
203
210
  end
204
211
  end
205
212