justiz 0.1.3 → 0.2.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0d196d5dbc4f9c497f5a6340ddd7377cf416dff6
4
- data.tar.gz: 84a09a957eb241e514241357e52aee7376093ac3
3
+ metadata.gz: 2956b90485c84af57cd711724282b36debf63a3c
4
+ data.tar.gz: 09412689d63cb8fd117e9f06b7bb9f7a3fcc49b6
5
5
  SHA512:
6
- metadata.gz: 244790715df8b1b00ec551430edbdf7a1872d6a0588a73240ff6bbd83d29494a7afe751416907690cc1188368f2947b827c5c8a760158a355924485ff175efd6
7
- data.tar.gz: 3090fd312cedff09deb3600fe326a4c1b0973df6ec0b144ebfec38672d08ad6e2bc855268503a27f47c6c9c6868c1a6067541a242b8faf54868b82ff180939e3
6
+ metadata.gz: bf3287ebafd6d08a41a00e0871eea16e4fbd1e0f3931730177d3e0ed995fd6a4c67a32ddc3949554f5dd06859807f2e309ba1e00c236d51e2e7fbf4f885e1d28
7
+ data.tar.gz: 3c93f26946552ad11888666f775c3e2658d44b330bda2e4057f21dda5ab3d657dcbbafb19a5151423e5484da8065226a841671413dd223b2860c25e418ac688d
data/justiz.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["mikep@quake.net"]
11
11
  spec.description = %q{Extracts contact data.}
12
12
  spec.summary = %q{Extract contact data from http://www.justizadressen.nrw.de/}
13
- spec.homepage = ""
13
+ spec.homepage = "https://github.com/mike-park/justiz"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -6,6 +6,16 @@ module Justiz
6
6
  FIELDS = [:court, :location, :post, :phone, :fax, :justiz_id, :url, :email]
7
7
  attr_accessor :attributes, *FIELDS
8
8
 
9
+ # remove from court if present
10
+ COURT_NOISE = [
11
+ ' - Insolvenzabteilung -',
12
+ ' - Handels- und Genossenschaftsregister -',
13
+ ' - Vereinsregister -',
14
+ ' - Partnerschaftsregister -',
15
+ ' - Zwangsversteigerung/-verwaltung -',
16
+ ' - Familiengericht -'
17
+ ]
18
+
9
19
  def initialize(attributes = {})
10
20
  self.attributes = attributes
11
21
  end
@@ -16,11 +26,16 @@ module Justiz
16
26
  end
17
27
  end
18
28
 
29
+ def court=(name)
30
+ @court = remove_court_noise(name)
31
+ end
32
+
19
33
  def id
20
34
  # too many duplicates
21
35
  #[court, justiz_id].compact.join("")
22
- # currently no duplicates
23
- [court, email].compact.join("")
36
+ # with noise removed also too many duplicates
37
+ #[court, email].compact.join("")
38
+ digest
24
39
  end
25
40
 
26
41
  def location_address
@@ -38,5 +53,17 @@ module Justiz
38
53
  end
39
54
  Digest.hexencode(sha256.digest)
40
55
  end
56
+
57
+ def to_s
58
+ inspect
59
+ end
60
+
61
+ private
62
+
63
+ def remove_court_noise(court)
64
+ court = court.dup
65
+ COURT_NOISE.each { |noise| court.sub!(/#{noise}$/, '') }
66
+ court
67
+ end
41
68
  end
42
69
  end
@@ -11,9 +11,10 @@ module Justiz
11
11
  end
12
12
 
13
13
  def contacts
14
- states.keys.map do |state|
14
+ contacts = states.keys.map do |state|
15
15
  contacts_for(state)
16
16
  end.flatten.compact
17
+ uniq_contacts(contacts)
17
18
  end
18
19
 
19
20
  def contacts_for(state)
@@ -21,17 +22,23 @@ module Justiz
21
22
  return page.contacts unless page.limit_warning?
22
23
 
23
24
  # do each type separately hoping to avoid limit warning
24
- court_types.keys.map do |court_type|
25
+ contacts = court_types.keys.map do |court_type|
25
26
  contacts_of_type(court_type, state)
26
- end.flatten.compact.uniq
27
+ end.flatten.compact
28
+ uniq_contacts(contacts)
27
29
  end
28
30
 
29
31
  def contacts_of_type(type, state)
30
- load_page(type, state, with_warning: true).contacts
32
+ contacts = load_page(type, state, with_warning: true).contacts
33
+ uniq_contacts(contacts)
31
34
  end
32
35
 
33
36
  private
34
37
 
38
+ def uniq_contacts(contacts)
39
+ contacts.uniq {|contact| contact.digest }
40
+ end
41
+
35
42
  def home_page
36
43
  @home_page ||= Page.new(agent.get('http://www.justizadressen.nrw.de/og.php?MD=nrw'))
37
44
  end
@@ -69,7 +76,7 @@ module Justiz
69
76
  end
70
77
 
71
78
  def contacts
72
- @contacts ||= parse_contacts
79
+ @contacts ||= parse_contacts.uniq
73
80
  end
74
81
 
75
82
  def parse_contacts
@@ -1,3 +1,3 @@
1
1
  module Justiz
2
- VERSION = "0.1.3"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -34,4 +34,16 @@ describe Justiz::Contact do
34
34
  digest = "76ef09d0c7d0078015df7a948cf0352c00f6451dab354389b21895a50d89a4a8"
35
35
  expect(contact.digest).to eq(digest)
36
36
  end
37
+
38
+ it "should remove - xxx - from court" do
39
+ court = "Something#{Justiz::Contact::COURT_NOISE.first}"
40
+ contact = Justiz::Contact.new(court: court)
41
+ expect(contact.court).to eq('Something')
42
+ end
43
+
44
+ it "should not remove - xxx - from court" do
45
+ court = 'Something - Not be removed -'
46
+ contact = Justiz::Contact.new(court: court)
47
+ expect(contact.court).to eq(court)
48
+ end
37
49
  end
@@ -73,7 +73,7 @@ describe Justiz::Scraper::Courts do
73
73
  VCR.use_cassette 'courts/all_nrw' do
74
74
  contacts = subject.contacts_for('NRW')
75
75
  #ap contacts
76
- expect(contacts.count).to eq(513)
76
+ expect(contacts.count).to eq(377)
77
77
  end
78
78
  end
79
79
  end
@@ -83,16 +83,16 @@ describe Justiz::Scraper::Courts do
83
83
  VCR.use_cassette 'courts/all_all' do
84
84
  original = {
85
85
  "BRD" => "Bundesgerichte/-behörden 12",
86
- "BW" => "Baden-Württemberg 430",
87
- "BAY" => "Bayern 348",
86
+ "BW" => "Baden-Württemberg 284",
87
+ "BAY" => "Bayern 263",
88
88
  "B" => "Berlin 38",
89
89
  "BRA" => "Brandenburg 64",
90
90
  "BRE" => "Bremen 19",
91
91
  "HH" => "Hamburg 35",
92
92
  "HES" => "Hessen 115",
93
93
  "MV" => "Mecklenburg-Vorpommern 55",
94
- "NS" => "Niedersachsen 305",
95
- "NRW" => "Nordrhein-Westfalen 513",
94
+ "NS" => "Niedersachsen 262",
95
+ "NRW" => "Nordrhein-Westfalen 377",
96
96
  "RPF" => "Rheinland-Pfalz 101",
97
97
  "SAA" => "Saarland 32",
98
98
  "SAC" => "Sachsen 79",
@@ -108,7 +108,7 @@ describe Justiz::Scraper::Courts do
108
108
  total += count
109
109
  end
110
110
  expect(states).to eq(original)
111
- expect(total).to eq(2336)
111
+ expect(total).to eq(1926)
112
112
  end
113
113
  end
114
114
  end
@@ -200,6 +200,13 @@ describe Justiz::Scraper::Courts do
200
200
  expect(contact.post_address).to be_a(Justiz::Address)
201
201
  end
202
202
  end
203
+
204
+ it "should rename court" do
205
+ VCR.use_cassette 'courts/zvg_nrw' do
206
+ contact = subject.contacts_of_type('ZVG', 'NRW').first
207
+ expect(contact.court).to_not match(/Zwangsversteigerung/)
208
+ end
209
+ end
203
210
  end
204
211
  end
205
212