pupa 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pupa/models/concerns/identifiable.rb +8 -1
- data/lib/pupa/models/contact_detail_list.rb +4 -2
- data/lib/pupa/models/identifier_list.rb +16 -0
- data/lib/pupa/processor.rb +5 -3
- data/lib/pupa/version.rb +1 -1
- data/lib/pupa.rb +1 -0
- data/spec/models/concerns/identifiable_spec.rb +7 -0
- data/spec/models/contact_detail_list_spec.rb +14 -0
- data/spec/models/identifier_list_spec.rb +26 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6deee32d18def9fda78da75c183a4bee5b67c00e
|
4
|
+
data.tar.gz: 5fd982a4e5445d3c25df91f161d7f41fbc6e5772
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 962075b24c71d2f46648c691b211619a32a96a9edf35bfcb68699c3cf98eeb93681c38c52a69e8ce99bf1bbe1723ec11ca5af33cc1489233cd66365eb6f00502
|
7
|
+
data.tar.gz: cbf8b00824dab5ae285bd225057ebd86a355722761f5c362d807184f8e43a384277cf26915c889108ea3d901643586c8666580ceac4355ea9601a8f06745d5a8
|
@@ -5,7 +5,14 @@ module Pupa
|
|
5
5
|
extend ActiveSupport::Concern
|
6
6
|
|
7
7
|
included do
|
8
|
-
|
8
|
+
attr_reader :identifiers
|
9
|
+
end
|
10
|
+
|
11
|
+
# Sets the identifiers.
|
12
|
+
#
|
13
|
+
# @param [Array] identifiers a list of identifiers
|
14
|
+
def identifiers=(identifiers)
|
15
|
+
@identifiers = IdentifierList.new(identifiers)
|
9
16
|
end
|
10
17
|
|
11
18
|
# Adds an issued identifier.
|
@@ -15,8 +15,10 @@ module Pupa
|
|
15
15
|
find_by_type('email')
|
16
16
|
end
|
17
17
|
|
18
|
-
|
19
|
-
|
18
|
+
# Returns the value of the first contact detail matching the type.
|
19
|
+
#
|
20
|
+
# @param [String] a type
|
21
|
+
# @return [String,nil] a value
|
20
22
|
def find_by_type(type)
|
21
23
|
find{|contact_detail|
|
22
24
|
contact_detail[:type] == type
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Pupa
|
2
|
+
# A list of identifiers.
|
3
|
+
class IdentifierList < Array
|
4
|
+
# Returns the first identifier matching the scheme.
|
5
|
+
#
|
6
|
+
# @param [String] scheme a scheme
|
7
|
+
# @return [String,nil] an identifier
|
8
|
+
def find_by_scheme(scheme)
|
9
|
+
find{|identifier|
|
10
|
+
identifier[:scheme] == scheme
|
11
|
+
}.try{|identifier|
|
12
|
+
identifier[:identifier]
|
13
|
+
}
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/pupa/processor.rb
CHANGED
@@ -17,7 +17,7 @@ module Pupa
|
|
17
17
|
class_attribute :tasks
|
18
18
|
self.tasks = []
|
19
19
|
|
20
|
-
attr_reader :report
|
20
|
+
attr_reader :report, :client, :options
|
21
21
|
|
22
22
|
def_delegators :@logger, :debug, :info, :warn, :error, :fatal
|
23
23
|
|
@@ -54,7 +54,7 @@ module Pupa
|
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
|
-
|
57
|
+
client.get(url, params).body
|
58
58
|
end
|
59
59
|
|
60
60
|
# Retrieves and parses a document with a POST request.
|
@@ -63,7 +63,7 @@ module Pupa
|
|
63
63
|
# @param [String,Hash] params query string parameters
|
64
64
|
# @return a parsed document
|
65
65
|
def post(url, params = {})
|
66
|
-
|
66
|
+
client.post(url, params).body
|
67
67
|
end
|
68
68
|
|
69
69
|
# Adds a scraping task to Pupa.rb.
|
@@ -277,6 +277,8 @@ module Pupa
|
|
277
277
|
# @return [Hash] a mapping from an object ID to the ID of its duplicate
|
278
278
|
def build_losers_to_winners_map(objects)
|
279
279
|
{}.tap do |map|
|
280
|
+
# We don't need to iterate on the last item in the hash, but skipping
|
281
|
+
# the last item is more effort than running the last item.
|
280
282
|
objects.each_with_index do |(id1,object1),index|
|
281
283
|
unless map.key?(id1) # Don't search for duplicates of duplicates.
|
282
284
|
objects.drop(index + 1).each do |id2,object2|
|
data/lib/pupa/version.rb
CHANGED
data/lib/pupa.rb
CHANGED
@@ -19,6 +19,7 @@ require 'pupa/models/concerns/timestamps'
|
|
19
19
|
|
20
20
|
require 'pupa/models/base'
|
21
21
|
require 'pupa/models/contact_detail_list'
|
22
|
+
require 'pupa/models/identifier_list'
|
22
23
|
require 'pupa/models/membership'
|
23
24
|
require 'pupa/models/organization'
|
24
25
|
require 'pupa/models/person'
|
@@ -11,6 +11,13 @@ describe Pupa::Concerns::Identifiable do
|
|
11
11
|
klass.new
|
12
12
|
end
|
13
13
|
|
14
|
+
describe '#identifiers=' do
|
15
|
+
it 'should use coerce to a IdentifierList' do
|
16
|
+
object.identifiers = [{identifier: '123456789', scheme: 'DUNS'}]
|
17
|
+
object.identifiers.should be_a(Pupa::IdentifierList)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
14
21
|
describe '#add_identifier' do
|
15
22
|
it 'should add an identifier' do
|
16
23
|
object.add_identifier('123456789', scheme: 'duns')
|
@@ -19,6 +19,10 @@ describe Pupa::ContactDetailList do
|
|
19
19
|
type: 'email',
|
20
20
|
value: 'second',
|
21
21
|
},
|
22
|
+
{
|
23
|
+
type: 'custom',
|
24
|
+
value: 'content',
|
25
|
+
},
|
22
26
|
])
|
23
27
|
end
|
24
28
|
|
@@ -41,4 +45,14 @@ describe Pupa::ContactDetailList do
|
|
41
45
|
Pupa::ContactDetailList.new.email.should == nil
|
42
46
|
end
|
43
47
|
end
|
48
|
+
|
49
|
+
describe '#find_by_type' do
|
50
|
+
it 'should return the value of the first contact detail matching the type' do
|
51
|
+
object.find_by_type('custom').should == 'content'
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'should return nil if no contact detail matches the type' do
|
55
|
+
Pupa::ContactDetailList.new.find_by_type('custom').should == nil
|
56
|
+
end
|
57
|
+
end
|
44
58
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
describe Pupa::IdentifierList do
|
4
|
+
let :object do
|
5
|
+
Pupa::IdentifierList.new([
|
6
|
+
{
|
7
|
+
identifier: '123456789',
|
8
|
+
scheme: 'DUNS',
|
9
|
+
},
|
10
|
+
{
|
11
|
+
identifier: 'US0123456789',
|
12
|
+
scheme: 'ISIN',
|
13
|
+
},
|
14
|
+
])
|
15
|
+
end
|
16
|
+
|
17
|
+
describe '#find_by_scheme' do
|
18
|
+
it 'should return the first identifier matching the scheme' do
|
19
|
+
object.find_by_scheme('ISIN').should == 'US0123456789'
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should return nil if no identifier matches the scheme' do
|
23
|
+
Pupa::IdentifierList.new.find_by_scheme('ISIN').should == nil
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pupa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Open North
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-09-
|
11
|
+
date: 2013-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -232,6 +232,7 @@ files:
|
|
232
232
|
- lib/pupa/models/concerns/sourceable.rb
|
233
233
|
- lib/pupa/models/concerns/timestamps.rb
|
234
234
|
- lib/pupa/models/contact_detail_list.rb
|
235
|
+
- lib/pupa/models/identifier_list.rb
|
235
236
|
- lib/pupa/models/membership.rb
|
236
237
|
- lib/pupa/models/organization.rb
|
237
238
|
- lib/pupa/models/person.rb
|
@@ -272,6 +273,7 @@ files:
|
|
272
273
|
- spec/models/concerns/sourceable_spec.rb
|
273
274
|
- spec/models/concerns/timestamps_spec.rb
|
274
275
|
- spec/models/contact_detail_list_spec.rb
|
276
|
+
- spec/models/identifier_list_spec.rb
|
275
277
|
- spec/models/membership_spec.rb
|
276
278
|
- spec/models/organization_spec.rb
|
277
279
|
- spec/models/person_spec.rb
|
@@ -326,6 +328,7 @@ test_files:
|
|
326
328
|
- spec/models/concerns/sourceable_spec.rb
|
327
329
|
- spec/models/concerns/timestamps_spec.rb
|
328
330
|
- spec/models/contact_detail_list_spec.rb
|
331
|
+
- spec/models/identifier_list_spec.rb
|
329
332
|
- spec/models/membership_spec.rb
|
330
333
|
- spec/models/organization_spec.rb
|
331
334
|
- spec/models/person_spec.rb
|