pupa 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.travis.yml +5 -0
- data/.yardopts +4 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +52 -0
- data/Rakefile +37 -0
- data/USAGE +1 -0
- data/lib/pupa/errors.rb +30 -0
- data/lib/pupa/logger.rb +37 -0
- data/lib/pupa/models/base.rb +190 -0
- data/lib/pupa/models/concerns/contactable.rb +34 -0
- data/lib/pupa/models/concerns/identifiable.rb +26 -0
- data/lib/pupa/models/concerns/linkable.rb +26 -0
- data/lib/pupa/models/concerns/nameable.rb +34 -0
- data/lib/pupa/models/concerns/sourceable.rb +26 -0
- data/lib/pupa/models/concerns/timestamps.rb +22 -0
- data/lib/pupa/models/contact_detail_list.rb +28 -0
- data/lib/pupa/models/membership.rb +37 -0
- data/lib/pupa/models/organization.rb +40 -0
- data/lib/pupa/models/person.rb +35 -0
- data/lib/pupa/models/post.rb +28 -0
- data/lib/pupa/processor/client.rb +42 -0
- data/lib/pupa/processor/dependency_graph.rb +18 -0
- data/lib/pupa/processor/helper.rb +15 -0
- data/lib/pupa/processor/middleware/logger.rb +37 -0
- data/lib/pupa/processor/middleware/parse_html.rb +16 -0
- data/lib/pupa/processor/persistence.rb +80 -0
- data/lib/pupa/processor/yielder.rb +50 -0
- data/lib/pupa/processor.rb +351 -0
- data/lib/pupa/refinements/faraday_middleware.rb +32 -0
- data/lib/pupa/refinements/json-schema.rb +36 -0
- data/lib/pupa/runner.rb +185 -0
- data/lib/pupa/version.rb +3 -0
- data/lib/pupa.rb +31 -0
- data/pupa.gemspec +34 -0
- data/schemas/popolo/contact_detail.json +44 -0
- data/schemas/popolo/identifier.json +18 -0
- data/schemas/popolo/link.json +19 -0
- data/schemas/popolo/membership.json +86 -0
- data/schemas/popolo/organization.json +104 -0
- data/schemas/popolo/other_name.json +28 -0
- data/schemas/popolo/person.json +130 -0
- data/schemas/popolo/post.json +78 -0
- data/spec/cassettes/31ac91ccad069eefc07d96cfbe66fa66c1b41fcf.yml +56 -0
- data/spec/cassettes/4ff54d737afb5d693653752d7bf234a405a80172.yml +48 -0
- data/spec/cassettes/898049a22e6ca51dfa2510d9e0e0207a5c396524.yml +54 -0
- data/spec/cassettes/ce69ff734ce852d2bfaa482bbf55d7ffb4762e87.yml +26 -0
- data/spec/cassettes/da629b01e0836deda8a5540a4e6a08783dd7aef9.yml +46 -0
- data/spec/cassettes/e398f35bea86b3d4c87a6934bae1eb7fca8744f9.yml +26 -0
- data/spec/logger_spec.rb +4 -0
- data/spec/models/base_spec.rb +194 -0
- data/spec/models/concerns/contactable_spec.rb +37 -0
- data/spec/models/concerns/identifiable_spec.rb +25 -0
- data/spec/models/concerns/linkable_spec.rb +25 -0
- data/spec/models/concerns/nameable_spec.rb +25 -0
- data/spec/models/concerns/sourceable_spec.rb +25 -0
- data/spec/models/concerns/timestamps_spec.rb +32 -0
- data/spec/models/contact_detail_list_spec.rb +44 -0
- data/spec/models/membership_spec.rb +30 -0
- data/spec/models/organization_spec.rb +24 -0
- data/spec/models/person_spec.rb +24 -0
- data/spec/models/post_spec.rb +19 -0
- data/spec/processor/client_spec.rb +4 -0
- data/spec/processor/dependency_graph_spec.rb +4 -0
- data/spec/processor/helper_spec.rb +4 -0
- data/spec/processor/middleware/logger_spec.rb +87 -0
- data/spec/processor/middleware/parse_html_spec.rb +92 -0
- data/spec/processor/persistence_spec.rb +41 -0
- data/spec/processor/yielder_spec.rb +55 -0
- data/spec/processor_spec.rb +268 -0
- data/spec/runner_spec.rb +85 -0
- data/spec/spec_helper.rb +17 -0
- metadata +342 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 97c9d9fa0543551ea5408129269350f772f5622b
|
4
|
+
data.tar.gz: c5784cad0628a12a1b6bd7f1b88e023da0c93b21
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9fb0c239b6b7d8f3b3c9e264ca83051a6b7e7156578543f99d29034c8385da32055e0b03ab685d740afc46a6cea1a0c18e2ec92a65605575153c1da30a2bd5c9
|
7
|
+
data.tar.gz: 49dd4745dcb8ad7db1bd54ca9d5aaf3c2a640d16c62494699fbf3ab601034055ba77d8a800e1b632efe4638d74fb4d92aa99525bc8286e63591bb120a28990a3
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Open North Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Pupa.rb: A Data Scraping Framework
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/opennorth/pupa-ruby.png)](http://travis-ci.org/opennorth/pupa-ruby)
|
4
|
+
[![Dependency Status](https://gemnasium.com/opennorth/pupa-ruby.png)](https://gemnasium.com/opennorth/pupa-ruby)
|
5
|
+
[![Coverage Status](https://coveralls.io/repos/opennorth/pupa-ruby/badge.png?branch=master)](https://coveralls.io/r/opennorth/pupa-ruby)
|
6
|
+
[![Code Climate](https://codeclimate.com/github/opennorth/pupa-ruby.png)](https://codeclimate.com/github/opennorth/pupa-ruby)
|
7
|
+
|
8
|
+
Pupa.rb is a Ruby 2.0 fork of Sunlight Labs' [Pupa](https://github.com/opencivicdata/pupa). It implements an Extract, Transform and Load (ETL) process to scrape data from online sources, transform it, and write it to a database.
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
You can use Pupa.rb to author scrapers that create people, organizations, memberships and posts according to the [Popolo](http://popoloproject.com/) open government data specification. If you need to scrape other types of data, you can also use your own models with Pupa.rb.
|
13
|
+
|
14
|
+
The [cat.rb](http://opennorth.github.io/pupa-ruby/docs/cat.html) example shows you how to:
|
15
|
+
|
16
|
+
* write a simple Cat class that is compatible with Pupa.rb
|
17
|
+
* use mixins to add Popolo properties to your class
|
18
|
+
* write a processor to scrape Cat objects from the Internet
|
19
|
+
* register a scraping task with Pupa.rb
|
20
|
+
* run the processor to save the Cat objects to MongoDB
|
21
|
+
|
22
|
+
The [bill.rb](http://opennorth.github.io/pupa-ruby/docs/bill.html) example shows you how to:
|
23
|
+
|
24
|
+
* create relations between objects
|
25
|
+
* relate two objects, even if you do not know the ID of one object
|
26
|
+
* write separate scraping tasks for different types of data
|
27
|
+
* run each scraping task separately
|
28
|
+
|
29
|
+
The [legislator.rb](http://opennorth.github.io/pupa-ruby/docs/legislator.html) example shows you how to:
|
30
|
+
|
31
|
+
* use a different HTTP client than the default [Faraday](https://github.com/lostisland/faraday)
|
32
|
+
* select a scraping method according to criteria like the legislative term
|
33
|
+
* pass selection criteria to the processor before running scraping tasks
|
34
|
+
|
35
|
+
The [organization.rb](http://opennorth.github.io/pupa-ruby/docs/organization.html) example shows you how to:
|
36
|
+
|
37
|
+
* register a transformation task with Pupa.rb
|
38
|
+
* run the processor's transformation task
|
39
|
+
|
40
|
+
### Scraping method selection
|
41
|
+
|
42
|
+
1. For simple processing, your processor class need only define a single `scrape_objects` method, which will perform all scraping. See [cat.rb](http://opennorth.github.io/pupa-ruby/docs/cat.html) for an example.
|
43
|
+
|
44
|
+
1. If you scrape many types of data from the same source, you may want to split the scraping into separate tasks according to the type of data being scraped. See [bill.rb](http://opennorth.github.io/pupa-ruby/docs/bill.html) for an example.
|
45
|
+
|
46
|
+
1. You may want more control over the method used to perform a scraping task. For example, a legislature may publish legislators before 1997 in one format and legislators after 1997 in another format. In this case, you may want to select the method used to scrape legislators according to the year. See [legislator.rb](http://opennorth.github.io/pupa-ruby/docs/legislator.html).
|
47
|
+
|
48
|
+
## Bugs? Questions?
|
49
|
+
|
50
|
+
This project's main repository is on GitHub: [http://github.com/opennorth/pupa-ruby](http://github.com/opennorth/pupa-ruby), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
|
51
|
+
|
52
|
+
Copyright (c) 2013 Open North Inc., released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

# Running `rake` without a task name runs the specs.
task default: :spec

begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  # YARD could not be loaded: define a stand-in task that explains how to get it.
  task :yard do
    abort 'YARD is not available. In order to run yard, you must: gem install yard'
  end
end

namespace :popolo do
  desc 'Update Popolo schemas'
  task :schemas do
    # Required here, inside the task, so they are only loaded when it runs.
    require 'base64'
    require 'octokit'

    repository = 'opennorth/popolo-spec'
    branch = 'gh-pages'

    # List the `schemas` directory, then fetch and store each entry in turn.
    Octokit.contents(repository, path: 'schemas', ref: branch).each do |entry|
      blob = Octokit.contents(repository, path: entry.path, ref: branch)

      # Only base64-encoded responses can be decoded below.
      unless blob.encoding == 'base64'
        raise "Can't handle #{blob.encoding} encoding"
      end

      decoded = Base64.decode64(blob.content)
      destination = File.expand_path(File.join('schemas', 'popolo', entry.name), __dir__)
      File.open(destination, 'w') do |io|
        io.write(decoded)
      end
    end
  end
end
|
data/USAGE
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
See README.md for full usage details.
|
data/lib/pupa/errors.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Pupa
  # Namespace for the exception classes raised by Pupa.
  module Errors
    # Abstract ancestor of every Pupa error; rescuing it catches all of the
    # classes below.
    class Error < StandardError
    end

    # Raised while saving an object to a database when one of its foreign keys
    # cannot be resolved.
    class MissingDatabaseIdError < Error
    end

    # Raised while dumping scraped objects to disk when two of those objects
    # have the same ID.
    class DuplicateObjectIdError < Error
    end

    # Raised on an attempt to get or set a property that an object does not
    # have.
    class MissingAttributeError < Error
    end

    # Raised while saving an object to a database when the object matches more
    # than one document in the database.
    class TooManyMatches < Error
    end

    # Raised when an object's foreign keys or foreign objects cannot be
    # resolved.
    class UnprocessableEntity < Error
    end

    # Raised when duplicate objects were inadvertently saved to the database.
    class DuplicateDocumentError < Error
    end
  end
end
|
data/lib/pupa/logger.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
require 'colored'
|
4
|
+
|
5
|
+
module Pupa
  # A logger factory.
  class Logger
    # Returns a configured logger.
    #
    # Every message is written as "HH:MM:SS SEVERITY progname: message" and is
    # colored according to its severity. A message whose severity has no
    # dedicated color (for example one logged with `Logger#unknown`, which the
    # standard library labels "ANY") is written uncolored.
    #
    # @param [String] progname the name of the program performing the logging
    # @param [String] level the log level, one of "DEBUG", "INFO", "WARN",
    #   "ERROR", "FATAL" or "UNKNOWN"
    # @param [String,IO] logdev the log device
    # @return [Logger] a configured logger
    def self.new(progname, level: 'INFO', logdev: STDOUT)
      logger = ::Logger.new(logdev)
      logger.level = ::Logger.const_get(level)
      logger.progname = progname
      # The block parameter is named `program` so that it does not shadow the
      # `progname` method argument.
      logger.formatter = proc do |severity, datetime, program, msg|
        message = "#{datetime.strftime('%T')} #{severity} #{program}: #{msg}\n"
        case severity
        when 'DEBUG'
          message.magenta
        when 'INFO'
          message.white
        when 'WARN'
          message.yellow
        when 'ERROR'
          message.red
        when 'FATAL'
          message.bold.red_on_white
        else
          # Without this branch the formatter returned nil for any other
          # severity, so the message was silently dropped.
          message
        end
      end
      logger
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'securerandom'
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
require 'active_support/callbacks'
|
6
|
+
require 'active_support/core_ext/hash/except'
|
7
|
+
require 'active_support/core_ext/hash/slice'
|
8
|
+
require 'active_support/core_ext/object/try'
|
9
|
+
require 'json-schema'
|
10
|
+
|
11
|
+
require 'pupa/refinements/json-schema'
|
12
|
+
|
13
|
+
JSON::Validator.cache_schemas = true
|
14
|
+
|
15
|
+
module Pupa
  # The base class from which other primary Popolo classes inherit.
  class Base
    include ActiveSupport::Callbacks
    define_callbacks :create, :save

    class_attribute :json_schema
    class_attribute :properties
    class_attribute :foreign_keys
    class_attribute :foreign_objects

    self.properties = Set.new
    self.foreign_keys = Set.new
    self.foreign_objects = Set.new

    class << self
      # Declare the class' properties.
      #
      # When converting an object to a hash using the `to_h` method, only the
      # properties declared with `attr_accessor` will be included in the hash.
      #
      # @param [Array<Symbol>] attributes the class' properties
      def attr_accessor(*attributes)
        self.properties += attributes # use assignment to not overwrite the parent's attribute
        super
      end

      # Declare the class' foreign keys.
      #
      # When importing scraped objects, the foreign keys will be used to draw a
      # dependency graph and derive an evaluation order.
      #
      # @param [Array<Symbol>] attributes the class' foreign keys
      def foreign_key(*attributes)
        self.foreign_keys += attributes
      end

      # Declare the class' foreign objects.
      #
      # In some cases, you may not know the ID of an existing foreign object,
      # but you may have other information to identify the object. In that case,
      # put the information you have in a property named after the foreign key
      # without the `_id` suffix: for example, `person` for `person_id`. Before
      # saving the object to the database, Pupa.rb will use this information to
      # identify the foreign object.
      #
      # @param [Array<Symbol>] attributes the class' foreign objects
      def foreign_object(*attributes)
        self.foreign_objects += attributes
      end

      # Sets the class' schema.
      #
      # A hash or an absolute path is stored as given. Any other value is
      # treated as a schema name and expanded to `schemas/<value>.json`, three
      # directories above the directory containing this file.
      #
      # @param [Hash,String] value a hash or a relative or absolute path
      def schema=(value)
        self.json_schema = if Hash === value
          value
        elsif Pathname.new(value).absolute?
          value
        else
          File.expand_path(File.join('..', '..', '..', 'schemas', "#{value}.json"), __dir__)
        end
      end
    end

    attr_accessor :_id, :_type, :extras

    # Builds an object with a random UUID as its ID, a type derived from the
    # class name, and no extras, then assigns the given properties.
    #
    # @param [Hash] properties the object's properties
    # @raise [Pupa::Errors::MissingAttributeError] if a key is not a property
    #   of the class
    def initialize(properties = {})
      @_type = self.class.to_s.underscore
      @_id = SecureRandom.uuid
      @extras = {}

      properties.each do |key,value|
        self[key] = value
      end
    end

    # Returns the value of a property.
    #
    # @param [Symbol] property a property name
    # @raise [Pupa::Errors::MissingAttributeError] if class is missing the property
    def [](property)
      if properties.include?(property.to_sym)
        send(property)
      else
        raise Errors::MissingAttributeError, "missing attribute: #{property}"
      end
    end

    # Sets the value of a property.
    #
    # @param [Symbol] property a property name
    # @param value a value
    # @raise [Pupa::Errors::MissingAttributeError] if class is missing the property
    def []=(property, value)
      if properties.include?(property.to_sym)
        send("#{property}=", value)
      else
        raise Errors::MissingAttributeError, "missing attribute: #{property}"
      end
    end

    # Sets the object's ID.
    #
    # @param [String,Moped::BSON::ObjectId] id an ID
    def _id=(id)
      @_id = id.to_s # in case of Moped::BSON::ObjectId
    end

    # Adds a key-value pair to the object.
    #
    # @param [Symbol] key a key
    # @param value a value
    def add_extra(key, value)
      @extras[key] = value
    end

    # Returns a subset of the object's properties that should uniquely identify
    # the object.
    #
    # @return [Hash] a subset of the object's properties
    def fingerprint
      to_h.except(:_id)
    end

    # Returns the object's foreign keys and foreign objects.
    #
    # @return [Hash] the object's foreign keys and foreign objects
    def foreign_properties
      to_h(include_foreign_objects: true).slice(*foreign_keys + foreign_objects)
    end

    # Validates the object against the schema. Does nothing if the class has
    # no schema.
    #
    # @raise [JSON::Schema::ValidationError] if the object is invalid
    def validate!
      if self.class.json_schema
        # Only the top-level keys are converted to strings.
        result = {}
        to_h.each do |key,value|
          result[key.to_s] = value
        end
        # JSON::Validator#initialize_data runs fastest if given a hash.
        JSON::Validator.validate!(self.class.json_schema, result)
      end
    end

    # Returns the object as a hash. Properties whose value is blank are
    # omitted, except that an explicit `false` is kept.
    #
    # @param [Boolean] include_foreign_objects whether to include foreign objects
    # @return [Hash] the object as a hash
    def to_h(include_foreign_objects: false)
      {}.tap do |hash|
        (include_foreign_objects ? properties : properties - foreign_objects).each do |property|
          value = self[property]
          if value == false || value.present?
            hash[property] = value
          end
        end
      end
    end

    # Returns whether two objects are identical, ignoring any differences in
    # the objects' machine IDs.
    #
    # @param [Object] other another object
    # @return [Boolean] whether the objects are identical
    def ==(other)
      # An object that cannot be converted to a hash is never equal; without
      # this guard the comparison raised NoMethodError.
      return false unless other.respond_to?(:to_h)

      # `except` returns copies. `Hash#to_h` returns the receiver itself, so
      # deleting `:_id` in place would remove the key from a hash passed in by
      # the caller.
      to_h.except(:_id) == other.to_h.except(:_id)
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Pupa
  module Concerns
    # Adds the Popolo `contact_details` property to a model.
    module Contactable
      extend ActiveSupport::Concern

      included do
        # Only a reader is generated here; the writer is the custom
        # `contact_details=` defined below.
        attr_reader :contact_details

        # `Pupa::Base` only registers the properties declared through its
        # `attr_accessor` override, which `attr_reader` bypasses. Register the
        # property explicitly so that `to_h`, `[]` and `[]=` know about it.
        # The guard keeps the concern usable in classes without `properties`.
        self.properties += [:contact_details] if respond_to?(:properties)
      end

      # Sets the contact details.
      #
      # @param [Array] contact_details a list of contact details
      def contact_details=(contact_details)
        @contact_details = ContactDetailList.new(contact_details)
      end

      # Adds a contact detail. Nothing is added unless both `type` and `value`
      # are given.
      #
      # @param [String] type a type of medium, e.g. "fax" or "email"
      # @param [String] value a value, e.g. a phone number or email address
      # @param [String] note a note, e.g. for grouping contact details by physical location
      def add_contact_detail(type, value, note: nil)
        data = {type: type, value: value}
        if note
          data[:note] = note
        end
        if type && value
          (@contact_details ||= ContactDetailList.new) << data
        end
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Pupa
  module Concerns
    # Mixin that gives a model the Popolo `identifiers` property.
    module Identifiable
      extend ActiveSupport::Concern

      included do
        attr_accessor :identifiers
      end

      # Appends an issued identifier to the model's identifiers, creating the
      # list on first use. Nothing is appended when no identifier is given.
      #
      # @param [String] identifier an issued identifier, e.g. a DUNS number
      # @param [String] scheme an identifier scheme, e.g. DUNS
      def add_identifier(identifier, scheme: nil)
        return unless identifier

        entry = {identifier: identifier}
        entry[:scheme] = scheme if scheme
        (@identifiers ||= []) << entry
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Pupa
  module Concerns
    # Mixin that gives a model the Popolo `links` property.
    module Linkable
      extend ActiveSupport::Concern

      included do
        attr_accessor :links
      end

      # Appends a URL to the model's links, creating the list on first use.
      # Nothing is appended when no URL is given.
      #
      # @param [String] url a URL
      # @param [String] note a note, e.g. "Wikipedia page"
      def add_link(url, note: nil)
        return unless url

        entry = {url: url}
        entry[:note] = note if note
        (@links ||= []) << entry
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Pupa
  module Concerns
    # Mixin that gives a model the Popolo `other_names` property.
    module Nameable
      extend ActiveSupport::Concern

      included do
        attr_accessor :other_names
      end

      # Appends an alternate or former name to the model's other names,
      # creating the list on first use. Nothing is appended when no name is
      # given; optional details are only stored when they are given.
      #
      # @param [String] name an alternate or former name
      # @param [Date,Time] start_date the date on which the name was adopted
      # @param [Date,Time] end_date the date on which the name was abandoned
      # @param [String] note a note, e.g. "Birth name"
      def add_name(name, start_date: nil, end_date: nil, note: nil)
        return unless name

        entry = {name: name}
        entry[:start_date] = start_date if start_date
        entry[:end_date] = end_date if end_date
        entry[:note] = note if note
        (@other_names ||= []) << entry
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Pupa
  module Concerns
    # Mixin that gives a model the Popolo `sources` property.
    module Sourceable
      extend ActiveSupport::Concern

      included do
        attr_accessor :sources
      end

      # Appends a source to the model's sources, creating the list on first
      # use. Nothing is appended when no URL is given.
      #
      # @param [String] url a URL
      # @param [String] note a note
      def add_source(url, note: nil)
        return unless url

        entry = {url: url}
        entry[:note] = note if note
        (@sources ||= []) << entry
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Pupa
  module Concerns
    # Mixin that gives a model the Popolo `created_at` and `updated_at`
    # properties. `created_at` is set to the current UTC time before the
    # model's `:create` callbacks run, and `updated_at` before its `:save`
    # callbacks run.
    module Timestamps
      extend ActiveSupport::Concern

      included do
        attr_accessor :created_at, :updated_at

        set_callback(:create, :before) { |record| record.created_at = Time.now.utc }
        set_callback(:save, :before) { |record| record.updated_at = Time.now.utc }
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Pupa
  # A list of contact details. Each contact detail is expected to respond to
  # `[]` with the symbol keys `:type` and `:value`.
  class ContactDetailList < Array
    # Returns the first postal address within the list of contact details.
    #
    # @return [String,nil] a postal address
    def address
      find_by_type('address')
    end

    # Returns the first email address within the list of contact details.
    #
    # @return [String,nil] an email address
    def email
      find_by_type('email')
    end

    private

    # Returns the value of the first contact detail of the given type.
    #
    # Written in plain Ruby rather than with ActiveSupport's `Object#try`,
    # which this file does not require, so the class works on its own.
    #
    # @param [String] type a type of medium, e.g. "address" or "email"
    # @return the contact detail's value, or nil if none has the given type
    def find_by_type(type)
      contact_detail = find { |candidate| candidate[:type] == type }
      contact_detail && contact_detail[:value]
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Pupa
  # Represents the relationship between a person and an organization.
  class Membership < Base
    self.schema = 'popolo/membership'

    include Concerns::Timestamps
    include Concerns::Sourceable
    include Concerns::Contactable
    include Concerns::Linkable

    attr_accessor :label, :role, :person_id, :organization_id, :post_id,
      :start_date, :end_date

    foreign_key :person_id, :organization_id, :post_id

    # Describes the membership by the IDs of its two parties.
    #
    # @return [String] the person's ID and the organization's ID
    def to_s
      "#{person_id} in #{organization_id}"
    end

    # Builds a query with two alternatives, either of which identifies the
    # membership: one based on its label and one based on its post.
    #
    # The label of a person's relationship with an organization must be
    # unique, although a historical relationship may reuse it. Likewise, a
    # person may hold the same post in an organization several times over
    # time, but not simultaneously. Both alternatives therefore include
    # `end_date`.
    #
    # @return [Hash] an `$or` query over the two alternatives
    def fingerprint
      attributes = super
      by_label = attributes.slice(:label, :person_id, :organization_id, :end_date)
      by_post = attributes.slice(:person_id, :organization_id, :post_id, :end_date)
      {'$or' => [by_label, by_post]}
    end
  end
end
|