rialto-etl 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -1
- data/.rubocop.yml +18 -0
- data/Gemfile +6 -2
- data/LICENSE +203 -0
- data/README.md +12 -1
- data/docs/CAP-organizations.md +258 -0
- data/lib/rialto/etl/configs/stanford_organizations.rb +9 -16
- data/lib/rialto/etl/extractors/abstract_stanford_extractor.rb +9 -1
- data/lib/rialto/etl/extractors/stanford_organizations.rb +1 -0
- data/lib/rialto/etl/extractors/stanford_researchers.rb +1 -0
- data/lib/rialto/etl/readers/stanford_organizations_json_reader.rb +36 -0
- data/lib/rialto/etl/transformers/stanford_organizations_to_vivo.rb +6 -2
- data/lib/rialto/etl/version.rb +1 -1
- data/lib/rialto/etl/writers/jsonld_writer.rb +40 -0
- data/lib/rialto/etl/writers/ntriples_writer.rb +32 -0
- data/mapping.md +43 -16
- data/rialto-etl.gemspec +3 -0
- metadata +49 -3
- data/lib/rialto/etl/stanford_organizations_json_reader.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80d6ca761827682b6f4175965c61a24fe426fcd9
|
4
|
+
data.tar.gz: ed7045079818141034e35dc54e7b2bba1a2b20df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2bb3c2df22a94341bf814c308d212849f3f3077eafc7cef08f0de82ae9965669d9abc8ceba12fb73b145f5dd3a142110d50fd2bc9569f9b1af5c0747e28afb29
|
7
|
+
data.tar.gz: 8dd23de023248b05b2da377041cdb76a5130d7dabd2d186ac6ca6c946e16532343d3b7d38686b12124fff683423838af6ca9e309ed975ced4d5066821124c2ef
|
data/.gitignore
CHANGED
data/.rspec
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,8 +1,26 @@
|
|
1
|
+
require: rubocop-rspec
|
1
2
|
inherit_from: .rubocop_todo.yml
|
2
3
|
|
3
4
|
AllCops:
|
4
5
|
TargetRubyVersion: 2.3
|
5
6
|
|
7
|
+
Metrics/BlockLength:
|
8
|
+
Exclude:
|
9
|
+
- spec/**/*.rb
|
10
|
+
|
11
|
+
RSpec/FilePath:
|
12
|
+
Enabled: false
|
13
|
+
|
14
|
+
RSpec/IteratedExpectation:
|
15
|
+
Exclude:
|
16
|
+
- spec/readers/*.rb
|
17
|
+
|
18
|
+
RSpec/NestedGroups:
|
19
|
+
Max: 4
|
20
|
+
|
21
|
+
RSpec/SubjectStub:
|
22
|
+
Enabled: false
|
23
|
+
|
6
24
|
Style/MixinUsage:
|
7
25
|
Exclude:
|
8
26
|
- lib/rialto/etl/configs/stanford_organizations.rb
|
data/Gemfile
CHANGED
@@ -5,5 +5,9 @@ source 'https://rubygems.org'
|
|
5
5
|
# Specify your gem's dependencies in rialto-etl.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
|
9
|
-
gem '
|
8
|
+
group :development, :test do
|
9
|
+
gem 'coveralls', require: false
|
10
|
+
gem 'pry' unless ENV['CI']
|
11
|
+
gem 'pry-byebug' unless ENV['CI']
|
12
|
+
gem 'simplecov', require: false
|
13
|
+
end
|
data/LICENSE
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
Apache License
|
2
|
+
Version 2.0, January 2004
|
3
|
+
http://www.apache.org/licenses/
|
4
|
+
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6
|
+
|
7
|
+
1. Definitions.
|
8
|
+
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
11
|
+
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13
|
+
the copyright owner that is granting the License.
|
14
|
+
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
16
|
+
other entities that control, are controlled by, or are under common
|
17
|
+
control with that entity. For the purposes of this definition,
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
19
|
+
direction or management of such entity, whether by contract or
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22
|
+
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24
|
+
exercising permissions granted by this License.
|
25
|
+
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
27
|
+
including but not limited to software source code, documentation
|
28
|
+
source, and configuration files.
|
29
|
+
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
31
|
+
transformation or translation of a Source form, including but
|
32
|
+
not limited to compiled object code, generated documentation,
|
33
|
+
and conversions to other media types.
|
34
|
+
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
36
|
+
Object form, made available under the License, as indicated by a
|
37
|
+
copyright notice that is included in or attached to the work
|
38
|
+
(an example is provided in the Appendix below).
|
39
|
+
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46
|
+
the Work and Derivative Works thereof.
|
47
|
+
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
49
|
+
the original version of the Work and any modifications or additions
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
61
|
+
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
64
|
+
subsequently incorporated within the Work.
|
65
|
+
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
72
|
+
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78
|
+
where such license applies only to those patent claims licensable
|
79
|
+
by such Contributor that are necessarily infringed by their
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
82
|
+
institute patent litigation against any entity (including a
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
85
|
+
or contributory patent infringement, then any patent licenses
|
86
|
+
granted to You under this License for that Work shall terminate
|
87
|
+
as of the date such litigation is filed.
|
88
|
+
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
91
|
+
modifications, and in Source or Object form, provided that You
|
92
|
+
meet the following conditions:
|
93
|
+
|
94
|
+
(a) You must give any other recipients of the Work or
|
95
|
+
Derivative Works a copy of this License; and
|
96
|
+
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
98
|
+
stating that You changed the files; and
|
99
|
+
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
102
|
+
attribution notices from the Source form of the Work,
|
103
|
+
excluding those notices that do not pertain to any part of
|
104
|
+
the Derivative Works; and
|
105
|
+
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
108
|
+
include a readable copy of the attribution notices contained
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
111
|
+
of the following places: within a NOTICE text file distributed
|
112
|
+
as part of the Derivative Works; within the Source form or
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
114
|
+
within a display generated by the Derivative Works, if and
|
115
|
+
wherever such third-party notices normally appear. The contents
|
116
|
+
of the NOTICE file are for informational purposes only and
|
117
|
+
do not modify the License. You may add Your own attribution
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
120
|
+
that such additional attribution notices cannot be construed
|
121
|
+
as modifying the License.
|
122
|
+
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
124
|
+
may provide additional or different license terms and conditions
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
128
|
+
the conditions stated in this License.
|
129
|
+
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
133
|
+
this License, without any additional terms or conditions.
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135
|
+
the terms of any separate license agreement you may have executed
|
136
|
+
with Licensor regarding such Contributions.
|
137
|
+
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
140
|
+
except as required for reasonable and customary use in describing the
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
142
|
+
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
152
|
+
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
158
|
+
incidental, or consequential damages of any character arising as a
|
159
|
+
result of this License or out of the use or inability to use the
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
162
|
+
other commercial damages or losses), even if such Contributor
|
163
|
+
has been advised of the possibility of such damages.
|
164
|
+
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168
|
+
or other liability obligations and/or rights consistent with this
|
169
|
+
License. However, in accepting such obligations, You may act only
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
174
|
+
of your accepting any such warranty or additional liability.
|
175
|
+
|
176
|
+
END OF TERMS AND CONDITIONS
|
177
|
+
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
179
|
+
|
180
|
+
To apply the Apache License to your work, attach the following
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "{}"
|
182
|
+
replaced with your own identifying information. (Don't include
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
184
|
+
comment syntax for the file format. We also recommend that a
|
185
|
+
file or class name and description of purpose be included on the
|
186
|
+
same "printed page" as the copyright notice for easier
|
187
|
+
identification within third-party archives.
|
188
|
+
|
189
|
+
Copyright 2017 Stanford University
|
190
|
+
Additional copyright may be held by others, as reflected in the commit log
|
191
|
+
|
192
|
+
|
193
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
194
|
+
you may not use this file except in compliance with the License.
|
195
|
+
You may obtain a copy of the License at
|
196
|
+
|
197
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
198
|
+
|
199
|
+
Unless required by applicable law or agreed to in writing, software
|
200
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
201
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
202
|
+
See the License for the specific language governing permissions and
|
203
|
+
limitations under the License.
|
data/README.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
# Rialto::Etl
|
2
2
|
|
3
|
-
[![
|
3
|
+
[![Gem](https://img.shields.io/gem/v/rialto-etl.svg)](https://rubygems.org/gems/rialto-etl)
|
4
|
+
[![Travis](https://img.shields.io/travis/sul-dlss-labs/rialto-etl.svg)](https://travis-ci.org/sul-dlss-labs/rialto-etl)
|
5
|
+
[![Code Climate](https://img.shields.io/codeclimate/maintainability/sul-dlss-labs/rialto-etl.svg)](https://codeclimate.com/github/sul-dlss-labs/rialto-etl/maintainability)
|
6
|
+
[![Coveralls github](https://img.shields.io/coveralls/github/sul-dlss-labs/rialto-etl.svg)](https://coveralls.io/github/sul-dlss-labs/rialto-etl?branch=master)
|
7
|
+
[![Documentation](https://inch-ci.org/github/sul-dlss-labs/rialto-etl.svg?branch=master)](https://inch-ci.org/github/sul-dlss-labs/rialto-etl)
|
8
|
+
[![API](http://img.shields.io/badge/API-docs-blue.svg)](http://rubydoc.info/gems/rialto-etl)
|
9
|
+
[![Apache 2.0 License](http://img.shields.io/badge/APACHE2-license-blue.svg)](./LICENSE)
|
4
10
|
|
5
11
|
Rialto::Etl is a set of ETL tools for RIALTO, Stanford University Libraries' research intelligence project
|
6
12
|
|
@@ -53,6 +59,11 @@ TBD
|
|
53
59
|
-n, --name NAME Name of the transformer to run (REQUIRED)
|
54
60
|
-i, --input-file FILENAME Name of file holding data to be transformed (REQUIRED)
|
55
61
|
|
62
|
+
## Documentation
|
63
|
+
|
64
|
+
* [Mapping / Mapping Target](./mapping.md)
|
65
|
+
* [CAP Organizations to VIVO Mapping](./docs/CAP-organizations.md)
|
66
|
+
|
56
67
|
## Development
|
57
68
|
|
58
69
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -0,0 +1,258 @@
|
|
1
|
+
# CAP Organizations to VIVO / RIALTO Stanford Organizations Mapping
|
2
|
+
|
3
|
+
This is mapping documentation for taking CAP API Organizations data (`http://api.stanford.edu/cap/v1/orgs/org-path-name`) and mapping them to our RIALTO model (based on VIVO-ISF Ontology) for `Organizations` (a subclass of `Agents`).
|
4
|
+
|
5
|
+
## Mapping
|
6
|
+
|
7
|
+
Reused Ontologies List (to be vetted):
|
8
|
+
- "bibo": "http://purl.org/ontology/bibo/"
|
9
|
+
- "c4o": "http://purl.org/spar/c4o/"
|
10
|
+
- "cito": "http://purl.org/spar/cito/"
|
11
|
+
- "dbpedia": "http://dbpedia.org/resource/"
|
12
|
+
- "dbo": "http://dbpedia.org/ontology/"
|
13
|
+
- "event": "http://purl.org/NET/c4dm/event.owl#"
|
14
|
+
- "fabio": "http://purl.org/spar/fabio/"
|
15
|
+
- "foaf": "http://xmlns.com/foaf/0.1/"
|
16
|
+
- "geo": "http://aims.fao.org/aos/geopolitical.owl#"
|
17
|
+
- "obo": "http://purl.obolibrary.org/obo/"
|
18
|
+
- "ocrer": "http://purl.org/net/OCRe/research.owl#"
|
19
|
+
- "ocresd": "http://purl.org/net/OCRe/study_design.owl#"
|
20
|
+
- "owl": "http://www.w3.org/2002/07/owl#"
|
21
|
+
- "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
22
|
+
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
|
23
|
+
- "scires": "http://vivoweb.org/ontology/scientific-research#"
|
24
|
+
- "skos": "http://www.w3.org/2004/02/skos/core#"
|
25
|
+
- "vcard": "http://www.w3.org/2006/vcard/ns#"
|
26
|
+
- "vitro": "http://vitro.mannlib.cornell.edu/ns/vitro/0.7#"
|
27
|
+
- "vitro-public": "http://vitro.mannlib.cornell.edu/ns/vitro/public#"
|
28
|
+
- "vivo": "http://vivoweb.org/ontology/core#"
|
29
|
+
- "xsd": "http://www.w3.org/2001/XMLSchema#"
|
30
|
+
|
31
|
+
For a given organization hash:
|
32
|
+
|
33
|
+
| CAP key | RIALTO entry | Notes |
|
34
|
+
| ------------ | --------------------------------------------------------- | ----- |
|
35
|
+
| 'type' | `rdf:type` / `@type` for given organization at RIALTO URI | See mapping below. |
|
36
|
+
| 'alias' | `@id` `http://rialto.stanford.edu/individual/{alias}` | Domain may change. Want to confirm alias is consistent enough for use of minting resources that will be fed by all data sources. |
|
37
|
+
| 'alias' | `dbo:alias` then value as string | Capture the alias also in the metadata explicitly. |
|
38
|
+
| 'name' | `rdfs:label` then value as string | any alt labels? repeated labels? need to check. |
|
39
|
+
| 'orgCodes' | for each value, `dbo:code` then value as string | alternate identifiers? where will we look for later matching? |
|
40
|
+
| 'children' | `obo:BFO_0000051` (*has part*) then child's alias value as RIALTO URI | capture each presumed URI from the alias, but get the data for that specific organization from separate API calls...? See question above. |
|
41
|
+
| 'children' | for each child, `obo:BFO_0000050` (*part of*) then parent's RIALTO URI | how to make sure this adds data to the child's graph without removing data the parent won't know about? Or just use Stanford / ROOT and add all data for all Organizations from that? |
|
42
|
+
| 'url' | `rdfs:seeAlso` then value as IRI | |
|
43
|
+
| 'browsable' | n/a | Ignore. |
|
44
|
+
| 'onboarding' | n/a | Ignore. |
|
45
|
+
|
46
|
+
| CAP Organization Type | RIALTO / VIVO Entity Type | Notes |
|
47
|
+
| --------------------- | ------------------------------------ | ----- |
|
48
|
+
| ROOT | vivo:University << foaf:Organization | |
|
49
|
+
| SCHOOL | vivo:School << foaf:Organization | |
|
50
|
+
| DEPARTMENT | vivo:Department << foaf:Organization | From VIVO for Department: "Use for any non-academic department" so this may not fit long-term. Seems like vivo:AcademicDepartment could be better, but departments in CAP are not consistently academic or other. |
|
51
|
+
| DIVISION | vivo:Division << foaf:Organization, vivo:ExtensionUnit | From VIVO: subclass of Extension Unit, "A unit devoted primarily to extension activities, whether for outreach or research", so this may not fit long term. |
|
52
|
+
| SUB_DIVISION | vivo:Division << foaf:Organization, vivo:ExtensionUnit | See note above. No requirement to distinguish sub-ness in RIALTO. |
|
53
|
+
|
54
|
+
## Sample Input
|
55
|
+
|
56
|
+
Sample source CAP data for a provided Organization is in [our fixtures (this has been shortened and any real values replaced)](../spec/fixtures/cap/organization.json). See a simplified example of the JSON input below:
|
57
|
+
|
58
|
+
```JSON
|
59
|
+
{
|
60
|
+
"alias": "stanford-test",
|
61
|
+
"browsable": false,
|
62
|
+
"children": [{
|
63
|
+
"alias": "department-of-funny-walks",
|
64
|
+
"browsable": false,
|
65
|
+
"children": [{
|
66
|
+
"alias": "department-of-funny-walks/intercollegiate-walks",
|
67
|
+
"browsable": false,
|
68
|
+
"name": "Intercollegiate Walks",
|
69
|
+
"onboarding": true,
|
70
|
+
"orgCodes": [
|
71
|
+
"WALK",
|
72
|
+
"WALZ"
|
73
|
+
],
|
74
|
+
"type": "DEPARTMENT"
|
75
|
+
},
|
76
|
+
{
|
77
|
+
"alias": "department-of-funny-walks/walks-education",
|
78
|
+
"browsable": false,
|
79
|
+
"children": [{
|
80
|
+
"alias": "department-of-funny-walks/walks-education/adventure-walks",
|
81
|
+
"browsable": false,
|
82
|
+
"name": "Adventure Walks",
|
83
|
+
"onboarding": true,
|
84
|
+
"orgCodes": [
|
85
|
+
"ADVE"
|
86
|
+
],
|
87
|
+
"type": "DIVISION"
|
88
|
+
}
|
89
|
+
],
|
90
|
+
"name": "Walks Education",
|
91
|
+
"onboarding": true,
|
92
|
+
"orgCodes": [
|
93
|
+
"EDUC",
|
94
|
+
"WEDU",
|
95
|
+
"EDUW",
|
96
|
+
"WAED",
|
97
|
+
"EDWA"
|
98
|
+
],
|
99
|
+
"type": "DEPARTMENT"
|
100
|
+
}
|
101
|
+
],
|
102
|
+
"name": "Department of Funny Walks",
|
103
|
+
"onboarding": false,
|
104
|
+
"orgCodes": [
|
105
|
+
"HAAA"
|
106
|
+
],
|
107
|
+
"type": "SCHOOL"
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"alias": "graduate-school-of-parrots",
|
111
|
+
"browsable": false,
|
112
|
+
"name": "Graduate School of Parrots",
|
113
|
+
"onboarding": true,
|
114
|
+
"orgCodes": [
|
115
|
+
"PARR"
|
116
|
+
],
|
117
|
+
"type": "SCHOOL",
|
118
|
+
"url": "http://parrots.python.pizza/"
|
119
|
+
}],
|
120
|
+
"name": "Stanford Test",
|
121
|
+
"onboarding": false,
|
122
|
+
"orgCodes": [
|
123
|
+
"STAN"
|
124
|
+
],
|
125
|
+
"type": "ROOT",
|
126
|
+
"url": "http://python.pizza/"
|
127
|
+
}
|
128
|
+
```
|
129
|
+
|
130
|
+
For any given Organization, these keys / fields appear in its Organization object:
|
131
|
+
|
132
|
+
| Key | Expectation | Definition | Notes |
|
133
|
+
| ------------ | ----------------------------- | ---------- | ----- |
|
134
|
+
| 'orgCodes' | Array of 4-letter strings | the Stanford-specific (for ... HR?) organization code or identifier | history / previous projects say these can be helpful but may also reflect previous / no longer extant departments or relationships |
|
135
|
+
| 'type' | String, 1 of following values: `ROOT`, `SCHOOL`, `DEPARTMENT`, `DIVISION`, `SUB_DIVISION` | The type of organization within the University (aka the `ROOT`) | See mappings to RIALTO / VIVO types above |
|
136
|
+
| 'name' | String | Name or label for the organization represented by the present JSON Object | n/a |
|
137
|
+
| 'children' | Array of Organization Objects | Any organizations that are children of the organization represented by the present JSON Object | Should we iterate on these for data or just to know what orgs are children, then call their own API response separately? |
|
138
|
+
| 'browsable' | Boolean | Uncertain. If is public data? | n/a |
|
139
|
+
| 'alias' | String, API query path value | The API URL path value for the organization. | Is this used for anything other than the API? |
|
140
|
+
| 'url' | String, HTTP URL | URL provided for the given organization. | n/a |
|
141
|
+
| 'onboarding' | Boolean | Uncertain. If onboarding exists? | n/a |
|
142
|
+
|
143
|
+
## Sample Output
|
144
|
+
|
145
|
+
Sample output VIVO JSON-LD data for a provided Organization is in [our fixtures (this has been shortened and any real values replaced)](../spec/fixtures/vivo/org-out.json). See a simplified example of the JSON output below:
|
146
|
+
|
147
|
+
```JSON
|
148
|
+
{
|
149
|
+
"@context": {
|
150
|
+
"dbpedia": "http://dbpedia.org/resource/",
|
151
|
+
"dbo": "http://dbpedia.org/ontology/",
|
152
|
+
"obo": "http://purl.obolibrary.org/obo/",
|
153
|
+
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
154
|
+
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
|
155
|
+
"vivo": "http://vivoweb.org/ontology/core#"
|
156
|
+
},
|
157
|
+
"@graph": [
|
158
|
+
{
|
159
|
+
"@id": "http://rialto.stanford.edu/individual/stanford-test",
|
160
|
+
"@type": "vivo:University",
|
161
|
+
"dbo:alias": "stanford-test",
|
162
|
+
"rdfs:label": "Stanford Test",
|
163
|
+
"rdfs:seeAlso": "http://python.pizza/",
|
164
|
+
"dbo:code": [
|
165
|
+
"STAN"
|
166
|
+
],
|
167
|
+
"obo:BFO_0000051": [
|
168
|
+
{
|
169
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks"
|
170
|
+
},
|
171
|
+
{
|
172
|
+
"@id": "http://rialto.stanford.edu/individual/graduate-school-of-parrots"
|
173
|
+
}
|
174
|
+
],
|
175
|
+
"obo:RO_0001025": {
|
176
|
+
"@id": "dbpedia:Palo_Alto,_California"
|
177
|
+
}
|
178
|
+
},
|
179
|
+
{
|
180
|
+
"@id": "http://rialto.stanford.edu/individual/graduate-school-of-parrots",
|
181
|
+
"@type": "vivo:School",
|
182
|
+
"dbo:alias": "graduate-school-of-parrots",
|
183
|
+
"rdfs:label": "Graduate School of Parrots",
|
184
|
+
"rdfs:seeAlso": "http://parrots.python.pizza/",
|
185
|
+
"dbo:code": [
|
186
|
+
"PARR"
|
187
|
+
],
|
188
|
+
"obo:BFO_0000050": {
|
189
|
+
"@id": "http://rialto.stanford.edu/individual/stanford-test"
|
190
|
+
}
|
191
|
+
},
|
192
|
+
{
|
193
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks",
|
194
|
+
"@type": "vivo:School",
|
195
|
+
"dbo:alias": "department-of-funny-walks",
|
196
|
+
"rdfs:label": "Department of Funny Walks",
|
197
|
+
"dbo:code": [
|
198
|
+
"HAAA"
|
199
|
+
],
|
200
|
+
"obo:BFO_0000050": {
|
201
|
+
"@id": "http://rialto.stanford.edu/individual/stanford-test"
|
202
|
+
},
|
203
|
+
"obo:BFO_0000051": [
|
204
|
+
{
|
205
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks/intercollegiate-walks"
|
206
|
+
},
|
207
|
+
{
|
208
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks/walks-education"
|
209
|
+
}
|
210
|
+
]
|
211
|
+
},
|
212
|
+
{
|
213
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks/intercollegiate-walks",
|
214
|
+
"@type": "vivo:Department",
|
215
|
+
"dbo:alias": "department-of-funny-walks/intercollegiate-walks",
|
216
|
+
"rdfs:label": "Intercollegiate Walks",
|
217
|
+
"dbo:code": [
|
218
|
+
"WALK",
|
219
|
+
"WALZ"
|
220
|
+
],
|
221
|
+
"obo:BFO_0000050": {
|
222
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks"
|
223
|
+
}
|
224
|
+
},
|
225
|
+
{
|
226
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks/walks-education",
|
227
|
+
"@type": "vivo:Department",
|
228
|
+
"dbo:alias": "department-of-funny-walks/walks-education",
|
229
|
+
"rdfs:label": "Walks Education",
|
230
|
+
"dbo:code": [
|
231
|
+
"EDUC",
|
232
|
+
"WEDU",
|
233
|
+
"EDUW",
|
234
|
+
"WAED",
|
235
|
+
"EDWA"
|
236
|
+
],
|
237
|
+
"obo:BFO_0000050": {
|
238
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks"
|
239
|
+
},
|
240
|
+
"obo:BFO_0000051": {
|
241
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks/walks-education/adventure-walks"
|
242
|
+
}
|
243
|
+
},
|
244
|
+
{
|
245
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks/walks-education/adventure-walks",
|
246
|
+
"@type": "vivo:Division",
|
247
|
+
"dbo:alias": "department-of-funny-walks/walks-education/adventure-walks",
|
248
|
+
"rdfs:label": "Adventure Walks",
|
249
|
+
"dbo:code": [
|
250
|
+
"ADVE"
|
251
|
+
],
|
252
|
+
"obo:BFO_0000050": {
|
253
|
+
"@id": "http://rialto.stanford.edu/individual/department-of-funny-walks/walks-education"
|
254
|
+
}
|
255
|
+
}
|
256
|
+
]
|
257
|
+
}
|
258
|
+
```
|
@@ -1,26 +1,19 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'traject_plus'
|
4
|
+
require 'rialto/etl/readers/stanford_organizations_json_reader'
|
5
|
+
require 'rialto/etl/writers/ntriples_writer'
|
6
|
+
|
3
7
|
extend TrajectPlus::Macros
|
4
8
|
extend TrajectPlus::Macros::JSON
|
5
9
|
|
6
10
|
settings do
|
7
|
-
provide 'writer_class_name', '
|
8
|
-
provide 'reader_class_name', 'Rialto::Etl::StanfordOrganizationsJsonReader'
|
11
|
+
provide 'writer_class_name', 'Rialto::Etl::Writers::NtriplesWriter'
|
12
|
+
provide 'reader_class_name', 'Rialto::Etl::Readers::StanfordOrganizationsJsonReader'
|
9
13
|
end
|
10
14
|
|
11
|
-
# context_object = {
|
12
|
-
# '@context' => {
|
13
|
-
# obo: 'http://purl.obolibrary.org/obo/',
|
14
|
-
# rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
|
15
|
-
# vcard: 'http://www.w3.org/2006/vcard/ns#',
|
16
|
-
# vivo: 'http://vivoweb.org/ontology/core#',
|
17
|
-
# stanford: 'http://authorities.stanford.edu/orgs#'
|
18
|
-
# }
|
19
|
-
# }
|
20
|
-
|
21
|
-
# puts context_object.to_json
|
22
15
|
to_field '@id', extract_json('$.alias'), transform: transform(prepend: 'http://authorities.stanford.edu/orgs#'), single: true
|
23
16
|
to_field '@type', extract_json('$.type', translation_map: 'stanford_organizations_to_vivo_types'), single: true
|
24
|
-
to_field '
|
25
|
-
to_field '
|
26
|
-
to_field '
|
17
|
+
to_field 'http://www.w3.org/2000/01/rdf-schema#label', extract_json('$.name'), single: true
|
18
|
+
to_field 'http://www.w3.org/2000/01/rdf-schema#seeAlso', extract_json('$.url'), single: true
|
19
|
+
to_field 'http://vivoweb.org/ontology/core#abbreviation', extract_json('$.orgCodes'), single: true
|
@@ -10,8 +10,10 @@ module Rialto
|
|
10
10
|
module Extractors
|
11
11
|
# Abstract class hitting Stanford APIs using Stanford authz
|
12
12
|
class AbstractStanfordExtractor
|
13
|
+
# Time at which access token expires (as integer)
|
13
14
|
attr_reader :access_token_expiry_time
|
14
15
|
|
16
|
+
# Hit an API endpoint and return the results
|
15
17
|
def extract
|
16
18
|
raise NotImplementedError, 'Implement #extract in a child class'
|
17
19
|
end
|
@@ -28,7 +30,6 @@ module Rialto
|
|
28
30
|
Faraday.new(uri, headers: connection_headers) do |connection|
|
29
31
|
connection.request :retry, max: 3, interval: 0.8, interval_randomness: 0.2, backoff_factor: 2
|
30
32
|
connection.ssl.update(verify: true, verify_mode: OpenSSL::SSL::VERIFY_PEER)
|
31
|
-
connection.use Faraday::Response::RaiseError
|
32
33
|
connection.adapter :httpclient
|
33
34
|
connection.options.timeout = 500
|
34
35
|
connection.options.open_timeout = 10
|
@@ -53,6 +54,9 @@ module Rialto
|
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
57
|
+
# Set the access token to `nil` to force retrieving a new one
|
58
|
+
#
|
59
|
+
# @return [void]
|
56
60
|
def reset_access_token!
|
57
61
|
@access_token = nil
|
58
62
|
end
|
@@ -63,6 +67,10 @@ module Rialto
|
|
63
67
|
true
|
64
68
|
end
|
65
69
|
|
70
|
+
# Set token expiry time to a new value based on current time
|
71
|
+
#
|
72
|
+
# @param expires_in [#to_i] duration, added to the current time, after which the token expires
|
73
|
+
# @return [void]
|
66
74
|
def reset_expiry_time!(expires_in:)
|
67
75
|
@access_token_expiry_time = current_time + expires_in.to_i
|
68
76
|
end
|
@@ -7,6 +7,7 @@ module Rialto
|
|
7
7
|
module Extractors
|
8
8
|
# Stanford CAP API for orgs
|
9
9
|
class StanfordOrganizations < AbstractStanfordExtractor
|
10
|
+
# Hit an API endpoint and return the results
|
10
11
|
def extract
|
11
12
|
client.get('/cap/v1/orgs/stanford?p=1&ps=10').body
|
12
13
|
rescue StandardError => exception
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'traject_plus'
|
4
|
+
|
5
|
+
module Rialto
|
6
|
+
module Etl
|
7
|
+
# Holds readers for use in Traject mappings
|
8
|
+
module Readers
|
9
|
+
# Read JSON that maps to Stanford orgs
|
10
|
+
class StanfordOrganizationsJsonReader < TrajectPlus::JsonReader
|
11
|
+
# Overrides the implementation inherited from superclass
|
12
|
+
#
|
13
|
+
# @param block [#call] a block that is executed on each organization
|
14
|
+
# @return [String] JSON representation of an organization
|
15
|
+
def each(&block)
|
16
|
+
yield_children(json, block)
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def yield_children(hash, block)
|
22
|
+
block.call(hash)
|
23
|
+
children = children_path(hash)
|
24
|
+
return if children.blank?
|
25
|
+
children.each do |child|
|
26
|
+
yield_children(child, block)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def children_path(hash)
|
31
|
+
JsonPath.on(hash, '$.children').first
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -1,19 +1,23 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require '
|
4
|
-
require 'rialto/etl/stanford_organizations_json_reader'
|
3
|
+
require 'traject'
|
5
4
|
|
6
5
|
module Rialto
|
7
6
|
module Etl
|
8
7
|
module Transformers
|
9
8
|
# Transformer turning Stanford org info into Vivo format
|
10
9
|
class StanfordOrganizationsToVivo
|
10
|
+
# A valid file path
|
11
11
|
attr_reader :input
|
12
12
|
|
13
|
+
# Initialize a new instance of the transformer
|
14
|
+
#
|
15
|
+
# @param input [String] valid file path
|
13
16
|
def initialize(input:)
|
14
17
|
@input = input
|
15
18
|
end
|
16
19
|
|
20
|
+
# Transform a stream into a new representation, using Traject
|
17
21
|
def transform
|
18
22
|
File.open(input, 'r') do |stream|
|
19
23
|
transformer.process(stream)
|
data/lib/rialto/etl/version.rb
CHANGED
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rialto
|
4
|
+
module Etl
|
5
|
+
# Holds writers for use in Traject mappings
|
6
|
+
module Writers
|
7
|
+
# Write JSON-LD records. This writer conforms to Traject's
|
8
|
+
# writer class interface, supporting #initialize, #put, and
|
9
|
+
# #close
|
10
|
+
class JsonldWriter
|
11
|
+
# Traject settings object
|
12
|
+
|
13
|
+
# Constructor
|
14
|
+
def initialize(_); end
|
15
|
+
|
16
|
+
# Append the hash representing a single mapped record to the
|
17
|
+
# list of records held in memory
|
18
|
+
#
|
19
|
+
# @param context [Traject::Indexer::Context] a Traject context
|
20
|
+
# object containing the output of the mapping
|
21
|
+
# @return [Array] a list of all records mapped
|
22
|
+
def put(context)
|
23
|
+
records << context.output_hash
|
24
|
+
end
|
25
|
+
|
26
|
+
# Print a JSON representation of the records with the
|
27
|
+
# JSON-LD context object attached
|
28
|
+
def close
|
29
|
+
$stdout.puts records.to_json
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def records
|
35
|
+
@records ||= []
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rdf'
|
4
|
+
require 'traject'
|
5
|
+
|
6
|
+
module Rialto
|
7
|
+
module Etl
|
8
|
+
# Holds writers for use in Traject mappings
|
9
|
+
module Writers
|
10
|
+
# Write NTriples records
|
11
|
+
class NtriplesWriter < Traject::LineWriter
|
12
|
+
# Overrides the serialization routine from superclass
|
13
|
+
#
|
14
|
+
# @param context [Traject::Indexer::Context] a Traject context
|
15
|
+
# object containing the output of the mapping
|
16
|
+
# @return [String] NTriples representation of the mapping
|
17
|
+
def serialize(context)
|
18
|
+
hash = context.output_hash
|
19
|
+
subject = RDF::URI.new(hash.delete('@id'))
|
20
|
+
type = RDF::URI.new(hash.delete('@type'))
|
21
|
+
graph = RDF::Graph.new << [subject, RDF.type, type]
|
22
|
+
hash.each_pair do |field, values|
|
23
|
+
Array(values).each do |value|
|
24
|
+
graph << [subject, RDF::URI.new(field), value]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
graph.dump(:ntriples)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/mapping.md
CHANGED
@@ -1,22 +1,49 @@
|
|
1
|
-
|
1
|
+
# RIALTO / VIVO Mapping & Mapping Target
|
2
2
|
|
3
|
-
|
3
|
+
This is mapping documentation for the end result of our selected sources to RIALTO / VIVO models. See more information in our [docs folder](docs). This will be iterated on as sources and types are mapped.
|
4
4
|
|
5
|
-
|
6
|
-
browsable (boolean) => ignore
|
7
|
-
children (array) => keep track of parent, iterate over values (for each obo:BFO\_0000050/partOf) and map, keep track of children for obo:BFO\_0000051/hasPart
|
8
|
-
name (string) => rdfs:label
|
9
|
-
onboarding (boolean) => ignore
|
10
|
-
orgCodes (array) => vivo:abbreviation
|
11
|
-
type (string) => see type mappings
|
12
|
-
url (string) => rdfs:seeAlso
|
5
|
+
# Reused Ontologies List (to be further vetted)
|
13
6
|
|
14
|
-
|
7
|
+
- "dbpedia": "http://dbpedia.org/resource/"
|
8
|
+
- "dbo": "http://dbpedia.org/ontology/"
|
9
|
+
- "foaf": "http://xmlns.com/foaf/0.1/"
|
10
|
+
- "obo": "http://purl.obolibrary.org/obo/"
|
11
|
+
- "owl": "http://www.w3.org/2002/07/owl#"
|
12
|
+
- "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
13
|
+
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
|
14
|
+
- "skos": "http://www.w3.org/2004/02/skos/core#"
|
15
|
+
- "vivo": "http://vivoweb.org/ontology/core#"
|
16
|
+
- "xsd": "http://www.w3.org/2001/XMLSchema#"
|
15
17
|
|
16
|
-
|
17
|
-
SUB_DIVISION @type: http://vivoweb.org/ontology/core#Division
|
18
|
-
ROOT @type: http://vivoweb.org/ontology/core#University
|
19
|
-
SCHOOL @type: http://vivoweb.org/ontology/core#School
|
20
|
-
DEPARTMENT @type: http://vivoweb.org/ontology/core#Department
|
18
|
+
## Overarching RIALTO Model
|
21
19
|
|
20
|
+
TBD
|
22
21
|
|
22
|
+
## Mappings to RIALTO
|
23
|
+
|
24
|
+
### For Organizations
|
25
|
+
|
26
|
+
| Source & key | RIALTO entry | Notes |
|
27
|
+
| -------------- | --------------------------------------------------------- | ----- |
|
28
|
+
| CAP 'type' | `rdf:type` / `@type` for given organization at RIALTO URI | See mapping below. |
|
29
|
+
| CAP 'alias' | `@id` `http://rialto.stanford.edu/individual/{alias}` | Domain may change. Want to confirm alias is consistent enough for use in minting resources that will be fed by all data sources. |
|
30
|
+
| CAP 'alias' | `dbo:alias` then value as string | Capture the alias also in the metadata explicitly. |
|
31
|
+
| CAP 'name' | `rdfs:label` then value as string | any alt labels? repeated labels? need to check. |
|
32
|
+
| CAP 'orgCodes' | for each value, `dbo:code` then value as string | alternate identifiers? where will we look for later matching? |
|
33
|
+
| CAP 'children' | `obo:BFO_0000051` (*has part*) then child's alias value as RIALTO URI | capture each presumed URI from the alias, but get the data for that specific organization from separate API calls...? See question above. |
|
34
|
+
| CAP 'children' | for each child, `obo:BFO_0000050` (*part of*) then parent's RIALTO URI | how to make sure this adds data to the child's graph without removing data the parent won't know about? Or just use Stanford / ROOT and add all data for all Organizations from that? |
|
35
|
+
| CAP 'url' | `rdfs:seeAlso` then value as IRI | |
|
36
|
+
|
37
|
+
### RIALTO Organization Types Mapping
|
38
|
+
|
39
|
+
| Source & Type | RIALTO / VIVO Entity Type | Notes |
|
40
|
+
| --------------------- | ------------------------------------ | ----- |
|
41
|
+
| CAP@type ROOT | vivo:University << foaf:Organization | |
|
42
|
+
| CAP@type SCHOOL | vivo:School << foaf:Organization | |
|
43
|
+
| CAP@type DEPARTMENT | vivo:Department << foaf:Organization | From VIVO for Department: "Use for any non-academic department" so this may not fit long-term. Seems like vivo:AcademicDepartment could be better, but departments in CAP are not consistently academic or other. |
|
44
|
+
| CAP@type DIVISION | vivo:Division << foaf:Organization, vivo:ExtensionUnit | From VIVO: subclass of Extension Unit, "A unit devoted primarily to extension activities, whether for outreach or research", so this may not fit long term. |
|
45
|
+
| CAP@type SUB_DIVISION | vivo:Division << foaf:Organization, vivo:ExtensionUnit | See note above. No requirement to distinguish sub-ness in RIALTO. |
|
46
|
+
|
47
|
+
## Sample RIALTO Graph
|
48
|
+
|
49
|
+
Sample output VIVO JSON-LD data for a provided Organization is in [our fixtures (this has been shortened and any real values replaced)](spec/fixtures/vivo/org-out.json). A larger file with a fuller graph generated from multiple sources will be added in the near future.
|
data/rialto-etl.gemspec
CHANGED
@@ -20,10 +20,13 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'faraday'
|
22
22
|
spec.add_dependency 'httpclient'
|
23
|
+
spec.add_dependency 'rdf'
|
23
24
|
spec.add_dependency 'traject_plus', '>= 0.0.2'
|
24
25
|
|
25
26
|
spec.add_development_dependency 'bundler', '~> 1.11'
|
26
27
|
spec.add_development_dependency 'rake', '~> 10.0'
|
27
28
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
28
29
|
spec.add_development_dependency 'rubocop', '~> 0.52.0'
|
30
|
+
spec.add_development_dependency 'rubocop-rspec', '~> 1.21.0'
|
31
|
+
spec.add_development_dependency 'webmock'
|
29
32
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rialto-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael J. Giarlo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rdf
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: traject_plus
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,34 @@ dependencies:
|
|
108
122
|
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
124
|
version: 0.52.0
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rubocop-rspec
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.21.0
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.21.0
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: webmock
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
111
153
|
description:
|
112
154
|
email:
|
113
155
|
- mjgiarlo@stanford.edu
|
@@ -121,22 +163,26 @@ files:
|
|
121
163
|
- ".rubocop_todo.yml"
|
122
164
|
- ".travis.yml"
|
123
165
|
- Gemfile
|
166
|
+
- LICENSE
|
124
167
|
- README.md
|
125
168
|
- Rakefile
|
126
169
|
- bin/console
|
127
170
|
- bin/extract
|
128
171
|
- bin/setup
|
129
172
|
- bin/transform
|
173
|
+
- docs/CAP-organizations.md
|
130
174
|
- lib/rialto/etl.rb
|
131
175
|
- lib/rialto/etl/configs/stanford_organizations.rb
|
132
176
|
- lib/rialto/etl/extractors.rb
|
133
177
|
- lib/rialto/etl/extractors/abstract_stanford_extractor.rb
|
134
178
|
- lib/rialto/etl/extractors/stanford_organizations.rb
|
135
179
|
- lib/rialto/etl/extractors/stanford_researchers.rb
|
136
|
-
- lib/rialto/etl/stanford_organizations_json_reader.rb
|
180
|
+
- lib/rialto/etl/readers/stanford_organizations_json_reader.rb
|
137
181
|
- lib/rialto/etl/transformers.rb
|
138
182
|
- lib/rialto/etl/transformers/stanford_organizations_to_vivo.rb
|
139
183
|
- lib/rialto/etl/version.rb
|
184
|
+
- lib/rialto/etl/writers/jsonld_writer.rb
|
185
|
+
- lib/rialto/etl/writers/ntriples_writer.rb
|
140
186
|
- lib/translation_maps/stanford_organizations_to_vivo_types.yaml
|
141
187
|
- mapping.md
|
142
188
|
- rialto-etl.gemspec
|
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Rialto
|
4
|
-
module Etl
|
5
|
-
# Read JSON that maps to Stanford orgs
|
6
|
-
class StanfordOrganizationsJsonReader < TrajectPlus::JsonReader
|
7
|
-
def each(&block)
|
8
|
-
yield_children(json, block)
|
9
|
-
end
|
10
|
-
|
11
|
-
private
|
12
|
-
|
13
|
-
def yield_children(hash, block)
|
14
|
-
block.call(hash)
|
15
|
-
children = children_path(hash)
|
16
|
-
return if children.blank?
|
17
|
-
children.each do |child|
|
18
|
-
yield_children(child, block)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
def children_path(hash)
|
23
|
-
JsonPath.on(hash, '$.children').first
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|