marc2linkeddata 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.env_example +62 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +202 -0
- data/README.md +234 -0
- data/bin/console +8 -0
- data/bin/loc_downloads.sh +62 -0
- data/bin/loc_import_4store.sh +24 -0
- data/bin/loc_import_allegrograph.sh +22 -0
- data/bin/loc_import_marklogic.sh +19 -0
- data/bin/readMarcAuthority +113 -0
- data/lib/marc2linkeddata/configuration.rb +146 -0
- data/lib/marc2linkeddata/isni.rb +23 -0
- data/lib/marc2linkeddata/lib_auth.rb +17 -0
- data/lib/marc2linkeddata/loc.rb +91 -0
- data/lib/marc2linkeddata/oclc_creative_work.rb +44 -0
- data/lib/marc2linkeddata/oclc_identity.rb +46 -0
- data/lib/marc2linkeddata/oclc_resource.rb +79 -0
- data/lib/marc2linkeddata/oclc_work.rb +19 -0
- data/lib/marc2linkeddata/parseMarcAuthority.rb +492 -0
- data/lib/marc2linkeddata/readMarcCatalog.rb +175 -0
- data/lib/marc2linkeddata/resource.rb +131 -0
- data/lib/marc2linkeddata/sparql.rb +55 -0
- data/lib/marc2linkeddata/viaf.rb +48 -0
- data/lib/marc2linkeddata.rb +64 -0
- data/marc2linkeddata.gemspec +40 -0
- data/spec/marc2linkeddata/configuration_spec.rb +84 -0
- data/spec/marc2linkeddata/loc_spec.rb +71 -0
- data/spec/marc2linkeddata/resource_spec.rb +53 -0
- data/spec/marc2linkeddata/viaf_spec.rb +53 -0
- data/spec/marc2linkeddata_spec.rb +39 -0
- data/spec/spec_helper.rb +92 -0
- metadata +243 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fa055fa139e900737ad34ddc5a70fe3b256c3570
|
4
|
+
data.tar.gz: 5a9b29792f26b0f7a48cf522aa77cc7535f72ad9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7ea8a770cff4eeca40054df514aff320200d807b360a9bcce8538c65f31139e6a68d40c940b5e60038379a67f832d9b34e326b42cf7100cc9912397934959c40
|
7
|
+
data.tar.gz: 806dcea47e0ab546acc347765e2d241faab174dc1add50b380a84774d717fb7f075667ef2d67a3fd2da6c78075c2e6515a809fe5207596dbb124eb1a9abb2bd1
|
data/.env_example
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# https://github.com/bkeepers/dotenv is used for
|
2
|
+
# default configuration options. The values in
|
3
|
+
# this file do not replace existing values in
|
4
|
+
# the shell ENV.
|
5
|
+
|
6
|
+
# Uncomment and set values as required. See used settings in
|
7
|
+
# lib/marc2linkeddata/configuration.rb
|
8
|
+
|
9
|
+
DEBUG: false
|
10
|
+
|
11
|
+
# Authority record field numbers for useful link data
|
12
|
+
FIELD_AUTH_LOC: 920
|
13
|
+
FIELD_AUTH_VIAF: 921
|
14
|
+
FIELD_AUTH_ISNI: 922
|
15
|
+
FIELD_AUTH_OCLC: 035
|
16
|
+
|
17
|
+
# Options for retrieving linked data to resolve and enhance data.
|
18
|
+
# Set all false for the quickest translation.
|
19
|
+
GET_ISNI: false
|
20
|
+
GET_LOC: true # currently required, should be optional
|
21
|
+
GET_OCLC: false
|
22
|
+
GET_VIAF: false
|
23
|
+
|
24
|
+
# Using OCLC identity, retrieve RDF for creative works?
|
25
|
+
# Only works when GET_OCLC==true; it can slow processing significantly.
|
26
|
+
OCLC_AUTH2WORKS: false
|
27
|
+
|
28
|
+
LOG_FILE: 'marc2ld.log'
|
29
|
+
|
30
|
+
LIB_PREFIX: http://linked-data.example.org/library/
|
31
|
+
|
32
|
+
# Use FOAF or SCHEMA or both
|
33
|
+
USE_FOAF: false
|
34
|
+
USE_SCHEMA: true
|
35
|
+
|
36
|
+
# Local triple store for LOC authority data,
|
37
|
+
# accessed via an HTTP API with basic authentication.
|
38
|
+
# See downloads at http://id.loc.gov/download/
|
39
|
+
LOCAL_LOC_USER: 'sparqlUser'
|
40
|
+
LOCAL_LOC_PASS: 'sparqlPass'
|
41
|
+
LOCAL_LOC_HOST: 'dev-sparql.example.org'
|
42
|
+
LOCAL_LOC_PORT: '80'
|
43
|
+
LOCAL_LOC_PATH: '/sparql?'
|
44
|
+
|
45
|
+
# Redis Persistence - based on https://github.com/redis/redis-rb
|
46
|
+
# - essential options:
|
47
|
+
# export REDIS4MARC=true # enable redis persistence (default = false)
|
48
|
+
# - supplementary options:
|
49
|
+
# Set the REDIS_URL for a custom redis configuration.
|
50
|
+
# export REDIS_URL="redis://{user}:{password}@{host}:{port}/{db}"
|
51
|
+
# export REDIS_READ=true # enable redis reads (default = REDIS4MARC || false)
|
52
|
+
# # faster reading of triples from pre-populated redis data
|
53
|
+
# export REDIS_WRITE=true # enable redis writes (default = REDIS4MARC || false)
|
54
|
+
# # current data is updated in redis
|
55
|
+
REDIS4MARC: false
|
56
|
+
# Uncomment these options to disable read or write (independently)
|
57
|
+
#REDIS_READ: false
|
58
|
+
#REDIS_WRITE: false
|
59
|
+
# Leave commented to use default redis configs on localhost
|
60
|
+
#REDIS_URL: localhost
|
61
|
+
|
62
|
+
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
Apache License
|
2
|
+
Version 2.0, January 2004
|
3
|
+
http://www.apache.org/licenses/
|
4
|
+
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6
|
+
|
7
|
+
1. Definitions.
|
8
|
+
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
11
|
+
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13
|
+
the copyright owner that is granting the License.
|
14
|
+
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
16
|
+
other entities that control, are controlled by, or are under common
|
17
|
+
control with that entity. For the purposes of this definition,
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
19
|
+
direction or management of such entity, whether by contract or
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22
|
+
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24
|
+
exercising permissions granted by this License.
|
25
|
+
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
27
|
+
including but not limited to software source code, documentation
|
28
|
+
source, and configuration files.
|
29
|
+
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
31
|
+
transformation or translation of a Source form, including but
|
32
|
+
not limited to compiled object code, generated documentation,
|
33
|
+
and conversions to other media types.
|
34
|
+
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
36
|
+
Object form, made available under the License, as indicated by a
|
37
|
+
copyright notice that is included in or attached to the work
|
38
|
+
(an example is provided in the Appendix below).
|
39
|
+
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46
|
+
the Work and Derivative Works thereof.
|
47
|
+
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
49
|
+
the original version of the Work and any modifications or additions
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
61
|
+
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
64
|
+
subsequently incorporated within the Work.
|
65
|
+
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
72
|
+
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78
|
+
where such license applies only to those patent claims licensable
|
79
|
+
by such Contributor that are necessarily infringed by their
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
82
|
+
institute patent litigation against any entity (including a
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
85
|
+
or contributory patent infringement, then any patent licenses
|
86
|
+
granted to You under this License for that Work shall terminate
|
87
|
+
as of the date such litigation is filed.
|
88
|
+
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
91
|
+
modifications, and in Source or Object form, provided that You
|
92
|
+
meet the following conditions:
|
93
|
+
|
94
|
+
(a) You must give any other recipients of the Work or
|
95
|
+
Derivative Works a copy of this License; and
|
96
|
+
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
98
|
+
stating that You changed the files; and
|
99
|
+
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
102
|
+
attribution notices from the Source form of the Work,
|
103
|
+
excluding those notices that do not pertain to any part of
|
104
|
+
the Derivative Works; and
|
105
|
+
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
108
|
+
include a readable copy of the attribution notices contained
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
111
|
+
of the following places: within a NOTICE text file distributed
|
112
|
+
as part of the Derivative Works; within the Source form or
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
114
|
+
within a display generated by the Derivative Works, if and
|
115
|
+
wherever such third-party notices normally appear. The contents
|
116
|
+
of the NOTICE file are for informational purposes only and
|
117
|
+
do not modify the License. You may add Your own attribution
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
120
|
+
that such additional attribution notices cannot be construed
|
121
|
+
as modifying the License.
|
122
|
+
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
124
|
+
may provide additional or different license terms and conditions
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
128
|
+
the conditions stated in this License.
|
129
|
+
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
133
|
+
this License, without any additional terms or conditions.
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135
|
+
the terms of any separate license agreement you may have executed
|
136
|
+
with Licensor regarding such Contributions.
|
137
|
+
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
140
|
+
except as required for reasonable and customary use in describing the
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
142
|
+
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
152
|
+
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
158
|
+
incidental, or consequential damages of any character arising as a
|
159
|
+
result of this License or out of the use or inability to use the
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
162
|
+
other commercial damages or losses), even if such Contributor
|
163
|
+
has been advised of the possibility of such damages.
|
164
|
+
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168
|
+
or other liability obligations and/or rights consistent with this
|
169
|
+
License. However, in accepting such obligations, You may act only
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
174
|
+
of your accepting any such warranty or additional liability.
|
175
|
+
|
176
|
+
END OF TERMS AND CONDITIONS
|
177
|
+
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
179
|
+
|
180
|
+
To apply the Apache License to your work, attach the following
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "{}"
|
182
|
+
replaced with your own identifying information. (Don't include
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
184
|
+
comment syntax for the file format. We also recommend that a
|
185
|
+
file or class name and description of purpose be included on the
|
186
|
+
same "printed page" as the copyright notice for easier
|
187
|
+
identification within third-party archives.
|
188
|
+
|
189
|
+
Copyright {yyyy} {name of copyright owner}
|
190
|
+
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192
|
+
you may not use this file except in compliance with the License.
|
193
|
+
You may obtain a copy of the License at
|
194
|
+
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
196
|
+
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200
|
+
See the License for the specific language governing permissions and
|
201
|
+
limitations under the License.
|
202
|
+
|
data/README.md
ADDED
@@ -0,0 +1,234 @@
|
|
1
|
+
|
2
|
+
marc2linkeddata
|
3
|
+
===============
|
4
|
+
|
5
|
+
Utilities for translating MARC21 into linked data.
|
6
|
+
|
7
|
+
Optional Dependencies
|
8
|
+
|
9
|
+
- http://redis.io/
|
10
|
+
- http://4store.org/
|
11
|
+
- see notes below
|
12
|
+
|
13
|
+
Install
|
14
|
+
|
15
|
+
gem install marc2linkeddata
|
16
|
+
# when a gem is published
|
17
|
+
|
18
|
+
Configure
|
19
|
+
|
20
|
+
- set env values and/or create or modify a .env file
|
21
|
+
- see the .env_example file for details
|
22
|
+
|
23
|
+
Use
|
24
|
+
|
25
|
+
- authority files
|
26
|
+
|
27
|
+
require 'marc2linkeddata'
|
28
|
+
marc_filename = 'stf_auth.01.mrc'
|
29
|
+
marc_file = File.open(marc_filename,'r')
|
30
|
+
until marc_file.eof?
|
31
|
+
leader = ParseMarcAuthority::parse_leader(marc_file)
|
32
|
+
if leader[:type] == 'z'
|
33
|
+
raw = marc_file.read(leader[:length])
|
34
|
+
record = MARC::Reader.decode(raw)
|
35
|
+
auth = ParseMarcAuthority.new(record)
|
36
|
+
auth_id = "auth:#{auth.get_id}"
|
37
|
+
triples = auth.to_ttl
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
Clone
|
42
|
+
|
43
|
+
git clone git@github.com:darrenleeweber/marc2linkeddata.git
|
44
|
+
cd marc2linkeddata
|
45
|
+
./bin/setup.sh
|
46
|
+
./bin/test.sh
|
47
|
+
cp .env_example .env # then edit .env
|
48
|
+
|
49
|
+
Script
|
50
|
+
|
51
|
+
# First configure (see above).
|
52
|
+
|
53
|
+
# Translate a MARC21 authority file to a turtle file.
|
54
|
+
# readMarcAuthority [ authfile1.mrc .. authfileN.mrc ]
|
55
|
+
.binstubs/readMarcAuthority data/auth.01.mrc
|
56
|
+
|
57
|
+
# Check the syntax of the resulting turtle file.
|
58
|
+
rapper -c -i turtle data/auth.01.ttl
|
59
|
+
|
60
|
+
# License
|
61
|
+
|
62
|
+
Copyright 2014 The Board of Trustees of the Leland Stanford Junior University.
|
63
|
+
|
64
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
65
|
+
you may not use this file except in compliance with the License.
|
66
|
+
You may obtain a copy of the License at
|
67
|
+
|
68
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
69
|
+
|
70
|
+
Unless required by applicable law or agreed to in writing, software
|
71
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
72
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
73
|
+
See the License for the specific language governing permissions and
|
74
|
+
limitations under the License.
|
75
|
+
|
76
|
+
|
77
|
+
# Redis
|
78
|
+
|
79
|
+
On Ubuntu, check the system redis is installed and running:
|
80
|
+
|
81
|
+
sudo apt-get install redis-server redis-tools redis-desktop-manager
|
82
|
+
service redis-server status
|
83
|
+
# If necessary:
|
84
|
+
#sudo service redis-server start
|
85
|
+
#sudo service redis-server restart
|
86
|
+
|
87
|
+
Useful during development (use at your own risk):
|
88
|
+
|
89
|
+
redis-cli 'FLUSHALL'
|
90
|
+
|
91
|
+
# 4store
|
92
|
+
|
93
|
+
- http://4store.org/
|
94
|
+
- http://4store.org/trac/wiki/Documentation
|
95
|
+
|
96
|
+
On Ubuntu, check the system 4store is installed and running:
|
97
|
+
|
98
|
+
# installation
|
99
|
+
sudo apt-get install 4store lib4store-dev lib4store0
|
100
|
+
# that should install dependencies, such as:
|
101
|
+
#sudo apt-get install libpcre3-dev
|
102
|
+
#sudo apt-get install libraptor2-dev libraptor2-doc raptor2-utils
|
103
|
+
#sudo apt-get install librasqal3-dev rasqal-utils
|
104
|
+
# service admin
|
105
|
+
sudo service 4store status
|
106
|
+
# If necessary:
|
107
|
+
#sudo service 4store start
|
108
|
+
#sudo service 4store restart
|
109
|
+
|
110
|
+
Build from source
|
111
|
+
|
112
|
+
# assuming 64-bit linux OS (e.g. Ubuntu)
|
113
|
+
# install dependencies, e.g.
|
114
|
+
sudo apt-get install libavahi-common3 libavahi-client3
|
115
|
+
sudo apt-get install libraptor2-dev libraptor2-doc raptor2-utils
|
116
|
+
sudo apt-get install librasqal3-dev librasqal3-doc rasqal-utils
|
117
|
+
sudo apt-get install libpcre3-dev
|
118
|
+
git clone https://github.com/garlik/4store.git
|
119
|
+
cd 4store
|
120
|
+
more docs/INSTALL # outdated, but read it anyway
|
121
|
+
# also read http://4store.org/trac/wiki/Install
|
122
|
+
sh autogen.sh
|
123
|
+
./configure
|
124
|
+
make
|
125
|
+
sudo make install # installs binaries to /usr/local/bin/4s-*
|
126
|
+
# optional:
|
127
|
+
#make test
|
128
|
+
|
129
|
+
/etc/4store.conf should contain:
|
130
|
+
|
131
|
+
[4s-boss]
|
132
|
+
discovery = default
|
133
|
+
[ld4l]
|
134
|
+
port = 9000
|
135
|
+
unsafe = true
|
136
|
+
|
137
|
+
Run 4s-boss:
|
138
|
+
|
139
|
+
#4s-boss -D # debug mode to verify it works OK
|
140
|
+
# kill the process with Ctrl-C
|
141
|
+
4s-boss
|
142
|
+
|
143
|
+
When 4s-boss is running, 4s-admin can interact with it.
|
144
|
+
|
145
|
+
4s-admin --help
|
146
|
+
4s-admin list-nodes
|
147
|
+
4s-admin list-stores
|
148
|
+
# See other 4s-* utils, like 4s-size
|
149
|
+
4s-size ld4l
|
150
|
+
|
151
|
+
Create and start a KB store:
|
152
|
+
|
153
|
+
touch /var/log/4store/query-ld4l.log
|
154
|
+
4s-admin create-store ld4l
|
155
|
+
4s-admin start-stores ld4l
|
156
|
+
|
157
|
+
See 4store wiki for additional notes on creating databases at
|
158
|
+
- http://4store.org/trac/wiki/Documentation
|
159
|
+
- http://4store.org/trac/wiki/CreateDatabase
|
160
|
+
|
161
|
+
## Useful Development Commands
|
162
|
+
|
163
|
+
Preliminaries:
|
164
|
+
|
165
|
+
# First shutdown the system 4store service
|
166
|
+
sudo service 4store status
|
167
|
+
sudo service 4store stop
|
168
|
+
# Optional - switch to manual control of 4store service
|
169
|
+
#echo "manual" | sudo tee /etc/init/4store.override
|
170
|
+
# Start 4s-boss
|
171
|
+
4s-boss
|
172
|
+
|
173
|
+
Routine commands (use at your own risk):
|
174
|
+
|
175
|
+
4s-admin stop-stores ld4l && 4s-admin delete-stores ld4l
|
176
|
+
4s-admin create-store ld4l && 4s-admin start-stores ld4l
|
177
|
+
4s-httpd -D -R -s-1 ld4l
|
178
|
+
# 4s-httpd locks out other processes, like 4s-size.
|
179
|
+
# 4s-httpd options are read from /etc/4store.conf, plus:
|
180
|
+
# -D = debug info
|
181
|
+
# -R = reasoning (query rewriting)
|
182
|
+
# -s -1 = no timeouts
|
183
|
+
|
184
|
+
## Loading Data from the Library of Congress (LOC)
|
185
|
+
|
186
|
+
- Download the LOC data
|
187
|
+
|
188
|
+
cd $YOUR_DOWNLOAD_PATH
|
189
|
+
# when the marc2linkeddata gem is installed,
|
190
|
+
# this script should be available in the path.
|
191
|
+
# The download could take a long time.
|
192
|
+
loc_downloads.sh
|
193
|
+
|
194
|
+
- Add to /etc/4store.conf:
|
195
|
+
|
196
|
+
[loc]
|
197
|
+
port = 9001
|
198
|
+
unsafe = true
|
199
|
+
|
200
|
+
- Create a new KB for the LOC data
|
201
|
+
|
202
|
+
sudo service 4store stop
|
203
|
+
touch /var/log/4store/query-loc.log
|
204
|
+
4s-boss
|
205
|
+
4s-admin create-store loc
|
206
|
+
4s-admin start-stores loc
|
207
|
+
4s-admin list-stores
|
208
|
+
|
209
|
+
- Import the LOC data into 4store
|
210
|
+
|
211
|
+
cd $YOUR_DOWNLOAD_PATH
|
212
|
+
# when the marc2linkeddata gem is installed,
|
213
|
+
# this script should be available in the path.
|
214
|
+
# The import could take a long time.
|
215
|
+
loc_import_4store.sh
|
216
|
+
|
217
|
+
- Run the 4s-httpd server for the LOC KB
|
218
|
+
|
219
|
+
4s-httpd -D -R -s-1 loc
|
220
|
+
# 4s-httpd locks out other processes, like 4s-size.
|
221
|
+
# 4s-httpd options are read from /etc/4store.conf, plus:
|
222
|
+
# -D = debug info
|
223
|
+
# -R = reasoning (query rewriting)
|
224
|
+
# -s -1 = no timeouts
|
225
|
+
|
226
|
+
- Configure marc2linkeddata to use this KB
|
227
|
+
|
228
|
+
# TODO, but it should result in something like this:
|
229
|
+
# repo = RDF::FourStore::Repository.new('http://localhost:9001')
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
|
234
|
+
|
data/bin/console
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env bash
#
# Download the Library of Congress (LOC) linked-data dumps into the current
# directory and unpack them.
# LOC downloads are available at http://id.loc.gov/download/
#
# Downloads use `wget -c`, so a partially transferred archive is resumed
# rather than restarted.  A failed step is reported on stderr and the
# remaining archives are still attempted; the exit status is non-zero when
# any step failed.

LOC_DATA_URL='http://id.loc.gov/static/data'

# Fetch the gzipped names authority dump and store it as
# authoritiesnames_madsrdf.nt.  Nothing is done when that file already
# exists and is not empty.
# Returns: 0 on success (or when skipped), non-zero if any step failed.
fetch_names_madsrdf() {
  local gz_file='authoritiesnames.nt.madsrdf.gz'
  local nt_file='authoritiesnames_madsrdf.nt'

  if [[ -s "$nt_file" ]]; then
    return 0
  fi
  # Chained with && so a failed download is never gunzipped or renamed.
  wget -c "${LOC_DATA_URL}/${gz_file}" \
    && gunzip "$gz_file" \
    && mv "${gz_file%.gz}" "$nt_file"
}

# Fetch one zip archive from LOC_DATA_URL and unpack it in the current
# directory, overwriting files from an earlier extraction.
# Arguments: $1 - archive file name, e.g. vocabularycountries.nt.zip
# Returns:   0 on success, non-zero if the download or extraction failed.
fetch_zip() {
  local archive=$1

  # Only unpack an archive that was downloaded completely.
  wget -c "${LOC_DATA_URL}/${archive}" \
    && unzip -o "$archive"
}

main() {
  local archive
  local status=0

  # Skipping skos because most of the data is in madsrdf.  The skos dumps
  # that are deliberately not downloaded are:
  #   authoritiesnames.nt.skos.gz
  #   authoritiessubjects.nt.skos.zip
  local -a zip_archives=(
    authoritiessubjects.nt.madsrdf.zip   # created subjects-madsrdf-20140306.nt
    authoritieschildrensSubjects.nt.zip
    authoritiesgenreForms.nt.zip
    authoritiesperformanceMediums.nt.zip
    vocabularycountries.nt.zip
    vocabularyethnographicTerms.nt.zip
    vocabularygeographicAreas.nt.zip
    vocabularygraphicMaterials.nt.zip
    vocabularyiso639-1.nt.zip
    vocabularyiso639-2.nt.zip
    vocabularyiso639-5.nt.zip
    vocabularylanguages.nt.zip
    vocabularyorganizations.nt.zip
    vocabularyrelators.nt.zip
  )

  if ! fetch_names_madsrdf; then
    echo "Failed to fetch authoritiesnames_madsrdf.nt" >&2
    status=1
  fi

  for archive in "${zip_archives[@]}"; do
    if ! fetch_zip "$archive"; then
      echo "Failed to fetch or unpack $archive" >&2
      status=1
    fi
  done

  return "$status"
}

main "$@"
|
62
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env bash
#
# Import Library of Congress N-Triples dumps into a 4store knowledge base.
#
# See loc_downloads.sh to get the data.
# This script assumes the data are ntriples in *.nt files in or below the
# current directory, and that the 4store KB is called 'loc'.
#
# Exit status: 0 when every file was imported, non-zero when the KB is not
# available and running or when any 4s-import call failed.

# Succeeds when `4s-admin list-stores` lists the 'loc' KB as available and
# running.
loc_store_is_running() {
  4s-admin list-stores | grep -q -E 'loc.*available.*running'
}

# Import a single N-Triples file into the 'loc' KB.
# Arguments: $1 - path of the *.nt file
# Returns:   the exit status of 4s-import
import_nt_file() {
  local nt_file=$1
  local filesize

  filesize=$(du -h "$nt_file" | cut -f1)
  # A 'G' in the human-readable size marks a file of a gigabyte or more.
  if [[ "$filesize" == *G* ]]; then
    echo "Running 4s-import for $nt_file ($filesize); this could take some time ..."
  else
    echo "Running 4s-import for $nt_file ($filesize)"
  fi
  # Usage: 4s-import <kbname> <rdf file/URI> ...
  4s-import -f ntriples loc "$nt_file"
}

main() {
  local nt_file
  local status=0
  local -a nt_files=()

  if ! loc_store_is_running; then
    echo "4store KB 'loc' is not available and running; nothing imported." >&2
    return 1
  fi
  echo "4store KB 'loc' is available and running."

  # Collect the paths NUL-delimited so names containing whitespace or glob
  # characters stay intact; collecting first keeps stdin of the import
  # command untouched.
  while IFS= read -r -d '' nt_file; do
    nt_files+=("$nt_file")
  done < <(find . -name '*.nt' -print0)

  for nt_file in "${nt_files[@]}"; do
    if ! import_nt_file "$nt_file"; then
      echo "4s-import failed for $nt_file" >&2
      status=1
    fi
  done

  return "$status"
}

main "$@"
|
24
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env bash
#
# Import Library of Congress N-Triples dumps into AllegroGraph with agload.
#
# See loc_downloads.sh to get the data.
# This script assumes the data are ntriples in *.nt files in or below the
# current directory.
#
# For reference, note that there is an allegrograph ruby gem, see
# https://github.com/emk/rdf-agraph
#
# Exit status: 0 when every agload call succeeded, non-zero otherwise.

# Load a single N-Triples file into the 'loc' repository.
# Arguments: $1 - path of the *.nt file
# Returns:   the exit status of agload
import_nt_file() {
  local nt_file=$1
  local filesize

  filesize=$(du -h "$nt_file" | cut -f1)
  # A 'G' in the human-readable size marks a file of a gigabyte or more.
  if [[ "$filesize" == *G* ]]; then
    echo "Running import for $nt_file ($filesize); this could take some time ..."
  else
    echo "Running import for $nt_file ($filesize)"
  fi
  # Usage: agload <kbname> <rdf files> ...
  # TODO: add option for skipping errors?
  agload -d delete-spo -i ntriples --port 8080 --bulk --rapper loc "$nt_file"
}

main() {
  local nt_file
  local status=0
  local -a nt_files=()

  # Collect the paths NUL-delimited so names containing whitespace or glob
  # characters stay intact; collecting first keeps stdin of the import
  # command untouched.
  while IFS= read -r -d '' nt_file; do
    nt_files+=("$nt_file")
  done < <(find . -name '*.nt' -print0)

  for nt_file in "${nt_files[@]}"; do
    if ! import_nt_file "$nt_file"; then
      echo "agload failed for $nt_file" >&2
      status=1
    fi
  done

  return "$status"
}

main "$@"
|
22
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env bash
#
# Import Library of Congress N-Triples dumps into MarkLogic with mlcp.
#
# See loc_downloads.sh to get the data.
# This script assumes the data are ntriples in *.nt files in or below the
# current directory.
#
# Environment:
#   ML_USER   MarkLogic user name (required)
#   ML_PASS   MarkLogic password (required)
#   MLCP_BIN  path of the mlcp launcher
#             (default: /opt/MarkLogic/mlcp/bin/mlcp.sh)
#
# Exit status: 0 when every import succeeded, non-zero when the credentials
# are missing or any mlcp call failed.

main() {
  local mlcp_bin=${MLCP_BIN:-/opt/MarkLogic/mlcp/bin/mlcp.sh}
  local nt_file filesize
  local status=0
  local -a nt_files=()

  # Without credentials the option list would be malformed, so stop early.
  if [[ -z "${ML_USER:-}" || -z "${ML_PASS:-}" ]]; then
    echo "ML_USER and ML_PASS must be set to the MarkLogic credentials." >&2
    return 1
  fi

  # An array keeps each option a separate word even when the user name or
  # password contains whitespace or glob characters.
  local -a options=(
    -host localhost
    -port 8049
    -username "$ML_USER"
    -password "$ML_PASS"
    -mode local
    -input_file_type rdf
  )

  # Collect the paths NUL-delimited so names containing whitespace or glob
  # characters stay intact; collecting first keeps stdin of the import
  # command untouched.
  while IFS= read -r -d '' nt_file; do
    nt_files+=("$nt_file")
  done < <(find . -name '*.nt' -print0)

  for nt_file in "${nt_files[@]}"; do
    filesize=$(du -h "$nt_file" | cut -f1)
    # A 'G' in the human-readable size marks a file of a gigabyte or more.
    if [[ "$filesize" == *G* ]]; then
      echo "Running import for $nt_file ($filesize); this could take some time ..."
    else
      echo "Running import for $nt_file ($filesize)"
    fi
    if ! "$mlcp_bin" import "${options[@]}" -input_file_path "$nt_file"; then
      echo "mlcp import failed for $nt_file" >&2
      status=1
    fi
  done

  return "$status"
}

main "$@"
|
19
|
+
|