marc2linkeddata 0.0.7 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env_example +52 -27
- data/README.md +33 -2
- data/bin/marcAuthority2LD +131 -40
- data/bin/run_test_data.sh +68 -0
- data/lib/includes.rb +44 -0
- data/lib/marc2linkeddata.rb +1 -29
- data/lib/marc2linkeddata/cap.rb +15 -0
- data/lib/marc2linkeddata/cap_db.rb +44 -0
- data/lib/marc2linkeddata/configuration.rb +5 -0
- data/lib/marc2linkeddata/loc.rb +5 -1
- data/lib/marc2linkeddata/parseMarcAuthority.rb +333 -89
- data/lib/marc2linkeddata/sparql.rb +4 -37
- data/lib/marc2linkeddata/sparql_dbpedia.rb +22 -0
- data/lib/marc2linkeddata/sparql_local_loc.rb +29 -0
- data/lib/marc2linkeddata/sparql_pubmed.rb +2 -4
- data/log/.gitignore +4 -0
- data/marc2linkeddata.gemspec +11 -2
- metadata +67 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9539a926fe1d42b3827b55c4e44245859f0c1e2
|
4
|
+
data.tar.gz: 048690d375535f681bf518af81639d6b6dca4af4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d42f2629e02882f94e60ceb1fe8503766d88c2cd108b0e57fb87d8704663f6a693667d098832d4c793e5e3ff285b04b3541d297d989529e63782dfbae5289a8
|
7
|
+
data.tar.gz: fa4fe996e40a4de2cb0bffab698dc76c843612ab59055c498b0a99af4f6d50a05156c0e715bcbfd074643e04ff0578687e555e3753eeb6990417f2f246ed11ab
|
data/.env_example
CHANGED
@@ -1,46 +1,71 @@
|
|
1
1
|
# https://github.com/bkeepers/dotenv is used for
|
2
2
|
# default configuration options. The values in
|
3
3
|
# this file do not replace existing values in
|
4
|
-
# the shell ENV
|
4
|
+
# the shell ENV, but these settings will be in
|
5
|
+
# effect when the shell ENV doesn't contain them.
|
6
|
+
# To add these settings to the shell ENV, use
|
7
|
+
# $ source .env
|
8
|
+
# To override individual settings in this file, just
|
9
|
+
# set them in the shell ENV before running a script.
|
5
10
|
|
6
11
|
# Uncomment and set values as required. See used settings in
|
7
12
|
# lib/marc2linkeddata/configuration.rb
|
8
13
|
|
9
|
-
DEBUG
|
14
|
+
export DEBUG=false
|
15
|
+
|
16
|
+
export LOG_FILE='marc2ld.log'
|
17
|
+
export LIB_PREFIX=http://linked-data.example.org/library/
|
18
|
+
|
19
|
+
# Without any options enabled to GET_* via HTTP, the process
|
20
|
+
# is largely file IO bound, rather than CPU bound. In this
|
21
|
+
# case, threading can decrease performance. However, when any
|
22
|
+
# GET_* options are enabled for retrieval of RDF over HTTP,
|
23
|
+
# threading may improve performance.
|
24
|
+
export THREADS=true
|
25
|
+
export THREAD_LIMIT=25
|
10
26
|
|
11
27
|
# Authority record field numbers for useful link data
|
12
|
-
FIELD_AUTH_LOC
|
13
|
-
FIELD_AUTH_VIAF
|
14
|
-
FIELD_AUTH_ISNI
|
15
|
-
FIELD_AUTH_OCLC
|
28
|
+
export FIELD_AUTH_LOC=920
|
29
|
+
export FIELD_AUTH_VIAF=921
|
30
|
+
export FIELD_AUTH_ISNI=922
|
31
|
+
export FIELD_AUTH_OCLC=035
|
16
32
|
|
17
33
|
# Options for retrieving linked data to resolve and enhance data.
|
18
|
-
# Set all false for the quickest translation.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
34
|
+
# Set all false for the quickest translation. Note that having an
|
35
|
+
# LOC identifier (the MARC record number may be sufficient) is usually
|
36
|
+
# a prelude to getting additional linked data. If any of these are
|
37
|
+
# enabled, it may require GET_LOC=true to have greater success.
|
38
|
+
export GET_LOC=false
|
39
|
+
export GET_ISNI=false
|
40
|
+
export GET_OCLC=false
|
41
|
+
export GET_VIAF=false
|
23
42
|
|
24
43
|
# Using OCLC identity, retrieve RDF for creative works?
|
25
44
|
# Only works when GET_OCLC==true; it can slow processing significantly.
|
26
|
-
OCLC_AUTH2WORKS
|
27
|
-
|
28
|
-
LOG_FILE: 'marc2ld.log'
|
29
|
-
|
30
|
-
LIB_PREFIX: http://linked-data.example.org/library/
|
45
|
+
export OCLC_AUTH2WORKS=false
|
31
46
|
|
32
47
|
# Use FOAF or SCHEMA or both
|
33
|
-
USE_FOAF
|
34
|
-
USE_SCHEMA
|
48
|
+
export USE_FOAF=false
|
49
|
+
export USE_SCHEMA=true
|
35
50
|
|
36
51
|
# Local triple store for LOC authority data,
|
37
52
|
# accessed via an HTTP API with basic authentication.
|
38
53
|
# See downloads at http://id.loc.gov/download/
|
39
|
-
LOCAL_LOC_USER
|
40
|
-
LOCAL_LOC_PASS
|
41
|
-
LOCAL_LOC_HOST
|
42
|
-
LOCAL_LOC_PORT
|
43
|
-
LOCAL_LOC_PATH
|
54
|
+
export LOCAL_LOC_USER='sparqlUser'
|
55
|
+
export LOCAL_LOC_PASS='sparqlPass'
|
56
|
+
export LOCAL_LOC_HOST='dev-sparql.example.org'
|
57
|
+
export LOCAL_LOC_PORT='80'
|
58
|
+
export LOCAL_LOC_PATH='/sparql?'
|
59
|
+
|
60
|
+
# SUL-CAP resources
|
61
|
+
# May require ssh port forwarding, e.g.:
|
62
|
+
# ssh ${USER}@cap-mysql-host.example.com -L 3308:localhost:3306 -N &
|
63
|
+
export SUL_CAP_ENABLED=false
|
64
|
+
export SUL_CAP_DB_HOST=localhost
|
65
|
+
export SUL_CAP_DB_PORT=3306
|
66
|
+
export SUL_CAP_DB_USER=capUser
|
67
|
+
export SUL_CAP_DB_PASSWORD=capPass
|
68
|
+
export SUL_CAP_DB_DATABASE=cap
|
44
69
|
|
45
70
|
# Redis Persistence - based on https://github.com/redis/redis-rb
|
46
71
|
# - essential options:
|
@@ -52,11 +77,11 @@ LOCAL_LOC_PATH: '/sparql?'
|
|
52
77
|
# # faster reading of triples from pre-populated redis data
|
53
78
|
# export REDIS_WRITE=true # enable redis writes (default = REDIS4MARC || false)
|
54
79
|
# # current data is updated in redis
|
55
|
-
REDIS4MARC
|
80
|
+
export REDIS4MARC=false
|
56
81
|
# Uncomment these options to disable read or write (independently)
|
57
|
-
#REDIS_READ
|
58
|
-
#REDIS_WRITE
|
82
|
+
#export REDIS_READ=false
|
83
|
+
#export REDIS_WRITE=false
|
59
84
|
# Leave commented to use default redis configs on localhost
|
60
|
-
#REDIS_URL
|
85
|
+
#export REDIS_URL=localhost
|
61
86
|
|
62
87
|
|
data/README.md
CHANGED
@@ -3,13 +3,44 @@ marc2linkeddata
|
|
3
3
|
===============
|
4
4
|
|
5
5
|
Utilities for translating MARC21 into linked data. The project has
|
6
|
-
focused on authority records (as of
|
6
|
+
focused on authority records (as of 2015).
|
7
|
+
|
8
|
+
It has config options that can be enabled to increase the amount of data retrieved.
|
9
|
+
Without any HTTP options enabled, using only data in the MARC record, it can
|
10
|
+
translate 100,000 authority records in about 5-6 min on a current laptop system.
|
11
|
+
File IO is the most expensive operation in this mode, so it helps to have a solid
|
12
|
+
state drive or something with high IO performance.
|
13
|
+
|
14
|
+
The current output is to the file system, but it should be easy to incorporate
|
15
|
+
and configure alternatives by using the RDF.rb facilities for connecting to a
|
16
|
+
repository. A minor attempt was explored to use redis for caching, but that
|
17
|
+
exploration hasn't matured much, mainly because there is no 'cache-expiry' data
|
18
|
+
yet and because it would be better to use an RDF.rb extension of some
|
19
|
+
kind (for redis, mongodb, etc) or to use a triple store/solr platform.
|
20
|
+
|
21
|
+
With HTTP/RDF retrieval options enabled, it can take a lot longer (days) and the
|
22
|
+
providers may not be very happy about a barrage of requests.
|
23
|
+
|
24
|
+
Note that it runs a lot slower on jruby-9.0.0.0-pre1 than MRI 2.2.0, whether threads
|
25
|
+
are enabled or not. It raises exceptions on jruby-1.7.9, related to ruby
|
26
|
+
language support (such as Array#delete_if).
|
27
|
+
|
28
|
+
TODO: A significant problem to solve is effective caching or mirrors for linked data.
|
29
|
+
The retrieval should inspect any HTTP cache headers that might be available and
|
30
|
+
add PROV-O provenance to the linked-data graph generated for each record.
|
31
|
+
|
32
|
+
TODO: Provide system platform options, to dockerize the application and make it easier
|
33
|
+
for automatic horizontal scaling. Consider https://www.packer.io/intro/index.html
|
7
34
|
|
8
35
|
Optional Dependencies
|
9
36
|
|
10
|
-
- http://redis.io/
|
11
37
|
- http://4store.org/
|
38
|
+
- http://www.mongodb.org/
|
39
|
+
- http://redis.io/
|
12
40
|
- see notes below
|
41
|
+
- see also:
|
42
|
+
- http://marmotta.apache.org
|
43
|
+
- http://stardog.com
|
13
44
|
|
14
45
|
Install
|
15
46
|
|
data/bin/marcAuthority2LD
CHANGED
@@ -4,6 +4,17 @@ require 'marc2linkeddata'
|
|
4
4
|
|
5
5
|
CONFIG = Marc2LinkedData.configuration
|
6
6
|
|
7
|
+
def stack_trace(e, record)
|
8
|
+
$stderr.write "\n"
|
9
|
+
$stderr.write "ERROR\n"
|
10
|
+
$stderr.write e.message
|
11
|
+
# IO#write would print the backtrace Array in inspect form, glued to the message; emit one frame per line instead.
$stderr.write "\n#{Array(e.backtrace).join("\n")}"
|
12
|
+
$stderr.write "\n"
|
13
|
+
$stderr.write record.to_s
|
14
|
+
$stderr.write "\n"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Count all the records in the MARC file.
|
7
18
|
def marc_auth_count(marc_file)
|
8
19
|
auth_records = 0
|
9
20
|
until marc_file.eof?
|
@@ -12,11 +23,7 @@ def marc_auth_count(marc_file)
|
|
12
23
|
marc_file.seek(leader[:length], IO::SEEK_CUR)
|
13
24
|
auth_records += 1 if leader[:type] == 'z'
|
14
25
|
rescue => e
|
15
|
-
|
16
|
-
puts 'ERROR'
|
17
|
-
puts e.message
|
18
|
-
puts e.backtrace
|
19
|
-
puts
|
26
|
+
# No MARC record is decoded while counting (only the leader is parsed), so there is no record to report here.
stack_trace(e, nil)
|
20
27
|
binding.pry if CONFIG.debug
|
21
28
|
end
|
22
29
|
end
|
@@ -24,7 +31,76 @@ def marc_auth_count(marc_file)
|
|
24
31
|
auth_records
|
25
32
|
end
|
26
33
|
|
27
|
-
|
34
|
+
# Memory intensive loading of all authority records in the MARC file.
|
35
|
+
def marc_authority_records(marc_filename)
|
36
|
+
puts "Reading records from: #{marc_filename}"
|
37
|
+
marc_file = File.open(marc_filename,'r')
|
38
|
+
auth_count = 0
|
39
|
+
auth_records = []
|
40
|
+
until marc_file.eof?
|
41
|
+
begin
|
42
|
+
leader = Marc2LinkedData::ParseMarcAuthority::parse_leader(marc_file)
|
43
|
+
raw = marc_file.read(leader[:length])
|
44
|
+
if leader[:type] == 'z'
|
45
|
+
record = MARC::Reader.decode(raw)
|
46
|
+
auth_records << record
|
47
|
+
auth_count += 1
|
48
|
+
$stdout.printf "\b\b\b\b\b\b" if auth_count > 1
|
49
|
+
$stdout.printf '%06d', auth_count
|
50
|
+
end
|
51
|
+
rescue => e
|
52
|
+
stack_trace(e, record)
|
53
|
+
binding.pry if CONFIG.debug
|
54
|
+
end
|
55
|
+
end
|
56
|
+
marc_file.close
|
57
|
+
$stdout.write "\n"
|
58
|
+
auth_records
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
def auth_record_cache(auth)
|
63
|
+
|
64
|
+
# auth_id = "auth:#{auth.get_id}"
|
65
|
+
# triples = nil
|
66
|
+
# # TODO: enable additional persistence options
|
67
|
+
# # Use data already in redis (if enabled)
|
68
|
+
# triples = CONFIG.redis.get(auth_id) if CONFIG.redis_read
|
69
|
+
# if triples.nil?
|
70
|
+
# triples = auth.to_ttl # generate new triples
|
71
|
+
# # Update redis (if enabled) for triples not read from redis
|
72
|
+
# CONFIG.redis.set(auth_id, triples) if CONFIG.redis_write
|
73
|
+
# end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
def marc_record2turtle(record, output_path=nil)
|
80
|
+
begin
|
81
|
+
# ParseMarcAuthority is a lazy parser, so
|
82
|
+
# init only assigns record to an instance var.
|
83
|
+
auth = Marc2LinkedData::ParseMarcAuthority.new(record)
|
84
|
+
auth_record_cache(auth)
|
85
|
+
triples = auth.to_ttl.lines
|
86
|
+
binding.pry if (CONFIG.debug && triples.empty?)
|
87
|
+
triples.delete_if {|l| l.chomp.empty? }
|
88
|
+
# Output the triples to a turtle file.
|
89
|
+
ld_filename = File.join(output_path, "auth_#{auth.get_id}.ttl")
|
90
|
+
CONFIG.logger.info "Writing triples in turtle to #{ld_filename}"
|
91
|
+
ld_file = File.open(ld_filename,'w')
|
92
|
+
ld_file.write(triples.join)
|
93
|
+
ld_file.flush
|
94
|
+
ld_file.close
|
95
|
+
rescue => e
|
96
|
+
stack_trace(e, record)
|
97
|
+
binding.pry if CONFIG.debug
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
def marc_file2ld(marc_filename)
|
28
104
|
ld_filename = marc_filename.gsub('.mrc','.ttl')
|
29
105
|
puts "Translating: #{marc_filename} to #{ld_filename}"
|
30
106
|
ld_file = File.open(ld_filename,'w')
|
@@ -44,17 +120,7 @@ def marc2ld(marc_filename)
|
|
44
120
|
# init only assigns record to an instance var.
|
45
121
|
auth = Marc2LinkedData::ParseMarcAuthority.new(record)
|
46
122
|
auth_count += 1
|
47
|
-
|
48
|
-
# triples = nil
|
49
|
-
# # TODO: enable additional persistence options
|
50
|
-
# # Use data already in redis (if enabled)
|
51
|
-
# triples = CONFIG.redis.get(auth_id) if CONFIG.redis_read
|
52
|
-
# if triples.nil?
|
53
|
-
# triples = auth.to_ttl # generate new triples
|
54
|
-
# # Update redis (if enabled) for triples not read from redis
|
55
|
-
# CONFIG.redis.set(auth_id, triples) if CONFIG.redis_write
|
56
|
-
# end
|
57
|
-
|
123
|
+
auth_record_cache(auth)
|
58
124
|
triples = auth.to_ttl.lines
|
59
125
|
binding.pry if (CONFIG.debug && triples.empty?)
|
60
126
|
triples.delete_if {|l| l.chomp.empty? }
|
@@ -63,12 +129,7 @@ def marc2ld(marc_filename)
|
|
63
129
|
ld_file.flush
|
64
130
|
end
|
65
131
|
rescue => e
|
66
|
-
|
67
|
-
puts 'ERROR'
|
68
|
-
puts e.message
|
69
|
-
puts e.backtrace
|
70
|
-
puts record.to_s
|
71
|
-
puts
|
132
|
+
stack_trace(e, record)
|
72
133
|
binding.pry if CONFIG.debug
|
73
134
|
end
|
74
135
|
end
|
@@ -77,38 +138,68 @@ def marc2ld(marc_filename)
|
|
77
138
|
ld_file.close
|
78
139
|
end
|
79
140
|
|
141
|
+
def thread_wait(threads)
|
142
|
+
threads.each{|t| t.join}
|
143
|
+
threads.delete_if {|t| t.status == false}
|
144
|
+
threads.delete_if {|t| t.status.nil? }
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
# ---------------------------------------------------------------------
|
149
|
+
# MAIN
|
150
|
+
|
151
|
+
|
80
152
|
marc_files = []
|
81
153
|
ARGV.each do |filename|
|
82
154
|
path = Pathname(filename)
|
83
155
|
marc_files.push(path) if path.exist?
|
84
156
|
end
|
85
157
|
if marc_files.empty?
|
158
|
+
script_name = File.basename(__FILE__)
|
159
|
+
script_path = File.dirname(__FILE__)
|
160
|
+
example_env_file = File.absolute_path(File.join(script_path,'..','.env_example'))
|
86
161
|
puts <<HELP
|
87
|
-
#{
|
88
|
-
|
89
|
-
Output is RDF triples in
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
export REDIS_URL="redis://{user}:{password}@{host}:{port}/{db}"
|
98
|
-
export REDIS_READ=true # enable redis reads (default = REDIS4MARC || false)
|
99
|
-
# faster reading of triples from pre-populated redis data
|
100
|
-
export REDIS_WRITE=true # enable redis writes (default = REDIS4MARC || false)
|
101
|
-
# recent data is updated in redis
|
162
|
+
#{script_name} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]
|
163
|
+
|
164
|
+
Output is RDF triples in turtle files (.ttl). The files are output into a
|
165
|
+
directory created in the same path as the .mrc file,
|
166
|
+
with one .ttl file for each record.
|
167
|
+
|
168
|
+
Optional configuration can be set in environment variables. A '.env' file can be
|
169
|
+
created in the path where this utility is run and this utility will use it. See
|
170
|
+
comments and settings in the example file at:
|
171
|
+
#{example_env_file}
|
102
172
|
|
103
173
|
HELP
|
104
174
|
exit!
|
105
|
-
else
|
106
175
|
end
|
107
176
|
|
108
177
|
puts "Logging to: #{CONFIG.log_file}"
|
109
178
|
marc_files.each do |path|
|
110
179
|
CONFIG.logger.info "Processing: #{path}"
|
111
|
-
|
180
|
+
# marc_file2ld(path.to_s)
|
181
|
+
output_dir = path.basename.to_s.gsub('.mrc','').gsub('.','_') + '_turtle'
|
182
|
+
output_path = File.join(path.dirname.to_s, output_dir)
|
183
|
+
Dir.mkdir(output_path, 0775) unless File.directory? output_path
|
184
|
+
auth_records = marc_authority_records(path.to_s)
|
185
|
+
progress = ProgressBar.create(:total => auth_records.length, :format => '%a %f |%b>>%i| %P%% %t')
|
186
|
+
if CONFIG.threads
|
187
|
+
threads = []
|
188
|
+
auth_records.each do |r|
|
189
|
+
thread_wait(threads) while threads.length >= CONFIG.thread_limit
|
190
|
+
t = Thread.new { marc_record2turtle(r, output_path) }
|
191
|
+
t.abort_on_exception = true
|
192
|
+
threads << t
|
193
|
+
progress.increment # increment progress although thread is still running
|
194
|
+
end
|
195
|
+
# Ensure any remaining threads complete
|
196
|
+
threads.each{|t| t.join}
|
197
|
+
else
|
198
|
+
auth_records.each do |r|
|
199
|
+
marc_record2turtle(r, output_path)
|
200
|
+
progress.increment
|
201
|
+
end
|
202
|
+
end
|
112
203
|
end
|
113
204
|
|
114
205
|
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
export DEBUG=false
|
4
|
+
|
5
|
+
# Runs a lot slower on jruby, even with threading enabled.
|
6
|
+
export JRUBY_OPTS=-J-Xmx2g
|
7
|
+
|
8
|
+
export THREADS=false # setting name is THREADS (plural), as in .env_example
|
9
|
+
export GET_LOC=false # if this is true, be prepared to wait a very long time!
|
10
|
+
|
11
|
+
export LOG_FILE='./log/marc2ld.log'
|
12
|
+
export LIB_PREFIX='http://www.linked-data.org/library/'
|
13
|
+
|
14
|
+
# Additional config options should be in .env;
|
15
|
+
# the .env values will not replace those above.
|
16
|
+
if [ ! -s .env ]; then
|
17
|
+
cp -u .env_example .env
|
18
|
+
fi
|
19
|
+
|
20
|
+
SCRIPT_FILE='.binstubs/marcAuthority2LD'
|
21
|
+
if [ ! -s ${SCRIPT_FILE} ]; then
|
22
|
+
echo "Cannot locate script: $SCRIPT_FILE"
|
23
|
+
exit 1
|
24
|
+
fi
|
25
|
+
|
26
|
+
AUTH_FILE="./data/auth.mrc"
|
27
|
+
AUTH_PATH="./data/auth_turtle/"
|
28
|
+
if [ ! -s ${AUTH_FILE} ]; then
|
29
|
+
echo "Place a MARC21 authority file into: $AUTH_FILE"
|
30
|
+
exit 1
|
31
|
+
fi
|
32
|
+
|
33
|
+
${SCRIPT_FILE} ${AUTH_FILE}
|
34
|
+
|
35
|
+
DATA_LOG_FILE="./log/run_test_data.log"
|
36
|
+
echo -e "\n\n" > ${DATA_LOG_FILE}
|
37
|
+
|
38
|
+
echo -e "Output file count should be 100000:" >> ${DATA_LOG_FILE}
|
39
|
+
find ${AUTH_PATH} -type f | wc -l >> ${DATA_LOG_FILE}
|
40
|
+
echo -e "\n\n" >> "${DATA_LOG_FILE}"
|
41
|
+
|
42
|
+
# count all the different types of authority files
|
43
|
+
echo -e "Different types of authority records:\n" >> ${DATA_LOG_FILE}
|
44
|
+
find ${AUTH_PATH} -type f | xargs grep 'linked-data' | sed -e 's/^.*> a/a/' | sort -u >> ${DATA_LOG_FILE}
|
45
|
+
echo -e "\n\n" >> "${DATA_LOG_FILE}"
|
46
|
+
|
47
|
+
echo -e "Count for 'Person' authority records:" >> ${DATA_LOG_FILE}
|
48
|
+
find ${AUTH_PATH} -type f | xargs grep 'linked-data' | grep -c -F 'Person' >> ${DATA_LOG_FILE}
|
49
|
+
echo -e "Count for 'Organization' authority records:" >> ${DATA_LOG_FILE}
|
50
|
+
find ${AUTH_PATH} -type f | xargs grep 'linked-data' | grep -c -F 'Organization' >> ${DATA_LOG_FILE}
|
51
|
+
echo -e "Count for 'Place' authority records:" >> ${DATA_LOG_FILE}
|
52
|
+
find ${AUTH_PATH} -type f | xargs grep 'linked-data' | grep -c -F 'Place' >> ${DATA_LOG_FILE}
|
53
|
+
echo -e "Count for 'event' authority records:" >> ${DATA_LOG_FILE}
|
54
|
+
find ${AUTH_PATH} -type f | xargs grep 'linked-data' | grep -c -F 'event' >> ${DATA_LOG_FILE}
|
55
|
+
echo -e "Count for 'v1#NameTitle' authority records:" >> ${DATA_LOG_FILE}
|
56
|
+
find ${AUTH_PATH} -type f | xargs grep 'linked-data' | grep -c -F 'v1#NameTitle' >> ${DATA_LOG_FILE}
|
57
|
+
echo -e "Count for 'v1#Title' authority records:" >> ${DATA_LOG_FILE}
|
58
|
+
find ${AUTH_PATH} -type f | xargs grep 'linked-data' | grep -c -F 'v1#Title' >> ${DATA_LOG_FILE}
|
59
|
+
|
60
|
+
# # check the syntax of the output files
|
61
|
+
# echo -e "\n\n\n" >> ${DATA_LOG_FILE}
|
62
|
+
# for f in $(find ${AUTH_PATH} -type f); do
|
63
|
+
# rapper -c -i turtle $f >> ${DATA_LOG_FILE} 2>&1
|
64
|
+
# done
|
65
|
+
|
66
|
+
# cleanup
|
67
|
+
rm -rf ${AUTH_PATH}
|
68
|
+
|