marc2linkeddata 0.0.7 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env_example +52 -27
- data/README.md +33 -2
- data/bin/marcAuthority2LD +131 -40
- data/bin/run_test_data.sh +68 -0
- data/lib/includes.rb +44 -0
- data/lib/marc2linkeddata.rb +1 -29
- data/lib/marc2linkeddata/cap.rb +15 -0
- data/lib/marc2linkeddata/cap_db.rb +44 -0
- data/lib/marc2linkeddata/configuration.rb +5 -0
- data/lib/marc2linkeddata/loc.rb +5 -1
- data/lib/marc2linkeddata/parseMarcAuthority.rb +333 -89
- data/lib/marc2linkeddata/sparql.rb +4 -37
- data/lib/marc2linkeddata/sparql_dbpedia.rb +22 -0
- data/lib/marc2linkeddata/sparql_local_loc.rb +29 -0
- data/lib/marc2linkeddata/sparql_pubmed.rb +2 -4
- data/log/.gitignore +4 -0
- data/marc2linkeddata.gemspec +11 -2
- metadata +67 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9539a926fe1d42b3827b55c4e44245859f0c1e2
|
4
|
+
data.tar.gz: 048690d375535f681bf518af81639d6b6dca4af4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d42f2629e02882f94e60ceb1fe8503766d88c2cd108b0e57fb87d8704663f6a693667d098832d4c793e5e3ff285b04b3541d297d989529e63782dfbae5289a8
|
7
|
+
data.tar.gz: fa4fe996e40a4de2cb0bffab698dc76c843612ab59055c498b0a99af4f6d50a05156c0e715bcbfd074643e04ff0578687e555e3753eeb6990417f2f246ed11ab
|
data/.env_example
CHANGED
@@ -1,46 +1,71 @@
|
|
1
1
|
# https://github.com/bkeepers/dotenv is used for
|
2
2
|
# default configuration options. The values in
|
3
3
|
# this file do not replace existing values in
|
4
|
-
# the shell ENV
|
4
|
+
# the shell ENV, but these settings will be in
|
5
|
+
# effect when the shell ENV doesn't contain them.
|
6
|
+
# To add these settings to the shell ENV, use
|
7
|
+
# $ source .env
|
8
|
+
# To override individual settings in this file, just
|
9
|
+
# set them in the shell ENV before running a script.
|
5
10
|
|
6
11
|
# Uncomment and set values as required. See used settings in
|
7
12
|
# lib/marc2linkeddata/configuration.rb
|
8
13
|
|
9
|
-
DEBUG
|
14
|
+
export DEBUG=false
|
15
|
+
|
16
|
+
export LOG_FILE='marc2ld.log'
|
17
|
+
export LIB_PREFIX=http://linked-data.example.org/library/
|
18
|
+
|
19
|
+
# Without any options enabled to GET_* via HTTP, the process
|
20
|
+
# is largely file IO bound, rather than CPU bound. In this
|
21
|
+
# case, threading can decrease performance. However, when any
|
22
|
+
# GET_* options are enabled for retrieval of RDF over HTTP,
|
23
|
+
# threading may improve performance.
|
24
|
+
export THREADS=true
|
25
|
+
export THREAD_LIMIT=25
|
10
26
|
|
11
27
|
# Authority record field numbers for useful link data
|
12
|
-
FIELD_AUTH_LOC
|
13
|
-
FIELD_AUTH_VIAF
|
14
|
-
FIELD_AUTH_ISNI
|
15
|
-
FIELD_AUTH_OCLC
|
28
|
+
export FIELD_AUTH_LOC=920
|
29
|
+
export FIELD_AUTH_VIAF=921
|
30
|
+
export FIELD_AUTH_ISNI=922
|
31
|
+
export FIELD_AUTH_OCLC=035
|
16
32
|
|
17
33
|
# Options for retrieving linked data to resolve and enhance data.
|
18
|
-
# Set all false for the quickest translation.
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
34
|
+
# Set all false for the quickest translation. Note that having an
|
35
|
+
# LOC identifier (the MARC record number may be sufficient) is usually
|
36
|
+
# a prelude to getting additional linked data. If any of these are
|
37
|
+
# enabled, it may require GET_LOC=true to have greater success.
|
38
|
+
export GET_LOC=false
|
39
|
+
export GET_ISNI=false
|
40
|
+
export GET_OCLC=false
|
41
|
+
export GET_VIAF=false
|
23
42
|
|
24
43
|
# Using OCLC identity, retrieve RDF for creative works?
|
25
44
|
# Only works when GET_OCLC==true; it can slow processing significantly.
|
26
|
-
OCLC_AUTH2WORKS
|
27
|
-
|
28
|
-
LOG_FILE: 'marc2ld.log'
|
29
|
-
|
30
|
-
LIB_PREFIX: http://linked-data.example.org/library/
|
45
|
+
export OCLC_AUTH2WORKS=false
|
31
46
|
|
32
47
|
# Use FOAF or SCHEMA or both
|
33
|
-
USE_FOAF
|
34
|
-
USE_SCHEMA
|
48
|
+
export USE_FOAF=false
|
49
|
+
export USE_SCHEMA=true
|
35
50
|
|
36
51
|
# Local triple store for LOC authority data,
|
37
52
|
# accessed via an HTTP API with basic authentication.
|
38
53
|
# See downloads at http://id.loc.gov/download/
|
39
|
-
LOCAL_LOC_USER
|
40
|
-
LOCAL_LOC_PASS
|
41
|
-
LOCAL_LOC_HOST
|
42
|
-
LOCAL_LOC_PORT
|
43
|
-
LOCAL_LOC_PATH
|
54
|
+
export LOCAL_LOC_USER='sparqlUser'
|
55
|
+
export LOCAL_LOC_PASS='sparqlPass'
|
56
|
+
export LOCAL_LOC_HOST='dev-sparql.example.org'
|
57
|
+
export LOCAL_LOC_PORT='80'
|
58
|
+
export LOCAL_LOC_PATH='/sparql?'
|
59
|
+
|
60
|
+
# SUL-CAP resources
|
61
|
+
# May require ssh port forwarding, e.g.:
|
62
|
+
# ssh ${USER}@cap-mysql-host.example.com -L 3308:localhost:3306 -N &
|
63
|
+
export SUL_CAP_ENABLED=false
|
64
|
+
export SUL_CAP_DB_HOST=localhost
|
65
|
+
export SUL_CAP_DB_PORT=3306
|
66
|
+
export SUL_CAP_DB_USER=capUser
|
67
|
+
export SUL_CAP_DB_PASSWORD=capPass
|
68
|
+
export SUL_CAP_DB_DATABASE=cap
|
44
69
|
|
45
70
|
# Redis Persistence - based on https://github.com/redis/redis-rb
|
46
71
|
# - essential options:
|
@@ -52,11 +77,11 @@ LOCAL_LOC_PATH: '/sparql?'
|
|
52
77
|
# # faster reading of triples from pre-populated redis data
|
53
78
|
# export REDIS_WRITE=true # enable redis writes (default = REDIS4MARC || false)
|
54
79
|
# # current data is updated in redis
|
55
|
-
REDIS4MARC
|
80
|
+
export REDIS4MARC=false
|
56
81
|
# Uncomment these options to disable read or write (independently)
|
57
|
-
#REDIS_READ
|
58
|
-
#REDIS_WRITE
|
82
|
+
#export REDIS_READ=false
|
83
|
+
#export REDIS_WRITE=false
|
59
84
|
# Leave commented to use default redis configs on localhost
|
60
|
-
#REDIS_URL
|
85
|
+
#export REDIS_URL=localhost
|
61
86
|
|
62
87
|
|
data/README.md
CHANGED
@@ -3,13 +3,44 @@ marc2linkeddata
|
|
3
3
|
===============
|
4
4
|
|
5
5
|
Utilities for translating MARC21 into linked data. The project has
|
6
|
-
focused on authority records (as of
|
6
|
+
focused on authority records (as of 2015).
|
7
|
+
|
8
|
+
It has config options that can be enabled to increase the amount of data retrieved.
|
9
|
+
Without any HTTP options enabled, using only data in the MARC record, it can
|
10
|
+
translate 100,000 authority records in about 5-6 min on a current laptop system.
|
11
|
+
File IO is the most expensive operation in this mode, so it helps to have a solid
|
12
|
+
state drive or something with high IO performance.
|
13
|
+
|
14
|
+
The current output is to the file system, but it should be easy to incorporate
|
15
|
+
and configure alternatives by using the RDF.rb facilities for connecting to a
|
16
|
+
repository. A minor attempt was explored to use redis for caching, but that
|
17
|
+
exploration hasn't matured much, mainly because there is no 'cache-expiry' data
|
18
|
+
yet and because it would be better to use an RDF.rb extension of some
|
19
|
+
kind (for redis, mongodb, etc) or to use a triple store/solr platform.
|
20
|
+
|
21
|
+
With HTTP/RDF retrieval options enabled, it can take a lot longer (days) and the
|
22
|
+
providers may not be very happy about a barrage of requests.
|
23
|
+
|
24
|
+
Note that it runs a lot slower on jruby-9.0.0.0-pre1 than on MRI 2.2.0, whether threads
|
25
|
+
are enabled or not. It raises exceptions on jruby-1.7.9, related to ruby
|
26
|
+
language support (such as Array#delete_if).
|
27
|
+
|
28
|
+
TODO: A significant problem to solve is effective caching or mirrors for linked data.
|
29
|
+
The retrieval should inspect any HTTP cache headers that might be available and
|
30
|
+
add PROV-O provenance to the linked-data graph generated for each record.
|
31
|
+
|
32
|
+
TODO: Provide system platform options, to dockerize the application and make it easier
|
33
|
+
for automatic horizontal scaling. Consider https://www.packer.io/intro/index.html
|
7
34
|
|
8
35
|
Optional Dependencies
|
9
36
|
|
10
|
-
- http://redis.io/
|
11
37
|
- http://4store.org/
|
38
|
+
- http://www.mongodb.org/
|
39
|
+
- http://redis.io/
|
12
40
|
- see notes below
|
41
|
+
- see also:
|
42
|
+
- http://marmotta.apache.org
|
43
|
+
- http://stardog.com
|
13
44
|
|
14
45
|
Install
|
15
46
|
|
data/bin/marcAuthority2LD
CHANGED
@@ -4,6 +4,17 @@ require 'marc2linkeddata'
|
|
4
4
|
|
5
5
|
CONFIG = Marc2LinkedData.configuration
|
6
6
|
|
7
|
+
# Report an exception, and the record being processed, on stderr.
#
# Output layout: a blank line, "ERROR", the exception message, one line per
# backtrace frame (when a backtrace exists), then the record's string form.
#
# @param e [Exception] the exception that was rescued
# @param record [#to_s] the record in hand when the error occurred (may be nil)
def stack_trace(e, record)
  $stderr.puts
  $stderr.puts 'ERROR'
  $stderr.puts e.message
  # IO#write would emit the backtrace Array in its inspect form on a single
  # line; puts prints one frame per line.
  frames = e.backtrace
  $stderr.puts frames unless frames.nil? || frames.empty?
  $stderr.write "#{record}\n"
end
|
16
|
+
|
17
|
+
# Count all the records in the MARC file.
|
7
18
|
def marc_auth_count(marc_file)
|
8
19
|
auth_records = 0
|
9
20
|
until marc_file.eof?
|
@@ -12,11 +23,7 @@ def marc_auth_count(marc_file)
|
|
12
23
|
marc_file.seek(leader[:length], IO::SEEK_CUR)
|
13
24
|
auth_records += 1 if leader[:type] == 'z'
|
14
25
|
rescue => e
|
15
|
-
|
16
|
-
puts 'ERROR'
|
17
|
-
puts e.message
|
18
|
-
puts e.backtrace
|
19
|
-
puts
|
26
|
+
stack_trace(e, record)
|
20
27
|
binding.pry if CONFIG.debug
|
21
28
|
end
|
22
29
|
end
|
@@ -24,7 +31,76 @@ def marc_auth_count(marc_file)
|
|
24
31
|
auth_records
|
25
32
|
end
|
26
33
|
|
27
|
-
|
34
|
+
# Memory intensive loading of all authority records in the MARC file.
#
# Walks the file record by record: parses a leader, reads the number of bytes
# the leader reports, and decodes only records whose leader type is 'z'.
# A running six-digit count is printed to stdout while records are collected.
# Errors on a single record are reported through stack_trace and the loop
# continues.
#
# @param marc_filename [String] path of the MARC file to read
# @return [Array] the decoded authority records, in file order
def marc_authority_records(marc_filename)
  puts "Reading records from: #{marc_filename}"
  auth_records = []
  # Block form guarantees the handle is closed however the loop is left.
  File.open(marc_filename, 'r') do |marc_file|
    until marc_file.eof?
      # Reset per iteration so an error report never shows a stale record
      # left over from the previous pass.
      record = nil
      begin
        leader = Marc2LinkedData::ParseMarcAuthority::parse_leader(marc_file)
        raw = marc_file.read(leader[:length])
        if leader[:type] == 'z'
          record = MARC::Reader.decode(raw)
          auth_records << record
          # Back up over the previously printed counter before rewriting it.
          $stdout.printf "\b\b\b\b\b\b" if auth_records.length > 1
          $stdout.printf '%06d', auth_records.length
        end
      rescue => e
        stack_trace(e, record)
        binding.pry if CONFIG.debug
      end
    end
  end
  $stdout.write "\n"
  auth_records
end
|
60
|
+
|
61
|
+
|
62
|
+
# Placeholder for a cache/persistence step. Every statement is commented out,
# so a call currently does nothing and returns nil.
def auth_record_cache(auth)
  # TODO: enable additional persistence options
  #
  # Disabled sketch: reuse triples already in redis (if enabled); otherwise
  # generate them and write them back.
  #
  #   auth_id = "auth:#{auth.get_id}"
  #   triples = nil
  #   # Use data already in redis (if enabled)
  #   triples = CONFIG.redis.get(auth_id) if CONFIG.redis_read
  #   if triples.nil?
  #     triples = auth.to_ttl  # generate new triples
  #     # Update redis (if enabled) for triples not read from redis
  #     CONFIG.redis.set(auth_id, triples) if CONFIG.redis_write
  #   end
end
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
# Translate one MARC authority record into a turtle file.
#
# The file is named "auth_<id>.ttl" (id taken from auth.get_id) and is written
# into output_path. Blank lines are dropped from the turtle text before it is
# written. Any StandardError is reported through stack_trace and not re-raised.
#
# @param record the MARC record handed to ParseMarcAuthority.new
# @param output_path [String] directory that receives the .ttl file
#   NOTE(review): the nil default cannot be joined into a path, so a call
#   without output_path only produces an error report - confirm the intent.
def marc_record2turtle(record, output_path=nil)
  # ParseMarcAuthority is a lazy parser, so
  # init only assigns record to an instance var.
  auth = Marc2LinkedData::ParseMarcAuthority.new(record)
  auth_record_cache(auth)
  triples = auth.to_ttl.lines
  binding.pry if (CONFIG.debug && triples.empty?)
  triples.delete_if {|l| l.chomp.empty? }
  # Output the triples to a turtle file.
  ld_filename = File.join(output_path, "auth_#{auth.get_id}.ttl")
  CONFIG.logger.info "Writing triples in turtle to #{ld_filename}"
  # Block form flushes and closes the handle even when the write raises.
  File.open(ld_filename, 'w') do |ld_file|
    ld_file.write(triples.join)
  end
rescue => e
  stack_trace(e, record)
  binding.pry if CONFIG.debug
end
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
def marc_file2ld(marc_filename)
|
28
104
|
ld_filename = marc_filename.gsub('.mrc','.ttl')
|
29
105
|
puts "Translating: #{marc_filename} to #{ld_filename}"
|
30
106
|
ld_file = File.open(ld_filename,'w')
|
@@ -44,17 +120,7 @@ def marc2ld(marc_filename)
|
|
44
120
|
# init only assigns record to an instance var.
|
45
121
|
auth = Marc2LinkedData::ParseMarcAuthority.new(record)
|
46
122
|
auth_count += 1
|
47
|
-
|
48
|
-
# triples = nil
|
49
|
-
# # TODO: enable additional persistence options
|
50
|
-
# # Use data already in redis (if enabled)
|
51
|
-
# triples = CONFIG.redis.get(auth_id) if CONFIG.redis_read
|
52
|
-
# if triples.nil?
|
53
|
-
# triples = auth.to_ttl # generate new triples
|
54
|
-
# # Update redis (if enabled) for triples not read from redis
|
55
|
-
# CONFIG.redis.set(auth_id, triples) if CONFIG.redis_write
|
56
|
-
# end
|
57
|
-
|
123
|
+
auth_record_cache(auth)
|
58
124
|
triples = auth.to_ttl.lines
|
59
125
|
binding.pry if (CONFIG.debug && triples.empty?)
|
60
126
|
triples.delete_if {|l| l.chomp.empty? }
|
@@ -63,12 +129,7 @@ def marc2ld(marc_filename)
|
|
63
129
|
ld_file.flush
|
64
130
|
end
|
65
131
|
rescue => e
|
66
|
-
|
67
|
-
puts 'ERROR'
|
68
|
-
puts e.message
|
69
|
-
puts e.backtrace
|
70
|
-
puts record.to_s
|
71
|
-
puts
|
132
|
+
stack_trace(e, record)
|
72
133
|
binding.pry if CONFIG.debug
|
73
134
|
end
|
74
135
|
end
|
@@ -77,38 +138,68 @@ def marc2ld(marc_filename)
|
|
77
138
|
ld_file.close
|
78
139
|
end
|
79
140
|
|
141
|
+
# Block until every thread in the list has finished, then drop the finished
# ones from the list in place.
#
# @param threads [Array<Thread>] list to wait on; mutated by this call
# @return [Array<Thread>] the same array, after removal of finished threads
def thread_wait(threads)
  threads.each(&:join)
  # Thread#status is false after a normal finish and nil after an exception.
  threads.delete_if { |thr| [false, nil].include?(thr.status) }
end
|
146
|
+
|
147
|
+
|
148
|
+
# ---------------------------------------------------------------------
|
149
|
+
# MAIN
|
150
|
+
|
151
|
+
|
80
152
|
marc_files = []
|
81
153
|
ARGV.each do |filename|
|
82
154
|
path = Pathname(filename)
|
83
155
|
marc_files.push(path) if path.exist?
|
84
156
|
end
|
85
157
|
if marc_files.empty?
|
158
|
+
script_name = File.basename(__FILE__)
|
159
|
+
script_path = File.dirname(__FILE__)
|
160
|
+
example_env_file = File.absolute_path(File.join(script_path,'..','.env_example'))
|
86
161
|
puts <<HELP
|
87
|
-
#{
|
88
|
-
|
89
|
-
Output is RDF triples in
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
export REDIS_URL="redis://{user}:{password}@{host}:{port}/{db}"
|
98
|
-
export REDIS_READ=true # enable redis reads (default = REDIS4MARC || false)
|
99
|
-
# faster reading of triples from pre-populated redis data
|
100
|
-
export REDIS_WRITE=true # enable redis writes (default = REDIS4MARC || false)
|
101
|
-
# recent data is updated in redis
|
162
|
+
#{script_name} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]
|
163
|
+
|
164
|
+
Output is RDF triples in turtle files (.ttl). The files are output into a
|
165
|
+
directory created in the same path as the .mrc file,
|
166
|
+
with one .ttl file for each record.
|
167
|
+
|
168
|
+
Optional configuration can be set in environment variables. A '.env' file can be
|
169
|
+
created in the path where this utility is run and this utility will use it. See
|
170
|
+
comments and settings in the example file at:
|
171
|
+
#{example_env_file}
|
102
172
|
|
103
173
|
HELP
|
104
174
|
exit!
|
105
|
-
else
|
106
175
|
end
|
107
176
|
|
108
177
|
puts "Logging to: #{CONFIG.log_file}"
|
109
178
|
marc_files.each do |path|
|
110
179
|
CONFIG.logger.info "Processing: #{path}"
|
111
|
-
|
180
|
+
# marc_file2ld(path.to_s)
|
181
|
+
output_dir = path.basename.to_s.gsub('.mrc','').gsub('.','_') + '_turtle'
|
182
|
+
output_path = File.join(path.dirname.to_s, output_dir)
|
183
|
+
Dir.mkdir(output_path, 0775) unless File.directory? output_path
|
184
|
+
auth_records = marc_authority_records(path.to_s)
|
185
|
+
progress = ProgressBar.create(:total => auth_records.length, :format => '%a %f |%b>>%i| %P%% %t')
|
186
|
+
if CONFIG.threads
|
187
|
+
threads = []
|
188
|
+
auth_records.each do |r|
|
189
|
+
thread_wait(threads) while threads.length >= CONFIG.thread_limit
|
190
|
+
t = Thread.new { marc_record2turtle(r, output_path) }
|
191
|
+
t.abort_on_exception = true
|
192
|
+
threads << t
|
193
|
+
progress.increment # increment progress although thread is still running
|
194
|
+
end
|
195
|
+
# Ensure any remaining threads complete
|
196
|
+
threads.each{|t| t.join}
|
197
|
+
else
|
198
|
+
auth_records.each do |r|
|
199
|
+
marc_record2turtle(r, output_path)
|
200
|
+
progress.increment
|
201
|
+
end
|
202
|
+
end
|
112
203
|
end
|
113
204
|
|
114
205
|
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/bin/bash
#
# Runs the authority translation on ./data/auth.mrc and writes a short report
# about the generated turtle files to ./log/run_test_data.log, then removes
# the generated files. Run it from the project root.

export DEBUG=false

# Runs a lot slower on jruby, even with threading enabled.
export JRUBY_OPTS=-J-Xmx2g

# The setting is spelled THREADS in .env_example; the former THREAD export
# did not match that name.
export THREADS=false
export GET_LOC=false  # if this is true, be prepared to wait a very long time!

export LOG_FILE='./log/marc2ld.log'
export LIB_PREFIX='http://www.linked-data.org/library/'

# Additional config options should be in .env;
# the .env values will not replace those above.
if [[ ! -s .env ]]; then
  cp -u .env_example .env
fi

SCRIPT_FILE='.binstubs/marcAuthority2LD'
if [[ ! -s "${SCRIPT_FILE}" ]]; then
  echo "Cannot locate script: $SCRIPT_FILE"
  exit 1
fi

AUTH_FILE="./data/auth.mrc"
AUTH_PATH="./data/auth_turtle/"
if [[ ! -s "${AUTH_FILE}" ]]; then
  echo "Place a MARC21 authority file into: $AUTH_FILE"
  exit 1
fi

"${SCRIPT_FILE}" "${AUTH_FILE}"

DATA_LOG_FILE="./log/run_test_data.log"

# Print every 'linked-data' line found in the generated turtle files.
# NUL-delimited so unusual file names cannot split into several arguments.
linked_data_lines() {
  find "${AUTH_PATH}" -type f -print0 | xargs -0 grep 'linked-data'
}

# Print a heading and the number of 'linked-data' lines containing $1.
report_count() {
  echo -e "Count for '$1' authority records:"
  linked_data_lines | grep -c -F -- "$1"
}

# One redirect for the whole report: the log is truncated exactly once, so no
# later section can wipe out an earlier one.
{
  echo -e "\n\n"

  echo -e "Output file count should be 100000:"
  find "${AUTH_PATH}" -type f | wc -l
  echo -e "\n\n"

  # count all the different types of authority files
  echo -e "Different types of authority records:\n"
  linked_data_lines | sed -e 's/^.*> a/a/' | sort -u
  echo -e "\n\n"

  for rdf_type in 'Person' 'Organization' 'Place' 'event' 'v1#NameTitle' 'v1#Title'; do
    report_count "${rdf_type}"
  done
} > "${DATA_LOG_FILE}"

# # check the syntax of the output files
# echo -e "\n\n\n" >> ${DATA_LOG_FILE}
# for f in $(find ${AUTH_PATH} -type f); do
#     rapper -c -i turtle $f >> ${DATA_LOG_FILE} 2>&1
# done

# cleanup (':?' aborts instead of expanding to an empty path)
rm -rf -- "${AUTH_PATH:?}"
|
68
|
+
|