marc2linkeddata 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +65 -2
- data/bin/marcTypeCount +29 -0
- data/lib/includes.rb +1 -0
- data/lib/marc2linkeddata/utils.rb +109 -0
- data/marc2linkeddata.gemspec +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 252172f95d3c381127868b0a31e1558b30adac49
|
4
|
+
data.tar.gz: da2c92881b250cafee8d106c49b2f4e86f7a2cbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b77b8490be83acc7fd4b09273fd0c152aca6c3d097394a3aa1ddafc9e96a917d496ef4eeb04fe2e52b7cc89db16da6d32e80da184ff3714b201873df03033d9c
|
7
|
+
data.tar.gz: d391003816a3a35f757ee27e8227c793f032834775671795fde4fb19c8e055ffca8fac65b27a197c021c81a048bbe16fcc58e0dabbc6507e9ce5382217808f93
|
data/README.md
CHANGED
@@ -55,6 +55,12 @@ Optional Dependencies
|
|
55
55
|
- http://marmotta.apache.org
|
56
56
|
- http://stardog.com
|
57
57
|
|
58
|
+
MARC resources
|
59
|
+
|
60
|
+
- http://www.loc.gov/marc/
|
61
|
+
- http://wiki.code4lib.org/index.php/Working_with_MaRC
|
62
|
+
- http://openmetadata.lib.harvard.edu/bibdata
|
63
|
+
|
58
64
|
Install
|
59
65
|
|
60
66
|
gem install marc2linkeddata
|
@@ -81,6 +87,63 @@ Configure
|
|
81
87
|
# data resources, such as OCLC works for authorities.
|
82
88
|
marc2LD_config
|
83
89
|
|
90
|
+
Console Exploration
|
91
|
+
|
92
|
+
# First set configuration parameters (see details above).
|
93
|
+
# Then enter the pry REPL console, which requires the
|
94
|
+
# gem and loads the configuration.
|
95
|
+
marc2LD_console
|
96
|
+
> loc = Marc2LinkedData::Loc.new 'http://id.loc.gov/authorities/names/n79044798'
|
97
|
+
> loc.id
|
98
|
+
=> "n79044798"
|
99
|
+
> #
|
100
|
+
> # retrieve RDF from LOC
|
101
|
+
> loc.rdf
|
102
|
+
=> #<RDF::Graph:0x3fe88de67494(default)>
|
103
|
+
> # the RDF is an in-memory graph
|
104
|
+
> loc.rdf.to_ttl
|
105
|
+
=> snipped for brevity
|
106
|
+
> #
|
107
|
+
> # Various attributes derived from the RDF
|
108
|
+
> loc.label
|
109
|
+
=> "Byrnes, Christopher I., 1949-"
|
110
|
+
> loc.deprecated?
|
111
|
+
=> false
|
112
|
+
> loc.person?
|
113
|
+
=> true
|
114
|
+
> loc.corporation?
|
115
|
+
=> false
|
116
|
+
> loc.conference?
|
117
|
+
=> false
|
118
|
+
> loc.geographic?
|
119
|
+
=> false
|
120
|
+
> loc.name_title?
|
121
|
+
=> false
|
122
|
+
> loc.uniform_title?
|
123
|
+
=> false
|
124
|
+
> # Try to retrieve additional linked data resources:
|
125
|
+
> oclc_uri = loc.get_oclc_identity
|
126
|
+
=> "http://www.worldcat.org/identities/lccn-n79044798/"
|
127
|
+
> oclc_auth = Marc2LinkedData::OclcIdentity.new oclc_uri
|
128
|
+
> sa_graph = oclc_auth.same_as
|
129
|
+
=> #<RDF::Graph:0x3fce1a5ec0f4(default)>
|
130
|
+
> sa_graph.to_ttl
|
131
|
+
=> snipped for brevity
|
132
|
+
> oclc_auth.creative_works
|
133
|
+
=> [#<RDF::URI:0x3fce1bc0cc6c URI:http://worldcat.org/oclc/747413718>,
|
134
|
+
#<RDF::URI:0x3fce1bc2a668 URI:http://worldcat.org/oclc/004933024>,
|
135
|
+
snipped for brevity
|
136
|
+
#<RDF::URI:0x3fce1be21444 URI:http://worldcat.org/oclc/751661734>]
|
137
|
+
> #
|
138
|
+
> # Don't just read this, try it out!
|
139
|
+
> # There are similar classes for VIAF, ISNI and OCLC entities,
|
140
|
+
> # explore the code base for more details and figure out how
|
141
|
+
> # to use that VIAF IRI to construct a Viaf object, and
|
142
|
+
> # then use it to get more ISNI linked data 8-)
|
143
|
+
> viaf_uri = loc.get_viaf
|
144
|
+
=> "http://viaf.org/viaf/108317368/"
|
145
|
+
|
146
|
+
|
84
147
|
Scripting
|
85
148
|
|
86
149
|
# First configure (see details above).
|
@@ -196,10 +259,10 @@ Example Output Files
|
|
196
259
|
|
197
260
|
Ruby Library Use
|
198
261
|
|
199
|
-
- authority
|
262
|
+
- iterating records in an authority file
|
200
263
|
|
201
264
|
require 'marc2linkeddata'
|
202
|
-
marc_filename = '
|
265
|
+
marc_filename = 'auth.mrc'
|
203
266
|
marc_file = File.open(marc_filename,'r')
|
204
267
|
until marc_file.eof?
|
205
268
|
leader = ParseMarcAuthority::parse_leader(marc_file)
|
data/bin/marcTypeCount
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Prints a count of the MARC record types found in each MARC file named on
# the command line. Arguments that do not name an existing file are reported
# on stderr and skipped; with no usable file the usage text is shown.

# Pathname and pp are used directly below, so require them here rather than
# relying on another library having loaded them already.
require 'pathname'
require 'pp'
require 'marc2linkeddata'
CONFIG = Marc2LinkedData.configuration

marc_files, missing_files = ARGV.map { |filename| Pathname(filename) }.partition(&:exist?)
missing_files.each { |path| warn "Skipping missing file: #{path}" }

if marc_files.empty?
  script_name = File.basename(__FILE__)
  puts <<HELP
#{script_name} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]

Output is a count of the record types in the marc file(s).

HELP
  # `exit false` keeps the failure status that a bare `exit!` reports, but
  # exits normally, so the help text is flushed even when stdout is a pipe
  # (`exit!` skips exit handlers and can drop buffered output).
  exit false
end

marc_files.each do |path|
  record_types = Marc2LinkedData::Utils.marc_type_count(path.to_s)
  puts "MARC record types in: #{path}"
  pp record_types
end
|
29
|
+
|
data/lib/includes.rb
CHANGED
data/lib/marc2linkeddata/utils.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
module Marc2LinkedData

  # Stateless helpers for error reporting and for inspecting raw MARC files
  # by reading only the 24-byte record leader, see
  # http://www.loc.gov/marc/bibliographic/bdleader.html
  # http://www.loc.gov/marc/authority/adleader.html
  class Utils

    # Leader position 06 (type of record) mapped to the counter key
    # reported by marc_type_count:
    #   a - Language material
    #   c - Notated music
    #   d - Manuscript notated music
    #   e - Cartographic material
    #   f - Manuscript cartographic material
    #   g - Projected medium
    #   i - Nonmusical sound recording
    #   z - Authority data
    RECORD_TYPE_KEYS = {
      'a' => :a_language_material,
      'c' => :c_notated_music,
      'd' => :d_manuscript_notated_music,
      'e' => :e_cartographic_material,
      'f' => :f_manuscript_cartographic_material,
      'g' => :g_projected_medium,
      'i' => :i_nonmusical_sound_recording,
      'z' => :z_authority_data
    }.freeze

    # Write an error report to $stderr: the exception message, its
    # backtrace (one frame per line) and the record being processed.
    #
    # @param e [Exception] the error to report
    # @param record [Object, nil] context for the error; printed via #to_s
    # @return [void]
    def self.stack_trace(e, record=nil)
      $stderr.write "\n"
      $stderr.write "ERROR\n"
      $stderr.write "#{e.message}\n"
      # backtrace is an Array (or nil for an exception that was never
      # raised); join it so each frame is printed on its own line.
      $stderr.write Array(e.backtrace).join("\n")
      $stderr.write "\n"
      $stderr.write record.to_s
      $stderr.write "\n"
    end

    # Count all the records in a MARC file by type, parsing only the
    # leader of each record and skipping over the record body.
    #
    # Record types that are not listed in RECORD_TYPE_KEYS are skipped
    # without being counted. Scanning stops at the first record whose
    # leader cannot be used to find the next record (truncated leader or
    # a non-positive record length); the problem is reported through
    # stack_trace and the counts gathered so far are returned.
    #
    # @param marc_filename [String] path to a binary MARC file
    # @return [Hash{Symbol => Integer}] one counter per known record type
    def self.marc_type_count(marc_filename)
      record_types = {}
      RECORD_TYPE_KEYS.each_value { |key| record_types[key] = 0 }
      # Binary mode: record lengths are byte offsets. The block form
      # closes the file even if an error escapes the loop.
      File.open(marc_filename, 'rb') do |marc_file|
        until marc_file.eof?
          leader = nil
          begin
            leader = parse_leader(marc_file)
            unless leader[:length] > 0
              raise ArgumentError, "invalid MARC record length: #{leader[:length]}"
            end
            # parse_leader leaves the handle at the start of the record,
            # so the record length moves it to the next leader.
            marc_file.seek(leader[:length], IO::SEEK_CUR)
            key = RECORD_TYPE_KEYS[leader[:type]]
            record_types[key] += 1 if key
          rescue => e
            stack_trace(e, leader)
            # CONFIG is a top-level constant that may not be defined when
            # this library is used without the executables.
            binding.pry if defined?(CONFIG) && CONFIG.debug
            # The read position did not advance to a record boundary, so
            # continuing would re-read the same bytes forever.
            break
          end
        end
      end
      record_types
    end

    # Read the MARC leader at the current position of file_handle and
    # leave the handle where it was, so the caller can still read or skip
    # the entire record.
    #
    # Example leader: "00774cz  a2200253n  4500"
    #   00-04: '00774' - record length
    #   05:    'c'     - record status, one of:
    #            a - Increase in encoding level
    #            c - Corrected or revised
    #            d - Deleted
    #            n - New
    #            o - Obsolete
    #            s - Deleted; heading split into two or more headings
    #            x - Deleted; heading replaced by another heading
    #   06:    'z'     - type of record; always 'z' for authority records
    #   09:    'a'     - UCS/Unicode
    #   12-16: '00253' - base address of data, length of leader and directory
    #   17:    'n'     - complete authority record
    #
    # @param file_handle [IO] readable, seekable stream positioned at a leader
    # @param leader_bytes [Integer] number of bytes in the leader
    # @return [Hash] :length, :status, :type, :encoding, :data_address, :complete
    # @raise [EOFError] when fewer than leader_bytes bytes are available
    def self.parse_leader(file_handle, leader_bytes=24)
      leader = file_handle.read(leader_bytes)
      # Rewind by the number of bytes actually read; a short read must not
      # move the handle to before its starting offset.
      file_handle.seek(-leader.bytesize, IO::SEEK_CUR) unless leader.nil?
      if leader.nil? || leader.bytesize < leader_bytes
        raise EOFError, "incomplete MARC leader: expected #{leader_bytes} bytes, read #{leader.to_s.bytesize}"
      end
      {
        :length => leader[0..4].to_i,
        :status => leader[5],
        :type => leader[6],
        :encoding => leader[9], # letter code, not a ruby encoding name
        :data_address => leader[12..16].to_i,
        :complete => leader[17] == 'n'
      }
    end

  end

end
|
109
|
+
|
data/marc2linkeddata.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc2linkeddata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Weber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -273,6 +273,7 @@ executables:
|
|
273
273
|
- marc2LD_config
|
274
274
|
- marc2LD_console
|
275
275
|
- marcAuthority2LD
|
276
|
+
- marcTypeCount
|
276
277
|
- run_test_data.sh
|
277
278
|
extensions: []
|
278
279
|
extra_rdoc_files: []
|
@@ -290,6 +291,7 @@ files:
|
|
290
291
|
- bin/marc2LD_config
|
291
292
|
- bin/marc2LD_console
|
292
293
|
- bin/marcAuthority2LD
|
294
|
+
- bin/marcTypeCount
|
293
295
|
- bin/run_test_data.sh
|
294
296
|
- lib/includes.rb
|
295
297
|
- lib/marc2linkeddata.rb
|
@@ -310,6 +312,7 @@ files:
|
|
310
312
|
- lib/marc2linkeddata/sparql_dbpedia.rb
|
311
313
|
- lib/marc2linkeddata/sparql_local_loc.rb
|
312
314
|
- lib/marc2linkeddata/sparql_pubmed.rb
|
315
|
+
- lib/marc2linkeddata/utils.rb
|
313
316
|
- lib/marc2linkeddata/viaf.rb
|
314
317
|
- log/.gitignore
|
315
318
|
- marc2linkeddata.gemspec
|