marc2linkeddata 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +65 -2
- data/bin/marcTypeCount +29 -0
- data/lib/includes.rb +1 -0
- data/lib/marc2linkeddata/utils.rb +109 -0
- data/marc2linkeddata.gemspec +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 252172f95d3c381127868b0a31e1558b30adac49
|
4
|
+
data.tar.gz: da2c92881b250cafee8d106c49b2f4e86f7a2cbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b77b8490be83acc7fd4b09273fd0c152aca6c3d097394a3aa1ddafc9e96a917d496ef4eeb04fe2e52b7cc89db16da6d32e80da184ff3714b201873df03033d9c
|
7
|
+
data.tar.gz: d391003816a3a35f757ee27e8227c793f032834775671795fde4fb19c8e055ffca8fac65b27a197c021c81a048bbe16fcc58e0dabbc6507e9ce5382217808f93
|
data/README.md
CHANGED
@@ -55,6 +55,12 @@ Optional Dependencies
|
|
55
55
|
- http://marmotta.apache.org
|
56
56
|
- http://stardog.com
|
57
57
|
|
58
|
+
MARC resources
|
59
|
+
|
60
|
+
- http://www.loc.gov/marc/
|
61
|
+
- http://wiki.code4lib.org/index.php/Working_with_MaRC
|
62
|
+
- http://openmetadata.lib.harvard.edu/bibdata
|
63
|
+
|
58
64
|
Install
|
59
65
|
|
60
66
|
gem install marc2linkeddata
|
@@ -81,6 +87,63 @@ Configure
|
|
81
87
|
# data resources, such as OCLC works for authorities.
|
82
88
|
marc2LD_config
|
83
89
|
|
90
|
+
Console Exploration
|
91
|
+
|
92
|
+
# First set configuration parameters (see details above).
|
93
|
+
# Then enter the pry REPL console, which requires the
|
94
|
+
# gem and loads the configuration.
|
95
|
+
marc2LD_console
|
96
|
+
> loc = Marc2LinkedData::Loc.new 'http://id.loc.gov/authorities/names/n79044798'
|
97
|
+
> loc.id
|
98
|
+
=> "n79044798"
|
99
|
+
> #
|
100
|
+
> # retrieve RDF from LOC
|
101
|
+
> loc.rdf
|
102
|
+
=> #<RDF::Graph:0x3fe88de67494(default)>
|
103
|
+
> # the RDF is an in-memory graph
|
104
|
+
> loc.rdf.to_ttl
|
105
|
+
=> snipped for brevity
|
106
|
+
> #
|
107
|
+
> # Various attributes derived from the RDF
|
108
|
+
> loc.label
|
109
|
+
=> "Byrnes, Christopher I., 1949-"
|
110
|
+
> loc.deprecated?
|
111
|
+
=> false
|
112
|
+
> loc.person?
|
113
|
+
=> true
|
114
|
+
> loc.corporation?
|
115
|
+
=> false
|
116
|
+
> loc.conference?
|
117
|
+
=> false
|
118
|
+
> loc.geographic?
|
119
|
+
=> false
|
120
|
+
> loc.name_title?
|
121
|
+
=> false
|
122
|
+
> loc.uniform_title?
|
123
|
+
=> false
|
124
|
+
> # Try to retrieve additional linked data resources:
|
125
|
+
> oclc_uri = loc.get_oclc_identity
|
126
|
+
=> "http://www.worldcat.org/identities/lccn-n79044798/"
|
127
|
+
> oclc_auth = Marc2LinkedData::OclcIdentity.new oclc_uri
|
128
|
+
> sa_graph = oclc_auth.same_as
|
129
|
+
=> #<RDF::Graph:0x3fce1a5ec0f4(default)>
|
130
|
+
> sa_graph.to_ttl
|
131
|
+
=> snipped for brevity
|
132
|
+
> oclc_auth.creative_works
|
133
|
+
=> [#<RDF::URI:0x3fce1bc0cc6c URI:http://worldcat.org/oclc/747413718>,
|
134
|
+
#<RDF::URI:0x3fce1bc2a668 URI:http://worldcat.org/oclc/004933024>,
|
135
|
+
snipped for brevity
|
136
|
+
#<RDF::URI:0x3fce1be21444 URI:http://worldcat.org/oclc/751661734>]
|
137
|
+
> #
|
138
|
+
> # Don't just read this, try it out!
|
139
|
+
> # There are similar classes for VIAF, ISNI and OCLC entities,
|
140
|
+
> # explore the code base for more details and figure out how
|
141
|
+
> # to use that VIAF IRI to construct a Viaf object, and
|
142
|
+
> # then use it to get more ISNI linked data 8-)
|
143
|
+
> viaf_uri = loc.get_viaf
|
144
|
+
=> "http://viaf.org/viaf/108317368/"
|
145
|
+
|
146
|
+
|
84
147
|
Scripting
|
85
148
|
|
86
149
|
# First configure (see details above).
|
@@ -196,10 +259,10 @@ Example Output Files
|
|
196
259
|
|
197
260
|
Ruby Library Use
|
198
261
|
|
199
|
-
- authority
|
262
|
+
- iterating records in an authority file
|
200
263
|
|
201
264
|
require 'marc2linkeddata'
|
202
|
-
marc_filename = '
|
265
|
+
marc_filename = 'auth.mrc'
|
203
266
|
marc_file = File.open(marc_filename,'r')
|
204
267
|
until marc_file.eof?
|
205
268
|
leader = ParseMarcAuthority::parse_leader(marc_file)
|
data/bin/marcTypeCount
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line tool: print a count of the MARC record types found in each
# MARC file named on the command line.  The counting itself is done by
# Marc2LinkedData::Utils.marc_type_count.

# Pathname and pp are used directly below, so require them here rather than
# relying on the gem to have loaded them.
require 'pathname'
require 'pp'
require 'marc2linkeddata'
CONFIG = Marc2LinkedData.configuration

# Split the arguments into files that exist and files that do not, so the
# user is told about mistyped names instead of having them silently ignored.
marc_files, missing_files = ARGV.map { |filename| Pathname(filename) }.partition(&:exist?)
missing_files.each { |path| warn "Skipping missing file: #{path}" }

if marc_files.empty?
  script_name = File.basename(__FILE__)
  puts <<HELP
#{script_name} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]

Output is a count of the record types in the marc file(s).

HELP
  # `exit 1` keeps the non-zero status that a bare `exit!` produced, but
  # unlike `exit!` it runs at_exit handlers and flushes buffered output, so
  # the help text is not lost when stdout is a pipe or a file.
  exit 1
end

marc_files.each do |path|
  record_types = Marc2LinkedData::Utils.marc_type_count(path.to_s)
  puts "MARC record types in: #{path}"
  pp record_types
end
|
29
|
+
|
data/lib/includes.rb
CHANGED
data/lib/marc2linkeddata/utils.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
module Marc2LinkedData

  # Stateless helpers for error reporting and for scanning the leaders of
  # records in a binary MARC file.
  class Utils

    # Number of bytes in a MARC record leader.
    LEADER_BYTES = 24

    # Maps the leader "type of record" code (leader position 06) to the key
    # used in the hash returned by marc_type_count.  Codes that are not
    # listed here are skipped over but not counted.
    #   a - Language material
    #   c - Notated music
    #   d - Manuscript notated music
    #   e - Cartographic material
    #   f - Manuscript cartographic material
    #   g - Projected medium
    #   i - Nonmusical sound recording
    #   z - Authority data
    RECORD_TYPE_KEYS = {
      'a' => :a_language_material,
      'c' => :c_notated_music,
      'd' => :d_manuscript_notated_music,
      'e' => :e_cartographic_material,
      'f' => :f_manuscript_cartographic_material,
      'g' => :g_projected_medium,
      'i' => :i_nonmusical_sound_recording,
      'z' => :z_authority_data
    }.freeze

    # Write an error report to $stderr: an "ERROR" banner, the exception
    # message, the backtrace (one frame per line) and the string form of
    # the optional record that was being processed.
    #
    # @param e [Exception] the exception to report
    # @param record [Object, nil] context for the error; written via #to_s
    def self.stack_trace(e, record=nil)
      $stderr.write "\nERROR\n"
      $stderr.write "#{e.message}\n"
      # An exception that was never raised has a nil backtrace; Array()
      # turns that into an empty list.  Joining the frames avoids dumping
      # the array in its inspect form.
      $stderr.write Array(e.backtrace).join("\n")
      $stderr.write "\n"
      $stderr.write record.to_s
      $stderr.write "\n"
    end

    # Count the records in a MARC file by record type, reading only the
    # leader of each record and skipping over the record body, see
    # http://www.loc.gov/marc/bibliographic/bdleader.html
    # http://www.loc.gov/marc/authority/adleader.html
    #
    # Scanning stops at the first leader that cannot be parsed or whose
    # record length is shorter than a leader: the record length is the only
    # way to find the next record, so there is nothing to resynchronise on.
    # The problem is reported through stack_trace and the counts gathered
    # up to that point are returned.
    #
    # @param marc_filename [String] path to a binary MARC file
    # @return [Hash{Symbol => Integer}] counts keyed by the values of
    #   RECORD_TYPE_KEYS; every key is present, starting from 0
    def self.marc_type_count(marc_filename)
      record_types = {}
      RECORD_TYPE_KEYS.each_value { |key| record_types[key] = 0 }
      # Block form closes the file even when an exception escapes; binary
      # mode because MARC is a byte-oriented format.
      File.open(marc_filename, 'rb') do |marc_file|
        until marc_file.eof?
          # Reset on every iteration so that an error report never shows
          # the leader of the previous record.
          leader = nil
          begin
            leader = parse_leader(marc_file)
            if leader[:length] < LEADER_BYTES
              raise ArgumentError,
                    "invalid MARC record length #{leader[:length]} at byte offset #{marc_file.pos}"
            end
            # parse_leader leaves the handle at the start of the record, so
            # this jumps to the start of the next record.
            marc_file.seek(leader[:length], IO::SEEK_CUR)
            key = RECORD_TYPE_KEYS[leader[:type]]
            record_types[key] += 1 if key
          rescue => e
            stack_trace(e, leader)
            # CONFIG is defined by the command line scripts; it may be
            # absent when this class is used as a library.
            binding.pry if defined?(CONFIG) && CONFIG.debug
            # The file position cannot be trusted after a failure, and
            # retrying from the same offset would loop forever.
            break
          end
        end
      end
      record_types
    end

    # Parse the leader of the MARC record at the current position of
    # file_handle, leaving the handle positioned where it started.
    #
    # example:
    #record.leader
    #=> "00774cz  a2200253n  4500"
    #    00-04: '00774' - record length
    #    05:    'c' - corrected or revised
    #    06:    'z' - always 'z' for authority records
    #    09:    'a' - UCS/Unicode
    #    12-16: '00253' - base address of data, Length of Leader and Directory
    #    17:    'n' - Complete authority record
    # leader_status_codes = {
    #     'a' => 'Increase in encoding level',
    #     'c' => 'Corrected or revised',
    #     'd' => 'Deleted',
    #     'n' => 'New',
    #     'o' => 'Obsolete',
    #     's' => 'Deleted; heading split into two or more headings',
    #     'x' => 'Deleted; heading replaced by another heading'
    # }
    #
    # @param file_handle [IO, StringIO] seekable handle positioned at the
    #   first byte of a record
    # @param leader_bytes [Integer] number of bytes to read as the leader
    # @return [Hash] :length (Integer), :status, :type and :encoding
    #   (one-character Strings), :data_address (Integer), :complete (Boolean)
    # @raise [EOFError] when fewer than leader_bytes bytes are available
    def self.parse_leader(file_handle, leader_bytes=24)
      leader = file_handle.read(leader_bytes)
      # IO#read(length) returns nil at end of file and a shorter string when
      # fewer bytes remain; rewind by what was actually consumed so the
      # handle never ends up before the position it started from.
      bytes_read = leader ? leader.bytesize : 0
      file_handle.seek(-1 * bytes_read, IO::SEEK_CUR) if bytes_read > 0
      if bytes_read < leader_bytes
        raise EOFError,
              "incomplete MARC leader: expected #{leader_bytes} bytes, read #{bytes_read}"
      end
      {
        :length => leader[0..4].to_i,
        :status => leader[5],   # leader_status_codes[ record.leader[5] ]
        :type => leader[6],     # always 'z' for authority records
        :encoding => leader[9], # translate letter code into ruby encoding string
        :data_address => leader[12..16].to_i,
        # to_s keeps this false (instead of raising) when a caller passes a
        # leader_bytes value too small to include position 17.
        :complete => leader[17].to_s.include?('n')
      }
    end

  end

end
|
109
|
+
|
data/marc2linkeddata.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc2linkeddata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Darren Weber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -273,6 +273,7 @@ executables:
|
|
273
273
|
- marc2LD_config
|
274
274
|
- marc2LD_console
|
275
275
|
- marcAuthority2LD
|
276
|
+
- marcTypeCount
|
276
277
|
- run_test_data.sh
|
277
278
|
extensions: []
|
278
279
|
extra_rdoc_files: []
|
@@ -290,6 +291,7 @@ files:
|
|
290
291
|
- bin/marc2LD_config
|
291
292
|
- bin/marc2LD_console
|
292
293
|
- bin/marcAuthority2LD
|
294
|
+
- bin/marcTypeCount
|
293
295
|
- bin/run_test_data.sh
|
294
296
|
- lib/includes.rb
|
295
297
|
- lib/marc2linkeddata.rb
|
@@ -310,6 +312,7 @@ files:
|
|
310
312
|
- lib/marc2linkeddata/sparql_dbpedia.rb
|
311
313
|
- lib/marc2linkeddata/sparql_local_loc.rb
|
312
314
|
- lib/marc2linkeddata/sparql_pubmed.rb
|
315
|
+
- lib/marc2linkeddata/utils.rb
|
313
316
|
- lib/marc2linkeddata/viaf.rb
|
314
317
|
- log/.gitignore
|
315
318
|
- marc2linkeddata.gemspec
|