marc2linkeddata 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 543898af0ce1ceaa309d3998c8cbfa2b12df00f9
4
- data.tar.gz: 45fe5fb9d27c32c66b5d385154ea60ccb9368262
3
+ metadata.gz: 252172f95d3c381127868b0a31e1558b30adac49
4
+ data.tar.gz: da2c92881b250cafee8d106c49b2f4e86f7a2cbd
5
5
  SHA512:
6
- metadata.gz: f0458ac1a99307bf1988628298c9eda642fa1015b37dd92abe6fc8ad09d7f4746a962bbdf90ef3948e30c433d686df84d2d628094202097b21e21177b56911b1
7
- data.tar.gz: 06a32b1595aee813138e1705663e2d1fdb92e9d57d03692e5f4dd9e7582c363b014e462641f49100ac561e01b0e7ea1acc6a91e70c0a173dde25a2761de408ea
6
+ metadata.gz: b77b8490be83acc7fd4b09273fd0c152aca6c3d097394a3aa1ddafc9e96a917d496ef4eeb04fe2e52b7cc89db16da6d32e80da184ff3714b201873df03033d9c
7
+ data.tar.gz: d391003816a3a35f757ee27e8227c793f032834775671795fde4fb19c8e055ffca8fac65b27a197c021c81a048bbe16fcc58e0dabbc6507e9ce5382217808f93
data/README.md CHANGED
@@ -55,6 +55,12 @@ Optional Dependencies
55
55
  - http://marmotta.apache.org
56
56
  - http://stardog.com
57
57
 
58
+ MARC resources
59
+
60
+ - http://www.loc.gov/marc/
61
+ - http://wiki.code4lib.org/index.php/Working_with_MaRC
62
+ - http://openmetadata.lib.harvard.edu/bibdata
63
+
58
64
  Install
59
65
 
60
66
  gem install marc2linkeddata
@@ -81,6 +87,63 @@ Configure
81
87
  # data resources, such as OCLC works for authorities.
82
88
  marc2LD_config
83
89
 
90
+ Console Exploration
91
+
92
+ # First set configuration parameters (see details above).
93
+ # Then enter the pry REPL console, which requires the
94
+ # gem and loads the configuration.
95
+ marc2LD_console
96
+ > loc = Marc2LinkedData::Loc.new 'http://id.loc.gov/authorities/names/n79044798'
97
+ > loc.id
98
+ => "n79044798"
99
+ > #
100
+ > # retrieve RDF from LOC
101
+ > loc.rdf
102
+ => #<RDF::Graph:0x3fe88de67494(default)>
103
+ > # the RDF is an in-memory graph
104
+ > loc.rdf.to_ttl
105
+ => snipped for brevity
106
+ > #
107
+ > # Various attributes derived from the RDF
108
+ > loc.label
109
+ => "Byrnes, Christopher I., 1949-"
110
+ > loc.deprecated?
111
+ => false
112
+ > loc.person?
113
+ => true
114
+ > loc.corporation?
115
+ => false
116
+ > loc.conference?
117
+ => false
118
+ > loc.geographic?
119
+ => false
120
+ > loc.name_title?
121
+ => false
122
+ > loc.uniform_title?
123
+ => false
124
+ > # Try to retrieve additional linked data resources:
125
+ > oclc_uri = loc.get_oclc_identity
126
+ => "http://www.worldcat.org/identities/lccn-n79044798/"
127
+ > oclc_auth = Marc2LinkedData::OclcIdentity.new oclc_uri
128
+ > sa_graph = oclc_auth.same_as
129
+ => #<RDF::Graph:0x3fce1a5ec0f4(default)>
130
+ > sa_graph.to_ttl
131
+ => snipped for brevity
132
+ > oclc_auth.creative_works
133
+ => [#<RDF::URI:0x3fce1bc0cc6c URI:http://worldcat.org/oclc/747413718>,
134
+ #<RDF::URI:0x3fce1bc2a668 URI:http://worldcat.org/oclc/004933024>,
135
+ snipped for brevity
136
+ #<RDF::URI:0x3fce1be21444 URI:http://worldcat.org/oclc/751661734>]
137
+ > #
138
+ > # Don't just read this, try it out!
139
+ > # There are similar classes for VIAF, ISNI and OCLC entities,
140
+ > # explore the code base for more details and figure out how
141
+ > # to use that VIAF IRI to construct a Viaf object, and
142
+ > # then use it to get more ISNI linked data 8-)
143
+ > viaf_uri = loc.get_viaf
144
+ => "http://viaf.org/viaf/108317368/"
145
+
146
+
84
147
  Scripting
85
148
 
86
149
  # First configure (see details above).
@@ -196,10 +259,10 @@ Example Output Files
196
259
 
197
260
  Ruby Library Use
198
261
 
199
- - authority files
262
+ - iterating records in an authority file
200
263
 
201
264
  require 'marc2linkeddata'
202
- marc_filename = 'stf_auth.01.mrc'
265
+ marc_filename = 'auth.mrc'
203
266
  marc_file = File.open(marc_filename,'r')
204
267
  until marc_file.eof?
205
268
  leader = ParseMarcAuthority::parse_leader(marc_file)
@@ -0,0 +1,29 @@
1
#!/usr/bin/env ruby

# Print a count of the MARC record types found in each MARC file named on
# the command line.  With no usable file arguments, print usage and exit
# with a failure status.

# Pathname and pp are used directly below, so require them explicitly
# rather than relying on another library having loaded them.
require 'pathname'
require 'pp'
require 'marc2linkeddata'

# Top-level constant; Marc2LinkedData::Utils.marc_type_count reads
# CONFIG.debug when it rescues an error.
CONFIG = Marc2LinkedData.configuration

# Keep only the arguments that name existing paths; report the others on
# stderr so a mistyped file name is not silently ignored.
marc_files = []
ARGV.each do |filename|
  path = Pathname(filename)
  if path.exist?
    marc_files.push(path)
  else
    $stderr.puts "Skipping #{filename}: file not found"
  end
end

if marc_files.empty?
  script_name = File.basename(__FILE__)
  puts <<HELP
#{script_name} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]

Output is a count of the record types in the marc file(s).

HELP
  # `exit 1` keeps the failure status that a bare `exit!` reports, but
  # unlike `exit!` it runs exit handlers, so the usage text above is
  # flushed even when stdout is a pipe or a file.
  exit 1
end

marc_files.each do |path|
  record_types = Marc2LinkedData::Utils.marc_type_count(path.to_s)
  puts "MARC record types in: #{path}"
  pp record_types
end
29
+
@@ -19,6 +19,7 @@ require 'pry'
19
19
  require 'pry-doc'
20
20
 
21
21
  require_relative 'marc2linkeddata/configuration'
22
+ require_relative 'marc2linkeddata/utils'
22
23
 
23
24
  require_relative 'marc2linkeddata/resource'
24
25
  require_relative 'marc2linkeddata/isni'
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+
5
module Marc2LinkedData

  # Stateless helpers for error reporting and for scanning raw MARC files
  # by reading their record leaders.
  class Utils

    # Default number of bytes in a MARC record leader.
    LEADER_BYTES = 24

    # Leader position 06 (type of record) codes tallied by marc_type_count,
    # mapped to the keys of the hash it returns:
    #   a - Language material
    #   c - Notated music
    #   d - Manuscript notated music
    #   e - Cartographic material
    #   f - Manuscript cartographic material
    #   g - Projected medium
    #   i - Nonmusical sound recording
    #   z - Authority data
    RECORD_TYPE_KEYS = {
      'a' => :a_language_material,
      'c' => :c_notated_music,
      'd' => :d_manuscript_notated_music,
      'e' => :e_cartographic_material,
      'f' => :f_manuscript_cartographic_material,
      'g' => :g_projected_medium,
      'i' => :i_nonmusical_sound_recording,
      'z' => :z_authority_data
    }.freeze

    # Write an error report to $stderr: an "ERROR" banner, the exception
    # message, the backtrace (one frame per line) and the string form of
    # an optional record that was being processed.
    #
    # @param e [Exception] the error to report
    # @param record [Object, nil] context object; written using #to_s
    def self.stack_trace(e, record=nil)
      $stderr.write "\n"
      $stderr.write "ERROR\n"
      $stderr.write "#{e.message}\n"
      # backtrace is nil for an exception that was never raised; Array()
      # turns that into an empty list.  Joining avoids dumping the Array
      # in its inspect form.
      $stderr.write Array(e.backtrace).join("\n")
      $stderr.write "\n"
      $stderr.write record.to_s
      $stderr.write "\n"
    end

    # Count the records in a MARC file by record type, reading only each
    # record leader and skipping over the record body, see
    # http://www.loc.gov/marc/bibliographic/bdleader.html
    # http://www.loc.gov/marc/authority/adleader.html
    #
    # Records whose type code is not in RECORD_TYPE_KEYS are skipped
    # without being counted.  If a leader cannot be read or declares an
    # impossible record length, the error is reported via stack_trace and
    # scanning stops, returning the counts gathered so far.
    #
    # @param marc_filename [String] path of the MARC file
    # @return [Hash{Symbol => Integer}] count for every RECORD_TYPE_KEYS value
    def self.marc_type_count(marc_filename)
      record_types = {}
      RECORD_TYPE_KEYS.each_value { |key| record_types[key] = 0 }
      # Block form closes the file even if an error escapes the loop;
      # binary mode keeps byte offsets exact on every platform.
      File.open(marc_filename, 'rb') do |marc_file|
        until marc_file.eof?
          leader = nil
          begin
            leader = parse_leader(marc_file)
            # A record cannot be shorter than its own leader.  Seeking by
            # such a length would not move past the record, so the loop
            # could never reach the end of the file.
            if leader[:length] < LEADER_BYTES
              raise ArgumentError,
                    "invalid MARC record length #{leader[:length]} at byte offset #{marc_file.pos}"
            end
            marc_file.seek(leader[:length], IO::SEEK_CUR)
            key = RECORD_TYPE_KEYS[leader[:type]]
            record_types[key] += 1 if key
          rescue => e
            stack_trace(e, leader)
            binding.pry if debug_console?
            # Every failure above happens before the file position moves
            # forward, so retrying would repeat the same error forever.
            break
          end
        end
      end
      record_types
    end

    # Read a MARC leader at the current position of file_handle and then
    # restore that position, so the caller can still read or skip the
    # whole record.
    #
    # example:
    #   "00774cz  a2200253n  4500"
    #   00-04: '00774' - record length
    #   05:    'c'     - corrected or revised
    #   06:    'z'     - always 'z' for authority records
    #   09:    'a'     - UCS/Unicode
    #   12-16: '00253' - base address of data, Length of Leader and Directory
    #   17:    'n'     - Complete authority record
    #
    # status codes (position 05):
    #   'a' => 'Increase in encoding level',
    #   'c' => 'Corrected or revised',
    #   'd' => 'Deleted',
    #   'n' => 'New',
    #   'o' => 'Obsolete',
    #   's' => 'Deleted; heading split into two or more headings',
    #   'x' => 'Deleted; heading replaced by another heading'
    #
    # @param file_handle [IO, StringIO] readable, seekable stream
    # @param leader_bytes [Integer] number of leader bytes to read
    # @return [Hash] :length and :data_address as Integer, :status, :type
    #   and :encoding as one-character Strings, :complete as a boolean
    # @raise [EOFError] if fewer than leader_bytes bytes remain; the stream
    #   position is restored before raising
    def self.parse_leader(file_handle, leader_bytes=LEADER_BYTES)
      leader = file_handle.read(leader_bytes)
      bytes_read = leader.nil? ? 0 : leader.bytesize
      # Rewind by what was actually read, which may be less than requested
      # near the end of the stream.
      file_handle.seek(-bytes_read, IO::SEEK_CUR) if bytes_read > 0
      if bytes_read < leader_bytes
        raise EOFError,
              "truncated MARC leader: expected #{leader_bytes} bytes, read #{bytes_read}"
      end
      {
        :length => leader[0..4].to_i,
        :status => leader[5],   # see the status codes listed above
        :type => leader[6],     # always 'z' for authority records
        :encoding => leader[9], # letter code, not a ruby encoding name
        :data_address => leader[12..16].to_i,
        :complete => leader[17] == 'n'
      }
    end

    # True only when a CONFIG constant with a truthy #debug is visible and
    # pry has been loaded, so that using this class without the console
    # setup does not raise NameError from inside an error handler.
    def self.debug_console?
      return false unless defined?(CONFIG) && CONFIG.respond_to?(:debug)
      !!(CONFIG.debug && binding.respond_to?(:pry))
    end
    private_class_method :debug_console?

  end

end
109
+
@@ -4,7 +4,7 @@ $:.unshift lib unless $:.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |s|
6
6
  s.name = 'marc2linkeddata'
7
- s.version = '0.2.0'
7
+ s.version = '0.2.1'
8
8
  s.licenses = ['Apache-2.0']
9
9
 
10
10
  # mysql and bson_ext only install on MRI (c-ruby)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc2linkeddata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Weber
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-18 00:00:00.000000000 Z
11
+ date: 2015-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -273,6 +273,7 @@ executables:
273
273
  - marc2LD_config
274
274
  - marc2LD_console
275
275
  - marcAuthority2LD
276
+ - marcTypeCount
276
277
  - run_test_data.sh
277
278
  extensions: []
278
279
  extra_rdoc_files: []
@@ -290,6 +291,7 @@ files:
290
291
  - bin/marc2LD_config
291
292
  - bin/marc2LD_console
292
293
  - bin/marcAuthority2LD
294
+ - bin/marcTypeCount
293
295
  - bin/run_test_data.sh
294
296
  - lib/includes.rb
295
297
  - lib/marc2linkeddata.rb
@@ -310,6 +312,7 @@ files:
310
312
  - lib/marc2linkeddata/sparql_dbpedia.rb
311
313
  - lib/marc2linkeddata/sparql_local_loc.rb
312
314
  - lib/marc2linkeddata/sparql_pubmed.rb
315
+ - lib/marc2linkeddata/utils.rb
313
316
  - lib/marc2linkeddata/viaf.rb
314
317
  - log/.gitignore
315
318
  - marc2linkeddata.gemspec