marc2linkeddata 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 543898af0ce1ceaa309d3998c8cbfa2b12df00f9
4
- data.tar.gz: 45fe5fb9d27c32c66b5d385154ea60ccb9368262
3
+ metadata.gz: 252172f95d3c381127868b0a31e1558b30adac49
4
+ data.tar.gz: da2c92881b250cafee8d106c49b2f4e86f7a2cbd
5
5
  SHA512:
6
- metadata.gz: f0458ac1a99307bf1988628298c9eda642fa1015b37dd92abe6fc8ad09d7f4746a962bbdf90ef3948e30c433d686df84d2d628094202097b21e21177b56911b1
7
- data.tar.gz: 06a32b1595aee813138e1705663e2d1fdb92e9d57d03692e5f4dd9e7582c363b014e462641f49100ac561e01b0e7ea1acc6a91e70c0a173dde25a2761de408ea
6
+ metadata.gz: b77b8490be83acc7fd4b09273fd0c152aca6c3d097394a3aa1ddafc9e96a917d496ef4eeb04fe2e52b7cc89db16da6d32e80da184ff3714b201873df03033d9c
7
+ data.tar.gz: d391003816a3a35f757ee27e8227c793f032834775671795fde4fb19c8e055ffca8fac65b27a197c021c81a048bbe16fcc58e0dabbc6507e9ce5382217808f93
data/README.md CHANGED
@@ -55,6 +55,12 @@ Optional Dependencies
55
55
  - http://marmotta.apache.org
56
56
  - http://stardog.com
57
57
 
58
+ MARC resources
59
+
60
+ - http://www.loc.gov/marc/
61
+ - http://wiki.code4lib.org/index.php/Working_with_MaRC
62
+ - http://openmetadata.lib.harvard.edu/bibdata
63
+
58
64
  Install
59
65
 
60
66
  gem install marc2linkeddata
@@ -81,6 +87,63 @@ Configure
81
87
  # data resources, such as OCLC works for authorities.
82
88
  marc2LD_config
83
89
 
90
+ Console Exploration
91
+
92
+ # First set configuration parameters (see details above).
93
+ # Then enter the pry REPL console, which requires the
94
+ # gem and loads the configuration.
95
+ marc2LD_console
96
+ > loc = Marc2LinkedData::Loc.new 'http://id.loc.gov/authorities/names/n79044798'
97
+ > loc.id
98
+ => "n79044798"
99
+ > #
100
+ > # retrieve RDF from LOC
101
+ > loc.rdf
102
+ => #<RDF::Graph:0x3fe88de67494(default)>
103
+ > # the RDF is an in-memory graph
104
+ > loc.rdf.to_ttl
105
+ => snipped for brevity
106
+ > #
107
+ > # Various attributes derived from the RDF
108
+ > loc.label
109
+ => "Byrnes, Christopher I., 1949-"
110
+ > loc.deprecated?
111
+ => false
112
+ > loc.person?
113
+ => true
114
+ > loc.corporation?
115
+ => false
116
+ > loc.conference?
117
+ => false
118
+ > loc.geographic?
119
+ => false
120
+ > loc.name_title?
121
+ => false
122
+ > loc.uniform_title?
123
+ => false
124
+ > # Try to retrieve additional linked data resources:
125
+ > oclc_uri = loc.get_oclc_identity
126
+ => "http://www.worldcat.org/identities/lccn-n79044798/"
127
+ > oclc_auth = Marc2LinkedData::OclcIdentity.new oclc_uri
128
+ > sa_graph = oclc_auth.same_as
129
+ => #<RDF::Graph:0x3fce1a5ec0f4(default)>
130
+ > sa_graph.to_ttl
131
+ => snipped for brevity
132
+ > oclc_auth.creative_works
133
+ => [#<RDF::URI:0x3fce1bc0cc6c URI:http://worldcat.org/oclc/747413718>,
134
+ #<RDF::URI:0x3fce1bc2a668 URI:http://worldcat.org/oclc/004933024>,
135
+ snipped for brevity
136
+ #<RDF::URI:0x3fce1be21444 URI:http://worldcat.org/oclc/751661734>]
137
+ > #
138
+ > # Don't just read this, try it out!
139
+ > # There are similar classes for VIAF, ISNI and OCLC entities,
140
+ > # explore the code base for more details and figure out how
141
+ > # to use that VIAF IRI to construct a Viaf object, and
142
+ > # then use it to get more ISNI linked data 8-)
143
+ > viaf_uri = loc.get_viaf
144
+ => "http://viaf.org/viaf/108317368/"
145
+
146
+
84
147
  Scripting
85
148
 
86
149
  # First configure (see details above).
@@ -196,10 +259,10 @@ Example Output Files
196
259
 
197
260
  Ruby Library Use
198
261
 
199
- - authority files
262
+ - iterating records in an authority file
200
263
 
201
264
  require 'marc2linkeddata'
202
- marc_filename = 'stf_auth.01.mrc'
265
+ marc_filename = 'auth.mrc'
203
266
  marc_file = File.open(marc_filename,'r')
204
267
  until marc_file.eof?
205
268
  leader = ParseMarcAuthority::parse_leader(marc_file)
@@ -0,0 +1,29 @@
1
#!/usr/bin/env ruby

# Command line tool: for every MARC file named on the command line, print a
# count of the MARC record types found in that file.

# Pathname() and pp are used below; require them here instead of relying on
# some other library having loaded them already.
require 'pathname'
require 'pp'

require 'marc2linkeddata'
CONFIG = Marc2LinkedData.configuration

# Only arguments that name existing paths are processed. The rest are reported
# on stderr so a mistyped file name does not go unnoticed.
marc_files, missing_files = ARGV.map { |filename| Pathname(filename) }.partition(&:exist?)
missing_files.each { |path| warn "Skipping missing file: #{path}" }

if marc_files.empty?
  script_name = File.basename(__FILE__)
  puts <<HELP
#{script_name} marc_authority_file1.mrc [ marc_authority_file2.mrc .. marc_authority_fileN.mrc ]

Output is a count of the record types in the marc file(s).

HELP
  # exit! terminates immediately, without running at_exit handlers.
  exit!
end

marc_files.each do |path|
  record_types = Marc2LinkedData::Utils.marc_type_count(path.to_s)
  puts "MARC record types in: #{path}"
  pp record_types
end
29
+
@@ -19,6 +19,7 @@ require 'pry'
19
19
  require 'pry-doc'
20
20
 
21
21
  require_relative 'marc2linkeddata/configuration'
22
+ require_relative 'marc2linkeddata/utils'
22
23
 
23
24
  require_relative 'marc2linkeddata/resource'
24
25
  require_relative 'marc2linkeddata/isni'
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+
5
module Marc2LinkedData

  # Stateless helpers for reporting errors and for inspecting the record
  # leaders of a MARC file.
  class Utils

    # Number of bytes in a MARC record leader.
    LEADER_BYTES = 24

    # Maps the leader "type of record" code (leader position 06) to the key
    # used in the hash returned by marc_type_count.
    #   a - Language material
    #   c - Notated music
    #   d - Manuscript notated music
    #   e - Cartographic material
    #   f - Manuscript cartographic material
    #   g - Projected medium
    #   i - Nonmusical sound recording
    #   z - Authority data
    RECORD_TYPE_KEYS = {
      'a' => :a_language_material,
      'c' => :c_notated_music,
      'd' => :d_manuscript_notated_music,
      'e' => :e_cartographic_material,
      'f' => :f_manuscript_cartographic_material,
      'g' => :g_projected_medium,
      'i' => :i_nonmusical_sound_recording,
      'z' => :z_authority_data
    }.freeze

    # Write an error report to $stderr: the exception message, its backtrace
    # (one frame per line) and the string form of the record being processed.
    #
    # @param e [Exception] the error to report
    # @param record [Object, nil] optional context, written using to_s
    def self.stack_trace(e, record=nil)
      # backtrace is nil for an exception that was never raised
      trace = Array(e.backtrace).join("\n")
      $stderr.write "\nERROR\n#{e.message}\n#{trace}\n#{record}\n"
    end

    # Count all the records in the MARC file, by
    # parsing the record leader, see
    # http://www.loc.gov/marc/bibliographic/bdleader.html
    # http://www.loc.gov/marc/authority/adleader.html
    #
    # Records whose type code is not listed in RECORD_TYPE_KEYS are skipped
    # without being counted. Counting stops at the first record that cannot
    # be parsed (truncated leader, or a record length shorter than a leader),
    # because the position of the next record is then unknown; the error is
    # reported with stack_trace and the counts gathered so far are returned.
    #
    # @param marc_filename [String] path to a MARC file
    # @return [Hash{Symbol => Integer}] record count per record type
    def self.marc_type_count(marc_filename)
      record_types = RECORD_TYPE_KEYS.values.each_with_object({}) do |key, counts|
        counts[key] = 0
      end
      # Binary mode: record lengths are byte counts. The block form closes
      # the file even when an exception escapes.
      File.open(marc_filename, 'rb') do |marc_file|
        until marc_file.eof?
          leader = nil
          begin
            leader = parse_leader(marc_file)
            # The record length includes the leader itself. Anything smaller
            # would seek without progress and loop forever.
            if leader[:length] < LEADER_BYTES
              raise ArgumentError,
                    "Invalid MARC record length #{leader[:length]} at byte offset #{marc_file.pos}"
            end
            marc_file.seek(leader[:length], IO::SEEK_CUR)
            key = RECORD_TYPE_KEYS[leader[:type]]
            record_types[key] += 1 if key
          rescue => e
            stack_trace(e, leader)
            # CONFIG is defined by the calling scripts; it may be absent
            # when this class is used as a library.
            binding.pry if defined?(CONFIG) && CONFIG.debug
            break
          end
        end
      end
      record_types
    end

    # Read the record leader at the current position of file_handle and
    # restore the position afterwards, so the caller can still read or skip
    # the whole record.
    #
    # example:
    #   record.leader
    #   => "00774cz  a2200253n  4500"
    #   00-04: '00774' - record length
    #   05:    'c' - corrected or revised
    #   06:    'z' - always 'z' for authority records
    #   09:    'a' - UCS/Unicode
    #   12-16: '00253' - base address of data, Length of Leader and Directory
    #   17:    'n' - Complete authority record
    #
    # leader status codes (position 05):
    #   'a' => 'Increase in encoding level',
    #   'c' => 'Corrected or revised',
    #   'd' => 'Deleted',
    #   'n' => 'New',
    #   'o' => 'Obsolete',
    #   's' => 'Deleted; heading split into two or more headings',
    #   'x' => 'Deleted; heading replaced by another heading'
    #
    # @param file_handle [IO, StringIO] positioned at the start of a record
    # @param leader_bytes [Integer] number of leader bytes to read
    # @return [Hash] :length, :status, :type, :encoding, :data_address, :complete
    # @raise [EOFError] when fewer than leader_bytes bytes remain
    def self.parse_leader(file_handle, leader_bytes=LEADER_BYTES)
      leader = file_handle.read(leader_bytes)
      bytes_read = leader ? leader.bytesize : 0
      # Rewind by what was actually read. A fixed rewind after a short read
      # would land inside the previous record or before the file start.
      file_handle.seek(-bytes_read, IO::SEEK_CUR) if bytes_read > 0
      if bytes_read < leader_bytes
        raise EOFError,
              "Truncated MARC leader: expected #{leader_bytes} bytes, read #{bytes_read}"
      end
      {
        :length => leader[0..4].to_i,
        :status => leader[5],
        :type => leader[6],      # always 'z' for authority records
        :encoding => leader[9],  # letter code, not a ruby encoding name
        :data_address => leader[12..16].to_i,
        :complete => leader[17] == 'n'
      }
    end

  end

end
109
+
@@ -4,7 +4,7 @@ $:.unshift lib unless $:.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |s|
6
6
  s.name = 'marc2linkeddata'
7
- s.version = '0.2.0'
7
+ s.version = '0.2.1'
8
8
  s.licenses = ['Apache-2.0']
9
9
 
10
10
  # mysql and bson_ext only install on MRI (c-ruby)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc2linkeddata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Darren Weber
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-18 00:00:00.000000000 Z
11
+ date: 2015-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -273,6 +273,7 @@ executables:
273
273
  - marc2LD_config
274
274
  - marc2LD_console
275
275
  - marcAuthority2LD
276
+ - marcTypeCount
276
277
  - run_test_data.sh
277
278
  extensions: []
278
279
  extra_rdoc_files: []
@@ -290,6 +291,7 @@ files:
290
291
  - bin/marc2LD_config
291
292
  - bin/marc2LD_console
292
293
  - bin/marcAuthority2LD
294
+ - bin/marcTypeCount
293
295
  - bin/run_test_data.sh
294
296
  - lib/includes.rb
295
297
  - lib/marc2linkeddata.rb
@@ -310,6 +312,7 @@ files:
310
312
  - lib/marc2linkeddata/sparql_dbpedia.rb
311
313
  - lib/marc2linkeddata/sparql_local_loc.rb
312
314
  - lib/marc2linkeddata/sparql_pubmed.rb
315
+ - lib/marc2linkeddata/utils.rb
313
316
  - lib/marc2linkeddata/viaf.rb
314
317
  - log/.gitignore
315
318
  - marc2linkeddata.gemspec