wcid 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/ruby -w
2
+ require 'wcid'
3
+ include WCID
4
+
5
# Looks up the Library of Congress name authority record for every name
# field in the MARC file named by the first command-line argument and
# prints the records found.

# Without a file name the reader would fail and the user would be told,
# wrongly, that the remote site is down -- so check for it up front.
if ARGV.empty?
  $stderr.puts "Usage: #{File.basename($0)} marc_file"
  exit 1
end

begin
  authorities = marc_file_to_auths(ARGV[0])
rescue StandardError => e
  # Keep the original hint, but also say what actually went wrong and
  # signal the failure through the exit status.
  $stderr.puts 'There has been an error! Check that the WorldCat Identities site is up at http://orlabs.oclc.org/Identities/ before complaining to the developer. The site is often down as it is now only in beta.'
  $stderr.puts "(#{e.class}: #{e.message})"
  exit 1
end

puts authorities
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/ruby -w
2
+ ## Jason Ronallo
3
+ # May 2007
4
+
5
+ require 'rubygems'
6
+ require "rexml/document"
7
+ require "rexml/xpath"
8
+ require 'net/http'
9
+ require 'uri'
10
+ require 'marc'
11
+ require 'stringio'
12
+ require 'worldcatid'
13
+
14
+ #file = ''
15
# Interactive smoke test: reads MARC records, runs an exact WorldCat
# Identities search for every name field and shows the LC authority
# record found for it, pausing for the user between results.

# Prints +message+ and returns the user's reply without the trailing
# newline. Exits when STDIN has been closed, because there is nobody left
# to ask (STDIN.gets returns nil in that case).
def prompt_for(message)
  print message
  reply = STDIN.gets
  if reply.nil?
    puts
    exit 1
  end
  reply.chomp
end

# With no file arguments ARGF reports "-" (standard input), so ask for a
# path instead; otherwise hand ARGF itself to the MARC reader.
file = ARGF.filename == '-' ? prompt_for("Enter the full path to a MARC file: ") : ARGF

#wc_search = (WCID::Search.new('Hull, Richard 1945')).search_exact
begin
  records = MARC::Reader.new(file).to_a
rescue MARC::Exception
  puts "MARC Exception"
  exit 1
rescue StandardError => e
  # Only ordinary errors lead to a new prompt; Interrupt and SystemExit
  # are deliberately not rescued so Ctrl-C still ends the script.
  puts e
  file = prompt_for("Enter a valid filename: ")
  retry
end
puts "records.length: #{records.length}"
#STDIN.gets
records.each do |record|
  fields = record.name_fields
  puts "fields.length #{fields.length}"
  #STDIN.gets
  fields.each do |field|
    query = field.pnkey_from_marc
    rec = WCID::Search.new(query).search_exact
    if rec.nil?
      puts "There's no rec for #{query}"
      puts "Hit enter to continue"
      STDIN.gets
      next
    end
    puts "class of rec: #{rec.class}"

    lc_retries = 0
    begin
      auth = rec.get_lc_auth
    rescue Timeout::Error, StandardError => e
      #puts e; STDIN.gets
      # get_lc_auth re-raises an EOFError as a RuntimeError with the
      # message "eof"; wait a long while before trying again.
      if e.to_s == "eof"
        puts "We'll retry in a long time...."
        sleep 1000
        retry
      end
      # Any other failure is retried at most five times.
      if lc_retries < 5
        puts "retrying...."
        lc_retries += 1
        puts
        puts "retry #{lc_retries}"
        retry
      else
        puts "you're outta luck on the lc_auth lookup"
        next
      end
    end
    if auth.nil?
      puts "auth nil"
      STDIN.gets
      next
    end
    puts auth
    puts auth.inspect
    STDIN.gets
  end
end

puts "this is the end of the line"
@@ -0,0 +1,45 @@
1
+ #wcid is a ruby library for handling WorldCat Identities
2
+ #for more information on WorldCat Identities see http://orlabs.oclc.org/Identities/
3
+ #
4
+ #USAGE
5
+ #
6
+ # require 'wcid'
7
+
8
+
9
+ require 'rubygems'
10
+ require 'rexml/document'
11
+ require 'net/http'
12
+ require 'uri'
13
+ require 'marc'
14
+ require 'stringio'
15
+
16
+ require 'wcid/version'
17
+
18
+ require 'wcid/marc'
19
+ require 'wcid/search'
20
+ require 'wcid/hit'
21
+ require 'wcid/id'
22
+
23
module WCID

  # Collects name authority records for every name field found in a
  # source of MARC records.
  #
  # +filename+ is passed unchanged to MARC::Reader.new. Each field from
  # MARC::Record#name_fields is turned into a pnkey query
  # (MARC::DataField#pnkey_from_marc), searched with
  # WCID::Search#search_exact, and the resulting identity is asked for
  # its authority record via get_lc_auth. Names with no identity, and
  # identities with no authority record, are skipped.
  #
  # Returns an array of the authority records in the order encountered.
  def marc_file_to_auths(filename)
    found = []
    MARC::Reader.new(filename).to_a.each do |marc_record|
      marc_record.name_fields.each do |name_field|
        identity = WCID::Search.new(name_field.pnkey_from_marc).search_exact
        lc_record = identity.get_lc_auth unless identity.nil?
        found << lc_record unless lc_record.nil?
      end
    end
    found
  end
end
@@ -0,0 +1,34 @@
1
module WCID

  # A single match returned by a WorldCat Identities name search
  # (WCID::Search#search builds one Hit per match element).
  #
  # Hit objects contain some potentially useful information, but of
  # primary interest is the lccn, which allows you to grab the authority
  # record of a particular hit.
  class Hit

    attr_reader :score, :established_form, :uri, :citation, :lccn, :pubdates, :name_type

    # +m+ is one match element: it must respond to +attributes+ and
    # +elements+ the way a REXML element does. Child elements that are
    # absent leave the corresponding reader returning nil.
    def initialize m
      @score            = m.attributes['score']
      @established_form = child_text(m, 'establishedForm')
      @uri              = child_text(m, 'uri')
      @citation         = child_text(m, 'citation')
      @lccn             = child_text(m, 'lccn')
      @pubdates         = child_text(m, 'pubDates')
      @name_type        = child_text(m, 'nameType')
    end

    # Fetches the authority record for this hit from the Linked Authority
    # File and returns the first record of the response as parsed by
    # MARC::XMLReader (nil when the response contains no record).
    #
    # Returns nil without touching the network when the hit has no lccn,
    # since there is nothing to look up.
    def get_lc_auth
      return nil if lccn.nil?
      src = Net::HTTP.get(URI.parse("http://errol.oclc.org/laf/#{lccn}.MarcXML"))
      MARC::XMLReader.new(StringIO.new(src)).to_a[0]
    end

    private

    # Text of the named child element of +node+, or nil when it is absent.
    def child_text(node, name)
      child = node.elements[name]
      child.text if child
    end
  end
end
@@ -0,0 +1,80 @@
1
module WCID

  # A WCID::ID object is a single Identity record in WorldCat Identities,
  # built from the response document of an exact (pnkey) search.
  class ID
    #lccn = Library of Congress Control Number, already converted with
    #  convert_lccn for use against the Linked Authority File
    #type = value of the Identity element's "type" attribute
    #wiki_link = link to Wikipedia for this identity
    #subfield_a = name
    #subfield_d = dates associated with the name
    #pnkey = each WorldCat Identities record is given a unique pnkey made up
    #  of the name and associated dates (and qualifier?). A pnkey is the
    #  most exact way to search WorldCat Identities.

    attr_reader :lccn, :type, :wiki_link, :subfield_a, :subfield_d, :pnkey

    # +doc+ is the parsed search response; only its first record is used.
    # The pnkey and lccn elements are required (a missing one raises
    # NoMethodError); the wiki link and standard-form subfields are
    # optional and left nil when absent.
    def initialize doc
      @record = doc.elements.to_a("searchRetrieveResponse/records/record")
      @identity = @record[0].elements['recordData/Identity']
      @pnkey = @identity.elements['pnkey'].text
      @lccn_record = @identity.elements['authorityInfo/lccn'].text
      @lccn = convert_lccn(@lccn_record)
      @type = @identity.attributes['type']
      wiki = @identity.elements['authorityInfo/wikiLink']
      @wiki_link = "http://en.wikipedia.org/wiki/#{wiki.text}" if wiki
      @subfield_a = identity_text('authorityInfo/standardForm/suba')
      @subfield_d = identity_text('authorityInfo/standardForm/subd')
    end

    #A method to convert the Library of Congress Control Number as represented
    #in the WorldCat Identities database into the hyphenated form used for
    #searching the Linked Authority File.
    #
    # Spaces are removed, a hyphen is inserted before the final six
    # characters (the serial number) and the serial's leading zeros are
    # dropped: "n  79021164" becomes "n79-21164".
    #
    # The argument is not modified. nil is returned for nil, and a value
    # too short to contain a six-character serial is returned with only
    # its spaces removed.
    def convert_lccn(lccn)
      return nil if lccn.nil?
      compact = lccn.delete(' ')
      return compact if compact.length < 6
      prefix = compact[0...-6]
      serial = compact[-6..-1]
      # Always keep at least one character of the serial.
      "#{prefix}-#{serial.sub(/\A0+(?=.)/, '')}"
    end

    #Returns a MARC::Record representation of the associated LC authority
    #record (the first record of the response, or nil if there is none).
    #
    # The HTTP request is abandoned after 20 seconds. Every failure is
    # reported on standard output and then re-raised; an EOFError is
    # re-raised as a RuntimeError with the message "eof".
    def get_lc_auth
      src = ''
      Timeout.timeout 20 do
        src = Net::HTTP.get(URI.parse(lc_auth_uri))
      end
      MARC::XMLReader.new(StringIO.new(src)).to_a[0]
    rescue Timeout::Error
      puts "The auth lookup timed out!"
      raise
    rescue MARC::Exception => e
      puts "MARC exception: #{e}"
      raise
    rescue REXML::ParseException
      puts "rexml error"
      raise
    rescue EOFError
      puts "The service seems to be down!"
      raise "eof"
    rescue StandardError => e
      puts "some other type of exception"
      puts e
      raise
    end

    private

    # Escaped URL of this identity's record in the Linked Authority File.
    # URI.escape no longer exists in current Ruby; the default parser's
    # escape performs the same escaping.
    def lc_auth_uri
      URI::DEFAULT_PARSER.escape("http://errol.oclc.org/laf/#{lccn}.MarcXML")
    end

    # Text of the element at +path+ below the Identity element, or nil
    # when there is no such element.
    def identity_text(path)
      node = @identity.elements[path]
      node.text if node
    end
  end

end
@@ -0,0 +1,69 @@
1
+ require 'marc'
2
+
3
module MARC
  class DataField

    #accept a MARC datafield and create a pnkey for query
    #really just a bunch of string clean-ups that turn the name, qualifier
    #and date subfields into the form used by WorldCat Identities pnkeys.
    #This should only really work for exact searches if the datafield passed
    #to the method had proper authority work done on it.
    #
    # Returns the query wrapped in double quotes: the cleaned name,
    # followed by "$" + qualifier and "$" + dates for whichever of those
    # subfields are present.
    def pnkey_from_marc
      name = name_from_datafield
      dates = dates_from_datafield
      qualifier = qualifier_from_datafield

      query = "\"#{name}"
      query += "$" + qualifier if qualifier
      query += dates if dates
      query += "\""
      #puts query
      query
    end

    #Should the following be private?

    # Subfield a, lower-cased, with full stops and apostrophes removed, a
    # trailing comma dropped and hyphens turned into spaces. Raises
    # NoMethodError when the field has no subfield a.
    def name_from_datafield
      self['a'].downcase.delete('.').gsub(/,$/, '').tr('-', ' ').delete("'")
    end

    # Subfield d prefixed with "$", with hyphens turned into spaces and
    # full stops removed; nil when the field has no subfield d. Works on a
    # copy so the field's own subfield value is left untouched.
    def dates_from_datafield
      dates = self['d']
      return nil unless dates
      "$" + dates.tr('-', ' ').delete('.')
    end

    # Subfield q, lower-cased, with commas and parentheses removed and
    # hyphens turned into spaces; nil when the field has no subfield q.
    # The cleaned string is returned explicitly: the result of downcase!
    # is nil whenever the text is already lower case, which used to make
    # such qualifiers disappear from the query.
    def qualifier_from_datafield
      qualifier = self['q']
      return nil unless qualifier
      qualifier.tr('-', ' ').delete(',()').downcase
    end
  end

  class Record

    #accept a MARC record and return an array of name fields to be used in queries
    #
    # The result holds the 100 field, then the 110 field (each only if
    # present), followed by every 700 field in record order.
    def name_fields
      main_entries = [self['100'], self['110']].compact
      main_entries + find_all { |f| f.tag == '700' }
    end
  end

end
@@ -0,0 +1,78 @@
1
module WCID

  #WCID::Search objects are for creating and running queries
  #against WorldCat Identities.

  class Search
    attr_reader :query

    #pass it a string
    #  name_search = WCID::Search.new('Twain, Mark')

    def initialize query
      @query = query
    end

    #Actually performs the search; returns an array of Hit objects, one per
    #match element in the response (empty when there are none)
    #  hits = name_search.search
    #  or
    #  hits = (WCID::Search.new('Twain, Mark')).search

    def search
      doc = do_search(escape_uri("http://orlabs.oclc.org/Identities/find?fullName=#{@query}"))
      doc.elements.to_a("nameAuthorities/match").map { |match| WCID::Hit.new(match) }
    end

    #returns single ID object (the first hit) or nil when the response
    #reports zero records
    #  hit = name_search.search_exact
    #  or
    #  hit = (WCID::Search.new('Twain, Mark')).search_exact
    #
    # Errors from the request, or from a response without a
    # numberOfRecords element, propagate to the caller.

    def search_exact
      doc = do_search(escape_uri("http://orlabs.oclc.org/SRW/search/Identities?query=local.pnkey+exact+#{@query}"))
      return nil if doc.elements['searchRetrieveResponse/numberOfRecords'].text == '0'
      WCID::ID.new(doc)
    end

    #This should maybe be private or somesuch?
    #
    # Fetches +uri+ (given 20 seconds to answer) and returns the body
    # parsed as a REXML::Document. Timeouts and connection resets are
    # reported on standard output and re-raised.
    def do_search uri
      file = ''
      Timeout.timeout 20 do
        file = Net::HTTP.get(URI.parse(uri.to_s))
      end
      REXML::Document.new file
    rescue Timeout::Error
      puts "timeout error!"
      raise
    rescue Errno::ECONNRESET
      puts "errno!"
      raise
    end

    private

    # Percent-escapes the characters of +uri+ that may not appear in a
    # URI (spaces, double quotes, ...). URI.escape no longer exists in
    # current Ruby; the default parser's escape performs the same escaping.
    def escape_uri(uri)
      URI::DEFAULT_PARSER.escape(uri)
    end
  end

end
@@ -0,0 +1,9 @@
1
module Wcid #:nodoc:
  module VERSION #:nodoc:
    # Components of the library version.
    MAJOR, MINOR, TINY = 0, 0, 1

    # The components joined with dots.
    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/wcid/version.rb'
8
+
9
# Top-level locals: they are part of the `binding` handed to
# template.result at the end of this script, so the ERB template can read
# them (presumably it does -- the template itself is not visible here).
+ version = Wcid::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/wcid'
11
+
12
# Adds an English ordinal suffix helper to integers.
#
# Defined on Integer rather than Fixnum: Fixnum was a subclass of Integer
# when this was written and no longer exists in current Ruby, so Integer
# covers both.
class Integer
  # Returns the suffix that turns this number into an ordinal:
  # 'st', 'nd', 'rd' or 'th' (1 -> 'st', 12 -> 'th', 23 -> 'rd').
  def ordinal
    # teens
    return 'th' if (10..19).include?(self % 100)
    # others
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
26
class Time
  # Formats the date part as day of month with its ordinal suffix, full
  # month name and year, separated by single spaces (for example
  # "4th May 2007"). Relies on the integer +ordinal+ helper.
  def pretty
    day = "#{mday}#{mday.ordinal}"
    [day, strftime('%B'), year].join(' ')
  end
end
31
+
32
# Runs +source+ through the HTML convertor registered for +syntax+ and
# returns the markup with a <pre> at the start of a line and a </pre> at
# the end of a line removed.
def convert_syntax(syntax, source)
  highlighter = Syntax::Convertors::HTML.for_syntax(syntax)
  html = highlighter.convert(source)
  html.gsub(%r!^<pre>|</pre>$!, '')
end
35
+
36
# Renders a Textile source file through an ERB template and writes the
# resulting HTML to standard output.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'

else
  # Standard output is normally redirected into the generated page, so the
  # usage message goes to standard error instead.
  $stderr.puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the template file again once it has been read.
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  # The first line of the source is the page title, the rest the body.
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Swap every <pre>/<code> element carrying a syntax attribute for a
  # placeholder, so that RedCloth does not touch the highlighted markup.
  # \1 makes the closing tag match the element name that opened it.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted snippets back; \d+ captures the placeholder's index.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
# Timestamps of the source file, exposed to the template through binding.
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)