wcid 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/ruby -w
2
+ require 'wcid'
3
+ include WCID
4
+
5
# Look up the LC name authority record for every name heading found in the
# MARC file given as the first command-line argument and print the records
# to stdout.
#
# Diagnostics go to stderr and failures exit non-zero so that a shell
# pipeline consuming the authority records can tell success from failure.

if ARGV.empty?
  # Without this check a missing argument would be reported as a site outage.
  warn "Usage: #{File.basename($0)} marc_file"
  exit 1
end

begin
  authorities = marc_file_to_auths(ARGV[0])
rescue StandardError => e
  warn 'There has been an error! Check that the WorldCat Identities site is up at http://orlabs.oclc.org/Identities/ before complaining to the developer. The site is often down as it is now only in beta.'
  # Keep the underlying cause visible; the hint above is only a guess.
  warn "(#{e.class}: #{e.message})"
  exit 1
end

puts authorities
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/ruby -w
2
+ ## Jason Ronallo
3
+ # May 2007
4
+
5
+ require 'rubygems'
6
+ require "rexml/document"
7
+ require "rexml/xpath"
8
+ require 'net/http'
9
+ require 'uri'
10
+ require 'marc'
11
+ require 'stringio'
12
+ require 'worldcatid'
13
+
14
+ #file = ''
15
# Interactive smoke test for the library.
#
# Reads MARC records (from the files named on the command line, or from a
# path typed at the prompt), builds a pnkey query for every name heading,
# looks it up with WCID::Search#search_exact and prints the LC authority
# record returned by #get_lc_auth. The script pauses for <enter> after each
# result so the output can be inspected.

if ARGF.filename == '-'
  print "Enter the full path to a MARC file: "
  # to_s: a closed stdin yields nil; the empty path then fails below and
  # takes the normal "ask again" route instead of crashing on nil.chomp.
  file = STDIN.gets.to_s.chomp
else
  file = ARGF
end

#wc_search = (WCID::Search.new('Hull, Richard 1945')).search_exact
begin
  records = MARC::Reader.new(file).to_a
rescue MARC::Exception
  puts "MARC Exception"
  exit
rescue StandardError => e
  # StandardError rather than Exception, so Ctrl-C and `exit` still
  # terminate the script instead of re-entering this prompt.
  puts e
  print "Enter a valid filename: "
  answer = STDIN.gets
  exit 1 if answer.nil? # stdin closed: nobody left to ask
  file = answer.chomp
  retry
end

# Upper bounds for the two retry paths of the authority lookup.
max_lc_retries   = 5
max_outage_waits = 3

puts "records.length: #{records.length}"
#STDIN.gets
records.each do |record|
  fields = record.name_fields
  puts "fields.length #{fields.length}"
  #STDIN.gets
  fields.each do |field|
    query = field.pnkey_from_marc
    rec = WCID::Search.new(query).search_exact
    if rec.nil?
      puts "There's no rec for #{query}"
      puts "Hit enter to continue"
      STDIN.gets
      next
    end
    puts "class of rec: #{rec.class}"

    lc_retries = 0
    outage_waits = 0
    begin
      auth = rec.get_lc_auth
    rescue Timeout::Error, StandardError => e
      # Timeout::Error is listed explicitly because it is not a
      # StandardError on every Ruby version.
      #puts e; STDIN.gets
      # WCID::ID#get_lc_auth raises "eof" when the service looks down:
      # wait a long time before trying again, but only a bounded number
      # of times so the script cannot hang forever.
      if e.to_s == "eof" && outage_waits < max_outage_waits
        puts "We'll retry in a long time...."
        outage_waits += 1
        sleep 1000
        retry
      end
      if lc_retries < max_lc_retries
        puts "retrying...."
        lc_retries += 1
        puts
        puts "retry #{lc_retries}"
        retry
      else
        puts "you're outta luck on the lc_auth lookup"
        next
      end
    end
    if auth.nil?
      puts "auth nil"
      STDIN.gets
      next
    end
    puts auth
    puts auth.inspect
    STDIN.gets
  end
end

puts "this is the end of the line"
@@ -0,0 +1,45 @@
1
+ #wcid is a ruby library for handling WorldCat Identities
2
+ #for more information on WorldCat Identities see http://orlabs.oclc.org/Identities/
3
+ #
4
+ #USAGE
5
+ #
6
+ # require 'wcid'
7
+
8
+
9
+ require 'rubygems'
10
+ require 'rexml/document'
11
+ require 'net/http'
12
+ require 'uri'
13
+ require 'marc'
14
+ require 'stringio'
15
+
16
+ require 'wcid/version'
17
+
18
+ require 'wcid/marc'
19
+ require 'wcid/search'
20
+ require 'wcid/hit'
21
+ require 'wcid/id'
22
+
23
module WCID

  # Collects the name authority records for one or more MARC records.
  #
  # +filename+ is handed straight to MARC::Reader.new. Every name field of
  # every record is turned into a pnkey query and searched exactly; fields
  # with no matching identity, or whose identity has no authority record,
  # are skipped.
  #
  # Returns an array of whatever #get_lc_auth returned for the matches, in
  # record order and then field order.
  def marc_file_to_auths(filename)
    found = []
    MARC::Reader.new(filename).to_a.each do |marc_record|
      marc_record.name_fields.each do |name_field|
        identity = WCID::Search.new(name_field.pnkey_from_marc).search_exact
        authority = identity.get_lc_auth unless identity.nil?
        found << authority unless authority.nil?
      end
    end
    found
  end
end
@@ -0,0 +1,34 @@
1
module WCID

  # Conducting a search of the WorldCat Identities site returns WCID::Hit
  # objects, one per <match> element of the response.
  #
  # Hit objects contain some potentially useful information, but of primary
  # interest is the lccn, which allows you to grab the authority record of a
  # particular hit.
  class Hit

    attr_reader :score, :established_form, :uri, :citation, :lccn, :pubdates, :name_type

    # m is one match element: it must respond to #attributes and #elements
    # the way a REXML::Element does. Child elements that are absent leave
    # the corresponding reader returning nil.
    def initialize(m)
      @score            = m.attributes['score']
      # Every attribute is assigned (possibly nil) so that the readers do
      # not hit uninitialized instance variables when run with `ruby -w`.
      @established_form = element_text(m, 'establishedForm')
      @uri              = element_text(m, 'uri')
      @citation         = element_text(m, 'citation')
      @lccn             = element_text(m, 'lccn')
      @pubdates         = element_text(m, 'pubDates')
      @name_type        = element_text(m, 'nameType')
    end

    # Fetches the authority record for this hit's lccn from the Linked
    # Authority File and returns the first record MARC::XMLReader parses
    # out of the response.
    #
    # Returns nil when the hit has no lccn (nothing to look up) or when the
    # response contains no record.
    def get_lc_auth
      return nil if lccn.nil? || lccn.empty?

      src = Net::HTTP.get(URI.parse("http://errol.oclc.org/laf/#{lccn}.MarcXML"))
      MARC::XMLReader.new(StringIO.new(src)).to_a.first
    end

    private

    # Text of the named child element of node, or nil if there is none.
    def element_text(node, name)
      child = node.elements[name]
      child && child.text
    end
  end
end
@@ -0,0 +1,80 @@
1
module WCID

  # A WCID::ID object is a single Identity record in WorldCat Identities.
  #
  # Readers:
  # lccn       = Library of Congress Control Number, already converted by
  #              #convert_lccn for use with the Linked Authority File
  # type       = the "type" attribute of the Identity element
  # wiki_link  = link to Wikipedia for this identity (nil if none)
  # subfield_a = name
  # subfield_d = dates associated with the name
  # pnkey      = each WorldCat Identities record is given a unique pnkey made
  #              up of the name and associated dates (and qualifier?). A
  #              pnkey is the most exact way to search WorldCat Identities.
  class ID

    attr_reader :lccn, :type, :wiki_link, :subfield_a, :subfield_d, :pnkey

    # doc is the parsed search response; only the first record in
    # searchRetrieveResponse/records is used. The pnkey and
    # authorityInfo/lccn elements are required (a NoMethodError results if
    # they are missing); the remaining elements are optional.
    def initialize(doc)
      @record      = doc.elements.to_a("searchRetrieveResponse/records/record")
      @identity    = @record[0].elements['recordData/Identity']
      @pnkey       = @identity.elements['pnkey'].text
      @lccn_record = @identity.elements['authorityInfo/lccn'].text
      @lccn        = convert_lccn(@lccn_record)
      @type        = @identity.attributes['type']

      wiki = optional_text('authorityInfo/wikiLink')
      @wiki_link   = wiki && "http://en.wikipedia.org/wiki/#{wiki}"
      @subfield_a  = optional_text('authorityInfo/standardForm/suba')
      @subfield_d  = optional_text('authorityInfo/standardForm/subd')
    end

    # Converts the Library of Congress Control Number as represented in the
    # WorldCat Identities database into the hyphenated form used for
    # searching the Linked Authority File: spaces are removed, and the last
    # six characters (the serial) are separated from the prefix by "-" with
    # their leading zeros stripped, e.g. "n  79021164" => "n79-21164".
    #
    # The argument is not modified. Input of six characters or fewer (after
    # removing spaces) has no room for a prefix and is returned as is.
    def convert_lccn(lccn)
      compact = lccn.delete(' ')
      return compact if compact.length <= 6

      prefix = compact[0...-6]
      # Strip every leading zero but always keep at least one character.
      serial = compact[-6..-1].sub(/\A0+(?=.)/, '')
      "#{prefix}-#{serial}"
    end

    # Returns the first record MARC::XMLReader parses out of the Linked
    # Authority File response for this identity's lccn.
    #
    # The request is abandoned after 20 seconds. Every failure is reported
    # on stdout and re-raised; an EOFError is re-raised as a RuntimeError
    # with the message "eof", which callers use to detect an outage.
    def get_lc_auth
      uri = escape_uri("http://errol.oclc.org/laf/#{lccn}.MarcXML")
      src = Timeout.timeout(20) { Net::HTTP.get(URI.parse(uri)) }
      MARC::XMLReader.new(StringIO.new(src)).to_a[0]
    rescue Timeout::Error
      puts "The auth lookup timed out!"
      raise
    rescue MARC::Exception => e
      puts "MARC exception: #{e}"
      raise
    rescue REXML::ParseException
      puts "rexml error"
      raise
    rescue EOFError
      puts "The service seems to be down!"
      raise "eof"
    rescue StandardError => e
      # StandardError, not Exception: interrupts and exits pass through
      # untouched.
      puts "some other type of exception"
      puts e
      raise
    end

    private

    # Text of the element at path below the Identity element, or nil when
    # the element is absent.
    def optional_text(path)
      node = @identity.elements[path]
      node && node.text
    end

    # Percent-escapes characters that are not allowed in a URI.
    # URI.escape was removed in Ruby 3.0, so use a parser object when the
    # running Ruby provides one and fall back to URI.escape otherwise.
    def escape_uri(str)
      if defined?(URI::RFC2396_PARSER)
        URI::RFC2396_PARSER.escape(str)
      elsif defined?(URI::DEFAULT_PARSER)
        URI::DEFAULT_PARSER.escape(str)
      else
        URI.escape(str)
      end
    end
  end

end
@@ -0,0 +1,69 @@
1
+ require 'marc'
2
+
3
module MARC
  class DataField

    # Builds the pnkey query string for this name field: the normalized
    # name, then "$" + qualifier (if there is a $q), then "$" + dates (if
    # there is a $d), all wrapped in double quotes.
    #
    # It is really just a bunch of string clean-ups, so it should only work
    # for exact searches if the datafield had proper authority work done on
    # it.
    #
    # None of these helpers modify the field's subfield values.
    def pnkey_from_marc
      name = name_from_datafield
      dates = dates_from_datafield
      qualifier = qualifier_from_datafield

      query = "\"#{name}"
      query << '$' << qualifier if qualifier
      query << dates if dates
      query << '"'
      #puts query
      query
    end

    #Should the following be private?

    # Subfield $a lowercased, with periods removed, a comma at the end of a
    # line removed, hyphens turned into spaces and apostrophes removed.
    # The order of the steps matters: the comma is only stripped if it is
    # last once the periods are gone.
    def name_from_datafield
      self['a'].downcase.delete('.').gsub(/,$/, '').tr('-', ' ').delete("'")
    end

    # "$" followed by subfield $d with hyphens turned into spaces and
    # periods removed, or nil when the field has no $d.
    def dates_from_datafield
      raw = self['d']
      return nil unless raw

      '$' + raw.tr('-', ' ').delete('.')
    end

    # Subfield $q lowercased, with hyphens turned into spaces and commas
    # and parentheses removed, or nil when the field has no $q.
    #
    # Non-destructive methods are used on purpose: String#downcase! returns
    # nil when the string is already lowercase, which used to make this
    # method drop such qualifiers.
    def qualifier_from_datafield
      raw = self['q']
      return nil unless raw

      raw.tr('-', ' ').delete(',()').downcase
    end
  end

  class Record

    # Returns an array of the name fields to be used in queries: the 100
    # field, then the 110 field, then every 700 field, omitting whichever
    # are not present.
    def name_fields
      names = [self['100'], self['110']].compact
      names.concat(find_all { |f| f.tag == '700' })
      #puts names.inspect
      names
    end
  end

end
@@ -0,0 +1,78 @@
1
module WCID

  # WCID::Search objects are for creating and running queries
  # against WorldCat Identities.
  class Search
    attr_reader :query

    # Pass it a string
    #   name_search = WCID::Search.new('Twain, Mark')
    def initialize(query)
      @query = query
    end

    # Actually performs the full-name search and returns an array of
    # WCID::Hit objects, one per nameAuthorities/match element.
    #   hits = name_search.search
    # or
    #   hits = (WCID::Search.new('Twain, Mark')).search
    def search
      uri = escape_uri("http://orlabs.oclc.org/Identities/find?fullName=#{@query}")
      doc = do_search(uri)
      doc.elements.to_a("nameAuthorities/match").map { |match| WCID::Hit.new(match) }
    end

    # Runs an exact pnkey search. Returns a single WCID::ID built from the
    # response, or nil when the response reports 0 records.
    #   hit = name_search.search_exact
    # or
    #   hit = (WCID::Search.new('Twain, Mark')).search_exact
    #
    # A response without a numberOfRecords element raises NoMethodError,
    # as do the errors re-raised by #do_search.
    def search_exact
      uri = escape_uri("http://orlabs.oclc.org/SRW/search/Identities?query=local.pnkey+exact+#{@query}")
      #puts uri
      doc = do_search(uri)
      #puts doc
      return nil if doc.elements['searchRetrieveResponse/numberOfRecords'].text == '0'

      WCID::ID.new(doc)
    end

    # Fetches uri (giving up after 20 seconds) and returns the response
    # parsed as a REXML::Document. Timeouts and connection resets are
    # reported on stdout and re-raised.
    #
    # This should maybe be private or somesuch?
    def do_search(uri)
      file = Timeout.timeout(20) { Net::HTTP.get(URI.parse(uri.to_s)) }
      #puts file
      REXML::Document.new(file)
    rescue Timeout::Error
      puts "timeout error!"
      raise
    rescue Errno::ECONNRESET
      puts "errno!"
      raise
    end

    private

    # Percent-escapes characters that are not allowed in a URI.
    # URI.escape was removed in Ruby 3.0, so use a parser object when the
    # running Ruby provides one and fall back to URI.escape otherwise.
    def escape_uri(str)
      if defined?(URI::RFC2396_PARSER)
        URI::RFC2396_PARSER.escape(str)
      elsif defined?(URI::DEFAULT_PARSER)
        URI::DEFAULT_PARSER.escape(str)
      else
        URI.escape(str)
      end
    end
  end


end
@@ -0,0 +1,9 @@
1
module Wcid #:nodoc:
  # Version of the library, as separate numeric parts and as a dotted string.
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 0
    TINY  = 1

    # Frozen so the shared constant cannot be modified in place.
    STRING = [MAJOR, MINOR, TINY].join('.').freeze
  end
end
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/wcid/version.rb'
8
+
9
+ version = Wcid::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/wcid'
11
+
12
# Integer rather than Fixnum: Fixnum no longer exists on current Rubies, and
# on older ones Integer is its superclass, so every caller keeps working.
class Integer
  # English ordinal suffix for this number: 'st', 'nd', 'rd' or 'th'.
  def ordinal
    # teens
    return 'th' if (10..19).include?(self % 100)

    # others (`when x then`, because the `when x:` form is no longer valid)
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
26
class Time
  # Human-readable date: day of month with its ordinal suffix, full month
  # name and year, separated by single spaces.
  def pretty
    day = mday
    [day.to_s + day.ordinal, strftime('%B'), year].join(' ')
  end
end
31
+
32
# Runs source through the Syntax HTML convertor for the given syntax name
# and removes a "<pre>" at the start of a line and a "</pre>" at the end of
# a line from the result.
def convert_syntax(syntax, source)
  highlighter = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = highlighter.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!, '')
end
35
+
36
# Renders a Textile source file through an ERB template and writes the HTML
# to stdout.
#
#   source.txt       first line is the page title, the rest is the body
#   template.rhtml   optional; defaults to ../website/template.rhtml
#
# The template is evaluated with this script's top-level binding, so the
# local variables set here and earlier in the script are what it can see.

if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'

else
  # stderr, because stdout is expected to be redirected into the output file.
  $stderr.puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the template file; File.open(...).read left it open.
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Pull every <pre>/<code> element carrying a syntax='...' attribute out of
  # the body before RedCloth sees it, leaving a numbered placeholder behind.
  # \1 makes the closing tag match the opening one.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted snippets back in place of their placeholders; \d+
  # captures the placeholder's index into syntax_items.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)