rubyBHL 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ Manifest
2
+ README.rdoc
3
+ Rakefile
4
+ init.rb
5
+ lib/rubyBHL.rb
6
+ test/helper.rb
7
+ test/rubyBHL_test.rb
@@ -0,0 +1,16 @@
1
+ = RubyBHL
2
+
3
+ A very simple Ruby hook to the Biodiversity Heritage Library (BHL) API.
4
+
5
+ == Install
6
+
7
+ gem install mjy-RubyBHL --source http://gems.github.com
8
+
9
+ == Usage
10
+
11
+ See the tests in test/rubyBHL_test.rb for usage examples.
12
+
13
+ == Todo
14
+
15
+ Lots.
16
+
@@ -0,0 +1,18 @@
1
+ # Follows Ryan Bates' Railscasts episode 135 (building a gem with Echoe).
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'echoe'
6
+
7
+ Echoe.new('rubyBHL', '0.1.0') do |p|
8
+ p.description = "Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR."
9
+ p.url = "http://github.com/mjy/rubyBHL"
10
+ p.author = "Yoder & Seltmann"
11
+ p.email = "diapriid@gmail.com"
12
+ p.ignore_pattern = ["tmp/*", "script/*"]
13
+ p.development_dependencies = []
14
+ end
15
+
16
+ Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each {|ext| load ext}
17
+
18
+
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ #require 'ruby_BHL'
@@ -0,0 +1,116 @@
1
# Minimal client for the Biodiversity Heritage Library (BHL) OpenURL resolver.
#
# Some quick hacks, only configured for JSON right now.
module RubyBHL

  require 'net/http'
  require 'uri'
  begin
    # Historical entry point used by this gem. It may be missing on newer
    # releases of the json library, so fall back to plain 'json' (only
    # JSON.parse is needed here).
    require 'json/add/rails'
  rescue LoadError
    require 'json'
  end

  # Raised for unknown search parameters and for redirect chains that run
  # deeper than the allowed limit.
  class RbhlError < StandardError
  end

  # Performs one OpenURL search when constructed and exposes the parsed
  # response through #json_data, #citations and #search_url.
  class Rbhl

    # Some notes on the OpenURL interface:
    # http://www.biodiversitylibrary.org/openurl?url_ver=Z39.88-2004&ctx_ver=Z39.88-2004
    # rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook
    # rft.btitle
    # rft.jtitle
    # rft.au
    # rft.aufirst
    # rft.aulast
    # rft.publisher
    # rft.pub
    # rft.place
    # rft.date
    # rft.issn
    # rft.isbn
    # rft.coden
    # rft.stitle
    # rft.volume
    # rft.issue
    # rft.spage
    # rft_id=info:oclcnum/XXXX
    # rft_id=info:lccn/XXXX
    # rft_id=http://www.biodiversitylibrary.org/bibliography/XXXX
    # rft_id=http://www.biodiversitylibrary.org/page/XXXX

    # "constants" (defaults really)
    SEARCH_URL = 'http://www.biodiversitylibrary.org/openurl?'
    FORMAT = 'json'
    # METHOD = '' # openURL0.1, openURL1.0

    # Keys accepted in options[:params]; anything else raises RbhlError.
    PARAMETERS = [
      :title,
      :au,
      :aufirst,
      :aulast,
      :publisher,
      :date,
      :issn,
      :isbn,
      :coden,
      :stitle,
      :volume,
      :issue,
      :spage]

    # created from response
    attr_reader :json_data
    attr_reader :citations
    attr_reader :search_url

    # Builds the search URL and immediately issues the HTTP request.
    #
    # options[:format] - response format appended to the query (default FORMAT).
    # options[:params] - Hash of search terms keyed by the symbols in PARAMETERS.
    #
    # Raises RbhlError if a key of options[:params] is not in PARAMETERS.
    def initialize(options = {})
      @opt = {
        # :method => METHOD,
        :format => FORMAT,
        :params => {}
      }.merge!(options)

      # check for legal parameters before touching the network
      @opt[:params].each_key do |key|
        raise RbhlError, "#{key} is not a valid parameter" unless PARAMETERS.include?(key)
      end

      @json_data = {}

      # Keys are sorted so the generated URL is deterministic. Values go through
      # to_s so non-String values (e.g. an Integer volume) work; whitespace
      # becomes "+", and no other escaping is applied.
      query = @opt[:params].keys.sort_by { |key| key.to_s }.map { |key|
        "#{key}=#{@opt[:params][key].to_s.gsub(/\s/, '+')}"
      }.join('&')
      @search_url = "#{SEARCH_URL}#{query}&format=#{@opt[:format]}"

      @json_data = JSON.parse(Net::HTTP.get_response(URI.parse(@search_url)).body)
      # a simpler reference; stays nil unless the response reports Status 1
      @citations = @json_data['citations'] if @json_data['Status'] == 1
    end

    # Returns the OCR text for the citation at citation_index, following
    # redirects, or nil when there is no index or no OCR link could be found.
    def OCR_text(citation_index)
      return nil unless citation_index
      fetch(OCR_url(citation_index))
    end

    # GETs uri_string and returns the response body, following up to +limit+
    # redirects (adapted from the Net::HTTP documentation).
    #
    # Returns nil when uri_string is nil.
    # Raises RbhlError once the redirect budget is used up; any other
    # non-success response is raised through Net::HTTPResponse#error!.
    def fetch(uri_string, limit = 10)
      return nil unless uri_string
      # The budget must come from the argument: resetting it here would let a
      # redirect loop recurse until the stack overflows.
      raise RbhlError, 'HTTP redirect too deep' if limit <= 0
      response = Net::HTTP.get_response(URI.parse(uri_string))
      case response
      when Net::HTTPSuccess then response.body
      when Net::HTTPRedirection then fetch(response['location'], limit - 1)
      else
        response.error!
      end
    end

    # Since the API doesn't return a link to the OCR, we screen scrape the
    # citation's page for the first http://...txt URL.
    #
    # Returns the matched URL, or nil when there are no citations, the index
    # does not refer to a citation, the citation has no "Url", or the page
    # contains no such link.
    def OCR_url(citation_index)
      return nil if citation_index.nil? || @citations.nil?
      citation = @citations[citation_index]
      page_url = citation && citation['Url']
      return nil unless page_url
      Net::HTTP.get_response(URI.parse(page_url)).body.to_s[%r{http://.*?\.txt}]
    end

  end


end
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{rubyBHL}
5
+ s.version = "0.1.0"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Yoder & Seltmann"]
9
+ s.date = %q{2009-10-19}
10
+ s.description = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
11
+ s.email = %q{diapriid@gmail.com}
12
+ s.extra_rdoc_files = ["README.rdoc", "lib/rubyBHL.rb"]
13
+ s.files = ["Manifest", "README.rdoc", "Rakefile", "init.rb", "lib/rubyBHL.rb", "test/helper.rb", "test/rubyBHL_test.rb", "rubyBHL.gemspec"]
14
+ s.homepage = %q{http://github.com/mjy/rubyBHL}
15
+ s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "rubyBHL", "--main", "README.rdoc"]
16
+ s.require_paths = ["lib"]
17
+ s.rubyforge_project = %q{rubybhl}
18
+ s.rubygems_version = %q{1.3.5}
19
+ s.summary = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
20
+ s.test_files = ["test/rubyBHL_test.rb"]
21
+
22
+ if s.respond_to? :specification_version then
23
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
+ s.specification_version = 3
25
+
26
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
27
+ else
28
+ end
29
+ else
30
+ end
31
+ end
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'ruby-debug'
@@ -0,0 +1,36 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/rubyBHL'))
3
+
4
+ class RubyBHLTest < Test::Unit::TestCase
5
+
6
+ def setup
7
+ end
8
+
9
+ def test_foo_is_not_a_legal_param
10
+ assert_raise(RubyBHL::RbhlError) {RubyBHL::Rbhl.new(:params => {:foo => "BAR!"}) }
11
+ end
12
+
13
+ def test_search_url_for_default_url_is_properly_formed
14
+ @bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR!", :volume => "2"})
15
+ assert_equal "http://www.biodiversitylibrary.org/openurl?title=BAR!&volume=2&format=json", @bhl.search_url
16
+ end
17
+
18
+ def test_search_url_convert_space_to_plus
19
+ @bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"})
20
+ assert_equal "http://www.biodiversitylibrary.org/openurl?title=BAR+FOO&format=json", @bhl.search_url
21
+ end
22
+
23
+ def test_basic_response_is_returned
24
+ # This is a crummy test: it queries the live service, and we still need a reference for the status codes.
25
+ @bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"}) # should match nothing, I hope
26
+ assert @bhl.json_data["Status"] == 1
27
+ assert @bhl.json_data["citations"].size == 0
28
+ end
29
+
30
+ def test_response_with_citations_is_returned
31
+ # Also queries the live service; only checks that some citations come back.
32
+ @bhl = RubyBHL::Rbhl.new(:params => {:title => "Manual of North American Diptera"}) # expected to match at least one citation
33
+ assert @bhl.json_data["citations"].size > 0
34
+ end
35
+
36
+ end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubyBHL
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Yoder & Seltmann
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-19 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
17
+ email: diapriid@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ - lib/rubyBHL.rb
25
+ files:
26
+ - Manifest
27
+ - README.rdoc
28
+ - Rakefile
29
+ - init.rb
30
+ - lib/rubyBHL.rb
31
+ - test/helper.rb
32
+ - test/rubyBHL_test.rb
33
+ - rubyBHL.gemspec
34
+ has_rdoc: true
35
+ homepage: http://github.com/mjy/rubyBHL
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options:
40
+ - --line-numbers
41
+ - --inline-source
42
+ - --title
43
+ - rubyBHL
44
+ - --main
45
+ - README.rdoc
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "1.2"
59
+ version:
60
+ requirements: []
61
+
62
+ rubyforge_project: rubybhl
63
+ rubygems_version: 1.3.5
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
67
+ test_files:
68
+ - test/rubyBHL_test.rb