rubyBHL 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +7 -0
- data/README.rdoc +16 -0
- data/Rakefile +18 -0
- data/init.rb +1 -0
- data/lib/rubyBHL.rb +116 -0
- data/rubyBHL.gemspec +31 -0
- data/test/helper.rb +3 -0
- data/test/rubyBHL_test.rb +36 -0
- metadata +68 -0
data/Manifest
ADDED
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Follows Bates' screencast 135.
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake'
|
5
|
+
require 'echoe'
|
6
|
+
|
7
|
+
# Packaging metadata for the rubyBHL gem, version 0.1.0, handed to Echoe.
Echoe.new('rubyBHL', '0.1.0') do |gem|
  gem.author                   = "Yoder & Seltmann"
  gem.email                    = "diapriid@gmail.com"
  gem.url                      = "http://github.com/mjy/rubyBHL"
  gem.description              = "Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR."
  gem.ignore_pattern           = ["tmp/*", "script/*"]
  gem.development_dependencies = []
end

# Load every .rake file found in the tasks/ directory beside this Rakefile,
# in sorted order.
task_glob = "#{File.dirname(__FILE__)}/tasks/*.rake"
Dir.glob(task_glob).sort.each do |task_file|
  load task_file
end
|
17
|
+
|
18
|
+
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
#require 'ruby_BHL'
|
data/lib/rubyBHL.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
module RubyBHL

  # Some quick hacks for querying the Biodiversity Heritage Library OpenURL
  # endpoint, plus screen scraping to reach a citation's OCR text.
  # Only configured for json right now.

  require 'net/http'
  # JSON.parse is the only JSON call made below, so the plain library is
  # enough; nothing here uses the extras loaded by 'json/add/rails'.
  require 'json'

  # Raised when a search parameter is not in Rbhl::PARAMETERS or when
  # Rbhl#fetch runs out of redirects to follow.
  class RbhlError < StandardError
  end

  # A single search against SEARCH_URL. The HTTP request is issued from the
  # constructor; the parsed response is then available through the readers.
  class Rbhl

    # Some notes on the OpenURL interface:
    #   http://www.biodiversitylibrary.org/openurl?url_ver=Z39.88-2004&ctx_ver=Z39.88-2004
    #   rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook
    #   rft.btitle
    #   rft.jtitle
    #   rft.au
    #   rft.aufirst
    #   rft.aulast
    #   rft.publisher
    #   rft.pub
    #   rft.place
    #   rft.date
    #   rft.issn
    #   rft.isbn
    #   rft.coden
    #   rft.stitle
    #   rft.volume
    #   rft.issue
    #   rft.spage
    #   rft_id=info:oclcnum/XXXX
    #   rft_id=info:lccn/XXXX
    #   rft_id=http://www.biodiversitylibrary.org/bibliography/XXXX
    #   rft_id=http://www.biodiversitylibrary.org/page/XXXX

    # "constants" (defaults really)
    SEARCH_URL = 'http://www.biodiversitylibrary.org/openurl?'.freeze
    FORMAT = 'json'.freeze
    # METHOD = '' # openURL0.1, openURL1.0

    # The only keys accepted inside options[:params].
    PARAMETERS = [
      :title,
      :au,
      :aufirst,
      :aulast,
      :publisher,
      :date,
      :issn,
      :isbn,
      :coden,
      :stitle,
      :volume,
      :issue,
      :spage].freeze

    # created from response
    attr_reader :json_data   # the parsed JSON body of the search response
    attr_reader :citations   # json_data['citations'], set only when json_data['Status'] == 1
    attr_reader :search_url  # the full URL that was requested

    # Builds the search URL from +options+ and immediately performs the request.
    #
    # options[:params]:: Hash of search terms; every key must be in PARAMETERS.
    # options[:format]:: response format appended to the URL (defaults to FORMAT).
    #
    # Raises RbhlError, before any network traffic, if a key of
    # options[:params] is not in PARAMETERS.
    def initialize(options = {})
      @opt = {
        # :method => METHOD,
        :format => FORMAT,
        :params => {}
      }.merge(options)

      # check for legal parameters
      @opt[:params].each_key do |name|
        raise RbhlError, "#{name} is not a valid parameter" unless PARAMETERS.include?(name)
      end

      @json_data = {}
      @search_url = build_search_url

      @json_data = JSON.parse(Net::HTTP.get_response(URI.parse(@search_url)).body)
      # a simpler reference; left unset (nil) for any other Status value
      @citations = @json_data['citations'] if @json_data['Status'] == 1
    end

    # Returns the body fetched from the OCR text URL of the citation at
    # +citation_index+ (redirects are followed), or nil when no index is given
    # or no OCR URL could be found.
    def OCR_text(citation_index)
      return nil unless citation_index
      fetch(OCR_url(citation_index))
    end

    # GETs +uri_string+ and returns the response body, following at most
    # +limit+ redirects. Adapted from the Net::HTTP documentation.
    #
    # Returns nil when +uri_string+ is nil. Raises RbhlError once the redirect
    # budget is used up; any response that is neither a success nor a redirect
    # is raised through Net::HTTPResponse#error!.
    def fetch(uri_string, limit = 10)
      return nil unless uri_string
      # The budget must come from the caller: each recursive call passes
      # limit - 1, so this is what stops a redirect loop.
      raise RbhlError, 'HTTP redirect too deep' if limit <= 0

      response = Net::HTTP.get_response(URI.parse(uri_string))
      case response
      when Net::HTTPSuccess     then response.body
      when Net::HTTPRedirection then fetch(response['location'], limit - 1)
      else
        response.error!
      end
    end

    # Since the API doesn't return a link to the OCR text, we screen scrape
    # that URL from the citation's page: the first "http://....txt" match in
    # the page body is returned.
    #
    # Returns nil when there are no citations, when no citation exists at
    # +citation_index+, when it has no "Url", or when the page has no match.
    def OCR_url(citation_index)
      citation = @citations && @citations[citation_index]
      page_url = citation && citation["Url"]
      return nil unless page_url

      Net::HTTP.get_response(URI.parse(page_url)).body[/http:\/\/.*?\.txt/]
    end

    private

    # Joins the search parameters (sorted by name, whitespace in values turned
    # into "+") and the format into the request URL.
    def build_search_url
      pairs = @opt[:params].keys.sort_by { |key| key.to_s }.map do |key|
        # to_s lets non-String values such as an Integer volume through
        "#{key}=#{@opt[:params][key].to_s.gsub(/\s/, '+')}"
      end
      SEARCH_URL + pairs.join('&') + '&format=' + @opt[:format].to_s
    end

  end


end
|
data/rubyBHL.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for rubyBHL 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{rubyBHL}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Yoder & Seltmann"]
  s.date = %q{2009-10-19}
  s.description = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
  s.email = %q{diapriid@gmail.com}
  s.extra_rdoc_files = ["README.rdoc", "lib/rubyBHL.rb"]
  s.files = ["Manifest", "README.rdoc", "Rakefile", "init.rb", "lib/rubyBHL.rb", "test/helper.rb", "test/rubyBHL_test.rb", "rubyBHL.gemspec"]
  s.homepage = %q{http://github.com/mjy/rubyBHL}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "rubyBHL", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{rubybhl}
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
  s.test_files = ["test/rubyBHL_test.rb"]

  # Only set where the running RubyGems knows the attribute. The RubyGems
  # version comparison that used to follow had two empty branches (no
  # dependencies are declared) and its local was never read, so both are gone.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/rubyBHL'))
|
3
|
+
|
4
|
+
# Tests for RubyBHL::Rbhl. Apart from the illegal-parameter case, every test
# constructs an Rbhl, and the constructor performs a real HTTP request.
class RubyBHLTest < Test::Unit::TestCase

  # A key outside Rbhl::PARAMETERS must be rejected.
  def test_foo_is_not_a_legal_param
    assert_raise(RubyBHL::RbhlError) do
      RubyBHL::Rbhl.new(:params => {:foo => "BAR!"})
    end
  end

  # Parameters appear sorted by name, followed by the format.
  def test_search_url_for_default_url_is_properly_formed
    expected = "http://www.biodiversitylibrary.org/openurl?title=BAR!&volume=2&format=json"
    bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR!", :volume => "2"})
    assert_equal expected, bhl.search_url
  end

  # Whitespace inside a value is sent as "+".
  def test_search_url_convert_space_to_plus
    expected = "http://www.biodiversitylibrary.org/openurl?title=BAR+FOO&format=json"
    bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"})
    assert_equal expected, bhl.search_url
  end

  # This is a crummy test, we need a reference to the status codes.
  def test_basic_response_is_returned
    bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"}) # should match nothing, I hope
    response = bhl.json_data
    assert_equal 1, response["Status"]
    assert_equal 0, response["citations"].size
  end

  # This is a crummy test, we need a reference to the status codes.
  def test_response_with_citations_is_returned
    bhl = RubyBHL::Rbhl.new(:params => {:title => "Manual of North American Diptera"}) # expected to match at least one citation
    assert bhl.json_data["citations"].size > 0
  end

end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rubyBHL
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yoder & Seltmann
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-19 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
|
17
|
+
email: diapriid@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
- lib/rubyBHL.rb
|
25
|
+
files:
|
26
|
+
- Manifest
|
27
|
+
- README.rdoc
|
28
|
+
- Rakefile
|
29
|
+
- init.rb
|
30
|
+
- lib/rubyBHL.rb
|
31
|
+
- test/helper.rb
|
32
|
+
- test/rubyBHL_test.rb
|
33
|
+
- rubyBHL.gemspec
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://github.com/mjy/rubyBHL
|
36
|
+
licenses: []
|
37
|
+
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options:
|
40
|
+
- --line-numbers
|
41
|
+
- --inline-source
|
42
|
+
- --title
|
43
|
+
- rubyBHL
|
44
|
+
- --main
|
45
|
+
- README.rdoc
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: "1.2"
|
59
|
+
version:
|
60
|
+
requirements: []
|
61
|
+
|
62
|
+
rubyforge_project: rubybhl
|
63
|
+
rubygems_version: 1.3.5
|
64
|
+
signing_key:
|
65
|
+
specification_version: 3
|
66
|
+
summary: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
|
67
|
+
test_files:
|
68
|
+
- test/rubyBHL_test.rb
|