rubyBHL 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +7 -0
- data/README.rdoc +16 -0
- data/Rakefile +18 -0
- data/init.rb +1 -0
- data/lib/rubyBHL.rb +116 -0
- data/rubyBHL.gemspec +31 -0
- data/test/helper.rb +3 -0
- data/test/rubyBHL_test.rb +36 -0
- metadata +68 -0
data/Manifest
ADDED
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Follows Ryan Bates' screencast 135 (presumably Railscasts #135, "Making a Gem" -- confirm).
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake'
|
5
|
+
require 'echoe'
|
6
|
+
|
7
|
+
Echoe.new('rubyBHL', '0.1.0') do |p| # gem name and version; the block fills in the gem metadata
|
8
|
+
p.description = "Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR."
|
9
|
+
p.url = "http://github.com/mjy/rubyBHL"
|
10
|
+
p.author = "Yoder & Seltmann"
|
11
|
+
p.email = "diapriid@gmail.com"
|
12
|
+
p.ignore_pattern = ["tmp/*", "script/*"]
|
13
|
+
p.development_dependencies = [] # no development dependencies are declared
|
14
|
+
end
|
15
|
+
|
16
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each {|ext| load ext} # load every tasks/*.rake file beside this Rakefile, in sorted order
|
17
|
+
|
18
|
+
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
#require 'ruby_BHL'
|
data/lib/rubyBHL.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# Client for the Biodiversity Heritage Library (BHL) OpenURL resolver, plus a
# screen-scraping helper that locates the OCR text file linked from a
# citation's page.
module RubyBHL

  # Some quick hacks, only configured for json right now.

  require 'net/http'
  require 'json'
  begin
    # Legacy Rails-style JSON additions that the original code loaded. Treated
    # as optional because only JSON.parse is used below.
    # NOTE(review): newer json releases appear not to ship this file -- confirm.
    require 'json/add/rails'
  rescue LoadError
    # Not available in this environment; plain 'json' is sufficient here.
  end

  # Raised for illegal search parameters and for redirect chains that are too deep.
  class RbhlError < StandardError
  end

  # Performs one OpenURL search when constructed and exposes the parsed
  # response (json_data), the citations (citations) and the URL that was
  # requested (search_url).
  class Rbhl

    # some notes
    #http://www.biodiversitylibrary.org/openurl?url_ver=Z39.88-2004&ctx_ver=Z39.88-2004
    #rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook
    #rft.btitle
    #rft.jtitle
    #rft.au
    #rft.aufirst
    #rft.aulast
    #rft.publisher
    #rft.pub
    #rft.place
    #rft.date
    #rft.issn
    #rft.isbn
    # rft.coden
    # rft.stitle
    # rft.volume
    # rft.issue
    # rft.spage
    # rft_id=info:oclcnum/XXXX
    # rft_id=info:lccn/XXXX
    # rft_id=http://www.biodiversitylibrary.org/bibliography/XXXX
    # rft_id=http://www.biodiversitylibrary.org/page/XXXX

    # "constants" (defaults really)
    SEARCH_URL = 'http://www.biodiversitylibrary.org/openurl?'
    FORMAT = 'json'
    # METHOD = '' # openURL0.1, openURL1.0

    # The only keys accepted in options[:params].
    PARAMETERS = [
      :title,
      :au,
      :aufirst,
      :aulast,
      :publisher,
      :date,
      :issn,
      :isbn,
      :coden,
      :stitle,
      :volume,
      :issue,
      :spage]

    # Matches an http(s) link ending in ".txt". Quotes, angle brackets and
    # whitespace are excluded so the match cannot run across HTML markup.
    OCR_LINK = %r{https?://[^\s"'<>]+?\.txt}

    # created from response
    attr_reader(:json_data)
    attr_reader(:citations)
    attr_reader(:search_url)

    # Builds the search URL from the given options, requests it, and parses
    # the JSON response.
    #
    # options[:format] - response format appended to the URL (defaults to FORMAT).
    # options[:params] - Hash of search parameters; every key must be listed in
    #                    PARAMETERS. Values are converted with to_s, and
    #                    whitespace in them is replaced by "+".
    #
    # Raises RbhlError when a key of options[:params] is not in PARAMETERS.
    # The check happens before any request is made.
    def initialize(options = {})
      @opt = {
        # :method => METHOD,
        :format => FORMAT,
        :params => {}
      }.merge(options)

      # check for legal parameters
      @opt[:params].keys.each do |p|
        raise RbhlError, "#{p} is not a valid parameter" unless PARAMETERS.include?(p)
      end

      @json_data = {}

      # Keys are sorted by name so the generated URL is deterministic. to_s lets
      # non-String values (e.g. :volume => 2) through instead of failing on gsub.
      query = @opt[:params].keys.sort_by { |k| k.to_s }.map { |k|
        "#{k}=#{@opt[:params][k].to_s.gsub(/\s/, '+')}"
      }.join('&')
      @search_url = SEARCH_URL + query + '&format=' + @opt[:format].to_s

      @json_data = JSON.parse(Net::HTTP.get_response(URI.parse(@search_url)).body)
      # A simpler reference; left nil unless the response reports Status == 1.
      @citations = @json_data['citations'] if @json_data['Status'] == 1
    end

    # Returns the body fetched from the OCR link of the citation at
    # citation_index, following redirects. Returns nil when citation_index is
    # nil or no OCR link can be found.
    def OCR_text(citation_index)
      return nil unless citation_index
      fetch(OCR_url(citation_index))
    end

    # Fetches uri_string and returns the response body, following up to
    # +limit+ requests in a redirect chain (pattern from the Net::HTTP docs).
    #
    # Returns nil when uri_string is nil.
    # Raises RbhlError when the redirect budget is exhausted, and whatever
    # Net::HTTPResponse#error! raises for other non-success responses.
    def fetch(uri_string, limit = 10)
      return nil unless uri_string
      # The budget is NOT reset here: each redirect hop passes limit - 1, so a
      # redirect loop terminates instead of recursing forever.
      raise RbhlError, 'HTTP redirect too deep' if limit <= 0

      response = Net::HTTP.get_response(URI.parse(uri_string))
      case response
      when Net::HTTPSuccess
        response.body
      when Net::HTTPRedirection
        location = response['location']
        # Location may be relative; resolve it against the URL just requested.
        target = location && URI.join(uri_string, location).to_s
        fetch(target, limit - 1)
      else
        response.error!
      end
    end

    # Since the API doesn't return a link to the OCR, we screen scrape the URL
    # from the page referenced by the citation's "Url" entry.
    #
    # Returns the first link matching OCR_LINK, or nil when there are no
    # citations, the index has no citation, the citation has no "Url", or the
    # page contains no such link.
    def OCR_url(citation_index)
      citation = @citations && @citations[citation_index]
      page_url = citation && citation["Url"]
      return nil unless page_url

      page = Net::HTTP.get_response(URI.parse(page_url)).body
      page && page[OCR_LINK]
    end

  end


end
|
data/rubyBHL.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s| # gem metadata for rubyBHL 0.1.0
|
4
|
+
s.name = %q{rubyBHL}
|
5
|
+
s.version = "0.1.0"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Yoder & Seltmann"]
|
9
|
+
s.date = %q{2009-10-19}
|
10
|
+
s.description = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
|
11
|
+
s.email = %q{diapriid@gmail.com}
|
12
|
+
s.extra_rdoc_files = ["README.rdoc", "lib/rubyBHL.rb"]
|
13
|
+
s.files = ["Manifest", "README.rdoc", "Rakefile", "init.rb", "lib/rubyBHL.rb", "test/helper.rb", "test/rubyBHL_test.rb", "rubyBHL.gemspec"]
|
14
|
+
s.homepage = %q{http://github.com/mjy/rubyBHL}
|
15
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "rubyBHL", "--main", "README.rdoc"]
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
s.rubyforge_project = %q{rubybhl}
|
18
|
+
s.rubygems_version = %q{1.3.5}
|
19
|
+
s.summary = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
|
20
|
+
s.test_files = ["test/rubyBHL_test.rb"]
|
21
|
+
|
22
|
+
if s.respond_to? :specification_version then
|
23
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION # assigned but never read in this block
|
24
|
+
s.specification_version = 3
|
25
|
+
|
26
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then # both branches are empty: no dependencies are declared here
|
27
|
+
else
|
28
|
+
end
|
29
|
+
else
|
30
|
+
end
|
31
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/rubyBHL'))
|
3
|
+
|
4
|
+
class RubyBHLTest < Test::Unit::TestCase # exercises RubyBHL::Rbhl; Rbhl.new issues an HTTP request unless parameter validation fails first
|
5
|
+
|
6
|
+
def setup
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_foo_is_not_a_legal_param
|
10
|
+
assert_raise(RubyBHL::RbhlError) {RubyBHL::Rbhl.new(:params => {:foo => "BAR!"}) }
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_search_url_for_default_url_is_properly_formed
|
14
|
+
@bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR!", :volume => "2"})
|
15
|
+
assert_equal "http://www.biodiversitylibrary.org/openurl?title=BAR!&volume=2&format=json", @bhl.search_url
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_search_url_convert_space_to_plus
|
19
|
+
@bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"})
|
20
|
+
assert_equal "http://www.biodiversitylibrary.org/openurl?title=BAR+FOO&format=json", @bhl.search_url
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_basic_response_is_returned
|
24
|
+
# This is a crummy test; we need a reference to the status codes.
|
25
|
+
@bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"}) # should match nothing, I hope
|
26
|
+
assert @bhl.json_data["Status"] == 1
|
27
|
+
assert @bhl.json_data["citations"].size == 0
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_response_with_citations_is_returned
|
31
|
+
# This is a crummy test; we need a reference to the status codes.
|
32
|
+
@bhl = RubyBHL::Rbhl.new(:params => {:title => "Manual of North American Diptera"}) # expected to match at least one citation
|
33
|
+
assert @bhl.json_data["citations"].size > 0
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rubyBHL
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yoder & Seltmann
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-19 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
|
17
|
+
email: diapriid@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.rdoc
|
24
|
+
- lib/rubyBHL.rb
|
25
|
+
files:
|
26
|
+
- Manifest
|
27
|
+
- README.rdoc
|
28
|
+
- Rakefile
|
29
|
+
- init.rb
|
30
|
+
- lib/rubyBHL.rb
|
31
|
+
- test/helper.rb
|
32
|
+
- test/rubyBHL_test.rb
|
33
|
+
- rubyBHL.gemspec
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://github.com/mjy/rubyBHL
|
36
|
+
licenses: []
|
37
|
+
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options:
|
40
|
+
- --line-numbers
|
41
|
+
- --inline-source
|
42
|
+
- --title
|
43
|
+
- rubyBHL
|
44
|
+
- --main
|
45
|
+
- README.rdoc
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: "1.2"
|
59
|
+
version:
|
60
|
+
requirements: []
|
61
|
+
|
62
|
+
rubyforge_project: rubybhl
|
63
|
+
rubygems_version: 1.3.5
|
64
|
+
signing_key:
|
65
|
+
specification_version: 3
|
66
|
+
summary: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
|
67
|
+
test_files:
|
68
|
+
- test/rubyBHL_test.rb
|