rubyBHL 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ Manifest
2
+ README.rdoc
3
+ Rakefile
4
+ init.rb
5
+ lib/rubyBHL.rb
6
+ test/helper.rb
7
+ test/rubyBHL_test.rb
@@ -0,0 +1,16 @@
1
+ = RubyBHL
2
+
3
+ A very simple Ruby hook to the Biodiversity Heritage Library (BHL) API.
4
+
5
+ == Install
6
+
7
+ gem install mjy-RubyBHL --source http://gems.github.com
8
+
9
+ == Usage
10
+
11
+ See tests.
12
+
13
+ == Todo
14
+
15
+ Lots.
16
+
@@ -0,0 +1,18 @@
1
# Packaging tasks for the rubyBHL gem, generated through Echoe.
# Follows Bates' screencast 135.

require 'rubygems'
require 'rake'
require 'echoe'

# Describe the gem; Echoe turns this into the packaging/release rake tasks.
Echoe.new('rubyBHL', '0.1.0') do |gem|
  gem.description              = "Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR."
  gem.url                      = "http://github.com/mjy/rubyBHL"
  gem.author                   = "Yoder & Seltmann"
  gem.email                    = "diapriid@gmail.com"
  gem.ignore_pattern           = ["tmp/*", "script/*"]
  gem.development_dependencies = []
end

# Pull in any extra task files kept next to this Rakefile, in a stable order.
rakefile_dir = File.dirname(__FILE__)
Dir["#{rakefile_dir}/tasks/*.rake"].sort.each do |task_file|
  load task_file
end
17
+
18
+
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ #require 'ruby_BHL'
@@ -0,0 +1,116 @@
1
module RubyBHL

  # Some quick hacks, only configured for json right now.

  require 'net/http'
  require 'uri'
  # Only JSON.parse is needed; the old 'json/add/rails' shim is no longer
  # shipped with the json library.
  require 'json'

  # Raised for illegal search parameters and for redirect chains that are
  # too deep.
  class RbhlError < StandardError
  end

  # A single search against the Biodiversity Heritage Library OpenURL
  # resolver. The HTTP request is made when the object is created; the parsed
  # response is then available through #json_data and #citations.
  class Rbhl

    # Notes on the OpenURL interface:
    #   http://www.biodiversitylibrary.org/openurl?url_ver=Z39.88-2004&ctx_ver=Z39.88-2004
    #   rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook
    #   rft.btitle, rft.jtitle, rft.au, rft.aufirst, rft.aulast, rft.publisher,
    #   rft.pub, rft.place, rft.date, rft.issn, rft.isbn, rft.coden, rft.stitle,
    #   rft.volume, rft.issue, rft.spage
    #   rft_id=info:oclcnum/XXXX
    #   rft_id=info:lccn/XXXX
    #   rft_id=http://www.biodiversitylibrary.org/bibliography/XXXX
    #   rft_id=http://www.biodiversitylibrary.org/page/XXXX

    # "constants" (defaults really)
    SEARCH_URL = 'http://www.biodiversitylibrary.org/openurl?'.freeze
    FORMAT = 'json'.freeze
    # METHOD = '' # openURL0.1, openURL1.0

    # Keys accepted in the :params option.
    PARAMETERS = [
      :title,
      :au,
      :aufirst,
      :aulast,
      :publisher,
      :date,
      :issn,
      :isbn,
      :coden,
      :stitle,
      :volume,
      :issue,
      :spage].freeze

    # Parsed JSON response (a Hash).
    attr_reader :json_data
    # Shortcut to json_data['citations']; only set when the response
    # 'Status' is 1, otherwise nil.
    attr_reader :citations
    # The URL that was requested.
    attr_reader :search_url

    # Builds the search URL, performs the request and parses the response.
    #
    # options::
    #   :format - response format, defaults to FORMAT ('json')
    #   :params - Hash of search terms; every key must be in PARAMETERS
    #
    # Raises RbhlError if :params contains an unknown key (before any
    # network access happens). Network and JSON parsing errors propagate
    # unchanged.
    def initialize(options = {})
      @opt = {
        # :method => METHOD,
        :format => FORMAT,
        :params => {}
      }.merge(options)

      # check for legal parameters
      @opt[:params].each_key do |p|
        raise RbhlError, "#{p} is not a valid parameter" unless PARAMETERS.include?(p)
      end

      @json_data = {}
      @search_url = build_search_url

      @json_data = JSON.parse(Net::HTTP.get_response(URI.parse(@search_url)).body)
      @citations = @json_data['citations'] if @json_data['Status'] == 1 # a simpler reference
    end

    # Returns the OCR text for the citation at +citation_index+, following
    # redirects, or nil when no index is given or no OCR link can be found.
    def OCR_text(citation_index)
      return nil unless citation_index
      fetch(OCR_url(citation_index))
    end

    # Fetches +uri_string+ and returns the response body, following at most
    # +limit+ requests along a chain of redirects (adapted from the Net::HTTP
    # documentation).
    #
    # Returns nil when +uri_string+ is nil. Raises RbhlError once the
    # redirect budget is used up, and whatever Net::HTTPResponse#error!
    # raises for any other non-success response.
    def fetch(uri_string, limit = 10)
      return nil unless uri_string
      # The budget must come from the argument; resetting it here would
      # disable the redirect-loop protection entirely.
      raise RbhlError, 'HTTP redirect too deep' if limit <= 0

      response = Net::HTTP.get_response(URI.parse(uri_string))
      case response
      when Net::HTTPSuccess then response.body
      when Net::HTTPRedirection
        location = response['location']
        # Location may be relative; resolve it against the current URL.
        fetch(location && URI.join(uri_string, location).to_s, limit - 1)
      else
        response.error!
      end
    end

    # Since the API doesn't return a link to the OCR, we screen scrape it
    # from the page behind the citation's "Url".
    #
    # Returns the first http://....txt link found in that page, or nil when
    # there are no citations, the index has no entry, the entry has no "Url",
    # or the page contains no such link.
    def OCR_url(citation_index)
      citation = @citations && @citations[citation_index]
      page_url = citation && citation['Url']
      return nil unless page_url

      page = Net::HTTP.get_response(URI.parse(page_url)).body
      page.to_s[/http:\/\/.*?\.txt/]
    end

    private

    # Assembles the request URL: parameters sorted by name, whitespace in
    # values turned into '+', and the format appended last.
    def build_search_url
      params = @opt[:params]
      query = params.keys.sort_by { |k| k.to_s }.map do |k|
        # to_s so that non-String values (e.g. an Integer volume) are accepted
        "#{k}=#{params[k].to_s.gsub(/\s/, '+')}"
      end.join('&')
      "#{SEARCH_URL}#{query}&format=#{@opt[:format]}"
    end

  end

end
@@ -0,0 +1,31 @@
1
# -*- encoding: utf-8 -*-

# Gem specification for rubyBHL 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{rubyBHL}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Yoder & Seltmann"]
  s.date = %q{2009-10-19}
  s.description = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
  s.email = %q{diapriid@gmail.com}
  s.extra_rdoc_files = ["README.rdoc", "lib/rubyBHL.rb"]
  s.files = ["Manifest", "README.rdoc", "Rakefile", "init.rb", "lib/rubyBHL.rb", "test/helper.rb", "test/rubyBHL_test.rb", "rubyBHL.gemspec"]
  s.homepage = %q{http://github.com/mjy/rubyBHL}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "rubyBHL", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Guarded so the spec still loads on RubyGems versions without this setter.
  s.rubyforge_project = %q{rubybhl} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.}
  s.test_files = ["test/rubyBHL_test.rb"]

  # The gem declares no dependencies, so the generated RubyGems-version
  # branches were empty; they are dropped because they referenced
  # Gem::RubyGemsVersion, which current RubyGems no longer defines.
  s.specification_version = 3 if s.respond_to? :specification_version=
end
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require 'rubygems'
3
+ require 'ruby-debug'
@@ -0,0 +1,36 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
2
+ require File.expand_path(File.join(File.dirname(__FILE__), '../lib/rubyBHL'))
3
+
4
# Tests for RubyBHL::Rbhl. Every test except the illegal-parameter one
# performs a live request against the BHL OpenURL service.
class RubyBHLTest < Test::Unit::TestCase

  def setup
  end

  # An unknown key in :params must be rejected.
  def test_foo_is_not_a_legal_param
    assert_raise(RubyBHL::RbhlError) do
      RubyBHL::Rbhl.new(:params => {:foo => "BAR!"})
    end
  end

  # Parameters appear sorted by name, followed by the format.
  def test_search_url_for_default_url_is_properly_formed
    @bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR!", :volume => "2"})
    expected = "http://www.biodiversitylibrary.org/openurl?title=BAR!&volume=2&format=json"
    assert_equal expected, @bhl.search_url
  end

  # Whitespace inside a value is sent as '+'.
  def test_search_url_convert_space_to_plus
    @bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"})
    expected = "http://www.biodiversitylibrary.org/openurl?title=BAR+FOO&format=json"
    assert_equal expected, @bhl.search_url
  end

  def test_basic_response_is_returned
    # this is a crummy test, we need a reference to the status codes.
    @bhl = RubyBHL::Rbhl.new(:params => {:title => "BAR FOO"}) # should match nothing, I hope
    response = @bhl.json_data
    assert response["Status"] == 1
    assert response["citations"].size == 0
  end

  def test_response_with_citations_is_returned
    # also crummy: relies on the live service knowing this title.
    @bhl = RubyBHL::Rbhl.new(:params => {:title => "Manual of North American Diptera"}) # should match something
    found = @bhl.json_data["citations"]
    assert found.size > 0
  end

end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubyBHL
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Yoder & Seltmann
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-19 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
17
+ email: diapriid@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ - lib/rubyBHL.rb
25
+ files:
26
+ - Manifest
27
+ - README.rdoc
28
+ - Rakefile
29
+ - init.rb
30
+ - lib/rubyBHL.rb
31
+ - test/helper.rb
32
+ - test/rubyBHL_test.rb
33
+ - rubyBHL.gemspec
34
+ has_rdoc: true
35
+ homepage: http://github.com/mjy/rubyBHL
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options:
40
+ - --line-numbers
41
+ - --inline-source
42
+ - --title
43
+ - rubyBHL
44
+ - --main
45
+ - README.rdoc
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "1.2"
59
+ version:
60
+ requirements: []
61
+
62
+ rubyforge_project: rubybhl
63
+ rubygems_version: 1.3.5
64
+ signing_key:
65
+ specification_version: 3
66
+ summary: Hook to the Biodiversity Heritage Library API plus some screen scraping for OCR.
67
+ test_files:
68
+ - test/rubyBHL_test.rb