clio-search 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/clio.rb +74 -0
  2. metadata +56 -0
@@ -0,0 +1,74 @@
1
+ require 'rubygems'
2
+
3
+ require 'cgi'
4
+ require 'nokogiri'
5
+ require 'open-uri'
6
+
7
+
8
# Value object describing one catalog hit returned by Clio#search.
#
# title     - text content of the result's title link
# subtitle  - text of the "subtitle" detail row, or nil when absent
# authors   - text of the "author" detail row, or nil when absent
# published - text of the "published" detail row, or nil when absent
# online    - Hash of link text => href built from the "online" detail row,
#             or nil when absent
ClioResult = Struct.new(:title, :subtitle, :authors, :published, :online)

# API for Columbia Library CLIO Beta search engine
#
# Example:
#   >> Clio.new('[Referer]').search('nano fibers')
#   => [ #<struct ClioResult ...>, ... ]
#
# Arguments:
#   referer: (String) value sent as the HTTP Referer header
#   num_results: (Integer+) value sent as the per_page query parameter
class Clio
  API_PATH = "http://cliobeta.columbia.edu/catalog?&".freeze

  # CSS selectors used to pick the pieces of a result page apart.
  COUNT_SELECTOR   = '.pageEntriesInfo b:first'.freeze
  TITLE_SELECTOR   = '.title a'.freeze
  DETAILS_SELECTOR = '.details'.freeze

  attr_accessor :referer, :num_results

  def initialize(referer='', num_results=100)
    @referer = referer
    @num_results = num_results
  end

  # Runs +query+ against the catalog and scrapes the result page.
  #
  # query - (String) search terms; URL-escaped before being sent.
  #
  # Returns an Array of ClioResult, an empty Array when the page carries no
  # result-count element, or nil when the server answers with a 5xx status.
  # Other HTTP failures propagate as OpenURI::HTTPError.
  def search(query)
    doc = fetch_document(search_url(query))
    return nil if doc.nil?
    # the count element is only present when results were returned
    return [] if doc.css(COUNT_SELECTOR).empty?

    # Query titles and details separately and pair them by position, so the
    # pairing does not depend on the order in which nodes come back for a
    # combined selector, nor on a number parsed out of the page text.
    titles  = doc.css(TITLE_SELECTOR).to_a
    details = doc.css(DETAILS_SELECTOR).to_a
    titles.zip(details).map { |title, detail| build_result(title, detail) }
  end

  private

  # Builds the request URL for +query+ using the configured page size.
  def search_url(query)
    "#{API_PATH}per_page=#{CGI.escape(@num_results.to_s)}&q=#{CGI.escape(query)}"
  end

  # Fetches +url+ and parses it as HTML; returns nil on a 5xx response.
  #
  # URI#open (from open-uri) is used instead of Kernel#open, which no longer
  # opens URLs on Ruby 3.0+. The block form closes the response IO once
  # parsing is done. open-uri signals HTTP failures by raising
  # OpenURI::HTTPError rather than by returning a Net::HTTP response object.
  def fetch_document(url)
    URI.parse(url).open('Referer' => @referer) { |io| Nokogiri::HTML(io) }
  rescue OpenURI::HTTPError => e
    status = e.io.respond_to?(:status) ? e.io.status : nil
    raise unless status && status[0].to_s.start_with?('5')
    nil
  end

  # Assembles a ClioResult from a title node and its (possibly missing)
  # details node.
  def build_result(title, detail)
    fields = detail ? parse_details(detail) : {}
    ClioResult.new(
      title.content,
      fields['subtitle'],
      fields['author'],
      fields['published'],
      fields['online']
    )
  end

  # Builds a Hash of downcased row label => value from a details node.
  # Rows without a label node are skipped.
  def parse_details(detail)
    detail.css('.row').each_with_object({}) do |row, fields|
      label = row.css('.label').first
      next if label.nil?

      key = label.content.downcase
      entries = row.css('.entry')
      # online labels have >= 1 links to extract
      fields[key] = key == 'online' ? online_links(entries) : first_entry_text(entries)
    end
  end

  # Maps link text => href for every entry that contains a link.
  def online_links(entries)
    entries.each_with_object({}) do |entry, links|
      link = entry.css('a').first
      links[link.content] = link['href'] if link
    end
  end

  # Text of the first entry node, or nil when the row has no entries.
  def first_entry_text(entries)
    first = entries.first
    first && first.content
  end
end
74
+
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clio-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Peter Lubell-Doughtie
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &11102200 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *11102200
25
+ description: API for Columbia Library CLIO Beta
26
+ email: peter@helioid.com
27
+ executables: []
28
+ extensions: []
29
+ extra_rdoc_files: []
30
+ files:
31
+ - lib/clio.rb
32
+ homepage: http://www.helioid.com/
33
+ licenses: []
34
+ post_install_message:
35
+ rdoc_options: []
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ required_rubygems_version: !ruby/object:Gem::Requirement
45
+ none: false
46
+ requirements:
47
+ - - ! '>='
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubyforge_project:
52
+ rubygems_version: 1.8.15
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: API for Columbia Library CLIO Beta
56
+ test_files: []