cloudlib 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +340 -0
- data/README +91 -0
- data/bin/cloudlib +265 -0
- data/bin/cloudlib-web +216 -0
- data/cloudlib.gemspec +28 -0
- data/lib/cloudlib.rb +386 -0
- metadata +138 -0
data/bin/cloudlib-web
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'digest/sha1'
|
|
4
|
+
require 'sinatra' # sinatra gem
|
|
5
|
+
require 'tempfile'
|
|
6
|
+
require 'fileutils'
|
|
7
|
+
require 'cloudlib'
|
|
8
|
+
require 'highline/import' # highline gem
|
|
9
|
+
|
|
10
|
+
# Every setting listed below must be present in the environment before the
# app starts; anything missing is requested interactively via HighLine's
# `ask`, with the two secret values echoed as "*".
required_vars = %w[AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY CLOUDLIB_LIBRARY_NAME CLOUDLIB_WEB_USERNAME CLOUDLIB_WEB_PASSWORD]
masked_vars = %w[AWS_SECRET_ACCESS_KEY CLOUDLIB_WEB_PASSWORD]
required_vars.each do |var|
  next if ENV[var]
  ENV[var] = ask("#{var}: ", String) do |q|
    q.echo = masked_vars.include?(var) ? "*" : true
  end
end

# Connect the library using the (possibly just-entered) settings.
Cloudlib::Entry.connect(
  ENV['CLOUDLIB_LIBRARY_NAME'],
  ENV['AWS_ACCESS_KEY_ID'],
  ENV['AWS_SECRET_ACCESS_KEY']
)
|
|
18
|
+
|
|
19
|
+
# Protect every route with HTTP basic authentication; the accepted
# credentials are the CLOUDLIB_WEB_USERNAME / CLOUDLIB_WEB_PASSWORD settings.
use Rack::Auth::Basic do |username, password|
  [username, password] == [ENV['CLOUDLIB_WEB_USERNAME'], ENV['CLOUDLIB_WEB_PASSWORD']]
end
|
|
23
|
+
|
|
24
|
+
# Render the :stylesheet Sass template and serve it as UTF-8 CSS.
get '/stylesheet.css' do
  content_type('text/css', :charset => 'utf-8')
  sass(:stylesheet)
end
|
|
28
|
+
|
|
29
|
+
# Front page: show the search form with no query, no results and no
# continuation token.
get '/' do
  @query = ""
  @token = ""
  @entries = []
  haml(:index)
end
|
|
34
|
+
|
|
35
|
+
# Run a search. When a query was submitted, fetch up to 10 matches (passing
# along the continuation token, if any); otherwise render the empty page.
post '/' do
  submitted = params[:query]
  @query = submitted || ""
  @token, @entries =
    if submitted
      Cloudlib::Entry.query(@query, 10, params[:token])
    else
      ["", []]
    end
  haml(:index)
end
|
|
45
|
+
|
|
46
|
+
# Show the upload form, together with the error message (if any) that was
# passed in the "error" query parameter.
# NOTE(review): the message comes straight from the request; confirm that the
# template escapes it before rendering.
get '/upload' do
  @error = params[:error]
  haml(:upload)
end
|
|
50
|
+
|
|
51
|
+
# Handle a file upload: store the uploaded file as a new library entry, fill
# in its metadata from the form and redirect to the front page.
#
# If no file was submitted (the fileToUpload parameter is missing, has no
# temp file, or has an empty filename) the user is sent back to the upload
# form with an error message instead of hitting an exception.
post '/upload' do
  upload   = params[:fileToUpload]
  tempfile = upload.is_a?(Hash) ? upload[:tempfile] : nil
  origname = upload.is_a?(Hash) ? upload[:filename].to_s : ""
  if tempfile.nil? || origname.empty?
    redirect "/upload?error=Please+select+a+file+to+upload."
  else
    tempfilepath = tempfile.path
    tempfile.close
    begin
      entry = Cloudlib::Entry.from_file(tempfilepath, origname)
    ensure
      # Remove the temporary copy even when storing the entry fails.
      FileUtils.rm_f tempfilepath
    end
    set_attributes_from_form(entry)
    entry.save
    redirect "/"
  end
end
|
|
66
|
+
|
|
67
|
+
# Serve the BibTeX record of the entry named in the URL as plain text.
get '/*/bibtex' do
  entry_name = params[:splat][0]
  @entry = Cloudlib::Entry.find_by_name(entry_name)
  content_type('text/plain', :charset => 'utf-8')
  @entry.to_bibtex
end
|
|
72
|
+
|
|
73
|
+
# Show the metadata editing page for the entry named in the URL.
get '/*' do
  entry_name = params[:splat][0].to_s
  @entry = Cloudlib::Entry.find_by_name(entry_name)
  haml(:modify)
end
|
|
78
|
+
|
|
79
|
+
# Update the metadata of the entry named in the URL from the submitted form,
# save it and return to the front page.
post '/*' do
  entry_name = params[:splat][0]
  entry = Cloudlib::Entry.find_by_name(entry_name)
  set_attributes_from_form(entry)
  entry.save
  redirect('/')
end
|
|
85
|
+
|
|
86
|
+
# Delete the entry named in the URL and return to the front page.
delete '/*' do
  entry_name = params[:splat][0]
  Cloudlib::Entry.find_by_name(entry_name).delete
  redirect('/')
end
|
|
91
|
+
|
|
92
|
+
# True when +field+ is one of the metadata fields that
# Cloudlib::Entry.fields lists for entries of the given +type+.
def field_for_type?(field, type)
  applicable = Cloudlib::Entry.fields(type)
  applicable.include?(field)
end
|
|
95
|
+
|
|
96
|
+
# Builds a JavaScript snippet that shows the form rows for the fields that
# apply to entries of +type+ and hides all the others.
#
# One statement is generated per known field; each one overwrites the
# "style" attribute of the element whose id is the field name. Visible rows
# get "display: table-row" (the ids are placed on table rows by the metadata
# template); the previous value "all" is not a valid CSS display value.
#
# Returns the statements concatenated into a single string.
def show_fields(type)
  Cloudlib::Entry.fields.map do |field|
    display = field_for_type?(field, type) ? 'table-row' : 'none'
    "document.getElementById('#{field}').setAttribute('style', 'display: #{display}'); "
  end.join
end
|
|
102
|
+
|
|
103
|
+
# Copies the submitted form values into +entry+.
#
# The entry type is stored as submitted. Then every known field is set: to
# the submitted value when the field applies to the chosen type and a value
# was sent, and to the empty string otherwise.
def set_attributes_from_form(entry)
  chosen_type = params['entry_type']
  entry.attributes['entry_type'] = chosen_type
  Cloudlib::Entry.fields.each do |field|
    submitted = field_for_type?(field, chosen_type) ? params[field] : nil
    entry.set_attribute(field.to_s, submitted || '')
  end
end
|
|
113
|
+
|
|
114
|
+
use_in_file_templates!
|
|
115
|
+
|
|
116
|
+
__END__
|
|
117
|
+
|
|
118
|
+
@@ layout
|
|
119
|
+
!!! Strict
|
|
120
|
+
%head
|
|
121
|
+
%link{:href => '/stylesheet.css', :type => 'text/css', :media => 'all', :rel => 'stylesheet'}
|
|
122
|
+
%title
|
|
123
|
+
= ENV['CLOUDLIB_LIBRARY_NAME']
|
|
124
|
+
%body
|
|
125
|
+
%h1
|
|
126
|
+
%a{:href => '/'}
|
|
127
|
+
= ENV['CLOUDLIB_LIBRARY_NAME']
|
|
128
|
+
%div#content
|
|
129
|
+
= yield
|
|
130
|
+
%div#footer
|
|
131
|
+
powered by
|
|
132
|
+
%a{:href => 'http://github.com/jgm/cloudlib/tree/master'}cloudlib
|
|
133
|
+
|
|
134
|
+
@@ index
|
|
135
|
+
%div.queryform
|
|
136
|
+
%form{:method => 'POST', :action => '/'}
|
|
137
|
+
%input{:type => 'text', :name => 'query', :value => @query, :size => '30'}
|
|
138
|
+
%input{:type => 'submit', :value => 'Search'}
|
|
139
|
+
%ol
|
|
140
|
+
- @entries.each do |i|
|
|
141
|
+
%li
|
|
142
|
+
= i.to_s
|
|
143
|
+
%a{:href => "/#{i.name}/bibtex"}bibtex
|
|
144
|
+
%span.separator •
|
|
145
|
+
%a{:href => "/#{i.name}"}modify
|
|
146
|
+
%span.separator •
|
|
147
|
+
%a{:href => i.url}download
|
|
148
|
+
- if not @token.empty?
|
|
149
|
+
%form{:method => 'POST', :action => '/'}
|
|
150
|
+
%input{:type => 'text', :name => 'query', :value => @query, :style => 'display: none'}
|
|
151
|
+
%input{:type => 'text', :name => 'token', :value => @token, :style => 'display: none'}
|
|
152
|
+
%input{:type => 'submit', :value => 'More matches...'}
|
|
153
|
+
%a{:href => "/upload"}upload
|
|
154
|
+
|
|
155
|
+
@@ upload
|
|
156
|
+
%form{:method => 'POST', :action => '/upload', :enctype => 'multipart/form-data'}
|
|
157
|
+
%p.error
|
|
158
|
+
= @error
|
|
159
|
+
%label Select file to upload:
|
|
160
|
+
%br
|
|
161
|
+
%input{:type => 'file', :name => 'fileToUpload', :size => 40}
|
|
162
|
+
= haml :metadata, :layout => false
|
|
163
|
+
%input{:type => 'submit', :value => 'Add file to library'}
|
|
164
|
+
|
|
165
|
+
@@ metadata
|
|
166
|
+
%table
|
|
167
|
+
%tr
|
|
168
|
+
%td
|
|
169
|
+
%label Type:
|
|
170
|
+
%td
|
|
171
|
+
%select{:name => 'entry_type'}
|
|
172
|
+
- ['','article','book','incollection','chapter','unpublished'].each do |type|
|
|
173
|
+
%option{:onChange => show_fields(type), :selected => (@entry && @entry.show_attribute('entry_type') == type) || type.empty?}
|
|
174
|
+
= type
|
|
175
|
+
- Cloudlib::Entry.fields.each do |field|
|
|
176
|
+
%tr{:style => (@entry && field_for_type?(field, @entry.show_attribute('entry_type'))) || 'display: none;', :id => field.to_s}
|
|
177
|
+
%td
|
|
178
|
+
%label
|
|
179
|
+
= field.to_s.capitalize + ':'
|
|
180
|
+
%td
|
|
181
|
+
%input{:type => 'text', :name => field.to_s, :value => (@entry && @entry.show_attribute(field.to_s)) || '', :size => 50}
|
|
182
|
+
|
|
183
|
+
@@ modify
|
|
184
|
+
%div.detail
|
|
185
|
+
%form{:method => 'POST', :action => "/#{@entry.name}"}
|
|
186
|
+
= haml :metadata, :layout => false
|
|
187
|
+
%p
|
|
188
|
+
%input{:type => 'submit', :value => 'Update metadata'}
|
|
189
|
+
%form{:method => 'POST', :action => "/#{@entry.name}"}
|
|
190
|
+
%p
|
|
191
|
+
%input{:type => 'text', :name => '_method', :value => 'delete', :style => 'display: none;'}
|
|
192
|
+
%input{:type => 'submit', :value => 'Delete this entry'}
|
|
193
|
+
|
|
194
|
+
@@ stylesheet
|
|
195
|
+
body
|
|
196
|
+
font-size: small
|
|
197
|
+
padding: 10px
|
|
198
|
+
h1
|
|
199
|
+
border-top: 1px solid gray
|
|
200
|
+
border-bottom: 1px solid gray
|
|
201
|
+
h1 a
|
|
202
|
+
color: #7a7a7a
|
|
203
|
+
text-decoration: none
|
|
204
|
+
&:visited
|
|
205
|
+
color: #7a7a7a
|
|
206
|
+
li
|
|
207
|
+
padding-bottom: 0.3em
|
|
208
|
+
#footer
|
|
209
|
+
border-top: 1px solid gray
|
|
210
|
+
margin-top: 1em
|
|
211
|
+
padding-top: 1em
|
|
212
|
+
font-size: x-small
|
|
213
|
+
text-align: center
|
|
214
|
+
p.error
|
|
215
|
+
color: red
|
|
216
|
+
font-weight: bold
|
data/cloudlib.gemspec
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Gem specification for cloudlib: identity, packaged files, executables,
# rdoc settings and runtime dependencies.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = "cloudlib"
  spec.version     = "0.3.10"
  spec.date        = "2010-10-21"
  spec.summary     = "Tools for maintaining a library of books and articles in Amazon S3 and SimpleDB"
  spec.email       = "jgm@berkeley.edu"
  spec.homepage    = "http://github.com/jgm/cloudlib"
  spec.description = "Cloudlib is a ruby library and commands for maintaining a library of books and articles on the Amazon 'cloud': S3 and SimpleDB."
  spec.has_rdoc    = true
  spec.authors     = ["John MacFarlane"]

  # Executables
  spec.bindir             = "bin"
  spec.executables        = ["cloudlib", "cloudlib-web"]
  spec.default_executable = "cloudlib"

  # Packaged files
  spec.files = [
    "README",
    "LICENSE",
    "cloudlib.gemspec",
    "lib/cloudlib.rb",
    "bin/cloudlib",
    "bin/cloudlib-web"
  ]
  spec.test_files = []

  # Documentation
  spec.rdoc_options     = ["--main", "README", "--inline-source"]
  spec.extra_rdoc_files = ["README"]

  # Runtime dependencies
  spec.add_dependency("aws-s3",   [">= 0.5.1"])
  spec.add_dependency("aws-sdb",  [">= 0.3.1"])
  spec.add_dependency("sinatra",  [">= 0.3.2"])
  spec.add_dependency("highline", [">= 1.2.9"])
end
|
|
28
|
+
|
data/lib/cloudlib.rb
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# This library provides the means for maintaining a database of
|
|
2
|
+
# documents on Amazon's S3 file store, with searchable metadata in
|
|
3
|
+
# Amazon's SimpleDB database. Think of it as a filing cabinet or library
|
|
4
|
+
# that can be extended indefinitely and accessed from anywhere in the
|
|
5
|
+
# world: a library that lives "in the cloud."
|
|
6
|
+
|
|
7
|
+
# In order to use this library, you need to sign up for
|
|
8
|
+
# Amazon's S3 and SimpleDB services:
|
|
9
|
+
#
|
|
10
|
+
# * Amazon SimpleDB: http://aws.amazon.com/simpledb/
|
|
11
|
+
# * Amazon S3: http://aws.amazon.com/s3/
|
|
12
|
+
#
|
|
13
|
+
# Simple usage example:
|
|
14
|
+
#
|
|
15
|
+
# require 'rubygems'
|
|
16
|
+
# require 'cloudlib'
|
|
17
|
+
# include Cloudlib
|
|
18
|
+
# Entry.connect('my_aws_library', 'xxx_key_id_xxx', 'xxx_secret_access_key_xxx')
|
|
19
|
+
# token, logic_entries = Entry.query('logic')
|
|
20
|
+
# logic_entries.each {|entry| puts entry.to_s}
|
|
21
|
+
#
|
|
22
|
+
# For more examples of the use of the library, see the programs cloudlib
|
|
23
|
+
# and cloudlib-web (in the bin directory), included in the gem.
|
|
24
|
+
|
|
25
|
+
# Author:: John MacFarlane (jgm at berkeley dot edu)
|
|
26
|
+
# Copyright:: Copyright (c) 2008 John MacFarlane
|
|
27
|
+
# License:: GPL v2
|
|
28
|
+
|
|
29
|
+
require 'rubygems'
require 'date'
require 'digest/sha1'
require 'fileutils'
require 'logger'
require 'open-uri'
require 'readline'
require 'aws/s3' # aws-s3 gem
require 'aws_sdb' # aws-sdb gem
|
|
35
|
+
|
|
36
|
+
module Cloudlib
|
|
37
|
+
|
|
38
|
+
# A library entry, including content and metadata. An entry has a name
|
|
39
|
+
# (which is also the key of the associated S3 object) and an attributes
|
|
40
|
+
# hash. The name is of the form "sha1.ext", where sha1 is a SHA1 hash of
|
|
41
|
+
# the contents of the file, and ext is the file extension. This makes
|
|
42
|
+
# it impossible to have entries with duplicate contents. The attributes
|
|
43
|
+
# hash contains the following fields:
|
|
44
|
+
#
|
|
45
|
+
# * extension - file extension including .
|
|
46
|
+
# * size - size of contents (bytes)
|
|
47
|
+
# * date-added - date entry was added to library
|
|
48
|
+
# * entry_type - article, book, chapter, incollection, unpublished
|
|
49
|
+
# * authors - list of authors
|
|
50
|
+
# * editors - list of editors
|
|
51
|
+
# * title - title of entry
|
|
52
|
+
# * booktitle - title of book containing entry
|
|
53
|
+
# * year - publication year of entry
|
|
54
|
+
# * publisher - publisher of book
|
|
55
|
+
# * address - publication address
|
|
56
|
+
# * journal - journal containing entry
|
|
57
|
+
# * volume - volume number of journal
|
|
58
|
+
# * pages - page range of entry in book or journal
|
|
59
|
+
# * keywords - keywords
|
|
60
|
+
# * doi - DOI for entry
|
|
61
|
+
# * url - URL for entry
|
|
62
|
+
# * comments - miscellaneous comments
|
|
63
|
+
# * *_lowercase - lowercase version of *
|
|
64
|
+
# * *_words - lowercase version of *, split into a list of words
|
|
65
|
+
# * all_words - list of words in title, authors, editors, booktitle, keywords
|
|
66
|
+
|
|
67
|
+
class Entry
|
|
68
|
+
|
|
69
|
+
attr_accessor :name, :attributes
|
|
70
|
+
|
|
71
|
+
# Establish connections to the S3 file store and the SimpleDB database.
|
|
72
|
+
# If values are not supplied for the parameters, they will default to
|
|
73
|
+
# the values of the environment variables CLOUDLIB_LIBRARY_NAME,
|
|
74
|
+
# AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Note that library_name
|
|
75
|
+
# is the name of both the S3 bucket that will hold the contents of
|
|
76
|
+
# the entries and the SimpleDB domain that will hold the metadata.
|
|
77
|
+
def self.connect(library_name=ENV['CLOUDLIB_LIBRARY_NAME'],
|
|
78
|
+
aws_access_key_id=ENV['AWS_ACCESS_KEY_ID'],
|
|
79
|
+
aws_secret_access_key=ENV['AWS_SECRET_ACCESS_KEY'],
|
|
80
|
+
debug = false)
|
|
81
|
+
@@aws_access_key_id = aws_access_key_id
|
|
82
|
+
@@aws_secret_access_key = aws_secret_access_key
|
|
83
|
+
AWS::S3::Base.establish_connection!(:access_key_id => @@aws_access_key_id, :secret_access_key => @@aws_secret_access_key, :use_ssl => true)
|
|
84
|
+
@@bucket = library_name
|
|
85
|
+
logger = Logger.new(STDERR)
|
|
86
|
+
logger.level = if debug then Logger::DEBUG else Logger::WARN end
|
|
87
|
+
@@db = AwsSdb::Service.new(:access_key_id => @@aws_access_key_id, :secret_access_key => @@aws_secret_access_key, :use_ssl => true, :logger => logger)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Creates a new entry object. To create an entry with contents,
|
|
91
|
+
# use Entry.from_file.
|
|
92
|
+
def initialize(name, attributes={'all_words' => []})
|
|
93
|
+
@name = name
|
|
94
|
+
@attributes = attributes
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Create the S3 bucket and SimpleDB domain that will store the library entries.
|
|
98
|
+
# This method should be run once to create the library.
|
|
99
|
+
def self.create_library
|
|
100
|
+
AWS::S3::Bucket.create(@@bucket)
|
|
101
|
+
@@db.create_domain(@@bucket)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Delete the S3 bucket and SimpleDB domain that store the library entries.
|
|
105
|
+
# All data will be lost.
|
|
106
|
+
def self.delete_library
|
|
107
|
+
AWS::S3::Bucket.delete(@@bucket, :force => true)
|
|
108
|
+
@@db.delete_domain(@@bucket)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# Creates and saves an entry from a file, using attributes supplied.
|
|
112
|
+
# Returns the entry.
|
|
113
|
+
def self.from_file(path, filename=path, attributes={'all_words' => []})
|
|
114
|
+
sha1 = Digest::SHA1.file(path).hexdigest
|
|
115
|
+
ext = File.extname(filename)
|
|
116
|
+
name = "#{sha1}#{ext}"
|
|
117
|
+
attributes['size'] = File.size(path).to_s
|
|
118
|
+
attributes['date-added'] = Date.today.to_s
|
|
119
|
+
entry = Entry.new(name, attributes)
|
|
120
|
+
AWS::S3::S3Object.store(name, open(path), @@bucket)
|
|
121
|
+
@@db.put_attributes(@@bucket, name, attributes, replace=true)
|
|
122
|
+
return entry
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Return an entry with the specified name. Raises an error if not found.
|
|
126
|
+
def self.find_by_name(name)
|
|
127
|
+
attributes = @@db.get_attributes(@@bucket, name)
|
|
128
|
+
if attributes == {} then raise "Item not found." end
|
|
129
|
+
Entry.new(name, attributes)
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Queries the database and returns a list [token, entries]. entries is
|
|
133
|
+
# a list of up to numitems Entry objects that match the query. If
|
|
134
|
+
# there are more entries than numitems, token will be nonempty, and
|
|
135
|
+
# can be passed in on a subsequent calls for the remaining entries.
|
|
136
|
+
#
|
|
137
|
+
# The query string can contain one or more words. If a word is
|
|
138
|
+
# preceded by ti=, only entries that match it in the title will be
|
|
139
|
+
# returned. Similarly, au= searches authors, jo= journals, pu=
|
|
140
|
+
# publishers, ad= addresses, ed= editors, bo= booktitle (for collections),
|
|
141
|
+
# and ye= years. ye> and # ye< may also be used.
|
|
142
|
+
# The form ti='word1 word2' may also be used; entries will only match
|
|
143
|
+
# if their titles contain both word1 and word2.
|
|
144
|
+
def self.query(query_string, numitems=10, token=nil)
|
|
145
|
+
query_parts = query_string.downcase.scan(/((ti(?:tle)?|au(?:thors?)?|jo(?:urnal)?|bo(?:ooktitle)?|pu(?:blisher)?|ad(?:ddress)?|ed(?:itors?)?|ye(?:ar)?)\s*([<=>])\s*('[^']*'|"[^"]*"|\S*)|\S+)\s*/)
|
|
146
|
+
query = query_parts.reject {|part| part[0] == '*'}.map do |part|
|
|
147
|
+
whole, key, comparison, val = part
|
|
148
|
+
if val then val = val.gsub(/^['"](.*)['"]$/, "\\1") end
|
|
149
|
+
if not val then val = whole end
|
|
150
|
+
key_full = if key
|
|
151
|
+
case key[0..1]
|
|
152
|
+
when 'ti'
|
|
153
|
+
'title'
|
|
154
|
+
when 'au'
|
|
155
|
+
'authors'
|
|
156
|
+
when 'jo'
|
|
157
|
+
'journal'
|
|
158
|
+
when 'pu'
|
|
159
|
+
'publisher'
|
|
160
|
+
when 'ad'
|
|
161
|
+
'address'
|
|
162
|
+
when 'ed'
|
|
163
|
+
'editors'
|
|
164
|
+
when 'ye'
|
|
165
|
+
'year'
|
|
166
|
+
else 'all'
|
|
167
|
+
end
|
|
168
|
+
else
|
|
169
|
+
'all'
|
|
170
|
+
end
|
|
171
|
+
# split hyphenated names into components, since a query might just have one
|
|
172
|
+
vals = val.split(/[-[:space:]]+/)
|
|
173
|
+
vals.map do |v|
|
|
174
|
+
if key_full == 'year' # there is no year_words field
|
|
175
|
+
"['year' #{comparison} '#{v}']"
|
|
176
|
+
else
|
|
177
|
+
v_escaped = v.gsub(/\\/,"\\\\\\\\").gsub(/'/,"\\\\'")
|
|
178
|
+
"['#{key_full}_words' = '#{v_escaped}']"
|
|
179
|
+
end
|
|
180
|
+
end.join(" intersection ")
|
|
181
|
+
end.join(" intersection ")
|
|
182
|
+
# note: query has to include year in order to sort by year
|
|
183
|
+
# hence this dummy search
|
|
184
|
+
if query.empty?
|
|
185
|
+
query = "['year' starts-with ''] sort 'year'"
|
|
186
|
+
else
|
|
187
|
+
query += " intersection ['year' starts-with ''] sort 'year'"
|
|
188
|
+
end
|
|
189
|
+
names, token = if token
|
|
190
|
+
@@db.query(@@bucket, query, numitems, token)
|
|
191
|
+
else
|
|
192
|
+
@@db.query(@@bucket, query, numitems)
|
|
193
|
+
end
|
|
194
|
+
entries = names.map do |name|
|
|
195
|
+
attributes = @@db.get_attributes(@@bucket, name)
|
|
196
|
+
Entry.new(name, attributes)
|
|
197
|
+
end
|
|
198
|
+
return token, entries
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# Returns a human-friendly filename for the entry, constructed from
|
|
202
|
+
# authors and title.
|
|
203
|
+
def friendly_filename
|
|
204
|
+
authornames = self.attributes['authors'].map {|a| last_name(a)}.join('_')
|
|
205
|
+
title = self.show_attribute('title').gsub(/[,.\/[:space:]]+/,'_')
|
|
206
|
+
ext = File.extname(self.name)
|
|
207
|
+
return "#{authornames}_#{title}#{ext}"
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
# Deletes the entry.
|
|
211
|
+
def delete
|
|
212
|
+
AWS::S3::S3Object.delete(self.name, @@bucket)
|
|
213
|
+
@@db.delete_attributes(@@bucket, self.name)
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# Saves the entry (metadata only; contents are saved by the from_file
|
|
217
|
+
# method).
|
|
218
|
+
def save
|
|
219
|
+
@@db.put_attributes(@@bucket, self.name, self.attributes, replace=true)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
# Downloads the entry and saves as filename.
|
|
223
|
+
def download(path)
|
|
224
|
+
if File.exist?(path)
|
|
225
|
+
STDERR.puts "Backing up existing #{path} as #{path}~"
|
|
226
|
+
FileUtils.copy_file(path, "#{path}~", preserve=true)
|
|
227
|
+
end
|
|
228
|
+
open(path, 'w') do |outfile|
|
|
229
|
+
open(self.url, 'r') do |source|
|
|
230
|
+
FileUtils.copy_stream(source, outfile)
|
|
231
|
+
end
|
|
232
|
+
end
|
|
233
|
+
return path
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
# Returns a bibtex entry for the entry.
|
|
237
|
+
def to_bibtex
|
|
238
|
+
pairs = self.fields.map do |field|
|
|
239
|
+
if self.attributes[field.to_s]
|
|
240
|
+
sprintf(" %-15s= {%s}", field.to_s, self.show_attribute(field.to_s))
|
|
241
|
+
else
|
|
242
|
+
nil
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
pairs += [sprintf(" %-15s= {%s}", "file", self.name)]
|
|
246
|
+
authornames = self.attributes['authors'].map {|a| last_name(a)}.join('.')
|
|
247
|
+
year = self.attributes['year']
|
|
248
|
+
entry_type = self.show_attribute('entry_type') || 'unknown'
|
|
249
|
+
if entry_type == 'chapter' then entry_type = 'inbook' end
|
|
250
|
+
entry_key = "#{authornames}:#{year}"
|
|
251
|
+
"@#{entry_type.upcase}{#{entry_key},\n#{pairs.join(",\n")}\n}"
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# Returns a string representation of the entry's metadata.
|
|
255
|
+
def to_s
|
|
256
|
+
authors = self.show_attribute('authors')
|
|
257
|
+
unless authors.empty?
|
|
258
|
+
authors = "#{authors}, "
|
|
259
|
+
end
|
|
260
|
+
title = "#{self.show_attribute('title')}"
|
|
261
|
+
year = self.show_attribute('year')
|
|
262
|
+
titleyear = if year.empty?
|
|
263
|
+
title + ". "
|
|
264
|
+
else
|
|
265
|
+
title + " (#{year}). "
|
|
266
|
+
end
|
|
267
|
+
pubaddr = [self.show_attribute('address'),
|
|
268
|
+
self.show_attribute('publisher')].reject {|x| x.empty?}.join(": ")
|
|
269
|
+
chapter = self.show_attribute('chapter')
|
|
270
|
+
pages = self.show_attribute('pages')
|
|
271
|
+
booktitle = self.show_attribute('booktitle')
|
|
272
|
+
editors = self.show_attribute('editors')
|
|
273
|
+
journal = self.show_attribute('journal')
|
|
274
|
+
volume = self.show_attribute('volume')
|
|
275
|
+
rest = case self.show_attribute('entry_type')
|
|
276
|
+
when 'article'
|
|
277
|
+
if journal.empty?
|
|
278
|
+
""
|
|
279
|
+
else
|
|
280
|
+
"#{journal} #{volume}" +
|
|
281
|
+
if pages.empty? then "." else ", #{pages}." end
|
|
282
|
+
end
|
|
283
|
+
when 'book'
|
|
284
|
+
if pubaddr.empty? then "" else "#{pubaddr}." end
|
|
285
|
+
when 'chapter'
|
|
286
|
+
if pubaddr.empty? then "" else "#{pubaddr}." end +
|
|
287
|
+
if chapter.empty? then "" else " Chapter #{chapter}." end +
|
|
288
|
+
if pages.empty? then "" else " #{pages}." end
|
|
289
|
+
when 'incollection'
|
|
290
|
+
"In " +
|
|
291
|
+
if editors.empty? then "" else editors + " (eds.), " end +
|
|
292
|
+
booktitle +
|
|
293
|
+
if pubaddr.empty? then "" else " (#{pubaddr})." end +
|
|
294
|
+
if chapter.empty? then "" else " Chapter #{chapter}." end +
|
|
295
|
+
if pages.empty? then "" else " #{pages}." end
|
|
296
|
+
when 'unpublished'
|
|
297
|
+
" (unpublished)."
|
|
298
|
+
else ""
|
|
299
|
+
end
|
|
300
|
+
return authors + titleyear + rest
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
# Sets the specified metadata attribute to ans. ans is assumed to be a regular string.
|
|
304
|
+
# It will be split by " and " for authors and editors, or by spaces for keywords.
|
|
305
|
+
def set_attribute(attribute, ans)
|
|
306
|
+
index = ['title', 'authors', 'editors', 'booktitle'].member?(attribute)
|
|
307
|
+
if ans.nil? || ans.empty?
|
|
308
|
+
self.attributes[attribute] = nil
|
|
309
|
+
else
|
|
310
|
+
newval = if attribute == 'editors' || attribute == 'authors'
|
|
311
|
+
ans.split(" and ").map {|a| a.strip}
|
|
312
|
+
elsif attribute == 'keywords'
|
|
313
|
+
ans.split
|
|
314
|
+
else
|
|
315
|
+
[ans.strip]
|
|
316
|
+
end
|
|
317
|
+
self.attributes[attribute] = newval
|
|
318
|
+
unless ['url', 'doi', 'keywords'].member?(attribute)
|
|
319
|
+
self.attributes[attribute + "_lowercase"] = newval.map {|a| a.downcase}
|
|
320
|
+
self.attributes[attribute + "_words"] = self.attributes[attribute + "_lowercase"].map {|a| a.split(/[[:punct:]]*[[:space:]]+|-+/)}.flatten.reject {|a| a.empty?}
|
|
321
|
+
end
|
|
322
|
+
# recalculate all_words
|
|
323
|
+
tit_auth_words = ['title', 'authors', 'editors', 'booktitle'].map {|att| self.attributes[att + "_words"] || []}.flatten
|
|
324
|
+
keywords = self.attributes['keywords'] || []
|
|
325
|
+
self.attributes['all_words'] = keywords + tit_auth_words
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
|
|
329
|
+
# Returns a string representation of an attribute.
|
|
330
|
+
def show_attribute(attribute)
|
|
331
|
+
value = self.attributes[attribute]
|
|
332
|
+
if value.nil?
|
|
333
|
+
""
|
|
334
|
+
elsif attribute == 'keywords'
|
|
335
|
+
value.join(' ')
|
|
336
|
+
elsif attribute == 'editors' || attribute == 'authors'
|
|
337
|
+
value.join(' and ')
|
|
338
|
+
else
|
|
339
|
+
value[0]
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
# Returns an array of the field keywords appropriate for a type of entry.
|
|
344
|
+
def self.fields(entry_type='*')
|
|
345
|
+
fields = [:title, :authors, :year]
|
|
346
|
+
case entry_type
|
|
347
|
+
when 'article'
|
|
348
|
+
fields += [:journal, :volume, :pages]
|
|
349
|
+
when 'book'
|
|
350
|
+
fields += [:publisher, :address]
|
|
351
|
+
when 'chapter'
|
|
352
|
+
fields += [:booktitle, :chapter, :publisher, :address, :pages]
|
|
353
|
+
when 'incollection'
|
|
354
|
+
fields += [:booktitle, :chapter, :publisher, :address, :editors, :pages]
|
|
355
|
+
when '*'
|
|
356
|
+
fields += [:journal, :volume, :booktitle, :editors, :chapter,
|
|
357
|
+
:publisher, :address, :pages]
|
|
358
|
+
end
|
|
359
|
+
fields += [:keywords, :url, :doi, :comments]
|
|
360
|
+
return fields
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
# Returns the fields appropriate for an entry.
|
|
364
|
+
def fields
|
|
365
|
+
entry_type = self.show_attribute('entry_type')
|
|
366
|
+
Entry.fields(entry_type)
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
def url(minutes = 10)
|
|
370
|
+
AWS::S3::S3Object.url_for(self.name, @@bucket, :expires_in => 60 * minutes)
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
private
|
|
374
|
+
# Returns the author's last name.
|
|
375
|
+
def last_name(author)
|
|
376
|
+
if author =~ /,/
|
|
377
|
+
author =~ /([^ ,]+),/
|
|
378
|
+
else
|
|
379
|
+
author =~ /([^ \t]+)$/
|
|
380
|
+
end
|
|
381
|
+
return $1
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
end
|
|
385
|
+
|
|
386
|
+
end
|