whitepaper 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,14 @@ require 'optparse'
3
3
  require 'whitepaper'
4
4
 
5
5
  module Whitepaper
6
+ # The command-line interface to Whitepaper.
6
7
  class CLI
8
+ # Usage banner
7
9
  BANNER = <<-USAGE
8
10
  USAGE
9
11
 
10
12
  class << self
13
+ # Parse and respond to the command line options.
11
14
  def parse_options
12
15
  options = {}
13
16
  @opts = OptionParser.new do |opts|
@@ -77,6 +80,7 @@ module Whitepaper
77
80
  end
78
81
  end
79
82
 
83
+ # Executes the command-line version of Whitepaper.
80
84
  def CLI.run
81
85
  begin
82
86
  parse_options
@@ -85,11 +89,6 @@ module Whitepaper
85
89
  exit -1
86
90
  end
87
91
 
88
- def fail
89
- puts @opts
90
- exit -1
91
- end
92
-
93
92
  # Default
94
93
  puts BANNER
95
94
  exit 0
@@ -4,15 +4,21 @@ require 'whitepaper/paper'
4
4
 
5
5
  module Whitepaper
6
6
  module Engine
7
+ # This engine uses the CiteSeerX database to query metadata about a paper.
7
8
  module CiteSeerX
9
+ # The domain to use for CiteSeerX.
8
10
  DOMAIN = "http://citeseerx.ist.psu.edu"
11
+
12
+ # The url to use to search by title.
9
13
  SEARCH_BY_TITLE_URL = "search?q=title%3A{title}&t=doc&sort=cite"
10
14
 
11
15
  class << self
16
+ # Returns a url that will query for the given title keywords.
12
17
  def find_by_title_url(title)
13
18
  "#{DOMAIN}/#{SEARCH_BY_TITLE_URL.gsub(/\{title\}/, title)}"
14
19
  end
15
20
 
21
+ # Returns a Whitepaper::Paper by searching for the paper with the given title keywords.
16
22
  def find_by_title(title)
17
23
  @agent = Mechanize.new
18
24
  page = @agent.get "#{find_by_title_url(title)}"
@@ -25,25 +31,26 @@ module Whitepaper
25
31
  retrieve_details paper_link
26
32
  end
27
33
 
34
+ # Returns a Whitepaper::Paper by reading the direct page for a particular paper.
28
35
  def retrieve_details(url)
29
36
  @agent = Mechanize.new
30
37
 
31
38
  page = @agent.get url
32
39
 
33
- def get_meta(name, page)
40
+ get_meta = lambda {|name|
34
41
  meta = page.search "//meta[@name=\"#{name}\"]"
35
42
  if meta.nil? or meta.first.nil?
36
43
  return ""
37
44
  end
38
45
  meta.first.attribute "content"
39
- end
40
-
41
- description = get_meta("description", page)
42
- keywords_raw = get_meta("keywords", page)
43
- title = get_meta("citation_title", page)
44
- authors_raw = get_meta("citation_authors", page)
45
- year = get_meta("citation_year", page)
46
- conference = get_meta("citation_conference", page)
46
+ }
47
+
48
+ description = get_meta.call("description")
49
+ keywords_raw = get_meta.call("keywords")
50
+ title = get_meta.call("citation_title")
51
+ authors_raw = get_meta.call("citation_authors")
52
+ year = get_meta.call("citation_year")
53
+ conference = get_meta.call("citation_conference")
47
54
 
48
55
  authors = authors_raw.to_s.split(',').map(&:strip)
49
56
  keywords = keywords_raw.to_s.split(',').map(&:strip)
@@ -4,8 +4,10 @@ require 'whitepaper/paper'
4
4
 
5
5
  module Whitepaper
6
6
  module Engine
7
+ # This engine simply uses a Google filetype:pdf search to find paper information.
7
8
  module Google
8
9
  class << self
10
+ # Finds a Whitepaper::Paper by looking up a paper with the given title keywords.
9
11
  def find_by_title(title)
10
12
  @agent = Mechanize.new
11
13
 
@@ -1,15 +1,32 @@
1
1
  module Whitepaper
2
+ # The representation of a paper, including title, author, and pdf urls.
2
3
  class Paper
4
+ # The title of the paper.
3
5
  attr_reader :title
6
+
7
+ # The list of authors of the paper.
4
8
  attr_reader :authors
9
+
10
+ # A summary of the paper, typically an abstract. Defaults to "".
5
11
  attr_reader :description
12
+
13
+ # A list of keywords associated with the paper. Defaults to [].
6
14
  attr_reader :keywords
15
+
16
+ # The year of publication. Defaults to "".
7
17
  attr_reader :year
18
+
19
+ # The conference, if any, at which the paper appeared. Defaults to "".
8
20
  attr_reader :conference
9
21
 
22
+ # A list of urls to pdf copies of the paper. Defaults to [].
10
23
  attr_reader :pdf_urls
24
+
25
+ # A list of urls to ps copies of the paper. Defaults to [].
11
26
  attr_reader :ps_urls
12
27
 
28
+ # Construct an object representing paper metadata with the given fields.
29
+ # Title and authors are required, all other fields can be omitted.
13
30
  def initialize(title, authors, options = {})
14
31
  @title = title
15
32
  @authors = authors
@@ -22,6 +39,9 @@ module Whitepaper
22
39
  @ps_urls = options[:ps_urls] || []
23
40
  end
24
41
 
42
+ # Downloads the paper by using the pdf urls. The created file will be named
43
+ # after the title if no filename is given. The file will overwrite any existing
44
+ # file with the same name in the current directory.
25
45
  def download(filename = nil)
26
46
  if filename.nil?
27
47
  filename = title.to_s
@@ -50,6 +70,7 @@ module Whitepaper
50
70
  true
51
71
  end
52
72
 
73
+ # Output a simple description of the paper metadata.
53
74
  def to_s
54
75
  "Title: #{@title}\n" +
55
76
  "Authors: #{@authors}\n" +
@@ -1,3 +1,4 @@
1
1
  module Whitepaper
2
- VERSION = "0.0.1"
2
+ # Version number for Whitepaper gem.
3
+ VERSION = "0.0.2"
3
4
  end
data/lib/whitepaper.rb CHANGED
@@ -3,8 +3,14 @@ require "whitepaper/version"
3
3
  require 'whitepaper/engine/citeseerx'
4
4
  require 'whitepaper/engine/google'
5
5
 
6
+ # The namespace for the available metadata gathering engines.
7
+ module Whitepaper::Engine
8
+ end
9
+
10
+ # The main module encapsulating Whitepaper resources.
6
11
  module Whitepaper
7
12
  class << self
13
+ # Find and return a Whitepaper::Paper by searching for a partial match with the given title.
8
14
  def find_by_title(title)
9
15
  paper = Engine::CiteSeerX.find_by_title(title)
10
16
 
@@ -24,6 +30,7 @@ module Whitepaper
24
30
  paper
25
31
  end
26
32
 
33
+ # Find and return a list of authors by searching for a partial match with the given title.
27
34
  def find_authors_by_title(title)
28
35
  paper = find_by_title(title)
29
36
 
@@ -32,6 +39,7 @@ module Whitepaper
32
39
  end
33
40
  end
34
41
 
42
+ # Find and return the proper title by searching for a partial match with the given title.
35
43
  def find_title_by_title(title)
36
44
  paper = find_by_title(title)
37
45
 
@@ -40,6 +48,7 @@ module Whitepaper
40
48
  end
41
49
  end
42
50
 
51
+ # Find and return a list of pdf urls by searching for a partial match with the given title.
43
52
  def find_pdfs_by_title(title)
44
53
  paper = find_by_title(title)
45
54
 
@@ -48,6 +57,8 @@ module Whitepaper
48
57
  end
49
58
  end
50
59
 
60
+ # Downloads the first available pdf by searching for a partial match with the given title.
61
+ # The name of the file will be the title of the paper.
51
62
  def download_pdf_by_title(title)
52
63
  paper = find_by_title(title)
53
64
  paper.download
data/whitepaper.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |gem|
10
10
  gem.email = ["wilkie05@gmail.com"]
11
11
  gem.description = %q{Finds metadata on scholarly works and is able to download pdfs of whitepapers.}
12
12
  gem.summary = %q{Finds whitepaper metadata and pdf download links with a basic keyword query using web-based databases such as Google and CiteSeerX.}
13
- gem.homepage = ""
13
+ gem.homepage = "https://github.com/wilkie/whitepaper"
14
14
 
15
15
  gem.files = `git ls-files`.split($/)
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whitepaper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-27 00:00:00.000000000 Z
12
+ date: 2013-01-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -61,11 +61,10 @@ files:
61
61
  - lib/whitepaper/cli.rb
62
62
  - lib/whitepaper/engine/citeseerx.rb
63
63
  - lib/whitepaper/engine/google.rb
64
- - lib/whitepaper/finder.rb
65
64
  - lib/whitepaper/paper.rb
66
65
  - lib/whitepaper/version.rb
67
66
  - whitepaper.gemspec
68
- homepage: ''
67
+ homepage: https://github.com/wilkie/whitepaper
69
68
  licenses: []
70
69
  post_install_message:
71
70
  rdoc_options: []
@@ -1,4 +0,0 @@
1
- module Whitepaper
2
- class Finder
3
- end
4
- end