RubyGems - rubyscholar - Versions diffs - 0.0.2 → 0.0.3 - Mend

rubyscholar 0.0.2 → 0.0.3

Files changed (10) hide show

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+# Specify your gem's dependencies in rubyscholar.gemspec
+gemspec

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2013 Yannick Wurm
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.md CHANGED Viewed

@@ -1,3 +1,5 @@
+# Rubyscholar
 # Synopsis
 Here is a small script to "scrape" your Google Scholar citations and reformat them (the way I need it for my website).
@@ -11,12 +13,13 @@ Some features:
 # How to use:
+### As a ruby script:
 1. Configure "config.yml"
    If you want DOI retrieval to work (including Altmetrics), you need to be
    registered at crossref (it's free).
-2. Run `ruby bin/scrape.rb > mypublications.html`
-3. Thats it.
+2. Run `ruby bin/scrape.rb`
+3. A scholar.html file is created with your publications from Google Scholar.
+4. That's it.
 # Potential for improvement:
@@ -36,5 +39,12 @@ RubyScholar was developed by Yannick Wurm (http://yannick.poulet.org). Pull requ
 # Copyright
-RubyScholar � 2013 by Yannick Wurm. Licensed under the MIT license.
+RubyScholar © 2013 by Yannick Wurm. Licensed under the MIT license.
+## Contributing
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/Rakefile ADDED Viewed

	@@ -0,0 +1 @@
1	+ require "bundler/gem_tasks"

data/bin/rubyscholar ADDED Viewed

@@ -0,0 +1,47 @@
+#!/usr/bin/env ruby
+# Command-line entry point for rubyscholar: loads a YAML config, scrapes the
+# Google Scholar page named there, and writes the formatted HTML to a file.
+require 'optparse'
+require 'rubygems'
+require 'commander/import'
+require 'rubyscholar'
+require 'yaml'
+program :name, 'rubyscholar'
+# Report the loaded gem's version rather than a hard-coded string that goes stale.
+program :version, Rubyscholar::VERSION
+program :description, 'Rubyscholar scrapes google scholar and formats it into a scholar.html file.'
+default_command :scrape
+command :scrape do |c|
+  c.syntax = 'rubyscholar scrape [options]'
+  c.summary = ''
+  c.description = "Scrape google scholar for new publications"
+  c.option '--config [Config File]', 'Config file to use'
+  c.option '--out [Output File]', 'File to output the scrapes to'
+  c.action do |args, options|
+    # Fall back to the historical file names when the flags are omitted.
+    options.default \
+      :config => 'config.yml',
+      :out    => 'scholar.html'
+    # Honour --config instead of always reading ./config.yml.
+    config    = YAML.load_file(options.config)
+    parsed    = Rubyscholar::Parser.new(config["url"],
+                                        config["email"])
+    formatter = Rubyscholar::Formatter.new(parsed,
+                                           config["highlight"],
+                                           config["pdfs"],
+                                           config["altmetricDOIs"],
+                                           config["minCitations"].to_i)
+    html = formatter.to_html
+    # "italicize" may be missing from the config; Array() turns nil into [].
+    Array(config["italicize"]).each do |term|
+      # Block form inserts the term literally, so a backslash in it is not
+      # treated as a back-reference in the replacement.
+      html = html.gsub(term) { '<em>' + term + '</em>' }
+    end
+    # Honour --out; the block form closes the file even if the write raises.
+    File.open(options.out, 'w') { |f| f.write html }
+  end
+end

data/bin/scrape.rb CHANGED Viewed

@@ -1,11 +1,10 @@
 require_relative '../lib/rubyscholar'
 require 'yaml'
-def scrape()
     config    = YAML.load_file('config.yml')
-    parsed    = RubyScholar::Parser.new(config["url"],
+    parsed    = Rubyscholar::Parser.new(config["url"],
                                     config["email"])
-    formatter = RubyScholar::Formatter.new(parsed,
+    formatter = Rubyscholar::Formatter.new(parsed,
                                        config["highlight"],
                                        config["pdfs"],
                                        config["altmetricDOIs"],
@@ -19,4 +18,3 @@ def scrape()
     f= File.open('scholar.html','w')
     f.write html
     f.close
-end

data/lib/rubyscholar/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+# Top-level namespace of the rubyscholar gem; this file only defines its version.
+module Rubyscholar
+  # Release version string; rubyscholar.gemspec reads it for gem.version.
+  VERSION = "0.0.3"
+end

data/lib/rubyscholar.rb CHANGED Viewed

@@ -1,24 +1,26 @@
+require "rubyscholar/version"
 require "nokogiri"
 require "open-uri"
-class String
+ class String
   def clean
     # removes leading and trailing whitespace, commas
     self.gsub!(/(^[\s,]+)|([\s,]+$)/, '')
     return self
   end
-end
+ end
-module RubyScholar
+module Rubyscholar
   class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
-  end
+  end
   class Parser
     attr_accessor :parsedPapers, :crossRefEmail
     def initialize(url, crossRefEmail = "")
       @parsedPapers  = []
-      @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
+      @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
       parse(url)
     end
@@ -41,7 +43,7 @@ module RubyScholar
         #citations
         citeInfo      = paper.css(".cit-dark-link")
         citationCount = citeInfo.text
-        citationUrl   = citationCount.empty?  ? nil : citeInfo.attribute('href').to_s
+        citationUrl   = citationCount.empty?  ? nil : citeInfo.attribute('href').to_s
         # get DOI: needs last name of first author, no funny chars
         lastNameFirstAuthor = ((authors.split(',').first ).split(' ').last ).gsub(/[^A-Za-z\-]/, '')
@@ -52,24 +54,24 @@ module RubyScholar
       STDOUT << "Scraped #{parsedPapers.length} from Google Scholar.\n"
     end
-    # Scholar doesn't provide DOI.
-    # But if registered at crossref (its free), DOI can be retreived.
+    # Scholar doesn't provide DOI.
+    # But if registered at crossref (it's free), DOI can be retrieved.
     def getDoi(lastNameFirstAuthor, title, crossRefEmail)
       return '' if @crossRefEmail.nil?
-      sleep(1) # to reduce risk
+      sleep(1) # to reduce risk
       STDERR << "Getting DOI for paper by #{lastNameFirstAuthor}: #{title}.\n"
-      url = 'http://www.crossref.org/openurl?redirect=false' +
-        '&pid='    + crossRefEmail +
+      url = 'http://www.crossref.org/openurl?redirect=false' +
+        '&pid='    + crossRefEmail +
         '&aulast=' + lastNameFirstAuthor   +
         '&atitle=' + URI.escape(title)
-      crossRefXML = Nokogiri::XML(open(url))
+      crossRefXML = Nokogiri::XML(open(url))
       crossRefXML.search("doi").children.first.content rescue ''
     end
   end
   class Formatter
     attr_accessor :parser, :nameToHighlight, :pdfLinks, :altmetricDOIs
     def initialize(parser, nameToHighlight = nil, pdfLinks = {}, altmetricDOIs = [], minCitationCount = 1)
       @parser          = parser
       @nameToHighlight = nameToHighlight
@@ -79,47 +81,47 @@ module RubyScholar
     end
     def to_html
-      ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
+      ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
       builder = Nokogiri::HTML::Builder.new do |doc|
         doc.html {
           doc.body {
             @parser.parsedPapers.each_with_index { |paper, index|
               doc.div( :class => "publication") {
                 doc.p {
-                  doc.text ((@parser.parsedPapers).length - index).to_s + '. '
+                  doc.text ((@parser.parsedPapers).length - index).to_s + '. '
+                  doc.b    paper[:title] + '.'
+                  doc.text ' (' + paper[:year] + '). '
                   if paper[:authors].include?(@nameToHighlight)
                     doc.text( paper[:authors].sub(Regexp.new(@nameToHighlight + '.*'), '') )
-                    doc.span( :class => "me") { doc.text @nameToHighlight }
+                    doc.span( :class => "label label-info") { doc.text @nameToHighlight }
                     doc.text( paper[:authors].sub(Regexp.new('.*' + @nameToHighlight), '') )
                   else
                     doc.text( paper[:authors])
                   end
-                  doc.text ' ' + paper[:year] + '. '
-                  doc.b    paper[:title] + '.'
                   doc.br
                   doc.em   paper[:journalName]
                   doc.text ' '
                   doc.text paper[:journalDetails]
                   unless paper[ :doi].empty?
                     doc.text(' ')
-                    doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi]))  {
-                      doc.text "[DOI]"
-                    }
+                    doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi]))  {
+                      doc.text "[DOI]"
+                    }
                   end
                   if @pdfLinks.keys.include?(paper[:title])
                     doc.text(' ')
-                    doc.a( :href => @pdfLinks[paper[:title]])  {
+                    doc.a( :href => @pdfLinks[paper[:title]])  {
                       doc.text "[PDF]"
-                    }
+                    }
                   end
                   if paper[ :citationCount].to_i > @minCitations
                     doc.text(' ')
-                    doc.a( :href => paper[ :citingPapers]) {
-                      doc.text("[Cited #{paper[ :citationCount]}x]")
-                    }
+                    doc.a( :href => paper[ :citingPapers]) {
+                      doc.text("[Cited #{paper[ :citationCount]}x]")
+                    }
                   end
                   if altmetricDOIs.include?( paper[ :doi])
                     doc.text(' ')
@@ -137,5 +139,3 @@ module RubyScholar
     end
   end
 end

data/rubyscholar.gemspec ADDED Viewed

@@ -0,0 +1,22 @@
+# -*- encoding: utf-8 -*-
+# Gem specification for rubyscholar. The version comes from
+# lib/rubyscholar/version.rb, so lib/ is put on the load path first.
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'rubyscholar/version'
+Gem::Specification.new do |gem|
+  gem.name          = "rubyscholar"
+  gem.version       = Rubyscholar::VERSION
+  gem.authors       = ["Yannick Wurm","Gaurav Koley"]
+  gem.email         = ["y.wurm@qmul.ac.uk","arkokoley@live.in"]
+  gem.description   = %q{Scrape Google Scholar}
+  gem.summary       = %q{Rubyscholar scrapes google scholar and formats it into a scholar.html file.}
+  # NOTE(review): homepage is empty; fill in the project URL before the next release.
+  gem.homepage      = ""
+  # The project ships LICENSE.txt (MIT); declare it so the built gem's
+  # metadata does not end up with an empty licenses list.
+  gem.licenses      = ["MIT"]
+  gem.add_dependency "nokogiri", "~>1.6.0"
+  gem.add_dependency "commander", "~>4.1.5"
+  # Package every file tracked by git; bin/ entries become executables.
+  gem.files         = `git ls-files`.split($/)
+  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.require_paths = ["lib"]
+end

metadata CHANGED Viewed

@@ -1,15 +1,16 @@
 --- !ruby/object:Gem::Specification
 name: rubyscholar
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
   prerelease:
 platform: ruby
 authors:
 - Yannick Wurm
+- Gaurav Koley
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-08-18 00:00:00.000000000 Z
+date: 2013-10-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -28,38 +29,44 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 1.6.0
 - !ruby/object:Gem::Dependency
-  name: rspec
+  name: commander
   requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 2.5.0
-  type: :development
+        version: 4.1.5
+  type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 2.5.0
-description: A small script to "scrape" your Google Scholar citations and reformat
-  them. It doesn't do a whole lot, but it's still useful.
+        version: 4.1.5
+description: Scrape Google Scholar
 email:
 - y.wurm@qmul.ac.uk
+- arkokoley@live.in
 executables:
+- rubyscholar
 - scrape.rb
 extensions: []
 extra_rdoc_files: []
 files:
 - .gitignore
+- Gemfile
+- LICENSE.txt
 - README.md
+- Rakefile
+- bin/rubyscholar
 - bin/scrape.rb
 - config.yml
 - lib/rubyscholar.rb
+- lib/rubyscholar/version.rb
+- rubyscholar.gemspec
 homepage: ''
-licenses:
-- MIT
+licenses: []
 post_install_message:
 rdoc_options: []
 require_paths:
@@ -81,5 +88,5 @@ rubyforge_project:
 rubygems_version: 1.8.23
 signing_key:
 specification_version: 3
-summary: RubyScholar - Scrape your Google Scholar citations.
+summary: Rubyscholar scrapes google scholar and formats it into a scholar.html file.
 test_files: []