rubyscholar 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rubyscholar.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Yannick Wurm
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ # Rubyscholar
2
+
1
3
  # Synopsis
2
4
 
3
5
  Here is a small script to "scrape" your Google Scholar citations and reformat them (the way I need it for my website).
@@ -11,12 +13,13 @@ Some features:
11
13
 
12
14
  # How to use:
13
15
 
16
+ ### As a ruby script:
14
17
  1. Configure "config.yml"
15
18
  If you want DOI retrieval to work (including Altmetrics), you need to be
16
19
  registered at crossref (it's free).
17
- 2. Run `ruby bin/scrape.rb > mypublications.html`
18
- 3. Thats it.
19
-
20
+ 2. Run `ruby bin/scrape.rb`
21
+ 3. A scholar.html file is created with your publications from Google Scholar.
22
+ 4. That's it.
20
23
 
21
24
  # Potential for improvement:
22
25
 
@@ -36,5 +39,12 @@ RubyScholar was developed by Yannick Wurm (http://yannick.poulet.org). Pull requ
36
39
 
37
40
  # Copyright
38
41
 
39
- RubyScholar 2013 by Yannick Wurm. Licensed under the MIT license.
42
+ RubyScholar © 2013 by Yannick Wurm. Licensed under the MIT license.
43
+
44
+ ## Contributing
40
45
 
46
+ 1. Fork it
47
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
48
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
49
+ 4. Push to the branch (`git push origin my-new-feature`)
50
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/rubyscholar ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'rubygems'
5
+ require 'commander/import'
6
+ require 'rubyscholar'
7
+ require 'yaml'
8
+
9
+
10
+ program :name, 'rubysholar'
11
+ program :version, '0.0.2'
12
+ program :description, 'Rubyscholar scrapes google scholar and formats it into a scholar.html file.'
13
+
14
+ default_command :scrape
15
+
16
+ command :scrape do |c|
17
+ c.syntax = 'rubyscholar scrape [options]'
18
+ c.summary = ''
19
+ c.description = "Scape google scholar for new publications"
20
+
21
+ c.option '--config [Config File]', 'Config file to use'
22
+ c.option '--out [Output File]', 'File to output the scrapes to'
23
+
24
+ c.action do |args, options|
25
+ options.default \
26
+ :config => 'config.yml',
27
+ :out => 'scholar.html'
28
+ config = YAML.load_file('config.yml')
29
+ parsed = Rubyscholar::Parser.new(config["url"],
30
+ config["email"])
31
+ formatter = Rubyscholar::Formatter.new(parsed,
32
+ config["highlight"],
33
+ config["pdfs"],
34
+ config["altmetricDOIs"],
35
+ config["minCitations"].to_i)
36
+
37
+ html = formatter.to_html
38
+ config["italicize"].each do |term|
39
+ html.gsub!( term , '<em>' + term + '</em>')
40
+ end
41
+
42
+ f= File.open('scholar.html','w')
43
+ f.write html
44
+ f.close()
45
+ end
46
+ end
47
+
data/bin/scrape.rb CHANGED
@@ -1,11 +1,10 @@
1
1
  require_relative '../lib/rubyscholar'
2
2
  require 'yaml'
3
3
 
4
- def scrape()
5
4
  config = YAML.load_file('config.yml')
6
- parsed = RubyScholar::Parser.new(config["url"],
5
+ parsed = Rubyscholar::Parser.new(config["url"],
7
6
  config["email"])
8
- formatter = RubyScholar::Formatter.new(parsed,
7
+ formatter = Rubyscholar::Formatter.new(parsed,
9
8
  config["highlight"],
10
9
  config["pdfs"],
11
10
  config["altmetricDOIs"],
@@ -19,4 +18,3 @@ def scrape()
19
18
  f= File.open('scholar.html','w')
20
19
  f.write html
21
20
  f.close
22
- end
@@ -0,0 +1,3 @@
1
+ module Rubyscholar
2
+ VERSION = "0.0.3"
3
+ end
data/lib/rubyscholar.rb CHANGED
@@ -1,24 +1,26 @@
1
+ require "rubyscholar/version"
1
2
  require "nokogiri"
2
3
  require "open-uri"
3
4
 
4
- class String
5
+
6
+ class String
5
7
  def clean
6
8
  # removes leading and trailing whitespace, commas
7
9
  self.gsub!(/(^[\s,]+)|([\s,]+$)/, '')
8
10
  return self
9
11
  end
10
- end
12
+ end
11
13
 
12
- module RubyScholar
14
+ module Rubyscholar
13
15
  class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
14
- end
15
-
16
+ end
17
+
16
18
  class Parser
17
19
  attr_accessor :parsedPapers, :crossRefEmail
18
-
20
+
19
21
  def initialize(url, crossRefEmail = "")
20
22
  @parsedPapers = []
21
- @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
23
+ @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
22
24
  parse(url)
23
25
  end
24
26
 
@@ -41,7 +43,7 @@ module RubyScholar
41
43
  #citations
42
44
  citeInfo = paper.css(".cit-dark-link")
43
45
  citationCount = citeInfo.text
44
- citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
46
+ citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
45
47
 
46
48
  # get DOI: needs last name of first author, no funny chars
47
49
  lastNameFirstAuthor = ((authors.split(',').first ).split(' ').last ).gsub(/[^A-Za-z\-]/, '')
@@ -52,24 +54,24 @@ module RubyScholar
52
54
  STDOUT << "Scraped #{parsedPapers.length} from Google Scholar.\n"
53
55
  end
54
56
 
55
- # Scholar doesn't provide DOI.
56
- # But if registered at crossref (its free), DOI can be retreived.
57
+ # Scholar doesn't provide DOI.
58
+ # But if registered at crossref (it's free), DOI can be retrieved.
57
59
  def getDoi(lastNameFirstAuthor, title, crossRefEmail)
58
60
  return '' if @crossRefEmail.nil?
59
- sleep(1) # to reduce risk
61
+ sleep(1) # to reduce risk
60
62
  STDERR << "Getting DOI for paper by #{lastNameFirstAuthor}: #{title}.\n"
61
- url = 'http://www.crossref.org/openurl?redirect=false' +
62
- '&pid=' + crossRefEmail +
63
+ url = 'http://www.crossref.org/openurl?redirect=false' +
64
+ '&pid=' + crossRefEmail +
63
65
  '&aulast=' + lastNameFirstAuthor +
64
66
  '&atitle=' + URI.escape(title)
65
- crossRefXML = Nokogiri::XML(open(url))
67
+ crossRefXML = Nokogiri::XML(open(url))
66
68
  crossRefXML.search("doi").children.first.content rescue ''
67
69
  end
68
70
  end
69
-
71
+
70
72
  class Formatter
71
73
  attr_accessor :parser, :nameToHighlight, :pdfLinks, :altmetricDOIs
72
-
74
+
73
75
  def initialize(parser, nameToHighlight = nil, pdfLinks = {}, altmetricDOIs = [], minCitationCount = 1)
74
76
  @parser = parser
75
77
  @nameToHighlight = nameToHighlight
@@ -79,47 +81,47 @@ module RubyScholar
79
81
  end
80
82
 
81
83
  def to_html
82
- ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
84
+ ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
83
85
  builder = Nokogiri::HTML::Builder.new do |doc|
84
86
  doc.html {
85
87
  doc.body {
86
88
  @parser.parsedPapers.each_with_index { |paper, index|
87
89
  doc.div( :class => "publication") {
88
90
  doc.p {
89
- doc.text ((@parser.parsedPapers).length - index).to_s + '. '
91
+ doc.text ((@parser.parsedPapers).length - index).to_s + '. '
92
+
93
+ doc.b paper[:title] + '.'
94
+ doc.text ' (' + paper[:year] + '). '
90
95
 
91
96
  if paper[:authors].include?(@nameToHighlight)
92
97
  doc.text( paper[:authors].sub(Regexp.new(@nameToHighlight + '.*'), '') )
93
- doc.span( :class => "me") { doc.text @nameToHighlight }
98
+ doc.span( :class => "label label-info") { doc.text @nameToHighlight }
94
99
  doc.text( paper[:authors].sub(Regexp.new('.*' + @nameToHighlight), '') )
95
100
  else
96
101
  doc.text( paper[:authors])
97
102
  end
98
-
99
- doc.text ' ' + paper[:year] + '. '
100
- doc.b paper[:title] + '.'
103
+
101
104
  doc.br
102
105
  doc.em paper[:journalName]
103
106
  doc.text ' '
104
107
  doc.text paper[:journalDetails]
105
-
106
108
  unless paper[ :doi].empty?
107
109
  doc.text(' ')
108
- doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
109
- doc.text "[DOI]"
110
- }
110
+ doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
111
+ doc.text "[DOI]"
112
+ }
111
113
  end
112
114
  if @pdfLinks.keys.include?(paper[:title])
113
115
  doc.text(' ')
114
- doc.a( :href => @pdfLinks[paper[:title]]) {
116
+ doc.a( :href => @pdfLinks[paper[:title]]) {
115
117
  doc.text "[PDF]"
116
- }
118
+ }
117
119
  end
118
120
  if paper[ :citationCount].to_i > @minCitations
119
121
  doc.text(' ')
120
- doc.a( :href => paper[ :citingPapers]) {
121
- doc.text("[Cited #{paper[ :citationCount]}x]")
122
- }
122
+ doc.a( :href => paper[ :citingPapers]) {
123
+ doc.text("[Cited #{paper[ :citationCount]}x]")
124
+ }
123
125
  end
124
126
  if altmetricDOIs.include?( paper[ :doi])
125
127
  doc.text(' ')
@@ -137,5 +139,3 @@ module RubyScholar
137
139
  end
138
140
  end
139
141
  end
140
-
141
-
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rubyscholar/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "rubyscholar"
8
+ gem.version = Rubyscholar::VERSION
9
+ gem.authors = ["Yannick Wurm","Gaurav Koley"]
10
+ gem.email = ["y.wurm@qmul.ac.uk","arkokoley@live.in"]
11
+ gem.description = %q{Scrape Google Scholar}
12
+ gem.summary = %q{Rubyscholar scrapes google scholar and formats it into a scholar.html file.}
13
+ gem.homepage = ""
14
+
15
+ gem.add_dependency "nokogiri", "~>1.6.0"
16
+ gem.add_dependency "commander", "~>4.1.5"
17
+
18
+ gem.files = `git ls-files`.split($/)
19
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
20
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
21
+ gem.require_paths = ["lib"]
22
+ end
metadata CHANGED
@@ -1,15 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyscholar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Yannick Wurm
9
+ - Gaurav Koley
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
- date: 2013-08-18 00:00:00.000000000 Z
13
+ date: 2013-10-01 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: nokogiri
@@ -28,38 +29,44 @@ dependencies:
28
29
  - !ruby/object:Gem::Version
29
30
  version: 1.6.0
30
31
  - !ruby/object:Gem::Dependency
31
- name: rspec
32
+ name: commander
32
33
  requirement: !ruby/object:Gem::Requirement
33
34
  none: false
34
35
  requirements:
35
36
  - - ~>
36
37
  - !ruby/object:Gem::Version
37
- version: 2.5.0
38
- type: :development
38
+ version: 4.1.5
39
+ type: :runtime
39
40
  prerelease: false
40
41
  version_requirements: !ruby/object:Gem::Requirement
41
42
  none: false
42
43
  requirements:
43
44
  - - ~>
44
45
  - !ruby/object:Gem::Version
45
- version: 2.5.0
46
- description: A small script to "scrape" your Google Scholar citations and reformat
47
- them. It doesn't do a whole lot, but it's still useful.
46
+ version: 4.1.5
47
+ description: Scrape Google Scholar
48
48
  email:
49
49
  - y.wurm@qmul.ac.uk
50
+ - arkokoley@live.in
50
51
  executables:
52
+ - rubyscholar
51
53
  - scrape.rb
52
54
  extensions: []
53
55
  extra_rdoc_files: []
54
56
  files:
55
57
  - .gitignore
58
+ - Gemfile
59
+ - LICENSE.txt
56
60
  - README.md
61
+ - Rakefile
62
+ - bin/rubyscholar
57
63
  - bin/scrape.rb
58
64
  - config.yml
59
65
  - lib/rubyscholar.rb
66
+ - lib/rubyscholar/version.rb
67
+ - rubyscholar.gemspec
60
68
  homepage: ''
61
- licenses:
62
- - MIT
69
+ licenses: []
63
70
  post_install_message:
64
71
  rdoc_options: []
65
72
  require_paths:
@@ -81,5 +88,5 @@ rubyforge_project:
81
88
  rubygems_version: 1.8.23
82
89
  signing_key:
83
90
  specification_version: 3
84
- summary: RubyScholar - Scrape your Google Scholar citations.
91
+ summary: Rubyscholar scrapes google scholar and formats it into a scholar.html file.
85
92
  test_files: []