rubyscholar 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rubyscholar.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Yannick Wurm
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ # Rubyscholar
2
+
1
3
  # Synopsis
2
4
 
3
5
  Here is a small script to "scrape" your Google Scholar citations and reformat them (the way I need it for my website).
@@ -11,12 +13,13 @@ Some features:
11
13
 
12
14
  # How to use:
13
15
 
16
+ ### As a ruby script:
14
17
  1. Configure "config.yml"
15
18
  If you want DOI retrieval to work (including Altmetrics), you need to be
16
19
  registered at crossref (it's free).
17
- 2. Run `ruby bin/scrape.rb > mypublications.html`
18
- 3. Thats it.
19
-
20
+ 2. Run `ruby bin/scrape.rb`
21
+ 3. A scholar.html file is created with your publications from google scholar.
22
+ 4. That's it.
20
23
 
21
24
  # Potential for improvement:
22
25
 
@@ -36,5 +39,12 @@ RubyScholar was developed by Yannick Wurm (http://yannick.poulet.org). Pull requ
36
39
 
37
40
  # Copyright
38
41
 
39
- RubyScholar 2013 by Yannick Wurm. Licensed under the MIT license.
42
+ RubyScholar © 2013 by Yannick Wurm. Licensed under the MIT license.
43
+
44
+ ## Contributing
40
45
 
46
+ 1. Fork it
47
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
48
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
49
+ 4. Push to the branch (`git push origin my-new-feature`)
50
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/rubyscholar ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'rubygems'
5
+ require 'commander/import'
6
+ require 'rubyscholar'
7
+ require 'yaml'
8
+
9
+
10
+ program :name, 'rubysholar'
11
+ program :version, '0.0.2'
12
+ program :description, 'Rubyscholar scrapes google scholar and formats it into a scholar.html file.'
13
+
14
+ default_command :scrape
15
+
16
+ command :scrape do |c|
17
+ c.syntax = 'rubyscholar scrape [options]'
18
+ c.summary = ''
19
+ c.description = "Scape google scholar for new publications"
20
+
21
+ c.option '--config [Config File]', 'Config file to use'
22
+ c.option '--out [Output File]', 'File to output the scrapes to'
23
+
24
+ c.action do |args, options|
25
+ options.default \
26
+ :config => 'config.yml',
27
+ :out => 'scholar.html'
28
+ config = YAML.load_file('config.yml')
29
+ parsed = Rubyscholar::Parser.new(config["url"],
30
+ config["email"])
31
+ formatter = Rubyscholar::Formatter.new(parsed,
32
+ config["highlight"],
33
+ config["pdfs"],
34
+ config["altmetricDOIs"],
35
+ config["minCitations"].to_i)
36
+
37
+ html = formatter.to_html
38
+ config["italicize"].each do |term|
39
+ html.gsub!( term , '<em>' + term + '</em>')
40
+ end
41
+
42
+ f= File.open('scholar.html','w')
43
+ f.write html
44
+ f.close()
45
+ end
46
+ end
47
+
data/bin/scrape.rb CHANGED
@@ -1,11 +1,10 @@
1
1
  require_relative '../lib/rubyscholar'
2
2
  require 'yaml'
3
3
 
4
- def scrape()
5
4
  config = YAML.load_file('config.yml')
6
- parsed = RubyScholar::Parser.new(config["url"],
5
+ parsed = Rubyscholar::Parser.new(config["url"],
7
6
  config["email"])
8
- formatter = RubyScholar::Formatter.new(parsed,
7
+ formatter = Rubyscholar::Formatter.new(parsed,
9
8
  config["highlight"],
10
9
  config["pdfs"],
11
10
  config["altmetricDOIs"],
@@ -19,4 +18,3 @@ def scrape()
19
18
  f= File.open('scholar.html','w')
20
19
  f.write html
21
20
  f.close
22
- end
@@ -0,0 +1,3 @@
1
+ module Rubyscholar
2
+ VERSION = "0.0.3"
3
+ end
data/lib/rubyscholar.rb CHANGED
@@ -1,24 +1,26 @@
1
+ require "rubyscholar/version"
1
2
  require "nokogiri"
2
3
  require "open-uri"
3
4
 
4
- class String
5
+
6
+ class String
5
7
  def clean
6
8
  # removes leading and trailing whitespace, commas
7
9
  self.gsub!(/(^[\s,]+)|([\s,]+$)/, '')
8
10
  return self
9
11
  end
10
- end
12
+ end
11
13
 
12
- module RubyScholar
14
+ module Rubyscholar
13
15
  class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
14
- end
15
-
16
+ end
17
+
16
18
  class Parser
17
19
  attr_accessor :parsedPapers, :crossRefEmail
18
-
20
+
19
21
  def initialize(url, crossRefEmail = "")
20
22
  @parsedPapers = []
21
- @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
23
+ @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
22
24
  parse(url)
23
25
  end
24
26
 
@@ -41,7 +43,7 @@ module RubyScholar
41
43
  #citations
42
44
  citeInfo = paper.css(".cit-dark-link")
43
45
  citationCount = citeInfo.text
44
- citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
46
+ citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
45
47
 
46
48
  # get DOI: needs last name of first author, no funny chars
47
49
  lastNameFirstAuthor = ((authors.split(',').first ).split(' ').last ).gsub(/[^A-Za-z\-]/, '')
@@ -52,24 +54,24 @@ module RubyScholar
52
54
  STDOUT << "Scraped #{parsedPapers.length} from Google Scholar.\n"
53
55
  end
54
56
 
55
- # Scholar doesn't provide DOI.
56
- # But if registered at crossref (its free), DOI can be retreived.
57
+ # Scholar doesn't provide DOI.
58
+ # But if registered at crossref (it's free), DOI can be retrieved.
57
59
  def getDoi(lastNameFirstAuthor, title, crossRefEmail)
58
60
  return '' if @crossRefEmail.nil?
59
- sleep(1) # to reduce risk
61
+ sleep(1) # to reduce risk
60
62
  STDERR << "Getting DOI for paper by #{lastNameFirstAuthor}: #{title}.\n"
61
- url = 'http://www.crossref.org/openurl?redirect=false' +
62
- '&pid=' + crossRefEmail +
63
+ url = 'http://www.crossref.org/openurl?redirect=false' +
64
+ '&pid=' + crossRefEmail +
63
65
  '&aulast=' + lastNameFirstAuthor +
64
66
  '&atitle=' + URI.escape(title)
65
- crossRefXML = Nokogiri::XML(open(url))
67
+ crossRefXML = Nokogiri::XML(open(url))
66
68
  crossRefXML.search("doi").children.first.content rescue ''
67
69
  end
68
70
  end
69
-
71
+
70
72
  class Formatter
71
73
  attr_accessor :parser, :nameToHighlight, :pdfLinks, :altmetricDOIs
72
-
74
+
73
75
  def initialize(parser, nameToHighlight = nil, pdfLinks = {}, altmetricDOIs = [], minCitationCount = 1)
74
76
  @parser = parser
75
77
  @nameToHighlight = nameToHighlight
@@ -79,47 +81,47 @@ module RubyScholar
79
81
  end
80
82
 
81
83
  def to_html
82
- ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
84
+ ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
83
85
  builder = Nokogiri::HTML::Builder.new do |doc|
84
86
  doc.html {
85
87
  doc.body {
86
88
  @parser.parsedPapers.each_with_index { |paper, index|
87
89
  doc.div( :class => "publication") {
88
90
  doc.p {
89
- doc.text ((@parser.parsedPapers).length - index).to_s + '. '
91
+ doc.text ((@parser.parsedPapers).length - index).to_s + '. '
92
+
93
+ doc.b paper[:title] + '.'
94
+ doc.text ' (' + paper[:year] + '). '
90
95
 
91
96
  if paper[:authors].include?(@nameToHighlight)
92
97
  doc.text( paper[:authors].sub(Regexp.new(@nameToHighlight + '.*'), '') )
93
- doc.span( :class => "me") { doc.text @nameToHighlight }
98
+ doc.span( :class => "label label-info") { doc.text @nameToHighlight }
94
99
  doc.text( paper[:authors].sub(Regexp.new('.*' + @nameToHighlight), '') )
95
100
  else
96
101
  doc.text( paper[:authors])
97
102
  end
98
-
99
- doc.text ' ' + paper[:year] + '. '
100
- doc.b paper[:title] + '.'
103
+
101
104
  doc.br
102
105
  doc.em paper[:journalName]
103
106
  doc.text ' '
104
107
  doc.text paper[:journalDetails]
105
-
106
108
  unless paper[ :doi].empty?
107
109
  doc.text(' ')
108
- doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
109
- doc.text "[DOI]"
110
- }
110
+ doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
111
+ doc.text "[DOI]"
112
+ }
111
113
  end
112
114
  if @pdfLinks.keys.include?(paper[:title])
113
115
  doc.text(' ')
114
- doc.a( :href => @pdfLinks[paper[:title]]) {
116
+ doc.a( :href => @pdfLinks[paper[:title]]) {
115
117
  doc.text "[PDF]"
116
- }
118
+ }
117
119
  end
118
120
  if paper[ :citationCount].to_i > @minCitations
119
121
  doc.text(' ')
120
- doc.a( :href => paper[ :citingPapers]) {
121
- doc.text("[Cited #{paper[ :citationCount]}x]")
122
- }
122
+ doc.a( :href => paper[ :citingPapers]) {
123
+ doc.text("[Cited #{paper[ :citationCount]}x]")
124
+ }
123
125
  end
124
126
  if altmetricDOIs.include?( paper[ :doi])
125
127
  doc.text(' ')
@@ -137,5 +139,3 @@ module RubyScholar
137
139
  end
138
140
  end
139
141
  end
140
-
141
-
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rubyscholar/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "rubyscholar"
8
+ gem.version = Rubyscholar::VERSION
9
+ gem.authors = ["Yannick Wurm","Gaurav Koley"]
10
+ gem.email = ["y.wurm@qmul.ac.uk","arkokoley@live.in"]
11
+ gem.description = %q{Scrape Google Scholar}
12
+ gem.summary = %q{Rubyscholar scrapes google scholar and formats it into a scholar.html file.}
13
+ gem.homepage = ""
14
+
15
+ gem.add_dependency "nokogiri", "~>1.6.0"
16
+ gem.add_dependency "commander", "~>4.1.5"
17
+
18
+ gem.files = `git ls-files`.split($/)
19
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
20
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
21
+ gem.require_paths = ["lib"]
22
+ end
metadata CHANGED
@@ -1,15 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyscholar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Yannick Wurm
9
+ - Gaurav Koley
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
- date: 2013-08-18 00:00:00.000000000 Z
13
+ date: 2013-10-01 00:00:00.000000000 Z
13
14
  dependencies:
14
15
  - !ruby/object:Gem::Dependency
15
16
  name: nokogiri
@@ -28,38 +29,44 @@ dependencies:
28
29
  - !ruby/object:Gem::Version
29
30
  version: 1.6.0
30
31
  - !ruby/object:Gem::Dependency
31
- name: rspec
32
+ name: commander
32
33
  requirement: !ruby/object:Gem::Requirement
33
34
  none: false
34
35
  requirements:
35
36
  - - ~>
36
37
  - !ruby/object:Gem::Version
37
- version: 2.5.0
38
- type: :development
38
+ version: 4.1.5
39
+ type: :runtime
39
40
  prerelease: false
40
41
  version_requirements: !ruby/object:Gem::Requirement
41
42
  none: false
42
43
  requirements:
43
44
  - - ~>
44
45
  - !ruby/object:Gem::Version
45
- version: 2.5.0
46
- description: A small script to "scrape" your Google Scholar citations and reformat
47
- them. It doesn't do a whole lot, but it's still useful.
46
+ version: 4.1.5
47
+ description: Scrape Google Scholar
48
48
  email:
49
49
  - y.wurm@qmul.ac.uk
50
+ - arkokoley@live.in
50
51
  executables:
52
+ - rubyscholar
51
53
  - scrape.rb
52
54
  extensions: []
53
55
  extra_rdoc_files: []
54
56
  files:
55
57
  - .gitignore
58
+ - Gemfile
59
+ - LICENSE.txt
56
60
  - README.md
61
+ - Rakefile
62
+ - bin/rubyscholar
57
63
  - bin/scrape.rb
58
64
  - config.yml
59
65
  - lib/rubyscholar.rb
66
+ - lib/rubyscholar/version.rb
67
+ - rubyscholar.gemspec
60
68
  homepage: ''
61
- licenses:
62
- - MIT
69
+ licenses: []
63
70
  post_install_message:
64
71
  rdoc_options: []
65
72
  require_paths:
@@ -81,5 +88,5 @@ rubyforge_project:
81
88
  rubygems_version: 1.8.23
82
89
  signing_key:
83
90
  specification_version: 3
84
- summary: RubyScholar - Scrape your Google Scholar citations.
91
+ summary: Rubyscholar scrapes google scholar and formats it into a scholar.html file.
85
92
  test_files: []