rubyscholar 0.0.5 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 68cc9491e42d735441cfd82e8e18c108a84ec05d
4
+ data.tar.gz: 5cc483aa8e259775a599b588cf658a219d5389f9
5
+ SHA512:
6
+ metadata.gz: de91f4aa92392ce239fe9e093ca804f6725fa0ae05a1c6419ff49022cb3f12888961a8ea2f65c9fb35590b8b446620e5694385e2e35cf159b8beab632fc8e76d
7
+ data.tar.gz: face85b2e90f71e9d6c37e281a81a4e78011975e1ae820449fb2b995863d7195219ad4794fd7137a5420d2ae5a2f6b19433bc90a51f55883ea70a50d4d4b47f9
data/README.md CHANGED
@@ -13,16 +13,6 @@ Some features:
13
13
 
14
14
  # How to use:
15
15
 
16
- ### As a Ruby Gem:
17
- 1. Install the gem using: `[sudo] gem install rubyscholar`
18
- 2. Create and configure a `config.yml` file.
19
- To create a `config.yml` file, run `$ rubyscholar init`
20
- Edit the file, filling in your details.
21
- 3. Run as `$ rubyscholar scrape --out file.html `.
22
- 4. A `file.html` file is created containing your citations all formatted
23
- and ready to use.
24
- 5. Done!
25
-
26
16
  ### As a ruby script:
27
17
  1. Configure "config.yml"
28
18
  If you want DOI retrieval to work (including Altmetrics), you need to be
@@ -35,8 +25,10 @@ and ready to use.
35
25
 
36
26
  * uses author list as visible on your main Google Scholar page. Sometimes this
37
27
  means names are chopped in two or just a single author is missing. This could
38
- be made smarter.
39
- * flexible output
28
+ be made smarter (by following the link to get the full author list).
29
+ * output format could be more flexible (e.g. change the order, such as title before authors, or change the formatting, such as removing the first initial). Perhaps this could be done by providing a regexp search/replace configuration option within each field.
30
+ * Ensure that a true email is entered.
31
+ * right now only works from "user profile" pages, not from "articles citing article" pages.
40
32
  * flexible use of DOIs
41
33
 
42
34
  # Technologies
data/bin/rubyscholar CHANGED
@@ -1,76 +1,50 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'optparse'
4
3
  require 'rubygems'
4
+ require 'optparse' # YW is this needed or redundant with commander?
5
5
  require 'commander/import'
6
- require 'rubyscholar'
7
6
  require 'yaml'
7
+ require 'rubyscholar-main'
8
8
 
9
9
 
10
- program :name, 'rubysholar'
11
- program :version, '0.0.2'
10
+ program :name, 'rubyscholar'
11
+ program :version, Rubyscholar::VERSION
12
12
  program :description, 'Rubyscholar scrapes google scholar and formats it into a scholar.html file.'
13
13
 
14
14
  default_command :scrape
15
15
 
16
16
  command :scrape do |c|
17
17
  c.syntax = 'rubyscholar scrape [options]'
18
- c.summary = ''
19
- c.description = "Scape google scholar for new publications"
18
+ c.description = "Scrape Google Scholar for new publications"
20
19
 
21
20
  c.option '--config [Config File]', 'Config file to use'
22
- c.option '--out [Output File]', 'File to output the scrapes to'
23
-
24
- c.action do |args, options|
25
- options.default :config => "config.yml", :out => "scholar.html"
26
- configFile= "#{options.config}"
27
- config = YAML.load_file(configFile)
28
- parsed = Rubyscholar::Parser.new(config["url"],
29
- config["email"])
30
- formatter = Rubyscholar::Formatter.new(parsed,
31
- config["highlight"],
32
- config["pdfs"],
33
- config["altmetricDOIs"],
34
- config["minCitations"].to_i)
35
-
36
- html = formatter.to_html
37
- config["italicize"].each do |term|
38
- html.gsub!( term , '<em>' + term + '</em>')
39
- end
40
- outFile="#{options.out}"
41
- f= File.open(outFile,'w')
42
- f.write html
43
- f.close()
44
- end
45
- end
46
-
47
- command :init do |c|
48
- c.syntax = 'rubyscholar init'
49
- c.description = 'Creates a sample config.yml file for Scraping.'
21
+ c.option '--output [Output File]', 'HTML output for publication list'
22
+
50
23
  c.action do |args, options|
51
- say '# Intialising a configuration file for Rubyscholar.'
52
- if File.exist?("config.yml")
53
- abort("Initialisation aborted!") if ask("config.yml already exists. Do you want to overwrite?", ['y', 'n']) == 'n'
24
+ options.default \
25
+ :config => 'config.yaml',
26
+ :output => 'publications.html'
27
+ # The rest stays in this block because we need access to options
28
+
29
+ raise IOError, "You must specify config file via --config\n" if options.config.nil?
30
+ config = YAML.load_file(options.config)
31
+ parsed = Rubyscholar::Parser.new(config["url"],
32
+ config["email"])
33
+ html = Rubyscholar::Formatter.new(parsed,
34
+ config["highlight"],
35
+ config["pdfs"],
36
+ config["altmetricDOIs"],
37
+ config["minCitations"].to_i
38
+ ).to_html
39
+
40
+ config["italicize"].each do |term|
41
+ html.gsub!( term , '<em>' + term + '</em>')
54
42
  end
55
- puts "# Creating config.yml."
56
- open("config.yml", 'w') do |page|
57
- page.puts "# Google Scholar page (you can choose how you sort it)"
58
- page.puts "url: "
59
- page.puts "\n\n# Name to highlight (withing \" and \")"
60
- page.puts "highlight: "
61
- page.puts "\n\n# Need an Email address that has been registered with CrossRef to obtain DOIs using their OpenURL service. "
62
- page.puts "# e.g. the following should provide an XML file: "
63
- page.puts "# http://www.crossref.org/openurl?redirect=false&pid=YOUR@EMAIL>COM&aulast=Wurm&atitle=Behavioral%20Genomics:%20A,%20Bee,%20C,%20G,%20T"
64
- page.puts "email: your@email.com"
65
- page.puts "\n\n# Show \"[Cited Nx]\" if N > the following number"
66
- page.puts "minCitations: 5 "
67
- page.puts "\n\n# Words to italicize (emphasize). These will have \"<em>\" around them. "
68
- page.puts "italicize: "
69
- page.puts "\n\n# DOIs of articles for which we should show altmetric.org badges. "
70
- page.puts "altmetricDOIs: "
71
- page.puts "\n\n# Article titles for which we have urls to PDFs in \"name\" : \"url\" format"
72
- page.puts "pdfs:"
73
- end
74
- end
43
+
44
+ STDERR << "Outputting to #{options.output}\n."
45
+ f = File.open(options.output,'w')
46
+ f.write html
47
+ f.close
48
+ end
75
49
  end
76
50
 
@@ -0,0 +1,51 @@
1
+ # Google Scholar page (you can choose how you sort it)
2
+ url: "http://scholar.google.com/citations?hl=en&user=k6y0EGsAAAAJ&sortby=pubdate&view_op=list_works&pagesize=100"
3
+
4
+ #### Everything below this line is OPTIONAL ####
5
+
6
+ # Name to highlight
7
+ highlight: "Y Wurm"
8
+
9
+
10
+ # Need an Email address that has been registered with CrossRef to obtain DOIs
11
+ # using their OpenURL service.
12
+ # e.g. the following should provide an XML file:
13
+ # http://www.crossref.org/openurl?redirect=false&pid=YOUR@EMAIL.COM&aulast=Wurm&atitle=Behavioral%20Genomics:%20A,%20Bee,%20C,%20G,%20T
14
+ email: your@email.com
15
+
16
+
17
+ # Show "[Cited Nx]" if N > the following number
18
+ minCitations: 5
19
+
20
+ # Words to italicize (emphasize). These will have "<em>" around them.
21
+ italicize:
22
+ - Solenopsis invicta
23
+ - Acromyrmex echinatior
24
+ - de novo
25
+
26
+ # DOIs of articles for which we should show altmetric.org badges.
27
+ altmetricDOIs:
28
+ - "10.1038/nature11832"
29
+ - "10.1101/gr.121392.111"
30
+ - "10.1073/pnas.1009690108"
31
+ - "10.1073/pnas.1104825108"
32
+
33
+ # Article titles for which we have urls to PDFs
34
+ pdfs:
35
+ "A Y-like social chromosome causes alternative colony organization in fire ants" : "/publications/wangwurm2013socialChromosome.pdf"
36
+ "Duplication and concerted evolution in a master sex determiner under balancing selection" : "/publications/procb2013.pdf"
37
+ "Comparative genomics of chemosensory protein genes reveals rapid evolution and positive selection in ant-specific duplicates" : "/publications/hdy2012122a.pdf"
38
+ "The Molecular Clockwork of the Fire Ant Solenopsis invicta" : "/publications/ingram2012-fireAntClockGenes.pdf"
39
+ "Epigenetics: The Making of Ant Castes" : "/publications/2012CurrBiolAntepigenetics.pdf"
40
+ "Visualization and quality assessment of de novo genome assemblies" : "/publications/Bioinformatics-2011-Riba-Grognuz-3425-6"
41
+ "The genomic impact of 100 million years of social evolution in seven ant species" : "/publications/TiG2011.pdf"
42
+ "Relaxed selection is a precursor to the evolution of phenotypic plasticity" : "/publications/hunt2011phenotypicPlasticity.pdf"
43
+ "The genome of the leaf-cutting ant Acromyrmex echinatior suggests key adaptations to advanced social life and fungus farming" : "/publications/nygaard2011-acromyrmex-genome.pdf"
44
+ "Behind the Scenes of an Ant Genome Project" : "/publications/wurm2011antGenomeBehindTheScenes.pdf"
45
+ "The genome of the fire ant Solenopsis invicta" : "/publications/wurm2011fireAntGenome.pdf"
46
+ "Odorant Binding Proteins of the Red Imported Fire Ant, Solenopsis invicta: An Example of the Problems Facing the Analysis of Widely Divergent Proteins" : "/publications/gotzek2011obps.pdf"
47
+ "Parasitoid Wasps: From Natural History to Genomic Studies" : "/publications/wurm2010wasps.pdf"
48
+ "Changes in reproductive roles are associated with changes in gene expression in fire ant queens" : "/publications/wurm2010fireAntQueenDealationExpression.pdf"
49
+ "Fourmidable: a database for ant genomics" : "/publications/wurm2009antDatabase.pdf"
50
+ "Behavioral Genomics: A, Bee, C, G, T" : "/publications/wurm2007bees.pdf"
51
+ "An annotated cDNA library and microarray for large-scale gene-expression studies in the ant Solenopsis invicta" : "/publications/wang2007fireAntMicroarrays.pdf"
@@ -0,0 +1,138 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'rubyscholar-version'
4
+
5
+
6
+ class String
7
+ def clean
8
+ # removes leading and trailing whitespace, commas
9
+ self.gsub!(/(^[\s,]+)|([\s,]+$)/, '')
10
+ return self
11
+ end
12
+ end
13
+
14
+ module Rubyscholar
15
+ class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
16
+ end
17
+
18
+ class Parser
19
+ attr_accessor :parsedPapers, :crossRefEmail
20
+
21
+ def initialize(url, crossRefEmail = "")
22
+ @parsedPapers = []
23
+ @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
24
+ parse(url)
25
+ end
26
+
27
+ def parse(url)
28
+ STDERR << "Will check #{url}.\n"
29
+ page = Nokogiri::HTML(open(url,
30
+ 'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2'), nil, 'utf-8')
31
+ papers = page.css(".gsc_a_tr")
32
+ STDERR << "Found #{papers.length} papers.\n"
33
+ papers.each do |paper|
34
+ title = paper.css(".gsc_a_at").text rescue ''
35
+ title.gsub!(/\.$/, '')
36
+
37
+ googleUrl = paper.children[0].children[0].attribute('href').text rescue ''
38
+ authors = paper.children[0].children[1].text.clean rescue ''
39
+ authors.gsub!("...", "et al")
40
+
41
+ journal = paper.children[0].children[2].text rescue ''
42
+ journalName = journal.split(/,|\d/).first.clean rescue ''
43
+ journalDetails = journal.gsub(journalName, '').clean
44
+ year = journalDetails.match(/, \d+$/)[0]
45
+ journalDetails = journalDetails.gsub(year, '').clean
46
+ year = year.clean
47
+
48
+ #citations
49
+ citeInfo = paper.css('.gsc_a_ac')
50
+ citationCount = citeInfo.text
51
+ citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
52
+
53
+ # get DOI: needs last name of first author, no funny chars
54
+ lastNameFirstAuthor = ((authors.split(',').first ).split(' ').last ).gsub(/[^A-Za-z\-]/, '')
55
+ doi = getDoi( lastNameFirstAuthor, title, @crossRefEmail)
56
+
57
+ @parsedPapers.push(Paper.new( title, googleUrl, authors, journalName, journalDetails, year, citationCount, citationUrl, doi))
58
+ end
59
+ STDERR << "Scraped #{parsedPapers.length} from Google Scholar.\n"
60
+ end
61
+
62
+ # Scholar doesn't provide DOI.
63
+ # But if registered at CrossRef (it's free), DOI can be retrieved.
64
+ def getDoi(lastNameFirstAuthor, title, crossRefEmail)
65
+ return '' if @crossRefEmail.nil?
66
+ sleep(1) # to reduce risk
67
+ STDERR << "Getting DOI for paper by #{lastNameFirstAuthor}: #{title}.\n"
68
+ url = 'http://www.crossref.org/openurl?redirect=false' +
69
+ '&pid=' + crossRefEmail +
70
+ '&aulast=' + lastNameFirstAuthor +
71
+ '&atitle=' + URI.escape(title)
72
+ crossRefXML = Nokogiri::XML(open(url))
73
+ crossRefXML.search("doi").children.first.content rescue ''
74
+ end
75
+ end
76
+
77
+ class Formatter
78
+ attr_accessor :parser, :nameToHighlight, :pdfLinks, :altmetricDOIs
79
+
80
+ def initialize(parser, nameToHighlight = nil, pdfLinks = {}, altmetricDOIs = [], minCitationCount = 1)
81
+ @parser = parser
82
+ @nameToHighlight = nameToHighlight
83
+ @pdfLinks = pdfLinks
84
+ @altmetricDOIs = altmetricDOIs
85
+ @minCitations = minCitationCount
86
+ end
87
+
88
+ def to_html
89
+ builder = Nokogiri::HTML::Builder.new do |doc|
90
+ doc.div( :class => "publication") {
91
+ doc.ol {
92
+ @parser.parsedPapers.each_with_index do |paper, index|
93
+ doc.li( :value=> ( (@parser.parsedPapers).length - index).to_s) {
94
+ doc.b paper[:title] + '.'
95
+ doc.text ' (' + paper[:year] + '). '
96
+ if paper[:authors].include?(@nameToHighlight)
97
+ doc.text( paper[:authors].sub(Regexp.new(@nameToHighlight + '.*'), '') )
98
+ doc.span( :class => "label") { doc.text @nameToHighlight }
99
+ doc.text( paper[:authors].sub(Regexp.new('.*' + @nameToHighlight), '') )
100
+ else
101
+ doc.text( paper[:authors]) + '.'
102
+ end
103
+
104
+ doc.em ' ' + paper[:journalName]
105
+ doc.text ' ' + paper[:journalDetails]
106
+ unless paper[ :doi].empty?
107
+ doc.text(' ')
108
+ doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
109
+ doc.text "[DOI]"
110
+ }
111
+ end
112
+ if @pdfLinks.keys.include?(paper[:title])
113
+ doc.text(' ')
114
+ doc.a( :href => @pdfLinks[paper[:title]]) {
115
+ doc.text "[PDF]"
116
+ }
117
+ end
118
+ if paper[ :citationCount].to_i > @minCitations
119
+ doc.text(' ')
120
+ doc.a( :href => paper[ :citingPapers], :title => "Citations") {
121
+ doc.span( :class => "badge badge-inverse") { doc.test("#{paper[ :citationCount]}x") }
122
+ }
123
+ end
124
+ if altmetricDOIs.include?( paper[ :doi])
125
+ doc.text(' ')
126
+ doc.span( :class => 'altmetric-embed',
127
+ :'data-badge-popover' => 'bottom',
128
+ :'data-doi' => paper[ :doi] )
129
+ end
130
+ }
131
+ end
132
+ }
133
+ }
134
+ end
135
+ return builder.to_html
136
+ end
137
+ end
138
+ end
@@ -1,3 +1,3 @@
1
1
  module Rubyscholar
2
- VERSION = "0.0.5"
2
+ VERSION = "0.2"
3
3
  end
@@ -0,0 +1,3 @@
1
+ module Rubyscholar
2
+ VERSION = "0.0.1"
3
+ end
@@ -1,32 +1,31 @@
1
- require "rubyscholar/version"
2
1
  require "nokogiri"
3
2
  require "open-uri"
4
3
 
5
-
4
+ module Rubyscholar
6
5
  class String
7
6
  def clean
8
7
  # removes leading and trailing whitespace, commas
9
8
  self.gsub!(/(^[\s,]+)|([\s,]+$)/, '')
10
9
  return self
11
10
  end
12
- end
11
+ end
13
12
 
14
- module Rubyscholar
15
- class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
16
- end
17
13
 
14
+ class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
15
+ end
16
+
18
17
  class Parser
19
18
  attr_accessor :parsedPapers, :crossRefEmail
20
-
19
+
21
20
  def initialize(url, crossRefEmail = "")
22
21
  @parsedPapers = []
23
- @crossRefEmail = crossRefEmail # if nil doesn't retursn any DOI
22
+ @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
24
23
  parse(url)
25
24
  end
26
25
 
27
26
  def parse(url)
28
27
  papers = Nokogiri::HTML(open(url)).css(".cit-table .item")
29
- STDOUT << "Found #{papers.length} papers.\n"
28
+ STDERR << "Found #{papers.length} papers.\n"
30
29
  papers.each do |paper|
31
30
  paperDetails = paper.css("#col-title")
32
31
  title = paperDetails[0].children[0].content.clean
@@ -43,7 +42,7 @@ module Rubyscholar
43
42
  #citations
44
43
  citeInfo = paper.css(".cit-dark-link")
45
44
  citationCount = citeInfo.text
46
- citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
45
+ citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
47
46
 
48
47
  # get DOI: needs last name of first author, no funny chars
49
48
  lastNameFirstAuthor = ((authors.split(',').first ).split(' ').last ).gsub(/[^A-Za-z\-]/, '')
@@ -51,27 +50,27 @@ module Rubyscholar
51
50
 
52
51
  @parsedPapers.push(Paper.new( title, googleUrl, authors, journalName, journalDetails, year, citationCount, citationUrl, doi))
53
52
  end
54
- STDOUT << "Scraped #{parsedPapers.length} from Google Scholar.\n"
53
+ STDERR << "Scraped #{parsedPapers.length} from Google Scholar.\n"
55
54
  end
56
55
 
57
- # Scholar doesn't provide DOI.
58
- # But if registered at crossref (its free), DOI can be retreived.
56
+ # Scholar doesn't provide DOI.
57
+ # But if registered at crossref (its free), DOI can be retreived.
59
58
  def getDoi(lastNameFirstAuthor, title, crossRefEmail)
60
59
  return '' if @crossRefEmail.nil?
61
- sleep(1) # to reduce risk
60
+ sleep(1) # to reduce risk
62
61
  STDERR << "Getting DOI for paper by #{lastNameFirstAuthor}: #{title}.\n"
63
- url = 'http://www.crossref.org/openurl?redirect=false' +
64
- '&pid=' + crossRefEmail +
62
+ url = 'http://www.crossref.org/openurl?redirect=false' +
63
+ '&pid=' + crossRefEmail +
65
64
  '&aulast=' + lastNameFirstAuthor +
66
65
  '&atitle=' + URI.escape(title)
67
- crossRefXML = Nokogiri::XML(open(url))
66
+ crossRefXML = Nokogiri::XML(open(url))
68
67
  crossRefXML.search("doi").children.first.content rescue ''
69
68
  end
70
69
  end
71
-
70
+
72
71
  class Formatter
73
72
  attr_accessor :parser, :nameToHighlight, :pdfLinks, :altmetricDOIs
74
-
73
+
75
74
  def initialize(parser, nameToHighlight = nil, pdfLinks = {}, altmetricDOIs = [], minCitationCount = 1)
76
75
  @parser = parser
77
76
  @nameToHighlight = nameToHighlight
@@ -81,14 +80,14 @@ module Rubyscholar
81
80
  end
82
81
 
83
82
  def to_html
84
- ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
83
+ ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
85
84
  builder = Nokogiri::HTML::Builder.new do |doc|
86
85
  doc.html {
87
86
  doc.body {
88
87
  @parser.parsedPapers.each_with_index { |paper, index|
89
88
  doc.div( :class => "publication") {
90
89
  doc.p {
91
- doc.text ((@parser.parsedPapers).length - index).to_s + '. '
90
+ doc.text ((@parser.parsedPapers).length - index).to_s + '. '
92
91
 
93
92
  doc.b paper[:title] + '.'
94
93
  doc.text ' (' + paper[:year] + '). '
@@ -107,21 +106,21 @@ module Rubyscholar
107
106
  doc.text paper[:journalDetails]
108
107
  unless paper[ :doi].empty?
109
108
  doc.text(' ')
110
- doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
111
- doc.text "[DOI]"
112
- }
109
+ doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
110
+ doc.text "[DOI]"
111
+ }
113
112
  end
114
113
  if @pdfLinks.keys.include?(paper[:title])
115
114
  doc.text(' ')
116
- doc.a( :href => @pdfLinks[paper[:title]]) {
115
+ doc.a( :href => @pdfLinks[paper[:title]]) {
117
116
  doc.text "[PDF]"
118
- }
117
+ }
119
118
  end
120
119
  if paper[ :citationCount].to_i > @minCitations
121
120
  doc.text(' ')
122
- doc.a( :href => paper[ :citingPapers]) {
123
- doc.text("[Cited #{paper[ :citationCount]}x]")
124
- }
121
+ doc.a( :href => paper[ :citingPapers]) {
122
+ doc.text("[Cited #{paper[ :citationCount]}x]")
123
+ }
125
124
  end
126
125
  if altmetricDOIs.include?( paper[ :doi])
127
126
  doc.text(' ')
data/rubyscholar.gemspec CHANGED
@@ -1,23 +1,40 @@
1
- # -*- encoding: utf-8 -*-
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'rubyscholar/version'
1
+ require './lib/rubyscholar-version.rb'
5
2
 
6
3
  Gem::Specification.new do |gem|
7
- gem.name = "rubyscholar"
4
+ gem.name = 'rubyscholar'
8
5
  gem.version = Rubyscholar::VERSION
9
- gem.authors = ["Yannick Wurm","Gaurav Koley"]
10
- gem.email = ["y.wurm@qmul.ac.uk","arkokoley@live.in"]
11
- gem.description = %q{Scrape Google Scholar}
12
- gem.summary = %q{Rubyscholar scrapes google scholar and formats it into a scholar.html file.}
13
- gem.homepage = "http://github.com/yannickwurm/rubyscholar"
14
- gem.license = "MIT"
15
-
16
- gem.add_dependency "nokogiri", "~>1.6.0"
17
- gem.add_dependency "commander", "~>4.1.5"
18
-
19
- gem.files = `git ls-files`.split($/)
20
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
21
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
22
- gem.require_paths = ["lib"]
6
+ gem.authors = ['Yannick Wurm','Gaurav Koley']
7
+ gem.email = ['y.wurm@qmul.ac.uk','arkokoley@live.in']
8
+ gem.description = %q{Scrape Google Scholar Profile page}
9
+ gem.summary = %q{Rubyscholar scrapes one google scholar and formats it into a scholar.html file listing publications.}
10
+ gem.homepage = 'http://yannick.poulet.org/'
11
+ gem.license = 'MIT'
12
+
13
+
14
+ gem.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.0'
15
+ gem.add_runtime_dependency 'commander', '~> 4.1', '>= 4.1.5'
16
+
17
+ gem.files = Dir['lib/**/*'] + Dir['views/**/*'] + Dir['public/**/*'] + Dir['tests/**/*']
18
+ gem.files = gem.files + ['example.config.yml']
19
+ gem.files = gem.files + ['LICENSE.txt', 'README.md']
20
+ gem.files = gem.files + ['Gemfile', 'rubyscholar.gemspec']
21
+
22
+ gem.executables = ['rubyscholar']
23
+ gem.require_paths = ['lib']
24
+
25
+ gem.post_install_message = <<INFO
26
+
27
+ -----
28
+ Thanks for installing rubyscholar.
29
+ If something isn't working, this may be due to Google's changing the format of Scholar pages
30
+ (they do this regularly with no warning).
31
+
32
+ If you can have a shot at fixing it, please go ahead - pull requests are most welcome.
33
+
34
+ All the best,
35
+
36
+ Yannick - http://yannick.poulet.org
37
+ ----
38
+
39
+ INFO
23
40
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyscholar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
5
- prerelease:
4
+ version: '0.2'
6
5
  platform: ruby
7
6
  authors:
8
7
  - Yannick Wurm
@@ -10,84 +9,93 @@ authors:
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2013-10-12 00:00:00.000000000 Z
12
+ date: 2014-09-20 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: nokogiri
17
16
  requirement: !ruby/object:Gem::Requirement
18
- none: false
19
17
  requirements:
20
- - - ~>
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.6'
21
+ - - ">="
21
22
  - !ruby/object:Gem::Version
22
23
  version: 1.6.0
23
24
  type: :runtime
24
25
  prerelease: false
25
26
  version_requirements: !ruby/object:Gem::Requirement
26
- none: false
27
27
  requirements:
28
- - - ~>
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '1.6'
31
+ - - ">="
29
32
  - !ruby/object:Gem::Version
30
33
  version: 1.6.0
31
34
  - !ruby/object:Gem::Dependency
32
35
  name: commander
33
36
  requirement: !ruby/object:Gem::Requirement
34
- none: false
35
37
  requirements:
36
- - - ~>
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '4.1'
41
+ - - ">="
37
42
  - !ruby/object:Gem::Version
38
43
  version: 4.1.5
39
44
  type: :runtime
40
45
  prerelease: false
41
46
  version_requirements: !ruby/object:Gem::Requirement
42
- none: false
43
47
  requirements:
44
- - - ~>
48
+ - - "~>"
49
+ - !ruby/object:Gem::Version
50
+ version: '4.1'
51
+ - - ">="
45
52
  - !ruby/object:Gem::Version
46
53
  version: 4.1.5
47
- description: Scrape Google Scholar
54
+ description: Scrape Google Scholar Profile page
48
55
  email:
49
56
  - y.wurm@qmul.ac.uk
50
57
  - arkokoley@live.in
51
58
  executables:
52
59
  - rubyscholar
53
- - scrape.rb
54
60
  extensions: []
55
61
  extra_rdoc_files: []
56
62
  files:
57
- - .gitignore
58
- - CHANGELOG.md
59
63
  - Gemfile
60
64
  - LICENSE.txt
61
65
  - README.md
62
- - Rakefile
63
66
  - bin/rubyscholar
64
- - bin/scrape.rb
65
- - lib/rubyscholar.rb
66
- - lib/rubyscholar/version.rb
67
+ - example.config.yml
68
+ - lib/rubyscholar-main.rb
69
+ - lib/rubyscholar-version.rb
70
+ - lib/rubyscholar-version.rb~
71
+ - lib/rubyscholar.rb~
67
72
  - rubyscholar.gemspec
68
- homepage: http://github.com/yannickwurm/rubyscholar
73
+ homepage: http://yannick.poulet.org/
69
74
  licenses:
70
75
  - MIT
71
- post_install_message:
76
+ metadata: {}
77
+ post_install_message: "\n-----\nThanks for installing rubyscholar. \nIf something
78
+ isn't working, this may be due to Google's changing the format of Scholar pages\n(they
79
+ do this regularly with no warning). \n\nIf you can have a shot at fixing it, please
80
+ go ahead - pull requests are most welcome.\n\nAll the best, \n\nYannick - http://yannick.poulet.org\n----\n\n"
72
81
  rdoc_options: []
73
82
  require_paths:
74
83
  - lib
75
84
  required_ruby_version: !ruby/object:Gem::Requirement
76
- none: false
77
85
  requirements:
78
- - - ! '>='
86
+ - - ">="
79
87
  - !ruby/object:Gem::Version
80
88
  version: '0'
81
89
  required_rubygems_version: !ruby/object:Gem::Requirement
82
- none: false
83
90
  requirements:
84
- - - ! '>='
91
+ - - ">="
85
92
  - !ruby/object:Gem::Version
86
93
  version: '0'
87
94
  requirements: []
88
95
  rubyforge_project:
89
- rubygems_version: 1.8.23
96
+ rubygems_version: 2.2.2
90
97
  signing_key:
91
- specification_version: 3
92
- summary: Rubyscholar scrapes google scholar and formats it into a scholar.html file.
98
+ specification_version: 4
99
+ summary: Rubyscholar scrapes one google scholar and formats it into a scholar.html
100
+ file listing publications.
93
101
  test_files: []
data/.gitignore DELETED
@@ -1,18 +0,0 @@
1
- *.gem
2
- *.rbc
3
- .bundle
4
- .config
5
- coverage
6
- InstalledFiles
7
- lib/bundler/man
8
- pkg
9
- rdoc
10
- spec/reports
11
- test/tmp
12
- test/version_tmp
13
- tmp
14
-
15
- # YARD artifacts
16
- .yardoc
17
- _yardoc
18
- doc/
data/CHANGELOG.md DELETED
@@ -1,6 +0,0 @@
1
- ## Changelog
2
-
3
- version 0.0.4
4
-
5
- * Support for custom config files and output file in commands.
6
- * Command for creating a config.yml file using `rubyscholar init`.
data/Rakefile DELETED
@@ -1 +0,0 @@
1
- require "bundler/gem_tasks"
data/bin/scrape.rb DELETED
@@ -1,20 +0,0 @@
1
- require_relative '../lib/rubyscholar'
2
- require 'yaml'
3
-
4
- config = YAML.load_file('config.yml')
5
- parsed = Rubyscholar::Parser.new(config["url"],
6
- config["email"])
7
- formatter = Rubyscholar::Formatter.new(parsed,
8
- config["highlight"],
9
- config["pdfs"],
10
- config["altmetricDOIs"],
11
- config["minCitations"].to_i)
12
-
13
- html = formatter.to_html
14
- config["italicize"].each do |term|
15
- html.gsub!( term , '<em>' + term + '</em>')
16
- end
17
-
18
- f= File.open('scholar.html','w')
19
- f.write html
20
- f.close