rubyscholar 0.0.5 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 68cc9491e42d735441cfd82e8e18c108a84ec05d
4
+ data.tar.gz: 5cc483aa8e259775a599b588cf658a219d5389f9
5
+ SHA512:
6
+ metadata.gz: de91f4aa92392ce239fe9e093ca804f6725fa0ae05a1c6419ff49022cb3f12888961a8ea2f65c9fb35590b8b446620e5694385e2e35cf159b8beab632fc8e76d
7
+ data.tar.gz: face85b2e90f71e9d6c37e281a81a4e78011975e1ae820449fb2b995863d7195219ad4794fd7137a5420d2ae5a2f6b19433bc90a51f55883ea70a50d4d4b47f9
data/README.md CHANGED
@@ -13,16 +13,6 @@ Some features:
13
13
 
14
14
  # How to use:
15
15
 
16
- ### As a Ruby Gem:
17
- 1. Install the gem using: `[sudo] gem install rubyscholar`
18
- 2. Create and configure a `config.yml` file.
19
- To create a `config.yml` file, run `$ rubyscholar init`
20
- Edit the file, filling in your details.
21
- 3. Run as `$ rubyscholar scrape --out file.html `.
22
- 4. A `file.html` file is created containing your citations all formatted
23
- and ready to use.
24
- 5. Done!
25
-
26
16
  ### As a ruby script:
27
17
  1. Configure "config.yml"
28
18
  If you want DOI retrieval to work (including Altmetrics), you need to be
@@ -35,8 +25,10 @@ and ready to use.
35
25
 
36
26
  * uses author list as visible on your main Google Scholar page. Sometimes this
37
27
  means names are chopped in two or just a single author is missing. This could
38
- be made smarter.
39
- * flexible output
28
+ be made smarter (by following the link to get the full author list).
29
+ * output format could be more flexible (e.g. change the order, such as title before authors, or change the formatting, such as removing the first initial). Perhaps this could be done by providing a regexp search/replace configuration option within each field.
30
+ * Ensure that a true email is entered.
31
+ * right now only works from "user profile" pages. Not from "articles citing article" pages.
40
32
  * flexible use of DOIs
41
33
 
42
34
  # Technologies
data/bin/rubyscholar CHANGED
@@ -1,76 +1,50 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'optparse'
4
3
  require 'rubygems'
4
+ require 'optparse' # YW is this needed or redundant with commander?
5
5
  require 'commander/import'
6
- require 'rubyscholar'
7
6
  require 'yaml'
7
+ require 'rubyscholar-main'
8
8
 
9
9
 
10
- program :name, 'rubysholar'
11
- program :version, '0.0.2'
10
+ program :name, 'rubyscholar'
11
+ program :version, Rubyscholar::VERSION
12
12
  program :description, 'Rubyscholar scrapes google scholar and formats it into a scholar.html file.'
13
13
 
14
14
  default_command :scrape
15
15
 
16
16
  command :scrape do |c|
17
17
  c.syntax = 'rubyscholar scrape [options]'
18
- c.summary = ''
19
- c.description = "Scape google scholar for new publications"
18
+ c.description = "Scrape Google Scholar for new publications"
20
19
 
21
20
  c.option '--config [Config File]', 'Config file to use'
22
- c.option '--out [Output File]', 'File to output the scrapes to'
23
-
24
- c.action do |args, options|
25
- options.default :config => "config.yml", :out => "scholar.html"
26
- configFile= "#{options.config}"
27
- config = YAML.load_file(configFile)
28
- parsed = Rubyscholar::Parser.new(config["url"],
29
- config["email"])
30
- formatter = Rubyscholar::Formatter.new(parsed,
31
- config["highlight"],
32
- config["pdfs"],
33
- config["altmetricDOIs"],
34
- config["minCitations"].to_i)
35
-
36
- html = formatter.to_html
37
- config["italicize"].each do |term|
38
- html.gsub!( term , '<em>' + term + '</em>')
39
- end
40
- outFile="#{options.out}"
41
- f= File.open(outFile,'w')
42
- f.write html
43
- f.close()
44
- end
45
- end
46
-
47
- command :init do |c|
48
- c.syntax = 'rubyscholar init'
49
- c.description = 'Creates a sample config.yml file for Scraping.'
21
+ c.option '--output [Output File]', 'HTML output for publication list'
22
+
50
23
  c.action do |args, options|
51
- say '# Intialising a configuration file for Rubyscholar.'
52
- if File.exist?("config.yml")
53
- abort("Initialisation aborted!") if ask("config.yml already exists. Do you want to overwrite?", ['y', 'n']) == 'n'
24
+ options.default \
25
+ :config => 'config.yaml',
26
+ :output => 'publications.html'
27
+ #rest stays in this block bc we need to access options
28
+
29
+ raise IOError, "You must specify config file via --config\n" if options.config.nil?
30
+ config = YAML.load_file(options.config)
31
+ parsed = Rubyscholar::Parser.new(config["url"],
32
+ config["email"])
33
+ html = Rubyscholar::Formatter.new(parsed,
34
+ config["highlight"],
35
+ config["pdfs"],
36
+ config["altmetricDOIs"],
37
+ config["minCitations"].to_i
38
+ ).to_html
39
+
40
+ config["italicize"].each do |term|
41
+ html.gsub!( term , '<em>' + term + '</em>')
54
42
  end
55
- puts "# Creating config.yml."
56
- open("config.yml", 'w') do |page|
57
- page.puts "# Google Scholar page (you can choose how you sort it)"
58
- page.puts "url: "
59
- page.puts "\n\n# Name to highlight (withing \" and \")"
60
- page.puts "highlight: "
61
- page.puts "\n\n# Need an Email address that has been registered with CrossRef to obtain DOIs using their OpenURL service. "
62
- page.puts "# e.g. the following should provide an XML file: "
63
- page.puts "# http://www.crossref.org/openurl?redirect=false&pid=YOUR@EMAIL>COM&aulast=Wurm&atitle=Behavioral%20Genomics:%20A,%20Bee,%20C,%20G,%20T"
64
- page.puts "email: your@email.com"
65
- page.puts "\n\n# Show \"[Cited Nx]\" if N > the following number"
66
- page.puts "minCitations: 5 "
67
- page.puts "\n\n# Words to italicize (emphasize). These will have \"<em>\" around them. "
68
- page.puts "italicize: "
69
- page.puts "\n\n# DOIs of articles for which we should show altmetric.org badges. "
70
- page.puts "altmetricDOIs: "
71
- page.puts "\n\n# Article titles for which we have urls to PDFs in \"name\" : \"url\" format"
72
- page.puts "pdfs:"
73
- end
74
- end
43
+
44
+ STDERR << "Outputting to #{options.output}\n."
45
+ f = File.open(options.output,'w')
46
+ f.write html
47
+ f.close
48
+ end
75
49
  end
76
50
 
@@ -0,0 +1,51 @@
1
+ # Google Scholar page (you can choose how you sort it)
2
+ url: "http://scholar.google.com/citations?hl=en&user=k6y0EGsAAAAJ&sortby=pubdate&view_op=list_works&pagesize=100"
3
+
4
+ #### Everything below this line is OPTIONAL ####
5
+
6
+ # Name to highlight
7
+ highlight: "Y Wurm"
8
+
9
+
10
+ # Need an Email address that has been registered with CrossRef to obtain DOIs
11
+ # using their OpenURL service.
12
+ # e.g. the following should provide an XML file:
13
+ # http://www.crossref.org/openurl?redirect=false&pid=YOUR@EMAIL.COM&aulast=Wurm&atitle=Behavioral%20Genomics:%20A,%20Bee,%20C,%20G,%20T
14
+ email: your@email.com
15
+
16
+
17
+ # Show "[Cited Nx]" if N > the following number
18
+ minCitations: 5
19
+
20
+ # Words to italicize (emphasize). These will have "<em>" around them.
21
+ italicize:
22
+ - Solenopsis invicta
23
+ - Acromyrmex echinatior
24
+ - de novo
25
+
26
+ # DOIs of articles for which we should show altmetric.org badges.
27
+ altmetricDOIs:
28
+ - "10.1038/nature11832"
29
+ - "10.1101/gr.121392.111"
30
+ - "10.1073/pnas.1009690108"
31
+ - "10.1073/pnas.1104825108"
32
+
33
+ # Article titles for which we have urls to PDFs
34
+ pdfs:
35
+ "A Y-like social chromosome causes alternative colony organization in fire ants" : "/publications/wangwurm2013socialChromosome.pdf"
36
+ "Duplication and concerted evolution in a master sex determiner under balancing selection" : "/publications/procb2013.pdf"
37
+ "Comparative genomics of chemosensory protein genes reveals rapid evolution and positive selection in ant-specific duplicates" : "/publications/hdy2012122a.pdf"
38
+ "The Molecular Clockwork of the Fire Ant Solenopsis invicta" : "/publications/ingram2012-fireAntClockGenes.pdf"
39
+ "Epigenetics: The Making of Ant Castes" : "/publications/2012CurrBiolAntepigenetics.pdf"
40
+ "Visualization and quality assessment of de novo genome assemblies" : "/publications/Bioinformatics-2011-Riba-Grognuz-3425-6"
41
+ "The genomic impact of 100 million years of social evolution in seven ant species" : "/publications/TiG2011.pdf"
42
+ "Relaxed selection is a precursor to the evolution of phenotypic plasticity" : "/publications/hunt2011phenotypicPlasticity.pdf"
43
+ "The genome of the leaf-cutting ant Acromyrmex echinatior suggests key adaptations to advanced social life and fungus farming" : "/publications/nygaard2011-acromyrmex-genome.pdf"
44
+ "Behind the Scenes of an Ant Genome Project" : "/publications/wurm2011antGenomeBehindTheScenes.pdf"
45
+ "The genome of the fire ant Solenopsis invicta" : "/publications/wurm2011fireAntGenome.pdf"
46
+ "Odorant Binding Proteins of the Red Imported Fire Ant, Solenopsis invicta: An Example of the Problems Facing the Analysis of Widely Divergent Proteins" : "/publications/gotzek2011obps.pdf"
47
+ "Parasitoid Wasps: From Natural History to Genomic Studies" : "/publications/wurm2010wasps.pdf"
48
+ "Changes in reproductive roles are associated with changes in gene expression in fire ant queens" : "/publications/wurm2010fireAntQueenDealationExpression.pdf"
49
+ "Fourmidable: a database for ant genomics" : "/publications/wurm2009antDatabase.pdf"
50
+ "Behavioral Genomics: A, Bee, C, G, T" : "/publications/wurm2007bees.pdf"
51
+ "An annotated cDNA library and microarray for large-scale gene-expression studies in the ant Solenopsis invicta" : "/publications/wang2007fireAntMicroarrays.pdf"
@@ -0,0 +1,138 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'rubyscholar-version'
4
+
5
+
6
+ class String
7
+ def clean
8
+ # removes leading and trailing whitespace, commas
9
+ self.gsub!(/(^[\s,]+)|([\s,]+$)/, '')
10
+ return self
11
+ end
12
+ end
13
+
14
+ module Rubyscholar
15
+ class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
16
+ end
17
+
18
+ class Parser
19
+ attr_accessor :parsedPapers, :crossRefEmail
20
+
21
+ def initialize(url, crossRefEmail = "")
22
+ @parsedPapers = []
23
+ @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
24
+ parse(url)
25
+ end
26
+
27
+ def parse(url)
28
+ STDERR << "Will check #{url}.\n"
29
+ page = Nokogiri::HTML(open(url,
30
+ 'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2'), nil, 'utf-8')
31
+ papers = page.css(".gsc_a_tr")
32
+ STDERR << "Found #{papers.length} papers.\n"
33
+ papers.each do |paper|
34
+ title = paper.css(".gsc_a_at").text rescue ''
35
+ title.gsub!(/\.$/, '')
36
+
37
+ googleUrl = paper.children[0].children[0].attribute('href').text rescue ''
38
+ authors = paper.children[0].children[1].text.clean rescue ''
39
+ authors.gsub!("...", "et al")
40
+
41
+ journal = paper.children[0].children[2].text rescue ''
42
+ journalName = journal.split(/,|\d/).first.clean rescue ''
43
+ journalDetails = journal.gsub(journalName, '').clean
44
+ year = journalDetails.match(/, \d+$/)[0]
45
+ journalDetails = journalDetails.gsub(year, '').clean
46
+ year = year.clean
47
+
48
+ #citations
49
+ citeInfo = paper.css('.gsc_a_ac')
50
+ citationCount = citeInfo.text
51
+ citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
52
+
53
+ # get DOI: needs last name of first author, no funny chars
54
+ lastNameFirstAuthor = ((authors.split(',').first ).split(' ').last ).gsub(/[^A-Za-z\-]/, '')
55
+ doi = getDoi( lastNameFirstAuthor, title, @crossRefEmail)
56
+
57
+ @parsedPapers.push(Paper.new( title, googleUrl, authors, journalName, journalDetails, year, citationCount, citationUrl, doi))
58
+ end
59
+ STDERR << "Scraped #{parsedPapers.length} from Google Scholar.\n"
60
+ end
61
+
62
+ # Scholar doesn't provide DOI.
63
+ # But if registered at CrossRef (it's free), the DOI can be retrieved.
64
+ def getDoi(lastNameFirstAuthor, title, crossRefEmail)
65
+ return '' if @crossRefEmail.nil?
66
+ sleep(1) # to reduce risk
67
+ STDERR << "Getting DOI for paper by #{lastNameFirstAuthor}: #{title}.\n"
68
+ url = 'http://www.crossref.org/openurl?redirect=false' +
69
+ '&pid=' + crossRefEmail +
70
+ '&aulast=' + lastNameFirstAuthor +
71
+ '&atitle=' + URI.escape(title)
72
+ crossRefXML = Nokogiri::XML(open(url))
73
+ crossRefXML.search("doi").children.first.content rescue ''
74
+ end
75
+ end
76
+
77
+ class Formatter
78
+ attr_accessor :parser, :nameToHighlight, :pdfLinks, :altmetricDOIs
79
+
80
+ def initialize(parser, nameToHighlight = nil, pdfLinks = {}, altmetricDOIs = [], minCitationCount = 1)
81
+ @parser = parser
82
+ @nameToHighlight = nameToHighlight
83
+ @pdfLinks = pdfLinks
84
+ @altmetricDOIs = altmetricDOIs
85
+ @minCitations = minCitationCount
86
+ end
87
+
88
+ def to_html
89
+ builder = Nokogiri::HTML::Builder.new do |doc|
90
+ doc.div( :class => "publication") {
91
+ doc.ol {
92
+ @parser.parsedPapers.each_with_index do |paper, index|
93
+ doc.li( :value=> ( (@parser.parsedPapers).length - index).to_s) {
94
+ doc.b paper[:title] + '.'
95
+ doc.text ' (' + paper[:year] + '). '
96
+ if paper[:authors].include?(@nameToHighlight)
97
+ doc.text( paper[:authors].sub(Regexp.new(@nameToHighlight + '.*'), '') )
98
+ doc.span( :class => "label") { doc.text @nameToHighlight }
99
+ doc.text( paper[:authors].sub(Regexp.new('.*' + @nameToHighlight), '') )
100
+ else
101
+ doc.text( paper[:authors]) + '.'
102
+ end
103
+
104
+ doc.em ' ' + paper[:journalName]
105
+ doc.text ' ' + paper[:journalDetails]
106
+ unless paper[ :doi].empty?
107
+ doc.text(' ')
108
+ doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
109
+ doc.text "[DOI]"
110
+ }
111
+ end
112
+ if @pdfLinks.keys.include?(paper[:title])
113
+ doc.text(' ')
114
+ doc.a( :href => @pdfLinks[paper[:title]]) {
115
+ doc.text "[PDF]"
116
+ }
117
+ end
118
+ if paper[ :citationCount].to_i > @minCitations
119
+ doc.text(' ')
120
+ doc.a( :href => paper[ :citingPapers], :title => "Citations") {
121
+ doc.span( :class => "badge badge-inverse") { doc.test("#{paper[ :citationCount]}x") }
122
+ }
123
+ end
124
+ if altmetricDOIs.include?( paper[ :doi])
125
+ doc.text(' ')
126
+ doc.span( :class => 'altmetric-embed',
127
+ :'data-badge-popover' => 'bottom',
128
+ :'data-doi' => paper[ :doi] )
129
+ end
130
+ }
131
+ end
132
+ }
133
+ }
134
+ end
135
+ return builder.to_html
136
+ end
137
+ end
138
+ end
@@ -1,3 +1,3 @@
1
1
  module Rubyscholar
2
- VERSION = "0.0.5"
2
+ VERSION = "0.2"
3
3
  end
@@ -0,0 +1,3 @@
1
+ module Rubyscholar
2
+ VERSION = "0.0.1"
3
+ end
@@ -1,32 +1,31 @@
1
- require "rubyscholar/version"
2
1
  require "nokogiri"
3
2
  require "open-uri"
4
3
 
5
-
4
+ module Rubyscholar
6
5
  class String
7
6
  def clean
8
7
  # removes leading and trailing whitespace, commas
9
8
  self.gsub!(/(^[\s,]+)|([\s,]+$)/, '')
10
9
  return self
11
10
  end
12
- end
11
+ end
13
12
 
14
- module Rubyscholar
15
- class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
16
- end
17
13
 
14
+ class Paper < Struct.new(:title, :url, :authors, :journalName, :journalDetails, :year, :citationCount, :citingPapers, :doi)
15
+ end
16
+
18
17
  class Parser
19
18
  attr_accessor :parsedPapers, :crossRefEmail
20
-
19
+
21
20
  def initialize(url, crossRefEmail = "")
22
21
  @parsedPapers = []
23
- @crossRefEmail = crossRefEmail # if nil doesn't retursn any DOI
22
+ @crossRefEmail = crossRefEmail # if nil doesn't return any DOI
24
23
  parse(url)
25
24
  end
26
25
 
27
26
  def parse(url)
28
27
  papers = Nokogiri::HTML(open(url)).css(".cit-table .item")
29
- STDOUT << "Found #{papers.length} papers.\n"
28
+ STDERR << "Found #{papers.length} papers.\n"
30
29
  papers.each do |paper|
31
30
  paperDetails = paper.css("#col-title")
32
31
  title = paperDetails[0].children[0].content.clean
@@ -43,7 +42,7 @@ module Rubyscholar
43
42
  #citations
44
43
  citeInfo = paper.css(".cit-dark-link")
45
44
  citationCount = citeInfo.text
46
- citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
45
+ citationUrl = citationCount.empty? ? nil : citeInfo.attribute('href').to_s
47
46
 
48
47
  # get DOI: needs last name of first author, no funny chars
49
48
  lastNameFirstAuthor = ((authors.split(',').first ).split(' ').last ).gsub(/[^A-Za-z\-]/, '')
@@ -51,27 +50,27 @@ module Rubyscholar
51
50
 
52
51
  @parsedPapers.push(Paper.new( title, googleUrl, authors, journalName, journalDetails, year, citationCount, citationUrl, doi))
53
52
  end
54
- STDOUT << "Scraped #{parsedPapers.length} from Google Scholar.\n"
53
+ STDERR << "Scraped #{parsedPapers.length} from Google Scholar.\n"
55
54
  end
56
55
 
57
- # Scholar doesn't provide DOI.
58
- # But if registered at crossref (its free), DOI can be retreived.
56
+ # Scholar doesn't provide DOI.
57
+ # But if registered at CrossRef (it's free), the DOI can be retrieved.
59
58
  def getDoi(lastNameFirstAuthor, title, crossRefEmail)
60
59
  return '' if @crossRefEmail.nil?
61
- sleep(1) # to reduce risk
60
+ sleep(1) # to reduce risk
62
61
  STDERR << "Getting DOI for paper by #{lastNameFirstAuthor}: #{title}.\n"
63
- url = 'http://www.crossref.org/openurl?redirect=false' +
64
- '&pid=' + crossRefEmail +
62
+ url = 'http://www.crossref.org/openurl?redirect=false' +
63
+ '&pid=' + crossRefEmail +
65
64
  '&aulast=' + lastNameFirstAuthor +
66
65
  '&atitle=' + URI.escape(title)
67
- crossRefXML = Nokogiri::XML(open(url))
66
+ crossRefXML = Nokogiri::XML(open(url))
68
67
  crossRefXML.search("doi").children.first.content rescue ''
69
68
  end
70
69
  end
71
-
70
+
72
71
  class Formatter
73
72
  attr_accessor :parser, :nameToHighlight, :pdfLinks, :altmetricDOIs
74
-
73
+
75
74
  def initialize(parser, nameToHighlight = nil, pdfLinks = {}, altmetricDOIs = [], minCitationCount = 1)
76
75
  @parser = parser
77
76
  @nameToHighlight = nameToHighlight
@@ -81,14 +80,14 @@ module Rubyscholar
81
80
  end
82
81
 
83
82
  def to_html
84
- ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
83
+ ##@doc = Nokogiri::HTML::DocumentFragment.parse ""
85
84
  builder = Nokogiri::HTML::Builder.new do |doc|
86
85
  doc.html {
87
86
  doc.body {
88
87
  @parser.parsedPapers.each_with_index { |paper, index|
89
88
  doc.div( :class => "publication") {
90
89
  doc.p {
91
- doc.text ((@parser.parsedPapers).length - index).to_s + '. '
90
+ doc.text ((@parser.parsedPapers).length - index).to_s + '. '
92
91
 
93
92
  doc.b paper[:title] + '.'
94
93
  doc.text ' (' + paper[:year] + '). '
@@ -107,21 +106,21 @@ module Rubyscholar
107
106
  doc.text paper[:journalDetails]
108
107
  unless paper[ :doi].empty?
109
108
  doc.text(' ')
110
- doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
111
- doc.text "[DOI]"
112
- }
109
+ doc.a( :href => URI.join("http://dx.doi.org/", paper[ :doi])) {
110
+ doc.text "[DOI]"
111
+ }
113
112
  end
114
113
  if @pdfLinks.keys.include?(paper[:title])
115
114
  doc.text(' ')
116
- doc.a( :href => @pdfLinks[paper[:title]]) {
115
+ doc.a( :href => @pdfLinks[paper[:title]]) {
117
116
  doc.text "[PDF]"
118
- }
117
+ }
119
118
  end
120
119
  if paper[ :citationCount].to_i > @minCitations
121
120
  doc.text(' ')
122
- doc.a( :href => paper[ :citingPapers]) {
123
- doc.text("[Cited #{paper[ :citationCount]}x]")
124
- }
121
+ doc.a( :href => paper[ :citingPapers]) {
122
+ doc.text("[Cited #{paper[ :citationCount]}x]")
123
+ }
125
124
  end
126
125
  if altmetricDOIs.include?( paper[ :doi])
127
126
  doc.text(' ')
data/rubyscholar.gemspec CHANGED
@@ -1,23 +1,40 @@
1
- # -*- encoding: utf-8 -*-
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'rubyscholar/version'
1
+ require './lib/rubyscholar-version.rb'
5
2
 
6
3
  Gem::Specification.new do |gem|
7
- gem.name = "rubyscholar"
4
+ gem.name = 'rubyscholar'
8
5
  gem.version = Rubyscholar::VERSION
9
- gem.authors = ["Yannick Wurm","Gaurav Koley"]
10
- gem.email = ["y.wurm@qmul.ac.uk","arkokoley@live.in"]
11
- gem.description = %q{Scrape Google Scholar}
12
- gem.summary = %q{Rubyscholar scrapes google scholar and formats it into a scholar.html file.}
13
- gem.homepage = "http://github.com/yannickwurm/rubyscholar"
14
- gem.license = "MIT"
15
-
16
- gem.add_dependency "nokogiri", "~>1.6.0"
17
- gem.add_dependency "commander", "~>4.1.5"
18
-
19
- gem.files = `git ls-files`.split($/)
20
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
21
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
22
- gem.require_paths = ["lib"]
6
+ gem.authors = ['Yannick Wurm','Gaurav Koley']
7
+ gem.email = ['y.wurm@qmul.ac.uk','arkokoley@live.in']
8
+ gem.description = %q{Scrape Google Scholar Profile page}
9
+ gem.summary = %q{Rubyscholar scrapes one google scholar and formats it into a scholar.html file listing publications.}
10
+ gem.homepage = 'http://yannick.poulet.org/'
11
+ gem.license = 'MIT'
12
+
13
+
14
+ gem.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.0'
15
+ gem.add_runtime_dependency 'commander', '~> 4.1', '>= 4.1.5'
16
+
17
+ gem.files = Dir['lib/**/*'] + Dir['views/**/*'] + Dir['public/**/*'] + Dir['tests/**/*']
18
+ gem.files = gem.files + ['example.config.yml']
19
+ gem.files = gem.files + ['LICENSE.txt', 'README.md']
20
+ gem.files = gem.files + ['Gemfile', 'rubyscholar.gemspec']
21
+
22
+ gem.executables = ['rubyscholar']
23
+ gem.require_paths = ['lib']
24
+
25
+ gem.post_install_message = <<INFO
26
+
27
+ -----
28
+ Thanks for installing rubyscholar.
29
+ If something isn't working, this may be due to Google's changing the format of Scholar pages
30
+ (they do this regularly with no warning).
31
+
32
+ If you can have a shot at fixing it, please go ahead - pull requests are most welcome.
33
+
34
+ All the best,
35
+
36
+ Yannick - http://yannick.poulet.org
37
+ ----
38
+
39
+ INFO
23
40
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyscholar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
5
- prerelease:
4
+ version: '0.2'
6
5
  platform: ruby
7
6
  authors:
8
7
  - Yannick Wurm
@@ -10,84 +9,93 @@ authors:
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2013-10-12 00:00:00.000000000 Z
12
+ date: 2014-09-20 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: nokogiri
17
16
  requirement: !ruby/object:Gem::Requirement
18
- none: false
19
17
  requirements:
20
- - - ~>
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.6'
21
+ - - ">="
21
22
  - !ruby/object:Gem::Version
22
23
  version: 1.6.0
23
24
  type: :runtime
24
25
  prerelease: false
25
26
  version_requirements: !ruby/object:Gem::Requirement
26
- none: false
27
27
  requirements:
28
- - - ~>
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '1.6'
31
+ - - ">="
29
32
  - !ruby/object:Gem::Version
30
33
  version: 1.6.0
31
34
  - !ruby/object:Gem::Dependency
32
35
  name: commander
33
36
  requirement: !ruby/object:Gem::Requirement
34
- none: false
35
37
  requirements:
36
- - - ~>
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '4.1'
41
+ - - ">="
37
42
  - !ruby/object:Gem::Version
38
43
  version: 4.1.5
39
44
  type: :runtime
40
45
  prerelease: false
41
46
  version_requirements: !ruby/object:Gem::Requirement
42
- none: false
43
47
  requirements:
44
- - - ~>
48
+ - - "~>"
49
+ - !ruby/object:Gem::Version
50
+ version: '4.1'
51
+ - - ">="
45
52
  - !ruby/object:Gem::Version
46
53
  version: 4.1.5
47
- description: Scrape Google Scholar
54
+ description: Scrape Google Scholar Profile page
48
55
  email:
49
56
  - y.wurm@qmul.ac.uk
50
57
  - arkokoley@live.in
51
58
  executables:
52
59
  - rubyscholar
53
- - scrape.rb
54
60
  extensions: []
55
61
  extra_rdoc_files: []
56
62
  files:
57
- - .gitignore
58
- - CHANGELOG.md
59
63
  - Gemfile
60
64
  - LICENSE.txt
61
65
  - README.md
62
- - Rakefile
63
66
  - bin/rubyscholar
64
- - bin/scrape.rb
65
- - lib/rubyscholar.rb
66
- - lib/rubyscholar/version.rb
67
+ - example.config.yml
68
+ - lib/rubyscholar-main.rb
69
+ - lib/rubyscholar-version.rb
70
+ - lib/rubyscholar-version.rb~
71
+ - lib/rubyscholar.rb~
67
72
  - rubyscholar.gemspec
68
- homepage: http://github.com/yannickwurm/rubyscholar
73
+ homepage: http://yannick.poulet.org/
69
74
  licenses:
70
75
  - MIT
71
- post_install_message:
76
+ metadata: {}
77
+ post_install_message: "\n-----\nThanks for installing rubyscholar. \nIf something
78
+ isn't working, this may be due to Google's changing the format of Scholar pages\n(they
79
+ do this regularly with no warning). \n\nIf you can have a shot at fixing it, please
80
+ go ahead - pull requests are most welcome.\n\nAll the best, \n\nYannick - http://yannick.poulet.org\n----\n\n"
72
81
  rdoc_options: []
73
82
  require_paths:
74
83
  - lib
75
84
  required_ruby_version: !ruby/object:Gem::Requirement
76
- none: false
77
85
  requirements:
78
- - - ! '>='
86
+ - - ">="
79
87
  - !ruby/object:Gem::Version
80
88
  version: '0'
81
89
  required_rubygems_version: !ruby/object:Gem::Requirement
82
- none: false
83
90
  requirements:
84
- - - ! '>='
91
+ - - ">="
85
92
  - !ruby/object:Gem::Version
86
93
  version: '0'
87
94
  requirements: []
88
95
  rubyforge_project:
89
- rubygems_version: 1.8.23
96
+ rubygems_version: 2.2.2
90
97
  signing_key:
91
- specification_version: 3
92
- summary: Rubyscholar scrapes google scholar and formats it into a scholar.html file.
98
+ specification_version: 4
99
+ summary: Rubyscholar scrapes one google scholar and formats it into a scholar.html
100
+ file listing publications.
93
101
  test_files: []
data/.gitignore DELETED
@@ -1,18 +0,0 @@
1
- *.gem
2
- *.rbc
3
- .bundle
4
- .config
5
- coverage
6
- InstalledFiles
7
- lib/bundler/man
8
- pkg
9
- rdoc
10
- spec/reports
11
- test/tmp
12
- test/version_tmp
13
- tmp
14
-
15
- # YARD artifacts
16
- .yardoc
17
- _yardoc
18
- doc/
data/CHANGELOG.md DELETED
@@ -1,6 +0,0 @@
1
- ## Changelog
2
-
3
- version 0.0.4
4
-
5
- * Support for custom config files and output file in commands.
6
- * Command for creating a config.yml file using `rubyscholar init`.
data/Rakefile DELETED
@@ -1 +0,0 @@
1
- require "bundler/gem_tasks"
data/bin/scrape.rb DELETED
@@ -1,20 +0,0 @@
1
- require_relative '../lib/rubyscholar'
2
- require 'yaml'
3
-
4
- config = YAML.load_file('config.yml')
5
- parsed = Rubyscholar::Parser.new(config["url"],
6
- config["email"])
7
- formatter = Rubyscholar::Formatter.new(parsed,
8
- config["highlight"],
9
- config["pdfs"],
10
- config["altmetricDOIs"],
11
- config["minCitations"].to_i)
12
-
13
- html = formatter.to_html
14
- config["italicize"].each do |term|
15
- html.gsub!( term , '<em>' + term + '</em>')
16
- end
17
-
18
- f= File.open('scholar.html','w')
19
- f.write html
20
- f.close