gentle-scholar 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/publication.rb +78 -0
  3. metadata +100 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a376293e4fa94ea7d9e02be8b1c481a82d662384
4
+ data.tar.gz: 1c0739ae9eb14ed3e15fcb45f86d47b0d85330a4
5
+ SHA512:
6
+ metadata.gz: 05a400e3f02cec668146bf5ae377631478586883f779a4026a2ed61a6bb18008875a402279c0fa9eaf02d78b77a9817d7b6d6aea3581125c98284f33deb778b2
7
+ data.tar.gz: 6773cb9144c86bee17eff3b3e48b23bf8a4f6bf0da11cd6907e836b49cb807c04db4a1d41b4723c47005e620bc035d4e22328034eb79d728f609edd9e59f7f7c
@@ -0,0 +1,78 @@
1
+ # Author:: Soumya Ray (mailto: soumya.ray@gmail.com)
2
+ # License:: MIT
3
+
4
+ require 'typhoeus'
5
+ require 'nokogiri'
6
+ require 'date'
7
+
8
# Loads a single publication page from Google Scholar and exposes the
# values scraped from it (including dynamic ones such as the citation
# count) as read-only attributes.
#
# Values whose element is absent from the fetched page are reported as nil
# (+cites+ as 0, +authors+ as an empty array) rather than aborting
# construction with a NoMethodError.
class GScholarPub
  GSCHOLAR_HOST_URL = 'http://scholar.google.com'.freeze
  GSCHOLAR_CIT_URL =
    'http://scholar.google.com/citations?view_op=view_citation&hl=en'.freeze

  attr_reader :title, :cites, :cites_url, :chart_url, :article_url
  attr_reader :authors, :date, :journal, :volume, :issue, :pages, :publisher
  attr_reader :description, :gscholar_url
  # TODO: @doc only for development, testing modes
  attr_reader :doc

  # XPath queries for the values read directly from page elements.
  SCAN_STR = {
    gscholar_url:
      "//div[contains(@class,'g-section cit-dgb')]/div/table/tr/td/a",
    cites:        "//div[contains(@id,'scholar_sec')]/div/a",
    title:        '//div[@id="title"]/a',
    article_url:  '//div[@id="title"]/a',
    chart_url:    '//div[contains(@class,"cit-dd")]/img'
  }.freeze

  # Attribute name => label that the matching table row starts with.
  TABLE_ATTR = {
    authors:      'Authors',
    date:         'Publication date',
    journal:      'Journal name',
    volume:       'Volume',
    issue:        'Issue',
    pages:        'Pages',
    publisher:    'Publisher',
    description:  'Description'
  }.freeze

  # Year, optionally followed by "/month" and "/day".
  DATE_PATTERN = %r{\A(\d+)(?:/(\d{1,2}))?(?:/(\d{1,2}))?\z}

  # Fetches and parses the citation page of one publication.
  #
  # scholar_pub_id:: String of the form "<author_id>:<publication_id>".
  #
  # Raises ArgumentError when either part of the id is missing, instead of
  # failing later while the request URL is concatenated.
  def initialize(scholar_pub_id)
    auth_id, pub_id = scholar_pub_id.to_s.split(':')
    if auth_id.nil? || auth_id.empty? || pub_id.nil? || pub_id.empty?
      raise ArgumentError,
            "expected '<author_id>:<publication_id>', got " \
            "#{scholar_pub_id.inspect}"
    end

    url = "#{GSCHOLAR_CIT_URL}&user=#{auth_id}" \
          "&citation_for_view=#{auth_id}:#{pub_id}"
    res = Typhoeus::Request.new(url).run
    @doc = Nokogiri::HTML(res.response_body)

    extract_html_elements
    extract_html_table
  end

  # Reads the values located by SCAN_STR from @doc.
  #
  # +cites+ is the first run of digits in the text of the matched citation
  # links (0 when there is none). URL attributes are nil when no matching
  # element, or no such attribute, is present.
  def extract_html_elements
    cite_links = @doc.xpath(SCAN_STR[:cites])
    @cites = cite_links.text[/\d+/].to_i
    @cites_url = attr_of(cite_links, 'href')

    @title = @doc.xpath(SCAN_STR[:title]).text
    @article_url = attr_of(@doc.xpath(SCAN_STR[:article_url]), 'href')

    @chart_url = attr_of(@doc.xpath(SCAN_STR[:chart_url]), 'src')

    # The link on the page is host-relative, so the host is prepended.
    path = attr_of(@doc.xpath(SCAN_STR[:gscholar_url]), 'href')
    @gscholar_url = path && GSCHOLAR_HOST_URL + path
  end

  # Reads the label/value rows named in TABLE_ATTR from @doc.
  #
  # Each attribute is set to the text of the row's second child, or nil when
  # the row is missing. Afterwards +authors+ is converted to an array with
  # one entry per comma-separated author, each entry being that author's
  # whitespace-separated name parts, and +date+ is converted to a Date.
  def extract_html_table
    TABLE_ATTR.each do |field, label|
      instance_variable_set("@#{field}", table_text(label))
    end

    @authors = @authors.to_s.split(',').map { |name| name.split(' ') }
    @date = parse_date(@date)
  end

  private

  # Value of attribute +name+ on the first node of +nodes+, or nil.
  def attr_of(nodes, name)
    node = nodes.first
    node && node[name]
  end

  # Text of the second child of the first div whose content starts with
  # +label+, or nil when there is no such div or child.
  def table_text(label)
    row = @doc.xpath("//div[starts-with(.,'#{label}')]").first
    cell = row && row.children[1]
    cell && cell.text
  end

  # Converts "YYYY/M/D", "YYYY/M" or "YYYY" into a Date; a missing month or
  # day defaults to 1. Returns nil for nil or blank text.
  #
  # Raises ArgumentError for any other text or for an impossible date.
  def parse_date(text)
    stripped = text.to_s.strip
    return nil if stripped.empty?

    match = DATE_PATTERN.match(stripped)
    raise ArgumentError, "invalid date: #{text.inspect}" unless match

    year, month, day = match.captures.map { |part| part && part.to_i }
    Date.new(year, month || 1, day || 1)
  end
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gentle-scholar
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Soumya Ray
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest-rg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 1.6.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.6.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: typhoeus
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 0.6.8
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 0.6.8
69
+ description: Extract author/paper info from Google Scholar
70
+ email: soumya.ray@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - lib/publication.rb
76
+ homepage: https://github.com/soumyaray/gentle-scholar
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.1.11
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: Google Scholar info extractor
100
+ test_files: []