gentle-scholar 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/publication.rb +78 -0
  3. metadata +100 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a376293e4fa94ea7d9e02be8b1c481a82d662384
4
+ data.tar.gz: 1c0739ae9eb14ed3e15fcb45f86d47b0d85330a4
5
+ SHA512:
6
+ metadata.gz: 05a400e3f02cec668146bf5ae377631478586883f779a4026a2ed61a6bb18008875a402279c0fa9eaf02d78b77a9817d7b6d6aea3581125c98284f33deb778b2
7
+ data.tar.gz: 6773cb9144c86bee17eff3b3e48b23bf8a4f6bf0da11cd6907e836b49cb807c04db4a1d41b4723c47005e620bc035d4e22328034eb79d728f609edd9e59f7f7c
data/lib/publication.rb ADDED
@@ -0,0 +1,78 @@
1
+ # Author:: Soumya Ray (mailto: soumya.ray@gmail.com)
2
+ # License:: MIT
3
+
4
+ require 'typhoeus'
5
+ require 'nokogiri'
6
+ require 'date'
7
+
8
# Loads a single publication page from Google Scholar and exposes its
# attributes, including dynamic ones such as the number of citations.
#
# Usage:
#   pub = GScholarPub.new('<author_id>:<publication_id>')
#   pub.title    # => String
#   pub.cites    # => Integer
#   pub.authors  # => Array of Arrays of name parts
#
# Attributes that cannot be located in the fetched page are left as +nil+,
# except +cites+ (falls back to 0), +title+ (falls back to "") and
# +authors+ (falls back to []).
class GScholarPub
  # Raised by #initialize when the citation page cannot be fetched.
  FetchError = Class.new(StandardError)

  GSCHOLAR_HOST_URL = 'http://scholar.google.com'.freeze
  GSCHOLAR_CIT_URL =
    'http://scholar.google.com/citations?view_op=view_citation&hl=en'.freeze

  attr_reader :title, :cites, :cites_url, :chart_url, :article_url
  attr_reader :authors, :date, :journal, :volume, :issue, :pages, :publisher
  attr_reader :description, :gscholar_url
  # TODO: @doc only for development, testing modes
  attr_reader :doc

  # XPath expressions locating the individually extracted page elements.
  SCAN_STR = {
    gscholar_url:
      "//div[contains(@class,'g-section cit-dgb')]/div/table/tr/td/a",
    cites:        "//div[contains(@id,'scholar_sec')]/div/a",
    title:        '//div[@id="title"]/a',
    article_url:  '//div[@id="title"]/a',
    chart_url:    '//div[contains(@class,"cit-dd")]/img'
  }.freeze

  # Attribute name => label text that starts the matching table row.
  TABLE_ATTR = {
    authors:      'Authors',
    date:         'Publication date',
    journal:      'Journal name',
    volume:       'Volume',
    issue:        'Issue',
    pages:        'Pages',
    publisher:    'Publisher',
    description:  'Description'
  }.freeze

  # Date layouts tried in order, most specific first, so that partial
  # dates such as "2010/5" or "2010" parse instead of raising.
  DATE_FORMATS = ['%Y/%m/%d', '%Y/%m', '%Y'].freeze

  # Fetches and parses the citation page of one publication.
  #
  # scholar_pub_id:: String of the form "<author_id>:<publication_id>"
  #
  # Raises ArgumentError when the id lacks either part (checked before any
  # network access) and FetchError when the HTTP request does not succeed.
  def initialize(scholar_pub_id)
    auth_id, pub_id = scholar_pub_id.to_s.split(':')
    if [auth_id, pub_id].any? { |part| part.nil? || part.empty? }
      raise ArgumentError,
            "expected '<author_id>:<publication_id>', " \
            "got #{scholar_pub_id.inspect}"
    end

    url = "#{GSCHOLAR_CIT_URL}&user=#{auth_id}" \
          "&citation_for_view=#{auth_id}:#{pub_id}"
    # Follow redirects (the host may answer with one, e.g. http -> https)
    # rather than parsing the redirect stub as if it were the page.
    res = Typhoeus::Request.new(url, followlocation: true).run
    unless res.success?
      raise FetchError, "could not fetch #{url} (HTTP #{res.code})"
    end
    @doc = Nokogiri::HTML(res.response_body)

    extract_html_elements
    extract_html_table
  end

  # Reads citation count, title and the various URLs out of @doc.
  # An element that is not present yields nil for its attribute instead
  # of raising NoMethodError on nil.
  def extract_html_elements
    # First run of digits in the citation link text; 0 when there is none.
    @cites = @doc.xpath(SCAN_STR[:cites]).text[/\d+/].to_i
    @cites_url = first_attribute(:cites, 'href')

    @title = @doc.xpath(SCAN_STR[:title]).text
    @article_url = first_attribute(:article_url, 'href')

    @chart_url = first_attribute(:chart_url, 'src')

    # The page link is host-relative, so prefix the host when it exists.
    path = first_attribute(:gscholar_url, 'href')
    @gscholar_url = path && GSCHOLAR_HOST_URL + path
  end

  # Fills the attributes listed in TABLE_ATTR from the label/value rows of
  # @doc, then converts @authors to an Array of name-part Arrays and @date
  # to a Date. Rows that are missing leave their attribute nil.
  def extract_html_table
    TABLE_ATTR.each do |attr, label|
      instance_variable_set("@#{attr}", table_value(label))
    end

    @authors = parse_authors(@authors)
    @date = parse_date(@date)
  end

  private

  # Value of attribute +name+ on the first node matched by SCAN_STR[key],
  # or nil when nothing matches or the attribute is absent.
  def first_attribute(key, name)
    node = @doc.xpath(SCAN_STR[key]).first
    node && node[name]
  end

  # Text of the second child of the first div whose content starts with
  # +label+ (the value next to the label), or nil when there is no such row.
  def table_value(label)
    row = @doc.xpath("//div[starts-with(.,'#{label}')]").first
    cell = row && row.children[1]
    cell && cell.text
  end

  # "A B, C D" => [["A", "B"], ["C", "D"]]; nil => [].
  def parse_authors(text)
    return [] if text.nil?
    text.split(',').map { |name| name.split(' ') }
  end

  # Parses "YYYY/MM/DD", "YYYY/MM" or "YYYY" into a Date (missing parts
  # default to 1). Returns nil for nil/blank input; raises ArgumentError
  # for any other text that matches none of DATE_FORMATS.
  def parse_date(text)
    cleaned = text.to_s.strip
    return nil if cleaned.empty?

    DATE_FORMATS.each do |format|
      begin
        return Date.strptime(cleaned, format)
      rescue ArgumentError
        next
      end
    end
    raise ArgumentError, "invalid date: #{text.inspect}"
  end
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gentle-scholar
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Soumya Ray
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest-rg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 1.6.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.6.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: typhoeus
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 0.6.8
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 0.6.8
69
+ description: Extract author/paper info from Google Scholar
70
+ email: soumya.ray@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - lib/publication.rb
76
+ homepage: https://github.com/soumyaray/gentle-scholar
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.1.11
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: Google Scholar info extractor
100
+ test_files: []