gentle-scholar 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/publication.rb +78 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a376293e4fa94ea7d9e02be8b1c481a82d662384
|
4
|
+
data.tar.gz: 1c0739ae9eb14ed3e15fcb45f86d47b0d85330a4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 05a400e3f02cec668146bf5ae377631478586883f779a4026a2ed61a6bb18008875a402279c0fa9eaf02d78b77a9817d7b6d6aea3581125c98284f33deb778b2
|
7
|
+
data.tar.gz: 6773cb9144c86bee17eff3b3e48b23bf8a4f6bf0da11cd6907e836b49cb807c04db4a1d41b4723c47005e620bc035d4e22328034eb79d728f609edd9e59f7f7c
|
data/lib/publication.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# Author:: Soumya Ray (mailto: soumya.ray@gmail.com)
|
2
|
+
# License:: MIT
|
3
|
+
|
4
|
+
require 'typhoeus'
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'date'
|
7
|
+
|
8
|
+
# Loads a single publication page from Google Scholar and exposes its
# attributes, including dynamic ones such as the number of citations.
#
# The publication is identified by '<author_id>:<publication_id>'. The page
# is fetched and parsed once, in the constructor.
#
# Attributes the page does not provide are +nil+, with two exceptions:
# +cites+ is 0 and +authors+ is an empty array.
class GScholarPub
  # Raised when the publication page cannot be fetched.
  class FetchError < StandardError; end

  GSCHOLAR_HOST_URL = 'http://scholar.google.com'.freeze
  GSCHOLAR_CIT_URL =
    'http://scholar.google.com/citations?view_op=view_citation&hl=en'.freeze

  attr_reader :title, :cites, :cites_url, :chart_url, :article_url
  attr_reader :authors, :date, :journal, :volume, :issue, :pages, :publisher
  attr_reader :description, :gscholar_url
  # TODO: @doc only for development, testing modes
  attr_reader :doc

  # XPath queries for the elements read by #extract_html_elements.
  SCAN_STR = {
    gscholar_url:
      "//div[contains(@class,'g-section cit-dgb')]/div/table/tr/td/a",
    cites: "//div[contains(@id,'scholar_sec')]/div/a",
    title: '//div[@id="title"]/a',
    article_url: '//div[@id="title"]/a',
    chart_url: '//div[contains(@class,"cit-dd")]/img'
  }.freeze

  # Attribute name => label that starts the matching row of the page's
  # details table; read by #extract_html_table.
  TABLE_ATTR = {
    authors: 'Authors',
    date: 'Publication date',
    journal: 'Journal name',
    volume: 'Volume',
    issue: 'Issue',
    pages: 'Pages',
    publisher: 'Publisher',
    description: 'Description'
  }.freeze

  # Dates given as 'YYYY', 'YYYY/M' or 'YYYY/M/D'.
  DATE_PATTERN = %r{\A(\d{4})(?:/(\d{1,2})(?:/(\d{1,2}))?)?\z}

  # Fetches and parses the publication page.
  #
  # scholar_pub_id:: String of the form '<author_id>:<publication_id>'
  #
  # Raises ArgumentError when the id does not have both parts, and
  # FetchError when the HTTP request does not succeed.
  def initialize(scholar_pub_id)
    auth_id, pub_id = scholar_pub_id.to_s.split(':')
    if [auth_id, pub_id].any? { |part| part.nil? || part.empty? }
      raise ArgumentError,
            "expected '<author_id>:<publication_id>', " \
            "got #{scholar_pub_id.inspect}"
    end

    url = "#{GSCHOLAR_CIT_URL}&user=#{auth_id}" \
          "&citation_for_view=#{auth_id}:#{pub_id}"
    res = Typhoeus::Request.new(url).run
    # Fail here with a clear message instead of letting an empty body
    # surface later as an unrelated error.
    unless res.success?
      raise FetchError,
            "could not fetch #{url} (HTTP status #{res.response_code})"
    end
    @doc = Nokogiri::HTML(res.response_body)

    extract_html_elements
    extract_html_table
  end

  # Reads the citation count, title and the various links from @doc.
  # An element that is not on the page leaves its attribute nil
  # (+cites+ is 0 and +title+ is an empty string in that case).
  def extract_html_elements
    @cites = @doc.xpath(SCAN_STR[:cites]).text[/\d+/].to_i
    @cites_url = first_attr(SCAN_STR[:cites], 'href')

    @title = @doc.xpath(SCAN_STR[:title]).text
    @article_url = first_attr(SCAN_STR[:article_url], 'href')

    @chart_url = first_attr(SCAN_STR[:chart_url], 'src')

    # The page gives this link relative to the host, so prefix the host.
    path = first_attr(SCAN_STR[:gscholar_url], 'href')
    @gscholar_url = path && GSCHOLAR_HOST_URL + path
  end

  # Fills every attribute listed in TABLE_ATTR from the details table, then
  # converts +authors+ into an array of name-part arrays and +date+ into a
  # Date. Rows that are absent leave their attribute nil.
  def extract_html_table
    TABLE_ATTR.each do |attr, label|
      instance_variable_set("@#{attr}", table_value(label))
    end

    # "A B Lee, C Diaz" => [["A", "B", "Lee"], ["C", "Diaz"]]
    @authors = @authors.to_s.split(',').map { |name| name.split(' ') }
    @date = parse_date(@date)
  end

  private

  # Value of attribute +name+ on the first node matching +query+, or nil
  # when nothing matches or the attribute is not set.
  def first_attr(query, name)
    node = @doc.xpath(query).first
    node && node[name]
  end

  # Text of the second child of the first div whose text starts with
  # +label+ (the value next to the label), or nil when there is none.
  def table_value(label)
    row = @doc.xpath("//div[starts-with(.,'#{label}')]").first
    cell = row && row.children[1]
    cell && cell.text
  end

  # Converts the page's date text into a Date. A missing month or day
  # defaults to 1; blank input gives nil. Text in any other shape is handed
  # to Date.strptime with the '%Y/%m/%d' format, which raises ArgumentError
  # when it cannot parse it.
  def parse_date(text)
    return nil if text.nil? || text.strip.empty?

    match = DATE_PATTERN.match(text)
    return Date.strptime(text, '%Y/%m/%d') unless match

    year, month, day = match.captures.map { |part| part && part.to_i }
    Date.new(year, month || 1, day || 1)
  end
end
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gentle-scholar
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Soumya Ray
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: minitest
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest-rg
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.6.2
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.6.2
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: typhoeus
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.6.8
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.6.8
|
69
|
+
description: Extract author/paper info from Google Scholar
|
70
|
+
email: soumya.ray@gmail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- lib/publication.rb
|
76
|
+
homepage: https://github.com/soumyaray/gentle-scholar
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata: {}
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.1.11
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: Google Scholar info extractor
|
100
|
+
test_files: []
|