citesight 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +1 -0
- data/.gitignore +3 -0
- data/.travis.yml +10 -0
- data/Gemfile +7 -0
- data/LICENSE +21 -0
- data/README.md +28 -0
- data/Rakefile +6 -0
- data/bin/citesight +4 -1
- data/citesight.gemspec +23 -0
- data/lib/citesight.rb +1 -55
- data/lib/citesight/paper_citations.rb +57 -0
- data/lib/citesight/version.rb +4 -0
- data/spec/citesight_spec.rb +48 -0
- data/spec/minitest_helper.rb +44 -0
- data/spec/testfiles/large_test.txt +1 -0
- data/spec/testfiles/large_test_results.txt +98 -0
- data/spec/testfiles/no_match.txt +3 -0
- data/spec/testfiles/test.txt +13 -0
- data/spec/testfiles/test_cases.json +54 -0
- metadata +29 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e11117f9c3c2e1eacd42126bc7a97ed7841ee28
|
4
|
+
data.tar.gz: a0dfd14a313ec7ece65bed421eb335fdda9427de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ace1dc22e05ee5eebe3383d075d8965ab503209bbd14dd8d72ca042aff3608c7497404fd1d5acebe5317c35b5e6c973604f527665f7425cbfa26503436da5b3e
|
7
|
+
data.tar.gz: 49ab665d7ea38767d508681707a15c0c6063dad04df974354e352a859311e4bf6b2b00b9c673391e0b5a6498102a777ac76c91a3fe4106ce4ea78f58bc15bba2
|
data/.bundle/config
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--- {}
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Soumya Ray
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# citesight
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/citesight.svg)](http://badge.fury.io/rb/citesight)
|
3
|
+
[![Build Status](https://travis-ci.org/soumyaray/citesight.svg?branch=master)](https://travis-ci.org/soumyaray/citesight)
|
4
|
+
|
5
|
+
home: [https://github.com/soumyaray/citesight](https://github.com/soumyaray/citesight)
|
6
|
+
Gem to extract and report on citations in an academic text
|
7
|
+
|
8
|
+
## Usage
|
9
|
+
This gem may be used as a command line utility or called from code
|
10
|
+
|
11
|
+
### CLI:
|
12
|
+
citesight mydoc.txt
|
13
|
+
|
14
|
+
### Code example:
|
15
|
+
require 'citesight'
|
16
|
+
require 'pp'
|
17
|
+
|
18
|
+
contents = File.read("spec/testfiles/large_test.txt", :encoding => "UTF-8")
|
19
|
+
cites = CiteSight::PaperCitations.unique_cites(contents)
|
20
|
+
|
21
|
+
puts "\nTotal unique citations: #{cites.count}"
|
22
|
+
PP.pp(Hash[cites])
|
23
|
+
|
24
|
+
top_cite = cites.sort_by { |_c, count| count}.reverse.first[0]
|
25
|
+
puts "\nYour top citation: #{top_cite}"
|
26
|
+
|
27
|
+
top_cite_indexes = CiteSight::PaperCitations.index_of_cite(contents, top_cite)
|
28
|
+
puts "It was cited at locations: #{top_cite_indexes.join(', ')}"
|
data/Rakefile
ADDED
data/bin/citesight
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
3
4
|
require 'citesight'
|
4
5
|
require 'pp'
|
5
6
|
|
@@ -9,7 +10,9 @@ fail ArgumentError, "Usage: get_citations [filename]\n" if ARGV.count == 0
|
|
9
10
|
|
10
11
|
contents = File.open(ARGV[0], 'r').read
|
11
12
|
|
12
|
-
results = PaperCitations.unique_cites(contents).sort_by
|
13
|
+
results = CiteSight::PaperCitations.unique_cites(contents).sort_by do |c, _|
|
14
|
+
c.downcase
|
15
|
+
end
|
13
16
|
|
14
17
|
puts "Top citations:\n"
|
15
18
|
PP.pp(Hash[results.sort_by { |_cite, count| count }.reverse.take(5)])
|
data/citesight.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require 'citesight/version'
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = 'citesight'
|
6
|
+
s.version = CiteSight::VERSION
|
7
|
+
s.date = CiteSight::DATE
|
8
|
+
|
9
|
+
s.executables << 'citesight'
|
10
|
+
s.add_development_dependency 'minitest'
|
11
|
+
s.add_development_dependency 'minitest-rg'
|
12
|
+
|
13
|
+
s.summary = 'Citation extractor and analyzer'
|
14
|
+
s.description = 'Extract and analyze citations from APA style text'
|
15
|
+
s.authors = ['Soumya Ray']
|
16
|
+
s.email = 'soumya.ray@gmail.com'
|
17
|
+
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
|
21
|
+
s.homepage = 'https://github.com/soumyaray/citesight'
|
22
|
+
s.license = 'MIT'
|
23
|
+
end
|
data/lib/citesight.rb
CHANGED
@@ -1,55 +1 @@
|
|
1
|
-
|
2
|
-
# The unique_cites method returns a hash of citations and counts
|
3
|
-
# in the order in which they were encountered.
|
4
|
-
class PaperCitations
|
5
|
-
def self.unique_cites(contents)
|
6
|
-
new(contents).unique_cites
|
7
|
-
end
|
8
|
-
|
9
|
-
def self.index_of_cite(contents, cite)
|
10
|
-
new(contents).index_of_cite(cite)
|
11
|
-
end
|
12
|
-
|
13
|
-
def initialize(contents)
|
14
|
-
@contents = contents
|
15
|
-
end
|
16
|
-
|
17
|
-
def unique_cites
|
18
|
-
clean_cites = @contents.scan(cite_match).map do |c|
|
19
|
-
remove_punctuation(c[0])
|
20
|
-
end
|
21
|
-
|
22
|
-
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
23
|
-
end
|
24
|
-
|
25
|
-
##
|
26
|
-
# Returns array of citation locations (index) in manuscript
|
27
|
-
#
|
28
|
-
# contents = File.read('./spec/testfiles/test.txt')
|
29
|
-
# PaperCitations.index_cite(contents, 'Peters et al. 2007')
|
30
|
-
# # => [219, 500]
|
31
|
-
def index_of_cite(cite)
|
32
|
-
cite_parts = cite.split
|
33
|
-
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
34
|
-
year_s = cite_parts.last
|
35
|
-
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
36
|
-
).map { Regexp.last_match.begin(0) }
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
42
|
-
def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
|
43
|
-
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
44
|
-
def possessive() "([\'\u2019]s|[\'\u2019])" end
|
45
|
-
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
46
|
-
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
47
|
-
|
48
|
-
def cite_match
|
49
|
-
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
50
|
-
end
|
51
|
-
|
52
|
-
def remove_punctuation(cite)
|
53
|
-
cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
|
54
|
-
end
|
55
|
-
end
|
1
|
+
require 'citesight/paper_citations.rb'
|
data/lib/citesight/paper_citations.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# This class extracts and counts APA-style citations in a paper.
|
2
|
+
# The unique_cites method returns a hash of citations and counts
|
3
|
+
# in the order in which they were encountered.
|
4
|
+
module CiteSight
|
5
|
+
class PaperCitations
|
6
|
+
def self.unique_cites(contents)
|
7
|
+
new(contents).unique_cites
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.index_of_cite(contents, cite)
|
11
|
+
new(contents).index_of_cite(cite)
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize(contents)
|
15
|
+
@contents = contents
|
16
|
+
end
|
17
|
+
|
18
|
+
def unique_cites
|
19
|
+
clean_cites = @contents.scan(cite_match).map do |c|
|
20
|
+
remove_punctuation(c[0])
|
21
|
+
end
|
22
|
+
|
23
|
+
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# Returns array of citation locations (index) in manuscript
|
28
|
+
#
|
29
|
+
# contents = File.read('./spec/testfiles/test.txt')
|
30
|
+
# PaperCitations.index_of_cite(contents, 'Peters et al. 2007')
|
31
|
+
# # => [219, 500]
|
32
|
+
def index_of_cite(cite)
|
33
|
+
cite_parts = cite.split
|
34
|
+
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
35
|
+
year_s = cite_parts.last
|
36
|
+
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
37
|
+
).map { Regexp.last_match.begin(0) }
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
43
|
+
def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
|
44
|
+
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
45
|
+
def possessive() "([\'\u2019]s|[\'\u2019])" end
|
46
|
+
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
47
|
+
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
48
|
+
|
49
|
+
def cite_match
|
50
|
+
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
51
|
+
end
|
52
|
+
|
53
|
+
def remove_punctuation(cite)
|
54
|
+
cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/spec/citesight_spec.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'minitest/rg'
|
3
|
+
require './spec/minitest_helper.rb'
|
4
|
+
|
5
|
+
describe 'Paper', 'A text document' do
|
6
|
+
|
7
|
+
describe 'when there are citations in the text' do
|
8
|
+
before do
|
9
|
+
@results = CiteSight::PaperCitations.unique_cites(TEST_CONTENTS)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should fine the right citations' do
|
13
|
+
@results.must_equal TEST_CITES
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
describe 'when there are no citations to be found' do
|
18
|
+
it 'should return an empty hash' do
|
19
|
+
no_cites_txt = 'these are not the citations you are looking for'
|
20
|
+
# TODO: check return of empty hash
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'Cases', 'Test different citation cases' do
|
26
|
+
TEST_CASES.keys.each do |k|
|
27
|
+
it "can detect #{k}" do
|
28
|
+
CiteSight::PaperCitations.unique_cites(TEST_CASES[k]["case"]).to_a\
|
29
|
+
.must_equal(TEST_CASES[k]["result"])
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe 'Indexes', 'Accurately find index of different citations' do
|
35
|
+
it "can find the right index for all citations" do
|
36
|
+
TEST_CITES.map do |cite, _count|
|
37
|
+
TEST_INDEXES[cite].must_equal \
|
38
|
+
CiteSight::PaperCitations.index_of_cite(TEST_CONTENTS, cite)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
## Produce hash of all cite indexes:
|
44
|
+
# Hash[
|
45
|
+
# TEST_CITES.map do |cite, count|
|
46
|
+
# [cite, CiteSight::PaperCitations.index_of_cite(doc, cite)]
|
47
|
+
# end
|
48
|
+
# ]
|
data/spec/minitest_helper.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'json'
|
2
|
+
require './lib/citesight.rb'
|
3
|
+
|
4
|
+
TEST_CONTENTS = File.read('./spec/testfiles/test.txt')
|
5
|
+
|
6
|
+
TEST_CITES =
|
7
|
+
{
|
8
|
+
"Charlton 2002" => 1,
|
9
|
+
"Griffiths 2000" => 1,
|
10
|
+
"Peters et al. 2007" => 2,
|
11
|
+
"Ma and Agarwal 2007" => 1,
|
12
|
+
"Hur 2007" => 2,
|
13
|
+
"Oreg 1995b" => 1,
|
14
|
+
"Ray 2000" => 2,
|
15
|
+
"Ma et al. 2002" => 3,
|
16
|
+
"Griffins 2000" => 1,
|
17
|
+
"O'Hern 2010" => 1,
|
18
|
+
"McDonald 2003" => 1,
|
19
|
+
"Wa'el et al. 1993" => 1,
|
20
|
+
"van der Aalst 2004" => 1,
|
21
|
+
"De Boor 1980" => 1
|
22
|
+
}
|
23
|
+
|
24
|
+
TEST_INDEXES =
|
25
|
+
{
|
26
|
+
"Charlton 2002"=>[40],
|
27
|
+
"Griffiths 2000"=>[97],
|
28
|
+
"Peters et al. 2007"=>[219, 500],
|
29
|
+
"Ma and Agarwal 2007"=>[174],
|
30
|
+
"Hur 2007"=>[146, 490],
|
31
|
+
"Oreg 1995b"=>[318],
|
32
|
+
"Ray 2000"=>[346, 432],
|
33
|
+
"Ma et al. 2002"=>[272, 377, 407],
|
34
|
+
"Griffins 2000"=>[522],
|
35
|
+
"O'Hern 2010"=>[566],
|
36
|
+
"McDonald 2003"=>[634],
|
37
|
+
"Wa'el et al. 1993"=>[583],
|
38
|
+
"van der Aalst 2004"=>[697],
|
39
|
+
"De Boor 1980"=>[729]
|
40
|
+
}
|
41
|
+
|
42
|
+
TEST_CASES = File.open('./spec/testfiles/test_cases.json', 'r') do |f|
|
43
|
+
JSON.load(f)
|
44
|
+
end
|
data/spec/testfiles/large_test_results.txt
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
Top citations:
|
2
|
+
{"Kim et al. 2005"=>13,
|
3
|
+
"Polites and Karahanna 2012"=>12,
|
4
|
+
"Turel et al. 2011b"=>11,
|
5
|
+
"Limayem et al. 2007"=>7,
|
6
|
+
"Kelley and Berridge 2002"=>5}
|
7
|
+
|
8
|
+
Total unique citations: 90
|
9
|
+
{"Aarts and Dijksterhuis 2000"=>3,
|
10
|
+
"Agarwal and Karahanna 2000"=>2,
|
11
|
+
"Ainslie 2001"=>2,
|
12
|
+
"Bagozzi and Yi 1988"=>2,
|
13
|
+
"Bagozzi et al. 2003"=>5,
|
14
|
+
"Bargh et al. 2001"=>2,
|
15
|
+
"Berke and Hyman 2000"=>1,
|
16
|
+
"Bhattacherjee 2001"=>1,
|
17
|
+
"Browne et al. 1993"=>1,
|
18
|
+
"Caplan 2002"=>3,
|
19
|
+
"Chak and Leung 2004"=>3,
|
20
|
+
"Charlton 2002"=>1,
|
21
|
+
"Charlton and Danforth 2010"=>1,
|
22
|
+
"Chou and Ting 2003"=>1,
|
23
|
+
"Corder and Corder 1974"=>1,
|
24
|
+
"Davis 2001"=>2,
|
25
|
+
"Davis et al. 1989"=>1,
|
26
|
+
"Davis et al. 1992"=>1,
|
27
|
+
"Davis et al. 2002"=>2,
|
28
|
+
"deGuinea and Markus 2009"=>1,
|
29
|
+
"Everitt and Robbins 2005"=>1,
|
30
|
+
"Fornell and Larcker 1981"=>2,
|
31
|
+
"Gollwitzer 1996"=>1,
|
32
|
+
"Gray 1970"=>2,
|
33
|
+
"Gray and McNaughton 2000"=>2,
|
34
|
+
"Griffiths 2000"=>1,
|
35
|
+
"Grodner and Reid 2010"=>1,
|
36
|
+
"Grover et al. 2011"=>1,
|
37
|
+
"Hampton et al. 2011"=>2,
|
38
|
+
"Harrison 1968"=>1,
|
39
|
+
"Harrison and Zajonc 1970"=>1,
|
40
|
+
"Holden 2001"=>2,
|
41
|
+
"Huang et al. 2009"=>1,
|
42
|
+
"Hur 2006"=>2,
|
43
|
+
"Jasperson et al. 2005"=>2,
|
44
|
+
"Jennett et al. 2008"=>3,
|
45
|
+
"Jöreskog and Sörbom 1996"=>1,
|
46
|
+
"Kelley and Berridge 2002"=>5,
|
47
|
+
"Kim 2009"=>2,
|
48
|
+
"Kim and Malhotra 2005"=>2,
|
49
|
+
"Kim and Son 2009"=>1,
|
50
|
+
"Kim et al. 2005"=>13,
|
51
|
+
"Kuss and Griffiths 2011"=>1,
|
52
|
+
"Lankton et al. 2010"=>1,
|
53
|
+
"Leary et al. 2005"=>1,
|
54
|
+
"Limayem and Hirt 2003"=>2,
|
55
|
+
"Limayem et al. 2007"=>7,
|
56
|
+
"Louis and Sutton 1991"=>2,
|
57
|
+
"Lunden 2012"=>2,
|
58
|
+
"Mellor et al. 2008"=>1,
|
59
|
+
"Mittal 1988"=>1,
|
60
|
+
"Moore and Benbasat 1991"=>1,
|
61
|
+
"Morahan-Martin and Schumacher 2000"=>3,
|
62
|
+
"Nicolaou and McKnight 2006"=>1,
|
63
|
+
"Oreg 2003"=>1,
|
64
|
+
"Peters et al. 2007"=>1,
|
65
|
+
"Phillips 2009"=>1,
|
66
|
+
"Podsakoff et al. 2003"=>1,
|
67
|
+
"Polites and Karahanna 2012"=>12,
|
68
|
+
"Robinson and Berridge 1993"=>5,
|
69
|
+
"Robinson and Berridge 2003"=>4,
|
70
|
+
"Rokeach 1960"=>1,
|
71
|
+
"Salaway et al. 2008"=>1,
|
72
|
+
"Seah and Cairns 2008"=>2,
|
73
|
+
"Segars 1997"=>1,
|
74
|
+
"Shotton 1991"=>1,
|
75
|
+
"Smith and Brenner 2012"=>1,
|
76
|
+
"Sutton 1987"=>1,
|
77
|
+
"Thadani and Cheung 2011"=>1,
|
78
|
+
"Tokunaga 2011"=>1,
|
79
|
+
"Turel and Serenko 2012"=>4,
|
80
|
+
"Turel et al. 2011"=>3,
|
81
|
+
"Turel et al. 2011a"=>3,
|
82
|
+
"Turel et al. 2011b"=>11,
|
83
|
+
"van der Heijden 2004"=>1,
|
84
|
+
"Venkatesh and Davis 2000"=>1,
|
85
|
+
"Venkatesh et al. 2000"=>1,
|
86
|
+
"Venkatesh et al. 2003"=>2,
|
87
|
+
"Venkatesh et al. 2008"=>1,
|
88
|
+
"Verplanken and Orbell 2003"=>1,
|
89
|
+
"Verplanken and Wood 2006"=>1,
|
90
|
+
"Verplanken et al. 1998"=>2,
|
91
|
+
"Wang 2001"=>2,
|
92
|
+
"Wang and Chu 2007"=>2,
|
93
|
+
"Wixom and Todd 2005"=>1,
|
94
|
+
"Yang and Tung 2007"=>1,
|
95
|
+
"Yellowlees and Marks 2007"=>1,
|
96
|
+
"Young 1998"=>2,
|
97
|
+
"Young 2004"=>2,
|
98
|
+
"Zait and Bertea 2011"=>1}
|
data/spec/testfiles/test.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
TEST CASES:
|
2
|
+
special \u2019 quotes: from Charlton’s (2002) global measure
|
3
|
+
trailing apostrophe: in Griffiths’ (2000) and others
|
4
|
+
Single author: from Hur (2007) and
|
5
|
+
Two authors: Ma and Agarwal (2007)
|
6
|
+
More than two authors: Peters et al.’s (2007) major work similar to others (Ma et al. 2002)
|
7
|
+
Possessive first author: even Oreg's (1995b) paper citing Ray's (2000) work
|
8
|
+
Short names: Ma et al.'s (2002)
|
9
|
+
Fragment1: Ma et al. (2002) et al. (Ray 2000) both published before 2005.
|
10
|
+
Multiple cites: in (Hur 2007, Peters et al.’s 2007, Griffins' 2000) from
|
11
|
+
Apostrophes in names: (O'Hern 2010) and Wa'el et al. (1993) or
|
12
|
+
Capitals within names: from McDonald (2003) we gather that
|
13
|
+
Multiword last names: even from van der Aalst (2004) or others (De Boor 1980).
|
data/spec/testfiles/test_cases.json
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
{
|
2
|
+
"special u2019 quotes": {
|
3
|
+
"case": "from Charlton’s (2002) global measure",
|
4
|
+
"result": [["Charlton 2002", 1]]
|
5
|
+
},
|
6
|
+
"trailing apostrophe": {
|
7
|
+
"case": "in Griffiths’ (2000) and others",
|
8
|
+
"result": [["Griffiths 2000", 1]]
|
9
|
+
},
|
10
|
+
"single author": {
|
11
|
+
"case": "from Hur (2007) and",
|
12
|
+
"result": [["Hur 2007", 1]]
|
13
|
+
},
|
14
|
+
"two authors": {
|
15
|
+
"case": "Ma and Agarwal (2007)",
|
16
|
+
"result": [["Ma and Agarwal 2007", 1]]
|
17
|
+
},
|
18
|
+
"more than two authors": {
|
19
|
+
"case": "Peters et al.’s (2007) major work",
|
20
|
+
"result": [["Peters et al. 2007", 1]]
|
21
|
+
},
|
22
|
+
"possesive first author": {
|
23
|
+
"case": "even Hur's (2007) paper citing Ray's (2000) work",
|
24
|
+
"result": [["Hur 2007", 1], ["Ray 2000", 1]]
|
25
|
+
},
|
26
|
+
"character in year": {
|
27
|
+
"case": "Oreg's (1995b) paper and Venkatesh (2003a)",
|
28
|
+
"result": [["Oreg 1995b", 1], ["Venkatesh 2003a", 1]]
|
29
|
+
},
|
30
|
+
"short last name": {
|
31
|
+
"case": "from Ma et al. (2002) we find that",
|
32
|
+
"result": [["Ma et al. 2002", 1]]
|
33
|
+
},
|
34
|
+
"fragment1": {
|
35
|
+
"case": "Ma et al. (2002) et al. (2000)",
|
36
|
+
"result": [["Ma et al. 2002", 1]]
|
37
|
+
},
|
38
|
+
"multiple cites": {
|
39
|
+
"case": "in (Hur 2007, Peters et al.’s 2007, Griffiths' 2000) from",
|
40
|
+
"result": [["Hur 2007", 1], ["Peters et al. 2007", 1], ["Griffiths 2000", 1]]
|
41
|
+
},
|
42
|
+
"apostrophes in name": {
|
43
|
+
"case": "We've seen (O'Hern 2010) and Wa'el et al. (1993) say",
|
44
|
+
"result": [["O'Hern 2010", 1], ["Wa'el et al. 1993", 1]]
|
45
|
+
},
|
46
|
+
"capital letters within names": {
|
47
|
+
"case": "from McDonald (2003) we gather that",
|
48
|
+
"result": [["McDonald 2003", 1]]
|
49
|
+
},
|
50
|
+
"multiword last names": {
|
51
|
+
"case": "even from van der Aalst (2004) or others (De Boor 1980) claim",
|
52
|
+
"result": [["van der Aalst 2004", 1], ["De Boor 1980", 1]]
|
53
|
+
}
|
54
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: citesight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Soumya Ray
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
@@ -45,8 +45,25 @@ executables:
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
-
|
48
|
+
- ".bundle/config"
|
49
|
+
- ".gitignore"
|
50
|
+
- ".travis.yml"
|
51
|
+
- Gemfile
|
52
|
+
- LICENSE
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
49
55
|
- bin/citesight
|
56
|
+
- citesight.gemspec
|
57
|
+
- lib/citesight.rb
|
58
|
+
- lib/citesight/paper_citations.rb
|
59
|
+
- lib/citesight/version.rb
|
60
|
+
- spec/citesight_spec.rb
|
61
|
+
- spec/minitest_helper.rb
|
62
|
+
- spec/testfiles/large_test.txt
|
63
|
+
- spec/testfiles/large_test_results.txt
|
64
|
+
- spec/testfiles/no_match.txt
|
65
|
+
- spec/testfiles/test.txt
|
66
|
+
- spec/testfiles/test_cases.json
|
50
67
|
homepage: https://github.com/soumyaray/citesight
|
51
68
|
licenses:
|
52
69
|
- MIT
|
@@ -67,8 +84,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
84
|
version: '0'
|
68
85
|
requirements: []
|
69
86
|
rubyforge_project:
|
70
|
-
rubygems_version: 2.1
|
87
|
+
rubygems_version: 2.5.1
|
71
88
|
signing_key:
|
72
89
|
specification_version: 4
|
73
90
|
summary: Citation extractor and analyzer
|
74
|
-
test_files:
|
91
|
+
test_files:
|
92
|
+
- spec/citesight_spec.rb
|
93
|
+
- spec/minitest_helper.rb
|
94
|
+
- spec/testfiles/large_test.txt
|
95
|
+
- spec/testfiles/large_test_results.txt
|
96
|
+
- spec/testfiles/no_match.txt
|
97
|
+
- spec/testfiles/test.txt
|
98
|
+
- spec/testfiles/test_cases.json
|