citesight 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.bundle/config +1 -0
- data/.gitignore +3 -0
- data/.travis.yml +10 -0
- data/Gemfile +7 -0
- data/LICENSE +21 -0
- data/README.md +28 -0
- data/Rakefile +6 -0
- data/bin/citesight +4 -1
- data/citesight.gemspec +23 -0
- data/lib/citesight.rb +1 -55
- data/lib/citesight/paper_citations.rb +57 -0
- data/lib/citesight/version.rb +4 -0
- data/spec/citesight_spec.rb +48 -0
- data/spec/minitest_helper.rb +44 -0
- data/spec/testfiles/large_test.txt +1 -0
- data/spec/testfiles/large_test_results.txt +98 -0
- data/spec/testfiles/no_match.txt +3 -0
- data/spec/testfiles/test.txt +13 -0
- data/spec/testfiles/test_cases.json +54 -0
- metadata +29 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e11117f9c3c2e1eacd42126bc7a97ed7841ee28
|
4
|
+
data.tar.gz: a0dfd14a313ec7ece65bed421eb335fdda9427de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ace1dc22e05ee5eebe3383d075d8965ab503209bbd14dd8d72ca042aff3608c7497404fd1d5acebe5317c35b5e6c973604f527665f7425cbfa26503436da5b3e
|
7
|
+
data.tar.gz: 49ab665d7ea38767d508681707a15c0c6063dad04df974354e352a859311e4bf6b2b00b9c673391e0b5a6498102a777ac76c91a3fe4106ce4ea78f58bc15bba2
|
data/.bundle/config
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--- {}
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Soumya Ray
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# citesight
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/citesight.svg)](http://badge.fury.io/rb/citesight)
|
3
|
+
[![Build Status](https://travis-ci.org/soumyaray/citesight.svg?branch=master)](https://travis-ci.org/soumyaray/citesight)
|
4
|
+
|
5
|
+
home: [https://github.com/soumyaray/citesight](https://github.com/soumyaray/citesight)
|
6
|
+
Gem to extract and report on citations in an academic text
|
7
|
+
|
8
|
+
## Usage
|
9
|
+
This gem may be used as a command line utility or called from code
|
10
|
+
|
11
|
+
### CLI:
|
12
|
+
citesight mydoc.txt
|
13
|
+
|
14
|
+
### Code example:
|
15
|
+
require 'citesight'
|
16
|
+
require 'pp'
|
17
|
+
|
18
|
+
contents = File.read("spec/testfiles/large_test.txt", :encoding => "UTF-8")
|
19
|
+
cites = PaperCitations.unique_cites(contents)
|
20
|
+
|
21
|
+
puts "\nTotal unique citations: #{cites.count}"
|
22
|
+
PP.pp(Hash[cites])
|
23
|
+
|
24
|
+
top_cite = cites.sort_by { |_c, count| count}.reverse.first[0]
|
25
|
+
puts "\nYour top citation: #{top_cite}"
|
26
|
+
|
27
|
+
top_cite_indexes = PaperCitations.index_of_cite(contents, top_cite)
|
28
|
+
puts "It was cited at locations: #{top_cite_indexes.join(', ')}"
|
data/Rakefile
ADDED
data/bin/citesight
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
3
4
|
require 'citesight'
|
4
5
|
require 'pp'
|
5
6
|
|
@@ -9,7 +10,9 @@ fail ArgumentError, "Usage: get_citations [filename]\n" if ARGV.count == 0
|
|
9
10
|
|
10
11
|
contents = File.open(ARGV[0], 'r').read
|
11
12
|
|
12
|
-
results = PaperCitations.unique_cites(contents).sort_by
|
13
|
+
results = CiteSight::PaperCitations.unique_cites(contents).sort_by do |c, _|
|
14
|
+
c.downcase
|
15
|
+
end
|
13
16
|
|
14
17
|
puts "Top citations:\n"
|
15
18
|
PP.pp(Hash[results.sort_by { |_cite, count| count }.reverse.take(5)])
|
data/citesight.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require 'citesight/version'
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = 'citesight'
|
6
|
+
s.version = CiteSight::VERSION
|
7
|
+
s.date = CiteSight::DATE
|
8
|
+
|
9
|
+
s.executables << 'citesight'
|
10
|
+
s.add_development_dependency 'minitest'
|
11
|
+
s.add_development_dependency 'minitest-rg'
|
12
|
+
|
13
|
+
s.summary = 'Citation extractor and analyzer'
|
14
|
+
s.description = 'Extract and analyze citations from APA style text'
|
15
|
+
s.authors = ['Soumya Ray']
|
16
|
+
s.email = 'soumya.ray@gmail.com'
|
17
|
+
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
|
21
|
+
s.homepage = 'https://github.com/soumyaray/citesight'
|
22
|
+
s.license = 'MIT'
|
23
|
+
end
|
data/lib/citesight.rb
CHANGED
@@ -1,55 +1 @@
|
|
1
|
-
|
2
|
-
# The unique_cites method returns a hash of citations and counts
|
3
|
-
# in the order in which they were encountered.
|
4
|
-
class PaperCitations
|
5
|
-
def self.unique_cites(contents)
|
6
|
-
new(contents).unique_cites
|
7
|
-
end
|
8
|
-
|
9
|
-
def self.index_of_cite(contents, cite)
|
10
|
-
new(contents).index_of_cite(cite)
|
11
|
-
end
|
12
|
-
|
13
|
-
def initialize(contents)
|
14
|
-
@contents = contents
|
15
|
-
end
|
16
|
-
|
17
|
-
def unique_cites
|
18
|
-
clean_cites = @contents.scan(cite_match).map do |c|
|
19
|
-
remove_punctuation(c[0])
|
20
|
-
end
|
21
|
-
|
22
|
-
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
23
|
-
end
|
24
|
-
|
25
|
-
##
|
26
|
-
# Returns array of citation locations (index) in manuscript
|
27
|
-
#
|
28
|
-
# contents = File.read('./spec/testfiles/test.txt')
|
29
|
-
# PaperCitations.index_cite(contents, 'Peters et al. 2007')
|
30
|
-
# # => [219, 500]
|
31
|
-
def index_of_cite(cite)
|
32
|
-
cite_parts = cite.split
|
33
|
-
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
34
|
-
year_s = cite_parts.last
|
35
|
-
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
36
|
-
).map { Regexp.last_match.begin(0) }
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
42
|
-
def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
|
43
|
-
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
44
|
-
def possessive() "([\'\u2019]s|[\'\u2019])" end
|
45
|
-
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
46
|
-
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
47
|
-
|
48
|
-
def cite_match
|
49
|
-
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
50
|
-
end
|
51
|
-
|
52
|
-
def remove_punctuation(cite)
|
53
|
-
cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
|
54
|
-
end
|
55
|
-
end
|
1
|
+
require 'citesight/paper_citations.rb'
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# This class extracts and counts APA-style citations in a paper.
|
2
|
+
# The unique_cites method returns a hash of citations and counts
|
3
|
+
# in the order in which they were encountered.
|
4
|
+
module CiteSight
|
5
|
+
class PaperCitations
|
6
|
+
def self.unique_cites(contents)
|
7
|
+
new(contents).unique_cites
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.index_of_cite(contents, cite)
|
11
|
+
new(contents).index_of_cite(cite)
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize(contents)
|
15
|
+
@contents = contents
|
16
|
+
end
|
17
|
+
|
18
|
+
def unique_cites
|
19
|
+
clean_cites = @contents.scan(cite_match).map do |c|
|
20
|
+
remove_punctuation(c[0])
|
21
|
+
end
|
22
|
+
|
23
|
+
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# Returns array of citation locations (index) in manuscript
|
28
|
+
#
|
29
|
+
# contents = File.read('./spec/testfiles/test.txt')
|
30
|
+
# CiteSight::PaperCitations.index_of_cite(contents, 'Peters et al. 2007')
|
31
|
+
# # => [219, 500]
|
32
|
+
def index_of_cite(cite)
|
33
|
+
cite_parts = cite.split
|
34
|
+
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
35
|
+
year_s = cite_parts.last
|
36
|
+
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
37
|
+
).map { Regexp.last_match.begin(0) }
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
43
|
+
def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
|
44
|
+
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
45
|
+
def possessive() "([\'\u2019]s|[\'\u2019])" end
|
46
|
+
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
47
|
+
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
48
|
+
|
49
|
+
def cite_match
|
50
|
+
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
51
|
+
end
|
52
|
+
|
53
|
+
def remove_punctuation(cite)
|
54
|
+
cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'minitest/rg'
|
3
|
+
require './spec/minitest_helper.rb'
|
4
|
+
|
5
|
+
describe 'Paper', 'A text document' do
|
6
|
+
|
7
|
+
describe 'when there are citations in the text' do
|
8
|
+
before do
|
9
|
+
@results = CiteSight::PaperCitations.unique_cites(TEST_CONTENTS)
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should fine the right citations' do
|
13
|
+
@results.must_equal TEST_CITES
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
describe 'when there are no citations to be found' do
|
18
|
+
it 'should return an empty hash' do
|
19
|
+
no_cites_txt = 'these are not the citations you are looking for'
|
20
|
+
# TODO: check return of empty hash
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'Cases', 'Test different citation cases' do
|
26
|
+
TEST_CASES.keys.each do |k|
|
27
|
+
it "can detect #{k}" do
|
28
|
+
CiteSight::PaperCitations.unique_cites(TEST_CASES[k]["case"]).to_a\
|
29
|
+
.must_equal(TEST_CASES[k]["result"])
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe 'Indexes', 'Accurately find index of different citations' do
|
35
|
+
it "can find the right index for all citations" do
|
36
|
+
TEST_CITES.map do |cite, _count|
|
37
|
+
TEST_INDEXES[cite].must_equal \
|
38
|
+
CiteSight::PaperCitations.index_of_cite(TEST_CONTENTS, cite)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
## Produce hash of all cite indexes:
|
44
|
+
# Hash[
|
45
|
+
# TEST_CITES.map do |cite, count|
|
46
|
+
# [cite, CiteSight::PaperCitations.index_of_cite(doc, cite)]
|
47
|
+
# end
|
48
|
+
# ]
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'json'
|
2
|
+
require './lib/citesight.rb'
|
3
|
+
|
4
|
+
TEST_CONTENTS = File.read('./spec/testfiles/test.txt')
|
5
|
+
|
6
|
+
TEST_CITES =
|
7
|
+
{
|
8
|
+
"Charlton 2002" => 1,
|
9
|
+
"Griffiths 2000" => 1,
|
10
|
+
"Peters et al. 2007" => 2,
|
11
|
+
"Ma and Agarwal 2007" => 1,
|
12
|
+
"Hur 2007" => 2,
|
13
|
+
"Oreg 1995b" => 1,
|
14
|
+
"Ray 2000" => 2,
|
15
|
+
"Ma et al. 2002" => 3,
|
16
|
+
"Griffins 2000" => 1,
|
17
|
+
"O'Hern 2010" => 1,
|
18
|
+
"McDonald 2003" => 1,
|
19
|
+
"Wa'el et al. 1993" => 1,
|
20
|
+
"van der Aalst 2004" => 1,
|
21
|
+
"De Boor 1980" => 1
|
22
|
+
}
|
23
|
+
|
24
|
+
TEST_INDEXES =
|
25
|
+
{
|
26
|
+
"Charlton 2002"=>[40],
|
27
|
+
"Griffiths 2000"=>[97],
|
28
|
+
"Peters et al. 2007"=>[219, 500],
|
29
|
+
"Ma and Agarwal 2007"=>[174],
|
30
|
+
"Hur 2007"=>[146, 490],
|
31
|
+
"Oreg 1995b"=>[318],
|
32
|
+
"Ray 2000"=>[346, 432],
|
33
|
+
"Ma et al. 2002"=>[272, 377, 407],
|
34
|
+
"Griffins 2000"=>[522],
|
35
|
+
"O'Hern 2010"=>[566],
|
36
|
+
"McDonald 2003"=>[634],
|
37
|
+
"Wa'el et al. 1993"=>[583],
|
38
|
+
"van der Aalst 2004"=>[697],
|
39
|
+
"De Boor 1980"=>[729]
|
40
|
+
}
|
41
|
+
|
42
|
+
TEST_CASES = File.open('./spec/testfiles/test_cases.json', 'r') do |f|
|
43
|
+
JSON.load(f)
|
44
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
Top citations:
|
2
|
+
{"Kim et al. 2005"=>13,
|
3
|
+
"Polites and Karahanna 2012"=>12,
|
4
|
+
"Turel et al. 2011b"=>11,
|
5
|
+
"Limayem et al. 2007"=>7,
|
6
|
+
"Kelley and Berridge 2002"=>5}
|
7
|
+
|
8
|
+
Total unique citations: 90
|
9
|
+
{"Aarts and Dijksterhuis 2000"=>3,
|
10
|
+
"Agarwal and Karahanna 2000"=>2,
|
11
|
+
"Ainslie 2001"=>2,
|
12
|
+
"Bagozzi and Yi 1988"=>2,
|
13
|
+
"Bagozzi et al. 2003"=>5,
|
14
|
+
"Bargh et al. 2001"=>2,
|
15
|
+
"Berke and Hyman 2000"=>1,
|
16
|
+
"Bhattacherjee 2001"=>1,
|
17
|
+
"Browne et al. 1993"=>1,
|
18
|
+
"Caplan 2002"=>3,
|
19
|
+
"Chak and Leung 2004"=>3,
|
20
|
+
"Charlton 2002"=>1,
|
21
|
+
"Charlton and Danforth 2010"=>1,
|
22
|
+
"Chou and Ting 2003"=>1,
|
23
|
+
"Corder and Corder 1974"=>1,
|
24
|
+
"Davis 2001"=>2,
|
25
|
+
"Davis et al. 1989"=>1,
|
26
|
+
"Davis et al. 1992"=>1,
|
27
|
+
"Davis et al. 2002"=>2,
|
28
|
+
"deGuinea and Markus 2009"=>1,
|
29
|
+
"Everitt and Robbins 2005"=>1,
|
30
|
+
"Fornell and Larcker 1981"=>2,
|
31
|
+
"Gollwitzer 1996"=>1,
|
32
|
+
"Gray 1970"=>2,
|
33
|
+
"Gray and McNaughton 2000"=>2,
|
34
|
+
"Griffiths 2000"=>1,
|
35
|
+
"Grodner and Reid 2010"=>1,
|
36
|
+
"Grover et al. 2011"=>1,
|
37
|
+
"Hampton et al. 2011"=>2,
|
38
|
+
"Harrison 1968"=>1,
|
39
|
+
"Harrison and Zajonc 1970"=>1,
|
40
|
+
"Holden 2001"=>2,
|
41
|
+
"Huang et al. 2009"=>1,
|
42
|
+
"Hur 2006"=>2,
|
43
|
+
"Jasperson et al. 2005"=>2,
|
44
|
+
"Jennett et al. 2008"=>3,
|
45
|
+
"Jöreskog and Sörbom 1996"=>1,
|
46
|
+
"Kelley and Berridge 2002"=>5,
|
47
|
+
"Kim 2009"=>2,
|
48
|
+
"Kim and Malhotra 2005"=>2,
|
49
|
+
"Kim and Son 2009"=>1,
|
50
|
+
"Kim et al. 2005"=>13,
|
51
|
+
"Kuss and Griffiths 2011"=>1,
|
52
|
+
"Lankton et al. 2010"=>1,
|
53
|
+
"Leary et al. 2005"=>1,
|
54
|
+
"Limayem and Hirt 2003"=>2,
|
55
|
+
"Limayem et al. 2007"=>7,
|
56
|
+
"Louis and Sutton 1991"=>2,
|
57
|
+
"Lunden 2012"=>2,
|
58
|
+
"Mellor et al. 2008"=>1,
|
59
|
+
"Mittal 1988"=>1,
|
60
|
+
"Moore and Benbasat 1991"=>1,
|
61
|
+
"Morahan-Martin and Schumacher 2000"=>3,
|
62
|
+
"Nicolaou and McKnight 2006"=>1,
|
63
|
+
"Oreg 2003"=>1,
|
64
|
+
"Peters et al. 2007"=>1,
|
65
|
+
"Phillips 2009"=>1,
|
66
|
+
"Podsakoff et al. 2003"=>1,
|
67
|
+
"Polites and Karahanna 2012"=>12,
|
68
|
+
"Robinson and Berridge 1993"=>5,
|
69
|
+
"Robinson and Berridge 2003"=>4,
|
70
|
+
"Rokeach 1960"=>1,
|
71
|
+
"Salaway et al. 2008"=>1,
|
72
|
+
"Seah and Cairns 2008"=>2,
|
73
|
+
"Segars 1997"=>1,
|
74
|
+
"Shotton 1991"=>1,
|
75
|
+
"Smith and Brenner 2012"=>1,
|
76
|
+
"Sutton 1987"=>1,
|
77
|
+
"Thadani and Cheung 2011"=>1,
|
78
|
+
"Tokunaga 2011"=>1,
|
79
|
+
"Turel and Serenko 2012"=>4,
|
80
|
+
"Turel et al. 2011"=>3,
|
81
|
+
"Turel et al. 2011a"=>3,
|
82
|
+
"Turel et al. 2011b"=>11,
|
83
|
+
"van der Heijden 2004"=>1,
|
84
|
+
"Venkatesh and Davis 2000"=>1,
|
85
|
+
"Venkatesh et al. 2000"=>1,
|
86
|
+
"Venkatesh et al. 2003"=>2,
|
87
|
+
"Venkatesh et al. 2008"=>1,
|
88
|
+
"Verplanken and Orbell 2003"=>1,
|
89
|
+
"Verplanken and Wood 2006"=>1,
|
90
|
+
"Verplanken et al. 1998"=>2,
|
91
|
+
"Wang 2001"=>2,
|
92
|
+
"Wang and Chu 2007"=>2,
|
93
|
+
"Wixom and Todd 2005"=>1,
|
94
|
+
"Yang and Tung 2007"=>1,
|
95
|
+
"Yellowlees and Marks 2007"=>1,
|
96
|
+
"Young 1998"=>2,
|
97
|
+
"Young 2004"=>2,
|
98
|
+
"Zait and Bertea 2011"=>1}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
TEST CASES:
|
2
|
+
special \u2019 quotes: from Charlton’s (2002) global measure
|
3
|
+
trailing apostrophe: in Griffiths’ (2000) and others
|
4
|
+
Single author: from Hur (2007) and
|
5
|
+
Two authors: Ma and Agarwal (2007)
|
6
|
+
More than two authors: Peters et al.’s (2007) major work similar to others (Ma et al. 2002)
|
7
|
+
Possessive first author: even Oreg's (1995b) paper citing Ray's (2000) work
|
8
|
+
Short names: Ma et al.'s (2002)
|
9
|
+
Fragment1: Ma et al. (2002) et al. (Ray 2000) both published before 2005.
|
10
|
+
Multiple cites: in (Hur 2007, Peters et al.’s 2007, Griffins' 2000) from
|
11
|
+
Apostrophes in names: (O'Hern 2010) and Wa'el et al. (1993) or
|
12
|
+
Capitals within names: from McDonald (2003) we gather that
|
13
|
+
Multiword last names: even from van der Aalst (2004) or others (De Boor 1980).
|
@@ -0,0 +1,54 @@
|
|
1
|
+
{
|
2
|
+
"special u2019 quotes": {
|
3
|
+
"case": "from Charlton’s (2002) global measure",
|
4
|
+
"result": [["Charlton 2002", 1]]
|
5
|
+
},
|
6
|
+
"trailing apostrophe": {
|
7
|
+
"case": "in Griffiths’ (2000) and others",
|
8
|
+
"result": [["Griffiths 2000", 1]]
|
9
|
+
},
|
10
|
+
"single author": {
|
11
|
+
"case": "from Hur (2007) and",
|
12
|
+
"result": [["Hur 2007", 1]]
|
13
|
+
},
|
14
|
+
"two authors": {
|
15
|
+
"case": "Ma and Agarwal (2007)",
|
16
|
+
"result": [["Ma and Agarwal 2007", 1]]
|
17
|
+
},
|
18
|
+
"more than two authors": {
|
19
|
+
"case": "Peters et al.’s (2007) major work",
|
20
|
+
"result": [["Peters et al. 2007", 1]]
|
21
|
+
},
|
22
|
+
"possesive first author": {
|
23
|
+
"case": "even Hur's (2007) paper citing Ray's (2000) work",
|
24
|
+
"result": [["Hur 2007", 1], ["Ray 2000", 1]]
|
25
|
+
},
|
26
|
+
"character in year": {
|
27
|
+
"case": "Oreg's (1995b) paper and Venkatesh (2003a)",
|
28
|
+
"result": [["Oreg 1995b", 1], ["Venkatesh 2003a", 1]]
|
29
|
+
},
|
30
|
+
"short last name": {
|
31
|
+
"case": "from Ma et al. (2002) we find that",
|
32
|
+
"result": [["Ma et al. 2002", 1]]
|
33
|
+
},
|
34
|
+
"fragment1": {
|
35
|
+
"case": "Ma et al. (2002) et al. (2000)",
|
36
|
+
"result": [["Ma et al. 2002", 1]]
|
37
|
+
},
|
38
|
+
"multiple cites": {
|
39
|
+
"case": "in (Hur 2007, Peters et al.’s 2007, Griffiths' 2000) from",
|
40
|
+
"result": [["Hur 2007", 1], ["Peters et al. 2007", 1], ["Griffiths 2000", 1]]
|
41
|
+
},
|
42
|
+
"apostrophes in name": {
|
43
|
+
"case": "We've seen (O'Hern 2010) and Wa'el et al. (1993) say",
|
44
|
+
"result": [["O'Hern 2010", 1], ["Wa'el et al. 1993", 1]]
|
45
|
+
},
|
46
|
+
"capital letters within names": {
|
47
|
+
"case": "from McDonald (2003) we gather that",
|
48
|
+
"result": [["McDonald 2003", 1]]
|
49
|
+
},
|
50
|
+
"multiword last names": {
|
51
|
+
"case": "even from van der Aalst (2004) or others (De Boor 1980) claim",
|
52
|
+
"result": [["van der Aalst 2004", 1], ["De Boor 1980", 1]]
|
53
|
+
}
|
54
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: citesight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Soumya Ray
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
@@ -45,8 +45,25 @@ executables:
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
-
|
48
|
+
- ".bundle/config"
|
49
|
+
- ".gitignore"
|
50
|
+
- ".travis.yml"
|
51
|
+
- Gemfile
|
52
|
+
- LICENSE
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
49
55
|
- bin/citesight
|
56
|
+
- citesight.gemspec
|
57
|
+
- lib/citesight.rb
|
58
|
+
- lib/citesight/paper_citations.rb
|
59
|
+
- lib/citesight/version.rb
|
60
|
+
- spec/citesight_spec.rb
|
61
|
+
- spec/minitest_helper.rb
|
62
|
+
- spec/testfiles/large_test.txt
|
63
|
+
- spec/testfiles/large_test_results.txt
|
64
|
+
- spec/testfiles/no_match.txt
|
65
|
+
- spec/testfiles/test.txt
|
66
|
+
- spec/testfiles/test_cases.json
|
50
67
|
homepage: https://github.com/soumyaray/citesight
|
51
68
|
licenses:
|
52
69
|
- MIT
|
@@ -67,8 +84,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
84
|
version: '0'
|
68
85
|
requirements: []
|
69
86
|
rubyforge_project:
|
70
|
-
rubygems_version: 2.1
|
87
|
+
rubygems_version: 2.5.1
|
71
88
|
signing_key:
|
72
89
|
specification_version: 4
|
73
90
|
summary: Citation extractor and analyzer
|
74
|
-
test_files:
|
91
|
+
test_files:
|
92
|
+
- spec/citesight_spec.rb
|
93
|
+
- spec/minitest_helper.rb
|
94
|
+
- spec/testfiles/large_test.txt
|
95
|
+
- spec/testfiles/large_test_results.txt
|
96
|
+
- spec/testfiles/no_match.txt
|
97
|
+
- spec/testfiles/test.txt
|
98
|
+
- spec/testfiles/test_cases.json
|