citesight 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.bundle/config +1 -0
- data/.github/workflows/ruby.yml +39 -0
- data/.gitignore +3 -0
- data/Gemfile +7 -0
- data/LICENSE +21 -0
- data/README.md +36 -0
- data/Rakefile +6 -0
- data/bin/citesight +5 -1
- data/citesight.gemspec +23 -0
- data/lib/citesight/paper_citations.rb +53 -0
- data/lib/citesight/version.rb +4 -0
- data/lib/citesight.rb +1 -55
- data/spec/citesight_spec.rb +40 -0
- data/spec/spec_helper.rb +46 -0
- data/spec/testfiles/large_test.txt +1 -0
- data/spec/testfiles/large_test_results.txt +98 -0
- data/spec/testfiles/no_match.txt +3 -0
- data/spec/testfiles/test.txt +16 -0
- data/spec/testfiles/test_cases.json +54 -0
- metadata +41 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f8bbbcea21b92c660093f84144abd1c113ea3632d766759d3fd323b7b3dc5cf7
|
4
|
+
data.tar.gz: fd1b56b1ab356e63d2519844deccd8685b967edfc1fa8995c039e71a8c8bbef9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b638261f8e8ec4fbfc3e685fca5e223d5949078c13d62cd240a2b0819eaca0be500f1748c0f9638fc00923b11a2158f48aedc5ff0ae6d862b418883067fb0a1b
|
7
|
+
data.tar.gz: '079afeb512af5a7eff458a2cfa23d50892a871380058c3e5dcdc8218ee8326250fd6b620a4565cd4e503f623c7c37e725972bdbe266b2c20958a8ad987e005f8'
|
data/.bundle/config
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--- {}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ "main", "develop" ]
|
13
|
+
pull_request:
|
14
|
+
branches: '**'
|
15
|
+
|
16
|
+
permissions:
|
17
|
+
contents: read
|
18
|
+
|
19
|
+
jobs:
|
20
|
+
test:
|
21
|
+
|
22
|
+
strategy:
|
23
|
+
matrix:
|
24
|
+
os: [ubuntu, macos]
|
25
|
+
ruby-version: ['3.0', '3.1', '3.2']
|
26
|
+
runs-on: ${{ matrix.os }}-latest
|
27
|
+
|
28
|
+
steps:
|
29
|
+
- uses: actions/checkout@v3
|
30
|
+
- name: Set up Ruby
|
31
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
32
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
33
|
+
# uses: ruby/setup-ruby@v1
|
34
|
+
uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0
|
35
|
+
with:
|
36
|
+
ruby-version: ${{ matrix.ruby-version }}
|
37
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
38
|
+
- name: Run tests
|
39
|
+
run: bundle exec rake spec
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Soumya Ray
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# citesight
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/citesight.svg)](http://badge.fury.io/rb/citesight)
|
4
|
+
[![Build Status](https://travis-ci.org/soumyaray/citesight.svg?branch=master)](https://travis-ci.org/soumyaray/citesight)
|
5
|
+
|
6
|
+
home: [https://github.com/soumyaray/citesight](https://github.com/soumyaray/citesight)
|
7
|
+
Gem to extract and report on citations in an academic text
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
This gem may be used as a command line utility or called from code
|
12
|
+
|
13
|
+
<!-- TODO: installation instructions -->
|
14
|
+
|
15
|
+
### CLI
|
16
|
+
|
17
|
+
citesight mydoc.txt
|
18
|
+
|
19
|
+
<!-- TODO: update usage instructions -->
|
20
|
+
### Code example
|
21
|
+
|
22
|
+
require 'citesight'
|
23
|
+
require 'pp'
|
24
|
+
|
25
|
+
contents = File.read("spec/testfiles/large_test.txt", :encoding => "UTF-8")
|
26
|
+
paper = CiteSight::PaperCitations.new(contents)
|
27
|
+
cites = paper.unique_cites
|
28
|
+
|
29
|
+
puts "\nTotal unique citations: #{cites.count}"
|
30
|
+
PP.pp(Hash[cites])
|
31
|
+
|
32
|
+
top_cite = cites.sort_by { |_c, count| count}.reverse.first[0]
|
33
|
+
puts "\nYour top citation: #{top_cite}"
|
34
|
+
|
35
|
+
top_cite_indexes = paper.index_of_cite(top_cite)
|
36
|
+
puts "It was cited at locations: #{top_cite_indexes.join(', ')}"
|
data/Rakefile
ADDED
data/bin/citesight
CHANGED
@@ -1,15 +1,19 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
|
3
4
|
require 'citesight'
|
4
5
|
require 'pp'
|
5
6
|
|
6
7
|
# executable requirements: (1) env shebang above; (2) file mode 0755
|
8
|
+
# TODO: Return with error message instead of failing
|
7
9
|
|
8
10
|
fail ArgumentError, "Usage: get_citations [filename]\n" if ARGV.count == 0
|
9
11
|
|
10
12
|
contents = File.open(ARGV[0], 'r').read
|
11
13
|
|
12
|
-
results = PaperCitations.
|
14
|
+
results = CiteSight::PaperCitations.new(contents).unique_cites.sort_by do |c, _|
|
15
|
+
c.downcase
|
16
|
+
end
|
13
17
|
|
14
18
|
puts "Top citations:\n"
|
15
19
|
PP.pp(Hash[results.sort_by { |_cite, count| count }.reverse.take(5)])
|
data/citesight.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require 'citesight/version'
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = 'citesight'
|
6
|
+
s.version = CiteSight::VERSION
|
7
|
+
s.date = CiteSight::DATE
|
8
|
+
|
9
|
+
s.executables << 'citesight'
|
10
|
+
s.add_development_dependency 'minitest', '~> 5.20'
|
11
|
+
s.add_development_dependency 'minitest-rg', '~> 5.3'
|
12
|
+
|
13
|
+
s.summary = 'Citation extractor and analyzer'
|
14
|
+
s.description = 'Extract and analyze citations from MISQ and APA style text'
|
15
|
+
s.authors = ['Soumya Ray']
|
16
|
+
s.email = 'soumya.ray@gmail.com'
|
17
|
+
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
|
21
|
+
s.homepage = 'https://github.com/soumyaray/citesight'
|
22
|
+
s.license = 'MIT'
|
23
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# This class extracts and counts MISQ or APA style citations in a paper.
|
2
|
+
# The unique_cites method returns a hash of citations and counts
|
3
|
+
# in the order in which they were encountered.
|
4
|
+
module CiteSight
|
5
|
+
class PaperCitations
|
6
|
+
def initialize(contents)
|
7
|
+
@contents = contents
|
8
|
+
end
|
9
|
+
|
10
|
+
def unique_cites
|
11
|
+
clean_cites = @contents.scan(cite_match).map do |c|
|
12
|
+
remove_punctuation(c[0])
|
13
|
+
end
|
14
|
+
|
15
|
+
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
16
|
+
end
|
17
|
+
|
18
|
+
##
|
19
|
+
# Returns array of citation locations (index) in manuscript
|
20
|
+
#
|
21
|
+
# contents = File.read('./spec/testfiles/test.txt')
|
22
|
+
# CiteSight::PaperCitations.new(contents).index_of_cite('Peters et al. 2007')
|
23
|
+
# # => [219, 500]
|
24
|
+
def index_of_cite(cite)
|
25
|
+
cite_parts = cite.split
|
26
|
+
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
27
|
+
year_s = cite_parts.last
|
28
|
+
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
29
|
+
).map { Regexp.last_match.begin(0) }
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
# Characters recognised as an apostrophe when matching names and
# possessives: the ASCII quote (') and the typographic quote U+2019.
def apostrophe
  "\'\u2019"
end
|
35
|
+
# Regex fragment for a surname particle in either letter case
# ("de", "van der", "vander"), followed by an optional space.
def prefix
  '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)'
end
|
36
|
+
# Regex fragment for a single author surname: an optional particle
# (from #prefix), one capital letter, then one or more letters,
# apostrophes (from #apostrophe) or hyphens.
def author
  name_chars = "[[:alpha:]#{apostrophe}\-]"
  "(#{prefix}?[A-Z]#{name_chars}+)"
end
|
37
|
+
# Regex fragment (written for the /x flag, so the bare spaces around "|"
# are ignored) matching what may follow the first author:
# " and <author>", " & <author>", or a literal " et al.".
# The period after "al" is escaped so it matches only a real period;
# unescaped it matched any character (e.g. " et als").
def other_authors
  "([ ](and|\&)[ ]#{author} | ([ ]et[ ]al\\.){1})"
end
|
38
|
+
# Regex fragment for a possessive suffix: an apostrophe followed by "s",
# or a bare trailing apostrophe (as in "Griffiths'").
def possessive
  mark = "[#{apostrophe}]"
  "(#{mark}s|#{mark})"
end
|
39
|
+
# Regex fragment for a four-digit year beginning with 1 or 2, with an
# optional lower-case suffix letter (e.g. "1995b").
def year_literal
  '[1-2][0-9]{3}[a-z]?'
end
|
40
|
+
# Wraps the year pattern +yr+ in the punctuation expected around a
# citation year: an optional comma, one space, an optional "(",
# the year itself, then one of "," ")" or ";".
def year(yr)
  opening = "[,]?[ ][\(]?"
  closing = "[,\)\;]"
  "(#{opening}#{yr}#{closing})"
end
|
41
|
+
|
42
|
+
def cite_match
|
43
|
+
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
44
|
+
end
|
45
|
+
|
46
|
+
def remove_punctuation(cite)
|
47
|
+
cite
|
48
|
+
.gsub(/[\(\),;]|([#{apostrophe}]s)/, '')
|
49
|
+
.gsub(/[#{apostrophe}]\s/, ' ')
|
50
|
+
.gsub(/\&/, 'and')
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/citesight.rb
CHANGED
@@ -1,55 +1 @@
|
|
1
|
-
|
2
|
-
# The unique_cites method returns a hash of citations and counts
|
3
|
-
# in the order in which they were encountered.
|
4
|
-
class PaperCitations
|
5
|
-
def self.unique_cites(contents)
|
6
|
-
new(contents).unique_cites
|
7
|
-
end
|
8
|
-
|
9
|
-
def self.index_of_cite(contents, cite)
|
10
|
-
new(contents).index_of_cite(cite)
|
11
|
-
end
|
12
|
-
|
13
|
-
def initialize(contents)
|
14
|
-
@contents = contents
|
15
|
-
end
|
16
|
-
|
17
|
-
def unique_cites
|
18
|
-
clean_cites = @contents.scan(cite_match).map do |c|
|
19
|
-
remove_punctuation(c[0])
|
20
|
-
end
|
21
|
-
|
22
|
-
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
23
|
-
end
|
24
|
-
|
25
|
-
##
|
26
|
-
# Returns array of citation locations (index) in manuscript
|
27
|
-
#
|
28
|
-
# contents = File.read('./spec/testfiles/test.txt')
|
29
|
-
# PaperCitations.index_cite(contents, 'Peters et al. 2007')
|
30
|
-
# # => [219, 500]
|
31
|
-
def index_of_cite(cite)
|
32
|
-
cite_parts = cite.split
|
33
|
-
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
34
|
-
year_s = cite_parts.last
|
35
|
-
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
36
|
-
).map { Regexp.last_match.begin(0) }
|
37
|
-
end
|
38
|
-
|
39
|
-
private
|
40
|
-
|
41
|
-
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
42
|
-
def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
|
43
|
-
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
44
|
-
def possessive() "([\'\u2019]s|[\'\u2019])" end
|
45
|
-
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
46
|
-
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
47
|
-
|
48
|
-
def cite_match
|
49
|
-
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
50
|
-
end
|
51
|
-
|
52
|
-
def remove_punctuation(cite)
|
53
|
-
cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
|
54
|
-
end
|
55
|
-
end
|
1
|
+
require 'citesight/paper_citations.rb'
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'minitest/rg'
|
3
|
+
require './spec/spec_helper.rb'
|
4
|
+
|
5
|
+
describe 'Paper', 'A text document' do
|
6
|
+
|
7
|
+
describe 'when there are citations in the text' do
|
8
|
+
TEST_RESULTS.each do |cite, count|
|
9
|
+
it "should find the right citation count for #{cite}" do
|
10
|
+
_(TEST_CITES[cite]).must_equal count
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
describe 'when there are no citations to be found' do
|
16
|
+
it 'should return an empty hash' do
|
17
|
+
no_cites_txt = 'these are not the citations you are looking for'
|
18
|
+
_(CiteSight::PaperCitations.new(no_cites_txt).unique_cites.any?)
|
19
|
+
.must_equal(false)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
describe 'Cases', 'Test different citation cases' do
|
25
|
+
TEST_CASES.keys.each do |k|
|
26
|
+
it "can detect #{k}" do
|
27
|
+
_(CiteSight::PaperCitations.new(TEST_CASES[k]["case"]).unique_cites.to_a)
|
28
|
+
.must_equal(TEST_CASES[k]["result"])
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'Indexes', 'Accurately find index of different citations' do
|
34
|
+
TEST_CITES.map do |cite, _count|
|
35
|
+
it "can find the right index for #{cite}" do
|
36
|
+
_(TEST_INDEXES[cite]).must_equal \
|
37
|
+
CiteSight::PaperCitations.new(TEST_CONTENTS).index_of_cite(cite)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'json'
|
2
|
+
require './lib/citesight.rb'
|
3
|
+
|
4
|
+
TEST_CONTENTS = File.read('./spec/testfiles/test.txt')
|
5
|
+
|
6
|
+
TEST_CITES =
|
7
|
+
{
|
8
|
+
"Charlton 2002" => 1,
|
9
|
+
"Griffiths 2000" => 1,
|
10
|
+
"Peters et al. 2007" => 2,
|
11
|
+
"Ma and Agarwal 2007" => 3,
|
12
|
+
"Hur 2007" => 3,
|
13
|
+
"Oreg 1995b" => 1,
|
14
|
+
"Ray 2000" => 4,
|
15
|
+
"Ma et al. 2002" => 3,
|
16
|
+
"Griffins 2000" => 1,
|
17
|
+
"O'Hern 2010" => 1,
|
18
|
+
"McDonald 2003" => 1,
|
19
|
+
"Wa'el et al. 1993" => 1,
|
20
|
+
"van der Aalst 2004" => 1,
|
21
|
+
"De Boor 1980" => 1
|
22
|
+
}
|
23
|
+
|
24
|
+
TEST_INDEXES =
|
25
|
+
{
|
26
|
+
"Charlton 2002"=>[40],
|
27
|
+
"Griffiths 2000"=>[97],
|
28
|
+
"Peters et al. 2007"=>[219, 500],
|
29
|
+
"Ma and Agarwal 2007"=>[174],
|
30
|
+
"Hur 2007"=>[146, 490, 828],
|
31
|
+
"Oreg 1995b"=>[318],
|
32
|
+
"Ray 2000"=>[346, 432, 781, 817],
|
33
|
+
"Ma et al. 2002"=>[272, 377, 407],
|
34
|
+
"Griffins 2000"=>[522],
|
35
|
+
"O'Hern 2010"=>[566],
|
36
|
+
"McDonald 2003"=>[634],
|
37
|
+
"Wa'el et al. 1993"=>[583],
|
38
|
+
"van der Aalst 2004"=>[697],
|
39
|
+
"De Boor 1980"=>[729]
|
40
|
+
}
|
41
|
+
|
42
|
+
TEST_CASES = File.open('./spec/testfiles/test_cases.json', 'r') do |f|
|
43
|
+
JSON.load(f)
|
44
|
+
end
|
45
|
+
|
46
|
+
TEST_RESULTS = CiteSight::PaperCitations.new(TEST_CONTENTS).unique_cites
|
@@ -0,0 +1,98 @@
|
|
1
|
+
Top citations:
|
2
|
+
{"Kim et al. 2005"=>13,
|
3
|
+
"Polites and Karahanna 2012"=>12,
|
4
|
+
"Turel et al. 2011b"=>11,
|
5
|
+
"Limayem et al. 2007"=>7,
|
6
|
+
"Kelley and Berridge 2002"=>5}
|
7
|
+
|
8
|
+
Total unique citations: 90
|
9
|
+
{"Aarts and Dijksterhuis 2000"=>3,
|
10
|
+
"Agarwal and Karahanna 2000"=>2,
|
11
|
+
"Ainslie 2001"=>2,
|
12
|
+
"Bagozzi and Yi 1988"=>2,
|
13
|
+
"Bagozzi et al. 2003"=>5,
|
14
|
+
"Bargh et al. 2001"=>2,
|
15
|
+
"Berke and Hyman 2000"=>1,
|
16
|
+
"Bhattacherjee 2001"=>1,
|
17
|
+
"Browne et al. 1993"=>1,
|
18
|
+
"Caplan 2002"=>3,
|
19
|
+
"Chak and Leung 2004"=>3,
|
20
|
+
"Charlton 2002"=>1,
|
21
|
+
"Charlton and Danforth 2010"=>1,
|
22
|
+
"Chou and Ting 2003"=>1,
|
23
|
+
"Corder and Corder 1974"=>1,
|
24
|
+
"Davis 2001"=>2,
|
25
|
+
"Davis et al. 1989"=>1,
|
26
|
+
"Davis et al. 1992"=>1,
|
27
|
+
"Davis et al. 2002"=>2,
|
28
|
+
"deGuinea and Markus 2009"=>1,
|
29
|
+
"Everitt and Robbins 2005"=>1,
|
30
|
+
"Fornell and Larcker 1981"=>2,
|
31
|
+
"Gollwitzer 1996"=>1,
|
32
|
+
"Gray 1970"=>2,
|
33
|
+
"Gray and McNaughton 2000"=>2,
|
34
|
+
"Griffiths 2000"=>1,
|
35
|
+
"Grodner and Reid 2010"=>1,
|
36
|
+
"Grover et al. 2011"=>1,
|
37
|
+
"Hampton et al. 2011"=>2,
|
38
|
+
"Harrison 1968"=>1,
|
39
|
+
"Harrison and Zajonc 1970"=>1,
|
40
|
+
"Holden 2001"=>2,
|
41
|
+
"Huang et al. 2009"=>1,
|
42
|
+
"Hur 2006"=>2,
|
43
|
+
"Jasperson et al. 2005"=>2,
|
44
|
+
"Jennett et al. 2008"=>3,
|
45
|
+
"Jöreskog and Sörbom 1996"=>1,
|
46
|
+
"Kelley and Berridge 2002"=>5,
|
47
|
+
"Kim 2009"=>2,
|
48
|
+
"Kim and Malhotra 2005"=>2,
|
49
|
+
"Kim and Son 2009"=>1,
|
50
|
+
"Kim et al. 2005"=>13,
|
51
|
+
"Kuss and Griffiths 2011"=>1,
|
52
|
+
"Lankton et al. 2010"=>1,
|
53
|
+
"Leary et al. 2005"=>1,
|
54
|
+
"Limayem and Hirt 2003"=>2,
|
55
|
+
"Limayem et al. 2007"=>7,
|
56
|
+
"Louis and Sutton 1991"=>2,
|
57
|
+
"Lunden 2012"=>2,
|
58
|
+
"Mellor et al. 2008"=>1,
|
59
|
+
"Mittal 1988"=>1,
|
60
|
+
"Moore and Benbasat 1991"=>1,
|
61
|
+
"Morahan-Martin and Schumacher 2000"=>3,
|
62
|
+
"Nicolaou and McKnight 2006"=>1,
|
63
|
+
"Oreg 2003"=>1,
|
64
|
+
"Peters et al. 2007"=>1,
|
65
|
+
"Phillips 2009"=>1,
|
66
|
+
"Podsakoff et al. 2003"=>1,
|
67
|
+
"Polites and Karahanna 2012"=>12,
|
68
|
+
"Robinson and Berridge 1993"=>5,
|
69
|
+
"Robinson and Berridge 2003"=>4,
|
70
|
+
"Rokeach 1960"=>1,
|
71
|
+
"Salaway et al. 2008"=>1,
|
72
|
+
"Seah and Cairns 2008"=>2,
|
73
|
+
"Segars 1997"=>1,
|
74
|
+
"Shotton 1991"=>1,
|
75
|
+
"Smith and Brenner 2012"=>1,
|
76
|
+
"Sutton 1987"=>1,
|
77
|
+
"Thadani and Cheung 2011"=>1,
|
78
|
+
"Tokunaga 2011"=>1,
|
79
|
+
"Turel and Serenko 2012"=>4,
|
80
|
+
"Turel et al. 2011"=>3,
|
81
|
+
"Turel et al. 2011a"=>3,
|
82
|
+
"Turel et al. 2011b"=>11,
|
83
|
+
"van der Heijden 2004"=>1,
|
84
|
+
"Venkatesh and Davis 2000"=>1,
|
85
|
+
"Venkatesh et al. 2000"=>1,
|
86
|
+
"Venkatesh et al. 2003"=>2,
|
87
|
+
"Venkatesh et al. 2008"=>1,
|
88
|
+
"Verplanken and Orbell 2003"=>1,
|
89
|
+
"Verplanken and Wood 2006"=>1,
|
90
|
+
"Verplanken et al. 1998"=>2,
|
91
|
+
"Wang 2001"=>2,
|
92
|
+
"Wang and Chu 2007"=>2,
|
93
|
+
"Wixom and Todd 2005"=>1,
|
94
|
+
"Yang and Tung 2007"=>1,
|
95
|
+
"Yellowlees and Marks 2007"=>1,
|
96
|
+
"Young 1998"=>2,
|
97
|
+
"Young 2004"=>2,
|
98
|
+
"Zait and Bertea 2011"=>1}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
TEST CASES:
|
2
|
+
special \u2019 quotes: from Charlton’s (2002) global measure
|
3
|
+
trailing apostrophe: in Griffiths’ (2000) and others
|
4
|
+
Single author: from Hur (2007) and
|
5
|
+
Two authors: Ma and Agarwal (2007)
|
6
|
+
More than two authors: Peters et al.’s (2007) major work similar to others (Ma et al. 2002)
|
7
|
+
Possessive first author: even Oreg's (1995b) paper citing Ray's (2000) work
|
8
|
+
Short names: Ma et al.'s (2002)
|
9
|
+
Fragment1: Ma et al. (2002) et al. (Ray 2000) both published before 2005.
|
10
|
+
Multiple cites: in (Hur 2007, Peters et al.’s 2007, Griffins' 2000) from
|
11
|
+
Apostrophes in names: (O'Hern 2010) and Wa'el et al. (1993) or
|
12
|
+
Capitals within names: from McDonald (2003) we gather that
|
13
|
+
Multiword last names: even from van der Aalst (2004) or others (De Boor 1980).
|
14
|
+
APA commas: according to literature (Ray, 2000) the
|
15
|
+
APA multiple cites: (Ray, 2000; Hur, 2007)
|
16
|
+
APA two authors: Ma & Agarwal (2007) agree with this (Ma & Agarwal 2007)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
{
|
2
|
+
"special u2019 quotes": {
|
3
|
+
"case": "from Charlton’s (2002) global measure",
|
4
|
+
"result": [["Charlton 2002", 1]]
|
5
|
+
},
|
6
|
+
"trailing apostrophe": {
|
7
|
+
"case": "in Griffiths’ (2000) and others",
|
8
|
+
"result": [["Griffiths 2000", 1]]
|
9
|
+
},
|
10
|
+
"single author": {
|
11
|
+
"case": "from Hur (2007) and",
|
12
|
+
"result": [["Hur 2007", 1]]
|
13
|
+
},
|
14
|
+
"two authors": {
|
15
|
+
"case": "Ma and Agarwal (2007)",
|
16
|
+
"result": [["Ma and Agarwal 2007", 1]]
|
17
|
+
},
|
18
|
+
"more than two authors": {
|
19
|
+
"case": "Peters et al.’s (2007) major work",
|
20
|
+
"result": [["Peters et al. 2007", 1]]
|
21
|
+
},
|
22
|
+
"possesive first author": {
|
23
|
+
"case": "even Hur's (2007) paper citing Ray's (2000) work",
|
24
|
+
"result": [["Hur 2007", 1], ["Ray 2000", 1]]
|
25
|
+
},
|
26
|
+
"character in year": {
|
27
|
+
"case": "Oreg's (1995b) paper and Venkatesh (2003a)",
|
28
|
+
"result": [["Oreg 1995b", 1], ["Venkatesh 2003a", 1]]
|
29
|
+
},
|
30
|
+
"short last name": {
|
31
|
+
"case": "from Ma et al. (2002) we find that",
|
32
|
+
"result": [["Ma et al. 2002", 1]]
|
33
|
+
},
|
34
|
+
"fragment1": {
|
35
|
+
"case": "Ma et al. (2002) et al. (2000)",
|
36
|
+
"result": [["Ma et al. 2002", 1]]
|
37
|
+
},
|
38
|
+
"multiple cites": {
|
39
|
+
"case": "in (Hur 2007, Peters et al.’s 2007, Griffiths' 2000) from",
|
40
|
+
"result": [["Hur 2007", 1], ["Peters et al. 2007", 1], ["Griffiths 2000", 1]]
|
41
|
+
},
|
42
|
+
"apostrophes in name": {
|
43
|
+
"case": "We've seen (O'Hern 2010) and Wa'el et al. (1993) say",
|
44
|
+
"result": [["O'Hern 2010", 1], ["Wa'el et al. 1993", 1]]
|
45
|
+
},
|
46
|
+
"capital letters within names": {
|
47
|
+
"case": "from McDonald (2003) we gather that",
|
48
|
+
"result": [["McDonald 2003", 1]]
|
49
|
+
},
|
50
|
+
"multiword last names": {
|
51
|
+
"case": "even from van der Aalst (2004) or others (De Boor 1980) claim",
|
52
|
+
"result": [["van der Aalst 2004", 1], ["De Boor 1980", 1]]
|
53
|
+
}
|
54
|
+
}
|
metadata
CHANGED
@@ -1,57 +1,74 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: citesight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Soumya Ray
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-12-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '5.20'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '5.20'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: minitest-rg
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.3'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
41
|
-
description: Extract and analyze citations from APA style text
|
40
|
+
version: '5.3'
|
41
|
+
description: Extract and analyze citations from MISQ and APA style text
|
42
42
|
email: soumya.ray@gmail.com
|
43
43
|
executables:
|
44
44
|
- citesight
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
-
|
48
|
+
- ".bundle/config"
|
49
|
+
- ".github/workflows/ruby.yml"
|
50
|
+
- ".gitignore"
|
51
|
+
- Gemfile
|
52
|
+
- LICENSE
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
49
55
|
- bin/citesight
|
56
|
+
- citesight.gemspec
|
57
|
+
- lib/citesight.rb
|
58
|
+
- lib/citesight/paper_citations.rb
|
59
|
+
- lib/citesight/version.rb
|
60
|
+
- spec/citesight_spec.rb
|
61
|
+
- spec/spec_helper.rb
|
62
|
+
- spec/testfiles/large_test.txt
|
63
|
+
- spec/testfiles/large_test_results.txt
|
64
|
+
- spec/testfiles/no_match.txt
|
65
|
+
- spec/testfiles/test.txt
|
66
|
+
- spec/testfiles/test_cases.json
|
50
67
|
homepage: https://github.com/soumyaray/citesight
|
51
68
|
licenses:
|
52
69
|
- MIT
|
53
70
|
metadata: {}
|
54
|
-
post_install_message:
|
71
|
+
post_install_message:
|
55
72
|
rdoc_options: []
|
56
73
|
require_paths:
|
57
74
|
- lib
|
@@ -66,9 +83,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
83
|
- !ruby/object:Gem::Version
|
67
84
|
version: '0'
|
68
85
|
requirements: []
|
69
|
-
|
70
|
-
|
71
|
-
signing_key:
|
86
|
+
rubygems_version: 3.4.10
|
87
|
+
signing_key:
|
72
88
|
specification_version: 4
|
73
89
|
summary: Citation extractor and analyzer
|
74
|
-
test_files:
|
90
|
+
test_files:
|
91
|
+
- spec/citesight_spec.rb
|
92
|
+
- spec/spec_helper.rb
|
93
|
+
- spec/testfiles/large_test.txt
|
94
|
+
- spec/testfiles/large_test_results.txt
|
95
|
+
- spec/testfiles/no_match.txt
|
96
|
+
- spec/testfiles/test.txt
|
97
|
+
- spec/testfiles/test_cases.json
|