citesight 0.1.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +39 -0
- data/README.md +13 -5
- data/bin/citesight +2 -1
- data/citesight.gemspec +4 -4
- data/lib/citesight/paper_citations.rb +10 -14
- data/lib/citesight/version.rb +2 -2
- data/spec/citesight_spec.rb +12 -20
- data/spec/{minitest_helper.rb → spec_helper.rb} +7 -5
- data/spec/testfiles/test.txt +3 -0
- metadata +18 -19
- data/.travis.yml +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f8bbbcea21b92c660093f84144abd1c113ea3632d766759d3fd323b7b3dc5cf7
|
4
|
+
data.tar.gz: fd1b56b1ab356e63d2519844deccd8685b967edfc1fa8995c039e71a8c8bbef9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b638261f8e8ec4fbfc3e685fca5e223d5949078c13d62cd240a2b0819eaca0be500f1748c0f9638fc00923b11a2158f48aedc5ff0ae6d862b418883067fb0a1b
|
7
|
+
data.tar.gz: '079afeb512af5a7eff458a2cfa23d50892a871380058c3e5dcdc8218ee8326250fd6b620a4565cd4e503f623c7c37e725972bdbe266b2c20958a8ad987e005f8'
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ "main", "develop" ]
|
13
|
+
pull_request:
|
14
|
+
branches: '**'
|
15
|
+
|
16
|
+
permissions:
|
17
|
+
contents: read
|
18
|
+
|
19
|
+
jobs:
|
20
|
+
test:
|
21
|
+
|
22
|
+
strategy:
|
23
|
+
matrix:
|
24
|
+
os: [ubuntu, macos]
|
25
|
+
ruby-version: ['3.0', '3.1', '3.2']
|
26
|
+
runs-on: ${{ matrix.os }}-latest
|
27
|
+
|
28
|
+
steps:
|
29
|
+
- uses: actions/checkout@v3
|
30
|
+
- name: Set up Ruby
|
31
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
32
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
33
|
+
# uses: ruby/setup-ruby@v1
|
34
|
+
uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0
|
35
|
+
with:
|
36
|
+
ruby-version: ${{ matrix.ruby-version }}
|
37
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
38
|
+
- name: Run tests
|
39
|
+
run: bundle exec rake spec
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#citesight
|
1
|
+
# citesight
|
2
|
+
|
2
3
|
[![Gem Version](https://badge.fury.io/rb/citesight.svg)](http://badge.fury.io/rb/citesight)
|
3
4
|
[![Build Status](https://travis-ci.org/soumyaray/citesight.svg?branch=master)](https://travis-ci.org/soumyaray/citesight)
|
4
5
|
|
@@ -6,17 +7,24 @@ home: [https://github.com/soumyaray/citesight](https://github.com/soumyaray/cite
|
|
6
7
|
Gem to extract and report on citations in an academic text
|
7
8
|
|
8
9
|
## Usage
|
10
|
+
|
9
11
|
This gem may be used as a command line utility or called from code
|
10
12
|
|
11
|
-
|
13
|
+
<!-- TODO: installation instructions -->
|
14
|
+
|
15
|
+
### CLI
|
16
|
+
|
12
17
|
citesight mydoc.txt
|
13
18
|
|
14
|
-
|
19
|
+
<!-- TODO: update usage instructions -->
|
20
|
+
### Code example
|
21
|
+
|
15
22
|
require 'citesight'
|
16
23
|
require 'pp'
|
17
24
|
|
18
25
|
contents = File.read("spec/testfiles/large_test.txt", :encoding => "UTF-8")
|
19
|
-
|
26
|
+
paper = PaperCitations.new(contents)
|
27
|
+
cites = paper.unique_cites
|
20
28
|
|
21
29
|
puts "\nTotal unique citations: #{cites.count}"
|
22
30
|
PP.pp(Hash[cites])
|
@@ -24,5 +32,5 @@ This gem may be used as a command line utility or called from code
|
|
24
32
|
top_cite = cites.sort_by { |_c, count| count}.reverse.first[0]
|
25
33
|
puts "\nYour top citation: #{top_cite}"
|
26
34
|
|
27
|
-
top_cite_indexes =
|
35
|
+
top_cite_indexes = paper.index_of_cite(top_cite)
|
28
36
|
puts "It was cited at locations: #{top_cite_indexes.join(', ')}"
|
data/bin/citesight
CHANGED
@@ -5,12 +5,13 @@ require 'citesight'
|
|
5
5
|
require 'pp'
|
6
6
|
|
7
7
|
# executable requirements: (1) env shebang above; (2) file mode 0755
|
8
|
+
# TODO: Return with error message instead of failing
|
8
9
|
|
9
10
|
fail ArgumentError, "Usage: get_citations [filename]\n" if ARGV.count == 0
|
10
11
|
|
11
12
|
contents = File.open(ARGV[0], 'r').read
|
12
13
|
|
13
|
-
results = CiteSight::PaperCitations.
|
14
|
+
results = CiteSight::PaperCitations.new(contents).unique_cites.sort_by do |c, _|
|
14
15
|
c.downcase
|
15
16
|
end
|
16
17
|
|
data/citesight.gemspec
CHANGED
@@ -7,17 +7,17 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.date = CiteSight::DATE
|
8
8
|
|
9
9
|
s.executables << 'citesight'
|
10
|
-
s.add_development_dependency 'minitest'
|
11
|
-
s.add_development_dependency 'minitest-rg'
|
10
|
+
s.add_development_dependency 'minitest', '~> 5.20'
|
11
|
+
s.add_development_dependency 'minitest-rg', '~> 5.3'
|
12
12
|
|
13
13
|
s.summary = 'Citation extractor and analyzer'
|
14
|
-
s.description = 'Extract and analyze citations from APA style text'
|
14
|
+
s.description = 'Extract and analyze citations from MISQ and APA style text'
|
15
15
|
s.authors = ['Soumya Ray']
|
16
16
|
s.email = 'soumya.ray@gmail.com'
|
17
17
|
|
18
18
|
s.files = `git ls-files`.split("\n")
|
19
19
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
-
|
20
|
+
|
21
21
|
s.homepage = 'https://github.com/soumyaray/citesight'
|
22
22
|
s.license = 'MIT'
|
23
23
|
end
|
@@ -1,16 +1,8 @@
|
|
1
|
-
# This class extracts and counts APA
|
1
|
+
# This class extracts and counts MISQ or APA style citations in a paper.
|
2
2
|
# The unique_cites method returns a hash of citations and counts
|
3
3
|
# in the order in which they were encountered.
|
4
4
|
module CiteSight
|
5
5
|
class PaperCitations
|
6
|
-
def self.unique_cites(contents)
|
7
|
-
new(contents).unique_cites
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.index_of_cite(contents, cite)
|
11
|
-
new(contents).index_of_cite(cite)
|
12
|
-
end
|
13
|
-
|
14
6
|
def initialize(contents)
|
15
7
|
@contents = contents
|
16
8
|
end
|
@@ -39,19 +31,23 @@ module CiteSight
|
|
39
31
|
|
40
32
|
private
|
41
33
|
|
34
|
+
def apostrophe() "\'\u2019" end
|
42
35
|
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
43
|
-
def author() "(#{prefix}?[A-Z][[:alpha:]
|
44
|
-
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
45
|
-
def possessive() "([
|
36
|
+
def author() "(#{prefix}?[A-Z][[:alpha:]#{apostrophe}\-]+)" end
|
37
|
+
def other_authors() "([ ](and|\&)[ ]#{author} | ([ ]et[ ]al.){1})" end
|
38
|
+
def possessive() "([#{apostrophe}]s|[#{apostrophe}])" end
|
46
39
|
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
47
|
-
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
40
|
+
def year(yr) "([,]?[ ][\(]?#{yr}[,\)\;])" end
|
48
41
|
|
49
42
|
def cite_match
|
50
43
|
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
51
44
|
end
|
52
45
|
|
53
46
|
def remove_punctuation(cite)
|
54
|
-
cite
|
47
|
+
cite
|
48
|
+
.gsub(/[\(\),;]|([#{apostrophe}]s)/, '')
|
49
|
+
.gsub(/[#{apostrophe}]\s/, ' ')
|
50
|
+
.gsub(/\&/, 'and')
|
55
51
|
end
|
56
52
|
end
|
57
53
|
end
|
data/lib/citesight/version.rb
CHANGED
data/spec/citesight_spec.rb
CHANGED
@@ -1,23 +1,22 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'minitest/rg'
|
3
|
-
require './spec/
|
3
|
+
require './spec/spec_helper.rb'
|
4
4
|
|
5
5
|
describe 'Paper', 'A text document' do
|
6
6
|
|
7
7
|
describe 'when there are citations in the text' do
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
it 'should fine the right citations' do
|
13
|
-
@results.must_equal TEST_CITES
|
8
|
+
TEST_RESULTS.each do |cite, count|
|
9
|
+
it "should find the right citation count for #{cite}" do
|
10
|
+
_(TEST_CITES[cite]).must_equal count
|
11
|
+
end
|
14
12
|
end
|
15
13
|
end
|
16
14
|
|
17
15
|
describe 'when there are no citations to be found' do
|
18
16
|
it 'should return an empty hash' do
|
19
17
|
no_cites_txt = 'these are not the citations you are looking for'
|
20
|
-
|
18
|
+
_(CiteSight::PaperCitations.new(no_cites_txt).unique_cites.any?)
|
19
|
+
.must_equal(false)
|
21
20
|
end
|
22
21
|
end
|
23
22
|
end
|
@@ -25,24 +24,17 @@ end
|
|
25
24
|
describe 'Cases', 'Test different citation cases' do
|
26
25
|
TEST_CASES.keys.each do |k|
|
27
26
|
it "can detect #{k}" do
|
28
|
-
CiteSight::PaperCitations.
|
27
|
+
_(CiteSight::PaperCitations.new(TEST_CASES[k]["case"]).unique_cites.to_a)
|
29
28
|
.must_equal(TEST_CASES[k]["result"])
|
30
29
|
end
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|
34
33
|
describe 'Indexes', 'Accurately find index of different citations' do
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
34
|
+
TEST_CITES.map do |cite, _count|
|
35
|
+
it "can find the right index for #{cite}" do
|
36
|
+
_(TEST_INDEXES[cite]).must_equal \
|
37
|
+
CiteSight::PaperCitations.new(TEST_CONTENTS).index_of_cite(cite)
|
39
38
|
end
|
40
39
|
end
|
41
40
|
end
|
42
|
-
|
43
|
-
## Produce hash of all cite indexes:
|
44
|
-
# Hash[
|
45
|
-
# TEST_CITES.map do |cite, count|
|
46
|
-
# [cite, PaperCitations.index_cite(doc, cite)]
|
47
|
-
# end
|
48
|
-
# ]
|
@@ -8,10 +8,10 @@ TEST_CITES =
|
|
8
8
|
"Charlton 2002" => 1,
|
9
9
|
"Griffiths 2000" => 1,
|
10
10
|
"Peters et al. 2007" => 2,
|
11
|
-
"Ma and Agarwal 2007" =>
|
12
|
-
"Hur 2007" =>
|
11
|
+
"Ma and Agarwal 2007" => 3,
|
12
|
+
"Hur 2007" => 3,
|
13
13
|
"Oreg 1995b" => 1,
|
14
|
-
"Ray 2000" =>
|
14
|
+
"Ray 2000" => 4,
|
15
15
|
"Ma et al. 2002" => 3,
|
16
16
|
"Griffins 2000" => 1,
|
17
17
|
"O'Hern 2010" => 1,
|
@@ -27,9 +27,9 @@ TEST_INDEXES =
|
|
27
27
|
"Griffiths 2000"=>[97],
|
28
28
|
"Peters et al. 2007"=>[219, 500],
|
29
29
|
"Ma and Agarwal 2007"=>[174],
|
30
|
-
"Hur 2007"=>[146, 490],
|
30
|
+
"Hur 2007"=>[146, 490, 828],
|
31
31
|
"Oreg 1995b"=>[318],
|
32
|
-
"Ray 2000"=>[346, 432],
|
32
|
+
"Ray 2000"=>[346, 432, 781, 817],
|
33
33
|
"Ma et al. 2002"=>[272, 377, 407],
|
34
34
|
"Griffins 2000"=>[522],
|
35
35
|
"O'Hern 2010"=>[566],
|
@@ -42,3 +42,5 @@ TEST_INDEXES =
|
|
42
42
|
TEST_CASES = File.open('./spec/testfiles/test_cases.json', 'r') do |f|
|
43
43
|
JSON.load(f)
|
44
44
|
end
|
45
|
+
|
46
|
+
TEST_RESULTS = CiteSight::PaperCitations.new(TEST_CONTENTS).unique_cites
|
data/spec/testfiles/test.txt
CHANGED
@@ -11,3 +11,6 @@ Multiple cites: in (Hur 2007, Peters et al.’s 2007, Griffins' 2000) from
|
|
11
11
|
Apostrophes in names: (O'Hern 2010) and Wa'el et al. (1993) or
|
12
12
|
Capitals within names: from McDonald (2003) we gather that
|
13
13
|
Multiword last names: even from van der Aalst (2004) or others (De Boor 1980).
|
14
|
+
APA commas: according to literature (Ray, 2000) the
|
15
|
+
APA multiple cites: (Ray, 2000; Hur, 2007)
|
16
|
+
APA two authors: Ma & Agarwal (2007) agree with this (Ma & Agarwal 2007)
|
metadata
CHANGED
@@ -1,44 +1,44 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: citesight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Soumya Ray
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-12-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '5.20'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '5.20'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: minitest-rg
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.3'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
41
|
-
description: Extract and analyze citations from APA style text
|
40
|
+
version: '5.3'
|
41
|
+
description: Extract and analyze citations from MISQ and APA style text
|
42
42
|
email: soumya.ray@gmail.com
|
43
43
|
executables:
|
44
44
|
- citesight
|
@@ -46,8 +46,8 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- ".bundle/config"
|
49
|
+
- ".github/workflows/ruby.yml"
|
49
50
|
- ".gitignore"
|
50
|
-
- ".travis.yml"
|
51
51
|
- Gemfile
|
52
52
|
- LICENSE
|
53
53
|
- README.md
|
@@ -58,7 +58,7 @@ files:
|
|
58
58
|
- lib/citesight/paper_citations.rb
|
59
59
|
- lib/citesight/version.rb
|
60
60
|
- spec/citesight_spec.rb
|
61
|
-
- spec/minitest_helper.rb
|
61
|
+
- spec/spec_helper.rb
|
62
62
|
- spec/testfiles/large_test.txt
|
63
63
|
- spec/testfiles/large_test_results.txt
|
64
64
|
- spec/testfiles/no_match.txt
|
@@ -68,7 +68,7 @@ homepage: https://github.com/soumyaray/citesight
|
|
68
68
|
licenses:
|
69
69
|
- MIT
|
70
70
|
metadata: {}
|
71
|
-
post_install_message:
|
71
|
+
post_install_message:
|
72
72
|
rdoc_options: []
|
73
73
|
require_paths:
|
74
74
|
- lib
|
@@ -83,14 +83,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
83
|
- !ruby/object:Gem::Version
|
84
84
|
version: '0'
|
85
85
|
requirements: []
|
86
|
-
|
87
|
-
|
88
|
-
signing_key:
|
86
|
+
rubygems_version: 3.4.10
|
87
|
+
signing_key:
|
89
88
|
specification_version: 4
|
90
89
|
summary: Citation extractor and analyzer
|
91
90
|
test_files:
|
92
91
|
- spec/citesight_spec.rb
|
93
|
-
- spec/minitest_helper.rb
|
92
|
+
- spec/spec_helper.rb
|
94
93
|
- spec/testfiles/large_test.txt
|
95
94
|
- spec/testfiles/large_test_results.txt
|
96
95
|
- spec/testfiles/no_match.txt
|