citesight 0.1.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +39 -0
- data/README.md +13 -5
- data/bin/citesight +2 -1
- data/citesight.gemspec +4 -4
- data/lib/citesight/paper_citations.rb +10 -14
- data/lib/citesight/version.rb +2 -2
- data/spec/citesight_spec.rb +12 -20
- data/spec/{minitest_helper.rb → spec_helper.rb} +7 -5
- data/spec/testfiles/test.txt +3 -0
- metadata +18 -19
- data/.travis.yml +0 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f8bbbcea21b92c660093f84144abd1c113ea3632d766759d3fd323b7b3dc5cf7
|
4
|
+
data.tar.gz: fd1b56b1ab356e63d2519844deccd8685b967edfc1fa8995c039e71a8c8bbef9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b638261f8e8ec4fbfc3e685fca5e223d5949078c13d62cd240a2b0819eaca0be500f1748c0f9638fc00923b11a2158f48aedc5ff0ae6d862b418883067fb0a1b
|
7
|
+
data.tar.gz: '079afeb512af5a7eff458a2cfa23d50892a871380058c3e5dcdc8218ee8326250fd6b620a4565cd4e503f623c7c37e725972bdbe266b2c20958a8ad987e005f8'
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ "main", "develop" ]
|
13
|
+
pull_request:
|
14
|
+
branches: '**'
|
15
|
+
|
16
|
+
permissions:
|
17
|
+
contents: read
|
18
|
+
|
19
|
+
jobs:
|
20
|
+
test:
|
21
|
+
|
22
|
+
strategy:
|
23
|
+
matrix:
|
24
|
+
os: [ubuntu, macos]
|
25
|
+
ruby-version: ['3.0', '3.1', '3.2']
|
26
|
+
runs-on: ${{ matrix.os }}-latest
|
27
|
+
|
28
|
+
steps:
|
29
|
+
- uses: actions/checkout@v3
|
30
|
+
- name: Set up Ruby
|
31
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
32
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
33
|
+
# uses: ruby/setup-ruby@v1
|
34
|
+
uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0
|
35
|
+
with:
|
36
|
+
ruby-version: ${{ matrix.ruby-version }}
|
37
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
38
|
+
- name: Run tests
|
39
|
+
run: bundle exec rake spec
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#citesight
|
1
|
+
# citesight
|
2
|
+
|
2
3
|
[](http://badge.fury.io/rb/citesight)
|
3
4
|
[](https://travis-ci.org/soumyaray/citesight)
|
4
5
|
|
@@ -6,17 +7,24 @@ home: [https://github.com/soumyaray/citesight](https://github.com/soumyaray/cite
|
|
6
7
|
Gem to extract and report on citations in an academic text
|
7
8
|
|
8
9
|
## Usage
|
10
|
+
|
9
11
|
This gem may be used as a command line utility or called from code
|
10
12
|
|
11
|
-
|
13
|
+
<!-- TODO: installation instructions -->
|
14
|
+
|
15
|
+
### CLI
|
16
|
+
|
12
17
|
citesight mydoc.txt
|
13
18
|
|
14
|
-
|
19
|
+
<!-- TODO: update usage instructions -->
|
20
|
+
### Code example
|
21
|
+
|
15
22
|
require 'citesight'
|
16
23
|
require 'pp'
|
17
24
|
|
18
25
|
contents = File.read("spec/testfiles/large_test.txt", :encoding => "UTF-8")
|
19
|
-
|
26
|
+
paper = PaperCitations.new(contents)
|
27
|
+
cites = paper.unique_cites
|
20
28
|
|
21
29
|
puts "\nTotal unique citations: #{cites.count}"
|
22
30
|
PP.pp(Hash[cites])
|
@@ -24,5 +32,5 @@ This gem may be used as a command line utility or called from code
|
|
24
32
|
top_cite = cites.sort_by { |_c, count| count}.reverse.first[0]
|
25
33
|
puts "\nYour top citation: #{top_cite}"
|
26
34
|
|
27
|
-
top_cite_indexes =
|
35
|
+
top_cite_indexes = paper.index_of_cite(top_cite)
|
28
36
|
puts "It was cited at locations: #{top_cite_indexes.join(', ')}"
|
data/bin/citesight
CHANGED
@@ -5,12 +5,13 @@ require 'citesight'
|
|
5
5
|
require 'pp'
|
6
6
|
|
7
7
|
# executable requirements: (1) env shebang above; (2) file mode 0755
|
8
|
+
# TODO: Return with error message instead of failing
|
8
9
|
|
9
10
|
fail ArgumentError, "Usage: get_citations [filename]\n" if ARGV.count == 0
|
10
11
|
|
11
12
|
contents = File.open(ARGV[0], 'r').read
|
12
13
|
|
13
|
-
results = CiteSight::PaperCitations.
|
14
|
+
results = CiteSight::PaperCitations.new(contents).unique_cites.sort_by do |c, _|
|
14
15
|
c.downcase
|
15
16
|
end
|
16
17
|
|
data/citesight.gemspec
CHANGED
@@ -7,17 +7,17 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.date = CiteSight::DATE
|
8
8
|
|
9
9
|
s.executables << 'citesight'
|
10
|
-
s.add_development_dependency 'minitest'
|
11
|
-
s.add_development_dependency 'minitest-rg'
|
10
|
+
s.add_development_dependency 'minitest', '~> 5.20'
|
11
|
+
s.add_development_dependency 'minitest-rg', '~> 5.3'
|
12
12
|
|
13
13
|
s.summary = 'Citation extractor and analyzer'
|
14
|
-
s.description = 'Extract and analyze citations from APA style text'
|
14
|
+
s.description = 'Extract and analyze citations from MISQ and APA style text'
|
15
15
|
s.authors = ['Soumya Ray']
|
16
16
|
s.email = 'soumya.ray@gmail.com'
|
17
17
|
|
18
18
|
s.files = `git ls-files`.split("\n")
|
19
19
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
-
|
20
|
+
|
21
21
|
s.homepage = 'https://github.com/soumyaray/citesight'
|
22
22
|
s.license = 'MIT'
|
23
23
|
end
|
@@ -1,16 +1,8 @@
|
|
1
|
-
# This class extracts and counts APA
|
1
|
+
# This class extracts and counts MISQ or APA style citations in a paper.
|
2
2
|
# The unique_cites method returns a hash of citations and counts
|
3
3
|
# in the order in which they were encountered.
|
4
4
|
module CiteSight
|
5
5
|
class PaperCitations
|
6
|
-
def self.unique_cites(contents)
|
7
|
-
new(contents).unique_cites
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.index_of_cite(contents, cite)
|
11
|
-
new(contents).index_of_cite(cite)
|
12
|
-
end
|
13
|
-
|
14
6
|
def initialize(contents)
|
15
7
|
@contents = contents
|
16
8
|
end
|
@@ -39,19 +31,23 @@ module CiteSight
|
|
39
31
|
|
40
32
|
private
|
41
33
|
|
34
|
+
def apostrophe() "\'\u2019" end
|
42
35
|
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
43
|
-
def author() "(#{prefix}?[A-Z][[:alpha:]
|
44
|
-
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
45
|
-
def possessive() "([
|
36
|
+
def author() "(#{prefix}?[A-Z][[:alpha:]#{apostrophe}\-]+)" end
|
37
|
+
def other_authors() "([ ](and|\&)[ ]#{author} | ([ ]et[ ]al.){1})" end
|
38
|
+
def possessive() "([#{apostrophe}]s|[#{apostrophe}])" end
|
46
39
|
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
47
|
-
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
40
|
+
def year(yr) "([,]?[ ][\(]?#{yr}[,\)\;])" end
|
48
41
|
|
49
42
|
def cite_match
|
50
43
|
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
51
44
|
end
|
52
45
|
|
53
46
|
def remove_punctuation(cite)
|
54
|
-
cite
|
47
|
+
cite
|
48
|
+
.gsub(/[\(\),;]|([#{apostrophe}]s)/, '')
|
49
|
+
.gsub(/[#{apostrophe}]\s/, ' ')
|
50
|
+
.gsub(/\&/, 'and')
|
55
51
|
end
|
56
52
|
end
|
57
53
|
end
|
data/lib/citesight/version.rb
CHANGED
data/spec/citesight_spec.rb
CHANGED
@@ -1,23 +1,22 @@
|
|
1
1
|
require 'minitest/autorun'
|
2
2
|
require 'minitest/rg'
|
3
|
-
require './spec/
|
3
|
+
require './spec/spec_helper.rb'
|
4
4
|
|
5
5
|
describe 'Paper', 'A text document' do
|
6
6
|
|
7
7
|
describe 'when there are citations in the text' do
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
it 'should fine the right citations' do
|
13
|
-
@results.must_equal TEST_CITES
|
8
|
+
TEST_RESULTS.each do |cite, count|
|
9
|
+
it "should find the right citation count for #{cite}" do
|
10
|
+
_(TEST_CITES[cite]).must_equal count
|
11
|
+
end
|
14
12
|
end
|
15
13
|
end
|
16
14
|
|
17
15
|
describe 'when there are no citations to be found' do
|
18
16
|
it 'should return an empty hash' do
|
19
17
|
no_cites_txt = 'these are not the citations you are looking for'
|
20
|
-
|
18
|
+
_(CiteSight::PaperCitations.new(no_cites_txt).unique_cites.any?)
|
19
|
+
.must_equal(false)
|
21
20
|
end
|
22
21
|
end
|
23
22
|
end
|
@@ -25,24 +24,17 @@ end
|
|
25
24
|
describe 'Cases', 'Test different citation cases' do
|
26
25
|
TEST_CASES.keys.each do |k|
|
27
26
|
it "can detect #{k}" do
|
28
|
-
CiteSight::PaperCitations.
|
27
|
+
_(CiteSight::PaperCitations.new(TEST_CASES[k]["case"]).unique_cites.to_a)
|
29
28
|
.must_equal(TEST_CASES[k]["result"])
|
30
29
|
end
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|
34
33
|
describe 'Indexes', 'Accurately find index of different citations' do
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
34
|
+
TEST_CITES.map do |cite, _count|
|
35
|
+
it "can find the right index for #{cite}" do
|
36
|
+
_(TEST_INDEXES[cite]).must_equal \
|
37
|
+
CiteSight::PaperCitations.new(TEST_CONTENTS).index_of_cite(cite)
|
39
38
|
end
|
40
39
|
end
|
41
40
|
end
|
42
|
-
|
43
|
-
## Produce hash of all cite indexes:
|
44
|
-
# Hash[
|
45
|
-
# TEST_CITES.map do |cite, count|
|
46
|
-
# [cite, PaperCitations.index_cite(doc, cite)]
|
47
|
-
# end
|
48
|
-
# ]
|
@@ -8,10 +8,10 @@ TEST_CITES =
|
|
8
8
|
"Charlton 2002" => 1,
|
9
9
|
"Griffiths 2000" => 1,
|
10
10
|
"Peters et al. 2007" => 2,
|
11
|
-
"Ma and Agarwal 2007" =>
|
12
|
-
"Hur 2007" =>
|
11
|
+
"Ma and Agarwal 2007" => 3,
|
12
|
+
"Hur 2007" => 3,
|
13
13
|
"Oreg 1995b" => 1,
|
14
|
-
"Ray 2000" =>
|
14
|
+
"Ray 2000" => 4,
|
15
15
|
"Ma et al. 2002" => 3,
|
16
16
|
"Griffins 2000" => 1,
|
17
17
|
"O'Hern 2010" => 1,
|
@@ -27,9 +27,9 @@ TEST_INDEXES =
|
|
27
27
|
"Griffiths 2000"=>[97],
|
28
28
|
"Peters et al. 2007"=>[219, 500],
|
29
29
|
"Ma and Agarwal 2007"=>[174],
|
30
|
-
"Hur 2007"=>[146, 490],
|
30
|
+
"Hur 2007"=>[146, 490, 828],
|
31
31
|
"Oreg 1995b"=>[318],
|
32
|
-
"Ray 2000"=>[346, 432],
|
32
|
+
"Ray 2000"=>[346, 432, 781, 817],
|
33
33
|
"Ma et al. 2002"=>[272, 377, 407],
|
34
34
|
"Griffins 2000"=>[522],
|
35
35
|
"O'Hern 2010"=>[566],
|
@@ -42,3 +42,5 @@ TEST_INDEXES =
|
|
42
42
|
TEST_CASES = File.open('./spec/testfiles/test_cases.json', 'r') do |f|
|
43
43
|
JSON.load(f)
|
44
44
|
end
|
45
|
+
|
46
|
+
TEST_RESULTS = CiteSight::PaperCitations.new(TEST_CONTENTS).unique_cites
|
data/spec/testfiles/test.txt
CHANGED
@@ -11,3 +11,6 @@ Multiple cites: in (Hur 2007, Peters et al.’s 2007, Griffins' 2000) from
|
|
11
11
|
Apostrophes in names: (O'Hern 2010) and Wa'el et al. (1993) or
|
12
12
|
Capitals within names: from McDonald (2003) we gather that
|
13
13
|
Multiword last names: even from van der Aalst (2004) or others (De Boor 1980).
|
14
|
+
APA commas: according to literature (Ray, 2000) the
|
15
|
+
APA multiple cites: (Ray, 2000; Hur, 2007)
|
16
|
+
APA two authors: Ma & Agarwal (2007) agree with this (Ma & Agarwal 2007)
|
metadata
CHANGED
@@ -1,44 +1,44 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: citesight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Soumya Ray
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-12-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '5.20'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '5.20'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: minitest-rg
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.3'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
41
|
-
description: Extract and analyze citations from APA style text
|
40
|
+
version: '5.3'
|
41
|
+
description: Extract and analyze citations from MISQ and APA style text
|
42
42
|
email: soumya.ray@gmail.com
|
43
43
|
executables:
|
44
44
|
- citesight
|
@@ -46,8 +46,8 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- ".bundle/config"
|
49
|
+
- ".github/workflows/ruby.yml"
|
49
50
|
- ".gitignore"
|
50
|
-
- ".travis.yml"
|
51
51
|
- Gemfile
|
52
52
|
- LICENSE
|
53
53
|
- README.md
|
@@ -58,7 +58,7 @@ files:
|
|
58
58
|
- lib/citesight/paper_citations.rb
|
59
59
|
- lib/citesight/version.rb
|
60
60
|
- spec/citesight_spec.rb
|
61
|
-
- spec/
|
61
|
+
- spec/spec_helper.rb
|
62
62
|
- spec/testfiles/large_test.txt
|
63
63
|
- spec/testfiles/large_test_results.txt
|
64
64
|
- spec/testfiles/no_match.txt
|
@@ -68,7 +68,7 @@ homepage: https://github.com/soumyaray/citesight
|
|
68
68
|
licenses:
|
69
69
|
- MIT
|
70
70
|
metadata: {}
|
71
|
-
post_install_message:
|
71
|
+
post_install_message:
|
72
72
|
rdoc_options: []
|
73
73
|
require_paths:
|
74
74
|
- lib
|
@@ -83,14 +83,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
83
|
- !ruby/object:Gem::Version
|
84
84
|
version: '0'
|
85
85
|
requirements: []
|
86
|
-
|
87
|
-
|
88
|
-
signing_key:
|
86
|
+
rubygems_version: 3.4.10
|
87
|
+
signing_key:
|
89
88
|
specification_version: 4
|
90
89
|
summary: Citation extractor and analyzer
|
91
90
|
test_files:
|
92
91
|
- spec/citesight_spec.rb
|
93
|
-
- spec/
|
92
|
+
- spec/spec_helper.rb
|
94
93
|
- spec/testfiles/large_test.txt
|
95
94
|
- spec/testfiles/large_test_results.txt
|
96
95
|
- spec/testfiles/no_match.txt
|