citesight 0.1.2 → 1.0.0

Sign up for free protection for your applications and access to all features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 7e11117f9c3c2e1eacd42126bc7a97ed7841ee28
4
- data.tar.gz: a0dfd14a313ec7ece65bed421eb335fdda9427de
2
+ SHA256:
3
+ metadata.gz: f8bbbcea21b92c660093f84144abd1c113ea3632d766759d3fd323b7b3dc5cf7
4
+ data.tar.gz: fd1b56b1ab356e63d2519844deccd8685b967edfc1fa8995c039e71a8c8bbef9
5
5
  SHA512:
6
- metadata.gz: ace1dc22e05ee5eebe3383d075d8965ab503209bbd14dd8d72ca042aff3608c7497404fd1d5acebe5317c35b5e6c973604f527665f7425cbfa26503436da5b3e
7
- data.tar.gz: 49ab665d7ea38767d508681707a15c0c6063dad04df974354e352a859311e4bf6b2b00b9c673391e0b5a6498102a777ac76c91a3fe4106ce4ea78f58bc15bba2
6
+ metadata.gz: b638261f8e8ec4fbfc3e685fca5e223d5949078c13d62cd240a2b0819eaca0be500f1748c0f9638fc00923b11a2158f48aedc5ff0ae6d862b418883067fb0a1b
7
+ data.tar.gz: '079afeb512af5a7eff458a2cfa23d50892a871380058c3e5dcdc8218ee8326250fd6b620a4565cd4e503f623c7c37e725972bdbe266b2c20958a8ad987e005f8'
data/.github/workflows/ruby.yml ADDED
@@ -0,0 +1,39 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ "main", "develop" ]
13
+ pull_request:
14
+ branches: '**'
15
+
16
+ permissions:
17
+ contents: read
18
+
19
+ jobs:
20
+ test:
21
+
22
+ strategy:
23
+ matrix:
24
+ os: [ubuntu, macos]
25
+ ruby-version: ['3.0', '3.1', '3.2']
26
+ runs-on: ${{ matrix.os }}-latest
27
+
28
+ steps:
29
+ - uses: actions/checkout@v3
30
+ - name: Set up Ruby
31
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
32
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
33
+ # uses: ruby/setup-ruby@v1
34
+ uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0
35
+ with:
36
+ ruby-version: ${{ matrix.ruby-version }}
37
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
38
+ - name: Run tests
39
+ run: bundle exec rake spec
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
- #citesight
1
+ # citesight
2
+
2
3
  [![Gem Version](https://badge.fury.io/rb/citesight.svg)](http://badge.fury.io/rb/citesight)
3
4
  [![Build Status](https://travis-ci.org/soumyaray/citesight.svg?branch=master)](https://travis-ci.org/soumyaray/citesight)
4
5
 
@@ -6,17 +7,24 @@ home: [https://github.com/soumyaray/citesight](https://github.com/soumyaray/cite
6
7
  Gem to extract and report on citations in an academic text
7
8
 
8
9
  ## Usage
10
+
9
11
  This gem may be used as a command line utility or called from code
10
12
 
11
- ### CLI:
13
+ <!-- TODO: installation instructions -->
14
+
15
+ ### CLI
16
+
12
17
  citesight mydoc.txt
13
18
 
14
- ### Code example:
19
+ <!-- TODO: update usage instructions -->
20
+ ### Code example
21
+
15
22
  require 'citesight'
16
23
  require 'pp'
17
24
 
18
25
  contents = File.read("spec/testfiles/large_test.txt", :encoding => "UTF-8")
19
- cites = PaperCitations.unique_cites(contents)
26
+ paper = PaperCitations.new(contents)
27
+ cites = paper.unique_cites
20
28
 
21
29
  puts "\nTotal unique citations: #{cites.count}"
22
30
  PP.pp(Hash[cites])
@@ -24,5 +32,5 @@ This gem may be used as a command line utility or called from code
24
32
  top_cite = cites.sort_by { |_c, count| count}.reverse.first[0]
25
33
  puts "\nYour top citation: #{top_cite}"
26
34
 
27
- top_cite_indexes = PaperCitations.index_of_cite(contents, top_cite)
35
+ top_cite_indexes = paper.index_of_cite(top_cite)
28
36
  puts "It was cited at locations: #{top_cite_indexes.join(', ')}"
data/bin/citesight CHANGED
@@ -5,12 +5,13 @@ require 'citesight'
5
5
  require 'pp'
6
6
 
7
7
  # executable requirements: (1) env shebang above; (2) file mode 0755
8
+ # TODO: Return with error message instead of failing
8
9
 
9
10
  fail ArgumentError, "Usage: get_citations [filename]\n" if ARGV.count == 0
10
11
 
11
12
  contents = File.open(ARGV[0], 'r').read
12
13
 
13
- results = CiteSight::PaperCitations.unique_cites(contents).sort_by do |c, _|
14
+ results = CiteSight::PaperCitations.new(contents).unique_cites.sort_by do |c, _|
14
15
  c.downcase
15
16
  end
16
17
 
data/citesight.gemspec CHANGED
@@ -7,17 +7,17 @@ Gem::Specification.new do |s|
7
7
  s.date = CiteSight::DATE
8
8
 
9
9
  s.executables << 'citesight'
10
- s.add_development_dependency 'minitest'
11
- s.add_development_dependency 'minitest-rg'
10
+ s.add_development_dependency 'minitest', '~> 5.20'
11
+ s.add_development_dependency 'minitest-rg', '~> 5.3'
12
12
 
13
13
  s.summary = 'Citation extractor and analyzer'
14
- s.description = 'Extract and analyze citations from APA style text'
14
+ s.description = 'Extract and analyze citations from MISQ and APA style text'
15
15
  s.authors = ['Soumya Ray']
16
16
  s.email = 'soumya.ray@gmail.com'
17
17
 
18
18
  s.files = `git ls-files`.split("\n")
19
19
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
-
20
+
21
21
  s.homepage = 'https://github.com/soumyaray/citesight'
22
22
  s.license = 'MIT'
23
23
  end
data/lib/citesight/paper_citations.rb CHANGED
@@ -1,16 +1,8 @@
1
- # This class extracts and counts APA-style citations in a paper.
1
+ # This class extracts and counts MISQ or APA style citations in a paper.
2
2
  # The unique_cites method returns a hash of citations and counts
3
3
  # in the order in which they were encountered.
4
4
  module CiteSight
5
5
  class PaperCitations
6
- def self.unique_cites(contents)
7
- new(contents).unique_cites
8
- end
9
-
10
- def self.index_of_cite(contents, cite)
11
- new(contents).index_of_cite(cite)
12
- end
13
-
14
6
  def initialize(contents)
15
7
  @contents = contents
16
8
  end
@@ -39,19 +31,23 @@ module CiteSight
39
31
 
40
32
  private
41
33
 
34
+ def apostrophe() "\'\u2019" end
42
35
  def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
43
- def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
44
- def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
45
- def possessive() "([\'\u2019]s|[\'\u2019])" end
36
+ def author() "(#{prefix}?[A-Z][[:alpha:]#{apostrophe}\-]+)" end
37
+ def other_authors() "([ ](and|\&)[ ]#{author} | ([ ]et[ ]al.){1})" end
38
+ def possessive() "([#{apostrophe}]s|[#{apostrophe}])" end
46
39
  def year_literal() "[1-2][0-9]{3}[a-z]?" end
47
- def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
40
+ def year(yr) "([,]?[ ][\(]?#{yr}[,\)\;])" end
48
41
 
49
42
  def cite_match
50
43
  /( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
51
44
  end
52
45
 
53
46
  def remove_punctuation(cite)
54
- cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
47
+ cite
48
+ .gsub(/[\(\),;]|([#{apostrophe}]s)/, '')
49
+ .gsub(/[#{apostrophe}]\s/, ' ')
50
+ .gsub(/\&/, 'and')
55
51
  end
56
52
  end
57
53
  end
data/lib/citesight/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module CiteSight
2
- VERSION = '0.1.2'
3
- DATE = '2016-05-10'
2
+ VERSION = '1.0.0'
3
+ DATE = '2023-12-27'
4
4
  end
data/spec/citesight_spec.rb CHANGED
@@ -1,23 +1,22 @@
1
1
  require 'minitest/autorun'
2
2
  require 'minitest/rg'
3
- require './spec/minitest_helper.rb'
3
+ require './spec/spec_helper.rb'
4
4
 
5
5
  describe 'Paper', 'A text document' do
6
6
 
7
7
  describe 'when there are citations in the text' do
8
- before do
9
- @results = CiteSight::PaperCitations.unique_cites(TEST_CONTENTS)
10
- end
11
-
12
- it 'should fine the right citations' do
13
- @results.must_equal TEST_CITES
8
+ TEST_RESULTS.each do |cite, count|
9
+ it "should find the right citation count for #{cite}" do
10
+ _(TEST_CITES[cite]).must_equal count
11
+ end
14
12
  end
15
13
  end
16
14
 
17
15
  describe 'when there are no citations to be found' do
18
16
  it 'should return an empty hash' do
19
17
  no_cites_txt = 'these are not the citations you are looking for'
20
- # TODO: check return of empty hash
18
+ _(CiteSight::PaperCitations.new(no_cites_txt).unique_cites.any?)
19
+ .must_equal(false)
21
20
  end
22
21
  end
23
22
  end
@@ -25,24 +24,17 @@ end
25
24
  describe 'Cases', 'Test different citation cases' do
26
25
  TEST_CASES.keys.each do |k|
27
26
  it "can detect #{k}" do
28
- CiteSight::PaperCitations.unique_cites(TEST_CASES[k]["case"]).to_a\
27
+ _(CiteSight::PaperCitations.new(TEST_CASES[k]["case"]).unique_cites.to_a)
29
28
  .must_equal(TEST_CASES[k]["result"])
30
29
  end
31
30
  end
32
31
  end
33
32
 
34
33
  describe 'Indexes', 'Accurately find index of different citations' do
35
- it "can find the right index for all citations" do
36
- TEST_CITES.map do |cite, _count|
37
- TEST_INDEXES[cite].must_equal \
38
- CiteSight::PaperCitations.index_of_cite(TEST_CONTENTS, cite)
34
+ TEST_CITES.map do |cite, _count|
35
+ it "can find the right index for #{cite}" do
36
+ _(TEST_INDEXES[cite]).must_equal \
37
+ CiteSight::PaperCitations.new(TEST_CONTENTS).index_of_cite(cite)
39
38
  end
40
39
  end
41
40
  end
42
-
43
- ## Produce hash of all cite indexes:
44
- # Hash[
45
- # TEST_CITES.map do |cite, count|
46
- # [cite, PaperCitations.index_cite(doc, cite)]
47
- # end
48
- # ]
@@ -8,10 +8,10 @@ TEST_CITES =
8
8
  "Charlton 2002" => 1,
9
9
  "Griffiths 2000" => 1,
10
10
  "Peters et al. 2007" => 2,
11
- "Ma and Agarwal 2007" => 1,
12
- "Hur 2007" => 2,
11
+ "Ma and Agarwal 2007" => 3,
12
+ "Hur 2007" => 3,
13
13
  "Oreg 1995b" => 1,
14
- "Ray 2000" => 2,
14
+ "Ray 2000" => 4,
15
15
  "Ma et al. 2002" => 3,
16
16
  "Griffins 2000" => 1,
17
17
  "O'Hern 2010" => 1,
@@ -27,9 +27,9 @@ TEST_INDEXES =
27
27
  "Griffiths 2000"=>[97],
28
28
  "Peters et al. 2007"=>[219, 500],
29
29
  "Ma and Agarwal 2007"=>[174],
30
- "Hur 2007"=>[146, 490],
30
+ "Hur 2007"=>[146, 490, 828],
31
31
  "Oreg 1995b"=>[318],
32
- "Ray 2000"=>[346, 432],
32
+ "Ray 2000"=>[346, 432, 781, 817],
33
33
  "Ma et al. 2002"=>[272, 377, 407],
34
34
  "Griffins 2000"=>[522],
35
35
  "O'Hern 2010"=>[566],
@@ -42,3 +42,5 @@ TEST_INDEXES =
42
42
  TEST_CASES = File.open('./spec/testfiles/test_cases.json', 'r') do |f|
43
43
  JSON.load(f)
44
44
  end
45
+
46
+ TEST_RESULTS = CiteSight::PaperCitations.new(TEST_CONTENTS).unique_cites
@@ -11,3 +11,6 @@ Multiple cites: in (Hur 2007, Peters et al.’s 2007, Griffins' 2000) from
11
11
  Apostrophes in names: (O'Hern 2010) and Wa'el et al. (1993) or
12
12
  Capitals within names: from McDonald (2003) we gather that
13
13
  Multiword last names: even from van der Aalst (2004) or others (De Boor 1980).
14
+ APA commas: according to literature (Ray, 2000) the
15
+ APA multiple cites: (Ray, 2000; Hur, 2007)
16
+ APA two authors: Ma & Agarwal (2007) agree with this (Ma & Agarwal 2007)
metadata CHANGED
@@ -1,44 +1,44 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: citesight
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Soumya Ray
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-10 00:00:00.000000000 Z
11
+ date: 2023-12-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: minitest
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '5.20'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '5.20'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: minitest-rg
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '5.3'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
41
- description: Extract and analyze citations from APA style text
40
+ version: '5.3'
41
+ description: Extract and analyze citations from MISQ and APA style text
42
42
  email: soumya.ray@gmail.com
43
43
  executables:
44
44
  - citesight
@@ -46,8 +46,8 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - ".bundle/config"
49
+ - ".github/workflows/ruby.yml"
49
50
  - ".gitignore"
50
- - ".travis.yml"
51
51
  - Gemfile
52
52
  - LICENSE
53
53
  - README.md
@@ -58,7 +58,7 @@ files:
58
58
  - lib/citesight/paper_citations.rb
59
59
  - lib/citesight/version.rb
60
60
  - spec/citesight_spec.rb
61
- - spec/minitest_helper.rb
61
+ - spec/spec_helper.rb
62
62
  - spec/testfiles/large_test.txt
63
63
  - spec/testfiles/large_test_results.txt
64
64
  - spec/testfiles/no_match.txt
@@ -68,7 +68,7 @@ homepage: https://github.com/soumyaray/citesight
68
68
  licenses:
69
69
  - MIT
70
70
  metadata: {}
71
- post_install_message:
71
+ post_install_message:
72
72
  rdoc_options: []
73
73
  require_paths:
74
74
  - lib
@@ -83,14 +83,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubyforge_project:
87
- rubygems_version: 2.5.1
88
- signing_key:
86
+ rubygems_version: 3.4.10
87
+ signing_key:
89
88
  specification_version: 4
90
89
  summary: Citation extractor and analyzer
91
90
  test_files:
92
91
  - spec/citesight_spec.rb
93
- - spec/minitest_helper.rb
92
+ - spec/spec_helper.rb
94
93
  - spec/testfiles/large_test.txt
95
94
  - spec/testfiles/large_test_results.txt
96
95
  - spec/testfiles/no_match.txt
data/.travis.yml DELETED
@@ -1,10 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - ruby-head
4
- - 2.1.0
5
-
6
- branches:
7
- only:
8
- - master
9
- - develop
10
- - /^rel-.*$/