citesight 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/citesight +18 -0
  3. data/lib/citesight.rb +41 -0
  4. metadata +74 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 60b2a4cc5aaf0a5c7b4f29aa1f7adc0912e2d83a
4
+ data.tar.gz: 38770a64ec74e3bd40eddc6bb05ad3911a9ca058
5
+ SHA512:
6
+ metadata.gz: f8745a2261b85156d191068357b10210c82fabd510a7a6206a3cfb27a9643b325a7265b5eb88aba846c18b4e82179e640be0f46b66d79d413860db9bd80226c1
7
+ data.tar.gz: 42cb14f320069779e39427bc94dd745e7703a67b33793f32410b456a6dbca0687742e629f15f93e8a2cab64e0cbbfca7264242cceabe2c4afc731c5214c13b05
data/bin/citesight ADDED
@@ -0,0 +1,18 @@
1
#!/usr/bin/env ruby

require 'citesight'
require 'pp'

# executable requirements: (1) env shebang above; (2) file mode 0755

# Command-line front end: reads the file named by the first argument,
# counts its citations with PaperCitations, then prints the five most
# frequent citations followed by the full list sorted by citation text
# (case-insensitively).

# The usage text names this executable (citesight), which is the command
# the user actually typed.
fail ArgumentError, "Usage: citesight [filename]\n" if ARGV.empty?

# File.read opens, reads and closes the file in one call, so no file
# handle is left open for the rest of the run.
contents = File.read(ARGV[0])

# Array of [citation, count] pairs ordered by citation text, ignoring case.
results = PaperCitations.unique_cites(contents)
                        .sort_by { |cite, _count| cite.downcase }

puts "Top citations:\n"
# Ascending sort by count, reversed, so the highest counts come first.
top_five = results.sort_by { |_cite, count| count }.reverse.take(5)
PP.pp(Hash[top_five])

puts "\nTotal unique citations: #{results.count}"
PP.pp(Hash[results])
data/lib/citesight.rb ADDED
@@ -0,0 +1,41 @@
1
# This class extracts and counts APA-style citations in a paper.
# The unique_cites method returns a hash of citations and counts
# in the order in which they were encountered.
#
# A citation, as recognised here, is: an author name (optionally with a
# "de" / "van der" prefix), optionally followed by " and <name>" or
# " et al.", optionally a possessive marker, then a space, an optional
# opening parenthesis, a four-digit year starting with 1 or 2, an optional
# lowercase letter, and a closing ",", ")" or ";".
#
# NOTE(review): the pattern requires the space before the year to follow
# the author text directly, so a comma between author and year
# ("Smith, 2010") is not matched -- confirm whether that is intended.
class PaperCitations
  # Convenience wrapper around new(contents).unique_cites.
  #
  # @param contents [String] full text of the paper
  # @return [Hash{String => Integer}] cleaned citation => occurrence count
  def self.unique_cites(contents)
    new(contents).unique_cites
  end

  # @param contents [String] full text of the paper to scan
  def initialize(contents)
    @contents = contents
  end

  # Finds every citation in the text, normalises it and counts repeats.
  #
  # @return [Hash{String => Integer}] cleaned citation => occurrence count,
  #   with keys in first-encounter order
  def unique_cites
    # scan yields one array of capture groups per match; the first group
    # is the outermost one, i.e. the whole citation.
    cites = @contents.scan(cite_match).map { |groups| clean(groups.first) }

    # create hash of citations (key) with counts (value); a plain Hash is
    # used so a lookup of an unknown citation still returns nil
    cites.each_with_object({}) do |cite, counts|
      counts[cite] = counts.fetch(cite, 0) + 1
    end
  end

  private

  # Cleans a citation of parentheses, commas, semi-colons and possessives
  # so that "Smith's (2010)" and "Smith (2010)" share the key "Smith 2010".
  def clean(cite)
    cite.gsub(/[(),;]|(['\u2019]s)/, '') # brackets, separators and 's
        .gsub(/['\u2019]\s/, ' ')        # trailing apostrophe, as in "Jones' "
  end

  # Regexp source for an optional lowercase/capitalised name prefix.
  def prefix
    '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)'
  end

  # Regexp source for one author name: a capital letter followed by
  # letters, apostrophes or hyphens. The right single quote is written as
  # an escape so this source stays ASCII-only.
  def name
    "(#{prefix}?[A-Z][[:alpha:]'\u2019\\-]+)" # name: caps, accents, 's
  end

  # Full citation pattern (extended syntax, so literal spaces are written
  # as [ ]). The dot of "et al." is escaped so only a real full stop
  # matches there.
  def cite_match
    /(
      #{name}                                # first author
      ([ ]and[ ]#{name} | ([ ]et[ ]al\.))?   # remaining authors
      (['\u2019]s|s['\u2019])?               # possessive form
      ([ ][(]?[1-2][0-9]{3}[a-z]?[,);])      # year
    )/x
  end
end
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: citesight
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Soumya Ray
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest-rg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Extract and analyze citations from APA style paper
42
+ email: soumya.ray@gmail.com
43
+ executables:
44
+ - citesight
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - lib/citesight.rb
49
+ - bin/citesight
50
+ homepage: https://github.com/soumyaray/citesight
51
+ licenses:
52
+ - MIT
53
+ metadata: {}
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.1.11
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: Citation extractor and analyzer
74
+ test_files: []