citesight 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/citesight +18 -0
- data/lib/citesight.rb +41 -0
- metadata +74 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 60b2a4cc5aaf0a5c7b4f29aa1f7adc0912e2d83a
|
4
|
+
data.tar.gz: 38770a64ec74e3bd40eddc6bb05ad3911a9ca058
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f8745a2261b85156d191068357b10210c82fabd510a7a6206a3cfb27a9643b325a7265b5eb88aba846c18b4e82179e640be0f46b66d79d413860db9bd80226c1
|
7
|
+
data.tar.gz: 42cb14f320069779e39427bc94dd745e7703a67b33793f32410b456a6dbca0687742e629f15f93e8a2cab64e0cbbfca7264242cceabe2c4afc731c5214c13b05
|
data/bin/citesight
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'citesight'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
# executable requirements: (1) env shebang above; (2) file mode 0755
|
7
|
+
|
8
|
+
# Command-line entry point: reads the file named by the first argument,
# extracts its citations, and prints the five most frequent ones followed by
# every unique citation with its count.
#
# Raises ArgumentError when no filename is given. The message names the
# installed executable (citesight).
fail ArgumentError, "Usage: citesight [filename]\n" if ARGV.empty?

# File.read opens, reads and closes the file in a single call, so no file
# handle is left open.
contents = File.read(ARGV[0])

# Array of [citation, count] pairs ordered alphabetically, ignoring case.
results = PaperCitations.unique_cites(contents).sort_by { |c, _| c.downcase }

puts "Top citations:\n"
# Sort ascending by count, then reverse so the most-cited come first.
PP.pp(Hash[results.sort_by { |_cite, count| count }.reverse.take(5)])

puts "\nTotal unique citations: #{results.count}"
PP.pp(Hash[results])
|
data/lib/citesight.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Extracts and counts APA-style citations such as "Smith et al. (2010)"
# found in the text of a paper.
#
# The unique_cites method returns a hash of citations (keys) and counts
# (values) in the order in which each citation was first encountered.
class PaperCitations
  # Convenience wrapper around PaperCitations.new(contents).unique_cites.
  #
  # @param contents [String] text of the paper to scan
  # @return [Hash{String => Integer}] cleaned citation => occurrences
  def self.unique_cites(contents)
    new(contents).unique_cites
  end

  # @param contents [String] text of the paper to scan
  def initialize(contents)
    @contents = contents
  end

  # Finds every citation in the text, normalizes it and counts how often
  # each normalized form occurs.
  #
  # @return [Hash{String => Integer}] cleaned citation => occurrences,
  #   keyed in order of first appearance
  def unique_cites
    # clean citations of parentheses, commas, semi-colons, possessives.
    # String#scan yields one array of captures per match; index 0 is the
    # outermost group, i.e. the whole citation.
    clean_cites = @contents.scan(cite_match).map do |c|
      c[0].gsub(/[\(\),;]|([\'\’]s)/, '').gsub(/[\'\’]\s/, ' ')
    end

    # create hash of citations (key) with counts (value); a plain Hash keeps
    # insertion order, so keys stay in first-encounter order
    clean_cites.each_with_object({}) do |cite, counts|
      counts[cite] = counts.fetch(cite, 0) + 1
    end
  end

  private

  # Regex fragment for a surname particle ("de", "van der", ...) with an
  # optional trailing space.
  def prefix
    '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)'
  end

  # Regex fragment for one author surname: optional particle, a capital
  # letter, then letters, apostrophes or hyphens.
  def name
    "(#{prefix}?[A-Z][[:alpha:]\'\’\-]+)" # name: caps, accents, 's
  end

  # Builds the extended-mode pattern that matches one whole citation.
  # Whitespace outside character classes is ignored in /x mode, which is why
  # literal spaces are written as [ ].
  def cite_match
    /(
      #{name}                                 # first author
      ([ ]and[ ]#{name} | ([ ]et[ ]al\.))?    # remaining authors, literal dot
      ([\'\’]s|s[\'\’])?                      # possessive form
      ([ ][\(]?[1-2][0-9]{3}[a-z]?[,\)\;])    # year
    )/x
  end
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: citesight
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Soumya Ray
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-27 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: minitest
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest-rg
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Extract and analyze citations from APA style paper
|
42
|
+
email: soumya.ray@gmail.com
|
43
|
+
executables:
|
44
|
+
- citesight
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- lib/citesight.rb
|
49
|
+
- bin/citesight
|
50
|
+
homepage: https://github.com/soumyaray/citesight
|
51
|
+
licenses:
|
52
|
+
- MIT
|
53
|
+
metadata: {}
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
requirements: []
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 2.1.11
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: Citation extractor and analyzer
|
74
|
+
test_files: []
|