citesight 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/citesight.rb +30 -16
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a923e35cd2ee4dfb162449aba2f5c1fb50358efe
|
4
|
+
data.tar.gz: 606aeba079bda3f0ebb5248d22bbfa28b301e5df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da9e7c7f486d9f04ba1a7e8e046adb90fb53dacb868aceca082a1ec22fa67548c99c6b71c79e1b8749a23d6a79be80845b60147d4d724805802c05937769a0dc
|
7
|
+
data.tar.gz: b63e4978e3e02a876cf8ee09c9a28e0f8049eb68f8b80872abe6902f30df1f734c839397c5563fe2cb5d07bde9383e7256f2a91ae138bb67598e08a52d5b480f
|
data/lib/citesight.rb
CHANGED
@@ -6,36 +6,50 @@ class PaperCitations
|
|
6
6
|
new(contents).unique_cites
|
7
7
|
end
|
8
8
|
|
9
|
+
def self.index_of_cite(contents, cite)
|
10
|
+
new(contents).index_of_cite(cite)
|
11
|
+
end
|
12
|
+
|
9
13
|
def initialize(contents)
|
10
14
|
@contents = contents
|
11
15
|
end
|
12
16
|
|
13
17
|
def unique_cites
|
14
|
-
# clean citations of slashes, commas, semi-colons, possessives
|
15
18
|
clean_cites = @contents.scan(cite_match).map do |c|
|
16
|
-
c[0]
|
19
|
+
remove_punctuation(c[0])
|
17
20
|
end
|
18
21
|
|
19
|
-
|
20
|
-
|
22
|
+
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
23
|
+
end
|
24
|
+
|
25
|
+
##
|
26
|
+
# Returns array of citation locations (index) in manuscript
|
27
|
+
#
|
28
|
+
# contents = File.read('./spec/testfiles/test.txt')
|
29
|
+
# PaperCitations.index_of_cite(contents, 'Peters et al. 2007')
|
30
|
+
# # => [219, 500]
|
31
|
+
def index_of_cite(cite)
|
32
|
+
cite_parts = cite.split
|
33
|
+
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
34
|
+
year_s = cite_parts.last
|
35
|
+
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
36
|
+
).map { Regexp.last_match.begin(0) }
|
21
37
|
end
|
22
38
|
|
23
39
|
private
|
24
40
|
|
25
|
-
def prefix
|
26
|
-
|
27
|
-
end
|
41
|
+
def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
|
42
|
+
def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
|
43
|
+
def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
|
44
|
+
def possessive() "([\'\u2019]s|[\'\u2019])" end
|
45
|
+
def year_literal() "[1-2][0-9]{3}[a-z]?" end
|
46
|
+
def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
|
28
47
|
|
29
|
-
def
|
30
|
-
|
48
|
+
def cite_match
|
49
|
+
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
31
50
|
end
|
32
51
|
|
33
|
-
def
|
34
|
-
/(
|
35
|
-
#{name}{1} # first author
|
36
|
-
([ ]and[ ]#{name} | ([ ]et[ ]al.){1})? # remaining authors
|
37
|
-
([\'\u2019]s|s[\'\u2019])? # possessive form
|
38
|
-
([ ][\(]?[1-2][0-9]{3}[a-z]?[,\)\;]) # year
|
39
|
-
)/x
|
52
|
+
def remove_punctuation(cite)
|
53
|
+
cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
|
40
54
|
end
|
41
55
|
end
|