citesight 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/citesight.rb +30 -16
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a923e35cd2ee4dfb162449aba2f5c1fb50358efe
|
4
|
+
data.tar.gz: 606aeba079bda3f0ebb5248d22bbfa28b301e5df
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da9e7c7f486d9f04ba1a7e8e046adb90fb53dacb868aceca082a1ec22fa67548c99c6b71c79e1b8749a23d6a79be80845b60147d4d724805802c05937769a0dc
|
7
|
+
data.tar.gz: b63e4978e3e02a876cf8ee09c9a28e0f8049eb68f8b80872abe6902f30df1f734c839397c5563fe2cb5d07bde9383e7256f2a91ae138bb67598e08a52d5b480f
|
data/lib/citesight.rb
CHANGED
@@ -6,36 +6,50 @@ class PaperCitations
|
|
6
6
|
new(contents).unique_cites
|
7
7
|
end
|
8
8
|
|
9
|
+
def self.index_of_cite(contents, cite)
|
10
|
+
new(contents).index_of_cite(cite)
|
11
|
+
end
|
12
|
+
|
9
13
|
def initialize(contents)
|
10
14
|
@contents = contents
|
11
15
|
end
|
12
16
|
|
13
17
|
def unique_cites
|
14
|
-
# clean citations of slashes, commas, semi-colons, possesives
|
15
18
|
clean_cites = @contents.scan(cite_match).map do |c|
|
16
|
-
c[0]
|
19
|
+
remove_punctuation(c[0])
|
17
20
|
end
|
18
21
|
|
19
|
-
|
20
|
-
|
22
|
+
Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
|
23
|
+
end
|
24
|
+
|
25
|
+
##
|
26
|
+
# Returns array of citation locations (index) in manuscript
|
27
|
+
#
|
28
|
+
# contents = File.read('./spec/testfiles/test.txt')
|
29
|
+
# PaperCitations.index_of_cite(contents, 'Peters et al. 2007')
|
30
|
+
# # => [219, 500]
|
31
|
+
def index_of_cite(cite)
|
32
|
+
cite_parts = cite.split
|
33
|
+
author_s = cite_parts.take(cite_parts.size-1).join(' ')
|
34
|
+
year_s = cite_parts.last
|
35
|
+
@contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
|
36
|
+
).map { Regexp.last_match.begin(0) }
|
21
37
|
end
|
22
38
|
|
23
39
|
private
|
24
40
|
|
25
|
-
def prefix
|
26
|
-
|
27
|
-
end
|
41
|
+
# Regexp source fragment for a surname particle: "de"/"De" or
# "van der" (leading letters in either case, inner space optional),
# followed by an optional space.
def prefix
  '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)'
end
|
42
|
+
# Regexp source fragment for one author surname: an optional particle
# (see #prefix), a capital A-Z, then one or more letters, apostrophes
# (ASCII or U+2019) or hyphens.
def author
  "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)"
end
|
43
|
+
# Regexp source fragment for the authors after the first one: either
# " and <author>" or " et al.".
#
# The spaces around the "|" are literal in this string, so the fragment
# relies on being interpolated into an extended (/x) regexp, as
# cite_match does.
# NOTE(review): the "." after "al" is not escaped, so it matches any
# single character, not only a full stop - confirm whether intended.
def other_authors
  "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})"
end
|
44
|
+
# Regexp source fragment for a possessive suffix: an apostrophe
# (ASCII or U+2019) followed by "s", or the apostrophe alone.
def possessive
  "([\'\u2019]s|[\'\u2019])"
end
|
45
|
+
# Regexp source fragment for a four-digit year starting with 1 or 2,
# optionally followed by one lowercase letter (e.g. "2007a").
def year_literal
  "[1-2][0-9]{3}[a-z]?"
end
|
46
|
+
# Wraps a year pattern +yr+ in the surrounding citation punctuation:
# a leading space, an optional "(", then +yr+, then one of "," ")" ";".
# +yr+ is interpolated as-is into the returned regexp source string.
def year(yr)
  "([ ][\(]?#{yr}[,\)\;])"
end
|
28
47
|
|
29
|
-
def
|
30
|
-
|
48
|
+
def cite_match
|
49
|
+
/( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
|
31
50
|
end
|
32
51
|
|
33
|
-
def
|
34
|
-
/(
|
35
|
-
#{name}{1} # first author
|
36
|
-
([ ]and[ ]#{name} | ([ ]et[ ]al.){1})? # remaining authors
|
37
|
-
([\'\u2019]s|s[\'\u2019])? # possessive form
|
38
|
-
([ ][\(]?[1-2][0-9]{3}[a-z]?[,\)\;]) # year
|
39
|
-
)/x
|
52
|
+
def remove_punctuation(cite)
|
53
|
+
cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
|
40
54
|
end
|
41
55
|
end
|