citesight 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/citesight.rb +30 -16
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 60bfb7a13222d0a5c5e1bdcc55d4533b45c817db
4
- data.tar.gz: 250740d51c6a41f5733d1014b62ef0070b96623e
3
+ metadata.gz: a923e35cd2ee4dfb162449aba2f5c1fb50358efe
4
+ data.tar.gz: 606aeba079bda3f0ebb5248d22bbfa28b301e5df
5
5
  SHA512:
6
- metadata.gz: 8d5c728f567de3370d12fcc07750d4cafe09f5c09ba6502f5560a2a386fee92e1d2059d4464a058c11ee58f9d2a7f25ed311637d2688ca401c098ea18c546e2d
7
- data.tar.gz: 6c80cbc99f513a66925bb1668e2323fa7912ac4fff04d576d1a41d4c842f773a6388286652c87d69b469b388dba103aba777af502c32838372426dfe63235c99
6
+ metadata.gz: da9e7c7f486d9f04ba1a7e8e046adb90fb53dacb868aceca082a1ec22fa67548c99c6b71c79e1b8749a23d6a79be80845b60147d4d724805802c05937769a0dc
7
+ data.tar.gz: b63e4978e3e02a876cf8ee09c9a28e0f8049eb68f8b80872abe6902f30df1f734c839397c5563fe2cb5d07bde9383e7256f2a91ae138bb67598e08a52d5b480f
data/lib/citesight.rb CHANGED
@@ -6,36 +6,50 @@ class PaperCitations
6
6
  new(contents).unique_cites
7
7
  end
8
8
 
9
+ def self.index_of_cite(contents, cite)
10
+ new(contents).index_of_cite(cite)
11
+ end
12
+
9
13
  def initialize(contents)
10
14
  @contents = contents
11
15
  end
12
16
 
13
17
  def unique_cites
14
- # clean citations of slashes, commas, semi-colons, possessives
15
18
  clean_cites = @contents.scan(cite_match).map do |c|
16
- c[0].gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
19
+ remove_punctuation(c[0])
17
20
  end
18
21
 
19
- # create hash of citations (key) with counts (value)
20
- Hash[clean_cites.group_by { |c| c }.map { |k, v| [k, v.count] }]
22
+ Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
23
+ end
24
+
25
+ ##
26
+ # Returns array of citation locations (index) in manuscript
27
+ #
28
+ # contents = File.read('./spec/testfiles/test.txt')
29
+ # PaperCitations.index_of_cite(contents, 'Peters et al. 2007')
30
+ # # => [219, 500]
31
+ def index_of_cite(cite)
32
+ cite_parts = cite.split
33
+ author_s = cite_parts.take(cite_parts.size-1).join(' ')
34
+ year_s = cite_parts.last
35
+ @contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
36
+ ).map { Regexp.last_match.begin(0) }
21
37
  end
22
38
 
23
39
  private
24
40
 
25
- def prefix
26
- '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)'
27
- end
41
+ def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
42
+ def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
43
+ def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
44
+ def possessive() "([\'\u2019]s|[\'\u2019])" end
45
+ def year_literal() "[1-2][0-9]{3}[a-z]?" end
46
+ def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
28
47
 
29
- def name
30
- "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" # name: caps, accents, 's
48
+ def cite_match
49
+ /( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
31
50
  end
32
51
 
33
- def cite_match
34
- /(
35
- #{name}{1} # first author
36
- ([ ]and[ ]#{name} | ([ ]et[ ]al.){1})? # remaining authors
37
- ([\'\u2019]s|s[\'\u2019])? # possessive form
38
- ([ ][\(]?[1-2][0-9]{3}[a-z]?[,\)\;]) # year
39
- )/x
52
+ def remove_punctuation(cite)
53
+ cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
40
54
  end
41
55
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: citesight
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Soumya Ray