citesight 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/citesight.rb +30 -16
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 60bfb7a13222d0a5c5e1bdcc55d4533b45c817db
4
- data.tar.gz: 250740d51c6a41f5733d1014b62ef0070b96623e
3
+ metadata.gz: a923e35cd2ee4dfb162449aba2f5c1fb50358efe
4
+ data.tar.gz: 606aeba079bda3f0ebb5248d22bbfa28b301e5df
5
5
  SHA512:
6
- metadata.gz: 8d5c728f567de3370d12fcc07750d4cafe09f5c09ba6502f5560a2a386fee92e1d2059d4464a058c11ee58f9d2a7f25ed311637d2688ca401c098ea18c546e2d
7
- data.tar.gz: 6c80cbc99f513a66925bb1668e2323fa7912ac4fff04d576d1a41d4c842f773a6388286652c87d69b469b388dba103aba777af502c32838372426dfe63235c99
6
+ metadata.gz: da9e7c7f486d9f04ba1a7e8e046adb90fb53dacb868aceca082a1ec22fa67548c99c6b71c79e1b8749a23d6a79be80845b60147d4d724805802c05937769a0dc
7
+ data.tar.gz: b63e4978e3e02a876cf8ee09c9a28e0f8049eb68f8b80872abe6902f30df1f734c839397c5563fe2cb5d07bde9383e7256f2a91ae138bb67598e08a52d5b480f
data/lib/citesight.rb CHANGED
@@ -6,36 +6,50 @@ class PaperCitations
6
6
  new(contents).unique_cites
7
7
  end
8
8
 
9
+ def self.index_of_cite(contents, cite)
10
+ new(contents).index_of_cite(cite)
11
+ end
12
+
9
13
  def initialize(contents)
10
14
  @contents = contents
11
15
  end
12
16
 
13
17
  def unique_cites
14
- # clean citations of slashes, commas, semi-colons, possessives
15
18
  clean_cites = @contents.scan(cite_match).map do |c|
16
- c[0].gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
19
+ remove_punctuation(c[0])
17
20
  end
18
21
 
19
- # create hash of citations (key) with counts (value)
20
- Hash[clean_cites.group_by { |c| c }.map { |k, v| [k, v.count] }]
22
+ Hash[clean_cites.group_by { |c| c }.map { |cit, num| [cit, num.count] }]
23
+ end
24
+
25
+ ##
26
+ # Returns array of citation locations (index) in manuscript
27
+ #
28
+ # contents = File.read('./spec/testfiles/test.txt')
29
+ # PaperCitations.index_of_cite(contents, 'Peters et al. 2007')
30
+ # # => [219, 500]
31
+ def index_of_cite(cite)
32
+ cite_parts = cite.split
33
+ author_s = cite_parts.take(cite_parts.size-1).join(' ')
34
+ year_s = cite_parts.last
35
+ @contents.enum_for(:scan, /(#{author_s}#{possessive}?#{year(year_s)})/
36
+ ).map { Regexp.last_match.begin(0) }
21
37
  end
22
38
 
23
39
  private
24
40
 
25
- def prefix
26
- '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)'
27
- end
41
+ def prefix() '(([dD]e|[vV]an[ ]?[dD]er)[ ]?)' end
42
+ def author() "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" end
43
+ def other_authors() "([ ]and[ ]#{author} | ([ ]et[ ]al.){1})" end
44
+ def possessive() "([\'\u2019]s|[\'\u2019])" end
45
+ def year_literal() "[1-2][0-9]{3}[a-z]?" end
46
+ def year(yr) "([ ][\(]?#{yr}[,\)\;])" end
28
47
 
29
- def name
30
- "(#{prefix}?[A-Z][[:alpha:]\'\u2019\-]+)" # name: caps, accents, 's
48
+ def cite_match
49
+ /( #{author}{1}#{other_authors}?#{possessive}?#{year(year_literal)} )/x
31
50
  end
32
51
 
33
- def cite_match
34
- /(
35
- #{name}{1} # first author
36
- ([ ]and[ ]#{name} | ([ ]et[ ]al.){1})? # remaining authors
37
- ([\'\u2019]s|s[\'\u2019])? # possessive form
38
- ([ ][\(]?[1-2][0-9]{3}[a-z]?[,\)\;]) # year
39
- )/x
52
+ def remove_punctuation(cite)
53
+ cite.gsub(/[\(\),;]|([\'\u2019]s)/, '').gsub(/[\'\u2019]\s/, ' ')
40
54
  end
41
55
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: citesight
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Soumya Ray