rbtagger 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/brill/tagger.rb CHANGED
@@ -26,6 +26,34 @@ module Brill
26
26
  tag(text).select{|t| t.last.match(/NN/) }
27
27
  end
28
28
 
29
+ # see: http://cpansearch.perl.org/src/ACOBURN/Lingua-EN-Tagger-0.15/Tagger.pm
30
+ def noun_phrases(text)
31
+ # ?:$PREP|$DET|$NUM)
32
+ #
33
+ tags = tag(text.gsub(/[^\w]/,' '))
34
+ phrases = []
35
+ phrase = []
36
+ mark = -1
37
+
38
+ tags.each_with_index do|tag,i|
39
+ if phrase.empty?
40
+ mark = i if tag.last.match(/PRP\$|DT/)
41
+ if tag.last == 'NNP' and mark != -1
42
+ phrase = [ tags[mark..i] ]
43
+ #mark = -1
44
+ end
45
+ mark = -1 if i - mark > 8
46
+ elsif tag.last.match(/NN/)
47
+ phrase << tag
48
+ else
49
+ phrases << phrase
50
+ phrase = []
51
+ mark = -1
52
+ end
53
+ end
54
+ phrases
55
+ end
56
+
29
57
  # returns similar results as tag, but further reduced by only selecting nouns
30
58
  def suggest( text, max = 10 )
31
59
  tags = tag(text)
@@ -2,7 +2,7 @@ module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 4
5
- TINY = 2
5
+ TINY = 3
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbtagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher