rbtagger 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/brill/tagger.rb +28 -0
- data/lib/rbtagger/version.rb +1 -1
- metadata +1 -1
data/lib/brill/tagger.rb
CHANGED
@@ -26,6 +26,34 @@ module Brill
|
|
26
26
|
tag(text).select{|t| t.last.match(/NN/) }
|
27
27
|
end
|
28
28
|
|
29
|
+
# see: http://cpansearch.perl.org/src/ACOBURN/Lingua-EN-Tagger-0.15/Tagger.pm
|
30
|
+
# Extracts simple noun phrases from +text+.
#
# Every non-word character in +text+ is replaced by a space before the text
# is handed to +tag+. The result of +tag+ is then scanned as a sequence of
# [word, part_of_speech] pairs (assumed shape, inferred from the use of
# +last+ on each element -- confirm against +tag+).
#
# A phrase opens when a proper noun (tag "NNP") is seen after a remembered
# determiner / possessive pronoun (tag matching DT or PRP$); it then covers
# everything from that determiner up to the proper noun. Following tokens
# whose tag matches NN extend the phrase, and any other token closes it.
#
# Returns an Array of phrases; each phrase is a flat Array of the pairs
# produced by +tag+. Returns an empty Array when nothing matches.
def noun_phrases(text)
  tagged  = tag(text.gsub(/[^\w]/, ' '))
  phrases = []
  phrase  = []
  mark    = nil # index of the most recent determiner/possessive, if any

  tagged.each_with_index do |pair, i|
    pos = pair.last

    if phrase.empty?
      mark = i if pos =~ /PRP\$|DT/
      # Open the phrase as a flat list of pairs (determiner..proper noun) so
      # it has the same shape as the pairs appended below.
      phrase = tagged[mark..i] if mark && pos == 'NNP'
      # Forget a determiner that is more than 8 tokens behind.
      mark = nil if mark && i - mark > 8
    elsif pos =~ /NN/
      phrase << pair
    else
      phrases << phrase
      phrase = []
      mark = nil
    end
  end

  # A phrase that runs to the end of the text has no closing token; keep it.
  phrases << phrase unless phrase.empty?
  phrases
end
|
56
|
+
|
29
57
|
# returns similar results as tag, but further reduced by only selecting nouns
|
30
58
|
def suggest( text, max = 10 )
|
31
59
|
tags = tag(text)
|
data/lib/rbtagger/version.rb
CHANGED