treat 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +0 -0
- data/LICENSE +28 -0
- data/README +0 -0
- data/TODO +67 -0
- data/bin/INFO +1 -0
- data/examples/benchmark.rb +81 -0
- data/examples/keywords.rb +60 -0
- data/examples/texts/bugged_out.txt +26 -0
- data/examples/texts/half_cocked_basel.txt +16 -0
- data/examples/texts/hedge_funds.txt +24 -0
- data/examples/texts/hose_and_dry.txt +19 -0
- data/examples/texts/hungarys_troubles.txt +46 -0
- data/examples/texts/indias_slowdown.txt +15 -0
- data/examples/texts/merkozy_rides_again.txt +24 -0
- data/examples/texts/prada_is_not_walmart.txt +9 -0
- data/examples/texts/republican_nomination.txt +26 -0
- data/examples/texts/to_infinity_and_beyond.txt +15 -0
- data/lib/treat.rb +91 -0
- data/lib/treat/buildable.rb +115 -0
- data/lib/treat/categories.rb +29 -0
- data/lib/treat/category.rb +28 -0
- data/lib/treat/delegatable.rb +90 -0
- data/lib/treat/detectors.rb +28 -0
- data/lib/treat/detectors/encoding/native.rb +12 -0
- data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
- data/lib/treat/detectors/format/file.rb +36 -0
- data/lib/treat/detectors/language/language_detector.rb +19 -0
- data/lib/treat/detectors/language/what_language.rb +29 -0
- data/lib/treat/entities.rb +52 -0
- data/lib/treat/entities/collection.rb +19 -0
- data/lib/treat/entities/constituents.rb +15 -0
- data/lib/treat/entities/document.rb +11 -0
- data/lib/treat/entities/entity.rb +242 -0
- data/lib/treat/entities/sentence.rb +8 -0
- data/lib/treat/entities/text.rb +7 -0
- data/lib/treat/entities/tokens.rb +37 -0
- data/lib/treat/entities/zones.rb +17 -0
- data/lib/treat/exception.rb +5 -0
- data/lib/treat/extractors.rb +41 -0
- data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
- data/lib/treat/extractors/named_entity/abner.rb +20 -0
- data/lib/treat/extractors/named_entity/stanford.rb +174 -0
- data/lib/treat/extractors/statistics/frequency.rb +22 -0
- data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
- data/lib/treat/extractors/statistics/position_in.rb +13 -0
- data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
- data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
- data/lib/treat/extractors/time/chronic.rb +12 -0
- data/lib/treat/extractors/time/native.rb +12 -0
- data/lib/treat/extractors/time/nickel.rb +45 -0
- data/lib/treat/extractors/topic_words/lda.rb +71 -0
- data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
- data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
- data/lib/treat/extractors/topics/reuters.rb +91 -0
- data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
- data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
- data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
- data/lib/treat/feature.rb +53 -0
- data/lib/treat/formatters.rb +44 -0
- data/lib/treat/formatters/cleaners/html.rb +17 -0
- data/lib/treat/formatters/readers/autoselect.rb +35 -0
- data/lib/treat/formatters/readers/gocr.rb +24 -0
- data/lib/treat/formatters/readers/html.rb +13 -0
- data/lib/treat/formatters/readers/ocropus.rb +31 -0
- data/lib/treat/formatters/readers/pdf.rb +17 -0
- data/lib/treat/formatters/readers/txt.rb +15 -0
- data/lib/treat/formatters/serializers/xml.rb +48 -0
- data/lib/treat/formatters/serializers/yaml.rb +15 -0
- data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
- data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
- data/lib/treat/formatters/unserializers/xml.rb +79 -0
- data/lib/treat/formatters/unserializers/yaml.rb +15 -0
- data/lib/treat/formatters/visualizers/dot.rb +73 -0
- data/lib/treat/formatters/visualizers/html.rb +12 -0
- data/lib/treat/formatters/visualizers/inspect.rb +16 -0
- data/lib/treat/formatters/visualizers/short_value.rb +14 -0
- data/lib/treat/formatters/visualizers/standoff.rb +41 -0
- data/lib/treat/formatters/visualizers/tree.rb +28 -0
- data/lib/treat/formatters/visualizers/txt.rb +31 -0
- data/lib/treat/group.rb +96 -0
- data/lib/treat/inflectors.rb +50 -0
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
- data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
- data/lib/treat/inflectors/declensors/en.rb +18 -0
- data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
- data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
- data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
- data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
- data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
- data/lib/treat/inflectors/stemmers/porter.rb +158 -0
- data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
- data/lib/treat/inflectors/stemmers/uea.rb +30 -0
- data/lib/treat/lexicalizers.rb +49 -0
- data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
- data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
- data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
- data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
- data/lib/treat/lexicalizers/tag/brill.rb +101 -0
- data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
- data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
- data/lib/treat/processors.rb +45 -0
- data/lib/treat/processors/chunkers/txt.rb +27 -0
- data/lib/treat/processors/parsers/enju.rb +214 -0
- data/lib/treat/processors/parsers/stanford.rb +60 -0
- data/lib/treat/processors/segmenters/punkt.rb +48 -0
- data/lib/treat/processors/segmenters/stanford.rb +45 -0
- data/lib/treat/processors/segmenters/tactful.rb +34 -0
- data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
- data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
- data/lib/treat/processors/tokenizers/perl.rb +96 -0
- data/lib/treat/processors/tokenizers/punkt.rb +42 -0
- data/lib/treat/processors/tokenizers/stanford.rb +33 -0
- data/lib/treat/processors/tokenizers/tactful.rb +59 -0
- data/lib/treat/proxies.rb +66 -0
- data/lib/treat/registrable.rb +26 -0
- data/lib/treat/resources.rb +10 -0
- data/lib/treat/resources/categories.rb +18 -0
- data/lib/treat/resources/delegates.rb +96 -0
- data/lib/treat/resources/dependencies.rb +0 -0
- data/lib/treat/resources/edges.rb +8 -0
- data/lib/treat/resources/formats.rb +23 -0
- data/lib/treat/resources/languages.rb +86 -0
- data/lib/treat/resources/languages.txt +504 -0
- data/lib/treat/resources/tags.rb +393 -0
- data/lib/treat/sugar.rb +43 -0
- data/lib/treat/tree.rb +174 -0
- data/lib/treat/utilities.rb +127 -0
- data/lib/treat/visitable.rb +27 -0
- data/test/profile.rb +2 -0
- data/test/tc_detectors.rb +27 -0
- data/test/tc_entity.rb +105 -0
- data/test/tc_extractors.rb +48 -0
- data/test/tc_formatters.rb +46 -0
- data/test/tc_inflectors.rb +39 -0
- data/test/tc_lexicalizers.rb +39 -0
- data/test/tc_processors.rb +36 -0
- data/test/tc_resources.rb +27 -0
- data/test/tc_treat.rb +64 -0
- data/test/tc_tree.rb +60 -0
- data/test/tests.rb +19 -0
- data/test/texts.rb +20 -0
- data/test/texts/english/long.html +24 -0
- data/test/texts/english/long.txt +22 -0
- data/test/texts/english/medium.txt +5 -0
- data/test/texts/english/short.txt +3 -0
- metadata +412 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
module Treat
  module Formatters
    module Unserializers
      # Rebuilds an entity from a YAML file and appends it to a document.
      class YAML
        # Require the Psych YAML parser.
        require 'psych'

        # Unserialize a YAML file representing an entity.
        #
        # document - an object responding to #file (path of the YAML file
        #            to read) and #<< (receives the loaded object).
        # options  - accepted for interface compatibility; not used here.
        #
        # Returns the document that was passed in.
        #
        # SECURITY: the file is loaded with full object deserialization,
        # which can instantiate arbitrary Ruby objects. Only call this on
        # files that come from a trusted source.
        def self.unserialize(document, options = {})
          document << load_yaml(File.read(document.file))
          document
        end

        # Parse a YAML string, allowing arbitrary Ruby objects.
        #
        # On Psych 4 and later, Psych.load is a *safe* load and rejects
        # "!ruby/object" nodes, so the permissive loader has to be asked
        # for explicitly; older versions only have Psych.load.
        def self.load_yaml(yaml)
          if ::Psych.respond_to?(:unsafe_load)
            ::Psych.unsafe_load(yaml)
          else
            ::Psych.load(yaml)
          end
        end
        private_class_method :load_yaml
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Treat
  module Formatters
    module Visualizers
      # Renders an entity and its descendants as a Graphviz DOT graph.
      # The output can be rendered with e.g. `dot -Tpdf in.dot > out.pdf`.
      class Dot
        # Border colors to use for different POS tags.
        BorderColors = {
          :verb => "#00AABB",
          :noun => "#FAD4A7",
          :adverb => '#103585',
          :adjective => '#D21D54'
        }

        # Create the top-most graph structure and delegate the creation
        # of the graph nodes to to_dot.
        #
        # entity  - the root entity to draw.
        # options - accepted for interface compatibility; not used here.
        #
        # Returns the DOT source as a String.
        def self.visualize(entity, options = {})
          string = "graph {"
          string << self.to_dot(entity)
          string << "\n}"
        end

        # Build the DOT statements for one entity, then recurse into its
        # children. Emits, in order: the node with its label and features
        # as attributes, the link to its parent (if any), all child
        # sub-graphs, and one extra link per entry in entity.edges.
        #
        # Returns a String starting with a newline.
        def self.to_dot(entity)
          string = "\n#{entity.id} [" + node_label(entity)
          if entity.has_features?
            entity.features.each_pair do |feature, value|
              # Features that point at another entity are shown by id.
              value = value.id if value.is_a?(Treat::Entities::Entity)
              string << "#{feature}=\"#{escape(value)}\","
            end
          end
          # Every attribute ends with a comma; drop the last one.
          string = string[0..-2] + "]"
          if entity.has_parent?
            string << "\n#{entity.parent.id} -- #{entity.id};"
          end
          if entity.has_children?
            entity.each { |child| string << self.to_dot(child) }
          end
          if entity.has_edges?
            entity.edges.each_pair do |target, type|
              string << "\n#{entity.id} -- #{target}"
              string << "[label=#{type},dir=forward,"
              string << "arrowhead=\"odiamond\"]"
            end
          end
          string
        end

        # Attribute text (with trailing comma) describing how the node
        # for the entity is labelled: words show value, tag and a color
        # picked by category; other leaves show their escaped value;
        # subclasses of Constituent show their tag; anything else shows
        # its class name.
        def self.node_label(entity)
          if entity.is_leaf?
            if entity.is_a?(Treat::Entities::Word)
              "label=\"#{escape(entity.value)} (#{escape(entity.tag)})\"," +
              "color=\"#{BorderColors[entity.cat]}\","
            else
              # String#inspect already escapes quotes; strip its delimiters.
              "label=\"#{entity.value.inspect[1..-2]}\","
            end
          elsif entity.class < Treat::Entities::Constituent
            "label=\"#{escape(entity.tag)}\","
          else
            "label=\"#{cc(cl(entity.class))}\","
          end
        end
        private_class_method :node_label

        # Make a value safe to embed in a double-quoted DOT string.
        # Without this, values whose text contains a double quote (for
        # instance an Array of Strings) terminate the attribute early.
        def self.escape(value)
          value.to_s.gsub('"') { '\"' }
        end
        private_class_method :escape
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Treat
  module Formatters
    module Visualizers
      # Builds a one-line, human-readable summary of an entity.
      class Inspect
        # Describe the entity as "ClassName (id)", followed - unless the
        # caller_method helper reports :inspect - by its short value,
        # features and edges, each rendered with #inspect and separated
        # by " | ".
        #
        # entity  - the entity to describe.
        # options - accepted for interface compatibility; not used here.
        #
        # Returns a String.
        def self.visualize(entity, options = {})
          summary = "#{entity.class.to_s.split('::')[-1]} (#{entity.id.to_s})"
          # NOTE(review): caller_method is defined elsewhere in the
          # project; presumably it names the method that invoked us.
          return summary if caller_method == :inspect
          details = " | #{entity.short_value.inspect} | #{entity.features.inspect}" +
                    " | #{entity.edges.inspect}"
          summary + details
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module Treat
  module Formatters
    module Visualizers
      # Produces an abbreviated rendering of an entity's text.
      class ShortValue
        # Return the text of the entity, shortened to its first three
        # and last three words joined by " [...] " when it is long.
        #
        # entity  - anything whose #to_s yields the text to shorten.
        # options - Hash; :max_length is the largest number of words
        #           that is still shown in full (default 6). The hash
        #           is read only, never modified.
        #
        # Texts of six words or fewer are always returned unchanged:
        # the abbreviation keeps 3 + 3 words, so shortening them would
        # repeat words (or fail for fewer than three words) and produce
        # a result longer than the input.
        #
        # Returns a String.
        def self.visualize(entity, options = {})
          max_length = options[:max_length] || 6
          text = entity.to_s
          words = text.split(' ')
          return text if words.size <= max_length || words.size <= 6
          words[0..2].join(' ') + ' [...] ' + words[-3..-1].join(' ')
        end
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Treat
  module Formatters
    module Visualizers
      # This class allows the visualization of
      # an entity in standoff format; for example:
      # (S (NP John) (VP has (VP come))).
      class Standoff
        # Concatenates the standoff rendering of every child of the
        # given entity, passing the same options to each child.
        Recurse = Proc.new do |entity, options|
          v = ''
          entity.each { |child| v << visualize(child, options) }
          v
        end

        # Visualize the entity using standoff notation.
        # This can only be called on sentences, constituents and
        # tokens, as the format is not suitable to represent larger
        # entities.
        #
        # entity  - a Token, Constituent or Sentence.
        # options - Hash; :indent is the nesting depth, i.e. the number
        #           of spaces placed in front of this entity (default 0,
        #           also when other keys are present). The hash is read
        #           only; children receive a copy with :indent + 1.
        #
        # Returns a String. Raises RuntimeError for any other entity.
        def self.visualize(entity, options = {})
          indent = options[:indent] || 0
          spaces = ' ' * indent
          child_options = options.merge(:indent => indent + 1)
          if entity.is_a?(Treat::Entities::Token)
            value = "#{spaces}(#{entity.tag} #{entity.value})"
          elsif entity.is_a?(Treat::Entities::Constituent)
            value = "#{spaces}(#{entity.tag}\n" +
                    "#{Recurse.call(entity, child_options)})\n"
          elsif entity.is_a?(Treat::Entities::Sentence)
            value = "#{spaces}(S\n" +
                    "#{Recurse.call(entity, child_options)})\n"
          else
            raise 'Standoff format is unsuitable to represent' +
            ' entities larger than sentences.'
          end
          # Pull a closing parenthesis up onto the line of the previous one.
          value.gsub(")\n)", "))")
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Treat
  module Formatters
    module Visualizers
      # Draws an entity and its descendants as an ASCII tree.
      class Tree
        # Obtain a plain text tree representation of the entity.
        #
        # A childless entity is rendered as "> <inspect>". An entity
        # with children is rendered as "+ <inspect>", a "|" line, and
        # then one "+--" branch per child, each prefixed by as many
        # spaces as the current depth.
        #
        # entity  - responds to #has_children?, #children and #inspect.
        # options - Hash; :indent is the current depth (default 0, also
        #           when other keys are present). The hash is read only;
        #           children receive a copy with :indent + 1.
        #
        # Returns a String.
        def self.visualize(entity, options = {})
          return '> ' + entity.inspect unless entity.has_children?
          indent = options[:indent] || 0
          spaces = ' ' * indent
          child_options = options.merge(:indent => indent + 1)
          string = "+ #{entity.inspect}\n#{spaces}|"
          entity.children.each do |child|
            string << "\n" << spaces << '+--' <<
              self.visualize(child, child_options)
          end
          string
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Treat
  module Formatters
    module Visualizers
      # Creates a plain text visualization of an entity.
      class Txt
        # Obtain a plain text visualization of the entity,
        # with no additional information.
        #
        # A childless entity yields its own value. Otherwise the values
        # of the children are concatenated, each followed by a single
        # space; children that are not tokens and have an empty value
        # are rendered recursively instead.
        #
        # entity  - the entity to render.
        # options - Hash; its :sep entry is always overwritten with ' '.
        #
        # Returns a String.
        def self.visualize(entity, options = {})
          options[:sep] = ' '
          return entity.value unless entity.has_children?
          text = ''
          entity.each do |child|
            unless child.is_a?(Treat::Entities::Token) || child.value != ''
              text << visualize(child, options)
              next
            end
            # Punctuation symbols and clitics 'stick' to the previous
            # token, so the whitespace written before them is removed.
            sticky = child.is_a?(Treat::Entities::Punctuation) ||
                     child.is_a?(Treat::Entities::Clitic)
            text.strip! if sticky
            text << child.value + options[:sep]
          end
          text
        end
      end
    end
  end
end
|
data/lib/treat/group.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
module Treat
  # Behaviour shared by every group of algorithms. A module gains it
  # with `extend Treat::Group`.
  module Group
    # Hook run when a module extends Group: gives the module its
    # type/default/targets accessors and its `method` reader.
    def self.extended(group)
      group.module_eval do
        class << self
          attr_accessor :type, :default, :targets
        end
        # Return the method corresponding to the group.
        # This method resolves the name of the method
        # that a group should provide based on the name
        # of the group. Basically, if the group ends in
        # -ers, the verb corresponding to the group is
        # returned (tokenizers -> tokenize, stemmers ->
        # stem); if it ends in -ors, the ending is replaced
        # by -e (conjugators -> conjugate). Otherwise, the
        # name of the method is the same as that of the
        # group (encoding -> encoding, tag -> tag).
        # The result is computed once and memoized.
        @method = nil
        def self.method
          return @method if @method
          m = ucc(cl(self))
          if m[-3..-1] == 'ers'
            if ['k', 't', 'm', 'd', 'g', 'n'].include? m[-4]
              # Drop "ers", then undouble a doubled final
              # consonant (stemmers -> stemm -> stem).
              n = m[0..-4]
              n = n[0..-2] if n[-1] == n[-2]
            else
              # Drop only "rs" (tokenizers -> tokenize).
              n = m[0..-3]
            end
          elsif m[-3..-1] == 'ors'
            n = m[0..-4] + 'e'
          else
            n = m
          end
          @method = :"#{n}"
        end
      end
    end

    # Create a new algorithm within the group: a new class named after
    # class_name is defined inside the group, with one instance method
    # (named by the group's `method`) that takes an entity and hands it
    # to the supplied block.
    #
    # Each class keeps its own block. (Storing it in a class variable
    # would share a single block between all algorithms of all groups,
    # so adding a second algorithm would silently replace the first.)
    #
    # Returns the newly created class.
    def add(class_name, &block)
      class_name = :"#{cc(class_name)}"
      klass = self.const_set(class_name, Class.new)
      algorithm_method = self.method
      klass.send(:define_method, algorithm_method) do |entity|
        block.call(entity)
      end
      klass
    end

    # Boolean - does the group have the supplied class
    # included in its targets? True when target is one of the
    # entity classes named in `targets`, or a subclass of one.
    # The strict parameter is accepted but currently unused.
    def has_target?(target, strict = false)
      self.targets.any? do |entity_type|
        entity_class = Entities.const_get(cc(entity_type))
        target < entity_class || entity_class == target
      end
    end

    # Populates once the list of the adaptors in the group
    # by crawling the filesystem. The cache is shared by all
    # groups and keyed by the group's underscored name.
    @@list = {}

    # Returns an Array of Symbols, one per .rb file found in a
    # directory named after the group, one level below this file.
    def list
      mod = ucc(cl(self))
      @@list[mod] ||= Dir["#{File.dirname(__FILE__)}/*/#{mod}/*.rb"].map { |file|
        :"#{File.basename(file, '.rb')}"
      }
    end

    # Set inherit to false by default, so that only constants
    # defined directly in the group are found.
    def const_get(const)
      super(const, false)
    end

    # Autoload the algorithms: a missing constant is mapped to a file
    # path built from the underscored names of the group's namespace
    # and of the constant, which is then required. No existence check
    # is made beforehand, so a missing file surfaces as the error
    # raised by require.
    def const_missing(const)
      bits = self.ancestors[0].to_s.split('::')
      bits.collect! { |bit| ucc(bit) }
      file = bits.join('/') + "/#{ucc(const)}"
      require file
      const_get(const)
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Treat
  # Category of algorithms that retrieve inflected or derived forms of
  # words and numbers. Every group below is an annotator group.
  module Inflectors
    # Lemmatizers return the root form of a word.
    module Lemmatizers
      extend Group
      self.targets = [:word]
      self.type = :annotator
    end
    # Stemmers return the stem of a word, which is *not* necessarily
    # its root form.
    module Stemmers
      extend Group
      self.targets = [:word]
      self.type = :annotator
    end
    # Declensors retrieve the different declensions of a noun
    # (singular, plural).
    module Declensors
      extend Group
      self.targets = [:word]
      self.type = :annotator
    end
    # Conjugators retrieve the different conjugations of a word.
    module Conjugators
      extend Group
      self.targets = [:word]
      self.type = :annotator
    end
    # CardinalWords retrieve the full text description of a number.
    module CardinalWords
      extend Group
      self.targets = [:number]
      self.type = :annotator
    end
    # OrdinalWords retrieve the ordinal form of a number.
    module OrdinalWords
      extend Group
      self.targets = [:number]
      self.type = :annotator
    end
    # Extended last, once all the groups above are defined.
    extend Treat::Category
  end
end
|
50
|
+
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Treat
  module Inflectors
    module CardinalWords
      # Spells out numbers in words using the Ruby Linguistics library.
      class Linguistics
        silently { require 'linguistics' }

        # Return the description in words of the number held by the
        # entity, as produced by the `numwords` method of the
        # Linguistics module for the entity's language.
        #
        # entity  - responds to #language and #to_s.
        # options - Hash passed through unchanged to `numwords`:
        #
        # :group => Controls how many numbers at a time are
        # grouped together. Valid values are 0 (normal grouping),
        # 1 (single-digit grouping, e.g., "one, two, three, four"),
        # 2 (double-digit grouping, e.g., "twelve, thirty-four"), or
        # 3 (triple-digit grouping, e.g., "one twenty-three, four").
        # :comma => Set the character/s used to separate word groups.
        # Defaults to ", ".
        # :and => Set the word and/or characters used where ' and '
        # (the default) is normally used. Setting :and to ' ', for
        # example, will cause 2556 to be returned as "two-thousand,
        # five hundred fifty-six" instead of "two-thousand, five
        # hundred and fifty-six".
        # :zero => Set the word used to represent the numeral 0 in
        # the result. 'zero' is the default.
        # :decimal => Set the translation of any decimal points in
        # the number; the default is 'point'.
        # :asArray => If set to a true value, the number will be
        # returned as an array of word groups instead of a String.
        #
        # Raises RuntimeError when Linguistics has no module for the
        # entity's language.
        def self.cardinal_words(entity, options = {})
          l = entity.language.to_s.upcase
          delegate = nil
          begin
            silently { delegate = ::Linguistics.const_get(l) }
          rescue NameError, RuntimeError
            # Module#const_get signals a missing (or malformed)
            # constant name with NameError, which a bare
            # `rescue RuntimeError` does not catch.
            raise "Ruby Linguistics does not have a module " +
            "installed for the #{entity.language} language."
          end
          silently { delegate.numwords(entity.to_s, options) }
        end
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Treat
  module Inflectors
    module Conjugators
      # Conjugates verbs using the Ruby Linguistics library.
      class Linguistics
        silently { require 'linguistics' }

        # Conjugate the verb held by the entity, using the Linguistics
        # module for the entity's language.
        #
        # entity     - responds to #language and #to_s.
        # parameters - Hash selecting the form; supported combinations:
        #              :mode => :infinitive
        #              :mode => :participle, :tense => :present
        #              :count => :plural (as the only key)
        #
        # Raises RuntimeError when Linguistics has no module for the
        # entity's language, and Treat::Exception for any other
        # combination of parameters.
        def self.conjugate(entity, parameters)
          l = entity.language.to_s.upcase
          delegate = nil
          begin
            silently { delegate = ::Linguistics.const_get(l) }
          rescue NameError, RuntimeError
            # Module#const_get signals a missing (or malformed)
            # constant name with NameError, which a bare
            # `rescue RuntimeError` does not catch.
            raise "Ruby Linguistics does not have a module " +
            "installed for the #{entity.language} language."
          end
          if parameters[:mode] == :infinitive
            silently { delegate.infinitive(entity.to_s) }
          elsif parameters[:mode] == :participle && parameters[:tense] == :present
            silently { delegate.present_participle(entity.to_s) }
          elsif parameters[:count] == :plural && parameters.size == 1
            silently { delegate.plural_verb(entity.to_s) }
          else
            raise Treat::Exception,
            'This combination of modes, tenses, persons ' +
            'and/or counts is not presently supported.'
          end
        end
      end
    end
  end
end
|