treat 0.2.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +3 -3
- data/README.md +33 -0
- data/files/INFO +1 -0
- data/lib/treat.rb +40 -105
- data/lib/treat/ai.rb +12 -0
- data/lib/treat/ai/classifiers/id3.rb +27 -0
- data/lib/treat/categories.rb +82 -35
- data/lib/treat/categorizable.rb +44 -0
- data/lib/treat/classification.rb +61 -0
- data/lib/treat/configurable.rb +115 -0
- data/lib/treat/data_set.rb +42 -0
- data/lib/treat/dependencies.rb +24 -0
- data/lib/treat/downloader.rb +87 -0
- data/lib/treat/entities.rb +68 -66
- data/lib/treat/entities/abilities.rb +10 -0
- data/lib/treat/entities/abilities/buildable.rb +327 -0
- data/lib/treat/entities/abilities/checkable.rb +31 -0
- data/lib/treat/entities/abilities/copyable.rb +45 -0
- data/lib/treat/entities/abilities/countable.rb +51 -0
- data/lib/treat/entities/abilities/debuggable.rb +83 -0
- data/lib/treat/entities/abilities/delegatable.rb +123 -0
- data/lib/treat/entities/abilities/doable.rb +62 -0
- data/lib/treat/entities/abilities/exportable.rb +11 -0
- data/lib/treat/entities/abilities/iterable.rb +115 -0
- data/lib/treat/entities/abilities/magical.rb +83 -0
- data/lib/treat/entities/abilities/registrable.rb +74 -0
- data/lib/treat/entities/abilities/stringable.rb +91 -0
- data/lib/treat/entities/entities.rb +104 -0
- data/lib/treat/entities/entity.rb +122 -245
- data/lib/treat/exception.rb +4 -4
- data/lib/treat/extractors.rb +77 -80
- data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
- data/lib/treat/extractors/language/what_language.rb +50 -45
- data/lib/treat/extractors/name_tag/stanford.rb +55 -0
- data/lib/treat/extractors/tf_idf/native.rb +87 -0
- data/lib/treat/extractors/time/chronic.rb +55 -0
- data/lib/treat/extractors/time/nickel.rb +86 -62
- data/lib/treat/extractors/time/ruby.rb +53 -0
- data/lib/treat/extractors/topic_words/lda.rb +67 -58
- data/lib/treat/extractors/topics/reuters.rb +100 -87
- data/lib/treat/formatters.rb +39 -35
- data/lib/treat/formatters/readers/abw.rb +49 -29
- data/lib/treat/formatters/readers/autoselect.rb +37 -33
- data/lib/treat/formatters/readers/doc.rb +19 -13
- data/lib/treat/formatters/readers/html.rb +52 -30
- data/lib/treat/formatters/readers/image.rb +41 -40
- data/lib/treat/formatters/readers/odt.rb +59 -45
- data/lib/treat/formatters/readers/pdf.rb +28 -25
- data/lib/treat/formatters/readers/txt.rb +12 -15
- data/lib/treat/formatters/readers/xml.rb +73 -36
- data/lib/treat/formatters/serializers/xml.rb +80 -79
- data/lib/treat/formatters/serializers/yaml.rb +19 -18
- data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
- data/lib/treat/formatters/unserializers/xml.rb +94 -99
- data/lib/treat/formatters/unserializers/yaml.rb +20 -19
- data/lib/treat/formatters/visualizers/dot.rb +132 -132
- data/lib/treat/formatters/visualizers/standoff.rb +52 -44
- data/lib/treat/formatters/visualizers/tree.rb +26 -29
- data/lib/treat/groupable.rb +153 -0
- data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
- data/lib/treat/inflectors.rb +50 -45
- data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
- data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
- data/lib/treat/inflectors/declensors/active_support.rb +31 -0
- data/lib/treat/inflectors/declensors/english.rb +38 -0
- data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
- data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
- data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
- data/lib/treat/inflectors/stemmers/porter.rb +160 -0
- data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
- data/lib/treat/inflectors/stemmers/uea.rb +28 -0
- data/lib/treat/installer.rb +308 -0
- data/lib/treat/kernel.rb +105 -27
- data/lib/treat/languages.rb +122 -88
- data/lib/treat/languages/arabic.rb +15 -15
- data/lib/treat/languages/chinese.rb +15 -15
- data/lib/treat/languages/dutch.rb +15 -15
- data/lib/treat/languages/english.rb +61 -62
- data/lib/treat/languages/french.rb +19 -19
- data/lib/treat/languages/german.rb +20 -20
- data/lib/treat/languages/greek.rb +15 -15
- data/lib/treat/languages/italian.rb +16 -16
- data/lib/treat/languages/polish.rb +15 -15
- data/lib/treat/languages/portuguese.rb +15 -15
- data/lib/treat/languages/russian.rb +15 -15
- data/lib/treat/languages/spanish.rb +16 -16
- data/lib/treat/languages/swedish.rb +16 -16
- data/lib/treat/lexicalizers.rb +34 -55
- data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
- data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
- data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
- data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
- data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
- data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
- data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
- data/lib/treat/linguistics.rb +9 -0
- data/lib/treat/linguistics/categories.rb +11 -0
- data/lib/treat/linguistics/tags.rb +422 -0
- data/lib/treat/loaders/linguistics.rb +30 -0
- data/lib/treat/loaders/stanford.rb +27 -0
- data/lib/treat/object.rb +1 -0
- data/lib/treat/processors.rb +37 -44
- data/lib/treat/processors/chunkers/autoselect.rb +16 -0
- data/lib/treat/processors/chunkers/html.rb +71 -0
- data/lib/treat/processors/chunkers/txt.rb +18 -24
- data/lib/treat/processors/parsers/enju.rb +253 -208
- data/lib/treat/processors/parsers/stanford.rb +130 -131
- data/lib/treat/processors/segmenters/punkt.rb +79 -45
- data/lib/treat/processors/segmenters/stanford.rb +46 -48
- data/lib/treat/processors/segmenters/tactful.rb +43 -36
- data/lib/treat/processors/tokenizers/perl.rb +124 -92
- data/lib/treat/processors/tokenizers/ptb.rb +81 -0
- data/lib/treat/processors/tokenizers/punkt.rb +48 -42
- data/lib/treat/processors/tokenizers/stanford.rb +39 -38
- data/lib/treat/processors/tokenizers/tactful.rb +64 -55
- data/lib/treat/proxies.rb +52 -35
- data/lib/treat/retrievers.rb +26 -16
- data/lib/treat/retrievers/indexers/ferret.rb +47 -26
- data/lib/treat/retrievers/searchers/ferret.rb +69 -50
- data/lib/treat/tree.rb +241 -183
- data/spec/collection.rb +123 -0
- data/spec/document.rb +93 -0
- data/spec/entity.rb +408 -0
- data/spec/languages.rb +25 -0
- data/spec/phrase.rb +146 -0
- data/spec/samples/mathematicians/archimedes.abw +34 -0
- data/spec/samples/mathematicians/euler.html +21 -0
- data/spec/samples/mathematicians/gauss.pdf +0 -0
- data/spec/samples/mathematicians/leibniz.txt +13 -0
- data/spec/samples/mathematicians/newton.doc +0 -0
- data/spec/sandbox.rb +5 -0
- data/spec/token.rb +109 -0
- data/spec/treat.rb +52 -0
- data/spec/tree.rb +117 -0
- data/spec/word.rb +110 -0
- data/spec/zone.rb +66 -0
- data/tmp/INFO +1 -1
- metadata +100 -201
- data/INSTALL +0 -1
- data/README +0 -3
- data/TODO +0 -28
- data/lib/economist/half_cocked_basel.txt +0 -16
- data/lib/economist/hungarys_troubles.txt +0 -46
- data/lib/economist/indias_slowdown.txt +0 -15
- data/lib/economist/merkozy_rides_again.txt +0 -24
- data/lib/economist/prada_is_not_walmart.txt +0 -9
- data/lib/economist/to_infinity_and_beyond.txt +0 -15
- data/lib/ferret/_11.cfs +0 -0
- data/lib/ferret/_14.cfs +0 -0
- data/lib/ferret/_p.cfs +0 -0
- data/lib/ferret/_s.cfs +0 -0
- data/lib/ferret/_v.cfs +0 -0
- data/lib/ferret/_y.cfs +0 -0
- data/lib/ferret/segments +0 -0
- data/lib/ferret/segments_15 +0 -0
- data/lib/treat/buildable.rb +0 -157
- data/lib/treat/category.rb +0 -33
- data/lib/treat/delegatable.rb +0 -116
- data/lib/treat/doable.rb +0 -45
- data/lib/treat/entities/collection.rb +0 -14
- data/lib/treat/entities/document.rb +0 -12
- data/lib/treat/entities/phrases.rb +0 -17
- data/lib/treat/entities/tokens.rb +0 -61
- data/lib/treat/entities/zones.rb +0 -41
- data/lib/treat/extractors/coreferences/stanford.rb +0 -69
- data/lib/treat/extractors/date/chronic.rb +0 -32
- data/lib/treat/extractors/date/ruby.rb +0 -25
- data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
- data/lib/treat/extractors/language/language_extractor.rb +0 -27
- data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
- data/lib/treat/extractors/roles/naive.rb +0 -73
- data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
- data/lib/treat/extractors/statistics/position_in.rb +0 -14
- data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
- data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
- data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
- data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
- data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
- data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
- data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
- data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
- data/lib/treat/feature.rb +0 -58
- data/lib/treat/features.rb +0 -7
- data/lib/treat/formatters/visualizers/short_value.rb +0 -29
- data/lib/treat/formatters/visualizers/txt.rb +0 -45
- data/lib/treat/group.rb +0 -106
- data/lib/treat/helpers/linguistics_loader.rb +0 -18
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
- data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
- data/lib/treat/inflectors/declensions/english.rb +0 -319
- data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
- data/lib/treat/inflectors/stem/porter.rb +0 -162
- data/lib/treat/inflectors/stem/porter_c.rb +0 -26
- data/lib/treat/inflectors/stem/uea.rb +0 -30
- data/lib/treat/install.rb +0 -59
- data/lib/treat/languages/tags.rb +0 -377
- data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
- data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
- data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
- data/lib/treat/lexicalizers/tag/brill.rb +0 -91
- data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
- data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
- data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
- data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
- data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
- data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
- data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
- data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
- data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
- data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
- data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
- data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
- data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
- data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
- data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
- data/lib/treat/registrable.rb +0 -28
- data/lib/treat/sugar.rb +0 -50
- data/lib/treat/viewable.rb +0 -29
- data/lib/treat/visitable.rb +0 -28
- data/test/profile.rb +0 -2
- data/test/tc_entity.rb +0 -117
- data/test/tc_extractors.rb +0 -73
- data/test/tc_formatters.rb +0 -41
- data/test/tc_inflectors.rb +0 -34
- data/test/tc_lexicalizers.rb +0 -32
- data/test/tc_processors.rb +0 -50
- data/test/tc_resources.rb +0 -22
- data/test/tc_treat.rb +0 -60
- data/test/tc_tree.rb +0 -60
- data/test/tests.rb +0 -20
- data/test/texts.rb +0 -19
- data/test/texts/english/half_cocked_basel.txt +0 -16
- data/test/texts/english/hose_and_dry.doc +0 -0
- data/test/texts/english/hungarys_troubles.abw +0 -70
- data/test/texts/english/long.html +0 -24
- data/test/texts/english/long.txt +0 -22
- data/test/texts/english/medium.txt +0 -5
- data/test/texts/english/republican_nomination.pdf +0 -0
- data/test/texts/english/saving_the_euro.odt +0 -0
- data/test/texts/english/short.txt +0 -3
- data/test/texts/english/zero_sum.html +0 -111
data/lib/treat/feature.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
# This class represents a probabilistic feature;
|
3
|
-
# it is currently not used, because its
|
4
|
-
# behaviour is non-deterministic. Perhaps at
|
5
|
-
# some point this will be of value for specific
|
6
|
-
# algorithms and so I'm keeping it here.
|
7
|
-
class Feature
|
8
|
-
# Undefine all methods, except those that
|
9
|
-
# create any problems (e.g. with serializing).
|
10
|
-
instance_methods.each do |meth|
|
11
|
-
undef_method(meth) if meth !~
|
12
|
-
/^(__|object_id|class|instance_variables|instance_variable_get)/
|
13
|
-
end
|
14
|
-
# Allows to read the probability hash,
|
15
|
-
# the possible values of the feature,
|
16
|
-
# and the best value (with highest P).
|
17
|
-
attr_reader :p_hash, :values, :best
|
18
|
-
# Initialize the feature with a hash
|
19
|
-
# of features => probabilities.
|
20
|
-
def initialize(p_hash)
|
21
|
-
@p_hash = p_hash
|
22
|
-
normalize
|
23
|
-
max = @p_hash.values.max
|
24
|
-
@best = @p_hash.select { |i,j| j == max }.keys.sample
|
25
|
-
@values = @p_hash.keys
|
26
|
-
type = @values[0].class
|
27
|
-
if type == ::Symbol || type == ::NilClass
|
28
|
-
@object = @best
|
29
|
-
else
|
30
|
-
@object = type.new(@best)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
# Normalize the probabilities, so that
|
34
|
-
# the sum of all probabilities is 1,
|
35
|
-
# except if the sum of all probabilities
|
36
|
-
# is already below one (in which case we
|
37
|
-
# assume that the feature is intentionally
|
38
|
-
# incomplete).
|
39
|
-
def normalize
|
40
|
-
sum = @p_hash.inject(0.0) { |r, e| r + e[1] }
|
41
|
-
return if sum <= 1.0
|
42
|
-
p = {}
|
43
|
-
@p_hash.each { |k,v| p[k] = v.to_f/sum.to_f }
|
44
|
-
@p_hash = p
|
45
|
-
end
|
46
|
-
# Find the probability of value x.
|
47
|
-
def probability(x)
|
48
|
-
@p_hash[x] ? @p_hash[x] : 0
|
49
|
-
end
|
50
|
-
# Alias for probability: p(x).
|
51
|
-
alias :p :probability
|
52
|
-
# Catch all other methods than the ones
|
53
|
-
# explicitly defined.
|
54
|
-
def method_missing(sym, *args, &block)
|
55
|
-
@object.send(sym, *args, &block)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
data/lib/treat/features.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Formatters
|
3
|
-
module Visualizers
|
4
|
-
class ShortValue
|
5
|
-
# Default options for the visualizer.
|
6
|
-
DefaultOptions = { :max_words => 6, :max_length => 30 }
|
7
|
-
# Returns the text value of an entity, shortend
|
8
|
-
# with [..] if the value is longer than :max_words
|
9
|
-
# or longer than :max_length.
|
10
|
-
#
|
11
|
-
# Options:
|
12
|
-
# - (Integer) :max_words => the maximum number
|
13
|
-
# of words in an entity before it is shortened.
|
14
|
-
# - (Integer) :max_length => the maximum number
|
15
|
-
# of characters in an entity before it is shortened.s
|
16
|
-
def self.visualize(entity, options = {})
|
17
|
-
options = DefaultOptions.merge(options)
|
18
|
-
words = entity.to_s.split(' ')
|
19
|
-
if words.size < options[:max_words] ||
|
20
|
-
entity.to_s.length < options[:max_length]
|
21
|
-
entity.to_s
|
22
|
-
else
|
23
|
-
words[0..2].join(' ') + ' [...] ' + words[-3..-1].join(' ')
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Formatters
|
3
|
-
module Visualizers
|
4
|
-
# Creates a plain text visualization of an entity.
|
5
|
-
class Txt
|
6
|
-
# The default options for the visualizer.
|
7
|
-
DefaultOptions = { :sep => ' ' }
|
8
|
-
# Obtain a plain text visualization of the entity,
|
9
|
-
# with no additional information.
|
10
|
-
#
|
11
|
-
# Options:
|
12
|
-
# (String) :sep => the separator to use between words.
|
13
|
-
def self.visualize(entity, options = {})
|
14
|
-
options[:first] = true unless options[:first] == false
|
15
|
-
first = options[:first]
|
16
|
-
options = DefaultOptions.merge(options)
|
17
|
-
return entity.value.dup if !entity.has_children?
|
18
|
-
value = ''
|
19
|
-
options[:first] = false
|
20
|
-
entity.each do |child|
|
21
|
-
value += "\n\n" if child.is_a?(Treat::Entities::Section)
|
22
|
-
if child.is_a?(Treat::Entities::Token) || child.value != ''
|
23
|
-
# Remove the trailing space for tokens that
|
24
|
-
# 'stick' to the previous one, such
|
25
|
-
# as punctuation symbols and clitics.
|
26
|
-
if child.is_a?(Treat::Entities::Punctuation) ||
|
27
|
-
child.is_a?(Treat::Entities::Clitic)
|
28
|
-
value.strip!
|
29
|
-
end
|
30
|
-
value += child.value + options[:sep]
|
31
|
-
else
|
32
|
-
value += visualize(child, options)
|
33
|
-
end
|
34
|
-
if child.is_a?(Treat::Entities::Title) ||
|
35
|
-
child.is_a?(Treat::Entities::Paragraph)
|
36
|
-
value += "\n\n"
|
37
|
-
end
|
38
|
-
end
|
39
|
-
value = value.strip if first
|
40
|
-
value
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
data/lib/treat/group.rb
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Group
|
3
|
-
# Modify the extended class.
|
4
|
-
def self.extended(group)
|
5
|
-
group.module_eval do
|
6
|
-
class << self
|
7
|
-
attr_accessor :type, :default, :targets
|
8
|
-
attr_accessor :presets, :preprocessors, :postprocessors
|
9
|
-
end
|
10
|
-
self.presets = {}
|
11
|
-
self.preprocessors = {}
|
12
|
-
self.postprocessors = {}
|
13
|
-
# Return the method corresponding to the group.
|
14
|
-
# This method resolves the name of the method
|
15
|
-
# that a group should provide based on the name
|
16
|
-
# of the group. Basically, if the group ends in
|
17
|
-
# -ers, the verb corresponding to the group is
|
18
|
-
# returned (tokenizers -> tokenize, inflectors ->
|
19
|
-
# inflect). Otherwise, the name of the method
|
20
|
-
# is the same as that of the group (encoding ->
|
21
|
-
# encoding, tag -> tag).
|
22
|
-
@method = nil
|
23
|
-
def self.method
|
24
|
-
return @method if @method
|
25
|
-
m = ucc(cl(self))
|
26
|
-
if m[-3..-1] == 'ers'
|
27
|
-
if ['k', 't', 'm', 'd', 'g', 'n', 'x', 'h'].include? m[-4]
|
28
|
-
n = m[0..-4]
|
29
|
-
n = n[0..-2] if n[-1] == n[-2]
|
30
|
-
else
|
31
|
-
n = m[0..-3]
|
32
|
-
end
|
33
|
-
elsif m[-3..-1] == 'ors'
|
34
|
-
n = m[0..-4] + 'e'
|
35
|
-
else
|
36
|
-
n = m
|
37
|
-
end
|
38
|
-
@method = n.intern
|
39
|
-
end
|
40
|
-
end
|
41
|
-
group.list
|
42
|
-
end
|
43
|
-
# Create a new algorithm within the group. Once
|
44
|
-
# the algorithm is added, it will be automatically
|
45
|
-
# installed on all the targets of the group.
|
46
|
-
def add(class_name, &block)
|
47
|
-
klass = self.const_set(cc(class_name).intern, Class.new)
|
48
|
-
method = self.method
|
49
|
-
@@list[ucc(cl(self))] << class_name
|
50
|
-
klass.send(:define_singleton_method, method) do |entity, options={}|
|
51
|
-
block.call(entity, options)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
# Boolean - does the group have the supplied class
|
55
|
-
# included in its targets?
|
56
|
-
def has_target?(target, strict = false)
|
57
|
-
is_target = false
|
58
|
-
self.targets.each do |entity_type|
|
59
|
-
entity_type = Entities.const_get(cc(entity_type))
|
60
|
-
if target < entity_type || entity_type == target
|
61
|
-
is_target = true; break
|
62
|
-
end
|
63
|
-
end
|
64
|
-
is_target
|
65
|
-
end
|
66
|
-
# Cache the list of adaptors to improve performance.
|
67
|
-
@@list = {}
|
68
|
-
# Populates once the list of the adaptors in the group
|
69
|
-
# by crawling the filesystem.
|
70
|
-
def list
|
71
|
-
mod = ucc(cl(self))
|
72
|
-
if @@list[mod].nil?
|
73
|
-
@@list[mod] = []
|
74
|
-
dirs = Dir.glob("#{Treat.lib}/treat/*/#{mod}/*.rb")
|
75
|
-
dirs.each do |file|
|
76
|
-
@@list[mod] <<
|
77
|
-
file.split('/')[-1][0..-4].intern
|
78
|
-
end
|
79
|
-
end
|
80
|
-
@@list[mod]
|
81
|
-
end
|
82
|
-
# Get constants in this module, excluding those
|
83
|
-
# defined by parent modules.
|
84
|
-
def const_get(const)
|
85
|
-
super(const, false)
|
86
|
-
end
|
87
|
-
# Lazy load the classes in the group.
|
88
|
-
def const_missing(const)
|
89
|
-
bits = self.ancestors[0].to_s.split('::')
|
90
|
-
bits.collect! { |bit| ucc(bit) }
|
91
|
-
file = bits.join('/') + "/#{ucc(const)}"
|
92
|
-
if not File.readable?("#{Treat.lib}/#{file}.rb")
|
93
|
-
raise Treat::Exception,
|
94
|
-
"File '#{file}.rb' corresponding to requested worker "+
|
95
|
-
"#{self}::#{const} does not exist."
|
96
|
-
else
|
97
|
-
require file
|
98
|
-
if not const_defined?(const)
|
99
|
-
raise Treat::Exception,
|
100
|
-
"File #{file} does not define #{self}::#{const}."
|
101
|
-
end
|
102
|
-
const_get(const)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Helpers
|
3
|
-
class LinguisticsLoader
|
4
|
-
silence_warnings { require 'linguistics' }
|
5
|
-
def self.load(language)
|
6
|
-
begin
|
7
|
-
l = language.to_s.upcase
|
8
|
-
klass = nil
|
9
|
-
silence_warnings { klass = ::Linguistics.const_get(l) }
|
10
|
-
klass
|
11
|
-
rescue RuntimeError
|
12
|
-
raise "Ruby Linguistics does not have a module " +
|
13
|
-
" installed for the #{language} language."
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
@@ -1,42 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Inflectors
|
3
|
-
module CardinalWords
|
4
|
-
# This class is a wrapper for the functions included
|
5
|
-
# in the 'linguistics' gem that allow to describe a
|
6
|
-
# number in words in cardinal form.
|
7
|
-
#
|
8
|
-
# Project website: http://deveiate.org/projects/Linguistics/
|
9
|
-
class Linguistics
|
10
|
-
require 'treat/helpers/linguistics_loader'
|
11
|
-
# Return the description of a cardinal number in words.
|
12
|
-
#
|
13
|
-
# Options:
|
14
|
-
#
|
15
|
-
# - :group => Controls how many numbers at a time are
|
16
|
-
# grouped together. Valid values are 0 (normal grouping),
|
17
|
-
# 1 (single-digit grouping, e.g., “one, two, three, four”),
|
18
|
-
# 2 (double-digit grouping, e.g., “twelve, thirty-four”, or
|
19
|
-
# 3 (triple-digit grouping, e.g., “one twenty-three, four”).
|
20
|
-
# - :comma => Set the character/s used to separate word groups.
|
21
|
-
# Defaults to ", ".
|
22
|
-
# - :and => Set the word and/or characters used where ' and '
|
23
|
-
# (the default) is normally used. Setting :and to ' ', for
|
24
|
-
# example, will cause 2556 to be returned as “two-thousand,
|
25
|
-
# five hundred fifty-six” instead of “two-thousand, five
|
26
|
-
# hundred and fifty-six”.
|
27
|
-
# - :zero => Set the word used to represent the numeral 0 in
|
28
|
-
# the result. 'zero' is the default.
|
29
|
-
# - :decimal => Set the translation of any decimal points in
|
30
|
-
# the number; the default is 'point'.
|
31
|
-
# - :asArray If set to a true value, the number will be returned
|
32
|
-
# as an array of word groups instead of a String.
|
33
|
-
#
|
34
|
-
# More specific options when using :type => :ordinal:
|
35
|
-
def self.cardinal_words(entity, options = {})
|
36
|
-
klass = Treat::Helpers::LinguisticsLoader.load(entity.language)
|
37
|
-
klass.numwords(entity.to_s, options)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
@@ -1,36 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Inflectors
|
3
|
-
module Conjugations
|
4
|
-
# This class is a wrapper for the functions included
|
5
|
-
# in the 'linguistics' gem that allow to conjugate verbs.
|
6
|
-
#
|
7
|
-
# Project website: http://deveiate.org/projects/Linguistics/
|
8
|
-
class Linguistics
|
9
|
-
require 'treat/helpers/linguistics_loader'
|
10
|
-
# Conjugate a verb using ruby linguistics with the specified
|
11
|
-
# mode, tense, count and person.
|
12
|
-
#
|
13
|
-
# Options:
|
14
|
-
#
|
15
|
-
# - (Symbol) :mode => :infinitive, :indicative, :subjunctive, :participle
|
16
|
-
# - (Symbol) :tense => :past, :present, :future
|
17
|
-
# - (Symbol) :count => :singular, :plural
|
18
|
-
# - (Symbol) :person => :first, :second, :third
|
19
|
-
def self.conjugations(entity, parameters)
|
20
|
-
klass = Treat::Helpers::LinguisticsLoader.load(entity.language)
|
21
|
-
if parameters[:mode] == :infinitive
|
22
|
-
silence_warnings { klass.infinitive(entity.to_s) }
|
23
|
-
elsif parameters[:mode] == :participle && parameters[:tense] == :present
|
24
|
-
silence_warnings { klass.present_participle(entity.to_s) }
|
25
|
-
elsif parameters[:count] == :plural && parameters.size == 1
|
26
|
-
silence_warnings { klass.plural_verb(entity.to_s) }
|
27
|
-
else
|
28
|
-
raise Treat::Exception,
|
29
|
-
'This combination of modes, tenses, persons ' +
|
30
|
-
'and/or counts is not presently supported.'
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
@@ -1,319 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Inflectors
|
3
|
-
module Declensions
|
4
|
-
# This class is a wrapper for the Inflect module,
|
5
|
-
# copied from the unmaintained 'english' ruby gem,
|
6
|
-
# created by Thomas Sawyer.
|
7
|
-
#
|
8
|
-
# Released under the MIT License.
|
9
|
-
#
|
10
|
-
# http://english.rubyforge.org
|
11
|
-
class English
|
12
|
-
# Retrieve the declensions (singular, plural)
|
13
|
-
# of an english word using a class lifted from
|
14
|
-
# the 'english' ruby gem.
|
15
|
-
def self.declensions(entity, options)
|
16
|
-
unless options[:count]
|
17
|
-
raise Treat::Exception,
|
18
|
-
"Must supply option count (:singular or :plural)."
|
19
|
-
end
|
20
|
-
string = entity.to_s
|
21
|
-
if entity.category == :verb
|
22
|
-
raise Treat::Exception,
|
23
|
-
"Cannot retrieve the declensions of a verb. " +
|
24
|
-
"Use #singular_verb and #plural_verb instead."
|
25
|
-
elsif options[:count] == :plural
|
26
|
-
Inflect.plural(string)
|
27
|
-
elsif options[:count] == :singular
|
28
|
-
Inflect.singular(string)
|
29
|
-
else
|
30
|
-
{:singular => Inflect.singular(string),
|
31
|
-
:plural => Inflect.plural(string)}
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
module Inflect
|
36
|
-
|
37
|
-
@singular_of = {}
|
38
|
-
@plural_of = {}
|
39
|
-
|
40
|
-
@singular_rules = []
|
41
|
-
@plural_rules = []
|
42
|
-
|
43
|
-
# This class provides the DSL for creating inflections, you can add additional rules.
|
44
|
-
# Examples:
|
45
|
-
#
|
46
|
-
# word "ox", "oxen"
|
47
|
-
# word "octopus", "octopi"
|
48
|
-
# word "man", "men"
|
49
|
-
#
|
50
|
-
# rule "lf", "lves"
|
51
|
-
#
|
52
|
-
# word "equipment"
|
53
|
-
#
|
54
|
-
# Rules are evaluated by size, so rules you add to override specific cases should be longer than the rule
|
55
|
-
# it overrides. For instance, if you want "pta" to pluralize to "ptas", even though a general purpose rule
|
56
|
-
# for "ta" => "tum" already exists, simply add a new rule for "pta" => "ptas", and it will automatically win
|
57
|
-
# since it is longer than the old rule.
|
58
|
-
#
|
59
|
-
# Also, single-word exceptions win over general words ("ox" pluralizes to "oxen", because it's a single word
|
60
|
-
# exception, even though "fox" pluralizes to "foxes")
|
61
|
-
class << self
|
62
|
-
# Define a general two-way exception.
|
63
|
-
#
|
64
|
-
# This also defines a general rule, so foo_child will correctly become
|
65
|
-
# foo_children.
|
66
|
-
#
|
67
|
-
# Whole words also work if they are capitalized (Goose => Geese).
|
68
|
-
def word(singular, plural=nil)
|
69
|
-
plural = singular unless plural
|
70
|
-
singular_word(singular, plural)
|
71
|
-
plural_word(singular, plural)
|
72
|
-
rule(singular, plural)
|
73
|
-
end
|
74
|
-
|
75
|
-
# Define a singularization exception.
|
76
|
-
def singular_word(singular, plural)
|
77
|
-
@singular_of[plural] = singular
|
78
|
-
@singular_of[plural.capitalize] = singular.capitalize
|
79
|
-
end
|
80
|
-
|
81
|
-
# Define a pluralization exception.
|
82
|
-
def plural_word(singular, plural)
|
83
|
-
@plural_of[singular] = plural
|
84
|
-
@plural_of[singular.capitalize] = plural.capitalize
|
85
|
-
end
|
86
|
-
|
87
|
-
# Define a general rule.
|
88
|
-
def rule(singular, plural)
|
89
|
-
singular_rule(singular, plural)
|
90
|
-
plural_rule(singular, plural)
|
91
|
-
end
|
92
|
-
|
93
|
-
# Define a singularization rule.
|
94
|
-
def singular_rule(singular, plural)
|
95
|
-
@singular_rules << [singular, plural]
|
96
|
-
end
|
97
|
-
|
98
|
-
# Define a plurualization rule.
|
99
|
-
def plural_rule(singular, plural)
|
100
|
-
@plural_rules << [singular, plural]
|
101
|
-
end
|
102
|
-
|
103
|
-
# Read prepared singularization rules.
|
104
|
-
def singularization_rules
|
105
|
-
if defined?(@singularization_regex) && @singularization_regex
|
106
|
-
return [@singularization_regex, @singularization_hash]
|
107
|
-
end
|
108
|
-
# No sorting needed: Regexen match on longest string
|
109
|
-
@singularization_regex = Regexp.new("(" + @singular_rules.map {|s,p| p}.join("|") + ")$", "i")
|
110
|
-
@singularization_hash = Hash[*@singular_rules.flatten].invert
|
111
|
-
[@singularization_regex, @singularization_hash]
|
112
|
-
end
|
113
|
-
|
114
|
-
# Read prepared singularization rules.
|
115
|
-
#def singularization_rules
|
116
|
-
# return @singularization_rules if @singularization_rules
|
117
|
-
# sorted = @singular_rules.sort_by{ |s, p| "#{p}".size }.reverse
|
118
|
-
# @singularization_rules = sorted.collect do |s, p|
|
119
|
-
# [ /#{p}$/, "#{s}" ]
|
120
|
-
# end
|
121
|
-
#end
|
122
|
-
|
123
|
-
# Read prepared pluralization rules.
|
124
|
-
def pluralization_rules
|
125
|
-
if defined?(@pluralization_regex) && @pluralization_regex
|
126
|
-
return [@pluralization_regex, @pluralization_hash]
|
127
|
-
end
|
128
|
-
@pluralization_regex = Regexp.new("(" + @plural_rules.map {|s,p| s}.join("|") + ")$", "i")
|
129
|
-
@pluralization_hash = Hash[*@plural_rules.flatten]
|
130
|
-
[@pluralization_regex, @pluralization_hash]
|
131
|
-
end
|
132
|
-
|
133
|
-
# Read prepared pluralization rules.
|
134
|
-
#def pluralization_rules
|
135
|
-
# return @pluralization_rules if @pluralization_rules
|
136
|
-
# sorted = @plural_rules.sort_by{ |s, p| "#{s}".size }.reverse
|
137
|
-
# @pluralization_rules = sorted.collect do |s, p|
|
138
|
-
# [ /#{s}$/, "#{p}" ]
|
139
|
-
# end
|
140
|
-
#end
|
141
|
-
|
142
|
-
#
|
143
|
-
def singular_of ; @singular_of ; end
|
144
|
-
|
145
|
-
#
|
146
|
-
def plural_of ; @plural_of ; end
|
147
|
-
|
148
|
-
# Convert an English word from plurel to singular.
|
149
|
-
#
|
150
|
-
# "boys".singular #=> boy
|
151
|
-
# "tomatoes".singular #=> tomato
|
152
|
-
#
|
153
|
-
def singular(word)
|
154
|
-
return "" if word == ""
|
155
|
-
if result = singular_of[word]
|
156
|
-
return result.dup
|
157
|
-
end
|
158
|
-
result = word.dup
|
159
|
-
|
160
|
-
regex, hash = singularization_rules
|
161
|
-
result.sub!(regex) {|m| hash[m]}
|
162
|
-
singular_of[word] = result
|
163
|
-
return result
|
164
|
-
#singularization_rules.each do |(match, replacement)|
|
165
|
-
# break if result.gsub!(match, replacement)
|
166
|
-
#end
|
167
|
-
#return result
|
168
|
-
end
|
169
|
-
|
170
|
-
# Alias for #singular (a Railism).
|
171
|
-
#
|
172
|
-
alias_method(:singularize, :singular)
|
173
|
-
|
174
|
-
# Convert an English word from singular to plurel.
|
175
|
-
#
|
176
|
-
# "boy".plural #=> boys
|
177
|
-
# "tomato".plural #=> tomatoes
|
178
|
-
#
|
179
|
-
def plural(word)
|
180
|
-
return "" if word == ""
|
181
|
-
if result = plural_of[word]
|
182
|
-
return result.dup
|
183
|
-
end
|
184
|
-
#return self.dup if /s$/ =~ self # ???
|
185
|
-
result = word.dup
|
186
|
-
|
187
|
-
regex, hash = pluralization_rules
|
188
|
-
result.sub!(regex) {|m| hash[m]}
|
189
|
-
plural_of[word] = result
|
190
|
-
return result
|
191
|
-
#pluralization_rules.each do |(match, replacement)|
|
192
|
-
# break if result.gsub!(match, replacement)
|
193
|
-
#end
|
194
|
-
#return result
|
195
|
-
end
|
196
|
-
|
197
|
-
# Alias for #plural (a Railism).
|
198
|
-
alias_method(:pluralize, :plural)
|
199
|
-
|
200
|
-
# Clear all rules.
|
201
|
-
def clear(type = :all)
|
202
|
-
if type == :singular || type == :all
|
203
|
-
@singular_of = {}
|
204
|
-
@singular_rules = []
|
205
|
-
@singularization_rules, @singularization_regex = nil, nil
|
206
|
-
end
|
207
|
-
if type == :plural || type == :all
|
208
|
-
@singular_of = {}
|
209
|
-
@singular_rules = []
|
210
|
-
@singularization_rules, @singularization_regex = nil, nil
|
211
|
-
end
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
# One argument means singular and plural are the same.
|
216
|
-
|
217
|
-
word 'equipment'
|
218
|
-
word 'information'
|
219
|
-
word 'money'
|
220
|
-
word 'species'
|
221
|
-
word 'series'
|
222
|
-
word 'fish'
|
223
|
-
word 'sheep'
|
224
|
-
word 'moose'
|
225
|
-
word 'hovercraft'
|
226
|
-
word 'news'
|
227
|
-
word 'rice'
|
228
|
-
word 'plurals'
|
229
|
-
|
230
|
-
# Two arguments defines a singular and plural exception.
|
231
|
-
|
232
|
-
word 'Swiss' , 'Swiss'
|
233
|
-
word 'alias' , 'aliases'
|
234
|
-
word 'analysis' , 'analyses'
|
235
|
-
#word 'axis' , 'axes'
|
236
|
-
word 'basis' , 'bases'
|
237
|
-
word 'buffalo' , 'buffaloes'
|
238
|
-
word 'child' , 'children'
|
239
|
-
#word 'cow' , 'kine'
|
240
|
-
word 'crisis' , 'crises'
|
241
|
-
word 'criterion' , 'criteria'
|
242
|
-
word 'datum' , 'data'
|
243
|
-
word 'goose' , 'geese'
|
244
|
-
word 'hive' , 'hives'
|
245
|
-
word 'index' , 'indices'
|
246
|
-
word 'life' , 'lives'
|
247
|
-
word 'louse' , 'lice'
|
248
|
-
word 'man' , 'men'
|
249
|
-
word 'matrix' , 'matrices'
|
250
|
-
word 'medium' , 'media'
|
251
|
-
word 'mouse' , 'mice'
|
252
|
-
word 'movie' , 'movies'
|
253
|
-
word 'octopus' , 'octopi'
|
254
|
-
word 'ox' , 'oxen'
|
255
|
-
word 'person' , 'people'
|
256
|
-
word 'potato' , 'potatoes'
|
257
|
-
word 'quiz' , 'quizzes'
|
258
|
-
word 'shoe' , 'shoes'
|
259
|
-
word 'status' , 'statuses'
|
260
|
-
word 'testis' , 'testes'
|
261
|
-
word 'thesis' , 'theses'
|
262
|
-
word 'thief' , 'thieves'
|
263
|
-
word 'tomato' , 'tomatoes'
|
264
|
-
word 'torpedo' , 'torpedoes'
|
265
|
-
word 'vertex' , 'vertices'
|
266
|
-
word 'virus' , 'viri'
|
267
|
-
word 'wife' , 'wives'
|
268
|
-
|
269
|
-
# One-way singularization exception (convert plural to singular).
|
270
|
-
|
271
|
-
singular_word 'cactus', 'cacti'
|
272
|
-
|
273
|
-
# One-way pluralizaton exception (convert singular to plural).
|
274
|
-
|
275
|
-
plural_word 'axis', 'axes'
|
276
|
-
|
277
|
-
# General rules.
|
278
|
-
|
279
|
-
rule 'rf' , 'rves'
|
280
|
-
rule 'ero' , 'eroes'
|
281
|
-
rule 'ch' , 'ches'
|
282
|
-
rule 'sh' , 'shes'
|
283
|
-
rule 'ss' , 'sses'
|
284
|
-
#rule 'ess' , 'esses'
|
285
|
-
rule 'ta' , 'tum'
|
286
|
-
rule 'ia' , 'ium'
|
287
|
-
rule 'ra' , 'rum'
|
288
|
-
rule 'ay' , 'ays'
|
289
|
-
rule 'ey' , 'eys'
|
290
|
-
rule 'oy' , 'oys'
|
291
|
-
rule 'uy' , 'uys'
|
292
|
-
rule 'y' , 'ies'
|
293
|
-
rule 'x' , 'xes'
|
294
|
-
rule 'lf' , 'lves'
|
295
|
-
rule 'ffe' , 'ffes'
|
296
|
-
rule 'af' , 'aves'
|
297
|
-
rule 'us' , 'uses'
|
298
|
-
rule 'ouse' , 'ouses'
|
299
|
-
rule 'osis' , 'oses'
|
300
|
-
rule 'ox' , 'oxes'
|
301
|
-
rule '' , 's'
|
302
|
-
|
303
|
-
# One-way singular rules.
|
304
|
-
|
305
|
-
singular_rule 'of' , 'ofs' # proof
|
306
|
-
singular_rule 'o' , 'oes' # hero, heroes
|
307
|
-
#singular_rule 'f' , 'ves'
|
308
|
-
|
309
|
-
# One-way plural rules.
|
310
|
-
|
311
|
-
plural_rule 's' , 'ses'
|
312
|
-
plural_rule 'ive' , 'ives' # don't want to snag wife
|
313
|
-
plural_rule 'fe' , 'ves' # don't want to snag perspectives
|
314
|
-
|
315
|
-
end
|
316
|
-
end
|
317
|
-
end
|
318
|
-
end
|
319
|
-
end
|