treat 0.2.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +3 -3
- data/README.md +33 -0
- data/files/INFO +1 -0
- data/lib/treat.rb +40 -105
- data/lib/treat/ai.rb +12 -0
- data/lib/treat/ai/classifiers/id3.rb +27 -0
- data/lib/treat/categories.rb +82 -35
- data/lib/treat/categorizable.rb +44 -0
- data/lib/treat/classification.rb +61 -0
- data/lib/treat/configurable.rb +115 -0
- data/lib/treat/data_set.rb +42 -0
- data/lib/treat/dependencies.rb +24 -0
- data/lib/treat/downloader.rb +87 -0
- data/lib/treat/entities.rb +68 -66
- data/lib/treat/entities/abilities.rb +10 -0
- data/lib/treat/entities/abilities/buildable.rb +327 -0
- data/lib/treat/entities/abilities/checkable.rb +31 -0
- data/lib/treat/entities/abilities/copyable.rb +45 -0
- data/lib/treat/entities/abilities/countable.rb +51 -0
- data/lib/treat/entities/abilities/debuggable.rb +83 -0
- data/lib/treat/entities/abilities/delegatable.rb +123 -0
- data/lib/treat/entities/abilities/doable.rb +62 -0
- data/lib/treat/entities/abilities/exportable.rb +11 -0
- data/lib/treat/entities/abilities/iterable.rb +115 -0
- data/lib/treat/entities/abilities/magical.rb +83 -0
- data/lib/treat/entities/abilities/registrable.rb +74 -0
- data/lib/treat/entities/abilities/stringable.rb +91 -0
- data/lib/treat/entities/entities.rb +104 -0
- data/lib/treat/entities/entity.rb +122 -245
- data/lib/treat/exception.rb +4 -4
- data/lib/treat/extractors.rb +77 -80
- data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
- data/lib/treat/extractors/language/what_language.rb +50 -45
- data/lib/treat/extractors/name_tag/stanford.rb +55 -0
- data/lib/treat/extractors/tf_idf/native.rb +87 -0
- data/lib/treat/extractors/time/chronic.rb +55 -0
- data/lib/treat/extractors/time/nickel.rb +86 -62
- data/lib/treat/extractors/time/ruby.rb +53 -0
- data/lib/treat/extractors/topic_words/lda.rb +67 -58
- data/lib/treat/extractors/topics/reuters.rb +100 -87
- data/lib/treat/formatters.rb +39 -35
- data/lib/treat/formatters/readers/abw.rb +49 -29
- data/lib/treat/formatters/readers/autoselect.rb +37 -33
- data/lib/treat/formatters/readers/doc.rb +19 -13
- data/lib/treat/formatters/readers/html.rb +52 -30
- data/lib/treat/formatters/readers/image.rb +41 -40
- data/lib/treat/formatters/readers/odt.rb +59 -45
- data/lib/treat/formatters/readers/pdf.rb +28 -25
- data/lib/treat/formatters/readers/txt.rb +12 -15
- data/lib/treat/formatters/readers/xml.rb +73 -36
- data/lib/treat/formatters/serializers/xml.rb +80 -79
- data/lib/treat/formatters/serializers/yaml.rb +19 -18
- data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
- data/lib/treat/formatters/unserializers/xml.rb +94 -99
- data/lib/treat/formatters/unserializers/yaml.rb +20 -19
- data/lib/treat/formatters/visualizers/dot.rb +132 -132
- data/lib/treat/formatters/visualizers/standoff.rb +52 -44
- data/lib/treat/formatters/visualizers/tree.rb +26 -29
- data/lib/treat/groupable.rb +153 -0
- data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
- data/lib/treat/inflectors.rb +50 -45
- data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
- data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
- data/lib/treat/inflectors/declensors/active_support.rb +31 -0
- data/lib/treat/inflectors/declensors/english.rb +38 -0
- data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
- data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
- data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
- data/lib/treat/inflectors/stemmers/porter.rb +160 -0
- data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
- data/lib/treat/inflectors/stemmers/uea.rb +28 -0
- data/lib/treat/installer.rb +308 -0
- data/lib/treat/kernel.rb +105 -27
- data/lib/treat/languages.rb +122 -88
- data/lib/treat/languages/arabic.rb +15 -15
- data/lib/treat/languages/chinese.rb +15 -15
- data/lib/treat/languages/dutch.rb +15 -15
- data/lib/treat/languages/english.rb +61 -62
- data/lib/treat/languages/french.rb +19 -19
- data/lib/treat/languages/german.rb +20 -20
- data/lib/treat/languages/greek.rb +15 -15
- data/lib/treat/languages/italian.rb +16 -16
- data/lib/treat/languages/polish.rb +15 -15
- data/lib/treat/languages/portuguese.rb +15 -15
- data/lib/treat/languages/russian.rb +15 -15
- data/lib/treat/languages/spanish.rb +16 -16
- data/lib/treat/languages/swedish.rb +16 -16
- data/lib/treat/lexicalizers.rb +34 -55
- data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
- data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
- data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
- data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
- data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
- data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
- data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
- data/lib/treat/linguistics.rb +9 -0
- data/lib/treat/linguistics/categories.rb +11 -0
- data/lib/treat/linguistics/tags.rb +422 -0
- data/lib/treat/loaders/linguistics.rb +30 -0
- data/lib/treat/loaders/stanford.rb +27 -0
- data/lib/treat/object.rb +1 -0
- data/lib/treat/processors.rb +37 -44
- data/lib/treat/processors/chunkers/autoselect.rb +16 -0
- data/lib/treat/processors/chunkers/html.rb +71 -0
- data/lib/treat/processors/chunkers/txt.rb +18 -24
- data/lib/treat/processors/parsers/enju.rb +253 -208
- data/lib/treat/processors/parsers/stanford.rb +130 -131
- data/lib/treat/processors/segmenters/punkt.rb +79 -45
- data/lib/treat/processors/segmenters/stanford.rb +46 -48
- data/lib/treat/processors/segmenters/tactful.rb +43 -36
- data/lib/treat/processors/tokenizers/perl.rb +124 -92
- data/lib/treat/processors/tokenizers/ptb.rb +81 -0
- data/lib/treat/processors/tokenizers/punkt.rb +48 -42
- data/lib/treat/processors/tokenizers/stanford.rb +39 -38
- data/lib/treat/processors/tokenizers/tactful.rb +64 -55
- data/lib/treat/proxies.rb +52 -35
- data/lib/treat/retrievers.rb +26 -16
- data/lib/treat/retrievers/indexers/ferret.rb +47 -26
- data/lib/treat/retrievers/searchers/ferret.rb +69 -50
- data/lib/treat/tree.rb +241 -183
- data/spec/collection.rb +123 -0
- data/spec/document.rb +93 -0
- data/spec/entity.rb +408 -0
- data/spec/languages.rb +25 -0
- data/spec/phrase.rb +146 -0
- data/spec/samples/mathematicians/archimedes.abw +34 -0
- data/spec/samples/mathematicians/euler.html +21 -0
- data/spec/samples/mathematicians/gauss.pdf +0 -0
- data/spec/samples/mathematicians/leibniz.txt +13 -0
- data/spec/samples/mathematicians/newton.doc +0 -0
- data/spec/sandbox.rb +5 -0
- data/spec/token.rb +109 -0
- data/spec/treat.rb +52 -0
- data/spec/tree.rb +117 -0
- data/spec/word.rb +110 -0
- data/spec/zone.rb +66 -0
- data/tmp/INFO +1 -1
- metadata +100 -201
- data/INSTALL +0 -1
- data/README +0 -3
- data/TODO +0 -28
- data/lib/economist/half_cocked_basel.txt +0 -16
- data/lib/economist/hungarys_troubles.txt +0 -46
- data/lib/economist/indias_slowdown.txt +0 -15
- data/lib/economist/merkozy_rides_again.txt +0 -24
- data/lib/economist/prada_is_not_walmart.txt +0 -9
- data/lib/economist/to_infinity_and_beyond.txt +0 -15
- data/lib/ferret/_11.cfs +0 -0
- data/lib/ferret/_14.cfs +0 -0
- data/lib/ferret/_p.cfs +0 -0
- data/lib/ferret/_s.cfs +0 -0
- data/lib/ferret/_v.cfs +0 -0
- data/lib/ferret/_y.cfs +0 -0
- data/lib/ferret/segments +0 -0
- data/lib/ferret/segments_15 +0 -0
- data/lib/treat/buildable.rb +0 -157
- data/lib/treat/category.rb +0 -33
- data/lib/treat/delegatable.rb +0 -116
- data/lib/treat/doable.rb +0 -45
- data/lib/treat/entities/collection.rb +0 -14
- data/lib/treat/entities/document.rb +0 -12
- data/lib/treat/entities/phrases.rb +0 -17
- data/lib/treat/entities/tokens.rb +0 -61
- data/lib/treat/entities/zones.rb +0 -41
- data/lib/treat/extractors/coreferences/stanford.rb +0 -69
- data/lib/treat/extractors/date/chronic.rb +0 -32
- data/lib/treat/extractors/date/ruby.rb +0 -25
- data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
- data/lib/treat/extractors/language/language_extractor.rb +0 -27
- data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
- data/lib/treat/extractors/roles/naive.rb +0 -73
- data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
- data/lib/treat/extractors/statistics/position_in.rb +0 -14
- data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
- data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
- data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
- data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
- data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
- data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
- data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
- data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
- data/lib/treat/feature.rb +0 -58
- data/lib/treat/features.rb +0 -7
- data/lib/treat/formatters/visualizers/short_value.rb +0 -29
- data/lib/treat/formatters/visualizers/txt.rb +0 -45
- data/lib/treat/group.rb +0 -106
- data/lib/treat/helpers/linguistics_loader.rb +0 -18
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
- data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
- data/lib/treat/inflectors/declensions/english.rb +0 -319
- data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
- data/lib/treat/inflectors/stem/porter.rb +0 -162
- data/lib/treat/inflectors/stem/porter_c.rb +0 -26
- data/lib/treat/inflectors/stem/uea.rb +0 -30
- data/lib/treat/install.rb +0 -59
- data/lib/treat/languages/tags.rb +0 -377
- data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
- data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
- data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
- data/lib/treat/lexicalizers/tag/brill.rb +0 -91
- data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
- data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
- data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
- data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
- data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
- data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
- data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
- data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
- data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
- data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
- data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
- data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
- data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
- data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
- data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
- data/lib/treat/registrable.rb +0 -28
- data/lib/treat/sugar.rb +0 -50
- data/lib/treat/viewable.rb +0 -29
- data/lib/treat/visitable.rb +0 -28
- data/test/profile.rb +0 -2
- data/test/tc_entity.rb +0 -117
- data/test/tc_extractors.rb +0 -73
- data/test/tc_formatters.rb +0 -41
- data/test/tc_inflectors.rb +0 -34
- data/test/tc_lexicalizers.rb +0 -32
- data/test/tc_processors.rb +0 -50
- data/test/tc_resources.rb +0 -22
- data/test/tc_treat.rb +0 -60
- data/test/tc_tree.rb +0 -60
- data/test/tests.rb +0 -20
- data/test/texts.rb +0 -19
- data/test/texts/english/half_cocked_basel.txt +0 -16
- data/test/texts/english/hose_and_dry.doc +0 -0
- data/test/texts/english/hungarys_troubles.abw +0 -70
- data/test/texts/english/long.html +0 -24
- data/test/texts/english/long.txt +0 -22
- data/test/texts/english/medium.txt +0 -5
- data/test/texts/english/republican_nomination.pdf +0 -0
- data/test/texts/english/saving_the_euro.odt +0 -0
- data/test/texts/english/short.txt +0 -3
- data/test/texts/english/zero_sum.html +0 -111
@@ -0,0 +1,55 @@
|
|
1
|
+
# This class is a wrapper for the functions included
|
2
|
+
# in the 'linguistics' gem that allow verbs to be conjugated.
|
3
|
+
#
|
4
|
+
# Project website: http://deveiate.org/projects/Linguistics/
|
5
|
+
module Treat::Inflectors::Conjugators::Linguistics

  require 'treat/loaders/linguistics'

  # Options used when the caller does not override them.
  DefaultOptions = {
    :strict => false
  }

  # Shorthand names accepted through the :form option, each mapped to
  # the :mode/:tense/:count combination it stands for.
  #
  # Note that :singular_verb maps to a combination that none of the
  # branches in conjugate handles, so requesting it raises
  # Treat::Exception.
  Forms = {
    :present_participle =>
      {:mode => :participle, :tense => :present},
    :infinitive => {:mode => :infinitive},
    :plural_verb => {:count => :plural},
    :singular_verb => {:count => :singular}
  }

  # Conjugate a verb using ruby linguistics with the specified
  # mode, tense, count and person.
  #
  # Options:
  #
  # - (Boolean) :strict => when true, entities whose category is
  #   not :verb are skipped and nil is returned.
  # - (Symbol) :form => one of the keys of Forms; when given, it
  #   replaces every other conjugation option.
  # - (Symbol) :mode => :infinitive, :indicative, :subjunctive, :participle
  # - (Symbol) :tense => :past, :present, :future
  # - (Symbol) :count => :singular, :plural
  # - (Symbol) :person => :first, :second, :third
  #
  # Only three combinations are implemented: the infinitive, the
  # present participle, and the plural (with :count as the only
  # conjugation option). Anything else raises Treat::Exception,
  # as does an unknown :form.
  def self.conjugate(entity, options = {})

    options = DefaultOptions.merge(options)
    cat = entity.check_has(:category)
    return if cat != :verb && options[:strict]

    if options[:form]
      spec = Forms[options[:form]]
      # An unknown form used to leave a nil options hash behind and
      # fail later with NoMethodError; report it explicitly instead.
      unless spec
        raise Treat::Exception,
        "Unknown verb form '#{options[:form]}'; " +
        "supported forms are: #{Forms.keys.join(', ')}."
      end
    else
      # :strict is a behavioural switch, not part of the conjugation
      # itself. It is always present after merging the defaults, so
      # it must be left out before counting the requested options.
      spec = options.reject { |key, _| key == :strict }
    end

    klass = Treat::Loaders::Linguistics.load(entity.language)
    if spec[:mode] == :infinitive
      silence_warnings { klass.infinitive(entity.to_s) }
    elsif spec[:mode] == :participle && spec[:tense] == :present
      silence_warnings { klass.present_participle(entity.to_s) }
    elsif spec[:count] == :plural && spec.size == 1
      silence_warnings { klass.plural_verb(entity.to_s) }
    else
      raise Treat::Exception,
      'This combination of modes, tenses, persons ' +
      'and/or counts is not presently supported.'
    end

  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# This class is a wrapper for the ActiveSupport
|
2
|
+
# declension tools.
|
3
|
+
# NOTE: this file is declensors/active_support.rb. The class was
# previously declared as ...::Declensors::English, which reopened the
# unrelated English declensor instead of defining this one.
class Treat::Inflectors::Declensors::ActiveSupport

  # Loads ActiveSupport::Inflector together with its default English
  # inflection rules and the pluralize/singularize methods.
  require 'active_support/inflector'

  # Declense a word using ActiveSupport::Inflector.
  #
  # Options:
  #
  # - (Symbol) :count => :singular, :plural (required)
  #
  # Returns the inflected string. Returns nil when the entity's
  # category is not :noun, :adjective or :determiner, or when :count
  # is neither :singular nor :plural. Raises Treat::Exception when
  # :count is missing.
  def self.declense(entity, options)

    cat = entity.check_has(:category)
    unless [:noun, :adjective, :determiner].
      include?(cat)
      return
    end

    unless options[:count]
      raise Treat::Exception,
      "Must supply option count (:singular or :plural)."
    end

    string = entity.to_s

    # pluralize/singularize are module functions of
    # ActiveSupport::Inflector; the Inflections class only stores the
    # rules. The leading :: avoids any ambiguity with this class,
    # which is itself named ActiveSupport.
    if options[:count] == :plural
      ::ActiveSupport::Inflector.pluralize(string)
    elsif options[:count] == :singular
      ::ActiveSupport::Inflector.singularize(string)
    end

  end

end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# This class is a wrapper for the Inflect module,
|
2
|
+
# copied from the unmaintained 'english' ruby gem,
|
3
|
+
# created by Thomas Sawyer.
|
4
|
+
#
|
5
|
+
# Released under the MIT License.
|
6
|
+
#
|
7
|
+
# http://english.rubyforge.org
|
8
|
+
class Treat::Inflectors::Declensors::English

  require 'treat/inflectors/declensors/english/inflect'

  # Return the singular or plural form of an English word, using
  # the Inflect module lifted from the 'english' ruby gem.
  #
  # Options:
  #
  # - (Symbol) :count => :singular, :plural (required)
  #
  # Returns nil when the entity's category is not :noun, :adjective
  # or :determiner, or when :count is neither :singular nor :plural.
  # Raises Treat::Exception when :count is missing.
  def self.declense(entity, options)
    category = entity.check_has(:category)
    return unless [:noun, :adjective, :determiner].include?(category)

    count = options[:count]
    unless count
      raise Treat::Exception,
      "Must supply option count (:singular or :plural)."
    end

    text = entity.to_s

    case count
    when :plural then Inflect.plural(text)
    when :singular then Inflect.singular(text)
    end
  end

end
|
@@ -0,0 +1,288 @@
|
|
1
|
+
# This module is the Inflect module; it has been
|
2
|
+
# copied from the unmaintained 'english' ruby gem,
|
3
|
+
# created by Thomas Sawyer.
|
4
|
+
#
|
5
|
+
# Released under the MIT License.
|
6
|
+
#
|
7
|
+
# http://english.rubyforge.org
|
8
|
+
module Treat::Inflectors::Declensors::English::Inflect
|
9
|
+
|
10
|
+
# Whole-word lookup tables: plural => singular and singular => plural.
# They hold the registered exceptions and also memoize every result
# computed by singular/plural.
@singular_of = {}
@plural_of = {}

# Ordered lists of [singular_suffix, plural_suffix] pairs.
@singular_rules = []
@plural_rules = []

# This singleton class provides the DSL for creating inflections;
# additional rules can be added at any time.
#
# Examples:
#
#   word "ox", "oxen"
#   word "octopus", "octopi"
#   word "man", "men"
#
#   rule "lf", "lves"
#
#   word "equipment"
#
# The longest matching suffix wins, so a rule added to override a
# specific case should be longer than the rule it overrides. For
# instance, if you want "pta" to pluralize to "ptas", even though a
# general purpose rule for "ta" => "tum" already exists, simply add a
# new rule for "pta" => "ptas".
#
# Also, single-word exceptions win over general rules ("ox"
# pluralizes to "oxen", because it is a single word exception, even
# though "fox" pluralizes to "foxes").
class << self

  # Define a general two-way exception.
  #
  # This also defines a general rule, so foo_child will correctly
  # become foo_children.
  #
  # Whole words also work if they are capitalized (Goose => Geese).
  # When plural is omitted, the word is its own plural.
  def word(singular, plural = nil)
    plural ||= singular
    singular_word(singular, plural)
    plural_word(singular, plural)
    rule(singular, plural)
  end

  # Define a singularization exception (plural => singular).
  def singular_word(singular, plural)
    @singular_of[plural] = singular
    @singular_of[plural.capitalize] = singular.capitalize
  end

  # Define a pluralization exception (singular => plural).
  def plural_word(singular, plural)
    @plural_of[singular] = plural
    @plural_of[singular.capitalize] = plural.capitalize
  end

  # Define a general suffix rule that applies in both directions.
  def rule(singular, plural)
    singular_rule(singular, plural)
    plural_rule(singular, plural)
  end

  # Define a singularization suffix rule.
  def singular_rule(singular, plural)
    # Drop the prepared regex/table so the new rule takes effect
    # even if singularization has already been used.
    @singularization_regex = @singularization_hash = nil
    @singular_rules << [singular, plural]
  end

  # Define a pluralization suffix rule.
  def plural_rule(singular, plural)
    # Drop the prepared regex/table so the new rule takes effect
    # even if pluralization has already been used.
    @pluralization_regex = @pluralization_hash = nil
    @plural_rules << [singular, plural]
  end

  # Read prepared singularization rules.
  #
  # Returns [regex, table]: the regex matches any known plural
  # suffix at the end of a word, and the table maps each
  # (downcased) plural suffix to its singular suffix.
  def singularization_rules
    unless defined?(@singularization_regex) && @singularization_regex
      @singularization_regex =
        suffix_regex(@singular_rules.map { |_, plural| plural })
      @singularization_hash =
        suffix_table(@singular_rules.map { |singular, plural| [plural, singular] })
    end
    [@singularization_regex, @singularization_hash]
  end

  # Read prepared pluralization rules.
  #
  # Returns [regex, table]: the regex matches any known singular
  # suffix at the end of a word, and the table maps each
  # (downcased) singular suffix to its plural suffix.
  def pluralization_rules
    unless defined?(@pluralization_regex) && @pluralization_regex
      @pluralization_regex =
        suffix_regex(@plural_rules.map { |singular, _| singular })
      @pluralization_hash = suffix_table(@plural_rules)
    end
    [@pluralization_regex, @pluralization_hash]
  end

  # The plural => singular lookup table.
  def singular_of ; @singular_of ; end

  # The singular => plural lookup table.
  def plural_of ; @plural_of ; end

  # Convert an English word from plural to singular.
  #
  #   singular("boys")     #=> "boy"
  #   singular("tomatoes") #=> "tomato"
  #
  # Always returns a fresh string, so callers may mutate the result
  # without corrupting the lookup table.
  def singular(word)
    return "" if word == ""
    known = singular_of[word]
    return known.dup if known

    regex, table = singularization_rules
    result = apply_suffix_rule(word, regex, table)
    singular_of[word] = result
    result.dup
  end

  # Alias for #singular (a Railism).
  alias_method(:singularize, :singular)

  # Convert an English word from singular to plural.
  #
  #   plural("boy")    #=> "boys"
  #   plural("tomato") #=> "tomatoes"
  #
  # Always returns a fresh string, so callers may mutate the result
  # without corrupting the lookup table.
  def plural(word)
    return "" if word == ""
    known = plural_of[word]
    return known.dup if known

    regex, table = pluralization_rules
    result = apply_suffix_rule(word, regex, table)
    plural_of[word] = result
    result.dup
  end

  # Alias for #plural (a Railism).
  alias_method(:pluralize, :plural)

  # Clear all rules and exceptions of the given type
  # (:singular, :plural or :all).
  def clear(type = :all)
    if type == :singular || type == :all
      @singular_of = {}
      @singular_rules = []
      @singularization_regex = @singularization_hash = nil
    end
    if type == :plural || type == :all
      @plural_of = {}
      @plural_rules = []
      @pluralization_regex = @pluralization_hash = nil
    end
  end

  private

  # Build a case-insensitive regex matching any of the given suffixes
  # at the end of a word. The match is anchored at the end, so the
  # leftmost match found by the engine is the longest matching
  # suffix; no sorting of the alternatives is needed.
  def suffix_regex(suffixes)
    alternatives = suffixes.map { |suffix| Regexp.escape(suffix) }
    Regexp.new("(" + alternatives.join("|") + ")$", Regexp::IGNORECASE)
  end

  # Build a lookup table from [from, to] pairs, keyed by the downcased
  # source suffix so lookups agree with the case-insensitive regex.
  # When a source suffix is listed more than once, the last pair wins.
  def suffix_table(pairs)
    pairs.each_with_object({}) { |(from, to), table| table[from.downcase] = to }
  end

  # Replace the suffix matched by regex with its entry in table.
  # An all-uppercase suffix gets an uppercase replacement, and a
  # suffix with no entry in the table is left untouched.
  def apply_suffix_rule(word, regex, table)
    word.sub(regex) do |suffix|
      replacement = table[suffix.downcase]
      next suffix unless replacement
      shouting = suffix != suffix.downcase && suffix == suffix.upcase
      shouting ? replacement.upcase : replacement
    end
  end

end
|
187
|
+
|
188
|
+
# Words whose singular and plural are the same
# (a single argument to +word+ means exactly that).
%w[
  equipment information money species series fish
  sheep moose hovercraft news rice plurals
].each { |invariant| word invariant }

# Two-way singular/plural exceptions.
# (Not registered here: 'axis'/'axes' and 'cow'/'kine'.)
[
  %w[Swiss     Swiss],
  %w[alias     aliases],
  %w[analysis  analyses],
  %w[basis     bases],
  %w[buffalo   buffaloes],
  %w[child     children],
  %w[crisis    crises],
  %w[criterion criteria],
  %w[datum     data],
  %w[goose     geese],
  %w[hive      hives],
  %w[index     indices],
  %w[life      lives],
  %w[louse     lice],
  %w[man       men],
  %w[matrix    matrices],
  %w[medium    media],
  %w[mouse     mice],
  %w[movie     movies],
  %w[octopus   octopi],
  %w[ox        oxen],
  %w[person    people],
  %w[potato    potatoes],
  %w[quiz      quizzes],
  %w[shoe      shoes],
  %w[status    statuses],
  %w[testis    testes],
  %w[thesis    theses],
  %w[thief     thieves],
  %w[tomato    tomatoes],
  %w[torpedo   torpedoes],
  %w[vertex    vertices],
  %w[virus     viri],
  %w[wife      wives]
].each { |singular, plural| word singular, plural }

# One-way singularization exception (convert plural to singular).
singular_word 'cactus', 'cacti'

# One-way pluralization exception (convert singular to plural).
plural_word 'axis', 'axes'

# General two-way suffix rules, as [singular_suffix, plural_suffix].
# (Not registered here: 'ess' => 'esses'.)
[
  ['rf',   'rves'],
  ['ero',  'eroes'],
  ['ch',   'ches'],
  ['sh',   'shes'],
  ['ss',   'sses'],
  ['ta',   'tum'],
  ['ia',   'ium'],
  ['ra',   'rum'],
  ['ay',   'ays'],
  ['ey',   'eys'],
  ['oy',   'oys'],
  ['uy',   'uys'],
  ['y',    'ies'],
  ['x',    'xes'],
  ['lf',   'lves'],
  ['ffe',  'ffes'],
  ['af',   'aves'],
  ['us',   'uses'],
  ['ouse', 'ouses'],
  ['osis', 'oses'],
  ['ox',   'oxes'],
  ['',     's']
].each { |singular, plural| rule singular, plural }

# One-way singular rules.
# (Not registered here: 'f' => 'ves'.)
[
  ['of', 'ofs'], # proof
  ['o',  'oes']  # hero, heroes
].each { |singular, plural| singular_rule singular, plural }

# One-way plural rules.
[
  ['s',   'ses'],
  ['ive', 'ives'], # don't want to snag wife
  ['fe',  'ves']   # don't want to snag perspectives
].each { |singular, plural| plural_rule singular, plural }
|
287
|
+
|
288
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# This class is a wrapper for the functions included
|
2
|
+
# in the 'linguistics' gem that make it possible to obtain the
|
3
|
+
# declensions of a word.
|
4
|
+
#
|
5
|
+
# Project website: http://deveiate.org/projects/Linguistics/
|
6
|
+
class Treat::Inflectors::Declensors::Linguistics

  require 'treat/loaders/linguistics'

  # Retrieve a declension of a word using the 'linguistics' gem.
  #
  # Options:
  #
  # - (Identifier) :count => :singular, :plural
  #
  # Returns nil when the entity's category is not :noun, :adjective
  # or :determiner. Raises Treat::Exception when :count is missing.
  #
  # NOTE(review): only :count => :plural produces a result; any
  # other count (including :singular) returns nil.
  def self.declense(entity, options = {})
    category = entity.check_has(:category)
    return unless [:noun, :adjective, :determiner].include?(category)

    count = options[:count]
    unless count
      raise Treat::Exception,
      "Must supply option count (:singular or :plural)."
    end

    klass = Treat::Loaders::Linguistics.load(entity.language)
    text = entity.to_s
    return unless count == :plural

    # NOTE(review): :verb looks unreachable here, since the guard
    # above only lets :noun, :adjective and :determiner through -
    # confirm against check_has before removing it.
    category_specific = entity.has?(:category) &&
      [:noun, :adjective, :verb].include?(entity.category)
    return klass.plural(text) unless category_specific

    silence_warnings do
      klass.send(:"plural_#{entity.category}", text)
    end
  end

end
|