treat 0.2.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +3 -3
- data/README.md +33 -0
- data/files/INFO +1 -0
- data/lib/treat.rb +40 -105
- data/lib/treat/ai.rb +12 -0
- data/lib/treat/ai/classifiers/id3.rb +27 -0
- data/lib/treat/categories.rb +82 -35
- data/lib/treat/categorizable.rb +44 -0
- data/lib/treat/classification.rb +61 -0
- data/lib/treat/configurable.rb +115 -0
- data/lib/treat/data_set.rb +42 -0
- data/lib/treat/dependencies.rb +24 -0
- data/lib/treat/downloader.rb +87 -0
- data/lib/treat/entities.rb +68 -66
- data/lib/treat/entities/abilities.rb +10 -0
- data/lib/treat/entities/abilities/buildable.rb +327 -0
- data/lib/treat/entities/abilities/checkable.rb +31 -0
- data/lib/treat/entities/abilities/copyable.rb +45 -0
- data/lib/treat/entities/abilities/countable.rb +51 -0
- data/lib/treat/entities/abilities/debuggable.rb +83 -0
- data/lib/treat/entities/abilities/delegatable.rb +123 -0
- data/lib/treat/entities/abilities/doable.rb +62 -0
- data/lib/treat/entities/abilities/exportable.rb +11 -0
- data/lib/treat/entities/abilities/iterable.rb +115 -0
- data/lib/treat/entities/abilities/magical.rb +83 -0
- data/lib/treat/entities/abilities/registrable.rb +74 -0
- data/lib/treat/entities/abilities/stringable.rb +91 -0
- data/lib/treat/entities/entities.rb +104 -0
- data/lib/treat/entities/entity.rb +122 -245
- data/lib/treat/exception.rb +4 -4
- data/lib/treat/extractors.rb +77 -80
- data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
- data/lib/treat/extractors/language/what_language.rb +50 -45
- data/lib/treat/extractors/name_tag/stanford.rb +55 -0
- data/lib/treat/extractors/tf_idf/native.rb +87 -0
- data/lib/treat/extractors/time/chronic.rb +55 -0
- data/lib/treat/extractors/time/nickel.rb +86 -62
- data/lib/treat/extractors/time/ruby.rb +53 -0
- data/lib/treat/extractors/topic_words/lda.rb +67 -58
- data/lib/treat/extractors/topics/reuters.rb +100 -87
- data/lib/treat/formatters.rb +39 -35
- data/lib/treat/formatters/readers/abw.rb +49 -29
- data/lib/treat/formatters/readers/autoselect.rb +37 -33
- data/lib/treat/formatters/readers/doc.rb +19 -13
- data/lib/treat/formatters/readers/html.rb +52 -30
- data/lib/treat/formatters/readers/image.rb +41 -40
- data/lib/treat/formatters/readers/odt.rb +59 -45
- data/lib/treat/formatters/readers/pdf.rb +28 -25
- data/lib/treat/formatters/readers/txt.rb +12 -15
- data/lib/treat/formatters/readers/xml.rb +73 -36
- data/lib/treat/formatters/serializers/xml.rb +80 -79
- data/lib/treat/formatters/serializers/yaml.rb +19 -18
- data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
- data/lib/treat/formatters/unserializers/xml.rb +94 -99
- data/lib/treat/formatters/unserializers/yaml.rb +20 -19
- data/lib/treat/formatters/visualizers/dot.rb +132 -132
- data/lib/treat/formatters/visualizers/standoff.rb +52 -44
- data/lib/treat/formatters/visualizers/tree.rb +26 -29
- data/lib/treat/groupable.rb +153 -0
- data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
- data/lib/treat/inflectors.rb +50 -45
- data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
- data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
- data/lib/treat/inflectors/declensors/active_support.rb +31 -0
- data/lib/treat/inflectors/declensors/english.rb +38 -0
- data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
- data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
- data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
- data/lib/treat/inflectors/stemmers/porter.rb +160 -0
- data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
- data/lib/treat/inflectors/stemmers/uea.rb +28 -0
- data/lib/treat/installer.rb +308 -0
- data/lib/treat/kernel.rb +105 -27
- data/lib/treat/languages.rb +122 -88
- data/lib/treat/languages/arabic.rb +15 -15
- data/lib/treat/languages/chinese.rb +15 -15
- data/lib/treat/languages/dutch.rb +15 -15
- data/lib/treat/languages/english.rb +61 -62
- data/lib/treat/languages/french.rb +19 -19
- data/lib/treat/languages/german.rb +20 -20
- data/lib/treat/languages/greek.rb +15 -15
- data/lib/treat/languages/italian.rb +16 -16
- data/lib/treat/languages/polish.rb +15 -15
- data/lib/treat/languages/portuguese.rb +15 -15
- data/lib/treat/languages/russian.rb +15 -15
- data/lib/treat/languages/spanish.rb +16 -16
- data/lib/treat/languages/swedish.rb +16 -16
- data/lib/treat/lexicalizers.rb +34 -55
- data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
- data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
- data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
- data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
- data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
- data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
- data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
- data/lib/treat/linguistics.rb +9 -0
- data/lib/treat/linguistics/categories.rb +11 -0
- data/lib/treat/linguistics/tags.rb +422 -0
- data/lib/treat/loaders/linguistics.rb +30 -0
- data/lib/treat/loaders/stanford.rb +27 -0
- data/lib/treat/object.rb +1 -0
- data/lib/treat/processors.rb +37 -44
- data/lib/treat/processors/chunkers/autoselect.rb +16 -0
- data/lib/treat/processors/chunkers/html.rb +71 -0
- data/lib/treat/processors/chunkers/txt.rb +18 -24
- data/lib/treat/processors/parsers/enju.rb +253 -208
- data/lib/treat/processors/parsers/stanford.rb +130 -131
- data/lib/treat/processors/segmenters/punkt.rb +79 -45
- data/lib/treat/processors/segmenters/stanford.rb +46 -48
- data/lib/treat/processors/segmenters/tactful.rb +43 -36
- data/lib/treat/processors/tokenizers/perl.rb +124 -92
- data/lib/treat/processors/tokenizers/ptb.rb +81 -0
- data/lib/treat/processors/tokenizers/punkt.rb +48 -42
- data/lib/treat/processors/tokenizers/stanford.rb +39 -38
- data/lib/treat/processors/tokenizers/tactful.rb +64 -55
- data/lib/treat/proxies.rb +52 -35
- data/lib/treat/retrievers.rb +26 -16
- data/lib/treat/retrievers/indexers/ferret.rb +47 -26
- data/lib/treat/retrievers/searchers/ferret.rb +69 -50
- data/lib/treat/tree.rb +241 -183
- data/spec/collection.rb +123 -0
- data/spec/document.rb +93 -0
- data/spec/entity.rb +408 -0
- data/spec/languages.rb +25 -0
- data/spec/phrase.rb +146 -0
- data/spec/samples/mathematicians/archimedes.abw +34 -0
- data/spec/samples/mathematicians/euler.html +21 -0
- data/spec/samples/mathematicians/gauss.pdf +0 -0
- data/spec/samples/mathematicians/leibniz.txt +13 -0
- data/spec/samples/mathematicians/newton.doc +0 -0
- data/spec/sandbox.rb +5 -0
- data/spec/token.rb +109 -0
- data/spec/treat.rb +52 -0
- data/spec/tree.rb +117 -0
- data/spec/word.rb +110 -0
- data/spec/zone.rb +66 -0
- data/tmp/INFO +1 -1
- metadata +100 -201
- data/INSTALL +0 -1
- data/README +0 -3
- data/TODO +0 -28
- data/lib/economist/half_cocked_basel.txt +0 -16
- data/lib/economist/hungarys_troubles.txt +0 -46
- data/lib/economist/indias_slowdown.txt +0 -15
- data/lib/economist/merkozy_rides_again.txt +0 -24
- data/lib/economist/prada_is_not_walmart.txt +0 -9
- data/lib/economist/to_infinity_and_beyond.txt +0 -15
- data/lib/ferret/_11.cfs +0 -0
- data/lib/ferret/_14.cfs +0 -0
- data/lib/ferret/_p.cfs +0 -0
- data/lib/ferret/_s.cfs +0 -0
- data/lib/ferret/_v.cfs +0 -0
- data/lib/ferret/_y.cfs +0 -0
- data/lib/ferret/segments +0 -0
- data/lib/ferret/segments_15 +0 -0
- data/lib/treat/buildable.rb +0 -157
- data/lib/treat/category.rb +0 -33
- data/lib/treat/delegatable.rb +0 -116
- data/lib/treat/doable.rb +0 -45
- data/lib/treat/entities/collection.rb +0 -14
- data/lib/treat/entities/document.rb +0 -12
- data/lib/treat/entities/phrases.rb +0 -17
- data/lib/treat/entities/tokens.rb +0 -61
- data/lib/treat/entities/zones.rb +0 -41
- data/lib/treat/extractors/coreferences/stanford.rb +0 -69
- data/lib/treat/extractors/date/chronic.rb +0 -32
- data/lib/treat/extractors/date/ruby.rb +0 -25
- data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
- data/lib/treat/extractors/language/language_extractor.rb +0 -27
- data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
- data/lib/treat/extractors/roles/naive.rb +0 -73
- data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
- data/lib/treat/extractors/statistics/position_in.rb +0 -14
- data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
- data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
- data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
- data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
- data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
- data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
- data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
- data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
- data/lib/treat/feature.rb +0 -58
- data/lib/treat/features.rb +0 -7
- data/lib/treat/formatters/visualizers/short_value.rb +0 -29
- data/lib/treat/formatters/visualizers/txt.rb +0 -45
- data/lib/treat/group.rb +0 -106
- data/lib/treat/helpers/linguistics_loader.rb +0 -18
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
- data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
- data/lib/treat/inflectors/declensions/english.rb +0 -319
- data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
- data/lib/treat/inflectors/stem/porter.rb +0 -162
- data/lib/treat/inflectors/stem/porter_c.rb +0 -26
- data/lib/treat/inflectors/stem/uea.rb +0 -30
- data/lib/treat/install.rb +0 -59
- data/lib/treat/languages/tags.rb +0 -377
- data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
- data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
- data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
- data/lib/treat/lexicalizers/tag/brill.rb +0 -91
- data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
- data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
- data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
- data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
- data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
- data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
- data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
- data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
- data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
- data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
- data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
- data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
- data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
- data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
- data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
- data/lib/treat/registrable.rb +0 -28
- data/lib/treat/sugar.rb +0 -50
- data/lib/treat/viewable.rb +0 -29
- data/lib/treat/visitable.rb +0 -28
- data/test/profile.rb +0 -2
- data/test/tc_entity.rb +0 -117
- data/test/tc_extractors.rb +0 -73
- data/test/tc_formatters.rb +0 -41
- data/test/tc_inflectors.rb +0 -34
- data/test/tc_lexicalizers.rb +0 -32
- data/test/tc_processors.rb +0 -50
- data/test/tc_resources.rb +0 -22
- data/test/tc_treat.rb +0 -60
- data/test/tc_tree.rb +0 -60
- data/test/tests.rb +0 -20
- data/test/texts.rb +0 -19
- data/test/texts/english/half_cocked_basel.txt +0 -16
- data/test/texts/english/hose_and_dry.doc +0 -0
- data/test/texts/english/hungarys_troubles.abw +0 -70
- data/test/texts/english/long.html +0 -24
- data/test/texts/english/long.txt +0 -22
- data/test/texts/english/medium.txt +0 -5
- data/test/texts/english/republican_nomination.pdf +0 -0
- data/test/texts/english/saving_the_euro.odt +0 -0
- data/test/texts/english/short.txt +0 -3
- data/test/texts/english/zero_sum.html +0 -111
@@ -0,0 +1,31 @@
|
|
1
|
+
# This module implements methods that are used
|
2
|
+
# by workers to determine if an entity is properly
|
3
|
+
# formatted before working on it.
|
4
|
+
module Treat::Entities::Abilities::Checkable
|
5
|
+
|
6
|
+
# Return the requested feature of this entity.
#
# If the feature is already stored, its stored value is
# returned. Otherwise, when do_it is true, the feature is
# computed by calling the method of the same name. When
# do_it is false, a Treat::Exception is raised naming the
# calling task and the feature it requires.
def check_has(feature, do_it = true)
  if has?(feature)
    @features[feature]
  elsif do_it
    send(feature)
  else
    # The calling task is recovered from the call stack,
    # which is fragile: it depends on the call depth.
    calling_task = caller_method(2)
    task_group = Treat::Categories.lookup(calling_task)
    feature_group = Treat::Categories.lookup(feature)
    message = "#{task_group.type.to_s.capitalize} #{calling_task} "
    message += "requires #{feature_group.type} #{feature_group.method}."
    raise Treat::Exception, message
  end
end
|
21
|
+
|
22
|
+
# Raise a Treat::Exception if this entity has children;
# return nil otherwise. The message names the task found
# on the call stack and the short value of the entity.
def check_hasnt_children
  if has_children?
    message = "Warning: can't #{caller_method(2)} " \
      "an entity that has children. Removing " \
      " all children of text \"[#{short_value}].\""
    raise Treat::Exception, message
  end
end
|
30
|
+
|
31
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Treat::Entities::Abilities::Copyable
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
# Copy this entity into the given collection and return
# the copy. Only documents and collections can be copied,
# and the destination must be a Treat::Entities::Collection;
# anything else raises a Treat::Exception.
def copy_into(collection)
  unless collection.is_a?(Treat::Entities::Collection)
    raise Treat::Exception,
          "Cannot copy an entity into " \
          "something else than a collection."
  end

  case type
  when :document
    copy_document_into(collection)
  when :collection
    copy_collection_into(collection)
  else
    raise Treat::Exception,
          "Can only copy a document " \
          "or collection into a collection."
  end
end
|
22
|
+
|
23
|
+
# Copy this collection's folder into the folder of the
# given collection and return a duplicate of this entity
# whose :folder points at the destination directory.
#
# The destination directory is named after the last
# component of File.dirname(folder) and is created inside
# collection.folder if it does not exist yet.
def copy_collection_into(collection)
  copy = dup
  f = File.dirname(folder)
  f = f.split(File::SEPARATOR)[-1]
  f = File.join(collection.folder, f)
  # The predicate is File.directory? (with the question
  # mark); FileTest has no method named "directory".
  FileUtils.mkdir(f) unless File.directory?(f)
  FileUtils.cp_r(folder, f)
  copy.set :folder, f
  copy
end
|
34
|
+
|
35
|
+
# Duplicate this document into the given collection and
# return the duplicate. When the document is backed by a
# file, the file is copied into collection.folder and the
# duplicate's :file is set to the new path; otherwise the
# plain duplicate is returned.
def copy_document_into(collection)
  duplicate = dup
  if file
    destination = File.join(collection.folder, File.basename(file))
    FileUtils.cp(file, destination)
    duplicate.set :file, destination
  end
  duplicate
end
|
44
|
+
|
45
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Treat::Entities::Abilities::Countable
|
2
|
+
|
3
|
+
# Return the 1-based index of this entity among the
# children of its parent. Raises a Treat::Exception
# when the entity has no parent.
def position
  if has_parent?
    siblings = parent.children
    siblings.index(self) + 1
  else
    raise Treat::Exception, "No parent to get position in."
  end
end
|
15
|
+
|
16
|
+
# Find the frequency of this entity's value in the
# registry of the ancestor with the supplied type, or
# in the registry of the root node when no type is given.
#
# The root lookup returns immediately and is keyed by
# this entity's value, like every other lookup in the
# :value registry.
def frequency_in(parent_type = nil)
  return root.registry[:value][value] unless parent_type

  registry(parent_type)[:value][value]
end
|
28
|
+
|
29
|
+
# Get the frequency of this entity's
|
30
|
+
# value in the root node.
|
31
|
+
alias :frequency :frequency_in
|
32
|
+
|
33
|
+
# Return the size of the :type registry entry stored
# under the given type for this entity.
def count(type)
  by_type = @registry[:type]
  by_type[type].size
end
|
38
|
+
|
39
|
+
# Return the frequency of the given value as recorded in
# this entity's :value registry, or 0 when the value is
# not registered. Raises a Treat::Exception when called
# on a token, since a leaf contains nothing to count.
def frequency_of(value)
  if is_a?(Treat::Entities::Token)
    raise Treat::Exception,
          "Cannot get the frequency " \
          "of something within a leaf."
  end

  @registry[:value][value] || 0
end
|
50
|
+
|
51
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# When Treat.debug is set to true, each call to
|
2
|
+
# #call_worker will result in a debug message being
|
3
|
+
# printed by the #print_debug function.
|
4
|
+
module Treat::Entities::Abilities::Debuggable
|
5
|
+
|
6
|
+
@@prev = nil
|
7
|
+
@@i = 0
|
8
|
+
|
9
|
+
# Explains what Treat is currently doing by building a
# one-sentence description of a worker call.
#
# Identical consecutive messages are not printed one by
# one: repeats are counted in @@i and collapsed into a
# single pluralized line ("Tokenized 3 sentences ...").
# A message is only written when the next, different
# message arrives, so the last message of a run is never
# shown.
#
# entity  - the entity worked on (not used in the message).
# task    - the task name; underscores become spaces.
# worker  - the worker performing the task.
# group   - the worker group; its targets, type,
#           preset_option and string form are read.
# options - the options hash; only the entry for the
#           group's preset option is read.
def print_debug(entity, task, worker, group, options)

  targs = group.targets.map { |target| target.to_s }

  if targs.size == 1
    t = targs[0]
  else
    t = targs[0..-2].join(', ') +
        ' and/or ' + targs[-1]
  end

  genitive = targs.size > 1 ? 'their' : 'its'

  doing = ''

  human_task = task.to_s.gsub('_', ' ')

  if group.type == :transformer ||
     group.type == :computer

    # Naive past tense: drop a final "e", then append
    # "ed" unless the word already ends in "d".
    tt = human_task
    tt = tt[0..-2] if tt[-1] == 'e'
    ed = tt[-1] == 'd' ? '' : 'ed'
    doing = "#{tt.capitalize}#{ed} #{t}"

  elsif group.type == :annotator

    if group.preset_option
      opt = options[group.preset_option]
      form = opt.to_s.gsub('_', ' ')
      # Drop the last character of the task name before
      # prefixing it with the preset form.
      human_task[-1] = ''
      human_task = form + ' ' + human_task
    end

    doing = "Annotated #{t} with " +
            "#{genitive} #{human_task}"
  end

  if group.to_s.index('Formatters')
    curr = doing +
           ' in format ' +
           worker.to_s
  else
    curr = doing +
           ' using ' +
           worker.to_s.gsub('_', ' ')
  end

  curr.gsub!('ss', 's')
  curr += '.'

  if curr == @@prev
    @@i += 1
  else
    # Nothing is pending on the very first call, so there
    # is nothing to print yet.
    if @@prev
      # @@i counts the repeats after the first occurrence,
      # so the message was emitted @@i + 1 times in total.
      if @@i > 0
        Treat::Entities.list.each do |e|
          @@prev.gsub!(e.to_s, e.to_s + 's')
        end
        @@prev.gsub!('its', 'their')
        @@prev = @@prev.split(' ').
                 insert(1, (@@i + 1).to_s).join(' ')
      end
      puts @@prev # Last call doesn't get shown.
    end
    @@i = 0
  end

  @@prev = curr

end
|
82
|
+
|
83
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# Makes a class delegatable, allowing calls
|
2
|
+
# on it to be forwarded to a worker class
|
3
|
+
# able to perform the appropriate task.
|
4
|
+
module Treat::Entities::Abilities::Delegatable
|
5
|
+
|
6
|
+
# Define one instance method per preset of the group on
# the receiving class. Does nothing (and returns nil)
# when the group has no preset option.
#
# Each generated method returns the stored preset value
# if the entity already has it. Otherwise it runs the
# group's task with the preset option filled in, moves
# the resulting feature from the task name to the preset
# name, and returns it.
def add_presets(group)
  preset_key = group.preset_option
  if preset_key
    group.presets.each do |preset_name|
      define_method(preset_name) do |worker = nil, options = {}|
        if has?(preset_name)
          get(preset_name)
        else
          # Caller-supplied options win over the preset.
          merged = { preset_key => preset_name }.merge(options)
          task = group.method
          send(task, worker, merged)
          extracted = unset(task)
          features[preset_name] = extracted if extracted
        end
      end
    end
  end
end
|
24
|
+
|
25
|
+
# Define, on the receiving class, the instance method
# that performs the group's task, together with the
# group's preset methods.
#
# The generated method accepts (worker, options); a hash
# passed in first position is treated as the options.
# It returns the stored feature when one exists for the
# task, and otherwise delegates to call_worker on the
# entity's class.
def add_workers(group)
  class_eval do
    task_name = group.method
    add_presets(group)
    define_method(task_name) do |worker = nil, options = {}|
      options, worker = worker, nil if worker.is_a?(Hash)
      cached = @features[task_name]
      if cached.nil?
        self.class.call_worker(
          self, task_name, worker, group, options
        )
      else
        cached
      end
    end
  end
end
|
48
|
+
|
49
|
+
# Run a task on an entity through a worker of the group.
#
# When no worker (or :default) is given, one is chosen
# with find_worker. A Treat::Exception is raised if the
# worker is not listed in the group. For annotator groups
# a truthy result is stored in the entity's features under
# the task name. Transformer groups return the entity
# itself; every other group returns the worker's result.
def call_worker(entity, task, worker, group, options)
  worker = find_worker(entity, group) if worker.nil? || worker == :default

  print_debug(entity, task, worker, group, options) if Treat.debug

  unless group.list.include?(worker)
    raise Treat::Exception, worker_not_found(worker, group)
  end

  # Resolve the worker name to the constant in the group.
  worker_class = group.const_get(cc(worker.to_s).intern)
  result = worker_class.send(group.method, entity, options)

  entity.features[task] = result if group.type == :annotator && result

  group.type == :transformer ? entity : result
end
|
83
|
+
|
84
|
+
# Choose the worker to use when none was supplied: the
# group's default worker if it has one, otherwise the
# worker registered for the entity's language.
def find_worker(entity, group)
  if group.default.nil?
    find_worker_for_language(entity.language, group)
  else
    group.default
  end
end
|
91
|
+
|
92
|
+
# Return the first worker listed for the given group in
# the language's configuration.
#
# The language is resolved through Treat::Languages, and
# the worker list is looked up in the constant named
# after the group's category (the second-to-last part of
# the group's name), under the group's underscored name.
# Raises a Treat::Exception when no worker is listed.
def find_worker_for_language(language, group)
  lang = Treat::Languages.describe(language)
  language_module = Treat::Languages.const_get(cc(lang).intern)
  category = group.to_s.split('::')[-2].intern
  workers_by_group = language_module.const_get(category)

  group_key = ucc(cl(group)).intern
  candidates = workers_by_group[group_key]

  unless candidates && candidates[0]
    readable = ucc(cl(group)).gsub('_', ' ')
    raise Treat::Exception,
          "No worker to find \"#{readable}\" is available for the " \
          "#{lang.to_s.capitalize} language."
  end

  candidates[0]
end
|
115
|
+
|
116
|
+
# Build the error message used when a worker cannot be
# found in a group, followed by whatever suggestion
# did_you_mean? produces from the group's worker names.
def worker_not_found(klass, group)
  message = "Algorithm '#{ucc(cl(klass))}' couldn't be " \
            "found in group #{group}."
  known_workers = group.list.map { |name| ucc(name) }
  message + did_you_mean?(known_workers, ucc(klass))
end
|
122
|
+
|
123
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Implement support for the functions #do and #do_task.
|
2
|
+
module Treat::Entities::Abilities::Doable
|
3
|
+
|
4
|
+
# Perform the supplied tasks on the entity, in order,
# and return the entity itself.
#
# Each task may be given as:
#   - a task name (default worker, no options);
#   - an array [task, worker] or [task, [worker, options]];
#   - a hash { task => worker } or
#     { task => [worker, options] }.
def do(*tasks)
  tasks.each do |task|
    # Normalize every form to a list of [name, spec] pairs.
    pairs =
      if task.is_a?(Hash)
        task.to_a
      elsif task.is_a?(Array)
        [[task[0], task[1]]]
      else
        [[task, nil]]
      end

    pairs.each do |name, spec|
      worker, options = spec.is_a?(Array) ? spec : [spec, nil]
      do_task(name, worker, options || {})
    end
  end
  self
end
|
28
|
+
|
29
|
+
# Perform a single task with the given worker and options.
#
# If this entity's type matches one of the group's target
# types (or the group targets :entity), the task is run
# on this entity and its result returned. Otherwise the
# task is run recursively on every contained entity of a
# target type, any feature stored under the task name on
# this entity is removed, and nil is returned.
def do_task(task, worker, options, group = nil)
  group ||= get_group(task)
  targets = group.targets

  matching = targets.select do |target|
    Treat::Entities.match_types[target][type]
  end

  if !matching.empty? || targets.include?(:entity)
    return send(task, worker, options)
  end

  each_entity(*targets) do |entity|
    entity.do_task(task, worker, options, group)
  end
  features.delete(task) unless targets.include?(type)
  nil
end
|
50
|
+
|
51
|
+
# Look up and return the group that implements a task.
# Raises a Treat::Exception when the lookup finds nothing.
def get_group(task)
  Treat::Categories.lookup(task) ||
    raise(Treat::Exception, "Task #{task} does not exist.")
end
|
60
|
+
|
61
|
+
|
62
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module Treat::Entities::Abilities::Iterable
|
2
|
+
|
3
|
+
# Yield this entity, when its type matches one of the
# supplied types, and then recursively every matching
# entity below it, depth first. With no types given,
# :entity is used.
#
# Unlike #each, this walks the whole subtree.
def each_entity(*types)
  types = [:entity] if types.empty?

  matches_self = types.any? do |wanted|
    Treat::Entities.match_types[wanted][type]
  end
  yield self if matches_self

  return if @children.size == 0

  @children.each do |child|
    child.each_entity(*types) { |descendant| yield descendant }
  end
end
|
26
|
+
|
27
|
+
# Return the entities of the given type (any entity by
# default) whose feature equals the supplied value.
#
# An entity qualifies when it has the feature stored with
# that value, or, for :id, :value and :type, when the
# method of that name returns the value.
def entities_with_feature(feature, value, type = nil)
  builtin = [:id, :value, :type].include?(feature)
  found = []
  each_entity(type || :entity) do |candidate|
    by_feature = candidate.has?(feature) &&
                 candidate.features[feature] == value
    if by_feature || (builtin && candidate.send(feature) == value)
      found << candidate
    end
  end
  found
end
|
40
|
+
|
41
|
+
# Collect into an array every entity yielded by
# #each_entity for the supplied types.
def entities_with_types(*types)
  collected = []
  each_entity(*types) { |entity| collected.push(entity) }
  collected
end
|
48
|
+
|
49
|
+
alias :entities_with_type :entities_with_types
|
50
|
+
|
51
|
+
# Returns an array of the entities whose :category
# feature equals the given category, optionally limited
# to entities of the given type (any entity by default).
def entities_with_category(category, type = nil)
  # The category is the value to match; the type only
  # narrows which entities are inspected.
  entities_with_feature(:category, category, type)
end
|
56
|
+
|
57
|
+
# Return the nearest ancestor of this entity whose type
# matches one of the given types, or nil when there is
# no parent or no ancestor matches.
def ancestor_with_types(*types)
  candidate = @parent
  return nil unless candidate

  matches = lambda do |entity_type|
    types.any? do |wanted|
      Treat::Entities.match_types[wanted][entity_type]
    end
  end

  # Climb towards the root until a matching type is found.
  until matches.call(candidate.type)
    return nil unless candidate.has_parent?
    candidate = candidate.parent
  end

  candidate
end
|
78
|
+
|
79
|
+
alias :ancestor_with_type :ancestor_with_types
|
80
|
+
|
81
|
+
# Yields each ancestor of this entity that has one of
# the given types, nearest first. Levels whose type does
# not match are skipped. With no types given, :entity
# is used.
def each_ancestor(*types)
  types = [:entity] if types.empty?
  current = self
  while (found = current.ancestor_with_types(*types))
    yield found
    # Continue from the ancestor just yielded. Continuing
    # from the direct parent would find the same ancestor
    # again whenever intermediate levels were skipped.
    current = found
  end
end
|
91
|
+
|
92
|
+
# Collect into an array every ancestor yielded by
# #each_ancestor for the supplied types.
def ancestors_with_types(*types)
  collected = []
  each_ancestor(*types) { |ancestor| collected.push(ancestor) }
  collected
end
|
99
|
+
|
100
|
+
alias :ancestors_with_type :ancestors_with_types
|
101
|
+
|
102
|
+
# Return the first element of the array, emitting a
# warning when the array holds more than one element.
# Used by magic methods: asking a sentence with many
# words for its "word" returns the first word, but
# warns the user.
def first_but_warn(array, type)
  unless array.size <= 1
    warn "Warning: requested one #{type}, but" \
         " there are many #{type}s in this entity."
  end
  array[0]
end
|
113
|
+
|
114
|
+
|
115
|
+
end
|