treat 1.0.6 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +2 -4
- data/README.md +13 -12
- data/bin/MANIFEST +1 -0
- data/bin/stanford/bridge.jar +0 -0
- data/bin/stanford/joda-time.jar +0 -0
- data/bin/stanford/stanford-corenlp.jar +0 -0
- data/bin/stanford/stanford-parser.jar +0 -0
- data/bin/stanford/xom.jar +0 -0
- data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
- data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
- data/files/{INFO → MANIFEST} +0 -0
- data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
- data/files/weather-central-canada-heat-wave.html +1370 -0
- data/lib/treat/config/core/acronyms.rb +4 -0
- data/lib/treat/config/core/encodings.rb +8 -0
- data/lib/treat/config/core/entities.rb +2 -0
- data/lib/treat/config/core/language.rb +3 -0
- data/lib/treat/config/core/paths.rb +8 -0
- data/lib/treat/config/core/syntax.rb +1 -0
- data/lib/treat/config/core/verbosity.rb +1 -0
- data/lib/treat/config/databases/mongo.rb +3 -0
- data/lib/treat/config/languages/agnostic.rb +34 -0
- data/lib/treat/config/languages/arabic.rb +13 -0
- data/lib/treat/config/languages/chinese.rb +13 -0
- data/lib/treat/config/languages/dutch.rb +12 -0
- data/lib/treat/config/languages/english.rb +60 -0
- data/lib/treat/config/languages/french.rb +18 -0
- data/lib/treat/config/languages/german.rb +18 -0
- data/lib/treat/config/languages/greek.rb +12 -0
- data/lib/treat/config/languages/italian.rb +12 -0
- data/lib/treat/config/languages/polish.rb +12 -0
- data/lib/treat/config/languages/portuguese.rb +12 -0
- data/lib/treat/config/languages/russian.rb +12 -0
- data/lib/treat/config/languages/spanish.rb +12 -0
- data/lib/treat/config/languages/swedish.rb +12 -0
- data/lib/treat/config/libraries/stanford.rb +1 -0
- data/lib/treat/config/linguistics/categories.rb +4 -0
- data/lib/treat/config/linguistics/punctuation.rb +33 -0
- data/lib/treat/config/tags/aligned.rb +221 -0
- data/lib/treat/config/tags/enju.rb +71 -0
- data/lib/treat/config/tags/paris7.rb +17 -0
- data/lib/treat/config/tags/ptb.rb +15 -0
- data/lib/treat/config/workers/extractors.rb +39 -0
- data/lib/treat/config/workers/formatters.rb +20 -0
- data/lib/treat/config/workers/inflectors.rb +27 -0
- data/lib/treat/config/workers/learners.rb +6 -0
- data/lib/treat/config/workers/lexicalizers.rb +18 -0
- data/lib/treat/config/workers/list.rb +1 -0
- data/lib/treat/config/workers/processors.rb +19 -0
- data/lib/treat/config/workers/retrievers.rb +12 -0
- data/lib/treat/config.rb +125 -0
- data/lib/treat/{classification.rb → core/classification.rb} +1 -1
- data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
- data/lib/treat/{tree.rb → core/node.rb} +5 -5
- data/lib/treat/core/server.rb +3 -0
- data/lib/treat/core.rb +5 -0
- data/lib/treat/entities/abilities/buildable.rb +61 -56
- data/lib/treat/entities/abilities/checkable.rb +2 -2
- data/lib/treat/entities/abilities/comparable.rb +21 -0
- data/lib/treat/entities/abilities/copyable.rb +2 -0
- data/lib/treat/entities/abilities/countable.rb +1 -1
- data/lib/treat/entities/abilities/debuggable.rb +1 -1
- data/lib/treat/entities/abilities/delegatable.rb +42 -36
- data/lib/treat/entities/abilities/doable.rb +2 -2
- data/lib/treat/entities/abilities/exportable.rb +1 -1
- data/lib/treat/entities/abilities/iterable.rb +21 -33
- data/lib/treat/entities/abilities/magical.rb +8 -8
- data/lib/treat/entities/abilities/registrable.rb +0 -38
- data/lib/treat/entities/abilities/stringable.rb +19 -19
- data/lib/treat/entities/collection.rb +31 -0
- data/lib/treat/entities/document.rb +10 -0
- data/lib/treat/entities/entity.rb +18 -13
- data/lib/treat/entities/group.rb +15 -0
- data/lib/treat/entities/section.rb +13 -0
- data/lib/treat/entities/token.rb +35 -0
- data/lib/treat/entities/zone.rb +11 -0
- data/lib/treat/entities.rb +5 -75
- data/lib/treat/helpers/didyoumean.rb +57 -0
- data/lib/treat/helpers/escaping.rb +15 -0
- data/lib/treat/helpers/formatting.rb +41 -0
- data/lib/treat/helpers/platform.rb +15 -0
- data/lib/treat/helpers/reflection.rb +17 -0
- data/lib/treat/helpers/temporary.rb +27 -0
- data/lib/treat/helpers/verbosity.rb +19 -0
- data/lib/treat/helpers.rb +5 -0
- data/lib/treat/installer.rb +46 -165
- data/lib/treat/loaders/linguistics.rb +22 -27
- data/lib/treat/loaders/stanford.rb +23 -41
- data/lib/treat/loaders.rb +10 -0
- data/lib/treat/proxies.rb +73 -24
- data/lib/treat/version.rb +3 -0
- data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
- data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
- data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
- data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
- data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
- data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
- data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
- data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
- data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
- data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
- data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
- data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
- data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
- data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
- data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
- data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
- data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
- data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
- data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
- data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
- data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
- data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
- data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
- data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
- data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
- data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
- data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
- data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
- data/lib/treat/workers.rb +96 -0
- data/lib/treat.rb +23 -49
- data/spec/collection.rb +4 -4
- data/spec/document.rb +5 -5
- data/spec/entity.rb +33 -32
- data/spec/{tree.rb → node.rb} +5 -5
- data/spec/phrase.rb +5 -39
- data/spec/sandbox.rb +212 -6
- data/spec/token.rb +12 -9
- data/spec/treat.rb +12 -9
- data/spec/word.rb +10 -9
- data/spec/zone.rb +6 -2
- data/tmp/{INFO → MANIFEST} +0 -0
- data/tmp/english.yaml +10340 -0
- metadata +149 -139
- data/lib/treat/ai.rb +0 -12
- data/lib/treat/categories.rb +0 -90
- data/lib/treat/categorizable.rb +0 -44
- data/lib/treat/configurable.rb +0 -115
- data/lib/treat/dependencies.rb +0 -25
- data/lib/treat/downloader.rb +0 -87
- data/lib/treat/entities/abilities.rb +0 -10
- data/lib/treat/entities/entities.rb +0 -102
- data/lib/treat/exception.rb +0 -7
- data/lib/treat/extractors.rb +0 -79
- data/lib/treat/formatters/serializers/mongo.rb +0 -64
- data/lib/treat/formatters.rb +0 -41
- data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
- data/lib/treat/inflectors.rb +0 -52
- data/lib/treat/kernel.rb +0 -208
- data/lib/treat/languages/arabic.rb +0 -16
- data/lib/treat/languages/chinese.rb +0 -16
- data/lib/treat/languages/dutch.rb +0 -16
- data/lib/treat/languages/english.rb +0 -63
- data/lib/treat/languages/french.rb +0 -20
- data/lib/treat/languages/german.rb +0 -20
- data/lib/treat/languages/greek.rb +0 -16
- data/lib/treat/languages/italian.rb +0 -17
- data/lib/treat/languages/language.rb +0 -10
- data/lib/treat/languages/list.txt +0 -504
- data/lib/treat/languages/polish.rb +0 -16
- data/lib/treat/languages/portuguese.rb +0 -16
- data/lib/treat/languages/russian.rb +0 -16
- data/lib/treat/languages/spanish.rb +0 -16
- data/lib/treat/languages/swedish.rb +0 -16
- data/lib/treat/languages.rb +0 -132
- data/lib/treat/lexicalizers.rb +0 -37
- data/lib/treat/object.rb +0 -7
- data/lib/treat/processors/chunkers/autoselect.rb +0 -16
- data/lib/treat/processors/chunkers/txt.rb +0 -21
- data/lib/treat/processors.rb +0 -38
- data/lib/treat/retrievers.rb +0 -27
- data/lib/treat/server.rb +0 -26
- data/lib/treat/universalisation/encodings.rb +0 -12
- data/lib/treat/universalisation/tags.rb +0 -453
- data/lib/treat/universalisation.rb +0 -9
- data/spec/languages.rb +0 -25
@@ -1,16 +0,0 @@
|
|
1
|
-
class Treat::Languages::Russian
|
2
|
-
|
3
|
-
RequiredDependencies = []
|
4
|
-
OptionalDependencies = []
|
5
|
-
|
6
|
-
Extractors = {}
|
7
|
-
Inflectors = {}
|
8
|
-
Lexicalizers = {}
|
9
|
-
Processors = {
|
10
|
-
:chunkers => [:txt],
|
11
|
-
:segmenters => [:punkt],
|
12
|
-
:tokenizers => [:tactful]
|
13
|
-
}
|
14
|
-
Retrievers = {}
|
15
|
-
|
16
|
-
end
|
@@ -1,16 +0,0 @@
|
|
1
|
-
class Treat::Languages::Spanish
|
2
|
-
|
3
|
-
RequiredDependencies = []
|
4
|
-
OptionalDependencies = []
|
5
|
-
|
6
|
-
Extractors = {}
|
7
|
-
Inflectors = {}
|
8
|
-
Lexicalizers = {}
|
9
|
-
Processors = {
|
10
|
-
:chunkers => [:txt],
|
11
|
-
:segmenters => [:tactful],
|
12
|
-
:tokenizers => [:tactful]
|
13
|
-
}
|
14
|
-
Retrievers = {}
|
15
|
-
|
16
|
-
end
|
@@ -1,16 +0,0 @@
|
|
1
|
-
class Treat::Languages::Swedish
|
2
|
-
|
3
|
-
RequiredDependencies = []
|
4
|
-
OptionalDependencies = []
|
5
|
-
|
6
|
-
Extractors = {}
|
7
|
-
Inflectors = {}
|
8
|
-
Lexicalizers = {}
|
9
|
-
Processors = {
|
10
|
-
:chunkers => [:txt],
|
11
|
-
:segmenters => [:punkt],
|
12
|
-
:tokenizers => [:tactful]
|
13
|
-
}
|
14
|
-
Retrievers = {}
|
15
|
-
|
16
|
-
end
|
data/lib/treat/languages.rb
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
# This module provides linguistic resources
|
2
|
-
# for the Treat library, including information
|
3
|
-
# about language codes, the functions available
|
4
|
-
# for each language, and the different tags used
|
5
|
-
# to markup that language.
|
6
|
-
module Treat::Languages
|
7
|
-
|
8
|
-
def self.const_missing(const)
|
9
|
-
lang = const.to_s.downcase
|
10
|
-
f = File.join(File.dirname(__FILE__), "languages", lang)
|
11
|
-
unless File.readable?(f + '.rb')
|
12
|
-
raise Treat::Exception,
|
13
|
-
"Language #{lang} is not supported."
|
14
|
-
end
|
15
|
-
require f
|
16
|
-
const_get(const)
|
17
|
-
end
|
18
|
-
|
19
|
-
# Yield a lowercase symbol for each
|
20
|
-
# defined language.
|
21
|
-
def self.each
|
22
|
-
constants.each do |constant|
|
23
|
-
yield constant.to_s.downcase.intern
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
# Identifier constants for language codes.
|
28
|
-
ISO639_1 = 1
|
29
|
-
ISO639_2 = 2
|
30
|
-
|
31
|
-
# Describe a language code (ISO-639-1 or ISO-639-2)
|
32
|
-
# or its full text description in full French or English.
|
33
|
-
def self.describe(lang, desc_lang = :en)
|
34
|
-
raise "Must provide a non-nil language "+
|
35
|
-
"identifier to describe." if lang.nil?
|
36
|
-
lang = code(lang).to_s
|
37
|
-
if [:en, :eng, :english, :anglais].
|
38
|
-
include?(desc_lang)
|
39
|
-
l = @@english_full.key(lang)
|
40
|
-
elsif [:fr, :fra, :french, :french].
|
41
|
-
include?(desc_lang)
|
42
|
-
l = @@french_full.key(lang)
|
43
|
-
else
|
44
|
-
raise Treat::Exception,
|
45
|
-
"Unknown language to describe: #{desc_lang}."
|
46
|
-
end
|
47
|
-
not_found(lang) if l.nil?
|
48
|
-
l.intern
|
49
|
-
end
|
50
|
-
|
51
|
-
# Raise an error message when a language code
|
52
|
-
# or description is not found and suggest
|
53
|
-
# possible misspellings.
|
54
|
-
def self.not_found(lang)
|
55
|
-
msg = "Language '#{lang}' does not exist."
|
56
|
-
all = @@iso639_2.keys + @@iso639_1.keys +
|
57
|
-
@@english_full.keys + @@french_full.keys
|
58
|
-
msg += did_you_mean?(all, lang)
|
59
|
-
raise Treat::Exception, msg
|
60
|
-
end
|
61
|
-
|
62
|
-
# Return the class representing a language.
|
63
|
-
def self.get(lang)
|
64
|
-
lang = Treat::Languages.describe(lang).to_s
|
65
|
-
begin
|
66
|
-
const_get(lang.capitalize)
|
67
|
-
rescue
|
68
|
-
not_found(lang)
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
# Find a language by ISO-639-1 or ISO-639-2 code
|
73
|
-
# or full name (in English or French) and return
|
74
|
-
# the ISO-639-1 or ISO-639-2 language code as a
|
75
|
-
# lowercase identifier.
|
76
|
-
def self.code(lang, rc = ISO639_2)
|
77
|
-
raise "Must provide a non-nil language "+
|
78
|
-
"identifier to describe." if lang.nil?
|
79
|
-
get_languages
|
80
|
-
lang = lang.to_s.downcase
|
81
|
-
if @@iso639_1.has_key?(lang)
|
82
|
-
return lang.intern if rc == ISO639_2
|
83
|
-
return @@iso639_1[lang].intern if rc == ISO639_1
|
84
|
-
elsif @@iso639_2.has_key?(lang)
|
85
|
-
return lang.intern if rc == ISO639_2
|
86
|
-
return @@iso639_2[lang].intern if rc == ISO639_1
|
87
|
-
elsif @@english_full.has_key?(lang)
|
88
|
-
return @@english_full[lang].intern if rc == ISO639_2
|
89
|
-
return @@iso639_2[@@english_full[lang]].intern if rc == ISO639_1
|
90
|
-
elsif @@french_full.has_key?(lang)
|
91
|
-
return @@french_full[lang].intern if rc == ISO639_2
|
92
|
-
return @@iso639_2[@@french_full[lang]].intern if rc == ISO639_1
|
93
|
-
else
|
94
|
-
not_found(lang)
|
95
|
-
end
|
96
|
-
|
97
|
-
end
|
98
|
-
|
99
|
-
# Whether the language list has been loaded or not.
|
100
|
-
@@loaded = false
|
101
|
-
|
102
|
-
# Get the languages from the dictionary.
|
103
|
-
def self.get_languages
|
104
|
-
return if @@loaded
|
105
|
-
@@iso639_1 = {}; @@iso639_2 = {};
|
106
|
-
@@english_full = {}; @@french_full = {}
|
107
|
-
languages = IO.readlines(File.join(
|
108
|
-
File.dirname(__FILE__), "languages", "list.txt"))
|
109
|
-
languages.each do |language|
|
110
|
-
iso639_2, iso639_1, english_desc, french_desc =
|
111
|
-
language.split(',')
|
112
|
-
@@iso639_1[iso639_1] = iso639_2
|
113
|
-
@@iso639_2[iso639_2] = iso639_1
|
114
|
-
unless english_desc.nil?
|
115
|
-
english_desc.strip.downcase.split('|').each do |l|
|
116
|
-
@@english_full[l.downcase.strip] = iso639_2
|
117
|
-
end
|
118
|
-
end
|
119
|
-
unless french_desc.nil?
|
120
|
-
french_desc.strip.downcase.split('|').each do |l|
|
121
|
-
@@french_full[l.downcase.strip] = iso639_2
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
125
|
-
@@loaded = true
|
126
|
-
end
|
127
|
-
|
128
|
-
# Get the language list.
|
129
|
-
get_languages
|
130
|
-
|
131
|
-
|
132
|
-
end
|
data/lib/treat/lexicalizers.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
# Lexicalizers allow retrieving lexical information
|
2
|
-
# (part of speech tag, general word category, synsets,
|
3
|
-
# synonyms, antonyms, hyponyms, hypernyms, lexical
|
4
|
-
# relations, grammatical links)
|
5
|
-
# of an entity.
|
6
|
-
module Treat::Lexicalizers
|
7
|
-
|
8
|
-
# Taggers return the part of speech tag of a word.
|
9
|
-
module Taggers
|
10
|
-
extend Treat::Groupable
|
11
|
-
self.type = :annotator
|
12
|
-
self.targets = [:sentence, :phrase, :token]
|
13
|
-
end
|
14
|
-
|
15
|
-
# Return the general category of a word.
|
16
|
-
module Categorizers
|
17
|
-
extend Treat::Groupable
|
18
|
-
self.type = :annotator
|
19
|
-
self.targets = [:sentence, :phrase, :token]
|
20
|
-
self.recursive = true
|
21
|
-
self.default = :from_tag
|
22
|
-
end
|
23
|
-
|
24
|
-
# Find the synsets of a word in a lexicon.
|
25
|
-
module Sensers
|
26
|
-
extend Treat::Groupable
|
27
|
-
self.type = :annotator
|
28
|
-
self.targets = [:word]
|
29
|
-
self.preset_option = :nym
|
30
|
-
self.presets = [:synonyms, :antonyms,
|
31
|
-
:hyponyms, :hypernyms]
|
32
|
-
end
|
33
|
-
|
34
|
-
# Make Lexicalizers categorizable.
|
35
|
-
extend Treat::Categorizable
|
36
|
-
|
37
|
-
end
|
data/lib/treat/object.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
class Treat::Processors::Chunkers::Autoselect
|
2
|
-
|
3
|
-
def self.chunk(entity, options = {})
|
4
|
-
entity.check_has(:format)
|
5
|
-
begin
|
6
|
-
k = Treat::Processors::
|
7
|
-
Chunkers.const_get(cc(entity.format))
|
8
|
-
k.chunk(entity, options)
|
9
|
-
rescue Treat::Exception
|
10
|
-
Treat::Processors::
|
11
|
-
Chunkers::TXT.chunk(entity, options)
|
12
|
-
end
|
13
|
-
|
14
|
-
end
|
15
|
-
|
16
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
class Treat::Processors::Chunkers::TXT
|
2
|
-
|
3
|
-
# Separates a string into
|
4
|
-
# zones on the basis of newlines.
|
5
|
-
#
|
6
|
-
# Options: none.
|
7
|
-
def self.chunk(entity, options = {})
|
8
|
-
|
9
|
-
entity.check_hasnt_children
|
10
|
-
zones = entity.to_s.split("\n")
|
11
|
-
|
12
|
-
zones.each do |zone|
|
13
|
-
zone.strip!
|
14
|
-
next if zone == ''
|
15
|
-
entity << Treat::Entities::
|
16
|
-
Zone.from_string(zone)
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
data/lib/treat/processors.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
# Processors build trees representing textual entities.
|
2
|
-
module Treat::Processors
|
3
|
-
|
4
|
-
# Chunkers split a document into sections and zones.
|
5
|
-
module Chunkers
|
6
|
-
extend Treat::Groupable
|
7
|
-
self.type = :transformer
|
8
|
-
self.targets = [:document]
|
9
|
-
self.default = :autoselect
|
10
|
-
end
|
11
|
-
|
12
|
-
# Segmenters split a document or zone into sentences.
|
13
|
-
module Segmenters
|
14
|
-
extend Treat::Groupable
|
15
|
-
self.type = :transformer
|
16
|
-
self.targets = [:zone]
|
17
|
-
end
|
18
|
-
|
19
|
-
# Tokenizers split a sentence into Token objects.
|
20
|
-
module Tokenizers
|
21
|
-
extend Treat::Groupable
|
22
|
-
self.type = :transformer
|
23
|
-
self.targets = [:phrase]
|
24
|
-
end
|
25
|
-
|
26
|
-
# Parsers split a sentence into phrase objects
|
27
|
-
# representing its syntactic structure, with the
|
28
|
-
# Token objects as children of the phrases.
|
29
|
-
module Parsers
|
30
|
-
extend Treat::Groupable
|
31
|
-
self.type = :transformer
|
32
|
-
self.targets = [:phrase]
|
33
|
-
end
|
34
|
-
|
35
|
-
# Make Processors categorizable.
|
36
|
-
extend Treat::Categorizable
|
37
|
-
|
38
|
-
end
|
data/lib/treat/retrievers.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# Retrievers find documents in collections.
|
2
|
-
module Treat::Retrievers
|
3
|
-
|
4
|
-
# Indexers create an index of words used
|
5
|
-
# in the documents within a collection.
|
6
|
-
module Indexers
|
7
|
-
extend Treat::Groupable
|
8
|
-
self.type = :annotator
|
9
|
-
self.targets = [:collection]
|
10
|
-
self.default = :ferret
|
11
|
-
end
|
12
|
-
|
13
|
-
# Searchers perform full-text search
|
14
|
-
# on indexed collections in order
|
15
|
-
# to retrieve documents matching
|
16
|
-
# a query.
|
17
|
-
module Searchers
|
18
|
-
extend Treat::Groupable
|
19
|
-
self.type = :computer
|
20
|
-
self.targets = [:collection]
|
21
|
-
self.default = :ferret
|
22
|
-
end
|
23
|
-
|
24
|
-
# Make Retrievers categorizable.
|
25
|
-
extend Treat::Categorizable
|
26
|
-
|
27
|
-
end
|
data/lib/treat/server.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
class Treat::Server
|
2
|
-
|
3
|
-
require 'thin'
|
4
|
-
|
5
|
-
def self.start
|
6
|
-
app = proc do |env|
|
7
|
-
#!/usr/bin/env ruby -w
|
8
|
-
# simple_service.rb
|
9
|
-
# A simple DRb service
|
10
|
-
|
11
|
-
# load DRb
|
12
|
-
require 'drb'
|
13
|
-
|
14
|
-
# start up the DRb service
|
15
|
-
DRb.start_service nil, []
|
16
|
-
|
17
|
-
# We need the uri of the service to connect a client
|
18
|
-
puts DRb.uri
|
19
|
-
|
20
|
-
# wait for the DRb service to finish before exiting
|
21
|
-
DRb.thread.join
|
22
|
-
end
|
23
|
-
run app
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|