treat 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210) hide show
  1. data/LICENSE +2 -4
  2. data/README.md +13 -12
  3. data/bin/MANIFEST +1 -0
  4. data/bin/stanford/bridge.jar +0 -0
  5. data/bin/stanford/joda-time.jar +0 -0
  6. data/bin/stanford/stanford-corenlp.jar +0 -0
  7. data/bin/stanford/stanford-parser.jar +0 -0
  8. data/bin/stanford/xom.jar +0 -0
  9. data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
  10. data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
  11. data/files/{INFO → MANIFEST} +0 -0
  12. data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
  13. data/files/weather-central-canada-heat-wave.html +1370 -0
  14. data/lib/treat/config/core/acronyms.rb +4 -0
  15. data/lib/treat/config/core/encodings.rb +8 -0
  16. data/lib/treat/config/core/entities.rb +2 -0
  17. data/lib/treat/config/core/language.rb +3 -0
  18. data/lib/treat/config/core/paths.rb +8 -0
  19. data/lib/treat/config/core/syntax.rb +1 -0
  20. data/lib/treat/config/core/verbosity.rb +1 -0
  21. data/lib/treat/config/databases/mongo.rb +3 -0
  22. data/lib/treat/config/languages/agnostic.rb +34 -0
  23. data/lib/treat/config/languages/arabic.rb +13 -0
  24. data/lib/treat/config/languages/chinese.rb +13 -0
  25. data/lib/treat/config/languages/dutch.rb +12 -0
  26. data/lib/treat/config/languages/english.rb +60 -0
  27. data/lib/treat/config/languages/french.rb +18 -0
  28. data/lib/treat/config/languages/german.rb +18 -0
  29. data/lib/treat/config/languages/greek.rb +12 -0
  30. data/lib/treat/config/languages/italian.rb +12 -0
  31. data/lib/treat/config/languages/polish.rb +12 -0
  32. data/lib/treat/config/languages/portuguese.rb +12 -0
  33. data/lib/treat/config/languages/russian.rb +12 -0
  34. data/lib/treat/config/languages/spanish.rb +12 -0
  35. data/lib/treat/config/languages/swedish.rb +12 -0
  36. data/lib/treat/config/libraries/stanford.rb +1 -0
  37. data/lib/treat/config/linguistics/categories.rb +4 -0
  38. data/lib/treat/config/linguistics/punctuation.rb +33 -0
  39. data/lib/treat/config/tags/aligned.rb +221 -0
  40. data/lib/treat/config/tags/enju.rb +71 -0
  41. data/lib/treat/config/tags/paris7.rb +17 -0
  42. data/lib/treat/config/tags/ptb.rb +15 -0
  43. data/lib/treat/config/workers/extractors.rb +39 -0
  44. data/lib/treat/config/workers/formatters.rb +20 -0
  45. data/lib/treat/config/workers/inflectors.rb +27 -0
  46. data/lib/treat/config/workers/learners.rb +6 -0
  47. data/lib/treat/config/workers/lexicalizers.rb +18 -0
  48. data/lib/treat/config/workers/list.rb +1 -0
  49. data/lib/treat/config/workers/processors.rb +19 -0
  50. data/lib/treat/config/workers/retrievers.rb +12 -0
  51. data/lib/treat/config.rb +125 -0
  52. data/lib/treat/{classification.rb → core/classification.rb} +1 -1
  53. data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
  54. data/lib/treat/{tree.rb → core/node.rb} +5 -5
  55. data/lib/treat/core/server.rb +3 -0
  56. data/lib/treat/core.rb +5 -0
  57. data/lib/treat/entities/abilities/buildable.rb +61 -56
  58. data/lib/treat/entities/abilities/checkable.rb +2 -2
  59. data/lib/treat/entities/abilities/comparable.rb +21 -0
  60. data/lib/treat/entities/abilities/copyable.rb +2 -0
  61. data/lib/treat/entities/abilities/countable.rb +1 -1
  62. data/lib/treat/entities/abilities/debuggable.rb +1 -1
  63. data/lib/treat/entities/abilities/delegatable.rb +42 -36
  64. data/lib/treat/entities/abilities/doable.rb +2 -2
  65. data/lib/treat/entities/abilities/exportable.rb +1 -1
  66. data/lib/treat/entities/abilities/iterable.rb +21 -33
  67. data/lib/treat/entities/abilities/magical.rb +8 -8
  68. data/lib/treat/entities/abilities/registrable.rb +0 -38
  69. data/lib/treat/entities/abilities/stringable.rb +19 -19
  70. data/lib/treat/entities/collection.rb +31 -0
  71. data/lib/treat/entities/document.rb +10 -0
  72. data/lib/treat/entities/entity.rb +18 -13
  73. data/lib/treat/entities/group.rb +15 -0
  74. data/lib/treat/entities/section.rb +13 -0
  75. data/lib/treat/entities/token.rb +35 -0
  76. data/lib/treat/entities/zone.rb +11 -0
  77. data/lib/treat/entities.rb +5 -75
  78. data/lib/treat/helpers/didyoumean.rb +57 -0
  79. data/lib/treat/helpers/escaping.rb +15 -0
  80. data/lib/treat/helpers/formatting.rb +41 -0
  81. data/lib/treat/helpers/platform.rb +15 -0
  82. data/lib/treat/helpers/reflection.rb +17 -0
  83. data/lib/treat/helpers/temporary.rb +27 -0
  84. data/lib/treat/helpers/verbosity.rb +19 -0
  85. data/lib/treat/helpers.rb +5 -0
  86. data/lib/treat/installer.rb +46 -165
  87. data/lib/treat/loaders/linguistics.rb +22 -27
  88. data/lib/treat/loaders/stanford.rb +23 -41
  89. data/lib/treat/loaders.rb +10 -0
  90. data/lib/treat/proxies.rb +73 -24
  91. data/lib/treat/version.rb +3 -0
  92. data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
  93. data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
  94. data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
  95. data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
  96. data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
  97. data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
  98. data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
  99. data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
  100. data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
  101. data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
  102. data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
  103. data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
  104. data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
  105. data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
  106. data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
  107. data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
  108. data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
  109. data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
  110. data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
  111. data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
  112. data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
  113. data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
  114. data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
  115. data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
  116. data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
  117. data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
  118. data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
  119. data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
  120. data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
  121. data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
  122. data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
  123. data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
  124. data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
  125. data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
  126. data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
  127. data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
  128. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
  129. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
  130. data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
  131. data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
  132. data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
  133. data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
  134. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
  135. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
  136. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
  137. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
  138. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
  139. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
  140. data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
  141. data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
  142. data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
  143. data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
  144. data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
  145. data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
  146. data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
  147. data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
  148. data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
  149. data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
  150. data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
  151. data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
  152. data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
  153. data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
  154. data/lib/treat/workers.rb +96 -0
  155. data/lib/treat.rb +23 -49
  156. data/spec/collection.rb +4 -4
  157. data/spec/document.rb +5 -5
  158. data/spec/entity.rb +33 -32
  159. data/spec/{tree.rb → node.rb} +5 -5
  160. data/spec/phrase.rb +5 -39
  161. data/spec/sandbox.rb +212 -6
  162. data/spec/token.rb +12 -9
  163. data/spec/treat.rb +12 -9
  164. data/spec/word.rb +10 -9
  165. data/spec/zone.rb +6 -2
  166. data/tmp/{INFO → MANIFEST} +0 -0
  167. data/tmp/english.yaml +10340 -0
  168. metadata +149 -139
  169. data/lib/treat/ai.rb +0 -12
  170. data/lib/treat/categories.rb +0 -90
  171. data/lib/treat/categorizable.rb +0 -44
  172. data/lib/treat/configurable.rb +0 -115
  173. data/lib/treat/dependencies.rb +0 -25
  174. data/lib/treat/downloader.rb +0 -87
  175. data/lib/treat/entities/abilities.rb +0 -10
  176. data/lib/treat/entities/entities.rb +0 -102
  177. data/lib/treat/exception.rb +0 -7
  178. data/lib/treat/extractors.rb +0 -79
  179. data/lib/treat/formatters/serializers/mongo.rb +0 -64
  180. data/lib/treat/formatters.rb +0 -41
  181. data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
  182. data/lib/treat/inflectors.rb +0 -52
  183. data/lib/treat/kernel.rb +0 -208
  184. data/lib/treat/languages/arabic.rb +0 -16
  185. data/lib/treat/languages/chinese.rb +0 -16
  186. data/lib/treat/languages/dutch.rb +0 -16
  187. data/lib/treat/languages/english.rb +0 -63
  188. data/lib/treat/languages/french.rb +0 -20
  189. data/lib/treat/languages/german.rb +0 -20
  190. data/lib/treat/languages/greek.rb +0 -16
  191. data/lib/treat/languages/italian.rb +0 -17
  192. data/lib/treat/languages/language.rb +0 -10
  193. data/lib/treat/languages/list.txt +0 -504
  194. data/lib/treat/languages/polish.rb +0 -16
  195. data/lib/treat/languages/portuguese.rb +0 -16
  196. data/lib/treat/languages/russian.rb +0 -16
  197. data/lib/treat/languages/spanish.rb +0 -16
  198. data/lib/treat/languages/swedish.rb +0 -16
  199. data/lib/treat/languages.rb +0 -132
  200. data/lib/treat/lexicalizers.rb +0 -37
  201. data/lib/treat/object.rb +0 -7
  202. data/lib/treat/processors/chunkers/autoselect.rb +0 -16
  203. data/lib/treat/processors/chunkers/txt.rb +0 -21
  204. data/lib/treat/processors.rb +0 -38
  205. data/lib/treat/retrievers.rb +0 -27
  206. data/lib/treat/server.rb +0 -26
  207. data/lib/treat/universalisation/encodings.rb +0 -12
  208. data/lib/treat/universalisation/tags.rb +0 -453
  209. data/lib/treat/universalisation.rb +0 -9
  210. data/spec/languages.rb +0 -25
@@ -1,16 +0,0 @@
1
- class Treat::Languages::Russian
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :segmenters => [:punkt],
12
- :tokenizers => [:tactful]
13
- }
14
- Retrievers = {}
15
-
16
- end
@@ -1,16 +0,0 @@
1
- class Treat::Languages::Spanish
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :segmenters => [:tactful],
12
- :tokenizers => [:tactful]
13
- }
14
- Retrievers = {}
15
-
16
- end
@@ -1,16 +0,0 @@
1
- class Treat::Languages::Swedish
2
-
3
- RequiredDependencies = []
4
- OptionalDependencies = []
5
-
6
- Extractors = {}
7
- Inflectors = {}
8
- Lexicalizers = {}
9
- Processors = {
10
- :chunkers => [:txt],
11
- :segmenters => [:punkt],
12
- :tokenizers => [:tactful]
13
- }
14
- Retrievers = {}
15
-
16
- end
@@ -1,132 +0,0 @@
1
- # This module provides linguistic resources
2
- # for the Treat library, including information
3
- # about language codes, the functions available
4
- # for each language, and the different tags used
5
- # to mark up that language.
6
- module Treat::Languages
7
-
8
- def self.const_missing(const)
9
- lang = const.to_s.downcase
10
- f = File.join(File.dirname(__FILE__), "languages", lang)
11
- unless File.readable?(f + '.rb')
12
- raise Treat::Exception,
13
- "Language #{lang} is not supported."
14
- end
15
- require f
16
- const_get(const)
17
- end
18
-
19
- # Yield a lowercase symbol for each
20
- # defined language.
21
- def self.each
22
- constants.each do |constant|
23
- yield constant.to_s.downcase.intern
24
- end
25
- end
26
-
27
- # Identifier constants for language codes.
28
- ISO639_1 = 1
29
- ISO639_2 = 2
30
-
31
- # Describe a language, given as a code (ISO-639-1 or ISO-639-2)
32
- # or as a full name, by its full name in French or English.
33
- def self.describe(lang, desc_lang = :en)
34
- raise "Must provide a non-nil language "+
35
- "identifier to describe." if lang.nil?
36
- lang = code(lang).to_s
37
- if [:en, :eng, :english, :anglais].
38
- include?(desc_lang)
39
- l = @@english_full.key(lang)
40
- elsif [:fr, :fra, :french, :french].
41
- include?(desc_lang)
42
- l = @@french_full.key(lang)
43
- else
44
- raise Treat::Exception,
45
- "Unknown language to describe: #{desc_lang}."
46
- end
47
- not_found(lang) if l.nil?
48
- l.intern
49
- end
50
-
51
- # Raise an error message when a language code
52
- # or description is not found and suggest
53
- # possible misspellings.
54
- def self.not_found(lang)
55
- msg = "Language '#{lang}' does not exist."
56
- all = @@iso639_2.keys + @@iso639_1.keys +
57
- @@english_full.keys + @@french_full.keys
58
- msg += did_you_mean?(all, lang)
59
- raise Treat::Exception, msg
60
- end
61
-
62
- # Return the class representing a language.
63
- def self.get(lang)
64
- lang = Treat::Languages.describe(lang).to_s
65
- begin
66
- const_get(lang.capitalize)
67
- rescue
68
- not_found(lang)
69
- end
70
- end
71
-
72
- # Find a language by ISO-639-1 or ISO-639-2 code
73
- # or full name (in English or French) and return
74
- # the ISO-639-1 or ISO-639-2 language code as a
75
- # lowercase identifier.
76
- def self.code(lang, rc = ISO639_2)
77
- raise "Must provide a non-nil language "+
78
- "identifier to describe." if lang.nil?
79
- get_languages
80
- lang = lang.to_s.downcase
81
- if @@iso639_1.has_key?(lang)
82
- return lang.intern if rc == ISO639_2
83
- return @@iso639_1[lang].intern if rc == ISO639_1
84
- elsif @@iso639_2.has_key?(lang)
85
- return lang.intern if rc == ISO639_2
86
- return @@iso639_2[lang].intern if rc == ISO639_1
87
- elsif @@english_full.has_key?(lang)
88
- return @@english_full[lang].intern if rc == ISO639_2
89
- return @@iso639_2[@@english_full[lang]].intern if rc == ISO639_1
90
- elsif @@french_full.has_key?(lang)
91
- return @@french_full[lang].intern if rc == ISO639_2
92
- return @@iso639_2[@@french_full[lang]].intern if rc == ISO639_1
93
- else
94
- not_found(lang)
95
- end
96
-
97
- end
98
-
99
- # Whether the language list has been loaded or not.
100
- @@loaded = false
101
-
102
- # Get the languages from the dictionary.
103
- def self.get_languages
104
- return if @@loaded
105
- @@iso639_1 = {}; @@iso639_2 = {};
106
- @@english_full = {}; @@french_full = {}
107
- languages = IO.readlines(File.join(
108
- File.dirname(__FILE__), "languages", "list.txt"))
109
- languages.each do |language|
110
- iso639_2, iso639_1, english_desc, french_desc =
111
- language.split(',')
112
- @@iso639_1[iso639_1] = iso639_2
113
- @@iso639_2[iso639_2] = iso639_1
114
- unless english_desc.nil?
115
- english_desc.strip.downcase.split('|').each do |l|
116
- @@english_full[l.downcase.strip] = iso639_2
117
- end
118
- end
119
- unless french_desc.nil?
120
- french_desc.strip.downcase.split('|').each do |l|
121
- @@french_full[l.downcase.strip] = iso639_2
122
- end
123
- end
124
- end
125
- @@loaded = true
126
- end
127
-
128
- # Get the language list.
129
- get_languages
130
-
131
-
132
- end
@@ -1,37 +0,0 @@
1
- # Lexicalizers retrieve lexical information
2
- # (part of speech tag, general word category, synsets,
3
- # synonyms, antonyms, hyponyms, hypernyms, lexical
4
- # relations, grammatical links)
5
- # of an entity.
6
- module Treat::Lexicalizers
7
-
8
- # Taggers return the part of speech tag of a word.
9
- module Taggers
10
- extend Treat::Groupable
11
- self.type = :annotator
12
- self.targets = [:sentence, :phrase, :token]
13
- end
14
-
15
- # Return the general category of a word.
16
- module Categorizers
17
- extend Treat::Groupable
18
- self.type = :annotator
19
- self.targets = [:sentence, :phrase, :token]
20
- self.recursive = true
21
- self.default = :from_tag
22
- end
23
-
24
- # Find the synsets of a word in a lexicon.
25
- module Sensers
26
- extend Treat::Groupable
27
- self.type = :annotator
28
- self.targets = [:word]
29
- self.preset_option = :nym
30
- self.presets = [:synonyms, :antonyms,
31
- :hyponyms, :hypernyms]
32
- end
33
-
34
- # Make Lexicalizers categorizable.
35
- extend Treat::Categorizable
36
-
37
- end
data/lib/treat/object.rb DELETED
@@ -1,7 +0,0 @@
1
- # Make undefining constants publicly available on any object.
2
- Object.module_eval do
3
- # Unset a constant without private access.
4
- def self.const_unset(const)
5
- Object.instance_eval { remove_const(const) }
6
- end
7
- end
@@ -1,16 +0,0 @@
1
- class Treat::Processors::Chunkers::Autoselect
2
-
3
- def self.chunk(entity, options = {})
4
- entity.check_has(:format)
5
- begin
6
- k = Treat::Processors::
7
- Chunkers.const_get(cc(entity.format))
8
- k.chunk(entity, options)
9
- rescue Treat::Exception
10
- Treat::Processors::
11
- Chunkers::TXT.chunk(entity, options)
12
- end
13
-
14
- end
15
-
16
- end
@@ -1,21 +0,0 @@
1
- class Treat::Processors::Chunkers::TXT
2
-
3
- # Separates a string into
4
- # zones on the basis of newlines.
5
- #
6
- # Options: none.
7
- def self.chunk(entity, options = {})
8
-
9
- entity.check_hasnt_children
10
- zones = entity.to_s.split("\n")
11
-
12
- zones.each do |zone|
13
- zone.strip!
14
- next if zone == ''
15
- entity << Treat::Entities::
16
- Zone.from_string(zone)
17
- end
18
-
19
- end
20
-
21
- end
@@ -1,38 +0,0 @@
1
- # Processors build trees representing textual entities.
2
- module Treat::Processors
3
-
4
- # Chunkers split a document into sections and zones.
5
- module Chunkers
6
- extend Treat::Groupable
7
- self.type = :transformer
8
- self.targets = [:document]
9
- self.default = :autoselect
10
- end
11
-
12
- # Segmenters split a document or zone into sentences.
13
- module Segmenters
14
- extend Treat::Groupable
15
- self.type = :transformer
16
- self.targets = [:zone]
17
- end
18
-
19
- # Tokenizers split a sentence into Token objects.
20
- module Tokenizers
21
- extend Treat::Groupable
22
- self.type = :transformer
23
- self.targets = [:phrase]
24
- end
25
-
26
- # Parsers split a sentence into phrase objects
27
- # representing its syntactic structure, with the
28
- # Token objects as children of the phrases.
29
- module Parsers
30
- extend Treat::Groupable
31
- self.type = :transformer
32
- self.targets = [:phrase]
33
- end
34
-
35
- # Make Processors categorizable.
36
- extend Treat::Categorizable
37
-
38
- end
@@ -1,27 +0,0 @@
1
- # Retrievers find documents in collections.
2
- module Treat::Retrievers
3
-
4
- # Indexers create an index of words used
5
- # in the documents within a collection.
6
- module Indexers
7
- extend Treat::Groupable
8
- self.type = :annotator
9
- self.targets = [:collection]
10
- self.default = :ferret
11
- end
12
-
13
- # Searchers perform full-text search
14
- # on indexed collections in order
15
- # to retrieve documents matching
16
- # a query.
17
- module Searchers
18
- extend Treat::Groupable
19
- self.type = :computer
20
- self.targets = [:collection]
21
- self.default = :ferret
22
- end
23
-
24
- # Make Retrievers categorizable.
25
- extend Treat::Categorizable
26
-
27
- end
data/lib/treat/server.rb DELETED
@@ -1,26 +0,0 @@
1
- class Treat::Server
2
-
3
- require 'thin'
4
-
5
- def self.start
6
- app = proc do |env|
7
- #!/usr/bin/env ruby -w
8
- # simple_service.rb
9
- # A simple DRb service
10
-
11
- # load DRb
12
- require 'drb'
13
-
14
- # start up the DRb service
15
- DRb.start_service nil, []
16
-
17
- # We need the uri of the service to connect a client
18
- puts DRb.uri
19
-
20
- # wait for the DRb service to finish before exiting
21
- DRb.thread.join
22
- end
23
- run app
24
- end
25
-
26
- end
@@ -1,12 +0,0 @@
1
- module Treat::Universalisation
2
-
3
- Encodings = {
4
- :arabic => 'UTF-8',
5
- :chinese => 'GB18030',
6
- :english => 'UTF-8',
7
- :french => 'ISO_8859-1',
8
- :german => 'ISO_8859-1',
9
- :hebrew => 'UTF-8'
10
- }
11
-
12
- end