treat 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210) hide show
  1. data/LICENSE +2 -4
  2. data/README.md +13 -12
  3. data/bin/MANIFEST +1 -0
  4. data/bin/stanford/bridge.jar +0 -0
  5. data/bin/stanford/joda-time.jar +0 -0
  6. data/bin/stanford/stanford-corenlp.jar +0 -0
  7. data/bin/stanford/stanford-parser.jar +0 -0
  8. data/bin/stanford/xom.jar +0 -0
  9. data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
  10. data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
  11. data/files/{INFO → MANIFEST} +0 -0
  12. data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
  13. data/files/weather-central-canada-heat-wave.html +1370 -0
  14. data/lib/treat/config/core/acronyms.rb +4 -0
  15. data/lib/treat/config/core/encodings.rb +8 -0
  16. data/lib/treat/config/core/entities.rb +2 -0
  17. data/lib/treat/config/core/language.rb +3 -0
  18. data/lib/treat/config/core/paths.rb +8 -0
  19. data/lib/treat/config/core/syntax.rb +1 -0
  20. data/lib/treat/config/core/verbosity.rb +1 -0
  21. data/lib/treat/config/databases/mongo.rb +3 -0
  22. data/lib/treat/config/languages/agnostic.rb +34 -0
  23. data/lib/treat/config/languages/arabic.rb +13 -0
  24. data/lib/treat/config/languages/chinese.rb +13 -0
  25. data/lib/treat/config/languages/dutch.rb +12 -0
  26. data/lib/treat/config/languages/english.rb +60 -0
  27. data/lib/treat/config/languages/french.rb +18 -0
  28. data/lib/treat/config/languages/german.rb +18 -0
  29. data/lib/treat/config/languages/greek.rb +12 -0
  30. data/lib/treat/config/languages/italian.rb +12 -0
  31. data/lib/treat/config/languages/polish.rb +12 -0
  32. data/lib/treat/config/languages/portuguese.rb +12 -0
  33. data/lib/treat/config/languages/russian.rb +12 -0
  34. data/lib/treat/config/languages/spanish.rb +12 -0
  35. data/lib/treat/config/languages/swedish.rb +12 -0
  36. data/lib/treat/config/libraries/stanford.rb +1 -0
  37. data/lib/treat/config/linguistics/categories.rb +4 -0
  38. data/lib/treat/config/linguistics/punctuation.rb +33 -0
  39. data/lib/treat/config/tags/aligned.rb +221 -0
  40. data/lib/treat/config/tags/enju.rb +71 -0
  41. data/lib/treat/config/tags/paris7.rb +17 -0
  42. data/lib/treat/config/tags/ptb.rb +15 -0
  43. data/lib/treat/config/workers/extractors.rb +39 -0
  44. data/lib/treat/config/workers/formatters.rb +20 -0
  45. data/lib/treat/config/workers/inflectors.rb +27 -0
  46. data/lib/treat/config/workers/learners.rb +6 -0
  47. data/lib/treat/config/workers/lexicalizers.rb +18 -0
  48. data/lib/treat/config/workers/list.rb +1 -0
  49. data/lib/treat/config/workers/processors.rb +19 -0
  50. data/lib/treat/config/workers/retrievers.rb +12 -0
  51. data/lib/treat/config.rb +125 -0
  52. data/lib/treat/{classification.rb → core/classification.rb} +1 -1
  53. data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
  54. data/lib/treat/{tree.rb → core/node.rb} +5 -5
  55. data/lib/treat/core/server.rb +3 -0
  56. data/lib/treat/core.rb +5 -0
  57. data/lib/treat/entities/abilities/buildable.rb +61 -56
  58. data/lib/treat/entities/abilities/checkable.rb +2 -2
  59. data/lib/treat/entities/abilities/comparable.rb +21 -0
  60. data/lib/treat/entities/abilities/copyable.rb +2 -0
  61. data/lib/treat/entities/abilities/countable.rb +1 -1
  62. data/lib/treat/entities/abilities/debuggable.rb +1 -1
  63. data/lib/treat/entities/abilities/delegatable.rb +42 -36
  64. data/lib/treat/entities/abilities/doable.rb +2 -2
  65. data/lib/treat/entities/abilities/exportable.rb +1 -1
  66. data/lib/treat/entities/abilities/iterable.rb +21 -33
  67. data/lib/treat/entities/abilities/magical.rb +8 -8
  68. data/lib/treat/entities/abilities/registrable.rb +0 -38
  69. data/lib/treat/entities/abilities/stringable.rb +19 -19
  70. data/lib/treat/entities/collection.rb +31 -0
  71. data/lib/treat/entities/document.rb +10 -0
  72. data/lib/treat/entities/entity.rb +18 -13
  73. data/lib/treat/entities/group.rb +15 -0
  74. data/lib/treat/entities/section.rb +13 -0
  75. data/lib/treat/entities/token.rb +35 -0
  76. data/lib/treat/entities/zone.rb +11 -0
  77. data/lib/treat/entities.rb +5 -75
  78. data/lib/treat/helpers/didyoumean.rb +57 -0
  79. data/lib/treat/helpers/escaping.rb +15 -0
  80. data/lib/treat/helpers/formatting.rb +41 -0
  81. data/lib/treat/helpers/platform.rb +15 -0
  82. data/lib/treat/helpers/reflection.rb +17 -0
  83. data/lib/treat/helpers/temporary.rb +27 -0
  84. data/lib/treat/helpers/verbosity.rb +19 -0
  85. data/lib/treat/helpers.rb +5 -0
  86. data/lib/treat/installer.rb +46 -165
  87. data/lib/treat/loaders/linguistics.rb +22 -27
  88. data/lib/treat/loaders/stanford.rb +23 -41
  89. data/lib/treat/loaders.rb +10 -0
  90. data/lib/treat/proxies.rb +73 -24
  91. data/lib/treat/version.rb +3 -0
  92. data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
  93. data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
  94. data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
  95. data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
  96. data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
  97. data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
  98. data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
  99. data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
  100. data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
  101. data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
  102. data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
  103. data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
  104. data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
  105. data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
  106. data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
  107. data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
  108. data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
  109. data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
  110. data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
  111. data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
  112. data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
  113. data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
  114. data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
  115. data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
  116. data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
  117. data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
  118. data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
  119. data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
  120. data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
  121. data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
  122. data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
  123. data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
  124. data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
  125. data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
  126. data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
  127. data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
  128. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
  129. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
  130. data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
  131. data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
  132. data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
  133. data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
  134. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
  135. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
  136. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
  137. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
  138. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
  139. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
  140. data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
  141. data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
  142. data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
  143. data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
  144. data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
  145. data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
  146. data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
  147. data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
  148. data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
  149. data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
  150. data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
  151. data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
  152. data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
  153. data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
  154. data/lib/treat/workers.rb +96 -0
  155. data/lib/treat.rb +23 -49
  156. data/spec/collection.rb +4 -4
  157. data/spec/document.rb +5 -5
  158. data/spec/entity.rb +33 -32
  159. data/spec/{tree.rb → node.rb} +5 -5
  160. data/spec/phrase.rb +5 -39
  161. data/spec/sandbox.rb +212 -6
  162. data/spec/token.rb +12 -9
  163. data/spec/treat.rb +12 -9
  164. data/spec/word.rb +10 -9
  165. data/spec/zone.rb +6 -2
  166. data/tmp/{INFO → MANIFEST} +0 -0
  167. data/tmp/english.yaml +10340 -0
  168. metadata +149 -139
  169. data/lib/treat/ai.rb +0 -12
  170. data/lib/treat/categories.rb +0 -90
  171. data/lib/treat/categorizable.rb +0 -44
  172. data/lib/treat/configurable.rb +0 -115
  173. data/lib/treat/dependencies.rb +0 -25
  174. data/lib/treat/downloader.rb +0 -87
  175. data/lib/treat/entities/abilities.rb +0 -10
  176. data/lib/treat/entities/entities.rb +0 -102
  177. data/lib/treat/exception.rb +0 -7
  178. data/lib/treat/extractors.rb +0 -79
  179. data/lib/treat/formatters/serializers/mongo.rb +0 -64
  180. data/lib/treat/formatters.rb +0 -41
  181. data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
  182. data/lib/treat/inflectors.rb +0 -52
  183. data/lib/treat/kernel.rb +0 -208
  184. data/lib/treat/languages/arabic.rb +0 -16
  185. data/lib/treat/languages/chinese.rb +0 -16
  186. data/lib/treat/languages/dutch.rb +0 -16
  187. data/lib/treat/languages/english.rb +0 -63
  188. data/lib/treat/languages/french.rb +0 -20
  189. data/lib/treat/languages/german.rb +0 -20
  190. data/lib/treat/languages/greek.rb +0 -16
  191. data/lib/treat/languages/italian.rb +0 -17
  192. data/lib/treat/languages/language.rb +0 -10
  193. data/lib/treat/languages/list.txt +0 -504
  194. data/lib/treat/languages/polish.rb +0 -16
  195. data/lib/treat/languages/portuguese.rb +0 -16
  196. data/lib/treat/languages/russian.rb +0 -16
  197. data/lib/treat/languages/spanish.rb +0 -16
  198. data/lib/treat/languages/swedish.rb +0 -16
  199. data/lib/treat/languages.rb +0 -132
  200. data/lib/treat/lexicalizers.rb +0 -37
  201. data/lib/treat/object.rb +0 -7
  202. data/lib/treat/processors/chunkers/autoselect.rb +0 -16
  203. data/lib/treat/processors/chunkers/txt.rb +0 -21
  204. data/lib/treat/processors.rb +0 -38
  205. data/lib/treat/retrievers.rb +0 -27
  206. data/lib/treat/server.rb +0 -26
  207. data/lib/treat/universalisation/encodings.rb +0 -12
  208. data/lib/treat/universalisation/tags.rb +0 -453
  209. data/lib/treat/universalisation.rb +0 -9
  210. data/spec/languages.rb +0 -25
@@ -0,0 +1,57 @@
1
+ # Search the list to see if there are
2
+ # words similar to #name in the #list.
3
+ # If yes, return a string saying
4
+ # "Did you mean ... ?" with the names.
5
+ def did_you_mean?(list, name)
6
+ return '' # Fix
7
+ list = list.map { |e| e.to_s }
8
+ name = name.to_s
9
+ sugg = []
10
+ list.each do |element|
11
+ l = levenshtein(element,name)
12
+ if l > 0 && l < 2
13
+ sugg << element
14
+ end
15
+ end
16
+ unless sugg.size == 0
17
+ if sugg.size == 1
18
+ msg += " Perhaps you meant '#{sugg[0]}' ?"
19
+ else
20
+ sugg_quote = sugg[0..-2].map do
21
+ |x| '\'' + x + '\''
22
+ end
23
+ msg += " Perhaps you meant " +
24
+ "#{sugg_quote.join(', ')}," +
25
+ " or '#{sugg[-1]}' ?"
26
+ end
27
+ end
28
+ msg
29
+ end
30
+
31
+ alias :dym? :did_you_mean?
32
+
33
+ # Return the Levenshtein distance between
34
+ # two strings taking into account the costs
35
+ # of insertion, deletion, and substitution.
36
+ # Used by did_you_mean? to detect typos.
37
+ def levenshtein(first, other, ins=1, del=1, sub=1)
38
+ return nil if first.nil? || other.nil?
39
+ dm = []
40
+ dm[0] = (0..first.length).collect { |i| i * ins}
41
+ fill = [0] * (first.length - 1).abs
42
+ for i in 1..other.length
43
+ dm[i] = [i * del, fill.flatten]
44
+ end
45
+ for i in 1..other.length
46
+ for j in 1..first.length
47
+ dm[i][j] = [
48
+ dm[i-1][j-1] +
49
+ (first[i-1] ==
50
+ other[i-1] ? 0 : sub),
51
+ dm[i][j-1] + ins,
52
+ dm[i-1][j] + del
53
+ ].min
54
+ end
55
+ end
56
+ dm[other.length][first.length]
57
+ end
@@ -0,0 +1,15 @@
1
+ # This is ugly, we should change it.
2
+ EscapeChar = '^^'
3
+ EscapedEscapeChar = '\^\^'
4
+
5
+ def escape_floats!(s)
6
+ s.gsub!(/([0-9]+)\.([0-9]+)/) do
7
+ $1 + EscapeChar + $2
8
+ end
9
+ end
10
+
11
+ def unescape_floats!(s)
12
+ s.gsub!(/([0-9]+)#{EscapedEscapeChar}([0-9]+)/) do
13
+ $1 + '.' + $2
14
+ end
15
+ end
@@ -0,0 +1,41 @@
1
+ # A cache to optimize camel casing.
2
+ @@cc_cache = {}
3
+
4
+ # A cache to optimize un camel casing.
5
+ @@ucc_cache = {}
6
+
7
+ # Convert un_camel_case to CamelCase.
8
+ def camel_case(o_phrase)
9
+ phrase = o_phrase.to_s.dup
10
+ return @@cc_cache[o_phrase] if @@cc_cache[o_phrase]
11
+
12
+ if Treat.core.acronyms.include?(phrase)
13
+ phrase = phrase.upcase
14
+ else
15
+ phrase.gsub!(/^[a-z]|_[a-z]/) { |a| a.upcase }
16
+ phrase.gsub!('_', '')
17
+ end
18
+ @@cc_cache[o_phrase] = phrase
19
+ end
20
+
21
+ alias :cc :camel_case
22
+
23
+ # Convert CamelCase to un_camel_case.
24
+ def un_camel_case(o_phrase)
25
+ phrase = o_phrase.to_s.dup
26
+ return @@ucc_cache[o_phrase] if @@ucc_cache[o_phrase]
27
+ if Treat.core.acronyms.include?(phrase.downcase)
28
+ phrase = phrase.downcase
29
+ else
30
+ phrase.gsub!(/[A-Z]/) { |p| '_' + p.downcase }
31
+ phrase = phrase[1..-1] if phrase[0] == '_'
32
+ end
33
+ @@ucc_cache[o_phrase] = phrase
34
+ end
35
+
36
+ alias :ucc :un_camel_case
37
+
38
+ # Retrieve the Class from a Module::Class.
39
+ def class_name(n); n.to_s.split('::')[-1]; end
40
+
41
+ alias :cl :class_name
@@ -0,0 +1,15 @@
1
+ # Detect the platform we're running on.
2
+ def detect_platform
3
+ p = RUBY_PLATFORM.downcase
4
+ return :mac if p.include?("darwin")
5
+ return :windows if p.include?("mswin")
6
+ return :linux if p.include?("linux")
7
+ return :unknown
8
+ end
9
+
10
+ # Set up the right NULL device.
11
+ if detect_platform == :windows
12
+ NULL_DEVICE = 'NUL'
13
+ else
14
+ NULL_DEVICE = '/dev/null'
15
+ end
@@ -0,0 +1,17 @@
1
+ # Return the name of the method that
2
+ # called the method that calls this method.
3
+ def caller_method(n = 3)
4
+ at = caller(n).first
5
+ /^(.+?):(\d+)(?::in `(.*)')?/ =~ at
6
+ Regexp.last_match[3].
7
+ gsub('block in ', '').intern
8
+ end
9
+
10
+ Object.module_eval do
11
+ # Unset a constant publicly.
12
+ def self.const_unset(const)
13
+ Object.instance_eval do
14
+ remove_const(const)
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,27 @@
1
+ # Require file utilities for creating and
2
+ # deleting temporary files.
3
+ require 'fileutils'
4
+
5
+ # Create a temporary file which is deleted
6
+ # after execution of the block.
7
+ def create_temp_file(ext, value = nil, &block)
8
+ fname = Treat.paths.tmp +
9
+ "#{Random.rand(10000000).to_s}.#{ext}"
10
+ File.open(fname, 'w') do |f|
11
+ f.write(value) if value
12
+ block.call(f.path)
13
+ end
14
+ ensure
15
+ File.delete(fname)
16
+ end
17
+
18
+ # Create a temporary directory, which is
19
+ # deleted after execution of the block.
20
+ def create_temp_dir(&block)
21
+ dname = Treat.paths.tmp +
22
+ "#{Random.rand(10000000).to_s}"
23
+ Dir.mkdir(dname)
24
+ block.call(dname)
25
+ ensure
26
+ FileUtils.rm_rf(dname)
27
+ end
@@ -0,0 +1,19 @@
1
+ # Runs a block of code without warnings.
2
+ def silence_warnings(&block)
3
+ warn_level = $VERBOSE
4
+ $VERBOSE = nil
5
+ result = block.call
6
+ $VERBOSE = warn_level
7
+ result
8
+ end
9
+
10
+ # Runs a block of code while blocking stdout.
11
+ def silence_stdout(log = '/dev/null')
12
+ unless Treat.core.verbosity.silence
13
+ yield; return
14
+ end
15
+ old = $stdout.dup
16
+ $stdout.reopen(File.new(log, 'w'))
17
+ yield
18
+ $stdout = old
19
+ end
@@ -0,0 +1,5 @@
1
+ # Contains utility functions used by Treat.
2
+ module Treat::Helpers
3
+ p = Treat.paths.lib + 'treat/helpers/*.rb'
4
+ Dir.glob(p).each { |f| require f }
5
+ end
@@ -1,5 +1,4 @@
1
- # Installer is a dependency manager for languages.
2
- #
1
+ # A dependency manager for Treat language plugins.
3
2
  # It can be called by using Treat.install(language).
4
3
  module Treat::Installer
5
4
 
@@ -7,15 +6,9 @@ module Treat::Installer
7
6
  silence_warnings do
8
7
  require 'rubygems/dependency_installer'
9
8
  end
10
- require 'treat/downloader'
11
- require 'treat/dependencies'
12
-
13
- # Package managers for each platforms.
14
- PackageManagers = {
15
- :mac => 'port',
16
- :linux => 'apt-get',
17
- :windows => 'win-get'
18
- }
9
+
10
+ require 'treat/version'
11
+ require 'schiphol'
19
12
 
20
13
  # Address of the server with the files.
21
14
  Server = 'www.louismullie.com'
@@ -29,66 +22,40 @@ module Treat::Installer
29
22
 
30
23
  # Absolute paths required for cp and mkdir.
31
24
  Paths = {
32
- :tmp => File.absolute_path(Treat.tmp),
33
- :bin => File.absolute_path(Treat.bin),
34
- :models => File.absolute_path(Treat.models)
25
+ :tmp => File.absolute_path(Treat.paths.tmp),
26
+ :bin => File.absolute_path(Treat.paths.bin),
27
+ :models => File.absolute_path(Treat.paths.models)
35
28
  }
36
29
 
37
30
  # Install required dependencies and optional
38
31
  # dependencies for a specific language.
39
- def self.install(language = :english)
32
+ def self.install(language = 'english')
40
33
 
41
34
  @@installer = Gem::DependencyInstaller.new
42
35
 
43
- if language == :travis
36
+ if language == 'travis'
44
37
  install_travis; return
45
38
  end
46
39
 
47
- lang_class = Treat::Languages.get(language.to_s)
48
40
  l = "#{language.to_s.capitalize} language"
49
41
 
50
- puts
51
- puts "Treat Installer, v. #{Treat::VERSION.to_s}\n"
52
- puts
42
+ puts "\nTreat Installer, v. #{Treat::VERSION.to_s}\n\n"
53
43
 
54
44
  begin
55
45
 
56
- title "Install language-independent gem dependencies."
57
-
58
- case prompt(
59
- "1 - Install all default language-independent dependencies\n" +
60
- "2 - Select dependencies to install manually\n" +
61
- "3 - Skip this step", ['1', '2', '3'])
62
- when '1' then install_dependencies(false)
63
- when '2' then install_dependencies(true)
64
- when '3' then puts 'Skipping this step.'
65
- end
66
-
67
- title "Install gem dependencies for the #{l}.\n"
68
-
69
- dflt = lang_class::RequiredDependencies
70
- all = dflt + lang_class::OptionalDependencies
71
- case prompt("1 - Install default dependencies.\n" +
72
- "2 - Select dependencies to install manually.\n" +
73
- "3 - Skip this step.", ['1', '2', '3'])
74
- when '1' then install_language_dependencies(dflt, false)
75
- when '2' then install_language_dependencies(all, true)
76
- when '3' then puts 'Skipping this step.'
77
- end
46
+ title "Installing core dependencies."
47
+ install_language_dependencies('agnostic')
78
48
 
79
- Treat::Downloader.show_progress = true
49
+ title "Installing dependencies for the #{l}.\n"
50
+ install_language_dependencies(language)
80
51
 
81
52
  # If gem is installed only, download models.
82
53
  begin
83
54
  Gem::Specification.find_by_name('punkt-segmenter')
84
- title "Downloading model for the Punkt segmenter for the #{l}."
85
- # Need fix
86
- download_punkt_models([language.to_s])
55
+ title "Downloading models for the Punkt segmenter for the #{l}."
56
+ download_punkt_models(language)
87
57
  rescue Gem::LoadError; end
88
-
89
- # Download reuters models always
90
- download_reuters_models
91
-
58
+
92
59
  # If stanford is installed, download models.
93
60
  begin
94
61
  Gem::Specification.find_by_name('stanford-core-nlp')
@@ -96,23 +63,7 @@ module Treat::Installer
96
63
  "model files for the the #{l}.\n\n"
97
64
  package = (language == :english) ? :english : :all
98
65
  download_stanford(package)
99
- rescue Gem::LoadError
100
- puts 'Stanford-core-nlp gem not installed.'
101
- puts 'Skipping download of Stanford models.'
102
- end
103
-
104
- title "Install external binary libraries " +
105
- "(requires port, apt-get or win-get).\n"
106
- puts "Warning: this may take a long amount of time."
107
-
108
- case prompt("1 - Select binaries to install manually.\n" +
109
- "2 - Skip this step.", ['1', '2'])
110
- when '1' then install_binaries
111
- when '2' then puts 'Skipping this step.'
112
- end
113
-
114
- puts
115
- puts "-----\nDone!"
66
+ rescue Gem::LoadError; end
116
67
 
117
68
  rescue Errno::EACCES => e
118
69
 
@@ -124,77 +75,32 @@ module Treat::Installer
124
75
 
125
76
  end
126
77
 
127
- # Automated install for Travis CI.
78
+ # Minimal install for Travis CI.
128
79
  def self.install_travis
129
- dep = (Treat::Languages::English::RequiredDependencies +
130
- Treat::Languages::English::OptionalDependencies)
131
- install_dependencies(false)
132
- install_language_dependencies(dep, false)
80
+ install_language_dependencies(:agnostic)
81
+ install_language_dependencies(:english)
133
82
  download_stanford(:minimal)
134
- download_punkt_models([:english])
83
+ download_punkt_models(:english)
135
84
  end
136
85
 
137
- def self.install_dependencies(optionally)
138
-
139
- Treat::Dependencies::Gem.each do |d|
140
- dep, ver, pur = *d
141
- install_gem(dep, ver, pur, optionally)
142
- end
143
-
144
- end
145
-
146
- def self.install_language_dependencies(dependencies, optionally)
147
86
 
87
+ def self.install_language_dependencies(language)
88
+ dependencies = Treat.languages[language].dependencies
148
89
  puts "No dependencies to install.\n" if dependencies.empty?
149
90
  dependencies.each do |dependency|
150
- install_gem(dependency, nil, nil, optionally)
91
+ install_gem(dependency)
151
92
  end
152
-
153
- end
154
-
155
- def self.install_binaries
156
-
157
- puts "Warning: this will require authentification."
158
-
159
- p = detect_platform
160
- man = PackageManagers[p]
161
-
162
- if !man
163
- puts "Cannot find a download manager "+
164
- "for the #{p} platform.\n\n"
165
- else
166
- unless `hash #{man} 2>&1` == ''
167
- puts "The '#{man}' command is required "+
168
- "to install binaries on #{p}.\n\n"
169
- man = nil
170
- end
171
- end
172
-
173
- unless man
174
- puts "Skipping installation of the "+
175
- "following binaries:\n\n"
176
- Binary.each do |binary, purpose|
177
- puts "- #{binary} to #{purpose}"
178
- end
179
- return
180
- end
181
-
182
- Treat::Dependencies::Binary.each do |binary, purpose|
183
- if prompt("install #{binary} to " +
184
- "#{purpose} (y/n)", ['y', 'n']) == 'y'
185
- `sudo #{man} install #{binary}`
186
- end
187
- end
188
-
189
93
  end
190
94
 
191
95
  def self.download_stanford(package = :minimal)
192
96
 
193
97
  f = StanfordPackages[package]
194
- loc = Treat::Downloader.download(
195
- 'http', Server, 'treat', f, Treat.tmp)
98
+ url = "http://#{Server}/treat/#{f}"
99
+ loc = Schiphol.download(url,
100
+ download_folder: Treat.paths.tmp
101
+ )
196
102
  puts "- Unzipping package ..."
197
- dest = File.join(Treat.tmp, 'stanford')
103
+ dest = File.join(Treat.paths.tmp, 'stanford')
198
104
  unzip_stanford(loc, dest)
199
105
 
200
106
  model_dir = File.join(Paths[:models], 'stanford')
@@ -232,37 +138,26 @@ module Treat::Installer
232
138
  puts "- Cleaning up..."
233
139
  FileUtils.rm_rf(origin)
234
140
 
141
+ 'Done.'
142
+
235
143
  end
236
144
 
237
- def self.download_punkt_models(languages)
238
- languages.map! { |l| "#{l}.yaml" }
239
- download_models 'punkt', languages
240
- end
241
-
242
- def self.download_reuters_models
243
- files = ["industry.xml", "region.xml", "topics.xml"]
244
- download_models 'reuters', files
245
- end
246
-
247
- def self.download_models(directory, files)
248
-
249
- dest = "#{Treat.models}#{directory}/"
145
+ def self.download_punkt_models(language)
250
146
 
147
+ f = "#{language}.yaml"
148
+ dest = "#{Treat.paths.models}punkt/"
149
+ url = "http://#{Server}/treat/punkt/#{f}"
150
+ loc = Schiphol.download(url,
151
+ download_folder: Treat.paths.tmp
152
+ )
251
153
  unless File.readable?(dest)
252
- puts "- Creating directory models/#{directory} ..."
154
+ puts "- Creating directory models/punkt ..."
253
155
  FileUtils.mkdir_p(File.absolute_path(dest))
254
156
  end
255
157
 
158
+ puts "- Copying model file to models/punkt ..."
159
+ FileUtils.cp(loc, File.join(Paths[:models], 'punkt', f))
256
160
 
257
- files.each do |file|
258
- puts "- Downloading #{file} ..."
259
- loc = Treat::Downloader.download(
260
- 'http', Server, "treat/#{directory}", file, Treat.tmp)
261
- puts "- Copying file to models/#{directory} ..."
262
- FileUtils.cp(loc, File.join(Paths[:models], directory, file))
263
- end
264
-
265
-
266
161
  puts "- Cleaning up..."
267
162
  FileUtils.rm_rf(Paths[:tmp] + Server)
268
163
 
@@ -282,25 +177,11 @@ module Treat::Installer
282
177
 
283
178
  # Install a dependency with a supplied purpose
284
179
  # but ask the user if she wishes to do so first.
285
- def self.install_gem(dependency, version = nil,
286
- purpose = nil, optionally = false)
287
-
288
- install = false
180
+ def self.install_gem(dependency)
289
181
 
290
182
  begin
291
- purpose = purpose ? " to #{purpose}" : ''
292
- if optionally
293
- if prompt("install #{dependency}#{purpose}",
294
- ['y', 'n']) == 'y'
295
- install = true
296
- end
297
- else
298
- puts "\n- Installing #{dependency}#{purpose}."
299
- install = true
300
- end
301
- silence_warnings do
302
- @@installer.install(dependency, version)
303
- end if install
183
+ puts "Installing #{dependency}...\n"
184
+ @@installer.install(dependency)
304
185
  rescue Exception => error
305
186
  raise
306
187
  puts "Couldn't install gem '#{dependency}' " +
@@ -325,4 +206,4 @@ module Treat::Installer
325
206
 
326
207
  end
327
208
 
328
- end
209
+ end
@@ -1,34 +1,29 @@
1
- module Treat
1
+ # A helper class to load a language class
2
+ # registered with the Linguistics gem.
3
+ class Treat::Loaders::Linguistics
2
4
 
3
- module Loaders
4
-
5
- # A helper class to load a language class
6
- # registered with the Linguistics gem.
7
- class Linguistics
8
-
9
- silence_warnings { require 'linguistics' }
10
- @@languages = {}
11
-
12
- def self.load(language)
13
- if @@languages[language]
14
- return @@languages[language]
15
- end
16
- begin
17
- l = language.to_s.upcase
18
- silence_warnings do
19
- @@languages[language] =
20
- ::Linguistics.const_get(l)
21
- end
22
- rescue RuntimeError
23
- raise "Ruby Linguistics does " +
24
- "not have a module installed " +
25
- "for the #{language} language."
26
- end
5
+ silence_warnings do
6
+ require 'linguistics'
7
+ end
8
+
9
+ @@languages = {}
27
10
 
11
+ def self.load(language)
12
+ if @@languages[language]
13
+ return @@languages[language]
14
+ end
15
+ begin
16
+ l = language.to_s[0..1].upcase
17
+ silence_warnings do
18
+ @@languages[language] =
19
+ ::Linguistics.const_get(l)
28
20
  end
29
-
21
+ rescue RuntimeError
22
+ raise "Ruby Linguistics does " +
23
+ "not have a module installed " +
24
+ "for the #{language} language."
30
25
  end
31
26
 
32
27
  end
33
28
 
34
- end
29
+ end
@@ -1,45 +1,27 @@
1
- module Treat
2
-
3
- module Loaders
4
-
5
- class Stanford
6
-
7
- require 'stanford-core-nlp'
8
-
9
- class << self
10
- attr_accessor :jar_path
11
- attr_accessor :model_path
12
- attr_accessor :loaded
13
- end
14
-
15
- self.jar_path = Treat.bin + 'stanford/'
16
- self.model_path = Treat.models + 'stanford/'
17
- self.loaded = false
18
-
19
- def self.load(language = nil)
20
-
21
- return if self.loaded
22
-
23
- language ||=
24
- Treat::Languages.describe(
25
- Treat.default_language)
26
-
27
- StanfordCoreNLP.jar_path = self.jar_path
28
- StanfordCoreNLP.model_path = self.model_path
29
-
30
- StanfordCoreNLP.use(language)
31
-
32
- StanfordCoreNLP.log_file =
33
- NULL_DEVICE if Treat.silence
34
-
35
- StanfordCoreNLP.bind
36
-
37
- self.loaded = true
38
-
39
- end
40
-
1
+ # A helper class to load the
2
+ # Stanford Core NLP package.
3
+ class Treat::Loaders::Stanford
4
+
5
+ require 'stanford-core-nlp'
6
+ @@loaded = false
7
+
8
+ def self.load(language = nil)
9
+ return if @@loaded
10
+ language ||= Treat.core.language.default
11
+ jar_path = Treat.libraries.
12
+ stanford.jar_path || Treat.paths.bin
13
+ models_path = Treat.libraries.
14
+ stanford.model_path || Treat.paths.models
15
+ StanfordCoreNLP.jar_path =
16
+ "#{jar_path}stanford/"
17
+ StanfordCoreNLP.model_path =
18
+ "#{models_path}stanford/"
19
+ StanfordCoreNLP.use(language)
20
+ if Treat.core.verbosity.silence
21
+ StanfordCoreNLP.log_file = NULL_DEVICE
41
22
  end
42
-
23
+ StanfordCoreNLP.bind
24
+ @@loaded = true
43
25
  end
44
26
 
45
27
  end
@@ -0,0 +1,10 @@
1
+ # Contains classes to load external libraries.
2
+ module Treat::Loaders
3
+ # Autoload all of the loaders.
4
+ def self.const_missing(const)
5
+ name = const.to_s.downcase
6
+ require Treat.paths.lib +
7
+ "treat/loaders/#{name}.rb"
8
+ self.const_get(const)
9
+ end
10
+ end