treat 1.0.6 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +2 -4
- data/README.md +13 -12
- data/bin/MANIFEST +1 -0
- data/bin/stanford/bridge.jar +0 -0
- data/bin/stanford/joda-time.jar +0 -0
- data/bin/stanford/stanford-corenlp.jar +0 -0
- data/bin/stanford/stanford-parser.jar +0 -0
- data/bin/stanford/xom.jar +0 -0
- data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
- data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
- data/files/{INFO → MANIFEST} +0 -0
- data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
- data/files/weather-central-canada-heat-wave.html +1370 -0
- data/lib/treat/config/core/acronyms.rb +4 -0
- data/lib/treat/config/core/encodings.rb +8 -0
- data/lib/treat/config/core/entities.rb +2 -0
- data/lib/treat/config/core/language.rb +3 -0
- data/lib/treat/config/core/paths.rb +8 -0
- data/lib/treat/config/core/syntax.rb +1 -0
- data/lib/treat/config/core/verbosity.rb +1 -0
- data/lib/treat/config/databases/mongo.rb +3 -0
- data/lib/treat/config/languages/agnostic.rb +34 -0
- data/lib/treat/config/languages/arabic.rb +13 -0
- data/lib/treat/config/languages/chinese.rb +13 -0
- data/lib/treat/config/languages/dutch.rb +12 -0
- data/lib/treat/config/languages/english.rb +60 -0
- data/lib/treat/config/languages/french.rb +18 -0
- data/lib/treat/config/languages/german.rb +18 -0
- data/lib/treat/config/languages/greek.rb +12 -0
- data/lib/treat/config/languages/italian.rb +12 -0
- data/lib/treat/config/languages/polish.rb +12 -0
- data/lib/treat/config/languages/portuguese.rb +12 -0
- data/lib/treat/config/languages/russian.rb +12 -0
- data/lib/treat/config/languages/spanish.rb +12 -0
- data/lib/treat/config/languages/swedish.rb +12 -0
- data/lib/treat/config/libraries/stanford.rb +1 -0
- data/lib/treat/config/linguistics/categories.rb +4 -0
- data/lib/treat/config/linguistics/punctuation.rb +33 -0
- data/lib/treat/config/tags/aligned.rb +221 -0
- data/lib/treat/config/tags/enju.rb +71 -0
- data/lib/treat/config/tags/paris7.rb +17 -0
- data/lib/treat/config/tags/ptb.rb +15 -0
- data/lib/treat/config/workers/extractors.rb +39 -0
- data/lib/treat/config/workers/formatters.rb +20 -0
- data/lib/treat/config/workers/inflectors.rb +27 -0
- data/lib/treat/config/workers/learners.rb +6 -0
- data/lib/treat/config/workers/lexicalizers.rb +18 -0
- data/lib/treat/config/workers/list.rb +1 -0
- data/lib/treat/config/workers/processors.rb +19 -0
- data/lib/treat/config/workers/retrievers.rb +12 -0
- data/lib/treat/config.rb +125 -0
- data/lib/treat/{classification.rb → core/classification.rb} +1 -1
- data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
- data/lib/treat/{tree.rb → core/node.rb} +5 -5
- data/lib/treat/core/server.rb +3 -0
- data/lib/treat/core.rb +5 -0
- data/lib/treat/entities/abilities/buildable.rb +61 -56
- data/lib/treat/entities/abilities/checkable.rb +2 -2
- data/lib/treat/entities/abilities/comparable.rb +21 -0
- data/lib/treat/entities/abilities/copyable.rb +2 -0
- data/lib/treat/entities/abilities/countable.rb +1 -1
- data/lib/treat/entities/abilities/debuggable.rb +1 -1
- data/lib/treat/entities/abilities/delegatable.rb +42 -36
- data/lib/treat/entities/abilities/doable.rb +2 -2
- data/lib/treat/entities/abilities/exportable.rb +1 -1
- data/lib/treat/entities/abilities/iterable.rb +21 -33
- data/lib/treat/entities/abilities/magical.rb +8 -8
- data/lib/treat/entities/abilities/registrable.rb +0 -38
- data/lib/treat/entities/abilities/stringable.rb +19 -19
- data/lib/treat/entities/collection.rb +31 -0
- data/lib/treat/entities/document.rb +10 -0
- data/lib/treat/entities/entity.rb +18 -13
- data/lib/treat/entities/group.rb +15 -0
- data/lib/treat/entities/section.rb +13 -0
- data/lib/treat/entities/token.rb +35 -0
- data/lib/treat/entities/zone.rb +11 -0
- data/lib/treat/entities.rb +5 -75
- data/lib/treat/helpers/didyoumean.rb +57 -0
- data/lib/treat/helpers/escaping.rb +15 -0
- data/lib/treat/helpers/formatting.rb +41 -0
- data/lib/treat/helpers/platform.rb +15 -0
- data/lib/treat/helpers/reflection.rb +17 -0
- data/lib/treat/helpers/temporary.rb +27 -0
- data/lib/treat/helpers/verbosity.rb +19 -0
- data/lib/treat/helpers.rb +5 -0
- data/lib/treat/installer.rb +46 -165
- data/lib/treat/loaders/linguistics.rb +22 -27
- data/lib/treat/loaders/stanford.rb +23 -41
- data/lib/treat/loaders.rb +10 -0
- data/lib/treat/proxies.rb +73 -24
- data/lib/treat/version.rb +3 -0
- data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
- data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
- data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
- data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
- data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
- data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
- data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
- data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
- data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
- data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
- data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
- data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
- data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
- data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
- data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
- data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
- data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
- data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
- data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
- data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
- data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
- data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
- data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
- data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
- data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
- data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
- data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
- data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
- data/lib/treat/workers.rb +96 -0
- data/lib/treat.rb +23 -49
- data/spec/collection.rb +4 -4
- data/spec/document.rb +5 -5
- data/spec/entity.rb +33 -32
- data/spec/{tree.rb → node.rb} +5 -5
- data/spec/phrase.rb +5 -39
- data/spec/sandbox.rb +212 -6
- data/spec/token.rb +12 -9
- data/spec/treat.rb +12 -9
- data/spec/word.rb +10 -9
- data/spec/zone.rb +6 -2
- data/tmp/{INFO → MANIFEST} +0 -0
- data/tmp/english.yaml +10340 -0
- metadata +149 -139
- data/lib/treat/ai.rb +0 -12
- data/lib/treat/categories.rb +0 -90
- data/lib/treat/categorizable.rb +0 -44
- data/lib/treat/configurable.rb +0 -115
- data/lib/treat/dependencies.rb +0 -25
- data/lib/treat/downloader.rb +0 -87
- data/lib/treat/entities/abilities.rb +0 -10
- data/lib/treat/entities/entities.rb +0 -102
- data/lib/treat/exception.rb +0 -7
- data/lib/treat/extractors.rb +0 -79
- data/lib/treat/formatters/serializers/mongo.rb +0 -64
- data/lib/treat/formatters.rb +0 -41
- data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
- data/lib/treat/inflectors.rb +0 -52
- data/lib/treat/kernel.rb +0 -208
- data/lib/treat/languages/arabic.rb +0 -16
- data/lib/treat/languages/chinese.rb +0 -16
- data/lib/treat/languages/dutch.rb +0 -16
- data/lib/treat/languages/english.rb +0 -63
- data/lib/treat/languages/french.rb +0 -20
- data/lib/treat/languages/german.rb +0 -20
- data/lib/treat/languages/greek.rb +0 -16
- data/lib/treat/languages/italian.rb +0 -17
- data/lib/treat/languages/language.rb +0 -10
- data/lib/treat/languages/list.txt +0 -504
- data/lib/treat/languages/polish.rb +0 -16
- data/lib/treat/languages/portuguese.rb +0 -16
- data/lib/treat/languages/russian.rb +0 -16
- data/lib/treat/languages/spanish.rb +0 -16
- data/lib/treat/languages/swedish.rb +0 -16
- data/lib/treat/languages.rb +0 -132
- data/lib/treat/lexicalizers.rb +0 -37
- data/lib/treat/object.rb +0 -7
- data/lib/treat/processors/chunkers/autoselect.rb +0 -16
- data/lib/treat/processors/chunkers/txt.rb +0 -21
- data/lib/treat/processors.rb +0 -38
- data/lib/treat/retrievers.rb +0 -27
- data/lib/treat/server.rb +0 -26
- data/lib/treat/universalisation/encodings.rb +0 -12
- data/lib/treat/universalisation/tags.rb +0 -453
- data/lib/treat/universalisation.rb +0 -9
- data/spec/languages.rb +0 -25
@@ -0,0 +1,57 @@
|
|
1
|
+
# Search +list+ for entries that look like a typo of +name+ and
# build a hint that can be appended to an error message.
#
# Both the entries and +name+ are compared as strings (via to_s).
# An entry is suggested when levenshtein reports a distance
# greater than 0 and smaller than 2 between it and +name+.
#
# Returns '' when nothing qualifies, " Perhaps you meant 'a' ?"
# for one suggestion, and " Perhaps you meant 'a', 'b', or 'c' ?"
# for several (note the leading space).
def did_you_mean?(list, name)
  name = name.to_s
  sugg = list.map { |e| e.to_s }.select do |element|
    distance = levenshtein(element, name)
    distance > 0 && distance < 2
  end

  return '' if sugg.empty?
  return " Perhaps you meant '#{sugg[0]}' ?" if sugg.size == 1

  # Quote every suggestion but the last; the last one is
  # introduced by "or".
  sugg_quote = sugg[0..-2].map { |x| "'#{x}'" }
  " Perhaps you meant #{sugg_quote.join(', ')}, or '#{sugg[-1]}' ?"
end

alias :dym? :did_you_mean?
|
32
|
+
|
33
|
+
# Return the Levenshtein distance between two strings, taking
# into account the costs of insertion, deletion, and substitution.
# Used by did_you_mean? to detect typos.
#
# first, other - the strings to compare.
# ins, del, sub - cost applied per step along +first+, per step
#                 along +other+, and per mismatching character.
#
# Returns the minimal total cost, or nil if either string is nil.
def levenshtein(first, other, ins=1, del=1, sub=1)
  return nil if first.nil? || other.nil?

  # dm[i][j] holds the cost of aligning the first i characters
  # of +other+ with the first j characters of +first+.
  dm = [(0..first.length).map { |j| j * ins }]

  (1..other.length).each do |i|
    dm[i] = [i * del]
    (1..first.length).each do |j|
      # Column j indexes +first+ and row i indexes +other+.
      same = first[j-1] == other[i-1]
      dm[i][j] = [
        dm[i-1][j-1] + (same ? 0 : sub),
        dm[i][j-1] + ins,
        dm[i-1][j] + del
      ].min
    end
  end

  dm[other.length][first.length]
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Placeholder substituted for the decimal point of a float.
# (Acknowledged as ugly by the original author; candidate for
# replacement.)
EscapeChar = '^^'
# The same placeholder with each caret backslash-escaped, for
# interpolation into a regular expression.
EscapedEscapeChar = '\^\^'

# Replace, in place, the dot found between two runs of digits
# in +s+ with EscapeChar. Returns what String#gsub! returns:
# +s+ when something was replaced, nil otherwise.
def escape_floats!(s)
  s.gsub!(/([0-9]+)\.([0-9]+)/) {
    found = Regexp.last_match
    found[1] + EscapeChar + found[2]
  }
end

# Reverse escape_floats!: put the dot back, in place, wherever
# the placeholder sits between two runs of digits in +s+.
# Returns +s+ when something was replaced, nil otherwise.
def unescape_floats!(s)
  s.gsub!(/([0-9]+)#{EscapedEscapeChar}([0-9]+)/) {
    found = Regexp.last_match
    found[1] + '.' + found[2]
  }
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# A cache to optimize camel casing, keyed by the argument as it
# was passed in. Kept in a constant rather than a class variable
# because accessing a class variable from the top level raises
# a RuntimeError on Ruby 3.0 and later.
CAMEL_CASE_CACHE = {} unless defined?(CAMEL_CASE_CACHE)

# A cache to optimize un camel casing, keyed the same way.
UN_CAMEL_CASE_CACHE = {} unless defined?(UN_CAMEL_CASE_CACHE)

# Convert un_camel_case to CamelCase.
#
# +o_phrase+ is converted with to_s. If the resulting string is
# included in Treat.core.acronyms it is upcased as a whole;
# otherwise the first lowercase letter of each line and every
# lowercase letter following an underscore is upcased and all
# underscores are removed.
#
# Results are memoized: repeated calls with the same argument
# return the same String object.
def camel_case(o_phrase)
  cached = CAMEL_CASE_CACHE[o_phrase]
  return cached if cached

  phrase = o_phrase.to_s.dup
  if Treat.core.acronyms.include?(phrase)
    phrase = phrase.upcase
  else
    phrase.gsub!(/^[a-z]|_[a-z]/) { |a| a.upcase }
    phrase.gsub!('_', '')
  end
  CAMEL_CASE_CACHE[o_phrase] = phrase
end

alias :cc :camel_case

# Convert CamelCase to un_camel_case.
#
# +o_phrase+ is converted with to_s. If its downcased form is
# included in Treat.core.acronyms the whole string is downcased;
# otherwise every uppercase ASCII letter is replaced by an
# underscore plus its lowercase form, and a leading underscore
# is dropped.
#
# Results are memoized: repeated calls with the same argument
# return the same String object.
def un_camel_case(o_phrase)
  cached = UN_CAMEL_CASE_CACHE[o_phrase]
  return cached if cached

  phrase = o_phrase.to_s.dup
  if Treat.core.acronyms.include?(phrase.downcase)
    phrase = phrase.downcase
  else
    phrase.gsub!(/[A-Z]/) { |p| '_' + p.downcase }
    phrase = phrase[1..-1] if phrase[0] == '_'
  end
  UN_CAMEL_CASE_CACHE[o_phrase] = phrase
end

alias :ucc :un_camel_case
|
37
|
+
|
38
|
+
# Return the last '::'-separated segment of +n+ (converted with
# to_s), i.e. the bare class name of a Module::Class path.
# Returns nil when +n+ converts to an empty string.
def class_name(n)
  segments = n.to_s.split('::')
  segments.last
end

alias :cl :class_name
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Detect the platform we're running on.
#
# platform - the platform string to classify; defaults to
#            RUBY_PLATFORM. It is compared case-insensitively.
#
# Returns :mac for "darwin", :windows for "mswin" or "mingw"
# builds, :linux for "linux", and :unknown for anything else.
def detect_platform(platform = RUBY_PLATFORM)
  name = platform.to_s.downcase
  # "darwin" is tested first, before any Windows pattern.
  return :mac if name.include?("darwin")
  return :windows if name.include?("mswin") || name.include?("mingw")
  return :linux if name.include?("linux")
  :unknown
end
|
9
|
+
|
10
|
+
# Set up the right NULL device: the path that discards anything
# written to it. File::NULL is Ruby's own per-platform value
# ('NUL' on Windows builds, '/dev/null' on POSIX systems), so no
# platform sniffing is needed here.
NULL_DEVICE = File::NULL
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Return, as a Symbol, the name of the method found +n+ frames
# up the call stack, counted the way Kernel#caller(n) counts
# from inside this method (n = 1 is the method calling
# caller_method).
#
# The name is parsed from the backtrace line. Both quoting
# styles are accepted (in `meth' and in 'Class#meth'), any
# "block in" / "block (N levels) in" prefix is removed, and a
# leading "Class#" or "Class." qualifier is dropped.
#
# Returns nil when there is no such frame or the frame carries
# no method label.
def caller_method(n = 3)
  frames = caller(n)
  at = frames && frames.first
  return nil unless at

  label = at[/:in [`'](.*)'\z/, 1]
  return nil unless label

  label = label.sub(/\Ablock (?:\(\d+ levels\) )?in /, '')
  label.split(/[#.]/).last.intern
end
|
9
|
+
|
10
|
+
class << Object
  # Unset a constant publicly: removes the constant named
  # +const+ from Object by invoking the private remove_const
  # on it, and returns the removed constant's value.
  def const_unset(const)
    Object.send(:remove_const, const)
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Require file utilities for creating and
|
2
|
+
# deleting temporary files.
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
# Create a temporary file which is deleted after execution of
# the block.
#
# ext   - extension appended (after a dot) to a random numeric
#         file name placed under Treat.paths.tmp.
# value - optional content written to the file before the block
#         runs.
# block - called with the file's path.
#
# The file is written and closed before the block is called, so
# the block always sees the complete content when it reads the
# file by path.
#
# Returns the value of the block.
def create_temp_file(ext, value = nil, &block)
  fname = Treat.paths.tmp +
  "#{Random.rand(10000000).to_s}.#{ext}"
  File.open(fname, 'w') do |f|
    f.write(value) if value
  end
  block.call(fname)
ensure
  # fname is nil if building the path failed, and the file may
  # already be gone if the block removed it.
  File.delete(fname) if fname && File.exist?(fname)
end
|
17
|
+
|
18
|
+
# Create a temporary directory, which is deleted after execution
# of the block.
#
# The directory gets a random numeric name under Treat.paths.tmp
# and its path is passed to the block. Cleanup only happens once
# Dir.mkdir has succeeded, so a failure to create the directory
# (for instance because the name is already taken) never removes
# something this method did not create.
#
# Returns the value of the block.
def create_temp_dir(&block)
  dname = Treat.paths.tmp +
  "#{Random.rand(10000000).to_s}"
  Dir.mkdir(dname)
  begin
    block.call(dname)
  ensure
    FileUtils.rm_rf(dname)
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Runs a block of code without warnings.
#
# $VERBOSE is set to nil while the block runs and is put back to
# its previous value afterwards, including when the block raises.
#
# Returns the value of the block.
def silence_warnings(&block)
  warn_level = $VERBOSE
  $VERBOSE = nil
  block.call
ensure
  $VERBOSE = warn_level
end
|
9
|
+
|
10
|
+
# Runs a block of code while blocking stdout.
#
# log - path of the file that receives the output while the
#       block runs; defaults to the platform's null device.
#
# When Treat.core.verbosity.silence is falsy the block simply
# runs with stdout untouched. Otherwise $stdout is redirected to
# +log+ for the duration of the block and then pointed back at
# its original destination, even if the block raises. The
# $stdout object itself is never replaced.
#
# Returns nil.
def silence_stdout(log = File::NULL)
  unless Treat.core.verbosity.silence
    yield; return
  end
  # Keep a duplicate of the current destination to restore from.
  saved = $stdout.dup
  begin
    # reopen takes over the sink's stream, so the File object
    # can be closed right away.
    File.open(log, 'w') { |sink| $stdout.reopen(sink) }
    yield
  ensure
    $stdout.flush
    $stdout.reopen(saved)
    saved.close
  end
  nil
end
|
data/lib/treat/installer.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
#
|
2
|
-
#
|
1
|
+
# A dependency manager for Treat language plugins.
|
3
2
|
# It can be called by using Treat.install(language).
|
4
3
|
module Treat::Installer
|
5
4
|
|
@@ -7,15 +6,9 @@ module Treat::Installer
|
|
7
6
|
silence_warnings do
|
8
7
|
require 'rubygems/dependency_installer'
|
9
8
|
end
|
10
|
-
|
11
|
-
require 'treat/
|
12
|
-
|
13
|
-
# Package managers for each platforms.
|
14
|
-
PackageManagers = {
|
15
|
-
:mac => 'port',
|
16
|
-
:linux => 'apt-get',
|
17
|
-
:windows => 'win-get'
|
18
|
-
}
|
9
|
+
|
10
|
+
require 'treat/version'
|
11
|
+
require 'schiphol'
|
19
12
|
|
20
13
|
# Address of the server with the files.
|
21
14
|
Server = 'www.louismullie.com'
|
@@ -29,66 +22,40 @@ module Treat::Installer
|
|
29
22
|
|
30
23
|
# Absolute paths required for cp and mkdir.
|
31
24
|
Paths = {
|
32
|
-
:tmp => File.absolute_path(Treat.tmp),
|
33
|
-
:bin => File.absolute_path(Treat.bin),
|
34
|
-
:models => File.absolute_path(Treat.models)
|
25
|
+
:tmp => File.absolute_path(Treat.paths.tmp),
|
26
|
+
:bin => File.absolute_path(Treat.paths.bin),
|
27
|
+
:models => File.absolute_path(Treat.paths.models)
|
35
28
|
}
|
36
29
|
|
37
30
|
# Install required dependencies and optional
|
38
31
|
# dependencies for a specific language.
|
39
|
-
def self.install(language =
|
32
|
+
def self.install(language = 'english')
|
40
33
|
|
41
34
|
@@installer = Gem::DependencyInstaller.new
|
42
35
|
|
43
|
-
if language ==
|
36
|
+
if language == 'travis'
|
44
37
|
install_travis; return
|
45
38
|
end
|
46
39
|
|
47
|
-
lang_class = Treat::Languages.get(language.to_s)
|
48
40
|
l = "#{language.to_s.capitalize} language"
|
49
41
|
|
50
|
-
puts
|
51
|
-
puts "Treat Installer, v. #{Treat::VERSION.to_s}\n"
|
52
|
-
puts
|
42
|
+
puts "\nTreat Installer, v. #{Treat::VERSION.to_s}\n\n"
|
53
43
|
|
54
44
|
begin
|
55
45
|
|
56
|
-
title "
|
57
|
-
|
58
|
-
case prompt(
|
59
|
-
"1 - Install all default language-independent dependencies\n" +
|
60
|
-
"2 - Select dependencies to install manually\n" +
|
61
|
-
"3 - Skip this step", ['1', '2', '3'])
|
62
|
-
when '1' then install_dependencies(false)
|
63
|
-
when '2' then install_dependencies(true)
|
64
|
-
when '3' then puts 'Skipping this step.'
|
65
|
-
end
|
66
|
-
|
67
|
-
title "Install gem dependencies for the #{l}.\n"
|
68
|
-
|
69
|
-
dflt = lang_class::RequiredDependencies
|
70
|
-
all = dflt + lang_class::OptionalDependencies
|
71
|
-
case prompt("1 - Install default dependencies.\n" +
|
72
|
-
"2 - Select dependencies to install manually.\n" +
|
73
|
-
"3 - Skip this step.", ['1', '2', '3'])
|
74
|
-
when '1' then install_language_dependencies(dflt, false)
|
75
|
-
when '2' then install_language_dependencies(all, true)
|
76
|
-
when '3' then puts 'Skipping this step.'
|
77
|
-
end
|
46
|
+
title "Installing core dependencies."
|
47
|
+
install_language_dependencies('agnostic')
|
78
48
|
|
79
|
-
|
49
|
+
title "Installing dependencies for the #{l}.\n"
|
50
|
+
install_language_dependencies(language)
|
80
51
|
|
81
52
|
# If gem is installed only, download models.
|
82
53
|
begin
|
83
54
|
Gem::Specification.find_by_name('punkt-segmenter')
|
84
|
-
title "Downloading
|
85
|
-
|
86
|
-
download_punkt_models([language.to_s])
|
55
|
+
title "Downloading models for the Punkt segmenter for the #{l}."
|
56
|
+
download_punkt_models(language)
|
87
57
|
rescue Gem::LoadError; end
|
88
|
-
|
89
|
-
# Download reuters models always
|
90
|
-
download_reuters_models
|
91
|
-
|
58
|
+
|
92
59
|
# If stanford is installed, download models.
|
93
60
|
begin
|
94
61
|
Gem::Specification.find_by_name('stanford-core-nlp')
|
@@ -96,23 +63,7 @@ module Treat::Installer
|
|
96
63
|
"model files for the the #{l}.\n\n"
|
97
64
|
package = (language == :english) ? :english : :all
|
98
65
|
download_stanford(package)
|
99
|
-
rescue Gem::LoadError
|
100
|
-
puts 'Stanford-core-nlp gem not installed.'
|
101
|
-
puts 'Skipping download of Stanford models.'
|
102
|
-
end
|
103
|
-
|
104
|
-
title "Install external binary libraries " +
|
105
|
-
"(requires port, apt-get or win-get).\n"
|
106
|
-
puts "Warning: this may take a long amount of time."
|
107
|
-
|
108
|
-
case prompt("1 - Select binaries to install manually.\n" +
|
109
|
-
"2 - Skip this step.", ['1', '2'])
|
110
|
-
when '1' then install_binaries
|
111
|
-
when '2' then puts 'Skipping this step.'
|
112
|
-
end
|
113
|
-
|
114
|
-
puts
|
115
|
-
puts "-----\nDone!"
|
66
|
+
rescue Gem::LoadError; end
|
116
67
|
|
117
68
|
rescue Errno::EACCES => e
|
118
69
|
|
@@ -124,77 +75,32 @@ module Treat::Installer
|
|
124
75
|
|
125
76
|
end
|
126
77
|
|
127
|
-
#
|
78
|
+
# Minimal install for Travis CI.
|
128
79
|
def self.install_travis
|
129
|
-
|
130
|
-
|
131
|
-
install_dependencies(false)
|
132
|
-
install_language_dependencies(dep, false)
|
80
|
+
install_language_dependencies(:agnostic)
|
81
|
+
install_language_dependencies(:english)
|
133
82
|
download_stanford(:minimal)
|
134
|
-
download_punkt_models(
|
83
|
+
download_punkt_models(:english)
|
135
84
|
end
|
136
85
|
|
137
|
-
def self.install_dependencies(optionally)
|
138
|
-
|
139
|
-
Treat::Dependencies::Gem.each do |d|
|
140
|
-
dep, ver, pur = *d
|
141
|
-
install_gem(dep, ver, pur, optionally)
|
142
|
-
end
|
143
|
-
|
144
|
-
end
|
145
|
-
|
146
|
-
def self.install_language_dependencies(dependencies, optionally)
|
147
86
|
|
87
|
+
def self.install_language_dependencies(language)
|
88
|
+
dependencies = Treat.languages[language].dependencies
|
148
89
|
puts "No dependencies to install.\n" if dependencies.empty?
|
149
90
|
dependencies.each do |dependency|
|
150
|
-
install_gem(dependency
|
91
|
+
install_gem(dependency)
|
151
92
|
end
|
152
|
-
|
153
|
-
end
|
154
|
-
|
155
|
-
def self.install_binaries
|
156
|
-
|
157
|
-
puts "Warning: this will require authentification."
|
158
|
-
|
159
|
-
p = detect_platform
|
160
|
-
man = PackageManagers[p]
|
161
|
-
|
162
|
-
if !man
|
163
|
-
puts "Cannot find a download manager "+
|
164
|
-
"for the #{p} platform.\n\n"
|
165
|
-
else
|
166
|
-
unless `hash #{man} 2>&1` == ''
|
167
|
-
puts "The '#{man}' command is required "+
|
168
|
-
"to install binaries on #{p}.\n\n"
|
169
|
-
man = nil
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
unless man
|
174
|
-
puts "Skipping installation of the "+
|
175
|
-
"following binaries:\n\n"
|
176
|
-
Binary.each do |binary, purpose|
|
177
|
-
puts "- #{binary} to #{purpose}"
|
178
|
-
end
|
179
|
-
return
|
180
|
-
end
|
181
|
-
|
182
|
-
Treat::Dependencies::Binary.each do |binary, purpose|
|
183
|
-
if prompt("install #{binary} to " +
|
184
|
-
"#{purpose} (y/n)", ['y', 'n']) == 'y'
|
185
|
-
`sudo #{man} install #{binary}`
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
93
|
end
|
190
94
|
|
191
95
|
def self.download_stanford(package = :minimal)
|
192
96
|
|
193
97
|
f = StanfordPackages[package]
|
194
|
-
|
195
|
-
|
98
|
+
url = "http://#{Server}/treat/#{f}"
|
99
|
+
loc = Schiphol.download(url,
|
100
|
+
download_folder: Treat.paths.tmp
|
101
|
+
)
|
196
102
|
puts "- Unzipping package ..."
|
197
|
-
dest = File.join(Treat.tmp, 'stanford')
|
103
|
+
dest = File.join(Treat.paths.tmp, 'stanford')
|
198
104
|
unzip_stanford(loc, dest)
|
199
105
|
|
200
106
|
model_dir = File.join(Paths[:models], 'stanford')
|
@@ -232,37 +138,26 @@ module Treat::Installer
|
|
232
138
|
puts "- Cleaning up..."
|
233
139
|
FileUtils.rm_rf(origin)
|
234
140
|
|
141
|
+
'Done.'
|
142
|
+
|
235
143
|
end
|
236
144
|
|
237
|
-
def self.download_punkt_models(
|
238
|
-
languages.map! { |l| "#{l}.yaml" }
|
239
|
-
download_models 'punkt', languages
|
240
|
-
end
|
241
|
-
|
242
|
-
def self.download_reuters_models
|
243
|
-
files = ["industry.xml", "region.xml", "topics.xml"]
|
244
|
-
download_models 'reuters', files
|
245
|
-
end
|
246
|
-
|
247
|
-
def self.download_models(directory, files)
|
248
|
-
|
249
|
-
dest = "#{Treat.models}#{directory}/"
|
145
|
+
def self.download_punkt_models(language)
|
250
146
|
|
147
|
+
f = "#{language}.yaml"
|
148
|
+
dest = "#{Treat.paths.models}punkt/"
|
149
|
+
url = "http://#{Server}/treat/punkt/#{f}"
|
150
|
+
loc = Schiphol.download(url,
|
151
|
+
download_folder: Treat.paths.tmp
|
152
|
+
)
|
251
153
|
unless File.readable?(dest)
|
252
|
-
puts "- Creating directory models
|
154
|
+
puts "- Creating directory models/punkt ..."
|
253
155
|
FileUtils.mkdir_p(File.absolute_path(dest))
|
254
156
|
end
|
255
157
|
|
158
|
+
puts "- Copying model file to models/punkt ..."
|
159
|
+
FileUtils.cp(loc, File.join(Paths[:models], 'punkt', f))
|
256
160
|
|
257
|
-
files.each do |file|
|
258
|
-
puts "- Downloading #{file} ..."
|
259
|
-
loc = Treat::Downloader.download(
|
260
|
-
'http', Server, "treat/#{directory}", file, Treat.tmp)
|
261
|
-
puts "- Copying file to models/#{directory} ..."
|
262
|
-
FileUtils.cp(loc, File.join(Paths[:models], directory, file))
|
263
|
-
end
|
264
|
-
|
265
|
-
|
266
161
|
puts "- Cleaning up..."
|
267
162
|
FileUtils.rm_rf(Paths[:tmp] + Server)
|
268
163
|
|
@@ -282,25 +177,11 @@ module Treat::Installer
|
|
282
177
|
|
283
178
|
# Install a dependency with a supplied purpose
|
284
179
|
# but ask the user if she wishes to do so first.
|
285
|
-
def self.install_gem(dependency
|
286
|
-
purpose = nil, optionally = false)
|
287
|
-
|
288
|
-
install = false
|
180
|
+
def self.install_gem(dependency)
|
289
181
|
|
290
182
|
begin
|
291
|
-
|
292
|
-
|
293
|
-
if prompt("install #{dependency}#{purpose}",
|
294
|
-
['y', 'n']) == 'y'
|
295
|
-
install = true
|
296
|
-
end
|
297
|
-
else
|
298
|
-
puts "\n- Installing #{dependency}#{purpose}."
|
299
|
-
install = true
|
300
|
-
end
|
301
|
-
silence_warnings do
|
302
|
-
@@installer.install(dependency, version)
|
303
|
-
end if install
|
183
|
+
puts "Installing #{dependency}...\n"
|
184
|
+
@@installer.install(dependency)
|
304
185
|
rescue Exception => error
|
305
186
|
raise
|
306
187
|
puts "Couldn't install gem '#{dependency}' " +
|
@@ -325,4 +206,4 @@ module Treat::Installer
|
|
325
206
|
|
326
207
|
end
|
327
208
|
|
328
|
-
end
|
209
|
+
end
|
@@ -1,34 +1,29 @@
|
|
1
|
-
|
1
|
+
# A helper class to load a language class
|
2
|
+
# registered with the Linguistics gem.
|
3
|
+
class Treat::Loaders::Linguistics
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
silence_warnings { require 'linguistics' }
|
10
|
-
@@languages = {}
|
11
|
-
|
12
|
-
def self.load(language)
|
13
|
-
if @@languages[language]
|
14
|
-
return @@languages[language]
|
15
|
-
end
|
16
|
-
begin
|
17
|
-
l = language.to_s.upcase
|
18
|
-
silence_warnings do
|
19
|
-
@@languages[language] =
|
20
|
-
::Linguistics.const_get(l)
|
21
|
-
end
|
22
|
-
rescue RuntimeError
|
23
|
-
raise "Ruby Linguistics does " +
|
24
|
-
"not have a module installed " +
|
25
|
-
"for the #{language} language."
|
26
|
-
end
|
5
|
+
silence_warnings do
|
6
|
+
require 'linguistics'
|
7
|
+
end
|
8
|
+
|
9
|
+
@@languages = {}
|
27
10
|
|
11
|
+
def self.load(language)
|
12
|
+
if @@languages[language]
|
13
|
+
return @@languages[language]
|
14
|
+
end
|
15
|
+
begin
|
16
|
+
l = language.to_s[0..1].upcase
|
17
|
+
silence_warnings do
|
18
|
+
@@languages[language] =
|
19
|
+
::Linguistics.const_get(l)
|
28
20
|
end
|
29
|
-
|
21
|
+
rescue RuntimeError
|
22
|
+
raise "Ruby Linguistics does " +
|
23
|
+
"not have a module installed " +
|
24
|
+
"for the #{language} language."
|
30
25
|
end
|
31
26
|
|
32
27
|
end
|
33
28
|
|
34
|
-
end
|
29
|
+
end
|
@@ -1,45 +1,27 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
language ||=
|
24
|
-
Treat::Languages.describe(
|
25
|
-
Treat.default_language)
|
26
|
-
|
27
|
-
StanfordCoreNLP.jar_path = self.jar_path
|
28
|
-
StanfordCoreNLP.model_path = self.model_path
|
29
|
-
|
30
|
-
StanfordCoreNLP.use(language)
|
31
|
-
|
32
|
-
StanfordCoreNLP.log_file =
|
33
|
-
NULL_DEVICE if Treat.silence
|
34
|
-
|
35
|
-
StanfordCoreNLP.bind
|
36
|
-
|
37
|
-
self.loaded = true
|
38
|
-
|
39
|
-
end
|
40
|
-
|
1
|
+
# A helper class that configures and binds the Stanford Core
# NLP package. The work is done on the first call to load only.
class Treat::Loaders::Stanford

  require 'stanford-core-nlp'

  # Set to true once load has run to completion.
  @@loaded = false

  # Point StanfordCoreNLP at its jar and model directories,
  # select the language, and bind the package.
  #
  # language - language handed to StanfordCoreNLP.use; when nil,
  #            Treat.core.language.default is used.
  #
  # The jar and model directories are the "stanford/" entries
  # under Treat.libraries.stanford.jar_path / model_path, falling
  # back to Treat.paths.bin / Treat.paths.models when those are
  # not set. If Treat.core.verbosity.silence is set, the
  # package's log file is set to NULL_DEVICE.
  #
  # Returns nil when already loaded, true after loading.
  def self.load(language = nil)
    unless @@loaded
      language ||= Treat.core.language.default
      jar_root = Treat.libraries.stanford.jar_path
      jar_root ||= Treat.paths.bin
      model_root = Treat.libraries.stanford.model_path
      model_root ||= Treat.paths.models
      StanfordCoreNLP.jar_path = "#{jar_root}stanford/"
      StanfordCoreNLP.model_path = "#{model_root}stanford/"
      StanfordCoreNLP.use(language)
      StanfordCoreNLP.log_file = NULL_DEVICE if Treat.core.verbosity.silence
      StanfordCoreNLP.bind
      @@loaded = true
    end
  end

end
|