treat 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. data/files/21552208.html +786 -0
  2. data/files/nethttp-cheat-sheet-2940.html +393 -0
  3. data/lib/treat/builder.rb +6 -0
  4. data/lib/treat/config/data/languages/agnostic.rb +2 -2
  5. data/lib/treat/core/server.rb +1 -0
  6. data/lib/treat/entities/entity/buildable.rb +1 -1
  7. data/lib/treat/loaders/linguistics.rb +6 -7
  8. data/lib/treat/loaders/stanford.rb +45 -11
  9. data/lib/treat/version.rb +1 -1
  10. data/lib/treat/workers/categorizable.rb +30 -32
  11. data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -24
  12. data/lib/treat/workers/formatters/readers/html.rb +1 -1
  13. data/lib/treat/workers/formatters/readers/xml.rb +1 -1
  14. data/lib/treat/workers/formatters/unserializers/mongo.rb +1 -1
  15. data/lib/treat/workers/groupable.rb +2 -1
  16. data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +3 -3
  17. data/lib/treat/workers/inflectors/conjugators/linguistics.rb +6 -4
  18. data/lib/treat/workers/inflectors/declensors/linguistics.rb +11 -18
  19. data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +4 -4
  20. data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +1 -1
  21. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +23 -21
  22. data/lib/treat/workers/processors/parsers/stanford.rb +10 -20
  23. data/lib/treat/workers/processors/segmenters/stanford.rb +1 -1
  24. data/lib/treat/workers/processors/tokenizers/maxent.rb +29 -0
  25. data/lib/treat/workers/processors/tokenizers/stanford.rb +2 -4
  26. data/lib/treat.rb +1 -0
  27. data/spec/helper.rb +8 -6
  28. data/spec/sandbox.rb +18 -6
  29. data/spec/workers/agnostic.rb +76 -29
  30. data/spec/workers/english.rb +23 -73
  31. data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
  32. metadata +6 -18
data/lib/treat/workers/categorizable.rb CHANGED
@@ -9,26 +9,24 @@ module Treat::Workers::Categorizable
  @@lookup = {}

  # Find a worker group based on method.
- def lookup(method)
- @@lookup[method]
- end
+ def lookup(method); @@lookup[method]; end

  def categorize!
  Treat.workers.members.each do |cat|
- create_category(cat.
- capitalize.intern,
- load_category_conf(cat))
+ name = cat.capitalize.intern
+ conf = load_category_conf(cat)
+ create_category(name, conf)
  end
  end

  def load_category_conf(name)
- config = Treat.workers[name]
- if config.nil?
+ if !Treat.workers.respond_to?(name)
  raise Treat::Exception,
  "The configuration file " +
  "for #{cat_sym} is missing."
+ else
+ Treat.workers[name]
  end
- config
  end

  def create_category(name, conf)
@@ -37,11 +35,11 @@ module Treat::Workers::Categorizable
  conf.each_pair do |group, worker|
  name = group.to_s.cc.intern
  category.module_eval do
- @@methods = []; def methods;
- @@methods; end; def groups;
- self.constants; end
+ @@methods = []
+ def methods; @@methods; end
+ def groups; self.constants; end
  end
- self.create_group(name, worker, category)
+ create_group(name, worker, category)
  end
  end

@@ -54,24 +52,6 @@ module Treat::Workers::Categorizable
  @@lookup[group.method] = group
  end

- def bind_group_targets(group)
- group.targets.each do |entity_type|
- entity = Treat::Entities.
- const_get(entity_type.cc)
- entity.class_eval do
- add_workers group
- end
- end
- end
-
- def register_group_presets(group, conf)
- return unless conf.respond_to? :presets
- conf.presets.each do |m|
- @@methods << m
- @@lookup[m] = group
- end
- end
-
  def set_group_options(group, conf)
  group.module_eval do
  extend Treat::Workers::Groupable
@@ -91,5 +71,23 @@ module Treat::Workers::Categorizable
  end
  end
  end
-
+
+ def bind_group_targets(group)
+ group.targets.each do |entity_type|
+ entity = Treat::Entities.
+ const_get(entity_type.cc)
+ entity.class_eval do
+ add_workers group
+ end
+ end
+ end
+
+ def register_group_presets(group, conf)
+ return unless conf.respond_to?(:presets)
+ conf.presets.each do |method|
+ @@methods << method
+ @@lookup[method] = group
+ end
+ end
+
  end
data/lib/treat/workers/extractors/name_tag/stanford.rb CHANGED
@@ -16,32 +16,21 @@ class Treat::Workers::Extractors::NameTag::Stanford

  def self.name_tag(entity, options = {})

- pp = nil
-
  language = entity.language
-
  Treat::Loaders::Stanford.load(language)

  isolated_token = entity.is_a?(Treat::Entities::Token)
  tokens = isolated_token ? [entity] : entity.tokens
-
- ms = StanfordCoreNLP::Config::Models[:ner][language.intern]
- model_path = Treat.libraries.stanford.model_path ||
- (Treat.paths.models + '/stanford/')
- ms = model_path + '/' +
- StanfordCoreNLP::Config::ModelFolders[:ner] +
- ms['3class']
-
- @@classifiers[language] ||=
- StanfordCoreNLP::CRFClassifier.
- getClassifier(ms)
-
+
+ unless classifier = @@classifiers[language]
+ model = Treat::Loaders::Stanford.find_model(:ner, language)
+ classifier = StanfordCoreNLP::CRFClassifier.getClassifier(model)
+ @@classifiers[language] = classifier
+ end
+
  token_list = StanfordCoreNLP.get_list(tokens)
- sentence = @@classifiers[language].
- classify_sentence(token_list)
-
+ sentence = classifier.classify_sentence(token_list)
  i = 0
- n = 0

  sentence.each do |s_token|
  tag = s_token.get(:answer).to_s.downcase
@@ -49,14 +38,9 @@ class Treat::Workers::Extractors::NameTag::Stanford
  return tag if isolated_token
  if tag
  tokens[i].set :name_tag, tag
- n += 1
  end
  i += 1
  end
-
- entity.set :named_entity_count, n
-
- nil

  end

data/lib/treat/workers/formatters/readers/html.rb CHANGED
@@ -6,7 +6,7 @@
  # https://github.com/iterationlabs/ruby-readability
  class Treat::Workers::Formatters::Readers::HTML

- silence_warnings { require 'jruby-readability' }
+ silence_warnings { require 'ruby-readability' }

  # By default, don't backup the original HTML
  DefaultOptions = {
data/lib/treat/workers/formatters/readers/xml.rb CHANGED
@@ -30,7 +30,7 @@ class Treat::Workers::Formatters::Readers::XML
  @@xml_reader ||= StanfordCoreNLP.load(
  :tokenize, :ssplit, :cleanxml)

- text = StanfordCoreNLP::Text.new(xml)
+ text = StanfordCoreNLP::Annotation.new(xml)
  @@xml_reader.annotate(text)

  text.get(:sentences).each do |sentence|
data/lib/treat/workers/formatters/unserializers/mongo.rb CHANGED
@@ -17,7 +17,7 @@ class Treat::Workers::Formatters::Unserializers::Mongo

  @@database ||= Mongo::Connection.
  new(Treat.databases.mongo.host).
- db(Treat.databases.mongo.db || db)
+ db(db || Treat.databases.mongo.db)

  supertype = Treat::Entities.const_get(
  entity.type.to_s.capitalize.intern).superclass.mn.downcase
data/lib/treat/workers/groupable.rb CHANGED
@@ -5,7 +5,8 @@ module Treat::Workers::Groupable
  bits = self.ancestors[0].to_s.split('::')
  bits.collect! { |bit| bit.ucc }
  file = bits.join('/') + "/#{const.ucc}"
- if not File.readable?(Treat.paths.lib + "#{file}.rb")
+ path = Treat.paths.lib + "#{file}.rb"
+ if not File.readable?(path)
  raise Treat::Exception,
  "File '#{file}.rb' corresponding to " +
  "requested worker #{self}::#{const} " +
data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb CHANGED
@@ -35,9 +35,9 @@ class Treat::Workers::Inflectors::Cardinalizers::Linguistics
  # More specific options when using :type => :ordinal:
  def self.cardinal(entity, options = {})
  options = DefaultOptions.merge(options)
- Treat::Loaders::Linguistics.
- load(options[:language]).
- numwords(entity.to_s, options)
+ lang = entity.language
+ code = Treat::Loaders::Linguistics.load(lang)
+ entity.to_s.send(code).numwords(options)
  end

  end
data/lib/treat/workers/inflectors/conjugators/linguistics.rb CHANGED
@@ -35,13 +35,15 @@ module Treat::Workers::Inflectors::Conjugators::Linguistics

  options = Forms[options[:form].to_s] if options[:form]

- klass = Treat::Loaders::Linguistics.load(entity.language)
+ code = Treat::Loaders::Linguistics.load(entity.language)
+ obj = entity.to_s.send(code)
+
  if options[:mode] == 'infinitive'
- silence_warnings { klass.infinitive(entity.to_s) }
+ obj.infinitive
  elsif options[:mode] == 'participle' && options[:tense] == 'present'
- silence_warnings { klass.present_participle(entity.to_s) }
+ obj.present_participle
  elsif options[:count] == 'plural' && options.size == 1
- silence_warnings { klass.plural_verb(entity.to_s) }
+ obj.plural_verb
  else
  raise Treat::Exception,
  'This combination of modes, tenses, persons ' +
data/lib/treat/workers/inflectors/declensors/linguistics.rb CHANGED
@@ -17,34 +17,27 @@ class Treat::Workers::Inflectors::Declensors::Linguistics

  cat = entity.get(:category)
  return if cat && !POS.include?(cat)
+
  unless options[:count]
  raise Treat::Exception, 'Must supply ' +
  ':count option ("singular" or "plural").'
  end
-
- klass = Treat::Loaders::
- Linguistics.load(entity.language)
- string = entity.to_s
-
- if options[:count].to_s == 'plural'
- if (entity.has?(:category))
- result = ''
- silence_warnings do
- result = klass.send(
- :"plural_#{entity.category}",
- string)
- end
- return result
- else
- return klass.plural(string)
- end

- else
+ unless options[:count].to_s == 'plural'
  raise Treat::Exception,
  "Ruby Linguistics does not support " +
  "singularization of words."
  end

+ lang = entity.language
+ code = Treat::Loaders::Linguistics.load(lang)
+ obj = entity.to_s.send(code)
+
+ if cat = entity.get(:category)
+ method = "plural_#{cat}"
+ obj.send(method)
+ else; obj.plural; end
+
  end

  end
data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb CHANGED
@@ -11,11 +11,11 @@ class Treat::Workers::Inflectors::Ordinalizers::Linguistics

  # Desribe a number in words in ordinal form, using the
  # 'linguistics' gem.
- def self.ordinal(number, options = {})
+ def self.ordinal(entity, options = {})
  options = DefaultOptions.merge(options)
- klass = Treat::Loaders::
- Linguistics.load(options[:language])
- klass.ordinate(number.to_s)
+ lang = entity.language
+ code = Treat::Loaders::Linguistics.load(lang)
+ entity.to_s.send(code).ordinate
  end

  end
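
The four inflector hunks above (cardinalizer, conjugator, declensor, ordinalizer) move from calling class-level methods on the loaded Linguistics module to the mixin style of the linguistics 2.x API: Treat::Loaders::Linguistics.load now appears to return a language code such as :en, and the inflector proxy is obtained from the string itself via entity.to_s.send(code). A minimal standalone sketch of that calling convention, assuming the linguistics gem (~> 2.0) is installed; it is not part of this diff:

    require 'linguistics'
    Linguistics.use(:en)        # mixes the #en proxy into core classes
    puts '5'.en.numwords        # => "five"   (cardinalizer path)
    puts '5'.en.ordinate        # ordinal spelled out in words (ordinalizer path)
    puts 'box'.en.plural        # => "boxes"  (declensor path)
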
data/lib/treat/workers/lexicalizers/sensers/wordnet.rb CHANGED
@@ -35,7 +35,7 @@ class Treat::Workers::Lexicalizers::Sensers::Wordnet
  end

  if !options[:nym].is_a?(Symbol)
- options[:nym] == options[:nym].intern
+ options[:nym] = options[:nym].intern
  end

  if ![:synonyms, :antonyms,
data/lib/treat/workers/lexicalizers/taggers/stanford.rb CHANGED
@@ -1,15 +1,15 @@
- # POS tagging using (i) explicit use of both preceding
- # and following tag contexts via a dependency network
- # representation, (ii) broad use of lexical features,
- # including jointly conditioning on multiple consecutive
- # words, (iii) effective use of priors in conditional
- # loglinear models, and (iv) fine-grained modeling of
+ # POS tagging using (i) explicit use of both preceding
+ # and following tag contexts via a dependency network
+ # representation, (ii) broad use of lexical features,
+ # including jointly conditioning on multiple consecutive
+ # words, (iii) effective use of priors in conditional
+ # loglinear models, and (iv) fine-grained modeling of
  # unknown word features.
  #
  # Original paper: Toutanova, Manning, Klein and Singer.
- # 2003. Feature-Rich Part-of-Speech Tagging with a
- # Cyclic Dependency Network. In Proceedings of the
- # Conference of the North American Chapter of the
+ # 2003. Feature-Rich Part-of-Speech Tagging with a
+ # Cyclic Dependency Network. In Proceedings of the
+ # Conference of the North American Chapter of the
  # Association for Computational Linguistics.
  class Treat::Workers::Lexicalizers::Taggers::Stanford

@@ -21,6 +21,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
  :tagger_model => nil
  }

+ # Shortcut for gem config.
+ Config = StanfordCoreNLP::Config
+
  # Tag the word using one of the Stanford taggers.
  def self.tag(entity, options = {})

@@ -36,9 +39,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
  return 'P' if entity.is_a?(Treat::Entities::Phrase)
  return 'F' if entity.is_a?(Treat::Entities::Fragment)
  return 'G' if entity.is_a?(Treat::Entities::Group)
-
+
  # Handle options and initialize the tagger.
- lang = entity.language
+ lang = entity.language.intern
  init_tagger(lang) unless @@taggers[lang]
  options = get_options(options, lang)
  tokens, t_list = get_token_list(entity)
@@ -46,7 +49,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
  # Do the tagging.
  i = 0
  isolated_token = entity.is_a?(Treat::Entities::Token)
-
+
  @@taggers[lang].apply(t_list).each do |tok|
  tokens[i].set(:tag, tok.tag)
  tokens[i].set(:tag_set,
@@ -59,21 +62,20 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford

  # Initialize the tagger for a language.
  def self.init_tagger(language)
- Treat::Loaders::Stanford.load(language)
- model = StanfordCoreNLP::Config::Models[:pos][language]
- model_path = Treat.libraries.stanford.model_path ||
- Treat.paths.models + 'stanford/'
- model = model_path + StanfordCoreNLP::
- Config::ModelFolders[:pos] + model
- @@taggers[language] ||=
- StanfordCoreNLP::MaxentTagger.new(model)
+ unless @@taggers[language]
+ Treat::Loaders::Stanford.load(language)
+ model = Treat::Loaders::Stanford.find_model(:pos,language)
+ tagger = StanfordCoreNLP::MaxentTagger.new(model)
+ @@taggers[language] = tagger
+ end
+ @@taggers[language]
  end

  # Handle the options for the tagger.
  def self.get_options(options, language)
  options = DefaultOptions.merge(options)
  if options[:tagger_model]
- ::StanfordCoreNLP.set_model('pos.model',
+ StanfordCoreNLP.set_model('pos.model',
  options[:tagger_model])
  end
  options[:tag_set] =
data/lib/treat/workers/processors/parsers/stanford.rb CHANGED
@@ -29,15 +29,14 @@ class Treat::Workers::Processors::Parsers::Stanford
  # instead of displaying it.
  def self.parse(entity, options = {})

- val = entity.to_s
- lang = entity.language
- init(lang, options)
+ val, lang = entity.to_s, entity.language
+ init(lang, options) unless @@parsers[lang]

  entity.check_hasnt_children

  tag_set = StanfordCoreNLP::Config::TagSets[lang]

- text = ::StanfordCoreNLP::Text.new(val)
+ text = ::StanfordCoreNLP::Annotation.new(val)
  @@parsers[lang].annotate(text)

  text.get(:sentences).each do |s|
@@ -50,7 +49,7 @@ class Treat::Workers::Processors::Parsers::Stanford
  entity.set :tag, tag_s
  entity.set :tag_opt, tag_opt if tag_opt
  recurse(s.get(:tree).children[0], entity, tag_set)
- break #######
+ break ####### ? FIX
  else
  recurse(s.get(:tree), entity, tag_set)
  end
@@ -62,26 +61,17 @@ class Treat::Workers::Processors::Parsers::Stanford
  end

  def self.init(lang, options)
- return if @@parsers[lang]
-
  Treat::Loaders::Stanford.load(lang)
-
  options = DefaultOptions.merge(options)
- StanfordCoreNLP.use(lang)
+ StanfordCoreNLP.use(lang.intern)
  if options[:tagger_model]
- ::StanfordCoreNLP.set_model(
- 'pos.model', options[:tagger_model]
- )
+ StanfordCoreNLP.set_model('pos.model', options[:tagger_model])
  end
  if options[:parser_model]
- ::StanfordCoreNLP.set_model(
- 'parser.model', options[:parser_model]
- )
+ StanfordCoreNLP.set_model('parser.model', options[:parser_model])
  end
- @@parsers[lang] ||=
- ::StanfordCoreNLP.load(
- :tokenize, :ssplit, :pos, :lemma, :parse
- )
+ annotators = [:tokenize, :ssplit, :pos, :lemma, :parse]
+ @@parsers[lang] = StanfordCoreNLP.load(*annotators)
  end

  # Helper method which recurses the tree supplied by
@@ -128,7 +118,7 @@ class Treat::Workers::Processors::Parsers::Stanford
  l = java_child.children[0].to_s
  v = java_child.children[0].value.to_s.strip

- # Mhmhmhmhmhm
+ # Mhmhmhmhmhm FIX!
  val = (l == v) ? v : l.split(' ')[-1].gsub(')', '')
  ruby_child = Treat::Entities::Token.from_string(val)
  end
data/lib/treat/workers/processors/segmenters/stanford.rb CHANGED
@@ -32,7 +32,7 @@ class Treat::Workers::Processors::Segmenters::Stanford
  ::StanfordCoreNLP.load(:tokenize, :ssplit)

  s = entity.to_s
- text = ::StanfordCoreNLP::Text.new(s)
+ text = ::StanfordCoreNLP::Annotation.new(s)

  @@segmenter.annotate(text)
  text.get(:sentences).each do |sentence|
data/lib/treat/workers/processors/tokenizers/maxent.rb ADDED
@@ -0,0 +1,29 @@
+ # Maximum entropy tokenization supplied by OpenNLP.
+ class Treat::Workers::Processors::Tokenizers::Maxent
+
+ require 'open-nlp'
+ OpenNLP.load
+
+ # Maximum entropy tokenization.
+ def self.tokenize(entity, options = {})
+
+ lang = entity.language
+ str = entity.to_s
+
+ unless @@tokenizers[lang]
+ OpenNLP.use(lang.intern)
+ @@tokenizers[lang] =
+ OpenNLP::TokenizerME.new
+ end
+
+ tokenizer = @@tokenizers[lang]
+ tokens = tokenizer.tokenize(str).to_a
+
+ tokens.each do |token|
+ entity << Treat::Entities
+ ::Token.from_string(chunk)
+ end
+
+ end
+
+ end
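
For reference, the OpenNLP calls wrapped by the new Maxent tokenizer above can be exercised directly through the open-nlp gem. A small sketch, assuming that gem and its default (English) models are installed; it is not part of this diff and the output shown in the comment is illustrative:

    require 'open-nlp'
    OpenNLP.load                                  # load the default model set
    tokenizer = OpenNLP::TokenizerME.new          # maximum-entropy tokenizer
    p tokenizer.tokenize('Hello, world!').to_a    # e.g. ["Hello", ",", "world", "!"]
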
data/lib/treat/workers/processors/tokenizers/stanford.rb CHANGED
@@ -27,11 +27,9 @@ class Treat::Workers::Processors::Tokenizers::Stanford
  # replacing "..." by ``...''. Off by default.
  def self.tokenize(entity, options = {})
  options = DefaultOptions.merge(options)
- @@tokenizer ||=
- ::StanfordCoreNLP.load(:tokenize)
+ @@tokenizer ||= StanfordCoreNLP.load(:tokenize)
  entity.check_hasnt_children
- text = ::StanfordCoreNLP::
- Text.new(entity.to_s)
+ text = ::StanfordCoreNLP::Annotation.new(entity.to_s)
  @@tokenizer.annotate(text)
  add_tokens(entity, text.get(:tokens), options)
  end
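
Several hunks above (the XML reader, parser, segmenter and Stanford tokenizer) replace StanfordCoreNLP::Text with StanfordCoreNLP::Annotation, tracking the class rename in the stanford-core-nlp bindings this version targets. A minimal sketch of that annotation flow in isolation, assuming the stanford-core-nlp gem with its jars and models is installed; it is not part of this diff:

    require 'stanford-core-nlp'
    pipeline = StanfordCoreNLP.load(:tokenize, :ssplit)   # tokenizer + sentence splitter
    text = StanfordCoreNLP::Annotation.new('One sentence. Then another.')
    pipeline.annotate(text)
    text.get(:sentences).each { |sentence| puts sentence.to_s }
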
data/lib/treat.rb CHANGED
@@ -18,5 +18,6 @@ module Treat
  require_relative 'treat/exception'
  require_relative 'treat/autoload'
  require_relative 'treat/modules'
+ require_relative 'treat/builder'

  end
data/spec/helper.rb CHANGED
@@ -1,20 +1,23 @@
  require_relative '../lib/treat'

+ include Treat::Core::DSL
+
  module Treat::Specs

  require 'rspec'

  # Some configuration options for devel.
+
  Treat.databases.mongo.db = 'treat_test'
  Treat.libraries.stanford.model_path =
- '/ruby/stanford/stanford-core-nlp-all/'
+ '/ruby/stanford-core-nlp-minimal/models/'
  Treat.libraries.stanford.jar_path =
- '/ruby/stanford/stanford-core-nlp-all/'
+ '/ruby/stanford-core-nlp-minimal/bin/'
  Treat.libraries.punkt.model_path =
- '/ruby/punkt/'
+ '/ruby/punkt/models/'
  Treat.libraries.reuters.model_path =
- '/ruby/reuters/'
-
+ '/ruby/reuters/models/'
+
  ModuleFiles = ['entities/*.rb', 'learning/*.rb']

  # Provide helper functions for running specs.
@@ -25,7 +28,6 @@ module Treat::Specs
  require 'simplecov'
  SimpleCov.start do
  add_filter '/spec/'
- add_filter '/config/'
  add_group 'Core', 'treat/core'
  add_group 'Entities', 'treat/entities'
  add_group 'Helpers', 'treat/helpers'
data/spec/sandbox.rb CHANGED
@@ -1,13 +1,25 @@
  # encoding: utf-8
  require_relative '../lib/treat'

- require 'treat'
- include Treat::Core::DSL
-
- collection Treat.paths.spec + '/workers/examples/english/economist'
- collection.apply :chunk, :segment, :tokenize
- puts collection.topic_words.inspect
+ Treat.databases.mongo.db = 'treat_test'
+ Treat.libraries.stanford.model_path =
+ '/ruby/stanford-core-nlp-minimal/models/'
+ Treat.libraries.stanford.jar_path =
+ '/ruby/stanford-core-nlp-minimal/bin/'
+ Treat.libraries.punkt.model_path =
+ '/ruby/punkt/models/'
+ Treat.libraries.reuters.model_path =
+ '/ruby/reuters/models/'
+
+ # include Treat::Core::DSL
+
+ Treat::Builder.new do
+ s = sentence "Hello, world!"
+ s.print_tree
+ end

+ p = paragraph('A walk in the park. A trip on a boat.').segment
+ p.visualize :dot, file: 'test.dot'
  =begin

  g = group("I was running")