treat 2.0.2 → 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. data/files/21552208.html +786 -0
  2. data/files/nethttp-cheat-sheet-2940.html +393 -0
  3. data/lib/treat/builder.rb +6 -0
  4. data/lib/treat/config/data/languages/agnostic.rb +2 -2
  5. data/lib/treat/core/server.rb +1 -0
  6. data/lib/treat/entities/entity/buildable.rb +1 -1
  7. data/lib/treat/loaders/linguistics.rb +6 -7
  8. data/lib/treat/loaders/stanford.rb +45 -11
  9. data/lib/treat/version.rb +1 -1
  10. data/lib/treat/workers/categorizable.rb +30 -32
  11. data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -24
  12. data/lib/treat/workers/formatters/readers/html.rb +1 -1
  13. data/lib/treat/workers/formatters/readers/xml.rb +1 -1
  14. data/lib/treat/workers/formatters/unserializers/mongo.rb +1 -1
  15. data/lib/treat/workers/groupable.rb +2 -1
  16. data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +3 -3
  17. data/lib/treat/workers/inflectors/conjugators/linguistics.rb +6 -4
  18. data/lib/treat/workers/inflectors/declensors/linguistics.rb +11 -18
  19. data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +4 -4
  20. data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +1 -1
  21. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +23 -21
  22. data/lib/treat/workers/processors/parsers/stanford.rb +10 -20
  23. data/lib/treat/workers/processors/segmenters/stanford.rb +1 -1
  24. data/lib/treat/workers/processors/tokenizers/maxent.rb +29 -0
  25. data/lib/treat/workers/processors/tokenizers/stanford.rb +2 -4
  26. data/lib/treat.rb +1 -0
  27. data/spec/helper.rb +8 -6
  28. data/spec/sandbox.rb +18 -6
  29. data/spec/workers/agnostic.rb +76 -29
  30. data/spec/workers/english.rb +23 -73
  31. data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
  32. metadata +6 -18
@@ -9,26 +9,24 @@ module Treat::Workers::Categorizable
9
9
  @@lookup = {}
10
10
 
11
11
  # Find a worker group based on method.
12
- def lookup(method)
13
- @@lookup[method]
14
- end
12
+ def lookup(method); @@lookup[method]; end
15
13
 
16
14
  def categorize!
17
15
  Treat.workers.members.each do |cat|
18
- create_category(cat.
19
- capitalize.intern,
20
- load_category_conf(cat))
16
+ name = cat.capitalize.intern
17
+ conf = load_category_conf(cat)
18
+ create_category(name, conf)
21
19
  end
22
20
  end
23
21
 
24
22
  def load_category_conf(name)
25
- config = Treat.workers[name]
26
- if config.nil?
23
+ if !Treat.workers.respond_to?(name)
27
24
  raise Treat::Exception,
28
25
  "The configuration file " +
29
26
  "for #{cat_sym} is missing."
27
+ else
28
+ Treat.workers[name]
30
29
  end
31
- config
32
30
  end
33
31
 
34
32
  def create_category(name, conf)
@@ -37,11 +35,11 @@ module Treat::Workers::Categorizable
37
35
  conf.each_pair do |group, worker|
38
36
  name = group.to_s.cc.intern
39
37
  category.module_eval do
40
- @@methods = []; def methods;
41
- @@methods; end; def groups;
42
- self.constants; end
38
+ @@methods = []
39
+ def methods; @@methods; end
40
+ def groups; self.constants; end
43
41
  end
44
- self.create_group(name, worker, category)
42
+ create_group(name, worker, category)
45
43
  end
46
44
  end
47
45
 
@@ -54,24 +52,6 @@ module Treat::Workers::Categorizable
54
52
  @@lookup[group.method] = group
55
53
  end
56
54
 
57
- def bind_group_targets(group)
58
- group.targets.each do |entity_type|
59
- entity = Treat::Entities.
60
- const_get(entity_type.cc)
61
- entity.class_eval do
62
- add_workers group
63
- end
64
- end
65
- end
66
-
67
- def register_group_presets(group, conf)
68
- return unless conf.respond_to? :presets
69
- conf.presets.each do |m|
70
- @@methods << m
71
- @@lookup[m] = group
72
- end
73
- end
74
-
75
55
  def set_group_options(group, conf)
76
56
  group.module_eval do
77
57
  extend Treat::Workers::Groupable
@@ -91,5 +71,23 @@ module Treat::Workers::Categorizable
91
71
  end
92
72
  end
93
73
  end
94
-
74
+
75
+ def bind_group_targets(group)
76
+ group.targets.each do |entity_type|
77
+ entity = Treat::Entities.
78
+ const_get(entity_type.cc)
79
+ entity.class_eval do
80
+ add_workers group
81
+ end
82
+ end
83
+ end
84
+
85
+ def register_group_presets(group, conf)
86
+ return unless conf.respond_to?(:presets)
87
+ conf.presets.each do |method|
88
+ @@methods << method
89
+ @@lookup[method] = group
90
+ end
91
+ end
92
+
95
93
  end
@@ -16,32 +16,21 @@ class Treat::Workers::Extractors::NameTag::Stanford
16
16
 
17
17
  def self.name_tag(entity, options = {})
18
18
 
19
- pp = nil
20
-
21
19
  language = entity.language
22
-
23
20
  Treat::Loaders::Stanford.load(language)
24
21
 
25
22
  isolated_token = entity.is_a?(Treat::Entities::Token)
26
23
  tokens = isolated_token ? [entity] : entity.tokens
27
-
28
- ms = StanfordCoreNLP::Config::Models[:ner][language.intern]
29
- model_path = Treat.libraries.stanford.model_path ||
30
- (Treat.paths.models + '/stanford/')
31
- ms = model_path + '/' +
32
- StanfordCoreNLP::Config::ModelFolders[:ner] +
33
- ms['3class']
34
-
35
- @@classifiers[language] ||=
36
- StanfordCoreNLP::CRFClassifier.
37
- getClassifier(ms)
38
-
24
+
25
+ unless classifier = @@classifiers[language]
26
+ model = Treat::Loaders::Stanford.find_model(:ner, language)
27
+ classifier = StanfordCoreNLP::CRFClassifier.getClassifier(model)
28
+ @@classifiers[language] = classifier
29
+ end
30
+
39
31
  token_list = StanfordCoreNLP.get_list(tokens)
40
- sentence = @@classifiers[language].
41
- classify_sentence(token_list)
42
-
32
+ sentence = classifier.classify_sentence(token_list)
43
33
  i = 0
44
- n = 0
45
34
 
46
35
  sentence.each do |s_token|
47
36
  tag = s_token.get(:answer).to_s.downcase
@@ -49,14 +38,9 @@ class Treat::Workers::Extractors::NameTag::Stanford
49
38
  return tag if isolated_token
50
39
  if tag
51
40
  tokens[i].set :name_tag, tag
52
- n += 1
53
41
  end
54
42
  i += 1
55
43
  end
56
-
57
- entity.set :named_entity_count, n
58
-
59
- nil
60
44
 
61
45
  end
62
46
 
@@ -6,7 +6,7 @@
6
6
  # https://github.com/iterationlabs/ruby-readability
7
7
  class Treat::Workers::Formatters::Readers::HTML
8
8
 
9
- silence_warnings { require 'jruby-readability' }
9
+ silence_warnings { require 'ruby-readability' }
10
10
 
11
11
  # By default, don't backup the original HTML
12
12
  DefaultOptions = {
@@ -30,7 +30,7 @@ class Treat::Workers::Formatters::Readers::XML
30
30
  @@xml_reader ||= StanfordCoreNLP.load(
31
31
  :tokenize, :ssplit, :cleanxml)
32
32
 
33
- text = StanfordCoreNLP::Text.new(xml)
33
+ text = StanfordCoreNLP::Annotation.new(xml)
34
34
  @@xml_reader.annotate(text)
35
35
 
36
36
  text.get(:sentences).each do |sentence|
@@ -17,7 +17,7 @@ class Treat::Workers::Formatters::Unserializers::Mongo
17
17
 
18
18
  @@database ||= Mongo::Connection.
19
19
  new(Treat.databases.mongo.host).
20
- db(Treat.databases.mongo.db || db)
20
+ db(db || Treat.databases.mongo.db)
21
21
 
22
22
  supertype = Treat::Entities.const_get(
23
23
  entity.type.to_s.capitalize.intern).superclass.mn.downcase
@@ -5,7 +5,8 @@ module Treat::Workers::Groupable
5
5
  bits = self.ancestors[0].to_s.split('::')
6
6
  bits.collect! { |bit| bit.ucc }
7
7
  file = bits.join('/') + "/#{const.ucc}"
8
- if not File.readable?(Treat.paths.lib + "#{file}.rb")
8
+ path = Treat.paths.lib + "#{file}.rb"
9
+ if not File.readable?(path)
9
10
  raise Treat::Exception,
10
11
  "File '#{file}.rb' corresponding to " +
11
12
  "requested worker #{self}::#{const} " +
@@ -35,9 +35,9 @@ class Treat::Workers::Inflectors::Cardinalizers::Linguistics
35
35
  # More specific options when using :type => :ordinal:
36
36
  def self.cardinal(entity, options = {})
37
37
  options = DefaultOptions.merge(options)
38
- Treat::Loaders::Linguistics.
39
- load(options[:language]).
40
- numwords(entity.to_s, options)
38
+ lang = entity.language
39
+ code = Treat::Loaders::Linguistics.load(lang)
40
+ entity.to_s.send(code).numwords(options)
41
41
  end
42
42
 
43
43
  end
@@ -35,13 +35,15 @@ module Treat::Workers::Inflectors::Conjugators::Linguistics
35
35
 
36
36
  options = Forms[options[:form].to_s] if options[:form]
37
37
 
38
- klass = Treat::Loaders::Linguistics.load(entity.language)
38
+ code = Treat::Loaders::Linguistics.load(entity.language)
39
+ obj = entity.to_s.send(code)
40
+
39
41
  if options[:mode] == 'infinitive'
40
- silence_warnings { klass.infinitive(entity.to_s) }
42
+ obj.infinitive
41
43
  elsif options[:mode] == 'participle' && options[:tense] == 'present'
42
- silence_warnings { klass.present_participle(entity.to_s) }
44
+ obj.present_participle
43
45
  elsif options[:count] == 'plural' && options.size == 1
44
- silence_warnings { klass.plural_verb(entity.to_s) }
46
+ obj.plural_verb
45
47
  else
46
48
  raise Treat::Exception,
47
49
  'This combination of modes, tenses, persons ' +
@@ -17,34 +17,27 @@ class Treat::Workers::Inflectors::Declensors::Linguistics
17
17
 
18
18
  cat = entity.get(:category)
19
19
  return if cat && !POS.include?(cat)
20
+
20
21
  unless options[:count]
21
22
  raise Treat::Exception, 'Must supply ' +
22
23
  ':count option ("singular" or "plural").'
23
24
  end
24
-
25
- klass = Treat::Loaders::
26
- Linguistics.load(entity.language)
27
- string = entity.to_s
28
-
29
- if options[:count].to_s == 'plural'
30
- if (entity.has?(:category))
31
- result = ''
32
- silence_warnings do
33
- result = klass.send(
34
- :"plural_#{entity.category}",
35
- string)
36
- end
37
- return result
38
- else
39
- return klass.plural(string)
40
- end
41
25
 
42
- else
26
+ unless options[:count].to_s == 'plural'
43
27
  raise Treat::Exception,
44
28
  "Ruby Linguistics does not support " +
45
29
  "singularization of words."
46
30
  end
47
31
 
32
+ lang = entity.language
33
+ code = Treat::Loaders::Linguistics.load(lang)
34
+ obj = entity.to_s.send(code)
35
+
36
+ if cat = entity.get(:category)
37
+ method = "plural_#{cat}"
38
+ obj.send(method)
39
+ else; obj.plural; end
40
+
48
41
  end
49
42
 
50
43
  end
@@ -11,11 +11,11 @@ class Treat::Workers::Inflectors::Ordinalizers::Linguistics
11
11
 
12
12
  # Describe a number in words in ordinal form, using the
13
13
  # 'linguistics' gem.
14
- def self.ordinal(number, options = {})
14
+ def self.ordinal(entity, options = {})
15
15
  options = DefaultOptions.merge(options)
16
- klass = Treat::Loaders::
17
- Linguistics.load(options[:language])
18
- klass.ordinate(number.to_s)
16
+ lang = entity.language
17
+ code = Treat::Loaders::Linguistics.load(lang)
18
+ entity.to_s.send(code).ordinate
19
19
  end
20
20
 
21
21
  end
@@ -35,7 +35,7 @@ class Treat::Workers::Lexicalizers::Sensers::Wordnet
35
35
  end
36
36
 
37
37
  if !options[:nym].is_a?(Symbol)
38
- options[:nym] == options[:nym].intern
38
+ options[:nym] = options[:nym].intern
39
39
  end
40
40
 
41
41
  if ![:synonyms, :antonyms,
@@ -1,15 +1,15 @@
1
- # POS tagging using (i) explicit use of both preceding
2
- # and following tag contexts via a dependency network
3
- # representation, (ii) broad use of lexical features,
4
- # including jointly conditioning on multiple consecutive
5
- # words, (iii) effective use of priors in conditional
6
- # loglinear models, and (iv) fine-grained modeling of
1
+ # POS tagging using (i) explicit use of both preceding
2
+ # and following tag contexts via a dependency network
3
+ # representation, (ii) broad use of lexical features,
4
+ # including jointly conditioning on multiple consecutive
5
+ # words, (iii) effective use of priors in conditional
6
+ # loglinear models, and (iv) fine-grained modeling of
7
7
  # unknown word features.
8
8
  #
9
9
  # Original paper: Toutanova, Manning, Klein and Singer.
10
- # 2003. Feature-Rich Part-of-Speech Tagging with a
11
- # Cyclic Dependency Network. In Proceedings of the
12
- # Conference of the North American Chapter of the
10
+ # 2003. Feature-Rich Part-of-Speech Tagging with a
11
+ # Cyclic Dependency Network. In Proceedings of the
12
+ # Conference of the North American Chapter of the
13
13
  # Association for Computational Linguistics.
14
14
  class Treat::Workers::Lexicalizers::Taggers::Stanford
15
15
 
@@ -21,6 +21,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
21
21
  :tagger_model => nil
22
22
  }
23
23
 
24
+ # Shortcut for gem config.
25
+ Config = StanfordCoreNLP::Config
26
+
24
27
  # Tag the word using one of the Stanford taggers.
25
28
  def self.tag(entity, options = {})
26
29
 
@@ -36,9 +39,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
36
39
  return 'P' if entity.is_a?(Treat::Entities::Phrase)
37
40
  return 'F' if entity.is_a?(Treat::Entities::Fragment)
38
41
  return 'G' if entity.is_a?(Treat::Entities::Group)
39
-
42
+
40
43
  # Handle options and initialize the tagger.
41
- lang = entity.language
44
+ lang = entity.language.intern
42
45
  init_tagger(lang) unless @@taggers[lang]
43
46
  options = get_options(options, lang)
44
47
  tokens, t_list = get_token_list(entity)
@@ -46,7 +49,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
46
49
  # Do the tagging.
47
50
  i = 0
48
51
  isolated_token = entity.is_a?(Treat::Entities::Token)
49
-
52
+
50
53
  @@taggers[lang].apply(t_list).each do |tok|
51
54
  tokens[i].set(:tag, tok.tag)
52
55
  tokens[i].set(:tag_set,
@@ -59,21 +62,20 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
59
62
 
60
63
  # Initialize the tagger for a language.
61
64
  def self.init_tagger(language)
62
- Treat::Loaders::Stanford.load(language)
63
- model = StanfordCoreNLP::Config::Models[:pos][language]
64
- model_path = Treat.libraries.stanford.model_path ||
65
- Treat.paths.models + 'stanford/'
66
- model = model_path + StanfordCoreNLP::
67
- Config::ModelFolders[:pos] + model
68
- @@taggers[language] ||=
69
- StanfordCoreNLP::MaxentTagger.new(model)
65
+ unless @@taggers[language]
66
+ Treat::Loaders::Stanford.load(language)
67
+ model = Treat::Loaders::Stanford.find_model(:pos,language)
68
+ tagger = StanfordCoreNLP::MaxentTagger.new(model)
69
+ @@taggers[language] = tagger
70
+ end
71
+ @@taggers[language]
70
72
  end
71
73
 
72
74
  # Handle the options for the tagger.
73
75
  def self.get_options(options, language)
74
76
  options = DefaultOptions.merge(options)
75
77
  if options[:tagger_model]
76
- ::StanfordCoreNLP.set_model('pos.model',
78
+ StanfordCoreNLP.set_model('pos.model',
77
79
  options[:tagger_model])
78
80
  end
79
81
  options[:tag_set] =
@@ -29,15 +29,14 @@ class Treat::Workers::Processors::Parsers::Stanford
29
29
  # instead of displaying it.
30
30
  def self.parse(entity, options = {})
31
31
 
32
- val = entity.to_s
33
- lang = entity.language
34
- init(lang, options)
32
+ val, lang = entity.to_s, entity.language
33
+ init(lang, options) unless @@parsers[lang]
35
34
 
36
35
  entity.check_hasnt_children
37
36
 
38
37
  tag_set = StanfordCoreNLP::Config::TagSets[lang]
39
38
 
40
- text = ::StanfordCoreNLP::Text.new(val)
39
+ text = ::StanfordCoreNLP::Annotation.new(val)
41
40
  @@parsers[lang].annotate(text)
42
41
 
43
42
  text.get(:sentences).each do |s|
@@ -50,7 +49,7 @@ class Treat::Workers::Processors::Parsers::Stanford
50
49
  entity.set :tag, tag_s
51
50
  entity.set :tag_opt, tag_opt if tag_opt
52
51
  recurse(s.get(:tree).children[0], entity, tag_set)
53
- break #######
52
+ break ####### ? FIX
54
53
  else
55
54
  recurse(s.get(:tree), entity, tag_set)
56
55
  end
@@ -62,26 +61,17 @@ class Treat::Workers::Processors::Parsers::Stanford
62
61
  end
63
62
 
64
63
  def self.init(lang, options)
65
- return if @@parsers[lang]
66
-
67
64
  Treat::Loaders::Stanford.load(lang)
68
-
69
65
  options = DefaultOptions.merge(options)
70
- StanfordCoreNLP.use(lang)
66
+ StanfordCoreNLP.use(lang.intern)
71
67
  if options[:tagger_model]
72
- ::StanfordCoreNLP.set_model(
73
- 'pos.model', options[:tagger_model]
74
- )
68
+ StanfordCoreNLP.set_model('pos.model', options[:tagger_model])
75
69
  end
76
70
  if options[:parser_model]
77
- ::StanfordCoreNLP.set_model(
78
- 'parser.model', options[:parser_model]
79
- )
71
+ StanfordCoreNLP.set_model('parser.model', options[:parser_model])
80
72
  end
81
- @@parsers[lang] ||=
82
- ::StanfordCoreNLP.load(
83
- :tokenize, :ssplit, :pos, :lemma, :parse
84
- )
73
+ annotators = [:tokenize, :ssplit, :pos, :lemma, :parse]
74
+ @@parsers[lang] = StanfordCoreNLP.load(*annotators)
85
75
  end
86
76
 
87
77
  # Helper method which recurses the tree supplied by
@@ -128,7 +118,7 @@ class Treat::Workers::Processors::Parsers::Stanford
128
118
  l = java_child.children[0].to_s
129
119
  v = java_child.children[0].value.to_s.strip
130
120
 
131
- # Mhmhmhmhmhm
121
+ # Mhmhmhmhmhm FIX!
132
122
  val = (l == v) ? v : l.split(' ')[-1].gsub(')', '')
133
123
  ruby_child = Treat::Entities::Token.from_string(val)
134
124
  end
@@ -32,7 +32,7 @@ class Treat::Workers::Processors::Segmenters::Stanford
32
32
  ::StanfordCoreNLP.load(:tokenize, :ssplit)
33
33
 
34
34
  s = entity.to_s
35
- text = ::StanfordCoreNLP::Text.new(s)
35
+ text = ::StanfordCoreNLP::Annotation.new(s)
36
36
 
37
37
  @@segmenter.annotate(text)
38
38
  text.get(:sentences).each do |sentence|
@@ -0,0 +1,29 @@
1
+ # Maximum entropy tokenization supplied by OpenNLP.
2
+ class Treat::Workers::Processors::Tokenizers::Maxent
3
+
4
+ require 'open-nlp'
5
+ OpenNLP.load
6
+
7
+ # Maximum entropy tokenization.
8
+ def self.tokenize(entity, options = {})
9
+
10
+ lang = entity.language
11
+ str = entity.to_s
12
+
13
+ unless @@tokenizers[lang]
14
+ OpenNLP.use(lang.intern)
15
+ @@tokenizers[lang] =
16
+ OpenNLP::TokenizerME.new
17
+ end
18
+
19
+ tokenizer = @@tokenizers[lang]
20
+ tokens = tokenizer.tokenize(str).to_a
21
+
22
+ tokens.each do |token|
23
+ entity << Treat::Entities
24
+ ::Token.from_string(chunk)
25
+ end
26
+
27
+ end
28
+
29
+ end
@@ -27,11 +27,9 @@ class Treat::Workers::Processors::Tokenizers::Stanford
27
27
  # replacing "..." by ``...''. Off by default.
28
28
  def self.tokenize(entity, options = {})
29
29
  options = DefaultOptions.merge(options)
30
- @@tokenizer ||=
31
- ::StanfordCoreNLP.load(:tokenize)
30
+ @@tokenizer ||= StanfordCoreNLP.load(:tokenize)
32
31
  entity.check_hasnt_children
33
- text = ::StanfordCoreNLP::
34
- Text.new(entity.to_s)
32
+ text = ::StanfordCoreNLP::Annotation.new(entity.to_s)
35
33
  @@tokenizer.annotate(text)
36
34
  add_tokens(entity, text.get(:tokens), options)
37
35
  end
data/lib/treat.rb CHANGED
@@ -18,5 +18,6 @@ module Treat
18
18
  require_relative 'treat/exception'
19
19
  require_relative 'treat/autoload'
20
20
  require_relative 'treat/modules'
21
+ require_relative 'treat/builder'
21
22
 
22
23
  end
data/spec/helper.rb CHANGED
@@ -1,20 +1,23 @@
1
1
  require_relative '../lib/treat'
2
2
 
3
+ include Treat::Core::DSL
4
+
3
5
  module Treat::Specs
4
6
 
5
7
  require 'rspec'
6
8
 
7
9
  # Some configuration options for devel.
10
+
8
11
  Treat.databases.mongo.db = 'treat_test'
9
12
  Treat.libraries.stanford.model_path =
10
- '/ruby/stanford/stanford-core-nlp-all/'
13
+ '/ruby/stanford-core-nlp-minimal/models/'
11
14
  Treat.libraries.stanford.jar_path =
12
- '/ruby/stanford/stanford-core-nlp-all/'
15
+ '/ruby/stanford-core-nlp-minimal/bin/'
13
16
  Treat.libraries.punkt.model_path =
14
- '/ruby/punkt/'
17
+ '/ruby/punkt/models/'
15
18
  Treat.libraries.reuters.model_path =
16
- '/ruby/reuters/'
17
-
19
+ '/ruby/reuters/models/'
20
+
18
21
  ModuleFiles = ['entities/*.rb', 'learning/*.rb']
19
22
 
20
23
  # Provide helper functions for running specs.
@@ -25,7 +28,6 @@ module Treat::Specs
25
28
  require 'simplecov'
26
29
  SimpleCov.start do
27
30
  add_filter '/spec/'
28
- add_filter '/config/'
29
31
  add_group 'Core', 'treat/core'
30
32
  add_group 'Entities', 'treat/entities'
31
33
  add_group 'Helpers', 'treat/helpers'
data/spec/sandbox.rb CHANGED
@@ -1,13 +1,25 @@
1
1
  # encoding: utf-8
2
2
  require_relative '../lib/treat'
3
3
 
4
- require 'treat'
5
- include Treat::Core::DSL
6
-
7
- collection Treat.paths.spec + '/workers/examples/english/economist'
8
- collection.apply :chunk, :segment, :tokenize
9
- puts collection.topic_words.inspect
4
+ Treat.databases.mongo.db = 'treat_test'
5
+ Treat.libraries.stanford.model_path =
6
+ '/ruby/stanford-core-nlp-minimal/models/'
7
+ Treat.libraries.stanford.jar_path =
8
+ '/ruby/stanford-core-nlp-minimal/bin/'
9
+ Treat.libraries.punkt.model_path =
10
+ '/ruby/punkt/models/'
11
+ Treat.libraries.reuters.model_path =
12
+ '/ruby/reuters/models/'
13
+
14
+ # include Treat::Core::DSL
15
+
16
+ Treat::Builder.new do
17
+ s = sentence "Hello, world!"
18
+ s.print_tree
19
+ end
10
20
 
21
+ p = paragraph('A walk in the park. A trip on a boat.').segment
22
+ p.visualize :dot, file: 'test.dot'
11
23
  =begin
12
24
 
13
25
  g = group("I was running")