treat 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -0
- data/lib/treat/config/data/core.rb +3 -1
- data/lib/treat/config/data/languages/agnostic.rb +1 -1
- data/lib/treat/core/dsl.rb +12 -44
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/name_tag/stanford.rb +1 -1
- data/lib/treat/workers/extractors/topic_words/lda.rb +1 -1
- data/lib/treat/workers/formatters/readers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/readers/html.rb +4 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +1 -1
- data/lib/treat/workers/groupable.rb +1 -3
- data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +3 -2
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +12 -2
- data/lib/treat/workers/lexicalizers/taggers/brill.rb +2 -1
- data/lib/treat/workers/lexicalizers/taggers/lingua.rb +3 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -5
- data/spec/entities/collection.rb +2 -2
- data/spec/entities/entity.rb +4 -4
- data/spec/helper.rb +16 -68
- data/spec/{core → learning}/data_set.rb +0 -0
- data/spec/{core → learning}/export.rb +0 -0
- data/spec/{core → learning}/problem.rb +0 -0
- data/spec/{core → learning}/question.rb +0 -0
- data/spec/sandbox.rb +14 -3
- data/spec/workers/agnostic.rb +80 -30
- data/spec/workers/english.rb +475 -190
- metadata +6 -11
- data/files/21552208.html +0 -792
- data/files/nethttp-cheat-sheet-2940.html +0 -392
- data/lib/treat/config/data/config.rb +0 -50
- data/spec/workers/language.rb +0 -280
- data/spec/workers.rb +0 -28
data/README.md
CHANGED
@@ -20,6 +20,17 @@ Treat is a toolkit for natural language processing and computational linguistics
|
|
20
20
|
|
21
21
|
I am actively seeking developers that can help maintain and expand this project. You can find a list of ideas for contributing to the project [here](https://github.com/louismullie/treat/wiki/Contributing).
|
22
22
|
|
23
|
+
**Authors**
|
24
|
+
|
25
|
+
Lead developer: @louismullie [[Twitter](https://twitter.com/LouisMullie)]
|
26
|
+
|
27
|
+
Contributors:
|
28
|
+
- @bdigital
|
29
|
+
- @automatedtendencies
|
30
|
+
- @LeFnord
|
31
|
+
- @darkphantum
|
32
|
+
- @whistlerbrk
|
33
|
+
|
23
34
|
**License**
|
24
35
|
|
25
36
|
This software is released under the [GPL License](https://github.com/louismullie/treat/wiki/License-Information) and includes software released under the GPL, Ruby, Apache 2.0 and MIT licenses.
|
@@ -3,7 +3,7 @@
|
|
3
3
|
'nokogiri', 'ferret',
|
4
4
|
'bson_ext', 'mongo', 'lda-ruby',
|
5
5
|
'stanford-core-nlp', 'linguistics',
|
6
|
-
'
|
6
|
+
'jruby-readability', 'whatlanguage',
|
7
7
|
'chronic', 'nickel', 'decisiontree',
|
8
8
|
'rb-libsvm', 'ruby-fann', 'zip',
|
9
9
|
'tf-idf-similarity', 'narray'
|
data/lib/treat/core/dsl.rb
CHANGED
@@ -1,53 +1,21 @@
|
|
1
1
|
module Treat::Core::DSL
|
2
2
|
|
3
|
-
# Message for deprecation of old DSL syntax.
|
4
|
-
DeprecationMessage = "The DSL that used " +
|
5
|
-
"capitalized entity names is now deprecated. " +
|
6
|
-
"Use `include Treat::Core::DSL` along with " +
|
7
|
-
"lowercase names from now on."
|
8
|
-
|
9
3
|
# Map all classes in Treat::Entities to
|
10
4
|
# a global builder function (entity, word,
|
11
5
|
# phrase, punctuation, symbol, list, etc.)
|
12
6
|
def self.included(base)
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
define_method(mname.capitalize) do |*args|
|
26
|
-
raise DeprecationMessage
|
27
|
-
end
|
28
|
-
old_mm = instance_method(:method_missing)
|
29
|
-
define_method(:method_missing) do |sym,*args,&block|
|
30
|
-
return klass.build(*args) if sym == mname
|
31
|
-
old_mm.bind(self).call(sym,*args,&block)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
# Map all classes in the Learning module
|
38
|
-
# to a global builder function. Defines:
|
39
|
-
# dataset, export, feature, tag, problem
|
40
|
-
# question.
|
41
|
-
def self.sweeten_learning(base, on = true)
|
42
|
-
Treat::Learning.constants.each do |kname|
|
43
|
-
mname = kname.downcase
|
44
|
-
klass = Treat::Learning.const_get(kname)
|
45
|
-
base.class_eval do
|
46
|
-
old_mm = instance_method(:method_missing)
|
47
|
-
define_method(:method_missing) do |sym,*args,&block|
|
48
|
-
return klass.new(*args) if sym == mname
|
49
|
-
old_mm.bind(self).call(sym,*args,&block)
|
50
|
-
end
|
7
|
+
def method_missing(sym,*args,&block)
|
8
|
+
@@entities ||= Treat.core.entities.list
|
9
|
+
@@learning ||= Treat.core.learning.list
|
10
|
+
if @@entities.include?(sym)
|
11
|
+
klass = Treat::Entities.const_get(sym.cc)
|
12
|
+
return klass.build(*args)
|
13
|
+
elsif @@learning.include?(sym)
|
14
|
+
klass = Treat::Learning.const_get(sym.cc)
|
15
|
+
return klass.new(*args)
|
16
|
+
else
|
17
|
+
super(sym,*args,&block)
|
18
|
+
raise "Uncaught method ended up in Treat DSL."
|
51
19
|
end
|
52
20
|
end
|
53
21
|
end
|
data/lib/treat/version.rb
CHANGED
@@ -25,7 +25,7 @@ class Treat::Workers::Extractors::NameTag::Stanford
|
|
25
25
|
isolated_token = entity.is_a?(Treat::Entities::Token)
|
26
26
|
tokens = isolated_token ? [entity] : entity.tokens
|
27
27
|
|
28
|
-
ms = StanfordCoreNLP::Config::Models[:ner][language]
|
28
|
+
ms = StanfordCoreNLP::Config::Models[:ner][language.intern]
|
29
29
|
model_path = Treat.libraries.stanford.model_path ||
|
30
30
|
(Treat.paths.models + '/stanford/')
|
31
31
|
ms = model_path + '/' +
|
@@ -12,7 +12,9 @@ class Treat::Workers::Formatters::Readers::Autoselect
|
|
12
12
|
# - (Symbol) :default_to => format to default to.
|
13
13
|
def self.read(document, options = {})
|
14
14
|
options = DefaultOptions.merge(options)
|
15
|
-
|
15
|
+
fmt = detect_format(document.file, options[:default_to])
|
16
|
+
Treat::Workers::Formatters::Readers.
|
17
|
+
const_get(fmt.cc).read(document,options)
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.detect_format(filename, default_to = nil)
|
@@ -6,12 +6,13 @@
|
|
6
6
|
# https://github.com/iterationlabs/ruby-readability
|
7
7
|
class Treat::Workers::Formatters::Readers::HTML
|
8
8
|
|
9
|
-
silence_warnings { require '
|
9
|
+
silence_warnings { require 'jruby-readability' }
|
10
10
|
|
11
11
|
# By default, don't backup the original HTML
|
12
12
|
DefaultOptions = {
|
13
13
|
:keep_html => false,
|
14
|
-
:tags => %w[p div h1 h2 h3 ul ol dl dt li]
|
14
|
+
:tags => %w[p div h1 h2 h3 ul ol dl dt li img],
|
15
|
+
|
15
16
|
}
|
16
17
|
|
17
18
|
# Read the HTML document and strip it of its markup.
|
@@ -46,6 +47,7 @@ class Treat::Workers::Formatters::Readers::HTML
|
|
46
47
|
d = Readability::Document.new(html, options)
|
47
48
|
document.value = "<h1>#{d.title}</h1>\n" + d.content
|
48
49
|
document.set :format, 'html'
|
50
|
+
document.set :images, d.images
|
49
51
|
end
|
50
52
|
|
51
53
|
document
|
@@ -69,9 +69,7 @@ module Treat::Workers::Groupable
|
|
69
69
|
|
70
70
|
# Get constants in this module, excluding by
|
71
71
|
# default those defined by parent modules.
|
72
|
-
def const_get(const)
|
73
|
-
super(const, false)
|
74
|
-
end
|
72
|
+
def const_get(const); super(const, false); end
|
75
73
|
|
76
74
|
# Modify the extended class.
|
77
75
|
def self.extended(group)
|
@@ -28,8 +28,9 @@ class Treat::Workers::Lexicalizers::Categorizers::FromTag
|
|
28
28
|
|
29
29
|
tag = entity.check_has(:tag)
|
30
30
|
|
31
|
-
return 'unknown' if tag.nil? || tag == ''
|
32
|
-
return '
|
31
|
+
return 'unknown' if tag.nil? || tag == ''
|
32
|
+
return 'fragment' if tag == 'F'
|
33
|
+
return 'sentence' if tag == 'S'
|
33
34
|
return 'number' if entity.type == :number
|
34
35
|
|
35
36
|
return Ptc[entity.to_s] if entity.type == :punctuation
|
@@ -29,9 +29,19 @@ class Treat::Workers::Lexicalizers::Sensers::Wordnet
|
|
29
29
|
|
30
30
|
category = word.check_has(:category)
|
31
31
|
|
32
|
-
|
32
|
+
if !options[:nym]
|
33
33
|
raise Treat::Exception, "You must supply " +
|
34
|
-
"the :nym option (
|
34
|
+
"the :nym option ('synonyms', 'hypernyms', etc.)"
|
35
|
+
end
|
36
|
+
|
37
|
+
if !options[:nym].is_a?(Symbol)
|
38
|
+
options[:nym] == options[:nym].intern
|
39
|
+
end
|
40
|
+
|
41
|
+
if ![:synonyms, :antonyms,
|
42
|
+
:hypernyms, :hyponyms].include?(options[:nym])
|
43
|
+
raise Treat::Exception, "You must supply " +
|
44
|
+
"a valid :nym option ('synonyms', 'hypernyms', etc.)"
|
35
45
|
end
|
36
46
|
|
37
47
|
unless ['noun', 'adjective', 'verb'].
|
@@ -47,7 +47,8 @@ class Treat::Workers::Lexicalizers::Taggers::Brill
|
|
47
47
|
|
48
48
|
return 'S' if entity.is_a?(Treat::Entities::Sentence)
|
49
49
|
return 'P' if entity.is_a?(Treat::Entities::Phrase)
|
50
|
-
|
50
|
+
return 'F' if entity.is_a?(Treat::Entities::Fragment)
|
51
|
+
return 'G' if entity.is_a?(Treat::Entities::Group)
|
51
52
|
end
|
52
53
|
|
53
54
|
end
|
@@ -65,9 +65,11 @@ class Treat::Workers::Lexicalizers::Taggers::Lingua
|
|
65
65
|
!entity.parent_sentence
|
66
66
|
entity.set :tag_set, :penn
|
67
67
|
end
|
68
|
-
|
68
|
+
|
69
69
|
return 'S' if entity.is_a?(Treat::Entities::Sentence)
|
70
70
|
return 'P' if entity.is_a?(Treat::Entities::Phrase)
|
71
|
+
return 'F' if entity.is_a?(Treat::Entities::Fragment)
|
72
|
+
return 'G' if entity.is_a?(Treat::Entities::Group)
|
71
73
|
|
72
74
|
end
|
73
75
|
|
@@ -32,11 +32,10 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
32
32
|
entity.set :tag_set, tag_set
|
33
33
|
end
|
34
34
|
|
35
|
-
if entity.is_a?(Treat::Entities::Sentence)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
35
|
+
return 'S' if entity.is_a?(Treat::Entities::Sentence)
|
36
|
+
return 'P' if entity.is_a?(Treat::Entities::Phrase)
|
37
|
+
return 'F' if entity.is_a?(Treat::Entities::Fragment)
|
38
|
+
return 'G' if entity.is_a?(Treat::Entities::Group)
|
40
39
|
|
41
40
|
# Handle options and initialize the tagger.
|
42
41
|
lang = entity.language
|
data/spec/entities/collection.rb
CHANGED
@@ -15,7 +15,7 @@ describe Treat::Entities::Collection do
|
|
15
15
|
it "recursively searches the folder for " +
|
16
16
|
"files and opens them into a collection of documents" do
|
17
17
|
collection = Treat::Entities::Collection.build(@file)
|
18
|
-
collection.size.should eql
|
18
|
+
collection.size.should eql 6
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
@@ -40,7 +40,7 @@ describe Treat::Entities::Collection do
|
|
40
40
|
f = Treat.paths.spec + 'workers/examples/english/economist'
|
41
41
|
c = Treat::Entities::Collection.build(f)
|
42
42
|
c << Treat::Entities::Document.new
|
43
|
-
c.size.should eql
|
43
|
+
c.size.should eql 4
|
44
44
|
end
|
45
45
|
|
46
46
|
end
|
data/spec/entities/entity.rb
CHANGED
@@ -391,11 +391,11 @@ describe Treat::Entities::Entity do
|
|
391
391
|
context "when language detection is disabled " +
|
392
392
|
"(Treat.core.detect is set to false)" do
|
393
393
|
it "returns the default language (Treat.core.language.default)" do
|
394
|
-
|
395
|
-
|
394
|
+
Treat.core.language.detect = false
|
395
|
+
Treat.core.language.default = :test
|
396
396
|
s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
|
397
|
-
|
398
|
-
|
397
|
+
s.language.should eql :test
|
398
|
+
Treat.core.language.default = :english
|
399
399
|
end
|
400
400
|
end
|
401
401
|
|
data/spec/helper.rb
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
require_relative '../lib/treat'
|
2
|
+
|
2
3
|
module Treat::Specs
|
3
4
|
|
4
|
-
# Require the worker specs.
|
5
|
-
require_relative 'workers'
|
6
|
-
# Require RSpec library.
|
7
5
|
require 'rspec'
|
8
|
-
# Require Ruby benchmark library.
|
9
|
-
require 'benchmark'
|
10
|
-
# Require gem to build ASCII tables.
|
11
6
|
|
12
7
|
# Some configuration options for devel.
|
13
8
|
Treat.databases.mongo.db = 'treat_test'
|
@@ -20,30 +15,11 @@ module Treat::Specs
|
|
20
15
|
Treat.libraries.reuters.model_path =
|
21
16
|
'/ruby/reuters/'
|
22
17
|
|
18
|
+
ModuleFiles = ['entities/*.rb', 'learning/*.rb']
|
19
|
+
|
23
20
|
# Provide helper functions for running specs.
|
24
21
|
class Helper
|
25
22
|
|
26
|
-
ModuleFiles = [
|
27
|
-
'./spec/core/*.rb',
|
28
|
-
'./spec/entities/*.rb'
|
29
|
-
]
|
30
|
-
|
31
|
-
# Run all worker example files as :specs
|
32
|
-
# or :benchmarks for the given language.
|
33
|
-
def self.run_examples_as(what, language)
|
34
|
-
self.require_language_files(language)
|
35
|
-
Treat::Specs::Workers::Language.
|
36
|
-
list.each { |l| l.new(what).run }
|
37
|
-
RSpec::Core::CommandLine.new([]).run($stderr, $stdout)
|
38
|
-
end
|
39
|
-
|
40
|
-
# Run specs for the core classes.
|
41
|
-
def self.run_core_specs
|
42
|
-
RSpec::Core::Runner.run(
|
43
|
-
ModuleFiles.map { |d| Dir.glob(d) },
|
44
|
-
$stderr, $stdout)
|
45
|
-
end
|
46
|
-
|
47
23
|
# Start SimpleCov coverage.
|
48
24
|
def self.start_coverage
|
49
25
|
require 'simplecov'
|
@@ -61,56 +37,28 @@ module Treat::Specs
|
|
61
37
|
end
|
62
38
|
end
|
63
39
|
|
40
|
+
# Run specs for the core classes.
|
41
|
+
def self.run_core_specs
|
42
|
+
files = ModuleFiles.map do |d|
|
43
|
+
Dir.glob(Treat.paths.spec + d)
|
44
|
+
end
|
45
|
+
RSpec::Core::Runner.run(files)
|
46
|
+
end
|
47
|
+
|
64
48
|
# Run the worker specs for the language given as argument.
|
65
|
-
def self.
|
66
|
-
# Require the base language class.
|
67
|
-
require_relative 'workers/language'
|
49
|
+
def self.run_language_specs(lang)
|
68
50
|
# If no language supplied, get all languages.
|
69
|
-
if !
|
51
|
+
if !lang || lang == ''
|
70
52
|
pattern = "./spec/workers/*.rb"
|
71
53
|
# Otherwise, get a specific language file.
|
72
54
|
else
|
73
|
-
pattern = "./spec/workers/#{
|
74
|
-
# Check if a spec file exists.
|
55
|
+
pattern = "./spec/workers/#{lang}.rb"
|
75
56
|
unless File.readable?(pattern)
|
76
57
|
raise Treat::Exception,
|
77
|
-
"There are no examples for '#{
|
78
|
-
end
|
79
|
-
end
|
80
|
-
# Require all files matched by the pattern.
|
81
|
-
Dir.glob(pattern).each { |f| require f }
|
82
|
-
end
|
83
|
-
|
84
|
-
def self.text_table(headings, rows)
|
85
|
-
require 'terminal-table'
|
86
|
-
puts Terminal::Table.new(
|
87
|
-
headings: headings, rows: rows)
|
88
|
-
end
|
89
|
-
|
90
|
-
def self.html_table(headings, rows)
|
91
|
-
require 'fileutils'
|
92
|
-
html = "<table>\n"
|
93
|
-
html += "<tr>\n"
|
94
|
-
headings.each do |heading|
|
95
|
-
html += "<td>" + heading + "</td>\n"
|
96
|
-
end
|
97
|
-
html += "</tr>\n"
|
98
|
-
rows.each do |row|
|
99
|
-
html += "<tr>\n"
|
100
|
-
row.each do |el|
|
101
|
-
html += "<td>#{el}</td>"
|
58
|
+
"There are no examples for '#{lang}'."
|
102
59
|
end
|
103
|
-
html += "</tr>\n"
|
104
|
-
end
|
105
|
-
self.write_html('benchmark', html)
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.write_html(dir, html)
|
109
|
-
unless FileTest.directory?(dir)
|
110
|
-
FileUtils.mkdir('./' + dir)
|
111
60
|
end
|
112
|
-
|
113
|
-
File.open(fn, 'w+') { |f| f.write(html) }
|
61
|
+
RSpec::Core::Runner.run(Dir.glob(pattern))
|
114
62
|
end
|
115
63
|
|
116
64
|
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/spec/sandbox.rb
CHANGED
@@ -4,6 +4,15 @@ require_relative '../lib/treat'
|
|
4
4
|
require 'treat'
|
5
5
|
include Treat::Core::DSL
|
6
6
|
|
7
|
+
collection Treat.paths.spec + '/workers/examples/english/economist'
|
8
|
+
collection.apply :chunk, :segment, :tokenize
|
9
|
+
puts collection.topic_words.inspect
|
10
|
+
|
11
|
+
=begin
|
12
|
+
|
13
|
+
g = group("I was running")
|
14
|
+
puts g.tag.inspect
|
15
|
+
|
7
16
|
Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
|
8
17
|
Treat.libraries.stanford.model_path = '/ruby/treat/models/'
|
9
18
|
|
@@ -14,6 +23,8 @@ w = word
|
|
14
23
|
p = phrase 'hello world'
|
15
24
|
e = email 'louis@gmail.com'
|
16
25
|
|
26
|
+
d = question(:is_feature, :word)
|
27
|
+
=end
|
17
28
|
#d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
|
18
29
|
#d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
|
19
30
|
#d.print_tree
|
@@ -266,6 +277,6 @@ sect = section title(phra), para
|
|
266
277
|
=begin
|
267
278
|
puts "beer".plural.inspect
|
268
279
|
=end
|
269
|
-
Treat.core.language.detect = true
|
270
|
-
s = sentence "Du hast deiner Frau einen roten Ring gekauft."
|
271
|
-
s.apply(:parse,:category).print_tree
|
280
|
+
# Treat.core.language.detect = true
|
281
|
+
# s = sentence "Du hast deiner Frau einen roten Ring gekauft."
|
282
|
+
#s.apply(:parse,:category).print_tree
|
data/spec/workers/agnostic.rb
CHANGED
@@ -1,3 +1,80 @@
|
|
1
|
+
$workers = Treat.languages.agnostic.workers
|
2
|
+
|
3
|
+
describe Treat::Workers::Extractors::Language do
|
4
|
+
before do
|
5
|
+
@entities = ["Obama and Sarkozy will meet in Berlin."]
|
6
|
+
@languages = ["english"]
|
7
|
+
end
|
8
|
+
context "when called on any textual entity" do
|
9
|
+
it "returns the language of the entity" do
|
10
|
+
# Treat.core.language.detect = true
|
11
|
+
$workers.extractors.language.each do |extractor|
|
12
|
+
@entities.map(&:language).should eql @languages
|
13
|
+
end
|
14
|
+
# Treat.core.language.detect = false
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
describe Treat::Workers::Formatters::Serializers do
|
20
|
+
before do
|
21
|
+
@texts = ["A test entity"]
|
22
|
+
end
|
23
|
+
context "when #serialize is called on any textual entity" do
|
24
|
+
it "serializes the entity to disk and returns a pointer to the location" do
|
25
|
+
# m = Treat::Entities::Entity.build
|
26
|
+
@texts.map(&:to_entity).map(&:serialize)
|
27
|
+
.map(&method(:entity)).map(&:to_s).should eql @texts
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe Treat::Workers::Formatters::Unserializers do
|
33
|
+
before do
|
34
|
+
@texts = ["A te"]
|
35
|
+
end
|
36
|
+
context "when #unserialize is called with a selector on any textual entity" do
|
37
|
+
it "unserializes the file and loads it in the entity" do
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
visualize: {
|
44
|
+
entity: {
|
45
|
+
examples: {
|
46
|
+
standoff: [
|
47
|
+
["I walked to the store.", "(S\n (PRP I) (VBD walked) (TO to) (DT the) (NN store) (. .))\n"]
|
48
|
+
],
|
49
|
+
tree: [
|
50
|
+
["I walked to the store.", "+ Sentence (*) --- \"I walked to the store.\" --- {} --- [] \n|\n+--> Word (*) --- \"I\" --- {} --- [] \n+--> Word (*) --- \"walked\" --- {} --- [] \n+--> Word (*) --- \"to\" --- {} --- [] \n+--> Word (*) --- \"the\" --- {} --- [] \n+--> Word (*) --- \"store\" --- {} --- [] \n+--> Punctuation (*) --- \".\" --- {} --- [] "]
|
51
|
+
],
|
52
|
+
dot: [
|
53
|
+
["I walked to the store.", "graph {\n* [label=\"Sentence\\n\\\"I walked to the store.\\\"\",color=\"\"]\n* [label=\"Word\\n\\\"I\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"walked\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"to\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"the\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"store\\\"\",color=\"\"]\n* -- *;\n* [label=\"Punctuation\\n\\\".\\\"\",color=\"\"]\n* -- *;\n}"]
|
54
|
+
]
|
55
|
+
},
|
56
|
+
preprocessor: lambda { |entity| entity.tokenize },
|
57
|
+
generator: lambda { |result| result.gsub(/[0-9]+/, '*') }
|
58
|
+
}
|
59
|
+
},
|
60
|
+
|
61
|
+
|
62
|
+
describe Treat::Workers::Formatters::Visualizers do
|
63
|
+
before do
|
64
|
+
@texts = ["I walked to the store."]
|
65
|
+
end
|
66
|
+
describe "when #visualize is called with the :dot worker" do
|
67
|
+
|
68
|
+
end
|
69
|
+
describe "when #visualize is called with the :tree worker" do
|
70
|
+
|
71
|
+
end
|
72
|
+
describe "when #visualize is called with the :dot worker" do
|
73
|
+
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
=begin
|
1
78
|
class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
2
79
|
|
3
80
|
# TODO: :tf_idf, :keywords, :classifiers
|
@@ -5,15 +82,6 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
5
82
|
|
6
83
|
Scenarios = {
|
7
84
|
|
8
|
-
# Also tests unserialize.
|
9
|
-
serialize: {
|
10
|
-
entity: {
|
11
|
-
examples: [
|
12
|
-
["A test entity.", "A test entity."]
|
13
|
-
],
|
14
|
-
generator: lambda { |selector| Treat::Entities::Entity.build(selector).to_s }
|
15
|
-
}
|
16
|
-
},
|
17
85
|
classify: {
|
18
86
|
entity: {
|
19
87
|
examples: [
|
@@ -39,23 +107,6 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
39
107
|
end
|
40
108
|
}
|
41
109
|
},
|
42
|
-
visualize: {
|
43
|
-
entity: {
|
44
|
-
examples: {
|
45
|
-
standoff: [
|
46
|
-
["I walked to the store.", "(S\n (PRP I) (VBD walked) (TO to) (DT the) (NN store) (. .))\n"]
|
47
|
-
],
|
48
|
-
tree: [
|
49
|
-
["I walked to the store.", "+ Sentence (*) --- \"I walked to the store.\" --- {} --- [] \n|\n+--> Word (*) --- \"I\" --- {} --- [] \n+--> Word (*) --- \"walked\" --- {} --- [] \n+--> Word (*) --- \"to\" --- {} --- [] \n+--> Word (*) --- \"the\" --- {} --- [] \n+--> Word (*) --- \"store\" --- {} --- [] \n+--> Punctuation (*) --- \".\" --- {} --- [] "]
|
50
|
-
],
|
51
|
-
dot: [
|
52
|
-
["I walked to the store.", "graph {\n* [label=\"Sentence\\n\\\"I walked to the store.\\\"\",color=\"\"]\n* [label=\"Word\\n\\\"I\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"walked\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"to\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"the\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"store\\\"\",color=\"\"]\n* -- *;\n* [label=\"Punctuation\\n\\\".\\\"\",color=\"\"]\n* -- *;\n}"]
|
53
|
-
]
|
54
|
-
},
|
55
|
-
preprocessor: lambda { |entity| entity.tokenize },
|
56
|
-
generator: lambda { |result| result.gsub(/[0-9]+/, '*') }
|
57
|
-
}
|
58
|
-
},
|
59
110
|
|
60
111
|
=begin
|
61
112
|
keywords: {
|
@@ -103,8 +154,7 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
103
154
|
preprocessor: lambda { |coll| coll.apply(:index) }
|
104
155
|
},
|
105
156
|
},
|
106
|
-
|
107
|
-
=begin
|
157
|
+
|
108
158
|
keywords: {
|
109
159
|
document: {
|
110
160
|
examples: [
|
@@ -124,7 +174,7 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
124
174
|
]
|
125
175
|
}
|
126
176
|
},
|
127
|
-
|
177
|
+
|
128
178
|
topic_words: {
|
129
179
|
collection: {
|
130
180
|
examples: [
|
@@ -134,4 +184,4 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
134
184
|
}
|
135
185
|
}
|
136
186
|
|
137
|
-
end
|
187
|
+
=end
|