treat 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/lib/treat/config/data/core.rb +3 -1
- data/lib/treat/config/data/languages/agnostic.rb +1 -1
- data/lib/treat/core/dsl.rb +12 -44
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/name_tag/stanford.rb +1 -1
- data/lib/treat/workers/extractors/topic_words/lda.rb +1 -1
- data/lib/treat/workers/formatters/readers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/readers/html.rb +4 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +1 -1
- data/lib/treat/workers/groupable.rb +1 -3
- data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +3 -2
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +12 -2
- data/lib/treat/workers/lexicalizers/taggers/brill.rb +2 -1
- data/lib/treat/workers/lexicalizers/taggers/lingua.rb +3 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -5
- data/spec/entities/collection.rb +2 -2
- data/spec/entities/entity.rb +4 -4
- data/spec/helper.rb +16 -68
- data/spec/{core → learning}/data_set.rb +0 -0
- data/spec/{core → learning}/export.rb +0 -0
- data/spec/{core → learning}/problem.rb +0 -0
- data/spec/{core → learning}/question.rb +0 -0
- data/spec/sandbox.rb +14 -3
- data/spec/workers/agnostic.rb +80 -30
- data/spec/workers/english.rb +475 -190
- metadata +6 -11
- data/files/21552208.html +0 -792
- data/files/nethttp-cheat-sheet-2940.html +0 -392
- data/lib/treat/config/data/config.rb +0 -50
- data/spec/workers/language.rb +0 -280
- data/spec/workers.rb +0 -28
data/README.md
CHANGED
@@ -20,6 +20,17 @@ Treat is a toolkit for natural language processing and computational linguistics
|
|
20
20
|
|
21
21
|
I am actively seeking developers that can help maintain and expand this project. You can find a list of ideas for contributing to the project [here](https://github.com/louismullie/treat/wiki/Contributing).
|
22
22
|
|
23
|
+
**Authors**
|
24
|
+
|
25
|
+
Lead developer: @louismullie [[Twitter](https://twitter.com/LouisMullie)]
|
26
|
+
|
27
|
+
Contributors:
|
28
|
+
- @bdigital
|
29
|
+
- @automatedtendencies
|
30
|
+
- @LeFnord
|
31
|
+
- @darkphantum
|
32
|
+
- @whistlerbrk
|
33
|
+
|
23
34
|
**License**
|
24
35
|
|
25
36
|
This software is released under the [GPL License](https://github.com/louismullie/treat/wiki/License-Information) and includes software released under the GPL, Ruby, Apache 2.0 and MIT licenses.
|
@@ -3,7 +3,7 @@
|
|
3
3
|
'nokogiri', 'ferret',
|
4
4
|
'bson_ext', 'mongo', 'lda-ruby',
|
5
5
|
'stanford-core-nlp', 'linguistics',
|
6
|
-
'
|
6
|
+
'jruby-readability', 'whatlanguage',
|
7
7
|
'chronic', 'nickel', 'decisiontree',
|
8
8
|
'rb-libsvm', 'ruby-fann', 'zip',
|
9
9
|
'tf-idf-similarity', 'narray'
|
data/lib/treat/core/dsl.rb
CHANGED
@@ -1,53 +1,21 @@
|
|
1
1
|
module Treat::Core::DSL
|
2
2
|
|
3
|
-
# Message for deprecation of old DSL syntax.
|
4
|
-
DeprecationMessage = "The DSL that used " +
|
5
|
-
"capitalized entity names is now deprecated. " +
|
6
|
-
"Use `include Treat::Core::DSL` along with " +
|
7
|
-
"lowercase names from now on."
|
8
|
-
|
9
3
|
# Map all classes in Treat::Entities to
|
10
4
|
# a global builder function (entity, word,
|
11
5
|
# phrase, punctuation, symbol, list, etc.)
|
12
6
|
def self.included(base)
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
define_method(mname.capitalize) do |*args|
|
26
|
-
raise DeprecationMessage
|
27
|
-
end
|
28
|
-
old_mm = instance_method(:method_missing)
|
29
|
-
define_method(:method_missing) do |sym,*args,&block|
|
30
|
-
return klass.build(*args) if sym == mname
|
31
|
-
old_mm.bind(self).call(sym,*args,&block)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
# Map all classes in the Learning module
|
38
|
-
# to a global builder function. Defines:
|
39
|
-
# dataset, export, feature, tag, problem
|
40
|
-
# question.
|
41
|
-
def self.sweeten_learning(base, on = true)
|
42
|
-
Treat::Learning.constants.each do |kname|
|
43
|
-
mname = kname.downcase
|
44
|
-
klass = Treat::Learning.const_get(kname)
|
45
|
-
base.class_eval do
|
46
|
-
old_mm = instance_method(:method_missing)
|
47
|
-
define_method(:method_missing) do |sym,*args,&block|
|
48
|
-
return klass.new(*args) if sym == mname
|
49
|
-
old_mm.bind(self).call(sym,*args,&block)
|
50
|
-
end
|
7
|
+
def method_missing(sym,*args,&block)
|
8
|
+
@@entities ||= Treat.core.entities.list
|
9
|
+
@@learning ||= Treat.core.learning.list
|
10
|
+
if @@entities.include?(sym)
|
11
|
+
klass = Treat::Entities.const_get(sym.cc)
|
12
|
+
return klass.build(*args)
|
13
|
+
elsif @@learning.include?(sym)
|
14
|
+
klass = Treat::Learning.const_get(sym.cc)
|
15
|
+
return klass.new(*args)
|
16
|
+
else
|
17
|
+
super(sym,*args,&block)
|
18
|
+
raise "Uncaught method ended up in Treat DSL."
|
51
19
|
end
|
52
20
|
end
|
53
21
|
end
|
data/lib/treat/version.rb
CHANGED
@@ -25,7 +25,7 @@ class Treat::Workers::Extractors::NameTag::Stanford
|
|
25
25
|
isolated_token = entity.is_a?(Treat::Entities::Token)
|
26
26
|
tokens = isolated_token ? [entity] : entity.tokens
|
27
27
|
|
28
|
-
ms = StanfordCoreNLP::Config::Models[:ner][language]
|
28
|
+
ms = StanfordCoreNLP::Config::Models[:ner][language.intern]
|
29
29
|
model_path = Treat.libraries.stanford.model_path ||
|
30
30
|
(Treat.paths.models + '/stanford/')
|
31
31
|
ms = model_path + '/' +
|
@@ -12,7 +12,9 @@ class Treat::Workers::Formatters::Readers::Autoselect
|
|
12
12
|
# - (Symbol) :default_to => format to default to.
|
13
13
|
def self.read(document, options = {})
|
14
14
|
options = DefaultOptions.merge(options)
|
15
|
-
|
15
|
+
fmt = detect_format(document.file, options[:default_to])
|
16
|
+
Treat::Workers::Formatters::Readers.
|
17
|
+
const_get(fmt.cc).read(document,options)
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.detect_format(filename, default_to = nil)
|
@@ -6,12 +6,13 @@
|
|
6
6
|
# https://github.com/iterationlabs/ruby-readability
|
7
7
|
class Treat::Workers::Formatters::Readers::HTML
|
8
8
|
|
9
|
-
silence_warnings { require '
|
9
|
+
silence_warnings { require 'jruby-readability' }
|
10
10
|
|
11
11
|
# By default, don't backup the original HTML
|
12
12
|
DefaultOptions = {
|
13
13
|
:keep_html => false,
|
14
|
-
:tags => %w[p div h1 h2 h3 ul ol dl dt li]
|
14
|
+
:tags => %w[p div h1 h2 h3 ul ol dl dt li img],
|
15
|
+
|
15
16
|
}
|
16
17
|
|
17
18
|
# Read the HTML document and strip it of its markup.
|
@@ -46,6 +47,7 @@ class Treat::Workers::Formatters::Readers::HTML
|
|
46
47
|
d = Readability::Document.new(html, options)
|
47
48
|
document.value = "<h1>#{d.title}</h1>\n" + d.content
|
48
49
|
document.set :format, 'html'
|
50
|
+
document.set :images, d.images
|
49
51
|
end
|
50
52
|
|
51
53
|
document
|
@@ -69,9 +69,7 @@ module Treat::Workers::Groupable
|
|
69
69
|
|
70
70
|
# Get constants in this module, excluding by
|
71
71
|
# default those defined by parent modules.
|
72
|
-
def const_get(const)
|
73
|
-
super(const, false)
|
74
|
-
end
|
72
|
+
def const_get(const); super(const, false); end
|
75
73
|
|
76
74
|
# Modify the extended class.
|
77
75
|
def self.extended(group)
|
@@ -28,8 +28,9 @@ class Treat::Workers::Lexicalizers::Categorizers::FromTag
|
|
28
28
|
|
29
29
|
tag = entity.check_has(:tag)
|
30
30
|
|
31
|
-
return 'unknown' if tag.nil? || tag == ''
|
32
|
-
return '
|
31
|
+
return 'unknown' if tag.nil? || tag == ''
|
32
|
+
return 'fragment' if tag == 'F'
|
33
|
+
return 'sentence' if tag == 'S'
|
33
34
|
return 'number' if entity.type == :number
|
34
35
|
|
35
36
|
return Ptc[entity.to_s] if entity.type == :punctuation
|
@@ -29,9 +29,19 @@ class Treat::Workers::Lexicalizers::Sensers::Wordnet
|
|
29
29
|
|
30
30
|
category = word.check_has(:category)
|
31
31
|
|
32
|
-
|
32
|
+
if !options[:nym]
|
33
33
|
raise Treat::Exception, "You must supply " +
|
34
|
-
"the :nym option (
|
34
|
+
"the :nym option ('synonyms', 'hypernyms', etc.)"
|
35
|
+
end
|
36
|
+
|
37
|
+
if !options[:nym].is_a?(Symbol)
|
38
|
+
options[:nym] = options[:nym].intern # assign (not compare) so a String :nym becomes a Symbol before the Symbol whitelist check below
|
39
|
+
end
|
40
|
+
|
41
|
+
if ![:synonyms, :antonyms,
|
42
|
+
:hypernyms, :hyponyms].include?(options[:nym])
|
43
|
+
raise Treat::Exception, "You must supply " +
|
44
|
+
"a valid :nym option ('synonyms', 'hypernyms', etc.)"
|
35
45
|
end
|
36
46
|
|
37
47
|
unless ['noun', 'adjective', 'verb'].
|
@@ -47,7 +47,8 @@ class Treat::Workers::Lexicalizers::Taggers::Brill
|
|
47
47
|
|
48
48
|
return 'S' if entity.is_a?(Treat::Entities::Sentence)
|
49
49
|
return 'P' if entity.is_a?(Treat::Entities::Phrase)
|
50
|
-
|
50
|
+
return 'F' if entity.is_a?(Treat::Entities::Fragment)
|
51
|
+
return 'G' if entity.is_a?(Treat::Entities::Group)
|
51
52
|
end
|
52
53
|
|
53
54
|
end
|
@@ -65,9 +65,11 @@ class Treat::Workers::Lexicalizers::Taggers::Lingua
|
|
65
65
|
!entity.parent_sentence
|
66
66
|
entity.set :tag_set, :penn
|
67
67
|
end
|
68
|
-
|
68
|
+
|
69
69
|
return 'S' if entity.is_a?(Treat::Entities::Sentence)
|
70
70
|
return 'P' if entity.is_a?(Treat::Entities::Phrase)
|
71
|
+
return 'F' if entity.is_a?(Treat::Entities::Fragment)
|
72
|
+
return 'G' if entity.is_a?(Treat::Entities::Group)
|
71
73
|
|
72
74
|
end
|
73
75
|
|
@@ -32,11 +32,10 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
32
32
|
entity.set :tag_set, tag_set
|
33
33
|
end
|
34
34
|
|
35
|
-
if entity.is_a?(Treat::Entities::Sentence)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
35
|
+
return 'S' if entity.is_a?(Treat::Entities::Sentence)
|
36
|
+
return 'P' if entity.is_a?(Treat::Entities::Phrase)
|
37
|
+
return 'F' if entity.is_a?(Treat::Entities::Fragment)
|
38
|
+
return 'G' if entity.is_a?(Treat::Entities::Group)
|
40
39
|
|
41
40
|
# Handle options and initialize the tagger.
|
42
41
|
lang = entity.language
|
data/spec/entities/collection.rb
CHANGED
@@ -15,7 +15,7 @@ describe Treat::Entities::Collection do
|
|
15
15
|
it "recursively searches the folder for " +
|
16
16
|
"files and opens them into a collection of documents" do
|
17
17
|
collection = Treat::Entities::Collection.build(@file)
|
18
|
-
collection.size.should eql
|
18
|
+
collection.size.should eql 6
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
@@ -40,7 +40,7 @@ describe Treat::Entities::Collection do
|
|
40
40
|
f = Treat.paths.spec + 'workers/examples/english/economist'
|
41
41
|
c = Treat::Entities::Collection.build(f)
|
42
42
|
c << Treat::Entities::Document.new
|
43
|
-
c.size.should eql
|
43
|
+
c.size.should eql 4
|
44
44
|
end
|
45
45
|
|
46
46
|
end
|
data/spec/entities/entity.rb
CHANGED
@@ -391,11 +391,11 @@ describe Treat::Entities::Entity do
|
|
391
391
|
context "when language detection is disabled " +
|
392
392
|
"(Treat.core.detect is set to false)" do
|
393
393
|
it "returns the default language (Treat.core.language.default)" do
|
394
|
-
|
395
|
-
|
394
|
+
Treat.core.language.detect = false
|
395
|
+
Treat.core.language.default = :test
|
396
396
|
s = 'Les grands hommes ne sont pas toujours grands, dit un jour Napoleon.'
|
397
|
-
|
398
|
-
|
397
|
+
s.language.should eql :test
|
398
|
+
Treat.core.language.default = :english
|
399
399
|
end
|
400
400
|
end
|
401
401
|
|
data/spec/helper.rb
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
require_relative '../lib/treat'
|
2
|
+
|
2
3
|
module Treat::Specs
|
3
4
|
|
4
|
-
# Require the worker specs.
|
5
|
-
require_relative 'workers'
|
6
|
-
# Require RSpec library.
|
7
5
|
require 'rspec'
|
8
|
-
# Require Ruby benchmark library.
|
9
|
-
require 'benchmark'
|
10
|
-
# Require gem to build ASCII tables.
|
11
6
|
|
12
7
|
# Some configuration options for devel.
|
13
8
|
Treat.databases.mongo.db = 'treat_test'
|
@@ -20,30 +15,11 @@ module Treat::Specs
|
|
20
15
|
Treat.libraries.reuters.model_path =
|
21
16
|
'/ruby/reuters/'
|
22
17
|
|
18
|
+
ModuleFiles = ['entities/*.rb', 'learning/*.rb']
|
19
|
+
|
23
20
|
# Provide helper functions for running specs.
|
24
21
|
class Helper
|
25
22
|
|
26
|
-
ModuleFiles = [
|
27
|
-
'./spec/core/*.rb',
|
28
|
-
'./spec/entities/*.rb'
|
29
|
-
]
|
30
|
-
|
31
|
-
# Run all worker example files as :specs
|
32
|
-
# or :benchmarks for the given language.
|
33
|
-
def self.run_examples_as(what, language)
|
34
|
-
self.require_language_files(language)
|
35
|
-
Treat::Specs::Workers::Language.
|
36
|
-
list.each { |l| l.new(what).run }
|
37
|
-
RSpec::Core::CommandLine.new([]).run($stderr, $stdout)
|
38
|
-
end
|
39
|
-
|
40
|
-
# Run specs for the core classes.
|
41
|
-
def self.run_core_specs
|
42
|
-
RSpec::Core::Runner.run(
|
43
|
-
ModuleFiles.map { |d| Dir.glob(d) },
|
44
|
-
$stderr, $stdout)
|
45
|
-
end
|
46
|
-
|
47
23
|
# Start SimpleCov coverage.
|
48
24
|
def self.start_coverage
|
49
25
|
require 'simplecov'
|
@@ -61,56 +37,28 @@ module Treat::Specs
|
|
61
37
|
end
|
62
38
|
end
|
63
39
|
|
40
|
+
# Run specs for the core classes.
|
41
|
+
def self.run_core_specs
|
42
|
+
files = ModuleFiles.map do |d|
|
43
|
+
Dir.glob(Treat.paths.spec + d)
|
44
|
+
end
|
45
|
+
RSpec::Core::Runner.run(files)
|
46
|
+
end
|
47
|
+
|
64
48
|
# Require language files based on the argument.
|
65
|
-
def self.
|
66
|
-
# Require the base language class.
|
67
|
-
require_relative 'workers/language'
|
49
|
+
def self.run_language_specs(lang)
|
68
50
|
# If no language supplied, get all languages.
|
69
|
-
if !
|
51
|
+
if !lang || lang == ''
|
70
52
|
pattern = "./spec/workers/*.rb"
|
71
53
|
# Otherwise, get a specific language file.
|
72
54
|
else
|
73
|
-
pattern = "./spec/workers/#{
|
74
|
-
# Check if a spec file exists.
|
55
|
+
pattern = "./spec/workers/#{lang}.rb"
|
75
56
|
unless File.readable?(pattern)
|
76
57
|
raise Treat::Exception,
|
77
|
-
"There are no examples for '#{
|
78
|
-
end
|
79
|
-
end
|
80
|
-
# Require all files matched by the pattern.
|
81
|
-
Dir.glob(pattern).each { |f| require f }
|
82
|
-
end
|
83
|
-
|
84
|
-
def self.text_table(headings, rows)
|
85
|
-
require 'terminal-table'
|
86
|
-
puts Terminal::Table.new(
|
87
|
-
headings: headings, rows: rows)
|
88
|
-
end
|
89
|
-
|
90
|
-
def self.html_table(headings, rows)
|
91
|
-
require 'fileutils'
|
92
|
-
html = "<table>\n"
|
93
|
-
html += "<tr>\n"
|
94
|
-
headings.each do |heading|
|
95
|
-
html += "<td>" + heading + "</td>\n"
|
96
|
-
end
|
97
|
-
html += "</tr>\n"
|
98
|
-
rows.each do |row|
|
99
|
-
html += "<tr>\n"
|
100
|
-
row.each do |el|
|
101
|
-
html += "<td>#{el}</td>"
|
58
|
+
"There are no examples for '#{lang}'."
|
102
59
|
end
|
103
|
-
html += "</tr>\n"
|
104
|
-
end
|
105
|
-
self.write_html('benchmark', html)
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.write_html(dir, html)
|
109
|
-
unless FileTest.directory?(dir)
|
110
|
-
FileUtils.mkdir('./' + dir)
|
111
60
|
end
|
112
|
-
|
113
|
-
File.open(fn, 'w+') { |f| f.write(html) }
|
61
|
+
RSpec::Core::Runner.run(Dir.glob(pattern))
|
114
62
|
end
|
115
63
|
|
116
64
|
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/spec/sandbox.rb
CHANGED
@@ -4,6 +4,15 @@ require_relative '../lib/treat'
|
|
4
4
|
require 'treat'
|
5
5
|
include Treat::Core::DSL
|
6
6
|
|
7
|
+
collection Treat.paths.spec + '/workers/examples/english/economist'
|
8
|
+
collection.apply :chunk, :segment, :tokenize
|
9
|
+
puts collection.topic_words.inspect
|
10
|
+
|
11
|
+
=begin
|
12
|
+
|
13
|
+
g = group("I was running")
|
14
|
+
puts g.tag.inspect
|
15
|
+
|
7
16
|
Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
|
8
17
|
Treat.libraries.stanford.model_path = '/ruby/treat/models/'
|
9
18
|
|
@@ -14,6 +23,8 @@ w = word
|
|
14
23
|
p = phrase 'hello world'
|
15
24
|
e = email 'louis@gmail.com'
|
16
25
|
|
26
|
+
d = question(:is_feature, :word)
|
27
|
+
=end
|
17
28
|
#d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
|
18
29
|
#d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
|
19
30
|
#d.print_tree
|
@@ -266,6 +277,6 @@ sect = section title(phra), para
|
|
266
277
|
=begin
|
267
278
|
puts "beer".plural.inspect
|
268
279
|
=end
|
269
|
-
Treat.core.language.detect = true
|
270
|
-
s = sentence "Du hast deiner Frau einen roten Ring gekauft."
|
271
|
-
s.apply(:parse,:category).print_tree
|
280
|
+
# Treat.core.language.detect = true
|
281
|
+
# s = sentence "Du hast deiner Frau einen roten Ring gekauft."
|
282
|
+
#s.apply(:parse,:category).print_tree
|
data/spec/workers/agnostic.rb
CHANGED
@@ -1,3 +1,80 @@
|
|
1
|
+
$workers = Treat.languages.agnostic.workers
|
2
|
+
|
3
|
+
describe Treat::Workers::Extractors::Language do
|
4
|
+
before do
|
5
|
+
@entities = ["Obama and Sarkozy will meet in Berlin."]
|
6
|
+
@languages = ["english"]
|
7
|
+
end
|
8
|
+
context "when called on any textual entity" do
|
9
|
+
it "returns the language of the entity" do
|
10
|
+
# Treat.core.language.detect = true
|
11
|
+
$workers.extractors.language.each do |extractor|
|
12
|
+
@entities.map(&:language).should eql @languages
|
13
|
+
end
|
14
|
+
# Treat.core.language.detect = false
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
describe Treat::Workers::Formatters::Serializers do
|
20
|
+
before do
|
21
|
+
@texts = ["A test entity"]
|
22
|
+
end
|
23
|
+
context "when #serialize is called on any textual entity" do
|
24
|
+
it "serializes the entity to disk and returns a pointer to the location" do
|
25
|
+
# m = Treat::Entities::Entity.build
|
26
|
+
@texts.map(&:to_entity).map(&:serialize)
|
27
|
+
.map(&method(:entity)).map(&:to_s).should eql @texts
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe Treat::Workers::Formatters::Unserializers do
|
33
|
+
before do
|
34
|
+
@texts = ["A te"]
|
35
|
+
end
|
36
|
+
context "when #unserialize is called with a selector on any textual entity" do
|
37
|
+
it "unserializes the file and loads it in the entity" do
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
visualize: {
|
44
|
+
entity: {
|
45
|
+
examples: {
|
46
|
+
standoff: [
|
47
|
+
["I walked to the store.", "(S\n (PRP I) (VBD walked) (TO to) (DT the) (NN store) (. .))\n"]
|
48
|
+
],
|
49
|
+
tree: [
|
50
|
+
["I walked to the store.", "+ Sentence (*) --- \"I walked to the store.\" --- {} --- [] \n|\n+--> Word (*) --- \"I\" --- {} --- [] \n+--> Word (*) --- \"walked\" --- {} --- [] \n+--> Word (*) --- \"to\" --- {} --- [] \n+--> Word (*) --- \"the\" --- {} --- [] \n+--> Word (*) --- \"store\" --- {} --- [] \n+--> Punctuation (*) --- \".\" --- {} --- [] "]
|
51
|
+
],
|
52
|
+
dot: [
|
53
|
+
["I walked to the store.", "graph {\n* [label=\"Sentence\\n\\\"I walked to the store.\\\"\",color=\"\"]\n* [label=\"Word\\n\\\"I\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"walked\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"to\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"the\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"store\\\"\",color=\"\"]\n* -- *;\n* [label=\"Punctuation\\n\\\".\\\"\",color=\"\"]\n* -- *;\n}"]
|
54
|
+
]
|
55
|
+
},
|
56
|
+
preprocessor: lambda { |entity| entity.tokenize },
|
57
|
+
generator: lambda { |result| result.gsub(/[0-9]+/, '*') }
|
58
|
+
}
|
59
|
+
},
|
60
|
+
|
61
|
+
|
62
|
+
describe Treat::Workers::Formatters::Visualizers do
|
63
|
+
before do
|
64
|
+
@texts = ["I walked to the store."]
|
65
|
+
end
|
66
|
+
describe "when #visualize is called with the :dot worker" do
|
67
|
+
|
68
|
+
end
|
69
|
+
describe "when #visualize is called with the :tree worker" do
|
70
|
+
|
71
|
+
end
|
72
|
+
describe "when #visualize is called with the :dot worker" do
|
73
|
+
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
=begin
|
1
78
|
class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
2
79
|
|
3
80
|
# TODO: :tf_idf, :keywords, :classifiers
|
@@ -5,15 +82,6 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
5
82
|
|
6
83
|
Scenarios = {
|
7
84
|
|
8
|
-
# Also tests unserialize.
|
9
|
-
serialize: {
|
10
|
-
entity: {
|
11
|
-
examples: [
|
12
|
-
["A test entity.", "A test entity."]
|
13
|
-
],
|
14
|
-
generator: lambda { |selector| Treat::Entities::Entity.build(selector).to_s }
|
15
|
-
}
|
16
|
-
},
|
17
85
|
classify: {
|
18
86
|
entity: {
|
19
87
|
examples: [
|
@@ -39,23 +107,6 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
39
107
|
end
|
40
108
|
}
|
41
109
|
},
|
42
|
-
visualize: {
|
43
|
-
entity: {
|
44
|
-
examples: {
|
45
|
-
standoff: [
|
46
|
-
["I walked to the store.", "(S\n (PRP I) (VBD walked) (TO to) (DT the) (NN store) (. .))\n"]
|
47
|
-
],
|
48
|
-
tree: [
|
49
|
-
["I walked to the store.", "+ Sentence (*) --- \"I walked to the store.\" --- {} --- [] \n|\n+--> Word (*) --- \"I\" --- {} --- [] \n+--> Word (*) --- \"walked\" --- {} --- [] \n+--> Word (*) --- \"to\" --- {} --- [] \n+--> Word (*) --- \"the\" --- {} --- [] \n+--> Word (*) --- \"store\" --- {} --- [] \n+--> Punctuation (*) --- \".\" --- {} --- [] "]
|
50
|
-
],
|
51
|
-
dot: [
|
52
|
-
["I walked to the store.", "graph {\n* [label=\"Sentence\\n\\\"I walked to the store.\\\"\",color=\"\"]\n* [label=\"Word\\n\\\"I\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"walked\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"to\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"the\\\"\",color=\"\"]\n* -- *;\n* [label=\"Word\\n\\\"store\\\"\",color=\"\"]\n* -- *;\n* [label=\"Punctuation\\n\\\".\\\"\",color=\"\"]\n* -- *;\n}"]
|
53
|
-
]
|
54
|
-
},
|
55
|
-
preprocessor: lambda { |entity| entity.tokenize },
|
56
|
-
generator: lambda { |result| result.gsub(/[0-9]+/, '*') }
|
57
|
-
}
|
58
|
-
},
|
59
110
|
|
60
111
|
=begin
|
61
112
|
keywords: {
|
@@ -103,8 +154,7 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
103
154
|
preprocessor: lambda { |coll| coll.apply(:index) }
|
104
155
|
},
|
105
156
|
},
|
106
|
-
|
107
|
-
=begin
|
157
|
+
|
108
158
|
keywords: {
|
109
159
|
document: {
|
110
160
|
examples: [
|
@@ -124,7 +174,7 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
124
174
|
]
|
125
175
|
}
|
126
176
|
},
|
127
|
-
|
177
|
+
|
128
178
|
topic_words: {
|
129
179
|
collection: {
|
130
180
|
examples: [
|
@@ -134,4 +184,4 @@ class Treat::Specs::Workers::Agnostic < Treat::Specs::Workers::Language
|
|
134
184
|
}
|
135
185
|
}
|
136
186
|
|
137
|
-
end
|
187
|
+
=end
|