treat 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/treat/ai/classifiers/id3.rb +1 -2
- data/lib/treat/ai/classifiers/mlp.rb +30 -0
- data/lib/treat/ai.rb +1 -1
- data/lib/treat/classification.rb +11 -8
- data/lib/treat/data_set.rb +9 -0
- data/lib/treat/dependencies.rb +2 -1
- data/lib/treat/entities/abilities/buildable.rb +1 -1
- data/lib/treat/entities/abilities/countable.rb +1 -1
- data/lib/treat/entities/abilities/iterable.rb +3 -5
- data/lib/treat/entities/abilities/registrable.rb +10 -0
- data/lib/treat/entities/entity.rb +0 -10
- data/lib/treat/extractors/keywords/tf_idf.rb +2 -9
- data/lib/treat/formatters/readers/autoselect.rb +6 -11
- data/lib/treat/formatters/serializers/mongo.rb +64 -0
- data/lib/treat/formatters/serializers/xml.rb +10 -4
- data/lib/treat/formatters/unserializers/xml.rb +6 -0
- data/lib/treat/kernel.rb +1 -1
- data/lib/treat/processors/tokenizers/perl.rb +4 -0
- data/lib/treat.rb +1 -1
- data/spec/entity.rb +47 -12
- data/spec/sandbox.rb +16 -108
- metadata +6 -5
@@ -14,12 +14,11 @@ class Treat::AI::Classifiers::ID3
|
|
14
14
|
set.labels.map { |l| l.to_s }, set.items,
|
15
15
|
cl.default, cl.mode)
|
16
16
|
dec_tree.train
|
17
|
+
@@classifiers[cl] = dec_tree
|
17
18
|
else
|
18
19
|
dec_tree = @@classifiers[cl]
|
19
20
|
end
|
20
21
|
|
21
|
-
cl.export_item(entity, false).inspect
|
22
|
-
|
23
22
|
dec_tree.predict(
|
24
23
|
cl.export_item(entity, false)
|
25
24
|
)
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Currently, this MLP is limited to 1 output.
|
2
|
+
class Treat::AI::Classifiers::MLP
|
3
|
+
|
4
|
+
require 'ai4r'
|
5
|
+
|
6
|
+
@@mlps = {}
|
7
|
+
|
8
|
+
def self.classify(entity, options = {})
|
9
|
+
|
10
|
+
set = options[:training]
|
11
|
+
cl = set.classification
|
12
|
+
|
13
|
+
if !@@mlps[cl]
|
14
|
+
net = Ai4r::NeuralNetwork::
|
15
|
+
Backpropagation.new([cl.labels.size, 3, 1])
|
16
|
+
set.items.each do |item|
|
17
|
+
inputs = item[0..-2]
|
18
|
+
outputs = [item[-1]]
|
19
|
+
net.train(inputs, outputs)
|
20
|
+
end
|
21
|
+
@@mlps[cl] = net
|
22
|
+
else
|
23
|
+
net = @@mlps[cl]
|
24
|
+
end
|
25
|
+
|
26
|
+
net.eval(cl.export_item(entity, false))[0]
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
data/lib/treat/ai.rb
CHANGED
data/lib/treat/classification.rb
CHANGED
@@ -27,21 +27,25 @@ class Treat::Classification
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def export_item(e, include_question = true)
|
30
|
+
|
30
31
|
line = []
|
31
32
|
|
32
33
|
@features.each do |cmd|
|
34
|
+
dflt = nil
|
33
35
|
begin
|
34
36
|
if cmd.is_a?(Array)
|
35
|
-
|
37
|
+
if cmd.size == 3
|
38
|
+
r = cmd[1].call(e)
|
39
|
+
dflt = cmd[2]
|
40
|
+
line << (r ? r : dflt)
|
41
|
+
elsif cmd.size == 2
|
42
|
+
r = e.send(cmd[0])
|
43
|
+
dflt = cmd[1]
|
44
|
+
line << (r ? r : dflt)
|
45
|
+
end
|
36
46
|
else
|
37
47
|
line << e.send(cmd)
|
38
48
|
end
|
39
|
-
rescue Treat::Exception
|
40
|
-
dflt = (
|
41
|
-
(cmd.is_a?(Array) && cmd[2]) ?
|
42
|
-
cmd[2] : nil
|
43
|
-
)
|
44
|
-
line << dflt
|
45
49
|
end
|
46
50
|
end
|
47
51
|
|
@@ -53,7 +57,6 @@ class Treat::Classification
|
|
53
57
|
end
|
54
58
|
end
|
55
59
|
|
56
|
-
line[-1] = '' if line[-1].nil?
|
57
60
|
line
|
58
61
|
end
|
59
62
|
|
data/lib/treat/data_set.rb
CHANGED
data/lib/treat/dependencies.rb
CHANGED
@@ -11,7 +11,8 @@ class Treat::Dependencies
|
|
11
11
|
['linguistics', '>= 1.0.9', 'retrieve the inflection of nouns, verbs and numbers in English'],
|
12
12
|
['punkt-segmenter', '>= 0.9.1', 'segment texts into sentences'],
|
13
13
|
['chronic', '>= 0.6.7', 'detect date and time in text'],
|
14
|
-
['decisiontree', '>= 0.3.0', 'perform decision tree classification of text entities']
|
14
|
+
['decisiontree', '>= 0.3.0', 'perform decision tree classification of text entities'],
|
15
|
+
['ai4r', '>= 1.11', 'perform different kinds of classification tasks on text entities']
|
15
16
|
]
|
16
17
|
|
17
18
|
Binary = [
|
@@ -30,10 +30,8 @@ module Treat::Entities::Abilities::Iterable
|
|
30
30
|
a = []
|
31
31
|
type = :entity unless type
|
32
32
|
each_entity(type) do |e|
|
33
|
-
|
34
|
-
e
|
35
|
-
([:id, :value, :type].include?(feature) &&
|
36
|
-
e.send(feature) == value)
|
33
|
+
r = e.send(feature)
|
34
|
+
a << e if r == value
|
37
35
|
end
|
38
36
|
a
|
39
37
|
end
|
@@ -51,7 +49,7 @@ module Treat::Entities::Abilities::Iterable
|
|
51
49
|
# Returns an array of the entities with the given
|
52
50
|
# category.
|
53
51
|
def entities_with_category(category, type = nil)
|
54
|
-
entities_with_feature(:category, type)
|
52
|
+
entities_with_feature(:category, category, type)
|
55
53
|
end
|
56
54
|
|
57
55
|
# Returns the first ancestor of this entity
|
@@ -5,6 +5,16 @@ module Treat::Entities::Abilities::Registrable
|
|
5
5
|
# Registers a token in the @registry hash.
|
6
6
|
def register(entity)
|
7
7
|
|
8
|
+
unless @registry
|
9
|
+
@count = 0
|
10
|
+
@registry = {
|
11
|
+
:value => {},
|
12
|
+
:position => {},
|
13
|
+
:type => {},
|
14
|
+
:id => {}
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
8
18
|
if entity.is_a?(Treat::Entities::Token) ||
|
9
19
|
entity.is_a?(Treat::Entities::Phrase)
|
10
20
|
val = entity.to_s.downcase
|
@@ -64,18 +64,8 @@ module Treat::Entities
|
|
64
64
|
super(value, id)
|
65
65
|
@type = :entity if self == Entity
|
66
66
|
@type ||= ucc(cl(self.class)).intern
|
67
|
-
unless is_a?(Treat::Entities::Token)
|
68
|
-
@count = 0
|
69
|
-
@registry = {
|
70
|
-
:id => {},
|
71
|
-
:value => {},
|
72
|
-
:type => {},
|
73
|
-
:position => {}
|
74
|
-
}
|
75
|
-
end
|
76
67
|
end
|
77
68
|
|
78
|
-
|
79
69
|
# Add an entity to the current entity.
|
80
70
|
# Registers the entity in the root node
|
81
71
|
# token registry if the entity is a leaf.
|
@@ -41,17 +41,10 @@ class Treat::Extractors::Keywords::TfIdf
|
|
41
41
|
entity.each_word do |word|
|
42
42
|
|
43
43
|
if keywords.include?(word.to_s)
|
44
|
-
word.set :
|
44
|
+
word.set :keyword, true
|
45
45
|
pp = entity.parent_phrase
|
46
|
-
next unless pp
|
47
|
-
if pp.has? :keyword_count
|
48
|
-
pp.set :keyword_count,
|
49
|
-
pp.keyword_count + 1
|
50
|
-
else
|
51
|
-
pp.set :keyword_count, 1
|
52
|
-
end
|
53
46
|
else
|
54
|
-
word.set :
|
47
|
+
word.set :keyword, false
|
55
48
|
end
|
56
49
|
|
57
50
|
end
|
@@ -15,21 +15,16 @@ class Treat::Formatters::Readers::Autoselect
|
|
15
15
|
document.read(detect_format(document.file, options[:default_to]))
|
16
16
|
end
|
17
17
|
|
18
|
-
def self.detect_format(filename, default_to =
|
19
|
-
|
18
|
+
def self.detect_format(filename, default_to = nil)
|
19
|
+
default_to ||= DefaultOptions[:default_to]
|
20
20
|
ext = filename.scan(ExtensionRegexp)
|
21
|
-
ext = (ext.is_a?(Array) && ext[0] && ext[0][0]) ?
|
22
|
-
|
23
|
-
|
24
|
-
format =
|
25
|
-
ImageExtensions.include?(ext) ?
|
26
|
-
'image' : ext
|
27
|
-
|
28
|
-
# Humanize extensions.
|
21
|
+
ext = (ext.is_a?(Array) && ext[0] && ext[0][0]) ? ext[0][0] : ''
|
22
|
+
|
23
|
+
format = ImageExtensions.include?(ext) ? 'image' : ext
|
29
24
|
format = 'html' if format == 'htm'
|
30
25
|
format = 'yaml' if format == 'yml'
|
31
26
|
|
32
|
-
format = default_to if format == ''
|
27
|
+
format = default_to if format.to_s == ''
|
33
28
|
|
34
29
|
format.intern
|
35
30
|
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Stores an entity in a Mongo collection.
|
2
|
+
class Treat::Formatters::Serializers::Mongo
|
3
|
+
|
4
|
+
# Require the Mongo DB
|
5
|
+
require 'mongo'
|
6
|
+
|
7
|
+
# Serialize an entity into a Mongo database.
|
8
|
+
#
|
9
|
+
# Options:
|
10
|
+
# - :database => the name of the database to write to (required).
|
11
|
+
def self.serialize(entity, options = {})
|
12
|
+
|
13
|
+
unless options[:database]
|
14
|
+
raise Treat::Exception,
|
15
|
+
'Must supply the database name.'
|
16
|
+
end
|
17
|
+
|
18
|
+
@@conn ||= Mongo::Connection.new
|
19
|
+
@@db ||= @@conn[options[:database]]
|
20
|
+
|
21
|
+
path = []
|
22
|
+
|
23
|
+
entity.each_ancestor do |ancestor|
|
24
|
+
path << [ancestor.type, ancestor.id]
|
25
|
+
end
|
26
|
+
|
27
|
+
path = path.reverse
|
28
|
+
|
29
|
+
target = @@db
|
30
|
+
|
31
|
+
path.each do |type_id|
|
32
|
+
coll = @@db[type_id[0]][type_id[1]]
|
33
|
+
end
|
34
|
+
|
35
|
+
# Store path
|
36
|
+
|
37
|
+
Treat::Entities.list.each do |type|
|
38
|
+
|
39
|
+
type = entity.type.to_s
|
40
|
+
type = (type == 'entity') ? 'entities' : (type + 's')
|
41
|
+
doc = coll[type]
|
42
|
+
|
43
|
+
features = {}
|
44
|
+
features['id'] = entity.id
|
45
|
+
features['value'] = entity.value
|
46
|
+
|
47
|
+
entity.features.each_pair do |feature, value|
|
48
|
+
if value.is_a? Treat::Entities::Entity
|
49
|
+
value = value.id
|
50
|
+
elsif value.is_a?(Array) || value.is_a?(Hash)
|
51
|
+
value = value.inspect
|
52
|
+
else
|
53
|
+
value = value.to_s
|
54
|
+
end
|
55
|
+
features[feature.to_s] = value
|
56
|
+
end
|
57
|
+
|
58
|
+
doc.insert(features)
|
59
|
+
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
@@ -8,12 +8,14 @@ class Treat::Formatters::Serializers::XML
|
|
8
8
|
# Options:
|
9
9
|
# - (String) :file => a file to write to.
|
10
10
|
def self.serialize(entity, options = {})
|
11
|
-
|
12
|
-
|
11
|
+
if options[:indent].nil?
|
12
|
+
options = options.merge({:indent => 0})
|
13
|
+
end
|
13
14
|
indent = options[:indent]
|
14
15
|
if options[:indent] == 0
|
15
16
|
enc = entity.to_s.encoding.to_s.downcase
|
16
|
-
string = "<?xml version=\"1.0\"
|
17
|
+
string = "<?xml version=\"1.0\" " +
|
18
|
+
"encoding=\"#{enc}\" ?>\n<treat>\n"
|
17
19
|
else
|
18
20
|
string = ''
|
19
21
|
end
|
@@ -26,20 +28,24 @@ class Treat::Formatters::Serializers::XML
|
|
26
28
|
if value.is_a? Treat::Entities::Entity
|
27
29
|
attributes << "#{feature}='#{value.id}' "
|
28
30
|
else
|
31
|
+
value = value.inspect if value.is_a?(Symbol)
|
29
32
|
attributes << "#{feature}='#{escape(value)}' "
|
30
33
|
end
|
31
34
|
end
|
35
|
+
############ To be refactored
|
32
36
|
unless entity.dependencies.empty?
|
33
37
|
attributes << "dependencies='"
|
34
38
|
a = []
|
35
39
|
entity.dependencies.each do |dependency|
|
36
|
-
a << ("{target: #{dependency.target},
|
40
|
+
a << ("{target: #{dependency.target}, "+
|
41
|
+
"type: #{dependency.type}, " +
|
37
42
|
"directed: #{dependency.directed}, " +
|
38
43
|
"direction: #{dependency.direction}}" )
|
39
44
|
end
|
40
45
|
# Structs.
|
41
46
|
attributes << a.join(',') + "'"
|
42
47
|
end
|
48
|
+
############ End of ugly code
|
43
49
|
end
|
44
50
|
tag = entity.class.to_s.split('::')[-1].downcase
|
45
51
|
string += "#{spaces}<#{tag}#{attributes}>"
|
@@ -36,6 +36,7 @@ module Treat::Formatters::Unserializers::XML
|
|
36
36
|
id = nil; value = ''
|
37
37
|
attributes = {}
|
38
38
|
dependencies = []
|
39
|
+
|
39
40
|
unless xml_reader.attributes.size == 0
|
40
41
|
xml_reader.attributes.each_pair do |k,v|
|
41
42
|
if k == 'id'
|
@@ -64,6 +65,11 @@ module Treat::Formatters::Unserializers::XML
|
|
64
65
|
elsif k == 'value'
|
65
66
|
value = v
|
66
67
|
else
|
68
|
+
v = v[1..-1].intern if v[0] == ':'
|
69
|
+
v = v.to_i if v =~ /^[0-9]*$/
|
70
|
+
v = v.to_f if v =~ /^[0-9\.]*$/
|
71
|
+
v = false if v == 'false'
|
72
|
+
v = true if v == 'true'
|
67
73
|
attributes[k.intern] = v
|
68
74
|
end
|
69
75
|
end
|
data/lib/treat/kernel.rb
CHANGED
@@ -10,7 +10,7 @@ module Kernel
|
|
10
10
|
# A list of acronyms used in class names within
|
11
11
|
# the program. These do not CamelCase; they
|
12
12
|
# CAMELCase.
|
13
|
-
Acronyms = %w[xml html txt odt abw doc yaml uea lda pdf ptb dot ai id3 svo]
|
13
|
+
Acronyms = %w[xml html txt odt abw doc yaml uea lda pdf ptb dot ai id3 svo mlp]
|
14
14
|
|
15
15
|
# A cache to optimize camel casing.
|
16
16
|
@@cc_cache = {}
|
@@ -43,6 +43,8 @@ module Treat::Processors::Tokenizers::Perl
|
|
43
43
|
|
44
44
|
# Replace all decimal points by ^^
|
45
45
|
Treat::Helpers::DecimalPointEscaper.escape!(text)
|
46
|
+
|
47
|
+
=begin
|
46
48
|
|
47
49
|
# Translate some common extended ascii
|
48
50
|
# characters to quotes
|
@@ -60,6 +62,8 @@ module Treat::Processors::Tokenizers::Perl
|
|
60
62
|
text.gsub!(/\"(?=\s)/," '' ")
|
61
63
|
#s{\"} { `` }g;
|
62
64
|
text.gsub!(/\"(?=\s)/," `` ")
|
65
|
+
=end
|
66
|
+
|
63
67
|
# Isolate ellipses
|
64
68
|
# s{\.\.\.} { ... }g;
|
65
69
|
text.gsub!(/\.\.\./,' ... ')
|
data/lib/treat.rb
CHANGED
data/spec/entity.rb
CHANGED
@@ -113,7 +113,7 @@ describe Treat::Entities::Entity do
|
|
113
113
|
describe "Exportable" do
|
114
114
|
|
115
115
|
context "when supplied with a classification to export" do
|
116
|
-
classification = Treat::Classification.new(:word, :tag, :is_keyword
|
116
|
+
classification = Treat::Classification.new(:word, :tag, :is_keyword)
|
117
117
|
it "returns a data set with the exported features" do
|
118
118
|
ds = @sentence.export(classification)
|
119
119
|
ds.classification.should eql classification
|
@@ -316,13 +316,14 @@ describe Treat::Entities::Entity do
|
|
316
316
|
|
317
317
|
describe "Formatters" do
|
318
318
|
|
319
|
+
|
320
|
+
before do
|
321
|
+
@serializers = [:xml, :yaml] # Treat::Languages::All::Serializers
|
322
|
+
@txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
|
323
|
+
end
|
324
|
+
|
319
325
|
describe "#serialize" do
|
320
|
-
|
321
|
-
before :all do
|
322
|
-
@serializers = [:xml, :yaml] # Treat::Languages::All::Serializers
|
323
|
-
@txt = "The story of the fox. The quick brown fox jumped over the lazy dog."
|
324
|
-
end
|
325
|
-
|
326
|
+
|
326
327
|
context "when called with a file to save to" do
|
327
328
|
|
328
329
|
it "serializes a document to the supplied format" do
|
@@ -332,24 +333,58 @@ describe Treat::Entities::Entity do
|
|
332
333
|
s = Treat::Entities::Paragraph.new(@txt)
|
333
334
|
s.do(:segment, :tokenize)
|
334
335
|
s.serialize(ser, :file => f)
|
336
|
+
File.delete(f)
|
337
|
+
end
|
338
|
+
|
339
|
+
end
|
340
|
+
|
341
|
+
end
|
342
|
+
|
343
|
+
end
|
344
|
+
|
345
|
+
describe "#unserialize" do
|
346
|
+
|
347
|
+
context "when called with a serialized file" do
|
348
|
+
|
349
|
+
it "reconstitutes the original entity" do
|
350
|
+
@serializers.each do |ser|
|
351
|
+
|
352
|
+
f = Treat.spec + 'test.' + ser.to_s
|
353
|
+
s = Treat::Entities::Paragraph.new(@txt)
|
354
|
+
|
355
|
+
s.set :test_int, 9
|
356
|
+
s.set :test_float, 9.9
|
357
|
+
s.set :test_string, 'hello'
|
358
|
+
s.set :test_sym, :hello
|
359
|
+
s.set :test_bool, false
|
360
|
+
|
361
|
+
s.do(:segment, :tokenize)
|
362
|
+
|
363
|
+
s.serialize(ser, :file => f)
|
364
|
+
|
335
365
|
d = Treat::Entities::Document.build(f)
|
366
|
+
|
367
|
+
d.test_int.should eql 9
|
368
|
+
d.test_float.should eql 9.9
|
369
|
+
d.test_string.should eql 'hello'
|
370
|
+
d.test_sym.should eql :hello
|
371
|
+
d.test_bool.should eql false
|
372
|
+
|
336
373
|
d.to_s.should eql @txt
|
337
374
|
d.size.should eql s.size
|
375
|
+
|
338
376
|
d.token_count.should eql s.token_count
|
339
377
|
d.tokens[0].id.should eql s.tokens[0].id
|
378
|
+
|
340
379
|
File.delete(f)
|
341
380
|
end
|
342
|
-
|
381
|
+
|
343
382
|
end
|
344
383
|
|
345
384
|
end
|
346
385
|
|
347
386
|
end
|
348
387
|
|
349
|
-
describe "#unserialize" do
|
350
|
-
|
351
|
-
end
|
352
|
-
|
353
388
|
end
|
354
389
|
|
355
390
|
describe "Extractors" do
|
data/spec/sandbox.rb
CHANGED
@@ -1,116 +1,24 @@
|
|
1
|
-
=begin
|
2
1
|
require_relative '../lib/treat'
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
*sentences.values
|
7
|
-
.each_with_index
|
8
|
-
.sort.reverse
|
9
|
-
.map(&:last)
|
10
|
-
.sort.take(n))
|
11
|
-
.map(&:first)
|
12
|
-
end
|
3
|
+
s = Sentence "Barack Obama was killed last night."
|
4
|
+
s.tokenize
|
13
5
|
|
14
|
-
|
6
|
+
puts s.word_with_position(2).inspect
|
15
7
|
|
16
|
-
|
17
|
-
|
18
|
-
Treat.debug = true
|
19
|
-
Treat.silence = true
|
8
|
+
s.word_with_position(2).set :highlighted, 1
|
20
9
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
c.each_document do |d|
|
29
|
-
|
30
|
-
sentences = {}
|
31
|
-
|
32
|
-
d.each_sentence do |sentence|
|
33
|
-
cx = sentence.classify(:training => context)
|
34
|
-
ct = sentence.classify(:training => content)
|
35
|
-
sentences[sentence] = cx[1] + ct[1]
|
36
|
-
end
|
10
|
+
cl = Treat::Classification.new(
|
11
|
+
:word,
|
12
|
+
[[:position, 0]],
|
13
|
+
:highlighted,
|
14
|
+
0
|
15
|
+
)
|
37
16
|
|
38
|
-
|
39
|
-
puts d.titles[0].to_s
|
40
|
-
puts
|
41
|
-
|
42
|
-
puts extract(sentences, 5).map { |p| p.to_s }.join(' ')
|
43
|
-
|
44
|
-
end
|
17
|
+
data_set = s.export(cl)
|
45
18
|
|
46
|
-
|
19
|
+
s2 = Sentence ''
|
20
|
+
w = Word 'Hello'
|
21
|
+
s2 << w
|
22
|
+
w.set :position, 2
|
47
23
|
|
48
|
-
|
49
|
-
c.do :chunk, :segment, :tokenize, :tag, :name_tag, :tf_idf, :keywords
|
50
|
-
|
51
|
-
# Topic word count ? Synonyms of keywords ?
|
52
|
-
# Time expressions?
|
53
|
-
classify_content = Treat::Classification.new(
|
54
|
-
:phrase,
|
55
|
-
[:word_count, :number_count,
|
56
|
-
:keyword_count, :name_tag_count],
|
57
|
-
:has_key_content?
|
58
|
-
)
|
59
|
-
|
60
|
-
classify_context = Treat::Classification.new(
|
61
|
-
:phrase,
|
62
|
-
[:position,
|
63
|
-
:position_from_end,
|
64
|
-
:type_of_parent_zone,
|
65
|
-
:value_of_first_word,
|
66
|
-
:tag_of_first_word
|
67
|
-
],
|
68
|
-
:has_key_context?,
|
69
|
-
false,
|
70
|
-
:discrete
|
71
|
-
)
|
72
|
-
|
73
|
-
c.each_sentence do |s|
|
74
|
-
puts s.to_s
|
75
|
-
a = STDIN.gets.to_s.strip
|
76
|
-
if a == ''
|
77
|
-
s.set :has_key_content?, false
|
78
|
-
s.set :has_key_context?, false
|
79
|
-
else
|
80
|
-
s.set :has_key_content?, true
|
81
|
-
s.set :has_key_context?, true
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
context = c.export(classify_context)
|
86
|
-
content = c.export(classify_content)
|
87
|
-
|
88
|
-
context.save('economist-context.yml')
|
89
|
-
content.save('economist-content.yml')
|
90
|
-
|
91
|
-
context = Treat::DataSet.open('economist-context.yml')
|
92
|
-
content = Treat::DataSet.open('economist-content.yml')
|
93
|
-
|
94
|
-
c.each_document do |d|
|
95
|
-
|
96
|
-
sentences = {}
|
97
|
-
|
98
|
-
d.each_sentence do |sentence|
|
99
|
-
cx = sentence.classify(:training => context)
|
100
|
-
ct = sentence.classify(:training => content)
|
101
|
-
sentences[sentence] = cx[1] + ct[1]
|
102
|
-
end
|
103
|
-
|
104
|
-
puts
|
105
|
-
puts d.titles[0].to_s
|
106
|
-
puts
|
107
|
-
|
108
|
-
puts extract(sentences, 5).map { |p| p.to_s }.join(' ')
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
|
113
|
-
end
|
114
|
-
|
115
|
-
end
|
116
|
-
=end
|
24
|
+
puts w.classify(:mlp, :training => data_set).inspect
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-04-
|
12
|
+
date: 2012-04-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rubyzip
|
@@ -75,8 +75,7 @@ dependencies:
|
|
75
75
|
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: 0.9.2
|
78
|
-
description: ! ' Treat is a
|
79
|
-
and natural language processing. '
|
78
|
+
description: ! ' Treat is a full-fledged natural language processing toolkit for Ruby. '
|
80
79
|
email:
|
81
80
|
- louis.mullie@gmail.com
|
82
81
|
executables: []
|
@@ -84,6 +83,7 @@ extensions: []
|
|
84
83
|
extra_rdoc_files: []
|
85
84
|
files:
|
86
85
|
- lib/treat/ai/classifiers/id3.rb
|
86
|
+
- lib/treat/ai/classifiers/mlp.rb
|
87
87
|
- lib/treat/ai.rb
|
88
88
|
- lib/treat/categories.rb
|
89
89
|
- lib/treat/categorizable.rb
|
@@ -128,6 +128,7 @@ files:
|
|
128
128
|
- lib/treat/formatters/readers/pdf.rb
|
129
129
|
- lib/treat/formatters/readers/txt.rb
|
130
130
|
- lib/treat/formatters/readers/xml.rb
|
131
|
+
- lib/treat/formatters/serializers/mongo.rb
|
131
132
|
- lib/treat/formatters/serializers/xml.rb
|
132
133
|
- lib/treat/formatters/serializers/yaml.rb
|
133
134
|
- lib/treat/formatters/unserializers/autoselect.rb
|
@@ -244,5 +245,5 @@ rubyforge_project:
|
|
244
245
|
rubygems_version: 1.8.21
|
245
246
|
signing_key:
|
246
247
|
specification_version: 3
|
247
|
-
summary:
|
248
|
+
summary: Text Retrieval, Extraction and Annotation Toolkit.
|
248
249
|
test_files: []
|