treat 2.0.4 → 2.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -0
- data/files/21552208.html +797 -0
- data/files/nethttp-cheat-sheet-2940.html +393 -0
- data/lib/treat/config/data/core.rb +1 -1
- data/lib/treat/config/data/languages/english.rb +1 -1
- data/lib/treat/config/data/languages/german.rb +2 -0
- data/lib/treat/config/data/libraries.rb +4 -0
- data/lib/treat/entities/entity/buildable.rb +4 -6
- data/lib/treat/helpers/string.rb +1 -1
- data/lib/treat/loaders/bind_it.rb +48 -0
- data/lib/treat/loaders/open_nlp.rb +12 -0
- data/lib/treat/loaders/stanford.rb +7 -46
- data/lib/treat/proxies/proxy.rb +3 -2
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/formatters/readers/autoselect.rb +1 -1
- data/lib/treat/workers/formatters/readers/document.rb +17 -0
- data/lib/treat/workers/formatters/unserializers/xml.rb +1 -0
- data/lib/treat/workers/groupable.rb +1 -1
- data/lib/treat/workers/processors/tokenizers/{maxent.rb → open_nlp.rb} +5 -4
- data/spec/entities/document.rb +2 -2
- data/spec/helper.rb +4 -0
- data/spec/sandbox.rb +306 -0
- data/spec/workers/examples/english/mathematicians/euler.html +21 -0
- data/spec/workers/examples/english/mathematicians/pythagoras.docx +0 -0
- metadata +28 -4
data/lib/treat/helpers/string.rb
CHANGED
@@ -54,7 +54,7 @@ class Treat::Helpers::String
|
|
54
54
|
if @@cc_cache[o_phrase]
|
55
55
|
return @@cc_cache[o_phrase]
|
56
56
|
end
|
57
|
-
if Treat.core.acronyms.include?(phrase)
|
57
|
+
if Treat.core.acronyms.include?(phrase.downcase)
|
58
58
|
phrase = phrase.upcase
|
59
59
|
else
|
60
60
|
phrase.gsub!(Regex) { |a| a.upcase }
|
data/lib/treat/loaders/bind_it.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
class Treat::Loaders::BindIt
|
2
|
+
|
3
|
+
# Keep track of whether its loaded or not.
|
4
|
+
@@loaded = {}
|
5
|
+
|
6
|
+
# Load CoreNLP package for a given language.
|
7
|
+
def self.load(klass, name, language = nil)
|
8
|
+
|
9
|
+
return if @@loaded[klass]
|
10
|
+
|
11
|
+
language ||= Treat.core.language.default
|
12
|
+
|
13
|
+
jar_path = Treat.libraries[name].jar_path ||
|
14
|
+
Treat.paths.bin + "#{name}/"
|
15
|
+
model_path = Treat.libraries[name].model_path ||
|
16
|
+
Treat.paths.models + "#{name}/"
|
17
|
+
|
18
|
+
if !File.directory?(jar_path)
|
19
|
+
raise Treat::Exception, "Looking for #{klass} " +
|
20
|
+
"library JAR files in #{jar_path}, but it is " +
|
21
|
+
"not a directory. Please set the config option " +
|
22
|
+
"Treat.libraries.#{name}.jar_path to a folder " +
|
23
|
+
"containing the appropriate JAR files."
|
24
|
+
end
|
25
|
+
|
26
|
+
if !File.directory?(model_path)
|
27
|
+
raise Treat::Exception, "Looking for #{klass} " +
|
28
|
+
"library model files in #{model_path}, but it " +
|
29
|
+
"is not a directory. Please set the config option " +
|
30
|
+
"Treat.libraries.#{name}.model_path to a folder " +
|
31
|
+
"containing the appropriate JAR files."
|
32
|
+
end
|
33
|
+
|
34
|
+
klass.jar_path = jar_path
|
35
|
+
klass.model_path = model_path
|
36
|
+
klass.use language
|
37
|
+
|
38
|
+
if Treat.core.verbosity.silence
|
39
|
+
klass.log_file = '/dev/null'
|
40
|
+
end
|
41
|
+
|
42
|
+
klass.bind
|
43
|
+
|
44
|
+
@@loaded[klass] = true
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
data/lib/treat/loaders/stanford.rb
CHANGED
@@ -1,53 +1,14 @@
|
|
1
|
+
require 'treat/loaders/bind_it'
|
2
|
+
|
1
3
|
# A helper class to load the CoreNLP package.
|
2
|
-
class Treat::Loaders::Stanford
|
4
|
+
class Treat::Loaders::Stanford < Treat::Loaders::BindIt
|
3
5
|
|
4
|
-
|
5
|
-
@@loaded = false
|
6
|
+
require 'stanford-core-nlp'
|
6
7
|
|
7
|
-
# Load CoreNLP package for a given language.
|
8
8
|
def self.load(language = nil)
|
9
|
-
|
10
|
-
return if @@loaded
|
11
|
-
|
12
|
-
language ||= Treat.core.language.default
|
13
|
-
|
14
|
-
jar_path = Treat.libraries.stanford.jar_path ||
|
15
|
-
Treat.paths.bin + 'stanford/'
|
16
|
-
model_path = Treat.libraries.stanford.model_path ||
|
17
|
-
Treat.paths.models + 'stanford/'
|
18
|
-
|
19
|
-
if !File.directory?(jar_path)
|
20
|
-
raise Treat::Exception, "Looking for Stanford " +
|
21
|
-
"CoreNLP JAR files in #{jar_path}, but it is " +
|
22
|
-
"not a directory. Please set the config option " +
|
23
|
-
"Treat.libraries.stanford.jar_path to a folder " +
|
24
|
-
"containing the Stanford JAR files."
|
25
|
-
end
|
26
|
-
|
27
|
-
if !File.directory?(model_path)
|
28
|
-
raise Treat::Exception, "Looking for Stanford " +
|
29
|
-
"CoreNLP model files in #{model_path}, but it " +
|
30
|
-
"is not a directory. Please set the config option " +
|
31
|
-
"Treat.libraries.stanford.model_path to a folder " +
|
32
|
-
"containing the Stanford JAR files."
|
33
|
-
end
|
34
|
-
|
35
|
-
require 'stanford-core-nlp'
|
36
|
-
|
37
|
-
StanfordCoreNLP.jar_path = jar_path
|
38
|
-
StanfordCoreNLP.model_path = model_path
|
39
|
-
StanfordCoreNLP.use(language)
|
40
|
-
|
41
|
-
if Treat.core.verbosity.silence
|
42
|
-
StanfordCoreNLP.log_file = '/dev/null'
|
43
|
-
end
|
44
|
-
|
45
|
-
StanfordCoreNLP.bind
|
46
|
-
|
47
|
-
@@loaded = true
|
48
|
-
|
9
|
+
super(StanfordCoreNLP, :stanford, language)
|
49
10
|
end
|
50
|
-
|
11
|
+
|
51
12
|
def self.find_model(name, language)
|
52
13
|
language = language.intern
|
53
14
|
model_file = StanfordCoreNLP::Config::Models[name][language]
|
@@ -57,4 +18,4 @@ class Treat::Loaders::Stanford
|
|
57
18
|
File.join(model_path, model_dir, model_file)
|
58
19
|
end
|
59
20
|
|
60
|
-
end
|
21
|
+
end
|
data/lib/treat/proxies/proxy.rb
CHANGED
@@ -11,14 +11,15 @@ module Treat::Proxies
|
|
11
11
|
def method_missing(sym, *args, &block)
|
12
12
|
if [:do, :apply].include?(sym) ||
|
13
13
|
Treat::Workers.lookup(sym)
|
14
|
-
|
14
|
+
to_entity.send(sym, *args)
|
15
15
|
else
|
16
16
|
super(sym, *args, &block)
|
17
17
|
end
|
18
18
|
end
|
19
|
+
|
19
20
|
# Create an unknown type of entity by default.
|
20
21
|
def to_entity(builder = nil)
|
21
|
-
Treat::Entities::Unknown(self.to_s)
|
22
|
+
Treat::Entities::Unknown.new(self.to_s)
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
data/lib/treat/version.rb
CHANGED
data/lib/treat/workers/formatters/readers/document.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'yomu'
|
2
|
+
|
3
|
+
# This class is a wrapper for Yomu.
|
4
|
+
# Yomu is a library for extracting text and metadata from files and documents
|
5
|
+
# using the Apache Tika content analysis toolkit.
|
6
|
+
class Treat::Workers::Formatters::Readers::Document
|
7
|
+
# Extract the readable text from any document.
|
8
|
+
#
|
9
|
+
# Options: none.
|
10
|
+
def self.read(document, options = {})
|
11
|
+
yomu = Yomu.new(document.file)
|
12
|
+
|
13
|
+
document.value = yomu.text
|
14
|
+
document.set :format, yomu.mimetype.extensions.first
|
15
|
+
document
|
16
|
+
end
|
17
|
+
end
|
data/lib/treat/workers/processors/tokenizers/{maxent.rb → open_nlp.rb}
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# Maximum entropy tokenization supplied by OpenNLP.
|
2
|
-
class Treat::Workers::Processors::Tokenizers::Maxent
|
2
|
+
class Treat::Workers::Processors::Tokenizers::OpenNlp
|
3
3
|
|
4
4
|
require 'open-nlp'
|
5
|
-
OpenNLP.load
|
5
|
+
Treat::Loaders::OpenNLP.load
|
6
|
+
|
7
|
+
@@tokenizers = {}
|
6
8
|
|
7
9
|
# Maximum entropy tokenization.
|
8
10
|
def self.tokenize(entity, options = {})
|
@@ -20,8 +22,7 @@ class Treat::Workers::Processors::Tokenizers::Maxent
|
|
20
22
|
tokens = tokenizer.tokenize(str).to_a
|
21
23
|
|
22
24
|
tokens.each do |token|
|
23
|
-
entity << Treat::Entities
|
24
|
-
::Token.from_string(chunk)
|
25
|
+
entity << Treat::Entities::Token.from_string(token)
|
25
26
|
end
|
26
27
|
|
27
28
|
end
|
data/spec/entities/document.rb
CHANGED
@@ -9,10 +9,10 @@ module Treat::Specs::Entities
|
|
9
9
|
it "opens the file and reads its " +
|
10
10
|
"content into a document" do
|
11
11
|
f = Treat.paths.spec +
|
12
|
-
'workers/examples/english/mathematicians/
|
12
|
+
'workers/examples/english/mathematicians/pythagoras.docx'
|
13
13
|
d = Treat::Entities::Document.build(f)
|
14
14
|
d.should be_an_instance_of Treat::Entities::Document
|
15
|
-
d.to_s.index('
|
15
|
+
d.to_s.index('Pythagoras of Samos').should_not eql nil
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
data/spec/helper.rb
CHANGED
@@ -13,6 +13,10 @@ module Treat::Specs
|
|
13
13
|
'/ruby/stanford-core-nlp-minimal/models/'
|
14
14
|
Treat.libraries.stanford.jar_path =
|
15
15
|
'/ruby/stanford-core-nlp-minimal/bin/'
|
16
|
+
Treat.libraries.open_nlp.jar_path =
|
17
|
+
'/ruby/open-nlp-english/bin/'
|
18
|
+
Treat.libraries.open_nlp.model_path =
|
19
|
+
'/ruby/open-nlp-english/models/'
|
16
20
|
Treat.libraries.punkt.model_path =
|
17
21
|
'/ruby/punkt/models/'
|
18
22
|
Treat.libraries.reuters.model_path =
|
data/spec/sandbox.rb
ADDED
@@ -0,0 +1,306 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative '../lib/treat'
|
3
|
+
|
4
|
+
Treat.databases.mongo.db = 'treat_test'
|
5
|
+
Treat.libraries.stanford.model_path =
|
6
|
+
'/ruby/stanford-core-nlp-minimal/models/'
|
7
|
+
Treat.libraries.stanford.jar_path =
|
8
|
+
'/ruby/stanford-core-nlp-minimal/bin/'
|
9
|
+
Treat.libraries.punkt.model_path =
|
10
|
+
'/ruby/punkt/models/'
|
11
|
+
Treat.libraries.reuters.model_path =
|
12
|
+
'/ruby/reuters/models/'
|
13
|
+
Treat.libraries.open_nlp.jar_path =
|
14
|
+
'/ruby/open-nlp-english/bin/'
|
15
|
+
Treat.libraries.open_nlp.model_path =
|
16
|
+
'/ruby/open-nlp-english/models/'
|
17
|
+
Treat.core.verbosity.silence = false
|
18
|
+
|
19
|
+
include Treat::Core::DSL
|
20
|
+
|
21
|
+
s = sentence "This is a sentence to parse!"
|
22
|
+
s.tokenize(:open_nlp).parse
|
23
|
+
s.print_tree
|
24
|
+
|
25
|
+
=begin
|
26
|
+
Treat::Builder.new do
|
27
|
+
p = phrase "26 Feb"
|
28
|
+
p.tokenize.time :kronic
|
29
|
+
puts p.inspect
|
30
|
+
s = sentence "Hello, world!"
|
31
|
+
s2 = sentence "Hello world"
|
32
|
+
puts s.similarity :jaro_winkler, to: s2
|
33
|
+
puts s.distance :levenshtein, to: s2
|
34
|
+
# puts s.similarity :tf_idf, to: s2
|
35
|
+
end
|
36
|
+
|
37
|
+
g = group("I was running")
|
38
|
+
puts g.tag.inspect
|
39
|
+
|
40
|
+
Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
|
41
|
+
Treat.libraries.stanford.model_path = '/ruby/treat/models/'
|
42
|
+
|
43
|
+
p = paragraph
|
44
|
+
s = sentence
|
45
|
+
w = word
|
46
|
+
|
47
|
+
p = phrase 'hello world'
|
48
|
+
e = email 'louis@gmail.com'
|
49
|
+
|
50
|
+
d = question(:is_feature, :word)
|
51
|
+
=end
|
52
|
+
#d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
|
53
|
+
#d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
|
54
|
+
#d.print_tree
|
55
|
+
#d = document Treat.paths.spec + 'workers/examples/english/economist/saving_the_euro.odt'
|
56
|
+
#d.print_tree
|
57
|
+
=begin
|
58
|
+
d = document 'test.htm'
|
59
|
+
d.apply :chunk
|
60
|
+
#d.serialize :yaml, file: 'test444.yaml'
|
61
|
+
d.set :test, 2
|
62
|
+
d.serialize :mongo, db: 'test'
|
63
|
+
d.set :test, 3
|
64
|
+
d.serialize :mongo, db: 'test'
|
65
|
+
d.apply :segment, :tokenize, :tag, :category
|
66
|
+
puts d.verb_count
|
67
|
+
#d2 = document id: d.id, db: 'test'
|
68
|
+
d2 = document 'features.test' => 3, db: 'test'
|
69
|
+
d2.apply :segment, :tokenize, :tag, :category
|
70
|
+
puts d2.verb_count
|
71
|
+
#d.print_tree
|
72
|
+
#s = document 'http://www.economist.com'
|
73
|
+
|
74
|
+
p = phrase 'hello', 'world', '!'
|
75
|
+
puts p.to_s
|
76
|
+
puts p.to_str
|
77
|
+
=end
|
78
|
+
|
79
|
+
=begin
|
80
|
+
### Super basics.
|
81
|
+
puts p.value
|
82
|
+
|
83
|
+
p << 'bitch'
|
84
|
+
p << word('hello')
|
85
|
+
puts p.to_s
|
86
|
+
puts p.to_str
|
87
|
+
puts p.value
|
88
|
+
puts p.to_ary.inspect
|
89
|
+
=end
|
90
|
+
|
91
|
+
=begin
|
92
|
+
|
93
|
+
### Configuration
|
94
|
+
|
95
|
+
# A boolean value indicating whether to silence the output of external libraries (e.g. Stanford tools, Enju, LDA, Ruby-FANN) when they are used.
|
96
|
+
puts Treat.core.verbosity.silence
|
97
|
+
# A boolean value indicating whether to explain the steps that Treat is performing.
|
98
|
+
puts Treat.core.verbosity.debug
|
99
|
+
# A boolean value indicating whether Treat should try to detect the language of newly input text.
|
100
|
+
puts Treat.core.language.detect
|
101
|
+
# The language to default to when detection is off.
|
102
|
+
puts Treat.core.language.default
|
103
|
+
# A symbol representing the finest level at which language detection should be performed if language detection is turned on.
|
104
|
+
puts Treat.core.language.detect_at
|
105
|
+
|
106
|
+
# A directory in which to create temporary files.
|
107
|
+
puts Treat.paths.tmp
|
108
|
+
# A directory in which to store downloaded files.
|
109
|
+
puts Treat.paths.files
|
110
|
+
# A directory containing trained models for various tasks.
|
111
|
+
puts Treat.paths.models
|
112
|
+
# A directory containing the spec files.
|
113
|
+
puts Treat.paths.spec
|
114
|
+
# A directory containing executables and JAR files.
|
115
|
+
puts Treat.paths.bin
|
116
|
+
puts Treat.paths.lib
|
117
|
+
|
118
|
+
# Set up Mongoid.
|
119
|
+
Treat.databases.mongo.db = 'your_database'
|
120
|
+
Treat.databases.mongo.host = 'localhost'
|
121
|
+
Treat.databases.mongo.port = '27017'
|
122
|
+
|
123
|
+
# Transparent string casting.
|
124
|
+
s = 'inflection'.stem
|
125
|
+
# is equivalent to
|
126
|
+
s = 'inflection'.to_entity.stem
|
127
|
+
# which comes down to
|
128
|
+
s = word('inflection').stem
|
129
|
+
|
130
|
+
# Transparent number casting.
|
131
|
+
n = 2.ordinal
|
132
|
+
# is equivalent to
|
133
|
+
s = 2.to_entity.ordinal
|
134
|
+
# which comes down to
|
135
|
+
s = number(2).ordinal
|
136
|
+
=end
|
137
|
+
=begin
|
138
|
+
### BASIC USAGE
|
139
|
+
|
140
|
+
# Create a sentence
|
141
|
+
s = sentence 'Those who dream by day know of at least ' +
|
142
|
+
'19 things that escape those who dream only at night.'
|
143
|
+
|
144
|
+
# Tokenize and tag it.
|
145
|
+
s.tokenize.tag
|
146
|
+
|
147
|
+
# View the sentence structure.
|
148
|
+
s.print_tree
|
149
|
+
|
150
|
+
# Iterate over the tokens.
|
151
|
+
s.each_token do |tok|
|
152
|
+
puts tok.value
|
153
|
+
puts tok.type
|
154
|
+
end
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
# Arrays instead of iterators.
|
159
|
+
(s.nouns + s.adjectives).each do |word|
|
160
|
+
puts word.synonyms
|
161
|
+
puts word.antonyms
|
162
|
+
end
|
163
|
+
|
164
|
+
# Functions on numbers.
|
165
|
+
s.each_number do |num|
|
166
|
+
puts num.ordinal
|
167
|
+
puts num.cardinal
|
168
|
+
end
|
169
|
+
|
170
|
+
# See all the annotations.
|
171
|
+
s.each do |tok|
|
172
|
+
puts tok.inspect
|
173
|
+
end
|
174
|
+
|
175
|
+
# Lazy way of doing all of the above.
|
176
|
+
s = sentence 'Those who dream by day know of at least ' +
|
177
|
+
'19 things that escape those who dream only at night.'
|
178
|
+
|
179
|
+
s.apply :tokenize, :tag, :category,
|
180
|
+
:stem, :hyponyms, :hypernyms,
|
181
|
+
:antonyms, :ordinal, :cardinal
|
182
|
+
|
183
|
+
=end
|
184
|
+
|
185
|
+
=begin
|
186
|
+
### A BIT MORE ADVANCED USAGE
|
187
|
+
|
188
|
+
section = section "Obama-Sarkozy Meeting\n" +
|
189
|
+
"Obama and Sarkozy met on January 1st to investigate " +
|
190
|
+
"the possibility of a new rescue plan. President " +
|
191
|
+
"Sarkozy is to meet Merkel next Tuesday in Berlin."
|
192
|
+
|
193
|
+
# Chunk: split the titles and paragraphs.
|
194
|
+
# Segment: perform sentence segmentation.
|
195
|
+
# Parse: parse the syntax of each sentence.
|
196
|
+
section.apply :chunk, :segment, :parse
|
197
|
+
|
198
|
+
# View the tree structure.
|
199
|
+
section.print_tree
|
200
|
+
|
201
|
+
# Get some basic info on the text.
|
202
|
+
puts section.title
|
203
|
+
puts section.sentence_count
|
204
|
+
puts section.word_count
|
205
|
+
|
206
|
+
section.apply :category
|
207
|
+
puts section.noun_count
|
208
|
+
puts section.frequency_of 'president'
|
209
|
+
|
210
|
+
section.each_phrase_with_tag('NP') do |phrase|
|
211
|
+
puts phrase.to_s
|
212
|
+
end
|
213
|
+
|
214
|
+
=end
|
215
|
+
=begin
|
216
|
+
### URL documents, XML serialization.
|
217
|
+
|
218
|
+
urls = ['http://www.cbc.ca/news/world/story/2012/11/25/snc-lavalin-ben-aissa-charges.html',
|
219
|
+
'http://www.cbc.ca/news/world/story/2012/11/25/egypt.html', 'http://www.cbc.ca/news/canada/prince-edward-island/story/2012/11/25/pei-murder-arrest-stlucia.html', 'http://www.cbc.ca/news/world/story/2012/11/25/bangladesh-garment-factory-fire.html']
|
220
|
+
|
221
|
+
c = collection
|
222
|
+
urls.each { |url| c << document(url) }
|
223
|
+
|
224
|
+
# View the collection.
|
225
|
+
c.print_tree
|
226
|
+
|
227
|
+
c.apply :chunk, :segment, :tokenize
|
228
|
+
c.serialize :xml, :file => 'test.xml'
|
229
|
+
|
230
|
+
# Reopen the collection.
|
231
|
+
c = collection 'test.xml'
|
232
|
+
|
233
|
+
# View it again.
|
234
|
+
c.print_tree
|
235
|
+
=end
|
236
|
+
=begin
|
237
|
+
include Treat::Core::DSL
|
238
|
+
|
239
|
+
# Show progress bars for download.
|
240
|
+
Treat.core.verbosity.silence = false
|
241
|
+
# Explain what Treat is doing.
|
242
|
+
Treat.core.verbosity.debug = true
|
243
|
+
|
244
|
+
# Define the question "is it junk?" on sentences.
|
245
|
+
qn = question(:is_junk, :sentence)
|
246
|
+
|
247
|
+
# Frame the problem as depending on punctuation
|
248
|
+
# count and word count for each sentence.
|
249
|
+
pb = problem(qn,
|
250
|
+
feature(:punctuation_count),
|
251
|
+
feature(:word_count) )
|
252
|
+
|
253
|
+
# Get some web documents to work on.
|
254
|
+
url1 = 'http://en.wikipedia.org/wiki/NOD_mouse'
|
255
|
+
url2 = 'http://en.wikipedia.org/wiki/Academic_studies_about_Wikipedia'
|
256
|
+
d1, d2 = document(url1), document(url2)
|
257
|
+
|
258
|
+
# Process both of our documents.
|
259
|
+
[d1,d2].apply(:chunk, :segment, :tokenize)
|
260
|
+
|
261
|
+
# Answer our problem to create a training set.
|
262
|
+
d1.sentences[0..17].each { |s| s.set :is_junk, 0 }
|
263
|
+
d1.sentences[17..-1].each { |s| s.set :is_junk, 1 }
|
264
|
+
d_set = d1.export(pb)
|
265
|
+
|
266
|
+
# Define our gold standard results for evaluation.
|
267
|
+
d2.sentences[0..81].each { |s| s.set :is_true_junk, 0 }
|
268
|
+
d2.sentences[81..-1].each { |s| s.set :is_true_junk, 1 }
|
269
|
+
|
270
|
+
tp, fp, tn, fn = 0.0, 0.0, 0.0, 0.0
|
271
|
+
|
272
|
+
d2.sentences.map do |s|
|
273
|
+
pred = s.classify(:id3, training: d_set)
|
274
|
+
if pred == 1
|
275
|
+
tp += 1 if s.is_true_junk == 1
|
276
|
+
fp += 1 if s.is_true_junk == 0
|
277
|
+
else
|
278
|
+
tn += 1 if s.is_true_junk == 0
|
279
|
+
fn += 1 if s.is_true_junk == 1
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
puts "Precision: #{tp/(tp + fp)}"
|
284
|
+
puts "Recall: #{tp/(tp + fn)}"
|
285
|
+
=end
|
286
|
+
=begin
|
287
|
+
d = document 'http://louismullie.com/susan-text-scan1.jpg'
|
288
|
+
d.apply :chunk, :segment, :tokenize
|
289
|
+
d.print_tree
|
290
|
+
=end
|
291
|
+
=begin
|
292
|
+
# Syntax example
|
293
|
+
phra = phrase 'Obama', 'Sarkozy', 'Meeting'
|
294
|
+
|
295
|
+
para = paragraph 'Obama and Sarkozy met on January 1st to'
|
296
|
+
'investigate the possibility of a new rescue plan. Nicolas ' +
|
297
|
+
'Sarkozy is to meet Merkel next Tuesday in Berlin.'
|
298
|
+
|
299
|
+
sect = section title(phra), para
|
300
|
+
=end
|
301
|
+
=begin
|
302
|
+
puts "beer".plural.inspect
|
303
|
+
=end
|
304
|
+
# Treat.core.language.detect = true
|
305
|
+
# s = sentence "Du hast deiner Frau einen roten Ring gekauft."
|
306
|
+
#s.apply(:parse,:category).print_tree
|