treat 2.0.5 → 2.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/treat/loaders/open_nlp.rb +1 -2
- data/lib/treat/loaders/stanford.rb +1 -2
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +1 -1
- data/lib/treat/workers/processors/segmenters/stanford.rb +2 -2
- data/lib/treat/workers/processors/tokenizers/open_nlp.rb +2 -3
- data/lib/treat/workers/processors/tokenizers/stanford.rb +1 -2
- data/spec/sandbox.rb +5 -2
- metadata +2 -2
@@ -3,9 +3,8 @@ require 'treat/loaders/bind_it'
|
|
3
3
|
# A helper class to load the CoreNLP package.
|
4
4
|
class Treat::Loaders::Stanford < Treat::Loaders::BindIt
|
5
5
|
|
6
|
-
require 'stanford-core-nlp'
|
7
|
-
|
8
6
|
def self.load(language = nil)
|
7
|
+
require 'stanford-core-nlp'
|
9
8
|
super(StanfordCoreNLP, :stanford, language)
|
10
9
|
end
|
11
10
|
|
data/lib/treat/version.rb
CHANGED
@@ -48,7 +48,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
48
48
|
isolated_token = entity.is_a?(Treat::Entities::Token)
|
49
49
|
|
50
50
|
@@taggers[lang].apply(t_list).each do |tok|
|
51
|
-
tokens[i].set(:tag, tok.tag)
|
51
|
+
tokens[i].set(:tag, tok.tag.split('-').first)
|
52
52
|
tokens[i].set(:tag_set,
|
53
53
|
options[:tag_set]) if isolated_token
|
54
54
|
return tok.tag if isolated_token
|
@@ -4,8 +4,6 @@
|
|
4
4
|
# obtained tokens are then grouped into sentences.
|
5
5
|
class Treat::Workers::Processors::Segmenters::Stanford
|
6
6
|
|
7
|
-
Treat::Loaders::Stanford.load
|
8
|
-
|
9
7
|
DefaultOptions = {
|
10
8
|
:also_tokenize => false
|
11
9
|
}
|
@@ -25,6 +23,8 @@ class Treat::Workers::Processors::Segmenters::Stanford
|
|
25
23
|
# add the tokens as children of the sentence.
|
26
24
|
def self.segment(entity, options = {})
|
27
25
|
|
26
|
+
Treat::Loaders::Stanford.load
|
27
|
+
|
28
28
|
options = DefaultOptions.merge(options)
|
29
29
|
entity.check_hasnt_children
|
30
30
|
|
@@ -1,14 +1,13 @@
|
|
1
1
|
# Maximum entropy tokenization supplied by OpenNLP.
|
2
2
|
class Treat::Workers::Processors::Tokenizers::OpenNlp
|
3
|
-
|
4
|
-
require 'open-nlp'
|
5
|
-
Treat::Loaders::OpenNLP.load
|
6
3
|
|
7
4
|
@@tokenizers = {}
|
8
5
|
|
9
6
|
# Maximum entropy tokenization.
|
10
7
|
def self.tokenize(entity, options = {})
|
11
8
|
|
9
|
+
Treat::Loaders::OpenNLP.load
|
10
|
+
|
12
11
|
lang = entity.language
|
13
12
|
str = entity.to_s
|
14
13
|
|
@@ -7,8 +7,6 @@
|
|
7
7
|
# single forward- and backward- quotes (`` and '') by default.
|
8
8
|
class Treat::Workers::Processors::Tokenizers::Stanford
|
9
9
|
|
10
|
-
Treat::Loaders::Stanford.load
|
11
|
-
|
12
10
|
# Default options for the tokenizer.
|
13
11
|
DefaultOptions = {
|
14
12
|
directional_quotes: false,
|
@@ -26,6 +24,7 @@ class Treat::Workers::Processors::Tokenizers::Stanford
|
|
26
24
|
# to attempt to get correct directional quotes,
|
27
25
|
# replacing "..." by ``...''. Off by default.
|
28
26
|
def self.tokenize(entity, options = {})
|
27
|
+
Treat::Loaders::Stanford.load
|
29
28
|
options = DefaultOptions.merge(options)
|
30
29
|
@@tokenizer ||= StanfordCoreNLP.load(:tokenize)
|
31
30
|
entity.check_hasnt_children
|
data/spec/sandbox.rb
CHANGED
@@ -18,8 +18,11 @@ Treat.core.verbosity.silence = false
|
|
18
18
|
|
19
19
|
include Treat::Core::DSL
|
20
20
|
|
21
|
-
|
22
|
-
|
21
|
+
Treat.core.language.default = 'german'
|
22
|
+
Treat.core.verbosity.debug
|
23
|
+
|
24
|
+
s = sentence 'Der wilde Kerl lebte in einem gelben Haus.'
|
25
|
+
s.do(:tokenize, :tag => :stanford, :parse => :stanford)
|
23
26
|
s.print_tree
|
24
27
|
|
25
28
|
=begin
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.5
|
4
|
+
version: 2.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schiphol
|