treat 2.0.5 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/treat/loaders/open_nlp.rb +1 -2
- data/lib/treat/loaders/stanford.rb +1 -2
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +1 -1
- data/lib/treat/workers/processors/segmenters/stanford.rb +2 -2
- data/lib/treat/workers/processors/tokenizers/open_nlp.rb +2 -3
- data/lib/treat/workers/processors/tokenizers/stanford.rb +1 -2
- data/spec/sandbox.rb +5 -2
- metadata +2 -2
@@ -3,9 +3,8 @@ require 'treat/loaders/bind_it'
|
|
3
3
|
# A helper class to load the CoreNLP package.
|
4
4
|
class Treat::Loaders::Stanford < Treat::Loaders::BindIt
|
5
5
|
|
6
|
-
require 'stanford-core-nlp'
|
7
|
-
|
8
6
|
def self.load(language = nil)
|
7
|
+
require 'stanford-core-nlp'
|
9
8
|
super(StanfordCoreNLP, :stanford, language)
|
10
9
|
end
|
11
10
|
|
data/lib/treat/version.rb
CHANGED
@@ -48,7 +48,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
48
48
|
isolated_token = entity.is_a?(Treat::Entities::Token)
|
49
49
|
|
50
50
|
@@taggers[lang].apply(t_list).each do |tok|
|
51
|
-
tokens[i].set(:tag, tok.tag)
|
51
|
+
tokens[i].set(:tag, tok.tag.split('-').first)
|
52
52
|
tokens[i].set(:tag_set,
|
53
53
|
options[:tag_set]) if isolated_token
|
54
54
|
return tok.tag if isolated_token
|
@@ -4,8 +4,6 @@
|
|
4
4
|
# obtained tokens are then grouped into sentences.
|
5
5
|
class Treat::Workers::Processors::Segmenters::Stanford
|
6
6
|
|
7
|
-
Treat::Loaders::Stanford.load
|
8
|
-
|
9
7
|
DefaultOptions = {
|
10
8
|
:also_tokenize => false
|
11
9
|
}
|
@@ -25,6 +23,8 @@ class Treat::Workers::Processors::Segmenters::Stanford
|
|
25
23
|
# add the tokens as children of the sentence.
|
26
24
|
def self.segment(entity, options = {})
|
27
25
|
|
26
|
+
Treat::Loaders::Stanford.load
|
27
|
+
|
28
28
|
options = DefaultOptions.merge(options)
|
29
29
|
entity.check_hasnt_children
|
30
30
|
|
@@ -1,14 +1,13 @@
|
|
1
1
|
# Maximum entropy tokenization supplied by OpenNLP.
|
2
2
|
class Treat::Workers::Processors::Tokenizers::OpenNlp
|
3
|
-
|
4
|
-
require 'open-nlp'
|
5
|
-
Treat::Loaders::OpenNLP.load
|
6
3
|
|
7
4
|
@@tokenizers = {}
|
8
5
|
|
9
6
|
# Maximum entropy tokenization.
|
10
7
|
def self.tokenize(entity, options = {})
|
11
8
|
|
9
|
+
Treat::Loaders::OpenNLP.load
|
10
|
+
|
12
11
|
lang = entity.language
|
13
12
|
str = entity.to_s
|
14
13
|
|
@@ -7,8 +7,6 @@
|
|
7
7
|
# single forward- and backward- quotes (`` and '') by default.
|
8
8
|
class Treat::Workers::Processors::Tokenizers::Stanford
|
9
9
|
|
10
|
-
Treat::Loaders::Stanford.load
|
11
|
-
|
12
10
|
# Default options for the tokenizer.
|
13
11
|
DefaultOptions = {
|
14
12
|
directional_quotes: false,
|
@@ -26,6 +24,7 @@ class Treat::Workers::Processors::Tokenizers::Stanford
|
|
26
24
|
# to attempt to get correct directional quotes,
|
27
25
|
# replacing "..." by ``...''. Off by default.
|
28
26
|
def self.tokenize(entity, options = {})
|
27
|
+
Treat::Loaders::Stanford.load
|
29
28
|
options = DefaultOptions.merge(options)
|
30
29
|
@@tokenizer ||= StanfordCoreNLP.load(:tokenize)
|
31
30
|
entity.check_hasnt_children
|
data/spec/sandbox.rb
CHANGED
@@ -18,8 +18,11 @@ Treat.core.verbosity.silence = false
|
|
18
18
|
|
19
19
|
include Treat::Core::DSL
|
20
20
|
|
21
|
-
|
22
|
-
|
21
|
+
Treat.core.language.default = 'german'
|
22
|
+
Treat.core.verbosity.debug
|
23
|
+
|
24
|
+
s = sentence 'Der wilde Kerl lebte in einem gelben Haus.'
|
25
|
+
s.do(:tokenize, :tag => :stanford, :parse => :stanford)
|
23
26
|
s.print_tree
|
24
27
|
|
25
28
|
=begin
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.5
|
4
|
+
version: 2.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schiphol
|