open-nlp 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -8
- data/bin/Utils.java +1 -0
- data/bin/utils.jar +0 -0
- data/lib/open-nlp.rb +20 -151
- data/lib/open-nlp/base.rb +49 -0
- data/lib/open-nlp/bindings.rb +133 -0
- data/lib/open-nlp/classes.rb +77 -0
- data/lib/open-nlp/config.rb +2 -2
- data/spec/english_spec.rb +24 -15
- metadata +34 -14
data/README.md
CHANGED
@@ -1,9 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
This is an alpha release. Expect things to break and/or change in the near future! Also, keep the following in mind:
|
4
|
-
|
5
|
-
- Currently, this gem is only tested on JRuby, but support for MRI through Rjb is coming very soon.
|
6
|
-
- Currently, the parser and chunker classes are not working.
|
1
|
+
[Build Status](http://travis-ci.org/louismullie/open-nlp)
|
7
2
|
|
8
3
|
**About**
|
9
4
|
|
@@ -13,9 +8,11 @@ This gem only provides a thin wrapper over the OpenNLP API. If you are looking f
|
|
13
8
|
|
14
9
|
**Installing**
|
15
10
|
|
11
|
+
_Note: If you are running on MRI, this gem will use the Ruby-Java Bridge (Rjb), which currently does not support Java 7. Therefore, if you have installed Java 7, you should set your JAVA_HOME to point to your old Java 6 install before installing Rjb; for example, `export "JAVA_HOME=/usr/lib/jvm/java-6-openjdk/"`._
|
12
|
+
|
16
13
|
First, install the gem: `gem install open-nlp`. Then, individually download the appropriate models from the [open-nlp website](http://opennlp.sourceforge.net/models-1.5/) or just get [all English language models](http://louismullie.com/treat/open-nlp-english.zip) in one package (80 MB).
|
17
14
|
|
18
|
-
Place the contents of the extracted archive inside the /bin/ folder of the
|
15
|
+
Place the contents of the extracted archive inside the /bin/ folder of the open-nlp gem (e.g. [...]/gems/open-nlp-0.x.x/bin/).
|
19
16
|
|
20
17
|
**Configuration**
|
21
18
|
|
@@ -43,7 +40,7 @@ OpenNLP.log_file = 'log.txt'
|
|
43
40
|
# OpenNLP.use(:french) # or :german
|
44
41
|
#
|
45
42
|
# Change a specific model file.
|
46
|
-
#
|
43
|
+
# OpenNLP.set_model('pos.model', 'english-left3words-distsim.tagger')
|
47
44
|
```
|
48
45
|
|
49
46
|
**Using the gem**
|
data/bin/Utils.java
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
import java.util.Arrays;
|
2
|
|
1
3
|
public static String[] tagWithArrayList(POSTagger posTagger, ArrayList[] objectArray) {
|
2
4
|
return posTagger.tag(getStringArray(objectArray));
|
3
5
|
}
|
4
6
|
public static Object[] findWithArrayList(NameFinderME nameFinder, ArrayList[] tokens) {
|
5
7
|
return nameFinder.find(getStringArray(tokens));
|
6
8
|
}
|
7
9
|
public static Object[] chunkWithArrays(ChunkerME chunker, ArrayList[] tokens, ArrayList[] tags) {
|
8
10
|
return chunker.chunk(getStringArray(tokens), getStringArray(tags));
|
9
11
|
}
|
10
12
|
public static String[] getStringArray(ArrayList[] objectArray) {
|
11
13
|
String[] stringArray = Arrays.copyOf(objectArray, objectArray.length, String[].class);
|
12
14
|
}
|
data/bin/utils.jar
ADDED
Binary file
|
data/lib/open-nlp.rb
CHANGED
@@ -1,162 +1,31 @@
|
|
1
1
|
module OpenNLP
|
2
|
-
|
3
|
-
# Library version.
|
4
|
-
VERSION = '0.0.1'
|
5
2
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
# ############################ #
|
10
|
-
# BindIt Configuration Options #
|
11
|
-
# ############################ #
|
12
|
-
|
13
|
-
require 'bind-it'
|
14
|
-
extend BindIt::Binding
|
15
|
-
|
16
|
-
# The path in which to look for JAR files, with
|
17
|
-
# a trailing slash (default is gem's bin folder).
|
18
|
-
self.jar_path = File.dirname(__FILE__) + '/../bin/'
|
19
|
-
|
20
|
-
# Load the JVM with a minimum heap size of 512MB,
|
21
|
-
# and a maximum heap size of 1024MB.
|
22
|
-
self.jvm_args = ['-Xms512M', '-Xmx1024M']
|
23
|
-
|
24
|
-
# Turn logging off by default.
|
25
|
-
self.log_file = nil
|
26
|
-
|
27
|
-
# Default JARs to load.
|
28
|
-
self.default_jars = [
|
29
|
-
'jwnl-1.3.3.jar',
|
30
|
-
'opennlp-tools-1.5.2-incubating.jar',
|
31
|
-
'opennlp-maxent-3.0.2-incubating.jar',
|
32
|
-
'opennlp-uima-1.5.2-incubating.jar'
|
33
|
-
]
|
34
|
-
|
35
|
-
# Default namespace.
|
36
|
-
self.default_namespace = 'opennlp.tools'
|
37
|
-
|
38
|
-
# Default classes.
|
39
|
-
self.default_classes = [
|
40
|
-
['DocumentCategorizerME', 'opennlp.tools.doccat'],
|
41
|
-
['ChunkerME', 'opennlp.tools.chunker'],
|
42
|
-
['DictionaryDetokenizer', 'opennlp.tools.tokenize'],
|
43
|
-
['NameFinderME', 'opennlp.tools.namefind'],
|
44
|
-
['Parse', 'opennlp.tools.parser'],
|
45
|
-
['ParserFactory', 'opennlp.tools.parser'],
|
46
|
-
['POSTaggerME', 'opennlp.tools.postag'],
|
47
|
-
['SentenceDetectorME', 'opennlp.tools.sentdetect'],
|
48
|
-
['SimpleTokenizer', 'opennlp.tools.tokenize'],
|
49
|
-
['TokenizerME', 'opennlp.tools.tokenize']
|
50
|
-
]
|
51
|
-
|
52
|
-
# Redefine the Bind-It class loader to redefine
|
53
|
-
# a new constructor for classes that require a model.
|
54
|
-
def self.load_klass(klass, base, name = nil)
|
55
|
-
super(klass,base,name)
|
56
|
-
requires_model = OpenNLP::Config::RequiresModel
|
57
|
-
return unless requires_model.include?(klass)
|
58
|
-
new_class = Class.new(const_get(klass)) do
|
59
|
-
def initialize(file = nil, *args)
|
60
|
-
klass = OpenNLP.last_name(self.class)
|
61
|
-
if !file && !OpenNLP.has_default_model?(klass)
|
62
|
-
raise 'This class intentionally has no default ' +
|
63
|
-
'model. Please supply a file name as an argument ' +
|
64
|
-
'to the class constructor.'
|
65
|
-
else
|
66
|
-
model = OpenNLP.get_model(klass, file)
|
67
|
-
super(*([model] + args))
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
remove_const(klass)
|
72
|
-
const_set(klass, new_class)
|
73
|
-
end
|
74
|
-
|
75
|
-
# Make the bindings.
|
76
|
-
self.bind
|
77
|
-
|
78
|
-
# Load utility classes.
|
79
|
-
self.load_class('FileInputStream', 'java.io')
|
80
|
-
|
81
|
-
# ############################ #
|
82
|
-
# OpenNLP bindings proper #
|
83
|
-
# ############################ #
|
3
|
+
# Library version.
|
4
|
+
VERSION = '0.1.0'
|
84
5
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
# A hash containing the names of loaded models.
|
89
|
-
attr_accessor :model_files
|
90
|
-
# The folder in which to look for models.
|
91
|
-
attr_accessor :model_path
|
92
|
-
# Store the language currently being used.
|
93
|
-
attr_accessor :language
|
94
|
-
end
|
95
|
-
|
96
|
-
# The loaded models.
|
97
|
-
self.models = {}
|
98
|
-
|
99
|
-
# The names of loaded models.
|
100
|
-
self.model_files = {}
|
101
|
-
|
102
|
-
# The path to the main folder containing the folders
|
103
|
-
# with the individual models inside. By default, this
|
104
|
-
# is the same as the JAR path.
|
105
|
-
self.model_path = self.jar_path
|
6
|
+
# Require Java bindings.
|
7
|
+
require 'open-nlp/bindings'
|
8
|
+
OpenNLP::Bindings.bind
|
106
9
|
|
107
|
-
#
|
108
|
-
|
10
|
+
# Require Ruby wrappers.
|
11
|
+
require 'open-nlp/classes'
|
109
12
|
|
110
|
-
#
|
111
|
-
|
112
|
-
|
13
|
+
# Load a Java class into the OpenNLP
|
14
|
+
# namespace (e.g. OpenNLP::Loaded).
|
15
|
+
def load_class(*args)
|
16
|
+
OpenNLP::Bindings.load_class(*args)
|
113
17
|
end
|
114
18
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
def self.has_default_model?(klass)
|
120
|
-
name = OpenNLP::Config::ClassToName[klass]
|
121
|
-
if !OpenNLP::Config::DefaultModels[name]
|
122
|
-
raise 'No default model files are available ' +
|
123
|
-
"for the class #{klass}. Please supply a model " +
|
124
|
-
'as an argument to the constructor.'
|
125
|
-
end
|
126
|
-
!OpenNLP::Config::DefaultModels[name].empty?
|
19
|
+
# Forwards the handling of missing
|
20
|
+
# constants to the Bindings class.
|
21
|
+
def const_missing(const)
|
22
|
+
OpenNLP::Bindings.const_get(const)
|
127
23
|
end
|
128
24
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
'constructor. Please supply a model file ' +
|
134
|
-
'or call OpenNLP.use(:some_language), to ' +
|
135
|
-
'load the default models for a language.'
|
136
|
-
end
|
137
|
-
OpenNLP.load_model(name, file)
|
138
|
-
model = OpenNLP.models[name]
|
139
|
-
end
|
140
|
-
|
141
|
-
def self.load_model(name, file = nil)
|
142
|
-
if self.models[name] && file ==
|
143
|
-
self.model_files[name]
|
144
|
-
return self.models[name]
|
145
|
-
end
|
146
|
-
models = Config::DefaultModels[name]
|
147
|
-
file ||= models[self.language]
|
148
|
-
path = self.model_path + file
|
149
|
-
stream = FileInputStream.new(path)
|
150
|
-
klass = Config::NameToClass[name]
|
151
|
-
load_class(*klass)
|
152
|
-
klass = const_get(klass[0])
|
153
|
-
model = klass.new(stream)
|
154
|
-
self.model_files[name] = file
|
155
|
-
self.models[name] = model
|
25
|
+
# Forward the handling of missing
|
26
|
+
# methods to the Bindings class.
|
27
|
+
def method_missing(sym, *args, &block)
|
28
|
+
OpenNLP::Bindings.send(sym, *args, &block)
|
156
29
|
end
|
157
30
|
|
158
|
-
|
159
|
-
klass.to_s.split('::')[-1]
|
160
|
-
end
|
161
|
-
|
162
|
-
end
|
31
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class OpenNLP::Base
|
2
|
+
|
3
|
+
def initialize(file_or_arg=nil, *args)
|
4
|
+
|
5
|
+
@proxy_class = OpenNLP::Bindings.const_get(last_name)
|
6
|
+
|
7
|
+
if requires_model?
|
8
|
+
if !file_or_arg && !has_default_model?
|
9
|
+
raise "No default model files are available for " +
|
10
|
+
"class #{last_name}. Please supply a model as" +
|
11
|
+
"an argument to the constructor."
|
12
|
+
end
|
13
|
+
@model = OpenNLP::Bindings.get_model(last_name, file_or_arg)
|
14
|
+
@proxy_inst = @proxy_class.new(*([@model] + args))
|
15
|
+
else
|
16
|
+
@proxy_inst = @proxy_class.new(*([*file_or_arg] + args))
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
def has_default_model?
|
22
|
+
name = OpenNLP::Config::ClassToName[last_name]
|
23
|
+
!OpenNLP::Config::DefaultModels[name].empty?
|
24
|
+
end
|
25
|
+
|
26
|
+
def requires_model?
|
27
|
+
OpenNLP::Config::RequiresModel.include?(last_name)
|
28
|
+
end
|
29
|
+
|
30
|
+
def last_name
|
31
|
+
self.class.to_s.split('::')[-1]
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
def method_missing(sym, *args, &block)
|
36
|
+
@proxy_inst.send(sym, *args, &block)
|
37
|
+
end
|
38
|
+
|
39
|
+
protected
|
40
|
+
|
41
|
+
def get_list(tokens)
|
42
|
+
list = OpenNLP::Bindings::ArrayList.new
|
43
|
+
tokens.each do |t|
|
44
|
+
list.add(OpenNLP::Bindings::String.new(t.to_s))
|
45
|
+
end
|
46
|
+
list
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module OpenNLP::Bindings
|
2
|
+
|
3
|
+
# Require configuration.
|
4
|
+
require 'open-nlp/config'
|
5
|
+
|
6
|
+
# ############################ #
|
7
|
+
# BindIt Configuration Options #
|
8
|
+
# ############################ #
|
9
|
+
|
10
|
+
require 'bind-it'
|
11
|
+
extend BindIt::Binding
|
12
|
+
|
13
|
+
# The path in which to look for JAR files, with
|
14
|
+
# a trailing slash (default is gem's bin folder).
|
15
|
+
self.jar_path = File.dirname(__FILE__) + '/../../bin/'
|
16
|
+
|
17
|
+
# Load the JVM with a minimum heap size of 512MB,
|
18
|
+
# and a maximum heap size of 1024MB.
|
19
|
+
self.jvm_args = ['-Xms512M', '-Xmx1024M']
|
20
|
+
|
21
|
+
# Turn logging off by default.
|
22
|
+
self.log_file = nil
|
23
|
+
|
24
|
+
# Default JARs to load.
|
25
|
+
self.default_jars = [
|
26
|
+
'jwnl-1.3.3.jar',
|
27
|
+
'opennlp-tools-1.5.2-incubating.jar',
|
28
|
+
'opennlp-maxent-3.0.2-incubating.jar',
|
29
|
+
'opennlp-uima-1.5.2-incubating.jar'
|
30
|
+
]
|
31
|
+
|
32
|
+
# Default namespace.
|
33
|
+
self.default_namespace = 'opennlp.tools'
|
34
|
+
|
35
|
+
# Default classes.
|
36
|
+
self.default_classes = [
|
37
|
+
['AbstractBottomUpParser', 'opennlp.tools.parser'],
|
38
|
+
['DocumentCategorizerME', 'opennlp.tools.doccat'],
|
39
|
+
['ChunkerME', 'opennlp.tools.chunker'],
|
40
|
+
['DictionaryDetokenizer', 'opennlp.tools.tokenize'],
|
41
|
+
['NameFinderME', 'opennlp.tools.namefind'],
|
42
|
+
['Parser', 'opennlp.tools.parser.chunking'],
|
43
|
+
['Parse', 'opennlp.tools.parser'],
|
44
|
+
['ParserFactory', 'opennlp.tools.parser'],
|
45
|
+
['POSTaggerME', 'opennlp.tools.postag'],
|
46
|
+
['SentenceDetectorME', 'opennlp.tools.sentdetect'],
|
47
|
+
['SimpleTokenizer', 'opennlp.tools.tokenize'],
|
48
|
+
['Span', 'opennlp.tools.util'],
|
49
|
+
['TokenizerME', 'opennlp.tools.tokenize']
|
50
|
+
]
|
51
|
+
|
52
|
+
# Add in Rjb workarounds.
|
53
|
+
unless RUBY_PLATFORM =~ /java/
|
54
|
+
self.default_jars << 'utils.jar'
|
55
|
+
self.default_classes << ['Utils', '']
|
56
|
+
end
|
57
|
+
|
58
|
+
# Make the bindings.
|
59
|
+
self.bind
|
60
|
+
|
61
|
+
# Load utility classes.
|
62
|
+
self.load_class('FileInputStream', 'java.io')
|
63
|
+
self.load_class('String', 'java.lang')
|
64
|
+
self.load_class('ArrayList', 'java.util')
|
65
|
+
|
66
|
+
# ############################ #
|
67
|
+
# OpenNLP bindings proper #
|
68
|
+
# ############################ #
|
69
|
+
|
70
|
+
class <<self
|
71
|
+
# A hash containing loaded models.
|
72
|
+
attr_accessor :models
|
73
|
+
# A hash containing the names of loaded models.
|
74
|
+
attr_accessor :model_files
|
75
|
+
# The folder in which to look for models.
|
76
|
+
attr_accessor :model_path
|
77
|
+
# Store the language currently being used.
|
78
|
+
attr_accessor :language
|
79
|
+
end
|
80
|
+
|
81
|
+
# The loaded models.
|
82
|
+
self.models = {}
|
83
|
+
|
84
|
+
# The names of loaded models.
|
85
|
+
self.model_files = {}
|
86
|
+
|
87
|
+
# The path to the main folder containing the folders
|
88
|
+
# with the individual models inside. By default, this
|
89
|
+
# is the same as the JAR path.
|
90
|
+
self.model_path = self.jar_path
|
91
|
+
|
92
|
+
# Default the language to English.
|
93
|
+
self.language = :english
|
94
|
+
|
95
|
+
# Use a given language for default models.
|
96
|
+
def self.use(language)
|
97
|
+
self.language = language
|
98
|
+
end
|
99
|
+
|
100
|
+
def self.get_model(klass, file=nil)
|
101
|
+
name = OpenNLP::Config::ClassToName[klass]
|
102
|
+
if !self.language and !file
|
103
|
+
raise 'No model file was supplied to the ' +
|
104
|
+
'constructor. Please supply a model file ' +
|
105
|
+
'or call OpenNLP.use(:some_language), to ' +
|
106
|
+
'load the default models for a language.'
|
107
|
+
end
|
108
|
+
self.load_model(name, file)
|
109
|
+
model = self.models[name]
|
110
|
+
end
|
111
|
+
|
112
|
+
def self.set_model
|
113
|
+
raise 'Not implemented.'
|
114
|
+
end
|
115
|
+
|
116
|
+
def self.load_model(name, file = nil)
|
117
|
+
if self.models[name] && file ==
|
118
|
+
self.model_files[name]
|
119
|
+
return self.models[name]
|
120
|
+
end
|
121
|
+
models = OpenNLP::Config::DefaultModels[name]
|
122
|
+
file ||= models[self.language]
|
123
|
+
path = self.model_path + file
|
124
|
+
stream = FileInputStream.new(path)
|
125
|
+
klass = OpenNLP::Config::NameToClass[name]
|
126
|
+
load_class(*klass) unless const_defined?(klass[0])
|
127
|
+
klass = const_get(klass[0])
|
128
|
+
model = klass.new(stream)
|
129
|
+
self.model_files[name] = file
|
130
|
+
self.models[name] = model
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'open-nlp/base'
|
2
|
+
|
3
|
+
class OpenNLP::SentenceDetectorME < OpenNLP::Base; end
|
4
|
+
|
5
|
+
class OpenNLP::SimpleTokenizer < OpenNLP::Base; end
|
6
|
+
|
7
|
+
class OpenNLP::TokenizerME < OpenNLP::Base; end
|
8
|
+
|
9
|
+
class OpenNLP::POSTaggerME < OpenNLP::Base
|
10
|
+
|
11
|
+
unless RUBY_PLATFORM =~ /java/
|
12
|
+
def tag(*args)
|
13
|
+
OpenNLP::Bindings::Utils.tagWithArrayList(@proxy_inst, args[0])
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
class OpenNLP::ChunkerME < OpenNLP::Base
|
20
|
+
|
21
|
+
if RUBY_PLATFORM =~ /java/
|
22
|
+
|
23
|
+
def chunk(tokens, tags)
|
24
|
+
if !tokens.is_a?(Array)
|
25
|
+
tokens = tokens.to_a
|
26
|
+
tags = tags.to_a
|
27
|
+
end
|
28
|
+
tokens = tokens.to_java(:String)
|
29
|
+
tags = tags.to_java(:String)
|
30
|
+
@proxy_inst.chunk(tokens,tags).to_a
|
31
|
+
end
|
32
|
+
|
33
|
+
else
|
34
|
+
|
35
|
+
def chunk(tokens, tags)
|
36
|
+
chunks = OpenNLP::Bindings::Utils.chunkWithArrays(@proxy_inst, tokens,tags)
|
37
|
+
chunks.map { |c| c.to_s }
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
class OpenNLP::Parser < OpenNLP::Base
|
45
|
+
|
46
|
+
def parse(text)
|
47
|
+
|
48
|
+
tokenizer = OpenNLP::TokenizerME.new
|
49
|
+
full_span = OpenNLP::Bindings::Span.new(0, text.size)
|
50
|
+
|
51
|
+
parse_obj = OpenNLP::Bindings::Parse.new(
|
52
|
+
text, full_span, "INC", 1, 0)
|
53
|
+
|
54
|
+
tokens = tokenizer.tokenize_pos(text)
|
55
|
+
|
56
|
+
tokens.each_with_index do |tok,i|
|
57
|
+
start, stop = tok.get_start, tok.get_end
|
58
|
+
token = text[start..stop-1]
|
59
|
+
span = OpenNLP::Bindings::Span.new(start, stop)
|
60
|
+
parse = OpenNLP::Bindings::Parse.new(text, span, "TK", 0, i)
|
61
|
+
parse_obj.insert(parse)
|
62
|
+
end
|
63
|
+
|
64
|
+
@proxy_inst.parse(parse_obj)
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
class OpenNLP::NameFinderME < OpenNLP::Base
|
71
|
+
unless RUBY_PLATFORM =~ /java/
|
72
|
+
def find(*args)
|
73
|
+
OpenNLP::Bindings::Utils
|
74
|
+
.findWithArrayList(@proxy_inst, args[0])
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/open-nlp/config.rb
CHANGED
@@ -17,7 +17,7 @@ module OpenNLP::Config
|
|
17
17
|
'DocumentCategorizerME' => :categorizer,
|
18
18
|
'NameFinderME' => :name_finder,
|
19
19
|
'POSTaggerME' => :pos_tagger,
|
20
|
-
'
|
20
|
+
'Parser' => :parser,
|
21
21
|
'SentenceDetectorME' => :sentence_detector,
|
22
22
|
'TokenizerME' => :tokenizer,
|
23
23
|
}
|
@@ -48,7 +48,7 @@ module OpenNLP::Config
|
|
48
48
|
# Classes that require a model as first argument to constructor.
|
49
49
|
RequiresModel = [
|
50
50
|
'SentenceDetectorME', 'NameFinderME', 'DictionaryDetokenizer',
|
51
|
-
'TokenizerME', 'ChunkerME', 'POSTaggerME'
|
51
|
+
'TokenizerME', 'ChunkerME', 'POSTaggerME', 'Parser'
|
52
52
|
]
|
53
53
|
|
54
54
|
|
data/spec/english_spec.rb
CHANGED
@@ -3,7 +3,6 @@ require_relative 'spec_helper'
|
|
3
3
|
|
4
4
|
describe OpenNLP do
|
5
5
|
|
6
|
-
# Failing spec #1
|
7
6
|
context "the maximum entropy chunker is run after tokenization and POS tagging" do
|
8
7
|
it "should find the accurate chunks" do
|
9
8
|
|
@@ -15,28 +14,38 @@ describe OpenNLP do
|
|
15
14
|
tokens = tokenizer.tokenize(sent)
|
16
15
|
tags = tagger.tag(tokens)
|
17
16
|
|
18
|
-
chunks = chunker.chunk(tokens
|
19
|
-
|
20
|
-
|
17
|
+
chunks = chunker.chunk(tokens, tags)
|
18
|
+
|
19
|
+
chunks.to_a.should eql %w[B-NP I-NP B-PP B-NP I-NP B-VP I-VP B-PP B-NP I-NP O]
|
21
20
|
tokens.to_a.should eql %w[The death of the poet was kept from his poems .]
|
22
|
-
tags.should eql [
|
21
|
+
tags.to_a.should eql %w[DT NN IN DT NN VBD VBN IN PRP$ NNS .]
|
23
22
|
|
24
23
|
end
|
25
24
|
end
|
26
25
|
|
27
|
-
# Failing spec #2
|
28
26
|
context "the maximum entropy parser is run after tokenization" do
|
29
27
|
it "parses the text accurately" do
|
28
|
+
|
30
29
|
sent = "The death of the poet was kept from his poems."
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
30
|
+
parser = OpenNLP::Parser.new
|
31
|
+
parse = parser.parse(sent)
|
32
|
+
|
33
|
+
parse.get_text.should eql sent
|
34
|
+
|
35
|
+
parse.get_span.get_start.should eql 0
|
36
|
+
parse.get_span.get_end.should eql 46
|
37
|
+
parse.get_span.get_type.should eql nil # ?
|
38
|
+
parse.get_child_count.should eql 1
|
39
|
+
|
40
|
+
child = parse.get_children[0]
|
41
|
+
|
42
|
+
child.text.should eql "The death of the poet was kept from his poems."
|
43
|
+
child.get_child_count.should eql 3
|
44
|
+
child.get_head_index.should eql 5
|
45
|
+
|
46
|
+
child.get_head.get_child_count.should eql 1
|
47
|
+
child.get_type.should eql "S"
|
48
|
+
|
40
49
|
end
|
41
50
|
end
|
42
51
|
|
metadata
CHANGED
@@ -1,32 +1,48 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: open-nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Louis Mullie
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bind-it
|
16
|
-
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
17
18
|
requirements:
|
18
19
|
- - ! '>='
|
19
20
|
- !ruby/object:Gem::Version
|
20
21
|
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
25
|
none: false
|
22
|
-
requirement: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - ! '>='
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
27
33
|
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
28
39
|
prerelease: false
|
29
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
30
46
|
description: ! ' Ruby bindings to the OpenNLP tools, a Java machine learning toolkit
|
31
47
|
for natural language processing (NLP). '
|
32
48
|
email:
|
@@ -39,8 +55,13 @@ files:
|
|
39
55
|
- bin/opennlp-maxent-3.0.2-incubating.jar
|
40
56
|
- bin/opennlp-tools-1.5.2-incubating.jar
|
41
57
|
- bin/opennlp-uima-1.5.2-incubating.jar
|
42
|
-
-
|
58
|
+
- bin/utils.jar
|
59
|
+
- bin/Utils.java
|
60
|
+
- lib/open-nlp/base.rb
|
61
|
+
- lib/open-nlp/bindings.rb
|
62
|
+
- lib/open-nlp/classes.rb
|
43
63
|
- lib/open-nlp/config.rb
|
64
|
+
- lib/open-nlp.rb
|
44
65
|
- spec/english_spec.rb
|
45
66
|
- spec/sample.txt
|
46
67
|
- spec/spec_helper.rb
|
@@ -48,27 +69,26 @@ files:
|
|
48
69
|
- LICENSE
|
49
70
|
homepage: https://github.com/louismullie/open-nlp
|
50
71
|
licenses: []
|
51
|
-
post_install_message:
|
72
|
+
post_install_message:
|
52
73
|
rdoc_options: []
|
53
74
|
require_paths:
|
54
75
|
- lib
|
55
76
|
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
56
78
|
requirements:
|
57
79
|
- - ! '>='
|
58
80
|
- !ruby/object:Gem::Version
|
59
81
|
version: '0'
|
60
|
-
none: false
|
61
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
62
84
|
requirements:
|
63
85
|
- - ! '>='
|
64
86
|
- !ruby/object:Gem::Version
|
65
87
|
version: '0'
|
66
|
-
none: false
|
67
88
|
requirements: []
|
68
|
-
rubyforge_project:
|
89
|
+
rubyforge_project:
|
69
90
|
rubygems_version: 1.8.24
|
70
|
-
signing_key:
|
91
|
+
signing_key:
|
71
92
|
specification_version: 3
|
72
93
|
summary: Ruby bindings to the OpenNLP Java toolkit.
|
73
94
|
test_files: []
|
74
|
-
...
|