stanford-core-nlp-abstractor 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +18 -0
- data/README.md +208 -0
- data/bin/AnnotationBridge.java +22 -0
- data/bin/bridge.jar +0 -0
- data/lib/stanford-core-nlp.rb +238 -0
- data/lib/stanford-core-nlp/bridge.rb +57 -0
- data/lib/stanford-core-nlp/config.rb +392 -0
- metadata +96 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: aaef5a50a17996fda21974a9c41edcacba2b27f1
|
4
|
+
data.tar.gz: a16c9702bf92e93d69e66be79d0743d1df98f020
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9e0061a31396b5564500e8264a4b68f5d57f8fb62e943838feb843018178bc6ec6047a9aca218a5727d34c6a52efa7f843ee897ec433e005ec59520a7be41b20
|
7
|
+
data.tar.gz: 866e1c5ab898820a181e25f495136ec4a57e8bcc6b7a0d8d870aee452c049de54b8f5a3b5338e072cc09f3e454ccedbd7e24c9a73c3f3b16c116ccd6864bca25
|
data/LICENSE
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
Ruby bindings for the Stanford CoreNLP package
|
2
|
+
|
3
|
+
This program is free software: you can redistribute it and/or modify
|
4
|
+
it under the terms of the GNU General Public License as published by
|
5
|
+
the Free Software Foundation, either version 3 of the License, or
|
6
|
+
(at your option) any later version.
|
7
|
+
|
8
|
+
This program is distributed in the hope that it will be useful,
|
9
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
GNU General Public License for more details.
|
12
|
+
|
13
|
+
This license also applies to the included Stanford CoreNLP files.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
Author: Louis-Antoine Mullie (louis.mullie@gmail.com). Copyright 2012.
|
data/README.md
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
[![Build Status](https://secure.travis-ci.org/louismullie/stanford-core-nlp.png)](http://travis-ci.org/louismullie/stanford-core-nlp)
|
2
|
+
|
3
|
+
**About**
|
4
|
+
|
5
|
+
This gem provides high-level Ruby bindings to the [Stanford Core NLP package](http://nlp.stanford.edu/software/corenlp.shtml), a set of natural language processing tools for tokenization, sentence segmentation, part-of-speech tagging, lemmatization, and parsing of English, French and German. The package also provides named entity recognition and coreference resolution for English.
|
6
|
+
|
7
|
+
This gem is compatible with Ruby 1.9.2 and 1.9.3 as well as JRuby 1.7.1. It is tested on both Java 6 and Java 7.
|
8
|
+
|
9
|
+
**Installing**
|
10
|
+
|
11
|
+
First, install the gem: `gem install stanford-core-nlp`. Then, download the Stanford Core NLP JAR and model files. Two packages are available:
|
12
|
+
|
13
|
+
* A [minimal package](http://louismullie.com/treat/stanford-core-nlp-minimal.zip) with the default tagger and parser models for English, French and German.
|
14
|
+
* A [full package](http://louismullie.com/treat/stanford-core-nlp-full.zip), with all of the tagger and parser models for English, French and German, as well as named entity and coreference resolution models for English.
|
15
|
+
|
16
|
+
Place the contents of the extracted archive inside the /bin/ folder of the stanford-core-nlp gem (e.g. [...]/gems/stanford-core-nlp-0.x/bin/).
|
17
|
+
|
18
|
+
**Configuration**
|
19
|
+
|
20
|
+
You may want to set some optional configuration options. Here are some examples:
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
# Set an alternative path to look for the JAR files
|
24
|
+
# Default is gem's bin folder.
|
25
|
+
StanfordCoreNLP.jar_path = '/path_to_jars/'
|
26
|
+
|
27
|
+
# Set an alternative path to look for the model files
|
28
|
+
# Default is gem's bin folder.
|
29
|
+
StanfordCoreNLP.model_path = '/path_to_models/'
|
30
|
+
|
31
|
+
# Pass some alternative arguments to the Java VM.
|
32
|
+
# Default is ['-Xms512M', '-Xmx1024M'] (be prepared
|
33
|
+
# to take a coffee break).
|
34
|
+
StanfordCoreNLP.jvm_args = ['-option1', '-option2']
|
35
|
+
|
36
|
+
# Redirect VM output to log.txt
|
37
|
+
StanfordCoreNLP.log_file = 'log.txt'
|
38
|
+
|
39
|
+
# Change a specific model file.
|
40
|
+
StanfordCoreNLP.set_model('pos.model', 'english-left3words-distsim.tagger')
|
41
|
+
```
|
42
|
+
|
43
|
+
**Using the gem**
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
# Use the model files for a different language than English.
|
47
|
+
StanfordCoreNLP.use :french # or :german
|
48
|
+
|
49
|
+
text = 'Angela Merkel met Nicolas Sarkozy on January 25th in ' +
|
50
|
+
'Berlin to discuss a new austerity package. Sarkozy ' +
|
51
|
+
'looked pleased, but Merkel was dismayed.'
|
52
|
+
|
53
|
+
pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner, :dcoref)
|
54
|
+
text = StanfordCoreNLP::Annotation.new(text)
|
55
|
+
pipeline.annotate(text)
|
56
|
+
|
57
|
+
text.get(:sentences).each do |sentence|
|
58
|
+
# Syntactic dependencies
|
59
|
+
puts sentence.get(:basic_dependencies).to_s
|
60
|
+
sentence.get(:tokens).each do |token|
|
61
|
+
# Default annotations for all tokens
|
62
|
+
puts token.get(:value).to_s
|
63
|
+
puts token.get(:original_text).to_s
|
64
|
+
puts token.get(:character_offset_begin).to_s
|
65
|
+
puts token.get(:character_offset_end).to_s
|
66
|
+
# POS returned by the tagger
|
67
|
+
puts token.get(:part_of_speech).to_s
|
68
|
+
# Lemma (base form of the token)
|
69
|
+
puts token.get(:lemma).to_s
|
70
|
+
# Named entity tag
|
71
|
+
puts token.get(:named_entity_tag).to_s
|
72
|
+
# Coreference
|
73
|
+
puts token.get(:coref_cluster_id).to_s
|
74
|
+
# Also of interest: coref, coref_chain,
|
75
|
+
# coref_cluster, coref_dest, coref_graph.
|
76
|
+
end
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
> Important: You need to load the StanfordCoreNLP pipeline before using the StanfordCoreNLP::Annotation class.
|
81
|
+
|
82
|
+
The Ruby symbol (e.g. `:named_entity_tag`) corresponding to a Java annotation class is the `snake_case` of the class name, with 'Annotation' at the end removed. For example, `NamedEntityTagAnnotation` translates to `:named_entity_tag`, `PartOfSpeechAnnotation` to `:part_of_speech`, etc.
|
83
|
+
|
84
|
+
Good references for the names of annotations are the Stanford Javadocs for [CoreAnnotations](http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/ling/CoreAnnotations.html), [CorefCoreAnnotations](http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/dcoref/CorefCoreAnnotations.html), and [TreeCoreAnnotations](http://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/trees/TreeCoreAnnotations.html). For a full list of all possible annotations, see the `config.rb` file inside the gem.
|
85
|
+
|
86
|
+
|
87
|
+
**Loading specific classes**
|
88
|
+
|
89
|
+
You may want to load additional Java classes (including any class from the Stanford NLP packages). The gem provides an API for this:
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
# Default base class is edu.stanford.nlp.pipeline.
|
93
|
+
StanfordCoreNLP.load_class('PTBTokenizerAnnotator')
|
94
|
+
puts StanfordCoreNLP::PTBTokenizerAnnotator.inspect
|
95
|
+
# => #<Rjb::Edu_stanford_nlp_pipeline_PTBTokenizerAnnotator>
|
96
|
+
|
97
|
+
# Here, we specify another base class.
|
98
|
+
StanfordCoreNLP.load_class('MaxentTagger', 'edu.stanford.nlp.tagger.maxent')
|
99
|
+
puts StanfordCoreNLP::MaxentTagger.inspect
|
100
|
+
# => <Rjb::Edu_stanford_nlp_tagger_maxent_MaxentTagger:0x007f88491e2020>
|
101
|
+
```
|
102
|
+
|
103
|
+
**List of annotator classes**
|
104
|
+
|
105
|
+
Here is a full list of annotator classes provided by the Stanford Core NLP package. You can load these classes individually using `StanfordCoreNLP.load_class` (see above). Once this is done, you can use them like you would from a Java program. Refer to the Java documentation for a list of functions provided by each of these classes.
|
106
|
+
|
107
|
+
* PTBTokenizerAnnotator - tokenizes the text following Penn Treebank conventions.
|
108
|
+
* WordToSentenceAnnotator - splits a sequence of words into a sequence of sentences.
|
109
|
+
* POSTaggerAnnotator - annotates the text with part-of-speech tags.
|
110
|
+
* MorphaAnnotator - morphological normalizer (generates lemmas).
|
111
|
+
* NERAnnotator - annotates the text with named-entity labels.
|
112
|
+
* NERCombinerAnnotator - combines several NER models.
|
113
|
+
* TrueCaseAnnotator - detects the true case of words in free text.
|
114
|
+
* ParserAnnotator - generates constituent and dependency trees.
|
115
|
+
* NumberAnnotator - recognizes numerical entities such as numbers, money, times, and dates.
|
116
|
+
* TimeWordAnnotator - recognizes common temporal expressions, such as "teatime".
|
117
|
+
* QuantifiableEntityNormalizingAnnotator - normalizes the content of all numerical entities.
|
118
|
+
* SRLAnnotator - annotates predicates and their semantic roles.
|
119
|
+
* DeterministicCorefAnnotator - implements anaphora resolution using a deterministic model.
|
120
|
+
* NFLAnnotator - implements entity and relation mention extraction for the NFL domain.
|
121
|
+
|
122
|
+
**List of model files**
|
123
|
+
|
124
|
+
Here is a full list of the default models for the Stanford Core NLP pipeline. You can change these models individually using `StanfordCoreNLP.set_model` (see above).
|
125
|
+
|
126
|
+
* 'pos.model' - 'english-left3words-distsim.tagger'
|
127
|
+
* 'ner.model' - 'english.all.3class.distsim.crf.ser.gz'
|
128
|
+
* 'parse.model' - 'englishPCFG.ser.gz'
|
129
|
+
* 'dcoref.demonym' - 'demonyms.txt'
|
130
|
+
* 'dcoref.animate' - 'animate.unigrams.txt'
|
131
|
+
* 'dcoref.female' - 'female.unigrams.txt'
|
132
|
+
* 'dcoref.inanimate' - 'inanimate.unigrams.txt'
|
133
|
+
* 'dcoref.male' - 'male.unigrams.txt'
|
134
|
+
* 'dcoref.neutral' - 'neutral.unigrams.txt'
|
135
|
+
* 'dcoref.plural' - 'plural.unigrams.txt'
|
136
|
+
* 'dcoref.singular' - 'singular.unigrams.txt'
|
137
|
+
* 'dcoref.states' - 'state-abbreviations.txt'
|
138
|
+
* 'dcoref.extra.gender' - 'namegender.combine.txt'
|
139
|
+
|
140
|
+
**Testing**
|
141
|
+
|
142
|
+
To run the specs for each language (after copying the JARs into the `bin` folder):
|
143
|
+
|
144
|
+
rake spec[english]
|
145
|
+
rake spec[german]
|
146
|
+
rake spec[french]
|
147
|
+
|
148
|
+
**Using the latest version of the Stanford CoreNLP**
|
149
|
+
|
150
|
+
Using the latest version of the Stanford CoreNLP (version 3.5.0 as of 31/10/2014) requires some additional manual steps:
|
151
|
+
|
152
|
+
* Download [Stanford CoreNLP version 3.5.0](http://nlp.stanford.edu/software/stanford-corenlp-full-2014-10-31.zip) from http://nlp.stanford.edu/.
|
153
|
+
* Place the contents of the extracted archive inside the /bin/ folder of the stanford-core-nlp gem (e.g. [...]/gems/stanford-core-nlp-0.x/bin/) or inside the directory location configured by setting StanfordCoreNLP.jar_path.
|
154
|
+
* Download [the full Stanford Tagger version 3.5.0](http://nlp.stanford.edu/software/stanford-postagger-full-2014-10-26.zip) from http://nlp.stanford.edu/.
|
155
|
+
* Make a directory named 'taggers' inside the /bin/ folder of the stanford-core-nlp gem (e.g. [...]/gems/stanford-core-nlp-0.x/bin/) or inside the directory configured by setting StanfordCoreNLP.jar_path.
|
156
|
+
* Place the contents of the extracted archive inside the taggers directory.
|
157
|
+
* Download [the bridge.jar file](https://github.com/louismullie/stanford-core-nlp/blob/master/bin/bridge.jar?raw=true) from https://github.com/louismullie/stanford-core-nlp.
|
158
|
+
* Place the downloaded bridge.jar file inside the /bin/ folder of the stanford-core-nlp gem (e.g. [...]/gems/stanford-core-nlp-0.x/bin/) or inside the directory configured by setting StanfordCoreNLP.jar_path.
|
159
|
+
* Configure your setup (for English) as follows:
|
160
|
+
```ruby
|
161
|
+
StanfordCoreNLP.use :english
|
162
|
+
StanfordCoreNLP.model_files = {}
|
163
|
+
StanfordCoreNLP.default_jars = [
|
164
|
+
'joda-time.jar',
|
165
|
+
'xom.jar',
|
166
|
+
'stanford-corenlp-3.5.0.jar',
|
167
|
+
'stanford-corenlp-3.5.0-models.jar',
|
168
|
+
'jollyday.jar',
|
169
|
+
'bridge.jar'
|
170
|
+
]
|
172
|
+
```
|
173
|
+
Or configure your setup (for French) as follows:
|
174
|
+
```ruby
|
175
|
+
StanfordCoreNLP.use :french
|
176
|
+
StanfordCoreNLP.model_files = {}
|
177
|
+
StanfordCoreNLP.set_model('pos.model', 'french.tagger')
|
178
|
+
StanfordCoreNLP.default_jars = [
|
179
|
+
'joda-time.jar',
|
180
|
+
'xom.jar',
|
181
|
+
'stanford-corenlp-3.5.0.jar',
|
182
|
+
'stanford-corenlp-3.5.0-models.jar',
|
183
|
+
'jollyday.jar',
|
184
|
+
'bridge.jar'
|
185
|
+
]
|
187
|
+
```
|
188
|
+
Or configure your setup (for German) as follows:
|
189
|
+
```ruby
|
190
|
+
StanfordCoreNLP.use :german
|
191
|
+
StanfordCoreNLP.model_files = {}
|
192
|
+
StanfordCoreNLP.set_model('pos.model', 'german-fast.tagger')
|
193
|
+
StanfordCoreNLP.default_jars = [
|
194
|
+
'joda-time.jar',
|
195
|
+
'xom.jar',
|
196
|
+
'stanford-corenlp-3.5.0.jar',
|
197
|
+
'stanford-corenlp-3.5.0-models.jar',
|
198
|
+
'jollyday.jar',
|
199
|
+
'bridge.jar'
|
200
|
+
]
|
202
|
+
```
|
203
|
+
**Contributing**
|
204
|
+
|
205
|
+
Simple.
|
206
|
+
|
207
|
+
1. Fork the project.
|
208
|
+
2. Send me a pull request!
|
@@ -0,0 +1,22 @@
|
|
1
|
+
import edu.stanford.nlp.ling.CoreAnnotation;
|
2
|
+
import edu.stanford.nlp.util.ArrayCoreMap;
|
3
|
+
import java.util.Properties;
|
4
|
+
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
|
5
|
+
|
6
|
+
// export JAVA_HOME='/System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Home'
|
7
|
+
// javac -cp '.:stanford-corenlp.jar' AnnotationBridge.java
|
8
|
+
// jar cf bridge.jar AnnotationBridge.class
|
9
|
+
public class AnnotationBridge {
|
10
|
+
|
11
|
+
public static Object getAnnotation(Object entity, String name) throws ClassNotFoundException {
|
12
|
+
Class<CoreAnnotation> klass;
|
13
|
+
klass = (Class<CoreAnnotation>) Class.forName(name);
|
14
|
+
Object object = ((ArrayCoreMap) entity).get(klass);
|
15
|
+
return object;
|
16
|
+
}
|
17
|
+
|
18
|
+
public static Object getPipelineWithProperties(Properties properties) {
|
19
|
+
StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
|
20
|
+
return pipeline;
|
21
|
+
}
|
22
|
+
}
|
data/bin/bridge.jar
ADDED
Binary file
|
@@ -0,0 +1,238 @@
|
|
1
|
+
require 'stanford-core-nlp/config'
|
2
|
+
|
3
|
+
module StanfordCoreNLP
|
4
|
+
|
5
|
+
VERSION = '0.5.3'
|
6
|
+
|
7
|
+
require 'bind-it'
|
8
|
+
extend BindIt::Binding
|
9
|
+
|
10
|
+
# ############################ #
|
11
|
+
# BindIt Configuration Options #
|
12
|
+
# ############################ #
|
13
|
+
|
14
|
+
# The default path for the JAR files
|
15
|
+
# is the gem's bin folder.
|
16
|
+
self.jar_path = File.dirname(__FILE__).gsub(/\/lib\z/, '') + '/bin/'
|
17
|
+
|
18
|
+
# Default namespace is the Stanford pipeline namespace.
|
19
|
+
self.default_namespace = 'edu.stanford.nlp.pipeline'
|
20
|
+
|
21
|
+
# Load the JVM with a minimum heap size of 512MB,
|
22
|
+
# and a maximum heap size of 1024MB.
|
23
|
+
StanfordCoreNLP.jvm_args = ['-Xms512M', '-Xmx1024M']
|
24
|
+
|
25
|
+
# Turn logging off by default.
|
26
|
+
StanfordCoreNLP.log_file = nil
|
27
|
+
|
28
|
+
# Default JAR files to load.
|
29
|
+
StanfordCoreNLP.default_jars = [
|
30
|
+
'joda-time.jar',
|
31
|
+
'xom.jar',
|
32
|
+
'stanford-corenlp.jar',
|
33
|
+
'jollyday.jar',
|
34
|
+
'bridge.jar'
|
35
|
+
]
|
36
|
+
|
37
|
+
# Default classes to load.
|
38
|
+
StanfordCoreNLP.default_classes = [
|
39
|
+
['StanfordCoreNLP', 'edu.stanford.nlp.pipeline', 'CoreNLP'],
|
40
|
+
['Annotation', 'edu.stanford.nlp.pipeline'],
|
41
|
+
['Word', 'edu.stanford.nlp.ling'],
|
42
|
+
['CoreLabel', 'edu.stanford.nlp.ling'],
|
43
|
+
['MaxentTagger', 'edu.stanford.nlp.tagger.maxent'],
|
44
|
+
['CRFClassifier', 'edu.stanford.nlp.ie.crf'],
|
45
|
+
['LexicalizedParser', 'edu.stanford.nlp.parser.lexparser'],
|
46
|
+
['Options', 'edu.stanford.nlp.parser.lexparser'],
|
47
|
+
['Properties', 'java.util'],
|
48
|
+
['ArrayList', 'java.util'],
|
49
|
+
['AnnotationBridge', '']
|
50
|
+
]
|
51
|
+
|
52
|
+
# ########################### #
|
53
|
+
# Stanford Core NLP bindings #
|
54
|
+
# ########################### #
|
55
|
+
|
56
|
+
require 'stanford-core-nlp/bridge'
|
57
|
+
extend StanfordCoreNLP::Bridge
|
58
|
+
|
59
|
+
class << self
|
60
|
+
# The model file names for a given language.
|
61
|
+
attr_accessor :model_files
|
62
|
+
# The folder in which to look for models.
|
63
|
+
attr_accessor :model_path
|
64
|
+
# Store the language currently being used.
|
65
|
+
attr_accessor :language
|
66
|
+
#Custom properties
|
67
|
+
attr_accessor :custom_properties
|
68
|
+
end
|
69
|
+
|
70
|
+
self.custom_properties = {}
|
71
|
+
|
72
|
+
# The path to the main folder containing the folders
|
73
|
+
# with the individual models inside. By default, this
|
74
|
+
# is the same as the JAR path.
|
75
|
+
self.model_path = self.jar_path
|
76
|
+
|
77
|
+
# ########################### #
|
78
|
+
# Public configuration params #
|
79
|
+
# ########################### #
|
80
|
+
|
81
|
+
# Use models for a given language. The language can be supplied
# as its full-length name or as an ISO-639 2 or 3 letter code,
# either as a Symbol or as a String in any letter case
# (e.g. :english, :eng, :en, 'en' or 'English' will all work).
#
# Resets model_files and fills it with the default model paths
# (relative to model_path) that Config::Models declares for the
# language. When the language is not listed in
# Config::LanguageCodes, language is set to nil and model_files
# is left empty.
def self.use(language)
  # Normalize so that Strings and Symbols are treated alike.
  requested = language.to_s.downcase.to_sym

  entry = Config::LanguageCodes.find do |_, codes|
    codes.include?(requested)
  end
  # The third code of each entry is the full-length language name.
  lang = entry && entry.last[2]

  self.model_files = {}
  self.language = lang

  Config::Models.each do |name, languages|
    models = languages[lang]
    folder = Config::ModelFolders[name]
    if models.is_a?(Hash)
      # Several files for one annotator: "<annotator>.<key>".
      models.each do |key, file|
        model_files["#{name}.#{key}"] = folder + file
      end
    elsif models.is_a?(String)
      # A single model file: "<annotator>.model".
      model_files["#{name}.model"] = folder + models
    end
  end
end
|
104
|
+
|
105
|
+
# Use english by default.
|
106
|
+
self.use :english
|
107
|
+
|
108
|
+
# Set a model file, overriding the entry stored in model_files.
#
# name - the property name (String or Symbol), e.g. 'pos.model'.
#        The part before the first dot selects the folder from
#        Config::ModelFolders.
# file - the file name inside that folder.
#
# Raises ArgumentError when the prefix of name has no entry in
# Config::ModelFolders.
def self.set_model(name, file)
  name = name.to_s
  prefix = name.split('.').first
  folder = prefix && Config::ModelFolders[prefix.to_sym]

  unless folder
    raise ArgumentError,
      "Unknown model type in '#{name}'. Expected a name starting " +
      "with one of: #{Config::ModelFolders.keys.join(', ')}."
  end

  model_files[name] = folder + file
end
|
113
|
+
|
114
|
+
# ########################### #
|
115
|
+
# Public API methods #
|
116
|
+
# ########################### #
|
117
|
+
|
118
|
+
def self.bind
|
119
|
+
|
120
|
+
# Take care of Windows users.
|
121
|
+
if self.running_on_windows?
|
122
|
+
self.jar_path.gsub!('/', '\\')
|
123
|
+
self.model_path.gsub!('/', '\\')
|
124
|
+
end
|
125
|
+
|
126
|
+
# Make the bindings.
|
127
|
+
super
|
128
|
+
|
129
|
+
# Bind annotation bridge.
|
130
|
+
self.default_classes.each do |info|
|
131
|
+
klass = const_get(info.first)
|
132
|
+
self.inject_get_method(klass)
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|
137
|
+
# Load a StanfordCoreNLP pipeline for the given annotators
# (Symbols or Strings, e.g. :tokenize, :ssplit, 'ner').
#
# Binds the Java classes first if that has not been done yet,
# builds the pipeline properties from model_files, and returns
# whatever AnnotationBridge.getPipelineWithProperties returns.
#
# Raises a RuntimeError when a model file needed by one of the
# requested annotators is not readable.
def self.load(*annotators)

  self.bind unless self.bound

  # Normalize once so that Symbols and Strings behave the same
  # in every check below.
  names = annotators.map { |annotator| annotator.to_s }

  # Prepend the model path to the model files, keeping only the
  # entries whose property name mentions a requested annotator.
  properties = {}
  self.model_files.each do |key, file|
    next unless names.any? { |name| key.index(name) }
    path = self.model_path + file
    unless File.readable?(path)
      raise "Model file #{path} could not be found. " +
      "You may need to download this file manually " +
      "and/or set paths properly."
    end
    properties[key] = path
  end

  properties['annotators'] = names.join(', ')

  unless self.language == :english
    # Bug fix for French/German parsers.
    # Otherwise throws "IllegalArgumentException:
    # Unknown option: -retainTmpSubcategories"
    properties['parse.flags'] = ''
    # Bug fix for French/German parsers.
    # Otherwise throws java.lang.NullPointerException: null.
    properties['parse.buildgraphs'] = 'false'
  end

  # Bug fix for NER system. Otherwise throws:
  # Error initializing binder 1 at edu.stanford.
  # nlp.time.Options.<init>(Options.java:88)
  properties['sutime.binders'] = '0'

  # Manually include SUTime models.
  if names.include?('ner')
    properties['sutime.rules'] =
    self.model_path + 'sutime/defs.sutime.txt, ' +
    self.model_path + 'sutime/english.sutime.txt'
  end

  props = get_properties(properties)

  # Hack for Java7 compatibility.
  bridge = const_get(:AnnotationBridge)
  bridge.getPipelineWithProperties(props)

end
|
193
|
+
|
194
|
+
# Hack in order not to break backwards compatibility:
# StanfordCoreNLP::Text is a deprecated name for
# StanfordCoreNLP::Annotation. Referencing it prints a warning
# and returns the Annotation constant. Any other missing
# constant is handed to the default const_missing behaviour.
def self.const_missing(const)
  if const == :Text
    # Note the trailing space: the two sentences are joined.
    puts "WARNING: StanfordCoreNLP::Text has been deprecated. " +
    "Please use StanfordCoreNLP::Annotation instead."
    Annotation
  else
    super(const)
  end
end
|
204
|
+
|
205
|
+
private
|
206
|
+
|
207
|
+
# Create a java.util.Properties object from a hash.
|
208
|
+
def self.get_properties(properties)
|
209
|
+
properties = properties.merge(self.custom_properties)
|
210
|
+
props = Properties.new
|
211
|
+
properties.each do |property, value|
|
212
|
+
props.set_property(property.to_s, value.to_s)
|
213
|
+
end
|
214
|
+
props
|
215
|
+
end
|
216
|
+
|
217
|
+
# Get a Java ArrayList binding to pass lists
|
218
|
+
# of tokens to the Stanford Core NLP process.
|
219
|
+
def self.get_list(tokens)
|
220
|
+
list = StanfordCoreNLP::ArrayList.new
|
221
|
+
tokens.each do |t|
|
222
|
+
list.add(Word.new(t.to_s))
|
223
|
+
end
|
224
|
+
list
|
225
|
+
end
|
226
|
+
|
227
|
+
# Returns true if we're running on a native Windows Ruby.
#
# platform - the platform string to inspect; defaults to
#            RUBY_PLATFORM. Both the MSVC ("mswin") and MinGW
#            ("mingw") builds are recognised, in 32 and 64 bit
#            variants (e.g. 'i386-mswin32', 'x64-mingw32').
#
# The pattern is deliberately not just /win/, which would also
# match 'darwin'.
def self.running_on_windows?(platform = RUBY_PLATFORM)
  !!(platform =~ /mswin|mingw/)
end
|
231
|
+
|
232
|
+
# camel_case which also support dot as separator
|
233
|
+
def self.camel_case(s)
|
234
|
+
s = s.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }
|
235
|
+
s.gsub(/(?:^|_|\.)(.)/) { $1.upcase }
|
236
|
+
end
|
237
|
+
|
238
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Mixin that adds a Ruby-friendly get method to the proxied
# Java classes, so that annotations can be fetched by symbol
# (e.g. token.get(:part_of_speech)).
module StanfordCoreNLP::Bridge

  # Define get(annotation, anno_base = nil) on klass.
  #
  # On JRuby the class is left untouched unless it already has a
  # get method; that original method is kept under the name
  # get_without_casting and is called by the new get.
  def inject_get_method(klass)

    klass.class_eval do

      if RUBY_PLATFORM =~ /java/
        # This return leaves inject_get_method itself.
        return unless method_defined?(:get)
        alias_method :get_without_casting, :get
      end

      # Dynamically defined on all proxied annotation classes.
      # Get an annotation using the annotation bridge.
      #
      # annotation - snake_case name without the 'Annotation'
      #              suffix, e.g. :named_entity_tag.
      # anno_base  - optional key of Config::Annotations (e.g.
      #              'nlp.ling.CoreAnnotations') used to pick one
      #              class when several share the same name.
      #
      # Raises a RuntimeError when anno_base is unknown, when the
      # annotation name is unknown, or when the name is ambiguous
      # and no anno_base was given.
      def get(annotation, anno_base = nil)

        unless RUBY_PLATFORM =~ /java/
          # Outside JRuby, objects without a Java get(Class)
          # method silently return nil.
          return unless java_methods.include?('get(Ljava.lang.Class;)')
        end

        anno_class = "#{StanfordCoreNLP.camel_case(annotation)}Annotation"
        if anno_base
          # The constant lives in StanfordCoreNLP::Config; the
          # previous reference to StanfordNLP::Config raised a
          # NameError whenever anno_base was supplied.
          unless StanfordCoreNLP::Config::Annotations[anno_base]
            raise "The path #{anno_base} doesn't exist."
          end
          anno_bases = [anno_base]
        else
          anno_bases = StanfordCoreNLP::Config::AnnotationsByName[anno_class]
          raise "The annotation #{anno_class} doesn't exist." unless anno_bases
        end
        if anno_bases.size > 1
          msg = "There are many different annotations bearing the name #{anno_class}." +
          "\nPlease specify one of the following base classes as second parameter to disambiguate: "
          msg << anno_bases.join(',')
          raise msg
        else
          base_class = anno_bases[0]
        end

        if RUBY_PLATFORM =~ /java/
          # Build the JRuby constant path of the nested Java
          # annotation class and call the original get with it.
          fqcn = "edu.stanford.#{base_class}"
          class_path = fqcn.split(".")
          class_name = class_path.pop
          path = StanfordCoreNLP.camel_case(class_path.join("."))
          jruby_class = "Java::#{path}::#{class_name}::#{anno_class}"
          get_without_casting(Object.module_eval(jruby_class))
        else
          # Java binary name of the nested class (Outer$Inner),
          # resolved on the Java side by AnnotationBridge.
          url = "edu.stanford.#{base_class}$#{anno_class}"
          StanfordCoreNLP::AnnotationBridge.getAnnotation(self, url)
        end

      end

    end

  end

end
|
@@ -0,0 +1,392 @@
|
|
1
|
+
module StanfordCoreNLP
|
2
|
+
|
3
|
+
class Config
|
4
|
+
|
5
|
+
# A hash of language codes in humanized,
|
6
|
+
# 2 and 3-letter ISO639 codes.
|
7
|
+
LanguageCodes = {
|
8
|
+
:english => [:en, :eng, :english],
|
9
|
+
:german => [:de, :ger, :german],
|
10
|
+
:french => [:fr, :fre, :french]
|
11
|
+
}
|
12
|
+
|
13
|
+
# Folders inside the JAR path for the models.
|
14
|
+
ModelFolders = {
|
15
|
+
:pos => 'taggers/',
|
16
|
+
:parse => 'grammar/',
|
17
|
+
:ner => 'classifiers/',
|
18
|
+
:dcoref => 'dcoref/'
|
19
|
+
}
|
20
|
+
|
21
|
+
# Tag sets used by Stanford for each language.
|
22
|
+
TagSets = {
|
23
|
+
:english => :penn,
|
24
|
+
:german => :stutgart,
|
25
|
+
:french => :paris7
|
26
|
+
}
|
27
|
+
|
28
|
+
# Default models for all languages.
|
29
|
+
Models = {
|
30
|
+
|
31
|
+
:pos => {
|
32
|
+
:english => 'english-left3words-distsim.tagger',
|
33
|
+
:german => 'german-fast.tagger',
|
34
|
+
:french => 'french.tagger'
|
35
|
+
},
|
36
|
+
|
37
|
+
:parse => {
|
38
|
+
:english => 'englishPCFG.ser.gz',
|
39
|
+
:german => 'germanPCFG.ser.gz',
|
40
|
+
:french => 'frenchFactored.ser.gz'
|
41
|
+
},
|
42
|
+
|
43
|
+
:ner => {
|
44
|
+
:english => 'english.all.3class.distsim.crf.ser.gz'
|
45
|
+
# :german => {} # Add this at some point.
|
46
|
+
},
|
47
|
+
|
48
|
+
:dcoref => {
|
49
|
+
:english => {
|
50
|
+
'demonym' => 'demonyms.txt',
|
51
|
+
'animate' => 'animate.unigrams.txt',
|
52
|
+
'female' => 'female.unigrams.txt',
|
53
|
+
'inanimate' => 'inanimate.unigrams.txt',
|
54
|
+
'male' => 'male.unigrams.txt',
|
55
|
+
'neutral' => 'neutral.unigrams.txt',
|
56
|
+
'plural' => 'plural.unigrams.txt',
|
57
|
+
'singular' => 'singular.unigrams.txt',
|
58
|
+
'states' => 'state-abbreviations.txt',
|
59
|
+
'countries' => 'countries',
|
60
|
+
'states.provinces' => 'statesandprovinces',
|
61
|
+
'extra.gender' => 'namegender.combine.txt',
|
62
|
+
'singleton.predictor' => 'singleton.predictor.ser'
|
63
|
+
},
|
64
|
+
:german => {},
|
65
|
+
:french => {}
|
66
|
+
}
|
67
|
+
|
68
|
+
# Models to add.
|
69
|
+
|
70
|
+
#"truecase.model" - path towards the true-casing model; default: StanfordCoreNLPModels/truecase/noUN.ser.gz
|
71
|
+
#"truecase.bias" - class bias of the true case model; default: INIT_UPPER:-0.7,UPPER:-0.7,O:0
|
72
|
+
#"truecase.mixedcasefile" - path towards the mixed case file; default: StanfordCoreNLPModels/truecase/MixDisambiguation.list
|
73
|
+
#"nfl.gazetteer" - path towards the gazetteer for the NFL domain
|
74
|
+
#"nfl.relation.model" - path towards the NFL relation extraction model
|
75
|
+
}
|
76
|
+
|
77
|
+
# List of annotations by JAVA class path.
|
78
|
+
Annotations = {
|
79
|
+
|
80
|
+
'nlp.dcoref.CoNLL2011DocumentReader' => [
|
81
|
+
'CorefMentionAnnotation',
|
82
|
+
'NamedEntityAnnotation'
|
83
|
+
],
|
84
|
+
|
85
|
+
'nlp.ling.CoreAnnotations' => [
|
86
|
+
|
87
|
+
'AbbrAnnotation',
|
88
|
+
'AbgeneAnnotation',
|
89
|
+
'AbstrAnnotation',
|
90
|
+
'AfterAnnotation',
|
91
|
+
'AnswerAnnotation',
|
92
|
+
'AnswerObjectAnnotation',
|
93
|
+
'AntecedentAnnotation',
|
94
|
+
'ArgDescendentAnnotation',
|
95
|
+
'ArgumentAnnotation',
|
96
|
+
'BagOfWordsAnnotation',
|
97
|
+
'BeAnnotation',
|
98
|
+
'BeforeAnnotation',
|
99
|
+
'BeginIndexAnnotation',
|
100
|
+
'BestCliquesAnnotation',
|
101
|
+
'BestFullAnnotation',
|
102
|
+
'CalendarAnnotation',
|
103
|
+
'CategoryAnnotation',
|
104
|
+
'CategoryFunctionalTagAnnotation',
|
105
|
+
'CharacterOffsetBeginAnnotation',
|
106
|
+
'CharacterOffsetEndAnnotation',
|
107
|
+
'CharAnnotation',
|
108
|
+
'ChineseCharAnnotation',
|
109
|
+
'ChineseIsSegmentedAnnotation',
|
110
|
+
'ChineseOrigSegAnnotation',
|
111
|
+
'ChineseSegAnnotation',
|
112
|
+
'ChunkAnnotation',
|
113
|
+
'CoarseTagAnnotation',
|
114
|
+
'CommonWordsAnnotation',
|
115
|
+
'CoNLLDepAnnotation',
|
116
|
+
'CoNLLDepParentIndexAnnotation',
|
117
|
+
'CoNLLDepTypeAnnotation',
|
118
|
+
'CoNLLPredicateAnnotation',
|
119
|
+
'CoNLLSRLAnnotation',
|
120
|
+
'ContextsAnnotation',
|
121
|
+
'CopyAnnotation',
|
122
|
+
'CostMagnificationAnnotation',
|
123
|
+
'CovertIDAnnotation',
|
124
|
+
'D2_LBeginAnnotation',
|
125
|
+
'D2_LEndAnnotation',
|
126
|
+
'D2_LMiddleAnnotation',
|
127
|
+
'DayAnnotation',
|
128
|
+
'DependentsAnnotation',
|
129
|
+
'DictAnnotation',
|
130
|
+
'DistSimAnnotation',
|
131
|
+
'DoAnnotation',
|
132
|
+
'DocDateAnnotation',
|
133
|
+
'DocIDAnnotation',
|
134
|
+
'DomainAnnotation',
|
135
|
+
'EndIndexAnnotation',
|
136
|
+
'EntityClassAnnotation',
|
137
|
+
'EntityRuleAnnotation',
|
138
|
+
'EntityTypeAnnotation',
|
139
|
+
'FeaturesAnnotation',
|
140
|
+
'FemaleGazAnnotation',
|
141
|
+
'FirstChildAnnotation',
|
142
|
+
'ForcedSentenceEndAnnotation',
|
143
|
+
'FreqAnnotation',
|
144
|
+
'GazAnnotation',
|
145
|
+
'GazetteerAnnotation',
|
146
|
+
'GenericTokensAnnotation',
|
147
|
+
'GeniaAnnotation',
|
148
|
+
'GoldAnswerAnnotation',
|
149
|
+
'GovernorAnnotation',
|
150
|
+
'GrandparentAnnotation',
|
151
|
+
'HaveAnnotation',
|
152
|
+
'HeadWordStringAnnotation',
|
153
|
+
'HeightAnnotation',
|
154
|
+
'IDAnnotation',
|
155
|
+
'IDFAnnotation',
|
156
|
+
'INAnnotation',
|
157
|
+
'IndexAnnotation',
|
158
|
+
'InterpretationAnnotation',
|
159
|
+
'IsDateRangeAnnotation',
|
160
|
+
'IsURLAnnotation',
|
161
|
+
'LabelAnnotation',
|
162
|
+
'LastGazAnnotation',
|
163
|
+
'LastTaggedAnnotation',
|
164
|
+
'LBeginAnnotation',
|
165
|
+
'LeftChildrenNodeAnnotation',
|
166
|
+
'LeftTermAnnotation',
|
167
|
+
'LemmaAnnotation',
|
168
|
+
'LEndAnnotation',
|
169
|
+
'LengthAnnotation',
|
170
|
+
'LMiddleAnnotation',
|
171
|
+
'MaleGazAnnotation',
|
172
|
+
'MarkingAnnotation',
|
173
|
+
'MonthAnnotation',
|
174
|
+
'MorphoCaseAnnotation',
|
175
|
+
'MorphoGenAnnotation',
|
176
|
+
'MorphoNumAnnotation',
|
177
|
+
'MorphoPersAnnotation',
|
178
|
+
'NamedEntityTagAnnotation',
|
179
|
+
'NeighborsAnnotation',
|
180
|
+
'NERIDAnnotation',
|
181
|
+
'NormalizedNamedEntityTagAnnotation',
|
182
|
+
'NotAnnotation',
|
183
|
+
'NumericCompositeObjectAnnotation',
|
184
|
+
'NumericCompositeTypeAnnotation',
|
185
|
+
'NumericCompositeValueAnnotation',
|
186
|
+
'NumericObjectAnnotation',
|
187
|
+
'NumericTypeAnnotation',
|
188
|
+
'NumericValueAnnotation',
|
189
|
+
'NumerizedTokensAnnotation',
|
190
|
+
'NumTxtSentencesAnnotation',
|
191
|
+
'OriginalAnswerAnnotation',
|
192
|
+
'OriginalCharAnnotation',
|
193
|
+
'OriginalTextAnnotation',
|
194
|
+
'ParagraphAnnotation',
|
195
|
+
'ParagraphsAnnotation',
|
196
|
+
'ParaPositionAnnotation',
|
197
|
+
'ParentAnnotation',
|
198
|
+
'PartOfSpeechAnnotation',
|
199
|
+
'PercentAnnotation',
|
200
|
+
'PhraseWordsAnnotation',
|
201
|
+
'PhraseWordsTagAnnotation',
|
202
|
+
'PolarityAnnotation',
|
203
|
+
'PositionAnnotation',
|
204
|
+
'PossibleAnswersAnnotation',
|
205
|
+
'PredictedAnswerAnnotation',
|
206
|
+
'PrevChildAnnotation',
|
207
|
+
'PriorAnnotation',
|
208
|
+
'ProjectedCategoryAnnotation',
|
209
|
+
'ProtoAnnotation',
|
210
|
+
'RoleAnnotation',
|
211
|
+
'SectionAnnotation',
|
212
|
+
'SemanticHeadTagAnnotation',
|
213
|
+
'SemanticHeadWordAnnotation',
|
214
|
+
'SemanticTagAnnotation',
|
215
|
+
'SemanticWordAnnotation',
|
216
|
+
'SentenceIDAnnotation',
|
217
|
+
'SentenceIndexAnnotation',
|
218
|
+
'SentencePositionAnnotation',
|
219
|
+
'SentencesAnnotation',
|
220
|
+
'ShapeAnnotation',
|
221
|
+
'SpaceBeforeAnnotation',
|
222
|
+
'SpanAnnotation',
|
223
|
+
'SpeakerAnnotation',
|
224
|
+
'SRL_ID',
|
225
|
+
'SRLIDAnnotation',
|
226
|
+
'SRLInstancesAnnotation',
|
227
|
+
'StackedNamedEntityTagAnnotation',
|
228
|
+
'StateAnnotation',
|
229
|
+
'StemAnnotation',
|
230
|
+
'SubcategorizationAnnotation',
|
231
|
+
'TagLabelAnnotation',
|
232
|
+
'TextAnnotation',
|
233
|
+
'TokenBeginAnnotation',
|
234
|
+
'TokenEndAnnotation',
|
235
|
+
'TokensAnnotation',
|
236
|
+
'TopicAnnotation',
|
237
|
+
'TrueCaseAnnotation',
|
238
|
+
'TrueCaseTextAnnotation',
|
239
|
+
'TrueTagAnnotation',
|
240
|
+
'UBlockAnnotation',
|
241
|
+
'UnaryAnnotation',
|
242
|
+
'UnknownAnnotation',
|
243
|
+
'UtteranceAnnotation',
|
244
|
+
'UTypeAnnotation',
|
245
|
+
'ValueAnnotation',
|
246
|
+
'VerbSenseAnnotation',
|
247
|
+
'WebAnnotation',
|
248
|
+
'WordFormAnnotation',
|
249
|
+
'WordnetSynAnnotation',
|
250
|
+
'WordPositionAnnotation',
|
251
|
+
'WordSenseAnnotation',
|
252
|
+
'XmlContextAnnotation',
|
253
|
+
'XmlElementAnnotation',
|
254
|
+
'YearAnnotation'
|
255
|
+
],
|
256
|
+
|
257
|
+
'nlp.dcoref.CorefCoreAnnotations' => [
|
258
|
+
|
259
|
+
'CorefAnnotation',
|
260
|
+
'CorefChainAnnotation',
|
261
|
+
'CorefClusterAnnotation',
|
262
|
+
'CorefClusterIdAnnotation',
|
263
|
+
'CorefDestAnnotation',
|
264
|
+
'CorefGraphAnnotation'
|
265
|
+
],
|
266
|
+
|
267
|
+
'nlp.ling.CoreLabel' => [
|
268
|
+
'GenericAnnotation'
|
269
|
+
],
|
270
|
+
|
271
|
+
'nlp.trees.EnglishGrammaticalRelations' => [
|
272
|
+
'AbbreviationModifierGRAnnotation',
|
273
|
+
'AdjectivalComplementGRAnnotation',
|
274
|
+
'AdjectivalModifierGRAnnotation',
|
275
|
+
'AdvClauseModifierGRAnnotation',
|
276
|
+
'AdverbialModifierGRAnnotation',
|
277
|
+
'AgentGRAnnotation',
|
278
|
+
'AppositionalModifierGRAnnotation',
|
279
|
+
'ArgumentGRAnnotation',
|
280
|
+
'AttributiveGRAnnotation',
|
281
|
+
'AuxModifierGRAnnotation',
|
282
|
+
'AuxPassiveGRAnnotation',
|
283
|
+
'ClausalComplementGRAnnotation',
|
284
|
+
'ClausalPassiveSubjectGRAnnotation',
|
285
|
+
'ClausalSubjectGRAnnotation',
|
286
|
+
'ComplementGRAnnotation',
|
287
|
+
'ComplementizerGRAnnotation',
|
288
|
+
'ConjunctGRAnnotation',
|
289
|
+
'ControllingSubjectGRAnnotation',
|
290
|
+
'CoordinationGRAnnotation',
|
291
|
+
'CopulaGRAnnotation',
|
292
|
+
'DeterminerGRAnnotation',
|
293
|
+
'DirectObjectGRAnnotation',
|
294
|
+
'ExpletiveGRAnnotation',
|
295
|
+
'IndirectObjectGRAnnotation',
|
296
|
+
'InfinitivalModifierGRAnnotation',
|
297
|
+
'MarkerGRAnnotation',
|
298
|
+
'ModifierGRAnnotation',
|
299
|
+
'MultiWordExpressionGRAnnotation',
|
300
|
+
'NegationModifierGRAnnotation',
|
301
|
+
'NominalPassiveSubjectGRAnnotation',
|
302
|
+
'NominalSubjectGRAnnotation',
|
303
|
+
'NounCompoundModifierGRAnnotation',
|
304
|
+
'NpAdverbialModifierGRAnnotation',
|
305
|
+
'NumberModifierGRAnnotation',
|
306
|
+
'NumericModifierGRAnnotation',
|
307
|
+
'ObjectGRAnnotation',
|
308
|
+
'ParataxisGRAnnotation',
|
309
|
+
'ParticipialModifierGRAnnotation',
|
310
|
+
'PhrasalVerbParticleGRAnnotation',
|
311
|
+
'PossessionModifierGRAnnotation',
|
312
|
+
'PossessiveModifierGRAnnotation',
|
313
|
+
'PreconjunctGRAnnotation',
|
314
|
+
'PredeterminerGRAnnotation',
|
315
|
+
'PredicateGRAnnotation',
|
316
|
+
'PrepositionalComplementGRAnnotation',
|
317
|
+
'PrepositionalModifierGRAnnotation',
|
318
|
+
'PrepositionalObjectGRAnnotation',
|
319
|
+
'PunctuationGRAnnotation',
|
320
|
+
'PurposeClauseModifierGRAnnotation',
|
321
|
+
'QuantifierModifierGRAnnotation',
|
322
|
+
'ReferentGRAnnotation',
|
323
|
+
'RelativeClauseModifierGRAnnotation',
|
324
|
+
'RelativeGRAnnotation',
|
325
|
+
'SemanticDependentGRAnnotation',
|
326
|
+
'SubjectGRAnnotation',
|
327
|
+
'TemporalModifierGRAnnotation',
|
328
|
+
'XClausalComplementGRAnnotation'
|
329
|
+
],
|
330
|
+
|
331
|
+
'nlp.trees.GrammaticalRelation' => [
|
332
|
+
'DependentGRAnnotation',
|
333
|
+
'GovernorGRAnnotation',
|
334
|
+
'GrammaticalRelationAnnotation',
|
335
|
+
'KillGRAnnotation',
|
336
|
+
'Language',
|
337
|
+
'RootGRAnnotation'
|
338
|
+
],
|
339
|
+
|
340
|
+
'nlp.ie.machinereading.structure.MachineReadingAnnotations' => [
|
341
|
+
'DependencyAnnotation',
|
342
|
+
'DocumentDirectoryAnnotation',
|
343
|
+
'DocumentIdAnnotation',
|
344
|
+
'EntityMentionsAnnotation',
|
345
|
+
'EventMentionsAnnotation',
|
346
|
+
'GenderAnnotation',
|
347
|
+
'RelationMentionsAnnotation',
|
348
|
+
'TriggerAnnotation'
|
349
|
+
],
|
350
|
+
|
351
|
+
'nlp.parser.lexparser.ParserAnnotations' => [
|
352
|
+
'ConstraintAnnotation'
|
353
|
+
],
|
354
|
+
|
355
|
+
'nlp.semgraph.SemanticGraphCoreAnnotations' => [
|
356
|
+
'BasicDependenciesAnnotation',
|
357
|
+
'CollapsedCCProcessedDependenciesAnnotation',
|
358
|
+
'CollapsedDependenciesAnnotation'
|
359
|
+
],
|
360
|
+
|
361
|
+
'nlp.time.TimeAnnotations' => [
|
362
|
+
'TimexAnnotation',
|
363
|
+
'TimexAnnotations'
|
364
|
+
],
|
365
|
+
|
366
|
+
'nlp.time.TimeExpression' => [
|
367
|
+
'Annotation',
|
368
|
+
'ChildrenAnnotation',
|
369
|
+
'TimeIndexAnnotation'
|
370
|
+
],
|
371
|
+
|
372
|
+
'nlp.trees.TreeCoreAnnotations' => [
|
373
|
+
'TreeHeadTagAnnotation',
|
374
|
+
'TreeHeadWordAnnotation',
|
375
|
+
'TreeAnnotation'
|
376
|
+
]
|
377
|
+
}
|
378
|
+
|
379
|
+
# Invert the Annotations table: map each annotation class name to the
# array of base classes (the keys of Annotations) that list it. A name
# listed under several base classes collects all of them, in the order
# the base classes appear in Annotations.
AnnotationsByName =
  Annotations.each_with_object({}) do |(base_class, annotation_classes), by_name|
    annotation_classes.each do |annotation_class|
      # Start an empty list the first time a name is seen, then append.
      (by_name[annotation_class] ||= []) << base_class
    end
  end
|
390
|
+
|
391
|
+
end
|
392
|
+
end
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: stanford-core-nlp-abstractor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Louis Mullie
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-02-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bind-it
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.7
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.7
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: " High-level Ruby bindings to the Stanford CoreNLP package, a set of natural
|
56
|
+
language processing\ntools that provides tokenization, part-of-speech tagging and
|
57
|
+
parsing for several languages, as well as named entity\nrecognition and coreference
|
58
|
+
resolution for English. "
|
59
|
+
email:
|
60
|
+
- louis.mullie@gmail.com
|
61
|
+
executables: []
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files: []
|
64
|
+
files:
|
65
|
+
- LICENSE
|
66
|
+
- README.md
|
67
|
+
- bin/AnnotationBridge.java
|
68
|
+
- bin/bridge.jar
|
69
|
+
- lib/stanford-core-nlp.rb
|
70
|
+
- lib/stanford-core-nlp/bridge.rb
|
71
|
+
- lib/stanford-core-nlp/config.rb
|
72
|
+
homepage: https://github.com/louismullie/stanford-core-nlp
|
73
|
+
licenses: []
|
74
|
+
metadata: {}
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
requirements: []
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 2.4.3
|
92
|
+
signing_key:
|
93
|
+
specification_version: 4
|
94
|
+
summary: Ruby bindings to the Stanford Core NLP tools.
|
95
|
+
test_files: []
|
96
|
+
has_rdoc:
|