stanford-core-nlp 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
data/lib/stanford-core-nlp.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module StanfordCoreNLP
|
2
2
|
|
3
|
-
VERSION = '0.1.1'
|
4
|
-
require 'stanford-core-nlp/
|
5
|
-
require 'stanford-core-nlp/
|
6
|
-
|
3
|
+
VERSION = '0.1.2'
|
4
|
+
require 'stanford-core-nlp/jar_loader.rb'
|
5
|
+
require 'stanford-core-nlp/java_wrapper'
|
6
|
+
require 'stanford-core-nlp/stanford_annotations'
|
7
|
+
|
7
8
|
class << self
|
8
9
|
# The path in which to look for the Stanford JAR files.
|
9
10
|
# This is passed to JarLoader.
|
@@ -51,11 +52,18 @@ module StanfordCoreNLP
|
|
51
52
|
self.model_files[name] = file
|
52
53
|
end
|
53
54
|
|
55
|
+
@@initialized = false
|
56
|
+
# Load the JARs, create the classes.
|
57
|
+
def self.init
|
58
|
+
self.load_jars(self.jvm_args, self.jar_path, self.log_file)
|
59
|
+
self.create_classes
|
60
|
+
@@initialized = true
|
61
|
+
end
|
62
|
+
|
54
63
|
# Load a StanfordCoreNLP pipeline with the specified JVM flags and
|
55
64
|
# StanfordCoreNLP properties (hash of property => values).
|
56
65
|
def self.load(*annotators)
|
57
|
-
self.
|
58
|
-
self.create_classes
|
66
|
+
self.init unless @@initialized
|
59
67
|
# Prepend the JAR path to the model files.
|
60
68
|
properties = {}
|
61
69
|
self.model_files.each { |k,v| properties[k] = self.jar_path + v }
|
@@ -84,7 +92,16 @@ module StanfordCoreNLP
|
|
84
92
|
const_set(:Properties, Rjb::import('java.util.Properties'))
|
85
93
|
const_set(:AnnotationBridge, Rjb::import('AnnotationBridge'))
|
86
94
|
end
|
87
|
-
|
95
|
+
|
96
|
+
# Load a class (e.g. PTBTokenizerAnnotator) in a specific
|
97
|
+
# Java package (default is 'edu.stanford.nlp.pipeline').
|
98
|
+
# The class is then accessible under the StanfordCoreNLP
|
99
|
+
# namespace, e.g. StanfordCoreNLP::PTBTokenizerAnnotator.
|
100
|
+
def self.load_class(klass, base = 'edu.stanford.nlp.pipeline')
|
101
|
+
self.init unless @@initialized
|
102
|
+
const_set(klass.intern, Rjb::import("#{base}.#{klass}"))
|
103
|
+
end
|
104
|
+
|
88
105
|
# Create a java.util.Properties object from a hash.
|
89
106
|
def self.get_properties(properties)
|
90
107
|
props = Properties.new
|
@@ -94,4 +111,9 @@ module StanfordCoreNLP
|
|
94
111
|
props
|
95
112
|
end
|
96
113
|
|
114
|
+
# Helper function: snake_case -> CamelCase (e.g. coref_chain -> CorefChain).
|
115
|
+
def self.camel_case(text)
|
116
|
+
text.to_s.gsub(/^[a-z]|_[a-z]/) { |a| a.upcase }.gsub('_', '')
|
117
|
+
end
|
118
|
+
|
97
119
|
end
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module StanfordCoreNLP
|
2
|
+
|
3
|
+
# Modify the Rjb JavaProxy class to add our own methods to every Java object.
|
4
|
+
Rjb::Rjb_JavaProxy.class_eval do
|
5
|
+
|
6
|
+
# Dynamically defined on all proxied Java objects.
|
7
|
+
# Shorthand for to_string defined by Java classes.
|
8
|
+
def to_s; to_string; end
|
9
|
+
|
10
|
+
# Usable on any proxied Java object that exposes an iterator() method.
|
11
|
+
# Provide Ruby-style iterators to wrap Java iterators.
|
12
|
+
def each
|
13
|
+
if !java_methods.include?('iterator()')
|
14
|
+
raise 'This object cannot be iterated.'
|
15
|
+
else
|
16
|
+
i = self.iterator
|
17
|
+
while i.has_next; yield i.next; end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,400 @@
|
|
1
|
+
module StanfordCoreNLP
|
2
|
+
|
3
|
+
Annotations = {
|
4
|
+
|
5
|
+
'nlp.trees.international.pennchinese.ChineseGrammaticalRelations' => [
|
6
|
+
'AdjectivalModifierGRAnnotation',
|
7
|
+
'AdverbialModifierGRAnnotation',
|
8
|
+
'ArgumentGRAnnotation',
|
9
|
+
'AspectMarkerGRAnnotation',
|
10
|
+
'AssociativeMarkerGRAnnotation',
|
11
|
+
'AssociativeModifierGRAnnotation',
|
12
|
+
'AttributiveGRAnnotation',
|
13
|
+
'AuxModifierGRAnnotation',
|
14
|
+
'AuxPassiveGRAnnotation',
|
15
|
+
'BaGRAnnotation',
|
16
|
+
'ClausalComplementGRAnnotation',
|
17
|
+
'ClausalSubjectGRAnnotation',
|
18
|
+
'ClauseModifierGRAnnotation',
|
19
|
+
'ComplementGRAnnotation',
|
20
|
+
'ComplementizerGRAnnotation',
|
21
|
+
'ControllingSubjectGRAnnotation',
|
22
|
+
'CoordinationGRAnnotation',
|
23
|
+
'DeterminerGRAnnotation',
|
24
|
+
'DirectObjectGRAnnotation',
|
25
|
+
'DvpMarkerGRAnnotation',
|
26
|
+
'DvpModifierGRAnnotation',
|
27
|
+
'EtcGRAnnotation',
|
28
|
+
'LocalizerComplementGRAnnotation',
|
29
|
+
'ModalGRAnnotation',
|
30
|
+
'ModifierGRAnnotation',
|
31
|
+
'NegationModifierGRAnnotation',
|
32
|
+
'NominalPassiveSubjectGRAnnotation',
|
33
|
+
'NominalSubjectGRAnnotation',
|
34
|
+
'NounCompoundModifierGRAnnotation',
|
35
|
+
'NumberModifierGRAnnotation',
|
36
|
+
'NumericModifierGRAnnotation',
|
37
|
+
'ObjectGRAnnotation',
|
38
|
+
'OrdNumberGRAnnotation',
|
39
|
+
'ParentheticalGRAnnotation',
|
40
|
+
'ParticipialModifierGRAnnotation',
|
41
|
+
'PreconjunctGRAnnotation',
|
42
|
+
'PrepositionalLocalizerModifierGRAnnotation',
|
43
|
+
'PrepositionalModifierGRAnnotation',
|
44
|
+
'PrepositionalObjectGRAnnotation',
|
45
|
+
'PunctuationGRAnnotation',
|
46
|
+
'RangeGRAnnotation',
|
47
|
+
'RelativeClauseModifierGRAnnotation',
|
48
|
+
'ResultativeComplementGRAnnotation',
|
49
|
+
'SemanticDependentGRAnnotation',
|
50
|
+
'SubjectGRAnnotation',
|
51
|
+
'TemporalClauseGRAnnotation',
|
52
|
+
'TemporalGRAnnotation',
|
53
|
+
'TimePostpositionGRAnnotation',
|
54
|
+
'TopicGRAnnotation',
|
55
|
+
'VerbCompoundGRAnnotation',
|
56
|
+
'VerbModifierGRAnnotation',
|
57
|
+
'XClausalComplementGRAnnotation'
|
58
|
+
],
|
59
|
+
|
60
|
+
'nlp.dcoref.CoNLL2011DocumentReader' => [
|
61
|
+
'CorefMentionAnnotation',
|
62
|
+
'NamedEntityAnnotation'
|
63
|
+
],
|
64
|
+
|
65
|
+
'nlp.ling.CoreAnnotations' => [
|
66
|
+
|
67
|
+
'AbbrAnnotation',
|
68
|
+
'AbgeneAnnotation',
|
69
|
+
'AbstrAnnotation',
|
70
|
+
'AfterAnnotation',
|
71
|
+
'AnswerAnnotation',
|
72
|
+
'AnswerObjectAnnotation',
|
73
|
+
'AntecedentAnnotation',
|
74
|
+
'ArgDescendentAnnotation',
|
75
|
+
'ArgumentAnnotation',
|
76
|
+
'BagOfWordsAnnotation',
|
77
|
+
'BeAnnotation',
|
78
|
+
'BeforeAnnotation',
|
79
|
+
'BeginIndexAnnotation',
|
80
|
+
'BestCliquesAnnotation',
|
81
|
+
'BestFullAnnotation',
|
82
|
+
'CalendarAnnotation',
|
83
|
+
'CategoryAnnotation',
|
84
|
+
'CategoryFunctionalTagAnnotation',
|
85
|
+
'CharacterOffsetBeginAnnotation',
|
86
|
+
'CharacterOffsetEndAnnotation',
|
87
|
+
'CharAnnotation',
|
88
|
+
'ChineseCharAnnotation',
|
89
|
+
'ChineseIsSegmentedAnnotation',
|
90
|
+
'ChineseOrigSegAnnotation',
|
91
|
+
'ChineseSegAnnotation',
|
92
|
+
'ChunkAnnotation',
|
93
|
+
'CoarseTagAnnotation',
|
94
|
+
'CommonWordsAnnotation',
|
95
|
+
'CoNLLDepAnnotation',
|
96
|
+
'CoNLLDepParentIndexAnnotation',
|
97
|
+
'CoNLLDepTypeAnnotation',
|
98
|
+
'CoNLLPredicateAnnotation',
|
99
|
+
'CoNLLSRLAnnotation',
|
100
|
+
'ContextsAnnotation',
|
101
|
+
'CopyAnnotation',
|
102
|
+
'CostMagnificationAnnotation',
|
103
|
+
'CovertIDAnnotation',
|
104
|
+
'D2_LBeginAnnotation',
|
105
|
+
'D2_LEndAnnotation',
|
106
|
+
'D2_LMiddleAnnotation',
|
107
|
+
'DayAnnotation',
|
108
|
+
'DependentsAnnotation',
|
109
|
+
'DictAnnotation',
|
110
|
+
'DistSimAnnotation',
|
111
|
+
'DoAnnotation',
|
112
|
+
'DocDateAnnotation',
|
113
|
+
'DocIDAnnotation',
|
114
|
+
'DomainAnnotation',
|
115
|
+
'EndIndexAnnotation',
|
116
|
+
'EntityClassAnnotation',
|
117
|
+
'EntityRuleAnnotation',
|
118
|
+
'EntityTypeAnnotation',
|
119
|
+
'FeaturesAnnotation',
|
120
|
+
'FemaleGazAnnotation',
|
121
|
+
'FirstChildAnnotation',
|
122
|
+
'ForcedSentenceEndAnnotation',
|
123
|
+
'FreqAnnotation',
|
124
|
+
'GazAnnotation',
|
125
|
+
'GazetteerAnnotation',
|
126
|
+
'GenericTokensAnnotation',
|
127
|
+
'GeniaAnnotation',
|
128
|
+
'GoldAnswerAnnotation',
|
129
|
+
'GovernorAnnotation',
|
130
|
+
'GrandparentAnnotation',
|
131
|
+
'HaveAnnotation',
|
132
|
+
'HeadWordStringAnnotation',
|
133
|
+
'HeightAnnotation',
|
134
|
+
'IDAnnotation',
|
135
|
+
'IDFAnnotation',
|
136
|
+
'INAnnotation',
|
137
|
+
'IndexAnnotation',
|
138
|
+
'InterpretationAnnotation',
|
139
|
+
'IsDateRangeAnnotation',
|
140
|
+
'IsURLAnnotation',
|
141
|
+
'LabelAnnotation',
|
142
|
+
'LastGazAnnotation',
|
143
|
+
'LastTaggedAnnotation',
|
144
|
+
'LBeginAnnotation',
|
145
|
+
'LeftChildrenNodeAnnotation',
|
146
|
+
'LeftTermAnnotation',
|
147
|
+
'LemmaAnnotation',
|
148
|
+
'LEndAnnotation',
|
149
|
+
'LengthAnnotation',
|
150
|
+
'LMiddleAnnotation',
|
151
|
+
'MaleGazAnnotation',
|
152
|
+
'MarkingAnnotation',
|
153
|
+
'MonthAnnotation',
|
154
|
+
'MorphoCaseAnnotation',
|
155
|
+
'MorphoGenAnnotation',
|
156
|
+
'MorphoNumAnnotation',
|
157
|
+
'MorphoPersAnnotation',
|
158
|
+
'NamedEntityTagAnnotation',
|
159
|
+
'NeighborsAnnotation',
|
160
|
+
'NERIDAnnotation',
|
161
|
+
'NormalizedNamedEntityTagAnnotation',
|
162
|
+
'NotAnnotation',
|
163
|
+
'NumericCompositeObjectAnnotation',
|
164
|
+
'NumericCompositeTypeAnnotation',
|
165
|
+
'NumericCompositeValueAnnotation',
|
166
|
+
'NumericObjectAnnotation',
|
167
|
+
'NumericTypeAnnotation',
|
168
|
+
'NumericValueAnnotation',
|
169
|
+
'NumerizedTokensAnnotation',
|
170
|
+
'NumTxtSentencesAnnotation',
|
171
|
+
'OriginalAnswerAnnotation',
|
172
|
+
'OriginalCharAnnotation',
|
173
|
+
'OriginalTextAnnotation',
|
174
|
+
'ParagraphAnnotation',
|
175
|
+
'ParagraphsAnnotation',
|
176
|
+
'ParaPositionAnnotation',
|
177
|
+
'ParentAnnotation',
|
178
|
+
'PartOfSpeechAnnotation',
|
179
|
+
'PercentAnnotation',
|
180
|
+
'PhraseWordsAnnotation',
|
181
|
+
'PhraseWordsTagAnnotation',
|
182
|
+
'PolarityAnnotation',
|
183
|
+
'PositionAnnotation',
|
184
|
+
'PossibleAnswersAnnotation',
|
185
|
+
'PredictedAnswerAnnotation',
|
186
|
+
'PrevChildAnnotation',
|
187
|
+
'PriorAnnotation',
|
188
|
+
'ProjectedCategoryAnnotation',
|
189
|
+
'ProtoAnnotation',
|
190
|
+
'RoleAnnotation',
|
191
|
+
'SectionAnnotation',
|
192
|
+
'SemanticHeadTagAnnotation',
|
193
|
+
'SemanticHeadWordAnnotation',
|
194
|
+
'SemanticTagAnnotation',
|
195
|
+
'SemanticWordAnnotation',
|
196
|
+
'SentenceIDAnnotation',
|
197
|
+
'SentenceIndexAnnotation',
|
198
|
+
'SentencePositionAnnotation',
|
199
|
+
'SentencesAnnotation',
|
200
|
+
'ShapeAnnotation',
|
201
|
+
'SpaceBeforeAnnotation',
|
202
|
+
'SpanAnnotation',
|
203
|
+
'SpeakerAnnotation',
|
204
|
+
'SRL_ID',
|
205
|
+
'SRLIDAnnotation',
|
206
|
+
'SRLInstancesAnnotation',
|
207
|
+
'StackedNamedEntityTagAnnotation',
|
208
|
+
'StateAnnotation',
|
209
|
+
'StemAnnotation',
|
210
|
+
'SubcategorizationAnnotation',
|
211
|
+
'TagLabelAnnotation',
|
212
|
+
'TextAnnotation',
|
213
|
+
'TokenBeginAnnotation',
|
214
|
+
'TokenEndAnnotation',
|
215
|
+
'TokensAnnotation',
|
216
|
+
'TopicAnnotation',
|
217
|
+
'TrueCaseAnnotation',
|
218
|
+
'TrueCaseTextAnnotation',
|
219
|
+
'TrueTagAnnotation',
|
220
|
+
'UBlockAnnotation',
|
221
|
+
'UnaryAnnotation',
|
222
|
+
'UnknownAnnotation',
|
223
|
+
'UtteranceAnnotation',
|
224
|
+
'UTypeAnnotation',
|
225
|
+
'ValueAnnotation',
|
226
|
+
'VerbSenseAnnotation',
|
227
|
+
'WebAnnotation',
|
228
|
+
'WordFormAnnotation',
|
229
|
+
'WordnetSynAnnotation',
|
230
|
+
'WordPositionAnnotation',
|
231
|
+
'WordSenseAnnotation',
|
232
|
+
'XmlContextAnnotation',
|
233
|
+
'XmlElementAnnotation',
|
234
|
+
'YearAnnotation'
|
235
|
+
],
|
236
|
+
|
237
|
+
'nlp.dcoref.CorefCoreAnnotations' => [
|
238
|
+
|
239
|
+
'CorefAnnotation',
|
240
|
+
'CorefChainAnnotation',
|
241
|
+
'CorefClusterAnnotation',
|
242
|
+
'CorefClusterIdAnnotation',
|
243
|
+
'CorefDestAnnotation',
|
244
|
+
'CorefGraphAnnotation'
|
245
|
+
],
|
246
|
+
|
247
|
+
'nlp.ling.CoreLabel' => [
|
248
|
+
'GenericAnnotation'
|
249
|
+
],
|
250
|
+
|
251
|
+
'nlp.trees.EnglishGrammaticalRelations' => [
|
252
|
+
'AbbreviationModifierGRAnnotation',
|
253
|
+
'AdjectivalComplementGRAnnotation',
|
254
|
+
'AdjectivalModifierGRAnnotation',
|
255
|
+
'AdvClauseModifierGRAnnotation',
|
256
|
+
'AdverbialModifierGRAnnotation',
|
257
|
+
'AgentGRAnnotation',
|
258
|
+
'AppositionalModifierGRAnnotation',
|
259
|
+
'ArgumentGRAnnotation',
|
260
|
+
'AttributiveGRAnnotation',
|
261
|
+
'AuxModifierGRAnnotation',
|
262
|
+
'AuxPassiveGRAnnotation',
|
263
|
+
'ClausalComplementGRAnnotation',
|
264
|
+
'ClausalPassiveSubjectGRAnnotation',
|
265
|
+
'ClausalSubjectGRAnnotation',
|
266
|
+
'ComplementGRAnnotation',
|
267
|
+
'ComplementizerGRAnnotation',
|
268
|
+
'ConjunctGRAnnotation',
|
269
|
+
'ControllingSubjectGRAnnotation',
|
270
|
+
'CoordinationGRAnnotation',
|
271
|
+
'CopulaGRAnnotation',
|
272
|
+
'DeterminerGRAnnotation',
|
273
|
+
'DirectObjectGRAnnotation',
|
274
|
+
'ExpletiveGRAnnotation',
|
275
|
+
'IndirectObjectGRAnnotation',
|
276
|
+
'InfinitivalModifierGRAnnotation',
|
277
|
+
'MarkerGRAnnotation',
|
278
|
+
'ModifierGRAnnotation',
|
279
|
+
'MultiWordExpressionGRAnnotation',
|
280
|
+
'NegationModifierGRAnnotation',
|
281
|
+
'NominalPassiveSubjectGRAnnotation',
|
282
|
+
'NominalSubjectGRAnnotation',
|
283
|
+
'NounCompoundModifierGRAnnotation',
|
284
|
+
'NpAdverbialModifierGRAnnotation',
|
285
|
+
'NumberModifierGRAnnotation',
|
286
|
+
'NumericModifierGRAnnotation',
|
287
|
+
'ObjectGRAnnotation',
|
288
|
+
'ParataxisGRAnnotation',
|
289
|
+
'ParticipialModifierGRAnnotation',
|
290
|
+
'PhrasalVerbParticleGRAnnotation',
|
291
|
+
'PossessionModifierGRAnnotation',
|
292
|
+
'PossessiveModifierGRAnnotation',
|
293
|
+
'PreconjunctGRAnnotation',
|
294
|
+
'PredeterminerGRAnnotation',
|
295
|
+
'PredicateGRAnnotation',
|
296
|
+
'PrepositionalComplementGRAnnotation',
|
297
|
+
'PrepositionalModifierGRAnnotation',
|
298
|
+
'PrepositionalObjectGRAnnotation',
|
299
|
+
'PunctuationGRAnnotation',
|
300
|
+
'PurposeClauseModifierGRAnnotation',
|
301
|
+
'QuantifierModifierGRAnnotation',
|
302
|
+
'ReferentGRAnnotation',
|
303
|
+
'RelativeClauseModifierGRAnnotation',
|
304
|
+
'RelativeGRAnnotation',
|
305
|
+
'SemanticDependentGRAnnotation',
|
306
|
+
'SubjectGRAnnotation',
|
307
|
+
'TemporalModifierGRAnnotation',
|
308
|
+
'XClausalComplementGRAnnotation'
|
309
|
+
],
|
310
|
+
|
311
|
+
'nlp.trees.GrammaticalRelation' => [
|
312
|
+
'DependentGRAnnotation',
|
313
|
+
'GovernorGRAnnotation',
|
314
|
+
'GrammaticalRelationAnnotation',
|
315
|
+
'KillGRAnnotation',
|
316
|
+
'Language',
|
317
|
+
'RootGRAnnotation'
|
318
|
+
],
|
319
|
+
|
320
|
+
'nlp.ie.machinereading.structure.MachineReadingAnnotations' => [
|
321
|
+
'DependencyAnnotation',
|
322
|
+
'DocumentDirectoryAnnotation',
|
323
|
+
'DocumentIdAnnotation',
|
324
|
+
'EntityMentionsAnnotation',
|
325
|
+
'EventMentionsAnnotation',
|
326
|
+
'GenderAnnotation',
|
327
|
+
'RelationMentionsAnnotation',
|
328
|
+
'TriggerAnnotation'
|
329
|
+
],
|
330
|
+
|
331
|
+
'nlp.parser.lexparser.ParserAnnotations' => [
|
332
|
+
'ConstraintAnnotation'
|
333
|
+
],
|
334
|
+
|
335
|
+
'nlp.trees.semgraph.SemanticGraphCoreAnnotations' => [
|
336
|
+
'SemanticGraphBasicDependenciesAnnotation',
|
337
|
+
'SemanticGraphCollapsedCCProcessedDependenciesAnnotation',
|
338
|
+
'SemanticGraphCollapsedDependenciesAnnotation'
|
339
|
+
],
|
340
|
+
|
341
|
+
'nlp.time.TimeAnnotations' => [
|
342
|
+
'TimexAnnotation',
|
343
|
+
'TimexAnnotations'
|
344
|
+
],
|
345
|
+
|
346
|
+
'nlp.time.TimeExpression' => [
|
347
|
+
'Annotation',
|
348
|
+
'ChildrenAnnotation'
|
349
|
+
],
|
350
|
+
|
351
|
+
'nlp.trees.TreeCoreAnnotations' => [
|
352
|
+
'TreeHeadTagAnnotation',
|
353
|
+
'TreeHeadWordAnnotation',
|
354
|
+
'TreeAnnotation'
|
355
|
+
]
|
356
|
+
}
|
357
|
+
|
358
|
+
annotations_by_name = {}
|
359
|
+
Annotations.each do |base_class, annotation_classes|
|
360
|
+
annotation_classes.each do |annotation_class|
|
361
|
+
annotations_by_name[annotation_class] ||= []
|
362
|
+
annotations_by_name[annotation_class] << base_class
|
363
|
+
end
|
364
|
+
end
|
365
|
+
|
366
|
+
AnnotationsByName = annotations_by_name
|
367
|
+
|
368
|
+
# Modify the Rjb JavaProxy class to add our own method to get annotations.
|
369
|
+
Rjb::Rjb_JavaProxy.class_eval do
|
370
|
+
|
371
|
+
# Dynamically defined on all proxied annotation classes.
|
372
|
+
# Get an annotation using the annotation bridge.
|
373
|
+
def get(annotation, anno_base = nil)
|
374
|
+
if !java_methods.include?('get(Ljava.lang.Class;)')
|
375
|
+
raise'No annotation can be retrieved on this object.'
|
376
|
+
else
|
377
|
+
anno_class = "#{StanfordCoreNLP.camel_case(annotation)}Annotation"
|
378
|
+
if anno_base
|
379
|
+
raise "The path #{anno_base} doesn't exist." unless Annotations[anno_base]
|
380
|
+
anno_bases = [anno_base]
|
381
|
+
else
|
382
|
+
anno_bases = AnnotationsByName[anno_class]
|
383
|
+
raise "The annotation #{anno_class} doesn't exist." unless anno_bases
|
384
|
+
end
|
385
|
+
if anno_bases.size > 1
|
386
|
+
msg = "There are many different annotations bearing the name #{anno_class}. "
|
387
|
+
msg << "Please specify one of the following base classes as second parameter to disambiguate: "
|
388
|
+
msg << anno_bases.join(',')
|
389
|
+
raise msg
|
390
|
+
else
|
391
|
+
base_class = anno_bases[0]
|
392
|
+
end
|
393
|
+
url = "edu.stanford.#{base_class}$#{anno_class}"
|
394
|
+
AnnotationBridge.getAnnotation(self, url)
|
395
|
+
end
|
396
|
+
end
|
397
|
+
|
398
|
+
end
|
399
|
+
|
400
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-core-nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-01-28 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rjb
|
16
|
-
requirement: &
|
16
|
+
requirement: &70145364951100 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,18 +21,19 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70145364951100
|
25
25
|
description: ! " High-level Ruby bindings to the Stanford CoreNLP package, a set natural
|
26
26
|
language processing \ntools for English, including tokenization, part-of-speech
|
27
|
-
tagging, lemmatization, named entity recognition
|
27
|
+
tagging, lemmatization, named entity recognition,\nparsing, and coreference resolution. "
|
28
28
|
email:
|
29
29
|
- louis.mullie@gmail.com
|
30
30
|
executables: []
|
31
31
|
extensions: []
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
|
-
- lib/stanford-core-nlp/
|
35
|
-
- lib/stanford-core-nlp/
|
34
|
+
- lib/stanford-core-nlp/jar_loader.rb
|
35
|
+
- lib/stanford-core-nlp/java_wrapper.rb
|
36
|
+
- lib/stanford-core-nlp/stanford_annotations.rb
|
36
37
|
- lib/stanford-core-nlp.rb
|
37
38
|
- bin/bridge.jar
|
38
39
|
- bin/classifiers/all.3class.distsim.crf.ser.gz
|
@@ -1,37 +0,0 @@
|
|
1
|
-
module StanfordCoreNLP
|
2
|
-
|
3
|
-
# Modify the Rjb JavaProxy class to add our own methods to every Java object.
|
4
|
-
Rjb::Rjb_JavaProxy.class_eval do
|
5
|
-
|
6
|
-
# Dynamically defined on all proxied Java objects.
|
7
|
-
# Shorthand for to_string defined by Java classes.
|
8
|
-
def to_s; to_string; end
|
9
|
-
|
10
|
-
# Dynamically defined on all proxied Java iterators.
|
11
|
-
# Provide Ruby-style iterators to wrap Java iterators.
|
12
|
-
def each
|
13
|
-
if !java_methods.include?('iterator()')
|
14
|
-
raise 'This object cannot be iterated.'
|
15
|
-
else
|
16
|
-
i = self.iterator
|
17
|
-
while i.has_next; yield i.next; end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
# Dynamically defined on all proxied annotation classes.
|
22
|
-
# Get an annotation using the annotation bridge.
|
23
|
-
def get(annotation)
|
24
|
-
if !java_methods.include?('get(Ljava.lang.Class;)')
|
25
|
-
raise 'No annotation can be retrieved on this object.'
|
26
|
-
else
|
27
|
-
base_class = (annotation.to_s.split('_')[0] == 'coref') ?
|
28
|
-
'edu.stanford.nlp.dcoref.CorefCoreAnnotations$' :
|
29
|
-
'edu.stanford.nlp.ling.CoreAnnotations$'
|
30
|
-
anno_class = annotation.to_s.gsub(/^[a-z]|_[a-z]/) { |a| a.upcase }.gsub('_', '')
|
31
|
-
url = "#{base_class}#{anno_class}Annotation"
|
32
|
-
AnnotationBridge.getAnnotation(self, url)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
end
|