stanford-core-nlp 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/stanford-core-nlp.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module StanfordCoreNLP
|
2
2
|
|
3
|
-
VERSION = '0.1.
|
4
|
-
require 'stanford-core-nlp/
|
5
|
-
require 'stanford-core-nlp/
|
6
|
-
|
3
|
+
VERSION = '0.1.2'
|
4
|
+
require 'stanford-core-nlp/jar_loader.rb'
|
5
|
+
require 'stanford-core-nlp/java_wrapper'
|
6
|
+
require 'stanford-core-nlp/stanford_annotations'
|
7
|
+
|
7
8
|
class << self
|
8
9
|
# The path in which to look for the Stanford JAR files.
|
9
10
|
# This is passed to JarLoader.
|
@@ -51,11 +52,18 @@ module StanfordCoreNLP
|
|
51
52
|
self.model_files[name] = file
|
52
53
|
end
|
53
54
|
|
55
|
+
@@initialized = false
|
56
|
+
# Load the JARs, create the classes.
#
# Passes the configured jvm_args, jar_path and log_file to load_jars,
# then calls create_classes and records completion in @@initialized.
#
# The method is idempotent: once @@initialized is true, further calls
# return immediately, so create_classes does not const_set the same
# constants a second time.
#
# Returns true.
def self.init
  return true if @@initialized
  self.load_jars(self.jvm_args, self.jar_path, self.log_file)
  self.create_classes
  @@initialized = true
end
|
62
|
+
|
54
63
|
# Load a StanfordCoreNLP pipeline with the specified JVM flags and
|
55
64
|
# StanfordCoreNLP properties (hash of property => values).
|
56
65
|
def self.load(*annotators)
|
57
|
-
self.
|
58
|
-
self.create_classes
|
66
|
+
self.init unless @@initialized
|
59
67
|
# Prepend the JAR path to the model files.
|
60
68
|
properties = {}
|
61
69
|
self.model_files.each { |k,v| properties[k] = self.jar_path + v }
|
@@ -84,7 +92,16 @@ module StanfordCoreNLP
|
|
84
92
|
const_set(:Properties, Rjb::import('java.util.Properties'))
|
85
93
|
const_set(:AnnotationBridge, Rjb::import('AnnotationBridge'))
|
86
94
|
end
|
87
|
-
|
95
|
+
|
96
|
+
# Load a class (e.g. PTBTokenizerAnnotator) in a specific
# class path (default is 'edu.stanford.nlp.pipeline').
# The class is then accessible under the StanfordCoreNLP
# namespace, e.g. StanfordCoreNLP::PTBTokenizerAnnotator.
#
# klass - Simple name of the class, as a String or a Symbol.
# base  - Package prefix joined to klass with a dot to form the
#         name passed to Rjb::import.
#
# Runs init first if it has not run yet. Returns whatever
# Rjb::import returned (const_set returns the object it assigns).
def self.load_class(klass, base = 'edu.stanford.nlp.pipeline')
  self.init unless @@initialized
  # to_sym is defined on both String and Symbol; intern is String-only.
  const_set(klass.to_sym, Rjb::import("#{base}.#{klass}"))
end
|
104
|
+
|
88
105
|
# Create a java.util.Properties object from a hash.
|
89
106
|
def self.get_properties(properties)
|
90
107
|
props = Properties.new
|
@@ -94,4 +111,9 @@ module StanfordCoreNLP
|
|
94
111
|
props
|
95
112
|
end
|
96
113
|
|
114
|
+
# Helper function: under_case -> CamelCase.
#
# text - Anything responding to to_s (String, Symbol, ...).
#
# Upcases the first character of the string when it is a lowercase
# ASCII letter, upcases every lowercase ASCII letter that directly
# follows an underscore, then removes all underscores.
#
# Returns a new String, e.g. :part_of_speech -> "PartOfSpeech".
def self.camel_case(text)
  # \A anchors at the start of the string only; ^ would also match
  # after every embedded newline.
  text.to_s.gsub(/\A[a-z]|_[a-z]/, &:upcase).delete('_')
end
|
118
|
+
|
97
119
|
end
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module StanfordCoreNLP

  # Modify the Rjb JavaProxy class to add our own methods to every Java object.
  Rjb::Rjb_JavaProxy.class_eval do

    # Dynamically defined on all proxied Java objects.
    # Shorthand for to_string defined by Java classes.
    def to_s
      to_string
    end

    # Dynamically defined on all proxied Java iterators.
    # Provide Ruby-style iterators to wrap Java iterators.
    #
    # Yields each value returned by the Java iterator's next, for as
    # long as has_next is true. When called without a block, returns an
    # Enumerator over the same values instead of failing at the first
    # yield.
    #
    # Raises RuntimeError if java_methods does not list 'iterator()'.
    def each
      unless java_methods.include?('iterator()')
        raise 'This object cannot be iterated.'
      end
      return enum_for(:each) unless block_given?

      i = iterator
      yield i.next while i.has_next
    end

  end
end
|
@@ -0,0 +1,400 @@
|
|
1
|
+
module StanfordCoreNLP
|
2
|
+
|
3
|
+
Annotations = {
|
4
|
+
|
5
|
+
'nlp.trees.international.pennchinese.ChineseGrammaticalRelations' => [
|
6
|
+
'AdjectivalModifierGRAnnotation',
|
7
|
+
'AdverbialModifierGRAnnotation',
|
8
|
+
'ArgumentGRAnnotation',
|
9
|
+
'AspectMarkerGRAnnotation',
|
10
|
+
'AssociativeMarkerGRAnnotation',
|
11
|
+
'AssociativeModifierGRAnnotation',
|
12
|
+
'AttributiveGRAnnotation',
|
13
|
+
'AuxModifierGRAnnotation',
|
14
|
+
'AuxPassiveGRAnnotation',
|
15
|
+
'BaGRAnnotation',
|
16
|
+
'ClausalComplementGRAnnotation',
|
17
|
+
'ClausalSubjectGRAnnotation',
|
18
|
+
'ClauseModifierGRAnnotation',
|
19
|
+
'ComplementGRAnnotation',
|
20
|
+
'ComplementizerGRAnnotation',
|
21
|
+
'ControllingSubjectGRAnnotation',
|
22
|
+
'CoordinationGRAnnotation',
|
23
|
+
'DeterminerGRAnnotation',
|
24
|
+
'DirectObjectGRAnnotation',
|
25
|
+
'DvpMarkerGRAnnotation',
|
26
|
+
'DvpModifierGRAnnotation',
|
27
|
+
'EtcGRAnnotation',
|
28
|
+
'LocalizerComplementGRAnnotation',
|
29
|
+
'ModalGRAnnotation',
|
30
|
+
'ModifierGRAnnotation',
|
31
|
+
'NegationModifierGRAnnotation',
|
32
|
+
'NominalPassiveSubjectGRAnnotation',
|
33
|
+
'NominalSubjectGRAnnotation',
|
34
|
+
'NounCompoundModifierGRAnnotation',
|
35
|
+
'NumberModifierGRAnnotation',
|
36
|
+
'NumericModifierGRAnnotation',
|
37
|
+
'ObjectGRAnnotation',
|
38
|
+
'OrdNumberGRAnnotation',
|
39
|
+
'ParentheticalGRAnnotation',
|
40
|
+
'ParticipialModifierGRAnnotation',
|
41
|
+
'PreconjunctGRAnnotation',
|
42
|
+
'PrepositionalLocalizerModifierGRAnnotation',
|
43
|
+
'PrepositionalModifierGRAnnotation',
|
44
|
+
'PrepositionalObjectGRAnnotation',
|
45
|
+
'PunctuationGRAnnotation',
|
46
|
+
'RangeGRAnnotation',
|
47
|
+
'RelativeClauseModifierGRAnnotation',
|
48
|
+
'ResultativeComplementGRAnnotation',
|
49
|
+
'SemanticDependentGRAnnotation',
|
50
|
+
'SubjectGRAnnotation',
|
51
|
+
'TemporalClauseGRAnnotation',
|
52
|
+
'TemporalGRAnnotation',
|
53
|
+
'TimePostpositionGRAnnotation',
|
54
|
+
'TopicGRAnnotation',
|
55
|
+
'VerbCompoundGRAnnotation',
|
56
|
+
'VerbModifierGRAnnotation',
|
57
|
+
'XClausalComplementGRAnnotation'
|
58
|
+
],
|
59
|
+
|
60
|
+
'nlp.dcoref.CoNLL2011DocumentReader' => [
|
61
|
+
'CorefMentionAnnotation',
|
62
|
+
'NamedEntityAnnotation'
|
63
|
+
],
|
64
|
+
|
65
|
+
'nlp.ling.CoreAnnotations' => [
|
66
|
+
|
67
|
+
'AbbrAnnotation',
|
68
|
+
'AbgeneAnnotation',
|
69
|
+
'AbstrAnnotation',
|
70
|
+
'AfterAnnotation',
|
71
|
+
'AnswerAnnotation',
|
72
|
+
'AnswerObjectAnnotation',
|
73
|
+
'AntecedentAnnotation',
|
74
|
+
'ArgDescendentAnnotation',
|
75
|
+
'ArgumentAnnotation',
|
76
|
+
'BagOfWordsAnnotation',
|
77
|
+
'BeAnnotation',
|
78
|
+
'BeforeAnnotation',
|
79
|
+
'BeginIndexAnnotation',
|
80
|
+
'BestCliquesAnnotation',
|
81
|
+
'BestFullAnnotation',
|
82
|
+
'CalendarAnnotation',
|
83
|
+
'CategoryAnnotation',
|
84
|
+
'CategoryFunctionalTagAnnotation',
|
85
|
+
'CharacterOffsetBeginAnnotation',
|
86
|
+
'CharacterOffsetEndAnnotation',
|
87
|
+
'CharAnnotation',
|
88
|
+
'ChineseCharAnnotation',
|
89
|
+
'ChineseIsSegmentedAnnotation',
|
90
|
+
'ChineseOrigSegAnnotation',
|
91
|
+
'ChineseSegAnnotation',
|
92
|
+
'ChunkAnnotation',
|
93
|
+
'CoarseTagAnnotation',
|
94
|
+
'CommonWordsAnnotation',
|
95
|
+
'CoNLLDepAnnotation',
|
96
|
+
'CoNLLDepParentIndexAnnotation',
|
97
|
+
'CoNLLDepTypeAnnotation',
|
98
|
+
'CoNLLPredicateAnnotation',
|
99
|
+
'CoNLLSRLAnnotation',
|
100
|
+
'ContextsAnnotation',
|
101
|
+
'CopyAnnotation',
|
102
|
+
'CostMagnificationAnnotation',
|
103
|
+
'CovertIDAnnotation',
|
104
|
+
'D2_LBeginAnnotation',
|
105
|
+
'D2_LEndAnnotation',
|
106
|
+
'D2_LMiddleAnnotation',
|
107
|
+
'DayAnnotation',
|
108
|
+
'DependentsAnnotation',
|
109
|
+
'DictAnnotation',
|
110
|
+
'DistSimAnnotation',
|
111
|
+
'DoAnnotation',
|
112
|
+
'DocDateAnnotation',
|
113
|
+
'DocIDAnnotation',
|
114
|
+
'DomainAnnotation',
|
115
|
+
'EndIndexAnnotation',
|
116
|
+
'EntityClassAnnotation',
|
117
|
+
'EntityRuleAnnotation',
|
118
|
+
'EntityTypeAnnotation',
|
119
|
+
'FeaturesAnnotation',
|
120
|
+
'FemaleGazAnnotation',
|
121
|
+
'FirstChildAnnotation',
|
122
|
+
'ForcedSentenceEndAnnotation',
|
123
|
+
'FreqAnnotation',
|
124
|
+
'GazAnnotation',
|
125
|
+
'GazetteerAnnotation',
|
126
|
+
'GenericTokensAnnotation',
|
127
|
+
'GeniaAnnotation',
|
128
|
+
'GoldAnswerAnnotation',
|
129
|
+
'GovernorAnnotation',
|
130
|
+
'GrandparentAnnotation',
|
131
|
+
'HaveAnnotation',
|
132
|
+
'HeadWordStringAnnotation',
|
133
|
+
'HeightAnnotation',
|
134
|
+
'IDAnnotation',
|
135
|
+
'IDFAnnotation',
|
136
|
+
'INAnnotation',
|
137
|
+
'IndexAnnotation',
|
138
|
+
'InterpretationAnnotation',
|
139
|
+
'IsDateRangeAnnotation',
|
140
|
+
'IsURLAnnotation',
|
141
|
+
'LabelAnnotation',
|
142
|
+
'LastGazAnnotation',
|
143
|
+
'LastTaggedAnnotation',
|
144
|
+
'LBeginAnnotation',
|
145
|
+
'LeftChildrenNodeAnnotation',
|
146
|
+
'LeftTermAnnotation',
|
147
|
+
'LemmaAnnotation',
|
148
|
+
'LEndAnnotation',
|
149
|
+
'LengthAnnotation',
|
150
|
+
'LMiddleAnnotation',
|
151
|
+
'MaleGazAnnotation',
|
152
|
+
'MarkingAnnotation',
|
153
|
+
'MonthAnnotation',
|
154
|
+
'MorphoCaseAnnotation',
|
155
|
+
'MorphoGenAnnotation',
|
156
|
+
'MorphoNumAnnotation',
|
157
|
+
'MorphoPersAnnotation',
|
158
|
+
'NamedEntityTagAnnotation',
|
159
|
+
'NeighborsAnnotation',
|
160
|
+
'NERIDAnnotation',
|
161
|
+
'NormalizedNamedEntityTagAnnotation',
|
162
|
+
'NotAnnotation',
|
163
|
+
'NumericCompositeObjectAnnotation',
|
164
|
+
'NumericCompositeTypeAnnotation',
|
165
|
+
'NumericCompositeValueAnnotation',
|
166
|
+
'NumericObjectAnnotation',
|
167
|
+
'NumericTypeAnnotation',
|
168
|
+
'NumericValueAnnotation',
|
169
|
+
'NumerizedTokensAnnotation',
|
170
|
+
'NumTxtSentencesAnnotation',
|
171
|
+
'OriginalAnswerAnnotation',
|
172
|
+
'OriginalCharAnnotation',
|
173
|
+
'OriginalTextAnnotation',
|
174
|
+
'ParagraphAnnotation',
|
175
|
+
'ParagraphsAnnotation',
|
176
|
+
'ParaPositionAnnotation',
|
177
|
+
'ParentAnnotation',
|
178
|
+
'PartOfSpeechAnnotation',
|
179
|
+
'PercentAnnotation',
|
180
|
+
'PhraseWordsAnnotation',
|
181
|
+
'PhraseWordsTagAnnotation',
|
182
|
+
'PolarityAnnotation',
|
183
|
+
'PositionAnnotation',
|
184
|
+
'PossibleAnswersAnnotation',
|
185
|
+
'PredictedAnswerAnnotation',
|
186
|
+
'PrevChildAnnotation',
|
187
|
+
'PriorAnnotation',
|
188
|
+
'ProjectedCategoryAnnotation',
|
189
|
+
'ProtoAnnotation',
|
190
|
+
'RoleAnnotation',
|
191
|
+
'SectionAnnotation',
|
192
|
+
'SemanticHeadTagAnnotation',
|
193
|
+
'SemanticHeadWordAnnotation',
|
194
|
+
'SemanticTagAnnotation',
|
195
|
+
'SemanticWordAnnotation',
|
196
|
+
'SentenceIDAnnotation',
|
197
|
+
'SentenceIndexAnnotation',
|
198
|
+
'SentencePositionAnnotation',
|
199
|
+
'SentencesAnnotation',
|
200
|
+
'ShapeAnnotation',
|
201
|
+
'SpaceBeforeAnnotation',
|
202
|
+
'SpanAnnotation',
|
203
|
+
'SpeakerAnnotation',
|
204
|
+
'SRL_ID',
|
205
|
+
'SRLIDAnnotation',
|
206
|
+
'SRLInstancesAnnotation',
|
207
|
+
'StackedNamedEntityTagAnnotation',
|
208
|
+
'StateAnnotation',
|
209
|
+
'StemAnnotation',
|
210
|
+
'SubcategorizationAnnotation',
|
211
|
+
'TagLabelAnnotation',
|
212
|
+
'TextAnnotation',
|
213
|
+
'TokenBeginAnnotation',
|
214
|
+
'TokenEndAnnotation',
|
215
|
+
'TokensAnnotation',
|
216
|
+
'TopicAnnotation',
|
217
|
+
'TrueCaseAnnotation',
|
218
|
+
'TrueCaseTextAnnotation',
|
219
|
+
'TrueTagAnnotation',
|
220
|
+
'UBlockAnnotation',
|
221
|
+
'UnaryAnnotation',
|
222
|
+
'UnknownAnnotation',
|
223
|
+
'UtteranceAnnotation',
|
224
|
+
'UTypeAnnotation',
|
225
|
+
'ValueAnnotation',
|
226
|
+
'VerbSenseAnnotation',
|
227
|
+
'WebAnnotation',
|
228
|
+
'WordFormAnnotation',
|
229
|
+
'WordnetSynAnnotation',
|
230
|
+
'WordPositionAnnotation',
|
231
|
+
'WordSenseAnnotation',
|
232
|
+
'XmlContextAnnotation',
|
233
|
+
'XmlElementAnnotation',
|
234
|
+
'YearAnnotation'
|
235
|
+
],
|
236
|
+
|
237
|
+
'nlp.dcoref.CorefCoreAnnotations' => [
|
238
|
+
|
239
|
+
'CorefAnnotation',
|
240
|
+
'CorefChainAnnotation',
|
241
|
+
'CorefClusterAnnotation',
|
242
|
+
'CorefClusterIdAnnotation',
|
243
|
+
'CorefDestAnnotation',
|
244
|
+
'CorefGraphAnnotation'
|
245
|
+
],
|
246
|
+
|
247
|
+
'nlp.ling.CoreLabel' => [
|
248
|
+
'GenericAnnotation'
|
249
|
+
],
|
250
|
+
|
251
|
+
'nlp.trees.EnglishGrammaticalRelations' => [
|
252
|
+
'AbbreviationModifierGRAnnotation',
|
253
|
+
'AdjectivalComplementGRAnnotation',
|
254
|
+
'AdjectivalModifierGRAnnotation',
|
255
|
+
'AdvClauseModifierGRAnnotation',
|
256
|
+
'AdverbialModifierGRAnnotation',
|
257
|
+
'AgentGRAnnotation',
|
258
|
+
'AppositionalModifierGRAnnotation',
|
259
|
+
'ArgumentGRAnnotation',
|
260
|
+
'AttributiveGRAnnotation',
|
261
|
+
'AuxModifierGRAnnotation',
|
262
|
+
'AuxPassiveGRAnnotation',
|
263
|
+
'ClausalComplementGRAnnotation',
|
264
|
+
'ClausalPassiveSubjectGRAnnotation',
|
265
|
+
'ClausalSubjectGRAnnotation',
|
266
|
+
'ComplementGRAnnotation',
|
267
|
+
'ComplementizerGRAnnotation',
|
268
|
+
'ConjunctGRAnnotation',
|
269
|
+
'ControllingSubjectGRAnnotation',
|
270
|
+
'CoordinationGRAnnotation',
|
271
|
+
'CopulaGRAnnotation',
|
272
|
+
'DeterminerGRAnnotation',
|
273
|
+
'DirectObjectGRAnnotation',
|
274
|
+
'ExpletiveGRAnnotation',
|
275
|
+
'IndirectObjectGRAnnotation',
|
276
|
+
'InfinitivalModifierGRAnnotation',
|
277
|
+
'MarkerGRAnnotation',
|
278
|
+
'ModifierGRAnnotation',
|
279
|
+
'MultiWordExpressionGRAnnotation',
|
280
|
+
'NegationModifierGRAnnotation',
|
281
|
+
'NominalPassiveSubjectGRAnnotation',
|
282
|
+
'NominalSubjectGRAnnotation',
|
283
|
+
'NounCompoundModifierGRAnnotation',
|
284
|
+
'NpAdverbialModifierGRAnnotation',
|
285
|
+
'NumberModifierGRAnnotation',
|
286
|
+
'NumericModifierGRAnnotation',
|
287
|
+
'ObjectGRAnnotation',
|
288
|
+
'ParataxisGRAnnotation',
|
289
|
+
'ParticipialModifierGRAnnotation',
|
290
|
+
'PhrasalVerbParticleGRAnnotation',
|
291
|
+
'PossessionModifierGRAnnotation',
|
292
|
+
'PossessiveModifierGRAnnotation',
|
293
|
+
'PreconjunctGRAnnotation',
|
294
|
+
'PredeterminerGRAnnotation',
|
295
|
+
'PredicateGRAnnotation',
|
296
|
+
'PrepositionalComplementGRAnnotation',
|
297
|
+
'PrepositionalModifierGRAnnotation',
|
298
|
+
'PrepositionalObjectGRAnnotation',
|
299
|
+
'PunctuationGRAnnotation',
|
300
|
+
'PurposeClauseModifierGRAnnotation',
|
301
|
+
'QuantifierModifierGRAnnotation',
|
302
|
+
'ReferentGRAnnotation',
|
303
|
+
'RelativeClauseModifierGRAnnotation',
|
304
|
+
'RelativeGRAnnotation',
|
305
|
+
'SemanticDependentGRAnnotation',
|
306
|
+
'SubjectGRAnnotation',
|
307
|
+
'TemporalModifierGRAnnotation',
|
308
|
+
'XClausalComplementGRAnnotation'
|
309
|
+
],
|
310
|
+
|
311
|
+
'nlp.trees.GrammaticalRelation' => [
|
312
|
+
'DependentGRAnnotation',
|
313
|
+
'GovernorGRAnnotation',
|
314
|
+
'GrammaticalRelationAnnotation',
|
315
|
+
'KillGRAnnotation',
|
316
|
+
'Language',
|
317
|
+
'RootGRAnnotation'
|
318
|
+
],
|
319
|
+
|
320
|
+
'nlp.ie.machinereading.structure.MachineReadingAnnotations' => [
|
321
|
+
'DependencyAnnotation',
|
322
|
+
'DocumentDirectoryAnnotation',
|
323
|
+
'DocumentIdAnnotation',
|
324
|
+
'EntityMentionsAnnotation',
|
325
|
+
'EventMentionsAnnotation',
|
326
|
+
'GenderAnnotation',
|
327
|
+
'RelationMentionsAnnotation',
|
328
|
+
'TriggerAnnotation'
|
329
|
+
],
|
330
|
+
|
331
|
+
'nlp.parser.lexparser.ParserAnnotations' => [
|
332
|
+
'ConstraintAnnotation'
|
333
|
+
],
|
334
|
+
|
335
|
+
'nlp.trees.semgraph.SemanticGraphCoreAnnotations' => [
|
336
|
+
'SemanticGraphBasicDependenciesAnnotation',
|
337
|
+
'SemanticGraphCollapsedCCProcessedDependenciesAnnotation',
|
338
|
+
'SemanticGraphCollapsedDependenciesAnnotation'
|
339
|
+
],
|
340
|
+
|
341
|
+
'nlp.time.TimeAnnotations' => [
|
342
|
+
'TimexAnnotation',
|
343
|
+
'TimexAnnotations'
|
344
|
+
],
|
345
|
+
|
346
|
+
'nlp.time.TimeExpression' => [
|
347
|
+
'Annotation',
|
348
|
+
'ChildrenAnnotation'
|
349
|
+
],
|
350
|
+
|
351
|
+
'nlp.trees.TreeCoreAnnotations' => [
|
352
|
+
'TreeHeadTagAnnotation',
|
353
|
+
'TreeHeadWordAnnotation',
|
354
|
+
'TreeAnnotation'
|
355
|
+
]
|
356
|
+
}
|
357
|
+
|
358
|
+
# Reverse index of Annotations: maps each annotation class name to the
# list of base classes that declare an annotation with that name. A name
# declared under several base classes gets one entry per base class.
AnnotationsByName =
  Annotations.each_with_object({}) do |(base_class, annotation_classes), index|
    annotation_classes.each do |annotation_class|
      (index[annotation_class] ||= []) << base_class
    end
  end
|
367
|
+
|
368
|
+
# Modify the Rjb JavaProxy class to add our own method to get annotations.
|
369
|
+
Rjb::Rjb_JavaProxy.class_eval do
|
370
|
+
|
371
|
+
# Dynamically defined on all proxied annotation classes.
# Get an annotation using the annotation bridge.
#
# annotation - Underscored annotation name without the "Annotation"
#              suffix; it is camel-cased and suffixed here.
# anno_base  - Optional key of Annotations naming the base class to
#              look in. Needed when several base classes declare an
#              annotation with the same name.
#
# Raises RuntimeError when the object has no get(Class) method, when
# anno_base is not a key of Annotations, when the annotation name is
# unknown, or when the name is ambiguous and no anno_base was given.
def get(annotation, anno_base = nil)
  unless java_methods.include?('get(Ljava.lang.Class;)')
    raise 'No annotation can be retrieved on this object.'
  end

  anno_class = "#{StanfordCoreNLP.camel_case(annotation)}Annotation"

  candidates =
    if anno_base
      raise "The path #{anno_base} doesn't exist." unless Annotations[anno_base]
      [anno_base]
    else
      known = AnnotationsByName[anno_class]
      raise "The annotation #{anno_class} doesn't exist." unless known
      known
    end

  if candidates.size > 1
    raise "There are many different annotations bearing the name #{anno_class}. " \
          "Please specify one of the following base classes as second parameter to disambiguate: " \
          "#{candidates.join(',')}"
  end

  # Nested Java classes are addressed as Outer$Inner.
  AnnotationBridge.getAnnotation(self, "edu.stanford.#{candidates[0]}$#{anno_class}")
end
|
397
|
+
|
398
|
+
end
|
399
|
+
|
400
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-core-nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-01-28 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rjb
|
16
|
-
requirement: &
|
16
|
+
requirement: &70145364951100 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,18 +21,19 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70145364951100
|
25
25
|
description: ! " High-level Ruby bindings to the Stanford CoreNLP package, a set natural
|
26
26
|
language processing \ntools for English, including tokenization, part-of-speech
|
27
|
-
tagging, lemmatization, named entity recognition
|
27
|
+
tagging, lemmatization, named entity recognition,\nparsing, and coreference resolution. "
|
28
28
|
email:
|
29
29
|
- louis.mullie@gmail.com
|
30
30
|
executables: []
|
31
31
|
extensions: []
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
|
-
- lib/stanford-core-nlp/
|
35
|
-
- lib/stanford-core-nlp/
|
34
|
+
- lib/stanford-core-nlp/jar_loader.rb
|
35
|
+
- lib/stanford-core-nlp/java_wrapper.rb
|
36
|
+
- lib/stanford-core-nlp/stanford_annotations.rb
|
36
37
|
- lib/stanford-core-nlp.rb
|
37
38
|
- bin/bridge.jar
|
38
39
|
- bin/classifiers/all.3class.distsim.crf.ser.gz
|
@@ -1,37 +0,0 @@
|
|
1
|
-
module StanfordCoreNLP
|
2
|
-
|
3
|
-
# Modify the Rjb JavaProxy class to add our own methods to every Java object.
|
4
|
-
Rjb::Rjb_JavaProxy.class_eval do
|
5
|
-
|
6
|
-
# Dynamically defined on all proxied Java objects.
|
7
|
-
# Shorthand for to_string defined by Java classes.
|
8
|
-
def to_s; to_string; end
|
9
|
-
|
10
|
-
# Dynamically defined on all proxied Java iterators.
|
11
|
-
# Provide Ruby-style iterators to wrap Java iterators.
|
12
|
-
def each
|
13
|
-
if !java_methods.include?('iterator()')
|
14
|
-
raise 'This object cannot be iterated.'
|
15
|
-
else
|
16
|
-
i = self.iterator
|
17
|
-
while i.has_next; yield i.next; end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
# Dynamically defined on all proxied annotation classes.
|
22
|
-
# Get an annotation using the annotation bridge.
|
23
|
-
def get(annotation)
|
24
|
-
if !java_methods.include?('get(Ljava.lang.Class;)')
|
25
|
-
raise 'No annotation can be retrieved on this object.'
|
26
|
-
else
|
27
|
-
base_class = (annotation.to_s.split('_')[0] == 'coref') ?
|
28
|
-
'edu.stanford.nlp.dcoref.CorefCoreAnnotations$' :
|
29
|
-
'edu.stanford.nlp.ling.CoreAnnotations$'
|
30
|
-
anno_class = annotation.to_s.gsub(/^[a-z]|_[a-z]/) { |a| a.upcase }.gsub('_', '')
|
31
|
-
url = "#{base_class}#{anno_class}Annotation"
|
32
|
-
AnnotationBridge.getAnnotation(self, url)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
end
|