RubyGems - stanford-core-nlp - Versions diffs - 0.1.1 → 0.1.2 - Mend

stanford-core-nlp 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/lib/stanford-core-nlp.rb +29 -7
data/lib/stanford-core-nlp/{jar-loader.rb → jar_loader.rb} +0 -0
data/lib/stanford-core-nlp/java_wrapper.rb +22 -0
data/lib/stanford-core-nlp/stanford_annotations.rb +400 -0
metadata +7 -6
data/lib/stanford-core-nlp/java-wrapper.rb +0 -37

data/lib/stanford-core-nlp.rb CHANGED

@@ -1,9 +1,10 @@
 module StanfordCoreNLP
-  VERSION = '0.1.1'
-  require 'stanford-core-nlp/jar-loader.rb'
-  require 'stanford-core-nlp/java-wrapper.rb'
+  VERSION = '0.1.2'
+  require 'stanford-core-nlp/jar_loader.rb'
+  require 'stanford-core-nlp/java_wrapper'
+  require 'stanford-core-nlp/stanford_annotations'
   class << self
     # The path in which to look for the Stanford JAR files.
     # This is passed to JarLoader.
@@ -51,11 +52,18 @@ module StanfordCoreNLP
     self.model_files[name] = file
   end
+  @@initialized = false
+  # Load the JARs, create the classes.
+  def self.init
+    self.load_jars(self.jvm_args, self.jar_path, self.log_file)
+    self.create_classes
+    @@initialized = true
+  end
   # Load a StanfordCoreNLP pipeline with the specified JVM flags and
   # StanfordCoreNLP properties (hash of property => values).
   def self.load(*annotators)
-    self.load_jars(self.jvm_args, self.jar_path, self.log_file)
-    self.create_classes
+    self.init unless @@initialized
     # Prepend the JAR path to the model files.
     properties = {}
     self.model_files.each { |k,v| properties[k] = self.jar_path + v }
@@ -84,7 +92,16 @@ module StanfordCoreNLP
     const_set(:Properties, Rjb::import('java.util.Properties'))
     const_set(:AnnotationBridge, Rjb::import('AnnotationBridge'))
   end
+  # Load a class (e.g. PTBTokenizerAnnotator) in a specific
+  # class path (default is 'edu.stanford.nlp.pipeline').
+  # The class is then accessible under the StanfordCoreNLP
+  # namespace, e.g. StanfordCoreNLP::PTBTokenizerAnnotator.
+  def self.load_class(klass, base = 'edu.stanford.nlp.pipeline')
+    self.init unless @@initialized
+    const_set(klass.intern, Rjb::import("#{base}.#{klass}"))
+  end
   # Create a java.util.Properties object from a hash.
   def self.get_properties(properties)
     props = Properties.new
@@ -94,4 +111,9 @@ module StanfordCoreNLP
     props
   end
+  # Helper function: under_case -> CamelCase.
+  def self.camel_case(text)
+    text.to_s.gsub(/^[a-z]|_[a-z]/) { |a| a.upcase }.gsub('_', '')
+  end
 end

data/lib/stanford-core-nlp/{jar-loader.rb → jar_loader.rb} RENAMED

File without changes

data/lib/stanford-core-nlp/java_wrapper.rb ADDED

@@ -0,0 +1,22 @@
+module StanfordCoreNLP
+  # Modify the Rjb JavaProxy class to add our own methods to every Java object.
+  Rjb::Rjb_JavaProxy.class_eval do
+    # Dynamically defined on all proxied Java objects.
+    # Shorthand for to_string defined by Java classes.
+    def to_s; to_string; end
+    # Dynamically defined on all proxied Java iterators.
+    # Provide Ruby-style iterators to wrap Java iterators.
+    def each
+      if !java_methods.include?('iterator()')
+        raise 'This object cannot be iterated.'
+      else
+        i = self.iterator
+        while i.has_next; yield i.next; end
+      end
+    end
+  end
+end

data/lib/stanford-core-nlp/stanford_annotations.rb ADDED

@@ -0,0 +1,400 @@
+module StanfordCoreNLP
+  Annotations = {
+   'nlp.trees.international.pennchinese.ChineseGrammaticalRelations' => [
+     'AdjectivalModifierGRAnnotation',
+     'AdverbialModifierGRAnnotation',
+     'ArgumentGRAnnotation',
+     'AspectMarkerGRAnnotation',
+     'AssociativeMarkerGRAnnotation',
+     'AssociativeModifierGRAnnotation',
+     'AttributiveGRAnnotation',
+     'AuxModifierGRAnnotation',
+     'AuxPassiveGRAnnotation',
+     'BaGRAnnotation',
+     'ClausalComplementGRAnnotation',
+     'ClausalSubjectGRAnnotation',
+     'ClauseModifierGRAnnotation',
+     'ComplementGRAnnotation',
+     'ComplementizerGRAnnotation',
+     'ControllingSubjectGRAnnotation',
+     'CoordinationGRAnnotation',
+     'DeterminerGRAnnotation',
+     'DirectObjectGRAnnotation',
+     'DvpMarkerGRAnnotation',
+     'DvpModifierGRAnnotation',
+     'EtcGRAnnotation',
+     'LocalizerComplementGRAnnotation',
+     'ModalGRAnnotation',
+     'ModifierGRAnnotation',
+     'NegationModifierGRAnnotation',
+     'NominalPassiveSubjectGRAnnotation',
+     'NominalSubjectGRAnnotation',
+     'NounCompoundModifierGRAnnotation',
+     'NumberModifierGRAnnotation',
+     'NumericModifierGRAnnotation',
+     'ObjectGRAnnotation',
+     'OrdNumberGRAnnotation',
+     'ParentheticalGRAnnotation',
+     'ParticipialModifierGRAnnotation',
+     'PreconjunctGRAnnotation',
+     'PrepositionalLocalizerModifierGRAnnotation',
+     'PrepositionalModifierGRAnnotation',
+     'PrepositionalObjectGRAnnotation',
+     'PunctuationGRAnnotation',
+     'RangeGRAnnotation',
+     'RelativeClauseModifierGRAnnotation',
+     'ResultativeComplementGRAnnotation',
+     'SemanticDependentGRAnnotation',
+     'SubjectGRAnnotation',
+     'TemporalClauseGRAnnotation',
+     'TemporalGRAnnotation',
+     'TimePostpositionGRAnnotation',
+     'TopicGRAnnotation',
+     'VerbCompoundGRAnnotation',
+     'VerbModifierGRAnnotation',
+     'XClausalComplementGRAnnotation'
+    ],
+   'nlp.dcoref.CoNLL2011DocumentReader' => [
+     'CorefMentionAnnotation',
+     'NamedEntityAnnotation'
+    ],
+   'nlp.ling.CoreAnnotations' => [
+     'AbbrAnnotation',
+     'AbgeneAnnotation',
+     'AbstrAnnotation',
+     'AfterAnnotation',
+     'AnswerAnnotation',
+     'AnswerObjectAnnotation',
+     'AntecedentAnnotation',
+     'ArgDescendentAnnotation',
+     'ArgumentAnnotation',
+     'BagOfWordsAnnotation',
+     'BeAnnotation',
+     'BeforeAnnotation',
+     'BeginIndexAnnotation',
+     'BestCliquesAnnotation',
+     'BestFullAnnotation',
+     'CalendarAnnotation',
+     'CategoryAnnotation',
+     'CategoryFunctionalTagAnnotation',
+     'CharacterOffsetBeginAnnotation',
+     'CharacterOffsetEndAnnotation',
+     'CharAnnotation',
+     'ChineseCharAnnotation',
+     'ChineseIsSegmentedAnnotation',
+     'ChineseOrigSegAnnotation',
+     'ChineseSegAnnotation',
+     'ChunkAnnotation',
+     'CoarseTagAnnotation',
+     'CommonWordsAnnotation',
+     'CoNLLDepAnnotation',
+     'CoNLLDepParentIndexAnnotation',
+     'CoNLLDepTypeAnnotation',
+     'CoNLLPredicateAnnotation',
+     'CoNLLSRLAnnotation',
+     'ContextsAnnotation',
+     'CopyAnnotation',
+     'CostMagnificationAnnotation',
+     'CovertIDAnnotation',
+     'D2_LBeginAnnotation',
+     'D2_LEndAnnotation',
+     'D2_LMiddleAnnotation',
+     'DayAnnotation',
+     'DependentsAnnotation',
+     'DictAnnotation',
+     'DistSimAnnotation',
+     'DoAnnotation',
+     'DocDateAnnotation',
+     'DocIDAnnotation',
+     'DomainAnnotation',
+     'EndIndexAnnotation',
+     'EntityClassAnnotation',
+     'EntityRuleAnnotation',
+     'EntityTypeAnnotation',
+     'FeaturesAnnotation',
+     'FemaleGazAnnotation',
+     'FirstChildAnnotation',
+     'ForcedSentenceEndAnnotation',
+     'FreqAnnotation',
+     'GazAnnotation',
+     'GazetteerAnnotation',
+     'GenericTokensAnnotation',
+     'GeniaAnnotation',
+     'GoldAnswerAnnotation',
+     'GovernorAnnotation',
+     'GrandparentAnnotation',
+     'HaveAnnotation',
+     'HeadWordStringAnnotation',
+     'HeightAnnotation',
+     'IDAnnotation',
+     'IDFAnnotation',
+     'INAnnotation',
+     'IndexAnnotation',
+     'InterpretationAnnotation',
+     'IsDateRangeAnnotation',
+     'IsURLAnnotation',
+     'LabelAnnotation',
+     'LastGazAnnotation',
+     'LastTaggedAnnotation',
+     'LBeginAnnotation',
+     'LeftChildrenNodeAnnotation',
+     'LeftTermAnnotation',
+     'LemmaAnnotation',
+     'LEndAnnotation',
+     'LengthAnnotation',
+     'LMiddleAnnotation',
+     'MaleGazAnnotation',
+     'MarkingAnnotation',
+     'MonthAnnotation',
+     'MorphoCaseAnnotation',
+     'MorphoGenAnnotation',
+     'MorphoNumAnnotation',
+     'MorphoPersAnnotation',
+     'NamedEntityTagAnnotation',
+     'NeighborsAnnotation',
+     'NERIDAnnotation',
+     'NormalizedNamedEntityTagAnnotation',
+     'NotAnnotation',
+     'NumericCompositeObjectAnnotation',
+     'NumericCompositeTypeAnnotation',
+     'NumericCompositeValueAnnotation',
+     'NumericObjectAnnotation',
+     'NumericTypeAnnotation',
+     'NumericValueAnnotation',
+     'NumerizedTokensAnnotation',
+     'NumTxtSentencesAnnotation',
+     'OriginalAnswerAnnotation',
+     'OriginalCharAnnotation',
+     'OriginalTextAnnotation',
+     'ParagraphAnnotation',
+     'ParagraphsAnnotation',
+     'ParaPositionAnnotation',
+     'ParentAnnotation',
+     'PartOfSpeechAnnotation',
+     'PercentAnnotation',
+     'PhraseWordsAnnotation',
+     'PhraseWordsTagAnnotation',
+     'PolarityAnnotation',
+     'PositionAnnotation',
+     'PossibleAnswersAnnotation',
+     'PredictedAnswerAnnotation',
+     'PrevChildAnnotation',
+     'PriorAnnotation',
+     'ProjectedCategoryAnnotation',
+     'ProtoAnnotation',
+     'RoleAnnotation',
+     'SectionAnnotation',
+     'SemanticHeadTagAnnotation',
+     'SemanticHeadWordAnnotation',
+     'SemanticTagAnnotation',
+     'SemanticWordAnnotation',
+     'SentenceIDAnnotation',
+     'SentenceIndexAnnotation',
+     'SentencePositionAnnotation',
+     'SentencesAnnotation',
+     'ShapeAnnotation',
+     'SpaceBeforeAnnotation',
+     'SpanAnnotation',
+     'SpeakerAnnotation',
+     'SRL_ID',
+     'SRLIDAnnotation',
+     'SRLInstancesAnnotation',
+     'StackedNamedEntityTagAnnotation',
+     'StateAnnotation',
+     'StemAnnotation',
+     'SubcategorizationAnnotation',
+     'TagLabelAnnotation',
+     'TextAnnotation',
+     'TokenBeginAnnotation',
+     'TokenEndAnnotation',
+     'TokensAnnotation',
+     'TopicAnnotation',
+     'TrueCaseAnnotation',
+     'TrueCaseTextAnnotation',
+     'TrueTagAnnotation',
+     'UBlockAnnotation',
+     'UnaryAnnotation',
+     'UnknownAnnotation',
+     'UtteranceAnnotation',
+     'UTypeAnnotation',
+     'ValueAnnotation',
+     'VerbSenseAnnotation',
+     'WebAnnotation',
+     'WordFormAnnotation',
+     'WordnetSynAnnotation',
+     'WordPositionAnnotation',
+     'WordSenseAnnotation',
+     'XmlContextAnnotation',
+     'XmlElementAnnotation',
+     'YearAnnotation'
+    ],
+   'nlp.dcoref.CorefCoreAnnotations' => [
+     'CorefAnnotation',
+     'CorefChainAnnotation',
+     'CorefClusterAnnotation',
+     'CorefClusterIdAnnotation',
+     'CorefDestAnnotation',
+     'CorefGraphAnnotation'
+    ],
+   'nlp.ling.CoreLabel' => [
+     'GenericAnnotation'
+    ],
+   'nlp.trees.EnglishGrammaticalRelations' => [
+     'AbbreviationModifierGRAnnotation',
+     'AdjectivalComplementGRAnnotation',
+     'AdjectivalModifierGRAnnotation',
+     'AdvClauseModifierGRAnnotation',
+     'AdverbialModifierGRAnnotation',
+     'AgentGRAnnotation',
+     'AppositionalModifierGRAnnotation',
+     'ArgumentGRAnnotation',
+     'AttributiveGRAnnotation',
+     'AuxModifierGRAnnotation',
+     'AuxPassiveGRAnnotation',
+     'ClausalComplementGRAnnotation',
+     'ClausalPassiveSubjectGRAnnotation',
+     'ClausalSubjectGRAnnotation',
+     'ComplementGRAnnotation',
+     'ComplementizerGRAnnotation',
+     'ConjunctGRAnnotation',
+     'ControllingSubjectGRAnnotation',
+     'CoordinationGRAnnotation',
+     'CopulaGRAnnotation',
+     'DeterminerGRAnnotation',
+     'DirectObjectGRAnnotation',
+     'ExpletiveGRAnnotation',
+     'IndirectObjectGRAnnotation',
+     'InfinitivalModifierGRAnnotation',
+     'MarkerGRAnnotation',
+     'ModifierGRAnnotation',
+     'MultiWordExpressionGRAnnotation',
+     'NegationModifierGRAnnotation',
+     'NominalPassiveSubjectGRAnnotation',
+     'NominalSubjectGRAnnotation',
+     'NounCompoundModifierGRAnnotation',
+     'NpAdverbialModifierGRAnnotation',
+     'NumberModifierGRAnnotation',
+     'NumericModifierGRAnnotation',
+     'ObjectGRAnnotation',
+     'ParataxisGRAnnotation',
+     'ParticipialModifierGRAnnotation',
+     'PhrasalVerbParticleGRAnnotation',
+     'PossessionModifierGRAnnotation',
+     'PossessiveModifierGRAnnotation',
+     'PreconjunctGRAnnotation',
+     'PredeterminerGRAnnotation',
+     'PredicateGRAnnotation',
+     'PrepositionalComplementGRAnnotation',
+     'PrepositionalModifierGRAnnotation',
+     'PrepositionalObjectGRAnnotation',
+     'PunctuationGRAnnotation',
+     'PurposeClauseModifierGRAnnotation',
+     'QuantifierModifierGRAnnotation',
+     'ReferentGRAnnotation',
+     'RelativeClauseModifierGRAnnotation',
+     'RelativeGRAnnotation',
+     'SemanticDependentGRAnnotation',
+     'SubjectGRAnnotation',
+     'TemporalModifierGRAnnotation',
+     'XClausalComplementGRAnnotation'
+    ],
+   'nlp.trees.GrammaticalRelation' => [
+     'DependentGRAnnotation',
+     'GovernorGRAnnotation',
+     'GrammaticalRelationAnnotation',
+     'KillGRAnnotation',
+     'Language',
+     'RootGRAnnotation'
+    ],
+   'nlp.ie.machinereading.structure.MachineReadingAnnotations' => [
+     'DependencyAnnotation',
+     'DocumentDirectoryAnnotation',
+     'DocumentIdAnnotation',
+     'EntityMentionsAnnotation',
+     'EventMentionsAnnotation',
+     'GenderAnnotation',
+     'RelationMentionsAnnotation',
+     'TriggerAnnotation'
+    ],
+   'nlp.parser.lexparser.ParserAnnotations' => [
+     'ConstraintAnnotation'
+    ],
+   'nlp.trees.semgraph.SemanticGraphCoreAnnotations' => [
+     'SemanticGraphBasicDependenciesAnnotation',
+     'SemanticGraphCollapsedCCProcessedDependenciesAnnotation',
+     'SemanticGraphCollapsedDependenciesAnnotation'
+    ],
+   'nlp.time.TimeAnnotations' => [
+     'TimexAnnotation',
+     'TimexAnnotations'
+    ],
+   'nlp.time.TimeExpression' => [
+     'Annotation',
+     'ChildrenAnnotation'
+    ],
+   'nlp.trees.TreeCoreAnnotations' => [
+     'TreeHeadTagAnnotation',
+     'TreeHeadWordAnnotation',
+     'TreeAnnotation'
+    ]
+  }
+  annotations_by_name = {}
+  Annotations.each do |base_class, annotation_classes|
+    annotation_classes.each do |annotation_class|
+      annotations_by_name[annotation_class] ||= []
+      annotations_by_name[annotation_class] << base_class
+    end
+  end
+  AnnotationsByName = annotations_by_name
+  # Modify the Rjb JavaProxy class to add our own method to get annotations.
+  Rjb::Rjb_JavaProxy.class_eval do
+    # Dynamically defined on all proxied annotation classes.
+    # Get an annotation using the annotation bridge.
+    def get(annotation, anno_base = nil)
+      if !java_methods.include?('get(Ljava.lang.Class;)')
+        raise'No annotation can be retrieved on this object.'
+      else
+        anno_class = "#{StanfordCoreNLP.camel_case(annotation)}Annotation"
+        if anno_base
+          raise "The path #{anno_base} doesn't exist." unless Annotations[anno_base]
+           anno_bases = [anno_base]
+        else
+          anno_bases = AnnotationsByName[anno_class]
+          raise "The annotation #{anno_class} doesn't exist." unless anno_bases
+        end
+        if anno_bases.size > 1
+          msg = "There are many different annotations bearing the name #{anno_class}. "
+          msg << "Please specify one of the following base classes as second parameter to disambiguate: "
+          msg << anno_bases.join(',')
+          raise msg
+        else
+          base_class = anno_bases[0]
+        end
+        url = "edu.stanford.#{base_class}$#{anno_class}"
+        AnnotationBridge.getAnnotation(self, url)
+      end
+    end
+  end
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: stanford-core-nlp
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
   prerelease:
 platform: ruby
 authors:
@@ -13,7 +13,7 @@ date: 2012-01-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rjb
-  requirement: &70258761325680 !ruby/object:Gem::Requirement
+  requirement: &70145364951100 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,18 +21,19 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70258761325680
+  version_requirements: *70145364951100
 description: ! " High-level Ruby bindings to the Stanford CoreNLP package, a set natural
   language processing \ntools for English, including tokenization, part-of-speech
-  tagging, lemmatization, named entity recognition, à\nparsing, and coreference resolution. "
+  tagging, lemmatization, named entity recognition,\nparsing, and coreference resolution. "
 email:
 - louis.mullie@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
 files:
-- lib/stanford-core-nlp/jar-loader.rb
-- lib/stanford-core-nlp/java-wrapper.rb
+- lib/stanford-core-nlp/jar_loader.rb
+- lib/stanford-core-nlp/java_wrapper.rb
+- lib/stanford-core-nlp/stanford_annotations.rb
 - lib/stanford-core-nlp.rb
 - bin/bridge.jar
 - bin/classifiers/all.3class.distsim.crf.ser.gz

data/lib/stanford-core-nlp/java-wrapper.rb DELETED

@@ -1,37 +0,0 @@
-module StanfordCoreNLP
-  # Modify the Rjb JavaProxy class to add our own methods to every Java object.
-  Rjb::Rjb_JavaProxy.class_eval do
-    # Dynamically defined on all proxied Java objects.
-    # Shorthand for to_string defined by Java classes.
-    def to_s; to_string; end
-    # Dynamically defined on all proxied Java iterators.
-    # Provide Ruby-style iterators to wrap Java iterators.
-    def each
-      if !java_methods.include?('iterator()')
-        raise 'This object cannot be iterated.'
-      else
-        i = self.iterator
-        while i.has_next; yield i.next; end
-      end
-    end
-    # Dynamically defined on all proxied annotation classes.
-    # Get an annotation using the annotation bridge.
-    def get(annotation)
-      if !java_methods.include?('get(Ljava.lang.Class;)')
-        raise 'No annotation can be retrieved on this object.'
-      else
-        base_class = (annotation.to_s.split('_')[0] == 'coref') ?
-        'edu.stanford.nlp.dcoref.CorefCoreAnnotations$' :
-        'edu.stanford.nlp.ling.CoreAnnotations$'
-        anno_class = annotation.to_s.gsub(/^[a-z]|_[a-z]/) { |a| a.upcase }.gsub('_', '')
-        url = "#{base_class}#{anno_class}Annotation"
-        AnnotationBridge.getAnnotation(self, url)
-      end
-    end
-  end
-end