RubyGems - stanford-core-nlp - Versions diffs - 0.1.1 → 0.1.2 - Mend

stanford-core-nlp 0.1.1 → 0.1.2

Files changed (6) hide show

data/lib/stanford-core-nlp.rb +29 -7
data/lib/stanford-core-nlp/{jar-loader.rb → jar_loader.rb} +0 -0
data/lib/stanford-core-nlp/java_wrapper.rb +22 -0
data/lib/stanford-core-nlp/stanford_annotations.rb +400 -0
metadata +7 -6
data/lib/stanford-core-nlp/java-wrapper.rb +0 -37

@@ -1,9 +1,10 @@
 module StanfordCoreNLP
-  VERSION = '0.1.1'
-  require 'stanford-core-nlp/jar-loader.rb'
-  require 'stanford-core-nlp/java-wrapper.rb'
+  VERSION = '0.1.2'
+  require 'stanford-core-nlp/jar_loader.rb'
+  require 'stanford-core-nlp/java_wrapper'
+  require 'stanford-core-nlp/stanford_annotations'
   class << self
     # The path in which to look for the Stanford JAR files.
     # This is passed to JarLoader.
@@ -51,11 +52,18 @@ module StanfordCoreNLP
     self.model_files[name] = file
   end
+  @@initialized = false
+  # Load the JARs, create the classes.
+  def self.init
+    self.load_jars(self.jvm_args, self.jar_path, self.log_file)
+    self.create_classes
+    @@initialized = true
+  end
   # Load a StanfordCoreNLP pipeline with the specified JVM flags and
   # StanfordCoreNLP properties (hash of property => values).
   def self.load(*annotators)
-    self.load_jars(self.jvm_args, self.jar_path, self.log_file)
-    self.create_classes
+    self.init unless @@initialized
     # Prepend the JAR path to the model files.
     properties = {}
     self.model_files.each { |k,v| properties[k] = self.jar_path + v }
@@ -84,7 +92,16 @@ module StanfordCoreNLP
     const_set(:Properties, Rjb::import('java.util.Properties'))
     const_set(:AnnotationBridge, Rjb::import('AnnotationBridge'))
   end
+  # Load a Java class (e.g. PTBTokenizerAnnotator) from a given
+  # Java package (default is 'edu.stanford.nlp.pipeline').
+  # The class is then accessible under the StanfordCoreNLP
+  # namespace, e.g. StanfordCoreNLP::PTBTokenizerAnnotator.
+  def self.load_class(klass, base = 'edu.stanford.nlp.pipeline')
+    self.init unless @@initialized
+    const_set(klass.intern, Rjb::import("#{base}.#{klass}"))
+  end
   # Create a java.util.Properties object from a hash.
   def self.get_properties(properties)
     props = Properties.new
@@ -94,4 +111,9 @@ module StanfordCoreNLP
     props
   end
+  # Helper function: snake_case -> CamelCase.
+  def self.camel_case(text)
+    text.to_s.gsub(/^[a-z]|_[a-z]/) { |a| a.upcase }.gsub('_', '')
+  end
 end

data/lib/stanford-core-nlp/{jar-loader.rb → jar_loader.rb} RENAMED

File without changes

data/lib/stanford-core-nlp/java_wrapper.rb ADDED

@@ -0,0 +1,22 @@
+module StanfordCoreNLP
+  # Modify the Rjb JavaProxy class to add our own methods to every Java object.
+  Rjb::Rjb_JavaProxy.class_eval do
+    # Dynamically defined on all proxied Java objects.
+    # Shorthand for to_string defined by Java classes.
+    def to_s; to_string; end
+    # Dynamically defined on all proxied Java iterables.
+    # Provide a Ruby-style each that wraps the Java iterator.
+    def each
+      if !java_methods.include?('iterator()')
+        raise 'This object cannot be iterated.'
+      else
+        i = self.iterator
+        while i.has_next; yield i.next; end
+      end
+    end
+  end
+end

data/lib/stanford-core-nlp/stanford_annotations.rb ADDED

@@ -0,0 +1,400 @@
+module StanfordCoreNLP
+  Annotations = {
+   'nlp.trees.international.pennchinese.ChineseGrammaticalRelations' => [
+     'AdjectivalModifierGRAnnotation',
+     'AdverbialModifierGRAnnotation',
+     'ArgumentGRAnnotation',
+     'AspectMarkerGRAnnotation',
+     'AssociativeMarkerGRAnnotation',
+     'AssociativeModifierGRAnnotation',
+     'AttributiveGRAnnotation',
+     'AuxModifierGRAnnotation',
+     'AuxPassiveGRAnnotation',
+     'BaGRAnnotation',
+     'ClausalComplementGRAnnotation',
+     'ClausalSubjectGRAnnotation',
+     'ClauseModifierGRAnnotation',
+     'ComplementGRAnnotation',
+     'ComplementizerGRAnnotation',
+     'ControllingSubjectGRAnnotation',
+     'CoordinationGRAnnotation',
+     'DeterminerGRAnnotation',
+     'DirectObjectGRAnnotation',
+     'DvpMarkerGRAnnotation',
+     'DvpModifierGRAnnotation',
+     'EtcGRAnnotation',
+     'LocalizerComplementGRAnnotation',
+     'ModalGRAnnotation',
+     'ModifierGRAnnotation',
+     'NegationModifierGRAnnotation',
+     'NominalPassiveSubjectGRAnnotation',
+     'NominalSubjectGRAnnotation',
+     'NounCompoundModifierGRAnnotation',
+     'NumberModifierGRAnnotation',
+     'NumericModifierGRAnnotation',
+     'ObjectGRAnnotation',
+     'OrdNumberGRAnnotation',
+     'ParentheticalGRAnnotation',
+     'ParticipialModifierGRAnnotation',
+     'PreconjunctGRAnnotation',
+     'PrepositionalLocalizerModifierGRAnnotation',
+     'PrepositionalModifierGRAnnotation',
+     'PrepositionalObjectGRAnnotation',
+     'PunctuationGRAnnotation',
+     'RangeGRAnnotation',
+     'RelativeClauseModifierGRAnnotation',
+     'ResultativeComplementGRAnnotation',
+     'SemanticDependentGRAnnotation',
+     'SubjectGRAnnotation',
+     'TemporalClauseGRAnnotation',
+     'TemporalGRAnnotation',
+     'TimePostpositionGRAnnotation',
+     'TopicGRAnnotation',
+     'VerbCompoundGRAnnotation',
+     'VerbModifierGRAnnotation',
+     'XClausalComplementGRAnnotation'
+    ],
+   'nlp.dcoref.CoNLL2011DocumentReader' => [
+     'CorefMentionAnnotation',
+     'NamedEntityAnnotation'
+    ],
+   'nlp.ling.CoreAnnotations' => [
+     'AbbrAnnotation',
+     'AbgeneAnnotation',
+     'AbstrAnnotation',
+     'AfterAnnotation',
+     'AnswerAnnotation',
+     'AnswerObjectAnnotation',
+     'AntecedentAnnotation',
+     'ArgDescendentAnnotation',
+     'ArgumentAnnotation',
+     'BagOfWordsAnnotation',
+     'BeAnnotation',
+     'BeforeAnnotation',
+     'BeginIndexAnnotation',
+     'BestCliquesAnnotation',
+     'BestFullAnnotation',
+     'CalendarAnnotation',
+     'CategoryAnnotation',
+     'CategoryFunctionalTagAnnotation',
+     'CharacterOffsetBeginAnnotation',
+     'CharacterOffsetEndAnnotation',
+     'CharAnnotation',
+     'ChineseCharAnnotation',
+     'ChineseIsSegmentedAnnotation',
+     'ChineseOrigSegAnnotation',
+     'ChineseSegAnnotation',
+     'ChunkAnnotation',
+     'CoarseTagAnnotation',
+     'CommonWordsAnnotation',
+     'CoNLLDepAnnotation',
+     'CoNLLDepParentIndexAnnotation',
+     'CoNLLDepTypeAnnotation',
+     'CoNLLPredicateAnnotation',
+     'CoNLLSRLAnnotation',
+     'ContextsAnnotation',
+     'CopyAnnotation',
+     'CostMagnificationAnnotation',
+     'CovertIDAnnotation',
+     'D2_LBeginAnnotation',
+     'D2_LEndAnnotation',
+     'D2_LMiddleAnnotation',
+     'DayAnnotation',
+     'DependentsAnnotation',
+     'DictAnnotation',
+     'DistSimAnnotation',
+     'DoAnnotation',
+     'DocDateAnnotation',
+     'DocIDAnnotation',
+     'DomainAnnotation',
+     'EndIndexAnnotation',
+     'EntityClassAnnotation',
+     'EntityRuleAnnotation',
+     'EntityTypeAnnotation',
+     'FeaturesAnnotation',
+     'FemaleGazAnnotation',
+     'FirstChildAnnotation',
+     'ForcedSentenceEndAnnotation',
+     'FreqAnnotation',
+     'GazAnnotation',
+     'GazetteerAnnotation',
+     'GenericTokensAnnotation',
+     'GeniaAnnotation',
+     'GoldAnswerAnnotation',
+     'GovernorAnnotation',
+     'GrandparentAnnotation',
+     'HaveAnnotation',
+     'HeadWordStringAnnotation',
+     'HeightAnnotation',
+     'IDAnnotation',
+     'IDFAnnotation',
+     'INAnnotation',
+     'IndexAnnotation',
+     'InterpretationAnnotation',
+     'IsDateRangeAnnotation',
+     'IsURLAnnotation',
+     'LabelAnnotation',
+     'LastGazAnnotation',
+     'LastTaggedAnnotation',
+     'LBeginAnnotation',
+     'LeftChildrenNodeAnnotation',
+     'LeftTermAnnotation',
+     'LemmaAnnotation',
+     'LEndAnnotation',
+     'LengthAnnotation',
+     'LMiddleAnnotation',
+     'MaleGazAnnotation',
+     'MarkingAnnotation',
+     'MonthAnnotation',
+     'MorphoCaseAnnotation',
+     'MorphoGenAnnotation',
+     'MorphoNumAnnotation',
+     'MorphoPersAnnotation',
+     'NamedEntityTagAnnotation',
+     'NeighborsAnnotation',
+     'NERIDAnnotation',
+     'NormalizedNamedEntityTagAnnotation',
+     'NotAnnotation',
+     'NumericCompositeObjectAnnotation',
+     'NumericCompositeTypeAnnotation',
+     'NumericCompositeValueAnnotation',
+     'NumericObjectAnnotation',
+     'NumericTypeAnnotation',
+     'NumericValueAnnotation',
+     'NumerizedTokensAnnotation',
+     'NumTxtSentencesAnnotation',
+     'OriginalAnswerAnnotation',
+     'OriginalCharAnnotation',
+     'OriginalTextAnnotation',
+     'ParagraphAnnotation',
+     'ParagraphsAnnotation',
+     'ParaPositionAnnotation',
+     'ParentAnnotation',
+     'PartOfSpeechAnnotation',
+     'PercentAnnotation',
+     'PhraseWordsAnnotation',
+     'PhraseWordsTagAnnotation',
+     'PolarityAnnotation',
+     'PositionAnnotation',
+     'PossibleAnswersAnnotation',
+     'PredictedAnswerAnnotation',
+     'PrevChildAnnotation',
+     'PriorAnnotation',
+     'ProjectedCategoryAnnotation',
+     'ProtoAnnotation',
+     'RoleAnnotation',
+     'SectionAnnotation',
+     'SemanticHeadTagAnnotation',
+     'SemanticHeadWordAnnotation',
+     'SemanticTagAnnotation',
+     'SemanticWordAnnotation',
+     'SentenceIDAnnotation',
+     'SentenceIndexAnnotation',
+     'SentencePositionAnnotation',
+     'SentencesAnnotation',
+     'ShapeAnnotation',
+     'SpaceBeforeAnnotation',
+     'SpanAnnotation',
+     'SpeakerAnnotation',
+     'SRL_ID',
+     'SRLIDAnnotation',
+     'SRLInstancesAnnotation',
+     'StackedNamedEntityTagAnnotation',
+     'StateAnnotation',
+     'StemAnnotation',
+     'SubcategorizationAnnotation',
+     'TagLabelAnnotation',
+     'TextAnnotation',
+     'TokenBeginAnnotation',
+     'TokenEndAnnotation',
+     'TokensAnnotation',
+     'TopicAnnotation',
+     'TrueCaseAnnotation',
+     'TrueCaseTextAnnotation',
+     'TrueTagAnnotation',
+     'UBlockAnnotation',
+     'UnaryAnnotation',
+     'UnknownAnnotation',
+     'UtteranceAnnotation',
+     'UTypeAnnotation',
+     'ValueAnnotation',
+     'VerbSenseAnnotation',
+     'WebAnnotation',
+     'WordFormAnnotation',
+     'WordnetSynAnnotation',
+     'WordPositionAnnotation',
+     'WordSenseAnnotation',
+     'XmlContextAnnotation',
+     'XmlElementAnnotation',
+     'YearAnnotation'
+    ],
+   'nlp.dcoref.CorefCoreAnnotations' => [
+     'CorefAnnotation',
+     'CorefChainAnnotation',
+     'CorefClusterAnnotation',
+     'CorefClusterIdAnnotation',
+     'CorefDestAnnotation',
+     'CorefGraphAnnotation'
+    ],
+   'nlp.ling.CoreLabel' => [
+     'GenericAnnotation'
+    ],
+   'nlp.trees.EnglishGrammaticalRelations' => [
+     'AbbreviationModifierGRAnnotation',
+     'AdjectivalComplementGRAnnotation',
+     'AdjectivalModifierGRAnnotation',
+     'AdvClauseModifierGRAnnotation',
+     'AdverbialModifierGRAnnotation',
+     'AgentGRAnnotation',
+     'AppositionalModifierGRAnnotation',
+     'ArgumentGRAnnotation',
+     'AttributiveGRAnnotation',
+     'AuxModifierGRAnnotation',
+     'AuxPassiveGRAnnotation',
+     'ClausalComplementGRAnnotation',
+     'ClausalPassiveSubjectGRAnnotation',
+     'ClausalSubjectGRAnnotation',
+     'ComplementGRAnnotation',
+     'ComplementizerGRAnnotation',
+     'ConjunctGRAnnotation',
+     'ControllingSubjectGRAnnotation',
+     'CoordinationGRAnnotation',
+     'CopulaGRAnnotation',
+     'DeterminerGRAnnotation',
+     'DirectObjectGRAnnotation',
+     'ExpletiveGRAnnotation',
+     'IndirectObjectGRAnnotation',
+     'InfinitivalModifierGRAnnotation',
+     'MarkerGRAnnotation',
+     'ModifierGRAnnotation',
+     'MultiWordExpressionGRAnnotation',
+     'NegationModifierGRAnnotation',
+     'NominalPassiveSubjectGRAnnotation',
+     'NominalSubjectGRAnnotation',
+     'NounCompoundModifierGRAnnotation',
+     'NpAdverbialModifierGRAnnotation',
+     'NumberModifierGRAnnotation',
+     'NumericModifierGRAnnotation',
+     'ObjectGRAnnotation',
+     'ParataxisGRAnnotation',
+     'ParticipialModifierGRAnnotation',
+     'PhrasalVerbParticleGRAnnotation',
+     'PossessionModifierGRAnnotation',
+     'PossessiveModifierGRAnnotation',
+     'PreconjunctGRAnnotation',
+     'PredeterminerGRAnnotation',
+     'PredicateGRAnnotation',
+     'PrepositionalComplementGRAnnotation',
+     'PrepositionalModifierGRAnnotation',
+     'PrepositionalObjectGRAnnotation',
+     'PunctuationGRAnnotation',
+     'PurposeClauseModifierGRAnnotation',
+     'QuantifierModifierGRAnnotation',
+     'ReferentGRAnnotation',
+     'RelativeClauseModifierGRAnnotation',
+     'RelativeGRAnnotation',
+     'SemanticDependentGRAnnotation',
+     'SubjectGRAnnotation',
+     'TemporalModifierGRAnnotation',
+     'XClausalComplementGRAnnotation'
+    ],
+   'nlp.trees.GrammaticalRelation' => [
+     'DependentGRAnnotation',
+     'GovernorGRAnnotation',
+     'GrammaticalRelationAnnotation',
+     'KillGRAnnotation',
+     'Language',
+     'RootGRAnnotation'
+    ],
+   'nlp.ie.machinereading.structure.MachineReadingAnnotations' => [
+     'DependencyAnnotation',
+     'DocumentDirectoryAnnotation',
+     'DocumentIdAnnotation',
+     'EntityMentionsAnnotation',
+     'EventMentionsAnnotation',
+     'GenderAnnotation',
+     'RelationMentionsAnnotation',
+     'TriggerAnnotation'
+    ],
+   'nlp.parser.lexparser.ParserAnnotations' => [
+     'ConstraintAnnotation'
+    ],
+   'nlp.trees.semgraph.SemanticGraphCoreAnnotations' => [
+     'SemanticGraphBasicDependenciesAnnotation',
+     'SemanticGraphCollapsedCCProcessedDependenciesAnnotation',
+     'SemanticGraphCollapsedDependenciesAnnotation'
+    ],
+   'nlp.time.TimeAnnotations' => [
+     'TimexAnnotation',
+     'TimexAnnotations'
+    ],
+   'nlp.time.TimeExpression' => [
+     'Annotation',
+     'ChildrenAnnotation'
+    ],
+   'nlp.trees.TreeCoreAnnotations' => [
+     'TreeHeadTagAnnotation',
+     'TreeHeadWordAnnotation',
+     'TreeAnnotation'
+    ]
+  }
+  annotations_by_name = {}
+  Annotations.each do |base_class, annotation_classes|
+    annotation_classes.each do |annotation_class|
+      annotations_by_name[annotation_class] ||= []
+      annotations_by_name[annotation_class] << base_class
+    end
+  end
+  AnnotationsByName = annotations_by_name
+  # Modify the Rjb JavaProxy class to add our own method to get annotations.
+  Rjb::Rjb_JavaProxy.class_eval do
+    # Dynamically defined on all proxied annotation classes.
+    # Get an annotation using the annotation bridge.
+    def get(annotation, anno_base = nil)
+      if !java_methods.include?('get(Ljava.lang.Class;)')
+        raise'No annotation can be retrieved on this object.'
+      else
+        anno_class = "#{StanfordCoreNLP.camel_case(annotation)}Annotation"
+        if anno_base
+          raise "The path #{anno_base} doesn't exist." unless Annotations[anno_base]
+           anno_bases = [anno_base]
+        else
+          anno_bases = AnnotationsByName[anno_class]
+          raise "The annotation #{anno_class} doesn't exist." unless anno_bases
+        end
+        if anno_bases.size > 1
+          msg = "There are many different annotations bearing the name #{anno_class}. "
+          msg << "Please specify one of the following base classes as second parameter to disambiguate: "
+          msg << anno_bases.join(',')
+          raise msg
+        else
+          base_class = anno_bases[0]
+        end
+        url = "edu.stanford.#{base_class}$#{anno_class}"
+        AnnotationBridge.getAnnotation(self, url)
+      end
+    end
+  end
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: stanford-core-nlp
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
   prerelease:
 platform: ruby
 authors:
@@ -13,7 +13,7 @@ date: 2012-01-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rjb
-  requirement: &70258761325680 !ruby/object:Gem::Requirement
+  requirement: &70145364951100 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,18 +21,19 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70258761325680
+  version_requirements: *70145364951100
 description: ! " High-level Ruby bindings to the Stanford CoreNLP package, a set of natural
   language processing \ntools for English, including tokenization, part-of-speech
-  tagging, lemmatization, named entity recognition, à\nparsing, and coreference resolution. "
+  tagging, lemmatization, named entity recognition,\nparsing, and coreference resolution. "
 email:
 - louis.mullie@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
 files:
-- lib/stanford-core-nlp/jar-loader.rb
-- lib/stanford-core-nlp/java-wrapper.rb
+- lib/stanford-core-nlp/jar_loader.rb
+- lib/stanford-core-nlp/java_wrapper.rb
+- lib/stanford-core-nlp/stanford_annotations.rb
 - lib/stanford-core-nlp.rb
 - bin/bridge.jar
 - bin/classifiers/all.3class.distsim.crf.ser.gz

data/lib/stanford-core-nlp/java-wrapper.rb DELETED

@@ -1,37 +0,0 @@
-module StanfordCoreNLP
-  # Modify the Rjb JavaProxy class to add our own methods to every Java object.
-  Rjb::Rjb_JavaProxy.class_eval do
-    # Dynamically defined on all proxied Java objects.
-    # Shorthand for to_string defined by Java classes.
-    def to_s; to_string; end
-    # Dynamically defined on all proxied Java iterators.
-    # Provide Ruby-style iterators to wrap Java iterators.
-    def each
-      if !java_methods.include?('iterator()')
-        raise 'This object cannot be iterated.'
-      else
-        i = self.iterator
-        while i.has_next; yield i.next; end
-      end
-    end
-    # Dynamically defined on all proxied annotation classes.
-    # Get an annotation using the annotation bridge.
-    def get(annotation)
-      if !java_methods.include?('get(Ljava.lang.Class;)')
-        raise 'No annotation can be retrieved on this object.'
-      else
-        base_class = (annotation.to_s.split('_')[0] == 'coref') ?
-        'edu.stanford.nlp.dcoref.CorefCoreAnnotations$' :
-        'edu.stanford.nlp.ling.CoreAnnotations$'
-        anno_class = annotation.to_s.gsub(/^[a-z]|_[a-z]/) { |a| a.upcase }.gsub('_', '')
-        url = "#{base_class}#{anno_class}Annotation"
-        AnnotationBridge.getAnnotation(self, url)
-      end
-    end
-  end
-end