RubyGems - ispras-api - Versions diffs - 0.1.7 → 0.2.0 - Mend

ispras-api 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/lib/ispras-api/ispras_api.rb +27 -4
data/lib/ispras-api/texterra/nlp.rb +30 -26
data/lib/ispras-api/texterra_api.rb +9 -12
data/lib/ispras-api/version.rb +3 -3
data/test/test_texterra_api.rb +93 -31
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 6bcfa8343a980dd19a4ef3c0034994631a8abe0f
-  data.tar.gz: e8e9c2339717da7ffc54801902b39b12ffc8cca8
+  metadata.gz: e13c5d64eacc2525ca461d5995d44f502f70b988
+  data.tar.gz: 098f036364aa088ac20883b88baeab517902e764
 SHA512:
-  metadata.gz: 8beee1396a7291cea5bd2cbfeb209b465cffad3e54a0be576e4ab0f5881d204ddca20b5cdf158e336d9dd74bf52d20c38650721b7d63a26e8e80aceb9f971d90
-  data.tar.gz: 974689d185f7357ec9f6fa0940404aea6146aec8404ce695d1d333d59c266bebf84bee56acdac5ee9d1c7503da9cfacafbc392a197ad3a9e9700655db0db56b6
+  metadata.gz: 792870423e6d110a92f3ff8efe29152032a87fc73796f114d86707bb86831e36df6d71f3bae0e27e71a6ed1c02b023e5d7c87eae3dd31460574f5923dcdda595
+  data.tar.gz: 68dcabf74d6edfe6788868be755deae38566641738285d9161ed0f0b7ab3a333dbbf283e6fb33192f4cc3e16bd9c70c543113a2fd3120e226b8342c165cf8482

data/lib/ispras-api/ispras_api.rb CHANGED

@@ -1,3 +1,4 @@
+require 'json'
 require 'httparty'
 require 'nori'
 require_relative './api_error'
@@ -17,20 +18,42 @@ class IsprasAPI
     end
   end
-  def GET(path = '', params = {})
-    options = { query: params }
+  def GET(path = '', params = {}, format=:xml)
+    options = {
+      headers: headers(format),
+      query: params
+    }
     response = self.class.get "/#{path}", options
     response.code == 200 ? response.parsed_response : check_error(response)
   end
-  def POST(path = '', params = {}, form = {})
-    options = { query: params, body: form }
+  def POST(path = '', params = {}, body = {}, format=:xml)
+    options = {
+      headers: headers(format),
+      query: params,
+      body: body
+    }
     response = self.class.post "/#{path}", options
     response.code == 200 ? response.parsed_response : check_error(response)
   end
   private
+  def headers(format)
+    case(format)
+    when :json
+      {
+        'Accept' => 'application/json'
+      }
+    when :xml
+      {
+        'Accept' => 'application/xml'
+      }
+    else
+      {}
+    end
+  end
   def check_error(response)
     fail ApiError, "#{response.code} Error occured"
   end

data/lib/ispras-api/texterra/nlp.rb CHANGED

@@ -5,7 +5,7 @@ module TexterraNLP
   # Detects language of given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def language_detection_annotate(text)
     preset_nlp(:languageDetection, text)
   end
@@ -13,7 +13,7 @@ module TexterraNLP
   # Detects boundaries of sentences in a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def sentence_detection_annotate(text)
     preset_nlp(:sentenceDetection, text)
   end
@@ -21,7 +21,7 @@ module TexterraNLP
   # Detects all tokens (minimal significant text parts) in a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def tokenization_annotate(text)
     preset_nlp(:tokenization, text)
   end
@@ -29,7 +29,7 @@ module TexterraNLP
   # Detects lemma of each word of a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def lemmatization_annotate(text)
     preset_nlp(:lemmatization, text)
   end
@@ -37,7 +37,7 @@ module TexterraNLP
   # Detects part of speech tag for each word of a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def pos_tagging_annotate(text)
     preset_nlp(:posTagging, text)
   end
@@ -45,7 +45,7 @@ module TexterraNLP
   # Tries to correct disprints and other spelling errors in a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def spelling_correction_annotate(text)
     preset_nlp(:spellingCorrection, text)
   end
@@ -53,7 +53,7 @@ module TexterraNLP
   # Finds all named entities occurences in a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def named_entities_annotate(text)
     preset_nlp(:namedEntities, text)
   end
@@ -61,7 +61,7 @@ module TexterraNLP
   # Extracts not overlapping terms within a given text; term is a textual representation for some concept of the real world
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def term_detection_annotate(text)
     preset_nlp(:termDetection, text)
   end
@@ -69,7 +69,7 @@ module TexterraNLP
   # Detects the most appropriate meanings (concepts) for terms occurred in a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def disambiguation_annotate(text)
     preset_nlp(:disambiguation, text)
   end
@@ -78,7 +78,7 @@ module TexterraNLP
   # This service extracts a set of key concepts for a given text
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def key_concepts_annotate(text)
     preset_nlp(:keyConcepts, text)
   end
@@ -88,7 +88,7 @@ module TexterraNLP
   # If no domain from this list has been detected, the text is assumed to be no domain, or general domain
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def domain_detection_annotate(text)
     preset_nlp(:domainDetection, text)
   end
@@ -96,7 +96,7 @@ module TexterraNLP
   # Detects whether the given text is subjective or not
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def subjectivity_detection_annotate(text)
     preset_nlp(:subjectivityDetection, text)
   end
@@ -104,7 +104,7 @@ module TexterraNLP
   # Detects whether the given text has positive, negative or no sentiment
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def polarity_detection_annotate(text)
     preset_nlp(:polarityDetection, text)
   end
@@ -115,21 +115,22 @@ module TexterraNLP
   #
   # @param [String] text Text to process
   # @param [String] domain Domain for polarity detection
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def domain_polarity_detection_annotate(text, domain = '')
     specs = NLP_SPECS[:domainPolarityDetection]
     domain = "(#{domain})" unless domain.empty?
-    result = POST(specs[:path] % domain, specs[:params], text: text)[:nlp_document][:annotations][:i_annotation]
-    return [] if result.nil?
-    result = [].push result unless result.is_a? Array
-    result.map { |e| assign_text(e, text) }
+    result = POST(specs[:path] % domain, specs[:params], {text: text}, :json)
+    result[:annotations].each do |key, value|
+      value.map! { |an| assign_text(an, text) }
+    end
+    result
   end
   # Detects Twitter-specific entities: Hashtags, User names, Emoticons, URLs.
   # And also: Stop-words, Misspellings, Spelling suggestions, Spelling corrections
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def tweet_normalization(text)
     preset_nlp(:tweetNormalization, text)
   end
@@ -137,11 +138,13 @@ module TexterraNLP
   # Detects Syntax relations in text. Only works for russian texts
   #
   # @param [String] text Text to process
-  # @return [Array] Texterra annotations
+  # @return [Hash] Texterra document
   def syntax_detection(text)
-    preset_nlp(:syntaxDetection, text).each do |an|
-      an[:value][:parent_token] = assign_text(an[:value][:parent_token], text) if an[:value] && an[:value][:parent_token]
+    result = preset_nlp(:syntaxDetection, text)
+    result[:annotations][:'syntax-relation'].each do |an|
+      an[:value][:parent] = assign_text(an[:value][:parent], text) if an[:value] && an[:value][:parent]
     end
+    result
   end
   private
@@ -149,10 +152,11 @@ module TexterraNLP
   # Utility NLP part method
   def preset_nlp(methodName, text)
     specs = NLP_SPECS[methodName]
-    result = POST(specs[:path], specs[:params], text: text)[:nlp_document][:annotations][:i_annotation]
-    return [] if result.nil?
-    result = [].push result unless result.is_a? Array
-    result.map { |an| assign_text(an, text) }
+    result = POST(specs[:path], specs[:params], {text: text}, :json)
+    result[:annotations].each do |key, value|
+      value.map! { |an| assign_text(an, text) }
+    end
+    result
   end
   # Utility text assignement for annotation

data/lib/ispras-api/texterra_api.rb CHANGED

@@ -38,20 +38,15 @@ class TexterraAPI < IsprasAPI
   # @param [String] text Text to process
   # @return [Array] Array of weighted key concepts
   def key_concepts(text)
-    key_concepts = key_concepts_annotate(text)[0][:value][:concepts_weights][:entry] || []
-    key_concepts = [].push key_concepts unless key_concepts.is_a? Array
-    key_concepts.map do |kc|
-      kc[:concept][:weight] = kc[:double]
-      kc[:concept]
-    end
+    key_concepts = key_concepts_annotate(text)[:annotations][:keyconcepts][0][:value] || []
   end
   # Detects whether the given text has positive, negative or no sentiment
   #
   # @param [String] text Text to process
-  # @return [Array] Sentiment of the text
+  # @return [String] Sentiment of the text
   def sentiment_analysis(text)
-    polarity_detection_annotate(text)[0][:value].to_s || 'NEUTRAL'
+    polarity_detection_annotate(text)[:annotations][:polarity][0][:value].to_s || 'NEUTRAL'
     rescue NoMethodError
       'NEUTRAL'
   end
@@ -66,9 +61,11 @@ class TexterraAPI < IsprasAPI
   def domain_sentiment_analysis(text, domain = '')
     used_domain = 'general'
     sentiment = 'NEUTRAL'
-    (domain_polarity_detection_annotate(text, domain) || []).each do |an|
-      sentiment = an[:value] if an[:@class].include? 'SentimentPolarity'
-      used_domain = an[:value] if an[:@class].include? 'DomainAnnotation'
+    annotations = domain_polarity_detection_annotate(text, domain)[:annotations]
+    begin
+      used_domain = annotations[:domain][0][:value]
+      sentiment = annotations[:polarity][0][:value]
+    rescue NoMethodError
     end
     {
       domain: used_domain,
@@ -81,7 +78,7 @@ class TexterraAPI < IsprasAPI
   # @param [String] text Text to process
   # @return [Array] Texterra annotations
   def disambiguation(text)
-    disambiguation_annotate(text)
+    disambiguation_annotate(text)[:annotations][:'disambiguated-phrase']
   end
   def custom_query(path, query, form = nil)

data/lib/ispras-api/version.rb CHANGED

@@ -1,12 +1,12 @@
 module Version
   MAJOR = 0
-  MINOR = 1
-  PATCH = 7
+  MINOR = 2
+  PATCH = 0
   PRE = nil
   YEAR = '2016'
   MONTH = '09'
-  DAY = '09'
+  DAY = '12'
   def self.to_s
     [MAJOR, MINOR, PATCH, PRE].compact.join('.')

data/test/test_texterra_api.rb CHANGED

@@ -44,61 +44,123 @@ class TestTexterraAPI < Minitest::Test
   end
   def test_tweet_normalization
-    assert_instance_of Array, @texterra.tweet_normalization(@en_tweet)
-    assert_instance_of Array, @texterra.tweet_normalization(@ru_tweet)
+    res = @texterra.tweet_normalization(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.tweet_normalization(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_syntax_detection
-    assert_instance_of Array, @texterra.syntax_detection(@ru_text)
+    res = @texterra.syntax_detection(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
   end
   def test_language_detection_annotate
-    assert_instance_of Array, @texterra.language_detection_annotate(@en_text)
-    assert_instance_of Array, @texterra.language_detection_annotate(@ru_text)
-    assert_instance_of Array, @texterra.language_detection_annotate(@en_tweet)
-    assert_instance_of Array, @texterra.language_detection_annotate(@ru_tweet)
+    res = @texterra.language_detection_annotate(@en_text)
+    assert_instance_of Hash, res
+    assert_equal @en_text, res[:text]
+    res = @texterra.language_detection_annotate(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
+    res = @texterra.language_detection_annotate(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.language_detection_annotate(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_sentence_detection_annotate
-    assert_instance_of Array, @texterra.sentence_detection_annotate(@en_text)
-    assert_instance_of Array, @texterra.sentence_detection_annotate(@ru_text)
-    assert_instance_of Array, @texterra.sentence_detection_annotate(@en_tweet)
-    assert_instance_of Array, @texterra.sentence_detection_annotate(@ru_tweet)
+    res = @texterra.sentence_detection_annotate(@en_text)
+    assert_instance_of Hash, res
+    assert_equal @en_text, res[:text]
+    res = @texterra.sentence_detection_annotate(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
+    res = @texterra.sentence_detection_annotate(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.sentence_detection_annotate(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_tokenization_annotate
-    assert_instance_of Array, @texterra.tokenization_annotate(@en_text)
-    assert_instance_of Array, @texterra.tokenization_annotate(@ru_text)
-    assert_instance_of Array, @texterra.tokenization_annotate(@en_tweet)
-    assert_instance_of Array, @texterra.tokenization_annotate(@ru_tweet)
+    res = @texterra.tokenization_annotate(@en_text)
+    assert_instance_of Hash, res
+    assert_equal @en_text, res[:text]
+    res = @texterra.tokenization_annotate(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
+    res = @texterra.tokenization_annotate(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.tokenization_annotate(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_lemmatization_annotate
-    assert_instance_of Array, @texterra.lemmatization_annotate(@en_text)
-    assert_instance_of Array, @texterra.lemmatization_annotate(@ru_text)
-    assert_instance_of Array, @texterra.lemmatization_annotate(@en_tweet)
-    assert_instance_of Array, @texterra.lemmatization_annotate(@ru_tweet)
+    res = @texterra.lemmatization_annotate(@en_text)
+    assert_instance_of Hash, res
+    assert_equal @en_text, res[:text]
+    res = @texterra.lemmatization_annotate(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
+    res = @texterra.lemmatization_annotate(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.lemmatization_annotate(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_pos_tagging_annotate
-    assert_instance_of Array, @texterra.pos_tagging_annotate(@en_text)
-    assert_instance_of Array, @texterra.pos_tagging_annotate(@ru_text)
-    assert_instance_of Array, @texterra.pos_tagging_annotate(@en_tweet)
-    assert_instance_of Array, @texterra.pos_tagging_annotate(@ru_tweet)
+    res = @texterra.pos_tagging_annotate(@en_text)
+    assert_instance_of Hash, res
+    assert_equal @en_text, res[:text]
+    res = @texterra.pos_tagging_annotate(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
+    res = @texterra.pos_tagging_annotate(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.pos_tagging_annotate(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_named_entities_annotate
-    assert_instance_of Array, @texterra.named_entities_annotate(@en_text)
-    assert_instance_of Array, @texterra.named_entities_annotate(@ru_text)
-    assert_instance_of Array, @texterra.named_entities_annotate(@en_tweet)
-    assert_instance_of Array, @texterra.named_entities_annotate(@ru_tweet)
+    res = @texterra.named_entities_annotate(@en_text)
+    assert_instance_of Hash, res
+    assert_equal @en_text, res[:text]
+    res = @texterra.named_entities_annotate(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
+    res = @texterra.named_entities_annotate(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.named_entities_annotate(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_subjectivity_detection_annotate
-    assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_text)
-    assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_text)
-    assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_tweet)
-    assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_tweet)
+    res = @texterra.subjectivity_detection_annotate(@en_text)
+    assert_instance_of Hash, res
+    assert_equal @en_text, res[:text]
+    res = @texterra.subjectivity_detection_annotate(@ru_text)
+    assert_instance_of Hash, res
+    assert_equal @ru_text, res[:text]
+    res = @texterra.subjectivity_detection_annotate(@en_tweet)
+    assert_instance_of Hash, res
+    assert_equal @en_tweet, res[:text]
+    res = @texterra.subjectivity_detection_annotate(@ru_tweet)
+    assert_instance_of Hash, res
+    assert_equal @ru_tweet, res[:text]
   end
   def test_representation_terms

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ispras-api
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.2.0
 platform: ruby
 authors:
 - Alexey Laguta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-09-09 00:00:00.000000000 Z
+date: 2016-09-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: httparty