ispras-api 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6bcfa8343a980dd19a4ef3c0034994631a8abe0f
4
- data.tar.gz: e8e9c2339717da7ffc54801902b39b12ffc8cca8
3
+ metadata.gz: e13c5d64eacc2525ca461d5995d44f502f70b988
4
+ data.tar.gz: 098f036364aa088ac20883b88baeab517902e764
5
5
  SHA512:
6
- metadata.gz: 8beee1396a7291cea5bd2cbfeb209b465cffad3e54a0be576e4ab0f5881d204ddca20b5cdf158e336d9dd74bf52d20c38650721b7d63a26e8e80aceb9f971d90
7
- data.tar.gz: 974689d185f7357ec9f6fa0940404aea6146aec8404ce695d1d333d59c266bebf84bee56acdac5ee9d1c7503da9cfacafbc392a197ad3a9e9700655db0db56b6
6
+ metadata.gz: 792870423e6d110a92f3ff8efe29152032a87fc73796f114d86707bb86831e36df6d71f3bae0e27e71a6ed1c02b023e5d7c87eae3dd31460574f5923dcdda595
7
+ data.tar.gz: 68dcabf74d6edfe6788868be755deae38566641738285d9161ed0f0b7ab3a333dbbf283e6fb33192f4cc3e16bd9c70c543113a2fd3120e226b8342c165cf8482
@@ -1,3 +1,4 @@
1
+ require 'json'
1
2
  require 'httparty'
2
3
  require 'nori'
3
4
  require_relative './api_error'
@@ -17,20 +18,42 @@ class IsprasAPI
17
18
  end
18
19
  end
19
20
 
20
- def GET(path = '', params = {})
21
- options = { query: params }
21
+ def GET(path = '', params = {}, format=:xml)
22
+ options = {
23
+ headers: headers(format),
24
+ query: params
25
+ }
22
26
  response = self.class.get "/#{path}", options
23
27
  response.code == 200 ? response.parsed_response : check_error(response)
24
28
  end
25
29
 
26
- def POST(path = '', params = {}, form = {})
27
- options = { query: params, body: form }
30
+ def POST(path = '', params = {}, body = {}, format=:xml)
31
+ options = {
32
+ headers: headers(format),
33
+ query: params,
34
+ body: body
35
+ }
28
36
  response = self.class.post "/#{path}", options
29
37
  response.code == 200 ? response.parsed_response : check_error(response)
30
38
  end
31
39
 
32
40
  private
33
41
 
42
+ def headers(format)
43
+ case(format)
44
+ when :json
45
+ {
46
+ 'Accept' => 'application/json'
47
+ }
48
+ when :xml
49
+ {
50
+ 'Accept' => 'application/xml'
51
+ }
52
+ else
53
+ {}
54
+ end
55
+ end
56
+
34
57
  def check_error(response)
35
58
  fail ApiError, "#{response.code} Error occured"
36
59
  end
@@ -5,7 +5,7 @@ module TexterraNLP
5
5
  # Detects language of given text
6
6
  #
7
7
  # @param [String] text Text to process
8
- # @return [Array] Texterra annotations
8
+ # @return [Hash] Texterra document
9
9
  def language_detection_annotate(text)
10
10
  preset_nlp(:languageDetection, text)
11
11
  end
@@ -13,7 +13,7 @@ module TexterraNLP
13
13
  # Detects boundaries of sentences in a given text
14
14
  #
15
15
  # @param [String] text Text to process
16
- # @return [Array] Texterra annotations
16
+ # @return [Hash] Texterra document
17
17
  def sentence_detection_annotate(text)
18
18
  preset_nlp(:sentenceDetection, text)
19
19
  end
@@ -21,7 +21,7 @@ module TexterraNLP
21
21
  # Detects all tokens (minimal significant text parts) in a given text
22
22
  #
23
23
  # @param [String] text Text to process
24
- # @return [Array] Texterra annotations
24
+ # @return [Hash] Texterra document
25
25
  def tokenization_annotate(text)
26
26
  preset_nlp(:tokenization, text)
27
27
  end
@@ -29,7 +29,7 @@ module TexterraNLP
29
29
  # Detects lemma of each word of a given text
30
30
  #
31
31
  # @param [String] text Text to process
32
- # @return [Array] Texterra annotations
32
+ # @return [Hash] Texterra document
33
33
  def lemmatization_annotate(text)
34
34
  preset_nlp(:lemmatization, text)
35
35
  end
@@ -37,7 +37,7 @@ module TexterraNLP
37
37
  # Detects part of speech tag for each word of a given text
38
38
  #
39
39
  # @param [String] text Text to process
40
- # @return [Array] Texterra annotations
40
+ # @return [Hash] Texterra document
41
41
  def pos_tagging_annotate(text)
42
42
  preset_nlp(:posTagging, text)
43
43
  end
@@ -45,7 +45,7 @@ module TexterraNLP
45
45
  # Tries to correct misprints and other spelling errors in a given text
46
46
  #
47
47
  # @param [String] text Text to process
48
- # @return [Array] Texterra annotations
48
+ # @return [Hash] Texterra document
49
49
  def spelling_correction_annotate(text)
50
50
  preset_nlp(:spellingCorrection, text)
51
51
  end
@@ -53,7 +53,7 @@ module TexterraNLP
53
53
  # Finds all named entities occurrences in a given text
54
54
  #
55
55
  # @param [String] text Text to process
56
- # @return [Array] Texterra annotations
56
+ # @return [Hash] Texterra document
57
57
  def named_entities_annotate(text)
58
58
  preset_nlp(:namedEntities, text)
59
59
  end
@@ -61,7 +61,7 @@ module TexterraNLP
61
61
  # Extracts not overlapping terms within a given text; term is a textual representation for some concept of the real world
62
62
  #
63
63
  # @param [String] text Text to process
64
- # @return [Array] Texterra annotations
64
+ # @return [Hash] Texterra document
65
65
  def term_detection_annotate(text)
66
66
  preset_nlp(:termDetection, text)
67
67
  end
@@ -69,7 +69,7 @@ module TexterraNLP
69
69
  # Detects the most appropriate meanings (concepts) for terms occurring in a given text
70
70
  #
71
71
  # @param [String] text Text to process
72
- # @return [Array] Texterra annotations
72
+ # @return [Hash] Texterra document
73
73
  def disambiguation_annotate(text)
74
74
  preset_nlp(:disambiguation, text)
75
75
  end
@@ -78,7 +78,7 @@ module TexterraNLP
78
78
  # This service extracts a set of key concepts for a given text
79
79
  #
80
80
  # @param [String] text Text to process
81
- # @return [Array] Texterra annotations
81
+ # @return [Hash] Texterra document
82
82
  def key_concepts_annotate(text)
83
83
  preset_nlp(:keyConcepts, text)
84
84
  end
@@ -88,7 +88,7 @@ module TexterraNLP
88
88
  # If no domain from this list has been detected, the text is assumed to be no domain, or general domain
89
89
  #
90
90
  # @param [String] text Text to process
91
- # @return [Array] Texterra annotations
91
+ # @return [Hash] Texterra document
92
92
  def domain_detection_annotate(text)
93
93
  preset_nlp(:domainDetection, text)
94
94
  end
@@ -96,7 +96,7 @@ module TexterraNLP
96
96
  # Detects whether the given text is subjective or not
97
97
  #
98
98
  # @param [String] text Text to process
99
- # @return [Array] Texterra annotations
99
+ # @return [Hash] Texterra document
100
100
  def subjectivity_detection_annotate(text)
101
101
  preset_nlp(:subjectivityDetection, text)
102
102
  end
@@ -104,7 +104,7 @@ module TexterraNLP
104
104
  # Detects whether the given text has positive, negative or no sentiment
105
105
  #
106
106
  # @param [String] text Text to process
107
- # @return [Array] Texterra annotations
107
+ # @return [Hash] Texterra document
108
108
  def polarity_detection_annotate(text)
109
109
  preset_nlp(:polarityDetection, text)
110
110
  end
@@ -115,21 +115,22 @@ module TexterraNLP
115
115
  #
116
116
  # @param [String] text Text to process
117
117
  # @param [String] domain Domain for polarity detection
118
- # @return [Array] Texterra annotations
118
+ # @return [Hash] Texterra document
119
119
  def domain_polarity_detection_annotate(text, domain = '')
120
120
  specs = NLP_SPECS[:domainPolarityDetection]
121
121
  domain = "(#{domain})" unless domain.empty?
122
- result = POST(specs[:path] % domain, specs[:params], text: text)[:nlp_document][:annotations][:i_annotation]
123
- return [] if result.nil?
124
- result = [].push result unless result.is_a? Array
125
- result.map { |e| assign_text(e, text) }
122
+ result = POST(specs[:path] % domain, specs[:params], {text: text}, :json)
123
+ result[:annotations].each do |key, value|
124
+ value.map! { |an| assign_text(an, text) }
125
+ end
126
+ result
126
127
  end
127
128
 
128
129
  # Detects Twitter-specific entities: Hashtags, User names, Emoticons, URLs.
129
130
  # And also: Stop-words, Misspellings, Spelling suggestions, Spelling corrections
130
131
  #
131
132
  # @param [String] text Text to process
132
- # @return [Array] Texterra annotations
133
+ # @return [Hash] Texterra document
133
134
  def tweet_normalization(text)
134
135
  preset_nlp(:tweetNormalization, text)
135
136
  end
@@ -137,11 +138,13 @@ module TexterraNLP
137
138
  # Detects Syntax relations in text. Only works for Russian texts
138
139
  #
139
140
  # @param [String] text Text to process
140
- # @return [Array] Texterra annotations
141
+ # @return [Hash] Texterra document
141
142
  def syntax_detection(text)
142
- preset_nlp(:syntaxDetection, text).each do |an|
143
- an[:value][:parent_token] = assign_text(an[:value][:parent_token], text) if an[:value] && an[:value][:parent_token]
143
+ result = preset_nlp(:syntaxDetection, text)
144
+ result[:annotations][:'syntax-relation'].each do |an|
145
+ an[:value][:parent] = assign_text(an[:value][:parent], text) if an[:value] && an[:value][:parent]
144
146
  end
147
+ result
145
148
  end
146
149
 
147
150
  private
@@ -149,10 +152,11 @@ module TexterraNLP
149
152
  # Utility NLP part method
150
153
  def preset_nlp(methodName, text)
151
154
  specs = NLP_SPECS[methodName]
152
- result = POST(specs[:path], specs[:params], text: text)[:nlp_document][:annotations][:i_annotation]
153
- return [] if result.nil?
154
- result = [].push result unless result.is_a? Array
155
- result.map { |an| assign_text(an, text) }
155
+ result = POST(specs[:path], specs[:params], {text: text}, :json)
156
+ result[:annotations].each do |key, value|
157
+ value.map! { |an| assign_text(an, text) }
158
+ end
159
+ result
156
160
  end
157
161
 
158
162
  # Utility text assignment for annotation
@@ -38,20 +38,15 @@ class TexterraAPI < IsprasAPI
38
38
  # @param [String] text Text to process
39
39
  # @return [Array] Array of weighted key concepts
40
40
  def key_concepts(text)
41
- key_concepts = key_concepts_annotate(text)[0][:value][:concepts_weights][:entry] || []
42
- key_concepts = [].push key_concepts unless key_concepts.is_a? Array
43
- key_concepts.map do |kc|
44
- kc[:concept][:weight] = kc[:double]
45
- kc[:concept]
46
- end
41
+ key_concepts = key_concepts_annotate(text)[:annotations][:keyconcepts][0][:value] || []
47
42
  end
48
43
 
49
44
  # Detects whether the given text has positive, negative or no sentiment
50
45
  #
51
46
  # @param [String] text Text to process
52
- # @return [Array] Sentiment of the text
47
+ # @return [String] Sentiment of the text
53
48
  def sentiment_analysis(text)
54
- polarity_detection_annotate(text)[0][:value].to_s || 'NEUTRAL'
49
+ polarity_detection_annotate(text)[:annotations][:polarity][0][:value].to_s || 'NEUTRAL'
55
50
  rescue NoMethodError
56
51
  'NEUTRAL'
57
52
  end
@@ -66,9 +61,11 @@ class TexterraAPI < IsprasAPI
66
61
  def domain_sentiment_analysis(text, domain = '')
67
62
  used_domain = 'general'
68
63
  sentiment = 'NEUTRAL'
69
- (domain_polarity_detection_annotate(text, domain) || []).each do |an|
70
- sentiment = an[:value] if an[:@class].include? 'SentimentPolarity'
71
- used_domain = an[:value] if an[:@class].include? 'DomainAnnotation'
64
+ annotations = domain_polarity_detection_annotate(text, domain)[:annotations]
65
+ begin
66
+ used_domain = annotations[:domain][0][:value]
67
+ sentiment = annotations[:polarity][0][:value]
68
+ rescue NoMethodError
72
69
  end
73
70
  {
74
71
  domain: used_domain,
@@ -81,7 +78,7 @@ class TexterraAPI < IsprasAPI
81
78
  # @param [String] text Text to process
82
79
  # @return [Array] Texterra annotations
83
80
  def disambiguation(text)
84
- disambiguation_annotate(text)
81
+ disambiguation_annotate(text)[:annotations][:'disambiguated-phrase']
85
82
  end
86
83
 
87
84
  def custom_query(path, query, form = nil)
@@ -1,12 +1,12 @@
1
1
  module Version
2
2
  MAJOR = 0
3
- MINOR = 1
4
- PATCH = 7
3
+ MINOR = 2
4
+ PATCH = 0
5
5
  PRE = nil
6
6
 
7
7
  YEAR = '2016'
8
8
  MONTH = '09'
9
- DAY = '09'
9
+ DAY = '12'
10
10
 
11
11
  def self.to_s
12
12
  [MAJOR, MINOR, PATCH, PRE].compact.join('.')
@@ -44,61 +44,123 @@ class TestTexterraAPI < Minitest::Test
44
44
  end
45
45
 
46
46
  def test_tweet_normalization
47
- assert_instance_of Array, @texterra.tweet_normalization(@en_tweet)
48
- assert_instance_of Array, @texterra.tweet_normalization(@ru_tweet)
47
+ res = @texterra.tweet_normalization(@en_tweet)
48
+ assert_instance_of Hash, res
49
+ assert_equal @en_tweet, res[:text]
50
+ res = @texterra.tweet_normalization(@ru_tweet)
51
+ assert_instance_of Hash, res
52
+ assert_equal @ru_tweet, res[:text]
49
53
  end
50
54
 
51
55
  def test_syntax_detection
52
- assert_instance_of Array, @texterra.syntax_detection(@ru_text)
56
+ res = @texterra.syntax_detection(@ru_text)
57
+ assert_instance_of Hash, res
58
+ assert_equal @ru_text, res[:text]
53
59
  end
54
60
 
55
61
  def test_language_detection_annotate
56
- assert_instance_of Array, @texterra.language_detection_annotate(@en_text)
57
- assert_instance_of Array, @texterra.language_detection_annotate(@ru_text)
58
- assert_instance_of Array, @texterra.language_detection_annotate(@en_tweet)
59
- assert_instance_of Array, @texterra.language_detection_annotate(@ru_tweet)
62
+ res = @texterra.language_detection_annotate(@en_text)
63
+ assert_instance_of Hash, res
64
+ assert_equal @en_text, res[:text]
65
+ res = @texterra.language_detection_annotate(@ru_text)
66
+ assert_instance_of Hash, res
67
+ assert_equal @ru_text, res[:text]
68
+ res = @texterra.language_detection_annotate(@en_tweet)
69
+ assert_instance_of Hash, res
70
+ assert_equal @en_tweet, res[:text]
71
+ res = @texterra.language_detection_annotate(@ru_tweet)
72
+ assert_instance_of Hash, res
73
+ assert_equal @ru_tweet, res[:text]
60
74
  end
61
75
 
62
76
  def test_sentence_detection_annotate
63
- assert_instance_of Array, @texterra.sentence_detection_annotate(@en_text)
64
- assert_instance_of Array, @texterra.sentence_detection_annotate(@ru_text)
65
- assert_instance_of Array, @texterra.sentence_detection_annotate(@en_tweet)
66
- assert_instance_of Array, @texterra.sentence_detection_annotate(@ru_tweet)
77
+ res = @texterra.sentence_detection_annotate(@en_text)
78
+ assert_instance_of Hash, res
79
+ assert_equal @en_text, res[:text]
80
+ res = @texterra.sentence_detection_annotate(@ru_text)
81
+ assert_instance_of Hash, res
82
+ assert_equal @ru_text, res[:text]
83
+ res = @texterra.sentence_detection_annotate(@en_tweet)
84
+ assert_instance_of Hash, res
85
+ assert_equal @en_tweet, res[:text]
86
+ res = @texterra.sentence_detection_annotate(@ru_tweet)
87
+ assert_instance_of Hash, res
88
+ assert_equal @ru_tweet, res[:text]
67
89
  end
68
90
 
69
91
  def test_tokenization_annotate
70
- assert_instance_of Array, @texterra.tokenization_annotate(@en_text)
71
- assert_instance_of Array, @texterra.tokenization_annotate(@ru_text)
72
- assert_instance_of Array, @texterra.tokenization_annotate(@en_tweet)
73
- assert_instance_of Array, @texterra.tokenization_annotate(@ru_tweet)
92
+ res = @texterra.tokenization_annotate(@en_text)
93
+ assert_instance_of Hash, res
94
+ assert_equal @en_text, res[:text]
95
+ res = @texterra.tokenization_annotate(@ru_text)
96
+ assert_instance_of Hash, res
97
+ assert_equal @ru_text, res[:text]
98
+ res = @texterra.tokenization_annotate(@en_tweet)
99
+ assert_instance_of Hash, res
100
+ assert_equal @en_tweet, res[:text]
101
+ res = @texterra.tokenization_annotate(@ru_tweet)
102
+ assert_instance_of Hash, res
103
+ assert_equal @ru_tweet, res[:text]
74
104
  end
75
105
 
76
106
  def test_lemmatization_annotate
77
- assert_instance_of Array, @texterra.lemmatization_annotate(@en_text)
78
- assert_instance_of Array, @texterra.lemmatization_annotate(@ru_text)
79
- assert_instance_of Array, @texterra.lemmatization_annotate(@en_tweet)
80
- assert_instance_of Array, @texterra.lemmatization_annotate(@ru_tweet)
107
+ res = @texterra.lemmatization_annotate(@en_text)
108
+ assert_instance_of Hash, res
109
+ assert_equal @en_text, res[:text]
110
+ res = @texterra.lemmatization_annotate(@ru_text)
111
+ assert_instance_of Hash, res
112
+ assert_equal @ru_text, res[:text]
113
+ res = @texterra.lemmatization_annotate(@en_tweet)
114
+ assert_instance_of Hash, res
115
+ assert_equal @en_tweet, res[:text]
116
+ res = @texterra.lemmatization_annotate(@ru_tweet)
117
+ assert_instance_of Hash, res
118
+ assert_equal @ru_tweet, res[:text]
81
119
  end
82
120
 
83
121
  def test_pos_tagging_annotate
84
- assert_instance_of Array, @texterra.pos_tagging_annotate(@en_text)
85
- assert_instance_of Array, @texterra.pos_tagging_annotate(@ru_text)
86
- assert_instance_of Array, @texterra.pos_tagging_annotate(@en_tweet)
87
- assert_instance_of Array, @texterra.pos_tagging_annotate(@ru_tweet)
122
+ res = @texterra.pos_tagging_annotate(@en_text)
123
+ assert_instance_of Hash, res
124
+ assert_equal @en_text, res[:text]
125
+ res = @texterra.pos_tagging_annotate(@ru_text)
126
+ assert_instance_of Hash, res
127
+ assert_equal @ru_text, res[:text]
128
+ res = @texterra.pos_tagging_annotate(@en_tweet)
129
+ assert_instance_of Hash, res
130
+ assert_equal @en_tweet, res[:text]
131
+ res = @texterra.pos_tagging_annotate(@ru_tweet)
132
+ assert_instance_of Hash, res
133
+ assert_equal @ru_tweet, res[:text]
88
134
  end
89
135
 
90
136
  def test_named_entities_annotate
91
- assert_instance_of Array, @texterra.named_entities_annotate(@en_text)
92
- assert_instance_of Array, @texterra.named_entities_annotate(@ru_text)
93
- assert_instance_of Array, @texterra.named_entities_annotate(@en_tweet)
94
- assert_instance_of Array, @texterra.named_entities_annotate(@ru_tweet)
137
+ res = @texterra.named_entities_annotate(@en_text)
138
+ assert_instance_of Hash, res
139
+ assert_equal @en_text, res[:text]
140
+ res = @texterra.named_entities_annotate(@ru_text)
141
+ assert_instance_of Hash, res
142
+ assert_equal @ru_text, res[:text]
143
+ res = @texterra.named_entities_annotate(@en_tweet)
144
+ assert_instance_of Hash, res
145
+ assert_equal @en_tweet, res[:text]
146
+ res = @texterra.named_entities_annotate(@ru_tweet)
147
+ assert_instance_of Hash, res
148
+ assert_equal @ru_tweet, res[:text]
95
149
  end
96
150
 
97
151
  def test_subjectivity_detection_annotate
98
- assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_text)
99
- assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_text)
100
- assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_tweet)
101
- assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_tweet)
152
+ res = @texterra.subjectivity_detection_annotate(@en_text)
153
+ assert_instance_of Hash, res
154
+ assert_equal @en_text, res[:text]
155
+ res = @texterra.subjectivity_detection_annotate(@ru_text)
156
+ assert_instance_of Hash, res
157
+ assert_equal @ru_text, res[:text]
158
+ res = @texterra.subjectivity_detection_annotate(@en_tweet)
159
+ assert_instance_of Hash, res
160
+ assert_equal @en_tweet, res[:text]
161
+ res = @texterra.subjectivity_detection_annotate(@ru_tweet)
162
+ assert_instance_of Hash, res
163
+ assert_equal @ru_tweet, res[:text]
102
164
  end
103
165
 
104
166
  def test_representation_terms
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ispras-api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexey Laguta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-09 00:00:00.000000000 Z
11
+ date: 2016-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty