ispras-api 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ispras-api/ispras_api.rb +27 -4
- data/lib/ispras-api/texterra/nlp.rb +30 -26
- data/lib/ispras-api/texterra_api.rb +9 -12
- data/lib/ispras-api/version.rb +3 -3
- data/test/test_texterra_api.rb +93 -31
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e13c5d64eacc2525ca461d5995d44f502f70b988
|
4
|
+
data.tar.gz: 098f036364aa088ac20883b88baeab517902e764
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 792870423e6d110a92f3ff8efe29152032a87fc73796f114d86707bb86831e36df6d71f3bae0e27e71a6ed1c02b023e5d7c87eae3dd31460574f5923dcdda595
|
7
|
+
data.tar.gz: 68dcabf74d6edfe6788868be755deae38566641738285d9161ed0f0b7ab3a333dbbf283e6fb33192f4cc3e16bd9c70c543113a2fd3120e226b8342c165cf8482
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'json'
|
1
2
|
require 'httparty'
|
2
3
|
require 'nori'
|
3
4
|
require_relative './api_error'
|
@@ -17,20 +18,42 @@ class IsprasAPI
|
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
20
|
-
def GET(path = '', params = {})
|
21
|
-
options = {
|
21
|
+
def GET(path = '', params = {}, format=:xml)
|
22
|
+
options = {
|
23
|
+
headers: headers(format),
|
24
|
+
query: params
|
25
|
+
}
|
22
26
|
response = self.class.get "/#{path}", options
|
23
27
|
response.code == 200 ? response.parsed_response : check_error(response)
|
24
28
|
end
|
25
29
|
|
26
|
-
def POST(path = '', params = {},
|
27
|
-
options = {
|
30
|
+
def POST(path = '', params = {}, body = {}, format=:xml)
|
31
|
+
options = {
|
32
|
+
headers: headers(format),
|
33
|
+
query: params,
|
34
|
+
body: body
|
35
|
+
}
|
28
36
|
response = self.class.post "/#{path}", options
|
29
37
|
response.code == 200 ? response.parsed_response : check_error(response)
|
30
38
|
end
|
31
39
|
|
32
40
|
private
|
33
41
|
|
42
|
+
def headers(format)
|
43
|
+
case(format)
|
44
|
+
when :json
|
45
|
+
{
|
46
|
+
'Accept' => 'application/json'
|
47
|
+
}
|
48
|
+
when :xml
|
49
|
+
{
|
50
|
+
'Accept' => 'application/xml'
|
51
|
+
}
|
52
|
+
else
|
53
|
+
{}
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
34
57
|
def check_error(response)
|
35
58
|
fail ApiError, "#{response.code} Error occured"
|
36
59
|
end
|
@@ -5,7 +5,7 @@ module TexterraNLP
|
|
5
5
|
# Detects language of given text
|
6
6
|
#
|
7
7
|
# @param [String] text Text to process
|
8
|
-
# @return [
|
8
|
+
# @return [Hash] Texterra document
|
9
9
|
def language_detection_annotate(text)
|
10
10
|
preset_nlp(:languageDetection, text)
|
11
11
|
end
|
@@ -13,7 +13,7 @@ module TexterraNLP
|
|
13
13
|
# Detects boundaries of sentences in a given text
|
14
14
|
#
|
15
15
|
# @param [String] text Text to process
|
16
|
-
# @return [
|
16
|
+
# @return [Hash] Texterra document
|
17
17
|
def sentence_detection_annotate(text)
|
18
18
|
preset_nlp(:sentenceDetection, text)
|
19
19
|
end
|
@@ -21,7 +21,7 @@ module TexterraNLP
|
|
21
21
|
# Detects all tokens (minimal significant text parts) in a given text
|
22
22
|
#
|
23
23
|
# @param [String] text Text to process
|
24
|
-
# @return [
|
24
|
+
# @return [Hash] Texterra document
|
25
25
|
def tokenization_annotate(text)
|
26
26
|
preset_nlp(:tokenization, text)
|
27
27
|
end
|
@@ -29,7 +29,7 @@ module TexterraNLP
|
|
29
29
|
# Detects lemma of each word of a given text
|
30
30
|
#
|
31
31
|
# @param [String] text Text to process
|
32
|
-
# @return [
|
32
|
+
# @return [Hash] Texterra document
|
33
33
|
def lemmatization_annotate(text)
|
34
34
|
preset_nlp(:lemmatization, text)
|
35
35
|
end
|
@@ -37,7 +37,7 @@ module TexterraNLP
|
|
37
37
|
# Detects part of speech tag for each word of a given text
|
38
38
|
#
|
39
39
|
# @param [String] text Text to process
|
40
|
-
# @return [
|
40
|
+
# @return [Hash] Texterra document
|
41
41
|
def pos_tagging_annotate(text)
|
42
42
|
preset_nlp(:posTagging, text)
|
43
43
|
end
|
@@ -45,7 +45,7 @@ module TexterraNLP
|
|
45
45
|
# Tries to correct misprints and other spelling errors in a given text
|
46
46
|
#
|
47
47
|
# @param [String] text Text to process
|
48
|
-
# @return [
|
48
|
+
# @return [Hash] Texterra document
|
49
49
|
def spelling_correction_annotate(text)
|
50
50
|
preset_nlp(:spellingCorrection, text)
|
51
51
|
end
|
@@ -53,7 +53,7 @@ module TexterraNLP
|
|
53
53
|
# Finds all named entity occurrences in a given text
|
54
54
|
#
|
55
55
|
# @param [String] text Text to process
|
56
|
-
# @return [
|
56
|
+
# @return [Hash] Texterra document
|
57
57
|
def named_entities_annotate(text)
|
58
58
|
preset_nlp(:namedEntities, text)
|
59
59
|
end
|
@@ -61,7 +61,7 @@ module TexterraNLP
|
|
61
61
|
# Extracts not overlapping terms within a given text; term is a textual representation for some concept of the real world
|
62
62
|
#
|
63
63
|
# @param [String] text Text to process
|
64
|
-
# @return [
|
64
|
+
# @return [Hash] Texterra document
|
65
65
|
def term_detection_annotate(text)
|
66
66
|
preset_nlp(:termDetection, text)
|
67
67
|
end
|
@@ -69,7 +69,7 @@ module TexterraNLP
|
|
69
69
|
# Detects the most appropriate meanings (concepts) for terms occurring in a given text
|
70
70
|
#
|
71
71
|
# @param [String] text Text to process
|
72
|
-
# @return [
|
72
|
+
# @return [Hash] Texterra document
|
73
73
|
def disambiguation_annotate(text)
|
74
74
|
preset_nlp(:disambiguation, text)
|
75
75
|
end
|
@@ -78,7 +78,7 @@ module TexterraNLP
|
|
78
78
|
# This service extracts a set of key concepts for a given text
|
79
79
|
#
|
80
80
|
# @param [String] text Text to process
|
81
|
-
# @return [
|
81
|
+
# @return [Hash] Texterra document
|
82
82
|
def key_concepts_annotate(text)
|
83
83
|
preset_nlp(:keyConcepts, text)
|
84
84
|
end
|
@@ -88,7 +88,7 @@ module TexterraNLP
|
|
88
88
|
# If no domain from this list has been detected, the text is assumed to be no domain, or general domain
|
89
89
|
#
|
90
90
|
# @param [String] text Text to process
|
91
|
-
# @return [
|
91
|
+
# @return [Hash] Texterra document
|
92
92
|
def domain_detection_annotate(text)
|
93
93
|
preset_nlp(:domainDetection, text)
|
94
94
|
end
|
@@ -96,7 +96,7 @@ module TexterraNLP
|
|
96
96
|
# Detects whether the given text is subjective or not
|
97
97
|
#
|
98
98
|
# @param [String] text Text to process
|
99
|
-
# @return [
|
99
|
+
# @return [Hash] Texterra document
|
100
100
|
def subjectivity_detection_annotate(text)
|
101
101
|
preset_nlp(:subjectivityDetection, text)
|
102
102
|
end
|
@@ -104,7 +104,7 @@ module TexterraNLP
|
|
104
104
|
# Detects whether the given text has positive, negative or no sentiment
|
105
105
|
#
|
106
106
|
# @param [String] text Text to process
|
107
|
-
# @return [
|
107
|
+
# @return [Hash] Texterra document
|
108
108
|
def polarity_detection_annotate(text)
|
109
109
|
preset_nlp(:polarityDetection, text)
|
110
110
|
end
|
@@ -115,21 +115,22 @@ module TexterraNLP
|
|
115
115
|
#
|
116
116
|
# @param [String] text Text to process
|
117
117
|
# @param [String] domain Domain for polarity detection
|
118
|
-
# @return [
|
118
|
+
# @return [Hash] Texterra document
|
119
119
|
def domain_polarity_detection_annotate(text, domain = '')
|
120
120
|
specs = NLP_SPECS[:domainPolarityDetection]
|
121
121
|
domain = "(#{domain})" unless domain.empty?
|
122
|
-
result = POST(specs[:path] % domain, specs[:params], text: text)
|
123
|
-
|
124
|
-
|
125
|
-
|
122
|
+
result = POST(specs[:path] % domain, specs[:params], {text: text}, :json)
|
123
|
+
result[:annotations].each do |key, value|
|
124
|
+
value.map! { |an| assign_text(an, text) }
|
125
|
+
end
|
126
|
+
result
|
126
127
|
end
|
127
128
|
|
128
129
|
# Detects Twitter-specific entities: Hashtags, User names, Emoticons, URLs.
|
129
130
|
# And also: Stop-words, Misspellings, Spelling suggestions, Spelling corrections
|
130
131
|
#
|
131
132
|
# @param [String] text Text to process
|
132
|
-
# @return [
|
133
|
+
# @return [Hash] Texterra document
|
133
134
|
def tweet_normalization(text)
|
134
135
|
preset_nlp(:tweetNormalization, text)
|
135
136
|
end
|
@@ -137,11 +138,13 @@ module TexterraNLP
|
|
137
138
|
# Detects syntax relations in text. Only works for Russian texts
|
138
139
|
#
|
139
140
|
# @param [String] text Text to process
|
140
|
-
# @return [
|
141
|
+
# @return [Hash] Texterra document
|
141
142
|
def syntax_detection(text)
|
142
|
-
preset_nlp(:syntaxDetection, text)
|
143
|
-
|
143
|
+
result = preset_nlp(:syntaxDetection, text)
|
144
|
+
result[:annotations][:'syntax-relation'].each do |an|
|
145
|
+
an[:value][:parent] = assign_text(an[:value][:parent], text) if an[:value] && an[:value][:parent]
|
144
146
|
end
|
147
|
+
result
|
145
148
|
end
|
146
149
|
|
147
150
|
private
|
@@ -149,10 +152,11 @@ module TexterraNLP
|
|
149
152
|
# Utility NLP part method
|
150
153
|
def preset_nlp(methodName, text)
|
151
154
|
specs = NLP_SPECS[methodName]
|
152
|
-
result = POST(specs[:path], specs[:params], text: text)
|
153
|
-
|
154
|
-
|
155
|
-
|
155
|
+
result = POST(specs[:path], specs[:params], {text: text}, :json)
|
156
|
+
result[:annotations].each do |key, value|
|
157
|
+
value.map! { |an| assign_text(an, text) }
|
158
|
+
end
|
159
|
+
result
|
156
160
|
end
|
157
161
|
|
158
162
|
# Utility text assignment for annotation
|
@@ -38,20 +38,15 @@ class TexterraAPI < IsprasAPI
|
|
38
38
|
# @param [String] text Text to process
|
39
39
|
# @return [Array] Array of weighted key concepts
|
40
40
|
def key_concepts(text)
|
41
|
-
key_concepts = key_concepts_annotate(text)[
|
42
|
-
key_concepts = [].push key_concepts unless key_concepts.is_a? Array
|
43
|
-
key_concepts.map do |kc|
|
44
|
-
kc[:concept][:weight] = kc[:double]
|
45
|
-
kc[:concept]
|
46
|
-
end
|
41
|
+
key_concepts = key_concepts_annotate(text)[:annotations][:keyconcepts][0][:value] || []
|
47
42
|
end
|
48
43
|
|
49
44
|
# Detects whether the given text has positive, negative or no sentiment
|
50
45
|
#
|
51
46
|
# @param [String] text Text to process
|
52
|
-
# @return [
|
47
|
+
# @return [String] Sentiment of the text
|
53
48
|
def sentiment_analysis(text)
|
54
|
-
polarity_detection_annotate(text)[0][:value].to_s || 'NEUTRAL'
|
49
|
+
polarity_detection_annotate(text)[:annotations][:polarity][0][:value].to_s || 'NEUTRAL'
|
55
50
|
rescue NoMethodError
|
56
51
|
'NEUTRAL'
|
57
52
|
end
|
@@ -66,9 +61,11 @@ class TexterraAPI < IsprasAPI
|
|
66
61
|
def domain_sentiment_analysis(text, domain = '')
|
67
62
|
used_domain = 'general'
|
68
63
|
sentiment = 'NEUTRAL'
|
69
|
-
|
70
|
-
|
71
|
-
used_domain =
|
64
|
+
annotations = domain_polarity_detection_annotate(text, domain)[:annotations]
|
65
|
+
begin
|
66
|
+
used_domain = annotations[:domain][0][:value]
|
67
|
+
sentiment = annotations[:polarity][0][:value]
|
68
|
+
rescue NoMethodError
|
72
69
|
end
|
73
70
|
{
|
74
71
|
domain: used_domain,
|
@@ -81,7 +78,7 @@ class TexterraAPI < IsprasAPI
|
|
81
78
|
# @param [String] text Text to process
|
82
79
|
# @return [Array] Texterra annotations
|
83
80
|
def disambiguation(text)
|
84
|
-
disambiguation_annotate(text)
|
81
|
+
disambiguation_annotate(text)[:annotations][:'disambiguated-phrase']
|
85
82
|
end
|
86
83
|
|
87
84
|
def custom_query(path, query, form = nil)
|
data/lib/ispras-api/version.rb
CHANGED
data/test/test_texterra_api.rb
CHANGED
@@ -44,61 +44,123 @@ class TestTexterraAPI < Minitest::Test
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def test_tweet_normalization
|
47
|
-
|
48
|
-
assert_instance_of
|
47
|
+
res = @texterra.tweet_normalization(@en_tweet)
|
48
|
+
assert_instance_of Hash, res
|
49
|
+
assert_equal @en_tweet, res[:text]
|
50
|
+
res = @texterra.tweet_normalization(@ru_tweet)
|
51
|
+
assert_instance_of Hash, res
|
52
|
+
assert_equal @ru_tweet, res[:text]
|
49
53
|
end
|
50
54
|
|
51
55
|
def test_syntax_detection
|
52
|
-
|
56
|
+
res = @texterra.syntax_detection(@ru_text)
|
57
|
+
assert_instance_of Hash, res
|
58
|
+
assert_equal @ru_text, res[:text]
|
53
59
|
end
|
54
60
|
|
55
61
|
def test_language_detection_annotate
|
56
|
-
|
57
|
-
assert_instance_of
|
58
|
-
|
59
|
-
|
62
|
+
res = @texterra.language_detection_annotate(@en_text)
|
63
|
+
assert_instance_of Hash, res
|
64
|
+
assert_equal @en_text, res[:text]
|
65
|
+
res = @texterra.language_detection_annotate(@ru_text)
|
66
|
+
assert_instance_of Hash, res
|
67
|
+
assert_equal @ru_text, res[:text]
|
68
|
+
res = @texterra.language_detection_annotate(@en_tweet)
|
69
|
+
assert_instance_of Hash, res
|
70
|
+
assert_equal @en_tweet, res[:text]
|
71
|
+
res = @texterra.language_detection_annotate(@ru_tweet)
|
72
|
+
assert_instance_of Hash, res
|
73
|
+
assert_equal @ru_tweet, res[:text]
|
60
74
|
end
|
61
75
|
|
62
76
|
def test_sentence_detection_annotate
|
63
|
-
|
64
|
-
assert_instance_of
|
65
|
-
|
66
|
-
|
77
|
+
res = @texterra.sentence_detection_annotate(@en_text)
|
78
|
+
assert_instance_of Hash, res
|
79
|
+
assert_equal @en_text, res[:text]
|
80
|
+
res = @texterra.sentence_detection_annotate(@ru_text)
|
81
|
+
assert_instance_of Hash, res
|
82
|
+
assert_equal @ru_text, res[:text]
|
83
|
+
res = @texterra.sentence_detection_annotate(@en_tweet)
|
84
|
+
assert_instance_of Hash, res
|
85
|
+
assert_equal @en_tweet, res[:text]
|
86
|
+
res = @texterra.sentence_detection_annotate(@ru_tweet)
|
87
|
+
assert_instance_of Hash, res
|
88
|
+
assert_equal @ru_tweet, res[:text]
|
67
89
|
end
|
68
90
|
|
69
91
|
def test_tokenization_annotate
|
70
|
-
|
71
|
-
assert_instance_of
|
72
|
-
|
73
|
-
|
92
|
+
res = @texterra.tokenization_annotate(@en_text)
|
93
|
+
assert_instance_of Hash, res
|
94
|
+
assert_equal @en_text, res[:text]
|
95
|
+
res = @texterra.tokenization_annotate(@ru_text)
|
96
|
+
assert_instance_of Hash, res
|
97
|
+
assert_equal @ru_text, res[:text]
|
98
|
+
res = @texterra.tokenization_annotate(@en_tweet)
|
99
|
+
assert_instance_of Hash, res
|
100
|
+
assert_equal @en_tweet, res[:text]
|
101
|
+
res = @texterra.tokenization_annotate(@ru_tweet)
|
102
|
+
assert_instance_of Hash, res
|
103
|
+
assert_equal @ru_tweet, res[:text]
|
74
104
|
end
|
75
105
|
|
76
106
|
def test_lemmatization_annotate
|
77
|
-
|
78
|
-
assert_instance_of
|
79
|
-
|
80
|
-
|
107
|
+
res = @texterra.lemmatization_annotate(@en_text)
|
108
|
+
assert_instance_of Hash, res
|
109
|
+
assert_equal @en_text, res[:text]
|
110
|
+
res = @texterra.lemmatization_annotate(@ru_text)
|
111
|
+
assert_instance_of Hash, res
|
112
|
+
assert_equal @ru_text, res[:text]
|
113
|
+
res = @texterra.lemmatization_annotate(@en_tweet)
|
114
|
+
assert_instance_of Hash, res
|
115
|
+
assert_equal @en_tweet, res[:text]
|
116
|
+
res = @texterra.lemmatization_annotate(@ru_tweet)
|
117
|
+
assert_instance_of Hash, res
|
118
|
+
assert_equal @ru_tweet, res[:text]
|
81
119
|
end
|
82
120
|
|
83
121
|
def test_pos_tagging_annotate
|
84
|
-
|
85
|
-
assert_instance_of
|
86
|
-
|
87
|
-
|
122
|
+
res = @texterra.pos_tagging_annotate(@en_text)
|
123
|
+
assert_instance_of Hash, res
|
124
|
+
assert_equal @en_text, res[:text]
|
125
|
+
res = @texterra.pos_tagging_annotate(@ru_text)
|
126
|
+
assert_instance_of Hash, res
|
127
|
+
assert_equal @ru_text, res[:text]
|
128
|
+
res = @texterra.pos_tagging_annotate(@en_tweet)
|
129
|
+
assert_instance_of Hash, res
|
130
|
+
assert_equal @en_tweet, res[:text]
|
131
|
+
res = @texterra.pos_tagging_annotate(@ru_tweet)
|
132
|
+
assert_instance_of Hash, res
|
133
|
+
assert_equal @ru_tweet, res[:text]
|
88
134
|
end
|
89
135
|
|
90
136
|
def test_named_entities_annotate
|
91
|
-
|
92
|
-
assert_instance_of
|
93
|
-
|
94
|
-
|
137
|
+
res = @texterra.named_entities_annotate(@en_text)
|
138
|
+
assert_instance_of Hash, res
|
139
|
+
assert_equal @en_text, res[:text]
|
140
|
+
res = @texterra.named_entities_annotate(@ru_text)
|
141
|
+
assert_instance_of Hash, res
|
142
|
+
assert_equal @ru_text, res[:text]
|
143
|
+
res = @texterra.named_entities_annotate(@en_tweet)
|
144
|
+
assert_instance_of Hash, res
|
145
|
+
assert_equal @en_tweet, res[:text]
|
146
|
+
res = @texterra.named_entities_annotate(@ru_tweet)
|
147
|
+
assert_instance_of Hash, res
|
148
|
+
assert_equal @ru_tweet, res[:text]
|
95
149
|
end
|
96
150
|
|
97
151
|
def test_subjectivity_detection_annotate
|
98
|
-
|
99
|
-
assert_instance_of
|
100
|
-
|
101
|
-
|
152
|
+
res = @texterra.subjectivity_detection_annotate(@en_text)
|
153
|
+
assert_instance_of Hash, res
|
154
|
+
assert_equal @en_text, res[:text]
|
155
|
+
res = @texterra.subjectivity_detection_annotate(@ru_text)
|
156
|
+
assert_instance_of Hash, res
|
157
|
+
assert_equal @ru_text, res[:text]
|
158
|
+
res = @texterra.subjectivity_detection_annotate(@en_tweet)
|
159
|
+
assert_instance_of Hash, res
|
160
|
+
assert_equal @en_tweet, res[:text]
|
161
|
+
res = @texterra.subjectivity_detection_annotate(@ru_tweet)
|
162
|
+
assert_instance_of Hash, res
|
163
|
+
assert_equal @ru_tweet, res[:text]
|
102
164
|
end
|
103
165
|
|
104
166
|
def test_representation_terms
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ispras-api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexey Laguta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|