ispras-api 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ispras-api/ispras_api.rb +27 -4
- data/lib/ispras-api/texterra/nlp.rb +30 -26
- data/lib/ispras-api/texterra_api.rb +9 -12
- data/lib/ispras-api/version.rb +3 -3
- data/test/test_texterra_api.rb +93 -31
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e13c5d64eacc2525ca461d5995d44f502f70b988
|
4
|
+
data.tar.gz: 098f036364aa088ac20883b88baeab517902e764
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 792870423e6d110a92f3ff8efe29152032a87fc73796f114d86707bb86831e36df6d71f3bae0e27e71a6ed1c02b023e5d7c87eae3dd31460574f5923dcdda595
|
7
|
+
data.tar.gz: 68dcabf74d6edfe6788868be755deae38566641738285d9161ed0f0b7ab3a333dbbf283e6fb33192f4cc3e16bd9c70c543113a2fd3120e226b8342c165cf8482
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'json'
|
1
2
|
require 'httparty'
|
2
3
|
require 'nori'
|
3
4
|
require_relative './api_error'
|
@@ -17,20 +18,42 @@ class IsprasAPI
|
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
20
|
-
def GET(path = '', params = {})
|
21
|
-
options = {
|
21
|
+
def GET(path = '', params = {}, format=:xml)
|
22
|
+
options = {
|
23
|
+
headers: headers(format),
|
24
|
+
query: params
|
25
|
+
}
|
22
26
|
response = self.class.get "/#{path}", options
|
23
27
|
response.code == 200 ? response.parsed_response : check_error(response)
|
24
28
|
end
|
25
29
|
|
26
|
-
def POST(path = '', params = {},
|
27
|
-
options = {
|
30
|
+
def POST(path = '', params = {}, body = {}, format=:xml)
|
31
|
+
options = {
|
32
|
+
headers: headers(format),
|
33
|
+
query: params,
|
34
|
+
body: body
|
35
|
+
}
|
28
36
|
response = self.class.post "/#{path}", options
|
29
37
|
response.code == 200 ? response.parsed_response : check_error(response)
|
30
38
|
end
|
31
39
|
|
32
40
|
private
|
33
41
|
|
42
|
+
def headers(format)
|
43
|
+
case(format)
|
44
|
+
when :json
|
45
|
+
{
|
46
|
+
'Accept' => 'application/json'
|
47
|
+
}
|
48
|
+
when :xml
|
49
|
+
{
|
50
|
+
'Accept' => 'application/xml'
|
51
|
+
}
|
52
|
+
else
|
53
|
+
{}
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
34
57
|
def check_error(response)
|
35
58
|
fail ApiError, "#{response.code} Error occured"
|
36
59
|
end
|
@@ -5,7 +5,7 @@ module TexterraNLP
|
|
5
5
|
# Detects language of given text
|
6
6
|
#
|
7
7
|
# @param [String] text Text to process
|
8
|
-
# @return [
|
8
|
+
# @return [Hash] Texterra document
|
9
9
|
def language_detection_annotate(text)
|
10
10
|
preset_nlp(:languageDetection, text)
|
11
11
|
end
|
@@ -13,7 +13,7 @@ module TexterraNLP
|
|
13
13
|
# Detects boundaries of sentences in a given text
|
14
14
|
#
|
15
15
|
# @param [String] text Text to process
|
16
|
-
# @return [
|
16
|
+
# @return [Hash] Texterra document
|
17
17
|
def sentence_detection_annotate(text)
|
18
18
|
preset_nlp(:sentenceDetection, text)
|
19
19
|
end
|
@@ -21,7 +21,7 @@ module TexterraNLP
|
|
21
21
|
# Detects all tokens (minimal significant text parts) in a given text
|
22
22
|
#
|
23
23
|
# @param [String] text Text to process
|
24
|
-
# @return [
|
24
|
+
# @return [Hash] Texterra document
|
25
25
|
def tokenization_annotate(text)
|
26
26
|
preset_nlp(:tokenization, text)
|
27
27
|
end
|
@@ -29,7 +29,7 @@ module TexterraNLP
|
|
29
29
|
# Detects lemma of each word of a given text
|
30
30
|
#
|
31
31
|
# @param [String] text Text to process
|
32
|
-
# @return [
|
32
|
+
# @return [Hash] Texterra document
|
33
33
|
def lemmatization_annotate(text)
|
34
34
|
preset_nlp(:lemmatization, text)
|
35
35
|
end
|
@@ -37,7 +37,7 @@ module TexterraNLP
|
|
37
37
|
# Detects part of speech tag for each word of a given text
|
38
38
|
#
|
39
39
|
# @param [String] text Text to process
|
40
|
-
# @return [
|
40
|
+
# @return [Hash] Texterra document
|
41
41
|
def pos_tagging_annotate(text)
|
42
42
|
preset_nlp(:posTagging, text)
|
43
43
|
end
|
@@ -45,7 +45,7 @@ module TexterraNLP
|
|
45
45
|
# Tries to correct disprints and other spelling errors in a given text
|
46
46
|
#
|
47
47
|
# @param [String] text Text to process
|
48
|
-
# @return [
|
48
|
+
# @return [Hash] Texterra document
|
49
49
|
def spelling_correction_annotate(text)
|
50
50
|
preset_nlp(:spellingCorrection, text)
|
51
51
|
end
|
@@ -53,7 +53,7 @@ module TexterraNLP
|
|
53
53
|
# Finds all named entities occurences in a given text
|
54
54
|
#
|
55
55
|
# @param [String] text Text to process
|
56
|
-
# @return [
|
56
|
+
# @return [Hash] Texterra document
|
57
57
|
def named_entities_annotate(text)
|
58
58
|
preset_nlp(:namedEntities, text)
|
59
59
|
end
|
@@ -61,7 +61,7 @@ module TexterraNLP
|
|
61
61
|
# Extracts not overlapping terms within a given text; term is a textual representation for some concept of the real world
|
62
62
|
#
|
63
63
|
# @param [String] text Text to process
|
64
|
-
# @return [
|
64
|
+
# @return [Hash] Texterra document
|
65
65
|
def term_detection_annotate(text)
|
66
66
|
preset_nlp(:termDetection, text)
|
67
67
|
end
|
@@ -69,7 +69,7 @@ module TexterraNLP
|
|
69
69
|
# Detects the most appropriate meanings (concepts) for terms occurred in a given text
|
70
70
|
#
|
71
71
|
# @param [String] text Text to process
|
72
|
-
# @return [
|
72
|
+
# @return [Hash] Texterra document
|
73
73
|
def disambiguation_annotate(text)
|
74
74
|
preset_nlp(:disambiguation, text)
|
75
75
|
end
|
@@ -78,7 +78,7 @@ module TexterraNLP
|
|
78
78
|
# This service extracts a set of key concepts for a given text
|
79
79
|
#
|
80
80
|
# @param [String] text Text to process
|
81
|
-
# @return [
|
81
|
+
# @return [Hash] Texterra document
|
82
82
|
def key_concepts_annotate(text)
|
83
83
|
preset_nlp(:keyConcepts, text)
|
84
84
|
end
|
@@ -88,7 +88,7 @@ module TexterraNLP
|
|
88
88
|
# If no domain from this list has been detected, the text is assumed to be no domain, or general domain
|
89
89
|
#
|
90
90
|
# @param [String] text Text to process
|
91
|
-
# @return [
|
91
|
+
# @return [Hash] Texterra document
|
92
92
|
def domain_detection_annotate(text)
|
93
93
|
preset_nlp(:domainDetection, text)
|
94
94
|
end
|
@@ -96,7 +96,7 @@ module TexterraNLP
|
|
96
96
|
# Detects whether the given text is subjective or not
|
97
97
|
#
|
98
98
|
# @param [String] text Text to process
|
99
|
-
# @return [
|
99
|
+
# @return [Hash] Texterra document
|
100
100
|
def subjectivity_detection_annotate(text)
|
101
101
|
preset_nlp(:subjectivityDetection, text)
|
102
102
|
end
|
@@ -104,7 +104,7 @@ module TexterraNLP
|
|
104
104
|
# Detects whether the given text has positive, negative or no sentiment
|
105
105
|
#
|
106
106
|
# @param [String] text Text to process
|
107
|
-
# @return [
|
107
|
+
# @return [Hash] Texterra document
|
108
108
|
def polarity_detection_annotate(text)
|
109
109
|
preset_nlp(:polarityDetection, text)
|
110
110
|
end
|
@@ -115,21 +115,22 @@ module TexterraNLP
|
|
115
115
|
#
|
116
116
|
# @param [String] text Text to process
|
117
117
|
# @param [String] domain Domain for polarity detection
|
118
|
-
# @return [
|
118
|
+
# @return [Hash] Texterra document
|
119
119
|
def domain_polarity_detection_annotate(text, domain = '')
|
120
120
|
specs = NLP_SPECS[:domainPolarityDetection]
|
121
121
|
domain = "(#{domain})" unless domain.empty?
|
122
|
-
result = POST(specs[:path] % domain, specs[:params], text: text)
|
123
|
-
|
124
|
-
|
125
|
-
|
122
|
+
result = POST(specs[:path] % domain, specs[:params], {text: text}, :json)
|
123
|
+
result[:annotations].each do |key, value|
|
124
|
+
value.map! { |an| assign_text(an, text) }
|
125
|
+
end
|
126
|
+
result
|
126
127
|
end
|
127
128
|
|
128
129
|
# Detects Twitter-specific entities: Hashtags, User names, Emoticons, URLs.
|
129
130
|
# And also: Stop-words, Misspellings, Spelling suggestions, Spelling corrections
|
130
131
|
#
|
131
132
|
# @param [String] text Text to process
|
132
|
-
# @return [
|
133
|
+
# @return [Hash] Texterra document
|
133
134
|
def tweet_normalization(text)
|
134
135
|
preset_nlp(:tweetNormalization, text)
|
135
136
|
end
|
@@ -137,11 +138,13 @@ module TexterraNLP
|
|
137
138
|
# Detects Syntax relations in text. Only works for russian texts
|
138
139
|
#
|
139
140
|
# @param [String] text Text to process
|
140
|
-
# @return [
|
141
|
+
# @return [Hash] Texterra document
|
141
142
|
def syntax_detection(text)
|
142
|
-
preset_nlp(:syntaxDetection, text)
|
143
|
-
|
143
|
+
result = preset_nlp(:syntaxDetection, text)
|
144
|
+
result[:annotations][:'syntax-relation'].each do |an|
|
145
|
+
an[:value][:parent] = assign_text(an[:value][:parent], text) if an[:value] && an[:value][:parent]
|
144
146
|
end
|
147
|
+
result
|
145
148
|
end
|
146
149
|
|
147
150
|
private
|
@@ -149,10 +152,11 @@ module TexterraNLP
|
|
149
152
|
# Utility NLP part method
|
150
153
|
def preset_nlp(methodName, text)
|
151
154
|
specs = NLP_SPECS[methodName]
|
152
|
-
result = POST(specs[:path], specs[:params], text: text)
|
153
|
-
|
154
|
-
|
155
|
-
|
155
|
+
result = POST(specs[:path], specs[:params], {text: text}, :json)
|
156
|
+
result[:annotations].each do |key, value|
|
157
|
+
value.map! { |an| assign_text(an, text) }
|
158
|
+
end
|
159
|
+
result
|
156
160
|
end
|
157
161
|
|
158
162
|
# Utility text assignement for annotation
|
@@ -38,20 +38,15 @@ class TexterraAPI < IsprasAPI
|
|
38
38
|
# @param [String] text Text to process
|
39
39
|
# @return [Array] Array of weighted key concepts
|
40
40
|
def key_concepts(text)
|
41
|
-
key_concepts = key_concepts_annotate(text)[
|
42
|
-
key_concepts = [].push key_concepts unless key_concepts.is_a? Array
|
43
|
-
key_concepts.map do |kc|
|
44
|
-
kc[:concept][:weight] = kc[:double]
|
45
|
-
kc[:concept]
|
46
|
-
end
|
41
|
+
key_concepts = key_concepts_annotate(text)[:annotations][:keyconcepts][0][:value] || []
|
47
42
|
end
|
48
43
|
|
49
44
|
# Detects whether the given text has positive, negative or no sentiment
|
50
45
|
#
|
51
46
|
# @param [String] text Text to process
|
52
|
-
# @return [
|
47
|
+
# @return [String] Sentiment of the text
|
53
48
|
def sentiment_analysis(text)
|
54
|
-
polarity_detection_annotate(text)[0][:value].to_s || 'NEUTRAL'
|
49
|
+
polarity_detection_annotate(text)[:annotations][:polarity][0][:value].to_s || 'NEUTRAL'
|
55
50
|
rescue NoMethodError
|
56
51
|
'NEUTRAL'
|
57
52
|
end
|
@@ -66,9 +61,11 @@ class TexterraAPI < IsprasAPI
|
|
66
61
|
def domain_sentiment_analysis(text, domain = '')
|
67
62
|
used_domain = 'general'
|
68
63
|
sentiment = 'NEUTRAL'
|
69
|
-
|
70
|
-
|
71
|
-
used_domain =
|
64
|
+
annotations = domain_polarity_detection_annotate(text, domain)[:annotations]
|
65
|
+
begin
|
66
|
+
used_domain = annotations[:domain][0][:value]
|
67
|
+
sentiment = annotations[:polarity][0][:value]
|
68
|
+
rescue NoMethodError
|
72
69
|
end
|
73
70
|
{
|
74
71
|
domain: used_domain,
|
@@ -81,7 +78,7 @@ class TexterraAPI < IsprasAPI
|
|
81
78
|
# @param [String] text Text to process
|
82
79
|
# @return [Array] Texterra annotations
|
83
80
|
def disambiguation(text)
|
84
|
-
disambiguation_annotate(text)
|
81
|
+
disambiguation_annotate(text)[:annotations][:'disambiguated-phrase']
|
85
82
|
end
|
86
83
|
|
87
84
|
def custom_query(path, query, form = nil)
|
data/lib/ispras-api/version.rb
CHANGED
data/test/test_texterra_api.rb
CHANGED
@@ -44,61 +44,123 @@ class TestTexterraAPI < Minitest::Test
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def test_tweet_normalization
|
47
|
-
|
48
|
-
assert_instance_of
|
47
|
+
res = @texterra.tweet_normalization(@en_tweet)
|
48
|
+
assert_instance_of Hash, res
|
49
|
+
assert_equal @en_tweet, res[:text]
|
50
|
+
res = @texterra.tweet_normalization(@ru_tweet)
|
51
|
+
assert_instance_of Hash, res
|
52
|
+
assert_equal @ru_tweet, res[:text]
|
49
53
|
end
|
50
54
|
|
51
55
|
def test_syntax_detection
|
52
|
-
|
56
|
+
res = @texterra.syntax_detection(@ru_text)
|
57
|
+
assert_instance_of Hash, res
|
58
|
+
assert_equal @ru_text, res[:text]
|
53
59
|
end
|
54
60
|
|
55
61
|
def test_language_detection_annotate
|
56
|
-
|
57
|
-
assert_instance_of
|
58
|
-
|
59
|
-
|
62
|
+
res = @texterra.language_detection_annotate(@en_text)
|
63
|
+
assert_instance_of Hash, res
|
64
|
+
assert_equal @en_text, res[:text]
|
65
|
+
res = @texterra.language_detection_annotate(@ru_text)
|
66
|
+
assert_instance_of Hash, res
|
67
|
+
assert_equal @ru_text, res[:text]
|
68
|
+
res = @texterra.language_detection_annotate(@en_tweet)
|
69
|
+
assert_instance_of Hash, res
|
70
|
+
assert_equal @en_tweet, res[:text]
|
71
|
+
res = @texterra.language_detection_annotate(@ru_tweet)
|
72
|
+
assert_instance_of Hash, res
|
73
|
+
assert_equal @ru_tweet, res[:text]
|
60
74
|
end
|
61
75
|
|
62
76
|
def test_sentence_detection_annotate
|
63
|
-
|
64
|
-
assert_instance_of
|
65
|
-
|
66
|
-
|
77
|
+
res = @texterra.sentence_detection_annotate(@en_text)
|
78
|
+
assert_instance_of Hash, res
|
79
|
+
assert_equal @en_text, res[:text]
|
80
|
+
res = @texterra.sentence_detection_annotate(@ru_text)
|
81
|
+
assert_instance_of Hash, res
|
82
|
+
assert_equal @ru_text, res[:text]
|
83
|
+
res = @texterra.sentence_detection_annotate(@en_tweet)
|
84
|
+
assert_instance_of Hash, res
|
85
|
+
assert_equal @en_tweet, res[:text]
|
86
|
+
res = @texterra.sentence_detection_annotate(@ru_tweet)
|
87
|
+
assert_instance_of Hash, res
|
88
|
+
assert_equal @ru_tweet, res[:text]
|
67
89
|
end
|
68
90
|
|
69
91
|
def test_tokenization_annotate
|
70
|
-
|
71
|
-
assert_instance_of
|
72
|
-
|
73
|
-
|
92
|
+
res = @texterra.tokenization_annotate(@en_text)
|
93
|
+
assert_instance_of Hash, res
|
94
|
+
assert_equal @en_text, res[:text]
|
95
|
+
res = @texterra.tokenization_annotate(@ru_text)
|
96
|
+
assert_instance_of Hash, res
|
97
|
+
assert_equal @ru_text, res[:text]
|
98
|
+
res = @texterra.tokenization_annotate(@en_tweet)
|
99
|
+
assert_instance_of Hash, res
|
100
|
+
assert_equal @en_tweet, res[:text]
|
101
|
+
res = @texterra.tokenization_annotate(@ru_tweet)
|
102
|
+
assert_instance_of Hash, res
|
103
|
+
assert_equal @ru_tweet, res[:text]
|
74
104
|
end
|
75
105
|
|
76
106
|
def test_lemmatization_annotate
|
77
|
-
|
78
|
-
assert_instance_of
|
79
|
-
|
80
|
-
|
107
|
+
res = @texterra.lemmatization_annotate(@en_text)
|
108
|
+
assert_instance_of Hash, res
|
109
|
+
assert_equal @en_text, res[:text]
|
110
|
+
res = @texterra.lemmatization_annotate(@ru_text)
|
111
|
+
assert_instance_of Hash, res
|
112
|
+
assert_equal @ru_text, res[:text]
|
113
|
+
res = @texterra.lemmatization_annotate(@en_tweet)
|
114
|
+
assert_instance_of Hash, res
|
115
|
+
assert_equal @en_tweet, res[:text]
|
116
|
+
res = @texterra.lemmatization_annotate(@ru_tweet)
|
117
|
+
assert_instance_of Hash, res
|
118
|
+
assert_equal @ru_tweet, res[:text]
|
81
119
|
end
|
82
120
|
|
83
121
|
def test_pos_tagging_annotate
|
84
|
-
|
85
|
-
assert_instance_of
|
86
|
-
|
87
|
-
|
122
|
+
res = @texterra.pos_tagging_annotate(@en_text)
|
123
|
+
assert_instance_of Hash, res
|
124
|
+
assert_equal @en_text, res[:text]
|
125
|
+
res = @texterra.pos_tagging_annotate(@ru_text)
|
126
|
+
assert_instance_of Hash, res
|
127
|
+
assert_equal @ru_text, res[:text]
|
128
|
+
res = @texterra.pos_tagging_annotate(@en_tweet)
|
129
|
+
assert_instance_of Hash, res
|
130
|
+
assert_equal @en_tweet, res[:text]
|
131
|
+
res = @texterra.pos_tagging_annotate(@ru_tweet)
|
132
|
+
assert_instance_of Hash, res
|
133
|
+
assert_equal @ru_tweet, res[:text]
|
88
134
|
end
|
89
135
|
|
90
136
|
def test_named_entities_annotate
|
91
|
-
|
92
|
-
assert_instance_of
|
93
|
-
|
94
|
-
|
137
|
+
res = @texterra.named_entities_annotate(@en_text)
|
138
|
+
assert_instance_of Hash, res
|
139
|
+
assert_equal @en_text, res[:text]
|
140
|
+
res = @texterra.named_entities_annotate(@ru_text)
|
141
|
+
assert_instance_of Hash, res
|
142
|
+
assert_equal @ru_text, res[:text]
|
143
|
+
res = @texterra.named_entities_annotate(@en_tweet)
|
144
|
+
assert_instance_of Hash, res
|
145
|
+
assert_equal @en_tweet, res[:text]
|
146
|
+
res = @texterra.named_entities_annotate(@ru_tweet)
|
147
|
+
assert_instance_of Hash, res
|
148
|
+
assert_equal @ru_tweet, res[:text]
|
95
149
|
end
|
96
150
|
|
97
151
|
def test_subjectivity_detection_annotate
|
98
|
-
|
99
|
-
assert_instance_of
|
100
|
-
|
101
|
-
|
152
|
+
res = @texterra.subjectivity_detection_annotate(@en_text)
|
153
|
+
assert_instance_of Hash, res
|
154
|
+
assert_equal @en_text, res[:text]
|
155
|
+
res = @texterra.subjectivity_detection_annotate(@ru_text)
|
156
|
+
assert_instance_of Hash, res
|
157
|
+
assert_equal @ru_text, res[:text]
|
158
|
+
res = @texterra.subjectivity_detection_annotate(@en_tweet)
|
159
|
+
assert_instance_of Hash, res
|
160
|
+
assert_equal @en_tweet, res[:text]
|
161
|
+
res = @texterra.subjectivity_detection_annotate(@ru_tweet)
|
162
|
+
assert_instance_of Hash, res
|
163
|
+
assert_equal @ru_tweet, res[:text]
|
102
164
|
end
|
103
165
|
|
104
166
|
def test_representation_terms
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ispras-api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexey Laguta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-09-
|
11
|
+
date: 2016-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|