ispras-api 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +5 -5
- data/ispras-api.gemspec +7 -7
- data/lib/ispras-api.rb +1 -1
- data/lib/ispras-api/api_error.rb +1 -1
- data/lib/ispras-api/ispras_api.rb +8 -9
- data/lib/ispras-api/texterra/kbm.rb +73 -70
- data/lib/ispras-api/texterra/kbm_specs.rb +2 -2
- data/lib/ispras-api/texterra/nlp.rb +47 -47
- data/lib/ispras-api/texterra/nlp_specs.rb +82 -83
- data/lib/ispras-api/texterra_api.rb +25 -23
- data/lib/ispras-api/twitter_api.rb +17 -14
- data/lib/ispras-api/version.rb +8 -10
- data/test/test_texterra_api.rb +60 -33
- data/test/test_twitter_api.rb +3 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1955bea741fb9dbd04c3682a99eae1b82e4a03b
|
4
|
+
data.tar.gz: 28df0c9a20ab351602bf4d50e8d9e034a0cb4880
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fd17b0e890c496deda36b6e84a4c9f5ab5de5923dde9217f29eeeff3aecdb4d86a4c7307a4451f553ff3a2698c4053c639fc718143773b8e0aa09194291e0a4
|
7
|
+
data.tar.gz: 426a60eafe53ad2c027f77d2e42e92abac8844a2f82a60ada7ac6d577ff435e9d449146a94e23efb20c85ed8d2d5603ff46b1bb6045f637345057970b3aac15b
|
data/Rakefile
CHANGED
@@ -3,16 +3,16 @@ require 'rake/testtask'
|
|
3
3
|
Rake::TestTask.new do |t|
|
4
4
|
t.name = 'test:texterra'
|
5
5
|
t.libs << 'test'
|
6
|
-
t.test_files = ['test/test_texterra_api.rb']
|
6
|
+
t.test_files = ['test/test_texterra_api.rb']
|
7
7
|
end
|
8
8
|
|
9
9
|
Rake::TestTask.new do |t|
|
10
10
|
t.name = 'test:twitter'
|
11
11
|
t.libs << 'test'
|
12
|
-
t.test_files = ['test/test_twitter_api.rb']
|
12
|
+
t.test_files = ['test/test_twitter_api.rb']
|
13
13
|
end
|
14
14
|
|
15
|
-
task :
|
15
|
+
task test: ['test:texterra', 'test:twitter']
|
16
16
|
|
17
|
-
desc
|
18
|
-
task :
|
17
|
+
desc 'Run all tests'
|
18
|
+
task default: :test
|
data/ispras-api.gemspec
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
require
|
1
|
+
require './lib/ispras-api/version'
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'ispras-api'
|
5
5
|
s.version = Version.current
|
6
6
|
s.date = Version.current_date
|
7
7
|
s.files = `git ls-files`.split($\)
|
8
|
-
s.require_paths = [
|
8
|
+
s.require_paths = ['lib']
|
9
9
|
s.add_runtime_dependency 'httparty', '~> 0.13'
|
10
10
|
s.add_runtime_dependency 'nori', '~> 2.4'
|
11
11
|
s.add_development_dependency 'rake', '~> 10.4'
|
12
12
|
s.add_development_dependency 'minitest', '~> 5.5'
|
13
13
|
s.add_development_dependency 'dotenv', '~> 1.0'
|
14
|
-
s.summary =
|
15
|
-
s.description =
|
16
|
-
s.homepage =
|
17
|
-
s.authors = [
|
14
|
+
s.summary = 'ISPRAS API Ruby SDK'
|
15
|
+
s.description = 'This is Ruby wrapper for REST API provided by ISPRAS. More info at https://api.ispras.ru/'
|
16
|
+
s.homepage = 'https://github.com/alexlag/ispapi.ruby'
|
17
|
+
s.authors = ['Alexey Laguta']
|
18
18
|
s.email = 'laguta@ispras.ru'
|
19
|
-
end
|
19
|
+
end
|
data/lib/ispras-api.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require_relative 'ispras-api/texterra_api'
|
1
|
+
require_relative 'ispras-api/texterra_api'
|
data/lib/ispras-api/api_error.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ApiError = Class.new StandardError
|
1
|
+
ApiError = Class.new StandardError
|
@@ -12,26 +12,25 @@ class IsprasAPI
|
|
12
12
|
self.class.base_uri ROOT_URL % [name, ver]
|
13
13
|
self.class.default_params apikey: key
|
14
14
|
else
|
15
|
-
|
15
|
+
fail ApiError, 'Please provide proper apikey'
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
def GET(path='', params={})
|
19
|
+
def GET(path = '', params = {})
|
20
20
|
options = { query: params }
|
21
21
|
response = self.class.get "/#{path}", options
|
22
22
|
response.code == 200 ? response.parsed_response : check_error(response)
|
23
23
|
end
|
24
24
|
|
25
|
-
def POST(path='', params={}, form={})
|
25
|
+
def POST(path = '', params = {}, form = {})
|
26
26
|
options = { query: params, body: form }
|
27
27
|
response = self.class.post "/#{path}", options
|
28
28
|
response.code == 200 ? response.parsed_response : check_error(response)
|
29
29
|
end
|
30
30
|
|
31
|
-
private
|
31
|
+
private
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
33
|
+
def check_error(response)
|
34
|
+
fail ApiError, "#{response.code} Error occured"
|
35
|
+
end
|
36
|
+
end
|
@@ -8,38 +8,42 @@ module TexterraKBM
|
|
8
8
|
# @param term [String] term
|
9
9
|
# @return [Hash] with :presence field
|
10
10
|
def term_presence(term)
|
11
|
-
|
11
|
+
preset_kbm :termPresence, term
|
12
12
|
end
|
13
13
|
|
14
|
-
# Returns information measure for the given term. Information measure denotes,
|
14
|
+
# Returns information measure for the given term. Information measure denotes,
|
15
|
+
# how often given term is used as link caption among all its occurrences
|
15
16
|
#
|
16
17
|
# @param term [String] term
|
17
18
|
# @return [Hash] with :infomeasure field
|
18
19
|
def term_info_measure(term)
|
19
|
-
|
20
|
+
preset_kbm :termInfoMeasure, term
|
20
21
|
end
|
21
22
|
|
22
|
-
# Return concepts resource from the Knowledge base corresponding
|
23
|
+
# Return concepts resource from the Knowledge base corresponding
|
24
|
+
# to the found meanings of the given term
|
23
25
|
#
|
24
26
|
# @param term [String] term
|
25
27
|
# @return [Hash] with :elements field
|
26
28
|
def term_meanings(term)
|
27
|
-
|
29
|
+
preset_kbm :termMeanings, term
|
28
30
|
end
|
29
31
|
|
30
|
-
# If concept isn't provided, returns concepts with their commonness,
|
32
|
+
# If concept isn't provided, returns concepts with their commonness,
|
33
|
+
# corresponding to the found meanings of the given term.
|
34
|
+
# Commonness denotes, how often the given term is associated with the given concept.
|
31
35
|
# With concept(format is {id}:{kbname}) returns commonness of given concept for the given term.
|
32
36
|
#
|
33
37
|
# @param term [String] term
|
34
38
|
# @param concept [String] concept as {id}:{kbname}
|
35
39
|
# @return [Hash] with :elements field
|
36
|
-
def term_commonness(term, concept='')
|
40
|
+
def term_commonness(term, concept = '')
|
37
41
|
concept = "id=#{concept}" unless concept.empty?
|
38
|
-
|
42
|
+
preset_kbm :termCommonness, [term, concept]
|
39
43
|
end
|
40
44
|
|
41
|
-
# Return neighbour concepts for the given concepts(list or single concept, each concept is {id}:{kbname}).
|
42
|
-
#
|
45
|
+
# Return neighbour concepts for the given concepts(list or single concept, each concept is {id}:{kbname}).
|
46
|
+
#
|
43
47
|
# @param concepts [String, Array<String>] either concept as {id}:{kbname} or array of such concepts
|
44
48
|
# @param traverse_params [Hash] optional
|
45
49
|
# @option traverse_params [String] :linkType searching for neighbour concepts only along these link types
|
@@ -48,17 +52,17 @@ module TexterraKBM
|
|
48
52
|
# @option traverse_params [Fixnum] :maxDepth maximum distance from original to result concepts
|
49
53
|
#
|
50
54
|
# @return [Hash] with :elements field
|
51
|
-
#
|
52
|
-
# If at least one traverse parameter(check REST Documentation for values) is specified, all other parameters should also be specified
|
53
|
-
def neighbours(concepts, traverse_params={})
|
55
|
+
#
|
56
|
+
# If at least one traverse parameter(check REST Documentation for values) is specified, all other parameters should also be specified
|
57
|
+
def neighbours(concepts, traverse_params = {})
|
54
58
|
traverse = traverse_params.inject('') do |res, (name, value)|
|
55
|
-
res
|
59
|
+
res + ";#{name}=#{value}"
|
56
60
|
end unless traverse_params.empty?
|
57
|
-
|
61
|
+
preset_kbm :neighbours, [wrap_concepts(concepts), traverse]
|
58
62
|
end
|
59
63
|
|
60
64
|
# Return neighbour concepts size for the given concepts(list or single concept, each concept is {id}:{kbname}).
|
61
|
-
#
|
65
|
+
#
|
62
66
|
# @param concepts [String, Array<String>] either concept as {id}:{kbname} or array of such concepts
|
63
67
|
# @param traverse_params [Hash] optional
|
64
68
|
# @option traverse_params [String] :linkType searching for neighbour concepts only along these link types
|
@@ -67,104 +71,103 @@ module TexterraKBM
|
|
67
71
|
# @option traverse_params [Fixnum] :maxDepth maximum distance from original to result concepts
|
68
72
|
#
|
69
73
|
# @return [Hash] with :size field
|
70
|
-
#
|
71
|
-
# @note If at least one traverse parameter(check REST Documentation for values) is specified, all other parameters should also be specified
|
72
|
-
def neighbours_size(concepts, traverse_params={})
|
74
|
+
#
|
75
|
+
# @note If at least one traverse parameter(check REST Documentation for values) is specified, all other parameters should also be specified
|
76
|
+
def neighbours_size(concepts, traverse_params = {})
|
73
77
|
traverse = traverse_params.inject('') do |res, (name, value)|
|
74
|
-
res
|
78
|
+
res + ";#{name}=#{value}"
|
75
79
|
end unless traverse_params.empty?
|
76
|
-
|
80
|
+
preset_kbm :neighbours, [wrap_concepts(concepts), "#{traverse}/size"]
|
77
81
|
end
|
78
82
|
|
79
|
-
# Compute similarity for each pair of concepts(list or single concept, each concept is {id}:{kbname}).
|
83
|
+
# Compute similarity for each pair of concepts(list or single concept, each concept is {id}:{kbname}).
|
80
84
|
#
|
81
85
|
# @param [Array<String>] concepts Array of concepts as {id}:{kbname}
|
82
86
|
# @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
|
83
|
-
def similarity_graph(concepts, linkWeight='MAX')
|
84
|
-
|
87
|
+
def similarity_graph(concepts, linkWeight = 'MAX')
|
88
|
+
preset_kbm :similarityGraph, "#{wrap_concepts(concepts)}linkWeight=#{linkWeight}"
|
85
89
|
end
|
86
90
|
|
87
91
|
# Computes sum of similarities from each concepts(list or single concept, each concept is {id}:{kbname}) from the first list to all concepts(list or single concept, each concept is {id}:{kbname}) from the second one.
|
88
|
-
#
|
92
|
+
#
|
89
93
|
# @param [Array<String>] first_concepts Array of concepts as {id}:{kbname}
|
90
94
|
# @param [Array<String>] second_concepts Array of concepts as {id}:{kbname}
|
91
|
-
# @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
|
92
|
-
def all_pairs_similarity(first_concepts, second_concepts, linkWeight='MAX')
|
93
|
-
|
95
|
+
# @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
|
96
|
+
def all_pairs_similarity(first_concepts, second_concepts, linkWeight = 'MAX')
|
97
|
+
preset_kbm :allPairsSimilarity, ["#{wrap_concepts(first_concepts)}linkWeight=#{linkWeight}", wrap_concepts(second_concepts)]
|
94
98
|
end
|
95
99
|
|
96
100
|
# Compute similarity from each concept from the first list to all concepts(list or single concept, each concept is {id}:{kbname}) from the second list as a whole.
|
97
101
|
# Links of second list concepts(each concept is {id}:{kbname}) are collected together, thus forming a "virtual" article, similarity to which is computed.
|
98
|
-
#
|
102
|
+
#
|
99
103
|
# @param [Array<String>] concepts Array of concepts as {id}:{kbname}
|
100
104
|
# @param [Array<String>] virtual_aricle Array of concepts as {id}:{kbname}
|
101
|
-
# @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
|
102
|
-
def similarity_to_virtual_article(concepts, virtual_aricle, linkWeight='MAX')
|
103
|
-
|
105
|
+
# @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
|
106
|
+
def similarity_to_virtual_article(concepts, virtual_aricle, linkWeight = 'MAX')
|
107
|
+
preset_kbm :similarityToVirtualArticle, ["#{wrap_concepts(concepts)}linkWeight=#{linkWeight}", wrap_concepts(virtual_aricle)]
|
104
108
|
end
|
105
109
|
|
106
110
|
# Compute similarity between two sets of concepts(list or single concept, each concept is {id}:{kbname}) as between "virtual" articles from these sets.
|
107
|
-
# The links of each virtual article are composed of links of the collection of concepts.
|
108
|
-
#
|
111
|
+
# The links of each virtual article are composed of links of the collection of concepts.
|
112
|
+
#
|
109
113
|
# @param [Array<String>] first_virtual_aricle Array of concepts as {id}:{kbname}
|
110
114
|
# @param [Array<String>] second_virtual_article Array of concepts as {id}:{kbname}
|
111
|
-
# @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
|
112
|
-
def similarity_between_virtual_articles(first_virtual_aricle, second_virtual_article, linkWeight='MAX')
|
113
|
-
|
115
|
+
# @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
|
116
|
+
def similarity_between_virtual_articles(first_virtual_aricle, second_virtual_article, linkWeight = 'MAX')
|
117
|
+
preset_kbm :similarityBetweenVirtualArticle, ["#{wrap_concepts(first_virtual_aricle)}linkWeight=#{linkWeight}", wrap_concepts(second_virtual_article)]
|
114
118
|
end
|
115
119
|
|
116
120
|
# Search for similar concepts among the first neighbours of the given ones(list or single concept, each concept is {id}:{kbname}).
|
117
|
-
#
|
121
|
+
#
|
118
122
|
# @param [Array<String>] concepts Array of concepts as {id}:{kbname}
|
119
|
-
# @param [Hash] params
|
120
|
-
# @option params [String] :linkWeight Specifies method for computation of link weight in case of multiple link types
|
123
|
+
# @param [Hash] params
|
124
|
+
# @option params [String] :linkWeight Specifies method for computation of link weight in case of multiple link types
|
121
125
|
# @option params [Fixnum] :offset Provides a possibility to skip several concepts from the start of the result
|
122
126
|
# @option params [Fixnum] :limit Provides a possibility to limit size of result
|
123
|
-
#
|
127
|
+
#
|
124
128
|
# @note check REST Documentation for values
|
125
|
-
def similar_over_first_neighbours(concepts, params={linkWeight:'MAX'})
|
126
|
-
|
129
|
+
def similar_over_first_neighbours(concepts, params = { linkWeight: 'MAX' })
  # The default argument only applies when no hash is passed at all; a caller
  # supplying e.g. { limit: 5 } would otherwise produce an empty "linkWeight=".
  # Fill in the documented default while letting the caller's values win.
  options = { linkWeight: 'MAX' }.merge(params)
  preset_kbm :similarOverFirstNeighbours, "#{wrap_concepts(concepts)};linkWeight=#{options[:linkWeight]}", options
end
|
128
132
|
|
129
133
|
# Search for similar concepts over filtered set of the first and the second neighbours of the given ones(list or single concept, each concept is {id}:{kbname}).
|
130
|
-
#
|
134
|
+
#
|
131
135
|
# @param [Array<String>] concepts Array of concepts as {id}:{kbname}
|
132
|
-
# @param [Hash] params
|
133
|
-
# @option params [String] :linkWeight Specifies method for computation of link weight in case of multiple link types
|
134
|
-
# @option params [Fixnum] :offset Provides a possibility to skip several concepts from the start of the result
|
135
|
-
# @option params [Fixnum] :limit Provides a possibility to limit size of result
|
136
|
+
# @param [Hash] params
|
137
|
+
# @option params [String] :linkWeight Specifies method for computation of link weight in case of multiple link types
|
138
|
+
# @option params [Fixnum] :offset Provides a possibility to skip several concepts from the start of the result
|
139
|
+
# @option params [Fixnum] :limit Provides a possibility to limit size of result
|
136
140
|
# @option params [String] :among Specifies how to filter neighbour concepts when searching for most similar
|
137
|
-
#
|
141
|
+
#
|
138
142
|
# @note check REST Documentation for values
|
139
|
-
def similar_over_filtered_neighbours(concepts, params={linkWeight:'MAX'})
|
140
|
-
|
143
|
+
def similar_over_filtered_neighbours(concepts, params = { linkWeight: 'MAX' })
  # The default argument only applies when no hash is passed at all; a caller
  # supplying e.g. { among: '...' } would otherwise produce an empty "linkWeight=".
  # Fill in the documented default while letting the caller's values win.
  options = { linkWeight: 'MAX' }.merge(params)
  preset_kbm :similarOverFilteredNeighbours, "#{wrap_concepts(concepts)};linkWeight=#{options[:linkWeight]}", options
end
|
142
146
|
|
143
147
|
# Get attributes for concepts(list or single concept, each concept is {id}:{kbname})
|
144
|
-
#
|
148
|
+
#
|
145
149
|
# @param [String, Array<String>] concepts Either concept as {id}:{kbname} or array of such concepts
|
146
150
|
# @param [Array<String>] attributes Specifies attributes to be included into response
|
147
151
|
# @note check REST Documentation for supported attributes
|
148
|
-
def get_attributes(concepts, attributes=[])
|
149
|
-
|
152
|
+
def get_attributes(concepts, attributes = [])
|
153
|
+
preset_kbm :getAttributes, wrap_concepts(concepts), attribute: attributes
|
150
154
|
end
|
151
155
|
|
152
|
-
private
|
153
|
-
|
154
|
-
# Utility wrapper for matrix parameters
|
155
|
-
def wrap_concepts(concepts)
|
156
|
-
if concepts.is_a? Array
|
157
|
-
concepts.map { |c| "id=#{c};" }.join
|
158
|
-
else
|
159
|
-
"id=#{concepts};"
|
160
|
-
end
|
161
|
-
end
|
156
|
+
private
|
162
157
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
158
|
+
# Utility wrapper for matrix parameters
|
159
|
+
def wrap_concepts(concepts)
|
160
|
+
if concepts.is_a? Array
|
161
|
+
concepts.map { |c| "id=#{c};" }.join
|
162
|
+
else
|
163
|
+
"id=#{concepts};"
|
168
164
|
end
|
165
|
+
end
|
169
166
|
|
170
|
-
|
167
|
+
# Utility EKB part method: issues a preset knowledge-base GET request.
#
# Looks up the path template and preset query parameters for +methodName+
# in KBM_SPECS, fills the template with +pathParam+ and sends the request.
#
# @param methodName [Symbol] key into KBM_SPECS
# @param pathParam [String, Array<String>] value(s) substituted into the spec's path template via String#%
# @param queryParam [Hash] extra query parameters supplied by the caller (not modified)
# @return the value returned by GET for the built request
def preset_kbm(methodName, pathParam, queryParam = {})
  specs = KBM_SPECS[methodName]
  # Hash#merge is non-destructive, so the merged hash itself must be sent;
  # discarding it silently drops the preset parameters from the spec.
  query = queryParam.merge(specs[:params] || {})
  GET(specs[:path] % pathParam, query)
end
|
173
|
+
end
|
@@ -3,126 +3,126 @@ require_relative './nlp_specs'
|
|
3
3
|
module TexterraNLP
|
4
4
|
include TexterraNLPSpecs
|
5
5
|
# Detects language of given text
|
6
|
-
#
|
6
|
+
#
|
7
7
|
# @param [String] text Text to process
|
8
8
|
# @return [Array] Texterra annotations
|
9
9
|
def language_detection_annotate(text)
|
10
|
-
|
10
|
+
preset_nlp(:languageDetection, text)
|
11
11
|
end
|
12
12
|
|
13
13
|
# Detects boundaries of sentences in a given text
|
14
|
-
#
|
14
|
+
#
|
15
15
|
# @param [String] text Text to process
|
16
16
|
# @return [Array] Texterra annotations
|
17
|
-
def
|
18
|
-
|
17
|
+
def sentence_detection_annotate(text)
|
18
|
+
preset_nlp(:sentenceDetection, text)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Detects all tokens (minimal significant text parts) in a given text
|
22
|
-
#
|
22
|
+
#
|
23
23
|
# @param [String] text Text to process
|
24
24
|
# @return [Array] Texterra annotations
|
25
25
|
def tokenization_annotate(text)
|
26
|
-
|
26
|
+
preset_nlp(:tokenization, text)
|
27
27
|
end
|
28
28
|
|
29
29
|
# Detects lemma of each word of a given text
|
30
|
-
#
|
30
|
+
#
|
31
31
|
# @param [String] text Text to process
|
32
32
|
# @return [Array] Texterra annotations
|
33
33
|
def lemmatization_annotate(text)
|
34
|
-
|
34
|
+
preset_nlp(:lemmatization, text)
|
35
35
|
end
|
36
36
|
|
37
37
|
# Detects part of speech tag for each word of a given text
|
38
|
-
#
|
38
|
+
#
|
39
39
|
# @param [String] text Text to process
|
40
40
|
# @return [Array] Texterra annotations
|
41
41
|
def pos_tagging_annotate(text)
|
42
|
-
|
42
|
+
preset_nlp(:posTagging, text)
|
43
43
|
end
|
44
44
|
|
45
45
|
# Tries to correct misprints and other spelling errors in a given text
|
46
|
-
#
|
46
|
+
#
|
47
47
|
# @param [String] text Text to process
|
48
48
|
# @return [Array] Texterra annotations
|
49
49
|
def spelling_correction_annotate(text)
|
50
|
-
|
50
|
+
preset_nlp(:spellingCorrection, text)
|
51
51
|
end
|
52
52
|
|
53
53
|
# Finds all named entities occurrences in a given text
|
54
|
-
#
|
54
|
+
#
|
55
55
|
# @param [String] text Text to process
|
56
56
|
# @return [Array] Texterra annotations
|
57
57
|
def named_entities_annotate(text)
|
58
|
-
|
58
|
+
preset_nlp(:namedEntities, text)
|
59
59
|
end
|
60
60
|
|
61
61
|
# Extracts not overlapping terms within a given text; term is a textual representation for some concept of the real world
|
62
|
-
#
|
62
|
+
#
|
63
63
|
# @param [String] text Text to process
|
64
64
|
# @return [Array] Texterra annotations
|
65
65
|
def term_detection_annotate(text)
|
66
|
-
|
66
|
+
preset_nlp(:termDetection, text)
|
67
67
|
end
|
68
68
|
|
69
69
|
# Detects the most appropriate meanings (concepts) for terms occurred in a given text
|
70
|
-
#
|
70
|
+
#
|
71
71
|
# @param [String] text Text to process
|
72
72
|
# @return [Array] Texterra annotations
|
73
73
|
def disambiguation_annotate(text)
|
74
|
-
|
74
|
+
preset_nlp(:disambiguation, text)
|
75
75
|
end
|
76
76
|
|
77
77
|
# Key concepts are the concepts providing short (conceptual) and informative text description.
|
78
78
|
# This service extracts a set of key concepts for a given text
|
79
|
-
#
|
79
|
+
#
|
80
80
|
# @param [String] text Text to process
|
81
81
|
# @return [Array] Texterra annotations
|
82
82
|
def key_concepts_annotate(text)
|
83
|
-
|
83
|
+
preset_nlp(:keyConcepts, text)
|
84
84
|
end
|
85
85
|
|
86
86
|
# Detects the most appropriate domain for the given text.
|
87
87
|
# Currently only 2 specific domains are supported: 'movie' and 'politics'
|
88
88
|
# If no domain from this list has been detected, the text is assumed to be no domain, or general domain
|
89
|
-
#
|
89
|
+
#
|
90
90
|
# @param [String] text Text to process
|
91
91
|
# @return [Array] Texterra annotations
|
92
92
|
def domain_detection_annotate(text)
|
93
|
-
|
93
|
+
preset_nlp(:domainDetection, text)
|
94
94
|
end
|
95
95
|
|
96
96
|
# Detects whether the given text is subjective or not
|
97
|
-
#
|
97
|
+
#
|
98
98
|
# @param [String] text Text to process
|
99
99
|
# @return [Array] Texterra annotations
|
100
100
|
def subjectivity_detection_annotate(text)
|
101
|
-
|
101
|
+
preset_nlp(:subjectivityDetection, text)
|
102
102
|
end
|
103
103
|
|
104
104
|
# Detects whether the given text has positive, negative or no sentiment
|
105
|
-
#
|
105
|
+
#
|
106
106
|
# @param [String] text Text to process
|
107
107
|
# @return [Array] Texterra annotations
|
108
108
|
def polarity_detection_annotate(text)
|
109
|
-
|
109
|
+
preset_nlp(:polarityDetection, text)
|
110
110
|
end
|
111
111
|
|
112
|
-
# Detects whether the given text has positive, negative, or no sentiment, with respect to domain.
|
112
|
+
# Detects whether the given text has positive, negative, or no sentiment, with respect to domain.
|
113
113
|
# If domain isn't provided, Domain detection is applied, this way method tries to achieve best results.
|
114
114
|
# If no domain is detected general domain algorithm is applied
|
115
|
-
#
|
115
|
+
#
|
116
116
|
# @param [String] text Text to process
|
117
117
|
# @param [String] domain Domain for polarity detection
|
118
118
|
# @return [Array] Texterra annotations
|
119
|
-
def domain_polarity_detection_annotate(text, domain='')
|
120
|
-
specs =
|
121
|
-
domain =
|
122
|
-
result = POST(specs[:path] % domain, specs[:params],
|
119
|
+
def domain_polarity_detection_annotate(text, domain = '')
|
120
|
+
specs = NLP_SPECS[:domainPolarityDetection]
|
121
|
+
domain = "(#{domain})" unless domain.empty?
|
122
|
+
result = POST(specs[:path] % domain, specs[:params], text: text)[:nlp_document][:annotations][:i_annotation]
|
123
123
|
return [] if result.nil?
|
124
124
|
result = [].push result unless result.is_a? Array
|
125
|
-
result.each do |e|
|
125
|
+
result.each do |e|
|
126
126
|
st, en = e[:start].to_i, e[:end].to_i
|
127
127
|
e[:text] = e[:annotated_text] = text[st..en]
|
128
128
|
end
|
@@ -130,24 +130,24 @@ module TexterraNLP
|
|
130
130
|
|
131
131
|
# Detects Twitter-specific entities: Hashtags, User names, Emoticons, URLs.
|
132
132
|
# And also: Stop-words, Misspellings, Spelling suggestions, Spelling corrections
|
133
|
-
#
|
133
|
+
#
|
134
134
|
# @param [String] text Text to process
|
135
135
|
# @return [Array] Texterra annotations
|
136
136
|
def tweet_normalization(text)
|
137
|
-
|
137
|
+
preset_nlp(:tweetNormalization, text)
|
138
138
|
end
|
139
139
|
|
140
140
|
private
|
141
141
|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
end
|
142
|
+
# Utility NLP part method
|
143
|
+
def preset_nlp(methodName, text)
|
144
|
+
specs = NLP_SPECS[methodName]
|
145
|
+
result = POST(specs[:path], specs[:params], text: text)[:nlp_document][:annotations][:i_annotation]
|
146
|
+
return [] if result.nil?
|
147
|
+
result = [].push result unless result.is_a? Array
|
148
|
+
result.each do |e|
|
149
|
+
st, en = e[:start].to_i, e[:end].to_i
|
150
|
+
e[:text] = e[:annotated_text] = text[st..en]
|
152
151
|
end
|
153
|
-
end
|
152
|
+
end
|
153
|
+
end
|
@@ -1,126 +1,125 @@
|
|
1
1
|
module TexterraNLPSpecs
|
2
2
|
# Path and parameters for preset NLP queries
|
3
|
-
|
3
|
+
NLP_SPECS = {
|
4
4
|
languageDetection: {
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.LanguageDetectionPipeline',
|
6
|
+
params: {
|
7
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.Language',
|
8
|
+
filtering: 'KEEPING'
|
9
|
+
}
|
10
10
|
},
|
11
11
|
sentenceDetection: {
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
path: 'nlp/sentence',
|
13
|
+
params: {
|
14
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.Sentence',
|
15
|
+
filtering: 'KEEPING'
|
16
|
+
}
|
17
17
|
},
|
18
18
|
tokenization: {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
path: 'nlp/token',
|
20
|
+
params: {
|
21
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.Token',
|
22
|
+
filtering: 'KEEPING'
|
23
|
+
}
|
24
24
|
},
|
25
25
|
lemmatization: {
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
path: 'nlp/lemma',
|
27
|
+
params: {
|
28
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.Lemma',
|
29
|
+
filtering: 'KEEPING'
|
30
|
+
}
|
31
31
|
},
|
32
32
|
posTagging: {
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
path: 'nlp/pos',
|
34
|
+
params: {
|
35
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.pos.POSToken',
|
36
|
+
filtering: 'KEEPING'
|
37
|
+
}
|
38
38
|
},
|
39
39
|
spellingCorrection: {
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
40
|
+
path: 'nlp/ru.ispras.texterra.core.nlp.annotators.spelling.SpellingCorrector',
|
41
|
+
params: {
|
42
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.SpellingCorrection',
|
43
|
+
filtering: 'KEEPING'
|
44
|
+
}
|
45
45
|
},
|
46
46
|
namedEntities: {
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
47
|
+
path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.NETaggingPipeline',
|
48
|
+
params: {
|
49
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.ne.NamedEntityToken',
|
50
|
+
filtering: 'KEEPING'
|
51
|
+
}
|
52
52
|
},
|
53
53
|
termDetection: {
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
54
|
+
path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.TermDetectionPipeline',
|
55
|
+
params: {
|
56
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.Frame',
|
57
|
+
filtering: 'KEEPING'
|
58
|
+
}
|
59
59
|
},
|
60
60
|
disambiguation: {
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
61
|
+
path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.DisambiguationPipeline',
|
62
|
+
params: {
|
63
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.DisambiguatedPhrase',
|
64
|
+
filtering: 'KEEPING'
|
65
|
+
}
|
66
66
|
|
67
67
|
},
|
68
68
|
keyConcepts: {
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
69
|
+
path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.KeyConceptsPipeline',
|
70
|
+
params: {
|
71
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.KeyconceptsSemanticContext',
|
72
|
+
filtering: 'KEEPING'
|
73
|
+
}
|
74
74
|
|
75
75
|
},
|
76
76
|
domainDetection: {
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
77
|
+
path: 'nlp/domain',
|
78
|
+
params: {
|
79
|
+
class: 'domain',
|
80
|
+
filtering: 'KEEPING'
|
81
|
+
}
|
82
82
|
|
83
83
|
},
|
84
84
|
subjectivityDetection: {
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
85
|
+
path: 'nlp/subjectivity',
|
86
|
+
params: {
|
87
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.SentimentSubjectivity',
|
88
|
+
filtering: 'KEEPING'
|
89
|
+
}
|
90
90
|
|
91
91
|
},
|
92
92
|
polarityDetection: {
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
93
|
+
path: 'nlp/polarity',
|
94
|
+
params: {
|
95
|
+
class: 'ru.ispras.texterra.core.nlp.datamodel.SentimentPolarity',
|
96
|
+
filtering: 'KEEPING'
|
97
|
+
}
|
98
98
|
|
99
99
|
},
|
100
100
|
aspectExtraction: {
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
101
|
+
path: 'nlp/aspectsentiment',
|
102
|
+
params: {
|
103
|
+
class: 'aspect-sentiment',
|
104
|
+
filtering: 'KEEPING'
|
105
|
+
}
|
106
106
|
|
107
107
|
},
|
108
108
|
domainPolarityDetection: {
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
109
|
+
path: 'nlp/domainpolarity%s',
|
110
|
+
params: {
|
111
|
+
class: %w(domain sentiment-polarity),
|
112
|
+
filtering: 'KEEPING'
|
113
|
+
}
|
114
114
|
|
115
115
|
},
|
116
116
|
tweetNormalization: {
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
117
|
+
path: 'nlp/twitterdetection',
|
118
|
+
params: {
|
119
|
+
class: %w(sentence language token),
|
120
|
+
filtering: 'REMOVING'
|
121
|
+
}
|
122
122
|
|
123
123
|
}
|
124
124
|
}
|
125
|
-
|
126
|
-
end
|
125
|
+
end
|
@@ -3,23 +3,26 @@ require_relative './texterra/nlp'
|
|
3
3
|
require_relative './texterra/kbm'
|
4
4
|
|
5
5
|
class TexterraAPI < IsprasAPI
|
6
|
-
# This class provides methods to work with Texterra REST via OpenAPI,
|
6
|
+
# This class provides methods to work with Texterra REST via OpenAPI,
|
7
|
+
# including NLP and EKB methods and custom queries
|
7
8
|
# Note that NLP methods return annotations only
|
8
9
|
include TexterraNLP, TexterraKBM
|
9
10
|
disable_rails_query_string_format
|
10
|
-
nori = Nori.new(parser: :rexml,
|
11
|
-
|
11
|
+
nori = Nori.new(parser: :rexml,
|
12
|
+
convert_tags_to: ->(tag) { tag.snakecase.to_sym })
|
13
|
+
parser proc { |data| nori.parse data }
|
12
14
|
|
13
|
-
def initialize(key, name=nil, ver=nil)
|
14
|
-
name='texterra' if name.nil? || name.empty?
|
15
|
-
ver='v3.1' if ver.nil? || ver.empty?
|
15
|
+
def initialize(key, name = nil, ver = nil)
|
16
|
+
name = 'texterra' if name.nil? || name.empty?
|
17
|
+
ver = 'v3.1' if ver.nil? || ver.empty?
|
16
18
|
super(key, name, ver)
|
17
19
|
end
|
18
20
|
|
19
21
|
# Section of NLP methods
|
20
22
|
# NLP basic helper methods
|
21
23
|
|
22
|
-
# Key concepts are the concepts providing
|
24
|
+
# Key concepts are the concepts providing
|
25
|
+
# short (conceptual) and informative text description.
|
23
26
|
# This service extracts a set of key concepts for a given text
|
24
27
|
#
|
25
28
|
# @param [String] text Text to process
|
@@ -27,10 +30,10 @@ class TexterraAPI < IsprasAPI
|
|
27
30
|
def key_concepts(text)
  # The annotation list may be empty; indexing [0] on it and chaining [] calls
  # would raise NoMethodError, so walk the structure defensively and fall back
  # to an empty result.
  annotation = key_concepts_annotate(text).first
  value = annotation && annotation[:value]
  weights = value && value[:concepts_weights]
  entries = (weights && weights[:entry]) || []
  # A single entry arrives as a bare Hash rather than a one-element Array.
  entries = [entries] unless entries.is_a? Array
  entries.map do |kc|
    # Attach the weight (stored under :double) to the concept itself.
    kc[:concept][:weight] = kc[:double]
    kc[:concept]
  end
end
|
35
38
|
|
36
39
|
# Detects whether the given text has positive, negative or no sentiment
|
@@ -38,27 +41,25 @@ class TexterraAPI < IsprasAPI
|
|
38
41
|
# @param [String] text Text to process
|
39
42
|
# @return [Array] Sentiment of the text
|
40
43
|
def sentiment_analysis(text)
  annotation = polarity_detection_annotate(text).first
  value = annotation && annotation[:value]
  # nil.to_s is "" (truthy), so `value.to_s || 'NEUTRAL'` could never fall
  # back; test for nil before converting.
  value.nil? ? 'NEUTRAL' : value.to_s
rescue NoMethodError
  # Raised when the response lacks the expected structure; treated as no sentiment.
  'NEUTRAL'
end
|
47
48
|
|
48
|
-
# Detects whether the given text has positive, negative, or no sentiment, with respect to domain.
|
49
|
+
# Detects whether the given text has positive, negative, or no sentiment, with respect to domain.
|
49
50
|
# If domain isn't provided, Domain detection is applied, this way method tries to achieve best results.
|
50
51
|
# If no domain is detected general domain algorithm is applied
|
51
52
|
#
|
52
53
|
# @param [String] text Text to process
|
53
54
|
# @param domain [String] domain to use. Can be empty
|
54
55
|
# @return [Hash] used :domain and detected :polarity
|
55
|
-
def domain_sentiment_analysis(text, domain='')
|
56
|
+
def domain_sentiment_analysis(text, domain = '')
|
56
57
|
used_domain = 'general'
|
57
58
|
sentiment = 'NEUTRAL'
|
58
|
-
(domain_polarity_detection_annotate(text, domain) || []).each
|
59
|
+
(domain_polarity_detection_annotate(text, domain) || []).each do |an|
|
59
60
|
sentiment = an[:value] if an[:@class].include? 'SentimentPolarity'
|
60
61
|
used_domain = an[:value] if an[:@class].include? 'DomainAnnotation'
|
61
|
-
|
62
|
+
end
|
62
63
|
{
|
63
64
|
domain: used_domain,
|
64
65
|
polarity: sentiment
|
@@ -73,16 +74,17 @@ class TexterraAPI < IsprasAPI
|
|
73
74
|
disambiguation_annotate(text)
|
74
75
|
end
|
75
76
|
|
76
|
-
def custom_query(path, query, form=nil)
|
77
|
+
def custom_query(path, query, form = nil)
|
77
78
|
form.nil? ? GET(path, query) : POST(path, query, form)
|
78
79
|
end
|
79
80
|
|
80
81
|
private
|
81
82
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
def check_error(response)
|
84
|
+
hash = response.parsed_response
|
85
|
+
er_node = hash[:html][:body][:p].detect do |node|
|
86
|
+
node.is_a?(Hash) && node[:b] == 'root cause'
|
86
87
|
end
|
87
|
-
|
88
|
-
end
|
88
|
+
fail ApiError, er_node[:pre].gsub(/ru\.ispras.*:\s*/, '')
|
89
|
+
end
|
90
|
+
end
|
@@ -1,18 +1,20 @@
|
|
1
1
|
require_relative './ispras_api'
|
2
2
|
|
3
3
|
class TwitterAPI < IsprasAPI
|
4
|
-
#This class provides methods to work with Twitter NLP REST via OpenAPI
|
4
|
+
# This class provides methods to work with Twitter NLP REST via OpenAPI
|
5
5
|
disable_rails_query_string_format
|
6
|
-
nori = Nori.new(parser: :rexml,
|
7
|
-
|
6
|
+
nori = Nori.new(parser: :rexml,
|
7
|
+
convert_tags_to: ->(tag) { tag.snakecase.to_sym })
|
8
|
+
parser proc { |data| nori.parse data }
|
8
9
|
|
9
|
-
def initialize(key, name=nil, ver=nil)
|
10
|
-
name='twitter-nlp' if name.nil? || name.empty?
|
11
|
-
ver='1.0' if ver.nil? || ver.empty?
|
10
|
+
def initialize(key, name = nil, ver = nil)
|
11
|
+
name = 'twitter-nlp' if name.nil? || name.empty?
|
12
|
+
ver = '1.0' if ver.nil? || ver.empty?
|
12
13
|
super(key, name, ver)
|
13
14
|
end
|
14
15
|
|
15
|
-
# Extracts demographic attributes from provided Twitter info.
|
16
|
+
# Extracts demographic attributes from provided Twitter info.
|
17
|
+
# All info is required, but can be empty
|
16
18
|
#
|
17
19
|
# @param [Hash] params
|
18
20
|
# @option params [String] :lang Language of tweets
|
@@ -26,16 +28,17 @@ class TwitterAPI < IsprasAPI
|
|
26
28
|
POST 'extract', {}, params
|
27
29
|
end
|
28
30
|
|
29
|
-
def custom_query(path, query, form=nil)
|
31
|
+
def custom_query(path, query, form = nil)
|
30
32
|
form.nil? ? GET(path, query) : POST(path, query, form)
|
31
33
|
end
|
32
34
|
|
33
35
|
private
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
37
|
+
def check_error(response)
|
38
|
+
hash = response.parsed_response
|
39
|
+
er_node = hash[:html][:body][:p].detect do |node|
|
40
|
+
node.is_a?(Hash) && node[:b] == 'root cause'
|
39
41
|
end
|
40
|
-
|
41
|
-
end
|
42
|
+
fail ApiError, er_node[:pre].gsub(/ru\.ispras.*:\s*/, '')
|
43
|
+
end
|
44
|
+
end
|
data/lib/ispras-api/version.rb
CHANGED
@@ -1,22 +1,21 @@
|
|
1
1
|
module Version
|
2
|
-
|
3
2
|
MAJOR = 0
|
4
3
|
MINOR = 1
|
5
|
-
PATCH =
|
4
|
+
PATCH = 3
|
6
5
|
PRE = nil
|
7
|
-
|
8
|
-
YEAR =
|
9
|
-
MONTH =
|
10
|
-
DAY =
|
6
|
+
|
7
|
+
YEAR = '2015'
|
8
|
+
MONTH = '02'
|
9
|
+
DAY = '12'
|
11
10
|
|
12
11
|
def self.to_s
|
13
|
-
[MAJOR, MINOR, PATCH, PRE].compact.join(
|
12
|
+
[MAJOR, MINOR, PATCH, PRE].compact.join('.')
|
14
13
|
end
|
15
14
|
|
16
15
|
def self.current
|
17
16
|
to_s
|
18
17
|
end
|
19
|
-
|
18
|
+
|
20
19
|
def self.current_date
|
21
20
|
"#{YEAR}-#{MONTH}-#{DAY}"
|
22
21
|
end
|
@@ -30,5 +29,4 @@ module Version
|
|
30
29
|
version_hash[:pre] = version_array[3]
|
31
30
|
version_hash
|
32
31
|
end
|
33
|
-
|
34
|
-
end
|
32
|
+
end
|
data/test/test_texterra_api.rb
CHANGED
@@ -4,25 +4,24 @@ Dotenv.load
|
|
4
4
|
require_relative '../lib/ispras-api/texterra_api'
|
5
5
|
|
6
6
|
class TestTexterraAPI < Minitest::Test
|
7
|
-
|
8
7
|
def setup
|
9
8
|
@texterra = TexterraAPI.new ENV['TEXTERRA_KEY'], ENV['TEXTERRA_SERVICE_NAME'], ENV['TEXTERRA_SERVICE_VERSION']
|
10
9
|
@en_text = 'Apple today updated iMac to bring numerous high-performance enhancements to the leading all-in-one desktop. iMac now features fourth-generation Intel Core processors, new graphics, and next-generation Wi-Fi. In addition, it now supports PCIe-based flash storage, making its Fusion Drive and all-flash storage options up to 50 percent faster than the previous generation'
|
11
10
|
@ru_text = 'Первые в этом году переговоры министра иностранных дел России Сергея Лаврова и госсекретаря США Джона Керри, длившиеся 1,5 часа, завершились в Мюнхене.'
|
12
11
|
@en_tweet = 'mentioning veterens care which Mccain has voted AGAINST - SUPER GOOOOD point Obama+1 #tweetdebate'
|
13
12
|
@ru_tweet = 'В мастерской готовят пушку и автомобили 1940-х годов, для участия в Параде Победы в Ново-Переделкино.'
|
14
|
-
end
|
13
|
+
end
|
15
14
|
|
16
15
|
def test_key_concepts
|
17
|
-
assert_instance_of Array, @texterra.key_concepts(@en_text)
|
18
|
-
assert_instance_of Array, @texterra.key_concepts(@ru_text)
|
19
|
-
assert_instance_of Array, @texterra.key_concepts(@en_tweet)
|
20
|
-
assert_instance_of Array, @texterra.key_concepts(@ru_tweet)
|
16
|
+
assert_instance_of Array, @texterra.key_concepts(@en_text)
|
17
|
+
assert_instance_of Array, @texterra.key_concepts(@ru_text)
|
18
|
+
assert_instance_of Array, @texterra.key_concepts(@en_tweet)
|
19
|
+
assert_instance_of Array, @texterra.key_concepts(@ru_tweet)
|
21
20
|
end
|
22
21
|
|
23
22
|
def test_disambiguation
|
24
|
-
assert_instance_of Array, @texterra.disambiguation(@en_text)
|
25
|
-
assert_instance_of Array, @texterra.disambiguation(@ru_text)
|
23
|
+
assert_instance_of Array, @texterra.disambiguation(@en_text)
|
24
|
+
assert_instance_of Array, @texterra.disambiguation(@ru_text)
|
26
25
|
end
|
27
26
|
|
28
27
|
def test_sentiment_analysis
|
@@ -38,37 +37,65 @@ class TestTexterraAPI < Minitest::Test
|
|
38
37
|
res = @texterra.domain_sentiment_analysis(@en_tweet, 'politics')
|
39
38
|
assert_instance_of Hash, res
|
40
39
|
assert_equal 'politics', res[:domain]
|
41
|
-
assert_raises ApiError do
|
40
|
+
assert_raises ApiError do
|
42
41
|
@texterra.domain_sentiment_analysis(@ru_text, 'politics')
|
43
42
|
end
|
44
43
|
end
|
45
44
|
|
46
45
|
def test_tweet_normalization
|
47
|
-
assert_instance_of Array, @texterra.tweet_normalization(@en_tweet)
|
48
|
-
assert_raises ApiError do
|
49
|
-
@texterra.tweet_normalization(@ru_tweet)
|
46
|
+
assert_instance_of Array, @texterra.tweet_normalization(@en_tweet)
|
47
|
+
assert_raises ApiError do
|
48
|
+
@texterra.tweet_normalization(@ru_tweet)
|
50
49
|
end
|
51
50
|
end
|
52
51
|
|
53
52
|
def test_language_detection_annotate
|
54
|
-
assert_instance_of Array, @texterra.language_detection_annotate(@en_text)
|
55
|
-
assert_instance_of Array, @texterra.language_detection_annotate(@ru_text)
|
56
|
-
assert_instance_of Array, @texterra.language_detection_annotate(@en_tweet)
|
57
|
-
assert_instance_of Array, @texterra.language_detection_annotate(@ru_tweet)
|
53
|
+
assert_instance_of Array, @texterra.language_detection_annotate(@en_text)
|
54
|
+
assert_instance_of Array, @texterra.language_detection_annotate(@ru_text)
|
55
|
+
assert_instance_of Array, @texterra.language_detection_annotate(@en_tweet)
|
56
|
+
assert_instance_of Array, @texterra.language_detection_annotate(@ru_tweet)
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_sentence_detection_annotate
|
60
|
+
assert_instance_of Array, @texterra.sentence_detection_annotate(@en_text)
|
61
|
+
assert_instance_of Array, @texterra.sentence_detection_annotate(@ru_text)
|
62
|
+
assert_instance_of Array, @texterra.sentence_detection_annotate(@en_tweet)
|
63
|
+
assert_instance_of Array, @texterra.sentence_detection_annotate(@ru_tweet)
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_tokenization_annotate
|
67
|
+
assert_instance_of Array, @texterra.tokenization_annotate(@en_text)
|
68
|
+
assert_instance_of Array, @texterra.tokenization_annotate(@ru_text)
|
69
|
+
assert_instance_of Array, @texterra.tokenization_annotate(@en_tweet)
|
70
|
+
assert_instance_of Array, @texterra.tokenization_annotate(@ru_tweet)
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_lemmatization_annotate
|
74
|
+
assert_instance_of Array, @texterra.lemmatization_annotate(@en_text)
|
75
|
+
assert_instance_of Array, @texterra.lemmatization_annotate(@ru_text)
|
76
|
+
assert_instance_of Array, @texterra.lemmatization_annotate(@en_tweet)
|
77
|
+
assert_instance_of Array, @texterra.lemmatization_annotate(@ru_tweet)
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_pos_tagging_annotate
|
81
|
+
assert_instance_of Array, @texterra.pos_tagging_annotate(@en_text)
|
82
|
+
assert_instance_of Array, @texterra.pos_tagging_annotate(@ru_text)
|
83
|
+
assert_instance_of Array, @texterra.pos_tagging_annotate(@en_tweet)
|
84
|
+
assert_instance_of Array, @texterra.pos_tagging_annotate(@ru_tweet)
|
58
85
|
end
|
59
86
|
|
60
87
|
def test_named_entities_annotate
|
61
|
-
assert_instance_of Array, @texterra.named_entities_annotate(@en_text)
|
62
|
-
assert_instance_of Array, @texterra.named_entities_annotate(@ru_text)
|
63
|
-
assert_instance_of Array, @texterra.named_entities_annotate(@en_tweet)
|
64
|
-
assert_instance_of Array, @texterra.named_entities_annotate(@ru_tweet)
|
88
|
+
assert_instance_of Array, @texterra.named_entities_annotate(@en_text)
|
89
|
+
assert_instance_of Array, @texterra.named_entities_annotate(@ru_text)
|
90
|
+
assert_instance_of Array, @texterra.named_entities_annotate(@en_tweet)
|
91
|
+
assert_instance_of Array, @texterra.named_entities_annotate(@ru_tweet)
|
65
92
|
end
|
66
93
|
|
67
94
|
def test_subjectivity_detection_annotate
|
68
|
-
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_text)
|
69
|
-
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_text)
|
70
|
-
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_tweet)
|
71
|
-
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_tweet)
|
95
|
+
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_text)
|
96
|
+
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_text)
|
97
|
+
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_tweet)
|
98
|
+
assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_tweet)
|
72
99
|
end
|
73
100
|
|
74
101
|
def test_term_presence
|
@@ -105,23 +132,23 @@ class TestTexterraAPI < Minitest::Test
|
|
105
132
|
end
|
106
133
|
|
107
134
|
def test_similarity_graph
|
108
|
-
assert_instance_of Hash, @texterra.similarity_graph(['12:enwiki','13137:enwiki','156327:enwiki'])
|
109
|
-
assert_instance_of Hash, @texterra.similarity_graph(['12:enwiki','13137:enwiki','156327:enwiki'], 'MIN')
|
135
|
+
assert_instance_of Hash, @texterra.similarity_graph(['12:enwiki', '13137:enwiki', '156327:enwiki'])
|
136
|
+
assert_instance_of Hash, @texterra.similarity_graph(['12:enwiki', '13137:enwiki', '156327:enwiki'], 'MIN')
|
110
137
|
end
|
111
138
|
|
112
139
|
def test_all_pairs_similarity
|
113
|
-
assert_instance_of Hash, @texterra.all_pairs_similarity(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
|
114
|
-
assert_instance_of Hash, @texterra.all_pairs_similarity(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
|
140
|
+
assert_instance_of Hash, @texterra.all_pairs_similarity(['12:enwiki', '13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
|
141
|
+
assert_instance_of Hash, @texterra.all_pairs_similarity(['12:enwiki', '13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
|
115
142
|
end
|
116
143
|
|
117
144
|
def test_similarity_to_virtual_article
|
118
|
-
assert_instance_of Hash, @texterra.similarity_to_virtual_article(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
|
119
|
-
assert_instance_of Hash, @texterra.similarity_to_virtual_article(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
|
145
|
+
assert_instance_of Hash, @texterra.similarity_to_virtual_article(['12:enwiki', '13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
|
146
|
+
assert_instance_of Hash, @texterra.similarity_to_virtual_article(['12:enwiki', '13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
|
120
147
|
end
|
121
148
|
|
122
149
|
def test_similarity_between_virtual_articles
|
123
|
-
assert_instance_of Hash, @texterra.similarity_between_virtual_articles(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
|
124
|
-
assert_instance_of Hash, @texterra.similarity_between_virtual_articles(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
|
150
|
+
assert_instance_of Hash, @texterra.similarity_between_virtual_articles(['12:enwiki', '13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
|
151
|
+
assert_instance_of Hash, @texterra.similarity_between_virtual_articles(['12:enwiki', '13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
|
125
152
|
end
|
126
153
|
|
127
154
|
def test_similar_over_first_neighbours
|
@@ -140,4 +167,4 @@ class TestTexterraAPI < Minitest::Test
|
|
140
167
|
assert_instance_of Hash, @texterra.get_attributes('12:enwiki', ['url(en)', 'type'])
|
141
168
|
assert_instance_of Hash, @texterra.get_attributes(['12:enwiki', '13137:enwiki'], ['url(en)', 'title'])
|
142
169
|
end
|
143
|
-
end
|
170
|
+
end
|
data/test/test_twitter_api.rb
CHANGED
@@ -4,13 +4,12 @@ Dotenv.load
|
|
4
4
|
require_relative '../lib/ispras-api/twitter_api'
|
5
5
|
|
6
6
|
class TestTwitterAPI < Minitest::Test
|
7
|
-
|
8
7
|
def setup
|
9
8
|
@twitter = TwitterAPI.new ENV['DDE_KEY'], ENV['DDE_SERVICE_NAME'], ENV['DDE_SERVICE_VERSION']
|
10
|
-
end
|
9
|
+
end
|
11
10
|
|
12
11
|
def test_extract_dde
|
13
|
-
req = { lang: 'en', username: 'Ann', screenname: 'bob', description: 'I am Ann from NY', tweet:'Hi there, I am Ann fromNY' }
|
12
|
+
req = { lang: 'en', username: 'Ann', screenname: 'bob', description: 'I am Ann from NY', tweet: 'Hi there, I am Ann fromNY' }
|
14
13
|
assert_instance_of Hash, @twitter.extract_dde(req)
|
15
14
|
end
|
16
|
-
end
|
15
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ispras-api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexey Laguta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|