ispras-api 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 77b155b56d3f5581f60222f3cce5871780b0699b
4
+ data.tar.gz: 2a9b25c10ce0ce99ad10bb442f41f86fa9d5811f
5
+ SHA512:
6
+ metadata.gz: d78e6768b4bd2da6dd0b000159e57481c0bf6ff6826c9d2e69ba94dd335bbcb44cb8d3c3c969c7bd5d28d54fdba3f8aad46d0a94e31c1a9dc6572608ecc4199b
7
+ data.tar.gz: 43b53acf76cb4280b3e13f7921ca4ecaf2b726d15220f35187fdd45f3388488cb14888ef47ab3702ddd8e0c439655514a54a437414ba636e9067b4f8d0cf87a8
@@ -0,0 +1,36 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
35
+
36
+ .env
@@ -0,0 +1,18 @@
1
+ require 'rake/testtask'
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.name = 'test:texterra'
5
+ t.libs << 'test'
6
+ t.test_files = ['test/test_texterra_api.rb']
7
+ end
8
+
9
+ Rake::TestTask.new do |t|
10
+ t.name = 'test:twitter'
11
+ t.libs << 'test'
12
+ t.test_files = ['test/test_twitter_api.rb']
13
+ end
14
+
15
+ task :test => ['test:texterra', 'test:twitter']
16
+
17
+ desc "Run all tests"
18
+ task :default => :test
@@ -0,0 +1,19 @@
1
+ require "./lib/ispapi/version"
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = 'ispras-api'
5
+ s.version = Version.current
6
+ s.date = Version.current_date
7
+ s.files = `git ls-files`.split($\)
8
+ s.require_paths = ["lib"]
9
+ s.add_runtime_dependency 'httparty', '~> 0.13'
10
+ s.add_runtime_dependency 'nori', '~> 2.4'
11
+ s.add_development_dependency 'rake', '~> 10.4'
12
+ s.add_development_dependency 'minitest', '~> 5.5'
13
+ s.add_development_dependency 'dotenv', '~> 1.0'
14
+ s.summary = "ISPRAS API Ruby SDK"
15
+ s.description = "This is Ruby wrapper for REST API provided by ISPRAS. More info at https://api.ispras.ru/"
16
+ s.homepage = "https://github.com/alexlag/ispapi.ruby"
17
+ s.authors = ["Alexey Laguta"]
18
+ s.email = 'laguta@ispras.ru'
19
+ end
@@ -0,0 +1 @@
1
+ require_relative 'ispapi/texterra_api'
@@ -0,0 +1 @@
1
+ ApiError = Class.new StandardError
@@ -0,0 +1,41 @@
1
+ require 'httparty'
2
+ require 'nori'
3
+ require_relative 'api_error'
4
+
5
+ class IsprasAPI
6
+ include HTTParty
7
+ # debug_output $stdout
8
+ ROOT_URL = 'api.ispras.ru/%s/%s'
9
+ parser Proc.new { |data| data }
10
+
11
+ def initialize(key, name, ver)
12
+ if key && key.size == 40
13
+ self.class.base_uri ROOT_URL % [name, ver]
14
+ self.class.default_params apikey: key
15
+ @nori = Nori.new(parser: :rexml, convert_tags_to: lambda { |tag| tag.snakecase.to_sym })
16
+ else
17
+ raise ApiError, 'Please provide proper apikey'
18
+ end
19
+ end
20
+
21
+ def GET(path='', params={})
22
+ options = { query: params }
23
+ response = self.class.get "/#{path}", options
24
+ check_error response unless response.code == 200
25
+ hash = @nori.parse response.body
26
+ end
27
+
28
+ def POST(path='', params={}, form={})
29
+ options = { query: params, body: form }
30
+ response = self.class.post "/#{path}", options
31
+ check_error response unless response.code == 200
32
+ hash = @nori.parse response.body
33
+ end
34
+
35
+ private
36
+
37
+ def check_error(response)
38
+ raise ApiError, "#{response.code} Error occured"
39
+ end
40
+
41
+ end
@@ -0,0 +1,170 @@
1
+ require_relative './kbm_specs'
2
+
3
+ module TexterraKBM
4
+ include TexterraKBMSpecs
5
+
6
+ # Determines if Knowledge base contains the specified term
7
+ #
8
+ # @param term [String] term
9
+ # @return [Hash] with :presence field
10
+ def term_presence(term)
11
+ presetKBM :termPresence, term
12
+ end
13
+
14
+   # Returns information measure for the given term. Information measure denotes how often the given term is used as a link caption among all its occurrences
15
+ #
16
+ # @param term [String] term
17
+ # @result [Hash] with :infomeasure field
18
+ def term_info_measure(term)
19
+ presetKBM :termInfoMeasure, term
20
+ end
21
+
22
+ # Return concepts resource from the Knowledge base corresponding to the found meanings of the given term
23
+ #
24
+ # @param term [String] term
25
+ # @result [Hash] with :elements field
26
+ def term_meanings(term)
27
+ presetKBM :termMeanings, term
28
+ end
29
+
30
+ # If concept isn't provided, returns concepts with their commonness, corresponding to the found meanings of the given term. Commonness denotes, how often the given term is associated with the given concept.
31
+ # With concept(format is {id}:{kbname}) returns commonness of given concept for the given term.
32
+ #
33
+ # @param term [String] term
34
+ # @param concept [String] concept as {id}:{kbname}
35
+ # @result [Hash] with :elements field
36
+ def term_commonness(term, concept='')
37
+ concept = "id=#{concept}" unless concept.empty?
38
+ presetKBM :termCommonness, [term, concept]
39
+ end
40
+
41
+ # Return neighbour concepts for the given concepts(list or single concept, each concept is {id}:{kbname}).
42
+ #
43
+ # @param concepts [String, Array<String>] either concept as {id}:{kbname} or array of such concepts
44
+ # @param traverse_params [Hash] optional
45
+   # @option traverse_params [String] :linkType searching for neighbour concepts only along these link types
46
+   # @option traverse_params [String] :nodeType searching for neighbour concepts only of these types
47
+ # @option traverse_params [Fixnum] :minDepth minimum distance from original to result concepts
48
+ # @option traverse_params [Fixnum] :maxDepth maximum distance from original to result concepts
49
+ #
50
+ # @result [Hash] with :elements field
51
+ #
52
+ # If at least one traverse parameter(check REST Documentation for values) is specified, all other parameters should also be specified
53
+ def neighbours(concepts, traverse_params={})
54
+ traverse = traverse_params.inject('') do |res, (name, value)|
55
+ res += ";#{name}=#{value}"
56
+ end unless traverse_params.empty?
57
+ presetKBM :neighbours, [wrap_concepts(concepts), traverse]
58
+ end
59
+
60
+ # Return neighbour concepts size for the given concepts(list or single concept, each concept is {id}:{kbname}).
61
+ #
62
+ # @param concepts [String, Array<String>] either concept as {id}:{kbname} or array of such concepts
63
+ # @param traverse_params [Hash] optional
64
+   # @option traverse_params [String] :linkType searching for neighbour concepts only along these link types
65
+   # @option traverse_params [String] :nodeType searching for neighbour concepts only of these types
66
+ # @option traverse_params [Fixnum] :minDepth minimum distance from original to result concepts
67
+ # @option traverse_params [Fixnum] :maxDepth maximum distance from original to result concepts
68
+ #
69
+ # @result [Hash] with :size field
70
+ #
71
+ # @note If at least one traverse parameter(check REST Documentation for values) is specified, all other parameters should also be specified
72
+ def neighbours_size(concepts, traverse_params={})
73
+ traverse = traverse_params.inject('') do |res, (name, value)|
74
+ res += ";#{name}=#{value}"
75
+ end unless traverse_params.empty?
76
+ presetKBM :neighbours, [wrap_concepts(concepts), "#{traverse}/size"]
77
+ end
78
+
79
+ # Compute similarity for each pair of concepts(list or single concept, each concept is {id}:{kbname}).
80
+ #
81
+ # @param [Array<String>] concepts Array of concepts as {id}:{kbname}
82
+ # @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
83
+ def similarity_graph(concepts, linkWeight='MAX')
84
+ presetKBM :similarityGraph, "#{wrap_concepts(concepts)}linkWeight=#{linkWeight}"
85
+ end
86
+
87
+ # Computes sum of similarities from each concepts(list or single concept, each concept is {id}:{kbname}) from the first list to all concepts(list or single concept, each concept is {id}:{kbname}) from the second one.
88
+ #
89
+ # @param [Array<String>] first_concepts Array of concepts as {id}:{kbname}
90
+ # @param [Array<String>] second_concepts Array of concepts as {id}:{kbname}
91
+ # @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
92
+ def all_pairs_similarity(first_concepts, second_concepts, linkWeight='MAX')
93
+ presetKBM :allPairsSimilarity, ["#{wrap_concepts(first_concepts)}linkWeight=#{linkWeight}", wrap_concepts(second_concepts)]
94
+ end
95
+
96
+ # Compute similarity from each concept from the first list to all concepts(list or single concept, each concept is {id}:{kbname}) from the second list as a whole.
97
+ # Links of second list concepts(each concept is {id}:{kbname}) are collected together, thus forming a "virtual" article, similarity to which is computed.
98
+ #
99
+ # @param [Array<String>] concepts Array of concepts as {id}:{kbname}
100
+ # @param [Array<String>] virtual_aricle Array of concepts as {id}:{kbname}
101
+ # @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
102
+ def similarity_to_virtual_article(concepts, virtual_aricle, linkWeight='MAX')
103
+ presetKBM :similarityToVirtualArticle, ["#{wrap_concepts(concepts)}linkWeight=#{linkWeight}", wrap_concepts(virtual_aricle)]
104
+ end
105
+
106
+ # Compute similarity between two sets of concepts(list or single concept, each concept is {id}:{kbname}) as between "virtual" articles from these sets.
107
+ # The links of each virtual article are composed of links of the collection of concepts.
108
+ #
109
+ # @param [Array<String>] first_virtual_aricle Array of concepts as {id}:{kbname}
110
+ # @param [Array<String>] second_virtual_article Array of concepts as {id}:{kbname}
111
+ # @param [String] linkWeight Specifies method for computation of link weight in case of multiple link types - check REST Documentation for values
112
+ def similarity_between_virtual_articles(first_virtual_aricle, second_virtual_article, linkWeight='MAX')
113
+ presetKBM :similarityBetweenVirtualArticle, ["#{wrap_concepts(first_virtual_aricle)}linkWeight=#{linkWeight}", wrap_concepts(second_virtual_article)]
114
+ end
115
+
116
+ # Search for similar concepts among the first neighbours of the given ones(list or single concept, each concept is {id}:{kbname}).
117
+ #
118
+ # @param [Array<String>] concepts Array of concepts as {id}:{kbname}
119
+ # @param [Hash] params
120
+ # @option params [String] :linkWeight Specifies method for computation of link weight in case of multiple link types
121
+ # @option params [Fixnum] :offset Provides a possibility to skip several concepts from the start of the result
122
+ # @option params [Fixnum] :limit Provides a possibility to limit size of result
123
+ #
124
+ # @note check REST Documentation for values
125
+ def similar_over_first_neighbours(concepts, params={linkWeight:'MAX'})
126
+ presetKBM :similarOverFirstNeighbours, "#{wrap_concepts(concepts)};linkWeight=#{params[:linkWeight]}", params
127
+ end
128
+
129
+ # Search for similar concepts over filtered set of the first and the second neighbours of the given ones(list or single concept, each concept is {id}:{kbname}).
130
+ #
131
+ # @param [Array<String>] concepts Array of concepts as {id}:{kbname}
132
+ # @param [Hash] params
133
+ # @option params [String] :linkWeight Specifies method for computation of link weight in case of multiple link types
134
+ # @option params [Fixnum] :offset Provides a possibility to skip several concepts from the start of the result
135
+ # @option params [Fixnum] :limit Provides a possibility to limit size of result
136
+ # @option params [String] :among Specifies how to filter neighbour concepts when searching for most similar
137
+ #
138
+ # @note check REST Documentation for values
139
+ def similar_over_filtered_neighbours(concepts, params={linkWeight:'MAX'})
140
+ presetKBM :similarOverFilteredNeighbours, "#{wrap_concepts(concepts)};linkWeight=#{params[:linkWeight]}", params
141
+ end
142
+
143
+ # Get attributes for concepts(list or single concept, each concept is {id}:{kbname})
144
+ #
145
+ # @param [String, Array<String>] concepts Either concept as {id}:{kbname} or array of such concepts
146
+ # @param [Array<String>] attributes Specifies attributes to be included into response
147
+ # @note check REST Documentation for supported attributes
148
+ def get_attributes(concepts, attributes=[])
149
+ presetKBM :getAttributes, wrap_concepts(concepts), attribute: attributes
150
+ end
151
+
152
+ private
153
+
154
+ # Utility wrapper for matrix parameters
155
+ def wrap_concepts(concepts)
156
+ if concepts.is_a? Array
157
+ concepts.map { |c| "id=#{c};" }.join
158
+ else
159
+ "id=#{concepts};"
160
+ end
161
+ end
162
+
163
+ # Utility EKB part method
164
+ def presetKBM(methodName, pathParam, queryParam={})
165
+ specs = KBMSpecs[methodName]
166
+ queryParam.merge specs[:params]
167
+ GET(specs[:path] % pathParam, queryParam)
168
+ end
169
+
170
+ end
@@ -0,0 +1,53 @@
1
+ module TexterraKBMSpecs
2
+ # Path and parameters for preset KBM queries
3
+ KBMSpecs = {
4
+ termPresence: {
5
+ path: 'representation/%s/contained',
6
+ params: {}
7
+ },
8
+ termInfoMeasure: {
9
+ path: 'representation/%s/infomeasure',
10
+ params: {}
11
+ },
12
+ termMeanings: {
13
+ path: 'representation/%s/meanings',
14
+ params: {}
15
+ },
16
+ termCommonness: {
17
+ path: 'representation/%s/commonness/%s',
18
+ params: {}
19
+ },
20
+ neighbours: {
21
+ path: 'walker/%s/neighbours%s',
22
+ params: {}
23
+ },
24
+ similarityGraph: {
25
+ path: 'similarity/%s/graph',
26
+ params: {}
27
+ },
28
+ allPairsSimilarity: {
29
+ path: 'similarity/%s/summed/%s',
30
+ params: {}
31
+ },
32
+ similarityToVirtualArticle: {
33
+ path: 'similarity/%s/toVirtualArticle/%s',
34
+ params: {}
35
+ },
36
+ similarityBetweenVirtualArticle: {
37
+ path: 'similarity/%s/betweenVirtualArticles/%s',
38
+ params: {}
39
+ },
40
+ similarOverFirstNeighbours: {
41
+ path: 'similarity/%s/similar/neighbours',
42
+ params: {}
43
+ },
44
+ similarOverFilteredNeighbours: {
45
+ path: 'similarity/%s/similar/all',
46
+ params: {}
47
+ },
48
+ getAttributes: {
49
+ path: 'walker/%s',
50
+ params: {}
51
+ }
52
+ }
53
+ end
@@ -0,0 +1,153 @@
1
+ require_relative './nlp_specs'
2
+
3
+ module TexterraNLP
4
+ include TexterraNLPSpecs
5
+ # Detects language of given text
6
+ #
7
+ # @param [String] text Text to process
8
+ # @return [Array] Texterra annotations
9
+ def language_detection_annotate(text)
10
+ presetNLP(:languageDetection, text)
11
+ end
12
+
13
+ # Detects boundaries of sentences in a given text
14
+ #
15
+ # @param [String] text Text to process
16
+ # @return [Array] Texterra annotations
17
+ def sentence_detection(text)
18
+ presetNLP(:sentenceDetection, text)
19
+ end
20
+
21
+ # Detects all tokens (minimal significant text parts) in a given text
22
+ #
23
+ # @param [String] text Text to process
24
+ # @return [Array] Texterra annotations
25
+ def tokenization_annotate(text)
26
+ presetNLP(:tokenization, text)
27
+ end
28
+
29
+ # Detects lemma of each word of a given text
30
+ #
31
+ # @param [String] text Text to process
32
+ # @return [Array] Texterra annotations
33
+ def lemmatization_annotate(text)
34
+ presetNLP(:lemmatization, text)
35
+ end
36
+
37
+ # Detects part of speech tag for each word of a given text
38
+ #
39
+ # @param [String] text Text to process
40
+ # @return [Array] Texterra annotations
41
+ def pos_tagging_annotate(text)
42
+ presetNLP(:posTagging, text)
43
+ end
44
+
45
+   # Tries to correct misprints and other spelling errors in a given text
46
+ #
47
+ # @param [String] text Text to process
48
+ # @return [Array] Texterra annotations
49
+ def spelling_correction_annotate(text)
50
+ presetNLP(:spellingCorrection, text)
51
+ end
52
+
53
+   # Finds all named entities occurrences in a given text
54
+ #
55
+ # @param [String] text Text to process
56
+ # @return [Array] Texterra annotations
57
+ def named_entities_annotate(text)
58
+ presetNLP(:namedEntities, text)
59
+ end
60
+
61
+ # Extracts not overlapping terms within a given text; term is a textual representation for some concept of the real world
62
+ #
63
+ # @param [String] text Text to process
64
+ # @return [Array] Texterra annotations
65
+ def term_detection_annotate(text)
66
+ presetNLP(:termDetection, text)
67
+ end
68
+
69
+ # Detects the most appropriate meanings (concepts) for terms occurred in a given text
70
+ #
71
+ # @param [String] text Text to process
72
+ # @return [Array] Texterra annotations
73
+ def disambiguation_annotate(text)
74
+ presetNLP(:disambiguation, text)
75
+ end
76
+
77
+ # Key concepts are the concepts providing short (conceptual) and informative text description.
78
+ # This service extracts a set of key concepts for a given text
79
+ #
80
+ # @param [String] text Text to process
81
+ # @return [Array] Texterra annotations
82
+ def key_concepts_annotate(text)
83
+ presetNLP(:keyConcepts, text)
84
+ end
85
+
86
+ # Detects the most appropriate domain for the given text.
87
+ # Currently only 2 specific domains are supported: 'movie' and 'politics'
88
+ # If no domain from this list has been detected, the text is assumed to be no domain, or general domain
89
+ #
90
+ # @param [String] text Text to process
91
+ # @return [Array] Texterra annotations
92
+ def domain_detection_annotate(text)
93
+ presetNLP(:domainDetection, text)
94
+ end
95
+
96
+ # Detects whether the given text is subjective or not
97
+ #
98
+ # @param [String] text Text to process
99
+ # @return [Array] Texterra annotations
100
+ def subjectivity_detection_annotate(text)
101
+ presetNLP(:subjectivityDetection, text)
102
+ end
103
+
104
+ # Detects whether the given text has positive, negative or no sentiment
105
+ #
106
+ # @param [String] text Text to process
107
+ # @return [Array] Texterra annotations
108
+ def polarity_detection_annotate(text)
109
+ presetNLP(:polarityDetection, text)
110
+ end
111
+
112
+ # Detects whether the given text has positive, negative, or no sentiment, with respect to domain.
113
+ # If domain isn't provided, Domain detection is applied, this way method tries to achieve best results.
114
+ # If no domain is detected general domain algorithm is applied
115
+ #
116
+ # @param [String] text Text to process
117
+ # @param [String] domain Domain for polarity detection
118
+ # @return [Array] Texterra annotations
119
+ def domain_polarity_detection_annotate(text, domain='')
120
+ specs = NLPSpecs[:domainPolarityDetection]
121
+ domain = '(%s)' % domain unless domain.empty?
122
+ result = POST(specs[:path] % domain, specs[:params], {text: text})[:nlp_document][:annotations][:i_annotation]
123
+ return [] if result.nil?
124
+ result = [].push result unless result.is_a? Array
125
+ result.each do |e|
126
+ st, en = e[:start].to_i, e[:end].to_i
127
+ e[:text] = e[:annotated_text] = text[st..en]
128
+ end
129
+ end
130
+
131
+ # Detects Twitter-specific entities: Hashtags, User names, Emoticons, URLs.
132
+ # And also: Stop-words, Misspellings, Spelling suggestions, Spelling corrections
133
+ #
134
+ # @param [String] text Text to process
135
+ # @return [Array] Texterra annotations
136
+ def tweet_normalization(text)
137
+ presetNLP(:tweetNormalization, text)
138
+ end
139
+
140
+ private
141
+
142
+ # Utility NLP part method
143
+ def presetNLP(methodName, text)
144
+ specs = NLPSpecs[methodName]
145
+ result = POST(specs[:path], specs[:params], {text: text})[:nlp_document][:annotations][:i_annotation]
146
+ return [] if result.nil?
147
+ result = [].push result unless result.is_a? Array
148
+ result.each do |e|
149
+ st, en = e[:start].to_i, e[:end].to_i
150
+ e[:text] = e[:annotated_text] = text[st..en]
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,126 @@
1
+ module TexterraNLPSpecs
2
+ # Path and parameters for preset NLP queries
3
+ NLPSpecs = {
4
+ languageDetection: {
5
+ path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.LanguageDetectionPipeline',
6
+ params: {
7
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.Language',
8
+ filtering: 'KEEPING'
9
+ }
10
+ },
11
+ sentenceDetection: {
12
+ path: 'nlp/sentence',
13
+ params: {
14
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.Sentence',
15
+ filtering: 'KEEPING'
16
+ }
17
+ },
18
+ tokenization: {
19
+ path: 'nlp/token',
20
+ params: {
21
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.Token',
22
+ filtering: 'KEEPING'
23
+ }
24
+ },
25
+ lemmatization: {
26
+ path: 'nlp/lemma',
27
+ params: {
28
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.Lemma',
29
+ filtering: 'KEEPING'
30
+ }
31
+ },
32
+ posTagging: {
33
+ path: 'nlp/pos',
34
+ params: {
35
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.pos.IPOSToken',
36
+ filtering: 'KEEPING'
37
+ }
38
+ },
39
+ spellingCorrection: {
40
+ path: 'nlp/ru.ispras.texterra.core.nlp.annotators.spelling.SpellingCorrector',
41
+ params: {
42
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.SpellingCorrection',
43
+ filtering: 'KEEPING'
44
+ }
45
+ },
46
+ namedEntities: {
47
+ path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.NETaggingPipeline',
48
+ params: {
49
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.ne.NamedEntityToken',
50
+ filtering: 'KEEPING'
51
+ }
52
+ },
53
+ termDetection: {
54
+ path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.TermDetectionPipeline',
55
+ params: {
56
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.Frame',
57
+ filtering: 'KEEPING'
58
+ }
59
+ },
60
+ disambiguation: {
61
+ path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.DisambiguationPipeline',
62
+ params: {
63
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.DisambiguatedPhrase',
64
+ filtering: 'KEEPING'
65
+ }
66
+
67
+ },
68
+ keyConcepts: {
69
+ path: 'nlp/ru.ispras.texterra.core.nlp.pipelines.KeyConceptsPipeline',
70
+ params: {
71
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.KeyconceptsSemanticContext',
72
+ filtering: 'KEEPING'
73
+ }
74
+
75
+ },
76
+ domainDetection: {
77
+ path: 'nlp/domain',
78
+ params: {
79
+ :class => 'domain',
80
+ filtering: 'KEEPING'
81
+ }
82
+
83
+ },
84
+ subjectivityDetection: {
85
+ path: 'nlp/subjectivity',
86
+ params: {
87
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.SentimentSubjectivity',
88
+ filtering: 'KEEPING'
89
+ }
90
+
91
+ },
92
+ polarityDetection: {
93
+ path: 'nlp/polarity',
94
+ params: {
95
+ :class => 'ru.ispras.texterra.core.nlp.datamodel.SentimentPolarity',
96
+ filtering: 'KEEPING'
97
+ }
98
+
99
+ },
100
+ aspectExtraction: {
101
+ path: 'nlp/aspectsentiment',
102
+ params: {
103
+ :class => 'aspect-sentiment',
104
+ filtering: 'KEEPING'
105
+ }
106
+
107
+ },
108
+ domainPolarityDetection: {
109
+ path: 'nlp/domainpolarity%s',
110
+ params: {
111
+ :class => [ 'domain', 'sentiment-polarity' ],
112
+ filtering: 'KEEPING'
113
+ }
114
+
115
+ },
116
+ tweetNormalization: {
117
+ path: 'nlp/twitterdetection',
118
+ params: {
119
+ :class => ['sentence', 'language', 'token'],
120
+ filtering: 'REMOVING'
121
+ }
122
+
123
+ }
124
+ }
125
+
126
+ end
@@ -0,0 +1,86 @@
1
+ require_relative './ispras_api'
2
+ require_relative './texterra/nlp'
3
+ require_relative './texterra/kbm'
4
+
5
+ class TexterraAPI < IsprasAPI
6
+   # This class provides methods to work with Texterra REST via OpenAPI, including NLP and EKB methods and custom queries.
7
+ # Note that NLP methods return annotations only
8
+ include TexterraNLP, TexterraKBM
9
+ disable_rails_query_string_format
10
+
11
+ def initialize(key, name, ver)
12
+ name='texterra' if name.nil? || name.empty?
13
+ ver='v3.1' if ver.nil? || ver.empty?
14
+ super(key, name, ver)
15
+ end
16
+
17
+ # Section of NLP methods
18
+ # NLP basic helper methods
19
+
20
+ # Key concepts are the concepts providing short (conceptual) and informative text description.
21
+ # This service extracts a set of key concepts for a given text
22
+ #
23
+ # @param [String] text Text to process
24
+ # @return [Array] Array of weighted key concepts
25
+ def key_concepts(text)
26
+ key_concepts = key_concepts_annotate(text)[0][:value][:concepts_weights][:entry] || []
27
+ key_concepts = [].push key_concepts unless key_concepts.is_a? Array
28
+ key_concepts.map { |kc|
29
+ kc[:concept][:weight] = kc[:double]
30
+ kc[:concept]
31
+ }
32
+ end
33
+
34
+ # Detects whether the given text has positive, negative or no sentiment
35
+ #
36
+ # @param [String] text Text to process
37
+ # @return [Array] Sentiment of the text
38
+ def sentiment_analysis(text)
39
+ begin
40
+ polarity_detection_annotate(text)[0][:value].to_s || 'NEUTRAL'
41
+ rescue NoMethodError
42
+ 'NEUTRAL'
43
+ end
44
+ end
45
+
46
+ # Detects whether the given text has positive, negative, or no sentiment, with respect to domain.
47
+ # If domain isn't provided, Domain detection is applied, this way method tries to achieve best results.
48
+ # If no domain is detected general domain algorithm is applied
49
+ #
50
+ # @param [String] text Text to process
51
+ # @param domain [String] domain to use. Can be empty
52
+ # @return [Hash] used :domain and detected :polarity
53
+ def domain_sentiment_analysis(text, domain='')
54
+ used_domain = 'general'
55
+ sentiment = 'NEUTRAL'
56
+ (domain_polarity_detection_annotate(text, domain) || []).each { |an|
57
+ sentiment = an[:value] if an[:@class].include? 'SentimentPolarity'
58
+ used_domain = an[:value] if an[:@class].include? 'DomainAnnotation'
59
+ }
60
+ {
61
+ domain: used_domain,
62
+ polarity: sentiment
63
+ }
64
+ end
65
+
66
+ # Detects the most appropriate meanings (concepts) for terms occurred in a given text
67
+ #
68
+ # @param [String] text Text to process
69
+ # @return [Array] Texterra annotations
70
+ def disambiguation(text)
71
+ disambiguation_annotate(text)
72
+ end
73
+
74
+ def custom_query(path, query, form=nil)
75
+ form.nil? ? GET(path, query) : POST(path, query, form)
76
+ end
77
+
78
+ private
79
+
80
+ def check_error(response)
81
+ hash = @nori.parse response.body
82
+ er_node = hash[:html][:body][:p].detect { |node| node.is_a? Hash and node[:b] == 'root cause' }
83
+ raise ApiError, er_node[:pre].gsub(/ru\.ispras.*:\s*/, '')
84
+ end
85
+
86
+ end
@@ -0,0 +1,39 @@
1
+ require_relative './ispras_api'
2
+
3
+ class TwitterAPI < IsprasAPI
4
+ #This class provides methods to work with Twitter NLP REST via OpenAPI
5
+ disable_rails_query_string_format
6
+
7
+ def initialize(key, name, ver)
8
+ name='twitter-nlp' if name.nil? || name.empty?
9
+ ver='1.0' if ver.nil? || ver.empty?
10
+ super(key, name, ver)
11
+ end
12
+
13
+ # Extracts demographic attributes from provided Twitter info. All info is required, but can be empty
14
+ #
15
+ # @param [Hash] params
16
+ # @option params [String] :lang Language of tweets
17
+ # @option params [String] :username Username of Twitter user
18
+ # @option params [String] :screenname Screen name of Twitter user
19
+ # @option params [String] :description Description of Twitter user
20
+ # @option params [String, Array<String>] :tweet User's tweets
21
+ # @return [Hash] Enriched user with attributes
22
+ def extract_dde(params)
23
+ params[:tweet] = params[:tweet].join(' ') if params[:tweet].is_a? Array
24
+ POST 'extract', {}, params
25
+ end
26
+
27
+ def custom_query(path, query, form=nil)
28
+ form.nil? ? GET(path, query) : POST(path, query, form)
29
+ end
30
+
31
+ private
32
+
33
+ def check_error(response)
34
+ hash = @nori.parse response.body
35
+ er_node = hash[:html][:body][:p].detect { |node| node.is_a? Hash and node[:b] == 'root cause' }
36
+ raise ApiError, er_node[:pre].gsub(/ru\.ispras.*:\s*/, '')
37
+ end
38
+
39
+ end
@@ -0,0 +1,34 @@
1
+ module Version
2
+
3
+ MAJOR = 0
4
+ MINOR = 1
5
+ PATCH = 0
6
+ PRE = nil
7
+
8
+ YEAR = "2015"
9
+ MONTH = "02"
10
+ DAY = "10"
11
+
12
+ def self.to_s
13
+ [MAJOR, MINOR, PATCH, PRE].compact.join(".")
14
+ end
15
+
16
+ def self.current
17
+ to_s
18
+ end
19
+
20
+ def self.current_date
21
+ "#{YEAR}-#{MONTH}-#{DAY}"
22
+ end
23
+
24
+ def self.version_to_h(version)
25
+ version_array = version.split(/\./)
26
+ version_hash = {}
27
+ version_hash[:major] = version_array[0]
28
+ version_hash[:minor] = version_array[1]
29
+ version_hash[:patch] = version_array[2]
30
+ version_hash[:pre] = version_array[3]
31
+ version_hash
32
+ end
33
+
34
+ end
@@ -0,0 +1,143 @@
1
+ require 'minitest/autorun'
2
+ require 'dotenv'
3
+ Dotenv.load
4
+ require_relative '../lib/ispapi/texterra_api'
5
+
6
+ class TestTexterraAPI < Minitest::Test
7
+
8
+ def setup
9
+ @texterra = TexterraAPI.new ENV['TEXTERRA_KEY'], ENV['TEXTERRA_SERVICE_NAME'], ENV['TEXTERRA_SERVICE_VERSION']
10
+ @en_text = 'Apple today updated iMac to bring numerous high-performance enhancements to the leading all-in-one desktop. iMac now features fourth-generation Intel Core processors, new graphics, and next-generation Wi-Fi. In addition, it now supports PCIe-based flash storage, making its Fusion Drive and all-flash storage options up to 50 percent faster than the previous generation'
11
+ @ru_text = 'Первые в этом году переговоры министра иностранных дел России Сергея Лаврова и госсекретаря США Джона Керри, длившиеся 1,5 часа, завершились в Мюнхене.'
12
+ @en_tweet = 'mentioning veterens care which Mccain has voted AGAINST - SUPER GOOOOD point Obama+1 #tweetdebate'
13
+ @ru_tweet = 'В мастерской готовят пушку и автомобили 1940-х годов, для участия в Параде Победы в Ново-Переделкино.'
14
+ end
15
+
16
+ def test_key_concepts
17
+ assert_instance_of Array, @texterra.key_concepts(@en_text)
18
+ assert_instance_of Array, @texterra.key_concepts(@ru_text)
19
+ assert_instance_of Array, @texterra.key_concepts(@en_tweet)
20
+ assert_instance_of Array, @texterra.key_concepts(@ru_tweet)
21
+ end
22
+
23
+ def test_disambiguation
24
+ assert_instance_of Array, @texterra.disambiguation(@en_text)
25
+ assert_instance_of Array, @texterra.disambiguation(@ru_text)
26
+ end
27
+
28
+ def test_sentiment_analysis
29
+ assert_instance_of String, @texterra.sentiment_analysis(@en_text)
30
+ assert_instance_of String, @texterra.sentiment_analysis(@ru_text)
31
+ assert_instance_of String, @texterra.sentiment_analysis(@en_tweet)
32
+ assert_instance_of String, @texterra.sentiment_analysis(@ru_tweet)
33
+ end
34
+
35
+ def test_domain_sentiment_analysis
36
+ assert_instance_of Hash, @texterra.domain_sentiment_analysis(@en_text)
37
+ assert_instance_of Hash, @texterra.domain_sentiment_analysis(@ru_text)
38
+ res = @texterra.domain_sentiment_analysis(@en_tweet, 'politics')
39
+ assert_instance_of Hash, res
40
+ assert_equal 'politics', res[:domain]
41
+ assert_raises ApiError do
42
+ @texterra.domain_sentiment_analysis(@ru_text, 'politics')
43
+ end
44
+ end
45
+
46
+ def test_tweet_normalization
47
+ assert_instance_of Array, @texterra.tweet_normalization(@en_tweet)
48
+ assert_raises ApiError do
49
+ @texterra.tweet_normalization(@ru_tweet)
50
+ end
51
+ end
52
+
53
+ def test_language_detection_annotate
54
+ assert_instance_of Array, @texterra.language_detection_annotate(@en_text)
55
+ assert_instance_of Array, @texterra.language_detection_annotate(@ru_text)
56
+ assert_instance_of Array, @texterra.language_detection_annotate(@en_tweet)
57
+ assert_instance_of Array, @texterra.language_detection_annotate(@ru_tweet)
58
+ end
59
+
60
+ def test_named_entities_annotate
61
+ assert_instance_of Array, @texterra.named_entities_annotate(@en_text)
62
+ assert_instance_of Array, @texterra.named_entities_annotate(@ru_text)
63
+ assert_instance_of Array, @texterra.named_entities_annotate(@en_tweet)
64
+ assert_instance_of Array, @texterra.named_entities_annotate(@ru_tweet)
65
+ end
66
+
67
+ def test_subjectivity_detection_annotate
68
+ assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_text)
69
+ assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_text)
70
+ assert_instance_of Array, @texterra.subjectivity_detection_annotate(@en_tweet)
71
+ assert_instance_of Array, @texterra.subjectivity_detection_annotate(@ru_tweet)
72
+ end
73
+
74
+ def test_term_presence
75
+ res = @texterra.term_presence('Anarchism')
76
+ assert_instance_of Hash, res
77
+ assert_equal true, res[:presence]
78
+ end
79
+
80
+ def test_term_info_measure
81
+ assert_instance_of Hash, @texterra.term_info_measure('Anarchism')
82
+ end
83
+
84
+ def test_term_meanings
85
+ assert_instance_of Hash, @texterra.term_meanings('android')
86
+ end
87
+
88
+ def test_term_commonness
89
+ assert_instance_of Hash, @texterra.term_commonness('android')
90
+ assert_instance_of Hash, @texterra.term_commonness('android', '713:enwiki')
91
+ end
92
+
93
+ def test_neignbours
94
+ assert_instance_of Hash, @texterra.neighbours('12:enwiki')
95
+ assert_instance_of Hash, @texterra.neighbours('12:enwiki', linkType: 'RELATED', nodeType: 'REGULAR', minDepth: 1, maxDepth: 3)
96
+ assert_instance_of Hash, @texterra.neighbours(['12:enwiki', '713:enwiki'])
97
+ assert_instance_of Hash, @texterra.neighbours(['12:enwiki', '713:enwiki'], linkType: 'RELATED', nodeType: 'REGULAR', minDepth: 1, maxDepth: 3)
98
+ end
99
+
100
+ def test_neignbours_size
101
+ assert_instance_of Hash, @texterra.neighbours_size('12:enwiki')
102
+ assert_instance_of Hash, @texterra.neighbours_size('12:enwiki', linkType: 'RELATED', nodeType: 'REGULAR', minDepth: 1, maxDepth: 3)
103
+ assert_instance_of Hash, @texterra.neighbours_size(['12:enwiki', '713:enwiki'])
104
+ assert_instance_of Hash, @texterra.neighbours_size(['12:enwiki', '713:enwiki'], linkType: 'RELATED', nodeType: 'REGULAR', minDepth: 1, maxDepth: 3)
105
+ end
106
+
107
+ def test_similarity_graph
108
+ assert_instance_of Hash, @texterra.similarity_graph(['12:enwiki','13137:enwiki','156327:enwiki'])
109
+ assert_instance_of Hash, @texterra.similarity_graph(['12:enwiki','13137:enwiki','156327:enwiki'], 'MIN')
110
+ end
111
+
112
+ def test_all_pairs_similarity
113
+ assert_instance_of Hash, @texterra.all_pairs_similarity(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
114
+ assert_instance_of Hash, @texterra.all_pairs_similarity(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
115
+ end
116
+
117
+ def test_similarity_to_virtual_article
118
+ assert_instance_of Hash, @texterra.similarity_to_virtual_article(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
119
+ assert_instance_of Hash, @texterra.similarity_to_virtual_article(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
120
+ end
121
+
122
+ def test_similarity_between_virtual_articles
123
+ assert_instance_of Hash, @texterra.similarity_between_virtual_articles(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'])
124
+ assert_instance_of Hash, @texterra.similarity_between_virtual_articles(['12:enwiki','13137:enwiki'], ['156327:enwiki', '15942292:enwiki', '1921431:enwiki'], 'MIN')
125
+ end
126
+
127
+ def test_similar_over_first_neighbours
128
+ assert_instance_of Hash, @texterra.similar_over_first_neighbours('12:enwiki')
129
+ assert_instance_of Hash, @texterra.similar_over_first_neighbours('12:enwiki', linkWeight: 'MIN', offset: 1, limit: 3)
130
+ end
131
+
132
+ def test_similar_over_filtered_neighbours
133
+ assert_instance_of Hash, @texterra.similar_over_filtered_neighbours('12:enwiki')
134
+ assert_instance_of Hash, @texterra.similar_over_filtered_neighbours('12:enwiki', linkWeight: 'MIN', offset: 1, limit: 3, among: 'PORTION(0.2)')
135
+ end
136
+
137
+ def test_get_attributes
138
+ assert_instance_of Hash, @texterra.get_attributes('12:enwiki')
139
+ assert_instance_of Hash, @texterra.get_attributes(['12:enwiki', '13137:enwiki'])
140
+ assert_instance_of Hash, @texterra.get_attributes('12:enwiki', ['url(en)', 'type'])
141
+ assert_instance_of Hash, @texterra.get_attributes(['12:enwiki', '13137:enwiki'], ['url(en)', 'title'])
142
+ end
143
+ end
@@ -0,0 +1,15 @@
1
+ require 'minitest/autorun'
2
+ require 'dotenv'
3
+ Dotenv.load
4
+ require_relative '../lib/ispapi/twitter_api'
5
+
6
+ class TestTwitterAPI < Minitest::Test
7
+
8
+ def setup
9
+ @twitter = TwitterAPI.new ENV['DDE_KEY'], ENV['DDE_SERVICE_NAME'], ENV['DDE_SERVICE_VERSION']
10
+ end
11
+
12
+ def test_extract_dde
13
+ @twitter.extract_dde lang: 'en', username: 'Ann', screenname: 'bob', description: 'I am Ann from NY', tweet:'Hi there, I am Ann fromNY'
14
+ end
15
+ end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ispras-api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Alexey Laguta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: httparty
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.13'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nori
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.4'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.4'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.5'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: dotenv
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
83
+ description: This is Ruby wrapper for REST API provided by ISPRAS. More info at https://api.ispras.ru/
84
+ email: laguta@ispras.ru
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files: []
88
+ files:
89
+ - ".gitignore"
90
+ - Rakefile
91
+ - ispapi.gemspec
92
+ - lib/ispapi.rb
93
+ - lib/ispapi/api_error.rb
94
+ - lib/ispapi/ispras_api.rb
95
+ - lib/ispapi/texterra/kbm.rb
96
+ - lib/ispapi/texterra/kbm_specs.rb
97
+ - lib/ispapi/texterra/nlp.rb
98
+ - lib/ispapi/texterra/nlp_specs.rb
99
+ - lib/ispapi/texterra_api.rb
100
+ - lib/ispapi/twitter_api.rb
101
+ - lib/ispapi/version.rb
102
+ - test/test_texterra_api.rb
103
+ - test/test_twitter_api.rb
104
+ homepage: https://github.com/alexlag/ispapi.ruby
105
+ licenses: []
106
+ metadata: {}
107
+ post_install_message:
108
+ rdoc_options: []
109
+ require_paths:
110
+ - lib
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
121
+ requirements: []
122
+ rubyforge_project:
123
+ rubygems_version: 2.4.5
124
+ signing_key:
125
+ specification_version: 4
126
+ summary: ISPRAS API Ruby SDK
127
+ test_files: []
128
+ has_rdoc: