textrazor 0.0.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/README.md +26 -2
- data/Rakefile +10 -2
- data/lib/textrazor.rb +5 -0
- data/lib/textrazor/client.rb +38 -5
- data/lib/textrazor/configuration.rb +1 -1
- data/lib/textrazor/entailment.rb +24 -0
- data/lib/textrazor/property.rb +22 -0
- data/lib/textrazor/relation.rb +25 -0
- data/lib/textrazor/relation_param.rb +18 -0
- data/lib/textrazor/request.rb +6 -2
- data/lib/textrazor/response.rb +110 -34
- data/lib/textrazor/sentence.rb +24 -0
- data/lib/textrazor/topic.rb +8 -6
- data/lib/textrazor/util.rb +1 -1
- data/lib/textrazor/version.rb +1 -1
- data/lib/textrazor/word.rb +1 -1
- data/spec/functional/service_spec.rb +29 -0
- data/spec/lib/textrazor/client_spec.rb +113 -65
- data/spec/lib/textrazor/entailment_spec.rb +36 -0
- data/spec/lib/textrazor/entity_spec.rb +50 -26
- data/spec/lib/textrazor/phrase_spec.rb +8 -4
- data/spec/lib/textrazor/property_spec.rb +30 -0
- data/spec/lib/textrazor/relation_param_spec.rb +29 -0
- data/spec/lib/textrazor/relation_spec.rb +37 -0
- data/spec/lib/textrazor/request_spec.rb +7 -4
- data/spec/lib/textrazor/response_spec.rb +604 -49
- data/spec/lib/textrazor/sentence_spec.rb +41 -0
- data/spec/lib/textrazor/topic_spec.rb +12 -5
- data/textrazor.gemspec +1 -0
- metadata +35 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2996a5abb8a9c7d3113db4c9f64daf10c20e7360
|
4
|
+
data.tar.gz: cf7010d2573ada02239d021a960e6543627f5ea5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cd8f18e796b07f1ac921c95bf45d1d829a913c2442e0155dbab01d863f4f1ce0611856eb228766ebc192aafcbb85f2ae61d012eeaa8a6079dc16a7b6139ebf99
|
7
|
+
data.tar.gz: 362e74c53aef5bbe198ef7b79a95ff179d605b20e89a43de05d47669e4b07f3515476d4756aa471bd8064cc1625eab720dcfd53ef9df30c1b0645aa426c59674
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
textrazor
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.2.0
|
data/README.md
CHANGED
@@ -4,8 +4,6 @@ This is a gem wrapper for TextRazor REST API reference.
|
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
-
|
8
|
-
|
9
7
|
Add this line to your application's Gemfile:
|
10
8
|
|
11
9
|
gem 'textrazor', :git => 'git://github.com/andhapp/textrazor.git'
|
@@ -76,6 +74,32 @@ TextRazor.phrases('api_key', 'text')
|
|
76
74
|
Only implemented this for topics, entities, words and phrases. Also, implement
|
77
75
|
it for other information that we can retrieve from the public API.
|
78
76
|
|
77
|
+
### API Issues (To investigate)
|
78
|
+
|
79
|
+
#### Response
|
80
|
+
|
81
|
+
* error - Descriptive error message of any problems that may have occurred during analysis, or an empty string if there was no error.
|
82
|
+
|
83
|
+
Missing from the successful response.
|
84
|
+
|
85
|
+
* message - Any warning or informational messages returned from the server, or an empty string if there was no message.
|
86
|
+
|
87
|
+
Missing from the successful response.
|
88
|
+
|
89
|
+
* cleanedText
|
90
|
+
|
91
|
+
Missing from the successful response.
|
92
|
+
|
93
|
+
* customAnnotationOutput
|
94
|
+
|
95
|
+
Missing from the successful response.
|
96
|
+
|
97
|
+
### Specs
|
98
|
+
|
99
|
+
#### Prolog rules
|
100
|
+
|
101
|
+
Specs around custom prolog rules need to be added.
|
102
|
+
|
79
103
|
|
80
104
|
## Contributing
|
81
105
|
|
data/Rakefile
CHANGED
@@ -2,6 +2,14 @@ require 'bundler/gem_tasks'
|
|
2
2
|
require 'bundler/setup'
|
3
3
|
require 'rspec/core/rake_task'
|
4
4
|
|
5
|
-
|
5
|
+
# Rake tasks for the two spec suites. Each entry maps a task name to its
# description and the file pattern handed to RSpec.
{
  spec:  ['Run all the specs', "spec/lib/**/*_spec.rb"],
  specf: ['Run all the functional specs', "spec/functional/*_spec.rb"]
}.each do |task_name, (description, glob)|
  desc description
  RSpec::Core::RakeTask.new(task_name) { |spec_task| spec_task.pattern = glob }
end

# The default task runs both suites, `spec` first.
task :default => [:spec, :specf]
|
data/lib/textrazor.rb
CHANGED
@@ -5,9 +5,14 @@ require "textrazor/client"
|
|
5
5
|
require "textrazor/request"
|
6
6
|
require "textrazor/response"
|
7
7
|
require "textrazor/topic"
|
8
|
+
require "textrazor/entailment"
|
8
9
|
require "textrazor/entity"
|
9
10
|
require "textrazor/word"
|
10
11
|
require "textrazor/phrase"
|
12
|
+
require "textrazor/property"
|
13
|
+
require "textrazor/sentence"
|
14
|
+
require "textrazor/relation_param"
|
15
|
+
require "textrazor/relation"
|
11
16
|
|
12
17
|
module TextRazor
|
13
18
|
|
data/lib/textrazor/client.rb
CHANGED
@@ -5,15 +5,24 @@ module TextRazor
|
|
5
5
|
EmptyApiKey = Class.new(StandardError)
|
6
6
|
EmptyText = Class.new(StandardError)
|
7
7
|
TextTooLong = Class.new(StandardError)
|
8
|
+
UnsupportedExtractor = Class.new(StandardError)
|
9
|
+
UnsupportedCleanupMode = Class.new(StandardError)
|
8
10
|
|
9
11
|
DEFAULT_EXTRACTORS = ['entities', 'topics', 'words', 'phrases', 'dependency-trees',
|
10
12
|
'relations', 'entailments', 'senses']
|
11
13
|
|
12
|
-
|
13
|
-
|
14
|
+
DEFAULT_CLEANUP_MODE = 'raw'
|
15
|
+
|
16
|
+
VALID_CLEANUP_MODE_VALUES = [DEFAULT_CLEANUP_MODE, 'stripTags', 'cleanHTML']
|
17
|
+
|
18
|
+
REQUEST_OPTIONS = [:extractors, :rules, :cleanup_mode, :cleanup_return_cleaned, :cleanup_return_raw,
|
19
|
+
:language, :filter_dbpedia_types, :filter_freebase_types, :allow_overlap,
|
20
|
+
:enrichment_queries]
|
14
21
|
|
15
22
|
attr_reader :response, :api_key, :request_options
|
16
23
|
|
24
|
+
private_constant :DEFAULT_EXTRACTORS, :VALID_CLEANUP_MODE_VALUES, :DEFAULT_CLEANUP_MODE, :REQUEST_OPTIONS
|
25
|
+
|
17
26
|
def initialize(api_key, options = {})
|
18
27
|
assign_api_key(api_key)
|
19
28
|
assign_request_options(options)
|
@@ -21,7 +30,9 @@ module TextRazor
|
|
21
30
|
|
22
31
|
# Sends +text+ for analysis and wraps the HTTP result in a Response.
#
# The text is first checked by +assert_text+ (defined elsewhere in this
# class). The request payload is the API key merged with the configured
# request options; on a key clash the request option wins.
def analyse(text)
  assert_text(text)

  payload = { api_key: api_key }.merge(request_options)
  http_response = Request.post(text, payload)

  Response.new(http_response)
end
|
@@ -67,9 +78,31 @@ module TextRazor
|
|
67
78
|
end
|
68
79
|
|
69
80
|
# Builds @request_options from the options hash given to the constructor.
#
# :extractors and :cleanup_mode are validated (the assert_* helpers raise on
# unsupported values) and fall back to DEFAULT_EXTRACTORS /
# DEFAULT_CLEANUP_MODE when missing or falsy. Every other key listed in
# REQUEST_OPTIONS is copied over only when the caller supplied a non-nil
# value.
#
# The caller's hash is only read, never modified, so it can be reused or
# frozen.
def assign_request_options(options)
  extractors = options[:extractors]
  assert_extractors(extractors)

  cleanup_mode = options[:cleanup_mode]
  assert_cleanup_mode(cleanup_mode)

  @request_options = {
    extractors: extractors || DEFAULT_EXTRACTORS,
    cleanup_mode: cleanup_mode || DEFAULT_CLEANUP_MODE
  }

  # The two keys handled above are skipped here so their defaulting logic
  # is not overridden by the generic copy.
  (REQUEST_OPTIONS - @request_options.keys).each do |key|
    value = options[key]
    @request_options[key] = value unless value.nil?
  end
end
|
96
|
+
|
97
|
+
# Raises UnsupportedExtractor unless every requested extractor is listed in
# DEFAULT_EXTRACTORS. A nil (or false) argument means "not specified" and
# is accepted.
def assert_extractors(extractors)
  return unless extractors
  return if extractors.all? { |name| DEFAULT_EXTRACTORS.include?(name) }

  raise UnsupportedExtractor, 'Unsupported extractor'
end
|
102
|
+
|
103
|
+
# Raises UnsupportedCleanupMode unless the given mode is one of
# VALID_CLEANUP_MODE_VALUES. A nil (or false) argument means
# "not specified" and is accepted.
def assert_cleanup_mode(cleanup_mode)
  return unless cleanup_mode
  return if VALID_CLEANUP_MODE_VALUES.include?(cleanup_mode)

  raise UnsupportedCleanupMode, 'Unsupported clean up mode'
end
|
75
108
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module TextRazor

  # Value object for one entry of the response's :entailments list.
  #
  # Attributes are populated from a hash; only keys that match one of the
  # readers declared below are kept, everything else is ignored.
  class Entailment

    extend Util

    attr_reader :id, :word_positions, :prior_score, :context_score,
                :score, :entailed_tree, :entailed_words

    # params - Hash of attribute name => value. Entries whose value is nil
    #          or false, or whose name has no matching reader, are skipped,
    #          so the corresponding reader returns nil.
    def initialize(params = {})
      params.each do |name, value|
        next unless value && respond_to?(:"#{name}")

        instance_variable_set(:"@#{name}", value)
      end
    end

    # Builds an Entailment from a raw response hash. Each key is passed
    # through Util#standardize first (presumably camelCase -> snake_case;
    # confirm in Util, which is not part of this file).
    def self.create_from_hash(params)
      normalized = params.each_with_object({}) do |(key, value), acc|
        acc[standardize(key)] = value
      end
      new(normalized)
    end

  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module TextRazor

  # Value object for one entry of the response's :properties list.
  class Property

    extend Util

    attr_reader :id, :word_positions, :property_positions

    # Builds a Property from a raw response hash, passing every key through
    # Util#standardize before handing the result to the constructor.
    def self.create_from_hash(params)
      normalized = params.each_with_object({}) do |(key, value), acc|
        acc[standardize(key)] = value
      end
      new(normalized)
    end

    # params - Hash of attribute name => value. Only truthy values whose
    #          name matches a declared reader are stored.
    def initialize(params = {})
      params.each do |name, value|
        next unless value && respond_to?(:"#{name}")

        instance_variable_set(:"@#{name}", value)
      end
    end

  end

end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module TextRazor

  # Value object for one entry of the response's :relations list.
  class Relation

    attr_reader :id, :word_positions, :relation_params

    # params - Hash with the raw (symbol) keys :id, :wordPositions and
    #          :params. Each element of :params is turned into a
    #          RelationParam. A missing :params is treated as an empty
    #          list, so the declared default argument `{}` is usable.
    def initialize(params = {})
      @id = params[:id]
      @word_positions = params[:wordPositions]
      @relation_params = (params[:params] || []).map do |relation_param_hash|
        RelationParam.create_from_hash(relation_param_hash)
      end
    end

    # Returns how many RelationParam objects this relation holds.
    def number_of_relation_params
      @relation_params.size
    end

    # Builds a Relation from a raw response hash.
    def self.create_from_hash(params)
      new(params)
    end

  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module TextRazor

  # Value object for one element of a relation's :params list.
  class RelationParam

    attr_reader :relation, :word_positions

    class << self
      # Builds a RelationParam from a raw response hash.
      def create_from_hash(params)
        new(params)
      end
    end

    # params - Hash with the raw (symbol) keys :relation and :wordPositions.
    #          Missing keys leave the corresponding reader nil.
    def initialize(params = {})
      @relation, @word_positions = params[:relation], params[:wordPositions]
    end

  end

end
|
data/lib/textrazor/request.rb
CHANGED
@@ -9,10 +9,14 @@ module TextRazor
|
|
9
9
|
|
10
10
|
# Maps the client's snake_case option names to the dotted/camelCase names
# on the right-hand side (presumably the TextRazor request parameter names;
# how they are applied is decided in Request.post, not shown here).
# Frozen so the shared lookup table cannot be modified at runtime.
OPTIONS_MAPPING = {
  extractors: 'extractors',
  cleanup_mode: 'cleanup.mode',
  cleanup_return_cleaned: 'cleanup.returnCleaned',
  cleanup_return_raw: 'cleanup.returnRaw',
  language: 'languageOverride',
  filter_dbpedia_types: 'entities.filterDbpediaTypes',
  filter_freebase_types: 'entities.filterFreebaseTypes',
  allow_overlap: 'entities.allowOverlap',
  enrichment_queries: 'entities.enrichmentQueries'
}.freeze
|
17
21
|
|
18
22
|
def self.post(text, options)
|
data/lib/textrazor/response.rb
CHANGED
@@ -8,7 +8,7 @@ module TextRazor
|
|
8
8
|
Unauthorised = Class.new(StandardError)
|
9
9
|
RequestEntityTooLong = Class.new(StandardError)
|
10
10
|
|
11
|
-
attr_reader :raw_response
|
11
|
+
attr_reader :raw_response, :time
|
12
12
|
|
13
13
|
def initialize(http_response)
|
14
14
|
code = http_response.code
|
@@ -18,45 +18,78 @@ module TextRazor
|
|
18
18
|
raise Unauthorised.new(body) if unauthorised?(code)
|
19
19
|
raise RequestEntityTooLong.new(body) if request_entity_too_long?(code)
|
20
20
|
|
21
|
-
|
21
|
+
json_body = ::JSON::parse(body, symbolize_names: true)
|
22
|
+
|
23
|
+
@time = json_body[:time].to_f
|
24
|
+
@ok = json_body[:ok]
|
25
|
+
@raw_response = json_body[:response]
|
22
26
|
end
|
23
27
|
|
24
|
-
def
|
25
|
-
@
|
28
|
+
def ok?
|
29
|
+
@ok
|
26
30
|
end
|
27
31
|
|
28
|
-
|
29
|
-
|
32
|
+
#TODO: Not in a successful response
|
33
|
+
#def error
|
34
|
+
#end
|
35
|
+
|
36
|
+
#def message
|
37
|
+
#end
|
38
|
+
|
39
|
+
def custom_annotation_output
|
40
|
+
@custom_annotation_output ||= raw_response[:customAnnotationOutput]
|
41
|
+
end
|
42
|
+
|
43
|
+
def cleaned_text
|
44
|
+
@cleaned_text ||= raw_response[:cleanedText]
|
45
|
+
end
|
46
|
+
|
47
|
+
def raw_text
|
48
|
+
@raw_text||= raw_response[:rawText]
|
49
|
+
end
|
50
|
+
|
51
|
+
def entailments
|
52
|
+
@entailments ||= parse_entailments
|
30
53
|
end
|
31
54
|
|
32
55
|
def entities
|
33
|
-
|
34
|
-
|
56
|
+
@entities ||= parse_entities
|
57
|
+
end
|
35
58
|
|
36
|
-
|
37
|
-
|
38
|
-
Entity.create_from_hash(entity_hash)
|
39
|
-
end
|
40
|
-
end
|
59
|
+
def coarse_topics
|
60
|
+
@coarse_topics ||= parse_coarse_topics
|
41
61
|
end
|
42
62
|
|
43
|
-
def
|
44
|
-
|
45
|
-
return nil if raw_sentences.nil?
|
46
|
-
|
47
|
-
@words ||= begin
|
48
|
-
words = []
|
49
|
-
raw_sentences.each do |sentence_hash|
|
50
|
-
sentence_hash["words"].each do |word_hash|
|
51
|
-
words << Word.create_from_hash(word_hash)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
words
|
55
|
-
end
|
63
|
+
def topics
|
64
|
+
@topics ||= parse_topics
|
56
65
|
end
|
57
66
|
|
58
67
|
def phrases
|
59
|
-
@phrases ||= parse_phrases
|
68
|
+
@phrases ||= parse_phrases
|
69
|
+
end
|
70
|
+
|
71
|
+
def words
|
72
|
+
@words ||= parse_words
|
73
|
+
end
|
74
|
+
|
75
|
+
def properties
|
76
|
+
@properties ||= parse_properties
|
77
|
+
end
|
78
|
+
|
79
|
+
def relations
|
80
|
+
@relations ||= parse_relations
|
81
|
+
end
|
82
|
+
|
83
|
+
def sentences
|
84
|
+
@sentences ||= parse_sentences
|
85
|
+
end
|
86
|
+
|
87
|
+
def language
|
88
|
+
raw_response[:language]
|
89
|
+
end
|
90
|
+
|
91
|
+
def language_is_reliable?
|
92
|
+
raw_response[:languageIsReliable]
|
60
93
|
end
|
61
94
|
|
62
95
|
private
|
@@ -73,22 +106,65 @@ module TextRazor
|
|
73
106
|
code == 413
|
74
107
|
end
|
75
108
|
|
76
|
-
def
|
77
|
-
|
109
|
+
def parse_entailments
|
110
|
+
parse(:entailment, raw_response[:entailments])
|
111
|
+
end
|
78
112
|
|
79
|
-
|
80
|
-
|
81
|
-
end
|
113
|
+
def parse_entities
|
114
|
+
parse(:entity, raw_response[:entities])
|
82
115
|
end
|
83
116
|
|
84
|
-
def
|
85
|
-
|
117
|
+
def parse_coarse_topics
|
118
|
+
parse(:topic, raw_response[:coarseTopics])
|
119
|
+
end
|
120
|
+
|
121
|
+
def parse_topics
|
122
|
+
parse(:topic, raw_response[:topics])
|
123
|
+
end
|
124
|
+
|
125
|
+
# Converts the :nounPhrases list of the raw response into Phrase objects.
# Each phrase is built together with the response's words (see #words).
# Returns nil when the response has no :nounPhrases entry.
def parse_phrases
  phrase_hashes = raw_response[:nounPhrases]
  return if phrase_hashes.nil?

  phrase_hashes.map { |phrase_hash| Phrase.create_from_hash(phrase_hash, words) }
end
|
91
133
|
|
134
|
+
# Collects the words of every sentence in the raw response into one flat
# list of Word objects, in sentence order. Returns nil when the response
# has no :sentences entry.
def parse_words
  sentence_hashes = raw_response[:sentences]
  return if sentence_hashes.nil?

  sentence_hashes.flat_map do |sentence_hash|
    sentence_hash[:words].map { |word_hash| Word.create_from_hash(word_hash) }
  end
end
|
146
|
+
|
147
|
+
def parse_properties
|
148
|
+
parse(:property, raw_response[:properties])
|
149
|
+
end
|
150
|
+
|
151
|
+
def parse_relations
|
152
|
+
parse(:relation, raw_response[:relations])
|
153
|
+
end
|
154
|
+
|
155
|
+
def parse_sentences
|
156
|
+
parse(:sentence, raw_response[:sentences])
|
157
|
+
end
|
158
|
+
|
159
|
+
# Generic builder shared by the parse_* helpers.
#
# type - Symbol naming the model; its capitalized form is looked up as a
#        constant under TextRazor (e.g. :topic -> TextRazor::Topic).
# data - Array of raw hashes from the response, or nil.
#
# Returns an Array with one model instance per hash (built through the
# class's create_from_hash), or nil when data is nil.
def parse(type, data)
  return nil if data.nil?

  model_class = Object.const_get("TextRazor::#{type.capitalize}")
  data.map { |attributes| model_class.create_from_hash(attributes) }
end
|
92
168
|
end
|
93
169
|
|
94
170
|
end
|