textrazor 0.0.8 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/README.md +26 -2
- data/Rakefile +10 -2
- data/lib/textrazor.rb +5 -0
- data/lib/textrazor/client.rb +38 -5
- data/lib/textrazor/configuration.rb +1 -1
- data/lib/textrazor/entailment.rb +24 -0
- data/lib/textrazor/property.rb +22 -0
- data/lib/textrazor/relation.rb +25 -0
- data/lib/textrazor/relation_param.rb +18 -0
- data/lib/textrazor/request.rb +6 -2
- data/lib/textrazor/response.rb +110 -34
- data/lib/textrazor/sentence.rb +24 -0
- data/lib/textrazor/topic.rb +8 -6
- data/lib/textrazor/util.rb +1 -1
- data/lib/textrazor/version.rb +1 -1
- data/lib/textrazor/word.rb +1 -1
- data/spec/functional/service_spec.rb +29 -0
- data/spec/lib/textrazor/client_spec.rb +113 -65
- data/spec/lib/textrazor/entailment_spec.rb +36 -0
- data/spec/lib/textrazor/entity_spec.rb +50 -26
- data/spec/lib/textrazor/phrase_spec.rb +8 -4
- data/spec/lib/textrazor/property_spec.rb +30 -0
- data/spec/lib/textrazor/relation_param_spec.rb +29 -0
- data/spec/lib/textrazor/relation_spec.rb +37 -0
- data/spec/lib/textrazor/request_spec.rb +7 -4
- data/spec/lib/textrazor/response_spec.rb +604 -49
- data/spec/lib/textrazor/sentence_spec.rb +41 -0
- data/spec/lib/textrazor/topic_spec.rb +12 -5
- data/textrazor.gemspec +1 -0
- metadata +35 -2
@@ -0,0 +1,24 @@
|
|
1
|
+
module TextRazor
|
2
|
+
|
3
|
+
class Sentence
|
4
|
+
|
5
|
+
attr_reader :position, :words
|
6
|
+
|
7
|
+
def initialize(params)
|
8
|
+
@position = params[:position]
|
9
|
+
@words = params[:words].map do |word_hash|
|
10
|
+
Word.create_from_hash(word_hash)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def number_of_words
|
15
|
+
@words.size
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.create_from_hash(params)
|
19
|
+
new(params)
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
data/lib/textrazor/topic.rb
CHANGED
@@ -2,17 +2,19 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Topic
|
4
4
|
|
5
|
+
extend Util
|
6
|
+
|
5
7
|
attr_reader :id, :label, :wiki_link, :score
|
6
8
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@score = score
|
9
|
+
def initialize(params = {})
|
10
|
+
params.each do |k, v|
|
11
|
+
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
12
|
+
end
|
12
13
|
end
|
13
14
|
|
14
15
|
def self.create_from_hash(params)
|
15
|
-
|
16
|
+
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
17
|
+
new(params)
|
16
18
|
end
|
17
19
|
|
18
20
|
end
|
data/lib/textrazor/util.rb
CHANGED
data/lib/textrazor/version.rb
CHANGED
data/lib/textrazor/word.rb
CHANGED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'Functional spec', functional: true do
|
4
|
+
let(:api_key) do
|
5
|
+
ENV["TEXTRAZOR_API_KEY"] or raise 'Please specify a TEXTRAZOR_API_KEY in your local environment to run this'
|
6
|
+
end
|
7
|
+
|
8
|
+
let(:client) do
|
9
|
+
TextRazor::Client.new(api_key)
|
10
|
+
end
|
11
|
+
|
12
|
+
let(:text) do
|
13
|
+
t = <<TEXT
|
14
|
+
Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.
|
15
|
+
The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.
|
16
|
+
But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.
|
17
|
+
Barclays said the mistake in its accounts was "a drafting error".
|
18
|
+
Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by £7bn worth of new investment, most of which came from the gulf states of Qatar and Abu Dhabi.
|
19
|
+
Half of the cash was supposed to be coming from Sheikh Mansour.
|
20
|
+
But Barclays has admitted it was told the investor might change shortly before shareholders voted to approve the deal on 24 November 2008.
|
21
|
+
But instead of telling shareholders, the bank remained silent until the change of investor was confirmed a few hours later.
|
22
|
+
TEXT
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'returns a response' do
|
26
|
+
expect(client.analyse(text)).to be_ok
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
@@ -4,55 +4,103 @@ module TextRazor
|
|
4
4
|
|
5
5
|
describe Client do
|
6
6
|
|
7
|
-
let(:api_key)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
let(:
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
let(:
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
7
|
+
let(:api_key) do
|
8
|
+
'api_key'
|
9
|
+
end
|
10
|
+
|
11
|
+
let(:client) do
|
12
|
+
custom_options_client
|
13
|
+
end
|
14
|
+
|
15
|
+
let(:nil_api_key_client) do
|
16
|
+
Client.new(nil)
|
17
|
+
end
|
18
|
+
|
19
|
+
let(:empty_api_key_client) do
|
20
|
+
Client.new('')
|
21
|
+
end
|
22
|
+
|
23
|
+
let(:custom_options_client) do
|
24
|
+
Client.new(api_key, {
|
25
|
+
extractors: %w(entities topics words), cleanup_mode: 'raw',
|
26
|
+
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
27
|
+
filter_dbpedia_types: %w(type1), language: 'fre',
|
28
|
+
filter_freebase_types: %w(type2), allow_overlap: false
|
29
|
+
})
|
30
|
+
end
|
31
|
+
|
32
|
+
let(:default_options_client) do
|
33
|
+
Client.new(api_key)
|
34
|
+
end
|
35
|
+
|
36
|
+
describe "#initialize" do
|
37
|
+
|
38
|
+
context 'with valid api key' do
|
39
|
+
|
40
|
+
context "and default request options" do
|
41
|
+
|
42
|
+
it 'assigns correctly' do
|
43
|
+
expect(default_options_client.api_key).to eq(api_key)
|
44
|
+
expect(default_options_client.request_options).
|
45
|
+
to eq({extractors: %w(entities topics words phrases dependency-trees
|
46
|
+
relations entailments senses), cleanup_mode: 'raw'})
|
47
|
+
end
|
48
|
+
|
25
49
|
end
|
26
50
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
to eq(
|
31
|
-
|
51
|
+
context 'and custom request options' do
|
52
|
+
|
53
|
+
it "assigns correctly" do
|
54
|
+
expect(custom_options_client.api_key).to eq(api_key)
|
55
|
+
expect(custom_options_client.request_options).
|
56
|
+
to eq({extractors: %w(entities topics words), cleanup_mode: 'raw', language: 'fre',
|
57
|
+
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
58
|
+
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
59
|
+
allow_overlap: false})
|
60
|
+
end
|
61
|
+
|
32
62
|
end
|
33
63
|
|
34
64
|
end
|
35
65
|
|
36
|
-
context "invalid
|
66
|
+
context "with invalid api key" do
|
67
|
+
|
68
|
+
context "when nil" do
|
69
|
+
|
70
|
+
it "raises an exception" do
|
71
|
+
expect { nil_api_key_client }.
|
72
|
+
to raise_error(Client::EmptyApiKey)
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
context "when empty" do
|
78
|
+
|
79
|
+
it "raises an exception" do
|
80
|
+
expect { empty_api_key_client }.
|
81
|
+
to raise_error(Client::EmptyApiKey)
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
37
85
|
|
38
|
-
|
86
|
+
end
|
39
87
|
|
40
|
-
|
88
|
+
context 'with invalid request options' do
|
41
89
|
|
42
|
-
|
43
|
-
expect { nil_api_key_client }.
|
44
|
-
to raise_error(Client::EmptyApiKey)
|
45
|
-
end
|
90
|
+
context 'when an invalid extractor value is supplied' do
|
46
91
|
|
92
|
+
it 'raises an exception' do
|
93
|
+
expect { Client.new(api_key, {extractors: ['invalid-extractor', 'topics']}) }.
|
94
|
+
to raise_error(Client::UnsupportedExtractor)
|
47
95
|
end
|
48
96
|
|
49
|
-
|
97
|
+
end
|
50
98
|
|
51
|
-
|
52
|
-
expect { empty_api_key_client }.
|
53
|
-
to raise_error(Client::EmptyApiKey)
|
54
|
-
end
|
99
|
+
context 'when an invalid cleanup_mode value is supplied' do
|
55
100
|
|
101
|
+
it 'raises an exception' do
|
102
|
+
expect { Client.new(api_key, {cleanup_mode: 'invalid-cleanup-mode'}) }.
|
103
|
+
to raise_error(Client::UnsupportedCleanupMode)
|
56
104
|
end
|
57
105
|
|
58
106
|
end
|
@@ -63,16 +111,20 @@ module TextRazor
|
|
63
111
|
|
64
112
|
context "#analyse" do
|
65
113
|
|
66
|
-
let(:very_long_text)
|
114
|
+
let(:very_long_text) do
|
115
|
+
"L" * 201 * 1024
|
116
|
+
end
|
67
117
|
|
68
|
-
context "valid
|
118
|
+
context "valid value of 'text'" do
|
69
119
|
|
70
|
-
it "
|
71
|
-
request =
|
120
|
+
it "makes correct calls" do
|
121
|
+
request = BasicObject.new
|
72
122
|
|
73
123
|
expect(Request).to receive(:post).
|
74
|
-
with('text', {api_key: 'api_key', extractors: %w(entities topics words),
|
75
|
-
|
124
|
+
with('text', {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
|
125
|
+
cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
|
126
|
+
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
127
|
+
allow_overlap: false}).
|
76
128
|
and_return(request)
|
77
129
|
|
78
130
|
expect(Response).to receive(:new).with(request)
|
@@ -82,35 +134,31 @@ module TextRazor
|
|
82
134
|
|
83
135
|
end
|
84
136
|
|
85
|
-
context "invalid
|
137
|
+
context "invalid value of 'text'" do
|
86
138
|
|
87
|
-
context "
|
88
|
-
|
89
|
-
context "is nil" do
|
90
|
-
|
91
|
-
it "should raise an exception" do
|
92
|
-
expect { client.analyse(nil) }.
|
93
|
-
to raise_error(Client::EmptyText)
|
94
|
-
end
|
139
|
+
context "when nil" do
|
95
140
|
|
141
|
+
it "raises an exception" do
|
142
|
+
expect { client.analyse(nil) }.
|
143
|
+
to raise_error(Client::EmptyText)
|
96
144
|
end
|
97
145
|
|
98
|
-
|
146
|
+
end
|
99
147
|
|
100
|
-
|
101
|
-
expect { client.analyse('') }.
|
102
|
-
to raise_error(Client::EmptyText)
|
103
|
-
end
|
148
|
+
context "when empty" do
|
104
149
|
|
150
|
+
it "raises an exception" do
|
151
|
+
expect { client.analyse('') }.
|
152
|
+
to raise_error(Client::EmptyText)
|
105
153
|
end
|
106
154
|
|
107
|
-
|
155
|
+
end
|
108
156
|
|
109
|
-
|
110
|
-
expect { client.analyse(very_long_text) }.
|
111
|
-
to raise_error(Client::TextTooLong)
|
112
|
-
end
|
157
|
+
context "when size is > 200kb" do
|
113
158
|
|
159
|
+
it "raises an exception" do
|
160
|
+
expect { client.analyse(very_long_text) }.
|
161
|
+
to raise_error(Client::TextTooLong)
|
114
162
|
end
|
115
163
|
|
116
164
|
end
|
@@ -121,7 +169,7 @@ module TextRazor
|
|
121
169
|
|
122
170
|
context ".topics" do
|
123
171
|
|
124
|
-
it "
|
172
|
+
it "makes correct calls" do
|
125
173
|
client = OpenStruct.new
|
126
174
|
response = OpenStruct.new topics: ['topic1'], coarseTopics: ['topic1']
|
127
175
|
|
@@ -140,7 +188,7 @@ module TextRazor
|
|
140
188
|
|
141
189
|
context ".coarse_topics" do
|
142
190
|
|
143
|
-
it "
|
191
|
+
it "makes correct calls" do
|
144
192
|
client = OpenStruct.new
|
145
193
|
response = OpenStruct.new topics: ['topic1'], coarseTopics: ['topic1']
|
146
194
|
|
@@ -159,7 +207,7 @@ module TextRazor
|
|
159
207
|
|
160
208
|
context ".entities" do
|
161
209
|
|
162
|
-
it "
|
210
|
+
it "makes correct calls" do
|
163
211
|
client = OpenStruct.new
|
164
212
|
response = OpenStruct.new entities: ['Entity1']
|
165
213
|
|
@@ -178,7 +226,7 @@ module TextRazor
|
|
178
226
|
|
179
227
|
context ".words" do
|
180
228
|
|
181
|
-
it "
|
229
|
+
it "makes correct calls" do
|
182
230
|
client = OpenStruct.new
|
183
231
|
response = OpenStruct.new words: ['Word1']
|
184
232
|
|
@@ -197,7 +245,7 @@ module TextRazor
|
|
197
245
|
|
198
246
|
context ".phrases" do
|
199
247
|
|
200
|
-
it "
|
248
|
+
it "makes correct calls" do
|
201
249
|
client = OpenStruct.new
|
202
250
|
response = OpenStruct.new phrases: ['Phrase1']
|
203
251
|
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
module TextRazor
|
4
|
+
|
5
|
+
describe Entailment do
|
6
|
+
|
7
|
+
context "#create_from_hash" do
|
8
|
+
|
9
|
+
let(:entailment_hash) {
|
10
|
+
{
|
11
|
+
:id=>2, :wordPositions=>[1], :entailedWords=>["misrepresentation"],
|
12
|
+
:entailedTree=>{
|
13
|
+
:word=>"misrepresentation", :wordId=>0, :parentRelation=>-1
|
14
|
+
},
|
15
|
+
:priorScore=>0.00132419, :contextScore=>0.0694058, :score=>0.154246
|
16
|
+
}
|
17
|
+
}
|
18
|
+
|
19
|
+
it "creates a new instance" do
|
20
|
+
entailment = Entailment.create_from_hash(entailment_hash)
|
21
|
+
|
22
|
+
expect(entailment.id).to eq(2)
|
23
|
+
expect(entailment.entailed_tree).to eq({
|
24
|
+
:word=>"misrepresentation", :wordId=>0, :parentRelation=>-1
|
25
|
+
})
|
26
|
+
expect(entailment.word_positions).to eq([1])
|
27
|
+
expect(entailment.prior_score).to eq(0.00132419)
|
28
|
+
expect(entailment.context_score).to eq(0.0694058)
|
29
|
+
expect(entailment.score).to eq(0.154246)
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
@@ -6,36 +6,60 @@ module TextRazor
|
|
6
6
|
|
7
7
|
context "#create_from_hash" do
|
8
8
|
|
9
|
-
|
10
|
-
entity_hash
|
11
|
-
"freebaseTypes" => ["government/government_office_or_title"],
|
12
|
-
"confidenceScore" => 0.897858, "wikiLink" => "http://en.wikipedia.org/wiki/Foreign_minister",
|
13
|
-
"matchedText" => "foreign ministers", "freebaseId" => "/m/01t_55", "relevanceScore" => 0.311479,
|
14
|
-
"entityEnglishId" => "Foreign minister", "startingPos" => 3, "endingPos" => 20}
|
15
|
-
|
16
|
-
entity = Entity.create_from_hash(entity_hash)
|
17
|
-
|
18
|
-
expect(entity.id).to eq(1)
|
19
|
-
expect(entity.type).to eq(['Person'])
|
20
|
-
expect(entity.matching_tokens).to eq([1,2])
|
21
|
-
expect(entity.entity_id).to eq("Foreign minister")
|
22
|
-
expect(entity.freebase_types).to eq(["government/government_office_or_title"])
|
23
|
-
expect(entity.confidence_score).to eq(0.897858)
|
24
|
-
expect(entity.wiki_link).to eq("http://en.wikipedia.org/wiki/Foreign_minister")
|
25
|
-
expect(entity.matched_text).to eq("foreign ministers")
|
26
|
-
expect(entity.freebase_id).to eq("/m/01t_55")
|
27
|
-
expect(entity.relevance_score).to eq(0.311479)
|
28
|
-
expect(entity.entity_english_id).to eq("Foreign minister")
|
29
|
-
expect(entity.starting_pos).to eq(3)
|
30
|
-
expect(entity.ending_pos).to eq(20)
|
9
|
+
let(:entity) do
|
10
|
+
Entity.create_from_hash(entity_hash)
|
31
11
|
end
|
32
12
|
|
33
|
-
|
34
|
-
entity_hash
|
13
|
+
context "with defined values" do
|
14
|
+
let(:entity_hash) do
|
15
|
+
{
|
16
|
+
"id" => 1,
|
17
|
+
"type" => ['Person'],
|
18
|
+
"matchingTokens" => [1, 2],
|
19
|
+
"entityId" => "Foreign minister",
|
20
|
+
"freebaseTypes" => ["government/government_office_or_title"],
|
21
|
+
"confidenceScore" => 0.897858,
|
22
|
+
"wikiLink" => "http://en.wikipedia.org/wiki/Foreign_minister",
|
23
|
+
"matchedText" => "foreign ministers",
|
24
|
+
"freebaseId" => "/m/01t_55",
|
25
|
+
"relevanceScore" => 0.311479,
|
26
|
+
"entityEnglishId" => "Foreign minister",
|
27
|
+
"startingPos" => 3,
|
28
|
+
"endingPos" => 20
|
29
|
+
}
|
30
|
+
end
|
35
31
|
|
36
|
-
|
32
|
+
it "should create a new instance" do
|
33
|
+
expect(entity.id).to eq(1)
|
34
|
+
expect(entity.type).to eq(['Person'])
|
35
|
+
expect(entity.matching_tokens).to eq([1,2])
|
36
|
+
expect(entity.entity_id).to eq("Foreign minister")
|
37
|
+
expect(entity.freebase_types).to eq(["government/government_office_or_title"])
|
38
|
+
expect(entity.confidence_score).to eq(0.897858)
|
39
|
+
expect(entity.wiki_link).to eq("http://en.wikipedia.org/wiki/Foreign_minister")
|
40
|
+
expect(entity.matched_text).to eq("foreign ministers")
|
41
|
+
expect(entity.freebase_id).to eq("/m/01t_55")
|
42
|
+
expect(entity.relevance_score).to eq(0.311479)
|
43
|
+
expect(entity.entity_english_id).to eq("Foreign minister")
|
44
|
+
expect(entity.starting_pos).to eq(3)
|
45
|
+
expect(entity.ending_pos).to eq(20)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
context "with empty values" do
|
50
|
+
|
51
|
+
let(:entity_hash) do
|
52
|
+
{
|
53
|
+
"id" => 1,
|
54
|
+
"startingPos" => 3,
|
55
|
+
"endingPos" => 20
|
56
|
+
}
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should use sensible defaults" do
|
60
|
+
expect(entity.type).to eq([])
|
61
|
+
end
|
37
62
|
|
38
|
-
expect(entity.type).to eq([])
|
39
63
|
end
|
40
64
|
|
41
65
|
end
|