textrazor 0.0.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/README.md +26 -2
- data/Rakefile +10 -2
- data/lib/textrazor.rb +5 -0
- data/lib/textrazor/client.rb +38 -5
- data/lib/textrazor/configuration.rb +1 -1
- data/lib/textrazor/entailment.rb +24 -0
- data/lib/textrazor/property.rb +22 -0
- data/lib/textrazor/relation.rb +25 -0
- data/lib/textrazor/relation_param.rb +18 -0
- data/lib/textrazor/request.rb +6 -2
- data/lib/textrazor/response.rb +110 -34
- data/lib/textrazor/sentence.rb +24 -0
- data/lib/textrazor/topic.rb +8 -6
- data/lib/textrazor/util.rb +1 -1
- data/lib/textrazor/version.rb +1 -1
- data/lib/textrazor/word.rb +1 -1
- data/spec/functional/service_spec.rb +29 -0
- data/spec/lib/textrazor/client_spec.rb +113 -65
- data/spec/lib/textrazor/entailment_spec.rb +36 -0
- data/spec/lib/textrazor/entity_spec.rb +50 -26
- data/spec/lib/textrazor/phrase_spec.rb +8 -4
- data/spec/lib/textrazor/property_spec.rb +30 -0
- data/spec/lib/textrazor/relation_param_spec.rb +29 -0
- data/spec/lib/textrazor/relation_spec.rb +37 -0
- data/spec/lib/textrazor/request_spec.rb +7 -4
- data/spec/lib/textrazor/response_spec.rb +604 -49
- data/spec/lib/textrazor/sentence_spec.rb +41 -0
- data/spec/lib/textrazor/topic_spec.rb +12 -5
- data/textrazor.gemspec +1 -0
- metadata +35 -2
@@ -0,0 +1,24 @@
|
|
1
|
+
module TextRazor

  # A single sentence of an analysed text: its position and the words it holds.
  class Sentence

    attr_reader :position, :words

    # Builds a sentence from a hash of attributes.
    #
    # params - Hash read with Symbol keys:
    #          :position - stored unchanged and exposed through #position.
    #          :words    - collection of word hashes, each handed to
    #                      Word.create_from_hash. May be absent or nil, in
    #                      which case the sentence has no words.
    def initialize(params)
      @position = params[:position]
      # Fall back to an empty list so a sentence without a :words entry does
      # not blow up with NoMethodError on nil.
      @words = (params[:words] || []).map do |word_hash|
        Word.create_from_hash(word_hash)
      end
    end

    # Returns the number of words held by this sentence.
    def number_of_words
      @words.size
    end

    # Factory counterpart of .new; takes the same params hash as #initialize.
    def self.create_from_hash(params)
      new(params)
    end

  end

end
|
data/lib/textrazor/topic.rb
CHANGED
@@ -2,17 +2,19 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Topic
|
4
4
|
|
5
|
+
extend Util
|
6
|
+
|
5
7
|
attr_reader :id, :label, :wiki_link, :score
|
6
8
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@score = score
|
9
|
+
def initialize(params = {})
|
10
|
+
params.each do |k, v|
|
11
|
+
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
12
|
+
end
|
12
13
|
end
|
13
14
|
|
14
15
|
def self.create_from_hash(params)
|
15
|
-
|
16
|
+
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
17
|
+
new(params)
|
16
18
|
end
|
17
19
|
|
18
20
|
end
|
data/lib/textrazor/util.rb
CHANGED
data/lib/textrazor/version.rb
CHANGED
data/lib/textrazor/word.rb
CHANGED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'

# End-to-end check that builds a TextRazor::Client from a real API key and
# analyses a sample article. Tagged `functional: true` (presumably so it can be
# filtered apart from the unit specs -- confirm against the RSpec configuration).
describe 'Functional spec', functional: true do
  # Fail immediately with an actionable message when no key is configured.
  let(:api_key) do
    ENV.fetch("TEXTRAZOR_API_KEY") do
      raise 'Please specify a TEXTRAZOR_API_KEY in your local environment to run this'
    end
  end

  let(:client) do
    TextRazor::Client.new(api_key)
  end

  # Sample news text submitted for analysis. The heredoc itself is the block's
  # value; no intermediate local is needed.
  let(:text) do
    <<TEXT
Barclays misled shareholders and the public about one of the biggest investments in the bank's history, a BBC Panorama investigation has found.
The bank announced in 2008 that Manchester City owner Sheikh Mansour had agreed to invest more than £3bn.
But the BBC found that the money, which helped Barclays avoid a bailout by British taxpayers, actually came from the Abu Dhabi government.
Barclays said the mistake in its accounts was "a drafting error".
Unlike RBS and Lloyds TSB, Barclays narrowly avoided having to request a government bailout late in 2008 after it was rescued by £7bn worth of new investment, most of which came from the gulf states of Qatar and Abu Dhabi.
Half of the cash was supposed to be coming from Sheikh Mansour.
But Barclays has admitted it was told the investor might change shortly before shareholders voted to approve the deal on 24 November 2008.
But instead of telling shareholders, the bank remained silent until the change of investor was confirmed a few hours later.
TEXT
  end

  it 'returns a response' do
    expect(client.analyse(text)).to be_ok
  end
end
|
29
|
+
|
@@ -4,55 +4,103 @@ module TextRazor
|
|
4
4
|
|
5
5
|
describe Client do
|
6
6
|
|
7
|
-
let(:api_key)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
let(:
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
let(:
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
7
|
+
let(:api_key) do
|
8
|
+
'api_key'
|
9
|
+
end
|
10
|
+
|
11
|
+
let(:client) do
|
12
|
+
custom_options_client
|
13
|
+
end
|
14
|
+
|
15
|
+
let(:nil_api_key_client) do
|
16
|
+
Client.new(nil)
|
17
|
+
end
|
18
|
+
|
19
|
+
let(:empty_api_key_client) do
|
20
|
+
Client.new('')
|
21
|
+
end
|
22
|
+
|
23
|
+
let(:custom_options_client) do
|
24
|
+
Client.new(api_key, {
|
25
|
+
extractors: %w(entities topics words), cleanup_mode: 'raw',
|
26
|
+
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
27
|
+
filter_dbpedia_types: %w(type1), language: 'fre',
|
28
|
+
filter_freebase_types: %w(type2), allow_overlap: false
|
29
|
+
})
|
30
|
+
end
|
31
|
+
|
32
|
+
let(:default_options_client) do
|
33
|
+
Client.new(api_key)
|
34
|
+
end
|
35
|
+
|
36
|
+
describe "#initialize" do
|
37
|
+
|
38
|
+
context 'with valid api key' do
|
39
|
+
|
40
|
+
context "and default request options" do
|
41
|
+
|
42
|
+
it 'assigns correctly' do
|
43
|
+
expect(default_options_client.api_key).to eq(api_key)
|
44
|
+
expect(default_options_client.request_options).
|
45
|
+
to eq({extractors: %w(entities topics words phrases dependency-trees
|
46
|
+
relations entailments senses), cleanup_mode: 'raw'})
|
47
|
+
end
|
48
|
+
|
25
49
|
end
|
26
50
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
to eq(
|
31
|
-
|
51
|
+
context 'and custom request options' do
|
52
|
+
|
53
|
+
it "assigns correctly" do
|
54
|
+
expect(custom_options_client.api_key).to eq(api_key)
|
55
|
+
expect(custom_options_client.request_options).
|
56
|
+
to eq({extractors: %w(entities topics words), cleanup_mode: 'raw', language: 'fre',
|
57
|
+
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
58
|
+
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
59
|
+
allow_overlap: false})
|
60
|
+
end
|
61
|
+
|
32
62
|
end
|
33
63
|
|
34
64
|
end
|
35
65
|
|
36
|
-
context "invalid
|
66
|
+
context "with invalid api key" do
|
67
|
+
|
68
|
+
context "when nil" do
|
69
|
+
|
70
|
+
it "raises an exception" do
|
71
|
+
expect { nil_api_key_client }.
|
72
|
+
to raise_error(Client::EmptyApiKey)
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
context "when empty" do
|
78
|
+
|
79
|
+
it "raises an exception" do
|
80
|
+
expect { empty_api_key_client }.
|
81
|
+
to raise_error(Client::EmptyApiKey)
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
37
85
|
|
38
|
-
|
86
|
+
end
|
39
87
|
|
40
|
-
|
88
|
+
context 'with invalid request options' do
|
41
89
|
|
42
|
-
|
43
|
-
expect { nil_api_key_client }.
|
44
|
-
to raise_error(Client::EmptyApiKey)
|
45
|
-
end
|
90
|
+
context 'when an invalid extractor value is supplied' do
|
46
91
|
|
92
|
+
it 'raises an exception' do
|
93
|
+
expect { Client.new(api_key, {extractors: ['invalid-extractor', 'topics']}) }.
|
94
|
+
to raise_error(Client::UnsupportedExtractor)
|
47
95
|
end
|
48
96
|
|
49
|
-
|
97
|
+
end
|
50
98
|
|
51
|
-
|
52
|
-
expect { empty_api_key_client }.
|
53
|
-
to raise_error(Client::EmptyApiKey)
|
54
|
-
end
|
99
|
+
context 'when an invalid cleanup_mode value is supplied' do
|
55
100
|
|
101
|
+
it 'raises an exception' do
|
102
|
+
expect { Client.new(api_key, {cleanup_mode: 'invalid-cleanup-mode'}) }.
|
103
|
+
to raise_error(Client::UnsupportedCleanupMode)
|
56
104
|
end
|
57
105
|
|
58
106
|
end
|
@@ -63,16 +111,20 @@ module TextRazor
|
|
63
111
|
|
64
112
|
context "#analyse" do
|
65
113
|
|
66
|
-
let(:very_long_text)
|
114
|
+
let(:very_long_text) do
|
115
|
+
"L" * 201 * 1024
|
116
|
+
end
|
67
117
|
|
68
|
-
context "valid
|
118
|
+
context "valid value of 'text'" do
|
69
119
|
|
70
|
-
it "
|
71
|
-
request =
|
120
|
+
it "makes correct calls" do
|
121
|
+
request = BasicObject.new
|
72
122
|
|
73
123
|
expect(Request).to receive(:post).
|
74
|
-
with('text', {api_key: 'api_key', extractors: %w(entities topics words),
|
75
|
-
|
124
|
+
with('text', {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
|
125
|
+
cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
|
126
|
+
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
127
|
+
allow_overlap: false}).
|
76
128
|
and_return(request)
|
77
129
|
|
78
130
|
expect(Response).to receive(:new).with(request)
|
@@ -82,35 +134,31 @@ module TextRazor
|
|
82
134
|
|
83
135
|
end
|
84
136
|
|
85
|
-
context "invalid
|
137
|
+
context "invalid value of 'text'" do
|
86
138
|
|
87
|
-
context "
|
88
|
-
|
89
|
-
context "is nil" do
|
90
|
-
|
91
|
-
it "should raise an exception" do
|
92
|
-
expect { client.analyse(nil) }.
|
93
|
-
to raise_error(Client::EmptyText)
|
94
|
-
end
|
139
|
+
context "when nil" do
|
95
140
|
|
141
|
+
it "raises an exception" do
|
142
|
+
expect { client.analyse(nil) }.
|
143
|
+
to raise_error(Client::EmptyText)
|
96
144
|
end
|
97
145
|
|
98
|
-
|
146
|
+
end
|
99
147
|
|
100
|
-
|
101
|
-
expect { client.analyse('') }.
|
102
|
-
to raise_error(Client::EmptyText)
|
103
|
-
end
|
148
|
+
context "when empty" do
|
104
149
|
|
150
|
+
it "raises an exception" do
|
151
|
+
expect { client.analyse('') }.
|
152
|
+
to raise_error(Client::EmptyText)
|
105
153
|
end
|
106
154
|
|
107
|
-
|
155
|
+
end
|
108
156
|
|
109
|
-
|
110
|
-
expect { client.analyse(very_long_text) }.
|
111
|
-
to raise_error(Client::TextTooLong)
|
112
|
-
end
|
157
|
+
context "when size is > 200kb" do
|
113
158
|
|
159
|
+
it "raises an exception" do
|
160
|
+
expect { client.analyse(very_long_text) }.
|
161
|
+
to raise_error(Client::TextTooLong)
|
114
162
|
end
|
115
163
|
|
116
164
|
end
|
@@ -121,7 +169,7 @@ module TextRazor
|
|
121
169
|
|
122
170
|
context ".topics" do
|
123
171
|
|
124
|
-
it "
|
172
|
+
it "makes correct calls" do
|
125
173
|
client = OpenStruct.new
|
126
174
|
response = OpenStruct.new topics: ['topic1'], coarseTopics: ['topic1']
|
127
175
|
|
@@ -140,7 +188,7 @@ module TextRazor
|
|
140
188
|
|
141
189
|
context ".coarse_topics" do
|
142
190
|
|
143
|
-
it "
|
191
|
+
it "makes correct calls" do
|
144
192
|
client = OpenStruct.new
|
145
193
|
response = OpenStruct.new topics: ['topic1'], coarseTopics: ['topic1']
|
146
194
|
|
@@ -159,7 +207,7 @@ module TextRazor
|
|
159
207
|
|
160
208
|
context ".entities" do
|
161
209
|
|
162
|
-
it "
|
210
|
+
it "makes correct calls" do
|
163
211
|
client = OpenStruct.new
|
164
212
|
response = OpenStruct.new entities: ['Entity1']
|
165
213
|
|
@@ -178,7 +226,7 @@ module TextRazor
|
|
178
226
|
|
179
227
|
context ".words" do
|
180
228
|
|
181
|
-
it "
|
229
|
+
it "makes correct calls" do
|
182
230
|
client = OpenStruct.new
|
183
231
|
response = OpenStruct.new words: ['Word1']
|
184
232
|
|
@@ -197,7 +245,7 @@ module TextRazor
|
|
197
245
|
|
198
246
|
context ".phrases" do
|
199
247
|
|
200
|
-
it "
|
248
|
+
it "makes correct calls" do
|
201
249
|
client = OpenStruct.new
|
202
250
|
response = OpenStruct.new phrases: ['Phrase1']
|
203
251
|
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "spec_helper"

module TextRazor

  # Checks that Entailment.create_from_hash exposes the camelCase keys of the
  # input hash through snake_case readers.
  describe Entailment do

    context "#create_from_hash" do

      # Input hash with Symbol keys in the service's camelCase naming.
      let(:entailment_hash) do
        {
          id: 2,
          wordPositions: [1],
          entailedWords: ["misrepresentation"],
          entailedTree: {
            word: "misrepresentation", wordId: 0, parentRelation: -1
          },
          priorScore: 0.00132419,
          contextScore: 0.0694058,
          score: 0.154246
        }
      end

      it "creates a new instance" do
        # Fresh literal, deliberately not shared with the input hash.
        expected_tree = {
          word: "misrepresentation", wordId: 0, parentRelation: -1
        }

        entailment = Entailment.create_from_hash(entailment_hash)

        expect(entailment.id).to eq(2)
        expect(entailment.entailed_tree).to eq(expected_tree)
        expect(entailment.word_positions).to eq([1])
        expect(entailment.prior_score).to eq(0.00132419)
        expect(entailment.context_score).to eq(0.0694058)
        expect(entailment.score).to eq(0.154246)
      end

    end

  end

end
|
@@ -6,36 +6,60 @@ module TextRazor
|
|
6
6
|
|
7
7
|
context "#create_from_hash" do
|
8
8
|
|
9
|
-
|
10
|
-
entity_hash
|
11
|
-
"freebaseTypes" => ["government/government_office_or_title"],
|
12
|
-
"confidenceScore" => 0.897858, "wikiLink" => "http://en.wikipedia.org/wiki/Foreign_minister",
|
13
|
-
"matchedText" => "foreign ministers", "freebaseId" => "/m/01t_55", "relevanceScore" => 0.311479,
|
14
|
-
"entityEnglishId" => "Foreign minister", "startingPos" => 3, "endingPos" => 20}
|
15
|
-
|
16
|
-
entity = Entity.create_from_hash(entity_hash)
|
17
|
-
|
18
|
-
expect(entity.id).to eq(1)
|
19
|
-
expect(entity.type).to eq(['Person'])
|
20
|
-
expect(entity.matching_tokens).to eq([1,2])
|
21
|
-
expect(entity.entity_id).to eq("Foreign minister")
|
22
|
-
expect(entity.freebase_types).to eq(["government/government_office_or_title"])
|
23
|
-
expect(entity.confidence_score).to eq(0.897858)
|
24
|
-
expect(entity.wiki_link).to eq("http://en.wikipedia.org/wiki/Foreign_minister")
|
25
|
-
expect(entity.matched_text).to eq("foreign ministers")
|
26
|
-
expect(entity.freebase_id).to eq("/m/01t_55")
|
27
|
-
expect(entity.relevance_score).to eq(0.311479)
|
28
|
-
expect(entity.entity_english_id).to eq("Foreign minister")
|
29
|
-
expect(entity.starting_pos).to eq(3)
|
30
|
-
expect(entity.ending_pos).to eq(20)
|
9
|
+
let(:entity) do
|
10
|
+
Entity.create_from_hash(entity_hash)
|
31
11
|
end
|
32
12
|
|
33
|
-
|
34
|
-
entity_hash
|
13
|
+
context "with defined values" do
|
14
|
+
let(:entity_hash) do
|
15
|
+
{
|
16
|
+
"id" => 1,
|
17
|
+
"type" => ['Person'],
|
18
|
+
"matchingTokens" => [1, 2],
|
19
|
+
"entityId" => "Foreign minister",
|
20
|
+
"freebaseTypes" => ["government/government_office_or_title"],
|
21
|
+
"confidenceScore" => 0.897858,
|
22
|
+
"wikiLink" => "http://en.wikipedia.org/wiki/Foreign_minister",
|
23
|
+
"matchedText" => "foreign ministers",
|
24
|
+
"freebaseId" => "/m/01t_55",
|
25
|
+
"relevanceScore" => 0.311479,
|
26
|
+
"entityEnglishId" => "Foreign minister",
|
27
|
+
"startingPos" => 3,
|
28
|
+
"endingPos" => 20
|
29
|
+
}
|
30
|
+
end
|
35
31
|
|
36
|
-
|
32
|
+
it "should create a new instance" do
|
33
|
+
expect(entity.id).to eq(1)
|
34
|
+
expect(entity.type).to eq(['Person'])
|
35
|
+
expect(entity.matching_tokens).to eq([1,2])
|
36
|
+
expect(entity.entity_id).to eq("Foreign minister")
|
37
|
+
expect(entity.freebase_types).to eq(["government/government_office_or_title"])
|
38
|
+
expect(entity.confidence_score).to eq(0.897858)
|
39
|
+
expect(entity.wiki_link).to eq("http://en.wikipedia.org/wiki/Foreign_minister")
|
40
|
+
expect(entity.matched_text).to eq("foreign ministers")
|
41
|
+
expect(entity.freebase_id).to eq("/m/01t_55")
|
42
|
+
expect(entity.relevance_score).to eq(0.311479)
|
43
|
+
expect(entity.entity_english_id).to eq("Foreign minister")
|
44
|
+
expect(entity.starting_pos).to eq(3)
|
45
|
+
expect(entity.ending_pos).to eq(20)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
context "with empty values" do
|
50
|
+
|
51
|
+
let(:entity_hash) do
|
52
|
+
{
|
53
|
+
"id" => 1,
|
54
|
+
"startingPos" => 3,
|
55
|
+
"endingPos" => 20
|
56
|
+
}
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should use sensible defaults" do
|
60
|
+
expect(entity.type).to eq([])
|
61
|
+
end
|
37
62
|
|
38
|
-
expect(entity.type).to eq([])
|
39
63
|
end
|
40
64
|
|
41
65
|
end
|