textrazor 1.0.1 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/textrazor.rb +1 -0
- data/lib/textrazor/category.rb +15 -0
- data/lib/textrazor/client.rb +7 -1
- data/lib/textrazor/entailment.rb +2 -9
- data/lib/textrazor/entity.rb +3 -10
- data/lib/textrazor/property.rb +2 -9
- data/lib/textrazor/request.rb +2 -1
- data/lib/textrazor/response.rb +8 -0
- data/lib/textrazor/topic.rb +3 -10
- data/lib/textrazor/util.rb +25 -6
- data/lib/textrazor/version.rb +1 -1
- data/lib/textrazor/word.rb +2 -10
- data/spec/lib/textrazor/category_spec.rb +34 -0
- data/spec/lib/textrazor/client_spec.rb +23 -2
- data/spec/lib/textrazor/entity_spec.rb +3 -1
- data/spec/lib/textrazor/request_spec.rb +2 -2
- data/spec/lib/textrazor/response_spec.rb +67 -0
- data/spec/lib/textrazor/topic_spec.rb +3 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97bc6dcda89ce7ef88868906d97b92b0e46a9ef8
|
4
|
+
data.tar.gz: f4e7d689ddeb6b41130d0fabb0e020dff8d9eb24
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 506f8a61cda18fc018267cee10323dfc2a9bef7a52edacac46924e0cfcc2689e1348f834e67879dff892b8fb36b9e7ea6aa8b638ee9727579ccaafdf8e71b850
|
7
|
+
data.tar.gz: 013233c7cd51d4467aeaf5e5a2e5f3960a4acdc1e17724a1b0a2092cab28f732ec59605033cdd890d0481011d9191742f4d89e57788876444c1a3a0fc105e27d
|
data/lib/textrazor.rb
CHANGED
data/lib/textrazor/client.rb
CHANGED
@@ -17,7 +17,7 @@ module TextRazor
|
|
17
17
|
|
18
18
|
REQUEST_OPTIONS = [:extractors, :rules, :cleanup_mode, :cleanup_return_cleaned, :cleanup_return_raw,
|
19
19
|
:language, :dictionaries, :filter_dbpedia_types, :filter_freebase_types, :allow_overlap,
|
20
|
-
:enrichment_queries]
|
20
|
+
:enrichment_queries, :classifiers]
|
21
21
|
|
22
22
|
attr_reader :response, :api_key, :request_options
|
23
23
|
|
@@ -43,6 +43,12 @@ module TextRazor
|
|
43
43
|
topics
|
44
44
|
end
|
45
45
|
|
46
|
+
def self.categories(api_key, text, options = {})
|
47
|
+
new(api_key, options.merge(classifiers: ['textrazor_iab'])).
|
48
|
+
analyse(text).
|
49
|
+
categories
|
50
|
+
end
|
51
|
+
|
46
52
|
def self.coarse_topics(api_key, text, options = {})
|
47
53
|
new(api_key, options.merge(extractors: ['topics'])).
|
48
54
|
analyse(text).
|
data/lib/textrazor/entailment.rb
CHANGED
@@ -2,21 +2,14 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Entailment
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
7
|
attr_reader :id, :word_positions, :prior_score, :context_score,
|
8
8
|
:score, :entailed_tree, :entailed_words
|
9
9
|
|
10
10
|
def initialize(params = {})
|
11
11
|
@type = []
|
12
|
-
params.each do |k, v|
|
13
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.create_from_hash(params)
|
18
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
19
|
-
new(params)
|
12
|
+
initialize_params params
|
20
13
|
end
|
21
14
|
|
22
15
|
end
|
data/lib/textrazor/entity.rb
CHANGED
@@ -2,22 +2,15 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Entity
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
7
|
attr_reader :id, :type, :matching_tokens, :entity_id, :freebase_types, :confidence_score,
|
8
8
|
:wiki_link, :matched_text, :freebase_id, :relevance_score, :entity_english_id,
|
9
|
-
:starting_pos, :ending_pos, :data
|
9
|
+
:starting_pos, :ending_pos, :data, :wikidata_id
|
10
10
|
|
11
11
|
def initialize(params = {})
|
12
12
|
@type = []
|
13
|
-
params.each do |k, v|
|
14
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.create_from_hash(params)
|
19
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
20
|
-
new(params)
|
13
|
+
initialize_params params
|
21
14
|
end
|
22
15
|
|
23
16
|
end
|
data/lib/textrazor/property.rb
CHANGED
@@ -2,19 +2,12 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Property
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
7
|
attr_reader :id, :word_positions, :property_positions
|
8
8
|
|
9
9
|
def initialize(params = {})
|
10
|
-
params.each do |k, v|
|
11
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.create_from_hash(params)
|
16
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
17
|
-
new(params)
|
10
|
+
initialize_params params
|
18
11
|
end
|
19
12
|
|
20
13
|
end
|
data/lib/textrazor/request.rb
CHANGED
@@ -17,7 +17,8 @@ module TextRazor
|
|
17
17
|
filter_dbpedia_types: 'entities.filterDbpediaTypes',
|
18
18
|
filter_freebase_types: 'entities.filterFreebaseTypes',
|
19
19
|
allow_overlap: 'entities.allowOverlap',
|
20
|
-
enrichment_queries: 'entities.enrichmentQueries'
|
20
|
+
enrichment_queries: 'entities.enrichmentQueries',
|
21
|
+
classifiers: 'classifiers'
|
21
22
|
}
|
22
23
|
|
23
24
|
def self.post(text, options)
|
data/lib/textrazor/response.rb
CHANGED
@@ -84,6 +84,10 @@ module TextRazor
|
|
84
84
|
@sentences ||= parse_sentences
|
85
85
|
end
|
86
86
|
|
87
|
+
def categories
|
88
|
+
@categories ||= parse_categories
|
89
|
+
end
|
90
|
+
|
87
91
|
def language
|
88
92
|
raw_response[:language]
|
89
93
|
end
|
@@ -122,6 +126,10 @@ module TextRazor
|
|
122
126
|
parse(:topic, raw_response[:topics])
|
123
127
|
end
|
124
128
|
|
129
|
+
def parse_categories
|
130
|
+
parse(:category, raw_response[:categories])
|
131
|
+
end
|
132
|
+
|
125
133
|
def parse_phrases
|
126
134
|
raw_phrases = raw_response[:nounPhrases]
|
127
135
|
return if raw_phrases.nil?
|
data/lib/textrazor/topic.rb
CHANGED
@@ -2,19 +2,12 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Topic
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
|
-
attr_reader :id, :label, :wiki_link, :score
|
7
|
+
attr_reader :id, :label, :wiki_link, :score, :wikidata_id
|
8
8
|
|
9
9
|
def initialize(params = {})
|
10
|
-
params.each do |k, v|
|
11
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.create_from_hash(params)
|
16
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
17
|
-
new(params)
|
10
|
+
initialize_params params
|
18
11
|
end
|
19
12
|
|
20
13
|
end
|
data/lib/textrazor/util.rb
CHANGED
@@ -1,11 +1,30 @@
|
|
1
1
|
module TextRazor
|
2
2
|
module Util
|
3
|
-
def
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
def self.included(base)
|
4
|
+
base.extend ClassMethods
|
5
|
+
|
6
|
+
base.class_eval do
|
7
|
+
def initialize_params(params)
|
8
|
+
params.each do |k, v|
|
9
|
+
instance_variable_set(:"@#{k}", v) if self.respond_to?(:"#{k}") && v && (!v.is_a?(String) || !v.empty?)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
module ClassMethods
|
16
|
+
def standardize(param)
|
17
|
+
param.to_s.gsub(/::/, '/').
|
18
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
19
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
20
|
+
tr("-", "_").
|
21
|
+
downcase
|
22
|
+
end
|
23
|
+
|
24
|
+
def create_from_hash(params)
|
25
|
+
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
26
|
+
new(params)
|
27
|
+
end
|
9
28
|
end
|
10
29
|
end
|
11
30
|
end
|
data/lib/textrazor/version.rb
CHANGED
data/lib/textrazor/word.rb
CHANGED
@@ -1,22 +1,14 @@
|
|
1
1
|
module TextRazor
|
2
2
|
|
3
3
|
class Word
|
4
|
-
|
5
|
-
extend Util
|
4
|
+
include Util
|
6
5
|
|
7
6
|
attr_reader :position, :starting_pos, :ending_pos, :stem, :lemma,
|
8
7
|
:token, :part_of_speech, :parent_position
|
9
8
|
|
10
9
|
def initialize(params = {})
|
11
10
|
@type = []
|
12
|
-
params.each do |k, v|
|
13
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.create_from_hash(params)
|
18
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
19
|
-
new(params)
|
11
|
+
initialize_params params
|
20
12
|
end
|
21
13
|
|
22
14
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
module TextRazor
|
4
|
+
|
5
|
+
describe Category do
|
6
|
+
|
7
|
+
context "#create_from_hash" do
|
8
|
+
let(:category_hash) do
|
9
|
+
{
|
10
|
+
"id":0,
|
11
|
+
"classifierId":"textrazor_iab",
|
12
|
+
"categoryId":"IAB11",
|
13
|
+
"label":"Law, Gov’t & Politics",
|
14
|
+
"score":0.809611
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
let(:category) do
|
19
|
+
Category.create_from_hash(category_hash)
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should create a new instance" do
|
23
|
+
expect(category.id).to eq(0)
|
24
|
+
expect(category.classifier_id).to eq("textrazor_iab")
|
25
|
+
expect(category.category_id).to eq("IAB11")
|
26
|
+
expect(category.label).to eq("Law, Gov’t & Politics")
|
27
|
+
expect(category.score).to eq(0.809611)
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -23,6 +23,7 @@ module TextRazor
|
|
23
23
|
let(:custom_options_client) do
|
24
24
|
Client.new(api_key, {
|
25
25
|
extractors: %w(entities topics words), cleanup_mode: 'raw',
|
26
|
+
classifiers: 'textrazor_newscodes',
|
26
27
|
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
27
28
|
filter_dbpedia_types: %w(type1), language: 'fre',
|
28
29
|
filter_freebase_types: %w(type2), allow_overlap: false,
|
@@ -57,7 +58,8 @@ module TextRazor
|
|
57
58
|
to eq({extractors: %w(entities topics words), cleanup_mode: 'raw', language: 'fre',
|
58
59
|
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
59
60
|
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
60
|
-
allow_overlap: false, dictionaries: %w(test)
|
61
|
+
allow_overlap: false, dictionaries: %w(test),
|
62
|
+
classifiers: 'textrazor_newscodes'})
|
61
63
|
end
|
62
64
|
|
63
65
|
end
|
@@ -125,7 +127,7 @@ module TextRazor
|
|
125
127
|
with('text', {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
|
126
128
|
cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
|
127
129
|
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
128
|
-
allow_overlap: false, dictionaries: %w(test)}).
|
130
|
+
allow_overlap: false, dictionaries: %w(test), classifiers: 'textrazor_newscodes'}).
|
129
131
|
and_return(request)
|
130
132
|
|
131
133
|
expect(Response).to receive(:new).with(request)
|
@@ -263,6 +265,25 @@ module TextRazor
|
|
263
265
|
|
264
266
|
end
|
265
267
|
|
268
|
+
context ".categories" do
|
269
|
+
|
270
|
+
it "makes correct calls" do
|
271
|
+
client = OpenStruct.new
|
272
|
+
response = OpenStruct.new categories: ['Category1']
|
273
|
+
|
274
|
+
expect(Client).to receive(:new).
|
275
|
+
with(api_key, {classifiers: ['textrazor_iab']}).
|
276
|
+
and_return(client)
|
277
|
+
|
278
|
+
expect(client).to receive(:analyse).
|
279
|
+
with("text").
|
280
|
+
and_return(response)
|
281
|
+
|
282
|
+
Client.categories(api_key, 'text', {})
|
283
|
+
end
|
284
|
+
|
285
|
+
end
|
286
|
+
|
266
287
|
end
|
267
288
|
|
268
289
|
end
|
@@ -28,7 +28,8 @@ module TextRazor
|
|
28
28
|
"endingPos" => 20,
|
29
29
|
"data" => {
|
30
30
|
"type" => ['person', 'company']
|
31
|
-
}
|
31
|
+
},
|
32
|
+
"wikidataId" => 'Q7330070'
|
32
33
|
}
|
33
34
|
end
|
34
35
|
|
@@ -47,6 +48,7 @@ module TextRazor
|
|
47
48
|
expect(entity.starting_pos).to eq(3)
|
48
49
|
expect(entity.ending_pos).to eq(20)
|
49
50
|
expect(entity.data['type']).to match_array(['person', 'company'])
|
51
|
+
expect(entity.wikidata_id).to eq('Q7330070')
|
50
52
|
end
|
51
53
|
end
|
52
54
|
|
@@ -60,13 +60,13 @@ module TextRazor
|
|
60
60
|
options = {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
|
61
61
|
cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
|
62
62
|
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2), allow_overlap: false,
|
63
|
-
enrichment_queries: 'queries'}
|
63
|
+
enrichment_queries: 'queries', classifiers: 'textrazor_iab'}
|
64
64
|
|
65
65
|
expect(::RestClient).to receive(:post).
|
66
66
|
with("https://api.textrazor.com/", { "text" => 'text', "apiKey" => 'api_key', "extractors" => "entities,topics,words",
|
67
67
|
"cleanup.mode" => "raw", "cleanup.returnCleaned" => true, "cleanup.returnRaw" => true, "languageOverride" => 'fre',
|
68
68
|
"entities.filterDbpediaTypes" => "type1", "entities.filterFreebaseTypes" => "type2" , "entities.allowOverlap" => false,
|
69
|
-
"entities.enrichmentQueries" => "queries"},
|
69
|
+
"entities.enrichmentQueries" => "queries", "classifiers" => 'textrazor_iab'},
|
70
70
|
accept_encoding: 'gzip')
|
71
71
|
|
72
72
|
Request.post('text', options)
|
@@ -441,6 +441,73 @@ module TextRazor
|
|
441
441
|
|
442
442
|
end
|
443
443
|
|
444
|
+
describe "#categories" do
|
445
|
+
|
446
|
+
let(:http_response) do
|
447
|
+
::OpenStruct.new(code: 200, body: body)
|
448
|
+
end
|
449
|
+
|
450
|
+
let(:response) do
|
451
|
+
Response.new(http_response)
|
452
|
+
end
|
453
|
+
|
454
|
+
context "if there are categories returned from api" do
|
455
|
+
|
456
|
+
let(:body) do
|
457
|
+
{
|
458
|
+
"time" => "0.013219",
|
459
|
+
"response" => {
|
460
|
+
"language" => "eng",
|
461
|
+
"languageIsReliable" => true,
|
462
|
+
"categories" => [
|
463
|
+
{
|
464
|
+
"id" => 0,
|
465
|
+
"classifierId" => "textrazor_iab",
|
466
|
+
"categoryId" => "IAB11",
|
467
|
+
"label" => "Law, Gov’t & Politics",
|
468
|
+
"score" => 0.809611
|
469
|
+
},
|
470
|
+
{
|
471
|
+
"id" => 1,
|
472
|
+
"classifierId" => "textrazor_iab",
|
473
|
+
"categoryId" => "IAB11-2",
|
474
|
+
"label" => "Law, Gov’t & Politics>Legal Issues",
|
475
|
+
"score" => 0.61239
|
476
|
+
}
|
477
|
+
]
|
478
|
+
}
|
479
|
+
}.to_json
|
480
|
+
end
|
481
|
+
|
482
|
+
it "returns categories" do
|
483
|
+
categories = response.categories
|
484
|
+
|
485
|
+
expect(categories).to_not be_nil
|
486
|
+
expect(categories.size).to eq(2)
|
487
|
+
end
|
488
|
+
|
489
|
+
end
|
490
|
+
|
491
|
+
context "if there are no categories returned from api" do
|
492
|
+
|
493
|
+
let(:body) do
|
494
|
+
{
|
495
|
+
"time" => "0.013219",
|
496
|
+
"response" => {
|
497
|
+
"language" => "eng",
|
498
|
+
"languageIsReliable" => true
|
499
|
+
}
|
500
|
+
}.to_json
|
501
|
+
end
|
502
|
+
|
503
|
+
it "returns nil" do
|
504
|
+
expect(response.categories).to be_nil
|
505
|
+
end
|
506
|
+
|
507
|
+
end
|
508
|
+
|
509
|
+
end
|
510
|
+
|
444
511
|
describe "#words" do
|
445
512
|
|
446
513
|
let(:http_response) do
|
@@ -10,7 +10,8 @@ module TextRazor
|
|
10
10
|
id: 1,
|
11
11
|
label: "Sports",
|
12
12
|
wikiLink: "link_to_wiki",
|
13
|
-
score: 1.03589
|
13
|
+
score: 1.03589,
|
14
|
+
wikidataId: "Q042"
|
14
15
|
}
|
15
16
|
end
|
16
17
|
|
@@ -23,6 +24,7 @@ module TextRazor
|
|
23
24
|
expect(topic.label).to eq("Sports")
|
24
25
|
expect(topic.wiki_link).to eq("link_to_wiki")
|
25
26
|
expect(topic.score).to eq(1.03589)
|
27
|
+
expect(topic.wikidata_id).to eq("Q042")
|
26
28
|
end
|
27
29
|
|
28
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textrazor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: '1.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anuj Dutta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- README.md
|
112
112
|
- Rakefile
|
113
113
|
- lib/textrazor.rb
|
114
|
+
- lib/textrazor/category.rb
|
114
115
|
- lib/textrazor/client.rb
|
115
116
|
- lib/textrazor/configuration.rb
|
116
117
|
- lib/textrazor/entailment.rb
|
@@ -127,6 +128,7 @@ files:
|
|
127
128
|
- lib/textrazor/version.rb
|
128
129
|
- lib/textrazor/word.rb
|
129
130
|
- spec/functional/service_spec.rb
|
131
|
+
- spec/lib/textrazor/category_spec.rb
|
130
132
|
- spec/lib/textrazor/client_spec.rb
|
131
133
|
- spec/lib/textrazor/configuration_spec.rb
|
132
134
|
- spec/lib/textrazor/entailment_spec.rb
|
@@ -163,12 +165,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
165
|
version: '0'
|
164
166
|
requirements: []
|
165
167
|
rubyforge_project:
|
166
|
-
rubygems_version: 2.
|
168
|
+
rubygems_version: 2.4.5.1
|
167
169
|
signing_key:
|
168
170
|
specification_version: 4
|
169
171
|
summary: An api wrapper for text razor in ruby
|
170
172
|
test_files:
|
171
173
|
- spec/functional/service_spec.rb
|
174
|
+
- spec/lib/textrazor/category_spec.rb
|
172
175
|
- spec/lib/textrazor/client_spec.rb
|
173
176
|
- spec/lib/textrazor/configuration_spec.rb
|
174
177
|
- spec/lib/textrazor/entailment_spec.rb
|