textrazor 1.0.1 → 1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/textrazor.rb +1 -0
- data/lib/textrazor/category.rb +15 -0
- data/lib/textrazor/client.rb +7 -1
- data/lib/textrazor/entailment.rb +2 -9
- data/lib/textrazor/entity.rb +3 -10
- data/lib/textrazor/property.rb +2 -9
- data/lib/textrazor/request.rb +2 -1
- data/lib/textrazor/response.rb +8 -0
- data/lib/textrazor/topic.rb +3 -10
- data/lib/textrazor/util.rb +25 -6
- data/lib/textrazor/version.rb +1 -1
- data/lib/textrazor/word.rb +2 -10
- data/spec/lib/textrazor/category_spec.rb +34 -0
- data/spec/lib/textrazor/client_spec.rb +23 -2
- data/spec/lib/textrazor/entity_spec.rb +3 -1
- data/spec/lib/textrazor/request_spec.rb +2 -2
- data/spec/lib/textrazor/response_spec.rb +67 -0
- data/spec/lib/textrazor/topic_spec.rb +3 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97bc6dcda89ce7ef88868906d97b92b0e46a9ef8
|
4
|
+
data.tar.gz: f4e7d689ddeb6b41130d0fabb0e020dff8d9eb24
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 506f8a61cda18fc018267cee10323dfc2a9bef7a52edacac46924e0cfcc2689e1348f834e67879dff892b8fb36b9e7ea6aa8b638ee9727579ccaafdf8e71b850
|
7
|
+
data.tar.gz: 013233c7cd51d4467aeaf5e5a2e5f3960a4acdc1e17724a1b0a2092cab28f732ec59605033cdd890d0481011d9191742f4d89e57788876444c1a3a0fc105e27d
|
data/lib/textrazor.rb
CHANGED
data/lib/textrazor/client.rb
CHANGED
@@ -17,7 +17,7 @@ module TextRazor
|
|
17
17
|
|
18
18
|
REQUEST_OPTIONS = [:extractors, :rules, :cleanup_mode, :cleanup_return_cleaned, :cleanup_return_raw,
|
19
19
|
:language, :dictionaries, :filter_dbpedia_types, :filter_freebase_types, :allow_overlap,
|
20
|
-
:enrichment_queries]
|
20
|
+
:enrichment_queries, :classifiers]
|
21
21
|
|
22
22
|
attr_reader :response, :api_key, :request_options
|
23
23
|
|
@@ -43,6 +43,12 @@ module TextRazor
|
|
43
43
|
topics
|
44
44
|
end
|
45
45
|
|
46
|
+
def self.categories(api_key, text, options = {})
|
47
|
+
new(api_key, options.merge(classifiers: ['textrazor_iab'])).
|
48
|
+
analyse(text).
|
49
|
+
categories
|
50
|
+
end
|
51
|
+
|
46
52
|
def self.coarse_topics(api_key, text, options = {})
|
47
53
|
new(api_key, options.merge(extractors: ['topics'])).
|
48
54
|
analyse(text).
|
data/lib/textrazor/entailment.rb
CHANGED
@@ -2,21 +2,14 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Entailment
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
7
|
attr_reader :id, :word_positions, :prior_score, :context_score,
|
8
8
|
:score, :entailed_tree, :entailed_words
|
9
9
|
|
10
10
|
def initialize(params = {})
|
11
11
|
@type = []
|
12
|
-
params
|
13
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.create_from_hash(params)
|
18
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
19
|
-
new(params)
|
12
|
+
initialize_params params
|
20
13
|
end
|
21
14
|
|
22
15
|
end
|
data/lib/textrazor/entity.rb
CHANGED
@@ -2,22 +2,15 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Entity
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
7
|
attr_reader :id, :type, :matching_tokens, :entity_id, :freebase_types, :confidence_score,
|
8
8
|
:wiki_link, :matched_text, :freebase_id, :relevance_score, :entity_english_id,
|
9
|
-
:starting_pos, :ending_pos, :data
|
9
|
+
:starting_pos, :ending_pos, :data, :wikidata_id
|
10
10
|
|
11
11
|
def initialize(params = {})
|
12
12
|
@type = []
|
13
|
-
params
|
14
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.create_from_hash(params)
|
19
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
20
|
-
new(params)
|
13
|
+
initialize_params params
|
21
14
|
end
|
22
15
|
|
23
16
|
end
|
data/lib/textrazor/property.rb
CHANGED
@@ -2,19 +2,12 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Property
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
7
|
attr_reader :id, :word_positions, :property_positions
|
8
8
|
|
9
9
|
def initialize(params = {})
|
10
|
-
params
|
11
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.create_from_hash(params)
|
16
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
17
|
-
new(params)
|
10
|
+
initialize_params params
|
18
11
|
end
|
19
12
|
|
20
13
|
end
|
data/lib/textrazor/request.rb
CHANGED
@@ -17,7 +17,8 @@ module TextRazor
|
|
17
17
|
filter_dbpedia_types: 'entities.filterDbpediaTypes',
|
18
18
|
filter_freebase_types: 'entities.filterFreebaseTypes',
|
19
19
|
allow_overlap: 'entities.allowOverlap',
|
20
|
-
enrichment_queries: 'entities.enrichmentQueries'
|
20
|
+
enrichment_queries: 'entities.enrichmentQueries',
|
21
|
+
classifiers: 'classifiers'
|
21
22
|
}
|
22
23
|
|
23
24
|
def self.post(text, options)
|
data/lib/textrazor/response.rb
CHANGED
@@ -84,6 +84,10 @@ module TextRazor
|
|
84
84
|
@sentences ||= parse_sentences
|
85
85
|
end
|
86
86
|
|
87
|
+
def categories
|
88
|
+
@categories ||= parse_categories
|
89
|
+
end
|
90
|
+
|
87
91
|
def language
|
88
92
|
raw_response[:language]
|
89
93
|
end
|
@@ -122,6 +126,10 @@ module TextRazor
|
|
122
126
|
parse(:topic, raw_response[:topics])
|
123
127
|
end
|
124
128
|
|
129
|
+
def parse_categories
|
130
|
+
parse(:category, raw_response[:categories])
|
131
|
+
end
|
132
|
+
|
125
133
|
def parse_phrases
|
126
134
|
raw_phrases = raw_response[:nounPhrases]
|
127
135
|
return if raw_phrases.nil?
|
data/lib/textrazor/topic.rb
CHANGED
@@ -2,19 +2,12 @@ module TextRazor
|
|
2
2
|
|
3
3
|
class Topic
|
4
4
|
|
5
|
-
|
5
|
+
include Util
|
6
6
|
|
7
|
-
attr_reader :id, :label, :wiki_link, :score
|
7
|
+
attr_reader :id, :label, :wiki_link, :score, :wikidata_id
|
8
8
|
|
9
9
|
def initialize(params = {})
|
10
|
-
params
|
11
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def self.create_from_hash(params)
|
16
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
17
|
-
new(params)
|
10
|
+
initialize_params params
|
18
11
|
end
|
19
12
|
|
20
13
|
end
|
data/lib/textrazor/util.rb
CHANGED
@@ -1,11 +1,30 @@
|
|
1
1
|
module TextRazor
|
2
2
|
module Util
|
3
|
-
def
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
def self.included(base)
|
4
|
+
base.extend ClassMethods
|
5
|
+
|
6
|
+
base.class_eval do
|
7
|
+
def initialize_params(params)
|
8
|
+
params.each do |k, v|
|
9
|
+
instance_variable_set(:"@#{k}", v) if self.respond_to?(:"#{k}") && v && (!v.is_a?(String) || !v.empty?)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
module ClassMethods
|
16
|
+
def standardize(param)
|
17
|
+
param.to_s.gsub(/::/, '/').
|
18
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
19
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
20
|
+
tr("-", "_").
|
21
|
+
downcase
|
22
|
+
end
|
23
|
+
|
24
|
+
def create_from_hash(params)
|
25
|
+
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
26
|
+
new(params)
|
27
|
+
end
|
9
28
|
end
|
10
29
|
end
|
11
30
|
end
|
data/lib/textrazor/version.rb
CHANGED
data/lib/textrazor/word.rb
CHANGED
@@ -1,22 +1,14 @@
|
|
1
1
|
module TextRazor
|
2
2
|
|
3
3
|
class Word
|
4
|
-
|
5
|
-
extend Util
|
4
|
+
include Util
|
6
5
|
|
7
6
|
attr_reader :position, :starting_pos, :ending_pos, :stem, :lemma,
|
8
7
|
:token, :part_of_speech, :parent_position
|
9
8
|
|
10
9
|
def initialize(params = {})
|
11
10
|
@type = []
|
12
|
-
params
|
13
|
-
instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.create_from_hash(params)
|
18
|
-
params = Hash[params.map {|k, v| [standardize(k), v] }]
|
19
|
-
new(params)
|
11
|
+
initialize_params params
|
20
12
|
end
|
21
13
|
|
22
14
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
module TextRazor
|
4
|
+
|
5
|
+
describe Category do
|
6
|
+
|
7
|
+
context "#create_from_hash" do
|
8
|
+
let(:category_hash) do
|
9
|
+
{
|
10
|
+
"id":0,
|
11
|
+
"classifierId":"textrazor_iab",
|
12
|
+
"categoryId":"IAB11",
|
13
|
+
"label":"Law, Gov’t & Politics",
|
14
|
+
"score":0.809611
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
let(:category) do
|
19
|
+
Category.create_from_hash(category_hash)
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should create a new instance" do
|
23
|
+
expect(category.id).to eq(0)
|
24
|
+
expect(category.classifier_id).to eq("textrazor_iab")
|
25
|
+
expect(category.category_id).to eq("IAB11")
|
26
|
+
expect(category.label).to eq("Law, Gov’t & Politics")
|
27
|
+
expect(category.score).to eq(0.809611)
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -23,6 +23,7 @@ module TextRazor
|
|
23
23
|
let(:custom_options_client) do
|
24
24
|
Client.new(api_key, {
|
25
25
|
extractors: %w(entities topics words), cleanup_mode: 'raw',
|
26
|
+
classifiers: 'textrazor_newscodes',
|
26
27
|
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
27
28
|
filter_dbpedia_types: %w(type1), language: 'fre',
|
28
29
|
filter_freebase_types: %w(type2), allow_overlap: false,
|
@@ -57,7 +58,8 @@ module TextRazor
|
|
57
58
|
to eq({extractors: %w(entities topics words), cleanup_mode: 'raw', language: 'fre',
|
58
59
|
cleanup_return_cleaned: true, cleanup_return_raw: true,
|
59
60
|
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
60
|
-
allow_overlap: false, dictionaries: %w(test)
|
61
|
+
allow_overlap: false, dictionaries: %w(test),
|
62
|
+
classifiers: 'textrazor_newscodes'})
|
61
63
|
end
|
62
64
|
|
63
65
|
end
|
@@ -125,7 +127,7 @@ module TextRazor
|
|
125
127
|
with('text', {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
|
126
128
|
cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
|
127
129
|
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
|
128
|
-
allow_overlap: false, dictionaries: %w(test)}).
|
130
|
+
allow_overlap: false, dictionaries: %w(test), classifiers: 'textrazor_newscodes'}).
|
129
131
|
and_return(request)
|
130
132
|
|
131
133
|
expect(Response).to receive(:new).with(request)
|
@@ -263,6 +265,25 @@ module TextRazor
|
|
263
265
|
|
264
266
|
end
|
265
267
|
|
268
|
+
context ".categories" do
|
269
|
+
|
270
|
+
it "makes correct calls" do
|
271
|
+
client = OpenStruct.new
|
272
|
+
response = OpenStruct.new categories: ['Category1']
|
273
|
+
|
274
|
+
expect(Client).to receive(:new).
|
275
|
+
with(api_key, {classifiers: ['textrazor_iab']}).
|
276
|
+
and_return(client)
|
277
|
+
|
278
|
+
expect(client).to receive(:analyse).
|
279
|
+
with("text").
|
280
|
+
and_return(response)
|
281
|
+
|
282
|
+
Client.categories(api_key, 'text', {})
|
283
|
+
end
|
284
|
+
|
285
|
+
end
|
286
|
+
|
266
287
|
end
|
267
288
|
|
268
289
|
end
|
@@ -28,7 +28,8 @@ module TextRazor
|
|
28
28
|
"endingPos" => 20,
|
29
29
|
"data" => {
|
30
30
|
"type" => ['person', 'company']
|
31
|
-
}
|
31
|
+
},
|
32
|
+
"wikidataId" => 'Q7330070'
|
32
33
|
}
|
33
34
|
end
|
34
35
|
|
@@ -47,6 +48,7 @@ module TextRazor
|
|
47
48
|
expect(entity.starting_pos).to eq(3)
|
48
49
|
expect(entity.ending_pos).to eq(20)
|
49
50
|
expect(entity.data['type']).to match_array(['person', 'company'])
|
51
|
+
expect(entity.wikidata_id).to eq('Q7330070')
|
50
52
|
end
|
51
53
|
end
|
52
54
|
|
@@ -60,13 +60,13 @@ module TextRazor
|
|
60
60
|
options = {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
|
61
61
|
cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
|
62
62
|
filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2), allow_overlap: false,
|
63
|
-
enrichment_queries: 'queries'}
|
63
|
+
enrichment_queries: 'queries', classifiers: 'textrazor_iab'}
|
64
64
|
|
65
65
|
expect(::RestClient).to receive(:post).
|
66
66
|
with("https://api.textrazor.com/", { "text" => 'text', "apiKey" => 'api_key', "extractors" => "entities,topics,words",
|
67
67
|
"cleanup.mode" => "raw", "cleanup.returnCleaned" => true, "cleanup.returnRaw" => true, "languageOverride" => 'fre',
|
68
68
|
"entities.filterDbpediaTypes" => "type1", "entities.filterFreebaseTypes" => "type2" , "entities.allowOverlap" => false,
|
69
|
-
"entities.enrichmentQueries" => "queries"},
|
69
|
+
"entities.enrichmentQueries" => "queries", "classifiers" => 'textrazor_iab'},
|
70
70
|
accept_encoding: 'gzip')
|
71
71
|
|
72
72
|
Request.post('text', options)
|
@@ -441,6 +441,73 @@ module TextRazor
|
|
441
441
|
|
442
442
|
end
|
443
443
|
|
444
|
+
describe "#categories" do
|
445
|
+
|
446
|
+
let(:http_response) do
|
447
|
+
::OpenStruct.new(code: 200, body: body)
|
448
|
+
end
|
449
|
+
|
450
|
+
let(:response) do
|
451
|
+
Response.new(http_response)
|
452
|
+
end
|
453
|
+
|
454
|
+
context "if there are categories returned from api" do
|
455
|
+
|
456
|
+
let(:body) do
|
457
|
+
{
|
458
|
+
"time" => "0.013219",
|
459
|
+
"response" => {
|
460
|
+
"language" => "eng",
|
461
|
+
"languageIsReliable" => true,
|
462
|
+
"categories" => [
|
463
|
+
{
|
464
|
+
"id" => 0,
|
465
|
+
"classifierId" => "textrazor_iab",
|
466
|
+
"categoryId" => "IAB11",
|
467
|
+
"label" => "Law, Gov’t & Politics",
|
468
|
+
"score" => 0.809611
|
469
|
+
},
|
470
|
+
{
|
471
|
+
"id" => 1,
|
472
|
+
"classifierId" => "textrazor_iab",
|
473
|
+
"categoryId" => "IAB11-2",
|
474
|
+
"label" => "Law, Gov’t & Politics>Legal Issues",
|
475
|
+
"score" => 0.61239
|
476
|
+
}
|
477
|
+
]
|
478
|
+
}
|
479
|
+
}.to_json
|
480
|
+
end
|
481
|
+
|
482
|
+
it "returns categories" do
|
483
|
+
categories = response.categories
|
484
|
+
|
485
|
+
expect(categories).to_not be_nil
|
486
|
+
expect(categories.size).to eq(2)
|
487
|
+
end
|
488
|
+
|
489
|
+
end
|
490
|
+
|
491
|
+
context "if there are no categories returned from api" do
|
492
|
+
|
493
|
+
let(:body) do
|
494
|
+
{
|
495
|
+
"time" => "0.013219",
|
496
|
+
"response" => {
|
497
|
+
"language" => "eng",
|
498
|
+
"languageIsReliable" => true
|
499
|
+
}
|
500
|
+
}.to_json
|
501
|
+
end
|
502
|
+
|
503
|
+
it "returns nil" do
|
504
|
+
expect(response.categories).to be_nil
|
505
|
+
end
|
506
|
+
|
507
|
+
end
|
508
|
+
|
509
|
+
end
|
510
|
+
|
444
511
|
describe "#words" do
|
445
512
|
|
446
513
|
let(:http_response) do
|
@@ -10,7 +10,8 @@ module TextRazor
|
|
10
10
|
id: 1,
|
11
11
|
label: "Sports",
|
12
12
|
wikiLink: "link_to_wiki",
|
13
|
-
score: 1.03589
|
13
|
+
score: 1.03589,
|
14
|
+
wikidataId: "Q042"
|
14
15
|
}
|
15
16
|
end
|
16
17
|
|
@@ -23,6 +24,7 @@ module TextRazor
|
|
23
24
|
expect(topic.label).to eq("Sports")
|
24
25
|
expect(topic.wiki_link).to eq("link_to_wiki")
|
25
26
|
expect(topic.score).to eq(1.03589)
|
27
|
+
expect(topic.wikidata_id).to eq("Q042")
|
26
28
|
end
|
27
29
|
|
28
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textrazor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: '1.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anuj Dutta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- README.md
|
112
112
|
- Rakefile
|
113
113
|
- lib/textrazor.rb
|
114
|
+
- lib/textrazor/category.rb
|
114
115
|
- lib/textrazor/client.rb
|
115
116
|
- lib/textrazor/configuration.rb
|
116
117
|
- lib/textrazor/entailment.rb
|
@@ -127,6 +128,7 @@ files:
|
|
127
128
|
- lib/textrazor/version.rb
|
128
129
|
- lib/textrazor/word.rb
|
129
130
|
- spec/functional/service_spec.rb
|
131
|
+
- spec/lib/textrazor/category_spec.rb
|
130
132
|
- spec/lib/textrazor/client_spec.rb
|
131
133
|
- spec/lib/textrazor/configuration_spec.rb
|
132
134
|
- spec/lib/textrazor/entailment_spec.rb
|
@@ -163,12 +165,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
165
|
version: '0'
|
164
166
|
requirements: []
|
165
167
|
rubyforge_project:
|
166
|
-
rubygems_version: 2.
|
168
|
+
rubygems_version: 2.4.5.1
|
167
169
|
signing_key:
|
168
170
|
specification_version: 4
|
169
171
|
summary: An api wrapper for text razor in ruby
|
170
172
|
test_files:
|
171
173
|
- spec/functional/service_spec.rb
|
174
|
+
- spec/lib/textrazor/category_spec.rb
|
172
175
|
- spec/lib/textrazor/client_spec.rb
|
173
176
|
- spec/lib/textrazor/configuration_spec.rb
|
174
177
|
- spec/lib/textrazor/entailment_spec.rb
|