textrazor 1.0.1 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a5dd726ebc5946e424815cfad69070296b759d2e
4
- data.tar.gz: fda4507e8aa64b77073789d97526fa661aa63c13
3
+ metadata.gz: 97bc6dcda89ce7ef88868906d97b92b0e46a9ef8
4
+ data.tar.gz: f4e7d689ddeb6b41130d0fabb0e020dff8d9eb24
5
5
  SHA512:
6
- metadata.gz: aa872f1ddd89bb8b2273997a068fb026f90504cb610f6a319b32207964041d2b59052648fc77f6538f1263d08c9e07806686e80de30416cc2c099ca01bc97962
7
- data.tar.gz: d3e63881541061cc0a3042e683d2f14cf7780f07563d852737e644ee3a4d99bb515d962c7492f6786df634ffa778e09800f96731327e6b6eeebd0f1ab5cf2671
6
+ metadata.gz: 506f8a61cda18fc018267cee10323dfc2a9bef7a52edacac46924e0cfcc2689e1348f834e67879dff892b8fb36b9e7ea6aa8b638ee9727579ccaafdf8e71b850
7
+ data.tar.gz: 013233c7cd51d4467aeaf5e5a2e5f3960a4acdc1e17724a1b0a2092cab28f732ec59605033cdd890d0481011d9191742f4d89e57788876444c1a3a0fc105e27d
@@ -4,6 +4,7 @@ require "textrazor/util"
4
4
  require "textrazor/client"
5
5
  require "textrazor/request"
6
6
  require "textrazor/response"
7
+ require "textrazor/category"
7
8
  require "textrazor/topic"
8
9
  require "textrazor/entailment"
9
10
  require "textrazor/entity"
@@ -0,0 +1,15 @@
1
+ module TextRazor
2
+
3
+ class Category
4
+
5
+ include Util
6
+
7
+ attr_reader :id, :category_id, :label, :score, :classifier_id
8
+
9
+ def initialize(params = {})
10
+ initialize_params params
11
+ end
12
+
13
+ end
14
+
15
+ end
@@ -17,7 +17,7 @@ module TextRazor
17
17
 
18
18
  REQUEST_OPTIONS = [:extractors, :rules, :cleanup_mode, :cleanup_return_cleaned, :cleanup_return_raw,
19
19
  :language, :dictionaries, :filter_dbpedia_types, :filter_freebase_types, :allow_overlap,
20
- :enrichment_queries]
20
+ :enrichment_queries, :classifiers]
21
21
 
22
22
  attr_reader :response, :api_key, :request_options
23
23
 
@@ -43,6 +43,12 @@ module TextRazor
43
43
  topics
44
44
  end
45
45
 
46
+ def self.categories(api_key, text, options = {})
47
+ new(api_key, options.merge(classifiers: ['textrazor_iab'])).
48
+ analyse(text).
49
+ categories
50
+ end
51
+
46
52
  def self.coarse_topics(api_key, text, options = {})
47
53
  new(api_key, options.merge(extractors: ['topics'])).
48
54
  analyse(text).
@@ -2,21 +2,14 @@ module TextRazor
2
2
 
3
3
  class Entailment
4
4
 
5
- extend Util
5
+ include Util
6
6
 
7
7
  attr_reader :id, :word_positions, :prior_score, :context_score,
8
8
  :score, :entailed_tree, :entailed_words
9
9
 
10
10
  def initialize(params = {})
11
11
  @type = []
12
- params.each do |k, v|
13
- instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
14
- end
15
- end
16
-
17
- def self.create_from_hash(params)
18
- params = Hash[params.map {|k, v| [standardize(k), v] }]
19
- new(params)
12
+ initialize_params params
20
13
  end
21
14
 
22
15
  end
@@ -2,22 +2,15 @@ module TextRazor
2
2
 
3
3
  class Entity
4
4
 
5
- extend Util
5
+ include Util
6
6
 
7
7
  attr_reader :id, :type, :matching_tokens, :entity_id, :freebase_types, :confidence_score,
8
8
  :wiki_link, :matched_text, :freebase_id, :relevance_score, :entity_english_id,
9
- :starting_pos, :ending_pos, :data
9
+ :starting_pos, :ending_pos, :data, :wikidata_id
10
10
 
11
11
  def initialize(params = {})
12
12
  @type = []
13
- params.each do |k, v|
14
- instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
15
- end
16
- end
17
-
18
- def self.create_from_hash(params)
19
- params = Hash[params.map {|k, v| [standardize(k), v] }]
20
- new(params)
13
+ initialize_params params
21
14
  end
22
15
 
23
16
  end
@@ -2,19 +2,12 @@ module TextRazor
2
2
 
3
3
  class Property
4
4
 
5
- extend Util
5
+ include Util
6
6
 
7
7
  attr_reader :id, :word_positions, :property_positions
8
8
 
9
9
  def initialize(params = {})
10
- params.each do |k, v|
11
- instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
12
- end
13
- end
14
-
15
- def self.create_from_hash(params)
16
- params = Hash[params.map {|k, v| [standardize(k), v] }]
17
- new(params)
10
+ initialize_params params
18
11
  end
19
12
 
20
13
  end
@@ -17,7 +17,8 @@ module TextRazor
17
17
  filter_dbpedia_types: 'entities.filterDbpediaTypes',
18
18
  filter_freebase_types: 'entities.filterFreebaseTypes',
19
19
  allow_overlap: 'entities.allowOverlap',
20
- enrichment_queries: 'entities.enrichmentQueries'
20
+ enrichment_queries: 'entities.enrichmentQueries',
21
+ classifiers: 'classifiers'
21
22
  }
22
23
 
23
24
  def self.post(text, options)
@@ -84,6 +84,10 @@ module TextRazor
84
84
  @sentences ||= parse_sentences
85
85
  end
86
86
 
87
+ def categories
88
+ @categories ||= parse_categories
89
+ end
90
+
87
91
  def language
88
92
  raw_response[:language]
89
93
  end
@@ -122,6 +126,10 @@ module TextRazor
122
126
  parse(:topic, raw_response[:topics])
123
127
  end
124
128
 
129
+ def parse_categories
130
+ parse(:category, raw_response[:categories])
131
+ end
132
+
125
133
  def parse_phrases
126
134
  raw_phrases = raw_response[:nounPhrases]
127
135
  return if raw_phrases.nil?
@@ -2,19 +2,12 @@ module TextRazor
2
2
 
3
3
  class Topic
4
4
 
5
- extend Util
5
+ include Util
6
6
 
7
- attr_reader :id, :label, :wiki_link, :score
7
+ attr_reader :id, :label, :wiki_link, :score, :wikidata_id
8
8
 
9
9
  def initialize(params = {})
10
- params.each do |k, v|
11
- instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
12
- end
13
- end
14
-
15
- def self.create_from_hash(params)
16
- params = Hash[params.map {|k, v| [standardize(k), v] }]
17
- new(params)
10
+ initialize_params params
18
11
  end
19
12
 
20
13
  end
@@ -1,11 +1,30 @@
1
1
  module TextRazor
2
2
  module Util
3
- def standardize(param)
4
- param.to_s.gsub(/::/, '/').
5
- gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
6
- gsub(/([a-z\d])([A-Z])/,'\1_\2').
7
- tr("-", "_").
8
- downcase
3
+ def self.included(base)
4
+ base.extend ClassMethods
5
+
6
+ base.class_eval do
7
+ def initialize_params(params)
8
+ params.each do |k, v|
9
+ instance_variable_set(:"@#{k}", v) if self.respond_to?(:"#{k}") && v && (!v.is_a?(String) || !v.empty?)
10
+ end
11
+ end
12
+ end
13
+ end
14
+
15
+ module ClassMethods
16
+ def standardize(param)
17
+ param.to_s.gsub(/::/, '/').
18
+ gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
19
+ gsub(/([a-z\d])([A-Z])/,'\1_\2').
20
+ tr("-", "_").
21
+ downcase
22
+ end
23
+
24
+ def create_from_hash(params)
25
+ params = Hash[params.map {|k, v| [standardize(k), v] }]
26
+ new(params)
27
+ end
9
28
  end
10
29
  end
11
30
  end
@@ -1,3 +1,3 @@
1
1
  module TextRazor
2
- VERSION = "1.0.1"
2
+ VERSION = "1.1"
3
3
  end
@@ -1,22 +1,14 @@
1
1
  module TextRazor
2
2
 
3
3
  class Word
4
-
5
- extend Util
4
+ include Util
6
5
 
7
6
  attr_reader :position, :starting_pos, :ending_pos, :stem, :lemma,
8
7
  :token, :part_of_speech, :parent_position
9
8
 
10
9
  def initialize(params = {})
11
10
  @type = []
12
- params.each do |k, v|
13
- instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
14
- end
15
- end
16
-
17
- def self.create_from_hash(params)
18
- params = Hash[params.map {|k, v| [standardize(k), v] }]
19
- new(params)
11
+ initialize_params params
20
12
  end
21
13
 
22
14
  end
@@ -0,0 +1,34 @@
1
+ require "spec_helper"
2
+
3
+ module TextRazor
4
+
5
+ describe Category do
6
+
7
+ context "#create_from_hash" do
8
+ let(:category_hash) do
9
+ {
10
+ "id":0,
11
+ "classifierId":"textrazor_iab",
12
+ "categoryId":"IAB11",
13
+ "label":"Law, Gov’t & Politics",
14
+ "score":0.809611
15
+ }
16
+ end
17
+
18
+ let(:category) do
19
+ Category.create_from_hash(category_hash)
20
+ end
21
+
22
+ it "should create a new instance" do
23
+ expect(category.id).to eq(0)
24
+ expect(category.classifier_id).to eq("textrazor_iab")
25
+ expect(category.category_id).to eq("IAB11")
26
+ expect(category.label).to eq("Law, Gov’t & Politics")
27
+ expect(category.score).to eq(0.809611)
28
+ end
29
+
30
+ end
31
+
32
+ end
33
+
34
+ end
@@ -23,6 +23,7 @@ module TextRazor
23
23
  let(:custom_options_client) do
24
24
  Client.new(api_key, {
25
25
  extractors: %w(entities topics words), cleanup_mode: 'raw',
26
+ classifiers: 'textrazor_newscodes',
26
27
  cleanup_return_cleaned: true, cleanup_return_raw: true,
27
28
  filter_dbpedia_types: %w(type1), language: 'fre',
28
29
  filter_freebase_types: %w(type2), allow_overlap: false,
@@ -57,7 +58,8 @@ module TextRazor
57
58
  to eq({extractors: %w(entities topics words), cleanup_mode: 'raw', language: 'fre',
58
59
  cleanup_return_cleaned: true, cleanup_return_raw: true,
59
60
  filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
60
- allow_overlap: false, dictionaries: %w(test)})
61
+ allow_overlap: false, dictionaries: %w(test),
62
+ classifiers: 'textrazor_newscodes'})
61
63
  end
62
64
 
63
65
  end
@@ -125,7 +127,7 @@ module TextRazor
125
127
  with('text', {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
126
128
  cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
127
129
  filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2),
128
- allow_overlap: false, dictionaries: %w(test)}).
130
+ allow_overlap: false, dictionaries: %w(test), classifiers: 'textrazor_newscodes'}).
129
131
  and_return(request)
130
132
 
131
133
  expect(Response).to receive(:new).with(request)
@@ -263,6 +265,25 @@ module TextRazor
263
265
 
264
266
  end
265
267
 
268
+ context ".categories" do
269
+
270
+ it "makes correct calls" do
271
+ client = OpenStruct.new
272
+ response = OpenStruct.new categories: ['Category1']
273
+
274
+ expect(Client).to receive(:new).
275
+ with(api_key, {classifiers: ['textrazor_iab']}).
276
+ and_return(client)
277
+
278
+ expect(client).to receive(:analyse).
279
+ with("text").
280
+ and_return(response)
281
+
282
+ Client.categories(api_key, 'text', {})
283
+ end
284
+
285
+ end
286
+
266
287
  end
267
288
 
268
289
  end
@@ -28,7 +28,8 @@ module TextRazor
28
28
  "endingPos" => 20,
29
29
  "data" => {
30
30
  "type" => ['person', 'company']
31
- }
31
+ },
32
+ "wikidataId" => 'Q7330070'
32
33
  }
33
34
  end
34
35
 
@@ -47,6 +48,7 @@ module TextRazor
47
48
  expect(entity.starting_pos).to eq(3)
48
49
  expect(entity.ending_pos).to eq(20)
49
50
  expect(entity.data['type']).to match_array(['person', 'company'])
51
+ expect(entity.wikidata_id).to eq('Q7330070')
50
52
  end
51
53
  end
52
54
 
@@ -60,13 +60,13 @@ module TextRazor
60
60
  options = {api_key: 'api_key', extractors: %w(entities topics words), cleanup_mode: 'raw',
61
61
  cleanup_return_cleaned: true, cleanup_return_raw: true, language: 'fre',
62
62
  filter_dbpedia_types: %w(type1), filter_freebase_types: %w(type2), allow_overlap: false,
63
- enrichment_queries: 'queries'}
63
+ enrichment_queries: 'queries', classifiers: 'textrazor_iab'}
64
64
 
65
65
  expect(::RestClient).to receive(:post).
66
66
  with("https://api.textrazor.com/", { "text" => 'text', "apiKey" => 'api_key', "extractors" => "entities,topics,words",
67
67
  "cleanup.mode" => "raw", "cleanup.returnCleaned" => true, "cleanup.returnRaw" => true, "languageOverride" => 'fre',
68
68
  "entities.filterDbpediaTypes" => "type1", "entities.filterFreebaseTypes" => "type2" , "entities.allowOverlap" => false,
69
- "entities.enrichmentQueries" => "queries"},
69
+ "entities.enrichmentQueries" => "queries", "classifiers" => 'textrazor_iab'},
70
70
  accept_encoding: 'gzip')
71
71
 
72
72
  Request.post('text', options)
@@ -441,6 +441,73 @@ module TextRazor
441
441
 
442
442
  end
443
443
 
444
+ describe "#categories" do
445
+
446
+ let(:http_response) do
447
+ ::OpenStruct.new(code: 200, body: body)
448
+ end
449
+
450
+ let(:response) do
451
+ Response.new(http_response)
452
+ end
453
+
454
+ context "if there are categories returned from api" do
455
+
456
+ let(:body) do
457
+ {
458
+ "time" => "0.013219",
459
+ "response" => {
460
+ "language" => "eng",
461
+ "languageIsReliable" => true,
462
+ "categories" => [
463
+ {
464
+ "id" => 0,
465
+ "classifierId" => "textrazor_iab",
466
+ "categoryId" => "IAB11",
467
+ "label" => "Law, Gov’t & Politics",
468
+ "score" => 0.809611
469
+ },
470
+ {
471
+ "id" => 1,
472
+ "classifierId" => "textrazor_iab",
473
+ "categoryId" => "IAB11-2",
474
+ "label" => "Law, Gov’t & Politics>Legal Issues",
475
+ "score" => 0.61239
476
+ }
477
+ ]
478
+ }
479
+ }.to_json
480
+ end
481
+
482
+ it "returns categories" do
483
+ categories = response.categories
484
+
485
+ expect(categories).to_not be_nil
486
+ expect(categories.size).to eq(2)
487
+ end
488
+
489
+ end
490
+
491
+ context "if there are no categories returned from api" do
492
+
493
+ let(:body) do
494
+ {
495
+ "time" => "0.013219",
496
+ "response" => {
497
+ "language" => "eng",
498
+ "languageIsReliable" => true
499
+ }
500
+ }.to_json
501
+ end
502
+
503
+ it "returns nil" do
504
+ expect(response.categories).to be_nil
505
+ end
506
+
507
+ end
508
+
509
+ end
510
+
444
511
  describe "#words" do
445
512
 
446
513
  let(:http_response) do
@@ -10,7 +10,8 @@ module TextRazor
10
10
  id: 1,
11
11
  label: "Sports",
12
12
  wikiLink: "link_to_wiki",
13
- score: 1.03589
13
+ score: 1.03589,
14
+ wikidataId: "Q042"
14
15
  }
15
16
  end
16
17
 
@@ -23,6 +24,7 @@ module TextRazor
23
24
  expect(topic.label).to eq("Sports")
24
25
  expect(topic.wiki_link).to eq("link_to_wiki")
25
26
  expect(topic.score).to eq(1.03589)
27
+ expect(topic.wikidata_id).to eq("Q042")
26
28
  end
27
29
 
28
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textrazor
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: '1.1'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Anuj Dutta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-15 00:00:00.000000000 Z
11
+ date: 2016-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -111,6 +111,7 @@ files:
111
111
  - README.md
112
112
  - Rakefile
113
113
  - lib/textrazor.rb
114
+ - lib/textrazor/category.rb
114
115
  - lib/textrazor/client.rb
115
116
  - lib/textrazor/configuration.rb
116
117
  - lib/textrazor/entailment.rb
@@ -127,6 +128,7 @@ files:
127
128
  - lib/textrazor/version.rb
128
129
  - lib/textrazor/word.rb
129
130
  - spec/functional/service_spec.rb
131
+ - spec/lib/textrazor/category_spec.rb
130
132
  - spec/lib/textrazor/client_spec.rb
131
133
  - spec/lib/textrazor/configuration_spec.rb
132
134
  - spec/lib/textrazor/entailment_spec.rb
@@ -163,12 +165,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
165
  version: '0'
164
166
  requirements: []
165
167
  rubyforge_project:
166
- rubygems_version: 2.2.5
168
+ rubygems_version: 2.4.5.1
167
169
  signing_key:
168
170
  specification_version: 4
169
171
  summary: An api wrapper for text razor in ruby
170
172
  test_files:
171
173
  - spec/functional/service_spec.rb
174
+ - spec/lib/textrazor/category_spec.rb
172
175
  - spec/lib/textrazor/client_spec.rb
173
176
  - spec/lib/textrazor/configuration_spec.rb
174
177
  - spec/lib/textrazor/entailment_spec.rb