textrazor 0.0.8 → 1.0.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a18bfa0d8979cd6af68c481ff1200802d989dfc
4
- data.tar.gz: 71666356571e6e4d6cad269ad6f7f6da306d90d0
3
+ metadata.gz: 2996a5abb8a9c7d3113db4c9f64daf10c20e7360
4
+ data.tar.gz: cf7010d2573ada02239d021a960e6543627f5ea5
5
5
  SHA512:
6
- metadata.gz: 653c8e38cf0d1ab4b42796eec8853bc1104c5122ba6e87e6073f74fb2e75b76b75911136a2583f0f0320ab93711da5d269a6ee67280a6c90b3884ca1b7c24961
7
- data.tar.gz: c65e0182196f36cf12a080772dd4953ec8d239a6bc66cd3897ef97e2bb83c8bdea5823ef14c91e76d0249980628ca762ae734f1d445c3238f6a5de86c88bc990
6
+ metadata.gz: cd8f18e796b07f1ac921c95bf45d1d829a913c2442e0155dbab01d863f4f1ce0611856eb228766ebc192aafcbb85f2ae61d012eeaa8a6079dc16a7b6139ebf99
7
+ data.tar.gz: 362e74c53aef5bbe198ef7b79a95ff179d605b20e89a43de05d47669e4b07f3515476d4756aa471bd8064cc1625eab720dcfd53ef9df30c1b0645aa426c59674
@@ -0,0 +1 @@
1
+ textrazor
@@ -0,0 +1 @@
1
+ 2.2.0
data/README.md CHANGED
@@ -4,8 +4,6 @@ This is a gem wrapper for TextRazor REST API reference.
4
4
 
5
5
  ## Installation
6
6
 
7
-
8
-
9
7
  Add this line to your application's Gemfile:
10
8
 
11
9
  gem 'textrazor', :git => 'git://github.com/andhapp/textrazor.git'
@@ -76,6 +74,32 @@ TextRazor.phrases('api_key', 'text')
76
74
  Only implemented this for topics, entities, words and phrases. Also, implement
77
75
  it for other information that we can retrieve from the public API.
78
76
 
77
+ ### API Issues (To investigate)
78
+
79
+ #### Response
80
+
81
+ * error - Descriptive error message of any problems that may have occurred during analysis, or an empty string if there was no error.
82
+
83
+ Missing from the successful response.
84
+
85
+ * message - Any warning or informational messages returned from the server, or an empty string if there was no message.
86
+
87
+ Missing from the successful response.
88
+
89
+ * cleanedText
90
+
91
+ Missing from the successful response.
92
+
93
+ * customAnnotationOutput
94
+
95
+ Missing from the successful response.
96
+
97
+ ### Specs
98
+
99
+ #### Prolog rules
100
+
101
+ Specs around custom prolog rules need to be added.
102
+
79
103
 
80
104
  ## Contributing
81
105
 
data/Rakefile CHANGED
@@ -2,6 +2,14 @@ require 'bundler/gem_tasks'
2
2
  require 'bundler/setup'
3
3
  require 'rspec/core/rake_task'
4
4
 
5
- RSpec::Core::RakeTask.new(:spec)
5
+ desc 'Run all the specs'
6
+ RSpec::Core::RakeTask.new(:spec) do |task|
7
+ task.pattern = "spec/lib/**/*_spec.rb"
8
+ end
6
9
 
7
- task :default => :spec
10
+ desc 'Run all the functional specs'
11
+ RSpec::Core::RakeTask.new(:specf) do |task|
12
+ task.pattern = "spec/functional/*_spec.rb"
13
+ end
14
+
15
+ task :default => [:spec, :specf]
@@ -5,9 +5,14 @@ require "textrazor/client"
5
5
  require "textrazor/request"
6
6
  require "textrazor/response"
7
7
  require "textrazor/topic"
8
+ require "textrazor/entailment"
8
9
  require "textrazor/entity"
9
10
  require "textrazor/word"
10
11
  require "textrazor/phrase"
12
+ require "textrazor/property"
13
+ require "textrazor/sentence"
14
+ require "textrazor/relation_param"
15
+ require "textrazor/relation"
11
16
 
12
17
  module TextRazor
13
18
 
@@ -5,15 +5,24 @@ module TextRazor
5
5
  EmptyApiKey = Class.new(StandardError)
6
6
  EmptyText = Class.new(StandardError)
7
7
  TextTooLong = Class.new(StandardError)
8
+ UnsupportedExtractor = Class.new(StandardError)
9
+ UnsupportedCleanupMode = Class.new(StandardError)
8
10
 
9
11
  DEFAULT_EXTRACTORS = ['entities', 'topics', 'words', 'phrases', 'dependency-trees',
10
12
  'relations', 'entailments', 'senses']
11
13
 
12
- REQUEST_OPTIONS = [:extractors, :cleanup_html, :language,
13
- :filter_dbpedia_types, :filter_freebase_types]
14
+ DEFAULT_CLEANUP_MODE = 'raw'
15
+
16
+ VALID_CLEANUP_MODE_VALUES = [DEFAULT_CLEANUP_MODE, 'stripTags', 'cleanHTML']
17
+
18
+ REQUEST_OPTIONS = [:extractors, :rules, :cleanup_mode, :cleanup_return_cleaned, :cleanup_return_raw,
19
+ :language, :filter_dbpedia_types, :filter_freebase_types, :allow_overlap,
20
+ :enrichment_queries]
14
21
 
15
22
  attr_reader :response, :api_key, :request_options
16
23
 
24
+ private_constant :DEFAULT_EXTRACTORS, :VALID_CLEANUP_MODE_VALUES, :DEFAULT_CLEANUP_MODE, :REQUEST_OPTIONS
25
+
17
26
  def initialize(api_key, options = {})
18
27
  assign_api_key(api_key)
19
28
  assign_request_options(options)
@@ -21,7 +30,9 @@ module TextRazor
21
30
 
22
31
  def analyse(text)
23
32
  assert_text(text)
24
- options = {api_key: api_key}.merge(request_options)
33
+ options = {
34
+ api_key: api_key
35
+ }.merge(request_options)
25
36
 
26
37
  Response.new(Request.post(text, options))
27
38
  end
@@ -67,9 +78,31 @@ module TextRazor
67
78
  end
68
79
 
69
80
  def assign_request_options(options)
70
- @request_options = { extractors: DEFAULT_EXTRACTORS }
81
+ extractors = options.delete(:extractors)
82
+ assert_extractors(extractors)
83
+
84
+ cleanup_mode = options.delete(:cleanup_mode)
85
+ assert_cleanup_mode(cleanup_mode)
86
+
87
+ @request_options = {
88
+ extractors: extractors || DEFAULT_EXTRACTORS,
89
+ cleanup_mode: cleanup_mode || DEFAULT_CLEANUP_MODE
90
+ }
91
+
71
92
  REQUEST_OPTIONS.each do |key|
72
- @request_options[key] = options[key] if options[key]
93
+ @request_options[key] = options[key] unless options[key].nil?
94
+ end
95
+ end
96
+
97
+ def assert_extractors(extractors)
98
+ if extractors && !extractors.all? { |extractor| DEFAULT_EXTRACTORS.include?(extractor) }
99
+ raise UnsupportedExtractor.new('Unsupported extractor')
100
+ end
101
+ end
102
+
103
+ def assert_cleanup_mode(cleanup_mode)
104
+ if cleanup_mode && !VALID_CLEANUP_MODE_VALUES.include?(cleanup_mode)
105
+ raise UnsupportedCleanupMode.new('Unsupported clean up mode')
73
106
  end
74
107
  end
75
108
 
@@ -6,4 +6,4 @@ module TextRazor
6
6
  @secure = true
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -0,0 +1,24 @@
1
+ module TextRazor
2
+
3
+ class Entailment
4
+
5
+ extend Util
6
+
7
+ attr_reader :id, :word_positions, :prior_score, :context_score,
8
+ :score, :entailed_tree, :entailed_words
9
+
10
+ def initialize(params = {})
11
+ @type = []
12
+ params.each do |k, v|
13
+ instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
14
+ end
15
+ end
16
+
17
+ def self.create_from_hash(params)
18
+ params = Hash[params.map {|k, v| [standardize(k), v] }]
19
+ new(params)
20
+ end
21
+
22
+ end
23
+
24
+ end
@@ -0,0 +1,22 @@
1
+ module TextRazor
2
+
3
+ class Property
4
+
5
+ extend Util
6
+
7
+ attr_reader :id, :word_positions, :property_positions
8
+
9
+ def initialize(params = {})
10
+ params.each do |k, v|
11
+ instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
12
+ end
13
+ end
14
+
15
+ def self.create_from_hash(params)
16
+ params = Hash[params.map {|k, v| [standardize(k), v] }]
17
+ new(params)
18
+ end
19
+
20
+ end
21
+
22
+ end
@@ -0,0 +1,25 @@
1
+ module TextRazor
2
+
3
+ class Relation
4
+
5
+ attr_reader :id, :word_positions, :relation_params
6
+
7
+ def initialize(params = {})
8
+ @id = params[:id]
9
+ @word_positions = params[:wordPositions]
10
+ @relation_params = params[:params].map do |relation_param_hash|
11
+ RelationParam.create_from_hash(relation_param_hash)
12
+ end
13
+ end
14
+
15
+ def number_of_relation_params
16
+ @relation_params.size
17
+ end
18
+
19
+ def self.create_from_hash(params)
20
+ new(params)
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,18 @@
1
+ module TextRazor
2
+
3
+ class RelationParam
4
+
5
+ attr_reader :relation, :word_positions
6
+
7
+ def initialize(params = {})
8
+ @relation = params[:relation]
9
+ @word_positions = params[:wordPositions]
10
+ end
11
+
12
+ def self.create_from_hash(params)
13
+ new(params)
14
+ end
15
+
16
+ end
17
+
18
+ end
@@ -9,10 +9,14 @@ module TextRazor
9
9
 
10
10
  OPTIONS_MAPPING = {
11
11
  extractors: 'extractors',
12
- cleanup_html: 'cleanupHTML',
12
+ cleanup_mode: 'cleanup.mode',
13
+ cleanup_return_cleaned: 'cleanup.returnCleaned',
14
+ cleanup_return_raw: 'cleanup.returnRaw',
13
15
  language: 'languageOverride',
14
16
  filter_dbpedia_types: 'entities.filterDbpediaTypes',
15
- filter_freebase_types: 'entities.filterFreebaseTypes'
17
+ filter_freebase_types: 'entities.filterFreebaseTypes',
18
+ allow_overlap: 'entities.allowOverlap',
19
+ enrichment_queries: 'entities.enrichmentQueries'
16
20
  }
17
21
 
18
22
  def self.post(text, options)
@@ -8,7 +8,7 @@ module TextRazor
8
8
  Unauthorised = Class.new(StandardError)
9
9
  RequestEntityTooLong = Class.new(StandardError)
10
10
 
11
- attr_reader :raw_response
11
+ attr_reader :raw_response, :time
12
12
 
13
13
  def initialize(http_response)
14
14
  code = http_response.code
@@ -18,45 +18,78 @@ module TextRazor
18
18
  raise Unauthorised.new(body) if unauthorised?(code)
19
19
  raise RequestEntityTooLong.new(body) if request_entity_too_long?(code)
20
20
 
21
- @raw_response = ::JSON.parse(body)["response"]
21
+ json_body = ::JSON::parse(body, symbolize_names: true)
22
+
23
+ @time = json_body[:time].to_f
24
+ @ok = json_body[:ok]
25
+ @raw_response = json_body[:response]
22
26
  end
23
27
 
24
- def topics
25
- @topics ||= parse_topics(raw_response["topics"])
28
+ def ok?
29
+ @ok
26
30
  end
27
31
 
28
- def coarse_topics
29
- @coarse_topics ||= parse_topics(raw_response["coarseTopics"])
32
+ #TODO: Not in a successful response
33
+ #def error
34
+ #end
35
+
36
+ #def message
37
+ #end
38
+
39
+ def custom_annotation_output
40
+ @custom_annotation_output ||= raw_response[:customAnnotationOutput]
41
+ end
42
+
43
+ def cleaned_text
44
+ @cleaned_text ||= raw_response[:cleanedText]
45
+ end
46
+
47
+ def raw_text
48
+ @raw_text||= raw_response[:rawText]
49
+ end
50
+
51
+ def entailments
52
+ @entailments ||= parse_entailments
30
53
  end
31
54
 
32
55
  def entities
33
- raw_entities = raw_response["entities"]
34
- return nil if raw_entities.nil?
56
+ @entities ||= parse_entities
57
+ end
35
58
 
36
- @entities ||= begin
37
- raw_entities.map do |entity_hash|
38
- Entity.create_from_hash(entity_hash)
39
- end
40
- end
59
+ def coarse_topics
60
+ @coarse_topics ||= parse_coarse_topics
41
61
  end
42
62
 
43
- def words
44
- raw_sentences = raw_response["sentences"]
45
- return nil if raw_sentences.nil?
46
-
47
- @words ||= begin
48
- words = []
49
- raw_sentences.each do |sentence_hash|
50
- sentence_hash["words"].each do |word_hash|
51
- words << Word.create_from_hash(word_hash)
52
- end
53
- end
54
- words
55
- end
63
+ def topics
64
+ @topics ||= parse_topics
56
65
  end
57
66
 
58
67
  def phrases
59
- @phrases ||= parse_phrases(raw_response["nounPhrases"], words)
68
+ @phrases ||= parse_phrases
69
+ end
70
+
71
+ def words
72
+ @words ||= parse_words
73
+ end
74
+
75
+ def properties
76
+ @properties ||= parse_properties
77
+ end
78
+
79
+ def relations
80
+ @relations ||= parse_relations
81
+ end
82
+
83
+ def sentences
84
+ @sentences ||= parse_sentences
85
+ end
86
+
87
+ def language
88
+ raw_response[:language]
89
+ end
90
+
91
+ def language_is_reliable?
92
+ raw_response[:languageIsReliable]
60
93
  end
61
94
 
62
95
  private
@@ -73,22 +106,65 @@ module TextRazor
73
106
  code == 413
74
107
  end
75
108
 
76
- def parse_topics(raw_topics)
77
- return nil if raw_topics.nil?
109
+ def parse_entailments
110
+ parse(:entailment, raw_response[:entailments])
111
+ end
78
112
 
79
- raw_topics.map do |topic_hash|
80
- Topic.create_from_hash(topic_hash)
81
- end
113
+ def parse_entities
114
+ parse(:entity, raw_response[:entities])
82
115
  end
83
116
 
84
- def parse_phrases(raw_phrases, words)
85
- return nil if raw_phrases.nil?
117
+ def parse_coarse_topics
118
+ parse(:topic, raw_response[:coarseTopics])
119
+ end
120
+
121
+ def parse_topics
122
+ parse(:topic, raw_response[:topics])
123
+ end
124
+
125
+ def parse_phrases
126
+ raw_phrases = raw_response[:nounPhrases]
127
+ return if raw_phrases.nil?
86
128
 
87
129
  raw_phrases.map do |phrase_hash|
88
130
  Phrase.create_from_hash(phrase_hash, words)
89
131
  end
90
132
  end
91
133
 
134
+ def parse_words
135
+ raw_sentences = raw_response[:sentences]
136
+ return if raw_sentences.nil?
137
+
138
+ words = []
139
+ raw_sentences.each do |sentence_hash|
140
+ sentence_hash[:words].each do |word_hash|
141
+ words << Word.create_from_hash(word_hash)
142
+ end
143
+ end
144
+ words
145
+ end
146
+
147
+ def parse_properties
148
+ parse(:property, raw_response[:properties])
149
+ end
150
+
151
+ def parse_relations
152
+ parse(:relation, raw_response[:relations])
153
+ end
154
+
155
+ def parse_sentences
156
+ parse(:sentence, raw_response[:sentences])
157
+ end
158
+
159
+ def parse(type, data)
160
+ return nil if data.nil?
161
+
162
+ klass = Object.const_get("TextRazor::#{type.capitalize}")
163
+
164
+ data.map do |data_hash|
165
+ klass.create_from_hash(data_hash)
166
+ end
167
+ end
92
168
  end
93
169
 
94
170
  end