textrazor 0.0.8 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a18bfa0d8979cd6af68c481ff1200802d989dfc
4
- data.tar.gz: 71666356571e6e4d6cad269ad6f7f6da306d90d0
3
+ metadata.gz: 2996a5abb8a9c7d3113db4c9f64daf10c20e7360
4
+ data.tar.gz: cf7010d2573ada02239d021a960e6543627f5ea5
5
5
  SHA512:
6
- metadata.gz: 653c8e38cf0d1ab4b42796eec8853bc1104c5122ba6e87e6073f74fb2e75b76b75911136a2583f0f0320ab93711da5d269a6ee67280a6c90b3884ca1b7c24961
7
- data.tar.gz: c65e0182196f36cf12a080772dd4953ec8d239a6bc66cd3897ef97e2bb83c8bdea5823ef14c91e76d0249980628ca762ae734f1d445c3238f6a5de86c88bc990
6
+ metadata.gz: cd8f18e796b07f1ac921c95bf45d1d829a913c2442e0155dbab01d863f4f1ce0611856eb228766ebc192aafcbb85f2ae61d012eeaa8a6079dc16a7b6139ebf99
7
+ data.tar.gz: 362e74c53aef5bbe198ef7b79a95ff179d605b20e89a43de05d47669e4b07f3515476d4756aa471bd8064cc1625eab720dcfd53ef9df30c1b0645aa426c59674
@@ -0,0 +1 @@
1
+ textrazor
@@ -0,0 +1 @@
1
+ 2.2.0
data/README.md CHANGED
@@ -4,8 +4,6 @@ This is a gem wrapper for TextRazor REST API reference.
4
4
 
5
5
  ## Installation
6
6
 
7
-
8
-
9
7
  Add this line to your application's Gemfile:
10
8
 
11
9
  gem 'textrazor', :git => 'git://github.com/andhapp/textrazor.git'
@@ -76,6 +74,32 @@ TextRazor.phrases('api_key', 'text')
76
74
  Only implemented this for topics, entities, words and phrases. Also, implement
77
75
  it for other information that we can retrieve from the public API.
78
76
 
77
+ ### API Issues (To investigate)
78
+
79
+ #### Response
80
+
81
+ * error - Descriptive error message of any problems that may have occurred during analysis, or an empty string if there was no error.
82
+
83
+ Missing from the successful response.
84
+
85
+ * message - Any warning or informational messages returned from the server, or an empty string if there was no message.
86
+
87
+ Missing from the successful response.
88
+
89
+ * cleanedText
90
+
91
+ Missing from the successful response.
92
+
93
+ * customAnnotationOutput
94
+
95
+ Missing from the successful response.
96
+
97
+ ### Specs
98
+
99
+ #### Prolog rules
100
+
101
+ Specs around custom prolog rules need to be added.
102
+
79
103
 
80
104
  ## Contributing
81
105
 
data/Rakefile CHANGED
@@ -2,6 +2,14 @@ require 'bundler/gem_tasks'
2
2
  require 'bundler/setup'
3
3
  require 'rspec/core/rake_task'
4
4
 
5
- RSpec::Core::RakeTask.new(:spec)
5
+ desc 'Run all the specs'
6
+ RSpec::Core::RakeTask.new(:spec) do |task|
7
+ task.pattern = "spec/lib/**/*_spec.rb"
8
+ end
6
9
 
7
- task :default => :spec
10
+ desc 'Run all the functional specs'
11
+ RSpec::Core::RakeTask.new(:specf) do |task|
12
+ task.pattern = "spec/functional/*_spec.rb"
13
+ end
14
+
15
+ task :default => [:spec, :specf]
@@ -5,9 +5,14 @@ require "textrazor/client"
5
5
  require "textrazor/request"
6
6
  require "textrazor/response"
7
7
  require "textrazor/topic"
8
+ require "textrazor/entailment"
8
9
  require "textrazor/entity"
9
10
  require "textrazor/word"
10
11
  require "textrazor/phrase"
12
+ require "textrazor/property"
13
+ require "textrazor/sentence"
14
+ require "textrazor/relation_param"
15
+ require "textrazor/relation"
11
16
 
12
17
  module TextRazor
13
18
 
@@ -5,15 +5,24 @@ module TextRazor
5
5
  EmptyApiKey = Class.new(StandardError)
6
6
  EmptyText = Class.new(StandardError)
7
7
  TextTooLong = Class.new(StandardError)
8
+ UnsupportedExtractor = Class.new(StandardError)
9
+ UnsupportedCleanupMode = Class.new(StandardError)
8
10
 
9
11
  DEFAULT_EXTRACTORS = ['entities', 'topics', 'words', 'phrases', 'dependency-trees',
10
12
  'relations', 'entailments', 'senses']
11
13
 
12
- REQUEST_OPTIONS = [:extractors, :cleanup_html, :language,
13
- :filter_dbpedia_types, :filter_freebase_types]
14
+ DEFAULT_CLEANUP_MODE = 'raw'
15
+
16
+ VALID_CLEANUP_MODE_VALUES = [DEFAULT_CLEANUP_MODE, 'stripTags', 'cleanHTML']
17
+
18
+ REQUEST_OPTIONS = [:extractors, :rules, :cleanup_mode, :cleanup_return_cleaned, :cleanup_return_raw,
19
+ :language, :filter_dbpedia_types, :filter_freebase_types, :allow_overlap,
20
+ :enrichment_queries]
14
21
 
15
22
  attr_reader :response, :api_key, :request_options
16
23
 
24
+ private_constant :DEFAULT_EXTRACTORS, :VALID_CLEANUP_MODE_VALUES, :DEFAULT_CLEANUP_MODE, :REQUEST_OPTIONS
25
+
17
26
  def initialize(api_key, options = {})
18
27
  assign_api_key(api_key)
19
28
  assign_request_options(options)
@@ -21,7 +30,9 @@ module TextRazor
21
30
 
22
31
  def analyse(text)
23
32
  assert_text(text)
24
- options = {api_key: api_key}.merge(request_options)
33
+ options = {
34
+ api_key: api_key
35
+ }.merge(request_options)
25
36
 
26
37
  Response.new(Request.post(text, options))
27
38
  end
@@ -67,9 +78,31 @@ module TextRazor
67
78
  end
68
79
 
69
80
  def assign_request_options(options)
70
- @request_options = { extractors: DEFAULT_EXTRACTORS }
81
+ extractors = options.delete(:extractors)
82
+ assert_extractors(extractors)
83
+
84
+ cleanup_mode = options.delete(:cleanup_mode)
85
+ assert_cleanup_mode(cleanup_mode)
86
+
87
+ @request_options = {
88
+ extractors: extractors || DEFAULT_EXTRACTORS,
89
+ cleanup_mode: cleanup_mode || DEFAULT_CLEANUP_MODE
90
+ }
91
+
71
92
  REQUEST_OPTIONS.each do |key|
72
- @request_options[key] = options[key] if options[key]
93
+ @request_options[key] = options[key] unless options[key].nil?
94
+ end
95
+ end
96
+
97
+ def assert_extractors(extractors)
98
+ if extractors && !extractors.all? { |extractor| DEFAULT_EXTRACTORS.include?(extractor) }
99
+ raise UnsupportedExtractor.new('Unsupported extractor')
100
+ end
101
+ end
102
+
103
+ def assert_cleanup_mode(cleanup_mode)
104
+ if cleanup_mode && !VALID_CLEANUP_MODE_VALUES.include?(cleanup_mode)
105
+ raise UnsupportedCleanupMode.new('Unsupported clean up mode')
73
106
  end
74
107
  end
75
108
 
@@ -6,4 +6,4 @@ module TextRazor
6
6
  @secure = true
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -0,0 +1,24 @@
1
+ module TextRazor
2
+
3
+ class Entailment
4
+
5
+ extend Util
6
+
7
+ attr_reader :id, :word_positions, :prior_score, :context_score,
8
+ :score, :entailed_tree, :entailed_words
9
+
10
+ def initialize(params = {})
11
+ @type = []
12
+ params.each do |k, v|
13
+ instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
14
+ end
15
+ end
16
+
17
+ def self.create_from_hash(params)
18
+ params = Hash[params.map {|k, v| [standardize(k), v] }]
19
+ new(params)
20
+ end
21
+
22
+ end
23
+
24
+ end
@@ -0,0 +1,22 @@
1
+ module TextRazor
2
+
3
+ class Property
4
+
5
+ extend Util
6
+
7
+ attr_reader :id, :word_positions, :property_positions
8
+
9
+ def initialize(params = {})
10
+ params.each do |k, v|
11
+ instance_variable_set(:"@#{k}", v) if v && self.respond_to?(:"#{k}")
12
+ end
13
+ end
14
+
15
+ def self.create_from_hash(params)
16
+ params = Hash[params.map {|k, v| [standardize(k), v] }]
17
+ new(params)
18
+ end
19
+
20
+ end
21
+
22
+ end
@@ -0,0 +1,25 @@
1
+ module TextRazor
2
+
3
+ class Relation
4
+
5
+ attr_reader :id, :word_positions, :relation_params
6
+
7
+ def initialize(params = {})
8
+ @id = params[:id]
9
+ @word_positions = params[:wordPositions]
10
+ @relation_params = params[:params].map do |relation_param_hash|
11
+ RelationParam.create_from_hash(relation_param_hash)
12
+ end
13
+ end
14
+
15
+ def number_of_relation_params
16
+ @relation_params.size
17
+ end
18
+
19
+ def self.create_from_hash(params)
20
+ new(params)
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,18 @@
1
+ module TextRazor
2
+
3
+ class RelationParam
4
+
5
+ attr_reader :relation, :word_positions
6
+
7
+ def initialize(params = {})
8
+ @relation = params[:relation]
9
+ @word_positions = params[:wordPositions]
10
+ end
11
+
12
+ def self.create_from_hash(params)
13
+ new(params)
14
+ end
15
+
16
+ end
17
+
18
+ end
@@ -9,10 +9,14 @@ module TextRazor
9
9
 
10
10
  OPTIONS_MAPPING = {
11
11
  extractors: 'extractors',
12
- cleanup_html: 'cleanupHTML',
12
+ cleanup_mode: 'cleanup.mode',
13
+ cleanup_return_cleaned: 'cleanup.returnCleaned',
14
+ cleanup_return_raw: 'cleanup.returnRaw',
13
15
  language: 'languageOverride',
14
16
  filter_dbpedia_types: 'entities.filterDbpediaTypes',
15
- filter_freebase_types: 'entities.filterFreebaseTypes'
17
+ filter_freebase_types: 'entities.filterFreebaseTypes',
18
+ allow_overlap: 'entities.allowOverlap',
19
+ enrichment_queries: 'entities.enrichmentQueries'
16
20
  }
17
21
 
18
22
  def self.post(text, options)
@@ -8,7 +8,7 @@ module TextRazor
8
8
  Unauthorised = Class.new(StandardError)
9
9
  RequestEntityTooLong = Class.new(StandardError)
10
10
 
11
- attr_reader :raw_response
11
+ attr_reader :raw_response, :time
12
12
 
13
13
  def initialize(http_response)
14
14
  code = http_response.code
@@ -18,45 +18,78 @@ module TextRazor
18
18
  raise Unauthorised.new(body) if unauthorised?(code)
19
19
  raise RequestEntityTooLong.new(body) if request_entity_too_long?(code)
20
20
 
21
- @raw_response = ::JSON.parse(body)["response"]
21
+ json_body = ::JSON::parse(body, symbolize_names: true)
22
+
23
+ @time = json_body[:time].to_f
24
+ @ok = json_body[:ok]
25
+ @raw_response = json_body[:response]
22
26
  end
23
27
 
24
- def topics
25
- @topics ||= parse_topics(raw_response["topics"])
28
+ def ok?
29
+ @ok
26
30
  end
27
31
 
28
- def coarse_topics
29
- @coarse_topics ||= parse_topics(raw_response["coarseTopics"])
32
+ #TODO: Not in a successful response
33
+ #def error
34
+ #end
35
+
36
+ #def message
37
+ #end
38
+
39
+ def custom_annotation_output
40
+ @custom_annotation_output ||= raw_response[:customAnnotationOutput]
41
+ end
42
+
43
+ def cleaned_text
44
+ @cleaned_text ||= raw_response[:cleanedText]
45
+ end
46
+
47
+ def raw_text
48
+ @raw_text||= raw_response[:rawText]
49
+ end
50
+
51
+ def entailments
52
+ @entailments ||= parse_entailments
30
53
  end
31
54
 
32
55
  def entities
33
- raw_entities = raw_response["entities"]
34
- return nil if raw_entities.nil?
56
+ @entities ||= parse_entities
57
+ end
35
58
 
36
- @entities ||= begin
37
- raw_entities.map do |entity_hash|
38
- Entity.create_from_hash(entity_hash)
39
- end
40
- end
59
+ def coarse_topics
60
+ @coarse_topics ||= parse_coarse_topics
41
61
  end
42
62
 
43
- def words
44
- raw_sentences = raw_response["sentences"]
45
- return nil if raw_sentences.nil?
46
-
47
- @words ||= begin
48
- words = []
49
- raw_sentences.each do |sentence_hash|
50
- sentence_hash["words"].each do |word_hash|
51
- words << Word.create_from_hash(word_hash)
52
- end
53
- end
54
- words
55
- end
63
+ def topics
64
+ @topics ||= parse_topics
56
65
  end
57
66
 
58
67
  def phrases
59
- @phrases ||= parse_phrases(raw_response["nounPhrases"], words)
68
+ @phrases ||= parse_phrases
69
+ end
70
+
71
+ def words
72
+ @words ||= parse_words
73
+ end
74
+
75
+ def properties
76
+ @properties ||= parse_properties
77
+ end
78
+
79
+ def relations
80
+ @relations ||= parse_relations
81
+ end
82
+
83
+ def sentences
84
+ @sentences ||= parse_sentences
85
+ end
86
+
87
+ def language
88
+ raw_response[:language]
89
+ end
90
+
91
+ def language_is_reliable?
92
+ raw_response[:languageIsReliable]
60
93
  end
61
94
 
62
95
  private
@@ -73,22 +106,65 @@ module TextRazor
73
106
  code == 413
74
107
  end
75
108
 
76
- def parse_topics(raw_topics)
77
- return nil if raw_topics.nil?
109
+ def parse_entailments
110
+ parse(:entailment, raw_response[:entailments])
111
+ end
78
112
 
79
- raw_topics.map do |topic_hash|
80
- Topic.create_from_hash(topic_hash)
81
- end
113
+ def parse_entities
114
+ parse(:entity, raw_response[:entities])
82
115
  end
83
116
 
84
- def parse_phrases(raw_phrases, words)
85
- return nil if raw_phrases.nil?
117
+ def parse_coarse_topics
118
+ parse(:topic, raw_response[:coarseTopics])
119
+ end
120
+
121
+ def parse_topics
122
+ parse(:topic, raw_response[:topics])
123
+ end
124
+
125
+ def parse_phrases
126
+ raw_phrases = raw_response[:nounPhrases]
127
+ return if raw_phrases.nil?
86
128
 
87
129
  raw_phrases.map do |phrase_hash|
88
130
  Phrase.create_from_hash(phrase_hash, words)
89
131
  end
90
132
  end
91
133
 
134
+ def parse_words
135
+ raw_sentences = raw_response[:sentences]
136
+ return if raw_sentences.nil?
137
+
138
+ words = []
139
+ raw_sentences.each do |sentence_hash|
140
+ sentence_hash[:words].each do |word_hash|
141
+ words << Word.create_from_hash(word_hash)
142
+ end
143
+ end
144
+ words
145
+ end
146
+
147
+ def parse_properties
148
+ parse(:property, raw_response[:properties])
149
+ end
150
+
151
+ def parse_relations
152
+ parse(:relation, raw_response[:relations])
153
+ end
154
+
155
+ def parse_sentences
156
+ parse(:sentence, raw_response[:sentences])
157
+ end
158
+
159
+ def parse(type, data)
160
+ return nil if data.nil?
161
+
162
+ klass = Object.const_get("TextRazor::#{type.capitalize}")
163
+
164
+ data.map do |data_hash|
165
+ klass.create_from_hash(data_hash)
166
+ end
167
+ end
92
168
  end
93
169
 
94
170
  end