monkeylearn 0.2.2 → 3.0.0

@@ -2,7 +2,6 @@ require 'monkeylearn/configurable'
  require 'monkeylearn/exceptions'
  require 'monkeylearn/classifiers'
  require 'monkeylearn/extractors'
- require 'monkeylearn/pipelines'
 
 
  module Monkeylearn
@@ -11,8 +11,8 @@ module Monkeylearn
  class << self
  include Monkeylearn::Requests
 
- def categories
- return Categories
+ def tags
+ return Tags
  end
 
  def build_endpoint(*args)
@@ -24,114 +24,135 @@ module Monkeylearn
  if batch_size > max_size
  raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
  end
- min_size = Monkeylearn::Defaults.min_batch_size
- if batch_size < min_size
- raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
- end
  true
  end
 
- def classify(module_id, texts, options = {})
+ def classify(model_id, data, options = {})
  options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
  batch_size = options[:batch_size]
  validate_batch_size batch_size
 
- endpoint = build_endpoint(module_id, 'classify')
- query_params = { sandbox: true } if options[:sandbox]
+ endpoint = build_endpoint(model_id, 'classify')
+
+ if Monkeylearn.auto_batch
+ responses = (0...data.length).step(batch_size).collect do |start_idx|
+ sliced_data = { data: data[start_idx, batch_size] }
+ if options.key? :production_model
+ sliced_data[:production_model] = options[:production_model]
+ end
+ request(:post, endpoint, sliced_data)
+ end
 
- responses = (0...texts.length).step(batch_size).collect do |start_idx|
- data = { text_list: texts.slice(start_idx, batch_size) }
- response = request :post, endpoint, data, query_params
+ return Monkeylearn::MultiResponse.new(responses)
+ else
+ body = {data: data}
+ if options.key? :production_model
+ body[:production_model] = options[:production_model]
+ end
+ return request(:post, endpoint, body)
  end
+ end
 
- Monkeylearn::MultiResponse.new(responses)
+ def list(options = {})
+ request(:get, build_endpoint, nil, options)
  end
 
  def create(name, options = {})
  data = {
  name: name,
  description: options[:description],
+ algorithm: options[:algorithm],
  language: options[:language],
+ max_features: options[:max_features],
  ngram_range: options[:ngram_range],
- use_stemmer: options[:use_stemmer],
- stop_words: options[:stop_words],
+ use_stemming: options[:use_stemming],
+ preprocess_numbers: options[:preprocess_numbers],
+ preprocess_social_media: options[:preprocess_social_media],
+ normalize_weights: options[:normalize_weights],
+ stopwords: options[:stopwords],
+ whitelist: options[:whitelist],
+ }.delete_if { |k,v| v.nil? }
+ request(:post, build_endpoint, data)
+ end
+
+ def edit(module_id, options = {})
+ data = {
+ name: options[:name],
+ description: options[:description],
+ algorithm: options[:algorithm],
+ language: options[:language],
  max_features: options[:max_features],
- strip_stopwords: options[:strip_stopwords],
- is_multilabel: options[:is_multilabel],
- is_twitter_data: options[:is_twitter_data],
+ ngram_range: options[:ngram_range],
+ use_stemming: options[:use_stemming],
+ preprocess_numbers: options[:preprocess_numbers],
+ preprocess_social_media: options[:preprocess_social_media],
  normalize_weights: options[:normalize_weights],
- classifier: options[:classifier],
- industry: options[:industry],
- classifier_type: options[:classifier_type],
- text_type: options[:text_type],
- permissions: options[:permissions]
+ stopwords: options[:stopwords],
+ whitelist: options[:whitelist],
  }.delete_if { |k,v| v.nil? }
- request :post, build_endpoint, data
+ request(:patch, build_endpoint(module_id), data)
  end
 
  def detail(module_id)
- request :get, build_endpoint(module_id)
+ request(:get, build_endpoint(module_id))
  end
 
- def upload_samples(module_id, samples_with_categories)
- unless samples_with_categories.respond_to? :each
- raise MonkeylearnError, "The second param must be an enumerable type (i.e. an Array)."
- end
- endpoint = build_endpoint(module_id, 'samples')
- data = {
- samples: samples_with_categories.collect do |text, category_ids|
- {text: text, category_id: category_ids}
- end
- }
- request :post, endpoint, data
+ def deploy(module_id)
+ request(:post, build_endpoint(module_id, 'deploy'))
  end
 
- def train(module_id)
- request :post, build_endpoint(module_id, 'train')
- end
+ def upload_data(module_id, data)
+ endpoint = build_endpoint(module_id, 'data')
 
- def deploy(module_id)
- request :post, build_endpoint(module_id, 'deploy')
+ request(:post, endpoint, {data: data})
  end
 
  def delete(module_id)
- request :delete, build_endpoint(module_id)
+ request(:delete, build_endpoint(module_id))
  end
  end
  end
 
- module Categories
+ module Tags
  class << self
  include Monkeylearn::Requests
 
  def build_endpoint(module_id, *args)
- File.join('classifiers', module_id, 'categories', *args.collect { |x| x.to_s }) + '/'
+ File.join('classifiers', module_id, 'tags', *args.collect { |x| x.to_s }) + '/'
  end
 
- def create(module_id, name, parent_id)
+ def create(module_id, name, options = {})
  data = {
  name: name,
- parent_id: parent_id
  }
- request :post, build_endpoint(module_id), data
+ if options[:parent_id]
+ data[:parent_id] = options[:parent_id]
+ end
+ request(:post, build_endpoint(module_id), data)
  end
 
- def edit(module_id, category_id, name = nil, parent_id = nil)
- endpoint = build_endpoint(module_id, category_id)
+ def detail(module_id, tag_id)
+ request :get, build_endpoint(module_id, tag_id)
+ end
+
+ def edit(module_id, tag_id, options = {})
+ endpoint = build_endpoint(module_id, tag_id)
  data = {
- name: name,
- parent_id: parent_id
+ name: options[:name],
+ parent_id: options[:parent_id]
  }.delete_if { |k,v| v.nil? }
  request :patch, endpoint, data
  end
 
- def delete(module_id, category_id, samples_strategy = nil, samples_category_id = nil)
- endpoint = build_endpoint(module_id, category_id)
- data = {
- 'samples-strategy'.to_s => samples_strategy,
- 'samples-category-id'.to_s => samples_category_id
- }.delete_if { |k,v| v.nil? }
- request :delete, endpoint, data
+ def delete(module_id, tag_id, options = {})
+ endpoint = build_endpoint(module_id, tag_id)
+
+ data = nil
+ if options.key?(:move_data_to)
+ data = {move_data_to: options[:move_data_to]}
+ end
+
+ request(:delete, endpoint, data)
  end
  end
  end
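
Usage sketch for the hunks above: classify now takes a model ID and a plain array of texts ("data") instead of module_id/texts, and tag management moved to a Tags module reachable through the new tags helper. A minimal sketch, assuming the module shown here is exposed as Monkeylearn::Classifiers and that the Response/MultiResponse objects expose a body accessor (model ID and texts below are placeholders):

    require 'monkeylearn'

    model_id = 'cl_XXXXXXXX'                      # hypothetical classifier ID
    data = ['This is great!', 'Awful customer support']

    # With auto_batch on, inputs longer than batch_size are split and the
    # per-batch responses are wrapped in a MultiResponse.
    response = Monkeylearn::Classifiers.classify(model_id, data, batch_size: 200)
    puts response.body

    # Tags now take an options hash; parent_id is optional.
    Monkeylearn::Classifiers.tags.create(model_id, 'Positive')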
@@ -2,15 +2,16 @@ require 'monkeylearn/defaults'
 
  module Monkeylearn
  module Configurable
- attr_accessor :token, :api_endpoint
- attr_writer :api_endpoint
+ attr_accessor :token, :base_url, :retry_if_throttle, :auto_batch
+ attr_writer :base_url
 
  class << self
  def keys
  @keys ||= [
- :api_endpoint,
+ :base_url,
  :token,
- :wait_on_throttle
+ :retry_if_throttle,
+ :auto_batch,
  ]
  end
  end
@@ -26,12 +27,8 @@ module Monkeylearn
  self
  end
 
- def wait_on_throttle
- @wait_on_throttle
- end
-
- def api_endpoint
- File.join(@api_endpoint, "")
+ def base_url
+ File.join(@base_url, "")
  end
  end
  end
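
The configuration surface renames api_endpoint to base_url, wait_on_throttle to retry_if_throttle, and adds auto_batch. A sketch of setting the renamed options, assuming the gem still exposes a Monkeylearn.configure block that yields this Configurable module (the configure method itself sits outside this hunk):

    require 'monkeylearn'

    Monkeylearn.configure do |c|
      c.token = ENV.fetch('MONKEYLEARN_TOKEN')       # required for any request
      c.base_url = 'https://api.monkeylearn.com/v3/'
      c.retry_if_throttle = true                     # sleep and retry on 429 responses
      c.auto_batch = true                            # split large inputs into batches
    end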
@@ -2,35 +2,35 @@ module Monkeylearn
  module Defaults
  # Constants
  DEFAULT_BATCH_SIZE = 200
- MAX_BATCH_SIZE = 500
- MIN_BATCH_SIZE = 100
+ MAX_BATCH_SIZE = 200
  # Configurable options
- API_ENDPOINT = 'https://api.monkeylearn.com/v2/'
- WAIT_ON_THROTTLE = true
+ BASE_URL = 'https://api.monkeylearn.com/v3/'
+ RETRY_IF_THROTTLE = true
+ AUTO_BATCH = true
 
  class << self
  def options
  Hash[Monkeylearn::Configurable.keys.map{|key| [key, send(key)]}]
  end
 
- def api_endpoint
- ENV['MONKEYLEARN_API_ENDPOINT'] || API_ENDPOINT
+ def base_url
+ ENV['MONKEYLEARN_API_BASE_URL'] || BASE_URL
  end
 
  def token
  ENV['MONKEYLEARN_TOKEN'] || nil
  end
 
- def wait_on_throttle
- ENV['MONKEYLEARN_WAIT_ON_THROTTLE'] || WAIT_ON_THROTTLE
+ def retry_if_throttle
+ ENV['MONKEYLEARN_RETRY_IF_THROTTLE'] || RETRY_IF_THROTTLE
  end
 
- def max_batch_size
- MAX_BATCH_SIZE
+ def auto_batch
+ ENV['MONKEYLEARN_AUTO_BATCH'] || AUTO_BATCH
  end
 
- def min_batch_size
- MIN_BATCH_SIZE
+ def max_batch_size
+ MAX_BATCH_SIZE
  end
 
  def default_batch_size
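
These defaults can also be overridden from the environment via MONKEYLEARN_TOKEN, MONKEYLEARN_API_BASE_URL, MONKEYLEARN_RETRY_IF_THROTTLE and MONKEYLEARN_AUTO_BATCH. A small sketch; note that ENV values come back as strings, so setting MONKEYLEARN_RETRY_IF_THROTTLE to "false" is still truthy under the ENV[...] || CONSTANT pattern above:

    # e.g. in a boot file, before the first request
    ENV['MONKEYLEARN_TOKEN'] = 'your-api-token'            # placeholder
    ENV['MONKEYLEARN_API_BASE_URL'] = 'https://api.monkeylearn.com/v3/'

    require 'monkeylearn'
    Monkeylearn::Defaults.base_url        # => "https://api.monkeylearn.com/v3/"
    Monkeylearn::Defaults.max_batch_size  # => 200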
@@ -1,2 +1,76 @@
+ require 'json'
+
  class MonkeylearnError < StandardError
  end
+
+ class MonkeylearnResponseError < MonkeylearnError
+ attr_accessor :detail, :error_code, :status_code
+
+ def initialize(raw_response)
+ @response = raw_response
+
+
+ body = JSON.parse(raw_response.body)
+ @detail = body['detail']
+ @error_code = body['error_code']
+ @status_code = raw_response.status
+
+
+ super "#{@error_code}: #{@detail}"
+ end
+ end
+
+ # Request Validation Errors (422)
+
+ class RequestParamsError < MonkeylearnResponseError
+ end
+
+ # Authentication (401)
+
+
+ class AuthenticationError < MonkeylearnResponseError
+ end
+
+ # Forbidden (403)
+
+ class ForbiddenError < MonkeylearnResponseError
+ end
+
+
+ class ModelLimitError < ForbiddenError
+ end
+
+ # Not found Exceptions (404)
+
+ class ResourceNotFound < MonkeylearnResponseError
+ end
+
+
+ class ModelNotFound < ResourceNotFound
+ end
+
+
+ class TagNotFound < ResourceNotFound
+ end
+
+ # Rate limit (429)
+
+ class RateLimitError < MonkeylearnResponseError
+ end
+
+
+ class PlanQueryLimitError < MonkeylearnResponseError
+ end
+
+
+ class PlanRateLimitError < RateLimitError
+ end
+
+
+ class ConcurrencyRateLimitError < RateLimitError
+ end
+
+ # State errors (423)
+
+ class ModuleStateError < MonkeylearnResponseError
+ end
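
The new hierarchy maps HTTP status codes (and MonkeyLearn error_code values) to dedicated exception classes, all descending from MonkeylearnResponseError and carrying the parsed detail, error_code and status_code. A rescue sketch around a classify call (the model ID and text are placeholders):

    begin
      Monkeylearn::Classifiers.classify('cl_XXXXXXXX', ['some text'])
    rescue PlanRateLimitError, ConcurrencyRateLimitError => e
      warn "Throttled (#{e.error_code}): #{e.detail}"
    rescue MonkeylearnResponseError => e
      warn "MonkeyLearn API error #{e.status_code}: #{e.detail}"
    end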
@@ -20,26 +20,41 @@ module Monkeylearn
  if batch_size > max_size
  raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
  end
- min_size = Monkeylearn::Defaults.min_batch_size
- if batch_size < min_size
- raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
- end
  true
  end
 
- def extract(module_id, texts, options = {})
+ def extract(module_id, data, options = {})
  options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
  batch_size = options[:batch_size]
  validate_batch_size batch_size
 
  endpoint = build_endpoint(module_id, 'extract')
 
- responses = (0...texts.length).step(batch_size).collect do |start_idx|
- data = { text_list: texts.slice(start_idx, batch_size) }
- response = request :post, endpoint, data
+ if Monkeylearn.auto_batch
+ responses = (0...data.length).step(batch_size).collect do |start_idx|
+ sliced_data = {data: data.slice(start_idx, batch_size)}
+ if options.key? :production_model
+ sliced_data[:production_model] = options[:production_model]
+ end
+ request(:post, endpoint, sliced_data)
+ end
+ return Monkeylearn::MultiResponse.new(responses)
+ else
+ body = {data: data}
+ if options.key? :production_model
+ body[:production_model] = options[:production_model]
+ end
+ return request(:post, endpoint, body)
  end
 
- Monkeylearn::MultiResponse.new(responses)
+ end
+
+ def list(options = {})
+ request(:get, build_endpoint, nil, options)
+ end
+
+ def detail(module_id)
+ request(:get, build_endpoint(module_id))
  end
  end
  end
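
Extractors get the same data/auto_batch handling plus list and detail. A sketch of calling an extractor, assuming this module is exposed as Monkeylearn::Extractors (the extractor ID is a placeholder; production_model is the optional flag handled above):

    texts = ['Elon Musk has shared a photo of the spacesuit designed by SpaceX.']

    response = Monkeylearn::Extractors.extract('ex_XXXXXXXX', texts,
                                               production_model: true)
    puts response.body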
@@ -1,6 +1,7 @@
  require 'faraday'
  require 'json'
  require 'monkeylearn/response'
+ require 'monkeylearn/exceptions'
 
  module Monkeylearn
  module Requests
@@ -9,36 +10,98 @@ module Monkeylearn
  raise MonkeylearnError, 'Please initialize the Monkeylearn library with your API token'
  end
 
- response = get_connection.send(method) do |req|
- url = path.to_s
- if query_params
- url += '?' + URI.encode_www_form(query_params)
+ while true
+ response = get_connection.send(method) do |req|
+ url = path.to_s
+ if query_params
+ url += '?' + URI.encode_www_form(query_params)
+ end
+ req.url url
+ req.headers['Authorization'] = 'Token ' + Monkeylearn.token
+ req.headers['Content-Type'] = 'application/json'
+ req.headers['User-Agent'] = 'ruby-sdk'
+ if data
+ req.body = data.to_json
+ end
  end
- req.url url
- req.headers['Authorization'] = 'Token ' + Monkeylearn.token
- req.headers['Content-Type'] = 'application/json'
- req.headers['User-Agent'] = 'ruby-sdk'
- if data
- req.body = data.to_json
+
+ seconds = throttled?(response)
+ if seconds && Monkeylearn.retry_if_throttle
+ sleep seconds
+ else
+ break
  end
  end
- if Monkeylearn.wait_on_throttle && seconds = throttled?(response)
- # Request was throttled, wait 'seconds' seconds and retry
- sleep seconds
- response = request(method, path, data)
+
+ if response.status != 200
+ raise_for_status(response)
  end
+
  Monkeylearn::Response.new(response)
  end
 
+ def raise_for_status(raw_response)
+ body = JSON.parse(raw_response.body)
+ error_code = body.fetch("error_code", nil)
+ raise get_exception_class(raw_response.status, error_code).new(raw_response)
+ end
+
+ def get_exception_class(status_code, error_code)
+ case status_code
+ when 422
+ return RequestParamsError
+ when 401
+ return AuthenticationError
+ when 403
+ case error_code
+ when 'MODEL_LIMIT'
+ return ModelLimitError
+ else
+ return ForbiddenError
+ end
+ when 404
+ case error_code
+ when 'MODEL_NOT_FOUND'
+ return ModelNotFound
+ when 'TAG_NOT_FOUND'
+ return TagNotFound
+ else
+ return ResourceNotFound
+ end
+ when 429
+ case error_code
+ when 'PLAN_RATE_LIMIT'
+ return PlanRateLimitError
+ when 'CONCURRENCY_RATE_LIMIT'
+ return ConcurrencyRateLimitError
+ when 'PLAN_QUERY_LIMIT'
+ return PlanQueryLimitError
+ else
+ return RateLimitError
+ end
+ when 423
+ return ModuleStateError
+ else
+ return MonkeylearnResponseError
+ end
+ end
+
  def throttled?(response)
- return false if response.status != 429
- error_detail = JSON.parse(response.body)['detail']
- match = /available in ([\d]+) seconds/.match(error_detail)
- if match then match[1].to_i else false end
+ return false unless response.status == 429
+ body = JSON.parse(response.body)
+
+ case body['error_code']
+ when 'CONCURRENCY_RATE_LIMIT'
+ seconds = 2
+ when 'PLAN_RATE_LIMIT'
+ match = /([\d]+) seconds/.match(body['detail'])
+ seconds = if match then match[1].to_i else 60 end
+ end
+ seconds
  end
 
  def get_connection
- @conn ||= Faraday.new(url: Monkeylearn.api_endpoint) do |faraday|
+ @conn ||= Faraday.new(url: Monkeylearn.base_url) do |faraday|
  faraday.adapter Faraday.default_adapter # Net::HTTP
  end
  end
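
In the rewritten request loop, a 429 either triggers a sleep-and-retry (when retry_if_throttle is set, with the wait computed by throttled?) or is raised through raise_for_status like any other non-200. A sketch of handling throttling manually with retries disabled, assuming the configure block shown earlier (the model ID is a placeholder):

    Monkeylearn.configure { |c| c.retry_if_throttle = false }

    begin
      Monkeylearn::Classifiers.classify('cl_XXXXXXXX', ['some text'])
    rescue ConcurrencyRateLimitError
      sleep 2    # mirrors the SDK's own wait for concurrency limits
      retry
    rescue PlanRateLimitError => e
      warn e.detail    # contains the "... N seconds" message parsed by throttled?
    end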