monkeylearn 0.2.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@ require 'monkeylearn/configurable'
2
2
  require 'monkeylearn/exceptions'
3
3
  require 'monkeylearn/classifiers'
4
4
  require 'monkeylearn/extractors'
5
- require 'monkeylearn/pipelines'
6
5
 
7
6
 
8
7
  module Monkeylearn
@@ -11,8 +11,8 @@ module Monkeylearn
11
11
  class << self
12
12
  include Monkeylearn::Requests
13
13
 
14
- def categories
15
- return Categories
14
+ def tags
15
+ return Tags
16
16
  end
17
17
 
18
18
  def build_endpoint(*args)
@@ -24,114 +24,135 @@ module Monkeylearn
24
24
  if batch_size > max_size
25
25
  raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
26
26
  end
27
- min_size = Monkeylearn::Defaults.min_batch_size
28
- if batch_size < min_size
29
- raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
30
- end
31
27
  true
32
28
  end
33
29
 
34
- def classify(module_id, texts, options = {})
30
+ def classify(model_id, data, options = {})
35
31
  options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
36
32
  batch_size = options[:batch_size]
37
33
  validate_batch_size batch_size
38
34
 
39
- endpoint = build_endpoint(module_id, 'classify')
40
- query_params = { sandbox: true } if options[:sandbox]
35
+ endpoint = build_endpoint(model_id, 'classify')
36
+
37
+ if Monkeylearn.auto_batch
38
+ responses = (0...data.length).step(batch_size).collect do |start_idx|
39
+ sliced_data = { data: data[start_idx, batch_size] }
40
+ if options.key? :production_model
41
+ sliced_data[:production_model] = options[:production_model]
42
+ end
43
+ request(:post, endpoint, sliced_data)
44
+ end
41
45
 
42
- responses = (0...texts.length).step(batch_size).collect do |start_idx|
43
- data = { text_list: texts.slice(start_idx, batch_size) }
44
- response = request :post, endpoint, data, query_params
46
+ return Monkeylearn::MultiResponse.new(responses)
47
+ else
48
+ body = {data: data}
49
+ if options.key? :production_model
50
+ body[:production_model] = options[:production_model]
51
+ end
52
+ return request(:post, endpoint, body)
45
53
  end
54
+ end
46
55
 
47
- Monkeylearn::MultiResponse.new(responses)
56
+ def list(options = {})
57
+ request(:get, build_endpoint, nil, options)
48
58
  end
49
59
 
50
60
  def create(name, options = {})
51
61
  data = {
52
62
  name: name,
53
63
  description: options[:description],
64
+ algorithm: options[:algorithm],
54
65
  language: options[:language],
66
+ max_features: options[:max_features],
55
67
  ngram_range: options[:ngram_range],
56
- use_stemmer: options[:use_stemmer],
57
- stop_words: options[:stop_words],
68
+ use_stemming: options[:use_stemming],
69
+ preprocess_numbers: options[:preprocess_numbers],
70
+ preprocess_social_media: options[:preprocess_social_media],
71
+ normalize_weights: options[:normalize_weights],
72
+ stopwords: options[:stopwords],
73
+ whitelist: options[:whitelist],
74
+ }.delete_if { |k,v| v.nil? }
75
+ request(:post, build_endpoint, data)
76
+ end
77
+
78
+ def edit(module_id, options = {})
79
+ data = {
80
+ name: options[:name],
81
+ description: options[:description],
82
+ algorithm: options[:algorithm],
83
+ language: options[:language],
58
84
  max_features: options[:max_features],
59
- strip_stopwords: options[:strip_stopwords],
60
- is_multilabel: options[:is_multilabel],
61
- is_twitter_data: options[:is_twitter_data],
85
+ ngram_range: options[:ngram_range],
86
+ use_stemming: options[:use_stemming],
87
+ preprocess_numbers: options[:preprocess_numbers],
88
+ preprocess_social_media: options[:preprocess_social_media],
62
89
  normalize_weights: options[:normalize_weights],
63
- classifier: options[:classifier],
64
- industry: options[:industry],
65
- classifier_type: options[:classifier_type],
66
- text_type: options[:text_type],
67
- permissions: options[:permissions]
90
+ stopwords: options[:stopwords],
91
+ whitelist: options[:whitelist],
68
92
  }.delete_if { |k,v| v.nil? }
69
- request :post, build_endpoint, data
93
+ request(:patch, build_endpoint(module_id), data)
70
94
  end
71
95
 
72
96
  def detail(module_id)
73
- request :get, build_endpoint(module_id)
97
+ request(:get, build_endpoint(module_id))
74
98
  end
75
99
 
76
- def upload_samples(module_id, samples_with_categories)
77
- unless samples_with_categories.respond_to? :each
78
- raise MonkeylearnError, "The second param must be an enumerable type (i.e. an Array)."
79
- end
80
- endpoint = build_endpoint(module_id, 'samples')
81
- data = {
82
- samples: samples_with_categories.collect do |text, category_ids|
83
- {text: text, category_id: category_ids}
84
- end
85
- }
86
- request :post, endpoint, data
100
+ def deploy(module_id)
101
+ request(:post, build_endpoint(module_id, 'deploy'))
87
102
  end
88
103
 
89
- def train(module_id)
90
- request :post, build_endpoint(module_id, 'train')
91
- end
104
+ def upload_data(module_id, data)
105
+ endpoint = build_endpoint(module_id, 'data')
92
106
 
93
- def deploy(module_id)
94
- request :post, build_endpoint(module_id, 'deploy')
107
+ request(:post, endpoint, {data: data})
95
108
  end
96
109
 
97
110
  def delete(module_id)
98
- request :delete, build_endpoint(module_id)
111
+ request(:delete, build_endpoint(module_id))
99
112
  end
100
113
  end
101
114
  end
102
115
 
103
- module Categories
116
+ module Tags
104
117
  class << self
105
118
  include Monkeylearn::Requests
106
119
 
107
120
  def build_endpoint(module_id, *args)
108
- File.join('classifiers', module_id, 'categories', *args.collect { |x| x.to_s }) + '/'
121
+ File.join('classifiers', module_id, 'tags', *args.collect { |x| x.to_s }) + '/'
109
122
  end
110
123
 
111
- def create(module_id, name, parent_id)
124
+ def create(module_id, name, options = {})
112
125
  data = {
113
126
  name: name,
114
- parent_id: parent_id
115
127
  }
116
- request :post, build_endpoint(module_id), data
128
+ if options[:parent_id]
129
+ data[:parent_id] = options[:parent_id]
130
+ end
131
+ request(:post, build_endpoint(module_id), data)
117
132
  end
118
133
 
119
- def edit(module_id, category_id, name = nil, parent_id = nil)
120
- endpoint = build_endpoint(module_id, category_id)
134
+ def detail(module_id, tag_id)
135
+ request :get, build_endpoint(module_id, tag_id)
136
+ end
137
+
138
+ def edit(module_id, tag_id, options = {})
139
+ endpoint = build_endpoint(module_id, tag_id)
121
140
  data = {
122
- name: name,
123
- parent_id: parent_id
141
+ name: options[:name],
142
+ parent_id: options[:parent_id]
124
143
  }.delete_if { |k,v| v.nil? }
125
144
  request :patch, endpoint, data
126
145
  end
127
146
 
128
- def delete(module_id, category_id, samples_strategy = nil, samples_category_id = nil)
129
- endpoint = build_endpoint(module_id, category_id)
130
- data = {
131
- 'samples-strategy'.to_s => samples_strategy,
132
- 'samples-category-id'.to_s => samples_category_id
133
- }.delete_if { |k,v| v.nil? }
134
- request :delete, endpoint, data
147
+ def delete(module_id, tag_id, options = {})
148
+ endpoint = build_endpoint(module_id, tag_id)
149
+
150
+ data = nil
151
+ if options.key?(:move_data_to)
152
+ data = {move_data_to: options[:move_data_to]}
153
+ end
154
+
155
+ request(:delete, endpoint, data)
135
156
  end
136
157
  end
137
158
  end
@@ -2,15 +2,16 @@ require 'monkeylearn/defaults'
2
2
 
3
3
  module Monkeylearn
4
4
  module Configurable
5
- attr_accessor :token, :api_endpoint
6
- attr_writer :api_endpoint
5
+ attr_accessor :token, :base_url, :retry_if_throttle, :auto_batch
6
+ attr_writer :base_url
7
7
 
8
8
  class << self
9
9
  def keys
10
10
  @keys ||= [
11
- :api_endpoint,
11
+ :base_url,
12
12
  :token,
13
- :wait_on_throttle
13
+ :retry_if_throttle,
14
+ :auto_batch,
14
15
  ]
15
16
  end
16
17
  end
@@ -26,12 +27,8 @@ module Monkeylearn
26
27
  self
27
28
  end
28
29
 
29
- def wait_on_throttle
30
- @wait_on_throttle
31
- end
32
-
33
- def api_endpoint
34
- File.join(@api_endpoint, "")
30
+ def base_url
31
+ File.join(@base_url, "")
35
32
  end
36
33
  end
37
34
  end
@@ -2,35 +2,35 @@ module Monkeylearn
2
2
  module Defaults
3
3
  # Constants
4
4
  DEFAULT_BATCH_SIZE = 200
5
- MAX_BATCH_SIZE = 500
6
- MIN_BATCH_SIZE = 100
5
+ MAX_BATCH_SIZE = 200
7
6
  # Configurable options
8
- API_ENDPOINT = 'https://api.monkeylearn.com/v2/'
9
- WAIT_ON_THROTTLE = true
7
+ BASE_URL = 'https://api.monkeylearn.com/v3/'
8
+ RETRY_IF_THROTTLE = true
9
+ AUTO_BATCH = true
10
10
 
11
11
  class << self
12
12
  def options
13
13
  Hash[Monkeylearn::Configurable.keys.map{|key| [key, send(key)]}]
14
14
  end
15
15
 
16
- def api_endpoint
17
- ENV['MONKEYLEARN_API_ENDPOINT'] || API_ENDPOINT
16
+ def base_url
17
+ ENV['MONKEYLEARN_API_BASE_URL'] || BASE_URL
18
18
  end
19
19
 
20
20
  def token
21
21
  ENV['MONKEYLEARN_TOKEN'] || nil
22
22
  end
23
23
 
24
- def wait_on_throttle
25
- ENV['MONKEYLEARN_WAIT_ON_THROTTLE'] || WAIT_ON_THROTTLE
24
+ def retry_if_throttle
25
+ ENV['MONKEYLEARN_RETRY_IF_THROTTLE'] || RETRY_IF_THROTTLE
26
26
  end
27
27
 
28
- def max_batch_size
29
- MAX_BATCH_SIZE
28
+ def auto_batch
29
+ ENV['MONKEYLEARN_AUTO_BATCH'] || AUTO_BATCH
30
30
  end
31
31
 
32
- def min_batch_size
33
- MIN_BATCH_SIZE
32
+ def max_batch_size
33
+ MAX_BATCH_SIZE
34
34
  end
35
35
 
36
36
  def default_batch_size
@@ -1,2 +1,76 @@
1
+ require 'json'
2
+
1
3
  class MonkeylearnError < StandardError
2
4
  end
5
+
6
+ class MonkeylearnResponseError < MonkeylearnError
7
+ attr_accessor :detail, :error_code, :status_code
8
+
9
+ def initialize(raw_response)
10
+ @response = raw_response
11
+
12
+
13
+ body = JSON.parse(raw_response.body)
14
+ @detail = body['detail']
15
+ @error_code = body['error_code']
16
+ @status_code = raw_response.status
17
+
18
+
19
+ super "#{@error_code}: #{@detail}"
20
+ end
21
+ end
22
+
23
+ # Request Validation Errors (422)
24
+
25
+ class RequestParamsError < MonkeylearnResponseError
26
+ end
27
+
28
+ # Authentication (401)
29
+
30
+
31
+ class AuthenticationError < MonkeylearnResponseError
32
+ end
33
+
34
+ # Forbidden (403)
35
+
36
+ class ForbiddenError < MonkeylearnResponseError
37
+ end
38
+
39
+
40
+ class ModelLimitError < ForbiddenError
41
+ end
42
+
43
+ # Not found Exceptions (404)
44
+
45
+ class ResourceNotFound < MonkeylearnResponseError
46
+ end
47
+
48
+
49
+ class ModelNotFound < ResourceNotFound
50
+ end
51
+
52
+
53
+ class TagNotFound < ResourceNotFound
54
+ end
55
+
56
+ # Rate limit (429)
57
+
58
+ class RateLimitError < MonkeylearnResponseError
59
+ end
60
+
61
+
62
+ class PlanQueryLimitError < MonkeylearnResponseError
63
+ end
64
+
65
+
66
+ class PlanRateLimitError < RateLimitError
67
+ end
68
+
69
+
70
+ class ConcurrencyRateLimitError < RateLimitError
71
+ end
72
+
73
+ # State errors < 423)
74
+
75
+ class ModuleStateError < MonkeylearnResponseError
76
+ end
@@ -20,26 +20,41 @@ module Monkeylearn
20
20
  if batch_size > max_size
21
21
  raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
22
22
  end
23
- min_size = Monkeylearn::Defaults.min_batch_size
24
- if batch_size < min_size
25
- raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
26
- end
27
23
  true
28
24
  end
29
25
 
30
- def extract(module_id, texts, options = {})
26
+ def extract(module_id, data, options = {})
31
27
  options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
32
28
  batch_size = options[:batch_size]
33
29
  validate_batch_size batch_size
34
30
 
35
31
  endpoint = build_endpoint(module_id, 'extract')
36
32
 
37
- responses = (0...texts.length).step(batch_size).collect do |start_idx|
38
- data = { text_list: texts.slice(start_idx, batch_size) }
39
- response = request :post, endpoint, data
33
+ if Monkeylearn.auto_batch
34
+ responses = (0...data.length).step(batch_size).collect do |start_idx|
35
+ sliced_data = {data: data.slice(start_idx, batch_size)}
36
+ if options.key? :production_model
37
+ sliced_data[:production_model] = options[:production_model]
38
+ end
39
+ request(:post, endpoint, sliced_data)
40
+ end
41
+ return Monkeylearn::MultiResponse.new(responses)
42
+ else
43
+ body = {data: data}
44
+ if options.key? :production_model
45
+ body[:production_model] = options[:production_model]
46
+ end
47
+ return request(:post, endpoint, body)
40
48
  end
41
49
 
42
- Monkeylearn::MultiResponse.new(responses)
50
+ end
51
+
52
+ def list(options = {})
53
+ request(:get, build_endpoint, nil, options)
54
+ end
55
+
56
+ def detail(module_id)
57
+ request(:get, build_endpoint(module_id))
43
58
  end
44
59
  end
45
60
  end
@@ -1,6 +1,7 @@
1
1
  require 'faraday'
2
2
  require 'json'
3
3
  require 'monkeylearn/response'
4
+ require 'monkeylearn/exceptions'
4
5
 
5
6
  module Monkeylearn
6
7
  module Requests
@@ -9,36 +10,98 @@ module Monkeylearn
9
10
  raise MonkeylearnError, 'Please initialize the Monkeylearn library with your API token'
10
11
  end
11
12
 
12
- response = get_connection.send(method) do |req|
13
- url = path.to_s
14
- if query_params
15
- url += '?' + URI.encode_www_form(query_params)
13
+ while true
14
+ response = get_connection.send(method) do |req|
15
+ url = path.to_s
16
+ if query_params
17
+ url += '?' + URI.encode_www_form(query_params)
18
+ end
19
+ req.url url
20
+ req.headers['Authorization'] = 'Token ' + Monkeylearn.token
21
+ req.headers['Content-Type'] = 'application/json'
22
+ req.headers['User-Agent'] = 'ruby-sdk'
23
+ if data
24
+ req.body = data.to_json
25
+ end
16
26
  end
17
- req.url url
18
- req.headers['Authorization'] = 'Token ' + Monkeylearn.token
19
- req.headers['Content-Type'] = 'application/json'
20
- req.headers['User-Agent'] = 'ruby-sdk'
21
- if data
22
- req.body = data.to_json
27
+
28
+ seconds = throttled?(response)
29
+ if seconds && Monkeylearn.retry_if_throttle
30
+ sleep seconds
31
+ else
32
+ break
23
33
  end
24
34
  end
25
- if Monkeylearn.wait_on_throttle && seconds = throttled?(response)
26
- # Request was throttled, wait 'seconds' seconds and retry
27
- sleep seconds
28
- response = request(method, path, data)
35
+
36
+ if response.status != 200
37
+ raise_for_status(response)
29
38
  end
39
+
30
40
  Monkeylearn::Response.new(response)
31
41
  end
32
42
 
43
+ def raise_for_status(raw_response)
44
+ body = JSON.parse(raw_response.body)
45
+ error_code = body.fetch("error_code", nil)
46
+ raise get_exception_class(raw_response.status, error_code).new(raw_response)
47
+ end
48
+
49
+ def get_exception_class(status_code, error_code)
50
+ case status_code
51
+ when 422
52
+ return RequestParamsError
53
+ when 401
54
+ return AuthenticationError
55
+ when 403
56
+ case error_code
57
+ when 'MODEL_LIMIT'
58
+ return ModelLimitError
59
+ else
60
+ return ForbiddenError
61
+ end
62
+ when 404
63
+ case error_code
64
+ when 'MODEL_NOT_FOUND'
65
+ return ModelNotFound
66
+ when 'TAG_NOT_FOUND'
67
+ return TagNotFound
68
+ else
69
+ return ResourceNotFound
70
+ end
71
+ when 429
72
+ case error_code
73
+ when 'PLAN_RATE_LIMIT'
74
+ return PlanRateLimitError
75
+ when 'CONCURRENCY_RATE_LIMIT'
76
+ return ConcurrencyRateLimitError
77
+ when 'PLAN_QUERY_LIMIT'
78
+ return PlanQueryLimitError
79
+ else
80
+ return RateLimitError
81
+ end
82
+ when 423
83
+ return ModuleStateError
84
+ else
85
+ return MonkeylearnResponseError
86
+ end
87
+ end
88
+
33
89
  def throttled?(response)
34
- return false if response.status != 429
35
- error_detail = JSON.parse(response.body)['detail']
36
- match = /available in ([\d]+) seconds/.match(error_detail)
37
- if match then match[1].to_i else false end
90
+ return false unless response.status == 429
91
+ body = JSON.parse(response.body)
92
+
93
+ case body['error_code']
94
+ when 'CONCURRENCY_RATE_LIMIT'
95
+ seconds = 2
96
+ when 'PLAN_RATE_LIMIT'
97
+ match = /([\d]+) seconds/.match(body['detail'])
98
+ seconds = if match then match[1].to_i else 60 end
99
+ end
100
+ seconds
38
101
  end
39
102
 
40
103
  def get_connection
41
- @conn ||= Faraday.new(url: Monkeylearn.api_endpoint) do |faraday|
104
+ @conn ||= Faraday.new(url: Monkeylearn.base_url) do |faraday|
42
105
  faraday.adapter Faraday.default_adapter # Net::HTTP
43
106
  end
44
107
  end