monkeylearn 0.2.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +535 -65
- data/lib/monkeylearn.rb +0 -1
- data/lib/monkeylearn/classifiers.rb +79 -58
- data/lib/monkeylearn/configurable.rb +7 -10
- data/lib/monkeylearn/defaults.rb +12 -12
- data/lib/monkeylearn/exceptions.rb +74 -0
- data/lib/monkeylearn/extractors.rb +24 -9
- data/lib/monkeylearn/requests.rb +82 -19
- data/lib/monkeylearn/response.rb +20 -7
- data/monkeylearn.gemspec +3 -2
- metadata +11 -12
- data/lib/monkeylearn/pipelines.rb +0 -28
data/lib/monkeylearn.rb
CHANGED
@@ -11,8 +11,8 @@ module Monkeylearn
|
|
11
11
|
class << self
|
12
12
|
include Monkeylearn::Requests
|
13
13
|
|
14
|
-
def
|
15
|
-
return
|
14
|
+
def tags
|
15
|
+
return Tags
|
16
16
|
end
|
17
17
|
|
18
18
|
def build_endpoint(*args)
|
@@ -24,114 +24,135 @@ module Monkeylearn
|
|
24
24
|
if batch_size > max_size
|
25
25
|
raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
|
26
26
|
end
|
27
|
-
min_size = Monkeylearn::Defaults.min_batch_size
|
28
|
-
if batch_size < min_size
|
29
|
-
raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
|
30
|
-
end
|
31
27
|
true
|
32
28
|
end
|
33
29
|
|
34
|
-
def classify(
|
30
|
+
def classify(model_id, data, options = {})
|
35
31
|
options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
|
36
32
|
batch_size = options[:batch_size]
|
37
33
|
validate_batch_size batch_size
|
38
34
|
|
39
|
-
endpoint = build_endpoint(
|
40
|
-
|
35
|
+
endpoint = build_endpoint(model_id, 'classify')
|
36
|
+
|
37
|
+
if Monkeylearn.auto_batch
|
38
|
+
responses = (0...data.length).step(batch_size).collect do |start_idx|
|
39
|
+
sliced_data = { data: data[start_idx, batch_size] }
|
40
|
+
if options.key? :production_model
|
41
|
+
sliced_data[:production_model] = options[:production_model]
|
42
|
+
end
|
43
|
+
request(:post, endpoint, sliced_data)
|
44
|
+
end
|
41
45
|
|
42
|
-
|
43
|
-
|
44
|
-
|
46
|
+
return Monkeylearn::MultiResponse.new(responses)
|
47
|
+
else
|
48
|
+
body = {data: data}
|
49
|
+
if options.key? :production_model
|
50
|
+
body[:production_model] = options[:production_model]
|
51
|
+
end
|
52
|
+
return request(:post, endpoint, body)
|
45
53
|
end
|
54
|
+
end
|
46
55
|
|
47
|
-
|
56
|
+
def list(options = {})
|
57
|
+
request(:get, build_endpoint, nil, options)
|
48
58
|
end
|
49
59
|
|
50
60
|
def create(name, options = {})
|
51
61
|
data = {
|
52
62
|
name: name,
|
53
63
|
description: options[:description],
|
64
|
+
algorithm: options[:algorithm],
|
54
65
|
language: options[:language],
|
66
|
+
max_features: options[:max_features],
|
55
67
|
ngram_range: options[:ngram_range],
|
56
|
-
|
57
|
-
|
68
|
+
use_stemming: options[:use_stemming],
|
69
|
+
preprocess_numbers: options[:preprocess_numbers],
|
70
|
+
preprocess_social_media: options[:preprocess_social_media],
|
71
|
+
normalize_weights: options[:normalize_weights],
|
72
|
+
stopwords: options[:stopwords],
|
73
|
+
whitelist: options[:whitelist],
|
74
|
+
}.delete_if { |k,v| v.nil? }
|
75
|
+
request(:post, build_endpoint, data)
|
76
|
+
end
|
77
|
+
|
78
|
+
def edit(module_id, options = {})
|
79
|
+
data = {
|
80
|
+
name: options[:name],
|
81
|
+
description: options[:description],
|
82
|
+
algorithm: options[:algorithm],
|
83
|
+
language: options[:language],
|
58
84
|
max_features: options[:max_features],
|
59
|
-
|
60
|
-
|
61
|
-
|
85
|
+
ngram_range: options[:ngram_range],
|
86
|
+
use_stemming: options[:use_stemming],
|
87
|
+
preprocess_numbers: options[:preprocess_numbers],
|
88
|
+
preprocess_social_media: options[:preprocess_social_media],
|
62
89
|
normalize_weights: options[:normalize_weights],
|
63
|
-
|
64
|
-
|
65
|
-
classifier_type: options[:classifier_type],
|
66
|
-
text_type: options[:text_type],
|
67
|
-
permissions: options[:permissions]
|
90
|
+
stopwords: options[:stopwords],
|
91
|
+
whitelist: options[:whitelist],
|
68
92
|
}.delete_if { |k,v| v.nil? }
|
69
|
-
request
|
93
|
+
request(:patch, build_endpoint(module_id), data)
|
70
94
|
end
|
71
95
|
|
72
96
|
def detail(module_id)
|
73
|
-
request
|
97
|
+
request(:get, build_endpoint(module_id))
|
74
98
|
end
|
75
99
|
|
76
|
-
def
|
77
|
-
|
78
|
-
raise MonkeylearnError, "The second param must be an enumerable type (i.e. an Array)."
|
79
|
-
end
|
80
|
-
endpoint = build_endpoint(module_id, 'samples')
|
81
|
-
data = {
|
82
|
-
samples: samples_with_categories.collect do |text, category_ids|
|
83
|
-
{text: text, category_id: category_ids}
|
84
|
-
end
|
85
|
-
}
|
86
|
-
request :post, endpoint, data
|
100
|
+
def deploy(module_id)
|
101
|
+
request(:post, build_endpoint(module_id, 'deploy'))
|
87
102
|
end
|
88
103
|
|
89
|
-
def
|
90
|
-
|
91
|
-
end
|
104
|
+
def upload_data(module_id, data)
|
105
|
+
endpoint = build_endpoint(module_id, 'data')
|
92
106
|
|
93
|
-
|
94
|
-
request :post, build_endpoint(module_id, 'deploy')
|
107
|
+
request(:post, endpoint, {data: data})
|
95
108
|
end
|
96
109
|
|
97
110
|
def delete(module_id)
|
98
|
-
request
|
111
|
+
request(:delete, build_endpoint(module_id))
|
99
112
|
end
|
100
113
|
end
|
101
114
|
end
|
102
115
|
|
103
|
-
module
|
116
|
+
module Tags
|
104
117
|
class << self
|
105
118
|
include Monkeylearn::Requests
|
106
119
|
|
107
120
|
def build_endpoint(module_id, *args)
|
108
|
-
File.join('classifiers', module_id, '
|
121
|
+
File.join('classifiers', module_id, 'tags', *args.collect { |x| x.to_s }) + '/'
|
109
122
|
end
|
110
123
|
|
111
|
-
def create(module_id, name,
|
124
|
+
def create(module_id, name, options = {})
|
112
125
|
data = {
|
113
126
|
name: name,
|
114
|
-
parent_id: parent_id
|
115
127
|
}
|
116
|
-
|
128
|
+
if options[:parent_id]
|
129
|
+
data[:parent_id] = options[:parent_id]
|
130
|
+
end
|
131
|
+
request(:post, build_endpoint(module_id), data)
|
117
132
|
end
|
118
133
|
|
119
|
-
def
|
120
|
-
|
134
|
+
def detail(module_id, tag_id)
|
135
|
+
request :get, build_endpoint(module_id, tag_id)
|
136
|
+
end
|
137
|
+
|
138
|
+
def edit(module_id, tag_id, options = {})
|
139
|
+
endpoint = build_endpoint(module_id, tag_id)
|
121
140
|
data = {
|
122
|
-
name: name,
|
123
|
-
parent_id: parent_id
|
141
|
+
name: options[:name],
|
142
|
+
parent_id: options[:parent_id]
|
124
143
|
}.delete_if { |k,v| v.nil? }
|
125
144
|
request :patch, endpoint, data
|
126
145
|
end
|
127
146
|
|
128
|
-
def delete(module_id,
|
129
|
-
endpoint = build_endpoint(module_id,
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
147
|
+
def delete(module_id, tag_id, options = {})
|
148
|
+
endpoint = build_endpoint(module_id, tag_id)
|
149
|
+
|
150
|
+
data = nil
|
151
|
+
if options.key?(:move_data_to)
|
152
|
+
data = {move_data_to: options[:move_data_to]}
|
153
|
+
end
|
154
|
+
|
155
|
+
request(:delete, endpoint, data)
|
135
156
|
end
|
136
157
|
end
|
137
158
|
end
|
@@ -2,15 +2,16 @@ require 'monkeylearn/defaults'
|
|
2
2
|
|
3
3
|
module Monkeylearn
|
4
4
|
module Configurable
|
5
|
-
attr_accessor :token, :
|
6
|
-
attr_writer :
|
5
|
+
attr_accessor :token, :base_url, :retry_if_throttle, :auto_batch
|
6
|
+
attr_writer :base_url
|
7
7
|
|
8
8
|
class << self
|
9
9
|
def keys
|
10
10
|
@keys ||= [
|
11
|
-
:
|
11
|
+
:base_url,
|
12
12
|
:token,
|
13
|
-
:
|
13
|
+
:retry_if_throttle,
|
14
|
+
:auto_batch,
|
14
15
|
]
|
15
16
|
end
|
16
17
|
end
|
@@ -26,12 +27,8 @@ module Monkeylearn
|
|
26
27
|
self
|
27
28
|
end
|
28
29
|
|
29
|
-
def
|
30
|
-
@
|
31
|
-
end
|
32
|
-
|
33
|
-
def api_endpoint
|
34
|
-
File.join(@api_endpoint, "")
|
30
|
+
def base_url
|
31
|
+
File.join(@base_url, "")
|
35
32
|
end
|
36
33
|
end
|
37
34
|
end
|
data/lib/monkeylearn/defaults.rb
CHANGED
@@ -2,35 +2,35 @@ module Monkeylearn
|
|
2
2
|
module Defaults
|
3
3
|
# Constants
|
4
4
|
DEFAULT_BATCH_SIZE = 200
|
5
|
-
MAX_BATCH_SIZE =
|
6
|
-
MIN_BATCH_SIZE = 100
|
5
|
+
MAX_BATCH_SIZE = 200
|
7
6
|
# Configurable options
|
8
|
-
|
9
|
-
|
7
|
+
BASE_URL = 'https://api.monkeylearn.com/v3/'
|
8
|
+
RETRY_IF_THROTTLE = true
|
9
|
+
AUTO_BATCH = true
|
10
10
|
|
11
11
|
class << self
|
12
12
|
def options
|
13
13
|
Hash[Monkeylearn::Configurable.keys.map{|key| [key, send(key)]}]
|
14
14
|
end
|
15
15
|
|
16
|
-
def
|
17
|
-
ENV['
|
16
|
+
def base_url
|
17
|
+
ENV['MONKEYLEARN_API_BASE_URL'] || BASE_URL
|
18
18
|
end
|
19
19
|
|
20
20
|
def token
|
21
21
|
ENV['MONKEYLEARN_TOKEN'] || nil
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
ENV['
|
24
|
+
def retry_if_throttle
|
25
|
+
ENV['MONKEYLEARN_RETRY_IF_THROTTLE'] || RETRY_IF_THROTTLE
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
29
|
-
|
28
|
+
def auto_batch
|
29
|
+
ENV['MONKEYLEARN_AUTO_BATCH'] || AUTO_BATCH
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
33
|
-
|
32
|
+
def max_batch_size
|
33
|
+
MAX_BATCH_SIZE
|
34
34
|
end
|
35
35
|
|
36
36
|
def default_batch_size
|
@@ -1,2 +1,76 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
1
3
|
class MonkeylearnError < StandardError
|
2
4
|
end
|
5
|
+
|
6
|
+
class MonkeylearnResponseError < MonkeylearnError
|
7
|
+
attr_accessor :detail, :error_code, :status_code
|
8
|
+
|
9
|
+
def initialize(raw_response)
|
10
|
+
@response = raw_response
|
11
|
+
|
12
|
+
|
13
|
+
body = JSON.parse(raw_response.body)
|
14
|
+
@detail = body['detail']
|
15
|
+
@error_code = body['error_code']
|
16
|
+
@status_code = raw_response.status
|
17
|
+
|
18
|
+
|
19
|
+
super "#{@error_code}: #{@detail}"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Request Validation Errors (422)
|
24
|
+
|
25
|
+
class RequestParamsError < MonkeylearnResponseError
|
26
|
+
end
|
27
|
+
|
28
|
+
# Authentication (401)
|
29
|
+
|
30
|
+
|
31
|
+
class AuthenticationError < MonkeylearnResponseError
|
32
|
+
end
|
33
|
+
|
34
|
+
# Forbidden (403)
|
35
|
+
|
36
|
+
class ForbiddenError < MonkeylearnResponseError
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
class ModelLimitError < ForbiddenError
|
41
|
+
end
|
42
|
+
|
43
|
+
# Not found Exceptions (404)
|
44
|
+
|
45
|
+
class ResourceNotFound < MonkeylearnResponseError
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
class ModelNotFound < ResourceNotFound
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
class TagNotFound < ResourceNotFound
|
54
|
+
end
|
55
|
+
|
56
|
+
# Rate limit (429)
|
57
|
+
|
58
|
+
class RateLimitError < MonkeylearnResponseError
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
class PlanQueryLimitError < MonkeylearnResponseError
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
class PlanRateLimitError < RateLimitError
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
class ConcurrencyRateLimitError < RateLimitError
|
71
|
+
end
|
72
|
+
|
73
|
+
# State errors (423)
|
74
|
+
|
75
|
+
class ModuleStateError < MonkeylearnResponseError
|
76
|
+
end
|
@@ -20,26 +20,41 @@ module Monkeylearn
|
|
20
20
|
if batch_size > max_size
|
21
21
|
raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
|
22
22
|
end
|
23
|
-
min_size = Monkeylearn::Defaults.min_batch_size
|
24
|
-
if batch_size < min_size
|
25
|
-
raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
|
26
|
-
end
|
27
23
|
true
|
28
24
|
end
|
29
25
|
|
30
|
-
def extract(module_id,
|
26
|
+
def extract(module_id, data, options = {})
|
31
27
|
options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
|
32
28
|
batch_size = options[:batch_size]
|
33
29
|
validate_batch_size batch_size
|
34
30
|
|
35
31
|
endpoint = build_endpoint(module_id, 'extract')
|
36
32
|
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
if Monkeylearn.auto_batch
|
34
|
+
responses = (0...data.length).step(batch_size).collect do |start_idx|
|
35
|
+
sliced_data = {data: data.slice(start_idx, batch_size)}
|
36
|
+
if options.key? :production_model
|
37
|
+
sliced_data[:production_model] = options[:production_model]
|
38
|
+
end
|
39
|
+
request(:post, endpoint, sliced_data)
|
40
|
+
end
|
41
|
+
return Monkeylearn::MultiResponse.new(responses)
|
42
|
+
else
|
43
|
+
body = {data: data}
|
44
|
+
if options.key? :production_model
|
45
|
+
body[:production_model] = options[:production_model]
|
46
|
+
end
|
47
|
+
return request(:post, endpoint, body)
|
40
48
|
end
|
41
49
|
|
42
|
-
|
50
|
+
end
|
51
|
+
|
52
|
+
def list(options = {})
|
53
|
+
request(:get, build_endpoint, nil, options)
|
54
|
+
end
|
55
|
+
|
56
|
+
def detail(module_id)
|
57
|
+
request(:get, build_endpoint(module_id))
|
43
58
|
end
|
44
59
|
end
|
45
60
|
end
|
data/lib/monkeylearn/requests.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'faraday'
|
2
2
|
require 'json'
|
3
3
|
require 'monkeylearn/response'
|
4
|
+
require 'monkeylearn/exceptions'
|
4
5
|
|
5
6
|
module Monkeylearn
|
6
7
|
module Requests
|
@@ -9,36 +10,98 @@ module Monkeylearn
|
|
9
10
|
raise MonkeylearnError, 'Please initialize the Monkeylearn library with your API token'
|
10
11
|
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
while true
|
14
|
+
response = get_connection.send(method) do |req|
|
15
|
+
url = path.to_s
|
16
|
+
if query_params
|
17
|
+
url += '?' + URI.encode_www_form(query_params)
|
18
|
+
end
|
19
|
+
req.url url
|
20
|
+
req.headers['Authorization'] = 'Token ' + Monkeylearn.token
|
21
|
+
req.headers['Content-Type'] = 'application/json'
|
22
|
+
req.headers['User-Agent'] = 'ruby-sdk'
|
23
|
+
if data
|
24
|
+
req.body = data.to_json
|
25
|
+
end
|
16
26
|
end
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
27
|
+
|
28
|
+
seconds = throttled?(response)
|
29
|
+
if seconds && Monkeylearn.retry_if_throttle
|
30
|
+
sleep seconds
|
31
|
+
else
|
32
|
+
break
|
23
33
|
end
|
24
34
|
end
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
response = request(method, path, data)
|
35
|
+
|
36
|
+
if response.status != 200
|
37
|
+
raise_for_status(response)
|
29
38
|
end
|
39
|
+
|
30
40
|
Monkeylearn::Response.new(response)
|
31
41
|
end
|
32
42
|
|
43
|
+
def raise_for_status(raw_response)
|
44
|
+
body = JSON.parse(raw_response.body)
|
45
|
+
error_code = body.fetch("error_code", nil)
|
46
|
+
raise get_exception_class(raw_response.status, error_code).new(raw_response)
|
47
|
+
end
|
48
|
+
|
49
|
+
def get_exception_class(status_code, error_code)
|
50
|
+
case status_code
|
51
|
+
when 422
|
52
|
+
return RequestParamsError
|
53
|
+
when 401
|
54
|
+
return AuthenticationError
|
55
|
+
when 403
|
56
|
+
case error_code
|
57
|
+
when 'MODEL_LIMIT'
|
58
|
+
return ModelLimitError
|
59
|
+
else
|
60
|
+
return ForbiddenError
|
61
|
+
end
|
62
|
+
when 404
|
63
|
+
case error_code
|
64
|
+
when 'MODEL_NOT_FOUND'
|
65
|
+
return ModelNotFound
|
66
|
+
when 'TAG_NOT_FOUND'
|
67
|
+
return TagNotFound
|
68
|
+
else
|
69
|
+
return ResourceNotFound
|
70
|
+
end
|
71
|
+
when 429
|
72
|
+
case error_code
|
73
|
+
when 'PLAN_RATE_LIMIT'
|
74
|
+
return PlanRateLimitError
|
75
|
+
when 'CONCURRENCY_RATE_LIMIT'
|
76
|
+
return ConcurrencyRateLimitError
|
77
|
+
when 'PLAN_QUERY_LIMIT'
|
78
|
+
return PlanQueryLimitError
|
79
|
+
else
|
80
|
+
return RateLimitError
|
81
|
+
end
|
82
|
+
when 423
|
83
|
+
return ModuleStateError
|
84
|
+
else
|
85
|
+
return MonkeylearnResponseError
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
33
89
|
def throttled?(response)
|
34
|
-
return false
|
35
|
-
|
36
|
-
|
37
|
-
|
90
|
+
return false unless response.status == 429
|
91
|
+
body = JSON.parse(response.body)
|
92
|
+
|
93
|
+
case body['error_code']
|
94
|
+
when 'CONCURRENCY_RATE_LIMIT'
|
95
|
+
seconds = 2
|
96
|
+
when 'PLAN_RATE_LIMIT'
|
97
|
+
match = /([\d]+) seconds/.match(body['detail'])
|
98
|
+
seconds = if match then match[1].to_i else 60 end
|
99
|
+
end
|
100
|
+
seconds
|
38
101
|
end
|
39
102
|
|
40
103
|
def get_connection
|
41
|
-
@conn ||= Faraday.new(url: Monkeylearn.
|
104
|
+
@conn ||= Faraday.new(url: Monkeylearn.base_url) do |faraday|
|
42
105
|
faraday.adapter Faraday.default_adapter # Net::HTTP
|
43
106
|
end
|
44
107
|
end
|