monkeylearn 0.2.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +535 -65
- data/lib/monkeylearn.rb +0 -1
- data/lib/monkeylearn/classifiers.rb +79 -58
- data/lib/monkeylearn/configurable.rb +7 -10
- data/lib/monkeylearn/defaults.rb +12 -12
- data/lib/monkeylearn/exceptions.rb +74 -0
- data/lib/monkeylearn/extractors.rb +24 -9
- data/lib/monkeylearn/requests.rb +82 -19
- data/lib/monkeylearn/response.rb +20 -7
- data/monkeylearn.gemspec +3 -2
- metadata +11 -12
- data/lib/monkeylearn/pipelines.rb +0 -28
data/lib/monkeylearn.rb
CHANGED
@@ -11,8 +11,8 @@ module Monkeylearn
|
|
11
11
|
class << self
|
12
12
|
include Monkeylearn::Requests
|
13
13
|
|
14
|
-
def
|
15
|
-
return
|
14
|
+
def tags
|
15
|
+
return Tags
|
16
16
|
end
|
17
17
|
|
18
18
|
def build_endpoint(*args)
|
@@ -24,114 +24,135 @@ module Monkeylearn
|
|
24
24
|
if batch_size > max_size
|
25
25
|
raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
|
26
26
|
end
|
27
|
-
min_size = Monkeylearn::Defaults.min_batch_size
|
28
|
-
if batch_size < min_size
|
29
|
-
raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
|
30
|
-
end
|
31
27
|
true
|
32
28
|
end
|
33
29
|
|
34
|
-
def classify(
|
30
|
+
def classify(model_id, data, options = {})
|
35
31
|
options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
|
36
32
|
batch_size = options[:batch_size]
|
37
33
|
validate_batch_size batch_size
|
38
34
|
|
39
|
-
endpoint = build_endpoint(
|
40
|
-
|
35
|
+
endpoint = build_endpoint(model_id, 'classify')
|
36
|
+
|
37
|
+
if Monkeylearn.auto_batch
|
38
|
+
responses = (0...data.length).step(batch_size).collect do |start_idx|
|
39
|
+
sliced_data = { data: data[start_idx, batch_size] }
|
40
|
+
if options.key? :production_model
|
41
|
+
sliced_data[:production_model] = options[:production_model]
|
42
|
+
end
|
43
|
+
request(:post, endpoint, sliced_data)
|
44
|
+
end
|
41
45
|
|
42
|
-
|
43
|
-
|
44
|
-
|
46
|
+
return Monkeylearn::MultiResponse.new(responses)
|
47
|
+
else
|
48
|
+
body = {data: data}
|
49
|
+
if options.key? :production_model
|
50
|
+
body[:production_model] = options[:production_model]
|
51
|
+
end
|
52
|
+
return request(:post, endpoint, body)
|
45
53
|
end
|
54
|
+
end
|
46
55
|
|
47
|
-
|
56
|
+
def list(options = {})
|
57
|
+
request(:get, build_endpoint, nil, options)
|
48
58
|
end
|
49
59
|
|
50
60
|
def create(name, options = {})
|
51
61
|
data = {
|
52
62
|
name: name,
|
53
63
|
description: options[:description],
|
64
|
+
algorithm: options[:algorithm],
|
54
65
|
language: options[:language],
|
66
|
+
max_features: options[:max_features],
|
55
67
|
ngram_range: options[:ngram_range],
|
56
|
-
|
57
|
-
|
68
|
+
use_stemming: options[:use_stemming],
|
69
|
+
preprocess_numbers: options[:preprocess_numbers],
|
70
|
+
preprocess_social_media: options[:preprocess_social_media],
|
71
|
+
normalize_weights: options[:normalize_weights],
|
72
|
+
stopwords: options[:stopwords],
|
73
|
+
whitelist: options[:whitelist],
|
74
|
+
}.delete_if { |k,v| v.nil? }
|
75
|
+
request(:post, build_endpoint, data)
|
76
|
+
end
|
77
|
+
|
78
|
+
def edit(module_id, options = {})
|
79
|
+
data = {
|
80
|
+
name: options[:name],
|
81
|
+
description: options[:description],
|
82
|
+
algorithm: options[:algorithm],
|
83
|
+
language: options[:language],
|
58
84
|
max_features: options[:max_features],
|
59
|
-
|
60
|
-
|
61
|
-
|
85
|
+
ngram_range: options[:ngram_range],
|
86
|
+
use_stemming: options[:use_stemming],
|
87
|
+
preprocess_numbers: options[:preprocess_numbers],
|
88
|
+
preprocess_social_media: options[:preprocess_social_media],
|
62
89
|
normalize_weights: options[:normalize_weights],
|
63
|
-
|
64
|
-
|
65
|
-
classifier_type: options[:classifier_type],
|
66
|
-
text_type: options[:text_type],
|
67
|
-
permissions: options[:permissions]
|
90
|
+
stopwords: options[:stopwords],
|
91
|
+
whitelist: options[:whitelist],
|
68
92
|
}.delete_if { |k,v| v.nil? }
|
69
|
-
request
|
93
|
+
request(:patch, build_endpoint(module_id), data)
|
70
94
|
end
|
71
95
|
|
72
96
|
def detail(module_id)
|
73
|
-
request
|
97
|
+
request(:get, build_endpoint(module_id))
|
74
98
|
end
|
75
99
|
|
76
|
-
def
|
77
|
-
|
78
|
-
raise MonkeylearnError, "The second param must be an enumerable type (i.e. an Array)."
|
79
|
-
end
|
80
|
-
endpoint = build_endpoint(module_id, 'samples')
|
81
|
-
data = {
|
82
|
-
samples: samples_with_categories.collect do |text, category_ids|
|
83
|
-
{text: text, category_id: category_ids}
|
84
|
-
end
|
85
|
-
}
|
86
|
-
request :post, endpoint, data
|
100
|
+
def deploy(module_id)
|
101
|
+
request(:post, build_endpoint(module_id, 'deploy'))
|
87
102
|
end
|
88
103
|
|
89
|
-
def
|
90
|
-
|
91
|
-
end
|
104
|
+
def upload_data(module_id, data)
|
105
|
+
endpoint = build_endpoint(module_id, 'data')
|
92
106
|
|
93
|
-
|
94
|
-
request :post, build_endpoint(module_id, 'deploy')
|
107
|
+
request(:post, endpoint, {data: data})
|
95
108
|
end
|
96
109
|
|
97
110
|
def delete(module_id)
|
98
|
-
request
|
111
|
+
request(:delete, build_endpoint(module_id))
|
99
112
|
end
|
100
113
|
end
|
101
114
|
end
|
102
115
|
|
103
|
-
module
|
116
|
+
module Tags
|
104
117
|
class << self
|
105
118
|
include Monkeylearn::Requests
|
106
119
|
|
107
120
|
def build_endpoint(module_id, *args)
|
108
|
-
File.join('classifiers', module_id, '
|
121
|
+
File.join('classifiers', module_id, 'tags', *args.collect { |x| x.to_s }) + '/'
|
109
122
|
end
|
110
123
|
|
111
|
-
def create(module_id, name,
|
124
|
+
def create(module_id, name, options = {})
|
112
125
|
data = {
|
113
126
|
name: name,
|
114
|
-
parent_id: parent_id
|
115
127
|
}
|
116
|
-
|
128
|
+
if options[:parent_id]
|
129
|
+
data[:parent_id] = options[:parent_id]
|
130
|
+
end
|
131
|
+
request(:post, build_endpoint(module_id), data)
|
117
132
|
end
|
118
133
|
|
119
|
-
def
|
120
|
-
|
134
|
+
def detail(module_id, tag_id)
|
135
|
+
request :get, build_endpoint(module_id, tag_id)
|
136
|
+
end
|
137
|
+
|
138
|
+
def edit(module_id, tag_id, options = {})
|
139
|
+
endpoint = build_endpoint(module_id, tag_id)
|
121
140
|
data = {
|
122
|
-
name: name,
|
123
|
-
parent_id: parent_id
|
141
|
+
name: options[:name],
|
142
|
+
parent_id: options[:parent_id]
|
124
143
|
}.delete_if { |k,v| v.nil? }
|
125
144
|
request :patch, endpoint, data
|
126
145
|
end
|
127
146
|
|
128
|
-
def delete(module_id,
|
129
|
-
endpoint = build_endpoint(module_id,
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
147
|
+
def delete(module_id, tag_id, options = {})
|
148
|
+
endpoint = build_endpoint(module_id, tag_id)
|
149
|
+
|
150
|
+
data = nil
|
151
|
+
if options.key?(:move_data_to)
|
152
|
+
data = {move_data_to: options[:move_data_to]}
|
153
|
+
end
|
154
|
+
|
155
|
+
request(:delete, endpoint, data)
|
135
156
|
end
|
136
157
|
end
|
137
158
|
end
|
@@ -2,15 +2,16 @@ require 'monkeylearn/defaults'
|
|
2
2
|
|
3
3
|
module Monkeylearn
|
4
4
|
module Configurable
|
5
|
-
attr_accessor :token, :
|
6
|
-
attr_writer :
|
5
|
+
attr_accessor :token, :base_url, :retry_if_throttle, :auto_batch
|
6
|
+
attr_writer :base_url
|
7
7
|
|
8
8
|
class << self
|
9
9
|
def keys
|
10
10
|
@keys ||= [
|
11
|
-
:
|
11
|
+
:base_url,
|
12
12
|
:token,
|
13
|
-
:
|
13
|
+
:retry_if_throttle,
|
14
|
+
:auto_batch,
|
14
15
|
]
|
15
16
|
end
|
16
17
|
end
|
@@ -26,12 +27,8 @@ module Monkeylearn
|
|
26
27
|
self
|
27
28
|
end
|
28
29
|
|
29
|
-
def
|
30
|
-
@
|
31
|
-
end
|
32
|
-
|
33
|
-
def api_endpoint
|
34
|
-
File.join(@api_endpoint, "")
|
30
|
+
def base_url
|
31
|
+
File.join(@base_url, "")
|
35
32
|
end
|
36
33
|
end
|
37
34
|
end
|
data/lib/monkeylearn/defaults.rb
CHANGED
@@ -2,35 +2,35 @@ module Monkeylearn
|
|
2
2
|
module Defaults
|
3
3
|
# Constants
|
4
4
|
DEFAULT_BATCH_SIZE = 200
|
5
|
-
MAX_BATCH_SIZE =
|
6
|
-
MIN_BATCH_SIZE = 100
|
5
|
+
MAX_BATCH_SIZE = 200
|
7
6
|
# Configurable options
|
8
|
-
|
9
|
-
|
7
|
+
BASE_URL = 'https://api.monkeylearn.com/v3/'
|
8
|
+
RETRY_IF_THROTTLE = true
|
9
|
+
AUTO_BATCH = true
|
10
10
|
|
11
11
|
class << self
|
12
12
|
def options
|
13
13
|
Hash[Monkeylearn::Configurable.keys.map{|key| [key, send(key)]}]
|
14
14
|
end
|
15
15
|
|
16
|
-
def
|
17
|
-
ENV['
|
16
|
+
def base_url
|
17
|
+
ENV['MONKEYLEARN_API_BASE_URL'] || BASE_URL
|
18
18
|
end
|
19
19
|
|
20
20
|
def token
|
21
21
|
ENV['MONKEYLEARN_TOKEN'] || nil
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
ENV['
|
24
|
+
def retry_if_throttle
|
25
|
+
ENV['MONKEYLEARN_RETRY_IF_THROTTLE'] || RETRY_IF_THROTTLE
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
29
|
-
|
28
|
+
def auto_batch
|
29
|
+
ENV['MONKEYLEARN_AUTO_BATCH'] || AUTO_BATCH
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
33
|
-
|
32
|
+
def max_batch_size
|
33
|
+
MAX_BATCH_SIZE
|
34
34
|
end
|
35
35
|
|
36
36
|
def default_batch_size
|
@@ -1,2 +1,76 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
1
3
|
class MonkeylearnError < StandardError
|
2
4
|
end
|
5
|
+
|
6
|
+
class MonkeylearnResponseError < MonkeylearnError
|
7
|
+
attr_accessor :detail, :error_code, :status_code
|
8
|
+
|
9
|
+
def initialize(raw_response)
|
10
|
+
@response = raw_response
|
11
|
+
|
12
|
+
|
13
|
+
body = JSON.parse(raw_response.body)
|
14
|
+
@detail = body['detail']
|
15
|
+
@error_code = body['error_code']
|
16
|
+
@status_code = raw_response.status
|
17
|
+
|
18
|
+
|
19
|
+
super "#{@error_code}: #{@detail}"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Request Validation Errors (422)
|
24
|
+
|
25
|
+
class RequestParamsError < MonkeylearnResponseError
|
26
|
+
end
|
27
|
+
|
28
|
+
# Authentication (401)
|
29
|
+
|
30
|
+
|
31
|
+
class AuthenticationError < MonkeylearnResponseError
|
32
|
+
end
|
33
|
+
|
34
|
+
# Forbidden (403)
|
35
|
+
|
36
|
+
class ForbiddenError < MonkeylearnResponseError
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
class ModelLimitError < ForbiddenError
|
41
|
+
end
|
42
|
+
|
43
|
+
# Not found Exceptions (404)
|
44
|
+
|
45
|
+
class ResourceNotFound < MonkeylearnResponseError
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
class ModelNotFound < ResourceNotFound
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
class TagNotFound < ResourceNotFound
|
54
|
+
end
|
55
|
+
|
56
|
+
# Rate limit (429)
|
57
|
+
|
58
|
+
class RateLimitError < MonkeylearnResponseError
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
class PlanQueryLimitError < MonkeylearnResponseError
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
class PlanRateLimitError < RateLimitError
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
class ConcurrencyRateLimitError < RateLimitError
|
71
|
+
end
|
72
|
+
|
73
|
+
# State errors (423)
|
74
|
+
|
75
|
+
class ModuleStateError < MonkeylearnResponseError
|
76
|
+
end
|
@@ -20,26 +20,41 @@ module Monkeylearn
|
|
20
20
|
if batch_size > max_size
|
21
21
|
raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
|
22
22
|
end
|
23
|
-
min_size = Monkeylearn::Defaults.min_batch_size
|
24
|
-
if batch_size < min_size
|
25
|
-
raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
|
26
|
-
end
|
27
23
|
true
|
28
24
|
end
|
29
25
|
|
30
|
-
def extract(module_id,
|
26
|
+
def extract(module_id, data, options = {})
|
31
27
|
options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
|
32
28
|
batch_size = options[:batch_size]
|
33
29
|
validate_batch_size batch_size
|
34
30
|
|
35
31
|
endpoint = build_endpoint(module_id, 'extract')
|
36
32
|
|
37
|
-
|
38
|
-
|
39
|
-
|
33
|
+
if Monkeylearn.auto_batch
|
34
|
+
responses = (0...data.length).step(batch_size).collect do |start_idx|
|
35
|
+
sliced_data = {data: data.slice(start_idx, batch_size)}
|
36
|
+
if options.key? :production_model
|
37
|
+
sliced_data[:production_model] = options[:production_model]
|
38
|
+
end
|
39
|
+
request(:post, endpoint, sliced_data)
|
40
|
+
end
|
41
|
+
return Monkeylearn::MultiResponse.new(responses)
|
42
|
+
else
|
43
|
+
body = {data: data}
|
44
|
+
if options.key? :production_model
|
45
|
+
body[:production_model] = options[:production_model]
|
46
|
+
end
|
47
|
+
return request(:post, endpoint, body)
|
40
48
|
end
|
41
49
|
|
42
|
-
|
50
|
+
end
|
51
|
+
|
52
|
+
def list(options = {})
|
53
|
+
request(:get, build_endpoint, nil, options)
|
54
|
+
end
|
55
|
+
|
56
|
+
def detail(module_id)
|
57
|
+
request(:get, build_endpoint(module_id))
|
43
58
|
end
|
44
59
|
end
|
45
60
|
end
|
data/lib/monkeylearn/requests.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'faraday'
|
2
2
|
require 'json'
|
3
3
|
require 'monkeylearn/response'
|
4
|
+
require 'monkeylearn/exceptions'
|
4
5
|
|
5
6
|
module Monkeylearn
|
6
7
|
module Requests
|
@@ -9,36 +10,98 @@ module Monkeylearn
|
|
9
10
|
raise MonkeylearnError, 'Please initialize the Monkeylearn library with your API token'
|
10
11
|
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
while true
|
14
|
+
response = get_connection.send(method) do |req|
|
15
|
+
url = path.to_s
|
16
|
+
if query_params
|
17
|
+
url += '?' + URI.encode_www_form(query_params)
|
18
|
+
end
|
19
|
+
req.url url
|
20
|
+
req.headers['Authorization'] = 'Token ' + Monkeylearn.token
|
21
|
+
req.headers['Content-Type'] = 'application/json'
|
22
|
+
req.headers['User-Agent'] = 'ruby-sdk'
|
23
|
+
if data
|
24
|
+
req.body = data.to_json
|
25
|
+
end
|
16
26
|
end
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
27
|
+
|
28
|
+
seconds = throttled?(response)
|
29
|
+
if seconds && Monkeylearn.retry_if_throttle
|
30
|
+
sleep seconds
|
31
|
+
else
|
32
|
+
break
|
23
33
|
end
|
24
34
|
end
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
response = request(method, path, data)
|
35
|
+
|
36
|
+
if response.status != 200
|
37
|
+
raise_for_status(response)
|
29
38
|
end
|
39
|
+
|
30
40
|
Monkeylearn::Response.new(response)
|
31
41
|
end
|
32
42
|
|
43
|
+
def raise_for_status(raw_response)
|
44
|
+
body = JSON.parse(raw_response.body)
|
45
|
+
error_code = body.fetch("error_code", nil)
|
46
|
+
raise get_exception_class(raw_response.status, error_code).new(raw_response)
|
47
|
+
end
|
48
|
+
|
49
|
+
def get_exception_class(status_code, error_code)
|
50
|
+
case status_code
|
51
|
+
when 422
|
52
|
+
return RequestParamsError
|
53
|
+
when 401
|
54
|
+
return AuthenticationError
|
55
|
+
when 403
|
56
|
+
case error_code
|
57
|
+
when 'MODEL_LIMIT'
|
58
|
+
return ModelLimitError
|
59
|
+
else
|
60
|
+
return ForbiddenError
|
61
|
+
end
|
62
|
+
when 404
|
63
|
+
case error_code
|
64
|
+
when 'MODEL_NOT_FOUND'
|
65
|
+
return ModelNotFound
|
66
|
+
when 'TAG_NOT_FOUND'
|
67
|
+
return TagNotFound
|
68
|
+
else
|
69
|
+
return ResourceNotFound
|
70
|
+
end
|
71
|
+
when 429
|
72
|
+
case error_code
|
73
|
+
when 'PLAN_RATE_LIMIT'
|
74
|
+
return PlanRateLimitError
|
75
|
+
when 'CONCURRENCY_RATE_LIMIT'
|
76
|
+
return ConcurrencyRateLimitError
|
77
|
+
when 'PLAN_QUERY_LIMIT'
|
78
|
+
return PlanQueryLimitError
|
79
|
+
else
|
80
|
+
return RateLimitError
|
81
|
+
end
|
82
|
+
when 423
|
83
|
+
return ModuleStateError
|
84
|
+
else
|
85
|
+
return MonkeylearnResponseError
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
33
89
|
def throttled?(response)
|
34
|
-
return false
|
35
|
-
|
36
|
-
|
37
|
-
|
90
|
+
return false unless response.status == 429
|
91
|
+
body = JSON.parse(response.body)
|
92
|
+
|
93
|
+
case body['error_code']
|
94
|
+
when 'CONCURRENCY_RATE_LIMIT'
|
95
|
+
seconds = 2
|
96
|
+
when 'PLAN_RATE_LIMIT'
|
97
|
+
match = /([\d]+) seconds/.match(body['detail'])
|
98
|
+
seconds = if match then match[1].to_i else 60 end
|
99
|
+
end
|
100
|
+
seconds
|
38
101
|
end
|
39
102
|
|
40
103
|
def get_connection
|
41
|
-
@conn ||= Faraday.new(url: Monkeylearn.
|
104
|
+
@conn ||= Faraday.new(url: Monkeylearn.base_url) do |faraday|
|
42
105
|
faraday.adapter Faraday.default_adapter # Net::HTTP
|
43
106
|
end
|
44
107
|
end
|