monkeylearn 0.2.2 → 3.0.0

@@ -2,7 +2,6 @@ require 'monkeylearn/configurable'
  require 'monkeylearn/exceptions'
  require 'monkeylearn/classifiers'
  require 'monkeylearn/extractors'
- require 'monkeylearn/pipelines'
 
 
  module Monkeylearn
@@ -11,8 +11,8 @@ module Monkeylearn
  class << self
  include Monkeylearn::Requests
 
- def categories
- return Categories
+ def tags
+ return Tags
  end
 
  def build_endpoint(*args)
@@ -24,114 +24,135 @@ module Monkeylearn
  if batch_size > max_size
  raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
  end
- min_size = Monkeylearn::Defaults.min_batch_size
- if batch_size < min_size
- raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
- end
  true
  end
 
- def classify(module_id, texts, options = {})
+ def classify(model_id, data, options = {})
  options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
  batch_size = options[:batch_size]
  validate_batch_size batch_size
 
- endpoint = build_endpoint(module_id, 'classify')
- query_params = { sandbox: true } if options[:sandbox]
+ endpoint = build_endpoint(model_id, 'classify')
+
+ if Monkeylearn.auto_batch
+ responses = (0...data.length).step(batch_size).collect do |start_idx|
+ sliced_data = { data: data[start_idx, batch_size] }
+ if options.key? :production_model
+ sliced_data[:production_model] = options[:production_model]
+ end
+ request(:post, endpoint, sliced_data)
+ end
 
- responses = (0...texts.length).step(batch_size).collect do |start_idx|
- data = { text_list: texts.slice(start_idx, batch_size) }
- response = request :post, endpoint, data, query_params
+ return Monkeylearn::MultiResponse.new(responses)
+ else
+ body = {data: data}
+ if options.key? :production_model
+ body[:production_model] = options[:production_model]
+ end
+ return request(:post, endpoint, body)
  end
+ end
 
- Monkeylearn::MultiResponse.new(responses)
+ def list(options = {})
+ request(:get, build_endpoint, nil, options)
  end
 
  def create(name, options = {})
  data = {
  name: name,
  description: options[:description],
+ algorithm: options[:algorithm],
  language: options[:language],
+ max_features: options[:max_features],
  ngram_range: options[:ngram_range],
- use_stemmer: options[:use_stemmer],
- stop_words: options[:stop_words],
+ use_stemming: options[:use_stemming],
+ preprocess_numbers: options[:preprocess_numbers],
+ preprocess_social_media: options[:preprocess_social_media],
+ normalize_weights: options[:normalize_weights],
+ stopwords: options[:stopwords],
+ whitelist: options[:whitelist],
+ }.delete_if { |k,v| v.nil? }
+ request(:post, build_endpoint, data)
+ end
+
+ def edit(module_id, options = {})
+ data = {
+ name: options[:name],
+ description: options[:description],
+ algorithm: options[:algorithm],
+ language: options[:language],
  max_features: options[:max_features],
- strip_stopwords: options[:strip_stopwords],
- is_multilabel: options[:is_multilabel],
- is_twitter_data: options[:is_twitter_data],
+ ngram_range: options[:ngram_range],
+ use_stemming: options[:use_stemming],
+ preprocess_numbers: options[:preprocess_numbers],
+ preprocess_social_media: options[:preprocess_social_media],
  normalize_weights: options[:normalize_weights],
- classifier: options[:classifier],
- industry: options[:industry],
- classifier_type: options[:classifier_type],
- text_type: options[:text_type],
- permissions: options[:permissions]
+ stopwords: options[:stopwords],
+ whitelist: options[:whitelist],
  }.delete_if { |k,v| v.nil? }
- request :post, build_endpoint, data
+ request(:patch, build_endpoint(module_id), data)
  end
 
  def detail(module_id)
- request :get, build_endpoint(module_id)
+ request(:get, build_endpoint(module_id))
  end
 
- def upload_samples(module_id, samples_with_categories)
- unless samples_with_categories.respond_to? :each
- raise MonkeylearnError, "The second param must be an enumerable type (i.e. an Array)."
- end
- endpoint = build_endpoint(module_id, 'samples')
- data = {
- samples: samples_with_categories.collect do |text, category_ids|
- {text: text, category_id: category_ids}
- end
- }
- request :post, endpoint, data
+ def deploy(module_id)
+ request(:post, build_endpoint(module_id, 'deploy'))
  end
 
- def train(module_id)
- request :post, build_endpoint(module_id, 'train')
- end
+ def upload_data(module_id, data)
+ endpoint = build_endpoint(module_id, 'data')
 
- def deploy(module_id)
- request :post, build_endpoint(module_id, 'deploy')
+ request(:post, endpoint, {data: data})
  end
 
  def delete(module_id)
- request :delete, build_endpoint(module_id)
+ request(:delete, build_endpoint(module_id))
  end
  end
  end
 
- module Categories
+ module Tags
  class << self
  include Monkeylearn::Requests
 
  def build_endpoint(module_id, *args)
- File.join('classifiers', module_id, 'categories', *args.collect { |x| x.to_s }) + '/'
+ File.join('classifiers', module_id, 'tags', *args.collect { |x| x.to_s }) + '/'
  end
 
- def create(module_id, name, parent_id)
+ def create(module_id, name, options = {})
  data = {
  name: name,
- parent_id: parent_id
  }
- request :post, build_endpoint(module_id), data
+ if options[:parent_id]
+ data[:parent_id] = options[:parent_id]
+ end
+ request(:post, build_endpoint(module_id), data)
  end
 
- def edit(module_id, category_id, name = nil, parent_id = nil)
- endpoint = build_endpoint(module_id, category_id)
+ def detail(module_id, tag_id)
+ request :get, build_endpoint(module_id, tag_id)
+ end
+
+ def edit(module_id, tag_id, options = {})
+ endpoint = build_endpoint(module_id, tag_id)
  data = {
- name: name,
- parent_id: parent_id
+ name: options[:name],
+ parent_id: options[:parent_id]
  }.delete_if { |k,v| v.nil? }
  request :patch, endpoint, data
  end
 
- def delete(module_id, category_id, samples_strategy = nil, samples_category_id = nil)
- endpoint = build_endpoint(module_id, category_id)
- data = {
- 'samples-strategy'.to_s => samples_strategy,
- 'samples-category-id'.to_s => samples_category_id
- }.delete_if { |k,v| v.nil? }
- request :delete, endpoint, data
+ def delete(module_id, tag_id, options = {})
+ endpoint = build_endpoint(module_id, tag_id)
+
+ data = nil
+ if options.key?(:move_data_to)
+ data = {move_data_to: options[:move_data_to]}
+ end
+
+ request(:delete, endpoint, data)
  end
  end
  end
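
Usage sketch for the hunks above: classify now takes a model ID and a plain array of texts ("data") instead of module_id/texts, and tag management moved to a Tags module reachable through the new tags helper. A minimal sketch, assuming the module shown here is exposed as Monkeylearn::Classifiers and that the Response/MultiResponse objects expose a body accessor (model ID and texts below are placeholders):

    require 'monkeylearn'

    model_id = 'cl_XXXXXXXX'                      # hypothetical classifier ID
    data = ['This is great!', 'Awful customer support']

    # With auto_batch on, inputs longer than batch_size are split and the
    # per-batch responses are wrapped in a MultiResponse.
    response = Monkeylearn::Classifiers.classify(model_id, data, batch_size: 200)
    puts response.body

    # Tags now take an options hash; parent_id is optional.
    Monkeylearn::Classifiers.tags.create(model_id, 'Positive')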
@@ -2,15 +2,16 @@ require 'monkeylearn/defaults'
 
  module Monkeylearn
  module Configurable
- attr_accessor :token, :api_endpoint
- attr_writer :api_endpoint
+ attr_accessor :token, :base_url, :retry_if_throttle, :auto_batch
+ attr_writer :base_url
 
  class << self
  def keys
  @keys ||= [
- :api_endpoint,
+ :base_url,
  :token,
- :wait_on_throttle
+ :retry_if_throttle,
+ :auto_batch,
  ]
  end
  end
@@ -26,12 +27,8 @@ module Monkeylearn
  self
  end
 
- def wait_on_throttle
- @wait_on_throttle
- end
-
- def api_endpoint
- File.join(@api_endpoint, "")
+ def base_url
+ File.join(@base_url, "")
  end
  end
  end
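
The configuration surface renames api_endpoint to base_url, wait_on_throttle to retry_if_throttle, and adds auto_batch. A sketch of setting the renamed options, assuming the gem still exposes a Monkeylearn.configure block that yields this Configurable module (the configure method itself sits outside this hunk):

    require 'monkeylearn'

    Monkeylearn.configure do |c|
      c.token = ENV.fetch('MONKEYLEARN_TOKEN')       # required for any request
      c.base_url = 'https://api.monkeylearn.com/v3/'
      c.retry_if_throttle = true                     # sleep and retry on 429 responses
      c.auto_batch = true                            # split large inputs into batches
    end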
@@ -2,35 +2,35 @@ module Monkeylearn
  module Defaults
  # Constants
  DEFAULT_BATCH_SIZE = 200
- MAX_BATCH_SIZE = 500
- MIN_BATCH_SIZE = 100
+ MAX_BATCH_SIZE = 200
  # Configurable options
- API_ENDPOINT = 'https://api.monkeylearn.com/v2/'
- WAIT_ON_THROTTLE = true
+ BASE_URL = 'https://api.monkeylearn.com/v3/'
+ RETRY_IF_THROTTLE = true
+ AUTO_BATCH = true
 
  class << self
  def options
  Hash[Monkeylearn::Configurable.keys.map{|key| [key, send(key)]}]
  end
 
- def api_endpoint
- ENV['MONKEYLEARN_API_ENDPOINT'] || API_ENDPOINT
+ def base_url
+ ENV['MONKEYLEARN_API_BASE_URL'] || BASE_URL
  end
 
  def token
  ENV['MONKEYLEARN_TOKEN'] || nil
  end
 
- def wait_on_throttle
- ENV['MONKEYLEARN_WAIT_ON_THROTTLE'] || WAIT_ON_THROTTLE
+ def retry_if_throttle
+ ENV['MONKEYLEARN_RETRY_IF_THROTTLE'] || RETRY_IF_THROTTLE
  end
 
- def max_batch_size
- MAX_BATCH_SIZE
+ def auto_batch
+ ENV['MONKEYLEARN_AUTO_BATCH'] || AUTO_BATCH
  end
 
- def min_batch_size
- MIN_BATCH_SIZE
+ def max_batch_size
+ MAX_BATCH_SIZE
  end
 
  def default_batch_size
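
These defaults can also be overridden from the environment via MONKEYLEARN_TOKEN, MONKEYLEARN_API_BASE_URL, MONKEYLEARN_RETRY_IF_THROTTLE and MONKEYLEARN_AUTO_BATCH. A small sketch; note that ENV values come back as strings, so setting MONKEYLEARN_RETRY_IF_THROTTLE to "false" is still truthy under the ENV[...] || CONSTANT pattern above:

    # e.g. in a boot file, before the first request
    ENV['MONKEYLEARN_TOKEN'] = 'your-api-token'            # placeholder
    ENV['MONKEYLEARN_API_BASE_URL'] = 'https://api.monkeylearn.com/v3/'

    require 'monkeylearn'
    Monkeylearn::Defaults.base_url        # => "https://api.monkeylearn.com/v3/"
    Monkeylearn::Defaults.max_batch_size  # => 200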
@@ -1,2 +1,76 @@
+ require 'json'
+
  class MonkeylearnError < StandardError
  end
+
+ class MonkeylearnResponseError < MonkeylearnError
+ attr_accessor :detail, :error_code, :status_code
+
+ def initialize(raw_response)
+ @response = raw_response
+
+
+ body = JSON.parse(raw_response.body)
+ @detail = body['detail']
+ @error_code = body['error_code']
+ @status_code = raw_response.status
+
+
+ super "#{@error_code}: #{@detail}"
+ end
+ end
+
+ # Request Validation Errors (422)
+
+ class RequestParamsError < MonkeylearnResponseError
+ end
+
+ # Authentication (401)
+
+
+ class AuthenticationError < MonkeylearnResponseError
+ end
+
+ # Forbidden (403)
+
+ class ForbiddenError < MonkeylearnResponseError
+ end
+
+
+ class ModelLimitError < ForbiddenError
+ end
+
+ # Not found Exceptions (404)
+
+ class ResourceNotFound < MonkeylearnResponseError
+ end
+
+
+ class ModelNotFound < ResourceNotFound
+ end
+
+
+ class TagNotFound < ResourceNotFound
+ end
+
+ # Rate limit (429)
+
+ class RateLimitError < MonkeylearnResponseError
+ end
+
+
+ class PlanQueryLimitError < MonkeylearnResponseError
+ end
+
+
+ class PlanRateLimitError < RateLimitError
+ end
+
+
+ class ConcurrencyRateLimitError < RateLimitError
+ end
+
+ # State errors (423)
+
+ class ModuleStateError < MonkeylearnResponseError
+ end
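
The new hierarchy maps HTTP status codes (and MonkeyLearn error_code values) to dedicated exception classes, all descending from MonkeylearnResponseError and carrying the parsed detail, error_code and status_code. A rescue sketch around a classify call (the model ID and text are placeholders):

    begin
      Monkeylearn::Classifiers.classify('cl_XXXXXXXX', ['some text'])
    rescue PlanRateLimitError, ConcurrencyRateLimitError => e
      warn "Throttled (#{e.error_code}): #{e.detail}"
    rescue MonkeylearnResponseError => e
      warn "MonkeyLearn API error #{e.status_code}: #{e.detail}"
    end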
@@ -20,26 +20,41 @@ module Monkeylearn
  if batch_size > max_size
  raise MonkeylearnError, "The param batch_size is too big, max value is #{max_size}."
  end
- min_size = Monkeylearn::Defaults.min_batch_size
- if batch_size < min_size
- raise MonkeylearnError, "The param batch_size is too small, min value is #{min_size}."
- end
  true
  end
 
- def extract(module_id, texts, options = {})
+ def extract(module_id, data, options = {})
  options[:batch_size] ||= Monkeylearn::Defaults.default_batch_size
  batch_size = options[:batch_size]
  validate_batch_size batch_size
 
  endpoint = build_endpoint(module_id, 'extract')
 
- responses = (0...texts.length).step(batch_size).collect do |start_idx|
- data = { text_list: texts.slice(start_idx, batch_size) }
- response = request :post, endpoint, data
+ if Monkeylearn.auto_batch
+ responses = (0...data.length).step(batch_size).collect do |start_idx|
+ sliced_data = {data: data.slice(start_idx, batch_size)}
+ if options.key? :production_model
+ sliced_data[:production_model] = options[:production_model]
+ end
+ request(:post, endpoint, sliced_data)
+ end
+ return Monkeylearn::MultiResponse.new(responses)
+ else
+ body = {data: data}
+ if options.key? :production_model
+ body[:production_model] = options[:production_model]
+ end
+ return request(:post, endpoint, body)
  end
 
- Monkeylearn::MultiResponse.new(responses)
+ end
+
+ def list(options = {})
+ request(:get, build_endpoint, nil, options)
+ end
+
+ def detail(module_id)
+ request(:get, build_endpoint(module_id))
  end
  end
  end
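
Extractors get the same data/auto_batch handling plus list and detail. A sketch of calling an extractor, assuming this module is exposed as Monkeylearn::Extractors (the extractor ID is a placeholder; production_model is the optional flag handled above):

    texts = ['Elon Musk has shared a photo of the spacesuit designed by SpaceX.']

    response = Monkeylearn::Extractors.extract('ex_XXXXXXXX', texts,
                                               production_model: true)
    puts response.body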
@@ -1,6 +1,7 @@
  require 'faraday'
  require 'json'
  require 'monkeylearn/response'
+ require 'monkeylearn/exceptions'
 
  module Monkeylearn
  module Requests
@@ -9,36 +10,98 @@ module Monkeylearn
  raise MonkeylearnError, 'Please initialize the Monkeylearn library with your API token'
  end
 
- response = get_connection.send(method) do |req|
- url = path.to_s
- if query_params
- url += '?' + URI.encode_www_form(query_params)
+ while true
+ response = get_connection.send(method) do |req|
+ url = path.to_s
+ if query_params
+ url += '?' + URI.encode_www_form(query_params)
+ end
+ req.url url
+ req.headers['Authorization'] = 'Token ' + Monkeylearn.token
+ req.headers['Content-Type'] = 'application/json'
+ req.headers['User-Agent'] = 'ruby-sdk'
+ if data
+ req.body = data.to_json
+ end
  end
- req.url url
- req.headers['Authorization'] = 'Token ' + Monkeylearn.token
- req.headers['Content-Type'] = 'application/json'
- req.headers['User-Agent'] = 'ruby-sdk'
- if data
- req.body = data.to_json
+
+ seconds = throttled?(response)
+ if seconds && Monkeylearn.retry_if_throttle
+ sleep seconds
+ else
+ break
  end
  end
- if Monkeylearn.wait_on_throttle && seconds = throttled?(response)
- # Request was throttled, wait 'seconds' seconds and retry
- sleep seconds
- response = request(method, path, data)
+
+ if response.status != 200
+ raise_for_status(response)
  end
+
  Monkeylearn::Response.new(response)
  end
 
+ def raise_for_status(raw_response)
+ body = JSON.parse(raw_response.body)
+ error_code = body.fetch("error_code", nil)
+ raise get_exception_class(raw_response.status, error_code).new(raw_response)
+ end
+
+ def get_exception_class(status_code, error_code)
+ case status_code
+ when 422
+ return RequestParamsError
+ when 401
+ return AuthenticationError
+ when 403
+ case error_code
+ when 'MODEL_LIMIT'
+ return ModelLimitError
+ else
+ return ForbiddenError
+ end
+ when 404
+ case error_code
+ when 'MODEL_NOT_FOUND'
+ return ModelNotFound
+ when 'TAG_NOT_FOUND'
+ return TagNotFound
+ else
+ return ResourceNotFound
+ end
+ when 429
+ case error_code
+ when 'PLAN_RATE_LIMIT'
+ return PlanRateLimitError
+ when 'CONCURRENCY_RATE_LIMIT'
+ return ConcurrencyRateLimitError
+ when 'PLAN_QUERY_LIMIT'
+ return PlanQueryLimitError
+ else
+ return RateLimitError
+ end
+ when 423
+ return ModuleStateError
+ else
+ return MonkeylearnResponseError
+ end
+ end
+
  def throttled?(response)
- return false if response.status != 429
- error_detail = JSON.parse(response.body)['detail']
- match = /available in ([\d]+) seconds/.match(error_detail)
- if match then match[1].to_i else false end
+ return false unless response.status == 429
+ body = JSON.parse(response.body)
+
+ case body['error_code']
+ when 'CONCURRENCY_RATE_LIMIT'
+ seconds = 2
+ when 'PLAN_RATE_LIMIT'
+ match = /([\d]+) seconds/.match(body['detail'])
+ seconds = if match then match[1].to_i else 60 end
+ end
+ seconds
  end
 
  def get_connection
- @conn ||= Faraday.new(url: Monkeylearn.api_endpoint) do |faraday|
+ @conn ||= Faraday.new(url: Monkeylearn.base_url) do |faraday|
  faraday.adapter Faraday.default_adapter # Net::HTTP
  end
  end
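
In the rewritten request loop, a 429 either triggers a sleep-and-retry (when retry_if_throttle is set, with the wait computed by throttled?) or is raised through raise_for_status like any other non-200. A sketch of handling throttling manually with retries disabled, assuming the configure block shown earlier (the model ID is a placeholder):

    Monkeylearn.configure { |c| c.retry_if_throttle = false }

    begin
      Monkeylearn::Classifiers.classify('cl_XXXXXXXX', ['some text'])
    rescue ConcurrencyRateLimitError
      sleep 2    # mirrors the SDK's own wait for concurrency limits
      retry
    rescue PlanRateLimitError => e
      warn e.detail    # contains the "... N seconds" message parsed by throttled?
    end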