answersengine 0.2.33
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +30 -0
- data/Rakefile +22 -0
- data/answersengine.gemspec +45 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/examples/fetchtest/libraries/hello.rb +9 -0
- data/examples/fetchtest/libraries/hello_fail.rb +10 -0
- data/examples/fetchtest/parsers/failed.rb +2 -0
- data/examples/fetchtest/parsers/find_outputs.rb +18 -0
- data/examples/fetchtest/parsers/home.rb +50 -0
- data/examples/fetchtest/parsers/nested_fail.rb +3 -0
- data/examples/fetchtest/parsers/simple.rb +14 -0
- data/examples/fetchtest/seeders/csv_seeder.rb +12 -0
- data/examples/fetchtest/seeders/failed.rb +1 -0
- data/examples/fetchtest/seeders/list_of_urls.csv +5 -0
- data/examples/fetchtest/seeders/seed.rb +28 -0
- data/examples/fetchtest/seeders/test_reset_page.rb +4 -0
- data/exe/answersengine +3 -0
- data/lib/answersengine.rb +5 -0
- data/lib/answersengine/cli.rb +33 -0
- data/lib/answersengine/cli/global_page.rb +39 -0
- data/lib/answersengine/cli/job.rb +30 -0
- data/lib/answersengine/cli/job_output.rb +69 -0
- data/lib/answersengine/cli/parser.rb +64 -0
- data/lib/answersengine/cli/scraper.rb +172 -0
- data/lib/answersengine/cli/scraper_deployment.rb +24 -0
- data/lib/answersengine/cli/scraper_export.rb +51 -0
- data/lib/answersengine/cli/scraper_exporter.rb +40 -0
- data/lib/answersengine/cli/scraper_job.rb +71 -0
- data/lib/answersengine/cli/scraper_page.rb +200 -0
- data/lib/answersengine/cli/seeder.rb +40 -0
- data/lib/answersengine/client.rb +23 -0
- data/lib/answersengine/client/backblaze_content.rb +45 -0
- data/lib/answersengine/client/base.rb +50 -0
- data/lib/answersengine/client/export.rb +10 -0
- data/lib/answersengine/client/global_page.rb +18 -0
- data/lib/answersengine/client/job.rb +53 -0
- data/lib/answersengine/client/job_export.rb +10 -0
- data/lib/answersengine/client/job_log.rb +27 -0
- data/lib/answersengine/client/job_output.rb +19 -0
- data/lib/answersengine/client/job_page.rb +62 -0
- data/lib/answersengine/client/job_stat.rb +16 -0
- data/lib/answersengine/client/scraper.rb +54 -0
- data/lib/answersengine/client/scraper_deployment.rb +17 -0
- data/lib/answersengine/client/scraper_export.rb +22 -0
- data/lib/answersengine/client/scraper_exporter.rb +14 -0
- data/lib/answersengine/client/scraper_job.rb +49 -0
- data/lib/answersengine/client/scraper_job_output.rb +19 -0
- data/lib/answersengine/client/scraper_job_page.rb +55 -0
- data/lib/answersengine/plugin.rb +6 -0
- data/lib/answersengine/plugin/context_exposer.rb +55 -0
- data/lib/answersengine/scraper.rb +16 -0
- data/lib/answersengine/scraper/executor.rb +292 -0
- data/lib/answersengine/scraper/parser.rb +18 -0
- data/lib/answersengine/scraper/ruby_parser_executor.rb +141 -0
- data/lib/answersengine/scraper/ruby_seeder_executor.rb +114 -0
- data/lib/answersengine/scraper/seeder.rb +18 -0
- data/lib/answersengine/version.rb +3 -0
- metadata +255 -0
require 'httparty'
require 'json' # JSON.parse is used below; make the stdlib dependency explicit

module AnswersEngine
  module Client
    # Base HTTP client for the AnswersEngine Fetch API.
    #
    # Configures HTTParty's base URI, bearer-token auth headers and the
    # common query-string options shared by every resource client that
    # subclasses it.
    class Base
      include HTTParty
      # Allow the endpoint to be overridden via the environment; fall back
      # to the production API. (ENV.fetch default applies only when unset,
      # which matches the previous explicit nil check.)
      base_uri(ENV.fetch('ANSWERSENGINE_API_URL', 'https://fetch.answersengine.com/api/v1'))

      # Default auth token, read from the environment.
      def self.env_auth_token
        ENV['ANSWERSENGINE_TOKEN']
      end

      # Memoized per-instance token; defaults to the environment token.
      def auth_token
        @auth_token ||= self.class.env_auth_token
      end

      def auth_token=(value)
        @auth_token = value
      end

      # @param opts [Hash] supported keys: :auth_token, :page, :per_page,
      #   :fetch_fail, :parse_fail, :page_type, :gid, and :query (a Hash,
      #   or a String containing JSON).
      def initialize(opts = {})
        self.auth_token = opts[:auth_token] unless opts[:auth_token].nil?
        @options = { headers: {
          "Authorization" => "Bearer #{auth_token}",
          "Content-Type" => "application/json",
        }}

        query = {}
        query[:p] = opts[:page] if opts[:page]
        query[:pp] = opts[:per_page] if opts[:per_page]
        query[:fetchfail] = opts[:fetch_fail] if opts[:fetch_fail]
        query[:parsefail] = opts[:parse_fail] if opts[:parse_fail]
        query[:page_type] = opts[:page_type] if opts[:page_type]
        query[:gid] = opts[:gid] if opts[:gid]

        if opts[:query]
          if opts[:query].is_a?(Hash)
            query[:q] = opts[:query].to_json
          elsif opts[:query].is_a?(String)
            # Round-trip through JSON.parse so an invalid JSON string
            # raises here rather than producing a broken request.
            query[:q] = JSON.parse(opts[:query]).to_json
          end
        end

        @options.merge!(query: query) unless query.empty?
      end
    end
  end
end
module AnswersEngine
  module Client
    # Read-only access to global pages and their stored content.
    class GlobalPage < AnswersEngine::Client::Base
      # Fetch metadata for the global page identified by gid.
      def find(gid)
        fetch_path("/global_pages/#{gid}")
      end

      # Fetch the successfully stored content of the page.
      def find_content(gid)
        fetch_path("/global_pages/#{gid}/content")
      end

      # Fetch the content captured when the page fetch failed.
      def find_failed_content(gid)
        fetch_path("/global_pages/#{gid}/failed_content")
      end

      private

      # Every endpoint here is a plain GET using the shared client options.
      def fetch_path(path)
        self.class.get(path, @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # Lifecycle operations on scrape jobs (list, find, update status,
    # cancel/resume/pause, seeding callbacks).
    class Job < AnswersEngine::Client::Base
      # List all jobs. opts is accepted for interface symmetry but is
      # currently unused — paging etc. come from the constructor options.
      def all(opts={})
        self.class.get("/jobs", @options)
      end

      def find(job_id)
        self.class.get("/jobs/#{job_id}", @options)
      end

      # Update a job's status and/or worker counts.
      # @param opts [Hash] :status, :workers, :browsers
      def update(job_id, opts={})
        body = {}
        body[:status] = opts[:status] if opts[:status]
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        @options.merge!({body: body.to_json})

        self.class.put("/jobs/#{job_id}", @options)
      end

      # NOTE: these previously mutated the caller's opts hash in place
      # (opts[:status] = ...); merge onto a copy instead so the caller's
      # hash is left untouched. The request sent is identical.
      def cancel(job_id, opts={})
        update(job_id, opts.merge(status: 'cancelled'))
      end

      def resume(job_id, opts={})
        update(job_id, opts.merge(status: 'active'))
      end

      def pause(job_id, opts={})
        update(job_id, opts.merge(status: 'paused'))
      end

      # Report seeding results back to the server. outputs/pages default
      # to empty arrays; seeding_status defaults to nil (explicitly sent).
      def seeding_update(job_id, opts={})
        body = {}
        body[:outputs] = opts.fetch(:outputs) {[]}
        body[:pages] = opts.fetch(:pages) {[]}
        body[:seeding_status] = opts.fetch(:seeding_status){ nil }
        body[:log_error] = opts[:log_error] if opts[:log_error]

        @options.merge!({body: body.to_json})

        self.class.put("/jobs/#{job_id}/seeding_update", @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # Retrieval of job-level and per-page log entries, addressed either
    # by job id or by scraper name (current job).
    class JobLog < AnswersEngine::Client::Base
      def all_job_page_log(job_id, gid, opts={})
        get_log("/jobs/#{job_id}/pages/#{gid}/log", opts)
      end

      def scraper_all_job_page_log(scraper_name, gid, opts={})
        get_log("/scrapers/#{scraper_name}/current_job/pages/#{gid}/log", opts)
      end

      def all_job_log(job_id, opts={})
        get_log("/jobs/#{job_id}/log", opts)
      end

      def scraper_all_job_log(scraper_name, opts={})
        get_log("/scrapers/#{scraper_name}/current_job/log", opts)
      end

      private

      # Every log endpoint merges caller-supplied request options into the
      # shared options before issuing the GET (same as the original code).
      def get_log(path, opts)
        @options.merge!(opts)
        self.class.get(path, @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # Access to the output records a job has written, grouped by collection.
    class JobOutput < AnswersEngine::Client::Base
      # Fetch a single output record by id.
      def find(job_id, collection, id)
        record_path = "/jobs/#{job_id}/output/collections/#{collection}/records/#{id}"
        self.class.get(record_path, @options)
      end

      # List records in a collection (the 'default' collection if omitted).
      def all(job_id, collection = 'default')
        records_path = "/jobs/#{job_id}/output/collections/#{collection}/records"
        self.class.get(records_path, @options)
      end

      # List the collections this job has produced output into.
      def collections(job_id)
        self.class.get("/jobs/#{job_id}/output/collections", @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # Operations on the pages belonging to a job: finding, listing,
    # updating, resetting, enqueueing new fetches and reporting parse
    # results back to the server.
    class JobPage < AnswersEngine::Client::Base
      # Optional request attributes copied verbatim from opts into the
      # enqueue body when present (truthiness check, matching the
      # original per-key `if opts[:k]` guards).
      ENQUEUE_OPT_KEYS = %i[
        page_type priority fetch_type body headers vars
        force_fetch freshness ua_type no_redirect cookie
      ].freeze

      def find(job_id, gid)
        self.class.get("/jobs/#{job_id}/pages/#{gid}", @options)
      end

      # opts is accepted for interface symmetry but currently unused.
      def all(job_id, opts={})
        self.class.get("/jobs/#{job_id}/pages", @options)
      end

      # Update page attributes. @param opts [Hash] :page_type, :priority, :vars
      def update(job_id, gid, opts={})
        body = {}
        body[:page_type] = opts[:page_type] if opts[:page_type]
        body[:priority] = opts[:priority] if opts[:priority]
        body[:vars] = opts[:vars] if opts[:vars]

        @options.merge!({body: body.to_json})

        self.class.put("/jobs/#{job_id}/pages/#{gid}", @options)
      end

      def reset(job_id, gid, opts={})
        self.class.put("/jobs/#{job_id}/pages/#{gid}/reset", @options)
      end

      # Enqueue a new page fetch. The HTTP method defaults to "GET" when
      # blank; previously a nil method slipped through the != "" check and
      # was sent as `"method": null`.
      def enqueue(job_id, method, url, opts={})
        body = {}
        body[:method] = (method.nil? || method == "") ? "GET" : method
        body[:url] = url
        ENQUEUE_OPT_KEYS.each do |key|
          body[key] = opts[key] if opts[key]
        end

        @options.merge!({body: body.to_json})

        self.class.post("/jobs/#{job_id}/pages", @options)
      end

      # Report parsing results. outputs/pages default to empty arrays;
      # parsing_status defaults to nil (explicitly sent).
      def parsing_update(job_id, gid, opts={})
        body = {}
        body[:outputs] = opts.fetch(:outputs) {[]}
        body[:pages] = opts.fetch(:pages) {[]}
        body[:parsing_status] = opts.fetch(:parsing_status){ nil }
        body[:log_error] = opts[:log_error] if opts[:log_error]

        @options.merge!({body: body.to_json})

        self.class.put("/jobs/#{job_id}/pages/#{gid}/parsing_update", @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # Current statistics for a job, addressed by job id or scraper name.
    class JobStat < AnswersEngine::Client::Base
      # Stats for an explicit job id.
      def job_current_stats(job_id)
        stats_path = "/jobs/#{job_id}/stats/current"
        self.class.get(stats_path, @options)
      end

      # Stats for the scraper's current job.
      def scraper_job_current_stats(scraper_name)
        stats_path = "/scrapers/#{scraper_name}/current_job/stats/current"
        self.class.get(stats_path, @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # CRUD operations on scraper definitions.
    class Scraper < AnswersEngine::Client::Base

      def find(scraper_name)
        self.class.get("/scrapers/#{scraper_name}", @options)
      end

      # opts is accepted for interface symmetry but currently unused.
      def all(opts={})
        self.class.get("/scrapers", @options)
      end

      # Register a new scraper backed by a git repository.
      # @param opts [Hash] :branch (default "master"), :freshness_type,
      #   :force_fetch, :workers, :browsers, :proxy_type,
      #   :disable_scheduler, :cancel_current_job, :schedule, :timezone
      def create(scraper_name, git_repository, opts={})
        body = {
          name: scraper_name,
          git_repository: git_repository,
          git_branch: opts[:branch] ? opts[:branch] : "master"}

        body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
        body[:force_fetch] = opts[:force_fetch] if opts[:force_fetch]
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        body[:disable_scheduler] = opts[:disable_scheduler] if opts[:disable_scheduler]
        body[:cancel_current_job] = opts[:cancel_current_job] if opts[:cancel_current_job]
        body[:schedule] = opts[:schedule] if opts[:schedule]
        body[:timezone] = opts[:timezone] if opts[:timezone]
        @options.merge!({body: body.to_json})
        self.class.post("/scrapers", @options)
      end

      # Update a scraper. force_fetch / disable_scheduler /
      # cancel_current_job are tri-state flags that may legitimately be
      # false, so presence is tested rather than truthiness.
      #
      # BUG FIX: presence was previously tested with STRING keys
      # (has_key?("force_fetch")) while the value was read with a SYMBOL
      # key — symbol-keyed callers were silently ignored and string-keyed
      # callers got nil values. copy_flag now accepts either key form.
      def update(scraper_name, opts={})
        body = {}

        body[:name] = opts[:name] if opts[:name]
        body[:git_repository] = opts[:repo] if opts[:repo]
        body[:git_branch] = opts[:branch] if opts[:branch]
        body[:freshness_type] = opts[:freshness_type] if opts[:freshness_type]
        copy_flag(body, opts, :force_fetch)
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        copy_flag(body, opts, :disable_scheduler)
        copy_flag(body, opts, :cancel_current_job)
        body[:schedule] = opts[:schedule] if opts[:schedule]
        body[:timezone] = opts[:timezone] if opts[:timezone]
        @options.merge!({body: body.to_json})

        self.class.put("/scrapers/#{scraper_name}", @options)
      end

      private

      # Copy a presence-sensitive flag into body under its symbol name,
      # whether the caller supplied it as a symbol or a string key.
      def copy_flag(body, opts, key)
        if opts.key?(key)
          body[key] = opts[key]
        elsif opts.key?(key.to_s)
          body[key] = opts[key.to_s]
        end
      end
    end
  end
end
module AnswersEngine
  module Client
    # Listing and triggering of scraper code deployments.
    class ScraperDeployment < AnswersEngine::Client::Base

      # List past deployments. opts is accepted for interface symmetry
      # but is currently unused.
      def all(scraper_name, opts={})
        deployments_path = "/scrapers/#{scraper_name}/deployments"
        self.class.get(deployments_path, @options)
      end

      # Trigger a new deployment from the scraper's git repository.
      def deploy(scraper_name, opts={})
        deployments_path = "/scrapers/#{scraper_name}/deployments"
        self.class.post(deployments_path, @options)
      end

    end
  end
end
module AnswersEngine
  module Client
    # Creation, lookup and download of scraper exports.
    class ScraperExport < AnswersEngine::Client::Base
      # List exports for a scraper. opts is accepted for interface
      # symmetry but is currently unused.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/exports", @options)
      end

      # Look up a single export by id.
      def find(export_id)
        export_path = "/scrapers/exports/#{export_id}"
        self.class.get(export_path, @options)
      end

      # Start a new export using the named exporter.
      def create(scraper_name, exporter_name)
        create_path = "/scrapers/#{scraper_name}/exports/#{exporter_name}"
        self.class.post(create_path, @options)
      end

      # Download a finished export.
      def download(export_id)
        download_path = "/scrapers/exports/#{export_id}/download"
        self.class.get(download_path, @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # Read-only access to the exporters configured on a scraper.
    class ScraperExporter < AnswersEngine::Client::Base
      # List exporters. opts is accepted for interface symmetry but is
      # currently unused.
      def all(scraper_name, opts={})
        exporters_path = "/scrapers/#{scraper_name}/exporters"
        self.class.get(exporters_path, @options)
      end

      # Look up a single exporter by name.
      def find(scraper_name, exporter_name)
        exporter_path = "/scrapers/#{scraper_name}/exporters/#{exporter_name}"
        self.class.get(exporter_path, @options)
      end
    end
  end
end
module AnswersEngine
  module Client
    # Operations on a scraper's jobs and, in particular, its current job.
    class ScraperJob < AnswersEngine::Client::Base
      # List a scraper's jobs. opts is accepted for interface symmetry
      # but is currently unused.
      def all(scraper_name, opts={})
        self.class.get("/scrapers/#{scraper_name}/jobs", @options)
      end

      # Start a new job. @param opts [Hash] :workers, :browsers, :proxy_type
      def create(scraper_name, opts={})
        body = {}
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        @options.merge!({body: body.to_json})
        self.class.post("/scrapers/#{scraper_name}/jobs", @options)
      end

      # Fetch the scraper's current job.
      def find(scraper_name)
        self.class.get("/scrapers/#{scraper_name}/current_job", @options)
      end

      # Update the current job's status / worker counts / proxy type.
      def update(scraper_name, opts={})
        body = {}
        body[:status] = opts[:status] if opts[:status]
        body[:standard_worker_count] = opts[:workers] if opts[:workers]
        body[:browser_worker_count] = opts[:browsers] if opts[:browsers]
        body[:proxy_type] = opts[:proxy_type] if opts[:proxy_type]
        @options.merge!({body: body.to_json})

        self.class.put("/scrapers/#{scraper_name}/current_job", @options)
      end

      # NOTE: these previously mutated the caller's opts hash in place
      # (opts[:status] = ...); merge onto a copy instead so the caller's
      # hash is left untouched. The request sent is identical.
      def cancel(scraper_name, opts={})
        update(scraper_name, opts.merge(status: 'cancelled'))
      end

      def resume(scraper_name, opts={})
        update(scraper_name, opts.merge(status: 'active'))
      end

      def pause(scraper_name, opts={})
        update(scraper_name, opts.merge(status: 'paused'))
      end
    end
  end
end