answersengine 0.2.33 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: af3d2a9258208b8fa48b641da2c5965551390519
4
- data.tar.gz: 0ed05cf695e1237c00f55c4914fdf625d8564a59
2
+ SHA256:
3
+ metadata.gz: b4c9499d52facc546f3f78c357de5b3a2b625e0668ef77bbaa3053e2f8940861
4
+ data.tar.gz: ce03196a46bcb2ca53e753de266fa6129c320db8e645e6fd753d833bcdf3a12d
5
5
  SHA512:
6
- metadata.gz: cd4096df084c799fdb90aeb832918e34e15f1d5e7374b23af7fcbbde443fb1f10087ef5ce0dce41a512d39d516f2fbb2675fc9947a5cfc44be7313218af66119
7
- data.tar.gz: a83793d9eceb6ff07bce795f0e35941cd24af87658a32ecdee5e79795d72139ac5cbc506b7d653ebf48f0debd243c3974a3856a06545b61ff9eca3c9b7c134e6
6
+ metadata.gz: dcd0280c93f52604723c9df3ace160654c8d4bed3e9568d8136046908e8be40b2078b9fe343bb21b58a28966b7954102231c53b53c83c5b79542302ac57e83eb
7
+ data.tar.gz: 47a80c92802ce98c2c86f9c3c099efd532fed95fd9f640f2c985f26b0a1596f6361eb67c4575eef342904d28a76296bd27bc6aacfeb794a01d7e79923297124c
@@ -0,0 +1,50 @@
1
module AnswersEngine
  module Client
    # API client for the auth token endpoints (`/auth_tokens`).
    #
    # Every method returns whatever the underlying HTTP class method
    # (`get`/`post`/`put`/`delete`) returns.
    class AuthToken < AnswersEngine::Client::Base
      # Sends GET `/auth_tokens/<token>`.
      #
      # @param [String] token Auth token to look up.
      def find(token)
        self.class.get("/auth_tokens/#{token}", @options)
      end

      # Sends GET `/auth_tokens`.
      #
      # @param [Hash] opts Currently unused.
      def all(opts={})
        self.class.get("/auth_tokens", @options)
      end

      # Sends POST `/auth_tokens` with the role and description as JSON body.
      #
      # @param [String] role Role to assign to the new token.
      # @param [String] description Description of the new token.
      # @param [Hash] opts Currently unused.
      def create(role, description, opts={})
        body = { role: role, description: description }
        self.class.post("/auth_tokens", options_with_body(body))
      end

      # Sends POST `/accounts/<account_id>/auth_tokens` with the role and
      # description as JSON body.
      #
      # @param [Integer,String] account_id Account to create the token on.
      # @param [String] role Role to assign to the new token.
      # @param [String] description Description of the new token.
      def create_on_account(account_id, role, description)
        body = { role: role, description: description }
        self.class.post("/accounts/#{account_id}/auth_tokens", options_with_body(body))
      end

      # Sends PUT `/auth_tokens/<token>` with the role and, when it is not
      # blank, the description as JSON body.
      #
      # @param [String] token Auth token to update.
      # @param [String] role New role.
      # @param [String] description New description; omitted from the request
      #   when nil, empty or whitespace only.
      # @param [Hash] opts Currently unused.
      def update(token, role, description="", opts={})
        body = { role: role }
        # Core-Ruby blank check, so this does not depend on ActiveSupport's
        # `present?` being loaded.
        body[:description] = description unless description.nil? || description.to_s.strip.empty?

        self.class.put("/auth_tokens/#{token}", options_with_body(body))
      end

      # Sends DELETE `/auth_tokens/<token>` with an empty JSON object as body.
      #
      # @param [String] token Auth token to delete.
      # @param [Hash] opts Currently unused.
      def delete(token, opts={})
        self.class.delete("/auth_tokens/#{token}", options_with_body({}))
      end

      private

      # Builds the request options for a single call without mutating
      # `@options`, so the body never leaks into later requests made with
      # the same client instance.
      def options_with_body(body)
        @options.merge(body: body.to_json)
      end
    end
  end
end
50
+
@@ -4,12 +4,15 @@ module AnswersEngine
4
4
  module Client
5
5
  class Base
6
6
  include HTTParty
7
- base_uri(ENV['ANSWERSENGINE_API_URL'].nil? ? 'https://fetch.answersengine.com/api/v1' : ENV['ANSWERSENGINE_API_URL'])
8
7
 
9
8
  def self.env_auth_token
10
9
  ENV['ANSWERSENGINE_TOKEN']
11
10
  end
12
11
 
12
# API base URL to use for requests.
#
# @return [String] Value of the `ANSWERSENGINE_API_URL` environment
#   variable when it is set, otherwise the default public API URL.
def env_api_url
  ENV.fetch('ANSWERSENGINE_API_URL', 'https://fetch.answersengine.com/api/v1')
end
15
+
13
16
  def auth_token
14
17
  @auth_token ||= self.class.env_auth_token
15
18
  end
@@ -19,6 +22,7 @@ module AnswersEngine
19
22
  end
20
23
 
21
24
  def initialize(opts={})
25
+ self.class.base_uri(env_api_url)
22
26
  self.auth_token = opts[:auth_token] unless opts[:auth_token].nil?
23
27
  @options = { headers: {
24
28
  "Authorization" => "Bearer #{auth_token}",
@@ -1,4 +1,5 @@
1
1
  require "answersengine/client/base"
2
+ require "answersengine/client/auth_token"
2
3
  require 'answersengine/client/export'
3
4
  require "answersengine/client/scraper"
4
5
  require "answersengine/client/scraper_deployment"
@@ -191,29 +191,88 @@ module AnswersEngine
191
191
  result.respond_to?(:first) ? result.first : nil
192
192
  end
193
193
 
194
# Remove dups by prioritizing the latest dup.
#
# Items are compared only on the keys listed in `key_defaults`. Keys are
# stringified on both sides, so `:_id` and `'_id'` count as the same key.
# Items that end up with a `nil` value for any compared key are never
# treated as duplicates.
#
# @param [Array] list List of hashes to dedup. Modified in place; the
#   relative order of the kept items is preserved.
# @param [Hash] key_defaults Key and default value pair hash to use on
#   uniq validation. Keys may be strings or symbols.
#
# @return [Integer] Removed duplicated items count.
def remove_old_dups!(list, key_defaults)
  raw_count = list.count
  # Normalize to string keys so symbol-keyed defaults match item keys too.
  defaults = key_defaults.each_with_object({}) { |(key, value), memo| memo[key.to_s] = value }
  force_uniq = 0

  # `uniq!` keeps the first occurrence, so reverse to keep the latest dup.
  list.reverse!
  list.uniq! do |item|
    # Extract the compared keys, stringified
    key_hash = {}
    item.each do |key, value|
      name = key.to_s
      key_hash[name] = value if defaults.key?(name)
    end

    # Apply defaults for uniq validation
    defaults.each { |name, default| key_hash[name] = default if key_hash[name].nil? }

    # Don't dedup nil keys: give such items a distinct counter value
    if defaults.keys.any? { |name| key_hash[name].nil? }
      force_uniq += 1
    else
      key_hash
    end
  end
  list.reverse!

  raw_count - list.count
end
220
+
221
# Dedup a list of pages in place, keeping the latest dup.
#
# Pages are compared on their `gid` only, and a page without a `gid` is
# never considered a duplicate.
#
# @param [Array] list List of pages to dedup.
#
# @return [Integer] Removed duplicated items count.
#
# @note It will not dedup for now as it is hard to build gid.
# TODO: Build gid so we can dedup
def remove_old_page_dups!(list)
  remove_old_dups!(list, 'gid' => nil)
end
235
+
236
# Dedup a list of outputs in place, keeping the latest dup.
#
# Outputs are compared on `_id` and `_collection`. A missing `_collection`
# counts as `'default'`, and an output without an `_id` is never
# considered a duplicate.
#
# @param [Array] list List of outputs to dedup.
#
# @return [Integer] Removed duplicated items count.
def remove_old_output_dups!(list)
  output_key_defaults = { '_id' => nil, '_collection' => 'default' }
  remove_old_dups!(list, output_key_defaults)
end
248
+
194
249
  def save_pages_and_outputs(pages = [], outputs = [], status)
195
250
  total_pages = pages.count
196
251
  total_outputs = outputs.count
197
252
  records_per_slice = 100
198
253
  until pages.empty? && outputs.empty?
199
254
  pages_slice = pages.shift(records_per_slice)
255
+ pages_dup_count = remove_old_page_dups! pages_slice
200
256
  outputs_slice = outputs.shift(records_per_slice)
257
+ outputs_dup_count = remove_old_output_dups! outputs_slice
201
258
 
202
259
  log_msgs = []
203
260
  unless pages_slice.empty?
204
- log_msgs << "#{pages_slice.count} out of #{total_pages} Pages"
261
+ page_dups_ignored = pages_dup_count > 0 ? " (#{pages_dup_count} dups ignored)" : ''
262
+ log_msgs << "#{pages_slice.count} out of #{total_pages} Pages#{page_dups_ignored}"
205
263
  unless save
206
264
  puts '----------------------------------------'
207
- puts "Would have saved #{log_msgs.last}"
265
+ puts "Would have saved #{log_msgs.last}#{page_dups_ignored}"
208
266
  puts JSON.pretty_generate pages_slice
209
267
  end
210
268
  end
211
269
 
212
270
  unless outputs_slice.empty?
213
- log_msgs << "#{outputs_slice.count} out of #{total_outputs} Outputs"
271
+ output_dups_ignored = outputs_dup_count > 0 ? " (#{outputs_dup_count} dups ignored)" : ''
272
+ log_msgs << "#{outputs_slice.count} out of #{total_outputs} Outputs#{output_dups_ignored}"
214
273
  unless save
215
274
  puts '----------------------------------------'
216
- puts "Would have saved #{log_msgs.last}"
275
+ puts "Would have saved #{log_msgs.last}#{output_dups_ignored}"
217
276
  puts JSON.pretty_generate outputs_slice
218
277
  end
219
278
  end
@@ -279,7 +338,7 @@ module AnswersEngine
279
338
 
280
339
  # Eval a filename with a custom binding
281
340
  #
282
- # @param [String] filename File path to read.
341
+ # @param [String] file_path File path to read.
283
342
  # @param [Binding] context Context binding to evaluate with.
284
343
  #
285
344
  # @note Using this method will allow scripts to contain `return` to
@@ -1,3 +1,3 @@
1
1
  module AnswersEngine
2
- VERSION = "0.2.33"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: answersengine
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.33
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-11 00:00:00.000000000 Z
11
+ date: 2019-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -201,6 +201,7 @@ files:
201
201
  - lib/answersengine/cli/scraper_page.rb
202
202
  - lib/answersengine/cli/seeder.rb
203
203
  - lib/answersengine/client.rb
204
+ - lib/answersengine/client/auth_token.rb
204
205
  - lib/answersengine/client/backblaze_content.rb
205
206
  - lib/answersengine/client/base.rb
206
207
  - lib/answersengine/client/export.rb
@@ -248,7 +249,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
248
249
  version: '0'
249
250
  requirements: []
250
251
  rubyforge_project:
251
- rubygems_version: 2.6.14.1
252
+ rubygems_version: 2.7.6
252
253
  signing_key:
253
254
  specification_version: 4
254
255
  summary: AnswersEngine toolbelt for developers