datahen 0.14.22 → 0.14.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25eb02598ac32462a53995a4b9e72b3bc466b54c2d74be02516f8d04f178a7b8
4
- data.tar.gz: 5f6fcedfa7f4a477e18fc1a0ee80126b1a646a3ecefdd8258d1982bf7d7fe06f
3
+ metadata.gz: 54064cf5656d253f683540fa3704cdcae3991bc07a91ab5339bf9083eb0792f2
4
+ data.tar.gz: 3c82cedd06937454a9af5d91eea9b32f3ad744394763ca9fadbcdc6068eda683
5
5
  SHA512:
6
- metadata.gz: 69a71d740f9078a5a4c2a77211587c0099a4064cabda690cd6fb4803c153975e3e91f1c05f98278f2852a0bacf8cb444bba8f29f56c3cfbd0fba12cece39b9cd
7
- data.tar.gz: df131c11592d2b6192fa74d26fc0e8d823b99f8073b907c82c8e9f04622c7d28aa5e1145419ac0377a99f6efcd3f46ff8fcef88fc436e802d51afc014fd4383a
6
+ metadata.gz: fb17b046f9dbd15cf7a278f68f550e8b3b84d8c16a030d0ef4df100df28c8e4dce29fe74ffc70e02f79af26a2d1e3f66ccb6890e06a342a70fac09d824169431
7
+ data.tar.gz: 732256fc714635896f444bed50e2d6f49c2a03c4868fd23eb84e52701935bc2bfbef25811055983691c5e0ff39af53328dbeb67faf91f127c1d06af450d7d666
@@ -14,20 +14,19 @@ module Datahen
14
14
  def try_parse(scraper_name, parser_file, gid)
15
15
  begin
16
16
 
17
- if options[:job]
18
- job_id = options[:job]
19
- elsif options[:global]
20
- job_id = nil
21
- else
22
- job = Client::ScraperJob.new(options).find(scraper_name)
23
- job_id = job['id']
24
- end
25
-
17
+ if options[:job]
18
+ job_id = options[:job]
19
+ elsif options[:global]
20
+ job_id = nil
21
+ else
22
+ job = Client::ScraperJob.new(options).find(scraper_name)
23
+ job_id = job['id']
24
+ end
26
25
 
27
26
  vars = JSON.parse(options[:vars]) if options[:vars]
28
27
  puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars, options[:"keep-outputs"])
29
28
 
30
- rescue JSON::ParserError
29
+ rescue JSON::ParserError
31
30
  if options[:vars]
32
31
  puts "Error: #{options[:vars]} on vars is not a valid JSON"
33
32
  end
@@ -4,8 +4,8 @@ require 'httparty'
4
4
  module Datahen
5
5
  module Client
6
6
  class BackblazeContent
7
- include HTTParty
8
-
7
+ include HTTParty
8
+
9
9
  def get_content(url)
10
10
  self.class.get(url, format: :plain)
11
11
  end
@@ -19,19 +19,23 @@ module Datahen
19
19
  sio = StringIO.new(string)
20
20
  gz = Zlib::GzipReader.new(sio, encoding: Encoding::ASCII_8BIT)
21
21
  _content = ""
22
- begin
22
+ begin
23
23
  _content = gz.read
24
24
  rescue => e
25
25
  # if unexpected eof error, then readchar until error, and ignore it
26
26
  if e.to_s == 'unexpected end of file'
27
- begin
28
- while !gz.eof?
29
- _content += gz.readchar
30
- end
27
+ # heavily improve content read recovery by using "String#<<",
28
+ # reading all "good" lines and then concat the remaining chars
29
+ begin
30
+ gz.each_line{|line| _content << line}
31
31
  rescue => e
32
- puts "Ignored Zlib error: #{e.to_s}"
32
+ begin
33
+ _content << gz.readchar while !gz.eof
34
+ rescue => e
35
+ puts "Ignored Zlib error: #{e.to_s}"
36
+ end
33
37
  end
34
- else
38
+ else
35
39
  raise e
36
40
  end
37
41
  end
@@ -374,6 +374,11 @@ module Datahen
374
374
  def eval_with_context file_path, context
375
375
  eval(File.read(file_path), context, file_path)
376
376
  end
377
+
378
+ # Finish the executor execution
379
+ def finish
380
+ raise Error::SafeTerminateError
381
+ end
377
382
  end
378
383
  end
379
384
  end
@@ -40,6 +40,8 @@ module Datahen
40
40
  job_id: job_id
41
41
  })
42
42
  eval_with_context filename, context
43
+ rescue Error::SafeTerminateError => e
44
+ # do nothing, this is fine
43
45
  rescue SyntaxError => e
44
46
  handle_error(e) if save
45
47
  raise e
@@ -55,7 +57,7 @@ module Datahen
55
57
  handle_error(e) if save
56
58
  raise e
57
59
  end
58
-
60
+
59
61
  update_finisher_done_status
60
62
  end
61
63
  proc.call
@@ -44,6 +44,8 @@ module Datahen
44
44
  pages: pages
45
45
  })
46
46
  eval_with_context filename, context
47
+ rescue Error::SafeTerminateError => e
48
+ # do nothing, this is fine
47
49
  rescue SyntaxError => e
48
50
  handle_error(e) if save
49
51
  raise e
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.22"
2
+ VERSION = "0.14.24"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.22
4
+ version: 0.14.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-12-14 00:00:00.000000000 Z
11
+ date: 2021-03-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor