datahen 0.14.22 → 0.14.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25eb02598ac32462a53995a4b9e72b3bc466b54c2d74be02516f8d04f178a7b8
4
- data.tar.gz: 5f6fcedfa7f4a477e18fc1a0ee80126b1a646a3ecefdd8258d1982bf7d7fe06f
3
+ metadata.gz: 54064cf5656d253f683540fa3704cdcae3991bc07a91ab5339bf9083eb0792f2
4
+ data.tar.gz: 3c82cedd06937454a9af5d91eea9b32f3ad744394763ca9fadbcdc6068eda683
5
5
  SHA512:
6
- metadata.gz: 69a71d740f9078a5a4c2a77211587c0099a4064cabda690cd6fb4803c153975e3e91f1c05f98278f2852a0bacf8cb444bba8f29f56c3cfbd0fba12cece39b9cd
7
- data.tar.gz: df131c11592d2b6192fa74d26fc0e8d823b99f8073b907c82c8e9f04622c7d28aa5e1145419ac0377a99f6efcd3f46ff8fcef88fc436e802d51afc014fd4383a
6
+ metadata.gz: fb17b046f9dbd15cf7a278f68f550e8b3b84d8c16a030d0ef4df100df28c8e4dce29fe74ffc70e02f79af26a2d1e3f66ccb6890e06a342a70fac09d824169431
7
+ data.tar.gz: 732256fc714635896f444bed50e2d6f49c2a03c4868fd23eb84e52701935bc2bfbef25811055983691c5e0ff39af53328dbeb67faf91f127c1d06af450d7d666
@@ -14,20 +14,19 @@ module Datahen
14
14
  def try_parse(scraper_name, parser_file, gid)
15
15
  begin
16
16
 
17
- if options[:job]
18
- job_id = options[:job]
19
- elsif options[:global]
20
- job_id = nil
21
- else
22
- job = Client::ScraperJob.new(options).find(scraper_name)
23
- job_id = job['id']
24
- end
25
-
17
+ if options[:job]
18
+ job_id = options[:job]
19
+ elsif options[:global]
20
+ job_id = nil
21
+ else
22
+ job = Client::ScraperJob.new(options).find(scraper_name)
23
+ job_id = job['id']
24
+ end
26
25
 
27
26
  vars = JSON.parse(options[:vars]) if options[:vars]
28
27
  puts Datahen::Scraper::Parser.exec_parser_page(parser_file, gid, job_id, false, vars, options[:"keep-outputs"])
29
28
 
30
- rescue JSON::ParserError
29
+ rescue JSON::ParserError
31
30
  if options[:vars]
32
31
  puts "Error: #{options[:vars]} on vars is not a valid JSON"
33
32
  end
@@ -4,8 +4,8 @@ require 'httparty'
4
4
  module Datahen
5
5
  module Client
6
6
  class BackblazeContent
7
- include HTTParty
8
-
7
+ include HTTParty
8
+
9
9
  def get_content(url)
10
10
  self.class.get(url, format: :plain)
11
11
  end
@@ -19,19 +19,23 @@ module Datahen
19
19
  sio = StringIO.new(string)
20
20
  gz = Zlib::GzipReader.new(sio, encoding: Encoding::ASCII_8BIT)
21
21
  _content = ""
22
- begin
22
+ begin
23
23
  _content = gz.read
24
24
  rescue => e
25
25
  # if unexpected eof error, then readchar until error, and ignore it
26
26
  if e.to_s == 'unexpected end of file'
27
- begin
28
- while !gz.eof?
29
- _content += gz.readchar
30
- end
27
+ # heavily improve content read recovery by using "String#<<",
28
+ # reading all "good" lines and then concat the remaining chars
29
+ begin
30
+ gz.each_line{|line| _content << line}
31
31
  rescue => e
32
- puts "Ignored Zlib error: #{e.to_s}"
32
+ begin
33
+ _content << gz.readchar while !gz.eof
34
+ rescue => e
35
+ puts "Ignored Zlib error: #{e.to_s}"
36
+ end
33
37
  end
34
- else
38
+ else
35
39
  raise e
36
40
  end
37
41
  end
@@ -374,6 +374,11 @@ module Datahen
374
374
  def eval_with_context file_path, context
375
375
  eval(File.read(file_path), context, file_path)
376
376
  end
377
+
378
+ # Finish the executor execution
379
+ def finish
380
+ raise Error::SafeTerminateError
381
+ end
377
382
  end
378
383
  end
379
384
  end
@@ -40,6 +40,8 @@ module Datahen
40
40
  job_id: job_id
41
41
  })
42
42
  eval_with_context filename, context
43
+ rescue Error::SafeTerminateError => e
44
+ # do nothing, this is fine
43
45
  rescue SyntaxError => e
44
46
  handle_error(e) if save
45
47
  raise e
@@ -55,7 +57,7 @@ module Datahen
55
57
  handle_error(e) if save
56
58
  raise e
57
59
  end
58
-
60
+
59
61
  update_finisher_done_status
60
62
  end
61
63
  proc.call
@@ -44,6 +44,8 @@ module Datahen
44
44
  pages: pages
45
45
  })
46
46
  eval_with_context filename, context
47
+ rescue Error::SafeTerminateError => e
48
+ # do nothing, this is fine
47
49
  rescue SyntaxError => e
48
50
  handle_error(e) if save
49
51
  raise e
@@ -1,3 +1,3 @@
1
1
  module Datahen
2
- VERSION = "0.14.22"
2
+ VERSION = "0.14.24"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datahen
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.22
4
+ version: 0.14.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Parama Danoesubroto
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-12-14 00:00:00.000000000 Z
11
+ date: 2021-03-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor