chupa-text 1.2.6 → 1.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0555d47f2e5bc1f80ed47485190a9db10a7a3996a4f8e43012dff65cc19e4069
4
- data.tar.gz: acc1ea064a381f34ded5e9253b93ddefd33308d5b12184f3f2bb018dc98f985a
3
+ metadata.gz: 415bf5c173d68ce1e887dc97e014823cdc9f747bb3d191a714b74e87ee4a6fc7
4
+ data.tar.gz: 41da0888b006b66e3134bb77b3478a9c5b8fb99958ac29ecc08d5a8c4d79829a
5
5
  SHA512:
6
- metadata.gz: 9c3ea10422cd7cf588f07ee97819499c6c33fcbb9bf235b2d49ac3fe69aa29feaae42e94a9fddde48532293f503d8d4f4bb1d75a897d7045e1910d8cea261cac
7
- data.tar.gz: e8d8e3e1acfcc6a9a1b4329118652590d3a354cbce043aad821960dcd169a049e9917ae9e4bf13ab3c14e7da2df613cb16f627d138d4c372cc2fbf301ddb1f4d
6
+ metadata.gz: 63097bace8113b4ed2d3634cc09e9d670d08918109f8fe27cdf26f4c4b05a2ec7d28e200b6ed6ec5417d43d9fbbb908a3eb807591fd36d6bc232ec90961c0253
7
+ data.tar.gz: 59644ec76de0529616ffe7e3f31e7b3c5792ee31e62a7a0ece851a77b2e8d8add52aafbefa0401005201e8780608e349a00cd43de08f04cb7bf3a4eeea115cea
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.2.7: 2019-06-13
4
+
5
+ ### Improvements
6
+
7
+ * Added support for timeout.
8
+
3
9
  ## 1.2.6: 2019-06-10
4
10
 
5
11
  ### Improvements
@@ -53,4 +53,13 @@ module ChupaText
53
53
  super("Unknown encoding data: <#{data.uri}>(#{data.mime_type}): <#{encoding}>")
54
54
  end
55
55
  end
56
+
57
+ class TimeoutError < Error
58
+ attr_reader :data, :timeout
59
+ def initialize(data, timeout)
60
+ @data = data
61
+ @timeout = timeout
62
+ super("Timeout error: <#{data.uri}>(#{data.mime_type}): <#{timeout}>")
63
+ end
64
+ end
56
65
  end
@@ -16,6 +16,7 @@
16
16
 
17
17
  require "pathname"
18
18
  require "uri"
19
+ require "timeout"
19
20
 
20
21
  module ChupaText
21
22
  class Extractor
@@ -103,19 +104,37 @@ module ChupaText
103
104
  end
104
105
  else
105
106
  debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
106
- decomposer.decompose(target) do |decomposed|
107
- debug do
108
- "#{log_tag}[extract][decomposed] " +
109
- "#{decomposer.class}: " +
110
- "<#{target.uri}>: " +
111
- "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
107
+ with_timeout(target) do
108
+ decomposer.decompose(target) do |decomposed|
109
+ begin
110
+ debug do
111
+ "#{log_tag}[extract][decomposed] " +
112
+ "#{decomposer.class}: " +
113
+ "<#{target.uri}>: " +
114
+ "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
115
+ end
116
+ extract_recursive(decomposed, &block)
117
+ ensure
118
+ decomposed.release
119
+ end
112
120
  end
113
- extract_recursive(decomposed, &block)
114
- decomposed.release
115
121
  end
116
122
  end
117
123
  end
118
124
 
125
+ def with_timeout(data, &block)
126
+ timeout = data.timeout
127
+ if timeout
128
+ begin
129
+ Timeout.timeout(timeout, &block)
130
+ rescue Timeout::Error
131
+ raise TimeoutError.new(data, timeout)
132
+ end
133
+ else
134
+ yield
135
+ end
136
+ end
137
+
119
138
  def log_tag
120
139
  "[extractor]"
121
140
  end
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.2.6"
18
+ VERSION = "1.2.7"
19
19
  end
@@ -76,6 +76,7 @@ class TestExtractor < Test::Unit::TestCase
76
76
  extracted = ChupaText::Data.new
77
77
  extracted.mime_type = "text/plain"
78
78
  extracted.body = data.body.gsub(/<.+?>/, "")
79
+ sleep(data.timeout * 2) if data.timeout
79
80
  yield(extracted)
80
81
  end
81
82
  end
@@ -92,6 +93,17 @@ class TestExtractor < Test::Unit::TestCase
92
93
  data.body = "<html><body>Hello</body></html>"
93
94
  assert_equal(["Hello"], extract(data))
94
95
  end
96
+
97
+ def test_timeout
98
+ data = ChupaText::Data.new
99
+ data.mime_type = "text/html"
100
+ data.body = "<html><body>Hello</body></html>"
101
+ data.timeout = 0.0001
102
+ error = ChupaText::TimeoutError.new(data, data.timeout)
103
+ assert_raise(error) do
104
+ extract(data)
105
+ end
106
+ end
95
107
  end
96
108
 
97
109
  sub_test_case("multi decomposed") do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.6
4
+ version: 1.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-10 00:00:00.000000000 Z
11
+ date: 2019-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: archive-zip