chupa-text 1.2.6 → 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0555d47f2e5bc1f80ed47485190a9db10a7a3996a4f8e43012dff65cc19e4069
4
- data.tar.gz: acc1ea064a381f34ded5e9253b93ddefd33308d5b12184f3f2bb018dc98f985a
3
+ metadata.gz: 415bf5c173d68ce1e887dc97e014823cdc9f747bb3d191a714b74e87ee4a6fc7
4
+ data.tar.gz: 41da0888b006b66e3134bb77b3478a9c5b8fb99958ac29ecc08d5a8c4d79829a
5
5
  SHA512:
6
- metadata.gz: 9c3ea10422cd7cf588f07ee97819499c6c33fcbb9bf235b2d49ac3fe69aa29feaae42e94a9fddde48532293f503d8d4f4bb1d75a897d7045e1910d8cea261cac
7
- data.tar.gz: e8d8e3e1acfcc6a9a1b4329118652590d3a354cbce043aad821960dcd169a049e9917ae9e4bf13ab3c14e7da2df613cb16f627d138d4c372cc2fbf301ddb1f4d
6
+ metadata.gz: 63097bace8113b4ed2d3634cc09e9d670d08918109f8fe27cdf26f4c4b05a2ec7d28e200b6ed6ec5417d43d9fbbb908a3eb807591fd36d6bc232ec90961c0253
7
+ data.tar.gz: 59644ec76de0529616ffe7e3f31e7b3c5792ee31e62a7a0ece851a77b2e8d8add52aafbefa0401005201e8780608e349a00cd43de08f04cb7bf3a4eeea115cea
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.2.7: 2019-06-13
4
+
5
+ ### Improvements
6
+
7
+ * Added support for timeout.
8
+
3
9
  ## 1.2.6: 2019-06-10
4
10
 
5
11
  ### Improvements
@@ -53,4 +53,13 @@ module ChupaText
53
53
  super("Unknown encoding data: <#{data.uri}>(#{data.mime_type}): <#{encoding}>")
54
54
  end
55
55
  end
56
+
57
+ class TimeoutError < Error
58
+ attr_reader :data, :timeout
59
+ def initialize(data, timeout)
60
+ @data = data
61
+ @timeout = timeout
62
+ super("Timeout error: <#{data.uri}>(#{data.mime_type}): <#{timeout}>")
63
+ end
64
+ end
56
65
  end
@@ -16,6 +16,7 @@
16
16
 
17
17
  require "pathname"
18
18
  require "uri"
19
+ require "timeout"
19
20
 
20
21
  module ChupaText
21
22
  class Extractor
@@ -103,19 +104,37 @@ module ChupaText
103
104
  end
104
105
  else
105
106
  debug {"#{log_tag}[extract][decomposer] #{decomposer.class}"}
106
- decomposer.decompose(target) do |decomposed|
107
- debug do
108
- "#{log_tag}[extract][decomposed] " +
109
- "#{decomposer.class}: " +
110
- "<#{target.uri}>: " +
111
- "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
107
+ with_timeout(target) do
108
+ decomposer.decompose(target) do |decomposed|
109
+ begin
110
+ debug do
111
+ "#{log_tag}[extract][decomposed] " +
112
+ "#{decomposer.class}: " +
113
+ "<#{target.uri}>: " +
114
+ "<#{target.mime_type}> -> <#{decomposed.mime_type}>"
115
+ end
116
+ extract_recursive(decomposed, &block)
117
+ ensure
118
+ decomposed.release
119
+ end
112
120
  end
113
- extract_recursive(decomposed, &block)
114
- decomposed.release
115
121
  end
116
122
  end
117
123
  end
118
124
 
125
+ def with_timeout(data, &block)
126
+ timeout = data.timeout
127
+ if timeout
128
+ begin
129
+ Timeout.timeout(timeout, &block)
130
+ rescue Timeout::Error
131
+ raise TimeoutError.new(data, timeout)
132
+ end
133
+ else
134
+ yield
135
+ end
136
+ end
137
+
119
138
  def log_tag
120
139
  "[extractor]"
121
140
  end
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.2.6"
18
+ VERSION = "1.2.7"
19
19
  end
@@ -76,6 +76,7 @@ class TestExtractor < Test::Unit::TestCase
76
76
  extracted = ChupaText::Data.new
77
77
  extracted.mime_type = "text/plain"
78
78
  extracted.body = data.body.gsub(/<.+?>/, "")
79
+ sleep(data.timeout * 2) if data.timeout
79
80
  yield(extracted)
80
81
  end
81
82
  end
@@ -92,6 +93,17 @@ class TestExtractor < Test::Unit::TestCase
92
93
  data.body = "<html><body>Hello</body></html>"
93
94
  assert_equal(["Hello"], extract(data))
94
95
  end
96
+
97
+ def test_timeout
98
+ data = ChupaText::Data.new
99
+ data.mime_type = "text/html"
100
+ data.body = "<html><body>Hello</body></html>"
101
+ data.timeout = 0.0001
102
+ error = ChupaText::TimeoutError.new(data, data.timeout)
103
+ assert_raise(error) do
104
+ extract(data)
105
+ end
106
+ end
95
107
  end
96
108
 
97
109
  sub_test_case("multi decomposed") do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.6
4
+ version: 1.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-10 00:00:00.000000000 Z
11
+ date: 2019-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: archive-zip