chupa-text 1.2.1 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 99a53085d7eca8e459b0944e44cdb415036d19b6dc5a366e10bc3921a8c8cb09
4
- data.tar.gz: ce808a0a9e9352e2b4cf92c9e7348b57e522ee1717dc8070631451aa6a3b9a29
3
+ metadata.gz: f6d2f05926206d3da67e157161c9e4c35f036af80e80e5177fbc4edf9006e039
4
+ data.tar.gz: 5ed29a55f62d7a44cbbdbfc690b39005d35b354c2ba65629436b90d1ef32cef1
5
5
  SHA512:
6
- metadata.gz: 334356c90df57f22ae1fe0871e93147470c7505782487f71613f3fa794ea7fe0ed03b16980892beb46d662f830b876b6b4ff5132b8677892be1246ea26e38f40
7
- data.tar.gz: 86de4013d3f07d4d89c2113d01d78d4e6685b95af28ab1ad72267908d3c4b9ea30720eec515380989b6fb2a50d913041ee8223197cbcfa00c51b61b97506d1d7
6
+ metadata.gz: cd5e2b4b04d2572bb90ec832b618bc855e5c038d0a46f84a970f8e1ae1011609356387c30a71c6d476362fb27391094a25d0759d62bd56772a630914f543e644
7
+ data.tar.gz: c1c22bb010320fe5f48eb0c8b500e54b4557ca0f24461b832c3ae278c8dd9b0c6dc93f5682cf9b52d15b05673cd013c5546543e2f46e095b72fe46d64deb0aba
data/doc/text/news.md CHANGED
@@ -1,5 +1,36 @@
1
1
  # News
2
2
 
3
+ ## 1.2.2: 2019-03-28
4
+
5
+ ### Improvements
6
+
7
+ * Added `http-server` decomposer.
8
+
9
+ * `ChupaText::Data#max_body_size`: Added.
10
+
11
+ * `ChupaText::Data#max_body_size=`: Added.
12
+
13
+ * `ChupaText::Data#timeout`: Added.
14
+
15
+ * `ChupaText::Data#timeout=`: Added.
16
+
17
+ * `ChupaText::Data#limit_cpu`: Added.
18
+
19
+ * `ChupaText::Data#limit_cpu=`: Added.
20
+
21
+ * `ChupaText::Data#limit_as`: Added.
22
+
23
+ * `ChupaText::Data#limit_as=`: Added.
24
+
25
+ * `ChupaText::ExternalCommand`: Added support for soft timeout and limits.
26
+
27
+ * `ChupaText::Extractor`: Stopped receiving the max body size as an
28
+ option. Use `ChupaText::Data#max_body_size=` instead.
29
+
30
+ ### Fixes
31
+
32
+ * Fixed decomposer selection logic.
33
+
3
34
  ## 1.2.1: 2019-03-04
4
35
 
5
36
  ### Improvements
@@ -196,7 +196,7 @@ module ChupaText
196
196
  end
197
197
 
198
198
  def create_extractor
199
- extractor = Extractor.new(max_body_size: @max_body_size)
199
+ extractor = Extractor.new
200
200
  extractor.apply_configuration(@configuration)
201
201
  extractor
202
202
  end
@@ -222,6 +222,7 @@ module ChupaText
222
222
  data.mime_type = @mime_type if @mime_type
223
223
  data.need_screenshot = @need_screenshot
224
224
  data.expected_screenshot_size = @expected_screenshot_size
225
+ data.max_body_size = @max_body_size
225
226
  data
226
227
  end
227
228
 
@@ -65,6 +65,20 @@ module ChupaText
65
65
  # @return [Array<Integer, Integer>] the expected screenshot size.
66
66
  attr_accessor :expected_screenshot_size
67
67
 
68
+ # @return [Integer, nil] the max body size in bytes.
69
+ attr_accessor :max_body_size
70
+
71
+ # @return [Numeric, String, nil] the timeout on extraction.
72
+ attr_accessor :timeout
73
+
74
+ # @return [Numeric, String, nil] the max CPU time on extraction by
75
+ # external command.
76
+ attr_accessor :limit_cpu
77
+
78
+ # @return [Numeric, String, nil] the max memory on extraction by
79
+ # external command.
80
+ attr_accessor :limit_as
81
+
68
82
  def initialize(options={})
69
83
  @uri = nil
70
84
  @body = nil
@@ -76,6 +90,10 @@ module ChupaText
76
90
  @screenshot = nil
77
91
  @need_screenshot = true
78
92
  @expected_screenshot_size = [200, 200]
93
+ @max_body_size = nil
94
+ @timeout = nil
95
+ @limit_cpu = nil
96
+ @limit_as = nil
79
97
  @options = options || {}
80
98
  source_data = @options[:source_data]
81
99
  if source_data
@@ -107,6 +125,10 @@ module ChupaText
107
125
  end
108
126
  self.need_screenshot = data.need_screenshot?
109
127
  self.expected_screenshot_size = data.expected_screenshot_size
128
+ self.max_body_size = data.max_body_size
129
+ self.timeout = data.timeout
130
+ self.limit_cpu = data.limit_cpu
131
+ self.limit_as = data.limit_as
110
132
  end
111
133
 
112
134
  # @param [String, URI, nil] uri The URI for the data. If `uri` is
@@ -198,11 +220,11 @@ module ChupaText
198
220
  @need_screenshot
199
221
  end
200
222
 
201
- def to_utf8_body_data(max_body_size: nil)
223
+ def to_utf8_body_data
202
224
  b = nil
203
- if max_body_size
225
+ if @max_body_size
204
226
  open do |input|
205
- b = input.read(max_body_size)
227
+ b = input.read(@max_body_size)
206
228
  end
207
229
  else
208
230
  b = body
@@ -211,7 +233,7 @@ module ChupaText
211
233
 
212
234
  converter = UTF8Converter.new(b)
213
235
  utf8_body = converter.convert
214
- if max_body_size.nil? and b.equal?(utf8_body)
236
+ if @max_body_size.nil? and b.equal?(utf8_body)
215
237
  self
216
238
  else
217
239
  TextData.new(utf8_body, source_data: self)
@@ -0,0 +1,160 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ require "net/http"
18
+ require "pp"
19
+ require "uri"
20
+
21
+ module ChupaText
22
+ module Decomposers
23
+ class HTTPServer < Decomposer
24
+ include Loggable
25
+
26
+ registry.register("http-server", self)
27
+
28
+ @@default_url = nil
29
+ class << self
30
+ def default_url
31
+ @@default_url
32
+ end
33
+
34
+ def default_url=(url)
35
+ @@default_url = url
36
+ end
37
+ end
38
+
39
+ def initialize(options)
40
+ super
41
+ @url = @options[:url] ||
42
+ self.class.default_url ||
43
+ ENV["CHUPA_TEXT_HTTP_SERVER_URL"]
44
+ @url = URI(@url) if @url
45
+ end
46
+
47
+ def target?(data)
48
+ return false unless @url
49
+ return false if data.text_plain?
50
+ true
51
+ end
52
+
53
+ def target_score(data)
54
+ if target?(data)
55
+ 100
56
+ else
57
+ nil
58
+ end
59
+ end
60
+
61
+ def decompose(data, &block)
62
+ http = Net::HTTP.new(@url.host, @url.port)
63
+ http.use_ssl = true if @url.is_a?(URI::HTTPS)
64
+ if data.timeout.is_a?(Numeric)
65
+ http.open_timeout = data.timeout * 1.5
66
+ http.read_timeout = data.timeout * 1.5
67
+ http.write_timeout = data.timeout * 1.5
68
+ end
69
+ begin
70
+ http.start do
71
+ process_request(http, data, &block)
72
+ end
73
+ rescue SystemCallError => error
74
+ error do
75
+ message = "#{log_tag}[connection] "
76
+ message << "Failed to process data in server: "
77
+ message << "#{@url}: "
78
+ message << "#{error.class}: #{error.message}\n"
79
+ message << error.backtrace.join("\n")
80
+ message
81
+ end
82
+ rescue Net::ReadTimeout => error
83
+ error do
84
+ message = "#{log_tag}[timeout] "
85
+ message << "Failed to process data in server: "
86
+ message << "#{@url}: "
87
+ message << "#{error.class}: #{error.message}\n"
88
+ message << error.backtrace.join("\n")
89
+ message
90
+ end
91
+ end
92
+ end
93
+
94
+ private
95
+ def process_request(http, data)
96
+ request = Net::HTTP::Post.new(@url)
97
+ request["transfer-encoding"] = "chunked"
98
+ data.open do |input|
99
+ request.set_form(build_parameters(data, input),
100
+ "multipart/form-data")
101
+ response = http.request(request)
102
+ case response
103
+ when Net::HTTPOK
104
+ extracted = JSON.parse(response.body)
105
+ (extracted["texts"] || []).each do |text|
106
+ text_data = TextData.new(text["body"], source_data: data)
107
+ text.each do |key, value|
108
+ next if key == "body"
109
+ text_data[key] = value
110
+ end
111
+ yield(text_data)
112
+ end
113
+ else
114
+ error do
115
+ message = "#{log_tag} Failed to process data in server: "
116
+ message << "#{@url}: "
117
+ message << "#{response.code}: #{response.message.strip}\n"
118
+ case response.content_type
119
+ when "application/json"
120
+ PP.pp(JSON.parse(response.body), message)
121
+ else
122
+ message << response.body
123
+ end
124
+ message
125
+ end
126
+ end
127
+ end
128
+ end
129
+
130
+ def build_parameters(data, input)
131
+ parameters = []
132
+ [
133
+ ["timeout",
134
+ data.timeout || ChupaText::ExternalCommand.default_timeout],
135
+ ["limit_cpu",
136
+ data.limit_cpu || ChupaText::ExternalCommand.default_limit_cpu],
137
+ ["limit_as",
138
+ data.limit_as || ChupaText::ExternalCommand.default_limit_as],
139
+ ["max_body_size", data.max_body_size],
140
+ ].each do |key, value|
141
+ next if value.nil?
142
+ parameters << [key, StringIO.new(value.to_s)]
143
+ end
144
+ parameters << [
145
+ "data",
146
+ input,
147
+ {
148
+ filename: data.path.to_s,
149
+ content_type: data.mime_type,
150
+ },
151
+ ]
152
+ parameters
153
+ end
154
+
155
+ def log_tag
156
+ "[decomposer][http-server]"
157
+ end
158
+ end
159
+ end
160
+ end
@@ -69,13 +69,19 @@ module ChupaText
69
69
  else
70
70
  options = {}
71
71
  end
72
+ data = options[:data]
72
73
  pid = spawn(options[:env] || {},
73
74
  @path.to_s,
74
75
  *arguments,
75
- spawn_options(options[:spawn_options]))
76
+ spawn_options(options[:spawn_options], data))
77
+ if data
78
+ soft_timeout = data.timeout
79
+ else
80
+ soft_timeout = nil
81
+ end
76
82
  status = nil
77
83
  begin
78
- status = wait_process(pid, options[:timeout])
84
+ status = wait_process(pid, options[:timeout], soft_timeout)
79
85
  ensure
80
86
  unless status
81
87
  begin
@@ -99,28 +105,44 @@ module ChupaText
99
105
  end
100
106
 
101
107
  private
102
- def spawn_options(user_options)
108
+ def spawn_options(user_options, data)
103
109
  options = (user_options || {}).dup
104
- apply_default_spawn_limit(options, :cpu, :int)
105
- apply_default_spawn_limit(options, :as, :size)
110
+ if data
111
+ soft_limit_cpu = data.limit_cpu
112
+ soft_limit_as = data.limit_as
113
+ else
114
+ soft_limit_cpu = nil
115
+ soft_limit_as = nil
116
+ end
117
+ apply_default_spawn_limit(options, soft_limit_cpu, :cpu, :time)
118
+ apply_default_spawn_limit(options, soft_limit_as, :as, :size)
106
119
  options
107
120
  end
108
121
 
109
- def apply_default_spawn_limit(options, key, type)
122
+ def apply_default_spawn_limit(options, soft_value, key, type)
110
123
  # TODO: Workaround for Ruby 2.3.3p222
111
124
  case key
112
125
  when :cpu
113
126
  option_key = :rlimit_cpu
127
+ unit = "s"
114
128
  when :as
115
129
  option_key = :rlimit_as
130
+ unit = ""
116
131
  else
117
132
  option_key = :"rlimit_#{key}"
133
+ unit = ""
118
134
  end
119
135
  return if options[option_key]
120
136
 
121
137
  tag = "[limit][#{key}]"
122
138
  value = self.class.__send__("default_limit_#{key}")
123
139
  value = __send__("parse_#{type}", tag, value)
140
+ soft_value = __send__("parse_#{type}", tag, soft_value)
141
+ if value
142
+ value = soft_value if soft_value and soft_value < value
143
+ else
144
+ value = soft_value
145
+ end
124
146
  return if value.nil?
125
147
  rlimit_number = Process.const_get("RLIMIT_#{key.to_s.upcase}")
126
148
  soft_limit, hard_limit = Process.getrlimit(rlimit_number)
@@ -129,7 +151,7 @@ module ChupaText
129
151
  return nil
130
152
  end
131
153
  limit_info = "soft-limit:#{soft_limit}, hard-limit:#{hard_limit}"
132
- info("#{log_tag}#{tag}[set] <#{value}>(#{limit_info})")
154
+ info("#{log_tag}#{tag}[set] <#{value}#{unit}>(#{limit_info})")
133
155
 
134
156
  options[option_key] = value
135
157
  end
@@ -169,12 +191,21 @@ module ChupaText
169
191
  scale = 1
170
192
  case value
171
193
  when /GB?\z/i
194
+ scale = 1000 ** 3
195
+ number = $PREMATCH
196
+ when /GiB?\z/i
172
197
  scale = 1024 ** 3
173
198
  number = $PREMATCH
174
199
  when /MB?\z/i
200
+ scale = 1000 ** 2
201
+ number = $PREMATCH
202
+ when /MiB?\z/i
175
203
  scale = 1024 ** 2
176
204
  number = $PREMATCH
177
- when /KB?\z/i
205
+ when /[kK]B?\z/i
206
+ scale = 1000 ** 1
207
+ number = $PREMATCH
208
+ when /KiB?\z/i
178
209
  scale = 1024 ** 1
179
210
  number = $PREMATCH
180
211
  when /B?\z/i
@@ -227,9 +258,15 @@ module ChupaText
227
258
  warn("#{log_tag}#{tag}[invalid] <#{value}>(#{type})")
228
259
  end
229
260
 
230
- def wait_process(pid, timeout)
261
+ def wait_process(pid, timeout, soft_timeout)
231
262
  tag = "[timeout]"
232
263
  timeout = parse_time(tag, timeout || self.class.default_timeout)
264
+ soft_timeout = parse_time(tag, soft_timeout)
265
+ if timeout
266
+ timeout = soft_timeout if soft_timeout and soft_timeout < timeout
267
+ else
268
+ timeout = soft_timeout
269
+ end
233
270
  if timeout
234
271
  info("#{log_tag}#{tag}[use] <#{timeout}s>: <#{pid}>")
235
272
  status = wait_process_timeout(pid, timeout)
@@ -21,9 +21,8 @@ module ChupaText
21
21
  class Extractor
22
22
  include Loggable
23
23
 
24
- def initialize(max_body_size: nil)
24
+ def initialize
25
25
  @decomposers = []
26
- @max_body_size = max_body_size
27
26
  end
28
27
 
29
28
  # Sets the extractor up by the configuration. It adds decomposers
@@ -79,7 +78,7 @@ module ChupaText
79
78
  candidates << [score, decomposer]
80
79
  end
81
80
  return nil if candidates.empty?
82
- candidate = candidates.sort_by {|score, _| score}.first
81
+ candidate = candidates.sort_by {|score, _| -score}.first
83
82
  candidate[1]
84
83
  end
85
84
 
@@ -91,11 +90,11 @@ module ChupaText
91
90
  if decomposer.nil?
92
91
  if target.text_plain?
93
92
  debug {"#{log_tag}[extract][text-plain]"}
94
- yield(target.to_utf8_body_data(max_body_size: @max_body_size))
93
+ yield(target.to_utf8_body_data)
95
94
  else
96
95
  debug {"#{log_tag}[extract][decomposer] not found"}
97
96
  if target.text?
98
- yield(target.to_utf8_body_data(max_body_size: @max_body_size))
97
+ yield(target.to_utf8_body_data)
99
98
  end
100
99
  end
101
100
  else
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.2.1"
18
+ VERSION = "1.2.2"
19
19
  end
@@ -14,7 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
- class TestDecomposersCSV< Test::Unit::TestCase
17
+ class TestDecomposersCSV < Test::Unit::TestCase
18
18
  include Helper
19
19
 
20
20
  def setup
@@ -0,0 +1,175 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersHTTPServer < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ ChupaText::Decomposers::HTTPServer.default_url = nil
22
+ setup_server
23
+ setup_data
24
+ setup_decomposer
25
+ end
26
+
27
+ def setup_server
28
+ @port = 40080
29
+ @path = "/extraction.json"
30
+ @server_url = "http://127.0.0.1:#{@port}#{@path}"
31
+ logger = WEBrick::Log.new
32
+ logger.level = logger.class::ERROR
33
+ @server = WEBrick::HTTPServer.new(Port: @port,
34
+ Logger: logger,
35
+ AccessLog: [])
36
+ @response_status = 200
37
+ @server.mount_proc(@path) do |request, response|
38
+ sleep(@timeout * 2) if @timeout
39
+ response.status = @response_status
40
+ response.content_type = "application/json"
41
+ response.body = JSON.generate(@response)
42
+ end
43
+ @server_thread = Thread.new do
44
+ @server.start
45
+ end
46
+ end
47
+
48
+ def setup_data
49
+ @input_data = <<-CSV
50
+ Hello,World
51
+ Ruby,ChupaText
52
+ CSV
53
+ @input_mime_type = "text/csv"
54
+ @input_path = "/tmp/hello.csv"
55
+ @timeout = nil
56
+ @extracted_text = @input_data.gsub(/,/, "\t")
57
+ @extracted_path = @input_path.gsub(/\.csv\z/, ".txt")
58
+ @response = {
59
+ "mime-type" => @input_mime_type,
60
+ "uri" => "file://#{@input_path}",
61
+ "path" => @input_path,
62
+ "size" => @input_data.bytesize,
63
+ "texts" => [
64
+ {
65
+ "mime-type" => "text/plain",
66
+ "uri" => "file://#{@extracted_path}",
67
+ "path" => @extracted_path,
68
+ "size" => @extracted_text.bytesize,
69
+ "source-mime-types" => [
70
+ @input_mime_type,
71
+ ],
72
+ "body" => @extracted_text,
73
+ },
74
+ ],
75
+ }
76
+ end
77
+
78
+ def setup_decomposer
79
+ @decomposer = ChupaText::Decomposers::HTTPServer.new(:url => @server_url)
80
+ end
81
+
82
+ def teardown
83
+ teardown_server
84
+ end
85
+
86
+ def teardown_server
87
+ @server.shutdown
88
+ @server_thread.join
89
+ end
90
+
91
+ sub_test_case("decompose") do
92
+ def test_success
93
+ assert_equal([@extracted_text],
94
+ decompose.collect(&:body))
95
+ end
96
+
97
+ def test_not_ok
98
+ @response_status = 404
99
+ messages = capture_log do
100
+ assert_equal([], decompose.collect(&:body))
101
+ end
102
+ assert_equal([
103
+ [
104
+ :error,
105
+ "[decomposer][http-server] " +
106
+ "Failed to process data in server: " +
107
+ "#{@server_url}: " +
108
+ "#{@response_status}: Not Found",
109
+ ],
110
+ ],
111
+ messages)
112
+ end
113
+
114
+ def test_no_server
115
+ no_server_url = "http://127.0.0.1:2929/extraction.json"
116
+ @decomposer = ChupaText::Decomposers::HTTPServer.new(:url => no_server_url)
117
+ messages = capture_log do
118
+ assert_equal([], decompose.collect(&:body))
119
+ end
120
+ messages = messages.collect do |level, message|
121
+ [level, message.gsub(/Errno::.*\z/, "")]
122
+ end
123
+ assert_equal([
124
+ [
125
+ :error,
126
+ "[decomposer][http-server][connection] " +
127
+ "Failed to process data in server: " +
128
+ "#{no_server_url}: ",
129
+ ],
130
+ ],
131
+ messages)
132
+ end
133
+
134
+ def test_read_timeout
135
+ @timeout = 0.1
136
+ messages = capture_log do
137
+ assert_equal([], decompose.collect(&:body))
138
+ end
139
+ messages = messages.collect do |level, message|
140
+ [level, message.gsub(/Net::.*\z/, "")]
141
+ end
142
+ assert_equal([
143
+ [
144
+ :error,
145
+ "[decomposer][http-server][timeout] " +
146
+ "Failed to process data in server: " +
147
+ "#{@server_url}: ",
148
+ ],
149
+ ],
150
+ messages)
151
+ end
152
+
153
+ def test_default_url
154
+ ChupaText::Decomposers::HTTPServer.default_url = @server_url
155
+ @decomposer = ChupaText::Decomposers::HTTPServer.new({})
156
+ assert_equal([@extracted_text],
157
+ decompose.collect(&:body))
158
+ end
159
+
160
+ private
161
+ def decompose
162
+ data = ChupaText::Data.new
163
+ data.path = @input_path
164
+ data.mime_type = @input_mime_type
165
+ data.body = @input_data
166
+ data.timeout = @timeout
167
+
168
+ decomposed = []
169
+ @decomposer.decompose(data) do |decomposed_data|
170
+ decomposed << decomposed_data
171
+ end
172
+ decomposed
173
+ end
174
+ end
175
+ end
data/test/helper.rb CHANGED
@@ -15,8 +15,10 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  require "pathname"
18
+ require "rbconfig"
18
19
  require "tempfile"
19
20
  require "uri"
21
+ require "webrick"
20
22
 
21
23
  module Helper
22
24
  def fixture_path(*components)
@@ -39,4 +41,8 @@ module Helper
39
41
  [level, message]
40
42
  end
41
43
  end
44
+
45
+ def ruby
46
+ RbConfig.ruby
47
+ end
42
48
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2014 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2014-2019 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -14,12 +14,8 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
- require "rbconfig"
18
-
19
17
  class TestExternalCommand < Test::Unit::TestCase
20
- def ruby
21
- RbConfig.ruby
22
- end
18
+ include Helper
23
19
 
24
20
  def create_command(command)
25
21
  ChupaText::ExternalCommand.new(command)
@@ -76,4 +72,321 @@ class TestExternalCommand < Test::Unit::TestCase
76
72
  assert_false(exist?("nonexistent"))
77
73
  end
78
74
  end
75
+
76
+ class TestTimeout < self
77
+ def setup
78
+ @data = ChupaText::TextData.new("Hello")
79
+ timeout = ChupaText::ExternalCommand.default_timeout
80
+ begin
81
+ yield
82
+ ensure
83
+ ChupaText::ExternalCommand.default_timeout = timeout
84
+ end
85
+ end
86
+
87
+ def run_command(options={})
88
+ IO.pipe do |input, output|
89
+ command = create_command(ruby)
90
+ command.run("-e", "puts(Process.pid)",
91
+ options.merge(data: @data,
92
+ spawn_options: {out: output}))
93
+ input.gets.chomp
94
+ end
95
+ end
96
+
97
+ def test_option
98
+ pid = nil
99
+ messages = capture_log do
100
+ pid = run_command(timeout: "60s")
101
+ end
102
+ assert_equal([
103
+ [
104
+ :info,
105
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
106
+ ]
107
+ ],
108
+ messages)
109
+ end
110
+
111
+ def test_data_not_use
112
+ @data.timeout = "90s"
113
+ pid = nil
114
+ messages = capture_log do
115
+ pid = run_command(timeout: "60s")
116
+ end
117
+ assert_equal([
118
+ [
119
+ :info,
120
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
121
+ ]
122
+ ],
123
+ messages)
124
+ end
125
+
126
+ def test_data_use
127
+ @data.timeout = "30s"
128
+ pid = nil
129
+ messages = capture_log do
130
+ pid = run_command(timeout: "60s")
131
+ end
132
+ assert_equal([
133
+ [
134
+ :info,
135
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
136
+ ]
137
+ ],
138
+ messages)
139
+ end
140
+
141
+ def test_data_only
142
+ @data.timeout = "30s"
143
+ pid = nil
144
+ messages = capture_log do
145
+ pid = run_command
146
+ end
147
+ assert_equal([
148
+ [
149
+ :info,
150
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
151
+ ]
152
+ ],
153
+ messages)
154
+ end
155
+
156
+ def test_default
157
+ ChupaText::ExternalCommand.default_timeout = "60s"
158
+ pid = nil
159
+ messages = capture_log do
160
+ pid = run_command
161
+ end
162
+ assert_equal([
163
+ [
164
+ :info,
165
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
166
+ ]
167
+ ],
168
+ messages)
169
+ end
170
+
171
+ def test_default_data_not_use
172
+ ChupaText::ExternalCommand.default_timeout = "60s"
173
+ @data.timeout = "90s"
174
+ pid = nil
175
+ messages = capture_log do
176
+ pid = run_command
177
+ end
178
+ assert_equal([
179
+ [
180
+ :info,
181
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
182
+ ]
183
+ ],
184
+ messages)
185
+ end
186
+
187
+ def test_default_data_use
188
+ ChupaText::ExternalCommand.default_timeout = "60s"
189
+ @data.timeout = "30s"
190
+ pid = nil
191
+ messages = capture_log do
192
+ pid = run_command
193
+ end
194
+ assert_equal([
195
+ [
196
+ :info,
197
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
198
+ ]
199
+ ],
200
+ messages)
201
+ end
202
+
203
+ def test_default_data_only
204
+ @data.timeout = "30s"
205
+ pid = nil
206
+ messages = capture_log do
207
+ pid = run_command
208
+ end
209
+ assert_equal([
210
+ [
211
+ :info,
212
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
213
+ ]
214
+ ],
215
+ messages)
216
+ end
217
+ end
218
+
219
+ class TestLimitCPU < self
220
+ def setup
221
+ @data = ChupaText::TextData.new("Hello")
222
+ limit_cpu = ChupaText::ExternalCommand.default_limit_cpu
223
+ begin
224
+ yield
225
+ ensure
226
+ ChupaText::ExternalCommand.default_limit_cpu = limit_cpu
227
+ end
228
+ end
229
+
230
+ def run_command(spawn_options={})
231
+ command = create_command(ruby)
232
+ command.run("-e", "true",
233
+ data: @data,
234
+ spawn_options: spawn_options)
235
+ end
236
+
237
+ def test_default
238
+ ChupaText::ExternalCommand.default_limit_cpu = "60s"
239
+ messages = capture_log do
240
+ run_command
241
+ end
242
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
243
+ assert_equal([
244
+ [
245
+ :info,
246
+ "[external-command][limit][cpu][set] <60.0s>" +
247
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
248
+ ]
249
+ ],
250
+ messages)
251
+ end
252
+
253
+ def test_default_data_not_use
254
+ ChupaText::ExternalCommand.default_limit_cpu = "60s"
255
+ @data.limit_cpu = "90s"
256
+ messages = capture_log do
257
+ run_command
258
+ end
259
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
260
+ assert_equal([
261
+ [
262
+ :info,
263
+ "[external-command][limit][cpu][set] <60.0s>" +
264
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
265
+ ]
266
+ ],
267
+ messages)
268
+ end
269
+
270
+ def test_default_data_use
271
+ ChupaText::ExternalCommand.default_limit_cpu = "60s"
272
+ @data.limit_cpu = "30s"
273
+ messages = capture_log do
274
+ run_command
275
+ end
276
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
277
+ assert_equal([
278
+ [
279
+ :info,
280
+ "[external-command][limit][cpu][set] <30.0s>" +
281
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
282
+ ]
283
+ ],
284
+ messages)
285
+ end
286
+
287
+ def test_default_data_only
288
+ @data.limit_cpu = "30s"
289
+ messages = capture_log do
290
+ run_command
291
+ end
292
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
293
+ assert_equal([
294
+ [
295
+ :info,
296
+ "[external-command][limit][cpu][set] <30.0s>" +
297
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
298
+ ]
299
+ ],
300
+ messages)
301
+ end
302
+ end
303
+
304
+ class TestLimitAS < self
305
+ def setup
306
+ @data = ChupaText::TextData.new("Hello")
307
+ limit_as = ChupaText::ExternalCommand.default_limit_as
308
+ begin
309
+ yield
310
+ ensure
311
+ ChupaText::ExternalCommand.default_limit_as = limit_as
312
+ end
313
+ end
314
+
315
+ def run_command(spawn_options={})
316
+ command = create_command(ruby)
317
+ command.run("-e", "true",
318
+ data: @data,
319
+ spawn_options: spawn_options)
320
+ end
321
+
322
+ def test_default
323
+ ChupaText::ExternalCommand.default_limit_as = "100MiB"
324
+ messages = capture_log do
325
+ run_command
326
+ end
327
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
328
+ assert_equal([
329
+ [
330
+ :info,
331
+ "[external-command][limit][as][set] " +
332
+ "<#{100 * 1024 * 1024}>" +
333
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
334
+ ]
335
+ ],
336
+ messages)
337
+ end
338
+
339
+ def test_default_data_not_use
340
+ ChupaText::ExternalCommand.default_limit_as = "100MiB"
341
+ @data.limit_as = "150MiB"
342
+ messages = capture_log do
343
+ run_command
344
+ end
345
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
346
+ assert_equal([
347
+ [
348
+ :info,
349
+ "[external-command][limit][as][set] " +
350
+ "<#{100 * 1024 * 1024}>" +
351
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
352
+ ]
353
+ ],
354
+ messages)
355
+ end
356
+
357
+ def test_default_soft_use
358
+ ChupaText::ExternalCommand.default_limit_as = "100MiB"
359
+ @data.limit_as = "50MiB"
360
+ messages = capture_log do
361
+ run_command
362
+ end
363
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
364
+ assert_equal([
365
+ [
366
+ :info,
367
+ "[external-command][limit][as][set] " +
368
+ "<#{50 * 1024 * 1024}>" +
369
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
370
+ ]
371
+ ],
372
+ messages)
373
+ end
374
+
375
+ def test_default_soft_only
376
+ @data.limit_as = "50MiB"
377
+ messages = capture_log do
378
+ run_command
379
+ end
380
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
381
+ assert_equal([
382
+ [
383
+ :info,
384
+ "[external-command][limit][as][set] " +
385
+ "<#{50 * 1024 * 1024}>" +
386
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
387
+ ]
388
+ ],
389
+ messages)
390
+ end
391
+ end
79
392
  end
@@ -231,10 +231,11 @@ class TestExtractor < Test::Unit::TestCase
231
231
 
232
232
  sub_test_case("max body size") do
233
233
  def test_last_invalid
234
- @extractor = ChupaText::Extractor.new(max_body_size: 5)
234
+ @extractor = ChupaText::Extractor.new
235
235
  data = ChupaText::Data.new
236
236
  data.mime_type = "text/plain"
237
237
  data.body = "こん"
238
+ data.max_body_size = 5
238
239
  assert_equal(["こ"], extract(data))
239
240
  end
240
241
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-03 00:00:00.000000000 Z
11
+ date: 2019-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: archive-zip
@@ -159,6 +159,7 @@ files:
159
159
  - lib/chupa-text/decomposers.rb
160
160
  - lib/chupa-text/decomposers/csv.rb
161
161
  - lib/chupa-text/decomposers/gzip.rb
162
+ - lib/chupa-text/decomposers/http-server.rb
162
163
  - lib/chupa-text/decomposers/office-open-xml-document.rb
163
164
  - lib/chupa-text/decomposers/office-open-xml-presentation.rb
164
165
  - lib/chupa-text/decomposers/office-open-xml-workbook.rb
@@ -198,6 +199,7 @@ files:
198
199
  - test/command/test-chupa-text.rb
199
200
  - test/decomposers/test-csv.rb
200
201
  - test/decomposers/test-gzip.rb
202
+ - test/decomposers/test-http-server.rb
201
203
  - test/decomposers/test-office-open-xml-document.rb
202
204
  - test/decomposers/test-office-open-xml-presentation.rb
203
205
  - test/decomposers/test-office-open-xml-workbook.rb