chupa-text 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 99a53085d7eca8e459b0944e44cdb415036d19b6dc5a366e10bc3921a8c8cb09
4
- data.tar.gz: ce808a0a9e9352e2b4cf92c9e7348b57e522ee1717dc8070631451aa6a3b9a29
3
+ metadata.gz: f6d2f05926206d3da67e157161c9e4c35f036af80e80e5177fbc4edf9006e039
4
+ data.tar.gz: 5ed29a55f62d7a44cbbdbfc690b39005d35b354c2ba65629436b90d1ef32cef1
5
5
  SHA512:
6
- metadata.gz: 334356c90df57f22ae1fe0871e93147470c7505782487f71613f3fa794ea7fe0ed03b16980892beb46d662f830b876b6b4ff5132b8677892be1246ea26e38f40
7
- data.tar.gz: 86de4013d3f07d4d89c2113d01d78d4e6685b95af28ab1ad72267908d3c4b9ea30720eec515380989b6fb2a50d913041ee8223197cbcfa00c51b61b97506d1d7
6
+ metadata.gz: cd5e2b4b04d2572bb90ec832b618bc855e5c038d0a46f84a970f8e1ae1011609356387c30a71c6d476362fb27391094a25d0759d62bd56772a630914f543e644
7
+ data.tar.gz: c1c22bb010320fe5f48eb0c8b500e54b4557ca0f24461b832c3ae278c8dd9b0c6dc93f5682cf9b52d15b05673cd013c5546543e2f46e095b72fe46d64deb0aba
data/doc/text/news.md CHANGED
@@ -1,5 +1,36 @@
1
1
  # News
2
2
 
3
+ ## 1.2.2: 2019-03-28
4
+
5
+ ### Improvements
6
+
7
+ * Added `http-server` decomposer.
8
+
9
+ * `ChupaText::Data#max_body_size`: Added.
10
+
11
+ * `ChupaText::Data#max_body_size=`: Added.
12
+
13
+ * `ChupaText::Data#timeout`: Added.
14
+
15
+ * `ChupaText::Data#timeout=`: Added.
16
+
17
+ * `ChupaText::Data#limit_cpu`: Added.
18
+
19
+ * `ChupaText::Data#limit_cpu=`: Added.
20
+
21
+ * `ChupaText::Data#limit_as`: Added.
22
+
23
+ * `ChupaText::Data#limit_as=`: Added.
24
+
25
+ * `ChupaText::ExternalCommand`: Added support for soft timeout and limits.
26
+
27
+ * `ChupaText::Extractor`: Stopped receiving the max body size as an
28
+ option. Use `ChupaText::Data#max_body_size=` instead.
29
+
30
+ ### Fixes
31
+
32
+ * Fixed decomposer selection logic.
33
+
3
34
  ## 1.2.1: 2019-03-04
4
35
 
5
36
  ### Improvements
@@ -196,7 +196,7 @@ module ChupaText
196
196
  end
197
197
 
198
198
  def create_extractor
199
- extractor = Extractor.new(max_body_size: @max_body_size)
199
+ extractor = Extractor.new
200
200
  extractor.apply_configuration(@configuration)
201
201
  extractor
202
202
  end
@@ -222,6 +222,7 @@ module ChupaText
222
222
  data.mime_type = @mime_type if @mime_type
223
223
  data.need_screenshot = @need_screenshot
224
224
  data.expected_screenshot_size = @expected_screenshot_size
225
+ data.max_body_size = @max_body_size
225
226
  data
226
227
  end
227
228
 
@@ -65,6 +65,20 @@ module ChupaText
65
65
  # @return [Array<Integer, Integer>] the expected screenshot size.
66
66
  attr_accessor :expected_screenshot_size
67
67
 
68
+ # @return [Integer, nil] the max body size in bytes.
69
+ attr_accessor :max_body_size
70
+
71
+ # @return [Numeric, String, nil] the timeout on extraction.
72
+ attr_accessor :timeout
73
+
74
+ # @return [Numeric, String, nil] the max CPU time on extraction by
75
+ # external command.
76
+ attr_accessor :limit_cpu
77
+
78
+ # @return [Numeric, String, nil] the max memory on extraction by
79
+ # external command.
80
+ attr_accessor :limit_as
81
+
68
82
  def initialize(options={})
69
83
  @uri = nil
70
84
  @body = nil
@@ -76,6 +90,10 @@ module ChupaText
76
90
  @screenshot = nil
77
91
  @need_screenshot = true
78
92
  @expected_screenshot_size = [200, 200]
93
+ @max_body_size = nil
94
+ @timeout = nil
95
+ @limit_cpu = nil
96
+ @limit_as = nil
79
97
  @options = options || {}
80
98
  source_data = @options[:source_data]
81
99
  if source_data
@@ -107,6 +125,10 @@ module ChupaText
107
125
  end
108
126
  self.need_screenshot = data.need_screenshot?
109
127
  self.expected_screenshot_size = data.expected_screenshot_size
128
+ self.max_body_size = data.max_body_size
129
+ self.timeout = data.timeout
130
+ self.limit_cpu = data.limit_cpu
131
+ self.limit_as = data.limit_as
110
132
  end
111
133
 
112
134
  # @param [String, URI, nil] uri The URI for the data. If `uri` is
@@ -198,11 +220,11 @@ module ChupaText
198
220
  @need_screenshot
199
221
  end
200
222
 
201
- def to_utf8_body_data(max_body_size: nil)
223
+ def to_utf8_body_data
202
224
  b = nil
203
- if max_body_size
225
+ if @max_body_size
204
226
  open do |input|
205
- b = input.read(max_body_size)
227
+ b = input.read(@max_body_size)
206
228
  end
207
229
  else
208
230
  b = body
@@ -211,7 +233,7 @@ module ChupaText
211
233
 
212
234
  converter = UTF8Converter.new(b)
213
235
  utf8_body = converter.convert
214
- if max_body_size.nil? and b.equal?(utf8_body)
236
+ if @max_body_size.nil? and b.equal?(utf8_body)
215
237
  self
216
238
  else
217
239
  TextData.new(utf8_body, source_data: self)
@@ -0,0 +1,160 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ require "net/http"
18
+ require "pp"
19
+ require "uri"
20
+
21
+ module ChupaText
22
+ module Decomposers
23
+ class HTTPServer < Decomposer
24
+ include Loggable
25
+
26
+ registry.register("http-server", self)
27
+
28
+ @@default_url = nil
29
+ class << self
30
+ def default_url
31
+ @@default_url
32
+ end
33
+
34
+ def default_url=(url)
35
+ @@default_url = url
36
+ end
37
+ end
38
+
39
+ def initialize(options)
40
+ super
41
+ @url = @options[:url] ||
42
+ self.class.default_url ||
43
+ ENV["CHUPA_TEXT_HTTP_SERVER_URL"]
44
+ @url = URI(@url) if @url
45
+ end
46
+
47
+ def target?(data)
48
+ return false unless @url
49
+ return false if data.text_plain?
50
+ true
51
+ end
52
+
53
+ def target_score(data)
54
+ if target?(data)
55
+ 100
56
+ else
57
+ nil
58
+ end
59
+ end
60
+
61
+ def decompose(data, &block)
62
+ http = Net::HTTP.new(@url.host, @url.port)
63
+ http.use_ssl = true if @url.is_a?(URI::HTTPS)
64
+ if data.timeout.is_a?(Numeric)
65
+ http.open_timeout = data.timeout * 1.5
66
+ http.read_timeout = data.timeout * 1.5
67
+ http.write_timeout = data.timeout * 1.5
68
+ end
69
+ begin
70
+ http.start do
71
+ process_request(http, data, &block)
72
+ end
73
+ rescue SystemCallError => error
74
+ error do
75
+ message = "#{log_tag}[connection] "
76
+ message << "Failed to process data in server: "
77
+ message << "#{@url}: "
78
+ message << "#{error.class}: #{error.message}\n"
79
+ message << error.backtrace.join("\n")
80
+ message
81
+ end
82
+ rescue Net::ReadTimeout => error
83
+ error do
84
+ message = "#{log_tag}[timeout] "
85
+ message << "Failed to process data in server: "
86
+ message << "#{@url}: "
87
+ message << "#{error.class}: #{error.message}\n"
88
+ message << error.backtrace.join("\n")
89
+ message
90
+ end
91
+ end
92
+ end
93
+
94
+ private
95
+ def process_request(http, data)
96
+ request = Net::HTTP::Post.new(@url)
97
+ request["transfer-encoding"] = "chunked"
98
+ data.open do |input|
99
+ request.set_form(build_parameters(data, input),
100
+ "multipart/form-data")
101
+ response = http.request(request)
102
+ case response
103
+ when Net::HTTPOK
104
+ extracted = JSON.parse(response.body)
105
+ (extracted["texts"] || []).each do |text|
106
+ text_data = TextData.new(text["body"], source_data: data)
107
+ text.each do |key, value|
108
+ next if key == "body"
109
+ text_data[key] = value
110
+ end
111
+ yield(text_data)
112
+ end
113
+ else
114
+ error do
115
+ message = "#{log_tag} Failed to process data in server: "
116
+ message << "#{@url}: "
117
+ message << "#{response.code}: #{response.message.strip}\n"
118
+ case response.content_type
119
+ when "application/json"
120
+ PP.pp(JSON.parse(response.body), message)
121
+ else
122
+ message << response.body
123
+ end
124
+ message
125
+ end
126
+ end
127
+ end
128
+ end
129
+
130
+ def build_parameters(data, input)
131
+ parameters = []
132
+ [
133
+ ["timeout",
134
+ data.timeout || ChupaText::ExternalCommand.default_timeout],
135
+ ["limit_cpu",
136
+ data.limit_cpu || ChupaText::ExternalCommand.default_limit_cpu],
137
+ ["limit_as",
138
+ data.limit_as || ChupaText::ExternalCommand.default_limit_as],
139
+ ["max_body_size", data.max_body_size],
140
+ ].each do |key, value|
141
+ next if value.nil?
142
+ parameters << [key, StringIO.new(value.to_s)]
143
+ end
144
+ parameters << [
145
+ "data",
146
+ input,
147
+ {
148
+ filename: data.path.to_s,
149
+ content_type: data.mime_type,
150
+ },
151
+ ]
152
+ parameters
153
+ end
154
+
155
+ def log_tag
156
+ "[decomposer][http-server]"
157
+ end
158
+ end
159
+ end
160
+ end
@@ -69,13 +69,19 @@ module ChupaText
69
69
  else
70
70
  options = {}
71
71
  end
72
+ data = options[:data]
72
73
  pid = spawn(options[:env] || {},
73
74
  @path.to_s,
74
75
  *arguments,
75
- spawn_options(options[:spawn_options]))
76
+ spawn_options(options[:spawn_options], data))
77
+ if data
78
+ soft_timeout = data.timeout
79
+ else
80
+ soft_timeout = nil
81
+ end
76
82
  status = nil
77
83
  begin
78
- status = wait_process(pid, options[:timeout])
84
+ status = wait_process(pid, options[:timeout], soft_timeout)
79
85
  ensure
80
86
  unless status
81
87
  begin
@@ -99,28 +105,44 @@ module ChupaText
99
105
  end
100
106
 
101
107
  private
102
- def spawn_options(user_options)
108
+ def spawn_options(user_options, data)
103
109
  options = (user_options || {}).dup
104
- apply_default_spawn_limit(options, :cpu, :int)
105
- apply_default_spawn_limit(options, :as, :size)
110
+ if data
111
+ soft_limit_cpu = data.limit_cpu
112
+ soft_limit_as = data.limit_as
113
+ else
114
+ soft_limit_cpu = nil
115
+ soft_limit_as = nil
116
+ end
117
+ apply_default_spawn_limit(options, soft_limit_cpu, :cpu, :time)
118
+ apply_default_spawn_limit(options, soft_limit_as, :as, :size)
106
119
  options
107
120
  end
108
121
 
109
- def apply_default_spawn_limit(options, key, type)
122
+ def apply_default_spawn_limit(options, soft_value, key, type)
110
123
  # TODO: Workaround for Ruby 2.3.3p222
111
124
  case key
112
125
  when :cpu
113
126
  option_key = :rlimit_cpu
127
+ unit = "s"
114
128
  when :as
115
129
  option_key = :rlimit_as
130
+ unit = ""
116
131
  else
117
132
  option_key = :"rlimit_#{key}"
133
+ unit = ""
118
134
  end
119
135
  return if options[option_key]
120
136
 
121
137
  tag = "[limit][#{key}]"
122
138
  value = self.class.__send__("default_limit_#{key}")
123
139
  value = __send__("parse_#{type}", tag, value)
140
+ soft_value = __send__("parse_#{type}", tag, soft_value)
141
+ if value
142
+ value = soft_value if soft_value and soft_value < value
143
+ else
144
+ value = soft_value
145
+ end
124
146
  return if value.nil?
125
147
  rlimit_number = Process.const_get("RLIMIT_#{key.to_s.upcase}")
126
148
  soft_limit, hard_limit = Process.getrlimit(rlimit_number)
@@ -129,7 +151,7 @@ module ChupaText
129
151
  return nil
130
152
  end
131
153
  limit_info = "soft-limit:#{soft_limit}, hard-limit:#{hard_limit}"
132
- info("#{log_tag}#{tag}[set] <#{value}>(#{limit_info})")
154
+ info("#{log_tag}#{tag}[set] <#{value}#{unit}>(#{limit_info})")
133
155
 
134
156
  options[option_key] = value
135
157
  end
@@ -169,12 +191,21 @@ module ChupaText
169
191
  scale = 1
170
192
  case value
171
193
  when /GB?\z/i
194
+ scale = 1000 ** 3
195
+ number = $PREMATCH
196
+ when /GiB?\z/i
172
197
  scale = 1024 ** 3
173
198
  number = $PREMATCH
174
199
  when /MB?\z/i
200
+ scale = 1000 ** 2
201
+ number = $PREMATCH
202
+ when /MiB?\z/i
175
203
  scale = 1024 ** 2
176
204
  number = $PREMATCH
177
- when /KB?\z/i
205
+ when /[kK]B?\z/i
206
+ scale = 1000 ** 1
207
+ number = $PREMATCH
208
+ when /KiB?\z/i
178
209
  scale = 1024 ** 1
179
210
  number = $PREMATCH
180
211
  when /B?\z/i
@@ -227,9 +258,15 @@ module ChupaText
227
258
  warn("#{log_tag}#{tag}[invalid] <#{value}>(#{type})")
228
259
  end
229
260
 
230
- def wait_process(pid, timeout)
261
+ def wait_process(pid, timeout, soft_timeout)
231
262
  tag = "[timeout]"
232
263
  timeout = parse_time(tag, timeout || self.class.default_timeout)
264
+ soft_timeout = parse_time(tag, soft_timeout)
265
+ if timeout
266
+ timeout = soft_timeout if soft_timeout and soft_timeout < timeout
267
+ else
268
+ timeout = soft_timeout
269
+ end
233
270
  if timeout
234
271
  info("#{log_tag}#{tag}[use] <#{timeout}s>: <#{pid}>")
235
272
  status = wait_process_timeout(pid, timeout)
@@ -21,9 +21,8 @@ module ChupaText
21
21
  class Extractor
22
22
  include Loggable
23
23
 
24
- def initialize(max_body_size: nil)
24
+ def initialize
25
25
  @decomposers = []
26
- @max_body_size = max_body_size
27
26
  end
28
27
 
29
28
  # Sets the extractor up by the configuration. It adds decomposers
@@ -79,7 +78,7 @@ module ChupaText
79
78
  candidates << [score, decomposer]
80
79
  end
81
80
  return nil if candidates.empty?
82
- candidate = candidates.sort_by {|score, _| score}.first
81
+ candidate = candidates.sort_by {|score, _| -score}.first
83
82
  candidate[1]
84
83
  end
85
84
 
@@ -91,11 +90,11 @@ module ChupaText
91
90
  if decomposer.nil?
92
91
  if target.text_plain?
93
92
  debug {"#{log_tag}[extract][text-plain]"}
94
- yield(target.to_utf8_body_data(max_body_size: @max_body_size))
93
+ yield(target.to_utf8_body_data)
95
94
  else
96
95
  debug {"#{log_tag}[extract][decomposer] not found"}
97
96
  if target.text?
98
- yield(target.to_utf8_body_data(max_body_size: @max_body_size))
97
+ yield(target.to_utf8_body_data)
99
98
  end
100
99
  end
101
100
  else
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.2.1"
18
+ VERSION = "1.2.2"
19
19
  end
@@ -14,7 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
- class TestDecomposersCSV< Test::Unit::TestCase
17
+ class TestDecomposersCSV < Test::Unit::TestCase
18
18
  include Helper
19
19
 
20
20
  def setup
@@ -0,0 +1,175 @@
1
+ # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ class TestDecomposersHTTPServer < Test::Unit::TestCase
18
+ include Helper
19
+
20
+ def setup
21
+ ChupaText::Decomposers::HTTPServer.default_url = nil
22
+ setup_server
23
+ setup_data
24
+ setup_decomposer
25
+ end
26
+
27
+ def setup_server
28
+ @port = 40080
29
+ @path = "/extraction.json"
30
+ @server_url = "http://127.0.0.1:#{@port}#{@path}"
31
+ logger = WEBrick::Log.new
32
+ logger.level = logger.class::ERROR
33
+ @server = WEBrick::HTTPServer.new(Port: @port,
34
+ Logger: logger,
35
+ AccessLog: [])
36
+ @response_status = 200
37
+ @server.mount_proc(@path) do |request, response|
38
+ sleep(@timeout * 2) if @timeout
39
+ response.status = @response_status
40
+ response.content_type = "application/json"
41
+ response.body = JSON.generate(@response)
42
+ end
43
+ @server_thread = Thread.new do
44
+ @server.start
45
+ end
46
+ end
47
+
48
+ def setup_data
49
+ @input_data = <<-CSV
50
+ Hello,World
51
+ Ruby,ChupaText
52
+ CSV
53
+ @input_mime_type = "text/csv"
54
+ @input_path = "/tmp/hello.csv"
55
+ @timeout = nil
56
+ @extracted_text = @input_data.gsub(/,/, "\t")
57
+ @extracted_path = @input_path.gsub(/\.csv\z/, ".txt")
58
+ @response = {
59
+ "mime-type" => @input_mime_type,
60
+ "uri" => "file://#{@input_path}",
61
+ "path" => @input_path,
62
+ "size" => @input_data.bytesize,
63
+ "texts" => [
64
+ {
65
+ "mime-type" => "text/plain",
66
+ "uri" => "file://#{@extracted_path}",
67
+ "path" => @extracted_path,
68
+ "size" => @extracted_text.bytesize,
69
+ "source-mime-types" => [
70
+ @input_mime_type,
71
+ ],
72
+ "body" => @extracted_text,
73
+ },
74
+ ],
75
+ }
76
+ end
77
+
78
+ def setup_decomposer
79
+ @decomposer = ChupaText::Decomposers::HTTPServer.new(:url => @server_url)
80
+ end
81
+
82
+ def teardown
83
+ teardown_server
84
+ end
85
+
86
+ def teardown_server
87
+ @server.shutdown
88
+ @server_thread.join
89
+ end
90
+
91
+ sub_test_case("decompose") do
92
+ def test_success
93
+ assert_equal([@extracted_text],
94
+ decompose.collect(&:body))
95
+ end
96
+
97
+ def test_not_ok
98
+ @response_status = 404
99
+ messages = capture_log do
100
+ assert_equal([], decompose.collect(&:body))
101
+ end
102
+ assert_equal([
103
+ [
104
+ :error,
105
+ "[decomposer][http-server] " +
106
+ "Failed to process data in server: " +
107
+ "#{@server_url}: " +
108
+ "#{@response_status}: Not Found",
109
+ ],
110
+ ],
111
+ messages)
112
+ end
113
+
114
+ def test_no_server
115
+ no_server_url = "http://127.0.0.1:2929/extraction.json"
116
+ @decomposer = ChupaText::Decomposers::HTTPServer.new(:url => no_server_url)
117
+ messages = capture_log do
118
+ assert_equal([], decompose.collect(&:body))
119
+ end
120
+ messages = messages.collect do |level, message|
121
+ [level, message.gsub(/Errno::.*\z/, "")]
122
+ end
123
+ assert_equal([
124
+ [
125
+ :error,
126
+ "[decomposer][http-server][connection] " +
127
+ "Failed to process data in server: " +
128
+ "#{no_server_url}: ",
129
+ ],
130
+ ],
131
+ messages)
132
+ end
133
+
134
+ def test_read_timeout
135
+ @timeout = 0.1
136
+ messages = capture_log do
137
+ assert_equal([], decompose.collect(&:body))
138
+ end
139
+ messages = messages.collect do |level, message|
140
+ [level, message.gsub(/Net::.*\z/, "")]
141
+ end
142
+ assert_equal([
143
+ [
144
+ :error,
145
+ "[decomposer][http-server][timeout] " +
146
+ "Failed to process data in server: " +
147
+ "#{@server_url}: ",
148
+ ],
149
+ ],
150
+ messages)
151
+ end
152
+
153
+ def test_default_url
154
+ ChupaText::Decomposers::HTTPServer.default_url = @server_url
155
+ @decomposer = ChupaText::Decomposers::HTTPServer.new({})
156
+ assert_equal([@extracted_text],
157
+ decompose.collect(&:body))
158
+ end
159
+
160
+ private
161
+ def decompose
162
+ data = ChupaText::Data.new
163
+ data.path = @input_path
164
+ data.mime_type = @input_mime_type
165
+ data.body = @input_data
166
+ data.timeout = @timeout
167
+
168
+ decomposed = []
169
+ @decomposer.decompose(data) do |decomposed_data|
170
+ decomposed << decomposed_data
171
+ end
172
+ decomposed
173
+ end
174
+ end
175
+ end
data/test/helper.rb CHANGED
@@ -15,8 +15,10 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  require "pathname"
18
+ require "rbconfig"
18
19
  require "tempfile"
19
20
  require "uri"
21
+ require "webrick"
20
22
 
21
23
  module Helper
22
24
  def fixture_path(*components)
@@ -39,4 +41,8 @@ module Helper
39
41
  [level, message]
40
42
  end
41
43
  end
44
+
45
+ def ruby
46
+ RbConfig.ruby
47
+ end
42
48
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2014 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2014-2019 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -14,12 +14,8 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
- require "rbconfig"
18
-
19
17
  class TestExternalCommand < Test::Unit::TestCase
20
- def ruby
21
- RbConfig.ruby
22
- end
18
+ include Helper
23
19
 
24
20
  def create_command(command)
25
21
  ChupaText::ExternalCommand.new(command)
@@ -76,4 +72,321 @@ class TestExternalCommand < Test::Unit::TestCase
76
72
  assert_false(exist?("nonexistent"))
77
73
  end
78
74
  end
75
+
76
+ class TestTimeout < self
77
+ def setup
78
+ @data = ChupaText::TextData.new("Hello")
79
+ timeout = ChupaText::ExternalCommand.default_timeout
80
+ begin
81
+ yield
82
+ ensure
83
+ ChupaText::ExternalCommand.default_timeout = timeout
84
+ end
85
+ end
86
+
87
+ def run_command(options={})
88
+ IO.pipe do |input, output|
89
+ command = create_command(ruby)
90
+ command.run("-e", "puts(Process.pid)",
91
+ options.merge(data: @data,
92
+ spawn_options: {out: output}))
93
+ input.gets.chomp
94
+ end
95
+ end
96
+
97
+ def test_option
98
+ pid = nil
99
+ messages = capture_log do
100
+ pid = run_command(timeout: "60s")
101
+ end
102
+ assert_equal([
103
+ [
104
+ :info,
105
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
106
+ ]
107
+ ],
108
+ messages)
109
+ end
110
+
111
+ def test_data_not_use
112
+ @data.timeout = "90s"
113
+ pid = nil
114
+ messages = capture_log do
115
+ pid = run_command(timeout: "60s")
116
+ end
117
+ assert_equal([
118
+ [
119
+ :info,
120
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
121
+ ]
122
+ ],
123
+ messages)
124
+ end
125
+
126
+ def test_data_use
127
+ @data.timeout = "30s"
128
+ pid = nil
129
+ messages = capture_log do
130
+ pid = run_command(timeout: "60s")
131
+ end
132
+ assert_equal([
133
+ [
134
+ :info,
135
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
136
+ ]
137
+ ],
138
+ messages)
139
+ end
140
+
141
+ def test_data_only
142
+ @data.timeout = "30s"
143
+ pid = nil
144
+ messages = capture_log do
145
+ pid = run_command
146
+ end
147
+ assert_equal([
148
+ [
149
+ :info,
150
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
151
+ ]
152
+ ],
153
+ messages)
154
+ end
155
+
156
+ def test_default
157
+ ChupaText::ExternalCommand.default_timeout = "60s"
158
+ pid = nil
159
+ messages = capture_log do
160
+ pid = run_command
161
+ end
162
+ assert_equal([
163
+ [
164
+ :info,
165
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
166
+ ]
167
+ ],
168
+ messages)
169
+ end
170
+
171
+ def test_default_data_not_use
172
+ ChupaText::ExternalCommand.default_timeout = "60s"
173
+ @data.timeout = "90s"
174
+ pid = nil
175
+ messages = capture_log do
176
+ pid = run_command
177
+ end
178
+ assert_equal([
179
+ [
180
+ :info,
181
+ "[external-command][timeout][use] <60.0s>: <#{pid}>",
182
+ ]
183
+ ],
184
+ messages)
185
+ end
186
+
187
+ def test_default_data_use
188
+ ChupaText::ExternalCommand.default_timeout = "60s"
189
+ @data.timeout = "30s"
190
+ pid = nil
191
+ messages = capture_log do
192
+ pid = run_command
193
+ end
194
+ assert_equal([
195
+ [
196
+ :info,
197
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
198
+ ]
199
+ ],
200
+ messages)
201
+ end
202
+
203
+ def test_default_data_only
204
+ @data.timeout = "30s"
205
+ pid = nil
206
+ messages = capture_log do
207
+ pid = run_command
208
+ end
209
+ assert_equal([
210
+ [
211
+ :info,
212
+ "[external-command][timeout][use] <30.0s>: <#{pid}>",
213
+ ]
214
+ ],
215
+ messages)
216
+ end
217
+ end
218
+
219
+ class TestLimitCPU < self
220
+ def setup
221
+ @data = ChupaText::TextData.new("Hello")
222
+ limit_cpu = ChupaText::ExternalCommand.default_limit_cpu
223
+ begin
224
+ yield
225
+ ensure
226
+ ChupaText::ExternalCommand.default_limit_cpu = limit_cpu
227
+ end
228
+ end
229
+
230
+ def run_command(spawn_options={})
231
+ command = create_command(ruby)
232
+ command.run("-e", "true",
233
+ data: @data,
234
+ spawn_options: spawn_options)
235
+ end
236
+
237
+ def test_default
238
+ ChupaText::ExternalCommand.default_limit_cpu = "60s"
239
+ messages = capture_log do
240
+ run_command
241
+ end
242
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
243
+ assert_equal([
244
+ [
245
+ :info,
246
+ "[external-command][limit][cpu][set] <60.0s>" +
247
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
248
+ ]
249
+ ],
250
+ messages)
251
+ end
252
+
253
+ def test_default_data_not_use
254
+ ChupaText::ExternalCommand.default_limit_cpu = "60s"
255
+ @data.limit_cpu = "90s"
256
+ messages = capture_log do
257
+ run_command
258
+ end
259
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
260
+ assert_equal([
261
+ [
262
+ :info,
263
+ "[external-command][limit][cpu][set] <60.0s>" +
264
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
265
+ ]
266
+ ],
267
+ messages)
268
+ end
269
+
270
+ def test_default_data_use
271
+ ChupaText::ExternalCommand.default_limit_cpu = "60s"
272
+ @data.limit_cpu = "30s"
273
+ messages = capture_log do
274
+ run_command
275
+ end
276
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
277
+ assert_equal([
278
+ [
279
+ :info,
280
+ "[external-command][limit][cpu][set] <30.0s>" +
281
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
282
+ ]
283
+ ],
284
+ messages)
285
+ end
286
+
287
+ def test_default_data_only
288
+ @data.limit_cpu = "30s"
289
+ messages = capture_log do
290
+ run_command
291
+ end
292
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_CPU)
293
+ assert_equal([
294
+ [
295
+ :info,
296
+ "[external-command][limit][cpu][set] <30.0s>" +
297
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
298
+ ]
299
+ ],
300
+ messages)
301
+ end
302
+ end
303
+
304
+ class TestLimitAS < self
305
+ def setup
306
+ @data = ChupaText::TextData.new("Hello")
307
+ limit_as = ChupaText::ExternalCommand.default_limit_as
308
+ begin
309
+ yield
310
+ ensure
311
+ ChupaText::ExternalCommand.default_limit_as = limit_as
312
+ end
313
+ end
314
+
315
+ def run_command(spawn_options={})
316
+ command = create_command(ruby)
317
+ command.run("-e", "true",
318
+ data: @data,
319
+ spawn_options: spawn_options)
320
+ end
321
+
322
+ def test_default
323
+ ChupaText::ExternalCommand.default_limit_as = "100MiB"
324
+ messages = capture_log do
325
+ run_command
326
+ end
327
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
328
+ assert_equal([
329
+ [
330
+ :info,
331
+ "[external-command][limit][as][set] " +
332
+ "<#{100 * 1024 * 1024}>" +
333
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
334
+ ]
335
+ ],
336
+ messages)
337
+ end
338
+
339
+ def test_default_data_not_use
340
+ ChupaText::ExternalCommand.default_limit_as = "100MiB"
341
+ @data.limit_as = "150MiB"
342
+ messages = capture_log do
343
+ run_command
344
+ end
345
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
346
+ assert_equal([
347
+ [
348
+ :info,
349
+ "[external-command][limit][as][set] " +
350
+ "<#{100 * 1024 * 1024}>" +
351
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
352
+ ]
353
+ ],
354
+ messages)
355
+ end
356
+
357
+ def test_default_soft_use
358
+ ChupaText::ExternalCommand.default_limit_as = "100MiB"
359
+ @data.limit_as = "50MiB"
360
+ messages = capture_log do
361
+ run_command
362
+ end
363
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
364
+ assert_equal([
365
+ [
366
+ :info,
367
+ "[external-command][limit][as][set] " +
368
+ "<#{50 * 1024 * 1024}>" +
369
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
370
+ ]
371
+ ],
372
+ messages)
373
+ end
374
+
375
+ def test_default_soft_only
376
+ @data.limit_as = "50MiB"
377
+ messages = capture_log do
378
+ run_command
379
+ end
380
+ soft_limit, hard_limit = Process.getrlimit(Process::RLIMIT_AS)
381
+ assert_equal([
382
+ [
383
+ :info,
384
+ "[external-command][limit][as][set] " +
385
+ "<#{50 * 1024 * 1024}>" +
386
+ "(soft-limit:#{soft_limit}, hard-limit:#{hard_limit})",
387
+ ]
388
+ ],
389
+ messages)
390
+ end
391
+ end
79
392
  end
@@ -231,10 +231,11 @@ class TestExtractor < Test::Unit::TestCase
231
231
 
232
232
  sub_test_case("max body size") do
233
233
  def test_last_invalid
234
- @extractor = ChupaText::Extractor.new(max_body_size: 5)
234
+ @extractor = ChupaText::Extractor.new
235
235
  data = ChupaText::Data.new
236
236
  data.mime_type = "text/plain"
237
237
  data.body = "こん"
238
+ data.max_body_size = 5
238
239
  assert_equal(["こ"], extract(data))
239
240
  end
240
241
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-03 00:00:00.000000000 Z
11
+ date: 2019-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: archive-zip
@@ -159,6 +159,7 @@ files:
159
159
  - lib/chupa-text/decomposers.rb
160
160
  - lib/chupa-text/decomposers/csv.rb
161
161
  - lib/chupa-text/decomposers/gzip.rb
162
+ - lib/chupa-text/decomposers/http-server.rb
162
163
  - lib/chupa-text/decomposers/office-open-xml-document.rb
163
164
  - lib/chupa-text/decomposers/office-open-xml-presentation.rb
164
165
  - lib/chupa-text/decomposers/office-open-xml-workbook.rb
@@ -198,6 +199,7 @@ files:
198
199
  - test/command/test-chupa-text.rb
199
200
  - test/decomposers/test-csv.rb
200
201
  - test/decomposers/test-gzip.rb
202
+ - test/decomposers/test-http-server.rb
201
203
  - test/decomposers/test-office-open-xml-document.rb
202
204
  - test/decomposers/test-office-open-xml-presentation.rb
203
205
  - test/decomposers/test-office-open-xml-workbook.rb