chupa-text 1.2.9 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a96f66fb14ab05d80ef7d53494ee20d2aae298397d3b787591efd3ebfb30d5ab
4
- data.tar.gz: 7dcaeb98ff526ea31b52871b492211d624b607957146aa6986eb5b6d41eba2e4
3
+ metadata.gz: 97113be234e69a94d3710448d9468f4138fc7aa686275201c9f2441ed6fd217b
4
+ data.tar.gz: be11f97456e1511bb91fed287485a456c9a8db7fafc4357d8cf0784dfb444a0e
5
5
  SHA512:
6
- metadata.gz: 8c07878c8308483eebecc3e683c74dbc4ce00ee1841ac588c82ba173bd1644bde026703934e34f66ebaf8ca369091cbff3940025953992bf572d5c038718b746
7
- data.tar.gz: b8989bb876e8cafe5097bf5667a9e8fd796c7a8e9331254b4f5afef54774584a21ea0de20ea892745a9227275c21577f94d7399a76fb27963e3f0a09ca53e58f
6
+ metadata.gz: 4f4ebd3d6fdb67b2f70e73841325586e80b2a5ff94178d4f25066072890af613571939d2bfb0f12ff1465a4c4cacbcccc6a0a50bda0a5514ca247709f0b45d09
7
+ data.tar.gz: 3ae71d85db976f28f731df669fe2eddfc93d5cb94a176e01ef2298b5a4a5eaf5848fc33a1c10a70c286f00ca41773b00f3962bef0cf635399fa315b58bba4937
data/.yardopts CHANGED
@@ -1,5 +1,5 @@
1
1
  --output-dir doc/reference/en
2
2
  --markup markdown
3
- --markup-provider redcarpet
3
+ --markup-provider kramdown
4
4
  -
5
5
  doc/text/*
data/chupa-text.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  # -*- ruby -*-
2
2
  #
3
- # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
3
+ # Copyright (C) 2013-2020 Sutou Kouhei <kou@clear-code.com>
4
4
  #
5
5
  # This library is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the GNU Lesser General Public
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.name = "chupa-text"
33
33
  spec.version = ChupaText::VERSION
34
34
  spec.homepage = "http://ranguba.org/#about-chupa-text"
35
- spec.authors = ["Kouhei Sutou"]
35
+ spec.authors = ["Sutou Kouhei"]
36
36
  spec.email = ["kou@clear-code.com"]
37
37
  readme = File.read("README.md", :encoding => "UTF-8")
38
38
  entries = readme.split(/^\#\#\s(.*)$/)
@@ -52,11 +52,12 @@ Gem::Specification.new do |spec|
52
52
 
53
53
  spec.add_runtime_dependency("archive-zip", ">= 0.12.0")
54
54
  spec.add_runtime_dependency("csv", ">= 3.0.4")
55
+ spec.add_runtime_dependency("rexml")
55
56
 
56
57
  spec.add_development_dependency("bundler")
58
+ spec.add_development_dependency("kramdown")
57
59
  spec.add_development_dependency("nokogiri")
58
60
  spec.add_development_dependency("packnga")
59
61
  spec.add_development_dependency("rake")
60
- spec.add_development_dependency("redcarpet")
61
62
  spec.add_development_dependency("test-unit")
62
63
  end
data/doc/text/news.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # News
2
2
 
3
+ ## 1.3.3: 2022-02-01
4
+
5
+ ### Improvements
6
+
7
+ * `xlsx`: Added support for inline string.
8
+
9
+ ## 1.3.2: 2020-05-01
10
+
11
+ ### Improvements
12
+
13
+ * Added support for Ruby 2.8.
14
+
15
+ ## 1.3.1: 2019-06-18
16
+
17
+ ### Fixes
18
+
19
+ * `http-server`: Added support for `need_screenshot` parameter.
20
+
21
+ ## 1.3.0: 2019-06-14
22
+
23
+ ### Fixes
24
+
25
+ * Added support for timeout as string again.
26
+
3
27
  ## 1.2.9: 2019-06-13
4
28
 
5
29
  ### Improvements
@@ -144,6 +144,7 @@ module ChupaText
144
144
  ["limit_as",
145
145
  data.limit_as || ChupaText::ExternalCommand.default_limit_as],
146
146
  ["max_body_size", data.max_body_size],
147
+ ["need_screenshot", data.need_screenshot?],
147
148
  ].each do |key, value|
148
149
  next if value.nil?
149
150
  parameters << [key, StringIO.new(value.to_s)]
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2019 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2019-2022 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -119,7 +119,8 @@ module ChupaText
119
119
  def initialize(sheet)
120
120
  @sheet = sheet
121
121
  @cell_type = nil
122
- @in_v = false
122
+ @in_is = false # inline string
123
+ @in_v = false # value
123
124
  end
124
125
 
125
126
  def start_element(uri, local_name, qname, attributes)
@@ -129,7 +130,8 @@ module ChupaText
129
130
  @sheet << []
130
131
  when "c"
131
132
  @cell_type = parse_cell_type(attributes["t"])
132
- # when "is" # TODO
133
+ when "is"
134
+ @in_is = true
133
135
  when "v"
134
136
  @in_v = true
135
137
  end
@@ -140,6 +142,8 @@ module ChupaText
140
142
  case local_name
141
143
  when "c"
142
144
  @cell_type = nil
145
+ when "is"
146
+ @in_is = false
143
147
  when "v"
144
148
  @in_v = false
145
149
  end
@@ -174,8 +178,14 @@ module ChupaText
174
178
  end
175
179
  end
176
180
 
181
+ def have_text?
182
+ return true if @in_is
183
+ return true if @in_v
184
+ false
185
+ end
186
+
177
187
  def add_column(text)
178
- return unless @in_v
188
+ return unless have_text?
179
189
  case @cell_type
180
190
  when :shared_string
181
191
  @sheet.last << Integer(text, 10)
@@ -255,20 +255,21 @@ module ChupaText
255
255
  end
256
256
 
257
257
  def log_invalid_value(tag, value, type)
258
- warn("#{log_tag}#{tag}[invalid] <#{value}>(#{type})")
258
+ super("#{log_tag}#{tag}", value, type)
259
259
  end
260
260
 
261
261
  def wait_process(pid, timeout, soft_timeout)
262
262
  tag = "[timeout]"
263
- timeout = parse_time(tag, timeout || self.class.default_timeout)
264
- soft_timeout = parse_time(tag, soft_timeout)
263
+ timeout = TimeoutValue.new(tag, timeout || self.class.default_timeout).raw
264
+ soft_timeout = TimeoutValue.new(tag, soft_timeout).raw
265
265
  if timeout
266
266
  timeout = soft_timeout if soft_timeout and soft_timeout < timeout
267
267
  else
268
268
  timeout = soft_timeout
269
269
  end
270
270
  if timeout
271
- info("#{log_tag}#{tag}[use] <#{timeout}s>: <#{pid}>")
271
+ info("#{log_tag}#{tag}[use] " +
272
+ "<#{TimeoutValue.new(tag, timeout)}>: <#{pid}>")
272
273
  status = wait_process_timeout(pid, timeout)
273
274
  return status if status
274
275
  info("#{log_tag}#{tag}[terminate] <#{pid}>")
@@ -123,7 +123,7 @@ module ChupaText
123
123
  end
124
124
 
125
125
  def with_timeout(data, &block)
126
- timeout = data.timeout
126
+ timeout = TimeoutValue.new("#{log_tag}[timeout]", data.timeout).raw
127
127
  if timeout
128
128
  begin
129
129
  Timeout.timeout(timeout, &block)
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2019 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -45,5 +45,9 @@ module ChupaText
45
45
  def unknown(*arguments, &block)
46
46
  logger.unknown(*arguments, &block)
47
47
  end
48
+
49
+ def log_invalid_value(tag, value, type)
50
+ warn("#{tag}[invalid] <#{value}>(#{type})")
51
+ end
48
52
  end
49
53
  end
@@ -0,0 +1,76 @@
1
+ # Copyright (C) 2019 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ require "English"
18
+
19
module ChupaText
  # Normalizes a timeout that is given either as a number of seconds or
  # as a string with an optional unit suffix: `"30"`, `"1.5s"`, `"2m"`
  # or `"1h"` (suffixes are matched case-insensitively).
  #
  # The normalized value is exposed by `#raw`. `#to_s` renders it in a
  # human-friendly unit for log messages.
  class TimeoutValue
    include Comparable
    include Loggable

    # @return [Numeric, nil] the timeout in seconds. `nil` when no
    #   timeout was given, when the string was empty, or when the string
    #   could not be parsed.
    attr_reader :raw

    # @param tag [String] prefix for the log message that is emitted
    #   when `value` is a string that can't be parsed.
    # @param value [String, Numeric, nil] the timeout to normalize.
    #   Only strings are parsed; any other value is stored as is.
    def initialize(tag, value)
      # The tag must be stored before parsing because #parse passes it
      # to #log_invalid_value. Without this the log message has no
      # prefix that tells which timeout was invalid.
      @tag = tag
      value = parse(value) if value.is_a?(String)
      @raw = value
    end

    # Orders timeout values by their duration in seconds. This is what
    # makes the operators mixed in by `Comparable` usable.
    #
    # @param other [Object] the object to compare with.
    # @return [Integer, nil] `-1`, `0` or `1`. `nil` when `other` is not
    #   a `TimeoutValue` or when the two raw values can't be compared
    #   (for example, only one of them is `nil`).
    def <=>(other)
      return nil unless other.is_a?(TimeoutValue)

      @raw <=> other.raw
    end

    # @return [String] the timeout with two fractional digits in the
    #   largest fitting unit: milliseconds below 1 second, seconds below
    #   1 minute, minutes below 1 hour and hours otherwise. An empty
    #   string when the timeout is `nil`.
    def to_s
      return "" if @raw.nil?

      if @raw < 1
        "%.2fms" % (@raw * 1000.0)
      elsif @raw < 60
        "%.2fs" % @raw
      elsif @raw < (60 * 60)
        "%.2fm" % (@raw / 60.0)
      else
        "%.2fh" % (@raw / 60.0 / 60.0)
      end
    end

    private
    # Converts `value` to seconds.
    #
    # `nil` and numbers are returned unchanged. An empty string means
    # "no timeout" and yields `nil`. Any other string is read as a
    # number followed by an optional `h`, `m` or `s` suffix; a string
    # without suffix is treated as seconds.
    #
    # @return [Numeric, nil] seconds, or `nil` when `value` is invalid.
    #   Invalid strings are reported via #log_invalid_value.
    def parse(value)
      case value
      when nil
        nil
      when Numeric
        value
      else
        return nil if value.empty?
        scale = 1
        case value
        when /h\z/i
          scale = 60 * 60
          number = $PREMATCH
        when /m\z/i
          scale = 60
          number = $PREMATCH
        when /s\z/i
          number = $PREMATCH
        else
          number = value
        end
        begin
          # Float() is strict: it raises for "abc" or "" instead of
          # silently returning 0.0 like String#to_f does.
          number = Float(number)
        rescue ArgumentError
          log_invalid_value(@tag, value, "time")
          return nil
        end
        (number * scale).to_f
      end
    end
  end
end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013-2019 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2020 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.2.9"
18
+ VERSION = "1.3.3"
19
19
  end
data/lib/chupa-text.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2019 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -26,6 +26,8 @@ require "chupa-text/logger"
26
26
  require "chupa-text/loggable"
27
27
  require "chupa-text/unzippable"
28
28
 
29
+ require "chupa-text/timeout-value"
30
+
29
31
  require "chupa-text/configuration"
30
32
  require "chupa-text/configuration-loader"
31
33
  require "chupa-text/mime-type"
data/test/helper.rb CHANGED
@@ -45,4 +45,8 @@ module Helper
45
45
  def ruby
46
46
  RbConfig.ruby
47
47
  end
48
+
49
+ def omit_on_windows(message)
50
+ omit("Omit on Windows: #{message}") if Gem.win_platform?
51
+ end
48
52
  end
data/test/run-test.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
3
+ # Copyright (C) 2013-2020 Sutou Kouhei <kou@clear-code.com>
4
4
  #
5
5
  # This library is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the GNU Lesser General Public
@@ -24,6 +24,8 @@ require "test-unit"
24
24
 
25
25
  base_dir = Pathname(__FILE__).dirname.parent
26
26
  lib_dir = base_dir + "lib"
27
+ test_dir = base_dir + "test"
28
+
27
29
  $LOAD_PATH.unshift(lib_dir.to_s)
28
30
 
29
31
  require "chupa-text"
@@ -32,4 +34,4 @@ ChupaText::Decomposers.load
32
34
 
33
35
  require_relative "helper"
34
36
 
35
- exit(Test::Unit::AutoRunner.run(true))
37
+ exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
@@ -102,7 +102,7 @@ class TestExternalCommand < Test::Unit::TestCase
102
102
  assert_equal([
103
103
  [
104
104
  :info,
105
- "[external-command][timeout][use] <60.0s>: <#{pid}>",
105
+ "[external-command][timeout][use] <1.00m>: <#{pid}>",
106
106
  ]
107
107
  ],
108
108
  messages)
@@ -117,7 +117,7 @@ class TestExternalCommand < Test::Unit::TestCase
117
117
  assert_equal([
118
118
  [
119
119
  :info,
120
- "[external-command][timeout][use] <60.0s>: <#{pid}>",
120
+ "[external-command][timeout][use] <1.00m>: <#{pid}>",
121
121
  ]
122
122
  ],
123
123
  messages)
@@ -132,7 +132,7 @@ class TestExternalCommand < Test::Unit::TestCase
132
132
  assert_equal([
133
133
  [
134
134
  :info,
135
- "[external-command][timeout][use] <30.0s>: <#{pid}>",
135
+ "[external-command][timeout][use] <30.00s>: <#{pid}>",
136
136
  ]
137
137
  ],
138
138
  messages)
@@ -147,7 +147,7 @@ class TestExternalCommand < Test::Unit::TestCase
147
147
  assert_equal([
148
148
  [
149
149
  :info,
150
- "[external-command][timeout][use] <30.0s>: <#{pid}>",
150
+ "[external-command][timeout][use] <30.00s>: <#{pid}>",
151
151
  ]
152
152
  ],
153
153
  messages)
@@ -162,7 +162,7 @@ class TestExternalCommand < Test::Unit::TestCase
162
162
  assert_equal([
163
163
  [
164
164
  :info,
165
- "[external-command][timeout][use] <60.0s>: <#{pid}>",
165
+ "[external-command][timeout][use] <1.00m>: <#{pid}>",
166
166
  ]
167
167
  ],
168
168
  messages)
@@ -178,7 +178,7 @@ class TestExternalCommand < Test::Unit::TestCase
178
178
  assert_equal([
179
179
  [
180
180
  :info,
181
- "[external-command][timeout][use] <60.0s>: <#{pid}>",
181
+ "[external-command][timeout][use] <1.00m>: <#{pid}>",
182
182
  ]
183
183
  ],
184
184
  messages)
@@ -194,7 +194,7 @@ class TestExternalCommand < Test::Unit::TestCase
194
194
  assert_equal([
195
195
  [
196
196
  :info,
197
- "[external-command][timeout][use] <30.0s>: <#{pid}>",
197
+ "[external-command][timeout][use] <30.00s>: <#{pid}>",
198
198
  ]
199
199
  ],
200
200
  messages)
@@ -209,7 +209,7 @@ class TestExternalCommand < Test::Unit::TestCase
209
209
  assert_equal([
210
210
  [
211
211
  :info,
212
- "[external-command][timeout][use] <30.0s>: <#{pid}>",
212
+ "[external-command][timeout][use] <30.00s>: <#{pid}>",
213
213
  ]
214
214
  ],
215
215
  messages)
@@ -218,6 +218,7 @@ class TestExternalCommand < Test::Unit::TestCase
218
218
 
219
219
  class TestLimitCPU < self
220
220
  def setup
221
+ omit_on_windows("RLIMIT_CPU doesn't exist")
221
222
  @data = ChupaText::TextData.new("Hello")
222
223
  limit_cpu = ChupaText::ExternalCommand.default_limit_cpu
223
224
  begin
@@ -303,6 +304,7 @@ class TestExternalCommand < Test::Unit::TestCase
303
304
 
304
305
  class TestLimitAS < self
305
306
  def setup
307
+ omit_on_windows("RLIMIT_AS doesn't exist")
306
308
  @data = ChupaText::TextData.new("Hello")
307
309
  limit_as = ChupaText::ExternalCommand.default_limit_as
308
310
  begin
@@ -76,7 +76,7 @@ class TestExtractor < Test::Unit::TestCase
76
76
  extracted = ChupaText::Data.new
77
77
  extracted.mime_type = "text/plain"
78
78
  extracted.body = data.body.gsub(/<.+?>/, "")
79
- sleep(data.timeout * 2) if data.timeout
79
+ sleep(data.timeout * 10) if data.timeout
80
80
  yield(extracted)
81
81
  end
82
82
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.9
4
+ version: 1.3.3
5
5
  platform: ruby
6
6
  authors:
7
- - Kouhei Sutou
7
+ - Sutou Kouhei
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-13 00:00:00.000000000 Z
11
+ date: 2022-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: archive-zip
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.0.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: rexml
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: bundler
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -53,7 +67,7 @@ dependencies:
53
67
  - !ruby/object:Gem::Version
54
68
  version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
- name: nokogiri
70
+ name: kramdown
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - ">="
@@ -67,7 +81,7 @@ dependencies:
67
81
  - !ruby/object:Gem::Version
68
82
  version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
- name: packnga
84
+ name: nokogiri
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
87
  - - ">="
@@ -81,7 +95,7 @@ dependencies:
81
95
  - !ruby/object:Gem::Version
82
96
  version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
- name: rake
98
+ name: packnga
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - ">="
@@ -95,7 +109,7 @@ dependencies:
95
109
  - !ruby/object:Gem::Version
96
110
  version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
- name: redcarpet
112
+ name: rake
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - ">="
@@ -126,8 +140,8 @@ description: ''
126
140
  email:
127
141
  - kou@clear-code.com
128
142
  executables:
129
- - chupa-text-generate-decomposer
130
143
  - chupa-text
144
+ - chupa-text-generate-decomposer
131
145
  extensions: []
132
146
  extra_rdoc_files: []
133
147
  files:
@@ -191,6 +205,7 @@ files:
191
205
  - lib/chupa-text/screenshot.rb
192
206
  - lib/chupa-text/size-parser.rb
193
207
  - lib/chupa-text/text-data.rb
208
+ - lib/chupa-text/timeout-value.rb
194
209
  - lib/chupa-text/unzippable.rb
195
210
  - lib/chupa-text/utf8-converter.rb
196
211
  - lib/chupa-text/version.rb
@@ -289,8 +304,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
289
304
  - !ruby/object:Gem::Version
290
305
  version: '0'
291
306
  requirements: []
292
- rubyforge_project:
293
- rubygems_version: 2.7.6.2
307
+ rubygems_version: 3.4.0.dev
294
308
  signing_key:
295
309
  specification_version: 4
296
310
  summary: ChupaText is an extensible text extractor. You can plug your custom text