chupa-text 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3af110814417d579cde496c50b05d6d49964dd97
4
- data.tar.gz: 2609179c9e817e33a82df59c53834236dc63da3d
3
+ metadata.gz: 9a50556c287fd5148a3032b4fd8518679237e7be
4
+ data.tar.gz: 0d58a4948b3df081449e67211e23168b539be83b
5
5
  SHA512:
6
- metadata.gz: 6ae9ed51763e954b6b99973dfb2c020e046ccfb57f6670bfe96e935319055a6f898e71f0ed30ba25f3d3c8a4180ea6dbdfcb80337829659fdfaeb6a08841d873
7
- data.tar.gz: a114dd75066faaf9501cd1fd4891df6a8ed46adf99711721a4d4d4cfaf4b1a63bb852aba578d7c46f5c63535bd8d5a19508b65c44fa953087ff8ac301cc85535
6
+ metadata.gz: 298a7416122a757fbb6018c73327f768adaa6b18402f7725e5c9dde12b320d00a7015c04a5eb90580b4c6f822678acded64e132a8ba9d8d1b1de792804d13103
7
+ data.tar.gz: 3aaaa8b120e085434094163d3e582cb4bb33c5478a2916a268cd5b03cfff10b48cd05f6912acb26185e288650cc656a3f38260a48468fd988de74f0dff59bc1e
data/doc/text/news.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # News
2
2
 
3
+ ## 1.0.7: 2017-07-06
4
+
5
+ ### Improvements
6
+
7
+ * Supported screenshot.
8
+
9
+ * `chupa-text`: Added new options:
10
+
11
+ * `--need-screenshot`
12
+
13
+ * `--expected-screenshot-size=WIDTHxHEIGHT`
14
+
15
+ ### Fixes
16
+
17
+ * CSV decomposer: Fixed an infinite loop bug.
18
+
3
19
  ## 1.0.6: 2017-07-05
4
20
 
5
21
  ### Improvements
data/lib/chupa-text.rb CHANGED
@@ -41,6 +41,8 @@ require "chupa-text/formatters"
41
41
  require "chupa-text/file-content"
42
42
  require "chupa-text/virtual-content"
43
43
 
44
+ require "chupa-text/screenshot"
45
+
44
46
  require "chupa-text/attributes"
45
47
  require "chupa-text/data"
46
48
  require "chupa-text/input-data"
@@ -28,11 +28,24 @@ module ChupaText
28
28
 
29
29
  AVAILABLE_FORMATS = [:json, :text]
30
30
 
31
+ SIZE = /\A\d+x\d+\z/o
32
+ OptionParser.accept(SIZE, SIZE) do |value|
33
+ if value
34
+ begin
35
+ value.split("x").collect {|number| Integer(number)}
36
+ rescue ArgumentError
37
+ raise OptionParser::InvalidArgument, value
38
+ end
39
+ end
40
+ end
41
+
31
42
  def initialize
32
43
  @input = nil
33
- @configuration = Configuration.default
44
+ @configuration = Configuration.load_default
34
45
  @enable_gems = true
35
46
  @format = :json
47
+ @need_screenshot = true
48
+ @expected_screenshot_size = [200, 200]
36
49
  end
37
50
 
38
51
  def run(*arguments)
@@ -92,12 +105,25 @@ module ChupaText
92
105
  "Appends PATH to decomposer load path.") do |path|
93
106
  $LOAD_PATH << path
94
107
  end
108
+
109
+ parser.separator("")
110
+ parser.separator("Output related options")
95
111
  parser.on("--format=FORMAT", AVAILABLE_FORMATS,
96
112
  "Output FORMAT.",
97
113
  "[#{AVAILABLE_FORMATS.join(', ')}]",
98
- "(default: json)") do |format|
114
+ "(default: #{@format})") do |format|
99
115
  @format = format
100
116
  end
117
+ parser.on("--[no-]need-screenshot",
118
+ "Generate screenshot if available.",
119
+ "(default: #{@need_screenshot})") do |boolean|
120
+ @need_screenshot = boolean
121
+ end
122
+ parser.on("--expected-screenshot-size=WIDTHxHEIGHT", SIZE,
123
+ "Expected screenshot size.",
124
+ "(default: #{@expected_screenshot_size.join("x")})") do |size|
125
+ @expected_screenshot_size = size
126
+ end
101
127
 
102
128
  parser.separator("")
103
129
  parser.separator("Log related options:")
@@ -152,7 +178,7 @@ module ChupaText
152
178
 
153
179
  def create_data
154
180
  if @input.nil?
155
- VirtualFileData.new(nil, $stdin)
181
+ data = VirtualFileData.new(nil, $stdin)
156
182
  else
157
183
  case @input
158
184
  when /\A[a-z]+:\/\//i
@@ -160,8 +186,11 @@ module ChupaText
160
186
  else
161
187
  input = Pathname(@input)
162
188
  end
163
- InputData.new(input)
189
+ data = InputData.new(input)
164
190
  end
191
+ data.need_screenshot = @need_screenshot
192
+ data.expected_screenshot_size = @expected_screenshot_size
193
+ data
165
194
  end
166
195
 
167
196
  def create_formatter
@@ -18,11 +18,10 @@ module ChupaText
18
18
  class Configuration
19
19
  class << self
20
20
  def default
21
- @default ||= create_default
21
+ @default ||= load_default
22
22
  end
23
23
 
24
- private
25
- def create_default
24
+ def load_default
26
25
  configuration = new
27
26
  loader = ConfigurationLoader.new(configuration)
28
27
  loader.load("chupa-text.conf")
@@ -52,6 +52,17 @@ module ChupaText
52
52
  # archive data in {#source}.
53
53
  attr_accessor :source
54
54
 
55
+ # @return [Screenshot, nil] The screenshot of the data. For example,
56
+ # the first page image for PDF file.text.
57
+ attr_accessor :screenshot
58
+
59
+ # @param [Bool] value `true` when screenshot is needed.
60
+ # @return [Bool] the specified value
61
+ attr_writer :need_screenshot
62
+
63
+ # @return [Array<Integer, Integer>] the expected screenshot size.
64
+ attr_accessor :expected_screenshot_size
65
+
55
66
  def initialize(options={})
56
67
  @uri = nil
57
68
  @body = nil
@@ -60,9 +71,15 @@ module ChupaText
60
71
  @mime_type = nil
61
72
  @attributes = Attributes.new
62
73
  @source = nil
74
+ @screenshot = nil
75
+ @need_screenshot = true
76
+ @expected_screenshot_size = [200, 200]
63
77
  @options = options || {}
64
78
  source_data = @options[:source_data]
65
- merge!(source_data) if source_data
79
+ if source_data
80
+ merge!(source_data)
81
+ @source = source_data
82
+ end
66
83
  end
67
84
 
68
85
  def initialize_copy(object)
@@ -86,6 +103,8 @@ module ChupaText
86
103
  self["source-mime-types"] ||= []
87
104
  self["source-mime-types"].unshift(data.mime_type)
88
105
  end
106
+ self.need_screenshot = data.need_screenshot?
107
+ self.expected_screenshot_size = data.expected_screenshot_size
89
108
  end
90
109
 
91
110
  # @param [String, URI, nil] uri The URI for the data. If `uri` is
@@ -162,6 +181,11 @@ module ChupaText
162
181
  mime_type == "text/plain"
163
182
  end
164
183
 
184
+ # @return [Bool] `true` when screenshot is needed if available.
185
+ def need_screenshot?
186
+ @need_screenshot
187
+ end
188
+
165
189
  private
166
190
  def guess_mime_type
167
191
  guess_mime_type_from_uri or
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -14,6 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "cgi/util"
17
18
  require "csv"
18
19
 
19
20
  module ChupaText
@@ -22,8 +23,14 @@ module ChupaText
22
23
  registry.register("csv", self)
23
24
 
24
25
  def target?(data)
25
- data.extension == "csv" or
26
- data.mime_type == "text/csv"
26
+ return true if data.mime_type == "text/csv"
27
+
28
+ if data.text_plain? and
29
+ (data["source-mime-types"] || []).include?("text/csv")
30
+ return false
31
+ end
32
+
33
+ data.extension == "csv"
27
34
  end
28
35
 
29
36
  def decompose(data)
@@ -35,10 +42,42 @@ module ChupaText
35
42
  text << "\n"
36
43
  end
37
44
  end
38
- text_data = TextData.new(text)
39
- text_data.uri = data.uri
45
+
46
+ text_data = TextData.new(text, :source_data => data)
47
+ if data.need_screenshot?
48
+ text_data.screenshot = create_screenshot(data, text)
49
+ end
50
+
40
51
  yield(text_data)
41
52
  end
53
+
54
+ private
55
+ def create_screenshot(data, text)
56
+ width, height = data.expected_screenshot_size
57
+ max_n_lines = 10
58
+ font_size = height / max_n_lines
59
+ target_text = ""
60
+ text.each_line.with_index do |line, i|
61
+ break if i == max_n_lines
62
+ target_text << line
63
+ end
64
+ mime_type = "image/svg+xml"
65
+ data = <<-SVG
66
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
67
+ <svg
68
+ xmlns="http://www.w3.org/2000/svg"
69
+ width="#{width}"
70
+ height="#{height}"
71
+ viewBox="0 0 #{width} #{height}">
72
+ <text
73
+ x="0"
74
+ y="#{font_size}"
75
+ style="font-size: #{font_size}px; white-space: pre-wrap;"
76
+ xml:space="preserve">#{CGI.escapeHTML(target_text)}</text>
77
+ </svg>
78
+ SVG
79
+ Screenshot.new(mime_type, data)
80
+ end
42
81
  end
43
82
  end
44
83
  end
@@ -42,8 +42,7 @@ module ChupaText
42
42
  when "tgz"
43
43
  uri = data.uri.to_s.gsub(/\.tgz\z/i, ".tar")
44
44
  end
45
- extracted = VirtualFileData.new(uri, reader)
46
- extracted.source = data
45
+ extracted = VirtualFileData.new(uri, reader, :source_data => data)
47
46
  yield(extracted)
48
47
  end
49
48
  end
@@ -32,8 +32,8 @@ module ChupaText
32
32
  reader.each do |entry|
33
33
  next unless entry.file?
34
34
  entry.extend(CopyStreamable)
35
- extracted = VirtualFileData.new(entry.full_name, entry)
36
- extracted.source = data
35
+ extracted = VirtualFileData.new(entry.full_name, entry,
36
+ :source_data => data)
37
37
  yield(extracted)
38
38
  end
39
39
  end
@@ -34,8 +34,7 @@ module ChupaText
34
34
  parser = REXML::Parsers::StreamParser.new(input, listener)
35
35
  parser.parse
36
36
  end
37
- text_data = TextData.new(text)
38
- text_data.uri = data.uri
37
+ text_data = TextData.new(text, :source_data => data)
39
38
  yield(text_data)
40
39
  end
41
40
 
@@ -28,6 +28,16 @@ module ChupaText
28
28
  text = {}
29
29
  format_headers(data, text)
30
30
  text["body"] = data.body
31
+ screenshot = data.screenshot
32
+ if screenshot
33
+ text["screenshot"] = {
34
+ "mime-type" => screenshot.mime_type,
35
+ "data" => screenshot.data,
36
+ }
37
+ if screenshot.encoding
38
+ text["screenshot"]["encoding"] = screenshot.encoding
39
+ end
40
+ end
31
41
  @texts << text
32
42
  end
33
43
 
@@ -0,0 +1,46 @@
1
+ # Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ module ChupaText
18
+ class Screenshot
19
+ # @return [String] The MIME type of the screenshot.
20
+ attr_reader :mime_type
21
+
22
+ # @return [String] The data of the screenshot.
23
+ attr_reader :data
24
+
25
+ # @return [String, nil] The encoding of the screenshot data.
26
+ # `nil` means that the data is raw data. It's used for SVG data
27
+ # because it's text data. `"base64"` means that the data is encoded
28
+ # by Base64. It's used for PNG data because it's binary data.
29
+ attr_reader :encoding
30
+
31
+ def initialize(mime_type, data, encoding=nil)
32
+ @mime_type = mime_type
33
+ @data = data
34
+ @encoding = encoding
35
+ end
36
+
37
+ def decoded_data
38
+ case @encoding
39
+ when "base64"
40
+ @data.unpack("m*")[0]
41
+ else
42
+ @data
43
+ end
44
+ end
45
+ end
46
+ end
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.0.6"
18
+ VERSION = "1.0.7"
19
19
  end
@@ -182,4 +182,52 @@ class TestCommandChupaText < Test::Unit::TestCase
182
182
  path.to_s))
183
183
  end
184
184
  end
185
+
186
+ sub_test_case("extract") do
187
+ def test_csv
188
+ fixture_name = "numbers.csv"
189
+ uri = fixture_uri(fixture_name)
190
+ path = fixture_path(fixture_name)
191
+ assert_equal([
192
+ true,
193
+ {
194
+ "uri" => uri.to_s,
195
+ "path" => path.to_s,
196
+ "mime-type" => "text/csv",
197
+ "size" => path.stat.size,
198
+ "texts" => [
199
+ {
200
+ "uri" => uri.to_s,
201
+ "path" => path.to_s,
202
+ "mime-type" => "text/plain",
203
+ "source-mime-types" => ["text/csv"],
204
+ "body" => "1 2 3\n4 5 6\n7 8 9\n",
205
+ "size" => 18,
206
+ "screenshot" => {
207
+ "mime-type" => "image/svg+xml",
208
+ "data" => <<-SVG
209
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
210
+ <svg
211
+ xmlns="http://www.w3.org/2000/svg"
212
+ width="200"
213
+ height="200"
214
+ viewBox="0 0 200 200">
215
+ <text
216
+ x="0"
217
+ y="20"
218
+ style="font-size: 20px; white-space: pre-wrap;"
219
+ xml:space="preserve">1 2 3
220
+ 4 5 6
221
+ 7 8 9
222
+ </text>
223
+ </svg>
224
+ SVG
225
+ },
226
+ },
227
+ ],
228
+ },
229
+ ],
230
+ run_command(path.to_s))
231
+ end
232
+ end
185
233
  end
@@ -0,0 +1,3 @@
1
+ 1,2,3
2
+ 4,5,6
3
+ 7,8,9
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-05 00:00:00.000000000 Z
11
+ date: 2017-07-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -132,6 +132,7 @@ files:
132
132
  - lib/chupa-text/logger.rb
133
133
  - lib/chupa-text/mime-type-registry.rb
134
134
  - lib/chupa-text/mime-type.rb
135
+ - lib/chupa-text/screenshot.rb
135
136
  - lib/chupa-text/size-parser.rb
136
137
  - lib/chupa-text/text-data.rb
137
138
  - lib/chupa-text/version.rb
@@ -145,6 +146,7 @@ files:
145
146
  - test/fixture/command/chupa-text/hello.txt
146
147
  - test/fixture/command/chupa-text/hello.txt.gz
147
148
  - test/fixture/command/chupa-text/no-decomposer.conf
149
+ - test/fixture/command/chupa-text/numbers.csv
148
150
  - test/fixture/extractor/hello.txt
149
151
  - test/fixture/gzip/hello.tar.gz
150
152
  - test/fixture/gzip/hello.tgz