chupa-text 1.0.6 → 1.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3af110814417d579cde496c50b05d6d49964dd97
4
- data.tar.gz: 2609179c9e817e33a82df59c53834236dc63da3d
3
+ metadata.gz: 9a50556c287fd5148a3032b4fd8518679237e7be
4
+ data.tar.gz: 0d58a4948b3df081449e67211e23168b539be83b
5
5
  SHA512:
6
- metadata.gz: 6ae9ed51763e954b6b99973dfb2c020e046ccfb57f6670bfe96e935319055a6f898e71f0ed30ba25f3d3c8a4180ea6dbdfcb80337829659fdfaeb6a08841d873
7
- data.tar.gz: a114dd75066faaf9501cd1fd4891df6a8ed46adf99711721a4d4d4cfaf4b1a63bb852aba578d7c46f5c63535bd8d5a19508b65c44fa953087ff8ac301cc85535
6
+ metadata.gz: 298a7416122a757fbb6018c73327f768adaa6b18402f7725e5c9dde12b320d00a7015c04a5eb90580b4c6f822678acded64e132a8ba9d8d1b1de792804d13103
7
+ data.tar.gz: 3aaaa8b120e085434094163d3e582cb4bb33c5478a2916a268cd5b03cfff10b48cd05f6912acb26185e288650cc656a3f38260a48468fd988de74f0dff59bc1e
data/doc/text/news.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # News
2
2
 
3
+ ## 1.0.7: 2017-07-06
4
+
5
+ ### Improvements
6
+
7
+ * Added screenshot support.
8
+
9
+ * `chupa-text`: Added new options:
10
+
11
+ * `--need-screenshot`
12
+
13
+ * `--expected-screenshot-size=WIDTHxHEIGHT`
14
+
15
+ ### Fixes
16
+
17
+ * CSV decomposer: Fixed an infinite loop bug.
18
+
3
19
  ## 1.0.6: 2017-07-05
4
20
 
5
21
  ### Improvements
data/lib/chupa-text.rb CHANGED
@@ -41,6 +41,8 @@ require "chupa-text/formatters"
41
41
  require "chupa-text/file-content"
42
42
  require "chupa-text/virtual-content"
43
43
 
44
+ require "chupa-text/screenshot"
45
+
44
46
  require "chupa-text/attributes"
45
47
  require "chupa-text/data"
46
48
  require "chupa-text/input-data"
@@ -28,11 +28,24 @@ module ChupaText
28
28
 
29
29
  AVAILABLE_FORMATS = [:json, :text]
30
30
 
31
+ SIZE = /\A\d+x\d+\z/o
32
+ OptionParser.accept(SIZE, SIZE) do |value|
33
+ if value
34
+ begin
35
+ value.split("x").collect {|number| Integer(number)}
36
+ rescue ArgumentError
37
+ raise OptionParser::InvalidArgument, value
38
+ end
39
+ end
40
+ end
41
+
31
42
  def initialize
32
43
  @input = nil
33
- @configuration = Configuration.default
44
+ @configuration = Configuration.load_default
34
45
  @enable_gems = true
35
46
  @format = :json
47
+ @need_screenshot = true
48
+ @expected_screenshot_size = [200, 200]
36
49
  end
37
50
 
38
51
  def run(*arguments)
@@ -92,12 +105,25 @@ module ChupaText
92
105
  "Appends PATH to decomposer load path.") do |path|
93
106
  $LOAD_PATH << path
94
107
  end
108
+
109
+ parser.separator("")
110
+ parser.separator("Output related options")
95
111
  parser.on("--format=FORMAT", AVAILABLE_FORMATS,
96
112
  "Output FORMAT.",
97
113
  "[#{AVAILABLE_FORMATS.join(', ')}]",
98
- "(default: json)") do |format|
114
+ "(default: #{@format})") do |format|
99
115
  @format = format
100
116
  end
117
+ parser.on("--[no-]need-screenshot",
118
+ "Generate screenshot if available.",
119
+ "(default: #{@need_screenshot})") do |boolean|
120
+ @need_screenshot = boolean
121
+ end
122
+ parser.on("--expected-screenshot-size=WIDTHxHEIGHT", SIZE,
123
+ "Expected screenshot size.",
124
+ "(default: #{@expected_screenshot_size.join("x")})") do |size|
125
+ @expected_screenshot_size = size
126
+ end
101
127
 
102
128
  parser.separator("")
103
129
  parser.separator("Log related options:")
@@ -152,7 +178,7 @@ module ChupaText
152
178
 
153
179
  def create_data
154
180
  if @input.nil?
155
- VirtualFileData.new(nil, $stdin)
181
+ data = VirtualFileData.new(nil, $stdin)
156
182
  else
157
183
  case @input
158
184
  when /\A[a-z]+:\/\//i
@@ -160,8 +186,11 @@ module ChupaText
160
186
  else
161
187
  input = Pathname(@input)
162
188
  end
163
- InputData.new(input)
189
+ data = InputData.new(input)
164
190
  end
191
+ data.need_screenshot = @need_screenshot
192
+ data.expected_screenshot_size = @expected_screenshot_size
193
+ data
165
194
  end
166
195
 
167
196
  def create_formatter
@@ -18,11 +18,10 @@ module ChupaText
18
18
  class Configuration
19
19
  class << self
20
20
  def default
21
- @default ||= create_default
21
+ @default ||= load_default
22
22
  end
23
23
 
24
- private
25
- def create_default
24
+ def load_default
26
25
  configuration = new
27
26
  loader = ConfigurationLoader.new(configuration)
28
27
  loader.load("chupa-text.conf")
@@ -52,6 +52,17 @@ module ChupaText
52
52
  # archive data in {#source}.
53
53
  attr_accessor :source
54
54
 
55
+ # @return [Screenshot, nil] The screenshot of the data. For example,
56
+ # the first page image of a PDF file.
57
+ attr_accessor :screenshot
58
+
59
+ # @param [Bool] value `true` when screenshot is needed.
60
+ # @return [Bool] the specified value
61
+ attr_writer :need_screenshot
62
+
63
+ # @return [Array<Integer, Integer>] the expected screenshot size.
64
+ attr_accessor :expected_screenshot_size
65
+
55
66
  def initialize(options={})
56
67
  @uri = nil
57
68
  @body = nil
@@ -60,9 +71,15 @@ module ChupaText
60
71
  @mime_type = nil
61
72
  @attributes = Attributes.new
62
73
  @source = nil
74
+ @screenshot = nil
75
+ @need_screenshot = true
76
+ @expected_screenshot_size = [200, 200]
63
77
  @options = options || {}
64
78
  source_data = @options[:source_data]
65
- merge!(source_data) if source_data
79
+ if source_data
80
+ merge!(source_data)
81
+ @source = source_data
82
+ end
66
83
  end
67
84
 
68
85
  def initialize_copy(object)
@@ -86,6 +103,8 @@ module ChupaText
86
103
  self["source-mime-types"] ||= []
87
104
  self["source-mime-types"].unshift(data.mime_type)
88
105
  end
106
+ self.need_screenshot = data.need_screenshot?
107
+ self.expected_screenshot_size = data.expected_screenshot_size
89
108
  end
90
109
 
91
110
  # @param [String, URI, nil] uri The URI for the data. If `uri` is
@@ -162,6 +181,11 @@ module ChupaText
162
181
  mime_type == "text/plain"
163
182
  end
164
183
 
184
+ # @return [Bool] `true` when screenshot is needed if available.
185
+ def need_screenshot?
186
+ @need_screenshot
187
+ end
188
+
165
189
  private
166
190
  def guess_mime_type
167
191
  guess_mime_type_from_uri or
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -14,6 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "cgi/util"
17
18
  require "csv"
18
19
 
19
20
  module ChupaText
@@ -22,8 +23,14 @@ module ChupaText
22
23
  registry.register("csv", self)
23
24
 
24
25
  def target?(data)
25
- data.extension == "csv" or
26
- data.mime_type == "text/csv"
26
+ return true if data.mime_type == "text/csv"
27
+
28
+ if data.text_plain? and
29
+ (data["source-mime-types"] || []).include?("text/csv")
30
+ return false
31
+ end
32
+
33
+ data.extension == "csv"
27
34
  end
28
35
 
29
36
  def decompose(data)
@@ -35,10 +42,42 @@ module ChupaText
35
42
  text << "\n"
36
43
  end
37
44
  end
38
- text_data = TextData.new(text)
39
- text_data.uri = data.uri
45
+
46
+ text_data = TextData.new(text, :source_data => data)
47
+ if data.need_screenshot?
48
+ text_data.screenshot = create_screenshot(data, text)
49
+ end
50
+
40
51
  yield(text_data)
41
52
  end
53
+
54
+ private
55
+ def create_screenshot(data, text)
56
+ width, height = data.expected_screenshot_size
57
+ max_n_lines = 10
58
+ font_size = height / max_n_lines
59
+ target_text = ""
60
+ text.each_line.with_index do |line, i|
61
+ break if i == max_n_lines
62
+ target_text << line
63
+ end
64
+ mime_type = "image/svg+xml"
65
+ data = <<-SVG
66
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
67
+ <svg
68
+ xmlns="http://www.w3.org/2000/svg"
69
+ width="#{width}"
70
+ height="#{height}"
71
+ viewBox="0 0 #{width} #{height}">
72
+ <text
73
+ x="0"
74
+ y="#{font_size}"
75
+ style="font-size: #{font_size}px; white-space: pre-wrap;"
76
+ xml:space="preserve">#{CGI.escapeHTML(target_text)}</text>
77
+ </svg>
78
+ SVG
79
+ Screenshot.new(mime_type, data)
80
+ end
42
81
  end
43
82
  end
44
83
  end
@@ -42,8 +42,7 @@ module ChupaText
42
42
  when "tgz"
43
43
  uri = data.uri.to_s.gsub(/\.tgz\z/i, ".tar")
44
44
  end
45
- extracted = VirtualFileData.new(uri, reader)
46
- extracted.source = data
45
+ extracted = VirtualFileData.new(uri, reader, :source_data => data)
47
46
  yield(extracted)
48
47
  end
49
48
  end
@@ -32,8 +32,8 @@ module ChupaText
32
32
  reader.each do |entry|
33
33
  next unless entry.file?
34
34
  entry.extend(CopyStreamable)
35
- extracted = VirtualFileData.new(entry.full_name, entry)
36
- extracted.source = data
35
+ extracted = VirtualFileData.new(entry.full_name, entry,
36
+ :source_data => data)
37
37
  yield(extracted)
38
38
  end
39
39
  end
@@ -34,8 +34,7 @@ module ChupaText
34
34
  parser = REXML::Parsers::StreamParser.new(input, listener)
35
35
  parser.parse
36
36
  end
37
- text_data = TextData.new(text)
38
- text_data.uri = data.uri
37
+ text_data = TextData.new(text, :source_data => data)
39
38
  yield(text_data)
40
39
  end
41
40
 
@@ -28,6 +28,16 @@ module ChupaText
28
28
  text = {}
29
29
  format_headers(data, text)
30
30
  text["body"] = data.body
31
+ screenshot = data.screenshot
32
+ if screenshot
33
+ text["screenshot"] = {
34
+ "mime-type" => screenshot.mime_type,
35
+ "data" => screenshot.data,
36
+ }
37
+ if screenshot.encoding
38
+ text["screenshot"]["encoding"] = screenshot.encoding
39
+ end
40
+ end
31
41
  @texts << text
32
42
  end
33
43
 
@@ -0,0 +1,46 @@
1
+ # Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ module ChupaText
18
+ class Screenshot
19
+ # @return [String] The MIME type of the screenshot.
20
+ attr_reader :mime_type
21
+
22
+ # @return [String] The data of the screenshot.
23
+ attr_reader :data
24
+
25
+ # @return [String, nil] The encoding of the screenshot data.
26
+ # `nil` means that the data is raw data. It's used for SVG data
27
+ # because it's text data. `"base64"` means that the data is encoded
28
+ # by Base64. It's used for PNG data because it's binary data.
29
+ attr_reader :encoding
30
+
31
+ def initialize(mime_type, data, encoding=nil)
32
+ @mime_type = mime_type
33
+ @data = data
34
+ @encoding = encoding
35
+ end
36
+
37
+ def decoded_data
38
+ case @encoding
39
+ when "base64"
40
+ @data.unpack("m*")[0]
41
+ else
42
+ @data
43
+ end
44
+ end
45
+ end
46
+ end
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.0.6"
18
+ VERSION = "1.0.7"
19
19
  end
@@ -182,4 +182,52 @@ class TestCommandChupaText < Test::Unit::TestCase
182
182
  path.to_s))
183
183
  end
184
184
  end
185
+
186
+ sub_test_case("extract") do
187
+ def test_csv
188
+ fixture_name = "numbers.csv"
189
+ uri = fixture_uri(fixture_name)
190
+ path = fixture_path(fixture_name)
191
+ assert_equal([
192
+ true,
193
+ {
194
+ "uri" => uri.to_s,
195
+ "path" => path.to_s,
196
+ "mime-type" => "text/csv",
197
+ "size" => path.stat.size,
198
+ "texts" => [
199
+ {
200
+ "uri" => uri.to_s,
201
+ "path" => path.to_s,
202
+ "mime-type" => "text/plain",
203
+ "source-mime-types" => ["text/csv"],
204
+ "body" => "1 2 3\n4 5 6\n7 8 9\n",
205
+ "size" => 18,
206
+ "screenshot" => {
207
+ "mime-type" => "image/svg+xml",
208
+ "data" => <<-SVG
209
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
210
+ <svg
211
+ xmlns="http://www.w3.org/2000/svg"
212
+ width="200"
213
+ height="200"
214
+ viewBox="0 0 200 200">
215
+ <text
216
+ x="0"
217
+ y="20"
218
+ style="font-size: 20px; white-space: pre-wrap;"
219
+ xml:space="preserve">1 2 3
220
+ 4 5 6
221
+ 7 8 9
222
+ </text>
223
+ </svg>
224
+ SVG
225
+ },
226
+ },
227
+ ],
228
+ },
229
+ ],
230
+ run_command(path.to_s))
231
+ end
232
+ end
185
233
  end
@@ -0,0 +1,3 @@
1
+ 1,2,3
2
+ 4,5,6
3
+ 7,8,9
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-05 00:00:00.000000000 Z
11
+ date: 2017-07-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -132,6 +132,7 @@ files:
132
132
  - lib/chupa-text/logger.rb
133
133
  - lib/chupa-text/mime-type-registry.rb
134
134
  - lib/chupa-text/mime-type.rb
135
+ - lib/chupa-text/screenshot.rb
135
136
  - lib/chupa-text/size-parser.rb
136
137
  - lib/chupa-text/text-data.rb
137
138
  - lib/chupa-text/version.rb
@@ -145,6 +146,7 @@ files:
145
146
  - test/fixture/command/chupa-text/hello.txt
146
147
  - test/fixture/command/chupa-text/hello.txt.gz
147
148
  - test/fixture/command/chupa-text/no-decomposer.conf
149
+ - test/fixture/command/chupa-text/numbers.csv
148
150
  - test/fixture/extractor/hello.txt
149
151
  - test/fixture/gzip/hello.tar.gz
150
152
  - test/fixture/gzip/hello.tgz