chupa-text 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +11 -0
- data/lib/chupa-text/command/chupa-text.rb +8 -6
- data/lib/chupa-text/formatters.rb +1 -0
- data/lib/chupa-text/formatters/mime.rb +63 -0
- data/lib/chupa-text/version.rb +1 -1
- data/test/decomposers/test-tar.rb +2 -2
- data/test/decomposers/test-zip.rb +6 -6
- data/test/formatters/test-mime.rb +85 -0
- data/test/helper.rb +4 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dfad2e42fd742ce0a396ccb6240d87e2e2df335f
|
4
|
+
data.tar.gz: 3200375114c918a04f41e89fb2bbd60cde223628
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 265d9f9a164b04ebf0cbe67f8e022faed0c99f9af594449affe21e9387f12229c2a31691aa2e60df2a12fa17e3fca027555cbd9ebfec6bec03fd6e37a74d19ae
|
7
|
+
data.tar.gz: fe8031b39a1654ae7c0f54a06cd588263bfba9f988b124c823438213742a6c9f2e23eaefc705c3fac55f1e1fb84d61151b5c9c39cd5699ff78f65b1c4ed9dd22
|
data/doc/text/news.md
CHANGED
data/lib/chupa-text/command/chupa-text.rb
CHANGED
@@ -26,16 +26,16 @@ module ChupaText
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
AVAILABLE_FORMATS = [:json, :text]
|
29
|
+
AVAILABLE_FORMATS = [:json, :text, :mime]
|
30
30
|
|
31
31
|
SIZE = /\A\d+x\d+\z/o
|
32
32
|
OptionParser.accept(SIZE, SIZE) do |value|
|
33
33
|
if value
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
34
|
+
begin
|
35
|
+
value.split("x").collect {|number| Integer(number)}
|
36
|
+
rescue ArgumentError
|
37
|
+
raise OptionParser::InvalidArgument, value
|
38
|
+
end
|
39
39
|
end
|
40
40
|
end
|
41
41
|
|
@@ -219,6 +219,8 @@ module ChupaText
|
|
219
219
|
Formatters::JSON.new($stdout)
|
220
220
|
when :text
|
221
221
|
Formatters::Text.new($stdout)
|
222
|
+
when :mime
|
223
|
+
Formatters::MIME.new($stdout)
|
222
224
|
end
|
223
225
|
end
|
224
226
|
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright (C) 2017 Kenji Okimoto <okimoto@clear-code.com>
|
2
|
+
# Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
#
|
4
|
+
# This library is free software; you can redistribute it and/or
|
5
|
+
# modify it under the terms of the GNU Lesser General Public
|
6
|
+
# License as published by the Free Software Foundation; either
|
7
|
+
# version 2.1 of the License, or (at your option) any later version.
|
8
|
+
#
|
9
|
+
# This library is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# Lesser General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU Lesser General Public
|
15
|
+
# License along with this library; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
17
|
+
|
18
|
+
require "digest/sha1"
|
19
|
+
|
20
|
+
require "chupa-text/formatters/hash"
|
21
|
+
|
22
|
+
module ChupaText
|
23
|
+
module Formatters
|
24
|
+
class MIME < Hash
|
25
|
+
def initialize(output)
|
26
|
+
super()
|
27
|
+
@output = output
|
28
|
+
end
|
29
|
+
|
30
|
+
def format_finish(data)
|
31
|
+
formatted = super
|
32
|
+
|
33
|
+
@output << "MIME-Version: 1.0\r\n"
|
34
|
+
format_hash(formatted, ["texts"])
|
35
|
+
texts = formatted["texts"]
|
36
|
+
boundary = Digest::SHA1.hexdigest(data.uri.to_s)
|
37
|
+
@output << "Content-Type: multipart/mixed; boundary=#{boundary}\r\n"
|
38
|
+
texts.each do |text|
|
39
|
+
@output << "\r\n--#{boundary}\r\n"
|
40
|
+
format_text(text)
|
41
|
+
end
|
42
|
+
@output << "\r\n--#{boundary}--\r\n"
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
def format_hash(hash, ignore_keys)
|
47
|
+
hash.each do |key, value|
|
48
|
+
next if ignore_keys.include?(key)
|
49
|
+
@output << "#{key}: #{value}\r\n"
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def format_text(hash)
|
54
|
+
format_hash(hash, ["body"])
|
55
|
+
body = hash["body"]
|
56
|
+
if body
|
57
|
+
@output << "\r\n"
|
58
|
+
@output << body
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
data/lib/chupa-text/version.rb
CHANGED
data/test/decomposers/test-tar.rb
CHANGED
@@ -46,7 +46,7 @@ class TestDecomposersTar < Test::Unit::TestCase
|
|
46
46
|
data = ChupaText::InputData.new(data_path)
|
47
47
|
assert_equal([
|
48
48
|
{
|
49
|
-
:uri => "
|
49
|
+
:uri => file_uri("#{base_path}/top-level.txt").to_s,
|
50
50
|
:body => "top level\n",
|
51
51
|
:source => data.uri.to_s,
|
52
52
|
},
|
@@ -62,7 +62,7 @@ class TestDecomposersTar < Test::Unit::TestCase
|
|
62
62
|
data = ChupaText::InputData.new(data_path)
|
63
63
|
assert_equal([
|
64
64
|
{
|
65
|
-
:uri => "
|
65
|
+
:uri => file_uri("#{base_path}/directory/hello.txt").to_s,
|
66
66
|
:body => "Hello in directory\n",
|
67
67
|
:source => data.uri.to_s,
|
68
68
|
},
|
@@ -45,19 +45,19 @@ class TestDecomposersZip < Test::Unit::TestCase
|
|
45
45
|
base_path = data_path.sub_ext("")
|
46
46
|
assert_equal([
|
47
47
|
{
|
48
|
-
:uri => "
|
48
|
+
:uri => file_uri("#{base_path}/hello.txt").to_s,
|
49
49
|
:body => "Hello!\n",
|
50
|
-
:source =>
|
50
|
+
:source => file_uri(data_path).to_s,
|
51
51
|
},
|
52
52
|
{
|
53
|
-
:uri => "
|
53
|
+
:uri => file_uri("#{base_path}/hello.csv").to_s,
|
54
54
|
:body => "Hello,World\n",
|
55
|
-
:source =>
|
55
|
+
:source => file_uri(data_path).to_s,
|
56
56
|
},
|
57
57
|
{
|
58
|
-
:uri => "
|
58
|
+
:uri => file_uri("#{base_path}/hello/world.txt").to_s,
|
59
59
|
:body => "World!\n",
|
60
|
-
:source =>
|
60
|
+
:source => file_uri(data_path).to_s,
|
61
61
|
},
|
62
62
|
],
|
63
63
|
decompose(data_path))
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
class TestMIMEFormatter < Test::Unit::TestCase
|
18
|
+
def setup
|
19
|
+
@output = StringIO.new
|
20
|
+
@formatter = ChupaText::Formatters::MIME.new(@output)
|
21
|
+
end
|
22
|
+
|
23
|
+
def format(data, extracted_data)
|
24
|
+
@formatter.format_start(data)
|
25
|
+
extracted_data.each do |extracted_datum|
|
26
|
+
@formatter.format_extracted(extracted_datum)
|
27
|
+
end
|
28
|
+
@formatter.format_finish(data)
|
29
|
+
@output.string
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_text
|
33
|
+
data = ChupaText::TextData.new("Hello")
|
34
|
+
data.uri = URI.parse("file:///tmp/hello.txt")
|
35
|
+
assert_equal(<<-MIME.gsub(/\n/, "\r\n"), format(data, [data]))
|
36
|
+
MIME-Version: 1.0
|
37
|
+
mime-type: text/plain
|
38
|
+
uri: file:///tmp/hello.txt
|
39
|
+
path: /tmp/hello.txt
|
40
|
+
size: 5
|
41
|
+
Content-Type: multipart/mixed; boundary=a21ff2fc51d8d8c8af3e7ccb974e34b0368e2891
|
42
|
+
|
43
|
+
--a21ff2fc51d8d8c8af3e7ccb974e34b0368e2891
|
44
|
+
mime-type: text/plain
|
45
|
+
uri: file:///tmp/hello.txt
|
46
|
+
path: /tmp/hello.txt
|
47
|
+
size: 5
|
48
|
+
|
49
|
+
Hello
|
50
|
+
--a21ff2fc51d8d8c8af3e7ccb974e34b0368e2891--
|
51
|
+
MIME
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_texts
|
55
|
+
data = ChupaText::Data.new
|
56
|
+
data.uri = URI.parse("file:///tmp/hello-world.zip")
|
57
|
+
data1 = ChupaText::TextData.new("Hello")
|
58
|
+
data1.uri = URI.parse("file:///tmp/hello.txt")
|
59
|
+
data2 = ChupaText::TextData.new("World")
|
60
|
+
data2.uri = URI.parse("file:///tmp/world.txt")
|
61
|
+
assert_equal(<<-MIME.gsub(/\n/, "\r\n"), format(data, [data1, data2]))
|
62
|
+
MIME-Version: 1.0
|
63
|
+
mime-type: application/zip
|
64
|
+
uri: file:///tmp/hello-world.zip
|
65
|
+
path: /tmp/hello-world.zip
|
66
|
+
Content-Type: multipart/mixed; boundary=e53a82b45aee7c6a07ea51dcf930118dedf7da4d
|
67
|
+
|
68
|
+
--e53a82b45aee7c6a07ea51dcf930118dedf7da4d
|
69
|
+
mime-type: text/plain
|
70
|
+
uri: file:///tmp/hello.txt
|
71
|
+
path: /tmp/hello.txt
|
72
|
+
size: 5
|
73
|
+
|
74
|
+
Hello
|
75
|
+
--e53a82b45aee7c6a07ea51dcf930118dedf7da4d
|
76
|
+
mime-type: text/plain
|
77
|
+
uri: file:///tmp/world.txt
|
78
|
+
path: /tmp/world.txt
|
79
|
+
size: 5
|
80
|
+
|
81
|
+
World
|
82
|
+
--e53a82b45aee7c6a07ea51dcf930118dedf7da4d--
|
83
|
+
MIME
|
84
|
+
end
|
85
|
+
end
|
data/test/helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: archive-zip
|
@@ -141,6 +141,7 @@ files:
|
|
141
141
|
- lib/chupa-text/formatters.rb
|
142
142
|
- lib/chupa-text/formatters/hash.rb
|
143
143
|
- lib/chupa-text/formatters/json.rb
|
144
|
+
- lib/chupa-text/formatters/mime.rb
|
144
145
|
- lib/chupa-text/formatters/text.rb
|
145
146
|
- lib/chupa-text/input-data.rb
|
146
147
|
- lib/chupa-text/loggable.rb
|
@@ -171,6 +172,7 @@ files:
|
|
171
172
|
- test/fixture/tar/top-level.tar
|
172
173
|
- test/fixture/zip/hello.zip
|
173
174
|
- test/fixture/zip/password.zip
|
175
|
+
- test/formatters/test-mime.rb
|
174
176
|
- test/helper.rb
|
175
177
|
- test/run-test.rb
|
176
178
|
- test/test-attributes.rb
|
@@ -207,7 +209,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
207
209
|
version: '0'
|
208
210
|
requirements: []
|
209
211
|
rubyforge_project:
|
210
|
-
rubygems_version: 2.5.2
|
212
|
+
rubygems_version: 2.5.2.1
|
211
213
|
signing_key:
|
212
214
|
specification_version: 4
|
213
215
|
summary: ChupaText is an extensible text extractor. You can plug your custom text
|