chupa-text 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +11 -0
- data/lib/chupa-text/command/chupa-text.rb +8 -6
- data/lib/chupa-text/formatters.rb +1 -0
- data/lib/chupa-text/formatters/mime.rb +63 -0
- data/lib/chupa-text/version.rb +1 -1
- data/test/decomposers/test-tar.rb +2 -2
- data/test/decomposers/test-zip.rb +6 -6
- data/test/formatters/test-mime.rb +85 -0
- data/test/helper.rb +4 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dfad2e42fd742ce0a396ccb6240d87e2e2df335f
|
4
|
+
data.tar.gz: 3200375114c918a04f41e89fb2bbd60cde223628
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 265d9f9a164b04ebf0cbe67f8e022faed0c99f9af594449affe21e9387f12229c2a31691aa2e60df2a12fa17e3fca027555cbd9ebfec6bec03fd6e37a74d19ae
|
7
|
+
data.tar.gz: fe8031b39a1654ae7c0f54a06cd588263bfba9f988b124c823438213742a6c9f2e23eaefc705c3fac55f1e1fb84d61151b5c9c39cd5699ff78f65b1c4ed9dd22
|
data/doc/text/news.md
CHANGED
@@ -26,16 +26,16 @@ module ChupaText
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
AVAILABLE_FORMATS = [:json, :text]
|
29
|
+
AVAILABLE_FORMATS = [:json, :text, :mime]
|
30
30
|
|
31
31
|
SIZE = /\A\d+x\d+\z/o
|
32
32
|
OptionParser.accept(SIZE, SIZE) do |value|
|
33
33
|
if value
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
34
|
+
begin
|
35
|
+
value.split("x").collect {|number| Integer(number)}
|
36
|
+
rescue ArgumentError
|
37
|
+
raise OptionParser::InvalidArgument, value
|
38
|
+
end
|
39
39
|
end
|
40
40
|
end
|
41
41
|
|
@@ -219,6 +219,8 @@ module ChupaText
|
|
219
219
|
Formatters::JSON.new($stdout)
|
220
220
|
when :text
|
221
221
|
Formatters::Text.new($stdout)
|
222
|
+
when :mime
|
223
|
+
Formatters::MIME.new($stdout)
|
222
224
|
end
|
223
225
|
end
|
224
226
|
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright (C) 2017 Kenji Okimoto <okimoto@clear-code.com>
|
2
|
+
# Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
|
3
|
+
#
|
4
|
+
# This library is free software; you can redistribute it and/or
|
5
|
+
# modify it under the terms of the GNU Lesser General Public
|
6
|
+
# License as published by the Free Software Foundation; either
|
7
|
+
# version 2.1 of the License, or (at your option) any later version.
|
8
|
+
#
|
9
|
+
# This library is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# Lesser General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU Lesser General Public
|
15
|
+
# License along with this library; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
17
|
+
|
18
|
+
require "digest/sha1"
|
19
|
+
|
20
|
+
require "chupa-text/formatters/hash"
|
21
|
+
|
22
|
+
module ChupaText
|
23
|
+
module Formatters
|
24
|
+
class MIME < Hash
|
25
|
+
def initialize(output)
|
26
|
+
super()
|
27
|
+
@output = output
|
28
|
+
end
|
29
|
+
|
30
|
+
def format_finish(data)
|
31
|
+
formatted = super
|
32
|
+
|
33
|
+
@output << "MIME-Version: 1.0\r\n"
|
34
|
+
format_hash(formatted, ["texts"])
|
35
|
+
texts = formatted["texts"]
|
36
|
+
boundary = Digest::SHA1.hexdigest(data.uri.to_s)
|
37
|
+
@output << "Content-Type: multipart/mixed; boundary=#{boundary}\r\n"
|
38
|
+
texts.each do |text|
|
39
|
+
@output << "\r\n--#{boundary}\r\n"
|
40
|
+
format_text(text)
|
41
|
+
end
|
42
|
+
@output << "\r\n--#{boundary}--\r\n"
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
def format_hash(hash, ignore_keys)
|
47
|
+
hash.each do |key, value|
|
48
|
+
next if ignore_keys.include?(key)
|
49
|
+
@output << "#{key}: #{value}\r\n"
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def format_text(hash)
|
54
|
+
format_hash(hash, ["body"])
|
55
|
+
body = hash["body"]
|
56
|
+
if body
|
57
|
+
@output << "\r\n"
|
58
|
+
@output << body
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
data/lib/chupa-text/version.rb
CHANGED
@@ -46,7 +46,7 @@ class TestDecomposersTar < Test::Unit::TestCase
|
|
46
46
|
data = ChupaText::InputData.new(data_path)
|
47
47
|
assert_equal([
|
48
48
|
{
|
49
|
-
:uri => "
|
49
|
+
:uri => file_uri("#{base_path}/top-level.txt").to_s,
|
50
50
|
:body => "top level\n",
|
51
51
|
:source => data.uri.to_s,
|
52
52
|
},
|
@@ -62,7 +62,7 @@ class TestDecomposersTar < Test::Unit::TestCase
|
|
62
62
|
data = ChupaText::InputData.new(data_path)
|
63
63
|
assert_equal([
|
64
64
|
{
|
65
|
-
:uri => "
|
65
|
+
:uri => file_uri("#{base_path}/directory/hello.txt").to_s,
|
66
66
|
:body => "Hello in directory\n",
|
67
67
|
:source => data.uri.to_s,
|
68
68
|
},
|
@@ -45,19 +45,19 @@ class TestDecomposersZip < Test::Unit::TestCase
|
|
45
45
|
base_path = data_path.sub_ext("")
|
46
46
|
assert_equal([
|
47
47
|
{
|
48
|
-
:uri => "
|
48
|
+
:uri => file_uri("#{base_path}/hello.txt").to_s,
|
49
49
|
:body => "Hello!\n",
|
50
|
-
:source =>
|
50
|
+
:source => file_uri(data_path).to_s,
|
51
51
|
},
|
52
52
|
{
|
53
|
-
:uri => "
|
53
|
+
:uri => file_uri("#{base_path}/hello.csv").to_s,
|
54
54
|
:body => "Hello,World\n",
|
55
|
-
:source =>
|
55
|
+
:source => file_uri(data_path).to_s,
|
56
56
|
},
|
57
57
|
{
|
58
|
-
:uri => "
|
58
|
+
:uri => file_uri("#{base_path}/hello/world.txt").to_s,
|
59
59
|
:body => "World!\n",
|
60
|
-
:source =>
|
60
|
+
:source => file_uri(data_path).to_s,
|
61
61
|
},
|
62
62
|
],
|
63
63
|
decompose(data_path))
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
class TestMIMEFormatter < Test::Unit::TestCase
|
18
|
+
def setup
|
19
|
+
@output = StringIO.new
|
20
|
+
@formatter = ChupaText::Formatters::MIME.new(@output)
|
21
|
+
end
|
22
|
+
|
23
|
+
def format(data, extracted_data)
|
24
|
+
@formatter.format_start(data)
|
25
|
+
extracted_data.each do |extracted_datum|
|
26
|
+
@formatter.format_extracted(extracted_datum)
|
27
|
+
end
|
28
|
+
@formatter.format_finish(data)
|
29
|
+
@output.string
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_text
|
33
|
+
data = ChupaText::TextData.new("Hello")
|
34
|
+
data.uri = URI.parse("file:///tmp/hello.txt")
|
35
|
+
assert_equal(<<-MIME.gsub(/\n/, "\r\n"), format(data, [data]))
|
36
|
+
MIME-Version: 1.0
|
37
|
+
mime-type: text/plain
|
38
|
+
uri: file:///tmp/hello.txt
|
39
|
+
path: /tmp/hello.txt
|
40
|
+
size: 5
|
41
|
+
Content-Type: multipart/mixed; boundary=a21ff2fc51d8d8c8af3e7ccb974e34b0368e2891
|
42
|
+
|
43
|
+
--a21ff2fc51d8d8c8af3e7ccb974e34b0368e2891
|
44
|
+
mime-type: text/plain
|
45
|
+
uri: file:///tmp/hello.txt
|
46
|
+
path: /tmp/hello.txt
|
47
|
+
size: 5
|
48
|
+
|
49
|
+
Hello
|
50
|
+
--a21ff2fc51d8d8c8af3e7ccb974e34b0368e2891--
|
51
|
+
MIME
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_texts
|
55
|
+
data = ChupaText::Data.new
|
56
|
+
data.uri = URI.parse("file:///tmp/hello-world.zip")
|
57
|
+
data1 = ChupaText::TextData.new("Hello")
|
58
|
+
data1.uri = URI.parse("file:///tmp/hello.txt")
|
59
|
+
data2 = ChupaText::TextData.new("World")
|
60
|
+
data2.uri = URI.parse("file:///tmp/world.txt")
|
61
|
+
assert_equal(<<-MIME.gsub(/\n/, "\r\n"), format(data, [data1, data2]))
|
62
|
+
MIME-Version: 1.0
|
63
|
+
mime-type: application/zip
|
64
|
+
uri: file:///tmp/hello-world.zip
|
65
|
+
path: /tmp/hello-world.zip
|
66
|
+
Content-Type: multipart/mixed; boundary=e53a82b45aee7c6a07ea51dcf930118dedf7da4d
|
67
|
+
|
68
|
+
--e53a82b45aee7c6a07ea51dcf930118dedf7da4d
|
69
|
+
mime-type: text/plain
|
70
|
+
uri: file:///tmp/hello.txt
|
71
|
+
path: /tmp/hello.txt
|
72
|
+
size: 5
|
73
|
+
|
74
|
+
Hello
|
75
|
+
--e53a82b45aee7c6a07ea51dcf930118dedf7da4d
|
76
|
+
mime-type: text/plain
|
77
|
+
uri: file:///tmp/world.txt
|
78
|
+
path: /tmp/world.txt
|
79
|
+
size: 5
|
80
|
+
|
81
|
+
World
|
82
|
+
--e53a82b45aee7c6a07ea51dcf930118dedf7da4d--
|
83
|
+
MIME
|
84
|
+
end
|
85
|
+
end
|
data/test/helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: archive-zip
|
@@ -141,6 +141,7 @@ files:
|
|
141
141
|
- lib/chupa-text/formatters.rb
|
142
142
|
- lib/chupa-text/formatters/hash.rb
|
143
143
|
- lib/chupa-text/formatters/json.rb
|
144
|
+
- lib/chupa-text/formatters/mime.rb
|
144
145
|
- lib/chupa-text/formatters/text.rb
|
145
146
|
- lib/chupa-text/input-data.rb
|
146
147
|
- lib/chupa-text/loggable.rb
|
@@ -171,6 +172,7 @@ files:
|
|
171
172
|
- test/fixture/tar/top-level.tar
|
172
173
|
- test/fixture/zip/hello.zip
|
173
174
|
- test/fixture/zip/password.zip
|
175
|
+
- test/formatters/test-mime.rb
|
174
176
|
- test/helper.rb
|
175
177
|
- test/run-test.rb
|
176
178
|
- test/test-attributes.rb
|
@@ -207,7 +209,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
207
209
|
version: '0'
|
208
210
|
requirements: []
|
209
211
|
rubyforge_project:
|
210
|
-
rubygems_version: 2.5.2
|
212
|
+
rubygems_version: 2.5.2.1
|
211
213
|
signing_key:
|
212
214
|
specification_version: 4
|
213
215
|
summary: ChupaText is an extensible text extractor. You can plug your custom text
|