chupa-text 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +6 -0
- data/lib/chupa-text/command/chupa-text.rb +8 -2
- data/lib/chupa-text/configuration-loader.rb +8 -7
- data/lib/chupa-text/data.rb +21 -3
- data/lib/chupa-text/decomposers/tar.rb +24 -1
- data/lib/chupa-text/formatters.rb +2 -1
- data/lib/chupa-text/formatters/hash.rb +60 -0
- data/lib/chupa-text/formatters/json.rb +5 -30
- data/lib/chupa-text/input-data.rb +3 -5
- data/lib/chupa-text/version.rb +2 -2
- data/lib/chupa-text/virtual-content.rb +4 -27
- data/lib/chupa-text/virtual-file-data.rb +0 -9
- data/test/command/test-chupa-text.rb +15 -8
- data/test/decomposers/test-gzip.rb +5 -5
- data/test/helper.rb +7 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3af110814417d579cde496c50b05d6d49964dd97
|
4
|
+
data.tar.gz: 2609179c9e817e33a82df59c53834236dc63da3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6ae9ed51763e954b6b99973dfb2c020e046ccfb57f6670bfe96e935319055a6f898e71f0ed30ba25f3d3c8a4180ea6dbdfcb80337829659fdfaeb6a08841d873
|
7
|
+
data.tar.gz: a114dd75066faaf9501cd1fd4891df6a8ed46adf99711721a4d4d4cfaf4b1a63bb852aba578d7c46f5c63535bd8d5a19508b65c44fa953087ff8ac301cc85535
|
data/doc/text/news.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -154,7 +154,13 @@ module ChupaText
|
|
154
154
|
if @input.nil?
|
155
155
|
VirtualFileData.new(nil, $stdin)
|
156
156
|
else
|
157
|
-
|
157
|
+
case @input
|
158
|
+
when /\A[a-z]+:\/\//i
|
159
|
+
input = URI.parse(@input)
|
160
|
+
else
|
161
|
+
input = Pathname(@input)
|
162
|
+
end
|
163
|
+
InputData.new(input)
|
158
164
|
end
|
159
165
|
end
|
160
166
|
|
@@ -25,24 +25,25 @@ module ChupaText
|
|
25
25
|
@decomposer = DecomposerLoader.new(@configuration.decomposer)
|
26
26
|
@mime_types = MIMETypesLoader.new(@configuration.mime_type_registry)
|
27
27
|
@load_paths = []
|
28
|
-
data_dir =
|
29
|
-
@load_paths <<
|
28
|
+
data_dir = Pathname(__dir__) + ".." + ".." + "data"
|
29
|
+
@load_paths << data_dir.expand_path
|
30
30
|
end
|
31
31
|
|
32
32
|
def load(path)
|
33
33
|
path = resolve_path(path)
|
34
34
|
File.open(path) do |file|
|
35
|
-
instance_eval(file.read, path, 1)
|
35
|
+
instance_eval(file.read, path.to_path, 1)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
39
|
private
|
40
40
|
def resolve_path(path)
|
41
|
-
|
42
|
-
return path if
|
41
|
+
path = Pathname(path) unless path.is_a?(Pathname)
|
42
|
+
return path if path.exist?
|
43
|
+
return path if path.absolute?
|
43
44
|
@load_paths.each do |load_path|
|
44
|
-
resolved_path =
|
45
|
-
return resolved_path if
|
45
|
+
resolved_path = path.expand_path(load_path)
|
46
|
+
return resolved_path if resolved_path.exist?
|
46
47
|
end
|
47
48
|
path
|
48
49
|
end
|
data/lib/chupa-text/data.rb
CHANGED
@@ -14,6 +14,7 @@
|
|
14
14
|
# License along with this library; if not, write to the Free Software
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
16
|
|
17
|
+
require "cgi/util"
|
17
18
|
require "uri"
|
18
19
|
require "open-uri"
|
19
20
|
|
@@ -91,10 +92,27 @@ module ChupaText
|
|
91
92
|
# `nil`, it means that the data isn't associated with any URIs.
|
92
93
|
def uri=(uri)
|
93
94
|
case uri
|
94
|
-
when
|
95
|
-
|
95
|
+
when Pathname
|
96
|
+
file_uri = ""
|
97
|
+
target = uri.expand_path
|
98
|
+
loop do
|
99
|
+
target, base = target.split
|
100
|
+
file_uri = "/#{CGI.escape(base.to_s)}#{file_uri}"
|
101
|
+
break if target.root?
|
102
|
+
end
|
103
|
+
file_uri = "file://#{file_uri}"
|
104
|
+
@uri = URI.parse(file_uri)
|
105
|
+
self.path ||= uri
|
106
|
+
when NilClass
|
107
|
+
@uri = nil
|
108
|
+
self.path = nil
|
109
|
+
else
|
110
|
+
unless uri.is_a?(URI)
|
111
|
+
uri = URI.parse(uri)
|
112
|
+
end
|
113
|
+
@uri = uri
|
114
|
+
self.path ||= @uri.path
|
96
115
|
end
|
97
|
-
@uri = uri
|
98
116
|
end
|
99
117
|
|
100
118
|
def open
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -31,12 +31,35 @@ module ChupaText
|
|
31
31
|
Gem::Package::TarReader.new(StringIO.new(data.body)) do |reader|
|
32
32
|
reader.each do |entry|
|
33
33
|
next unless entry.file?
|
34
|
+
entry.extend(CopyStreamable)
|
34
35
|
extracted = VirtualFileData.new(entry.full_name, entry)
|
35
36
|
extracted.source = data
|
36
37
|
yield(extracted)
|
37
38
|
end
|
38
39
|
end
|
39
40
|
end
|
41
|
+
|
42
|
+
# TODO: Supporting output buffer in #read and #readpartial
|
43
|
+
# should be done in RubyGems' tar implementation.
|
44
|
+
module CopyStreamable
|
45
|
+
def readpartial(max_length, buffer=nil)
|
46
|
+
data = super(max_length)
|
47
|
+
if data.nil?
|
48
|
+
if max_length.zero?
|
49
|
+
return ""
|
50
|
+
else
|
51
|
+
raise EOFError
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
if buffer.nil?
|
56
|
+
data
|
57
|
+
else
|
58
|
+
buffer.replace(data)
|
59
|
+
buffer
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
40
63
|
end
|
41
64
|
end
|
42
65
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -14,5 +14,6 @@
|
|
14
14
|
# License along with this library; if not, write to the Free Software
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
16
|
|
17
|
+
require "chupa-text/formatters/hash"
|
17
18
|
require "chupa-text/formatters/json"
|
18
19
|
require "chupa-text/formatters/text"
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
module ChupaText
|
18
|
+
module Formatters
|
19
|
+
class Hash
|
20
|
+
def initialize
|
21
|
+
@texts = []
|
22
|
+
end
|
23
|
+
|
24
|
+
def format_start(data)
|
25
|
+
end
|
26
|
+
|
27
|
+
def format_extracted(data)
|
28
|
+
text = {}
|
29
|
+
format_headers(data, text)
|
30
|
+
text["body"] = data.body
|
31
|
+
@texts << text
|
32
|
+
end
|
33
|
+
|
34
|
+
def format_finish(data)
|
35
|
+
formatted = {}
|
36
|
+
format_headers(data, formatted)
|
37
|
+
formatted["texts"] = @texts
|
38
|
+
formatted
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
def format_headers(data, target)
|
43
|
+
format_header("mime-type", data.mime_type, target)
|
44
|
+
format_header("uri", data.uri, target)
|
45
|
+
if data.uri.class == URI::Generic
|
46
|
+
format_header("path", data.path, target)
|
47
|
+
end
|
48
|
+
format_header("size", data.size, target)
|
49
|
+
data.attributes.each do |name, value|
|
50
|
+
format_header(name, value, target)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def format_header(name, value, target)
|
55
|
+
return if value.nil?
|
56
|
+
target[name] = value
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -16,46 +16,21 @@
|
|
16
16
|
|
17
17
|
require "json"
|
18
18
|
|
19
|
+
require "chupa-text/formatters/hash"
|
20
|
+
|
19
21
|
module ChupaText
|
20
22
|
module Formatters
|
21
|
-
class JSON
|
23
|
+
class JSON < Hash
|
22
24
|
def initialize(output)
|
25
|
+
super()
|
23
26
|
@output = output
|
24
|
-
@texts = []
|
25
|
-
end
|
26
|
-
|
27
|
-
def format_start(data)
|
28
|
-
end
|
29
|
-
|
30
|
-
def format_extracted(data)
|
31
|
-
text = {}
|
32
|
-
format_headers(data, text)
|
33
|
-
text["body"] = data.body
|
34
|
-
@texts << text
|
35
27
|
end
|
36
28
|
|
37
29
|
def format_finish(data)
|
38
|
-
formatted =
|
39
|
-
format_headers(data, formatted)
|
40
|
-
formatted["texts"] = @texts
|
30
|
+
formatted = super
|
41
31
|
@output << ::JSON.pretty_generate(formatted)
|
42
32
|
@output << "\n"
|
43
33
|
end
|
44
|
-
|
45
|
-
private
|
46
|
-
def format_headers(data, target)
|
47
|
-
format_header("mime-type", data.mime_type, target)
|
48
|
-
format_header("uri", data.uri, target)
|
49
|
-
format_header("size", data.size, target)
|
50
|
-
data.attributes.each do |name, value|
|
51
|
-
format_header(name, value, target)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
def format_header(name, value, target)
|
56
|
-
return if value.nil?
|
57
|
-
target[name] = value
|
58
|
-
end
|
59
34
|
end
|
60
35
|
end
|
61
36
|
end
|
@@ -14,6 +14,7 @@
|
|
14
14
|
# License along with this library; if not, write to the Free Software
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
16
|
|
17
|
+
require "cgi/util"
|
17
18
|
require "uri"
|
18
19
|
require "open-uri"
|
19
20
|
|
@@ -23,9 +24,10 @@ module ChupaText
|
|
23
24
|
super(options)
|
24
25
|
self.uri = uri
|
25
26
|
if @uri.class == URI::Generic
|
26
|
-
@content = FileContent.new(
|
27
|
+
@content = FileContent.new(path)
|
27
28
|
else
|
28
29
|
@content = download
|
30
|
+
self.path = @content.path
|
29
31
|
end
|
30
32
|
end
|
31
33
|
|
@@ -37,10 +39,6 @@ module ChupaText
|
|
37
39
|
@content.size
|
38
40
|
end
|
39
41
|
|
40
|
-
def path
|
41
|
-
@content.path
|
42
|
-
end
|
43
|
-
|
44
42
|
def open(&block)
|
45
43
|
@content.open(&block)
|
46
44
|
end
|
data/lib/chupa-text/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013-
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -15,5 +15,5 @@
|
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
16
|
|
17
17
|
module ChupaText
|
18
|
-
VERSION = "1.0.
|
18
|
+
VERSION = "1.0.6"
|
19
19
|
end
|
@@ -26,30 +26,14 @@ module ChupaText
|
|
26
26
|
def initialize(input, original_path=nil)
|
27
27
|
@file = nil
|
28
28
|
@base_name = compute_base_name(original_path)
|
29
|
-
|
30
|
-
|
31
|
-
@
|
32
|
-
@body = chunk
|
33
|
-
@size = @body.bytesize
|
34
|
-
else
|
35
|
-
@body = nil
|
36
|
-
@size = chunk.bytesize
|
37
|
-
setup_file do |file|
|
38
|
-
file.write(chunk)
|
39
|
-
while (chunk = input.read(BUFFER_SIZE))
|
40
|
-
@size += chunk.bytesize
|
41
|
-
file.write(chunk)
|
42
|
-
end
|
43
|
-
end
|
29
|
+
@body = nil
|
30
|
+
setup_file do |file|
|
31
|
+
@size = IO.copy_stream(input, file)
|
44
32
|
end
|
45
33
|
end
|
46
34
|
|
47
35
|
def open(&block)
|
48
|
-
|
49
|
-
yield(StringIO.new(@body))
|
50
|
-
else
|
51
|
-
File.open(path, "rb", &block)
|
52
|
-
end
|
36
|
+
File.open(path, "rb", &block)
|
53
37
|
end
|
54
38
|
|
55
39
|
def body
|
@@ -57,9 +41,6 @@ module ChupaText
|
|
57
41
|
end
|
58
42
|
|
59
43
|
def path
|
60
|
-
ensure_setup_file do |file|
|
61
|
-
file.write(@body)
|
62
|
-
end
|
63
44
|
@path
|
64
45
|
end
|
65
46
|
|
@@ -77,10 +58,6 @@ module ChupaText
|
|
77
58
|
end
|
78
59
|
end
|
79
60
|
|
80
|
-
def ensure_setup_file(&block)
|
81
|
-
setup_file(&block) unless @file
|
82
|
-
end
|
83
|
-
|
84
61
|
def setup_file
|
85
62
|
@file = Tempfile.new(@base_name)
|
86
63
|
@path = @file.path
|
@@ -19,11 +19,6 @@ module ChupaText
|
|
19
19
|
def initialize(uri, input, options={})
|
20
20
|
super(options)
|
21
21
|
self.uri = uri
|
22
|
-
if @uri
|
23
|
-
path = @uri.path
|
24
|
-
else
|
25
|
-
path = nil
|
26
|
-
end
|
27
22
|
@content = VirtualContent.new(input, path)
|
28
23
|
end
|
29
24
|
|
@@ -35,10 +30,6 @@ module ChupaText
|
|
35
30
|
@content.size
|
36
31
|
end
|
37
32
|
|
38
|
-
def path
|
39
|
-
@content.path
|
40
|
-
end
|
41
|
-
|
42
33
|
def open(&block)
|
43
34
|
@content.open(&block)
|
44
35
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -57,17 +57,21 @@ class TestCommandChupaText < Test::Unit::TestCase
|
|
57
57
|
sub_test_case("file") do
|
58
58
|
def test_single
|
59
59
|
body = "Hello\n"
|
60
|
-
|
60
|
+
fixture_name = "hello.txt"
|
61
|
+
uri = fixture_uri(fixture_name).to_s
|
62
|
+
path = fixture_path(fixture_name).to_s
|
61
63
|
assert_equal([
|
62
64
|
true,
|
63
65
|
{
|
64
66
|
"mime-type" => "text/plain",
|
65
|
-
"uri" =>
|
67
|
+
"uri" => uri,
|
68
|
+
"path" => path,
|
66
69
|
"size" => body.bytesize,
|
67
70
|
"texts" => [
|
68
71
|
{
|
69
72
|
"mime-type" => "text/plain",
|
70
|
-
"uri" =>
|
73
|
+
"uri" => uri,
|
74
|
+
"path" => path,
|
71
75
|
"size" => body.bytesize,
|
72
76
|
"body" => body,
|
73
77
|
},
|
@@ -161,18 +165,21 @@ class TestCommandChupaText < Test::Unit::TestCase
|
|
161
165
|
sub_test_case("configuration") do
|
162
166
|
def test_no_decomposer
|
163
167
|
conf = fixture_path("no-decomposer.conf")
|
164
|
-
|
168
|
+
fixture_name = "hello.txt.gz"
|
169
|
+
uri = fixture_uri(fixture_name)
|
170
|
+
path = fixture_path(fixture_name)
|
165
171
|
assert_equal([
|
166
172
|
true,
|
167
173
|
{
|
168
|
-
"uri" =>
|
174
|
+
"uri" => uri.to_s,
|
175
|
+
"path" => path.to_s,
|
169
176
|
"mime-type" => "application/x-gzip",
|
170
|
-
"size" =>
|
177
|
+
"size" => path.stat.size,
|
171
178
|
"texts" => [],
|
172
179
|
},
|
173
180
|
],
|
174
181
|
run_command("--configuration", conf.to_s,
|
175
|
-
|
182
|
+
path.to_s))
|
176
183
|
end
|
177
184
|
end
|
178
185
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -41,8 +41,8 @@ class TestDecomposersGzip < Test::Unit::TestCase
|
|
41
41
|
@data = ChupaText::InputData.new(fixture_path("hello.txt.gz"))
|
42
42
|
end
|
43
43
|
|
44
|
-
def
|
45
|
-
assert_equal([
|
44
|
+
def test_uri
|
45
|
+
assert_equal([fixture_uri("hello.txt")],
|
46
46
|
decompose(@data).collect(&:uri))
|
47
47
|
end
|
48
48
|
|
@@ -64,7 +64,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def test_uri
|
67
|
-
assert_equal([
|
67
|
+
assert_equal([fixture_uri("hello.tar")],
|
68
68
|
decompose(@data).collect(&:uri))
|
69
69
|
end
|
70
70
|
|
@@ -91,7 +91,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
|
|
91
91
|
end
|
92
92
|
|
93
93
|
def test_uri
|
94
|
-
assert_equal([
|
94
|
+
assert_equal([fixture_uri("hello.tar")],
|
95
95
|
decompose(@data).collect(&:uri))
|
96
96
|
end
|
97
97
|
|
data/test/helper.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -16,10 +16,16 @@
|
|
16
16
|
|
17
17
|
require "pathname"
|
18
18
|
require "tempfile"
|
19
|
+
require "uri"
|
19
20
|
|
20
21
|
module Helper
|
21
22
|
def fixture_path(*components)
|
22
23
|
base_path = Pathname(__FILE__).dirname + "fixture"
|
23
24
|
base_path.join(*components)
|
24
25
|
end
|
26
|
+
|
27
|
+
def fixture_uri(*components)
|
28
|
+
path = fixture_path(*components)
|
29
|
+
URI.parse("file://#{path}")
|
30
|
+
end
|
25
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05
|
11
|
+
date: 2017-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -124,6 +124,7 @@ files:
|
|
124
124
|
- lib/chupa-text/extractor.rb
|
125
125
|
- lib/chupa-text/file-content.rb
|
126
126
|
- lib/chupa-text/formatters.rb
|
127
|
+
- lib/chupa-text/formatters/hash.rb
|
127
128
|
- lib/chupa-text/formatters/json.rb
|
128
129
|
- lib/chupa-text/formatters/text.rb
|
129
130
|
- lib/chupa-text/input-data.rb
|