chupa-text 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b7bb1a550ae5f986d102ceb5b13c542201a1d630
4
- data.tar.gz: 0ca5560f911c3f19ab018f8b0cb37780a2738e25
3
+ metadata.gz: 3af110814417d579cde496c50b05d6d49964dd97
4
+ data.tar.gz: 2609179c9e817e33a82df59c53834236dc63da3d
5
5
  SHA512:
6
- metadata.gz: 0bfc9e586bfbcd10ba60fdfa982fc3027e0e3b2433874fd5870ece5396201e42871de772d6eb10c6e39b32172ea18e21cc84944e48182f70dc72918b949719a8
7
- data.tar.gz: 40e10ff0742ae1a5b98733c8a64ea3f10934da02c30b834e8bfba0914271b31bbc04a151ddef19f8f6a11101f4de7590afc7f3d4901caca37e3d7d7c1ce4207a
6
+ metadata.gz: 6ae9ed51763e954b6b99973dfb2c020e046ccfb57f6670bfe96e935319055a6f898e71f0ed30ba25f3d3c8a4180ea6dbdfcb80337829659fdfaeb6a08841d873
7
+ data.tar.gz: a114dd75066faaf9501cd1fd4891df6a8ed46adf99711721a4d4d4cfaf4b1a63bb852aba578d7c46f5c63535bd8d5a19508b65c44fa953087ff8ac301cc85535
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.0.6: 2017-07-05
4
+
5
+ ### Improvements
6
+
7
+ * Supported non ASCII characters in file name.
8
+
3
9
  ## 1.0.5: 2017-05-02
4
10
 
5
11
  ### Improvements
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -154,7 +154,13 @@ module ChupaText
154
154
  if @input.nil?
155
155
  VirtualFileData.new(nil, $stdin)
156
156
  else
157
- InputData.new(@input)
157
+ case @input
158
+ when /\A[a-z]+:\/\//i
159
+ input = URI.parse(@input)
160
+ else
161
+ input = Pathname(@input)
162
+ end
163
+ InputData.new(input)
158
164
  end
159
165
  end
160
166
 
@@ -25,24 +25,25 @@ module ChupaText
25
25
  @decomposer = DecomposerLoader.new(@configuration.decomposer)
26
26
  @mime_types = MIMETypesLoader.new(@configuration.mime_type_registry)
27
27
  @load_paths = []
28
- data_dir = File.join(File.dirname(__FILE__), "..", "..", "data")
29
- @load_paths << File.expand_path(data_dir)
28
+ data_dir = Pathname(__dir__) + ".." + ".." + "data"
29
+ @load_paths << data_dir.expand_path
30
30
  end
31
31
 
32
32
  def load(path)
33
33
  path = resolve_path(path)
34
34
  File.open(path) do |file|
35
- instance_eval(file.read, path, 1)
35
+ instance_eval(file.read, path.to_path, 1)
36
36
  end
37
37
  end
38
38
 
39
39
  private
40
40
  def resolve_path(path)
41
- return path if File.exist?(path)
42
- return path if Pathname(path).absolute?
41
+ path = Pathname(path) unless path.is_a?(Pathname)
42
+ return path if path.exist?
43
+ return path if path.absolute?
43
44
  @load_paths.each do |load_path|
44
- resolved_path = File.join(load_path, path)
45
- return resolved_path if File.exist?(resolved_path)
45
+ resolved_path = path.expand_path(load_path)
46
+ return resolved_path if resolved_path.exist?
46
47
  end
47
48
  path
48
49
  end
@@ -14,6 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "cgi/util"
17
18
  require "uri"
18
19
  require "open-uri"
19
20
 
@@ -91,10 +92,27 @@ module ChupaText
91
92
  # `nil`, it means that the data isn't associated with any URIs.
92
93
  def uri=(uri)
93
94
  case uri
94
- when String, Pathname
95
- uri = URI.parse(uri.to_s)
95
+ when Pathname
96
+ file_uri = ""
97
+ target = uri.expand_path
98
+ loop do
99
+ target, base = target.split
100
+ file_uri = "/#{CGI.escape(base.to_s)}#{file_uri}"
101
+ break if target.root?
102
+ end
103
+ file_uri = "file://#{file_uri}"
104
+ @uri = URI.parse(file_uri)
105
+ self.path ||= uri
106
+ when NilClass
107
+ @uri = nil
108
+ self.path = nil
109
+ else
110
+ unless uri.is_a?(URI)
111
+ uri = URI.parse(uri)
112
+ end
113
+ @uri = uri
114
+ self.path ||= @uri.path
96
115
  end
97
- @uri = uri
98
116
  end
99
117
 
100
118
  def open
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -31,12 +31,35 @@ module ChupaText
31
31
  Gem::Package::TarReader.new(StringIO.new(data.body)) do |reader|
32
32
  reader.each do |entry|
33
33
  next unless entry.file?
34
+ entry.extend(CopyStreamable)
34
35
  extracted = VirtualFileData.new(entry.full_name, entry)
35
36
  extracted.source = data
36
37
  yield(extracted)
37
38
  end
38
39
  end
39
40
  end
41
+
42
+ # TODO: Supporting output buffer in #read and #readpartial
43
+ # should be done in RubyGems' tar implementation.
44
+ module CopyStreamable
45
+ def readpartial(max_length, buffer=nil)
46
+ data = super(max_length)
47
+ if data.nil?
48
+ if max_length.zero?
49
+ return ""
50
+ else
51
+ raise EOFError
52
+ end
53
+ end
54
+
55
+ if buffer.nil?
56
+ data
57
+ else
58
+ buffer.replace(data)
59
+ buffer
60
+ end
61
+ end
62
+ end
40
63
  end
41
64
  end
42
65
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -14,5 +14,6 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "chupa-text/formatters/hash"
17
18
  require "chupa-text/formatters/json"
18
19
  require "chupa-text/formatters/text"
@@ -0,0 +1,60 @@
1
+ # Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ module ChupaText
18
+ module Formatters
19
+ class Hash
20
+ def initialize
21
+ @texts = []
22
+ end
23
+
24
+ def format_start(data)
25
+ end
26
+
27
+ def format_extracted(data)
28
+ text = {}
29
+ format_headers(data, text)
30
+ text["body"] = data.body
31
+ @texts << text
32
+ end
33
+
34
+ def format_finish(data)
35
+ formatted = {}
36
+ format_headers(data, formatted)
37
+ formatted["texts"] = @texts
38
+ formatted
39
+ end
40
+
41
+ private
42
+ def format_headers(data, target)
43
+ format_header("mime-type", data.mime_type, target)
44
+ format_header("uri", data.uri, target)
45
+ if data.uri.class == URI::Generic
46
+ format_header("path", data.path, target)
47
+ end
48
+ format_header("size", data.size, target)
49
+ data.attributes.each do |name, value|
50
+ format_header(name, value, target)
51
+ end
52
+ end
53
+
54
+ def format_header(name, value, target)
55
+ return if value.nil?
56
+ target[name] = value
57
+ end
58
+ end
59
+ end
60
+ end
@@ -16,46 +16,21 @@
16
16
 
17
17
  require "json"
18
18
 
19
+ require "chupa-text/formatters/hash"
20
+
19
21
  module ChupaText
20
22
  module Formatters
21
- class JSON
23
+ class JSON < Hash
22
24
  def initialize(output)
25
+ super()
23
26
  @output = output
24
- @texts = []
25
- end
26
-
27
- def format_start(data)
28
- end
29
-
30
- def format_extracted(data)
31
- text = {}
32
- format_headers(data, text)
33
- text["body"] = data.body
34
- @texts << text
35
27
  end
36
28
 
37
29
  def format_finish(data)
38
- formatted = {}
39
- format_headers(data, formatted)
40
- formatted["texts"] = @texts
30
+ formatted = super
41
31
  @output << ::JSON.pretty_generate(formatted)
42
32
  @output << "\n"
43
33
  end
44
-
45
- private
46
- def format_headers(data, target)
47
- format_header("mime-type", data.mime_type, target)
48
- format_header("uri", data.uri, target)
49
- format_header("size", data.size, target)
50
- data.attributes.each do |name, value|
51
- format_header(name, value, target)
52
- end
53
- end
54
-
55
- def format_header(name, value, target)
56
- return if value.nil?
57
- target[name] = value
58
- end
59
34
  end
60
35
  end
61
36
  end
@@ -14,6 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "cgi/util"
17
18
  require "uri"
18
19
  require "open-uri"
19
20
 
@@ -23,9 +24,10 @@ module ChupaText
23
24
  super(options)
24
25
  self.uri = uri
25
26
  if @uri.class == URI::Generic
26
- @content = FileContent.new(@uri.path)
27
+ @content = FileContent.new(path)
27
28
  else
28
29
  @content = download
30
+ self.path = @content.path
29
31
  end
30
32
  end
31
33
 
@@ -37,10 +39,6 @@ module ChupaText
37
39
  @content.size
38
40
  end
39
41
 
40
- def path
41
- @content.path
42
- end
43
-
44
42
  def open(&block)
45
43
  @content.open(&block)
46
44
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.0.5"
18
+ VERSION = "1.0.6"
19
19
  end
@@ -26,30 +26,14 @@ module ChupaText
26
26
  def initialize(input, original_path=nil)
27
27
  @file = nil
28
28
  @base_name = compute_base_name(original_path)
29
- chunk = input.read(BUFFER_SIZE) || ""
30
- if chunk.bytesize != BUFFER_SIZE
31
- @path = nil
32
- @body = chunk
33
- @size = @body.bytesize
34
- else
35
- @body = nil
36
- @size = chunk.bytesize
37
- setup_file do |file|
38
- file.write(chunk)
39
- while (chunk = input.read(BUFFER_SIZE))
40
- @size += chunk.bytesize
41
- file.write(chunk)
42
- end
43
- end
29
+ @body = nil
30
+ setup_file do |file|
31
+ @size = IO.copy_stream(input, file)
44
32
  end
45
33
  end
46
34
 
47
35
  def open(&block)
48
- if @body
49
- yield(StringIO.new(@body))
50
- else
51
- File.open(path, "rb", &block)
52
- end
36
+ File.open(path, "rb", &block)
53
37
  end
54
38
 
55
39
  def body
@@ -57,9 +41,6 @@ module ChupaText
57
41
  end
58
42
 
59
43
  def path
60
- ensure_setup_file do |file|
61
- file.write(@body)
62
- end
63
44
  @path
64
45
  end
65
46
 
@@ -77,10 +58,6 @@ module ChupaText
77
58
  end
78
59
  end
79
60
 
80
- def ensure_setup_file(&block)
81
- setup_file(&block) unless @file
82
- end
83
-
84
61
  def setup_file
85
62
  @file = Tempfile.new(@base_name)
86
63
  @path = @file.path
@@ -19,11 +19,6 @@ module ChupaText
19
19
  def initialize(uri, input, options={})
20
20
  super(options)
21
21
  self.uri = uri
22
- if @uri
23
- path = @uri.path
24
- else
25
- path = nil
26
- end
27
22
  @content = VirtualContent.new(input, path)
28
23
  end
29
24
 
@@ -35,10 +30,6 @@ module ChupaText
35
30
  @content.size
36
31
  end
37
32
 
38
- def path
39
- @content.path
40
- end
41
-
42
33
  def open(&block)
43
34
  @content.open(&block)
44
35
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -57,17 +57,21 @@ class TestCommandChupaText < Test::Unit::TestCase
57
57
  sub_test_case("file") do
58
58
  def test_single
59
59
  body = "Hello\n"
60
- path = fixture_path("hello.txt").to_s
60
+ fixture_name = "hello.txt"
61
+ uri = fixture_uri(fixture_name).to_s
62
+ path = fixture_path(fixture_name).to_s
61
63
  assert_equal([
62
64
  true,
63
65
  {
64
66
  "mime-type" => "text/plain",
65
- "uri" => path,
67
+ "uri" => uri,
68
+ "path" => path,
66
69
  "size" => body.bytesize,
67
70
  "texts" => [
68
71
  {
69
72
  "mime-type" => "text/plain",
70
- "uri" => path,
73
+ "uri" => uri,
74
+ "path" => path,
71
75
  "size" => body.bytesize,
72
76
  "body" => body,
73
77
  },
@@ -161,18 +165,21 @@ class TestCommandChupaText < Test::Unit::TestCase
161
165
  sub_test_case("configuration") do
162
166
  def test_no_decomposer
163
167
  conf = fixture_path("no-decomposer.conf")
164
- gz = fixture_path("hello.txt.gz")
168
+ fixture_name = "hello.txt.gz"
169
+ uri = fixture_uri(fixture_name)
170
+ path = fixture_path(fixture_name)
165
171
  assert_equal([
166
172
  true,
167
173
  {
168
- "uri" => gz.to_s,
174
+ "uri" => uri.to_s,
175
+ "path" => path.to_s,
169
176
  "mime-type" => "application/x-gzip",
170
- "size" => gz.stat.size,
177
+ "size" => path.stat.size,
171
178
  "texts" => [],
172
179
  },
173
180
  ],
174
181
  run_command("--configuration", conf.to_s,
175
- gz.to_s))
182
+ path.to_s))
176
183
  end
177
184
  end
178
185
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -41,8 +41,8 @@ class TestDecomposersGzip < Test::Unit::TestCase
41
41
  @data = ChupaText::InputData.new(fixture_path("hello.txt.gz"))
42
42
  end
43
43
 
44
- def test_path
45
- assert_equal([URI.parse(fixture_path("hello.txt").to_s)],
44
+ def test_uri
45
+ assert_equal([fixture_uri("hello.txt")],
46
46
  decompose(@data).collect(&:uri))
47
47
  end
48
48
 
@@ -64,7 +64,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
64
64
  end
65
65
 
66
66
  def test_uri
67
- assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
67
+ assert_equal([fixture_uri("hello.tar")],
68
68
  decompose(@data).collect(&:uri))
69
69
  end
70
70
 
@@ -91,7 +91,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
91
91
  end
92
92
 
93
93
  def test_uri
94
- assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
94
+ assert_equal([fixture_uri("hello.tar")],
95
95
  decompose(@data).collect(&:uri))
96
96
  end
97
97
 
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -16,10 +16,16 @@
16
16
 
17
17
  require "pathname"
18
18
  require "tempfile"
19
+ require "uri"
19
20
 
20
21
  module Helper
21
22
  def fixture_path(*components)
22
23
  base_path = Pathname(__FILE__).dirname + "fixture"
23
24
  base_path.join(*components)
24
25
  end
26
+
27
+ def fixture_uri(*components)
28
+ path = fixture_path(*components)
29
+ URI.parse("file://#{path}")
30
+ end
25
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-02 00:00:00.000000000 Z
11
+ date: 2017-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -124,6 +124,7 @@ files:
124
124
  - lib/chupa-text/extractor.rb
125
125
  - lib/chupa-text/file-content.rb
126
126
  - lib/chupa-text/formatters.rb
127
+ - lib/chupa-text/formatters/hash.rb
127
128
  - lib/chupa-text/formatters/json.rb
128
129
  - lib/chupa-text/formatters/text.rb
129
130
  - lib/chupa-text/input-data.rb