chupa-text 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b7bb1a550ae5f986d102ceb5b13c542201a1d630
4
- data.tar.gz: 0ca5560f911c3f19ab018f8b0cb37780a2738e25
3
+ metadata.gz: 3af110814417d579cde496c50b05d6d49964dd97
4
+ data.tar.gz: 2609179c9e817e33a82df59c53834236dc63da3d
5
5
  SHA512:
6
- metadata.gz: 0bfc9e586bfbcd10ba60fdfa982fc3027e0e3b2433874fd5870ece5396201e42871de772d6eb10c6e39b32172ea18e21cc84944e48182f70dc72918b949719a8
7
- data.tar.gz: 40e10ff0742ae1a5b98733c8a64ea3f10934da02c30b834e8bfba0914271b31bbc04a151ddef19f8f6a11101f4de7590afc7f3d4901caca37e3d7d7c1ce4207a
6
+ metadata.gz: 6ae9ed51763e954b6b99973dfb2c020e046ccfb57f6670bfe96e935319055a6f898e71f0ed30ba25f3d3c8a4180ea6dbdfcb80337829659fdfaeb6a08841d873
7
+ data.tar.gz: a114dd75066faaf9501cd1fd4891df6a8ed46adf99711721a4d4d4cfaf4b1a63bb852aba578d7c46f5c63535bd8d5a19508b65c44fa953087ff8ac301cc85535
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.0.6: 2017-07-05
4
+
5
+ ### Improvements
6
+
7
+ * Supported non ASCII characters in file name.
8
+
3
9
  ## 1.0.5: 2017-05-02
4
10
 
5
11
  ### Improvements
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -154,7 +154,13 @@ module ChupaText
154
154
  if @input.nil?
155
155
  VirtualFileData.new(nil, $stdin)
156
156
  else
157
- InputData.new(@input)
157
+ case @input
158
+ when /\A[a-z]+:\/\//i
159
+ input = URI.parse(@input)
160
+ else
161
+ input = Pathname(@input)
162
+ end
163
+ InputData.new(input)
158
164
  end
159
165
  end
160
166
 
@@ -25,24 +25,25 @@ module ChupaText
25
25
  @decomposer = DecomposerLoader.new(@configuration.decomposer)
26
26
  @mime_types = MIMETypesLoader.new(@configuration.mime_type_registry)
27
27
  @load_paths = []
28
- data_dir = File.join(File.dirname(__FILE__), "..", "..", "data")
29
- @load_paths << File.expand_path(data_dir)
28
+ data_dir = Pathname(__dir__) + ".." + ".." + "data"
29
+ @load_paths << data_dir.expand_path
30
30
  end
31
31
 
32
32
  def load(path)
33
33
  path = resolve_path(path)
34
34
  File.open(path) do |file|
35
- instance_eval(file.read, path, 1)
35
+ instance_eval(file.read, path.to_path, 1)
36
36
  end
37
37
  end
38
38
 
39
39
  private
40
40
  def resolve_path(path)
41
- return path if File.exist?(path)
42
- return path if Pathname(path).absolute?
41
+ path = Pathname(path) unless path.is_a?(Pathname)
42
+ return path if path.exist?
43
+ return path if path.absolute?
43
44
  @load_paths.each do |load_path|
44
- resolved_path = File.join(load_path, path)
45
- return resolved_path if File.exist?(resolved_path)
45
+ resolved_path = path.expand_path(load_path)
46
+ return resolved_path if resolved_path.exist?
46
47
  end
47
48
  path
48
49
  end
@@ -14,6 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "cgi/util"
17
18
  require "uri"
18
19
  require "open-uri"
19
20
 
@@ -91,10 +92,27 @@ module ChupaText
91
92
  # `nil`, it means that the data isn't associated with any URIs.
92
93
  def uri=(uri)
93
94
  case uri
94
- when String, Pathname
95
- uri = URI.parse(uri.to_s)
95
+ when Pathname
96
+ file_uri = ""
97
+ target = uri.expand_path
98
+ loop do
99
+ target, base = target.split
100
+ file_uri = "/#{CGI.escape(base.to_s)}#{file_uri}"
101
+ break if target.root?
102
+ end
103
+ file_uri = "file://#{file_uri}"
104
+ @uri = URI.parse(file_uri)
105
+ self.path ||= uri
106
+ when NilClass
107
+ @uri = nil
108
+ self.path = nil
109
+ else
110
+ unless uri.is_a?(URI)
111
+ uri = URI.parse(uri)
112
+ end
113
+ @uri = uri
114
+ self.path ||= @uri.path
96
115
  end
97
- @uri = uri
98
116
  end
99
117
 
100
118
  def open
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -31,12 +31,35 @@ module ChupaText
31
31
  Gem::Package::TarReader.new(StringIO.new(data.body)) do |reader|
32
32
  reader.each do |entry|
33
33
  next unless entry.file?
34
+ entry.extend(CopyStreamable)
34
35
  extracted = VirtualFileData.new(entry.full_name, entry)
35
36
  extracted.source = data
36
37
  yield(extracted)
37
38
  end
38
39
  end
39
40
  end
41
+
42
+ # TODO: Supporting output buffer in #read and #readpartial
43
+ # should be done in RubyGems' tar implementation.
44
+ module CopyStreamable
45
+ def readpartial(max_length, buffer=nil)
46
+ data = super(max_length)
47
+ if data.nil?
48
+ if max_length.zero?
49
+ return ""
50
+ else
51
+ raise EOFError
52
+ end
53
+ end
54
+
55
+ if buffer.nil?
56
+ data
57
+ else
58
+ buffer.replace(data)
59
+ buffer
60
+ end
61
+ end
62
+ end
40
63
  end
41
64
  end
42
65
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -14,5 +14,6 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "chupa-text/formatters/hash"
17
18
  require "chupa-text/formatters/json"
18
19
  require "chupa-text/formatters/text"
@@ -0,0 +1,60 @@
1
+ # Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # This library is free software; you can redistribute it and/or
4
+ # modify it under the terms of the GNU Lesser General Public
5
+ # License as published by the Free Software Foundation; either
6
+ # version 2.1 of the License, or (at your option) any later version.
7
+ #
8
+ # This library is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
+ # Lesser General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Lesser General Public
14
+ # License along with this library; if not, write to the Free Software
15
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+
17
+ module ChupaText
18
+ module Formatters
19
+ class Hash
20
+ def initialize
21
+ @texts = []
22
+ end
23
+
24
+ def format_start(data)
25
+ end
26
+
27
+ def format_extracted(data)
28
+ text = {}
29
+ format_headers(data, text)
30
+ text["body"] = data.body
31
+ @texts << text
32
+ end
33
+
34
+ def format_finish(data)
35
+ formatted = {}
36
+ format_headers(data, formatted)
37
+ formatted["texts"] = @texts
38
+ formatted
39
+ end
40
+
41
+ private
42
+ def format_headers(data, target)
43
+ format_header("mime-type", data.mime_type, target)
44
+ format_header("uri", data.uri, target)
45
+ if data.uri.class == URI::Generic
46
+ format_header("path", data.path, target)
47
+ end
48
+ format_header("size", data.size, target)
49
+ data.attributes.each do |name, value|
50
+ format_header(name, value, target)
51
+ end
52
+ end
53
+
54
+ def format_header(name, value, target)
55
+ return if value.nil?
56
+ target[name] = value
57
+ end
58
+ end
59
+ end
60
+ end
@@ -16,46 +16,21 @@
16
16
 
17
17
  require "json"
18
18
 
19
+ require "chupa-text/formatters/hash"
20
+
19
21
  module ChupaText
20
22
  module Formatters
21
- class JSON
23
+ class JSON < Hash
22
24
  def initialize(output)
25
+ super()
23
26
  @output = output
24
- @texts = []
25
- end
26
-
27
- def format_start(data)
28
- end
29
-
30
- def format_extracted(data)
31
- text = {}
32
- format_headers(data, text)
33
- text["body"] = data.body
34
- @texts << text
35
27
  end
36
28
 
37
29
  def format_finish(data)
38
- formatted = {}
39
- format_headers(data, formatted)
40
- formatted["texts"] = @texts
30
+ formatted = super
41
31
  @output << ::JSON.pretty_generate(formatted)
42
32
  @output << "\n"
43
33
  end
44
-
45
- private
46
- def format_headers(data, target)
47
- format_header("mime-type", data.mime_type, target)
48
- format_header("uri", data.uri, target)
49
- format_header("size", data.size, target)
50
- data.attributes.each do |name, value|
51
- format_header(name, value, target)
52
- end
53
- end
54
-
55
- def format_header(name, value, target)
56
- return if value.nil?
57
- target[name] = value
58
- end
59
34
  end
60
35
  end
61
36
  end
@@ -14,6 +14,7 @@
14
14
  # License along with this library; if not, write to the Free Software
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
+ require "cgi/util"
17
18
  require "uri"
18
19
  require "open-uri"
19
20
 
@@ -23,9 +24,10 @@ module ChupaText
23
24
  super(options)
24
25
  self.uri = uri
25
26
  if @uri.class == URI::Generic
26
- @content = FileContent.new(@uri.path)
27
+ @content = FileContent.new(path)
27
28
  else
28
29
  @content = download
30
+ self.path = @content.path
29
31
  end
30
32
  end
31
33
 
@@ -37,10 +39,6 @@ module ChupaText
37
39
  @content.size
38
40
  end
39
41
 
40
- def path
41
- @content.path
42
- end
43
-
44
42
  def open(&block)
45
43
  @content.open(&block)
46
44
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -15,5 +15,5 @@
15
15
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
  module ChupaText
18
- VERSION = "1.0.5"
18
+ VERSION = "1.0.6"
19
19
  end
@@ -26,30 +26,14 @@ module ChupaText
26
26
  def initialize(input, original_path=nil)
27
27
  @file = nil
28
28
  @base_name = compute_base_name(original_path)
29
- chunk = input.read(BUFFER_SIZE) || ""
30
- if chunk.bytesize != BUFFER_SIZE
31
- @path = nil
32
- @body = chunk
33
- @size = @body.bytesize
34
- else
35
- @body = nil
36
- @size = chunk.bytesize
37
- setup_file do |file|
38
- file.write(chunk)
39
- while (chunk = input.read(BUFFER_SIZE))
40
- @size += chunk.bytesize
41
- file.write(chunk)
42
- end
43
- end
29
+ @body = nil
30
+ setup_file do |file|
31
+ @size = IO.copy_stream(input, file)
44
32
  end
45
33
  end
46
34
 
47
35
  def open(&block)
48
- if @body
49
- yield(StringIO.new(@body))
50
- else
51
- File.open(path, "rb", &block)
52
- end
36
+ File.open(path, "rb", &block)
53
37
  end
54
38
 
55
39
  def body
@@ -57,9 +41,6 @@ module ChupaText
57
41
  end
58
42
 
59
43
  def path
60
- ensure_setup_file do |file|
61
- file.write(@body)
62
- end
63
44
  @path
64
45
  end
65
46
 
@@ -77,10 +58,6 @@ module ChupaText
77
58
  end
78
59
  end
79
60
 
80
- def ensure_setup_file(&block)
81
- setup_file(&block) unless @file
82
- end
83
-
84
61
  def setup_file
85
62
  @file = Tempfile.new(@base_name)
86
63
  @path = @file.path
@@ -19,11 +19,6 @@ module ChupaText
19
19
  def initialize(uri, input, options={})
20
20
  super(options)
21
21
  self.uri = uri
22
- if @uri
23
- path = @uri.path
24
- else
25
- path = nil
26
- end
27
22
  @content = VirtualContent.new(input, path)
28
23
  end
29
24
 
@@ -35,10 +30,6 @@ module ChupaText
35
30
  @content.size
36
31
  end
37
32
 
38
- def path
39
- @content.path
40
- end
41
-
42
33
  def open(&block)
43
34
  @content.open(&block)
44
35
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -57,17 +57,21 @@ class TestCommandChupaText < Test::Unit::TestCase
57
57
  sub_test_case("file") do
58
58
  def test_single
59
59
  body = "Hello\n"
60
- path = fixture_path("hello.txt").to_s
60
+ fixture_name = "hello.txt"
61
+ uri = fixture_uri(fixture_name).to_s
62
+ path = fixture_path(fixture_name).to_s
61
63
  assert_equal([
62
64
  true,
63
65
  {
64
66
  "mime-type" => "text/plain",
65
- "uri" => path,
67
+ "uri" => uri,
68
+ "path" => path,
66
69
  "size" => body.bytesize,
67
70
  "texts" => [
68
71
  {
69
72
  "mime-type" => "text/plain",
70
- "uri" => path,
73
+ "uri" => uri,
74
+ "path" => path,
71
75
  "size" => body.bytesize,
72
76
  "body" => body,
73
77
  },
@@ -161,18 +165,21 @@ class TestCommandChupaText < Test::Unit::TestCase
161
165
  sub_test_case("configuration") do
162
166
  def test_no_decomposer
163
167
  conf = fixture_path("no-decomposer.conf")
164
- gz = fixture_path("hello.txt.gz")
168
+ fixture_name = "hello.txt.gz"
169
+ uri = fixture_uri(fixture_name)
170
+ path = fixture_path(fixture_name)
165
171
  assert_equal([
166
172
  true,
167
173
  {
168
- "uri" => gz.to_s,
174
+ "uri" => uri.to_s,
175
+ "path" => path.to_s,
169
176
  "mime-type" => "application/x-gzip",
170
- "size" => gz.stat.size,
177
+ "size" => path.stat.size,
171
178
  "texts" => [],
172
179
  },
173
180
  ],
174
181
  run_command("--configuration", conf.to_s,
175
- gz.to_s))
182
+ path.to_s))
176
183
  end
177
184
  end
178
185
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -41,8 +41,8 @@ class TestDecomposersGzip < Test::Unit::TestCase
41
41
  @data = ChupaText::InputData.new(fixture_path("hello.txt.gz"))
42
42
  end
43
43
 
44
- def test_path
45
- assert_equal([URI.parse(fixture_path("hello.txt").to_s)],
44
+ def test_uri
45
+ assert_equal([fixture_uri("hello.txt")],
46
46
  decompose(@data).collect(&:uri))
47
47
  end
48
48
 
@@ -64,7 +64,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
64
64
  end
65
65
 
66
66
  def test_uri
67
- assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
67
+ assert_equal([fixture_uri("hello.tar")],
68
68
  decompose(@data).collect(&:uri))
69
69
  end
70
70
 
@@ -91,7 +91,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
91
91
  end
92
92
 
93
93
  def test_uri
94
- assert_equal([URI.parse(fixture_path("hello.tar").to_s)],
94
+ assert_equal([fixture_uri("hello.tar")],
95
95
  decompose(@data).collect(&:uri))
96
96
  end
97
97
 
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -16,10 +16,16 @@
16
16
 
17
17
  require "pathname"
18
18
  require "tempfile"
19
+ require "uri"
19
20
 
20
21
  module Helper
21
22
  def fixture_path(*components)
22
23
  base_path = Pathname(__FILE__).dirname + "fixture"
23
24
  base_path.join(*components)
24
25
  end
26
+
27
+ def fixture_uri(*components)
28
+ path = fixture_path(*components)
29
+ URI.parse("file://#{path}")
30
+ end
25
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-02 00:00:00.000000000 Z
11
+ date: 2017-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -124,6 +124,7 @@ files:
124
124
  - lib/chupa-text/extractor.rb
125
125
  - lib/chupa-text/file-content.rb
126
126
  - lib/chupa-text/formatters.rb
127
+ - lib/chupa-text/formatters/hash.rb
127
128
  - lib/chupa-text/formatters/json.rb
128
129
  - lib/chupa-text/formatters/text.rb
129
130
  - lib/chupa-text/input-data.rb