tar 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +40 -0
- data/.ruby-version +1 -0
- data/.travis.yml +8 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +68 -0
- data/Rakefile +16 -0
- data/bin/console +8 -0
- data/bin/setup +6 -0
- data/lib/char_size.rb +123 -0
- data/lib/tar.rb +4 -0
- data/lib/tar/backports.rb +51 -0
- data/lib/tar/error.rb +11 -0
- data/lib/tar/file_reader.rb +357 -0
- data/lib/tar/file_reader/line.rb +122 -0
- data/lib/tar/header.rb +58 -0
- data/lib/tar/header_reader.rb +37 -0
- data/lib/tar/polyfills.rb +14 -0
- data/lib/tar/reader.rb +30 -0
- data/lib/tar/schema.rb +87 -0
- data/lib/tar/ustar.rb +27 -0
- data/lib/tar/version.rb +5 -0
- data/tar.gemspec +28 -0
- metadata +139 -0
data/lib/tar/file_reader.rb
ADDED
@@ -0,0 +1,357 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "char_size"
|
4
|
+
require "tar/backports"
|
5
|
+
require "tar/file_reader/line"
|
6
|
+
require "tar/polyfills"
|
7
|
+
require "tar/ustar"
|
8
|
+
|
9
|
+
module Tar
|
10
|
+
class FileReader
|
11
|
+
include Enumerable
|
12
|
+
using Backports
|
13
|
+
using Polyfills
|
14
|
+
|
15
|
+
attr_reader :header
|
16
|
+
|
17
|
+
# Builds a reader for a single archive entry.
#
# @param header the entry's header; its #size is used as the entry length
# @param io the underlying stream the entry is read from
# @param external_encoding forwarded to #set_encoding
# @param internal_encoding forwarded to #set_encoding
# @param encoding_options extra options forwarded to #set_encoding
def initialize(
  header,
  io,
  external_encoding: Encoding.default_external,
  internal_encoding: Encoding.default_internal,
  **encoding_options
)
  @header = header
  @io = io
  @pos = 0
  @lineno = 0
  @closed = false

  # Must run last: set_encoding checks the closed state and may read a BOM,
  # both of which rely on the instance variables assigned above.
  set_encoding(external_encoding, internal_encoding, **encoding_options)
end
|
25
|
+
|
26
|
+
# Marks this reader as closed. The underlying stream is left untouched.
#
# @return [true]
def close
  @closed = true
end
|
29
|
+
|
30
|
+
# Tells whether this reader, or the stream underneath it, has been closed.
def closed?
  return @closed if @closed

  @io.closed?
end
|
33
|
+
|
34
|
+
# True once the position has reached (or passed) the size given by the header.
#
# @raise [IOError] if the reader is closed
def eof?
  check_not_closed!
  entry_size = @header.size
  @pos >= entry_size
end
|
38
|
+
alias eof eof?
|
39
|
+
|
40
|
+
# Current byte offset, as tracked by this reader.
#
# @raise [IOError] if the reader is closed
def pos
  check_not_closed!
  @pos
end
|
44
|
+
alias tell pos
|
45
|
+
|
46
|
+
# Moves to an absolute offset; shorthand for an IO::SEEK_SET seek.
def pos=(new_pos)
  seek(new_pos, IO::SEEK_SET)
end
|
49
|
+
|
50
|
+
# Number of bytes between the current position and the header's size,
# never less than zero.
#
# @raise [IOError] if the reader is closed
def pending
  check_not_closed!
  remaining = @header.size - @pos
  remaining.positive? ? remaining : 0
end
|
54
|
+
|
55
|
+
# Current line counter.
#
# @raise [IOError] if the reader is closed
def lineno
  check_not_closed!
  @lineno
end
|
59
|
+
|
60
|
+
# Overwrites the line counter.
#
# @raise [IOError] if the reader is closed
def lineno=(new_lineno)
  check_not_closed!
  @lineno = new_lineno
end
|
64
|
+
|
65
|
+
# Reads from the entry, never going past the size given by the header.
#
# Follows the IO#read contract for end of file: with no length (or a length
# of zero) an empty string is returned, while a positive length returns nil.
# Returning nil there is what lets `while (chunk = read(n))` loops terminate.
#
# @param length [Integer, nil] maximum number of bytes, or nil for the rest
# @param buffer [String, nil] optional buffer handed to the underlying read
# @return [String, nil] binary data when a length is given, otherwise data
#   passed through #encode; nil at end of entry for a positive length
# @raise [IOError] if the reader is closed
def read(length = nil, buffer = nil)
  check_not_closed!

  if length && length > 0 && eof?
    buffer.clear if buffer
    return nil
  end

  data = @io.read(truncate(length), buffer)
  @pos += data.bytesize

  if length.nil?
    encode(data)
  else
    data.force_encoding(Encoding::BINARY)
  end
end
|
77
|
+
|
78
|
+
# Reads at most max_length bytes, never going past the size given by the header.
#
# Like IO#readpartial, raises EOFError at end of entry when max_length is
# positive, so the usual `loop { readpartial(n) }` / `rescue EOFError`
# pattern terminates. A max_length of zero still returns an empty string.
#
# @param max_length [Integer] upper bound on the number of bytes returned
# @param buffer [String, nil] optional buffer handed to the underlying stream
# @return [String] binary data
# @raise [IOError] if the reader is closed
# @raise [EOFError] at end of entry when max_length is positive
def readpartial(max_length, buffer = nil)
  check_not_closed!
  check_not_eof! if max_length > 0

  data = @io.readpartial(truncate(max_length), buffer)
  @pos += data.bytesize
  data.force_encoding(Encoding::BINARY)
end
|
85
|
+
|
86
|
+
# Advances to the offset USTAR.records_size reports for this entry's size.
# Seeks when the underlying stream allows it; otherwise reads and discards
# the bytes in between.
#
# @raise [IOError] if the reader is closed
def skip_to_next_record
  check_not_closed!

  record_end = USTAR.records_size(@header.size)
  return seek(record_end) if seekable?

  @io.read(record_end - @pos)
  @pos = record_end
end
|
98
|
+
|
99
|
+
# Encoding the entry's bytes are tagged with when read.
#
# @raise [IOError] if the reader is closed
def external_encoding
  check_not_closed!
  @external_encoding
end
|
103
|
+
|
104
|
+
# Encoding that read data is transcoded to, or nil when no transcoding is set.
#
# @raise [IOError] if the reader is closed
def internal_encoding
  check_not_closed!
  @internal_encoding
end
|
108
|
+
|
109
|
+
# Configures the external/internal encodings and transcoding options.
#
# The external encoding may be an Encoding, a name, an "ext:int" pair, or a
# "BOM|name" string. With the BOM form a byte order mark is looked for via
# #parse_bom, and the name after the "BOM|" prefix is the fallback.
#
# Returns the reader itself, as IO#set_encoding does, so calls can be chained.
#
# @param external_encoding [Encoding, String, nil]
# @param internal_encoding [Encoding, String, nil] at most one value
# @param encoding_options [Hash] kept for later transcoding in #encode
# @return [self]
# @raise [IOError] if the reader is closed
def set_encoding(external_encoding, *internal_encoding, **encoding_options)
  check_not_closed!

  external_encoding, internal_encoding = extract_encodings(external_encoding, *internal_encoding)

  if parse_bom?(external_encoding)
    # Drop the 4-character "BOM|" prefix when no byte order mark is found.
    external_encoding = parse_bom || external_encoding[4..-1]
  end

  @external_encoding = find_encoding(external_encoding, if_nil: Encoding.default_external, if_unsupported: Encoding.default_external)
  @internal_encoding = find_encoding(internal_encoding, if_nil: nil, if_unsupported: Encoding.default_internal)
  @encoding_options = encoding_options
  self
end
|
122
|
+
|
123
|
+
# Switches the reader to binary mode by setting the external encoding to
# Encoding::BINARY with no internal encoding.
#
# Returns the reader itself, as IO#binmode does, so `binmode.read` works.
#
# @return [self]
def binmode
  set_encoding Encoding::BINARY
  self
end
|
126
|
+
|
127
|
+
# True when data is read as raw binary: the external encoding is
# Encoding::BINARY and no internal encoding is set.
#
# @raise [IOError] if the reader is closed
def binmode?
  check_not_closed!
  @internal_encoding.nil? && @external_encoding == Encoding::BINARY
end
|
131
|
+
|
132
|
+
# Delegates to the underlying stream's #tty?; false when it has no such method.
#
# @raise [IOError] if the reader is closed
def tty?
  check_not_closed!
  return false unless @io.respond_to?(:tty?)

  @io.tty?
end
|
136
|
+
alias isatty tty?
|
137
|
+
|
138
|
+
# Moves the read position within the entry.
#
# The request is converted to an offset relative to the current position
# and applied to the underlying stream with IO::SEEK_CUR. Positions past
# the end of the entry are allowed. Positions before its start are
# rejected, because moving the underlying stream backwards past offset 0
# would expose bytes that do not belong to this entry.
#
# @param amount [Integer] offset, interpreted according to mode
# @param mode [Integer, Symbol] IO::SEEK_SET/CUR/END or :SET/:CUR/:END
# @return [Integer] the new position
# @raise [IOError] if the reader is closed
# @raise [SeekNotSupported] if the underlying stream cannot seek
# @raise [ArgumentError] for an unknown mode
# @raise [Errno::EINVAL] if the target position would be negative
def seek(amount, mode = IO::SEEK_SET)
  check_not_closed!
  check_seekable!
  offset = relativize(amount, mode)
  raise Errno::EINVAL, "cannot seek before the start of the entry" if (@pos + offset).negative?

  @io.seek offset, IO::SEEK_CUR
  @pos += offset
end
|
145
|
+
|
146
|
+
# Returns to the start of the entry and resets the line counter.
#
# @return [Integer] 0
def rewind
  seek(0, IO::SEEK_SET)
  @lineno = 0
end
|
150
|
+
|
151
|
+
# Reads one byte from the entry.
#
# @return [Integer, nil] the byte, or nil at end of entry
# @raise [IOError] if the reader is closed
def getbyte
  check_not_closed!
  return if eof?

  # The position is advanced before the underlying read, exactly one step
  # per call that is not at end of entry.
  @pos += 1
  @io.getbyte
end
|
157
|
+
|
158
|
+
# Pushes data back onto the underlying stream and moves the position back
# by the same number of bytes.
#
# IO#ungetbyte accepts an Integer, a String, or nil. A String moves the
# position back by its byte length, and nil is a no-op, so the position
# tracked here stays in step with the underlying stream.
#
# @param byte [Integer, String, nil]
# @raise [IOError] if the reader is closed
def ungetbyte(byte)
  check_not_closed!
  return nil if byte.nil?

  @pos -= byte.is_a?(String) ? byte.bytesize : 1
  @io.ungetbyte byte
end
|
163
|
+
|
164
|
+
# Like #getbyte, but raises at end of entry instead of returning nil.
#
# @raise [IOError] if the reader is closed
# @raise [EOFError] at end of entry
def readbyte
  check_not_closed!
  check_not_eof!

  getbyte
end
|
169
|
+
|
170
|
+
# Yields every remaining byte of the entry; returns an Enumerator when
# called without a block.
#
# @raise [IOError] if the reader is closed
def each_byte
  check_not_closed!
  return to_enum(:each_byte) unless block_given?

  until eof?
    yield getbyte
  end
end
|
175
|
+
|
176
|
+
# Deprecated alias of #each_byte; prints a warning and delegates.
def bytes(&block)
  warn("warning: #{self.class}#bytes is deprecated; use #each_byte instead")
  each_byte(&block)
end
|
180
|
+
|
181
|
+
# Reads one character in the external encoding.
#
# Bytes are gathered in steps of the encoding's minimum character size
# until they form a valid character, the maximum character size is reached,
# or the entry ends. If no valid character is found, everything beyond the
# first minimum-size unit is pushed back and that unit is returned as is.
#
# @return [String, nil] the character passed through #encode, or nil at end
#   of entry
# @raise [IOError] if the reader is closed
def getc
  check_not_closed!
  return nil if eof?

  smallest, largest = CharSize.minmax(external_encoding)
  candidate = String.new(encoding: Encoding::BINARY)

  until candidate.size == largest || eof?
    candidate << read(smallest)

    candidate.force_encoding(external_encoding)
    return encode(candidate) if candidate.valid_encoding?

    # Back to binary so #size keeps counting bytes on the next round.
    candidate.force_encoding(Encoding::BINARY)
  end

  undo_getc_attempt(candidate, smallest)

  encode(candidate)
end
|
200
|
+
|
201
|
+
# Pushes a character back: it is converted to the external encoding and
# its bytes are handed to #ungetbyte last-to-first.
def ungetc(char)
  raw_bytes = char.encode(external_encoding).bytes
  raw_bytes.reverse_each { |byte| ungetbyte(byte) }
end
|
206
|
+
|
207
|
+
# Like #getc, but raises at end of entry instead of returning nil.
#
# @raise [IOError] if the reader is closed
# @raise [EOFError] at end of entry
def readchar
  check_not_closed!
  check_not_eof!

  getc
end
|
212
|
+
|
213
|
+
# Yields every remaining character of the entry; returns an Enumerator
# when called without a block.
#
# @raise [IOError] if the reader is closed
def each_char
  check_not_closed!
  return to_enum(:each_char) unless block_given?

  until eof?
    yield getc
  end
end
|
218
|
+
|
219
|
+
# Deprecated alias of #each_char; prints a warning and delegates.
def chars(&block)
  warn("warning: #{self.class}#chars is deprecated; use #each_char instead")
  each_char(&block)
end
|
223
|
+
|
224
|
+
# Yields the codepoint(s) of every remaining character; returns an
# Enumerator when called without a block.
#
# @raise [IOError] if the reader is closed
def each_codepoint
  check_not_closed!
  return to_enum(:each_codepoint) unless block_given?

  each_char do |char|
    char.each_codepoint { |codepoint| yield codepoint }
  end
end
|
233
|
+
|
234
|
+
# Deprecated alias of #each_codepoint; prints a warning and delegates.
def codepoints(&block)
  warn("warning: #{self.class}#codepoints is deprecated; use #each_codepoint instead")
  each_codepoint(&block)
end
|
238
|
+
|
239
|
+
# Reads the next line.
#
# The arguments are handed to Line first, so a bad argument list is
# reported before the closed and end-of-entry checks run.
#
# @param args separator and/or limit, interpreted by Line
# @return [String, nil] the line, or nil at end of entry
# @raise [IOError] if the reader is closed
def gets(*args)
  line_reader = Line.new(self, *args)
  check_not_closed!
  return nil if eof?

  line_reader.read
end
|
245
|
+
|
246
|
+
# Like #gets, but raises at end of entry instead of returning nil.
#
# @param args separator and/or limit, interpreted by Line
# @raise [IOError] if the reader is closed
# @raise [EOFError] at end of entry
def readline(*args)
  line_reader = Line.new(self, *args)
  check_not_closed!
  check_not_eof!

  line_reader.read
end
|
252
|
+
|
253
|
+
# Yields every remaining line; returns an Enumerator (carrying the same
# arguments) when called without a block.
#
# @param args separator and/or limit, interpreted by Line
# @raise [IOError] if the reader is closed
def each_line(*args)
  line_reader = Line.new(self, *args)
  check_not_closed!
  return to_enum(:each_line, *args) unless block_given?

  until eof?
    yield line_reader.read
  end
end
|
259
|
+
alias each each_line
|
260
|
+
|
261
|
+
# Deprecated alias of #each_line; prints a warning and delegates.
def lines(*args, &block)
  warn("warning: #{self.class}#lines is deprecated; use #each_line instead")
  each_line(*args, &block)
end
|
265
|
+
|
266
|
+
# Collects every remaining line into an array.
#
# @param args separator and/or limit, forwarded to #each_line
# @return [Array<String>]
def readlines(*args)
  remaining_lines = each_line(*args)
  remaining_lines.to_a
end
|
269
|
+
|
270
|
+
private
|
271
|
+
|
272
|
+
# Caps a requested length at the number of pending bytes; nil means
# "everything that is pending".
def truncate(length)
  available = pending
  length.nil? ? available : [available, length].min
end
|
275
|
+
|
276
|
+
# Normalizes the arguments of #set_encoding into an array of
# [external] or [external, internal].
#
# A lone String may carry both encodings as "ext:int" and is split on the
# first colon. Anything else is passed through unchanged.
#
# @raise [ArgumentError] if more than one internal encoding is given
def extract_encodings(external_encoding, *internal_encoding)
  given = internal_encoding.size + 1
  raise ArgumentError, "wrong number of arguments (given #{given}, expected 1..2)" if given > 2

  passthrough = !internal_encoding.empty? || external_encoding.nil? || external_encoding.is_a?(Encoding)
  return [external_encoding, *internal_encoding] if passthrough

  external_encoding.split(":", 2)
end
|
281
|
+
|
282
|
+
# True when the encoding is given as a String starting with "BOM|"
# (case-insensitive), i.e. the caller asked for byte order mark detection.
#
# The pattern is anchored with \A rather than ^ so that only the very start
# of the string counts; ^ would also match after an embedded newline, and
# #set_encoding then strips the first four characters as if they were the
# prefix.
def parse_bom?(encoding)
  encoding.is_a?(String) && /\ABOM\|/i.match?(encoding)
end
|
285
|
+
|
286
|
+
# Looks for a byte order mark, but only when nothing has been read yet.
#
# @return [Encoding, nil] whatever #walk_bom_tree finds, or nil when the
#   position is not zero
def parse_bom
  pos.zero? ? walk_bom_tree(BOM_TREE) : nil
end
|
290
|
+
|
291
|
+
# Resolves an encoding name or object through Encoding.find.
#
# @param encoding [Encoding, String, nil]
# @param if_nil returned when encoding is nil or an empty string
# @param if_unsupported returned (after a warning) when Encoding.find
#   raises ArgumentError
def find_encoding(encoding, if_nil:, if_unsupported:)
  return if_nil if encoding.nil? || encoding == ""

  begin
    Encoding.find(encoding)
  rescue ArgumentError
    warn("warning: encoding #{encoding} unsupported, defaulting to #{if_unsupported}")
    if_unsupported
  end
end
|
298
|
+
|
299
|
+
# Tags data with the external encoding and, when an internal encoding is
# set, transcodes it in place using the stored encoding options.
#
# The options are passed with ** so they reach String#encode! as keyword
# arguments. Passed as a positional Hash, Ruby 3 treats them as the source
# encoding argument and raises TypeError.
#
# @param data [String] mutated in place
# @return [String] the same string object
def encode(data)
  data.force_encoding @external_encoding
  data.encode! @internal_encoding, **@encoding_options if @internal_encoding
  data
end
|
304
|
+
|
305
|
+
# Converts a seek request into an offset relative to the current position.
#
# @param amount [Integer]
# @param mode [Integer, Symbol] IO::SEEK_CUR/SET/END or :CUR/:SET/:END
# @return [Integer]
# @raise [ArgumentError] for any other mode
def relativize(amount, mode)
  if [:CUR, IO::SEEK_CUR].include?(mode)
    amount
  elsif [:SET, IO::SEEK_SET].include?(mode)
    amount - @pos
  elsif [:END, IO::SEEK_END].include?(mode)
    @header.size + amount - @pos
  else
    raise ArgumentError, "unknown seek mode #{mode.inspect}, expected :CUR, :END, or :SET (or IO::SEEK_*)"
  end
end
|
313
|
+
|
314
|
+
# Gives up on decoding a character: keeps the first min_char_size bytes in
# char and pushes every byte after them back, last byte first.
#
# String#slice! returns nil when the range starts beyond the end of the
# string. That happens when fewer than min_char_size bytes were available
# (e.g. a dangling odd byte at the end of UTF-16 data). There is nothing to
# push back in that case.
#
# @param char [String] bytes read so far; truncated in place
# @param min_char_size [Integer]
def undo_getc_attempt(char, min_char_size)
  surplus = char.slice!(min_char_size..-1)
  return if surplus.nil?

  surplus.bytes.reverse_each do |byte|
    ungetbyte byte
  end
end
|
319
|
+
|
320
|
+
# Guard used by the public API.
#
# @raise [IOError] if #closed? is true
def check_not_closed!
  return unless closed?

  raise IOError, "closed stream"
end
|
323
|
+
|
324
|
+
# Guard used by the read* methods that must not return nil.
#
# @raise [EOFError] if #eof? is true
def check_not_eof!
  return unless eof?

  raise EOFError, "end of file reached"
end
|
327
|
+
|
328
|
+
# Tells whether the underlying stream can seek. The answer is computed
# once and cached.
#
# A stream counts as seekable when it responds to #seek and asking for its
# position does not raise Errno::ESPIPE.
def seekable?
  return @seekable if defined?(@seekable)

  @seekable =
    if @io.respond_to?(:seek)
      begin
        @io.pos
        true
      rescue Errno::ESPIPE
        false
      end
    else
      false
    end
end
|
337
|
+
|
338
|
+
# Guard used by #seek.
#
# @raise [SeekNotSupported] if the underlying stream is not seekable
def check_seekable!
  return if seekable?

  raise SeekNotSupported, "seek not supported by #{@io}"
end
|
341
|
+
|
342
|
+
# Matches the upcoming bytes against a tree of byte order marks.
#
# A node is either a Hash of byte => child node, or a [Hash, Encoding] pair
# whose encoding is the result when no longer mark matches. Bytes that do
# not contribute to the returned encoding are pushed back.
#
# When #getbyte returns nil there is no byte to push back, so the method
# returns right away. Calling ungetbyte with nil would still move the
# position back by one and leave it out of step with the stream.
#
# @return [Encoding, nil]
def walk_bom_tree((tree, encoding))
  byte = getbyte
  return encoding if byte.nil?

  found_encoding = walk_bom_tree(tree[byte]) if tree.key?(byte)
  ungetbyte byte unless found_encoding
  found_encoding || encoding
end
|
348
|
+
|
349
|
+
# Byte order marks, keyed byte by byte. A [Hash, Encoding] pair marks a
# point where a complete mark has been seen: the Hash holds longer marks
# that share the same prefix, the Encoding is the result otherwise.
BOM_TREE = {
  # 00 00 FE FF
  0x00 => { 0x00 => { 0xFE => { 0xFF => [{}, Encoding::UTF_32BE] } } },
  # EF BB BF
  0xEF => { 0xBB => { 0xBF => [{}, Encoding::UTF_8] } },
  # FE FF
  0xFE => { 0xFF => [{}, Encoding::UTF_16BE] },
  # FF FE, optionally followed by 00 00
  0xFF => {
    0xFE => [
      { 0x00 => { 0x00 => [{}, Encoding::UTF_32LE] } },
      Encoding::UTF_16LE
    ]
  }
}.freeze
|
355
|
+
private_constant :BOM_TREE
|
356
|
+
end
|
357
|
+
end
|
data/lib/tar/file_reader/line.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "English"
|
4
|
+
require "tar/backports"
|
5
|
+
|
6
|
+
module Tar
|
7
|
+
class FileReader
|
8
|
+
class Line
|
9
|
+
using Backports
|
10
|
+
|
11
|
+
# Prepares a line reader for file.
#
# @param file the reader lines are taken from
# @param args nothing, a separator or a limit, or a separator and a limit
# @raise [ArgumentError] when more than two extra arguments are given
def initialize(file, *args)
  @file = file
  @skip = nil

  if args.empty?
    use_default_separator
    use_default_limit
  elsif args.size == 1
    extract_separator_or_limit(args.first)
  elsif args.size == 2
    separator, limit = args
    extract_separator(separator)
    extract_limit(limit)
  else
    raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..2)", drop_internal_frames(caller)
  end
end
|
28
|
+
|
29
|
+
# Reads one line from the file.
#
# With neither separator nor limit, the rest of the file is returned.
# When a skip character is set, runs of it are consumed both before and
# after the line.
def read
  return @file.read if read_to_eof?

  skip_newlines if @skip
  read_line.tap { skip_newlines if @skip }
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
# Consumes consecutive occurrences of the skip character; the first
# different character is pushed back.
def skip_newlines
  until @file.eof?
    char = @file.getc
    next if char == @skip

    @file.ungetc(char)
    break
  end
end
|
49
|
+
|
50
|
+
# Collects characters until the separator, the limit, or the end of the
# file is reached, and bumps the file's line counter.
#
# IO#lineno counts the lines handed out by gets, not the separators seen,
# so the counter is incremented for every non-empty result. That includes a
# final line without a trailing separator and a line cut short by the limit.
#
# @return [String] the line, in the encoding reported by #encoding
def read_line
  line = String.new(encoding: encoding)
  line << @file.getc until stop_reading?(line)
  @file.lineno += 1 unless line.empty?
  line
end
|
56
|
+
|
57
|
+
# Encoding of the characters the file hands out: its internal encoding
# when one is set, its external encoding otherwise.
def encoding
  transcoded_to = @file.internal_encoding
  transcoded_to || @file.external_encoding
end
|
60
|
+
|
61
|
+
# Strips the leading `new` frames from a backtrace so an ArgumentError
# raised from the constructor points at the code that called it.
#
# Both backtrace label styles are recognized: "in `new'" (up to Ruby 3.3)
# and "in 'Class#new'" (Ruby 3.4 and later).
#
# @param stacktrace [Array<String>]
# @return [Array<String>]
def drop_internal_frames(stacktrace)
  stacktrace.drop_while { |frame| frame.match?(/in [`'](?:\S+[#.])?new'/) }
end
|
64
|
+
|
65
|
+
# Uses the global input record separator as the line separator.
#
# Like an explicitly given separator in #extract_separator, the default is
# converted to the file's encoding. Comparing a UTF-8 "\n" against a line
# in an ASCII-incompatible encoding such as UTF-16 with String#end_with?
# raises Encoding::CompatibilityError.
def use_default_separator
  default = $INPUT_RECORD_SEPARATOR
  @separator = default && default.encode(encoding)
end
|
68
|
+
|
69
|
+
# Clears the limit, meaning lines are not cut short by length.
def use_default_limit
  @limit = nil
end
|
72
|
+
|
73
|
+
# Interprets a single argument: anything that responds to #to_int is a
# limit, everything else is a separator. The other setting gets its default.
def extract_separator_or_limit(separator_or_limit)
  unless separator_or_limit.respond_to?(:to_int)
    extract_separator(separator_or_limit)
    return use_default_limit
  end

  use_default_separator
  extract_limit(separator_or_limit)
end
|
82
|
+
|
83
|
+
# Stores the separator.
#
# nil means "no separator". An empty string selects paragraph mode: lines
# end at two newlines and surrounding newlines are skipped. Anything else
# is converted with #to_str and encoded to the file's encoding.
def extract_separator(separator)
  if separator.nil?
    @separator = nil
  elsif "" == separator
    @separator = "\n\n".encode(encoding)
    @skip = "\n".encode(encoding)
  else
    @separator = separator.to_str.encode(encoding)
  end
end
|
94
|
+
|
95
|
+
# Stores the limit. nil and negative values both mean "no limit".
def extract_limit(limit)
  return use_default_limit if limit.nil?

  @limit = limit.to_int
  use_default_limit if @limit.negative?
end
|
103
|
+
|
104
|
+
# True when neither a separator nor a limit is set, i.e. a "line" is
# everything left in the file.
def read_to_eof?
  [@separator, @limit].all?(&:nil?)
end
|
107
|
+
|
108
|
+
# Decides whether the line is complete: it ends with the separator, has
# reached the limit, or the file has no more data.
def stop_reading?(line)
  done = reached_separator?(line)
  done ||= reached_limit?(line)
  done || @file.eof?
end
|
111
|
+
|
112
|
+
# Truthy when a separator is set and the line ends with it.
def reached_separator?(line)
  @separator ? line.end_with?(@separator) : @separator
end
|
115
|
+
|
116
|
+
# Truthy when a limit is set and the line's byte size has reached it.
def reached_limit?(line)
  @limit ? line.bytesize >= @limit : @limit
end
|
119
|
+
end
|
120
|
+
private_constant :Line
|
121
|
+
end
|
122
|
+
end
|