tar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +40 -0
- data/.ruby-version +1 -0
- data/.travis.yml +8 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +68 -0
- data/Rakefile +16 -0
- data/bin/console +8 -0
- data/bin/setup +6 -0
- data/lib/char_size.rb +123 -0
- data/lib/tar.rb +4 -0
- data/lib/tar/backports.rb +51 -0
- data/lib/tar/error.rb +11 -0
- data/lib/tar/file_reader.rb +357 -0
- data/lib/tar/file_reader/line.rb +122 -0
- data/lib/tar/header.rb +58 -0
- data/lib/tar/header_reader.rb +37 -0
- data/lib/tar/polyfills.rb +14 -0
- data/lib/tar/reader.rb +30 -0
- data/lib/tar/schema.rb +87 -0
- data/lib/tar/ustar.rb +27 -0
- data/lib/tar/version.rb +5 -0
- data/tar.gemspec +28 -0
- metadata +139 -0
data/lib/tar/error.rb
ADDED
@@ -0,0 +1,357 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "char_size"
|
4
|
+
require "tar/backports"
|
5
|
+
require "tar/file_reader/line"
|
6
|
+
require "tar/polyfills"
|
7
|
+
require "tar/ustar"
|
8
|
+
|
9
|
+
module Tar
|
10
|
+
class FileReader
|
11
|
+
include Enumerable
|
12
|
+
using Backports
|
13
|
+
using Polyfills
|
14
|
+
|
15
|
+
attr_reader :header
|
16
|
+
|
17
|
+
def initialize(header, io, external_encoding: Encoding.default_external, internal_encoding: Encoding.default_internal, **encoding_options)
|
18
|
+
@header = header
|
19
|
+
@io = io
|
20
|
+
@closed = false
|
21
|
+
@lineno = 0
|
22
|
+
@pos = 0
|
23
|
+
set_encoding external_encoding, internal_encoding, **encoding_options
|
24
|
+
end
|
25
|
+
|
26
|
+
def close
|
27
|
+
@closed = true
|
28
|
+
end
|
29
|
+
|
30
|
+
def closed?
|
31
|
+
@closed || @io.closed?
|
32
|
+
end
|
33
|
+
|
34
|
+
def eof?
|
35
|
+
check_not_closed!
|
36
|
+
@pos >= @header.size
|
37
|
+
end
|
38
|
+
alias eof eof?
|
39
|
+
|
40
|
+
def pos
|
41
|
+
check_not_closed!
|
42
|
+
@pos
|
43
|
+
end
|
44
|
+
alias tell pos
|
45
|
+
|
46
|
+
def pos=(new_pos)
|
47
|
+
seek new_pos
|
48
|
+
end
|
49
|
+
|
50
|
+
def pending
|
51
|
+
check_not_closed!
|
52
|
+
[0, @header.size - @pos].max
|
53
|
+
end
|
54
|
+
|
55
|
+
def lineno
|
56
|
+
check_not_closed!
|
57
|
+
@lineno
|
58
|
+
end
|
59
|
+
|
60
|
+
def lineno=(new_lineno)
|
61
|
+
check_not_closed!
|
62
|
+
@lineno = new_lineno
|
63
|
+
end
|
64
|
+
|
65
|
+
# Reads data belonging to the current archive entry, never reading past the
# entry size declared in the header.
#
# Follows the IO#read contract:
# * with no +length+, returns the rest of the entry (possibly an empty
#   string), tagged with the external encoding and transcoded to the
#   internal encoding when one is set;
# * with a +length+, returns at most +length+ bytes as a binary string, or
#   +nil+ when a positive +length+ is requested at the end of the entry.
#
# @param length [Integer, nil] maximum number of bytes to read
# @param buffer [String, nil] optional buffer handed to the underlying IO
# @return [String, nil]
# @raise [IOError] if the reader (or the underlying IO) is closed
def read(length = nil, buffer = nil)
  check_not_closed!

  # A positive length at end of entry must yield nil (not ""), otherwise
  # the usual `while (chunk = io.read(n))` loop never terminates.
  if length && length > 0 && pending.zero?
    buffer.clear if buffer
    return nil
  end

  data = @io.read(truncate(length), buffer)

  # The underlying stream can end before the size announced by the header
  # (truncated archive); treat that as end of file instead of crashing.
  if data.nil?
    return nil unless length.nil?
    return encode(buffer || String.new)
  end

  @pos += data.bytesize

  if length.nil?
    encode(data)
  else
    data.force_encoding(Encoding::BINARY)
  end
end
|
77
|
+
|
78
|
+
# Reads at most +max_length+ bytes of the current entry through the
# underlying IO's #readpartial, never reading past the entry size declared
# in the header. The result is always a binary string.
#
# Like IO#readpartial, raises EOFError when a positive +max_length+ is
# requested at the end of the entry; returning "" there would make callers
# that loop until EOFError spin forever.
#
# @param max_length [Integer] maximum number of bytes to read
# @param buffer [String, nil] optional buffer handed to the underlying IO
# @return [String] binary data read from the entry
# @raise [IOError] if the reader (or the underlying IO) is closed
# @raise [EOFError] if the end of the entry has been reached
def readpartial(max_length, buffer = nil)
  check_not_closed!

  if max_length > 0 && pending.zero?
    buffer.clear if buffer
    raise EOFError, "end of file reached"
  end

  data = @io.readpartial(truncate(max_length), buffer)
  @pos += data.bytesize
  data.force_encoding(Encoding::BINARY)
end
|
85
|
+
|
86
|
+
def skip_to_next_record
|
87
|
+
check_not_closed!
|
88
|
+
|
89
|
+
target_pos = USTAR.records_size(@header.size)
|
90
|
+
|
91
|
+
if seekable?
|
92
|
+
seek target_pos
|
93
|
+
else
|
94
|
+
@io.read(target_pos - @pos)
|
95
|
+
@pos = target_pos
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
def external_encoding
|
100
|
+
check_not_closed!
|
101
|
+
@external_encoding
|
102
|
+
end
|
103
|
+
|
104
|
+
def internal_encoding
|
105
|
+
check_not_closed!
|
106
|
+
@internal_encoding
|
107
|
+
end
|
108
|
+
|
109
|
+
def set_encoding(external_encoding, *internal_encoding, **encoding_options)
|
110
|
+
check_not_closed!
|
111
|
+
|
112
|
+
external_encoding, internal_encoding = extract_encodings(external_encoding, *internal_encoding)
|
113
|
+
|
114
|
+
if parse_bom?(external_encoding)
|
115
|
+
external_encoding = parse_bom || external_encoding[4..-1]
|
116
|
+
end
|
117
|
+
|
118
|
+
@external_encoding = find_encoding(external_encoding, if_nil: Encoding.default_external, if_unsupported: Encoding.default_external)
|
119
|
+
@internal_encoding = find_encoding(internal_encoding, if_nil: nil, if_unsupported: Encoding.default_internal)
|
120
|
+
@encoding_options = encoding_options
|
121
|
+
end
|
122
|
+
|
123
|
+
def binmode
|
124
|
+
set_encoding Encoding::BINARY
|
125
|
+
end
|
126
|
+
|
127
|
+
def binmode?
|
128
|
+
check_not_closed!
|
129
|
+
@external_encoding == Encoding::BINARY && @internal_encoding.nil?
|
130
|
+
end
|
131
|
+
|
132
|
+
def tty?
|
133
|
+
check_not_closed!
|
134
|
+
@io.respond_to?(:tty?) && @io.tty?
|
135
|
+
end
|
136
|
+
alias isatty tty?
|
137
|
+
|
138
|
+
# Moves the read position within the current entry.
#
# The requested position is converted to an offset relative to the current
# position and applied to the underlying IO with IO::SEEK_CUR, so the
# entry's absolute location inside the archive never needs to be known.
#
# @param amount [Integer] offset, interpreted according to +mode+
# @param mode [Integer, Symbol] IO::SEEK_SET / :SET (default),
#   IO::SEEK_CUR / :CUR or IO::SEEK_END / :END
# @return [Integer] the new position within the entry
# @raise [SeekNotSupported] if the underlying IO cannot seek
# @raise [IOError] if the reader (or the underlying IO) is closed
# @raise [ArgumentError] if +mode+ is not a known seek mode
# @raise [Errno::EINVAL] if the target position lies before the entry start
def seek(amount, mode = IO::SEEK_SET)
  check_seekable!
  check_not_closed!
  offset = relativize(amount, mode)

  # A negative target would move the shared IO into the header or a
  # previous entry; reject it like IO#seek rejects negative positions.
  raise Errno::EINVAL, "negative position" if @pos + offset < 0

  @io.seek offset, IO::SEEK_CUR
  @pos += offset
end
|
145
|
+
|
146
|
+
def rewind
|
147
|
+
seek 0
|
148
|
+
@lineno = 0
|
149
|
+
end
|
150
|
+
|
151
|
+
def getbyte
|
152
|
+
check_not_closed!
|
153
|
+
return nil if eof?
|
154
|
+
@pos += 1
|
155
|
+
@io.getbyte
|
156
|
+
end
|
157
|
+
|
158
|
+
# Pushes bytes back onto the underlying IO and rewinds the tracked position
# by the number of bytes actually pushed back.
#
# Accepts the same arguments as IO#ungetbyte: an Integer (one byte), a
# String (all of its bytes) or nil (nothing happens). The position is only
# adjusted after the underlying IO accepted the bytes, so a nil or empty
# argument no longer desynchronises the position.
#
# @param byte [Integer, String, nil] byte(s) to push back
# @return [nil]
# @raise [IOError] if the reader (or the underlying IO) is closed
def ungetbyte(byte)
  check_not_closed!

  pushed = case byte
           when nil then 0
           when String then byte.bytesize
           else 1
           end
  return nil if pushed.zero?

  @io.ungetbyte byte
  @pos -= pushed
  nil
end
|
163
|
+
|
164
|
+
def readbyte
|
165
|
+
check_not_closed!
|
166
|
+
check_not_eof!
|
167
|
+
getbyte
|
168
|
+
end
|
169
|
+
|
170
|
+
def each_byte
|
171
|
+
check_not_closed!
|
172
|
+
return to_enum(__method__) unless block_given?
|
173
|
+
yield getbyte until eof?
|
174
|
+
end
|
175
|
+
|
176
|
+
def bytes(&block)
|
177
|
+
warn "warning: #{self.class}#bytes is deprecated; use #each_byte instead"
|
178
|
+
each_byte(&block)
|
179
|
+
end
|
180
|
+
|
181
|
+
def getc
|
182
|
+
check_not_closed!
|
183
|
+
return nil if eof?
|
184
|
+
|
185
|
+
char = String.new(encoding: Encoding::BINARY)
|
186
|
+
min_char_size, max_char_size = CharSize.minmax(external_encoding)
|
187
|
+
|
188
|
+
until char.size == max_char_size || eof?
|
189
|
+
char << read(min_char_size)
|
190
|
+
|
191
|
+
char.force_encoding external_encoding
|
192
|
+
return encode(char) if char.valid_encoding?
|
193
|
+
char.force_encoding Encoding::BINARY
|
194
|
+
end
|
195
|
+
|
196
|
+
undo_getc_attempt char, min_char_size
|
197
|
+
|
198
|
+
encode(char)
|
199
|
+
end
|
200
|
+
|
201
|
+
def ungetc(char)
|
202
|
+
char.encode(external_encoding).bytes.reverse_each do |byte|
|
203
|
+
ungetbyte byte
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def readchar
|
208
|
+
check_not_closed!
|
209
|
+
check_not_eof!
|
210
|
+
getc
|
211
|
+
end
|
212
|
+
|
213
|
+
def each_char
|
214
|
+
check_not_closed!
|
215
|
+
return to_enum(__method__) unless block_given?
|
216
|
+
yield getc until eof?
|
217
|
+
end
|
218
|
+
|
219
|
+
def chars(&block)
|
220
|
+
warn "warning: #{self.class}#chars is deprecated; use #each_char instead"
|
221
|
+
each_char(&block)
|
222
|
+
end
|
223
|
+
|
224
|
+
def each_codepoint
|
225
|
+
check_not_closed!
|
226
|
+
return to_enum(__method__) unless block_given?
|
227
|
+
each_char do |char|
|
228
|
+
char.each_codepoint do |codepoint|
|
229
|
+
yield codepoint
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
def codepoints(&block)
|
235
|
+
warn "warning: #{self.class}#codepoints is deprecated; use #each_codepoint instead"
|
236
|
+
each_codepoint(&block)
|
237
|
+
end
|
238
|
+
|
239
|
+
def gets(*args)
|
240
|
+
line = Line.new(self, *args)
|
241
|
+
check_not_closed!
|
242
|
+
return nil if eof?
|
243
|
+
line.read
|
244
|
+
end
|
245
|
+
|
246
|
+
def readline(*args)
|
247
|
+
line = Line.new(self, *args)
|
248
|
+
check_not_closed!
|
249
|
+
check_not_eof!
|
250
|
+
line.read
|
251
|
+
end
|
252
|
+
|
253
|
+
def each_line(*args)
|
254
|
+
line = Line.new(self, *args)
|
255
|
+
check_not_closed!
|
256
|
+
return to_enum(__method__, *args) unless block_given?
|
257
|
+
yield line.read until eof?
|
258
|
+
end
|
259
|
+
alias each each_line
|
260
|
+
|
261
|
+
def lines(*args, &block)
|
262
|
+
warn "warning: #{self.class}#lines is deprecated; use #each_line instead"
|
263
|
+
each_line(*args, &block)
|
264
|
+
end
|
265
|
+
|
266
|
+
def readlines(*args)
|
267
|
+
each_line(*args).to_a
|
268
|
+
end
|
269
|
+
|
270
|
+
private
|
271
|
+
|
272
|
+
def truncate(length)
|
273
|
+
[pending, length].compact.min
|
274
|
+
end
|
275
|
+
|
276
|
+
def extract_encodings(external_encoding, *internal_encoding)
|
277
|
+
raise ArgumentError, "wrong number of arguments (given #{internal_encoding.size + 1}, expected 1..2)" if internal_encoding.size > 1
|
278
|
+
return [external_encoding, *internal_encoding] if external_encoding.nil? || external_encoding.is_a?(Encoding) || !internal_encoding.empty?
|
279
|
+
external_encoding.split(":", 2)
|
280
|
+
end
|
281
|
+
|
282
|
+
# Tells whether an encoding specification asks for BOM detection, i.e. is a
# String starting with "BOM|" (case-insensitive), such as "BOM|UTF-8".
#
# The pattern is anchored with \A rather than ^ so that only a prefix of the
# whole string counts; the caller strips exactly the first four characters.
#
# @param encoding [String, Encoding, nil] encoding specification
# @return [Boolean]
def parse_bom?(encoding)
  encoding.is_a?(String) && /\ABOM\|/i.match?(encoding)
end
|
285
|
+
|
286
|
+
def parse_bom
|
287
|
+
return nil unless pos.zero?
|
288
|
+
walk_bom_tree(BOM_TREE)
|
289
|
+
end
|
290
|
+
|
291
|
+
def find_encoding(encoding, if_nil:, if_unsupported:)
|
292
|
+
return if_nil if encoding.nil? || encoding == ""
|
293
|
+
Encoding.find(encoding)
|
294
|
+
rescue ArgumentError
|
295
|
+
warn "warning: encoding #{encoding} unsupported, defaulting to #{if_unsupported}"
|
296
|
+
if_unsupported
|
297
|
+
end
|
298
|
+
|
299
|
+
# Tags +data+ with the external encoding and, when an internal encoding is
# configured, transcodes it in place using the stored encoding options.
#
# The options are splatted as keywords: passed positionally, the hash is
# taken for the source-encoding argument of String#encode! on Ruby 3.
#
# @param data [String] raw data read from the archive (modified in place)
# @return [String] the same string, re-tagged and possibly transcoded
def encode(data)
  data.force_encoding @external_encoding
  data.encode! @internal_encoding, **@encoding_options if @internal_encoding
  data
end
|
304
|
+
|
305
|
+
def relativize(amount, mode)
|
306
|
+
case mode
|
307
|
+
when :CUR, IO::SEEK_CUR then amount
|
308
|
+
when :SET, IO::SEEK_SET then amount - @pos
|
309
|
+
when :END, IO::SEEK_END then @header.size + amount - @pos
|
310
|
+
else raise ArgumentError, "unknown seek mode #{mode.inspect}, expected :CUR, :END, or :SET (or IO::SEEK_*)"
|
311
|
+
end
|
312
|
+
end
|
313
|
+
|
314
|
+
def undo_getc_attempt(char, min_char_size)
|
315
|
+
char.slice!(min_char_size..-1).bytes.reverse_each do |byte|
|
316
|
+
ungetbyte byte
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
320
|
+
def check_not_closed!
|
321
|
+
raise IOError, "closed stream" if closed?
|
322
|
+
end
|
323
|
+
|
324
|
+
def check_not_eof!
|
325
|
+
raise EOFError, "end of file reached" if eof?
|
326
|
+
end
|
327
|
+
|
328
|
+
def seekable?
|
329
|
+
return @seekable if defined?(@seekable)
|
330
|
+
@seekable = @io.respond_to?(:seek) && begin
|
331
|
+
@io.pos
|
332
|
+
true
|
333
|
+
rescue Errno::ESPIPE
|
334
|
+
false
|
335
|
+
end
|
336
|
+
end
|
337
|
+
|
338
|
+
def check_seekable!
|
339
|
+
raise SeekNotSupported, "seek not supported by #{@io}" unless seekable?
|
340
|
+
end
|
341
|
+
|
342
|
+
# Recursively matches the upcoming bytes against a BOM lookup tree.
#
# A node is either a Hash mapping the next byte to a child node, or a
# two-element Array of [children, encoding], where +encoding+ is the result
# when none of the children match. Bytes that do not take part in a
# successful match are pushed back, so a failed lookup consumes nothing.
#
# @param tree [Hash] children of the current node, keyed by byte value
# @param encoding [Encoding, nil] encoding identified so far, if any
# @return [Encoding, nil] the detected encoding, or nil if no BOM matched
def walk_bom_tree((tree, encoding))
  # Leaf node: nothing further can match, so do not consume a byte at all.
  return encoding if tree.empty?

  byte = getbyte
  # End of data: there is no byte to push back.
  return encoding if byte.nil?

  found_encoding = walk_bom_tree(tree[byte]) if tree.key?(byte)
  ungetbyte byte unless found_encoding
  found_encoding || encoding
end
|
348
|
+
|
349
|
+
BOM_TREE = {
|
350
|
+
0x00 => { 0x00 => { 0xFE => { 0xFF => [{}, Encoding::UTF_32BE] } } },
|
351
|
+
0xEF => { 0xBB => { 0xBF => [{}, Encoding::UTF_8] } },
|
352
|
+
0xFE => { 0xFF => [{}, Encoding::UTF_16BE] },
|
353
|
+
0xFF => { 0xFE => [{ 0x00 => { 0x00 => [{}, Encoding::UTF_32LE] } }, Encoding::UTF_16LE] }
|
354
|
+
}.freeze
|
355
|
+
private_constant :BOM_TREE
|
356
|
+
end
|
357
|
+
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "English"
|
4
|
+
require "tar/backports"
|
5
|
+
|
6
|
+
module Tar
|
7
|
+
class FileReader
|
8
|
+
class Line
|
9
|
+
using Backports
|
10
|
+
|
11
|
+
def initialize(file, *args)
|
12
|
+
@file = file
|
13
|
+
@skip = nil
|
14
|
+
|
15
|
+
case args.size
|
16
|
+
when 0
|
17
|
+
use_default_separator
|
18
|
+
use_default_limit
|
19
|
+
when 1
|
20
|
+
extract_separator_or_limit(*args)
|
21
|
+
when 2
|
22
|
+
extract_separator(args.first)
|
23
|
+
extract_limit(args.last)
|
24
|
+
else
|
25
|
+
raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..2)", drop_internal_frames(caller)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def read
|
30
|
+
return @file.read if read_to_eof?
|
31
|
+
|
32
|
+
skip_newlines if @skip
|
33
|
+
line = read_line
|
34
|
+
skip_newlines if @skip
|
35
|
+
line
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def skip_newlines
|
41
|
+
until @file.eof?
|
42
|
+
char = @file.getc
|
43
|
+
if char != @skip
|
44
|
+
@file.ungetc char
|
45
|
+
break
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# Reads characters from the file until the separator, the byte limit or
# the end of the file is reached, and returns them as one string.
#
# The file's line counter is advanced for every non-empty line returned,
# including a final line without a trailing separator and a line cut short
# by the limit, matching how IO#gets maintains IO#lineno.
#
# @return [String] the line read (empty only when the limit is zero)
def read_line
  line = String.new(encoding: encoding)
  line << @file.getc until stop_reading?(line)
  @file.lineno += 1 unless line.empty?
  line
end
|
56
|
+
|
57
|
+
def encoding
|
58
|
+
@file.internal_encoding || @file.external_encoding
|
59
|
+
end
|
60
|
+
|
61
|
+
# Removes the leading `new` frames from a backtrace so that an argument
# error raised while building a Line points at the code that called the
# public reader method.
#
# Recognises both backtrace label styles: the classic "in `new'" and the
# Ruby 3.4+ form "in 'Class#new'" (single quotes, class-qualified name).
#
# @param stacktrace [Array<String>] backtrace as returned by Kernel#caller
# @return [Array<String>] the backtrace without its leading `new` frames
def drop_internal_frames(stacktrace)
  stacktrace.drop_while { |frame| frame =~ /in [`'](?:\S*[#.])?new'/ }
end
|
64
|
+
|
65
|
+
def use_default_separator
|
66
|
+
@separator = $INPUT_RECORD_SEPARATOR
|
67
|
+
end
|
68
|
+
|
69
|
+
def use_default_limit
|
70
|
+
@limit = nil
|
71
|
+
end
|
72
|
+
|
73
|
+
def extract_separator_or_limit(separator_or_limit)
|
74
|
+
if separator_or_limit.respond_to?(:to_int)
|
75
|
+
use_default_separator
|
76
|
+
extract_limit(separator_or_limit)
|
77
|
+
else
|
78
|
+
extract_separator(separator_or_limit)
|
79
|
+
use_default_limit
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def extract_separator(separator)
|
84
|
+
case separator
|
85
|
+
when nil
|
86
|
+
@separator = nil
|
87
|
+
when ""
|
88
|
+
@separator = "\n\n".encode(encoding)
|
89
|
+
@skip = "\n".encode(encoding)
|
90
|
+
else
|
91
|
+
@separator = separator.to_str.encode(encoding)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def extract_limit(limit)
|
96
|
+
if limit.nil?
|
97
|
+
use_default_limit
|
98
|
+
else
|
99
|
+
@limit = limit.to_int
|
100
|
+
use_default_limit if @limit.negative?
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def read_to_eof?
|
105
|
+
@separator.nil? && @limit.nil?
|
106
|
+
end
|
107
|
+
|
108
|
+
def stop_reading?(line)
|
109
|
+
reached_separator?(line) || reached_limit?(line) || @file.eof?
|
110
|
+
end
|
111
|
+
|
112
|
+
def reached_separator?(line)
|
113
|
+
@separator && line.end_with?(@separator)
|
114
|
+
end
|
115
|
+
|
116
|
+
def reached_limit?(line)
|
117
|
+
@limit && line.bytesize >= @limit
|
118
|
+
end
|
119
|
+
end
|
120
|
+
private_constant :Line
|
121
|
+
end
|
122
|
+
end
|