tar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
# Exception hierarchy used by the tar library.
module Tar
  # Root of every library-specific error.
  class Error < StandardError; end

  # Raised for archive-level problems; parent of the two errors below.
  class InvalidArchive < Error; end
  class ChecksumMismatch < InvalidArchive; end
  class UnexpectedEOF < InvalidArchive; end

  # Raised when seeking is requested on an IO that cannot seek.
  class SeekNotSupported < Error; end
end
@@ -0,0 +1,357 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "char_size"
4
+ require "tar/backports"
5
+ require "tar/file_reader/line"
6
+ require "tar/polyfills"
7
+ require "tar/ustar"
8
+
9
+ module Tar
10
+ class FileReader
11
+ include Enumerable
12
+ using Backports
13
+ using Polyfills
14
+
15
+ attr_reader :header
16
+
17
# Wraps +io+ so that the contents of one archive entry can be read like a file.
#
# @param header the entry header; its +size+ bounds every read
# @param io the underlying stream, assumed to be positioned at the entry's first byte
# @param external_encoding encoding of the stored data
# @param internal_encoding encoding to transcode to (nil disables transcoding)
# @param encoding_options extra options kept for transcoding
def initialize(header, io,
               external_encoding: Encoding.default_external,
               internal_encoding: Encoding.default_internal,
               **encoding_options)
  @header, @io = header, io
  @closed = false
  @pos = 0
  @lineno = 0
  set_encoding(external_encoding, internal_encoding, **encoding_options)
end
25
+
26
# Marks this reader as closed. The underlying IO is left untouched.
def close
  @closed = true
end
29
+
30
# @return [Boolean] true when this reader was closed or the underlying IO is closed
def closed?
  return @closed if @closed

  @io.closed?
end
33
+
34
# @return [Boolean] true once the position has reached (or passed) the entry size
# @raise [IOError] if the reader is closed
def eof?
  check_not_closed!
  @header.size <= @pos
end
alias eof eof?
39
+
40
# @return [Integer] current byte offset inside the entry
# @raise [IOError] if the reader is closed
def pos
  check_not_closed!
  @pos
end
alias tell pos
45
+
46
# Moves to the absolute offset +new_pos+ by delegating to #seek.
def pos=(new_pos)
  seek(new_pos)
end
49
+
50
# @return [Integer] number of entry bytes not yet consumed, never negative
# @raise [IOError] if the reader is closed
def pending
  check_not_closed!
  remaining = @header.size - @pos
  remaining < 0 ? 0 : remaining
end
54
+
55
# @return [Integer] the current line counter
# @raise [IOError] if the reader is closed
def lineno
  check_not_closed!
  @lineno
end
59
+
60
# Overwrites the line counter with +new_lineno+.
# @raise [IOError] if the reader is closed
def lineno=(new_lineno)
  check_not_closed!
  @lineno = new_lineno
end
64
+
65
# Reads from the entry without ever crossing its end.
#
# With no +length+ the rest of the entry is returned, tagged/transcoded via
# #encode. With a +length+ at most that many bytes are returned as a binary
# string.
#
# @param length [Integer, nil] maximum number of bytes to read
# @param buffer [String, nil] optional string handed to the underlying IO
# @return [String, nil] nil when a positive +length+ is requested at end of
#   entry (the same convention as IO#read)
# @raise [IOError] if the reader is closed
# @raise [UnexpectedEOF] if the underlying IO ends before the entry does
def read(length = nil, buffer = nil)
  check_not_closed!

  # IO#read(n) signals end of file with nil; returning "" here would make
  # "read until nil" loops spin forever.
  if length && length > 0 && eof?
    buffer.clear if buffer
    return nil
  end

  data = @io.read(truncate(length), buffer)
  # A nil here means the underlying stream ran out although the header
  # promised more bytes.
  raise UnexpectedEOF, "unexpected end of archive while reading file contents" if data.nil?

  @pos += data.bytesize
  length.nil? ? encode(data) : data.force_encoding(Encoding::BINARY)
end
77
+
78
# Reads at most +max_length+ bytes, clamped to what is left of the entry.
#
# @param max_length [Integer] upper bound on the number of bytes
# @param buffer [String, nil] optional string handed to the underlying IO
# @return [String] binary string
# @raise [IOError] if the reader is closed
# @raise [EOFError] if the entry is exhausted and +max_length+ is positive
def readpartial(max_length, buffer = nil)
  check_not_closed!
  # IO#readpartial raises at end of file; without this the clamped length
  # would be 0 and "" would be returned forever.
  check_not_eof! if max_length > 0

  data = @io.readpartial(truncate(max_length), buffer)
  @pos += data.bytesize
  data.force_encoding(Encoding::BINARY)
end
85
+
86
# Advances to the offset returned by USTAR.records_size for this entry's size,
# seeking when the IO supports it and reading-and-discarding otherwise.
#
# @raise [IOError] if the reader is closed
def skip_to_next_record
  check_not_closed!

  record_end = USTAR.records_size(@header.size)
  return seek(record_end) if seekable?

  # Non-seekable stream: consume the remaining bytes to move forward.
  @io.read(record_end - @pos)
  @pos = record_end
end
98
+
99
# @return [Encoding] the encoding the entry's bytes are tagged with
# @raise [IOError] if the reader is closed
def external_encoding
  check_not_closed!
  @external_encoding
end
103
+
104
# @return [Encoding, nil] the transcoding target, or nil when none is set
# @raise [IOError] if the reader is closed
def internal_encoding
  check_not_closed!
  @internal_encoding
end
108
+
109
# Configures the external/internal encodings and the transcoding options.
#
# +external_encoding+ may be an Encoding, a name, an "ext:int" pair, or a
# name prefixed with "BOM|", in which case a byte order mark is looked for
# and, when none is found, the name after the prefix is used.
#
# @raise [IOError] if the reader is closed
# @raise [ArgumentError] if more than one internal encoding is given
def set_encoding(external_encoding, *internal_encoding, **encoding_options)
  check_not_closed!

  external, internal = extract_encodings(external_encoding, *internal_encoding)
  # Strip the 4-character "BOM|" prefix when no BOM was detected.
  external = parse_bom || external[4..-1] if parse_bom?(external)

  @external_encoding = find_encoding(
    external,
    if_nil: Encoding.default_external,
    if_unsupported: Encoding.default_external
  )
  @internal_encoding = find_encoding(
    internal,
    if_nil: nil,
    if_unsupported: Encoding.default_internal
  )
  @encoding_options = encoding_options
end
122
+
123
# Switches to binary mode: external encoding BINARY, no transcoding.
#
# @return [self] like IO#binmode, so calls can be chained
# @raise [IOError] if the reader is closed
def binmode
  set_encoding Encoding::BINARY
  self
end
126
+
127
# @return [Boolean] true when data is read as BINARY with no internal encoding
# @raise [IOError] if the reader is closed
def binmode?
  check_not_closed!
  return false unless @external_encoding == Encoding::BINARY

  @internal_encoding.nil?
end
131
+
132
# @return [Boolean] whether the underlying IO reports being a terminal;
#   false when it does not implement +tty?+
# @raise [IOError] if the reader is closed
def tty?
  check_not_closed!
  return false unless @io.respond_to?(:tty?)

  @io.tty?
end
alias isatty tty?
137
+
138
# Moves the read position inside the entry.
#
# @param amount [Integer] offset, interpreted according to +mode+
# @param mode one of :SET, :CUR, :END or the matching IO::SEEK_* constant
# @return [Integer] the new position
# @raise [IOError] if the reader is closed
# @raise [SeekNotSupported] if the underlying IO cannot seek
# @raise [ArgumentError] if +mode+ is unknown
# @raise [Errno::EINVAL] if the target lies before the start of the entry
def seek(amount, mode = IO::SEEK_SET)
  # Check closed first, like every other method, so a closed reader
  # reports IOError before the underlying IO is probed.
  check_not_closed!
  check_seekable!

  offset = relativize(amount, mode)
  # A negative position would move the underlying IO outside this entry.
  # Seeking past the end stays legal; #skip_to_next_record relies on it.
  raise Errno::EINVAL, "seek before start of file" if @pos + offset < 0

  @io.seek offset, IO::SEEK_CUR
  @pos += offset
end
145
+
146
# Seeks back to the start of the entry and resets the line counter.
#
# @return [Integer] 0
def rewind
  seek(0)
  @lineno = 0
end
150
+
151
# Reads one byte from the entry.
#
# @return [Integer, nil] the byte, or nil at end of entry
# @raise [IOError] if the reader is closed
def getbyte
  check_not_closed!
  return nil if eof?

  byte = @io.getbyte
  # Only advance when a byte was really delivered; the underlying IO can
  # run dry before the entry does.
  @pos += 1 if byte
  byte
end
157
+
158
# Pushes +byte+ back onto the underlying IO and rewinds the position.
#
# @param byte [Integer, String, nil] nil is a no-op, as with IO#ungetbyte;
#   a String pushes back all of its bytes
# @raise [IOError] if the reader is closed
def ungetbyte(byte)
  check_not_closed!
  # Nothing is pushed back for nil, so the position must not move either.
  return nil if byte.nil?

  @pos -= byte.is_a?(String) ? byte.bytesize : 1
  @io.ungetbyte byte
end
163
+
164
# Like #getbyte, but raises instead of returning nil at end of entry.
#
# @raise [IOError] if the reader is closed
# @raise [EOFError] at end of entry
def readbyte
  check_not_closed!
  check_not_eof!

  getbyte
end
169
+
170
# Yields every remaining byte of the entry; returns an Enumerator when no
# block is given.
#
# @raise [IOError] if the reader is closed
def each_byte
  check_not_closed!
  return to_enum(__method__) unless block_given?

  until eof?
    yield getbyte
  end
end
175
+
176
# Deprecated alias of #each_byte; emits a warning and forwards the block.
def bytes(&block)
  message = "warning: #{self.class}#bytes is deprecated; use #each_byte instead"
  warn message
  each_byte(&block)
end
180
+
181
# Reads one character in the external encoding.
#
# Bytes are accumulated in steps of the encoding's minimum character size
# until they form a valid character or the maximum size is reached. When no
# valid character emerges, everything but the first minimum-sized chunk is
# pushed back and that chunk is returned as-is.
#
# @return [String, nil] the character (passed through #encode), nil at end of entry
# @raise [IOError] if the reader is closed
def getc
  check_not_closed!
  return nil if eof?

  shortest, longest = CharSize.minmax(external_encoding)
  raw = String.new(encoding: Encoding::BINARY)

  while raw.size != longest && !eof?
    raw << read(shortest)

    # Validate in the target encoding, then switch back to BINARY so that
    # #size keeps counting bytes on the next iteration.
    raw.force_encoding(external_encoding)
    return encode(raw) if raw.valid_encoding?

    raw.force_encoding(Encoding::BINARY)
  end

  undo_getc_attempt(raw, shortest)
  encode(raw)
end
200
+
201
# Pushes +char+ back, converted to the external encoding, one byte at a time
# in reverse order so the bytes are read again in their original order.
def ungetc(char)
  raw = char.encode(external_encoding)
  raw.bytes.reverse_each { |byte| ungetbyte(byte) }
end
206
+
207
# Like #getc, but raises instead of returning nil at end of entry.
#
# @raise [IOError] if the reader is closed
# @raise [EOFError] at end of entry
def readchar
  check_not_closed!
  check_not_eof!

  getc
end
212
+
213
# Yields every remaining character of the entry; returns an Enumerator when
# no block is given.
#
# @raise [IOError] if the reader is closed
def each_char
  check_not_closed!
  return to_enum(__method__) unless block_given?

  until eof?
    yield getc
  end
end
218
+
219
# Deprecated alias of #each_char; emits a warning and forwards the block.
def chars(&block)
  message = "warning: #{self.class}#chars is deprecated; use #each_char instead"
  warn message
  each_char(&block)
end
223
+
224
# Yields the codepoint(s) of every remaining character; returns an
# Enumerator when no block is given.
#
# @raise [IOError] if the reader is closed
def each_codepoint
  check_not_closed!
  return to_enum(__method__) unless block_given?

  each_char do |char|
    char.each_codepoint { |codepoint| yield codepoint }
  end
end
233
+
234
# Deprecated alias of #each_codepoint; emits a warning and forwards the block.
def codepoints(&block)
  message = "warning: #{self.class}#codepoints is deprecated; use #each_codepoint instead"
  warn message
  each_codepoint(&block)
end
238
+
239
# Reads the next line; +args+ (separator and/or limit) are interpreted by Line.
#
# @return [String, nil] the line, or nil at end of entry
# @raise [IOError] if the reader is closed
def gets(*args)
  # Built first so that argument errors surface before anything else.
  reader = Line.new(self, *args)
  check_not_closed!
  eof? ? nil : reader.read
end
245
+
246
# Like #gets, but raises instead of returning nil at end of entry.
#
# @raise [IOError] if the reader is closed
# @raise [EOFError] at end of entry
def readline(*args)
  # Built first so that argument errors surface before anything else.
  reader = Line.new(self, *args)
  check_not_closed!
  check_not_eof!

  reader.read
end
252
+
253
# Yields every remaining line; +args+ (separator and/or limit) are
# interpreted by Line. Returns an Enumerator when no block is given.
#
# @raise [IOError] if the reader is closed
# @raise [ArgumentError] if the limit is 0
def each_line(*args)
  line = Line.new(self, *args)
  check_not_closed!
  return to_enum(__method__, *args) unless block_given?

  # With a limit of 0 every read returns "" without consuming input, so the
  # loop below would never reach EOF. IO#each_line rejects it the same way.
  limit = args.last
  if limit.respond_to?(:to_int) && limit.to_int.zero?
    raise ArgumentError, "invalid limit: 0 for each_line"
  end

  yield line.read until eof?
end
alias each each_line
260
+
261
# Deprecated alias of #each_line; emits a warning and forwards everything.
def lines(*args, &block)
  message = "warning: #{self.class}#lines is deprecated; use #each_line instead"
  warn message
  each_line(*args, &block)
end
265
+
266
# Collects every remaining line into an array; +args+ are handed to #each_line.
#
# @return [Array<String>]
def readlines(*args)
  enumerator = each_line(*args)
  enumerator.to_a
end
269
+
270
+ private
271
+
272
# Clamps a requested +length+ to the bytes still pending in the entry.
# A nil +length+ means "everything that is left".
def truncate(length)
  available = pending
  length.nil? ? available : [available, length].min
end
275
+
276
# Normalizes the arguments of #set_encoding into [external, internal].
# A lone String such as "ext:int" is split at the first colon; anything else
# is passed through unchanged.
#
# @raise [ArgumentError] if more than one internal encoding is given
def extract_encodings(external_encoding, *internal_encoding)
  if internal_encoding.size > 1
    raise ArgumentError, "wrong number of arguments (given #{internal_encoding.size + 1}, expected 1..2)"
  end

  passthrough = external_encoding.nil? ||
                external_encoding.is_a?(Encoding) ||
                !internal_encoding.empty?
  return [external_encoding, *internal_encoding] if passthrough

  external_encoding.split(":", 2)
end
281
+
282
# @return [Boolean] whether +encoding+ is a String carrying the "BOM|" prefix
#   (matched case-insensitively)
def parse_bom?(encoding)
  return false unless encoding.is_a?(String)

  /^BOM\|/i.match?(encoding)
end
285
+
286
# Looks for a byte order mark, but only when nothing has been read yet.
#
# @return [Encoding, nil] the detected encoding, or nil
def parse_bom
  pos.zero? ? walk_bom_tree(BOM_TREE) : nil
end
290
+
291
# Resolves +encoding+ to an Encoding object.
#
# @param if_nil value returned when +encoding+ is nil or ""
# @param if_unsupported value returned (after a warning) when the name is unknown
def find_encoding(encoding, if_nil:, if_unsupported:)
  return if_nil if encoding.nil? || encoding == ""

  begin
    Encoding.find(encoding)
  rescue ArgumentError
    warn "warning: encoding #{encoding} unsupported, defaulting to #{if_unsupported}"
    if_unsupported
  end
end
298
+
299
# Tags +data+ with the external encoding and, when an internal encoding is
# set, transcodes it in place.
#
# @param data [String] mutated and returned
# @return [String] +data+
def encode(data)
  data.force_encoding(@external_encoding)
  # The options must be passed as keywords: on Ruby 3 a positional Hash is
  # taken as the source encoding and String#encode! raises TypeError.
  data.encode!(@internal_encoding, **@encoding_options) if @internal_encoding
  data
end
304
+
305
# Converts an (+amount+, +mode+) seek request into an offset relative to the
# current position.
#
# @raise [ArgumentError] for an unknown +mode+
def relativize(amount, mode)
  if [:CUR, IO::SEEK_CUR].include?(mode)
    amount
  elsif [:SET, IO::SEEK_SET].include?(mode)
    amount - @pos
  elsif [:END, IO::SEEK_END].include?(mode)
    @header.size + amount - @pos
  else
    raise ArgumentError, "unknown seek mode #{mode.inspect}, expected :CUR, :END, or :SET (or IO::SEEK_*)"
  end
end
313
+
314
# Gives back what a failed #getc attempt read beyond the first
# +min_char_size+ bytes: those bytes are cut off +char+ and pushed back in
# reverse order.
#
# @param char [String] binary buffer, truncated in place
# @param min_char_size [Integer] number of leading bytes to keep
def undo_getc_attempt(char, min_char_size)
  # slice! returns nil when +char+ is shorter than +min_char_size+ (a
  # character cut off by the end of the entry); nothing to push back then.
  surplus = char.slice!(min_char_size..-1) || ""
  surplus.bytes.reverse_each { |byte| ungetbyte(byte) }
end
319
+
320
# Guard used by the public API.
#
# @raise [IOError] if the reader (or its IO) is closed
def check_not_closed!
  return unless closed?

  raise IOError, "closed stream"
end
323
+
324
# Guard used by the raising read variants.
#
# @raise [EOFError] if the entry has been fully consumed
def check_not_eof!
  return unless eof?

  raise EOFError, "end of file reached"
end
327
+
328
# Whether the underlying IO can seek. The answer is computed once and cached.
# An IO counts as seekable when it responds to +seek+ and asking for its
# position does not raise Errno::ESPIPE.
def seekable?
  return @seekable if defined?(@seekable)

  @seekable =
    if @io.respond_to?(:seek)
      begin
        @io.pos
        true
      rescue Errno::ESPIPE
        false
      end
    else
      false
    end
end
337
+
338
# Guard used by #seek.
#
# @raise [SeekNotSupported] if the underlying IO cannot seek
def check_seekable!
  return if seekable?

  raise SeekNotSupported, "seek not supported by #{@io}"
end
341
+
342
# Walks the BOM lookup tree one input byte at a time.
#
# A node is either a Hash of next bytes, or a [Hash, Encoding] pair where
# the encoding applies when no longer BOM matches. Bytes that turn out not
# to belong to a BOM are pushed back.
#
# @return [Encoding, nil] the encoding of the longest BOM matched, or nil
def walk_bom_tree((tree, encoding))
  byte = getbyte
  found_encoding = walk_bom_tree(tree[byte]) if tree.key?(byte)
  # At end of entry +byte+ is nil: nothing was consumed, so there is nothing
  # to push back (doing so would shift the position by one).
  ungetbyte byte unless found_encoding || byte.nil?
  found_encoding || encoding
end
348
+
349
# Byte order marks as a lookup tree keyed by successive bytes. A leaf is a
# [subtree, encoding] pair: the encoding applies unless the subtree matches
# a longer mark (UTF-16LE vs. UTF-32LE share the FF FE prefix).
BOM_TREE = {
  0x00 => {
    0x00 => {
      0xFE => { 0xFF => [{}, Encoding::UTF_32BE] }
    }
  },
  0xEF => {
    0xBB => { 0xBF => [{}, Encoding::UTF_8] }
  },
  0xFE => {
    0xFF => [{}, Encoding::UTF_16BE]
  },
  0xFF => {
    0xFE => [
      { 0x00 => { 0x00 => [{}, Encoding::UTF_32LE] } },
      Encoding::UTF_16LE
    ]
  }
}.freeze
355
+ private_constant :BOM_TREE
356
+ end
357
+ end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "English"
4
+ require "tar/backports"
5
+
6
+ module Tar
7
+ class FileReader
8
# Reads one line from a FileReader, honouring the separator/limit arguments
# accepted by the reader's line-oriented methods.
class Line
  using Backports

  # @param file the reader to pull characters from
  # @param args nothing, a separator, a limit, or a separator and a limit
  # @raise [ArgumentError] if more than two arguments are given
  def initialize(file, *args)
    @file = file
    @skip = nil

    case args.size
    when 0
      use_default_separator
      use_default_limit
    when 1
      extract_separator_or_limit(*args)
    when 2
      extract_separator(args.first)
      extract_limit(args.last)
    else
      raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..2)", drop_internal_frames(caller)
    end
  end

  # Reads the next line. With neither separator nor limit the rest of the
  # file is returned. When @skip is set (empty-string separator), runs of
  # that character before and after the line are consumed.
  def read
    return @file.read if read_to_eof?

    skip_newlines if @skip
    line = read_line
    skip_newlines if @skip
    line
  end

  private

  # Consumes characters equal to @skip; the first different one is pushed back.
  def skip_newlines
    until @file.eof?
      char = @file.getc
      if char != @skip
        @file.ungetc char
        break
      end
    end
  end

  # Accumulates characters until the separator, the limit or EOF is reached;
  # the file's line counter is bumped only when the separator was seen.
  def read_line
    line = String.new(encoding: encoding)
    line << @file.getc until stop_reading?(line)
    @file.lineno += 1 if reached_separator?(line)
    line
  end

  # Encoding of the strings handed out by the file.
  def encoding
    @file.internal_encoding || @file.external_encoding
  end

  # Drops the leading `new` frames so the error points at the caller.
  def drop_internal_frames(stacktrace)
    stacktrace.drop_while { |frame| frame.include?("in `new'") }
  end

  # Uses $/ as separator, converted to the file's encoding exactly like an
  # explicit separator. Without the conversion, comparing it against lines in
  # a non-ASCII-compatible encoding (e.g. UTF-16) raises
  # Encoding::CompatibilityError.
  def use_default_separator
    default = $INPUT_RECORD_SEPARATOR
    @separator = default && default.encode(encoding)
  end

  def use_default_limit
    @limit = nil
  end

  # A single argument is a limit when it is integer-like, else a separator.
  def extract_separator_or_limit(separator_or_limit)
    if separator_or_limit.respond_to?(:to_int)
      use_default_separator
      extract_limit(separator_or_limit)
    else
      extract_separator(separator_or_limit)
      use_default_limit
    end
  end

  # nil disables the separator; "" selects the blank-line separator and
  # enables newline skipping; anything else is converted to the file's encoding.
  def extract_separator(separator)
    case separator
    when nil
      @separator = nil
    when ""
      @separator = "\n\n".encode(encoding)
      @skip = "\n".encode(encoding)
    else
      @separator = separator.to_str.encode(encoding)
    end
  end

  # nil or a negative limit means "no limit".
  def extract_limit(limit)
    if limit.nil?
      use_default_limit
    else
      @limit = limit.to_int
      use_default_limit if @limit.negative?
    end
  end

  def read_to_eof?
    @separator.nil? && @limit.nil?
  end

  def stop_reading?(line)
    reached_separator?(line) || reached_limit?(line) || @file.eof?
  end

  def reached_separator?(line)
    @separator && line.end_with?(@separator)
  end

  # The limit is measured in bytes and checked after each whole character.
  def reached_limit?(line)
    @limit && line.bytesize >= @limit
  end
end
120
+ private_constant :Line
121
+ end
122
+ end