scanf 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/scanf.rb +776 -0
- metadata +87 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bf672bd5e82d5d049b39df5c6a2533b4659851db
|
4
|
+
data.tar.gz: cd371a3e521a39447bd8acf2b307205eb08a2a82
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6fecc17f264ac5eafc4acb6e8aa8b1941f1f619bd32f120e2e9e1c6105bfdb436c1177503faa07d0945cddf20a4f164e634e63c4ffeace918742acb39d208425
|
7
|
+
data.tar.gz: 9d9b939de5f0148a737fc49bce4ca586f09406564bd5b62de045161eb29ef3e47ef40814f2db46a9c366c717ca1c5f9833dc03e72238a2cf25c1c58f0d2388c0
|
data/lib/scanf.rb
ADDED
@@ -0,0 +1,776 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
# scanf for Ruby
|
3
|
+
#
|
4
|
+
#--
|
5
|
+
# $Release Version: 1.1.2 $
|
6
|
+
# $Revision$
|
7
|
+
# $Id$
|
8
|
+
# $Author$
|
9
|
+
#++
|
10
|
+
#
|
11
|
+
# == Description
|
12
|
+
#
|
13
|
+
# scanf is an implementation of the C function scanf(3), modified as necessary
|
14
|
+
# for Ruby compatibility.
|
15
|
+
#
|
16
|
+
# The methods provided are String#scanf, IO#scanf, and
|
17
|
+
# Kernel#scanf. Kernel#scanf is a wrapper around STDIN.scanf. IO#scanf
|
18
|
+
# can be used on any IO stream, including file handles and sockets.
|
19
|
+
# scanf can be called either with or without a block.
|
20
|
+
#
|
21
|
+
# Scanf scans an input string or stream according to a <b>format</b>, as
|
22
|
+
# described below in Conversions, and returns an array of matches between
|
23
|
+
# the format and the input. The format is defined in a string, and is
|
24
|
+
# similar (though not identical) to the formats used in Kernel#printf and
|
25
|
+
# Kernel#sprintf.
|
26
|
+
#
|
27
|
+
# The format may contain <b>conversion specifiers</b>, which tell scanf
|
28
|
+
# what form (type) each particular matched substring should be converted
|
29
|
+
# to (e.g., decimal integer, floating point number, literal string,
|
30
|
+
# etc.) The matches and conversions take place from left to right, and
|
31
|
+
# the conversions themselves are returned as an array.
|
32
|
+
#
|
33
|
+
# The format string may also contain characters other than those in the
|
34
|
+
# conversion specifiers. White space (blanks, tabs, or newlines) in the
|
35
|
+
# format string matches any amount of white space, including none, in
|
36
|
+
# the input. Everything else matches only itself.
|
37
|
+
#
|
38
|
+
# Scanning stops, and scanf returns, when any input character fails to
|
39
|
+
# match the specifications in the format string, or when input is
|
40
|
+
# exhausted, or when everything in the format string has been
|
41
|
+
# matched. All matches found up to the stopping point are returned in
|
42
|
+
# the return array (or yielded to the block, if a block was given).
|
43
|
+
#
|
44
|
+
#
|
45
|
+
# == Basic usage
|
46
|
+
#
|
47
|
+
# require 'scanf'
|
48
|
+
#
|
49
|
+
# # String#scanf and IO#scanf take a single argument, the format string
|
50
|
+
# array = a_string.scanf("%d%s")
|
51
|
+
# array = an_io.scanf("%d%s")
|
52
|
+
#
|
53
|
+
# # Kernel#scanf reads from STDIN
|
54
|
+
# array = scanf("%d%s")
|
55
|
+
#
|
56
|
+
# == Block usage
|
57
|
+
#
|
58
|
+
# When called with a block, scanf keeps scanning the input, cycling back
|
59
|
+
# to the beginning of the format string, and yields a new array of
|
60
|
+
# conversions to the block every time the format string is matched
|
61
|
+
# (including partial matches, but not including complete failures). The
|
62
|
+
# actual return value of scanf when called with a block is an array
|
63
|
+
# containing the results of all the executions of the block.
|
64
|
+
#
|
65
|
+
# str = "123 abc 456 def 789 ghi"
|
66
|
+
# str.scanf("%d%s") { |num,str| [ num * 2, str.upcase ] }
|
67
|
+
# # => [[246, "ABC"], [912, "DEF"], [1578, "GHI"]]
|
68
|
+
#
|
69
|
+
# == Conversions
|
70
|
+
#
|
71
|
+
# The single argument to scanf is a format string, which generally
|
72
|
+
# includes one or more conversion specifiers. Conversion specifiers
|
73
|
+
# begin with the percent character ('%') and include information about
|
74
|
+
# what scanf should next scan for (string, decimal number, single
|
75
|
+
# character, etc.).
|
76
|
+
#
|
77
|
+
# There may be an optional maximum field width, expressed as a decimal
|
78
|
+
# integer, between the % and the conversion. If no width is given, a
|
79
|
+
# default of `infinity' is used (with the exception of the %c specifier;
|
80
|
+
# see below). Otherwise, given a field width of <em>n</em> for a given
|
81
|
+
# conversion, at most <em>n</em> characters are scanned in processing
|
82
|
+
# that conversion. Before conversion begins, most conversions skip
|
83
|
+
# white space in the input string; this white space is not counted
|
84
|
+
# against the field width.
|
85
|
+
#
|
86
|
+
# The following conversions are available.
|
87
|
+
#
|
88
|
+
# [%]
|
89
|
+
# Matches a literal `%'. That is, `%%' in the format string matches a
|
90
|
+
# single input `%' character. No conversion is done, and the resulting
|
91
|
+
# '%' is not included in the return array.
|
92
|
+
#
|
93
|
+
# [d]
|
94
|
+
# Matches an optionally signed decimal integer.
|
95
|
+
#
|
96
|
+
# [u]
|
97
|
+
# Same as d.
|
98
|
+
#
|
99
|
+
# [i]
|
100
|
+
# Matches an optionally signed integer. The integer is read in base
|
101
|
+
# 16 if it begins with `0x' or `0X', in base 8 if it begins with `0',
|
102
|
+
# and in base 10 otherwise. Only characters that correspond to the
|
103
|
+
# base are recognized.
|
104
|
+
#
|
105
|
+
# [o]
|
106
|
+
# Matches an optionally signed octal integer.
|
107
|
+
#
|
108
|
+
# [x, X]
|
109
|
+
# Matches an optionally signed hexadecimal integer.
|
110
|
+
#
|
111
|
+
# [a, e, f, g, A, E, F, G]
|
112
|
+
# Matches an optionally signed floating-point number.
|
113
|
+
#
|
114
|
+
# [s]
|
115
|
+
# Matches a sequence of non-white-space characters. The input string stops at
|
116
|
+
# white space or at the maximum field width, whichever occurs first.
|
117
|
+
#
|
118
|
+
# [c]
|
119
|
+
# Matches a single character, or a sequence of <em>n</em> characters if a
|
120
|
+
# field width of <em>n</em> is specified. The usual skip of leading white
|
121
|
+
# space is suppressed. To skip white space first, use an explicit space in
|
122
|
+
# the format.
|
123
|
+
#
|
124
|
+
# [[]
|
125
|
+
# Matches a nonempty sequence of characters from the specified set
|
126
|
+
# of accepted characters. The usual skip of leading white space is
|
127
|
+
# suppressed. This bracketed sub-expression is interpreted exactly like a
|
128
|
+
# character class in a Ruby regular expression. (In fact, it is placed as-is
|
129
|
+
# in a regular expression.) The matching against the input string ends with
|
130
|
+
# the appearance of a character not in (or, with a circumflex, in) the set,
|
131
|
+
# or when the field width runs out, whichever comes first.
|
132
|
+
#
|
133
|
+
# === Assignment suppression
|
134
|
+
#
|
135
|
+
# To require that a particular match occur, but without including the result
|
136
|
+
# in the return array, place the <b>assignment suppression flag</b>, which is
|
137
|
+
# the star character ('*'), immediately after the leading '%' of a format
|
138
|
+
# specifier (just before the field width, if any).
|
139
|
+
#
|
140
|
+
# == scanf for Ruby compared with scanf in C
|
141
|
+
#
|
142
|
+
# scanf for Ruby is based on the C function scanf(3), but with modifications,
|
143
|
+
# dictated mainly by the underlying differences between the languages.
|
144
|
+
#
|
145
|
+
# === Unimplemented flags and specifiers
|
146
|
+
#
|
147
|
+
# * The only flag implemented in scanf for Ruby is '<tt>*</tt>' (ignore
|
148
|
+
# upcoming conversion). Many of the flags available in C versions of
|
149
|
+
# scanf(3) have to do with the type of upcoming pointer arguments, and are
|
150
|
+
# meaningless in Ruby.
|
151
|
+
#
|
152
|
+
# * The <tt>n</tt> specifier (store number of characters consumed so far in
|
153
|
+
# next pointer) is not implemented.
|
154
|
+
#
|
155
|
+
# * The <tt>p</tt> specifier (match a pointer value) is not implemented.
|
156
|
+
#
|
157
|
+
# === Altered specifiers
|
158
|
+
#
|
159
|
+
# [o, u, x, X]
|
160
|
+
# In scanf for Ruby, all of these specifiers scan for an optionally signed
|
161
|
+
# integer, rather than for an unsigned integer like their C counterparts.
|
162
|
+
#
|
163
|
+
# === Return values
|
164
|
+
#
|
165
|
+
# scanf for Ruby returns an array of successful conversions, whereas
|
166
|
+
# scanf(3) returns the number of conversions successfully
|
167
|
+
# completed. (See below for more details on scanf for Ruby's return
|
168
|
+
# values.)
|
169
|
+
#
|
170
|
+
# == Return values
|
171
|
+
#
|
172
|
+
# Without a block, scanf returns an array containing all the conversions
|
173
|
+
# it has found. If none are found, scanf will return an empty array. An
|
174
|
+
# unsuccessful match is never ignored, but rather always signals the end
|
175
|
+
# of the scanning operation. If the first unsuccessful match takes place
|
176
|
+
# after one or more successful matches have already taken place, the
|
177
|
+
# returned array will contain the results of those successful matches.
|
178
|
+
#
|
179
|
+
# With a block scanf returns a 'map'-like array of transformations from
|
180
|
+
# the block -- that is, an array reflecting what the block did with each
|
181
|
+
# yielded result from the iterative scanf operation. (See "Block
|
182
|
+
# usage", above.)
|
183
|
+
#
|
184
|
+
# == Current limitations and bugs
|
185
|
+
#
|
186
|
+
# When using IO#scanf under Windows, make sure you open your files in
|
187
|
+
# binary mode:
|
188
|
+
#
|
189
|
+
# File.open("filename", "rb")
|
190
|
+
#
|
191
|
+
# so that scanf can keep track of characters correctly.
|
192
|
+
#
|
193
|
+
# Support for character classes is reasonably complete (since it
|
194
|
+
# essentially piggy-backs on Ruby's regular expression handling of
|
195
|
+
# character classes), but users are advised that character class testing
|
196
|
+
# has not been exhaustive, and that they should exercise some caution
|
197
|
+
# in using any of the more complex and/or arcane character class
|
198
|
+
# idioms.
|
199
|
+
#
|
200
|
+
# == License and copyright
|
201
|
+
#
|
202
|
+
# Copyright:: (c) 2002-2003 David Alan Black
|
203
|
+
# License:: Distributed on the same licensing terms as Ruby itself
|
204
|
+
#
|
205
|
+
# == Warranty disclaimer
|
206
|
+
#
|
207
|
+
# This software is provided "as is" and without any express or implied
|
208
|
+
# warranties, including, without limitation, the implied warranties of
|
209
|
+
# merchantability and fitness for a particular purpose.
|
210
|
+
#
|
211
|
+
# == Credits and acknowledgements
|
212
|
+
#
|
213
|
+
# scanf was developed as the major activity of the Austin Ruby Codefest
|
214
|
+
# (Austin, Texas, August 2002).
|
215
|
+
#
|
216
|
+
# Principal author:: David Alan Black (mailto:dblack@superlink.net)
|
217
|
+
# Co-author:: Hal Fulton (mailto:hal9000@hypermetrics.com)
|
218
|
+
# Project contributors:: Nolan Darilek, Jason Johnston
|
219
|
+
#
|
220
|
+
# Thanks to Hal Fulton for hosting the Codefest.
|
221
|
+
#
|
222
|
+
# Thanks to Matz for suggestions about the class design.
|
223
|
+
#
|
224
|
+
# Thanks to Gavin Sinclair for some feedback on the documentation.
|
225
|
+
#
|
226
|
+
# The text for parts of this document, especially the Description and
|
227
|
+
# Conversions sections, above, was adapted from the Linux Programmer's
|
228
|
+
# Manual manpage for scanf(3), dated 1995-11-01.
|
229
|
+
#
|
230
|
+
# == Bugs and bug reports
|
231
|
+
#
|
232
|
+
# scanf for Ruby is based on something of an amalgam of C scanf
|
233
|
+
# implementations and documentation, rather than on a single canonical
|
234
|
+
# description. Suggestions for features and behaviors which appear in
|
235
|
+
# other scanfs, and would be meaningful in Ruby, are welcome, as are
|
236
|
+
# reports of suspicious behaviors and/or bugs. (Please see "Credits and
|
237
|
+
# acknowledgements", above, for email addresses.)
|
238
|
+
|
239
|
+
module Scanf
  # :stopdoc:

  # ==Technical notes
  #
  # ===Rationale behind scanf for Ruby
  #
  # The impetus for a scanf implementation in Ruby comes chiefly from the fact
  # that existing pattern matching operations, such as Regexp#match and
  # String#scan, return all results as strings, which have to be converted to
  # integers or floats explicitly in cases where what's ultimately wanted are
  # integer or float values.
  #
  # ===Design of scanf for Ruby
  #
  # scanf for Ruby is essentially a <format string>-to-<regular
  # expression> converter.
  #
  # When scanf is called, a FormatString object is generated from the
  # format string ("%d%s...") argument. The FormatString object breaks the
  # format string down into atoms ("%d", "%5f", "blah", etc.), and from
  # each atom it creates a FormatSpecifier object, which it
  # saves.
  #
  # Each FormatSpecifier has a regular expression fragment and a "handler"
  # associated with it. For example, the regular expression fragment
  # associated with the format "%d" is "([-+]?\d+)", and the handler
  # associated with it is a wrapper around String#to_i. scanf itself calls
  # FormatString#match, passing in the input string. FormatString#match
  # iterates through its FormatSpecifiers; for each one, it matches the
  # corresponding regular expression fragment against the string. If
  # there's a match, it sends the matched string to the handler associated
  # with the FormatSpecifier.
  #
  # Thus, to follow up the "%d" example: if "123" occurs in the input
  # string when a FormatSpecifier consisting of "%d" is reached, the "123"
  # will be matched against "([-+]?\d+)", and the matched string will be
  # rendered into an integer by a call to to_i.
  #
  # The rendered match is then saved to an accumulator array, and the
  # input string is reduced to the post-match substring. Thus the string
  # is "eaten" from the left as the FormatSpecifiers are applied in
  # sequence. (This is done to a duplicate string; the original string is
  # not altered.)
  #
  # As soon as a regular expression fragment fails to match the string, or
  # when the FormatString object runs out of FormatSpecifiers, scanning
  # stops and results accumulated so far are returned in an array.

  # One atom of a format string ("%d", " %5c", "%[a-z]", or a run of literal
  # characters), paired with the regular-expression fragment used to match
  # it and the private handler used to convert the matched text.
  class FormatSpecifier

    attr_reader :re_string, :matched_string, :conversion, :matched

    private

    # Truthy (a MatchData) when the specifier carries the assignment
    # suppression flag ('%*...'); handlers then return nil so the value is
    # dropped by FormatString#match.
    def skip
      /^\s*%\*/.match(@spec_string)
    end

    # Converts matched text to a Float. Handles C99 hexadecimal floats
    # ("0x1.8p+1") and the "1.e+5" form, which String#to_f does not parse
    # as intended; everything else is delegated to String#to_f.
    def extract_float(s)
      return nil unless s && !skip

      if (m = /\A(?<sign>[-+]?)0[xX](?<frac>\.\h+|\h+(?:\.\h*)?)[pP](?<exp>[-+]\d+)/.match(s))
        whole, fraction = m[:frac].split('.')
        mantissa = whole.hex
        if fraction && !fraction.empty?
          # Each hex digit after the point is worth 1/16 of the previous one.
          mantissa += fraction.hex / (16.0 ** fraction.length)
        end
        (m[:sign] == '-' ? -1 : 1) * Math.ldexp(mantissa, m[:exp].to_i)
      elsif (m = /\A([-+]?\d+)\.([eE][-+]\d+)/.match(s))
        # Drop the bare decimal point so to_f sees the exponent.
        "#{m[1]}#{m[2]}".to_f
      else
        s.to_f
      end
    end

    def extract_decimal(s); s.to_i if s && !skip; end
    def extract_hex(s);     s.hex if s && !skip; end
    def extract_octal(s);   s.oct if s && !skip; end
    def extract_integer(s); Integer(s) if s && !skip; end
    def extract_plain(s);   s unless skip; end

    # Handler for atoms that never contribute a value (literals and '%%').
    def nil_proc(s); nil; end

    public

    # The atom exactly as it appeared in the format string.
    def to_s
      @spec_string
    end

    # Truthy when leading white space in the input is significant for this
    # specifier (%c not preceded by white space in the format, or a
    # character class), i.e. when #match must not strip it.
    def count_space?
      /(?:\A|\S)%\*?\d*c|%\d*\[/.match(@spec_string)
    end

    # Builds the regular-expression source (+re_string+, anchored with \A)
    # and selects the conversion handler for the atom +str+.
    #
    # The +when+ clauses are unanchored and therefore order-dependent; do
    # not reorder them.
    def initialize(str)
      @spec_string = str
      h = '[A-Fa-f0-9]'
      # Shared by %f and %<n>f: optional sign, then hex float, plain
      # integer, or decimal with optional exponent.
      float_body = '[-+]?(?:0[xX](?:\.\h+|\h+(?:\.\h*)?)[pP][-+]\d+|\d+(?![\d.])|\d*\.\d*(?:[eE][-+]?\d+)?)'

      @re_string, @handler =
        case @spec_string

        # %[[:...:]]
        when /%\*?(\[\[:[a-z]+:\]\])/
          [ "(#{$1}+)", :extract_plain ]

        # %5[[:...:]]
        when /%\*?(\d+)(\[\[:[a-z]+:\]\])/
          [ "(#{$2}{1,#{$1}})", :extract_plain ]

        # %[...]
        when /%\*?\[([^\]]*)\]/
          yes = $1
          # +no+ is the complement set, used as a lookahead so the match
          # ends where the accepted set ends.
          no = /^\^/.match(yes) ? yes[1..-1] : '^' + yes
          [ "([#{yes}]+)(?=[#{no}]|\\z)", :extract_plain ]

        # %5[...]
        when /%\*?(\d+)\[([^\]]*)\]/
          yes = $2
          w = $1
          [ "([#{yes}]{1,#{w}})", :extract_plain ]

        # %i
        when /%\*?i/
          [ "([-+]?(?:(?:0[0-7]+)|(?:0[Xx]#{h}+)|(?:[1-9]\\d*)))", :extract_integer ]

        # %5i
        when /%\*?(\d+)i/
          n = $1.to_i
          s = "("
          # Each alternative is only offered when the width leaves room
          # for its prefix (sign, "0", "0x") plus at least one digit.
          s += "[1-9]\\d{1,#{n-1}}|"      if n > 1
          s += "0[0-7]{1,#{n-1}}|"        if n > 1
          s += "[-+]0[0-7]{1,#{n-2}}|"    if n > 2
          s += "[-+][1-9]\\d{1,#{n-2}}|"  if n > 2
          s += "0[Xx]#{h}{1,#{n-2}}|"     if n > 2
          s += "[-+]0[Xx]#{h}{1,#{n-3}}|" if n > 3
          s += "\\d"
          s += ")"
          [ s, :extract_integer ]

        # %d, %u
        when /%\*?[du]/
          [ '([-+]?\d+)', :extract_decimal ]

        # %5d, %5u
        when /%\*?(\d+)[du]/
          n = $1.to_i
          s = "("
          s += "[-+]\\d{1,#{n-1}}|" if n > 1
          s += "\\d{1,#{$1}})"
          [ s, :extract_decimal ]

        # %x
        when /%\*?[Xx]/
          [ "([-+]?(?:0[Xx])?#{h}+)", :extract_hex ]

        # %5x
        when /%\*?(\d+)[Xx]/
          n = $1.to_i
          s = "("
          s += "[-+]0[Xx]#{h}{1,#{n-3}}|" if n > 3
          s += "0[Xx]#{h}{1,#{n-2}}|"     if n > 2
          s += "[-+]#{h}{1,#{n-1}}|"      if n > 1
          s += "#{h}{1,#{n}}"
          s += ")"
          [ s, :extract_hex ]

        # %o
        when /%\*?o/
          [ '([-+]?[0-7]+)', :extract_octal ]

        # %5o
        when /%\*?(\d+)o/
          n = $1.to_i
          s = "("
          # A signed alternative needs room for the sign plus one digit;
          # with a width of 1 it would produce the interval {1,0}.
          s += "[-+][0-7]{1,#{n-1}}|" if n > 1
          s += "[0-7]{1,#{n}})"
          [ s, :extract_octal ]

        # %f
        when /%\*?[aefgAEFG]/
          [ "(#{float_body})", :extract_float ]

        # %5f
        when /%\*?(\d+)[aefgAEFG]/
          [ "(?=#{float_body})(\\S{1,#{$1}})", :extract_float ]

        # %5s
        when /%\*?(\d+)s/
          [ "(\\S{1,#{$1}})", :extract_plain ]

        # %s
        when /%\*?s/
          [ '(\S+)', :extract_plain ]

        # %c preceded by white space in the format: skip input white space
        when /\s%\*?c/
          [ "\\s*(.)", :extract_plain ]

        # %c
        when /%\*?c/
          [ "(.)", :extract_plain ]

        # %5c (whitespace issues are handled by the count_*_space? methods)
        when /%\*?(\d+)c/
          [ "(.{1,#{$1}})", :extract_plain ]

        # %%
        when /%%/
          [ '(\s*%)', :nil_proc ]

        # literal characters
        else
          [ "(#{Regexp.escape(@spec_string)})", :nil_proc ]
        end

      @re_string = '\A' + @re_string
    end

    # The compiled regular expression for this specifier. Compiled once and
    # reused, since a specifier may be matched many times (block scanning).
    def to_re
      @re ||= Regexp.new(@re_string, Regexp::MULTILINE)
    end

    # Matches this specifier against the start of +str+ (a copy; +str+ is
    # not modified). On success records +conversion+, +matched_string+ and
    # +matched+, and returns the MatchData; otherwise returns nil.
    def match(str)
      @matched = false
      s = count_space? ? str.dup : str.sub(/\A\s+/, '')
      res = to_re.match(s)
      if res
        @conversion = send(@handler, res[1])
        @matched_string = @conversion.to_s
        @matched = true
      end
      res
    end

    # The conversion letter of the specifier ('d', 'c', '[' ...), or nil
    # for literals and upper-case conversions.
    def letter
      @spec_string[/%\*?\d*([a-z\[])/, 1]
    end

    # The maximum field width as an Integer, or nil when none was given.
    def width
      @spec_string[/%\*?(\d+)/, 1]&.to_i
    end

    # Truthy when the last match succeeded but the specifier could still
    # consume more input: a %c / character class that has not used up its
    # width, or a character class without a width.
    def mid_match?
      return false unless @matched
      cc_no_width    = letter == '[' && !width
      c_or_cc_width  = (letter == 'c' || letter == '[') && width
      width_left     = c_or_cc_width && (matched_string.size < width)

      return width_left || cc_no_width
    end

  end

  # A parsed format string: an ordered list of FormatSpecifier objects plus
  # the bookkeeping of the most recent #match call.
  class FormatString

    attr_reader :string_left, :last_spec_tried,
                :last_match_tried, :matched_count, :space

    SPECIFIERS = 'diuXxofFeEgGscaA'
    REGEX = /
        # possible space, followed by...
          (?:\s*
          # percent sign, followed by...
            %
            # another percent sign, or...
              (?:%|
                 # optional assignment suppression flag
                 \*?
                 # optional maximum field width
                 \d*
                   # named character class, ...
                   (?:\[\[:\w+:\]\]|
                   # traditional character class, or...
                      \[[^\]]*\]|
                   # specifier letter.
                      [#{SPECIFIERS}])))|
            # or miscellaneous characters
              [^%\s]+/ix

    # Splits +str+ into atoms and builds one FormatSpecifier per atom.
    # A format containing only white space yields no specifiers. +space+
    # is set when the format ends in white space.
    def initialize(str)
      @specs = []
      @i = 1
      s = str.to_s
      return unless /\S/.match(s)
      @space = true if /\s\z/.match(s)
      @specs.replace s.scan(REGEX).map {|spec| FormatSpecifier.new(spec) }
    end

    # The remaining specifiers joined back into a format string.
    def to_s
      @specs.join('')
    end

    # Drops the first +n+ specifiers (by default, those matched by the most
    # recent #match).
    def prune(n=matched_count)
      n.times { @specs.shift }
    end

    # Number of specifiers still held.
    def spec_count
      @specs.size
    end

    # True when the specifier index reached by the most recent #match is
    # the last one.
    def last_spec
      @i == spec_count - 1
    end

    # Applies the specifiers to +str+ from left to right, stopping at the
    # first failure or when the input is exhausted. Returns the array of
    # conversions, without the nils produced by suppressed or literal atoms.
    def match(str)
      accum = []
      @string_left = str
      @matched_count = 0

      @specs.each_with_index do |spec,i|
        @i = i
        @last_spec_tried = spec
        @last_match_tried = spec.match(@string_left)
        break unless @last_match_tried
        @matched_count += 1

        accum << spec.conversion

        @string_left = @last_match_tried.post_match
        break if @string_left.empty?
      end
      return accum.compact
    end
  end
  # :startdoc:
end
|
562
|
+
|
563
|
+
class IO

  #:stopdoc:
  # The trick here is doing a match where you grab one *line*
  # of input at a time.  The linebreak may or may not occur
  # at the boundary where the string matches a format specifier.
  # And if it does, some rule about whitespace may or may not
  # be in effect...
  #
  # That's why this is much more elaborate than the string
  # version.
  #
  # For each line:
  #
  # Match succeeds (non-emptily)
  # and the last attempted spec/string sub-match succeeded:
  #
  #   could the last spec keep matching?
  #     yes: save interim results and continue (next line)
  #
  # The last attempted spec/string did not match:
  #
  # are we on the next-to-last spec in the string?
  #   yes:
  #     is fmt_string.string_left all spaces?
  #       yes: does current spec care about input space?
  #         yes: fatal failure
  #         no: save interim results and continue
  #   no: continue  [this state could be analyzed further]
  #
  #:startdoc:

  # Scans the stream according to the format string +str+, reading one
  # line at a time, and returns an array of the conversions found.
  #
  # If a block is given, the format is applied repeatedly; each array of
  # conversions is yielded to the block and the values returned by the
  # block are collected into the returned array.
  #
  # An empty format, or one containing only white space, yields [].
  #
  # After scanning, the stream is repositioned just past the consumed
  # input when the stream supports seeking.
  #
  # See Scanf for details on creating a format string.
  #
  # You will need to require 'scanf' to use IO#scanf.
  def scanf(str, &b) #:yield: current_match
    return block_scanf(str, &b) if b
    return [] unless str.size > 0

    # Not every stream has a position (pipes, terminals); fall back to 0.
    start_position =
      begin
        pos
      rescue StandardError
        0
      end
    matched_so_far = 0
    source_buffer = ""
    result_buffer = []
    final_result = []

    fstr = Scanf::FormatString.new(str)
    # A white-space-only format has no specifiers; without this guard
    # last_spec_tried below would be nil and the scan would raise.
    return [] if fstr.spec_count.zero?

    loop do
      if eof || (tty? && !fstr.match(source_buffer))
        final_result.concat(result_buffer)
        break
      end

      source_buffer << gets

      current_match = fstr.match(source_buffer)

      spec = fstr.last_spec_tried

      if spec.matched
        if spec.mid_match?
          # The last specifier could still take more input: keep the
          # interim result and read another line.
          result_buffer.replace(current_match)
          next
        end

      elsif (fstr.matched_count == fstr.spec_count - 1)
        if /\A\s*\z/.match(fstr.string_left)
          break if spec.count_space?
          result_buffer.replace(current_match)
          next
        end
      end

      final_result.concat(current_match)

      # Account for the characters consumed from the buffer in this pass.
      matched_so_far += source_buffer.size
      source_buffer.replace(fstr.string_left)
      matched_so_far -= source_buffer.size
      break if fstr.last_spec
      fstr.prune
    end

    begin
      seek(start_position + matched_so_far, IO::SEEK_SET)
    rescue Errno::ESPIPE
      # Stream is not seekable; leave the position where reading stopped.
    end

    soak_up_spaces if fstr.last_spec && fstr.space

    return final_result
  end

  private

  # Consumes white space from the stream, leaving the first
  # non-white-space character (if any) unread.
  def soak_up_spaces
    c = getc
    ungetc(c) if c
    until eof || !c || /\S/.match(c.chr)
      c = getc
    end
    ungetc(c) if (c && /\S/.match(c.chr))
  end

  # Repeatedly applies the format +str+ to the stream, yielding each
  # non-empty array of conversions and collecting the block's results.
  # Stops at end of file or at the first scan that converts nothing.
  #
  # The loop formerly also tested `fstr.last_spec_tried == last_spec` on a
  # FormatString that was never matched (nil compared with a boolean),
  # which was always false; that dead condition has been dropped.
  def block_scanf(str)
    final = []
    loop do
      current = scanf(str)
      break if current.empty?
      final.push(yield(current))
      break if eof
    end
    return final
  end
end
|
696
|
+
|
697
|
+
class String

  # :section: scanf
  #
  # You will need to require 'scanf' to use these methods

  # Scans the current string. If a block is given, it
  # functions exactly like block_scanf.
  #
  #   arr = "123 456".scanf("%d%d")
  #   # => [123, 456]
  #
  #   require 'pp'
  #
  #   "this 123 read that 456 other".scanf("%s%d%s") {|m| pp m}
  #
  #   # ["this", 123, "read"]
  #   # ["that", 456, "other"]
  #   # => [["this", 123, "read"], ["that", 456, "other"]]
  #
  # +fstr+ may be a format string or an already built
  # Scanf::FormatString, which is then used as is.
  #
  # See Scanf for details on creating a format string.
  #
  # You will need to require 'scanf' to use String#scanf
  def scanf(fstr, &b) #:yield: current_match
    return block_scanf(fstr, &b) if b

    fs = fstr.is_a?(Scanf::FormatString) ? fstr : Scanf::FormatString.new(fstr)
    fs.match(self)
  end

  # Scans the current string until the match is exhausted,
  # yielding each match as it is encountered in the string.
  # A block is not necessary, as the results will simply
  # be aggregated into the final array.
  #
  #   "123 456".block_scanf("%d")
  #   # => [123, 456]
  #
  # If a block is given, the value returned from each
  # yield is added to the output array.
  #
  #   "123 456".block_scanf("%d") do |digit,| # the ',' unpacks the Array
  #     digit + 100
  #   end
  #   # => [223, 556]
  #
  # See Scanf for details on creating a format string.
  #
  # You will need to require 'scanf' to use String#block_scanf
  def block_scanf(fstr) #:yield: current_match
    fs = Scanf::FormatString.new(fstr)
    str = self.dup
    final = []
    loop do
      current = str.scanf(fs)
      break if current.empty?
      if block_given?
        final.push(yield(current))
      else
        # No block: aggregate the conversions themselves, as documented.
        final.concat(current)
      end
      # Continue with whatever the format string left unconsumed.
      str = fs.string_left
      break if str.empty?
    end
    return final
  end
end
|
765
|
+
|
766
|
+
module Kernel
  private

  # Reads from STDIN and scans the input according to +format+, passing
  # along the block if one is given. This simply delegates to IO#scanf
  # on STDIN; see that method for details.
  #
  # See Scanf for details on creating a format string.
  #
  # You will need to require 'scanf' to use Kernel#scanf.
  def scanf(format, &b) #:doc:
    STDIN.scanf(format, &b)
  end
end
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scanf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Alan Black
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-05-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.14'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.14'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: test-unit
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: scanf is an implementation of the C function scanf(3).
|
56
|
+
email:
|
57
|
+
- dblack@superlink.net
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- lib/scanf.rb
|
63
|
+
homepage: https://github.com/ruby/scanf
|
64
|
+
licenses:
|
65
|
+
- BSD-2-Clause
|
66
|
+
metadata: {}
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 2.5.0dev
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
requirements: []
|
82
|
+
rubyforge_project:
|
83
|
+
rubygems_version: 2.6.12
|
84
|
+
signing_key:
|
85
|
+
specification_version: 4
|
86
|
+
summary: scanf is an implementation of the C function scanf(3).
|
87
|
+
test_files: []
|