ruby_sscanf 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +53 -23
- data/bench/bench.rb +18 -0
- data/lib/ruby_sscanf.rb +93 -36
- data/lib/ruby_sscanf/version.rb +3 -1
- data/tests/scan_tests.rb +24 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec28a950c28b1a0af3450694cb4c79bcee8847c1
|
4
|
+
data.tar.gz: 1f37bacdef6e1e8b04b34614f50e925c771e49d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 826961429f3d5ada3a4e2b4a21313d4187abe41e2c2c0313c8684b3755e71a615df2550381dd996df38acb3887db909ce1dd7bfca125d8c96c7dd55d14f73463
|
7
|
+
data.tar.gz: 3193a380491b336751702198518cc956273234101b04acce068307c82900e410dfdcb126931bb951caa681fc4d60097b9383f881e9ecfbe7e41c0031152ecb6b
|
data/README.md
CHANGED
@@ -32,23 +32,30 @@ format string is a description of that format. The output of the sscanf method
|
|
32
32
|
is an array of data extracted from the input string.
|
33
33
|
|
34
34
|
The format string consists of literal string components and format specifiers.
|
35
|
+
During execution of the sscanf method, each element in the format string is
|
36
|
+
used to find the corresponding data in the input string, optionally placing
|
37
|
+
the extracted data in the aforementioned output array. If a format element
|
38
|
+
cannot be matched to input data, processing stops at that point. Otherwise
|
39
|
+
processing continues until all the format elements are done.
|
35
40
|
|
36
41
|
Literal string components match themselves in the input string. If the literal
|
37
|
-
has a trailing space, then this matches zero or more spaces.
|
38
|
-
sequence '%%' matches one '%'.
|
42
|
+
has a trailing space, then this matches zero or more spaces.
|
43
|
+
The special sequence '%%' matches one '%' in the input string.
|
39
44
|
|
40
45
|
The layout of a format specifier is:
|
41
46
|
|
42
47
|
%[skip_flag][width]format
|
43
48
|
|
44
49
|
* The % sign is the lead-in character.
|
45
|
-
* The optional skip flag, the
|
46
|
-
* The width field is an integer
|
50
|
+
* The optional skip flag, the *, causes any data extracted to be ignored.
|
51
|
+
* The width field is an integer that determines the amount of text to be
|
47
52
|
parsed.
|
48
53
|
* The format field determines the type of data being parsed.
|
49
54
|
|
50
55
|
The supported format field values are:
|
51
56
|
<br>
|
57
|
+
* a,e,f,g,A,E,F,G - Scan for an (optionally signed) floating point or
|
58
|
+
scientific notation number.
|
52
59
|
* b - Scan for an (optionally signed) binary number with an optional
|
53
60
|
leading '0b' or '0B'.
|
54
61
|
* c - Grab the next character. If a positive width is specified, grab width
|
@@ -56,7 +63,6 @@ characters. For a negative width, grab characters to the position from the
|
|
56
63
|
end of the input. For example a width of -1 will grab all of the remaining
|
57
64
|
input data.
|
58
65
|
* d - Scan for an (optionally signed) decimal number.
|
59
|
-
* f - Scan for an (optionally signed) floating point number.
|
60
66
|
* i - Scan for an (optionally signed) integer. If the number begins with '0x'
|
61
67
|
or '0X', process hexadecimal; with '0b' or '0B', process binary, if '0', '0o',
|
62
68
|
or '0O', process octal, else process decimal.
|
@@ -70,7 +76,7 @@ or "...".
|
|
70
76
|
[+-]?decimal/decimal[r]?
|
71
77
|
* s - Scan for a space terminated string.
|
72
78
|
* u - Scan for a decimal number.
|
73
|
-
* x - Scan for an (optionally signed) hexadecimal number with an optional
|
79
|
+
* x,X - Scan for an (optionally signed) hexadecimal number with an optional
|
74
80
|
leading '0x' or '0X'.
|
75
81
|
* [chars] - Scan for a contiguous string of characters in the set [chars].
|
76
82
|
* [^chars] - Scan for a contiguous string of characters not in the set [chars]
|
@@ -78,56 +84,80 @@ leading '0x' or '0X'.
|
|
78
84
|
## Examples
|
79
85
|
Here are a few examples of the sscanf method in action.
|
80
86
|
|
87
|
+
```ruby
|
81
88
|
"12 34 -56".sscanf "%d %2d %4d"
|
82
|
-
|
89
|
+
returns [12, 34, -56]
|
83
90
|
|
84
91
|
"255 0b11111111 0377 0xFF 0 ".sscanf "%i %i %i %i %i"
|
85
|
-
|
92
|
+
returns [255, 255, 255, 255, 0]
|
86
93
|
|
87
94
|
"7 10 377".sscanf "%o %o %o"
|
88
|
-
|
95
|
+
returns [7, 8, 255]
|
89
96
|
|
90
97
|
"10 10011 11110000".sscanf "%b %b %b"
|
91
|
-
|
98
|
+
returns [2, 19, 240]
|
92
99
|
|
93
100
|
"0 F FF FFF FFFF".sscanf "%x %x %x %x %x"
|
94
|
-
|
101
|
+
returns [0, 15, 255, 4095, 65535]
|
95
102
|
|
96
103
|
"Hello Silly World".sscanf "%s %*s %s"
|
97
|
-
|
104
|
+
returns ["Hello", "World"]
|
98
105
|
|
99
106
|
"Hello Silly World".sscanf "%5c %*5c %5c"
|
100
|
-
|
107
|
+
returns ["Hello", "World"]
|
101
108
|
|
102
109
|
"42 The secret is X".sscanf "%i %-1c"
|
103
|
-
|
110
|
+
returns [42, "The secret is X"]
|
104
111
|
|
105
112
|
"42 The secret is X".sscanf "%i %-2c%c"
|
106
|
-
|
113
|
+
returns [42, "The secret is ", "X"]
|
107
114
|
|
108
115
|
"42 The secret is X".sscanf "%i %*-2c%c"
|
109
|
-
|
116
|
+
returns [42, "X"]
|
110
117
|
|
111
118
|
"9.99 1.234e56 -1e100".sscanf "%f %f %f"
|
112
|
-
|
119
|
+
returns [9.99, 1.234e56, -1e100]
|
113
120
|
|
114
121
|
"85% 75%".sscanf "%f%% %f%%"
|
115
|
-
|
122
|
+
returns [85, 75]
|
116
123
|
|
117
124
|
"12 34 -56".sscanf "%u %u %u"
|
118
|
-
|
125
|
+
returns [12, 34]
|
119
126
|
|
120
127
|
"1/2 3/4r -5/6".sscanf "%r %r %r"
|
121
|
-
|
128
|
+
returns ['1/2'.to_r, '3/4'.to_r, '-5/6'.to_r]
|
122
129
|
|
123
130
|
"1+2i 3+4j -5e10-6.2i".sscanf "%j %j %j"
|
124
|
-
|
131
|
+
returns [Complex('1+2i'), Complex('3+4j'), Complex('-5e10-6.2i')]
|
125
132
|
|
126
133
|
"'quote' 'silly' \"un quote\" 'a \\'' ".sscanf "%q %*q %q %q"
|
127
|
-
|
134
|
+
returns ["quote", "un quote", "a '"]
|
128
135
|
|
129
136
|
"a b c".sscanf "%[a] %[b] %[c]"
|
130
|
-
|
137
|
+
returns ["a", "b", "c"]
|
138
|
+
```
|
139
|
+
|
140
|
+
## Benchmarks
|
141
|
+
|
142
|
+
I ran a test just to make sure that ruby_sscanf was not terribly
|
143
|
+
under-performant when compared to the ruby standard library version. I was
|
144
|
+
pleased to see that in fact ruby_sscanf was faster. Here are the results:
|
145
|
+
|
146
|
+
Calculating -------------------------------------
|
147
|
+
Scan strings with ruby_sscanf
|
148
|
+
1.520k i/100ms
|
149
|
+
Scan strings with scanf
|
150
|
+
308.000 i/100ms
|
151
|
+
-------------------------------------------------
|
152
|
+
Scan strings with ruby_sscanf
|
153
|
+
15.844k (± 5.2%) i/s - 79.040k
|
154
|
+
Scan strings with scanf
|
155
|
+
3.127k (± 4.2%) i/s - 15.708k
|
156
|
+
|
157
|
+
Comparison:
|
158
|
+
Scan strings with ruby_sscanf: 15843.8 i/s
|
159
|
+
Scan strings with scanf: 3126.7 i/s - 5.07x slower
|
160
|
+
|
131
161
|
|
132
162
|
## Contributing
|
133
163
|
|
data/bench/bench.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "benchmark/ips"
|
2
|
+
require 'scanf'
|
3
|
+
require 'ruby_sscanf'
|
4
|
+
|
5
|
+
def use_scanf
|
6
|
+
'12 34 56 89 1.234 1.0e10'.scanf('%d %d %d %d %f %f')
|
7
|
+
end
|
8
|
+
|
9
|
+
def use_ruby_sscanf
|
10
|
+
'12 34 56 89 1.234 1.0e10'.sscanf('%d %d %d %d %f %f')
|
11
|
+
end
|
12
|
+
|
13
|
+
Benchmark.ips do |x|
|
14
|
+
x.report("Scan strings with ruby_sscanf") { use_ruby_sscanf }
|
15
|
+
x.report("Scan strings with scanf") { use_scanf }
|
16
|
+
x.compare!
|
17
|
+
end
|
18
|
+
|
data/lib/ruby_sscanf.rb
CHANGED
@@ -1,61 +1,118 @@
|
|
1
|
-
|
2
1
|
require 'format_engine'
|
3
2
|
require_relative 'ruby_sscanf/version'
|
4
3
|
|
4
|
+
#The String class is monkey patched to support sscanf.
|
5
5
|
class String
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
7
|
+
#A regular expression for decimal integers.
|
8
|
+
RSF_DECIMAL = /[+-]?\d+/
|
9
|
+
|
10
|
+
#A regular expression for unsigned decimal integers.
|
11
|
+
RSF_UNSIGNED = /[+]?\d+/
|
12
|
+
|
13
|
+
#A regular expression for hexadecimal integers.
|
14
|
+
RSF_HEX = /[+-]?(0[xX])?\h+/
|
15
|
+
RSF_HEX_PARSE = lambda {parse(RSF_HEX) ? dst << found.to_i(16) : :break}
|
16
|
+
RSF_HEX_SKIP = lambda {parse(RSF_HEX) || :break}
|
17
|
+
|
18
|
+
#A regular expression for octal integers.
|
19
|
+
RSF_OCTAL = /[+-]?(0[oO])?[0-7]+/
|
20
|
+
|
21
|
+
#A regular expression for binary integers.
|
22
|
+
RSF_BINARY = /[+-]?(0[bB])?[01]+/
|
23
|
+
|
24
|
+
#A regular expression for flexible base integers.
|
25
|
+
RSF_INTEGER = /[+-]?((0[xX]\h+)|(0[bB][01]+)|(0[oO]?[0-7]*)|([1-9]\d*))/
|
26
|
+
|
27
|
+
#A regular expression for floating point and scientific notation numbers.
|
28
|
+
RSF_FLOAT = /[+-]?\d+(\.\d+)?([eE][+-]?\d+)?/
|
29
|
+
RSF_FLOAT_PARSE = lambda {parse(RSF_FLOAT) ? dst << found.to_f : :break}
|
30
|
+
RSF_FLOAT_SKIP = lambda {parse(RSF_FLOAT) || :break}
|
31
|
+
|
32
|
+
#A regular expression for rational numbers.
|
33
|
+
RSF_RATIONAL = /[+-]?\d+\/\d+(r)?/
|
34
|
+
|
35
|
+
#A regular expression for complex numbers.
|
36
|
+
RSF_COMPLEX = %r{(?<num> \d+(\.\d+)?([eE][+-]?\d+)?){0}
|
37
|
+
[+-]?\g<num>[+-]\g<num>[ij]
|
38
|
+
}x
|
39
|
+
|
40
|
+
#A regular expression for a string.
|
41
|
+
RSF_STRING = /\S+/
|
42
|
+
|
43
|
+
#A regular expression for quoted strings.
|
44
|
+
RSF_QUOTED = /("([^\\"]|\\.)*")|('([^\\']|\\.)*')/
|
45
|
+
|
46
|
+
#Get the parsing engine. This is cached on a per-thread basis. That is to
|
47
|
+
#say, each thread gets its own \FormatEngine::Engine instance.
|
20
48
|
def self.get_engine
|
21
49
|
Thread.current[:ruby_sscanf_engine] ||= FormatEngine::Engine.new(
|
22
|
-
"%
|
23
|
-
"%*
|
50
|
+
"%a" => RSF_FLOAT_PARSE,
|
51
|
+
"%*a" => RSF_FLOAT_SKIP,
|
52
|
+
|
53
|
+
"%A" => RSF_FLOAT_PARSE,
|
54
|
+
"%*A" => RSF_FLOAT_SKIP,
|
55
|
+
|
56
|
+
"%b" => lambda {parse(RSF_BINARY) ? dst << found.to_i(2) : :break},
|
57
|
+
"%*b" => lambda {parse(RSF_BINARY) || :break},
|
24
58
|
|
25
59
|
"%c" => lambda {dst << grab},
|
26
60
|
"%*c" => lambda {grab},
|
27
61
|
|
28
|
-
"%d" => lambda {parse(
|
29
|
-
"%*d" => lambda {parse(
|
62
|
+
"%d" => lambda {parse(RSF_DECIMAL) ? dst << found.to_i : :break},
|
63
|
+
"%*d" => lambda {parse(RSF_DECIMAL) || :break},
|
64
|
+
|
65
|
+
"%e" => RSF_FLOAT_PARSE,
|
66
|
+
"%*e" => RSF_FLOAT_SKIP,
|
67
|
+
|
68
|
+
"%E" => RSF_FLOAT_PARSE,
|
69
|
+
"%*E" => RSF_FLOAT_SKIP,
|
30
70
|
|
31
|
-
"%f" =>
|
32
|
-
"%*f" =>
|
71
|
+
"%f" => RSF_FLOAT_PARSE,
|
72
|
+
"%*f" => RSF_FLOAT_SKIP,
|
33
73
|
|
34
|
-
"%
|
35
|
-
"%*
|
74
|
+
"%F" => RSF_FLOAT_PARSE,
|
75
|
+
"%*F" => RSF_FLOAT_SKIP,
|
36
76
|
|
37
|
-
"%
|
38
|
-
"%*
|
77
|
+
"%g" => RSF_FLOAT_PARSE,
|
78
|
+
"%*g" => RSF_FLOAT_SKIP,
|
39
79
|
|
40
|
-
"%
|
41
|
-
"%*
|
80
|
+
"%G" => RSF_FLOAT_PARSE,
|
81
|
+
"%*G" => RSF_FLOAT_SKIP,
|
82
|
+
|
83
|
+
"%i" => lambda {parse(RSF_INTEGER) ? dst << found.to_i(0) : :break},
|
84
|
+
"%*i" => lambda {parse(RSF_INTEGER) || :break},
|
85
|
+
|
86
|
+
"%j" => lambda {parse(RSF_COMPLEX) ? dst << Complex(found) : :break},
|
87
|
+
"%*j" => lambda {parse(RSF_COMPLEX) || :break},
|
88
|
+
|
89
|
+
"%o" => lambda {parse(RSF_OCTAL) ? dst << found.to_i(8) : :break},
|
90
|
+
"%*o" => lambda {parse(RSF_OCTAL) || :break},
|
42
91
|
|
43
92
|
"%q" => lambda do
|
44
|
-
parse(
|
93
|
+
if parse(RSF_QUOTED)
|
94
|
+
dst << found[1..-2].gsub(/\\./) {|seq| seq[-1]}
|
95
|
+
else
|
96
|
+
:break
|
97
|
+
end
|
45
98
|
end,
|
46
|
-
"%*q" => lambda {parse(QUOTED) || :break},
|
47
99
|
|
48
|
-
"
|
49
|
-
|
100
|
+
"%*q" => lambda {parse(RSF_QUOTED) || :break},
|
101
|
+
|
102
|
+
"%r" => lambda {parse(RSF_RATIONAL) ? dst << found.to_r : :break},
|
103
|
+
"%*r" => lambda {parse(RSF_RATIONAL) || :break},
|
104
|
+
|
105
|
+
"%s" => lambda {parse(RSF_STRING) ? dst << found : :break},
|
106
|
+
"%*s" => lambda {parse(RSF_STRING) || :break},
|
50
107
|
|
51
|
-
"%
|
52
|
-
"%*
|
108
|
+
"%u" => lambda {parse(RSF_UNSIGNED) ? dst << found.to_i : :break},
|
109
|
+
"%*u" => lambda {parse(RSF_UNSIGNED) || :break},
|
53
110
|
|
54
|
-
"%
|
55
|
-
"%*
|
111
|
+
"%x" => RSF_HEX_PARSE,
|
112
|
+
"%*x" => RSF_HEX_SKIP,
|
56
113
|
|
57
|
-
"%
|
58
|
-
"%*
|
114
|
+
"%X" => RSF_HEX_PARSE,
|
115
|
+
"%*X" => RSF_HEX_SKIP,
|
59
116
|
|
60
117
|
"%[" => lambda {parse(fmt.regex) ? dst << found : :break},
|
61
118
|
"%*[" => lambda {parse(fmt.regex) || :break})
|
data/lib/ruby_sscanf/version.rb
CHANGED
data/tests/scan_tests.rb
CHANGED
@@ -26,6 +26,9 @@ class ScanTester < Minitest::Test
|
|
26
26
|
result = "0 F FF FFF FFFF".sscanf "%x %x %x %x %x"
|
27
27
|
assert_equal([0, 15, 255, 4095, 65535] , result)
|
28
28
|
|
29
|
+
result = "0 F FF FFF FFFF".sscanf "%X %*x %*X %x %X"
|
30
|
+
assert_equal([0, 4095, 65535] , result)
|
31
|
+
|
29
32
|
result = "Hello Silly World".sscanf "%s %*s %s"
|
30
33
|
assert_equal(["Hello", "World"] , result)
|
31
34
|
|
@@ -41,9 +44,30 @@ class ScanTester < Minitest::Test
|
|
41
44
|
result = "42 The secret is X".sscanf "%i %*-2c%c"
|
42
45
|
assert_equal([42, "X"] , result)
|
43
46
|
|
47
|
+
result = "9.99 1.234e56 -1e100".sscanf "%a %e %g"
|
48
|
+
assert_equal([9.99, 1.234e56, -1e100] , result)
|
49
|
+
|
50
|
+
result = "9.99 1.234e56 -1e100".sscanf "%*a %e %g"
|
51
|
+
assert_equal([1.234e56, -1e100] , result)
|
52
|
+
|
53
|
+
result = "9.99 1.234e56 -1e100".sscanf "%a %*e %g"
|
54
|
+
assert_equal([9.99, -1e100] , result)
|
55
|
+
|
56
|
+
result = "9.99 1.234e56 -1e100".sscanf "%a %e %*g"
|
57
|
+
assert_equal([9.99, 1.234e56] , result)
|
58
|
+
|
59
|
+
result = "9.99 1.234e56 -1e100".sscanf "%A %E %G"
|
60
|
+
assert_equal([9.99, 1.234e56, -1e100] , result)
|
61
|
+
|
62
|
+
result = "9.99 1.234e56 -1e100".sscanf "%A %*E %G"
|
63
|
+
assert_equal([9.99, -1e100] , result)
|
64
|
+
|
44
65
|
result = "9.99 1.234e56 -1e100".sscanf "%f %f %f"
|
45
66
|
assert_equal([9.99, 1.234e56, -1e100] , result)
|
46
67
|
|
68
|
+
result = "9.99 1.234e56 -1e100".sscanf "%F %*F %F"
|
69
|
+
assert_equal([9.99, -1e100] , result)
|
70
|
+
|
47
71
|
result = "85% 75%".sscanf "%f%% %f%%"
|
48
72
|
assert_equal([85, 75] , result)
|
49
73
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_sscanf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Camilleri
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: format_engine
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- LICENSE.txt
|
107
107
|
- README.md
|
108
108
|
- Rakefile
|
109
|
+
- bench/bench.rb
|
109
110
|
- lib/ruby_sscanf.rb
|
110
111
|
- lib/ruby_sscanf/version.rb
|
111
112
|
- ruby_sscanf.gemspec
|
@@ -135,3 +136,4 @@ signing_key:
|
|
135
136
|
specification_version: 4
|
136
137
|
summary: A string parser.
|
137
138
|
test_files: []
|
139
|
+
has_rdoc:
|