ruby_sscanf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +53 -23
- data/bench/bench.rb +18 -0
- data/lib/ruby_sscanf.rb +93 -36
- data/lib/ruby_sscanf/version.rb +3 -1
- data/tests/scan_tests.rb +24 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec28a950c28b1a0af3450694cb4c79bcee8847c1
|
4
|
+
data.tar.gz: 1f37bacdef6e1e8b04b34614f50e925c771e49d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 826961429f3d5ada3a4e2b4a21313d4187abe41e2c2c0313c8684b3755e71a615df2550381dd996df38acb3887db909ce1dd7bfca125d8c96c7dd55d14f73463
|
7
|
+
data.tar.gz: 3193a380491b336751702198518cc956273234101b04acce068307c82900e410dfdcb126931bb951caa681fc4d60097b9383f881e9ecfbe7e41c0031152ecb6b
|
data/README.md
CHANGED
@@ -32,23 +32,30 @@ format string is a description of that format. The output of the sscanf method
|
|
32
32
|
is an array of data extracted from the input string.
|
33
33
|
|
34
34
|
The format string consists of literal string components and format specifiers.
|
35
|
+
During execution of the sscanf method, each element in the format string is
|
36
|
+
used to find the corresponding data in the input string, optionally placing
|
37
|
+
the extracted data in the aforementioned output array. If a format element
|
38
|
+
cannot be matched to input data, processing stops at that point. Otherwise
|
39
|
+
processing continues until all the format elements are done.
|
35
40
|
|
36
41
|
Literal string components match themselves in the input string. If the literal
|
37
|
-
has a trailing space, then this matches zero or more spaces.
|
38
|
-
sequence '%%' matches one '%'.
|
42
|
+
has a trailing space, then this matches zero or more spaces.
|
43
|
+
The special sequence '%%' matches one '%' in the input string.
|
39
44
|
|
40
45
|
The layout of a format specifier is:
|
41
46
|
|
42
47
|
%[skip_flag][width]format
|
43
48
|
|
44
49
|
* The % sign is the lead-in character.
|
45
|
-
* The optional skip flag, the
|
46
|
-
* The width field is an integer
|
50
|
+
* The optional skip flag, the *, causes any data extracted to be ignored.
|
51
|
+
* The width field is an integer that determines the amount of text to be
|
47
52
|
parsed.
|
48
53
|
* The format field determines the type of data being parsed.
|
49
54
|
|
50
55
|
The supported format field values are:
|
51
56
|
<br>
|
57
|
+
* a,e,f,g,A,E,F,G - Scan for an (optionally signed) floating point or
|
58
|
+
scientific notation number.
|
52
59
|
* b - Scan for an (optionally signed) binary number with an optional
|
53
60
|
leading '0b' or '0B'.
|
54
61
|
* c - Grab the next character. If a positive width is specified, grab width
|
@@ -56,7 +63,6 @@ characters. For a negative width, grab characters to the position from the
|
|
56
63
|
end of the input. For example a width of -1 will grab all of the remaining
|
57
64
|
input data.
|
58
65
|
* d - Scan for an (optionally signed) decimal number.
|
59
|
-
* f - Scan for an (optionally signed) floating point number.
|
60
66
|
* i - Scan for an (optionally signed) integer. If the number begins with '0x'
|
61
67
|
or '0X', process hexadecimal; with '0b' or '0B', process binary, if '0', '0o',
|
62
68
|
or '0O', process octal, else process decimal.
|
@@ -70,7 +76,7 @@ or "...".
|
|
70
76
|
[+-]?decimal/decimal[r]?
|
71
77
|
* s - Scan for a space terminated string.
|
72
78
|
* u - Scan for a decimal number.
|
73
|
-
* x - Scan for an (optionally signed) hexadecimal number with an optional
|
79
|
+
* x,X - Scan for an (optionally signed) hexadecimal number with an optional
|
74
80
|
leading '0x' or '0X'.
|
75
81
|
* [chars] - Scan for a contiguous string of characters in the set [chars].
|
76
82
|
* [^chars] - Scan for a contiguous string of characters not in the set [chars]
|
@@ -78,56 +84,80 @@ leading '0x' or '0X'.
|
|
78
84
|
## Examples
|
79
85
|
Here are a few examples of the sscanf method in action.
|
80
86
|
|
87
|
+
```ruby
|
81
88
|
"12 34 -56".sscanf "%d %2d %4d"
|
82
|
-
|
89
|
+
returns [12, 34, -56]
|
83
90
|
|
84
91
|
"255 0b11111111 0377 0xFF 0 ".sscanf "%i %i %i %i %i"
|
85
|
-
|
92
|
+
returns [255, 255, 255, 255, 0]
|
86
93
|
|
87
94
|
"7 10 377".sscanf "%o %o %o"
|
88
|
-
|
95
|
+
returns [7, 8, 255]
|
89
96
|
|
90
97
|
"10 10011 11110000".sscanf "%b %b %b"
|
91
|
-
|
98
|
+
returns [2, 19, 240]
|
92
99
|
|
93
100
|
"0 F FF FFF FFFF".sscanf "%x %x %x %x %x"
|
94
|
-
|
101
|
+
returns [0, 15, 255, 4095, 65535]
|
95
102
|
|
96
103
|
"Hello Silly World".sscanf "%s %*s %s"
|
97
|
-
|
104
|
+
returns ["Hello", "World"]
|
98
105
|
|
99
106
|
"Hello Silly World".sscanf "%5c %*5c %5c"
|
100
|
-
|
107
|
+
returns ["Hello", "World"]
|
101
108
|
|
102
109
|
"42 The secret is X".sscanf "%i %-1c"
|
103
|
-
|
110
|
+
returns [42, "The secret is X"]
|
104
111
|
|
105
112
|
"42 The secret is X".sscanf "%i %-2c%c"
|
106
|
-
|
113
|
+
returns [42, "The secret is ", "X"]
|
107
114
|
|
108
115
|
"42 The secret is X".sscanf "%i %*-2c%c"
|
109
|
-
|
116
|
+
returns [42, "X"]
|
110
117
|
|
111
118
|
"9.99 1.234e56 -1e100".sscanf "%f %f %f"
|
112
|
-
|
119
|
+
returns [9.99, 1.234e56, -1e100]
|
113
120
|
|
114
121
|
"85% 75%".sscanf "%f%% %f%%"
|
115
|
-
|
122
|
+
returns [85, 75]
|
116
123
|
|
117
124
|
"12 34 -56".sscanf "%u %u %u"
|
118
|
-
|
125
|
+
returns [12, 34]
|
119
126
|
|
120
127
|
"1/2 3/4r -5/6".sscanf "%r %r %r"
|
121
|
-
|
128
|
+
returns ['1/2'.to_r, '3/4'.to_r, '-5/6'.to_r]
|
122
129
|
|
123
130
|
"1+2i 3+4j -5e10-6.2i".sscanf "%j %j %j"
|
124
|
-
|
131
|
+
returns [Complex('1+2i'), Complex('3+4j'), Complex('-5e10-6.2i')]
|
125
132
|
|
126
133
|
"'quote' 'silly' \"un quote\" 'a \\'' ".sscanf "%q %*q %q %q"
|
127
|
-
|
134
|
+
returns ["quote", "un quote", "a '"]
|
128
135
|
|
129
136
|
"a b c".sscanf "%[a] %[b] %[c]"
|
130
|
-
|
137
|
+
returns ["a", "b", "c"]
|
138
|
+
```
|
139
|
+
|
140
|
+
## Benchmarks
|
141
|
+
|
142
|
+
I ran a test just to make sure that ruby_sscanf was not terribly
|
143
|
+
under-performant when compared to the ruby standard library version. I was
|
144
|
+
pleased to see that in fact ruby_sscanf was faster. Here are the results:
|
145
|
+
|
146
|
+
Calculating -------------------------------------
|
147
|
+
Scan strings with ruby_sscanf
|
148
|
+
1.520k i/100ms
|
149
|
+
Scan strings with scanf
|
150
|
+
308.000 i/100ms
|
151
|
+
-------------------------------------------------
|
152
|
+
Scan strings with ruby_sscanf
|
153
|
+
15.844k (± 5.2%) i/s - 79.040k
|
154
|
+
Scan strings with scanf
|
155
|
+
3.127k (± 4.2%) i/s - 15.708k
|
156
|
+
|
157
|
+
Comparison:
|
158
|
+
Scan strings with ruby_sscanf: 15843.8 i/s
|
159
|
+
Scan strings with scanf: 3126.7 i/s - 5.07x slower
|
160
|
+
|
131
161
|
|
132
162
|
## Contributing
|
133
163
|
|
data/bench/bench.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "benchmark/ips"
|
2
|
+
require 'scanf'
|
3
|
+
require 'ruby_sscanf'
|
4
|
+
|
5
|
+
def use_scanf
|
6
|
+
'12 34 56 89 1.234 1.0e10'.scanf('%d %d %d %d %f %f')
|
7
|
+
end
|
8
|
+
|
9
|
+
def use_ruby_sscanf
|
10
|
+
'12 34 56 89 1.234 1.0e10'.sscanf('%d %d %d %d %f %f')
|
11
|
+
end
|
12
|
+
|
13
|
+
Benchmark.ips do |x|
|
14
|
+
x.report("Scan strings with ruby_sscanf") { use_ruby_sscanf }
|
15
|
+
x.report("Scan strings with scanf") { use_scanf }
|
16
|
+
x.compare!
|
17
|
+
end
|
18
|
+
|
data/lib/ruby_sscanf.rb
CHANGED
@@ -1,61 +1,118 @@
|
|
1
|
-
|
2
1
|
require 'format_engine'
|
3
2
|
require_relative 'ruby_sscanf/version'
|
4
3
|
|
4
|
+
#The String class is monkey patched to support sscanf.
|
5
5
|
class String
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
7
|
+
#A regular expression for decimal integers.
|
8
|
+
RSF_DECIMAL = /[+-]?\d+/
|
9
|
+
|
10
|
+
#A regular expression for unsigned decimal integers.
|
11
|
+
RSF_UNSIGNED = /[+]?\d+/
|
12
|
+
|
13
|
+
#A regular expression for hexadecimal integers.
|
14
|
+
RSF_HEX = /[+-]?(0[xX])?\h+/
|
15
|
+
RSF_HEX_PARSE = lambda {parse(RSF_HEX) ? dst << found.to_i(16) : :break}
|
16
|
+
RSF_HEX_SKIP = lambda {parse(RSF_HEX) || :break}
|
17
|
+
|
18
|
+
#A regular expression for octal integers.
|
19
|
+
RSF_OCTAL = /[+-]?(0[oO])?[0-7]+/
|
20
|
+
|
21
|
+
#A regular expression for binary integers.
|
22
|
+
RSF_BINARY = /[+-]?(0[bB])?[01]+/
|
23
|
+
|
24
|
+
#A regular expression for flexible base integers.
|
25
|
+
RSF_INTEGER = /[+-]?((0[xX]\h+)|(0[bB][01]+)|(0[oO]?[0-7]*)|([1-9]\d*))/
|
26
|
+
|
27
|
+
#A regular expression for floating point and scientific notation numbers.
|
28
|
+
RSF_FLOAT = /[+-]?\d+(\.\d+)?([eE][+-]?\d+)?/
|
29
|
+
RSF_FLOAT_PARSE = lambda {parse(RSF_FLOAT) ? dst << found.to_f : :break}
|
30
|
+
RSF_FLOAT_SKIP = lambda {parse(RSF_FLOAT) || :break}
|
31
|
+
|
32
|
+
#A regular expression for rational numbers.
|
33
|
+
RSF_RATIONAL = /[+-]?\d+\/\d+(r)?/
|
34
|
+
|
35
|
+
#A regular expression for complex numbers.
|
36
|
+
RSF_COMPLEX = %r{(?<num> \d+(\.\d+)?([eE][+-]?\d+)?){0}
|
37
|
+
[+-]?\g<num>[+-]\g<num>[ij]
|
38
|
+
}x
|
39
|
+
|
40
|
+
#A regular expression for a string.
|
41
|
+
RSF_STRING = /\S+/
|
42
|
+
|
43
|
+
#A regular expression for quoted strings.
|
44
|
+
RSF_QUOTED = /("([^\\"]|\\.)*")|('([^\\']|\\.)*')/
|
45
|
+
|
46
|
+
#Get the parsing engine. This is cached on a per-thread basis. That is to
|
47
|
+
#say, each thread gets its own \FormatEngine::Engine instance.
|
20
48
|
def self.get_engine
|
21
49
|
Thread.current[:ruby_sscanf_engine] ||= FormatEngine::Engine.new(
|
22
|
-
"%
|
23
|
-
"%*
|
50
|
+
"%a" => RSF_FLOAT_PARSE,
|
51
|
+
"%*a" => RSF_FLOAT_SKIP,
|
52
|
+
|
53
|
+
"%A" => RSF_FLOAT_PARSE,
|
54
|
+
"%*A" => RSF_FLOAT_SKIP,
|
55
|
+
|
56
|
+
"%b" => lambda {parse(RSF_BINARY) ? dst << found.to_i(2) : :break},
|
57
|
+
"%*b" => lambda {parse(RSF_BINARY) || :break},
|
24
58
|
|
25
59
|
"%c" => lambda {dst << grab},
|
26
60
|
"%*c" => lambda {grab},
|
27
61
|
|
28
|
-
"%d" => lambda {parse(
|
29
|
-
"%*d" => lambda {parse(
|
62
|
+
"%d" => lambda {parse(RSF_DECIMAL) ? dst << found.to_i : :break},
|
63
|
+
"%*d" => lambda {parse(RSF_DECIMAL) || :break},
|
64
|
+
|
65
|
+
"%e" => RSF_FLOAT_PARSE,
|
66
|
+
"%*e" => RSF_FLOAT_SKIP,
|
67
|
+
|
68
|
+
"%E" => RSF_FLOAT_PARSE,
|
69
|
+
"%*E" => RSF_FLOAT_SKIP,
|
30
70
|
|
31
|
-
"%f" =>
|
32
|
-
"%*f" =>
|
71
|
+
"%f" => RSF_FLOAT_PARSE,
|
72
|
+
"%*f" => RSF_FLOAT_SKIP,
|
33
73
|
|
34
|
-
"%
|
35
|
-
"%*
|
74
|
+
"%F" => RSF_FLOAT_PARSE,
|
75
|
+
"%*F" => RSF_FLOAT_SKIP,
|
36
76
|
|
37
|
-
"%
|
38
|
-
"%*
|
77
|
+
"%g" => RSF_FLOAT_PARSE,
|
78
|
+
"%*g" => RSF_FLOAT_SKIP,
|
39
79
|
|
40
|
-
"%
|
41
|
-
"%*
|
80
|
+
"%G" => RSF_FLOAT_PARSE,
|
81
|
+
"%*G" => RSF_FLOAT_SKIP,
|
82
|
+
|
83
|
+
"%i" => lambda {parse(RSF_INTEGER) ? dst << found.to_i(0) : :break},
|
84
|
+
"%*i" => lambda {parse(RSF_INTEGER) || :break},
|
85
|
+
|
86
|
+
"%j" => lambda {parse(RSF_COMPLEX) ? dst << Complex(found) : :break},
|
87
|
+
"%*j" => lambda {parse(RSF_COMPLEX) || :break},
|
88
|
+
|
89
|
+
"%o" => lambda {parse(RSF_OCTAL) ? dst << found.to_i(8) : :break},
|
90
|
+
"%*o" => lambda {parse(RSF_OCTAL) || :break},
|
42
91
|
|
43
92
|
"%q" => lambda do
|
44
|
-
parse(
|
93
|
+
if parse(RSF_QUOTED)
|
94
|
+
dst << found[1..-2].gsub(/\\./) {|seq| seq[-1]}
|
95
|
+
else
|
96
|
+
:break
|
97
|
+
end
|
45
98
|
end,
|
46
|
-
"%*q" => lambda {parse(QUOTED) || :break},
|
47
99
|
|
48
|
-
"
|
49
|
-
|
100
|
+
"%*q" => lambda {parse(RSF_QUOTED) || :break},
|
101
|
+
|
102
|
+
"%r" => lambda {parse(RSF_RATIONAL) ? dst << found.to_r : :break},
|
103
|
+
"%*r" => lambda {parse(RSF_RATIONAL) || :break},
|
104
|
+
|
105
|
+
"%s" => lambda {parse(RSF_STRING) ? dst << found : :break},
|
106
|
+
"%*s" => lambda {parse(RSF_STRING) || :break},
|
50
107
|
|
51
|
-
"%
|
52
|
-
"%*
|
108
|
+
"%u" => lambda {parse(RSF_UNSIGNED) ? dst << found.to_i : :break},
|
109
|
+
"%*u" => lambda {parse(RSF_UNSIGNED) || :break},
|
53
110
|
|
54
|
-
"%
|
55
|
-
"%*
|
111
|
+
"%x" => RSF_HEX_PARSE,
|
112
|
+
"%*x" => RSF_HEX_SKIP,
|
56
113
|
|
57
|
-
"%
|
58
|
-
"%*
|
114
|
+
"%X" => RSF_HEX_PARSE,
|
115
|
+
"%*X" => RSF_HEX_SKIP,
|
59
116
|
|
60
117
|
"%[" => lambda {parse(fmt.regex) ? dst << found : :break},
|
61
118
|
"%*[" => lambda {parse(fmt.regex) || :break})
|
data/lib/ruby_sscanf/version.rb
CHANGED
data/tests/scan_tests.rb
CHANGED
@@ -26,6 +26,9 @@ class ScanTester < Minitest::Test
|
|
26
26
|
result = "0 F FF FFF FFFF".sscanf "%x %x %x %x %x"
|
27
27
|
assert_equal([0, 15, 255, 4095, 65535] , result)
|
28
28
|
|
29
|
+
result = "0 F FF FFF FFFF".sscanf "%X %*x %*X %x %X"
|
30
|
+
assert_equal([0, 4095, 65535] , result)
|
31
|
+
|
29
32
|
result = "Hello Silly World".sscanf "%s %*s %s"
|
30
33
|
assert_equal(["Hello", "World"] , result)
|
31
34
|
|
@@ -41,9 +44,30 @@ class ScanTester < Minitest::Test
|
|
41
44
|
result = "42 The secret is X".sscanf "%i %*-2c%c"
|
42
45
|
assert_equal([42, "X"] , result)
|
43
46
|
|
47
|
+
result = "9.99 1.234e56 -1e100".sscanf "%a %e %g"
|
48
|
+
assert_equal([9.99, 1.234e56, -1e100] , result)
|
49
|
+
|
50
|
+
result = "9.99 1.234e56 -1e100".sscanf "%*a %e %g"
|
51
|
+
assert_equal([1.234e56, -1e100] , result)
|
52
|
+
|
53
|
+
result = "9.99 1.234e56 -1e100".sscanf "%a %*e %g"
|
54
|
+
assert_equal([9.99, -1e100] , result)
|
55
|
+
|
56
|
+
result = "9.99 1.234e56 -1e100".sscanf "%a %e %*g"
|
57
|
+
assert_equal([9.99, 1.234e56] , result)
|
58
|
+
|
59
|
+
result = "9.99 1.234e56 -1e100".sscanf "%A %E %G"
|
60
|
+
assert_equal([9.99, 1.234e56, -1e100] , result)
|
61
|
+
|
62
|
+
result = "9.99 1.234e56 -1e100".sscanf "%A %*E %G"
|
63
|
+
assert_equal([9.99, -1e100] , result)
|
64
|
+
|
44
65
|
result = "9.99 1.234e56 -1e100".sscanf "%f %f %f"
|
45
66
|
assert_equal([9.99, 1.234e56, -1e100] , result)
|
46
67
|
|
68
|
+
result = "9.99 1.234e56 -1e100".sscanf "%F %*F %F"
|
69
|
+
assert_equal([9.99, -1e100] , result)
|
70
|
+
|
47
71
|
result = "85% 75%".sscanf "%f%% %f%%"
|
48
72
|
assert_equal([85, 75] , result)
|
49
73
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_sscanf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Camilleri
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: format_engine
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- LICENSE.txt
|
107
107
|
- README.md
|
108
108
|
- Rakefile
|
109
|
+
- bench/bench.rb
|
109
110
|
- lib/ruby_sscanf.rb
|
110
111
|
- lib/ruby_sscanf/version.rb
|
111
112
|
- ruby_sscanf.gemspec
|
@@ -135,3 +136,4 @@ signing_key:
|
|
135
136
|
specification_version: 4
|
136
137
|
summary: A string parser.
|
137
138
|
test_files: []
|
139
|
+
has_rdoc:
|