ruby_sscanf 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9b3aff8ecf681b1e92152004c653cfdc2925206d
4
- data.tar.gz: e17f0677bae7aa9fa6eefb4307bab0f37f1490d2
3
+ metadata.gz: ec28a950c28b1a0af3450694cb4c79bcee8847c1
4
+ data.tar.gz: 1f37bacdef6e1e8b04b34614f50e925c771e49d8
5
5
  SHA512:
6
- metadata.gz: d7fc559a57dfdf39e518d42c9fed379003c775e6faf10ddcc0dffeaebd308a3e7d27b6bc4b3dcd7550cba3b4776d9c7d971c576137b5de58efbb278b324bfa86
7
- data.tar.gz: 018dbf925c84f151a2186db83f99e2056c7f14043009fc3d19f5b3d4e4241b5c6dfeb266c5332a5ff7b0371aab9051f18fed27da4ea128a086caeb5292648e43
6
+ metadata.gz: 826961429f3d5ada3a4e2b4a21313d4187abe41e2c2c0313c8684b3755e71a615df2550381dd996df38acb3887db909ce1dd7bfca125d8c96c7dd55d14f73463
7
+ data.tar.gz: 3193a380491b336751702198518cc956273234101b04acce068307c82900e410dfdcb126931bb951caa681fc4d60097b9383f881e9ecfbe7e41c0031152ecb6b
data/README.md CHANGED
@@ -32,23 +32,30 @@ format string is a description of that format. The output of the sscanf method
32
32
  is an array of data extracted from the input string.
33
33
 
34
34
  The format string consists of literal string components and format specifiers.
35
+ During execution of the sscanf method, each element in the format string is
36
+ used to find the corresponding data in the input string, optionally placing
37
+ the extracted data in the aforementioned output array. If a format element
38
+ cannot be matched to input data, processing stops at that point. Otherwise
39
+ processing continues until all the format elements are done.
35
40
 
36
41
  Literal string components match themselves in the input string. If the literal
37
- has a trailing space, then this matches zero or more spaces. The special
38
- sequence '%%' matches one '%'.
42
+ has a trailing space, then this matches zero or more spaces.
43
+ The special sequence '%%' matches one '%' in the input string.
39
44
 
40
45
  The layout of a format specifier is:
41
46
 
42
47
  %[skip_flag][width]format
43
48
 
44
49
  * The % sign is the lead-in character.
45
- * The optional skip flag, the * causes any data extracted to be ignored.
46
- * The width field is an integer field that determines the amount of text to be
50
+ * The optional skip flag, the *, causes any data extracted to be ignored.
51
+ * The width field is an integer that determines the amount of text to be
47
52
  parsed.
48
53
  * The format field determines the type of data being parsed.
49
54
 
50
55
  The supported format field values are:
51
56
  <br>
57
+ * a,e,f,g,A,E,F,G - Scan for an (optionally signed) floating point or
58
+ scientific notation number.
52
59
  * b - Scan for an (optionally signed) binary number with an optional
53
60
  leading '0b' or '0B'.
54
61
  * c - Grab the next character. If a positive width is specified, grab width
@@ -56,7 +63,6 @@ characters. For a negative width, grab characters to the position from the
56
63
  end of the input. For example a width of -1 will grab all of the remaining
57
64
  input data.
58
65
  * d - Scan for an (optionally signed) decimal number.
59
- * f - Scan for an (optionally signed) floating point number.
60
66
  * i - Scan for an (optionally signed) integer. If the number begins with '0x'
61
67
  or '0X', process hexadecimal; with '0b' or '0B', process binary, if '0', '0o',
62
68
  or '0O', process octal, else process decimal.
@@ -70,7 +76,7 @@ or "...".
70
76
  [+-]?decimal/decimal[r]?
71
77
  * s - Scan for a space terminated string.
72
78
  * u - Scan for a decimal number.
73
- * x - Scan for an (optionally signed) hexadecimal number with an optional
79
+ * x,X - Scan for an (optionally signed) hexadecimal number with an optional
74
80
  leading '0x' or '0X'.
75
81
  * [chars] - Scan for a contiguous string of characters in the set [chars].
76
82
  * [^chars] - Scan for a contiguous string of characters not in the set [chars]
@@ -78,56 +84,80 @@ leading '0x' or '0X'.
78
84
  ## Examples
79
85
  Here are a few examples of the sscanf method in action.
80
86
 
87
+ ```ruby
81
88
  "12 34 -56".sscanf "%d %2d %4d"
82
- <br>returns [12, 34, -56]
89
+ returns [12, 34, -56]
83
90
 
84
91
  "255 0b11111111 0377 0xFF 0 ".sscanf "%i %i %i %i %i"
85
- <br>returns [255, 255, 255, 255, 0]
92
+ returns [255, 255, 255, 255, 0]
86
93
 
87
94
  "7 10 377".sscanf "%o %o %o"
88
- <br>returns [7, 8, 255]
95
+ returns [7, 8, 255]
89
96
 
90
97
  "10 10011 11110000".sscanf "%b %b %b"
91
- <br>returns [2, 19, 240]
98
+ returns [2, 19, 240]
92
99
 
93
100
  "0 F FF FFF FFFF".sscanf "%x %x %x %x %x"
94
- <br>returns [0, 15, 255, 4095, 65535]
101
+ returns [0, 15, 255, 4095, 65535]
95
102
 
96
103
  "Hello Silly World".sscanf "%s %*s %s"
97
- <br>returns ["Hello", "World"]
104
+ returns ["Hello", "World"]
98
105
 
99
106
  "Hello Silly World".sscanf "%5c %*5c %5c"
100
- <br>returns ["Hello", "World"]
107
+ returns ["Hello", "World"]
101
108
 
102
109
  "42 The secret is X".sscanf "%i %-1c"
103
- <br>returns [42, "The secret is X"]
110
+ returns [42, "The secret is X"]
104
111
 
105
112
  "42 The secret is X".sscanf "%i %-2c%c"
106
- <br>returns [42, "The secret is ", "X"]
113
+ returns [42, "The secret is ", "X"]
107
114
 
108
115
  "42 The secret is X".sscanf "%i %*-2c%c"
109
- <br>returns [42, "X"]
116
+ returns [42, "X"]
110
117
 
111
118
  "9.99 1.234e56 -1e100".sscanf "%f %f %f"
112
- <br>returns [9.99, 1.234e56, -1e100]
119
+ returns [9.99, 1.234e56, -1e100]
113
120
 
114
121
  "85% 75%".sscanf "%f%% %f%%"
115
- <br>returns [85, 75]
122
+ returns [85, 75]
116
123
 
117
124
  "12 34 -56".sscanf "%u %u %u"
118
- <br>returns [12, 34]
125
+ returns [12, 34]
119
126
 
120
127
  "1/2 3/4r -5/6".sscanf "%r %r %r"
121
- <br>returns ['1/2'.to_r, '3/4'.to_r, '-5/6'.to_r]
128
+ returns ['1/2'.to_r, '3/4'.to_r, '-5/6'.to_r]
122
129
 
123
130
  "1+2i 3+4j -5e10-6.2i".sscanf "%j %j %j"
124
- <br>returns [Complex('1+2i'), Complex('3+4j'), Complex('-5e10-6.2i')]
131
+ returns [Complex('1+2i'), Complex('3+4j'), Complex('-5e10-6.2i')]
125
132
 
126
133
  "'quote' 'silly' \"un quote\" 'a \\'' ".sscanf "%q %*q %q %q"
127
- <br>returns ["quote", "un quote", "a '"]
134
+ returns ["quote", "un quote", "a '"]
128
135
 
129
136
  "a b c".sscanf "%[a] %[b] %[c]"
130
- <br>returns ["a", "b", "c"]
137
+ returns ["a", "b", "c"]
138
+ ```
139
+
140
+ ## Benchmarks
141
+
142
+ I ran a test just to make sure that ruby_sscanf was not terribly
143
+ under-performant when compared to the ruby standard library version. I was
144
+ pleased to see that in fact ruby_sscanf was faster. Here are the results:
145
+
146
+ Calculating -------------------------------------
147
+ Scan strings with ruby_sscanf
148
+ 1.520k i/100ms
149
+ Scan strings with scanf
150
+ 308.000 i/100ms
151
+ -------------------------------------------------
152
+ Scan strings with ruby_sscanf
153
+ 15.844k (± 5.2%) i/s - 79.040k
154
+ Scan strings with scanf
155
+ 3.127k (± 4.2%) i/s - 15.708k
156
+
157
+ Comparison:
158
+ Scan strings with ruby_sscanf: 15843.8 i/s
159
+ Scan strings with scanf: 3126.7 i/s - 5.07x slower
160
+
131
161
 
132
162
  ## Contributing
133
163
 
data/bench/bench.rb ADDED
@@ -0,0 +1,18 @@
1
+ require "benchmark/ips"
2
+ require 'scanf'
3
+ require 'ruby_sscanf'
4
+
5
+ def use_scanf
6
+ '12 34 56 89 1.234 1.0e10'.scanf('%d %d %d %d %f %f')
7
+ end
8
+
9
+ def use_ruby_sscanf
10
+ '12 34 56 89 1.234 1.0e10'.sscanf('%d %d %d %d %f %f')
11
+ end
12
+
13
+ Benchmark.ips do |x|
14
+ x.report("Scan strings with ruby_sscanf") { use_ruby_sscanf }
15
+ x.report("Scan strings with scanf") { use_scanf }
16
+ x.compare!
17
+ end
18
+
data/lib/ruby_sscanf.rb CHANGED
@@ -1,61 +1,118 @@
1
-
2
1
  require 'format_engine'
3
2
  require_relative 'ruby_sscanf/version'
4
3
 
4
+ #The String class is monkey patched to support sscanf.
5
5
  class String
6
6
 
7
- DECIMAL = /[+-]?\d+/
8
- HEX = /[+-]?(0[xX])?\h+/
9
- OCTAL = /[+-]?(0[oO])?[0-7]+/
10
- BINARY = /[+-]?(0[bB])?[01]+/
11
- INTEGER = /[+-]?((0[xX]\h+)|(0[bB][01]+)|(0[oO]?[0-7]*)|([1-9]\d*))/
12
- FLOAT = /[+-]?\d+(\.\d+)?([eE][+-]?\d+)?/
13
- RATIONAL = /[+-]?\d+\/\d+(r)?/
14
- COMPLEX = %r{(?<num> \d+(\.\d+)?([eE][+-]?\d+)?){0}
15
- [+-]?\g<num>[+-]\g<num>[ij]
16
- }x
17
- QUOTED = /("([^\\"]|\\.)*")|('([^\\']|\\.)*')/
18
-
19
- #Get the parsing engine.
7
+ #A regular expression for decimal integers.
8
+ RSF_DECIMAL = /[+-]?\d+/
9
+
10
+ #A regular expression for unsigned decimal integers.
11
+ RSF_UNSIGNED = /[+]?\d+/
12
+
13
+ #A regular expression for hexadecimal integers.
14
+ RSF_HEX = /[+-]?(0[xX])?\h+/
15
+ RSF_HEX_PARSE = lambda {parse(RSF_HEX) ? dst << found.to_i(16) : :break}
16
+ RSF_HEX_SKIP = lambda {parse(RSF_HEX) || :break}
17
+
18
+ #A regular expression for octal integers.
19
+ RSF_OCTAL = /[+-]?(0[oO])?[0-7]+/
20
+
21
+ #A regular expression for binary integers.
22
+ RSF_BINARY = /[+-]?(0[bB])?[01]+/
23
+
24
+ #A regular expression for flexible base integers.
25
+ RSF_INTEGER = /[+-]?((0[xX]\h+)|(0[bB][01]+)|(0[oO]?[0-7]*)|([1-9]\d*))/
26
+
27
+ #A regular expression for floating point and scientific notation numbers.
28
+ RSF_FLOAT = /[+-]?\d+(\.\d+)?([eE][+-]?\d+)?/
29
+ RSF_FLOAT_PARSE = lambda {parse(RSF_FLOAT) ? dst << found.to_f : :break}
30
+ RSF_FLOAT_SKIP = lambda {parse(RSF_FLOAT) || :break}
31
+
32
+ #A regular expression for rational numbers.
33
+ RSF_RATIONAL = /[+-]?\d+\/\d+(r)?/
34
+
35
+ #A regular expression for complex numbers.
36
+ RSF_COMPLEX = %r{(?<num> \d+(\.\d+)?([eE][+-]?\d+)?){0}
37
+ [+-]?\g<num>[+-]\g<num>[ij]
38
+ }x
39
+
40
+ #A regular expression for a string.
41
+ RSF_STRING = /\S+/
42
+
43
+ #A regular expression for quoted strings.
44
+ RSF_QUOTED = /("([^\\"]|\\.)*")|('([^\\']|\\.)*')/
45
+
46
+ #Get the parsing engine. This is cached on a per-thread basis. That is to
47
+ #say, each thread gets its own \FormatEngine::Engine instance.
20
48
  def self.get_engine
21
49
  Thread.current[:ruby_sscanf_engine] ||= FormatEngine::Engine.new(
22
- "%b" => lambda {parse(BINARY) ? dst << found.to_i(2) : :break},
23
- "%*b" => lambda {parse(BINARY) || :break},
50
+ "%a" => RSF_FLOAT_PARSE,
51
+ "%*a" => RSF_FLOAT_SKIP,
52
+
53
+ "%A" => RSF_FLOAT_PARSE,
54
+ "%*A" => RSF_FLOAT_SKIP,
55
+
56
+ "%b" => lambda {parse(RSF_BINARY) ? dst << found.to_i(2) : :break},
57
+ "%*b" => lambda {parse(RSF_BINARY) || :break},
24
58
 
25
59
  "%c" => lambda {dst << grab},
26
60
  "%*c" => lambda {grab},
27
61
 
28
- "%d" => lambda {parse(DECIMAL) ? dst << found.to_i : :break},
29
- "%*d" => lambda {parse(DECIMAL) || :break},
62
+ "%d" => lambda {parse(RSF_DECIMAL) ? dst << found.to_i : :break},
63
+ "%*d" => lambda {parse(RSF_DECIMAL) || :break},
64
+
65
+ "%e" => RSF_FLOAT_PARSE,
66
+ "%*e" => RSF_FLOAT_SKIP,
67
+
68
+ "%E" => RSF_FLOAT_PARSE,
69
+ "%*E" => RSF_FLOAT_SKIP,
30
70
 
31
- "%f" => lambda {parse(FLOAT) ? dst << found.to_f : :break},
32
- "%*f" => lambda {parse(FLOAT) || :break},
71
+ "%f" => RSF_FLOAT_PARSE,
72
+ "%*f" => RSF_FLOAT_SKIP,
33
73
 
34
- "%i" => lambda {parse(INTEGER) ? dst << found.to_i(0) : :break},
35
- "%*i" => lambda {parse(INTEGER) || :break},
74
+ "%F" => RSF_FLOAT_PARSE,
75
+ "%*F" => RSF_FLOAT_SKIP,
36
76
 
37
- "%j" => lambda {parse(COMPLEX) ? dst << Complex(found) : :break},
38
- "%*j" => lambda {parse(COMPLEX) || :break},
77
+ "%g" => RSF_FLOAT_PARSE,
78
+ "%*g" => RSF_FLOAT_SKIP,
39
79
 
40
- "%o" => lambda {parse(OCTAL) ? dst << found.to_i(8) : :break},
41
- "%*o" => lambda {parse(OCTAL) || :break},
80
+ "%G" => RSF_FLOAT_PARSE,
81
+ "%*G" => RSF_FLOAT_SKIP,
82
+
83
+ "%i" => lambda {parse(RSF_INTEGER) ? dst << found.to_i(0) : :break},
84
+ "%*i" => lambda {parse(RSF_INTEGER) || :break},
85
+
86
+ "%j" => lambda {parse(RSF_COMPLEX) ? dst << Complex(found) : :break},
87
+ "%*j" => lambda {parse(RSF_COMPLEX) || :break},
88
+
89
+ "%o" => lambda {parse(RSF_OCTAL) ? dst << found.to_i(8) : :break},
90
+ "%*o" => lambda {parse(RSF_OCTAL) || :break},
42
91
 
43
92
  "%q" => lambda do
44
- parse(QUOTED) ? dst << found[1..-2].gsub(/\\./) {|seq| seq[-1]} : :break
93
+ if parse(RSF_QUOTED)
94
+ dst << found[1..-2].gsub(/\\./) {|seq| seq[-1]}
95
+ else
96
+ :break
97
+ end
45
98
  end,
46
- "%*q" => lambda {parse(QUOTED) || :break},
47
99
 
48
- "%r" => lambda {parse(RATIONAL) ? dst << found.to_r : :break},
49
- "%*r" => lambda {parse(RATIONAL) || :break},
100
+ "%*q" => lambda {parse(RSF_QUOTED) || :break},
101
+
102
+ "%r" => lambda {parse(RSF_RATIONAL) ? dst << found.to_r : :break},
103
+ "%*r" => lambda {parse(RSF_RATIONAL) || :break},
104
+
105
+ "%s" => lambda {parse(RSF_STRING) ? dst << found : :break},
106
+ "%*s" => lambda {parse(RSF_STRING) || :break},
50
107
 
51
- "%s" => lambda {parse(/\S+/) ? dst << found : :break},
52
- "%*s" => lambda {parse(/\S+/) || :break},
108
+ "%u" => lambda {parse(RSF_UNSIGNED) ? dst << found.to_i : :break},
109
+ "%*u" => lambda {parse(RSF_UNSIGNED) || :break},
53
110
 
54
- "%u" => lambda {parse(/\d+/) ? dst << found.to_i : :break},
55
- "%*u" => lambda {parse(/\d+/) || :break},
111
+ "%x" => RSF_HEX_PARSE,
112
+ "%*x" => RSF_HEX_SKIP,
56
113
 
57
- "%x" => lambda {parse(HEX) ? dst << found.to_i(16) : :break},
58
- "%*x" => lambda {parse(HEX) || :break},
114
+ "%X" => RSF_HEX_PARSE,
115
+ "%*X" => RSF_HEX_SKIP,
59
116
 
60
117
  "%[" => lambda {parse(fmt.regex) ? dst << found : :break},
61
118
  "%*[" => lambda {parse(fmt.regex) || :break})
@@ -1,3 +1,5 @@
1
+ #The ruby_sscanf code doesn't really live here; this module only holds the version.
1
2
  module RubySscanf
2
- VERSION = "0.1.1"
3
+ #The gem's version.
4
+ VERSION = "0.1.2"
3
5
  end
data/tests/scan_tests.rb CHANGED
@@ -26,6 +26,9 @@ class ScanTester < Minitest::Test
26
26
  result = "0 F FF FFF FFFF".sscanf "%x %x %x %x %x"
27
27
  assert_equal([0, 15, 255, 4095, 65535] , result)
28
28
 
29
+ result = "0 F FF FFF FFFF".sscanf "%X %*x %*X %x %X"
30
+ assert_equal([0, 4095, 65535] , result)
31
+
29
32
  result = "Hello Silly World".sscanf "%s %*s %s"
30
33
  assert_equal(["Hello", "World"] , result)
31
34
 
@@ -41,9 +44,30 @@ class ScanTester < Minitest::Test
41
44
  result = "42 The secret is X".sscanf "%i %*-2c%c"
42
45
  assert_equal([42, "X"] , result)
43
46
 
47
+ result = "9.99 1.234e56 -1e100".sscanf "%a %e %g"
48
+ assert_equal([9.99, 1.234e56, -1e100] , result)
49
+
50
+ result = "9.99 1.234e56 -1e100".sscanf "%*a %e %g"
51
+ assert_equal([1.234e56, -1e100] , result)
52
+
53
+ result = "9.99 1.234e56 -1e100".sscanf "%a %*e %g"
54
+ assert_equal([9.99, -1e100] , result)
55
+
56
+ result = "9.99 1.234e56 -1e100".sscanf "%a %e %*g"
57
+ assert_equal([9.99, 1.234e56] , result)
58
+
59
+ result = "9.99 1.234e56 -1e100".sscanf "%A %E %G"
60
+ assert_equal([9.99, 1.234e56, -1e100] , result)
61
+
62
+ result = "9.99 1.234e56 -1e100".sscanf "%A %*E %G"
63
+ assert_equal([9.99, -1e100] , result)
64
+
44
65
  result = "9.99 1.234e56 -1e100".sscanf "%f %f %f"
45
66
  assert_equal([9.99, 1.234e56, -1e100] , result)
46
67
 
68
+ result = "9.99 1.234e56 -1e100".sscanf "%F %*F %F"
69
+ assert_equal([9.99, -1e100] , result)
70
+
47
71
  result = "85% 75%".sscanf "%f%% %f%%"
48
72
  assert_equal([85, 75] , result)
49
73
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_sscanf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Camilleri
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-06 00:00:00.000000000 Z
11
+ date: 2016-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: format_engine
@@ -106,6 +106,7 @@ files:
106
106
  - LICENSE.txt
107
107
  - README.md
108
108
  - Rakefile
109
+ - bench/bench.rb
109
110
  - lib/ruby_sscanf.rb
110
111
  - lib/ruby_sscanf/version.rb
111
112
  - ruby_sscanf.gemspec
@@ -135,3 +136,4 @@ signing_key:
135
136
  specification_version: 4
136
137
  summary: A string parser.
137
138
  test_files: []
139
+ has_rdoc: