unicode_scanner 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.rvmrc +1 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +34 -0
- data/LICENSE.txt +20 -0
- data/README.md +45 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/lib/unicode_scanner.rb +655 -0
- data/spec/spec_helper.rb +12 -0
- data/spec/unicode_scanner_spec.rb +206 -0
- data/unicode_scanner.gemspec +64 -0
- metadata +143 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm 1.9.3@scanner --create
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
diff-lcs (1.1.3)
|
5
|
+
git (1.2.5)
|
6
|
+
jeweler (1.8.4)
|
7
|
+
bundler (~> 1.0)
|
8
|
+
git (>= 1.2.5)
|
9
|
+
rake
|
10
|
+
rdoc
|
11
|
+
json (1.7.3)
|
12
|
+
rake (0.9.2.2)
|
13
|
+
rdoc (3.12)
|
14
|
+
json (~> 1.4)
|
15
|
+
redcarpet (2.1.1)
|
16
|
+
rspec (2.11.0)
|
17
|
+
rspec-core (~> 2.11.0)
|
18
|
+
rspec-expectations (~> 2.11.0)
|
19
|
+
rspec-mocks (~> 2.11.0)
|
20
|
+
rspec-core (2.11.0)
|
21
|
+
rspec-expectations (2.11.1)
|
22
|
+
diff-lcs (~> 1.1.3)
|
23
|
+
rspec-mocks (2.11.1)
|
24
|
+
yard (0.8.2.1)
|
25
|
+
|
26
|
+
PLATFORMS
|
27
|
+
ruby
|
28
|
+
|
29
|
+
DEPENDENCIES
|
30
|
+
bundler
|
31
|
+
jeweler
|
32
|
+
redcarpet
|
33
|
+
rspec
|
34
|
+
yard
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Tim Morgan
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
Unicode String Scanner
|
2
|
+
======================
|
3
|
+
|
4
|
+
A Unicode-aware implementation of Ruby's `StringScanner`.
|
5
|
+
|
6
|
+
| | |
|
7
|
+
|:------------|:--------------------------------|
|
8
|
+
| **Author** | Tim Morgan |
|
9
|
+
| **Version** | 1.0 (Jul 11, 2012) |
|
10
|
+
| **License** | Released under the MIT license. |
|
11
|
+
|
12
|
+
About
|
13
|
+
-----
|
14
|
+
|
15
|
+
Did you know that `StringScanner` splits codepoints? Neither did I. This one
|
16
|
+
doesn't.
|
17
|
+
|
18
|
+
**When would I want to use this?** When you want to use `StringScanner` on a
|
19
|
+
Unicode (UTF-_n_) string.
|
20
|
+
|
21
|
+
**When would I _not_ want to use this?** If you're interested in speed. This is
|
22
|
+
slower than StringScanner because a) it's not written in native C, and b) it's
|
23
|
+
slower to traverse Unicode strings anyway because characters can have varying
|
24
|
+
byte sizes.
|
25
|
+
|
26
|
+
Installation
|
27
|
+
------------
|
28
|
+
|
29
|
+
Simply add this gem to your project's `Gemfile`:
|
30
|
+
|
31
|
+
```` ruby
|
32
|
+
gem 'unicode_scanner'
|
33
|
+
````
|
34
|
+
|
35
|
+
Usage
|
36
|
+
-----
|
37
|
+
|
38
|
+
The `UnicodeScanner` object responds to exactly the same API as
|
39
|
+
[StringScanner](http://ruby-doc.org/stdlib-1.9.3/libdoc/strscan/rdoc/StringScanner.html),
|
40
|
+
with the exception of the following methods:
|
41
|
+
|
42
|
+
* `getbyte`
|
43
|
+
* any obsolete methods
|
44
|
+
|
45
|
+
For more information, see the {UnicodeScanner} class documentation.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "unicode_scanner"
|
18
|
+
gem.homepage = "http://github.com/RISCfuture/unicode_scanner"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Unicode-aware implementation of StringScanner}
|
21
|
+
gem.description = %Q{An implementation of StringScanner that doesn't split multibyte characters.}
|
22
|
+
gem.email = "git@timothymorgan.info"
|
23
|
+
gem.authors = ["Tim Morgan"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rspec/core'
|
29
|
+
require 'rspec/core/rake_task'
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
end
|
33
|
+
|
34
|
+
task default: :spec
|
35
|
+
|
36
|
+
require 'yard'
|
37
|
+
|
38
|
+
# bring sexy back (sexy == tables)
|
39
|
+
module YARD::Templates::Helpers::HtmlHelper
|
40
|
+
def html_markup_markdown(text)
|
41
|
+
markup_class(:markdown).new(text, :gh_blockcode, :fenced_code, :autolink, :tables).to_html
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
YARD::Rake::YardocTask.new('doc') do |doc|
|
46
|
+
doc.options << '-m' << 'markdown' << '-M' << 'redcarpet'
|
47
|
+
doc.options << '--protected' << '--no-private'
|
48
|
+
doc.options << '-r' << 'README.md'
|
49
|
+
doc.options << '-o' << 'doc'
|
50
|
+
doc.options << '--title' << 'Unicode String Scanner Documentation'
|
51
|
+
|
52
|
+
doc.files = %w( lib/**/* README.md )
|
53
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
@@ -0,0 +1,655 @@
|
|
1
|
+
# UnicodeScanner provides for Unicode-aware lexical scanning operations on a
|
2
|
+
# `String`. Here is an example of its usage:
|
3
|
+
#
|
4
|
+
# ```` ruby
|
5
|
+
# s = UnicodeScanner.new('This is an example string')
|
6
|
+
# s.eos? # -> false
|
7
|
+
#
|
8
|
+
# p s.scan(/\w+/) # -> "This"
|
9
|
+
# p s.scan(/\w+/) # -> nil
|
10
|
+
# p s.scan(/\s+/) # -> " "
|
11
|
+
# p s.scan(/\s+/) # -> nil
|
12
|
+
# p s.scan(/\w+/) # -> "is"
|
13
|
+
# s.eos? # -> false
|
14
|
+
#
|
15
|
+
# p s.scan(/\s+/) # -> " "
|
16
|
+
# p s.scan(/\w+/) # -> "an"
|
17
|
+
# p s.scan(/\s+/) # -> " "
|
18
|
+
# p s.scan(/\w+/) # -> "example"
|
19
|
+
# p s.scan(/\s+/) # -> " "
|
20
|
+
# p s.scan(/\w+/) # -> "string"
|
21
|
+
# s.eos? # -> true
|
22
|
+
#
|
23
|
+
# p s.scan(/\s+/) # -> nil
|
24
|
+
# p s.scan(/\w+/) # -> nil
|
25
|
+
# ````
|
26
|
+
#
|
27
|
+
# Scanning a string means remembering the position of a _scan pointer_, which is
|
28
|
+
# just an index. The point of scanning is to move forward a bit at a time, so
|
29
|
+
# matches are sought after the scan pointer; usually immediately after it.
|
30
|
+
#
|
31
|
+
# Given the string "test string", here are the pertinent scan pointer positions:
|
32
|
+
#
|
33
|
+
# ````
|
34
|
+
# t e s t s t r i n g
|
35
|
+
# 0 1 2 ... 1
|
36
|
+
# 0
|
37
|
+
# ````
|
38
|
+
#
|
39
|
+
# When you {#scan} for a pattern (a regular expression), the match must occur at
|
40
|
+
# the character after the scan pointer. If you use {#scan_until}, then the
|
41
|
+
# match can occur anywhere after the scan pointer. In both cases, the scan
|
42
|
+
# pointer moves _just beyond_ the last character of the match, ready to scan
|
43
|
+
# again from the next character onwards. This is demonstrated by the example
|
44
|
+
# above.
|
45
|
+
#
|
46
|
+
# Method Categories
|
47
|
+
# -----------------
|
48
|
+
#
|
49
|
+
# There are other methods besides the plain scanners. You can look ahead in the
|
50
|
+
# string without actually scanning. You can access the most recent match. You
|
51
|
+
# can modify the string being scanned, reset or terminate the scanner, find out
|
52
|
+
# or change the position of the scan pointer, skip ahead, and so on.
|
53
|
+
#
|
54
|
+
# ### Advancing the Scan Pointer
|
55
|
+
#
|
56
|
+
# - {#getch}
|
57
|
+
# - {#scan}
|
58
|
+
# - {#scan_until}
|
59
|
+
# - {#skip}
|
60
|
+
# - {#skip_until}
|
61
|
+
#
|
62
|
+
# ### Looking Ahead
|
63
|
+
#
|
64
|
+
# - {#check}
|
65
|
+
# - {#check_until}
|
66
|
+
# - {#exist?}
|
67
|
+
# - {#match?}
|
68
|
+
# - {#peek}
|
69
|
+
#
|
70
|
+
# ### Finding Where we Are
|
71
|
+
#
|
72
|
+
# - {#beginning_of_line?} ({#bol?})
|
73
|
+
# - {#eos?}
|
74
|
+
# - {#rest_size}
|
75
|
+
# - {#pos}
|
76
|
+
#
|
77
|
+
# ### Setting Where we Are
|
78
|
+
#
|
79
|
+
# - {#reset}
|
80
|
+
# - {#terminate}
|
81
|
+
# - {#pos=}
|
82
|
+
#
|
83
|
+
# ### Match Data
|
84
|
+
#
|
85
|
+
# - {#matched}
|
86
|
+
# - {#matched?}
|
87
|
+
# - {#matched_size}
|
88
|
+
# - {#[]}
|
89
|
+
# - {#pre_match}
|
90
|
+
# - {#post_match}
|
91
|
+
#
|
92
|
+
# ### Miscellaneous
|
93
|
+
#
|
94
|
+
# - {#<<}
|
95
|
+
# - {#concat}
|
96
|
+
# - {#string}
|
97
|
+
# - {#string=}
|
98
|
+
# - {#unscan}
|
99
|
+
#
|
100
|
+
# There are aliases to several of the methods.
|
101
|
+
|
102
|
+
class UnicodeScanner
|
103
|
+
INSPECT_LENGTH = 5
|
104
|
+
|
105
|
+
# Creates a new UnicodeScanner object to scan over the given `string`.
|
106
|
+
#
|
107
|
+
# @param [String] string The string to iterate over.
|
108
|
+
|
109
|
+
def initialize(string)
|
110
|
+
@string = string
|
111
|
+
@matches = nil
|
112
|
+
@matched = false
|
113
|
+
@current = 0
|
114
|
+
@previous = 0
|
115
|
+
end
|
116
|
+
|
117
|
+
# Appends `str` to the string being scanned. This method does not affect the scan
|
118
|
+
# pointer.
|
119
|
+
#
|
120
|
+
# @param [String] str The string to append.
|
121
|
+
#
|
122
|
+
# @example
|
123
|
+
# s = UnicodeScanner.new("Fri Dec 12 1975 14:39")
|
124
|
+
# s.scan(/Fri /)
|
125
|
+
# s << " +1000 GMT"
|
126
|
+
# s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
|
127
|
+
# s.scan(/Dec/) # -> "Dec"
|
128
|
+
|
129
|
+
def concat(str)
|
130
|
+
@string.concat str
|
131
|
+
end
|
132
|
+
|
133
|
+
alias << concat
|
134
|
+
|
135
|
+
# Return the <i>n</i>th subgroup in the most recent match.
|
136
|
+
#
|
137
|
+
# @param [Fixnum] n The index of the subgroup to return.
|
138
|
+
# @return [String, nil] The subgroup, if it exists.
|
139
|
+
#
|
140
|
+
# @example
|
141
|
+
# s = UnicodeScanner.new("Fri Dec 12 1975 14:39")
|
142
|
+
# s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
|
143
|
+
# s[0] # -> "Fri Dec 12 "
|
144
|
+
# s[1] # -> "Fri"
|
145
|
+
# s[2] # -> "Dec"
|
146
|
+
# s[3] # -> "12"
|
147
|
+
# s.post_match # -> "1975 14:39"
|
148
|
+
# s.pre_match # -> ""
|
149
|
+
|
150
|
+
def [](n)
|
151
|
+
@matched ? @matches[n] : nil
|
152
|
+
end
|
153
|
+
|
154
|
+
# @return [true, false] `true` iff the scan pointer is at the beginning of the
|
155
|
+
# line.
|
156
|
+
#
|
157
|
+
# @example
|
158
|
+
# s = UnicodeScanner.new("test\ntest\n")
|
159
|
+
# s.bol? # => true
|
160
|
+
# s.scan(/te/)
|
161
|
+
# s.bol? # => false
|
162
|
+
# s.scan(/st\n/)
|
163
|
+
# s.bol? # => true
|
164
|
+
# s.terminate
|
165
|
+
# s.bol? # => true
|
166
|
+
|
167
|
+
def beginning_of_line?
|
168
|
+
return nil if @current > @string.length
|
169
|
+
return true if @current == 0
|
170
|
+
return @string[@current - 1] == "\n"
|
171
|
+
end
|
172
|
+
|
173
|
+
alias bol? beginning_of_line?
|
174
|
+
|
175
|
+
# This returns the value that {#scan} would return, without advancing the scan
|
176
|
+
# pointer. The match register is affected, though.
|
177
|
+
#
|
178
|
+
# Mnemonic: it "checks" to see whether a {#scan} will return a value.
|
179
|
+
#
|
180
|
+
# @param [Regexp] pattern The pattern to scan for.
|
181
|
+
# @return [String, nil] The matched segment, if matched.
|
182
|
+
#
|
183
|
+
# @example
|
184
|
+
# s = UnicodeScanner.new("Fri Dec 12 1975 14:39")
|
185
|
+
# s.check /Fri/ # -> "Fri"
|
186
|
+
# s.pos # -> 0
|
187
|
+
# s.matched # -> "Fri"
|
188
|
+
# s.check /12/ # -> nil
|
189
|
+
# s.matched # -> nil
|
190
|
+
|
191
|
+
def check(pattern)
|
192
|
+
do_scan pattern, false, true, true
|
193
|
+
end
|
194
|
+
|
195
|
+
# This returns the value that {#scan_until} would return, without advancing
|
196
|
+
# the scan pointer. The match register is affected, though.
|
197
|
+
#
|
198
|
+
# Mnemonic: it "checks" to see whether a {#scan_until} will return a value.
|
199
|
+
#
|
200
|
+
# @param [Regexp] pattern The pattern to scan until reaching.
|
201
|
+
# @return [String, nil] The matched segment, if matched.
|
202
|
+
#
|
203
|
+
# @example
|
204
|
+
# s = UnicodeScanner.new("Fri Dec 12 1975 14:39")
|
205
|
+
# s.check_until /12/ # -> "Fri Dec 12"
|
206
|
+
# s.pos # -> 0
|
207
|
+
# s.matched # -> "12"
|
208
|
+
|
209
|
+
def check_until(pattern)
|
210
|
+
do_scan pattern, false, true, false
|
211
|
+
end
|
212
|
+
|
213
|
+
# @return [true, false] `true` if the scan pointer is at the end of the string.
|
214
|
+
#
|
215
|
+
# @example
|
216
|
+
# s = UnicodeScanner.new('test string')
|
217
|
+
# p s.eos? # => false
|
218
|
+
# s.scan(/test/)
|
219
|
+
# p s.eos? # => false
|
220
|
+
# s.terminate
|
221
|
+
# p s.eos? # => true
|
222
|
+
|
223
|
+
def eos?
|
224
|
+
@current >= @string.length
|
225
|
+
end
|
226
|
+
|
227
|
+
# Looks _ahead_ to see if the `pattern` exists _anywhere_ in the string,
|
228
|
+
# without advancing the scan pointer. This predicates whether a {#scan_until}
|
229
|
+
# will return a value.
|
230
|
+
#
|
231
|
+
# @param [Regexp] pattern The pattern to search for.
|
232
|
+
# @return [Fixnum, nil] The number of characters from the scan pointer to the end of the match, or `nil` if the pattern does not exist ahead.
|
233
|
+
#
|
234
|
+
# @example
|
235
|
+
# s = UnicodeScanner.new('test string')
|
236
|
+
# s.exist? /s/ # -> 3
|
237
|
+
# s.scan /test/ # -> "test"
|
238
|
+
# s.exist? /s/ # -> 2
|
239
|
+
# s.exist? /e/ # -> nil
|
240
|
+
|
241
|
+
def exist?(pattern)
|
242
|
+
do_scan pattern, false, false, false
|
243
|
+
end
|
244
|
+
|
245
|
+
# Scans one character and returns it.
|
246
|
+
#
|
247
|
+
# @return [String, nil] The character, or `nil` if the scan pointer is at the end of the string.
|
248
|
+
#
|
249
|
+
# @example
|
250
|
+
# s = UnicodeScanner.new("ab")
|
251
|
+
# s.getch # => "a"
|
252
|
+
# s.getch # => "b"
|
253
|
+
# s.getch # => nil
|
254
|
+
#
|
255
|
+
# $KCODE = 'EUC'
|
256
|
+
# s = UnicodeScanner.new("\244\242")
|
257
|
+
# s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP
|
258
|
+
# s.getch # => nil
|
259
|
+
|
260
|
+
def getch
  # Nothing left to read: signal end of string with nil.
  return nil if eos?

  # The /m flag makes `.` match "\n" as well. Without it, a newline at the
  # scan pointer fails the head-only match, so getch would return nil and
  # never advance past the newline. The /u flag is kept from the original
  # so the pattern is still treated as UTF-8.
  # Flags: advance the pointer, return the matched string, match at head only.
  do_scan(/./mu, true, true, true)
end
|
264
|
+
|
265
|
+
# Returns a string that represents the UnicodeScanner object, showing:
|
266
|
+
#
|
267
|
+
# * the current position
|
268
|
+
# * the size of the string
|
269
|
+
# * the characters surrounding the scan pointer
|
270
|
+
#
|
271
|
+
# @return [String] A description of this object.
|
272
|
+
#
|
273
|
+
# @example
|
274
|
+
# s = UnicodeScanner.new("Fri Dec 12 1975 14:39")
|
275
|
+
# s.inspect # -> '#<UnicodeScanner 0/21 @ "Fri D...">'
|
276
|
+
# s.scan_until /12/ # -> "Fri Dec 12"
|
277
|
+
# s.inspect # -> '#<UnicodeScanner 10/21 "...ec 12" @ " 1975...">'
|
278
|
+
|
279
|
+
def inspect
|
280
|
+
return "#<#{self.class.to_s} (uninitialized)>" if @string.nil?
|
281
|
+
return "#<#{self.class.to_s} fin>" if eos?
|
282
|
+
|
283
|
+
if @current == 0
|
284
|
+
return "#<%s %d/%d @ %s>" % [self.class.to_s, @current, @string.length, inspect_after.inspect]
|
285
|
+
end
|
286
|
+
|
287
|
+
"#<%s %d/%d %s @ %s>" % [self.class.to_s, @current, @string.length, inspect_before.inspect, inspect_after.inspect]
|
288
|
+
end
|
289
|
+
|
290
|
+
# Tests whether the given `pattern` is matched from the current scan pointer.
|
291
|
+
# Returns the length of the match, or `nil`. The scan pointer is not advanced.
|
292
|
+
#
|
293
|
+
# @param [Regexp] pattern The pattern to match with.
|
294
|
+
# @return [Fixnum, nil] The length of the match in characters, or `nil` if the pattern does not match at the scan pointer.
|
295
|
+
#
|
296
|
+
# @example
|
297
|
+
# s = UnicodeScanner.new('test string')
|
298
|
+
# p s.match?(/\w+/) # -> 4
|
299
|
+
# p s.match?(/\w+/) # -> 4
|
300
|
+
# p s.match?(/\s+/) # -> nil
|
301
|
+
|
302
|
+
def match?(pattern)
|
303
|
+
do_scan pattern, false, false, true
|
304
|
+
end
|
305
|
+
|
306
|
+
# @return [String, nil] The last matched string.
|
307
|
+
# @example
|
308
|
+
# s = UnicodeScanner.new('test string')
|
309
|
+
# s.match?(/\w+/) # -> 4
|
310
|
+
# s.matched # -> "test"
|
311
|
+
|
312
|
+
def matched
|
313
|
+
return nil unless @matched
|
314
|
+
@matches[0]
|
315
|
+
end
|
316
|
+
|
317
|
+
# @return [true, false] `true` iff the last match was successful.
|
318
|
+
# @example
|
319
|
+
# s = UnicodeScanner.new('test string')
|
320
|
+
# s.match?(/\w+/) # => 4
|
321
|
+
# s.matched? # => true
|
322
|
+
# s.match?(/\d+/) # => nil
|
323
|
+
# s.matched? # => false
|
324
|
+
|
325
|
+
def matched?() @matched end
|
326
|
+
|
327
|
+
# @return [Fixnum, nil] The size of the most recent match (see {#matched}), or
|
328
|
+
# `nil` if there was no recent match.
|
329
|
+
# @example
|
330
|
+
# s = UnicodeScanner.new('test string')
|
331
|
+
# s.check /\w+/ # -> "test"
|
332
|
+
# s.matched_size # -> 4
|
333
|
+
# s.check /\d+/ # -> nil
|
334
|
+
# s.matched_size # -> nil
|
335
|
+
|
336
|
+
def matched_size
|
337
|
+
return nil unless @matched
|
338
|
+
@matches.end(0) - @matches.begin(0)
|
339
|
+
end
|
340
|
+
|
341
|
+
# Extracts a string corresponding to `string[pos,len]`, without advancing the
|
342
|
+
# scan pointer.
|
343
|
+
#
|
344
|
+
# @param [Fixnum] len The number of characters ahead to peek.
|
345
|
+
# @return [String] The string after the current position.
|
346
|
+
#
|
347
|
+
# @example
|
348
|
+
# s = UnicodeScanner.new('test string')
|
349
|
+
# s.peek(7) # => "test st"
|
350
|
+
# s.peek(7) # => "test st"
|
351
|
+
|
352
|
+
def peek(len)
|
353
|
+
return '' if eos?
|
354
|
+
@string[@current, len]
|
355
|
+
end
|
356
|
+
|
357
|
+
# Returns the character position of the scan pointer. In the 'reset' position, this
|
358
|
+
# value is zero. In the 'terminated' position (i.e. the string is exhausted),
|
359
|
+
# this value is the length of the string in characters.
|
360
|
+
#
|
361
|
+
# In short, it's a 0-based index into the string.
|
362
|
+
#
|
363
|
+
# @return [Fixnum] The current scan position.
|
364
|
+
#
|
365
|
+
# @example
|
366
|
+
# s = UnicodeScanner.new('test string')
|
367
|
+
# s.pos # -> 0
|
368
|
+
# s.scan_until /str/ # -> "test str"
|
369
|
+
# s.pos # -> 8
|
370
|
+
# s.terminate # -> #<UnicodeScanner fin>
|
371
|
+
# s.pos # -> 11
|
372
|
+
|
373
|
+
def pos() @current end
|
374
|
+
|
375
|
+
alias pointer pos
|
376
|
+
|
377
|
+
# Set the character position of the scan pointer.
|
378
|
+
#
|
379
|
+
# @param [Fixnum] n The new position.
|
380
|
+
#
|
381
|
+
# @example
|
382
|
+
# s = UnicodeScanner.new('test string')
|
383
|
+
# s.pos = 7 # -> 7
|
384
|
+
# s.rest # -> "ring"
|
385
|
+
|
386
|
+
def pos=(n)
|
387
|
+
n += @string.length if n < 0
|
388
|
+
raise RangeError, "index out of range" if n < 0
|
389
|
+
raise RangeError, "index out of range" if n > @string.length
|
390
|
+
@current = n
|
391
|
+
end
|
392
|
+
|
393
|
+
# @return [String, nil] The _**post**-match_ (in the regular expression sense) of
|
394
|
+
# the last scan.
|
395
|
+
# @example
|
396
|
+
# s = UnicodeScanner.new('test string')
|
397
|
+
# s.scan(/\w+/) # -> "test"
|
398
|
+
# s.scan(/\s+/) # -> " "
|
399
|
+
# s.pre_match # -> "test"
|
400
|
+
# s.post_match # -> "string"
|
401
|
+
|
402
|
+
def post_match
|
403
|
+
return nil unless @matched
|
404
|
+
@string[@previous + @matches.end(0), @string.length]
|
405
|
+
end
|
406
|
+
|
407
|
+
# @return [String, nil] The _**pre**-match_ (in the regular expression sense) of
|
408
|
+
# the last scan.
|
409
|
+
# @example
|
410
|
+
# s = UnicodeScanner.new('test string')
|
411
|
+
# s.scan(/\w+/) # -> "test"
|
412
|
+
# s.scan(/\s+/) # -> " "
|
413
|
+
# s.pre_match # -> "test"
|
414
|
+
# s.post_match # -> "string"
|
415
|
+
|
416
|
+
def pre_match
|
417
|
+
return nil unless @matched
|
418
|
+
@string[0, @previous + @matches.begin(0)]
|
419
|
+
end
|
420
|
+
|
421
|
+
# Reset the scan pointer (index 0) and clear matching data.
|
422
|
+
|
423
|
+
def reset
  # Rewind the scan pointer to the start of the string and invalidate the
  # match register so #matched, #[] and friends report no match.
  @current = 0
  @matched = false

  # Return the scanner itself, as #terminate and #unscan do. Previously the
  # method returned `false`, the value of the last assignment.
  self
end
|
427
|
+
|
428
|
+
# @return [String] The "rest" of the string (i.e. everything after the scan
|
429
|
+
# pointer). If there is no more data (`eos? = true`), it returns `""`.
|
430
|
+
|
431
|
+
def rest
|
432
|
+
return '' if eos?
|
433
|
+
return @string[@current, @string.length]
|
434
|
+
end
|
435
|
+
|
436
|
+
# @return [Fixnum] The value returned by `s.rest.size`.
|
437
|
+
|
438
|
+
def rest_size
|
439
|
+
return 0 if eos?
|
440
|
+
@string.length - @current
|
441
|
+
end
|
442
|
+
|
443
|
+
# Tries to match with `pattern` at the current position. If there's a match,
|
444
|
+
# the scanner advances the "scan pointer" and returns the matched string.
|
445
|
+
# Otherwise, the scanner returns `nil`.
|
446
|
+
#
|
447
|
+
# @param [Regexp] pattern The pattern to match.
|
448
|
+
# @return [String, nil] The string that was matched, if a match was found.
|
449
|
+
#
|
450
|
+
# @example
|
451
|
+
# s = UnicodeScanner.new('test string')
|
452
|
+
# p s.scan(/\w+/) # -> "test"
|
453
|
+
# p s.scan(/\w+/) # -> nil
|
454
|
+
# p s.scan(/\s+/) # -> " "
|
455
|
+
# p s.scan(/\w+/) # -> "string"
|
456
|
+
# p s.scan(/./) # -> nil
|
457
|
+
|
458
|
+
def scan(pattern)
|
459
|
+
do_scan pattern, true, true, true
|
460
|
+
end
|
461
|
+
|
462
|
+
# Tests whether the given `pattern` is matched from the current scan pointer.
|
463
|
+
# Advances the scan pointer if `advance_pointer` is `true`. Returns the
|
464
|
+
# matched string if `return_string` is true. The match register is affected.
|
465
|
+
#
|
466
|
+
# "full" means "scan with full parameters".
|
467
|
+
#
|
468
|
+
# @param [Regexp] pattern The pattern to scan.
|
469
|
+
# @param [true, false] advance_pointer Whether to advance the scan pointer if
|
470
|
+
# a match is found.
|
471
|
+
# @param [true, false] return_string Whether to return the matched segment.
|
472
|
+
# @return [String, Fixnum, nil] The matched segment if `return_string` is
|
473
|
+
# `true`, otherwise the number of characters advanced. `nil` if nothing
|
474
|
+
# matched.
|
475
|
+
|
476
|
+
def scan_full(pattern, advance_pointer, return_string)
|
477
|
+
do_scan pattern, advance_pointer, return_string, true
|
478
|
+
end
|
479
|
+
|
480
|
+
# Scans the string _until_ the `pattern` is matched. Returns the substring up
|
481
|
+
# to and including the end of the match, advancing the scan pointer to that
|
482
|
+
# location. If there is no match, `nil` is returned.
|
483
|
+
#
|
484
|
+
# @param [Regexp] pattern The pattern to match.
|
485
|
+
# @return [String, nil] The segment that matched.
|
486
|
+
#
|
487
|
+
# @example
|
488
|
+
# s = UnicodeScanner.new("Fri Dec 12 1975 14:39")
|
489
|
+
# s.scan_until(/1/) # -> "Fri Dec 1"
|
490
|
+
# s.pre_match # -> "Fri Dec "
|
491
|
+
# s.scan_until(/XYZ/) # -> nil
|
492
|
+
|
493
|
+
def scan_until(pattern)
|
494
|
+
do_scan pattern, true, true, false
|
495
|
+
end
|
496
|
+
|
497
|
+
# Scans the string _until_ the pattern is matched. Advances the scan pointer
|
498
|
+
# if `advance_pointer`, otherwise not. Returns the matched string if
|
499
|
+
# `return_string` is `true`, otherwise returns the number of characters
|
500
|
+
# advanced. This method does affect the match register.
|
501
|
+
#
|
502
|
+
# @param [Regexp] pattern The pattern to scan.
|
503
|
+
# @param [true, false] advance_pointer Whether to advance the scan pointer if
|
504
|
+
# a match is found.
|
505
|
+
# @param [true, false] return_string Whether to return the matched segment.
|
506
|
+
# @return [String, Fixnum, nil] The matched segment if `return_string` is
|
507
|
+
# `true`, otherwise the number of characters advanced. `nil` if nothing
|
508
|
+
# matched.
|
509
|
+
|
510
|
+
def search_full(pattern, advance_pointer, return_string)
|
511
|
+
do_scan pattern, advance_pointer, return_string, false
|
512
|
+
end
|
513
|
+
|
514
|
+
# Attempts to skip over the given `pattern` beginning with the scan pointer.
|
515
|
+
# If it matches, the scan pointer is advanced to the end of the match, and the
|
516
|
+
# length of the match is returned. Otherwise, `nil` is returned.
|
517
|
+
#
|
518
|
+
# It's similar to {#scan}, but without returning the matched string.
|
519
|
+
#
|
520
|
+
# @param [Regexp] pattern The pattern to match.
|
521
|
+
# @return [Fixnum, nil] The number of characters advanced, if matched.
|
522
|
+
#
|
523
|
+
# @example
|
524
|
+
# s = UnicodeScanner.new('test string')
|
525
|
+
# p s.skip(/\w+/) # -> 4
|
526
|
+
# p s.skip(/\w+/) # -> nil
|
527
|
+
# p s.skip(/\s+/) # -> 1
|
528
|
+
# p s.skip(/\w+/) # -> 6
|
529
|
+
# p s.skip(/./) # -> nil
|
530
|
+
|
531
|
+
def skip(pattern)
|
532
|
+
do_scan pattern, true, false, true
|
533
|
+
end
|
534
|
+
|
535
|
+
# Advances the scan pointer until `pattern` is matched and consumed. Returns
|
536
|
+
# the number of characters advanced, or `nil` if no match was found.
|
537
|
+
#
|
538
|
+
# Look ahead to match `pattern`, and advance the scan pointer to the _end_ of
|
539
|
+
# the match. Return the number of characters advanced, or `nil` if the match
|
540
|
+
# was unsuccessful.
|
541
|
+
#
|
542
|
+
# It's similar to {#scan_until}, but without returning the intervening string.
|
543
|
+
#
|
544
|
+
# @param [Regexp] pattern The pattern to match.
|
545
|
+
# @return [Fixnum, nil] The number of characters advanced, if matched.
|
546
|
+
|
547
|
+
def skip_until(pattern)
|
548
|
+
do_scan pattern, true, false, false
|
549
|
+
end
|
550
|
+
|
551
|
+
# @return [String] The string being scanned.
|
552
|
+
|
553
|
+
def string() @string end
|
554
|
+
|
555
|
+
# Changes the string being scanned to `str` and resets the scanner.
|
556
|
+
#
|
557
|
+
# @param [String] str The new string to scan.
|
558
|
+
# @return [String] `str`
|
559
|
+
|
560
|
+
def string=(str)
|
561
|
+
@string = str
|
562
|
+
@matched = false
|
563
|
+
@current = 0
|
564
|
+
str
|
565
|
+
end
|
566
|
+
|
567
|
+
# Set the scan pointer to the end of the string and clear matching data.
|
568
|
+
|
569
|
+
def terminate
|
570
|
+
@current = @string.length
|
571
|
+
@matched = false
|
572
|
+
self
|
573
|
+
end
|
574
|
+
alias clear terminate
|
575
|
+
|
576
|
+
# Set the scan pointer to the previous position. Only one previous position is
|
577
|
+
# remembered, and it changes with each scanning operation.
|
578
|
+
#
|
579
|
+
# @example
|
580
|
+
# s = UnicodeScanner.new('test string')
|
581
|
+
# s.scan(/\w+/) # => "test"
|
582
|
+
# s.unscan
|
583
|
+
# s.scan(/../) # => "te"
|
584
|
+
# s.scan(/\d/) # => nil
|
585
|
+
# s.unscan # ScanError: unscan failed: previous match record not exist
|
586
|
+
|
587
|
+
def unscan
|
588
|
+
raise ScanError, "unscan failed: previous match record not exist" unless @matched
|
589
|
+
@current = @previous
|
590
|
+
@matched = false
|
591
|
+
self
|
592
|
+
end
|
593
|
+
|
594
|
+
private
|
595
|
+
|
596
|
+
def do_scan(regex, advance_pointer, return_string, head_only)
|
597
|
+
raise ArgumentError unless regex.kind_of?(Regexp)
|
598
|
+
|
599
|
+
@matched = false
|
600
|
+
return nil if eos?
|
601
|
+
|
602
|
+
@matches = regex.match(@string[@current, @string.length])
|
603
|
+
return nil unless @matches
|
604
|
+
|
605
|
+
if head_only && @matches.begin(0) > 0
|
606
|
+
@matches = nil
|
607
|
+
return nil
|
608
|
+
end
|
609
|
+
|
610
|
+
@matched = true
|
611
|
+
|
612
|
+
@previous = @current
|
613
|
+
@current += @matches.end(0) if advance_pointer
|
614
|
+
if return_string
|
615
|
+
return @string[@previous, @matches.end(0)]
|
616
|
+
else
|
617
|
+
return @matches.end(0)
|
618
|
+
end
|
619
|
+
end
|
620
|
+
|
621
|
+
def inspect_before # inspect1
|
622
|
+
return '' if @current == 0
|
623
|
+
|
624
|
+
str = String.new
|
625
|
+
len = 0
|
626
|
+
|
627
|
+
if @current > INSPECT_LENGTH
|
628
|
+
str << '...'
|
629
|
+
len = INSPECT_LENGTH
|
630
|
+
else
|
631
|
+
len = @current
|
632
|
+
end
|
633
|
+
|
634
|
+
str << @string[@current - len, len]
|
635
|
+
return str
|
636
|
+
end
|
637
|
+
|
638
|
+
def inspect_after # inspect2
|
639
|
+
return '' if eos?
|
640
|
+
|
641
|
+
str = String.new
|
642
|
+
len = @string.length - @current
|
643
|
+
if len > INSPECT_LENGTH
|
644
|
+
len = INSPECT_LENGTH
|
645
|
+
str << @string[@current, len]
|
646
|
+
str << '...'
|
647
|
+
else
|
648
|
+
str << @string[@current, len]
|
649
|
+
end
|
650
|
+
|
651
|
+
return str
|
652
|
+
end
|
653
|
+
end
|
654
|
+
|
655
|
+
class ScanError < StandardError; end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'unicode_scanner'
|
5
|
+
|
6
|
+
# Requires supporting files with custom matchers and macros, etc,
|
7
|
+
# in ./support/ and its subdirectories.
|
8
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
|
12
|
+
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require File.expand_path('spec_helper', File.dirname(__FILE__))

# Behavioural examples for UnicodeScanner, one example per public method.
describe UnicodeScanner do
  it "should pass all the class-level examples" do
    scanner = UnicodeScanner.new('This is an example string')
    scanner.eos?.should == false

    scanner.scan(/\w+/).should == "This"
    scanner.scan(/\w+/).should == nil
    scanner.scan(/\s+/).should == " "
    scanner.scan(/\s+/).should == nil
    scanner.scan(/\w+/).should == "is"
    scanner.eos?.should == false

    scanner.scan(/\s+/).should == " "
    scanner.scan(/\w+/).should == "an"
    scanner.scan(/\s+/).should == " "
    scanner.scan(/\w+/).should == "example"
    scanner.scan(/\s+/).should == " "
    scanner.scan(/\w+/).should == "string"
    scanner.eos?.should == true

    scanner.scan(/\s+/).should == nil
    scanner.scan(/\w+/).should == nil
  end

  it "should pass the #concat example" do
    scanner = UnicodeScanner.new("Fri Dec 12 1975 14:39")
    scanner.scan(/Fri /)
    scanner << " +1000 GMT"
    scanner.string.should == "Fri Dec 12 1975 14:39 +1000 GMT"
    scanner.scan(/Dec/).should == "Dec"
  end

  it "should pass the #[] example" do
    scanner = UnicodeScanner.new("Fri Dec 12 1975 14:39")
    scanner.scan(/(\w+) (\w+) (\d+) /).should == "Fri Dec 12 "
    scanner[0].should == "Fri Dec 12 "
    scanner[1].should == "Fri"
    scanner[2].should == "Dec"
    scanner[3].should == "12"
    scanner.post_match.should == "1975 14:39"
    scanner.pre_match.should == ""
  end

  it "should pass the #beginning_of_line? example" do
    scanner = UnicodeScanner.new("test\ntest\n")
    scanner.bol?.should == true
    scanner.scan(/te/)
    scanner.bol?.should == false
    scanner.scan(/st\n/)
    scanner.bol?.should == true
    scanner.terminate
    scanner.bol?.should == true
  end

  it "should pass the #check example" do
    scanner = UnicodeScanner.new("Fri Dec 12 1975 14:39")
    scanner.check(/Fri/).should == "Fri"
    scanner.pos.should == 0
    scanner.matched.should == "Fri"
    scanner.check(/12/).should == nil
    scanner.matched.should == nil
  end

  it "should pass the #check_until example" do
    scanner = UnicodeScanner.new("Fri Dec 12 1975 14:39")
    scanner.check_until(/12/).should == "Fri Dec 12"
    scanner.pos.should == 0
    scanner.matched.should == "12"
  end

  it "should pass the #eos? example" do
    scanner = UnicodeScanner.new('test string')
    scanner.eos?.should == false
    scanner.scan(/test/)
    scanner.eos?.should == false
    scanner.terminate
    scanner.eos?.should == true
  end

  it "should pass the #exist? example" do
    scanner = UnicodeScanner.new('test string')
    scanner.exist?(/s/).should == 3
    scanner.scan(/test/).should == "test"
    scanner.exist?(/s/).should == 2
    scanner.exist?(/e/).should == nil
  end

  it "should pass a tweaked version of the #getch example" do
    scanner = UnicodeScanner.new("ab")
    scanner.getch.should == "a"
    scanner.getch.should == "b"
    scanner.getch.should == nil

    scanner = UnicodeScanner.new("ぁ")
    scanner.getch.should == "ぁ" # a multibyte Japanese hiragana character must come back whole
    scanner.getch.should == nil
  end

  it "should pass the #inspect example" do
    scanner = UnicodeScanner.new("Fri Dec 12 1975 14:39")
    scanner.inspect.should == '#<UnicodeScanner 0/21 @ "Fri D...">'
    scanner.scan_until(/12/).should == "Fri Dec 12"
    scanner.inspect.should == '#<UnicodeScanner 10/21 "...ec 12" @ " 1975...">'
  end

  it "should pass the #match? example" do
    scanner = UnicodeScanner.new('test string')
    scanner.match?(/\w+/).should == 4
    scanner.match?(/\w+/).should == 4
    scanner.match?(/\s+/).should == nil
  end

  it "should pass the #matched example" do
    scanner = UnicodeScanner.new('test string')
    scanner.match?(/\w+/).should == 4
    scanner.matched.should == "test"
  end

  it "should pass the #matched? example" do
    scanner = UnicodeScanner.new('test string')
    scanner.match?(/\w+/).should == 4
    scanner.matched?.should == true
    scanner.match?(/\d+/).should == nil
    scanner.matched?.should == false
  end

  it "should pass the #matched_size example" do
    scanner = UnicodeScanner.new('test string')
    scanner.check(/\w+/).should == "test"
    scanner.matched_size.should == 4
    scanner.check(/\d+/).should == nil
    scanner.matched_size.should == nil
  end

  it "should pass the #peek example" do
    scanner = UnicodeScanner.new('test string')
    scanner.peek(7).should == "test st"
    scanner.peek(7).should == "test st"
  end

  it "should pass the #pos example" do
    scanner = UnicodeScanner.new('test string')
    scanner.pos.should == 0
    scanner.scan_until(/str/).should == "test str"
    scanner.pos.should == 8
    scanner.terminate.inspect.should == "#<UnicodeScanner fin>"
    scanner.pos.should == 11
  end

  it "should pass the #pos= example" do
    scanner = UnicodeScanner.new('test string')
    (scanner.pos = 7).should == 7
    scanner.rest.should == "ring"
  end

  it "should pass the #post_match/#pre_match example" do
    scanner = UnicodeScanner.new('test string')
    scanner.scan(/\w+/).should == "test"
    scanner.scan(/\s+/).should == " "
    scanner.pre_match.should == "test"
    scanner.post_match.should == "string"
  end

  it "should pass the #scan example" do
    scanner = UnicodeScanner.new('test string')
    scanner.scan(/\w+/).should == "test"
    scanner.scan(/\w+/).should == nil
    scanner.scan(/\s+/).should == " "
    scanner.scan(/\w+/).should == "string"
    scanner.scan(/./).should == nil
  end

  it "should pass the #scan_until example" do
    scanner = UnicodeScanner.new("Fri Dec 12 1975 14:39")
    scanner.scan_until(/1/).should == "Fri Dec 1"
    scanner.pre_match.should == "Fri Dec "
    scanner.scan_until(/XYZ/).should == nil
  end

  it "should pass the #skip example" do
    scanner = UnicodeScanner.new('test string')
    scanner.skip(/\w+/).should == 4
    scanner.skip(/\w+/).should == nil
    scanner.skip(/\s+/).should == 1
    scanner.skip(/\w+/).should == 6
    scanner.skip(/./).should == nil
  end

  it "should pass the half-finished #skip_until example" do
    scanner = UnicodeScanner.new("Fri Dec 12 1975 14:39")
    scanner.skip_until(/12/).should == 10
  end

  it "should pass the #unscan example" do
    scanner = UnicodeScanner.new('test string')
    scanner.scan(/\w+/).should == "test"
    scanner.unscan
    scanner.scan(/../).should == "te"
    scanner.scan(/\d/).should == nil
    # The failed scan above leaves no match record, so there is nothing to undo.
    lambda { scanner.unscan }.should raise_error(ScanError, 'unscan failed: previous match record not exist')
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for unicode_scanner 1.0.0: identity, packaged files and
# development-time dependencies.
Gem::Specification.new do |s|
  s.name = "unicode_scanner"
  s.version = "1.0.0"

  if s.respond_to?(:required_rubygems_version=)
    s.required_rubygems_version = Gem::Requirement.new(">= 0")
  end
  s.authors = ["Tim Morgan"]
  s.date = "2012-07-12"
  s.description = "An implementation of StringScanner that doesn't split multibyte characters."
  s.email = "git@timothymorgan.info"
  s.extra_rdoc_files = %w[LICENSE.txt README.md]
  s.files = %w[
    .document
    .rspec
    .rvmrc
    Gemfile
    Gemfile.lock
    LICENSE.txt
    README.md
    Rakefile
    VERSION
    lib/unicode_scanner.rb
    spec/spec_helper.rb
    spec/unicode_scanner_spec.rb
    unicode_scanner.gemspec
  ]
  s.homepage = "http://github.com/RISCfuture/unicode_scanner"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.24"
  s.summary = "Unicode-aware implementation of StringScanner"

  # Gems needed only for developing the library; all accept any version.
  tooling = %w[rspec redcarpet yard bundler jeweler]

  # Development dependencies are declared only when the spec responds to
  # specification_version and RubyGems is >= 1.2.0; otherwise the same gems
  # are registered through add_dependency.
  development_aware = false
  if s.respond_to?(:specification_version)
    s.specification_version = 3
    development_aware = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  tooling.each do |gem_name|
    if development_aware
      s.add_development_dependency(gem_name, [">= 0"])
    else
      s.add_dependency(gem_name, [">= 0"])
    end
  end
end
|
64
|
+
|
metadata
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: unicode_scanner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tim Morgan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-12 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: redcarpet
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: yard
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: bundler
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: jeweler
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
description: An implementation of StringScanner that doesn't split multibyte characters.
|
95
|
+
email: git@timothymorgan.info
|
96
|
+
executables: []
|
97
|
+
extensions: []
|
98
|
+
extra_rdoc_files:
|
99
|
+
- LICENSE.txt
|
100
|
+
- README.md
|
101
|
+
files:
|
102
|
+
- .document
|
103
|
+
- .rspec
|
104
|
+
- .rvmrc
|
105
|
+
- Gemfile
|
106
|
+
- Gemfile.lock
|
107
|
+
- LICENSE.txt
|
108
|
+
- README.md
|
109
|
+
- Rakefile
|
110
|
+
- VERSION
|
111
|
+
- lib/unicode_scanner.rb
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
- spec/unicode_scanner_spec.rb
|
114
|
+
- unicode_scanner.gemspec
|
115
|
+
homepage: http://github.com/RISCfuture/unicode_scanner
|
116
|
+
licenses:
|
117
|
+
- MIT
|
118
|
+
post_install_message:
|
119
|
+
rdoc_options: []
|
120
|
+
require_paths:
|
121
|
+
- lib
|
122
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ! '>='
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: '0'
|
128
|
+
segments:
|
129
|
+
- 0
|
130
|
+
hash: -3935821298050612576
|
131
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
+
none: false
|
133
|
+
requirements:
|
134
|
+
- - ! '>='
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: '0'
|
137
|
+
requirements: []
|
138
|
+
rubyforge_project:
|
139
|
+
rubygems_version: 1.8.24
|
140
|
+
signing_key:
|
141
|
+
specification_version: 3
|
142
|
+
summary: Unicode-aware implementation of StringScanner
|
143
|
+
test_files: []
|