email_address_validator 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/LICENSE +1 -0
- data/README.md +87 -0
- data/Rakefile +15 -0
- data/email_address_validator.gemspec +29 -0
- data/grammars/domain.kpeg +28 -0
- data/grammars/rfc2822.kpeg +152 -0
- data/grammars/rfc822.kpeg +76 -0
- data/lib/email_address_validator.rb +103 -0
- data/lib/email_address_validator/domain-parser.rb +503 -0
- data/lib/email_address_validator/rfc2822-parser.rb +2543 -0
- data/lib/email_address_validator/rfc822-parser.rb +1326 -0
- data/lib/email_address_validator/version.rb +3 -0
- data/spec/email_address_validator_spec.rb +120 -0
- data/spec/spec_helper.rb +2 -0
- data/version.txt +1 -0
- metadata +95 -0
@@ -0,0 +1,503 @@
|
|
1
|
+
class EmailAddressValidator::DomainParser
|
2
|
+
# STANDALONE START
|
3
|
+
# Resets all parser state so that +str+ can be parsed from offset 0.
#
# str   - the input text.
# debug - accepted but not used by this method.
def setup_parser(str, debug=false)
  @string, @pos = str, 0
  # Per-rule memo tables, created lazily the first time a rule is looked up.
  @memoizations = Hash.new { |cache, rule| cache[rule] = {} }
  @result = @failed_rule = nil
  # -1 means "no failure recorded yet".
  @failing_rule_offset = -1

  setup_foreign_grammar
end
|
13
|
+
|
14
|
+
# Hook invoked at the end of setup_parser. It does nothing here.
def setup_foreign_grammar; end
|
16
|
+
|
17
|
+
# Construction only delegates to setup_parser. Keeping the real setup in a
# separate method lets a standalone parser redefine #initialize and still
# reach the proper parser setup code.
#
# str   - the input text to parse.
# debug - forwarded unchanged to setup_parser.
def initialize(str, debug=false)
  setup_parser str, debug
end
|
24
|
+
|
25
|
+
# Read-only access to the input text, @result and the furthest failure offset.
attr_reader :string, :result, :failing_rule_offset
# The current offset into the input is readable and writable.
attr_accessor :pos
|
28
|
+
|
29
|
+
# STANDALONE START
|
30
|
+
# Returns the 1-based column of +target+ within its line.
#
# target - character offset into the input (defaults to the current position).
#
# Offsets on the first line yield target + 1. Offsets on later lines are
# measured from the preceding newline, so the character right after a newline
# is column 1.
def current_column(target=pos)
  # Only look backwards when there is something before target. Passing -1 to
  # rindex would mean "start from the end of the string", not "before 0".
  if target > 0 && (c = string.rindex("\n", target - 1))
    return target - c
  end

  target + 1
end
|
37
|
+
|
38
|
+
# Returns the 1-based number of the line containing +target+.
#
# target - character offset into the input (defaults to the current position).
#
# An offset exactly at the end of the input is reported on the last line.
# Returns -1 when the input has no lines or +target+ lies past the end.
def current_line(target=pos)
  consumed = 0
  line_no = 0

  string.each_line do |line|
    line_no += 1
    consumed += line.size
    # consumed is the offset of the first character of the NEXT line, so the
    # comparison must be strict. With >= that character would be attributed
    # to this line.
    return line_no if consumed > target
  end

  return line_no if line_no > 0 && target == consumed

  -1
end
|
50
|
+
|
51
|
+
# Returns the input split into an Array of lines, as yielded by
# String#each_line (line terminators are kept).
def lines
  string.each_line.to_a
end
|
56
|
+
|
57
|
+
#
|
58
|
+
|
59
|
+
# Returns the input text from offset +start+ up to, but not including, the
# current position.
#
# An exclusive range is used so that an empty capture at offset 0 yields "".
# The inclusive form start..@pos-1 becomes 0..-1 there and would return the
# whole string.
def get_text(start)
  @string[start...@pos]
end
|
62
|
+
|
63
|
+
# Describes the current position as a string: the offset, up to 10 characters
# of text before it and up to 10 characters of text starting at it.
def show_pos
  width = 10
  ahead = @string[@pos,width]

  if @pos >= width
    behind = @string[@pos - width, width]
    return "#{@pos} (\"... #{behind}\" @ \"#{ahead}\")"
  end

  behind = @string[0,@pos]
  "#{@pos} (\"#{behind}\" @ \"#{ahead}\")"
end
|
71
|
+
|
72
|
+
# Builds a one-line description of the recorded failure: its line and column
# plus the failed rule. When the failed rule is a Symbol its name and rendered
# grammar text are looked up in the class's Rules table. Any other value is
# interpolated as-is.
def failure_info
  row = current_line @failing_rule_offset
  col = current_column @failing_rule_offset

  unless @failed_rule.kind_of? Symbol
    return "line #{row}, column #{col}: failed rule '#{@failed_rule}'"
  end

  info = self.class::Rules[@failed_rule]
  "line #{row}, column #{col}: failed rule '#{info.name}' = '#{info.rendered}'"
end
|
83
|
+
|
84
|
+
# Returns the line on which the recorded failure occurred, followed by a
# second line with a "^" placed under the failing column.
def failure_caret
  l = current_line @failing_rule_offset
  c = current_column @failing_rule_offset

  line = lines[l-1]
  # The column can be 0 (e.g. when no failure has been recorded and the offset
  # is still -1). String#* raises on a negative count, so clamp the indent.
  indent = [c - 1, 0].max
  "#{line}\n#{' ' * indent}^"
end
|
91
|
+
|
92
|
+
# Returns the single character at the recorded failure position, located via
# its line and column.
#
# Returns nil when the computed line does not exist (for example when the
# input is empty) instead of raising NoMethodError on nil.
def failure_character
  l = current_line @failing_rule_offset
  c = current_column @failing_rule_offset
  line = lines[l-1]
  line && line[c-1, 1]
end
|
97
|
+
|
98
|
+
# Builds a compact "@line:column failed rule '...', got '...'" message for the
# recorded failure. raise_error uses it as the exception message.
#
# When the failed rule is a Symbol, its display name is taken from the class's
# Rules table. Any other value is interpolated as-is.
def failure_oneline
  l = current_line @failing_rule_offset
  c = current_column @failing_rule_offset

  # With empty input there is no line to index into. Report an empty
  # character rather than raising NoMethodError on nil.
  line = lines[l-1]
  char = line ? line[c-1, 1] : nil

  if @failed_rule.kind_of? Symbol
    info = self.class::Rules[@failed_rule]
    "@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
  else
    "@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
  end
end
|
111
|
+
|
112
|
+
# Error type raised by raise_error.
class ParseError < RuntimeError; end
|
114
|
+
|
115
|
+
# Raises ParseError with the failure_oneline text as its message.
def raise_error
  message = failure_oneline
  raise ParseError, message
end
|
118
|
+
|
119
|
+
# Writes a multi-line report of the recorded failure to +io+: the position,
# the rule that failed, the character found there, and the offending line
# with a caret underneath.
#
# io - any object responding to puts and print (defaults to STDOUT).
def show_error(io=STDOUT)
  offset = @failing_rule_offset
  row = current_line(offset)
  column = current_column(offset)

  io.puts "On line #{row}, column #{column}:"

  case @failed_rule
  when Symbol
    info = self.class::Rules[@failed_rule]
    io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
  else
    io.puts "Failed to match rule '#{@failed_rule}'"
  end

  io.puts "Got: #{string[offset,1].inspect}"
  io.puts "=> #{lines[row-1]}"
  io.print(" " * (column + 3))
  io.puts "^"
end
|
139
|
+
|
140
|
+
# Records +name+ as the failed rule, but only if the current position is
# strictly beyond the furthest failure recorded so far.
def set_failed_rule(name)
  return unless @pos > @failing_rule_offset

  @failed_rule = name
  @failing_rule_offset = @pos
end
|
146
|
+
|
147
|
+
# The rule recorded by set_failed_rule (nil until a failure is recorded).
attr_reader(:failed_rule)
|
148
|
+
|
149
|
+
# Tries to match the literal +str+ at the current position.
#
# Returns +str+ and advances the position past it on a match. Otherwise
# returns nil and leaves the position unchanged.
def match_string(str)
  width = str.size
  return nil unless @string[pos,width] == str

  @pos += width
  str
end
|
158
|
+
|
159
|
+
# Matches +reg+ against the remainder of the input from the current position.
#
# Returns true and advances the position to the end of the match on success.
# Otherwise returns nil and leaves the position unchanged.
def scan(reg)
  hit = reg.match(@string[@pos..-1])
  return nil unless hit

  @pos += hit.end(0)
  true
end
|
168
|
+
|
169
|
+
# Consumes one element of the input at the current position and advances by
# one. Returns nil when the position is at or beyond @string.size.
#
# The implementation is chosen once at load time: String#getbyte is used when
# available, otherwise plain String#[] indexing.
if "".respond_to? :getbyte
  def get_byte
    return nil if @pos >= @string.size

    byte = @string.getbyte @pos
    @pos += 1
    byte
  end
else
  def get_byte
    return nil if @pos >= @string.size

    byte = @string[@pos]
    @pos += 1
    byte
  end
end
|
190
|
+
|
191
|
+
# Runs a grammar rule against the input and reports success as a boolean.
#
# rule - optional rule name as written in the grammar (for example "let-dig"),
#        given as a String or a Symbol. Hyphens are mapped to "_hyphen_" to
#        find the generated method. When omitted, the root rule is run.
#
# Returns true if the rule method returned a truthy value, false otherwise.
def parse(rule=nil)
  return(_root ? true : false) unless rule

  # This is not shared with code_generator.rb so this can be standalone
  # NOTE(review): this dispatches on a caller-supplied name, so callers
  # should not pass untrusted rule names.
  method = rule.to_s.gsub("-","_hyphen_")
  __send__("_#{method}") ? true : false
end
|
200
|
+
|
201
|
+
# Placeholder answer stored in a memo entry while its rule is being evaluated.
# apply sets +detected+ to true when the same rule is re-entered at the same
# position.
class LeftRecursive
  attr_accessor :detected

  def initialize(detected=false)
    self.detected = detected
  end
end
|
208
|
+
|
209
|
+
# One memoized outcome of a rule: the answer, the position reached, the
# associated result value and a use counter.
class MemoEntry
  attr_reader :ans, :pos, :uses, :result

  # A new entry starts with one use and no result.
  def initialize(ans, pos)
    @ans, @pos = ans, pos
    @uses = 1
    @result = nil
  end

  # Counts one more use of this entry.
  def inc!
    @uses += 1
  end

  # Replaces the stored answer, position and result.
  def move!(ans, pos, result)
    @ans, @pos = ans, pos
    @result = result
  end
end
|
229
|
+
|
230
|
+
# Runs one of this parser's rules on behalf of another parser.
#
# other - the borrowing parser. Its pos and string are read, and it receives
#         either the updated pos or a set_failed_rule call.
# rule  - name of the method to invoke on this parser.
# args  - extra arguments forwarded to that method.
#
# This parser temporarily adopts +other+'s position and input. Its own values
# are always restored afterwards, even if the rule raises.
# Returns whatever the rule returned.
def external_invoke(other, rule, *args)
  saved_pos = @pos
  saved_string = @string

  @pos = other.pos
  @string = other.string

  begin
    val = __send__(rule, *args)

    if val
      other.pos = @pos
    else
      other.set_failed_rule "#{self.class}##{rule}"
    end

    val
  ensure
    @pos = saved_pos
    @string = saved_string
  end
end
|
249
|
+
|
250
|
+
# Invokes +rule+ (a method name) at the current position with memoization.
#
# If an entry already exists for this rule and position, the position and
# @result are taken from it and its answer is returned. If that entry still
# holds a LeftRecursive placeholder, the rule is currently being evaluated at
# this same position. The placeholder is marked as detected and nil is
# returned.
#
# Otherwise a placeholder entry is stored, the rule is run, and the entry is
# updated with the outcome. When the rule succeeded and left recursion was
# detected during the run, the result is handed to grow_lr.
#
# Returns the rule's answer (nil or false on failure).
def apply(rule)
  if m = @memoizations[rule][@pos]
    m.inc!
    @pos = m.pos

    if m.ans.kind_of? LeftRecursive
      m.ans.detected = true
      return nil
    end

    @result = m.result
    return m.ans
  end

  lr = LeftRecursive.new(false)
  m = MemoEntry.new(lr, @pos)
  @memoizations[rule][@pos] = m
  start_pos = @pos

  ans = __send__ rule

  m.move! ans, @pos, @result

  # Don't bother trying to grow the left recursion
  # if it's failing straight away (thus there is no seed)
  return grow_lr(rule, start_pos, m) if ans && lr.detected

  ans
end
|
285
|
+
|
286
|
+
# Repeatedly re-runs a left-recursive +rule+ from +start_pos+, keeping each
# run in memo entry +m+ for as long as the run ends further into the input
# than the stored one.
#
# Returns nil as soon as a run fails. Otherwise the position and @result are
# set from +m+ and its answer is returned.
def grow_lr(rule, start_pos, m)
  growing = true

  while growing
    @pos = start_pos
    @result = m.result

    ans = __send__ rule
    return nil unless ans

    # Stop once a run no longer gets further than the stored one.
    growing = @pos > m.pos
    m.move! ans, @pos, @result if growing
  end

  @result = m.result
  @pos = m.pos
  return m.ans
end
|
303
|
+
|
304
|
+
# Pairs a rule's display name with its rendered grammar text.
class RuleInfo
  attr_reader :name, :rendered

  def initialize(name, rendered)
    @name, @rendered = name, rendered
  end
end
|
312
|
+
|
313
|
+
# Builds the RuleInfo record for a rule from its name and rendered text.
def self.rule_info(name, rendered)
  RuleInfo.new name, rendered
end
|
316
|
+
|
317
|
+
#
|
318
|
+
# Redefinition of the setup hook called by setup_parser. It is still a no-op.
def setup_foreign_grammar
end
|
319
|
+
|
320
|
+
# domain = < subdomain > &{ text.size < 255 }
#
# Matches a subdomain and then requires the matched text to be shorter than
# 255 characters. On failure the position is restored and the failure is
# recorded.
def _domain
  origin = self.pos

  matched = apply(:_subdomain)
  if matched
    text = get_text(origin)
    # The predicate consumes nothing. Only its truth value matters.
    matched = text.size < 255
  end

  self.pos = origin unless matched

  set_failed_rule :_domain unless matched
  return matched
end
|
346
|
+
|
347
|
+
# subdomain = (subdomain "." label | label)
#
# Tries the left-recursive alternative first and falls back to a single
# label. The position is rewound before the fallback and again if both
# alternatives fail.
def _subdomain
  origin = self.pos

  # First alternative: subdomain "." label. It stops at the first part that
  # fails.
  found = apply(:_subdomain) && match_string(".") && apply(:_label)

  unless found
    self.pos = origin
    # Second alternative: label
    found = apply(:_label)
    self.pos = origin unless found
  end

  set_failed_rule :_subdomain unless found
  return found
end
|
383
|
+
|
384
|
+
# label = let-dig < let-dig-hyp* > &{ text.size < 63 && (text.size == 0 || text[-1] != ?-) }
#
# A label is one let-dig followed by any number of let-dig-hyp characters.
# The captured tail (everything after the first character) must be shorter
# than 63 characters and must not end in a hyphen.
def _label
  origin = self.pos

  ok = apply(:_let_hyphen_dig)
  if ok
    tail_start = self.pos
    while apply(:_let_hyphen_dig_hyphen_hyp)
      # keep consuming; zero repetitions are fine
    end
    text = get_text(tail_start)
    ok = text.size < 63 && (text.size == 0 || text[-1] != ?-)
  end

  self.pos = origin unless ok

  set_failed_rule :_label unless ok
  return ok
end
|
419
|
+
|
420
|
+
# let-dig-hyp = (let-dig | "-")
#
# Accepts a let-dig, or failing that a literal hyphen. The position is
# rewound after each failed alternative.
def _let_hyphen_dig_hyphen_hyp
  origin = self.pos

  found = apply(:_let_hyphen_dig)
  unless found
    self.pos = origin
    found = match_string("-")
    self.pos = origin unless found
  end

  set_failed_rule :_let_hyphen_dig_hyphen_hyp unless found
  return found
end
|
437
|
+
|
438
|
+
# let-dig = (letter | digit)
#
# Accepts a letter, or failing that a digit. The position is rewound after
# each failed alternative.
def _let_hyphen_dig
  origin = self.pos

  found = apply(:_letter)
  unless found
    self.pos = origin
    found = apply(:_digit)
    self.pos = origin unless found
  end

  set_failed_rule :_let_hyphen_dig unless found
  return found
end
|
455
|
+
|
456
|
+
# letter = /[A-Za-z]/
#
# Scans one ASCII letter at the current position and records a failure when
# none is found.
def _letter
  matched = scan(/\A(?-mix:[A-Za-z])/)
  set_failed_rule :_letter unless matched
  return matched
end
|
462
|
+
|
463
|
+
# digit = /[0-9]/
#
# Scans one ASCII digit at the current position and records a failure when
# none is found.
def _digit
  matched = scan(/\A(?-mix:[0-9])/)
  set_failed_rule :_digit unless matched
  return matched
end
|
469
|
+
|
470
|
+
# root = domain !.
#
# Matches a domain and then requires that no further input remains. The
# look-ahead reads one byte and then puts the position back.
def _root
  origin = self.pos

  ok = apply(:_domain)
  if ok
    after_domain = self.pos
    # Negative look-ahead: any byte still available means failure.
    ok = get_byte ? nil : true
    self.pos = after_domain
  end

  self.pos = origin unless ok

  set_failed_rule :_root unless ok
  return ok
end
|
493
|
+
|
494
|
+
# Lookup table from generated rule method name to its RuleInfo (display name
# and rendered grammar text). The failure reporting methods read it.
Rules = {
  :_domain => rule_info("domain", "< subdomain > &{ text.size < 255 }"),
  :_subdomain => rule_info("subdomain", "(subdomain \".\" label | label)"),
  :_label => rule_info("label", "let-dig < let-dig-hyp* > &{ text.size < 63 && (text.size == 0 || text[-1] != ?-) }"),
  :_let_hyphen_dig_hyphen_hyp => rule_info("let-dig-hyp", "(let-dig | \"-\")"),
  :_let_hyphen_dig => rule_info("let-dig", "(letter | digit)"),
  :_letter => rule_info("letter", "/[A-Za-z]/"),
  :_digit => rule_info("digit", "/[0-9]/"),
  :_root => rule_info("root", "domain !.")
}
|
503
|
+
end
|