rley 0.6.00 → 0.6.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -1
- data/CHANGELOG.md +3 -0
- data/Gemfile +1 -1
- data/examples/NLP/benchmark_pico_en.rb +6 -10
- data/examples/NLP/nano_eng/nano_en_demo.rb +2 -2
- data/examples/NLP/nano_eng/nano_grammar.rb +1 -2
- data/examples/data_formats/JSON/json_ast_builder.rb +8 -8
- data/examples/general/SRL/lib/ast_builder.rb +74 -72
- data/examples/general/SRL/lib/grammar.rb +2 -2
- data/examples/general/SRL/lib/regex/abstract_method.rb +28 -28
- data/examples/general/SRL/lib/regex/alternation.rb +21 -25
- data/examples/general/SRL/lib/regex/anchor.rb +6 -9
- data/examples/general/SRL/lib/regex/atomic_expression.rb +10 -15
- data/examples/general/SRL/lib/regex/capturing_group.rb +15 -14
- data/examples/general/SRL/lib/regex/char_class.rb +10 -13
- data/examples/general/SRL/lib/regex/char_range.rb +45 -46
- data/examples/general/SRL/lib/regex/char_shorthand.rb +8 -9
- data/examples/general/SRL/lib/regex/character.rb +196 -191
- data/examples/general/SRL/lib/regex/compound_expression.rb +47 -50
- data/examples/general/SRL/lib/regex/concatenation.rb +23 -27
- data/examples/general/SRL/lib/regex/expression.rb +53 -56
- data/examples/general/SRL/lib/regex/lookaround.rb +23 -20
- data/examples/general/SRL/lib/regex/match_option.rb +26 -28
- data/examples/general/SRL/lib/regex/monadic_expression.rb +20 -23
- data/examples/general/SRL/lib/regex/multiplicity.rb +17 -20
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +9 -12
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +51 -55
- data/examples/general/SRL/lib/regex/quantifiable.rb +14 -20
- data/examples/general/SRL/lib/regex/repetition.rb +20 -23
- data/examples/general/SRL/lib/regex/wildcard.rb +15 -19
- data/examples/general/SRL/lib/regex_repr.rb +1 -1
- data/examples/general/SRL/lib/tokenizer.rb +2 -2
- data/examples/general/SRL/spec/integration_spec.rb +17 -12
- data/examples/general/SRL/spec/regex/character_spec.rb +160 -153
- data/examples/general/SRL/spec/regex/multiplicity_spec.rb +27 -31
- data/examples/general/SRL/spec/spec_helper.rb +1 -1
- data/examples/general/SRL/spec/tokenizer_spec.rb +25 -27
- data/examples/general/calc_iter1/calc_ast_builder.rb +10 -10
- data/examples/general/calc_iter2/calc_ast_builder.rb +7 -9
- data/examples/general/calc_iter2/calc_ast_nodes.rb +5 -6
- data/examples/general/calc_iter2/calc_lexer.rb +3 -5
- data/examples/general/calc_iter2/spec/calculator_spec.rb +16 -14
- data/examples/general/left.rb +8 -8
- data/examples/general/right.rb +8 -8
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +16 -20
- data/lib/rley/formatter/json.rb +1 -1
- data/lib/rley/gfg/grm_flow_graph.rb +1 -1
- data/lib/rley/gfg/item_vertex.rb +6 -5
- data/lib/rley/gfg/vertex.rb +3 -3
- data/lib/rley/lexical/token.rb +4 -3
- data/lib/rley/parse_rep/ast_base_builder.rb +4 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +1 -1
- data/lib/rley/parse_rep/parse_tree_builder.rb +3 -2
- data/lib/rley/parser/error_reason.rb +1 -1
- data/lib/rley/parser/gfg_chart.rb +6 -6
- data/lib/rley/parser/gfg_parsing.rb +19 -19
- data/lib/rley/parser/parse_entry.rb +3 -3
- data/lib/rley/parser/parse_entry_set.rb +1 -1
- data/lib/rley/parser/parse_walker_factory.rb +15 -15
- data/lib/rley/syntax/grammar.rb +1 -1
- data/lib/rley/syntax/grammar_builder.rb +2 -2
- data/lib/rley/syntax/production.rb +4 -3
- data/lib/rley/syntax/symbol_seq.rb +2 -2
- data/spec/rley/base/grm_items_builder_spec.rb +1 -1
- data/spec/rley/engine_spec.rb +3 -6
- data/spec/rley/formatter/asciitree_spec.rb +0 -1
- data/spec/rley/formatter/bracket_notation_spec.rb +0 -1
- data/spec/rley/formatter/debug_spec.rb +2 -3
- data/spec/rley/gfg/grm_flow_graph_spec.rb +19 -19
- data/spec/rley/parse_rep/ast_builder_spec.rb +12 -12
- data/spec/rley/parser/gfg_earley_parser_spec.rb +1 -1
- data/spec/rley/parser/parse_entry_set_spec.rb +5 -5
- data/spec/rley/parser/parse_state_spec.rb +8 -3
- data/spec/rley/parser/parse_tracer_spec.rb +3 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +1 -1
- data/spec/rley/ptree/parse_tree_node_spec.rb +1 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +1 -1
- data/spec/rley/syntax/grammar_spec.rb +1 -1
- metadata +2 -3
- data/spec/rley/support/ast_builder.rb +0 -403
@@ -1,30 +1,27 @@
|
|
1
1
|
# File: non_capturing_group.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'monadic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
-
|
7
|
-
#
|
6
|
+
# A non-capturing group, in other word it is a pure grouping
|
7
|
+
# of sub-expressions
|
8
8
|
class NonCapturingGroup < MonadicExpression
|
9
|
-
|
10
9
|
# Constructor.
|
11
|
-
# [aChildExpression]
|
10
|
+
# [aChildExpression] A sub-expression to match. When successful
|
12
11
|
# the matching text is assigned to the capture variable.
|
13
|
-
def initialize(aChildExpression)
|
12
|
+
def initialize(aChildExpression)
|
14
13
|
super(aChildExpression)
|
15
14
|
end
|
16
|
-
|
15
|
+
|
17
16
|
protected
|
18
|
-
|
17
|
+
|
19
18
|
# Conversion method re-definition.
|
20
19
|
# Purpose: Return the String representation of the captured expression.
|
21
20
|
def text_repr()
|
22
|
-
result = '(?:' + all_child_text
|
21
|
+
result = '(?:' + all_child_text + ')'
|
23
22
|
return result
|
24
23
|
end
|
25
|
-
|
26
24
|
end # class
|
27
|
-
|
28
25
|
end # module
|
29
26
|
|
30
|
-
# End of file
|
27
|
+
# End of file
|
@@ -1,64 +1,60 @@
|
|
1
1
|
# File: polyadic_expression.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'compound_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. An element that is part of a regular expression &
|
7
|
+
# that has its own child sub-expressions.
|
8
|
+
class PolyadicExpression < CompoundExpression
|
9
|
+
# The aggregation of child elements
|
10
|
+
attr_reader(:children)
|
6
11
|
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# Constructor.
|
14
|
-
def initialize(theChildren)
|
15
|
-
super()
|
16
|
-
@children = theChildren
|
17
|
-
end
|
18
|
-
|
19
|
-
public
|
20
|
-
# Append the given child to the list of children.
|
21
|
-
# TODO: assess whether to defer to a subclass NAryExpression
|
22
|
-
def <<(aChild)
|
23
|
-
@children << aChild
|
24
|
-
|
25
|
-
return self
|
26
|
-
end
|
27
|
-
|
28
|
-
# Build a depth-first in-order children visitor.
|
29
|
-
# The visitor is implemented as an Enumerator.
|
30
|
-
def df_visitor()
|
31
|
-
root = children # The visit will start from the children of this object
|
32
|
-
|
33
|
-
visitor = Enumerator.new do |result| # result is a Yielder
|
34
|
-
# Initialization part: will run once
|
35
|
-
visit_stack = [ root ] # The LIFO queue of nodes to visit
|
36
|
-
|
37
|
-
begin # Traversal part (as a loop)
|
38
|
-
top = visit_stack.pop()
|
39
|
-
if top.kind_of?(Array)
|
40
|
-
if top.empty?
|
41
|
-
next
|
42
|
-
else
|
43
|
-
currChild = top.pop()
|
44
|
-
visit_stack.push top
|
45
|
-
end
|
46
|
-
else
|
47
|
-
currChild = top
|
48
|
-
end
|
49
|
-
|
50
|
-
result << currChild # Return the visited child
|
51
|
-
|
52
|
-
unless currChild.atomic?
|
53
|
-
children_to_enqueue = currChild.children.reverse() # in-order traversal implies LIFO queue
|
54
|
-
visit_stack.push(children_to_enqueue)
|
55
|
-
end
|
56
|
-
end until visit_stack.empty?
|
57
|
-
end
|
58
|
-
end
|
12
|
+
# Constructor.
|
13
|
+
def initialize(theChildren)
|
14
|
+
super()
|
15
|
+
@children = theChildren
|
16
|
+
end
|
59
17
|
|
60
|
-
|
18
|
+
# Append the given child to the list of children.
|
19
|
+
# TODO: assess whether to defer to a subclass NAryExpression
|
20
|
+
def <<(aChild)
|
21
|
+
@children << aChild
|
61
22
|
|
23
|
+
return self
|
24
|
+
end
|
25
|
+
|
26
|
+
# Build a depth-first in-order children visitor.
|
27
|
+
# The visitor is implemented as an Enumerator.
|
28
|
+
def df_visitor()
|
29
|
+
root = children # The visit will start from the children of this object
|
30
|
+
|
31
|
+
visitor = Enumerator.new do |result| # result is a Yielder
|
32
|
+
# Initialization part: will run once
|
33
|
+
visit_stack = [root] # The LIFO queue of nodes to visit
|
34
|
+
|
35
|
+
begin # Traversal part (as a loop)
|
36
|
+
top = visit_stack.pop
|
37
|
+
if top.kind_of?(Array)
|
38
|
+
next if top.empty?
|
39
|
+
currChild = top.pop
|
40
|
+
visit_stack.push top
|
41
|
+
else
|
42
|
+
currChild = top
|
43
|
+
end
|
44
|
+
|
45
|
+
result << currChild # Return the visited child
|
46
|
+
|
47
|
+
unless currChild.atomic?
|
48
|
+
# in-order traversal implies LIFO queue
|
49
|
+
children_to_enqueue = currChild.children.reverse
|
50
|
+
visit_stack.push(children_to_enqueue)
|
51
|
+
end
|
52
|
+
end until visit_stack.empty?
|
53
|
+
end
|
54
|
+
|
55
|
+
return visitor
|
56
|
+
end
|
57
|
+
end # class
|
62
58
|
end # module
|
63
59
|
|
64
|
-
# End of file
|
60
|
+
# End of file
|
@@ -1,28 +1,22 @@
|
|
1
1
|
# File: quantifiable.rb
|
2
2
|
|
3
|
-
require_relative 'multiplicity'
|
3
|
+
require_relative 'multiplicity'
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
+
module Quantifiable
|
7
|
+
# Redefined method. Return true since it may not have any child.
|
8
|
+
def quantified?
|
9
|
+
return @quantifier.nil? ? false : true
|
10
|
+
end
|
6
11
|
|
12
|
+
def quantifier
|
13
|
+
@quantifier
|
14
|
+
end
|
7
15
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def quantified?
|
13
|
-
return @quantifier.nil? ? false :true
|
14
|
-
end
|
15
|
-
|
16
|
-
def quantifier
|
17
|
-
@quantifier
|
18
|
-
end
|
19
|
-
|
20
|
-
def quantifier=(aQuantifier)
|
21
|
-
@quantifier = aQuantifier
|
22
|
-
end
|
23
|
-
|
24
|
-
end # class
|
25
|
-
|
16
|
+
def quantifier=(aQuantifier)
|
17
|
+
@quantifier = aQuantifier
|
18
|
+
end
|
19
|
+
end # module
|
26
20
|
end # module
|
27
21
|
|
28
|
-
# End of file
|
22
|
+
# End of file
|
@@ -1,32 +1,29 @@
|
|
1
1
|
# File: repetition.rb
|
2
2
|
|
3
|
-
require_relative
|
3
|
+
require_relative 'monadic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
+
# Abstract class. An unary matching operator.
|
7
|
+
# It succeeds when the specified repetition of the child expression
|
8
|
+
# succeeds to match the subject text in the same serial arrangement
|
9
|
+
class Repetition < MonadicExpression
|
10
|
+
attr_reader(:multiplicity)
|
6
11
|
|
7
|
-
#
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# Constructor.
|
14
|
-
def initialize(childExpressionToRepeat, aMultiplicity)
|
15
|
-
super(childExpressionToRepeat)
|
16
|
-
@multiplicity = aMultiplicity
|
17
|
-
end
|
18
|
-
|
19
|
-
protected
|
12
|
+
# Constructor.
|
13
|
+
def initialize(childExpressionToRepeat, aMultiplicity)
|
14
|
+
super(childExpressionToRepeat)
|
15
|
+
@multiplicity = aMultiplicity
|
16
|
+
end
|
20
17
|
|
21
|
-
|
22
|
-
# Purpose: Return the String representation of the concatented expressions.
|
23
|
-
def text_repr()
|
24
|
-
result = all_child_text() + multiplicity.to_str()
|
25
|
-
return result
|
26
|
-
end
|
27
|
-
|
28
|
-
end # class
|
18
|
+
protected
|
29
19
|
|
20
|
+
# Conversion method re-definition.
|
21
|
+
# Purpose: Return the String representation of the concatented expressions.
|
22
|
+
def text_repr()
|
23
|
+
result = all_child_text + multiplicity.to_str
|
24
|
+
return result
|
25
|
+
end
|
26
|
+
end # class
|
30
27
|
end # module
|
31
28
|
|
32
|
-
# End of file
|
29
|
+
# End of file
|
@@ -1,27 +1,23 @@
|
|
1
1
|
# File: wildcard.rb
|
2
2
|
|
3
|
-
require_relative 'atomic_expression'
|
3
|
+
require_relative 'atomic_expression' # Access the superclass
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
|
+
# A wildcard matches any character (except for the newline).
|
7
|
+
class Wildcard < AtomicExpression
|
8
|
+
# Constructor
|
9
|
+
def initialize()
|
10
|
+
super
|
11
|
+
end
|
6
12
|
|
7
|
-
|
8
|
-
class Wildcard < AtomicExpression
|
13
|
+
protected
|
9
14
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
# Conversion method re-definition.
|
18
|
-
# Purpose: Return the String representation of the expression.
|
19
|
-
def text_repr()
|
20
|
-
return '.'
|
21
|
-
end
|
22
|
-
|
23
|
-
end # class
|
24
|
-
|
15
|
+
# Conversion method re-definition.
|
16
|
+
# Purpose: Return the String representation of the expression.
|
17
|
+
def text_repr()
|
18
|
+
return '.'
|
19
|
+
end
|
20
|
+
end # class
|
25
21
|
end # module
|
26
22
|
|
27
|
-
# End of file
|
23
|
+
# End of file
|
@@ -105,7 +105,7 @@ module SRL
|
|
105
105
|
# Delimiters, separators => single character token
|
106
106
|
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
107
107
|
elsif (lexeme = scanner.scan(/[0-9]{2,}/))
|
108
|
-
token = build_token('INTEGER', lexeme) # An integer has
|
108
|
+
token = build_token('INTEGER', lexeme) # An integer has 2..* digits
|
109
109
|
elsif (lexeme = scanner.scan(/[0-9]/))
|
110
110
|
token = build_token('DIGIT_LIT', lexeme)
|
111
111
|
elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
|
@@ -132,7 +132,7 @@ module SRL
|
|
132
132
|
def build_token(aSymbolName, aLexeme)
|
133
133
|
begin
|
134
134
|
token = Rley::Lexical::Token.new(aLexeme, aSymbolName)
|
135
|
-
rescue
|
135
|
+
rescue StandardError
|
136
136
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
137
137
|
raise ex
|
138
138
|
end
|
@@ -12,7 +12,7 @@ describe 'Integration tests:' do
|
|
12
12
|
def regexp_repr(aResult)
|
13
13
|
# Generate an abstract syntax parse tree from the parse result
|
14
14
|
tree = @engine.convert(aResult)
|
15
|
-
|
15
|
+
tree.root
|
16
16
|
end
|
17
17
|
|
18
18
|
before(:each) do
|
@@ -196,7 +196,7 @@ describe 'Integration tests:' do
|
|
196
196
|
end # context
|
197
197
|
|
198
198
|
context 'Parsing concatenation:' do
|
199
|
-
it
|
199
|
+
it 'should reject dangling comma' do
|
200
200
|
source = 'literally "a",'
|
201
201
|
result = parse(source)
|
202
202
|
expect(result).not_to be_success
|
@@ -212,7 +212,7 @@ describe 'Integration tests:' do
|
|
212
212
|
expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
|
213
213
|
end
|
214
214
|
|
215
|
-
it
|
215
|
+
it 'should parse a long sequence of patterns' do
|
216
216
|
source = <<-ENDS
|
217
217
|
any of (any character, one of "._%-+") once or more,
|
218
218
|
literally "@",
|
@@ -225,8 +225,9 @@ ENDS
|
|
225
225
|
expect(result).to be_success
|
226
226
|
|
227
227
|
regexp = regexp_repr(result)
|
228
|
-
# SRL
|
229
|
-
|
228
|
+
# SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
229
|
+
expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
|
230
|
+
expect(regexp.to_str).to eq(expectation)
|
230
231
|
end
|
231
232
|
end # context
|
232
233
|
|
@@ -345,7 +346,8 @@ ENDS
|
|
345
346
|
end
|
346
347
|
|
347
348
|
it 'should parse complex anonymous capturing group' do
|
348
|
-
|
349
|
+
source = 'capture(any of (literally "sample", (digit once or more)))'
|
350
|
+
result = parse(source)
|
349
351
|
expect(result).to be_success
|
350
352
|
|
351
353
|
regexp = regexp_repr(result)
|
@@ -361,7 +363,11 @@ ENDS
|
|
361
363
|
end
|
362
364
|
|
363
365
|
it 'should parse complex named capturing group' do
|
364
|
-
|
366
|
+
source = <<-END_SRL
|
367
|
+
capture(any of (literally "sample", (digit once or more)))
|
368
|
+
as "foo"
|
369
|
+
END_SRL
|
370
|
+
result = parse(source)
|
365
371
|
expect(result).to be_success
|
366
372
|
|
367
373
|
regexp = regexp_repr(result)
|
@@ -382,7 +388,8 @@ ENDS
|
|
382
388
|
end
|
383
389
|
|
384
390
|
it 'should parse complex named until capturing group' do
|
385
|
-
|
391
|
+
source = 'capture (anything once or more) as "foo" until literally "m"'
|
392
|
+
result = parse(source)
|
386
393
|
expect(result).to be_success
|
387
394
|
|
388
395
|
regexp = regexp_repr(result)
|
@@ -423,7 +430,7 @@ ENDS
|
|
423
430
|
expect(regexp.to_str).to eq('^match$')
|
424
431
|
end
|
425
432
|
|
426
|
-
it
|
433
|
+
it 'should accept anchor with a sequence of patterns' do
|
427
434
|
source = <<-ENDS
|
428
435
|
begin with any of (digit, letter, one of ".-") once or more,
|
429
436
|
literally ".",
|
@@ -434,10 +441,8 @@ ENDS
|
|
434
441
|
expect(result).to be_success
|
435
442
|
|
436
443
|
regexp = regexp_repr(result)
|
437
|
-
# SRL
|
444
|
+
# SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
438
445
|
expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
|
439
446
|
end
|
440
447
|
end # context
|
441
448
|
end # describe
|
442
|
-
|
443
|
-
|
@@ -1,159 +1,166 @@
|
|
1
1
|
# File: character_spec.rb
|
2
|
-
require_relative '../spec_helper'
|
2
|
+
require_relative '../spec_helper' # Use the RSpec test framework
|
3
3
|
require_relative '../../lib/regex/character'
|
4
4
|
|
5
|
-
module Regex
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
SampleChars = [?a, ?\0, ?\u0107]
|
10
|
-
|
11
|
-
# This constant holds the codepoints of the character selection
|
12
|
-
SampleInts = [0x61, 0, 0x0107]
|
13
|
-
|
14
|
-
# This constant holds an arbitrary selection of two characters (digrams) escape sequences
|
15
|
-
SampleDigrams = %w[ \n \e \0 \6 \k]
|
16
|
-
|
17
|
-
# This constant holds an arbitrary selection of escaped octal or hexadecimal literals
|
18
|
-
SampleNumEscs = %w[ \0 \07 \x07 \xa \x0F \u03a3 \u{a}]
|
19
|
-
|
20
|
-
before(:all) do
|
21
|
-
# Ensure that the set of codepoints is mapping the set of chars...
|
22
|
-
expect(SampleChars.map(&:ord)).to eq(SampleInts)
|
23
|
-
end
|
24
|
-
|
25
|
-
context 'Creation & initialization' do
|
26
|
-
it 'should be created with a with an integer value (codepoint) or...' do
|
27
|
-
SampleInts.each do |aCodepoint|
|
28
|
-
expect { Character.new(aCodepoint) }.not_to raise_error
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
it '...could be created with a single character String or...' do
|
33
|
-
SampleChars.each do |aChar|
|
34
|
-
expect { Character.new(aChar) }.not_to raise_error
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
it '...could be created with an escape sequence' do
|
39
|
-
# Case 1: escape sequence is a digram
|
40
|
-
SampleDigrams.each do |anEscapeSeq|
|
41
|
-
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
42
|
-
end
|
43
|
-
|
44
|
-
# Case 2: escape sequence is an escaped octal or hexadecimal literal
|
45
|
-
SampleNumEscs.each do |anEscapeSeq|
|
46
|
-
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
end # context
|
51
|
-
|
52
|
-
context 'Provided services' do
|
53
|
-
it 'Should know its lexeme if created from a string' do
|
54
|
-
# Lexeme is defined when the character was initialised from a text
|
55
|
-
SampleChars.each do |aChar|
|
56
|
-
ch = Character.new(aChar)
|
57
|
-
expect(ch.lexeme).to eq(aChar)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
it 'Should not know its lexeme representation if created from a codepoint' do
|
62
|
-
SampleInts.each do |aChar|
|
63
|
-
ch = Character.new(aChar)
|
64
|
-
expect(ch.lexeme).to be_nil
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
it 'should know its String representation' do
|
69
|
-
# Try for one character
|
70
|
-
newOne = Character.new(?\u03a3)
|
71
|
-
expect(newOne.char).to eq('Σ')
|
72
|
-
expect(newOne.to_str).to eq("\u03A3")
|
73
|
-
|
74
|
-
# Try with our chars sample
|
75
|
-
SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
|
76
|
-
|
77
|
-
# Try with our codepoint sample
|
78
|
-
mapped_chars = SampleInts.map { |aCodepoint| Character.new(aCodepoint).char }
|
79
|
-
expect(mapped_chars).to eq(SampleChars)
|
80
|
-
|
81
|
-
# Try with our escape sequence samples
|
82
|
-
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
83
|
-
Character.new(anEscSeq).to_str == String::class_eval(%Q|"#{anEscSeq}"|)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
it 'should know its codepoint' do
|
88
|
-
# Try for one character
|
89
|
-
newOne = Character.new(?\u03a3)
|
90
|
-
expect(newOne.codepoint).to eq(0x03a3)
|
91
|
-
|
92
|
-
# Try with our chars sample
|
93
|
-
allCodepoints = SampleChars.map { |aChar| Character.new(aChar).codepoint }
|
94
|
-
expect(allCodepoints).to eq(SampleInts)
|
95
|
-
|
96
|
-
# Try with our codepoint sample
|
97
|
-
mapped_chars = SampleInts.each { |aCodepoint| expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint) }
|
98
|
-
|
99
|
-
# Try with our escape sequence samples
|
100
|
-
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
101
|
-
expect(Character.new(anEscSeq).codepoint).to eq(String::class_eval(%Q|"#{anEscSeq}".ord()|))
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
it 'should known whether it is equal to another Object' do
|
106
|
-
newOne = Character.new(?\u03a3)
|
107
|
-
|
108
|
-
# Case 1: test equality with itself
|
109
|
-
expect(newOne).to eq(newOne)
|
110
|
-
|
111
|
-
# Case 2: test equality with another Character
|
112
|
-
expect(newOne).to eq(Character.new(?\u03a3))
|
113
|
-
expect(newOne).not_to eq(Character.new(?\u0333))
|
114
|
-
|
115
|
-
# Case 3: test equality with an integer value (equality based on codepoint value)
|
116
|
-
expect(newOne).to eq(0x03a3)
|
117
|
-
expect(newOne).not_to eq(0x0333)
|
118
|
-
|
119
|
-
# Case 4: test equality with a single-character String
|
120
|
-
expect(newOne).to eq(?\u03a3)
|
121
|
-
expect(newOne).not_to eq(?\u0333)
|
122
|
-
|
123
|
-
# Case 5: test fails with multiple character strings
|
124
|
-
expect(newOne).not_to eq('03a3')
|
125
|
-
|
126
|
-
# Case 6: equality testing with arbitray object
|
127
|
-
expect(newOne).not_to eq(nil)
|
128
|
-
expect(newOne).not_to eq(Object.new)
|
129
|
-
|
130
|
-
# In case 6, equality is based on to_s method.
|
131
|
-
simulator = double('fake')
|
132
|
-
expect(simulator).to receive(:to_s).and_return(?\u03a3)
|
133
|
-
expect(newOne).to eq(simulator)
|
134
|
-
|
135
|
-
# Create a module that re-defines the existing to_s method
|
136
|
-
module Tweak_to_s
|
137
|
-
def to_s() # Overwrite the existing to_s method
|
138
|
-
return ?\u03a3
|
139
|
-
end
|
140
|
-
end # module
|
141
|
-
weird = Object.new
|
142
|
-
weird.extend(Tweak_to_s)
|
143
|
-
expect(newOne).to eq(weird)
|
144
|
-
end
|
145
|
-
|
146
|
-
it "should know its readable description" do
|
147
|
-
ch1 = Character.new('a')
|
148
|
-
expect(ch1.explain).to eq("the character 'a'")
|
149
|
-
|
150
|
-
ch2 = Character.new(?\u03a3)
|
151
|
-
expect(ch2.explain).to eq("the character '\u03a3'")
|
152
|
-
end
|
153
|
-
end # context
|
154
|
-
|
155
|
-
end # describe
|
5
|
+
module Regex # Open this namespace, to get rid of scope qualifiers
|
6
|
+
describe Character do
|
7
|
+
# This constant holds an arbitrary selection of characters
|
8
|
+
SampleChars = [?a, ?\0, ?\u0107].freeze
|
156
9
|
|
10
|
+
# This constant holds the codepoints of the character selection
|
11
|
+
SampleInts = [0x61, 0, 0x0107].freeze
|
12
|
+
|
13
|
+
# This constant holds an arbitrary selection of two characters (digrams)
|
14
|
+
# escape sequences
|
15
|
+
SampleDigrams = %w[\n \e \0 \6 \k].freeze
|
16
|
+
|
17
|
+
# This constant holds an arbitrary selection of escaped octal
|
18
|
+
# or hexadecimal literals
|
19
|
+
SampleNumEscs = %w[\0 \07 \x07 \xa \x0F \u03a3 \u{a}].freeze
|
20
|
+
|
21
|
+
before(:all) do
|
22
|
+
# Ensure that the set of codepoints is mapping the set of chars...
|
23
|
+
expect(SampleChars.map(&:ord)).to eq(SampleInts)
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'Creation & initialization' do
|
27
|
+
it 'should be created with a with an integer value (codepoint) or...' do
|
28
|
+
SampleInts.each do |aCodepoint|
|
29
|
+
expect { Character.new(aCodepoint) }.not_to raise_error
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
it '...could be created with a single character String or...' do
|
34
|
+
SampleChars.each do |aChar|
|
35
|
+
expect { Character.new(aChar) }.not_to raise_error
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
it '...could be created with an escape sequence' do
|
40
|
+
# Case 1: escape sequence is a digram
|
41
|
+
SampleDigrams.each do |anEscapeSeq|
|
42
|
+
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
43
|
+
end
|
44
|
+
|
45
|
+
# Case 2: escape sequence is an escaped octal or hexadecimal literal
|
46
|
+
SampleNumEscs.each do |anEscapeSeq|
|
47
|
+
expect { Character.new(anEscapeSeq) }.not_to raise_error
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end # context
|
51
|
+
|
52
|
+
context 'Provided services' do
|
53
|
+
it 'Should know its lexeme if created from a string' do
|
54
|
+
# Lexeme is defined when the character was initialised from a text
|
55
|
+
SampleChars.each do |aChar|
|
56
|
+
ch = Character.new(aChar)
|
57
|
+
expect(ch.lexeme).to eq(aChar)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
it 'Should not know its lexeme representation from a codepoint' do
|
62
|
+
SampleInts.each do |aChar|
|
63
|
+
ch = Character.new(aChar)
|
64
|
+
expect(ch.lexeme).to be_nil
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'should know its String representation' do
|
69
|
+
# Try for one character
|
70
|
+
newOne = Character.new(?\u03a3)
|
71
|
+
expect(newOne.char).to eq('Σ')
|
72
|
+
expect(newOne.to_str).to eq("\u03A3")
|
73
|
+
|
74
|
+
# Try with our chars sample
|
75
|
+
SampleChars.each { |aChar| Character.new(aChar).to_str == aChar }
|
76
|
+
|
77
|
+
# Try with our codepoint sample
|
78
|
+
mapped_chars = SampleInts.map do |aCodepoint|
|
79
|
+
Character.new(aCodepoint).char
|
80
|
+
end
|
81
|
+
expect(mapped_chars).to eq(SampleChars)
|
82
|
+
|
83
|
+
# Try with our escape sequence samples
|
84
|
+
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
85
|
+
expectation = String.class_eval(%Q|"#{anEscSeq}"|, __FILE__, __LINE__)
|
86
|
+
Character.new(anEscSeq).to_str == expectation
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'should know its codepoint' do
|
91
|
+
# Try for one character
|
92
|
+
newOne = Character.new(?\u03a3)
|
93
|
+
expect(newOne.codepoint).to eq(0x03a3)
|
94
|
+
|
95
|
+
# Try with our chars sample
|
96
|
+
allCodepoints = SampleChars.map do |aChar|
|
97
|
+
Character.new(aChar).codepoint
|
98
|
+
end
|
99
|
+
expect(allCodepoints).to eq(SampleInts)
|
100
|
+
|
101
|
+
# Try with our codepoint sample
|
102
|
+
mapped_chars = SampleInts.each do |aCodepoint|
|
103
|
+
expect(Character.new(aCodepoint).codepoint).to eq(aCodepoint)
|
104
|
+
end
|
105
|
+
|
106
|
+
# Try with our escape sequence samples
|
107
|
+
(SampleDigrams + SampleNumEscs).each do |anEscSeq|
|
108
|
+
expectation = String.class_eval(%Q|"#{anEscSeq}".ord()|, __FILE__, __LINE__)
|
109
|
+
expect(Character.new(anEscSeq).codepoint).to eq(expectation)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'should known whether it is equal to another Object' do
|
114
|
+
newOne = Character.new(?\u03a3)
|
115
|
+
|
116
|
+
# Case 1: test equality with itself
|
117
|
+
expect(newOne).to eq(newOne)
|
118
|
+
|
119
|
+
# Case 2: test equality with another Character
|
120
|
+
expect(newOne).to eq(Character.new(?\u03a3))
|
121
|
+
expect(newOne).not_to eq(Character.new(?\u0333))
|
122
|
+
|
123
|
+
# Case 3: test equality with an integer value
|
124
|
+
# (equality based on codepoint value)
|
125
|
+
expect(newOne).to eq(0x03a3)
|
126
|
+
expect(newOne).not_to eq(0x0333)
|
127
|
+
|
128
|
+
# Case 4: test equality with a single-character String
|
129
|
+
expect(newOne).to eq(?\u03a3)
|
130
|
+
expect(newOne).not_to eq(?\u0333)
|
131
|
+
|
132
|
+
# Case 5: test fails with multiple character strings
|
133
|
+
expect(newOne).not_to eq('03a3')
|
134
|
+
|
135
|
+
# Case 6: equality testing with arbitray object
|
136
|
+
expect(newOne).not_to eq(nil)
|
137
|
+
expect(newOne).not_to eq(Object.new)
|
138
|
+
|
139
|
+
# In case 6, equality is based on to_s method.
|
140
|
+
simulator = double('fake')
|
141
|
+
expect(simulator).to receive(:to_s).and_return(?\u03a3)
|
142
|
+
expect(newOne).to eq(simulator)
|
143
|
+
|
144
|
+
# Create a module that re-defines the existing to_s method
|
145
|
+
module Tweak_to_s
|
146
|
+
def to_s() # Overwrite the existing to_s method
|
147
|
+
return ?\u03a3
|
148
|
+
end
|
149
|
+
end # module
|
150
|
+
weird = Object.new
|
151
|
+
weird.extend(Tweak_to_s)
|
152
|
+
expect(newOne).to eq(weird)
|
153
|
+
end
|
154
|
+
|
155
|
+
it 'should know its readable description' do
|
156
|
+
ch1 = Character.new('a')
|
157
|
+
expect(ch1.explain).to eq("the character 'a'")
|
158
|
+
|
159
|
+
ch2 = Character.new(?\u03a3)
|
160
|
+
expect(ch2.explain).to eq("the character '\u03a3'")
|
161
|
+
end
|
162
|
+
end # context
|
163
|
+
end # describe
|
157
164
|
end # module
|
158
165
|
|
159
|
-
# End of file
|
166
|
+
# End of file
|