lexical_analyzer 0.2.2 → 0.3.0
- checksums.yaml +4 -4
- data/README.md +38 -17
- data/lib/lexical_analyzer/lexical_rule.rb +24 -0
- data/lib/lexical_analyzer/version.rb +1 -1
- data/lib/lexical_analyzer.rb +10 -10
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 17cf068290c697c20216d7e8a171392caa14dc9f
+  data.tar.gz: 44cc961d916c2b03e226138ff11c0b637f54e52b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 80a7590b7abd987fc22cdf2be1b547a79ec2b04d4ba39d9961eb71544a03ef1604935e01b66d12da0198cb8f6e5c3e7215cb74d513b018e8ebd90307449eab82
+  data.tar.gz: 8f0483b6d822154daf2757ebeb0fb28a35ab38a0b9e42e8da10615d5d075f7f20c78b633487b10d405792e532fd3a6546ed6568344ba29bc77939c005b8fb912
data/README.md
CHANGED
@@ -30,44 +30,63 @@ be analyzed and an array of rules for performing that task.
 ```ruby
 lexical_analyser = LexicalAnalyzer.new(text: text, rules: rules)
 
+token = lexical_analyser.get
+
 ```
 
-
+It is sometimes desirable to reuse an existing lexical analyzer. This can be
+done with the renew method.
+
+```ruby
+lexical_analyser.renew(text: new_text)
 
-
+token = lexical_analyser.get
 
-
+```
+
+Note: The renew method takes the same arguments as the new method, text and an
+array of rules. If these are omitted, the default is to leave that value
+unchanged. The renew method returns the updated lexical analyzer just like the
+new method returns the newly created one.
 
-
-correct operation of the analyzer.
+#### Rules
 
-
-
+The rules are an array of LexicalRule objects. Each consists of a symbol, a
+regular expression, and an optional action.
 
 ```ruby
-#
-
+# Rule with default block returns [:equality, "=="] on a match.
+LexicalRule.new(:equality, /\A==/)
 
-#
-
+# Rule with an ignore block, ignores matches.
+LexicalRule.new(:spaces, /\A\s+/) {|_value| false }
 
-#
-
+# Rule with an integer block returns [:integer, an_integer] on a match.
+LexicalRule.new(:integer, /\A\d+/) {|value| [@symbol, value.to_i] }
 
+# Rule with a block that expands to a sub-rule. Returns the value of the
+# lexical analyzer captured in the variable ka.
+LexicalRule.new(:identifier, /\A[a-zA-Z_]\w*(?=\W|$|\z)/) {|value|
+  ka.renew(text: value).get
+}
 ```
 
-
+Notes:
+
+* The regular expression must begin with a \A clause to ensure correct
+  operation of the analyzer.
+* The order of rules is important. For example, if there are two rules
 looking for "==" and "=" respectively, if the "=" is ahead of the "==" rule
 in the array the "==" rule will never trigger and the analysis will be
 incorrect.
 
 #### Tokens
 
-The token is
+The output token is an array with two elements.
 
 token[0] - the symbol extracted from the rule that generated this token.
 
-token[1] - the text that generated this token.
+token[1] - the text that generated this token or its value.
 
 
 #### Example
@@ -88,7 +107,9 @@ action.
 
 #### Plan B
 
-Go to the GitHub repository and raise an
+Go to the GitHub repository and raise an
+[issue](https://github.com/PeterCamilleri/lexical_analyzer/issues)
+calling attention to some
 aspect that could use some TLC or a suggestion or an idea.
 
 ## License
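Taken together, the README changes above describe the full 0.3.0 workflow: build an analyzer with new, pull tokens with get, and recycle the instance with renew. Below is a minimal end-to-end sketch; the sample text, the :assign rule, and the assumption that get returns a falsy value once no rule matches the remaining text (the tail of the get method is not shown in this diff) are all illustrative rather than documented behavior.

```ruby
require 'lexical_analyzer'

# Order matters: :equality must come before :assign or "==" would never trigger.
rules = [
  LexicalRule.new(:equality, /\A==/),
  LexicalRule.new(:assign,   /\A=/),
  LexicalRule.new(:integer,  /\A\d+/) {|value| [@symbol, value.to_i] },
  LexicalRule.new(:spaces,   /\A\s+/) {|_value| false }  # ignore whitespace
]

lexer = LexicalAnalyzer.new(text: "1 == 2", rules: rules)

tokens = []
while (token = lexer.get)   # assumed falsy once the text is exhausted
  tokens << token
end
tokens   # => [[:integer, 1], [:equality, "=="], [:integer, 2]]

# Reuse the same analyzer for fresh input.
lexer.renew(text: "40 = 2")
```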
data/lib/lexical_analyzer/lexical_rule.rb
ADDED
@@ -0,0 +1,24 @@
+# The Ruby Compiler Toolkit Project - Lexical Rule
+# A rule for lexical analysis.
+
+class LexicalRule
+
+  # Create a lexical rule.
+  def initialize(symbol, regex, &action)
+    @symbol = symbol
+    @regex  = regex
+
+    define_singleton_method(:call, &action) if block_given?
+  end
+
+  # Does this rule match?
+  def match(text)
+    text.match(@regex)
+  end
+
+  # The default rule action.
+  def call(value)
+    [@symbol, value]
+  end
+
+end
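For clarity, here is a small sketch of how LexicalRule behaves on its own; the sample regexes, strings, and the expected results in the comments are illustrative, not taken from the gem's tests.

```ruby
require 'lexical_analyzer'

# With no block, call uses the default action: [symbol, matched_text].
equality = LexicalRule.new(:equality, /\A==/)
match = equality.match("== rest")     # MatchData for "=="
equality.call(match.to_s)             # => [:equality, "=="]

# A block is installed as a singleton call method, so self is the rule
# and @symbol is its symbol.
integer = LexicalRule.new(:integer, /\A\d+/) {|value| [@symbol, value.to_i] }
integer.call("42")                    # => [:integer, 42]

# A block that returns false marks the match as "ignore"; the analyzer's
# get method then recurses to look for the next token.
spaces = LexicalRule.new(:spaces, /\A\s+/) {|_value| false }
spaces.call("   ")                    # => false
```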
data/lib/lexical_analyzer.rb
CHANGED
@@ -1,6 +1,7 @@
 # The Ruby Compiler Toolkit Project - Lexical Analyzer
 # Scan input and extract lexical tokens.
 
+require_relative 'lexical_analyzer/lexical_rule'
 require_relative 'lexical_analyzer/version'
 
 # The RCTP class for lexical analysis.
@@ -8,26 +9,25 @@ class LexicalAnalyzer
   attr_reader :text  # Access the text in the analyzer.
   attr_reader :rules # Access the array of lexical rules.
 
-  # Some array index values.
-  SYMBOL = 0
-  REGEX = 1
-  BLOCK = 2
-
-  # The default tokenizer block
-  DTB = lambda {|symbol, value| [symbol, value] }
-
   # Set things up.
   def initialize(text: "", rules: [])
     @text = text
     @rules = rules
   end
 
+  # Reuse an existing lexical analyzer.
+  def renew(text: @text, rules: @rules)
+    @text = text
+    @rules = rules
+    self
+  end
+
   # Get the next lexical token
   def get(extra=[])
     (rules + extra).each do |rule|
-      if match_data =
+      if match_data = rule.match(text)
         @text = match_data.post_match
-        return
+        return rule.call(match_data.to_s) || get
       end
     end
 
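Beyond the switch to LexicalRule objects, note that get still accepts an extra array of rules, which is appended to the analyzer's own rules for that single call. A hedged usage sketch follows; the :comment rule and the sample text are invented for illustration.

```ruby
require 'lexical_analyzer'

lexer = LexicalAnalyzer.new(
  text:  "# a comment",
  rules: [LexicalRule.new(:spaces, /\A\s+/) {|_value| false }]
)

# Extra rules are tried after the analyzer's own rules, for this call only.
comment = LexicalRule.new(:comment, /\A#[^\n]*/)
lexer.get([comment])   # => [:comment, "# a comment"]
lexer.text             # => "" (the matched text has been consumed)
```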
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: lexical_analyzer
 version: !ruby/object:Gem::Version
-  version: 0.2.2
+  version: 0.3.0
 platform: ruby
 authors:
 - PeterCamilleri
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -80,6 +80,7 @@ files:
 - README.md
 - lexical_analyzer.gemspec
 - lib/lexical_analyzer.rb
+- lib/lexical_analyzer/lexical_rule.rb
 - lib/lexical_analyzer/version.rb
 - rakefile.rb
 - reek.txt