lexical_analyzer 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -17
- data/lib/lexical_analyzer/lexical_rule.rb +24 -0
- data/lib/lexical_analyzer/version.rb +1 -1
- data/lib/lexical_analyzer.rb +10 -10
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 17cf068290c697c20216d7e8a171392caa14dc9f
+  data.tar.gz: 44cc961d916c2b03e226138ff11c0b637f54e52b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 80a7590b7abd987fc22cdf2be1b547a79ec2b04d4ba39d9961eb71544a03ef1604935e01b66d12da0198cb8f6e5c3e7215cb74d513b018e8ebd90307449eab82
+  data.tar.gz: 8f0483b6d822154daf2757ebeb0fb28a35ab38a0b9e42e8da10615d5d075f7f20c78b633487b10d405792e532fd3a6546ed6568344ba29bc77939c005b8fb912
data/README.md
CHANGED
@@ -30,44 +30,63 @@ be analyzed and an array of rules for performing that task.
 ```ruby
 lexical_analyser = LexicalAnalyzer.new(text: text, rules: rules)

+token = lexical_analyser.get
+
 ```

-
+It is sometimes desirable to reuse an existing lexical analyzer. This can be
+done with the renew method.
+
+```ruby
+lexical_analyser.renew(text: new_text)

-
+token = lexical_analyser.get

-
+```
+
+Note: The renew method takes the same arguments as the new method, text and an
+array of rules. If these are omitted, the default is to leave that value
+unchanged. The renew method returns the updated lexical analyzer just like the
+new method returns the newly created one.

-
-correct operation of the analyzer.
+#### Rules

-
-
+The rules are an array of LexicalRule objects. Each consists of a symbol, a
+regular expression, and an optional action.

 ```ruby
-#
-
+# Rule with default block returns [:equality, "=="] on a match.
+LexicalRule.new(:equality, /\A==/)

-#
-
+# Rule with an ignore block, ignores matches.
+LexicalRule.new(:spaces, /\A\s+/) {|_value| false }

-#
-
+# Rule with an integer block returns [:integer, an_integer] on a match.
+LexicalRule.new(:integer, /\A\d+/) {|value| [@symbol, value.to_i] }

+# Rule with a block that expands to a sub-rule. Returns the value of the
+# lexical analyzer in the captured variable ka.
+LexicalRule.new(:identifier, /\A[a-zA-Z_]\w*(?=\W|$|\z)/) {|value|
+  ka.renew(text: value).get
+}
 ```

-
+Notes:
+
+* The regular expression must begin with a \A clause to ensure correct
+  operation of the analyzer.
+* The order of rules is important. For example, if there are two rules
 looking for "==" and "=" respectively, if the "=" is ahead of the "==" rule
 in the array the "==" rule will never trigger and the analysis will be
 incorrect.

 #### Tokens

-The token is
+The output token is an array with two elements.

 token[0] - the symbol extracted from the rule that generated this token.

-token[1] - the text that generated this token.
+token[1] - the text that generated this token or its value.


 #### Example
@@ -88,7 +107,9 @@ action.

 #### Plan B

-Go to the GitHub repository and raise an
+Go to the GitHub repository and raise an
+[issue](https://github.com/PeterCamilleri/lexical_analyzer/issues)
+calling attention to some
 aspect that could use some TLC or a suggestion or an idea.

 ## License
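Taken together, the README changes above describe the 0.3.0 workflow: build an array of LexicalRule objects, hand them to LexicalAnalyzer.new, pull tokens with get, and recycle the analyzer with renew. A minimal end-to-end sketch of that workflow follows; it assumes only the API visible in this diff, and the rule set, sample text, and variable names (rules, lexer) are illustrative rather than taken from the gem.

```ruby
require 'lexical_analyzer'

# Illustrative rule set. Order matters: :equality must precede :assign.
rules = [
  LexicalRule.new(:spaces,   /\A\s+/) { |_value| false },               # ignore whitespace
  LexicalRule.new(:equality, /\A==/),                                    # default action
  LexicalRule.new(:assign,   /\A=/),
  LexicalRule.new(:integer,  /\A\d+/) { |value| [@symbol, value.to_i] }  # convert to Integer
]

lexer = LexicalAnalyzer.new(text: "1 == 2", rules: rules)

lexer.get                        # => [:integer, 1]
lexer.get                        # => [:equality, "=="]
lexer.get                        # => [:integer, 2]

# Reuse the same analyzer (and rules) on fresh text.
lexer.renew(text: "40 = 2").get  # => [:integer, 40]
```

What get returns once the text is exhausted is not shown in this diff, so it is not assumed here.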
data/lib/lexical_analyzer/lexical_rule.rb
ADDED
@@ -0,0 +1,24 @@
+# The Ruby Compiler Toolkit Project - Lexical Rule
+# A rule for lexical analysis.
+
+class LexicalRule
+
+  # Create a lexical rule.
+  def initialize(symbol, regex, &action)
+    @symbol = symbol
+    @regex = regex
+
+    define_singleton_method(:call, &action) if block_given?
+  end
+
+  # Does this rule match?
+  def match(text)
+    text.match(@regex)
+  end
+
+  # The default rule action.
+  def call(value)
+    [@symbol, value]
+  end
+
+end
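A short sketch of how the new LexicalRule behaves, based only on the class added above. When a block is supplied, define_singleton_method installs it as a per-instance call method, so the block runs with the rule as self and can read @symbol; with no block, the default call simply pairs the symbol with the matched text. The example values are illustrative.

```ruby
require 'lexical_analyzer'

# No block: the default call action pairs the symbol with the matched text.
equality = LexicalRule.new(:equality, /\A==/)
equality.call("==")    # => [:equality, "=="]
equality.match("== 1") # => MatchData for "==" (match just applies @regex to the text)

# With a block: the block becomes a singleton call method on this one instance,
# so @symbol inside it resolves to the rule's own symbol.
integer = LexicalRule.new(:integer, /\A\d+/) { |value| [@symbol, value.to_i] }
integer.call("42")     # => [:integer, 42]
```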
data/lib/lexical_analyzer.rb
CHANGED
@@ -1,6 +1,7 @@
 # The Ruby Compiler Toolkit Project - Lexical Analyzer
 # Scan input and extract lexical tokens.

+require_relative 'lexical_analyzer/lexical_rule'
 require_relative 'lexical_analyzer/version'

 # The RCTP class for lexical analysis.
@@ -8,26 +9,25 @@ class LexicalAnalyzer
   attr_reader :text  # Access the text in the analyzer.
   attr_reader :rules # Access the array of lexical rules.

-  # Some array index values.
-  SYMBOL = 0
-  REGEX = 1
-  BLOCK = 2
-
-  # The default tokenizer block
-  DTB = lambda {|symbol, value| [symbol, value] }
-
   # Set things up.
   def initialize(text: "", rules: [])
     @text = text
     @rules = rules
   end

+  # Reuse an existing lexical analyzer.
+  def renew(text: @text, rules: @rules)
+    @text = text
+    @rules = rules
+    self
+  end
+
   # Get the next lexical token
   def get(extra=[])
     (rules + extra).each do |rule|
-      if match_data =
+      if match_data = rule.match(text)
         @text = match_data.post_match
-        return
+        return rule.call(match_data.to_s) || get
       end
     end

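The rewritten analyzer above delegates matching and token construction to LexicalRule objects (replacing the SYMBOL/REGEX/BLOCK indexes and the DTB lambda), adds a renew method that returns self, and treats a falsy action result as a signal to skip the match and recurse, which is how ignore rules such as whitespace drop out of the token stream. A small sketch of that behaviour, assuming only the 0.3.0 code above; the rules and text are illustrative.

```ruby
require 'lexical_analyzer'

rules = [
  LexicalRule.new(:spaces, /\A\s+/) { |_value| false },  # falsy result => get recurses
  LexicalRule.new(:word,   /\A[a-z]+/)                   # default action keeps the text
]

lexer = LexicalAnalyzer.new(text: "  hello world", rules: rules)

lexer.get   # => [:word, "hello"]  (spaces matched first, returned false, get recursed)
lexer.text  # => " world"          (post_match is what remains in the analyzer)
lexer.get   # => [:word, "world"]

# get also accepts extra, one-off rules appended after the standard ones.
lexer.get([LexicalRule.new(:eot, /\A\z/)])  # => [:eot, ""]
```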
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: lexical_analyzer
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.3.0
 platform: ruby
 authors:
 - PeterCamilleri
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -80,6 +80,7 @@ files:
 - README.md
 - lexical_analyzer.gemspec
 - lib/lexical_analyzer.rb
+- lib/lexical_analyzer/lexical_rule.rb
 - lib/lexical_analyzer/version.rb
 - rakefile.rb
 - reek.txt