lexeme 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/lexeme.rb +2 -3
- data/lib/lexeme/lexeme.rb +23 -35
- data/lib/lexeme/ruleset.rb +13 -4
- data/lib/lexeme/token.rb +6 -2
- data/lib/lexeme/version.rb +3 -0
- metadata +3 -2
data/lib/lexeme.rb
CHANGED
|
@@ -3,10 +3,9 @@ require 'lexeme/ruleset'
|
|
|
3
3
|
require 'lexeme/token'
|
|
4
4
|
require 'lexeme/lexeme'
|
|
5
5
|
require 'lexeme/core_extensions'
|
|
6
|
+
require 'lexeme/version'
|
|
6
7
|
|
|
7
8
|
module Lexeme
|
|
8
|
-
VERSION = '0.0.2'
|
|
9
|
-
|
|
10
9
|
def self.analyze(source = nil)
|
|
11
10
|
raise RuntimeError, 'Please use #define before calling #analyze.' unless @lexer
|
|
12
11
|
|
|
@@ -20,7 +19,7 @@ module Lexeme
|
|
|
20
19
|
end
|
|
21
20
|
|
|
22
21
|
def self.define(&block)
|
|
23
|
-
@lexer = Lexeme.new
|
|
22
|
+
@lexer = Lexeme.new
|
|
24
23
|
@lexer.instance_eval(&block)
|
|
25
24
|
|
|
26
25
|
@lexer
|
data/lib/lexeme/lexeme.rb
CHANGED
|
@@ -38,71 +38,59 @@ module Lexeme
|
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
private
|
|
41
|
-
|
|
41
|
+
|
|
42
|
+
# TODO: Work on the time complexity for this one
|
|
43
|
+
# This could be better.
|
|
42
44
|
def scan(input)
|
|
43
45
|
previous = ''
|
|
44
46
|
current = ''
|
|
45
47
|
tokens = []
|
|
46
48
|
line = 1
|
|
47
|
-
string_state = false
|
|
48
49
|
|
|
49
50
|
input.each_char do |c|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
previous << c
|
|
54
|
-
string_state ^= true
|
|
55
|
-
next
|
|
51
|
+
if c == "\n"
|
|
52
|
+
line += 1
|
|
53
|
+
c = ' '
|
|
56
54
|
end
|
|
57
|
-
|
|
58
|
-
if
|
|
59
|
-
previous << c
|
|
60
|
-
next
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
if ignorable?(c)
|
|
64
|
-
unless previous.empty?
|
|
65
|
-
token = identify(previous)
|
|
66
|
-
raise RuntimeError, "Unknown token #{previous} on line #{line}!" if
|
|
67
|
-
token.nil? || token.name.nil?
|
|
68
|
-
|
|
69
|
-
tokens << token
|
|
70
|
-
end
|
|
71
|
-
|
|
55
|
+
|
|
56
|
+
if !previous.empty? && ignorable?(previous)
|
|
72
57
|
previous = ''
|
|
73
58
|
current = ''
|
|
74
|
-
next
|
|
75
59
|
end
|
|
76
|
-
|
|
60
|
+
|
|
77
61
|
current << c
|
|
62
|
+
|
|
78
63
|
if !identifiable?(current)
|
|
79
|
-
raise RuntimeError, "Unknown token
|
|
80
|
-
previous.empty?
|
|
64
|
+
raise RuntimeError, "Unknown token `#{current}` on line #{line}" if
|
|
65
|
+
previous.empty?
|
|
81
66
|
|
|
82
67
|
token = identify(previous)
|
|
83
|
-
|
|
84
|
-
raise RuntimeError, "Unknown token
|
|
68
|
+
|
|
69
|
+
raise RuntimeError, "Unknown token `#{previous}` on line #{line}" if
|
|
85
70
|
token.nil? || token.name.nil?
|
|
86
71
|
|
|
87
|
-
tokens
|
|
72
|
+
tokens << token
|
|
88
73
|
previous = c.clone
|
|
89
74
|
current = c.clone
|
|
90
|
-
|
|
91
75
|
next
|
|
92
76
|
end
|
|
93
77
|
|
|
94
78
|
previous = current.clone
|
|
95
79
|
end
|
|
96
80
|
|
|
97
|
-
|
|
98
|
-
|
|
81
|
+
if !previous.empty? && !ignorable?(previous)
|
|
82
|
+
token = identify(previous)
|
|
83
|
+
raise RuntimeError, "Unknow token `#{previous}` on line #{line}" if
|
|
84
|
+
token.nil? || token.name.nil?
|
|
85
|
+
|
|
86
|
+
tokens << token
|
|
99
87
|
end
|
|
100
88
|
|
|
101
89
|
tokens
|
|
102
90
|
end
|
|
103
91
|
|
|
104
|
-
def ignorable?(
|
|
105
|
-
@ruleset.ignorable?
|
|
92
|
+
def ignorable?(string)
|
|
93
|
+
@ruleset.ignorable?(string)
|
|
106
94
|
end
|
|
107
95
|
|
|
108
96
|
def identifiable?(string)
|
data/lib/lexeme/ruleset.rb
CHANGED
|
@@ -2,10 +2,15 @@ module Lexeme
|
|
|
2
2
|
class Ruleset
|
|
3
3
|
def initialize(&block)
|
|
4
4
|
@rules = []
|
|
5
|
-
@ignore = []
|
|
5
|
+
@ignore = []
|
|
6
6
|
|
|
7
|
+
# this is here to capture any other
|
|
8
|
+
# symbols that could be identified
|
|
9
|
+
# as var names, function names ...
|
|
7
10
|
@unknown = Rule.new(nil, /^\w+$/)
|
|
8
|
-
|
|
11
|
+
|
|
12
|
+
# this skips all whitespaces by default
|
|
13
|
+
@ignore << /^\s+/
|
|
9
14
|
|
|
10
15
|
yield self if block_given?
|
|
11
16
|
end
|
|
@@ -18,9 +23,9 @@ module Lexeme
|
|
|
18
23
|
@ignore << regex
|
|
19
24
|
end
|
|
20
25
|
|
|
21
|
-
def ignorable?(
|
|
26
|
+
def ignorable?(string)
|
|
22
27
|
@ignore.each do |i|
|
|
23
|
-
return true if
|
|
28
|
+
return true if string =~ i
|
|
24
29
|
end
|
|
25
30
|
|
|
26
31
|
false
|
|
@@ -32,6 +37,10 @@ module Lexeme
|
|
|
32
37
|
end
|
|
33
38
|
|
|
34
39
|
return true if string =~ @unknown.regex
|
|
40
|
+
|
|
41
|
+
@ignore.each do |i|
|
|
42
|
+
return true if string =~ i
|
|
43
|
+
end
|
|
35
44
|
|
|
36
45
|
false
|
|
37
46
|
end
|
data/lib/lexeme/token.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lexeme
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.2
|
|
4
|
+
version: 0.0.3
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-07-
|
|
12
|
+
date: 2013-07-13 00:00:00.000000000 Z
|
|
13
13
|
dependencies: []
|
|
14
14
|
description: A simple lexical analyzer written in Ruby
|
|
15
15
|
email: vladimir.ivic@icloud.com
|
|
@@ -23,6 +23,7 @@ files:
|
|
|
23
23
|
- lib/lexeme/ruleset.rb
|
|
24
24
|
- lib/lexeme/rule.rb
|
|
25
25
|
- lib/lexeme/core_extensions.rb
|
|
26
|
+
- lib/lexeme/version.rb
|
|
26
27
|
homepage: http://rubygems.org/gems/lexeme
|
|
27
28
|
licenses: []
|
|
28
29
|
post_install_message:
|