lexeme 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/lexeme/lexeme.rb +81 -0
- data/lib/lexeme/rule.rb +15 -0
- data/lib/lexeme/ruleset.rb +50 -0
- data/lib/lexeme/token.rb +18 -0
- data/lib/lexeme.rb +20 -0
- metadata +49 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
module Lexeme
  # Character-by-character lexical analyzer.
  #
  # All classification is delegated to +ruleset+, which must respond to
  # +ignorable?(char)+, +identifiable?(string)+ and +identify(string)+.
  class Lexeme
    # The rules consulted while scanning; must be assigned before #analyze.
    attr_accessor :ruleset

    # Reads the file at +source+ and splits its content into tokens.
    #
    # @param source [String] path of the file to analyze
    # @return [Array] the tokens returned by the ruleset, in source order
    # @raise [ArgumentError] if +source+ is not a String or is empty
    # @raise [RuntimeError] if the file does not exist or a lexeme cannot
    #   be identified
    def analyze(source)
      raise ArgumentError, 'Argument 1 must be a String' unless source.is_a?(String)
      raise ArgumentError, 'Source not defined' if source.empty?
      # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
      raise RuntimeError, 'Source file not readable' unless File.exist?(source)

      # File.read (unlike IO.read on older Rubies) never treats a leading
      # "|" in the path as a command to run.
      scan(File.read(source))
    end

    private

    # Splits +input+ into tokens using longest-match: characters are
    # appended to the pending lexeme for as long as the ruleset can still
    # identify the result; the first character that breaks the match ends
    # the pending lexeme and starts the next one.
    #
    # @param input [String] the text to tokenize
    # @return [Array] identified tokens
    # @raise [RuntimeError] when a lexeme cannot be identified
    def scan(input)
      tokens = []
      pending = '' # longest identifiable lexeme collected so far
      line = 1

      input.each_char do |c|
        if ignorable?(c)
          # An ignorable character terminates whatever lexeme is pending.
          tokens << tokenize(pending, line) unless pending.empty?
          pending = ''
        else
          candidate = pending + c
          if identifiable?(candidate)
            pending = candidate
          else
            raise RuntimeError, "Unknown token #{candidate} on line #{line}!" if pending.empty?

            tokens << tokenize(pending, line)
            pending = c
          end
        end

        # Count the newline only after the lexeme it terminates has been
        # handled, so errors are reported on the line the lexeme is on.
        line += 1 if c == "\n"
      end

      # Input that does not end with an ignorable character still has a
      # lexeme pending here; without this flush the last token is lost.
      tokens << tokenize(pending, line) unless pending.empty?
      tokens
    end

    # Turns +lexeme+ into a token, rejecting results the ruleset could not
    # name (nil, or a token whose name is nil).
    def tokenize(lexeme, line)
      token = identify(lexeme)
      raise RuntimeError, "Unknown token #{lexeme} on line #{line}!" if token.nil? || token.name.nil?

      token
    end

    # The three helpers below forward to the configured ruleset.

    def ignorable?(char)
      @ruleset.ignorable? char
    end

    def identifiable?(string)
      @ruleset.identifiable? string
    end

    def identify(string)
      @ruleset.identify string
    end
  end
end
|
data/lib/lexeme/rule.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module Lexeme
  # Associates a token name with the pattern that recognizes it.
  class Rule
    attr_reader :name, :regex

    # @param name [String, nil] token name; nil is accepted
    # @param regex [Regexp] pattern a lexeme must match
    # @raise [ArgumentError] if +name+ is neither nil nor a String, or if
    #   +regex+ is not a Regexp
    def initialize(name, regex)
      name_ok = name.nil? || name.is_a?(String)
      raise ArgumentError, 'name must be a String' unless name_ok

      regex_ok = regex.is_a?(Regexp)
      raise ArgumentError, 'regex must be a Regex' unless regex_ok

      @name, @regex = name, regex
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
module Lexeme
  # Ordered collection of token rules plus the patterns for characters
  # that are skipped between tokens.
  #
  # Whitespace is always ignorable. A lexeme made only of word characters
  # that matches no registered rule is still identifiable, but is reported
  # as a token whose name is nil.
  class Ruleset
    # Builds the ruleset and, when a block is given, yields it so rules
    # can be registered.
    #
    # @yieldparam ruleset [Ruleset] the instance being configured
    def initialize
      @rules = []
      @ignore = [/\s+/]

      # \A and \z anchor the whole string. The line anchors ^ and $ would
      # accept any multi-line string that merely contains one all-word line.
      @unknown = Rule.new(nil, /\A\w+\z/)

      yield self if block_given?
    end

    # Registers a named rule; rules are tried in registration order.
    #
    # @param name [String, nil] token name
    # @param regex [Regexp] pattern identifying the token
    # @return [Array] the internal list of rules
    def rule(name, regex)
      @rules << Rule.new(name, regex)
    end

    # Adds a pattern for characters to skip between tokens.
    #
    # @param regex [Regexp] pattern tested against single characters
    # @return [Array] the internal list of ignore patterns
    def ignore(regex)
      @ignore << regex
    end

    # @param char [String] a character from the input
    # @return [Boolean] true when any ignore pattern matches +char+
    def ignorable?(char)
      @ignore.any? { |pattern| char =~ pattern }
    end

    # @param string [String] candidate lexeme
    # @return [Boolean] true when a registered rule or the nameless
    #   fallback rule matches +string+
    def identifiable?(string)
      !matching_rule(string).nil?
    end

    # @param string [String] lexeme to classify
    # @return [Token, nil] a token carrying the matching rule's name (nil
    #   for the fallback rule), or nil when nothing matches
    def identify(string)
      matched = matching_rule(string)
      matched && Token.new(matched.name, string)
    end

    private

    # First registered rule matching +string+, else the fallback rule when
    # it matches, else nil.
    def matching_rule(string)
      found = @rules.find { |r| string =~ r.regex }
      return found if found

      @unknown if string =~ @unknown.regex
    end
  end
end
|
data/lib/lexeme/token.rb
ADDED
data/lib/lexeme.rb
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
require 'lexeme/rule'
|
|
2
|
+
require 'lexeme/ruleset'
|
|
3
|
+
require 'lexeme/token'
|
|
4
|
+
require 'lexeme/lexeme'
|
|
5
|
+
|
|
6
|
+
module Lexeme
  VERSION = '0.0.1'.freeze

  # Yields the shared lexer so it can be configured (for example, to
  # assign its ruleset). The lexer is created on first use and reused
  # by later calls.
  #
  # @yieldparam lexer [Lexeme::Lexeme] the shared lexer
  # @return [Object, nil] the block's result, or nil without a block
  def self.setup
    yield lexer if block_given?
  end

  # Tokenizes +source+ with the shared lexer.
  #
  # The lexer is created on demand, so calling this before .setup no
  # longer fails on a nil lexer; the lexer's own checks apply instead.
  #
  # @param source [String] passed through to the lexer's #analyze
  # @yieldparam token each token in order, when a block is given
  # @return [Array] all tokens
  def self.analyze(source)
    tokens = lexer.analyze(source)
    tokens.each { |t| yield t } if block_given?

    tokens
  end

  # The lexer instance shared by .setup and .analyze, built lazily.
  def self.lexer
    @lexer ||= Lexeme.new
  end
  private_class_method :lexer
end
|
metadata
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lexeme
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Vladimir Ivic
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2013-06-23 00:00:00.000000000 Z
|
|
13
|
+
dependencies: []
|
|
14
|
+
description: A simple lexical analyzer written in Ruby
|
|
15
|
+
email: vladimir.ivic@me.com
|
|
16
|
+
executables: []
|
|
17
|
+
extensions: []
|
|
18
|
+
extra_rdoc_files: []
|
|
19
|
+
files:
|
|
20
|
+
- lib/lexeme.rb
|
|
21
|
+
- lib/lexeme/lexeme.rb
|
|
22
|
+
- lib/lexeme/token.rb
|
|
23
|
+
- lib/lexeme/ruleset.rb
|
|
24
|
+
- lib/lexeme/rule.rb
|
|
25
|
+
homepage: http://rubygems.org/gems/lexeme
|
|
26
|
+
licenses: []
|
|
27
|
+
post_install_message:
|
|
28
|
+
rdoc_options: []
|
|
29
|
+
require_paths:
|
|
30
|
+
- lib
|
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
32
|
+
none: false
|
|
33
|
+
requirements:
|
|
34
|
+
- - ! '>='
|
|
35
|
+
- !ruby/object:Gem::Version
|
|
36
|
+
version: '0'
|
|
37
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
38
|
+
none: false
|
|
39
|
+
requirements:
|
|
40
|
+
- - ! '>='
|
|
41
|
+
- !ruby/object:Gem::Version
|
|
42
|
+
version: '0'
|
|
43
|
+
requirements: []
|
|
44
|
+
rubyforge_project:
|
|
45
|
+
rubygems_version: 1.8.25
|
|
46
|
+
signing_key:
|
|
47
|
+
specification_version: 3
|
|
48
|
+
summary: Lexeme
|
|
49
|
+
test_files: []
|