lexeme 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
module Lexeme
  # Character-by-character lexical analyzer.
  #
  # All recognition decisions are delegated to the object assigned to
  # #ruleset, which must respond to +ignorable?+, +identifiable?+ and
  # +identify+. The token objects returned by +identify+ must respond to
  # +name+.
  class Lexeme
    attr_accessor :ruleset

    # Reads the file at +source+ and splits its content into tokens.
    #
    # @param source [String] path of the file to tokenize
    # @return [Array] tokens returned by the ruleset, in source order
    # @raise [ArgumentError] if +source+ is not a String or is empty
    # @raise [RuntimeError] if the file is not readable, or if a lexeme
    #   cannot be identified by the ruleset
    def analyze(source)
      raise ArgumentError, 'Argument 1 must be a String' unless source.is_a?(String)
      raise ArgumentError, 'Source not defined' if source.empty?
      # File.readable? is false for missing files as well as unreadable ones.
      raise 'Source file not readable' unless File.readable?(source)

      # File.read (unlike IO.read) never interprets a leading "|" as a command.
      scan(File.read(source))
    end

    private

    # Splits +input+ into tokens using longest-match: a lexeme keeps growing
    # while the ruleset still calls it identifiable, and is emitted as soon as
    # the next character would make it unidentifiable or is ignorable.
    #
    # @param input [String] text to tokenize
    # @return [Array] tokens in the order they were recognised
    # @raise [RuntimeError] on the first lexeme the ruleset cannot name
    def scan(input)
      tokens = []
      lexeme = ''
      line = 1

      input.each_char do |c|
        if ignorable?(c)
          tokens << token_for(lexeme, line) unless lexeme.empty?
          lexeme = ''
        else
          candidate = lexeme + c
          if identifiable?(candidate)
            lexeme = candidate
          else
            # Nothing recognised so far, so this character cannot start a token.
            raise "Unknown token #{candidate} on line #{line}!" if lexeme.empty?

            tokens << token_for(lexeme, line)
            lexeme = c
          end
        end

        # Advance the counter only after the character has been handled, so a
        # lexeme terminated by a newline is reported on the line it sits on.
        line += 1 if c == "\n"
      end

      # Emit the lexeme still pending when the input does not end with an
      # ignorable character.
      tokens << token_for(lexeme, line) unless lexeme.empty?

      tokens
    end

    # Asks the ruleset for the token of +lexeme+ and rejects nameless results.
    #
    # @raise [RuntimeError] if the ruleset returns nil or a token without a name
    def token_for(lexeme, line)
      token = identify(lexeme)
      raise "Unknown token #{lexeme} on line #{line}!" if token.nil? || token.name.nil?

      token
    end

    def ignorable?(char)
      @ruleset.ignorable? char
    end

    def identifiable?(string)
      @ruleset.identifiable? string
    end

    def identify(string)
      @ruleset.identify string
    end
  end
end
@@ -0,0 +1,15 @@
1
module Lexeme
  # Pairs a token name with the Regexp used to recognise it.
  # Both values are exposed through read-only accessors.
  class Rule
    attr_reader :name, :regex

    # @param name [String, nil] token name; nil is accepted
    # @param regex [Regexp] pattern associated with the name
    # @raise [ArgumentError] if +name+ is neither nil nor a String, or if
    #   +regex+ is not a Regexp
    def initialize(name, regex)
      valid_name = name.nil? || name.is_a?(String)
      raise ArgumentError, 'name must be a String or nil' unless valid_name
      raise ArgumentError, 'regex must be a Regexp' unless regex.is_a?(Regexp)

      @name = name
      @regex = regex
    end
  end
end
@@ -0,0 +1,50 @@
1
module Lexeme
  # Ordered collection of rules plus patterns for characters to skip.
  #
  # Rules are tried in the order they were added. A built-in nameless
  # fallback rule matching a run of word characters is tried last, and
  # whitespace is always registered as ignorable.
  class Ruleset
    # Builds the ruleset and, when a block is given, yields +self+ to it.
    #
    # @yieldparam ruleset [Ruleset] the instance being built
    def initialize
      @rules = []
      @ignore = [/\s+/]
      # \A and \z anchor the whole string; ^ and $ would also match a single
      # line inside a multi-line string.
      @unknown = Rule.new(nil, /\A\w+\z/)

      yield self if block_given?
    end

    # Appends a rule built from +name+ and +regex+.
    #
    # @param name [String, nil] token name, passed to Rule.new
    # @param regex [Regexp] pattern, passed to Rule.new
    # @return [Array] the internal rule list after the append
    def rule(name, regex)
      @rules << Rule.new(name, regex)
    end

    # Registers an additional pattern for ignorable characters.
    #
    # @param regex [Regexp] pattern to add
    # @return [Array<Regexp>] the internal ignore list after the append
    # @raise [ArgumentError] if +regex+ is not a Regexp
    def ignore(regex)
      raise ArgumentError, 'regex must be a Regexp' unless regex.is_a?(Regexp)

      @ignore << regex
    end

    # @param char [String] character to test
    # @return [Boolean] true when any registered ignore pattern matches
    def ignorable?(char)
      @ignore.any? { |pattern| pattern.match?(char) }
    end

    # @param string [String] candidate lexeme
    # @return [Boolean] true when a rule or the fallback rule matches
    def identifiable?(string)
      !matching_rule(string).nil?
    end

    # @param string [String] lexeme to classify
    # @return [Token, nil] a token carrying the matching rule's name (nil for
    #   the fallback rule), or nil when nothing matches
    def identify(string)
      matched = matching_rule(string)
      matched && Token.new(matched.name, string)
    end

    private

    # First user rule matching +string+, else the fallback rule if it matches.
    def matching_rule(string)
      @rules.find { |r| r.regex.match?(string) } ||
        (@unknown if @unknown.regex.match?(string))
    end
  end
end
@@ -0,0 +1,18 @@
1
module Lexeme
  # Holds a name together with a value and offers two simple renderings.
  class Token
    attr_reader :name, :value

    # @param name [Object] stored as-is and exposed through #name
    # @param value [Object] stored as-is and exposed through #value
    def initialize(name, value)
      @name, @value = name, value
    end

    # @return [String] the name and the value separated by ": "
    def to_text
      format('%s: %s', @name, @value)
    end

    # @return [Array] two-element array of name and value
    def to_array
      [name, value]
    end
  end
end
data/lib/lexeme.rb ADDED
@@ -0,0 +1,20 @@
1
+ require 'lexeme/rule'
2
+ require 'lexeme/ruleset'
3
+ require 'lexeme/token'
4
+ require 'lexeme/lexeme'
5
+
6
module Lexeme
  VERSION = '0.0.1'

  # Creates the shared lexer on first use and yields it for configuration.
  # Later calls yield the same lexer instance.
  #
  # @yieldparam lexer [Lexeme] the shared lexer
  # @return [Object, nil] the block's result, or nil when no block is given
  def self.setup
    @lexer ||= Lexeme.new
    yield @lexer if block_given?
  end

  # Runs the shared lexer on +source+.
  #
  # @param source [String] forwarded unchanged to the lexer's +analyze+
  # @yieldparam token each token in order, when a block is given
  # @return [Array] the tokens returned by the lexer
  # @raise [RuntimeError] if Lexeme.setup has not been called yet
  def self.analyze(source, &block)
    # Fail with an explicit message instead of a NoMethodError on nil.
    raise 'Lexeme.setup must be called before Lexeme.analyze' unless @lexer

    tokens = @lexer.analyze(source)
    tokens.each(&block) if block

    tokens
  end
end
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lexeme
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Vladimir Ivic
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-06-23 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A simple lexical analyzer written in Ruby
15
+ email: vladimir.ivic@me.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/lexeme.rb
21
+ - lib/lexeme/lexeme.rb
22
+ - lib/lexeme/token.rb
23
+ - lib/lexeme/ruleset.rb
24
+ - lib/lexeme/rule.rb
25
+ homepage: http://rubygems.org/gems/lexeme
26
+ licenses: []
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ none: false
33
+ requirements:
34
+ - - ! '>='
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 1.8.25
46
+ signing_key:
47
+ specification_version: 3
48
+ summary: Lexeme
49
+ test_files: []