simple_lexer 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # SimpleLexer
2
2
 
3
- TODO: Write a gem description
3
+ A very basic toy Lexer implemented with Regular Expressions.
4
4
 
5
5
  ## Installation
6
6
 
@@ -18,7 +18,7 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- TODO: Write usage instructions here
21
+ See docs for SimpleLexer::Lexer.
22
22
 
23
23
  ## Contributing
24
24
 
data/lib/simple_lexer.rb CHANGED
@@ -2,18 +2,31 @@ require_relative "simple_lexer/version"
2
2
 
3
3
  module SimpleLexer
4
4
 
5
+
6
+ # An Exception that is raised when Lexer encounters text for which
7
+ # there is no rule to match.
5
8
  class NoMatchError < Exception
6
- # unable to match
7
9
  end
8
10
 
11
+ # Exception that is raised when Lexer is finished tokenizing the
12
+ # input string.
9
13
  class EndOfStreamException < Exception
10
- # when the Lexer is finished
11
14
  end
12
15
 
16
+ # Object defined with certain rules that takes text as input and
17
+ # outputs Tokens based on the rules.
18
+ # @!attribute [r] rules
19
+ # @return [Array<Hash>] A list of the rules for the Lexer, each a Hash with :rule, :token and :action keys.
20
+ # @!attribute [rw] pos
21
+ # @return [Fixnum] The current position of the input pointer.
13
22
  class Lexer
14
23
 
15
- attr_reader :rules, :pos
24
+ attr_reader :rules
25
+ attr_accessor :pos
16
26
 
27
+ # Creates a new instance of Lexer.
28
+ # @yield [] Some rules passed to instance_eval.
29
+ # @see #tok An example of a number Lexer using <code>tok</code>.
17
30
  def initialize(&rules)
18
31
  @rules = [] # list of {:rule => Regexp, :token => :token_id}
19
32
  @ignore = [] # list of Regexp
@@ -21,24 +34,37 @@ module SimpleLexer
21
34
  instance_eval &rules
22
35
  end
23
36
 
37
+ # Defines a new Token rule for the Lexer to match.
38
+ # @param [Regexp] rule Regular expression that defines the token
39
+ # @param [Symbol] token Token class
40
+ # @yield [text] The expression will give the Token its value.
41
+ # @example Rule for numbers
42
+ # my_lexer = SimpleLexer::Lexer.new do
43
+ # tok /-?\d+(\.\d+)?/, :number do |text| text.to_f end
44
+ # end
45
+ # my_lexer.load = "-435.234"
46
+ # puts my_lexer.next_token[:value] # -435.234
24
47
  def tok(rule, token, &action)
25
- # defining a new rule:
26
- #
27
- # my_lexer = SimpleLexer::Lexer.new do
28
- # tok /\w+/, :identifier
29
- # end
30
-
31
48
  @rules << {:rule => Regexp.new('\A' + rule.source), :token => token, :action => action}
32
49
  end
33
50
 
34
- def ign(rule)
35
- # defining conditions to ignore:
36
- #
37
- # my_lexer = SimpleLexer::Lexer.new do
38
- # tok /\w+/, :identifier
39
- # ign :whitespace
40
- # end
41
-
51
+ # Defines rules of input classes to ignore (consume and not output any
52
+ # tokens).
53
+ # @param [Regexp, Symbol] rule Regular expression that defines ignored
54
+ # characters.
55
+ # @note You can set _rule_ to <code>:whitespace</code> to ignore whitespace
56
+ # characters.
57
+ # @example Ignoring parentheses
58
+ # my_lexer = SimpleLexer::Lexer.new do
59
+ # tok /\w+/, :identifier
60
+ # ign /[\(\)]/
61
+ # end
62
+ # @example Ignoring whitespace
63
+ # my_lexer = SimpleLexer::Lexer.new do
64
+ # tok /\w+/, :identifier
65
+ # ign :whitespace
66
+ # end
67
+ def ign(rule)
42
68
  if rule == :whitespace
43
69
  rule = /\s+/
44
70
  end
@@ -46,21 +72,26 @@ module SimpleLexer
46
72
  @ignore << Regexp.new('\A' + rule.source)
47
73
  end
48
74
 
49
- def load=(string)
50
- # load a string into the lexer
51
- # my_lexer.load( ... )
52
-
53
- @load = string
75
+ # Give the Lexer some text to tokenize.
76
+ # @param [String] input Text for the Lexer to tokenize.
77
+ def load=(input)
78
+ @load = input
54
79
  @pos = 0
55
80
  end
56
-
81
+
82
+ # What still remains to be processed.
83
+ # @return [String] Substring of the input starting from input pointer.
57
84
  def load
58
- # what the lexer currently sees
59
- # my_lexer.load ...
60
-
61
85
  @load[@pos..-1]
62
86
  end
63
87
 
88
+ # Gets the next Token in the input and advances the input pointer.
89
+ # @return [Hash{Symbol=>Object}]
90
+ # - <code>:token</code> Token class
91
+ # - <code>:text</code> Matched text
92
+ # - <code>:value</code> Value as defined by passed block, if applicable.
93
+ # @raise [NoMatchError] If load contains a sequence for which the Lexer has
94
+ # no rule.
64
95
  def next_token
65
96
  # get the next token
66
97
  # my_lexer.next_token -> { :token => :token_id, :text => matched }
@@ -85,10 +116,9 @@ module SimpleLexer
85
116
  raise NoMatchError, "Unable to match, unexpected characters: '#{load[0..10]}...'"
86
117
  end
87
118
 
88
- def all_tokens
89
- # returns the array of all tokens until it is finished lexing
90
- # my_lexer.all_tokens
91
-
119
+ # Tokenize the entire input stream.
120
+ # @return [Array<Hash>] An Array of Tokens processed by the Lexer
121
+ def all_tokens
92
122
  tokens = []
93
123
  loop do
94
124
  tokens << next_token
@@ -97,24 +127,11 @@ module SimpleLexer
97
127
  tokens
98
128
  end
99
129
 
130
+ # Checks if the Lexer has finished tokenizing the entire input stream.
131
+ # @return [Boolean] Whether Lexer has reached the end of input.
100
132
  def finished?
101
133
  return @pos >= @load.length
102
134
  end
103
135
 
104
136
  end
105
137
  end
106
-
107
- my_lexer = SimpleLexer::Lexer.new do
108
- tok /-?\d+(\.\d+)?/, :number do |t| t.to_f end
109
- tok /\+/, :plus
110
- tok /-/, :minus
111
- tok /\//, :div
112
- tok /\*/, :mult
113
- tok /\(/, :lparen
114
- tok /\)/, :rparen
115
-
116
- ign :whitespace
117
- end
118
-
119
- my_lexer.load = "321.32 + -432.388 - 33/4.3 - 4.228 * 5 - (32*632)"
120
- p my_lexer.all_tokens
@@ -1,3 +1,3 @@
1
1
  module SimpleLexer
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_lexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-11-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: &11890980 !ruby/object:Gem::Requirement
16
+ requirement: &10265840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *11890980
24
+ version_requirements: *10265840
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake
27
- requirement: &11890240 !ruby/object:Gem::Requirement
27
+ requirement: &10264520 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *11890240
35
+ version_requirements: *10264520
36
36
  description: A simple toy lexer for Ruby
37
37
  email:
38
38
  - wchen298@gmail.com
@@ -74,3 +74,4 @@ signing_key:
74
74
  specification_version: 3
75
75
  summary: Rudimentary lexer for Ruby
76
76
  test_files: []
77
+ has_rdoc: