simple_lexer 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # SimpleLexer
2
2
 
3
- TODO: Write a gem description
3
+ A very basic toy Lexer implemented with Regular Expressions.
4
4
 
5
5
  ## Installation
6
6
 
@@ -18,7 +18,7 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- TODO: Write usage instructions here
21
+ See docs for SimpleLexer::Lexer.
22
22
 
23
23
  ## Contributing
24
24
 
data/lib/simple_lexer.rb CHANGED
@@ -2,18 +2,31 @@ require_relative "simple_lexer/version"
2
2
 
3
3
  module SimpleLexer
4
4
 
5
+
6
+ # An Exception that is raised when Lexer encounters text for which
7
+ # there is no rule to match.
5
8
  class NoMatchError < Exception
6
- # unable to match
7
9
  end
8
10
 
11
+ # Exception that is raised when Lexer is finished tokenizing the
12
+ # input string.
9
13
  class EndOfStreamException < Exception
10
- # when the Lexer is finished
11
14
  end
12
15
 
16
+ # Object defined with certain rules that takes text as input and
17
+ # outputs Tokens based on the rules.
18
+ # @!attribute [r] rules
19
+ # @return [Array<Regexp>] A list of the rules for the Lexer.
20
+ # @!attribute [rw] pos
21
+ # @return [Fixnum] The current position of the input pointer.
13
22
  class Lexer
14
23
 
15
- attr_reader :rules, :pos
24
+ attr_reader :rules
25
+ attr_accessor :pos
16
26
 
27
+ # Creates a new instance of Lexer.
28
+ # @yield [] Some rules passed to instance_eval.
29
+ # @see #tok An example of a number Lexer using <code>tok</code>.
17
30
  def initialize(&rules)
18
31
  @rules = [] # list of {:rule => Regexp, :token => :token_id}
19
32
  @ignore = [] # list of Regexp
@@ -21,24 +34,37 @@ module SimpleLexer
21
34
  instance_eval &rules
22
35
  end
23
36
 
37
+ # Defines a new Token rule for the Lexer to match.
38
+ # @param [Regexp] rule Regular expression that defines the token
39
+ # @param [Symbol] token Token class
40
+ # @yield [text] The expression will give the Token its value.
41
+ # @example Rule for numbers
42
+ # my_lexer = SimpleLexer::Lexer.new do
43
+ # tok /-?\d+(\.\d+)?/, :number do |text| text.to_f end
44
+ # end
45
+ # my_lexer.load = "-435.234"
46
+ # puts my_lexer.next_token[:value] # -435.234
24
47
  def tok(rule, token, &action)
25
- # defining a new rule:
26
- #
27
- # my_lexer = SimpleLexer::Lexer.new do
28
- # tok /\w+/, :identifier
29
- # end
30
-
31
48
  @rules << {:rule => Regexp.new('\A' + rule.source), :token => token, :action => action}
32
49
  end
33
50
 
34
- def ign(rule)
35
- # defining conditions to ignore:
36
- #
37
- # my_lexer = SimpleLexer::Lexer.new do
38
- # tok /\w+/, :identifier
39
- # ign :whitespace
40
- # end
41
-
51
+ # Defines rules of input classes to ignore (consume and not output any
52
+ # tokens.)
53
+ # @param [Regexp, Symbol] rule Regular expression that defines ignored
54
+ # characters.
55
+ # @note You can set _rule_ to <code>:whitespace</code> to ignore whitespace
56
+ # characters.
57
+ # @example Ignoring parentheses
58
+ # my_lexer = SimpleLexer::Lexer.new do
59
+ # tok /\w+/, :identifier
60
+ # ign /[\(\)]/
61
+ # end
62
+ # @example Ignoring whitespace
63
+ # my_lexer = SimpleLexer::Lexer.new do
64
+ # tok /\w+/, :identifier
65
+ # ign :whitespace
66
+ # end
67
+ def ign(rule)
42
68
  if rule == :whitespace
43
69
  rule = /\s+/
44
70
  end
@@ -46,21 +72,26 @@ module SimpleLexer
46
72
  @ignore << Regexp.new('\A' + rule.source)
47
73
  end
48
74
 
49
- def load=(string)
50
- # load a string into the lexer
51
- # my_lexer.load( ... )
52
-
53
- @load = string
75
+ # Give the Lexer some text to tokenize.
76
+ # @param [String] input Text for the Lexer to tokenize.
77
+ def load=(input)
78
+ @load = input
54
79
  @pos = 0
55
80
  end
56
-
81
+
82
+ # What still remains to be processed.
83
+ # @return [String] Substring of the input starting from input pointer.
57
84
  def load
58
- # what the lexer currently sees
59
- # my_lexer.load ...
60
-
61
85
  @load[@pos..-1]
62
86
  end
63
87
 
88
+ # Gets the next Token in the input and advances the input pointer.
89
+ # @return [Hash{Symbol=>Values}]
90
+ # - <code>:token</code> Token class
91
+ # - <code>:text</code> Matched text
92
+ # - <code>:value</code> Value as defined by passed block, if applicable.
93
+ # @raise [NoMatchError] If load contains a sequence for which the Lexer has
94
+ # no rule.
64
95
  def next_token
65
96
  # get the next token
66
97
  # my_lexer.next_token -> [ :token => :token_id, :text => matched ]
@@ -85,10 +116,9 @@ module SimpleLexer
85
116
  raise NoMatchError, "Unable to match, unexpected characters: '#{load[0..10]}...'"
86
117
  end
87
118
 
88
- def all_tokens
89
- # returns the array of all tokens until it is finished lexing
90
- # my_lexer.all_tokens
91
-
119
+ # Tokenize the entire input stream.
120
+ # @return [Array<Hash>] An Array of Tokens processed by the Lexer
121
+ def all_tokens
92
122
  tokens = []
93
123
  loop do
94
124
  tokens << next_token
@@ -97,24 +127,11 @@ module SimpleLexer
97
127
  tokens
98
128
  end
99
129
 
130
+ # Checks if the Lexer has finished Tokenizing the entire input stream.
131
+ # @return [Boolean] Whether Lexer has reached the end of input.
100
132
  def finished?
101
133
  return @pos >= @load.length
102
134
  end
103
135
 
104
136
  end
105
137
  end
106
-
107
- my_lexer = SimpleLexer::Lexer.new do
108
- tok /-?\d+(\.\d+)?/, :number do |t| t.to_f end
109
- tok /\+/, :plus
110
- tok /-/, :minus
111
- tok /\//, :div
112
- tok /\*/, :mult
113
- tok /\(/, :lparen
114
- tok /\)/, :rparen
115
-
116
- ign :whitespace
117
- end
118
-
119
- my_lexer.load = "321.32 + -432.388 - 33/4.3 - 4.228 * 5 - (32*632)"
120
- p my_lexer.all_tokens
data/lib/simple_lexer/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module SimpleLexer
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_lexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-11-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: &11890980 !ruby/object:Gem::Requirement
16
+ requirement: &10265840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *11890980
24
+ version_requirements: *10265840
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake
27
- requirement: &11890240 !ruby/object:Gem::Requirement
27
+ requirement: &10264520 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *11890240
35
+ version_requirements: *10264520
36
36
  description: A simple toy lexer for Ruby
37
37
  email:
38
38
  - wchen298@gmail.com
@@ -74,3 +74,4 @@ signing_key:
74
74
  specification_version: 3
75
75
  summary: Rudimentary lexer for Ruby
76
76
  test_files: []
77
+ has_rdoc: