rparsec-ruby19 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/rparsec.rb +3 -0
- data/rparsec/context.rb +83 -0
- data/rparsec/error.rb +28 -0
- data/rparsec/expressions.rb +184 -0
- data/rparsec/functors.rb +274 -0
- data/rparsec/id_monad.rb +17 -0
- data/rparsec/keywords.rb +114 -0
- data/rparsec/locator.rb +40 -0
- data/rparsec/misc.rb +130 -0
- data/rparsec/monad.rb +62 -0
- data/rparsec/operators.rb +103 -0
- data/rparsec/parser.rb +894 -0
- data/rparsec/parser_monad.rb +23 -0
- data/rparsec/parsers.rb +623 -0
- data/rparsec/token.rb +43 -0
- data/test/src/expression_test.rb +124 -0
- data/test/src/full_parser_test.rb +95 -0
- data/test/src/functor_test.rb +66 -0
- data/test/src/import.rb +5 -0
- data/test/src/keyword_test.rb +28 -0
- data/test/src/operator_test.rb +21 -0
- data/test/src/parser_test.rb +53 -0
- data/test/src/perf_benchmark.rb +25 -0
- data/test/src/s_expression_test.rb +33 -0
- data/test/src/scratch.rb +41 -0
- data/test/src/simple_monad_test.rb +22 -0
- data/test/src/simple_parser_test.rb +423 -0
- data/test/src/sql.rb +268 -0
- data/test/src/sql_parser.rb +258 -0
- data/test/src/sql_test.rb +128 -0
- data/test/src/tests.rb +13 -0
- metadata +95 -0
data/rparsec/id_monad.rb
ADDED
data/rparsec/keywords.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'rparsec/parser'
|
2
|
+
|
3
|
+
module RParsec
|
4
|
+
|
5
|
+
#
# This class helps building lexers and parsers for keywords.
#
# Instances are created with Keywords.case_sensitive or
# Keywords.case_insensitive; Keywords.new is private.
#
class Keywords
  extend Parsers

  private_class_method :new

  #
  # The symbol used to identify a keyword token
  #
  attr_reader :keyword_symbol

  #
  # The lexer that parses all the keywords represented
  #
  attr_reader :lexer

  #
  # Do we lex case sensitively?
  #
  def case_sensitive?
    @case_sensitive
  end

  #
  # To create an instance that lexes the given keywords
  # case sensitively.
  # _default_lexer_ is used to lex a token first, the token text is then compared with
  # the given keywords. If it matches any of the keywords, a keyword token is generated instead
  # using _keyword_symbol_.
  # The _block_ parameter, if present, is used to convert the token text to another object
  # when the token is recognized during grammar parsing phase.
  #
  def self.case_sensitive(words, default_lexer=word.token(:word), keyword_symbol=:keyword, &block)
    new(words, true, default_lexer, keyword_symbol, &block)
  end

  #
  # To create an instance that lexes the given keywords
  # case insensitively.
  # _default_lexer_ is used to lex a token first, the token text is then compared with
  # the given keywords. If it matches any of the keywords, a keyword token is generated instead
  # using _keyword_symbol_.
  # The _block_ parameter, if present, is used to convert the token text to another object
  # when the token is recognized during parsing phase.
  #
  def self.case_insensitive(words, default_lexer=word.token(:word), keyword_symbol=:keyword, &block)
    new(words, false, default_lexer, keyword_symbol, &block)
  end

  #
  # Builds the per-keyword parsers and the combined lexer.
  # Each word _w_ gets the token kind <tt>:"#{keyword_symbol}:#{w}"</tt>.
  # In case-insensitive mode the words are stored downcased, and the text of
  # every token produced by _default_lexer_ is downcased before lookup, so the
  # token text has to respond to +downcase+ (i.e. be a string).
  #
  def initialize(words, case_sensitive, default_lexer, keyword_symbol, &block)
    @default_lexer, @case_sensitive, @keyword_symbol = default_lexer, case_sensitive, keyword_symbol
    # this guarantees that we have copy of the words array and all the word strings.
    words = copy_words(words, case_sensitive)
    @name_map = {}
    @symbol_map = {}
    word_map = {}
    words.each do |w|
      symbol = "#{keyword_symbol}:#{w}".to_sym
      word_map[w] = symbol
      parser = Parsers.token(symbol, &block)
      @symbol_map[w.to_s.to_sym] = parser
      @name_map[w] = parser
    end
    @lexer = make_lexer(default_lexer, word_map)
  end

  #
  # Get the parser that recognizes the token of the given keyword during the parsing phase.
  # _key_ may be the keyword as a String or as a Symbol. In case-insensitive
  # mode both forms are matched regardless of case.
  # Raises ArgumentError if no parser is registered for _key_.
  #
  def parser(key)
    result =
      case key
      when String then @name_map[canonical_name(key)]
      # Symbols go through the same canonicalization as strings so that
      # keywords[:SELECT] and keywords['SELECT'] agree in case-insensitive mode.
      when Symbol then @symbol_map[canonical_name(key.to_s).to_sym]
      else @symbol_map[key]
      end
    raise ArgumentError, "parser not found for #{key}" if result.nil?
    result
  end

  alias [] parser

  private

  # Wraps _default_lexer_: a token whose canonical text is a key of _word_map_
  # is replaced by a keyword token (same text and index, keyword-specific kind);
  # any other token is passed through untouched.
  def make_lexer(default_lexer, word_map)
    default_lexer.map do |tok|
      text, ind = tok.text, tok.index
      my_symbol = word_map[canonical_name(text)]
      my_symbol.nil? ? tok : Token.new(my_symbol, text, ind)
    end
  end

  # The form of _name_ used as lookup key: unchanged when case sensitive,
  # downcased otherwise.
  def canonical_name(name)
    @case_sensitive ? name : name.downcase
  end

  # Returns a new array holding a copy of every word (downcased when lexing
  # case insensitively) so later changes to the caller's array or strings do
  # not affect this instance.
  def copy_words(words, case_sensitive)
    words.map do |w|
      case_sensitive ? w.dup : w.downcase
    end
  end
end
|
113
|
+
|
114
|
+
end # module
|
data/rparsec/locator.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'rparsec/misc'
|
2
|
+
|
3
|
+
module RParsec
|
4
|
+
|
5
|
+
#
# Translates a character index within _code_ into a line and column pair,
# both counted from 1.
#
class CodeLocator
  extend DefHelper

  def_readable :code

  # Newline character. On Ruby 1.9+ a character literal is a one-character String.
  LF = ?\n

  #
  # Get the line and column of the character at index _ind_ as a
  # two-element array <tt>[line, col]</tt>.
  # An index at or beyond the end of the code yields the position right after
  # the last character; an index <= 0 yields <tt>[1, 1]</tt>.
  #
  def locate(ind)
    return _locateEof if ind >= code.length
    return 1, 1 if ind <= 0
    _scan(code[0, ind])
  end

  #
  # Get the line and column of the position right after the last character.
  #
  def _locateEof
    _scan(code)
  end

  private

  # Walks _text_ character by character and returns the line/column reached
  # after consuming all of it. Characters (not bytes) are compared against LF:
  # String#each_byte yields Integers, which never equal the String LF on
  # Ruby 1.9+, so a byte-based scan would never advance the line counter.
  def _scan(text)
    line, col = 1, 1
    text.each_char do |c|
      if c == LF
        line, col = line + 1, 1
      else
        col += 1
      end
    end
    return line, col
  end
end
|
39
|
+
|
40
|
+
end # module
|
data/rparsec/misc.rb
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
module RParsec
|
2
|
+
|
3
|
+
#
# Internal utility functions for string manipulations.
#
module StringUtils
  #
  # Does _str_ start with the _sub_ string?
  # A nil _sub_ is treated as a prefix of everything.
  #
  def self.starts_with?(str, sub)
    return true if sub.nil?
    prefix_len = sub.length
    return false if prefix_len > str.length
    # compare position by position over the length of the candidate prefix
    (0...prefix_len).all? { |pos| str[pos] == sub[pos] }
  end
end
|
20
|
+
|
21
|
+
#
# Helpers for defining ctor.
# Meant to be mixed into a class with +extend+.
#
module DefHelper
  #
  # Define +initialize+ so that the n-th constructor argument is stored in
  # the instance variable named after the n-th entry of _vars_.
  # Missing arguments leave the variable nil; extra arguments are ignored.
  #
  def def_ctor(*vars)
    ivar_names = vars.map { |var| "@#{var}" }
    define_method(:initialize) do |*args|
      ivar_names.each_with_index do |ivar, pos|
        instance_variable_set(ivar, args[pos])
      end
    end
  end

  #
  # Define readers for _vars_ plus the matching constructor.
  #
  def def_readable(*vars)
    attr_reader(*vars)
    def_ctor(*vars)
  end

  #
  # Define readers and writers for _vars_ plus the matching constructor.
  #
  def def_mutable(*vars)
    attr_accessor(*vars)
    def_ctor(*vars)
  end
end
|
43
|
+
|
44
|
+
#
# To type check method parameters.
# The module extends itself, so the checks can be called as
# <tt>TypeChecker.check_arg_type</tt> etc.
#
module TypeChecker
  private

  # English ordinal for the zero-based position _n_ (0 => "1st", 2 => "3rd",
  # 10 => "11th", 20 => "21st").
  def nth n
    position = n + 1
    suffix =
      if (11..13).include?(position % 100)
        # 11th, 12th, 13th (and 111th, ...) are irregular
        'th'
      else
        case position % 10
        when 1 then 'st'
        when 2 then 'nd'
        when 3 then 'rd'
        else 'th'
        end
      end
    "#{position}#{suffix}"
  end

  public

  #
  # Raise ArgumentError unless _obj_ is a kind of _expected_.
  # _mtd_ is the name of the method being checked and _n_ the zero-based
  # position of the argument; both are only used in the error message.
  #
  def check_arg_type expected, obj, mtd, n=0
    unless obj.kind_of? expected
      raise ArgumentError,
        "#{obj.class} assigned to #{expected} for the #{nth n} argument of #{mtd}."
    end
  end

  #
  # Raise ArgumentError unless _arg_ is an Array whose elements are all
  # a kind of _elem_type_.
  #
  def check_arg_array_type elem_type, arg, mtd, n=0
    check_arg_type Array, arg, mtd, n
    arg.each_with_index do |x, i|
      unless x.kind_of? elem_type
        raise ArgumentError,
          "#{x.class} assigned to #{elem_type} for the #{nth i} element of the #{nth n} argument of #{mtd}."
      end
    end
  end

  #
  # Check every element of _args_ from index _n_ onwards against _expected_.
  #
  def check_vararg_type expected, args, mtd, n = 0
    (n...args.length).each do |i|
      check_arg_type expected, args[i], mtd, i
    end
  end

  extend self
end
|
82
|
+
|
83
|
+
#
# To add declarative signature support.
# Meant to be mixed into a class or module with +extend+.
#
module Signature
  #
  # Declare the parameter types of the already defined instance method _sym_.
  # Each entry of _types_ is one of:
  # a Class (the argument at that position must be a kind of it),
  # an empty Array (the argument must be an Array), or
  # a one-element Array holding a Class (this and all following arguments
  # must be a kind of that class).
  # Anything else raises ArgumentError.
  # The method is then replaced by a wrapper that checks the arguments
  # before invoking the original implementation.
  #
  def def_sig(sym, *types)
    types.each_with_index do |spec, pos|
      next if spec.kind_of? Class
      # a non-Class spec has to be an Array with at most one element ...
      if !spec.kind_of?(Array) || spec.length > 1
        TypeChecker.check_arg_type Class, spec, :def_sig, pos
      end
      # ... whose element, if any, is a Class
      TypeChecker.check_arg_array_type Class, spec, :def_sig, pos
    end
    __intercept_method_to_check_param_types__(sym, types)
  end

  private

  # Captures the current implementation of _sym_, defines a helper method
  # that validates the arguments against _types_ and returns the original
  # method bound to the receiver, then redefines _sym_ to call the helper and
  # invoke the bound method with the same arguments and block.
  def __intercept_method_to_check_param_types__(sym, types)
    original = instance_method(sym)
    checker = "_#{sym}_param_types_checked_helper".to_sym
    define_method(checker) do |*args|
      vararg_at = nil
      types.each_with_index do |spec, pos|
        if spec.kind_of? Class
          TypeChecker.check_arg_type spec, args[pos], sym, pos
        elsif spec.empty?
          TypeChecker.check_arg_type Array, args[pos], sym, pos
        else
          # one-element Array: everything from here on is a vararg of that type
          vararg_at = pos
          break
        end
      end
      unless vararg_at.nil?
        TypeChecker.check_vararg_type types[vararg_at][0], args, sym, vararg_at
      end
      original.bind(self)
    end
    module_eval "
      def #{sym}(*params, &block)
        #{checker}(*params).call(*params, &block)
      end
    "
  end
end
|
129
|
+
|
130
|
+
end # module
|
data/rparsec/monad.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module RParsec
|
2
|
+
|
3
|
+
#
# module for Monad
#
# Mixed into a class that wraps a value (_this_) together with a monad
# implementation object; every operation is delegated to that implementation.
#
module Monad
  # The encapsulated object.
  attr_reader :this

  #
  # To initialize with a monad implementation and an object that obeys the monad law.
  # Raises ArgumentError when the implementation _m_ is nil.
  #
  def initMonad(m, v)
    raise ArgumentError, 'monad cannot be nil' if m.nil?
    @monad = m
    @this = v
  end

  #
  # To create a value based on the monad impl.
  #
  def value(v)
    @monad.value(v)
  end

  #
  # Run the _bind_ operation on the encapsulated object following the monad law.
  #
  def bind(&binder)
    @monad.bind(@this, &binder)
  end

  #
  # Run the _seq_ operation on the encapsulated object following the monad law.
  # If _seq_ is not defined by the monad impl, use _bind_ to implement.
  #
  # NOTE(review): unlike _bind_ and _plus_, the delegated call does not pass
  # the encapsulated object to the monad impl -- confirm this is intended.
  #
  def seq(other)
    return @monad.seq(other) if @monad.respond_to?(:seq)
    bind { |_ignored| other }
  end

  #
  # Run the _map_ operation on the encapsulated object following the monad law.
  # _bind_ is used to implement.
  #
  def map(&mapper)
    bind { |v| value(mapper.call(v)) }
  end

  #
  # Run the _plus_ operation on the encapsulated object following the MonadPlus law.
  # _other_ has to expose its encapsulated object through _this_.
  #
  def plus(other)
    @monad.mplus(@this, other.this)
  end
end
|
61
|
+
|
62
|
+
end # module
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'rparsec/parser'
|
2
|
+
|
3
|
+
module RParsec
|
4
|
+
|
5
|
+
#
# This class helps building lexer and parser for operators.
# The case that one operator (++ for example) contains another operator (+)
# is automatically handled so client code doesn't have to worry about ambiguity.
#
class Operators
  #
  # To create an instance of Operators for the given operators.
  # The _block_ parameter, if present, is used to convert the token text to another object
  # when the token is recognized during grammar parsing phase.
  #
  def initialize(ops, &block)
    @lexers = {}
    @parsers = {}
    lexers = Operators.sort(ops).map do |op|
      symbol = op.to_sym
      # single-character operators are lexed as a char, longer ones as a string
      scanner = op.length == 1 ? Parsers.char(op) : Parsers.str(op)
      lexer = scanner.token(symbol)
      @lexers[symbol] = lexer
      @parsers[symbol] = Parsers.token(symbol, &block)
      lexer
    end
    @lexer = Parsers.sum(*lexers)
  end

  #
  # Get the parser for the given operator.
  # Raises ArgumentError if the operator is unknown.
  #
  def parser(op)
    result = @parsers[op.to_sym]
    raise ArgumentError, "parser not found for #{op}" if result.nil?
    result
  end

  alias [] parser

  #
  # Get the lexer that lexes operators.
  # If an operator is specified, the lexer for that operator is returned
  # (nil when the operator is unknown).
  #
  def lexer(op=nil)
    return @lexer if op.nil?
    @lexers[op.to_sym]
  end

  #
  # Sort an array of operators so that contained operator appears after containers.
  # When no containment exist between two operators, the shorter one takes precedence.
  # Duplicates are dropped. An empty input yields an empty array.
  #
  def self.sort(ops)
    # sort the array by longer-string-first.
    ordered = ops.sort { |x, y| y.length <=> x.length }
    suites = []
    # loop from the longer to shorter string
    ordered.each do |s|
      populate_suites(suites, s)
    end
    # suites are populated with bigger suite first
    to_array suites
  end

  # NOTE: the helpers below are singleton methods, which a bare +private+
  # does not affect; they have always been publicly callable and are kept
  # that way for backward compatibility.

  #
  # Add _s_ to the first suite (in creation order) that has a member starting
  # with _s_; open a new suite for _s_ when there is none.
  # Because operators arrive longest first, suites with longer leading
  # operators are created first.
  #
  def self.populate_suites(suites, s)
    for suite in suites
      return if populate_suite(suite, s)
    end
    suites << [s]
  end

  #
  # Try to place _s_ into _suite_. Scanning from the tail, _s_ is inserted
  # right after the first member that starts with _s_ (nothing is inserted
  # when that member equals _s_). Returns true if such a member exists,
  # false otherwise.
  #
  def self.populate_suite(suite, s)
    (suite.length - 1).downto(0) do |ind|
      cur = suite[ind]
      next unless StringUtils.starts_with? cur, s
      suite.insert(ind + 1, s) unless cur == s
      return true
    end
    false
  end

  #
  # Reverse the order of the suites and flatten them, in place, into one
  # array of operators. Always returns the array: Array#flatten! alone
  # returns nil when there is nothing to flatten (e.g. no suites at all).
  #
  def self.to_array suites
    suites.reverse!
    suites.flatten!
    suites
  end
end
|
102
|
+
|
103
|
+
end # module
|