nscript 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,230 @@
1
module NScript

  # Turns NScript source text into a flat array of tokens of the form
  # [tag, Value], where the Value is built from the token text and the line
  # number current at the time the token was emitted. The finished stream is
  # passed through Rewriter before being returned from #tokenize.
  class Lexer

    # Identifiers that are tagged with their own upcased name (:IF, :ELSE, ...).
    KEYWORDS = ["if", "else", "then", "unless",
                "true", "false", "yes", "no", "on", "off",
                "and", "or", "is", "isnt", "not",
                "new", "return",
                "try", "catch", "finally", "throw",
                "break", "continue",
                "for", "in", "of", "by", "where", "while",
                "delete", "instanceof", "typeof",
                "switch", "when",
                "super", "extends"]

    # Token matching patterns; all are anchored to the start of the chunk.
    IDENTIFIER = /\A([a-zA-Z$_](\w|\$)*)/
    NUMBER     = /\A(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
    STRING     = /\A(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
    HEREDOC    = /\A("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
    JS         = /\A(``|`(.*?)([^\\]|\\\\)`)/m
    OPERATOR   = /\A([+\*&|\/\-%=<>:!?]+)/
    WHITESPACE = /\A([ \t]+)/
    COMMENT    = /\A(((\n?[ \t]*)?#.*$)+)/
    CODE       = /\A((-|=)>)/
    REGEX      = /\A(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/
    MULTI_DENT = /\A((\n([ \t]*))+)(\.)?/
    LAST_DENT  = /\n([ \t]*)/
    ASSIGNMENT = /\A(:|=)\Z/

    # Helper patterns used to clean up or classify already-matched text.
    JS_CLEANER      = /(\A`|`\Z)/
    MULTILINER      = /\n/
    STRING_NEWLINES = /\n[ \t]*/
    COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/
    NO_NEWLINE      = /\A([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)\Z/
    HEREDOC_INDENT  = /^[ \t]+/

    # Tags after which a '/' is never lexed as the start of a regex literal.
    NOT_REGEX = [
      :IDENTIFIER, :NUMBER, :REGEX, :STRING,
      ')', '++', '--', ']', '}',
      :FALSE, :NULL, :TRUE
    ]

    # Tags after which an unspaced '(' or '[' becomes :CALL_START / :INDEX_START.
    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]

    # Lexes +code+ and returns the token stream after Rewriter has processed it.
    #
    # code - the source text (a String); one trailing line break is chomped.
    def tokenize(code)
      @code    = code.chomp # Cleanup code by removing the trailing line break.
      @i       = 0          # Current character position we're parsing.
      @line    = 1          # The current line.
      @indent  = 0          # The current indent level.
      @indents = []         # The stack of all indent levels we are currently within.
      @tokens  = []         # Collection of all parsed tokens in the form [:TOKEN_TYPE, value].
      @spaced  = nil        # The last value that has a space following it.
      while @i < @code.length
        @chunk = @code[@i..-1]
        extract_next_token
      end
      puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
      close_indentation
      Rewriter.new.rewrite(@tokens)
    end

    # Tries each token matcher in priority order against @chunk. The first one
    # that matches consumes input; literal_token is the catch-all and always
    # consumes at least one character.
    def extract_next_token
      return if identifier_token
      return if number_token
      return if heredoc_token
      return if string_token
      return if js_token
      return if regex_token
      return if indent_token
      return if comment_token
      return if whitespace_token
      return literal_token
    end

    # Matches identifiers and keywords. Returns false when @chunk does not
    # start with an identifier.
    def identifier_token
      return false unless identifier = @chunk[IDENTIFIER, 1]
      # Keywords are special identifiers tagged with their own name,
      # 'if' will result in an [:IF, "if"] token.
      tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
      tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)
      @tokens[-1][0] = :PROTOTYPE_ACCESS if tag == :IDENTIFIER && last_value == '::'
      before_last = @tokens[-2]
      if tag == :IDENTIFIER && last_value == '.' && !(before_last && before_last[1] == '.')
        # before_last is nil when the source starts with ".name"; treat that
        # as a plain property access instead of dereferencing nil.
        if before_last && before_last[0] == "?"
          @tokens[-1][0] = :SOAK_ACCESS
          @tokens.delete_at(-2)
        else
          @tokens[-1][0] = :PROPERTY_ACCESS
        end
      end
      token(tag, identifier)
      @i += identifier.length
    end

    # Matches decimal, exponent and hexadecimal number literals.
    def number_token
      return false unless number = @chunk[NUMBER, 1]
      token(:NUMBER, number)
      @i += number.length
    end

    # Matches single- and double-quoted strings. Each newline (plus following
    # indentation) inside the literal is replaced by " \<newline>".
    def string_token
      return false unless string = @chunk[STRING, 1]
      escaped = string.gsub(STRING_NEWLINES, " \\\n")
      token(:STRING, escaped)
      @line += string.count("\n")
      @i += string.length
    end

    # Matches triple-quoted heredocs and emits them as a double-quoted :STRING
    # with the common leading indentation removed, newlines turned into "\n"
    # escapes and double quotes escaped.
    def heredoc_token
      return false unless match = @chunk.match(HEREDOC)
      # Both capture groups are nil for an empty heredoc (six quotes in a row).
      doc = match[2] || match[4] || ""
      indent = doc.scan(HEREDOC_INDENT).min
      doc = doc.gsub(/^#{indent}/, "")
      doc = doc.gsub("\n", "\\n")
      doc = doc.gsub('"', '\\"')
      token(:STRING, "\"#{doc}\"")
      @line += match[1].count("\n")
      @i += match[1].length
    end

    # Matches backtick-delimited embedded JavaScript; the :JS token value has
    # the surrounding backticks stripped.
    def js_token
      return false unless script = @chunk[JS, 1]
      token(:JS, script.gsub(JS_CLEANER, ''))
      # JS may span lines (the pattern is multiline), so keep @line in sync.
      @line += script.count("\n")
      @i += script.length
    end

    # Matches regex literals, unless the previous tag is listed in NOT_REGEX.
    def regex_token
      return false unless regex = @chunk[REGEX, 1]
      return false if NOT_REGEX.include?(last_tag)
      token(:REGEX, regex)
      @i += regex.length
    end

    # Matches a run of consecutive '#' comment lines. Emits one :COMMENT token
    # whose value is the array of cleaned comment lines, followed by a "\n"
    # token.
    def comment_token
      return false unless comment = @chunk[COMMENT, 1]
      @line += comment.scan(MULTILINER).length
      token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
      token("\n", "\n")
      @i += comment.length
    end

    # Matches newlines plus the indentation that follows them and emits
    # :INDENT, :OUTDENT or "\n" tokens according to how the indentation of the
    # last matched line compares with @indent. Newlines are suppressed when the
    # next character is '.' or the previous value matches NO_NEWLINE.
    def indent_token
      return false unless indent = @chunk[MULTI_DENT, 1]
      @line += indent.scan(MULTILINER).size
      @i += indent.size
      next_character = @chunk[MULTI_DENT, 4]
      before_last = @tokens[-2]
      # before_last is nil when only a single token has been lexed so far.
      no_newlines = next_character == '.' ||
                    (last_value.to_s.match(NO_NEWLINE) &&
                     (before_last.nil? || before_last[0] != '.') &&
                     !last_value.match(CODE))
      return suppress_newlines(indent) if no_newlines
      size = indent.scan(LAST_DENT).last.last.length
      return newline_token(indent) if size == @indent
      if size > @indent
        token(:INDENT, size - @indent)
        @indents << (size - @indent)
      else
        outdent_token(@indent - size)
      end
      @indent = size
    end

    # Pops recorded indents, emitting one :OUTDENT per popped level, until
    # +move_out+ columns have been covered or the stack is empty; then emits a
    # "\n" token.
    def outdent_token(move_out)
      while move_out > 0 && !@indents.empty?
        last_indent = @indents.pop
        token(:OUTDENT, last_indent)
        move_out -= last_indent
      end
      token("\n", "\n")
    end

    # Skips spaces and tabs, remembering in @spaced the value that preceded them.
    def whitespace_token
      return false unless whitespace = @chunk[WHITESPACE, 1]
      @spaced = last_value
      @i += whitespace.length
    end

    # Emits a "\n" token unless the previous value already is one.
    def newline_token(newlines)
      token("\n", "\n") unless last_value == "\n"
      true
    end

    # Drops a trailing "\" token (if that is the previous value) and emits
    # nothing for the newline itself.
    def suppress_newlines(newlines)
      @tokens.pop if last_value == "\\"
      true
    end

    # Catch-all: matches an operator run, or failing that a single character.
    # ':' and '=' are tagged :ASSIGN; '(' and '[' directly following (without
    # intervening whitespace) a CALLABLE tag become :CALL_START / :INDEX_START.
    def literal_token
      value = @chunk[OPERATOR, 1]
      tag_parameters if value && value.match(CODE)
      value ||= @chunk[0,1]
      tag = value.match(ASSIGNMENT) ? :ASSIGN : value
      if !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
        tag = :CALL_START if value == '('
        tag = :INDEX_START if value == '['
      end
      token(tag, value)
      @i += value.length
    end

    # Appends [tag, Value] to the stream, stamping the current line.
    def token(tag, value)
      @tokens << [tag, Value.new(value, @line)]
    end

    # The value of the most recent token, or nil when there is none.
    def last_value
      @tokens.last && @tokens.last[1]
    end

    # The tag of the most recent token, or nil when there is none.
    def last_tag
      @tokens.last && @tokens.last[0]
    end

    # Called when a function arrow is found right after ')': walks backwards
    # re-tagging :IDENTIFIER as :PARAM and ')' as :PARAM_END until the first
    # '(' (re-tagged :PARAM_START) or the start of the stream is reached.
    def tag_parameters
      return if last_tag != ')'
      i = 0
      loop do
        i -= 1
        tok = @tokens[i]
        return if !tok
        case tok[0]
        when :IDENTIFIER  then tok[0] = :PARAM
        when ')'          then tok[0] = :PARAM_END
        when '('          then return tok[0] = :PARAM_START
        end
      end
    end

    # Emits the outdents (and final "\n") needed to return to column zero.
    def close_indentation
      outdent_token(@indent)
    end

  end

end
@@ -0,0 +1,238 @@
1
module NScript

  # Post-processes the Lexer's token stream in a fixed series of passes (see
  # #rewrite). Tokens are two-element arrays [tag, Value]; the passes mutate
  # @tokens in place by inserting, deleting and re-tagging entries.
  class Rewriter

    # Opening/closing tag pairs that must nest properly.
    BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], [:INDENT, :OUTDENT],
      [:PARAM_START, :PARAM_END], [:CALL_START, :CALL_END], [:INDEX_START, :INDEX_END]]

    # Tags that open / close a balanced pair.
    EXPRESSION_START = BALANCED_PAIRS.map {|pair| pair.first }
    EXPRESSION_TAIL  = BALANCED_PAIRS.map {|pair| pair.last }
    # Tags before which a "\n" token is removed.
    EXPRESSION_CLOSE = [:CATCH, :WHEN, :ELSE, :FINALLY] + EXPRESSION_TAIL

    # Tags that can be the callee of an implicit call.
    IMPLICIT_FUNC = [:IDENTIFIER, :SUPER, ')', :CALL_END, ']', :INDEX_END]
    # Tags that terminate any open implicit calls.
    IMPLICIT_END  = [:IF, :UNLESS, :FOR, :WHILE, "\n", :OUTDENT]
    # Tags that, following an IMPLICIT_FUNC tag, start an implicit call.
    IMPLICIT_CALL = [:IDENTIFIER, :NUMBER, :STRING, :JS, :REGEX, :NEW, :PARAM_START,
                     :TRY, :DELETE, :TYPEOF, :SWITCH,
                     :TRUE, :FALSE, :YES, :NO, :ON, :OFF, '!', '!!', :NOT,
                     '@', '->', '=>', '[', '(', '{']

    # Maps every opener to its closer and every closer to its opener.
    INVERSES = BALANCED_PAIRS.inject({}) do |memo, pair|
      memo[pair.first] = pair.last
      memo[pair.last]  = pair.first
      memo
    end

    # Tags that get an implicit INDENT/OUTDENT block when not followed by :INDENT.
    SINGLE_LINERS  = [:ELSE, "->", "=>", :TRY, :FINALLY, :THEN]
    # Tags that end such an implicit block.
    SINGLE_CLOSERS = ["\n", :CATCH, :FINALLY, :ELSE, :OUTDENT, :LEADING_WHEN, :PARAM_START]

    # Runs every rewriting pass over +tokens+ (mutating the array) and returns it.
    # ensure_balance may raise ParseError for unbalanced pairs.
    def rewrite(tokens)
      @tokens = tokens
      adjust_comments
      remove_leading_newlines
      remove_mid_expression_newlines
      move_commas_outside_outdents
      close_open_calls_and_indexes
      add_implicit_parentheses
      add_implicit_indentation
      ensure_balance(*BALANCED_PAIRS)
      rewrite_closing_parens
      @tokens
    end

    # Walks @tokens, yielding (previous, current, next, index) for each token.
    # The block's return value is the number of positions to advance, which
    # lets blocks insert or delete tokens while scanning.
    #
    # previous is nil for the first token. (Indexing with i - 1 at i == 0
    # would wrap around to the last token of the stream.)
    def scan_tokens
      i = 0
      loop do
        break unless @tokens[i]
        prev = i > 0 ? @tokens[i - 1] : nil
        move = yield(prev, @tokens[i], @tokens[i + 1], i)
        i += move
      end
    end

    # Tidies the tokens around :COMMENT tokens:
    # * an INDENT/OUTDENT (or OUTDENT/INDENT) pair of equal value two places
    #   either side of the comment is removed;
    # * when the comment follows "\n" and an :INDENT sits two places after it,
    #   that :INDENT is moved in front of the comment, replacing the "\n";
    # * a comment not preceded by "\n", :INDENT or :OUTDENT gets a "\n"
    #   inserted before it.
    def adjust_comments
      scan_tokens do |prev, token, _post, i|
        next 1 unless token[0] == :COMMENT
        # Guard the look-behind so it cannot wrap to the end of the stream.
        before = i >= 2 ? @tokens[i - 2] : nil
        after  = @tokens[i + 2]
        if before && after &&
           ((before[0] == :INDENT && after[0] == :OUTDENT) ||
            (before[0] == :OUTDENT && after[0] == :INDENT)) &&
           before[1] == after[1]
          @tokens.delete_at(i + 2)
          @tokens.delete_at(i - 2)
          next 0
        elsif prev && prev[0] == "\n" && after && after[0] == :INDENT
          @tokens.delete_at(i + 2)
          @tokens[i - 1] = after
          next 1
        elsif prev && !["\n", :INDENT, :OUTDENT].include?(prev[0])
          @tokens.insert(i, ["\n", Value.new("\n", token[1].line)])
          next 2
        else
          next 1
        end
      end
    end

    # Drops a "\n" token at the very start of the stream, if present.
    # An empty stream is left untouched.
    def remove_leading_newlines
      first = @tokens[0]
      @tokens.shift if first && first[0] == "\n"
    end

    # Deletes every "\n" token that is immediately followed by a tag listed
    # in EXPRESSION_CLOSE.
    def remove_mid_expression_newlines
      scan_tokens do |_prev, token, post, i|
        next 1 unless post && EXPRESSION_CLOSE.include?(post[0]) && token[0] == "\n"
        @tokens.delete_at(i)
        next 0
      end
    end

    # Swaps a ',' that directly precedes an :OUTDENT so the :OUTDENT comes first.
    def move_commas_outside_outdents
      scan_tokens do |prev, token, _post, i|
        if token[0] == :OUTDENT && prev && prev[0] == ','
          @tokens.delete_at(i)
          @tokens.insert(i - 1, token)
        end
        next 1
      end
    end

    # Re-tags the ')' / ']' that closes a :CALL_START / :INDEX_START as
    # :CALL_END / :INDEX_END. Each stack entry counts the plain '(' / '['
    # opened since the most recent call/index start.
    def close_open_calls_and_indexes
      parens, brackets = [0], [0]
      scan_tokens do |_prev, token, _post, _i|
        case token[0]
        when :CALL_START  then parens.push(0)
        when :INDEX_START then brackets.push(0)
        when '('          then parens[-1] += 1
        when '['          then brackets[-1] += 1
        when ')'
          if parens.last == 0
            parens.pop
            token[0] = :CALL_END
          else
            parens[-1] -= 1
          end
        when ']'
          if brackets.last == 0
            brackets.pop
            token[0] = :INDEX_END
          else
            brackets[-1] -= 1
          end
        end
        next 1
      end
    end

    # Inserts :CALL_START where an IMPLICIT_FUNC tag is directly followed by
    # an IMPLICIT_CALL tag, and the matching :CALL_END tokens at the next
    # IMPLICIT_END tag (or at the end of the stream). The stack holds, per
    # indentation level, the number of implicit calls still open.
    def add_implicit_parentheses
      stack = [0]
      scan_tokens do |prev, token, post, i|
        stack.push(0) if token[0] == :INDENT
        if token[0] == :OUTDENT
          last = stack.pop
          stack[-1] += last
        end
        if stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?)
          # Close after an OUTDENT, but before any other terminator.
          idx = token[0] == :OUTDENT ? i + 1 : i
          stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) }
          size, stack[-1] = stack[-1] + 1, 0
          next size
        end
        next 1 unless prev && IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0])
        @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
        stack[-1] += 1
        next 2
      end
    end

    # Wraps the body of single-line forms (tags in SINGLE_LINERS not followed
    # by :INDENT) in an :INDENT / :OUTDENT pair. The :OUTDENT goes before the
    # first SINGLE_CLOSERS tag, an unmatched ')', or the end of the stream.
    # A :THEN token is removed once its block has been inserted.
    def add_implicit_indentation
      scan_tokens do |_prev, token, post, i|
        next 1 unless SINGLE_LINERS.include?(token[0]) &&
                      !(post && post[0] == :INDENT) &&
                      !(token[0] == :ELSE && post && post[0] == :IF) # Elsifs shouldn't get blocks.
        starter = token[0]
        line    = token[1].line
        @tokens.insert(i + 1, [:INDENT, Value.new(2, line)])
        idx     = i + 1
        parens  = 0
        loop do
          idx += 1
          tok = @tokens[idx]
          if (!tok || SINGLE_CLOSERS.include?(tok[0]) ||
              (tok[0] == ')' && parens == 0)) &&
             !(starter == :ELSE && tok && tok[0] == :ELSE)
            # Keep a trailing comma outside the implicit block.
            insertion = @tokens[idx - 1][0] == "," ? idx - 1 : idx
            @tokens.insert(insertion, [:OUTDENT, Value.new(2, line)])
            break
          end
          parens += 1 if tok[0] == '('
          parens -= 1 if tok[0] == ')'
        end
        next 1 unless token[0] == :THEN
        @tokens.delete_at(i)
        next 0
      end
    end

    # Verifies that each [opener, closer] pair in +pairs+ is balanced.
    #
    # Raises ParseError as soon as a closer appears without an open opener,
    # and after the scan if any opener is still unclosed.
    def ensure_balance(*pairs)
      puts "\nbefore ensure_balance: #{@tokens.inspect}" if ENV['VERBOSE']
      levels, lines = Hash.new(0), Hash.new
      scan_tokens do |_prev, token, _post, _i|
        pairs.each do |pair|
          opener, closer = *pair
          levels[opener] += 1 if token[0] == opener
          levels[opener] -= 1 if token[0] == closer
          lines[token[0]] = token[1].line
          raise ParseError.new(token[0], token[1], nil) if levels[opener] < 0
        end
        next 1
      end
      unclosed = levels.detect {|k, v| v > 0 }
      sym = unclosed && unclosed[0]
      raise ParseError.new(sym, Value.new(sym, lines[sym]), nil, "unclosed '#{sym}'") if unclosed
    end

    # Repairs closers that arrive in the wrong order. Openers are pushed on a
    # stack; when a closer does not match the opener on top of the stack, the
    # correct closer is inserted first and recorded as "debt", so that the
    # real closer for that opener is swallowed when it shows up later.
    def rewrite_closing_parens
      verbose = ENV['VERBOSE']
      stack, debt = [], Hash.new(0)
      stack_stats = lambda { "stack: #{stack.inspect} debt: #{debt.inspect}\n\n" }
      puts "rewrite_closing_original: #{@tokens.inspect}" if verbose
      scan_tokens do |_prev, token, _post, i|
        tag, inv = token[0], INVERSES[token[0]]
        # Push openers onto the stack.
        if EXPRESSION_START.include?(tag)
          stack.push(token)
          puts "pushing #{tag} #{stack_stats[]}" if verbose
          next 1
        # The end of an expression, check stack and debt for a pair.
        elsif EXPRESSION_TAIL.include?(tag)
          puts @tokens[i..-1].inspect if verbose
          # If the tag is already in our debt, swallow it.
          if debt[inv] > 0
            debt[inv] -= 1
            @tokens.delete_at(i)
            puts "tag in debt #{tag} #{stack_stats[]}" if verbose
            next 0
          else
            # Pop the stack of open delimiters.
            match = stack.pop
            mtag  = match[0]
            # Continue onwards if it's the expected tag.
            if tag == INVERSES[mtag]
              puts "expected tag #{tag} #{stack_stats[]}" if verbose
              next 1
            else
              # Unexpected close, insert correct close, adding to the debt.
              debt[mtag] += 1
              puts "unexpected #{tag}, replacing with #{INVERSES[mtag]} #{stack_stats[]}" if verbose
              val = mtag == :INDENT ? match[1] : INVERSES[mtag]
              @tokens.insert(i, [INVERSES[mtag], Value.new(val, token[1].line)])
              next 1
            end
          end
        else
          # Uninteresting token:
          next 1
        end
      end
    end

  end

end