nscript 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ module NScript
2
+
3
+ class Lexer
4
+
5
# Reserved words. identifier_token emits a matching identifier under its
# own upcased symbol tag (for example "if" becomes an :IF token).
KEYWORDS = %w[
  if else then unless
  true false yes no on off
  and or is isnt not
  new return
  try catch finally throw
  break continue
  for in of by where while
  delete instanceof typeof
  switch when
  super extends
]

# Patterns tried against the head of the remaining source (all are anchored
# with \A). Capture group 1 is always the full text of the token.
IDENTIFIER = /\A([a-zA-Z$_](\w|\$)*)/
NUMBER     = /\A(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
STRING     = /\A(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
HEREDOC    = /\A("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
JS         = /\A(``|`(.*?)([^\\]|\\\\)`)/m
OPERATOR   = /\A([+\*&|\/\-%=<>:!?]+)/
WHITESPACE = /\A([ \t]+)/
COMMENT    = /\A(((\n?[ \t]*)?#.*$)+)/
CODE       = /\A((-|=)>)/
REGEX      = /\A(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/
MULTI_DENT = /\A((\n([ \t]*))+)(\.)?/
LAST_DENT  = /\n([ \t]*)/
ASSIGNMENT = /\A(:|=)\Z/

# Helper patterns used to clean up or classify text that was already matched.
JS_CLEANER      = /(\A`|`\Z)/
MULTILINER      = /\n/
STRING_NEWLINES = /\n[ \t]*/
COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/
NO_NEWLINE      = /\A([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)\Z/
HEREDOC_INDENT  = /^[ \t]+/

# When the previous token carries one of these tags, regex_token refuses to
# read a leading "/" as the start of a regular expression.
NOT_REGEX = [
  :IDENTIFIER, :NUMBER, :REGEX, :STRING,
  ')', '++', '--', ']', '}',
  :FALSE, :NULL, :TRUE
]

# When the previous token carries one of these tags and no whitespace was
# recorded after it, literal_token tags "(" as :CALL_START and "[" as
# :INDEX_START.
CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]
44
+
45
# Scans `code` from start to finish and returns the rewritten token stream.
#
# Resets all lexer state, repeatedly extracts one token from the remaining
# text, closes any indentation that is still open, and finally hands the raw
# tokens to Rewriter#rewrite, whose result is returned.
def tokenize(code)
  @code    = code.chomp # source text with one trailing line break removed
  @i       = 0          # offset of the next unread character
  @line    = 1          # line number stamped onto emitted tokens
  @indent  = 0          # current indentation width
  @indents = []         # stack of the indentation steps currently open
  @tokens  = []         # tokens emitted so far, as [tag, value] pairs
  @spaced  = nil        # last token value that was followed by whitespace

  until @i >= @code.length
    @chunk = @code[@i..-1]
    extract_next_token
  end

  puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
  close_indentation
  Rewriter.new.rewrite(@tokens)
end
61
+
62
# Tries each token matcher in priority order and stops at the first one that
# reports success. Returns nil in that case; when none matches, falls back
# to literal_token and returns its result.
def extract_next_token
  matched = identifier_token ||
            number_token ||
            heredoc_token ||
            string_token ||
            js_token ||
            regex_token ||
            indent_token ||
            comment_token ||
            whitespace_token
  return if matched

  literal_token
end
74
+
75
# Matches an identifier at the head of @chunk and emits a token for it.
#
# Keywords are tagged with their own upcased name ("if" => :IF). A `when`
# that directly follows an :OUTDENT, :INDENT or "\n" token is tagged
# :LEADING_WHEN. For plain identifiers the preceding token is retagged:
#   * "::" becomes :PROTOTYPE_ACCESS
#   * "."  becomes :PROPERTY_ACCESS, or :SOAK_ACCESS when a "?" token
#     precedes the dot (that "?" token is removed); a dot that follows
#     another dot is left alone.
#
# Returns false when no identifier matches, otherwise the advanced @i.
def identifier_token
  identifier = @chunk[IDENTIFIER, 1]
  return false unless identifier

  tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
  tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)

  if tag == :IDENTIFIER
    @tokens[-1][0] = :PROTOTYPE_ACCESS if last_value == '::'
    if last_value == '.'
      # The dot may be the very first token (source starting with ".foo"),
      # in which case there is nothing before it to inspect.
      before_dot = @tokens[-2]
      unless before_dot && before_dot[1] == '.'
        if before_dot && before_dot[0] == "?"
          @tokens[-1][0] = :SOAK_ACCESS
          @tokens.delete_at(-2)
        else
          @tokens[-1][0] = :PROPERTY_ACCESS
        end
      end
    end
  end

  token(tag, identifier)
  @i += identifier.length
end
93
+
94
# Emits a :NUMBER token when @chunk starts with a numeric literal.
# Returns false when there is no match, otherwise the advanced @i.
def number_token
  literal = @chunk[NUMBER, 1]
  return false if literal.nil?

  token(:NUMBER, literal)
  @i += literal.length
end
99
+
100
# Emits a :STRING token when @chunk starts with a quoted string.
#
# Each line break inside the literal (together with the indentation that
# follows it) is replaced by a space, a backslash and a newline. The token
# is emitted before @line is advanced past the line breaks the literal
# spans. Returns false when there is no match, otherwise the advanced @i.
def string_token
  raw = @chunk[STRING, 1]
  return false if raw.nil?

  token(:STRING, raw.gsub(STRING_NEWLINES, " \\\n"))
  @line += raw.count("\n")
  @i += raw.length
end
107
+
108
# Emits a :STRING token when @chunk starts with a triple-quoted heredoc.
#
# The smallest leading indentation found in the body (as ordered by
# String comparison) is stripped from every line, line breaks become the
# two characters backslash + "n", double quotes are backslash-escaped, and
# the result is wrapped in double quotes.
#
# Returns false when there is no match, otherwise the advanced @i.
def heredoc_token
  match = @chunk.match(HEREDOC)
  return false unless match

  # An empty heredoc ("""""" or '''''') matches without filling either body
  # capture group, so fall back to an empty body instead of calling methods
  # on nil. `dup` gives a private, mutable copy for the gsub! calls below.
  doc = (match[2] || match[4] || "").dup
  indent = doc.scan(HEREDOC_INDENT).min
  doc.gsub!(/^#{indent}/, "")
  doc.gsub!("\n", "\\n")
  doc.gsub!('"', '\\"')
  token(:STRING, "\"#{doc}\"")
  @line += match[1].count("\n")
  @i += match[1].length
end
119
+
120
# Emits a :JS token when @chunk starts with backtick-quoted text; the
# enclosing backticks are removed from the token value.
# Returns false when there is no match, otherwise the advanced @i.
def js_token
  quoted = @chunk[JS, 1]
  return false if quoted.nil?

  token(:JS, quoted.gsub(JS_CLEANER, ''))
  @i += quoted.length
end
125
+
126
# Emits a :REGEX token when @chunk starts with a regular-expression literal
# and the previous token's tag is not listed in NOT_REGEX.
# Returns false when nothing is emitted, otherwise the advanced @i.
def regex_token
  literal = @chunk[REGEX, 1]
  return false if literal.nil? || NOT_REGEX.include?(last_tag)

  token(:REGEX, literal)
  @i += literal.length
end
132
+
133
# Emits a :COMMENT token for a run of consecutive "#" comment lines at the
# head of @chunk, followed by a "\n" token.
#
# The :COMMENT value is an array with one entry per comment line, each with
# its leading whitespace and "#" removed. @line is advanced before the
# tokens are emitted. Returns false when there is no match, otherwise the
# advanced @i.
def comment_token
  text = @chunk[COMMENT, 1]
  return false if text.nil?

  @line += text.scan(MULTILINER).length
  lines = text.gsub(COMMENT_CLEANER, '').split(MULTILINER)
  token(:COMMENT, lines)
  token("\n", "\n")
  @i += text.length
end
140
+
141
# Handles one or more line breaks (with their indentation) at the head of
# @chunk.
#
# The line breaks are swallowed via suppress_newlines when the next
# character is "." or when the previous token value matches NO_NEWLINE
# (unless that value follows a "." token or is a function arrow). Otherwise
# the width of the last line's indentation is compared with @indent:
#   * equal   -> newline_token
#   * greater -> an :INDENT token for the difference, pushed on @indents
#   * smaller -> outdent_token for the difference
#
# Returns false when @chunk does not start with a line break; otherwise a
# truthy value.
def indent_token
  indent = @chunk[MULTI_DENT, 1]
  return false unless indent

  @line += indent.scan(MULTILINER).size
  @i += indent.size
  next_character = @chunk[MULTI_DENT, 4]

  # There may be only a single token so far (e.g. source that opens with an
  # operator); treat a missing second-to-last token as "not a dot" instead
  # of indexing into nil.
  before_last = @tokens[-2]
  continuation = last_value.to_s.match(NO_NEWLINE) &&
                 (before_last.nil? || before_last[0] != '.') &&
                 !last_value.match(CODE)
  return suppress_newlines(indent) if next_character == '.' || continuation

  size = indent.scan(LAST_DENT).last.last.length
  return newline_token(indent) if size == @indent

  if size > @indent
    token(:INDENT, size - @indent)
    @indents << (size - @indent)
  else
    outdent_token(@indent - size)
  end
  @indent = size
end
158
+
159
# Closes open indentation steps until `move_out` columns have been
# accounted for (or no steps remain), emitting one :OUTDENT token per step,
# and then always emits a "\n" token.
def outdent_token(move_out)
  until move_out <= 0 || @indents.empty?
    step = @indents.pop
    token(:OUTDENT, step)
    move_out -= step
  end
  token("\n", "\n")
end
167
+
168
# Skips spaces and tabs at the head of @chunk, remembering in @spaced the
# value of the token they follow. No token is emitted.
# Returns false when there is no match, otherwise the advanced @i.
def whitespace_token
  blank = @chunk[WHITESPACE, 1]
  return false if blank.nil?

  @spaced = last_value
  @i += blank.length
end
173
+
174
# Emits a "\n" token unless the previous token's value is already "\n".
# The `newlines` argument is not used. Always returns true.
def newline_token(newlines)
  token("\n", "\n") if last_value != "\n"
  true
end
178
+
179
# Swallows a line break without emitting a token. When the previous token's
# value is a lone backslash, that token is dropped as well.
# The `newlines` argument is not used. Always returns true.
def suppress_newlines(newlines)
  trailing_backslash = last_value == "\\"
  @tokens.pop if trailing_backslash
  true
end
183
+
184
# Fallback matcher: emits an operator run, or failing that the single next
# character, as a token tagged with its own text.
#
# Special cases:
#   * a function arrow first triggers tag_parameters;
#   * ":" and "=" are tagged :ASSIGN;
#   * "(" / "[" are tagged :CALL_START / :INDEX_START when the previous
#     token's tag is in CALLABLE and @spaced is not that token's value.
#
# Returns the advanced @i.
def literal_token
  value = @chunk[OPERATOR, 1]
  if value
    tag_parameters if value.match(CODE)
  else
    value = @chunk[0, 1]
  end

  tag = value.match(ASSIGNMENT) ? :ASSIGN : value
  unless @spaced.equal?(last_value)
    if CALLABLE.include?(last_tag)
      case value
      when '(' then tag = :CALL_START
      when '[' then tag = :INDEX_START
      end
    end
  end

  token(tag, value)
  @i += value.length
end
196
+
197
# Appends a [tag, value] pair to @tokens, with the value wrapped in a Value
# built from it and the current line number.
def token(tag, value)
  stamped = Value.new(value, @line)
  @tokens << [tag, stamped]
end
200
+
201
# Value of the most recently emitted token, or nil when no token has been
# emitted yet.
def last_value
  newest = @tokens.last
  newest && newest[1]
end
204
+
205
# Tag of the most recently emitted token, or nil when no token has been
# emitted yet.
def last_tag
  newest = @tokens.last
  newest && newest[0]
end
208
+
209
# Retags a parenthesised parameter list that ends at the previous token.
#
# Does nothing unless the previous token is ")". Otherwise walks backwards
# through @tokens, turning :IDENTIFIER into :PARAM and ")" into :PARAM_END,
# and stops at the first "(" it meets, which becomes :PARAM_START.
def tag_parameters
  return unless last_tag == ')'

  @tokens.reverse_each do |tok|
    case tok[0]
    when :IDENTIFIER
      tok[0] = :PARAM
    when ')'
      tok[0] = :PARAM_END
    when '('
      tok[0] = :PARAM_START
      return :PARAM_START
    end
  end
  nil
end
223
+
224
# Closes whatever indentation is still open by outdenting the full current
# indentation width.
def close_indentation
  remaining = @indent
  outdent_token(remaining)
end
227
+
228
+ end
229
+
230
+ end
@@ -0,0 +1,238 @@
1
+ module NScript
2
+
3
+ class Rewriter
4
+
5
# Opening/closing tag pairs that must balance in the token stream.
BALANCED_PAIRS = [
  ['(', ')'],
  ['[', ']'],
  ['{', '}'],
  [:INDENT, :OUTDENT],
  [:PARAM_START, :PARAM_END],
  [:CALL_START, :CALL_END],
  [:INDEX_START, :INDEX_END]
]

# Tags that open a balanced pair, tags that close one, and the tags before
# which a "\n" token is dropped by remove_mid_expression_newlines.
EXPRESSION_START = BALANCED_PAIRS.map(&:first)
EXPRESSION_TAIL  = BALANCED_PAIRS.map(&:last)
EXPRESSION_CLOSE = [:CATCH, :WHEN, :ELSE, :FINALLY] + EXPRESSION_TAIL

# Used by add_implicit_parentheses: a token tagged IMPLICIT_CALL directly
# after a token tagged IMPLICIT_FUNC starts an implicit call, and an
# IMPLICIT_END token closes the calls that are still open.
IMPLICIT_FUNC = [:IDENTIFIER, :SUPER, ')', :CALL_END, ']', :INDEX_END]
IMPLICIT_END  = [:IF, :UNLESS, :FOR, :WHILE, "\n", :OUTDENT]
IMPLICIT_CALL = [
  :IDENTIFIER, :NUMBER, :STRING, :JS, :REGEX, :NEW, :PARAM_START,
  :TRY, :DELETE, :TYPEOF, :SWITCH,
  :TRUE, :FALSE, :YES, :NO, :ON, :OFF, '!', '!!', :NOT,
  '@', '->', '=>', '[', '(', '{'
]

# Maps every opening tag to its closing tag and vice versa.
INVERSES = BALANCED_PAIRS.each_with_object({}) do |(opener, closer), table|
  table[opener] = closer
  table[closer] = opener
end

# Used by add_implicit_indentation: SINGLE_LINERS may be followed by a body
# on the same line, which then runs up to the next SINGLE_CLOSERS token.
SINGLE_LINERS  = [:ELSE, "->", "=>", :TRY, :FINALLY, :THEN]
SINGLE_CLOSERS = ["\n", :CATCH, :FINALLY, :ELSE, :OUTDENT, :LEADING_WHEN, :PARAM_START]
27
+
28
# Runs every rewriting pass, in a fixed order, over `tokens` (which is
# modified in place) and returns the resulting token array.
def rewrite(tokens)
  @tokens = tokens

  %i[
    adjust_comments
    remove_leading_newlines
    remove_mid_expression_newlines
    move_commas_outside_outdents
    close_open_calls_and_indexes
    add_implicit_parentheses
    add_implicit_indentation
  ].each { |pass| send(pass) }

  ensure_balance(*BALANCED_PAIRS)
  rewrite_closing_parens
  @tokens
end
41
+
42
# Walks @tokens, yielding (previous token, current token, next token, index)
# to the block. The block must return how many positions to move forward,
# which lets callers insert or delete tokens while scanning.
#
# Note that for index 0 the "previous token" is @tokens[-1], i.e. the last
# token of the stream.
def scan_tokens
  position = 0
  while @tokens[position]
    step = yield(@tokens[position - 1], @tokens[position], @tokens[position + 1], position)
    position += step
  end
end
50
+
51
# Repositions the tokens around each :COMMENT token.
#
# For every comment at index i, with `before` = token i-2 and `after` =
# token i+2:
#   * when the comment sits between an :INDENT/:OUTDENT (or
#     :OUTDENT/:INDENT) pair with equal values, both tokens are removed;
#   * when it follows a "\n" and the token at i+2 is an :INDENT, that
#     :INDENT is moved in front of the comment, replacing the "\n";
#   * when it does not follow a "\n", :INDENT or :OUTDENT, a "\n" token is
#     inserted in front of it.
def adjust_comments
  scan_tokens do |prev, token, post, i|
    next 1 unless token[0] == :COMMENT

    # A negative index would wrap around to the end of the stream, so a
    # comment among the first two tokens has no `before` token.
    before = i >= 2 ? @tokens[i - 2] : nil
    after = @tokens[i + 2]

    if before && after &&
       ((before[0] == :INDENT && after[0] == :OUTDENT) ||
        (before[0] == :OUTDENT && after[0] == :INDENT)) &&
       before[1] == after[1]
      @tokens.delete_at(i + 2)
      @tokens.delete_at(i - 2)
      next 0
    elsif i > 0 && after && prev[0] == "\n" && after[0] == :INDENT
      # `i > 0` keeps the write below from landing on @tokens[-1];
      # `after` may be nil when the comment is near the end of the stream.
      @tokens.delete_at(i + 2)
      @tokens[i - 1] = after
      next 1
    elsif !["\n", :INDENT, :OUTDENT].include?(prev[0])
      @tokens.insert(i, ["\n", Value.new("\n", token[1].line)])
      next 2
    else
      next 1
    end
  end
end
74
+
75
# Drops the first token when it is a "\n" token. An empty token stream is
# left untouched instead of raising.
def remove_leading_newlines
  first = @tokens.first
  @tokens.shift if first && first[0] == "\n"
end
78
+
79
# Deletes every "\n" token that is immediately followed by a token whose tag
# is listed in EXPRESSION_CLOSE.
def remove_mid_expression_newlines
  scan_tokens do |_prev, token, post, i|
    if post && EXPRESSION_CLOSE.include?(post[0]) && token[0] == "\n"
      @tokens.delete_at(i)
      0
    else
      1
    end
  end
end
86
+
87
# Swaps every "," that is immediately followed by an :OUTDENT, so that the
# :OUTDENT comes first and the comma follows it.
def move_commas_outside_outdents
  scan_tokens do |prev, token, _post, i|
    comma_then_outdent = token[0] == :OUTDENT && prev[0] == ','
    if comma_then_outdent
      @tokens.delete_at(i)
      @tokens.insert(i - 1, token)
    end
    1
  end
end
96
+
97
# Retags the ")" that closes each :CALL_START as :CALL_END and the "]" that
# closes each :INDEX_START as :INDEX_END.
#
# Two stacks of counters track how many plain "(" / "[" are open inside the
# innermost call / index; a closer seen while that counter is zero belongs
# to the call / index itself.
def close_open_calls_and_indexes
  open_parens = [0]
  open_brackets = [0]

  scan_tokens do |_prev, token, _post, _i|
    tag = token[0]
    if tag == :CALL_START
      open_parens.push(0)
    elsif tag == :INDEX_START
      open_brackets.push(0)
    elsif tag == '('
      open_parens[-1] += 1
    elsif tag == '['
      open_brackets[-1] += 1
    elsif tag == ')'
      if open_parens.last == 0
        open_parens.pop
        token[0] = :CALL_END
      else
        open_parens[-1] -= 1
      end
    elsif tag == ']'
      if open_brackets.last == 0
        open_brackets.pop
        token[0] = :INDEX_END
      else
        open_brackets[-1] -= 1
      end
    end
    1
  end
end
123
+
124
# Inserts :CALL_START / :CALL_END tokens around calls written without
# parentheses.
#
# A call starts wherever an IMPLICIT_CALL token directly follows an
# IMPLICIT_FUNC token. A stack of counters (one per indentation level)
# tracks the calls still open; they are closed at the next IMPLICIT_END
# token or at the last token of the stream. On :OUTDENT the inner level's
# count is folded into the enclosing one and the closers go after the
# :OUTDENT.
def add_implicit_parentheses
  pending = [0]

  scan_tokens do |prev, token, post, i|
    tag = token[0]
    case tag
    when :INDENT
      pending.push(0)
    when :OUTDENT
      inner = pending.pop
      pending[-1] += inner
    end

    open_calls = pending.last
    if open_calls > 0 && (IMPLICIT_END.include?(tag) || post.nil?)
      at = tag == :OUTDENT ? i + 1 : i
      open_calls.times { @tokens.insert(at, [:CALL_END, Value.new(')', token[1].line)]) }
      pending[-1] = 0
      next open_calls + 1
    end

    if IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(tag)
      @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
      pending[-1] += 1
      2
    else
      1
    end
  end
end
144
+
145
# Wraps single-line bodies in an explicit :INDENT / :OUTDENT pair.
#
# Applies to every SINGLE_LINERS token that is not already followed by an
# :INDENT (an :ELSE directly followed by :IF is skipped). The :INDENT goes
# right after the starter; the :OUTDENT goes before the next SINGLE_CLOSERS
# token, before an unbalanced ")", or at the end of the stream, and in
# front of a trailing ",". An :ELSE body is not closed by another :ELSE.
# A :THEN starter is removed once its block has been inserted.
def add_implicit_indentation
  scan_tokens do |_prev, token, post, i|
    starter = token[0]
    next 1 unless SINGLE_LINERS.include?(starter)
    next 1 if post[0] == :INDENT
    next 1 if starter == :ELSE && post[0] == :IF # else-if chains get no block

    line = token[1].line
    @tokens.insert(i + 1, [:INDENT, Value.new(2, line)])

    cursor = i + 2 # first token after the inserted :INDENT
    depth = 0      # plain "(" opened inside the body so far
    loop do
      tok = @tokens[cursor]
      closes = !tok || SINGLE_CLOSERS.include?(tok[0]) || (tok[0] == ')' && depth == 0)
      if closes && !(starter == :ELSE && tok[0] == :ELSE)
        at = @tokens[cursor - 1][0] == "," ? cursor - 1 : cursor
        @tokens.insert(at, [:OUTDENT, Value.new(2, line)])
        break
      end
      depth += 1 if tok[0] == '('
      depth -= 1 if tok[0] == ')'
      cursor += 1
    end

    if starter == :THEN
      @tokens.delete_at(i)
      0
    else
      1
    end
  end
end
172
+
173
# Verifies that every [opener, closer] pair in `pairs` is balanced in
# @tokens.
#
# Raises ParseError as soon as a closer appears without a matching opener,
# and after the scan when some opener is still unclosed (reported with the
# line of the last token seen carrying that tag). Returns nil otherwise.
def ensure_balance(*pairs)
  puts "\nbefore ensure_balance: #{@tokens.inspect}" if ENV['VERBOSE']
  levels = Hash.new(0)
  lines = {}

  scan_tokens do |_prev, token, _post, _i|
    tag = token[0]
    pairs.each do |(opener, closer)|
      levels[opener] += 1 if tag == opener
      levels[opener] -= 1 if tag == closer
      lines[tag] = token[1].line
      raise ParseError.new(tag, token[1], nil) if levels[opener] < 0
    end
    1
  end

  unclosed = levels.find { |_tag, depth| depth > 0 }
  return unless unclosed

  sym = unclosed[0]
  raise ParseError.new(sym, Value.new(sym, lines[sym]), nil, "unclosed '#{sym}'")
end
190
+
191
# Repairs mismatched closing delimiters.
#
# Openers are pushed on a stack. When a closer does not match the opener on
# top of the stack, the correct closer for that opener is inserted in front
# of it and recorded as "debt"; a later closer of the indebted kind is then
# deleted instead of being processed. With ENV['VERBOSE'] set, progress is
# printed to standard output.
def rewrite_closing_parens
  verbose = ENV['VERBOSE']
  stack = []
  debt = Hash.new(0)
  stack_stats = lambda { "stack: #{stack.inspect} debt: #{debt.inspect}\n\n" }
  puts "rewrite_closing_original: #{@tokens.inspect}" if verbose

  scan_tokens do |_prev, token, _post, i|
    tag = token[0]
    inv = INVERSES[tag]

    # Openers are remembered until their closer shows up.
    if EXPRESSION_START.include?(tag)
      stack.push(token)
      puts "pushing #{tag} #{stack_stats.call}" if verbose
      next 1
    end

    # Anything that is not a closer is of no interest here.
    next 1 unless EXPRESSION_TAIL.include?(tag)

    puts @tokens[i..-1].inspect if verbose

    # This closer was already inserted earlier on: swallow the original.
    if debt[inv] > 0
      debt[inv] -= 1
      @tokens.delete_at(i)
      puts "tag in debt #{tag} #{stack_stats.call}" if verbose
      next 0
    end

    match = stack.pop
    mtag = match[0]
    if tag == INVERSES[mtag]
      puts "expected tag #{tag} #{stack_stats.call}" if verbose
    else
      # Wrong closer: insert the one the open delimiter needs and go into
      # debt for it; the current closer is looked at again on the next step.
      debt[mtag] += 1
      puts "unexpected #{tag}, replacing with #{INVERSES[mtag]} #{stack_stats.call}" if verbose
      val = mtag == :INDENT ? match[1] : INVERSES[mtag]
      @tokens.insert(i, [INVERSES[mtag], Value.new(val, token[1].line)])
    end
    1
  end
end
235
+
236
+ end
237
+
238
+ end