apricot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/.gitignore +3 -0
  2. data/.rspec +1 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +7 -0
  5. data/Gemfile +6 -0
  6. data/Gemfile.lock +26 -0
  7. data/README.md +90 -0
  8. data/Rakefile +9 -0
  9. data/apricot.gemspec +22 -0
  10. data/bin/apricot +58 -0
  11. data/examples/bot.apr +23 -0
  12. data/examples/cinch-bot.apr +12 -0
  13. data/examples/hanoi.apr +10 -0
  14. data/examples/hello.apr +1 -0
  15. data/examples/plot.apr +28 -0
  16. data/examples/quine.apr +1 -0
  17. data/kernel/core.apr +928 -0
  18. data/lib/apricot/ast/identifier.rb +111 -0
  19. data/lib/apricot/ast/list.rb +99 -0
  20. data/lib/apricot/ast/literals.rb +240 -0
  21. data/lib/apricot/ast/node.rb +45 -0
  22. data/lib/apricot/ast/scopes.rb +147 -0
  23. data/lib/apricot/ast/toplevel.rb +66 -0
  24. data/lib/apricot/ast/variables.rb +64 -0
  25. data/lib/apricot/ast.rb +3 -0
  26. data/lib/apricot/compiler.rb +55 -0
  27. data/lib/apricot/cons.rb +27 -0
  28. data/lib/apricot/errors.rb +38 -0
  29. data/lib/apricot/generator.rb +15 -0
  30. data/lib/apricot/identifier.rb +91 -0
  31. data/lib/apricot/list.rb +96 -0
  32. data/lib/apricot/macroexpand.rb +47 -0
  33. data/lib/apricot/misc.rb +11 -0
  34. data/lib/apricot/namespace.rb +59 -0
  35. data/lib/apricot/parser.rb +541 -0
  36. data/lib/apricot/printers.rb +12 -0
  37. data/lib/apricot/repl.rb +254 -0
  38. data/lib/apricot/ruby_ext.rb +254 -0
  39. data/lib/apricot/seq.rb +44 -0
  40. data/lib/apricot/special_forms.rb +735 -0
  41. data/lib/apricot/stages.rb +60 -0
  42. data/lib/apricot/version.rb +3 -0
  43. data/lib/apricot.rb +30 -0
  44. data/spec/compiler_spec.rb +499 -0
  45. data/spec/identifier_spec.rb +58 -0
  46. data/spec/list_spec.rb +96 -0
  47. data/spec/parser_spec.rb +312 -0
  48. data/spec/spec_helper.rb +10 -0
  49. metadata +188 -0
@@ -0,0 +1,541 @@
1
+ require 'stringio'
2
+
3
+ module Apricot
4
+ class Parser
5
# A single character that may appear in an identifier, symbol or number:
# anything except the reader's delimiter characters and whitespace.
IDENTIFIER = /[^'`~()\[\]{}";,\s]/.freeze
# A single octal / hexadecimal digit (used by string character escapes).
OCTAL = /[0-7]/.freeze
HEX = /[0-9a-fA-F]/.freeze
# Digit characters in ascending value; the first `radix` entries are the
# digits accepted in a radix literal.
DIGITS = (('0'..'9').to_a + ('a'..'z').to_a).freeze
# Single-letter backslash escapes and the characters they produce.
CHAR_ESCAPES = {
  "a" => "\a", "b" => "\b", "t" => "\t", "n" => "\n",
  "v" => "\v", "f" => "\f", "r" => "\r", "e" => "\e"
}.freeze
# Option letters accepted after a regex literal, mapped to Regexp flags.
REGEXP_OPTIONS = {
  'i' => Regexp::IGNORECASE,
  'x' => Regexp::EXTENDED,
  'm' => Regexp::MULTILINE
}.freeze

# State collected while reading a #() form: the positional argument names
# referenced so far and the rest-argument name (nil when unused).
FnState = Struct.new(:args, :rest)
15
+
16
+ # @param [IO] io an input stream object to read forms from
17
+ def initialize(io, filename = "(none)", line = 1)
18
+ @filename = filename
19
+ @io = io
20
+ @location = 0
21
+ @line = line
22
+
23
+ @fn_state = []
24
+ @syntax_quote_gensyms = []
25
+ end
26
+
27
# Open +filename+, parse its whole contents with evaluate = true, and
# return the result of #parse. The file is closed when the block exits.
def self.parse_file(filename)
  File.open(filename) do |file|
    parser = new(file, filename)
    parser.parse(true)
  end
end
30
+
31
# Parse every form in +source+ by wrapping it in a read-only StringIO.
#
# @param [String] source program text
# @param [String] filename name stored for use in syntax errors
# @param [Integer] line initial value of the line counter
def self.parse_string(source, filename = "(none)", line = 1)
  io = StringIO.new(source, "r")
  new(io, filename, line).parse
end
34
+
35
# Read forms until the input is exhausted and wrap them in a TopLevel node.
#
# @param [Boolean] evaluate passed through to AST::TopLevel unchanged
# @return [AST::TopLevel] node holding every form in the program
def parse(evaluate = false)
  next_char # prime @char with the first character
  skip_whitespace

  forms = []
  until @char.nil?
    forms << parse_form
    skip_whitespace
  end

  # NOTE(review): the third argument is hard-coded to 1 rather than the
  # line passed to the constructor -- confirm that is intended.
  Apricot::AST::TopLevel.new(forms, @filename, 1, evaluate)
end
48
+
49
# Read exactly one form: advance to the next character, skip anything
# ignorable, then parse a single form.
#
# @return [AST::Node] an AST node representing the form read
def parse_one
  next_char
  skip_whitespace

  parse_form
end
55
+
56
+ private
57
# Collect forms up to (and consuming) a closing delimiter.
#
# @param [String] terminator the character to stop parsing at
# @return [Array<AST::Node>] the forms read before the terminator
# @raise an incomplete-input error when the input ends first
def parse_forms_until(terminator)
  collected = []
  skip_whitespace

  until @char.nil?
    unless @char == terminator
      collected << parse_form
      skip_whitespace
      next
    end

    next_char # consume the terminator
    return collected
  end

  # Ran out of characters without ever seeing the terminator.
  incomplete_error "Unexpected end of program, expected #{terminator}"
end
77
+
78
# Parse a single Lisp form, choosing the reader from the current character.
#
# @return [AST::Node] an AST node representing the form
def parse_form
  case @char
  when '(' then parse_list
  when '[' then parse_array
  when '{' then parse_hash
  when '"' then parse_string
  when ':' then parse_symbol
  when '#' then parse_dispatch
  when "'" then parse_quote
  when "`" then parse_syntax_quote
  when "~" then parse_unquote
  when /\d/ then parse_number
  when IDENTIFIER
    # A sign immediately followed by a digit starts a number, not a name.
    signed_number = @char =~ /[+-]/ && peek_char =~ /\d/
    signed_number ? parse_number : parse_identifier
  else
    syntax_error "Unexpected character: #{@char}"
  end
end
101
+
102
# Parse a reader macro introduced by '#', selecting the reader from the
# character that follows it.
#
# @return [AST::Node] the node produced by the selected reader
# @raise an incomplete-input error when the input ends right after '#'
# @raise a syntax error when the character after '#' is not a known macro
def parse_dispatch
  next_char # skip #

  # Input ending after '#' is unfinished input, like the other prefix
  # readers, rather than an unknown macro with an empty name.
  incomplete_error "Unexpected end of program after #, expected a reader macro" unless @char

  case @char
  when '|' then parse_pipe_identifier
  when '{' then parse_set
  when '(' then parse_fn
  when 'r' then parse_regex
  when 'q' then parse_quotation(false)
  when 'Q' then parse_quotation(true)
  else syntax_error "Unknown reader macro: ##{@char}"
  end
end
114
+
115
# Skips whitespace, commas, comments, and forms discarded with #_
def skip_whitespace
  while @char =~ /[\s,;#]/
    if @char == '#'
      case peek_char
      when '!'
        # Treat #! as a comment so shebang lines are ignored
        next_char while @char && @char != "\n"
      when '_'
        2.times { next_char } # skip #_
        skip_whitespace
        incomplete_error "Unexpected end of program after #_, expected a form" unless @char
        parse_form # read the next form and throw it away
      else
        # Any other '#' begins a real form; leave it for the caller.
        break
      end
    elsif @char == ';'
      # Comments begin with a semicolon and extend to the end of the line
      next_char while @char && @char != "\n"
    else
      next_char
    end
  end
end
135
+
136
# Parse 'form into the list (quote form).
#
# Both generated nodes carry the line on which the quote character appeared.
# @line is captured before the quoted form is read, because reading a
# multi-line form advances it.
#
# @return [AST::List] the (quote form) list
# @raise an incomplete-input error when the input ends after the quote
def parse_quote
  line = @line
  next_char # skip the '
  skip_whitespace
  incomplete_error "Unexpected end of program after quote ('), expected a form" unless @char

  form = parse_form
  quote = AST::Identifier.new(line, :quote)
  AST::List.new(line, [quote, form])
end
145
+
146
# Parse `form and return the expansion produced by #syntax_quote.
#
# A fresh table is pushed on @syntax_quote_gensyms for the duration of the
# form, so names ending in '#' map to the same gensym within one syntax
# quote. The table is popped in an ensure block so the stack stays balanced
# even when reading or expanding the form raises.
#
# @return [AST::Node] the expanded form
# @raise an incomplete-input error when the input ends after the backquote
def parse_syntax_quote
  next_char # skip the `
  skip_whitespace
  incomplete_error "Unexpected end of program after syntax quote (`), expected a form" unless @char

  @syntax_quote_gensyms << {}
  begin
    syntax_quote(parse_form)
  ensure
    @syntax_quote_gensyms.pop
  end
end
157
+
158
# Expand a form read inside a syntax quote.
#
# Lists become (concat ...) over their expanded elements, other collections
# go through #syntax_quote_coll, identifiers are quoted (names ending in '#'
# are first replaced by a gensym shared within the current syntax quote),
# basic literals are returned as-is and anything else is quoted.
#
# @param [AST::Node] form the form to expand
# @return [AST::Node] the expansion
# @raise a syntax error for a splicing unquote that is not inside a list
def syntax_quote(form)
  quote = AST::Identifier.new(@line, :quote)
  quoted = lambda { |node| AST::List.new(@line, [quote, node]) }

  case form
  when AST::List
    return form[1] if is_unquote?(form)
    syntax_error "splicing unquote (~@) not in list" if is_unquote_splicing?(form)

    concat = AST::Identifier.new(@line, :concat)
    AST::List.new(@line, [concat] + syntax_quote_list(form.elements))
  when AST::ArrayLiteral then syntax_quote_coll(:array, form.elements)
  when AST::SetLiteral then syntax_quote_coll(:set, form.elements)
  when AST::HashLiteral then syntax_quote_coll(:hash, form.elements)
  when AST::Identifier
    name = form.name
    return quoted.call(form) unless name.to_s.end_with?('#')

    # Auto-gensym: reuse one generated name per distinct foo# in this quote.
    gensyms = @syntax_quote_gensyms.last
    gensyms[name] ||= Apricot.gensym(name)
    quoted.call(AST::Identifier.new(@line, gensyms[name]))
  when AST::BasicLiteral then form
  else quoted.call(form)
  end
end
192
+
193
# Build (apply <creator_name> (concat ...)) for a syntax-quoted collection
# literal, where the concat arguments come from #syntax_quote_list.
#
# @param [Symbol] creator_name identifier name of the collection constructor
# @param [Array<AST::Node>] elements the literal's elements
# @return [AST::List] the generated call
def syntax_quote_coll(creator_name, elements)
  apply, concat, creator = [:apply, :concat, creator_name].map do |name|
    AST::Identifier.new(@line, name)
  end

  pieces = AST::List.new(@line, [concat, *syntax_quote_list(elements)])
  AST::List.new(@line, [apply, creator, pieces])
end
200
+
201
# Turn each element of a syntax-quoted sequence into an argument for concat:
# ~x becomes (list x), ~@x becomes x itself, and anything else becomes
# (list <syntax-quoted element>).
#
# @param [Array<AST::Node>] elements
# @return [Array<AST::Node>] one concat argument per element
def syntax_quote_list(elements)
  list_id = AST::Identifier.new(@line, :list)

  elements.map do |element|
    next element[1] if is_unquote_splicing?(element)

    inner = is_unquote?(element) ? element[1] : syntax_quote(element)
    AST::List.new(@line, [list_id, inner])
  end
end
214
+
215
# True when +form+ is a list whose first element is the identifier `unquote`.
def is_unquote?(form)
  return false unless form.is_a?(AST::List)

  head = form[0]
  head.is_a?(AST::Identifier) && head.name == :unquote
end
220
+
221
# True when +form+ is a list whose first element is the identifier
# `unquote-splicing`.
def is_unquote_splicing?(form)
  return false unless form.is_a?(AST::List)

  head = form[0]
  head.is_a?(AST::Identifier) && head.name == :'unquote-splicing'
end
226
+
227
# Parse ~form into (unquote form), or ~@form into (unquote-splicing form).
#
# Both generated nodes carry the line on which the ~ appeared. @line is
# captured before the form is read, because reading a multi-line form
# advances it.
#
# @return [AST::List] the generated two-element list
# @raise an incomplete-input error when the input ends after ~ or ~@
def parse_unquote
  line = @line
  next_char # skip the ~

  splicing = @char == '@'
  next_char if splicing # skip the @ of ~@

  skip_whitespace

  unless @char
    syntax = splicing ? '~@' : '~'
    incomplete_error "Unexpected end of program after #{syntax}, expected a form"
  end

  form = parse_form
  name = splicing ? :'unquote-splicing' : :unquote
  AST::List.new(line, [AST::Identifier.new(line, name), form])
end
247
+
248
# Parse a #(...) form into (fn [args...] body).
#
# While the body is read, a fresh FnState sits on @fn_state so that
# #parse_identifier can record the %, %N and %& names it meets. The state is
# popped in an ensure block so the stack stays balanced even when reading the
# body raises.
#
# @return [AST::List] the generated fn form
def parse_fn
  @fn_state << FnState.new([], nil)
  begin
    body = parse_list
  ensure
    state = @fn_state.pop
  end

  state.args << :'&' << state.rest if state.rest

  # Positions never referenced in the body are nil; give them a placeholder.
  args = state.args.map.with_index do |x, i|
    AST::Identifier.new(body.line, x || Apricot.gensym("p#{i + 1}"))
  end

  AST::List.new(body.line, [AST::Identifier.new(body.line, :fn),
                            AST::ArrayLiteral.new(body.line, args),
                            body])
end
262
+
263
# Parse a parenthesised list. The node's line is read right after the
# opening paren is skipped, before any element is parsed.
def parse_list
  next_char # skip the (
  line = @line
  elements = parse_forms_until(')')
  AST::List.new(line, elements)
end
267
+
268
# Parse a bracketed array literal. The node's line is read right after the
# opening bracket is skipped, before any element is parsed.
def parse_array
  next_char # skip the [
  line = @line
  elements = parse_forms_until(']')
  AST::ArrayLiteral.new(line, elements)
end
272
+
273
# Parse a {key value ...} hash literal.
#
# The node's line is captured before the elements are read, as in
# #parse_array and #parse_set, so a multi-line literal is attributed to the
# line where it opens.
#
# @return [AST::HashLiteral]
# @raise a syntax error when the braces contain an odd number of forms
def parse_hash
  next_char # skip the {
  line = @line
  forms = parse_forms_until('}')
  syntax_error "Odd number of forms in key-value hash" if forms.count.odd?
  AST::HashLiteral.new(line, forms)
end
279
+
280
# Parse the body of a #{...} set literal; called with @char on the opening
# brace. The node's line is read before any element is parsed.
def parse_set
  next_char # skip the {
  line = @line
  elements = parse_forms_until('}')
  AST::SetLiteral.new(line, elements)
end
284
+
285
# Parse a double-quoted string literal, processing escapes through
# #parse_string_char. The node carries the line of the opening quote.
#
# @return [AST::StringLiteral]
# @raise an incomplete-input error when the closing quote is missing
def parse_string
  start_line = @line
  next_char # skip the opening "
  text = ""

  text << parse_string_char until @char.nil? || @char == '"'

  # Stopping on nil means the input ended before the closing quote.
  incomplete_error "Unexpected end of program while parsing string" unless @char

  next_char # consume the "
  AST::StringLiteral.new(start_line, text)
end
302
+
303
# Read one logical character of string content, decoding a backslash escape
# if present: a named escape from CHAR_ESCAPES, up to three octal digits,
# \x followed by up to two hex digits, or any other character taken
# literally.
#
# @return [String] the decoded character
# @raise an incomplete-input error when the input ends instead
# @raise a syntax error when \x is not followed by a hex digit
def parse_string_char
  unless @char == "\\"
    plain = consume_char
    incomplete_error "Unexpected end of file while parsing character escape" unless plain
    return plain
  end

  next_char # skip the backslash
  decoded =
    if CHAR_ESCAPES.key?(@char)
      CHAR_ESCAPES[consume_char]
    else
      case @char
      when OCTAL
        char_escape_helper(8, OCTAL, 3)
      when 'x'
        next_char
        syntax_error "Invalid hex character escape" unless @char =~ HEX
        char_escape_helper(16, HEX, 2)
      else
        consume_char
      end
    end

  incomplete_error "Unexpected end of file while parsing character escape" unless decoded
  decoded
end
323
+
324
# Parse digits in a certain base for string character escapes.
#
# Starts with @char on the first digit (the caller has already checked it)
# and consumes at most +n+ digits matching +regex+.
#
# @param [Integer] base numeric base of the digits (8 or 16 at the call sites)
# @param [Regexp] regex pattern matching one digit of that base
# @param [Integer] n maximum number of digits to read
# @return [String] the single character with the parsed code
# @raise a syntax error when the code exceeds 0xFF (e.g. the octal escape
#   \777); Integer#chr would otherwise raise a bare RangeError
def char_escape_helper(base, regex, n)
  number = ""

  n.times do
    number << @char
    next_char
    break if @char !~ regex
  end

  code = number.to_i(base)
  syntax_error "Character escape out of range: \\#{number}" if code > 0xFF
  code.chr
end
336
+
337
# Return the closing delimiter for an opening one: brackets map to their
# partner; any other value is returned unchanged as its own closer.
def delimiter_helper(c)
  closers = {'(' => ')', '[' => ']', '{' => '}', '<' => '>'}
  closers.fetch(c, c)
end
346
+
347
# Parse a #r regex literal. The character after the r chooses the delimiter
# (see #delimiter_helper); option letters may follow the closing delimiter.
#
# Inside the pattern a backslash before the delimiter is dropped, so the
# delimiter is inserted literally. A doubled backslash is copied through as
# two characters. Every other character is copied as-is.
#
# @return [AST::RegexLiteral]
# @raise an incomplete-input error when the closing delimiter is missing
def parse_regex
  start_line = @line
  next_char # skip the r
  closer = delimiter_helper(@char)
  next_char # skip delimiter
  pattern = ""

  until @char.nil?
    if @char == closer
      next_char # consume delimiter
      return AST::RegexLiteral.new(start_line, pattern, regex_options_helper)
    end

    if @char == "\\"
      case peek_char
      when closer then next_char                # escaped delimiter: drop the backslash
      when "\\" then pattern << consume_char    # escaped backslash: keep both
      end
    end

    pattern << consume_char
  end

  incomplete_error "Unexpected end of program while parsing regex"
end
369
+
370
# Consume the run of letters following a regex literal and OR together the
# matching REGEXP_OPTIONS flags.
#
# @return [Integer] the combined flags (0 when there are no option letters)
# @raise a syntax error for a letter that is not a known option
def regex_options_helper
  flags = 0

  while @char =~ /[a-zA-Z]/
    flag = REGEXP_OPTIONS[@char]
    syntax_error "Unknown regexp option: '#{@char}'" unless flag

    flags |= flag
    next_char
  end

  flags
end
385
+
386
# Parse a #q / #Q quotation: a string literal with a caller-chosen delimiter
# (see #delimiter_helper).
#
# @param [Boolean] double_quote when true, content goes through
#   #parse_string_char (full escape processing); when false, a backslash only
#   escapes the delimiter or another backslash and is otherwise kept literally
# @return [AST::StringLiteral]
# @raise an incomplete-input error when the closing delimiter is missing
def parse_quotation(double_quote)
  start_line = @line
  next_char # skip the prefix
  closer = delimiter_helper(@char)
  next_char # skip delimiter
  text = ""

  while @char && @char != closer
    if double_quote
      text << parse_string_char
    else
      escaped = @char == "\\" && (peek_char == closer || peek_char == "\\")
      next_char if escaped # drop the escaping backslash
      text << consume_char
    end
  end

  incomplete_error "Unexpected end of program while parsing quotation" unless @char

  next_char # consume delimiter
  AST::StringLiteral.new(start_line, text)
end
411
+
412
# Parse a symbol literal: either :name, made of IDENTIFIER characters, or
# :"quoted name" with string-style escapes.
#
# @return [AST::SymbolLiteral]
# @raise a syntax error for a bare ':' with no name
# @raise an incomplete-input error when a quoted name is not closed
def parse_symbol
  start_line = @line
  next_char # skip the :
  name = ""

  if @char != '"'
    name << consume_char while @char =~ IDENTIFIER
    syntax_error "Empty symbol name" if name.empty?
  else
    next_char # skip opening "
    name << parse_string_char until @char.nil? || @char == '"'
    incomplete_error "Unexpected end of program while parsing symbol" unless @char == '"'
    next_char # skip closing "
  end

  AST::SymbolLiteral.new(start_line, name.to_sym)
end
436
+
437
# Parse a numeric literal. The whole run of IDENTIFIER characters is read
# first and then classified as a decimal integer, a radix integer
# (<radix>r<digits>), a float, or a rational (<int>/<int>).
#
# @return [AST::Node] the literal node for the number
# @raise a syntax error when the token matches none of the number formats,
#   the radix is outside 2..36, or a digit is invalid for the radix
def parse_number
  token = ""
  token << consume_char while @char =~ IDENTIFIER

  case token
  when /^[+-]?\d+$/
    AST.new_integer(@line, token.to_i)
  when /^([+-]?)(\d+)r([a-zA-Z0-9]+)$/
    match = Regexp.last_match
    sign, radix, digits = match[1], match[2].to_i, match[3]
    syntax_error "Radix out of range: #{radix}" unless radix.between?(2, 36)

    allowed = DIGITS.first(radix)
    valid = digits.downcase.each_char.all? { |digit| allowed.include?(digit) }
    syntax_error "Invalid digits for radix in number: #{token}" unless valid

    AST.new_integer(@line, (sign + digits).to_i(radix))
  when /^[+-]?\d+\.?\d*(?:e[+-]?\d+)?$/
    AST::FloatLiteral.new(@line, token.to_f)
  when /^([+-]?\d+)\/(\d+)$/
    match = Regexp.last_match
    AST::RationalLiteral.new(@line, match[1].to_i, match[2].to_i)
  else
    syntax_error "Invalid number: #{token}"
  end
end
461
+
462
# Parse an identifier, or one of the literals true / false / nil.
#
# Inside a #() form (when @fn_state is non-empty) a name starting with '%'
# is an argument reference: %, %N and %& are replaced by generated names
# recorded in the innermost FnState, so repeated references share one name.
#
# @return [AST::Literal, AST::Identifier]
# @raise a syntax error for a malformed % reference inside #()
def parse_identifier
  text = ""
  text << consume_char while @char =~ IDENTIFIER

  fn = @fn_state.last
  name =
    if fn && text.start_with?('%')
      suffix = text[1..-1]
      if suffix.empty? # % is equivalent to %1
        fn.args[0] ||= Apricot.gensym('p1')
      elsif suffix == '&'
        fn.rest ||= Apricot.gensym('rest')
      elsif suffix =~ /^[1-9]\d*$/
        position = suffix.to_i
        fn.args[position - 1] ||= Apricot.gensym("p#{position}")
      else
        syntax_error "arg literal must be %, %& or %integer"
      end
    else
      text.to_sym
    end

  node_class = [:true, :false, :nil].include?(name) ? AST::Literal : AST::Identifier
  node_class.new(@line, name)
end
494
+
495
# Parse a #|...| identifier, whose name may contain arbitrary characters.
# Content is read with #parse_string_char, so string escapes apply.
#
# @return [AST::Identifier] carrying the line of the opening pipe
# @raise an incomplete-input error when the closing pipe is missing
def parse_pipe_identifier
  start_line = @line
  next_char # skip the |
  name = ""

  name << parse_string_char until @char.nil? || @char == '|'

  incomplete_error "Unexpected end of program while parsing pipe identifier" unless @char

  next_char # consume the |
  AST::Identifier.new(start_line, name.to_sym)
end
511
+
512
# Advance to the next character and return the one that was current before
# the call (nil once the input has ended).
def consume_char
  @char.tap { next_char }
end
517
+
518
# Read the next character from the input into @char and return it (nil at
# end of input). The line counter is bumped when the character being left
# behind is a newline; @location counts the characters read.
def next_char
  @line += 1 if @char == "\n"
  @char = @io.getc
  @location += 1 if @char
  @char
end
525
+
526
# Return the character that follows @char without consuming it, or nil at
# end of input. The character is read and then pushed back onto the stream.
def peek_char
  upcoming = @io.getc
  @io.ungetc(upcoming) if upcoming
  upcoming
end
532
+
533
# Raise a SyntaxError for the current file and line.
#
# @param [String] message description of the problem
def syntax_error(message)
  error = SyntaxError.new(@filename, @line, message)
  raise error
end
536
+
537
# Raise a SyntaxError for the current file and line, passing true as the
# extra fourth argument. Every caller in this parser uses it when the input
# ended before a form was complete.
#
# @param [String] message description of the problem
def incomplete_error(message)
  error = SyntaxError.new(@filename, @line, message, true)
  raise error
end
540
+ end
541
+ end
@@ -0,0 +1,12 @@
1
module Apricot
  class Compiler
    # Compiler printer stage that writes the decoded form of its input to
    # standard output and hands the input on unchanged.
    class BytecodePrinter < Rubinius::Compiler::Printer
      # Print the decoded input, pass the input through as this stage's
      # output, then continue with the next stage.
      def run
        puts(@input.decode)

        @output = @input
        run_next
      end
    end
  end
end