rdoc 2.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rdoc might be problematic. Click here for more details.

Files changed (62) hide show
  1. data/History.txt +13 -0
  2. data/Manifest.txt +61 -0
  3. data/README.txt +34 -0
  4. data/Rakefile +10 -0
  5. data/bin/rdoc +22 -0
  6. data/bin/ri +6 -0
  7. data/lib/rdoc.rb +277 -0
  8. data/lib/rdoc/code_objects.rb +776 -0
  9. data/lib/rdoc/diagram.rb +338 -0
  10. data/lib/rdoc/dot.rb +249 -0
  11. data/lib/rdoc/generator.rb +1048 -0
  12. data/lib/rdoc/generator/chm.rb +113 -0
  13. data/lib/rdoc/generator/chm/chm.rb +98 -0
  14. data/lib/rdoc/generator/html.rb +370 -0
  15. data/lib/rdoc/generator/html/hefss.rb +414 -0
  16. data/lib/rdoc/generator/html/html.rb +704 -0
  17. data/lib/rdoc/generator/html/kilmer.rb +418 -0
  18. data/lib/rdoc/generator/html/one_page_html.rb +121 -0
  19. data/lib/rdoc/generator/ri.rb +229 -0
  20. data/lib/rdoc/generator/xml.rb +120 -0
  21. data/lib/rdoc/generator/xml/rdf.rb +113 -0
  22. data/lib/rdoc/generator/xml/xml.rb +111 -0
  23. data/lib/rdoc/markup.rb +473 -0
  24. data/lib/rdoc/markup/attribute_manager.rb +274 -0
  25. data/lib/rdoc/markup/formatter.rb +14 -0
  26. data/lib/rdoc/markup/fragments.rb +337 -0
  27. data/lib/rdoc/markup/inline.rb +101 -0
  28. data/lib/rdoc/markup/lines.rb +152 -0
  29. data/lib/rdoc/markup/preprocess.rb +71 -0
  30. data/lib/rdoc/markup/to_flow.rb +185 -0
  31. data/lib/rdoc/markup/to_html.rb +353 -0
  32. data/lib/rdoc/markup/to_html_crossref.rb +86 -0
  33. data/lib/rdoc/markup/to_latex.rb +328 -0
  34. data/lib/rdoc/markup/to_test.rb +50 -0
  35. data/lib/rdoc/options.rb +616 -0
  36. data/lib/rdoc/parsers/parse_c.rb +775 -0
  37. data/lib/rdoc/parsers/parse_f95.rb +1841 -0
  38. data/lib/rdoc/parsers/parse_rb.rb +2584 -0
  39. data/lib/rdoc/parsers/parse_simple.rb +40 -0
  40. data/lib/rdoc/parsers/parserfactory.rb +99 -0
  41. data/lib/rdoc/rdoc.rb +277 -0
  42. data/lib/rdoc/ri.rb +4 -0
  43. data/lib/rdoc/ri/cache.rb +188 -0
  44. data/lib/rdoc/ri/descriptions.rb +150 -0
  45. data/lib/rdoc/ri/display.rb +274 -0
  46. data/lib/rdoc/ri/driver.rb +452 -0
  47. data/lib/rdoc/ri/formatter.rb +616 -0
  48. data/lib/rdoc/ri/paths.rb +102 -0
  49. data/lib/rdoc/ri/reader.rb +106 -0
  50. data/lib/rdoc/ri/util.rb +81 -0
  51. data/lib/rdoc/ri/writer.rb +68 -0
  52. data/lib/rdoc/stats.rb +25 -0
  53. data/lib/rdoc/template.rb +64 -0
  54. data/lib/rdoc/tokenstream.rb +33 -0
  55. data/test/test_rdoc_c_parser.rb +261 -0
  56. data/test/test_rdoc_markup.rb +613 -0
  57. data/test/test_rdoc_markup_attribute_manager.rb +224 -0
  58. data/test/test_rdoc_ri_attribute_formatter.rb +42 -0
  59. data/test/test_rdoc_ri_default_display.rb +295 -0
  60. data/test/test_rdoc_ri_formatter.rb +318 -0
  61. data/test/test_rdoc_ri_overstrike_formatter.rb +69 -0
  62. metadata +134 -0
@@ -0,0 +1,2584 @@
1
+ #!/usr/local/bin/ruby
2
+
3
+ # Parse a Ruby source file, building a set of objects
4
+ # representing the modules, classes, methods,
5
+ # requires, and includes we find (these classes
6
+ # are defined in code_objects.rb).
7
+
8
+ # This file contains stuff stolen outright from:
9
+ #
10
+ # rtags.rb -
11
+ # ruby-lex.rb - ruby lexical analyzer
12
+ # ruby-token.rb - ruby tokens
13
+ # by Keiju ISHITSUKA (Nippon Rational Inc.)
14
+ #
15
+
16
+ require "e2mmap"
17
+ require "irb/slex"
18
+
19
+ require "rdoc/code_objects"
20
+ require "rdoc/tokenstream"
21
+
22
+ require "rdoc/markup/preprocess"
23
+
24
+ require "rdoc/parsers/parserfactory"
25
+
26
+ $TOKEN_DEBUG ||= nil
27
+ #$TOKEN_DEBUG = $DEBUG_RDOC
28
+
29
+ # Definitions of all tokens involved in the lexical analysis
30
+
31
# Token classes and lookup tables shared by the lexer.
#
# Every token class listed in TokenDefinitions is created at load time by
# RubyToken.def_token and registered in TkReading2Token (keyed by source
# text) and TkSymbol2Token (keyed by class-name symbol).
module RubyToken
  # Lexer states; stored in @lex_state by the including lexer and used as the
  # "next state" column of TokenDefinitions.
  EXPR_BEG   = :EXPR_BEG
  EXPR_MID   = :EXPR_MID
  EXPR_END   = :EXPR_END
  EXPR_ARG   = :EXPR_ARG
  EXPR_FNAME = :EXPR_FNAME
  EXPR_DOT   = :EXPR_DOT
  EXPR_CLASS = :EXPR_CLASS

  # Base class of all tokens: a position plus the source text.
  class Token
    NO_TEXT = "??".freeze
    attr_accessor :text

    def initialize(line_no, char_no)
      @line_no = line_no
      @char_no = char_no
      @text    = NO_TEXT
    end

    # Because we're used in contexts that expect to return a token,
    # we set the text string and then return ourselves
    def set_text(text)
      @text = text
      self
    end

    attr_reader :line_no, :char_no
  end

  class TkNode < Token
    attr :node
  end

  # Token that carries a name (identifiers, keywords, variables, symbols).
  class TkId < Token
    def initialize(line_no, char_no, name)
      super(line_no, char_no)
      @name = name
    end
    attr :name
  end

  class TkKW < TkId
  end

  # Literal token; the value is stored as the token text.
  class TkVal < Token
    def initialize(line_no, char_no, value = nil)
      super(line_no, char_no)
      set_text(value)
    end
  end

  # Operator token; +name+ is the operator's reading as registered by
  # def_token (which defines +op_name+ on each generated subclass).
  class TkOp < Token
    def name
      self.class.op_name
    end
  end

  # Operator-assignment token (+=, -=, ...); +op+ is the underlying operator.
  class TkOPASGN < TkOp
    def initialize(line_no, char_no, op)
      super(line_no, char_no)
      op = TkReading2Token[op] unless op.kind_of?(Symbol)
      @op = op
    end
    attr :op
  end

  # A character the lexer could not classify.
  class TkUnknownChar < Token
    # +id+ is the offending character (a String, or an Integer character
    # code); it becomes +name+.  It is nil when no value was supplied to
    # Token().
    #
    # The previous implementation ignored +id+ and used <tt>char_no.chr</tt>,
    # i.e. the *column number* converted to a character, which raised
    # RangeError for any column above 255.
    def initialize(line_no, char_no, id)
      super(line_no, char_no)
      @name = id.kind_of?(Integer) ? id.chr : id
    end
    attr :name
  end

  class TkError < Token
  end

  # Records the position that the next token built by Token() will carry.
  def set_token_position(line, char)
    @prev_line_no = line
    @prev_char_no = char
  end

  # Builds a token positioned at the last set_token_position.
  #
  # +token+ may be a token class, a reading (String, looked up in
  # TkReading2Token) or a class-name Symbol (looked up in TkSymbol2Token).
  # +value+ is passed to the constructor only for classes that accept a third
  # argument (TkId, TkVal, TkOPASGN, TkUnknownChar and their subclasses).
  def Token(token, value = nil)
    case token
    when String, Symbol
      source = token.kind_of?(String) ? TkReading2Token : TkSymbol2Token
      if (tk = source[token]).nil?
        # NOTE(review): the exception constants are declared on RubyLex via
        # def_exception, not in this module -- confirm they resolve from here.
        fail TkReading2TokenNoKey, token
      end
      # TkReading2Token stores [token_class, *opts] while TkSymbol2Token
      # stores the class itself, so only unwrap arrays.
      tk = tk[0] if tk.kind_of?(Array)
      Token(tk, value)
    else
      if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
        token.new(@prev_line_no, @prev_char_no)
      else
        token.new(@prev_line_no, @prev_char_no, value)
      end
    end
  end

  # [class name, superclass, reading, next lexer state, modifier-form class]
  TokenDefinitions = [
    [:TkCLASS,      TkKW,  "class",  EXPR_CLASS],
    [:TkMODULE,     TkKW,  "module", EXPR_BEG],
    [:TkDEF,        TkKW,  "def",    EXPR_FNAME],
    [:TkUNDEF,      TkKW,  "undef",  EXPR_FNAME],
    [:TkBEGIN,      TkKW,  "begin",  EXPR_BEG],
    [:TkRESCUE,     TkKW,  "rescue", EXPR_MID],
    [:TkENSURE,     TkKW,  "ensure", EXPR_BEG],
    [:TkEND,        TkKW,  "end",    EXPR_END],
    [:TkIF,         TkKW,  "if",     EXPR_BEG, :TkIF_MOD],
    [:TkUNLESS,     TkKW,  "unless", EXPR_BEG, :TkUNLESS_MOD],
    [:TkTHEN,       TkKW,  "then",   EXPR_BEG],
    [:TkELSIF,      TkKW,  "elsif",  EXPR_BEG],
    [:TkELSE,       TkKW,  "else",   EXPR_BEG],
    [:TkCASE,       TkKW,  "case",   EXPR_BEG],
    [:TkWHEN,       TkKW,  "when",   EXPR_BEG],
    [:TkWHILE,      TkKW,  "while",  EXPR_BEG, :TkWHILE_MOD],
    [:TkUNTIL,      TkKW,  "until",  EXPR_BEG, :TkUNTIL_MOD],
    [:TkFOR,        TkKW,  "for",    EXPR_BEG],
    [:TkBREAK,      TkKW,  "break",  EXPR_END],
    [:TkNEXT,       TkKW,  "next",   EXPR_END],
    [:TkREDO,       TkKW,  "redo",   EXPR_END],
    [:TkRETRY,      TkKW,  "retry",  EXPR_END],
    [:TkIN,         TkKW,  "in",     EXPR_BEG],
    [:TkDO,         TkKW,  "do",     EXPR_BEG],
    [:TkRETURN,     TkKW,  "return", EXPR_MID],
    [:TkYIELD,      TkKW,  "yield",  EXPR_END],
    [:TkSUPER,      TkKW,  "super",  EXPR_END],
    [:TkSELF,       TkKW,  "self",   EXPR_END],
    [:TkNIL,        TkKW,  "nil",    EXPR_END],
    [:TkTRUE,       TkKW,  "true",   EXPR_END],
    [:TkFALSE,      TkKW,  "false",  EXPR_END],
    [:TkAND,        TkKW,  "and",    EXPR_BEG],
    [:TkOR,         TkKW,  "or",     EXPR_BEG],
    [:TkNOT,        TkKW,  "not",    EXPR_BEG],
    [:TkIF_MOD,     TkKW],
    [:TkUNLESS_MOD, TkKW],
    [:TkWHILE_MOD,  TkKW],
    [:TkUNTIL_MOD,  TkKW],
    [:TkALIAS,      TkKW,  "alias",    EXPR_FNAME],
    [:TkDEFINED,    TkKW,  "defined?", EXPR_END],
    [:TklBEGIN,     TkKW,  "BEGIN",    EXPR_END],
    [:TklEND,       TkKW,  "END",      EXPR_END],
    [:Tk__LINE__,   TkKW,  "__LINE__", EXPR_END],
    [:Tk__FILE__,   TkKW,  "__FILE__", EXPR_END],

    [:TkIDENTIFIER, TkId],
    [:TkFID,        TkId],
    [:TkGVAR,       TkId],
    [:TkIVAR,       TkId],
    [:TkCONSTANT,   TkId],

    [:TkINTEGER,    TkVal],
    [:TkFLOAT,      TkVal],
    [:TkSTRING,     TkVal],
    [:TkXSTRING,    TkVal],
    [:TkREGEXP,     TkVal],
    [:TkCOMMENT,    TkVal],

    [:TkDSTRING,    TkNode],
    [:TkDXSTRING,   TkNode],
    [:TkDREGEXP,    TkNode],
    [:TkNTH_REF,    TkId],
    [:TkBACK_REF,   TkId],

    [:TkUPLUS,      TkOp,   "+@"],
    [:TkUMINUS,     TkOp,   "-@"],
    [:TkPOW,        TkOp,   "**"],
    [:TkCMP,        TkOp,   "<=>"],
    [:TkEQ,         TkOp,   "=="],
    [:TkEQQ,        TkOp,   "==="],
    [:TkNEQ,        TkOp,   "!="],
    [:TkGEQ,        TkOp,   ">="],
    [:TkLEQ,        TkOp,   "<="],
    [:TkANDOP,      TkOp,   "&&"],
    [:TkOROP,       TkOp,   "||"],
    [:TkMATCH,      TkOp,   "=~"],
    [:TkNMATCH,     TkOp,   "!~"],
    [:TkDOT2,       TkOp,   ".."],
    [:TkDOT3,       TkOp,   "..."],
    [:TkAREF,       TkOp,   "[]"],
    [:TkASET,       TkOp,   "[]="],
    [:TkLSHFT,      TkOp,   "<<"],
    [:TkRSHFT,      TkOp,   ">>"],
    [:TkCOLON2,     TkOp],
    [:TkCOLON3,     TkOp],
    # [:OPASGN,     TkOp],               # +=, -=  etc. #
    [:TkASSOC,      TkOp,   "=>"],
    [:TkQUESTION,   TkOp,   "?"],        #?
    [:TkCOLON,      TkOp,   ":"],        #:

    [:TkfLPAREN],         # func( #
    [:TkfLBRACK],         # func[ #
    [:TkfLBRACE],         # func{ #
    [:TkSTAR],            # *arg
    [:TkAMPER],           # &arg #
    [:TkSYMBOL,     TkId],          # :SYMBOL
    [:TkSYMBEG,     TkId],
    [:TkGT,         TkOp,   ">"],
    [:TkLT,         TkOp,   "<"],
    [:TkPLUS,       TkOp,   "+"],
    [:TkMINUS,      TkOp,   "-"],
    [:TkMULT,       TkOp,   "*"],
    [:TkDIV,        TkOp,   "/"],
    [:TkMOD,        TkOp,   "%"],
    [:TkBITOR,      TkOp,   "|"],
    [:TkBITXOR,     TkOp,   "^"],
    [:TkBITAND,     TkOp,   "&"],
    [:TkBITNOT,     TkOp,   "~"],
    [:TkNOTOP,      TkOp,   "!"],

    [:TkBACKQUOTE,  TkOp,   "`"],

    [:TkASSIGN,     Token,  "="],
    [:TkDOT,        Token,  "."],
    [:TkLPAREN,     Token,  "("],  #(exp)
    [:TkLBRACK,     Token,  "["],  #[arry]
    [:TkLBRACE,     Token,  "{"],  #{hash}
    [:TkRPAREN,     Token,  ")"],
    [:TkRBRACK,     Token,  "]"],
    [:TkRBRACE,     Token,  "}"],
    [:TkCOMMA,      Token,  ","],
    [:TkSEMICOLON,  Token,  ";"],

    [:TkRD_COMMENT],
    [:TkSPACE],
    [:TkNL],
    [:TkEND_OF_SCRIPT],

    [:TkBACKSLASH,  TkUnknownChar,  "\\"],
    [:TkAT,         TkUnknownChar,  "@"],
    [:TkDOLLAR,     TkUnknownChar,  "\$"], #"
  ]

  # {reading => [token_class, *opt]}
  TkReading2Token = {}
  # {class-name symbol => token_class}
  TkSymbol2Token = {}

  # Creates the token class +token_n+ as a subclass of +super_token+,
  # installs it as a constant of RubyToken and registers it in the lookup
  # tables.  +reading+ (when given) is the literal source text of the token;
  # +opts+ are stored after the class in TkReading2Token.
  def RubyToken.def_token(token_n, super_token = Token, reading = nil, *opts)
    token_n = token_n.id2name unless token_n.kind_of?(String)
    if RubyToken.const_defined?(token_n)
      fail AlreadyDefinedToken, token_n
    end

    token_c = Class.new super_token
    RubyToken.const_set token_n, token_c

    if reading
      if TkReading2Token[reading]
        fail TkReading2TokenDuplicateError, token_n, reading
      end
      if opts.empty?
        TkReading2Token[reading] = [token_c]
      else
        TkReading2Token[reading] = [token_c].concat(opts)
      end
    end
    TkSymbol2Token[token_n.intern] = token_c

    # Operators answer their own reading through TkOp#name.
    if token_c <= TkOp
      token_c.class_eval %{
        def self.op_name; "#{reading}"; end
      }
    end
  end

  TokenDefinitions.each do |defs|
    def_token(*defs)
  end

  NEWLINE_TOKEN = TkNL.new(0,0)
  NEWLINE_TOKEN.set_text("\n")

end
308
+
309
+ # Lexical analyzer for Ruby source
310
+
311
+ class RubyLex
312
+
313
######################################################################
#
# Read an input stream character by character. We allow for unlimited
# ungetting of characters just read.
#
# We simplify the implementation greatly by reading the entire input
# into a buffer initially, and then simply traversing it using
# pointers.
#
# We also have to allow for the <i>here document diversion</i>. This
# little gem comes about when the lexer encounters a here
# document. At this point we effectively need to split the input
# stream into two parts: one to read the body of the here document,
# the other to read the rest of the input line where the here
# document was initially encountered. For example, we might have
#
#   do_something(<<-A, <<-B)
#     stuff
#     for
#   A
#     stuff
#     for
#   B
#
# When the lexer encounters the <<A, it reads until the end of the
# line, and keeps it around for later. It then reads the body of the
# here document.  Once complete, it needs to read the rest of the
# original line, but then skip the here document body.
#
class BufferedReader

  # 1-based number of the line the most recently read character is on.
  attr_reader :line_num

  # +content+ is the complete source text; it is never modified (the reader
  # works on its own copy, so frozen strings are accepted).  +options+ must
  # respond to +tab_width+, which is consulted only when +content+ contains
  # tabs.
  def initialize(content, options)
    @options = options

    if /\t/ =~ content
      tab_width = @options.tab_width
      content = content.split(/\n/).map do |line|
        # Expand one run of tabs at a time so that the pre-match length is
        # measured on the already-expanded text; replacing every run in a
        # single pass would compute later tab stops from unexpanded columns.
        1 while line.sub!(/\t+/) { ' ' * (tab_width * $&.length - $`.length % tab_width) }
        line
      end.join("\n")
    else
      # Private copy: we append to the buffer below and divert_read_from
      # splices into it later.
      content = content.dup
    end

    @content = content
    @content << "\n" unless @content[-1, 1] == "\n"
    @size    = @content.size
    @offset  = 0
    @hwm     = 0
    @line_num = 1
    @read_back_offset = 0
    @last_newline     = 0
    @newline_pending  = false
  end

  # Distance of the read position from the start of the current line.
  def column
    @offset - @last_newline
  end

  # Returns the next character as a one-character String, or nil at the end
  # of the buffer.  The line counter is advanced lazily: it is bumped when
  # the first character *after* a newline is read.
  def getc
    return nil if @offset >= @size
    ch = @content[@offset, 1]

    @offset += 1
    @hwm = @offset if @hwm < @offset

    if @newline_pending
      @line_num += 1
      @last_newline = @offset - 1
      @newline_pending = false
    end

    @newline_pending = true if ch == "\n"
    ch
  end

  def getc_already_read
    getc
  end

  # Steps the read position back by one character.  +ch+ is ignored.
  # Raises RuntimeError when already at the start of the buffer.
  def ungetc(ch)
    raise "unget past beginning of file" if @offset <= 0
    @offset -= 1
    # Stepping back over a newline cancels the pending line-number bump.
    if @content[@offset] == ?\n
      @newline_pending = false
    end
  end

  # Returns everything read since the previous call to get_read.
  def get_read
    res = @content[@read_back_offset...@offset]
    @read_back_offset = @offset
    res
  end

  # Character +at+ positions ahead of the read position, or nil past the end.
  def peek(at)
    pos = @offset + at
    if pos >= @size
      nil
    else
      @content[pos, 1]
    end
  end

  # True when the unread input starts with +str+.
  def peek_equal(str)
    @content[@offset, str.length] == str
  end

  # Splices +reserve+ into the buffer at the read position so that it is
  # read next (used to replay the rest of a here-document's opening line).
  def divert_read_from(reserve)
    @content[@offset, 0] = reserve
    @size = @content.size
  end
end
427
+
428
+ # end of nested class BufferedReader
429
+
430
+ extend Exception2MessageMapper
431
+ def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
432
+ def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
433
+ def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
434
+ def_exception(:TkReading2TokenDuplicateError,
435
+ "key duplicate(token_n='%s', key='%s')")
436
+ def_exception(:SyntaxError, "%s")
437
+
438
+ include RubyToken
439
+ include IRB
440
+
441
+ attr_reader :continue
442
+ attr_reader :lex_state
443
+
444
# Whether the lexer prints trace output; hard-wired off.
def self.debug?
  false
end
447
+
448
# Sets up a lexer over +content+ (the full source text).  +options+ is
# handed on to the BufferedReader.
def initialize(content, options)
  # Build the rule table first.
  lex_init

  @options = options
  @reader  = BufferedReader.new(content, @options)

  # Position bookkeeping.
  @line_no      = 1
  @exp_line_no  = @line_no
  @base_char_no = 0
  @indent       = 0

  # Lexer state.
  @ltype      = nil
  @quoted     = nil
  @lex_state  = EXPR_BEG
  @space_seen = false

  @continue = false
  @line     = ""

  # Behaviour switches (see the attr_accessors).
  @skip_space                = false
  @read_auto_clean_up        = false
  @exception_on_syntax_error = true
end
471
+
472
+ attr_accessor :skip_space
473
+ attr_accessor :read_auto_clean_up
474
+ attr_accessor :exception_on_syntax_error
475
+ attr_reader :indent
476
+
477
+ # io functions
478
# Current line number, as tracked by the reader.
def line_no
  @reader.line_num
end

# Current column, as tracked by the reader.
def char_no
  @reader.column
end

# Text consumed from the reader since the last get_read.
def get_read
  @reader.get_read
end

# Next character from the reader, or nil at end of input.
def getc
  @reader.getc
end

# Delegates to the reader's getc_already_read.
def getc_of_rests
  @reader.getc_already_read
end
497
+
498
# Reads up to and including the next newline and returns it as a String with
# carriage returns dropped.  Returns nil when there is no more input.
def gets
  ch = getc
  return if ch.nil?

  line = ""
  while ch
    line << ch unless ch == "\r"
    break if ch == "\n"
    ch = getc
  end
  line
end
507
+
508
+
509
# Pushes the last character back onto the reader; +c+ is passed through.
def ungetc(c = nil)
  @reader.ungetc(c)
end

# True when the unread input starts with +str+.
def peek_equal?(str)
  @reader.peek_equal(str)
end

# Looks +i+ characters ahead without consuming anything.
def peek(i = 0)
  @reader.peek(i)
end
520
+
521
# Consumes tokens up to the end of one logical line (a newline or
# end-of-script token seen while no continuation is pending, or a nil token)
# and returns the source text that was read.  Returns nil when nothing was
# read and the input is exhausted.
def lex
  tk = nil
  loop do
    tk = token
    line_end = tk.kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)
    break if line_end && !@continue
    break if tk.nil?
  end

  line = get_read
  exhausted = tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
  (line == "" && exhausted) ? nil : line
end
534
+
535
# Returns the next token.  Whitespace tokens are skipped when +skip_space+ is
# set.  On a SyntaxError the process is aborted if
# +exception_on_syntax_error+ is set; otherwise a TkError token is returned.
def token
  set_token_position(line_no, char_no)

  tk = nil
  loop do
    begin
      tk = @OP.match(self)
      @space_seen = tk.kind_of?(TkSPACE)
    rescue SyntaxError
      abort if @exception_on_syntax_error
      tk = TkError.new(line_no, char_no)
    end
    break unless @skip_space && tk.kind_of?(TkSPACE)
  end

  # Discard the read-back buffer when asked to.
  get_read if @read_auto_clean_up
  # throw :eof unless tk
  tk
end
552
+
553
# Keywords that increase / decrease @indent when read as reserved words.
ENINDENT_CLAUSE = %w[
  case class def do for if
  module unless until while begin
] #, "when"
DEINDENT_CLAUSE = %w[end] #, "when"

# %-literal type letter => string type used by identify_string.
PERCENT_LTYPE = {
  "q" => "\'",
  "Q" => "\"",
  "x" => "\`",
  "r" => "/",
  "w" => "]"
}

# Opening bracket of a %-literal => its closing bracket.
PERCENT_PAREN = {
  "{" => "}",
  "[" => "]",
  "<" => ">",
  "(" => ")"
}

# String type => token class for plain literals (TkSTRING when unlisted).
Ltype2Token = {
  "\'" => TkSTRING,
  "\"" => TkSTRING,
  "\`" => TkXSTRING,
  "/"  => TkREGEXP,
  "]"  => TkDSTRING
}
Ltype2Token.default = TkSTRING

# String type => token class for literals containing #{...}.
DLtype2Token = {
  "\"" => TkDSTRING,
  "\`" => TkDXSTRING,
  "/"  => TkDREGEXP,
}
589
+
590
# Builds the first half of the SLex rule table in @OP (end-of-script,
# whitespace, comments, newlines, operators, quotes, numbers introduced by a
# sign or dot) and then calls lex_int2 for the remaining rules.
#
# Each rule block returns the token for the text it matched and updates
# @lex_state for whatever may follow.
def lex_init()
  @OP = IRB::SLex.new
  @OP.def_rules("\0", "\004", "\032") do |chars, io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end

  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end

  @OP.def_rule("#") do |op, io|
    identify_comment
  end

  # =begin ... =end blocks; only recognised at the start of a line.
  @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io|
    str = op
    @ltype = "="

    loop do
      line = ""
      while (ch = getc)
        line << ch
        break if ch == "\n"
      end
      str << line
      # Stop at =end, or at end of input for an unterminated block (reading
      # on used to append nil to the line and raise TypeError).
      break if ch.nil? || line =~ /^=end/
    end

    # Leave the final newline for the "\n" rule.
    ungetc

    @ltype = nil

    if str =~ /\A=begin\s+rdoc/i
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/m, '')
      Token(TkCOMMENT).set_text(str)
    else
      Token(TkRD_COMMENT)#.set_text(str)
    end
  end

  @OP.def_rule("\n") do
    print "\\n\n" if RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      # The expression cannot end here, so the logical line continues.
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end

  @OP.def_rules("*", "**",
                "!", "!=", "!~",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # "<<" is either a here document or the shift operator.
  @OP.def_rules("<<") do |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
       (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      if /[-\w_\"\'\`]/ =~ c
        tk = identify_here_document
      end
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end

  @OP.def_rules("'", '"') do |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do |op, io|
    if @lex_state == EXPR_FNAME
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end

  # "?" is either the ternary operator or a character literal.
  @OP.def_rules('?') do |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG;
        Token(TkQUESTION).set_text(op)
      else
        str = op
        str << ch
        if (ch == '\\') #'
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end

  @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUPLUS).set_text(op)
  end

  @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUMINUS).set_text(op)
  end

  # A sign is part of a number when a digit follows in a position where an
  # operand is expected; otherwise it is a binary operator.
  @OP.def_rules("+", "-") do |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end

  @OP.def_rules("..", "...") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  lex_int2
end
775
+
776
# Second half of the SLex rule table (called from lex_init): brackets,
# colons, "/" and "%" (operator vs. literal), sigils, __END__, and the
# catch-all rule that starts numbers and identifiers.
def lex_int2
  @OP.def_rules("]", "}", ")") do |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    Token(op).set_text(op)
  end

  @OP.def_rule(":") do
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME;
      tk = Token(TkSYMBEG)
    end
    tk.set_text(":")
  end

  @OP.def_rule("::") do
    # p @lex_state.id2name, @space_seen
    if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      tk = Token(TkCOLON2)
    end
    tk.set_text("::")
  end

  # "/" starts a regexp where an operand is expected, otherwise it is
  # division or "/=".
  @OP.def_rule("/") do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :/).set_text("/=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/").set_text(op)
    end
  end

  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^").set_text("^")
  end

  #       @OP.def_rules("^=") do
  #         @lex_state = EXPR_BEG
  #         Token(TkOPASGN, :^)
  #       end

  @OP.def_rules(",", ";") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~").set_text("~")
  end

  # "~@" is only a token when a method name is expected (def ~@).  The guard
  # must *compare* the state: it previously assigned EXPR_FNAME, so the rule
  # matched everywhere and clobbered the lexer state.
  @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
    @lex_state = EXPR_BEG
    Token("~").set_text("~@")
  end

  @OP.def_rule("(") do
    @indent += 1
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      @lex_state = EXPR_BEG
      tk = Token(TkfLPAREN)
    else
      @lex_state = EXPR_BEG
      tk = Token(TkLPAREN)
    end
    tk.set_text("(")
  end

  @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
    Token("[]").set_text("[]")
  end

  @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
    Token("[]=").set_text("[]=")
  end

  @OP.def_rule("[") do
    @indent += 1
    if @lex_state == EXPR_FNAME
      t = Token(TkfLBRACK)
    else
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        t = Token(TkLBRACK)
      elsif @lex_state == EXPR_ARG && @space_seen
        t = Token(TkLBRACK)
      else
        t = Token(TkfLBRACK)
      end
      @lex_state = EXPR_BEG
    end
    t.set_text("[")
  end

  @OP.def_rule("{") do
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
      t = Token(TkLBRACE)
    else
      t = Token(TkfLBRACE)
    end
    @lex_state = EXPR_BEG
    t.set_text("{")
  end

  # A backslash before a newline is a line continuation, reported as space.
  @OP.def_rule('\\') do
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE).set_text("\\\n")
    else
      ungetc
      Token("\\").set_text("\\")
    end
  end

  # "%" starts a %-literal where an operand is expected, otherwise it is the
  # modulo operator or "%=".
  @OP.def_rule('%') do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation('%')
    elsif peek(0) == '='
      getc
      # An operand follows, exactly as after "/=".
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "%").set_text("%=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation('%')
    else
      @lex_state = EXPR_BEG
      Token("%").set_text("%")
    end
  end

  @OP.def_rule('$') do
    identify_gvar
  end

  @OP.def_rule('@') do
    if peek(0) =~ /[@\w_]/
      ungetc
      identify_identifier
    else
      Token("@").set_text("@")
    end
  end

  #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
  #       |op, io|
  #         @indent += 1
  #         @lex_state = EXPR_FNAME
  #       #  @lex_state = EXPR_END
  #       #  until @rests[0] == "\n" or @rests[0] == ";"
  #       #    rests.shift
  #       #  end
  #       end

  @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
    throw :eof
  end

  # Catch-all: anything not matched above is a number or an identifier.
  # Returns nil when the next character is neither.
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number("")
    elsif peek(0) =~ /[\w_]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end

  p @OP if RubyLex.debug?
end
964
+
965
# Lexes what follows a "$" that has already been consumed: a special global
# ($~, $-w, ...), a back reference ($&, ...), an nth-match reference ($1,
# ...) or an ordinary global (handed to identify_identifier).  A lone "$"
# yields the unknown-character token.  Sets @lex_state to EXPR_END.
def identify_gvar
  @lex_state = EXPR_END
  str = "$"

  tk = case ch = getc
       when /[~_*$?!@\/\\;,=:<>".]/
         str << ch
         Token(TkGVAR, str)

       when "-"
         str << "-" << getc
         Token(TkGVAR, str)

       when "&", "`", "'", "+"
         str << ch
         Token(TkBACK_REF, str)

       when /[1-9]/
         str << ch
         while (ch = getc) =~ /[0-9]/
           str << ch
         end
         ungetc
         # Pass the name like every other branch does; it used to be nil.
         Token(TkNTH_REF, str)

       when /\w/
         # Put back both the character and the "$" and re-lex as identifier.
         ungetc
         ungetc
         return identify_identifier

       else
         ungetc
         Token("$")
       end
  tk.set_text(str)
end
999
+
1000
# Lexes an identifier, keyword, constant, instance/class variable or global
# starting at the read position and returns the matching token, updating
# @lex_state (and @indent for block-opening/closing keywords).
def identify_identifier
  ident = ""
  # Optional sigil: "$", "@" or "@@".
  ident.concat getc if peek(0) =~ /[$@]/
  ident.concat getc if peek(0) == "@"

  while (ch = getc) =~ /\w|_/
    print ":", ch, ":" if RubyLex.debug?
    ident.concat ch
  end
  ungetc

  # Trailing "!" or "?" belongs to the name.
  ident.concat getc if ch == "!" or ch == "?"

  # $stderr.puts "identifier - #{token}, state = #@lex_state"

  case ident
  when /^\$/
    return Token(TkGVAR, ident).set_text(ident)
  when /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, ident).set_text(ident)
  end

  unless @lex_state == EXPR_DOT
    print ident, "\n" if RubyLex.debug?

    klass, next_state, modifier = TkReading2Token[ident]
    if klass
      # reserved word
      if @lex_state == EXPR_FNAME
        # a keyword used as a method name
        @lex_state = EXPR_END
      elsif @lex_state != EXPR_BEG && modifier
        # modifier form (trailing if/unless/while/until)
        klass = TkSymbol2Token[modifier]
        @lex_state = next_state
      else
        if ENINDENT_CLAUSE.include?(ident)
          @indent += 1
        elsif DEINDENT_CLAUSE.include?(ident)
          @indent -= 1
        end
        @lex_state = next_state
      end
      return Token(klass, ident).set_text(ident)
    end
  end

  case @lex_state
  when EXPR_FNAME
    @lex_state = EXPR_END
    # setter name: def foo=
    ident.concat getc if peek(0) == '='
  when EXPR_BEG, EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end

  kind = if ident[0, 1] =~ /[A-Z]/
           TkCONSTANT
         elsif ident[ident.size - 1, 1] =~ /[!?]/
           TkFID
         else
           TkIDENTIFIER
         end
  Token(kind, ident).set_text(ident)
end
1074
+
1075
# Lexes a here document whose "<<" has already been consumed.  The remainder
# of the opening line is set aside, the body is read up to the terminator,
# and the saved remainder is then diverted back into the reader so lexing
# resumes after the "<<TAG".  Returns a token whose text is the dumped body.
def identify_here_document
  strip_lines = false
  ch = getc
  if ch == "-"
    ch = getc
    strip_lines = true
  end

  if /['"`]/ =~ ch
    # quoted terminator: read up to the closing quote
    lt = ch
    terminator = ""
    while (c = getc) && c != lt
      terminator.concat c
    end
  else
    # bare terminator: read the word
    lt = '"'
    terminator = ch.dup
    while (c = getc) && c =~ /\w/
      terminator.concat c
    end
    ungetc
  end

  saved_ltype = @ltype
  @ltype = lt

  # Rest of the opening line (through its newline), kept for later.
  reserve = ""
  while (ch = getc)
    reserve << ch
    if ch == "\\"
      ch = getc
      reserve << ch
    elsif ch == "\n"
      break
    end
  end

  body = ""
  while (l = gets)
    l.chomp!
    l.strip! if strip_lines
    break if l == terminator
    body << l.chomp << "\n"
  end

  @reader.divert_read_from(reserve)

  @ltype = saved_ltype
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], body).set_text(body.dump)
end
1123
+
1124
# Lexes a %-literal.  +initial_char+ is the text consumed so far ("%"); an
# optional type letter from PERCENT_LTYPE selects the string type, and the
# next character is the delimiter.  Fails with SyntaxError when "%" is
# followed by an unknown word character.
def identify_quotation(initial_char)
  ch = getc
  lt = PERCENT_LTYPE[ch]
  if lt
    initial_char += ch
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    fail SyntaxError, "unknown type of %string ('#{ch}')"
  end
  #     if ch !~ /\W/
  #       ungetc
  #       next
  #     end
  #@ltype = lt

  # Bracket delimiters close with their partner, anything else with itself.
  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted, ch, initial_char)
end
1142
+
1143
# Lexes a numeric literal and returns a TkINTEGER or TkFLOAT token whose text
# is the literal as written.
#
# +start+ is the text already consumed: a sign ("+" or "-"), the empty
# string, or the first character of the number.  For a sign or "" the next
# character is read here.  Sets @lex_state to EXPR_END.
def identify_number(start)
  str = start.dup

  if start == "+" or start == "-" or start == ""
    start = getc
    str << start
  end

  @lex_state = EXPR_END

  if start == "0"
    # Radix prefixes; a bare leading zero means octal.  Both prefix cases and
    # upper-case hex digits are accepted.
    match = case peek(0)
            when "x", "X" then /[0-9a-fA-F_]/
            when "b", "B" then /[01_]/
            when "o", "O" then /[0-7_]/
            when "d", "D" then /[0-9_]/
            end
    if match
      str << getc
    else
      match = /[0-7_]/
    end

    while ch = getc
      if ch !~ match
        ungetc
        break
      else
        str << ch
      end
    end
    return Token(TkINTEGER).set_text(str)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  while ch = getc
    case ch
    when /[0-9_]/
      str << ch

    when allow_point && "."
      # "1.foo" is a method call on an integer: only a dot followed by a
      # digit makes this a float.
      if peek(0) !~ /[0-9]/
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false

    when allow_e && "e", allow_e && "E"
      str << ch
      type = TkFLOAT
      if peek(0) =~ /[+-]/
        str << getc
      end
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end
1204
+
1205
# Lexes a string-like literal whose opening delimiter has been consumed and
# returns its token; the token text is the literal as written.
#
# ltype::        string type ("'", "\"", "`", "/", "]") selecting the token
#                class via Ltype2Token / DLtype2Token.
# quoted::       closing delimiter.
# opener::       opening delimiter when it differs from the closer, so that
#                nested pairs can be balanced.
# initial_char:: text that preceded the opener (e.g. "%q").
#
# Always resets @ltype and @quoted and sets @lex_state to EXPR_END, even when
# an error is raised.
def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil

  str = ""
  str << initial_char if initial_char
  str << (opener||quoted)

  nest = 0
  begin
    while ch = getc
      str << ch
      if @quoted == ch
        if nest == 0
          break
        else
          nest -= 1
        end
      elsif opener == ch
        nest += 1
      elsif @ltype != "'" && @ltype != "]" and ch == "#"
        # possible interpolation
        ch = getc
        if ch == "{"
          subtype = true
          str << ch << skip_inner_expression
        else
          ungetc(ch)
        end
      elsif ch == '\\'
        str << read_escape
      end
    end

    if @ltype == "/"
      # A regexp may carry several option letters (e.g. /x/mi); only a
      # single one out of "ioens" used to be consumed.
      while peek(0) =~ /[imxounes]/
        str << getc
      end
    end

    if subtype
      Token(DLtype2Token[ltype], str)
    else
      Token(Ltype2Token[ltype], str)
    end.set_text(str)
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end
1254
+
1255
# Copies input up to and including the "}" that closes an interpolated
# expression, balancing any nested braces, and returns the copied text.
# Stops early if the input runs out.
def skip_inner_expression
  text = ""
  depth = 0
  loop do
    c = getc
    break unless c
    text << c
    case c
    when '}'
      break if depth.zero?
      depth -= 1
    when '{'
      depth += 1
    end
  end
  text
end
1269
+
1270
# Lexes a "#" comment up to (but not including) the terminating newline
# and returns it as a TkCOMMENT token.
#
# A backslash immediately followed by a newline is replaced by a single
# space and the comment continues on the next line; any other backslash
# is kept.  @ltype is "#" while scanning and reset to nil when the
# terminating newline is seen.
def identify_comment
  @ltype = "#"
  comment = "#"
  while ch = getc
    if ch == "\\"
      ch = getc
      if ch.nil?
        # Input ended right after the backslash: keep it and stop rather
        # than appending nil.
        comment << "\\"
        break
      elsif ch == "\n"
        ch = " "
      else
        comment << "\\"
      end
    elsif ch == "\n"
      @ltype = nil
      ungetc
      break
    end
    comment << ch
  end
  Token(TkCOMMENT).set_text(comment)
end
1292
+
1293
# Reads the body of a backslash escape (the backslash itself has already
# been consumed) and returns the characters that make it up:
#
# * up to three octal digits,
# * "x" followed by up to two hexadecimal digits,
# * "M-" or "C-"/"c" followed by a character or a nested escape,
# * otherwise the single next character.
#
# Returns whatever was collected when the input runs out.
def read_escape
  res = ""
  case ch = getc
  when /[0-7]/
    ungetc ch
    3.times do
      case ch = getc
      when /[0-7]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "x"
    res << ch
    2.times do
      case ch = getc
      when /[0-9a-fA-F]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc
    else
      res << ch
      ch = getc
      if ch == "\\" #"
        res << ch << read_escape
      elsif ch
        res << ch
      end
    end

  when "C", "c" #, "^"
    res << ch
    if ch == "C"
      # "\C" must be followed by "-"; "\c" takes its argument directly.
      dash = getc
      if dash != "-"
        ungetc
        return res
      end
      res << dash
    end
    ch = getc
    if ch == "\\" #"
      res << ch << read_escape
    elsif ch
      res << ch
    end

  else
    res << ch if ch
  end
  res
end
1356
+ end
1357
+
1358
+ ##
1359
+ # Extract code elements from a source file, returning a TopLevel object
1360
+ # containing the constituent file elements.
1361
+ #
1362
+ # This file is based on rtags
1363
+
1364
+ class RDoc::RubyParser
1365
+
1366
+ include RubyToken
1367
+ include RDoc::TokenStream
1368
+
1369
+ extend RDoc::ParserFactory
1370
+
1371
+ parse_files_matching(/\.rbw?$/)
1372
+
1373
# Prepares a parser for +content+, the source text of +file_name+.
# Results are recorded in +top_level+, counters in +stats+.  A RubyLex
# scanner is created over the content with exceptions on syntax errors
# turned off.  Progress output goes to $stderr unless options.quiet.
def initialize(top_level, file_name, content, options, stats)
  @top_level = top_level
  @input_file_name = file_name
  @options = options
  @stats = stats

  @size = 0
  @token_listeners = nil

  @scanner = RubyLex.new(content, @options)
  @scanner.exception_on_syntax_error = false

  @progress = $stderr unless options.quiet
end
1384
+
1385
# Parses the whole input and returns the top-level object.
#
# Parsing stops quietly when :eof or :enddoc is thrown.  If parsing
# raises an error, a banner naming the file and the scanner position is
# written to $stderr, the backtrace is trimmed to four frames and the
# error is re-raised.  Process-control exceptions (SystemExit,
# Interrupt, ...) pass through without the banner.
def scan
  @tokens = []
  @unget_read = []
  @read = []
  catch(:eof) do
    catch(:enddoc) do
      begin
        parse_toplevel_statements(@top_level)
      rescue StandardError, ScriptError => e
        $stderr.puts "\n\n"
        $stderr.puts "RDoc failure in #@input_file_name at or around " +
                     "line #{@scanner.line_no} column #{@scanner.char_no}"
        $stderr.puts
        $stderr.puts "Before reporting this, could you check that the file"
        $stderr.puts "you're documenting compiles cleanly--RDoc is not a"
        $stderr.puts "full Ruby parser, and gets confused easily if fed"
        $stderr.puts "invalid programs."
        $stderr.puts
        $stderr.puts "The internal error was:\n\n"

        e.set_backtrace(e.backtrace[0,4]) if e.backtrace
        raise
      end
    end
  end
  @top_level
end
1412
+
1413
+ private
1414
+
1415
# Builds a diagnostic string: a newline, the input file name and ":",
# then "line:column: " from the scanner when one exists, then +msg+.
def make_message(msg)
  location = "\n" + @input_file_name + ":"
  location << "#{@scanner.line_no}:#{@scanner.char_no}: " if @scanner
  location + msg
end
1422
+
1423
# Writes +msg+, prefixed with the current location, to $stderr unless
# the quiet option is set.
def warn(msg)
  return if @options.quiet
  $stderr.puts make_message(msg)
end
1428
+
1429
# Writes +msg+, prefixed with the current location, to $stderr and
# terminates the process with exit status 1.
def error(msg)
  $stderr.puts make_message(msg)
  exit(1)
end
1434
+
1435
# Prints the progress marker +char+ and flushes the progress stream.
# Does nothing when the quiet option is set.
def progress(char)
  return if @options.quiet
  @progress.print(char)
  @progress.flush
end
1441
+
1442
# Registers +obj+ so that get_tk and unget_tk notify it of tokens.  The
# listener list is created on first use.
def add_token_listener(obj)
  @token_listeners = [] unless @token_listeners
  @token_listeners.push(obj)
end
1446
+
1447
# Unregisters +obj+ as a token listener.  Returns the removed object, or
# nil when it was not registered (including when no listener has ever
# been added).
def remove_token_listener(obj)
  @token_listeners.delete(obj) if @token_listeners
end
1450
+
1451
# Returns the next token, taken from the pushback buffer when it holds
# tokens and from the scanner otherwise.  The source text belonging to
# the token is appended to @read.
#
# A TkSYMBEG followed by an identifier, operator or string token is
# folded into a single TkSYMBOL token; the listeners are told to drop
# the token that was absorbed.  Every listener is then notified of the
# token that is returned.
def get_tk
  if @tokens.empty?
    tk = @scanner.token
    @read.push @scanner.get_read
    puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG
  else
    @read.push @unget_read.shift
    tk = @tokens.shift
    puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG
  end

  if tk.kind_of?(TkSYMBEG)
    set_token_position(tk.line_no, tk.char_no)
    following = get_tk
    if following.kind_of?(TkId) || following.kind_of?(TkOp) || following.kind_of?(TkSTRING)
      symbol_body = following.respond_to?(:name) ? following.name : following.text
      tk = Token(TkSYMBOL).set_text(":" + symbol_body)
      # The recursive call already announced the absorbed token; retract it.
      @token_listeners.each { |obj| obj.pop_token } if @token_listeners
    else
      warn("':' not followed by identifier or operator")
      tk = following
    end
  end

  @token_listeners.each { |obj| obj.add_token(tk) } if @token_listeners

  tk
end
1490
+
1491
# Returns the next token without consuming it: the token is fetched and
# pushed straight back.
def peek_tk
  tk = get_tk
  unget_tk(tk)
  tk
end
1495
+
1496
# Pushes +tk+ back so that the next get_tk returns it again.  The text
# most recently added to @read moves to the pushback text buffer and
# every listener is told to drop its last token.
def unget_tk(tk)
  @tokens.unshift tk
  @unget_read.unshift @read.pop

  @token_listeners.each { |obj| obj.pop_token } if @token_listeners
end
1505
+
1506
# Consumes consecutive TkSPACE tokens (and TkNL tokens too when
# +skip_nl+ is true).  The first other token is pushed back.  Returns
# the array of tokens that were skipped.
def skip_tkspace(skip_nl = true)
  skipped = []
  loop do
    tk = get_tk
    unless tk.kind_of?(TkSPACE) || (skip_nl && tk.kind_of?(TkNL))
      unget_tk(tk)
      break
    end
    skipped.push tk
  end
  skipped
end
1515
+
1516
# Returns the source text accumulated in @read since it was last
# emptied, and empties it.
def get_tkread
  text = @read.join("")
  @read = []
  text
end
1521
+
1522
# Returns the source text accumulated in @read without emptying it.
def peek_read
  @read.join("")
end
1525
+
1526
+ NORMAL = "::"
1527
+ SINGLE = "<<"
1528
+
1529
+ ##
1530
+ # Look for the first comment in a file that isn't a shebang line.
1531
+
1532
# Collects the run of comment tokens at the start of the input and
# returns their text, one comment per line.  A first comment that
# begins with "#!" is skipped.  The first non-comment token is pushed
# back.
def collect_first_comment
  skip_tkspace
  text = ''
  at_first_line = true

  tk = get_tk
  while tk.kind_of?(TkCOMMENT)
    if at_first_line && tk.text[0,2] == "#!"
      skip_tkspace
      tk = get_tk
    else
      text << tk.text << "\n"
      tk = get_tk
      if tk.kind_of?(TkNL)
        # Step over the newline and any indentation of the next line.
        skip_tkspace(false)
        tk = get_tk
      end
    end
    at_first_line = false
  end

  unget_tk(tk)
  text
end
1555
+
1556
# Entry point for a file: collects the leading comment, applies any
# directives found in it, stores it on +container+ when non-empty, and
# then parses the statements of the file.
def parse_toplevel_statements(container)
  leading = collect_first_comment
  look_for_directives_in(container, leading)
  container.comment = leading unless leading.empty?
  parse_statements(container, NORMAL, nil, leading)
end
1562
+
1563
# Reads tokens until the 'end' that closes the current body (or until
# the tokens run out), dispatching on each token:
#
# * newlines: gather the comment lines that follow into +comment+ and
#   apply any directives in them;
# * class / module / def / constant / alias: hand over to the matching
#   parse_* method when the container's documentation flags allow it;
# * block-opening keywords: increase the nesting count;
# * identifiers: handle visibility keywords, attr*, alias_method (only
#   at nesting level 1 outside a method), and require / include;
# * 'end': decrease the nesting count and return once it reaches zero,
#   restoring the container's ongoing visibility.
#
# +comment+ is cleared after each statement unless the branch asked for
# it to be kept.
def parse_statements(container, single=NORMAL, current_method=nil, comment='')
  nest = 1
  save_visibility = container.visibility

  trace_nesting = lambda do |tok|
    puts "Found #{tok.class} in #{container.name}, nest = #{nest}, " +
         "line #{tok.line_no}" if $DEBUG_RDOC
  end

  non_comment_seen = true

  while tk = get_tk
    keep_comment = false

    non_comment_seen = true unless tk.kind_of?(TkCOMMENT)

    case tk
    when TkNL
      skip_tkspace(true)   # Skip blanks and newlines
      tk = get_tk
      if tk.kind_of?(TkCOMMENT)
        if non_comment_seen
          # Start a fresh comment block.
          comment = ''
          non_comment_seen = false
        end
        while tk.kind_of?(TkCOMMENT)
          comment << tk.text << "\n"
          get_tk               # this is the newline
          skip_tkspace(false)  # leading spaces
          tk = get_tk
        end
        unless comment.empty?
          look_for_directives_in(container, comment)
          if container.done_documenting
            container.ongoing_visibility = save_visibility
          end
        end
      else
        non_comment_seen = true
      end
      unget_tk(tk)
      keep_comment = true

    when TkCLASS
      if container.document_children
        parse_class(container, single, tk, comment)
      else
        nest += 1
      end

    when TkMODULE
      if container.document_children
        parse_module(container, single, tk, comment)
      else
        nest += 1
      end

    when TkDEF
      if container.document_self
        parse_method(container, single, tk, comment)
      else
        nest += 1
      end

    when TkCONSTANT
      parse_constant(container, single, tk, comment) if container.document_self

    when TkALIAS
      parse_alias(container, single, tk, comment) if container.document_self

    when TkYIELD
      if current_method.nil?
        warn("Warning: yield outside of method") if container.document_self
      else
        parse_yield(container, single, tk, current_method)
      end

    # Until and While can have a 'do', which shouldn't increase the
    # nesting.  We can't solve the general case, but we can handle most
    # occurrences by ignoring a do at the end of a line.
    when TkUNTIL, TkWHILE
      nest += 1
      trace_nesting.call(tk)
      skip_optional_do_after_expression

    # 'for' is trickier
    when TkFOR
      nest += 1
      trace_nesting.call(tk)
      skip_for_variable
      skip_optional_do_after_expression

    when TkCASE, TkDO, TkIF, TkUNLESS, TkBEGIN
      nest += 1
      trace_nesting.call(tk)

    when TkIDENTIFIER
      if nest == 1 and current_method.nil?
        case tk.name
        when "private", "protected", "public",
             "private_class_method", "public_class_method"
          parse_visibility(container, single, tk)
          keep_comment = true
        when "attr"
          parse_attr(container, single, tk, comment)
        when /^attr_(reader|writer|accessor)$/, @options.extra_accessors
          parse_attr_accessor(container, single, tk, comment)
        when "alias_method"
          parse_alias(container, single, tk, comment) if container.document_self
        end
      end

      case tk.name
      when "require"
        parse_require(container, comment)
      when "include"
        parse_include(container, comment)
      end

    when TkEND
      nest -= 1
      puts "Found 'end' in #{container.name}, nest = #{nest}, line #{tk.line_no}" if $DEBUG_RDOC
      puts "Method = #{current_method.name}" if $DEBUG_RDOC and current_method
      if nest == 0
        read_documentation_modifiers container, RDoc::CLASS_MODIFIERS
        container.ongoing_visibility = save_visibility
        return
      end
    end

    comment = '' unless keep_comment

    # NOTE(review): this compares a token object with the TkNL class, so
    # the body presumably runs exactly once per statement - left as is.
    begin
      get_tkread
      skip_tkspace(false)
    end while peek_tk == TkNL
  end
end
1715
+
1716
# Parses a class definition; the 'class' keyword (+tk+) has already
# been read.
#
# "class Name [< Super]" adds a class to the container (a SingleClass
# when +single+ is SINGLE, a NormalClass otherwise), parses its body and
# attaches +comment+.  "class << obj" parses the body as singleton
# definitions of the container (for "self" or the container's own name)
# or of the named class, falling back to a throw-away "Dummy" class
# when that class is unknown.
def parse_class(container, single, tk, comment, &block)
  progress("c")

  @stats.num_classes += 1

  container, name_t = get_class_or_module(container)

  case name_t
  when TkCONSTANT
    name = name_t.name
    superclass = "Object"

    if peek_tk.kind_of?(TkLT)
      get_tk
      skip_tkspace(true)
      superclass = get_class_specification
      superclass = "<unknown>" if superclass.empty?
    end

    cls_type = single == SINGLE ? RDoc::SingleClass : RDoc::NormalClass

    cls = container.add_class cls_type, name, superclass
    read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS
    cls.record_location(@top_level)
    parse_statements(cls)
    cls.comment = comment

  when TkLSHFT
    case name = get_class_specification
    when "self", container.name
      parse_statements(container, SINGLE, &block)
    else
      other = RDoc::TopLevel.find_class_named(name)
      other = RDoc::NormalClass.new "Dummy", nil unless other
      read_documentation_modifiers other, RDoc::CLASS_MODIFIERS
      parse_statements(other, SINGLE, &block)
    end

  else
    warn("Expected class name or '<<'. Got #{name_t.class}: #{name_t.text.inspect}")
  end
end
1767
+
1768
# Parses a module definition; the 'module' keyword has already been
# read.  Adds a NormalModule to the resolved container, records its
# location, reads any trailing directive, parses the body and attaches
# +comment+.
def parse_module(container, single, tk, comment)
  progress("m")
  @stats.num_modules += 1

  container, name_t = get_class_or_module(container)

  mod = container.add_module(RDoc::NormalModule, name_t.name)
  mod.record_location(@top_level)
  read_documentation_modifiers(mod, RDoc::CLASS_MODIFIERS)
  parse_statements(mod)
  mod.comment = comment
end
1780
+
1781
+ # Look for the name of a class or module (optionally with a leading :: or
1782
+ # with :: separated names) and return the ultimate name and container
1783
+
1784
# Reads a possibly qualified class or module name ("A", "::A",
# "A::B::C") and returns [container, name_token]: the container the
# last name component belongs to and the token of that component.
# Intermediate modules that cannot be found are added as NormalModules.
def get_class_or_module(container)
  skip_tkspace
  name_t = get_tk

  # class ::A -> A is in the top level
  if name_t.kind_of?(TkCOLON2)
    name_t = get_tk
    container = @top_level
  end

  skip_tkspace(false)

  while peek_tk.kind_of?(TkCOLON2)
    outer = container
    container = outer.find_module_named(name_t.name)
    unless container
      container = outer.add_module RDoc::NormalModule, name_t.name
    end
    get_tk            # the '::'
    name_t = get_tk   # the next name component
  end

  skip_tkspace(false)
  [container, name_t]
end
1809
+
1810
# Parses "NAME = value" where +tk+ is the constant token.  Nothing is
# recorded when the constant is not followed by an assignment, or when
# the assignment token is followed by '>'.  Otherwise the source text of
# the value is collected up to the end of the statement and a Constant
# is added to +container+ unless a directive disabled documentation.
def parse_constant(container, single, tk, comment)
  name = tk.name
  skip_tkspace(false)
  eq_tk = get_tk

  unless eq_tk.kind_of?(TkASSIGN)
    unget_tk(eq_tk)
    return
  end

  nest = 0
  get_tkread

  tk = get_tk
  if tk.kind_of?(TkGT)
    unget_tk(tk)
    unget_tk(eq_tk)
    return
  end

  loop do
    puts "Param: %p, %s %s %s" %
      [tk.text, @scanner.continue, @scanner.lex_state, nest] if $DEBUG_RDOC

    case tk
    when TkSEMICOLON
      break
    when TkLPAREN, TkfLPAREN
      nest += 1
    when TkRPAREN
      nest -= 1
    when TkCOMMENT
      if nest <= 0 && @scanner.lex_state == EXPR_END
        unget_tk(tk)
        break
      end
    when TkNL
      if (@scanner.lex_state == EXPR_END and nest <= 0) || !@scanner.continue
        unget_tk(tk)
        break
      end
    end
    tk = get_tk
  end

  value = get_tkread.tr("\n", " ").strip
  value = "" if value == ";"

  con = RDoc::Constant.new name, value, comment
  read_documentation_modifiers con, RDoc::CONSTANT_MODIFIERS

  container.add_constant(con) if con.document_self
end
1866
+
1867
# Parses a method definition; +tk+ is the 'def' token.
#
# Handles both "def name" and "def receiver.name" / "def Receiver::name".
# For a constant receiver that is not a known module of the container, a
# class or module is added for it.  Any other receiver than self or a
# constant causes the method to be skipped.  The method's tokens are
# collected, its parameters and body are parsed, an instance-level
# "initialize" is renamed to the singleton "new" (unless the method
# asked not to be renamed), and a call-seq section found in +comment+
# is moved to the method's call_seq.
def parse_method(container, single, tk, comment)
  progress(".")
  @stats.num_methods += 1
  line_no = tk.line_no
  column = tk.char_no

  start_collecting_tokens
  add_token(tk)
  add_token_listener(self)

  @scanner.instance_eval{@lex_state = EXPR_FNAME}
  skip_tkspace(false)
  name_t = get_tk
  back_tk = skip_tkspace
  meth = nil
  added_container = false

  dot = get_tk
  if dot.kind_of?(TkDOT) or dot.kind_of?(TkCOLON2)
    @scanner.instance_eval{@lex_state = EXPR_FNAME}
    skip_tkspace
    name_t2 = get_tk

    case name_t
    when TkSELF
      name = name_t2.name
    when TkCONSTANT
      name = name_t2.name
      prev_container = container
      container = container.find_module_named(name_t.name)
      unless container
        added_container = true
        # Look the receiver up among the constants loaded in this
        # process to decide between a class and a module.
        obj = begin
          name_t.name.split("::").inject(Object) do |state, item|
            state.const_get(item)
          end
        rescue StandardError
          nil
        end

        type = obj.class == Class ? RDoc::NormalClass : RDoc::NormalModule
        unless [Class, Module].include?(obj.class)
          warn("Couldn't find #{name_t.name}. Assuming it's a module")
        end

        if type == RDoc::NormalClass
          container = prev_container.add_class(type, name_t.name, obj.superclass.name)
        else
          container = prev_container.add_module(type, name_t.name)
        end
      end
    else
      skip_method(container)
      return
    end

    meth = RDoc::AnyMethod.new(get_tkread, name)
    meth.singleton = true
  else
    unget_tk dot
    back_tk.reverse_each { |token| unget_tk token }
    name = name_t.name

    meth = RDoc::AnyMethod.new get_tkread, name
    meth.singleton = (single == SINGLE)
  end

  remove_token_listener(self)

  meth.start_collecting_tokens
  indent = TkSPACE.new(1,1)
  indent.set_text(" " * column)

  location_comment = TkCOMMENT.new(line_no, 1,
    "# File #{@top_level.file_absolute_name}, line #{line_no}")
  meth.add_tokens([location_comment, NEWLINE_TOKEN, indent])

  meth.add_tokens(@token_stream)

  add_token_listener(meth)

  @scanner.instance_eval{@continue = false}
  parse_method_parameters(meth)

  if meth.document_self
    container.add_method(meth)
  elsif added_container
    container.document_self = false
  end

  # Having now read the method parameters and documentation modifiers, we
  # now know whether we have to rename #initialize to ::new

  if name == "initialize" && !meth.singleton
    if meth.dont_rename_initialize
      meth.visibility = :protected
    else
      meth.singleton = true
      meth.name = "new"
      meth.visibility = :public
    end
  end

  parse_statements(container, single, meth)

  remove_token_listener(meth)

  # Look for a 'call-seq' in the comment, and override the
  # normal parameter stuff

  if comment.sub!(/:?call-seq:(.*?)^\s*\#?\s*$/m, '')
    seq = $1
    seq.gsub!(/^\s*\#\s*/, '')
    meth.call_seq = seq
  end

  meth.comment = comment
end
1985
+
1986
# Reads past a method that is not going to be documented: its
# parameters and body are parsed against a throw-away method object
# named "anon".
def skip_method(container)
  placeholder = RDoc::AnyMethod.new("", "anon")
  parse_method_parameters(placeholder)
  parse_statements(container, false, placeholder)
end
1991
+
1992
+ # Capture the method's parameters. Along the way, look for a comment
1993
+ # containing:
1994
+ #
1995
+ # # yields: ....
1996
+ #
1997
+ # and add this as the block_params for the method
1998
+
1999
# Reads the parameter list of +method+ and stores it, wrapped in
# parentheses when it does not already start with one, unless the
# method already has params.  When no block parameters are known yet, a
# trailing directive comment is read as well.
def parse_method_parameters(method)
  params = parse_method_or_yield_parameters(method)
  params = "(" + params + ")" unless params[0] == ?(
  method.params = params unless method.params

  return unless method.block_params.nil?

  skip_tkspace(false)
  read_documentation_modifiers method, RDoc::METHOD_MODIFIERS
end
2008
+
2009
# Reads the parameters of a method definition or of a yield and returns
# their source text with newlines turned into spaces.  A parenthesised
# list ends at its closing parenthesis, an unparenthesised one at the
# end of the line.  When +method+ is given and has no block parameters
# yet, a comment met on the way is read as a directive using
# +modifiers+.
def parse_method_or_yield_parameters(method = nil,
                                     modifiers = RDoc::METHOD_MODIFIERS)
  skip_tkspace(false)
  tk = get_tk

  # Little hack going on here. In the statement
  #  f = 2*(1+yield)
  # We see the RPAREN as the next token, so we need
  # to exit early. This still won't catch all cases
  # (such as "a = yield + 1")
  end_token = case tk
              when TkLPAREN, TkfLPAREN then TkRPAREN
              when TkRPAREN then return ""
              else TkNL
              end
  nest = 0

  loop do
    puts "Param: %p, %s %s %s" %
      [tk.text, @scanner.continue, @scanner.lex_state, nest] if $DEBUG_RDOC

    case tk
    when TkSEMICOLON
      break
    when TkLBRACE
      nest += 1
    when TkRBRACE
      # we might have a.each {|i| yield i }
      unget_tk(tk) if nest.zero?
      nest -= 1
      break if nest <= 0
    when TkLPAREN, TkfLPAREN
      nest += 1
    when end_token
      if end_token == TkRPAREN
        nest -= 1
        break if @scanner.lex_state == EXPR_END and nest <= 0
      else
        break unless @scanner.continue
      end
    when method && method.block_params.nil? && TkCOMMENT
      unget_tk(tk)
      read_documentation_modifiers(method, modifiers)
    end
    tk = get_tk
  end

  text = get_tkread.tr("\n", " ").strip
  text = "" if text == ";"
  text
end
2061
+
2062
+ # skip the var [in] part of a 'for' statement
2063
# Skips the loop variable of a 'for' statement and the 'in' keyword
# after it.  The token after the variable is pushed back when it is not
# 'in'.
def skip_for_variable
  skip_tkspace(false)
  get_tk                       # the loop variable
  skip_tkspace(false)
  following = get_tk
  unget_tk(following) unless following.kind_of?(TkIN)
end
2070
+
2071
+ # while, until, and for have an optional 'do'
2072
# Skips the condition expression that follows while / until / for and a
# 'do' directly after it, so that this 'do' is not counted as opening a
# block of its own.
def skip_optional_do_after_expression
  skip_tkspace(false)
  tk = get_tk
  end_token = case tk
              when TkLPAREN, TkfLPAREN then TkRPAREN
              else TkNL
              end

  nest = 0
  @scanner.instance_eval{@continue = false}

  loop do
    puts("\nWhile: #{tk.text.inspect}, #{@scanner.continue} " \
         "#{@scanner.lex_state} #{nest}") if $DEBUG_RDOC

    case tk
    when TkSEMICOLON
      break
    when TkLPAREN, TkfLPAREN
      nest += 1
    when TkDO
      break if nest.zero?
    when end_token
      if end_token == TkRPAREN
        nest -= 1
        break if @scanner.lex_state == EXPR_END and nest.zero?
      else
        break unless @scanner.continue
      end
    end
    tk = get_tk
  end

  skip_tkspace(false)
  get_tk if peek_tk.kind_of?(TkDO)
end
2110
+
2111
+ # Return a superclass, which can be either a constant
2112
+ # or an expression
2113
+
2114
# Reads a class specification and returns it as a string: "self", a
# (possibly qualified) constant name, or the constant name followed by
# the source text of a trailing expression.
def get_class_specification
  tk = get_tk
  return "self" if tk.kind_of?(TkSELF)

  spec = ""
  while tk.kind_of?(TkCOLON2) || tk.kind_of?(TkCOLON3) || tk.kind_of?(TkCONSTANT)
    spec += tk.text
    tk = get_tk
  end

  unget_tk(tk)
  skip_tkspace(false)

  get_tkread # empty out read buffer

  tk = get_tk

  case tk
  when TkNL, TkCOMMENT, TkSEMICOLON
    unget_tk(tk)
    return spec
  end

  spec + parse_call_parameters(tk)
end
2143
+
2144
# Reads the arguments of a call, starting with the already fetched
# token +tk+, and returns their source text with newlines turned into
# spaces.  A parenthesised list ends at its closing parenthesis, an
# unparenthesised one at the end of the line; a comment ends either.
def parse_call_parameters(tk)
  end_token = case tk
              when TkLPAREN, TkfLPAREN then TkRPAREN
              when TkRPAREN then return ""
              else TkNL
              end
  nest = 0

  loop do
    puts("Call param: #{tk}, #{@scanner.continue} " +
         "#{@scanner.lex_state} #{nest}") if $DEBUG_RDOC

    case tk
    when TkSEMICOLON
      break
    when TkLPAREN, TkfLPAREN
      nest += 1
    when end_token
      if end_token == TkRPAREN
        nest -= 1
        break if @scanner.lex_state == EXPR_END and nest <= 0
      else
        break unless @scanner.continue
      end
    when TkCOMMENT
      unget_tk(tk)
      break
    end
    tk = get_tk
  end

  text = get_tkread.tr("\n", " ").strip
  text = "" if text == ";"
  text
end
2181
+
2182
+ # Parse a constant, which might be qualified by
2183
+ # one or more class or module names
2184
+
2185
# Reads a constant name, which may be qualified by class or module
# names, and returns it as a string (empty when the next token is not
# part of a constant).  The first token that does not belong to the
# name is pushed back.
def get_constant
  name = ""
  skip_tkspace(false)
  tk = get_tk

  while tk.kind_of?(TkCOLON2) || tk.kind_of?(TkCOLON3) || tk.kind_of?(TkCONSTANT)
    name += tk.text
    tk = get_tk
  end

  unget_tk(tk)
  name
end
2204
+
2205
+ # Get a constant that may be surrounded by parens
2206
+
2207
# Reads a constant name that may be wrapped in any number of
# parentheses and returns the name.  After the name, tokens are
# consumed until as many closing parentheses have been seen as opening
# ones.
def get_constant_with_optional_parens
  skip_tkspace(false)
  open_parens = 0
  while (tk = peek_tk).kind_of?(TkLPAREN) || tk.kind_of?(TkfLPAREN)
    get_tk
    skip_tkspace(true)
    open_parens += 1
  end

  name = get_constant

  until open_parens <= 0
    skip_tkspace(true)
    tk = get_tk
    open_parens -= 1 if tk.kind_of?(TkRPAREN)
  end
  name
end
2225
+
2226
+ # Directives are modifier comments that can appear after class, module,
2227
+ # or method names. For example:
2228
+ #
2229
+ # def fred # :yields: a, b
2230
+ #
2231
+ # or:
2232
+ #
2233
+ # class MyClass # :nodoc:
2234
+ #
2235
+ # We return the directive name and any parameters as a two element array
2236
+
2237
# Reads the next token and, when it is a comment of the form
# ":name: params" whose lower-cased name is included in +allowed+,
# returns [name, params].  Returns nil otherwise.  A token that is not
# a comment is pushed back; a comment is consumed even when it holds no
# allowed directive.
def read_directive(allowed)
  tk = get_tk
  puts "directive: #{tk.text.inspect}" if $DEBUG_RDOC

  unless tk.kind_of?(TkCOMMENT)
    unget_tk(tk)
    return nil
  end

  found = /\s*:?(\w+):\s*(.*)/.match(tk.text)
  return nil unless found

  directive = found[1].downcase
  allowed.include?(directive) ? [directive, found[2]] : nil
end
2253
+
2254
# Reads a directive comment (restricted to the names in +allow+) and
# applies it to +context+:
#
# notnew / not_new / not-new:: do not rename initialize
# nodoc::                      do not document; with "all" also skip
#                              the children
# doc::                        force documentation
# yield / yields::             set the block parameters and drop a
#                              "&name" parameter from params
# arg / args::                 replace the parameter list
def read_documentation_modifiers(context, allow)
  dir = read_directive(allow)
  return unless dir

  directive, param = dir[0], dir[1]

  case directive
  when "notnew", "not_new", "not-new"
    context.dont_rename_initialize = true

  when "nodoc"
    context.document_self = false
    context.document_children = false if param.downcase == "all"

  when "doc"
    context.document_self = true
    context.force_documentation = true

  when "yield", "yields"
    unless context.params.nil?
      context.params.sub!(/(,|)\s*&\w+/,'') # remove parameter &proc
    end
    context.block_params = param

  when "arg", "args"
    context.params = param
  end
end
2282
+
2283
+ ##
2284
+ # Look for directives in a normal comment block:
2285
+ #
2286
+ # #-- - don't display comment from this point forward
2287
+ #
2288
+ # This routine modifies its parameter
2289
+
2290
# Runs the markup preprocessor over +comment+ and acts on the
# directives it reports: stopdoc, startdoc, enddoc (throws :enddoc),
# main, title and section.  Any other directive is reported with a
# warning and ends the processing.  Private comment sections are then
# removed.  +comment+ is modified in place.
def look_for_directives_in(context, comment)
  preprocess = RDoc::Markup::PreProcess.new(@input_file_name,
                                            @options.rdoc_include)

  preprocess.handle(comment) do |directive, param|
    case directive
    when "stopdoc"
      context.stop_doc
      ""

    when "startdoc"
      context.start_doc
      context.force_documentation = true
      ""

    when "enddoc"
      throw :enddoc

    when "main"
      @options.main_page = param
      ""

    when "title"
      @options.title = param
      ""

    when "section"
      context.set_current_section(param, comment)
      comment.replace ''
      break

    else
      warn "Unrecognized directive '#{directive}'"
      break
    end
  end

  remove_private_comments(comment)
end
2330
+
2331
# Strips private sections from +comment+ in place: everything between a
# line starting with "#--" and the next line starting with "#++", and
# everything from a remaining "#--" line to the end.
def remove_private_comments(comment)
  closed_section = /^#--.*?^#\+\+/m
  open_section = /^#--.*/m

  comment.gsub!(closed_section, '')
  comment.sub!(open_section, '')
end
2335
+
2336
# Reads the next token and returns the name it denotes: a symbol
# without its leading colon, the name of an identifier or operator, or
# the text of a string token.  Raises for any other token.
def get_symbol_or_name
  tk = get_tk
  case tk
  when TkSYMBOL    then tk.text.sub(/^:/, '')
  when TkId, TkOp  then tk.name
  when TkSTRING    then tk.text
  else
    raise "Name or symbol expected (got #{tk})"
  end
end
2349
+
2350
# Parses the two names that follow 'alias' or 'alias_method' (an
# opening parenthesis and a separating comma are skipped) and adds an
# Alias to +context+ unless a directive disabled documentation.
def parse_alias(context, single, tk, comment)
  skip_tkspace
  if peek_tk.kind_of?(TkLPAREN)
    get_tk
    skip_tkspace
  end
  new_name = get_symbol_or_name

  @scanner.instance_eval{@lex_state = EXPR_FNAME}
  skip_tkspace
  if peek_tk.kind_of?(TkCOMMA)
    get_tk
    skip_tkspace
  end
  old_name = get_symbol_or_name

  al = RDoc::Alias.new get_tkread, old_name, new_name, comment
  read_documentation_modifiers al, RDoc::ATTR_MODIFIERS
  context.add_alias(al) if al.document_self
end
2371
+
2372
# Returns the source text of the arguments that follow a 'yield'.
def parse_yield_parameters
  parse_method_or_yield_parameters
end
2375
+
2376
# Records the arguments of a 'yield' as the block parameters of
# +method+, unless the method already has block parameters.
def parse_yield(context, single, tk, method)
  return unless method.block_params.nil?

  get_tkread
  @scanner.instance_eval{@continue = false}
  method.block_params = parse_yield_parameters
end
2383
+
2384
# Parses the argument of 'require'.  A plain string adds a Require to
# +context+; a dynamic string is reported with a warning.  Whenever no
# Require is added the argument token is pushed back.
def parse_require(context, comment)
  skip_tkspace_comment
  tk = get_tk
  if tk.kind_of?(TkLPAREN)
    skip_tkspace_comment
    tk = get_tk
  end

  name = case tk
         when TkSTRING
           tk.text
         when TkDSTRING
           warn "Skipping require of dynamic string: #{tk.text}"
           nil
         end

  if name
    context.add_require(RDoc::Require.new(name, comment))
  else
    unget_tk(tk)
  end
end
2409
+
2410
# Parses the comma-separated constants that follow 'include' and adds
# an Include to +context+ for each non-empty name.
def parse_include(context, comment)
  loop do
    skip_tkspace_comment
    name = get_constant_with_optional_parens
    context.add_include RDoc::Include.new(name, comment) unless name.empty?

    break unless peek_tk.kind_of?(TkCOMMA)
    get_tk
  end
end
2421
+
2422
# Reads a boolean argument: returns true for 'true', false for 'false'
# or 'nil'.  Any other token is pushed back and true is returned.
def get_bool
  skip_tkspace
  tk = get_tk
  case tk
  when TkTRUE         then true
  when TkFALSE, TkNIL then false
  else
    unget_tk tk
    true
  end
end
2435
+
2436
# Parses "attr name [, writable]" and adds the attribute to +context+
# unless a directive disabled documentation.  The attribute is "R", or
# "RW" when the second argument reads as true.  A warning is issued when
# no name argument is found.
def parse_attr(context, single, tk, comment)
  args = parse_symbol_arg(1)

  unless args.size > 0
    warn("'attr' ignored - looks like a variable")
    return
  end

  name = args[0]
  rw = "R"
  skip_tkspace(false)
  tk = get_tk
  if tk.kind_of?(TkCOMMA)
    rw = "RW" if get_bool
  else
    unget_tk tk
  end

  att = RDoc::Attr.new get_tkread, name, rw, comment
  read_documentation_modifiers att, RDoc::ATTR_MODIFIERS
  context.add_attribute(att) if att.document_self
end
2457
+
2458
# Handles private / protected / public and private_class_method /
# public_class_method.  Without arguments (end of line, or a trailing
# 'if' / 'unless' modifier) the visibility applies to the definitions
# that follow in +container+; with arguments it applies to the named
# methods only.
def parse_visibility(container, single, tk)
  singleton = (single == SINGLE)
  vis = case tk.name
        when "private"   then :private
        when "protected" then :protected
        when "public"    then :public
        when "private_class_method"
          singleton = true
          :private
        when "public_class_method"
          singleton = true
          :public
        else
          raise "Invalid visibility: #{tk.name}"
        end

  skip_tkspace_comment(false)

  case peek_tk
  # Ryan Davis suggested the extension to ignore modifiers, because he
  # often writes
  #
  #   protected unless $TESTING
  #
  when TkNL, TkUNLESS_MOD, TkIF_MOD
    container.ongoing_visibility = vis
  else
    args = parse_symbol_arg
    container.set_visibility_for(args, vis, singleton)
  end
end
2488
+
2489
# Parses the names that follow attr_reader / attr_writer /
# attr_accessor (or one of the configured extra accessors) and adds an
# attribute to +context+ for each of them.  Nothing is added when a
# trailing directive disables documentation.
#
# Every attribute is created with the source text of the whole
# declaration, captured right after the names were read.
def parse_attr_accessor(context, single, tk, comment)
  args = parse_symbol_arg
  read = get_tkread

  # If nodoc is given, don't document any of them

  tmp = RDoc::CodeObject.new
  read_documentation_modifiers tmp, RDoc::ATTR_MODIFIERS
  return unless tmp.document_self

  rw = case tk.name
       when "attr_reader"   then "R"
       when "attr_writer"   then "W"
       when "attr_accessor" then "RW"
       else
         @options.extra_accessor_flags[tk.name]
       end

  args.each do |name|
    att = RDoc::Attr.new read, name, rw, comment
    context.add_attribute att
  end
end
2513
+
2514
##
# Skips whitespace and comment tokens.
#
# +skip_nl+ is passed straight through to +skip_tkspace+.  Scanning stops as
# soon as the upcoming token (peeked, not consumed) is not a TkCOMMENT.

def skip_tkspace_comment(skip_nl = true)
  skip_tkspace(skip_nl)

  while peek_tk.kind_of?(TkCOMMENT)
    get_tk                 # discard the comment token
    skip_tkspace(skip_nl)  # and any whitespace that follows it
  end
end
2521
+
2522
##
# Collects the symbol/string arguments of a call such as
# <tt>attr_reader :a, :b</tt> or <tt>attr_reader(:a, :b)</tt>.
#
# no:: optional upper bound on the number of names to collect; collection
#      stops as soon as that many have been gathered
#
# Returns an array of the names produced by +parse_symbol_in_arg+.  Entries
# for which +parse_symbol_in_arg+ returns nothing are skipped.

def parse_symbol_arg(no = nil)
  collected = []
  skip_tkspace_comment

  case opener = get_tk
  when TkLPAREN
    # Parenthesised argument list: read until ')' or an unexpected token.
    loop do
      skip_tkspace_comment
      name = parse_symbol_in_arg
      if name
        collected << name
        break if no && collected.size >= no
      end

      skip_tkspace_comment
      case separator = get_tk
      when TkRPAREN
        break
      when TkCOMMA
        # another argument follows
      else
        warn("unexpected token: '#{separator.inspect}'") if $DEBUG_RDOC
        break
      end
    end
  else
    # Bare argument list: the token just read belongs to the first argument.
    unget_tk opener
    name = parse_symbol_in_arg
    if name
      collected << name
      return collected if no && collected.size >= no
    end

    loop do
      # skip_tkspace_comment(false)
      skip_tkspace(false)

      separator = get_tk
      unless separator.kind_of?(TkCOMMA)
        # End of the list: leave the token for whoever parses next.
        unget_tk separator
        break
      end

      skip_tkspace_comment
      name = parse_symbol_in_arg
      next unless name

      collected << name
      break if no && collected.size >= no
    end
  end

  collected
end
2570
+
2571
##
# Reads one token and converts it to an attribute/method name.
#
# * TkSYMBOL: the token text with its leading ':' removed.
# * TkSTRING: the value obtained by evaluating the last entry of <tt>@read</tt>.
# * anything else: +nil+ (with a warning when <tt>$DEBUG_RDOC</tt> is set).

def parse_symbol_in_arg
  case tk = get_tk
  when TkSYMBOL
    # \A anchors at the start of the text; ^ would also match after an
    # embedded newline and could strip a colon from the middle of the name.
    tk.text.sub(/\A:/, '')
  when TkSTRING
    # SECURITY(review): this evaluates text taken from the source being
    # documented, so a string argument containing interpolation runs as
    # code.  Left unchanged to preserve behaviour; consider replacing it
    # with a literal-only unquoting step.
    eval @read[-1]
  else
    warn("Expected symbol or string, got #{tk.inspect}") if $DEBUG_RDOC
    nil
  end
end
2582
+
2583
+ end
2584
+