rdoc 3.12.2 → 4.0.0.preview2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rdoc might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +6 -6
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/.autotest +3 -2
  5. data/DEVELOPERS.rdoc +53 -0
  6. data/History.rdoc +159 -25
  7. data/LEGAL.rdoc +12 -0
  8. data/Manifest.txt +56 -3
  9. data/README.rdoc +87 -19
  10. data/Rakefile +11 -2
  11. data/TODO.rdoc +20 -13
  12. data/bin/rdoc +4 -0
  13. data/lib/gauntlet_rdoc.rb +1 -1
  14. data/lib/rdoc.rb +32 -71
  15. data/lib/rdoc/any_method.rb +75 -21
  16. data/lib/rdoc/attr.rb +49 -10
  17. data/lib/rdoc/class_module.rb +182 -32
  18. data/lib/rdoc/code_object.rb +54 -12
  19. data/lib/rdoc/comment.rb +8 -1
  20. data/lib/rdoc/constant.rb +100 -6
  21. data/lib/rdoc/context.rb +93 -41
  22. data/lib/rdoc/context/section.rb +143 -28
  23. data/lib/rdoc/cross_reference.rb +58 -50
  24. data/lib/rdoc/encoding.rb +34 -29
  25. data/lib/rdoc/erb_partial.rb +18 -0
  26. data/lib/rdoc/extend.rb +117 -0
  27. data/lib/rdoc/generator.rb +11 -6
  28. data/lib/rdoc/generator/darkfish.rb +250 -62
  29. data/lib/rdoc/generator/json_index.rb +20 -12
  30. data/lib/rdoc/generator/markup.rb +10 -12
  31. data/lib/rdoc/generator/ri.rb +7 -60
  32. data/lib/rdoc/generator/template/darkfish/_head.rhtml +7 -7
  33. data/lib/rdoc/generator/template/darkfish/_sidebar_extends.rhtml +16 -0
  34. data/lib/rdoc/generator/template/darkfish/_sidebar_in_files.rhtml +1 -1
  35. data/lib/rdoc/generator/template/darkfish/_sidebar_installed.rhtml +14 -0
  36. data/lib/rdoc/generator/template/darkfish/_sidebar_methods.rhtml +1 -1
  37. data/lib/rdoc/generator/template/darkfish/_sidebar_table_of_contents.rhtml +13 -0
  38. data/lib/rdoc/generator/template/darkfish/class.rhtml +15 -1
  39. data/lib/rdoc/generator/template/darkfish/images/arrow_up.png +0 -0
  40. data/lib/rdoc/generator/template/darkfish/index.rhtml +3 -3
  41. data/lib/rdoc/generator/template/darkfish/js/darkfish.js +7 -9
  42. data/lib/rdoc/generator/template/darkfish/page.rhtml +2 -0
  43. data/lib/rdoc/generator/template/darkfish/rdoc.css +31 -0
  44. data/lib/rdoc/generator/template/darkfish/servlet_not_found.rhtml +18 -0
  45. data/lib/rdoc/generator/template/darkfish/servlet_root.rhtml +37 -0
  46. data/lib/rdoc/generator/template/darkfish/table_of_contents.rhtml +3 -3
  47. data/lib/rdoc/include.rb +12 -3
  48. data/lib/rdoc/markdown.kpeg +1186 -0
  49. data/lib/rdoc/markdown.rb +16336 -0
  50. data/lib/rdoc/markdown/entities.rb +2128 -0
  51. data/lib/rdoc/markdown/literals_1_8.kpeg +18 -0
  52. data/lib/rdoc/markdown/literals_1_8.rb +454 -0
  53. data/lib/rdoc/markdown/literals_1_9.kpeg +22 -0
  54. data/lib/rdoc/markdown/literals_1_9.rb +417 -0
  55. data/lib/rdoc/markup.rb +69 -10
  56. data/lib/rdoc/markup/attr_changer.rb +2 -5
  57. data/lib/rdoc/markup/attribute_manager.rb +23 -14
  58. data/lib/rdoc/markup/attributes.rb +70 -0
  59. data/lib/rdoc/markup/block_quote.rb +14 -0
  60. data/lib/rdoc/markup/document.rb +20 -4
  61. data/lib/rdoc/markup/formatter.rb +17 -6
  62. data/lib/rdoc/markup/formatter_test_case.rb +93 -24
  63. data/lib/rdoc/markup/hard_break.rb +31 -0
  64. data/lib/rdoc/markup/heading.rb +1 -1
  65. data/lib/rdoc/markup/indented_paragraph.rb +14 -0
  66. data/lib/rdoc/markup/list.rb +23 -4
  67. data/lib/rdoc/markup/list_item.rb +17 -4
  68. data/lib/rdoc/markup/paragraph.rb +14 -0
  69. data/lib/rdoc/markup/parser.rb +107 -60
  70. data/lib/rdoc/markup/raw.rb +4 -4
  71. data/lib/rdoc/markup/special.rb +3 -3
  72. data/lib/rdoc/markup/to_ansi.rb +7 -1
  73. data/lib/rdoc/markup/to_html.rb +42 -14
  74. data/lib/rdoc/markup/to_html_crossref.rb +10 -9
  75. data/lib/rdoc/markup/to_html_snippet.rb +20 -4
  76. data/lib/rdoc/markup/to_joined_paragraph.rb +68 -0
  77. data/lib/rdoc/markup/to_label.rb +20 -1
  78. data/lib/rdoc/markup/to_markdown.rb +134 -0
  79. data/lib/rdoc/markup/to_rdoc.rb +36 -5
  80. data/lib/rdoc/markup/to_table_of_contents.rb +6 -1
  81. data/lib/rdoc/markup/to_tt_only.rb +11 -2
  82. data/lib/rdoc/markup/verbatim.rb +19 -0
  83. data/lib/rdoc/method_attr.rb +33 -19
  84. data/lib/rdoc/normal_class.rb +26 -7
  85. data/lib/rdoc/normal_module.rb +10 -5
  86. data/lib/rdoc/options.rb +95 -21
  87. data/lib/rdoc/parser.rb +6 -2
  88. data/lib/rdoc/parser/c.rb +212 -97
  89. data/lib/rdoc/parser/markdown.rb +23 -0
  90. data/lib/rdoc/parser/ruby.rb +115 -35
  91. data/lib/rdoc/parser/ruby_tools.rb +8 -3
  92. data/lib/rdoc/rd.rb +8 -4
  93. data/lib/rdoc/rd/block_parser.rb +1 -1
  94. data/lib/rdoc/rd/block_parser.ry +1 -1
  95. data/lib/rdoc/rdoc.rb +45 -21
  96. data/lib/rdoc/ri/driver.rb +322 -76
  97. data/lib/rdoc/ri/paths.rb +90 -31
  98. data/lib/rdoc/ri/store.rb +2 -353
  99. data/lib/rdoc/ruby_lex.rb +5 -21
  100. data/lib/rdoc/ruby_token.rb +2 -3
  101. data/lib/rdoc/rubygems_hook.rb +21 -9
  102. data/lib/rdoc/servlet.rb +302 -0
  103. data/lib/rdoc/stats.rb +28 -20
  104. data/lib/rdoc/store.rb +881 -0
  105. data/lib/rdoc/task.rb +2 -1
  106. data/lib/rdoc/test_case.rb +103 -1
  107. data/lib/rdoc/text.rb +5 -4
  108. data/lib/rdoc/tom_doc.rb +17 -16
  109. data/lib/rdoc/top_level.rb +43 -285
  110. data/test/MarkdownTest_1.0.3/Amps and angle encoding.text +21 -0
  111. data/test/MarkdownTest_1.0.3/Auto links.text +13 -0
  112. data/test/MarkdownTest_1.0.3/Backslash escapes.text +120 -0
  113. data/test/MarkdownTest_1.0.3/Blockquotes with code blocks.text +11 -0
  114. data/test/MarkdownTest_1.0.3/Code Blocks.text +14 -0
  115. data/test/MarkdownTest_1.0.3/Code Spans.text +6 -0
  116. data/test/MarkdownTest_1.0.3/Hard-wrapped paragraphs with list-like lines.text +8 -0
  117. data/test/MarkdownTest_1.0.3/Horizontal rules.text +67 -0
  118. data/test/MarkdownTest_1.0.3/Inline HTML (Advanced).text +15 -0
  119. data/test/MarkdownTest_1.0.3/Inline HTML (Simple).text +69 -0
  120. data/test/MarkdownTest_1.0.3/Inline HTML comments.text +13 -0
  121. data/test/MarkdownTest_1.0.3/Links, inline style.text +12 -0
  122. data/test/MarkdownTest_1.0.3/Links, reference style.text +71 -0
  123. data/test/MarkdownTest_1.0.3/Links, shortcut references.text +20 -0
  124. data/test/MarkdownTest_1.0.3/Literal quotes in titles.text +7 -0
  125. data/test/MarkdownTest_1.0.3/Markdown Documentation - Basics.text +306 -0
  126. data/test/MarkdownTest_1.0.3/Markdown Documentation - Syntax.text +888 -0
  127. data/test/MarkdownTest_1.0.3/Nested blockquotes.text +5 -0
  128. data/test/MarkdownTest_1.0.3/Ordered and unordered lists.text +131 -0
  129. data/test/MarkdownTest_1.0.3/Strong and em together.text +7 -0
  130. data/test/MarkdownTest_1.0.3/Tabs.text +21 -0
  131. data/test/MarkdownTest_1.0.3/Tidyness.text +5 -0
  132. data/test/test_attribute_manager.rb +7 -4
  133. data/test/test_rdoc_any_method.rb +84 -13
  134. data/test/test_rdoc_attr.rb +59 -9
  135. data/test/test_rdoc_class_module.rb +670 -73
  136. data/test/test_rdoc_code_object.rb +21 -1
  137. data/test/test_rdoc_comment.rb +1 -1
  138. data/test/test_rdoc_constant.rb +132 -0
  139. data/test/test_rdoc_context.rb +84 -18
  140. data/test/test_rdoc_context_section.rb +99 -15
  141. data/test/test_rdoc_cross_reference.rb +1 -1
  142. data/test/test_rdoc_encoding.rb +17 -1
  143. data/test/test_rdoc_extend.rb +94 -0
  144. data/test/test_rdoc_generator_darkfish.rb +45 -19
  145. data/test/test_rdoc_generator_json_index.rb +27 -7
  146. data/test/test_rdoc_generator_markup.rb +3 -3
  147. data/test/test_rdoc_generator_ri.rb +11 -9
  148. data/test/test_rdoc_include.rb +12 -0
  149. data/test/test_rdoc_markdown.rb +977 -0
  150. data/test/test_rdoc_markdown_test.rb +1891 -0
  151. data/test/test_rdoc_markup.rb +1 -1
  152. data/test/test_rdoc_markup_attribute_manager.rb +2 -2
  153. data/test/test_rdoc_markup_attributes.rb +39 -0
  154. data/test/test_rdoc_markup_document.rb +16 -1
  155. data/test/test_rdoc_markup_formatter.rb +7 -4
  156. data/test/test_rdoc_markup_hard_break.rb +31 -0
  157. data/test/test_rdoc_markup_indented_paragraph.rb +14 -0
  158. data/test/test_rdoc_markup_paragraph.rb +15 -1
  159. data/test/test_rdoc_markup_parser.rb +152 -89
  160. data/test/test_rdoc_markup_to_ansi.rb +23 -2
  161. data/test/test_rdoc_markup_to_bs.rb +24 -0
  162. data/test/test_rdoc_markup_to_html.rb +50 -19
  163. data/test/test_rdoc_markup_to_html_crossref.rb +23 -5
  164. data/test/test_rdoc_markup_to_html_snippet.rb +49 -8
  165. data/test/test_rdoc_markup_to_joined_paragraph.rb +32 -0
  166. data/test/test_rdoc_markup_to_label.rb +63 -1
  167. data/test/test_rdoc_markup_to_markdown.rb +352 -0
  168. data/test/test_rdoc_markup_to_rdoc.rb +22 -2
  169. data/test/test_rdoc_markup_to_table_of_contents.rb +44 -39
  170. data/test/test_rdoc_markup_to_tt_only.rb +20 -0
  171. data/test/test_rdoc_markup_verbatim.rb +13 -0
  172. data/test/test_rdoc_method_attr.rb +5 -0
  173. data/test/test_rdoc_normal_class.rb +24 -5
  174. data/test/test_rdoc_normal_module.rb +1 -1
  175. data/test/test_rdoc_options.rb +21 -6
  176. data/test/test_rdoc_parser.rb +24 -0
  177. data/test/test_rdoc_parser_c.rb +151 -26
  178. data/test/test_rdoc_parser_markdown.rb +55 -0
  179. data/test/test_rdoc_parser_rd.rb +2 -2
  180. data/test/test_rdoc_parser_ruby.rb +468 -109
  181. data/test/test_rdoc_parser_simple.rb +2 -2
  182. data/test/test_rdoc_rd_block_parser.rb +0 -4
  183. data/test/test_rdoc_rdoc.rb +110 -22
  184. data/test/test_rdoc_ri_driver.rb +415 -80
  185. data/test/test_rdoc_ri_paths.rb +122 -13
  186. data/test/test_rdoc_ruby_lex.rb +5 -61
  187. data/test/test_rdoc_ruby_token.rb +19 -0
  188. data/test/test_rdoc_rubygems_hook.rb +64 -43
  189. data/test/test_rdoc_servlet.rb +429 -0
  190. data/test/test_rdoc_stats.rb +83 -24
  191. data/test/{test_rdoc_ri_store.rb → test_rdoc_store.rb} +395 -22
  192. data/test/test_rdoc_task.rb +2 -2
  193. data/test/test_rdoc_text.rb +37 -11
  194. data/test/test_rdoc_tom_doc.rb +59 -62
  195. data/test/test_rdoc_top_level.rb +71 -113
  196. data/test/xref_test_case.rb +7 -9
  197. metadata +122 -39
  198. metadata.gz.sig +0 -0
  199. data/CVE-2013-0256.rdoc +0 -49
  200. data/lib/rdoc/markup/attribute.rb +0 -51
@@ -0,0 +1,31 @@
1
+ ##
2
+ # A hard-break in the middle of a paragraph.
3
+
4
+ class RDoc::Markup::HardBreak
5
+
6
+ @instance = new
7
+
8
+ ##
9
+ # RDoc::Markup::HardBreak is a singleton
10
+
11
+ def self.new
12
+ @instance
13
+ end
14
+
15
+ ##
16
+ # Calls #accept_hard_break on +visitor+
17
+
18
+ def accept visitor
19
+ visitor.accept_hard_break self
20
+ end
21
+
22
+ def == other # :nodoc:
23
+ self.class === other
24
+ end
25
+
26
+ def pretty_print q # :nodoc:
27
+ q.text "[break]"
28
+ end
29
+
30
+ end
31
+
@@ -23,7 +23,7 @@ class RDoc::Markup::Heading < Struct.new :level, :text
23
23
  markup = RDoc::Markup.new
24
24
  markup.add_special RDoc::CrossReference::CROSSREF_REGEXP, :CROSSREF
25
25
 
26
- @to_html = RDoc::Markup::ToHtml.new
26
+ @to_html = RDoc::Markup::ToHtml.new nil
27
27
 
28
28
  def @to_html.handle_special_CROSSREF special
29
29
  special.text.sub(/^\\/, '')
@@ -29,5 +29,19 @@ class RDoc::Markup::IndentedParagraph < RDoc::Markup::Raw
29
29
  visitor.accept_indented_paragraph self
30
30
  end
31
31
 
32
+ ##
33
+ # Joins the raw paragraph text and converts inline HardBreaks to the
34
+ # +hard_break+ text followed by the indent.
35
+
36
+ def text hard_break = nil
37
+ @parts.map do |part|
38
+ if RDoc::Markup::HardBreak === part then
39
+ '%1$s%3$*2$s' % [hard_break, @indent, ' '] if hard_break
40
+ else
41
+ part
42
+ end
43
+ end.join
44
+ end
45
+
32
46
  end
33
47
 
@@ -1,5 +1,24 @@
1
1
  ##
2
- # A List of ListItems
2
+ # A List is a homogeneous set of ListItems.
3
+ #
4
+ # The supported list types include:
5
+ #
6
+ # :BULLET::
7
+ # An unordered list
8
+ # :LABEL::
9
+ # An unordered definition list, but using an alternate RDoc::Markup syntax
10
+ # :LALPHA::
11
+ # An ordered list using increasing lowercase English letters
12
+ # :NOTE::
13
+ # An unordered definition list
14
+ # :NUMBER::
15
+ # An ordered list using increasing Arabic numerals
16
+ # :UALPHA::
17
+ # An ordered list using increasing uppercase English letters
18
+ #
19
+ # Definition lists behave like HTML definition lists. Each list item can
20
+ # describe multiple terms. See RDoc::Markup::ListItem for how labels and
21
+ # definitions are stored as list items.
3
22
 
4
23
  class RDoc::Markup::List
5
24
 
@@ -15,12 +34,12 @@ class RDoc::Markup::List
15
34
 
16
35
  ##
17
36
  # Creates a new list of +type+ with +items+. Valid list types are:
18
- # +:BULLET+, +:LABEL+, +:LALPHA+, +:NOTE+, +:NUMBER+, +:UALPHA+
37
+ # +:BULLET+, +:LABEL+, +:LALPHA+, +:NOTE+, +:NUMBER+, +:UALPHA+
19
38
 
20
39
  def initialize type = nil, *items
21
40
  @type = type
22
41
  @items = []
23
- @items.push(*items)
42
+ @items.concat items
24
43
  end
25
44
 
26
45
  ##
@@ -75,7 +94,7 @@ class RDoc::Markup::List
75
94
  # Appends +items+ to the list
76
95
 
77
96
  def push *items
78
- @items.push(*items)
97
+ @items.concat items
79
98
  end
80
99
 
81
100
  end
@@ -1,5 +1,12 @@
1
1
  ##
2
2
  # An item within a List that contains paragraphs, headings, etc.
3
+ #
4
+ # For BULLET, NUMBER, LALPHA and UALPHA lists, the label will always be nil.
5
+ # For NOTE and LABEL lists, the list label may contain:
6
+ #
7
+ # * a single String for a single label
8
+ # * an Array of Strings for a list item with multiple terms
9
+ # * nil for an extra description attached to a previously labeled list item
3
10
 
4
11
  class RDoc::Markup::ListItem
5
12
 
@@ -19,7 +26,7 @@ class RDoc::Markup::ListItem
19
26
  def initialize label = nil, *parts
20
27
  @label = label
21
28
  @parts = []
22
- @parts.push(*parts)
29
+ @parts.concat parts
23
30
  end
24
31
 
25
32
  ##
@@ -64,8 +71,14 @@ class RDoc::Markup::ListItem
64
71
 
65
72
  def pretty_print q # :nodoc:
66
73
  q.group 2, '[item: ', ']' do
67
- if @label then
68
- q.text @label
74
+ case @label
75
+ when Array then
76
+ q.pp @label
77
+ q.text ';'
78
+ q.breakable
79
+ when String then
80
+ q.pp @label
81
+ q.text ';'
69
82
  q.breakable
70
83
  end
71
84
 
@@ -79,7 +92,7 @@ class RDoc::Markup::ListItem
79
92
  # Adds +parts+ to the ListItem
80
93
 
81
94
  def push *parts
82
- @parts.push(*parts)
95
+ @parts.concat parts
83
96
  end
84
97
 
85
98
  end
@@ -10,5 +10,19 @@ class RDoc::Markup::Paragraph < RDoc::Markup::Raw
10
10
  visitor.accept_paragraph self
11
11
  end
12
12
 
13
+ ##
14
+ # Joins the raw paragraph text and converts inline HardBreaks to the
15
+ # +hard_break+ text.
16
+
17
+ def text hard_break = ''
18
+ @parts.map do |part|
19
+ if RDoc::Markup::HardBreak === part then
20
+ hard_break
21
+ else
22
+ part
23
+ end
24
+ end.join
25
+ end
26
+
13
27
  end
14
28
 
@@ -75,12 +75,17 @@ class RDoc::Markup::Parser
75
75
  # Creates a new Parser. See also ::parse
76
76
 
77
77
  def initialize
78
- @tokens = []
79
- @current_token = nil
80
- @debug = false
81
-
82
- @line = 0
83
- @line_pos = 0
78
+ @binary_input = nil
79
+ @current_token = nil
80
+ @debug = false
81
+ @have_encoding = Object.const_defined? :Encoding
82
+ @have_byteslice = ''.respond_to? :byteslice
83
+ @input = nil
84
+ @input_encoding = nil
85
+ @line = 0
86
+ @line_pos = 0
87
+ @s = nil
88
+ @tokens = []
84
89
  end
85
90
 
86
91
  ##
@@ -108,13 +113,13 @@ class RDoc::Markup::Parser
108
113
  p :list_start => margin if @debug
109
114
 
110
115
  list = RDoc::Markup::List.new
116
+ label = nil
111
117
 
112
118
  until @tokens.empty? do
113
119
  type, data, column, = get
114
120
 
115
121
  case type
116
- when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
117
-
122
+ when *LIST_TOKENS then
118
123
  if column < margin || (list.type && list.type != type) then
119
124
  unget
120
125
  break
@@ -125,6 +130,8 @@ class RDoc::Markup::Parser
125
130
 
126
131
  case type
127
132
  when :NOTE, :LABEL then
133
+ label = [] unless label
134
+
128
135
  if peek_type == :NEWLINE then
129
136
  # description not on the same line as LABEL/NOTE
130
137
  # skip the trailing newline & any blank lines below
@@ -147,32 +154,35 @@ class RDoc::Markup::Parser
147
154
  # In all cases, we have an empty description.
148
155
  # In the last case only, we continue.
149
156
  if peek_type.nil? || column < margin then
150
- empty = 1
157
+ empty = true
151
158
  elsif column == margin then
152
159
  case peek_type
153
160
  when type
154
- empty = 2 # continue
161
+ empty = :continue
155
162
  when *LIST_TOKENS
156
- empty = 1
163
+ empty = true
157
164
  else
158
- empty = 0
165
+ empty = false
159
166
  end
160
167
  else
161
- empty = 0
168
+ empty = false
162
169
  end
163
170
 
164
- if empty > 0 then
165
- item = RDoc::Markup::ListItem.new(data)
166
- item << RDoc::Markup::BlankLine.new
167
- list << item
168
- break if empty == 1
169
- next
171
+ if empty then
172
+ label << data
173
+ next if empty == :continue
174
+ break
170
175
  end
171
176
  end
172
177
  else
173
178
  data = nil
174
179
  end
175
180
 
181
+ if label then
182
+ data = label << data
183
+ label = nil
184
+ end
185
+
176
186
  list_item = RDoc::Markup::ListItem.new data
177
187
  parse list_item, column
178
188
  list << list_item
@@ -185,7 +195,13 @@ class RDoc::Markup::Parser
185
195
 
186
196
  p :list_end => margin if @debug
187
197
 
188
- return nil if list.empty?
198
+ if list.empty? then
199
+ return nil unless label
200
+ return nil unless [:LABEL, :NOTE].include? list.type
201
+
202
+ list_item = RDoc::Markup::ListItem.new label, RDoc::Markup::BlankLine.new
203
+ list << list_item
204
+ end
189
205
 
190
206
  list
191
207
  end
@@ -201,20 +217,20 @@ class RDoc::Markup::Parser
201
217
  until @tokens.empty? do
202
218
  type, data, column, = get
203
219
 
204
- if type == :TEXT && column == margin then
220
+ if type == :TEXT and column == margin then
205
221
  paragraph << data
206
222
 
207
- if peek_token[0] == :BREAK then
208
- break
209
- end
223
+ break if peek_token.first == :BREAK
210
224
 
211
- skip :NEWLINE
225
+ data << ' ' if skip :NEWLINE
212
226
  else
213
227
  unget
214
228
  break
215
229
  end
216
230
  end
217
231
 
232
+ paragraph.parts.last.sub!(/ \z/, '') # cleanup
233
+
218
234
  p :paragraph_end => margin if @debug
219
235
 
220
236
  paragraph
@@ -303,6 +319,21 @@ class RDoc::Markup::Parser
303
319
  verbatim
304
320
  end
305
321
 
322
+ ##
323
+ # The character offset for the input string at the given +byte_offset+
324
+
325
+ def char_pos byte_offset
326
+ if @have_byteslice then
327
+ @input.byteslice(0, byte_offset).length
328
+ elsif @have_encoding then
329
+ matched = @binary_input[0, byte_offset]
330
+ matched.force_encoding @input_encoding
331
+ matched.length
332
+ else
333
+ byte_offset
334
+ end
335
+ end
336
+
306
337
  ##
307
338
  # Pulls the next token from the stream.
308
339
 
@@ -383,6 +414,22 @@ class RDoc::Markup::Parser
383
414
  token
384
415
  end
385
416
 
417
+ ##
418
+ # Creates the StringScanner
419
+
420
+ def setup_scanner input
421
+ @line = 0
422
+ @line_pos = 0
423
+ @input = input.dup
424
+
425
+ if @have_encoding and not @have_byteslice then
426
+ @input_encoding = @input.encoding
427
+ @binary_input = @input.force_encoding Encoding::BINARY
428
+ end
429
+
430
+ @s = StringScanner.new input
431
+ end
432
+
386
433
  ##
387
434
  # Skips the next token if its type is +token_type+.
388
435
  #
@@ -400,58 +447,55 @@ class RDoc::Markup::Parser
400
447
  # Turns text +input+ into a stream of tokens
401
448
 
402
449
  def tokenize input
403
- s = StringScanner.new input
450
+ setup_scanner input
404
451
 
405
- @line = 0
406
- @line_pos = 0
407
-
408
- until s.eos? do
409
- pos = s.pos
452
+ until @s.eos? do
453
+ pos = @s.pos
410
454
 
411
455
  # leading spaces will be reflected by the column of the next token
412
456
  # the only thing we loose are trailing spaces at the end of the file
413
- next if s.scan(/ +/)
457
+ next if @s.scan(/ +/)
414
458
 
415
459
  # note: after BULLET, LABEL, etc.,
416
460
  # indent will be the column of the next non-newline token
417
461
 
418
462
  @tokens << case
419
463
  # [CR]LF => :NEWLINE
420
- when s.scan(/\r?\n/) then
421
- token = [:NEWLINE, s.matched, *token_pos(pos)]
422
- @line_pos = s.pos
464
+ when @s.scan(/\r?\n/) then
465
+ token = [:NEWLINE, @s.matched, *token_pos(pos)]
466
+ @line_pos = char_pos @s.pos
423
467
  @line += 1
424
468
  token
425
469
  # === text => :HEADER then :TEXT
426
- when s.scan(/(=+)(\s*)/) then
427
- level = s[1].length
470
+ when @s.scan(/(=+)(\s*)/) then
471
+ level = @s[1].length
428
472
  header = [:HEADER, level, *token_pos(pos)]
429
473
 
430
- if s[2] =~ /^\r?\n/ then
431
- s.pos -= s[2].length
474
+ if @s[2] =~ /^\r?\n/ then
475
+ @s.pos -= @s[2].length
432
476
  header
433
477
  else
434
- pos = s.pos
435
- s.scan(/.*/)
478
+ pos = @s.pos
479
+ @s.scan(/.*/)
436
480
  @tokens << header
437
- [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
481
+ [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(pos)]
438
482
  end
439
483
  # --- (at least 3) and nothing else on the line => :RULE
440
- when s.scan(/(-{3,}) *\r?$/) then
441
- [:RULE, s[1].length - 2, *token_pos(pos)]
484
+ when @s.scan(/(-{3,}) *\r?$/) then
485
+ [:RULE, @s[1].length - 2, *token_pos(pos)]
442
486
  # * or - followed by white space and text => :BULLET
443
- when s.scan(/([*-]) +(\S)/) then
444
- s.pos -= s[2].bytesize # unget \S
445
- [:BULLET, s[1], *token_pos(pos)]
487
+ when @s.scan(/([*-]) +(\S)/) then
488
+ @s.pos -= @s[2].bytesize # unget \S
489
+ [:BULLET, @s[1], *token_pos(pos)]
446
490
  # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
447
- when s.scan(/([a-z]|\d+)\. +(\S)/i) then
491
+ when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
448
492
  # FIXME if tab(s), the column will be wrong
449
493
  # either support tabs everywhere by first expanding them to
450
494
  # spaces, or assume that they will have been replaced
451
495
  # before (and provide a check for that at least in debug
452
496
  # mode)
453
- list_label = s[1]
454
- s.pos -= s[2].bytesize # unget \S
497
+ list_label = @s[1]
498
+ @s.pos -= @s[2].bytesize # unget \S
455
499
  list_type =
456
500
  case list_label
457
501
  when /[a-z]/ then :LALPHA
@@ -462,18 +506,18 @@ class RDoc::Markup::Parser
462
506
  end
463
507
  [list_type, list_label, *token_pos(pos)]
464
508
  # [text] followed by spaces or end of line => :LABEL
465
- when s.scan(/\[(.*?)\]( +|\r?$)/) then
466
- [:LABEL, s[1], *token_pos(pos)]
509
+ when @s.scan(/\[(.*?)\]( +|\r?$)/) then
510
+ [:LABEL, @s[1], *token_pos(pos)]
467
511
  # text:: followed by spaces or end of line => :NOTE
468
- when s.scan(/(.*?)::( +|\r?$)/) then
469
- [:NOTE, s[1], *token_pos(pos)]
512
+ when @s.scan(/(.*?)::( +|\r?$)/) then
513
+ [:NOTE, @s[1], *token_pos(pos)]
470
514
  # anything else: :TEXT
471
- else s.scan(/(.*?)( )?\r?$/)
472
- token = [:TEXT, s[1], *token_pos(pos)]
515
+ else @s.scan(/(.*?)( )?\r?$/)
516
+ token = [:TEXT, @s[1], *token_pos(pos)]
473
517
 
474
- if s[2] then
518
+ if @s[2] then
475
519
  @tokens << token
476
- [:BREAK, s[2], *token_pos(pos + s[1].length)]
520
+ [:BREAK, @s[2], *token_pos(pos + @s[1].length)]
477
521
  else
478
522
  token
479
523
  end
@@ -484,9 +528,12 @@ class RDoc::Markup::Parser
484
528
  end
485
529
 
486
530
  ##
487
- # Calculates the column and line of the current token based on +offset+.
531
+ # Calculates the column (by character) and line of the current token from
532
+ # the input based on +byte_offset+.
533
+
534
+ def token_pos byte_offset
535
+ offset = char_pos byte_offset
488
536
 
489
- def token_pos offset
490
537
  [offset - @line_pos, @line]
491
538
  end
492
539