rougegal 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (311) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/LICENSE +186 -0
  4. data/bin/rougify +17 -0
  5. data/lib/rouge/cli.rb +394 -0
  6. data/lib/rouge/demos/abap +6 -0
  7. data/lib/rouge/demos/actionscript +4 -0
  8. data/lib/rouge/demos/apache +21 -0
  9. data/lib/rouge/demos/apiblueprint +33 -0
  10. data/lib/rouge/demos/applescript +2 -0
  11. data/lib/rouge/demos/awk +4 -0
  12. data/lib/rouge/demos/biml +38 -0
  13. data/lib/rouge/demos/bsl +7 -0
  14. data/lib/rouge/demos/c +8 -0
  15. data/lib/rouge/demos/ceylon +7 -0
  16. data/lib/rouge/demos/cfscript +18 -0
  17. data/lib/rouge/demos/clojure +5 -0
  18. data/lib/rouge/demos/cmake +7 -0
  19. data/lib/rouge/demos/coffeescript +5 -0
  20. data/lib/rouge/demos/common_lisp +1 -0
  21. data/lib/rouge/demos/conf +4 -0
  22. data/lib/rouge/demos/console +6 -0
  23. data/lib/rouge/demos/coq +13 -0
  24. data/lib/rouge/demos/cpp +8 -0
  25. data/lib/rouge/demos/csharp +5 -0
  26. data/lib/rouge/demos/css +4 -0
  27. data/lib/rouge/demos/d +16 -0
  28. data/lib/rouge/demos/dart +6 -0
  29. data/lib/rouge/demos/diff +7 -0
  30. data/lib/rouge/demos/digdag +19 -0
  31. data/lib/rouge/demos/docker +9 -0
  32. data/lib/rouge/demos/dot +5 -0
  33. data/lib/rouge/demos/eiffel +30 -0
  34. data/lib/rouge/demos/elixir +1 -0
  35. data/lib/rouge/demos/erb +1 -0
  36. data/lib/rouge/demos/erlang +7 -0
  37. data/lib/rouge/demos/factor +5 -0
  38. data/lib/rouge/demos/fortran +22 -0
  39. data/lib/rouge/demos/fsharp +12 -0
  40. data/lib/rouge/demos/gal +22 -0
  41. data/lib/rouge/demos/gherkin +17 -0
  42. data/lib/rouge/demos/glsl +14 -0
  43. data/lib/rouge/demos/go +7 -0
  44. data/lib/rouge/demos/gradle +10 -0
  45. data/lib/rouge/demos/graphql +17 -0
  46. data/lib/rouge/demos/groovy +9 -0
  47. data/lib/rouge/demos/haml +5 -0
  48. data/lib/rouge/demos/handlebars +7 -0
  49. data/lib/rouge/demos/haskell +6 -0
  50. data/lib/rouge/demos/html +8 -0
  51. data/lib/rouge/demos/http +14 -0
  52. data/lib/rouge/demos/hylang +10 -0
  53. data/lib/rouge/demos/idlang +8 -0
  54. data/lib/rouge/demos/igorpro +9 -0
  55. data/lib/rouge/demos/ini +4 -0
  56. data/lib/rouge/demos/io +11 -0
  57. data/lib/rouge/demos/irb +4 -0
  58. data/lib/rouge/demos/irb_output +2 -0
  59. data/lib/rouge/demos/java +5 -0
  60. data/lib/rouge/demos/javascript +1 -0
  61. data/lib/rouge/demos/jinja +9 -0
  62. data/lib/rouge/demos/json +1 -0
  63. data/lib/rouge/demos/json-doc +1 -0
  64. data/lib/rouge/demos/jsonnet +28 -0
  65. data/lib/rouge/demos/jsx +17 -0
  66. data/lib/rouge/demos/julia +11 -0
  67. data/lib/rouge/demos/kotlin +3 -0
  68. data/lib/rouge/demos/lasso +12 -0
  69. data/lib/rouge/demos/liquid +11 -0
  70. data/lib/rouge/demos/literate_coffeescript +3 -0
  71. data/lib/rouge/demos/literate_haskell +7 -0
  72. data/lib/rouge/demos/llvm +20 -0
  73. data/lib/rouge/demos/lua +12 -0
  74. data/lib/rouge/demos/make +6 -0
  75. data/lib/rouge/demos/markdown +4 -0
  76. data/lib/rouge/demos/matlab +6 -0
  77. data/lib/rouge/demos/moonscript +16 -0
  78. data/lib/rouge/demos/mosel +10 -0
  79. data/lib/rouge/demos/mxml +22 -0
  80. data/lib/rouge/demos/nasm +26 -0
  81. data/lib/rouge/demos/nginx +5 -0
  82. data/lib/rouge/demos/nim +27 -0
  83. data/lib/rouge/demos/objective_c +18 -0
  84. data/lib/rouge/demos/ocaml +12 -0
  85. data/lib/rouge/demos/pascal +14 -0
  86. data/lib/rouge/demos/perl +5 -0
  87. data/lib/rouge/demos/php +3 -0
  88. data/lib/rouge/demos/plaintext +1 -0
  89. data/lib/rouge/demos/plist +142 -0
  90. data/lib/rouge/demos/pony +17 -0
  91. data/lib/rouge/demos/powershell +49 -0
  92. data/lib/rouge/demos/praat +26 -0
  93. data/lib/rouge/demos/prolog +9 -0
  94. data/lib/rouge/demos/prometheus +9 -0
  95. data/lib/rouge/demos/properties +7 -0
  96. data/lib/rouge/demos/protobuf +5 -0
  97. data/lib/rouge/demos/puppet +6 -0
  98. data/lib/rouge/demos/python +6 -0
  99. data/lib/rouge/demos/q +2 -0
  100. data/lib/rouge/demos/qml +9 -0
  101. data/lib/rouge/demos/r +8 -0
  102. data/lib/rouge/demos/racket +24 -0
  103. data/lib/rouge/demos/ruby +9 -0
  104. data/lib/rouge/demos/rust +12 -0
  105. data/lib/rouge/demos/sass +3 -0
  106. data/lib/rouge/demos/scala +3 -0
  107. data/lib/rouge/demos/scheme +4 -0
  108. data/lib/rouge/demos/scss +5 -0
  109. data/lib/rouge/demos/sed +4 -0
  110. data/lib/rouge/demos/shell +2 -0
  111. data/lib/rouge/demos/sieve +10 -0
  112. data/lib/rouge/demos/slim +17 -0
  113. data/lib/rouge/demos/smalltalk +6 -0
  114. data/lib/rouge/demos/smarty +12 -0
  115. data/lib/rouge/demos/sml +4 -0
  116. data/lib/rouge/demos/sql +1 -0
  117. data/lib/rouge/demos/swift +5 -0
  118. data/lib/rouge/demos/tap +5 -0
  119. data/lib/rouge/demos/tcl +1 -0
  120. data/lib/rouge/demos/tex +1 -0
  121. data/lib/rouge/demos/toml +9 -0
  122. data/lib/rouge/demos/tsx +17 -0
  123. data/lib/rouge/demos/tulip +13 -0
  124. data/lib/rouge/demos/turtle +26 -0
  125. data/lib/rouge/demos/twig +9 -0
  126. data/lib/rouge/demos/typescript +1 -0
  127. data/lib/rouge/demos/vala +8 -0
  128. data/lib/rouge/demos/vb +4 -0
  129. data/lib/rouge/demos/verilog +27 -0
  130. data/lib/rouge/demos/vhdl +23 -0
  131. data/lib/rouge/demos/viml +14 -0
  132. data/lib/rouge/demos/vue +11 -0
  133. data/lib/rouge/demos/wollok +11 -0
  134. data/lib/rouge/demos/xml +2 -0
  135. data/lib/rouge/demos/yaml +4 -0
  136. data/lib/rouge/formatter.rb +75 -0
  137. data/lib/rouge/formatters/html.rb +37 -0
  138. data/lib/rouge/formatters/html_inline.rb +30 -0
  139. data/lib/rouge/formatters/html_legacy.rb +44 -0
  140. data/lib/rouge/formatters/html_linewise.rb +27 -0
  141. data/lib/rouge/formatters/html_pygments.rb +16 -0
  142. data/lib/rouge/formatters/html_table.rb +61 -0
  143. data/lib/rouge/formatters/null.rb +19 -0
  144. data/lib/rouge/formatters/terminal256.rb +172 -0
  145. data/lib/rouge/guesser.rb +55 -0
  146. data/lib/rouge/guessers/filename.rb +25 -0
  147. data/lib/rouge/guessers/glob_mapping.rb +46 -0
  148. data/lib/rouge/guessers/mimetype.rb +14 -0
  149. data/lib/rouge/guessers/modeline.rb +42 -0
  150. data/lib/rouge/guessers/source.rb +39 -0
  151. data/lib/rouge/lexer.rb +452 -0
  152. data/lib/rouge/lexers/abap.rb +238 -0
  153. data/lib/rouge/lexers/actionscript.rb +195 -0
  154. data/lib/rouge/lexers/apache.rb +71 -0
  155. data/lib/rouge/lexers/apache/keywords.yml +764 -0
  156. data/lib/rouge/lexers/apiblueprint.rb +51 -0
  157. data/lib/rouge/lexers/apple_script.rb +367 -0
  158. data/lib/rouge/lexers/awk.rb +161 -0
  159. data/lib/rouge/lexers/biml.rb +41 -0
  160. data/lib/rouge/lexers/bsl.rb +81 -0
  161. data/lib/rouge/lexers/c.rb +217 -0
  162. data/lib/rouge/lexers/ceylon.rb +123 -0
  163. data/lib/rouge/lexers/cfscript.rb +153 -0
  164. data/lib/rouge/lexers/clojure.rb +112 -0
  165. data/lib/rouge/lexers/cmake.rb +206 -0
  166. data/lib/rouge/lexers/coffeescript.rb +174 -0
  167. data/lib/rouge/lexers/common_lisp.rb +345 -0
  168. data/lib/rouge/lexers/conf.rb +24 -0
  169. data/lib/rouge/lexers/console.rb +136 -0
  170. data/lib/rouge/lexers/coq.rb +191 -0
  171. data/lib/rouge/lexers/cpp.rb +78 -0
  172. data/lib/rouge/lexers/csharp.rb +114 -0
  173. data/lib/rouge/lexers/css.rb +273 -0
  174. data/lib/rouge/lexers/d.rb +176 -0
  175. data/lib/rouge/lexers/dart.rb +104 -0
  176. data/lib/rouge/lexers/diff.rb +31 -0
  177. data/lib/rouge/lexers/digdag.rb +72 -0
  178. data/lib/rouge/lexers/docker.rb +50 -0
  179. data/lib/rouge/lexers/dot.rb +68 -0
  180. data/lib/rouge/lexers/eiffel.rb +65 -0
  181. data/lib/rouge/lexers/elixir.rb +133 -0
  182. data/lib/rouge/lexers/erb.rb +56 -0
  183. data/lib/rouge/lexers/erlang.rb +118 -0
  184. data/lib/rouge/lexers/factor.rb +302 -0
  185. data/lib/rouge/lexers/fortran.rb +170 -0
  186. data/lib/rouge/lexers/fsharp.rb +118 -0
  187. data/lib/rouge/lexers/gal.rb +50 -0
  188. data/lib/rouge/lexers/gherkin.rb +137 -0
  189. data/lib/rouge/lexers/gherkin/keywords.rb +14 -0
  190. data/lib/rouge/lexers/glsl.rb +135 -0
  191. data/lib/rouge/lexers/go.rb +178 -0
  192. data/lib/rouge/lexers/gradle.rb +37 -0
  193. data/lib/rouge/lexers/graphql.rb +243 -0
  194. data/lib/rouge/lexers/groovy.rb +112 -0
  195. data/lib/rouge/lexers/haml.rb +233 -0
  196. data/lib/rouge/lexers/handlebars.rb +79 -0
  197. data/lib/rouge/lexers/haskell.rb +183 -0
  198. data/lib/rouge/lexers/html.rb +138 -0
  199. data/lib/rouge/lexers/http.rb +80 -0
  200. data/lib/rouge/lexers/hylang.rb +93 -0
  201. data/lib/rouge/lexers/idlang.rb +316 -0
  202. data/lib/rouge/lexers/igorpro.rb +407 -0
  203. data/lib/rouge/lexers/ini.rb +57 -0
  204. data/lib/rouge/lexers/io.rb +68 -0
  205. data/lib/rouge/lexers/irb.rb +66 -0
  206. data/lib/rouge/lexers/java.rb +87 -0
  207. data/lib/rouge/lexers/javascript.rb +269 -0
  208. data/lib/rouge/lexers/jinja.rb +137 -0
  209. data/lib/rouge/lexers/json.rb +29 -0
  210. data/lib/rouge/lexers/json_doc.rb +23 -0
  211. data/lib/rouge/lexers/jsonnet.rb +151 -0
  212. data/lib/rouge/lexers/jsx.rb +102 -0
  213. data/lib/rouge/lexers/julia.rb +172 -0
  214. data/lib/rouge/lexers/kotlin.rb +84 -0
  215. data/lib/rouge/lexers/lasso.rb +217 -0
  216. data/lib/rouge/lexers/lasso/keywords.yml +446 -0
  217. data/lib/rouge/lexers/liquid.rb +287 -0
  218. data/lib/rouge/lexers/literate_coffeescript.rb +33 -0
  219. data/lib/rouge/lexers/literate_haskell.rb +36 -0
  220. data/lib/rouge/lexers/llvm.rb +84 -0
  221. data/lib/rouge/lexers/lua.rb +125 -0
  222. data/lib/rouge/lexers/lua/builtins.rb +22 -0
  223. data/lib/rouge/lexers/make.rb +116 -0
  224. data/lib/rouge/lexers/markdown.rb +154 -0
  225. data/lib/rouge/lexers/matlab.rb +75 -0
  226. data/lib/rouge/lexers/matlab/builtins.rb +11 -0
  227. data/lib/rouge/lexers/moonscript.rb +114 -0
  228. data/lib/rouge/lexers/mosel.rb +231 -0
  229. data/lib/rouge/lexers/mxml.rb +68 -0
  230. data/lib/rouge/lexers/nasm.rb +203 -0
  231. data/lib/rouge/lexers/nginx.rb +71 -0
  232. data/lib/rouge/lexers/nim.rb +152 -0
  233. data/lib/rouge/lexers/objective_c.rb +208 -0
  234. data/lib/rouge/lexers/ocaml.rb +100 -0
  235. data/lib/rouge/lexers/pascal.rb +66 -0
  236. data/lib/rouge/lexers/perl.rb +197 -0
  237. data/lib/rouge/lexers/php.rb +193 -0
  238. data/lib/rouge/lexers/php/builtins.rb +194 -0
  239. data/lib/rouge/lexers/plain_text.rb +26 -0
  240. data/lib/rouge/lexers/plist.rb +49 -0
  241. data/lib/rouge/lexers/pony.rb +93 -0
  242. data/lib/rouge/lexers/powershell.rb +132 -0
  243. data/lib/rouge/lexers/praat.rb +350 -0
  244. data/lib/rouge/lexers/prolog.rb +64 -0
  245. data/lib/rouge/lexers/prometheus.rb +121 -0
  246. data/lib/rouge/lexers/properties.rb +55 -0
  247. data/lib/rouge/lexers/protobuf.rb +70 -0
  248. data/lib/rouge/lexers/puppet.rb +128 -0
  249. data/lib/rouge/lexers/python.rb +231 -0
  250. data/lib/rouge/lexers/q.rb +124 -0
  251. data/lib/rouge/lexers/qml.rb +73 -0
  252. data/lib/rouge/lexers/r.rb +89 -0
  253. data/lib/rouge/lexers/racket.rb +542 -0
  254. data/lib/rouge/lexers/ruby.rb +437 -0
  255. data/lib/rouge/lexers/rust.rb +192 -0
  256. data/lib/rouge/lexers/sass.rb +74 -0
  257. data/lib/rouge/lexers/sass/common.rb +180 -0
  258. data/lib/rouge/lexers/scala.rb +142 -0
  259. data/lib/rouge/lexers/scheme.rb +112 -0
  260. data/lib/rouge/lexers/scss.rb +34 -0
  261. data/lib/rouge/lexers/sed.rb +172 -0
  262. data/lib/rouge/lexers/shell.rb +180 -0
  263. data/lib/rouge/lexers/sieve.rb +96 -0
  264. data/lib/rouge/lexers/slim.rb +228 -0
  265. data/lib/rouge/lexers/smalltalk.rb +116 -0
  266. data/lib/rouge/lexers/smarty.rb +91 -0
  267. data/lib/rouge/lexers/sml.rb +348 -0
  268. data/lib/rouge/lexers/sql.rb +140 -0
  269. data/lib/rouge/lexers/swift.rb +161 -0
  270. data/lib/rouge/lexers/tap.rb +91 -0
  271. data/lib/rouge/lexers/tcl.rb +192 -0
  272. data/lib/rouge/lexers/tex.rb +69 -0
  273. data/lib/rouge/lexers/toml.rb +71 -0
  274. data/lib/rouge/lexers/tsx.rb +19 -0
  275. data/lib/rouge/lexers/tulip.rb +107 -0
  276. data/lib/rouge/lexers/turtle.rb +72 -0
  277. data/lib/rouge/lexers/twig.rb +39 -0
  278. data/lib/rouge/lexers/typescript.rb +22 -0
  279. data/lib/rouge/lexers/typescript/common.rb +33 -0
  280. data/lib/rouge/lexers/vala.rb +77 -0
  281. data/lib/rouge/lexers/vb.rb +164 -0
  282. data/lib/rouge/lexers/verilog.rb +164 -0
  283. data/lib/rouge/lexers/vhdl.rb +97 -0
  284. data/lib/rouge/lexers/viml.rb +101 -0
  285. data/lib/rouge/lexers/viml/keywords.rb +12 -0
  286. data/lib/rouge/lexers/vue.rb +124 -0
  287. data/lib/rouge/lexers/wollok.rb +107 -0
  288. data/lib/rouge/lexers/xml.rb +59 -0
  289. data/lib/rouge/lexers/yaml.rb +373 -0
  290. data/lib/rouge/plugins/redcarpet.rb +30 -0
  291. data/lib/rouge/regex_lexer.rb +441 -0
  292. data/lib/rouge/template_lexer.rb +20 -0
  293. data/lib/rouge/text_analyzer.rb +48 -0
  294. data/lib/rouge/theme.rb +213 -0
  295. data/lib/rouge/themes/base16.rb +130 -0
  296. data/lib/rouge/themes/colorful.rb +67 -0
  297. data/lib/rouge/themes/github.rb +71 -0
  298. data/lib/rouge/themes/gruvbox.rb +167 -0
  299. data/lib/rouge/themes/igor_pro.rb +20 -0
  300. data/lib/rouge/themes/molokai.rb +82 -0
  301. data/lib/rouge/themes/monokai.rb +92 -0
  302. data/lib/rouge/themes/monokai_sublime.rb +90 -0
  303. data/lib/rouge/themes/pastie.rb +69 -0
  304. data/lib/rouge/themes/thankful_eyes.rb +74 -0
  305. data/lib/rouge/themes/tulip.rb +69 -0
  306. data/lib/rouge/token.rb +182 -0
  307. data/lib/rouge/util.rb +101 -0
  308. data/lib/rouge/version.rb +7 -0
  309. data/lib/rougegal.rb +80 -0
  310. data/rougegal.gemspec +18 -0
  311. metadata +356 -0
@@ -0,0 +1,30 @@
1
+ # -*- coding: utf-8 -*- #
2
+
3
+ # this file is not require'd from the root. To use this plugin, run:
4
+ #
5
+ # require 'rouge/plugins/redcarpet'
6
+
7
module Rouge
  module Plugins
    # Mixin providing a `block_code` hook that highlights code blocks
    # with Rouge.
    module Redcarpet
      # Lexes `code` and renders it through the formatter returned by
      # {#rouge_formatter}.
      #
      # @param code [String] the contents of the code block. It is never
      #   modified in place.
      # @param language the language hint handed to `Lexer.find_fancy`;
      #   `Lexers::PlainText` is used when no lexer is found.
      # @return the result of `formatter.format` for the lexed code
      def block_code(code, language)
        lexer = Lexer.find_fancy(language, code) || Lexers::PlainText

        # XXX HACK: Redcarpet strips hard tabs out of code blocks,
        # so we assume you're not using leading spaces that aren't tabs,
        # and just replace them here.
        #
        # Work on a copy: `gsub!` would alter the caller's string and
        # raises FrozenError when that string is frozen.
        # NOTE(review): the pattern matches one leading space as written
        # here - confirm the intended indent width.
        code = code.gsub(/^ /, "\t") if lexer.tag == 'make'

        formatter = rouge_formatter(lexer)
        formatter.format(lexer.lex(code))
      end

      # override this method for custom formatting behavior
      #
      # @param lexer the lexer chosen for the block; its `tag` is appended
      #   to the CSS class
      def rouge_formatter(lexer)
        Formatters::HTMLLegacy.new(:css_class => "highlight #{lexer.tag}")
      end
    end
  end
end
@@ -0,0 +1,441 @@
1
+ # -*- coding: utf-8 -*- #
2
+
3
+ module Rouge
4
+ # @abstract
5
+ # A stateful lexer that uses sets of regular expressions to
6
+ # tokenize a string. Most lexers are instances of RegexLexer.
7
+ class RegexLexer < Lexer
8
# Pairs a regular expression with the callback to run when that
# expression matches.
#
# @see StateDSL#rule
class Rule
  attr_reader :callback, :re, :beginning_of_line

  # @param re [Regexp] the pattern to test
  # @param callback [Proc] what to run when the pattern matches
  def initialize(re, callback)
    @callback = callback
    @re = re
    # remember whether the pattern's source opens with `^`
    @beginning_of_line = re.source.start_with?('^')
  end

  def inspect
    "#<Rule #{re.inspect}>"
  end
end
26
+
27
# A named collection of rules, which can be tested for or mixed in.
#
# @see RegexLexer.state
class State
  attr_reader :name
  attr_reader :rules

  # @param name the name of this state
  # @param rules the entries that make up this state
  def initialize(name, rules)
    @name, @rules = name, rules
  end

  def inspect
    "#<#{self.class.name} #{name.inspect}>"
  end
end
42
+
43
# Collects the rules declared inside a `state` block. The block is not
# evaluated until the state is first converted with {#to_state}.
class StateDSL
  attr_reader :rules

  # @param name the name of the state being defined
  # @param defn the block holding the `rule` / `mixin` declarations
  def initialize(name, &defn)
    @name = name
    @defn = defn
    @rules = []
    @loaded = false
  end

  # Evaluates the definition (once) and builds a State from it. String
  # entries - recorded by {#mixin} - are resolved through
  # `lexer_class.get_state`.
  def to_state(lexer_class)
    load!
    resolved = @rules.map do |entry|
      next entry unless entry.is_a?(String)
      lexer_class.get_state(entry)
    end
    State.new(@name, resolved)
  end

  # Returns a new StateDSL of the same name whose definition runs
  # `defn` first and this DSL's own definition afterwards.
  def prepended(&defn)
    inherited = @defn
    StateDSL.new(@name) do
      instance_eval(&defn)
      instance_eval(&inherited)
    end
  end

  # Returns a new StateDSL of the same name whose definition runs this
  # DSL's own definition first and `defn` afterwards.
  def appended(&defn)
    inherited = @defn
    StateDSL.new(@name) do
      instance_eval(&inherited)
      instance_eval(&defn)
    end
  end

  protected

  # Define a new rule for this state.
  #
  # @overload rule(re, token, next_state=nil)
  # @overload rule(re, &callback)
  #
  # @param [Regexp] re
  #   a regular expression for this rule to test.
  # @param [String] tok
  #   the token type to yield if `re` matches.
  # @param [#to_s] next_state
  #   (optional) a state to push onto the stack if `re` matches.
  #   If `next_state` is `:pop!`, the state stack will be popped
  #   instead.
  # @param [Proc] callback
  #   a block that will be evaluated in the context of the lexer
  #   if `re` matches. This block has access to a number of lexer
  #   methods, including {RegexLexer#push}, {RegexLexer#pop!},
  #   {RegexLexer#token}, and {RegexLexer#delegate}. The first
  #   argument can be used to access the match groups.
  def rule(re, tok=nil, next_state=nil, &callback)
    if tok.nil? && callback.nil?
      raise "please pass `rule` a token to yield or a callback"
    end

    # an explicit block wins; `next_state` is only inspected without one
    callback ||= transition_callback(tok, next_state)

    rules << Rule.new(re, callback)
  end

  # Mix in the rules from another state into this state. The rules
  # from the mixed-in state will be tried in order before moving on
  # to the rest of the rules in this state.
  def mixin(state)
    rules << state.to_s
  end

  private

  def load!
    return if @loaded
    @loaded = true
    instance_eval(&@defn)
  end

  # Builds the default callback for a `rule(re, tok, next_state)`
  # declaration. The procs read @debug, @output_stream, @stack and
  # @states, so they are meant to be evaluated in the lexer's context.
  def transition_callback(tok, next_state)
    case next_state
    when :pop!
      proc do |stream|
        puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
        @output_stream.call(tok, stream[0])
        puts " popping stack: 1" if @debug
        @stack.pop or raise 'empty stack!'
      end
    when :push
      # re-enter whatever state is currently on top
      proc do |stream|
        puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
        @output_stream.call(tok, stream[0])
        puts " pushing :#{@stack.last.name}" if @debug
        @stack.push(@stack.last)
      end
    when Symbol
      proc do |stream|
        puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
        @output_stream.call(tok, stream[0])
        state = @states[next_state] || self.class.get_state(next_state)
        puts " pushing :#{state.name}" if @debug
        @stack.push(state)
      end
    when nil
      proc do |stream|
        puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
        @output_stream.call(tok, stream[0])
      end
    else
      raise "invalid next state: #{next_state.inspect}"
    end
  end
end
150
+
151
# The hash of already-built states for this lexer, created empty on
# first access.
# @see state
def self.states
  @states = {} unless @states
  @states
end
156
+
157
# The table of state definitions for this lexer class. When a class has
# no table of its own yet, one is created as an InheritableHash wrapping
# the superclass's table.
def self.state_definitions
  return @state_definitions if @state_definitions
  @state_definitions = InheritableHash.new(superclass.state_definitions)
end
# the class this is declared in starts from a plain, empty hash
@state_definitions = {}
161
+
162
# Swaps in a new definition for the state `name` and drops any state
# already built from the old definition.
#
# Built states are cached under Symbol keys (see `get_state`), while
# definitions are stored under String keys (see `state`). The name is
# therefore normalised for each table; clearing the cache with the
# String name, as passed by `prepend` / `append`, would leave the stale
# state in place.
#
# @param name [String, Symbol] the state to replace
# @param new_defn the definition to store for that state
# @return the stored definition
def self.replace_state(name, new_defn)
  states.delete(name.to_sym)
  state_definitions[name.to_s] = new_defn
end
166
+
167
# The routines to run at the beginning of a fresh lex. When a class has
# no list of its own yet, one is created as an InheritableList wrapping
# the superclass's list.
# @see start
def self.start_procs
  return @start_procs if @start_procs
  @start_procs = InheritableList.new(superclass.start_procs)
end
# the class this is declared in starts from a plain, empty array
@start_procs = []
173
+
174
# Register a block to be run at the start of every fresh lex.
#
# @example
#   start { puts "I'm lexing a new string!" }
def self.start(&block)
  start_procs << block
end
181
+
182
# Declare a state of this lexer under the given name.
# The block will be evaluated in the context of a {StateDSL}.
def self.state(name, &b)
  key = name.to_s
  state_definitions[key] = StateDSL.new(key, &b)
end
188
+
189
# Replaces the named state with one whose definition runs the given
# block before the existing definition.
#
# Raises if no state of that name has been defined.
def self.prepend(name, &b)
  key = name.to_s
  existing = state_definitions[key]
  raise "no such state #{key.inspect}" unless existing
  replace_state(key, existing.prepended(&b))
end
194
+
195
# Replaces the named state with one whose definition runs the existing
# definition before the given block.
#
# Raises if no state of that name has been defined.
def self.append(name, &b)
  key = name.to_s
  existing = state_definitions[key]
  raise "no such state #{key.inspect}" unless existing
  replace_state(key, existing.appended(&b))
end
200
+
201
# @private
#
# Resolves a state by name. State objects are returned untouched;
# otherwise the state is built from its definition on first use and
# cached in `states` under the Symbol form of the name.
def self.get_state(name)
  return name if name.is_a?(State)

  key = name.to_sym
  cached = states[key]
  return cached if cached

  definition = state_definitions[name.to_s]
  raise "unknown state: #{name.inspect}" unless definition
  states[key] = definition.to_state(self)
end
210
+
211
# @private
#
# Instance-level shortcut that forwards to the class's `get_state`.
def get_state(state_name)
  lexer_class = self.class
  lexer_class.get_state(state_name)
end
215
+
216
# The state stack. On first access it is created holding only the
# `:root` state.
# It is an error for this stack to be empty.
# @see #state
def stack
  return @stack if @stack
  @stack = [get_state(:root)]
end
222
+
223
# The current state - i.e. the one on top of the state stack.
#
# NB: if the state stack is empty, this raises rather than
# returning nil.
def state
  top = stack.last
  raise 'empty stack!' unless top
  top
end
230
+
231
# Return this lexer to its initial condition: the stack and the current
# stream are forgotten, then every start proc of the class is evaluated
# in the context of this lexer.
def reset!
  @stack = nil
  @current_stream = nil

  puts "start blocks" if @debug && self.class.start_procs.any?
  self.class.start_procs.each { |start_proc| instance_eval(&start_proc) }
end
242
+
243
# This implements the lexer protocol, by yielding [token, value] pairs.
#
# Lexing proceeds as follows, until the stream is empty:
#
# 1. We look at the state on top of the stack (which by default is
#    `[:root]`).
# 2. Each rule in that state is tried until one is successful. If one
#    is found, that rule's callback is evaluated - which may yield
#    tokens and manipulate the state stack. Otherwise, one character
#    is consumed with an `'Error'` token, and we continue at (1.)
#
# @see #step #step (where (2.) is implemented)
def stream_tokens(str, &b)
  scanner = StringScanner.new(str)

  @current_stream = scanner
  @output_stream = b
  @states = self.class.states
  @null_steps = 0

  until scanner.eos?
    if @debug
      puts "lexer: #{self.class.tag}"
      puts "stack: #{stack.map(&:name).map(&:to_sym).inspect}"
      puts "stream: #{scanner.peek(20).inspect}"
    end

    next if step(state, scanner)

    # nothing matched: emit a single character as an Error token
    puts " no match, yielding Error" if @debug
    b.call(Token::Tokens::Error, scanner.getch)
  end
end
278
+
279
# The number of successive scans permitted without consuming
# the input stream. If this is exceeded, the match fails.
MAX_NULL_SCANS = 5

# Runs one step of the lex. Rules in the current state are tried
# in order until one matches, at which point its callback is called.
# Entries that are themselves States (mixins) are stepped recursively.
#
# @return true if a rule was tried successfully
# @return false otherwise.
def step(state, stream)
  state.rules.each do |rule|
    if rule.is_a?(State)
      puts " entering mixin #{rule.name}" if @debug
      return true if step(rule, stream)
      puts " exiting mixin #{rule.name}" if @debug
      next
    end

    puts " trying #{rule.inspect}" if @debug

    # XXX HACK XXX
    # StringScanner's implementation of ^ is b0rken.
    # see http://bugs.ruby-lang.org/issues/7092
    # TODO: this doesn't cover cases like /(a|^b)/, but it's
    # the most common, for now...
    next if rule.beginning_of_line && !stream.beginning_of_line?

    size = stream.skip(rule.re)
    next unless size

    puts " got #{stream[0].inspect}" if @debug

    instance_exec(stream, &rule.callback)

    if size.zero?
      # the match consumed nothing: count it, and give up once too many
      # of these happen in a row
      @null_steps += 1
      if @null_steps > MAX_NULL_SCANS
        puts " too many scans without consuming the string!" if @debug
        return false
      end
    else
      @null_steps = 0
    end

    return true
  end

  false
end
326
+
327
# Emit a single token.
#
# @param tok
#   the token type
# @param val
#   (optional) the string value to emit. When left out, the entire
#   last match of the current stream is used.
def token(tok, val=@current_stream[0])
  yield_token tok, val
end
337
+
338
# @deprecated
#
# Formerly yielded a token for the next matched group. It now always
# raises; use {#groups} instead.
def group(tok)
  raise RuntimeError, "RegexLexer#group is deprecated: use #groups instead"
end
345
+
346
# Emit one token per matched group of the current match: the first
# token type goes with group 1, the second with group 2, and so on.
def groups(*tokens)
  tokens.each.with_index(1) do |tok, group_number|
    yield_token(tok, @current_stream[group_number])
  end
  tokens
end
353
+
354
# Hand part of the lex over to another lexer. Its #lex method is called
# with `:continue` set to true, so that #reset! will not be called.
# In this way, a single lexer can be repeatedly delegated to while
# maintaining its own internal state stack.
#
# @param [#lex] lexer
#   The lexer or lexer class to delegate to
# @param [String] text
#   The text to delegate. This defaults to the last matched string.
def delegate(lexer, text=nil)
  puts " delegating to #{lexer.inspect}" if @debug
  source = text || @current_stream[0]

  lexer.lex(source, :continue => true) do |tok, val|
    puts " delegated token: #{tok.inspect}, #{val.inspect}" if @debug
    yield_token(tok, val)
  end
end
372
+
373
# Delegate to this lexer's own class.
#
# @param text the text to lex; passed straight through to {#delegate}
def recurse(text=nil)
  own_class = self.class
  delegate(own_class, text)
end
376
+
377
# Push a state onto the stack. With a name, that state is looked up;
# with only a block, a state is created on the fly using the
# {StateDSL}; with neither, the state currently on top is pushed again.
def push(state_name=nil, &b)
  target =
    if state_name
      get_state(state_name)
    elsif b
      StateDSL.new(b.inspect, &b).to_state(self.class)
    else
      # use the top of the stack by default
      state
    end

  puts " pushing :#{target.name}" if @debug
  stack.push(target)
end
393
+
394
# Pop the state stack. If a number is passed in, that many states are
# popped.
#
# Raises if the stack is already empty; always returns nil otherwise.
def pop!(times=1)
  frames = stack
  raise 'empty stack!' if frames.empty?

  puts " popping stack: #{times}" if @debug

  frames.pop(times)

  nil
end
405
+
406
# Swap the state on top of the stack for the given one.
#
# Raises if the stack is empty.
def goto(state_name)
  frames = stack
  raise 'empty stack!' if frames.empty?

  puts " going to state :#{state_name} " if @debug
  frames[-1] = get_state(state_name)
end
413
+
414
# Empty the stack and put the `:root` state back on it.
def reset_stack
  puts ' resetting stack' if @debug
  stack.clear
  stack << get_state(:root)
end
420
+
421
# Whether a state named `state_name` appears anywhere in the state
# stack. The name is compared in its String form.
def in_state?(state_name)
  wanted = state_name.to_s
  stack.any? { |frame| frame.name == wanted }
end
428
+
429
# Whether the state on top of the state stack is named `state_name`.
# The name is compared in its String form.
def state?(state_name)
  wanted = state_name.to_s
  wanted == state.name
end
433
+
434
+ private
435
# Passes a token on to the output stream. Nil and empty values are
# skipped without emitting anything.
def yield_token(tok, val)
  return if val.nil?
  return if val.empty?

  puts " yielding #{tok.qualname}, #{val.inspect}" if @debug
  @output_stream.yield(tok, val)
end
440
+ end
441
+ end