rouge_ecl 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/LICENSE +186 -0
  4. data/bin/rougify +17 -0
  5. data/lib/rouge.rb +82 -0
  6. data/lib/rouge/cli.rb +429 -0
  7. data/lib/rouge/demos/abap +6 -0
  8. data/lib/rouge/demos/actionscript +4 -0
  9. data/lib/rouge/demos/apache +21 -0
  10. data/lib/rouge/demos/apiblueprint +33 -0
  11. data/lib/rouge/demos/applescript +2 -0
  12. data/lib/rouge/demos/awk +4 -0
  13. data/lib/rouge/demos/biml +38 -0
  14. data/lib/rouge/demos/bsl +7 -0
  15. data/lib/rouge/demos/c +8 -0
  16. data/lib/rouge/demos/ceylon +7 -0
  17. data/lib/rouge/demos/cfscript +18 -0
  18. data/lib/rouge/demos/clojure +5 -0
  19. data/lib/rouge/demos/cmake +7 -0
  20. data/lib/rouge/demos/coffeescript +5 -0
  21. data/lib/rouge/demos/common_lisp +1 -0
  22. data/lib/rouge/demos/conf +4 -0
  23. data/lib/rouge/demos/console +6 -0
  24. data/lib/rouge/demos/coq +13 -0
  25. data/lib/rouge/demos/cpp +8 -0
  26. data/lib/rouge/demos/csharp +5 -0
  27. data/lib/rouge/demos/css +4 -0
  28. data/lib/rouge/demos/d +16 -0
  29. data/lib/rouge/demos/dart +6 -0
  30. data/lib/rouge/demos/diff +7 -0
  31. data/lib/rouge/demos/digdag +19 -0
  32. data/lib/rouge/demos/docker +9 -0
  33. data/lib/rouge/demos/dot +5 -0
  34. data/lib/rouge/demos/ecl +1 -0
  35. data/lib/rouge/demos/eiffel +30 -0
  36. data/lib/rouge/demos/elixir +1 -0
  37. data/lib/rouge/demos/elm +4 -0
  38. data/lib/rouge/demos/erb +1 -0
  39. data/lib/rouge/demos/erlang +7 -0
  40. data/lib/rouge/demos/factor +5 -0
  41. data/lib/rouge/demos/fortran +22 -0
  42. data/lib/rouge/demos/fsharp +12 -0
  43. data/lib/rouge/demos/gherkin +17 -0
  44. data/lib/rouge/demos/glsl +14 -0
  45. data/lib/rouge/demos/go +7 -0
  46. data/lib/rouge/demos/gradle +10 -0
  47. data/lib/rouge/demos/graphql +17 -0
  48. data/lib/rouge/demos/groovy +9 -0
  49. data/lib/rouge/demos/hack +5 -0
  50. data/lib/rouge/demos/haml +5 -0
  51. data/lib/rouge/demos/handlebars +7 -0
  52. data/lib/rouge/demos/haskell +6 -0
  53. data/lib/rouge/demos/html +8 -0
  54. data/lib/rouge/demos/http +14 -0
  55. data/lib/rouge/demos/hylang +10 -0
  56. data/lib/rouge/demos/idlang +8 -0
  57. data/lib/rouge/demos/igorpro +9 -0
  58. data/lib/rouge/demos/ini +4 -0
  59. data/lib/rouge/demos/io +11 -0
  60. data/lib/rouge/demos/irb +4 -0
  61. data/lib/rouge/demos/irb_output +2 -0
  62. data/lib/rouge/demos/java +5 -0
  63. data/lib/rouge/demos/javascript +1 -0
  64. data/lib/rouge/demos/jinja +9 -0
  65. data/lib/rouge/demos/json +1 -0
  66. data/lib/rouge/demos/json-doc +1 -0
  67. data/lib/rouge/demos/jsonnet +28 -0
  68. data/lib/rouge/demos/jsx +17 -0
  69. data/lib/rouge/demos/julia +11 -0
  70. data/lib/rouge/demos/kotlin +3 -0
  71. data/lib/rouge/demos/lasso +12 -0
  72. data/lib/rouge/demos/liquid +11 -0
  73. data/lib/rouge/demos/literate_coffeescript +3 -0
  74. data/lib/rouge/demos/literate_haskell +7 -0
  75. data/lib/rouge/demos/llvm +20 -0
  76. data/lib/rouge/demos/lua +12 -0
  77. data/lib/rouge/demos/make +6 -0
  78. data/lib/rouge/demos/markdown +4 -0
  79. data/lib/rouge/demos/matlab +6 -0
  80. data/lib/rouge/demos/moonscript +16 -0
  81. data/lib/rouge/demos/mosel +10 -0
  82. data/lib/rouge/demos/mxml +22 -0
  83. data/lib/rouge/demos/nasm +26 -0
  84. data/lib/rouge/demos/nginx +5 -0
  85. data/lib/rouge/demos/nim +27 -0
  86. data/lib/rouge/demos/nix +19 -0
  87. data/lib/rouge/demos/objective_c +18 -0
  88. data/lib/rouge/demos/ocaml +12 -0
  89. data/lib/rouge/demos/pascal +14 -0
  90. data/lib/rouge/demos/perl +5 -0
  91. data/lib/rouge/demos/php +3 -0
  92. data/lib/rouge/demos/plaintext +1 -0
  93. data/lib/rouge/demos/plist +142 -0
  94. data/lib/rouge/demos/pony +17 -0
  95. data/lib/rouge/demos/powershell +49 -0
  96. data/lib/rouge/demos/praat +26 -0
  97. data/lib/rouge/demos/prolog +9 -0
  98. data/lib/rouge/demos/prometheus +9 -0
  99. data/lib/rouge/demos/properties +7 -0
  100. data/lib/rouge/demos/protobuf +5 -0
  101. data/lib/rouge/demos/puppet +6 -0
  102. data/lib/rouge/demos/python +6 -0
  103. data/lib/rouge/demos/q +2 -0
  104. data/lib/rouge/demos/qml +9 -0
  105. data/lib/rouge/demos/r +8 -0
  106. data/lib/rouge/demos/racket +24 -0
  107. data/lib/rouge/demos/ruby +9 -0
  108. data/lib/rouge/demos/rust +12 -0
  109. data/lib/rouge/demos/sass +3 -0
  110. data/lib/rouge/demos/scala +3 -0
  111. data/lib/rouge/demos/scheme +4 -0
  112. data/lib/rouge/demos/scss +5 -0
  113. data/lib/rouge/demos/sed +4 -0
  114. data/lib/rouge/demos/shell +2 -0
  115. data/lib/rouge/demos/sieve +10 -0
  116. data/lib/rouge/demos/slim +17 -0
  117. data/lib/rouge/demos/smalltalk +6 -0
  118. data/lib/rouge/demos/smarty +12 -0
  119. data/lib/rouge/demos/sml +4 -0
  120. data/lib/rouge/demos/sql +1 -0
  121. data/lib/rouge/demos/swift +5 -0
  122. data/lib/rouge/demos/tap +5 -0
  123. data/lib/rouge/demos/tcl +1 -0
  124. data/lib/rouge/demos/tex +1 -0
  125. data/lib/rouge/demos/toml +9 -0
  126. data/lib/rouge/demos/tsx +17 -0
  127. data/lib/rouge/demos/tulip +13 -0
  128. data/lib/rouge/demos/turtle +26 -0
  129. data/lib/rouge/demos/twig +9 -0
  130. data/lib/rouge/demos/typescript +1 -0
  131. data/lib/rouge/demos/vala +8 -0
  132. data/lib/rouge/demos/vb +4 -0
  133. data/lib/rouge/demos/verilog +27 -0
  134. data/lib/rouge/demos/vhdl +23 -0
  135. data/lib/rouge/demos/viml +14 -0
  136. data/lib/rouge/demos/vue +11 -0
  137. data/lib/rouge/demos/wollok +11 -0
  138. data/lib/rouge/demos/xml +2 -0
  139. data/lib/rouge/demos/yaml +4 -0
  140. data/lib/rouge/formatter.rb +75 -0
  141. data/lib/rouge/formatters/html.rb +37 -0
  142. data/lib/rouge/formatters/html_inline.rb +30 -0
  143. data/lib/rouge/formatters/html_legacy.rb +44 -0
  144. data/lib/rouge/formatters/html_linewise.rb +27 -0
  145. data/lib/rouge/formatters/html_pygments.rb +16 -0
  146. data/lib/rouge/formatters/html_table.rb +61 -0
  147. data/lib/rouge/formatters/null.rb +19 -0
  148. data/lib/rouge/formatters/terminal256.rb +180 -0
  149. data/lib/rouge/guesser.rb +55 -0
  150. data/lib/rouge/guessers/disambiguation.rb +101 -0
  151. data/lib/rouge/guessers/filename.rb +25 -0
  152. data/lib/rouge/guessers/glob_mapping.rb +43 -0
  153. data/lib/rouge/guessers/mimetype.rb +14 -0
  154. data/lib/rouge/guessers/modeline.rb +44 -0
  155. data/lib/rouge/guessers/source.rb +29 -0
  156. data/lib/rouge/guessers/util.rb +32 -0
  157. data/lib/rouge/lexer.rb +461 -0
  158. data/lib/rouge/lexers/abap.rb +238 -0
  159. data/lib/rouge/lexers/actionscript.rb +195 -0
  160. data/lib/rouge/lexers/apache.rb +71 -0
  161. data/lib/rouge/lexers/apache/keywords.yml +764 -0
  162. data/lib/rouge/lexers/apiblueprint.rb +47 -0
  163. data/lib/rouge/lexers/apple_script.rb +367 -0
  164. data/lib/rouge/lexers/awk.rb +161 -0
  165. data/lib/rouge/lexers/biml.rb +41 -0
  166. data/lib/rouge/lexers/bsl.rb +81 -0
  167. data/lib/rouge/lexers/c.rb +212 -0
  168. data/lib/rouge/lexers/ceylon.rb +123 -0
  169. data/lib/rouge/lexers/cfscript.rb +153 -0
  170. data/lib/rouge/lexers/clojure.rb +112 -0
  171. data/lib/rouge/lexers/cmake.rb +206 -0
  172. data/lib/rouge/lexers/coffeescript.rb +174 -0
  173. data/lib/rouge/lexers/common_lisp.rb +345 -0
  174. data/lib/rouge/lexers/conf.rb +24 -0
  175. data/lib/rouge/lexers/console.rb +136 -0
  176. data/lib/rouge/lexers/coq.rb +187 -0
  177. data/lib/rouge/lexers/cpp.rb +78 -0
  178. data/lib/rouge/lexers/csharp.rb +114 -0
  179. data/lib/rouge/lexers/css.rb +273 -0
  180. data/lib/rouge/lexers/d.rb +176 -0
  181. data/lib/rouge/lexers/dart.rb +104 -0
  182. data/lib/rouge/lexers/diff.rb +31 -0
  183. data/lib/rouge/lexers/digdag.rb +68 -0
  184. data/lib/rouge/lexers/docker.rb +50 -0
  185. data/lib/rouge/lexers/dot.rb +68 -0
  186. data/lib/rouge/lexers/ecl.rb +138 -0
  187. data/lib/rouge/lexers/eiffel.rb +65 -0
  188. data/lib/rouge/lexers/elixir.rb +133 -0
  189. data/lib/rouge/lexers/elm.rb +89 -0
  190. data/lib/rouge/lexers/erb.rb +52 -0
  191. data/lib/rouge/lexers/erlang.rb +114 -0
  192. data/lib/rouge/lexers/factor.rb +302 -0
  193. data/lib/rouge/lexers/fortran.rb +176 -0
  194. data/lib/rouge/lexers/fsharp.rb +118 -0
  195. data/lib/rouge/lexers/gherkin.rb +137 -0
  196. data/lib/rouge/lexers/gherkin/keywords.rb +14 -0
  197. data/lib/rouge/lexers/glsl.rb +135 -0
  198. data/lib/rouge/lexers/go.rb +174 -0
  199. data/lib/rouge/lexers/gradle.rb +37 -0
  200. data/lib/rouge/lexers/graphql.rb +243 -0
  201. data/lib/rouge/lexers/groovy.rb +112 -0
  202. data/lib/rouge/lexers/hack.rb +48 -0
  203. data/lib/rouge/lexers/haml.rb +229 -0
  204. data/lib/rouge/lexers/handlebars.rb +79 -0
  205. data/lib/rouge/lexers/haskell.rb +182 -0
  206. data/lib/rouge/lexers/html.rb +139 -0
  207. data/lib/rouge/lexers/http.rb +80 -0
  208. data/lib/rouge/lexers/hylang.rb +93 -0
  209. data/lib/rouge/lexers/idlang.rb +310 -0
  210. data/lib/rouge/lexers/igorpro.rb +408 -0
  211. data/lib/rouge/lexers/ini.rb +53 -0
  212. data/lib/rouge/lexers/io.rb +68 -0
  213. data/lib/rouge/lexers/irb.rb +66 -0
  214. data/lib/rouge/lexers/java.rb +87 -0
  215. data/lib/rouge/lexers/javascript.rb +281 -0
  216. data/lib/rouge/lexers/jinja.rb +137 -0
  217. data/lib/rouge/lexers/json.rb +29 -0
  218. data/lib/rouge/lexers/json_doc.rb +23 -0
  219. data/lib/rouge/lexers/jsonnet.rb +151 -0
  220. data/lib/rouge/lexers/jsx.rb +102 -0
  221. data/lib/rouge/lexers/julia.rb +172 -0
  222. data/lib/rouge/lexers/kotlin.rb +79 -0
  223. data/lib/rouge/lexers/lasso.rb +214 -0
  224. data/lib/rouge/lexers/lasso/keywords.yml +446 -0
  225. data/lib/rouge/lexers/liquid.rb +287 -0
  226. data/lib/rouge/lexers/literate_coffeescript.rb +33 -0
  227. data/lib/rouge/lexers/literate_haskell.rb +36 -0
  228. data/lib/rouge/lexers/llvm.rb +80 -0
  229. data/lib/rouge/lexers/lua.rb +125 -0
  230. data/lib/rouge/lexers/lua/builtins.rb +22 -0
  231. data/lib/rouge/lexers/make.rb +112 -0
  232. data/lib/rouge/lexers/markdown.rb +154 -0
  233. data/lib/rouge/lexers/matlab.rb +71 -0
  234. data/lib/rouge/lexers/matlab/builtins.rb +11 -0
  235. data/lib/rouge/lexers/moonscript.rb +114 -0
  236. data/lib/rouge/lexers/mosel.rb +231 -0
  237. data/lib/rouge/lexers/mxml.rb +68 -0
  238. data/lib/rouge/lexers/nasm.rb +198 -0
  239. data/lib/rouge/lexers/nginx.rb +71 -0
  240. data/lib/rouge/lexers/nim.rb +152 -0
  241. data/lib/rouge/lexers/nix.rb +205 -0
  242. data/lib/rouge/lexers/objective_c.rb +194 -0
  243. data/lib/rouge/lexers/ocaml.rb +100 -0
  244. data/lib/rouge/lexers/pascal.rb +66 -0
  245. data/lib/rouge/lexers/perl.rb +196 -0
  246. data/lib/rouge/lexers/php.rb +193 -0
  247. data/lib/rouge/lexers/php/builtins.rb +194 -0
  248. data/lib/rouge/lexers/plain_text.rb +26 -0
  249. data/lib/rouge/lexers/plist.rb +45 -0
  250. data/lib/rouge/lexers/pony.rb +93 -0
  251. data/lib/rouge/lexers/powershell.rb +678 -0
  252. data/lib/rouge/lexers/praat.rb +350 -0
  253. data/lib/rouge/lexers/prolog.rb +59 -0
  254. data/lib/rouge/lexers/prometheus.rb +121 -0
  255. data/lib/rouge/lexers/properties.rb +51 -0
  256. data/lib/rouge/lexers/protobuf.rb +70 -0
  257. data/lib/rouge/lexers/puppet.rb +128 -0
  258. data/lib/rouge/lexers/python.rb +232 -0
  259. data/lib/rouge/lexers/q.rb +123 -0
  260. data/lib/rouge/lexers/qml.rb +73 -0
  261. data/lib/rouge/lexers/r.rb +89 -0
  262. data/lib/rouge/lexers/racket.rb +543 -0
  263. data/lib/rouge/lexers/ruby.rb +437 -0
  264. data/lib/rouge/lexers/rust.rb +192 -0
  265. data/lib/rouge/lexers/sass.rb +74 -0
  266. data/lib/rouge/lexers/sass/common.rb +180 -0
  267. data/lib/rouge/lexers/scala.rb +142 -0
  268. data/lib/rouge/lexers/scheme.rb +112 -0
  269. data/lib/rouge/lexers/scss.rb +34 -0
  270. data/lib/rouge/lexers/sed.rb +172 -0
  271. data/lib/rouge/lexers/shell.rb +189 -0
  272. data/lib/rouge/lexers/sieve.rb +96 -0
  273. data/lib/rouge/lexers/slim.rb +228 -0
  274. data/lib/rouge/lexers/smalltalk.rb +116 -0
  275. data/lib/rouge/lexers/smarty.rb +80 -0
  276. data/lib/rouge/lexers/sml.rb +344 -0
  277. data/lib/rouge/lexers/sql.rb +140 -0
  278. data/lib/rouge/lexers/swift.rb +181 -0
  279. data/lib/rouge/lexers/tap.rb +87 -0
  280. data/lib/rouge/lexers/tcl.rb +192 -0
  281. data/lib/rouge/lexers/tex.rb +69 -0
  282. data/lib/rouge/lexers/toml.rb +67 -0
  283. data/lib/rouge/lexers/tsx.rb +19 -0
  284. data/lib/rouge/lexers/tulip.rb +106 -0
  285. data/lib/rouge/lexers/turtle.rb +63 -0
  286. data/lib/rouge/lexers/twig.rb +39 -0
  287. data/lib/rouge/lexers/typescript.rb +22 -0
  288. data/lib/rouge/lexers/typescript/common.rb +33 -0
  289. data/lib/rouge/lexers/vala.rb +77 -0
  290. data/lib/rouge/lexers/vb.rb +164 -0
  291. data/lib/rouge/lexers/verilog.rb +164 -0
  292. data/lib/rouge/lexers/vhdl.rb +97 -0
  293. data/lib/rouge/lexers/viml.rb +101 -0
  294. data/lib/rouge/lexers/viml/keywords.rb +12 -0
  295. data/lib/rouge/lexers/vue.rb +122 -0
  296. data/lib/rouge/lexers/wollok.rb +103 -0
  297. data/lib/rouge/lexers/xml.rb +57 -0
  298. data/lib/rouge/lexers/yaml.rb +373 -0
  299. data/lib/rouge/plugins/redcarpet.rb +30 -0
  300. data/lib/rouge/regex_lexer.rb +441 -0
  301. data/lib/rouge/template_lexer.rb +20 -0
  302. data/lib/rouge/text_analyzer.rb +49 -0
  303. data/lib/rouge/theme.rb +213 -0
  304. data/lib/rouge/themes/base16.rb +130 -0
  305. data/lib/rouge/themes/colorful.rb +67 -0
  306. data/lib/rouge/themes/github.rb +71 -0
  307. data/lib/rouge/themes/gruvbox.rb +167 -0
  308. data/lib/rouge/themes/igor_pro.rb +20 -0
  309. data/lib/rouge/themes/molokai.rb +82 -0
  310. data/lib/rouge/themes/monokai.rb +92 -0
  311. data/lib/rouge/themes/monokai_sublime.rb +90 -0
  312. data/lib/rouge/themes/pastie.rb +69 -0
  313. data/lib/rouge/themes/thankful_eyes.rb +74 -0
  314. data/lib/rouge/themes/tulip.rb +69 -0
  315. data/lib/rouge/token.rb +182 -0
  316. data/lib/rouge/util.rb +101 -0
  317. data/lib/rouge/version.rb +7 -0
  318. data/rouge.gemspec +23 -0
  319. metadata +365 -0
@@ -0,0 +1,30 @@
1
+ # -*- coding: utf-8 -*- #
2
+
3
+ # this file is not require'd from the root. To use this plugin, run:
4
+ #
5
+ # require 'rouge/plugins/redcarpet'
6
+
7
module Rouge
  module Plugins
    # Syntax-highlighting helpers for use with Redcarpet.
    #
    # This file is not require'd from the root; load it explicitly with
    # `require 'rouge/plugins/redcarpet'`.
    module Redcarpet
      # Highlight one block of source code.
      #
      # @param code [String] the raw text of the code block. It is never
      #   modified in place.
      # @param language [String, nil] a language hint handed to
      #   `Lexer.find_fancy`; `Lexers::PlainText` is used when no lexer is
      #   found.
      # @return whatever `rouge_formatter(lexer).format` returns for the
      #   lexed code.
      def block_code(code, language)
        lexer = Lexer.find_fancy(language, code) || Lexers::PlainText

        # XXX HACK: Redcarpet strips hard tabs out of code blocks, so for
        # makefiles we assume that leading whitespace was originally a tab
        # and put it back. Use `gsub` rather than `gsub!` so the caller's
        # string is left untouched and frozen input does not raise.
        # NOTE(review): confirm the indent width this pattern should match.
        code = code.gsub(/^ /, "\t") if lexer.tag == 'make'

        rouge_formatter(lexer).format(lexer.lex(code))
      end

      # Override this method for custom formatting behavior.
      #
      # @param lexer the lexer chosen for the current code block; its `tag`
      #   is appended to the CSS class.
      # @return [Formatters::HTMLLegacy]
      def rouge_formatter(lexer)
        Formatters::HTMLLegacy.new(:css_class => "highlight #{lexer.tag}")
      end
    end
  end
end
@@ -0,0 +1,441 @@
1
+ # -*- coding: utf-8 -*- #
2
+
3
+ module Rouge
4
+ # @abstract
5
+ # A stateful lexer that uses sets of regular expressions to
6
+ # tokenize a string. Most lexers are instances of RegexLexer.
7
+ class RegexLexer < Lexer
8
# A rule pairs a regular expression to test with a callback to run
# when the test succeeds.
#
# @see StateDSL#rule
class Rule
  # @return [Proc] the action to run when {#re} matches
  attr_reader :callback
  # @return [Regexp] the pattern tested against the input
  attr_reader :re
  # @return [Boolean] whether the pattern's source begins with `^`
  attr_reader :beginning_of_line

  # @param re [Regexp] the pattern to test
  # @param callback [Proc] the action to run on a match
  def initialize(re, callback)
    @re = re
    @callback = callback
    # Record a leading `^` once, up front, so it can be checked later
    # without re-inspecting the pattern source.
    @beginning_of_line = re.source.start_with?('^')
  end

  def inspect
    "#<Rule #{@re.inspect}>"
  end
end
26
+
27
+ # a State is a named set of rules that can be tested for or
28
+ # mixed in.
29
+ #
30
+ # @see RegexLexer.state
31
+ class State
32
+ attr_reader :name, :rules
33
+ def initialize(name, rules)
34
+ @name = name
35
+ @rules = rules
36
+ end
37
+
38
+ def inspect
39
+ "#<#{self.class.name} #{@name.inspect}>"
40
+ end
41
+ end
42
+
43
+ class StateDSL
44
+ attr_reader :rules
45
+ def initialize(name, &defn)
46
+ @name = name
47
+ @defn = defn
48
+ @rules = []
49
+ @loaded = false
50
+ end
51
+
52
+ def to_state(lexer_class)
53
+ load!
54
+ rules = @rules.map do |rule|
55
+ rule.is_a?(String) ? lexer_class.get_state(rule) : rule
56
+ end
57
+ State.new(@name, rules)
58
+ end
59
+
60
+ def prepended(&defn)
61
+ parent_defn = @defn
62
+ StateDSL.new(@name) do
63
+ instance_eval(&defn)
64
+ instance_eval(&parent_defn)
65
+ end
66
+ end
67
+
68
+ def appended(&defn)
69
+ parent_defn = @defn
70
+ StateDSL.new(@name) do
71
+ instance_eval(&parent_defn)
72
+ instance_eval(&defn)
73
+ end
74
+ end
75
+
76
+ protected
77
+ # Define a new rule for this state.
78
+ #
79
+ # @overload rule(re, token, next_state=nil)
80
+ # @overload rule(re, &callback)
81
+ #
82
+ # @param [Regexp] re
83
+ # a regular expression for this rule to test.
84
+ # @param [Token] tok
85
+ # the token type to yield if `re` matches.
86
+ # @param [Symbol] next_state
87
+ # (optional) the name of a state to push onto the stack if `re` matches.
88
+ # If `next_state` is `:pop!`, the state stack will be popped instead;
89
+ # if it is `:push`, the current state is pushed again.
90
+ # @param [Proc] callback
91
+ # a block that will be evaluated in the context of the lexer
92
+ # if `re` matches. This block has access to a number of lexer
93
+ # methods, including {RegexLexer#push}, {RegexLexer#pop!},
94
+ # {RegexLexer#token}, and {RegexLexer#delegate}. The first
95
+ # argument can be used to access the match groups.
96
+ def rule(re, tok=nil, next_state=nil, &callback)
97
+ if tok.nil? && callback.nil?
98
+ raise "please pass `rule` a token to yield or a callback"
99
+ end
100
+
101
+ callback ||= case next_state
102
+ when :pop!
103
+ proc do |stream|
104
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
105
+ @output_stream.call(tok, stream[0])
106
+ puts " popping stack: 1" if @debug
107
+ @stack.pop or raise 'empty stack!'
108
+ end
109
+ when :push
110
+ proc do |stream|
111
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
112
+ @output_stream.call(tok, stream[0])
113
+ puts " pushing :#{@stack.last.name}" if @debug
114
+ @stack.push(@stack.last)
115
+ end
116
+ when Symbol
117
+ proc do |stream|
118
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
119
+ @output_stream.call(tok, stream[0])
120
+ state = @states[next_state] || self.class.get_state(next_state)
121
+ puts " pushing :#{state.name}" if @debug
122
+ @stack.push(state)
123
+ end
124
+ when nil
125
+ proc do |stream|
126
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
127
+ @output_stream.call(tok, stream[0])
128
+ end
129
+ else
130
+ raise "invalid next state: #{next_state.inspect}"
131
+ end
132
+
133
+ rules << Rule.new(re, callback)
134
+ end
135
+
136
+ # Mix in the rules from another state into this state. The rules
137
+ # from the mixed-in state will be tried in order before moving on
138
+ # to the rest of the rules in this state.
139
+ def mixin(state)
140
+ rules << state.to_s
141
+ end
142
+
143
+ private
144
+ def load!
145
+ return if @loaded
146
+ @loaded = true
147
+ instance_eval(&@defn)
148
+ end
149
+ end
150
+
151
+ # The states hash for this lexer.
152
+ # @see state
153
+ def self.states
154
+ @states ||= {}
155
+ end
156
+
157
+ def self.state_definitions
158
+ @state_definitions ||= InheritableHash.new(superclass.state_definitions)
159
+ end
160
+ @state_definitions = {}
161
+
162
# Swap in a new definition for the named state and drop any compiled
# state cached for it, so the next `get_state` call rebuilds the state
# from the new definition.
#
# `get_state` caches compiled states under Symbol keys while definitions
# are stored under String keys, so each table is addressed with its own
# key type here.
#
# @param name [String, Symbol] the state name
# @param new_defn the replacement definition (a StateDSL when called
#   from `prepend` or `append`)
# @return the new definition
def self.replace_state(name, new_defn)
  key = name.to_s
  states.delete(key.to_sym)
  state_definitions[key] = new_defn
end
166
+
167
+ # The routines to run at the beginning of a fresh lex.
168
+ # @see start
169
+ def self.start_procs
170
+ @start_procs ||= InheritableList.new(superclass.start_procs)
171
+ end
172
+ @start_procs = []
173
+
174
+ # Specify an action to be run every fresh lex.
175
+ #
176
+ # @example
177
+ # start { puts "I'm lexing a new string!" }
178
+ def self.start(&b)
179
+ start_procs << b
180
+ end
181
+
182
+ # Define a new state for this lexer with the given name.
183
+ # The block will be evaluated in the context of a {StateDSL}.
184
+ def self.state(name, &b)
185
+ name = name.to_s
186
+ state_definitions[name] = StateDSL.new(name, &b)
187
+ end
188
+
189
+ def self.prepend(name, &b)
190
+ name = name.to_s
191
+ dsl = state_definitions[name] or raise "no such state #{name.inspect}"
192
+ replace_state(name, dsl.prepended(&b))
193
+ end
194
+
195
+ def self.append(name, &b)
196
+ name = name.to_s
197
+ dsl = state_definitions[name] or raise "no such state #{name.inspect}"
198
+ replace_state(name, dsl.appended(&b))
199
+ end
200
+
201
+ # @private
202
+ def self.get_state(name)
203
+ return name if name.is_a? State
204
+
205
+ states[name.to_sym] ||= begin
206
+ defn = state_definitions[name.to_s] or raise "unknown state: #{name.inspect}"
207
+ defn.to_state(self)
208
+ end
209
+ end
210
+
211
+ # @private
212
+ def get_state(state_name)
213
+ self.class.get_state(state_name)
214
+ end
215
+
216
+ # The state stack. This is initially the single state `[:root]`.
217
+ # It is an error for this stack to be empty.
218
+ # @see #state
219
+ def stack
220
+ @stack ||= [get_state(:root)]
221
+ end
222
+
223
+ # The current state - i.e. one on top of the state stack.
224
+ #
225
+ # NB: if the state stack is empty, this will throw an error rather
226
+ # than returning nil.
227
+ def state
228
+ stack.last or raise 'empty stack!'
229
+ end
230
+
231
+ # reset this lexer to its initial state. This runs all of the
232
+ # start_procs.
233
+ def reset!
234
+ @stack = nil
235
+ @current_stream = nil
236
+
237
+ puts "start blocks" if @debug && self.class.start_procs.any?
238
+ self.class.start_procs.each do |pr|
239
+ instance_eval(&pr)
240
+ end
241
+ end
242
+
243
+ # This implements the lexer protocol, by yielding [token, value] pairs.
244
+ #
245
+ # The process for lexing works as follows, until the stream is empty:
246
+ #
247
+ # 1. We look at the state on top of the stack (which by default is
248
+ # `[:root]`).
249
+ # 2. Each rule in that state is tried until one is successful. If one
250
+ # is found, that rule's callback is evaluated - which may yield
251
+ # tokens and manipulate the state stack. Otherwise, one character
252
+ # is consumed with an `'Error'` token, and we continue at (1.)
253
+ #
254
+ # @see #step #step (where (2.) is implemented)
255
+ def stream_tokens(str, &b)
256
+ stream = StringScanner.new(str)
257
+
258
+ @current_stream = stream
259
+ @output_stream = b
260
+ @states = self.class.states
261
+ @null_steps = 0
262
+
263
+ until stream.eos?
264
+ if @debug
265
+ puts "lexer: #{self.class.tag}"
266
+ puts "stack: #{stack.map(&:name).map(&:to_sym).inspect}"
267
+ puts "stream: #{stream.peek(20).inspect}"
268
+ end
269
+
270
+ success = step(state, stream)
271
+
272
+ if !success
273
+ puts " no match, yielding Error" if @debug
274
+ b.call(Token::Tokens::Error, stream.getch)
275
+ end
276
+ end
277
+ end
278
+
279
+ # The number of successive scans permitted without consuming
280
+ # the input stream. If this is exceeded, the match fails.
281
+ MAX_NULL_SCANS = 5
282
+
283
+ # Runs one step of the lex. Rules in the current state are tried
284
+ # until one matches, at which point its callback is called.
285
+ #
286
+ # @return true if a rule was tried successfully
287
+ # @return false otherwise.
288
+ def step(state, stream)
289
+ state.rules.each do |rule|
290
+ if rule.is_a?(State)
291
+ puts " entering mixin #{rule.name}" if @debug
292
+ return true if step(rule, stream)
293
+ puts " exiting mixin #{rule.name}" if @debug
294
+ else
295
+ puts " trying #{rule.inspect}" if @debug
296
+
297
+ # XXX HACK XXX
298
+ # StringScanner's implementation of ^ is b0rken.
299
+ # see http://bugs.ruby-lang.org/issues/7092
300
+ # TODO: this doesn't cover cases like /(a|^b)/, but it's
301
+ # the most common, for now...
302
+ next if rule.beginning_of_line && !stream.beginning_of_line?
303
+
304
+ if (size = stream.skip(rule.re))
305
+ puts " got #{stream[0].inspect}" if @debug
306
+
307
+ instance_exec(stream, &rule.callback)
308
+
309
+ if size.zero?
310
+ @null_steps += 1
311
+ if @null_steps > MAX_NULL_SCANS
312
+ puts " too many scans without consuming the string!" if @debug
313
+ return false
314
+ end
315
+ else
316
+ @null_steps = 0
317
+ end
318
+
319
+ return true
320
+ end
321
+ end
322
+ end
323
+
324
+ false
325
+ end
326
+
327
+ # Yield a token.
328
+ #
329
+ # @param tok
330
+ # the token type
331
+ # @param val
332
+ # (optional) the string value to yield. If absent, this defaults
333
+ # to the entire last match.
334
+ def token(tok, val=@current_stream[0])
335
+ yield_token(tok, val)
336
+ end
337
+
338
+ # @deprecated Use {#groups} instead.
339
+ #
340
+ # Formerly yielded a token with the next matched group. This method
341
+ # is no longer supported and now always raises.
342
+ def group(tok)
343
+ raise "RegexLexer#group is deprecated: use #groups instead"
344
+ end
345
+
346
+ # Yield tokens corresponding to the matched groups of the current
347
+ # match.
348
+ def groups(*tokens)
349
+ tokens.each_with_index do |tok, i|
350
+ yield_token(tok, @current_stream[i+1])
351
+ end
352
+ end
353
+
354
+ # Delegate the lex to another lexer. The #lex method will be called
355
+ # with `:continue` set to true, so that #reset! will not be called.
356
+ # In this way, a single lexer can be repeatedly delegated to while
357
+ # maintaining its own internal state stack.
358
+ #
359
+ # @param [#lex] lexer
360
+ # The lexer or lexer class to delegate to
361
+ # @param [String] text
362
+ # The text to delegate. This defaults to the last matched string.
363
+ def delegate(lexer, text=nil)
364
+ puts " delegating to #{lexer.inspect}" if @debug
365
+ text ||= @current_stream[0]
366
+
367
+ lexer.lex(text, :continue => true) do |tok, val|
368
+ puts " delegated token: #{tok.inspect}, #{val.inspect}" if @debug
369
+ yield_token(tok, val)
370
+ end
371
+ end
372
+
373
+ def recurse(text=nil)
374
+ delegate(self.class, text)
375
+ end
376
+
377
+ # Push a state onto the stack. If no state name is given and you've
378
+ # passed a block, a state will be dynamically created using the
379
+ # {StateDSL}.
380
+ def push(state_name=nil, &b)
381
+ push_state = if state_name
382
+ get_state(state_name)
383
+ elsif block_given?
384
+ StateDSL.new(b.inspect, &b).to_state(self.class)
385
+ else
386
+ # use the top of the stack by default
387
+ self.state
388
+ end
389
+
390
+ puts " pushing :#{push_state.name}" if @debug
391
+ stack.push(push_state)
392
+ end
393
+
394
+ # Pop the state stack. If a number is passed in, it will be popped
395
+ # that number of times.
396
+ def pop!(times=1)
397
+ raise 'empty stack!' if stack.empty?
398
+
399
+ puts " popping stack: #{times}" if @debug
400
+
401
+ stack.pop(times)
402
+
403
+ nil
404
+ end
405
+
406
+ # replace the head of the stack with the given state
407
+ def goto(state_name)
408
+ raise 'empty stack!' if stack.empty?
409
+
410
+ puts " going to state :#{state_name} " if @debug
411
+ stack[-1] = get_state(state_name)
412
+ end
413
+
414
+ # reset the stack back to `[:root]`.
415
+ def reset_stack
416
+ puts ' resetting stack' if @debug
417
+ stack.clear
418
+ stack.push get_state(:root)
419
+ end
420
+
421
# Check if `state_name` is in the state stack.
#
# @param state_name [#to_s] the name of the state to look for
# @return [Boolean] true if any state on the stack has that name
def in_state?(state_name)
  # Convert once, outside the loop, rather than on every comparison.
  wanted = state_name.to_s
  stack.any? { |state| state.name == wanted }
end
428
+
429
+ # Check if `state_name` is the state on top of the state stack.
430
+ def state?(state_name)
431
+ state_name.to_s == state.name
432
+ end
433
+
434
+ private
435
+ def yield_token(tok, val)
436
+ return if val.nil? || val.empty?
437
+ puts " yielding #{tok.qualname}, #{val.inspect}" if @debug
438
+ @output_stream.yield(tok, val)
439
+ end
440
+ end
441
+ end