rougegal 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/LICENSE +186 -0
  4. data/bin/rougify +17 -0
  5. data/lib/rouge/cli.rb +394 -0
  6. data/lib/rouge/demos/abap +6 -0
  7. data/lib/rouge/demos/actionscript +4 -0
  8. data/lib/rouge/demos/apache +21 -0
  9. data/lib/rouge/demos/apiblueprint +33 -0
  10. data/lib/rouge/demos/applescript +2 -0
  11. data/lib/rouge/demos/awk +4 -0
  12. data/lib/rouge/demos/biml +38 -0
  13. data/lib/rouge/demos/bsl +7 -0
  14. data/lib/rouge/demos/c +8 -0
  15. data/lib/rouge/demos/ceylon +7 -0
  16. data/lib/rouge/demos/cfscript +18 -0
  17. data/lib/rouge/demos/clojure +5 -0
  18. data/lib/rouge/demos/cmake +7 -0
  19. data/lib/rouge/demos/coffeescript +5 -0
  20. data/lib/rouge/demos/common_lisp +1 -0
  21. data/lib/rouge/demos/conf +4 -0
  22. data/lib/rouge/demos/console +6 -0
  23. data/lib/rouge/demos/coq +13 -0
  24. data/lib/rouge/demos/cpp +8 -0
  25. data/lib/rouge/demos/csharp +5 -0
  26. data/lib/rouge/demos/css +4 -0
  27. data/lib/rouge/demos/d +16 -0
  28. data/lib/rouge/demos/dart +6 -0
  29. data/lib/rouge/demos/diff +7 -0
  30. data/lib/rouge/demos/digdag +19 -0
  31. data/lib/rouge/demos/docker +9 -0
  32. data/lib/rouge/demos/dot +5 -0
  33. data/lib/rouge/demos/eiffel +30 -0
  34. data/lib/rouge/demos/elixir +1 -0
  35. data/lib/rouge/demos/erb +1 -0
  36. data/lib/rouge/demos/erlang +7 -0
  37. data/lib/rouge/demos/factor +5 -0
  38. data/lib/rouge/demos/fortran +22 -0
  39. data/lib/rouge/demos/fsharp +12 -0
  40. data/lib/rouge/demos/gal +22 -0
  41. data/lib/rouge/demos/gherkin +17 -0
  42. data/lib/rouge/demos/glsl +14 -0
  43. data/lib/rouge/demos/go +7 -0
  44. data/lib/rouge/demos/gradle +10 -0
  45. data/lib/rouge/demos/graphql +17 -0
  46. data/lib/rouge/demos/groovy +9 -0
  47. data/lib/rouge/demos/haml +5 -0
  48. data/lib/rouge/demos/handlebars +7 -0
  49. data/lib/rouge/demos/haskell +6 -0
  50. data/lib/rouge/demos/html +8 -0
  51. data/lib/rouge/demos/http +14 -0
  52. data/lib/rouge/demos/hylang +10 -0
  53. data/lib/rouge/demos/idlang +8 -0
  54. data/lib/rouge/demos/igorpro +9 -0
  55. data/lib/rouge/demos/ini +4 -0
  56. data/lib/rouge/demos/io +11 -0
  57. data/lib/rouge/demos/irb +4 -0
  58. data/lib/rouge/demos/irb_output +2 -0
  59. data/lib/rouge/demos/java +5 -0
  60. data/lib/rouge/demos/javascript +1 -0
  61. data/lib/rouge/demos/jinja +9 -0
  62. data/lib/rouge/demos/json +1 -0
  63. data/lib/rouge/demos/json-doc +1 -0
  64. data/lib/rouge/demos/jsonnet +28 -0
  65. data/lib/rouge/demos/jsx +17 -0
  66. data/lib/rouge/demos/julia +11 -0
  67. data/lib/rouge/demos/kotlin +3 -0
  68. data/lib/rouge/demos/lasso +12 -0
  69. data/lib/rouge/demos/liquid +11 -0
  70. data/lib/rouge/demos/literate_coffeescript +3 -0
  71. data/lib/rouge/demos/literate_haskell +7 -0
  72. data/lib/rouge/demos/llvm +20 -0
  73. data/lib/rouge/demos/lua +12 -0
  74. data/lib/rouge/demos/make +6 -0
  75. data/lib/rouge/demos/markdown +4 -0
  76. data/lib/rouge/demos/matlab +6 -0
  77. data/lib/rouge/demos/moonscript +16 -0
  78. data/lib/rouge/demos/mosel +10 -0
  79. data/lib/rouge/demos/mxml +22 -0
  80. data/lib/rouge/demos/nasm +26 -0
  81. data/lib/rouge/demos/nginx +5 -0
  82. data/lib/rouge/demos/nim +27 -0
  83. data/lib/rouge/demos/objective_c +18 -0
  84. data/lib/rouge/demos/ocaml +12 -0
  85. data/lib/rouge/demos/pascal +14 -0
  86. data/lib/rouge/demos/perl +5 -0
  87. data/lib/rouge/demos/php +3 -0
  88. data/lib/rouge/demos/plaintext +1 -0
  89. data/lib/rouge/demos/plist +142 -0
  90. data/lib/rouge/demos/pony +17 -0
  91. data/lib/rouge/demos/powershell +49 -0
  92. data/lib/rouge/demos/praat +26 -0
  93. data/lib/rouge/demos/prolog +9 -0
  94. data/lib/rouge/demos/prometheus +9 -0
  95. data/lib/rouge/demos/properties +7 -0
  96. data/lib/rouge/demos/protobuf +5 -0
  97. data/lib/rouge/demos/puppet +6 -0
  98. data/lib/rouge/demos/python +6 -0
  99. data/lib/rouge/demos/q +2 -0
  100. data/lib/rouge/demos/qml +9 -0
  101. data/lib/rouge/demos/r +8 -0
  102. data/lib/rouge/demos/racket +24 -0
  103. data/lib/rouge/demos/ruby +9 -0
  104. data/lib/rouge/demos/rust +12 -0
  105. data/lib/rouge/demos/sass +3 -0
  106. data/lib/rouge/demos/scala +3 -0
  107. data/lib/rouge/demos/scheme +4 -0
  108. data/lib/rouge/demos/scss +5 -0
  109. data/lib/rouge/demos/sed +4 -0
  110. data/lib/rouge/demos/shell +2 -0
  111. data/lib/rouge/demos/sieve +10 -0
  112. data/lib/rouge/demos/slim +17 -0
  113. data/lib/rouge/demos/smalltalk +6 -0
  114. data/lib/rouge/demos/smarty +12 -0
  115. data/lib/rouge/demos/sml +4 -0
  116. data/lib/rouge/demos/sql +1 -0
  117. data/lib/rouge/demos/swift +5 -0
  118. data/lib/rouge/demos/tap +5 -0
  119. data/lib/rouge/demos/tcl +1 -0
  120. data/lib/rouge/demos/tex +1 -0
  121. data/lib/rouge/demos/toml +9 -0
  122. data/lib/rouge/demos/tsx +17 -0
  123. data/lib/rouge/demos/tulip +13 -0
  124. data/lib/rouge/demos/turtle +26 -0
  125. data/lib/rouge/demos/twig +9 -0
  126. data/lib/rouge/demos/typescript +1 -0
  127. data/lib/rouge/demos/vala +8 -0
  128. data/lib/rouge/demos/vb +4 -0
  129. data/lib/rouge/demos/verilog +27 -0
  130. data/lib/rouge/demos/vhdl +23 -0
  131. data/lib/rouge/demos/viml +14 -0
  132. data/lib/rouge/demos/vue +11 -0
  133. data/lib/rouge/demos/wollok +11 -0
  134. data/lib/rouge/demos/xml +2 -0
  135. data/lib/rouge/demos/yaml +4 -0
  136. data/lib/rouge/formatter.rb +75 -0
  137. data/lib/rouge/formatters/html.rb +37 -0
  138. data/lib/rouge/formatters/html_inline.rb +30 -0
  139. data/lib/rouge/formatters/html_legacy.rb +44 -0
  140. data/lib/rouge/formatters/html_linewise.rb +27 -0
  141. data/lib/rouge/formatters/html_pygments.rb +16 -0
  142. data/lib/rouge/formatters/html_table.rb +61 -0
  143. data/lib/rouge/formatters/null.rb +19 -0
  144. data/lib/rouge/formatters/terminal256.rb +172 -0
  145. data/lib/rouge/guesser.rb +55 -0
  146. data/lib/rouge/guessers/filename.rb +25 -0
  147. data/lib/rouge/guessers/glob_mapping.rb +46 -0
  148. data/lib/rouge/guessers/mimetype.rb +14 -0
  149. data/lib/rouge/guessers/modeline.rb +42 -0
  150. data/lib/rouge/guessers/source.rb +39 -0
  151. data/lib/rouge/lexer.rb +452 -0
  152. data/lib/rouge/lexers/abap.rb +238 -0
  153. data/lib/rouge/lexers/actionscript.rb +195 -0
  154. data/lib/rouge/lexers/apache.rb +71 -0
  155. data/lib/rouge/lexers/apache/keywords.yml +764 -0
  156. data/lib/rouge/lexers/apiblueprint.rb +51 -0
  157. data/lib/rouge/lexers/apple_script.rb +367 -0
  158. data/lib/rouge/lexers/awk.rb +161 -0
  159. data/lib/rouge/lexers/biml.rb +41 -0
  160. data/lib/rouge/lexers/bsl.rb +81 -0
  161. data/lib/rouge/lexers/c.rb +217 -0
  162. data/lib/rouge/lexers/ceylon.rb +123 -0
  163. data/lib/rouge/lexers/cfscript.rb +153 -0
  164. data/lib/rouge/lexers/clojure.rb +112 -0
  165. data/lib/rouge/lexers/cmake.rb +206 -0
  166. data/lib/rouge/lexers/coffeescript.rb +174 -0
  167. data/lib/rouge/lexers/common_lisp.rb +345 -0
  168. data/lib/rouge/lexers/conf.rb +24 -0
  169. data/lib/rouge/lexers/console.rb +136 -0
  170. data/lib/rouge/lexers/coq.rb +191 -0
  171. data/lib/rouge/lexers/cpp.rb +78 -0
  172. data/lib/rouge/lexers/csharp.rb +114 -0
  173. data/lib/rouge/lexers/css.rb +273 -0
  174. data/lib/rouge/lexers/d.rb +176 -0
  175. data/lib/rouge/lexers/dart.rb +104 -0
  176. data/lib/rouge/lexers/diff.rb +31 -0
  177. data/lib/rouge/lexers/digdag.rb +72 -0
  178. data/lib/rouge/lexers/docker.rb +50 -0
  179. data/lib/rouge/lexers/dot.rb +68 -0
  180. data/lib/rouge/lexers/eiffel.rb +65 -0
  181. data/lib/rouge/lexers/elixir.rb +133 -0
  182. data/lib/rouge/lexers/erb.rb +56 -0
  183. data/lib/rouge/lexers/erlang.rb +118 -0
  184. data/lib/rouge/lexers/factor.rb +302 -0
  185. data/lib/rouge/lexers/fortran.rb +170 -0
  186. data/lib/rouge/lexers/fsharp.rb +118 -0
  187. data/lib/rouge/lexers/gal.rb +50 -0
  188. data/lib/rouge/lexers/gherkin.rb +137 -0
  189. data/lib/rouge/lexers/gherkin/keywords.rb +14 -0
  190. data/lib/rouge/lexers/glsl.rb +135 -0
  191. data/lib/rouge/lexers/go.rb +178 -0
  192. data/lib/rouge/lexers/gradle.rb +37 -0
  193. data/lib/rouge/lexers/graphql.rb +243 -0
  194. data/lib/rouge/lexers/groovy.rb +112 -0
  195. data/lib/rouge/lexers/haml.rb +233 -0
  196. data/lib/rouge/lexers/handlebars.rb +79 -0
  197. data/lib/rouge/lexers/haskell.rb +183 -0
  198. data/lib/rouge/lexers/html.rb +138 -0
  199. data/lib/rouge/lexers/http.rb +80 -0
  200. data/lib/rouge/lexers/hylang.rb +93 -0
  201. data/lib/rouge/lexers/idlang.rb +316 -0
  202. data/lib/rouge/lexers/igorpro.rb +407 -0
  203. data/lib/rouge/lexers/ini.rb +57 -0
  204. data/lib/rouge/lexers/io.rb +68 -0
  205. data/lib/rouge/lexers/irb.rb +66 -0
  206. data/lib/rouge/lexers/java.rb +87 -0
  207. data/lib/rouge/lexers/javascript.rb +269 -0
  208. data/lib/rouge/lexers/jinja.rb +137 -0
  209. data/lib/rouge/lexers/json.rb +29 -0
  210. data/lib/rouge/lexers/json_doc.rb +23 -0
  211. data/lib/rouge/lexers/jsonnet.rb +151 -0
  212. data/lib/rouge/lexers/jsx.rb +102 -0
  213. data/lib/rouge/lexers/julia.rb +172 -0
  214. data/lib/rouge/lexers/kotlin.rb +84 -0
  215. data/lib/rouge/lexers/lasso.rb +217 -0
  216. data/lib/rouge/lexers/lasso/keywords.yml +446 -0
  217. data/lib/rouge/lexers/liquid.rb +287 -0
  218. data/lib/rouge/lexers/literate_coffeescript.rb +33 -0
  219. data/lib/rouge/lexers/literate_haskell.rb +36 -0
  220. data/lib/rouge/lexers/llvm.rb +84 -0
  221. data/lib/rouge/lexers/lua.rb +125 -0
  222. data/lib/rouge/lexers/lua/builtins.rb +22 -0
  223. data/lib/rouge/lexers/make.rb +116 -0
  224. data/lib/rouge/lexers/markdown.rb +154 -0
  225. data/lib/rouge/lexers/matlab.rb +75 -0
  226. data/lib/rouge/lexers/matlab/builtins.rb +11 -0
  227. data/lib/rouge/lexers/moonscript.rb +114 -0
  228. data/lib/rouge/lexers/mosel.rb +231 -0
  229. data/lib/rouge/lexers/mxml.rb +68 -0
  230. data/lib/rouge/lexers/nasm.rb +203 -0
  231. data/lib/rouge/lexers/nginx.rb +71 -0
  232. data/lib/rouge/lexers/nim.rb +152 -0
  233. data/lib/rouge/lexers/objective_c.rb +208 -0
  234. data/lib/rouge/lexers/ocaml.rb +100 -0
  235. data/lib/rouge/lexers/pascal.rb +66 -0
  236. data/lib/rouge/lexers/perl.rb +197 -0
  237. data/lib/rouge/lexers/php.rb +193 -0
  238. data/lib/rouge/lexers/php/builtins.rb +194 -0
  239. data/lib/rouge/lexers/plain_text.rb +26 -0
  240. data/lib/rouge/lexers/plist.rb +49 -0
  241. data/lib/rouge/lexers/pony.rb +93 -0
  242. data/lib/rouge/lexers/powershell.rb +132 -0
  243. data/lib/rouge/lexers/praat.rb +350 -0
  244. data/lib/rouge/lexers/prolog.rb +64 -0
  245. data/lib/rouge/lexers/prometheus.rb +121 -0
  246. data/lib/rouge/lexers/properties.rb +55 -0
  247. data/lib/rouge/lexers/protobuf.rb +70 -0
  248. data/lib/rouge/lexers/puppet.rb +128 -0
  249. data/lib/rouge/lexers/python.rb +231 -0
  250. data/lib/rouge/lexers/q.rb +124 -0
  251. data/lib/rouge/lexers/qml.rb +73 -0
  252. data/lib/rouge/lexers/r.rb +89 -0
  253. data/lib/rouge/lexers/racket.rb +542 -0
  254. data/lib/rouge/lexers/ruby.rb +437 -0
  255. data/lib/rouge/lexers/rust.rb +192 -0
  256. data/lib/rouge/lexers/sass.rb +74 -0
  257. data/lib/rouge/lexers/sass/common.rb +180 -0
  258. data/lib/rouge/lexers/scala.rb +142 -0
  259. data/lib/rouge/lexers/scheme.rb +112 -0
  260. data/lib/rouge/lexers/scss.rb +34 -0
  261. data/lib/rouge/lexers/sed.rb +172 -0
  262. data/lib/rouge/lexers/shell.rb +180 -0
  263. data/lib/rouge/lexers/sieve.rb +96 -0
  264. data/lib/rouge/lexers/slim.rb +228 -0
  265. data/lib/rouge/lexers/smalltalk.rb +116 -0
  266. data/lib/rouge/lexers/smarty.rb +91 -0
  267. data/lib/rouge/lexers/sml.rb +348 -0
  268. data/lib/rouge/lexers/sql.rb +140 -0
  269. data/lib/rouge/lexers/swift.rb +161 -0
  270. data/lib/rouge/lexers/tap.rb +91 -0
  271. data/lib/rouge/lexers/tcl.rb +192 -0
  272. data/lib/rouge/lexers/tex.rb +69 -0
  273. data/lib/rouge/lexers/toml.rb +71 -0
  274. data/lib/rouge/lexers/tsx.rb +19 -0
  275. data/lib/rouge/lexers/tulip.rb +107 -0
  276. data/lib/rouge/lexers/turtle.rb +72 -0
  277. data/lib/rouge/lexers/twig.rb +39 -0
  278. data/lib/rouge/lexers/typescript.rb +22 -0
  279. data/lib/rouge/lexers/typescript/common.rb +33 -0
  280. data/lib/rouge/lexers/vala.rb +77 -0
  281. data/lib/rouge/lexers/vb.rb +164 -0
  282. data/lib/rouge/lexers/verilog.rb +164 -0
  283. data/lib/rouge/lexers/vhdl.rb +97 -0
  284. data/lib/rouge/lexers/viml.rb +101 -0
  285. data/lib/rouge/lexers/viml/keywords.rb +12 -0
  286. data/lib/rouge/lexers/vue.rb +124 -0
  287. data/lib/rouge/lexers/wollok.rb +107 -0
  288. data/lib/rouge/lexers/xml.rb +59 -0
  289. data/lib/rouge/lexers/yaml.rb +373 -0
  290. data/lib/rouge/plugins/redcarpet.rb +30 -0
  291. data/lib/rouge/regex_lexer.rb +441 -0
  292. data/lib/rouge/template_lexer.rb +20 -0
  293. data/lib/rouge/text_analyzer.rb +48 -0
  294. data/lib/rouge/theme.rb +213 -0
  295. data/lib/rouge/themes/base16.rb +130 -0
  296. data/lib/rouge/themes/colorful.rb +67 -0
  297. data/lib/rouge/themes/github.rb +71 -0
  298. data/lib/rouge/themes/gruvbox.rb +167 -0
  299. data/lib/rouge/themes/igor_pro.rb +20 -0
  300. data/lib/rouge/themes/molokai.rb +82 -0
  301. data/lib/rouge/themes/monokai.rb +92 -0
  302. data/lib/rouge/themes/monokai_sublime.rb +90 -0
  303. data/lib/rouge/themes/pastie.rb +69 -0
  304. data/lib/rouge/themes/thankful_eyes.rb +74 -0
  305. data/lib/rouge/themes/tulip.rb +69 -0
  306. data/lib/rouge/token.rb +182 -0
  307. data/lib/rouge/util.rb +101 -0
  308. data/lib/rouge/version.rb +7 -0
  309. data/lib/rougegal.rb +80 -0
  310. data/rougegal.gemspec +18 -0
  311. metadata +356 -0
@@ -0,0 +1,46 @@
1
+ module Rouge
2
+ module Guessers
# Guesser driven by caller-supplied `glob -> lexer name` pairs instead of
# the filename globs each lexer declares for itself.
class GlobMapping < Guesser
  # Build a guesser from an enumerable of `[glob, lexer_name]` pairs.
  # Pairs whose lexer name cannot be resolved by Lexer.find are dropped.
  #
  # @param mapping  enumerable of [glob, lexer_name] pairs
  # @param filename the filename later matched against the globs
  def self.by_pairs(mapping, filename)
    globs_by_lexer = mapping.each_with_object({}) do |(glob, lexer_name), acc|
      lexer = Lexer.find(lexer_name)

      # ignore unknown lexers
      next unless lexer

      (acc[lexer.name] ||= []) << glob
    end

    new(globs_by_lexer, filename)
  end

  attr_reader :glob_map, :filename

  # @param glob_map hash of lexer name => array of glob patterns
  # @param filename the filename to test
  def initialize(glob_map, filename)
    @glob_map = glob_map
    @filename = filename
  end

  # Scores every lexer by the globs registered under its name that match
  # the basename of the filename, and hands the scores to collect_best.
  # A lexer with no matching glob yields nil.
  def filter(lexers)
    basename = File.basename(filename)

    collect_best(lexers) do |lexer|
      candidates = @glob_map[lexer.name] || []
      matching = candidates.select { |pattern| test_pattern(pattern, basename) }

      # specificity is better the fewer wildcards there are
      matching.map { |pattern| -pattern.scan(/[*?\[]/).size }.min
    end
  end

  private

  # Case-insensitive glob test that also lets wildcards match dotfiles.
  def test_pattern(pattern, path)
    flags = File::FNM_DOTMATCH | File::FNM_CASEFOLD
    File.fnmatch?(pattern, path, flags)
  end
end
45
+ end
46
+ end
@@ -0,0 +1,14 @@
1
+ module Rouge
2
+ module Guessers
# Guesser that keeps only the lexers declaring a given mimetype.
class Mimetype < Guesser
  attr_reader :mimetype

  # @param mimetype the mimetype string to look for
  def initialize(mimetype)
    @mimetype = mimetype
  end

  # @param lexers candidate lexer classes
  # @return the candidates whose `mimetypes` list includes the mimetype
  def filter(lexers)
    lexers.select do |candidate|
      declared = candidate.mimetypes
      declared.include?(@mimetype)
    end
  end
end
13
+ end
14
+ end
@@ -0,0 +1,42 @@
1
+ module Rouge
2
+ module Guessers
# Guesser that looks for Emacs or Vim modelines near the top and bottom
# of the source and keeps the lexers whose tag or alias is named there.
class Modeline < Guesser
  # [jneen] regexen stolen from linguist
  EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i

  # First form vim modeline
  # [text]{white}{vi:|vim:|ex:}[white]{options}
  # ex: 'vim: syntax=ruby'
  VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i

  # Second form vim modeline (compatible with some versions of Vi)
  # [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text]
  # ex: 'vim set syntax=ruby:'
  VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i

  MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2]

  # @param source a String, or an object responding to `read`
  # @option opts :lines
  #   how many lines to inspect at each end of the source (default 5)
  def initialize(source, opts={})
    @source = source
    @lines = opts[:lines] || 5
  end

  # @param lexers candidate lexer classes
  # @return the candidates whose tag or aliases appear in a modeline;
  #   the input unchanged when only one candidate remains
  def filter(lexers)
    # don't bother reading the stream if we've already decided
    return lexers if lexers.size == 1

    lines = read_source.split(/\r?\n/)

    search_space = (lines.first(@lines) + lines.last(@lines)).join("\n")

    matches = MODELINES.map { |re| re.match(search_space) }.compact
    match_set = Set.new(matches.map { |m| m[1] })

    lexers.select { |l| (Set.new([l.tag] + l.aliases) & match_set).any? }
  end

  private

  # Returns the source text. When the source is a stream, its position is
  # put back afterwards (when the stream supports it), because
  # Lexer.guesses hands the same source object to the Source guesser,
  # which would otherwise read an already-exhausted stream.
  def read_source
    return @source unless @source.respond_to?(:read)

    start = stream_position
    text = @source.read
    @source.pos = start if start
    text
  end

  # Current position of a seekable stream, or nil when the stream cannot
  # report or restore one (for example a pipe).
  def stream_position
    return nil unless @source.respond_to?(:pos) && @source.respond_to?(:pos=)

    @source.pos
  rescue SystemCallError, IOError
    nil
  end
end
41
+ end
42
+ end
@@ -0,0 +1,39 @@
1
+ module Rouge
2
+ module Guessers
# Guesser that asks each candidate lexer to score the source text via
# its own `analyze_text` class method.
class Source < Guesser
  attr_reader :source

  # @param source a String, or an object responding to `read`
  def initialize(source)
    @source = source
  end

  # @param lexers candidate lexer classes
  # @return the input unchanged when only one candidate remains,
  #   otherwise whatever collect_best selects from the analysis scores
  # @raise [RuntimeError] when the source is neither a String nor readable
  def filter(lexers)
    # don't bother reading the input if
    # we've already filtered to 1
    return lexers if lexers.size == 1

    # If we're filtering against *all* lexers, we only use confident return
    # values from analyze_text. But if we've filtered down already, we can trust
    # the analysis more.
    threshold = lexers.size >= 10 ? 0.5 : 0

    text =
      if String === @source
        @source
      elsif @source.respond_to?(:read)
        @source.read
      else
        raise 'invalid source'
      end

    Lexer.assert_utf8!(text)

    analyzer = TextAnalyzer.new(text)

    collect_best(lexers, threshold: threshold) do |lexer|
      # only lexers that define analyze_text themselves take part
      lexer.analyze_text(analyzer) if lexer.methods(false).include?(:analyze_text)
    end
  end
end
38
+ end
39
+ end
@@ -0,0 +1,452 @@
1
+ # -*- coding: utf-8 -*- #
2
+
3
+ # stdlib
4
+ require 'strscan'
5
+ require 'cgi'
6
+ require 'set'
7
+
8
+ module Rouge
9
+ # @abstract
10
+ # A lexer transforms text into a stream of `[token, chunk]` pairs.
11
+ class Lexer
12
+ include Token::Tokens
13
+
14
+ @option_docs = {}
15
+
16
+ class << self
# Convenience entry point: builds a fresh lexer instance from `opts`
# and lexes `stream` with it, forwarding the block.
#
# @see #lex
def lex(stream, opts={}, &b)
  lexer = new(opts)
  lexer.lex(stream, &b)
end
24
+
# Look up a lexer class by tag or alias. The name is stringified first;
# returns nil when nothing is registered under it.
def find(name)
  key = name.to_s
  registry[key]
end
29
+
# Find a lexer and instantiate it, with a few conveniences.
#
# * The string may carry CGI-style options after a `?`:
#
#     Lexer.find_fancy('erb?parent=tex')
#
# * The special name 'guess' (or a nil name) guesses the lexer from the
#   code given as the second argument:
#
#     Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
#
# Options parsed from the string are merged over `additional_options`.
# Returns a lexer instance, or nil when no lexer class was found.
#
# This is used in the Redcarpet plugin as well as Rouge's own
# markdown lexer for highlighting internal code blocks.
def find_fancy(str, code=nil, additional_options={})
  # fast path: a bare lexer name with no options and no guessing
  if str && !str.include?('?') && str != 'guess'
    klass = find(str)
    return klass && klass.new(additional_options)
  end

  name, query = str ? str.split('?', 2) : [nil, '']

  # parse the options hash from a cgi-style string
  parsed = {}
  CGI.parse(query || '').each do |key, vals|
    parsed[key.to_s] =
      if vals.size == 0
        true
      elsif vals.size == 1
        vals[0]
      else
        vals
      end
  end

  opts = additional_options.merge(parsed)

  klass =
    if name.nil? || 'guess' === name
      guess(:source => code, :mimetype => opts['mimetype'])
    elsif String === name
      find(name)
    end

  klass && klass.new(opts)
end
74
+
# Specify or get this lexer's title. Meant to be human-readable.
#
# With an argument, stores it as the title (a later call replaces an
# earlier one, matching how `desc` behaves). Without an argument,
# returns the stored title, defaulting to the capitalized tag the first
# time it is needed.
#
# @param t the title to set, or nil to read the current one
# @return the title now in effect
def title(t=nil)
  return @title = t unless t.nil?

  # only consult the tag when no title has been stored yet
  @title ||= tag.capitalize
end
82
+
# Specify or get this lexer's description. Called without an argument
# it returns the stored description; otherwise it stores the argument.
def desc(arg=:absent)
  return @desc if arg == :absent

  @desc = arg
end
91
+
# Per-class table of documented options, created on first use as an
# InheritableHash wrapping the superclass's table.
def option_docs
  return @option_docs if @option_docs

  @option_docs = InheritableHash.new(superclass.option_docs)
end
95
+
# Document an option accepted by this lexer. The name is stored as a
# string key in `option_docs`.
def option(name, desc)
  key = name.to_s
  option_docs[key] = desc
end
99
+
# Specify or get the path name containing a small demo for
# this lexer (can be overridden by {demo}).
#
# With an argument, stores it as a Pathname. Without one, returns the
# stored path, defaulting to `demos/<tag>` next to this file. The
# default is only computed when no path has been set, so a custom path
# is no longer discarded by a later read.
#
# @param arg the demo path to set; omit to read the current one
# @return [Pathname]
def demo_file(arg=:absent)
  return @demo_file = Pathname.new(arg) unless arg == :absent

  @demo_file ||= Pathname.new(__FILE__).dirname.join('demos', tag)
end
107
+
# Specify or get a small demo string for this lexer.
#
# With an argument, stores it as the demo. Without one, returns the
# stored demo, reading `demo_file` as UTF-8 the first time when none was
# supplied. An explicitly supplied demo is no longer overwritten by the
# file contents on read.
#
# @param arg the demo string to set; omit to read the current one
def demo(arg=:absent)
  return @demo = arg unless arg == :absent

  @demo ||= File.read(demo_file, encoding: 'utf-8')
end
114
+
# @return a list of all registered lexers, each listed once even when
#   it is registered under several names.
def all
  registered = registry.values
  registered.uniq
end
119
+
# Guess which lexers could apply, based on a hash of info.
#
# Accepts the same arguments as Lexer.guess and returns the list of
# candidates (possibly empty) produced by Guesser.guess rather than
# settling on a single lexer.
#
# Guessers passed in `info[:guessers]` run first, followed by the
# mimetype, custom glob, filename, modeline and source guessers for
# whichever pieces of info were supplied.
def guesses(info={})
  mimetype = info[:mimetype]
  filename = info[:filename]
  source = info[:source]
  custom_globs = info[:custom_globs]

  # copy so the caller's array is never appended to
  guessers = (info[:guessers] || []).dup

  guessers.push(Guessers::Mimetype.new(mimetype)) if mimetype
  if custom_globs && filename
    guessers.push(Guessers::GlobMapping.by_pairs(custom_globs, filename))
  end
  guessers.push(Guessers::Filename.new(filename)) if filename
  if source
    guessers.push(Guessers::Modeline.new(source))
    guessers.push(Guessers::Source.new(source))
  end

  Guesser.guess(guessers, Lexer.all)
end
139
+
# Guess which lexer to use based on a hash of info.
#
# @option info :mimetype
#   A mimetype to guess by
# @option info :filename
#   A filename to guess by
# @option info :source
#   The source itself, which, if guessing by mimetype or filename
#   fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
#   other hints.
#
# @return Lexers::PlainText when nothing matches, otherwise the single
#   matching lexer class
# @raise [Guesser::Ambiguous] when more than one lexer remains
#
# @see Lexer.analyze_text
# @see Lexer.guesses
def guess(info={})
  candidates = guesses(info)

  if candidates.empty?
    Lexers::PlainText
  elsif candidates.size == 1
    candidates[0]
  else
    raise Guesser::Ambiguous.new(candidates)
  end
end
161
+
# Shorthand for `guess` with only a mimetype.
def guess_by_mimetype(mt)
  guess({ :mimetype => mt })
end

# Shorthand for `guess` with only a filename.
def guess_by_filename(fname)
  guess({ :filename => fname })
end

# Shorthand for `guess` with only the source text.
def guess_by_source(source)
  guess({ :source => source })
end
173
+
# Turn on the class-wide debug switch consulted by Lexer#initialize.
def enable_debug!
  @debug_enabled = true
end

# Turn off the class-wide debug switch.
def disable_debug!
  @debug_enabled = false
end

# @return [Boolean] whether the debug switch is currently on
def debug_enabled?
  @debug_enabled ? true : false
end
185
+
186
+ protected
# @private
# Record `lexer` in the registry under the stringified `name`.
def register(name, lexer)
  registry.store(name.to_s, lexer)
end
191
+
192
+ public
# Used to specify or get the canonical name of this lexer class.
# Setting the tag also registers the class under that name.
#
# @example
#   class MyLexer < Lexer
#     tag 'foo'
#   end
#
#   MyLexer.tag # => 'foo'
#
#   Lexer.find('foo') # => MyLexer
def tag(t=nil)
  if t.nil?
    @tag
  else
    @tag = t.to_s
    Lexer.register(@tag, self)
  end
end
209
+
# Used to specify alternate names this lexer class may be found by.
# Each name is stringified and registered; the accumulated list of
# aliases is returned (so calling with no arguments reads it).
#
# @example
#   class Erb < Lexer
#     tag 'erb'
#     aliases 'eruby', 'rhtml'
#   end
#
#   Lexer.find('eruby') # => Erb
def aliases(*args)
  names = args.map(&:to_s)
  names.each { |name| Lexer.register(name, self) }

  @aliases ||= []
  @aliases.concat(names)
end
224
+
# Specify a list of filename globs associated with this lexer.
# Returns the accumulated list, so calling with no arguments reads it.
#
# @example
#   class Ruby < Lexer
#     filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
#   end
def filenames(*fnames)
  @filenames ||= []
  @filenames.concat(fnames)
end
234
+
# Specify a list of mimetypes associated with this lexer.
# Returns the accumulated list, so calling with no arguments reads it.
#
# @example
#   class Html < Lexer
#     mimetypes 'text/html', 'application/xhtml+xml'
#   end
def mimetypes(*mts)
  @mimetypes ||= []
  @mimetypes.concat(mts)
end
244
+
# @private
# Raises unless the string's encoding name is one of US-ASCII, UTF-8 or
# ASCII-8BIT; returns nil otherwise.
#
# @raise [EncodingError] for any other encoding
def assert_utf8!(str)
  accepted = %w(US-ASCII UTF-8 ASCII-8BIT)

  unless accepted.include?(str.encoding.name)
    raise EncodingError.new(
      "Bad encoding: #{str.encoding.names.join(',')}. " +
      "Please convert your string to UTF-8."
    )
  end
end
253
+
254
+ private
# Name => lexer class table, created empty on first access.
def registry
  @registry = {} unless @registry
  @registry
end
258
+ end
259
+
260
+ # -*- instance methods -*- #
261
+
262
+ attr_reader :options
# Create a new lexer with the given options. Individual lexers may
# specify extra options. The only current globally accepted option
# is `:debug`.
#
# Option keys are stored as strings. Debugging is active only when the
# class-wide switch (Lexer.enable_debug!) is on as well.
#
# @option opts :debug
#   Prints debug information to stdout. The particular info depends
#   on the lexer in question. In regex lexers, this will log the
#   state stack at the beginning of each step, along with each regex
#   tried and each stream consumed. Try it, it's pretty useful.
def initialize(opts={})
  @options = {}
  opts.each do |key, value|
    @options[key.to_s] = value
  end

  @debug = Lexer.debug_enabled? && bool_option(:debug)
end
278
+
# Coerce an option value to a boolean.
#
# nil, false, 0 and the strings '0', 'false' and 'off' are false. An
# array is judged by its last element (an empty array counts as true,
# which is how a valueless CGI flag arrives). Anything else is true.
#
# @param val the raw option value
# @return [Boolean]
def as_bool(val)
  case val
  when nil, false, 0, '0', 'false', 'off'
    # 'false' is included so that `?debug=false` really disables the flag
    false
  when Array
    val.empty? ? true : as_bool(val.last)
  else
    true
  end
end
289
+
# Coerce an option value to a string. Arrays are unwrapped to their
# last element first; nil and false become nil.
def as_string(val)
  val = val.last while val.is_a?(Array)

  val.to_s if val
end
295
+
# Coerce an option value to a flat list of strings. Strings are split
# on commas, arrays are converted element by element and flattened,
# and anything else yields an empty list.
def as_list(val)
  if val.is_a?(Array)
    val.flat_map { |item| as_list(item) }
  elsif val.is_a?(String)
    val.split(',')
  else
    []
  end
end
306
+
# Coerce an option value to a lexer instance.
#
# Arrays are unwrapped to their last element. A Lexer subclass is
# instantiated with this lexer's options, a lexer instance is returned
# as-is, and a string is looked up with Lexer.find and instantiated.
# Returns nil for anything else, or when the name is unknown.
def as_lexer(val)
  return as_lexer(val.last) if val.is_a?(Array)
  return val.new(@options) if val.is_a?(Class) && val < Lexer

  if Lexer === val
    val
  elsif String === val
    found = Lexer.find(val)
    found && found.new(@options)
  end
end
319
+
# Coerce an option value to a token. Arrays are unwrapped to their last
# element; a value matched by `Token ===` is returned unchanged and
# anything else is looked up with Token[].
def as_token(val)
  val = val.last while val.is_a?(Array)

  if Token === val
    val
  else
    Token[val]
  end
end
329
+
# Read a boolean option without removing it from the options hash.
# When the option is absent, the block (if given) supplies the default;
# otherwise the default is false.
def bool_option(name, &default)
  key = name.to_s
  return as_bool(@options[key]) if @options.key?(key)

  default ? default.call : false
end
337
+
# Remove the named option from the options hash and coerce it with
# as_string. The block, if given, is forwarded to Hash#delete and so
# supplies the value when the option is absent.
def string_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_string(raw)
end

# Same as string_option, coercing with as_lexer.
def lexer_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_lexer(raw)
end

# Same as string_option, coercing with as_list.
def list_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_list(raw)
end

# Same as string_option, coercing with as_token.
def token_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_token(raw)
end
353
+
# Assemble a hash-valued option and remove its parts from the options.
#
# Starts from a copy of `defaults`, overlays the hash stored under
# `name` (if that value is a Hash), then overlays every option whose key
# looks like `name[subkey]`. Values pass through the block when one is
# given.
#
# @param name     the option name
# @param defaults hash of default entries (not modified)
# @return [Hash] the merged result, keyed by strings for overlaid entries
def hash_option(name, defaults, &val_cast)
  name = name.to_s
  cast = lambda { |value| val_cast ? val_cast.call(value) : value }
  out = defaults.dup

  base = @options.delete(name)
  if base.is_a?(Hash)
    base.each { |k, v| out[k.to_s] = cast.call(v) }
  end

  # iterate over a snapshot of the keys, since matches are deleted
  @options.keys.each do |key|
    m = /(\w+)\[(\w+)\]/.match(key)
    next unless m && m[1] == name

    out[m[2]] = cast.call(@options.delete(key))
  end

  out
end
371
+
# @abstract
#
# Hook invoked by #lex before lexing starts, unless the lex was asked to
# continue from the previous state. The default implementation does
# nothing.
def reset!
  nil
end
378
+
# Given a string, yield [token, chunk] pairs. If no block is given,
# an enumerator is returned.
#
# Empty chunks are skipped, and consecutive chunks carrying the same
# token are merged into one pair before being yielded.
#
# @option opts :continue
#   Continue the lex from the previous state (i.e. don't call #reset!)
def lex(string, opts={}, &b)
  return enum_for(:lex, string, opts) unless block_given?

  Lexer.assert_utf8!(string)

  reset! unless opts[:continue]

  # the pair being accumulated; flushed whenever the token type changes
  pending_token = nil
  pending_chunk = nil

  stream_tokens(string) do |tok, val|
    next if val.empty?

    if tok == pending_token
      pending_chunk << val
    else
      b.call(pending_token, pending_chunk) if pending_token
      pending_token = tok
      pending_chunk = val
    end
  end

  b.call(pending_token, pending_chunk) if pending_token
end
409
+
# The tag of this lexer's class; delegated to {Lexer.tag}.
def tag
  klass = self.class
  klass.tag
end
414
+
# @abstract
#
# Yield `[token, chunk]` pairs, given a prepared input stream. This
# must be implemented by subclasses; the base version always raises.
#
# @param [StringScanner] stream
#   the stream
# @raise [RuntimeError] always, with the message 'abstract'
def stream_tokens(stream, &b)
  raise RuntimeError, 'abstract'
end
425
+
# @abstract
#
# Return a number between 0 and 1 indicating the likelihood that
# the text given should be lexed with this lexer. This default
# implementation always returns 0. Values under 0.5 will only be used
# to disambiguate filename or mimetype matches.
#
# @param [TextAnalyzer] text
#   the text to be analyzed, with a couple of handy methods on it,
#   like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
# @return [Numeric]
def self.analyze_text(text)
  0
end
439
+ end
440
+
# Namespace for the concrete lexers, plus a loader that pulls each lexer
# file in at most once.
module Lexers
  # relpath => true for every file load_lexer has been asked for
  @_loaded_lexers = {}

  # Load a lexer file given its path relative to the `lexers` directory
  # beside this file. Repeat requests for the same path are ignored; the
  # path is recorded before the load starts.
  def self.load_lexer(relpath)
    return if @_loaded_lexers.key?(relpath)

    @_loaded_lexers[relpath] = true

    lexers_dir = Pathname.new(__FILE__).dirname.join('lexers')
    target = lexers_dir.join(relpath)
    load target
  end
end
452
+ end