rouge_ecl 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/LICENSE +186 -0
  4. data/bin/rougify +17 -0
  5. data/lib/rouge.rb +82 -0
  6. data/lib/rouge/cli.rb +429 -0
  7. data/lib/rouge/demos/abap +6 -0
  8. data/lib/rouge/demos/actionscript +4 -0
  9. data/lib/rouge/demos/apache +21 -0
  10. data/lib/rouge/demos/apiblueprint +33 -0
  11. data/lib/rouge/demos/applescript +2 -0
  12. data/lib/rouge/demos/awk +4 -0
  13. data/lib/rouge/demos/biml +38 -0
  14. data/lib/rouge/demos/bsl +7 -0
  15. data/lib/rouge/demos/c +8 -0
  16. data/lib/rouge/demos/ceylon +7 -0
  17. data/lib/rouge/demos/cfscript +18 -0
  18. data/lib/rouge/demos/clojure +5 -0
  19. data/lib/rouge/demos/cmake +7 -0
  20. data/lib/rouge/demos/coffeescript +5 -0
  21. data/lib/rouge/demos/common_lisp +1 -0
  22. data/lib/rouge/demos/conf +4 -0
  23. data/lib/rouge/demos/console +6 -0
  24. data/lib/rouge/demos/coq +13 -0
  25. data/lib/rouge/demos/cpp +8 -0
  26. data/lib/rouge/demos/csharp +5 -0
  27. data/lib/rouge/demos/css +4 -0
  28. data/lib/rouge/demos/d +16 -0
  29. data/lib/rouge/demos/dart +6 -0
  30. data/lib/rouge/demos/diff +7 -0
  31. data/lib/rouge/demos/digdag +19 -0
  32. data/lib/rouge/demos/docker +9 -0
  33. data/lib/rouge/demos/dot +5 -0
  34. data/lib/rouge/demos/ecl +1 -0
  35. data/lib/rouge/demos/eiffel +30 -0
  36. data/lib/rouge/demos/elixir +1 -0
  37. data/lib/rouge/demos/elm +4 -0
  38. data/lib/rouge/demos/erb +1 -0
  39. data/lib/rouge/demos/erlang +7 -0
  40. data/lib/rouge/demos/factor +5 -0
  41. data/lib/rouge/demos/fortran +22 -0
  42. data/lib/rouge/demos/fsharp +12 -0
  43. data/lib/rouge/demos/gherkin +17 -0
  44. data/lib/rouge/demos/glsl +14 -0
  45. data/lib/rouge/demos/go +7 -0
  46. data/lib/rouge/demos/gradle +10 -0
  47. data/lib/rouge/demos/graphql +17 -0
  48. data/lib/rouge/demos/groovy +9 -0
  49. data/lib/rouge/demos/hack +5 -0
  50. data/lib/rouge/demos/haml +5 -0
  51. data/lib/rouge/demos/handlebars +7 -0
  52. data/lib/rouge/demos/haskell +6 -0
  53. data/lib/rouge/demos/html +8 -0
  54. data/lib/rouge/demos/http +14 -0
  55. data/lib/rouge/demos/hylang +10 -0
  56. data/lib/rouge/demos/idlang +8 -0
  57. data/lib/rouge/demos/igorpro +9 -0
  58. data/lib/rouge/demos/ini +4 -0
  59. data/lib/rouge/demos/io +11 -0
  60. data/lib/rouge/demos/irb +4 -0
  61. data/lib/rouge/demos/irb_output +2 -0
  62. data/lib/rouge/demos/java +5 -0
  63. data/lib/rouge/demos/javascript +1 -0
  64. data/lib/rouge/demos/jinja +9 -0
  65. data/lib/rouge/demos/json +1 -0
  66. data/lib/rouge/demos/json-doc +1 -0
  67. data/lib/rouge/demos/jsonnet +28 -0
  68. data/lib/rouge/demos/jsx +17 -0
  69. data/lib/rouge/demos/julia +11 -0
  70. data/lib/rouge/demos/kotlin +3 -0
  71. data/lib/rouge/demos/lasso +12 -0
  72. data/lib/rouge/demos/liquid +11 -0
  73. data/lib/rouge/demos/literate_coffeescript +3 -0
  74. data/lib/rouge/demos/literate_haskell +7 -0
  75. data/lib/rouge/demos/llvm +20 -0
  76. data/lib/rouge/demos/lua +12 -0
  77. data/lib/rouge/demos/make +6 -0
  78. data/lib/rouge/demos/markdown +4 -0
  79. data/lib/rouge/demos/matlab +6 -0
  80. data/lib/rouge/demos/moonscript +16 -0
  81. data/lib/rouge/demos/mosel +10 -0
  82. data/lib/rouge/demos/mxml +22 -0
  83. data/lib/rouge/demos/nasm +26 -0
  84. data/lib/rouge/demos/nginx +5 -0
  85. data/lib/rouge/demos/nim +27 -0
  86. data/lib/rouge/demos/nix +19 -0
  87. data/lib/rouge/demos/objective_c +18 -0
  88. data/lib/rouge/demos/ocaml +12 -0
  89. data/lib/rouge/demos/pascal +14 -0
  90. data/lib/rouge/demos/perl +5 -0
  91. data/lib/rouge/demos/php +3 -0
  92. data/lib/rouge/demos/plaintext +1 -0
  93. data/lib/rouge/demos/plist +142 -0
  94. data/lib/rouge/demos/pony +17 -0
  95. data/lib/rouge/demos/powershell +49 -0
  96. data/lib/rouge/demos/praat +26 -0
  97. data/lib/rouge/demos/prolog +9 -0
  98. data/lib/rouge/demos/prometheus +9 -0
  99. data/lib/rouge/demos/properties +7 -0
  100. data/lib/rouge/demos/protobuf +5 -0
  101. data/lib/rouge/demos/puppet +6 -0
  102. data/lib/rouge/demos/python +6 -0
  103. data/lib/rouge/demos/q +2 -0
  104. data/lib/rouge/demos/qml +9 -0
  105. data/lib/rouge/demos/r +8 -0
  106. data/lib/rouge/demos/racket +24 -0
  107. data/lib/rouge/demos/ruby +9 -0
  108. data/lib/rouge/demos/rust +12 -0
  109. data/lib/rouge/demos/sass +3 -0
  110. data/lib/rouge/demos/scala +3 -0
  111. data/lib/rouge/demos/scheme +4 -0
  112. data/lib/rouge/demos/scss +5 -0
  113. data/lib/rouge/demos/sed +4 -0
  114. data/lib/rouge/demos/shell +2 -0
  115. data/lib/rouge/demos/sieve +10 -0
  116. data/lib/rouge/demos/slim +17 -0
  117. data/lib/rouge/demos/smalltalk +6 -0
  118. data/lib/rouge/demos/smarty +12 -0
  119. data/lib/rouge/demos/sml +4 -0
  120. data/lib/rouge/demos/sql +1 -0
  121. data/lib/rouge/demos/swift +5 -0
  122. data/lib/rouge/demos/tap +5 -0
  123. data/lib/rouge/demos/tcl +1 -0
  124. data/lib/rouge/demos/tex +1 -0
  125. data/lib/rouge/demos/toml +9 -0
  126. data/lib/rouge/demos/tsx +17 -0
  127. data/lib/rouge/demos/tulip +13 -0
  128. data/lib/rouge/demos/turtle +26 -0
  129. data/lib/rouge/demos/twig +9 -0
  130. data/lib/rouge/demos/typescript +1 -0
  131. data/lib/rouge/demos/vala +8 -0
  132. data/lib/rouge/demos/vb +4 -0
  133. data/lib/rouge/demos/verilog +27 -0
  134. data/lib/rouge/demos/vhdl +23 -0
  135. data/lib/rouge/demos/viml +14 -0
  136. data/lib/rouge/demos/vue +11 -0
  137. data/lib/rouge/demos/wollok +11 -0
  138. data/lib/rouge/demos/xml +2 -0
  139. data/lib/rouge/demos/yaml +4 -0
  140. data/lib/rouge/formatter.rb +75 -0
  141. data/lib/rouge/formatters/html.rb +37 -0
  142. data/lib/rouge/formatters/html_inline.rb +30 -0
  143. data/lib/rouge/formatters/html_legacy.rb +44 -0
  144. data/lib/rouge/formatters/html_linewise.rb +27 -0
  145. data/lib/rouge/formatters/html_pygments.rb +16 -0
  146. data/lib/rouge/formatters/html_table.rb +61 -0
  147. data/lib/rouge/formatters/null.rb +19 -0
  148. data/lib/rouge/formatters/terminal256.rb +180 -0
  149. data/lib/rouge/guesser.rb +55 -0
  150. data/lib/rouge/guessers/disambiguation.rb +101 -0
  151. data/lib/rouge/guessers/filename.rb +25 -0
  152. data/lib/rouge/guessers/glob_mapping.rb +43 -0
  153. data/lib/rouge/guessers/mimetype.rb +14 -0
  154. data/lib/rouge/guessers/modeline.rb +44 -0
  155. data/lib/rouge/guessers/source.rb +29 -0
  156. data/lib/rouge/guessers/util.rb +32 -0
  157. data/lib/rouge/lexer.rb +461 -0
  158. data/lib/rouge/lexers/abap.rb +238 -0
  159. data/lib/rouge/lexers/actionscript.rb +195 -0
  160. data/lib/rouge/lexers/apache.rb +71 -0
  161. data/lib/rouge/lexers/apache/keywords.yml +764 -0
  162. data/lib/rouge/lexers/apiblueprint.rb +47 -0
  163. data/lib/rouge/lexers/apple_script.rb +367 -0
  164. data/lib/rouge/lexers/awk.rb +161 -0
  165. data/lib/rouge/lexers/biml.rb +41 -0
  166. data/lib/rouge/lexers/bsl.rb +81 -0
  167. data/lib/rouge/lexers/c.rb +212 -0
  168. data/lib/rouge/lexers/ceylon.rb +123 -0
  169. data/lib/rouge/lexers/cfscript.rb +153 -0
  170. data/lib/rouge/lexers/clojure.rb +112 -0
  171. data/lib/rouge/lexers/cmake.rb +206 -0
  172. data/lib/rouge/lexers/coffeescript.rb +174 -0
  173. data/lib/rouge/lexers/common_lisp.rb +345 -0
  174. data/lib/rouge/lexers/conf.rb +24 -0
  175. data/lib/rouge/lexers/console.rb +136 -0
  176. data/lib/rouge/lexers/coq.rb +187 -0
  177. data/lib/rouge/lexers/cpp.rb +78 -0
  178. data/lib/rouge/lexers/csharp.rb +114 -0
  179. data/lib/rouge/lexers/css.rb +273 -0
  180. data/lib/rouge/lexers/d.rb +176 -0
  181. data/lib/rouge/lexers/dart.rb +104 -0
  182. data/lib/rouge/lexers/diff.rb +31 -0
  183. data/lib/rouge/lexers/digdag.rb +68 -0
  184. data/lib/rouge/lexers/docker.rb +50 -0
  185. data/lib/rouge/lexers/dot.rb +68 -0
  186. data/lib/rouge/lexers/ecl.rb +138 -0
  187. data/lib/rouge/lexers/eiffel.rb +65 -0
  188. data/lib/rouge/lexers/elixir.rb +133 -0
  189. data/lib/rouge/lexers/elm.rb +89 -0
  190. data/lib/rouge/lexers/erb.rb +52 -0
  191. data/lib/rouge/lexers/erlang.rb +114 -0
  192. data/lib/rouge/lexers/factor.rb +302 -0
  193. data/lib/rouge/lexers/fortran.rb +176 -0
  194. data/lib/rouge/lexers/fsharp.rb +118 -0
  195. data/lib/rouge/lexers/gherkin.rb +137 -0
  196. data/lib/rouge/lexers/gherkin/keywords.rb +14 -0
  197. data/lib/rouge/lexers/glsl.rb +135 -0
  198. data/lib/rouge/lexers/go.rb +174 -0
  199. data/lib/rouge/lexers/gradle.rb +37 -0
  200. data/lib/rouge/lexers/graphql.rb +243 -0
  201. data/lib/rouge/lexers/groovy.rb +112 -0
  202. data/lib/rouge/lexers/hack.rb +48 -0
  203. data/lib/rouge/lexers/haml.rb +229 -0
  204. data/lib/rouge/lexers/handlebars.rb +79 -0
  205. data/lib/rouge/lexers/haskell.rb +182 -0
  206. data/lib/rouge/lexers/html.rb +139 -0
  207. data/lib/rouge/lexers/http.rb +80 -0
  208. data/lib/rouge/lexers/hylang.rb +93 -0
  209. data/lib/rouge/lexers/idlang.rb +310 -0
  210. data/lib/rouge/lexers/igorpro.rb +408 -0
  211. data/lib/rouge/lexers/ini.rb +53 -0
  212. data/lib/rouge/lexers/io.rb +68 -0
  213. data/lib/rouge/lexers/irb.rb +66 -0
  214. data/lib/rouge/lexers/java.rb +87 -0
  215. data/lib/rouge/lexers/javascript.rb +281 -0
  216. data/lib/rouge/lexers/jinja.rb +137 -0
  217. data/lib/rouge/lexers/json.rb +29 -0
  218. data/lib/rouge/lexers/json_doc.rb +23 -0
  219. data/lib/rouge/lexers/jsonnet.rb +151 -0
  220. data/lib/rouge/lexers/jsx.rb +102 -0
  221. data/lib/rouge/lexers/julia.rb +172 -0
  222. data/lib/rouge/lexers/kotlin.rb +79 -0
  223. data/lib/rouge/lexers/lasso.rb +214 -0
  224. data/lib/rouge/lexers/lasso/keywords.yml +446 -0
  225. data/lib/rouge/lexers/liquid.rb +287 -0
  226. data/lib/rouge/lexers/literate_coffeescript.rb +33 -0
  227. data/lib/rouge/lexers/literate_haskell.rb +36 -0
  228. data/lib/rouge/lexers/llvm.rb +80 -0
  229. data/lib/rouge/lexers/lua.rb +125 -0
  230. data/lib/rouge/lexers/lua/builtins.rb +22 -0
  231. data/lib/rouge/lexers/make.rb +112 -0
  232. data/lib/rouge/lexers/markdown.rb +154 -0
  233. data/lib/rouge/lexers/matlab.rb +71 -0
  234. data/lib/rouge/lexers/matlab/builtins.rb +11 -0
  235. data/lib/rouge/lexers/moonscript.rb +114 -0
  236. data/lib/rouge/lexers/mosel.rb +231 -0
  237. data/lib/rouge/lexers/mxml.rb +68 -0
  238. data/lib/rouge/lexers/nasm.rb +198 -0
  239. data/lib/rouge/lexers/nginx.rb +71 -0
  240. data/lib/rouge/lexers/nim.rb +152 -0
  241. data/lib/rouge/lexers/nix.rb +205 -0
  242. data/lib/rouge/lexers/objective_c.rb +194 -0
  243. data/lib/rouge/lexers/ocaml.rb +100 -0
  244. data/lib/rouge/lexers/pascal.rb +66 -0
  245. data/lib/rouge/lexers/perl.rb +196 -0
  246. data/lib/rouge/lexers/php.rb +193 -0
  247. data/lib/rouge/lexers/php/builtins.rb +194 -0
  248. data/lib/rouge/lexers/plain_text.rb +26 -0
  249. data/lib/rouge/lexers/plist.rb +45 -0
  250. data/lib/rouge/lexers/pony.rb +93 -0
  251. data/lib/rouge/lexers/powershell.rb +678 -0
  252. data/lib/rouge/lexers/praat.rb +350 -0
  253. data/lib/rouge/lexers/prolog.rb +59 -0
  254. data/lib/rouge/lexers/prometheus.rb +121 -0
  255. data/lib/rouge/lexers/properties.rb +51 -0
  256. data/lib/rouge/lexers/protobuf.rb +70 -0
  257. data/lib/rouge/lexers/puppet.rb +128 -0
  258. data/lib/rouge/lexers/python.rb +232 -0
  259. data/lib/rouge/lexers/q.rb +123 -0
  260. data/lib/rouge/lexers/qml.rb +73 -0
  261. data/lib/rouge/lexers/r.rb +89 -0
  262. data/lib/rouge/lexers/racket.rb +543 -0
  263. data/lib/rouge/lexers/ruby.rb +437 -0
  264. data/lib/rouge/lexers/rust.rb +192 -0
  265. data/lib/rouge/lexers/sass.rb +74 -0
  266. data/lib/rouge/lexers/sass/common.rb +180 -0
  267. data/lib/rouge/lexers/scala.rb +142 -0
  268. data/lib/rouge/lexers/scheme.rb +112 -0
  269. data/lib/rouge/lexers/scss.rb +34 -0
  270. data/lib/rouge/lexers/sed.rb +172 -0
  271. data/lib/rouge/lexers/shell.rb +189 -0
  272. data/lib/rouge/lexers/sieve.rb +96 -0
  273. data/lib/rouge/lexers/slim.rb +228 -0
  274. data/lib/rouge/lexers/smalltalk.rb +116 -0
  275. data/lib/rouge/lexers/smarty.rb +80 -0
  276. data/lib/rouge/lexers/sml.rb +344 -0
  277. data/lib/rouge/lexers/sql.rb +140 -0
  278. data/lib/rouge/lexers/swift.rb +181 -0
  279. data/lib/rouge/lexers/tap.rb +87 -0
  280. data/lib/rouge/lexers/tcl.rb +192 -0
  281. data/lib/rouge/lexers/tex.rb +69 -0
  282. data/lib/rouge/lexers/toml.rb +67 -0
  283. data/lib/rouge/lexers/tsx.rb +19 -0
  284. data/lib/rouge/lexers/tulip.rb +106 -0
  285. data/lib/rouge/lexers/turtle.rb +63 -0
  286. data/lib/rouge/lexers/twig.rb +39 -0
  287. data/lib/rouge/lexers/typescript.rb +22 -0
  288. data/lib/rouge/lexers/typescript/common.rb +33 -0
  289. data/lib/rouge/lexers/vala.rb +77 -0
  290. data/lib/rouge/lexers/vb.rb +164 -0
  291. data/lib/rouge/lexers/verilog.rb +164 -0
  292. data/lib/rouge/lexers/vhdl.rb +97 -0
  293. data/lib/rouge/lexers/viml.rb +101 -0
  294. data/lib/rouge/lexers/viml/keywords.rb +12 -0
  295. data/lib/rouge/lexers/vue.rb +122 -0
  296. data/lib/rouge/lexers/wollok.rb +103 -0
  297. data/lib/rouge/lexers/xml.rb +57 -0
  298. data/lib/rouge/lexers/yaml.rb +373 -0
  299. data/lib/rouge/plugins/redcarpet.rb +30 -0
  300. data/lib/rouge/regex_lexer.rb +441 -0
  301. data/lib/rouge/template_lexer.rb +20 -0
  302. data/lib/rouge/text_analyzer.rb +49 -0
  303. data/lib/rouge/theme.rb +213 -0
  304. data/lib/rouge/themes/base16.rb +130 -0
  305. data/lib/rouge/themes/colorful.rb +67 -0
  306. data/lib/rouge/themes/github.rb +71 -0
  307. data/lib/rouge/themes/gruvbox.rb +167 -0
  308. data/lib/rouge/themes/igor_pro.rb +20 -0
  309. data/lib/rouge/themes/molokai.rb +82 -0
  310. data/lib/rouge/themes/monokai.rb +92 -0
  311. data/lib/rouge/themes/monokai_sublime.rb +90 -0
  312. data/lib/rouge/themes/pastie.rb +69 -0
  313. data/lib/rouge/themes/thankful_eyes.rb +74 -0
  314. data/lib/rouge/themes/tulip.rb +69 -0
  315. data/lib/rouge/token.rb +182 -0
  316. data/lib/rouge/util.rb +101 -0
  317. data/lib/rouge/version.rb +7 -0
  318. data/rouge.gemspec +23 -0
  319. metadata +365 -0
@@ -0,0 +1,25 @@
1
module Rouge
  module Guessers
    # Narrows a list of candidate lexers by matching a filename against the
    # glob patterns each lexer declares through `filenames`.
    class Filename < Guesser
      # @return [String] the filename handed to the constructor
      attr_reader :filename

      # Backward-compatible name for {#filename}. The reader used to be
      # declared as `fname` while the constructor assigned `@filename`, so it
      # always returned nil; it now returns the stored filename.
      alias_method :fname, :filename

      # @param [String] filename
      #   the file name (or path) to guess by
      def initialize(filename)
        @filename = filename
      end

      # returns a list of lexers that match the given filename with
      # equal specificity (i.e. number of wildcards in the pattern).
      # This helps disambiguate between, e.g. the Nginx lexer, which
      # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
      # In this case, nginx will win because the pattern has no wildcards,
      # while `*.conf` has one.
      #
      # @param [Array<Class>] lexers candidate lexer classes
      # @return whatever GlobMapping#filter returns for these candidates
      def filter(lexers)
        # Build a lexer-name => globs table; a lexer that reports no
        # filenames contributes an empty pattern list.
        mapping = {}
        lexers.each do |lexer|
          mapping[lexer.name] = lexer.filenames || []
        end

        # The actual glob matching and specificity scoring is delegated.
        GlobMapping.new(mapping, @filename).filter(lexers)
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
module Rouge
  module Guessers
    # This class allows for custom behavior
    # with glob -> lexer name mappings
    class GlobMapping < Guesser
      include Util

      # Builds a GlobMapping from `glob => lexer name` pairs.
      #
      # @param [#each] mapping
      #   pairs of (glob pattern, lexer name); names are resolved with
      #   Lexer.find
      # @param [String] filename the filename to match later in {#filter}
      # @return [GlobMapping]
      def self.by_pairs(mapping, filename)
        glob_map = {}
        mapping.each do |(glob, lexer_name)|
          lexer = Lexer.find(lexer_name)

          # ignore unknown lexers
          next unless lexer

          (glob_map[lexer.name] ||= []) << glob
        end

        new(glob_map, filename)
      end

      attr_reader :glob_map, :filename

      # @param [Hash] glob_map
      #   maps a lexer's `name` to the list of glob patterns for it
      # @param [String] filename the filename to match against
      def initialize(glob_map, filename)
        @glob_map = glob_map
        @filename = filename
      end

      # Scores each lexer by its matching glob patterns and hands the scores
      # to `collect_best`. Only the basename of the filename is matched.
      #
      # @param [Array<Class>] lexers candidate lexer classes
      # @return the result of `collect_best` for the computed scores
      def filter(lexers)
        basename = File.basename(filename)

        collect_best(lexers) do |lexer|
          scores = (@glob_map[lexer.name] || []).map do |pattern|
            next unless test_glob(pattern, basename)

            # specificity is better the fewer wildcards there are, so the
            # score is the negated wildcard count (0 is the best possible)
            -pattern.scan(/[*?\[]/).size
          end

          # A lexer is represented by its MOST specific matching pattern,
          # i.e. the largest (closest to zero) score. Taking the minimum
          # would let a broad pattern such as `*.conf` mask an exact one
          # such as `nginx.conf` declared by the same lexer.
          # NOTE(review): assumes collect_best prefers higher scores, which
          # the negation above implies - confirm against Guesser.
          # Yields nil (no score) when no pattern matched.
          scores.compact.max
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module Rouge
  module Guessers
    # Keeps only the lexers that list a given mimetype among their
    # `mimetypes`.
    class Mimetype < Guesser
      # @return the mimetype passed to the constructor
      attr_reader :mimetype

      # @param mimetype the mimetype to look for, e.g. 'text/html'
      def initialize(mimetype)
        @mimetype = mimetype
      end

      # @param [Array<Class>] lexers candidate lexer classes
      # @return [Array<Class>] the candidates whose mimetypes include ours
      def filter(lexers)
        lexers.select do |candidate|
          candidate.mimetypes.include?(@mimetype)
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Rouge
  module Guessers
    # Narrows candidate lexers using editor modelines (Emacs `-*- ... -*-`
    # and the two Vim forms) found near the start or end of the source.
    class Modeline < Guesser
      include Util

      # [jneen] regexen stolen from linguist
      EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i

      # First form vim modeline
      # [text]{white}{vi:|vim:|ex:}[white]{options}
      # ex: 'vim: syntax=ruby'
      VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i

      # Second form vim modeline (compatible with some versions of Vi)
      # [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text]
      # ex: 'vim: set syntax=ruby:'
      VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i

      MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2]

      # @param [String,IO] source the text (or readable stream) to inspect
      # @option opts :lines
      #   how many lines to scan at each end of the source (default 5)
      def initialize(source, opts={})
        @source = source
        @lines = opts[:lines] || 5
      end

      # @param [Array<Class>] lexers candidate lexer classes
      # @return [Array<Class>]
      #   the candidates whose tag or one of whose aliases is named by a
      #   modeline; the unchanged input when there is exactly one candidate
      #   or when no modeline is found
      def filter(lexers)
        # don't bother reading the stream if we've already decided
        return lexers if lexers.size == 1

        all_lines = get_source(@source).split(/\n/)

        # only the leading and trailing @lines lines are searched
        head = all_lines.first(@lines)
        tail = all_lines.last(@lines)
        search_space = (head + tail).join("\n")

        # first capture group of every modeline pattern that matched
        named = MODELINES.map { |re| re.match(search_space) }.compact.map { |m| m[1] }
        return lexers if named.empty?

        match_set = Set.new(named)
        lexers.select do |l|
          match_set.include?(l.tag) || l.aliases.any? { |a| match_set.include?(a) }
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Rouge
  module Guessers
    # Narrows candidate lexers by asking each one, through its own
    # `detect?` class method, whether the source text looks like its
    # language.
    class Source < Guesser
      include Util

      # @return [String,IO] the source given to the constructor
      attr_reader :source

      # @param [String,IO] source the text (or readable stream) to analyze
      def initialize(source)
        @source = source
      end

      # @param [Array<Class>] lexers candidate lexer classes
      # @return
      #   the input unchanged when it holds exactly one candidate, otherwise
      #   the result of `collect_best` over the lexers that detect the text
      def filter(lexers)
        # don't bother reading the input if
        # we've already filtered to 1
        return lexers if lexers.size == 1

        raw_text = get_source(@source)
        Lexer.assert_utf8!(raw_text)
        analyzed = TextAnalyzer.new(raw_text)

        collect_best(lexers) do |lexer|
          # Only lexers that define detect? directly on themselves are
          # consulted; everything else gets no score (nil).
          defines_detect = lexer.methods(false).include?(:detect?)
          1 if defines_detect && lexer.detect?(analyzed)
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Rouge
  module Guessers
    # Helpers shared by the guessers: glob testing and source loading.
    module Util
      # Strips a leading UTF-8 byte order mark and converts CRLF line
      # endings to LF.
      module SourceNormalizer
        UTF8_BOM = "\xEF\xBB\xBF"
        UTF8_BOM_RE = /\A#{UTF8_BOM}/

        # @param [String,nil] source
        # @return [String,nil]
        #   a normalized copy of `source`, or nil when `source` is nil
        def self.normalize(source)
          # Honour the documented nil contract instead of raising
          # NoMethodError on nil input.
          return nil if source.nil?

          source.sub(UTF8_BOM_RE, '').gsub(/\r\n/, "\n")
        end
      end

      # Tests `path` against a glob `pattern`, case-insensitively and with
      # wildcards allowed to match leading dots.
      #
      # @param [String] pattern glob pattern, e.g. '*.rb'
      # @param [String] path the name to test
      # @return [Boolean]
      def test_glob(pattern, path)
        File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
      end

      # Reads and normalizes source text from a string-like or readable
      # object. String-like objects (responding to `to_str`) take
      # precedence over readable ones (responding to `read`).
      #
      # @param [String,IO] source
      # @return [String]
      # @raise [ArgumentError] if `source` is neither string-like nor readable
      def get_source(source)
        if source.respond_to?(:to_str)
          SourceNormalizer.normalize(source.to_str)
        elsif source.respond_to?(:read)
          SourceNormalizer.normalize(source.read)
        else
          raise ArgumentError, "Invalid source: #{source.inspect}"
        end
      end
    end
  end
end
@@ -0,0 +1,461 @@
1
+ # -*- coding: utf-8 -*- #
2
+
3
+ # stdlib
4
+ require 'strscan'
5
+ require 'cgi'
6
+ require 'set'
7
+
8
module Rouge
  # @abstract
  # A lexer transforms text into a stream of `[token, chunk]` pairs.
  class Lexer
    include Token::Tokens

    @option_docs = {}

    class << self
      # Lexes `stream` with the given options. The lex is delegated to a
      # new instance.
      #
      # @see #lex
      def lex(stream, opts={}, &b)
        new(opts).lex(stream, &b)
      end

      # Given a name in string, return the correct lexer class.
      # @param [String] name
      # @return [Class<Rouge::Lexer>,nil]
      def find(name)
        registry[name.to_s]
      end

      # Find a lexer, with fancy shiny features.
      #
      # * The string you pass can include CGI-style options
      #
      #     Lexer.find_fancy('erb?parent=tex')
      #
      # * You can pass the special name 'guess' so we guess for you,
      #   and you can pass a second argument of the code to guess by
      #
      #     Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
      #
      # This is used in the Redcarpet plugin as well as Rouge's own
      # markdown lexer for highlighting internal code blocks.
      #
      # @param [String,nil] str lexer name, optionally followed by `?opts`
      # @param [String,nil] code source used only when guessing
      # @param [Hash] additional_options
      #   options merged underneath those parsed from `str`
      # @return [Rouge::Lexer,nil]
      #   a lexer instance, or nil when the name is unknown
      def find_fancy(str, code=nil, additional_options={})
        # fast path: a plain name, no options and no guessing
        if str && !str.include?('?') && str != 'guess'
          lexer_class = find(str)
          return lexer_class && lexer_class.new(additional_options)
        end

        name, opts = str ? str.split('?', 2) : [nil, '']

        # parse the options hash from a cgi-style string
        opts = CGI.parse(opts || '').map do |k, vals|
          val = case vals.size
          when 0 then true
          when 1 then vals[0]
          else vals
          end

          [ k.to_s, val ]
        end

        # options given in the string win over additional_options
        opts = additional_options.merge(Hash[opts])

        lexer_class = case name
        when 'guess', nil
          self.guess(:source => code, :mimetype => opts['mimetype'])
        when String
          self.find(name)
        end

        lexer_class && lexer_class.new(opts)
      end

      # Specify or get this lexer's title. Meant to be human-readable.
      # The first value stored wins; without an explicit title the
      # capitalized tag is used.
      def title(t=nil)
        # Only fall back to the tag when nothing is stored and nothing was
        # passed, so reading an existing title never touches `tag`.
        @title ||= (t.nil? ? tag.capitalize : t)
      end

      # Specify or get this lexer's description.
      def desc(arg=:absent)
        if arg == :absent
          @desc
        else
          @desc = arg
        end
      end

      # @return the option documentation table for this lexer class,
      #   created on first use on top of the superclass's table
      def option_docs
        @option_docs ||= InheritableHash.new(superclass.option_docs)
      end

      # Document an option accepted by this lexer.
      # @param name the option name (stored as a string)
      # @param desc its description
      def option(name, desc)
        option_docs[name.to_s] = desc
      end

      # Specify or get the path name containing a small demo for
      # this lexer (can be overriden by {demo}).
      def demo_file(arg=:absent)
        return @demo_file = Pathname.new(arg) unless arg == :absent

        # Memoized so that a path set through the setter form above is not
        # overwritten by the default the next time the getter runs.
        @demo_file ||= Pathname.new(__FILE__).dirname.join('demos', tag)
      end

      # Specify or get a small demo string for this lexer
      def demo(arg=:absent)
        return @demo = arg unless arg == :absent

        # Memoized so that an explicitly specified demo is not replaced by
        # the file contents on a later read.
        @demo ||= File.read(demo_file, mode: 'rt:bom|utf-8')
      end

      # @return a list of all lexers.
      def all
        registry.values.uniq
      end

      # Guess which lexer to use based on a hash of info.
      #
      # This accepts the same arguments as Lexer.guess, but will never throw
      # an error. It will return a (possibly empty) list of potential lexers
      # to use.
      def guesses(info={})
        mimetype, filename, source = info.values_at(:mimetype, :filename, :source)
        custom_globs = info[:custom_globs]

        # caller-supplied guessers run first; dup so the caller's array
        # is not modified
        guessers = (info[:guessers] || []).dup

        guessers << Guessers::Mimetype.new(mimetype) if mimetype
        guessers << Guessers::GlobMapping.by_pairs(custom_globs, filename) if custom_globs && filename
        guessers << Guessers::Filename.new(filename) if filename
        guessers << Guessers::Modeline.new(source) if source
        guessers << Guessers::Source.new(source) if source
        guessers << Guessers::Disambiguation.new(filename, source) if source && filename

        Guesser.guess(guessers, Lexer.all)
      end

      # Guess which lexer to use based on a hash of info.
      #
      # @option info :mimetype
      #   A mimetype to guess by
      # @option info :filename
      #   A filename to guess by
      # @option info :source
      #   The source itself, which, if guessing by mimetype or filename
      #   fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
      #   other hints.
      # @param [Proc] fallback called if multiple lexers are detected.
      #   If omitted, Guesser::Ambiguous is raised.
      #
      # @see Lexer.detect?
      # @see Lexer.guesses
      # @return [Class<Rouge::Lexer>]
      def guess(info={}, &fallback)
        lexers = guesses(info)

        return Lexers::PlainText if lexers.empty?
        return lexers[0] if lexers.size == 1

        if fallback
          fallback.call(lexers)
        else
          raise Guesser::Ambiguous.new(lexers)
        end
      end

      # Shorthand for `guess :mimetype => mt`.
      def guess_by_mimetype(mt)
        guess :mimetype => mt
      end

      # Shorthand for `guess :filename => fname`.
      def guess_by_filename(fname)
        guess :filename => fname
      end

      # Shorthand for `guess :source => source`.
      def guess_by_source(source)
        guess :source => source
      end

      # Turn on the debug switch consulted by {#initialize}.
      def enable_debug!
        @debug_enabled = true
      end

      # Turn off the debug switch consulted by {#initialize}.
      def disable_debug!
        @debug_enabled = false
      end

      # @return [Boolean] whether the debug switch is on
      def debug_enabled?
        !!@debug_enabled
      end

    protected
      # @private
      def register(name, lexer)
        registry[name.to_s] = lexer
      end

    public
      # Used to specify or get the canonical name of this lexer class.
      #
      # @example
      #   class MyLexer < Lexer
      #     tag 'foo'
      #   end
      #
      #   MyLexer.tag # => 'foo'
      #
      #   Lexer.find('foo') # => MyLexer
      def tag(t=nil)
        return @tag if t.nil?

        @tag = t.to_s
        Lexer.register(@tag, self)
      end

      # Used to specify alternate names this lexer class may be found by.
      #
      # @example
      #   class Erb < Lexer
      #     tag 'erb'
      #     aliases 'eruby', 'rhtml'
      #   end
      #
      #   Lexer.find('eruby') # => Erb
      def aliases(*args)
        args.map!(&:to_s)
        args.each { |arg| Lexer.register(arg, self) }
        (@aliases ||= []).concat(args)
      end

      # Specify a list of filename globs associated with this lexer.
      #
      # @example
      #   class Ruby < Lexer
      #     filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
      #   end
      def filenames(*fnames)
        (@filenames ||= []).concat(fnames)
      end

      # Specify a list of mimetypes associated with this lexer.
      #
      # @example
      #   class Html < Lexer
      #     mimetypes 'text/html', 'application/xhtml+xml'
      #   end
      def mimetypes(*mts)
        (@mimetypes ||= []).concat(mts)
      end

      # @private
      # Raises unless `str` is tagged US-ASCII, UTF-8 or ASCII-8BIT.
      # @raise [EncodingError]
      def assert_utf8!(str)
        return if %w(US-ASCII UTF-8 ASCII-8BIT).include? str.encoding.name
        raise EncodingError.new(
          "Bad encoding: #{str.encoding.names.join(',')}. " +
          "Please convert your string to UTF-8."
        )
      end

    private
      # Name => lexer class table backing {find}, {all} and {register}.
      def registry
        @registry ||= {}
      end
    end

    # -*- instance methods -*- #

    attr_reader :options
    # Create a new lexer with the given options.  Individual lexers may
    # specify extra options.  The only current globally accepted option
    # is `:debug`.
    #
    # @option opts :debug
    #   Prints debug information to stdout.  The particular info depends
    #   on the lexer in question.  In regex lexers, this will log the
    #   state stack at the beginning of each step, along with each regex
    #   tried and each stream consumed.  Try it, it's pretty useful.
    def initialize(opts={})
      # option keys are normalized to strings
      @options = {}
      opts.each { |k, v| @options[k.to_s] = v }

      # debugging needs both the global switch and the per-lexer option
      @debug = Lexer.debug_enabled? && bool_option(:debug)
    end

    # Interpret an option value as a boolean.
    #
    # nil, false, 0 and the strings '0', 'false' and 'off' are false; an
    # empty array is true and a non-empty array is judged by its last
    # element; anything else is true.
    #
    # @return [Boolean]
    def as_bool(val)
      case val
      when nil, false, 0, '0', 'false', 'off'
        # 'false' is included so a CGI-style option such as `debug=false`
        # does not switch the option on.
        false
      when Array
        val.empty? ? true : as_bool(val.last)
      else
        true
      end
    end

    # Interpret an option value as a string; for an array the last element
    # is used.
    #
    # @return [String,nil] nil when the value is nil or false
    def as_string(val)
      return as_string(val.last) if val.is_a?(Array)

      val ? val.to_s : nil
    end

    # Interpret an option value as a list: strings are split on commas,
    # arrays are converted element-wise and flattened, anything else
    # yields an empty list.
    #
    # @return [Array<String>]
    def as_list(val)
      case val
      when Array
        val.flat_map { |v| as_list(v) }
      when String
        val.split(',')
      else
        []
      end
    end

    # Interpret an option value as a lexer instance. Accepts a Lexer
    # subclass or a lexer name (both instantiated with this lexer's
    # options), an existing Lexer instance, or an array whose last element
    # is one of those.
    #
    # @return [Rouge::Lexer,nil] nil for unknown names and other values
    def as_lexer(val)
      return as_lexer(val.last) if val.is_a?(Array)
      return val.new(@options) if val.is_a?(Class) && val < Lexer

      case val
      when Lexer
        val
      when String
        lexer_class = Lexer.find(val)
        lexer_class && lexer_class.new(@options)
      end
    end

    # Interpret an option value as a token; for an array the last element
    # is used. Values that do not match `Token` are looked up with
    # `Token[]`.
    def as_token(val)
      return as_token(val.last) if val.is_a?(Array)
      case val
      when Token
        val
      else
        Token[val]
      end
    end

    # Read a boolean option without removing it from {#options}.
    #
    # @param name the option name
    # @yield computes the default when the option is absent
    # @return [Boolean] false when absent and no block is given
    def bool_option(name, &default)
      if @options.key?(name.to_s)
        as_bool(@options[name.to_s])
      else
        default ? default.call : false
      end
    end

    # Remove and return an option converted with {#as_string}. When the
    # option is absent the block, if given, supplies the value.
    def string_option(name, &default)
      as_string(@options.delete(name.to_s, &default))
    end

    # Remove and return an option converted with {#as_lexer}. When the
    # option is absent the block, if given, supplies the value.
    def lexer_option(name, &default)
      as_lexer(@options.delete(name.to_s, &default))
    end

    # Remove and return an option converted with {#as_list}. When the
    # option is absent the block, if given, supplies the value.
    def list_option(name, &default)
      as_list(@options.delete(name.to_s, &default))
    end

    # Remove and return an option converted with {#as_token}. When the
    # option is absent the block, if given, supplies the value.
    def token_option(name, &default)
      as_token(@options.delete(name.to_s, &default))
    end

    # Remove and return a hash-valued option.
    #
    # Values come from three places, later ones winning: `defaults`, a
    # Hash stored under `name`, and individual options whose key contains
    # `name[key]`.
    #
    # @param name the option name
    # @param [Hash] defaults starting values (not modified)
    # @yield [value] optional cast applied to every non-default value
    # @return [Hash]
    def hash_option(name, defaults, &val_cast)
      name = name.to_s
      out = defaults.dup

      base = @options.delete(name)
      base = {} unless base.is_a?(Hash)
      base.each { |k, v| out[k.to_s] = val_cast ? val_cast.call(v) : v }

      # iterate over a snapshot of the keys because matches are deleted
      @options.keys.each do |key|
        m = /(\w+)\[(\w+)\]/.match(key)
        next unless m && m[1] == name
        value = @options.delete(key)

        out[m[2]] = val_cast ? val_cast.call(value) : value
      end

      out
    end

    # @abstract
    #
    # Called after each lex is finished.  The default implementation
    # is a noop.
    def reset!
    end

    # Given a string, yield [token, chunk] pairs.  If no block is given,
    # an enumerator is returned.
    #
    # @option opts :continue
    #   Continue the lex from the previous state (i.e. don't call #reset!)
    def lex(string, opts={}, &b)
      return enum_for(:lex, string, opts) unless block_given?

      Lexer.assert_utf8!(string)

      reset! unless opts[:continue]

      # consolidate consecutive tokens of the same type
      last_token = nil
      last_val = nil
      stream_tokens(string) do |tok, val|
        next if val.empty?

        if tok == last_token
          # same token type as the pending chunk: extend it in place
          last_val << val
          next
        end

        # token type changed: flush the pending chunk, start a new one
        b.call(last_token, last_val) if last_token
        last_token = tok
        last_val = val
      end

      # flush whatever is still pending
      b.call(last_token, last_val) if last_token
    end

    # delegated to {Lexer.tag}
    def tag
      self.class.tag
    end

    # @abstract
    #
    # Yield `[token, chunk]` pairs, given a prepared input stream.  This
    # must be implemented.
    #
    # @param [StringScanner] stream
    #   the stream
    def stream_tokens(stream, &b)
      raise 'abstract'
    end

    # @abstract
    #
    # Return true if there is an in-text indication (such as a shebang
    # or DOCTYPE declaration) that this lexer should be used.
    #
    # @param [TextAnalyzer] text
    #   the text to be analyzed, with a couple of handy methods on it,
    #   like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
    def self.detect?(text)
      false
    end
  end

  module Lexers
    # relative paths already handed to {load_lexer}
    @_loaded_lexers = {}

    # Load a lexer source file from the `lexers` directory next to this
    # file, at most once per relative path.
    #
    # @param [String] relpath path relative to the `lexers` directory
    def self.load_lexer(relpath)
      return if @_loaded_lexers.key?(relpath)
      # recorded before loading, so a repeated request for the same path
      # returns immediately even while the first load is still running
      @_loaded_lexers[relpath] = true

      root = Pathname.new(__FILE__).dirname.join('lexers')
      load root.join(relpath)
    end
  end
end