rouge_ecl 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +26 -0
- data/LICENSE +186 -0
- data/bin/rougify +17 -0
- data/lib/rouge.rb +82 -0
- data/lib/rouge/cli.rb +429 -0
- data/lib/rouge/demos/abap +6 -0
- data/lib/rouge/demos/actionscript +4 -0
- data/lib/rouge/demos/apache +21 -0
- data/lib/rouge/demos/apiblueprint +33 -0
- data/lib/rouge/demos/applescript +2 -0
- data/lib/rouge/demos/awk +4 -0
- data/lib/rouge/demos/biml +38 -0
- data/lib/rouge/demos/bsl +7 -0
- data/lib/rouge/demos/c +8 -0
- data/lib/rouge/demos/ceylon +7 -0
- data/lib/rouge/demos/cfscript +18 -0
- data/lib/rouge/demos/clojure +5 -0
- data/lib/rouge/demos/cmake +7 -0
- data/lib/rouge/demos/coffeescript +5 -0
- data/lib/rouge/demos/common_lisp +1 -0
- data/lib/rouge/demos/conf +4 -0
- data/lib/rouge/demos/console +6 -0
- data/lib/rouge/demos/coq +13 -0
- data/lib/rouge/demos/cpp +8 -0
- data/lib/rouge/demos/csharp +5 -0
- data/lib/rouge/demos/css +4 -0
- data/lib/rouge/demos/d +16 -0
- data/lib/rouge/demos/dart +6 -0
- data/lib/rouge/demos/diff +7 -0
- data/lib/rouge/demos/digdag +19 -0
- data/lib/rouge/demos/docker +9 -0
- data/lib/rouge/demos/dot +5 -0
- data/lib/rouge/demos/ecl +1 -0
- data/lib/rouge/demos/eiffel +30 -0
- data/lib/rouge/demos/elixir +1 -0
- data/lib/rouge/demos/elm +4 -0
- data/lib/rouge/demos/erb +1 -0
- data/lib/rouge/demos/erlang +7 -0
- data/lib/rouge/demos/factor +5 -0
- data/lib/rouge/demos/fortran +22 -0
- data/lib/rouge/demos/fsharp +12 -0
- data/lib/rouge/demos/gherkin +17 -0
- data/lib/rouge/demos/glsl +14 -0
- data/lib/rouge/demos/go +7 -0
- data/lib/rouge/demos/gradle +10 -0
- data/lib/rouge/demos/graphql +17 -0
- data/lib/rouge/demos/groovy +9 -0
- data/lib/rouge/demos/hack +5 -0
- data/lib/rouge/demos/haml +5 -0
- data/lib/rouge/demos/handlebars +7 -0
- data/lib/rouge/demos/haskell +6 -0
- data/lib/rouge/demos/html +8 -0
- data/lib/rouge/demos/http +14 -0
- data/lib/rouge/demos/hylang +10 -0
- data/lib/rouge/demos/idlang +8 -0
- data/lib/rouge/demos/igorpro +9 -0
- data/lib/rouge/demos/ini +4 -0
- data/lib/rouge/demos/io +11 -0
- data/lib/rouge/demos/irb +4 -0
- data/lib/rouge/demos/irb_output +2 -0
- data/lib/rouge/demos/java +5 -0
- data/lib/rouge/demos/javascript +1 -0
- data/lib/rouge/demos/jinja +9 -0
- data/lib/rouge/demos/json +1 -0
- data/lib/rouge/demos/json-doc +1 -0
- data/lib/rouge/demos/jsonnet +28 -0
- data/lib/rouge/demos/jsx +17 -0
- data/lib/rouge/demos/julia +11 -0
- data/lib/rouge/demos/kotlin +3 -0
- data/lib/rouge/demos/lasso +12 -0
- data/lib/rouge/demos/liquid +11 -0
- data/lib/rouge/demos/literate_coffeescript +3 -0
- data/lib/rouge/demos/literate_haskell +7 -0
- data/lib/rouge/demos/llvm +20 -0
- data/lib/rouge/demos/lua +12 -0
- data/lib/rouge/demos/make +6 -0
- data/lib/rouge/demos/markdown +4 -0
- data/lib/rouge/demos/matlab +6 -0
- data/lib/rouge/demos/moonscript +16 -0
- data/lib/rouge/demos/mosel +10 -0
- data/lib/rouge/demos/mxml +22 -0
- data/lib/rouge/demos/nasm +26 -0
- data/lib/rouge/demos/nginx +5 -0
- data/lib/rouge/demos/nim +27 -0
- data/lib/rouge/demos/nix +19 -0
- data/lib/rouge/demos/objective_c +18 -0
- data/lib/rouge/demos/ocaml +12 -0
- data/lib/rouge/demos/pascal +14 -0
- data/lib/rouge/demos/perl +5 -0
- data/lib/rouge/demos/php +3 -0
- data/lib/rouge/demos/plaintext +1 -0
- data/lib/rouge/demos/plist +142 -0
- data/lib/rouge/demos/pony +17 -0
- data/lib/rouge/demos/powershell +49 -0
- data/lib/rouge/demos/praat +26 -0
- data/lib/rouge/demos/prolog +9 -0
- data/lib/rouge/demos/prometheus +9 -0
- data/lib/rouge/demos/properties +7 -0
- data/lib/rouge/demos/protobuf +5 -0
- data/lib/rouge/demos/puppet +6 -0
- data/lib/rouge/demos/python +6 -0
- data/lib/rouge/demos/q +2 -0
- data/lib/rouge/demos/qml +9 -0
- data/lib/rouge/demos/r +8 -0
- data/lib/rouge/demos/racket +24 -0
- data/lib/rouge/demos/ruby +9 -0
- data/lib/rouge/demos/rust +12 -0
- data/lib/rouge/demos/sass +3 -0
- data/lib/rouge/demos/scala +3 -0
- data/lib/rouge/demos/scheme +4 -0
- data/lib/rouge/demos/scss +5 -0
- data/lib/rouge/demos/sed +4 -0
- data/lib/rouge/demos/shell +2 -0
- data/lib/rouge/demos/sieve +10 -0
- data/lib/rouge/demos/slim +17 -0
- data/lib/rouge/demos/smalltalk +6 -0
- data/lib/rouge/demos/smarty +12 -0
- data/lib/rouge/demos/sml +4 -0
- data/lib/rouge/demos/sql +1 -0
- data/lib/rouge/demos/swift +5 -0
- data/lib/rouge/demos/tap +5 -0
- data/lib/rouge/demos/tcl +1 -0
- data/lib/rouge/demos/tex +1 -0
- data/lib/rouge/demos/toml +9 -0
- data/lib/rouge/demos/tsx +17 -0
- data/lib/rouge/demos/tulip +13 -0
- data/lib/rouge/demos/turtle +26 -0
- data/lib/rouge/demos/twig +9 -0
- data/lib/rouge/demos/typescript +1 -0
- data/lib/rouge/demos/vala +8 -0
- data/lib/rouge/demos/vb +4 -0
- data/lib/rouge/demos/verilog +27 -0
- data/lib/rouge/demos/vhdl +23 -0
- data/lib/rouge/demos/viml +14 -0
- data/lib/rouge/demos/vue +11 -0
- data/lib/rouge/demos/wollok +11 -0
- data/lib/rouge/demos/xml +2 -0
- data/lib/rouge/demos/yaml +4 -0
- data/lib/rouge/formatter.rb +75 -0
- data/lib/rouge/formatters/html.rb +37 -0
- data/lib/rouge/formatters/html_inline.rb +30 -0
- data/lib/rouge/formatters/html_legacy.rb +44 -0
- data/lib/rouge/formatters/html_linewise.rb +27 -0
- data/lib/rouge/formatters/html_pygments.rb +16 -0
- data/lib/rouge/formatters/html_table.rb +61 -0
- data/lib/rouge/formatters/null.rb +19 -0
- data/lib/rouge/formatters/terminal256.rb +180 -0
- data/lib/rouge/guesser.rb +55 -0
- data/lib/rouge/guessers/disambiguation.rb +101 -0
- data/lib/rouge/guessers/filename.rb +25 -0
- data/lib/rouge/guessers/glob_mapping.rb +43 -0
- data/lib/rouge/guessers/mimetype.rb +14 -0
- data/lib/rouge/guessers/modeline.rb +44 -0
- data/lib/rouge/guessers/source.rb +29 -0
- data/lib/rouge/guessers/util.rb +32 -0
- data/lib/rouge/lexer.rb +461 -0
- data/lib/rouge/lexers/abap.rb +238 -0
- data/lib/rouge/lexers/actionscript.rb +195 -0
- data/lib/rouge/lexers/apache.rb +71 -0
- data/lib/rouge/lexers/apache/keywords.yml +764 -0
- data/lib/rouge/lexers/apiblueprint.rb +47 -0
- data/lib/rouge/lexers/apple_script.rb +367 -0
- data/lib/rouge/lexers/awk.rb +161 -0
- data/lib/rouge/lexers/biml.rb +41 -0
- data/lib/rouge/lexers/bsl.rb +81 -0
- data/lib/rouge/lexers/c.rb +212 -0
- data/lib/rouge/lexers/ceylon.rb +123 -0
- data/lib/rouge/lexers/cfscript.rb +153 -0
- data/lib/rouge/lexers/clojure.rb +112 -0
- data/lib/rouge/lexers/cmake.rb +206 -0
- data/lib/rouge/lexers/coffeescript.rb +174 -0
- data/lib/rouge/lexers/common_lisp.rb +345 -0
- data/lib/rouge/lexers/conf.rb +24 -0
- data/lib/rouge/lexers/console.rb +136 -0
- data/lib/rouge/lexers/coq.rb +187 -0
- data/lib/rouge/lexers/cpp.rb +78 -0
- data/lib/rouge/lexers/csharp.rb +114 -0
- data/lib/rouge/lexers/css.rb +273 -0
- data/lib/rouge/lexers/d.rb +176 -0
- data/lib/rouge/lexers/dart.rb +104 -0
- data/lib/rouge/lexers/diff.rb +31 -0
- data/lib/rouge/lexers/digdag.rb +68 -0
- data/lib/rouge/lexers/docker.rb +50 -0
- data/lib/rouge/lexers/dot.rb +68 -0
- data/lib/rouge/lexers/ecl.rb +138 -0
- data/lib/rouge/lexers/eiffel.rb +65 -0
- data/lib/rouge/lexers/elixir.rb +133 -0
- data/lib/rouge/lexers/elm.rb +89 -0
- data/lib/rouge/lexers/erb.rb +52 -0
- data/lib/rouge/lexers/erlang.rb +114 -0
- data/lib/rouge/lexers/factor.rb +302 -0
- data/lib/rouge/lexers/fortran.rb +176 -0
- data/lib/rouge/lexers/fsharp.rb +118 -0
- data/lib/rouge/lexers/gherkin.rb +137 -0
- data/lib/rouge/lexers/gherkin/keywords.rb +14 -0
- data/lib/rouge/lexers/glsl.rb +135 -0
- data/lib/rouge/lexers/go.rb +174 -0
- data/lib/rouge/lexers/gradle.rb +37 -0
- data/lib/rouge/lexers/graphql.rb +243 -0
- data/lib/rouge/lexers/groovy.rb +112 -0
- data/lib/rouge/lexers/hack.rb +48 -0
- data/lib/rouge/lexers/haml.rb +229 -0
- data/lib/rouge/lexers/handlebars.rb +79 -0
- data/lib/rouge/lexers/haskell.rb +182 -0
- data/lib/rouge/lexers/html.rb +139 -0
- data/lib/rouge/lexers/http.rb +80 -0
- data/lib/rouge/lexers/hylang.rb +93 -0
- data/lib/rouge/lexers/idlang.rb +310 -0
- data/lib/rouge/lexers/igorpro.rb +408 -0
- data/lib/rouge/lexers/ini.rb +53 -0
- data/lib/rouge/lexers/io.rb +68 -0
- data/lib/rouge/lexers/irb.rb +66 -0
- data/lib/rouge/lexers/java.rb +87 -0
- data/lib/rouge/lexers/javascript.rb +281 -0
- data/lib/rouge/lexers/jinja.rb +137 -0
- data/lib/rouge/lexers/json.rb +29 -0
- data/lib/rouge/lexers/json_doc.rb +23 -0
- data/lib/rouge/lexers/jsonnet.rb +151 -0
- data/lib/rouge/lexers/jsx.rb +102 -0
- data/lib/rouge/lexers/julia.rb +172 -0
- data/lib/rouge/lexers/kotlin.rb +79 -0
- data/lib/rouge/lexers/lasso.rb +214 -0
- data/lib/rouge/lexers/lasso/keywords.yml +446 -0
- data/lib/rouge/lexers/liquid.rb +287 -0
- data/lib/rouge/lexers/literate_coffeescript.rb +33 -0
- data/lib/rouge/lexers/literate_haskell.rb +36 -0
- data/lib/rouge/lexers/llvm.rb +80 -0
- data/lib/rouge/lexers/lua.rb +125 -0
- data/lib/rouge/lexers/lua/builtins.rb +22 -0
- data/lib/rouge/lexers/make.rb +112 -0
- data/lib/rouge/lexers/markdown.rb +154 -0
- data/lib/rouge/lexers/matlab.rb +71 -0
- data/lib/rouge/lexers/matlab/builtins.rb +11 -0
- data/lib/rouge/lexers/moonscript.rb +114 -0
- data/lib/rouge/lexers/mosel.rb +231 -0
- data/lib/rouge/lexers/mxml.rb +68 -0
- data/lib/rouge/lexers/nasm.rb +198 -0
- data/lib/rouge/lexers/nginx.rb +71 -0
- data/lib/rouge/lexers/nim.rb +152 -0
- data/lib/rouge/lexers/nix.rb +205 -0
- data/lib/rouge/lexers/objective_c.rb +194 -0
- data/lib/rouge/lexers/ocaml.rb +100 -0
- data/lib/rouge/lexers/pascal.rb +66 -0
- data/lib/rouge/lexers/perl.rb +196 -0
- data/lib/rouge/lexers/php.rb +193 -0
- data/lib/rouge/lexers/php/builtins.rb +194 -0
- data/lib/rouge/lexers/plain_text.rb +26 -0
- data/lib/rouge/lexers/plist.rb +45 -0
- data/lib/rouge/lexers/pony.rb +93 -0
- data/lib/rouge/lexers/powershell.rb +678 -0
- data/lib/rouge/lexers/praat.rb +350 -0
- data/lib/rouge/lexers/prolog.rb +59 -0
- data/lib/rouge/lexers/prometheus.rb +121 -0
- data/lib/rouge/lexers/properties.rb +51 -0
- data/lib/rouge/lexers/protobuf.rb +70 -0
- data/lib/rouge/lexers/puppet.rb +128 -0
- data/lib/rouge/lexers/python.rb +232 -0
- data/lib/rouge/lexers/q.rb +123 -0
- data/lib/rouge/lexers/qml.rb +73 -0
- data/lib/rouge/lexers/r.rb +89 -0
- data/lib/rouge/lexers/racket.rb +543 -0
- data/lib/rouge/lexers/ruby.rb +437 -0
- data/lib/rouge/lexers/rust.rb +192 -0
- data/lib/rouge/lexers/sass.rb +74 -0
- data/lib/rouge/lexers/sass/common.rb +180 -0
- data/lib/rouge/lexers/scala.rb +142 -0
- data/lib/rouge/lexers/scheme.rb +112 -0
- data/lib/rouge/lexers/scss.rb +34 -0
- data/lib/rouge/lexers/sed.rb +172 -0
- data/lib/rouge/lexers/shell.rb +189 -0
- data/lib/rouge/lexers/sieve.rb +96 -0
- data/lib/rouge/lexers/slim.rb +228 -0
- data/lib/rouge/lexers/smalltalk.rb +116 -0
- data/lib/rouge/lexers/smarty.rb +80 -0
- data/lib/rouge/lexers/sml.rb +344 -0
- data/lib/rouge/lexers/sql.rb +140 -0
- data/lib/rouge/lexers/swift.rb +181 -0
- data/lib/rouge/lexers/tap.rb +87 -0
- data/lib/rouge/lexers/tcl.rb +192 -0
- data/lib/rouge/lexers/tex.rb +69 -0
- data/lib/rouge/lexers/toml.rb +67 -0
- data/lib/rouge/lexers/tsx.rb +19 -0
- data/lib/rouge/lexers/tulip.rb +106 -0
- data/lib/rouge/lexers/turtle.rb +63 -0
- data/lib/rouge/lexers/twig.rb +39 -0
- data/lib/rouge/lexers/typescript.rb +22 -0
- data/lib/rouge/lexers/typescript/common.rb +33 -0
- data/lib/rouge/lexers/vala.rb +77 -0
- data/lib/rouge/lexers/vb.rb +164 -0
- data/lib/rouge/lexers/verilog.rb +164 -0
- data/lib/rouge/lexers/vhdl.rb +97 -0
- data/lib/rouge/lexers/viml.rb +101 -0
- data/lib/rouge/lexers/viml/keywords.rb +12 -0
- data/lib/rouge/lexers/vue.rb +122 -0
- data/lib/rouge/lexers/wollok.rb +103 -0
- data/lib/rouge/lexers/xml.rb +57 -0
- data/lib/rouge/lexers/yaml.rb +373 -0
- data/lib/rouge/plugins/redcarpet.rb +30 -0
- data/lib/rouge/regex_lexer.rb +441 -0
- data/lib/rouge/template_lexer.rb +20 -0
- data/lib/rouge/text_analyzer.rb +49 -0
- data/lib/rouge/theme.rb +213 -0
- data/lib/rouge/themes/base16.rb +130 -0
- data/lib/rouge/themes/colorful.rb +67 -0
- data/lib/rouge/themes/github.rb +71 -0
- data/lib/rouge/themes/gruvbox.rb +167 -0
- data/lib/rouge/themes/igor_pro.rb +20 -0
- data/lib/rouge/themes/molokai.rb +82 -0
- data/lib/rouge/themes/monokai.rb +92 -0
- data/lib/rouge/themes/monokai_sublime.rb +90 -0
- data/lib/rouge/themes/pastie.rb +69 -0
- data/lib/rouge/themes/thankful_eyes.rb +74 -0
- data/lib/rouge/themes/tulip.rb +69 -0
- data/lib/rouge/token.rb +182 -0
- data/lib/rouge/util.rb +101 -0
- data/lib/rouge/version.rb +7 -0
- data/rouge.gemspec +23 -0
- metadata +365 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
module Rouge
  module Guessers
    # Guesser that narrows a list of lexers using the name of the file
    # being highlighted.
    class Filename < Guesser
      # @return [String] the filename this guesser was built with
      attr_reader :filename

      # `fname` is the historical reader name. It used to be declared
      # without its instance variable ever being assigned, so it always
      # returned nil; it now returns the filename.
      alias_method :fname, :filename

      # @param [String] filename the name of the file to guess by
      def initialize(filename)
        @filename = filename
      end

      # Returns a list of lexers that match the given filename with
      # equal specificity (i.e. number of wildcards in the pattern).
      # This helps disambiguate between, e.g. the Nginx lexer, which
      # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
      # In this case, nginx will win because the pattern has no wildcards,
      # while `*.conf` has one.
      #
      # The actual matching is delegated to GlobMapping, fed with a
      # lexer name => filename globs table built from `lexers`.
      def filter(lexers)
        mapping = lexers.each_with_object({}) do |lexer, table|
          table[lexer.name] = lexer.filenames || []
        end

        GlobMapping.new(mapping, @filename).filter(lexers)
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
module Rouge
  module Guessers
    # Guesser driven by an explicit table mapping lexer names to lists of
    # filename globs, which allows custom glob -> lexer behavior.
    class GlobMapping < Guesser
      include Util

      attr_reader :glob_map, :filename

      # Builds a GlobMapping from an enumerable of `[glob, lexer_name]`
      # pairs, grouping the globs under the name of the lexer that
      # `Lexer.find` returns for each pair.
      def self.by_pairs(mapping, filename)
        grouped = {}

        mapping.each do |(glob, lexer_name)|
          found = Lexer.find(lexer_name)

          # pairs that name an unknown lexer are ignored
          next unless found

          (grouped[found.name] ||= []) << glob
        end

        new(grouped, filename)
      end

      # @param glob_map  hash of lexer name => array of glob patterns
      # @param filename  the filename (or path) to test the globs against
      def initialize(glob_map, filename)
        @glob_map = glob_map
        @filename = filename
      end

      # Scores each lexer by the globs registered under its name that
      # match the basename of the filename, and hands those scores to
      # `collect_best` (defined outside this file; presumably it keeps
      # the best-scoring lexers - verify against Guesser).
      def filter(lexers)
        basename = File.basename(filename)

        collect_best(lexers) do |lexer|
          patterns = @glob_map[lexer.name] || []

          scores = patterns.map do |pattern|
            next unless test_glob(pattern, basename)

            # specificity is better the fewer wildcards there are
            -pattern.scan(/[*?\[]/).size
          end

          # nil when no pattern matched
          scores.compact.min
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
module Rouge
  module Guessers
    # Guesser that looks for an editor modeline (Emacs or Vim) near the
    # start and end of the source and keeps the lexers it names.
    class Modeline < Guesser
      include Util

      # [jneen] regexen stolen from linguist
      EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i

      # First form vim modeline
      # [text]{white}{vi:|vim:|ex:}[white]{options}
      # ex: 'vim: syntax=ruby'
      VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i

      # Second form vim modeline (compatible with some versions of Vi)
      # [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text]
      # ex: 'vim set syntax=ruby:'
      VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i

      MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2]

      # @param source  a string or readable object holding the source
      # @option opts :lines
      #   how many lines to inspect at each end of the source (default 5)
      def initialize(source, opts={})
        @source = source
        @lines = opts[:lines] || 5
      end

      # Returns `lexers` untouched when it already holds a single entry
      # or when no modeline is found; otherwise returns the lexers whose
      # tag or one of whose aliases equals a captured modeline name.
      def filter(lexers)
        # don't bother reading the stream if we've already decided
        return lexers if lexers.size == 1

        all_lines = get_source(@source).split(/\n/)

        # only the first and last @lines lines are searched
        head = all_lines.first(@lines)
        tail = all_lines.last(@lines)
        search_space = (head + tail).join("\n")

        names = Set.new
        MODELINES.each do |re|
          found = re.match(search_space)
          names << found[1] if found
        end
        return lexers if names.empty?

        lexers.select do |lexer|
          names.include?(lexer.tag) || lexer.aliases.any? { |other| names.include?(other) }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
module Rouge
  module Guessers
    # Guesser that asks each candidate lexer whether it recognises the
    # source text itself, through the lexer's `detect?` class method.
    class Source < Guesser
      include Util

      attr_reader :source

      # @param source  a string or readable object holding the source
      def initialize(source)
        @source = source
      end

      # Returns `lexers` untouched when it already holds a single entry.
      # Otherwise reads the source, checks its encoding, wraps it in a
      # TextAnalyzer and scores every lexer: 1 when the lexer lists
      # `detect?` among `methods(false)` and it returns a truthy value,
      # nil otherwise. The scores are resolved by `collect_best`.
      def filter(lexers)
        # don't bother reading the input if
        # we've already filtered to 1
        return lexers if lexers.size == 1

        raw_text = get_source(@source)
        Lexer.assert_utf8!(raw_text)
        analyzer = TextAnalyzer.new(raw_text)

        collect_best(lexers) do |lexer|
          has_own_detect = lexer.methods(false).include?(:detect?)

          if has_own_detect && lexer.detect?(analyzer)
            1
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Rouge
  module Guessers
    # Helpers shared by the guessers: source normalisation, glob testing
    # and reading a source from a string or a readable object.
    module Util
      module SourceNormalizer
        UTF8_BOM = "\xEF\xBB\xBF"
        UTF8_BOM_RE = /\A#{UTF8_BOM}/

        # Strips a leading UTF-8 byte order mark and converts CRLF line
        # endings to LF.
        #
        # @param [String,nil] source
        # @return [String,nil] the normalised text, or nil when given nil
        def self.normalize(source)
          # honour the documented nil-in/nil-out contract instead of
          # failing with NoMethodError on nil
          return nil if source.nil?

          source.sub(UTF8_BOM_RE, '').gsub(/\r\n/, "\n")
        end
      end

      # Tests `path` against the glob `pattern`, case-insensitively and
      # letting wildcards match a leading dot.
      #
      # @param [String] pattern
      # @param [String] path
      # @return [Boolean]
      def test_glob(pattern, path)
        File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
      end

      # Obtains the normalised text of a source given either as something
      # string-like (responds to `to_str`) or as something readable
      # (responds to `read`).
      #
      # @param [String,IO] source
      # @return [String]
      # @raise [ArgumentError] when the source is neither of those
      def get_source(source)
        if source.respond_to?(:to_str)
          SourceNormalizer.normalize(source.to_str)
        elsif source.respond_to?(:read)
          SourceNormalizer.normalize(source.read)
        else
          raise ArgumentError, "Invalid source: #{source.inspect}"
        end
      end
    end
  end
end
|
data/lib/rouge/lexer.rb
ADDED
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*- #
|
|
2
|
+
|
|
3
|
+
# stdlib
|
|
4
|
+
require 'strscan'
|
|
5
|
+
require 'cgi'
|
|
6
|
+
require 'set'
|
|
7
|
+
|
|
8
|
+
module Rouge
|
|
9
|
+
# @abstract
|
|
10
|
+
# A lexer transforms text into a stream of `[token, chunk]` pairs.
|
|
11
|
+
class Lexer
|
|
12
|
+
include Token::Tokens
|
|
13
|
+
|
|
14
|
+
@option_docs = {}
|
|
15
|
+
|
|
16
|
+
class << self
|
|
17
|
+
# Lexes `stream` with the given options. The lex is delegated to a
|
|
18
|
+
# new instance.
|
|
19
|
+
#
|
|
20
|
+
# @see #lex
|
|
21
|
+
# Builds a new lexer instance from `opts` and delegates to its #lex
# with `stream` and the given block.
def lex(stream, opts={}, &b)
  lexer = new(opts)
  lexer.lex(stream, &b)
end
|
|
24
|
+
|
|
25
|
+
# Given a name in string, return the correct lexer class.
|
|
26
|
+
# @param [String] name
|
|
27
|
+
# @return [Class<Rouge::Lexer>,nil]
|
|
28
|
+
# Looks `name` up (as a string) in the lexer registry; the result is
# nil when nothing is registered under that name.
def find(name)
  key = name.to_s
  registry[key]
end
|
|
31
|
+
|
|
32
|
+
# Find a lexer, with fancy shiny features.
|
|
33
|
+
#
|
|
34
|
+
# * The string you pass can include CGI-style options
|
|
35
|
+
#
|
|
36
|
+
# Lexer.find_fancy('erb?parent=tex')
|
|
37
|
+
#
|
|
38
|
+
# * You can pass the special name 'guess' so we guess for you,
|
|
39
|
+
# and you can pass a second argument of the code to guess by
|
|
40
|
+
#
|
|
41
|
+
# Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
|
|
42
|
+
#
|
|
43
|
+
# This is used in the Redcarpet plugin as well as Rouge's own
|
|
44
|
+
# markdown lexer for highlighting internal code blocks.
|
|
45
|
+
#
|
|
46
|
+
# Resolves `str` to a lexer instance.
#
# * A plain name (no '?', not 'guess') is looked up with `find` and
#   instantiated with `additional_options`.
# * A 'name?key=value' string has its CGI-style query parsed into
#   options, which are merged over `additional_options`.
# * The name 'guess' (or a nil `str`) guesses from `code`, using the
#   'mimetype' option when present.
#
# Returns nil when no lexer class is found.
def find_fancy(str, code=nil, additional_options={})
  # fast path: nothing to parse and nothing to guess
  if str && !str.include?('?') && str != 'guess'
    plain_class = find(str)
    return plain_class && plain_class.new(additional_options)
  end

  name, query = str ? str.split('?', 2) : [nil, '']

  # parse the options hash from a cgi-style string: a key without a
  # value becomes true, a single value is unwrapped, several values
  # stay as an array
  parsed = {}
  CGI.parse(query || '').each do |k, vals|
    parsed[k.to_s] =
      if vals.size == 0
        true
      elsif vals.size == 1
        vals[0]
      else
        vals
      end
  end

  opts = additional_options.merge(parsed)

  lexer_class =
    if name.nil? || name == 'guess'
      self.guess(:source => code, :mimetype => opts['mimetype'])
    elsif name.is_a?(String)
      self.find(name)
    end

  lexer_class && lexer_class.new(opts)
end
|
|
77
|
+
|
|
78
|
+
# Specify or get this lexer's title. Meant to be human-readable.
|
|
79
|
+
# Specify or get this lexer's title. Meant to be human-readable.
#
# With an argument, the title is stored (a later call with another
# argument replaces it). Without one, the stored title is returned;
# when none has been stored it defaults to the capitalized tag, which
# is then remembered.
#
# The default is computed only when it is actually needed, so reading
# an explicitly specified title no longer touches `tag`.
def title(t=nil)
  @title = t unless t.nil?
  @title ||= tag.capitalize
end
|
|
85
|
+
|
|
86
|
+
# Specify or get this lexer's description.
|
|
87
|
+
# Specify or get this lexer's description: with no argument the stored
# description is returned, otherwise the argument is stored and
# returned.
def desc(arg=:absent)
  return @desc if arg == :absent

  @desc = arg
end
|
|
94
|
+
|
|
95
|
+
# The option documentation table of this lexer class. When the class
# has none yet, one is created as an InheritableHash wrapping the
# superclass's table.
def option_docs
  return @option_docs if @option_docs

  @option_docs = InheritableHash.new(superclass.option_docs)
end

# Records `desc` as the documentation of the option called `name`.
def option(name, desc)
  key = name.to_s
  option_docs[key] = desc
end
|
|
102
|
+
|
|
103
|
+
# Specify or get the path name containing a small demo for
|
|
104
|
+
# this lexer (can be overridden by {demo}).
|
|
105
|
+
# Specify or get the path of the file holding a small demo for this
# lexer.
#
# With an argument, the path is stored (as a Pathname) and returned.
# Without one, the stored path is returned; when none was specified it
# defaults to `demos/<tag>` next to this file. The default is only
# computed when nothing is stored, so that reading the path does not
# discard a path specified earlier.
def demo_file(arg=:absent)
  return @demo_file = Pathname.new(arg) unless arg == :absent

  @demo_file ||= Pathname.new(__FILE__).dirname.join('demos', tag)
end
|
|
110
|
+
|
|
111
|
+
# Specify or get a small demo string for this lexer
|
|
112
|
+
# Specify or get a small demo string for this lexer.
#
# With an argument, the string is stored and returned. Without one,
# the stored string is returned; when none was specified the contents
# of `demo_file` are read (text mode, BOM-aware UTF-8) and remembered.
# The file is only consulted when nothing is stored, so a demo
# specified explicitly is no longer ignored.
def demo(arg=:absent)
  return @demo = arg unless arg == :absent

  @demo ||= File.read(demo_file, mode: 'rt:bom|utf-8')
end
|
|
117
|
+
|
|
118
|
+
# @return a list of all lexers.
|
|
119
|
+
# Every registered lexer, each listed once even when it is registered
# under several names.
def all
  registered = registry.values
  registered.uniq
end
|
|
122
|
+
|
|
123
|
+
# Guess which lexer to use based on a hash of info.
|
|
124
|
+
#
|
|
125
|
+
# This accepts the same arguments as Lexer.guess, but will never throw
|
|
126
|
+
# an error. It will return a (possibly empty) list of potential lexers
|
|
127
|
+
# to use.
|
|
128
|
+
# Assembles the guessers that apply to `info` and runs them over all
# registered lexers through `Guesser.guess`.
#
# Guessers are appended after any supplied in `info[:guessers]`, in
# this order: mimetype, custom globs, filename, modeline, source,
# disambiguation. Each is only added when the information it needs is
# present.
def guesses(info={})
  mimetype = info[:mimetype]
  filename = info[:filename]
  source = info[:source]
  custom_globs = info[:custom_globs]

  # work on a copy so the caller's list is not modified
  guessers = (info[:guessers] || []).dup

  guessers << Guessers::Mimetype.new(mimetype) if mimetype

  if filename
    guessers << Guessers::GlobMapping.by_pairs(custom_globs, filename) if custom_globs
    guessers << Guessers::Filename.new(filename)
  end

  if source
    guessers << Guessers::Modeline.new(source)
    guessers << Guessers::Source.new(source)
    guessers << Guessers::Disambiguation.new(filename, source) if filename
  end

  Guesser.guess(guessers, Lexer.all)
end
|
|
143
|
+
|
|
144
|
+
# Guess which lexer to use based on a hash of info.
|
|
145
|
+
#
|
|
146
|
+
# @option info :mimetype
|
|
147
|
+
# A mimetype to guess by
|
|
148
|
+
# @option info :filename
|
|
149
|
+
# A filename to guess by
|
|
150
|
+
# @option info :source
|
|
151
|
+
# The source itself, which, if guessing by mimetype or filename
|
|
152
|
+
# fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
|
|
153
|
+
# other hints.
|
|
154
|
+
# @param [Proc] fallback called if multiple lexers are detected.
|
|
155
|
+
# If omitted, Guesser::Ambiguous is raised.
|
|
156
|
+
#
|
|
157
|
+
# @see Lexer.detect?
|
|
158
|
+
# @see Lexer.guesses
|
|
159
|
+
# @return [Class<Rouge::Lexer>]
|
|
160
|
+
# Picks a single lexer class from the candidates `guesses(info)`
# returns: PlainText when there are none, the candidate itself when
# there is exactly one, and otherwise whatever the `fallback` block
# returns for the candidate list.
#
# @raise [Guesser::Ambiguous] when several candidates remain and no
#   fallback block was given
def guess(info={}, &fallback)
  candidates = guesses(info)

  case candidates.size
  when 0
    Lexers::PlainText
  when 1
    candidates[0]
  else
    raise Guesser::Ambiguous.new(candidates) unless fallback

    fallback.call(candidates)
  end
end
|
|
172
|
+
|
|
173
|
+
# Shorthand for `guess` with only a mimetype.
def guess_by_mimetype(mt)
  guess(:mimetype => mt)
end

# Shorthand for `guess` with only a filename.
def guess_by_filename(fname)
  guess(:filename => fname)
end

# Shorthand for `guess` with only the source text.
def guess_by_source(source)
  guess(:source => source)
end
|
|
184
|
+
|
|
185
|
+
# Turns the debug switch on; returns true.
def enable_debug!
  @debug_enabled = true
end

# Turns the debug switch off; returns false.
def disable_debug!
  @debug_enabled = false
end

# Whether the debug switch is on, always as a strict boolean (false
# when it was never set).
def debug_enabled?
  @debug_enabled ? true : false
end
|
|
196
|
+
|
|
197
|
+
protected
|
|
198
|
+
# @private
|
|
199
|
+
# Stores `lexer` in the registry under `name` (as a string) and
# returns it.
def register(name, lexer)
  registry.store(name.to_s, lexer)
end
|
|
202
|
+
|
|
203
|
+
public
|
|
204
|
+
# Used to specify or get the canonical name of this lexer class.
|
|
205
|
+
#
|
|
206
|
+
# @example
|
|
207
|
+
# class MyLexer < Lexer
|
|
208
|
+
# tag 'foo'
|
|
209
|
+
# end
|
|
210
|
+
#
|
|
211
|
+
# MyLexer.tag # => 'foo'
|
|
212
|
+
#
|
|
213
|
+
# Lexer.find('foo') # => MyLexer
|
|
214
|
+
# With no argument, returns the stored tag. With an argument, stores
# its string form as the tag and registers this class under it in
# `Lexer`; the result is then whatever `Lexer.register` returns.
def tag(t=nil)
  if t.nil?
    @tag
  else
    @tag = t.to_s
    Lexer.register(@tag, self)
  end
end
|
|
220
|
+
|
|
221
|
+
# Used to specify alternate names this lexer class may be found by.
|
|
222
|
+
#
|
|
223
|
+
# @example
|
|
224
|
+
# class Erb < Lexer
|
|
225
|
+
# tag 'erb'
|
|
226
|
+
# aliases 'eruby', 'rhtml'
|
|
227
|
+
# end
|
|
228
|
+
#
|
|
229
|
+
# Lexer.find('eruby') # => Erb
|
|
230
|
+
# Registers this class in `Lexer` under each given name (converted to
# a string), appends the names to the stored alias list and returns
# that list. Called with no arguments it simply returns the list.
def aliases(*args)
  names = args.map(&:to_s)
  names.each { |name| Lexer.register(name, self) }

  @aliases ||= []
  @aliases.concat(names)
end
|
|
235
|
+
|
|
236
|
+
# Specify a list of filename globs associated with this lexer.
|
|
237
|
+
#
|
|
238
|
+
# @example
|
|
239
|
+
# class Ruby < Lexer
|
|
240
|
+
# filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
|
|
241
|
+
# end
|
|
242
|
+
# Appends the given globs to this lexer's filename list and returns
# the list; with no arguments the list is returned unchanged.
def filenames(*fnames)
  @filenames ||= []
  @filenames.concat(fnames)
end
|
|
245
|
+
|
|
246
|
+
# Specify a list of mimetypes associated with this lexer.
|
|
247
|
+
#
|
|
248
|
+
# @example
|
|
249
|
+
# class Html < Lexer
|
|
250
|
+
# mimetypes 'text/html', 'application/xhtml+xml'
|
|
251
|
+
# end
|
|
252
|
+
# Appends the given mimetypes to this lexer's mimetype list and
# returns the list; with no arguments the list is returned unchanged.
def mimetypes(*mts)
  @mimetypes ||= []
  @mimetypes.concat(mts)
end
|
|
255
|
+
|
|
256
|
+
# @private
|
|
257
|
+
# Checks the encoding of `str`. Returns nil when its encoding name is
# US-ASCII, UTF-8 or ASCII-8BIT.
#
# @raise [EncodingError] for any other encoding, listing the
#   encoding's names in the message
def assert_utf8!(str)
  accepted = %w(US-ASCII UTF-8 ASCII-8BIT)
  encoding = str.encoding
  return if accepted.include?(encoding.name)

  message = "Bad encoding: #{encoding.names.join(',')}. " +
    "Please convert your string to UTF-8."
  raise EncodingError.new(message)
end
|
|
264
|
+
|
|
265
|
+
private
|
|
266
|
+
# The name => lexer table, created empty on first use.
def registry
  @registry = {} unless @registry
  @registry
end
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# -*- instance methods -*- #
|
|
272
|
+
|
|
273
|
+
attr_reader :options
|
|
274
|
+
# Create a new lexer with the given options. Individual lexers may
|
|
275
|
+
# specify extra options. The only current globally accepted option
|
|
276
|
+
# is `:debug`.
|
|
277
|
+
#
|
|
278
|
+
# @option opts :debug
|
|
279
|
+
# Prints debug information to stdout. The particular info depends
|
|
280
|
+
# on the lexer in question. In regex lexers, this will log the
|
|
281
|
+
# state stack at the beginning of each step, along with each regex
|
|
282
|
+
# tried and each stream consumed. Try it, it's pretty useful.
|
|
283
|
+
# Stores `opts` with every key converted to a string, then records
# whether debugging applies to this instance: it does only when
# debugging is enabled on `Lexer` and the `debug` option is truthy.
def initialize(opts={})
  @options = opts.each_with_object({}) do |(key, value), normalized|
    normalized[key.to_s] = value
  end

  @debug = Lexer.debug_enabled? && bool_option(:debug)
end
|
|
289
|
+
|
|
290
|
+
# Interprets an option value as a boolean.
#
# nil, false, 0 and the strings '0', 'false' and 'off' are false. An
# array is judged by its last element (an empty array counts as true).
# Everything else is true.
#
# 'false' is included so that a textual option such as `debug=false`
# switches the option off rather than on.
def as_bool(val)
  case val
  when nil, false, 0, '0', 'false', 'off'
    false
  when Array
    val.empty? ? true : as_bool(val.last)
  else
    true
  end
end
|
|
300
|
+
|
|
301
|
+
# Interprets an option value as a string. An array is reduced to its
# last element (repeatedly, for nested arrays); a nil or false value
# gives nil; anything else is converted with `to_s`.
def as_string(val)
  val = val.last while val.is_a?(Array)
  return nil unless val

  val.to_s
end
|
|
306
|
+
|
|
307
|
+
# Interprets an option value as a flat list of strings: a string is
# split on commas, an array has each element converted the same way
# and the results joined into one list, and anything else gives an
# empty list.
def as_list(val)
  return val.split(',') if val.is_a?(String)
  return [] unless val.is_a?(Array)

  val.each_with_object([]) do |element, flat|
    flat.concat(as_list(element))
  end
end
|
|
317
|
+
|
|
318
|
+
# Interprets an option value as a lexer instance.
#
# An array is reduced to its last element. A subclass of Lexer is
# instantiated with this lexer's options; a Lexer instance is returned
# as is; a string is looked up with `Lexer.find` and, when found,
# instantiated with this lexer's options. Anything else gives nil.
def as_lexer(val)
  val = val.last while val.is_a?(Array)
  return val.new(@options) if val.is_a?(Class) && val < Lexer

  case val
  when Lexer
    val
  when String
    found = Lexer.find(val)
    found && found.new(@options)
  end
end
|
|
330
|
+
|
|
331
|
+
# Interprets an option value as a token. An array is reduced to its
# last element; a value that `Token` matches is returned as is, and
# anything else is looked up with `Token[]`.
def as_token(val)
  val = val.last while val.is_a?(Array)

  case val
  when Token then val
  else Token[val]
  end
end
|
|
340
|
+
|
|
341
|
+
# Reads the option `name` as a boolean without removing it from the
# options. When the option is absent the result of the `default` block
# is returned, or false when no block is given.
def bool_option(name, &default)
  key = name.to_s
  return as_bool(@options[key]) if @options.key?(key)

  default ? default.call : false
end
|
|
348
|
+
|
|
349
|
+
# Removes the option `name` from the options and returns it as a
# string. When the option is absent, the `default` block (if any) is
# passed to `Hash#delete` to supply the value.
def string_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_string(raw)
end

# Same as #string_option, but the value is converted with #as_lexer.
def lexer_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_lexer(raw)
end

# Same as #string_option, but the value is converted with #as_list.
def list_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_list(raw)
end

# Same as #string_option, but the value is converted with #as_token.
def token_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_token(raw)
end
|
|
364
|
+
|
|
365
|
+
# Collects a hash-valued option and removes its pieces from the
# options.
#
# The result starts as a copy of `defaults`. Entries of the option
# `name` are merged in when it holds a Hash (keys converted to
# strings), followed by every option whose key has the form
# `name[key]`. Each value is passed through the `val_cast` block when
# one is given.
def hash_option(name, defaults, &val_cast)
  name = name.to_s
  cast = lambda { |value| val_cast ? val_cast.call(value) : value }
  out = defaults.dup

  base = @options.delete(name)
  if base.is_a?(Hash)
    base.each { |k, v| out[k.to_s] = cast.call(v) }
  end

  # iterate over a snapshot of the keys, since matching entries are
  # deleted along the way
  @options.keys.each do |key|
    parts = /(\w+)\[(\w+)\]/.match(key)
    next unless parts && parts[1] == name

    out[parts[2]] = cast.call(@options.delete(key))
  end

  out
end
|
|
382
|
+
|
|
383
|
+
# @abstract
|
|
384
|
+
#
|
|
385
|
+
# Called after each lex is finished. The default implementation
|
|
386
|
+
# is a noop.
|
|
387
|
+
# Hook with nothing to do in this base implementation; returns nil.
def reset!
  nil
end
|
|
389
|
+
|
|
390
|
+
# Given a string, yield [token, chunk] pairs. If no block is given,
|
|
391
|
+
# an enumerator is returned.
|
|
392
|
+
#
|
|
393
|
+
# @option opts :continue
|
|
394
|
+
# Continue the lex from the previous state (i.e. don't call #reset!)
|
|
395
|
+
# Lexes `string`, calling the block with `(token, text)` for each run
# of output. Without a block an enumerator over the same call is
# returned.
#
# The string's encoding is checked first, and #reset! is called unless
# `opts[:continue]` is set. Chunks with empty text are dropped, and
# consecutive chunks carrying the same token are merged into a single
# call by appending their text to the pending chunk.
def lex(string, opts={}, &b)
  return enum_for(:lex, string, opts) unless block_given?

  Lexer.assert_utf8!(string)

  reset! unless opts[:continue]

  # the chunk being accumulated; emitted once a different token shows up
  pending_token = nil
  pending_text = nil

  stream_tokens(string) do |tok, val|
    next if val.empty?

    if tok == pending_token
      pending_text << val
    else
      b.call(pending_token, pending_text) if pending_token
      pending_token = tok
      pending_text = val
    end
  end

  # flush whatever is still pending
  b.call(pending_token, pending_text) if pending_token
end
|
|
420
|
+
|
|
421
|
+
# delegated to {Lexer.tag}
|
|
422
|
+
# The tag of this lexer's class.
def tag
  lexer_class = self.class
  lexer_class.tag
end
|
|
425
|
+
|
|
426
|
+
# @abstract
|
|
427
|
+
#
|
|
428
|
+
# Yield `[token, chunk]` pairs, given a prepared input stream. This
|
|
429
|
+
# must be implemented.
|
|
430
|
+
#
|
|
431
|
+
# @param [StringScanner] stream
|
|
432
|
+
# the stream
|
|
433
|
+
# Placeholder for the method that produces the tokens; this base
# version always raises a RuntimeError with the message 'abstract'.
def stream_tokens(stream, &b)
  raise RuntimeError, 'abstract'
end
|
|
436
|
+
|
|
437
|
+
# @abstract
|
|
438
|
+
#
|
|
439
|
+
# Return true if there is an in-text indication (such as a shebang
|
|
440
|
+
# or DOCTYPE declaration) that this lexer should be used.
|
|
441
|
+
#
|
|
442
|
+
# @param [TextAnalyzer] text
|
|
443
|
+
# the text to be analyzed, with a couple of handy methods on it,
|
|
444
|
+
# like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
|
|
445
|
+
# Base implementation: never claims the text, whatever it is.
def self.detect?(text)
  false
end
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
module Lexers
  # relative paths already handed to `load_lexer`
  @_loaded_lexers = {}

  # Loads the lexer file at `relpath`, resolved against the `lexers`
  # directory next to this file. A path is only loaded once; repeated
  # calls with the same path return nil without loading again.
  def self.load_lexer(relpath)
    already_requested = @_loaded_lexers.key?(relpath)
    return if already_requested

    # recorded before loading, so the path counts as handled even if
    # the load itself raises
    @_loaded_lexers[relpath] = true

    lexers_dir = Pathname.new(__FILE__).dirname.join('lexers')
    load lexers_dir.join(relpath)
  end
end
|
|
461
|
+
end
|