gitlab-pygments.rb 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/Gemfile +2 -0
- data/README.md +91 -0
- data/Rakefile +78 -0
- data/bench.rb +22 -0
- data/cache-lexers.rb +8 -0
- data/lexers +0 -0
- data/lib/pygments/lexer.rb +148 -0
- data/lib/pygments/mentos.py +344 -0
- data/lib/pygments/popen.rb +389 -0
- data/lib/pygments/version.rb +3 -0
- data/lib/pygments.rb +8 -0
- data/pygments.rb.gemspec +24 -0
- data/test/test_data.c +2581 -0
- data/test/test_data.py +514 -0
- data/test/test_data_generated +2582 -0
- data/test/test_pygments.rb +276 -0
- data/vendor/custom_formatters/gitlab.py +171 -0
- data/vendor/custom_lexers/github.py +362 -0
- data/vendor/pygments-main/AUTHORS +115 -0
- data/vendor/pygments-main/CHANGES +762 -0
- data/vendor/pygments-main/LICENSE +25 -0
- data/vendor/pygments-main/MANIFEST.in +6 -0
- data/vendor/pygments-main/Makefile +59 -0
- data/vendor/pygments-main/REVISION +1 -0
- data/vendor/pygments-main/TODO +15 -0
- data/vendor/pygments-main/docs/generate.py +472 -0
- data/vendor/pygments-main/docs/pygmentize.1 +94 -0
- data/vendor/pygments-main/docs/src/api.txt +270 -0
- data/vendor/pygments-main/docs/src/authors.txt +5 -0
- data/vendor/pygments-main/docs/src/changelog.txt +5 -0
- data/vendor/pygments-main/docs/src/cmdline.txt +147 -0
- data/vendor/pygments-main/docs/src/filterdevelopment.txt +70 -0
- data/vendor/pygments-main/docs/src/filters.txt +42 -0
- data/vendor/pygments-main/docs/src/formatterdevelopment.txt +169 -0
- data/vendor/pygments-main/docs/src/formatters.txt +48 -0
- data/vendor/pygments-main/docs/src/index.txt +69 -0
- data/vendor/pygments-main/docs/src/installation.txt +71 -0
- data/vendor/pygments-main/docs/src/integrate.txt +43 -0
- data/vendor/pygments-main/docs/src/lexerdevelopment.txt +551 -0
- data/vendor/pygments-main/docs/src/lexers.txt +67 -0
- data/vendor/pygments-main/docs/src/moinmoin.txt +39 -0
- data/vendor/pygments-main/docs/src/plugins.txt +93 -0
- data/vendor/pygments-main/docs/src/quickstart.txt +202 -0
- data/vendor/pygments-main/docs/src/rstdirective.txt +22 -0
- data/vendor/pygments-main/docs/src/styles.txt +143 -0
- data/vendor/pygments-main/docs/src/tokens.txt +349 -0
- data/vendor/pygments-main/docs/src/unicode.txt +49 -0
- data/vendor/pygments-main/external/markdown-processor.py +67 -0
- data/vendor/pygments-main/external/moin-parser.py +112 -0
- data/vendor/pygments-main/external/pygments.bashcomp +38 -0
- data/vendor/pygments-main/external/rst-directive-old.py +77 -0
- data/vendor/pygments-main/external/rst-directive.py +83 -0
- data/vendor/pygments-main/ez_setup.py +276 -0
- data/vendor/pygments-main/pygmentize +7 -0
- data/vendor/pygments-main/pygments/__init__.py +91 -0
- data/vendor/pygments-main/pygments/cmdline.py +433 -0
- data/vendor/pygments-main/pygments/console.py +74 -0
- data/vendor/pygments-main/pygments/filter.py +74 -0
- data/vendor/pygments-main/pygments/filters/__init__.py +357 -0
- data/vendor/pygments-main/pygments/formatter.py +92 -0
- data/vendor/pygments-main/pygments/formatters/__init__.py +68 -0
- data/vendor/pygments-main/pygments/formatters/_mapping.py +94 -0
- data/vendor/pygments-main/pygments/formatters/bbcode.py +109 -0
- data/vendor/pygments-main/pygments/formatters/gitlab.py +171 -0
- data/vendor/pygments-main/pygments/formatters/html.py +750 -0
- data/vendor/pygments-main/pygments/formatters/img.py +553 -0
- data/vendor/pygments-main/pygments/formatters/latex.py +378 -0
- data/vendor/pygments-main/pygments/formatters/other.py +117 -0
- data/vendor/pygments-main/pygments/formatters/rtf.py +136 -0
- data/vendor/pygments-main/pygments/formatters/svg.py +154 -0
- data/vendor/pygments-main/pygments/formatters/terminal.py +112 -0
- data/vendor/pygments-main/pygments/formatters/terminal256.py +222 -0
- data/vendor/pygments-main/pygments/lexer.py +697 -0
- data/vendor/pygments-main/pygments/lexers/__init__.py +229 -0
- data/vendor/pygments-main/pygments/lexers/_asybuiltins.py +1645 -0
- data/vendor/pygments-main/pygments/lexers/_clbuiltins.py +232 -0
- data/vendor/pygments-main/pygments/lexers/_luabuiltins.py +249 -0
- data/vendor/pygments-main/pygments/lexers/_mapping.py +298 -0
- data/vendor/pygments-main/pygments/lexers/_phpbuiltins.py +3787 -0
- data/vendor/pygments-main/pygments/lexers/_postgres_builtins.py +232 -0
- data/vendor/pygments-main/pygments/lexers/_scilab_builtins.py +29 -0
- data/vendor/pygments-main/pygments/lexers/_vimbuiltins.py +3 -0
- data/vendor/pygments-main/pygments/lexers/agile.py +1803 -0
- data/vendor/pygments-main/pygments/lexers/asm.py +360 -0
- data/vendor/pygments-main/pygments/lexers/compiled.py +2891 -0
- data/vendor/pygments-main/pygments/lexers/dotnet.py +636 -0
- data/vendor/pygments-main/pygments/lexers/functional.py +1832 -0
- data/vendor/pygments-main/pygments/lexers/github.py +362 -0
- data/vendor/pygments-main/pygments/lexers/hdl.py +356 -0
- data/vendor/pygments-main/pygments/lexers/jvm.py +847 -0
- data/vendor/pygments-main/pygments/lexers/math.py +1072 -0
- data/vendor/pygments-main/pygments/lexers/other.py +3339 -0
- data/vendor/pygments-main/pygments/lexers/parsers.py +695 -0
- data/vendor/pygments-main/pygments/lexers/shell.py +361 -0
- data/vendor/pygments-main/pygments/lexers/special.py +100 -0
- data/vendor/pygments-main/pygments/lexers/sql.py +559 -0
- data/vendor/pygments-main/pygments/lexers/templates.py +1631 -0
- data/vendor/pygments-main/pygments/lexers/text.py +1753 -0
- data/vendor/pygments-main/pygments/lexers/web.py +2864 -0
- data/vendor/pygments-main/pygments/plugin.py +74 -0
- data/vendor/pygments-main/pygments/scanner.py +104 -0
- data/vendor/pygments-main/pygments/style.py +117 -0
- data/vendor/pygments-main/pygments/styles/__init__.py +70 -0
- data/vendor/pygments-main/pygments/styles/autumn.py +65 -0
- data/vendor/pygments-main/pygments/styles/borland.py +51 -0
- data/vendor/pygments-main/pygments/styles/bw.py +49 -0
- data/vendor/pygments-main/pygments/styles/colorful.py +81 -0
- data/vendor/pygments-main/pygments/styles/default.py +73 -0
- data/vendor/pygments-main/pygments/styles/emacs.py +72 -0
- data/vendor/pygments-main/pygments/styles/friendly.py +72 -0
- data/vendor/pygments-main/pygments/styles/fruity.py +42 -0
- data/vendor/pygments-main/pygments/styles/manni.py +75 -0
- data/vendor/pygments-main/pygments/styles/monokai.py +106 -0
- data/vendor/pygments-main/pygments/styles/murphy.py +80 -0
- data/vendor/pygments-main/pygments/styles/native.py +65 -0
- data/vendor/pygments-main/pygments/styles/pastie.py +75 -0
- data/vendor/pygments-main/pygments/styles/perldoc.py +69 -0
- data/vendor/pygments-main/pygments/styles/rrt.py +33 -0
- data/vendor/pygments-main/pygments/styles/tango.py +141 -0
- data/vendor/pygments-main/pygments/styles/trac.py +63 -0
- data/vendor/pygments-main/pygments/styles/vim.py +63 -0
- data/vendor/pygments-main/pygments/styles/vs.py +38 -0
- data/vendor/pygments-main/pygments/token.py +195 -0
- data/vendor/pygments-main/pygments/unistring.py +130 -0
- data/vendor/pygments-main/pygments/util.py +232 -0
- data/vendor/pygments-main/scripts/check_sources.py +242 -0
- data/vendor/pygments-main/scripts/detect_missing_analyse_text.py +30 -0
- data/vendor/pygments-main/scripts/epydoc.css +280 -0
- data/vendor/pygments-main/scripts/find_codetags.py +205 -0
- data/vendor/pygments-main/scripts/find_error.py +171 -0
- data/vendor/pygments-main/scripts/get_vimkw.py +43 -0
- data/vendor/pygments-main/scripts/pylintrc +301 -0
- data/vendor/pygments-main/scripts/reindent.py +291 -0
- data/vendor/pygments-main/scripts/vim2pygments.py +933 -0
- data/vendor/pygments-main/setup.cfg +6 -0
- data/vendor/pygments-main/setup.py +88 -0
- data/vendor/pygments-main/tests/dtds/HTML4-f.dtd +37 -0
- data/vendor/pygments-main/tests/dtds/HTML4-s.dtd +869 -0
- data/vendor/pygments-main/tests/dtds/HTML4.dcl +88 -0
- data/vendor/pygments-main/tests/dtds/HTML4.dtd +1092 -0
- data/vendor/pygments-main/tests/dtds/HTML4.soc +9 -0
- data/vendor/pygments-main/tests/dtds/HTMLlat1.ent +195 -0
- data/vendor/pygments-main/tests/dtds/HTMLspec.ent +77 -0
- data/vendor/pygments-main/tests/dtds/HTMLsym.ent +241 -0
- data/vendor/pygments-main/tests/examplefiles/ANTLRv3.g +608 -0
- data/vendor/pygments-main/tests/examplefiles/AcidStateAdvanced.hs +209 -0
- data/vendor/pygments-main/tests/examplefiles/AlternatingGroup.mu +102 -0
- data/vendor/pygments-main/tests/examplefiles/CPDictionary.j +611 -0
- data/vendor/pygments-main/tests/examplefiles/Constants.mo +158 -0
- data/vendor/pygments-main/tests/examplefiles/DancingSudoku.lhs +411 -0
- data/vendor/pygments-main/tests/examplefiles/Errors.scala +18 -0
- data/vendor/pygments-main/tests/examplefiles/File.hy +174 -0
- data/vendor/pygments-main/tests/examplefiles/Intro.java +1660 -0
- data/vendor/pygments-main/tests/examplefiles/Makefile +1131 -0
- data/vendor/pygments-main/tests/examplefiles/Object.st +4394 -0
- data/vendor/pygments-main/tests/examplefiles/OrderedMap.hx +584 -0
- data/vendor/pygments-main/tests/examplefiles/SmallCheck.hs +378 -0
- data/vendor/pygments-main/tests/examplefiles/Sorting.mod +470 -0
- data/vendor/pygments-main/tests/examplefiles/Sudoku.lhs +382 -0
- data/vendor/pygments-main/tests/examplefiles/addressbook.proto +30 -0
- data/vendor/pygments-main/tests/examplefiles/antlr_throws +1 -0
- data/vendor/pygments-main/tests/examplefiles/apache2.conf +393 -0
- data/vendor/pygments-main/tests/examplefiles/as3_test.as +143 -0
- data/vendor/pygments-main/tests/examplefiles/as3_test2.as +46 -0
- data/vendor/pygments-main/tests/examplefiles/as3_test3.as +3 -0
- data/vendor/pygments-main/tests/examplefiles/aspx-cs_example +27 -0
- data/vendor/pygments-main/tests/examplefiles/badcase.java +2 -0
- data/vendor/pygments-main/tests/examplefiles/batchfile.bat +49 -0
- data/vendor/pygments-main/tests/examplefiles/boot-9.scm +1557 -0
- data/vendor/pygments-main/tests/examplefiles/cells.ps +515 -0
- data/vendor/pygments-main/tests/examplefiles/ceval.c +2604 -0
- data/vendor/pygments-main/tests/examplefiles/cheetah_example.html +13 -0
- data/vendor/pygments-main/tests/examplefiles/classes.dylan +40 -0
- data/vendor/pygments-main/tests/examplefiles/condensed_ruby.rb +10 -0
- data/vendor/pygments-main/tests/examplefiles/coq_RelationClasses +447 -0
- data/vendor/pygments-main/tests/examplefiles/database.pytb +20 -0
- data/vendor/pygments-main/tests/examplefiles/de.MoinMoin.po +2461 -0
- data/vendor/pygments-main/tests/examplefiles/demo.ahk +181 -0
- data/vendor/pygments-main/tests/examplefiles/demo.cfm +38 -0
- data/vendor/pygments-main/tests/examplefiles/django_sample.html+django +68 -0
- data/vendor/pygments-main/tests/examplefiles/dwarf.cw +17 -0
- data/vendor/pygments-main/tests/examplefiles/erl_session +10 -0
- data/vendor/pygments-main/tests/examplefiles/escape_semicolon.clj +1 -0
- data/vendor/pygments-main/tests/examplefiles/evil_regex.js +48 -0
- data/vendor/pygments-main/tests/examplefiles/example.c +2080 -0
- data/vendor/pygments-main/tests/examplefiles/example.cls +15 -0
- data/vendor/pygments-main/tests/examplefiles/example.cpp +2363 -0
- data/vendor/pygments-main/tests/examplefiles/example.gs +106 -0
- data/vendor/pygments-main/tests/examplefiles/example.gst +7 -0
- data/vendor/pygments-main/tests/examplefiles/example.kt +47 -0
- data/vendor/pygments-main/tests/examplefiles/example.lua +250 -0
- data/vendor/pygments-main/tests/examplefiles/example.moo +26 -0
- data/vendor/pygments-main/tests/examplefiles/example.moon +629 -0
- data/vendor/pygments-main/tests/examplefiles/example.nim +1010 -0
- data/vendor/pygments-main/tests/examplefiles/example.ns2 +69 -0
- data/vendor/pygments-main/tests/examplefiles/example.p +34 -0
- data/vendor/pygments-main/tests/examplefiles/example.pas +2708 -0
- data/vendor/pygments-main/tests/examplefiles/example.rb +1852 -0
- data/vendor/pygments-main/tests/examplefiles/example.rhtml +561 -0
- data/vendor/pygments-main/tests/examplefiles/example.sh-session +19 -0
- data/vendor/pygments-main/tests/examplefiles/example.sml +156 -0
- data/vendor/pygments-main/tests/examplefiles/example.snobol +15 -0
- data/vendor/pygments-main/tests/examplefiles/example.tea +34 -0
- data/vendor/pygments-main/tests/examplefiles/example.u +548 -0
- data/vendor/pygments-main/tests/examplefiles/example.weechatlog +9 -0
- data/vendor/pygments-main/tests/examplefiles/example.xhtml +376 -0
- data/vendor/pygments-main/tests/examplefiles/example.yaml +302 -0
- data/vendor/pygments-main/tests/examplefiles/example2.aspx +29 -0
- data/vendor/pygments-main/tests/examplefiles/example_elixir.ex +363 -0
- data/vendor/pygments-main/tests/examplefiles/example_file.fy +128 -0
- data/vendor/pygments-main/tests/examplefiles/firefox.mak +586 -0
- data/vendor/pygments-main/tests/examplefiles/flipflop.sv +19 -0
- data/vendor/pygments-main/tests/examplefiles/foo.sce +6 -0
- data/vendor/pygments-main/tests/examplefiles/format.ml +1213 -0
- data/vendor/pygments-main/tests/examplefiles/fucked_up.rb +77 -0
- data/vendor/pygments-main/tests/examplefiles/function.mu +1 -0
- data/vendor/pygments-main/tests/examplefiles/functional.rst +1472 -0
- data/vendor/pygments-main/tests/examplefiles/genclass.clj +510 -0
- data/vendor/pygments-main/tests/examplefiles/genshi_example.xml+genshi +193 -0
- data/vendor/pygments-main/tests/examplefiles/genshitext_example.genshitext +33 -0
- data/vendor/pygments-main/tests/examplefiles/glsl.frag +7 -0
- data/vendor/pygments-main/tests/examplefiles/glsl.vert +13 -0
- data/vendor/pygments-main/tests/examplefiles/html+php_faulty.php +1 -0
- data/vendor/pygments-main/tests/examplefiles/http_request_example +14 -0
- data/vendor/pygments-main/tests/examplefiles/http_response_example +27 -0
- data/vendor/pygments-main/tests/examplefiles/import.hs +4 -0
- data/vendor/pygments-main/tests/examplefiles/intro.ik +24 -0
- data/vendor/pygments-main/tests/examplefiles/ints.php +10 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.fun +675 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.sig +286 -0
- data/vendor/pygments-main/tests/examplefiles/irb_heredoc +8 -0
- data/vendor/pygments-main/tests/examplefiles/irc.lsp +214 -0
- data/vendor/pygments-main/tests/examplefiles/java.properties +16 -0
- data/vendor/pygments-main/tests/examplefiles/jbst_example1.jbst +28 -0
- data/vendor/pygments-main/tests/examplefiles/jbst_example2.jbst +45 -0
- data/vendor/pygments-main/tests/examplefiles/jinjadesignerdoc.rst +713 -0
- data/vendor/pygments-main/tests/examplefiles/lighttpd_config.conf +13 -0
- data/vendor/pygments-main/tests/examplefiles/linecontinuation.py +47 -0
- data/vendor/pygments-main/tests/examplefiles/ltmain.sh +2849 -0
- data/vendor/pygments-main/tests/examplefiles/main.cmake +42 -0
- data/vendor/pygments-main/tests/examplefiles/markdown.lsp +679 -0
- data/vendor/pygments-main/tests/examplefiles/matlab_noreturn +3 -0
- data/vendor/pygments-main/tests/examplefiles/matlab_sample +27 -0
- data/vendor/pygments-main/tests/examplefiles/matlabsession_sample.txt +37 -0
- data/vendor/pygments-main/tests/examplefiles/minimal.ns2 +4 -0
- data/vendor/pygments-main/tests/examplefiles/moin_SyntaxReference.txt +340 -0
- data/vendor/pygments-main/tests/examplefiles/multiline_regexes.rb +38 -0
- data/vendor/pygments-main/tests/examplefiles/nasm_aoutso.asm +96 -0
- data/vendor/pygments-main/tests/examplefiles/nasm_objexe.asm +30 -0
- data/vendor/pygments-main/tests/examplefiles/nemerle_sample.n +87 -0
- data/vendor/pygments-main/tests/examplefiles/nginx_nginx.conf +118 -0
- data/vendor/pygments-main/tests/examplefiles/numbers.c +12 -0
- data/vendor/pygments-main/tests/examplefiles/objc_example.m +25 -0
- data/vendor/pygments-main/tests/examplefiles/objc_example2.m +24 -0
- data/vendor/pygments-main/tests/examplefiles/perl_misc +62 -0
- data/vendor/pygments-main/tests/examplefiles/perl_perl5db +998 -0
- data/vendor/pygments-main/tests/examplefiles/perl_regex-delims +120 -0
- data/vendor/pygments-main/tests/examplefiles/perlfunc.1 +856 -0
- data/vendor/pygments-main/tests/examplefiles/phpcomplete.vim +567 -0
- data/vendor/pygments-main/tests/examplefiles/pleac.in.rb +1223 -0
- data/vendor/pygments-main/tests/examplefiles/postgresql_test.txt +47 -0
- data/vendor/pygments-main/tests/examplefiles/pppoe.applescript +10 -0
- data/vendor/pygments-main/tests/examplefiles/psql_session.txt +122 -0
- data/vendor/pygments-main/tests/examplefiles/py3_test.txt +2 -0
- data/vendor/pygments-main/tests/examplefiles/pycon_test.pycon +14 -0
- data/vendor/pygments-main/tests/examplefiles/pytb_test2.pytb +2 -0
- data/vendor/pygments-main/tests/examplefiles/python25-bsd.mak +234 -0
- data/vendor/pygments-main/tests/examplefiles/qsort.prolog +13 -0
- data/vendor/pygments-main/tests/examplefiles/r-console-transcript.Rout +38 -0
- data/vendor/pygments-main/tests/examplefiles/ragel-cpp_rlscan +280 -0
- data/vendor/pygments-main/tests/examplefiles/ragel-cpp_snippet +2 -0
- data/vendor/pygments-main/tests/examplefiles/regex.js +22 -0
- data/vendor/pygments-main/tests/examplefiles/reversi.lsp +427 -0
- data/vendor/pygments-main/tests/examplefiles/ruby_func_def.rb +11 -0
- data/vendor/pygments-main/tests/examplefiles/scilab.sci +30 -0
- data/vendor/pygments-main/tests/examplefiles/sibling.prolog +19 -0
- data/vendor/pygments-main/tests/examplefiles/simple.md +747 -0
- data/vendor/pygments-main/tests/examplefiles/smarty_example.html +209 -0
- data/vendor/pygments-main/tests/examplefiles/source.lgt +343 -0
- data/vendor/pygments-main/tests/examplefiles/sources.list +62 -0
- data/vendor/pygments-main/tests/examplefiles/sphere.pov +18 -0
- data/vendor/pygments-main/tests/examplefiles/sqlite3.sqlite3-console +27 -0
- data/vendor/pygments-main/tests/examplefiles/squid.conf +30 -0
- data/vendor/pygments-main/tests/examplefiles/string.jl +1031 -0
- data/vendor/pygments-main/tests/examplefiles/string_delimiters.d +21 -0
- data/vendor/pygments-main/tests/examplefiles/stripheredoc.sh +3 -0
- data/vendor/pygments-main/tests/examplefiles/test.R +119 -0
- data/vendor/pygments-main/tests/examplefiles/test.adb +211 -0
- data/vendor/pygments-main/tests/examplefiles/test.asy +131 -0
- data/vendor/pygments-main/tests/examplefiles/test.awk +121 -0
- data/vendor/pygments-main/tests/examplefiles/test.bas +29 -0
- data/vendor/pygments-main/tests/examplefiles/test.bmx +145 -0
- data/vendor/pygments-main/tests/examplefiles/test.boo +39 -0
- data/vendor/pygments-main/tests/examplefiles/test.bro +250 -0
- data/vendor/pygments-main/tests/examplefiles/test.cs +374 -0
- data/vendor/pygments-main/tests/examplefiles/test.css +54 -0
- data/vendor/pygments-main/tests/examplefiles/test.d +135 -0
- data/vendor/pygments-main/tests/examplefiles/test.dart +23 -0
- data/vendor/pygments-main/tests/examplefiles/test.dtd +89 -0
- data/vendor/pygments-main/tests/examplefiles/test.ec +605 -0
- data/vendor/pygments-main/tests/examplefiles/test.ecl +58 -0
- data/vendor/pygments-main/tests/examplefiles/test.eh +315 -0
- data/vendor/pygments-main/tests/examplefiles/test.erl +169 -0
- data/vendor/pygments-main/tests/examplefiles/test.evoque +33 -0
- data/vendor/pygments-main/tests/examplefiles/test.fan +818 -0
- data/vendor/pygments-main/tests/examplefiles/test.flx +57 -0
- data/vendor/pygments-main/tests/examplefiles/test.gdc +13 -0
- data/vendor/pygments-main/tests/examplefiles/test.groovy +97 -0
- data/vendor/pygments-main/tests/examplefiles/test.html +339 -0
- data/vendor/pygments-main/tests/examplefiles/test.ini +10 -0
- data/vendor/pygments-main/tests/examplefiles/test.java +653 -0
- data/vendor/pygments-main/tests/examplefiles/test.jsp +24 -0
- data/vendor/pygments-main/tests/examplefiles/test.maql +45 -0
- data/vendor/pygments-main/tests/examplefiles/test.mod +374 -0
- data/vendor/pygments-main/tests/examplefiles/test.moo +51 -0
- data/vendor/pygments-main/tests/examplefiles/test.myt +166 -0
- data/vendor/pygments-main/tests/examplefiles/test.nim +93 -0
- data/vendor/pygments-main/tests/examplefiles/test.pas +743 -0
- data/vendor/pygments-main/tests/examplefiles/test.php +505 -0
- data/vendor/pygments-main/tests/examplefiles/test.plot +333 -0
- data/vendor/pygments-main/tests/examplefiles/test.ps1 +108 -0
- data/vendor/pygments-main/tests/examplefiles/test.pypylog +1839 -0
- data/vendor/pygments-main/tests/examplefiles/test.r3 +94 -0
- data/vendor/pygments-main/tests/examplefiles/test.rb +177 -0
- data/vendor/pygments-main/tests/examplefiles/test.rhtml +43 -0
- data/vendor/pygments-main/tests/examplefiles/test.scaml +8 -0
- data/vendor/pygments-main/tests/examplefiles/test.ssp +12 -0
- data/vendor/pygments-main/tests/examplefiles/test.tcsh +830 -0
- data/vendor/pygments-main/tests/examplefiles/test.vb +407 -0
- data/vendor/pygments-main/tests/examplefiles/test.vhdl +161 -0
- data/vendor/pygments-main/tests/examplefiles/test.xqy +138 -0
- data/vendor/pygments-main/tests/examplefiles/test.xsl +23 -0
- data/vendor/pygments-main/tests/examplefiles/truncated.pytb +15 -0
- data/vendor/pygments-main/tests/examplefiles/type.lisp +1202 -0
- data/vendor/pygments-main/tests/examplefiles/underscore.coffee +603 -0
- data/vendor/pygments-main/tests/examplefiles/unicode.applescript +5 -0
- data/vendor/pygments-main/tests/examplefiles/unicodedoc.py +11 -0
- data/vendor/pygments-main/tests/examplefiles/webkit-transition.css +3 -0
- data/vendor/pygments-main/tests/examplefiles/while.pov +13 -0
- data/vendor/pygments-main/tests/examplefiles/wiki.factor +384 -0
- data/vendor/pygments-main/tests/examplefiles/xml_example +1897 -0
- data/vendor/pygments-main/tests/examplefiles/zmlrpc.f90 +798 -0
- data/vendor/pygments-main/tests/old_run.py +138 -0
- data/vendor/pygments-main/tests/run.py +48 -0
- data/vendor/pygments-main/tests/support.py +15 -0
- data/vendor/pygments-main/tests/test_basic_api.py +294 -0
- data/vendor/pygments-main/tests/test_clexer.py +31 -0
- data/vendor/pygments-main/tests/test_cmdline.py +105 -0
- data/vendor/pygments-main/tests/test_examplefiles.py +97 -0
- data/vendor/pygments-main/tests/test_html_formatter.py +162 -0
- data/vendor/pygments-main/tests/test_latex_formatter.py +55 -0
- data/vendor/pygments-main/tests/test_perllexer.py +137 -0
- data/vendor/pygments-main/tests/test_regexlexer.py +47 -0
- data/vendor/pygments-main/tests/test_token.py +46 -0
- data/vendor/pygments-main/tests/test_using_api.py +40 -0
- data/vendor/pygments-main/tests/test_util.py +116 -0
- data/vendor/simplejson/.gitignore +10 -0
- data/vendor/simplejson/.travis.yml +5 -0
- data/vendor/simplejson/CHANGES.txt +291 -0
- data/vendor/simplejson/LICENSE.txt +19 -0
- data/vendor/simplejson/MANIFEST.in +5 -0
- data/vendor/simplejson/README.rst +19 -0
- data/vendor/simplejson/conf.py +179 -0
- data/vendor/simplejson/index.rst +628 -0
- data/vendor/simplejson/scripts/make_docs.py +18 -0
- data/vendor/simplejson/setup.py +104 -0
- data/vendor/simplejson/simplejson/__init__.py +510 -0
- data/vendor/simplejson/simplejson/_speedups.c +2745 -0
- data/vendor/simplejson/simplejson/decoder.py +425 -0
- data/vendor/simplejson/simplejson/encoder.py +567 -0
- data/vendor/simplejson/simplejson/ordered_dict.py +119 -0
- data/vendor/simplejson/simplejson/scanner.py +77 -0
- data/vendor/simplejson/simplejson/tests/__init__.py +67 -0
- data/vendor/simplejson/simplejson/tests/test_bigint_as_string.py +55 -0
- data/vendor/simplejson/simplejson/tests/test_check_circular.py +30 -0
- data/vendor/simplejson/simplejson/tests/test_decimal.py +66 -0
- data/vendor/simplejson/simplejson/tests/test_decode.py +83 -0
- data/vendor/simplejson/simplejson/tests/test_default.py +9 -0
- data/vendor/simplejson/simplejson/tests/test_dump.py +67 -0
- data/vendor/simplejson/simplejson/tests/test_encode_basestring_ascii.py +46 -0
- data/vendor/simplejson/simplejson/tests/test_encode_for_html.py +32 -0
- data/vendor/simplejson/simplejson/tests/test_errors.py +34 -0
- data/vendor/simplejson/simplejson/tests/test_fail.py +91 -0
- data/vendor/simplejson/simplejson/tests/test_float.py +19 -0
- data/vendor/simplejson/simplejson/tests/test_indent.py +86 -0
- data/vendor/simplejson/simplejson/tests/test_item_sort_key.py +20 -0
- data/vendor/simplejson/simplejson/tests/test_namedtuple.py +121 -0
- data/vendor/simplejson/simplejson/tests/test_pass1.py +76 -0
- data/vendor/simplejson/simplejson/tests/test_pass2.py +14 -0
- data/vendor/simplejson/simplejson/tests/test_pass3.py +20 -0
- data/vendor/simplejson/simplejson/tests/test_recursion.py +67 -0
- data/vendor/simplejson/simplejson/tests/test_scanstring.py +117 -0
- data/vendor/simplejson/simplejson/tests/test_separators.py +42 -0
- data/vendor/simplejson/simplejson/tests/test_speedups.py +20 -0
- data/vendor/simplejson/simplejson/tests/test_tuple.py +49 -0
- data/vendor/simplejson/simplejson/tests/test_unicode.py +109 -0
- data/vendor/simplejson/simplejson/tool.py +39 -0
- metadata +492 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
.. -*- mode: rst -*-
|
2
|
+
|
3
|
+
===================================
|
4
|
+
Using Pygments in various scenarios
|
5
|
+
===================================
|
6
|
+
|
7
|
+
PyGtk
|
8
|
+
-----
|
9
|
+
|
10
|
+
Armin has written a piece of sample code that shows how to create a Gtk
|
11
|
+
`TextBuffer` object containing Pygments-highlighted text.
|
12
|
+
|
13
|
+
See the article here: http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-gtk-rendering/
|
14
|
+
|
15
|
+
Wordpress
|
16
|
+
---------
|
17
|
+
|
18
|
+
He also has a snippet that shows how to use Pygments in WordPress:
|
19
|
+
|
20
|
+
http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-in-wordpress/
|
21
|
+
|
22
|
+
Markdown
|
23
|
+
--------
|
24
|
+
|
25
|
+
Since Pygments 0.9, the distribution ships Markdown_ preprocessor sample code
|
26
|
+
that uses Pygments to render source code in `external/markdown-processor.py`.
|
27
|
+
You can copy and adapt it to your liking.
|
28
|
+
|
29
|
+
.. _Markdown: http://www.freewisdom.org/projects/python-markdown/
|
30
|
+
|
31
|
+
TextMate
|
32
|
+
--------
|
33
|
+
|
34
|
+
Antonio Cangiano has created a Pygments bundle for TextMate that allows you to
|
35
|
+
colorize code via a simple menu option. It can be found here_.
|
36
|
+
|
37
|
+
.. _here: http://antoniocangiano.com/2008/10/28/pygments-textmate-bundle/
|
38
|
+
|
39
|
+
Bash completion
|
40
|
+
---------------
|
41
|
+
|
42
|
+
The source distribution contains a file ``external/pygments.bashcomp`` that
|
43
|
+
sets up completion for the ``pygmentize`` command in bash.
|
@@ -0,0 +1,551 @@
|
|
1
|
+
.. -*- mode: rst -*-
|
2
|
+
|
3
|
+
====================
|
4
|
+
Write your own lexer
|
5
|
+
====================
|
6
|
+
|
7
|
+
If a lexer for your favorite language is missing in the Pygments package, you can
|
8
|
+
easily write your own and extend Pygments.
|
9
|
+
|
10
|
+
All you need can be found inside the `pygments.lexer` module. As you can read in
|
11
|
+
the `API documentation <api.txt>`_, a lexer is a class that is initialized with
|
12
|
+
some keyword arguments (the lexer options) and that provides a
|
13
|
+
`get_tokens_unprocessed()` method which is given a string or unicode object with
|
14
|
+
the data to parse.
|
15
|
+
|
16
|
+
The `get_tokens_unprocessed()` method must return an iterator or iterable
|
17
|
+
containing tuples in the form ``(index, token, value)``. Normally you don't need
|
18
|
+
to do this since there are numerous base lexers you can subclass.
|
19
|
+
|
20
|
+
|
21
|
+
RegexLexer
|
22
|
+
==========
|
23
|
+
|
24
|
+
A very powerful (but quite easy to use) lexer is the `RegexLexer`. This lexer
|
25
|
+
base class allows you to define lexing rules in terms of *regular expressions*
|
26
|
+
for different *states*.
|
27
|
+
|
28
|
+
States are groups of regular expressions that are matched against the input
|
29
|
+
string at the *current position*. If one of these expressions matches, a
|
30
|
+
corresponding action is performed (normally yielding a token with a specific
|
31
|
+
type), the current position is set to where the last match ended and the
|
32
|
+
matching process continues with the first regex of the current state.
|
33
|
+
|
34
|
+
Lexer states are kept in a state stack: each time a new state is entered, the
|
35
|
+
new state is pushed onto the stack. The most basic lexers (like the
|
36
|
+
`DiffLexer`) just need one state.
|
37
|
+
|
38
|
+
Each state is defined as a list of tuples in the form (`regex`, `action`,
|
39
|
+
`new_state`) where the last item is optional. In the most basic form, `action`
|
40
|
+
is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a
|
41
|
+
token with the match text and type `tokentype` and push `new_state` on the state
|
42
|
+
stack. If the new state is ``'#pop'``, the topmost state is popped from the
|
43
|
+
stack instead. (To pop more than one state, use ``'#pop:2'`` and so on.)
|
44
|
+
``'#push'`` is a synonym for pushing the current state on the
|
45
|
+
stack.
|
46
|
+
|
47
|
+
The following example shows the `DiffLexer` from the builtin lexers. Note that
|
48
|
+
it contains some additional attributes `name`, `aliases` and `filenames` which
|
49
|
+
aren't required for a lexer. They are used by the builtin lexer lookup
|
50
|
+
functions.
|
51
|
+
|
52
|
+
.. sourcecode:: python
|
53
|
+
|
54
|
+
from pygments.lexer import RegexLexer
|
55
|
+
from pygments.token import *
|
56
|
+
|
57
|
+
class DiffLexer(RegexLexer):
|
58
|
+
name = 'Diff'
|
59
|
+
aliases = ['diff']
|
60
|
+
filenames = ['*.diff']
|
61
|
+
|
62
|
+
tokens = {
|
63
|
+
'root': [
|
64
|
+
(r' .*\n', Text),
|
65
|
+
(r'\+.*\n', Generic.Inserted),
|
66
|
+
(r'-.*\n', Generic.Deleted),
|
67
|
+
(r'@.*\n', Generic.Subheading),
|
68
|
+
(r'Index.*\n', Generic.Heading),
|
69
|
+
(r'=.*\n', Generic.Heading),
|
70
|
+
(r'.*\n', Text),
|
71
|
+
]
|
72
|
+
}
|
73
|
+
|
74
|
+
As you can see this lexer only uses one state. When the lexer starts scanning
|
75
|
+
the text, it first checks if the current character is a space. If this is true
|
76
|
+
it scans everything until newline and returns the parsed data as `Text` token.
|
77
|
+
|
78
|
+
If this rule doesn't match, it checks if the current char is a plus sign. And
|
79
|
+
so on.
|
80
|
+
|
81
|
+
If no rule matches at the current position, the current char is emitted as an
|
82
|
+
`Error` token that indicates a parsing error, and the position is increased by
|
83
|
+
1.
|
84
|
+
|
85
|
+
|
86
|
+
Regex Flags
|
87
|
+
===========
|
88
|
+
|
89
|
+
You can either define regex flags in the regex (``r'(?x)foo bar'``) or by adding
|
90
|
+
a `flags` attribute to your lexer class. If no attribute is defined, it defaults
|
91
|
+
to `re.MULTILINE`. For more information about regular expression flags, see the
|
92
|
+
`regular expressions`_ help page in the python documentation.
|
93
|
+
|
94
|
+
.. _regular expressions: http://docs.python.org/lib/re-syntax.html
|
95
|
+
|
96
|
+
|
97
|
+
Scanning multiple tokens at once
|
98
|
+
================================
|
99
|
+
|
100
|
+
Here is a more complex lexer that highlights INI files. INI files consist of
|
101
|
+
sections, comments and key = value pairs:
|
102
|
+
|
103
|
+
.. sourcecode:: python
|
104
|
+
|
105
|
+
from pygments.lexer import RegexLexer, bygroups
|
106
|
+
from pygments.token import *
|
107
|
+
|
108
|
+
class IniLexer(RegexLexer):
|
109
|
+
name = 'INI'
|
110
|
+
aliases = ['ini', 'cfg']
|
111
|
+
filenames = ['*.ini', '*.cfg']
|
112
|
+
|
113
|
+
tokens = {
|
114
|
+
'root': [
|
115
|
+
(r'\s+', Text),
|
116
|
+
(r';.*?$', Comment),
|
117
|
+
(r'\[.*?\]$', Keyword),
|
118
|
+
(r'(.*?)(\s*)(=)(\s*)(.*?)$',
|
119
|
+
bygroups(Name.Attribute, Text, Operator, Text, String))
|
120
|
+
]
|
121
|
+
}
|
122
|
+
|
123
|
+
The lexer first looks for whitespace, comments and section names. And later it
|
124
|
+
looks for a line that looks like a key, value pair, separated by an ``'='``
|
125
|
+
sign, and optional whitespace.
|
126
|
+
|
127
|
+
The `bygroups` helper makes sure that each group is yielded with a different
|
128
|
+
token type. First the `Name.Attribute` token, then a `Text` token for the
|
129
|
+
optional whitespace, after that a `Operator` token for the equals sign. Then a
|
130
|
+
`Text` token for the whitespace again. The rest of the line is returned as
|
131
|
+
`String`.
|
132
|
+
|
133
|
+
Note that for this to work, every part of the match must be inside a capturing
|
134
|
+
group (a ``(...)``), and there must not be any nested capturing groups. If you
|
135
|
+
nevertheless need a group, use a non-capturing group defined using this syntax:
|
136
|
+
``r'(?:some|words|here)'`` (note the ``?:`` after the beginning parenthesis).
|
137
|
+
|
138
|
+
If you find yourself needing a capturing group inside the regex which
|
139
|
+
shouldn't be part of the output but is used in the regular expressions for
|
140
|
+
backreferencing (eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None`
|
141
|
+
to the bygroups function and that group will be skipped in the
|
142
|
+
output.
|
143
|
+
|
144
|
+
|
145
|
+
Changing states
|
146
|
+
===============
|
147
|
+
|
148
|
+
Many lexers need multiple states to work as expected. For example, some
|
149
|
+
languages allow multiline comments to be nested. Since this is a recursive
|
150
|
+
pattern it's impossible to lex just using regular expressions.
|
151
|
+
|
152
|
+
Here is the solution:
|
153
|
+
|
154
|
+
.. sourcecode:: python
|
155
|
+
|
156
|
+
from pygments.lexer import RegexLexer
|
157
|
+
from pygments.token import *
|
158
|
+
|
159
|
+
class ExampleLexer(RegexLexer):
|
160
|
+
name = 'Example Lexer with states'
|
161
|
+
|
162
|
+
tokens = {
|
163
|
+
'root': [
|
164
|
+
(r'[^/]+', Text),
|
165
|
+
(r'/\*', Comment.Multiline, 'comment'),
|
166
|
+
(r'//.*?$', Comment.Singleline),
|
167
|
+
(r'/', Text)
|
168
|
+
],
|
169
|
+
'comment': [
|
170
|
+
(r'[^*/]', Comment.Multiline),
|
171
|
+
(r'/\*', Comment.Multiline, '#push'),
|
172
|
+
(r'\*/', Comment.Multiline, '#pop'),
|
173
|
+
(r'[*/]', Comment.Multiline)
|
174
|
+
]
|
175
|
+
}
|
176
|
+
|
177
|
+
This lexer starts lexing in the ``'root'`` state. It tries to match as much as
|
178
|
+
possible until it finds a slash (``'/'``). If the next character after the slash
|
179
|
+
is a star (``'*'``) the `RegexLexer` sends those two characters to the output
|
180
|
+
stream marked as `Comment.Multiline` and continues parsing with the rules
|
181
|
+
defined in the ``'comment'`` state.
|
182
|
+
|
183
|
+
If there wasn't a star after the slash, the `RegexLexer` checks if it's a
|
184
|
+
singleline comment (eg: followed by a second slash). If this also wasn't the
|
185
|
+
case it must be a single slash (the separate regex for a single slash must also
|
186
|
+
be given, else the slash would be marked as an error token).
|
187
|
+
|
188
|
+
Inside the ``'comment'`` state, we do the same thing again. Scan until the lexer
|
189
|
+
finds a star or slash. If it's the opening of a multiline comment, push the
|
190
|
+
``'comment'`` state on the stack and continue scanning, again in the
|
191
|
+
``'comment'`` state. Else, check if it's the end of the multiline comment. If
|
192
|
+
yes, pop one state from the stack.
|
193
|
+
|
194
|
+
Note: If you pop from an empty stack you'll get an `IndexError`. (There is an
|
195
|
+
easy way to prevent this from happening: don't ``'#pop'`` in the root state).
|
196
|
+
|
197
|
+
If the `RegexLexer` encounters a newline that is flagged as an error token, the
|
198
|
+
stack is emptied and the lexer continues scanning in the ``'root'`` state. This
|
199
|
+
helps producing error-tolerant highlighting for erroneous input, e.g. when a
|
200
|
+
single-line string is not closed.
|
201
|
+
|
202
|
+
|
203
|
+
Advanced state tricks
|
204
|
+
=====================
|
205
|
+
|
206
|
+
There are a few more things you can do with states:
|
207
|
+
|
208
|
+
- You can push multiple states onto the stack if you give a tuple instead of a
|
209
|
+
simple string as the third item in a rule tuple. For example, if you want to
|
210
|
+
match a comment containing a directive, something like::
|
211
|
+
|
212
|
+
/* <processing directive> rest of comment */
|
213
|
+
|
214
|
+
you can use this rule:
|
215
|
+
|
216
|
+
.. sourcecode:: python
|
217
|
+
|
218
|
+
tokens = {
|
219
|
+
'root': [
|
220
|
+
(r'/\* <', Comment, ('comment', 'directive')),
|
221
|
+
...
|
222
|
+
],
|
223
|
+
'directive': [
|
224
|
+
(r'[^>]*', Comment.Directive),
|
225
|
+
(r'>', Comment, '#pop'),
|
226
|
+
],
|
227
|
+
'comment': [
|
228
|
+
(r'[^*]+', Comment),
|
229
|
+
(r'\*/', Comment, '#pop'),
|
230
|
+
(r'\*', Comment),
|
231
|
+
]
|
232
|
+
}
|
233
|
+
|
234
|
+
When this encounters the above sample, first ``'comment'`` and ``'directive'``
|
235
|
+
are pushed onto the stack, then the lexer continues in the directive state
|
236
|
+
until it finds the closing ``>``, then it continues in the comment state until
|
237
|
+
the closing ``*/``. Then, both states are popped from the stack again and
|
238
|
+
lexing continues in the root state.
|
239
|
+
|
240
|
+
*New in Pygments 0.9:* The tuple can contain the special ``'#push'`` and
|
241
|
+
``'#pop'`` (but not ``'#pop:n'``) directives.
|
242
|
+
|
243
|
+
|
244
|
+
- You can include the rules of a state in the definition of another. This is
|
245
|
+
done by using `include` from `pygments.lexer`:
|
246
|
+
|
247
|
+
.. sourcecode:: python
|
248
|
+
|
249
|
+
from pygments.lexer import RegexLexer, bygroups, include
|
250
|
+
from pygments.token import *
|
251
|
+
|
252
|
+
class ExampleLexer(RegexLexer):
|
253
|
+
tokens = {
|
254
|
+
'comments': [
|
255
|
+
(r'/\*.*?\*/', Comment),
|
256
|
+
(r'//.*?\n', Comment),
|
257
|
+
],
|
258
|
+
'root': [
|
259
|
+
include('comments'),
|
260
|
+
(r'(function )(\w+)( {)',
|
261
|
+
bygroups(Keyword, Name, Keyword), 'function'),
|
262
|
+
(r'.', Text),
|
263
|
+
],
|
264
|
+
'function': [
|
265
|
+
(r'[^}/]+', Text),
|
266
|
+
include('comments'),
|
267
|
+
(r'/', Text),
|
268
|
+
(r'}', Keyword, '#pop'),
|
269
|
+
]
|
270
|
+
}
|
271
|
+
|
272
|
+
This is a hypothetical lexer for a language that consists of functions and
|
273
|
+
comments. Because comments can occur at toplevel and in functions, we need
|
274
|
+
rules for comments in both states. As you can see, the `include` helper saves
|
275
|
+
repeating rules that occur more than once (in this example, the state
|
276
|
+
``'comments'`` will never be entered by the lexer, as it's only there to be
|
277
|
+
included in ``'root'`` and ``'function'``).
|
278
|
+
|
279
|
+
|
280
|
+
- Sometimes, you may want to "combine" a state from existing ones. This is
|
281
|
+
possible with the `combined` helper from `pygments.lexer`.
|
282
|
+
|
283
|
+
If you, instead of a new state, write ``combined('state1', 'state2')`` as the
|
284
|
+
third item of a rule tuple, a new anonymous state will be formed from state1
|
285
|
+
and state2 and if the rule matches, the lexer will enter this state.
|
286
|
+
|
287
|
+
This is not used very often, but can be helpful in some cases, such as the
|
288
|
+
`PythonLexer`'s string literal processing.
|
289
|
+
|
290
|
+
- If you want your lexer to start lexing in a different state you can modify
|
291
|
+
the stack by overloading the `get_tokens_unprocessed()` method:
|
292
|
+
|
293
|
+
.. sourcecode:: python
|
294
|
+
|
295
|
+
from pygments.lexer import RegexLexer
|
296
|
+
|
297
|
+
class MyLexer(RegexLexer):
|
298
|
+
tokens = {...}
|
299
|
+
|
300
|
+
def get_tokens_unprocessed(self, text):
|
301
|
+
stack = ['root', 'otherstate']
|
302
|
+
for item in RegexLexer.get_tokens_unprocessed(self, text, stack):
|
303
|
+
yield item
|
304
|
+
|
305
|
+
Some lexers like the `PhpLexer` use this to make the leading ``<?php``
|
306
|
+
preprocessor comments optional. Note that you can crash the lexer easily
|
307
|
+
by putting values into the stack that don't exist in the token map. Also
|
308
|
+
removing ``'root'`` from the stack can result in strange errors!
|
309
|
+
|
310
|
+
- An empty regex at the end of a state list, combined with ``'#pop'``, can
|
311
|
+
act as a return point from a state that doesn't have a clear end marker.
|
312
|
+
|
313
|
+
|
314
|
+
Using multiple lexers
|
315
|
+
=====================
|
316
|
+
|
317
|
+
Using multiple lexers for the same input can be tricky. One of the easiest
|
318
|
+
combination techniques is shown here: You can replace the token type entry in a
|
319
|
+
rule tuple (the second item) with a lexer class. The matched text will then be
|
320
|
+
lexed with that lexer, and the resulting tokens will be yielded.
|
321
|
+
|
322
|
+
For example, look at this stripped-down HTML lexer:
|
323
|
+
|
324
|
+
.. sourcecode:: python
|
325
|
+
|
326
|
+
from pygments.lexer import RegexLexer, bygroups, using
|
327
|
+
from pygments.token import *
|
328
|
+
from pygments.lexers.web import JavascriptLexer
|
329
|
+
|
330
|
+
class HtmlLexer(RegexLexer):
|
331
|
+
name = 'HTML'
|
332
|
+
aliases = ['html']
|
333
|
+
filenames = ['*.html', '*.htm']
|
334
|
+
|
335
|
+
flags = re.IGNORECASE | re.DOTALL
|
336
|
+
tokens = {
|
337
|
+
'root': [
|
338
|
+
('[^<&]+', Text),
|
339
|
+
('&.*?;', Name.Entity),
|
340
|
+
(r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
|
341
|
+
(r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'),
|
342
|
+
(r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag),
|
343
|
+
],
|
344
|
+
'script-content': [
|
345
|
+
(r'(.+?)(<\s*/\s*script\s*>)',
|
346
|
+
bygroups(using(JavascriptLexer), Name.Tag),
|
347
|
+
'#pop'),
|
348
|
+
]
|
349
|
+
}
|
350
|
+
|
351
|
+
Here the content of a ``<script>`` tag is passed to a newly created instance of
|
352
|
+
a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done using the
|
353
|
+
`using` helper that takes the other lexer class as its parameter.
|
354
|
+
|
355
|
+
Note the combination of `bygroups` and `using`. This makes sure that the content
|
356
|
+
up to the ``</script>`` end tag is processed by the `JavascriptLexer`, while the
|
357
|
+
end tag is yielded as a normal token with the `Name.Tag` type.
|
358
|
+
|
359
|
+
As an additional goodie, if the lexer class is replaced by `this` (imported from
|
360
|
+
`pygments.lexer`), the "other" lexer will be the current one (because you cannot
|
361
|
+
refer to the current class within the code that runs at class definition time).
|
362
|
+
|
363
|
+
Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule.
|
364
|
+
Here, two states are pushed onto the state stack, ``'script-content'`` and
|
365
|
+
``'tag'``. That means that first ``'tag'`` is processed, which will parse
|
366
|
+
attributes and the closing ``>``, then the ``'tag'`` state is popped and the
|
367
|
+
next state on top of the stack will be ``'script-content'``.
|
368
|
+
|
369
|
+
The `using()` helper has a special keyword argument, `state`, which works as
|
370
|
+
follows: if given, the lexer to use initially is not in the ``"root"`` state,
|
371
|
+
but in the state given by this argument. This *only* works with a `RegexLexer`.
|
372
|
+
|
373
|
+
Any other keyword arguments passed to `using()` are added to the keyword
|
374
|
+
arguments used to create the lexer.
|
375
|
+
|
376
|
+
|
377
|
+
Delegating Lexer
|
378
|
+
================
|
379
|
+
|
380
|
+
Another approach for nested lexers is the `DelegatingLexer` which is for
|
381
|
+
example used for the template engine lexers. It takes two lexers as
|
382
|
+
arguments on initialisation: a `root_lexer` and a `language_lexer`.
|
383
|
+
|
384
|
+
The input is processed as follows: First, the whole text is lexed with the
|
385
|
+
`language_lexer`. All tokens yielded with a type of ``Other`` are then
|
386
|
+
concatenated and given to the `root_lexer`. The language tokens of the
|
387
|
+
`language_lexer` are then inserted into the `root_lexer`'s token stream
|
388
|
+
at the appropriate positions.
|
389
|
+
|
390
|
+
.. sourcecode:: python
|
391
|
+
|
392
|
+
from pygments.lexer import DelegatingLexer
|
393
|
+
from pygments.lexers.web import HtmlLexer, PhpLexer
|
394
|
+
|
395
|
+
class HtmlPhpLexer(DelegatingLexer):
|
396
|
+
def __init__(self, **options):
|
397
|
+
super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)
|
398
|
+
|
399
|
+
This procedure ensures that e.g. HTML with template tags in it is highlighted
|
400
|
+
correctly even if the template tags are put into HTML tags or attributes.
|
401
|
+
|
402
|
+
If you want to change the needle token ``Other`` to something else, you can
|
403
|
+
give the lexer another token type as the third parameter:
|
404
|
+
|
405
|
+
.. sourcecode:: python
|
406
|
+
|
407
|
+
DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options)
|
408
|
+
|
409
|
+
|
410
|
+
Callbacks
|
411
|
+
=========
|
412
|
+
|
413
|
+
Sometimes the grammar of a language is so complex that a lexer would be unable
|
414
|
+
to parse it just by using regular expressions and stacks.
|
415
|
+
|
416
|
+
For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead
|
417
|
+
of token types (`bygroups` and `using` are nothing else but preimplemented
|
418
|
+
callbacks). The callback must be a function taking two arguments:
|
419
|
+
|
420
|
+
* the lexer itself
|
421
|
+
* the match object for the last matched rule
|
422
|
+
|
423
|
+
The callback must then return an iterable of (or simply yield) ``(index,
|
424
|
+
tokentype, value)`` tuples, which are then just passed through by
|
425
|
+
`get_tokens_unprocessed()`. The ``index`` here is the position of the token in
|
426
|
+
the input string, ``tokentype`` is the normal token type (like `Name.Builtin`),
|
427
|
+
and ``value`` the associated part of the input string.
|
428
|
+
|
429
|
+
You can see an example here:
|
430
|
+
|
431
|
+
.. sourcecode:: python
|
432
|
+
|
433
|
+
from pygments.lexer import RegexLexer
|
434
|
+
from pygments.token import Generic
|
435
|
+
|
436
|
+
class HypotheticLexer(RegexLexer):
|
437
|
+
|
438
|
+
def headline_callback(lexer, match):
|
439
|
+
equal_signs = match.group(1)
|
440
|
+
text = match.group(2)
|
441
|
+
yield match.start(), Generic.Headline, equal_signs + text + equal_signs
|
442
|
+
|
443
|
+
tokens = {
|
444
|
+
'root': [
|
445
|
+
(r'(=+)(.*?)(\1)', headline_callback)
|
446
|
+
]
|
447
|
+
}
|
448
|
+
|
449
|
+
If the regex for the `headline_callback` matches, the function is called with the
|
450
|
+
match object. Note that after the callback is done, processing continues
|
451
|
+
normally, that is, after the end of the previous match. The callback has no
|
452
|
+
possibility to influence the position.
|
453
|
+
|
454
|
+
There are not really any simple examples for lexer callbacks, but you can see
|
455
|
+
them in action e.g. in the `compiled.py`_ source code in the `CLexer` and
|
456
|
+
`JavaLexer` classes.
|
457
|
+
|
458
|
+
.. _compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py
|
459
|
+
|
460
|
+
|
461
|
+
The ExtendedRegexLexer class
|
462
|
+
============================
|
463
|
+
|
464
|
+
The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for
|
465
|
+
the funky syntax rules of some languages that will go unnamed, such as Ruby.
|
466
|
+
|
467
|
+
But fear not; even then you don't have to abandon the regular expression
|
468
|
+
approach. For Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`.
|
469
|
+
All features known from RegexLexers are available here too, and the tokens are
|
470
|
+
specified in exactly the same way, *except* for one detail:
|
471
|
+
|
472
|
+
The `get_tokens_unprocessed()` method holds its internal state data not as local
|
473
|
+
variables, but in an instance of the `pygments.lexer.LexerContext` class, and
|
474
|
+
that instance is passed to callbacks as a third argument. This means that you
|
475
|
+
can modify the lexer state in callbacks.
|
476
|
+
|
477
|
+
The `LexerContext` class has the following members:
|
478
|
+
|
479
|
+
* `text` -- the input text
|
480
|
+
* `pos` -- the current starting position that is used for matching regexes
|
481
|
+
* `stack` -- a list containing the state stack
|
482
|
+
* `end` -- the maximum position to which regexes are matched, this defaults to
|
483
|
+
the length of `text`
|
484
|
+
|
485
|
+
Additionally, the `get_tokens_unprocessed()` method can be given a
|
486
|
+
`LexerContext` instead of a string and will then process this context instead of
|
487
|
+
creating a new one for the string argument.
|
488
|
+
|
489
|
+
Note that because you can set the current position to anything in the callback,
|
490
|
+
it won't automatically be set by the caller after the callback is finished.
|
491
|
+
For example, this is how the hypothetical lexer above would be written with the
|
492
|
+
`ExtendedRegexLexer`:
|
493
|
+
|
494
|
+
.. sourcecode:: python
|
495
|
+
|
496
|
+
from pygments.lexer import ExtendedRegexLexer
|
497
|
+
from pygments.token import Generic
|
498
|
+
|
499
|
+
class ExHypotheticLexer(ExtendedRegexLexer):
|
500
|
+
|
501
|
+
def headline_callback(lexer, match, ctx):
|
502
|
+
equal_signs = match.group(1)
|
503
|
+
text = match.group(2)
|
504
|
+
yield match.start(), Generic.Headline, equal_signs + text + equal_signs
|
505
|
+
ctx.pos = match.end()
|
506
|
+
|
507
|
+
tokens = {
|
508
|
+
'root': [
|
509
|
+
(r'(=+)(.*?)(\1)', headline_callback)
|
510
|
+
]
|
511
|
+
}
|
512
|
+
|
513
|
+
This might sound confusing (and it can really be). But it is needed, and for an
|
514
|
+
example look at the Ruby lexer in `agile.py`_.
|
515
|
+
|
516
|
+
.. _agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py
|
517
|
+
|
518
|
+
|
519
|
+
Filtering Token Streams
|
520
|
+
=======================
|
521
|
+
|
522
|
+
Some languages ship a lot of builtin functions (for example PHP). The total
|
523
|
+
amount of those functions differs from system to system because not everybody
|
524
|
+
has every extension installed. In the case of PHP there are over 3000 builtin
|
525
|
+
functions. That's an incredibly huge amount of functions, much more than you
|
526
|
+
can put into a regular expression.
|
527
|
+
|
528
|
+
But because only `Name` tokens can be function names it's solvable by overriding
|
529
|
+
the ``get_tokens_unprocessed()`` method. The following lexer subclasses the
|
530
|
+
`PythonLexer` so that it highlights some additional names as pseudo keywords:
|
531
|
+
|
532
|
+
.. sourcecode:: python
|
533
|
+
|
534
|
+
from pygments.lexers.agile import PythonLexer
|
535
|
+
from pygments.token import Name, Keyword
|
536
|
+
|
537
|
+
class MyPythonLexer(PythonLexer):
|
538
|
+
EXTRA_KEYWORDS = ['foo', 'bar', 'foobar', 'barfoo', 'spam', 'eggs']
|
539
|
+
|
540
|
+
def get_tokens_unprocessed(self, text):
|
541
|
+
for index, token, value in PythonLexer.get_tokens_unprocessed(self, text):
|
542
|
+
if token is Name and value in self.EXTRA_KEYWORDS:
|
543
|
+
yield index, Keyword.Pseudo, value
|
544
|
+
else:
|
545
|
+
yield index, token, value
|
546
|
+
|
547
|
+
The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions.
|
548
|
+
|
549
|
+
**Note** Do not confuse this with the `filter`_ system.
|
550
|
+
|
551
|
+
.. _filter: filters.txt
|