mortar-pygments.rb 0.5.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/CHANGELOG.md +71 -0
- data/Gemfile +2 -0
- data/LICENSE +17 -0
- data/README.md +121 -0
- data/Rakefile +66 -0
- data/bench.rb +22 -0
- data/cache-lexers.rb +8 -0
- data/lexers +0 -0
- data/lib/pygments/lexer.rb +148 -0
- data/lib/pygments/mentos.py +351 -0
- data/lib/pygments/popen.rb +406 -0
- data/lib/pygments/version.rb +3 -0
- data/lib/pygments.rb +8 -0
- data/pygments.rb.gemspec +25 -0
- data/test/test_data.c +2581 -0
- data/test/test_data.py +514 -0
- data/test/test_data_generated +2582 -0
- data/test/test_pygments.rb +287 -0
- data/vendor/custom_lexers/github.py +565 -0
- data/vendor/pygments-main/AUTHORS +153 -0
- data/vendor/pygments-main/CHANGES +889 -0
- data/vendor/pygments-main/LICENSE +25 -0
- data/vendor/pygments-main/MANIFEST.in +6 -0
- data/vendor/pygments-main/Makefile +59 -0
- data/vendor/pygments-main/REVISION +1 -0
- data/vendor/pygments-main/TODO +15 -0
- data/vendor/pygments-main/docs/generate.py +472 -0
- data/vendor/pygments-main/docs/pygmentize.1 +94 -0
- data/vendor/pygments-main/docs/src/api.txt +270 -0
- data/vendor/pygments-main/docs/src/authors.txt +5 -0
- data/vendor/pygments-main/docs/src/changelog.txt +5 -0
- data/vendor/pygments-main/docs/src/cmdline.txt +147 -0
- data/vendor/pygments-main/docs/src/filterdevelopment.txt +70 -0
- data/vendor/pygments-main/docs/src/filters.txt +42 -0
- data/vendor/pygments-main/docs/src/formatterdevelopment.txt +169 -0
- data/vendor/pygments-main/docs/src/formatters.txt +48 -0
- data/vendor/pygments-main/docs/src/index.txt +69 -0
- data/vendor/pygments-main/docs/src/installation.txt +71 -0
- data/vendor/pygments-main/docs/src/integrate.txt +48 -0
- data/vendor/pygments-main/docs/src/java.txt +70 -0
- data/vendor/pygments-main/docs/src/lexerdevelopment.txt +603 -0
- data/vendor/pygments-main/docs/src/lexers.txt +67 -0
- data/vendor/pygments-main/docs/src/moinmoin.txt +39 -0
- data/vendor/pygments-main/docs/src/plugins.txt +93 -0
- data/vendor/pygments-main/docs/src/quickstart.txt +202 -0
- data/vendor/pygments-main/docs/src/rstdirective.txt +22 -0
- data/vendor/pygments-main/docs/src/styles.txt +143 -0
- data/vendor/pygments-main/docs/src/tokens.txt +349 -0
- data/vendor/pygments-main/docs/src/unicode.txt +49 -0
- data/vendor/pygments-main/external/autopygmentize +64 -0
- data/vendor/pygments-main/external/lasso-builtins-generator-9.lasso +144 -0
- data/vendor/pygments-main/external/markdown-processor.py +67 -0
- data/vendor/pygments-main/external/moin-parser.py +112 -0
- data/vendor/pygments-main/external/pygments.bashcomp +38 -0
- data/vendor/pygments-main/external/rst-directive-old.py +77 -0
- data/vendor/pygments-main/external/rst-directive.py +83 -0
- data/vendor/pygments-main/ez_setup.py +276 -0
- data/vendor/pygments-main/pygmentize +7 -0
- data/vendor/pygments-main/pygments/__init__.py +91 -0
- data/vendor/pygments-main/pygments/cmdline.py +441 -0
- data/vendor/pygments-main/pygments/console.py +74 -0
- data/vendor/pygments-main/pygments/filter.py +74 -0
- data/vendor/pygments-main/pygments/filters/__init__.py +356 -0
- data/vendor/pygments-main/pygments/formatter.py +95 -0
- data/vendor/pygments-main/pygments/formatters/__init__.py +68 -0
- data/vendor/pygments-main/pygments/formatters/_mapping.py +92 -0
- data/vendor/pygments-main/pygments/formatters/bbcode.py +109 -0
- data/vendor/pygments-main/pygments/formatters/html.py +821 -0
- data/vendor/pygments-main/pygments/formatters/img.py +553 -0
- data/vendor/pygments-main/pygments/formatters/latex.py +378 -0
- data/vendor/pygments-main/pygments/formatters/other.py +115 -0
- data/vendor/pygments-main/pygments/formatters/rtf.py +136 -0
- data/vendor/pygments-main/pygments/formatters/svg.py +154 -0
- data/vendor/pygments-main/pygments/formatters/terminal.py +112 -0
- data/vendor/pygments-main/pygments/formatters/terminal256.py +222 -0
- data/vendor/pygments-main/pygments/lexer.py +765 -0
- data/vendor/pygments-main/pygments/lexers/__init__.py +240 -0
- data/vendor/pygments-main/pygments/lexers/_asybuiltins.py +1645 -0
- data/vendor/pygments-main/pygments/lexers/_clbuiltins.py +232 -0
- data/vendor/pygments-main/pygments/lexers/_lassobuiltins.py +5172 -0
- data/vendor/pygments-main/pygments/lexers/_luabuiltins.py +249 -0
- data/vendor/pygments-main/pygments/lexers/_mapping.py +355 -0
- data/vendor/pygments-main/pygments/lexers/_openedgebuiltins.py +562 -0
- data/vendor/pygments-main/pygments/lexers/_phpbuiltins.py +3787 -0
- data/vendor/pygments-main/pygments/lexers/_postgres_builtins.py +233 -0
- data/vendor/pygments-main/pygments/lexers/_robotframeworklexer.py +557 -0
- data/vendor/pygments-main/pygments/lexers/_scilab_builtins.py +40 -0
- data/vendor/pygments-main/pygments/lexers/_sourcemodbuiltins.py +1072 -0
- data/vendor/pygments-main/pygments/lexers/_stan_builtins.py +360 -0
- data/vendor/pygments-main/pygments/lexers/_vimbuiltins.py +13 -0
- data/vendor/pygments-main/pygments/lexers/agile.py +2290 -0
- data/vendor/pygments-main/pygments/lexers/asm.py +398 -0
- data/vendor/pygments-main/pygments/lexers/compiled.py +3723 -0
- data/vendor/pygments-main/pygments/lexers/dalvik.py +104 -0
- data/vendor/pygments-main/pygments/lexers/dotnet.py +671 -0
- data/vendor/pygments-main/pygments/lexers/foxpro.py +428 -0
- data/vendor/pygments-main/pygments/lexers/functional.py +2731 -0
- data/vendor/pygments-main/pygments/lexers/github.py +565 -0
- data/vendor/pygments-main/pygments/lexers/hdl.py +356 -0
- data/vendor/pygments-main/pygments/lexers/jvm.py +1112 -0
- data/vendor/pygments-main/pygments/lexers/math.py +1918 -0
- data/vendor/pygments-main/pygments/lexers/other.py +3778 -0
- data/vendor/pygments-main/pygments/lexers/parsers.py +778 -0
- data/vendor/pygments-main/pygments/lexers/shell.py +424 -0
- data/vendor/pygments-main/pygments/lexers/special.py +100 -0
- data/vendor/pygments-main/pygments/lexers/sql.py +609 -0
- data/vendor/pygments-main/pygments/lexers/templates.py +1742 -0
- data/vendor/pygments-main/pygments/lexers/text.py +1893 -0
- data/vendor/pygments-main/pygments/lexers/web.py +4045 -0
- data/vendor/pygments-main/pygments/modeline.py +40 -0
- data/vendor/pygments-main/pygments/plugin.py +74 -0
- data/vendor/pygments-main/pygments/scanner.py +104 -0
- data/vendor/pygments-main/pygments/style.py +117 -0
- data/vendor/pygments-main/pygments/styles/__init__.py +70 -0
- data/vendor/pygments-main/pygments/styles/autumn.py +65 -0
- data/vendor/pygments-main/pygments/styles/borland.py +51 -0
- data/vendor/pygments-main/pygments/styles/bw.py +49 -0
- data/vendor/pygments-main/pygments/styles/colorful.py +81 -0
- data/vendor/pygments-main/pygments/styles/default.py +73 -0
- data/vendor/pygments-main/pygments/styles/emacs.py +72 -0
- data/vendor/pygments-main/pygments/styles/friendly.py +72 -0
- data/vendor/pygments-main/pygments/styles/fruity.py +42 -0
- data/vendor/pygments-main/pygments/styles/manni.py +75 -0
- data/vendor/pygments-main/pygments/styles/monokai.py +106 -0
- data/vendor/pygments-main/pygments/styles/murphy.py +80 -0
- data/vendor/pygments-main/pygments/styles/native.py +65 -0
- data/vendor/pygments-main/pygments/styles/pastie.py +75 -0
- data/vendor/pygments-main/pygments/styles/perldoc.py +69 -0
- data/vendor/pygments-main/pygments/styles/rrt.py +33 -0
- data/vendor/pygments-main/pygments/styles/tango.py +141 -0
- data/vendor/pygments-main/pygments/styles/trac.py +63 -0
- data/vendor/pygments-main/pygments/styles/vim.py +63 -0
- data/vendor/pygments-main/pygments/styles/vs.py +38 -0
- data/vendor/pygments-main/pygments/token.py +195 -0
- data/vendor/pygments-main/pygments/unistring.py +140 -0
- data/vendor/pygments-main/pygments/util.py +277 -0
- data/vendor/pygments-main/scripts/check_sources.py +242 -0
- data/vendor/pygments-main/scripts/detect_missing_analyse_text.py +32 -0
- data/vendor/pygments-main/scripts/epydoc.css +280 -0
- data/vendor/pygments-main/scripts/find_codetags.py +205 -0
- data/vendor/pygments-main/scripts/find_error.py +170 -0
- data/vendor/pygments-main/scripts/get_vimkw.py +43 -0
- data/vendor/pygments-main/scripts/pylintrc +301 -0
- data/vendor/pygments-main/scripts/reindent.py +291 -0
- data/vendor/pygments-main/scripts/vim2pygments.py +933 -0
- data/vendor/pygments-main/setup.cfg +7 -0
- data/vendor/pygments-main/setup.py +90 -0
- data/vendor/pygments-main/tests/dtds/HTML4-f.dtd +37 -0
- data/vendor/pygments-main/tests/dtds/HTML4-s.dtd +869 -0
- data/vendor/pygments-main/tests/dtds/HTML4.dcl +88 -0
- data/vendor/pygments-main/tests/dtds/HTML4.dtd +1092 -0
- data/vendor/pygments-main/tests/dtds/HTML4.soc +9 -0
- data/vendor/pygments-main/tests/dtds/HTMLlat1.ent +195 -0
- data/vendor/pygments-main/tests/dtds/HTMLspec.ent +77 -0
- data/vendor/pygments-main/tests/dtds/HTMLsym.ent +241 -0
- data/vendor/pygments-main/tests/examplefiles/ANTLRv3.g +608 -0
- data/vendor/pygments-main/tests/examplefiles/AcidStateAdvanced.hs +209 -0
- data/vendor/pygments-main/tests/examplefiles/AlternatingGroup.mu +102 -0
- data/vendor/pygments-main/tests/examplefiles/BOM.js +1 -0
- data/vendor/pygments-main/tests/examplefiles/CPDictionary.j +611 -0
- data/vendor/pygments-main/tests/examplefiles/Config.in.cache +1973 -0
- data/vendor/pygments-main/tests/examplefiles/Constants.mo +158 -0
- data/vendor/pygments-main/tests/examplefiles/DancingSudoku.lhs +411 -0
- data/vendor/pygments-main/tests/examplefiles/Deflate.fs +578 -0
- data/vendor/pygments-main/tests/examplefiles/Errors.scala +18 -0
- data/vendor/pygments-main/tests/examplefiles/File.hy +174 -0
- data/vendor/pygments-main/tests/examplefiles/Get-CommandDefinitionHtml.ps1 +66 -0
- data/vendor/pygments-main/tests/examplefiles/IPDispatchC.nc +104 -0
- data/vendor/pygments-main/tests/examplefiles/IPDispatchP.nc +671 -0
- data/vendor/pygments-main/tests/examplefiles/Intro.java +1660 -0
- data/vendor/pygments-main/tests/examplefiles/Makefile +1131 -0
- data/vendor/pygments-main/tests/examplefiles/Object.st +4394 -0
- data/vendor/pygments-main/tests/examplefiles/OrderedMap.hx +584 -0
- data/vendor/pygments-main/tests/examplefiles/RoleQ.pm6 +23 -0
- data/vendor/pygments-main/tests/examplefiles/SmallCheck.hs +378 -0
- data/vendor/pygments-main/tests/examplefiles/Sorting.mod +470 -0
- data/vendor/pygments-main/tests/examplefiles/Sudoku.lhs +382 -0
- data/vendor/pygments-main/tests/examplefiles/addressbook.proto +30 -0
- data/vendor/pygments-main/tests/examplefiles/antlr_throws +1 -0
- data/vendor/pygments-main/tests/examplefiles/apache2.conf +393 -0
- data/vendor/pygments-main/tests/examplefiles/as3_test.as +143 -0
- data/vendor/pygments-main/tests/examplefiles/as3_test2.as +46 -0
- data/vendor/pygments-main/tests/examplefiles/as3_test3.as +3 -0
- data/vendor/pygments-main/tests/examplefiles/aspx-cs_example +27 -0
- data/vendor/pygments-main/tests/examplefiles/autoit_submit.au3 +25 -0
- data/vendor/pygments-main/tests/examplefiles/badcase.java +2 -0
- data/vendor/pygments-main/tests/examplefiles/batchfile.bat +49 -0
- data/vendor/pygments-main/tests/examplefiles/bigtest.nsi +308 -0
- data/vendor/pygments-main/tests/examplefiles/boot-9.scm +1557 -0
- data/vendor/pygments-main/tests/examplefiles/ca65_example +284 -0
- data/vendor/pygments-main/tests/examplefiles/cbmbas_example +9 -0
- data/vendor/pygments-main/tests/examplefiles/cells.ps +515 -0
- data/vendor/pygments-main/tests/examplefiles/ceval.c +2604 -0
- data/vendor/pygments-main/tests/examplefiles/cheetah_example.html +13 -0
- data/vendor/pygments-main/tests/examplefiles/classes.dylan +125 -0
- data/vendor/pygments-main/tests/examplefiles/condensed_ruby.rb +10 -0
- data/vendor/pygments-main/tests/examplefiles/coq_RelationClasses +447 -0
- data/vendor/pygments-main/tests/examplefiles/database.pytb +20 -0
- data/vendor/pygments-main/tests/examplefiles/de.MoinMoin.po +2461 -0
- data/vendor/pygments-main/tests/examplefiles/demo.ahk +181 -0
- data/vendor/pygments-main/tests/examplefiles/demo.cfm +38 -0
- data/vendor/pygments-main/tests/examplefiles/django_sample.html+django +68 -0
- data/vendor/pygments-main/tests/examplefiles/dwarf.cw +17 -0
- data/vendor/pygments-main/tests/examplefiles/erl_session +10 -0
- data/vendor/pygments-main/tests/examplefiles/escape_semicolon.clj +1 -0
- data/vendor/pygments-main/tests/examplefiles/evil_regex.js +48 -0
- data/vendor/pygments-main/tests/examplefiles/example.Rd +78 -0
- data/vendor/pygments-main/tests/examplefiles/example.bug +54 -0
- data/vendor/pygments-main/tests/examplefiles/example.c +2080 -0
- data/vendor/pygments-main/tests/examplefiles/example.ceylon +52 -0
- data/vendor/pygments-main/tests/examplefiles/example.clay +33 -0
- data/vendor/pygments-main/tests/examplefiles/example.cls +15 -0
- data/vendor/pygments-main/tests/examplefiles/example.cob +3556 -0
- data/vendor/pygments-main/tests/examplefiles/example.cpp +2363 -0
- data/vendor/pygments-main/tests/examplefiles/example.gs +106 -0
- data/vendor/pygments-main/tests/examplefiles/example.gst +7 -0
- data/vendor/pygments-main/tests/examplefiles/example.hx +142 -0
- data/vendor/pygments-main/tests/examplefiles/example.jag +48 -0
- data/vendor/pygments-main/tests/examplefiles/example.kt +47 -0
- data/vendor/pygments-main/tests/examplefiles/example.lagda +19 -0
- data/vendor/pygments-main/tests/examplefiles/example.lua +250 -0
- data/vendor/pygments-main/tests/examplefiles/example.monkey +152 -0
- data/vendor/pygments-main/tests/examplefiles/example.moo +26 -0
- data/vendor/pygments-main/tests/examplefiles/example.moon +629 -0
- data/vendor/pygments-main/tests/examplefiles/example.msc +43 -0
- data/vendor/pygments-main/tests/examplefiles/example.nim +1010 -0
- data/vendor/pygments-main/tests/examplefiles/example.ns2 +69 -0
- data/vendor/pygments-main/tests/examplefiles/example.p +34 -0
- data/vendor/pygments-main/tests/examplefiles/example.pas +2708 -0
- data/vendor/pygments-main/tests/examplefiles/example.prg +161 -0
- data/vendor/pygments-main/tests/examplefiles/example.rb +1852 -0
- data/vendor/pygments-main/tests/examplefiles/example.reg +19 -0
- data/vendor/pygments-main/tests/examplefiles/example.rexx +50 -0
- data/vendor/pygments-main/tests/examplefiles/example.rhtml +561 -0
- data/vendor/pygments-main/tests/examplefiles/example.rkt +95 -0
- data/vendor/pygments-main/tests/examplefiles/example.rpf +4 -0
- data/vendor/pygments-main/tests/examplefiles/example.sh-session +19 -0
- data/vendor/pygments-main/tests/examplefiles/example.shell-session +45 -0
- data/vendor/pygments-main/tests/examplefiles/example.sml +156 -0
- data/vendor/pygments-main/tests/examplefiles/example.snobol +15 -0
- data/vendor/pygments-main/tests/examplefiles/example.stan +108 -0
- data/vendor/pygments-main/tests/examplefiles/example.tea +34 -0
- data/vendor/pygments-main/tests/examplefiles/example.ts +28 -0
- data/vendor/pygments-main/tests/examplefiles/example.u +548 -0
- data/vendor/pygments-main/tests/examplefiles/example.weechatlog +9 -0
- data/vendor/pygments-main/tests/examplefiles/example.xhtml +376 -0
- data/vendor/pygments-main/tests/examplefiles/example.xtend +34 -0
- data/vendor/pygments-main/tests/examplefiles/example.yaml +302 -0
- data/vendor/pygments-main/tests/examplefiles/example2.aspx +29 -0
- data/vendor/pygments-main/tests/examplefiles/example2.msc +79 -0
- data/vendor/pygments-main/tests/examplefiles/example_elixir.ex +363 -0
- data/vendor/pygments-main/tests/examplefiles/example_file.fy +128 -0
- data/vendor/pygments-main/tests/examplefiles/firefox.mak +586 -0
- data/vendor/pygments-main/tests/examplefiles/flipflop.sv +19 -0
- data/vendor/pygments-main/tests/examplefiles/foo.sce +6 -0
- data/vendor/pygments-main/tests/examplefiles/format.ml +1213 -0
- data/vendor/pygments-main/tests/examplefiles/fucked_up.rb +77 -0
- data/vendor/pygments-main/tests/examplefiles/function.mu +1 -0
- data/vendor/pygments-main/tests/examplefiles/functional.rst +1472 -0
- data/vendor/pygments-main/tests/examplefiles/garcia-wachs.kk +133 -0
- data/vendor/pygments-main/tests/examplefiles/genclass.clj +510 -0
- data/vendor/pygments-main/tests/examplefiles/genshi_example.xml+genshi +193 -0
- data/vendor/pygments-main/tests/examplefiles/genshitext_example.genshitext +33 -0
- data/vendor/pygments-main/tests/examplefiles/glsl.frag +7 -0
- data/vendor/pygments-main/tests/examplefiles/glsl.vert +13 -0
- data/vendor/pygments-main/tests/examplefiles/grammar-test.p6 +22 -0
- data/vendor/pygments-main/tests/examplefiles/hello.smali +40 -0
- data/vendor/pygments-main/tests/examplefiles/hello.sp +9 -0
- data/vendor/pygments-main/tests/examplefiles/html+php_faulty.php +1 -0
- data/vendor/pygments-main/tests/examplefiles/http_request_example +15 -0
- data/vendor/pygments-main/tests/examplefiles/http_response_example +29 -0
- data/vendor/pygments-main/tests/examplefiles/import.hs +4 -0
- data/vendor/pygments-main/tests/examplefiles/inet_pton6.dg +71 -0
- data/vendor/pygments-main/tests/examplefiles/intro.ik +24 -0
- data/vendor/pygments-main/tests/examplefiles/ints.php +10 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.fun +675 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.sig +286 -0
- data/vendor/pygments-main/tests/examplefiles/irb_heredoc +8 -0
- data/vendor/pygments-main/tests/examplefiles/irc.lsp +214 -0
- data/vendor/pygments-main/tests/examplefiles/java.properties +16 -0
- data/vendor/pygments-main/tests/examplefiles/jbst_example1.jbst +28 -0
- data/vendor/pygments-main/tests/examplefiles/jbst_example2.jbst +45 -0
- data/vendor/pygments-main/tests/examplefiles/jinjadesignerdoc.rst +713 -0
- data/vendor/pygments-main/tests/examplefiles/json.lasso +301 -0
- data/vendor/pygments-main/tests/examplefiles/json.lasso9 +213 -0
- data/vendor/pygments-main/tests/examplefiles/lighttpd_config.conf +13 -0
- data/vendor/pygments-main/tests/examplefiles/linecontinuation.py +47 -0
- data/vendor/pygments-main/tests/examplefiles/livescript-demo.ls +41 -0
- data/vendor/pygments-main/tests/examplefiles/logos_example.xm +28 -0
- data/vendor/pygments-main/tests/examplefiles/ltmain.sh +2849 -0
- data/vendor/pygments-main/tests/examplefiles/main.cmake +42 -0
- data/vendor/pygments-main/tests/examplefiles/markdown.lsp +679 -0
- data/vendor/pygments-main/tests/examplefiles/matlab_noreturn +3 -0
- data/vendor/pygments-main/tests/examplefiles/matlab_sample +30 -0
- data/vendor/pygments-main/tests/examplefiles/matlabsession_sample.txt +37 -0
- data/vendor/pygments-main/tests/examplefiles/metagrammar.treetop +455 -0
- data/vendor/pygments-main/tests/examplefiles/mg_sample.pro +73 -0
- data/vendor/pygments-main/tests/examplefiles/minehunt.qml +112 -0
- data/vendor/pygments-main/tests/examplefiles/minimal.ns2 +4 -0
- data/vendor/pygments-main/tests/examplefiles/moin_SyntaxReference.txt +340 -0
- data/vendor/pygments-main/tests/examplefiles/multiline_regexes.rb +38 -0
- data/vendor/pygments-main/tests/examplefiles/nanomsg.intr +95 -0
- data/vendor/pygments-main/tests/examplefiles/nasm_aoutso.asm +96 -0
- data/vendor/pygments-main/tests/examplefiles/nasm_objexe.asm +30 -0
- data/vendor/pygments-main/tests/examplefiles/nemerle_sample.n +87 -0
- data/vendor/pygments-main/tests/examplefiles/nginx_nginx.conf +118 -0
- data/vendor/pygments-main/tests/examplefiles/numbers.c +12 -0
- data/vendor/pygments-main/tests/examplefiles/objc_example.m +32 -0
- data/vendor/pygments-main/tests/examplefiles/objc_example2.m +24 -0
- data/vendor/pygments-main/tests/examplefiles/perl_misc +62 -0
- data/vendor/pygments-main/tests/examplefiles/perl_perl5db +998 -0
- data/vendor/pygments-main/tests/examplefiles/perl_regex-delims +120 -0
- data/vendor/pygments-main/tests/examplefiles/perlfunc.1 +856 -0
- data/vendor/pygments-main/tests/examplefiles/phpMyAdmin.spec +163 -0
- data/vendor/pygments-main/tests/examplefiles/phpcomplete.vim +567 -0
- data/vendor/pygments-main/tests/examplefiles/pleac.in.rb +1223 -0
- data/vendor/pygments-main/tests/examplefiles/postgresql_test.txt +47 -0
- data/vendor/pygments-main/tests/examplefiles/pppoe.applescript +10 -0
- data/vendor/pygments-main/tests/examplefiles/psql_session.txt +122 -0
- data/vendor/pygments-main/tests/examplefiles/py3_test.txt +2 -0
- data/vendor/pygments-main/tests/examplefiles/py3tb_test.py3tb +4 -0
- data/vendor/pygments-main/tests/examplefiles/pycon_test.pycon +14 -0
- data/vendor/pygments-main/tests/examplefiles/pytb_test2.pytb +2 -0
- data/vendor/pygments-main/tests/examplefiles/pytb_test3.pytb +4 -0
- data/vendor/pygments-main/tests/examplefiles/python25-bsd.mak +234 -0
- data/vendor/pygments-main/tests/examplefiles/qsort.prolog +13 -0
- data/vendor/pygments-main/tests/examplefiles/r-console-transcript.Rout +38 -0
- data/vendor/pygments-main/tests/examplefiles/ragel-cpp_rlscan +280 -0
- data/vendor/pygments-main/tests/examplefiles/ragel-cpp_snippet +2 -0
- data/vendor/pygments-main/tests/examplefiles/regex.js +22 -0
- data/vendor/pygments-main/tests/examplefiles/reversi.lsp +427 -0
- data/vendor/pygments-main/tests/examplefiles/robotframework.txt +39 -0
- data/vendor/pygments-main/tests/examplefiles/ruby_func_def.rb +11 -0
- data/vendor/pygments-main/tests/examplefiles/rust_example.rs +233 -0
- data/vendor/pygments-main/tests/examplefiles/scilab.sci +30 -0
- data/vendor/pygments-main/tests/examplefiles/session.dylan-console +9 -0
- data/vendor/pygments-main/tests/examplefiles/sibling.prolog +19 -0
- data/vendor/pygments-main/tests/examplefiles/simple.md +747 -0
- data/vendor/pygments-main/tests/examplefiles/smarty_example.html +209 -0
- data/vendor/pygments-main/tests/examplefiles/source.lgt +343 -0
- data/vendor/pygments-main/tests/examplefiles/sources.list +62 -0
- data/vendor/pygments-main/tests/examplefiles/sphere.pov +18 -0
- data/vendor/pygments-main/tests/examplefiles/sqlite3.sqlite3-console +27 -0
- data/vendor/pygments-main/tests/examplefiles/squid.conf +30 -0
- data/vendor/pygments-main/tests/examplefiles/string.jl +1031 -0
- data/vendor/pygments-main/tests/examplefiles/string_delimiters.d +21 -0
- data/vendor/pygments-main/tests/examplefiles/stripheredoc.sh +3 -0
- data/vendor/pygments-main/tests/examplefiles/swig_java.swg +1329 -0
- data/vendor/pygments-main/tests/examplefiles/swig_std_vector.i +225 -0
- data/vendor/pygments-main/tests/examplefiles/test.R +153 -0
- data/vendor/pygments-main/tests/examplefiles/test.adb +211 -0
- data/vendor/pygments-main/tests/examplefiles/test.agda +102 -0
- data/vendor/pygments-main/tests/examplefiles/test.asy +131 -0
- data/vendor/pygments-main/tests/examplefiles/test.awk +121 -0
- data/vendor/pygments-main/tests/examplefiles/test.bas +29 -0
- data/vendor/pygments-main/tests/examplefiles/test.bb +95 -0
- data/vendor/pygments-main/tests/examplefiles/test.bmx +145 -0
- data/vendor/pygments-main/tests/examplefiles/test.boo +39 -0
- data/vendor/pygments-main/tests/examplefiles/test.bro +250 -0
- data/vendor/pygments-main/tests/examplefiles/test.cs +374 -0
- data/vendor/pygments-main/tests/examplefiles/test.css +54 -0
- data/vendor/pygments-main/tests/examplefiles/test.cu +36 -0
- data/vendor/pygments-main/tests/examplefiles/test.d +135 -0
- data/vendor/pygments-main/tests/examplefiles/test.dart +23 -0
- data/vendor/pygments-main/tests/examplefiles/test.dtd +89 -0
- data/vendor/pygments-main/tests/examplefiles/test.ebnf +31 -0
- data/vendor/pygments-main/tests/examplefiles/test.ec +605 -0
- data/vendor/pygments-main/tests/examplefiles/test.ecl +58 -0
- data/vendor/pygments-main/tests/examplefiles/test.eh +315 -0
- data/vendor/pygments-main/tests/examplefiles/test.erl +169 -0
- data/vendor/pygments-main/tests/examplefiles/test.evoque +33 -0
- data/vendor/pygments-main/tests/examplefiles/test.fan +818 -0
- data/vendor/pygments-main/tests/examplefiles/test.flx +57 -0
- data/vendor/pygments-main/tests/examplefiles/test.gdc +13 -0
- data/vendor/pygments-main/tests/examplefiles/test.groovy +97 -0
- data/vendor/pygments-main/tests/examplefiles/test.html +339 -0
- data/vendor/pygments-main/tests/examplefiles/test.ini +10 -0
- data/vendor/pygments-main/tests/examplefiles/test.java +653 -0
- data/vendor/pygments-main/tests/examplefiles/test.jsp +24 -0
- data/vendor/pygments-main/tests/examplefiles/test.maql +45 -0
- data/vendor/pygments-main/tests/examplefiles/test.mod +374 -0
- data/vendor/pygments-main/tests/examplefiles/test.moo +51 -0
- data/vendor/pygments-main/tests/examplefiles/test.myt +166 -0
- data/vendor/pygments-main/tests/examplefiles/test.nim +93 -0
- data/vendor/pygments-main/tests/examplefiles/test.opa +10 -0
- data/vendor/pygments-main/tests/examplefiles/test.p6 +252 -0
- data/vendor/pygments-main/tests/examplefiles/test.pas +743 -0
- data/vendor/pygments-main/tests/examplefiles/test.php +505 -0
- data/vendor/pygments-main/tests/examplefiles/test.plot +333 -0
- data/vendor/pygments-main/tests/examplefiles/test.ps1 +108 -0
- data/vendor/pygments-main/tests/examplefiles/test.pypylog +1839 -0
- data/vendor/pygments-main/tests/examplefiles/test.r3 +94 -0
- data/vendor/pygments-main/tests/examplefiles/test.rb +177 -0
- data/vendor/pygments-main/tests/examplefiles/test.rhtml +43 -0
- data/vendor/pygments-main/tests/examplefiles/test.scaml +8 -0
- data/vendor/pygments-main/tests/examplefiles/test.ssp +12 -0
- data/vendor/pygments-main/tests/examplefiles/test.tcsh +830 -0
- data/vendor/pygments-main/tests/examplefiles/test.vb +407 -0
- data/vendor/pygments-main/tests/examplefiles/test.vhdl +161 -0
- data/vendor/pygments-main/tests/examplefiles/test.xqy +138 -0
- data/vendor/pygments-main/tests/examplefiles/test.xsl +23 -0
- data/vendor/pygments-main/tests/examplefiles/test2.pypylog +120 -0
- data/vendor/pygments-main/tests/examplefiles/truncated.pytb +15 -0
- data/vendor/pygments-main/tests/examplefiles/type.lisp +1218 -0
- data/vendor/pygments-main/tests/examplefiles/underscore.coffee +603 -0
- data/vendor/pygments-main/tests/examplefiles/unicode.applescript +5 -0
- data/vendor/pygments-main/tests/examplefiles/unicodedoc.py +11 -0
- data/vendor/pygments-main/tests/examplefiles/unix-io.lid +37 -0
- data/vendor/pygments-main/tests/examplefiles/webkit-transition.css +3 -0
- data/vendor/pygments-main/tests/examplefiles/while.pov +13 -0
- data/vendor/pygments-main/tests/examplefiles/wiki.factor +384 -0
- data/vendor/pygments-main/tests/examplefiles/xml_example +1897 -0
- data/vendor/pygments-main/tests/examplefiles/zmlrpc.f90 +798 -0
- data/vendor/pygments-main/tests/old_run.py +138 -0
- data/vendor/pygments-main/tests/run.py +49 -0
- data/vendor/pygments-main/tests/support/tags +36 -0
- data/vendor/pygments-main/tests/support.py +15 -0
- data/vendor/pygments-main/tests/test_basic_api.py +295 -0
- data/vendor/pygments-main/tests/test_clexer.py +31 -0
- data/vendor/pygments-main/tests/test_cmdline.py +105 -0
- data/vendor/pygments-main/tests/test_examplefiles.py +99 -0
- data/vendor/pygments-main/tests/test_html_formatter.py +178 -0
- data/vendor/pygments-main/tests/test_latex_formatter.py +55 -0
- data/vendor/pygments-main/tests/test_lexers_other.py +68 -0
- data/vendor/pygments-main/tests/test_perllexer.py +137 -0
- data/vendor/pygments-main/tests/test_regexlexer.py +47 -0
- data/vendor/pygments-main/tests/test_token.py +46 -0
- data/vendor/pygments-main/tests/test_using_api.py +40 -0
- data/vendor/pygments-main/tests/test_util.py +135 -0
- data/vendor/simplejson/.gitignore +10 -0
- data/vendor/simplejson/.travis.yml +5 -0
- data/vendor/simplejson/CHANGES.txt +291 -0
- data/vendor/simplejson/LICENSE.txt +19 -0
- data/vendor/simplejson/MANIFEST.in +5 -0
- data/vendor/simplejson/README.rst +19 -0
- data/vendor/simplejson/conf.py +179 -0
- data/vendor/simplejson/index.rst +628 -0
- data/vendor/simplejson/scripts/make_docs.py +18 -0
- data/vendor/simplejson/setup.py +104 -0
- data/vendor/simplejson/simplejson/__init__.py +510 -0
- data/vendor/simplejson/simplejson/_speedups.c +2745 -0
- data/vendor/simplejson/simplejson/decoder.py +425 -0
- data/vendor/simplejson/simplejson/encoder.py +567 -0
- data/vendor/simplejson/simplejson/ordered_dict.py +119 -0
- data/vendor/simplejson/simplejson/scanner.py +77 -0
- data/vendor/simplejson/simplejson/tests/__init__.py +67 -0
- data/vendor/simplejson/simplejson/tests/test_bigint_as_string.py +55 -0
- data/vendor/simplejson/simplejson/tests/test_check_circular.py +30 -0
- data/vendor/simplejson/simplejson/tests/test_decimal.py +66 -0
- data/vendor/simplejson/simplejson/tests/test_decode.py +83 -0
- data/vendor/simplejson/simplejson/tests/test_default.py +9 -0
- data/vendor/simplejson/simplejson/tests/test_dump.py +67 -0
- data/vendor/simplejson/simplejson/tests/test_encode_basestring_ascii.py +46 -0
- data/vendor/simplejson/simplejson/tests/test_encode_for_html.py +32 -0
- data/vendor/simplejson/simplejson/tests/test_errors.py +34 -0
- data/vendor/simplejson/simplejson/tests/test_fail.py +91 -0
- data/vendor/simplejson/simplejson/tests/test_float.py +19 -0
- data/vendor/simplejson/simplejson/tests/test_indent.py +86 -0
- data/vendor/simplejson/simplejson/tests/test_item_sort_key.py +20 -0
- data/vendor/simplejson/simplejson/tests/test_namedtuple.py +121 -0
- data/vendor/simplejson/simplejson/tests/test_pass1.py +76 -0
- data/vendor/simplejson/simplejson/tests/test_pass2.py +14 -0
- data/vendor/simplejson/simplejson/tests/test_pass3.py +20 -0
- data/vendor/simplejson/simplejson/tests/test_recursion.py +67 -0
- data/vendor/simplejson/simplejson/tests/test_scanstring.py +117 -0
- data/vendor/simplejson/simplejson/tests/test_separators.py +42 -0
- data/vendor/simplejson/simplejson/tests/test_speedups.py +20 -0
- data/vendor/simplejson/simplejson/tests/test_tuple.py +49 -0
- data/vendor/simplejson/simplejson/tests/test_unicode.py +109 -0
- data/vendor/simplejson/simplejson/tool.py +39 -0
- metadata +566 -0
@@ -0,0 +1,603 @@
|
|
1
|
+
.. -*- mode: rst -*-
|
2
|
+
|
3
|
+
====================
|
4
|
+
Write your own lexer
|
5
|
+
====================
|
6
|
+
|
7
|
+
If a lexer for your favorite language is missing in the Pygments package, you can
|
8
|
+
easily write your own and extend Pygments.
|
9
|
+
|
10
|
+
All you need can be found inside the `pygments.lexer` module. As you can read in
|
11
|
+
the `API documentation <api.txt>`_, a lexer is a class that is initialized with
|
12
|
+
some keyword arguments (the lexer options) and that provides a
|
13
|
+
`get_tokens_unprocessed()` method which is given a string or unicode object with
|
14
|
+
the data to parse.
|
15
|
+
|
16
|
+
The `get_tokens_unprocessed()` method must return an iterator or iterable
|
17
|
+
containing tuples in the form ``(index, token, value)``. Normally you don't need
|
18
|
+
to do this since there are numerous base lexers you can subclass.
|
19
|
+
|
20
|
+
|
21
|
+
RegexLexer
|
22
|
+
==========
|
23
|
+
|
24
|
+
A very powerful (but quite easy to use) lexer is the `RegexLexer`. This lexer
|
25
|
+
base class allows you to define lexing rules in terms of *regular expressions*
|
26
|
+
for different *states*.
|
27
|
+
|
28
|
+
States are groups of regular expressions that are matched against the input
|
29
|
+
string at the *current position*. If one of these expressions matches, a
|
30
|
+
corresponding action is performed (normally yielding a token with a specific
|
31
|
+
type), the current position is set to where the last match ended and the
|
32
|
+
matching process continues with the first regex of the current state.
|
33
|
+
|
34
|
+
Lexer states are kept in a state stack: each time a new state is entered, the
|
35
|
+
new state is pushed onto the stack. The most basic lexers (like the
|
36
|
+
`DiffLexer`) just need one state.
|
37
|
+
|
38
|
+
Each state is defined as a list of tuples in the form (`regex`, `action`,
|
39
|
+
`new_state`) where the last item is optional. In the most basic form, `action`
|
40
|
+
is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a
|
41
|
+
token with the match text and type `action` and push `new_state` on the state
|
42
|
+
stack. If the new state is ``'#pop'``, the topmost state is popped from the
|
43
|
+
stack instead. (To pop more than one state, use ``'#pop:2'`` and so on.)
|
44
|
+
``'#push'`` is a synonym for pushing the current state on the
|
45
|
+
stack.
|
46
|
+
|
47
|
+
The following example shows the `DiffLexer` from the builtin lexers. Note that
|
48
|
+
it contains some additional attributes `name`, `aliases` and `filenames` which
|
49
|
+
aren't required for a lexer. They are used by the builtin lexer lookup
|
50
|
+
functions.
|
51
|
+
|
52
|
+
.. sourcecode:: python
|
53
|
+
|
54
|
+
from pygments.lexer import RegexLexer
|
55
|
+
from pygments.token import *
|
56
|
+
|
57
|
+
class DiffLexer(RegexLexer):
|
58
|
+
name = 'Diff'
|
59
|
+
aliases = ['diff']
|
60
|
+
filenames = ['*.diff']
|
61
|
+
|
62
|
+
tokens = {
|
63
|
+
'root': [
|
64
|
+
(r' .*\n', Text),
|
65
|
+
(r'\+.*\n', Generic.Inserted),
|
66
|
+
(r'-.*\n', Generic.Deleted),
|
67
|
+
(r'@.*\n', Generic.Subheading),
|
68
|
+
(r'Index.*\n', Generic.Heading),
|
69
|
+
(r'=.*\n', Generic.Heading),
|
70
|
+
(r'.*\n', Text),
|
71
|
+
]
|
72
|
+
}
|
73
|
+
|
74
|
+
As you can see this lexer only uses one state. When the lexer starts scanning
|
75
|
+
the text, it first checks if the current character is a space. If this is true
|
76
|
+
it scans everything until newline and returns the parsed data as `Text` token.
|
77
|
+
|
78
|
+
If this rule doesn't match, it checks if the current char is a plus sign. And
|
79
|
+
so on.
|
80
|
+
|
81
|
+
If no rule matches at the current position, the current char is emitted as an
|
82
|
+
`Error` token that indicates a parsing error, and the position is increased by
|
83
|
+
1.
|
84
|
+
|
85
|
+
|
86
|
+
Adding and testing a new lexer
|
87
|
+
==============================
|
88
|
+
|
89
|
+
To make pygments aware of your new lexer, you have to perform the following
|
90
|
+
steps:
|
91
|
+
|
92
|
+
First, change to the current directory containing the pygments source code:
|
93
|
+
|
94
|
+
.. sourcecode:: console
|
95
|
+
|
96
|
+
$ cd .../pygments-main
|
97
|
+
|
98
|
+
Next, make sure the lexer is known from outside of the module. All modules in
|
99
|
+
the ``pygments.lexers`` specify ``__all__``. For example, ``other.py`` sets:
|
100
|
+
|
101
|
+
.. sourcecode:: python
|
102
|
+
|
103
|
+
__all__ = ['BrainfuckLexer', 'BefungeLexer', ...]
|
104
|
+
|
105
|
+
Simply add the name of your lexer class to this list.
|
106
|
+
|
107
|
+
Finally the lexer can be made publicly known by rebuilding the lexer
|
108
|
+
mapping:
|
109
|
+
|
110
|
+
.. sourcecode:: console
|
111
|
+
|
112
|
+
$ make mapfiles
|
113
|
+
|
114
|
+
To test the new lexer, store an example file with the proper extension in
|
115
|
+
``tests/examplefiles``. For example, to test your ``DiffLexer``, add a
|
116
|
+
``tests/examplefiles/example.diff`` containing a sample diff output.
|
117
|
+
|
118
|
+
Now you can use pygmentize to render your example to HTML:
|
119
|
+
|
120
|
+
.. sourcecode:: console
|
121
|
+
|
122
|
+
$ ./pygmentize -O full -f html -o /tmp/example.html tests/examplefiles/example.diff
|
123
|
+
|
124
|
+
Note that this explicitly calls the ``pygmentize`` in the current directory
|
125
|
+
by preceding it with ``./``. This ensures your modifications are used.
|
126
|
+
Otherwise a possibly already installed, unmodified version without your new
|
127
|
+
lexer would have been called from the system search path (``$PATH``).
|
128
|
+
|
129
|
+
To view the result, open ``/tmp/example.html`` in your browser.
|
130
|
+
|
131
|
+
Once the example renders as expected, you should run the complete test suite:
|
132
|
+
|
133
|
+
.. sourcecode:: console
|
134
|
+
|
135
|
+
$ make test
|
136
|
+
|
137
|
+
|
138
|
+
Regex Flags
|
139
|
+
===========
|
140
|
+
|
141
|
+
You can either define regex flags in the regex (``r'(?x)foo bar'``) or by adding
|
142
|
+
a `flags` attribute to your lexer class. If no attribute is defined, it defaults
|
143
|
+
to `re.MULTILINE`. For more information about regular expression flags see the
|
144
|
+
`regular expressions`_ help page in the python documentation.
|
145
|
+
|
146
|
+
.. _regular expressions: http://docs.python.org/lib/re-syntax.html
|
147
|
+
|
148
|
+
|
149
|
+
Scanning multiple tokens at once
|
150
|
+
================================
|
151
|
+
|
152
|
+
Here is a more complex lexer that highlights INI files. INI files consist of
|
153
|
+
sections, comments and key = value pairs:
|
154
|
+
|
155
|
+
.. sourcecode:: python
|
156
|
+
|
157
|
+
from pygments.lexer import RegexLexer, bygroups
|
158
|
+
from pygments.token import *
|
159
|
+
|
160
|
+
class IniLexer(RegexLexer):
|
161
|
+
name = 'INI'
|
162
|
+
aliases = ['ini', 'cfg']
|
163
|
+
filenames = ['*.ini', '*.cfg']
|
164
|
+
|
165
|
+
tokens = {
|
166
|
+
'root': [
|
167
|
+
(r'\s+', Text),
|
168
|
+
(r';.*?$', Comment),
|
169
|
+
(r'\[.*?\]$', Keyword),
|
170
|
+
(r'(.*?)(\s*)(=)(\s*)(.*?)$',
|
171
|
+
bygroups(Name.Attribute, Text, Operator, Text, String))
|
172
|
+
]
|
173
|
+
}
|
174
|
+
|
175
|
+
The lexer first looks for whitespace, comments and section names. And later it
|
176
|
+
looks for a line that looks like a key, value pair, separated by an ``'='``
|
177
|
+
sign, and optional whitespace.
|
178
|
+
|
179
|
+
The `bygroups` helper makes sure that each group is yielded with a different
|
180
|
+
token type. First the `Name.Attribute` token, then a `Text` token for the
|
181
|
+
optional whitespace, after that an `Operator` token for the equals sign. Then a
|
182
|
+
`Text` token for the whitespace again. The rest of the line is returned as
|
183
|
+
`String`.
|
184
|
+
|
185
|
+
Note that for this to work, every part of the match must be inside a capturing
|
186
|
+
group (a ``(...)``), and there must not be any nested capturing groups. If you
|
187
|
+
nevertheless need a group, use a non-capturing group defined using this syntax:
|
188
|
+
``r'(?:some|words|here)'`` (note the ``?:`` after the beginning parenthesis).
|
189
|
+
|
190
|
+
If you find yourself needing a capturing group inside the regex which
|
191
|
+
shouldn't be part of the output but is used in the regular expressions for
|
192
|
+
backreferencing (eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None`
|
193
|
+
to the bygroups function and that group will be skipped in the
|
194
|
+
output.
|
195
|
+
|
196
|
+
|
197
|
+
Changing states
|
198
|
+
===============
|
199
|
+
|
200
|
+
Many lexers need multiple states to work as expected. For example, some
|
201
|
+
languages allow multiline comments to be nested. Since this is a recursive
|
202
|
+
pattern it's impossible to lex just using regular expressions.
|
203
|
+
|
204
|
+
Here is the solution:
|
205
|
+
|
206
|
+
.. sourcecode:: python
|
207
|
+
|
208
|
+
from pygments.lexer import RegexLexer
|
209
|
+
from pygments.token import *
|
210
|
+
|
211
|
+
class ExampleLexer(RegexLexer):
|
212
|
+
name = 'Example Lexer with states'
|
213
|
+
|
214
|
+
tokens = {
|
215
|
+
'root': [
|
216
|
+
(r'[^/]+', Text),
|
217
|
+
(r'/\*', Comment.Multiline, 'comment'),
|
218
|
+
(r'//.*?$', Comment.Singleline),
|
219
|
+
(r'/', Text)
|
220
|
+
],
|
221
|
+
'comment': [
|
222
|
+
(r'[^*/]', Comment.Multiline),
|
223
|
+
(r'/\*', Comment.Multiline, '#push'),
|
224
|
+
(r'\*/', Comment.Multiline, '#pop'),
|
225
|
+
(r'[*/]', Comment.Multiline)
|
226
|
+
]
|
227
|
+
}
|
228
|
+
|
229
|
+
This lexer starts lexing in the ``'root'`` state. It tries to match as much as
|
230
|
+
possible until it finds a slash (``'/'``). If the next character after the slash
|
231
|
+
is a star (``'*'``) the `RegexLexer` sends those two characters to the output
|
232
|
+
stream marked as `Comment.Multiline` and continues parsing with the rules
|
233
|
+
defined in the ``'comment'`` state.
|
234
|
+
|
235
|
+
If there wasn't a star after the slash, the `RegexLexer` checks if it's a
|
236
|
+
singleline comment (i.e. followed by a second slash). If this also wasn't the
|
237
|
+
case it must be a single slash (the separate regex for a single slash must also
|
238
|
+
be given, else the slash would be marked as an error token).
|
239
|
+
|
240
|
+
Inside the ``'comment'`` state, we do the same thing again. Scan until the lexer
|
241
|
+
finds a star or slash. If it's the opening of a multiline comment, push the
|
242
|
+
``'comment'`` state on the stack and continue scanning, again in the
|
243
|
+
``'comment'`` state. Else, check if it's the end of the multiline comment. If
|
244
|
+
yes, pop one state from the stack.
|
245
|
+
|
246
|
+
Note: If you pop from an empty stack you'll get an `IndexError`. (There is an
|
247
|
+
easy way to prevent this from happening: don't ``'#pop'`` in the root state).
|
248
|
+
|
249
|
+
If the `RegexLexer` encounters a newline that is flagged as an error token, the
|
250
|
+
stack is emptied and the lexer continues scanning in the ``'root'`` state. This
|
251
|
+
helps to produce error-tolerant highlighting for erroneous input, e.g. when a
|
252
|
+
single-line string is not closed.
|
253
|
+
|
254
|
+
|
255
|
+
Advanced state tricks
|
256
|
+
=====================
|
257
|
+
|
258
|
+
There are a few more things you can do with states:
|
259
|
+
|
260
|
+
- You can push multiple states onto the stack if you give a tuple instead of a
|
261
|
+
simple string as the third item in a rule tuple. For example, if you want to
|
262
|
+
match a comment containing a directive, something like::
|
263
|
+
|
264
|
+
/* <processing directive> rest of comment */
|
265
|
+
|
266
|
+
you can use this rule:
|
267
|
+
|
268
|
+
.. sourcecode:: python
|
269
|
+
|
270
|
+
tokens = {
|
271
|
+
'root': [
|
272
|
+
(r'/\* <', Comment, ('comment', 'directive')),
|
273
|
+
...
|
274
|
+
],
|
275
|
+
'directive': [
|
276
|
+
(r'[^>]*', Comment.Directive),
|
277
|
+
(r'>', Comment, '#pop'),
|
278
|
+
],
|
279
|
+
'comment': [
|
280
|
+
(r'[^*]+', Comment),
|
281
|
+
(r'\*/', Comment, '#pop'),
|
282
|
+
(r'\*', Comment),
|
283
|
+
]
|
284
|
+
}
|
285
|
+
|
286
|
+
When this encounters the above sample, first ``'comment'`` and ``'directive'``
|
287
|
+
are pushed onto the stack, then the lexer continues in the directive state
|
288
|
+
until it finds the closing ``>``, then it continues in the comment state until
|
289
|
+
the closing ``*/``. Then, both states are popped from the stack again and
|
290
|
+
lexing continues in the root state.
|
291
|
+
|
292
|
+
*New in Pygments 0.9:* The tuple can contain the special ``'#push'`` and
|
293
|
+
``'#pop'`` (but not ``'#pop:n'``) directives.
|
294
|
+
|
295
|
+
|
296
|
+
- You can include the rules of a state in the definition of another. This is
|
297
|
+
done by using `include` from `pygments.lexer`:
|
298
|
+
|
299
|
+
.. sourcecode:: python
|
300
|
+
|
301
|
+
from pygments.lexer import RegexLexer, bygroups, include
|
302
|
+
from pygments.token import *
|
303
|
+
|
304
|
+
class ExampleLexer(RegexLexer):
|
305
|
+
tokens = {
|
306
|
+
'comments': [
|
307
|
+
(r'/\*.*?\*/', Comment),
|
308
|
+
(r'//.*?\n', Comment),
|
309
|
+
],
|
310
|
+
'root': [
|
311
|
+
include('comments'),
|
312
|
+
(r'(function )(\w+)( {)',
|
313
|
+
bygroups(Keyword, Name, Keyword), 'function'),
|
314
|
+
(r'.', Text),
|
315
|
+
],
|
316
|
+
'function': [
|
317
|
+
(r'[^}/]+', Text),
|
318
|
+
include('comments'),
|
319
|
+
(r'/', Text),
|
320
|
+
(r'}', Keyword, '#pop'),
|
321
|
+
]
|
322
|
+
}
|
323
|
+
|
324
|
+
This is a hypothetical lexer for a language that consists of functions and
|
325
|
+
comments. Because comments can occur at toplevel and in functions, we need
|
326
|
+
rules for comments in both states. As you can see, the `include` helper saves
|
327
|
+
repeating rules that occur more than once (in this example, the state
|
328
|
+
``'comments'`` will never be entered by the lexer, as it's only there to be
|
329
|
+
included in ``'root'`` and ``'function'``).
|
330
|
+
|
331
|
+
|
332
|
+
- Sometimes, you may want to "combine" a state from existing ones. This is
|
333
|
+
possible with the `combined` helper from `pygments.lexer`.
|
334
|
+
|
335
|
+
If you, instead of a new state, write ``combined('state1', 'state2')`` as the
|
336
|
+
third item of a rule tuple, a new anonymous state will be formed from state1
|
337
|
+
and state2 and if the rule matches, the lexer will enter this state.
|
338
|
+
|
339
|
+
This is not used very often, but can be helpful in some cases, such as the
|
340
|
+
`PythonLexer`'s string literal processing.
|
341
|
+
|
342
|
+
- If you want your lexer to start lexing in a different state you can modify
|
343
|
+
the stack by overriding the `get_tokens_unprocessed()` method:
|
344
|
+
|
345
|
+
.. sourcecode:: python
|
346
|
+
|
347
|
+
from pygments.lexer import RegexLexer
|
348
|
+
|
349
|
+
class MyLexer(RegexLexer):
|
350
|
+
tokens = {...}
|
351
|
+
|
352
|
+
def get_tokens_unprocessed(self, text):
|
353
|
+
stack = ['root', 'otherstate']
|
354
|
+
for item in RegexLexer.get_tokens_unprocessed(self, text, stack):
|
355
|
+
yield item
|
356
|
+
|
357
|
+
Some lexers like the `PhpLexer` use this to make the leading ``<?php``
|
358
|
+
preprocessor comments optional. Note that you can crash the lexer easily
|
359
|
+
by putting values into the stack that don't exist in the token map. Also
|
360
|
+
removing ``'root'`` from the stack can result in strange errors!
|
361
|
+
|
362
|
+
- An empty regex at the end of a state list, combined with ``'#pop'``, can
|
363
|
+
act as a return point from a state that doesn't have a clear end marker.
|
364
|
+
|
365
|
+
|
366
|
+
Using multiple lexers
|
367
|
+
=====================
|
368
|
+
|
369
|
+
Using multiple lexers for the same input can be tricky. One of the easiest
|
370
|
+
combination techniques is shown here: You can replace the token type entry in a
|
371
|
+
rule tuple (the second item) with a lexer class. The matched text will then be
|
372
|
+
lexed with that lexer, and the resulting tokens will be yielded.
|
373
|
+
|
374
|
+
For example, look at this stripped-down HTML lexer:
|
375
|
+
|
376
|
+
.. sourcecode:: python
|
377
|
+
|
378
|
+
from pygments.lexer import RegexLexer, bygroups, using
|
379
|
+
from pygments.token import *
|
380
|
+
from pygments.lexers.web import JavascriptLexer
|
381
|
+
|
382
|
+
class HtmlLexer(RegexLexer):
|
383
|
+
name = 'HTML'
|
384
|
+
aliases = ['html']
|
385
|
+
filenames = ['*.html', '*.htm']
|
386
|
+
|
387
|
+
flags = re.IGNORECASE | re.DOTALL
|
388
|
+
tokens = {
|
389
|
+
'root': [
|
390
|
+
('[^<&]+', Text),
|
391
|
+
('&.*?;', Name.Entity),
|
392
|
+
(r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
|
393
|
+
(r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'),
|
394
|
+
(r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag),
|
395
|
+
],
|
396
|
+
'script-content': [
|
397
|
+
(r'(.+?)(<\s*/\s*script\s*>)',
|
398
|
+
bygroups(using(JavascriptLexer), Name.Tag),
|
399
|
+
'#pop'),
|
400
|
+
]
|
401
|
+
}
|
402
|
+
|
403
|
+
Here the content of a ``<script>`` tag is passed to a newly created instance of
|
404
|
+
a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done using the
|
405
|
+
`using` helper that takes the other lexer class as its parameter.
|
406
|
+
|
407
|
+
Note the combination of `bygroups` and `using`. This makes sure that the content
|
408
|
+
up to the ``</script>`` end tag is processed by the `JavascriptLexer`, while the
|
409
|
+
end tag is yielded as a normal token with the `Name.Tag` type.
|
410
|
+
|
411
|
+
As an additional goodie, if the lexer class is replaced by `this` (imported from
|
412
|
+
`pygments.lexer`), the "other" lexer will be the current one (because you cannot
|
413
|
+
refer to the current class within the code that runs at class definition time).
|
414
|
+
|
415
|
+
Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule.
|
416
|
+
Here, two states are pushed onto the state stack, ``'script-content'`` and
|
417
|
+
``'tag'``. That means that first ``'tag'`` is processed, which will parse
|
418
|
+
attributes and the closing ``>``, then the ``'tag'`` state is popped and the
|
419
|
+
next state on top of the stack will be ``'script-content'``.
|
420
|
+
|
421
|
+
The `using()` helper has a special keyword argument, `state`, which works as
|
422
|
+
follows: if given, the lexer to use initially is not in the ``"root"`` state,
|
423
|
+
but in the state given by this argument. This *only* works with a `RegexLexer`.
|
424
|
+
|
425
|
+
Any other keyword arguments passed to `using()` are added to the keyword
|
426
|
+
arguments used to create the lexer.
|
427
|
+
|
428
|
+
|
429
|
+
Delegating Lexer
|
430
|
+
================
|
431
|
+
|
432
|
+
Another approach for nested lexers is the `DelegatingLexer` which is for
|
433
|
+
example used for the template engine lexers. It takes two lexers as
|
434
|
+
arguments on initialisation: a `root_lexer` and a `language_lexer`.
|
435
|
+
|
436
|
+
The input is processed as follows: First, the whole text is lexed with the
|
437
|
+
`language_lexer`. All tokens yielded with a type of ``Other`` are then
|
438
|
+
concatenated and given to the `root_lexer`. The language tokens of the
|
439
|
+
`language_lexer` are then inserted into the `root_lexer`'s token stream
|
440
|
+
at the appropriate positions.
|
441
|
+
|
442
|
+
.. sourcecode:: python
|
443
|
+
|
444
|
+
from pygments.lexer import DelegatingLexer
|
445
|
+
from pygments.lexers.web import HtmlLexer, PhpLexer
|
446
|
+
|
447
|
+
class HtmlPhpLexer(DelegatingLexer):
|
448
|
+
def __init__(self, **options):
|
449
|
+
super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)
|
450
|
+
|
451
|
+
This procedure ensures that e.g. HTML with template tags in it is highlighted
|
452
|
+
correctly even if the template tags are put into HTML tags or attributes.
|
453
|
+
|
454
|
+
If you want to change the needle token ``Other`` to something else, you can
|
455
|
+
give the lexer another token type as the third parameter:
|
456
|
+
|
457
|
+
.. sourcecode:: python
|
458
|
+
|
459
|
+
DelegatingLexer.__init__(self, MyLexer, OtherLexer, Text, **options)
|
460
|
+
|
461
|
+
|
462
|
+
Callbacks
|
463
|
+
=========
|
464
|
+
|
465
|
+
Sometimes the grammar of a language is so complex that a lexer would be unable
|
466
|
+
to parse it just by using regular expressions and stacks.
|
467
|
+
|
468
|
+
For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead
|
469
|
+
of token types (`bygroups` and `using` are nothing else but preimplemented
|
470
|
+
callbacks). The callback must be a function taking two arguments:
|
471
|
+
|
472
|
+
* the lexer itself
|
473
|
+
* the match object for the last matched rule
|
474
|
+
|
475
|
+
The callback must then return an iterable of (or simply yield) ``(index,
|
476
|
+
tokentype, value)`` tuples, which are then just passed through by
|
477
|
+
`get_tokens_unprocessed()`. The ``index`` here is the position of the token in
|
478
|
+
the input string, ``tokentype`` is the normal token type (like `Name.Builtin`),
|
479
|
+
and ``value`` the associated part of the input string.
|
480
|
+
|
481
|
+
You can see an example here:
|
482
|
+
|
483
|
+
.. sourcecode:: python
|
484
|
+
|
485
|
+
from pygments.lexer import RegexLexer
|
486
|
+
from pygments.token import Generic
|
487
|
+
|
488
|
+
class HypotheticLexer(RegexLexer):
|
489
|
+
|
490
|
+
def headline_callback(lexer, match):
|
491
|
+
equal_signs = match.group(1)
|
492
|
+
text = match.group(2)
|
493
|
+
yield match.start(), Generic.Headline, equal_signs + text + equal_signs
|
494
|
+
|
495
|
+
tokens = {
|
496
|
+
'root': [
|
497
|
+
(r'(=+)(.*?)(\1)', headline_callback)
|
498
|
+
]
|
499
|
+
}
|
500
|
+
|
501
|
+
If the regex for the `headline_callback` matches, the function is called with the
|
502
|
+
match object. Note that after the callback is done, processing continues
|
503
|
+
normally, that is, after the end of the previous match. The callback has no
|
504
|
+
possibility to influence the position.
|
505
|
+
|
506
|
+
There are not really any simple examples for lexer callbacks, but you can see
|
507
|
+
them in action e.g. in the `compiled.py`_ source code in the `CLexer` and
|
508
|
+
`JavaLexer` classes.
|
509
|
+
|
510
|
+
.. _compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py
|
511
|
+
|
512
|
+
|
513
|
+
The ExtendedRegexLexer class
|
514
|
+
============================
|
515
|
+
|
516
|
+
The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for
|
517
|
+
the funky syntax rules of some languages that will go unnamed, such as Ruby.
|
518
|
+
|
519
|
+
But fear not; even then you don't have to abandon the regular expression
|
520
|
+
approach. For Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`.
|
521
|
+
All features known from RegexLexers are available here too, and the tokens are
|
522
|
+
specified in exactly the same way, *except* for one detail:
|
523
|
+
|
524
|
+
The `get_tokens_unprocessed()` method holds its internal state data not as local
|
525
|
+
variables, but in an instance of the `pygments.lexer.LexerContext` class, and
|
526
|
+
that instance is passed to callbacks as a third argument. This means that you
|
527
|
+
can modify the lexer state in callbacks.
|
528
|
+
|
529
|
+
The `LexerContext` class has the following members:
|
530
|
+
|
531
|
+
* `text` -- the input text
|
532
|
+
* `pos` -- the current starting position that is used for matching regexes
|
533
|
+
* `stack` -- a list containing the state stack
|
534
|
+
* `end` -- the maximum position to which regexes are matched, this defaults to
|
535
|
+
the length of `text`
|
536
|
+
|
537
|
+
Additionally, the `get_tokens_unprocessed()` method can be given a
|
538
|
+
`LexerContext` instead of a string and will then process this context instead of
|
539
|
+
creating a new one for the string argument.
|
540
|
+
|
541
|
+
Note that because you can set the current position to anything in the callback,
|
542
|
+
it won't be automatically set by the caller after the callback is finished.
|
543
|
+
For example, this is how the hypothetical lexer above would be written with the
|
544
|
+
`ExtendedRegexLexer`:
|
545
|
+
|
546
|
+
.. sourcecode:: python
|
547
|
+
|
548
|
+
from pygments.lexer import ExtendedRegexLexer
|
549
|
+
from pygments.token import Generic
|
550
|
+
|
551
|
+
class ExHypotheticLexer(ExtendedRegexLexer):
|
552
|
+
|
553
|
+
def headline_callback(lexer, match, ctx):
|
554
|
+
equal_signs = match.group(1)
|
555
|
+
text = match.group(2)
|
556
|
+
yield match.start(), Generic.Headline, equal_signs + text + equal_signs
|
557
|
+
ctx.pos = match.end()
|
558
|
+
|
559
|
+
tokens = {
|
560
|
+
'root': [
|
561
|
+
(r'(=+)(.*?)(\1)', headline_callback)
|
562
|
+
]
|
563
|
+
}
|
564
|
+
|
565
|
+
This might sound confusing (and it can really be). But it is needed, and for an
|
566
|
+
example look at the Ruby lexer in `agile.py`_.
|
567
|
+
|
568
|
+
.. _agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py
|
569
|
+
|
570
|
+
|
571
|
+
Filtering Token Streams
|
572
|
+
=======================
|
573
|
+
|
574
|
+
Some languages ship a lot of builtin functions (for example PHP). The total
|
575
|
+
amount of those functions differs from system to system because not everybody
|
576
|
+
has every extension installed. In the case of PHP there are over 3000 builtin
|
577
|
+
functions. That's an incredibly huge number of functions, much more than you
|
578
|
+
can put into a regular expression.
|
579
|
+
|
580
|
+
But because only `Name` tokens can be function names it's solvable by overriding
|
581
|
+
the ``get_tokens_unprocessed()`` method. The following lexer subclasses the
|
582
|
+
`PythonLexer` so that it highlights some additional names as pseudo keywords:
|
583
|
+
|
584
|
+
.. sourcecode:: python
|
585
|
+
|
586
|
+
from pygments.lexers.agile import PythonLexer
|
587
|
+
from pygments.token import Name, Keyword
|
588
|
+
|
589
|
+
class MyPythonLexer(PythonLexer):
|
590
|
+
EXTRA_KEYWORDS = ['foo', 'bar', 'foobar', 'barfoo', 'spam', 'eggs']
|
591
|
+
|
592
|
+
def get_tokens_unprocessed(self, text):
|
593
|
+
for index, token, value in PythonLexer.get_tokens_unprocessed(self, text):
|
594
|
+
if token is Name and value in self.EXTRA_KEYWORDS:
|
595
|
+
yield index, Keyword.Pseudo, value
|
596
|
+
else:
|
597
|
+
yield index, token, value
|
598
|
+
|
599
|
+
The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions.
|
600
|
+
|
601
|
+
**Note** Do not confuse this with the `filter`_ system.
|
602
|
+
|
603
|
+
.. _filter: filters.txt
|
@@ -0,0 +1,67 @@
|
|
1
|
+
.. -*- mode: rst -*-
|
2
|
+
|
3
|
+
================
|
4
|
+
Available lexers
|
5
|
+
================
|
6
|
+
|
7
|
+
This page lists all available builtin lexers and the options they take.
|
8
|
+
|
9
|
+
Currently, **all lexers** support these options:
|
10
|
+
|
11
|
+
`stripnl`
|
12
|
+
Strip leading and trailing newlines from the input (default: ``True``)
|
13
|
+
|
14
|
+
`stripall`
|
15
|
+
Strip all leading and trailing whitespace from the input (default:
|
16
|
+
``False``).
|
17
|
+
|
18
|
+
`ensurenl`
|
19
|
+
Make sure that the input ends with a newline (default: ``True``). This
|
20
|
+
is required for some lexers that consume input linewise.
|
21
|
+
*New in Pygments 1.3.*
|
22
|
+
|
23
|
+
`tabsize`
|
24
|
+
If given and greater than 0, expand tabs in the input (default: ``0``).
|
25
|
+
|
26
|
+
`encoding`
|
27
|
+
*New in Pygments 0.6.*
|
28
|
+
|
29
|
+
If given, must be an encoding name (such as ``"utf-8"``). This encoding
|
30
|
+
will be used to convert the input string to Unicode (if it is not already
|
31
|
+
a Unicode string). The default is ``"latin1"``.
|
32
|
+
|
33
|
+
If this option is set to ``"guess"``, a simple UTF-8 vs. Latin-1
|
34
|
+
detection is used, if it is set to ``"chardet"``, the
|
35
|
+
`chardet library <http://chardet.feedparser.org/>`__ is used to
|
36
|
+
guess the encoding of the input.
|
37
|
+
|
38
|
+
|
39
|
+
The "Short Names" field lists the identifiers that can be used with the
|
40
|
+
`get_lexer_by_name()` function.
|
41
|
+
|
42
|
+
These lexers are builtin and can be imported from `pygments.lexers`:
|
43
|
+
|
44
|
+
[builtin_lexer_docs]
|
45
|
+
|
46
|
+
Iterating over all lexers
|
47
|
+
-------------------------
|
48
|
+
|
49
|
+
*New in Pygments 0.6.*
|
50
|
+
|
51
|
+
To get all lexers (both the builtin and the plugin ones), you can
|
52
|
+
use the `get_all_lexers()` function from the `pygments.lexers`
|
53
|
+
module:
|
54
|
+
|
55
|
+
.. sourcecode:: pycon
|
56
|
+
|
57
|
+
>>> from pygments.lexers import get_all_lexers
|
58
|
+
>>> i = get_all_lexers()
|
59
|
+
>>> i.next()
|
60
|
+
('Diff', ('diff',), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch'))
|
61
|
+
>>> i.next()
|
62
|
+
('Delphi', ('delphi', 'objectpascal', 'pas', 'pascal'), ('*.pas',), ('text/x-pascal',))
|
63
|
+
>>> i.next()
|
64
|
+
('XML+Ruby', ('xml+erb', 'xml+ruby'), (), ())
|
65
|
+
|
66
|
+
As you can see, the return value is an iterator which yields tuples
|
67
|
+
in the form ``(name, aliases, filetypes, mimetypes)``.
|