pygments.rb 0.2.1 → 0.2.2

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
data/README.md CHANGED
@@ -1,6 +1,54 @@
  # pygments.rb
 
- a ruby wrapper for the pygments syntax highlighter via embedded python.
+ A ruby wrapper for the python [pygments syntax highlighter](http://pygments.org/).
+
+ This library replaces [github/albino](https://github.com/github/albino).
+ Instead of shelling out to `pygmentize`, it embeds the python
+ interpreter inside ruby via FFI. This avoids the cost of setting up the
+ python VM on every invocation and speeds up code highlighting from ruby by 10-15x.
+
+ ## usage
+
+ ``` ruby
+ Pygments.highlight(File.read(__FILE__), :lexer => 'ruby')
+ ```
+
+ Encoding and other lexer/formatter options can be passed in via an
+ options hash:
+
+ ``` ruby
+ Pygments.highlight('code', :options => {:encoding => 'utf-8'})
+ ```
+
+ To use a formatter other than html, specify it explicitly:
+
+ ``` ruby
+ Pygments.highlight('code', :formatter => 'bbcode')
+ Pygments.highlight('code', :formatter => 'terminal')
+ ```
+
+ To generate CSS for html formatted code, use the css method:
+
+ ``` ruby
+ Pygments.css
+ Pygments.css('.highlight')
+ ```
+
+ To use a custom python installation (like in ArchLinux), tell
+ RubyPython where python lives:
+
+ ``` ruby
+ RubyPython.configure :python_exe => 'python2.7'
+ ```
+
+ To use a custom pygments installation, specify the path to
+ Pygments.start:
+
+ ``` ruby
+ Pygments.start("/path/to/pygments")
+ ```
+
+ ## benchmarks
 
  $ ruby -rubygems bench.rb 50
  user system total real
@@ -9,3 +57,5 @@ a ruby wrapper for the pygments syntax highlighter via embedded python.
  pygments::ffi + reload 11.350000 1.240000 12.590000 ( 12.692320)
  pygments::ffi 1.130000 0.010000 1.140000 ( 1.171589)
 
+ To run `bench.rb`, use a git checkout. The C extension is not included
+ in gem releases.
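Putting the calls documented in the README together, a small sketch of end-to-end use (the input path and CSS selector here are only examples, not part of the README):

``` ruby
require 'pygments'

# Highlight a Ruby file to HTML with an options hash, then emit matching CSS.
html = Pygments.highlight(File.read('some_file.rb'),
                          :lexer   => 'ruby',
                          :options => {:encoding => 'utf-8'})
css  = Pygments.css('.highlight')
```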
data/ext/extconf.rb CHANGED
@@ -9,6 +9,6 @@ $CFLAGS << " -Wall "
  unless python
    $stderr.puts '*** could not find libpython or Python.h'
  else
-   $CFLAGS << " -I/usr/include/python#{python} "
+   $defs << "-DPYGMENTS_PYTHON_VERSION=#{python.gsub('.','')}"
    create_makefile('pygments_ext')
  end
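The new `$defs` entry passes the detected interpreter version to the C extension as a preprocessor define instead of hard-coding an include path; `pygments.c` (next section) selects the matching `Python.h` header from it. A rough illustration of the transformation, assuming the extconf probe found version 2.7:

``` ruby
# Illustration only: '2.7' stands in for whatever extconf.rb detected.
python = '2.7'
flag   = "-DPYGMENTS_PYTHON_VERSION=#{python.gsub('.', '')}"
# => "-DPYGMENTS_PYTHON_VERSION=27", matched by the #if/#elif chain below
```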
data/ext/pygments.c CHANGED
@@ -2,7 +2,18 @@
  #include <stdlib.h>
 
  #include <ruby.h>
- #include <Python.h>
+
+ #if PYGMENTS_PYTHON_VERSION == 24
+ #include <python2.4/Python.h>
+ #elif PYGMENTS_PYTHON_VERSION == 25
+ #include <python2.5/Python.h>
+ #elif PYGMENTS_PYTHON_VERSION == 26
+ #include <python2.6/Python.h>
+ #elif PYGMENTS_PYTHON_VERSION == 27
+ #include <python2.7/Python.h>
+ #else
+ #error Unknown python version
+ #endif
 
  #ifdef RUBY_VM
  #include <ruby/st.h>
data/lib/pygments/ffi.rb CHANGED
@@ -6,6 +6,7 @@ module Pygments
 
    def start(pygments_path = File.expand_path('../../../vendor/pygments-main/', __FILE__))
      RubyPython.start
+     RubyPython.import('pkg_resources') rescue nil
      sys = RubyPython.import('sys')
      sys.path.insert(0, pygments_path)
 
@@ -65,7 +65,7 @@ module Pygments
    #
    # Returns the Lexer or nil if none was found.
    def self.find(name)
-     @index[name.downcase]
+     @index[name.to_s.downcase]
    end
 
    # Public: Alias for find.
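With `name.to_s.downcase`, the lookup key is normalized before hitting the index, so symbol and mixed-case lookups behave like lowercase string ones. A usage sketch, assuming the method is reached as `Pygments::Lexer.find`:

``` ruby
# 'ruby' is only an example lexer name; all three resolve the same index key.
Pygments::Lexer.find('ruby')
Pygments::Lexer.find('Ruby')
Pygments::Lexer.find(:ruby)
```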
@@ -1,3 +1,3 @@
  module Pygments
-   VERSION = '0.2.1'
+   VERSION = '0.2.2'
  end
@@ -39,6 +39,7 @@ Other contributors, listed alphabetically, are:
  * Matthew Harrison -- SVG formatter
  * Steven Hazel -- Tcl lexer
  * Aslak Hellesøy -- Gherkin lexer
+ * Jordi Gutiérrez Hermoso -- Octave lexer
  * David Hess, Fish Software, Inc. -- Objective-J lexer
  * Varun Hiremath -- Debian control lexer
  * Ben Hollis -- Mason lexer
@@ -78,6 +79,7 @@ Other contributors, listed alphabetically, are:
  * Ken Schutte -- Matlab lexers
  * Tassilo Schweyer -- Io, MOOCode lexers
  * Joerg Sieker -- ABAP lexer
+ * Robert Simmons -- Standard ML lexer
  * Kirill Simonov -- YAML lexer
  * Steve Spigarelli -- XQuery lexer
  * Jerome St-Louis -- eC lexer
@@ -90,6 +92,7 @@ Other contributors, listed alphabetically, are:
  * Dietmar Winkler -- Modelica lexer
  * Nils Winter -- Smalltalk lexer
  * Davy Wybiral -- Clojure lexer
+ * Diego Zamboni -- CFengine3 lexer
  * Alex Zimin -- Nemerle lexer
 
  Many thanks for all contributions!
@@ -21,6 +21,9 @@ Version 1.5
  * PostgreSQL (#660)
  * DTD
  * Gosu
+ * Octave (PR#22)
+ * Standard ML (PR#14)
+ * CFengine3 (#601)
 
  - In the LaTeX formatter, escape special &, < and > chars (#648).
 
@@ -41,6 +44,8 @@ Version 1.5
 
  - Fix generic type highlighting in ActionScript 3 (#666).
 
+ - Fixes to the Clojure lexer (PR#9).
+
 
  Version 1.4
  -----------
@@ -1 +1 @@
- 456992e7ff81
+ db34feabe4b8
@@ -121,7 +121,7 @@ sections, comments and key = value pairs:
  }
 
  The lexer first looks for whitespace, comments and section names. And later it
- looks for a line that looks like a key, value pair, seperated by an ``'='``
+ looks for a line that looks like a key, value pair, separated by an ``'='``
  sign, and optional whitespace.
 
  The `bygroups` helper makes sure that each group is yielded with a different
@@ -85,7 +85,7 @@ Here a small overview of all allowed styles:
  ``bold``
      render text as bold
  ``nobold``
-     don't render text as bold (to prevent subtokens behing highlighted bold)
+     don't render text as bold (to prevent subtokens being highlighted bold)
  ``italic``
      render text italic
  ``noitalic``
@@ -219,7 +219,7 @@ def main(args=sys.argv):
        return 0
 
    if opts.pop('-V', None) is not None:
-       print 'Pygments version %s, (c) 2006-2008 by Georg Brandl.' % __version__
+       print 'Pygments version %s, (c) 2006-2011 by Georg Brandl.' % __version__
        return 0
 
    # handle ``pygmentize -L``
@@ -286,7 +286,7 @@ class LatexFormatter(Formatter):
        cp = self.commandprefix
        styles = []
        for name, definition in self.cmd2def.iteritems():
-           styles.append(r'\def\%s@tok@%s{%s}' % (cp, name, definition))
+           styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' % (cp, name, definition))
        return STYLE_TEMPLATE % {'cp': self.commandprefix,
                                 'styles': '\n'.join(styles)}
 
@@ -46,6 +46,7 @@ LEXERS = {
      'CObjdumpLexer': ('pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)),
      'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()),
      'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',), ('text/x-csharp',)),
+     'Cfengine3Lexer': ('pygments.lexers.other', 'CFEngine3', ('cfengine3', 'cf3'), ('*.cf',), ()),
      'CheetahHtmlLexer': ('pygments.lexers.templates', 'HTML+Cheetah', ('html+cheetah', 'html+spitfire'), (), ('text/html+cheetah', 'text/html+spitfire')),
      'CheetahJavascriptLexer': ('pygments.lexers.templates', 'JavaScript+Cheetah', ('js+cheetah', 'javascript+cheetah', 'js+spitfire', 'javascript+spitfire'), (), ('application/x-javascript+cheetah', 'text/x-javascript+cheetah', 'text/javascript+cheetah', 'application/x-javascript+spitfire', 'text/x-javascript+spitfire', 'text/javascript+spitfire')),
      'CheetahLexer': ('pygments.lexers.templates', 'Cheetah', ('cheetah', 'spitfire'), ('*.tmpl', '*.spt'), ('application/x-cheetah', 'application/x-spitfire')),
@@ -135,7 +136,7 @@ LEXERS = {
      'MakoXmlLexer': ('pygments.lexers.templates', 'XML+Mako', ('xml+mako',), (), ('application/xml+mako',)),
      'MaqlLexer': ('pygments.lexers.other', 'MAQL', ('maql',), ('*.maql',), ('text/x-gooddata-maql', 'application/x-gooddata-maql')),
      'MasonLexer': ('pygments.lexers.templates', 'Mason', ('mason',), ('*.m', '*.mhtml', '*.mc', '*.mi', 'autohandler', 'dhandler'), ('application/x-mason',)),
-     'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab', 'octave'), ('*.m',), ('text/matlab',)),
+     'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab',), ('*.m',), ('text/matlab',)),
      'MatlabSessionLexer': ('pygments.lexers.math', 'Matlab session', ('matlabsession',), (), ()),
      'MiniDLexer': ('pygments.lexers.agile', 'MiniD', ('minid',), ('*.md',), ('text/x-minidsrc',)),
      'ModelicaLexer': ('pygments.lexers.other', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)),
@@ -160,6 +161,7 @@ LEXERS = {
      'ObjectiveJLexer': ('pygments.lexers.web', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)),
      'OcamlLexer': ('pygments.lexers.compiled', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
      'OcamlLexer': ('pygments.lexers.functional', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
+     'OctaveLexer': ('pygments.lexers.math', 'Octave', ('octave',), ('*.m',), ('text/octave',)),
      'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)),
      'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')),
      'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]'), ('text/x-php',)),
@@ -194,6 +196,7 @@ LEXERS = {
      'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)),
      'RubyLexer': ('pygments.lexers.agile', 'Ruby', ('rb', 'ruby', 'duby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby'), ('text/x-ruby', 'application/x-ruby')),
      'SLexer': ('pygments.lexers.math', 'S', ('splus', 's', 'r'), ('*.S', '*.R'), ('text/S-plus', 'text/S', 'text/R')),
+     'SMLLexer': ('pygments.lexers.functional', 'Standard ML', ('sml',), ('*.sml', '*.sig', '*.fun'), ('text/x-standardml', 'application/x-standardml')),
      'SassLexer': ('pygments.lexers.web', 'Sass', ('sass', 'SASS'), ('*.sass',), ('text/x-sass',)),
      'ScalaLexer': ('pygments.lexers.compiled', 'Scala', ('scala',), ('*.scala',), ('text/x-scala',)),
      'ScamlLexer': ('pygments.lexers.web', 'Scaml', ('scaml', 'SCAML'), ('*.scaml',), ('text/x-scaml',)),
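The new mapping entries register the Octave, Standard ML and CFEngine3 lexers under the aliases shown above, so pygments.rb can request them by name. A usage sketch with hypothetical file names:

``` ruby
Pygments.highlight(File.read('stats.m'),    :lexer => 'octave')
Pygments.highlight(File.read('queue.sml'),  :lexer => 'sml')
Pygments.highlight(File.read('promise.cf'), :lexer => 'cf3')
```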
@@ -13,7 +13,7 @@ import re
 
  from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
       LexerContext, include, combined, do_insertions, bygroups, using, this
- from pygments.token import Error, Text, Other, \
+ from pygments.token import Error, Text, Whitespace, Other, \
       Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
  from pygments.util import get_bool_opt, get_list_opt, shebang_matches
  from pygments import unistring as uni
@@ -1367,13 +1367,11 @@ class ClojureLexer(RegexLexer):
 
      keywords = [
          'fn', 'def', 'defn', 'defmacro', 'defmethod', 'defmulti', 'defn-',
-         'defstruct',
-         'if', 'cond',
-         'let', 'for'
+         'defstruct', 'if', 'cond', 'let', 'for'
      ]
      builtins = [
          '.', '..',
-         '*', '+', '-', '->', '..', '/', '<', '<=', '=', '==', '>', '>=',
+         '*', '+', '-', '->', '/', '<', '<=', '=', '==', '>', '>=',
          'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
          'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
          'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
@@ -1389,13 +1387,13 @@ class ClojureLexer(RegexLexer):
          'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
          'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
          'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush',
-         'fnseq', 'frest', 'gensym', 'get', 'get-proxy-class',
+         'fnseq', 'frest', 'gensym', 'get-proxy-class', 'get',
          'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
          'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
          'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
          'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
          'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
-         'lefts', 'line-seq', 'list', 'list*', 'load', 'load-file',
+         'lefts', 'line-seq', 'list*', 'list', 'load', 'load-file',
          'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
          'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
          'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
@@ -1426,7 +1424,14 @@ class ClojureLexer(RegexLexer):
      # valid names for identifiers
      # well, names can only not consist fully of numbers
      # but this should be good enough for now
-     valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~-]+'
+
+     # TODO / should divide keywords/symbols into namespace/rest
+     # but that's hard, so just pretend / is part of the name
+     valid_name = r'[\w!$%*+,<=>?/.-]+'
+
+     def _multi_escape(entries):
+         return '|'.join([re.escape(entry) + '(?![\\w-!$%*+,<=>?/.-])'
+                          for entry in entries])
 
      tokens = {
          'root' : [
@@ -1435,42 +1440,29 @@ class ClojureLexer(RegexLexer):
              (r';.*$', Comment.Single),
 
              # whitespaces - usually not relevant
-             (r'\s+', Text),
+             (r'[,\s]+', Whitespace),
 
              # numbers
              (r'-?\d+\.\d+', Number.Float),
              (r'-?\d+', Number.Integer),
-             # support for uncommon kinds of numbers -
-             # have to figure out what the characters mean
-             #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),
+             (r'0x-?[abcdef\d]+', Number.Hex),
 
              # strings, symbols and characters
             (r'"(\\\\|\\"|[^"])*"', String),
             (r"'" + valid_name, String.Symbol),
-            (r"\\([()/'\".'_!§$%& ?;=#+-]{1}|[a-zA-Z0-9]+)", String.Char),
+            (r"\\(.|[a-z]+)", String.Char),
 
-             # constants
-             (r'(#t|#f)', Name.Constant),
+             # keywords
+             (r':' + valid_name, Name.Constant),
 
              # special operators
-             (r"('|#|`|,@|,|\.)", Operator),
+             (r'~@|[`\'#^~&]', Operator),
 
              # highlight the keywords
-             ('(%s)' % '|'.join([
-                 re.escape(entry) + ' ' for entry in keywords]),
-                 Keyword
-             ),
-
-             # first variable in a quoted string like
-             # '(this is syntactic sugar)
-             (r"(?<='\()" + valid_name, Name.Variable),
-             (r"(?<=#\()" + valid_name, Name.Variable),
+             (_multi_escape(keywords), Keyword),
 
              # highlight the builtins
-             ("(?<=\()(%s)" % '|'.join([
-                 re.escape(entry) + ' ' for entry in builtins]),
-                 Name.Builtin
-             ),
+             (_multi_escape(builtins), Name.Builtin),
 
              # the remaining functions
              (r'(?<=\()' + valid_name, Name.Function),
@@ -13,11 +13,12 @@ import re
 
  from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
  from pygments.token import Text, Comment, Operator, Keyword, Name, \
-      String, Number, Punctuation, Literal, Generic
+      String, Number, Punctuation, Literal, Generic, Error
 
 
- __all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer',
-            'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer']
+ __all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
+            'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
+            'ErlangShellLexer']
 
 
  class SchemeLexer(RegexLexer):
@@ -515,6 +516,329 @@ class LiterateHaskellLexer(Lexer):
              yield item
 
 
+ class SMLLexer(RegexLexer):
+     """
+     For the Standard ML language.
+
+     *New in Pygments 1.5.*
+     """
+
+     name = 'Standard ML'
+     aliases = ['sml']
+     filenames = ['*.sml', '*.sig', '*.fun',]
+     mimetypes = ['text/x-standardml', 'application/x-standardml']
+
+     alphanumid_reserved = [
+         # Core
+         'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
+         'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
+         'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
+         'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
+         # Modules
+         'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
+         'struct', 'structure', 'where',
+     ]
+
+     symbolicid_reserved = [
+         # Core
+         ':', '\|', '=', '=>', '->', '#',
+         # Modules
+         ':>',
+     ]
+
+     nonid_reserved = [ '(', ')', '[', ']', '{', '}', ',', ';', '...', '_' ]
+
+     alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
+     symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
+
+     # A character constant is a sequence of the form #s, where s is a string
+     # constant denoting a string of size one character. This setup just parses
+     # the entire string as either a String.Double or a String.Char (depending
+     # on the argument), even if the String.Char is an erronous
+     # multiple-character string.
+     def stringy (whatkind):
+         return [
+             (r'[^"\\]', whatkind),
+             (r'\\[\\\"abtnvfr]', String.Escape),
+             (r'\\\^[@-^]', String.Escape),
+             (r'\\[0-9]{3}', String.Escape),
+             (r'\\u[0-9a-fA-F]{4}', String.Escape),
+             (r'\\\s+\\', String.Interpol),
+             (r'"', whatkind, '#pop'),
+         ]
+
+     # Callbacks for distinguishing tokens and reserved words
+     def long_id_callback(self, match):
+         if match.group(1) in self.alphanumid_reserved: token = Error
+         else: token = Name.Namespace
+         yield match.start(1), token, match.group(1)
+         yield match.start(2), Punctuation, match.group(2)
+
+     def end_id_callback(self, match):
+         if match.group(1) in self.alphanumid_reserved: token = Error
+         elif match.group(1) in self.symbolicid_reserved: token = Error
+         else: token = Name
+         yield match.start(1), token, match.group(1)
+
+     def id_callback(self, match):
+         str = match.group(1)
+         if str in self.alphanumid_reserved: token = Keyword.Reserved
+         elif str in self.symbolicid_reserved: token = Punctuation
+         else: token = Name
+         yield match.start(1), token, str
+
+     tokens = {
+         # Whitespace and comments are (almost) everywhere
+         'whitespace': [
+             (r'\s+', Text),
+             (r'\(\*', Comment.Multiline, 'comment'),
+         ],
+
+         'delimiters': [
+             # This lexer treats these delimiters specially:
+             # Delimiters define scopes, and the scope is how the meaning of
+             # the `|' is resolved - is it a case/handle expression, or function
+             # definition by cases? (This is not how the Definition works, but
+             # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
+             (r'\(|\[|{', Punctuation, 'main'),
+             (r'\)|\]|}', Punctuation, '#pop'),
+             (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
+             (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
+             (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
+         ],
+
+         'core': [
+             # Punctuation that doesn't overlap symbolic identifiers
+             (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]),
+              Punctuation),
+
+             # Special constants: strings, floats, numbers in decimal and hex
+             (r'#"', String.Char, 'char'),
+             (r'"', String.Double, 'string'),
+             (r'~?0x[0-9a-fA-F]+', Number.Hex),
+             (r'0wx[0-9a-fA-F]+', Number.Hex),
+             (r'0w\d+', Number.Integer),
+             (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
+             (r'~?\d+\.\d+', Number.Float),
+             (r'~?\d+[eE]~?\d+', Number.Float),
+             (r'~?\d+', Number.Integer),
+
+             # Labels
+             (r'#\s*[1-9][0-9]*', Name.Label),
+             (r'#\s*(%s)' % alphanumid_re, Name.Label),
+             (r'#\s+(%s)' % symbolicid_re, Name.Label),
+             # Some reserved words trigger a special, local lexer state change
+             (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
+             (r'(?=\b(exception)\b(?!\'))', Text, ('ename')),
+             (r'\b(functor|include|open|signature|structure)\b(?!\')',
+              Keyword.Reserved, 'sname'),
+             (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),
+
+             # Regular identifiers, long and otherwise
+             (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+             (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
+             (r'(%s)' % alphanumid_re, id_callback),
+             (r'(%s)' % symbolicid_re, id_callback),
+         ],
+         'dotted': [
+             (r'(%s)(\.)' % alphanumid_re, long_id_callback),
+             (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
+             (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
+             (r'\s+', Error),
+             (r'\S+', Error),
+         ],
+
+
+         # Main parser (prevents errors in files that have scoping errors)
+         'root': [ (r'', Text, 'main') ],
+
+         # In this scope, I expect '|' to not be followed by a function name,
+         # and I expect 'and' to be followed by a binding site
+         'main': [
+             include('whitespace'),
+
+             # Special behavior of val/and/fun
+             (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
+             (r'\b(fun)\b(?!\')', Keyword.Reserved,
+              ('#pop', 'main-fun', 'fname')),
+
+             include('delimiters'),
+             include('core'),
+             (r'\S+', Error),
+         ],
+
+         # In this scope, I expect '|' and 'and' to be followed by a function
+         'main-fun': [
+             include('whitespace'),
+
+             (r'\s', Text),
+             (r'\(\*', Comment.Multiline, 'comment'),
+
+             # Special behavior of val/and/fun
+             (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
+             (r'\b(val)\b(?!\')', Keyword.Reserved,
+              ('#pop', 'main', 'vname')),
+
+             # Special behavior of '|' and '|'-manipulating keywords
+             (r'\|', Punctuation, 'fname'),
+             (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
+              ('#pop', 'main')),
+
+             include('delimiters'),
+             include('core'),
+             (r'\S+', Error),
+         ],
+
+         # Character and string parsers
+         'char': stringy(String.Char),
+         'string': stringy(String.Double),
+
+         'breakout': [
+             (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
+         ],
+
+         # Dealing with what comes after module system keywords
+         'sname': [
+             include('whitespace'),
+             include('breakout'),
+
+             (r'(%s)' % alphanumid_re, Name.Namespace),
+             (r'', Text, '#pop'),
+         ],
+
+         # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
+         'fname': [
+             include('whitespace'),
+             (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+             (r'\(', Punctuation, 'tyvarseq'),
+
+             (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
+             (r'(%s)' % symbolicid_re, Name.Function, '#pop'),
+
+             # Ignore interesting function declarations like "fun (x + y) = ..."
+             (r'', Text, '#pop'),
+         ],
+
+         # Dealing with what comes after the 'val' (or 'and') keyword
+         'vname': [
+             include('whitespace'),
+             (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+             (r'\(', Punctuation, 'tyvarseq'),
+
+             (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
+              bygroups(Name.Variable, Text, Punctuation), '#pop'),
+             (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
+              bygroups(Name.Variable, Text, Punctuation), '#pop'),
+             (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
+             (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),
+
+             # Ignore interesting patterns like 'val (x, y)'
+             (r'', Text, '#pop'),
+         ],
+
+         # Dealing with what comes after the 'type' (or 'and') keyword
+         'tname': [
+             include('whitespace'),
+             include('breakout'),
+
+             (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+             (r'\(', Punctuation, 'tyvarseq'),
+             (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),
+
+             (r'(%s)' % alphanumid_re, Keyword.Type),
+             (r'(%s)' % symbolicid_re, Keyword.Type),
+             (r'\S+', Error, '#pop'),
+         ],
+
+         # A type binding includes most identifiers
+         'typbind': [
+             include('whitespace'),
+
+             (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
+
+             include('breakout'),
+             include('core'),
+             (r'\S+', Error, '#pop'),
+         ],
+
+         # Dealing with what comes after the 'datatype' (or 'and') keyword
+         'dname': [
+             include('whitespace'),
+             include('breakout'),
+
+             (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+             (r'\(', Punctuation, 'tyvarseq'),
+             (r'(=)(\s*)(datatype)',
+              bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
+             (r'=(?!%s)' % symbolicid_re, Punctuation,
+              ('#pop', 'datbind', 'datcon')),
+
+             (r'(%s)' % alphanumid_re, Keyword.Type),
+             (r'(%s)' % symbolicid_re, Keyword.Type),
+             (r'\S+', Error, '#pop'),
+         ],
+
+         # common case - A | B | C of int
+         'datbind': [
+             include('whitespace'),
+
+             (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
+             (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
+             (r'\b(of)\b(?!\')', Keyword.Reserved),
+
+             (r'(\|)(\s*)(%s)' % alphanumid_re,
+              bygroups(Punctuation, Text, Name.Class)),
+             (r'(\|)(\s+)(%s)' % symbolicid_re,
+              bygroups(Punctuation, Text, Name.Class)),
+
+             include('breakout'),
+             include('core'),
+             (r'\S+', Error),
+         ],
+
+         # Dealing with what comes after an exception
+         'ename': [
+             include('whitespace'),
+
+             (r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
+              bygroups(Keyword.Reserved, Text, Name.Class)),
+             (r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
+              bygroups(Keyword.Reserved, Text, Name.Class)),
+             (r'\b(of)\b(?!\')', Keyword.Reserved),
+
+             include('breakout'),
+             include('core'),
+             (r'\S+', Error),
+         ],
+
+         'datcon': [
+             include('whitespace'),
+             (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
+             (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
+             (r'\S+', Error, '#pop'),
+         ],
+
+         # Series of type variables
+         'tyvarseq': [
+             (r'\s', Text),
+             (r'\(\*', Comment.Multiline, 'comment'),
+
+             (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+             (alphanumid_re, Name),
+             (r',', Punctuation),
+             (r'\)', Punctuation, '#pop'),
+             (symbolicid_re, Name),
+         ],
+
+         'comment': [
+             (r'[^(*)]', Comment.Multiline),
+             (r'\(\*', Comment.Multiline, '#push'),
+             (r'\*\)', Comment.Multiline, '#pop'),
+             (r'[(*)]', Comment.Multiline),
+         ],
+     }
+
+
  class OcamlLexer(RegexLexer):
      """
      For the OCaml language.