pygments.rb 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +51 -1
- data/ext/extconf.rb +1 -1
- data/ext/pygments.c +12 -1
- data/lib/pygments/ffi.rb +1 -0
- data/lib/pygments/lexer.rb +1 -1
- data/lib/pygments/version.rb +1 -1
- data/vendor/pygments-main/AUTHORS +3 -0
- data/vendor/pygments-main/CHANGES +5 -0
- data/vendor/pygments-main/REVISION +1 -1
- data/vendor/pygments-main/docs/src/lexerdevelopment.txt +1 -1
- data/vendor/pygments-main/docs/src/styles.txt +1 -1
- data/vendor/pygments-main/pygments/cmdline.py +1 -1
- data/vendor/pygments-main/pygments/formatters/latex.py +1 -1
- data/vendor/pygments-main/pygments/lexers/_mapping.py +4 -1
- data/vendor/pygments-main/pygments/lexers/agile.py +21 -29
- data/vendor/pygments-main/pygments/lexers/functional.py +327 -3
- data/vendor/pygments-main/pygments/lexers/math.py +400 -7
- data/vendor/pygments-main/pygments/lexers/other.py +210 -170
- data/vendor/pygments-main/pygments/lexers/postgres.py +1 -1
- data/vendor/pygments-main/pygments/lexers/pypylog.py +6 -4
- data/vendor/pygments-main/pygments/lexers/web.py +45 -8
- data/vendor/pygments-main/tests/examplefiles/example.sml +156 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.fun +675 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.sig +286 -0
- data/vendor/pygments-main/tests/examplefiles/psql_session.txt +1 -0
- data/vendor/pygments-main/tests/examplefiles/test.nim +93 -0
- metadata +10 -6
data/README.md
CHANGED
@@ -1,6 +1,54 @@
|
|
1
1
|
# pygments.rb
|
2
2
|
|
3
|
-
|
3
|
+
A ruby wrapper for the python [pygments syntax highlighter](http://pygments.org/).
|
4
|
+
|
5
|
+
This library replaces [github/albino](https://github.com/github/albino).
|
6
|
+
Instead of shelling out to `pygmentize`, it embeds the python
|
7
|
+
interpreter inside ruby via FFI. This avoids the cost of setting up the
|
8
|
+
python VM on every invocation and speeds up code highlighting from ruby by 10-15x.
|
9
|
+
|
10
|
+
## usage
|
11
|
+
|
12
|
+
``` ruby
|
13
|
+
Pygments.highlight(File.read(__FILE__), :lexer => 'ruby')
|
14
|
+
```
|
15
|
+
|
16
|
+
Encoding and other lexer/formatter options can be passed in via an
|
17
|
+
options hash:
|
18
|
+
|
19
|
+
``` ruby
|
20
|
+
Pygments.highlight('code', :options => {:encoding => 'utf-8'})
|
21
|
+
```
|
22
|
+
|
23
|
+
To use a formatter other than html, specify it explicitly:
|
24
|
+
|
25
|
+
``` ruby
|
26
|
+
Pygments.highlight('code', :formatter => 'bbcode')
|
27
|
+
Pygments.highlight('code', :formatter => 'terminal')
|
28
|
+
```
|
29
|
+
|
30
|
+
To generate CSS for html formatted code, use the css method:
|
31
|
+
|
32
|
+
``` ruby
|
33
|
+
Pygments.css
|
34
|
+
Pygments.css('.highlight')
|
35
|
+
```
|
36
|
+
|
37
|
+
To use a custom python installation (like in ArchLinux), tell
|
38
|
+
RubyPython where python lives:
|
39
|
+
|
40
|
+
``` ruby
|
41
|
+
RubyPython.configure :python_exe => 'python2.7'
|
42
|
+
```
|
43
|
+
|
44
|
+
To use a custom pygments installation, specify the path to
|
45
|
+
Pygments.start:
|
46
|
+
|
47
|
+
``` ruby
|
48
|
+
Pygments.start("/path/to/pygments")
|
49
|
+
```
|
50
|
+
|
51
|
+
## benchmarks
|
4
52
|
|
5
53
|
$ ruby -rubygems bench.rb 50
|
6
54
|
user system total real
|
@@ -9,3 +57,5 @@ a ruby wrapper for the pygments syntax highlighter via embedded python.
|
|
9
57
|
pygments::ffi + reload 11.350000 1.240000 12.590000 ( 12.692320)
|
10
58
|
pygments::ffi 1.130000 0.010000 1.140000 ( 1.171589)
|
11
59
|
|
60
|
+
To run `bench.rb`, use a git checkout. The C extension is not included
|
61
|
+
in gem releases.
|
data/ext/extconf.rb
CHANGED
data/ext/pygments.c
CHANGED
@@ -2,7 +2,18 @@
|
|
2
2
|
#include <stdlib.h>
|
3
3
|
|
4
4
|
#include <ruby.h>
|
5
|
-
|
5
|
+
|
6
|
+
#if PYGMENTS_PYTHON_VERSION == 24
|
7
|
+
#include <python2.4/Python.h>
|
8
|
+
#elif PYGMENTS_PYTHON_VERSION == 25
|
9
|
+
#include <python2.5/Python.h>
|
10
|
+
#elif PYGMENTS_PYTHON_VERSION == 26
|
11
|
+
#include <python2.6/Python.h>
|
12
|
+
#elif PYGMENTS_PYTHON_VERSION == 27
|
13
|
+
#include <python2.7/Python.h>
|
14
|
+
#else
|
15
|
+
#error Unknown python version
|
16
|
+
#endif
|
6
17
|
|
7
18
|
#ifdef RUBY_VM
|
8
19
|
#include <ruby/st.h>
|
data/lib/pygments/ffi.rb
CHANGED
data/lib/pygments/lexer.rb
CHANGED
data/lib/pygments/version.rb
CHANGED
@@ -39,6 +39,7 @@ Other contributors, listed alphabetically, are:
|
|
39
39
|
* Matthew Harrison -- SVG formatter
|
40
40
|
* Steven Hazel -- Tcl lexer
|
41
41
|
* Aslak Hellesøy -- Gherkin lexer
|
42
|
+
* Jordi Gutiérrez Hermoso -- Octave lexer
|
42
43
|
* David Hess, Fish Software, Inc. -- Objective-J lexer
|
43
44
|
* Varun Hiremath -- Debian control lexer
|
44
45
|
* Ben Hollis -- Mason lexer
|
@@ -78,6 +79,7 @@ Other contributors, listed alphabetically, are:
|
|
78
79
|
* Ken Schutte -- Matlab lexers
|
79
80
|
* Tassilo Schweyer -- Io, MOOCode lexers
|
80
81
|
* Joerg Sieker -- ABAP lexer
|
82
|
+
* Robert Simmons -- Standard ML lexer
|
81
83
|
* Kirill Simonov -- YAML lexer
|
82
84
|
* Steve Spigarelli -- XQuery lexer
|
83
85
|
* Jerome St-Louis -- eC lexer
|
@@ -90,6 +92,7 @@ Other contributors, listed alphabetically, are:
|
|
90
92
|
* Dietmar Winkler -- Modelica lexer
|
91
93
|
* Nils Winter -- Smalltalk lexer
|
92
94
|
* Davy Wybiral -- Clojure lexer
|
95
|
+
* Diego Zamboni -- CFengine3 lexer
|
93
96
|
* Alex Zimin -- Nemerle lexer
|
94
97
|
|
95
98
|
Many thanks for all contributions!
|
@@ -21,6 +21,9 @@ Version 1.5
|
|
21
21
|
* PostgreSQL (#660)
|
22
22
|
* DTD
|
23
23
|
* Gosu
|
24
|
+
* Octave (PR#22)
|
25
|
+
* Standard ML (PR#14)
|
26
|
+
* CFengine3 (#601)
|
24
27
|
|
25
28
|
- In the LaTeX formatter, escape special &, < and > chars (#648).
|
26
29
|
|
@@ -41,6 +44,8 @@ Version 1.5
|
|
41
44
|
|
42
45
|
- Fix generic type highlighting in ActionScript 3 (#666).
|
43
46
|
|
47
|
+
- Fixes to the Clojure lexer (PR#9).
|
48
|
+
|
44
49
|
|
45
50
|
Version 1.4
|
46
51
|
-----------
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
db34feabe4b8
|
@@ -121,7 +121,7 @@ sections, comments and key = value pairs:
|
|
121
121
|
}
|
122
122
|
|
123
123
|
The lexer first looks for whitespace, comments and section names. And later it
|
124
|
-
looks for a line that looks like a key, value pair,
|
124
|
+
looks for a line that looks like a key, value pair, separated by an ``'='``
|
125
125
|
sign, and optional whitespace.
|
126
126
|
|
127
127
|
The `bygroups` helper makes sure that each group is yielded with a different
|
@@ -85,7 +85,7 @@ Here a small overview of all allowed styles:
|
|
85
85
|
``bold``
|
86
86
|
render text as bold
|
87
87
|
``nobold``
|
88
|
-
don't render text as bold (to prevent subtokens
|
88
|
+
don't render text as bold (to prevent subtokens being highlighted bold)
|
89
89
|
``italic``
|
90
90
|
render text italic
|
91
91
|
``noitalic``
|
@@ -219,7 +219,7 @@ def main(args=sys.argv):
|
|
219
219
|
return 0
|
220
220
|
|
221
221
|
if opts.pop('-V', None) is not None:
|
222
|
-
print 'Pygments version %s, (c) 2006-
|
222
|
+
print 'Pygments version %s, (c) 2006-2011 by Georg Brandl.' % __version__
|
223
223
|
return 0
|
224
224
|
|
225
225
|
# handle ``pygmentize -L``
|
@@ -286,7 +286,7 @@ class LatexFormatter(Formatter):
|
|
286
286
|
cp = self.commandprefix
|
287
287
|
styles = []
|
288
288
|
for name, definition in self.cmd2def.iteritems():
|
289
|
-
styles.append(r'\def
|
289
|
+
styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' % (cp, name, definition))
|
290
290
|
return STYLE_TEMPLATE % {'cp': self.commandprefix,
|
291
291
|
'styles': '\n'.join(styles)}
|
292
292
|
|
@@ -46,6 +46,7 @@ LEXERS = {
|
|
46
46
|
'CObjdumpLexer': ('pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)),
|
47
47
|
'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()),
|
48
48
|
'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',), ('text/x-csharp',)),
|
49
|
+
'Cfengine3Lexer': ('pygments.lexers.other', 'CFEngine3', ('cfengine3', 'cf3'), ('*.cf',), ()),
|
49
50
|
'CheetahHtmlLexer': ('pygments.lexers.templates', 'HTML+Cheetah', ('html+cheetah', 'html+spitfire'), (), ('text/html+cheetah', 'text/html+spitfire')),
|
50
51
|
'CheetahJavascriptLexer': ('pygments.lexers.templates', 'JavaScript+Cheetah', ('js+cheetah', 'javascript+cheetah', 'js+spitfire', 'javascript+spitfire'), (), ('application/x-javascript+cheetah', 'text/x-javascript+cheetah', 'text/javascript+cheetah', 'application/x-javascript+spitfire', 'text/x-javascript+spitfire', 'text/javascript+spitfire')),
|
51
52
|
'CheetahLexer': ('pygments.lexers.templates', 'Cheetah', ('cheetah', 'spitfire'), ('*.tmpl', '*.spt'), ('application/x-cheetah', 'application/x-spitfire')),
|
@@ -135,7 +136,7 @@ LEXERS = {
|
|
135
136
|
'MakoXmlLexer': ('pygments.lexers.templates', 'XML+Mako', ('xml+mako',), (), ('application/xml+mako',)),
|
136
137
|
'MaqlLexer': ('pygments.lexers.other', 'MAQL', ('maql',), ('*.maql',), ('text/x-gooddata-maql', 'application/x-gooddata-maql')),
|
137
138
|
'MasonLexer': ('pygments.lexers.templates', 'Mason', ('mason',), ('*.m', '*.mhtml', '*.mc', '*.mi', 'autohandler', 'dhandler'), ('application/x-mason',)),
|
138
|
-
'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab',
|
139
|
+
'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab',), ('*.m',), ('text/matlab',)),
|
139
140
|
'MatlabSessionLexer': ('pygments.lexers.math', 'Matlab session', ('matlabsession',), (), ()),
|
140
141
|
'MiniDLexer': ('pygments.lexers.agile', 'MiniD', ('minid',), ('*.md',), ('text/x-minidsrc',)),
|
141
142
|
'ModelicaLexer': ('pygments.lexers.other', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)),
|
@@ -160,6 +161,7 @@ LEXERS = {
|
|
160
161
|
'ObjectiveJLexer': ('pygments.lexers.web', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)),
|
161
162
|
'OcamlLexer': ('pygments.lexers.compiled', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
|
162
163
|
'OcamlLexer': ('pygments.lexers.functional', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
|
164
|
+
'OctaveLexer': ('pygments.lexers.math', 'Octave', ('octave',), ('*.m',), ('text/octave',)),
|
163
165
|
'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)),
|
164
166
|
'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')),
|
165
167
|
'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]'), ('text/x-php',)),
|
@@ -194,6 +196,7 @@ LEXERS = {
|
|
194
196
|
'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)),
|
195
197
|
'RubyLexer': ('pygments.lexers.agile', 'Ruby', ('rb', 'ruby', 'duby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby'), ('text/x-ruby', 'application/x-ruby')),
|
196
198
|
'SLexer': ('pygments.lexers.math', 'S', ('splus', 's', 'r'), ('*.S', '*.R'), ('text/S-plus', 'text/S', 'text/R')),
|
199
|
+
'SMLLexer': ('pygments.lexers.functional', 'Standard ML', ('sml',), ('*.sml', '*.sig', '*.fun'), ('text/x-standardml', 'application/x-standardml')),
|
197
200
|
'SassLexer': ('pygments.lexers.web', 'Sass', ('sass', 'SASS'), ('*.sass',), ('text/x-sass',)),
|
198
201
|
'ScalaLexer': ('pygments.lexers.compiled', 'Scala', ('scala',), ('*.scala',), ('text/x-scala',)),
|
199
202
|
'ScamlLexer': ('pygments.lexers.web', 'Scaml', ('scaml', 'SCAML'), ('*.scaml',), ('text/x-scaml',)),
|
@@ -13,7 +13,7 @@ import re
|
|
13
13
|
|
14
14
|
from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
|
15
15
|
LexerContext, include, combined, do_insertions, bygroups, using, this
|
16
|
-
from pygments.token import Error, Text, Other, \
|
16
|
+
from pygments.token import Error, Text, Whitespace, Other, \
|
17
17
|
Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
|
18
18
|
from pygments.util import get_bool_opt, get_list_opt, shebang_matches
|
19
19
|
from pygments import unistring as uni
|
@@ -1367,13 +1367,11 @@ class ClojureLexer(RegexLexer):
|
|
1367
1367
|
|
1368
1368
|
keywords = [
|
1369
1369
|
'fn', 'def', 'defn', 'defmacro', 'defmethod', 'defmulti', 'defn-',
|
1370
|
-
'defstruct',
|
1371
|
-
'if', 'cond',
|
1372
|
-
'let', 'for'
|
1370
|
+
'defstruct', 'if', 'cond', 'let', 'for'
|
1373
1371
|
]
|
1374
1372
|
builtins = [
|
1375
1373
|
'.', '..',
|
1376
|
-
'*', '+', '-', '->', '
|
1374
|
+
'*', '+', '-', '->', '/', '<', '<=', '=', '==', '>', '>=',
|
1377
1375
|
'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
|
1378
1376
|
'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
|
1379
1377
|
'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
|
@@ -1389,13 +1387,13 @@ class ClojureLexer(RegexLexer):
|
|
1389
1387
|
'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
|
1390
1388
|
'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
|
1391
1389
|
'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush',
|
1392
|
-
'fnseq', 'frest', 'gensym', 'get', 'get
|
1390
|
+
'fnseq', 'frest', 'gensym', 'get-proxy-class', 'get',
|
1393
1391
|
'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
|
1394
1392
|
'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
|
1395
1393
|
'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
|
1396
1394
|
'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
|
1397
1395
|
'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
|
1398
|
-
'lefts', 'line-seq', 'list', 'list
|
1396
|
+
'lefts', 'line-seq', 'list*', 'list', 'load', 'load-file',
|
1399
1397
|
'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
|
1400
1398
|
'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
|
1401
1399
|
'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
|
@@ -1426,7 +1424,14 @@ class ClojureLexer(RegexLexer):
|
|
1426
1424
|
# valid names for identifiers
|
1427
1425
|
# well, the only restriction is that names cannot consist entirely of numbers
|
1428
1426
|
# but this should be good enough for now
|
1429
|
-
|
1427
|
+
|
1428
|
+
# TODO: '/' should divide keywords/symbols into namespace/rest
|
1429
|
+
# but that's hard, so just pretend / is part of the name
|
1430
|
+
valid_name = r'[\w!$%*+,<=>?/.-]+'
|
1431
|
+
|
1432
|
+
def _multi_escape(entries):
|
1433
|
+
return '|'.join([re.escape(entry) + '(?![\\w-!$%*+,<=>?/.-])'
|
1434
|
+
for entry in entries])
|
1430
1435
|
|
1431
1436
|
tokens = {
|
1432
1437
|
'root' : [
|
@@ -1435,42 +1440,29 @@ class ClojureLexer(RegexLexer):
|
|
1435
1440
|
(r';.*$', Comment.Single),
|
1436
1441
|
|
1437
1442
|
# whitespaces - usually not relevant
|
1438
|
-
(r'
|
1443
|
+
(r'[,\s]+', Whitespace),
|
1439
1444
|
|
1440
1445
|
# numbers
|
1441
1446
|
(r'-?\d+\.\d+', Number.Float),
|
1442
1447
|
(r'-?\d+', Number.Integer),
|
1443
|
-
|
1444
|
-
# have to figure out what the characters mean
|
1445
|
-
#(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),
|
1448
|
+
(r'0x-?[abcdef\d]+', Number.Hex),
|
1446
1449
|
|
1447
1450
|
# strings, symbols and characters
|
1448
1451
|
(r'"(\\\\|\\"|[^"])*"', String),
|
1449
1452
|
(r"'" + valid_name, String.Symbol),
|
1450
|
-
(r"\\([
|
1453
|
+
(r"\\(.|[a-z]+)", String.Char),
|
1451
1454
|
|
1452
|
-
#
|
1453
|
-
(r'
|
1455
|
+
# keywords
|
1456
|
+
(r':' + valid_name, Name.Constant),
|
1454
1457
|
|
1455
1458
|
# special operators
|
1456
|
-
(r
|
1459
|
+
(r'~@|[`\'#^~&]', Operator),
|
1457
1460
|
|
1458
1461
|
# highlight the keywords
|
1459
|
-
(
|
1460
|
-
re.escape(entry) + ' ' for entry in keywords]),
|
1461
|
-
Keyword
|
1462
|
-
),
|
1463
|
-
|
1464
|
-
# first variable in a quoted string like
|
1465
|
-
# '(this is syntactic sugar)
|
1466
|
-
(r"(?<='\()" + valid_name, Name.Variable),
|
1467
|
-
(r"(?<=#\()" + valid_name, Name.Variable),
|
1462
|
+
(_multi_escape(keywords), Keyword),
|
1468
1463
|
|
1469
1464
|
# highlight the builtins
|
1470
|
-
(
|
1471
|
-
re.escape(entry) + ' ' for entry in builtins]),
|
1472
|
-
Name.Builtin
|
1473
|
-
),
|
1465
|
+
(_multi_escape(builtins), Name.Builtin),
|
1474
1466
|
|
1475
1467
|
# the remaining functions
|
1476
1468
|
(r'(?<=\()' + valid_name, Name.Function),
|
@@ -13,11 +13,12 @@ import re
|
|
13
13
|
|
14
14
|
from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
|
15
15
|
from pygments.token import Text, Comment, Operator, Keyword, Name, \
|
16
|
-
String, Number, Punctuation, Literal, Generic
|
16
|
+
String, Number, Punctuation, Literal, Generic, Error
|
17
17
|
|
18
18
|
|
19
|
-
__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
|
20
|
-
'
|
19
|
+
__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
|
20
|
+
'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
|
21
|
+
'ErlangShellLexer']
|
21
22
|
|
22
23
|
|
23
24
|
class SchemeLexer(RegexLexer):
|
@@ -515,6 +516,329 @@ class LiterateHaskellLexer(Lexer):
|
|
515
516
|
yield item
|
516
517
|
|
517
518
|
|
519
|
+
class SMLLexer(RegexLexer):
|
520
|
+
"""
|
521
|
+
For the Standard ML language.
|
522
|
+
|
523
|
+
*New in Pygments 1.5.*
|
524
|
+
"""
|
525
|
+
|
526
|
+
name = 'Standard ML'
|
527
|
+
aliases = ['sml']
|
528
|
+
filenames = ['*.sml', '*.sig', '*.fun',]
|
529
|
+
mimetypes = ['text/x-standardml', 'application/x-standardml']
|
530
|
+
|
531
|
+
alphanumid_reserved = [
|
532
|
+
# Core
|
533
|
+
'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
|
534
|
+
'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
|
535
|
+
'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
|
536
|
+
'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
|
537
|
+
# Modules
|
538
|
+
'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
|
539
|
+
'struct', 'structure', 'where',
|
540
|
+
]
|
541
|
+
|
542
|
+
symbolicid_reserved = [
|
543
|
+
# Core
|
544
|
+
':', '\|', '=', '=>', '->', '#',
|
545
|
+
# Modules
|
546
|
+
':>',
|
547
|
+
]
|
548
|
+
|
549
|
+
nonid_reserved = [ '(', ')', '[', ']', '{', '}', ',', ';', '...', '_' ]
|
550
|
+
|
551
|
+
alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
|
552
|
+
symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
|
553
|
+
|
554
|
+
# A character constant is a sequence of the form #s, where s is a string
|
555
|
+
# constant denoting a string of size one character. This setup just parses
|
556
|
+
# the entire string as either a String.Double or a String.Char (depending
|
557
|
+
# on the argument), even if the String.Char is an erroneous
|
558
|
+
# multiple-character string.
|
559
|
+
def stringy (whatkind):
|
560
|
+
return [
|
561
|
+
(r'[^"\\]', whatkind),
|
562
|
+
(r'\\[\\\"abtnvfr]', String.Escape),
|
563
|
+
(r'\\\^[@-^]', String.Escape),
|
564
|
+
(r'\\[0-9]{3}', String.Escape),
|
565
|
+
(r'\\u[0-9a-fA-F]{4}', String.Escape),
|
566
|
+
(r'\\\s+\\', String.Interpol),
|
567
|
+
(r'"', whatkind, '#pop'),
|
568
|
+
]
|
569
|
+
|
570
|
+
# Callbacks for distinguishing tokens and reserved words
|
571
|
+
def long_id_callback(self, match):
|
572
|
+
if match.group(1) in self.alphanumid_reserved: token = Error
|
573
|
+
else: token = Name.Namespace
|
574
|
+
yield match.start(1), token, match.group(1)
|
575
|
+
yield match.start(2), Punctuation, match.group(2)
|
576
|
+
|
577
|
+
def end_id_callback(self, match):
|
578
|
+
if match.group(1) in self.alphanumid_reserved: token = Error
|
579
|
+
elif match.group(1) in self.symbolicid_reserved: token = Error
|
580
|
+
else: token = Name
|
581
|
+
yield match.start(1), token, match.group(1)
|
582
|
+
|
583
|
+
def id_callback(self, match):
|
584
|
+
str = match.group(1)
|
585
|
+
if str in self.alphanumid_reserved: token = Keyword.Reserved
|
586
|
+
elif str in self.symbolicid_reserved: token = Punctuation
|
587
|
+
else: token = Name
|
588
|
+
yield match.start(1), token, str
|
589
|
+
|
590
|
+
tokens = {
|
591
|
+
# Whitespace and comments are (almost) everywhere
|
592
|
+
'whitespace': [
|
593
|
+
(r'\s+', Text),
|
594
|
+
(r'\(\*', Comment.Multiline, 'comment'),
|
595
|
+
],
|
596
|
+
|
597
|
+
'delimiters': [
|
598
|
+
# This lexer treats these delimiters specially:
|
599
|
+
# Delimiters define scopes, and the scope is how the meaning of
|
600
|
+
# the `|' is resolved - is it a case/handle expression, or function
|
601
|
+
# definition by cases? (This is not how the Definition works, but
|
602
|
+
# it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
|
603
|
+
(r'\(|\[|{', Punctuation, 'main'),
|
604
|
+
(r'\)|\]|}', Punctuation, '#pop'),
|
605
|
+
(r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
|
606
|
+
(r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
|
607
|
+
(r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
|
608
|
+
],
|
609
|
+
|
610
|
+
'core': [
|
611
|
+
# Punctuation that doesn't overlap symbolic identifiers
|
612
|
+
(r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]),
|
613
|
+
Punctuation),
|
614
|
+
|
615
|
+
# Special constants: strings, floats, numbers in decimal and hex
|
616
|
+
(r'#"', String.Char, 'char'),
|
617
|
+
(r'"', String.Double, 'string'),
|
618
|
+
(r'~?0x[0-9a-fA-F]+', Number.Hex),
|
619
|
+
(r'0wx[0-9a-fA-F]+', Number.Hex),
|
620
|
+
(r'0w\d+', Number.Integer),
|
621
|
+
(r'~?\d+\.\d+[eE]~?\d+', Number.Float),
|
622
|
+
(r'~?\d+\.\d+', Number.Float),
|
623
|
+
(r'~?\d+[eE]~?\d+', Number.Float),
|
624
|
+
(r'~?\d+', Number.Integer),
|
625
|
+
|
626
|
+
# Labels
|
627
|
+
(r'#\s*[1-9][0-9]*', Name.Label),
|
628
|
+
(r'#\s*(%s)' % alphanumid_re, Name.Label),
|
629
|
+
(r'#\s+(%s)' % symbolicid_re, Name.Label),
|
630
|
+
# Some reserved words trigger a special, local lexer state change
|
631
|
+
(r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
|
632
|
+
(r'(?=\b(exception)\b(?!\'))', Text, ('ename')),
|
633
|
+
(r'\b(functor|include|open|signature|structure)\b(?!\')',
|
634
|
+
Keyword.Reserved, 'sname'),
|
635
|
+
(r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),
|
636
|
+
|
637
|
+
# Regular identifiers, long and otherwise
|
638
|
+
(r'\'[0-9a-zA-Z_\']*', Name.Decorator),
|
639
|
+
(r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
|
640
|
+
(r'(%s)' % alphanumid_re, id_callback),
|
641
|
+
(r'(%s)' % symbolicid_re, id_callback),
|
642
|
+
],
|
643
|
+
'dotted': [
|
644
|
+
(r'(%s)(\.)' % alphanumid_re, long_id_callback),
|
645
|
+
(r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
|
646
|
+
(r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
|
647
|
+
(r'\s+', Error),
|
648
|
+
(r'\S+', Error),
|
649
|
+
],
|
650
|
+
|
651
|
+
|
652
|
+
# Main parser (prevents errors in files that have scoping errors)
|
653
|
+
'root': [ (r'', Text, 'main') ],
|
654
|
+
|
655
|
+
# In this scope, I expect '|' to not be followed by a function name,
|
656
|
+
# and I expect 'and' to be followed by a binding site
|
657
|
+
'main': [
|
658
|
+
include('whitespace'),
|
659
|
+
|
660
|
+
# Special behavior of val/and/fun
|
661
|
+
(r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
|
662
|
+
(r'\b(fun)\b(?!\')', Keyword.Reserved,
|
663
|
+
('#pop', 'main-fun', 'fname')),
|
664
|
+
|
665
|
+
include('delimiters'),
|
666
|
+
include('core'),
|
667
|
+
(r'\S+', Error),
|
668
|
+
],
|
669
|
+
|
670
|
+
# In this scope, I expect '|' and 'and' to be followed by a function
|
671
|
+
'main-fun': [
|
672
|
+
include('whitespace'),
|
673
|
+
|
674
|
+
(r'\s', Text),
|
675
|
+
(r'\(\*', Comment.Multiline, 'comment'),
|
676
|
+
|
677
|
+
# Special behavior of val/and/fun
|
678
|
+
(r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
|
679
|
+
(r'\b(val)\b(?!\')', Keyword.Reserved,
|
680
|
+
('#pop', 'main', 'vname')),
|
681
|
+
|
682
|
+
# Special behavior of '|' and '|'-manipulating keywords
|
683
|
+
(r'\|', Punctuation, 'fname'),
|
684
|
+
(r'\b(case|handle)\b(?!\')', Keyword.Reserved,
|
685
|
+
('#pop', 'main')),
|
686
|
+
|
687
|
+
include('delimiters'),
|
688
|
+
include('core'),
|
689
|
+
(r'\S+', Error),
|
690
|
+
],
|
691
|
+
|
692
|
+
# Character and string parsers
|
693
|
+
'char': stringy(String.Char),
|
694
|
+
'string': stringy(String.Double),
|
695
|
+
|
696
|
+
'breakout': [
|
697
|
+
(r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
|
698
|
+
],
|
699
|
+
|
700
|
+
# Dealing with what comes after module system keywords
|
701
|
+
'sname': [
|
702
|
+
include('whitespace'),
|
703
|
+
include('breakout'),
|
704
|
+
|
705
|
+
(r'(%s)' % alphanumid_re, Name.Namespace),
|
706
|
+
(r'', Text, '#pop'),
|
707
|
+
],
|
708
|
+
|
709
|
+
# Dealing with what comes after the 'fun' (or 'and' or '|') keyword
|
710
|
+
'fname': [
|
711
|
+
include('whitespace'),
|
712
|
+
(r'\'[0-9a-zA-Z_\']*', Name.Decorator),
|
713
|
+
(r'\(', Punctuation, 'tyvarseq'),
|
714
|
+
|
715
|
+
(r'(%s)' % alphanumid_re, Name.Function, '#pop'),
|
716
|
+
(r'(%s)' % symbolicid_re, Name.Function, '#pop'),
|
717
|
+
|
718
|
+
# Ignore interesting function declarations like "fun (x + y) = ..."
|
719
|
+
(r'', Text, '#pop'),
|
720
|
+
],
|
721
|
+
|
722
|
+
# Dealing with what comes after the 'val' (or 'and') keyword
|
723
|
+
'vname': [
|
724
|
+
include('whitespace'),
|
725
|
+
(r'\'[0-9a-zA-Z_\']*', Name.Decorator),
|
726
|
+
(r'\(', Punctuation, 'tyvarseq'),
|
727
|
+
|
728
|
+
(r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
|
729
|
+
bygroups(Name.Variable, Text, Punctuation), '#pop'),
|
730
|
+
(r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
|
731
|
+
bygroups(Name.Variable, Text, Punctuation), '#pop'),
|
732
|
+
(r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
|
733
|
+
(r'(%s)' % symbolicid_re, Name.Variable, '#pop'),
|
734
|
+
|
735
|
+
# Ignore interesting patterns like 'val (x, y)'
|
736
|
+
(r'', Text, '#pop'),
|
737
|
+
],
|
738
|
+
|
739
|
+
# Dealing with what comes after the 'type' (or 'and') keyword
|
740
|
+
'tname': [
|
741
|
+
include('whitespace'),
|
742
|
+
include('breakout'),
|
743
|
+
|
744
|
+
(r'\'[0-9a-zA-Z_\']*', Name.Decorator),
|
745
|
+
(r'\(', Punctuation, 'tyvarseq'),
|
746
|
+
(r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),
|
747
|
+
|
748
|
+
(r'(%s)' % alphanumid_re, Keyword.Type),
|
749
|
+
(r'(%s)' % symbolicid_re, Keyword.Type),
|
750
|
+
(r'\S+', Error, '#pop'),
|
751
|
+
],
|
752
|
+
|
753
|
+
# A type binding includes most identifiers
|
754
|
+
'typbind': [
|
755
|
+
include('whitespace'),
|
756
|
+
|
757
|
+
(r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
|
758
|
+
|
759
|
+
include('breakout'),
|
760
|
+
include('core'),
|
761
|
+
(r'\S+', Error, '#pop'),
|
762
|
+
],
|
763
|
+
|
764
|
+
# Dealing with what comes after the 'datatype' (or 'and') keyword
|
765
|
+
'dname': [
|
766
|
+
include('whitespace'),
|
767
|
+
include('breakout'),
|
768
|
+
|
769
|
+
(r'\'[0-9a-zA-Z_\']*', Name.Decorator),
|
770
|
+
(r'\(', Punctuation, 'tyvarseq'),
|
771
|
+
(r'(=)(\s*)(datatype)',
|
772
|
+
bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
|
773
|
+
(r'=(?!%s)' % symbolicid_re, Punctuation,
|
774
|
+
('#pop', 'datbind', 'datcon')),
|
775
|
+
|
776
|
+
(r'(%s)' % alphanumid_re, Keyword.Type),
|
777
|
+
(r'(%s)' % symbolicid_re, Keyword.Type),
|
778
|
+
(r'\S+', Error, '#pop'),
|
779
|
+
],
|
780
|
+
|
781
|
+
# common case - A | B | C of int
|
782
|
+
'datbind': [
|
783
|
+
include('whitespace'),
|
784
|
+
|
785
|
+
(r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
|
786
|
+
(r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
|
787
|
+
(r'\b(of)\b(?!\')', Keyword.Reserved),
|
788
|
+
|
789
|
+
(r'(\|)(\s*)(%s)' % alphanumid_re,
|
790
|
+
bygroups(Punctuation, Text, Name.Class)),
|
791
|
+
(r'(\|)(\s+)(%s)' % symbolicid_re,
|
792
|
+
bygroups(Punctuation, Text, Name.Class)),
|
793
|
+
|
794
|
+
include('breakout'),
|
795
|
+
include('core'),
|
796
|
+
(r'\S+', Error),
|
797
|
+
],
|
798
|
+
|
799
|
+
# Dealing with what comes after an exception
|
800
|
+
'ename': [
|
801
|
+
include('whitespace'),
|
802
|
+
|
803
|
+
(r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
|
804
|
+
bygroups(Keyword.Reserved, Text, Name.Class)),
|
805
|
+
(r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
|
806
|
+
bygroups(Keyword.Reserved, Text, Name.Class)),
|
807
|
+
(r'\b(of)\b(?!\')', Keyword.Reserved),
|
808
|
+
|
809
|
+
include('breakout'),
|
810
|
+
include('core'),
|
811
|
+
(r'\S+', Error),
|
812
|
+
],
|
813
|
+
|
814
|
+
'datcon': [
|
815
|
+
include('whitespace'),
|
816
|
+
(r'(%s)' % alphanumid_re, Name.Class, '#pop'),
|
817
|
+
(r'(%s)' % symbolicid_re, Name.Class, '#pop'),
|
818
|
+
(r'\S+', Error, '#pop'),
|
819
|
+
],
|
820
|
+
|
821
|
+
# Series of type variables
|
822
|
+
'tyvarseq': [
|
823
|
+
(r'\s', Text),
|
824
|
+
(r'\(\*', Comment.Multiline, 'comment'),
|
825
|
+
|
826
|
+
(r'\'[0-9a-zA-Z_\']*', Name.Decorator),
|
827
|
+
(alphanumid_re, Name),
|
828
|
+
(r',', Punctuation),
|
829
|
+
(r'\)', Punctuation, '#pop'),
|
830
|
+
(symbolicid_re, Name),
|
831
|
+
],
|
832
|
+
|
833
|
+
'comment': [
|
834
|
+
(r'[^(*)]', Comment.Multiline),
|
835
|
+
(r'\(\*', Comment.Multiline, '#push'),
|
836
|
+
(r'\*\)', Comment.Multiline, '#pop'),
|
837
|
+
(r'[(*)]', Comment.Multiline),
|
838
|
+
],
|
839
|
+
}
|
840
|
+
|
841
|
+
|
518
842
|
class OcamlLexer(RegexLexer):
|
519
843
|
"""
|
520
844
|
For the OCaml language.
|