pygments.rb 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +51 -1
- data/ext/extconf.rb +1 -1
- data/ext/pygments.c +12 -1
- data/lib/pygments/ffi.rb +1 -0
- data/lib/pygments/lexer.rb +1 -1
- data/lib/pygments/version.rb +1 -1
- data/vendor/pygments-main/AUTHORS +3 -0
- data/vendor/pygments-main/CHANGES +5 -0
- data/vendor/pygments-main/REVISION +1 -1
- data/vendor/pygments-main/docs/src/lexerdevelopment.txt +1 -1
- data/vendor/pygments-main/docs/src/styles.txt +1 -1
- data/vendor/pygments-main/pygments/cmdline.py +1 -1
- data/vendor/pygments-main/pygments/formatters/latex.py +1 -1
- data/vendor/pygments-main/pygments/lexers/_mapping.py +4 -1
- data/vendor/pygments-main/pygments/lexers/agile.py +21 -29
- data/vendor/pygments-main/pygments/lexers/functional.py +327 -3
- data/vendor/pygments-main/pygments/lexers/math.py +400 -7
- data/vendor/pygments-main/pygments/lexers/other.py +210 -170
- data/vendor/pygments-main/pygments/lexers/postgres.py +1 -1
- data/vendor/pygments-main/pygments/lexers/pypylog.py +6 -4
- data/vendor/pygments-main/pygments/lexers/web.py +45 -8
- data/vendor/pygments-main/tests/examplefiles/example.sml +156 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.fun +675 -0
- data/vendor/pygments-main/tests/examplefiles/intsyn.sig +286 -0
- data/vendor/pygments-main/tests/examplefiles/psql_session.txt +1 -0
- data/vendor/pygments-main/tests/examplefiles/test.nim +93 -0
- metadata +10 -6
data/README.md
CHANGED
@@ -1,6 +1,54 @@
 # pygments.rb
 
-
+A ruby wrapper for the python [pygments syntax highlighter](http://pygments.org/).
+
+This library replaces [github/albino](https://github.com/github/albino).
+Instead of shelling out to `pygmentize`, it embeds the python
+interpreter inside ruby via FFI. This avoids the cost of setting up the
+python VM on every invocation and speeds up code highlighting from ruby by 10-15x.
+
+## usage
+
+``` ruby
+Pygments.highlight(File.read(__FILE__), :lexer => 'ruby')
+```
+
+Encoding and other lexer/formatter options can be passed in via an
+options hash:
+
+``` ruby
+Pygments.highlight('code', :options => {:encoding => 'utf-8'})
+```
+
+To use a formatter other than html, specify it explicitly:
+
+``` ruby
+Pygments.highlight('code', :formatter => 'bbcode')
+Pygments.highlight('code', :formatter => 'terminal')
+```
+
+To generate CSS for html formatted code, use the css method:
+
+``` ruby
+Pygments.css
+Pygments.css('.highlight')
+```
+
+To use a custom python installation (like in ArchLinux), tell
+RubyPython where python lives:
+
+``` ruby
+RubyPython.configure :python_exe => 'python2.7'
+```
+
+To use a custom pygments installation, specify the path to
+Pygments.start:
+
+``` ruby
+Pygments.start("/path/to/pygments")
+```
+
+## benchmarks
 
     $ ruby -rubygems bench.rb 50
                            user     system      total        real
@@ -9,3 +57,5 @@ a ruby wrapper for the pygments syntax highlighter via embedded python.
 pygments::ffi + reload 11.350000   1.240000  12.590000 ( 12.692320)
 pygments::ffi           1.130000   0.010000   1.140000 (  1.171589)
 
+To run `bench.rb`, use a git checkout. The C extension is not included
+in gem releases.
data/ext/extconf.rb
CHANGED
data/ext/pygments.c
CHANGED
@@ -2,7 +2,18 @@
 #include <stdlib.h>
 
 #include <ruby.h>
-
+
+#if PYGMENTS_PYTHON_VERSION == 24
+#include <python2.4/Python.h>
+#elif PYGMENTS_PYTHON_VERSION == 25
+#include <python2.5/Python.h>
+#elif PYGMENTS_PYTHON_VERSION == 26
+#include <python2.6/Python.h>
+#elif PYGMENTS_PYTHON_VERSION == 27
+#include <python2.7/Python.h>
+#else
+#error Unknown python version
+#endif
 
 #ifdef RUBY_VM
 #include <ruby/st.h>
data/lib/pygments/ffi.rb
CHANGED
data/lib/pygments/lexer.rb
CHANGED
data/lib/pygments/version.rb
CHANGED
data/vendor/pygments-main/AUTHORS
CHANGED
@@ -39,6 +39,7 @@ Other contributors, listed alphabetically, are:
 * Matthew Harrison -- SVG formatter
 * Steven Hazel -- Tcl lexer
 * Aslak Hellesøy -- Gherkin lexer
+* Jordi Gutiérrez Hermoso -- Octave lexer
 * David Hess, Fish Software, Inc. -- Objective-J lexer
 * Varun Hiremath -- Debian control lexer
 * Ben Hollis -- Mason lexer
@@ -78,6 +79,7 @@ Other contributors, listed alphabetically, are:
 * Ken Schutte -- Matlab lexers
 * Tassilo Schweyer -- Io, MOOCode lexers
 * Joerg Sieker -- ABAP lexer
+* Robert Simmons -- Standard ML lexer
 * Kirill Simonov -- YAML lexer
 * Steve Spigarelli -- XQuery lexer
 * Jerome St-Louis -- eC lexer
@@ -90,6 +92,7 @@ Other contributors, listed alphabetically, are:
 * Dietmar Winkler -- Modelica lexer
 * Nils Winter -- Smalltalk lexer
 * Davy Wybiral -- Clojure lexer
+* Diego Zamboni -- CFengine3 lexer
 * Alex Zimin -- Nemerle lexer
 
 Many thanks for all contributions!
data/vendor/pygments-main/CHANGES
CHANGED
@@ -21,6 +21,9 @@ Version 1.5
   * PostgreSQL (#660)
   * DTD
   * Gosu
+  * Octave (PR#22)
+  * Standard ML (PR#14)
+  * CFengine3 (#601)
 
 - In the LaTeX formatter, escape special &, < and > chars (#648).
 
@@ -41,6 +44,8 @@ Version 1.5
 
 - Fix generic type highlighting in ActionScript 3 (#666).
 
+- Fixes to the Clojure lexer (PR#9).
+
 
 Version 1.4
 -----------
data/vendor/pygments-main/REVISION
CHANGED
@@ -1 +1 @@
-
+db34feabe4b8
data/vendor/pygments-main/docs/src/lexerdevelopment.txt
CHANGED
@@ -121,7 +121,7 @@ sections, comments and key = value pairs:
     }
 
 The lexer first looks for whitespace, comments and section names. And later it
-looks for a line that looks like a key, value pair,
+looks for a line that looks like a key, value pair, separated by an ``'='``
 sign, and optional whitespace.
 
 The `bygroups` helper makes sure that each group is yielded with a different
data/vendor/pygments-main/docs/src/styles.txt
CHANGED
@@ -85,7 +85,7 @@ Here a small overview of all allowed styles:
 ``bold``
     render text as bold
 ``nobold``
-    don't render text as bold (to prevent subtokens
+    don't render text as bold (to prevent subtokens being highlighted bold)
 ``italic``
     render text italic
 ``noitalic``
data/vendor/pygments-main/pygments/cmdline.py
CHANGED
@@ -219,7 +219,7 @@ def main(args=sys.argv):
         return 0
 
     if opts.pop('-V', None) is not None:
-        print 'Pygments version %s, (c) 2006-
+        print 'Pygments version %s, (c) 2006-2011 by Georg Brandl.' % __version__
         return 0
 
     # handle ``pygmentize -L``
data/vendor/pygments-main/pygments/formatters/latex.py
CHANGED
@@ -286,7 +286,7 @@ class LatexFormatter(Formatter):
         cp = self.commandprefix
         styles = []
         for name, definition in self.cmd2def.iteritems():
-            styles.append(r'\def
+            styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' % (cp, name, definition))
         return STYLE_TEMPLATE % {'cp': self.commandprefix,
                                  'styles': '\n'.join(styles)}
 
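The patched line emits each token style as an `\expandafter\def\csname %s@tok@%s\endcsname{...}` macro, so token names with non-letter characters still form valid LaTeX command names. A quick way to inspect the generated commands through the standard pygments API (a sketch for illustration, not part of the diff):

```python
from pygments.formatters import LatexFormatter

# get_style_defs() returns the STYLE_TEMPLATE output; it contains one
# \expandafter\def\csname PY@tok@... line per token style, produced by the
# styles.append(...) call patched above ('PY' is the default command prefix).
defs = LatexFormatter().get_style_defs()
print('\n'.join(line for line in defs.splitlines() if '@tok@' in line))
```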
data/vendor/pygments-main/pygments/lexers/_mapping.py
CHANGED
@@ -46,6 +46,7 @@ LEXERS = {
     'CObjdumpLexer': ('pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)),
     'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()),
     'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',), ('text/x-csharp',)),
+    'Cfengine3Lexer': ('pygments.lexers.other', 'CFEngine3', ('cfengine3', 'cf3'), ('*.cf',), ()),
     'CheetahHtmlLexer': ('pygments.lexers.templates', 'HTML+Cheetah', ('html+cheetah', 'html+spitfire'), (), ('text/html+cheetah', 'text/html+spitfire')),
     'CheetahJavascriptLexer': ('pygments.lexers.templates', 'JavaScript+Cheetah', ('js+cheetah', 'javascript+cheetah', 'js+spitfire', 'javascript+spitfire'), (), ('application/x-javascript+cheetah', 'text/x-javascript+cheetah', 'text/javascript+cheetah', 'application/x-javascript+spitfire', 'text/x-javascript+spitfire', 'text/javascript+spitfire')),
     'CheetahLexer': ('pygments.lexers.templates', 'Cheetah', ('cheetah', 'spitfire'), ('*.tmpl', '*.spt'), ('application/x-cheetah', 'application/x-spitfire')),
@@ -135,7 +136,7 @@ LEXERS = {
     'MakoXmlLexer': ('pygments.lexers.templates', 'XML+Mako', ('xml+mako',), (), ('application/xml+mako',)),
     'MaqlLexer': ('pygments.lexers.other', 'MAQL', ('maql',), ('*.maql',), ('text/x-gooddata-maql', 'application/x-gooddata-maql')),
     'MasonLexer': ('pygments.lexers.templates', 'Mason', ('mason',), ('*.m', '*.mhtml', '*.mc', '*.mi', 'autohandler', 'dhandler'), ('application/x-mason',)),
-    'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab',
+    'MatlabLexer': ('pygments.lexers.math', 'Matlab', ('matlab',), ('*.m',), ('text/matlab',)),
     'MatlabSessionLexer': ('pygments.lexers.math', 'Matlab session', ('matlabsession',), (), ()),
     'MiniDLexer': ('pygments.lexers.agile', 'MiniD', ('minid',), ('*.md',), ('text/x-minidsrc',)),
     'ModelicaLexer': ('pygments.lexers.other', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)),
@@ -160,6 +161,7 @@ LEXERS = {
     'ObjectiveJLexer': ('pygments.lexers.web', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)),
     'OcamlLexer': ('pygments.lexers.compiled', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
     'OcamlLexer': ('pygments.lexers.functional', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
+    'OctaveLexer': ('pygments.lexers.math', 'Octave', ('octave',), ('*.m',), ('text/octave',)),
     'OocLexer': ('pygments.lexers.compiled', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)),
     'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm'), ('text/x-perl', 'application/x-perl')),
     'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]'), ('text/x-php',)),
@@ -194,6 +196,7 @@ LEXERS = {
     'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)),
     'RubyLexer': ('pygments.lexers.agile', 'Ruby', ('rb', 'ruby', 'duby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby'), ('text/x-ruby', 'application/x-ruby')),
     'SLexer': ('pygments.lexers.math', 'S', ('splus', 's', 'r'), ('*.S', '*.R'), ('text/S-plus', 'text/S', 'text/R')),
+    'SMLLexer': ('pygments.lexers.functional', 'Standard ML', ('sml',), ('*.sml', '*.sig', '*.fun'), ('text/x-standardml', 'application/x-standardml')),
     'SassLexer': ('pygments.lexers.web', 'Sass', ('sass', 'SASS'), ('*.sass',), ('text/x-sass',)),
     'ScalaLexer': ('pygments.lexers.compiled', 'Scala', ('scala',), ('*.scala',), ('text/x-scala',)),
     'ScamlLexer': ('pygments.lexers.web', 'Scaml', ('scaml', 'SCAML'), ('*.scaml',), ('text/x-scaml',)),
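These `LEXERS` entries are what make the new lexers reachable through pygments' normal lookup helpers. A small sketch of how the added aliases and filename patterns resolve (standard pygments API; `intsyn.sig` is one of the bundled test files, `promises.cf` is just an illustrative filename):

```python
from pygments.lexers import get_lexer_by_name, get_lexer_for_filename

# Aliases registered in the mapping above resolve to the new lexer classes.
print(get_lexer_by_name('octave'))           # Octave lexer
print(get_lexer_by_name('cf3'))              # CFEngine3 lexer

# Filename patterns from the mapping drive extension-based lookup.
print(get_lexer_for_filename('intsyn.sig'))  # Standard ML lexer (*.sig)
print(get_lexer_for_filename('promises.cf')) # CFEngine3 lexer (*.cf)
```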
data/vendor/pygments-main/pygments/lexers/agile.py
CHANGED
@@ -13,7 +13,7 @@ import re
 
 from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
      LexerContext, include, combined, do_insertions, bygroups, using, this
-from pygments.token import Error, Text, Other, \
+from pygments.token import Error, Text, Whitespace, Other, \
      Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
 from pygments.util import get_bool_opt, get_list_opt, shebang_matches
 from pygments import unistring as uni
@@ -1367,13 +1367,11 @@ class ClojureLexer(RegexLexer):
 
     keywords = [
         'fn', 'def', 'defn', 'defmacro', 'defmethod', 'defmulti', 'defn-',
-        'defstruct',
-        'if', 'cond',
-        'let', 'for'
+        'defstruct', 'if', 'cond', 'let', 'for'
     ]
     builtins = [
         '.', '..',
-        '*', '+', '-', '->', '
+        '*', '+', '-', '->', '/', '<', '<=', '=', '==', '>', '>=',
         'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
         'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
         'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
@@ -1389,13 +1387,13 @@ class ClojureLexer(RegexLexer):
         'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
         'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
         'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush',
-        'fnseq', 'frest', 'gensym', 'get', 'get
+        'fnseq', 'frest', 'gensym', 'get-proxy-class', 'get',
         'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
         'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
         'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
         'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
         'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
-        'lefts', 'line-seq', 'list', 'list
+        'lefts', 'line-seq', 'list*', 'list', 'load', 'load-file',
         'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
         'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
         'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
@@ -1426,7 +1424,14 @@ class ClojureLexer(RegexLexer):
     # valid names for identifiers
     # well, names can only not consist fully of numbers
     # but this should be good enough for now
-
+
+    # TODO / should divide keywords/symbols into namespace/rest
+    # but that's hard, so just pretend / is part of the name
+    valid_name = r'[\w!$%*+,<=>?/.-]+'
+
+    def _multi_escape(entries):
+        return '|'.join([re.escape(entry) + '(?![\\w-!$%*+,<=>?/.-])'
+                         for entry in entries])
 
     tokens = {
         'root' : [
@@ -1435,42 +1440,29 @@ class ClojureLexer(RegexLexer):
            (r';.*$', Comment.Single),
 
            # whitespaces - usually not relevant
-           (r'
+           (r'[,\s]+', Whitespace),
 
            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
-
-           # have to figure out what the characters mean
-           #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),
+           (r'0x-?[abcdef\d]+', Number.Hex),
 
            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
           (r"'" + valid_name, String.Symbol),
-           (r"\\([
+           (r"\\(.|[a-z]+)", String.Char),
 
-           #
-           (r'
+           # keywords
+           (r':' + valid_name, Name.Constant),
 
            # special operators
-           (r
+           (r'~@|[`\'#^~&]', Operator),
 
            # highlight the keywords
-           (
-             re.escape(entry) + ' ' for entry in keywords]),
-            Keyword
-           ),
-
-           # first variable in a quoted string like
-           # '(this is syntactic sugar)
-           (r"(?<='\()" + valid_name, Name.Variable),
-           (r"(?<=#\()" + valid_name, Name.Variable),
+           (_multi_escape(keywords), Keyword),
 
           # highlight the builtins
-           (
-             re.escape(entry) + ' ' for entry in builtins]),
-            Name.Builtin
-           ),
+           (_multi_escape(builtins), Name.Builtin),
 
           # the remaining functions
           (r'(?<=\()' + valid_name, Name.Function),
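The new `_multi_escape` helper replaces the old `re.escape(entry) + ' '` trick: each word is escaped and guarded by a negative lookahead, so `def` no longer needs a trailing space to avoid matching inside longer symbols such as `defn-`. A standalone sketch of the same idea (simplified character class and a made-up word list, not the lexer's own code):

```python
import re

def multi_escape(entries):
    # One alternation of escaped words; the negative lookahead rejects a match
    # that is immediately followed by another symbol character, so a short
    # word never matches as a prefix of a longer Clojure symbol.
    symbol_char = r"[\w!$%*+,<=>?/.\-]"
    return '|'.join(re.escape(entry) + '(?!' + symbol_char + ')'
                    for entry in entries)

pattern = re.compile(multi_escape(['def', 'defn', 'defn-']))
print(pattern.match('defn- example').group())  # 'defn-', not just 'def'
print(pattern.match('default'))                # None: 'def' is only a prefix
```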
data/vendor/pygments-main/pygments/lexers/functional.py
CHANGED
@@ -13,11 +13,12 @@ import re
 
 from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
 from pygments.token import Text, Comment, Operator, Keyword, Name, \
-     String, Number, Punctuation, Literal, Generic
+     String, Number, Punctuation, Literal, Generic, Error
 
 
-__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
-           '
+__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
+           'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
+           'ErlangShellLexer']
 
 
 class SchemeLexer(RegexLexer):
@@ -515,6 +516,329 @@ class LiterateHaskellLexer(Lexer):
             yield item
 
 
+class SMLLexer(RegexLexer):
+    """
+    For the Standard ML language.
+
+    *New in Pygments 1.5.*
+    """
+
+    name = 'Standard ML'
+    aliases = ['sml']
+    filenames = ['*.sml', '*.sig', '*.fun',]
+    mimetypes = ['text/x-standardml', 'application/x-standardml']
+
+    alphanumid_reserved = [
+        # Core
+        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
+        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
+        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
+        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
+        # Modules
+        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
+        'struct', 'structure', 'where',
+    ]
+
+    symbolicid_reserved = [
+        # Core
+        ':', '\|', '=', '=>', '->', '#',
+        # Modules
+        ':>',
+    ]
+
+    nonid_reserved = [ '(', ')', '[', ']', '{', '}', ',', ';', '...', '_' ]
+
+    alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
+    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
+
+    # A character constant is a sequence of the form #s, where s is a string
+    # constant denoting a string of size one character. This setup just parses
+    # the entire string as either a String.Double or a String.Char (depending
+    # on the argument), even if the String.Char is an erronous
+    # multiple-character string.
+    def stringy (whatkind):
+        return [
+            (r'[^"\\]', whatkind),
+            (r'\\[\\\"abtnvfr]', String.Escape),
+            (r'\\\^[@-^]', String.Escape),
+            (r'\\[0-9]{3}', String.Escape),
+            (r'\\u[0-9a-fA-F]{4}', String.Escape),
+            (r'\\\s+\\', String.Interpol),
+            (r'"', whatkind, '#pop'),
+        ]
+
+    # Callbacks for distinguishing tokens and reserved words
+    def long_id_callback(self, match):
+        if match.group(1) in self.alphanumid_reserved: token = Error
+        else: token = Name.Namespace
+        yield match.start(1), token, match.group(1)
+        yield match.start(2), Punctuation, match.group(2)
+
+    def end_id_callback(self, match):
+        if match.group(1) in self.alphanumid_reserved: token = Error
+        elif match.group(1) in self.symbolicid_reserved: token = Error
+        else: token = Name
+        yield match.start(1), token, match.group(1)
+
+    def id_callback(self, match):
+        str = match.group(1)
+        if str in self.alphanumid_reserved: token = Keyword.Reserved
+        elif str in self.symbolicid_reserved: token = Punctuation
+        else: token = Name
+        yield match.start(1), token, str
+
+    tokens = {
+        # Whitespace and comments are (almost) everywhere
+        'whitespace': [
+            (r'\s+', Text),
+            (r'\(\*', Comment.Multiline, 'comment'),
+        ],
+
+        'delimiters': [
+            # This lexer treats these delimiters specially:
+            # Delimiters define scopes, and the scope is how the meaning of
+            # the `|' is resolved - is it a case/handle expression, or function
+            # definition by cases? (This is not how the Definition works, but
+            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
+            (r'\(|\[|{', Punctuation, 'main'),
+            (r'\)|\]|}', Punctuation, '#pop'),
+            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
+            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
+            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
+        ],
+
+        'core': [
+            # Punctuation that doesn't overlap symbolic identifiers
+            (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]),
+             Punctuation),
+
+            # Special constants: strings, floats, numbers in decimal and hex
+            (r'#"', String.Char, 'char'),
+            (r'"', String.Double, 'string'),
+            (r'~?0x[0-9a-fA-F]+', Number.Hex),
+            (r'0wx[0-9a-fA-F]+', Number.Hex),
+            (r'0w\d+', Number.Integer),
+            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
+            (r'~?\d+\.\d+', Number.Float),
+            (r'~?\d+[eE]~?\d+', Number.Float),
+            (r'~?\d+', Number.Integer),
+
+            # Labels
+            (r'#\s*[1-9][0-9]*', Name.Label),
+            (r'#\s*(%s)' % alphanumid_re, Name.Label),
+            (r'#\s+(%s)' % symbolicid_re, Name.Label),
+            # Some reserved words trigger a special, local lexer state change
+            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
+            (r'(?=\b(exception)\b(?!\'))', Text, ('ename')),
+            (r'\b(functor|include|open|signature|structure)\b(?!\')',
+             Keyword.Reserved, 'sname'),
+            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),
+
+            # Regular identifiers, long and otherwise
+            (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
+            (r'(%s)' % alphanumid_re, id_callback),
+            (r'(%s)' % symbolicid_re, id_callback),
+        ],
+        'dotted': [
+            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
+            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
+            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
+            (r'\s+', Error),
+            (r'\S+', Error),
+        ],
+
+
+        # Main parser (prevents errors in files that have scoping errors)
+        'root': [ (r'', Text, 'main') ],
+
+        # In this scope, I expect '|' to not be followed by a function name,
+        # and I expect 'and' to be followed by a binding site
+        'main': [
+            include('whitespace'),
+
+            # Special behavior of val/and/fun
+            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
+            (r'\b(fun)\b(?!\')', Keyword.Reserved,
+             ('#pop', 'main-fun', 'fname')),
+
+            include('delimiters'),
+            include('core'),
+            (r'\S+', Error),
+        ],
+
+        # In this scope, I expect '|' and 'and' to be followed by a function
+        'main-fun': [
+            include('whitespace'),
+
+            (r'\s', Text),
+            (r'\(\*', Comment.Multiline, 'comment'),
+
+            # Special behavior of val/and/fun
+            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
+            (r'\b(val)\b(?!\')', Keyword.Reserved,
+             ('#pop', 'main', 'vname')),
+
+            # Special behavior of '|' and '|'-manipulating keywords
+            (r'\|', Punctuation, 'fname'),
+            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
+             ('#pop', 'main')),
+
+            include('delimiters'),
+            include('core'),
+            (r'\S+', Error),
+        ],
+
+        # Character and string parsers
+        'char': stringy(String.Char),
+        'string': stringy(String.Double),
+
+        'breakout': [
+            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
+        ],
+
+        # Dealing with what comes after module system keywords
+        'sname': [
+            include('whitespace'),
+            include('breakout'),
+
+            (r'(%s)' % alphanumid_re, Name.Namespace),
+            (r'', Text, '#pop'),
+        ],
+
+        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
+        'fname': [
+            include('whitespace'),
+            (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+            (r'\(', Punctuation, 'tyvarseq'),
+
+            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
+            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),
+
+            # Ignore interesting function declarations like "fun (x + y) = ..."
+            (r'', Text, '#pop'),
+        ],
+
+        # Dealing with what comes after the 'val' (or 'and') keyword
+        'vname': [
+            include('whitespace'),
+            (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+            (r'\(', Punctuation, 'tyvarseq'),
+
+            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
+             bygroups(Name.Variable, Text, Punctuation), '#pop'),
+            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
+             bygroups(Name.Variable, Text, Punctuation), '#pop'),
+            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
+            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),
+
+            # Ignore interesting patterns like 'val (x, y)'
+            (r'', Text, '#pop'),
+        ],
+
+        # Dealing with what comes after the 'type' (or 'and') keyword
+        'tname': [
+            include('whitespace'),
+            include('breakout'),
+
+            (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+            (r'\(', Punctuation, 'tyvarseq'),
+            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),
+
+            (r'(%s)' % alphanumid_re, Keyword.Type),
+            (r'(%s)' % symbolicid_re, Keyword.Type),
+            (r'\S+', Error, '#pop'),
+        ],
+
+        # A type binding includes most identifiers
+        'typbind': [
+            include('whitespace'),
+
+            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
+
+            include('breakout'),
+            include('core'),
+            (r'\S+', Error, '#pop'),
+        ],
+
+        # Dealing with what comes after the 'datatype' (or 'and') keyword
+        'dname': [
+            include('whitespace'),
+            include('breakout'),
+
+            (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+            (r'\(', Punctuation, 'tyvarseq'),
+            (r'(=)(\s*)(datatype)',
+             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
+            (r'=(?!%s)' % symbolicid_re, Punctuation,
+             ('#pop', 'datbind', 'datcon')),
+
+            (r'(%s)' % alphanumid_re, Keyword.Type),
+            (r'(%s)' % symbolicid_re, Keyword.Type),
+            (r'\S+', Error, '#pop'),
+        ],
+
+        # common case - A | B | C of int
+        'datbind': [
+            include('whitespace'),
+
+            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
+            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
+            (r'\b(of)\b(?!\')', Keyword.Reserved),
+
+            (r'(\|)(\s*)(%s)' % alphanumid_re,
+             bygroups(Punctuation, Text, Name.Class)),
+            (r'(\|)(\s+)(%s)' % symbolicid_re,
+             bygroups(Punctuation, Text, Name.Class)),
+
+            include('breakout'),
+            include('core'),
+            (r'\S+', Error),
+        ],
+
+        # Dealing with what comes after an exception
+        'ename': [
+            include('whitespace'),
+
+            (r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
+             bygroups(Keyword.Reserved, Text, Name.Class)),
+            (r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
+             bygroups(Keyword.Reserved, Text, Name.Class)),
+            (r'\b(of)\b(?!\')', Keyword.Reserved),
+
+            include('breakout'),
+            include('core'),
+            (r'\S+', Error),
+        ],
+
+        'datcon': [
+            include('whitespace'),
+            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
+            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
+            (r'\S+', Error, '#pop'),
+        ],
+
+        # Series of type variables
+        'tyvarseq': [
+            (r'\s', Text),
+            (r'\(\*', Comment.Multiline, 'comment'),
+
+            (r'\'[0-9a-zA-Z_\']*', Name.Decorator),
+            (alphanumid_re, Name),
+            (r',', Punctuation),
+            (r'\)', Punctuation, '#pop'),
+            (symbolicid_re, Name),
+        ],
+
+        'comment': [
+            (r'[^(*)]', Comment.Multiline),
+            (r'\(\*', Comment.Multiline, '#push'),
+            (r'\*\)', Comment.Multiline, '#pop'),
+            (r'[(*)]', Comment.Multiline),
+        ],
+    }
+
+
 class OcamlLexer(RegexLexer):
     """
     For the OCaml language.
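With the lexer registered, highlighting Standard ML goes through the ordinary pygments entry points. A minimal usage sketch (the SML snippet is made up; the API calls are standard pygments):

```python
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

# 'fun', '|' and 'val' exercise the main / main-fun / fname / vname states
# defined in the lexer above.
code = '''
fun fact 0 = 1
  | fact n = n * fact (n - 1)
val answer = fact 5
'''
print(highlight(code, get_lexer_by_name('sml'), TerminalFormatter()))
```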