rouge 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/lib/rouge.rb +3 -0
- data/lib/rouge/cli.rb +18 -2
- data/lib/rouge/formatter.rb +7 -0
- data/lib/rouge/formatters/html.rb +4 -0
- data/lib/rouge/formatters/terminal256.rb +8 -2
- data/lib/rouge/lexer.rb +34 -4
- data/lib/rouge/lexers/c.rb +3 -0
- data/lib/rouge/lexers/common_lisp.rb +1 -0
- data/lib/rouge/lexers/cpp.rb +2 -0
- data/lib/rouge/lexers/css.rb +2 -0
- data/lib/rouge/lexers/diff.rb +2 -0
- data/lib/rouge/lexers/erb.rb +2 -0
- data/lib/rouge/lexers/factor.rb +1 -0
- data/lib/rouge/lexers/haml.rb +9 -4
- data/lib/rouge/lexers/haskell.rb +2 -0
- data/lib/rouge/lexers/html.rb +1 -0
- data/lib/rouge/lexers/java.rb +2 -0
- data/lib/rouge/lexers/javascript.rb +4 -1
- data/lib/rouge/lexers/make.rb +1 -0
- data/lib/rouge/lexers/markdown.rb +2 -0
- data/lib/rouge/lexers/perl.rb +2 -0
- data/lib/rouge/lexers/php.rb +6 -2
- data/lib/rouge/lexers/python.rb +1 -0
- data/lib/rouge/lexers/ruby.rb +2 -1
- data/lib/rouge/lexers/scheme.rb +2 -0
- data/lib/rouge/lexers/shell.rb +2 -0
- data/lib/rouge/lexers/sql.rb +137 -0
- data/lib/rouge/lexers/tcl.rb +1 -0
- data/lib/rouge/lexers/tex.rb +1 -0
- data/lib/rouge/lexers/text.rb +2 -0
- data/lib/rouge/lexers/viml.rb +98 -0
- data/lib/rouge/lexers/viml/keywords.rb +11 -0
- data/lib/rouge/lexers/xml.rb +1 -0
- data/lib/rouge/lexers/yaml.rb +4 -3
- data/lib/rouge/regex_lexer.rb +142 -43
- data/lib/rouge/template_lexer.rb +6 -0
- data/lib/rouge/text_analyzer.rb +9 -0
- data/lib/rouge/util.rb +10 -0
- data/lib/rouge/version.rb +1 -1
- metadata +5 -2
data/lib/rouge/lexers/python.rb
CHANGED
data/lib/rouge/lexers/ruby.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Lexers
|
3
3
|
class Ruby < RegexLexer
|
4
|
+
desc "The Ruby programming language (ruby-lang.org)"
|
4
5
|
tag 'ruby'
|
5
6
|
aliases 'rb'
|
6
7
|
filenames '*.rb', '*.ruby', '*.rbw', '*.rake', '*.gemspec',
|
@@ -142,7 +143,7 @@ module Rouge
|
|
142
143
|
(module)
|
143
144
|
(\s+)
|
144
145
|
([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
|
145
|
-
) do
|
146
|
+
)x do
|
146
147
|
group 'Keyword'
|
147
148
|
group 'Text'
|
148
149
|
group 'Name.Namespace'
|
data/lib/rouge/lexers/scheme.rb
CHANGED
data/lib/rouge/lexers/shell.rb
CHANGED
@@ -0,0 +1,137 @@
|
|
1
|
+
module Rouge
  module Lexers
    # Lexer for SQL source text.
    #
    # Bare words are classified as keywords by a case-insensitive lookup in
    # {SQL.keywords}; everything else is handled by the regex rules below.
    class SQL < RegexLexer
      desc "Structured Query Language, for relational databases"
      tag 'sql'
      filenames '*.sql'
      mimetypes 'text/x-sql'

      # The set of words (upper-case) that are highlighted as keywords.
      # The set is built on first use and memoized on the class.
      #
      # @return [Set<String>]
      def self.keywords
        @keywords ||= Set.new %w(
          ABORT ABS ABSOLUTE ACCESS ADA ADD ADMIN AFTER AGGREGATE ALIAS
          ALL ALLOCATE ALTER ANALYSE ANALYZE AND ANY ARE AS ASC ASENSITIVE
          ASSERTION ASSIGNMENT ASYMMETRIC AT ATOMIC AUTHORIZATION
          AVG BACKWARD BEFORE BEGIN BETWEEN BITVAR BIT_LENGTH BOTH
          BREADTH BY C CACHE CALL CALLED CARDINALITY CASCADE CASCADED
          CASE CAST CATALOG CATALOG_NAME CHAIN CHARACTERISTICS
          CHARACTER_LENGTH CHARACTER_SET_CATALOG CHARACTER_SET_NAME
          CHARACTER_SET_SCHEMA CHAR_LENGTH CHECK CHECKED CHECKPOINT
          CLASS CLASS_ORIGIN CLOB CLOSE CLUSTER COALESCE COBOL COLLATE
          COLLATION COLLATION_CATALOG COLLATION_NAME COLLATION_SCHEMA
          COLUMN COLUMN_NAME COMMAND_FUNCTION COMMAND_FUNCTION_CODE
          COMMENT COMMIT COMMITTED COMPLETION CONDITION_NUMBER
          CONNECT CONNECTION CONNECTION_NAME CONSTRAINT CONSTRAINTS
          CONSTRAINT_CATALOG CONSTRAINT_NAME CONSTRAINT_SCHEMA
          CONSTRUCTOR CONTAINS CONTINUE CONVERSION CONVERT COPY
          CORRESPONDING COUNT CREATE CREATEDB CREATEUSER CROSS CUBE
          CURRENT CURRENT_DATE CURRENT_PATH CURRENT_ROLE CURRENT_TIME
          CURRENT_TIMESTAMP CURRENT_USER CURSOR CURSOR_NAME CYCLE DATA
          DATABASE DATETIME_INTERVAL_CODE DATETIME_INTERVAL_PRECISION
          DAY DEALLOCATE DECLARE DEFAULT DEFAULTS DEFERRABLE DEFERRED
          DEFINED DEFINER DELETE DELIMITER DELIMITERS DEREF DESC DESCRIBE
          DESCRIPTOR DESTROY DESTRUCTOR DETERMINISTIC DIAGNOSTICS
          DICTIONARY DISCONNECT DISPATCH DISTINCT DO DOMAIN DROP
          DYNAMIC DYNAMIC_FUNCTION DYNAMIC_FUNCTION_CODE EACH ELSE
          ENCODING ENCRYPTED END END-EXEC EQUALS ESCAPE EVERY EXCEPT
          EXCEPTION EXCLUDING EXCLUSIVE EXEC EXECUTE EXISTING EXISTS
          EXPLAIN EXTERNAL EXTRACT FALSE FETCH FINAL FIRST FOR FORCE
          FOREIGN FORTRAN FORWARD FOUND FREE FREEZE FROM FULL FUNCTION
          G GENERAL GENERATED GET GLOBAL GO GOTO GRANT GRANTED GROUP
          GROUPING HANDLER HAVING HIERARCHY HOLD HOST IDENTITY IGNORE
          ILIKE IMMEDIATE IMMUTABLE IMPLEMENTATION IMPLICIT IN INCLUDING
          INCREMENT INDEX INDICATOR INFIX INHERITS INITIALIZE INITIALLY
          INNER INOUT INPUT INSENSITIVE INSERT INSTANTIABLE INSTEAD
          INTERSECT INTO INVOKER IS ISNULL ISOLATION ITERATE JOIN KEY
          KEY_MEMBER KEY_TYPE LANCOMPILER LANGUAGE LARGE LAST LATERAL
          LEADING LEFT LENGTH LESS LEVEL LIKE LIMIT LISTEN LOAD LOCAL
          LOCALTIME LOCALTIMESTAMP LOCATION LOCATOR LOCK LOWER MAP MATCH
          MAX MAXVALUE MESSAGE_LENGTH MESSAGE_OCTET_LENGTH MESSAGE_TEXT
          METHOD MIN MINUTE MINVALUE MOD MODE MODIFIES MODIFY MONTH
          MORE MOVE MUMPS NAMES NATIONAL NATURAL NCHAR NCLOB NEW NEXT
          NO NOCREATEDB NOCREATEUSER NONE NOT NOTHING NOTIFY NOTNULL
          NULL NULLABLE NULLIF OBJECT OCTET_LENGTH OF OFF OFFSET OIDS
          OLD ON ONLY OPEN OPERATION OPERATOR OPTION OPTIONS OR ORDER
          ORDINALITY OUT OUTER OUTPUT OVERLAPS OVERLAY OVERRIDING
          OWNER PAD PARAMETER PARAMETERS PARAMETER_MODE PARAMETER_NAME
          PARAMETER_ORDINAL_POSITION PARAMETER_SPECIFIC_CATALOG
          PARAMETER_SPECIFIC_NAME PARAMETER_SPECIFIC_SCHEMA PARTIAL PASCAL
          PENDANT PLACING PLI POSITION POSTFIX PRECISION PREFIX PREORDER
          PREPARE PRESERVE PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE
          PUBLIC READ READS RECHECK RECURSIVE REF REFERENCES REFERENCING
          REINDEX RELATIVE RENAME REPEATABLE REPLACE RESET RESTART
          RESTRICT RESULT RETURN RETURNED_LENGTH RETURNED_OCTET_LENGTH
          RETURNED_SQLSTATE RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP
          ROUTINE ROUTINE_CATALOG ROUTINE_NAME ROUTINE_SCHEMA ROW ROWS
          ROW_COUNT RULE SAVE_POINT SAVEPOINT SCALE SCHEMA SCHEMA_NAME
          SCOPE SCROLL
          SEARCH SECOND SECURITY SELECT SELF SENSITIVE SERIALIZABLE
          SERVER_NAME SESSION SESSION_USER SET SETOF SETS SHARE SHOW
          SIMILAR SIMPLE SIZE SOME SOURCE SPACE SPECIFIC SPECIFICTYPE
          SPECIFIC_NAME SQL SQLCODE SQLERROR SQLEXCEPTION SQLSTATE
          SQLWARNING STABLE START STATE STATEMENT STATIC STATISTICS
          STDIN STDOUT STORAGE STRICT STRUCTURE STYPE SUBCLASS_ORIGIN
          SUBLIST SUBSTRING SUM SYMMETRIC SYSID SYSTEM SYSTEM_USER
          TABLE TABLE_NAME TEMP TEMPLATE TEMPORARY TERMINATE THAN THEN
          TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TO TOAST TRAILING
          TRANSACTION TRANSACTIONS_COMMITTED TRANSACTIONS_ROLLED_BACK
          TRANSACTION_ACTIVE TRANSFORM TRANSFORMS TRANSLATE TRANSLATION
          TREAT TRIGGER TRIGGER_CATALOG TRIGGER_NAME TRIGGER_SCHEMA TRIM
          TRUE TRUNCATE TRUSTED TYPE UNCOMMITTED UNDER UNENCRYPTED UNION
          UNIQUE UNKNOWN UNLISTEN UNNAMED UNNEST UNTIL UPDATE UPPER
          USAGE USER USER_DEFINED_TYPE_CATALOG USER_DEFINED_TYPE_NAME
          USER_DEFINED_TYPE_SCHEMA USING VACUUM VALID VALIDATOR VALUES
          VARIABLE VERBOSE VERSION VIEW VOLATILE WHEN WHENEVER WHERE
          WITH WITHOUT WORK WRITE YEAR ZONE
        )
      end

      # Top-level SQL: whitespace, comments, numbers, quoted things, words,
      # operators and punctuation.
      state :root do
        rule /\s+/m, 'Text'
        # A line comment runs to (and includes) the newline. The `\z`
        # alternative lets a comment on the very last line match even when
        # the input has no trailing newline.
        rule /--.*?(?:\n|\z)/, 'Comment.Single'
        rule %r(/\*), 'Comment.Multiline', :multiline_comments
        rule /\d+/, 'Literal.Number.Integer'
        rule /'/, 'Literal.String.Single', :single_string
        # double quotes and backticks delimit quoted identifiers
        rule /"/, 'Name.Variable', :double_string
        rule /`/, 'Name.Variable', :backtick

        # Keywords are matched case-insensitively by upcasing the word
        # before the set lookup.
        rule /\w[\w\d]*/ do |m|
          if self.class.keywords.include? m[0].upcase
            token 'Keyword'
          else
            token 'Name'
          end
        end

        rule %r([+*/<>=~!@#%^&|?-]), 'Operator'
        rule /[;:()\[\],.]/, 'Punctuation'
      end

      # Inside /* ... */. Re-entering on a nested "/*" pushes this state
      # again, so nested comments must be closed the same number of times.
      state :multiline_comments do
        rule %r(/[*]), 'Comment.Multiline', :multiline_comments
        rule %r([*]/), 'Comment.Multiline', :pop!
        rule %r([^/*]+), 'Comment.Multiline'
        # a lone "/" or "*" that is not part of a delimiter
        rule %r([/*]), 'Comment.Multiline'
      end

      # Inside a `backtick-quoted` identifier. A doubled backtick or a
      # backslash sequence is an escape and does not end the identifier.
      state :backtick do
        rule /\\./, 'Literal.String.Escape'
        rule /``/, 'Literal.String.Escape'
        rule /`/, 'Name.Variable', :pop!
        rule /[^\\`]+/, 'Name.Variable'
      end

      # Inside a 'single-quoted' string literal. '' and backslash sequences
      # are escapes.
      state :single_string do
        rule /\\./, 'Literal.String.Escape'
        rule /''/, 'Literal.String.Escape'
        rule /'/, 'Literal.String.Single', :pop!
        rule /[^\\']+/, 'Literal.String.Single'
      end

      # Inside a "double-quoted" identifier. "" and backslash sequences
      # are escapes.
      state :double_string do
        rule /\\./, 'Literal.String.Escape'
        rule /""/, 'Literal.String.Escape'
        rule /"/, 'Name.Variable', :pop!
        rule /[^\\"]+/, 'Name.Variable'
      end
    end
  end
end
|
data/lib/rouge/lexers/tcl.rb
CHANGED
data/lib/rouge/lexers/tex.rb
CHANGED
data/lib/rouge/lexers/text.rb
CHANGED
@@ -0,0 +1,98 @@
|
|
1
|
+
module Rouge
  module Lexers
    # Lexer for VimL (Vim script).
    #
    # Plain words are first emitted as 'Postprocess.Name' and then
    # reclassified by the postprocess hook below, using the keyword tables
    # from viml/keywords.rb.
    class VimL < RegexLexer
      desc "VimL, the scripting language for the Vim editor (vim.org)"
      tag 'viml'
      aliases 'vim', 'vimscript', 'ex'
      filenames '*.vim', '*.vba', '.vimrc', '.exrc', '.gvimrc',
                '_vimrc', '_exrc', '_gvimrc' # _ names for windows

      mimetypes 'text/x-vim'

      # Lazily loads the keyword tables. viml/keywords.rb reopens this class
      # and redefines `self.keywords` with a memoized hash, so the second
      # line below calls the freshly loaded definition, not this one.
      #
      # @return [Hash] tables keyed by :command, :option and :auto, each a
      #   list of [shortest, longest] name pairs
      def self.keywords
        load Pathname.new(__FILE__).dirname.join('viml/keywords.rb')
        self.keywords
      end

      state :root do
        # a line whose first non-blank character is a double quote is a
        # comment
        rule /^(\s*)(".*?)$/ do
          group 'Text'; group 'Comment'
        end

        # leading backslash (line continuation)
        rule /^\s*\\/, 'Literal.String.Escape'

        rule /[ \t]+/, 'Text'

        # TODO: regexes can have other delimiters
        rule %r(/(\\\\|\\/|[^\n/])*/), 'Literal.String.Regex'
        rule %r("(\\\\|\\"|[^\n"])*"), 'Literal.String.Double'
        rule %r('(\\\\|\\'|[^\n'])*'), 'Literal.String.Single'

        # if it's not a string, it's a comment.
        rule /(?<=\s)"[^-:.%#=*].*?$/, 'Comment'

        rule /-?\d+/, 'Literal.Number'
        rule /#[0-9a-f]{6}/i, 'Literal.Number.Hex'
        rule /^:/, 'Punctuation'
        rule /[():<>+=!\[\]{}\|,~.-]/, 'Punctuation'
        rule /\b(let|if|else|endif|elseif|fun|function|endfunction)\b/,
          'Keyword'

        rule /\b(NONE|bold|italic|underline|dark|light)\b/, 'Name.Builtin'

        # scoped variables such as g:foo or s:bar
        rule /[absg]:\w+\b/, 'Name.Variable'
        # any other word is classified later by the postprocess hook
        rule /\b\w+\b/, 'Postprocess.Name'

        # no errors in VimL!
        rule /./m, 'Text'
      end

      # Reclassify plain words: commands become keywords, options and
      # autocommand events become builtins, anything else is plain text.
      postprocess 'Postprocess.Name' do |tok, name|
        keywords = self.class.keywords

        if mapping_contains?(keywords[:command], name)
          token 'Keyword', name
        elsif mapping_contains?(keywords[:option], name)
          token 'Name.Builtin', name
        elsif mapping_contains?(keywords[:auto], name)
          token 'Name.Builtin', name
        else
          token 'Text', name
        end
      end

      # Whether `word` is an accepted spelling of some entry in `mapping`,
      # i.e. it starts with the entry's shortest form and is itself a
      # prefix of the entry's longest form.
      #
      # @param mapping [Array<Array(String, String)>]
      #   [shortest, longest] pairs, sorted by shortest form
      # @param word [String]
      # @return [Boolean] false when the table has no candidate entry
      #   (for example, when the table is empty)
      def mapping_contains?(mapping, word)
        shortest, longest = find_likely_mapping(mapping, word)

        # No candidate: an empty table, or a word that sorts before every
        # entry. Calling start_with?(nil) would raise TypeError.
        return false unless shortest && longest

        word.start_with?(shortest) && longest.start_with?(word)
      end

      # binary search through the mappings to find the one that's likely
      # to actually work.
      #
      # @param mapping [Array<Array(String, String)>]
      #   [shortest, longest] pairs, sorted by shortest form
      # @param word [String]
      # @return [Array(String, String), nil] the entry whose shortest form
      #   equals `word`, otherwise the last entry whose shortest form sorts
      #   before `word`; nil when there is no such entry
      def find_likely_mapping(mapping, word)
        min = 0
        max = mapping.size

        until max == min
          mid = (max + min) / 2

          cmp, _ = mapping[mid]

          case word <=> cmp
          when 1
            # too low
            min = mid + 1
          when -1
            # too high
            max = mid
          when 0
            # just right, abort!
            return mapping[mid]
          end
        end

        # `max` is now the insertion point for `word`. At 0 nothing sorts
        # before it; indexing with -1 would wrap around to the last entry.
        max.zero? ? nil : mapping[max - 1]
      end
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# DO NOT EDIT: automatically generated by `rake vimkeywords`.
|
2
|
+
# see tasks/vim.rake for more info.
|
3
|
+
module Rouge
|
4
|
+
module Lexers
|
5
|
+
class VimL
|
6
|
+
def self.keywords
|
7
|
+
@keywords ||= {:command=>[[":p", ":p"], ["Allargs", "Allargs"], ["DiffOrig", "DiffOrig"], ["Error", "Error"], ["Man", "Man"], ["MyCommand", "MyCommand"], ["Mycmd", "Mycmd"], ["N", "N"], ["N", "Next"], ["P", "P"], ["P", "Print"], ["Ren", "Ren"], ["Rena", "Rena"], ["Renu", "Renu"], ["TOhtml", "TOhtml"], ["X", "X"], ["XMLent", "XMLent"], ["XMLns", "XMLns"], ["a", "a"], ["ab", "ab"], ["abc", "abclear"], ["abo", "aboveleft"], ["al", "all"], ["ar", "ar"], ["ar", "args"], ["arga", "argadd"], ["argd", "argdelete"], ["argdo", "argdo"], ["arge", "argedit"], ["argg", "argglobal"], ["argl", "arglocal"], ["argu", "argument"], ["as", "ascii"], ["au", "au"], ["b", "buffer"], ["bN", "bNext"], ["ba", "ball"], ["bad", "badd"], ["bar", "bar"], ["bd", "bdelete"], ["bel", "belowright"], ["bf", "bfirst"], ["bl", "blast"], ["bm", "bmodified"], ["bn", "bnext"], ["bo", "botright"], ["bp", "bprevious"], ["br", "br"], ["br", "brewind"], ["brea", "break"], ["breaka", "breakadd"], ["breakd", "breakdel"], ["breakl", "breaklist"], ["bro", "browse"], ["browseset", "browseset"], ["bu", "bu"], ["buf", "buf"], ["bufdo", "bufdo"], ["buffers", "buffers"], ["bun", "bunload"], ["bw", "bwipeout"], ["c", "c"], ["c", "change"], ["cN", "cN"], ["cN", "cNext"], ["cNf", "cNf"], ["cNf", "cNfile"], ["cabc", "cabclear"], ["cad", "cad"], ["cad", "caddexpr"], ["caddb", "caddbuffer"], ["caddf", "caddfile"], ["cal", "call"], ["cat", "catch"], ["cb", "cbuffer"], ["cc", "cc"], ["ccl", "cclose"], ["cd", "cd"], ["ce", "center"], ["cex", "cexpr"], ["cf", "cfile"], ["cfir", "cfirst"], ["cg", "cgetfile"], ["cgetb", "cgetbuffer"], ["cgete", "cgetexpr"], ["changes", "changes"], ["chd", "chdir"], ["che", "checkpath"], ["checkt", "checktime"], ["cl", "cl"], ["cl", "clist"], ["cla", "clast"], ["clo", "close"], ["cmapc", "cmapclear"], ["cmdname", "cmdname"], ["cn", "cn"], ["cn", "cnext"], ["cnew", "cnewer"], ["cnf", "cnf"], ["cnf", "cnfile"], ["co", "copy"], ["col", "colder"], ["colo", "colorscheme"], ["com", "com"], ["comc", 
"comclear"], ["comment", "comment"], ["comp", "compiler"], ["con", "con"], ["con", "continue"], ["conf", "confirm"], ["cope", "copen"], ["count", "count"], ["cp", "cprevious"], ["cpf", "cpfile"], ["cq", "cquit"], ["cr", "crewind"], ["cs", "cs"], ["cscope", "cscope"], ["cstag", "cstag"], ["cuna", "cunabbrev"], ["cw", "cwindow"], ["d", "d"], ["d", "delete"], ["de", "de"], ["debug", "debug"], ["debugg", "debuggreedy"], ["del", "del"], ["delc", "delcommand"], ["delf", "delf"], ["delf", "delfunction"], ["delm", "delmarks"], ["di", "di"], ["di", "display"], ["diffg", "diffget"], ["diffo", "diffo"], ["diffoff", "diffoff"], ["diffp", "diffp"], ["diffpatch", "diffpatch"], ["diffpu", "diffput"], ["diffsplit", "diffsplit"], ["difft", "difft"], ["diffthis", "diffthis"], ["diffu", "diffupdate"], ["dig", "dig"], ["dig", "digraphs"], ["dj", "djump"], ["dl", "dlist"], ["do", "do"], ["doau", "doau"], ["dr", "drop"], ["ds", "dsearch"], ["dsp", "dsplit"], ["dwim", "dwim"], ["e", "e"], ["e", "edit"], ["e:e", "e:e"], ["e:e:e", "e:e:e"], ["e:e:r", "e:e:r"], ["ea", "ea"], ["earlier", "earlier"], ["ec", "ec"], ["echoe", "echoerr"], ["echom", "echomsg"], ["echon", "echon"], ["el", "else"], ["elsei", "elseif"], ["em", "emenu"], ["emenu", "emenu"], ["en", "en"], ["en", "endif"], ["endf", "endf"], ["endf", "endfunction"], ["endfo", "endfor"], ["endfun", "endfun"], ["endt", "endtry"], ["endw", "endwhile"], ["ene", "enew"], ["ex", "ex"], ["exi", "exit"], ["exu", "exusage"], ["f", "f"], ["f", "file"], ["filename", "filename"], ["files", "files"], ["filet", "filet"], ["filetype", "filetype"], ["fin", "fin"], ["fin", "find"], ["fina", "finally"], ["fini", "finish"], ["fir", "first"], ["fix", "fixdel"], ["fo", "fold"], ["foldc", "foldclose"], ["foldd", "folddoopen"], ["folddoc", "folddoclosed"], ["foldo", "foldopen"], ["for", "for"], ["fu", "fu"], ["fu", "function"], ["fun", "fun"], ["g", "g"], ["get", "get"], ["go", "goto"], ["gr", "grep"], ["grepa", "grepadd"], ["gs", "gs"], ["gui", "gui"], 
["gvim", "gvim"], ["h", "h"], ["h", "help"], ["ha", "hardcopy"], ["helpf", "helpfind"], ["helpg", "helpgrep"], ["helpt", "helptags"], ["hi", "hi"], ["hid", "hide"], ["his", "history"], ["i", "i"], ["ia", "ia"], ["iabc", "iabclear"], ["if", "if"], ["ij", "ijump"], ["il", "ilist"], ["imapc", "imapclear"], ["in", "in"], ["index", "index"], ["intro", "intro"], ["is", "isearch"], ["isp", "isplit"], ["iuna", "iunabbrev"], ["j", "join"], ["ju", "jumps"], ["k", "k"], ["kee", "keepmarks"], ["keepa", "keepa"], ["keepalt", "keepalt"], ["keepj", "keepjumps"], ["l", "l"], ["l", "list"], ["lN", "lN"], ["lN", "lNext"], ["lNf", "lNf"], ["lNf", "lNfile"], ["la", "la"], ["la", "last"], ["lad", "lad"], ["lad", "laddexpr"], ["laddb", "laddbuffer"], ["laddf", "laddfile"], ["lan", "lan"], ["lan", "language"], ["lat", "lat"], ["later", "later"], ["lb", "lbuffer"], ["lc", "lcd"], ["lch", "lchdir"], ["lcl", "lclose"], ["lcs", "lcs"], ["lcscope", "lcscope"], ["le", "left"], ["lefta", "leftabove"], ["let", "let"], ["lex", "lexpr"], ["lf", "lfile"], ["lfir", "lfirst"], ["lg", "lgetfile"], ["lgetb", "lgetbuffer"], ["lgete", "lgetexpr"], ["lgr", "lgrep"], ["lgrepa", "lgrepadd"], ["lh", "lhelpgrep"], ["ll", "ll"], ["lla", "llast"], ["lli", "llist"], ["lmak", "lmake"], ["lmapc", "lmapclear"], ["lne", "lne"], ["lne", "lnext"], ["lnew", "lnewer"], ["lnf", "lnf"], ["lnf", "lnfile"], ["lo", "lo"], ["lo", "loadview"], ["loadk", "loadk"], ["loadkeymap", "loadkeymap"], ["loc", "lockmarks"], ["locale", "locale"], ["lockv", "lockvar"], ["lol", "lolder"], ["lop", "lopen"], ["lp", "lprevious"], ["lpf", "lpfile"], ["lr", "lrewind"], ["ls", "ls"], ["lt", "ltag"], ["lua", "lua"], ["luado", "luado"], ["luafile", "luafile"], ["lv", "lvimgrep"], ["lvimgrepa", "lvimgrepadd"], ["lw", "lwindow"], ["m", "move"], ["ma", "ma"], ["ma", "mark"], ["main", "main"], ["main", "main"], ["mak", "make"], ["marks", "marks"], ["mat", "match"], ["menut", "menut"], ["menut", "menutranslate"], ["mes", "mes"], ["messages", 
"messages"], ["mk", "mk"], ["mk", "mkexrc"], ["mkdir", "mkdir"], ["mks", "mksession"], ["mksp", "mkspell"], ["mkv", "mkv"], ["mkv", "mkvimrc"], ["mkvie", "mkview"], ["mo", "mo"], ["mod", "mode"], ["mv", "mv"], ["mz", "mz"], ["mz", "mzscheme"], ["mzf", "mzfile"], ["n", "n"], ["n", "n"], ["n", "next"], ["nb", "nbkey"], ["nbc", "nbclose"], ["nbs", "nbstart"], ["ne", "ne"], ["new", "new"], ["nkf", "nkf"], ["nmapc", "nmapclear"], ["noa", "noa"], ["noautocmd", "noautocmd"], ["noh", "nohlsearch"], ["nu", "number"], ["o", "o"], ["o", "open"], ["ol", "oldfiles"], ["omapc", "omapclear"], ["on", "only"], ["opt", "options"], ["ownsyntax", "ownsyntax"], ["p", "p"], ["p", "print"], ["p:", "p:"], ["p:", "p:"], ["p:gs", "p:gs"], ["p:h", "p:h"], ["p:h:h", "p:h:h"], ["p:r", "p:r"], ["p:t", "p:t"], ["pat", "pat"], ["pat", "pat"], ["pc", "pclose"], ["pe", "pe"], ["pe", "perl"], ["ped", "pedit"], ["perld", "perldo"], ["po", "pop"], ["popu", "popu"], ["popu", "popup"], ["pp", "ppop"], ["pr", "pr"], ["pre", "preserve"], ["prev", "previous"], ["pro", "pro"], ["prof", "profile"], ["profd", "profdel"], ["promptf", "promptfind"], ["promptr", "promptrepl"], ["ps", "psearch"], ["ptN", "ptN"], ["ptN", "ptNext"], ["pta", "ptag"], ["ptf", "ptfirst"], ["ptj", "ptjump"], ["ptl", "ptlast"], ["ptn", "ptn"], ["ptn", "ptnext"], ["ptp", "ptprevious"], ["ptr", "ptrewind"], ["pts", "ptselect"], ["pu", "put"], ["pw", "pwd"], ["py", "py"], ["py", "python"], ["py3", "py3"], ["py3", "py3"], ["py3file", "py3file"], ["pyf", "pyfile"], ["python3", "python3"], ["q", "q"], ["q", "quit"], ["qa", "qall"], ["quita", "quitall"], ["quote", "quote"], ["r", "r"], ["r", "r"], ["r", "read"], ["r:e", "r:e"], ["r:r", "r:r"], ["r:r:r", "r:r:r"], ["re", "re"], ["rec", "recover"], ["red", "red"], ["red", "redo"], ["redi", "redir"], ["redr", "redraw"], ["redraws", "redrawstatus"], ["reg", "registers"], ["res", "resize"], ["ret", "retab"], ["retu", "return"], ["rew", "rewind"], ["ri", "right"], ["rightb", "rightbelow"], ["ru", 
"ru"], ["ru", "runtime"], ["rub", "ruby"], ["rubyd", "rubydo"], ["rubyf", "rubyfile"], ["rundo", "rundo"], ["rv", "rviminfo"], ["s", "s"], ["s", "s"], ["s", "s"], ["s", "s"], ["sN", "sNext"], ["sa", "sargument"], ["sal", "sall"], ["san", "sandbox"], ["sav", "saveas"], ["sb", "sbuffer"], ["sbN", "sbNext"], ["sba", "sball"], ["sbf", "sbfirst"], ["sbl", "sblast"], ["sbm", "sbmodified"], ["sbn", "sbnext"], ["sbp", "sbprevious"], ["sbr", "sbrewind"], ["scrip", "scrip"], ["scrip", "scriptnames"], ["scripte", "scriptencoding"], ["scs", "scs"], ["scscope", "scscope"], ["se", "set"], ["setf", "setfiletype"], ["setg", "setglobal"], ["setl", "setlocal"], ["sf", "sfind"], ["sfir", "sfirst"], ["sh", "shell"], ["si", "si"], ["sig", "sig"], ["sign", "sign"], ["sil", "silent"], ["sim", "simalt"], ["sl", "sl"], ["sl", "sleep"], ["sla", "slast"], ["sm", "smagic"], ["sm", "smap"], ["sme", "sme"], ["smenu", "smenu"], ["sn", "snext"], ["sni", "sniff"], ["sno", "snomagic"], ["snoreme", "snoreme"], ["snoremenu", "snoremenu"], ["so", "so"], ["so", "source"], ["sor", "sort"], ["sp", "split"], ["spe", "spe"], ["spe", "spellgood"], ["spelld", "spelldump"], ["spelli", "spellinfo"], ["spellr", "spellrepall"], ["spellu", "spellundo"], ["spellw", "spellwrong"], ["spr", "sprevious"], ["sre", "srewind"], ["st", "st"], ["st", "stop"], ["sta", "stag"], ["star", "star"], ["star", "startinsert"], ["start", "start"], ["startg", "startgreplace"], ["startr", "startreplace"], ["stj", "stjump"], ["stopi", "stopinsert"], ["sts", "stselect"], ["sub", "sub"], ["sub", "sub"], ["sun", "sunhide"], ["sunme", "sunme"], ["sunmenu", "sunmenu"], ["sus", "suspend"], ["sv", "sview"], ["sw", "swapname"], ["sy", "sy"], ["syn", "syn"], ["sync", "sync"], ["syncbind", "syncbind"], ["synlist", "synlist"], ["t", "t"], ["t:r", "t:r"], ["tN", "tN"], ["tN", "tNext"], ["ta", "ta"], ["ta", "tag"], ["tab", "tab"], ["tabN", "tabN"], ["tabN", "tabNext"], ["tabc", "tabclose"], ["tabd", "tabdo"], ["tabe", "tabedit"], ["tabf", 
"tabfind"], ["tabfir", "tabfirst"], ["tabl", "tablast"], ["tabm", "tabmove"], ["tabn", "tabnext"], ["tabnew", "tabnew"], ["tabo", "tabonly"], ["tabp", "tabprevious"], ["tabr", "tabrewind"], ["tabs", "tabs"], ["tags", "tags"], ["tc", "tcl"], ["tcld", "tcldo"], ["tclf", "tclfile"], ["te", "tearoff"], ["tf", "tfirst"], ["th", "throw"], ["tj", "tjump"], ["tl", "tlast"], ["tm", "tm"], ["tm", "tmenu"], ["tn", "tn"], ["tn", "tnext"], ["to", "topleft"], ["tp", "tprevious"], ["tr", "tr"], ["tr", "trewind"], ["try", "try"], ["ts", "tselect"], ["tu", "tu"], ["tu", "tunmenu"], ["u", "u"], ["u", "undo"], ["un", "un"], ["una", "unabbreviate"], ["undoj", "undojoin"], ["undol", "undolist"], ["unh", "unhide"], ["unl", "unl"], ["unlo", "unlockvar"], ["uns", "unsilent"], ["up", "update"], ["v", "v"], ["ve", "ve"], ["ve", "version"], ["verb", "verbose"], ["version", "version"], ["version", "version"], ["vert", "vertical"], ["vi", "vi"], ["vi", "visual"], ["vie", "view"], ["vim", "vimgrep"], ["vimgrepa", "vimgrepadd"], ["viu", "viusage"], ["vmapc", "vmapclear"], ["vne", "vnew"], ["vs", "vsplit"], ["w", "w"], ["w", "write"], ["wN", "wNext"], ["wa", "wall"], ["wh", "while"], ["win", "win"], ["win", "winsize"], ["winc", "wincmd"], ["windo", "windo"], ["winp", "winpos"], ["wn", "wnext"], ["wp", "wprevious"], ["wq", "wq"], ["wqa", "wqall"], ["ws", "wsverb"], ["wundo", "wundo"], ["wv", "wviminfo"], ["x", "x"], ["x", "xit"], ["xa", "xall"], ["xmapc", "xmapclear"], ["xme", "xme"], ["xmenu", "xmenu"], ["xnoreme", "xnoreme"], ["xnoremenu", "xnoremenu"], ["xterm", "xterm"], ["xunme", "xunme"], ["xunmenu", "xunmenu"], ["xwininfo", "xwininfo"], ["y", "yank"]], :option=>[], :auto=>[["BufAdd", "BufAdd"], ["BufCreate", "BufCreate"], ["BufDelete", "BufDelete"], ["BufEnter", "BufEnter"], ["BufFilePost", "BufFilePost"], ["BufFilePre", "BufFilePre"], ["BufHidden", "BufHidden"], ["BufLeave", "BufLeave"], ["BufNew", "BufNew"], ["BufNewFile", "BufNewFile"], ["BufRead", "BufRead"], ["BufReadCmd", 
"BufReadCmd"], ["BufReadPost", "BufReadPost"], ["BufReadPre", "BufReadPre"], ["BufUnload", "BufUnload"], ["BufWinEnter", "BufWinEnter"], ["BufWinLeave", "BufWinLeave"], ["BufWipeout", "BufWipeout"], ["BufWrite", "BufWrite"], ["BufWriteCmd", "BufWriteCmd"], ["BufWritePost", "BufWritePost"], ["BufWritePre", "BufWritePre"], ["Cmd", "Cmd"], ["CmdwinEnter", "CmdwinEnter"], ["CmdwinLeave", "CmdwinLeave"], ["ColorScheme", "ColorScheme"], ["CursorHold", "CursorHold"], ["CursorHoldI", "CursorHoldI"], ["CursorMoved", "CursorMoved"], ["CursorMovedI", "CursorMovedI"], ["EncodingChanged", "EncodingChanged"], ["FileAppendCmd", "FileAppendCmd"], ["FileAppendPost", "FileAppendPost"], ["FileAppendPre", "FileAppendPre"], ["FileChangedRO", "FileChangedRO"], ["FileChangedShell", "FileChangedShell"], ["FileChangedShellPost", "FileChangedShellPost"], ["FileEncoding", "FileEncoding"], ["FileReadCmd", "FileReadCmd"], ["FileReadPost", "FileReadPost"], ["FileReadPre", "FileReadPre"], ["FileType", "FileType"], ["FileWriteCmd", "FileWriteCmd"], ["FileWritePost", "FileWritePost"], ["FileWritePre", "FileWritePre"], ["FilterReadPost", "FilterReadPost"], ["FilterReadPre", "FilterReadPre"], ["FilterWritePost", "FilterWritePost"], ["FilterWritePre", "FilterWritePre"], ["FocusGained", "FocusGained"], ["FocusLost", "FocusLost"], ["FuncUndefined", "FuncUndefined"], ["GUIEnter", "GUIEnter"], ["GUIFailed", "GUIFailed"], ["InsertChange", "InsertChange"], ["InsertCharPre", "InsertCharPre"], ["InsertEnter", "InsertEnter"], ["InsertLeave", "InsertLeave"], ["MenuPopup", "MenuPopup"], ["QuickFixCmdPost", "QuickFixCmdPost"], ["QuickFixCmdPre", "QuickFixCmdPre"], ["RemoteReply", "RemoteReply"], ["SessionLoadPost", "SessionLoadPost"], ["ShellCmdPost", "ShellCmdPost"], ["ShellFilterPost", "ShellFilterPost"], ["SourceCmd", "SourceCmd"], ["SourcePre", "SourcePre"], ["SpellFileMissing", "SpellFileMissing"], ["StdinReadPost", "StdinReadPost"], ["StdinReadPre", "StdinReadPre"], ["SwapExists", "SwapExists"], ["Syntax", 
"Syntax"], ["TabEnter", "TabEnter"], ["TabLeave", "TabLeave"], ["TermChanged", "TermChanged"], ["TermResponse", "TermResponse"], ["User", "User"], ["UserGettingBored", "UserGettingBored"], ["VimEnter", "VimEnter"], ["VimLeave", "VimLeave"], ["VimLeavePre", "VimLeavePre"], ["VimResized", "VimResized"], ["WinEnter", "WinEnter"], ["WinLeave", "WinLeave"], ["event", "event"]]}
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/rouge/lexers/xml.rb
CHANGED
data/lib/rouge/lexers/yaml.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Lexers
|
3
3
|
class YAML < RegexLexer
|
4
|
+
desc "Yaml Ain't Markup Language (yaml.org)"
|
4
5
|
tag 'yaml'
|
5
6
|
aliases 'yml'
|
6
7
|
|
@@ -123,7 +124,7 @@ module Rouge
|
|
123
124
|
# indented line in the block context
|
124
125
|
state :block_line do
|
125
126
|
# line end
|
126
|
-
|
127
|
+
rule /[ ]*(?=#|$)/, 'Text', :pop!
|
127
128
|
rule /[ ]+/, 'Text'
|
128
129
|
# tags, anchors, and aliases
|
129
130
|
mixin :descriptors
|
@@ -145,8 +146,8 @@ module Rouge
|
|
145
146
|
|
146
147
|
# a tag in the form '!', '!suffix' or '!handle!suffix'
|
147
148
|
rule %r(
|
148
|
-
|
149
|
-
(
|
149
|
+
(?:![\w-]+)? # handle
|
150
|
+
!(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix
|
150
151
|
)x, 'Keyword.Type'
|
151
152
|
|
152
153
|
# an anchor
|
data/lib/rouge/regex_lexer.rb
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
module Rouge
|
2
|
+
# @abstract
|
3
|
+
# A stateful lexer that uses sets of regular expressions to
|
4
|
+
# tokenize a string. Most lexers are instances of RegexLexer.
|
2
5
|
class RegexLexer < Lexer
|
6
|
+
# A rule is a tuple of a regular expression to test, and a callback
|
7
|
+
# to perform if the test succeeds.
|
8
|
+
#
|
9
|
+
# @see StateDSL#rule
|
3
10
|
class Rule
|
4
11
|
attr_reader :callback
|
5
|
-
attr_reader :next_state
|
6
12
|
attr_reader :re
|
7
|
-
def initialize(re, callback
|
13
|
+
def initialize(re, callback)
|
8
14
|
@re = re
|
9
15
|
@callback = callback
|
10
|
-
@next_state = next_state
|
11
16
|
end
|
12
17
|
|
13
18
|
def inspect
|
@@ -15,22 +20,17 @@ module Rouge
|
|
15
20
|
end
|
16
21
|
end
|
17
22
|
|
23
|
+
# a State is a named set of rules that can be tested for or
|
24
|
+
# mixed in.
|
25
|
+
#
|
26
|
+
# @see RegexLexer.state
|
18
27
|
class State
|
19
28
|
attr_reader :name
|
20
|
-
def initialize(
|
21
|
-
@lexer_class = lexer_class
|
29
|
+
def initialize(name, &defn)
|
22
30
|
@name = name
|
23
31
|
@defn = defn
|
24
32
|
end
|
25
33
|
|
26
|
-
def relative_state(state_name=nil, &b)
|
27
|
-
if state_name
|
28
|
-
@lexer_class.get_state(state_name)
|
29
|
-
else
|
30
|
-
State.new(@lexer_class, b.inspect, &b).load!
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
34
|
def rules
|
35
35
|
@rules ||= []
|
36
36
|
end
|
@@ -49,6 +49,25 @@ module Rouge
|
|
49
49
|
@rules = rules
|
50
50
|
end
|
51
51
|
|
52
|
+
# Define a new rule for this state.
|
53
|
+
#
|
54
|
+
# @overload rule(re, token, next_state=nil)
|
55
|
+
# @overload rule(re, &callback)
|
56
|
+
#
|
57
|
+
# @param [Regexp] re
|
58
|
+
# a regular expression for this rule to test.
|
59
|
+
# @param [String] tok
|
60
|
+
# the token type to yield if `re` matches.
|
61
|
+
# @param [#to_s] next_state
|
62
|
+
# (optional) a state to push onto the stack if `re` matches.
|
63
|
+
# If `next_state` is `:pop!`, the state stack will be popped
|
64
|
+
# instead.
|
65
|
+
# @param [Proc] callback
|
66
|
+
# a block that will be evaluated in the context of the lexer
|
67
|
+
# if `re` matches. This block has access to a number of lexer
|
68
|
+
# methods, including {RegexLexer#push}, {RegexLexer#pop!},
|
69
|
+
# {RegexLexer#token}, and {RegexLexer#delegate}. The first
|
70
|
+
# argument can be used to access the match groups.
|
52
71
|
def rule(re, tok=nil, next_state=nil, &callback)
|
53
72
|
if block_given?
|
54
73
|
next_state = tok
|
@@ -66,22 +85,29 @@ module Rouge
|
|
66
85
|
end
|
67
86
|
end
|
68
87
|
|
69
|
-
rules << Rule.new(re, callback
|
88
|
+
rules << Rule.new(re, callback)
|
70
89
|
end
|
71
90
|
|
91
|
+
# Mix in the rules from another state into this state. The rules
|
92
|
+
# from the mixed-in state will be tried in order before moving on
|
93
|
+
# to the rest of the rules in this state.
|
72
94
|
def mixin(lexer_name)
|
73
95
|
rules << lexer_name.to_s
|
74
96
|
end
|
75
97
|
end
|
76
98
|
|
99
|
+
# The states hash for this lexer.
|
100
|
+
# @see state
|
77
101
|
def self.states
|
78
102
|
@states ||= {}
|
79
103
|
end
|
80
104
|
|
81
|
-
|
105
|
+
# The routines to run at the beginning of a fresh lex.
|
106
|
+
# @see start
|
82
107
|
def self.start_procs
|
83
108
|
@start_procs ||= InheritableList.new(superclass.start_procs)
|
84
109
|
end
|
110
|
+
@start_procs = []
|
85
111
|
|
86
112
|
# Specify an action to be run every fresh lex.
|
87
113
|
#
|
@@ -91,20 +117,33 @@ module Rouge
|
|
91
117
|
start_procs << b
|
92
118
|
end
|
93
119
|
|
120
|
+
# Specify a filter to be applied as the lexer yields tokens.
|
121
|
+
#
|
122
|
+
# @param toktype
|
123
|
+
# The token type to postprocess
|
124
|
+
# @yield [tok, val]
|
125
|
+
# The token and the matched value. The block will be evaluated in
|
126
|
+
# the context of the lexer, and it must yield an equivalent
|
127
|
+
# token/value pair, usually by calling #token.
|
94
128
|
def self.postprocess(toktype, &b)
|
95
129
|
postprocesses << [Token[toktype], b]
|
96
130
|
end
|
97
131
|
|
98
|
-
|
132
|
+
# where the postprocess blocks are stored.
|
133
|
+
# @see postprocess
|
99
134
|
def self.postprocesses
|
100
135
|
@postprocesses ||= InheritableList.new(superclass.postprocesses)
|
101
136
|
end
|
137
|
+
@postprocesses = []
|
102
138
|
|
139
|
+
# Define a new state for this lexer with the given name.
|
140
|
+
# The block will be evaluated in the context of a {StateDSL}.
|
103
141
|
def self.state(name, &b)
|
104
142
|
name = name.to_s
|
105
|
-
states[name] = State.new(
|
143
|
+
states[name] = State.new(name, &b)
|
106
144
|
end
|
107
145
|
|
146
|
+
# @private
|
108
147
|
def self.get_state(name)
|
109
148
|
return name if name.is_a? State
|
110
149
|
|
@@ -113,41 +152,55 @@ module Rouge
|
|
113
152
|
state.load!
|
114
153
|
end
|
115
154
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
def get_state(name)
|
121
|
-
self.class.get_state(name)
|
155
|
+
# @private
|
156
|
+
def get_state(state_name)
|
157
|
+
self.class.get_state(state_name)
|
122
158
|
end
|
123
159
|
|
160
|
+
# The state stack. This is initially the single state `[:root]`.
|
161
|
+
# It is an error for this stack to be empty.
|
162
|
+
# @see #state
|
124
163
|
def stack
|
125
164
|
@stack ||= [get_state(:root)]
|
126
165
|
end
|
127
166
|
|
167
|
+
# The current state - i.e. one on top of the state stack.
|
168
|
+
#
|
169
|
+
# NB: if the state stack is empty, this will throw an error rather
|
170
|
+
# than returning nil.
|
128
171
|
def state
|
129
172
|
stack.last or raise 'empty stack!'
|
130
173
|
end
|
131
174
|
|
175
|
+
# reset this lexer to its initial state. This runs all of the
|
176
|
+
# start_procs.
|
132
177
|
def reset!
|
133
|
-
@
|
178
|
+
@stack = nil
|
134
179
|
|
135
180
|
self.class.start_procs.each do |pr|
|
136
181
|
instance_eval(&pr)
|
137
182
|
end
|
138
183
|
end
|
139
184
|
|
185
|
+
# This implements the lexer protocol, by yielding [token, value] pairs.
|
186
|
+
#
|
187
|
+
# The process for lexing works as follows, until the stream is empty:
|
188
|
+
#
|
189
|
+
# 1. We look at the state on top of the stack (which by default is
|
190
|
+
# `[:root]`).
|
191
|
+
# 2. Each rule in that state is tried until one is successful. If one
|
192
|
+
# is found, that rule's callback is evaluated - which may yield
|
193
|
+
# tokens and manipulate the state stack. Otherwise, one character
|
194
|
+
# is consumed with an `'Error'` token, and we continue at (1.)
|
195
|
+
#
|
196
|
+
# @see #step #step (where (2.) is implemented)
|
140
197
|
def stream_tokens(stream, &b)
|
141
198
|
stream_without_postprocessing(stream) do |tok, val|
|
142
199
|
_, processor = self.class.postprocesses.find { |t, _| t == tok }
|
143
200
|
|
144
201
|
if processor
|
145
|
-
|
146
|
-
Enumerator.new do |y|
|
147
|
-
@output_stream = y
|
202
|
+
with_output_stream(b) do
|
148
203
|
instance_exec(tok, val, &processor)
|
149
|
-
end.each do |newtok, newval|
|
150
|
-
yield Token[newtok], newval
|
151
204
|
end
|
152
205
|
else
|
153
206
|
yield tok, val
|
@@ -170,6 +223,11 @@ module Rouge
|
|
170
223
|
end
|
171
224
|
end
|
172
225
|
|
226
|
+
# Runs one step of the lex. Rules in the current state are tried
|
227
|
+
# until one matches, at which point its callback is called.
|
228
|
+
#
|
229
|
+
# @return true if a rule was tried successfully
|
230
|
+
# @return false otherwise.
|
173
231
|
def step(state, stream, &b)
|
174
232
|
state.rules.each do |rule|
|
175
233
|
return true if run_rule(rule, stream, &b)
|
@@ -178,6 +236,7 @@ module Rouge
|
|
178
236
|
false
|
179
237
|
end
|
180
238
|
|
239
|
+
# @private
|
181
240
|
def run_rule(rule, stream, &b)
|
182
241
|
case rule
|
183
242
|
when String
|
@@ -190,26 +249,26 @@ module Rouge
|
|
190
249
|
scan(stream, rule.re) do
|
191
250
|
debug { " got #{stream[0].inspect}" }
|
192
251
|
|
193
|
-
run_callback(stream,
|
194
|
-
debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
|
195
|
-
b.call(Token[tok], res)
|
196
|
-
end
|
252
|
+
run_callback(stream, rule.callback, &b)
|
197
253
|
end
|
198
254
|
end
|
199
255
|
end
|
200
256
|
|
201
|
-
|
202
|
-
|
203
|
-
|
257
|
+
# @private
|
258
|
+
def run_callback(stream, callback, &output_stream)
|
259
|
+
with_output_stream(output_stream) do
|
204
260
|
@group_count = 0
|
205
261
|
@last_match = stream
|
206
262
|
instance_exec(stream, &callback)
|
207
263
|
@last_match = nil
|
208
|
-
@output_stream = nil
|
209
264
|
end
|
210
265
|
end
|
211
266
|
|
212
|
-
|
267
|
+
# The number of successive scans permitted without consuming
|
268
|
+
# the input stream. If this is exceeded, the match fails.
|
269
|
+
MAX_NULL_SCANS = 5
|
270
|
+
|
271
|
+
# @private
|
213
272
|
def scan(scanner, re, &b)
|
214
273
|
# XXX HACK XXX
|
215
274
|
# StringScanner's implementation of ^ is b0rken.
|
@@ -219,7 +278,7 @@ module Rouge
|
|
219
278
|
|
220
279
|
@null_steps ||= 0
|
221
280
|
|
222
|
-
if @null_steps >=
|
281
|
+
if @null_steps >= MAX_NULL_SCANS
|
223
282
|
debug { " too many scans without consuming the string!" }
|
224
283
|
return false
|
225
284
|
end
|
@@ -240,6 +299,13 @@ module Rouge
|
|
240
299
|
return false
|
241
300
|
end
|
242
301
|
|
302
|
+
# Yield a token.
|
303
|
+
#
|
304
|
+
# @param tok
|
305
|
+
# the token type
|
306
|
+
# @param val
|
307
|
+
# (optional) the string value to yield. If absent, this defaults
|
308
|
+
# to the entire last match.
|
243
309
|
def token(tok, val=:__absent__)
|
244
310
|
val = @last_match[0] if val == :__absent__
|
245
311
|
val ||= ''
|
@@ -249,10 +315,21 @@ module Rouge
|
|
249
315
|
@output_stream << [Token[tok], val] unless val.empty?
|
250
316
|
end
|
251
317
|
|
318
|
+
# Yield a token with the next matched group. Subsequent calls
|
319
|
+
# to this method will yield subsequent groups.
|
252
320
|
def group(tok)
|
253
321
|
token(tok, @last_match[@group_count += 1])
|
254
322
|
end
|
255
323
|
|
324
|
+
# Delegate the lex to another lexer. The #lex method will be called
|
325
|
+
# with `:continue` set to true, so that #reset! will not be called.
|
326
|
+
# In this way, a single lexer can be repeatedly delegated to while
|
327
|
+
# maintaining its own internal state stack.
|
328
|
+
#
|
329
|
+
# @param [#lex] lexer
|
330
|
+
# The lexer or lexer class to delegate to
|
331
|
+
# @param [String] text
|
332
|
+
# The text to delegate. This defaults to the last matched string.
|
256
333
|
def delegate(lexer, text=nil)
|
257
334
|
debug { " delegating to #{lexer.inspect}" }
|
258
335
|
text ||= @last_match[0]
|
@@ -263,18 +340,25 @@ module Rouge
|
|
263
340
|
end
|
264
341
|
end
|
265
342
|
|
343
|
+
# Push a state onto the stack. If no state name is given and you've
|
344
|
+
# passed a block, a state will be dynamically created using the
|
345
|
+
# {StateDSL}.
|
266
346
|
def push(state_name=nil, &b)
|
267
|
-
|
268
|
-
|
269
|
-
|
347
|
+
push_state = if state_name
|
348
|
+
get_state(state_name)
|
349
|
+
elsif block_given?
|
350
|
+
State.new(b.inspect, &b).load!
|
270
351
|
else
|
271
|
-
|
352
|
+
# use the top of the stack by default
|
353
|
+
self.state
|
272
354
|
end
|
273
355
|
|
274
356
|
debug { " pushing #{push_state.name}" }
|
275
357
|
stack.push(push_state)
|
276
358
|
end
|
277
359
|
|
360
|
+
# Pop the state stack. If a number is passed in, it will be popped
|
361
|
+
# that number of times.
|
278
362
|
def pop!(times=1)
|
279
363
|
raise 'empty stack!' if stack.empty?
|
280
364
|
|
@@ -282,19 +366,34 @@ module Rouge
|
|
282
366
|
times.times { stack.pop }
|
283
367
|
end
|
284
368
|
|
369
|
+
# reset the stack back to `[:root]`.
|
285
370
|
def reset_stack
|
286
371
|
debug { ' resetting stack' }
|
287
372
|
stack.clear
|
288
373
|
stack.push get_state(:root)
|
289
374
|
end
|
290
375
|
|
376
|
+
# Check if `state_name` is in the state stack.
|
291
377
|
def in_state?(state_name)
|
292
378
|
stack.map(&:name).include? state_name.to_s
|
293
379
|
end
|
294
380
|
|
381
|
+
# Check if `state_name` is the state on top of the state stack.
|
295
382
|
def state?(state_name)
|
296
383
|
state_name.to_s == state.name
|
297
384
|
end
|
298
385
|
|
386
|
+
private
|
387
|
+
def with_output_stream(output_stream, &b)
|
388
|
+
@output_stream = Yielder.new do |tok, val|
|
389
|
+
debug { " yielding #{tok.to_s.inspect}, #{val.inspect}" }
|
390
|
+
output_stream.call(Token[tok], val)
|
391
|
+
end
|
392
|
+
|
393
|
+
yield
|
394
|
+
|
395
|
+
ensure
|
396
|
+
@output_stream = nil
|
397
|
+
end
|
299
398
|
end
|
300
399
|
end
|