rouge 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/lib/rouge.rb +3 -0
- data/lib/rouge/cli.rb +18 -2
- data/lib/rouge/formatter.rb +7 -0
- data/lib/rouge/formatters/html.rb +4 -0
- data/lib/rouge/formatters/terminal256.rb +8 -2
- data/lib/rouge/lexer.rb +34 -4
- data/lib/rouge/lexers/c.rb +3 -0
- data/lib/rouge/lexers/common_lisp.rb +1 -0
- data/lib/rouge/lexers/cpp.rb +2 -0
- data/lib/rouge/lexers/css.rb +2 -0
- data/lib/rouge/lexers/diff.rb +2 -0
- data/lib/rouge/lexers/erb.rb +2 -0
- data/lib/rouge/lexers/factor.rb +1 -0
- data/lib/rouge/lexers/haml.rb +9 -4
- data/lib/rouge/lexers/haskell.rb +2 -0
- data/lib/rouge/lexers/html.rb +1 -0
- data/lib/rouge/lexers/java.rb +2 -0
- data/lib/rouge/lexers/javascript.rb +4 -1
- data/lib/rouge/lexers/make.rb +1 -0
- data/lib/rouge/lexers/markdown.rb +2 -0
- data/lib/rouge/lexers/perl.rb +2 -0
- data/lib/rouge/lexers/php.rb +6 -2
- data/lib/rouge/lexers/python.rb +1 -0
- data/lib/rouge/lexers/ruby.rb +2 -1
- data/lib/rouge/lexers/scheme.rb +2 -0
- data/lib/rouge/lexers/shell.rb +2 -0
- data/lib/rouge/lexers/sql.rb +137 -0
- data/lib/rouge/lexers/tcl.rb +1 -0
- data/lib/rouge/lexers/tex.rb +1 -0
- data/lib/rouge/lexers/text.rb +2 -0
- data/lib/rouge/lexers/viml.rb +98 -0
- data/lib/rouge/lexers/viml/keywords.rb +11 -0
- data/lib/rouge/lexers/xml.rb +1 -0
- data/lib/rouge/lexers/yaml.rb +4 -3
- data/lib/rouge/regex_lexer.rb +142 -43
- data/lib/rouge/template_lexer.rb +6 -0
- data/lib/rouge/text_analyzer.rb +9 -0
- data/lib/rouge/util.rb +10 -0
- data/lib/rouge/version.rb +1 -1
- metadata +5 -2
data/lib/rouge/lexers/python.rb
CHANGED
data/lib/rouge/lexers/ruby.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Lexers
|
3
3
|
class Ruby < RegexLexer
|
4
|
+
desc "The Ruby programming language (ruby-lang.org)"
|
4
5
|
tag 'ruby'
|
5
6
|
aliases 'rb'
|
6
7
|
filenames '*.rb', '*.ruby', '*.rbw', '*.rake', '*.gemspec',
|
@@ -142,7 +143,7 @@ module Rouge
|
|
142
143
|
(module)
|
143
144
|
(\s+)
|
144
145
|
([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
|
145
|
-
) do
|
146
|
+
)x do
|
146
147
|
group 'Keyword'
|
147
148
|
group 'Text'
|
148
149
|
group 'Name.Namespace'
|
data/lib/rouge/lexers/scheme.rb
CHANGED
data/lib/rouge/lexers/shell.rb
CHANGED
@@ -0,0 +1,137 @@
|
|
1
|
+
module Rouge
|
2
|
+
module Lexers
|
3
|
+
class SQL < RegexLexer
|
4
|
+
desc "Structured Query Language, for relational databases"
|
5
|
+
tag 'sql'
|
6
|
+
filenames '*.sql'
|
7
|
+
mimetypes 'text/x-sql'
|
8
|
+
|
9
|
+
# The set of words highlighted as SQL keywords.
#
# Entries are upper-case; the :root state upcases each candidate word
# before looking it up, so matching is effectively case-insensitive.
# The set is built once and memoized in a class-level instance variable.
#
# Several entries were previously misspelled (COALSECE, CORRESPONTING,
# ESCEPTION, INDITCATOR, PARAMATER_*, SQLWARNINIG, TRANSATION*), which
# meant the correctly spelled keywords were never recognized. They are
# spelled correctly here, and SAVEPOINT is listed next to SAVE_POINT.
#
# @return [Set<String>] the upper-case keyword names
def self.keywords
  @keywords ||= Set.new %w(
    ABORT ABS ABSOLUTE ACCESS ADA ADD ADMIN AFTER AGGREGATE ALIAS
    ALL ALLOCATE ALTER ANALYSE ANALYZE AND ANY ARE AS ASC ASENSITIVE
    ASSERTION ASSIGNMENT ASYMMETRIC AT ATOMIC AUTHORIZATION
    AVG BACKWARD BEFORE BEGIN BETWEEN BITVAR BIT_LENGTH BOTH
    BREADTH BY C CACHE CALL CALLED CARDINALITY CASCADE CASCADED
    CASE CAST CATALOG CATALOG_NAME CHAIN CHARACTERISTICS
    CHARACTER_LENGTH CHARACTER_SET_CATALOG CHARACTER_SET_NAME
    CHARACTER_SET_SCHEMA CHAR_LENGTH CHECK CHECKED CHECKPOINT
    CLASS CLASS_ORIGIN CLOB CLOSE CLUSTER COALESCE COBOL COLLATE
    COLLATION COLLATION_CATALOG COLLATION_NAME COLLATION_SCHEMA
    COLUMN COLUMN_NAME COMMAND_FUNCTION COMMAND_FUNCTION_CODE
    COMMENT COMMIT COMMITTED COMPLETION CONDITION_NUMBER
    CONNECT CONNECTION CONNECTION_NAME CONSTRAINT CONSTRAINTS
    CONSTRAINT_CATALOG CONSTRAINT_NAME CONSTRAINT_SCHEMA
    CONSTRUCTOR CONTAINS CONTINUE CONVERSION CONVERT COPY
    CORRESPONDING COUNT CREATE CREATEDB CREATEUSER CROSS CUBE
    CURRENT CURRENT_DATE CURRENT_PATH CURRENT_ROLE CURRENT_TIME
    CURRENT_TIMESTAMP CURRENT_USER CURSOR CURSOR_NAME CYCLE DATA
    DATABASE DATETIME_INTERVAL_CODE DATETIME_INTERVAL_PRECISION
    DAY DEALLOCATE DECLARE DEFAULT DEFAULTS DEFERRABLE DEFERRED
    DEFINED DEFINER DELETE DELIMITER DELIMITERS DEREF DESC DESCRIBE
    DESCRIPTOR DESTROY DESTRUCTOR DETERMINISTIC DIAGNOSTICS
    DICTIONARY DISCONNECT DISPATCH DISTINCT DO DOMAIN DROP
    DYNAMIC DYNAMIC_FUNCTION DYNAMIC_FUNCTION_CODE EACH ELSE
    ENCODING ENCRYPTED END END-EXEC EQUALS ESCAPE EVERY EXCEPT
    EXCEPTION EXCLUDING EXCLUSIVE EXEC EXECUTE EXISTING EXISTS
    EXPLAIN EXTERNAL EXTRACT FALSE FETCH FINAL FIRST FOR FORCE
    FOREIGN FORTRAN FORWARD FOUND FREE FREEZE FROM FULL FUNCTION
    G GENERAL GENERATED GET GLOBAL GO GOTO GRANT GRANTED GROUP
    GROUPING HANDLER HAVING HIERARCHY HOLD HOST IDENTITY IGNORE
    ILIKE IMMEDIATE IMMUTABLE IMPLEMENTATION IMPLICIT IN INCLUDING
    INCREMENT INDEX INDICATOR INFIX INHERITS INITIALIZE INITIALLY
    INNER INOUT INPUT INSENSITIVE INSERT INSTANTIABLE INSTEAD
    INTERSECT INTO INVOKER IS ISNULL ISOLATION ITERATE JOIN KEY
    KEY_MEMBER KEY_TYPE LANCOMPILER LANGUAGE LARGE LAST LATERAL
    LEADING LEFT LENGTH LESS LEVEL LIKE LIMIT LISTEN LOAD LOCAL
    LOCALTIME LOCALTIMESTAMP LOCATION LOCATOR LOCK LOWER MAP MATCH
    MAX MAXVALUE MESSAGE_LENGTH MESSAGE_OCTET_LENGTH MESSAGE_TEXT
    METHOD MIN MINUTE MINVALUE MOD MODE MODIFIES MODIFY MONTH
    MORE MOVE MUMPS NAMES NATIONAL NATURAL NCHAR NCLOB NEW NEXT
    NO NOCREATEDB NOCREATEUSER NONE NOT NOTHING NOTIFY NOTNULL
    NULL NULLABLE NULLIF OBJECT OCTET_LENGTH OF OFF OFFSET OIDS
    OLD ON ONLY OPEN OPERATION OPERATOR OPTION OPTIONS OR ORDER
    ORDINALITY OUT OUTER OUTPUT OVERLAPS OVERLAY OVERRIDING
    OWNER PAD PARAMETER PARAMETERS PARAMETER_MODE PARAMETER_NAME
    PARAMETER_ORDINAL_POSITION PARAMETER_SPECIFIC_CATALOG
    PARAMETER_SPECIFIC_NAME PARAMETER_SPECIFIC_SCHEMA PARTIAL PASCAL
    PENDANT PLACING PLI POSITION POSTFIX PRECISION PREFIX PREORDER
    PREPARE PRESERVE PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE
    PUBLIC READ READS RECHECK RECURSIVE REF REFERENCES REFERENCING
    REINDEX RELATIVE RENAME REPEATABLE REPLACE RESET RESTART
    RESTRICT RESULT RETURN RETURNED_LENGTH RETURNED_OCTET_LENGTH
    RETURNED_SQLSTATE RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP
    ROUTINE ROUTINE_CATALOG ROUTINE_NAME ROUTINE_SCHEMA ROW ROWS
    ROW_COUNT RULE SAVEPOINT SAVE_POINT SCALE SCHEMA SCHEMA_NAME SCOPE
    SCROLL SEARCH SECOND SECURITY SELECT SELF SENSITIVE SERIALIZABLE
    SERVER_NAME SESSION SESSION_USER SET SETOF SETS SHARE SHOW
    SIMILAR SIMPLE SIZE SOME SOURCE SPACE SPECIFIC SPECIFICTYPE
    SPECIFIC_NAME SQL SQLCODE SQLERROR SQLEXCEPTION SQLSTATE
    SQLWARNING STABLE START STATE STATEMENT STATIC STATISTICS
    STDIN STDOUT STORAGE STRICT STRUCTURE STYPE SUBCLASS_ORIGIN
    SUBLIST SUBSTRING SUM SYMMETRIC SYSID SYSTEM SYSTEM_USER
    TABLE TABLE_NAME TEMP TEMPLATE TEMPORARY TERMINATE THAN THEN
    TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TO TOAST TRAILING
    TRANSACTION TRANSACTIONS_COMMITTED TRANSACTIONS_ROLLED_BACK
    TRANSACTION_ACTIVE TRANSFORM TRANSFORMS TRANSLATE TRANSLATION
    TREAT TRIGGER TRIGGER_CATALOG TRIGGER_NAME TRIGGER_SCHEMA TRIM
    TRUE TRUNCATE TRUSTED TYPE UNCOMMITTED UNDER UNENCRYPTED UNION
    UNIQUE UNKNOWN UNLISTEN UNNAMED UNNEST UNTIL UPDATE UPPER
    USAGE USER USER_DEFINED_TYPE_CATALOG USER_DEFINED_TYPE_NAME
    USER_DEFINED_TYPE_SCHEMA USING VACUUM VALID VALIDATOR VALUES
    VARIABLE VERBOSE VERSION VIEW VOLATILE WHEN WHENEVER WHERE
    WITH WITHOUT WORK WRITE YEAR ZONE
  )
end
|
86
|
+
|
87
|
+
# Top-level SQL state: whitespace, comments, numbers, quoted strings and
# identifiers, bare words (keyword or name), operators and punctuation.
state :root do
  rule /\s+/m, 'Text'

  # Line comment. It runs through the end of the line, or to the end of
  # the input when the final line has no trailing newline; previously
  # such a comment was not recognized and lexed as operators and names.
  rule /--.*?(?:\n|\z)/, 'Comment.Single'

  # "/*" opens a block comment; the body is handled in its own state.
  rule %r(/\*), 'Comment.Multiline', :multiline_comments
  rule /\d+/, 'Literal.Number.Integer'

  # Each opening quote pushes the state that lexes the quoted body.
  rule /'/, 'Literal.String.Single', :single_string
  rule /"/, 'Name.Variable', :double_string
  rule /`/, 'Name.Variable', :backtick

  # A bare word is a keyword when its upper-cased form is in the
  # keyword set, otherwise a plain name.
  rule /\w[\w\d]*/ do |m|
    if self.class.keywords.include? m[0].upcase
      token 'Keyword'
    else
      token 'Name'
    end
  end

  # ('^' was listed twice in this character class; once is enough.)
  rule %r([+*/<>=~!@#%^&|?-]), 'Operator'
  rule /[;:()\[\],.]/, 'Punctuation'
end
|
107
|
+
|
108
|
+
# Body of a /* ... */ comment. A nested "/*" pushes this state again, so
# every "*/" pops exactly one level.
state :multiline_comments do
  rule %r(/[*]), 'Comment.Multiline', :multiline_comments
  rule %r([*]/), 'Comment.Multiline', :pop!
  # a run of text containing neither '/' nor '*'
  rule %r([^/*]+), 'Comment.Multiline'
  # a lone '/' or '*' that is not part of a delimiter
  rule %r([/*]), 'Comment.Multiline'
end

# Body of a `backtick-quoted` identifier.
state :backtick do
  rule /\\./, 'Literal.String.Escape'
  # a doubled backtick is an escaped backtick, not the terminator
  rule /``/, 'Literal.String.Escape'
  rule /`/, 'Name.Variable', :pop!
  rule /[^\\`]+/, 'Name.Variable'
end

# Body of a 'single-quoted' string literal.
state :single_string do
  rule /\\./, 'Literal.String.Escape'
  # a doubled quote is an escaped quote, not the terminator
  rule /''/, 'Literal.String.Escape'
  rule /'/, 'Literal.String.Single', :pop!
  rule /[^\\']+/, 'Literal.String.Single'
end

# Body of a "double-quoted" identifier.
state :double_string do
  rule /\\./, 'Literal.String.Escape'
  # a doubled quote is an escaped quote, not the terminator
  rule /""/, 'Literal.String.Escape'
  rule /"/, 'Name.Variable', :pop!
  rule /[^\\"]+/, 'Name.Variable'
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
data/lib/rouge/lexers/tcl.rb
CHANGED
data/lib/rouge/lexers/tex.rb
CHANGED
data/lib/rouge/lexers/text.rb
CHANGED
@@ -0,0 +1,98 @@
|
|
1
|
+
module Rouge
|
2
|
+
module Lexers
|
3
|
+
class VimL < RegexLexer
|
4
|
+
desc "VimL, the scripting language for the Vim editor (vim.org)"
|
5
|
+
tag 'viml'
|
6
|
+
aliases 'vim', 'vimscript', 'ex'
|
7
|
+
filenames '*.vim', '*.vba', '.vimrc', '.exrc', '.gvimrc',
|
8
|
+
'_vimrc', '_exrc', '_gvimrc' # _ names for windows
|
9
|
+
|
10
|
+
mimetypes 'text/x-vim'
|
11
|
+
|
12
|
+
# Lazily loads the generated keyword table.
#
# Loading viml/keywords.rb (resolved relative to this file's directory)
# is expected to redefine this very method, so the trailing call
# dispatches to the freshly loaded definition and returns its result.
def self.keywords
  keywords_file = Pathname.new(__FILE__).dirname.join('viml/keywords.rb')
  load keywords_file
  keywords
end
|
16
|
+
|
17
|
+
# The single VimL state. Rules are tried in order, so the string rules
# take precedence over the trailing-comment rule further down.
state :root do
  # a line whose first non-blank character is '"' is a comment
  rule /^(\s*)(".*?)$/ do
    group 'Text'
    group 'Comment'
  end

  # a backslash that leads a line (after optional whitespace)
  rule /^\s*\\/, 'Literal.String.Escape'

  rule /[ \t]+/, 'Text'

  # TODO: regexes can have other delimiters
  rule %r(/(\\\\|\\/|[^\n/])*/), 'Literal.String.Regex'
  rule %r("(\\\\|\\"|[^\n"])*"), 'Literal.String.Double'
  rule %r('(\\\\|\\'|[^\n'])*'), 'Literal.String.Single'

  # a '"' preceded by whitespace that was not consumed as a string
  # above starts a comment running to the end of the line
  rule /(?<=\s)"[^-:.%#=*].*?$/, 'Comment'

  rule /-?\d+/, 'Literal.Number'
  rule /#[0-9a-f]{6}/i, 'Literal.Number.Hex'
  rule /^:/, 'Punctuation'
  rule /[():<>+=!\[\]{}\|,~.-]/, 'Punctuation'
  rule /\b(let|if|else|endif|elseif|fun|function|endfunction)\b/, 'Keyword'

  rule /\b(NONE|bold|italic|underline|dark|light)\b/, 'Name.Builtin'

  # scope-prefixed variables such as g:name or s:name
  rule /[absg]:\w+\b/, 'Name.Variable'
  # any other word is tagged for the postprocess step to classify
  rule /\b\w+\b/, 'Postprocess.Name'

  # no errors in VimL! anything left over is plain text
  rule /./m, 'Text'
end
|
49
|
+
|
50
|
+
# Classify each word the :root state tagged as 'Postprocess.Name':
# command names become 'Keyword', option and autocommand names become
# 'Name.Builtin', and everything else is emitted as 'Text'. The tables
# are consulted in that order.
postprocess 'Postprocess.Name' do |tok, name|
  table = self.class.keywords

  token_type =
    if mapping_contains?(table[:command], name)
      'Keyword'
    elsif mapping_contains?(table[:option], name) ||
          mapping_contains?(table[:auto], name)
      'Name.Builtin'
    else
      'Text'
    end

  token token_type, name
end
|
63
|
+
|
64
|
+
# Tests whether +word+ is an accepted spelling of an entry in +mapping+.
#
# Each entry is a [shortest, longest] pair. +word+ is accepted when it
# starts with the shortest form and is itself a prefix of the longest
# form, i.e. it is any abbreviation between the two.
#
# @param mapping [Array<Array(String, String)>] pairs of
#   [shortest, longest], as consumed by #find_likely_mapping
# @param word [String] the word to look up
# @return [Boolean] false when no candidate entry exists
def mapping_contains?(mapping, word)
  shortest, longest = find_likely_mapping(mapping, word)

  # An empty table yields no candidate at all; without this guard
  # String#start_with? would be handed nil and raise a TypeError.
  return false unless shortest && longest

  word.start_with?(shortest) && longest.start_with?(word)
end
|
69
|
+
|
70
|
+
# Binary-searches +mapping+ (pairs ordered by their first element) for
# the entry most likely to cover +word+.
#
# An entry whose first element equals +word+ is returned immediately.
# Otherwise the entry just before the insertion point is returned. When
# +word+ sorts before every entry the index is -1, which selects the
# last entry; an empty +mapping+ yields nil.
def find_likely_mapping(mapping, word)
  lo = 0
  hi = mapping.size

  while lo != hi
    middle = (lo + hi) / 2
    key, = mapping[middle]
    order = word <=> key

    if order == 0
      # exact hit, stop searching
      return mapping[middle]
    elsif order == 1
      # word sorts after this entry: search the upper half
      lo = middle + 1
    elsif order == -1
      # word sorts before this entry: search the lower half
      hi = middle
    end
  end

  mapping[hi - 1]
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# DO NOT EDIT: automatically generated by `rake vimkeywords`.
|
2
|
+
# see tasks/vim.rake for more info.
|
3
|
+
module Rouge
|
4
|
+
module Lexers
|
5
|
+
class VimL
|
6
|
+
def self.keywords
|
7
|
+
@keywords ||= {:command=>[[":p", ":p"], ["Allargs", "Allargs"], ["DiffOrig", "DiffOrig"], ["Error", "Error"], ["Man", "Man"], ["MyCommand", "MyCommand"], ["Mycmd", "Mycmd"], ["N", "N"], ["N", "Next"], ["P", "P"], ["P", "Print"], ["Ren", "Ren"], ["Rena", "Rena"], ["Renu", "Renu"], ["TOhtml", "TOhtml"], ["X", "X"], ["XMLent", "XMLent"], ["XMLns", "XMLns"], ["a", "a"], ["ab", "ab"], ["abc", "abclear"], ["abo", "aboveleft"], ["al", "all"], ["ar", "ar"], ["ar", "args"], ["arga", "argadd"], ["argd", "argdelete"], ["argdo", "argdo"], ["arge", "argedit"], ["argg", "argglobal"], ["argl", "arglocal"], ["argu", "argument"], ["as", "ascii"], ["au", "au"], ["b", "buffer"], ["bN", "bNext"], ["ba", "ball"], ["bad", "badd"], ["bar", "bar"], ["bd", "bdelete"], ["bel", "belowright"], ["bf", "bfirst"], ["bl", "blast"], ["bm", "bmodified"], ["bn", "bnext"], ["bo", "botright"], ["bp", "bprevious"], ["br", "br"], ["br", "brewind"], ["brea", "break"], ["breaka", "breakadd"], ["breakd", "breakdel"], ["breakl", "breaklist"], ["bro", "browse"], ["browseset", "browseset"], ["bu", "bu"], ["buf", "buf"], ["bufdo", "bufdo"], ["buffers", "buffers"], ["bun", "bunload"], ["bw", "bwipeout"], ["c", "c"], ["c", "change"], ["cN", "cN"], ["cN", "cNext"], ["cNf", "cNf"], ["cNf", "cNfile"], ["cabc", "cabclear"], ["cad", "cad"], ["cad", "caddexpr"], ["caddb", "caddbuffer"], ["caddf", "caddfile"], ["cal", "call"], ["cat", "catch"], ["cb", "cbuffer"], ["cc", "cc"], ["ccl", "cclose"], ["cd", "cd"], ["ce", "center"], ["cex", "cexpr"], ["cf", "cfile"], ["cfir", "cfirst"], ["cg", "cgetfile"], ["cgetb", "cgetbuffer"], ["cgete", "cgetexpr"], ["changes", "changes"], ["chd", "chdir"], ["che", "checkpath"], ["checkt", "checktime"], ["cl", "cl"], ["cl", "clist"], ["cla", "clast"], ["clo", "close"], ["cmapc", "cmapclear"], ["cmdname", "cmdname"], ["cn", "cn"], ["cn", "cnext"], ["cnew", "cnewer"], ["cnf", "cnf"], ["cnf", "cnfile"], ["co", "copy"], ["col", "colder"], ["colo", "colorscheme"], ["com", "com"], ["comc", 
"comclear"], ["comment", "comment"], ["comp", "compiler"], ["con", "con"], ["con", "continue"], ["conf", "confirm"], ["cope", "copen"], ["count", "count"], ["cp", "cprevious"], ["cpf", "cpfile"], ["cq", "cquit"], ["cr", "crewind"], ["cs", "cs"], ["cscope", "cscope"], ["cstag", "cstag"], ["cuna", "cunabbrev"], ["cw", "cwindow"], ["d", "d"], ["d", "delete"], ["de", "de"], ["debug", "debug"], ["debugg", "debuggreedy"], ["del", "del"], ["delc", "delcommand"], ["delf", "delf"], ["delf", "delfunction"], ["delm", "delmarks"], ["di", "di"], ["di", "display"], ["diffg", "diffget"], ["diffo", "diffo"], ["diffoff", "diffoff"], ["diffp", "diffp"], ["diffpatch", "diffpatch"], ["diffpu", "diffput"], ["diffsplit", "diffsplit"], ["difft", "difft"], ["diffthis", "diffthis"], ["diffu", "diffupdate"], ["dig", "dig"], ["dig", "digraphs"], ["dj", "djump"], ["dl", "dlist"], ["do", "do"], ["doau", "doau"], ["dr", "drop"], ["ds", "dsearch"], ["dsp", "dsplit"], ["dwim", "dwim"], ["e", "e"], ["e", "edit"], ["e:e", "e:e"], ["e:e:e", "e:e:e"], ["e:e:r", "e:e:r"], ["ea", "ea"], ["earlier", "earlier"], ["ec", "ec"], ["echoe", "echoerr"], ["echom", "echomsg"], ["echon", "echon"], ["el", "else"], ["elsei", "elseif"], ["em", "emenu"], ["emenu", "emenu"], ["en", "en"], ["en", "endif"], ["endf", "endf"], ["endf", "endfunction"], ["endfo", "endfor"], ["endfun", "endfun"], ["endt", "endtry"], ["endw", "endwhile"], ["ene", "enew"], ["ex", "ex"], ["exi", "exit"], ["exu", "exusage"], ["f", "f"], ["f", "file"], ["filename", "filename"], ["files", "files"], ["filet", "filet"], ["filetype", "filetype"], ["fin", "fin"], ["fin", "find"], ["fina", "finally"], ["fini", "finish"], ["fir", "first"], ["fix", "fixdel"], ["fo", "fold"], ["foldc", "foldclose"], ["foldd", "folddoopen"], ["folddoc", "folddoclosed"], ["foldo", "foldopen"], ["for", "for"], ["fu", "fu"], ["fu", "function"], ["fun", "fun"], ["g", "g"], ["get", "get"], ["go", "goto"], ["gr", "grep"], ["grepa", "grepadd"], ["gs", "gs"], ["gui", "gui"], 
["gvim", "gvim"], ["h", "h"], ["h", "help"], ["ha", "hardcopy"], ["helpf", "helpfind"], ["helpg", "helpgrep"], ["helpt", "helptags"], ["hi", "hi"], ["hid", "hide"], ["his", "history"], ["i", "i"], ["ia", "ia"], ["iabc", "iabclear"], ["if", "if"], ["ij", "ijump"], ["il", "ilist"], ["imapc", "imapclear"], ["in", "in"], ["index", "index"], ["intro", "intro"], ["is", "isearch"], ["isp", "isplit"], ["iuna", "iunabbrev"], ["j", "join"], ["ju", "jumps"], ["k", "k"], ["kee", "keepmarks"], ["keepa", "keepa"], ["keepalt", "keepalt"], ["keepj", "keepjumps"], ["l", "l"], ["l", "list"], ["lN", "lN"], ["lN", "lNext"], ["lNf", "lNf"], ["lNf", "lNfile"], ["la", "la"], ["la", "last"], ["lad", "lad"], ["lad", "laddexpr"], ["laddb", "laddbuffer"], ["laddf", "laddfile"], ["lan", "lan"], ["lan", "language"], ["lat", "lat"], ["later", "later"], ["lb", "lbuffer"], ["lc", "lcd"], ["lch", "lchdir"], ["lcl", "lclose"], ["lcs", "lcs"], ["lcscope", "lcscope"], ["le", "left"], ["lefta", "leftabove"], ["let", "let"], ["lex", "lexpr"], ["lf", "lfile"], ["lfir", "lfirst"], ["lg", "lgetfile"], ["lgetb", "lgetbuffer"], ["lgete", "lgetexpr"], ["lgr", "lgrep"], ["lgrepa", "lgrepadd"], ["lh", "lhelpgrep"], ["ll", "ll"], ["lla", "llast"], ["lli", "llist"], ["lmak", "lmake"], ["lmapc", "lmapclear"], ["lne", "lne"], ["lne", "lnext"], ["lnew", "lnewer"], ["lnf", "lnf"], ["lnf", "lnfile"], ["lo", "lo"], ["lo", "loadview"], ["loadk", "loadk"], ["loadkeymap", "loadkeymap"], ["loc", "lockmarks"], ["locale", "locale"], ["lockv", "lockvar"], ["lol", "lolder"], ["lop", "lopen"], ["lp", "lprevious"], ["lpf", "lpfile"], ["lr", "lrewind"], ["ls", "ls"], ["lt", "ltag"], ["lua", "lua"], ["luado", "luado"], ["luafile", "luafile"], ["lv", "lvimgrep"], ["lvimgrepa", "lvimgrepadd"], ["lw", "lwindow"], ["m", "move"], ["ma", "ma"], ["ma", "mark"], ["main", "main"], ["main", "main"], ["mak", "make"], ["marks", "marks"], ["mat", "match"], ["menut", "menut"], ["menut", "menutranslate"], ["mes", "mes"], ["messages", 
"messages"], ["mk", "mk"], ["mk", "mkexrc"], ["mkdir", "mkdir"], ["mks", "mksession"], ["mksp", "mkspell"], ["mkv", "mkv"], ["mkv", "mkvimrc"], ["mkvie", "mkview"], ["mo", "mo"], ["mod", "mode"], ["mv", "mv"], ["mz", "mz"], ["mz", "mzscheme"], ["mzf", "mzfile"], ["n", "n"], ["n", "n"], ["n", "next"], ["nb", "nbkey"], ["nbc", "nbclose"], ["nbs", "nbstart"], ["ne", "ne"], ["new", "new"], ["nkf", "nkf"], ["nmapc", "nmapclear"], ["noa", "noa"], ["noautocmd", "noautocmd"], ["noh", "nohlsearch"], ["nu", "number"], ["o", "o"], ["o", "open"], ["ol", "oldfiles"], ["omapc", "omapclear"], ["on", "only"], ["opt", "options"], ["ownsyntax", "ownsyntax"], ["p", "p"], ["p", "print"], ["p:", "p:"], ["p:", "p:"], ["p:gs", "p:gs"], ["p:h", "p:h"], ["p:h:h", "p:h:h"], ["p:r", "p:r"], ["p:t", "p:t"], ["pat", "pat"], ["pat", "pat"], ["pc", "pclose"], ["pe", "pe"], ["pe", "perl"], ["ped", "pedit"], ["perld", "perldo"], ["po", "pop"], ["popu", "popu"], ["popu", "popup"], ["pp", "ppop"], ["pr", "pr"], ["pre", "preserve"], ["prev", "previous"], ["pro", "pro"], ["prof", "profile"], ["profd", "profdel"], ["promptf", "promptfind"], ["promptr", "promptrepl"], ["ps", "psearch"], ["ptN", "ptN"], ["ptN", "ptNext"], ["pta", "ptag"], ["ptf", "ptfirst"], ["ptj", "ptjump"], ["ptl", "ptlast"], ["ptn", "ptn"], ["ptn", "ptnext"], ["ptp", "ptprevious"], ["ptr", "ptrewind"], ["pts", "ptselect"], ["pu", "put"], ["pw", "pwd"], ["py", "py"], ["py", "python"], ["py3", "py3"], ["py3", "py3"], ["py3file", "py3file"], ["pyf", "pyfile"], ["python3", "python3"], ["q", "q"], ["q", "quit"], ["qa", "qall"], ["quita", "quitall"], ["quote", "quote"], ["r", "r"], ["r", "r"], ["r", "read"], ["r:e", "r:e"], ["r:r", "r:r"], ["r:r:r", "r:r:r"], ["re", "re"], ["rec", "recover"], ["red", "red"], ["red", "redo"], ["redi", "redir"], ["redr", "redraw"], ["redraws", "redrawstatus"], ["reg", "registers"], ["res", "resize"], ["ret", "retab"], ["retu", "return"], ["rew", "rewind"], ["ri", "right"], ["rightb", "rightbelow"], ["ru", 
"ru"], ["ru", "runtime"], ["rub", "ruby"], ["rubyd", "rubydo"], ["rubyf", "rubyfile"], ["rundo", "rundo"], ["rv", "rviminfo"], ["s", "s"], ["s", "s"], ["s", "s"], ["s", "s"], ["sN", "sNext"], ["sa", "sargument"], ["sal", "sall"], ["san", "sandbox"], ["sav", "saveas"], ["sb", "sbuffer"], ["sbN", "sbNext"], ["sba", "sball"], ["sbf", "sbfirst"], ["sbl", "sblast"], ["sbm", "sbmodified"], ["sbn", "sbnext"], ["sbp", "sbprevious"], ["sbr", "sbrewind"], ["scrip", "scrip"], ["scrip", "scriptnames"], ["scripte", "scriptencoding"], ["scs", "scs"], ["scscope", "scscope"], ["se", "set"], ["setf", "setfiletype"], ["setg", "setglobal"], ["setl", "setlocal"], ["sf", "sfind"], ["sfir", "sfirst"], ["sh", "shell"], ["si", "si"], ["sig", "sig"], ["sign", "sign"], ["sil", "silent"], ["sim", "simalt"], ["sl", "sl"], ["sl", "sleep"], ["sla", "slast"], ["sm", "smagic"], ["sm", "smap"], ["sme", "sme"], ["smenu", "smenu"], ["sn", "snext"], ["sni", "sniff"], ["sno", "snomagic"], ["snoreme", "snoreme"], ["snoremenu", "snoremenu"], ["so", "so"], ["so", "source"], ["sor", "sort"], ["sp", "split"], ["spe", "spe"], ["spe", "spellgood"], ["spelld", "spelldump"], ["spelli", "spellinfo"], ["spellr", "spellrepall"], ["spellu", "spellundo"], ["spellw", "spellwrong"], ["spr", "sprevious"], ["sre", "srewind"], ["st", "st"], ["st", "stop"], ["sta", "stag"], ["star", "star"], ["star", "startinsert"], ["start", "start"], ["startg", "startgreplace"], ["startr", "startreplace"], ["stj", "stjump"], ["stopi", "stopinsert"], ["sts", "stselect"], ["sub", "sub"], ["sub", "sub"], ["sun", "sunhide"], ["sunme", "sunme"], ["sunmenu", "sunmenu"], ["sus", "suspend"], ["sv", "sview"], ["sw", "swapname"], ["sy", "sy"], ["syn", "syn"], ["sync", "sync"], ["syncbind", "syncbind"], ["synlist", "synlist"], ["t", "t"], ["t:r", "t:r"], ["tN", "tN"], ["tN", "tNext"], ["ta", "ta"], ["ta", "tag"], ["tab", "tab"], ["tabN", "tabN"], ["tabN", "tabNext"], ["tabc", "tabclose"], ["tabd", "tabdo"], ["tabe", "tabedit"], ["tabf", 
"tabfind"], ["tabfir", "tabfirst"], ["tabl", "tablast"], ["tabm", "tabmove"], ["tabn", "tabnext"], ["tabnew", "tabnew"], ["tabo", "tabonly"], ["tabp", "tabprevious"], ["tabr", "tabrewind"], ["tabs", "tabs"], ["tags", "tags"], ["tc", "tcl"], ["tcld", "tcldo"], ["tclf", "tclfile"], ["te", "tearoff"], ["tf", "tfirst"], ["th", "throw"], ["tj", "tjump"], ["tl", "tlast"], ["tm", "tm"], ["tm", "tmenu"], ["tn", "tn"], ["tn", "tnext"], ["to", "topleft"], ["tp", "tprevious"], ["tr", "tr"], ["tr", "trewind"], ["try", "try"], ["ts", "tselect"], ["tu", "tu"], ["tu", "tunmenu"], ["u", "u"], ["u", "undo"], ["un", "un"], ["una", "unabbreviate"], ["undoj", "undojoin"], ["undol", "undolist"], ["unh", "unhide"], ["unl", "unl"], ["unlo", "unlockvar"], ["uns", "unsilent"], ["up", "update"], ["v", "v"], ["ve", "ve"], ["ve", "version"], ["verb", "verbose"], ["version", "version"], ["version", "version"], ["vert", "vertical"], ["vi", "vi"], ["vi", "visual"], ["vie", "view"], ["vim", "vimgrep"], ["vimgrepa", "vimgrepadd"], ["viu", "viusage"], ["vmapc", "vmapclear"], ["vne", "vnew"], ["vs", "vsplit"], ["w", "w"], ["w", "write"], ["wN", "wNext"], ["wa", "wall"], ["wh", "while"], ["win", "win"], ["win", "winsize"], ["winc", "wincmd"], ["windo", "windo"], ["winp", "winpos"], ["wn", "wnext"], ["wp", "wprevious"], ["wq", "wq"], ["wqa", "wqall"], ["ws", "wsverb"], ["wundo", "wundo"], ["wv", "wviminfo"], ["x", "x"], ["x", "xit"], ["xa", "xall"], ["xmapc", "xmapclear"], ["xme", "xme"], ["xmenu", "xmenu"], ["xnoreme", "xnoreme"], ["xnoremenu", "xnoremenu"], ["xterm", "xterm"], ["xunme", "xunme"], ["xunmenu", "xunmenu"], ["xwininfo", "xwininfo"], ["y", "yank"]], :option=>[], :auto=>[["BufAdd", "BufAdd"], ["BufCreate", "BufCreate"], ["BufDelete", "BufDelete"], ["BufEnter", "BufEnter"], ["BufFilePost", "BufFilePost"], ["BufFilePre", "BufFilePre"], ["BufHidden", "BufHidden"], ["BufLeave", "BufLeave"], ["BufNew", "BufNew"], ["BufNewFile", "BufNewFile"], ["BufRead", "BufRead"], ["BufReadCmd", 
"BufReadCmd"], ["BufReadPost", "BufReadPost"], ["BufReadPre", "BufReadPre"], ["BufUnload", "BufUnload"], ["BufWinEnter", "BufWinEnter"], ["BufWinLeave", "BufWinLeave"], ["BufWipeout", "BufWipeout"], ["BufWrite", "BufWrite"], ["BufWriteCmd", "BufWriteCmd"], ["BufWritePost", "BufWritePost"], ["BufWritePre", "BufWritePre"], ["Cmd", "Cmd"], ["CmdwinEnter", "CmdwinEnter"], ["CmdwinLeave", "CmdwinLeave"], ["ColorScheme", "ColorScheme"], ["CursorHold", "CursorHold"], ["CursorHoldI", "CursorHoldI"], ["CursorMoved", "CursorMoved"], ["CursorMovedI", "CursorMovedI"], ["EncodingChanged", "EncodingChanged"], ["FileAppendCmd", "FileAppendCmd"], ["FileAppendPost", "FileAppendPost"], ["FileAppendPre", "FileAppendPre"], ["FileChangedRO", "FileChangedRO"], ["FileChangedShell", "FileChangedShell"], ["FileChangedShellPost", "FileChangedShellPost"], ["FileEncoding", "FileEncoding"], ["FileReadCmd", "FileReadCmd"], ["FileReadPost", "FileReadPost"], ["FileReadPre", "FileReadPre"], ["FileType", "FileType"], ["FileWriteCmd", "FileWriteCmd"], ["FileWritePost", "FileWritePost"], ["FileWritePre", "FileWritePre"], ["FilterReadPost", "FilterReadPost"], ["FilterReadPre", "FilterReadPre"], ["FilterWritePost", "FilterWritePost"], ["FilterWritePre", "FilterWritePre"], ["FocusGained", "FocusGained"], ["FocusLost", "FocusLost"], ["FuncUndefined", "FuncUndefined"], ["GUIEnter", "GUIEnter"], ["GUIFailed", "GUIFailed"], ["InsertChange", "InsertChange"], ["InsertCharPre", "InsertCharPre"], ["InsertEnter", "InsertEnter"], ["InsertLeave", "InsertLeave"], ["MenuPopup", "MenuPopup"], ["QuickFixCmdPost", "QuickFixCmdPost"], ["QuickFixCmdPre", "QuickFixCmdPre"], ["RemoteReply", "RemoteReply"], ["SessionLoadPost", "SessionLoadPost"], ["ShellCmdPost", "ShellCmdPost"], ["ShellFilterPost", "ShellFilterPost"], ["SourceCmd", "SourceCmd"], ["SourcePre", "SourcePre"], ["SpellFileMissing", "SpellFileMissing"], ["StdinReadPost", "StdinReadPost"], ["StdinReadPre", "StdinReadPre"], ["SwapExists", "SwapExists"], ["Syntax", 
"Syntax"], ["TabEnter", "TabEnter"], ["TabLeave", "TabLeave"], ["TermChanged", "TermChanged"], ["TermResponse", "TermResponse"], ["User", "User"], ["UserGettingBored", "UserGettingBored"], ["VimEnter", "VimEnter"], ["VimLeave", "VimLeave"], ["VimLeavePre", "VimLeavePre"], ["VimResized", "VimResized"], ["WinEnter", "WinEnter"], ["WinLeave", "WinLeave"], ["event", "event"]]}
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
data/lib/rouge/lexers/xml.rb
CHANGED
data/lib/rouge/lexers/yaml.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Lexers
|
3
3
|
class YAML < RegexLexer
|
4
|
+
desc "Yaml Ain't Markup Language (yaml.org)"
|
4
5
|
tag 'yaml'
|
5
6
|
aliases 'yml'
|
6
7
|
|
@@ -123,7 +124,7 @@ module Rouge
|
|
123
124
|
# indented line in the block context
|
124
125
|
state :block_line do
|
125
126
|
# line end
|
126
|
-
|
127
|
+
rule /[ ]*(?=#|$)/, 'Text', :pop!
|
127
128
|
rule /[ ]+/, 'Text'
|
128
129
|
# tags, anchors, and aliases
|
129
130
|
mixin :descriptors
|
@@ -145,8 +146,8 @@ module Rouge
|
|
145
146
|
|
146
147
|
# a tag in the form '!', '!suffix' or '!handle!suffix'
|
147
148
|
rule %r(
|
148
|
-
|
149
|
-
(
|
149
|
+
(?:![\w-]+)? # handle
|
150
|
+
!(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix
|
150
151
|
)x, 'Keyword.Type'
|
151
152
|
|
152
153
|
# an anchor
|
data/lib/rouge/regex_lexer.rb
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
module Rouge
|
2
|
+
# @abstract
|
3
|
+
# A stateful lexer that uses sets of regular expressions to
|
4
|
+
# tokenize a string. Most lexers are instances of RegexLexer.
|
2
5
|
class RegexLexer < Lexer
|
6
|
+
# A rule is a tuple of a regular expression to test, and a callback
|
7
|
+
# to perform if the test succeeds.
|
8
|
+
#
|
9
|
+
# @see StateDSL#rule
|
3
10
|
class Rule
|
4
11
|
attr_reader :callback
|
5
|
-
attr_reader :next_state
|
6
12
|
attr_reader :re
|
7
|
-
def initialize(re, callback
|
13
|
+
def initialize(re, callback)
|
8
14
|
@re = re
|
9
15
|
@callback = callback
|
10
|
-
@next_state = next_state
|
11
16
|
end
|
12
17
|
|
13
18
|
def inspect
|
@@ -15,22 +20,17 @@ module Rouge
|
|
15
20
|
end
|
16
21
|
end
|
17
22
|
|
23
|
+
# a State is a named set of rules that can be tested for or
|
24
|
+
# mixed in.
|
25
|
+
#
|
26
|
+
# @see RegexLexer.state
|
18
27
|
class State
|
19
28
|
attr_reader :name
|
20
|
-
def initialize(
|
21
|
-
@lexer_class = lexer_class
|
29
|
+
def initialize(name, &defn)
|
22
30
|
@name = name
|
23
31
|
@defn = defn
|
24
32
|
end
|
25
33
|
|
26
|
-
def relative_state(state_name=nil, &b)
|
27
|
-
if state_name
|
28
|
-
@lexer_class.get_state(state_name)
|
29
|
-
else
|
30
|
-
State.new(@lexer_class, b.inspect, &b).load!
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
34
|
def rules
|
35
35
|
@rules ||= []
|
36
36
|
end
|
@@ -49,6 +49,25 @@ module Rouge
|
|
49
49
|
@rules = rules
|
50
50
|
end
|
51
51
|
|
52
|
+
# Define a new rule for this state.
|
53
|
+
#
|
54
|
+
# @overload rule(re, token, next_state=nil)
|
55
|
+
# @overload rule(re, &callback)
|
56
|
+
#
|
57
|
+
# @param [Regexp] re
|
58
|
+
# a regular expression for this rule to test.
|
59
|
+
# @param [String] tok
|
60
|
+
# the token type to yield if `re` matches.
|
61
|
+
# @param [#to_s] next_state
|
62
|
+
# (optional) a state to push onto the stack if `re` matches.
|
63
|
+
# If `next_state` is `:pop!`, the state stack will be popped
|
64
|
+
# instead.
|
65
|
+
# @param [Proc] callback
|
66
|
+
# a block that will be evaluated in the context of the lexer
|
67
|
+
# if `re` matches. This block has access to a number of lexer
|
68
|
+
# methods, including {RegexLexer#push}, {RegexLexer#pop!},
|
69
|
+
# {RegexLexer#token}, and {RegexLexer#delegate}. The first
|
70
|
+
# argument can be used to access the match groups.
|
52
71
|
def rule(re, tok=nil, next_state=nil, &callback)
|
53
72
|
if block_given?
|
54
73
|
next_state = tok
|
@@ -66,22 +85,29 @@ module Rouge
|
|
66
85
|
end
|
67
86
|
end
|
68
87
|
|
69
|
-
rules << Rule.new(re, callback
|
88
|
+
rules << Rule.new(re, callback)
|
70
89
|
end
|
71
90
|
|
91
|
+
# Mix in the rules from another state into this state. The rules
|
92
|
+
# from the mixed-in state will be tried in order before moving on
|
93
|
+
# to the rest of the rules in this state.
|
72
94
|
def mixin(lexer_name)
|
73
95
|
rules << lexer_name.to_s
|
74
96
|
end
|
75
97
|
end
|
76
98
|
|
99
|
+
# The states hash for this lexer.
|
100
|
+
# @see state
|
77
101
|
def self.states
|
78
102
|
@states ||= {}
|
79
103
|
end
|
80
104
|
|
81
|
-
|
105
|
+
# The routines to run at the beginning of a fresh lex.
|
106
|
+
# @see start
|
82
107
|
def self.start_procs
|
83
108
|
@start_procs ||= InheritableList.new(superclass.start_procs)
|
84
109
|
end
|
110
|
+
@start_procs = []
|
85
111
|
|
86
112
|
# Specify an action to be run every fresh lex.
|
87
113
|
#
|
@@ -91,20 +117,33 @@ module Rouge
|
|
91
117
|
start_procs << b
|
92
118
|
end
|
93
119
|
|
120
|
+
# Specify a filter to be applied as the lexer yields tokens.
|
121
|
+
#
|
122
|
+
# @param toktype
|
123
|
+
# The token type to postprocess
|
124
|
+
# @yield [tok, val]
|
125
|
+
# The token and the matched value. The block will be evaluated in
|
126
|
+
# the context of the lexer, and it must yield an equivalent
|
127
|
+
# token/value pair, usually by calling #token.
|
94
128
|
def self.postprocess(toktype, &b)
|
95
129
|
postprocesses << [Token[toktype], b]
|
96
130
|
end
|
97
131
|
|
98
|
-
|
132
|
+
# where the postprocess blocks are stored.
|
133
|
+
# @see postprocess
|
99
134
|
def self.postprocesses
|
100
135
|
@postprocesses ||= InheritableList.new(superclass.postprocesses)
|
101
136
|
end
|
137
|
+
@postprocesses = []
|
102
138
|
|
139
|
+
# Define a new state for this lexer with the given name.
|
140
|
+
# The block will be evaluated in the context of a {StateDSL}.
|
103
141
|
def self.state(name, &b)
|
104
142
|
name = name.to_s
|
105
|
-
states[name] = State.new(
|
143
|
+
states[name] = State.new(name, &b)
|
106
144
|
end
|
107
145
|
|
146
|
+
# @private
|
108
147
|
def self.get_state(name)
|
109
148
|
return name if name.is_a? State
|
110
149
|
|
@@ -113,41 +152,55 @@ module Rouge
|
|
113
152
|
state.load!
|
114
153
|
end
|
115
154
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
def get_state(name)
|
121
|
-
self.class.get_state(name)
|
155
|
+
# @private
|
156
|
+
def get_state(state_name)
|
157
|
+
self.class.get_state(state_name)
|
122
158
|
end
|
123
159
|
|
160
|
+
# The state stack. This is initially the single state `[:root]`.
|
161
|
+
# It is an error for this stack to be empty.
|
162
|
+
# @see #state
|
124
163
|
def stack
|
125
164
|
@stack ||= [get_state(:root)]
|
126
165
|
end
|
127
166
|
|
167
|
+
# The current state - i.e. one on top of the state stack.
|
168
|
+
#
|
169
|
+
# NB: if the state stack is empty, this will throw an error rather
|
170
|
+
# than returning nil.
|
128
171
|
def state
|
129
172
|
stack.last or raise 'empty stack!'
|
130
173
|
end
|
131
174
|
|
175
|
+
# reset this lexer to its initial state. This runs all of the
|
176
|
+
# start_procs.
|
132
177
|
def reset!
|
133
|
-
@
|
178
|
+
@stack = nil
|
134
179
|
|
135
180
|
self.class.start_procs.each do |pr|
|
136
181
|
instance_eval(&pr)
|
137
182
|
end
|
138
183
|
end
|
139
184
|
|
185
|
+
# This implements the lexer protocol, by yielding [token, value] pairs.
|
186
|
+
#
|
187
|
+
# The process for lexing works as follows, until the stream is empty:
|
188
|
+
#
|
189
|
+
# 1. We look at the state on top of the stack (which by default is
|
190
|
+
# `[:root]`).
|
191
|
+
# 2. Each rule in that state is tried until one is successful. If one
|
192
|
+
# is found, that rule's callback is evaluated - which may yield
|
193
|
+
# tokens and manipulate the state stack. Otherwise, one character
|
194
|
+
# is consumed with an `'Error'` token, and we continue at (1.)
|
195
|
+
#
|
196
|
+
# @see #step #step (where (2.) is implemented)
|
140
197
|
def stream_tokens(stream, &b)
|
141
198
|
stream_without_postprocessing(stream) do |tok, val|
|
142
199
|
_, processor = self.class.postprocesses.find { |t, _| t == tok }
|
143
200
|
|
144
201
|
if processor
|
145
|
-
|
146
|
-
Enumerator.new do |y|
|
147
|
-
@output_stream = y
|
202
|
+
with_output_stream(b) do
|
148
203
|
instance_exec(tok, val, &processor)
|
149
|
-
end.each do |newtok, newval|
|
150
|
-
yield Token[newtok], newval
|
151
204
|
end
|
152
205
|
else
|
153
206
|
yield tok, val
|
@@ -170,6 +223,11 @@ module Rouge
|
|
170
223
|
end
|
171
224
|
end
|
172
225
|
|
226
|
+
# Runs one step of the lex. Rules in the current state are tried
|
227
|
+
# until one matches, at which point its callback is called.
|
228
|
+
#
|
229
|
+
# @return true if a rule was tried successfully
|
230
|
+
# @return false otherwise.
|
173
231
|
def step(state, stream, &b)
|
174
232
|
state.rules.each do |rule|
|
175
233
|
return true if run_rule(rule, stream, &b)
|
@@ -178,6 +236,7 @@ module Rouge
|
|
178
236
|
false
|
179
237
|
end
|
180
238
|
|
239
|
+
# @private
|
181
240
|
def run_rule(rule, stream, &b)
|
182
241
|
case rule
|
183
242
|
when String
|
@@ -190,26 +249,26 @@ module Rouge
|
|
190
249
|
scan(stream, rule.re) do
|
191
250
|
debug { " got #{stream[0].inspect}" }
|
192
251
|
|
193
|
-
run_callback(stream,
|
194
|
-
debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
|
195
|
-
b.call(Token[tok], res)
|
196
|
-
end
|
252
|
+
run_callback(stream, rule.callback, &b)
|
197
253
|
end
|
198
254
|
end
|
199
255
|
end
|
200
256
|
|
201
|
-
|
202
|
-
|
203
|
-
|
257
|
+
# @private
|
258
|
+
def run_callback(stream, callback, &output_stream)
|
259
|
+
with_output_stream(output_stream) do
|
204
260
|
@group_count = 0
|
205
261
|
@last_match = stream
|
206
262
|
instance_exec(stream, &callback)
|
207
263
|
@last_match = nil
|
208
|
-
@output_stream = nil
|
209
264
|
end
|
210
265
|
end
|
211
266
|
|
212
|
-
|
267
|
+
# The number of successive scans permitted without consuming
|
268
|
+
# the input stream. If this is exceeded, the match fails.
|
269
|
+
MAX_NULL_SCANS = 5
|
270
|
+
|
271
|
+
# @private
|
213
272
|
def scan(scanner, re, &b)
|
214
273
|
# XXX HACK XXX
|
215
274
|
# StringScanner's implementation of ^ is b0rken.
|
@@ -219,7 +278,7 @@ module Rouge
|
|
219
278
|
|
220
279
|
@null_steps ||= 0
|
221
280
|
|
222
|
-
if @null_steps >=
|
281
|
+
if @null_steps >= MAX_NULL_SCANS
|
223
282
|
debug { " too many scans without consuming the string!" }
|
224
283
|
return false
|
225
284
|
end
|
@@ -240,6 +299,13 @@ module Rouge
|
|
240
299
|
return false
|
241
300
|
end
|
242
301
|
|
302
|
+
# Yield a token.
|
303
|
+
#
|
304
|
+
# @param tok
|
305
|
+
# the token type
|
306
|
+
# @param val
|
307
|
+
# (optional) the string value to yield. If absent, this defaults
|
308
|
+
# to the entire last match.
|
243
309
|
def token(tok, val=:__absent__)
|
244
310
|
val = @last_match[0] if val == :__absent__
|
245
311
|
val ||= ''
|
@@ -249,10 +315,21 @@ module Rouge
|
|
249
315
|
@output_stream << [Token[tok], val] unless val.empty?
|
250
316
|
end
|
251
317
|
|
318
|
+
# Yield a token with the next matched group. Subsequent calls
|
319
|
+
# to this method will yield subsequent groups.
|
252
320
|
def group(tok)
|
253
321
|
token(tok, @last_match[@group_count += 1])
|
254
322
|
end
|
255
323
|
|
324
|
+
# Delegate the lex to another lexer. The #lex method will be called
|
325
|
+
# with `:continue` set to true, so that #reset! will not be called.
|
326
|
+
# In this way, a single lexer can be repeatedly delegated to while
|
327
|
+
# maintaining its own internal state stack.
|
328
|
+
#
|
329
|
+
# @param [#lex] lexer
|
330
|
+
# The lexer or lexer class to delegate to
|
331
|
+
# @param [String] text
|
332
|
+
# The text to delegate. This defaults to the last matched string.
|
256
333
|
def delegate(lexer, text=nil)
|
257
334
|
debug { " delegating to #{lexer.inspect}" }
|
258
335
|
text ||= @last_match[0]
|
@@ -263,18 +340,25 @@ module Rouge
|
|
263
340
|
end
|
264
341
|
end
|
265
342
|
|
343
|
+
# Push a state onto the stack. If no state name is given and you've
|
344
|
+
# passed a block, a state will be dynamically created using the
|
345
|
+
# {StateDSL}.
|
266
346
|
def push(state_name=nil, &b)
|
267
|
-
|
268
|
-
|
269
|
-
|
347
|
+
push_state = if state_name
|
348
|
+
get_state(state_name)
|
349
|
+
elsif block_given?
|
350
|
+
State.new(b.inspect, &b).load!
|
270
351
|
else
|
271
|
-
|
352
|
+
# use the top of the stack by default
|
353
|
+
self.state
|
272
354
|
end
|
273
355
|
|
274
356
|
debug { " pushing #{push_state.name}" }
|
275
357
|
stack.push(push_state)
|
276
358
|
end
|
277
359
|
|
360
|
+
# Pop the state stack. If a number is passed in, it will be popped
|
361
|
+
# that number of times.
|
278
362
|
def pop!(times=1)
|
279
363
|
raise 'empty stack!' if stack.empty?
|
280
364
|
|
@@ -282,19 +366,34 @@ module Rouge
|
|
282
366
|
times.times { stack.pop }
|
283
367
|
end
|
284
368
|
|
369
|
+
# reset the stack back to `[:root]`.
|
285
370
|
def reset_stack
|
286
371
|
debug { ' resetting stack' }
|
287
372
|
stack.clear
|
288
373
|
stack.push get_state(:root)
|
289
374
|
end
|
290
375
|
|
376
|
+
# Check if `state_name` is in the state stack.
|
291
377
|
def in_state?(state_name)
|
292
378
|
stack.map(&:name).include? state_name.to_s
|
293
379
|
end
|
294
380
|
|
381
|
+
# Check if `state_name` is the state on top of the state stack.
|
295
382
|
def state?(state_name)
|
296
383
|
state_name.to_s == state.name
|
297
384
|
end
|
298
385
|
|
386
|
+
private
|
387
|
+
def with_output_stream(output_stream, &b)
|
388
|
+
@output_stream = Yielder.new do |tok, val|
|
389
|
+
debug { " yielding #{tok.to_s.inspect}, #{val.inspect}" }
|
390
|
+
output_stream.call(Token[tok], val)
|
391
|
+
end
|
392
|
+
|
393
|
+
yield
|
394
|
+
|
395
|
+
ensure
|
396
|
+
@output_stream = nil
|
397
|
+
end
|
299
398
|
end
|
300
399
|
end
|