coderay 0.8.357 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/README +4 -3
- data/lib/coderay.rb +2 -1
- data/lib/coderay/encoder.rb +41 -15
- data/lib/coderay/encoders/_map.rb +3 -1
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/div.rb +2 -3
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +20 -3
- data/lib/coderay/encoders/html/css.rb +1 -1
- data/lib/coderay/encoders/html/numerization.rb +11 -2
- data/lib/coderay/encoders/html/output.rb +10 -1
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/page.rb +1 -2
- data/lib/coderay/encoders/span.rb +2 -3
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +4 -4
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +1 -0
- data/lib/coderay/for_redcloth.rb +9 -4
- data/lib/coderay/helpers/file_type.rb +54 -15
- data/lib/coderay/helpers/plugin.rb +21 -3
- data/lib/coderay/helpers/word_list.rb +19 -4
- data/lib/coderay/scanner.rb +33 -2
- data/lib/coderay/scanners/_map.rb +10 -4
- data/lib/coderay/scanners/c.rb +61 -23
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +9 -1
- data/lib/coderay/scanners/debug.rb +1 -0
- data/lib/coderay/scanners/delphi.rb +2 -2
- data/lib/coderay/scanners/diff.rb +1 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +9 -2
- data/lib/coderay/scanners/java.rb +18 -14
- data/lib/coderay/scanners/java_script.rb +42 -13
- data/lib/coderay/scanners/json.rb +7 -1
- data/lib/coderay/scanners/nitro_xhtml.rb +4 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +4 -1
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +3 -0
- data/lib/coderay/scanners/ruby.rb +29 -11
- data/lib/coderay/scanners/ruby/patterns.rb +26 -20
- data/lib/coderay/scanners/scheme.rb +3 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +4 -1
- data/lib/coderay/styles/cycnus.rb +11 -7
- data/lib/coderay/token_classes.rb +4 -1
- data/lib/coderay/tokens.rb +50 -46
- metadata +14 -4
- data/lib/coderay/encoders/tokens.rb +0 -44
@@ -6,9 +6,15 @@ module Scanners
|
|
6
6
|
include Streamable
|
7
7
|
|
8
8
|
register_for :json
|
9
|
+
file_extension 'json'
|
10
|
+
|
11
|
+
KINDS_NOT_LOC = [
|
12
|
+
:float, :char, :content, :delimiter,
|
13
|
+
:error, :integer, :operator, :value,
|
14
|
+
]
|
9
15
|
|
10
16
|
CONSTANTS = %w( true false null )
|
11
|
-
IDENT_KIND = WordList.new(:key).add(CONSTANTS, :
|
17
|
+
IDENT_KIND = WordList.new(:key).add(CONSTANTS, :value)
|
12
18
|
|
13
19
|
ESCAPE = / [bfnrt\\"\/] /x
|
14
20
|
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x
|
@@ -10,7 +10,10 @@ module Scanners
|
|
10
10
|
include Streamable
|
11
11
|
register_for :nitro_xhtml
|
12
12
|
file_extension :xhtml
|
13
|
+
title 'Nitro XHTML'
|
13
14
|
|
15
|
+
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
16
|
+
|
14
17
|
NITRO_RUBY_BLOCK = /
|
15
18
|
<\?r
|
16
19
|
(?>
|
@@ -118,6 +121,7 @@ module Scanners
|
|
118
121
|
|
119
122
|
else
|
120
123
|
raise_inspect 'else-case reached!', tokens
|
124
|
+
|
121
125
|
end
|
122
126
|
|
123
127
|
end
|
@@ -0,0 +1,526 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
load :html
|
5
|
+
|
6
|
+
# Original by Stefan Walk.
|
7
|
+
class PHP < Scanner
|
8
|
+
|
9
|
+
register_for :php
|
10
|
+
file_extension 'php'
|
11
|
+
|
12
|
+
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
13
|
+
|
14
|
+
# Creates the nested HTML scanner that this scanner delegates non-PHP text to.
# The HTML scanner is handed this scanner's @tokens and is created with the
# :keep_tokens and :keep_state options switched on.
def setup
  @html_scanner = CodeRay.scanner(
    :html,
    :tokens      => @tokens,
    :keep_tokens => true,
    :keep_state  => true
  )
end
|
17
|
+
|
18
|
+
# Resets this scanner through the inherited implementation first, then
# resets the nested HTML scanner created in #setup as well.
def reset_instance
  super
  @html_scanner.reset
end
|
22
|
+
|
23
|
+
# Word lists used to classify PHP identifiers and variables.
#
# IDENT_KIND maps identifiers to token kinds through a CaseIgnoringWordList
# (default kind :ident); VARIABLE_KIND maps variable names (including the
# leading "$") through a WordList (default kind :local_variable).
module Words

  # according to http://www.php.net/manual/en/reserved.keywords.php
  KEYWORDS = %w[
    abstract and array as break case catch class clone const continue declare default do else elseif
    enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
    goto if implements interface instanceof namespace new or private protected public static switch
    throw try use var while xor
    cfunction old_function
  ]

  TYPES = %w[ int integer float double bool boolean string array object resource ]

  LANGUAGE_CONSTRUCTS = %w[
    die echo empty exit eval include include_once isset list
    require require_once return print unset
  ]

  CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]

  # according to http://php.net/quickref.php on 2009-04-21;
  # all functions with _ excluded (module functions) and selected additional functions
  BUILTIN_FUNCTIONS = %w[
    abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
    atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
    bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
    calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
    compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
    decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
    ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
    fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
    fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
    fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
    getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
    getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
    gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
    gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
    gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
    implode include intval ip2long iptcembed iptcparse isset
    jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
    juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
    log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
    natsort next ngettext nl2br nthmac octdec opendir openlog
    ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
    prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
    readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
    serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
    snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
    strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
    strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
    strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
    textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
    unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
    array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
    array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
    array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
    array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
    array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
    array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
    array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
    array_walk_recursive
    assert_options base_convert base64_decode base64_encode
    chunk_split class_exists class_implements class_parents
    count_chars debug_backtrace debug_print_backtrace debug_zval_dump
    error_get_last error_log error_reporting extension_loaded
    file_exists file_get_contents file_put_contents load_file
    func_get_arg func_get_args func_num_args function_exists
    get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
    get_current_user get_declared_classes get_declared_interfaces get_defined_constants
    get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
    get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
    get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
    gc_collect_cycles gc_disable gc_enable gc_enabled
    halt_compiler headers_list headers_sent highlight_file highlight_string
    html_entity_decode htmlspecialchars_decode
    in_array include_once inclued_get_data
    is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
    is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
    is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
    is_writable is_writeable
    locale_get_default locale_set_default
    number_format override_function parse_str parse_url
    php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
    php_strip_whitespace php_uname
    preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
    preg_replace_callback preg_split print_r
    require_once register_shutdown_function register_tick_function
    set_error_handler set_exception_handler set_file_buffer set_include_path
    set_magic_quotes_runtime set_time_limit shell_exec
    str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
    strip_tags substr_compare substr_count substr_replace
    time_nanosleep time_sleep_until
    token_get_all token_name trigger_error
    unregister_tick_function use_soap_error_handler user_error
    utf8_decode utf8_encode var_dump var_export
    version_compare
    zend_logo_guid zend_thread_id zend_version
    create_function call_user_func_array
    posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
    posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
    posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
    posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
    posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
    posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
    posix_setuid posix_strerror posix_times posix_ttyname posix_uname
    pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
    pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
    pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
    pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
  ]
  # TODO: more built-in PHP functions?

  # Error-level constants; classified as :exception below.
  EXCEPTIONS = %w[
    E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
    E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
  ]

  CONSTANTS = %w[
    null true false self parent
    __LINE__ __DIR__ __FILE__
    __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
    PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
    PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
    PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
    PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
    PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
    __COMPILER_HALT_OFFSET__
    EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
    EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
    COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
    CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
    M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
    CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
    HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
    INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
    CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
    STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
    LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
    ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
    MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
    ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
    POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
    N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
    YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
    LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
    LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
    LOG_NDELAY LOG_NOWAIT LOG_PERROR
  ]

  # Predefined variables, written with their leading "$" because that is
  # how the scanner looks them up in VARIABLE_KIND.
  PREDEFINED = %w[
    $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
    $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
    $argc $argv
  ]

  # Several words occur in more than one list above (e.g. "array", "echo");
  # the lists are added in this order.
  # NOTE(review): presumably a later add overrides an earlier one - confirm against WordList#add.
  IDENT_KIND = CaseIgnoringWordList.new(:ident).
    add(KEYWORDS, :reserved).
    add(TYPES, :pre_type).
    add(LANGUAGE_CONSTRUCTS, :reserved).
    add(BUILTIN_FUNCTIONS, :predefined).
    add(CLASSES, :pre_constant).
    add(EXCEPTIONS, :exception).
    add(CONSTANTS, :pre_constant)

  VARIABLE_KIND = WordList.new(:local_variable).
    add(PREDEFINED, :predefined)
end
|
191
|
+
|
192
|
+
# Regular expressions shared by the PHP scanner.
module RE

  # Start of a PHP section: <script ... language="php" ...> (double or single
  # quotes), <?php optionally followed by one digit, or a bare <? that is not
  # followed by "xml". Case-insensitive.
  PHP_START = /
    <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
    <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
    <\?php\d? |
    <\?(?!xml)
  /xi

  # End of a PHP section: </script> or ?>. Case-insensitive.
  PHP_END = %r!
    </script> |
    \?>
  !xi

  # Markup that indicates the input is an HTML document.
  HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i

  # A PHP identifier: a letter, underscore, or any character from \x7F upwards,
  # followed by any number of those or digits.
  #
  # The character class is written as "not \x00-\x7E" instead of the raw byte
  # range \x7f-\xFF: a regexp literal containing \xFF is rejected as an
  # "invalid multibyte escape" when the source encoding is UTF-8, and a byte
  # range cannot match multibyte characters. Only non-capturing groups are
  # used, so patterns that embed IDENTIFIER keep their capture numbering.
  IDENTIFIER = /(?:[a-z_]|[^\x00-\x7E])(?:[a-z0-9_]|[^\x00-\x7E])*/i

  # A variable: "$" directly followed by an identifier.
  VARIABLE = /\$#{IDENTIFIER}/

  # Operators and delimiters. Alternatives are tried in order, so the longer
  # multi-character forms are listed before the generic single-character ones.
  OPERATOR = /
    \.(?!\d)=? |     # dot that is not decimal point, string concatenation
    && | \|\| |      # logic
    :: | -> | => |   # scope, member, dictionary
    \+\+ | -- |      # increment, decrement
    [,;?:()\[\]{}] | # simple delimiters
    [-+*\/%&|^]=? |  # ordinary math, binary logic, assignment shortcuts
    [~$] |           # whatever
    =& |             # reference assignment
    [=!]=?=? | <> |  # comparison and assignment
    <<=? | >>=? | [<>]=?  # comparison and shift
  /x

end
|
225
|
+
|
226
|
+
# Scans the input as PHP, optionally embedded in HTML, and appends the
# resulting [text, kind] pairs (plus [:open, kind] / [:close, kind] group
# markers for strings and inline blocks) to +tokens+.
#
# tokens:: the collection tokens are appended to with <<; it is also returned.
# options:: accepted for interface compatibility; not read by this method.
#
# The scanner keeps a stack of states:
# :initial::           HTML text, handed to @html_scanner
# :php::               PHP code
# :sqstring::          single-quoted string, or nowdoc (<<<'ID')
# :dqstring::          double-quoted or backtick string, or heredoc
# :class_expected::    right after the "class" keyword
# :function_expected:: right after the "function" keyword
#
# While an inline {$...} block inside a string is being scanned, the string's
# stack entry is replaced by [state, delimiter, heredoc_delimiter] so that the
# string context can be restored on the closing "}".
#
# NOTE(review): the scanning primitives used here (scan, check, match?, exist?,
# scan_until, getch, matched, self[n], eos?, line, raise_inspect) come from the
# superclass, which is not part of this file.
def scan_tokens tokens, options

  if check(RE::PHP_START) ||  # starts with <?
     (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
     exist?(RE::HTML_INDICATOR) ||
     check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
    # is HTML with embedded PHP, so start with HTML
    states = [:initial]
  else
    # is just PHP, so start with PHP surrounded by HTML
    states = [:initial, :php]
  end

  label_expected = true
  case_expected = false

  heredoc_delimiter = nil  # Regexp matching the closing heredoc identifier, or nil
  delimiter = nil          # opening quote character of the current dqstring, or nil
  modifier = nil           # pending "b" prefix of a binary string literal, or nil

  until eos?

    match = nil
    kind = nil

    case states.last

    when :initial  # HTML
      if scan RE::PHP_START
        kind = :inline_delimiter
        label_expected = true
        states << :php
      else
        # everything up to the next PHP start (or the end of input) is HTML
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
        @html_scanner.tokenize match unless match.empty?
        next
      end

    when :php
      if match = scan(/\s+/)
        tokens << [match, :space]
        next

      elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        kind = :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :reserved
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)  # binary string literal
            # the "b" is emitted as a :modifier token inside the string group
            modifier = match
            next
          end
        end

      elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        kind = :float

      elsif scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        kind = :hex

      elsif scan(/\d+/)
        label_expected = false
        kind = :integer

      elsif scan(/'/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        kind = :delimiter
        states.push :sqstring

      elsif match = scan(/["`]/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        delimiter = match
        kind = :delimiter
        states.push :dqstring

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        kind = Words::VARIABLE_KIND[match]

      elsif scan(/\{/)
        kind = :operator
        label_expected = true
        states.push :php

      elsif scan(/\}/)
        if states.size == 1
          kind = :error
        else
          states.pop
          if states.last.is_a?(::Array)
            # end of an inline {$...} block: restore the string context that
            # was saved when the block was opened
            delimiter = states.last[1]
            heredoc_delimiter = states.last[2]
            states[-1] = states.last[0]
            tokens << [matched, :delimiter]
            tokens << [:close, :inline]
            next
          else
            kind = :operator
            label_expected = true
          end
        end

      elsif scan(/@/)
        label_expected = false
        kind = :exception

      elsif scan RE::PHP_END
        kind = :inline_delimiter
        states = [:initial]

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        tokens << [:open, :string]
        warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        kind = :delimiter
        # <<<'ID' (third capture) is scanned like a single-quoted string
        states.push self[3] ? :sqstring : :dqstring
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        kind = :operator

      else
        getch
        kind = :error

      end

    when :sqstring
      if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        kind = :content
      elsif !heredoc_delimiter && scan(/'/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        if scan heredoc_delimiter
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/\\/)
        kind = :error
      end

    when :dqstring
      if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        kind = :content
      elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        if scan heredoc_delimiter
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        kind = :char
      elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/\\/)
        kind = :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        kind = :local_variable
        if check(/\[#{RE::IDENTIFIER}\]/o)
          # simple interpolation with an index: $var[key]
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/\[/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [scan(/\]/), :operator]
          tokens << [:close, :inline]
          next
        elsif check(/\[/)
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          kind = :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          # simple interpolation with a member access: $var->member
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/->/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [:close, :inline]
          next
        elsif check(/->/)
          match << scan(/->/)
          kind = :error
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          kind = :delimiter
          # Save the whole string context, including the heredoc delimiter,
          # and clear it: quoted strings inside the inline block must be
          # scanned as ordinary strings, not with heredoc rules.
          states[-1] = [states.last, delimiter, heredoc_delimiter]
          delimiter = nil
          heredoc_delimiter = nil
          states.push :php
          tokens << [:open, :inline]
        else
          kind = :string
        end
      elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
        kind = :local_variable
      elsif scan(/\$/)
        kind = :content
      end

    when :class_expected
      if scan(/\s+/)
        kind = :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :class
        states.pop
      else
        # no class name follows: rescan the same position in the previous state
        states.pop
        next
      end

    when :function_expected
      if scan(/\s+/)
        kind = :space
      elsif scan(/&/)
        kind = :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :function
        states.pop
      else
        # no function name follows: rescan the same position in the previous state
        states.pop
        next
      end

    else
      raise_inspect 'Unknown state!', tokens, states
    end

    # branches that did not assign match use the text of the last scan
    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, states
    end
    raise_inspect 'Empty token', tokens, states unless match

    tokens << [match, kind]

  end

  tokens
end
|
522
|
+
|
523
|
+
end
|
524
|
+
|
525
|
+
end
|
526
|
+
end
|