coderay 0.8.357 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. data/lib/README +4 -3
  2. data/lib/coderay.rb +2 -1
  3. data/lib/coderay/encoder.rb +41 -15
  4. data/lib/coderay/encoders/_map.rb +3 -1
  5. data/lib/coderay/encoders/comment_filter.rb +43 -0
  6. data/lib/coderay/encoders/div.rb +2 -3
  7. data/lib/coderay/encoders/filter.rb +75 -0
  8. data/lib/coderay/encoders/html.rb +20 -3
  9. data/lib/coderay/encoders/html/css.rb +1 -1
  10. data/lib/coderay/encoders/html/numerization.rb +11 -2
  11. data/lib/coderay/encoders/html/output.rb +10 -1
  12. data/lib/coderay/encoders/json.rb +69 -0
  13. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  14. data/lib/coderay/encoders/page.rb +1 -2
  15. data/lib/coderay/encoders/span.rb +2 -3
  16. data/lib/coderay/encoders/term.rb +137 -0
  17. data/lib/coderay/encoders/text.rb +4 -4
  18. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  19. data/lib/coderay/encoders/xml.rb +1 -0
  20. data/lib/coderay/for_redcloth.rb +9 -4
  21. data/lib/coderay/helpers/file_type.rb +54 -15
  22. data/lib/coderay/helpers/plugin.rb +21 -3
  23. data/lib/coderay/helpers/word_list.rb +19 -4
  24. data/lib/coderay/scanner.rb +33 -2
  25. data/lib/coderay/scanners/_map.rb +10 -4
  26. data/lib/coderay/scanners/c.rb +61 -23
  27. data/lib/coderay/scanners/cpp.rb +228 -0
  28. data/lib/coderay/scanners/css.rb +9 -1
  29. data/lib/coderay/scanners/debug.rb +1 -0
  30. data/lib/coderay/scanners/delphi.rb +2 -2
  31. data/lib/coderay/scanners/diff.rb +1 -0
  32. data/lib/coderay/scanners/groovy.rb +263 -0
  33. data/lib/coderay/scanners/html.rb +9 -2
  34. data/lib/coderay/scanners/java.rb +18 -14
  35. data/lib/coderay/scanners/java_script.rb +42 -13
  36. data/lib/coderay/scanners/json.rb +7 -1
  37. data/lib/coderay/scanners/nitro_xhtml.rb +4 -0
  38. data/lib/coderay/scanners/php.rb +526 -0
  39. data/lib/coderay/scanners/plaintext.rb +4 -1
  40. data/lib/coderay/scanners/python.rb +285 -0
  41. data/lib/coderay/scanners/rhtml.rb +3 -0
  42. data/lib/coderay/scanners/ruby.rb +29 -11
  43. data/lib/coderay/scanners/ruby/patterns.rb +26 -20
  44. data/lib/coderay/scanners/scheme.rb +3 -0
  45. data/lib/coderay/scanners/sql.rb +162 -0
  46. data/lib/coderay/scanners/xml.rb +1 -1
  47. data/lib/coderay/scanners/yaml.rb +4 -1
  48. data/lib/coderay/styles/cycnus.rb +11 -7
  49. data/lib/coderay/token_classes.rb +4 -1
  50. data/lib/coderay/tokens.rb +50 -46
  51. metadata +14 -4
  52. data/lib/coderay/encoders/tokens.rb +0 -44
@@ -6,9 +6,15 @@ module Scanners
6
6
  include Streamable
7
7
 
8
8
  register_for :json
9
+ file_extension 'json'
10
+
11
+ KINDS_NOT_LOC = [
12
+ :float, :char, :content, :delimiter,
13
+ :error, :integer, :operator, :value,
14
+ ]
9
15
 
10
16
  CONSTANTS = %w( true false null )
11
- IDENT_KIND = WordList.new(:key).add(CONSTANTS, :reserved)
17
+ IDENT_KIND = WordList.new(:key).add(CONSTANTS, :value)
12
18
 
13
19
  ESCAPE = / [bfnrt\\"\/] /x
14
20
  UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x
@@ -10,7 +10,10 @@ module Scanners
10
10
  include Streamable
11
11
  register_for :nitro_xhtml
12
12
  file_extension :xhtml
13
+ title 'Nitro XHTML'
13
14
 
15
+ KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
16
+
14
17
  NITRO_RUBY_BLOCK = /
15
18
  <\?r
16
19
  (?>
@@ -118,6 +121,7 @@ module Scanners
118
121
 
119
122
  else
120
123
  raise_inspect 'else-case reached!', tokens
124
+
121
125
  end
122
126
 
123
127
  end
@@ -0,0 +1,526 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ load :html
5
+
6
+ # Original by Stefan Walk.
7
+ class PHP < Scanner
8
+
9
+ register_for :php
10
+ file_extension 'php'
11
+
12
+ KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
13
+
14
# Creates the nested HTML scanner that handles everything outside of PHP code.
# It is given this scanner's @tokens and is created with :keep_tokens and
# :keep_state enabled.
def setup
  html_options = {
    :tokens      => @tokens,
    :keep_tokens => true,
    :keep_state  => true,
  }
  @html_scanner = CodeRay.scanner(:html, html_options)
end
17
+
18
# Resets this scanner through the inherited implementation and then resets
# the nested HTML scanner held in @html_scanner as well.
+ def reset_instance
19
+ super
20
+ @html_scanner.reset
21
+ end
22
+
23
+ module Words
24
+
25
+ # according to http://www.php.net/manual/en/reserved.keywords.php
26
# Reserved words; IDENT_KIND registers them as :reserved.
+ KEYWORDS = %w[
27
+ abstract and array as break case catch class clone const continue declare default do else elseif
28
+ enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
29
+ goto if implements interface instanceof namespace new or private protected public static switch
30
+ throw try use var while xor
31
+ cfunction old_function
32
+ ]
33
+
34
# Type names; IDENT_KIND registers them as :pre_type.
# NOTE(review): `array` is also listed in KEYWORDS and BUILTIN_FUNCTIONS; presumably
# the list that IDENT_KIND adds last decides its kind - confirm against WordList#add.
+ TYPES = %w[ int integer float double bool boolean string array object resource ]
35
+
36
# Language constructs that look like function calls; IDENT_KIND registers them as :reserved.
+ LANGUAGE_CONSTRUCTS = %w[
37
+ die echo empty exit eval include include_once isset list
38
+ require require_once return print unset
39
+ ]
40
+
41
# Predefined class names; IDENT_KIND registers them as :pre_constant.
+ CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]
42
+
43
+ # according to http://php.net/quickref.php on 2009-04-21;
44
+ # all functions with _ excluded (module functions) and selected additional functions
45
# Names of built-in PHP functions; IDENT_KIND registers them as :predefined.
# Every entry must be separated by whitespace: two names that run together
# become a single bogus word and neither function is recognized.
BUILTIN_FUNCTIONS = %w[
  abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
  atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
  bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
  calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
  compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
  decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
  ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
  fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
  fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
  fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
  getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
  getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
  gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
  gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
  gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
  implode include intval ip2long iptcembed iptcparse isset
  jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
  juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
  log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
  natsort next ngettext nl2br nthmac octdec opendir openlog
  ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
  prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
  readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
  serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
  snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
  strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
  strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
  strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
  textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
  unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
  array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
  array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
  array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
  array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
  array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
  array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
  array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
  array_walk_recursive
  assert_options base_convert base64_decode base64_encode
  chunk_split class_exists class_implements class_parents
  count_chars debug_backtrace debug_print_backtrace debug_zval_dump
  error_get_last error_log error_reporting extension_loaded
  file_exists file_get_contents file_put_contents load_file
  func_get_arg func_get_args func_num_args function_exists
  get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
  get_current_user get_declared_classes get_declared_interfaces get_defined_constants
  get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
  get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
  get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
  gc_collect_cycles gc_disable gc_enable gc_enabled
  halt_compiler headers_list headers_sent highlight_file highlight_string
  html_entity_decode htmlspecialchars_decode
  in_array include_once inclued_get_data
  is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
  is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
  is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
  is_writable is_writeable
  locale_get_default locale_set_default
  number_format override_function parse_str parse_url
  php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
  php_strip_whitespace php_uname
  preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
  preg_replace_callback preg_split print_r
  require_once register_shutdown_function register_tick_function
  set_error_handler set_exception_handler set_file_buffer set_include_path
  set_magic_quotes_runtime set_time_limit shell_exec
  str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
  strip_tags substr_compare substr_count substr_replace
  time_nanosleep time_sleep_until
  token_get_all token_name trigger_error
  unregister_tick_function use_soap_error_handler user_error
  utf8_decode utf8_encode var_dump var_export
  version_compare
  zend_logo_guid zend_thread_id zend_version
  create_function call_user_func_array
  posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
  posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
  posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
  posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
  posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
  posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
  posix_setuid posix_strerror posix_times posix_ttyname posix_uname
  pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
  pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
  pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
  pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
]
133
+ # TODO: more built-in PHP functions?
134
+
135
# PHP error-level constants (E_*); IDENT_KIND registers them as :exception.
+ EXCEPTIONS = %w[
136
+ E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
137
+ E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
138
+ ]
139
+
140
# Predefined constants, magic constants and constant-like words
# (null, true, false, self, parent); IDENT_KIND registers them as :pre_constant.
# Each name is listed exactly once.
CONSTANTS = %w[
  null true false self parent
  __LINE__ __DIR__ __FILE__
  __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
  PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
  PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
  PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
  PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
  PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
  __COMPILER_HALT_OFFSET__
  EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
  EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
  COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
  CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
  M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
  CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
  HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
  INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
  CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
  STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
  LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
  ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
  ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
  MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
  ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
  POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
  N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
  YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
  LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
  LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
  LOG_NDELAY LOG_NOWAIT LOG_PERROR
]
172
+
173
# Names of predefined variables, including the leading `$`;
# VARIABLE_KIND maps them to :predefined.
+ PREDEFINED = %w[
174
+ $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
175
+ $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
176
+ $argc $argv
177
+ ]
178
+
179
# Lookup table from an identifier to its token kind, built from the word lists above.
# NOTE(review): presumably :ident is what unlisted words map to (scan_tokens compares
# the lookup result against :ident) and lookups ignore case, going by the class
# name - confirm against CaseIgnoringWordList.
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
180
+ add(KEYWORDS, :reserved).
181
+ add(TYPES, :pre_type).
182
+ add(LANGUAGE_CONSTRUCTS, :reserved).
183
+ add(BUILTIN_FUNCTIONS, :predefined).
184
+ add(CLASSES, :pre_constant).
185
+ add(EXCEPTIONS, :exception).
186
+ add(CONSTANTS, :pre_constant)
187
+
188
# Lookup table from a variable name (with `$`) to its token kind.
# NOTE(review): presumably unlisted variables map to :local_variable - confirm against WordList.
+ VARIABLE_KIND = WordList.new(:local_variable).
189
+ add(PREDEFINED, :predefined)
190
+ end
191
+
192
# Regular expressions used by the PHP scanner.
module RE

  # Opening of a PHP section: <script language="php"> (either quote style),
  # <?php (optionally followed by one digit), or a short <? tag that is not <?xml.
  PHP_START = /
    <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
    <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
    <\?php\d? |
    <\?(?!xml)
  /xi

  # Closing of a PHP section.
  PHP_END = %r!
    </script> |
    \?>
  !xi

  # Markup that indicates the input is an HTML document.
  HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i

  # A PHP identifier: a letter, underscore or any character from \x7f upwards,
  # followed by any number of those or digits.
  #
  # The pattern is built with Regexp.new instead of a literal because a regexp
  # literal containing \xFF is rejected by Ruby 1.9+ when the source file is
  # UTF-8 ("invalid multibyte escape"), which would keep the scanner from loading.
  IDENTIFIER =
    if ''.respond_to?(:encoding)
      # Ruby 1.9+: "everything from \x7f upwards" is written as a negated
      # ASCII range so that it works on characters rather than raw bytes.
      Regexp.new('[a-z_[^\x00-\x7e]][a-z0-9_[^\x00-\x7e]]*', Regexp::IGNORECASE)
    else
      # Ruby 1.8: strings are byte sequences, so the byte range can be used as is.
      Regexp.new('[a-z_\x7f-\xFF][a-z0-9_\x7f-\xFF]*', Regexp::IGNORECASE)
    end

  # A variable: `$` followed by an identifier.
  VARIABLE = /\$#{IDENTIFIER}/

  OPERATOR = /
    \.(?!\d)=? |      # dot that is not decimal point, string concatenation
    && | \|\| |       # logic
    :: | -> | => |    # scope, member, dictionary
    \+\+ | -- |       # increment, decrement
    [,;?:()\[\]{}] |  # simple delimiters
    [-+*\/%&|^]=? |   # ordinary math, binary logic, assignment shortcuts
    [~$] |            # whatever
    =& |              # reference assignment
    [=!]=?=? | <> |   # comparison and assignment
    <<=? | >>=? | [<>]=?  # comparison and shift
  /x

end
225
+
226
# Tokenizes the input, switching between HTML and PHP code.
#
# `states` is used as a stack; its last element selects the branch that runs
# on each pass of the main loop:
#   :initial            - HTML; the text up to the next PHP start tag is handed to @html_scanner
#   :php                - PHP code
#   :sqstring           - single-quoted string, or a heredoc opened with a '...' name
#   :dqstring           - double-quoted or backtick string, or any other heredoc
#   :class_expected     - directly after the word `class`
#   :function_expected  - directly after the word `function`
#
# Tokens are appended to `tokens` as [text, kind] pairs (plus [:open, kind] /
# [:close, kind] group markers), and `tokens` is returned.
# `options` is not read inside this method.
+ def scan_tokens tokens, options
227
+
228
+ if check(RE::PHP_START) || # starts with <?
229
+ (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
230
+ exist?(RE::HTML_INDICATOR) ||
231
+ check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
232
+ # is HTML with embedded PHP, so start with HTML
233
+ states = [:initial]
234
+ else
235
+ # is just PHP, so start with PHP surrounded by HTML
236
+ states = [:initial, :php]
237
+ end
238
+
239
# label_expected: while true, a plain identifier followed by a single ':' becomes a :label token.
# case_expected: set after `case` / `default`, so that the next ':' operator re-enables labels.
+ label_expected = true
240
+ case_expected = false
241
+
242
# heredoc_delimiter: Regexp matching the closing heredoc name while inside a heredoc, nil otherwise.
# delimiter: the opening character (" or `) of the string currently scanned in :dqstring.
# modifier: a pending `b` binary-string prefix, emitted as :modifier once the string opens.
+ heredoc_delimiter = nil
243
+ delimiter = nil
244
+ modifier = nil
245
+
246
+ until eos?
247
+
248
+ match = nil
249
+ kind = nil
250
+
251
+ case states.last
252
+
253
+ when :initial # HTML
254
+ if scan RE::PHP_START
255
+ kind = :inline_delimiter
256
+ label_expected = true
257
+ states << :php
258
+ else
259
+ match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
260
+ @html_scanner.tokenize match unless match.empty?
261
+ next
262
+ end
263
+
264
+ when :php
265
+ if match = scan(/\s+/)
266
+ tokens << [match, :space]
267
+ next
268
+
269
+ elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
270
+ kind = :comment
271
+
272
+ elsif match = scan(RE::IDENTIFIER)
273
+ kind = Words::IDENT_KIND[match]
274
+ if kind == :ident && label_expected && check(/:(?!:)/)
275
+ kind = :label
276
+ label_expected = true
277
+ else
278
+ label_expected = false
279
+ if kind == :ident && match =~ /^[A-Z]/
280
+ kind = :constant
281
+ elsif kind == :reserved
282
+ case match
283
+ when 'class'
284
+ states << :class_expected
285
+ when 'function'
286
+ states << :function_expected
287
+ when 'case', 'default'
288
+ case_expected = true
289
+ end
290
+ elsif match == 'b' && check(/['"]/) # binary string literal
291
+ modifier = match
292
+ next
293
+ end
294
+ end
295
+
296
+ elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
297
+ label_expected = false
298
+ kind = :float
299
+
300
+ elsif scan(/0x[0-9a-fA-F]+/)
301
+ label_expected = false
302
+ kind = :hex
303
+
304
+ elsif scan(/\d+/)
305
+ label_expected = false
306
+ kind = :integer
307
+
308
+ elsif scan(/'/)
309
+ tokens << [:open, :string]
310
+ if modifier
311
+ tokens << [modifier, :modifier]
312
+ modifier = nil
313
+ end
314
+ kind = :delimiter
315
+ states.push :sqstring
316
+
317
+ elsif match = scan(/["`]/)
318
+ tokens << [:open, :string]
319
+ if modifier
320
+ tokens << [modifier, :modifier]
321
+ modifier = nil
322
+ end
323
+ delimiter = match
324
+ kind = :delimiter
325
+ states.push :dqstring
326
+
327
+ elsif match = scan(RE::VARIABLE)
328
+ label_expected = false
329
+ kind = Words::VARIABLE_KIND[match]
330
+
331
+ elsif scan(/\{/)
332
+ kind = :operator
333
+ label_expected = true
334
+ states.push :php
335
+
336
+ elsif scan(/\}/)
337
+ if states.size == 1
338
+ kind = :error
339
+ else
340
+ states.pop
341
# An Array entry is [string_state, delimiter], stored by the `{$` handling in :dqstring:
# restore the delimiter and the string state, then close the inline group.
+ if states.last.is_a?(::Array)
342
+ delimiter = states.last[1]
343
+ states[-1] = states.last[0]
344
+ tokens << [matched, :delimiter]
345
+ tokens << [:close, :inline]
346
+ next
347
+ else
348
+ kind = :operator
349
+ label_expected = true
350
+ end
351
+ end
352
+
353
+ elsif scan(/@/)
354
+ label_expected = false
355
+ kind = :exception
356
+
357
+ elsif scan RE::PHP_END
358
+ kind = :inline_delimiter
359
+ states = [:initial]
360
+
361
# Heredoc start: a name written as '...' is scanned in :sqstring, a bare or "..." name in :dqstring.
+ elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
362
+ tokens << [:open, :string]
363
+ warn 'heredoc in heredoc?' if heredoc_delimiter
364
+ heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
365
+ kind = :delimiter
366
+ states.push self[3] ? :sqstring : :dqstring
367
+ heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
368
+
369
+ elsif match = scan(/#{RE::OPERATOR}/o)
370
+ label_expected = match == ';'
371
+ if case_expected
372
+ label_expected = true if match == ':'
373
+ case_expected = false
374
+ end
375
+ kind = :operator
376
+
377
+ else
378
+ getch
379
+ kind = :error
380
+
381
+ end
382
+
383
+ when :sqstring
384
+ if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
385
+ kind = :content
386
+ elsif !heredoc_delimiter && scan(/'/)
387
+ tokens << [matched, :delimiter]
388
+ tokens << [:close, :string]
389
+ delimiter = nil
390
+ label_expected = false
391
+ states.pop
392
+ next
393
+ elsif heredoc_delimiter && match = scan(/\n/)
394
+ kind = :content
395
+ if scan heredoc_delimiter
396
+ tokens << ["\n", :content]
397
+ tokens << [matched, :delimiter]
398
+ tokens << [:close, :string]
399
+ heredoc_delimiter = nil
400
+ label_expected = false
401
+ states.pop
402
+ next
403
+ end
404
+ elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
405
+ kind = :char
406
+ elsif scan(/\\./m)
407
+ kind = :content
408
+ elsif scan(/\\/)
409
+ kind = :error
410
+ end
411
+
412
+ when :dqstring
413
+ if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
414
+ kind = :content
415
+ elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
416
+ tokens << [matched, :delimiter]
417
+ tokens << [:close, :string]
418
+ delimiter = nil
419
+ label_expected = false
420
+ states.pop
421
+ next
422
+ elsif heredoc_delimiter && match = scan(/\n/)
423
+ kind = :content
424
+ if scan heredoc_delimiter
425
+ tokens << ["\n", :content]
426
+ tokens << [matched, :delimiter]
427
+ tokens << [:close, :string]
428
+ heredoc_delimiter = nil
429
+ label_expected = false
430
+ states.pop
431
+ next
432
+ end
433
+ elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
434
+ kind = :char
435
+ elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
436
+ kind = :char
437
+ elsif scan(/\\./m)
438
+ kind = :content
439
+ elsif scan(/\\/)
440
+ kind = :error
441
+ elsif match = scan(/#{RE::VARIABLE}/o)
442
+ kind = :local_variable
443
+ if check(/\[#{RE::IDENTIFIER}\]/o)
444
+ tokens << [:open, :inline]
445
+ tokens << [match, :local_variable]
446
+ tokens << [scan(/\[/), :operator]
447
+ tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
448
+ tokens << [scan(/\]/), :operator]
449
+ tokens << [:close, :inline]
450
+ next
451
+ elsif check(/\[/)
452
+ match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
453
+ kind = :error
454
+ elsif check(/->#{RE::IDENTIFIER}/o)
455
+ tokens << [:open, :inline]
456
+ tokens << [match, :local_variable]
457
+ tokens << [scan(/->/), :operator]
458
+ tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
459
+ tokens << [:close, :inline]
460
+ next
461
+ elsif check(/->/)
462
+ match << scan(/->/)
463
+ kind = :error
464
+ end
465
+ elsif match = scan(/\{/)
466
+ if check(/\$/)
467
+ kind = :delimiter
468
# Keep the string state together with its delimiter so that the matching `}` can restore both.
+ states[-1] = [states.last, delimiter]
469
+ delimiter = nil
470
+ states.push :php
471
+ tokens << [:open, :inline]
472
+ else
473
+ kind = :string
474
+ end
475
+ elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
476
+ kind = :local_variable
477
+ elsif scan(/\$/)
478
+ kind = :content
479
+ end
480
+
481
+ when :class_expected
482
+ if scan(/\s+/)
483
+ kind = :space
484
+ elsif match = scan(/#{RE::IDENTIFIER}/o)
485
+ kind = :class
486
+ states.pop
487
+ else
488
+ states.pop
489
+ next
490
+ end
491
+
492
+ when :function_expected
493
+ if scan(/\s+/)
494
+ kind = :space
495
+ elsif scan(/&/)
496
+ kind = :operator
497
+ elsif match = scan(/#{RE::IDENTIFIER}/o)
498
+ kind = :function
499
+ states.pop
500
+ else
501
+ states.pop
502
+ next
503
+ end
504
+
505
+ else
506
+ raise_inspect 'Unknown state!', tokens, states
507
+ end
508
+
509
# Branches that did not assign `match` use the text of the most recent scan.
+ match ||= matched
510
+ if $DEBUG and not kind
511
+ raise_inspect 'Error token %p in line %d' %
512
+ [[match, kind], line], tokens, states
513
+ end
514
+ raise_inspect 'Empty token', tokens, states unless match
515
+
516
+ tokens << [match, kind]
517
+
518
+ end
519
+
520
+ tokens
521
+ end
522
+
523
+ end
524
+
525
+ end
526
+ end