coderay-beta 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. data/FOLDERS +53 -0
  2. data/LICENSE +504 -0
  3. data/bin/coderay +82 -0
  4. data/bin/coderay_stylesheet +4 -0
  5. data/lib/README +129 -0
  6. data/lib/coderay.rb +320 -0
  7. data/lib/coderay/duo.rb +85 -0
  8. data/lib/coderay/encoder.rb +213 -0
  9. data/lib/coderay/encoders/_map.rb +11 -0
  10. data/lib/coderay/encoders/comment_filter.rb +43 -0
  11. data/lib/coderay/encoders/count.rb +21 -0
  12. data/lib/coderay/encoders/debug.rb +49 -0
  13. data/lib/coderay/encoders/div.rb +19 -0
  14. data/lib/coderay/encoders/filter.rb +75 -0
  15. data/lib/coderay/encoders/html.rb +305 -0
  16. data/lib/coderay/encoders/html/css.rb +70 -0
  17. data/lib/coderay/encoders/html/numerization.rb +133 -0
  18. data/lib/coderay/encoders/html/output.rb +206 -0
  19. data/lib/coderay/encoders/json.rb +69 -0
  20. data/lib/coderay/encoders/lines_of_code.rb +90 -0
  21. data/lib/coderay/encoders/null.rb +26 -0
  22. data/lib/coderay/encoders/page.rb +20 -0
  23. data/lib/coderay/encoders/span.rb +19 -0
  24. data/lib/coderay/encoders/statistic.rb +77 -0
  25. data/lib/coderay/encoders/term.rb +137 -0
  26. data/lib/coderay/encoders/text.rb +32 -0
  27. data/lib/coderay/encoders/token_class_filter.rb +84 -0
  28. data/lib/coderay/encoders/xml.rb +71 -0
  29. data/lib/coderay/encoders/yaml.rb +22 -0
  30. data/lib/coderay/for_redcloth.rb +85 -0
  31. data/lib/coderay/helpers/file_type.rb +240 -0
  32. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  33. data/lib/coderay/helpers/plugin.rb +349 -0
  34. data/lib/coderay/helpers/word_list.rb +138 -0
  35. data/lib/coderay/scanner.rb +284 -0
  36. data/lib/coderay/scanners/_map.rb +23 -0
  37. data/lib/coderay/scanners/c.rb +203 -0
  38. data/lib/coderay/scanners/cpp.rb +228 -0
  39. data/lib/coderay/scanners/css.rb +210 -0
  40. data/lib/coderay/scanners/debug.rb +62 -0
  41. data/lib/coderay/scanners/delphi.rb +150 -0
  42. data/lib/coderay/scanners/diff.rb +105 -0
  43. data/lib/coderay/scanners/groovy.rb +263 -0
  44. data/lib/coderay/scanners/html.rb +182 -0
  45. data/lib/coderay/scanners/java.rb +176 -0
  46. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  47. data/lib/coderay/scanners/java_script.rb +224 -0
  48. data/lib/coderay/scanners/json.rb +112 -0
  49. data/lib/coderay/scanners/nitro_xhtml.rb +136 -0
  50. data/lib/coderay/scanners/php.rb +526 -0
  51. data/lib/coderay/scanners/plaintext.rb +21 -0
  52. data/lib/coderay/scanners/python.rb +285 -0
  53. data/lib/coderay/scanners/rhtml.rb +74 -0
  54. data/lib/coderay/scanners/ruby.rb +404 -0
  55. data/lib/coderay/scanners/ruby/patterns.rb +238 -0
  56. data/lib/coderay/scanners/scheme.rb +145 -0
  57. data/lib/coderay/scanners/sql.rb +162 -0
  58. data/lib/coderay/scanners/xml.rb +17 -0
  59. data/lib/coderay/scanners/yaml.rb +144 -0
  60. data/lib/coderay/style.rb +20 -0
  61. data/lib/coderay/styles/_map.rb +7 -0
  62. data/lib/coderay/styles/cycnus.rb +151 -0
  63. data/lib/coderay/styles/murphy.rb +132 -0
  64. data/lib/coderay/token_classes.rb +86 -0
  65. data/lib/coderay/tokens.rb +391 -0
  66. data/lib/term/ansicolor.rb +220 -0
  67. metadata +123 -0
@@ -0,0 +1,526 @@
1
+ module CodeRay
2
+ module Scanners
3
+
4
+ load :html
5
+
6
+ # Original by Stefan Walk.
7
+ class PHP < Scanner
8
+
9
+ register_for :php  # presumably makes this scanner selectable as language :php; register_for is defined outside this file - confirm
10
+ file_extension 'php'  # presumably the default file extension for this language; helper defined outside this file - confirm
11
+
12
+ KINDS_NOT_LOC = HTML::KINDS_NOT_LOC  # same value as the HTML scanner's constant; its meaning is not visible here
13
+
14
# Creates the nested HTML scanner that handles the text outside of PHP
# sections. It is given this scanner's @tokens object (presumably so both
# scanners append to the same token list - confirm against CodeRay.scanner).
def setup
  @html_scanner = CodeRay.scanner(
    :html,
    :tokens      => @tokens,
    :keep_tokens => true,
    :keep_state  => true
  )
end
17
+
18
# Resets this scanner through the superclass implementation first, then
# resets the embedded HTML scanner created in #setup.
def reset_instance
  super
  @html_scanner.reset
end
22
+
23
# Word lists used to classify PHP identifiers and variables.
#
# Each constant below is a plain array of words; IDENT_KIND and
# VARIABLE_KIND at the end map those words to token kinds.
module Words

  # according to http://www.php.net/manual/en/reserved.keywords.php
  KEYWORDS = %w[
    abstract and array as break case catch class clone const continue declare default do else elseif
    enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
    goto if implements interface instanceof namespace new or private protected public static switch
    throw try use var while xor
    cfunction old_function
  ]

  TYPES = %w[ int integer float double bool boolean string array object resource ]

  LANGUAGE_CONSTRUCTS = %w[
    die echo empty exit eval include include_once isset list
    require require_once return print unset
  ]

  CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]

  # according to http://php.net/quickref.php on 2009-04-21;
  # all functions with _ excluded (module functions) and selected additional functions
  BUILTIN_FUNCTIONS = %w[
    abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
    atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
    bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
    calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
    compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
    decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
    ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
    fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
    fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
    fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
    getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
    getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
    gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
    gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
    gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
    implode include intval ip2long iptcembed iptcparse isset
    jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
    juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
    log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
    natsort next ngettext nl2br nthmac octdec opendir openlog
    ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
    prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
    readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
    serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
    snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
    strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
    strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
    strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
    textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
    unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
    array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
    array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
    array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
    array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
    array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
    array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
    array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
    array_walk_recursive
    assert_options base_convert base64_decode base64_encode
    chunk_split class_exists class_implements class_parents
    count_chars debug_backtrace debug_print_backtrace debug_zval_dump
    error_get_last error_log error_reporting extension_loaded
    file_exists file_get_contents file_put_contents load_file
    func_get_arg func_get_args func_num_args function_exists
    get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
    get_current_user get_declared_classes get_declared_interfaces get_defined_constants
    get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
    get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
    get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
    gc_collect_cycles gc_disable gc_enable gc_enabled
    halt_compiler headers_list headers_sent highlight_file highlight_string
    html_entity_decode htmlspecialchars_decode
    in_array include_once inclued_get_data
    is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
    is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
    is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
    is_writable is_writeable
    locale_get_default locale_set_default
    number_format override_function parse_str parse_url
    php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
    php_strip_whitespace php_uname
    preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
    preg_replace_callback preg_split print_r
    require_once register_shutdown_function register_tick_function
    set_error_handler set_exception_handler set_file_buffer set_include_path
    set_magic_quotes_runtime set_time_limit shell_exec
    str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
    strip_tags substr_compare substr_count substr_replace
    time_nanosleep time_sleep_until
    token_get_all token_name trigger_error
    unregister_tick_function use_soap_error_handler user_error
    utf8_decode utf8_encode var_dump var_export
    version_compare
    zend_logo_guid zend_thread_id zend_version
    create_function call_user_func_array
    posix_access posix_ctermid posix_get_last_error posix_getcwd posix_getegid
    posix_geteuid posix_getgid posix_getgrgid posix_getgrnam posix_getgroups
    posix_getlogin posix_getpgid posix_getpgrp posix_getpid posix_getppid
    posix_getpwnam posix_getpwuid posix_getrlimit posix_getsid posix_getuid
    posix_initgroups posix_isatty posix_kill posix_mkfifo posix_mknod
    posix_setegid posix_seteuid posix_setgid posix_setpgid posix_setsid
    posix_setuid posix_strerror posix_times posix_ttyname posix_uname
    pcntl_alarm pcntl_exec pcntl_fork pcntl_getpriority pcntl_setpriority
    pcntl_signal pcntl_signal_dispatch pcntl_sigprocmask pcntl_sigtimedwait
    pcntl_sigwaitinfo pcntl_wait pcntl_waitpid pcntl_wexitstatus pcntl_wifexited
    pcntl_wifsignaled pcntl_wifstopped pcntl_wstopsig pcntl_wtermsig
  ]
  # TODO: more built-in PHP functions?

  EXCEPTIONS = %w[
    E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
    E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
  ]

  CONSTANTS = %w[
    null true false self parent
    __LINE__ __DIR__ __FILE__
    __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
    PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
    PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
    PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
    PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
    PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
    __COMPILER_HALT_OFFSET__
    EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
    EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
    COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
    CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
    M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
    CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
    HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
    INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
    CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
    STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
    LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
    ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
    MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
    ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
    POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
    N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
    YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
    LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
    LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
    LOG_NDELAY LOG_NOWAIT LOG_PERROR
  ]

  PREDEFINED = %w[
    $GLOBALS $_SERVER $_GET $_POST $_FILES $_REQUEST $_SESSION $_ENV
    $_COOKIE $php_errormsg $HTTP_RAW_POST_DATA $http_response_header
    $argc $argv
  ]

  # Identifier text => token kind; :ident is passed as the constructor
  # argument (presumably the kind for unlisted words - confirm in
  # helpers/word_list.rb). Several words appear in more than one list
  # (e.g. "array", "echo"), so the order of the add calls is kept as is.
  IDENT_KIND = CaseIgnoringWordList.new(:ident).
    add(KEYWORDS, :reserved).
    add(TYPES, :pre_type).
    add(LANGUAGE_CONSTRUCTS, :reserved).
    add(BUILTIN_FUNCTIONS, :predefined).
    add(CLASSES, :pre_constant).
    add(EXCEPTIONS, :exception).
    add(CONSTANTS, :pre_constant)

  # Variable text (including the leading "$") => token kind.
  VARIABLE_KIND = WordList.new(:local_variable).
    add(PREDEFINED, :predefined)
end
191
+
192
# Regular expressions used by the PHP scanner.
module RE

  # Start of a PHP section: a <script language="php"> tag (double or single
  # quotes), "<?php" optionally followed by one digit, or the short tag "<?"
  # unless it begins "<?xml". Matched case-insensitively.
  PHP_START = /
    <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
    <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
    <\?php\d? |
    <\?(?!xml)
  /xi

  # End of a PHP section: "</script>" or "?>".
  PHP_END = %r!
    </script> |
    \?>
  !xi

  # Markup that indicates the input is an HTML document.
  HTML_INDICATOR = /<!DOCTYPE html|<(?:html|body|div|p)[> ]/i

  # A PHP identifier: a letter, underscore, DEL (\x7f) or any non-ASCII
  # character, followed by any number of those or digits.
  #
  # The non-ASCII part is written as the negated class [^\x00-\x7f] instead
  # of the byte range \x7f-\xFF: a literal \xFF escape is rejected as
  # "invalid multibyte escape" when the source file is UTF-8, which would
  # keep this file from loading. The negated class matches every non-ASCII
  # character in a UTF-8 string.
  IDENTIFIER = /[a-z_\x7f[^\x00-\x7f]][a-z0-9_\x7f[^\x00-\x7f]]*/i

  # A variable: "$" directly followed by an identifier.
  VARIABLE = /\$#{IDENTIFIER}/

  # Operators and delimiters. Alternatives are tried in order, so the
  # multi-character operators come before their one-character prefixes.
  OPERATOR = /
    \.(?!\d)=? |      # dot that is not decimal point, string concatenation
    && | \|\| |       # logic
    :: | -> | => |    # scope, member, dictionary
    \+\+ | -- |       # increment, decrement
    [,;?:()\[\]{}] |  # simple delimiters
    [-+*\/%&|^]=? |   # ordinary math, binary logic, assignment shortcuts
    [~$] |            # whatever
    =& |              # reference assignment
    [=!]=?=? | <> |   # comparison and assignment
    <<=? | >>=? | [<>]=?  # comparison and shift
  /x

end
225
+
226
# Scans the input and appends the resulting tokens to +tokens+.
#
# The scanner keeps a stack of states:
#   :initial            HTML text outside of PHP (handed to @html_scanner)
#   :php                PHP code
#   :sqstring           single-quoted string or heredoc opened with <<<'ID'
#   :dqstring           double-quoted / backtick string or other heredoc
#   :class_expected     directly after the keyword "class"
#   :function_expected  directly after the keyword "function"
# While a "{$...}" interpolation inside a string is being scanned, the
# string's stack entry is temporarily replaced by [state, delimiter].
#
# tokens::  token list to append to; text tokens are [text, kind] pairs and
#           groups are wrapped in [:open, kind] / [:close, kind].
# options:: not read by this method.
#
# Returns +tokens+.
def scan_tokens tokens, options

  if check(RE::PHP_START) ||  # starts with <?
   (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
   exist?(RE::HTML_INDICATOR) ||
   check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
    # is HTML with embedded PHP, so start with HTML
    states = [:initial]
  else
    # is just PHP, so start with PHP surrounded by HTML
    states = [:initial, :php]
  end

  label_expected = true   # an identifier followed by ":" would be a label here
  case_expected = false   # set after "case" / "default" until the next operator

  heredoc_delimiter = nil  # Regexp matching the closing heredoc id, if any
  delimiter = nil          # opening character of the current dqstring
  modifier = nil           # pending "b" prefix of a binary string literal

  until eos?

    match = nil
    kind = nil

    case states.last

    when :initial  # HTML
      if scan RE::PHP_START
        kind = :inline_delimiter
        label_expected = true
        states << :php
      else
        # everything up to the next PHP start (or the end of input) is HTML
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
        @html_scanner.tokenize match unless match.empty?
        next
      end

    when :php
      if match = scan(/\s+/)
        tokens << [match, :space]
        next

      elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        kind = :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :reserved
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)  # binary string literal
            # held back; emitted as :modifier once the string group is open
            modifier = match
            next
          end
        end

      elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        kind = :float

      elsif scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        kind = :hex

      elsif scan(/\d+/)
        label_expected = false
        kind = :integer

      elsif scan(/'/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        kind = :delimiter
        states.push :sqstring

      elsif match = scan(/["`]/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        delimiter = match
        kind = :delimiter
        states.push :dqstring

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        kind = Words::VARIABLE_KIND[match]

      elsif scan(/\{/)
        kind = :operator
        label_expected = true
        states.push :php

      elsif scan(/\}/)
        if states.size == 1
          kind = :error
        else
          states.pop
          if states.last.is_a?(::Array)
            # end of a "{$...}" interpolation: restore the string state
            # and its delimiter, then close the inline group
            delimiter = states.last[1]
            states[-1] = states.last[0]
            tokens << [matched, :delimiter]
            tokens << [:close, :inline]
            next
          else
            kind = :operator
            label_expected = true
          end
        end

      elsif scan(/@/)
        label_expected = false
        kind = :exception

      elsif scan RE::PHP_END
        kind = :inline_delimiter
        states = [:initial]

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        tokens << [:open, :string]
        warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        kind = :delimiter
        # only the single-quoted form is scanned without interpolation
        states.push self[3] ? :sqstring : :dqstring
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        kind = :operator

      else
        getch
        kind = :error

      end

    when :sqstring
      if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        kind = :content
      elsif !heredoc_delimiter && scan(/'/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        if scan heredoc_delimiter
          # the newline is emitted here because this branch skips the
          # common token push at the end of the loop
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/\\/)
        kind = :error
      end

    when :dqstring
      if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        kind = :content
      elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        if scan heredoc_delimiter
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        kind = :char
      elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/\\/)
        kind = :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        kind = :local_variable
        if check(/\[#{RE::IDENTIFIER}\]/o)
          # $var[key] with an unquoted key
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/\[/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [scan(/\]/), :operator]
          tokens << [:close, :inline]
          next
        elsif check(/\[/)
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          kind = :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          # $var->property
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/->/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [:close, :inline]
          next
        elsif check(/->/)
          match << scan(/->/)
          kind = :error
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # "{$...}": scan the expression as PHP; remember the string state
          # and delimiter so the closing "}" can restore them
          kind = :delimiter
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php
          tokens << [:open, :inline]
        else
          # a brace that does not start an interpolation is ordinary string
          # text, like a lone "$" below
          kind = :content
        end
      elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
        kind = :local_variable
      elsif scan(/\$/)
        kind = :content
      end

    when :class_expected
      if scan(/\s+/)
        kind = :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :class
        states.pop
      else
        # no class name follows; rescan this position in the previous state
        states.pop
        next
      end

    when :function_expected
      if scan(/\s+/)
        kind = :space
      elsif scan(/&/)
        kind = :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :function
        states.pop
      else
        # no function name follows; rescan this position in the previous state
        states.pop
        next
      end

    else
      raise_inspect 'Unknown state!', tokens, states
    end

    # branches that did not assign match use the text of the last scan
    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, states
    end
    raise_inspect 'Empty token', tokens, states unless match

    tokens << [match, kind]

  end

  tokens
end
522
+
523
+ end
524
+
525
+ end
526
+ end