raldred-coderay 0.9.0 → 0.9.339

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/lib/README +128 -0
  2. data/lib/coderay.rb +319 -0
  3. data/lib/coderay/duo.rb +85 -0
  4. data/lib/coderay/encoder.rb +187 -0
  5. data/lib/coderay/encoders/_map.rb +9 -0
  6. data/lib/coderay/encoders/count.rb +21 -0
  7. data/lib/coderay/encoders/debug.rb +49 -0
  8. data/lib/coderay/encoders/div.rb +20 -0
  9. data/lib/coderay/encoders/html.rb +306 -0
  10. data/lib/coderay/encoders/html/css.rb +70 -0
  11. data/lib/coderay/encoders/html/numerization.rb +133 -0
  12. data/lib/coderay/encoders/html/output.rb +206 -0
  13. data/lib/coderay/encoders/json.rb +19 -0
  14. data/lib/coderay/encoders/null.rb +26 -0
  15. data/lib/coderay/encoders/page.rb +21 -0
  16. data/lib/coderay/encoders/span.rb +20 -0
  17. data/lib/coderay/encoders/statistic.rb +77 -0
  18. data/lib/coderay/encoders/term.rb +114 -0
  19. data/lib/coderay/encoders/text.rb +32 -0
  20. data/lib/coderay/encoders/tokens.rb +44 -0
  21. data/lib/coderay/encoders/xml.rb +71 -0
  22. data/lib/coderay/encoders/yaml.rb +22 -0
  23. data/lib/coderay/for_redcloth.rb +73 -0
  24. data/lib/coderay/helpers/file_type.rb +226 -0
  25. data/lib/coderay/helpers/gzip_simple.rb +123 -0
  26. data/lib/coderay/helpers/plugin.rb +339 -0
  27. data/lib/coderay/helpers/word_list.rb +124 -0
  28. data/lib/coderay/scanner.rb +271 -0
  29. data/lib/coderay/scanners/_map.rb +21 -0
  30. data/lib/coderay/scanners/c.rb +166 -0
  31. data/lib/coderay/scanners/css.rb +202 -0
  32. data/lib/coderay/scanners/debug.rb +61 -0
  33. data/lib/coderay/scanners/delphi.rb +150 -0
  34. data/lib/coderay/scanners/diff.rb +104 -0
  35. data/lib/coderay/scanners/groovy.rb +271 -0
  36. data/lib/coderay/scanners/html.rb +175 -0
  37. data/lib/coderay/scanners/java.rb +173 -0
  38. data/lib/coderay/scanners/java/builtin_types.rb +419 -0
  39. data/lib/coderay/scanners/java_script.rb +195 -0
  40. data/lib/coderay/scanners/json.rb +107 -0
  41. data/lib/coderay/scanners/nitro_xhtml.rb +132 -0
  42. data/lib/coderay/scanners/php.rb +404 -0
  43. data/lib/coderay/scanners/plaintext.rb +18 -0
  44. data/lib/coderay/scanners/python.rb +232 -0
  45. data/lib/coderay/scanners/rhtml.rb +71 -0
  46. data/lib/coderay/scanners/ruby.rb +386 -0
  47. data/lib/coderay/scanners/ruby/patterns.rb +232 -0
  48. data/lib/coderay/scanners/scheme.rb +142 -0
  49. data/lib/coderay/scanners/sql.rb +162 -0
  50. data/lib/coderay/scanners/xml.rb +17 -0
  51. data/lib/coderay/scanners/yaml.rb +142 -0
  52. data/lib/coderay/style.rb +20 -0
  53. data/lib/coderay/styles/_map.rb +7 -0
  54. data/lib/coderay/styles/cycnus.rb +151 -0
  55. data/lib/coderay/styles/murphy.rb +132 -0
  56. data/lib/coderay/token_classes.rb +86 -0
  57. data/lib/coderay/tokens.rb +387 -0
  58. metadata +59 -1
@@ -0,0 +1,107 @@
1
module CodeRay
  module Scanners

    # Scanner for JSON documents.
    #
    # Punctuation, the bare words true/false/null and numbers are emitted as
    # single [text, kind] tokens. Strings are wrapped in an
    # [:open, kind] ... [:close, kind] group, where kind is :key while an
    # object key is expected and :string otherwise.
    class JSON < Scanner

      include Streamable

      register_for :json
      file_extension 'json'

      # Bare-word literals recognised outside of strings.
      CONSTANTS = %w( true false null )
      # Kind lookup for bare words; CONSTANTS are registered as :reserved.
      # NOTE(review): :key is presumably the WordList default kind - confirm.
      IDENT_KIND = WordList.new(:key).add(CONSTANTS, :reserved)

      # Character that may follow a backslash in a simple escape.
      ESCAPE = / [bfnrt\\"\/] /x
      # Tail of a \uXXXX escape (the backslash is matched by the caller).
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x

      # Scans the whole input and appends the resulting tokens to +tokens+.
      #
      # tokens::  collection the tokens are pushed onto with <<
      # options:: not read by this scanner
      #
      # Returns +tokens+.
      def scan_tokens tokens, options

        state = :initial
        stack = []            # open containers, innermost last (:object / :array)
        key_expected = false  # true while the next string is an object key

        until eos?

          kind = nil
          match = nil

          case state

          when :initial
            if match = scan(/ \s+ | \\\n /x)
              tokens << [match, :space]
              next
            elsif match = scan(/ [:,\[{\]}] /x)
              kind = :operator
              case match
              when '{' then stack << :object; key_expected = true
              when '[' then stack << :array
              when ':' then key_expected = false
              # Recompute instead of only ever setting the flag: after an empty
              # object inside an array ("[{}, "x"]") the flag would otherwise
              # stay true and the next array element would be tagged as a key.
              when ',' then key_expected = (stack.last == :object)
              when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
              end
            elsif match = scan(/ true | false | null /x)
              kind = IDENT_KIND[match]
            elsif match = scan(/-?(?:0|[1-9]\d*)/)
              kind = :integer
              # A fraction and/or exponent directly after the digits turns the
              # token into a float.
              if scan(/\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+/)
                match << matched
                kind = :float
              end
            elsif match = scan(/"/)
              state = key_expected ? :key : :string
              tokens << [:open, state]
              kind = :delimiter
            else
              getch
              kind = :error
            end

          when :string, :key
            if scan(/[^\\"]+/)
              kind = :content
            elsif scan(/"/)
              tokens << ['"', :delimiter]
              tokens << [:close, state]
              state = :initial
              next
            elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              kind = :char
            elsif scan(/\\./m)
              kind = :content
            elsif scan(/ \\ | $ /x)
              # Lone backslash or end of line inside a string: close the group
              # with the same kind it was opened with, then flag the error.
              tokens << [:close, state]
              kind = :error
              state = :initial
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
            end

          else
            raise_inspect 'Unknown state', tokens

          end

          # Branches that did not capture the text themselves fall back to the
          # text of the last successful scan.
          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens
          end
          raise_inspect 'Empty token', tokens unless match

          tokens << [match, kind]

        end

        # Input ended inside a string: close the still-open group.
        if [:string, :key].include? state
          tokens << [:close, state]
        end

        tokens
      end

    end

  end
end
@@ -0,0 +1,132 @@
1
module CodeRay
  module Scanners

    load :html
    load :ruby

    # Nitro XHTML Scanner
    #
    # Splits the input into HTML (delegated to the :html scanner), embedded
    # Ruby blocks and inline value expressions (delegated to the :ruby
    # scanner), and %name; style entities.
    class NitroXHTML < Scanner

      include Streamable
      register_for :nitro_xhtml
      file_extension :xhtml

      # A Ruby block in one of three notations: <?r ... ?>, <ruby> ... </ruby>
      # or <% ... %>. The closing tag is optional so that an unterminated
      # block at the end of the input still matches.
      NITRO_RUBY_BLOCK = /
        <\?r
        (?>
          [^\?]*
          (?> \?(?!>) [^\?]* )*
        )
        (?: \?> )?
      |
        <ruby>
        (?>
          [^<]*
          (?> <(?!\/ruby>) [^<]* )*
        )
        (?: <\/ruby> )?
      |
        <%
        (?>
          [^%]*
          (?> %(?!>) [^%]* )*
        )
        (?: %> )?
      /mx

      # Opening tag of a NITRO_RUBY_BLOCK match.
      RUBY_BLOCK_START = /\A(?:<\?r|<ruby>|<%)/

      # Closing tag that belongs to each opening tag of a Ruby block.
      RUBY_BLOCK_END = {
        '<?r'    => '?>',
        '<ruby>' => '</ruby>',
        '<%'     => '%>',
      }

      # An inline value expression: '#' followed by a delimited body. The
      # closing delimiter is optional.
      NITRO_VALUE_BLOCK = /
        \#
        (?:
          \{
          [^{}]*
          (?>
            \{ [^}]* \}
            (?> [^{}]* )
          )*
          \}?
        | \| [^|]* \|?
        | \( [^)]* \)?
        | \[ [^\]]* \]?
        | \\ [^\\]* \\?
        )
      /x

      # An entity of the form %name; or %#123;. The pattern is written in
      # extended layout, so it needs the x flag; without it the whitespace in
      # the literal is matched literally and ordinary entities never match.
      NITRO_ENTITY = /
        % (?: \#\d+ | \w+ ) ;
      /x

      # Start of anything that is not plain HTML.
      START_OF_RUBY = /
        (?=[<\#%])
        < (?: \?r | % | ruby> )
      | \# [{(|]
      | % (?: \#\d+ | \w+ ) ;
      /x

      # Maps an opening delimiter to its closing counterpart; any character
      # without an explicit entry closes itself.
      CLOSING_PAREN = Hash.new do |h, p|
        h[p] = p
      end.update( {
        '(' => ')',
        '[' => ']',
        '{' => '}',
      } )

    private

      # Creates the delegate scanners; both push onto this scanner's tokens.
      def setup
        @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true
        @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
      end

      # Resets this scanner and the HTML delegate.
      def reset_instance
        super
        @html_scanner.reset
      end

      # Scans the whole input and appends the resulting tokens to +tokens+.
      #
      # tokens::  collection the tokens are pushed onto with <<
      # options:: not read by this scanner
      #
      # Returns +tokens+.
      def scan_tokens tokens, options

        until eos?

          # Everything up to the next Ruby construct (or the end) is HTML.
          if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty?
            @html_scanner.tokenize match

          elsif match = scan(/#{NITRO_VALUE_BLOCK}/o)
            start_tag = match[0,2]
            delimiter = CLOSING_PAREN[start_tag[1,1]]
            # The size guard keeps a self-closing delimiter (e.g. "#|" at the
            # end of the input) from being counted as both start and end tag.
            closed = match.size > start_tag.size && match[-1,1] == delimiter
            end_tag = closed ? delimiter : ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = match[start_tag.size .. -1 - end_tag.size]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          elsif match = scan(/#{NITRO_RUBY_BLOCK}/o)
            # Take the tags from the match itself, so that all three block
            # notations keep their own delimiters and the code is sliced at
            # the right offsets.
            start_tag = match[RUBY_BLOCK_START]
            closer = RUBY_BLOCK_END[start_tag]
            closed = match.size >= start_tag.size + closer.size &&
              match[-closer.size, closer.size] == closer
            end_tag = closed ? closer : ''
            tokens << [:open, :inline]
            tokens << [start_tag, :inline_delimiter]
            code = match[start_tag.size .. -(end_tag.size)-1]
            @ruby_scanner.tokenize code
            tokens << [end_tag, :inline_delimiter] unless end_tag.empty?
            tokens << [:close, :inline]

          elsif entity = scan(/#{NITRO_ENTITY}/o)
            tokens << [entity, :entity]

          elsif scan(/%/)
            tokens << [matched, :error]

          else
            raise_inspect 'else-case reached!', tokens
          end

        end

        tokens

      end

    end

  end
end
@@ -0,0 +1,404 @@
1
# Operator helpers for combining patterns.
#
# NOTE(review): this class does not inherit from Regexp and nothing visible
# here instantiates it; it looks like a renamed (disabled) extension of
# Regexp - confirm before relying on it.
class XRegexp
  # Alternation: hands the receiver and +other+ to Regexp.union.
  def |(other)
    Regexp.union([self, other])
  end

  # Concatenation: builds a Regexp from the string forms of the receiver
  # and +other+, in that order.
  def +(other)
    Regexp.new("#{self}#{other}")
  end
end
9
module CodeRay
  module Scanners

    load :html

    # Scanner for PHP, including PHP embedded in HTML.
    #
    # Text outside of PHP tags is delegated to the :html scanner.
    #
    # Original by Stefan Walk.
    class PHP < Scanner

      register_for :php
      file_extension 'php'

      # Creates the HTML delegate scanner; it pushes onto this scanner's tokens.
      def setup
        @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
      end

      # Resets this scanner and the HTML delegate.
      def reset_instance
        super
        @html_scanner.reset
      end

      # Word lists used to classify identifiers.
      module Words

        # according to http://www.php.net/manual/en/reserved.keywords.php
        KEYWORDS = %w[
          abstract and array as break case catch class clone const continue declare default do else elseif
          enddeclare endfor endforeach endif endswitch endwhile extends final for foreach function global
          goto if implements interface instanceof namespace new or private protected public static switch
          throw try use var while xor
          cfunction old_function
        ]

        TYPES = %w[ int integer float double bool boolean string array object resource ]

        LANGUAGE_CONSTRUCTS = %w[
          die echo empty exit eval include include_once isset list
          require require_once return print unset
        ]

        CLASSES = %w[ Directory stdClass __PHP_Incomplete_Class exception php_user_filter Closure ]

        # according to http://php.net/quickref.php on 2009-04-21;
        # all functions with _ excluded (module functions) and selected additional functions
        BUILTIN_FUNCTIONS = %w[
          abs acos acosh addcslashes addslashes aggregate array arsort ascii2ebcdic asin asinh asort assert atan atan2
          atanh basename bcadd bccomp bcdiv bcmod bcmul bcpow bcpowmod bcscale bcsqrt bcsub bin2hex bindec
          bindtextdomain bzclose bzcompress bzdecompress bzerrno bzerror bzerrstr bzflush bzopen bzread bzwrite
          calculhmac ceil chdir checkdate checkdnsrr chgrp chmod chop chown chr chroot clearstatcache closedir closelog
          compact constant copy cos cosh count crc32 crypt current date dcgettext dcngettext deaggregate decbin dechex
          decoct define defined deg2rad delete dgettext die dirname diskfreespace dl dngettext doubleval each
          ebcdic2ascii echo empty end ereg eregi escapeshellarg escapeshellcmd eval exec exit exp explode expm1 extract
          fclose feof fflush fgetc fgetcsv fgets fgetss file fileatime filectime filegroup fileinode filemtime fileowner
          fileperms filepro filesize filetype floatval flock floor flush fmod fnmatch fopen fpassthru fprintf fputcsv
          fputs fread frenchtojd fscanf fseek fsockopen fstat ftell ftok ftruncate fwrite getallheaders getcwd getdate
          getenv gethostbyaddr gethostbyname gethostbynamel getimagesize getlastmod getmxrr getmygid getmyinode getmypid
          getmyuid getopt getprotobyname getprotobynumber getrandmax getrusage getservbyname getservbyport gettext
          gettimeofday gettype glob gmdate gmmktime gmstrftime gregoriantojd gzclose gzcompress gzdecode gzdeflate
          gzencode gzeof gzfile gzgetc gzgets gzgetss gzinflate gzopen gzpassthru gzputs gzread gzrewind gzseek gztell
          gzuncompress gzwrite hash header hebrev hebrevc hexdec htmlentities htmlspecialchars hypot iconv idate
          implode include intval ip2long iptcembed iptcparse isset
          jddayofweek jdmonthname jdtofrench jdtogregorian jdtojewish jdtojulian jdtounix jewishtojd join jpeg2wbmp
          juliantojd key krsort ksort lcfirst lchgrp lchown levenshtein link linkinfo list localeconv localtime log
          log10 log1p long2ip lstat ltrim mail main max md5 metaphone mhash microtime min mkdir mktime msql natcasesort
          natsort next ngettext nl2br nthmac octdec opendir openlog
          ord overload pack passthru pathinfo pclose pfsockopen phpcredits phpinfo phpversion pi png2wbmp popen pos pow
          prev print printf putenv quotemeta rad2deg rand range rawurldecode rawurlencode readdir readfile readgzfile
          readline readlink realpath recode rename require reset rewind rewinddir rmdir round rsort rtrim scandir
          serialize setcookie setlocale setrawcookie settype sha1 shuffle signeurlpaiement sin sinh sizeof sleep snmpget
          snmpgetnext snmprealwalk snmpset snmpwalk snmpwalkoid sort soundex split spliti sprintf sqrt srand sscanf stat
          strcasecmp strchr strcmp strcoll strcspn strftime stripcslashes stripos stripslashes stristr strlen
          strnatcasecmp strnatcmp strncasecmp strncmp strpbrk strpos strptime strrchr strrev strripos strrpos strspn
          strstr strtok strtolower strtotime strtoupper strtr strval substr symlink syslog system tan tanh tempnam
          textdomain time tmpfile touch trim uasort ucfirst ucwords uksort umask uniqid unixtojd unlink unpack
          unserialize unset urldecode urlencode usleep usort vfprintf virtual vprintf vsprintf wordwrap
          array_change_key_case array_chunk array_combine array_count_values array_diff array_diff_assoc
          array_diff_key array_diff_uassoc array_diff_ukey array_fill array_fill_keys array_filter array_flip
          array_intersect array_intersect_assoc array_intersect_key array_intersect_uassoc array_intersect_ukey
          array_key_exists array_keys array_map array_merge array_merge_recursive array_multisort array_pad
          array_pop array_product array_push array_rand array_reduce array_reverse array_search array_shift
          array_slice array_splice array_sum array_udiff array_udiff_assoc array_udiff_uassoc array_uintersect
          array_uintersect_assoc array_uintersect_uassoc array_unique array_unshift array_values array_walk
          array_walk_recursive
          assert_options base_convert base64_decode base64_encode
          chunk_split class_exists class_implements class_parents
          count_chars debug_backtrace debug_print_backtrace debug_zval_dump
          error_get_last error_log error_reporting extension_loaded
          file_exists file_get_contents file_put_contents load_file
          func_get_arg func_get_args func_num_args function_exists
          get_browser get_called_class get_cfg_var get_class get_class_methods get_class_vars
          get_current_user get_declared_classes get_declared_interfaces get_defined_constants
          get_defined_functions get_defined_vars get_extension_funcs get_headers get_html_translation_table
          get_include_path get_included_files get_loaded_extensions get_magic_quotes_gpc get_magic_quotes_runtime
          get_meta_tags get_object_vars get_parent_class get_required_files get_resource_type
          gc_collect_cycles gc_disable gc_enable gc_enabled
          halt_compiler headers_list headers_sent highlight_file highlight_string
          html_entity_decode htmlspecialchars_decode
          in_array include_once inclued_get_data
          is_a is_array is_binary is_bool is_buffer is_callable is_dir is_double is_executable is_file is_finite
          is_float is_infinite is_int is_integer is_link is_long is_nan is_null is_numeric is_object is_readable
          is_real is_resource is_scalar is_soap_fault is_string is_subclass_of is_unicode is_uploaded_file
          is_writable is_writeable
          locale_get_default locale_set_default
          number_format override_function parse_str parse_url
          php_check_syntax php_ini_loaded_file php_ini_scanned_files php_logo_guid php_sapi_name
          php_strip_whitespace php_uname
          preg_filter preg_grep preg_last_error preg_match preg_match_all preg_quote preg_replace
          preg_replace_callback preg_split print_r
          require_once register_shutdown_function register_tick_function
          set_error_handler set_exception_handler set_file_buffer set_include_path
          set_magic_quotes_runtime set_time_limit shell_exec
          str_getcsv str_ireplace str_pad str_repeat str_replace str_rot13 str_shuffle str_split str_word_count
          strip_tags substr_compare substr_count substr_replace
          time_nanosleep time_sleep_until
          token_get_all token_name trigger_error
          unregister_tick_function use_soap_error_handler user_error
          utf8_decode utf8_encode var_dump var_export
          version_compare
          zend_logo_guid zend_thread_id zend_version
        ]
        # TODO: more built-in PHP functions?

        EXCEPTIONS = %w[
          E_ERROR E_WARNING E_PARSE E_NOTICE E_CORE_ERROR E_CORE_WARNING E_COMPILE_ERROR E_COMPILE_WARNING
          E_USER_ERROR E_USER_WARNING E_USER_NOTICE E_DEPRECATED E_USER_DEPRECATED E_ALL E_STRICT
        ]

        CONSTANTS = %w[
          null true false self parent
          __LINE__ __DIR__ __FILE__
          __CLASS__ __NAMESPACE__ __METHOD__ __FUNCTION__
          PHP_VERSION PHP_MAJOR_VERSION PHP_MINOR_VERSION PHP_RELEASE_VERSION PHP_VERSION_ID PHP_EXTRA_VERSION PHP_ZTS
          PHP_DEBUG PHP_MAXPATHLEN PHP_OS PHP_SAPI PHP_EOL PHP_INT_MAX PHP_INT_SIZE DEFAULT_INCLUDE_PATH
          PEAR_INSTALL_DIR PEAR_EXTENSION_DIR PHP_EXTENSION_DIR PHP_PREFIX PHP_BINDIR PHP_LIBDIR PHP_DATADIR
          PHP_SYSCONFDIR PHP_LOCALSTATEDIR PHP_CONFIG_FILE_PATH PHP_CONFIG_FILE_SCAN_DIR PHP_SHLIB_SUFFIX
          PHP_OUTPUT_HANDLER_START PHP_OUTPUT_HANDLER_CONT PHP_OUTPUT_HANDLER_END
          __COMPILER_HALT_OFFSET__
          EXTR_OVERWRITE EXTR_SKIP EXTR_PREFIX_SAME EXTR_PREFIX_ALL EXTR_PREFIX_INVALID EXTR_PREFIX_IF_EXISTS
          EXTR_IF_EXISTS SORT_ASC SORT_DESC SORT_REGULAR SORT_NUMERIC SORT_STRING CASE_LOWER CASE_UPPER COUNT_NORMAL
          COUNT_RECURSIVE ASSERT_ACTIVE ASSERT_CALLBACK ASSERT_BAIL ASSERT_WARNING ASSERT_QUIET_EVAL CONNECTION_ABORTED
          CONNECTION_NORMAL CONNECTION_TIMEOUT INI_USER INI_PERDIR INI_SYSTEM INI_ALL M_E M_LOG2E M_LOG10E M_LN2 M_LN10
          M_PI M_PI_2 M_PI_4 M_1_PI M_2_PI M_2_SQRTPI M_SQRT2 M_SQRT1_2 CRYPT_SALT_LENGTH CRYPT_STD_DES CRYPT_EXT_DES
          CRYPT_MD5 CRYPT_BLOWFISH DIRECTORY_SEPARATOR SEEK_SET SEEK_CUR SEEK_END LOCK_SH LOCK_EX LOCK_UN LOCK_NB
          HTML_SPECIALCHARS HTML_ENTITIES ENT_COMPAT ENT_QUOTES ENT_NOQUOTES INFO_GENERAL INFO_CREDITS
          INFO_CONFIGURATION INFO_MODULES INFO_ENVIRONMENT INFO_VARIABLES INFO_LICENSE INFO_ALL CREDITS_GROUP
          CREDITS_GENERAL CREDITS_SAPI CREDITS_MODULES CREDITS_DOCS CREDITS_FULLPAGE CREDITS_QA CREDITS_ALL STR_PAD_LEFT
          STR_PAD_RIGHT STR_PAD_BOTH PATHINFO_DIRNAME PATHINFO_BASENAME PATHINFO_EXTENSION PATH_SEPARATOR CHAR_MAX
          LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_ALL LC_MESSAGES ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5
          ABDAY_6 ABDAY_7 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
          ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 MON_8 MON_9
          MON_10 MON_11 MON_12 AM_STR PM_STR D_T_FMT D_FMT T_FMT T_FMT_AMPM ERA ERA_YEAR ERA_D_T_FMT ERA_D_FMT ERA_T_FMT
          ALT_DIGITS INT_CURR_SYMBOL CURRENCY_SYMBOL CRNCYSTR MON_DECIMAL_POINT MON_THOUSANDS_SEP MON_GROUPING
          POSITIVE_SIGN NEGATIVE_SIGN INT_FRAC_DIGITS FRAC_DIGITS P_CS_PRECEDES P_SEP_BY_SPACE N_CS_PRECEDES
          N_SEP_BY_SPACE P_SIGN_POSN N_SIGN_POSN DECIMAL_POINT RADIXCHAR THOUSANDS_SEP THOUSEP GROUPING YESEXPR NOEXPR
          YESSTR NOSTR CODESET LOG_EMERG LOG_ALERT LOG_CRIT LOG_ERR LOG_WARNING LOG_NOTICE LOG_INFO LOG_DEBUG LOG_KERN
          LOG_USER LOG_MAIL LOG_DAEMON LOG_AUTH LOG_SYSLOG LOG_LPR LOG_NEWS LOG_UUCP LOG_CRON LOG_AUTHPRIV LOG_LOCAL0
          LOG_LOCAL1 LOG_LOCAL2 LOG_LOCAL3 LOG_LOCAL4 LOG_LOCAL5 LOG_LOCAL6 LOG_LOCAL7 LOG_PID LOG_CONS LOG_ODELAY
          LOG_NDELAY LOG_NOWAIT LOG_PERROR
        ]

        # Token kind lookup for identifiers; anything not listed is :ident.
        # NOTE(review): words present in several lists (e.g. "array")
        # presumably get the kind of the last add - confirm against WordList.
        IDENT_KIND = CaseIgnoringWordList.new(:ident, true).
          add(KEYWORDS, :reserved).
          add(TYPES, :pre_type).
          add(LANGUAGE_CONSTRUCTS, :reserved).
          add(BUILTIN_FUNCTIONS, :predefined).
          add(CLASSES, :pre_constant).
          add(EXCEPTIONS, :exception).
          add(CONSTANTS, :pre_constant)
      end

      # Patterns shared by the scanner states.
      module RE

        PHP_START = /
          <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
          <script\s+[^>]*?language\s*=\s*'php'[^>]*?> |
          <\?php\d? |
          <\?(?!xml)
        /xi

        PHP_END = %r!
          </script> |
          \?>
        !xi

        IDENTIFIER = /[a-z_\x80-\xFF][a-z0-9_\x80-\xFF]*/i
        VARIABLE = /\$#{IDENTIFIER}/

        OPERATOR = /
          \.(?!\d)=? |      # dot that is not decimal point, string concatenation
          && | \|\| |       # logic
          :: | -> | => |    # scope, member, dictionary
          \+\+ | -- |       # increment, decrement
          [,;?:()\[\]{}] |  # simple delimiters
          [-+*\/%&|^]=? |   # ordinary math, binary logic, assignment shortcuts
          [~@$] |           # whatever
          [=!]=?=? | <> |   # comparison and assignment
          <<=? | >>=? | [<>]=?  # comparison and shift
        /x

      end

      # Scans the whole input and appends the resulting tokens to +tokens+.
      #
      # The scanner keeps a stack of states; the top entry selects the branch:
      # :initial:: HTML, handed to the HTML delegate up to the next PHP tag
      # :php::     PHP code
      # :sqstring:: inside a single-quoted string
      # :dqstring:: inside a double-quoted or backtick string
      #
      # While a "{$...}" expression inside a string is being scanned, the
      # string's stack entry is temporarily replaced by [state, delimiter] so
      # that the delimiter can be restored at the matching "}".
      #
      # tokens::  collection the tokens are pushed onto with <<
      # options:: not read by this scanner
      #
      # Returns +tokens+.
      def scan_tokens tokens, options

        states = [:initial]
        # Begin in HTML mode when the input starts with a PHP opening tag, or
        # starts with an HTML tag and contains a PHP opening tag somewhere.
        # Otherwise the input is treated as PHP code right away.
        starts_in_html = match?(RE::PHP_START) ||
          (match?(/\s*<(?i:\w|\?xml)/) && exist?(RE::PHP_START))
        states << :php unless starts_in_html
        # heredocdelim = nil
        delimiter = nil

        until eos?

          match = nil
          kind = nil

          case states.last

          when :initial  # HTML
            if scan RE::PHP_START
              kind = :inline_delimiter
              states << :php
            else
              match = scan_until(/(?=#{RE::PHP_START})/o) || scan_until(/\z/)
              @html_scanner.tokenize match unless match.empty?
              next
            end

          when :php
            if scan RE::PHP_END
              kind = :inline_delimiter
              # A closing tag drops all nested states and returns to HTML.
              states = [:initial]

            elsif scan(/\s+/)
              kind = :space

            elsif scan(/ \/\* (?: .*? \*\/ | .* ) /mx)
              kind = :comment

            elsif scan(%r!(?://|#).*?(?=#{RE::PHP_END}|$)!o)
              kind = :comment

            elsif match = scan(RE::IDENTIFIER)
              kind = Words::IDENT_KIND[match]
              if kind == :ident && check(/:(?!:)/) #&& tokens[-2][0] == 'case'
                kind = :label
              elsif kind == :ident && match =~ /^[A-Z]/
                kind = :constant
              # TODO: function and class definitions
              end

            elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
              kind = :float

            elsif scan(/0x[0-9a-fA-F]+/)
              kind = :hex

            elsif scan(/\d+/)
              kind = :integer

            elsif scan(/'/)
              tokens << [:open, :string]
              kind = :delimiter
              states.push :sqstring

            elsif match = scan(/["`]/)
              tokens << [:open, :string]
              delimiter = match
              kind = :delimiter
              states.push :dqstring

            # TODO: Heredocs
            # elsif match = scan(/<<</ + IDENTIFIER)
            #   tokens << [:open, :string]
            #   heredocdelim = match[RE::IDENTIFIER]
            #   kind = :delimiter
            #   states.push :heredocstring

            elsif scan RE::VARIABLE
              kind = :local_variable

            elsif scan(/\{/)
              kind = :operator
              states.push :php

            elsif scan(/\}/)
              if states.size == 1
                kind = :error
              else
                states.pop
                if states.last.is_a?(::Array)
                  # End of a "{$...}" expression: restore the enclosing
                  # string state and its delimiter.
                  delimiter = states.last[1]
                  states[-1] = states.last[0]
                  tokens << [matched, :delimiter]
                  tokens << [:close, :inline]
                  next
                else
                  kind = :operator
                end
              end

            elsif scan(/#{RE::OPERATOR}/o)
              kind = :operator

            else
              getch
              kind = :error

            end

          when :sqstring
            if scan(/[^'\\]+/)
              kind = :content
            elsif scan(/'/)
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              delimiter = nil
              states.pop
              next
            elsif scan(/\\[\\'\n]/)
              kind = :char
            elsif scan(/\\./m)
              kind = :content
            elsif scan(/\\/)
              kind = :error
            end

          when :dqstring
            if scan(delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/)
              kind = :content
            elsif scan(delimiter == '"' ? /"/ : /`/)
              tokens << [matched, :delimiter]
              tokens << [:close, :string]
              delimiter = nil
              states.pop
              next
            elsif scan(/\\(?:x[0-9a-fA-F]{2}|\d{3})/)
              kind = :char
            elsif scan(delimiter == '"' ? /\\["\\\nfnrtv]/ : /\\[`\\\nfnrtv]/)
              kind = :char
            elsif scan(/\\./m)
              kind = :content
            elsif scan(/\\/)
              kind = :error
            elsif match = scan(/#{RE::VARIABLE}/o)
              kind = :local_variable
              # $foo[bar] and $foo->bar kind of stuff
              # TODO: highlight tokens separately!
              if check(/\[#{RE::IDENTIFIER}\]/o)
                match << scan(/\[#{RE::IDENTIFIER}\]/o)
              elsif check(/\[/)
                match << scan(/\[#{RE::IDENTIFIER}?/o)
                kind = :error
              elsif check(/->#{RE::IDENTIFIER}/o)
                match << scan(/->#{RE::IDENTIFIER}/o)
              elsif check(/->/)
                match << scan(/->/)
                kind = :error
              end
            elsif match = scan(/\{/)
              if check(/\$/)
                # Start of a "{$...}" expression: remember the string state
                # together with its delimiter, then scan PHP code.
                kind = :delimiter
                states[-1] = [states.last, delimiter]
                delimiter = nil
                states.push :php
                tokens << [:open, :inline]
              else
                kind = :string
              end
            elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
              kind = :local_variable
            elsif scan(/\$/)
              kind = :content
            end
          else
            raise_inspect 'Unknown state!', tokens, states
          end

          # Branches that did not capture the text themselves fall back to the
          # text of the last successful scan.
          match ||= matched
          if $DEBUG and not kind
            raise_inspect 'Error token %p in line %d' %
              [[match, kind], line], tokens, states
          end
          raise_inspect 'Empty token', tokens, states unless match

          tokens << [match, kind]

        end

        tokens
      end

    end

  end
end