llm-shell 0.9.2 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +61 -66
- data/lib/llm/shell/command.rb +40 -40
- data/lib/llm/shell/commands/clear_screen.rb +4 -18
- data/lib/llm/shell/commands/debug_mode.rb +12 -0
- data/lib/llm/shell/commands/dir_import.rb +4 -20
- data/lib/llm/shell/commands/disable_tool.rb +33 -0
- data/lib/llm/shell/commands/enable_tool.rb +33 -0
- data/lib/llm/shell/commands/file_import.rb +4 -20
- data/lib/llm/shell/commands/help.rb +23 -36
- data/lib/llm/shell/commands/show_chat.rb +4 -19
- data/lib/llm/shell/commands/show_version.rb +4 -20
- data/lib/llm/shell/commands/system_prompt.rb +4 -18
- data/lib/llm/shell/completion.rb +5 -5
- data/lib/llm/shell/config.rb +4 -5
- data/lib/llm/shell/formatter.rb +1 -2
- data/lib/llm/shell/internal/coderay/lib/coderay/duo.rb +81 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/_map.rb +17 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/comment_filter.rb +25 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/count.rb +39 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/debug.rb +49 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/debug_lint.rb +63 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/div.rb +23 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/encoder.rb +190 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/filter.rb +58 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/html/css.rb +65 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/html/numbering.rb +108 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/html/output.rb +164 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/html.rb +333 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/json.rb +83 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/lines_of_code.rb +45 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/lint.rb +59 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/null.rb +18 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/page.rb +24 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/span.rb +23 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/statistic.rb +95 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/terminal.rb +195 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/text.rb +46 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/xml.rb +72 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders/yaml.rb +50 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/encoders.rb +18 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/for_redcloth.rb +95 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/helpers/file_type.rb +151 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/helpers/plugin.rb +55 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/helpers/plugin_host.rb +221 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/helpers/word_list.rb +72 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/_map.rb +24 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/c.rb +189 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/cpp.rb +217 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/css.rb +196 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/debug.rb +75 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/delphi.rb +144 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/diff.rb +221 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/erb.rb +81 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/go.rb +208 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/groovy.rb +268 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/haml.rb +168 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/html.rb +275 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/java/builtin_types.rb +421 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/java.rb +174 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/java_script.rb +236 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/json.rb +98 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/lua.rb +280 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/php.rb +527 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/python.rb +287 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/raydebug.rb +75 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/ruby/patterns.rb +178 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/ruby/string_state.rb +79 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/ruby.rb +477 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/sass.rb +232 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/scanner.rb +337 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/sql.rb +169 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/taskpaper.rb +36 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/text.rb +26 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/xml.rb +17 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners/yaml.rb +140 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/scanners.rb +27 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/styles/_map.rb +7 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/styles/alpha.rb +153 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/styles/style.rb +18 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/styles.rb +15 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/token_kinds.rb +85 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/tokens.rb +164 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/llm/shell/internal/coderay/lib/coderay/version.rb +3 -0
- data/lib/llm/shell/internal/coderay/lib/coderay.rb +284 -0
- data/lib/llm/shell/internal/io-line/lib/io/line/multiple.rb +19 -0
- data/lib/{io → llm/shell/internal/io-line/lib/io}/line.rb +2 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/bot/builder.rb +31 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/bot/conversable.rb +37 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/bot/prompt/completion.rb +49 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/bot/prompt/respond.rb +49 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/bot.rb +150 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/buffer.rb +162 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/client.rb +36 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/error.rb +49 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/eventhandler.rb +44 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/eventstream/event.rb +69 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/eventstream/parser.rb +88 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/eventstream.rb +8 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/file.rb +91 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/function.rb +177 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/message.rb +178 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/mime.rb +140 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/multipart.rb +101 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/object/builder.rb +38 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/object/kernel.rb +53 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/object.rb +89 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/provider.rb +352 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/error_handler.rb +36 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/files.rb +155 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/format/completion_format.rb +88 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/format.rb +29 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/models.rb +54 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/response/completion.rb +39 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/response/enumerable.rb +11 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/response/file.rb +23 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/response/web_search.rb +21 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic/stream_parser.rb +66 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/anthropic.rb +138 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/deepseek/format/completion_format.rb +68 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/deepseek/format.rb +27 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/deepseek.rb +75 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/audio.rb +73 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/error_handler.rb +47 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/files.rb +146 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/format/completion_format.rb +69 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/format.rb +39 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/images.rb +133 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/models.rb +60 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/response/completion.rb +35 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/response/embedding.rb +8 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/response/file.rb +11 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/response/files.rb +15 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/response/image.rb +31 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/response/models.rb +15 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/response/web_search.rb +22 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini/stream_parser.rb +86 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/gemini.rb +173 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/llamacpp.rb +74 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama/error_handler.rb +36 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama/format/completion_format.rb +77 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama/format.rb +29 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama/models.rb +56 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama/response/completion.rb +28 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama/response/embedding.rb +9 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama/stream_parser.rb +44 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/ollama.rb +116 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/audio.rb +91 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/error_handler.rb +46 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/files.rb +134 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/format/completion_format.rb +90 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/format/moderation_format.rb +35 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/format/respond_format.rb +72 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/format.rb +54 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/images.rb +109 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/models.rb +55 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/moderations.rb +65 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/audio.rb +7 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/completion.rb +40 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/embedding.rb +9 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/enumerable.rb +23 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/file.rb +7 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/image.rb +16 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/moderations.rb +34 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/responds.rb +48 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/response/web_search.rb +21 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/responses/stream_parser.rb +76 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/responses.rb +99 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/stream_parser.rb +86 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai/vector_stores.rb +228 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/openai.rb +206 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/xai/images.rb +58 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/xai.rb +72 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/providers/zai.rb +74 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/response.rb +67 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/array.rb +26 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/boolean.rb +13 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/integer.rb +43 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/leaf.rb +78 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/null.rb +13 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/number.rb +43 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/object.rb +41 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/string.rb +34 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema/version.rb +8 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/schema.rb +81 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/server_tool.rb +32 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/tool/param.rb +75 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/tool.rb +78 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/utils.rb +19 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm/version.rb +5 -0
- data/lib/llm/shell/internal/llm.rb/lib/llm.rb +121 -0
- data/lib/llm/shell/internal/optparse/lib/optionparser.rb +2 -0
- data/lib/llm/shell/internal/optparse/lib/optparse/ac.rb +70 -0
- data/lib/llm/shell/internal/optparse/lib/optparse/date.rb +18 -0
- data/lib/llm/shell/internal/optparse/lib/optparse/kwargs.rb +27 -0
- data/lib/llm/shell/internal/optparse/lib/optparse/shellwords.rb +7 -0
- data/lib/llm/shell/internal/optparse/lib/optparse/time.rb +11 -0
- data/lib/llm/shell/internal/optparse/lib/optparse/uri.rb +7 -0
- data/lib/llm/shell/internal/optparse/lib/optparse/version.rb +80 -0
- data/lib/llm/shell/internal/optparse/lib/optparse.rb +2469 -0
- data/lib/llm/shell/internal/paint/lib/paint/constants.rb +104 -0
- data/lib/llm/shell/internal/paint/lib/paint/pa.rb +13 -0
- data/lib/llm/shell/internal/paint/lib/paint/rgb_colors.rb +14 -0
- data/lib/llm/shell/internal/paint/lib/paint/shortcuts.rb +100 -0
- data/lib/llm/shell/internal/paint/lib/paint/shortcuts_version.rb +5 -0
- data/lib/llm/shell/internal/paint/lib/paint/util.rb +16 -0
- data/lib/llm/shell/internal/paint/lib/paint/version.rb +5 -0
- data/lib/llm/shell/internal/paint/lib/paint.rb +261 -0
- data/lib/llm/shell/internal/reline/lib/reline/config.rb +378 -0
- data/lib/llm/shell/internal/reline/lib/reline/face.rb +199 -0
- data/lib/llm/shell/internal/reline/lib/reline/history.rb +76 -0
- data/lib/llm/shell/internal/reline/lib/reline/io/ansi.rb +322 -0
- data/lib/llm/shell/internal/reline/lib/reline/io/dumb.rb +120 -0
- data/lib/llm/shell/internal/reline/lib/reline/io/windows.rb +530 -0
- data/lib/llm/shell/internal/reline/lib/reline/io.rb +55 -0
- data/lib/llm/shell/internal/reline/lib/reline/key_actor/base.rb +37 -0
- data/lib/llm/shell/internal/reline/lib/reline/key_actor/composite.rb +17 -0
- data/lib/llm/shell/internal/reline/lib/reline/key_actor/emacs.rb +517 -0
- data/lib/llm/shell/internal/reline/lib/reline/key_actor/vi_command.rb +518 -0
- data/lib/llm/shell/internal/reline/lib/reline/key_actor/vi_insert.rb +517 -0
- data/lib/llm/shell/internal/reline/lib/reline/key_actor.rb +8 -0
- data/lib/llm/shell/internal/reline/lib/reline/key_stroke.rb +119 -0
- data/lib/llm/shell/internal/reline/lib/reline/kill_ring.rb +125 -0
- data/lib/llm/shell/internal/reline/lib/reline/line_editor.rb +2356 -0
- data/lib/llm/shell/internal/reline/lib/reline/unicode/east_asian_width.rb +1292 -0
- data/lib/llm/shell/internal/reline/lib/reline/unicode.rb +421 -0
- data/lib/llm/shell/internal/reline/lib/reline/version.rb +3 -0
- data/lib/llm/shell/internal/reline/lib/reline.rb +527 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/generated_parser.rb +712 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/handler.rb +268 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/local_date.rb +35 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/local_date_time.rb +42 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/local_time.rb +40 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/parser.rb +21 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/scanner.rb +92 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/string_utils.rb +40 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb/version.rb +5 -0
- data/lib/llm/shell/internal/tomlrb/lib/tomlrb.rb +49 -0
- data/lib/llm/shell/options.rb +1 -1
- data/lib/llm/shell/renderer.rb +2 -3
- data/lib/llm/shell/repl.rb +21 -16
- data/lib/llm/shell/tool.rb +42 -0
- data/lib/llm/shell/tools/read_file.rb +15 -0
- data/lib/llm/shell/tools/system.rb +17 -0
- data/lib/llm/shell/tools/write_file.rb +16 -0
- data/lib/llm/shell/version.rb +1 -1
- data/lib/llm/shell.rb +83 -39
- data/libexec/llm-shell/shell +4 -6
- data/llm-shell.gemspec +0 -4
- metadata +233 -63
- data/lib/llm/function.rb +0 -17
- data/lib/llm/shell/command/extension.rb +0 -42
- data/lib/llm/shell/commands/utils.rb +0 -21
- data/lib/llm/shell/functions/read_file.rb +0 -22
- data/lib/llm/shell/functions/write_file.rb +0 -22
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
module CodeRay
|
|
2
|
+
module Scanners
|
|
3
|
+
|
|
4
|
+
load :java
|
|
5
|
+
|
|
6
|
+
# Scanner for Groovy.
|
|
7
|
+
# Scanner for Groovy source code.
#
# Inherits from the Java scanner and extends its identifier table with
# Groovy's extra keywords and the magic closure variable +it+. On top of
# that it tokenizes single-, double- and triple-quoted strings, slashy
# regexps, and <tt>$ident</tt> / <tt>${...}</tt> interpolation.
class Groovy < Java

  register_for :groovy

  # TODO: check list of keywords
  GROOVY_KEYWORDS = %w[
    as assert def in
  ]  # :nodoc:
  # Keywords after which a value is expected; a "/" following one of them
  # opens a regexp literal instead of being scanned as an operator.
  KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
    case instanceof new return throw typeof while as assert in
  ]  # :nodoc:
  GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:

  IDENT_KIND = Java::IDENT_KIND.dup.
    add(GROOVY_KEYWORDS, :keyword).
    add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:

  ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
  UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
  REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:

  # Plain (non-escape, non-interpolation) content for each kind of opening
  # delimiter, keyed by the delimiter text itself.
  # TODO: interpretation inside ', ", /
  STRING_CONTENT_PATTERN = {
    "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
    '"' => /[^\\$"\n]+/,
    "'''" => /(?>[^\\']+|'(?!''))+/,
    '"""' => /(?>[^\\$"]+|"(?!""))+/,
    '/' => /[^\\$\/\n]+/,
  }  # :nodoc:

protected

  # Sets the state the scanner starts in when no :state option is given.
  def setup
    @state = :initial
  end

  # Tokenizes the remaining input and feeds the tokens to +encoder+.
  #
  # encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
  # options:: <tt>:state</tt> overrides the starting state;
  #           <tt>:keep_state</tt> stores the final state in <tt>@state</tt>.
  #
  # Returns +encoder+.
  def scan_tokens encoder, options
    state = options[:state] || @state
    # Saved [state, string_delimiter, paren_depth] for every ${...} block
    # that is currently open, innermost last.
    inline_block_stack = []
    inline_block_paren_depth = nil
    string_delimiter = nil
    import_clause = class_name_follows = last_token = after_def = false
    value_expected = true

    until eos?

      # Reset per token: +kind+ must describe the token scanned in THIS
      # iteration, otherwise the last_token guard at the bottom of the
      # loop would look at a stale value from an earlier identifier.
      kind = nil

      case state

      when :initial

        if match = scan(/ \s+ | \\\n /x)
          encoder.text_token match, :space
          if match.index ?\n
            # A line break ends an import clause and a pending "def".
            import_clause = after_def = false
            value_expected ||= true
          end
          next

        elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
          value_expected = true
          after_def = false
          kind = :comment  # keeps comments from overwriting last_token
          encoder.text_token match, kind

        elsif bol? && match = scan(/ \#!.* /x)
          kind = :doctype  # keeps the shebang line from overwriting last_token
          encoder.text_token match, kind

        elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
          after_def = value_expected = false
          encoder.text_token match, :include

        elsif match = scan(/ #{IDENT} | \[\] /ox)
          kind = IDENT_KIND[match]
          value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
          if last_token == '.'
            # Member access: never highlight as keyword or type.
            kind = :ident
          elsif class_name_follows
            kind = :class
            class_name_follows = false
          elsif after_def && check(/\s*[({]/)
            kind = :method
            after_def = false
          elsif kind == :ident && last_token != '?' && check(/:/)
            # "name:" is a map key / named argument unless it is the
            # middle operand of a ternary.
            kind = :key
          else
            class_name_follows = true if match == 'class' || (import_clause && match == 'as')
            import_clause = match == 'import'
            after_def = true if match == 'def'
          end
          encoder.text_token match, kind

        elsif match = scan(/;/)
          import_clause = after_def = false
          value_expected = true
          encoder.text_token match, :operator

        elsif match = scan(/\{/)
          class_name_follows = after_def = false
          value_expected = true
          encoder.text_token match, :operator
          if !inline_block_stack.empty?
            # Track nesting so the brace closing ${...} can be recognized.
            inline_block_paren_depth += 1
          end

        # TODO: ~'...', ~"..." and ~/.../ style regexps
        elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
            && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
          value_expected = true
          value_expected = :regexp if match == '~'
          after_def = false
          encoder.text_token match, :operator

        elsif match = scan(/ [)\]}] /x)
          value_expected = after_def = false
          if !inline_block_stack.empty? && match == '}'
            inline_block_paren_depth -= 1
            if inline_block_paren_depth == 0  # closing brace of inline block reached
              encoder.text_token match, :inline_delimiter
              encoder.end_group :inline
              # Resume the string/regexp that contained the ${...} block.
              state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
              next
            end
          end
          encoder.text_token match, :operator

        elsif check(/[\d.]/)
          after_def = value_expected = false
          if match = scan(/0[xX][0-9A-Fa-f]+/)
            encoder.text_token match, :hex
          elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
            encoder.text_token match, :octal
          elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
            encoder.text_token match, :float
          elsif match = scan(/\d+[lLgG]?/)
            encoder.text_token match, :integer
          end

        elsif match = scan(/'''|"""/)
          after_def = value_expected = false
          state = :multiline_string
          encoder.begin_group :string
          string_delimiter = match
          encoder.text_token match, :delimiter

        # TODO: record.'name' syntax
        elsif match = scan(/["']/)
          after_def = value_expected = false
          # Only quotes can match here; "/" regexps are opened by the
          # value_expected branch below.
          state = :string
          encoder.begin_group state
          string_delimiter = match
          encoder.text_token match, :delimiter

        elsif value_expected && match = scan(/\//)
          after_def = value_expected = false
          encoder.begin_group :regexp
          state = :regexp
          string_delimiter = '/'
          encoder.text_token match, :delimiter

        elsif match = scan(/ @ #{IDENT} /ox)
          after_def = value_expected = false
          encoder.text_token match, :annotation

        elsif match = scan(/\//)
          # No value expected here, so "/" is the division operator.
          after_def = false
          value_expected = true
          encoder.text_token match, :operator

        else
          encoder.text_token getch, :error

        end

      when :string, :regexp, :multiline_string
        if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
          encoder.text_token match, :content

        elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
          encoder.text_token match, :delimiter
          if state == :regexp
            # TODO: regexp modifiers? s, m, x, i?
            modifiers = scan(/[ix]+/)
            encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
          end
          # Multi-line strings were opened as a :string group.
          state = :string if state == :multiline_string
          encoder.end_group state
          string_delimiter = nil
          after_def = value_expected = false
          state = :initial
          next

        elsif (state == :string || state == :multiline_string) &&
            (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
          # In single-quoted strings only \\ and \' are reported as :char;
          # every other escape is emitted as plain content.
          if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
            encoder.text_token match, :content
          else
            encoder.text_token match, :char
          end
        elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
          encoder.text_token match, :char

        elsif match = scan(/ \$ #{IDENT} /mox)
          # $name interpolation: a complete inline group of its own.
          encoder.begin_group :inline
          encoder.text_token '$', :inline_delimiter
          match = match[1..-1]
          encoder.text_token match, IDENT_KIND[match]
          encoder.end_group :inline
          next
        elsif match = scan(/ \$ \{ /x)
          # ${...} interpolation: scan the block as code, remembering how
          # to return to the enclosing string/regexp afterwards.
          encoder.begin_group :inline
          encoder.text_token match, :inline_delimiter
          inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
          inline_block_paren_depth = 1
          state = :initial
          next

        elsif match = scan(/ \$ /mx)
          encoder.text_token match, :content

        elsif match = scan(/ \\. /mx)
          encoder.text_token match, :content  # TODO: Shouldn't this be :error?

        elsif match = scan(/ \\ | \n /x)
          # Lone backslash or line break inside a single-line literal:
          # close the group and flag the offending character.
          encoder.end_group state == :regexp ? :regexp : :string
          encoder.text_token match, :error
          after_def = value_expected = false
          state = :initial

        else
          raise_inspect "else case \" reached; %p not handled." % peek(1), encoder

        end

      else
        raise_inspect 'Unknown state', encoder

      end

      last_token = match unless [:space, :comment, :doctype].include? kind

    end

    # Input ended inside a string/regexp: close the open group.
    if [:multiline_string, :string, :regexp].include? state
      encoder.end_group state == :regexp ? :regexp : :string
    end

    if options[:keep_state]
      @state = state
    end

    # Input ended inside ${...}: close every inline group together with
    # the string/regexp group that contained it.
    until inline_block_stack.empty?
      state, = *inline_block_stack.pop
      encoder.end_group :inline
      encoder.end_group state == :regexp ? :regexp : :string
    end

    encoder
  end

end
|
|
266
|
+
|
|
267
|
+
end
|
|
268
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
module CodeRay
|
|
2
|
+
module Scanners
|
|
3
|
+
|
|
4
|
+
load :ruby
|
|
5
|
+
load :html
|
|
6
|
+
load :java_script
|
|
7
|
+
|
|
8
|
+
class HAML < Scanner
|
|
9
|
+
|
|
10
|
+
register_for :haml
|
|
11
|
+
title 'HAML Template'
|
|
12
|
+
|
|
13
|
+
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
|
14
|
+
|
|
15
|
+
protected
|
|
16
|
+
|
|
17
|
+
# Creates the nested scanners used while scanning a template. All of them
# are built with the same :tokens => @tokens and :keep_tokens => true
# options:
#
# * @ruby_scanner          - a :ruby scanner in its default state
# * @embedded_ruby_scanner - a :ruby scanner whose starting state is the
#                            value of @ruby_scanner.interpreted_string_state
# * @html_scanner          - an :html scanner
def setup
  super
  @ruby_scanner = CodeRay.scanner(:ruby, :tokens => @tokens, :keep_tokens => true)
  string_state = @ruby_scanner.interpreted_string_state
  @embedded_ruby_scanner = CodeRay.scanner(
    :ruby,
    :tokens => @tokens,
    :keep_tokens => true,
    :state => string_state
  )
  @html_scanner = CodeRay.scanner(:html, :tokens => @tokens, :keep_tokens => true)
end
|
|
23
|
+
|
|
24
|
+
def scan_tokens encoder, options
|
|
25
|
+
|
|
26
|
+
match = nil
|
|
27
|
+
code = ''
|
|
28
|
+
|
|
29
|
+
until eos?
|
|
30
|
+
|
|
31
|
+
if bol?
|
|
32
|
+
if match = scan(/!!!.*/)
|
|
33
|
+
encoder.text_token match, :doctype
|
|
34
|
+
next
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
if match = scan(/(?>( *)(\/(?!\[if)|-\#|:javascript|:ruby|:\w+) *)(?=\n)/)
|
|
38
|
+
encoder.text_token match, :comment
|
|
39
|
+
|
|
40
|
+
code = self[2]
|
|
41
|
+
if match = scan(/(?:\n+#{self[1]} .*)+/)
|
|
42
|
+
case code
|
|
43
|
+
when '/', '-#'
|
|
44
|
+
encoder.text_token match, :comment
|
|
45
|
+
when ':javascript'
|
|
46
|
+
# TODO: recognize #{...} snippets inside JavaScript
|
|
47
|
+
@java_script_scanner ||= CodeRay.scanner :java_script, :tokens => @tokens, :keep_tokens => true
|
|
48
|
+
@java_script_scanner.tokenize match, :tokens => encoder
|
|
49
|
+
when ':ruby'
|
|
50
|
+
@ruby_scanner.tokenize match, :tokens => encoder
|
|
51
|
+
when /:\w+/
|
|
52
|
+
encoder.text_token match, :comment
|
|
53
|
+
else
|
|
54
|
+
raise 'else-case reached: %p' % [code]
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
if match = scan(/ +/)
|
|
60
|
+
encoder.text_token match, :space
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
if match = scan(/\/.*/)
|
|
64
|
+
encoder.text_token match, :comment
|
|
65
|
+
next
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
if match = scan(/\\/)
|
|
69
|
+
encoder.text_token match, :plain
|
|
70
|
+
if match = scan(/.+/)
|
|
71
|
+
@html_scanner.tokenize match, :tokens => encoder
|
|
72
|
+
end
|
|
73
|
+
next
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
tag = false
|
|
77
|
+
|
|
78
|
+
if match = scan(/%[-\w:]+\/?/)
|
|
79
|
+
encoder.text_token match, :tag
|
|
80
|
+
# if match = scan(/( +)(.+)/)
|
|
81
|
+
# encoder.text_token self[1], :space
|
|
82
|
+
# @embedded_ruby_scanner.tokenize self[2], :tokens => encoder
|
|
83
|
+
# end
|
|
84
|
+
tag = true
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
while match = scan(/([.#])[-\w]*\w/)
|
|
88
|
+
encoder.text_token match, self[1] == '#' ? :constant : :class
|
|
89
|
+
tag = true
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
if tag && match = scan(/(\()([^)]+)?(\))?/)
|
|
93
|
+
# TODO: recognize title=@title, class="widget_#{@widget.number}"
|
|
94
|
+
encoder.text_token self[1], :plain
|
|
95
|
+
@html_scanner.tokenize self[2], :tokens => encoder, :state => :attribute if self[2]
|
|
96
|
+
encoder.text_token self[3], :plain if self[3]
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
if tag && match = scan(/\{/)
|
|
100
|
+
encoder.text_token match, :plain
|
|
101
|
+
|
|
102
|
+
code = ''
|
|
103
|
+
level = 1
|
|
104
|
+
while true
|
|
105
|
+
code << scan(/([^\{\},\n]|, *\n?)*/)
|
|
106
|
+
case match = getch
|
|
107
|
+
when '{'
|
|
108
|
+
level += 1
|
|
109
|
+
code << match
|
|
110
|
+
when '}'
|
|
111
|
+
level -= 1
|
|
112
|
+
if level > 0
|
|
113
|
+
code << match
|
|
114
|
+
else
|
|
115
|
+
break
|
|
116
|
+
end
|
|
117
|
+
when "\n", ",", nil
|
|
118
|
+
break
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
@ruby_scanner.tokenize code, :tokens => encoder unless code.empty?
|
|
122
|
+
|
|
123
|
+
encoder.text_token match, :plain if match
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
if tag && match = scan(/(\[)([^\]\n]+)?(\])?/)
|
|
127
|
+
encoder.text_token self[1], :plain
|
|
128
|
+
@ruby_scanner.tokenize self[2], :tokens => encoder if self[2]
|
|
129
|
+
encoder.text_token self[3], :plain if self[3]
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
if tag && match = scan(/\//)
|
|
133
|
+
encoder.text_token match, :tag
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
if scan(/(>?<?[-=]|[&!]=|(& |!)|~)( *)([^,\n\|]+(?:(, *|\|(?=.|\n.*\|$))\n?[^,\n\|]*)*)?/)
|
|
137
|
+
encoder.text_token self[1] + self[3], :plain
|
|
138
|
+
if self[4]
|
|
139
|
+
if self[2]
|
|
140
|
+
@embedded_ruby_scanner.tokenize self[4], :tokens => encoder
|
|
141
|
+
else
|
|
142
|
+
@ruby_scanner.tokenize self[4], :tokens => encoder
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
elsif match = scan(/((?:<|><?)(?![!?\/\w]))?(.+)?/)
|
|
146
|
+
encoder.text_token self[1], :plain if self[1]
|
|
147
|
+
# TODO: recognize #{...} snippets
|
|
148
|
+
@html_scanner.tokenize self[2], :tokens => encoder if self[2]
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
elsif match = scan(/.+/)
|
|
152
|
+
@html_scanner.tokenize match, :tokens => encoder
|
|
153
|
+
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
if match = scan(/\n/)
|
|
157
|
+
encoder.text_token match, :space
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
encoder
|
|
162
|
+
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
end
|
|
168
|
+
end
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
module CodeRay
  module Scanners

    # HTML Scanner
    #
    # Alias: +xhtml+
    #
    # See also: Scanners::XML
    #
    # The scanner is a small state machine. The states used by #scan_tokens are:
    #
    # :initial::                outside of any tag (text, comments, doctype, tags)
    # :attribute::              inside an opening tag, expecting an attribute name or the tag end
    # :attribute_equal::        directly after an attribute name, expecting "="
    # :attribute_value::        directly after "=", expecting a quoted or unquoted value
    # :attribute_value_string:: inside a quoted attribute value
    # :in_special_tag::         inside the body of a <script> or <style> element
    #
    # The body of <script>/<style> elements and the values of event-handler and
    # +style+ attributes are delegated to the JavaScript and CSS scanners.
    class HTML < Scanner

      register_for :html

      KINDS_NOT_LOC = [
        :comment, :doctype, :preprocessor,
        :tag, :attribute_name, :operator,
        :attribute_value, :string,
        :plain, :entity, :error,
      ]  # :nodoc:

      # Attribute names whose values are scanned as JavaScript.
      EVENT_ATTRIBUTES = %w(
        onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay
        oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick
        ondrag ondragdrop ondragend ondragenter ondragleave ondragover
        ondragstart ondrop ondurationchange onemptied onended onerror onfocus
        onformchange onforminput onhashchange oninput oninvalid onkeydown
        onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart
        onmessage onmousedown onmousemove onmouseout onmouseover onmouseup
        onmousewheel onmove onoffline ononline onpagehide onpageshow onpause
        onplay onplaying onpopstate onprogress onratechange onreadystatechange
        onredo onreset onresize onscroll onseeked onseeking onselect onshow
        onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload
        onvolumechange onwaiting
      )

      # Maps an attribute name to :script (event handlers) or :style (the
      # +style+ attribute); any other name maps to nil.
      IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil).
        add(EVENT_ATTRIBUTES, :script).
        add(['style'], :style)

      ATTR_NAME = /[\w.:-]+/  # :nodoc:
      TAG_END = /\/?>/  # :nodoc:
      HEX = /[0-9a-fA-F]/  # :nodoc:
      ENTITY = /
        &
        (?:
          \w+
        |
          \#
          (?:
            \d+
          |
            x#{HEX}+
          )
        )
        ;
      /ox  # :nodoc:

      # Pattern for a run of ordinary characters inside a quoted attribute
      # value, keyed by the quote character that opened the value.
      PLAIN_STRING_CONTENT = {
        "'" => /[^&'>\n]+/,
        '"' => /[^&">\n]+/,
      }  # :nodoc:

      # Resets the scanner so that the next scan starts from a clean slate.
      #
      # Every piece of state that #scan_tokens persists under :keep_state is
      # cleared here, including @in_tag. Otherwise a 'script'/'style' tag
      # remembered from a previous run would leak into the next scan.
      def reset
        super
        @state = :initial
        @plain_string_content = nil
        @in_tag = nil
      end

    protected

      # Initializes the instance variables that carry state between scans.
      # NOTE(review): presumably invoked by the Scanner base class on
      # construction - confirm against Scanner.
      def setup
        @state = :initial
        @plain_string_content = nil
        @in_tag = nil
      end

      # Tokenizes +code+ as JavaScript into +encoder+.
      # Does nothing when +code+ is nil or empty. The nested scanner is
      # created lazily and reused for later calls.
      def scan_java_script encoder, code
        if code && !code.empty?
          @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
          @java_script_scanner.tokenize code, :tokens => encoder
        end
      end

      # Tokenizes +code+ as CSS into +encoder+, starting the CSS scanner in
      # +state+. Does nothing when +code+ is nil or empty. The nested scanner
      # is created lazily and reused for later calls.
      def scan_css encoder, code, state = [:initial]
        if code && !code.empty?
          @css_scanner ||= Scanners::CSS.new '', :keep_tokens => true
          @css_scanner.tokenize code, :tokens => encoder, :state => state
        end
      end

      # Scans the input and emits tokens to +encoder+.
      #
      # Options read here:
      # :state::      state to start in; defaults to the stored @state
      # :keep_state:: when truthy, the final state, string pattern and special
      #               tag are stored so that a later call can continue
      #
      # Returns +encoder+.
      def scan_tokens encoder, options
        state = options[:state] || @state
        plain_string_content = @plain_string_content
        in_tag = @in_tag          # 'script' / 'style' while such an element is open, else nil
        in_attribute = nil        # :script / :style while inside such an attribute, else nil

        # Re-open the string group when resuming in the middle of a quoted value.
        encoder.begin_group :string if state == :attribute_value_string

        until eos?

          # Whitespace is significant inside <script>/<style> bodies only.
          if state != :in_special_tag && match = scan(/\s+/m)
            encoder.text_token match, :space

          else

            case state

            when :initial
              if match = scan(/<!\[CDATA\[/)
                encoder.text_token match, :inline_delimiter
                if match = scan(/.*?\]\]>/m)
                  # Everything but the trailing "]]>" is the CDATA payload.
                  encoder.text_token match[0..-4], :plain
                  encoder.text_token ']]>', :inline_delimiter
                elsif match = scan(/.+/)
                  # Unterminated CDATA section.
                  encoder.text_token match, :error
                end
              elsif match = scan(/<!--(?:.*?-->|.*)/m)
                encoder.text_token match, :comment
              elsif match = scan(/<!(\w+)(?:.*?>|.*)|\]>/m)
                encoder.text_token match, :doctype
              elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
                encoder.text_token match, :preprocessor
              elsif match = scan(/<\?(?:.*?\?>|.*)/m)
                encoder.text_token match, :comment
              elsif match = scan(/<\/[-\w.:]*>?/m)
                # Closing tag: any special tag is finished now.
                in_tag = nil
                encoder.text_token match, :tag
              elsif match = scan(/<(?:(script|style)|[-\w.:]+)(>)?/m)
                encoder.text_token match, :tag
                in_tag = self[1]  # nil unless the tag is script or style
                if self[2]
                  # The tag was closed immediately ("<script>").
                  state = :in_special_tag if in_tag
                else
                  state = :attribute
                end
              elsif match = scan(/[^<>&]+/)
                encoder.text_token match, :plain
              elsif match = scan(/#{ENTITY}/ox)
                encoder.text_token match, :entity
              elsif match = scan(/[<>&]/)
                # Stray markup character.
                in_tag = nil
                encoder.text_token match, :error
              else
                raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
              end

            when :attribute
              if match = scan(/#{TAG_END}/o)
                encoder.text_token match, :tag
                in_attribute = nil
                if in_tag
                  state = :in_special_tag
                else
                  state = :initial
                end
              elsif match = scan(/#{ATTR_NAME}/o)
                in_attribute = IN_ATTRIBUTE[match]
                encoder.text_token match, :attribute_name
                state = :attribute_equal
              else
                in_tag = nil
                encoder.text_token getch, :error
              end

            when :attribute_equal
              if match = scan(/=/)
                encoder.text_token match, :operator
                state = :attribute_value
              else
                # Attribute without a value: re-scan from here as an attribute.
                state = :attribute
                next
              end

            when :attribute_value
              if match = scan(/#{ATTR_NAME}/o)
                # Unquoted value.
                encoder.text_token match, :attribute_value
                state = :attribute
              elsif match = scan(/["']/)
                if in_attribute == :script || in_attribute == :style
                  # Event handler or style attribute: the whole quoted value
                  # is handed to the JavaScript or CSS scanner.
                  encoder.begin_group :string
                  encoder.text_token match, :delimiter
                  if scan(/javascript:[ \t]*/)
                    encoder.text_token matched, :comment
                  end
                  # Read up to (not including) the matching quote or end of input.
                  code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
                  if in_attribute == :script
                    scan_java_script encoder, code
                  else
                    scan_css encoder, code, [:block]
                  end
                  match = scan(/["']/)
                  encoder.text_token match, :delimiter if match
                  encoder.end_group :string
                  state = :attribute
                  in_attribute = nil
                else
                  encoder.begin_group :string
                  state = :attribute_value_string
                  plain_string_content = PLAIN_STRING_CONTENT[match]
                  encoder.text_token match, :delimiter
                end
              elsif match = scan(/#{TAG_END}/o)
                # The tag ends right after "=". Handle this exactly like a
                # tag end in the :attribute state, so that the body of
                # <script attr=> / <style attr=> is still scanned as
                # JavaScript / CSS.
                encoder.text_token match, :tag
                in_attribute = nil
                state = in_tag ? :in_special_tag : :initial
              else
                encoder.text_token getch, :error
              end

            when :attribute_value_string
              if match = scan(plain_string_content)
                encoder.text_token match, :content
              elsif match = scan(/['"]/)
                encoder.text_token match, :delimiter
                encoder.end_group :string
                state = :attribute
              elsif match = scan(/#{ENTITY}/ox)
                encoder.text_token match, :entity
              elsif match = scan(/&/)
                # A lone "&" that does not start an entity.
                encoder.text_token match, :content
              elsif match = scan(/[\n>]/)
                # The value was never closed: give up on the string.
                encoder.end_group :string
                state = :initial
                encoder.text_token match, :error
              end

            when :in_special_tag
              case in_tag
              when 'script', 'style'
                if match = scan(/[ \t]*\n/)
                  encoder.text_token match, :space
                end
                if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
                  # Body wrapped in an HTML comment: the wrapper is emitted
                  # as comment tokens, the inside is scanned as code.
                  code = self[2] || self[4]
                  closing = self[3]
                  encoder.text_token self[1], :comment
                else
                  # Everything up to the matching closing tag (or end of input).
                  code = scan_until(/(?=(?:\n\s*)?<\/#{in_tag}>)|\z/)
                  closing = false
                end
                unless code.empty?
                  encoder.begin_group :inline
                  if in_tag == 'script'
                    scan_java_script encoder, code
                  else
                    scan_css encoder, code
                  end
                  encoder.end_group :inline
                end
                encoder.text_token closing, :comment if closing
                state = :initial
              else
                raise 'unknown special tag: %p' % [in_tag]
              end

            else
              raise_inspect 'Unknown state: %p' % [state], encoder

            end

          end

        end

        if options[:keep_state]
          @state = state
          @plain_string_content = plain_string_content
          @in_tag = in_tag
        end

        # Balance the string group when the input ended inside a quoted value.
        encoder.end_group :string if state == :attribute_value_string

        encoder
      end

    end

  end
end
|