RubyGems - codnar - Versions diffs - 0.1.64 - Mend

codnar 0.1.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

data/ChangeLog +165 -0
data/LICENSE +19 -0
data/README.rdoc +32 -0
data/Rakefile +66 -0
data/bin/codnar-split +5 -0
data/bin/codnar-weave +5 -0
data/codnar.html +10945 -0
data/doc/logo.png +0 -0
data/doc/root.html +22 -0
data/doc/story.markdown +180 -0
data/doc/system.markdown +671 -0
data/lib/codnar.rb +41 -0
data/lib/codnar/application.rb +92 -0
data/lib/codnar/cache.rb +61 -0
data/lib/codnar/data/contents.js +113 -0
data/lib/codnar/data/control_chunks.js +44 -0
data/lib/codnar/data/style.css +95 -0
data/lib/codnar/data/sunlight/README.txt +4 -0
data/lib/codnar/data/sunlight/css-min.js +1 -0
data/lib/codnar/data/sunlight/default.css +236 -0
data/lib/codnar/data/sunlight/javascript-min.js +1 -0
data/lib/codnar/data/sunlight/min.js +1 -0
data/lib/codnar/data/sunlight/ruby-min.js +1 -0
data/lib/codnar/data/yui/README.txt +3 -0
data/lib/codnar/data/yui/base.css +132 -0
data/lib/codnar/data/yui/reset.css +142 -0
data/lib/codnar/formatter.rb +180 -0
data/lib/codnar/grouper.rb +28 -0
data/lib/codnar/gvim.rb +132 -0
data/lib/codnar/hash_extensions.rb +41 -0
data/lib/codnar/markdown.rb +47 -0
data/lib/codnar/merger.rb +138 -0
data/lib/codnar/rake.rb +41 -0
data/lib/codnar/rake/split_task.rb +71 -0
data/lib/codnar/rake/weave_task.rb +59 -0
data/lib/codnar/rdoc.rb +9 -0
data/lib/codnar/reader.rb +121 -0
data/lib/codnar/scanner.rb +216 -0
data/lib/codnar/split.rb +58 -0
data/lib/codnar/split_configurations.rb +367 -0
data/lib/codnar/splitter.rb +32 -0
data/lib/codnar/string_extensions.rb +25 -0
data/lib/codnar/sunlight.rb +17 -0
data/lib/codnar/version.rb +8 -0
data/lib/codnar/weave.rb +58 -0
data/lib/codnar/weave_configurations.rb +48 -0
data/lib/codnar/weaver.rb +105 -0
data/lib/codnar/writer.rb +38 -0
data/test/cache_computations.rb +41 -0
data/test/deep_merge.rb +29 -0
data/test/embed_images.rb +12 -0
data/test/expand_markdown.rb +27 -0
data/test/expand_rdoc.rb +20 -0
data/test/format_code_gvim_configurations.rb +55 -0
data/test/format_code_sunlight_configurations.rb +37 -0
data/test/format_comment_configurations.rb +86 -0
data/test/format_lines.rb +72 -0
data/test/group_lines.rb +31 -0
data/test/gvim_highlight_syntax.rb +49 -0
data/test/identify_chunks.rb +32 -0
data/test/lib/test_with_configurations.rb +15 -0
data/test/merge_lines.rb +133 -0
data/test/rake_tasks.rb +38 -0
data/test/read_chunks.rb +110 -0
data/test/run_application.rb +56 -0
data/test/run_split.rb +38 -0
data/test/run_weave.rb +75 -0
data/test/scan_lines.rb +78 -0
data/test/split_chunk_configurations.rb +55 -0
data/test/split_code.rb +109 -0
data/test/split_code_configurations.rb +73 -0
data/test/split_combined_configurations.rb +114 -0
data/test/split_complex_comment_configurations.rb +73 -0
data/test/split_documentation.rb +92 -0
data/test/split_documentation_configurations.rb +97 -0
data/test/split_simple_comment_configurations.rb +50 -0
data/test/sunlight_highlight_syntax.rb +25 -0
data/test/weave_configurations.rb +144 -0
data/test/write_chunks.rb +28 -0
metadata +363 -0

data/lib/codnar/split.rb ADDED

@@ -0,0 +1,58 @@
+module Codnar
+  # Split application.
+  class Split < Application
+    # Run the splitting Codnar application, returning its status.
+    def run
+      super { split }
+    end
+  protected
+    # Split the specified input file into chunks.
+    def split
+      @configuration = Codnar::Configuration::SPLIT_HTML_DOCUMENTATION if @configuration == {}
+      splitter = Splitter.new(@errors, @configuration)
+      print(splitter.chunks(ARGV[0]).to_yaml)
+    end
+    # Parse remaining command-line file arguments.
+    def parse_arguments
+      case ARGV.size
+      when 1 then return
+      when 0 then $stderr.puts("#{$0}: No input file to split")
+      else $stderr.puts("#{$0}: Too many input files to split")
+      end
+      exit(1)
+    end
+    # Return the banner line of the help message.
+    def banner
+      return "codnar-split - Split documentation or code files to chunks."
+    end
+    # Return the name and description of any final command-line file arguments.
+    def arguments
+      return "FILE", "Documentation or code file to split."
+    end
+    # Return a short description of the program.
+    def description
+      return <<-EOF.unindent
+        Split the documentation of file into chunks that are printed in YAML format to
+        the output (to be read by codnar-weave). Many file formats can be split
+        depending on the specified configuration. The default configuration is called
+        SPLIT_HTML_DOCUMENTATION, and it preserves the whole file as a single formatted
+        HTML documentation chunk. This isn't very useful.
+        The configuration needs to specify a set of line classification patterns,
+        parsing states and pattern-based transitions between them, the initial state,
+        and expressions for formatting classified lines to HTML. See the Codnar
+        documentation for details.
+      EOF
+    end
+  end
+end

data/lib/codnar/split_configurations.rb ADDED

@@ -0,0 +1,367 @@
+module Codnar
+  # A module for all the "built-in" configurations. The names of these
+  # configurations can be passed to the --require option of any Codnar
+  # Application.
+  module Configuration
+    # {{{ Documentation "splitting" configurations
+    # "Split" a documentation file. All lines are assumed to have the same kind
+    # +doc+ and no indentation is collected. Unless overridden by additional
+    # configuration(s), the lines are assumed to contain formatted HTML, and
+    # are passed as-is to the output.
+    #
+    # This is the default configuration as it performs the minimal amount of
+    # processing on the input. It isn't the most useful configuration.
+    SPLIT_HTML_DOCUMENTATION = {
+      "formatters" => {
+        "doc" => "Formatter.cast_lines(lines, 'html')",
+      },
+      "syntax" => {
+        "patterns" => {
+          "doc" => { "regexp" => "^(.*)$", "groups" => [ "payload" ] },
+        },
+        "states" => {
+          "start" => { "transitions" => [ { "pattern" => "doc" } ] },
+        },
+      },
+    }
+    # "Split" a documentation file containing arbitrary text, which is
+    # preserved by escaping it and wrapping it in an HTML pre element.
+    SPLIT_PRE_DOCUMENTATION = SPLIT_HTML_DOCUMENTATION.deep_merge(
+      "formatters" => {
+        "doc" => "Formatter.lines_to_pre_html(lines, :class => :doc)",
+      }
+    )
+    # "Split" a documentation file containing pure RDoc documentation.
+    SPLIT_RDOC_DOCUMENTATION = SPLIT_HTML_DOCUMENTATION.deep_merge(
+      "formatters" => {
+        "doc" => "Formatter.markup_lines_to_html(lines, 'RDoc')",
+        "unindented_html" => "Formatter.unindented_lines_to_html(lines)",
+      }
+    )
+    # "Split" a documentation file containing pure Markdown documentation.
+    SPLIT_MARKDOWN_DOCUMENTATION = SPLIT_HTML_DOCUMENTATION.deep_merge(
+      "formatters" => {
+        "doc" => "Formatter.markup_lines_to_html(lines, 'Markdown')",
+        "unindented_html" => "Formatter.unindented_lines_to_html(lines)",
+      }
+    )
+    # }}}
+    # {{{ Source code lines classification configurations
+    # Classify all lines as source code of some syntax (kind). This doesn't
+    # distinguish between comment and code lines; to do that, you need to
+    # combine this with comment classification configuration(s). Also, it just
+    # formats the lines in an HTML +pre+ element, without any syntax
+    # highlighting; to do that, you need to combine this with syntax
+    # highlighting formatting configuration(s).
+    CLASSIFY_SOURCE_CODE = lambda do |syntax|
+      return {
+        "formatters" => {
+          "#{syntax}_code" => "Formatter.lines_to_pre_html(lines, :class => :code)",
+        },
+        "syntax" => {
+          "patterns" => {
+            "#{syntax}_code" => { "regexp" => "^(\\s*)(.*)$" },
+          },
+          "states" => {
+            "start" => {
+              "transitions" => [
+                { "pattern" => "#{syntax}_code" },
+              ],
+            },
+          },
+        },
+      }
+    end
+    # }}}
+    # {{{ Nested foreign syntax code islands configurations
+    # Allow for comments containing "((( <syntax>" and "))) <syntax>" to
+    # designate nested islands of foreign syntax inside the normal code. The
+    # designator comment lines are always treated as part of the surrounding
+    # code, not as part of the nested foreign syntax code. There is no further
+    # classification of the nested foreign syntax code. Therefore, the nested
+    # code is not examined for begin/end chunk markers. Likewise, the nested
+    # code may not contain deeper nested code using a third syntax.
+    CLASSIFY_NESTED_CODE = lambda do |outer_syntax, inner_syntax|
+      {
+        "syntax" => {
+          "patterns" => {
+            "start_#{inner_syntax}_in_#{outer_syntax}" =>
+              { "regexp" => "^(\\s*)(.*\\(\\(\\(\\s*#{inner_syntax}.*)$" },
+            "end_#{inner_syntax}_in_#{outer_syntax}" =>
+              { "regexp" => "^(\\s*)(.*\\)\\)\\)\\s*#{inner_syntax}.*)$" },
+            "#{inner_syntax}_in_#{outer_syntax}" =>
+              { "regexp" => "^(\\s*)(.*)$" },
+          },
+          "states" => {
+            "start" => {
+              "transitions" => [
+                { "pattern" => "start_#{inner_syntax}_in_#{outer_syntax}",
+                  "kind" => "#{outer_syntax}_code",
+                  "next_state" => "#{inner_syntax}_in_#{outer_syntax}" },
+                [],
+              ],
+            },
+            "#{inner_syntax}_in_#{outer_syntax}" => {
+              "transitions" => [
+                { "pattern" => "end_#{inner_syntax}_in_#{outer_syntax}",
+                  "kind" => "#{outer_syntax}_code",
+                  "next_state" => "start" },
+                { "pattern" => "#{inner_syntax}_in_#{outer_syntax}",
+                  "kind" => "#{inner_syntax}_code" },
+              ],
+            },
+          },
+        },
+      }
+    end
+    # }}}
+    # {{{ Simple comment classification configurations
+    # Classify simple comment lines. It accepts a restricted format: each
+    # comment is expected to start with some exact prefix (e.g. "#" for shell
+    # style comments or "//" for C++ style comments). The following space, if
+    # any, is stripped from the payload. As a convenience, comment that starts
+    # with "!" is not taken to start a comment. This both protects the 1st line
+    # of shell scripts ("#!"), and also any other line you wish to avoid being
+    # treated as a comment.
+    #
+    # This configuration is typically complemented by an additional one
+    # specifying how to format the (stripped!) comments; by default they are
+    # just displayed as-is using an HTML +pre+ element, which isn't very
+    # useful.
+    CLASSIFY_SIMPLE_COMMENTS = lambda do |prefix|
+      return Configuration.simple_comments(prefix)
+    end
+    # Classify simple shell ("#") comment lines.
+    CLASSIFY_SHELL_COMMENTS = lambda do
+      return Configuration.simple_comments("#")
+    end
+    # Classify simple C++ ("//") comment lines.
+    CLASSIFY_CPP_COMMENTS = lambda do
+      return Configuration.simple_comments("//")
+    end
+    # Configuration for classifying lines to comments and code based on a
+    # simple prefix (e.g. "#" for shell style comments or "//" for C++ style
+    # comments).
+    def self.simple_comments(prefix)
+      return {
+        "syntax" => {
+          "patterns" => {
+            "comment_#{prefix}" => { "regexp" => "^(\\s*)#{prefix}(?!!)\\s?(.*)$" },
+          },
+          "states" => {
+            "start" => {
+              "transitions" => [
+                { "pattern" => "comment_#{prefix}", "kind" => "comment" },
+                []
+              ],
+            },
+          },
+        },
+      }
+    end
+    # }}}
+    # {{{ Complex comment classification configurations
+    # Classify complex comment lines. It accepts a restricted format: each
+    # comment is expected to start with some exact prefix (e.g. "/*" for C
+    # style comments or "<!--" for HTML style comments). The following space,
+    # if any, is stripped from the payload. Following lines are also considered
+    # comments; a leading inner line prefix (e.g., " *" for C style comments or
+    # " -" for HTML style comments) with an optional following space are
+    # stripped from the payload. Finally, a line containing some exact suffix
+    # (e.g. "*/" for C style comments, or "-->" for HTML style comments) ends
+    # the comment. A one line comment format is also supported containing the
+    # prefix, the payload, and the suffix. As a convenience, a prefix followed
+    # by "!" is not taken to start a comment. This allows protecting a
+    # comment block you wish to avoid being classified as a comment.
+    #
+    # This configuration is typically complemented by an additional one
+    # specifying how to format the (stripped!) comments; by default they are
+    # just displayed as-is using an HTML +pre+ element, which isn't very
+    # useful.
+    CLASSIFY_COMPLEX_COMMENTS = lambda do |prefix, inner, suffix|
+      return Configuration.complex_comments(prefix, inner, suffix)
+    end
+    # Classify complex C ("/*", " *", " */") style comments.
+    CLASSIFY_C_COMMENTS = lambda do
+      # Since the prefix/inner/suffix passed to the configuration are regexps,
+      # we need to escape special characters such as "*".
+      return Configuration.complex_comments("/\\*", " \\*", " \\*/")
+    end
+    # Classify complex HTML ("<!--", " -", "-->") style comments.
+    CLASSIFY_HTML_COMMENTS = lambda do
+      return Configuration.complex_comments("<!--", " -", "-->")
+    end
+    # Configuration for classifying lines to comments and code based on a
+    # complex start prefix, inner line prefix and final suffix (e.g., "/*", "
+    # *", " */" for C-style comments or "<!--", " -", "-->" for HTML style
+    # comments).
+    def self.complex_comments(prefix, inner, suffix)
+      return {
+        "syntax" => {
+          "patterns" => {
+            "comment_prefix_#{prefix}" => { "regexp" => "^(\\s*)#{prefix}(?!!)\\s?(.*)$" },
+            "comment_inner_#{inner}" => { "regexp" => "^(\\s*)#{inner}\\s?(.*)$" },
+            "comment_suffix_#{suffix}" => { "regexp" => "^(\\s*)#{suffix}\\s*$" },
+            "comment_line_#{prefix}_#{suffix}" => { "regexp" => "^(\\s*)#{prefix}(?!!)\s?(.*?)\s*#{suffix}\\s*$" },
+          },
+          "states" => {
+            "start" => {
+              "transitions" => [
+                { "pattern" => "comment_line_#{prefix}_#{suffix}",
+                  "kind" => "comment" },
+                { "pattern" => "comment_prefix_#{prefix}",
+                  "kind" => "comment",
+                  "next_state" => "comment_#{prefix}" },
+                [],
+              ],
+            },
+            "comment_#{prefix}" => {
+              "transitions" => [
+                { "pattern" => "comment_suffix_#{suffix}",
+                  "kind" => "comment",
+                  "next_state" => "start" },
+                { "pattern" => "comment_inner_#{inner}",
+                  "kind" => "comment" },
+              ],
+            },
+          },
+        },
+      }
+    end
+    # }}}
+    # {{{ Comment formatting configurations
+    # Format comments as HTML pre elements. Is used to complement a
+    # configuration that classifies some lines as +comment+.
+    FORMAT_PRE_COMMENTS = {
+      "formatters" => {
+        "comment" => "Formatter.lines_to_pre_html(lines, :class => :comment)",
+      },
+    }
+    # Format comments that use the RDoc notation. Is used to complement a
+    # configuration that classifies some lines as +comment+.
+    FORMAT_RDOC_COMMENTS = {
+      "formatters" => {
+        "comment" => "Formatter.markup_lines_to_html(lines, 'RDoc')",
+        "unindented_html" => "Formatter.unindented_lines_to_html(lines)",
+      },
+    }
+    # Format comments that use the Markdown notation. Is used to complement a
+    # configuration that classifies some lines as +comment+.
+    FORMAT_MARKDOWN_COMMENTS = {
+      "formatters" => {
+        "comment" => "Formatter.markup_lines_to_html(lines, 'Markdown')",
+        "unindented_html" => "Formatter.unindented_lines_to_html(lines)",
+      },
+    }
+    # }}}
+    # {{{ GVim syntax highlighting formatting configurations
+    # Format code using GVim's highlighting for the given syntax, using explicit HTML
+    # constructs. Assumes some previous configuration already classified the
+    # code lines.
+    FORMAT_CODE_GVIM_HTML = lambda do |syntax|
+      return Configuration.gvim_code_format(syntax)
+    end
+    # Format code using GVim's highlighting for the given syntax, using CSS classes
+    # instead of explicit font and color styles. Assumes some previous
+    # configuration already classified the code lines.
+    FORMAT_CODE_GVIM_CSS = lambda do |syntax|
+      return Configuration.gvim_code_format(syntax, "'+:let html_use_css=1'")
+    end
+    # Return a configuration for highlighting a specific syntax using GVim.
+    def self.gvim_code_format(syntax, extra_commands = "")
+      return {
+        "formatters" => {
+          "#{syntax}_code" => "GVim.lines_to_html(lines, '#{syntax}', [ #{extra_commands} ])",
+        },
+      }
+    end
+    # }}}
+    # {{{ Sunlight syntax highlighting formatting configurations
+    # Format code using Sunlight's syntax highlighting. This assumes the HTML
+    # will include and invoke Sunlight's Javascript file which does the
+    # highlighting on the fly inside the DOM, instead of pre-computing it when
+    # splitting the file.
+    FORMAT_CODE_SUNLIGHT = lambda do |syntax|
+      return Configuration.sunlight_code_format(syntax)
+    end
+    # Return a configuration for highlighting a specific syntax using Sunlight.
+    def self.sunlight_code_format(syntax)
+      return {
+        "formatters" => {
+          "#{syntax}_code" => "Sunlight.lines_to_html(lines, '#{syntax}')",
+        },
+      }
+    end
+    # }}}
+    # {{{ Chunk splitting configurations
+    # Group lines into chunks using VIM-style "{{{"/"}}}" region designations.
+    # Assumes other configurations handle the actual content lines.
+    CHUNK_BY_VIM_REGIONS = {
+      "formatters" => {
+        "begin_chunk" => "[]",
+        "end_chunk" => "[]",
+        "nested_chunk" => "Formatter.nested_chunk_lines_to_html(lines)",
+      },
+      "syntax" => {
+        "patterns" => {
+          "begin_chunk" => { "regexp" => "^(\\s*)\\W*\\{\\{\\{\\s*(.*?)\\s*$" },
+          "end_chunk" => { "regexp" => "^(\\s*)\\W*\\}\\}\\}\\s*(.*?)\\s*$" },
+        },
+        "states" => {
+          "start" => {
+            "transitions" => [
+              { "pattern" => "begin_chunk" },
+              { "pattern" => "end_chunk" },
+              [],
+            ],
+          },
+        },
+      },
+    }
+    # }}}
+  end
+end

data/lib/codnar/splitter.rb ADDED

@@ -0,0 +1,32 @@
+module Codnar
+  # Split disk files into chunks.
+  class Splitter
+    # Construct a splitter based on a configuration in the following structure:
+    #
+    #   syntax: <syntax>
+    #   formatters:
+    #     <kind>: <expression>
+    #
+    # Where the syntax is passed as-is to (and expanded in-place by) a Scanner,
+    # and the formatters are passed as-is to a Formatter to convert the chunk's
+    # classified lines into HTML.
+    def initialize(errors, configuration)
+      @errors = errors
+      @configuration = configuration
+      @scanner = Scanner.new(errors, configuration.syntax)
+      @formatter = Formatter.new(errors, configuration.formatters)
+    end
+    # Split a disk file into HTML chunks.
+    def chunks(path)
+      lines = @scanner.lines(path)
+      chunks = Merger.chunks(@errors, path, lines)
+      chunks.each { |chunk| chunk.html = @formatter.lines_to_html(chunk.delete("lines")) }
+      return chunks
+    end
+  end
+end