RubyGems - ntxt - Versions diffs - 1.0.0 → 1.0.1 - Mend

ntxt 1.0.0 → 1.0.1

Files changed (7) hide show

data/README.rdoc CHANGED

@@ -0,0 +1,63 @@
+= About
+Ntxt is a simple text format that defines hierarchical blocks of text
+and tags on those blocks. The goal is to give the author an easy way
+to search their text in a slightly more structured way than +grep+ 'ing.
+Any tags found in a block are extracted and the block is _tagged_ with them.
+All parent blocks also receive the tags of their child blocks. Thus, the
+root block is tagged with all tags occurring in the document.
+== Format Rules
+=== Headers
+Header lines look like something you would see out of a wiki.
+  = Header 1 =
+  == Header 2 ==
+  = Another Header 1 =
+Header 1 is the largest and highest header. All text after it, aside from
+another 1-header, will be considered a sub-block. In the example
+"Header 1" and "Another Header 1" are the two top-level blocks. "Header 2" is
+a child of "Header 1".
+=== Indentation
+Indentation also forms blocks.
+  = Header 1 =
+  == Header 2 ==
+    Sub block of header 2.
+      Sub block of the preceding line.
+    Another block below header 2.
+  = Another Header 1 =
+In the above example Header 2 has 1 sub-block because there is an indentation
+of 2-spaces with an intermediate indentation of 4-spaces. That 4-space line
+is bundled into a subblock to the 2-space indented block of text.
+The only ways to break out of this 2-indent text are to:
+1. Put in an empty line.
+2. Put in a header.
+3. Indent more shallowly, such as a 1-space line.
+=== Tags
+Lines beginning with [tag1] [tag2] are considered to have tags
++tag1+ and +tag2+. For example:
+  Block1
+  [block 1 tag] [example]
+  [this is tag] Block2 [not a tag]
+  [block 2 tag] [example]
+Notice, tag names may have spaces. Both blocks are tagged with +example+.
+And finally, <code>not a tag</code> is, well, not a tag. It does not start a line.
+Also note that you can't tag header blocks directly because the header line
+must start with = and tag lines must begin with [. Blocks inherit all their
+child blocks' tags, though, so finding header blocks by tags is still possible.

data/bin/ntxt CHANGED

@@ -27,7 +27,13 @@ OptionParser.new do |opt|
     $configs[:cmd] = 'tag'
     $configs[:tag_string] = v
   end
+  opt.on('-l', '--last=[Integer]',
+    'Show the last n top-level blocks. Default is 1') do |v|
+    $configs[:cmd] = 'last'
+    $configs[:last] = (v.nil?)? 1 : v.to_i
+  end
   opt.on('-s','--search=String', 'Search the text for.' ) do |v|
     $configs[:cmd] = 'search'
     $configs[:search_string] = v
@@ -41,9 +47,10 @@ else
   exit 1
 end
+# Remove empty lines from blocks and print. Guards against nils and empty lines.
 def printNonEmpty(txt)
   if txt
-    txt = txt.strip
+    txt = txt.gsub(/^\s*$/m, '')
     print txt, "\n" if txt.length > 0
   end
 end
@@ -55,11 +62,11 @@ ntxt = File.open($configs[:filename]) { |io| Ntxt::Ntxt.new(io.read) }
 case $configs[:cmd]
 when 'print_tags'
   # Notice that we are re-wrapping the tags back into square brackets.
-  puts "[#{ntxt.rootBlock.tags.sort.join('] [')}]"
+  puts "[#{ntxt.rootBlock.tags.keys.sort.join('] [')}]"
 when 'tag'
   ntxt.walkText(
     lambda { |txt, depth, block|
-      printNonEmpty txt if block.tags.join(', ').index( $configs[:tag_string])},
+      printNonEmpty txt if block.tags.keys.sort.join(', ').index( $configs[:tag_string])},
     lambda { |depth, block| },
     lambda { |depth, block| } )
 when 'search'
@@ -73,11 +80,17 @@ when 'print'
     lambda { |txt, depth, block| print txt },
     lambda { |depth, block| },
     lambda { |depth, block| } )
+when 'last'
+  ntxt.rootBlock.children[-$configs[:last].. -1].each do |blk|
+    puts blk.text
+  end
 when 'trace'
   ntxt.walkText(
     lambda { |txt, depth, block| print txt },
-    lambda { |depth, block| puts "-----> #{depth} #{block.tags.join(',')}" },
-    lambda { |depth, block| puts "<----- #{depth} " } )
+    lambda { |depth, block|
+      puts "-----> #{depth} #{block.tags.keys.sort.join(',')}" },
+    lambda { |depth, block|
+      puts "<----- #{depth} " } )
 when ''
   # nop
 when nil

data/lib/ntxt.rb CHANGED

@@ -1,3 +1,8 @@
 require 'ntxt/block'
 require 'ntxt/parser'
 require 'ntxt/ntxt'
+# Ntxt module.
+# See Ntxt::Parser and Ntxt::Block.
+module Ntxt
+end

data/lib/ntxt/block.rb CHANGED

@@ -10,21 +10,35 @@ module Ntxt
   # ONLY contain subblocks.
   class Block
-    attr_accessor :children, :tags, :start, :offset, :ntxt, :parent
+    # A list of child Blocks.
+    attr_accessor :children
+    # A hash of all tags of this block and its children.
+    attr_accessor :tags
+    # The +start+ index in the text string held in the Ntxt parent object.
+    # See the +ntxt+ field.
+    attr_accessor :start
+    # The offset from the +start+ field.
+    attr_accessor :offset
+    # The Ntxt object.
+    attr_accessor :ntxt
+    # The parent Block or nil if this is a root Block.
+    attr_accessor :parent
-    if RUBY_VERSION =~ /^1.8/
-      def self.blockReMatch(re, txt, offset)
-        re.match(txt[offset..-1])
-      end
-    else
-      def self.blockReMatch(re, txt, offset)
-        re.match(text, offset)
-      end
-    end
+    # Create a new Block. Typically you will never need to do this.
+    # Blocks are created by Parser.
+    # [ntxtObj] The Ntxt object that this block belongs to.
+    #           The Ntxt object holds the text this block will reference.
+    # [parentBlock] The parent block. Nil by default.
+    # [startTxt] The starting character in Ntxt.text.
+    # [stopTxt] The initial offset. If nil this is set to ntxtObj.text.length.
     def initialize(ntxtObj, parentBlock=nil, startTxt=0, stopTxt=0)
       @children = []
-      @tags = []
+      @tags = Hash.new(0)
       @start = startTxt
       @offset = stopTxt || ntxtObj.text.length
       @ntxt = ntxtObj
@@ -45,31 +59,64 @@ module Ntxt
       end
     end
+    # Add a tag to this block and all ancestor blocks.
     def addTag(tag)
-      @tags.push(tag)
+      @tags[tag] += 1
       @parent.addTag(tag) if @parent
     end
+    # Return the text slice that this block refers to.
+    # Note that parent blocks include their child blocks' text.
     def text
       @ntxt.text[@start, @offset]
     end
+    # Return true if the parent object is nil.
     def is_root?
-      @parent
+      @parent.nil?
     end
+    # Given a block this will first call that block with this Block as
+    # the only argument. Then walk is recursively called on all child Blocks.
     def walk(&y)
       yield self
       @children.each { |c| c.walk(&y) }
     end
-    # printFunc is a lambda that takes the text, depth, and the node.
-    # enterChild is a lambda that takes depth and the node.
-    # exitChild is a lambda that takes depth and the node.
+    # This method handles the complexity of handing the user
+    # the text immediately handled by each block.
+    #
+    # If you call Block.text you will get a contiguous block of text
+    # that covers this Block and all its children. Essentially the +start+
+    # to the +offset+ substring of Ntxt.text.
+    #
+    # What this method does is pass each text that belongs only to
+    # the particular Block in question and the children. The text
+    # is passed to the user in order, so concatenating it would
+    # result in equivalent output to Block.text.
+    #
+    # This method is useful for visualizing the text structure
+    # or filtering out blocks that aren't interesting to the user.
+    #
+    # [printFunc] A lambda that takes the text, depth, and the Block.
+    # [enterChild] A lambda that takes depth and the Block.
+    # [exitChild] A lambda that takes depth and the Block.
+    # For example:
+    #
+    #   printBlock = lambda { |text, depth, block| ... }
+    #   enterBlock = lambda { |depth, block| ... }
+    #   exitBlock  = lambda { |depth, block| ... }
+    #
+    #   block.walkText( printBlock, enterBlock, exitBlock )
+    #
     def walkText(printFunc, enterChild, exitChild)
       walkTextHelper(printFunc, enterChild, exitChild, 0)
     end
+    protected
+    # Helper function for walkText. Takes the same arguments plus the
+    # current depth; walkText starts the walk at depth 0.
     def walkTextHelper(printFunc, enterChild, exitChild, depth)
       enterChild.call(depth, self)
@@ -95,5 +142,25 @@ module Ntxt
       exitChild.call(depth, self)
     end
+    private
+    if RUBY_VERSION =~ /^1.8/
+      # Regular expression matcher with an offset.
+      # The implementation of this is decided at runtime depending on
+      # if Ruby 1.9's new RE match method is found.
+      def self.blockReMatch(re, txt, offset)
+        re.match(txt[offset..-1])
+      end
+    else
+      # Regular expression matcher with an offset.
+      # The implementation of this is decided at runtime depending on
+      # if Ruby 1.9's new RE match method is found.
+      def self.blockReMatch(re, txt, offset)
+        re.match(txt, offset)
+      end
+    end
   end
 end

data/lib/ntxt/ntxt.rb CHANGED

@@ -1,24 +1,25 @@
 module Ntxt
-  # Root class that contains the text array that Blocks reference
-  # and the root block.
+  # Root class that contains the text array that Block objects reference.
   class Ntxt
-    attr_accessor :text, :rootBlock
+    # The raw text file. This is a String.
+    attr_accessor :text
+    # The root Block. It will contain all tags in the document
+    # and has a Block.start of 0 and a Block.offset of #text.length.
+    attr_accessor :rootBlock
+    # Create a new Ntxt object. This requires a String that is the text
+    # of the object.
+    #   ntxt = Ntxt::Ntxt.new( File.open('n.txt'){ |io| io.read } )
     def initialize(text)
       @text  = text
       @rootBlock = (Parser.new).parse(self)
     end
-    # walkText(print, enter, exit)
-    # Walk the ntxt tree with 3 callbacks, print, enter, and exit.
-    # Print is a lambda that takes the text, the depth, and a copy of
-    # the Ntxt::Block it is in.
-    # Enter is the same, but is called with no text argument and is called
-    # when a block is entered (that is, the depth has increased by 1).
-    # Exit is the same, but is called with no text argument and is called
-    # when a block is exited (that is, the depth has decreated by 1).
+    # Calls Block#walkText.
     def walkText(print, enter, exit)
       @rootBlock.walkText(print, enter, exit)
     end

data/lib/ntxt/parser.rb CHANGED

@@ -1,98 +1,126 @@
 require 'ntxt/block'
+require 'ntxt/ntxt'
 module Ntxt
+  # The parser for Ntxt. Most of this a typical user will not find useful
+  # with the exception of Parser.parse.
   class Parser
-    ###########################################################################
+    # An internal class that contains the current parse position
+    # and current limits on that parse, such as an artificial end-of-file
+    # marker to terminate a sub-parsing of a sub-block.
     class State
-      attr_accessor :lines, :block, :lineStart, :lineEnd, :line, :start, :offset
+      # Array of lines. The result of Ntxt.text being split on '\n'
+      attr_accessor :lines
+      # The current Block being built up.
+      attr_accessor :block
+      # The index into #lines to start parsing at
+      # and before which #prevLine should return nil.
+      attr_accessor :lineStart
+      # The index into #lines at which #nextLine should return nil.
+      # This defaults to #lines.length
+      attr_accessor :lineEnd
+      # The current line this State points at.
+      attr_accessor :line
+      # The index into Ntxt.text that corresponds to the first
+      # character of the #currLine.
+      attr_accessor :start
+      # The offset from #start. The substring of Ntxt.text
+      # starting at #start and of length #offset will produce the
+      # text being considered by this State.
+      attr_accessor :offset
       def initialize(lines, block, lineStart, start, lineEnd)
-        @lines = lines
-        @block = block
-        @lineStart = lineStart
-        @lineEnd = lineEnd
-        @line = lineStart
-        @start = start
-        @offset = 0
+        @lines     = lines      # The array of lines to parse.
+        @block     = block      # The block this state is operating on.
+        @lineStart = lineStart  # The starting line in this state.
+        @line      = lineStart  # The current line this state points at.
+        @lineEnd   = lineEnd    # The last line. @lineEnd <= @lines.length.
+        @start     = start      # Start index in text.
+        @offset    = 0          # Offset from @start.
       end
-      # Return the current line.
+      # Return the current line. If #prevLine or #nextLine has
+      # walked outside of the #lineStart or #lineEnd limits this will
+      # return nil.
       def currLine
-        @lines[@line]
+        if @line < @lineEnd && @line >= @lineStart
+          @lines[@line]
+        else
+          nil
+        end
       end
-      # Shift the state to the next line and return that line.
-      # If this goes out of bounds of the text nil is returned.
+      # Return the next line (#line + 1) unless we step beyond #lineEnd.
+      # If we exceed #lineEnd, nil is returned.
+      # Notice that this also updates #offset.
       def nextLine
-        nextLine = @line+1
-        if nextLine < @lineEnd
-          nextOffset = @offset + @lines[@line].length + 1
-          @offset = nextOffset
-          @line = nextLine
-          @lines[nextLine]
-        else
+        # If we are already past the end, return nil, do nothing.
+        if @line >= @lineEnd
           nil
-        end
+        # Otherwise we are updating some state.
+        else
+          @offset = @offset + @lines[@line].length + 1
+          @line = @line + 1
+          # Recheck if we are inside the array and return nil if we are not.
+          (@line < @lineEnd) ? @lines[@line] : nil
+        end
       end # nextLine
-      # Shift the state to the previous line and return that line.
-      # If this goes out of bounds of the text nil is returned.
+      # Return the previous line (#line - 1) unless we step before #lineStart.
+      # If we exceed #lineStart, nil is returned.
+      # Notice that this also updates #offset.
       def prevLine
-        nextLine = @line - 1
-        if nextLine >= @lineStart
-          nextOffset = @offset - @lines[nextLine].length - 1
-          @offset = nextOffset
-          @line = nextLine
-          @lines[nextLine]
-        else
+        if @line < @lineStart
           nil
+        else
+          nLine = @line - 1
+          @offset = @offset - @lines[nLine].length - 1
+          @line = nLine
+          @lines[nLine]
         end
       end # prevLine
       # Shift the state starting points to the current position of
       # what has been read in this state, effectively consuming that input.
+      #
+      # The #start field is moved to #start+offset. The #offset is set to 0.
+      # Finally the #lineStart is set to #line.
       def consume
         @start = @start + @offset
         @offset = 0
         @lineStart = @line
       end
-      # Create a new state that is framed from the lineStart+1 of this state
-      # and ends at the current line of the given state.
-      def lowerSubState
-        endOfFrame = @line+1
-        endOfFrame = @lineEnd if endOfFrame > @lineEnd
-        State.new(@lines,
-                  @block,
-                  @lineStart+1,
-                  @start + @lines[@lineStart].length + 1,
-                  endOfFrame)
-      end # lowerSubState
-      # Create a new state that is framed with the remaining contents of
-      # this state
-      def upperSubState
-        State.new(@lines, @block, @line, @start + @offset, @lineEnd)
-      end
+      # Print as a string.
       def to_s
         "lineStart: %s, lineEnd: %s, line: %s, start: %s, offset: %s"%[
           @lineStart, @lineEnd, @line, @start, @offset
         ]
       end
-      # Seek this state's position to the tiven state's position.
+      # Seek this state's position forward to the given state's position.
+      # If a state with a position behind the current state's position is
+      # passed in as an argument the behavior is undefined.
       def seek(state)
         @line = state.line
         @offset = (state.start + state.offset - @start).abs()
       end # seek
     end # Parser::State
-    ###########################################################################
     # Return an array in which the first element is the indent length and
     # the second element is the contained text. Nil otherwise.
@@ -115,6 +143,17 @@ module Ntxt
       end
     end # self.hlevel
+    # Extract all the tags from the given line.
+    # [block] The block to which all tags will be added with Block#addTag.
+    #         All parent blocks receive copies of the child block's tag.
+    # [line] The line to extract all tags from. Tags are
+    #        square-bracket-enclosed strings found in sequence at the
+    #        beginning of a line. If the sequence is broken, extraction stops.
+    #    Some tag examples:
+    #      [a tag] [another tag]
+    #      [a tag] [another tag] Not a tag. [not a tag]
+    #      No tag on this line.
+    #      No tag on this line either. [not a tag]
     def self.extractTags(block, line)
       while line =~ /^\s*\[([^\[]+)\]/m
         block.addTag($~[1])
@@ -123,6 +162,10 @@ module Ntxt
       end
     end # self.extractTags
+    # Parse the given Ntxt 's Ntxt#text.
+    # [ntxtObj] If this is an Ntxt object, Ntxt#text is parsed.
+    #           If +ntxtObj+ is not an Ntxt object, it is assumed to be
+    #           a valid argument for Ntxt.new and a new Ntxt is constructed.
     def parse(ntxtObj)
       # If ntxtObj isn't an Ntxt, create it as one.
@@ -144,60 +187,87 @@ module Ntxt
         nil
       end
     end # parse(ntxtObj)
+    # Take the state off the top of the #stack and attempt to parse
+    # an Hlevel block. An HLevel block is a wiki-like header block of text.
+    # For example:
+    #   = Header 1 =
+    #   == Header 2 ==
+    # [level] an integer from 1 to 6.
+    # [title] a string of the text found between the equal signs.
     def parseHlevel(level, title)
       state = @stack[-1]
-      begin
-        line = state.nextLine
+      # If in parseHlevel, don't get the current line. That is contained
+      # in the title argument. Instead, get the next line and proceed.
+      line = state.nextLine
+      while line
+        # Check if we have discovered another block in the form of an hlevel.
+        hl = Parser::hlevel(line)
-        if line
-          hl = Parser::hlevel(line)
-          if hl && hl[0].to_i <= level
-            state.prevLine # Rewind. We steped onto another h block.
-            break
-          end
+        if hl && hl[0].to_i <= level
+          break
         end
-      end while line
+        line = state.nextLine
+      end
+      block = Block.new(
+        state.block.ntxt,
+        state.block,
+        state.start,
+        state.offset)
+      subState = State.new(
+        state.lines,
+        block,
+        state.lineStart+1,
+        state.start + state.lines[state.lineStart].length + 1,
+        state.line)
-      block = Block.new(state.block.ntxt,
-                        state.block,
-                        state.start,
-                        state.offset)
-      subState = state.lowerSubState
-      subState.block = block
       @stack.push subState
       parseLines
       @stack.pop
       state.consume
     end # parseHlevel(leve, title)
+    # Parse blocks of text that are indented at the given level or greater.
+    # [indentLevel] an integer denoting the number of characters this line is
+    #               indented at.
+    # [text] the content of the line that was indented.
     def parseIndent(indentLevel, text)
       state = @stack[-1]
       line = state.currLine
-      # BUild the block. Update the offset.
+      # Build the block. Update the offset.
       block = Block.new(state.block.ntxt,
                         state.block,
                         state.start,
                         state.offset)
+      id = rand(100)
       # Position state at the end of the block.
       # Blocks are ended by empty lines or lines with the = starting them.
       while line
         break unless line =~ /^(\s*)([^=\s].*)$/
         nextIndentLevel = $~[1].length
         nextLine = $~[2]
         break if nextIndentLevel < indentLevel
         if nextIndentLevel > indentLevel
           # Advance to the next line after parsing a subblock.
-          subState = state.upperSubState()
-          subState.block = block
+          subState = State.new(
+            state.lines,
+            block,
+            state.line,
+            state.start + state.offset,
+            state.lineEnd)
           @stack.push subState
           parseIndent(nextIndentLevel, nextLine)
           @stack.pop
@@ -209,28 +279,35 @@ module Ntxt
         end # if nextIndentLevel > indentLevel
       end # while line
       block.offset = state.offset
       state.consume
     end # parseIndent(indentLevel, text)
+    # This is the root of the parser's call tree after #parse sets up
+    # the parse. This plucks the State off the Parser.stack, obtains the
+    # State.currLine.
+    #
+    # When an indented line is found, #parseIndent is called.
+    # When a header line is found, #parseHlevel is called.
+    # Otherwise, we move to the next line.
     def parseLines
       state = @stack[-1]
-      state.block.children = []
       line = state.currLine
       while line
         tmp = Parser::hlevel(line)
         if tmp
           state.consume
           parseHlevel(tmp[0].to_i, tmp[1])
+          line = state.currLine
         elsif line =~ /^(\s*)(\S.*)$/
           state.consume
           parseIndent($~[1].length, $~[2])
+          line = state.currLine
+        else
+          line = state.nextLine
         end # if tmp
-        line = state.nextLine
       end # while line
     end # parseLines
   end

metadata CHANGED

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 1
   - 0
-  - 0
-  version: 1.0.0
+  - 1
+  version: 1.0.1
 platform: ruby
 authors:
 - Sam Baskinger
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-10-30 00:00:00 -05:00
+date: 2011-11-01 00:00:00 -05:00
 default_executable:
 dependencies: []