RubyGems - docbook_status - Versions diffs - 0.1.1 → 0.3.0 - Mend

docbook_status 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

data/Gemfile +3 -1
data/Gemfile.lock +11 -3
data/History.txt +19 -1
data/README.rdoc +142 -0
data/Rakefile +5 -2
data/bin/docbook_status +196 -25
data/lib/docbook_status.rb +46 -125
data/lib/docbook_status/history.rb +108 -0
data/lib/docbook_status/status.rb +258 -0
data/test/.DS_Store +0 -0
data/test/fixtures/book.xml +14 -1
data/test/fixtures/bookxi.xml +15 -0
data/test/fixtures/chapter2.xml +2 -1
data/test/fixtures/chapter2xi.xml +11 -0
data/test/fixtures/chapter3xi.xml +8 -0
data/test/fixtures/section1xi.xml +9 -0
data/test/test_docbook_status.rb +41 -6
data/test/test_history.rb +74 -0
data/version.txt +1 -1
metadata +61 -10
data/README.txt +0 -59

data/lib/docbook_status.rb CHANGED Viewed

@@ -1,143 +1,64 @@
-# -*- encoding: utf-8 -*-
+# -*- encoding:utf-8 -*-
+module DocbookStatus
-require 'xml'
+  # :stopdoc:
+  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
+  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
+  # :startdoc:
-# Analyzes DocBook 5 documents for document structure (sections) and text length.
-#
-class DocbookStatus
-  # :stopdoc
-  #
-  PATH = File.expand_path('../..', __FILE__) + File::SEPARATOR
-  LIBPATH = File.expand_path('..', __FILE__) + File::SEPARATOR
-  VERSION = File.read(PATH + '/version.txt').strip
-  HOME = File.expand_path(ENV['HOME'] || ENV['USERPROFILE'])
-  #
-  # :startdoc
-  # The DocBook 5 namespace URL
-  DOCBOOK_NS = 'http://docbook.org/ns/docbook'
-  # The XInclude namespace URL
-  XINCLUDE_NS = 'http://www.w3.org/2001/XInclude'
-  # Elements whose contents is counted as text
-  @@text_elements = ['para','simpara','formalpara']
-  # Section elements, following the list given in http://docbook.org/tdg5/en/html/ch02.html#roots
-  # except for the refsect... elements.
-  @@section_elements = %w[
-    acknowledgements appendix article
-    bibliography book
-    chapter colophon
-    dedication
-    glossary
-    index
-    preface
-    section sect1 sect2 sect3 sect4 set simplesect
-    toc
-  ]
-  def initialize
-    @sections = []
-  end
-  # Returns the version of docbook_status
+  # Returns the version string for the library.
   #
   def self.version
-    VERSION
-  end
-  # Counts the words in the contents of the given node. _Word_ in this
-  # context means something that is delimited by _space_ charactes and starts with
-  # _word_ characters (in the regexp sense).
-  #
-  def count_words(node)
-    words = node.content.strip.split(/[[:space:]]+/).find_all {|w| w =~ /\w+/}
-    words.size
+    @version ||= File.read(path('version.txt')).strip
   end
-  # Find the _title_ of the current section. That element is either
-  # directly following or inside an _info_ element. Return the empty
-  # string if no title can be found.
+  # Returns the library path for the module. If any arguments are given,
+  # they will be joined to the end of the library path using
+  # <tt>File.join</tt>.
   #
-  def find_section_title(node)
-    title = node.find_first('./db:title')
-    if title.nil?
-      title = node.find_first './db:info/db:title'
-    end
-    if title.nil?
-      ""
-    else
-      title.content
+  def self.libpath( *args, &block )
+    rv =  args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
+    if block
+      begin
+        $LOAD_PATH.unshift LIBPATH
+        rv = block.call
+      ensure
+        $LOAD_PATH.shift
+      end
     end
+    return rv
   end
-  # Check the document elements for content and type recursively,
-  # starting at the current node.  Returns an array with paragraph and
-  # section maps.
+  # Returns the path for the module. If any arguments are given,
+  # they will be joined to the end of the path using
+  # <tt>File.join</tt>.
   #
-  def check_node(node, level, ctr)
-    if (@@text_elements.include? node.name)
-      ctr << {:type => :para, :level => level, :words => count_words(node)}
-    elsif (@@section_elements.include? node.name)
-      title = find_section_title(node)
-      ctr << {:type => :section, :level => level, :title => title, :name => node.name}
-    end
-    node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
-    ctr
-  end
-  # Check whether the document has a DocBook default namespace
-  def is_docbook?(doc)
-    dbns = doc.root.namespaces.default
-    (!dbns.nil? && (dbns.href.casecmp(DOCBOOK_NS) == 0))
-  end
-  # Check whether the document has a XInclude namespace
-  def has_xinclude?(doc)
-    ret = false
-    doc.root.namespaces.each do |ns|
-      if (ns.href.casecmp(XINCLUDE_NS) == 0)
-        ret = true
-        break
+  def self.path( *args, &block )
+    rv = args.empty? ? PATH : ::File.join(PATH, args.flatten)
+    if block
+      begin
+        $LOAD_PATH.unshift PATH
+        rv = block.call
+      ensure
+        $LOAD_PATH.shift
       end
     end
-    ret
+    return rv
   end
-  # Searches the XML document for sections and word counts. Returns an
-  # array of sections with their word counts.
+  # Utility method used to require all files ending in .rb that lie in the
+  # directory below this file that has the same name as the filename passed
+  # in. Optionally, a specific _directory_ name can be passed in such that
+  # the _filename_ does not have to be equivalent to the directory.
   #
-  def analyze_document(doc)
-    # Add a namespace declaration for XPath expressions
-    doc.root.namespaces.default_prefix = 'db'
-    # Analyze the document starting with the root node
-    doc_maps = check_node(doc.root,0,[])
-    @sections = []
-    section_name = doc_maps[0][:title]
-    section_type = doc_maps[0][:name]
-    section_ctr = 0
-    section_level = 0
-    doc_ctr = 0
-    #puts doc_maps.inspect
-    xms = doc_maps.drop(1)
-    # Compute word counts per section
-    xms.each do |m|
-      if (m[:type] == :para)
-        doc_ctr += m[:words]
-        section_ctr += m[:words]
-      else
-        @sections << [section_name,section_ctr,section_level,section_type]
-        section_name = m[:title]
-        section_ctr = 0
-        section_level = m[:level]
-        section_type = m[:name]
-      end
-    end
-    @sections << [section_name,section_ctr,section_level,section_type]
-    # Put the document word count near the document type
-    @sections[0][1] = doc_ctr
-    @sections
+  def self.require_all_libs_relative_to( fname, dir = nil )
+    dir ||= ::File.basename(fname, '.*')
+    search_me = ::File.expand_path(
+        ::File.join(::File.dirname(fname), dir, '**', '*.rb'))
+    Dir.glob(search_me).sort.each {|rb| require rb}
   end
-end
+end  # module DocbookStatus
+DocbookStatus.require_all_libs_relative_to(__FILE__)

data/lib/docbook_status/history.rb ADDED Viewed

@@ -0,0 +1,108 @@
+ # -*- encoding:utf-8 -*-
+require 'yaml'
+module DocbookStatus
+  # Manages the history of writing progress in two modes. In session
+  # or daemon mode the history shows progress for the user session. In
+  # normal mode the history is only maintained for calendar days,
+  # weeks, months.
+  #
+  # The writing progress can (but need not) be measured with these optional
+  # items:
+  # * start date (date of initialization)
+  # * scheduled end date
+  # * total word count goal
+  # * daily word count goal
+  #
+  # * file name
+  # * goal total
+  # * goal daily
+  # * start date
+  # * planned end date
+  # current entries
+  # * timestamp
+  # * word count
+  # archive entries
+  # * date
+  # * start
+  # * end
+  # * min
+  # * max
+  # * ctr (number of entries for the day)
+  #
+  class History
+    # History file, YAML format
+    HISTORY_FILE = 'dbs_work.yml'
+    # Does the history file exist?
+    def self.exists?()
+      File.exists?(HISTORY_FILE)
+    end
+    # Load the existing writing history, or start a new one if no history file exists
+    def initialize(name,end_planned=nil,goal_total=0,goal_daily=0)
+      if File.exists?(HISTORY_FILE)
+        @history = YAML.load_file(HISTORY_FILE)
+      else
+        @history = {:file => name,
+          :goal => {
+            :start => Date.today,
+            :end => end_planned,
+            :goal_total => goal_total,
+            :goal_daily => goal_daily},
+          :current => [],
+          :archive => {}}
+      end
+    end
+    def planned_end(date)
+      @history[:goal][:end]=date
+    end
+    def total_words(tw)
+      @history[:goal][:goal_total]=tw
+    end
+    def daily_words(tw)
+      @history[:goal][:goal_daily]=tw
+    end
+    # Add to the history
+    def add(ts,word_count)
+      # FIXME add daemon mode
+      #@history[:current] << progress
+      #archive
+      k = ts.to_date
+      unless (@history[:archive][k].nil?)
+        @history[:archive][k][:min] = word_count if @history[:archive][k][:min] > word_count
+        @history[:archive][k][:max] = word_count if @history[:archive][k][:max] < word_count
+        @history[:archive][k][:end] = word_count
+        @history[:archive][k][:ctr] += 1
+      else
+        @history[:archive][k] = {:min => word_count, :max => word_count, :start => word_count, :end => word_count, :ctr => 1}
+      end
+    end
+    # Is there already a history?
+    def history?
+      @history[:archive].length != 0
+    end
+    # Convenience - returns the statistics for today
+    def today
+      @history[:archive][Date.today]
+    end
+    # Return the goals
+    def goals
+      @history[:goal]
+    end
+    # Save the writing history
+    def save
+      File.open(HISTORY_FILE, 'w') {|f| YAML.dump(@history,f)}
+    end
+  end
+end

data/lib/docbook_status/status.rb ADDED Viewed

@@ -0,0 +1,258 @@
+ # -*- encoding:utf-8 -*-
+require 'xml'
+module DocbookStatus
+ # Analyzes DocBook 5 documents for document structure (sections) and text length.
+ #
+ class Status
+   # The DocBook 5 namespace URL
+   #
+   DOCBOOK_NS = 'http://docbook.org/ns/docbook'
+   # The XInclude namespace URL
+   #
+   XINCLUDE_NS = 'http://www.w3.org/2001/XInclude'
+   # Elements whose contents is counted as text. The _formalpara_
+   # elements are included implicitly because they contain _para_ child
+   # elements.
+   #
+   @@text_elements = ['para','simpara']
+   # Section elements, following the list given in http://docbook.org/tdg5/en/html/ch02.html#roots
+   # except for the refsect... elements.
+   #
+   @@section_elements = %w[
+     acknowledgements appendix article
+     bibliography book
+     chapter colophon
+     dedication
+     glossary
+     index
+     part preface
+     section sect1 sect2 sect3 sect4 set simplesect
+     toc
+   ]
+   attr_reader :doc
+   def initialize(fname=nil)
+     @sections = []
+     @remarks = []
+     @source = fname
+     @source_dir = fname.nil? ? nil : File.dirname(fname)
+     @source_file = fname.nil? ? nil : File.basename(fname)
+     @doc = nil
+     XML.default_line_numbers=true
+   end
+   # Return the remark-elements found in the document. If _keyword_ is
+   # nil then return all remarks, else only the ones with the right
+   # keyword.
+   #
+   def remarks(keyword=nil)
+     if keyword.nil?
+       @remarks
+     else
+       ukw = keyword.upcase
+       @remarks.find_all {|r| r[:keyword] == (ukw)}
+     end
+   end
+   # Counts the words in the contents of the given node. _Word_ in this
+   # context means something that is delimited by _space_ characters and contains
+   # at least one _word_ character (in the regexp sense).
+   #
+   def count_words(node)
+     words = node.content.strip.split(/[[:space:]]+/).find_all {|w| w =~ /\w+/}
+     words.size
+   end
+   # Counts the words in the contents of the given node.
+   # It is assumed that the node is a kind of pure content (a paragraph) and therefore everything in it
+   # should be included in the word count. An exception to this are
+   # _remark_ elements, which are considered comments, not meant for final publication.
+   #
+   def count_content_words(node)
+     ws = count_words(node)
+     # Count the remark text contained in the paragraph and subtract it from the real thing
+     wsr = node.find('db:remark').reduce(0) {|m,r| m+count_words(r)}
+     ws - wsr
+   end
+   # Find the _title_ of the current section. That element is either
+   # directly following or inside an _info_ element. Return the empty
+   # string if no title can be found.
+   #
+   def find_section_title(node)
+     title = node.find_first('./db:title')
+     if title.nil?
+       title = node.find_first './db:info/db:title'
+     end
+     if title.nil?
+       ""
+     else
+       title.content
+     end
+   end
+   # Check the document elements for content and type recursively,
+   # starting at the current node.  Returns an array with paragraph and
+   # section maps.
+   #
+   def check_node(node, level, ctr)
+     if (@@text_elements.include? node.name)
+       ctr << {:type => :para, :level => level, :words => count_content_words(node)}
+     elsif (@@section_elements.include? node.name)
+       title = find_section_title(node)
+       ctr << {:type => :section, :level => level, :title => title, :name => node.name}
+       node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
+     else
+       node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
+     end
+     ctr
+   end
+   # Check whether the document has a DocBook default namespace
+   def is_docbook?(doc)
+     dbns = doc.root.namespaces.default
+     (!dbns.nil? && (dbns.href.casecmp(DOCBOOK_NS) == 0))
+   end
+   # Check whether the document has a XInclude namespace
+   def has_xinclude?(doc)
+     ret = false
+     doc.root.namespaces.each do |ns|
+       if (ns.href.casecmp(XINCLUDE_NS) == 0)
+         ret = true
+         break
+       end
+     end
+     ret
+   end
+   # Finds and returns all XInclude files/URLs in a document.
+   #
+   # OPTIMIZE implement xpointer and fallback handling for
+   # xi:include? see http://www.w3.org/TR/xinclude/
+   #
+   def find_xincludes(doc)
+     if has_xinclude?(doc)
+       xincs = doc.find('//xi:include', "xi:"+XINCLUDE_NS)
+       xfiles = xincs.map {|x| x.attributes['href'] }
+       (xfiles << xfiles.map {|xf|
+                    xfn = File.exists?(xf) ? xf : File.expand_path(xf,File.dirname(doc.root.base_uri))
+                    xdoc = XML::Document.file(xfn)
+                    find_xincludes(xdoc)
+                  }).flatten
+     else
+       []
+     end
+   end
+   # Find all remark elements in the document and return a map for
+   # every such element. The map contains:
+   #
+   # * keyword: if the first word of the content is uppercase that is the keyword, else _REMARK_
+   # * text: the content of the remark element, minus the keyword
+   # * file: the name of the source file
+   # * line: the line number in the source file
+   #
+   # OPTIMIZE look for 'role' attributes as keywords?
+   #
+   def find_remarks_in_doc(doc,source)
+     rems = doc.find('//db:remark')
+     rems.map {|rem|
+       c = rem.content.strip
+       kw = 'REMARK'
+       if rem.first.text?
+         kw1 = c.match('^([[:upper:]]+)([[:space:][:punct:]]|$)')
+         unless kw1.nil?
+           kw = kw1[1]
+           c = kw1.post_match.lstrip
+         end
+       end
+       # TODO integrate XPath? :path => rem.path, :parent => rem.parent.path,
+       {:keyword => kw, :text => c, :file=>source, :line => rem.line_num}
+     }
+   end
+   # Finds the remarks by looking through all the Xincluded files
+   #
+   def find_remarks(filter=[])
+     if (@source.nil?)
+       rfiles = find_xincludes(@doc)
+     else
+       @doc = XML::Document.file(@source)
+       rfiles = [@source_file] + find_xincludes(@doc)
+     end
+     @remarks = rfiles.map {|rf|
+       ind = XML::Document.file(File.expand_path(rf,@source.nil? ? '.' : @source_dir))
+       ind.root.namespaces.default_prefix = 'db'
+       rems = find_remarks_in_doc(ind, rf)
+       rems
+     }.flatten
+     if (filter.empty?)
+       @remarks
+     else
+       filter.map {|f|
+         @remarks.find_all {|r| f.casecmp(r[:keyword]) == 0}
+       }.flatten
+     end
+   end
+   # Searches the XML document for sections and word counts. Returns an
+   # array of sections (map) with title, word count, section level and DocBook tag.
+   #
+   def analyze_document(doc)
+     # Add a namespace declaration for XPath expressions
+     doc.root.namespaces.default_prefix = 'db'
+     # Analyze the document starting with the root node
+     doc_maps = check_node(doc.root,0,[])
+     @sections = []
+     section_name = doc_maps[0][:title]
+     section_type = doc_maps[0][:name]
+     section_ctr = 0
+     section_level = 0
+     doc_ctr = 0
+     #puts doc_maps.inspect
+     xms = doc_maps.drop(1)
+     # Compute word counts per section
+     xms.each do |m|
+       if (m[:type] == :para)
+         doc_ctr += m[:words]
+         section_ctr += m[:words]
+       else
+         @sections << {:title => section_name, :words => section_ctr, :level => section_level, :tag => section_type}
+         section_name = m[:title]
+         section_ctr = 0
+         section_level = m[:level]
+         section_type = m[:name]
+       end
+     end
+     @sections << {:title => section_name, :words => section_ctr, :level => section_level, :tag => section_type}
+     # Put the document word count near the document type
+     @sections[0][:words] = doc_ctr
+     @sections
+   end
+   # Open the XML document, check for the DocBook5 namespace and finally
+   # apply XInclude treatment to it, if it has an XInclude namespace.
+   # Returns a map with the file name, the file's change time (File.ctime), and the section structure.
+   #
+   def analyze_file
+     full_name = File.expand_path(@source)
+     changed  = File.ctime(@source)
+     @doc = XML::Document.file(@source)
+     raise ArgumentError, "Error: #{@source} is apparently not DocBook 5." unless is_docbook?(@doc)
+     @doc.xinclude if has_xinclude?(@doc)
+     sections = analyze_document(@doc)
+     {:file => full_name, :modified => changed, :sections => sections}
+   end
+ end
+end