RubyGems - weft-qda - Versions diffs - 0.9.6 - Mend

weft-qda 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

data/lib/weft.rb +21 -0
data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
data/lib/weft/application.rb +130 -0
data/lib/weft/backend.rb +39 -0
data/lib/weft/backend/marshal.rb +26 -0
data/lib/weft/backend/mysql.rb +267 -0
data/lib/weft/backend/n6.rb +366 -0
data/lib/weft/backend/sqlite.rb +633 -0
data/lib/weft/backend/sqlite/category_tree.rb +104 -0
data/lib/weft/backend/sqlite/schema.rb +152 -0
data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
data/lib/weft/category.rb +157 -0
data/lib/weft/coding.rb +355 -0
data/lib/weft/document.rb +118 -0
data/lib/weft/filters.rb +243 -0
data/lib/weft/wxgui.rb +687 -0
data/lib/weft/wxgui/category.xpm +26 -0
data/lib/weft/wxgui/dialogs.rb +128 -0
data/lib/weft/wxgui/document.xpm +25 -0
data/lib/weft/wxgui/error_handler.rb +52 -0
data/lib/weft/wxgui/inspectors.rb +361 -0
data/lib/weft/wxgui/inspectors/category.rb +165 -0
data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
data/lib/weft/wxgui/inspectors/document.rb +139 -0
data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
data/lib/weft/wxgui/inspectors/script.rb +35 -0
data/lib/weft/wxgui/inspectors/search.rb +265 -0
data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
data/lib/weft/wxgui/lang.rb +17 -0
data/lib/weft/wxgui/lang/en.rb +45 -0
data/lib/weft/wxgui/mondrian.xpm +44 -0
data/lib/weft/wxgui/search.xpm +25 -0
data/lib/weft/wxgui/sidebar.rb +498 -0
data/lib/weft/wxgui/utilities.rb +148 -0
data/lib/weft/wxgui/weft16.xpm +31 -0
data/lib/weft/wxgui/workarea.rb +249 -0
data/test/001-document.rb +196 -0
data/test/002-category.rb +138 -0
data/test/003-code.rb +370 -0
data/test/004-application.rb +52 -0
data/test/006-filters.rb +139 -0
data/test/009a-backend_sqlite_basic.rb +280 -0
data/test/009b-backend_sqlite_complex.rb +175 -0
data/test/009c_backend_sqlite_bench.rb +81 -0
data/test/010-backend_nudist.rb +5 -0
data/test/all-tests.rb +1 -0
data/test/manual-gui-script.txt +24 -0
data/test/testdata/autocoding-test.txt +15 -0
data/test/testdata/iso-8859-1.txt +5 -0
data/test/testdata/sample_doc.txt +19 -0
data/test/testdata/search_results.txt +1254 -0
data/test/testdata/text1-dos-ascii.txt +2 -0
data/test/testdata/text1-unix-utf8.txt +2 -0
data/weft-qda.rb +28 -0
metadata +96 -0

data/lib/weft/backend/n6.rb ADDED Viewed

@@ -0,0 +1,366 @@
+# Read-only storage backend using N6's native file format
+module QDA
+  module Backend
+	module N6
+      # number of seconds between 1/1/1900 and 1/1/1970
+      SEVENTY_YEARS = 2208985187
+	  def start(args)
+		@basedir = args[:basedir]
+		@npr     = NPReader.new()
+		@doc_dbid_counter = 0
+		load_docs()
+		load_nodes()
+	  end
+	  def get_all_docs
+		@docs
+	  end
+	  def get_doc(title)
+		@docs.find { | d | d.title == title }
+	  end
+	  def get_all_categories
+		@root_node
+	  end
+	  private
+	  PARSE_DOCS = /^\("((?:[^"]|\\")*)" (\d+) (\d+) (?#
+			)(NIL|(?:\([0-9 ]+\))) (?#
+			)\((\d{10}) \. (\d{10})\) (?#
+			)"((?:[^"]|\\")*)"/
+	  # Reads DATABASE/docsys line by line and builds a Document for every
+	  # line that matches PARSE_DOCS; non-matching lines are skipped. The
+	  # resulting documents are collected in @docs.
+	  def load_docs()
+		@docs = []
+		File.foreach(File.join(@basedir, 'DATABASE/docsys') ) do | docline |
+		  if matches = PARSE_DOCS.match(docline)
+			doc_id = matches[7]
+			parsed = {
+			  :title      => matches[1],
+			  :external   => ( matches[2] == "0" ? true : false ),
+			  :unitlength => matches[3],
+			  :crt_date   => matches[5],
+			  :mod_date   => matches[6],
+			  :doc_id     => matches[7],
+			  :source     => self
+			}
+			docfacts  = nil
+			fragments = []
+			# read the breaking of the documents into chunks
+			File.open(File.join(@basedir,
+								'DATABASE/DOCFACTS',
+								doc_id ) ) do | dff |
+			  docfacts = @npr.parse(dff.read)
+			end
+			# read the source document in chunks defined in docfacts
+			# not sure why each fragment seems to be made up of two
+            # chunks
+			File.open( File.join(@basedir,
+								 'DATABASE/DOCFILES',
+								 doc_id ) ) do | df |
+			  docfacts.values.each do | chunk |
+				# bit 1 - seems longer
+				fragments.push(df.read(chunk.values[0].to_i))
+				# bit 2 - generally shorter
+				fragments[-1] << df.read(chunk.values[2].to_i)
+                # NOTE(review): String#sub returns a new string and the
+                # result is discarded here, so the fragment keeps its
+                # trailing line breaks. Presumably sub! was intended -
+                # confirm against Document#append before changing, since
+                # the chunk lengths recorded below depend on the text.
+                fragments[-1].sub(/[\n\r]+$/, '')
+			  end
+			end
+			parsed[:fragments] = fragments
+			# the memo is optional: it stays empty when no DOCMEMS file exists
+			memo = ''
+			if FileTest.exist?( File.join( @basedir,
+										  'DATABASE/DOCMEMS',
+										  doc_id) )
+			  File.open(File.join(@basedir,
+								  'DATABASE/DOCMEMS',
+								  doc_id) ) do | dm |
+				memo = dm.read()
+			  end
+			end
+            # shift the stored timestamps by SEVENTY_YEARS before Time.at
+            crt_date = Time.at( parsed[:crt_date].to_i - SEVENTY_YEARS )
+            mod_date = Time.at( parsed[:mod_date].to_i - SEVENTY_YEARS )
+			# the document starts with empty text; fragments are appended below
+			doc = Document.new(parsed[:title], '', memo,
+                               crt_date, mod_date)
+            if parsed[:external]
+              doc.instance_eval { @external = true }
+            end
+            # give this one document object two singleton methods
+            class << doc
+              # Converts a run of +length+ chunks beginning at chunk index
+              # +start+ into a pair of sums over @chunkmap: the total of the
+              # entries before +start+, and the total of the run itself.
+              # Always returns 0, 0 for an external document.
+              def chunks_to_vector(start, length)
+                return 0, 0 if @external
+                this_start = @chunkmap[0, start].inject(0) do | tot, ck |
+                  tot += ck
+                end
+                this_offset = @chunkmap[start, length].inject(0) do | tot, ck |
+                  tot += ck
+                end
+                return this_start, this_offset
+              end
+              # Appends +text+ through the regular append and records the
+              # value that call returns as the next @chunkmap entry.
+              def append(text)
+                @chunkmap ||= []
+                len = super(text)
+                @chunkmap.push(len)
+              end
+            end
+            doc.dbid = next_doc_dbid
+			fragments.each do | frag |
+              doc.append(frag)
+            end
+			@docs.push(doc)
+		  end
+		end
+	  end
+      def next_doc_dbid
+        token = @doc_dbid_counter += 1
+        # token.to_s
+      end
+	  PARSE_INDEX = /^\((\d+|NIL) "([^"]*)" (?#
+				)(?:(NIL)|(?:"((?:[^"]|\\")*)")) (?# comment
+				)(?:(NIL)|(?:"((?:[^"]|\\")*)")) (?# "memoreference
+				)\((\d{10}) \. (\d{10})\) (.*?) (\d+)\) $/
+	  # Reads DATABASE/indexsys and rebuilds the category tree from it. The
+	  # first record becomes @root_node; every later record is attached to
+	  # the nearest node, walking up from the previously created one, that
+	  # still expects children. All created nodes are also kept in @nodes.
+	  def load_nodes()
+		@nodes = []
+		curr_node = nil
+		@root_node = nil
+		# NOTE(review): +content+ is assigned but never read in this method
+		content = ''
+		# text of a record that spans several lines and has not matched yet
+		pending = ''
+		# hash of nodes -> number of pending children
+		tree_kids = {}
+		File.foreach( File.join(@basedir,
+								'DATABASE/indexsys') ) do | line |
+		  next if line =~ /^\s*$/
+		  # keep accumulating lines until a complete record matches
+		  unless matches = PARSE_INDEX.match(pending + line)
+			pending += line
+			next
+		  end
+		  pending = ''
+		  parsed = {
+			:child_id => matches[1],
+			:title    => matches[2],
+			:comment  => matches[3] || matches[4],
+			:memo     => '',
+			:big_fig1 => matches[7],
+			:big_fig2 => matches[8],
+			:codes    => matches[9],
+			:ccount   => matches[10],
+			:source   => self }
+		  # load the node memo if there is one
+		  if matches[6]
+			memofile = File.join(@basedir, 'DATABASE/NODEMEMS', matches[6])
+			# they don't necessarily have a memo file, in which case memo = ''
+			if FileTest.exist?(memofile)
+			  File.open(memofile) { | nm | parsed[:memo] = nm.read() }
+			end
+		  end
+		  # create a node object to represent the code
+		  # first of all, see if it's the root node (no parent)
+		  if curr_node.nil?
+			@root_node = Category.new(parsed[:title], nil)
+			# yuk
+			load_node_codes(@root_node,
+							parsed[:codes])
+			@nodes.push(@root_node)
+			# root node is always looking for additional children -
+			# the default parent.
+			tree_kids[@root_node] = -1
+			curr_node = @root_node
+		  else 		  # it's not the root node
+			# go back up the tree until we find a node which needs
+			# more children
+			until tree_kids[curr_node] != 0
+			  curr_node = curr_node.parent
+			end
+			# the root counter starts at -1, so it never reaches 0 by
+			# decrementing and the walk above always stops at the root
+			tree_kids[curr_node] -= 1
+			parsed[:parent] = curr_node
+			new_node = Category.new(parsed[:title], parsed[:parent])
+			load_node_codes(new_node, parsed[:codes])
+			# get ready for the next round
+			tree_kids[new_node] = parsed[:ccount].to_i
+			curr_node = new_node
+			@nodes.push(new_node)
+		  end
+		end
+	  end
+	  # parse which parts of which documents are coded by category +category+
+	  def load_node_codes(category, codesrc)
+		return if codesrc == 'NIL'
+		unless parsed = @npr.parse(codesrc)
+		  raise "cannot parse #{codesrc} for node codes"
+		end
+		codes = {}
+		parsed.values.each do | val |
+		  # val.values[0] is the title of the coded document
+		  doc = get_doc(val.values[0])
+		  # the segments of the document which are coded
+		  val.values[1..-1].each do | code |
+			start  = code.values[0].to_i
+			length = code.values[2].to_i - start + 1
+			# we have to convert these chunks into character-indexes
+            conv = doc.chunks_to_vector(start, length)
+            # can be 0-length coded when applied to external doc
+            if conv[1] == 0
+              next
+            end
+			category.code( doc.dbid, *conv )
+		  end
+		end
+	  end
+	end
+	# NPReader.rb - Nested parenthesis (LISP syntax) parser.
+	# Copyright (C) 2001 Gordon James Miller
+	# This library is free software; you can redistribute it and/or
+	# modify it under the terms of the GNU Lesser General Public License
+	# as published by the Free Software Foundation; either version 2.1
+	# of the License, or (at your option) any later version.
+	# A container for strings that reside between matching parenthesis.  Each
+	# instance of this class contains a list of values that are either
+	# strings or other NPNode objects representing nesting.
+	class NPNode
+	  @@NORMAL_MODE = 0
+	  @@QUOTE_MODE = 1
+	  # The members of this group.
+	  attr_reader :values
+	  # The parent of this group.
+	  attr_reader :parent
+	  # The level of this group.  This is set by the constructor and is
+	  # based upon the level of the parent.
+	  attr_reader :level
+	  # The state of this group.  This will either be NORMAL or QUOTE.
+	  # In QUOTE, whitespace is absorbed.
+	  attr_reader :state
+	  def close
+		if ( @curval.size > 0 ) then
+		  @values.push( @curval.to_s )
+		  @curval.clear
+		end
+	  end
+	  # Initialize a new instance that has the specified parent.  The
+	  # parent can be nil, in which case it is assumed that this is
+	  # the top level node.  If the parent is not nil, then this
+	  # object is added as a child to the parent.
+	  def initialize (parent)
+		@parent = parent
+		if ( @parent != nil ) then
+		  @parent.push(self)
+		end
+		@state = @@NORMAL_MODE
+		@level = ( parent == nil ) ? 0 : @parent.level + 1
+		@values = Array.new
+		@curval = Array.new
+	  end
+	  def push(ch)
+		if ( ch.kind_of? NPNode ) then
+		  # If this is an NPNode instance, just add it to the end of the
+		  # values array.
+		  @values.push(ch)
+		else
+		  if ( ch == "'" || ch == "\"" ) then
+			# If this is a double quote, then we have to start a new
+			# value, toggle the mode.
+			@state = ( @state + 1 ) % 2
+			close
+		  elsif ( (@state != @@QUOTE_MODE) && ch =~ /[ \t\n\r]/ ) then
+			# If this is a whitespace character and the length is
+			# greater than zero, push the current value on the values
+			# array and clear the current values string.
+			close
+		  else
+			# Otherwise go ahead and push the character onto the end of
+			# the current value.
+			@curval.push( ch )
+		  end
+		end
+	  end
+	  def to_s
+		str = Array.new()
+		return "(#{@values.join(' ')})"
+	  end
+	end
+	# An implementation of a nested parenthesis reader.  This implementation
+	# constructs an AST from a data source.
+	class NPReader
+	  # Initialize a new instance of the reader.  This does not start the
+	  # parsing, that is done with the parse method.
+	  def initialize ()
+	  end
+	  # Parse the data contained in the string and return the reference to the
+	  # top level group.
+	  def parse (string)
+		@curgroup = nil
+		@top = nil
+		string.each_byte { |byte|
+		  ch = byte.chr
+		  if ( ch == '(' ) then
+			@curgroup = NPNode.new(@curgroup)
+			if ( @top == nil ) then
+			  @top = @curgroup
+			end
+		  elsif ( ch == ')' ) then
+			@curgroup.close
+			@curgroup = @curgroup.parent
+		  else
+			if ( @curgroup != nil ) then
+			  @curgroup.push(ch)
+			end
+		  end
+		}
+		return @top
+	  end
+	end
+  end
+end

data/lib/weft/backend/sqlite.rb ADDED Viewed

@@ -0,0 +1,633 @@
+require 'sqlite'
+# require 'sqlite3'
+require 'strscan'
+require 'tempfile'
+require 'fileutils'
+require 'base64'
+require 'rexml/document'
+# require 'iconv'
+module QDA
+# Storage backend using SQLite module - can use SQlite 3 or SQLite, but
+# currently problems with SQLite 3 and non-ASCII characters. Will pick
+# up whether sqlite or sqlite3 is available.
+module Backend::SQLite
+  require 'weft/backend/sqlite/schema.rb'
+  require 'weft/backend/sqlite/upgradeable.rb'
+  require 'weft/backend/sqlite/category_tree.rb'
+  include Upgradeable
+  # Selects the driver class that Database subclasses, depending on which
+  # SQLite binding has been loaded, and raises LoadError when neither
+  # ::SQLite nor ::SQLite3 is defined.
+  # if working with sqlite v2 with the sqlite-ruby v2, we need a
+  # couple of compatibility tweaks.
+  if defined?(::SQLite)
+    SQLITE_DB_CLASS = ::SQLite::Database
+    # Ruby-SQLite3 statements have a close() method, but Ruby-SQLite
+    # v 2 don't - so we supply a dummy method for when using v2
+    class ::SQLite::Statement
+      def close(); end
+    end
+    # SQLite3 introduced this more ruby-ish notation
+    # (result= is made an alias of the v2 set_result method)
+    class ::SQLite::Database::FunctionProxy
+      alias :result= :set_result
+    end
+  elsif defined?(::SQLite3)
+    SQLITE_DB_CLASS = ::SQLite3::Database
+  else
+    raise LoadError, "No SQlite database class loaded"
+  end
+  class Database < SQLITE_DB_CLASS
+    def initialize(file)
+      # super(file, :driver => "Native")
+      super(file)
+      self.results_as_hash = true
+      # self.type_translation = true
+    end
+    def undo_action()
+      @dbh.transaction do
+        @dbh.execute("SELECT * FROM undoable WHERE step = 1
+                     ORDER BY step, actionid DESC") do | task |
+          @dbh.execute(task[0])
+        end
+        @dbh.execute("UPDATE undoable SET step = step -1")
+        @dbh.execute("DELETE FROM undoable WHERE step = 0")
+      end
+    end
+    def redo_action()
+      transaction do
+        execute("SELECT * FROM undoable WHERE step = -1
+                     ORDER BY step, actionid DESC") do | task |
+          execute(task[0])
+        end
+        execute("DELETE FROM undoable WHERE step = -1")
+        execute("UPDATE undoable SET step = step + 1")
+      end
+    end
+    def date_freeze(date)
+      date ? date.strftime('%Y-%m-%d %H:%M:%S') : ''
+    end
+    def date_thaw(str)
+      return nil if str.empty?
+      return Time.local( *str.split(/[- :]/) )
+    end
+  end
+  attr_reader :dbh, :dbfile
+  # load up the database connection. A hash argument containing the
+  # key :dbfile should be supplied. If this is +nil+, then a
+  # temporary storage will be used
+  def start(args)
+    if ! args.key?(:dbfile)
+      raise ArgumentError, "Must specify SQLite dbfile to load from"
+    end
+    @dbfile  = args[:dbfile]
+    if @dbfile and ! File.exists?(@dbfile)
+      raise RuntimeError, "Tried to open an non-existent database"
+    end
+    tmp_fname = @dbfile ? File::basename(@dbfile) : 'Weft'
+    tmpfile   = Tempfile.new(tmp_fname || 'Weft')
+    tmpfile.close(false) # don't delete
+    @tmpfile = tmpfile.path
+    if @dbfile
+      FileUtils.copy(@dbfile, @tmpfile)
+    end
+    @dbh = Database.new(@tmpfile)
+    # if opening from an existing file, check and do any upgrding
+    # required from older versions
+    do_version_format_upgrading() if @dbfile
+    undirty!
+  end
+  def connect(args)
+    @dbh = args[:dbh]
+  end
+  def end(force = false)
+    @cat_tree = nil
+    @dbh.close()
+  end
+  def save(target = @dbfile)
+    if target.nil?
+      raise RuntimeError,
+      "No previously saved file, and no named supplied for save"
+    end
+    @dbh.close
+    @dbfile = target
+    FileUtils.copy(@tmpfile, @dbfile)
+    @dbh = Database.new(@tmpfile)
+    undirty!
+  end
+  # roll the current state back to the last-saved state.
+  def revert()
+    @dbh.close()
+    FileUtils.copy(@dbfile, @tmpfile)
+    @dbh = Database.new(@tmpfile)
+  end
+  # hint to do the next series of actions as a batch
+  def batch
+    @dbh.transaction { yield }
+  end
+  def cat_tree
+    return @cat_tree if @cat_tree
+    xml  = @dbh.get_first_value("SELECT xml FROM category_structure")
+    if xml and xml.length > 0
+      @cat_tree = CategoryTree.load(xml)
+    else
+      @cat_tree = CategoryTree.new()
+    end
+  end
+  # private :cat_tree
+  # get every doc
+  def get_all_docs()
+    docs = []
+    @dbh.execute("SELECT doctitle, docid FROM document") do | row |
+      doc = Document.new(row['doctitle'])
+      doc.dbid = row['docid'].to_i
+      docs.push(doc)
+    end
+    docs
+  end
+  # fetch the document identified by the string ident
+  def get_doc(ident)
+    doc = nil
+    @dbh.transaction do
+      stmt = nil
+      if ident.kind_of?(Fixnum) || ident =~ /^\d+$/
+        stmt = @dbh.prepare("SELECT * FROM document WHERE docid = ?")
+      else
+        stmt = @dbh.prepare("SELECT * FROM document WHERE doctitle = ?")
+      end
+      stmt.execute!(ident) do | r |
+        doc = Document.new(r['doctitle'].dup,
+                            r['doctext'].dup,
+                            r['docmemo'].dup,
+                            @dbh.date_thaw(r['created_date']),
+                            @dbh.date_thaw(r['modified_date']) )
+        doc.dbid = r['docid'].to_i
+      end
+      raise "Not found: #{ident}" if doc.nil?
+      stmt.close()
+    end
+    return doc
+  end
+  alias :get_document :get_doc
+  def save_preference(pref_name, pref_value)
+    frozen_value = Base64.encode64( Marshal.dump( pref_value) )
+    @dbh.transaction do
+      @dbh.execute("INSERT OR REPLACE INTO app_preference
+                        VALUES (?, ?)",
+                    pref_name, frozen_value )
+    end
+    dirty!
+  end
+  def get_preference(pref_name)
+    frozen_pref = nil
+    @dbh.transaction do
+      @dbh.execute("SELECT value FROM app_preference
+                        WHERE name = ? ", pref_name ) do | r |
+        frozen_pref = r['value']
+      end
+    end
+    return nil if frozen_pref.nil?
+    return Marshal.load( Base64.decode64(frozen_pref) )
+  end
+  def save_document(doc)
+    raise TypeError unless doc.kind_of? QDA::Document
+    @dbh.transaction { _save_document(doc) }
+    dirty!
+    doc
+  end
+  def _save_document(doc)
+    if doc.dbid
+      @dbh.execute("UPDATE document
+                        SET doctitle = ?, doctext = ?,
+                            docmemo = ?, modified_date = ?
+                        WHERE docid = ? ",
+                    doc.title, doc.text, doc.memo,
+                    @dbh.date_freeze( Time.now() ),
+                    doc.dbid)
+    else
+      @dbh.execute("INSERT INTO document
+                        VALUES(NULL, ?, ?, ?, ?, ?)",
+                    doc.title, doc.text, doc.memo,
+                    @dbh.date_freeze(doc.create_date),
+                    @dbh.date_freeze( Time.now() ) )
+      doc.dbid = @dbh.last_insert_row_id().to_i
+    end
+  end
+  # delete teh document identified by +dbid+ from the database
+  def delete_document(dbid)
+    @dbh.transaction do
+      @dbh.execute("DELETE FROM document WHERE docid = ?", dbid)
+    end
+    dirty!
+  end
+  # retrieve the category with the internal id +catid+, along with
+  # its codes. If +get_structure+ is set to a true value then the
+  # category's children will also be retrieved from the database
+  def get_category(catid, get_structure = false)
+    catid = catid.to_i if catid =~ /^\d+$/
+    raise "Invalid id #{catid.inspect}" unless catid.kind_of?(Fixnum)
+    category = nil
+    stmt = @dbh.prepare("SELECT * FROM category WHERE catid = ?")
+    stmt.execute!(catid) do | r |
+      parent = get_category_parent(catid)
+      category = Category.new(r['catname'], parent, r['catdesc'])
+      category.dbid = catid
+    end
+    raise "No category found matching id '#{catid}'" unless category
+    stmt.close()
+    get_codes_for_category(category)
+    get_and_build_children(category) if get_structure
+    category
+  end
+  # gets the root category named +name+
+  def get_root_category(name)
+    root = cat_tree.roots.find { | r | r.name == name }
+    raise "Not found, root category #{name.inspect}" unless root
+    return get_category(root.dbid)
+  end
+  # fetch categories by relative or absolute paths. Returns an
+  # array of categories
+  def get_categories_by_path(path)
+    # cos it should be quicker ...
+    if path =~ /\//
+      return cat_tree.find(path).map do | found |
+        get_category(found.dbid)
+      end
+    else
+      return get_categories_by_name(path)
+    end
+  end
+  # fetch categories by partial names. This is currently
+  # case-insensitive by default. Returns an array of categories
+  # whose names match.
+  def get_categories_by_name(namebit, insensitive = true)
+    stmt = nil
+    if insensitive
+      stmt = @dbh.prepare("SELECT catid FROM category
+                   WHERE UPPER(catname) LIKE ?
+                   AND parent >= 0" )
+      namebit = namebit.upcase
+    else
+      stmt = @dbh.prepare("SELECT catid FROM category
+                   WHERE catname GLOB ?
+                   AND parent >= 0" )
+    end
+    categories = []
+    @dbh.transaction do
+      stmt.execute!(namebit + "%") do | r |
+        categories.push( get_category( r['catid'] ) )
+      end
+      stmt.close()
+    end
+    return categories
+  end
+  def is_descendant?(ancestor, descendant)
+    cat_tree.is_descendant?(ancestor.dbid, descendant.dbid)
+  end
+  # builds the tree structure below +category+, modifying
+  # +category+ in place. After this call, the retrieved structure
+  # is available as the +children+ property of the category.
+  def get_and_build_children(category)
+    # this duplicates stuff below
+    append_f = Proc.new do | parent, elem |
+      cat = Category.new(elem.name, parent)
+      cat.dbid = elem.dbid
+      elem.children { | c | append_f.call(cat, c) }
+    end
+    cat_tree[category.dbid].children do | first_child |
+      append_f.call(category, first_child)
+    end
+  end
+  private :get_and_build_children
+  # applies the codes to category +cat+
+  def get_codes_for_category(cat)
+    @dbh.execute("SELECT docid, offset, length
+                      FROM code
+                      WHERE catid = ? ", cat.dbid) do | row |
+      cat.code( row['docid'].to_i,
+                row['offset'].to_i,
+                row['length'].to_i )
+    end
+    return cat
+  end
+  # looks up the string indices of the document and returns the
+  # appropriate text fragments as an array
+  # returns a hash keyed on document title where the values are an
+  # array of fragments in order of offset from the start of the document
+  def get_text_at_category(cat)
+    vectors = FragmentTable.new()
+    @dbh.execute("SELECT document.doctitle AS doctitle,
+                             code.docid AS docid,
+                             code.offset AS offset, code.length,
+                      SUBSTR(document.doctext,
+                             code.offset + 1, code.length) AS fragment
+                FROM document, code
+                WHERE code.catid = ?
+                AND code.docid = document.docid
+                ORDER BY code.docid, code.offset",  cat.dbid ) do | r |
+      vectors.add( Fragment.new( r['fragment'],
+                                 r['doctitle'],
+                                 r['offset'].to_i,
+                                 r['docid'].to_i ) )
+    end
+    vectors
+  end
+  def get_category_parent(catid)
+    if cat_tree[catid].parent
+      return get_category(cat_tree[catid].parent)
+    else
+      return  nil
+    end
+  end
+  # fetches all the categories in a tree structure, starting from the right
+  def get_all_categories()
+    build_cat = Proc.new do | elem, parent |
+      cat = Category.new( elem.name, parent )
+      cat.dbid = elem.dbid
+      elem.children.each { | ch | build_cat.call(ch, cat) }
+      cat
+    end
+    # return the Category Tree (internal storage) as a tree of
+    # actual Categories
+    cat_tree.roots.map { | root | build_cat.call(root, nil) }
+  end
+  # saves the category
+  def save_category(cat)
+    @dbh.transaction { _save_category(cat) }
+    dirty!
+    cat
+  end
+  def _save_category(cat)
+    # only resave the tree structure if nec,
+    xml_needs_update = false
+    # updating an existing category
+    if cat.dbid
+      # check for re-parenting or renaming
+      child = cat_tree[cat.dbid]
+      if child.parent != cat.parent.dbid
+        cat_tree.move(child.dbid, cat.parent.dbid)
+        xml_needs_update = true
+      end
+      if child.name != cat.name
+        child.name = cat.name
+        xml_needs_update = true
+      end
+      @dbh.execute("DELETE FROM code WHERE catid = ?", cat.dbid)
+      @dbh.execute("UPDATE category
+                          SET catname = ?,
+                              catdesc = ?,
+                              parent = ?,
+                              modified_date = ?
+                          WHERE catid = ? ",
+                    cat.name,
+                    cat.memo,
+                  ( cat.parent ? cat.parent.dbid : nil),
+                    @dbh.date_freeze( Time.now ),
+                    cat.dbid)
+      # adding a new category
+    else
+      parentid = cat.parent ? cat.parent.dbid : nil
+      @dbh.execute("INSERT INTO category
+                        VALUES(NULL, ?, ?, ?, ?, ?)",
+                    cat.name, cat.memo, parentid,
+                    @dbh.date_freeze( Time.now ),
+                    @dbh.date_freeze( Time.now ) )
+      cat.dbid = @dbh.last_insert_row_id().to_i
+      if cat.parent
+        cat_tree.add(cat.parent.dbid, cat.dbid, cat.name)
+      else
+        cat_tree.add(nil, cat.dbid, cat.name)
+      end
+      xml_needs_update = true
+    end
+    stmt_code = @dbh.prepare("INSERT INTO code VALUES(?, ?, ?, ?)")
+    cat.codes.each do | docid, vecs |
+      vecs.each do | vec |
+        stmt_code.execute( cat.dbid, vec.docid, vec.offset, vec.length )
+      end
+    end
+    stmt_code.close()
+    if xml_needs_update
+      @dbh.execute( "UPDATE category_structure SET xml = ? ",
+                    cat_tree.serialise())
+    end
+  end
+  # deletes the category +category+. If +recursive+ is false then
+  # any children of +category+ will be reattached to the deleted
+  # category's parent. If +recursive+ is true (default), then all
+  # descendants will be deleted.
+  # Returns a list of categories that were actually deleted.
+  def delete_category(cat, recursive = true)
+    return unless cat.dbid
+    deleted_items = []
+    # TODO not all items being returned in list
+    if recursive
+      me = cat_tree[cat.dbid]
+      me.children.each do | child |
+        deleted_items += delete_category(child, true)
+      end
+      cat_tree.remove(cat.dbid)
+      deleted_items << cat
+    else
+      raise NotImplementedError,
+      'Non-recursive deletion not implemented'
+    end
+    @dbh.transaction do
+      @dbh.execute("DELETE FROM category WHERE catid = ? ", cat.dbid)
+      xml = cat_tree.serialise()
+      @dbh.execute("UPDATE category_structure SET xml = ?", xml)
+    end
+    dirty!
+    return deleted_items
+  end
+  MAGIC_REV_INDEX_ID = -2
+  # adds the reverse indexes for +words+ to the existing reverse
+  # indexes.
+  def save_reverse_index(docid, words, prog_bar = nil)
+    stmt_wordid = @dbh.prepare("SELECT catid FROM category
+                                    WHERE catname = ? AND parent = ? ")
+    stmt_insert = @dbh.prepare("INSERT INTO category
+                                    VALUES(NULL, ?, ?, ?, ?, ?)")
+    stmt_code   = @dbh.prepare("INSERT INTO code VALUES(?, ?, ?, ?)")
+    @dbh.transaction do
+      words.each do | word, locations |
+        wordid = nil
+        stmt_wordid.execute!(word, MAGIC_REV_INDEX_ID) do | r |
+          wordid = r[0] # get first value
+        end
+        unless wordid
+          stmt_insert.execute( word, '', MAGIC_REV_INDEX_ID,
+                               @dbh.date_freeze( Time.now ),
+                               @dbh.date_freeze( Time.now ) )
+          wordid = @dbh.last_insert_row_id().to_s
+        end
+        locations.each do | loc |
+          stmt_code.execute(wordid, docid, loc, word.length)
+        end
+        prog_bar.next() if prog_bar
+      end
+    end # transaction
+    [stmt_wordid, stmt_insert, stmt_code].each { | s | s.close() }
+  end
+  # Delete all reverse word indexes associated with +docid+
+  def drop_reverse_indexes(docid)
+    @dbh.transaction do
+      stmt_del = @dbh.prepare("DELETE FROM code
+                                   WHERE docid = ? AND catid IN
+                                     (SELECT catid
+                                      FROM category
+                                      WHERE parent = ?) ")
+      stmt_del.execute(docid, MAGIC_REV_INDEX_ID)
+      stmt_del.close()
+    end
+  end
+  # should work for latin-1 characters
+  WORD_PATTERN = /^[\w\xC0-\xD6\xD8-\xF6\xF8-\xFF][\w\xC0-\xD6\xD8-\xF6\xF8-\xFF']+$/s
+  # returns a hash of document fragments. mods are additional
+  # arguments
+  def get_search_fragments(term, mods = {})
+    if term !~  WORD_PATTERN
+      return get_search_fragments_scan(term, mods)
+    else
+      return get_search_fragments_index(term, mods)
+    end
+  end
+  def get_search_fragments_scan(term, mods = {})
+    vectors = FragmentTable.new()
+    wrap = mods[:wrap_both] || 0
+    @dbh.execute("SELECT * FROM document WHERE doctext LIKE ?",
+                  "%#{term}%") do | r |
+      doc_title = r['doctitle']
+      doc_id    = r['docid'].to_i
+      # compile a search regexp
+      rx = mods[:whole_word] ?
+      '\b\w*.{0,%i}\b%s\b.{0,%i}\w*?\b' % [wrap, term, wrap] :
+        '\b\w*.{0,%i}%s.{0,%i}\w*?\b' % [wrap, term, wrap]
+      search =  mods[:case_sensitive] ?
+      Regexp.new(rx, Regexp::MULTILINE) :
+        Regexp.new(rx, Regexp::MULTILINE|Regexp::IGNORECASE)
+      scanner = StringScanner.new(r['doctext'])
+      while scanner.scan_until(search)
+        offset = scanner.pos - scanner.matched_size
+        f = Fragment.new(scanner.matched, doc_title, offset, doc_id)
+        vectors.add(f)
+      end
+    end
+    vectors
+  end
+  # Looks +word+ up in the reverse word index and returns a FragmentTable
+  # with one Fragment per row the query yields. mods are additional
+  # arguments: :wrap_both (default 0) is passed to the query four times,
+  # :whole_word and :case_sensitive rewrite the query text as shown below.
+  # By default the search is case-insensitive and matches the beginning
+  # of words. The query template is Schema::RINDEX_SEARCH_MODEL_QUERY,
+  # which is defined outside this file.
+  def get_search_fragments_index(word, mods = {})
+    wrap = mods[:wrap_both] || 0
+    # work on a copy, the template is modified in place below
+    query = Schema::RINDEX_SEARCH_MODEL_QUERY.dup()
+    # NOTE(review): the word is passed as a bind parameter further down,
+    # so doubling its single quotes here presumably changes the value
+    # searched for - confirm against the driver's parameter handling.
+    word = word.gsub(/'/, "''") + "%"
+    # if "there" shouldn't be matched when searching with "the"
+    if mods[:whole_word]
+      query.sub!(/LIKE :search/, "= :search")
+      word.sub!(/\%$/, '')
+    end
+    # SQLite GLOB is case-sensitive, LIKE isn't
+    if mods[:case_sensitive]
+      query.sub!(/LOWER\(category.catname\)/, 'category.catname')
+      query.sub!(/LIKE :search/, "GLOB :search")
+      word.sub!(/\%/, '*') # for GLOB
+    end
+    # something to hold the results
+    vectors = FragmentTable.new()
+    @dbh.transaction do
+      # old-style ? bind params seem to work better with sqlite-2
+      params = [ wrap, wrap, wrap, wrap, MAGIC_REV_INDEX_ID, word ]
+      @dbh.execute(query, *params) do | r |
+        f = Fragment.new( r['snip'], r['doctitle'],
+                          r['start_at'].to_i, r['docid'].to_i )
+        vectors.add(f)
+      end
+    end
+    vectors
+  end
+  # destructively reinstalls the schema
+  def install_clean()
+    if @dbfile and FileTest.exist?(@dbfile)
+      @dbh.close()
+      File.delete(@dbfile)
+      @dbh = Database.new(@dbfile)
+    end
+    # transaction make a big speed difference here
+    @dbh.transaction do
+      @dbh.execute_batch(Schema::SCHEMA_TABLES)
+      @dbh.execute_batch(Schema::SCHEMA_TRIGGERS)
+      @dbh.execute_batch(Schema::SCHEMA_INDEXES)
+    end
+  end
+end
+end