weft-qda 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/lib/weft.rb +21 -0
  2. data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
  3. data/lib/weft/application.rb +130 -0
  4. data/lib/weft/backend.rb +39 -0
  5. data/lib/weft/backend/marshal.rb +26 -0
  6. data/lib/weft/backend/mysql.rb +267 -0
  7. data/lib/weft/backend/n6.rb +366 -0
  8. data/lib/weft/backend/sqlite.rb +633 -0
  9. data/lib/weft/backend/sqlite/category_tree.rb +104 -0
  10. data/lib/weft/backend/sqlite/schema.rb +152 -0
  11. data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
  12. data/lib/weft/category.rb +157 -0
  13. data/lib/weft/coding.rb +355 -0
  14. data/lib/weft/document.rb +118 -0
  15. data/lib/weft/filters.rb +243 -0
  16. data/lib/weft/wxgui.rb +687 -0
  17. data/lib/weft/wxgui/category.xpm +26 -0
  18. data/lib/weft/wxgui/dialogs.rb +128 -0
  19. data/lib/weft/wxgui/document.xpm +25 -0
  20. data/lib/weft/wxgui/error_handler.rb +52 -0
  21. data/lib/weft/wxgui/inspectors.rb +361 -0
  22. data/lib/weft/wxgui/inspectors/category.rb +165 -0
  23. data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
  24. data/lib/weft/wxgui/inspectors/document.rb +139 -0
  25. data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
  26. data/lib/weft/wxgui/inspectors/script.rb +35 -0
  27. data/lib/weft/wxgui/inspectors/search.rb +265 -0
  28. data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
  29. data/lib/weft/wxgui/lang.rb +17 -0
  30. data/lib/weft/wxgui/lang/en.rb +45 -0
  31. data/lib/weft/wxgui/mondrian.xpm +44 -0
  32. data/lib/weft/wxgui/search.xpm +25 -0
  33. data/lib/weft/wxgui/sidebar.rb +498 -0
  34. data/lib/weft/wxgui/utilities.rb +148 -0
  35. data/lib/weft/wxgui/weft16.xpm +31 -0
  36. data/lib/weft/wxgui/workarea.rb +249 -0
  37. data/test/001-document.rb +196 -0
  38. data/test/002-category.rb +138 -0
  39. data/test/003-code.rb +370 -0
  40. data/test/004-application.rb +52 -0
  41. data/test/006-filters.rb +139 -0
  42. data/test/009a-backend_sqlite_basic.rb +280 -0
  43. data/test/009b-backend_sqlite_complex.rb +175 -0
  44. data/test/009c_backend_sqlite_bench.rb +81 -0
  45. data/test/010-backend_nudist.rb +5 -0
  46. data/test/all-tests.rb +1 -0
  47. data/test/manual-gui-script.txt +24 -0
  48. data/test/testdata/autocoding-test.txt +15 -0
  49. data/test/testdata/iso-8859-1.txt +5 -0
  50. data/test/testdata/sample_doc.txt +19 -0
  51. data/test/testdata/search_results.txt +1254 -0
  52. data/test/testdata/text1-dos-ascii.txt +2 -0
  53. data/test/testdata/text1-unix-utf8.txt +2 -0
  54. data/weft-qda.rb +28 -0
  55. metadata +96 -0
@@ -0,0 +1,104 @@
1
+ require 'base64'
2
+
3
+ module QDA::Backend::SQLite
4
# One node of the in-memory category tree. A node stores its own
# database id and name, the database id of its parent (+nil+ for a
# root node) and the list of its child nodes.
class CategoryTreeNode
  attr_reader :dbid, :children
  attr_accessor :parent, :name
  # Only other nodes may re-parent a node (see +append+).
  protected :parent=

  # +parent+ is the database id of the parent node (or +nil+), +dbid+
  # is this node's database id and +name+ its name.
  def initialize(parent, dbid, name)
    @parent, @dbid, @name = parent, dbid, name
    @children = []
  end

  # Creates a new child node with the given +dbid+ and +name+, appends
  # it to this node and returns it.
  def add(dbid, name)
    append(CategoryTreeNode.new(@dbid, dbid, name))
  end

  # Adopts the existing node +child+ as the last child of this node,
  # recording this node's dbid as its parent. Returns +child+.
  def append(child)
    child.parent = @dbid
    @children.push(child)
    child
  end

  # Removes every direct child whose dbid equals that of +target+.
  # Returns the (modified) list of children.
  def remove(target)
    @children.delete_if { |child| child.dbid == target.dbid }
  end

  # Case-insensitive prefix test: returns 0 when this node's name
  # starts with +other+, and +nil+ otherwise.
  #
  # +other+ is matched literally. It is escaped before being placed in
  # the pattern so that text containing regexp metacharacters (such as
  # "C++" or "(draft") neither raises RegexpError nor matches the wrong
  # names, and the match is anchored to the start of the whole name
  # rather than to the start of any line within it.
  def like(other)
    name =~ /\A#{Regexp.escape(other.to_s)}/i
  end

  def to_s
    "<CategoryTreeNode #{dbid} '#{name}' parent=#{parent}>"
  end

  # Returns a flat array of the dbids of all nodes below this one,
  # depth-first: each child's dbid followed by that child's descendants.
  def descendants
    @children.map { |child| [child.dbid, child.descendants] }.flatten
  end
end
39
+
40
# The whole category hierarchy held in memory: a list of root nodes
# plus a lookup table from database id to node. The tree can be turned
# into, and restored from, a Base64-encoded Marshal dump.
class CategoryTree
  attr_reader :roots

  def initialize
    @table = {}
    @roots = []
  end

  # Rebuilds a tree from a string produced by +serialise+.
  #
  # NOTE(review): Marshal.load can instantiate arbitrary objects, so
  # +str+ must only ever come from a trusted source.
  def self.load(str)
    Marshal.load(Base64.decode64(str))
  end

  # Returns the node with database id +id+; raises a RuntimeError if
  # there is no such node.
  def [](id)
    @table[id] || raise("Unknown id #{id.inspect}")
  end

  # Returns the nodes matched by the slash-separated +path+. Each path
  # element is compared with node names using CategoryTreeNode#like.
  # A path with a leading '/' is matched starting from the root nodes;
  # any other path may start at any node in the tree. A path without
  # any elements ('/' or '') returns the starting set itself instead of
  # failing on a missing first element.
  def find(path)
    scope  = path.start_with?('/') ? @roots : @table.values
    points = path.split('/').reject { |point| point.empty? }
    last   = points.size - 1
    points.each_with_index do |elem, idx|
      scope = scope.select { |node| node.like(elem) }
      # descend one level for every element except the final one
      scope = scope.flat_map { |node| node.children } unless idx == last
    end
    scope
  end

  # Adds a node with the given +dbid+ and +name+ below the node
  # +parentid+, or as a new root when +parentid+ is nil. Returns the
  # new node. Raises a RuntimeError if +parentid+ is unknown.
  def add(parentid, dbid, name)
    node =
      if parentid
        self[parentid].add(dbid, name)
      else
        root = CategoryTreeNode.new(nil, dbid, name)
        @roots.push(root)
        root
      end
    @table[dbid] = node
  end

  # Removes the node +dbid+ from the lookup table and from its parent's
  # children (or from the list of roots). Raises a RuntimeError if
  # +dbid+ is unknown. Only the named node is dropped from the lookup
  # table; entries for nodes beneath it are left untouched.
  def remove(dbid)
    node = @table.delete(dbid)
    raise "Unknown id #{dbid.inspect}" if node.nil?
    detach(node)
  end

  # Moves the node +dbid+ so that it becomes the last child of
  # +new_parent+, or a root when +new_parent+ is nil. The node is
  # detached from its old position *before* being attached to the new
  # one, so moving a node within the same parent keeps it in the tree.
  # Raises ArgumentError if the move would place a node beneath itself.
  # Returns the list the node was detached from.
  def move(dbid, new_parent)
    node   = self[dbid]
    target = new_parent.nil? ? nil : self[new_parent]
    if target && (new_parent == dbid || is_descendant?(dbid, new_parent))
      raise ArgumentError,
            "Cannot move #{dbid.inspect} beneath itself (#{new_parent.inspect})"
    end

    former_siblings = detach(node)
    if target
      target.append(node)
    else
      # parent= is protected on the node, hence send
      node.send(:parent=, nil)
      @roots.push(node)
    end
    former_siblings
  end

  # True if the node with id +descendant+ lies anywhere below the node
  # with id +ancestor+. Raises a RuntimeError if +ancestor+ is unknown.
  def is_descendant?(ancestor, descendant)
    self[ancestor].descendants.include?(descendant)
  end

  # Returns the tree as a Base64-encoded Marshal dump (see +load+).
  def serialise
    Base64.encode64(Marshal.dump(self))
  end

  # Appends an indented outline of the tree to +out+ (anything that
  # responds to <<): one line per node, prefixed by one '-' per level
  # of depth.
  def pretty_print(out = STDOUT)
    roots.each { |root| print_subtree(root, 0, out) }
  end

  private

  # Takes +node+ out of its parent's children, or out of the roots when
  # it has no parent. Returns the list it was taken from (an empty list
  # if the recorded parent is no longer in the table).
  def detach(node)
    return @roots.delete_if { |root| root.dbid == node.dbid } if node.parent.nil?

    owner = @table[node.parent]
    owner ? owner.remove(node) : []
  end

  # Writes +node+ and, recursively, its children to +out+.
  def print_subtree(node, depth, out)
    out << "-" * depth
    out << " #{node.name} (#{node.dbid})\n"
    node.children.each { |child| print_subtree(child, depth + 1, out) }
  end
end
104
+ end
@@ -0,0 +1,152 @@
1
# SQL text used by the SQLite backend: table, trigger and index
# definitions plus one canned search query. Every constant is a
# single-quoted heredoc, so the SQL is taken literally (no Ruby
# interpolation or escape processing).
module QDA::Backend::SQLite::Schema
  # Table definitions, plus the single, initially empty, row of
  # category_structure.
  SCHEMA_TABLES = <<'SCHEMA_TABLES'
    CREATE TABLE category (
      catid INTEGER PRIMARY KEY,
      catname VARCHAR(255) DEFAULT NULL,
      catdesc TEXT,
      parent INTEGER,
      created_date TIMESTAMP,
      modified_date TIMESTAMP );

    CREATE TABLE category_structure ( xml TEXT );

    INSERT INTO category_structure VALUES ('');

    CREATE TABLE code (
      catid INT(11) default NULL,
      docid INT(11) default NULL,
      offset INT(11) default NULL,
      length INT(11) default NULL );

    CREATE TABLE docmeta (
      docid INT(11) default NULL,
      metaname VARCHAR(100) default NULL,
      metavalue TEXT );

    CREATE TABLE document (
      docid INTEGER PRIMARY KEY,
      doctitle VARCHAR(255) default NULL,
      doctext TEXT,
      docmemo TEXT,
      created_date TIMESTAMP,
      modified_date TIMESTAMP);

    CREATE TABLE app_preference (
      name VARCHAR(255) NOT NULL PRIMARY KEY ON CONFLICT REPLACE,
      value TEXT);
SCHEMA_TABLES

  # Triggers that cascade deletes: deleting a category deletes its
  # code rows; deleting a document deletes its docmeta and code rows.
  # The two insert triggers have empty bodies.
  SCHEMA_TRIGGERS = <<'SCHEMA_TRIGGERS'
    CREATE TRIGGER insert_category
      INSERT ON category
      BEGIN
      END;

    CREATE TRIGGER delete_category
      DELETE ON category
      BEGIN
        DELETE FROM code WHERE catid = old.catid;
      END;

    CREATE TRIGGER insert_doc
      INSERT ON document
      BEGIN
      END;

    CREATE TRIGGER delete_doc DELETE ON document
      BEGIN
        DELETE FROM docmeta WHERE docid = old.docid;
        DELETE FROM code WHERE docid = old.docid;
      END;

SCHEMA_TRIGGERS

  # This is here because it's written, but it's not in use yet.
  #
  # Each filled-in trigger records, in the undoable table, an SQL
  # statement that reverses the change it observes. Corrections made
  # to the original text:
  # * the undo for a category insert deletes by catid (category has no
  #   docid column);
  # * undo_delete_category is terminated with END; so the statements
  #   after it are not swallowed into its body;
  # * old.parent is wrapped in QUOTE() so that a NULL parent yields the
  #   text NULL instead of turning the whole concatenation into NULL;
  # * undo_insert_document uses the keyword INSERT (was "INSERTO").
  SCHEMA_UNDO = <<'SCHEMA_UNDO'
    CREATE TABLE undoable (
      actionid INTEGER PRIMARY KEY,
      step INT(255) DEFAULT 0,
      sql TEXT );

    CREATE TRIGGER undo_insert_category INSERT ON category
      BEGIN
        INSERT INTO undoable VALUES(NULL, 0, 'DELETE FROM category ' ||
                                    'WHERE catid = ' ||
                                    LAST_INSERT_ROWID() );
      END;

    CREATE TRIGGER undo_delete_category DELETE ON category
      BEGIN
        INSERT INTO undoable VALUES(NULL, 0,
                                    'INSERT INTO category
                                     VALUES (' ||
                                    old.catid || ', ' ||
                                    QUOTE(old.catname) || ' ,' ||
                                    QUOTE(old.catdesc) || ', ' ||
                                    QUOTE(old.parent) || ', ' ||
                                    QUOTE(old.created_date) || ', ' ||
                                    QUOTE(old.modified_date) || ')' );
      END;

    CREATE TRIGGER undo_insert_document INSERT ON document
      BEGIN
      END;

    CREATE TRIGGER undo_delete_document DELETE ON document
      BEGIN
      END;

    CREATE TRIGGER undo_insert_code INSERT ON code
      BEGIN
      END;

    CREATE TRIGGER undo_delete_code DELETE ON code
      BEGIN
      END;

    CREATE TRIGGER undo_insert_docmeta INSERT ON docmeta
      BEGIN
      END;

    CREATE TRIGGER undo_delete_docmeta DELETE ON docmeta
      BEGIN
      END;

    CREATE TRIGGER undo_update_category_structure
      UPDATE ON category_structure
      BEGIN
      END;
SCHEMA_UNDO

  # Indexes on document titles, on code (docid, catid) and on docmeta
  # (metaname, docid).
  SCHEMA_INDEXES = <<'SCHEMA_INDEXES'

    CREATE INDEX document_idx
      ON document(doctitle);

    CREATE INDEX code_idx
      ON code(docid, catid);

    CREATE INDEX docmeta_idx
      ON docmeta(metaname, docid);

SCHEMA_INDEXES

  # Selects, for every code belonging to a category with a given parent
  # and a name matching a LIKE pattern, the document id and title, a
  # start offset and a snippet of the document text around the code.
  # Takes six bind parameters: four that are combined with code.offset
  # and code.length (presumably the same context width each time -
  # TODO confirm against the caller), then the parent category id and
  # the LIKE pattern for the lower-cased category name.
  RINDEX_SEARCH_MODEL_QUERY = <<'RINDEX_SEARCH_MODEL_QUERY'
    SELECT document.docid AS docid, document.doctitle AS doctitle,
           MAX( 0, code.offset - ?)
             AS start_at,
           SUBSTR(document.doctext,
                  MAX( 0, code.offset - ?) + 1,
                  MIN( code.length + ( ? * 2 ),
                       LENGTH(document.doctext) - MAX(1, code.offset - ?) - 1 ) )
             AS snip
    FROM document, code
    WHERE code.docid = document.docid
    AND code.catid IN (
      SELECT catid
      FROM category
      WHERE parent = ?
      AND LOWER(category.catname) LIKE ?)
    ORDER BY code.catid, code.offset
RINDEX_SEARCH_MODEL_QUERY

end
@@ -0,0 +1,55 @@
1
+ require 'rexml/document'
2
+
3
+ module QDA::Backend::SQLite
4
+
5
# This module provides support for opening projects created in older
# versions of Weft. It expects the including object to provide @dbh
# (the database handle), get_preference and save_preference.
module Upgradeable
  # This is called when a project is opened. It checks whether any
  # changes need to be made to the storage format, based on the
  # version recorded in the project's preferences.
  def do_version_format_upgrading
    version = get_preference('LastModifiedVersion') ||
              get_preference('CreateVersion')

    # don't do anything to those created in testing versions, just
    # mark as last-opened in this version.
    if version == QDA::Version.default_version
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end

    # 0.9.4 and earlier (no version recorded) - add indexes to tables
    if version.nil?
      @dbh.execute_batch(QDA::Backend::SQLite::Schema::SCHEMA_INDEXES)
    end

    # 0.9.5 and earlier - upgrade the category tree storage format
    if version.nil? || version == '0.9.5'
      legacy_category_tree_storage
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end
  end

  # This is a change from 0.9.5 -> 0.9.6; Category tree structure
  # used to be stored in XML in the database, is now stored as a
  # marshalled CategoryTree pure ruby object.
  #
  # Reads the XML from category_structure, rebuilds it as a
  # CategoryTree in @cat_tree and writes the serialised tree back to
  # the same column. An empty or missing stored value produces an
  # empty tree rather than an error.
  def legacy_category_tree_storage
    @cat_tree = CategoryTree.new

    # Recursively copies one XML element and its child elements into
    # the tree. each_element is used rather than each_child so that
    # whitespace/text nodes between elements, which carry no
    # attributes, are skipped.
    build_cat = lambda do |elem, parent|
      cat = @cat_tree.add(parent,
                          elem.attributes['dbid'].to_i,
                          elem.attributes['name'])
      elem.each_element { |kid| build_cat.call(kid, cat.dbid) }
    end

    xml = @dbh.get_first_value("SELECT xml FROM category_structure").to_s
    root = xml.strip.empty? ? nil : REXML::Document.new(xml).root
    root.each_element { |elem| build_cat.call(elem, nil) } if root

    @dbh.transaction do
      @dbh.execute("UPDATE category_structure SET xml = ? ",
                   @cat_tree.serialise)
    end
  end
end
55
+ end
@@ -0,0 +1,157 @@
1
+ require 'weft/coding'
2
+
3
+ module QDA
4
# A category (code) in a project: it has a name and memo, sits in a
# tree of categories through +parent+ and +children+, and holds the
# table of codes (marked stretches of documents) applied under it.
class Category
  attr_reader :children, :codes
  attr_accessor :dbid, :text, :name, :meta, :parent, :memo

  # Creates a category called +name+. If +parent+ is given the new
  # category is added to that parent's children.
  def initialize(name, parent = nil, memo = '')
    @name = name
    @memo = memo
    @parent = parent
    @children = []
    @codes = QDA::CodingTable.new
    @parent.add_child(self) if @parent
  end

  # Appends +child+ to this category's children and makes this
  # category its parent.
  def add_child(child)
    @children.push(child)
    child.parent = self
  end

  # Makes this category a child of +parent+.
  def append_to(parent)
    parent.add_child(self)
    @parent = parent
  end

  # Categories are equal when their dbids are equal. Comparison with
  # nil is false; comparison with anything else that has no dbid
  # raises a RuntimeError.
  def ==(other)
    return dbid == other.dbid if other.respond_to?(:dbid)
    return false if other.nil?

    raise "No comparison of Category with #{other.inspect}"
  end

  # number of separate documents coded by this category
  def num_of_docs
    @codes.num_of_docs
  end

  def num_of_codes
    @codes.num_of_codes
  end

  def num_of_chars
    @codes.num_of_chars
  end

  # Replaces this category's table of codes.
  def codetable=(codetable)
    @codes = codetable
  end

  # returns a new category with codes representing all the text coded
  # by both +self+ and +other+. Only documents present in both code
  # tables appear in the result; for each, the stored codes are
  # combined with +intersect+.
  def intersection(other, new_name = 'INTERSECTION',
                   new_parent = nil, new_memo = '')
    result = Category.new(new_name, new_parent, new_memo)
    @codes.each do |docid, codes|
      # include? must be *called* with docid; the previous
      # include?[docid] invoked it without an argument and raised.
      next unless other.codes.include?(docid)

      result.codes[docid] = codes.intersect(other.codes[docid])
    end
    result
  end

  # apply a code to a document; returns the result of adding the code
  # to this category's code table (described by the original author as
  # the new set of codes applied to that document). +docid+ should be
  # the database id of the document (an integer), +offset+ an integer
  # >= 0 and +length+ an integer > 0; ArgumentError is raised otherwise.
  def code(docid, offset, length)
    # Integer rather than Fixnum: Fixnum was folded into Integer in
    # Ruby 2.4 and the constant no longer exists in Ruby 3.2.
    unless docid.nil? || docid.is_a?(Integer)
      raise ArgumentError,
            "Docid should be an integer or nil, got #{docid.inspect}"
    end
    unless offset >= 0
      raise ArgumentError, "Offset should be an integer >= 0, got #{offset}"
    end
    unless length > 0
      raise ArgumentError, "Length should be an integer > 0, got #{length}"
    end

    @codes.add(QDA::Code.new(docid, offset, length))
  end

  # remove the coding of +length+ characters starting at +offset+ in
  # document +docid+ from this category.
  def uncode(docid, offset, length)
    # raise "docid should be an integer > 0, is #{docid}" if docid == 0
    @codes.subtract(QDA::Code.new(docid, offset, length))
  end

  # return the vector set associated with +docid+
  def [](docid)
    @codes[docid]
  end
end
+
97
# object representing a particular application of a code to a
# document. Not sure entirely what the use of this class is at the
# moment - have moved over to using Fragment in preference to save
# lots of round trips to the database when calculating intersections
# and so on. All the functionality originally developed in this class
# now moved to Module +Coding+, mixed-in here and by +Fragment+. That
# defines all the union, intersection and exclusion operators
# (+, %, -)
class Code
  include Coding
  attr_accessor :docid, :offset, :length

  # Create a code applied to the document identified by +docid+,
  # starting at point +offset+ and running for +length+ characters.
  # Raises ArgumentError unless +docid+ is an integer, +offset+ an
  # integer >= 0 and +length+ an integer > 0.
  def initialize(docid, offset, length)
    unless docid.is_a?(Integer)
      raise ArgumentError,
            "Bad docid value #{docid} expected integer"
    end
    unless offset.is_a?(Integer) && offset >= 0
      raise ArgumentError,
            "Bad offset value #{offset} expected integer >= 0"
    end
    unless length.is_a?(Integer) && length > 0
      raise ArgumentError,
            "Bad length value #{length}, should be an integer > 0"
    end
    @docid = docid
    @offset = offset
    @length = length
  end

  # a Code is already its own simplest representation, so never
  # needs to be modified to work with another code-like object.
  def coerce(other)
    self
  end

  # A +code+ is equal to +other+ if applied to the same document
  # starts at the same point and runs for the same number of characters.
  # Anything that does not expose docid, offset and length (nil, for
  # instance) is simply not equal, rather than raising NoMethodError.
  def ==(other)
    return false unless other.respond_to?(:docid) &&
                        other.respond_to?(:offset) &&
                        other.respond_to?(:length)

    @docid == other.docid &&
      @offset == other.offset &&
      @length == other.length
  end

  # Extends this code by the length of +other+. Returns the new
  # length (not self).
  def <<(other)
    @length += other.length
  end

  # returns a new code starting from +point+ within the whole
  # document and +length+ characters long.
  def [](point, length)
    Code.new(@docid, point, length)
  end

  # Uses +end+, which this class does not define itself (presumably
  # supplied by the Coding mixin - confirm in weft/coding).
  def inspect
    "#<QDA::Code [#{@docid}]: #{@offset}-#{self.end}>"
  end
end
157
+ end