weft-qda 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/lib/weft.rb +21 -0
  2. data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
  3. data/lib/weft/application.rb +130 -0
  4. data/lib/weft/backend.rb +39 -0
  5. data/lib/weft/backend/marshal.rb +26 -0
  6. data/lib/weft/backend/mysql.rb +267 -0
  7. data/lib/weft/backend/n6.rb +366 -0
  8. data/lib/weft/backend/sqlite.rb +633 -0
  9. data/lib/weft/backend/sqlite/category_tree.rb +104 -0
  10. data/lib/weft/backend/sqlite/schema.rb +152 -0
  11. data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
  12. data/lib/weft/category.rb +157 -0
  13. data/lib/weft/coding.rb +355 -0
  14. data/lib/weft/document.rb +118 -0
  15. data/lib/weft/filters.rb +243 -0
  16. data/lib/weft/wxgui.rb +687 -0
  17. data/lib/weft/wxgui/category.xpm +26 -0
  18. data/lib/weft/wxgui/dialogs.rb +128 -0
  19. data/lib/weft/wxgui/document.xpm +25 -0
  20. data/lib/weft/wxgui/error_handler.rb +52 -0
  21. data/lib/weft/wxgui/inspectors.rb +361 -0
  22. data/lib/weft/wxgui/inspectors/category.rb +165 -0
  23. data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
  24. data/lib/weft/wxgui/inspectors/document.rb +139 -0
  25. data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
  26. data/lib/weft/wxgui/inspectors/script.rb +35 -0
  27. data/lib/weft/wxgui/inspectors/search.rb +265 -0
  28. data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
  29. data/lib/weft/wxgui/lang.rb +17 -0
  30. data/lib/weft/wxgui/lang/en.rb +45 -0
  31. data/lib/weft/wxgui/mondrian.xpm +44 -0
  32. data/lib/weft/wxgui/search.xpm +25 -0
  33. data/lib/weft/wxgui/sidebar.rb +498 -0
  34. data/lib/weft/wxgui/utilities.rb +148 -0
  35. data/lib/weft/wxgui/weft16.xpm +31 -0
  36. data/lib/weft/wxgui/workarea.rb +249 -0
  37. data/test/001-document.rb +196 -0
  38. data/test/002-category.rb +138 -0
  39. data/test/003-code.rb +370 -0
  40. data/test/004-application.rb +52 -0
  41. data/test/006-filters.rb +139 -0
  42. data/test/009a-backend_sqlite_basic.rb +280 -0
  43. data/test/009b-backend_sqlite_complex.rb +175 -0
  44. data/test/009c_backend_sqlite_bench.rb +81 -0
  45. data/test/010-backend_nudist.rb +5 -0
  46. data/test/all-tests.rb +1 -0
  47. data/test/manual-gui-script.txt +24 -0
  48. data/test/testdata/autocoding-test.txt +15 -0
  49. data/test/testdata/iso-8859-1.txt +5 -0
  50. data/test/testdata/sample_doc.txt +19 -0
  51. data/test/testdata/search_results.txt +1254 -0
  52. data/test/testdata/text1-dos-ascii.txt +2 -0
  53. data/test/testdata/text1-unix-utf8.txt +2 -0
  54. data/weft-qda.rb +28 -0
  55. metadata +96 -0
@@ -0,0 +1,104 @@
1
+ require 'base64'
2
+
3
+ module QDA::Backend::SQLite
4
# One node of the in-memory category tree. A node knows its own database
# id and name, the database id of the node it hangs under, and the list
# of nodes hanging under it.
class CategoryTreeNode
  attr_reader :dbid, :children
  attr_accessor :parent, :name
  # Only other nodes may re-parent a node (see #append).
  protected :parent=

  # +parent+ is stored as given; #add and #append always pass the owning
  # node's dbid. +dbid+ and +name+ identify this node.
  def initialize(parent, dbid, name)
    @parent = parent
    @dbid = dbid
    @name = name
    @children = []
  end

  # Creates a new node beneath this one and returns it.
  def add(dbid, name)
    fresh = CategoryTreeNode.new(@dbid, dbid, name)
    append(fresh)
  end

  # Attaches an existing node beneath this one, recording this node's
  # dbid as its parent. Returns the attached node.
  def append(child)
    child.parent = @dbid
    @children << child
    @children.last
  end

  # Drops every direct child whose dbid matches +target+'s. Returns the
  # (possibly shortened) list of children.
  def remove(target)
    @children.delete_if { |kid| kid.dbid == target.dbid }
  end

  # Case-insensitive test of whether this node's name starts with
  # +other+. +other+ is interpolated into a regular expression as-is.
  # Returns the match position or nil.
  def like(other)
    prefix = /^#{other}/i
    name =~ prefix
  end

  def to_s
    "<CategoryTreeNode #{dbid} '#{name}' parent=#{parent}>"
  end

  # Database ids of all nodes below this one, depth first: each child's
  # id is followed by the ids of that child's own descendants.
  def descendants
    @children.each_with_object([]) do |kid, ids|
      ids << kid.dbid
      ids.concat(kid.descendants)
    end
  end
end
39
+
40
# The whole category hierarchy: an index of every node keyed by database
# id (+@table+) plus the ordered list of top-level nodes (+@roots+).
# Instances are persisted with Marshal (see #serialise / .load), so the
# set of instance variables must stay stable.
class CategoryTree
  attr_reader :roots

  def initialize
    @table = {}
    @roots = []
  end

  # Rebuilds a tree from the Base64 text produced by #serialise.
  #
  # SECURITY NOTE(review): Marshal.load can instantiate arbitrary objects.
  # +str+ must only ever come from a project file the user trusts; do not
  # feed it data from an untrusted source.
  def self.load(str)
    Marshal.load(Base64.decode64(str))
  end

  # Returns the node with database id +id+.
  # Raises RuntimeError if no such node is in the tree.
  def [](id)
    @table[id] or raise "Unknown id #{id.inspect}"
  end

  # Finds nodes by a '/'-separated path of name prefixes (matched with
  # CategoryTreeNode#like). A path starting with '/' is anchored at the
  # root nodes; otherwise the first element may match any node in the
  # tree. Empty path elements are ignored, so '' returns every node and
  # '/' returns the roots. Always returns a fresh Array.
  def find(path)
    points = path.split('/')
    # ''.split and '/'.split both give [], so test the path itself
    # rather than points[0] (which would be nil).
    scope = path.start_with?('/') ? @roots.dup : @table.values
    points.delete('')
    until points.empty?
      elem = points.shift
      scope = scope.select { |node| node.like(elem) }
      # descend one level while there are further path elements to match
      scope = scope.flat_map { |node| node.children } unless points.empty?
    end
    scope
  end

  # Adds a node named +name+ with id +dbid+ beneath the node +parentid+,
  # or as a new root when +parentid+ is nil. Returns the new node.
  # Raises RuntimeError if +parentid+ is given but unknown.
  def add(parentid, dbid, name)
    node =
      if parentid
        self[parentid].add(dbid, name)
      else
        CategoryTreeNode.new(nil, dbid, name)
      end
    @roots.push(node) unless parentid
    @table[dbid] = node
  end

  # Removes the node +dbid+ and everything beneath it from the tree.
  # Works for root nodes as well as nested ones. Returns the sibling
  # list the node was taken out of.
  # Raises RuntimeError if +dbid+ is unknown.
  def remove(dbid)
    child = self[dbid]
    siblings = detach(child)
    # Forget the whole subtree, otherwise orphaned descendants would
    # still be returned by #[] and by unanchored #find.
    @table.delete(dbid)
    child.descendants.each { |id| @table.delete(id) }
    siblings
  end

  # Re-parents the node +dbid+ beneath +new_parent+ (a database id, or
  # nil to make it a root). Returns the sibling list the node was taken
  # out of.
  # Raises RuntimeError for unknown ids, and ArgumentError if the move
  # would place the node beneath itself or one of its own descendants.
  def move(dbid, new_parent)
    child = self[dbid]
    # Resolve the destination before touching anything, so a bad id
    # leaves the tree unchanged.
    target = new_parent.nil? ? nil : self[new_parent]
    if new_parent == dbid || child.descendants.include?(new_parent)
      raise ArgumentError,
            "Cannot move #{dbid.inspect} beneath itself (#{new_parent.inspect})"
    end

    # Detach first: attaching first and then removing by dbid would also
    # delete the freshly attached node when old and new parent coincide.
    siblings = detach(child)
    if target
      target.append(child)
    else
      # parent= is protected on the node; only nodes normally set it.
      child.send(:parent=, nil)
      @roots.push(child)
    end
    siblings
  end

  # True if the node with id +descendant+ lies anywhere beneath the node
  # with id +ancestor+. Raises RuntimeError if +ancestor+ is unknown.
  def is_descendant?(ancestor, descendant)
    self[ancestor].descendants.include?(descendant)
  end

  # Returns the tree as Base64-encoded Marshal data (see .load).
  def serialise
    Base64.encode64(Marshal.dump(self))
  end

  # Writes an indented outline of the tree to +out+ (anything that
  # responds to <<), one node per line, prefixed by one '-' per level.
  def pretty_print(out = STDOUT)
    printer = lambda do |item, depth|
      out << "-" * depth
      out << " #{item.name} (#{item.dbid})\n"
      item.children.each { |c| printer.call(c, depth + 1) }
    end
    roots.each { |r| printer.call(r, 0) }
  end

  private

  # Takes +node+ out of its parent's children, or out of the root list
  # when it has no parent. Returns the list it was removed from.
  def detach(node)
    if node.parent.nil?
      @roots.delete_if { |r| r.dbid == node.dbid }
    else
      self[node.parent].remove(node)
    end
  end
end
104
+ end
@@ -0,0 +1,152 @@
1
# SQL text used to create and query a Weft project database. Every
# constant is a plain String of SQL; nothing here executes anything.
module QDA::Backend::SQLite::Schema
  # Table definitions for a new project. category_structure holds a
  # single row (seeded with an empty string) describing the category tree.
  SCHEMA_TABLES = <<'SCHEMA_TABLES'
    CREATE TABLE category (
      catid INTEGER PRIMARY KEY,
      catname VARCHAR(255) DEFAULT NULL,
      catdesc TEXT,
      parent INTEGER,
      created_date TIMESTAMP,
      modified_date TIMESTAMP );

    CREATE TABLE category_structure ( xml TEXT );

    INSERT INTO category_structure VALUES ('');

    CREATE TABLE code (
      catid INT(11) default NULL,
      docid INT(11) default NULL,
      offset INT(11) default NULL,
      length INT(11) default NULL );

    CREATE TABLE docmeta (
      docid INT(11) default NULL,
      metaname VARCHAR(100) default NULL,
      metavalue TEXT );

    CREATE TABLE document (
      docid INTEGER PRIMARY KEY,
      doctitle VARCHAR(255) default NULL,
      doctext TEXT,
      docmemo TEXT,
      created_date TIMESTAMP,
      modified_date TIMESTAMP);

    CREATE TABLE app_preference (
      name VARCHAR(255) NOT NULL PRIMARY KEY ON CONFLICT REPLACE,
      value TEXT);
SCHEMA_TABLES

  # Triggers that cascade deletes: removing a category removes its codes,
  # removing a document removes its metadata and its codes. The insert
  # triggers have empty bodies.
  SCHEMA_TRIGGERS = <<'SCHEMA_TRIGGERS'
    CREATE TRIGGER insert_category
      INSERT ON category
      BEGIN
      END;

    CREATE TRIGGER delete_category
      DELETE ON category
      BEGIN
        DELETE FROM code WHERE catid = old.catid;
      END;

    CREATE TRIGGER insert_doc
      INSERT ON document
      BEGIN
      END;

    CREATE TRIGGER delete_doc DELETE ON document
      BEGIN
        DELETE FROM docmeta WHERE docid = old.docid;
        DELETE FROM code WHERE docid = old.docid;
      END;

SCHEMA_TRIGGERS

  # This is here because it's written, but it's not in use yet.
  #
  # Records, for each change, the SQL that would reverse it. Only the
  # category triggers have bodies so far.
  #
  # NOTE(review): before enabling this, check the behaviour of
  # LAST_INSERT_ROWID() inside a trigger that does not say AFTER, and
  # that old.parent being NULL does not null out the whole concatenated
  # statement - both look suspect but are untested here.
  SCHEMA_UNDO = <<'SCHEMA_UNDO'
    CREATE TABLE undoable (
      actionid INTEGER PRIMARY KEY,
      step INT(255) DEFAULT 0,
      sql TEXT );

    CREATE TRIGGER undo_insert_category INSERT ON category
      BEGIN
        INSERT INTO undoable VALUES(NULL, 0, 'DELETE FROM category ' ||
                                             'WHERE catid = ' ||
                                             LAST_INSERT_ROWID() );
      END;

    CREATE TRIGGER undo_delete_category DELETE ON category
      BEGIN
        INSERT INTO undoable VALUES(NULL, 0,
                                    'INSERT INTO category
                                     VALUES (' ||
                                    old.catid || ', ' ||
                                    QUOTE(old.catname) || ' ,' ||
                                    QUOTE(old.catdesc) || ', ' ||
                                    old.parent || ', ' ||
                                    QUOTE(old.created_date) || ', ' ||
                                    QUOTE(old.modified_date) || ')' );
      END;

    CREATE TRIGGER undo_insert_document INSERT ON document
      BEGIN
      END;

    CREATE TRIGGER undo_delete_document DELETE ON document
      BEGIN
      END;

    CREATE TRIGGER undo_insert_code INSERT ON code
      BEGIN
      END;

    CREATE TRIGGER undo_delete_code DELETE ON code
      BEGIN
      END;

    CREATE TRIGGER undo_insert_docmeta INSERT ON docmeta
      BEGIN
      END;

    CREATE TRIGGER undo_delete_docmeta DELETE ON docmeta
      BEGIN
      END;

    CREATE TRIGGER undo_update_category_structure
      UPDATE ON category_structure
      BEGIN
      END;
SCHEMA_UNDO

  # Indexes on document titles, on codes by (docid, catid) and on
  # document metadata by (metaname, docid).
  SCHEMA_INDEXES = <<'SCHEMA_INDEXES'

    CREATE INDEX document_idx
      ON document(doctitle);

    CREATE INDEX code_idx
      ON code(docid, catid);

    CREATE INDEX docmeta_idx
      ON docmeta(metaname, docid);

SCHEMA_INDEXES

  # Fetches a snippet of document text around every code belonging to
  # the child categories of one parent whose name matches a LIKE pattern.
  # Takes six positional parameters, in order: four values applied to
  # code.offset / code.length (presumably the same context width each
  # time - confirm against the caller), then the parent category id, then
  # the LIKE pattern for the lower-cased category name.
  RINDEX_SEARCH_MODEL_QUERY = <<'RINDEX_SEARCH_MODEL_QUERY'
    SELECT document.docid AS docid, document.doctitle AS doctitle,
           MAX( 0, code.offset - ?)
             AS start_at,
           SUBSTR(document.doctext,
                  MAX( 0, code.offset - ?) + 1,
                  MIN( code.length + ( ? * 2 ),
                       LENGTH(document.doctext) - MAX(1, code.offset - ?) - 1 ) )
             AS snip
    FROM document, code
    WHERE code.docid = document.docid
    AND code.catid IN (
      SELECT catid
      FROM category
      WHERE parent = ?
      AND LOWER(category.catname) LIKE ?)
    ORDER BY code.catid, code.offset
RINDEX_SEARCH_MODEL_QUERY

end
@@ -0,0 +1,55 @@
1
+ require 'rexml/document'
2
+
3
+ module QDA::Backend::SQLite
4
+
5
# This module provides support for opening projects created in older
# versions of Weft. It expects to be mixed into an object that has a
# database handle in +@dbh+ and responds to +get_preference+ and
# +save_preference+.
module Upgradeable
  # This is called when a project is opened. It checks whether any
  # changes need to be made to the storage format, applies them, and
  # records the current version as the last one to modify the project.
  def do_version_format_upgrading
    version = get_preference('LastModifiedVersion') ||
              get_preference('CreateVersion')

    # don't do anything to those created in testing versions, just
    # mark as last-opened in this version.
    if version == QDA::Version.default_version
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end

    # 0.9.4 and earlier - add indexes to tables
    if version.nil?
      @dbh.execute_batch(QDA::Backend::SQLite::Schema::SCHEMA_INDEXES)
    end

    # 0.9.5 and earlier - upgrade the category tree storage format
    if version.nil? || version == '0.9.5'
      legacy_category_tree_storage
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end
  end

  # This is a change from 0.9.5 -> 0.9.6; Category tree structure
  # used to be stored in XML in the database, is now stored as a
  # marshalled CategoryTree pure ruby object.
  #
  # Reads the XML from category_structure, rebuilds it as a CategoryTree
  # in +@cat_tree+ (each element's +dbid+ and +name+ attributes become a
  # node) and writes the serialised tree back in place of the XML.
  def legacy_category_tree_storage
    @cat_tree = CategoryTree.new

    build_cat = lambda do |elem, parent|
      cat = @cat_tree.add(parent,
                          elem.attributes['dbid'].to_i,
                          elem.attributes['name'])
      # each_element skips text nodes; each_child would also yield the
      # whitespace between tags, which has no attributes.
      elem.each_element { |kid| build_cat.call(kid, cat.dbid) }
    end

    xml = @dbh.get_first_value("SELECT xml FROM category_structure")
    # An empty or missing structure parses to a document with no root
    # element; that simply yields an empty tree.
    root = REXML::Document.new(xml).root
    root.each_element { |elem| build_cat.call(elem, nil) } if root

    @dbh.transaction do
      serialised = @cat_tree.serialise
      @dbh.execute("UPDATE category_structure SET xml = ? ", serialised)
    end
  end
end
55
+ end
@@ -0,0 +1,157 @@
1
+ require 'weft/coding'
2
+
3
+ module QDA
4
# A named category (code) that can be applied to stretches of document
# text. Categories form a tree through +parent+ / +children+, and each
# one carries a QDA::CodingTable recording where it has been applied.
class Category
  attr_reader :children, :codes
  attr_accessor :dbid, :text, :name, :meta, :parent, :memo

  # Creates a category called +name+. If +parent+ is given the new
  # category registers itself as one of that parent's children.
  def initialize(name, parent = nil, memo = '')
    @name = name
    @memo = memo
    @parent = parent
    @children = []
    @codes = QDA::CodingTable.new
    @parent.add_child(self) if @parent
  end

  # Appends +child+ to this category's children and makes this category
  # its parent.
  def add_child(child)
    @children.push(child)
    child.parent = self
  end

  # Attaches this category beneath +parent+.
  def append_to(parent)
    parent.add_child(self)
    @parent = parent
  end

  # Two categories are equal when they have the same database id.
  # Comparison with nil is false; comparison with anything else that has
  # no +dbid+ raises RuntimeError.
  def ==(other)
    if other.respond_to?(:dbid)
      return self.dbid == other.dbid
    elsif other.nil?
      return false
    else
      raise "No comparison of Category with #{other.inspect}"
    end
  end

  # number of separate documents coded by this category
  def num_of_docs
    @codes.num_of_docs
  end

  def num_of_codes
    @codes.num_of_codes
  end

  def num_of_chars
    @codes.num_of_chars
  end

  # Replaces this category's coding table wholesale.
  def codetable=(codetable)
    @codes = codetable
  end

  # returns a new category with codes representing all the text coded
  # by both +self+ and +other+. Only documents present in both coding
  # tables appear in the result. (Relies on the coding table being
  # hash-like: each, include?, [] and []=.)
  def intersection(other, new_name = 'INTERSECTION',
                   new_parent = nil, new_memo = '')
    result = Category.new(new_name, new_parent, new_memo)
    @codes.each do |docid, codes|
      next unless other.codes.include?(docid)
      result.codes[docid] = codes.intersect(other.codes[docid])
    end
    result
  end

  # apply a code to a document; returns the new set of codes applied
  # to that document (whatever the coding table's +add+ returns).
  # +docid+ should be the database id of the document (an integer),
  # +offset+ the start position (>= 0) and +length+ the number of
  # characters (> 0). Raises ArgumentError otherwise.
  def code(docid, offset, length)
    # Integer rather than Fixnum: Fixnum was folded into Integer and no
    # longer exists in current Ruby.
    unless docid.nil? || docid.kind_of?(Integer)
      raise ArgumentError,
            "Docid should be an integer or nil, got #{docid.inspect}"
    end
    unless offset >= 0
      raise ArgumentError, "Offset should be an integer >= 0, got #{offset}"
    end
    unless length > 0
      raise ArgumentError, "Length should be an integer > 0, got #{length}"
    end
    new_code = QDA::Code.new(docid, offset, length)
    @codes.add(new_code)
  end

  # Removes this category's coding from +length+ characters of document
  # +docid+ starting at +offset+.
  def uncode(docid, offset, length)
    # raise "docid should be an integer > 0, is #{docid}" if docid == 0
    c = QDA::Code.new(docid, offset, length)
    @codes.subtract(c)
  end

  # return the vector set associated with +docid+
  def [](docid)
    @codes[docid]
  end
end
+
97
# object representing a particular application of a code to a
# document. Not sure entirely what the use of this class is at the
# moment - have moved over to using Fragment in preference to save
# lots of round trips to the database when calculating intersections
# and so on. All the functionality originally developed in this class
# now moved to Module +Coding+, mixed-in here and by +Fragment+. That
# defines all the union, intersection and exclusion operators
# (+, %, -)
class Code
  include Coding
  attr_accessor :docid, :offset, :length

  # Create a code applied to the document identified by +docid+,
  # starting at point +offset+ and running for +length+ characters.
  # Raises ArgumentError unless +docid+ is an integer, +offset+ an
  # integer >= 0 and +length+ an integer > 0.
  def initialize(docid, offset, length)
    unless docid.is_a?(Integer)
      raise ArgumentError,
            "Bad docid value #{docid} expected integer"
    end
    unless offset.is_a?(Integer) && offset >= 0
      raise ArgumentError,
            "Bad offset value #{offset} expected integer >= 0"
    end
    unless length.is_a?(Integer) && length > 0
      raise ArgumentError,
            "Bad length value #{length}, should be an integer > 0"
    end
    @docid = docid
    @offset = offset
    @length = length
  end

  # a Code is already its own simplest representation, so never
  # needs to be modified to work with another code-like object.
  def coerce(other)
    self
  end

  # A +code+ is equal to +other+ if applied to the same document
  # starts at the same point and runs for the same number of characters.
  # Anything that does not expose docid, offset and length (nil, for
  # instance) is simply not equal, rather than an error.
  def ==(other)
    return false unless other.respond_to?(:docid) &&
                        other.respond_to?(:offset) &&
                        other.respond_to?(:length)

    @docid == other.docid &&
      @offset == other.offset &&
      @length == other.length
  end

  # Extends this code by the length of +other+. Returns the new length.
  def <<(other)
    @length += other.length
  end

  # returns a new code starting from +point+ within the whole
  # document and +length+ characters long.
  def [](point, length)
    Code.new(@docid, point, length)
  end

  # +end+ is not defined in this class; presumably it comes from the
  # Coding mixin.
  def inspect
    "#<QDA::Code [#{@docid}]: #{@offset}-#{self.end}>"
  end
end
157
+ end