weft-qda 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/weft.rb +21 -0
- data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
- data/lib/weft/application.rb +130 -0
- data/lib/weft/backend.rb +39 -0
- data/lib/weft/backend/marshal.rb +26 -0
- data/lib/weft/backend/mysql.rb +267 -0
- data/lib/weft/backend/n6.rb +366 -0
- data/lib/weft/backend/sqlite.rb +633 -0
- data/lib/weft/backend/sqlite/category_tree.rb +104 -0
- data/lib/weft/backend/sqlite/schema.rb +152 -0
- data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
- data/lib/weft/category.rb +157 -0
- data/lib/weft/coding.rb +355 -0
- data/lib/weft/document.rb +118 -0
- data/lib/weft/filters.rb +243 -0
- data/lib/weft/wxgui.rb +687 -0
- data/lib/weft/wxgui/category.xpm +26 -0
- data/lib/weft/wxgui/dialogs.rb +128 -0
- data/lib/weft/wxgui/document.xpm +25 -0
- data/lib/weft/wxgui/error_handler.rb +52 -0
- data/lib/weft/wxgui/inspectors.rb +361 -0
- data/lib/weft/wxgui/inspectors/category.rb +165 -0
- data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
- data/lib/weft/wxgui/inspectors/document.rb +139 -0
- data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
- data/lib/weft/wxgui/inspectors/script.rb +35 -0
- data/lib/weft/wxgui/inspectors/search.rb +265 -0
- data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
- data/lib/weft/wxgui/lang.rb +17 -0
- data/lib/weft/wxgui/lang/en.rb +45 -0
- data/lib/weft/wxgui/mondrian.xpm +44 -0
- data/lib/weft/wxgui/search.xpm +25 -0
- data/lib/weft/wxgui/sidebar.rb +498 -0
- data/lib/weft/wxgui/utilities.rb +148 -0
- data/lib/weft/wxgui/weft16.xpm +31 -0
- data/lib/weft/wxgui/workarea.rb +249 -0
- data/test/001-document.rb +196 -0
- data/test/002-category.rb +138 -0
- data/test/003-code.rb +370 -0
- data/test/004-application.rb +52 -0
- data/test/006-filters.rb +139 -0
- data/test/009a-backend_sqlite_basic.rb +280 -0
- data/test/009b-backend_sqlite_complex.rb +175 -0
- data/test/009c_backend_sqlite_bench.rb +81 -0
- data/test/010-backend_nudist.rb +5 -0
- data/test/all-tests.rb +1 -0
- data/test/manual-gui-script.txt +24 -0
- data/test/testdata/autocoding-test.txt +15 -0
- data/test/testdata/iso-8859-1.txt +5 -0
- data/test/testdata/sample_doc.txt +19 -0
- data/test/testdata/search_results.txt +1254 -0
- data/test/testdata/text1-dos-ascii.txt +2 -0
- data/test/testdata/text1-unix-utf8.txt +2 -0
- data/weft-qda.rb +28 -0
- metadata +96 -0
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'base64'
|
2
|
+
|
3
|
+
module QDA::Backend::SQLite
|
4
|
+
# One node of the in-memory category hierarchy.
#
# A node stores its own database id (+dbid+), its display +name+, the
# database id of its parent (+parent+, +nil+ for a root) and the list of
# its child nodes. Note that +parent+ holds an id, not a node object.
class CategoryTreeNode
  attr_reader :dbid, :children
  attr_accessor :parent, :name
  # Only other nodes may re-parent a node (see #append).
  protected :parent=

  # +parent+ is the dbid of the parent node (or +nil+), +dbid+ the id of
  # this node and +name+ its display name.
  def initialize(parent, dbid, name)
    @parent = parent
    @dbid = dbid
    @name = name
    @children = []
  end

  # Creates a new child node with the given +dbid+ and +name+, attaches it
  # below this node and returns it.
  def add(dbid, name)
    append(CategoryTreeNode.new(@dbid, dbid, name))
  end

  # Attaches the existing node +child+ below this node, updating the
  # child's parent id. Returns +child+.
  def append(child)
    child.parent = @dbid
    @children.push(child)
    child
  end

  # Detaches every direct child whose dbid equals that of +target+.
  # Returns the (modified) list of children.
  def remove(target)
    @children.delete_if { |c| c.dbid == target.dbid }
  end

  # Case-insensitive prefix test: truthy (0) when this node's name starts
  # with the text +other+, +nil+ otherwise.
  #
  # The text is escaped before being placed in the pattern, so characters
  # such as "(", "+" or "." are matched literally instead of raising
  # RegexpError or acting as wildcards. \A anchors at the very start of
  # the name rather than at the start of any line within it.
  def like(other)
    name =~ /\A#{Regexp.escape(other.to_s)}/i
  end

  # Short human-readable description, for debugging.
  def to_s
    "<CategoryTreeNode #{dbid} '#{name}' parent=#{parent}>"
  end

  # Returns a flat array of the dbids of all nodes below this one; each
  # child's id is followed by the ids of that child's own descendants.
  def descendants
    @children.map { |c| [c.dbid, c.descendants] }.flatten
  end
end
|
39
|
+
|
40
|
+
# The whole category hierarchy: a list of root nodes plus a lookup table
# from database id to node. Instances are persisted with Marshal (see
# #serialise and CategoryTree.load), so the instance variable names
# (@table, @roots) are part of the stored format.
class CategoryTree
  attr_reader :roots

  def initialize
    @table = {}
    @roots = []
  end

  # Rebuilds a tree from the Base64 text produced by #serialise.
  #
  # NOTE(security): Marshal.load can instantiate arbitrary objects, so
  # +str+ must only ever come from a trusted project file.
  def CategoryTree.load(str)
    Marshal.load(Base64.decode64(str))
  end

  # Returns the node with database id +id+; raises RuntimeError if there
  # is no such node.
  def [](id)
    @table[id] or raise "Unknown id #{id.inspect}"
  end

  # Returns the nodes matching the slash-separated +path+. Each path
  # element is matched against node names with CategoryTreeNode#like. A
  # path starting with "/" is matched from the roots downwards; any other
  # path may start at any node in the tree. Empty path elements are
  # ignored. A path with no elements at all ("/" or "") returns every
  # node in the starting scope instead of failing.
  def find(path)
    absolute = (path[0, 1] == '/')
    points = path.split('/').reject { |point| point.empty? }
    scope = absolute ? @roots : @table.values
    # dup so that callers can never mutate @roots through the result
    return scope.dup if points.empty?

    last = points.size - 1
    points.each_with_index do |elem, idx|
      scope = scope.find_all { |node| node.like(elem) }
      # descend one level for every element except the final one
      scope = scope.map { |node| node.children }.flatten unless idx == last
    end
    scope
  end

  # Adds a node with id +dbid+ and name +name+ below the node with id
  # +parentid+, or as a new root when +parentid+ is nil. Returns the new
  # node. Raises RuntimeError if +parentid+ is given but unknown.
  def add(parentid, dbid, name)
    node = if parentid
             self[parentid].add(dbid, name)
           else
             @roots.push(CategoryTreeNode.new(nil, dbid, name)).last
           end
    @table[dbid] = node
  end

  # Removes the node with id +dbid+ together with its whole subtree.
  # Works for root nodes as well as nested ones. Descendants are dropped
  # from the lookup table too, so they can no longer be returned by #[] or
  # by a relative #find. Raises RuntimeError if +dbid+ is unknown.
  # Returns the list the node was detached from.
  def remove(dbid)
    child = self[dbid]
    siblings = detach(child)
    @table.delete(dbid)
    child.descendants.each { |id| @table.delete(id) }
    siblings
  end

  # Moves the node with id +dbid+ below the node with id +new_parent+.
  # The node is detached from its old position before being appended, so
  # "moving" a node to the parent it already has keeps it (it becomes the
  # last child), and root nodes can be moved as well. Raises RuntimeError
  # if either id is unknown. Returns the list the node was detached from.
  def move(dbid, new_parent)
    child = self[dbid]
    target = self[new_parent]
    old_siblings = detach(child)
    target.append(child)
    old_siblings
  end

  # True if the node with id +descendant+ lies anywhere below the node
  # with id +ancestor+. Raises RuntimeError if +ancestor+ is unknown.
  def is_descendant?(ancestor, descendant)
    self[ancestor].descendants.include?(descendant)
  end

  # Returns this tree as Base64-encoded Marshal data.
  def serialise
    Base64.encode64(Marshal.dump(self))
  end

  # Writes an indented outline of the tree to +out+ (anything that
  # responds to <<): one line per node, prefixed by one "-" per level.
  def pretty_print(out = STDOUT)
    printer = lambda do |item, depth|
      out << "-" * depth
      out << " #{item.name} (#{item.dbid})\n"
      item.children.each { |c| printer.call(c, depth + 1) }
    end
    roots.each { |r| printer.call(r, 0) }
  end

  # Unhooks +child+ from its parent node, or from the list of roots when
  # it has no parent. Returns the list it was removed from.
  def detach(child)
    if child.parent
      self[child.parent].remove(child)
    else
      @roots.delete_if { |r| r.dbid == child.dbid }
    end
  end
  private :detach
end
|
104
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module QDA::Backend::SQLite::Schema
|
2
|
+
# SQL that creates the project tables: category, category_structure
# (seeded with a single empty row), code, docmeta, document and
# app_preference.
SCHEMA_TABLES = <<'SQL'
CREATE TABLE category (
catid INTEGER PRIMARY KEY,
catname VARCHAR(255) DEFAULT NULL,
catdesc TEXT,
parent INTEGER,
created_date TIMESTAMP,
modified_date TIMESTAMP );

CREATE TABLE category_structure ( xml TEXT );

INSERT INTO category_structure VALUES ('');

CREATE TABLE code (
catid INT(11) default NULL,
docid INT(11) default NULL,
offset INT(11) default NULL,
length INT(11) default NULL );

CREATE TABLE docmeta (
docid INT(11) default NULL,
metaname VARCHAR(100) default NULL,
metavalue TEXT );

CREATE TABLE document (
docid INTEGER PRIMARY KEY,
doctitle VARCHAR(255) default NULL,
doctext TEXT,
docmemo TEXT,
created_date TIMESTAMP,
modified_date TIMESTAMP);

CREATE TABLE app_preference (
name VARCHAR(255) NOT NULL PRIMARY KEY ON CONFLICT REPLACE,
value TEXT);
SQL
|
38
|
+
|
39
|
+
# SQL that creates the maintenance triggers. The two delete triggers
# cascade: deleting a category removes its rows from code, and deleting a
# document removes its rows from docmeta and code. The two insert
# triggers have empty bodies.
SCHEMA_TRIGGERS = <<'SQL'
CREATE TRIGGER insert_category
INSERT ON category
BEGIN
END;

CREATE TRIGGER delete_category
DELETE ON category
BEGIN
DELETE FROM code WHERE catid = old.catid;
END;

CREATE TRIGGER insert_doc
INSERT ON document
BEGIN
END;

CREATE TRIGGER delete_doc DELETE ON document
BEGIN
DELETE FROM docmeta WHERE docid = old.docid;
DELETE FROM code WHERE docid = old.docid;
END;

SQL
|
63
|
+
|
64
|
+
# This is here because it's written, but it's not in use yet.
#
# SQL for an undo log: a table of compensating SQL statements plus
# triggers intended to fill it. Only the two category triggers record
# anything so far; the remaining triggers are empty placeholders.
#
# Relative to the first draft this text:
# * spells the INSERT keyword of undo_insert_document correctly,
# * closes undo_delete_category with END;,
# * filters the category undo statement on catid (the category table has
#   no docid column),
# * wraps old.parent in QUOTE() so a NULL parent (a root category) is
#   written as the literal NULL instead of turning the whole concatenated
#   statement into NULL.
#
# NOTE(review): undo_insert_category relies on LAST_INSERT_ROWID() inside
# a trigger with no explicit BEFORE/AFTER timing - confirm it yields the
# id of the category row being inserted before putting this into use.
SCHEMA_UNDO = <<'SCHEMA_UNDO'
CREATE TABLE undoable (
actionid INTEGER PRIMARY KEY,
step INT(255) DEFAULT 0,
sql TEXT );

CREATE TRIGGER undo_insert_category INSERT ON category
BEGIN
INSERT INTO undoable VALUES(NULL, 0, 'DELETE FROM category ' ||
'WHERE catid = ' ||
LAST_INSERT_ROWID() );
END;

CREATE TRIGGER undo_delete_category DELETE ON category
BEGIN
INSERT INTO undoable VALUES(NULL, 0,
'INSERT INTO category
VALUES (' ||
old.catid || ', ' ||
QUOTE(old.catname) || ' ,' ||
QUOTE(old.catdesc) || ', ' ||
QUOTE(old.parent) || ', ' ||
QUOTE(old.created_date) || ', ' ||
QUOTE(old.modified_date) || ')' );
END;

CREATE TRIGGER undo_insert_document INSERT ON document
BEGIN
END;

CREATE TRIGGER undo_delete_document DELETE ON document
BEGIN
END;

CREATE TRIGGER undo_insert_code INSERT ON code
BEGIN
END;

CREATE TRIGGER undo_delete_code DELETE ON code
BEGIN
END;

CREATE TRIGGER undo_insert_docmeta INSERT ON docmeta
BEGIN
END;

CREATE TRIGGER undo_delete_docmeta DELETE ON docmeta
BEGIN
END;

CREATE TRIGGER undo_update_category_structure
UPDATE ON category_structure
BEGIN
END;
SCHEMA_UNDO
|
119
|
+
|
120
|
+
# SQL that creates the lookup indexes: document by title, code by
# (docid, catid) and docmeta by (metaname, docid).
SCHEMA_INDEXES = <<'SQL'

CREATE INDEX document_idx
ON document(doctitle);

CREATE INDEX code_idx
ON code(docid, catid);

CREATE INDEX docmeta_idx
ON docmeta(metaname, docid);

SQL
|
132
|
+
|
133
|
+
# Query template with six positional placeholders. For every code row
# belonging to a category with the given parent and a lower-cased name
# LIKE the given pattern, it selects the document id and title, a
# start_at position and a snippet (snip) of the document text cut out
# with SUBSTR around the coded passage. Rows are ordered by category and
# offset.
# NOTE(review): the first four placeholders look like the same context
# width repeated - confirm against the caller.
RINDEX_SEARCH_MODEL_QUERY = <<'SQL'
SELECT document.docid AS docid, document.doctitle AS doctitle,
MAX( 0, code.offset - ?)
AS start_at,
SUBSTR(document.doctext,
MAX( 0, code.offset - ?) + 1,
MIN( code.length + ( ? * 2 ),
LENGTH(document.doctext) - MAX(1, code.offset - ?) - 1 ) )
AS snip
FROM document, code
WHERE code.docid = document.docid
AND code.catid IN (
SELECT catid
FROM category
WHERE parent = ?
AND LOWER(category.catname) LIKE ?)
ORDER BY code.catid, code.offset
SQL
|
151
|
+
|
152
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
|
3
|
+
module QDA::Backend::SQLite
|
4
|
+
|
5
|
+
# This module provides support for opening projects created in older
# versions of Weft. It expects the including object to provide @dbh (the
# database handle) and the get_preference / save_preference methods.
module Upgradeable
  # This is called when a project is opened. It checks whether any
  # changes need to be made to the storage format, applies them, and
  # records the running version as 'LastModifiedVersion' whenever it
  # has made a change or the project came from a testing version.
  def do_version_format_upgrading
    version = get_preference('LastModifiedVersion') ||
              get_preference('CreateVersion')

    # don't do anything to those created in testing versions, just
    # mark as last-opened in this version.
    if version == QDA::Version.default_version()
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end

    # 0.9.4 and earlier (no version recorded) - add indexes to tables
    if version.nil?
      @dbh.execute_batch(QDA::Backend::SQLite::Schema::SCHEMA_INDEXES)
    end

    # 0.9.5 and earlier - upgrade the category tree storage format
    if version.nil? || version == '0.9.5'
      legacy_category_tree_storage()
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end
  end

  # This is a change from 0.9.5 -> 0.9.6; Category tree structure
  # used to be stored in XML in the database, is now stored as a
  # marshalled CategoryTree pure ruby object.
  #
  # Reads the XML from category_structure, rebuilds it as a CategoryTree
  # in @cat_tree (using each element's 'dbid' and 'name' attributes) and
  # writes the serialised tree back to the same column.
  #
  # Only element children are visited, so whitespace or other text nodes
  # in the stored XML are skipped rather than causing a NoMethodError.
  # Stored XML without a root element (e.g. the empty string the schema
  # seeds the table with) produces an empty tree.
  def legacy_category_tree_storage
    @cat_tree = CategoryTree.new()

    # recursively copy one XML element and its sub-elements into the tree
    build_cat = lambda do |elem, parent|
      cat = @cat_tree.add(parent,
                          elem.attributes['dbid'].to_i,
                          elem.attributes['name'])
      elem.each_element { |kid| build_cat.call(kid, cat.dbid) }
    end

    xml = @dbh.get_first_value("SELECT xml FROM category_structure")
    root = REXML::Document.new(xml).root
    root.each_element { |elem| build_cat.call(elem, nil) } if root

    @dbh.transaction do
      xml = @cat_tree.serialise
      @dbh.execute("UPDATE category_structure SET xml = ? ", xml)
    end
  end
end
|
55
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'weft/coding'
|
2
|
+
|
3
|
+
module QDA
|
4
|
+
# A category: a named node in a hierarchy of categories, carrying a memo
# and a table of the codes (marked passages of documents) applied under
# it.
class Category
  attr_reader :children, :codes
  attr_accessor :dbid, :text, :name, :meta, :parent, :memo

  # Creates a category called +name+. If +parent+ is given, the new
  # category is also registered as one of the parent's children.
  def initialize(name, parent = nil, memo = '')
    @name = name
    @memo = memo
    @parent = parent
    @children = []
    @codes = QDA::CodingTable.new()
    @parent.add_child(self) if @parent
  end

  # Adds +child+ to this category's children and makes this category the
  # child's parent.
  def add_child(child)
    @children.push(child)
    child.parent = self
  end

  # Attaches this category below +parent+.
  def append_to(parent)
    parent.add_child(self)
    @parent = parent
  end

  # Two categories are equal when their database ids are equal. Comparing
  # with nil gives false; comparing with any other object that has no
  # +dbid+ raises RuntimeError.
  def ==(other)
    return self.dbid == other.dbid if other.respond_to?(:dbid)
    return false if other.nil?
    raise "No comparison of Category with #{other.inspect}"
  end

  # number of separate documents coded by this category
  def num_of_docs
    @codes.num_of_docs
  end

  # number of codes held by this category's coding table
  def num_of_codes
    @codes.num_of_codes
  end

  # number of characters reported by this category's coding table
  def num_of_chars
    @codes.num_of_chars
  end

  # Replaces this category's coding table with +codetable+.
  def codetable=(codetable)
    @codes = codetable
  end

  # returns a new category with codes representing the text coded by
  # both +self+ and +other+. Only documents present in both coding tables
  # contribute; for each, the result holds the intersection of the two
  # sets of codes. +new_name+, +new_parent+ and +new_memo+ are passed to
  # the new category.
  def intersection(other, new_name = 'INTERSECTION',
                   new_parent = nil, new_memo = '')
    result = Category.new(new_name, new_parent, new_memo)
    @codes.each do |docid, codes|
      # assumes the coding table answers Hash-style include?(key) -
      # the original called include? without its argument, which raised
      next unless other.codes.include?(docid)
      result.codes[docid] = codes.intersect(other.codes[docid])
    end
    result
  end

  # apply a code to a document; returns whatever the coding table's +add+
  # returns. +docid+ should be the database id of the document (an
  # integer), +offset+ an integer >= 0 and +length+ an integer > 0;
  # ArgumentError is raised otherwise.
  def code(docid, offset, length)
    # Integer rather than Fixnum: Fixnum no longer exists in current Ruby
    unless docid.nil? || docid.kind_of?(Integer)
      raise ArgumentError,
            "Docid should be an integer or nil, got #{docid.inspect}"
    end
    unless offset >= 0
      raise ArgumentError, "Offset should be an integer >= 0, got #{offset}"
    end
    unless length > 0
      raise ArgumentError, "Length should be an integer > 0, got #{length}"
    end
    @codes.add(QDA::Code.new(docid, offset, length))
  end

  # Removes the given span from this category's coding of document
  # +docid+; returns whatever the coding table's +subtract+ returns.
  def uncode(docid, offset, length)
    @codes.subtract(QDA::Code.new(docid, offset, length))
  end

  # return the vector set associated with +docid+
  def [](docid)
    @codes[docid]
  end
end
|
96
|
+
|
97
|
+
# object representing a particular application of a code to a
# document: +length+ characters of document +docid+ starting at
# +offset+. Not sure entirely what the use of this class is at the
# moment - have moved over to using Fragment in preference to save
# lots of round trips to the database when calculating intersections
# and so on. All the functionality originally developed in this class
# now moved to Module +Coding+, mixed-in here and by +Fragment+. That
# defines all the union, intersection and exclusion operators
# (+, %, -)
class Code
  include Coding
  attr_accessor :docid, :offset, :length

  # Create a code applied to the document identified by +docid+,
  # starting at point +offset+ and running for +length+ characters.
  # Raises ArgumentError unless +docid+ is an integer, +offset+ an
  # integer >= 0 and +length+ an integer > 0.
  def initialize(docid, offset, length)
    unless docid.is_a?(Integer)
      raise ArgumentError,
            "Bad docid value #{docid} expected integer"
    end
    unless offset.is_a?(Integer) && offset >= 0
      raise ArgumentError,
            "Bad offset value #{offset} expected integer >= 0"
    end
    unless length.is_a?(Integer) && length > 0
      raise ArgumentError,
            "Bad length value #{length}, should be an integer > 0"
    end
    @docid = docid
    @offset = offset
    @length = length
  end

  # a Code is already its own simplest representation, so never
  # needs to be modified to work with another code-like object.
  def coerce(other)
    self
  end

  # A +code+ is equal to +other+ if applied to the same document
  # starts at the same point and runs for the same number of characters.
  # Anything that does not expose docid, offset and length (nil, for
  # instance) is simply not equal, rather than raising NoMethodError.
  def ==(other)
    return false unless other.respond_to?(:docid) &&
                        other.respond_to?(:offset) &&
                        other.respond_to?(:length)
    @docid == other.docid &&
      @offset == other.offset &&
      @length == other.length
  end

  # Extends this code by the length of +other+. Returns the new length.
  def <<(other)
    @length += other.length
  end

  # returns a new code starting from +point+ within the whole
  # document and +length+ characters long.
  def [](point, length)
    Code.new(@docid, point, length)
  end

  # Debugging representation showing the document id and the span.
  # NOTE(review): +end+ is not defined in this class; presumably it
  # comes from the Coding mixin - confirm.
  def inspect
    "#<QDA::Code [#{@docid}]: #{@offset}-#{self.end}>"
  end
end
|
157
|
+
end
|