weft-qda 0.9.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/weft.rb +21 -0
- data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
- data/lib/weft/application.rb +130 -0
- data/lib/weft/backend.rb +39 -0
- data/lib/weft/backend/marshal.rb +26 -0
- data/lib/weft/backend/mysql.rb +267 -0
- data/lib/weft/backend/n6.rb +366 -0
- data/lib/weft/backend/sqlite.rb +633 -0
- data/lib/weft/backend/sqlite/category_tree.rb +104 -0
- data/lib/weft/backend/sqlite/schema.rb +152 -0
- data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
- data/lib/weft/category.rb +157 -0
- data/lib/weft/coding.rb +355 -0
- data/lib/weft/document.rb +118 -0
- data/lib/weft/filters.rb +243 -0
- data/lib/weft/wxgui.rb +687 -0
- data/lib/weft/wxgui/category.xpm +26 -0
- data/lib/weft/wxgui/dialogs.rb +128 -0
- data/lib/weft/wxgui/document.xpm +25 -0
- data/lib/weft/wxgui/error_handler.rb +52 -0
- data/lib/weft/wxgui/inspectors.rb +361 -0
- data/lib/weft/wxgui/inspectors/category.rb +165 -0
- data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
- data/lib/weft/wxgui/inspectors/document.rb +139 -0
- data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
- data/lib/weft/wxgui/inspectors/script.rb +35 -0
- data/lib/weft/wxgui/inspectors/search.rb +265 -0
- data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
- data/lib/weft/wxgui/lang.rb +17 -0
- data/lib/weft/wxgui/lang/en.rb +45 -0
- data/lib/weft/wxgui/mondrian.xpm +44 -0
- data/lib/weft/wxgui/search.xpm +25 -0
- data/lib/weft/wxgui/sidebar.rb +498 -0
- data/lib/weft/wxgui/utilities.rb +148 -0
- data/lib/weft/wxgui/weft16.xpm +31 -0
- data/lib/weft/wxgui/workarea.rb +249 -0
- data/test/001-document.rb +196 -0
- data/test/002-category.rb +138 -0
- data/test/003-code.rb +370 -0
- data/test/004-application.rb +52 -0
- data/test/006-filters.rb +139 -0
- data/test/009a-backend_sqlite_basic.rb +280 -0
- data/test/009b-backend_sqlite_complex.rb +175 -0
- data/test/009c_backend_sqlite_bench.rb +81 -0
- data/test/010-backend_nudist.rb +5 -0
- data/test/all-tests.rb +1 -0
- data/test/manual-gui-script.txt +24 -0
- data/test/testdata/autocoding-test.txt +15 -0
- data/test/testdata/iso-8859-1.txt +5 -0
- data/test/testdata/sample_doc.txt +19 -0
- data/test/testdata/search_results.txt +1254 -0
- data/test/testdata/text1-dos-ascii.txt +2 -0
- data/test/testdata/text1-unix-utf8.txt +2 -0
- data/weft-qda.rb +28 -0
- metadata +96 -0
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'base64'
|
2
|
+
|
3
|
+
module QDA::Backend::SQLite
|
4
|
+
# One node in the in-memory category tree.
#
# A node stores its own database id (+dbid+), its display +name+, the
# database id of its parent (+parent+, +nil+ for a root node) and the list
# of its child nodes (+children+).
class CategoryTreeNode
  attr_reader :dbid, :children
  attr_accessor :parent, :name
  # Only other nodes may re-parent a node (see #append).
  protected :parent=

  # +parent+ is the parent's database id (or +nil+), +dbid+ this node's
  # database id and +name+ its display name.
  def initialize(parent, dbid, name)
    @parent, @dbid, @name = parent, dbid, name
    @children = []
  end

  # Creates a new child node with the given id and name, attaches it
  # beneath this node and returns it.
  def add(dbid, name)
    append(CategoryTreeNode.new(@dbid, dbid, name))
  end

  # Attaches the existing node +child+ beneath this node, updating the
  # child's parent id. Returns +child+.
  def append(child)
    child.parent = @dbid
    @children.push(child)
    child
  end

  # Removes every child whose dbid equals +target+'s dbid. Returns the
  # remaining list of children.
  def remove(target)
    @children.delete_if { |c| c.dbid == target.dbid }
  end

  # Case-insensitive prefix test: returns 0 if this node's name starts with
  # the text +other+, otherwise +nil+.
  #
  # The search text is escaped so that it is matched literally; previously
  # it was interpolated raw into the pattern, so names containing regexp
  # metacharacters (parentheses, dots, brackets...) either failed to match
  # themselves or raised a RegexpError.
  def like(other)
    name =~ /\A#{Regexp.escape(other.to_s)}/i
  end

  def to_s
    "<CategoryTreeNode #{dbid} '#{name}' parent=#{parent}>"
  end

  # Returns a flat array with the database ids of all nodes below this one,
  # depth first (each child's id followed by that child's descendants).
  def descendants
    @children.flat_map { |c| [c.dbid, *c.descendants] }
  end
end
|
39
|
+
|
40
|
+
# In-memory index of the whole category hierarchy.
#
# The tree keeps the list of root nodes (+roots+) plus a lookup table from
# database id to node, so any node can be reached directly by id. Nodes are
# CategoryTreeNode objects; a node's +parent+ is the parent's database id,
# or +nil+ for a root.
class CategoryTree
  attr_reader :roots

  def initialize
    @table = {}
    @roots = []
  end

  # Rebuilds a tree from the string produced by #serialise.
  #
  # NOTE(review): Marshal.load can instantiate arbitrary objects, so +str+
  # must only ever come from a trusted project file.
  def self.load(str)
    Marshal.load(Base64.decode64(str))
  end

  # Returns the node with database id +id+.
  # Raises RuntimeError ("Unknown id ...") if there is no such node.
  def [](id)
    @table[id] || raise("Unknown id #{id.inspect}")
  end

  # Finds nodes by a '/'-separated path of name prefixes (see
  # CategoryTreeNode#like). A path starting with '/' is matched from the
  # roots; any other path may start at any node in the tree. Empty path
  # segments are ignored. Returns an array of matching nodes.
  #
  # An empty path returns every node and "/" returns the root nodes; both
  # used to raise NoMethodError because the split path had no first element.
  def find(path)
    from_roots = path.start_with?('/')
    segments = path.split('/').reject(&:empty?)
    scope = from_roots ? @roots.dup : @table.values
    last = segments.size - 1
    segments.each_with_index do |segment, idx|
      scope = scope.select { |node| node.like(segment) }
      # descend one level for every segment except the final one
      scope = scope.flat_map(&:children) unless idx == last
    end
    scope
  end

  # Adds a node with id +dbid+ and name +name+ beneath the node with id
  # +parentid+, or as a new root when +parentid+ is nil/false. Returns the
  # new node. Raises RuntimeError if +parentid+ is given but unknown.
  def add(parentid, dbid, name)
    node =
      if parentid
        self[parentid].add(dbid, name)
      else
        root = CategoryTreeNode.new(nil, dbid, name)
        @roots.push(root)
        root
      end
    @table[dbid] = node
  end

  # Removes the node with id +dbid+, together with everything below it,
  # from the tree. Returns the remaining siblings of the removed node.
  # Raises RuntimeError if +dbid+ is unknown.
  #
  # Root nodes are detached from +roots+ (this used to raise, because a
  # root has no parent node to detach from), and the removed node's
  # descendants are dropped from the lookup table so that relative #find
  # calls cannot return nodes that are no longer part of the tree.
  def remove(dbid)
    child = self[dbid]
    parent_node = child.parent.nil? ? nil : self[child.parent]
    [dbid, *child.descendants].each { |id| @table.delete(id) }
    if parent_node
      parent_node.remove(child)
    else
      @roots.delete_if { |root| root.dbid == dbid }
    end
  end

  # Moves the node with id +dbid+ beneath the node with id +new_parent+
  # (+nil+ makes it a root). Returns the node's former siblings.
  #
  # Moving a node to the parent it already has is a no-op (it used to
  # delete the node from that parent), moves from and to the root level
  # are supported, and moving a node beneath itself or one of its own
  # descendants raises ArgumentError instead of creating a cycle.
  # Raises RuntimeError if either id is unknown.
  def move(dbid, new_parent)
    child = self[dbid]
    old_parent = child.parent
    old_siblings = old_parent.nil? ? @roots : self[old_parent].children
    return old_siblings if old_parent == new_parent

    if new_parent == dbid || child.descendants.include?(new_parent)
      raise ArgumentError,
            "Cannot move category #{dbid.inspect} beneath itself"
    end

    if new_parent.nil?
      # parent= is protected on the node, hence the explicit send
      child.send(:parent=, nil)
      @roots.push(child)
    else
      self[new_parent].append(child)
    end
    old_siblings.delete_if { |sibling| sibling.dbid == dbid }
  end

  # True if the node with id +descendant+ lies anywhere below the node with
  # id +ancestor+. Raises RuntimeError if +ancestor+ is unknown.
  def is_descendant?(ancestor, descendant)
    self[ancestor].descendants.include?(descendant)
  end

  # Returns the whole tree as a Base64-encoded Marshal dump; the inverse of
  # CategoryTree.load.
  def serialise
    Base64.encode64(Marshal.dump(self))
  end

  # Writes an indented outline of the tree to +out+, one node per line,
  # prefixed by one '-' per level of depth.
  def pretty_print(out = STDOUT)
    printer = lambda do |item, depth|
      out << '-' * depth
      out << " #{item.name} (#{item.dbid})\n"
      item.children.each { |c| printer.call(c, depth + 1) }
    end
    roots.each { |r| printer.call(r, 0) }
  end
end
|
104
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# SQL text used by the SQLite backend: table definitions, triggers, indexes
# and a search query. Every constant is a plain (non-interpolated) heredoc.
module QDA::Backend::SQLite::Schema
  # Table definitions, plus the single (initially empty) row of
  # category_structure.
  SCHEMA_TABLES = <<'SCHEMA_TABLES'
CREATE TABLE category (
catid INTEGER PRIMARY KEY,
catname VARCHAR(255) DEFAULT NULL,
catdesc TEXT,
parent INTEGER,
created_date TIMESTAMP,
modified_date TIMESTAMP );

CREATE TABLE category_structure ( xml TEXT );

INSERT INTO category_structure VALUES ('');

CREATE TABLE code (
catid INT(11) default NULL,
docid INT(11) default NULL,
offset INT(11) default NULL,
length INT(11) default NULL );

CREATE TABLE docmeta (
docid INT(11) default NULL,
metaname VARCHAR(100) default NULL,
metavalue TEXT );

CREATE TABLE document (
docid INTEGER PRIMARY KEY,
doctitle VARCHAR(255) default NULL,
doctext TEXT,
docmemo TEXT,
created_date TIMESTAMP,
modified_date TIMESTAMP);

CREATE TABLE app_preference (
name VARCHAR(255) NOT NULL PRIMARY KEY ON CONFLICT REPLACE,
value TEXT);
SCHEMA_TABLES

  # Triggers that cascade deletes: removing a category removes its codes,
  # removing a document removes its metadata and codes. The insert triggers
  # have empty bodies.
  SCHEMA_TRIGGERS = <<'SCHEMA_TRIGGERS'
CREATE TRIGGER insert_category
INSERT ON category
BEGIN
END;

CREATE TRIGGER delete_category
DELETE ON category
BEGIN
DELETE FROM code WHERE catid = old.catid;
END;

CREATE TRIGGER insert_doc
INSERT ON document
BEGIN
END;

CREATE TRIGGER delete_doc DELETE ON document
BEGIN
DELETE FROM docmeta WHERE docid = old.docid;
DELETE FROM code WHERE docid = old.docid;
END;

SCHEMA_TRIGGERS

  # This is here because it's written, but it's not in use yet.
  #
  # Each trigger is meant to record, in table undoable, the SQL statement
  # that would reverse the change. Only the two category triggers have a
  # body so far. Fixed relative to the earlier text: the undo statement for
  # a category insert filters on catid (the category table has no docid
  # column), undo_delete_category is terminated with END;, the INSERTO
  # typo is corrected, and old.parent goes through QUOTE() so that a NULL
  # parent does not turn the whole concatenated statement into NULL.
  SCHEMA_UNDO = <<'SCHEMA_UNDO'
CREATE TABLE undoable (
actionid INTEGER PRIMARY KEY,
step INT(255) DEFAULT 0,
sql TEXT );

CREATE TRIGGER undo_insert_category INSERT ON category
BEGIN
INSERT INTO undoable VALUES(NULL, 0, 'DELETE FROM category ' ||
'WHERE catid = ' ||
LAST_INSERT_ROWID() );
END;

CREATE TRIGGER undo_delete_category DELETE ON category
BEGIN
INSERT INTO undoable VALUES(NULL, 0,
'INSERT INTO category
VALUES (' ||
old.catid || ', ' ||
QUOTE(old.catname) || ' ,' ||
QUOTE(old.catdesc) || ', ' ||
QUOTE(old.parent) || ', ' ||
QUOTE(old.created_date) || ', ' ||
QUOTE(old.modified_date) || ')' );
END;

CREATE TRIGGER undo_insert_document INSERT ON document
BEGIN
END;

CREATE TRIGGER undo_delete_document DELETE ON document
BEGIN
END;

CREATE TRIGGER undo_insert_code INSERT ON code
BEGIN
END;

CREATE TRIGGER undo_delete_code DELETE ON code
BEGIN
END;

CREATE TRIGGER undo_insert_docmeta INSERT ON docmeta
BEGIN
END;

CREATE TRIGGER undo_delete_docmeta DELETE ON docmeta
BEGIN
END;

CREATE TRIGGER undo_update_category_structure
UPDATE ON category_structure
BEGIN
END;
SCHEMA_UNDO

  # Indexes on document titles, on code (docid, catid) and on document
  # metadata (metaname, docid).
  SCHEMA_INDEXES = <<'SCHEMA_INDEXES'

CREATE INDEX document_idx
ON document(doctitle);

CREATE INDEX code_idx
ON code(docid, catid);

CREATE INDEX docmeta_idx
ON docmeta(metaname, docid);

SCHEMA_INDEXES

  # Query returning, for every code applied by the categories that have a
  # given parent and a name matching a LIKE pattern, the document id and
  # title, the start offset of a snippet and the snippet text itself.
  # It takes six bind parameters, in order: four that widen the snippet
  # around the coded span (presumably the same context size each time;
  # confirm against the caller), then the parent category id and the
  # lower-cased name pattern.
  RINDEX_SEARCH_MODEL_QUERY = <<'RINDEX_SEARCH_MODEL_QUERY'
SELECT document.docid AS docid, document.doctitle AS doctitle,
MAX( 0, code.offset - ?)
AS start_at,
SUBSTR(document.doctext,
MAX( 0, code.offset - ?) + 1,
MIN( code.length + ( ? * 2 ),
LENGTH(document.doctext) - MAX(1, code.offset - ?) - 1 ) )
AS snip
FROM document, code
WHERE code.docid = document.docid
AND code.catid IN (
SELECT catid
FROM category
WHERE parent = ?
AND LOWER(category.catname) LIKE ?)
ORDER BY code.catid, code.offset
RINDEX_SEARCH_MODEL_QUERY

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
|
3
|
+
module QDA::Backend::SQLite
|
4
|
+
|
5
|
+
# This module provides support for opening projects created in older
# versions of Weft.
#
# It is a mixin: the including object is expected to provide @dbh (the
# database handle) and the get_preference / save_preference methods used
# below.
module Upgradeable
  # This is called when a project is opened. It checks whether any
  # changes need to be made to the storage format, based on the version
  # recorded in the 'LastModifiedVersion' (or, failing that,
  # 'CreateVersion') preference.
  def do_version_format_upgrading
    version = get_preference('LastModifiedVersion') ||
              get_preference('CreateVersion')

    # don't do anything to those created in testing versions, just
    # mark as last-opened in this version.
    if version == QDA::Version.default_version
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end

    # 0.9.4 and earlier - add indexes to tables
    if version.nil?
      @dbh.execute_batch(QDA::Backend::SQLite::Schema::SCHEMA_INDEXES)
    end

    # 0.9.5 and earlier - upgrade the category tree storage format
    if version.nil? || version == '0.9.5'
      legacy_category_tree_storage
      save_preference('LastModifiedVersion', WEFT_VERSION)
    end
  end

  # This is a change from 0.9.5 -> 0.9.6; Category tree structure
  # used to be stored in XML in the database, is now stored as a
  # marshalled CategoryTree pure ruby object.
  #
  # Reads the XML from category_structure, rebuilds it as a CategoryTree
  # in @cat_tree (using each element's 'dbid' and 'name' attributes) and
  # writes the serialised tree back to the same column.
  def legacy_category_tree_storage
    @cat_tree = CategoryTree.new

    # Adds +elem+ beneath the category with id +parent+, then recurses into
    # its child elements. Only element children are visited: text nodes
    # (e.g. whitespace between tags) carry no attributes and used to raise.
    build_cat = lambda do |elem, parent|
      cat = @cat_tree.add(parent,
                          elem.attributes['dbid'].to_i,
                          elem.attributes['name'])
      elem.each_element { |kid| build_cat.call(kid, cat.dbid) }
    end

    xml = @dbh.get_first_value("SELECT xml FROM category_structure")
    doc = REXML::Document.new(xml)
    # A blank xml value yields a document without a root element; in that
    # case an empty tree is stored rather than failing.
    root = doc.root
    root.each_element { |elem| build_cat.call(elem, nil) } if root

    @dbh.transaction do
      @dbh.execute("UPDATE category_structure SET xml = ? ",
                   @cat_tree.serialise)
    end
  end
end
|
55
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'weft/coding'
|
2
|
+
|
3
|
+
module QDA
|
4
|
+
# A category (code) that can be applied to spans of documents.
#
# A category has a name and memo, an optional parent category, a list of
# child categories and a coding table (+codes+) holding the spans it has
# been applied to.
class Category
  attr_reader :children, :codes
  attr_accessor :dbid, :text, :name, :meta, :parent, :memo

  # Creates a category called +name+. If +parent+ is given the new
  # category is registered as one of its children.
  def initialize(name, parent = nil, memo = '')
    @name = name
    @memo = memo
    @parent = parent
    @children = []
    @codes = QDA::CodingTable.new
    @parent.add_child(self) if @parent
  end

  # Appends +child+ to this category's children and makes this category
  # the child's parent.
  def add_child(child)
    @children.push(child)
    child.parent = self
  end

  # Makes this category a child of +parent+.
  def append_to(parent)
    parent.add_child(self)
    @parent = parent
  end

  # Two categories are equal when their database ids are equal. Comparing
  # with +nil+ gives +false+; comparing with anything else that has no
  # +dbid+ raises a RuntimeError.
  def ==(other)
    if other.respond_to?(:dbid)
      dbid == other.dbid
    elsif other.nil?
      false
    else
      raise "No comparison of Category with #{other.inspect}"
    end
  end

  # number of separate documents coded by this category
  def num_of_docs
    @codes.num_of_docs
  end

  def num_of_codes
    @codes.num_of_codes
  end

  def num_of_chars
    @codes.num_of_chars
  end

  # Replaces this category's coding table.
  def codetable=(codetable)
    @codes = codetable
  end

  # returns a new category with codes representing the text coded by
  # both +self+ and +other+. Documents coded by only one of the two
  # categories are left out of the result.
  def intersection(other, new_name = 'INTERSECTION',
                   new_parent = nil, new_memo = '')
    result = Category.new(new_name, new_parent, new_memo)
    @codes.each do |docid, codes|
      next unless other.codes.include?(docid)

      result.codes[docid] = codes.intersect(other.codes[docid])
    end
    result
  end

  # apply a code to a document; returns the new set of codes applied
  # to that document. +docid+ should be the database id of the
  # document (an integer), +offset+ an integer >= 0 and +length+ an
  # integer > 0; anything else raises ArgumentError.
  #
  # A nil +docid+ passes the check here, but QDA::Code#initialize as
  # defined in this file rejects it with ArgumentError.
  def code(docid, offset, length)
    unless docid.nil? || docid.is_a?(Integer)
      raise ArgumentError,
            "Docid should be an integer or nil, got #{docid.inspect}"
    end
    unless offset.is_a?(Integer) && offset >= 0
      raise ArgumentError, "Offset should be an integer >= 0, got #{offset}"
    end
    unless length.is_a?(Integer) && length > 0
      raise ArgumentError, "Length should be an integer > 0, got #{length}"
    end
    @codes.add(QDA::Code.new(docid, offset, length))
  end

  # Removes the span of +length+ characters starting at +offset+ in
  # document +docid+ from this category's codes.
  def uncode(docid, offset, length)
    # raise "docid should be an integer > 0, is #{docid}" if docid == 0
    @codes.subtract(QDA::Code.new(docid, offset, length))
  end

  # return the vector set associated with +docid+
  def [](docid)
    @codes[docid]
  end
end
|
96
|
+
|
97
|
+
# A single application of a code to a document: the span of +length+
# characters starting at +offset+ in the document with id +docid+.
#
# The original note on this class says its purpose is uncertain now that
# Fragment is used in preference (to save round trips to the database when
# calculating intersections and so on), and that the union, intersection
# and exclusion operators (+, %, -) live in module +Coding+, which is mixed
# in here and by +Fragment+.
class Code
  include Coding
  attr_accessor :docid, :offset, :length

  # Builds a code on document +docid+ covering +length+ characters from
  # +offset+. Raises ArgumentError unless +docid+ is an integer, +offset+
  # an integer >= 0 and +length+ an integer > 0.
  def initialize(docid, offset, length)
    unless docid.is_a?(Integer)
      raise ArgumentError, "Bad docid value #{docid} expected integer"
    end
    if !offset.is_a?(Integer) || offset < 0
      raise ArgumentError, "Bad offset value #{offset} expected integer >= 0"
    end
    if !length.is_a?(Integer) || length <= 0
      raise ArgumentError,
            "Bad length value #{length}, should be an integer > 0"
    end

    @docid, @offset, @length = docid, offset, length
  end

  # A Code is already its own simplest form, so it is returned unchanged
  # whatever code-like object it is asked to work with.
  def coerce(other)
    self
  end

  # Equal when both refer to the same document, start at the same offset
  # and have the same length.
  def ==(other)
    @docid == other.docid &&
      @offset == other.offset &&
      @length == other.length
  end

  # Extends this code by the length of +other+; returns the new length.
  def <<(other)
    @length = @length + other.length
  end

  # Returns a new code on the same document, starting at +point+ (counted
  # within the whole document) and +length+ characters long.
  def [](point, length)
    Code.new(@docid, point, length)
  end

  def inspect
    "#<QDA::Code [#{@docid}]: #{@offset}-#{self.end}>"
  end
end
|
157
|
+
end
|