weft-qda 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/lib/weft.rb +21 -0
  2. data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
  3. data/lib/weft/application.rb +130 -0
  4. data/lib/weft/backend.rb +39 -0
  5. data/lib/weft/backend/marshal.rb +26 -0
  6. data/lib/weft/backend/mysql.rb +267 -0
  7. data/lib/weft/backend/n6.rb +366 -0
  8. data/lib/weft/backend/sqlite.rb +633 -0
  9. data/lib/weft/backend/sqlite/category_tree.rb +104 -0
  10. data/lib/weft/backend/sqlite/schema.rb +152 -0
  11. data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
  12. data/lib/weft/category.rb +157 -0
  13. data/lib/weft/coding.rb +355 -0
  14. data/lib/weft/document.rb +118 -0
  15. data/lib/weft/filters.rb +243 -0
  16. data/lib/weft/wxgui.rb +687 -0
  17. data/lib/weft/wxgui/category.xpm +26 -0
  18. data/lib/weft/wxgui/dialogs.rb +128 -0
  19. data/lib/weft/wxgui/document.xpm +25 -0
  20. data/lib/weft/wxgui/error_handler.rb +52 -0
  21. data/lib/weft/wxgui/inspectors.rb +361 -0
  22. data/lib/weft/wxgui/inspectors/category.rb +165 -0
  23. data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
  24. data/lib/weft/wxgui/inspectors/document.rb +139 -0
  25. data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
  26. data/lib/weft/wxgui/inspectors/script.rb +35 -0
  27. data/lib/weft/wxgui/inspectors/search.rb +265 -0
  28. data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
  29. data/lib/weft/wxgui/lang.rb +17 -0
  30. data/lib/weft/wxgui/lang/en.rb +45 -0
  31. data/lib/weft/wxgui/mondrian.xpm +44 -0
  32. data/lib/weft/wxgui/search.xpm +25 -0
  33. data/lib/weft/wxgui/sidebar.rb +498 -0
  34. data/lib/weft/wxgui/utilities.rb +148 -0
  35. data/lib/weft/wxgui/weft16.xpm +31 -0
  36. data/lib/weft/wxgui/workarea.rb +249 -0
  37. data/test/001-document.rb +196 -0
  38. data/test/002-category.rb +138 -0
  39. data/test/003-code.rb +370 -0
  40. data/test/004-application.rb +52 -0
  41. data/test/006-filters.rb +139 -0
  42. data/test/009a-backend_sqlite_basic.rb +280 -0
  43. data/test/009b-backend_sqlite_complex.rb +175 -0
  44. data/test/009c_backend_sqlite_bench.rb +81 -0
  45. data/test/010-backend_nudist.rb +5 -0
  46. data/test/all-tests.rb +1 -0
  47. data/test/manual-gui-script.txt +24 -0
  48. data/test/testdata/autocoding-test.txt +15 -0
  49. data/test/testdata/iso-8859-1.txt +5 -0
  50. data/test/testdata/sample_doc.txt +19 -0
  51. data/test/testdata/search_results.txt +1254 -0
  52. data/test/testdata/text1-dos-ascii.txt +2 -0
  53. data/test/testdata/text1-unix-utf8.txt +2 -0
  54. data/weft-qda.rb +28 -0
  55. metadata +96 -0
data/lib/weft.rb ADDED
@@ -0,0 +1,21 @@
1
+ require 'weft/filters'
2
+ require 'weft/document'
3
+ require 'weft/category'
4
+ require 'weft/backend'
5
+ require 'weft/application'
6
+
7
# Work out which version is running. When weft/WEFT-VERSION-STRING can be
# loaded, its string is parsed into WEFT_VERSION; when it cannot, the
# placeholder version is used and the run is flagged as a testing build.
begin
  require 'weft/WEFT-VERSION-STRING'
  WEFT_VERSION = QDA::Version.new(WEFT_VERSION_STRING)
  # A version file that still carries the placeholder version marks a
  # testing build; any other value is treated as a release/testing version
  # number.
  WEFT_TESTING = (WEFT_VERSION == QDA::Version.default_version())
rescue LoadError
  WEFT_VERSION = QDA::Version.default_version()
  WEFT_VERSION_STRING = '[unreleased version]'
  WEFT_TESTING = true
end
@@ -0,0 +1 @@
1
+ WEFT_VERSION_STRING = '0.9.6'
@@ -0,0 +1,130 @@
1
+ require 'observer'
2
+
3
+ module QDA
4
# Three-part version number ("major.minor.release").
#
# The components are kept as the digit strings captured from the input, so
# comparison is textual: '0.9.6' and '0.09.6' are different versions.
class Version
  attr_reader :major, :minor, :release

  # The placeholder version, '0.0.0'.
  def Version.default_version()
    new('0.0.0')
  end

  # str:: text containing a dotted triple of digit runs, e.g. '0.9.6'. The
  #       pattern is not anchored, so the first triple found anywhere in
  #       +str+ is used.
  #
  # Raises ArgumentError when +str+ contains no such triple.
  def initialize(str)
    unless str =~ /(\d+)\.(\d+)\.(\d+)\s*/
      raise ArgumentError, "Invalid version string #{str}"
    end
    @major, @minor, @release = $1, $2, $3
  end

  # The version as 'major.minor.release'.
  def to_s
    [@major, @minor, @release].join('.')
  end

  # Compares against another Version, or against a String that is parsed
  # first. Anything that cannot be read as a version (nil, arbitrary
  # objects, unparsable strings) is simply unequal; equality never raises.
  def ==(other)
    if other.kind_of?(String)
      begin
        other = self.class.new(other)
      rescue ArgumentError
        return false
      end
    end
    return false unless other.respond_to?(:major) &&
                        other.respond_to?(:minor) &&
                        other.respond_to?(:release)
    major == other.major &&
      minor == other.minor &&
      release == other.release
  end

  # Hash-key equality: only another Version with the same components.
  def eql?(other)
    other.kind_of?(Version) && self == other
  end

  # Kept consistent with eql? so equal versions land in the same hash bucket.
  def hash
    [major, minor, release].hash
  end
end
31
+
32
# The project object. A storage backend module is mixed into each instance
# (see Application::new_virgin) and supplies the persistence calls used
# below (save_category, save_preference, get_all_docs, get_category, ...).
# Registered observers are notified when the unsaved-changes flag flips.
class Application
  include Observable

  # observer:: optional object to register straight away (Observable
  #            requires it to respond to +update+).
  def initialize(observer = nil)
    add_observer(observer) if observer
    @dirty = false
  end

  # Creates a completely empty new application / project, using the
  # backend +backend+, to be initialized with args +args+.
  #
  # +observer+ is handed to the constructor so it is registered before the
  # backend starts; previously the argument was accepted but dropped.
  def Application::new_virgin(backend, args, observer = nil)
    app = new(observer)
    app.extend( backend )
    app.start(args)
    app.install_clean()
    app
  end

  # Creates the two basic root categories, records the creating version in
  # the preferences, and marks the project as clean.
  def set_up()
    save_category( Category.new('CATEGORIES', nil) )
    save_category( Category.new('SEARCHES', nil) )
    save_preference( 'CreateVersion', WEFT_VERSION )
    save_preference( 'CreateVersionString', WEFT_VERSION_STRING )
    undirty!
  end

  # Yields every document returned by get_all_docs.
  def each_doc()
    get_all_docs().each { | doc | yield doc }
  end

  # Are we in a state where we need saving - should be overridden in
  # subclass.
  def dirty?()
    @dirty
  end

  # Flags unsaved changes. Observers only hear about it when the flag
  # actually changes, because +changed+ is only armed on a transition.
  def dirty!()
    changed unless dirty?
    @dirty = true
    notify_observers(@dirty)
  end

  # Clears the unsaved-changes flag; observers are notified on a transition
  # only (see dirty!).
  def undirty!()
    changed if dirty?
    @dirty = false
    notify_observers(@dirty)
  end

  # Is it up and running.
  def started?
    true
  end

  # Signal to clear up. Raises RuntimeError when the project is not
  # consistent, unless +force+ is set. +consistent?+ is not defined in this
  # class - presumably supplied by the backend; confirm.
  def finish(force = false)
    raise "Not ready to be saved" unless consistent? || force
  end

  # Resolves one query term to its text. +function+ is 'IS CODED BY'
  # (args[0] is passed to get_category) or 'CONTAINS WORD' (args[0] is the
  # search term); anything else raises RuntimeError.
  def query_segment(function, *args)
    case function
    when "IS CODED BY"
      get_text_at_category( get_category(args[0]) )
    when "CONTAINS WORD"
      get_search_fragments( args[0], :wrap_both => 100 )
    else
      raise RuntimeError, "Unknown function '#{function}' in query"
    end
  end
  private :query_segment

  # Executes a query: a flat list of function, argument, operator,
  # function, argument, ... (e.g. 'IS CODED BY', id, 'AND', 'CONTAINS WORD',
  # word). Operators are 'AND', 'OR' and anything containing 'NOT'; an
  # empty operator ends the query early.
  #
  # NOTE(review): the values returned by join / merge / remove are
  # discarded, so this relies on those methods changing text_1 in place -
  # confirm against the result-set class.
  def do_query(*query)
    text_1 = query_segment(query.shift, query.shift)

    # work rightwards through the query, combining each further result set
    # into the first one
    while op = query.shift
      return text_1 if op.empty?
      text_2 = query_segment(query.shift, query.shift)
      case op
      when 'AND'
        text_1.join(text_2)
      when 'OR'
        text_1.merge(text_2)
      when /(AND )?NOT/
        text_1.remove(text_2)
      else
        raise RuntimeError, "Unknown operator '#{op}' in query"
      end
    end

    text_1
  end
end
130
+ end
@@ -0,0 +1,39 @@
1
+ module QDA
2
+ module Backend
3
+ autoload :MySQL, 'weft/backend/mysql'
4
+ # autoload :SQLite, 'backend/sqlite'
5
+ require 'weft/backend/sqlite'
6
+ # autoload :N6, 'backend/n6'
7
+ require 'weft/backend/n6'
8
+ autoload :RubyNative, 'weft/backend/marshal'
9
+
10
# Baseline storage interface. Each method is either a harmless default or a
# "virtual" stub that raises until a concrete backend overrides it.
module Abstract
  # Receive arguments and make any connection required to the storage
  # source. This default ignores +args+ and does nothing.
  def start(args = {})
    # deliberately empty: the "virtual" raise is switched off for this hook
    nil
  end

  # Load a specific document. Always raises RuntimeError ("virtual") here.
  def get_doc(doctitle)
    raise RuntimeError, "virtual"
  end

  # An array of all the documents - should this include TEXT.
  # This default has none.
  def get_all_docs()
    Array.new
  end

  # All categories in a tree structure; the root nodes are returned. This
  # default is a single category with an empty name.
  def get_all_categories()
    unnamed_root = Category.new('')
    [ unnamed_root ]
  end

  # Save changes. Always raises RuntimeError ("virtual") here.
  def save()
    raise RuntimeError, "virtual"
  end

end
38
+ end
39
+ end
@@ -0,0 +1,26 @@
1
+ # storage backend using Ruby's Marshal module
2
+ module QDA
3
+ module Backend
4
# Marshal-file storage: documents and nodes live in two files, 'docs' and
# 'nodes', under a base directory.
#
# Named RubyNative because weft/backend.rb autoloads this file for the
# constant :RubyNative; declaring it as MySQL left that constant undefined
# and redefined MySQL#connect / MySQL#save instead.
module RubyNative
  # Loads both files and points every loaded object back at this store.
  #
  # args:: hash; args[:basedir] is prefixed verbatim to the file names, so
  #        it must carry its own trailing separator.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects - only open
  # project directories that are trusted.
  def connect(args)
    @basedir = args[:basedir]
    # binary mode: Marshal data is not text and must not be newline-translated
    @docs = File.open(@basedir + 'docs', 'rb') { | f | Marshal.load(f) }
    @docs.values.each { | doc | doc.source = self }

    @nodes = File.open(@basedir + 'nodes', 'rb') { | f | Marshal.load(f) }
    @nodes.each { | node | node.source = self }
    @root_node = @nodes.detect { | node | node.is_a?(Nudist::RootNode) }
  end

  # Writes documents, then nodes, back to their files. Each object's
  # +source+ is cleared while it is dumped so the store itself is not
  # serialised, and is restored afterwards even when the dump fails.
  def save()
    marshal_dump_detached(@docs.values, @docs, 'docs')
    marshal_dump_detached(@nodes, @nodes, 'nodes')
  end

  private

  # Dumps +payload+ to +filename+ under the base directory with the
  # +source+ of every object in +members+ temporarily set to nil.
  # Returns +payload+.
  def marshal_dump_detached(members, payload, filename)
    begin
      members.each { | member | member.source = nil }
      File.open(@basedir + filename, 'wb') { | f | Marshal.dump(payload, f) }
    ensure
      members.each { | member | member.source = self }
    end
    payload
  end
end
25
+ end
26
+ end
@@ -0,0 +1,267 @@
1
+ require 'dbi'
2
+
3
+ # storage backend using MYSQL-dbi module
4
+ module QDA
5
+ module Backend
6
+ module MySQL
7
+ # load up the database connection
8
# Stores the database handle used by every query in this backend.
#
# dbh:: the handle itself, or a hash carrying it under :dbh.
#
# This module defines +connect+ a second time further down, taking an args
# hash; that later definition is the one in effect once the file has
# loaded. Both call shapes are accepted here so the two definitions do not
# disagree.
def connect(dbh)
  @dbh = dbh.is_a?(Hash) ? dbh[:dbh] : dbh
end
11
+
12
+ #
13
# Returns an array with one Document per row of the docs table, each
# carrying its title and database id (chunk text is not loaded here).
#
# The documents are collected explicitly: the original built them inside
# the row block and threw them away, so callers such as
# Application#each_doc had nothing to iterate.
def get_all_docs()
  docs = []
  @dbh.select_all("SELECT * FROM docs") do | row |
    doc = Document.new(row['doctitle'])
    doc.dbid = row['docid']
    docs.push(doc)
  end
  docs
end
19
+ # fetch the document identified by the string ident
20
# Fetches one document with its chunks appended in docseq order.
#
# ident:: a document id (an Integer, or a string made up of digits only)
#         or a document title.
#
# Raises RuntimeError when nothing matches.
def get_doc(ident)
  # Only an all-digit value is an id. The previous /^\d+/ test also caught
  # titles that merely start with a digit ("2nd interview"), and =~ is not
  # available on an Integer in current Ruby, hence to_s.
  if ident.to_s =~ /\A\d+\z/
    r = @dbh.select_one("SELECT doctitle FROM docs WHERE docid = ?",
                        ident)
    unless r
      raise "No document found matching id '#{ident}'"
    end
    doc = Document.new(r[0])
    doc.dbid = ident
  else
    r = @dbh.select_one("SELECT docid FROM docs WHERE doctitle = ?", ident)
    unless r
      raise "No document found matching title '#{ident}'"
    end
    doc = Document.new(ident)
    doc.dbid = r[0]
  end

  @dbh.select_all("SELECT * FROM chunks
                   WHERE docid = ?
                   ORDER BY docseq", doc.dbid) do | row |
    doc.append(row['chunk'], row['type'])
  end
  doc
end
48
+
49
+ # decide what level to code at?
50
+ # this returns a weighted search result set
51
# Not implemented: ignores +category+ and answers nil.
def doc_search_by_category(category)
  # Sketch of a query kept from the original for whoever implements this:
  #
  #   SELECT SUM(SQRT(idx_cat.score * idx_doc.score)), idx_doc.catid
  #   FROM reverse_index AS idx_doc, reverse_index AS idx_cat
  #   WHERE idx_cat.catid = 1
  #   AND idx_doc.catid != idx_cat.catid
  #   AND idx_doc.word = idx_cat.word
  #   GROUP BY idx_doc.catid;
  nil
end
59
+
60
# Persists +doc+ and returns self.
#
# A document that already has a dbid only has its title updated. A new one
# is inserted, given the id reported by LAST_INSERT_ID(), and has each
# fragment's text stored as a chunk numbered by its position.
def save_document(doc)
  if doc.dbid
    @dbh.do("UPDATE docs SET doctitle = ? WHERE docid = ?",
            doc.title, doc.dbid)
  else
    # bind the document's own title; @title is not set on the backend, so
    # the previous code stored NULL here
    @dbh.do("INSERT INTO docs VALUES(NULL, ?)", doc.title)
    r = @dbh.select_one("SELECT LAST_INSERT_ID()")
    doc.dbid = r[0]
    doc.fragments.each_with_index do | frag, i |
      @dbh.do("INSERT INTO chunks VALUES(?, 0, ?, ?)",
              doc.dbid, frag.text, i)
    end
  end
  self
end
76
+
77
# Stores the database handle used by every query in this backend.
#
# args:: a hash carrying the handle under :dbh, or the handle itself.
#
# An earlier +connect+ in this module takes the bare handle; this later
# definition replaces it when the file loads, so the bare-handle form is
# accepted here too rather than being indexed with :dbh.
def connect(args)
  @dbh = args.is_a?(Hash) ? args[:dbh] : args
end
80
+
81
# Loads the category stored under +catid+ together with its codings.
# Answers nil when no category row matches.
def get_category(catid)
  found = @dbh.select_one("SELECT * FROM categories WHERE catid = ?",
                          catid)
  return unless found

  loaded = Category.new(found['catname'])
  loaded.dbid = catid

  # replay every stored coding (document, start, offset) onto the category
  @dbh.select_all("SELECT codes.docid, codes.start, codes.offset
                   FROM codes
                   WHERE catid = ? ", loaded.dbid) { | coding |
    loaded.code(coding['docid'], coding['start'].to_i, coding['offset'].to_i)
  }
  loaded
end
100
+
101
# Returns the categories whose reverse index contains any of +words+, as
# an array of Category objects carrying name and dbid. With no words the
# answer is an empty array and no query is run.
#
# Each word gets its own placeholder. Previously the words were quoted,
# comma-joined and bound to a single '?', so the database compared against
# one long string and a multi-word search could not match.
#
# NOTE(review): the tables here are 'category' / 'category_ridx' while the
# rest of this backend uses 'categories' - confirm against the schema.
def category_search_by_words(*words)
  return [] if words.empty?

  placeholders = words.collect { '?' }.join(', ')
  cats = []
  @dbh.select_all("SELECT category.catid, category.catname, " +
                  "SUM(category_ridx.score) AS score " +
                  "FROM category_ridx, category " +
                  "WHERE category_ridx.catid = category.catid " +
                  "AND category_ridx.word IN ( #{placeholders} ) " +
                  "GROUP BY category_ridx.catid",
                  *words) do | r |
    cat = Category.new(r['catname'])
    cat.dbid = r['catid']
    cats.push(cat)
  end
  cats
end
118
+
119
+ # fetches all the categories in a tree structure, starting from the right
120
# Rebuilds the category tree from the nested-set columns (l, r) of the
# categories table and returns a synthetic 'root' Category holding the
# top-level categories.
#
# Rows arrive ordered by l. A stack holds the branches still being filled;
# before a row is placed, every branch whose r lies before the row's l is
# finished and attached to its own parent. The original only closed
# branches after placing the row, so a branch following a sibling branch
# ended up nested inside that sibling.
def get_all_categories()
  root = Category.new('root')
  # the root entry is never popped, so its 'r' is never consulted
  open_branches = [ { 'cat' => root, 'r' => nil } ]

  query = "SELECT * FROM categories ORDER BY l"
  @dbh.select_all(query) do | row |
    l = row['l'].to_i
    r = row['r'].to_i
    cat = Category.new(row['catname'])
    cat.dbid = row['catid'].to_i

    # close every branch that ended before this row starts
    until open_branches.length == 1 || l < open_branches[-1]['r']
      open_branches.pop.fetch('cat').append_to(open_branches[-1]['cat'])
    end

    if l + 1 == r
      # a leaf goes straight into the innermost open branch
      open_branches[-1]['cat'].add_child(cat)
    else
      open_branches.push( { 'cat' => cat, 'r' => r } )
    end
  end

  # attach whatever branches are still open
  while open_branches.length > 1
    open_branches.pop.fetch('cat').append_to(open_branches[-1]['cat'])
  end

  root
end
151
+
152
# Persists +cat+ and its codings; returns self.
#
# An existing category (dbid set) has its codes deleted and its name
# updated. A new category is inserted as a nested-set leaf: at the parent's
# right edge when it has a parent, otherwise after the current maximum r;
# the l/r values at or beyond that point are then shifted by 2 to make
# room. In both cases every vector in cat.vectors is then written to the
# codes table.
#
# Raises RuntimeError when the parent of a new category has no row.
def save_category(cat)
  if cat.dbid
    @dbh.do("DELETE FROM codes WHERE catid = ?", cat.dbid)
    @dbh.do("UPDATE categories SET catname = ? WHERE catid = ?",
            cat.name, cat.dbid)
  else
    l = nil
    if cat.parent
      row = @dbh.select_one("SELECT r FROM categories
                             WHERE catid = ? ", cat.parent.dbid)
      # fail with a clear message instead of a NoMethodError on nil
      unless row
        raise "No parent category found matching id '#{cat.parent.dbid}'"
      end
      l = row['r'].to_i
    else
      # MAX(r) is NULL on an empty table; nil.to_i makes that start at 1
      row = @dbh.select_one("SELECT MAX(r) r FROM categories")
      l = row['r'].to_i + 1
    end

    @dbh.do("INSERT INTO categories VALUES(NULL, ?, '', ?, ?)",
            cat.name, l, l + 1)
    r = @dbh.select_one("SELECT LAST_INSERT_ID()")
    cat.dbid = r[0]
    @dbh.do("UPDATE categories SET l = l + 2
             WHERE l > ? ", l)
    @dbh.do("UPDATE categories SET r = r + 2
             WHERE r >= ?
             AND catid != ? ", l, cat.dbid)
  end

  cat.vectors.each do | docid, vecs |
    vecs.each do | vec |
      @dbh.do("INSERT INTO codes VALUES(?, ?, ?, ?)",
              cat.dbid, docid, vec.start, vec.length )
    end
  end

  self
end
189
+
190
+ ## WARN - below here is older code imported from a different project
191
+ ## NOT YET TESTED FOR COMPATIBILITY
192
# Rewrites the whole store: recreates the tables, stores every document in
# @docs, then stores the node tree from @root_node downwards.
def save()
  sql_install()
  @docs.values.each { | doc | sql_store_document(doc) }

  # shared running counter: each call hands out the next number, starting
  # at 1, for sql_store_node to use as left/right values
  ticks = 0
  next_tick = lambda { ticks += 1 }

  sql_store_node(@root_node, next_tick)
end
202
+
203
# Drops and recreates the four tables used by the legacy save path: docs,
# fragments, nodes and vectors, in that order. Returns whatever the final
# CREATE statement's @dbh.do returned.
#
# NOTE(review): the REFERENCES clauses name 'doc' and 'DOCFRAGMENT', which
# are not tables created here - confirm whether the database ignores them.
def sql_install()
  tables = [
    # docid is just nudist's internal document keying system
    [ 'docs', "CREATE TABLE docs (
               docid varchar(8) primary key,
               doctitle varchar(255),
               docmemo text,
               external CHAR(1) )" ],
    [ 'fragments', "CREATE TABLE fragments (
                    docid VARCHAR(8) REFERENCES doc(docid),
                    seq INT,
                    fragment text)" ],
    [ 'nodes', "CREATE TABLE nodes (
                nodeid VARCHAR(32) PRIMARY KEY,
                nodepath VARCHAR(255),
                nodeuid VARCHAR(255),
                nodename VARCHAR(32),
                nodememo TEXT,
                l INT,
                r INT )" ],
    [ 'vectors', "CREATE TABLE vectors (
                  nodeid VARCHAR(32) REFERENCES nodes(nodeid),
                  docid VARCHAR(8) REFERENCES doc(docid),
                  start INT REFERENCES DOCFRAGMENT(seq),
                  length INT)" ]
  ]

  outcome = nil
  tables.each do | table, ddl |
    @dbh.do("DROP TABLE IF EXISTS #{table}")
    outcome = @dbh.do(ddl)
  end
  outcome
end
236
+
237
# Stores +node+ and, recursively, its children in the nodes table, then
# the node's codings in the vectors table.
#
# counter:: callable handing out the next number on each call. One number
#           is taken before the children are stored and one after, giving
#           the node's left and right values.
def sql_store_node(node, counter)
  left_edge = counter.call()
  node.children.each { | child | sql_store_node(child, counter) }
  right_edge = counter.call()

  @dbh.do("INSERT INTO nodes VALUES(?, ?, ?, ?, ?, ?, ?)",
          node.uniq_id, node.path, node.child_id, node.title,
          node.memo, left_edge, right_edge)

  # the key of each codes entry is not used; the document id is read from
  # each vector instead
  node.codes.each do | _doc, spans |
    spans.each { | span |
      @dbh.do("INSERT INTO vectors VALUES (?, ?, ?, ?)",
              node.uniq_id, span.doc.doc_id,
              span.start, span.length)
    }
  end
end
255
+
256
# Writes +doc+ to the docs table, then one fragments row per entry of
# doc.fragments, numbered by position. Each fragment is bound as-is.
def sql_store_document(doc)
  @dbh.do("INSERT INTO docs VALUES (?, ?, ?, ?)",
          doc.doc_id, doc.title, doc.memo, doc.external?)

  doc.fragments.each_with_index { | piece, position |
    @dbh.do("INSERT INTO fragments VALUES(?, ?, ?)",
            doc.doc_id, position, piece)
  }
end
265
+ end
266
+ end
267
+ end