weft-qda 0.9.6
- data/lib/weft.rb +21 -0
- data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
- data/lib/weft/application.rb +130 -0
- data/lib/weft/backend.rb +39 -0
- data/lib/weft/backend/marshal.rb +26 -0
- data/lib/weft/backend/mysql.rb +267 -0
- data/lib/weft/backend/n6.rb +366 -0
- data/lib/weft/backend/sqlite.rb +633 -0
- data/lib/weft/backend/sqlite/category_tree.rb +104 -0
- data/lib/weft/backend/sqlite/schema.rb +152 -0
- data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
- data/lib/weft/category.rb +157 -0
- data/lib/weft/coding.rb +355 -0
- data/lib/weft/document.rb +118 -0
- data/lib/weft/filters.rb +243 -0
- data/lib/weft/wxgui.rb +687 -0
- data/lib/weft/wxgui/category.xpm +26 -0
- data/lib/weft/wxgui/dialogs.rb +128 -0
- data/lib/weft/wxgui/document.xpm +25 -0
- data/lib/weft/wxgui/error_handler.rb +52 -0
- data/lib/weft/wxgui/inspectors.rb +361 -0
- data/lib/weft/wxgui/inspectors/category.rb +165 -0
- data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
- data/lib/weft/wxgui/inspectors/document.rb +139 -0
- data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
- data/lib/weft/wxgui/inspectors/script.rb +35 -0
- data/lib/weft/wxgui/inspectors/search.rb +265 -0
- data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
- data/lib/weft/wxgui/lang.rb +17 -0
- data/lib/weft/wxgui/lang/en.rb +45 -0
- data/lib/weft/wxgui/mondrian.xpm +44 -0
- data/lib/weft/wxgui/search.xpm +25 -0
- data/lib/weft/wxgui/sidebar.rb +498 -0
- data/lib/weft/wxgui/utilities.rb +148 -0
- data/lib/weft/wxgui/weft16.xpm +31 -0
- data/lib/weft/wxgui/workarea.rb +249 -0
- data/test/001-document.rb +196 -0
- data/test/002-category.rb +138 -0
- data/test/003-code.rb +370 -0
- data/test/004-application.rb +52 -0
- data/test/006-filters.rb +139 -0
- data/test/009a-backend_sqlite_basic.rb +280 -0
- data/test/009b-backend_sqlite_complex.rb +175 -0
- data/test/009c_backend_sqlite_bench.rb +81 -0
- data/test/010-backend_nudist.rb +5 -0
- data/test/all-tests.rb +1 -0
- data/test/manual-gui-script.txt +24 -0
- data/test/testdata/autocoding-test.txt +15 -0
- data/test/testdata/iso-8859-1.txt +5 -0
- data/test/testdata/sample_doc.txt +19 -0
- data/test/testdata/search_results.txt +1254 -0
- data/test/testdata/text1-dos-ascii.txt +2 -0
- data/test/testdata/text1-unix-utf8.txt +2 -0
- data/weft-qda.rb +28 -0
- metadata +96 -0
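
Both backends added in this release share one calling convention: the host application hands a backend a hash of connection arguments via `start`, then asks it for documents and categories. Below is a minimal sketch of that surface, assuming a backend-style object that already has the relevant module mixed in (the `QDA::Application` wiring that does this is not part of the excerpts that follow), with invented file paths:

    # Illustrative only: `sqlite_backend' and `n6_backend' stand in for objects
    # the application layer has extended with the backend modules shown below.
    sqlite_backend.start(:dbfile => '/path/to/project.db')   # work on a temp copy
    doc  = sqlite_backend.get_doc('Interview 1')             # by title or numeric id
    docs = sqlite_backend.get_all_docs

    n6_backend.start(:basedir => '/path/to/n6-project')      # read-only N6 import
    root = n6_backend.get_all_categories                     # root of the node tree
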
data/lib/weft/backend/n6.rb
@@ -0,0 +1,366 @@
# Read-only storage backend using N6's native file format
module QDA
  module Backend
    module N6
      # number of seconds between 1/1/1900 and 1/1/1970
      SEVENTY_YEARS = 2208985187

      def start(args)
        @basedir = args[:basedir]
        @npr = NPReader.new()
        @doc_dbid_counter = 0
        load_docs()
        load_nodes()
      end

      def get_all_docs
        @docs
      end

      def get_doc(title)
        @docs.find { | d | d.title == title }
      end

      def get_all_categories
        @root_node
      end

      private

      PARSE_DOCS = /^\("((?:[^"]|\\")*)" (\d+) (\d+) (?#
                   )(NIL|(?:\([0-9 ]+\))) (?#
                   )\((\d{10}) \. (\d{10})\) (?#
                   )"((?:[^"]|\\")*)"/

      def load_docs()
        @docs = []
        File.foreach(File.join(@basedir, 'DATABASE/docsys') ) do | docline |
          if matches = PARSE_DOCS.match(docline)
            doc_id = matches[7]

            parsed = {
              :title => matches[1],
              :external => ( matches[2] == "0" ? true : false ),
              :unitlength => matches[3],
              :crt_date => matches[5],
              :mod_date => matches[6],
              :doc_id => matches[7],
              :source => self
            }
            docfacts = nil
            fragments = []

            # read the breaking of the documents into chunks
            File.open(File.join(@basedir,
                                'DATABASE/DOCFACTS',
                                doc_id ) ) do | dff |
              docfacts = @npr.parse(dff.read)
            end

            # read the source document in chunks defined in docfacts
            # not sure why each fragment seems to be made up of two
            # chunks
            File.open( File.join(@basedir,
                                 'DATABASE/DOCFILES',
                                 doc_id ) ) do | df |
              docfacts.values.each do | chunk |
                # bit 1 - seems longer
                fragments.push(df.read(chunk.values[0].to_i))
                # bit 2 - generally shorter
                fragments[-1] << df.read(chunk.values[2].to_i)
                fragments[-1].sub(/[\n\r]+$/, '')
              end
            end
            parsed[:fragments] = fragments

            memo = ''
            if FileTest.exist?( File.join( @basedir,
                                           'DATABASE/DOCMEMS',
                                           doc_id) )
              File.open(File.join(@basedir,
                                  'DATABASE/DOCMEMS',
                                  doc_id) ) do | dm |
                memo = dm.read()
              end
            end
            crt_date = Time.at( parsed[:crt_date].to_i - SEVENTY_YEARS )
            mod_date = Time.at( parsed[:mod_date].to_i - SEVENTY_YEARS )

            doc = Document.new(parsed[:title], '', memo,
                               crt_date, mod_date)

            if parsed[:external]
              doc.instance_eval { @external = true }
            end

            class << doc
              def chunks_to_vector(start, length)
                return 0, 0 if @external

                this_start = @chunkmap[0, start].inject(0) do | tot, ck |
                  tot += ck
                end
                this_offset = @chunkmap[start, length].inject(0) do | tot, ck |
                  tot += ck
                end

                return this_start, this_offset
              end

              def append(text)
                @chunkmap ||= []
                len = super(text)
                @chunkmap.push(len)
              end
            end
            doc.dbid = next_doc_dbid

            fragments.each do | frag |
              doc.append(frag)
            end
            @docs.push(doc)
          end
        end
      end

      def next_doc_dbid
        token = @doc_dbid_counter += 1
        # token.to_s
      end

      PARSE_INDEX = /^\((\d+|NIL) "([^"]*)" (?#
                    )(?:(NIL)|(?:"((?:[^"]|\\")*)")) (?# comment
                    )(?:(NIL)|(?:"((?:[^"]|\\")*)")) (?# "memoreference
                    )\((\d{10}) \. (\d{10})\) (.*?) (\d+)\) $/

      def load_nodes()
        @nodes = []
        curr_node = nil
        @root_node = nil
        content = ''
        pending = ''

        # hash of nodes -> number of pending children
        tree_kids = {}
        File.foreach( File.join(@basedir,
                                'DATABASE/indexsys') ) do | line |
          next if line =~ /^\s*$/

          unless matches = PARSE_INDEX.match(pending + line)
            pending += line
            next
          end

          pending = ''

          parsed = {
            :child_id => matches[1],
            :title => matches[2],
            :comment => matches[3] || matches[4],
            :memo => '',

            :big_fig1 => matches[7],
            :big_fig2 => matches[8],

            :codes => matches[9],
            :ccount => matches[10],
            :source => self }

          # load the node memo if there is one
          if matches[6]
            memofile = File.join(@basedir, 'DATABASE/NODEMEMS', matches[6])
            # they don't necessarily have a memo file, in which case memo = ''
            if FileTest.exist?(memofile)
              File.open(memofile) { | nm | parsed[:memo] = nm.read() }
            end
          end

          # create a node object to represent the code
          # first of all, see if it's the root node (no parent)
          if curr_node.nil?
            @root_node = Category.new(parsed[:title], nil)
            # yuk
            load_node_codes(@root_node,
                            parsed[:codes])

            @nodes.push(@root_node)
            # root node is always looking for additional children -
            # the default parent.
            tree_kids[@root_node] = -1
            curr_node = @root_node
          else # it's not the root node
            # go back up the tree until we find a node which needs
            # more children
            until tree_kids[curr_node] != 0
              curr_node = curr_node.parent
            end
            tree_kids[curr_node] -= 1

            parsed[:parent] = curr_node
            new_node = Category.new(parsed[:title], parsed[:parent])
            load_node_codes(new_node, parsed[:codes])

            # get ready for the next round
            tree_kids[new_node] = parsed[:ccount].to_i
            curr_node = new_node
            @nodes.push(new_node)
          end
        end
      end

      # parse which parts of which documents are coded by category +category+
      def load_node_codes(category, codesrc)
        return if codesrc == 'NIL'
        unless parsed = @npr.parse(codesrc)
          raise "cannot parse #{codesrc} for node codes"
        end

        codes = {}
        parsed.values.each do | val |
          # val.values[0] is the title of the coded document
          doc = get_doc(val.values[0])
          # the segments of the document which are coded
          val.values[1..-1].each do | code |
            start = code.values[0].to_i
            length = code.values[2].to_i - start + 1
            # we have to convert these chunks into character-indexes
            conv = doc.chunks_to_vector(start, length)
            # can be 0-length coded when applied to external doc
            if conv[1] == 0
              next
            end
            category.code( doc.dbid, *conv )
          end
        end
      end
    end

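    # --- Illustrative note (not part of n6.rb as shipped) --------------------
    # N6 addresses text by chunk, while Category#code wants character offsets;
    # the singleton chunks_to_vector defined above does that conversion from
    # the per-document @chunkmap of chunk lengths. Worked through by hand: if
    # a document was appended in chunks of 10, 25 and 40 characters, then
    # @chunkmap == [10, 25, 40], and chunks_to_vector(1, 2) computes
    #
    #   this_start  = [10].inject(0)     { |tot, ck| tot + ck }   #=> 10
    #   this_offset = [25, 40].inject(0) { |tot, ck| tot + ck }   #=> 65
    #
    # i.e. the code starts at character 10 and runs for 65 characters.
    # --------------------------------------------------------------------------
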
    # NPReader.rb - Nested parenthesis (LISP syntax) parser.
    # Copyright (C) 2001 Gordon James Miller

    # This library is free software; you can redistribute it and/or
    # modify it under the terms of the GNU Lesser General Public License
    # as published by the Free Software Foundation; either version 2.1
    # of the License, or (at your option) any later version.

    # A container for strings that reside between matching parenthesis. Each
    # instance of this class contains a list of values that are either
    # strings or other NPNode objects representing nesting.
    class NPNode
      @@NORMAL_MODE = 0
      @@QUOTE_MODE = 1

      # The members of this group.
      attr_reader :values

      # The parent of this group.
      attr_reader :parent

      # The level of this group. This is set by the constructor and is
      # based upon the level of the parent.
      attr_reader :level

      # The state of this group. This will either be NORMAL or QUOTE.
      # In QUOTE, whitespace is absorbed.
      attr_reader :state

      def close
        if ( @curval.size > 0 ) then
          @values.push( @curval.to_s )
          @curval.clear
        end
      end

      # Initialize a new instance that has the specified parent. The
      # parent can be nil, in which case it is assumed that this is
      # the top level node. If the parent is not nil, then this
      # object is added as a child to the parent.
      def initialize (parent)
        @parent = parent

        if ( @parent != nil ) then
          @parent.push(self)
        end

        @state = @@NORMAL_MODE
        @level = ( parent == nil ) ? 0 : @parent.level + 1

        @values = Array.new
        @curval = Array.new
      end

      def push(ch)
        if ( ch.kind_of? NPNode ) then
          # If this is an NPNode instance, just add it to the end of the
          # values array.
          @values.push(ch)
        else
          if ( ch == "'" || ch == "\"" ) then
            # If this is a double quote, then we have to start a new
            # value, toggle the mode.
            @state = ( @state + 1 ) % 2
            close
          elsif ( (@state != @@QUOTE_MODE) && ch =~ /[ \t\n\r]/ ) then
            # If this is a whitespace character and the length is
            # greater than zero, push the current value on the values
            # array and clear the current values string.
            close
          else
            # Otherwise go ahead and push the character onto the end of
            # the current value.
            @curval.push( ch )
          end
        end
      end

      def to_s
        str = Array.new()
        return "(#{@values.join(' ')})"
      end
    end

    # An implementation of a nested parenthesis reader. This implementation
    # constructs an AST from a data source.
    class NPReader

      # Initialize a new instance of the reader. This does not start the
      # parsing, that is done with the parse method.
      def initialize ()
      end

      # Parse the data contained in the string and return the reference to the
      # top level group.
      def parse (string)
        @curgroup = nil
        @top = nil

        string.each_byte { |byte|
          ch = byte.chr

          if ( ch == '(' ) then
            @curgroup = NPNode.new(@curgroup)
            if ( @top == nil ) then
              @top = @curgroup
            end
          elsif ( ch == ')' ) then
            @curgroup.close
            @curgroup = @curgroup.parent
          else
            if ( @curgroup != nil ) then
              @curgroup.push(ch)
            end
          end
        }

        return @top
      end
    end
  end
end
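
The NPReader/NPNode pair above is what turns N6's LISP-style metadata files (docsys, DOCFACTS, indexsys) into nested value lists. A quick sketch of the shape it hands back, assuming the classes land at `QDA::Backend` scope as the nesting above suggests, and the Ruby 1.8 `Array#to_s` behaviour (join without separator) this code was written against:

    npr  = QDA::Backend::NPReader.new
    tree = npr.parse('(a "b c" (1 2))')

    tree.values            #=> ["a", "b c", <nested NPNode>]
    tree.values[2].values  #=> ["1", "2"]
    tree.to_s              #=> "(a b c (1 2))"  -- quoting is not preserved
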
data/lib/weft/backend/sqlite.rb
@@ -0,0 +1,633 @@
require 'sqlite'
# require 'sqlite3'
require 'strscan'
require 'tempfile'
require 'fileutils'
require 'base64'
require 'rexml/document'
# require 'iconv'

module QDA
  # Storage backend using the SQLite module - can use SQLite 3 or SQLite, but
  # there are currently problems with SQLite 3 and non-ASCII characters.
  # Will pick up whether sqlite or sqlite3 is available.
  module Backend::SQLite

    require 'weft/backend/sqlite/schema.rb'
    require 'weft/backend/sqlite/upgradeable.rb'
    require 'weft/backend/sqlite/category_tree.rb'
    include Upgradeable

    # if working with sqlite v2 with the sqlite-ruby v2, we need a
    # couple of compatibility tweaks.
    if defined?(::SQLite)
      SQLITE_DB_CLASS = ::SQLite::Database
      # Ruby-SQLite3 statements have a close() method, but Ruby-SQLite
      # v2 statements don't - so we supply a dummy method for when using v2
      class ::SQLite::Statement
        def close(); end
      end
      # SQLite3 introduced this more ruby-ish notation
      class ::SQLite::Database::FunctionProxy
        alias :result= :set_result
      end
    elsif defined?(::SQLite3)
      SQLITE_DB_CLASS = ::SQLite3::Database
    else
      raise LoadError, "No SQLite database class loaded"
    end

    class Database < SQLITE_DB_CLASS
      def initialize(file)
        # super(file, :driver => "Native")
        super(file)
        self.results_as_hash = true
        # self.type_translation = true
      end

      def undo_action()
        @dbh.transaction do
          @dbh.execute("SELECT * FROM undoable WHERE step = 1
                        ORDER BY step, actionid DESC") do | task |
            @dbh.execute(task[0])
          end
          @dbh.execute("UPDATE undoable SET step = step -1")
          @dbh.execute("DELETE FROM undoable WHERE step = 0")
        end
      end

      def redo_action()
        transaction do
          execute("SELECT * FROM undoable WHERE step = -1
                   ORDER BY step, actionid DESC") do | task |
            execute(task[0])
          end
          execute("DELETE FROM undoable WHERE step = -1")
          execute("UPDATE undoable SET step = step + 1")
        end
      end

      def date_freeze(date)
        date ? date.strftime('%Y-%m-%d %H:%M:%S') : ''
      end

      def date_thaw(str)
        return nil if str.empty?
        return Time.local( *str.split(/[- :]/) )
      end
    end

    attr_reader :dbh, :dbfile

    # load up the database connection. A hash argument containing the
    # key :dbfile should be supplied. If this is +nil+, then a
    # temporary storage will be used
    def start(args)
      if ! args.key?(:dbfile)
        raise ArgumentError, "Must specify SQLite dbfile to load from"
      end

      @dbfile = args[:dbfile]
      if @dbfile and ! File.exists?(@dbfile)
        raise RuntimeError, "Tried to open a non-existent database"
      end

      tmp_fname = @dbfile ? File::basename(@dbfile) : 'Weft'
      tmpfile = Tempfile.new(tmp_fname || 'Weft')
      tmpfile.close(false) # don't delete

      @tmpfile = tmpfile.path
      if @dbfile
        FileUtils.copy(@dbfile, @tmpfile)
      end
      @dbh = Database.new(@tmpfile)
      # if opening from an existing file, check and do any upgrading
      # required from older versions
      do_version_format_upgrading() if @dbfile
      undirty!
    end

    def connect(args)
      @dbh = args[:dbh]
    end

    def end(force = false)
      @cat_tree = nil
      @dbh.close()
    end

    def save(target = @dbfile)
      if target.nil?
        raise RuntimeError,
              "No previously saved file, and no name supplied for save"
      end
      @dbh.close
      @dbfile = target
      FileUtils.copy(@tmpfile, @dbfile)
      @dbh = Database.new(@tmpfile)
      undirty!
    end

    # roll the current state back to the last-saved state.
    def revert()
      @dbh.close()
      FileUtils.copy(@dbfile, @tmpfile)
      @dbh = Database.new(@tmpfile)
    end

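    # --- Illustrative note (not part of sqlite.rb as shipped) ----------------
    # The methods above give the backend a scratch-copy lifecycle: start()
    # copies :dbfile to a Tempfile and opens that copy, save() copies the
    # scratch file back over the project file (or a new target), and revert()
    # re-copies the saved file over the scratch copy. Assuming a `backend'
    # object with this module mixed in (application wiring not shown here):
    #
    #   backend.start(:dbfile => 'project.db')   # open a working copy
    #   backend.save                             # write changes back
    #   backend.revert                           # discard unsaved changes
    #   backend.save('backup.db')                # "save as" a different file
    # --------------------------------------------------------------------------
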
    # hint to do the next series of actions as a batch
    def batch
      @dbh.transaction { yield }
    end

    def cat_tree
      return @cat_tree if @cat_tree
      xml = @dbh.get_first_value("SELECT xml FROM category_structure")
      if xml and xml.length > 0
        @cat_tree = CategoryTree.load(xml)
      else
        @cat_tree = CategoryTree.new()
      end
    end

    # private :cat_tree
    # get every doc
    def get_all_docs()
      docs = []
      @dbh.execute("SELECT doctitle, docid FROM document") do | row |
        doc = Document.new(row['doctitle'])
        doc.dbid = row['docid'].to_i
        docs.push(doc)
      end
      docs
    end

    # fetch the document identified by the string ident
    def get_doc(ident)
      doc = nil
      @dbh.transaction do
        stmt = nil
        if ident.kind_of?(Fixnum) || ident =~ /^\d+$/
          stmt = @dbh.prepare("SELECT * FROM document WHERE docid = ?")
        else
          stmt = @dbh.prepare("SELECT * FROM document WHERE doctitle = ?")
        end
        stmt.execute!(ident) do | r |
          doc = Document.new(r['doctitle'].dup,
                             r['doctext'].dup,
                             r['docmemo'].dup,
                             @dbh.date_thaw(r['created_date']),
                             @dbh.date_thaw(r['modified_date']) )
          doc.dbid = r['docid'].to_i
        end
        raise "Not found: #{ident}" if doc.nil?

        stmt.close()
      end
      return doc
    end
    alias :get_document :get_doc

    def save_preference(pref_name, pref_value)
      frozen_value = Base64.encode64( Marshal.dump( pref_value) )
      @dbh.transaction do
        @dbh.execute("INSERT OR REPLACE INTO app_preference
                      VALUES (?, ?)",
                     pref_name, frozen_value )
      end
      dirty!
    end

    def get_preference(pref_name)
      frozen_pref = nil
      @dbh.transaction do
        @dbh.execute("SELECT value FROM app_preference
                      WHERE name = ? ", pref_name ) do | r |
          frozen_pref = r['value']
        end
      end
      return nil if frozen_pref.nil?
      return Marshal.load( Base64.decode64(frozen_pref) )
    end

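    # --- Illustrative note (not part of sqlite.rb as shipped) ----------------
    # get_doc above accepts either a numeric id or a title, and preferences
    # round-trip any Marshal-able Ruby object via Base64:
    #
    #   backend.get_doc(3)                                  # by docid
    #   backend.get_doc('Interview 1')                      # by doctitle
    #   backend.save_preference('window_size', [800, 600])
    #   backend.get_preference('window_size')               #=> [800, 600]
    # --------------------------------------------------------------------------
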
    def save_document(doc)
      raise TypeError unless doc.kind_of? QDA::Document
      @dbh.transaction { _save_document(doc) }
      dirty!
      doc
    end

    def _save_document(doc)
      if doc.dbid
        @dbh.execute("UPDATE document
                      SET doctitle = ?, doctext = ?,
                          docmemo = ?, modified_date = ?
                      WHERE docid = ? ",
                     doc.title, doc.text, doc.memo,
                     @dbh.date_freeze( Time.now() ),
                     doc.dbid)
      else
        @dbh.execute("INSERT INTO document
                      VALUES(NULL, ?, ?, ?, ?, ?)",
                     doc.title, doc.text, doc.memo,
                     @dbh.date_freeze(doc.create_date),
                     @dbh.date_freeze( Time.now() ) )
        doc.dbid = @dbh.last_insert_row_id().to_i
      end
    end

    # delete the document identified by +dbid+ from the database
    def delete_document(dbid)
      @dbh.transaction do
        @dbh.execute("DELETE FROM document WHERE docid = ?", dbid)
      end
      dirty!
    end

    # retrieve the category with the internal id +catid+, along with
    # its codes. If +get_structure+ is set to a true value then the
    # category's children will also be retrieved from the database
    def get_category(catid, get_structure = false)
      catid = catid.to_i if catid =~ /^\d+$/
      raise "Invalid id #{catid.inspect}" unless catid.kind_of?(Fixnum)

      category = nil
      stmt = @dbh.prepare("SELECT * FROM category WHERE catid = ?")
      stmt.execute!(catid) do | r |
        parent = get_category_parent(catid)
        category = Category.new(r['catname'], parent, r['catdesc'])
        category.dbid = catid
      end
      raise "No category found matching id '#{catid}'" unless category
      stmt.close()

      get_codes_for_category(category)
      get_and_build_children(category) if get_structure
      category
    end

    # gets the root category named +name+
    def get_root_category(name)
      root = cat_tree.roots.find { | r | r.name == name }
      raise "Not found, root category #{name.inspect}" unless root
      return get_category(root.dbid)
    end

    # fetch categories by relative or absolute paths. Returns an
    # array of categories
    def get_categories_by_path(path)
      # cos it should be quicker ...
      if path =~ /\//
        return cat_tree.find(path).map do | found |
          get_category(found.dbid)
        end
      else
        return get_categories_by_name(path)
      end
    end

    # fetch categories by partial names. This is currently
    # case-insensitive by default. Returns an array of categories
    # whose names match.
    def get_categories_by_name(namebit, insensitive = true)
      stmt = nil
      if insensitive
        stmt = @dbh.prepare("SELECT catid FROM category
                             WHERE UPPER(catname) LIKE ?
                             AND parent >= 0" )
        namebit = namebit.upcase
      else
        stmt = @dbh.prepare("SELECT catid FROM category
                             WHERE catname GLOB ?
                             AND parent >= 0" )
      end
      categories = []
      @dbh.transaction do
        stmt.execute!(namebit + "%") do | r |
          categories.push( get_category( r['catid'] ) )
        end
        stmt.close()
      end
      return categories
    end

    def is_descendant?(ancestor, descendant)
      cat_tree.is_descendant?(ancestor.dbid, descendant.dbid)
    end

    # builds the tree structure below +category+, modifying
    # +category+ in place. After this call, the retrieved structure
    # is available as the +children+ property of the category.
    def get_and_build_children(category)
      # this duplicates stuff below
      append_f = Proc.new do | parent, elem |
        cat = Category.new(elem.name, parent)
        cat.dbid = elem.dbid
        elem.children { | c | append_f.call(cat, c) }
      end

      cat_tree[category.dbid].children do | first_child |
        append_f.call(category, first_child)
      end
    end
    private :get_and_build_children

    # applies the codes to category +cat+
    def get_codes_for_category(cat)
      @dbh.execute("SELECT docid, offset, length
                    FROM code
                    WHERE catid = ? ", cat.dbid) do | row |
        cat.code( row['docid'].to_i,
                  row['offset'].to_i,
                  row['length'].to_i )
      end
      return cat
    end

    # looks up the string indices of the document and returns the
    # appropriate text fragments as an array.
    # returns a hash keyed on document title where the values are an
    # array of fragments in order of offset from the start of the document
    def get_text_at_category(cat)
      vectors = FragmentTable.new()
      @dbh.execute("SELECT document.doctitle AS doctitle,
                           code.docid AS docid,
                           code.offset AS offset, code.length,
                           SUBSTR(document.doctext,
                                  code.offset + 1, code.length) AS fragment
                    FROM document, code
                    WHERE code.catid = ?
                    AND code.docid = document.docid
                    ORDER BY code.docid, code.offset", cat.dbid ) do | r |
        vectors.add( Fragment.new( r['fragment'],
                                   r['doctitle'],
                                   r['offset'].to_i,
                                   r['docid'].to_i ) )
      end
      vectors
    end

    def get_category_parent(catid)
      if cat_tree[catid].parent
        return get_category(cat_tree[catid].parent)
      else
        return nil
      end
    end

    # fetches all the categories in a tree structure, starting from the roots
    def get_all_categories()
      build_cat = Proc.new do | elem, parent |
        cat = Category.new( elem.name, parent )
        cat.dbid = elem.dbid
        elem.children.each { | ch | build_cat.call(ch, cat) }
        cat
      end
      # return the Category Tree (internal storage) as a tree of
      # actual Categories
      cat_tree.roots.map { | root | build_cat.call(root, nil) }
    end

    # saves the category
    def save_category(cat)
      @dbh.transaction { _save_category(cat) }
      dirty!
      cat
    end

    def _save_category(cat)
      # only resave the tree structure if necessary
      xml_needs_update = false

      # updating an existing category
      if cat.dbid
        # check for re-parenting or renaming
        child = cat_tree[cat.dbid]

        if child.parent != cat.parent.dbid
          cat_tree.move(child.dbid, cat.parent.dbid)
          xml_needs_update = true
        end

        if child.name != cat.name
          child.name = cat.name
          xml_needs_update = true
        end
        @dbh.execute("DELETE FROM code WHERE catid = ?", cat.dbid)
        @dbh.execute("UPDATE category
                      SET catname = ?,
                          catdesc = ?,
                          parent = ?,
                          modified_date = ?
                      WHERE catid = ? ",
                     cat.name,
                     cat.memo,
                     ( cat.parent ? cat.parent.dbid : nil),
                     @dbh.date_freeze( Time.now ),
                     cat.dbid)
      # adding a new category
      else
        parentid = cat.parent ? cat.parent.dbid : nil
        @dbh.execute("INSERT INTO category
                      VALUES(NULL, ?, ?, ?, ?, ?)",
                     cat.name, cat.memo, parentid,
                     @dbh.date_freeze( Time.now ),
                     @dbh.date_freeze( Time.now ) )
        cat.dbid = @dbh.last_insert_row_id().to_i

        if cat.parent
          cat_tree.add(cat.parent.dbid, cat.dbid, cat.name)
        else
          cat_tree.add(nil, cat.dbid, cat.name)
        end
        xml_needs_update = true
      end

      stmt_code = @dbh.prepare("INSERT INTO code VALUES(?, ?, ?, ?)")
      cat.codes.each do | docid, vecs |
        vecs.each do | vec |
          stmt_code.execute( cat.dbid, vec.docid, vec.offset, vec.length )
        end
      end
      stmt_code.close()

      if xml_needs_update
        @dbh.execute( "UPDATE category_structure SET xml = ? ",
                      cat_tree.serialise())
      end
    end

    # deletes the category +category+. If +recursive+ is false then
    # any children of +category+ will be reattached to the deleted
    # category's parent. If +recursive+ is true (default), then all
    # descendants will be deleted.
    # Returns a list of categories that were actually deleted.
    def delete_category(cat, recursive = true)
      return unless cat.dbid
      deleted_items = []
      # TODO not all items being returned in list
      if recursive
        me = cat_tree[cat.dbid]
        me.children.each do | child |
          deleted_items += delete_category(child, true)
        end
        cat_tree.remove(cat.dbid)
        deleted_items << cat
      else
        raise NotImplementedError,
              'Non-recursive deletion not implemented'
      end
      @dbh.transaction do
        @dbh.execute("DELETE FROM category WHERE catid = ? ", cat.dbid)
        xml = cat_tree.serialise()
        @dbh.execute("UPDATE category_structure SET xml = ?", xml)
      end
      dirty!
      return deleted_items
    end

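    # --- Illustrative note (not part of sqlite.rb as shipped) ----------------
    # Putting the category methods above together: create a category, code a
    # stretch of a document against it, save it, then pull the coded text back.
    #
    #   doc = backend.get_doc('Interview 1')
    #   cat = QDA::Category.new('Themes', nil)      # nil parent => new root
    #   cat.code(doc.dbid, 120, 45)                 # 45 chars from offset 120
    #   backend.save_category(cat)                  # writes category + code rows
    #   backend.get_text_at_category(cat)           # FragmentTable of coded text
    # --------------------------------------------------------------------------
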
    MAGIC_REV_INDEX_ID = -2

    # adds the reverse indexes for +words+ to the existing reverse
    # indexes.
    def save_reverse_index(docid, words, prog_bar = nil)
      stmt_wordid = @dbh.prepare("SELECT catid FROM category
                                  WHERE catname = ? AND parent = ? ")
      stmt_insert = @dbh.prepare("INSERT INTO category
                                  VALUES(NULL, ?, ?, ?, ?, ?)")
      stmt_code = @dbh.prepare("INSERT INTO code VALUES(?, ?, ?, ?)")

      @dbh.transaction do
        words.each do | word, locations |
          wordid = nil
          stmt_wordid.execute!(word, MAGIC_REV_INDEX_ID) do | r |
            wordid = r[0] # get first value
          end
          unless wordid
            stmt_insert.execute( word, '', MAGIC_REV_INDEX_ID,
                                 @dbh.date_freeze( Time.now ),
                                 @dbh.date_freeze( Time.now ) )
            wordid = @dbh.last_insert_row_id().to_s
          end

          locations.each do | loc |
            stmt_code.execute(wordid, docid, loc, word.length)
          end
          prog_bar.next() if prog_bar
        end
      end # transaction
      [stmt_wordid, stmt_insert, stmt_code].each { | s | s.close() }
    end

    # Delete all reverse word indexes associated with +docid+
    def drop_reverse_indexes(docid)
      @dbh.transaction do
        stmt_del = @dbh.prepare("DELETE FROM code
                                 WHERE docid = ? AND catid IN
                                   (SELECT catid
                                    FROM category
                                    WHERE parent = ?) ")
        stmt_del.execute(docid, MAGIC_REV_INDEX_ID)
        stmt_del.close()
      end
    end

    # should work for latin-1 characters
    WORD_PATTERN = /^[\w\xC0-\xD6\xD8-\xF6\xF8-\xFF][\w\xC0-\xD6\xD8-\xF6\xF8-\xFF']+$/s

    # returns a hash of document fragments. mods are additional
    # arguments
    def get_search_fragments(term, mods = {})
      if term !~ WORD_PATTERN
        return get_search_fragments_scan(term, mods)
      else
        return get_search_fragments_index(term, mods)
      end
    end

    def get_search_fragments_scan(term, mods = {})
      vectors = FragmentTable.new()
      wrap = mods[:wrap_both] || 0
      @dbh.execute("SELECT * FROM document WHERE doctext LIKE ?",
                   "%#{term}%") do | r |
        doc_title = r['doctitle']
        doc_id = r['docid'].to_i

        # compile a search regexp
        rx = mods[:whole_word] ?
          '\b\w*.{0,%i}\b%s\b.{0,%i}\w*?\b' % [wrap, term, wrap] :
          '\b\w*.{0,%i}%s.{0,%i}\w*?\b' % [wrap, term, wrap]

        search = mods[:case_sensitive] ?
          Regexp.new(rx, Regexp::MULTILINE) :
          Regexp.new(rx, Regexp::MULTILINE|Regexp::IGNORECASE)

        scanner = StringScanner.new(r['doctext'])
        while scanner.scan_until(search)
          offset = scanner.pos - scanner.matched_size
          f = Fragment.new(scanner.matched, doc_title, offset, doc_id)
          vectors.add(f)
        end
      end
      vectors
    end

    # returns a hash of document fragments. mods are additional
    # arguments.
    # It is currently case-insensitive and searches for parts of
    # words. SQL needs to be altered below to change this via +mods+
    def get_search_fragments_index(word, mods = {})
      wrap = mods[:wrap_both] || 0

      query = Schema::RINDEX_SEARCH_MODEL_QUERY.dup()

      word = word.gsub(/'/, "''") + "%"
      # if "there" shouldn't be matched when searching with "the"
      if mods[:whole_word]
        query.sub!(/LIKE :search/, "= :search")
        word.sub!(/\%$/, '')
      end

      # SQLite GLOB is case-sensitive, LIKE isn't
      if mods[:case_sensitive]
        query.sub!(/LOWER\(category.catname\)/, 'category.catname')
        query.sub!(/LIKE :search/, "GLOB :search")
        word.sub!(/\%/, '*') # for GLOB
      end

      # something to hold the results
      vectors = FragmentTable.new()

      @dbh.transaction do
        # old-style ? bind params seem to work better with sqlite-2
        params = [ wrap, wrap, wrap, wrap, MAGIC_REV_INDEX_ID, word ]
        @dbh.execute(query, *params) do | r |
          f = Fragment.new( r['snip'], r['doctitle'],
                            r['start_at'].to_i, r['docid'].to_i )
          vectors.add(f)
        end
      end

      vectors
    end

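    # --- Illustrative note (not part of sqlite.rb as shipped) ----------------
    # get_search_fragments above routes single words (anything matching
    # WORD_PATTERN) through the reverse index and everything else (phrases,
    # punctuation) through a regexp scan of doctext; both return a
    # FragmentTable. The mods hash tunes the behaviour:
    #
    #   backend.get_search_fragments('famil', :wrap_both => 20)
    #   backend.get_search_fragments('family', :whole_word => true,
    #                                          :case_sensitive => true)
    #   backend.get_search_fragments('family ties')   # phrase => full-text scan
    # --------------------------------------------------------------------------
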
    # destructively reinstalls the schema
    def install_clean()
      if @dbfile and FileTest.exist?(@dbfile)
        @dbh.close()
        File.delete(@dbfile)
        @dbh = Database.new(@dbfile)
      end

      # transactions make a big speed difference here
      @dbh.transaction do
        @dbh.execute_batch(Schema::SCHEMA_TABLES)
        @dbh.execute_batch(Schema::SCHEMA_TRIGGERS)
        @dbh.execute_batch(Schema::SCHEMA_INDEXES)
      end
    end
  end
end