weft-qda 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/lib/weft.rb +21 -0
  2. data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
  3. data/lib/weft/application.rb +130 -0
  4. data/lib/weft/backend.rb +39 -0
  5. data/lib/weft/backend/marshal.rb +26 -0
  6. data/lib/weft/backend/mysql.rb +267 -0
  7. data/lib/weft/backend/n6.rb +366 -0
  8. data/lib/weft/backend/sqlite.rb +633 -0
  9. data/lib/weft/backend/sqlite/category_tree.rb +104 -0
  10. data/lib/weft/backend/sqlite/schema.rb +152 -0
  11. data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
  12. data/lib/weft/category.rb +157 -0
  13. data/lib/weft/coding.rb +355 -0
  14. data/lib/weft/document.rb +118 -0
  15. data/lib/weft/filters.rb +243 -0
  16. data/lib/weft/wxgui.rb +687 -0
  17. data/lib/weft/wxgui/category.xpm +26 -0
  18. data/lib/weft/wxgui/dialogs.rb +128 -0
  19. data/lib/weft/wxgui/document.xpm +25 -0
  20. data/lib/weft/wxgui/error_handler.rb +52 -0
  21. data/lib/weft/wxgui/inspectors.rb +361 -0
  22. data/lib/weft/wxgui/inspectors/category.rb +165 -0
  23. data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
  24. data/lib/weft/wxgui/inspectors/document.rb +139 -0
  25. data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
  26. data/lib/weft/wxgui/inspectors/script.rb +35 -0
  27. data/lib/weft/wxgui/inspectors/search.rb +265 -0
  28. data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
  29. data/lib/weft/wxgui/lang.rb +17 -0
  30. data/lib/weft/wxgui/lang/en.rb +45 -0
  31. data/lib/weft/wxgui/mondrian.xpm +44 -0
  32. data/lib/weft/wxgui/search.xpm +25 -0
  33. data/lib/weft/wxgui/sidebar.rb +498 -0
  34. data/lib/weft/wxgui/utilities.rb +148 -0
  35. data/lib/weft/wxgui/weft16.xpm +31 -0
  36. data/lib/weft/wxgui/workarea.rb +249 -0
  37. data/test/001-document.rb +196 -0
  38. data/test/002-category.rb +138 -0
  39. data/test/003-code.rb +370 -0
  40. data/test/004-application.rb +52 -0
  41. data/test/006-filters.rb +139 -0
  42. data/test/009a-backend_sqlite_basic.rb +280 -0
  43. data/test/009b-backend_sqlite_complex.rb +175 -0
  44. data/test/009c_backend_sqlite_bench.rb +81 -0
  45. data/test/010-backend_nudist.rb +5 -0
  46. data/test/all-tests.rb +1 -0
  47. data/test/manual-gui-script.txt +24 -0
  48. data/test/testdata/autocoding-test.txt +15 -0
  49. data/test/testdata/iso-8859-1.txt +5 -0
  50. data/test/testdata/sample_doc.txt +19 -0
  51. data/test/testdata/search_results.txt +1254 -0
  52. data/test/testdata/text1-dos-ascii.txt +2 -0
  53. data/test/testdata/text1-unix-utf8.txt +2 -0
  54. data/weft-qda.rb +28 -0
  55. metadata +96 -0
@@ -0,0 +1,366 @@
1
+ # Read-only storage backend using N6's native file format
2
+ module QDA
3
+ module Backend
4
+ module N6
5
+ # number of seconds between 1/1/1900 and 1/1/1970
6
+ SEVENTY_YEARS = 2208985187
7
+
8
# Opens the N6 project found under args[:basedir]: resets the document id
# counter, creates the parenthesis parser, then loads the documents followed
# by the node tree.
def start(args)
  @doc_dbid_counter = 0
  @npr = NPReader.new
  @basedir = args[:basedir]
  load_docs
  load_nodes
end
15
+
16
# Returns the list of documents held in @docs.
def get_all_docs
  return @docs
end
19
# Returns the first document in @docs whose title equals +title+, or nil
# when none matches.
def get_doc(title)
  @docs.each do |candidate|
    return candidate if candidate.title == title
  end
  nil
end
22
+
23
# Returns the root category held in @root_node.
def get_all_categories
  return @root_node
end
26
+
27
+ private
28
+ PARSE_DOCS = /^\("((?:[^"]|\\")*)" (\d+) (\d+) (?#
29
+ )(NIL|(?:\([0-9 ]+\))) (?#
30
+ )\((\d{10}) \. (\d{10})\) (?#
31
+ )"((?:[^"]|\\")*)"/
32
+
33
# Reads DATABASE/docsys and builds one Document per line that matches
# PARSE_DOCS, storing the results in @docs. Lines that do not match are
# skipped.
#
# For each document the text fragments (DATABASE/DOCFACTS + DATABASE/DOCFILES)
# and the optional memo (DATABASE/DOCMEMS) are loaded, and the document is
# given a dbid from next_doc_dbid.
def load_docs
  @docs = []
  File.foreach(File.join(@basedir, 'DATABASE/docsys')) do |docline|
    matches = PARSE_DOCS.match(docline)
    next unless matches

    doc_id = matches[7]
    fragments = read_doc_fragments(doc_id)
    memo = read_doc_memo(doc_id)

    # stored timestamps are offset from Time's epoch by SEVENTY_YEARS
    crt_date = Time.at(matches[5].to_i - SEVENTY_YEARS)
    mod_date = Time.at(matches[6].to_i - SEVENTY_YEARS)

    doc = Document.new(matches[1], '', memo, crt_date, mod_date)
    # a "0" in the second field flags the document as external
    doc.instance_eval { @external = true } if matches[2] == '0'
    add_chunk_mapping(doc)
    doc.dbid = next_doc_dbid

    fragments.each { |frag| doc.append(frag) }
    @docs.push(doc)
  end
end

# Returns the text of document +doc_id+ as an array of fragments, one per
# entry of its DOCFACTS table.
def read_doc_fragments(doc_id)
  docfacts = File.open(File.join(@basedir, 'DATABASE/DOCFACTS', doc_id)) do |dff|
    @npr.parse(dff.read)
  end
  raise "cannot parse DOCFACTS for document #{doc_id}" unless docfacts

  fragments = []
  File.open(File.join(@basedir, 'DATABASE/DOCFILES', doc_id)) do |df|
    docfacts.values.each do |chunk|
      # each fragment is stored as two consecutive reads whose sizes are the
      # first and third values of the chunk entry; a short file yields ''
      # rather than nil
      head = df.read(chunk.values[0].to_i).to_s
      tail = df.read(chunk.values[2].to_i).to_s
      # NOTE(review): the original called String#sub(/[\n\r]+$/, '') here and
      # discarded the result, so trailing newlines were never stripped. That
      # no-op is dropped; confirm whether stripping was actually wanted.
      fragments.push(head + tail)
    end
  end
  fragments
end

# Returns the memo text for document +doc_id+, or '' when it has no memo file.
def read_doc_memo(doc_id)
  memofile = File.join(@basedir, 'DATABASE/DOCMEMS', doc_id)
  return '' unless FileTest.exist?(memofile)
  File.open(memofile) { |dm| dm.read }
end

# Adds singleton methods to +doc+ that record the length returned by each
# append call and translate chunk positions into (offset, length) pairs.
def add_chunk_mapping(doc)
  class << doc
    # Converts +length+ chunks starting at chunk index +start+ into the summed
    # length of the preceding chunks and the summed length of the selected
    # chunks. External documents always yield 0, 0.
    def chunks_to_vector(start, length)
      return 0, 0 if @external

      this_start = @chunkmap[0, start].inject(0) { |tot, ck| tot + ck }
      this_offset = @chunkmap[start, length].inject(0) { |tot, ck| tot + ck }
      return this_start, this_offset
    end

    # Appends +text+ via the class's own append and remembers the value it
    # returns as the size of this chunk.
    def append(text)
      @chunkmap ||= []
      len = super(text)
      @chunkmap.push(len)
    end
  end
end
122
+
123
# Advances @doc_dbid_counter by one and returns the new value.
def next_doc_dbid
  @doc_dbid_counter = @doc_dbid_counter + 1
end
127
+
128
+ PARSE_INDEX = /^\((\d+|NIL) "([^"]*)" (?#
129
+ )(?:(NIL)|(?:"((?:[^"]|\\")*)")) (?# comment
130
+ )(?:(NIL)|(?:"((?:[^"]|\\")*)")) (?# "memoreference
131
+ )\((\d{10}) \. (\d{10})\) (.*?) (\d+)\) $/
132
# Reads DATABASE/indexsys and rebuilds the node tree as Category objects.
#
# The first entry becomes @root_node; every later entry is attached to the
# nearest preceding node that still expects children (the last field of each
# entry is its child count). All nodes are collected in @nodes, and each
# node's codes are applied through load_node_codes.
#
# The node memo, when a memo file exists, is passed to Category.new as its
# third argument. Previously the memo was read from disk and then discarded.
def load_nodes
  @nodes = []
  @root_node = nil
  curr_node = nil
  pending = ''

  # node => number of children still expected for it
  tree_kids = {}
  File.foreach(File.join(@basedir, 'DATABASE/indexsys')) do |line|
    next if line =~ /^\s*$/

    # an entry may span several lines: keep accumulating until it parses
    matches = PARSE_INDEX.match(pending + line)
    unless matches
      pending += line
      next
    end
    pending = ''

    title = matches[2]
    memo = read_node_memo(matches[6])

    if curr_node.nil?
      # the first entry has no parent: it is the root
      new_node = Category.new(title, nil, memo)
      @root_node = new_node
      # never reaches 0, so the root is always available as a parent
      tree_kids[new_node] = -1
    else
      # climb until we reach a node that still expects children
      curr_node = curr_node.parent while tree_kids[curr_node] == 0
      tree_kids[curr_node] -= 1
      new_node = Category.new(title, curr_node, memo)
      tree_kids[new_node] = matches[10].to_i
    end

    load_node_codes(new_node, matches[9])
    @nodes.push(new_node)
    curr_node = new_node
  end
end

# Returns the contents of DATABASE/NODEMEMS/<memoref>, or '' when +memoref+
# is nil or the file does not exist.
def read_node_memo(memoref)
  return '' unless memoref
  memofile = File.join(@basedir, 'DATABASE/NODEMEMS', memoref)
  return '' unless FileTest.exist?(memofile)
  File.open(memofile) { |nm| nm.read }
end
206
+
207
+
208
# Applies to +category+ the document segments described by +codesrc+, the
# nested-parenthesis code list taken from an indexsys entry.
#
# Does nothing when +codesrc+ is the string 'NIL'. Raises a RuntimeError when
# +codesrc+ cannot be parsed or names a document that has not been loaded.
def load_node_codes(category, codesrc)
  return if codesrc == 'NIL'
  parsed = @npr.parse(codesrc)
  raise "cannot parse #{codesrc} for node codes" unless parsed

  parsed.values.each do |val|
    # the first value is the title of the coded document
    title = val.values[0]
    doc = get_doc(title)
    raise "cannot find document #{title.inspect} for node codes" unless doc

    # the remaining values are the coded segments of that document
    val.values[1..-1].each do |code|
      start = code.values[0].to_i
      length = code.values[2].to_i - start + 1
      # chunk positions have to be converted by the document itself
      offset, span = doc.chunks_to_vector(start, length)
      # a zero span happens for codes applied to an external document
      next if span == 0
      category.code(doc.dbid, offset, span)
    end
  end
end
233
+ end
234
+
235
+ # NPReader.rb - Nested parenthesis (LISP syntax) parser.
236
+ # Copyright (C) 2001 Gordon James Miller
237
+
238
+ # This library is free software; you can redistribute it and/or
239
+ # modify it under the terms of the GNU Lesser General Public License
240
+ # as published by the Free Software Foundation; either version 2.1
241
+ # of the License, or (at your option) any later version.
242
+
243
# A container for the strings that sit between one pair of matching
# parentheses. Each instance holds a list of values that are either strings
# or other NPNode objects representing nested groups.
class NPNode
  @@NORMAL_MODE = 0
  @@QUOTE_MODE = 1

  # The members of this group.
  attr_reader :values

  # The parent of this group.
  attr_reader :parent

  # The level of this group: 0 for a node without a parent, otherwise the
  # parent's level plus one.
  attr_reader :level

  # The state of this group, either NORMAL (0) or QUOTE (1). In QUOTE,
  # whitespace is kept as part of the current value.
  attr_reader :state

  # Finishes the value currently being collected: if any characters are
  # pending they are joined into one string and appended to +values+.
  def close
    return if @curval.empty?
    # Array#join, not Array#to_s: to_s only concatenates on Ruby 1.8 and
    # returns the inspect form on later versions.
    @values.push(@curval.join)
    @curval.clear
  end

  # Creates a node below +parent+. +parent+ may be nil, in which case this is
  # a top level node; otherwise the new node is pushed onto the parent's
  # values.
  def initialize(parent)
    @parent = parent
    @parent.push(self) unless @parent.nil?

    @state = @@NORMAL_MODE
    @level = @parent.nil? ? 0 : @parent.level + 1

    @values = []
    @curval = []
  end

  # Feeds one item into the node. An NPNode is stored as a nested value.
  # A single or double quote character toggles quote mode and ends the
  # current value. Whitespace outside quote mode ends the current value.
  # Any other character is appended to the current value.
  def push(ch)
    if ch.kind_of?(NPNode)
      @values.push(ch)
    elsif ch == "'" || ch == "\""
      @state = (@state + 1) % 2
      close
    elsif @state != @@QUOTE_MODE && ch =~ /[ \t\n\r]/
      close
    else
      @curval.push(ch)
    end
  end

  # Returns the values separated by single spaces and wrapped in parentheses.
  def to_s
    "(#{@values.join(' ')})"
  end
end
323
+
324
# A nested parenthesis reader: builds a tree of NPNode objects from a string.
class NPReader
  # Creates a reader. Parsing itself is done by the parse method.
  def initialize
  end

  # Parses +string+ and returns the NPNode for the first top level group, or
  # nil when the string contains no opening parenthesis.
  #
  # Characters outside any group are ignored, and so is a closing parenthesis
  # that has no matching open group (it used to raise NoMethodError on nil).
  # Parse state is held in locals, so one reader can be reused freely.
  def parse(string)
    top = nil
    current = nil

    string.each_byte do |byte|
      ch = byte.chr

      if ch == '('
        current = NPNode.new(current)
        top ||= current
      elsif ch == ')'
        next if current.nil?
        current.close
        current = current.parent
      elsif current
        current.push(ch)
      end
    end

    top
  end
end
365
+ end
366
+ end
@@ -0,0 +1,633 @@
1
+ require 'sqlite'
2
+ # require 'sqlite3'
3
+ require 'strscan'
4
+ require 'tempfile'
5
+ require 'fileutils'
6
+ require 'base64'
7
+ require 'rexml/document'
8
+ # require 'iconv'
9
+
10
+ module QDA
11
+ # Storage backend using SQLite module - can use SQlite 3 or SQLite, but
12
+ # currently problems with SQLite 3 and non-ASCII characters. Will pick
13
+ # up whether sqlite or sqlite3 is available.
14
+ module Backend::SQLite
15
+
16
+ require 'weft/backend/sqlite/schema.rb'
17
+ require 'weft/backend/sqlite/upgradeable.rb'
18
+ require 'weft/backend/sqlite/category_tree.rb'
19
+ include Upgradeable
20
+
21
+ # if working with sqlite v2 with the sqlite-ruby v2, we need a
22
+ # couple of compatibility tweaks.
23
+ if defined?(::SQLite)
24
+ SQLITE_DB_CLASS = ::SQLite::Database
25
+ # Ruby-SQLite3 statements have a close() method, but Ruby-SQLite
26
+ # v 2 don't - so we supply a dummy method for when using v2
27
+ class ::SQLite::Statement
28
+ def close(); end
29
+ end
30
+ # SQLite3 introduced this more ruby-ish notation
31
+ class ::SQLite::Database::FunctionProxy
32
+ alias :result= :set_result
33
+ end
34
+ elsif defined?(::SQLite3)
35
+ SQLITE_DB_CLASS = ::SQLite3::Database
36
+ else
37
+ raise LoadError, "No SQlite database class loaded"
38
+ end
39
+
40
# Database handle with Weft-specific helpers, built on whichever SQLite
# binding was selected as SQLITE_DB_CLASS.
class Database < SQLITE_DB_CLASS
  # Opens +file+ and asks for result rows to be returned as hashes.
  def initialize(file)
    # super(file, :driver => "Native")
    super(file)
    self.results_as_hash = true
    # self.type_translation = true
  end

  # Executes the statements stored in the undoable table for step 1, then
  # decrements every step and deletes the rows that reached step 0.
  #
  # This object is itself the database handle, so the queries run on self.
  # The earlier version went through @dbh, which is never assigned inside
  # this class.
  def undo_action
    transaction do
      execute("SELECT * FROM undoable WHERE step = 1
               ORDER BY step, actionid DESC") do |task|
        execute(task[0])
      end
      execute("UPDATE undoable SET step = step -1")
      execute("DELETE FROM undoable WHERE step = 0")
    end
  end

  # Executes the statements stored in the undoable table for step -1, deletes
  # those rows, then increments every remaining step.
  def redo_action
    transaction do
      execute("SELECT * FROM undoable WHERE step = -1
               ORDER BY step, actionid DESC") do |task|
        execute(task[0])
      end
      execute("DELETE FROM undoable WHERE step = -1")
      execute("UPDATE undoable SET step = step + 1")
    end
  end

  # Formats +date+ as 'YYYY-MM-DD HH:MM:SS'; a nil/false date becomes ''.
  def date_freeze(date)
    date ? date.strftime('%Y-%m-%d %H:%M:%S') : ''
  end

  # Inverse of date_freeze: turns a stored date string into a local Time.
  # Returns nil for nil or an empty string.
  def date_thaw(str)
    return nil if str.nil? || str.empty?
    Time.local(*str.split(/[- :]/))
  end
end
79
+
80
+ attr_reader :dbh, :dbfile
81
+
82
# Loads up the database connection. +args+ must be a hash containing the key
# :dbfile. If its value is nil an empty temporary store is used; otherwise
# the named file is copied to a temporary working file, which is what gets
# opened, and any format upgrade needed for older files is applied.
#
# Raises ArgumentError when :dbfile is absent and RuntimeError when the named
# file does not exist.
def start(args)
  unless args.key?(:dbfile)
    raise ArgumentError, "Must specify SQLite dbfile to load from"
  end

  @dbfile = args[:dbfile]
  # File.exist? rather than File.exists?, which newer Rubies no longer have
  if @dbfile && !File.exist?(@dbfile)
    raise RuntimeError, "Tried to open an non-existent database"
  end

  # Keep the Tempfile object referenced for as long as this backend lives:
  # once it is garbage collected its finalizer may unlink the working file.
  @tmpfile_handle = Tempfile.new(@dbfile ? File.basename(@dbfile) : 'Weft')
  @tmpfile_handle.close(false) # don't delete

  @tmpfile = @tmpfile_handle.path
  FileUtils.copy(@dbfile, @tmpfile) if @dbfile
  @dbh = Database.new(@tmpfile)
  # if opening from an existing file, check and do any upgrading
  # required from older versions
  do_version_format_upgrading if @dbfile
  undirty!
end
109
+
110
# Adopts the database handle supplied as args[:dbh] as this backend's @dbh.
def connect(args)
  handle = args[:dbh]
  @dbh = handle
end
113
+
114
# Forgets the cached category tree and closes the database handle.
# +force+ is accepted but not used.
def end(force = false)
  handle = @dbh
  @cat_tree = nil
  handle.close
end
118
+
119
# Writes the working copy out to +target+ (by default the file this backend
# was opened from or last saved to) and remembers it as @dbfile. The handle
# is closed around the copy and reopened on the working file afterwards.
# Raises RuntimeError when there is no target at all.
def save(target = @dbfile)
  raise RuntimeError,
        "No previously saved file, and no named supplied for save" if target.nil?

  @dbh.close
  @dbfile = target
  FileUtils.copy(@tmpfile, target)
  @dbh = Database.new(@tmpfile)
  undirty!
end
130
+
131
# Rolls the current state back to the last-saved state by copying @dbfile
# over the working file and reopening it.
#
# Raises RuntimeError, before touching the open handle, when there is no
# saved file to go back to. The cached category tree is dropped so that it
# is re-read from the restored database instead of reflecting reverted edits.
def revert
  if @dbfile.nil?
    raise RuntimeError, "No previously saved file to revert to"
  end
  @dbh.close
  FileUtils.copy(@dbfile, @tmpfile)
  @cat_tree = nil
  @dbh = Database.new(@tmpfile)
end
137
+
138
+
139
# Runs the caller's block inside a single database transaction, as a hint
# that the enclosed actions belong together.
def batch
  @dbh.transaction do
    yield
  end
end
143
+
144
# Returns the category tree, loading it on first use from the xml column of
# category_structure. A missing or empty xml value gives a new, empty
# CategoryTree. The result is cached in @cat_tree.
def cat_tree
  return @cat_tree if @cat_tree
  stored = @dbh.get_first_value("SELECT xml FROM category_structure")
  @cat_tree = (stored and stored.length > 0) ? CategoryTree.load(stored) : CategoryTree.new
end
153
+
154
+ # private :cat_tree
155
# Returns every document as a Document built from its title alone, with
# dbid set to the row's docid as an integer.
def get_all_docs
  found = []
  @dbh.execute("SELECT doctitle, docid FROM document") do |row|
    doc = Document.new(row['doctitle'])
    doc.dbid = row['docid'].to_i
    found << doc
  end
  found
end
165
+
166
# Fetches the document identified by +ident+. An Integer, or a string made
# only of digits, is looked up as a docid; anything else as a document title.
# Raises a RuntimeError ("Not found: ...") when no row matches.
def get_doc(ident)
  doc = nil
  @dbh.transaction do
    # Integer instead of Fixnum: Fixnum is gone from newer Rubies
    stmt = if ident.kind_of?(Integer) || ident =~ /^\d+$/
             @dbh.prepare("SELECT * FROM document WHERE docid = ?")
           else
             @dbh.prepare("SELECT * FROM document WHERE doctitle = ?")
           end
    begin
      stmt.execute!(ident) do |r|
        doc = Document.new(r['doctitle'].dup,
                           r['doctext'].dup,
                           r['docmemo'].dup,
                           @dbh.date_thaw(r['created_date']),
                           @dbh.date_thaw(r['modified_date']))
        doc.dbid = r['docid'].to_i
      end
    ensure
      # close the statement even when the lookup fails
      stmt.close
    end
    raise "Not found: #{ident}" if doc.nil?
  end
  doc
end
190
+ alias :get_document :get_doc
191
+
192
# Stores +pref_value+ under +pref_name+ in app_preference, replacing any
# existing row. The value is kept as Base64 text of its Marshal dump. Marks
# the backend dirty.
def save_preference(pref_name, pref_value)
  marshalled = Marshal.dump(pref_value)
  frozen_value = Base64.encode64(marshalled)
  @dbh.transaction do
    @dbh.execute("INSERT OR REPLACE INTO app_preference
                  VALUES (?, ?)",
                 pref_name, frozen_value)
  end
  dirty!
end
201
+
202
# Returns the value stored under +pref_name+ in app_preference, or nil when
# there is no such row (or the stored value is NULL).
def get_preference(pref_name)
  stored = nil
  @dbh.transaction do
    @dbh.execute("SELECT value FROM app_preference
                  WHERE name = ? ", pref_name) do |row|
      stored = row['value']
    end
  end
  # NOTE(review): Marshal.load runs on data read from the project file; if
  # project files can come from untrusted sources this needs a safer format.
  stored.nil? ? nil : Marshal.load(Base64.decode64(stored))
end
213
+
214
# Saves +doc+ inside a transaction, marks the backend dirty and returns
# +doc+. Raises TypeError unless +doc+ is a QDA::Document.
def save_document(doc)
  raise TypeError unless doc.kind_of?(QDA::Document)
  @dbh.transaction do
    _save_document(doc)
  end
  dirty!
  return doc
end
220
+
221
# Writes +doc+ to the document table. A document that already has a dbid is
# updated in place with a fresh modified_date; otherwise a row is inserted
# and the new row id is stored in doc.dbid.
def _save_document(doc)
  unless doc.dbid
    @dbh.execute("INSERT INTO document
                  VALUES(NULL, ?, ?, ?, ?, ?)",
                 doc.title, doc.text, doc.memo,
                 @dbh.date_freeze(doc.create_date),
                 @dbh.date_freeze(Time.now))
    return doc.dbid = @dbh.last_insert_row_id.to_i
  end

  @dbh.execute("UPDATE document
                SET doctitle = ?, doctext = ?,
                docmemo = ?, modified_date = ?
                WHERE docid = ? ",
               doc.title, doc.text, doc.memo,
               @dbh.date_freeze(Time.now),
               doc.dbid)
end
239
+
240
# Deletes the document identified by +dbid+ from the database and marks the
# backend dirty.
def delete_document(dbid)
  @dbh.transaction { @dbh.execute("DELETE FROM document WHERE docid = ?", dbid) }
  dirty!
end
247
+
248
# Retrieves the category with the internal id +catid+, along with its codes.
# +catid+ may be an Integer or a string of digits. If +get_structure+ is set
# to a true value the category's children are built as well.
#
# Raises a RuntimeError for an id of any other form, or when no category has
# that id.
def get_category(catid, get_structure = false)
  # only strings are pattern-matched: calling =~ on an Integer is not
  # supported on newer Rubies
  catid = catid.to_i if catid.kind_of?(String) && catid =~ /^\d+$/
  # Integer instead of Fixnum, which newer Rubies no longer define
  raise "Invalid id #{catid.inspect}" unless catid.kind_of?(Integer)

  category = nil
  stmt = @dbh.prepare("SELECT * FROM category WHERE catid = ?")
  begin
    stmt.execute!(catid) do |r|
      parent = get_category_parent(catid)
      category = Category.new(r['catname'], parent, r['catdesc'])
      category.dbid = catid
    end
  ensure
    # close the statement even when the lookup fails
    stmt.close
  end
  raise "No category found matching id '#{catid}'" unless category

  get_codes_for_category(category)
  get_and_build_children(category) if get_structure
  category
end
269
+
270
# Gets the root category named +name+, or raises a RuntimeError when the
# category tree has no root of that name.
def get_root_category(name)
  cat_tree.roots.each do |candidate|
    return get_category(candidate.dbid) if candidate.name == name
  end
  raise "Not found, root category #{name.inspect}"
end
276
+
277
# Fetches categories by relative or absolute path and returns them as an
# array. A +path+ without any '/' is handed to get_categories_by_name;
# otherwise the category tree is searched and each hit is loaded by id.
def get_categories_by_path(path)
  return get_categories_by_name(path) unless path =~ /\//
  cat_tree.find(path).map { |hit| get_category(hit.dbid) }
end
289
+
290
# Fetches the categories whose names start with +namebit+ and returns them
# as an array. Matching is case-insensitive by default; pass a false
# +insensitive+ for a case-sensitive match. Only categories with
# parent >= 0 are considered.
def get_categories_by_name(namebit, insensitive = true)
  if insensitive
    sql = "SELECT catid FROM category
           WHERE UPPER(catname) LIKE ?
           AND parent >= 0"
    pattern = namebit.upcase + "%"
  else
    sql = "SELECT catid FROM category
           WHERE catname GLOB ?
           AND parent >= 0"
    # GLOB's wildcard is '*'; the LIKE wildcard '%' is an ordinary character
    # to it, so the old "%" suffix could never prefix-match
    pattern = namebit + "*"
  end

  categories = []
  stmt = @dbh.prepare(sql)
  begin
    @dbh.transaction do
      stmt.execute!(pattern) do |r|
        categories.push(get_category(r['catid']))
      end
    end
  ensure
    # close the statement even when a lookup fails
    stmt.close
  end
  categories
end
315
+
316
# Asks the category tree whether +descendant+ lies below +ancestor+; both
# are identified to the tree by their dbid.
def is_descendant?(ancestor, descendant)
  upper_id = ancestor.dbid
  lower_id = descendant.dbid
  cat_tree.is_descendant?(upper_id, lower_id)
end
319
+
320
# Builds the tree structure below +category+, modifying +category+ in place:
# for every node under it in the category tree a Category is created with
# the matching parent and dbid.
#
# Children are walked with +children.each+, as the other callers of the
# category tree in this file do. Passing a block directly to +children+ has
# no effect when +children+ is a plain reader.
def get_and_build_children(category)
  # this duplicates the tree walk in get_all_categories
  append_f = Proc.new do |parent, elem|
    cat = Category.new(elem.name, parent)
    cat.dbid = elem.dbid
    elem.children.each { |c| append_f.call(cat, c) }
  end

  cat_tree[category.dbid].children.each do |first_child|
    append_f.call(category, first_child)
  end
end
335
+ private :get_and_build_children
336
+
337
# Reads every code row stored for +cat+ and applies it to the category as
# (docid, offset, length) integers. Returns +cat+.
def get_codes_for_category(cat)
  @dbh.execute("SELECT docid, offset, length
                FROM code
                WHERE catid = ? ", cat.dbid) do |code_row|
    docid = code_row['docid'].to_i
    offset = code_row['offset'].to_i
    length = code_row['length'].to_i
    cat.code(docid, offset, length)
  end
  cat
end
348
+
349
# Collects the text coded by +cat+. Each code row is turned into a Fragment
# (text, document title, offset, docid) and added to a FragmentTable, which
# is returned. Rows arrive ordered by document id, then offset.
def get_text_at_category(cat)
  table = FragmentTable.new
  @dbh.execute("SELECT document.doctitle AS doctitle,
                code.docid AS docid,
                code.offset AS offset, code.length,
                SUBSTR(document.doctext,
                code.offset + 1, code.length) AS fragment
                FROM document, code
                WHERE code.catid = ?
                AND code.docid = document.docid
                ORDER BY code.docid, code.offset", cat.dbid) do |row|
    fragment = Fragment.new(row['fragment'], row['doctitle'],
                            row['offset'].to_i, row['docid'].to_i)
    table.add(fragment)
  end
  table
end
371
+
372
# Returns the parent category of the category with id +catid+, loaded via
# get_category, or nil when the tree records no parent for it.
def get_category_parent(catid)
  return nil unless cat_tree[catid].parent
  return get_category(cat_tree[catid].parent)
end
379
+
380
# Fetches all the categories as trees of Category objects: returns one
# Category per root of the category tree, with a Category created beneath it
# for every descendant node. Only name, parent and dbid are filled in.
def get_all_categories
  grow = Proc.new do |node, parent_cat|
    built = Category.new(node.name, parent_cat)
    built.dbid = node.dbid
    node.children.each { |kid| grow.call(kid, built) }
    built
  end
  # turn the internal tree into actual Categories, root by root
  cat_tree.roots.map { |root_node| grow.call(root_node, nil) }
end
392
+
393
# Saves +cat+ inside a transaction, marks the backend dirty and returns
# +cat+.
def save_category(cat)
  @dbh.transaction do
    _save_category(cat)
  end
  dirty!
  return cat
end
399
+
400
# Writes +cat+ and its codes to the database and keeps the category tree in
# step with it.
#
# An existing category (one with a dbid) is moved and/or renamed in the tree
# as needed, its code rows are deleted and its category row is updated. A new
# category is inserted, receives its dbid and is added to the tree. In both
# cases the category's codes are then (re)inserted, and the serialised tree
# is stored again only when the tree actually changed.
def _save_category(cat)
  # only resave the tree structure if necessary
  xml_needs_update = false
  # a root category has no parent; work the id out once, nil-safely, so that
  # re-saving a root does not fail on nil.dbid
  parentid = cat.parent ? cat.parent.dbid : nil

  if cat.dbid
    # updating an existing category: check for re-parenting or renaming
    child = cat_tree[cat.dbid]

    if child.parent != parentid
      cat_tree.move(child.dbid, parentid)
      xml_needs_update = true
    end

    if child.name != cat.name
      child.name = cat.name
      xml_needs_update = true
    end
    @dbh.execute("DELETE FROM code WHERE catid = ?", cat.dbid)
    @dbh.execute("UPDATE category
                  SET catname = ?,
                  catdesc = ?,
                  parent = ?,
                  modified_date = ?
                  WHERE catid = ? ",
                 cat.name,
                 cat.memo,
                 parentid,
                 @dbh.date_freeze(Time.now),
                 cat.dbid)
  else
    # adding a new category
    @dbh.execute("INSERT INTO category
                  VALUES(NULL, ?, ?, ?, ?, ?)",
                 cat.name, cat.memo, parentid,
                 @dbh.date_freeze(Time.now),
                 @dbh.date_freeze(Time.now))
    cat.dbid = @dbh.last_insert_row_id.to_i
    cat_tree.add(parentid, cat.dbid, cat.name)
    xml_needs_update = true
  end

  stmt_code = @dbh.prepare("INSERT INTO code VALUES(?, ?, ?, ?)")
  begin
    cat.codes.each do |docid, vecs|
      vecs.each do |vec|
        stmt_code.execute(cat.dbid, vec.docid, vec.offset, vec.length)
      end
    end
  ensure
    # close the statement even when an insert fails
    stmt_code.close
  end

  if xml_needs_update
    @dbh.execute("UPDATE category_structure SET xml = ? ",
                 cat_tree.serialise)
  end
end
461
+
462
# Deletes the category +cat+ together with all of its descendants and
# returns the list of items that were deleted (descendants first, +cat+
# last). Returns nil without doing anything when +cat+ has no dbid.
#
# +recursive+ must be true (the default). Non-recursive deletion, which
# would reattach the children to the deleted category's parent, is not
# implemented and raises NotImplementedError.
def delete_category(cat, recursive = true)
  return unless cat.dbid
  unless recursive
    raise NotImplementedError,
          'Non-recursive deletion not implemented'
  end

  deleted_items = []
  # Walk a copy of the child list: each recursive call removes a node from
  # the tree, and changing a list while iterating over it skips entries.
  # NOTE(review): probably the cause of the old "not all items being
  # returned in list" TODO - confirm against CategoryTree#remove.
  cat_tree[cat.dbid].children.dup.each do |child|
    deleted_items += delete_category(child, true)
  end
  cat_tree.remove(cat.dbid)
  deleted_items << cat

  @dbh.transaction do
    @dbh.execute("DELETE FROM category WHERE catid = ? ", cat.dbid)
    xml = cat_tree.serialise
    @dbh.execute("UPDATE category_structure SET xml = ?", xml)
  end
  dirty!
  deleted_items
end
490
+
491
+ MAGIC_REV_INDEX_ID = -2
492
# Adds the reverse indexes for +words+ to the existing reverse indexes.
# +words+ maps each word to the list of positions at which it occurs in
# document +docid+. A word that has no index category yet (a category whose
# parent is MAGIC_REV_INDEX_ID) gets one created; then one code row is
# written per position, with the word's length as the code length.
# +prog_bar+, when given, receives +next+ after each word.
def save_reverse_index(docid, words, prog_bar = nil)
  find_word = @dbh.prepare("SELECT catid FROM category
                            WHERE catname = ? AND parent = ? ")
  add_word = @dbh.prepare("INSERT INTO category
                           VALUES(NULL, ?, ?, ?, ?, ?)")
  add_code = @dbh.prepare("INSERT INTO code VALUES(?, ?, ?, ?)")

  @dbh.transaction do
    words.each do |word, locations|
      wordid = nil
      find_word.execute!(word, MAGIC_REV_INDEX_ID) { |row| wordid = row[0] }
      if wordid.nil? || wordid == false
        add_word.execute(word, '', MAGIC_REV_INDEX_ID,
                         @dbh.date_freeze(Time.now),
                         @dbh.date_freeze(Time.now))
        wordid = @dbh.last_insert_row_id.to_s
      end

      locations.each { |loc| add_code.execute(wordid, docid, loc, word.length) }
      prog_bar.next if prog_bar
    end
  end
  find_word.close
  add_word.close
  add_code.close
end
522
+
523
+
524
# Deletes all reverse word indexes associated with +docid+: every code row
# for that document whose category has MAGIC_REV_INDEX_ID as its parent.
def drop_reverse_indexes(docid)
  @dbh.transaction do
    remover = @dbh.prepare("DELETE FROM code
                            WHERE docid = ? AND catid IN
                            (SELECT catid
                            FROM category
                            WHERE parent = ?) ")
    remover.execute(docid, MAGIC_REV_INDEX_ID)
    remover.close
  end
end
536
+
537
+
538
+ # should work for latin-1 characters
539
+ WORD_PATTERN = /^[\w\xC0-\xD6\xD8-\xF6\xF8-\xFF][\w\xC0-\xD6\xD8-\xF6\xF8-\xFF']+$/s
540
+
541
# Searches the documents for +term+ and returns the matching fragments.
# A term that matches WORD_PATTERN is looked up in the reverse word index;
# anything else is found by scanning the document texts. +mods+ holds
# additional search options and is passed through unchanged.
def get_search_fragments(term, mods = {})
  if term =~ WORD_PATTERN
    get_search_fragments_index(term, mods)
  else
    get_search_fragments_scan(term, mods)
  end
end
550
+
551
# Finds +term+ by scanning the text of every document whose text contains it
# and returns a FragmentTable of the matches.
#
# Options read from +mods+:
# :wrap_both      - number of characters of context allowed on each side
#                   of the match (default 0)
# :whole_word     - when true, the term must sit on word boundaries
# :case_sensitive - when true, letter case must match
#
# +term+ is matched literally: it is escaped before being placed in the
# search pattern, so punctuation such as "(" or "+" no longer breaks or
# alters the search.
def get_search_fragments_scan(term, mods = {})
  vectors = FragmentTable.new
  wrap = mods[:wrap_both] || 0

  # the pattern does not depend on the row, so compile it once up front
  literal = Regexp.escape(term)
  rx = if mods[:whole_word]
         '\b\w*.{0,%i}\b%s\b.{0,%i}\w*?\b' % [wrap, literal, wrap]
       else
         '\b\w*.{0,%i}%s.{0,%i}\w*?\b' % [wrap, literal, wrap]
       end
  flags = Regexp::MULTILINE
  flags |= Regexp::IGNORECASE unless mods[:case_sensitive]
  search = Regexp.new(rx, flags)

  @dbh.execute("SELECT * FROM document WHERE doctext LIKE ?",
               "%#{term}%") do |r|
    doc_title = r['doctitle']
    doc_id = r['docid'].to_i

    scanner = StringScanner.new(r['doctext'])
    while scanner.scan_until(search)
      # scan_until leaves the position just past the match
      offset = scanner.pos - scanner.matched_size
      vectors.add(Fragment.new(scanner.matched, doc_title, offset, doc_id))
    end
  end
  vectors
end
577
+
578
# Looks +word+ up in the reverse word index and returns a FragmentTable of
# the matches. By default the search is a prefix match using LIKE.
#
# Options read from +mods+:
# :wrap_both      - context width handed to the query (default 0)
# :whole_word     - compare with "=" instead of a prefix match, so that
#                   "there" is not found when searching for "the"
# :case_sensitive - compare the raw category name using GLOB
#
# The SQL comes from Schema::RINDEX_SEARCH_MODEL_QUERY and is adjusted by
# text substitution according to the options.
def get_search_fragments_index(word, mods = {})
  wrap = mods[:wrap_both] || 0
  query = Schema::RINDEX_SEARCH_MODEL_QUERY.dup
  needle = word.gsub(/'/, "''") + "%"

  if mods[:whole_word]
    query.sub!(/LIKE :search/, "= :search")
    needle.sub!(/\%$/, '')
  end

  # SQLite GLOB is case-sensitive, LIKE isn't
  if mods[:case_sensitive]
    query.sub!(/LOWER\(category.catname\)/, 'category.catname')
    query.sub!(/LIKE :search/, "GLOB :search")
    needle.sub!(/\%/, '*') # GLOB's wildcard
  end

  found = FragmentTable.new
  @dbh.transaction do
    # old-style ? bind params seem to work better with sqlite-2
    bindings = [wrap, wrap, wrap, wrap, MAGIC_REV_INDEX_ID, needle]
    @dbh.execute(query, *bindings) do |row|
      found.add(Fragment.new(row['snip'], row['doctitle'],
                             row['start_at'].to_i, row['docid'].to_i))
    end
  end
  found
end
616
+
617
# Destructively reinstalls the schema. When @dbfile names an existing file
# the handle is closed, that file is deleted and a fresh database is opened
# at the same path. The tables, triggers and indexes from Schema are then
# created on the current handle.
def install_clean
  if @dbfile and FileTest.exist?(@dbfile)
    @dbh.close
    File.delete(@dbfile)
    @dbh = Database.new(@dbfile)
  end

  # running all three batches in one transaction is much faster
  @dbh.transaction do
    [Schema::SCHEMA_TABLES, Schema::SCHEMA_TRIGGERS, Schema::SCHEMA_INDEXES].each do |ddl|
      @dbh.execute_batch(ddl)
    end
  end
end
632
+ end
633
+ end