weft-qda 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/lib/weft.rb +21 -0
  2. data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
  3. data/lib/weft/application.rb +130 -0
  4. data/lib/weft/backend.rb +39 -0
  5. data/lib/weft/backend/marshal.rb +26 -0
  6. data/lib/weft/backend/mysql.rb +267 -0
  7. data/lib/weft/backend/n6.rb +366 -0
  8. data/lib/weft/backend/sqlite.rb +633 -0
  9. data/lib/weft/backend/sqlite/category_tree.rb +104 -0
  10. data/lib/weft/backend/sqlite/schema.rb +152 -0
  11. data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
  12. data/lib/weft/category.rb +157 -0
  13. data/lib/weft/coding.rb +355 -0
  14. data/lib/weft/document.rb +118 -0
  15. data/lib/weft/filters.rb +243 -0
  16. data/lib/weft/wxgui.rb +687 -0
  17. data/lib/weft/wxgui/category.xpm +26 -0
  18. data/lib/weft/wxgui/dialogs.rb +128 -0
  19. data/lib/weft/wxgui/document.xpm +25 -0
  20. data/lib/weft/wxgui/error_handler.rb +52 -0
  21. data/lib/weft/wxgui/inspectors.rb +361 -0
  22. data/lib/weft/wxgui/inspectors/category.rb +165 -0
  23. data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
  24. data/lib/weft/wxgui/inspectors/document.rb +139 -0
  25. data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
  26. data/lib/weft/wxgui/inspectors/script.rb +35 -0
  27. data/lib/weft/wxgui/inspectors/search.rb +265 -0
  28. data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
  29. data/lib/weft/wxgui/lang.rb +17 -0
  30. data/lib/weft/wxgui/lang/en.rb +45 -0
  31. data/lib/weft/wxgui/mondrian.xpm +44 -0
  32. data/lib/weft/wxgui/search.xpm +25 -0
  33. data/lib/weft/wxgui/sidebar.rb +498 -0
  34. data/lib/weft/wxgui/utilities.rb +148 -0
  35. data/lib/weft/wxgui/weft16.xpm +31 -0
  36. data/lib/weft/wxgui/workarea.rb +249 -0
  37. data/test/001-document.rb +196 -0
  38. data/test/002-category.rb +138 -0
  39. data/test/003-code.rb +370 -0
  40. data/test/004-application.rb +52 -0
  41. data/test/006-filters.rb +139 -0
  42. data/test/009a-backend_sqlite_basic.rb +280 -0
  43. data/test/009b-backend_sqlite_complex.rb +175 -0
  44. data/test/009c_backend_sqlite_bench.rb +81 -0
  45. data/test/010-backend_nudist.rb +5 -0
  46. data/test/all-tests.rb +1 -0
  47. data/test/manual-gui-script.txt +24 -0
  48. data/test/testdata/autocoding-test.txt +15 -0
  49. data/test/testdata/iso-8859-1.txt +5 -0
  50. data/test/testdata/sample_doc.txt +19 -0
  51. data/test/testdata/search_results.txt +1254 -0
  52. data/test/testdata/text1-dos-ascii.txt +2 -0
  53. data/test/testdata/text1-unix-utf8.txt +2 -0
  54. data/weft-qda.rb +28 -0
  55. metadata +96 -0
@@ -0,0 +1,355 @@
1
+ module QDA
2
# Mixin for anything that marks a contiguous span of text. Classes
# mixing this in should implement +offset+, +length+, <tt>[start, length]</tt>,
# +<<+ and +coerce+.
module Coding
  # Position immediately after the last character of the span (exclusive).
  def end
    offset + length
  end

  # Returns true if +point+ falls inside the span (start inclusive, end
  # exclusive). Raises ArgumentError if +point+ is nil.
  def include?(point)
    if point.nil?
      raise ArgumentError,
            "Point should be an integer, got #{point.inspect}"
    end
    point >= offset && point < self.end
  end
  alias :contains? :include?

  # Returns true if self and +other+ overlap at any point - ie there
  # is at least one character that is coded by both items
  def overlap?(other)
    first, second = [self, other].sort_by { |x| x.offset }
    first.end > second.offset
  end

  # Returns true if self and +other+ overlap or are contiguous
  def touch?(other)
    first, second = [self, other].sort_by { |x| x.offset }
    first.end >= second.offset
  end

  # Orders by +offset+, falling back to +end+ when offsets are equal.
  def <=>(other)
    if offset == other.offset
      self.end <=> other.end
    else
      offset <=> other.offset
    end
  end

  # Validates +other+ and returns the pair [this, other] to operate on.
  # When +other+ is not an instance of self's class, both sides are passed
  # through +coerce+ first. Raises ArgumentError unless +other+ mixes in
  # Coding.
  def prepare_args(other)
    unless other.kind_of?(Coding)
      raise ArgumentError,
            "Cannot combine with #{other.inspect}, should implement Coding"
    end

    if other.is_a?(self.class)
      return self, other
    else
      return self.coerce(other), other.coerce(self)
    end
  end

  # returns the code representing the intersection of +self+ and +other+
  # returns nil if there is no overlap
  def intersect(other)
    # +this+ represents self, possibly coerced into a different class
    this, other = prepare_args(other)
    return nil unless this.overlap?(other)

    # slice the shared range out of whichever item sorts first
    earliest = QDA::CodeSet[this, other].sort.first
    from = [other.offset, this.offset].max
    upto = [other.end, this.end].min
    earliest[from, upto - from]
  end
  alias :% :intersect

  # returns a QDA::CodeSet created by removing the characters coded
  # by +other+ from +self+. The returned CodeSet may be 0, 1 or 2
  # elements long. The diagram below shows how 2 results may be
  # returned.
  #
  #   -----+++++++++++++++++------ # self
  #   -
  #   -----------++++++++--------- # exclude
  #   =
  #   -----++++++--------+++------ # result
  def exclude(other)
    this, other = prepare_args(other)
    results = QDA::CodeSet[]

    # the part of this that lies before other starts
    if this.offset < other.offset
      if this.end < other.offset
        results.add(this)
      else
        results.add(this[this.offset, other.offset - this.offset])
      end
    end

    # the part of this that lies after other ends
    if this.end > other.end
      if this.offset > other.end
        results.add(this)
      else
        results.add(this[other.end, this.end - other.end])
      end
    end
    results
  end
  alias :- :exclude

  # Returns the code produced by merging +self+ with +other+. If the
  # two codes do not touch each other, then a CodeSet of the two codes
  # is returned.
  def union(other)
    this, other = prepare_args(other)
    return this if this == other
    # use the coerced +this+ so both members of the result share a type
    return QDA::CodeSet[this, other].sort unless this.touch?(other)

    # if they overlap or touch, a single coding will be returned
    first, second = QDA::CodeSet[this, other].sort
    merged = first.dup
    if second.end > first.end
      merged << second[first.end, second.end - first.end]
    end
    merged
  end
  alias :+ :union
end
120
+
121
# A collection of things that are +Coding+ - ie that mix in the Coding
# module.
class CodeSet < Array
  # direct mutators are hidden; use +add+ and +subtract+ instead
  protected :<<, :push, :pop, :shift, :unshift

  # Populate a new CodeSet from an array of coding items +arr+. These
  # should either be QDA::Codes, QDA::Fragments or three-item arrays.
  # Where the latter are found, they will be automatically turned into
  # QDA::Code, taking the contents of each three-item array to be
  # [+docid+, +offset+, +length+]. The caller's array is left untouched.
  # Raises ArgumentError for any other kind of item.
  def initialize(arr = [])
    converted = arr.collect do |item|
      case item
      when Array
        Code.new(*item)
      when Fragment, Code
        item
      else
        raise ArgumentError, "unexpected item #{item} in list"
      end
    end
    super(converted)
  end

  # iterate over each successive neighbouring pair of codings in
  # the set, i.e. items 1, 2; items 2, 3; items 3, 4 .. items n-1, n.
  def each_pair
    0.upto(length - 2) { |i| yield self[i], self[i + 1] }
  end

  # Sorts with the supplied block, or by the items' own <=> otherwise.
  def sort
    block_given? ? super : super { |a, b| a <=> b }
  end

  # Returns the set of extents covered by both this set and +other+.
  # Every code here is intersected with every code in +other+, so a code
  # that spans several codes of the other set yields one result for each.
  def intersect(other)
    hits = []
    each do |mine|
      other.each do |theirs|
        common = mine % theirs
        hits << common unless common.nil?
      end
    end
    CodeSet[*hits.sort_by { |c| [c.offset, c.end] }]
  end

  # add the extent covered by +code+ (a QDA::Code) to the set,
  # modifying in place.
  def add(code)
    replace(union(CodeSet[code]))
  end

  # remove the extent covered by +uncode+ (a QDA::Code) from the set,
  # modifying it in place.
  def subtract(uncode)
    replace(exclude(CodeSet[uncode]))
  end

  # returns the set produced by removing all extents covered by
  # +other+, which should be a QDA::CodeSet. Note that unlike +union+
  # and +intersect+
  #   self.exclude(other) != other.exclude(self)
  # Returns self unchanged when +other+ is nil.
  def exclude(other)
    return self if other.nil?
    results = dup
    other.each do |uncode|
      # each subtraction yields a collection, so flatten back to one level
      results.collect! { |code| code - uncode }.flatten!
    end
    results
  end

  # Returns the set produced by merging all the codes in this one
  # with those in +other+, which should be a QDA::CodeSet
  def union(other)
    results = CodeSet[]
    # order by start so that a code nested inside a longer one is always
    # met after the code that contains it
    ordered = (self + other).sort_by { |c| [c.offset, c.end] }

    current = nil
    ordered.each do |code|
      if current.nil?
        current = code
      elsif current.touch?(code)
        current = current + code
      else
        results.push(current)
        current = code.dup
      end
    end
    # nothing to flush when both sets were empty
    results.push(current) unless current.nil?
    results
  end
end
210
+
211
# a hash representing a complex series of codes applied to one or
# more documents. Keys are document ids, values are CodeSets; reading a
# missing key creates and stores an empty CodeSet for it.
class CodingTable < Hash
  def initialize
    super { |h, k| h[k] = CodeSet.new }
  end

  # should access using +add+ or +set+
  protected :[]=

  # add the coding of +item+ to the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def add(item)
    self[item.docid].add(item)
  end

  # Sets the coding of the document identified by +docid+ to be +codeset+.
  # Raises ArgumentError unless +codeset+ is a CodeSet.
  def set(docid, codeset)
    unless codeset.kind_of?(CodeSet)
      raise ArgumentError,
            "Cannot set codeset #{codeset.inspect} as a CodingTable entry"
    end
    self[docid] = codeset
  end

  # Removes all coding associated with +docid+
  alias :unset :delete

  # remove the coding of +item+ from the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def subtract(item)
    self[item.docid].subtract(item)
  end

  # Number of documents that have at least one code.
  def num_of_docs
    values.reject { |codeset| codeset.length == 0 }.length
  end

  # Total number of codes across all documents.
  def num_of_codes
    values.inject(0) { |count, codeset| count + codeset.length }
  end

  # Sum of the lengths of every code in the table.
  def num_of_chars
    values.inject(0) do |total, codes|
      codes.inject(total) { |sub_total, code| sub_total + code.length }
    end
  end

  # returns true if this coding table contains coding for the
  # document +doc+
  def codes?(doc)
    key?(doc.dbid) && self[doc.dbid].length > 0
  end

  # Adds the coding of the other coding table +other+ to this one,
  # modifying +self+ in place
  def merge(other)
    results = CodingTable.new
    (keys + other.keys).uniq.each do |docid|
      # test membership with key?: indexing a missing key would fire the
      # default proc and insert an empty CodeSet into the table
      results[docid] =
        if !other.key?(docid)
          self[docid]
        elsif !key?(docid)
          other[docid]
        else
          self[docid].union(other[docid])
        end
    end
    replace(results)
  end

  # Removes all coding from this table that occurs in the other table
  # +other+, modifying this CodingTable in place
  def remove(other)
    results = CodingTable.new
    each do |docid, codes|
      # pass nil for documents +other+ does not cover, without adding
      # an entry to +other+
      removable = other.key?(docid) ? other[docid] : nil
      results[docid] = codes.exclude(removable)
    end
    replace(results)
  end

  # deletes all coding except that which is also covered by +other+
  def join(other)
    results = CodingTable.new
    keys.find_all { |doc| other.key?(doc) }.each do |docid|
      results[docid] = self[docid].intersect(other[docid])
    end
    replace(results)
  end

  # Sorts the [docid, codeset] pairs with the given block, or by <=>
  # when no block is supplied.
  def sort(&block)
    if block_given?
      super(&block)
    else
      super { |a, b| a <=> b }
    end
  end
end
311
+
312
# a FragmentTable holds a collection of fragments. It contains a
# number of CodeSets of Fragments. Each CodeSet can be retrieved
# either by document title or by document dbid.
#   tbl = FragmentTable.new()
#   f = Fragment.new('Weft QDA', 'the title', 6, 1)
#   tbl.add(f)
#   tbl['the title'] # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
#   tbl[1] # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
class FragmentTable < CodingTable
  def initialize
    # maps document title => docid; filled in by +add+
    @titles = {}
    super()
  end

  # Assumes this is a document title if a string, or a dbid if an integer.
  # A title that has never been added yields a new empty CodeSet and
  # leaves the table unchanged.
  def [](k)
    return super(k) unless k.kind_of?(String)
    @titles.key?(k) ? super(@titles[k]) : CodeSet.new
  end

  # Always use this method to add fragments to the collection.
  # Raises ArgumentError unless +fragment+ is a Fragment.
  def add(fragment)
    unless fragment.is_a?(Fragment)
      raise ArgumentError, "Fragment expected, got #{fragment.inspect}"
    end
    self[fragment.docid].add(fragment)
    @titles[fragment.doctitle] = fragment.docid
  end

  # Yields each known title, in sorted order, with that document's CodeSet.
  def each_title
    @titles.keys.sort.each do |title|
      yield title, self[@titles[title]]
    end
  end

  # Returns a CodingTable holding the +to_code+ form of every fragment.
  def to_codingtable
    ct = CodingTable.new
    each do |docid, codeset|
      ct[docid] = QDA::CodeSet[*codeset.map { |frag| frag.to_code }]
    end
    ct
  end
end
355
+ end
@@ -0,0 +1,118 @@
1
+ require 'weft/coding'
2
+
3
+ module QDA
4
# A piece of document text that also records which document it came from
# (+doctitle+, +docid+) and the +offset+ at which it starts.
class Fragment < String
  include Coding
  attr_reader :doctitle, :offset
  attr_accessor :docid

  # +text+ is the fragment's content, +doctitle+ the source document's
  # title, +offset+ a non-negative integer start position and +docid+ an
  # optional integer document id.
  # Raises ArgumentError when any of these has the wrong type.
  def initialize(text, doctitle, offset, docid = nil)
    super(text)
    unless doctitle.kind_of? String
      raise ArgumentError,
            "Fragment.new expects a doctitle string, got #{doctitle.inspect}"
    end

    # Integer rather than Fixnum: Fixnum no longer exists in current Ruby
    unless offset.kind_of?(Integer) && offset >= 0
      raise ArgumentError,
            "Fragment.new expects an integer offset, got #{offset.inspect}"
    end

    unless docid.nil? || docid.kind_of?(Integer)
      raise ArgumentError,
            "Fragment.new expects an integer docid, got #{docid.inspect}"
    end
    @doctitle = doctitle
    @offset = offset
    # of the document - duplicates role of doctitle - to fix
    @docid = docid
  end

  # Equal when the text, offset and document title all match. Anything
  # that lacks +offset+ or +doctitle+ (e.g. a plain String) is not equal.
  def ==(other)
    super(other) &&
      other.respond_to?(:offset) && other.respond_to?(:doctitle) &&
      @offset == other.offset &&
      @doctitle == other.doctitle
  end

  # Returns a Code built from this fragment's docid, offset and length.
  def to_code
    Code.new(@docid, offset, length)
  end

  # Returns this fragment as a Code; +other+ is not consulted.
  def coerce(other)
    self.to_code
  end

  # does this code completely cover the document
  # Not implemented yet: always raises NotImplementedError.
  def complete?
    raise NotImplementedError, "Fragment#complete? is not implemented" # need to fix
  end

  # returns a fragment from +abs+ (relative to the whole document)
  # that is +length+ long
  # Raises RuntimeError if +abs+ lies before the start of this fragment.
  def [](abs, length)
    if abs < self.offset
      raise "Can't get part of non-overlapping string"
    end
    Fragment.new(super(abs - self.offset, length),
                 @doctitle, abs, @docid)
  end

  # Short description showing at most the first 50 characters of text.
  def inspect
    str = length <= 50 ? self.to_s : self.to_s[0, 50] << '...'
    "<*Fragment #{docid} #{offset}-#{self.end} : '#{str}'>"
  end
end
69
+
70
# A whole document: a Fragment starting at offset 0 that also carries a
# title, a memo, creation/modification dates and a database id.
class Document < Fragment
  attr_reader :meta, :create_date, :mod_date, :dbid
  attr_accessor :title, :memo

  # expects dbid to be set later
  def initialize(title, text = '', memo = '',
                 create_date = nil, mod_date = nil)
    super(text, title, 0)
    @title = title
    @memo = memo

    @create_date = create_date
    @mod_date = mod_date
  end

  # The document's content as a String.
  def text
    self.to_s
  end

  # Sets the database id. Raises ArgumentError unless +dbid+ is an
  # integer or nil.
  def dbid=(dbid)
    # Integer rather than Fixnum: Fixnum no longer exists in current Ruby
    unless dbid.nil? || dbid.kind_of?(Integer)
      raise ArgumentError,
            "Document dbid should be an integer or nil, got #{dbid.inspect}"
    end
    @dbid = dbid
  end

  # marks the document as created now
  def create
    @create_date = Time.now
  end

  # Appends +text+ followed by +term_char+ to the document.
  # returns the number of characters appended
  def append(text, term_char = "\n")
    # NOTE(review): `$` matches at the end of every line, not only at the
    # end of the string, so line breaks inside +text+ can be removed too -
    # confirm whether callers rely on that before tightening to \z
    ins = text.gsub(/[\r\n]+$/, '') + term_char
    self << ins
    ins.length
  end

  # Returns a Fragment of +num_chars+ characters starting at +from+,
  # labelled with this document's title and dbid.
  def [](from, num_chars)
    Fragment.new(super, title, from, @dbid)
  end

  def inspect
    "<*Document #{dbid} '#{title}' (#{length} chars)>"
  end
end
118
+ end