weft-qda 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/lib/weft.rb +21 -0
  2. data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
  3. data/lib/weft/application.rb +130 -0
  4. data/lib/weft/backend.rb +39 -0
  5. data/lib/weft/backend/marshal.rb +26 -0
  6. data/lib/weft/backend/mysql.rb +267 -0
  7. data/lib/weft/backend/n6.rb +366 -0
  8. data/lib/weft/backend/sqlite.rb +633 -0
  9. data/lib/weft/backend/sqlite/category_tree.rb +104 -0
  10. data/lib/weft/backend/sqlite/schema.rb +152 -0
  11. data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
  12. data/lib/weft/category.rb +157 -0
  13. data/lib/weft/coding.rb +355 -0
  14. data/lib/weft/document.rb +118 -0
  15. data/lib/weft/filters.rb +243 -0
  16. data/lib/weft/wxgui.rb +687 -0
  17. data/lib/weft/wxgui/category.xpm +26 -0
  18. data/lib/weft/wxgui/dialogs.rb +128 -0
  19. data/lib/weft/wxgui/document.xpm +25 -0
  20. data/lib/weft/wxgui/error_handler.rb +52 -0
  21. data/lib/weft/wxgui/inspectors.rb +361 -0
  22. data/lib/weft/wxgui/inspectors/category.rb +165 -0
  23. data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
  24. data/lib/weft/wxgui/inspectors/document.rb +139 -0
  25. data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
  26. data/lib/weft/wxgui/inspectors/script.rb +35 -0
  27. data/lib/weft/wxgui/inspectors/search.rb +265 -0
  28. data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
  29. data/lib/weft/wxgui/lang.rb +17 -0
  30. data/lib/weft/wxgui/lang/en.rb +45 -0
  31. data/lib/weft/wxgui/mondrian.xpm +44 -0
  32. data/lib/weft/wxgui/search.xpm +25 -0
  33. data/lib/weft/wxgui/sidebar.rb +498 -0
  34. data/lib/weft/wxgui/utilities.rb +148 -0
  35. data/lib/weft/wxgui/weft16.xpm +31 -0
  36. data/lib/weft/wxgui/workarea.rb +249 -0
  37. data/test/001-document.rb +196 -0
  38. data/test/002-category.rb +138 -0
  39. data/test/003-code.rb +370 -0
  40. data/test/004-application.rb +52 -0
  41. data/test/006-filters.rb +139 -0
  42. data/test/009a-backend_sqlite_basic.rb +280 -0
  43. data/test/009b-backend_sqlite_complex.rb +175 -0
  44. data/test/009c_backend_sqlite_bench.rb +81 -0
  45. data/test/010-backend_nudist.rb +5 -0
  46. data/test/all-tests.rb +1 -0
  47. data/test/manual-gui-script.txt +24 -0
  48. data/test/testdata/autocoding-test.txt +15 -0
  49. data/test/testdata/iso-8859-1.txt +5 -0
  50. data/test/testdata/sample_doc.txt +19 -0
  51. data/test/testdata/search_results.txt +1254 -0
  52. data/test/testdata/text1-dos-ascii.txt +2 -0
  53. data/test/testdata/text1-unix-utf8.txt +2 -0
  54. data/weft-qda.rb +28 -0
  55. metadata +96 -0
@@ -0,0 +1,355 @@
1
+ module QDA
2
# Behaviour shared by anything that marks a contiguous run of characters
# in a document.
#
# Classes mixing-in should implement the +offset+, +length+,
# <tt>[start, length]</tt> and +<<+ methods. <tt>[start, length]</tt> is
# called with an absolute +start+ position (the same scale as +offset+).
# When two codings of different classes are combined, +coerce+ is also
# called on each of them.
module Coding
  # The position immediately after the last coded character.
  def end()
    offset + length
  end

  # Returns true if the character at position +point+ is covered by this
  # coding. Raises ArgumentError if +point+ is nil.
  def include?(point)
    if point.nil?
      raise ArgumentError,
            "Point should be an integer, got #{point.inspect}"
    end
    point >= offset && point < self.end
  end
  alias :contains? :include?

  # Returns true if self and +other+ overlap at any point - ie there
  # is at least one character that is coded by both items
  def overlap?(other)
    first, second = [self, other].sort_by { |x| x.offset }
    first.end > second.offset
  end

  # Returns true if self and +other+ overlap or are contiguous
  def touch?(other)
    first, second = [self, other].sort_by { |x| x.offset }
    first.end >= second.offset
  end

  # Orders codings by +offset+, falling back to +end+ when the offsets
  # are equal.
  def <=>(other)
    if offset == other.offset
      self.end <=> other.end
    else
      offset <=> other.offset
    end
  end

  # Validates +other+ and returns the pair of operands that a combining
  # method should work on. If +other+ is not an instance of this object's
  # class, both operands are passed through +coerce+ so that they end up
  # as a common kind of thing. Raises ArgumentError unless +other+ mixes
  # in Coding.
  def prepare_args(other)
    unless other.kind_of?(Coding)
      raise ArgumentError,
            "Cannot combine with #{other.inspect}, should implement Coding"
    end

    if other.is_a?(self.class)
      return self, other
    else
      return self.coerce(other), other.coerce(self)
    end
  end

  # returns the code representing the intersection of +self+ and +other+
  # returns nil if there is no overlap
  def intersect(other)
    # +this+ represents self, possibly coerced into a different class
    this, other = prepare_args(other)
    return nil unless this.overlap?(other)

    common_start = [other.offset, this.offset].max
    common_end   = [other.end, this.end].min
    # slice the shared span out of whichever operand sorts first
    earliest = QDA::CodeSet[this, other].sort[0]
    earliest[common_start, common_end - common_start]
  end
  alias :% :intersect

  # returns a QDA::CodeSet created by removing the characters coded
  # by +other+ from +self+. The returned CodeSet may be 0, 1 or 2
  # elements long. The diagram below shows how 2 results may be
  # returned.
  #
  #   -----+++++++++++++++++------ # self
  #   -
  #   -----------++++++++--------- # exclude
  #   =
  #   -----++++++--------+++------ # result
  def exclude(other)
    this, other = prepare_args(other)
    results = QDA::CodeSet[]

    # whatever lies before the start of +other+
    if this.offset < other.offset
      if this.end < other.offset
        results.add(this)
      else
        results.add(this[this.offset, other.offset - this.offset])
      end
    end

    # whatever lies after the end of +other+
    if this.end > other.end
      if this.offset > other.end
        results.add(this)
      else
        results.add(this[other.end, this.end - other.end])
      end
    end
    results
  end
  alias :- :exclude

  # Returns the code produced by merging +self+ with +other+. If the
  # two codes do not touch each other, then a CodeSet of the two codes
  # is returned.
  def union(other)
    this, other = prepare_args(other)
    return this if this == other
    # use the (possibly coerced) +this+ throughout, so that the returned
    # set never mixes the original and the coerced class
    return QDA::CodeSet[this, other].sort unless this.touch?(other)

    # if they overlap or touch, a single coding will be returned
    first, second = QDA::CodeSet[this, other].sort
    merged = first.dup
    if second.end > first.end
      # append only the part of +second+ that sticks out past +first+
      merged << second[first.end, second.end - first.end]
    end
    merged
  end
  alias :+ :union
end
120
+
121
# A collection of things that are +Coding+ - ie that mix-in the module
# above. The combining methods (+union+, +intersect+) assume that the
# members of any one set do not overlap each other, which is what +add+
# maintains.
class CodeSet < Array
  # direct mutation is restricted to other CodeSets; use +add+ and
  # +subtract+ from outside
  protected :<<, :push, :pop, :shift, :unshift

  # Populate a new CodeSet from an array of coding items +arr+. These
  # should either be QDA::Codes, QDA::Fragments or three-item arrays.
  # Where the latter are found, they will be automatically turned into
  # QDA::Code, taking the contents of each three-item array to be
  # [+docid+, +offset+, +length+]. +arr+ itself is left unmodified.
  # Raises ArgumentError for any other kind of item.
  def initialize(arr = [])
    # map, not collect!, so the caller's array is not rewritten
    items = arr.map do |item|
      case item
      when Array
        Code.new(*item)
      when Fragment, Code
        item
      else
        raise ArgumentError, "unexpected item #{item} in list"
      end
    end
    super(items)
  end

  # iterate over each successive neighbouring pair of codings in
  # the set, i.e. items 1, 2; items 2, 3; items 3, 4 .. items n-1, n.
  def each_pair()
    0.upto(length - 2) { |i| yield self[i], self[i + 1] }
  end

  # Sorts using the block if one is given, otherwise using the members'
  # own <=>.
  def sort()
    block_given? ? super : super { |a, b| a <=> b }
  end

  # Returns the set of extents covered both by this set and by +other+,
  # which should be a QDA::CodeSet.
  def intersect(other)
    results = CodeSet[]
    mine   = sort_by { |c| c.offset }
    theirs = other.sort_by { |c| c.offset }

    # Walk both offset-ordered lists together. Comparing only neighbours
    # in a single end-ordered list misses cases where one long coding
    # covers several short ones from the other set.
    i = 0
    j = 0
    while i < mine.length && j < theirs.length
      a = mine[i]
      b = theirs[j]
      common = a % b
      results << common if common
      # the coding that finishes first cannot overlap anything later
      if a.end < b.end
        i += 1
      else
        j += 1
      end
    end
    results
  end

  # add the extent covered by +code+ (a QDA::Code) to the set,
  # modifying in place.
  def add(code)
    replace(union(CodeSet[code]))
  end

  # remove the extent covered by +uncode+ (a QDA::Code) from the set,
  # modifying it in place.
  def subtract(uncode)
    replace(exclude(CodeSet[uncode]))
  end

  # returns the set produced by removing all extents covered by
  # +other+, which should be a QDA::CodeSet. Note that unlike +union+
  # and +intersect+
  #   self.exclude(other) != other.exclude(self)
  # Returns self unchanged if +other+ is nil.
  def exclude(other)
    return self if other.nil?
    results = dup
    other.each do |uncode|
      # each subtraction yields a CodeSet of 0-2 pieces; splice them back
      # into one flat list
      results.collect! { |code| code - uncode }.flatten!
    end
    results
  end

  # Returns the set produced by merging all the codes in this one
  # with those in +other+, which should be a QDA::CodeSet
  def union(other)
    results = CodeSet[]
    # ordering by start position guarantees that every coding touching
    # the one being accumulated is seen before any that does not
    sorted = (self + other).sort_by { |f| f.offset }

    last_code = nil
    sorted.each do |code|
      if !last_code
        last_code = code
      elsif last_code.touch?(code)
        last_code = last_code + code
      else
        results.push(last_code)
        last_code = code.dup
      end
    end
    # nothing to flush when both sets were empty
    results.push(last_code) if last_code
    results
  end
end
210
+
211
# a hash representing a complex series of codes applied to one or
# more documents. Keys are document ids and values are CodeSets.
class CodingTable < Hash
  # Looking up a document id that has no entry yet creates, stores and
  # returns an empty CodeSet for it.
  def initialize
    super { |table, docid| table[docid] = CodeSet.new }
  end

  # should access using +add+ or +set+
  protected :[]=

  # add the coding of +item+ to the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def add(item)
    self[item.docid].add(item)
  end

  # Sets the coding of the document identified by +docid+ to be +codeset+.
  # Raises ArgumentError unless +codeset+ is a CodeSet.
  def set(docid, codeset)
    unless codeset.kind_of?(CodeSet)
      raise ArgumentError,
            "Cannot set codeset #{codeset.inspect} as a CodingTable entry"
    end
    self[docid] = codeset
  end

  # Removes all coding associated with +docid+
  alias :unset :delete

  # remove the coding of +item+ from the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def subtract(item)
    self[item.docid].subtract(item)
  end

  # The number of documents that have at least one coding.
  def num_of_docs
    values.count { |codes| codes.length > 0 }
  end

  # The total number of codings across all documents.
  def num_of_codes
    values.inject(0) { |count, codeset| count + codeset.length }
  end

  # The total number of characters covered by all codings.
  def num_of_chars
    values.inject(0) do |total, codes|
      codes.inject(total) { |sub_total, code| sub_total + code.length }
    end
  end

  # returns true if this coding table contains coding for the
  # document +doc+
  def codes?(doc)
    key?(doc.dbid) && self[doc.dbid].length > 0
  end

  # Adds the coding of the other coding table +other+ to this one,
  # modifying +self+ in place
  def merge(other)
    results = CodingTable.new
    (keys + other.keys).uniq.each do |docid|
      # Test with key? rather than []: a lookup would trigger the default
      # proc, which always returns a set and stores an empty entry in the
      # table being read.
      results[docid] =
        if !other.key?(docid)
          self[docid]
        elsif !key?(docid)
          other[docid]
        else
          self[docid].union(other[docid])
        end
    end
    replace(results)
  end

  # Removes all coding from this table that occurs in the other table
  # +other+, modifying this CodingTable in place
  def remove(other)
    results = CodingTable.new
    each do |docid, codes|
      # documents that +other+ says nothing about are kept untouched,
      # without creating an entry for them in +other+
      results[docid] = other.key?(docid) ? codes.exclude(other[docid]) : codes
    end
    replace(results)
  end

  # deletes all coding except that which is also covered by +other+
  def join(other)
    results = CodingTable.new
    keys.select { |docid| other.key?(docid) }.each do |docid|
      results[docid] = self[docid].intersect(other[docid])
    end
    replace(results)
  end

  # Returns the table's [docid, codeset] pairs as a sorted array, using
  # the block if one is given and <=> otherwise.
  def sort(&block)
    if block_given?
      super(&block)
    else
      super { |a, b| a <=> b }
    end
  end
end
311
+
312
# a FragmentTable holds a collection of fragments. It contains a
# number of CodeSets of Fragments. Each CodeSet can be retrieved
# either by document title or by document dbid.
#   tbl = FragmentTable.new()
#   f = Fragment.new('Weft QDA', 'the title', 6, 1)
#   tbl.add(f)
#   tbl['the title'] # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
#   tbl[1] # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
class FragmentTable < CodingTable
  def initialize
    # Maps document title => docid. A plain hash: an unknown title must
    # not be given a made-up docid.
    @titles = {}
    super()
  end

  # Assumes this is a document title if a string, or a dbid if an integer.
  # A title that has never been added yields an empty CodeSet that is not
  # stored in the table.
  def [](k)
    return super(k) unless k.kind_of?(String)
    @titles.key?(k) ? super(@titles[k]) : CodeSet.new
  end

  # Always use this method to add fragments to the collection.
  # Raises ArgumentError unless +fragment+ is a Fragment.
  def add(fragment)
    unless fragment.is_a?(Fragment)
      raise ArgumentError, "Fragment expected, got #{fragment.inspect}"
    end
    self[fragment.docid].add(fragment)
    # remember which document the title refers to, for lookup by title
    @titles[fragment.doctitle] = fragment.docid
  end

  # Yields each known document title, in sorted order, together with
  # the CodeSet for that document.
  def each_title()
    @titles.keys.sort.each do |title|
      yield title, self[@titles[title]]
    end
  end

  # Returns a new CodingTable in which every fragment has been
  # converted with +to_code+.
  def to_codingtable()
    ct = CodingTable.new
    each do |docid, codeset|
      ct[docid] = QDA::CodeSet[*codeset.map { |frag| frag.to_code }]
    end
    ct
  end
end
355
+ end
@@ -0,0 +1,118 @@
1
+ require 'weft/coding'
2
+
3
+ module QDA
4
# A piece of document text that remembers where it came from: the title
# and id of its document, and its character offset within that document.
class Fragment < String
  include Coding
  attr_reader :doctitle, :offset
  attr_accessor :docid

  # +text+ is the content of the fragment, +doctitle+ the title of the
  # document it belongs to, +offset+ its non-negative starting position
  # and +docid+ the (optional) integer id of the document.
  # Raises ArgumentError if any argument is of the wrong kind.
  def initialize(text, doctitle, offset, docid = nil)
    super(text)
    unless doctitle.kind_of? String
      raise ArgumentError,
            "Fragment.new expects a doctitle string, got #{doctitle.inspect}"
    end

    # Integer rather than Fixnum: Fixnum is a subclass of Integer on old
    # Rubies and no longer exists on current ones
    unless offset.kind_of?(Integer) && offset >= 0
      raise ArgumentError,
            "Fragment.new expects an integer offset, got #{offset.inspect}"
    end

    unless docid.nil? || docid.kind_of?(Integer)
      raise ArgumentError,
            "Fragment.new expects an integer docid, got #{docid.inspect}"
    end
    @doctitle = doctitle
    @offset = offset
    # of the document - duplicates role of doctitle - to fix
    @docid = docid
  end

  # Two fragments are equal when they have the same text, offset and
  # document title. Anything that cannot report an offset and a document
  # title (e.g. a plain String) is never equal to a fragment.
  def ==(other)
    super(other) &&
      other.respond_to?(:offset) && other.respond_to?(:doctitle) &&
      @offset == other.offset &&
      @doctitle == other.doctitle
  end

  # Returns a Code covering the same document, offset and length.
  def to_code()
    Code.new(@docid, offset, length)
  end

  # When combined with a coding of another class, a fragment is reduced
  # to its Code.
  def coerce(other)
    self.to_code()
  end

  # does this code completely cover the document
  #
  # NOTE(review): not implemented. This has always returned the
  # NotImplementedError class itself (a truthy value) instead of raising
  # it; that is kept so existing callers behave as before - confirm
  # whether raising is wanted.
  def complete?()
    return NotImplementedError # need to fix
  end

  # returns a fragment from +abs+ (relative to the whole document)
  # that is +length+ long. Raises if +abs+ lies before the start of
  # this fragment.
  def [](abs, length)
    if abs < self.offset
      raise "Can't get part of non-overlapping string"
    end
    Fragment.new(super(abs - self.offset, length),
                 @doctitle, abs, @docid)
  end

  # A short description showing at most the first 50 characters of text.
  def inspect()
    str = length < 50 ? self.to_s : self.to_s[0, 50] << '...'
    "<*Fragment #{docid} #{offset}-#{self.end} : '#{str}'>"
  end
end
69
+
70
# A whole document: a Fragment at offset 0 that also carries a title,
# a memo, creation / modification dates and a database id.
class Document < Fragment
  attr_reader :meta, :create_date, :mod_date, :dbid
  attr_accessor :title, :memo

  # expects dbid to be set later
  def initialize(title, text = '', memo = '',
                 create_date = nil, mod_date = nil)
    super(text, title, 0)
    @title = title
    @memo = memo

    @create_date = create_date
    @mod_date = mod_date
  end

  # The document's content as a plain string.
  def text
    self.to_s
  end

  # Sets the database id. Raises ArgumentError unless +dbid+ is an
  # integer or nil.
  def dbid=(dbid)
    # Integer rather than Fixnum: Fixnum is a subclass of Integer on old
    # Rubies and no longer exists on current ones
    unless dbid.nil? || dbid.kind_of?(Integer)
      raise ArgumentError,
            "Document dbid should be an integer or nil, got #{dbid.inspect}"
    end
    @dbid = dbid
  end

  # marks the document as created now
  def create()
    @create_date = Time.now()
  end

  # Appends +text+ followed by +term_char+ to the document, after
  # stripping line-ending characters from +text+.
  # returns the number of characters appended
  #
  # NOTE(review): +$+ matches at the end of every line, not only at the
  # end of the string, so line endings inside a multi-line +text+ can be
  # removed as well. Left as is in case callers rely on it - confirm.
  def append(text, term_char = "\n")
    ins = text.gsub(/[\r\n]+$/, '') + term_char
    self << ins
    ins.length
  end

  # Returns the Fragment of +num_chars+ characters starting at position
  # +from+, labelled with this document's current title and dbid.
  def [](from, num_chars)
    Fragment.new(super, title, from, @dbid)
  end

  def inspect()
    "<*Document #{dbid} '#{title}' (#{length} chars)>"
  end
end
118
+ end