weft-qda 0.9.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/weft.rb +21 -0
- data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
- data/lib/weft/application.rb +130 -0
- data/lib/weft/backend.rb +39 -0
- data/lib/weft/backend/marshal.rb +26 -0
- data/lib/weft/backend/mysql.rb +267 -0
- data/lib/weft/backend/n6.rb +366 -0
- data/lib/weft/backend/sqlite.rb +633 -0
- data/lib/weft/backend/sqlite/category_tree.rb +104 -0
- data/lib/weft/backend/sqlite/schema.rb +152 -0
- data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
- data/lib/weft/category.rb +157 -0
- data/lib/weft/coding.rb +355 -0
- data/lib/weft/document.rb +118 -0
- data/lib/weft/filters.rb +243 -0
- data/lib/weft/wxgui.rb +687 -0
- data/lib/weft/wxgui/category.xpm +26 -0
- data/lib/weft/wxgui/dialogs.rb +128 -0
- data/lib/weft/wxgui/document.xpm +25 -0
- data/lib/weft/wxgui/error_handler.rb +52 -0
- data/lib/weft/wxgui/inspectors.rb +361 -0
- data/lib/weft/wxgui/inspectors/category.rb +165 -0
- data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
- data/lib/weft/wxgui/inspectors/document.rb +139 -0
- data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
- data/lib/weft/wxgui/inspectors/script.rb +35 -0
- data/lib/weft/wxgui/inspectors/search.rb +265 -0
- data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
- data/lib/weft/wxgui/lang.rb +17 -0
- data/lib/weft/wxgui/lang/en.rb +45 -0
- data/lib/weft/wxgui/mondrian.xpm +44 -0
- data/lib/weft/wxgui/search.xpm +25 -0
- data/lib/weft/wxgui/sidebar.rb +498 -0
- data/lib/weft/wxgui/utilities.rb +148 -0
- data/lib/weft/wxgui/weft16.xpm +31 -0
- data/lib/weft/wxgui/workarea.rb +249 -0
- data/test/001-document.rb +196 -0
- data/test/002-category.rb +138 -0
- data/test/003-code.rb +370 -0
- data/test/004-application.rb +52 -0
- data/test/006-filters.rb +139 -0
- data/test/009a-backend_sqlite_basic.rb +280 -0
- data/test/009b-backend_sqlite_complex.rb +175 -0
- data/test/009c_backend_sqlite_bench.rb +81 -0
- data/test/010-backend_nudist.rb +5 -0
- data/test/all-tests.rb +1 -0
- data/test/manual-gui-script.txt +24 -0
- data/test/testdata/autocoding-test.txt +15 -0
- data/test/testdata/iso-8859-1.txt +5 -0
- data/test/testdata/sample_doc.txt +19 -0
- data/test/testdata/search_results.txt +1254 -0
- data/test/testdata/text1-dos-ascii.txt +2 -0
- data/test/testdata/text1-unix-utf8.txt +2 -0
- data/weft-qda.rb +28 -0
- metadata +96 -0
data/lib/weft/coding.rb
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
module QDA
|
2
|
+
# Behaviour shared by anything that codes a contiguous run of characters.
#
# Classes mixing-in should implement the offset, length, [x, y], and
# << methods. The combining methods below also call +coerce+ and +dup+
# on the codings involved, and collect their results in QDA::CodeSet.
module Coding
  # Returns the position just past the coded span: +offset+ + +length+.
  def end()
    offset + length
  end

  # Returns true if the position +point+ falls inside this coding. The
  # start of the span is inclusive, the end exclusive.
  #
  # Raises ArgumentError if +point+ is nil.
  def include?(point)
    if point.nil?
      raise ArgumentError,
            "Point should be an integer, got #{point.inspect}"
    end
    point >= offset && point < self.end()
  end
  alias :contains? :include?

  # Returns true if self and +other+ overlap at any point - ie there
  # is at least one character that is coded by both items
  def overlap?(other)
    first, second = [self, other].sort_by { |x| x.offset }
    first.end > second.offset
  end

  # Returns true if self and +other+ overlap or are contiguous
  def touch?(other)
    first, second = [self, other].sort_by { |x| x.offset }
    first.end >= second.offset
  end

  # Orders codings by +offset+, breaking ties by +end+.
  #
  # note that no-argument version of Array#sort does *not* call this
  # method
  def <=>(other)
    if offset == other.offset
      self.end <=> other.end
    else
      offset <=> other.offset
    end
  end

  # Returns the pair [this, other] to be combined. When +other+ is not
  # an instance of the receiver's class, both sides are passed through
  # +coerce+ so that the two items are converted before being combined.
  #
  # Raises ArgumentError if +other+ does not mix in Coding.
  def prepare_args(other)
    # it should be a type that implements these methods
    unless other.kind_of?(Coding)
      raise ArgumentError,
            "Cannot combine with #{other.inspect}, should implement Coding"
    end

    # if it's not the same class, we need to determine what kind of
    # thing to return by combining the classes
    if other.is_a?(self.class)
      return self, other
    else
      return self.coerce(other), other.coerce(self)
    end
  end

  # returns the code representing the intersection of +self+ and +other+
  # returns nil if there is no overlap
  def intersect(other)
    # +this+ represents self, possibly coerced into a different class
    this, other = prepare_args(other)
    return nil unless this.overlap?(other)

    earliest = QDA::CodeSet[this, other].sort[0]
    shared_start = [other.offset, this.offset].max
    shared_end = [other.end, this.end].min
    # the slice is taken from whichever coding sorts first
    earliest[shared_start, shared_end - shared_start]
  end
  alias :% :intersect

  # returns a QDA::CodeSet created by removing the characters coded
  # by +other+ from +self+. The returned CodeSet may be 0, 1 or 2
  # elements long. The diagram below shows how 2 results may be
  # returned.
  #
  #   -----+++++++++++++++++------  # self
  #   -
  #   -----------++++++++---------  # exclude
  #   =
  #   -----++++++--------+++------  # result
  def exclude(other)
    this, other = prepare_args(other)
    results = QDA::CodeSet[]

    # whatever lies before the start of +other+ survives
    if this.offset < other.offset
      if this.end < other.offset
        results.add(this)
      else
        results.add(this[this.offset, other.offset - this.offset])
      end
    end

    # whatever lies after the end of +other+ survives
    if this.end > other.end
      if this.offset > other.end
        results.add(this)
      else
        results.add(this[other.end, this.end - other.end])
      end
    end
    results
  end
  alias :- :exclude

  # Returns the code produced by merging +self+ with +other+. If the
  # two codes do not touch each other, then a CodeSet of the two codes
  # is returned instead.
  def union(other)
    this, other = prepare_args(other)
    return this if this == other
    # use the coerced +this+ here too, so that both members of the
    # returned set have been through the same conversion
    return QDA::CodeSet[this, other].sort unless this.touch?(other)

    # if they overlap or touch, a single coding will be returned
    first, second = QDA::CodeSet[this, other].sort
    merged = first.dup()
    if second.end > first.end
      # append only the part of +second+ that extends past +first+
      merged << second[first.end, second.end - first.end]
    end
    merged
  end
  alias :+ :union
end
|
120
|
+
|
121
|
+
# A collection of things that are +Coding+ - ie that mix-in the module
# QDA::Coding.
#
# The in-place Array mutators below are made protected; callers are
# expected to change a set through +add+ and +subtract+.
class CodeSet < Array
  protected :<<, :push, :pop, :shift, :unshift

  # Populate a new CodeSet from an array of coding items +arr+. These
  # should either be QDA::Codes, QDA::Fragments or arrays. Where the
  # latter are found, they will be automatically turned into QDA::Code
  # by passing their contents to Code.new, taking each array to be
  # [+docid+, +offset+, +length+].
  #
  # +arr+ itself is left unmodified.
  #
  # Raises ArgumentError if any other kind of item is found.
  def initialize(arr = [])
    # collect (not collect!) so the caller's array is not rewritten
    items = arr.collect do |item|
      case item
      when Array
        Code.new(*item)
      when Fragment, Code
        item
      else
        raise ArgumentError, "unexpected item #{item} in list"
      end
    end
    super(items)
  end

  # iterate over each successive neighbouring pair of codings in
  # the set, i.e. items 1, 2; items 2, 3; items 3, 4 .. items n-1, n.
  def each_pair()
    0.upto(length - 2) { |i| yield self[i], self[i + 1] }
  end

  # Sorts with the given block, or by the codings' own <=> if no block
  # is supplied.
  def sort()
    block_given? ? super : super { |a, b| a <=> b }
  end

  # Returns a CodeSet of the extents that are covered both by a coding
  # in this set and by a coding in +other+, ordered by position.
  #
  # Every coding here is compared with every coding in +other+, so a
  # single long coding that spans several codings of the other set
  # yields one result for each of them.
  def intersect(other)
    results = CodeSet[]
    each do |mine|
      other.each do |theirs|
        shared = mine % theirs # nil when the two do not overlap
        results << shared if shared
      end
    end
    CodeSet[*results.sort_by { |code| [code.offset, code.end] }]
  end

  # add the extent covered by +code+ (a QDA::Code) to the set,
  # modifying in place.
  def add(code)
    replace(union(CodeSet[code]))
  end

  # remove the extent covered by +uncode+ (a QDA::Code) from the set,
  # modifying it in place.
  def subtract(uncode)
    replace(exclude(CodeSet[uncode]))
  end

  # returns the set produced by removing all extents covered by
  # +other+, which should be a QDA::CodeSet. Note that unlike +union+
  # and +intersect+
  #   self.exclude(other) != other.exclude(self)
  #
  # Returns self unchanged if +other+ is nil.
  def exclude(other)
    return self if other.nil?
    results = self.dup
    other.each do |uncode|
      # each subtraction yields a collection of 0-2 pieces, so the
      # nested collections are flattened back into one list
      results.collect! { |code| code - uncode }.flatten!
    end
    results
  end

  # Returns the set produced by merging all the codes in this one
  # with those in +other+, which should be a QDA::CodeSet. Codings that
  # overlap or touch are merged into one; the result is ordered by
  # position. The union of two empty sets is an empty set.
  def union(other)
    results = CodeSet[]
    # order by start so that a coding enclosing several others is met
    # before them and absorbs them all
    ordered = (self + other).sort_by { |code| [code.offset, code.end] }

    last_code = nil
    ordered.each do |code|
      if !last_code
        last_code = code
      elsif last_code.touch?(code)
        last_code = last_code + code
      else
        results.push(last_code)
        last_code = code.dup
      end
    end
    # nothing is pending when both sets were empty
    results.push(last_code) if last_code
    results
  end
end
|
210
|
+
|
211
|
+
# a hash representing a complex series of codes applied to one or
# more documents. Keys are document ids and values are CodeSets;
# reading a document id that has no entry creates an empty CodeSet
# for it.
class CodingTable < Hash
  def initialize
    super { |h, k| h[k] = CodeSet.new() }
  end

  # entries should be changed using +add+ or +set+, not assigned directly
  protected :[]=

  # add the coding of +item+ to the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def add(item)
    self[item.docid].add(item)
  end

  # Sets the coding of the document identified by +docid+ to be +codeset+
  #
  # Raises ArgumentError unless +codeset+ is a CodeSet.
  def set(docid, codeset)
    unless codeset.kind_of?(CodeSet)
      raise ArgumentError,
            "Cannot set codeset #{codeset.inspect} as a CodingTable entry"
    end
    self[docid] = codeset
  end

  # Removes all coding associated with +docid+
  alias :unset :delete

  # remove the coding of +item+ from the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def subtract(item)
    self[item.docid].subtract(item)
  end

  # Returns the number of documents that have at least one coding.
  def num_of_docs
    values.reject { |codes| codes.length == 0 }.length
  end

  # Returns the total number of codings across all documents.
  def num_of_codes
    values.inject(0) { |count, codeset| count + codeset.length }
  end

  # Returns the sum of the lengths of every coding in the table.
  def num_of_chars
    values.inject(0) do |total, codes|
      codes.inject(total) { |sub_total, code| sub_total + code.length }
    end
  end

  # returns true if this coding table contains coding for the
  # document +doc+
  def codes?(doc)
    # key? is checked first so that asking does not create an entry
    key?(doc.dbid) && self[doc.dbid].length > 0
  end

  # Adds the coding of the other coding table +other+ to this one,
  # modifying +self+ in place. +other+ is left untouched.
  def merge(other)
    results = CodingTable.new()
    (keys + other.keys).uniq.each do |docid|
      # fetch bypasses the default block, so a document missing from
      # one side does not get an empty entry created in that table
      ours = fetch(docid) { CodeSet.new() }
      theirs = other.fetch(docid) { CodeSet.new() }
      results[docid] = ours.union(theirs)
    end
    replace(results)
  end

  # Removes all coding from this table that occurs in the other table
  # +other+, modifying this CodingTable in place. +other+ is left
  # untouched.
  def remove(other)
    results = CodingTable.new()
    each do |docid, codes|
      # nil (document absent from +other+) means nothing to exclude
      results[docid] = codes.exclude(other.fetch(docid, nil))
    end
    replace(results)
  end

  # deletes all coding except that which is also covered by +other+
  def join(other)
    results = CodingTable.new()
    keys.each do |docid|
      next unless other.key?(docid)
      results[docid] = self[docid].intersect(other[docid])
    end
    replace(results)
  end

  # Returns the table's [docid, codeset] pairs sorted with the given
  # block, or by <=> on the pairs when no block is supplied.
  def sort(&block)
    if block_given?
      super(&block)
    else
      super { |a, b| a <=> b }
    end
  end
end
|
311
|
+
|
312
|
+
# a FragmentTable holds a collection of fragments. It contains a
# number of CodeSets of Fragments. Each CodeSet can be retrieved
# either by document title or by document dbid.
#   tbl = FragmentTable.new()
#   f = Fragment.new('Weft QDA', 'the title', 6, 1)
#   tbl.add(f)
#   tbl['the title'] # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
#   tbl[1]           # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
class FragmentTable < CodingTable
  def initialize
    # maps document title => docid; filled in by +add+
    @titles = {}
    super()
  end

  # Assumes this is a document title if a string, or an dbid if an integer.
  #
  # A title that has never been added returns a new, empty CodeSet and
  # leaves the table unchanged.
  def [](k)
    return super(k) unless k.kind_of?(String)
    @titles.key?(k) ? super(@titles[k]) : CodeSet.new()
  end

  # Always use this method to add fragments to the collection
  #
  # Raises ArgumentError unless +fragment+ is a Fragment.
  def add(fragment)
    unless fragment.is_a?(Fragment)
      raise ArgumentError, "Fragment expected, got #{fragment.inspect}"
    end
    self[fragment.docid].add(fragment)
    # remember the title so the same set can be looked up by name
    @titles[fragment.doctitle] = fragment.docid
  end

  # Yields each known document title, in sorted order, together with
  # the CodeSet stored for that document.
  def each_title()
    @titles.keys.sort.each do |title|
      yield title, self[@titles[title]]
    end
  end

  # Returns a new CodingTable in which every fragment has been replaced
  # by the result of its +to_code+.
  def to_codingtable()
    ct = CodingTable.new
    each do |docid, codeset|
      ct.set(docid, QDA::CodeSet[*codeset.map { |frag| frag.to_code }])
    end
    ct
  end
end
|
355
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'weft/coding'
|
2
|
+
|
3
|
+
module QDA
|
4
|
+
# A piece of document text (the String content) together with where it
# came from: the title of the document, the offset of the text within
# that document and, optionally, the document's id.
class Fragment < String
  include Coding
  attr_reader :doctitle, :offset
  attr_accessor :docid

  # +text+ is the fragment's content, +doctitle+ the title of the
  # document, +offset+ the non-negative position of the text within
  # the document and +docid+ the document's integer id, or nil.
  #
  # Raises ArgumentError if +doctitle+, +offset+ or +docid+ is of the
  # wrong kind.
  def initialize(text, doctitle, offset, docid = nil)
    super(text)
    unless doctitle.kind_of? String
      raise ArgumentError,
            "Fragment.new expects a doctitle string, got #{doctitle.inspect}"
    end

    # Integer rather than Fixnum: Fixnum is not defined in current Ruby
    unless offset.kind_of?(Integer) && offset >= 0
      raise ArgumentError,
            "Fragment.new expects an integer offset, got #{offset.inspect}"
    end

    unless docid.nil? || docid.kind_of?(Integer)
      raise ArgumentError,
            "Fragment.new expects an integer docid, got #{docid.inspect}"
    end
    @doctitle = doctitle
    @offset = offset
    # of the document - duplicates role of doctitle - to fix
    @docid = docid
  end

  # Two fragments are equal if they have the same text, offset and
  # document title. Anything that has no +offset+ or +doctitle+ (a
  # plain String, for instance) is never equal to a fragment.
  def ==(other)
    super(other) &&
      other.respond_to?(:offset) && other.respond_to?(:doctitle) &&
      @offset == other.offset &&
      @doctitle == other.doctitle
  end

  # Returns a Code covering the same document, offset and length.
  def to_code()
    Code.new(@docid, offset, length)
  end

  # Conversion used when a fragment is combined with a different kind
  # of coding: the fragment is always reduced to a Code. +other+ is
  # not consulted.
  def coerce(other)
    self.to_code()
  end

  # does this code completely cover the document
  #
  # NOTE(review): not implemented. This returns the NotImplementedError
  # class itself (a truthy value) rather than raising it, so the
  # result must not be used as a boolean - need to fix.
  def complete?()
    return NotImplementedError # need to fix
  end

  # returns a fragment from +abs+ (relative to the whole document)
  # that is +length+ long
  #
  # Raises RuntimeError if +abs+ lies before the start of this fragment.
  def [](abs, length)
    if abs < self.offset
      raise "Can't get part of non-overlapping string"
    end
    # translate the document position into a position within this text
    Fragment.new(super(abs - self.offset, length),
                 @doctitle, abs, @docid)
  end

  # Short description showing the docid, the extent and the text,
  # which is cut to 50 characters followed by '...' when it is longer.
  def inspect()
    str = length < 50 ? self.to_s : self.to_s[0, 50] << '...'
    "<*Fragment #{docid} #{offset}-#{self.end} : '#{str}'>"
  end
end
|
69
|
+
|
70
|
+
# A whole document: a Fragment starting at offset 0 whose content is
# the document's text, plus a title, a memo, creation and modification
# dates and a database id.
class Document < Fragment
  attr_reader :meta, :create_date, :mod_date, :dbid
  attr_accessor :title, :memo

  # Creates a document called +title+ containing +text+.
  #
  # expects dbid to be set later
  def initialize(title, text = '', memo = '',
                 create_date = nil, mod_date = nil)
    super(text, title, 0)
    @title = title
    @memo = memo

    @create_date = create_date
    @mod_date = mod_date
  end

  # Returns the document's content as a String.
  def text
    self.to_s
  end

  # Sets the database id.
  #
  # Raises ArgumentError unless +dbid+ is an integer or nil.
  def dbid=(dbid)
    # Integer rather than Fixnum: Fixnum is not defined in current Ruby
    unless dbid.nil? || dbid.kind_of?(Integer)
      raise ArgumentError,
            "Document dbid should be an integer or nil, got #{dbid.inspect}"
    end
    @dbid = dbid
  end

  # marks the document as created now
  def create()
    @create_date = Time.now()
  end

  # Appends +text+ to the document, replacing any line terminators at
  # the very end of +text+ with +term_char+. Line breaks inside +text+
  # are kept as they are.
  #
  # returns the number of characters appended
  def append(text, term_char = "\n")
    # \z anchors at the end of the string; $ would also match at the
    # end of each interior line and remove line breaks from the middle
    ins = text.sub(/[\r\n]+\z/, '') + term_char
    self << ins
    ins.length
  end

  # Returns the Fragment of +num_chars+ characters starting at
  # position +from+, labelled with this document's title and dbid.
  def [](from, num_chars)
    Fragment.new(super, title, from, @dbid)
  end

  # Short description showing the dbid, title and length.
  def inspect()
    "<*Document #{dbid} '#{title}' (#{length} chars)>"
  end
end
|
118
|
+
end
|