weft-qda 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/weft.rb +21 -0
- data/lib/weft/WEFT-VERSION-STRING.rb +1 -0
- data/lib/weft/application.rb +130 -0
- data/lib/weft/backend.rb +39 -0
- data/lib/weft/backend/marshal.rb +26 -0
- data/lib/weft/backend/mysql.rb +267 -0
- data/lib/weft/backend/n6.rb +366 -0
- data/lib/weft/backend/sqlite.rb +633 -0
- data/lib/weft/backend/sqlite/category_tree.rb +104 -0
- data/lib/weft/backend/sqlite/schema.rb +152 -0
- data/lib/weft/backend/sqlite/upgradeable.rb +55 -0
- data/lib/weft/category.rb +157 -0
- data/lib/weft/coding.rb +355 -0
- data/lib/weft/document.rb +118 -0
- data/lib/weft/filters.rb +243 -0
- data/lib/weft/wxgui.rb +687 -0
- data/lib/weft/wxgui/category.xpm +26 -0
- data/lib/weft/wxgui/dialogs.rb +128 -0
- data/lib/weft/wxgui/document.xpm +25 -0
- data/lib/weft/wxgui/error_handler.rb +52 -0
- data/lib/weft/wxgui/inspectors.rb +361 -0
- data/lib/weft/wxgui/inspectors/category.rb +165 -0
- data/lib/weft/wxgui/inspectors/codereview.rb +275 -0
- data/lib/weft/wxgui/inspectors/document.rb +139 -0
- data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -0
- data/lib/weft/wxgui/inspectors/script.rb +35 -0
- data/lib/weft/wxgui/inspectors/search.rb +265 -0
- data/lib/weft/wxgui/inspectors/textcontrols.rb +304 -0
- data/lib/weft/wxgui/lang.rb +17 -0
- data/lib/weft/wxgui/lang/en.rb +45 -0
- data/lib/weft/wxgui/mondrian.xpm +44 -0
- data/lib/weft/wxgui/search.xpm +25 -0
- data/lib/weft/wxgui/sidebar.rb +498 -0
- data/lib/weft/wxgui/utilities.rb +148 -0
- data/lib/weft/wxgui/weft16.xpm +31 -0
- data/lib/weft/wxgui/workarea.rb +249 -0
- data/test/001-document.rb +196 -0
- data/test/002-category.rb +138 -0
- data/test/003-code.rb +370 -0
- data/test/004-application.rb +52 -0
- data/test/006-filters.rb +139 -0
- data/test/009a-backend_sqlite_basic.rb +280 -0
- data/test/009b-backend_sqlite_complex.rb +175 -0
- data/test/009c_backend_sqlite_bench.rb +81 -0
- data/test/010-backend_nudist.rb +5 -0
- data/test/all-tests.rb +1 -0
- data/test/manual-gui-script.txt +24 -0
- data/test/testdata/autocoding-test.txt +15 -0
- data/test/testdata/iso-8859-1.txt +5 -0
- data/test/testdata/sample_doc.txt +19 -0
- data/test/testdata/search_results.txt +1254 -0
- data/test/testdata/text1-dos-ascii.txt +2 -0
- data/test/testdata/text1-unix-utf8.txt +2 -0
- data/weft-qda.rb +28 -0
- metadata +96 -0
data/lib/weft/coding.rb
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
module QDA
|
2
|
+
# Span arithmetic shared by anything that codes a stretch of a document.
# Classes mixing-in should implement the offset, length, [x, y], and
# << methods. They also need +coerce+, which is called when an item is
# combined with an item of a different class.
module Coding
  # Returns the position immediately after the last coded character.
  def end()
    offset + length
  end

  # Returns true if +point+ lies inside the coded span; the start is
  # inclusive and the end is exclusive.
  #
  # Raises ArgumentError if +point+ is nil.
  def include?(point)
    if point.nil?
      raise ArgumentError,
            "Point should be an integer, got #{point.inspect}"
    end
    point >= offset && point < self.end
  end
  alias :contains? :include?

  # Returns true if self and +other+ overlap at any point - ie there
  # is at least one character that is coded by both items
  def overlap?(other)
    first, second = [self, other].sort_by { |item| item.offset }
    first.end > second.offset
  end

  # Returns true if self and +other+ overlap or are contiguous
  def touch?(other)
    first, second = [self, other].sort_by { |item| item.offset }
    first.end >= second.offset
  end

  # Orders by offset, and by end position when the offsets are equal.
  #
  # note that no-argument version of Array#sort does *not* call this
  # method
  def <=>(other)
    if offset == other.offset
      self.end <=> other.end
    else
      offset <=> other.offset
    end
  end

  # Returns a two-element list [this, other] ready to be combined. If
  # +other+ is an instance of self's class both are returned untouched;
  # otherwise each is passed through its own +coerce+ so the pair can be
  # combined.
  #
  # Raises ArgumentError if +other+ does not mix in Coding.
  def prepare_args(other)
    # it should be a type that implements these methods
    unless other.kind_of?(Coding)
      raise ArgumentError,
            "Cannot combine with #{other.inspect}, should implement Coding"
    end

    # if it's not the same class, we need to determine what kind of
    # thing to return by combining the classes
    if other.is_a?(self.class)
      return self, other
    else
      return self.coerce(other), other.coerce(self)
    end
  end

  # returns the code representing the intersection of +self+ and +other+
  # returns nil if there is no overlap
  def intersect(other)
    # +this+ represents self, possibly coerced into a different class
    this, other = prepare_args(other)
    return nil unless this.overlap?(other)

    # slice the shared stretch out of whichever item sorts first
    earliest = QDA::CodeSet[this, other].sort[0]
    from = [other.offset, this.offset].max
    upto = [other.end, this.end].min
    earliest[from, upto - from]
  end
  alias :% :intersect

  # returns a QDA::CodeSet created by removing the characters coded
  # by +other+ from +self+. The returned CodeSet may be 0, 1 or 2
  # elements long. The diagram below shows how 2 results may be
  # returned.
  #
  #   -----+++++++++++++++++------ # self
  #   -
  #   -----------++++++++--------- # exclude
  #   =
  #   -----++++++--------+++------ # result
  def exclude(other)
    this, other = prepare_args(other)
    results = QDA::CodeSet[]

    # whatever part of +this+ lies before +other+ starts
    if this.offset < other.offset
      if this.end < other.offset
        results.add(this)
      else
        results.add(this[this.offset, other.offset - this.offset])
      end
    end

    # whatever part of +this+ lies after +other+ finishes
    if this.end > other.end
      if this.offset > other.end
        results.add(this)
      else
        results.add(this[other.end, this.end - other.end])
      end
    end
    results
  end
  alias :- :exclude

  # Returns the code produced by merging +self+ with +other+. If the
  # two codes do not touch each other, then a sorted two-element list
  # holding both codes is returned instead. Both elements of that list
  # are the (possibly coerced) values produced by +prepare_args+, so the
  # list never mixes an un-coerced item with a coerced one.
  def union(other)
    this, other = prepare_args(other)
    return this if this == other
    return QDA::CodeSet[this, other].sort unless this.touch?(other)

    # if they overlap or touch, a single coding will be returned
    first, second = QDA::CodeSet[this, other].sort
    merged = first.dup
    if second.end > first.end
      # append only the tail of +second+ that sticks out past +first+
      merged << second[first.end, second.end - first.end]
    end
    merged
  end
  alias :+ :union
end
|
120
|
+
|
121
|
+
# A collection of things that are +Coding+ - ie that mix in
# QDA::Coding.
class CodeSet < Array
  # The raw Array mutators are protected, so outside callers cannot
  # invoke them with an explicit receiver; +add+ and +subtract+ are the
  # public ways to change the set.
  protected :<<, :push, :pop, :shift, :unshift

  # Populate a new CodeSet from an array of coding items +arr+. These
  # should either be QDA::Codes, QDA::Fragments or three-item arrays.
  # Where the latter are found, they will be automatically turned into
  # QDA::Code, taking the contents of each three-item array to be
  # [+docid+, +offset+, +length+]
  #
  # +arr+ itself is left untouched; the conversion works on a copy.
  #
  # Raises ArgumentError if any other kind of item is found.
  def initialize(arr = [])
    items = arr.collect do |item|
      case item
      when Array
        Code.new(*item)
      when Fragment, Code
        item
      else
        raise ArgumentError, "unexpected item #{item} in list"
      end
    end
    super(items)
  end

  # iterate over each successive neighbouring pair of codings in
  # the set, i.e. items 1, 2; items 2,3; items 3, 4 .. items n-1, n.
  # Yields nothing when the set holds fewer than two items.
  def each_pair()
    0.upto(length - 2) { |i| yield self[i], self[i + 1] }
  end

  # Sorts with the caller's block when one is given; otherwise compares
  # items with their own <=> by passing an explicit block to Array#sort.
  def sort()
    block_given? ? super : super { |a, b| a <=> b }
  end

  # Returns a CodeSet holding every stretch that is covered both by an
  # item of this set and by an item of +other+, ordered by position.
  # Every item of self is compared with every item of +other+, so an
  # item that overlaps several items on the other side contributes one
  # result per overlap.
  def intersect(other)
    results = CodeSet[]
    each do |mine|
      other.each do |theirs|
        shared = mine % theirs # nil when the two do not overlap
        results << shared unless shared.nil?
      end
    end
    CodeSet[*results.sort_by { |code| [code.offset, code.end] }]
  end

  # add the extent covered by +code+ (a QDA::Code) to the set,
  # modifying in place.
  def add(code)
    replace(union(CodeSet[code]))
  end

  # remove the extent covered by +uncode+ (a QDA::Code) from the set,
  # modifying it in place.
  def subtract(uncode)
    replace(exclude(CodeSet[uncode]))
  end

  # returns the set produced by removing all extents covered by
  # +other+, which should be a QDA::CodeSet. Note that unlike +union+
  # and +intersect+
  #   self.exclude(other) != other.exclude(self)
  # When +other+ is nil, self is returned as-is (the same object).
  def exclude(other)
    return self if other.nil?
    results = self.dup
    other.each do |uncode|
      # each subtraction yields a list of leftovers; splice those
      # lists back into one flat set before the next pass
      results.collect! { |code| code - uncode }.flatten!
    end
    results
  end

  # Returns the set produced by merging all the codes in this one
  # with those in +other+, which should be a QDA::CodeSet. Items that
  # overlap or are contiguous are fused into a single item. The union
  # of two empty sets is an empty set.
  def union(other)
    results = CodeSet[]
    # Sweep in start order: the running item then always reaches at
    # least as far as anything already seen, so one touch? test per
    # item is enough to find every merge.
    sorted = (self + other).sort_by { |code| [code.offset, code.end] }

    current = nil
    sorted.each do |code|
      if current.nil?
        current = code
      elsif current.touch?(code)
        current = current + code
      else
        results.push(current)
        current = code.dup
      end
    end
    results.push(current) unless current.nil?
    results
  end
end
|
210
|
+
|
211
|
+
# a hash representing a complex series of codes applied to one or
# more documents. Keys are document ids and values are CodeSets; looking
# up a docid that has no entry yet creates and stores an empty CodeSet
# for it.
class CodingTable < Hash
  def initialize
    super { |table, docid| table[docid] = CodeSet.new() }
  end

  # should access using +add+ or +set+
  # private :[]=
  protected :[]=

  # add the coding of +item+ to the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def add(item)
    self[item.docid].add(item)
  end

  # Sets the coding of the document identified by +docid+ to be +codeset+
  #
  # Raises ArgumentError unless +codeset+ is a CodeSet.
  def set(docid, codeset)
    unless codeset.kind_of?(CodeSet)
      raise ArgumentError,
            "Cannot set codeset #{codeset.inspect} as a CodingTable entry"
    end
    self[docid] = codeset
  end

  # Removes all coding associated with +docid+
  alias :unset :delete

  # remove the coding of +item+ from the coding table. +item+ should be
  # a QDA::Code or QDA::Fragment.
  def subtract(item)
    self[item.docid].subtract(item)
  end

  # Number of documents that have at least one code; entries whose
  # CodeSet is empty are not counted.
  def num_of_docs
    keys.reject { |docid| self[docid].length == 0 }.length
  end

  # Total number of codes across all documents.
  def num_of_codes
    values.inject(0) { |count, codeset| count + codeset.length }
  end

  # Sum of the +length+ of every code in the table.
  def num_of_chars
    values.inject(0) do |total, codes|
      codes.inject(total) { |sub_total, code| sub_total + code.length }
    end
  end

  # returns true if this coding table contains coding for the
  # document +doc+ (looked up by +doc.dbid+)
  def codes?(doc)
    key?(doc.dbid) and self[doc.dbid].length > 0
  end

  # Adds the coding of the other coding table +other+ to this one,
  # modifying +self+ in place.
  #
  # NOTE: the lookups below go through [], so when +other+ is a
  # CodingTable any docid it lacked gains an empty CodeSet entry.
  def merge(other)
    results = CodingTable.new()
    either = self.keys + other.keys
    either.uniq.each do |docid|
      if ! other[docid]
        results[docid] = self[docid]
      elsif ! self[docid]
        results[docid] = other[docid]
      else
        results[docid] = self[docid].union(other[docid])
      end
    end
    replace(results)
  end

  # Removes all coding from this table that occurs in the other table
  # +other+, modifying this CodingTable in place
  def remove(other)
    results = CodingTable.new()
    each do |docid, codes|
      results[docid] = codes.exclude(other[docid])
    end
    replace(results)
  end

  # deletes all coding except that which is also covered by +other+.
  # Documents that do not appear in both tables are dropped.
  def join(other)
    both = keys.find_all { |doc| other.key?(doc) }
    results = CodingTable.new()
    both.each do |docid|
      results[docid] = self[docid].intersect(other[docid])
    end
    replace(results)
  end

  # Returns the table's [docid, codeset] pairs as a sorted list. Uses
  # the caller's block when one is given, otherwise compares the pairs
  # with <=>.
  def sort(&block)
    if block_given?
      super(&block)
    else
      super { |a, b| a <=> b }
    end
  end
end
|
311
|
+
|
312
|
+
# a FragmentTable holds a collection of fragments. It contains a
# number of CodeSets of Fragments. Each CodeSet can be retrieved
# either by document title or by document dbid.
#   tbl = FragmentTable.new()
#   f = Fragment.new('Weft QDA', 'the title', 6, 1)
#   tbl.add(f)
#   tbl['the title'] # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
#   tbl[1] # => QDA::CodeSet[ <Fragment 1 6-14: 'Weft QDA'> ]
class FragmentTable < CodingTable
  def initialize
    # document title => docid, filled in by +add+
    @titles = {}
    super()
  end

  # Assumes this is a document title if a string, or an dbid if an integer.
  # A title that has never been added yields a fresh empty CodeSet and
  # leaves the table and the title index untouched.
  def [](k)
    return super(k) unless k.kind_of?(String)
    # test with key? rather than for nil: a fragment may have been added
    # with a nil docid, and that entry must still be reachable by title
    @titles.key?(k) ? super(@titles[k]) : CodeSet.new
  end

  # Always use this method to add fragments to the collection
  #
  # Raises ArgumentError unless +fragment+ is a Fragment.
  def add(fragment)
    unless fragment.is_a?(Fragment)
      raise ArgumentError, "Fragment expected, got #{fragment.inspect}"
    end
    self[fragment.docid].add(fragment)
    @titles[fragment.doctitle] = fragment.docid
  end

  # Yields each known document title, in sorted order, together with
  # the CodeSet stored for that document.
  def each_title()
    @titles.keys.sort.each do |title|
      yield title, self[ @titles[title] ]
    end
  end

  # Returns a new CodingTable in which every fragment has been replaced
  # by the result of its +to_code+.
  def to_codingtable()
    ct = CodingTable.new
    each do |docid, codeset|
      ct[docid] = QDA::CodeSet[ *codeset.map { |frag| frag.to_code } ]
    end
    ct
  end
end
|
355
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'weft/coding'
|
2
|
+
|
3
|
+
module QDA
|
4
|
+
# A piece of document text (the String content) that remembers where it
# came from: the title and id of its document and the offset at which
# the text starts within that document.
class Fragment < String
  include Coding
  attr_reader :doctitle, :offset
  attr_accessor :docid

  # +text+ is the fragment's content, +doctitle+ the title of the source
  # document, +offset+ the non-negative position of the text within that
  # document and +docid+ the document's id, if known.
  #
  # Raises ArgumentError if +doctitle+ is not a String, +offset+ is not
  # a non-negative integer, or +docid+ is neither nil nor an integer.
  def initialize(text, doctitle, offset, docid = nil)
    super(text)
    unless doctitle.kind_of? String
      raise ArgumentError,
            "Fragment.new expects a doctitle string, got #{doctitle.inspect}"
    end

    # Integer rather than Fixnum: Fixnum no longer exists in current Ruby
    unless offset.kind_of?(Integer) && offset >= 0
      raise ArgumentError,
            "Fragment.new expects an integer offset, got #{offset.inspect}"
    end

    unless docid.nil? || docid.kind_of?(Integer)
      raise ArgumentError,
            "Fragment.new expects an integer docid, got #{docid.inspect}"
    end
    @doctitle = doctitle
    @offset = offset
    # of the document - duplicates role of doctitle - to fix
    @docid = docid
  end

  # Two fragments are equal when their text, offset and document title
  # all match. Anything that does not expose +offset+ and +doctitle+
  # (such as a plain String with the same text) is simply not equal.
  def ==(other)
    super(other) &&
      other.respond_to?(:offset) && other.respond_to?(:doctitle) &&
      @offset == other.offset &&
      @doctitle == other.doctitle
  end

  # Returns a Code built from this fragment's docid, offset and length.
  def to_code()
    Code.new(@docid, offset, length)
  end

  # Used by Coding#prepare_args when combining with an item of another
  # class; +other+ is ignored and the fragment is converted with +to_code+.
  def coerce(other)
    self.to_code()
  end

  # does this code completely cover the document
  #
  # FIXME: not implemented. This currently *returns* the
  # NotImplementedError class (a truthy value) instead of raising it, so
  # the result must not be used as a boolean.
  def complete?()
    return NotImplementedError # need to fix
  end

  # returns a fragment from +abs+ (relative to the whole document)
  # that is +length+ long
  #
  # Raises RuntimeError if +abs+ lies before the start of this fragment.
  def [](abs, length)
    if abs < self.offset
      raise "Can't get part of non-overlapping string"
    end
    # translate the document position into an index into our own text
    Fragment.new( super(abs - self.offset, length),
                  @doctitle, abs, @docid )
  end

  # Short debugging form; text of 50 characters or more is cut to the
  # first 50 characters followed by '...'.
  def inspect()
    str = length < 50 ? self.to_s : self.to_s[0, 50] << '...'
    "<*Fragment #{docid} #{offset}-#{self.end} : '#{str}'>"
  end
end
|
69
|
+
|
70
|
+
# A whole document: a Fragment that starts at offset 0 and additionally
# carries a title, a memo, creation / modification dates and a database
# id.
class Document < Fragment
  attr_reader :meta, :create_date, :mod_date, :dbid
  attr_accessor :title, :memo

  # expects dbid to be set later
  def initialize(title, text = '', memo = '',
                 create_date = nil, mod_date = nil)
    super(text, title, 0)
    @title = title
    @memo = memo

    @create_date = create_date
    @mod_date = mod_date
  end

  # The document's content as a plain string.
  def text
    self.to_s
  end

  # Sets the database id.
  #
  # Raises ArgumentError unless +dbid+ is nil or an integer.
  def dbid=(dbid)
    # Integer rather than Fixnum: Fixnum no longer exists in current Ruby
    unless dbid.nil? || dbid.kind_of?(Integer)
      raise ArgumentError,
            "Document dbid should be an integer or nil, got #{dbid.inspect}"
    end
    @dbid = dbid
  end

  # marks the document as created now
  def create()
    @create_date = Time.now()
  end

  # Appends +text+ followed by +term_char+ to the document, after
  # stripping line terminators from +text+.
  # returns the number of characters appended
  #
  # NOTE(review): `$` matches at the end of every line, not only at the
  # end of the string, so CR / LF characters that sit directly before an
  # interior line break are removed too. Left unchanged in case callers
  # rely on it - confirm before tightening to \z.
  def append(text, term_char = "\n")
    ins = text.gsub(/[\r\n]+$/, '') + term_char
    self << ins
    ins.length
  end

  # Returns a Fragment of +num_chars+ characters starting at +from+,
  # carrying the document's current title and dbid.
  def [](from, num_chars)
    Fragment.new(super, title, from, @dbid)
  end

  def inspect()
    "<*Document #{dbid} '#{title}' (#{length} chars)>"
  end
end
|
118
|
+
end
|