weft-qda 0.9.6 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. data/lib/weft.rb +16 -1
  2. data/lib/weft/WEFT-VERSION-STRING.rb +1 -1
  3. data/lib/weft/application.rb +17 -74
  4. data/lib/weft/backend.rb +6 -32
  5. data/lib/weft/backend/sqlite.rb +222 -164
  6. data/lib/weft/backend/sqlite/category_tree.rb +52 -48
  7. data/lib/weft/backend/sqlite/database.rb +57 -0
  8. data/lib/weft/backend/sqlite/upgradeable.rb +7 -0
  9. data/lib/weft/broadcaster.rb +90 -0
  10. data/lib/weft/category.rb +139 -47
  11. data/lib/weft/codereview.rb +160 -0
  12. data/lib/weft/coding.rb +74 -23
  13. data/lib/weft/document.rb +23 -10
  14. data/lib/weft/exceptions.rb +10 -0
  15. data/lib/weft/filters.rb +47 -224
  16. data/lib/weft/filters/indexers.rb +137 -0
  17. data/lib/weft/filters/input.rb +118 -0
  18. data/lib/weft/filters/output.rb +101 -0
  19. data/lib/weft/filters/templates.rb +80 -0
  20. data/lib/weft/filters/win32backtick.rb +246 -0
  21. data/lib/weft/query.rb +169 -0
  22. data/lib/weft/wxgui.rb +349 -294
  23. data/lib/weft/wxgui/constants.rb +43 -0
  24. data/lib/weft/wxgui/controls.rb +6 -0
  25. data/lib/weft/wxgui/controls/category_dropdown.rb +192 -0
  26. data/lib/weft/wxgui/controls/category_tree.rb +314 -0
  27. data/lib/weft/wxgui/controls/document_list.rb +97 -0
  28. data/lib/weft/wxgui/controls/multitype_control.rb +37 -0
  29. data/lib/weft/wxgui/{inspectors → controls}/textcontrols.rb +235 -64
  30. data/lib/weft/wxgui/dialogs.rb +144 -41
  31. data/lib/weft/wxgui/error_handler.rb +116 -36
  32. data/lib/weft/wxgui/exceptions.rb +7 -0
  33. data/lib/weft/wxgui/inspectors.rb +61 -208
  34. data/lib/weft/wxgui/inspectors/category.rb +19 -16
  35. data/lib/weft/wxgui/inspectors/codereview.rb +90 -132
  36. data/lib/weft/wxgui/inspectors/document.rb +12 -8
  37. data/lib/weft/wxgui/inspectors/imagedocument.rb +56 -56
  38. data/lib/weft/wxgui/inspectors/query.rb +284 -0
  39. data/lib/weft/wxgui/inspectors/script.rb +147 -23
  40. data/lib/weft/wxgui/lang/en.rb +69 -0
  41. data/lib/weft/wxgui/sidebar.rb +90 -432
  42. data/lib/weft/wxgui/utilities.rb +70 -91
  43. data/lib/weft/wxgui/workarea.rb +150 -43
  44. data/share/icons/category.ico +0 -0
  45. data/share/icons/category.xpm +109 -0
  46. data/share/icons/codereview.ico +0 -0
  47. data/share/icons/codereview.xpm +54 -0
  48. data/share/icons/d_and_c.xpm +126 -0
  49. data/share/icons/document.ico +0 -0
  50. data/share/icons/document.xpm +70 -0
  51. data/share/icons/project.ico +0 -0
  52. data/share/icons/query.ico +0 -0
  53. data/share/icons/query.xpm +56 -0
  54. data/{lib/weft/wxgui → share/icons}/search.xpm +0 -0
  55. data/share/icons/weft.ico +0 -0
  56. data/share/icons/weft.xpm +62 -0
  57. data/share/icons/weft16.ico +0 -0
  58. data/share/icons/weft32.ico +0 -0
  59. data/share/templates/category_plain.html +18 -0
  60. data/share/templates/codereview_plain.html +18 -0
  61. data/share/templates/document_plain.html +13 -0
  62. data/share/templates/document_plain.txt +7 -0
  63. data/test/001-document.rb +55 -36
  64. data/test/002-category.rb +81 -6
  65. data/test/003-code.rb +8 -4
  66. data/test/004-application.rb +13 -34
  67. data/test/005-query_review.rb +139 -0
  68. data/test/006-filters.rb +54 -42
  69. data/test/007-output_filters.rb +113 -0
  70. data/test/009a-backend_sqlite_basic.rb +95 -24
  71. data/test/009b-backend_sqlite_complex.rb +43 -62
  72. data/test/009c_backend_sqlite_bench.rb +5 -10
  73. data/test/053-doc_inspector.rb +46 -0
  74. data/test/055-query_window.rb +50 -0
  75. data/test/all-tests.rb +1 -0
  76. data/test/test-common.rb +19 -0
  77. data/test/testdata/empty.qdp +0 -0
  78. data/test/testdata/simple with space.pdf +0 -0
  79. data/test/testdata/simple.pdf +0 -0
  80. data/weft-qda.rb +40 -7
  81. metadata +74 -14
  82. data/lib/weft/wxgui/category.xpm +0 -26
  83. data/lib/weft/wxgui/document.xpm +0 -25
  84. data/lib/weft/wxgui/inspectors/search.rb +0 -265
  85. data/lib/weft/wxgui/mondrian.xpm +0 -44
  86. data/lib/weft/wxgui/weft16.xpm +0 -31
@@ -0,0 +1,160 @@
module QDA
  # CodeReview is a class that is used for cross-tabulation of coding. It
  # makes it possible to get statistics for the number of characters,
  # passages and documents that are coded by both the row category and the
  # column category.
  #
  # Row categories are held in +rows+, column categories in +cols+, and
  # +contents+ is an array of arrays in which contents[i][j] holds the result
  # of joining the codes of row +i+ with the codes of column +j+.
  class CodeReview
    attr_accessor :dbid, :count_method
    attr_reader :cols, :rows, :contents

    # A new CodeReview is empty when initialised; cells are counted by
    # +num_of_docs+ unless +count_method+ is changed.
    def initialize()
      @cols, @rows, @contents = [], [], []
      @count_method = :num_of_docs
    end

    # returns the total number of columns
    def number_cols()
      @cols.length
    end

    # returns the index of the last column (-1 when there are no columns)
    def last_col()
      @cols.length - 1
    end

    # takes a block, yielding each column Category and its index in turn
    def each_col()
      @cols.each_with_index { | col, i | yield col, i }
    end

    # add the Category +category+ as the last column. Returns the appended
    # category, or nil if nothing was added because +category+ was nil or is
    # already a column.
    def add_col(category)
      return nil unless category
      return nil if @cols.include?(category)
      @cols.push(category)

      # fill in the new column's cell in every existing row
      @rows.each_with_index do | row_cat, i |
        @contents[i][last_col] = row_cat.codes.dup.join(category.codes)
      end
      category
    end

    # Updates the column with the changed Category +category+. Useful in a
    # persistent environment where user actions may have altered the coding.
    # Returns the index of the updated column, or nil if +category+ is not a
    # column of this CodeReview.
    def update_col(category)
      return nil unless category
      return nil unless idx = @cols.index(category)

      @cols[idx] = category
      # recalculate this column's cell in every row
      @rows.each_with_index do | row_cat, i |
        @contents[i][idx] = row_cat.codes.dup.join(category.codes)
      end
      return idx
    end

    # Removes the Category +category+ as a column from the CodeReview. Returns
    # the index of the removed category, if found, or nil, if not.
    def remove_col(category)
      return nil unless category
      return nil unless idx = @cols.index(category)
      @cols.delete_at(idx)
      @contents.each { | row | row.delete_at(idx) }
      return idx
    end

    # returns the total number of rows in the CodeReview
    def number_rows()
      @rows.length
    end

    # returns the index of the last row in the CodeReview (-1 when there are
    # no rows)
    def last_row()
      @rows.length - 1
    end

    # takes a block, yielding each row Category and its index in turn
    def each_row()
      @rows.each_with_index { | r, i | yield r, i }
    end

    # appends the category +category+ as the last row. Returns the appended
    # category if it was successfully added, or nil if not - for example, if
    # +category+ is nil or is already a row.
    def add_row(category)
      return nil unless category
      return nil if @rows.include?(category)
      @rows.push(category)
      @contents[last_row] = []
      # fill in one cell per existing column in the new row
      @cols.each_with_index do | col_cat, j |
        @contents[last_row][j] = col_cat.codes.dup.join(category.codes)
      end
      category
    end

    # Updates the row with the changed Category +category+, recalculating
    # every cell in that row. Returns the index of the updated row, or nil if
    # +category+ is not a row of this CodeReview.
    def update_row(category)
      return nil unless category
      return nil unless idx = @rows.index(category)
      @rows[idx] = category
      @cols.each_with_index do | col_cat, j |
        @contents[idx][j] = col_cat.codes.dup.join(category.codes)
      end
      return idx
    end

    # Removes the Category +category+ from the rows of this CodeReview.
    # Returns the index of the corresponding category, if found, or nil, if not.
    def remove_row(category)
      return nil unless category
      return nil unless idx = @rows.index(category)
      @rows.delete_at(idx)
      @contents.delete_at(idx)
      return idx
    end

    # loops over the contents row by row, yielding each cell's row index,
    # column index and stored (uncounted) cell object
    def each_cell()
      0.upto(last_row) do | i |
        0.upto(last_col) { | j | yield i, j, @contents[i][j] }
      end
    end

    # loops over the contents of this code review, yielding each cell's
    # location and value (calculated by +meth+, defaulting to the code
    # review's current +count_method+). Values are yielded as follows
    #
    #   code_review.each_cell_value { | row_num, col_num, cell_value | ... }
    def each_cell_value(meth = @count_method)
      each_cell { | i, j, cell | yield i, j, cell.send(meth) }
    end

    # returns the maximum value among the codereview contents using the metric
    # +meth+ - which should be a method called upon QDA::CodingTable. Returns
    # nil when there are no cells.
    def max(meth = @count_method)
      @contents.flatten.collect { | x | x.send(meth) }.max
    end

    # returns the minimum value among the codereview contents using the metric
    # +meth+ - which should be a method called upon QDA::CodingTable. Returns
    # nil when there are no cells.
    def min(meth = @count_method)
      @contents.flatten.collect { | x | x.send(meth) }.min
    end

    # returns the current content as a series of rows; if +with_header+ is
    # true, a header row of column names will be the first row, and each
    # subsequent row will have the name of the row as the first entry.
    def output_rows(with_header = true)
      out_rows = []
      out_rows << [ '', *cols.map { | cat | cat.name } ] if with_header
      each_row do | row, i |
        this_row = contents[i].map { | isect | isect.send(count_method) }
        this_row.unshift(row.name) if with_header
        out_rows.push(this_row)
      end
      out_rows
    end

    # Builds a Query combining, with 'AND', a CodedByFunction for the row
    # category at index +x+ and one for the column category at index +y+.
    # Returns nil if either index does not refer to an existing category.
    def to_query(app, x, y)
      return nil unless rows[x] && cols[y]
      query = Query.new( Query::CodedByFunction.new(app, rows[x]) )
      query.add_expression( 'AND', Query::CodedByFunction.new(app, cols[y]) )
      query
    end
  end
end
@@ -141,6 +141,22 @@ module QDA
141
141
  end
142
142
  super(arr)
143
143
  end
144
+
145
+ def items
146
+ self
147
+ end
148
+
149
+ def docid
150
+ first ? first.docid : nil
151
+ end
152
+
153
+ def title
154
+ first ? first.title : nil
155
+ end
156
+
157
+ def num_of_chars()
158
+ inject(0) { | total, code| total += code.length }
159
+ end
144
160
 
145
161
  # iterate over each successive neighbouring pair of codings in
146
162
  # the set, i.e. items 1, 2; items 2,3; items 3, 4 .. items n-1,
@@ -243,18 +259,17 @@ module QDA
243
259
  self[item.docid].subtract(item)
244
260
  end
245
261
 
246
- def num_of_docs
262
+ def num_of_docs()
247
263
  keys.reject { | set | self[set].length == 0 }.length
248
264
  end
249
265
 
250
- def num_of_codes
266
+ def num_of_codes()
251
267
  values.inject(0) { | count, codeset | count + codeset.length }
252
268
  end
269
+ alias :num_of_passages :num_of_codes
253
270
 
254
- def num_of_chars
255
- values.inject(0) do | total, codes |
256
- codes.inject(total) { | sub_total, code | sub_total + code.length }
257
- end
271
+ def num_of_chars()
272
+ values.inject(0) { | count, codeset | count += codeset.num_of_chars }
258
273
  end
259
274
 
260
275
  # returns true if this coding table contains coding for the
@@ -266,40 +281,51 @@ module QDA
266
281
  # Adds the coding of the other coding table +other+ to this one,
267
282
  # returning the result as a new table; +self+ is left unchanged
268
283
  def merge(other)
269
- results = CodingTable.new()
284
+ results = self.class.new()
270
285
  either = self.keys + other.keys
271
286
  either.uniq.each do | docid |
272
287
  if ! other[docid]
273
- results[docid] = self[docid]
288
+ results.set(docid, self[docid])
274
289
  elsif ! self[docid]
275
- results[docid] = other[docid]
290
+ results.set(docid, other[docid])
276
291
  else
277
- results[docid] = self[docid].union(other[docid])
292
+ results.set( docid, self[docid].union(other[docid]) )
278
293
  end
279
294
  end
280
- replace(results)
295
+ return results
281
296
  end
282
-
297
+
298
+ def merge!(other)
299
+ replace( merge(other) )
300
+ end
301
+
283
302
  # Removes all coding from this table that occurs in the other table
284
303
  # +other+, returning the result as a new table; +self+ is left unchanged
285
304
  def remove(other)
286
- results = CodingTable.new()
305
+ results = self.class.new()
287
306
  each do | docid, codes |
288
- results[docid] = self[docid].exclude(other[docid])
307
+ results.set(docid, codes.exclude( other[docid] ) )
289
308
  end
290
- replace(results)
309
+ return results
291
310
  end
292
311
 
312
+ def remove!(other)
313
+ replace( remove(other) )
314
+ end
293
315
  # deletes all coding except that which is also covered by +other+
294
316
  def join(other)
295
317
  both = keys.find_all { | doc | other.key?(doc) }
296
- results = CodingTable.new()
318
+ results = self.class.new()
297
319
  both.each do | docid |
298
- results[docid] = self[docid].intersect( other[docid] )
320
+ results.set(docid, self[docid].intersect( other[docid] ) )
299
321
  end
300
- replace(results)
322
+ return results
301
323
  end
302
324
 
# In-place form of +join+: replaces the contents of this table with only the
# coding that is also covered by +other+.
def join!(other)
  replace( join(other) )
end
328
+
303
329
  def sort(&block)
304
330
  if block_given
305
331
  super(&block)
@@ -307,6 +333,14 @@ module QDA
307
333
  super { | a, b | a <=> b }
308
334
  end
309
335
  end
336
+
337
+ def sets()
338
+ values_at( *keys.sort )
339
+ end
340
+
341
+ def each_set()
342
+ keys.sort.each { | docid | yield self[docid] }
343
+ end
310
344
  end
311
345
 
312
346
  # a FragmentTable holds a collection of fragments. It contains a
@@ -327,7 +361,14 @@ module QDA
327
361
  def [](k)
328
362
  k.kind_of?(String) ? super(@titles[k]) : super(k)
329
363
  end
330
-
364
+
365
+ def set(docid, fragset)
366
+ super(docid, fragset)
367
+ if fragset[0] and fragset[0].respond_to?(:doctitle)
368
+ @titles[fragset[0].doctitle] = fragset[0].docid
369
+ end
370
+ end
371
+
331
372
  # Always use this method to add fragments to the collection
332
373
  def add(fragment)
333
374
  unless fragment.is_a?(Fragment)
@@ -337,11 +378,21 @@ module QDA
337
378
  @titles[fragment.doctitle] = fragment.docid
338
379
  end
339
380
 
381
+ def titles()
382
+ @titles.keys.sort
383
+ end
384
+
340
385
  def each_title()
341
- titles = @titles.keys.sort
342
- titles.each do | title |
343
- yield title, self[ @titles[title] ]
344
- end
386
+ titles.each { | title | yield title, self[ @titles[title] ] }
387
+ end
388
+
389
+ def sets
390
+ docids = titles.map { | t | @titles[t] }
391
+ values_at( *docids )
392
+ end
393
+
394
+ def each_set
395
+ titles.each { | title | yield self[ @titles[title] ] }
345
396
  end
346
397
 
347
398
  def to_codingtable()
@@ -27,7 +27,15 @@ class Fragment < String
27
27
  # of the document - duplicates role of doctitle - to fix
28
28
  @docid = docid
29
29
  end
30
-
30
+
31
+ def title
32
+ @doctitle
33
+ end
34
+
35
+ def text
36
+ self.to_s()
37
+ end
38
+
31
39
  def ==(other)
32
40
  super(other) and
33
41
  @offset == other.offset and
@@ -61,6 +69,13 @@ class Fragment < String
61
69
  @doctitle, abs, @docid )
62
70
  end
63
71
 
# Scans the text for +pattern+, yielding each match as a new Fragment that
# carries this fragment's document title, the match's offset (this
# fragment's offset plus the match position within it) and document id.
# A block is required.
# NOTE(review): with capture groups in +pattern+, String#scan yields arrays
# rather than strings - presumably patterns without groups are expected.
def scan(pattern)
  super do | m |
    # The id set by Fragment#initialize is @docid; @dbid is kept only as a
    # fallback so objects that hold just a dbid behave as before.
    yield Fragment.new(m, @doctitle,
                       offset + Regexp.last_match.begin(0), @docid || @dbid )
  end
end
78
+
64
79
  def inspect()
65
80
  str = length < 50 ? self.to_s : self.to_s[0, 50] << '...'
66
81
  "<*Fragment #{docid} #{offset}-#{self.end} : '#{str}>"
@@ -72,18 +87,15 @@ class Document < Fragment
72
87
  attr_accessor :title, :memo
73
88
 
74
89
  # expects dbid to be set later
75
- def initialize(title, text = '', memo = '',
76
- create_date = nil, mod_date = nil)
90
+ def initialize( title, text = '', memo = '',
91
+ create_date = Time.now(),
92
+ mod_date = Time.now() )
77
93
  super(text, title, 0)
78
94
  @title = title
79
95
  @memo = memo
80
-
96
+
81
97
  @create_date = create_date
82
- @mod_date = mod_date
83
- end
84
-
85
- def text
86
- self.to_s
98
+ @mod_date = mod_date
87
99
  end
88
100
 
89
101
  def dbid=(dbid)
@@ -98,7 +110,7 @@ class Document < Fragment
98
110
  def create()
99
111
  @create_date = Time.now()
100
112
  end
101
-
113
+
102
114
  # def append(text, fragtype = 0)
103
115
  # returns the number of characters appended
104
116
  def append(text, term_char = "\n")
@@ -114,5 +126,6 @@ class Document < Fragment
114
126
  def inspect()
115
127
  "<*Document #{dbid} '#{title}' (#{length} chars)>"
116
128
  end
129
+
117
130
  end
118
131
  end
@@ -0,0 +1,10 @@
module QDA
  # Error classes for the QDA namespace. The three name/structure errors are
  # ArgumentErrors, so existing +rescue ArgumentError+ clauses also catch them.

  # ArgumentError subclass named for names that are not unique.
  class NotUniqueNameError < ArgumentError; end

  # ArgumentError subclass named for unacceptable names.
  class BadNameError < ArgumentError; end

  # ArgumentError subclass named for unacceptable structures.
  class BadStructureError < ArgumentError; end

  # StandardError subclass named for items that cannot be found.
  class NotFoundError < StandardError; end
end
@@ -3,241 +3,64 @@ require 'weft/coding'
3
3
  require 'English'
4
4
 
5
5
  module QDA
6
- class InputFilter
7
- attr_reader :cursor
8
-
9
- def initialize()
10
- @cursor = 0
11
- @indexers = []
12
- end
13
-
14
- def add_indexer(indexer)
15
- unless indexer.respond_to?(:feed)
16
- raise "Document indexers should have a feed method"
17
- end
18
- @indexers.push(indexer)
19
- end
20
-
21
- # reads +file+ and creates a new document titled +doctitle+. +file+
22
- # may be a String filename or an open stream.
23
- # Under the hood, calls +read_content+ to extract the content. This
24
- # method must be implemented in subclasses. Then +process_content+
25
- # is called to create the documents text. This class does something
26
- # reasonable with plain text, but structured text formats will want
27
- # to subclass this method to process non-text information (for
28
- # example, HTML or XML tags)
29
- def read(file, doctitle)
30
- @content = ''
31
- case file
32
- when IO
33
- @content = file.read()
34
- when QDA::Document
35
- @content = file.text()
36
- when String
37
- @content = File.read(file)
38
- end
39
- process_content(doctitle)
40
- end
41
-
42
- def process_content(doctitle)
43
- # signal to indexers we're about to start
44
- @indexers.each { | indexer | indexer.prepare(@content) }
45
- doc = QDA::Document.new(doctitle)
46
- @content.each_line do | line |
47
- doc.append(line.to_s.chomp)
48
- # inform AutoCoders, reverse indexers and so on.
49
- @indexers.each { | indexer | indexer.feed(line) }
50
- end
51
- @indexers.each { | indexer | indexer.terminate() }
52
- doc.create
53
- return doc
54
- end
55
- end
56
-
57
- class TextFilter < InputFilter
58
- EXTENSIONS = [ 'txt' ]
59
- def read_content(file)
60
- text = file.read()
61
- file.close()
62
- text
63
- end
64
- end
65
-
66
- class PDFFilter < InputFilter
67
- EXTENSIONS = [ 'pdf' ]
68
- PDF_TO_TEXT_EXEC = 'pdftotext'
69
- begin
70
- out = `#{PDF_TO_TEXT_EXEC} -v 2>&1`
71
- unless out =~ /pdftotext version 3/
72
- warn 'PDFtotext Version 3 not found in path' +
73
- 'PDF Filters will not be avaialabl'
6
+ module Filters
7
+ @@import = Hash.new { | h, k | h[k] = [] }
8
+ @@export = Hash.new { | h, k | h[k] = [] }
9
+
10
+ class << self
11
+ def register_filter( filter_class )
12
+ if defined? filter_class::IMPORT_CLASS
13
+ @@import[filter_class::IMPORT_CLASS].push(filter_class)
74
14
  end
75
- end
76
-
77
- NO_COPYING_ERROR_TEXT =
78
- "The author or publisher of this PDF document has locked it to
79
- prevent copying and extraction of its text. It is not possible to
80
- import this document."
81
- def read(file, doctitle)
82
- case file
83
- when IO
84
- raise NotImplementedError
85
- @content = `#{PDF_TO_TEXT_EXEC} -nopgbrk #{file.path} - 2>&1`
86
- file.close()
87
- when String
88
- @content = `#{PDF_TO_TEXT_EXEC} -nopgbrk #{file} - 2>&1`
89
- end
90
-
91
- case $CHILD_STATUS
92
- when 0
93
- process_content(doctitle)
94
- when 3
95
- raise RuntimeError.new(NO_COPYING_ERROR_TEXT)
96
- else
97
- raise RuntimeError.new("Could not extract PDF text: #{text}")
15
+ if defined? filter_class::EXPORT_CLASS
16
+ @@export[filter_class::EXPORT_CLASS].push(filter_class)
98
17
  end
99
18
  end
100
19
 
101
- end
102
-
103
- class OutputFilter
104
-
105
- end
106
-
107
- # ...
108
- class HTMLFilter < OutputFilter
109
-
110
- end
111
-
112
- class Indexer
113
- attr_reader :cursor
114
- def initialize()
115
- @cursor = 0
20
+ # imports an object of class +klass+ e.g. QDA::Document from the file
21
+ # +filename+, which should be a string.
22
+ def import_file(klass, filename, opts = {}, &block)
23
+ ext = filename[-3,3]
24
+ filter = Filters.find_import_filter(klass, ext).new()
25
+ import(filter, filename, &block)
116
26
  end
117
-
118
- def index(str)
119
- prepare(str)
120
- str.each_line { | line | feed(line) }
121
- end
122
-
123
- def terminate()
124
- end
125
-
126
- def prepare(content)
127
- end
128
-
129
- def feed(line)
130
- @cursor += line.length
131
- end
132
- end
133
-
134
- # An indexer which records the position of words for later reverse
135
- # retrieval
136
- class WordIndexer < Indexer
137
- attr_reader :words
138
- # includes accented latin-1 characters
139
- WORD_TOKENIZER = /[\w\xC0-\xD6\xD8-\xF6\xF8-\xFF][\w\xC0-\xD6\xD8-\xF6\xF8-\xFF\']+/
140
- def initialize()
141
- super
142
- @words = Hash.new { | h, k | h[k] = [] }
143
- end
144
-
145
- def feed(line)
146
- line.scan( WORD_TOKENIZER ) do | word |
147
- next if word.length == 1
148
- @words[word].push(cursor + Regexp.last_match.begin(0))
149
- end
150
- super
151
- end
152
- end
153
-
154
- # An indexer that uses text patterns to identify, for example,
155
- # passages by a particular speaker, or text headings.
156
- # The indexer can recognise a number of different types of codes,
157
- # each denoted by a pattern of punctuation in a line of text. A
158
- # default coder recognises the following
159
- # A 'Heading', marked by a line **NAME OF HEADING**
160
- # A 'Speaker', marked by a line SpeakerName:
161
- #
162
- # After the filter has run, the results of the coding can be
163
- # retrieved by calling Autocoder#codes
164
- # This is a hash of codetype names to inner hashes of codevalue names
165
- # (strings) to QDA::Codesets corresponding to them.
166
- class AutoCoder < Indexer
167
- STANDARD_TRIGGER_RULES = {
168
- /^(\w+)\:\s*$/ => 'Speaker',
169
- /^\*\*(.*)\*\*$/ => 'Heading'
170
- }
171
-
172
- attr_reader :codes
173
- # +rules+ should be a hash of string keys, naming types of autocode
174
- # (e.g. "Speaker", "Heading", "Topic") mapped to values, which
175
- # should be regular expressions specifying how the start of such a
176
- # code should be recognised.
177
- # For example, to find topics marked by the characters '##' at the
178
- # start of the line:
179
- # 'Heading' => /^##(.*)$/
180
- def initialize(rules = STANDARD_TRIGGER_RULES)
181
- super()
182
- @trigger_rules = rules
183
- @codes = Hash.new { | h, k | h[k] = {} }
184
- @curr_codes = {}
185
- end
186
-
187
- # check a line of document content for triggers
188
- def feed(line)
189
- @trigger_rules.each do | rule, type |
190
- if match = rule.match(line)
191
- trigger(cursor, type, match[1])
192
- end
193
- end
194
- super
27
+
28
+ def import(filter, content)
29
+ obj = filter.run(content)
30
+ yield obj, filter if block_given?
31
+ obj
195
32
  end
196
-
197
- # take action on finding a autocode marker
198
- def trigger(cursor, group, codename)
199
- # save any previous code that was being done for this group
200
- store(group) if @curr_codes[group]
201
- new_codeset = get_code(group, codename)
202
- @curr_codes[group] = [ new_codeset, cursor ]
33
+
34
+ # Returns a hash of all available import filter types, keyed on Weft
35
+ # classes (eg Document)
36
+ def import_filters()
37
+ @@import
203
38
  end
204
- private :trigger
205
-
206
- # returns the code name +codename+ within the group +group+,
207
- # creating a new empty category
208
- def get_code(group, codename)
209
- return @codes[group][codename] if @codes[group][codename]
210
- @codes[group][codename] = QDA::CodeSet.new()
39
+
40
+
41
+ # Returns a hash of all available export filter types, keyed on Weft
42
+ # classes (eg Document)
43
+ def export_filters()
44
+ @@export
211
45
  end
212
-
213
- # Returns the names and codesets for autocodes in group +group+
214
- # in a series of pairs
215
- def each_autocode(group)
216
- @codes[group].each { | name, codeset | yield name, codeset }
46
+
47
+ def find_import_filter( weft_class, ext )
48
+ @@import[weft_class].find { | filter | filter::EXTENSIONS.include?(ext) }
217
49
  end
218
-
219
- # alters all the stored coding in this autocoder so that it refers
220
- # to the document identified by +docid+
221
- def apply(docid)
222
- @codes.values.each do | group |
223
- group.values.each do | codeset |
224
- codeset.map! { | x | x.docid = docid; x }
225
- end
226
- end
50
+
51
+ def find_export_filter( weft_class, ext )
52
+ @@export[weft_class].find { | filter | filter::EXTENSION == ext }
227
53
  end
228
-
229
- # finish up all currently active coding in this autocoder
230
- def terminate()
231
- @curr_codes.each_key { | group | store(group) }
54
+
# Returns true if at least one export filter is registered for +weft_class+.
def can_export?(weft_class)
  # @@export builds an empty list for any key that is merely looked up (as
  # find_export_filter does), so the presence of a key proves nothing.
  # fetch bypasses that default proc and an empty list counts as "no".
  !@@export.fetch(weft_class, []).empty?
end
233
-
234
- # finish the coding for the current code being used among +group+
235
- def store(group)
236
- codeset, start = @curr_codes[group]
237
- # -1 here is a placeholder
238
- terminus = cursor - start
239
- codeset.add( Code.new(-1, start, terminus) )
58
+
240
59
  end
241
- private :store
242
60
  end
61
+
62
+ require 'weft/filters/indexers'
63
+ require 'weft/filters/output'
64
+ require 'weft/filters/input'
65
+ require 'weft/filters/templates'
243
66
  end