lingo 1.8.5 → 1.8.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +25 -0
  3. data/README +7 -5
  4. data/Rakefile +58 -55
  5. data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
  6. data/{lingo.cfg → config/lingo.cfg} +10 -2
  7. data/{lir.cfg → config/lir.cfg} +10 -2
  8. data/{de → dict/de}/lingo-abk.txt +0 -0
  9. data/{de → dict/de}/lingo-dic.txt +0 -0
  10. data/{de → dict/de}/lingo-mul.txt +0 -0
  11. data/{de → dict/de}/lingo-syn.txt +0 -0
  12. data/{de → dict/de}/test_dic.txt +0 -0
  13. data/{de → dict/de}/test_gen.txt +0 -0
  14. data/{de → dict/de}/test_mu2.txt +0 -0
  15. data/{de → dict/de}/test_mul.txt +0 -0
  16. data/{de → dict/de}/test_sgw.txt +0 -0
  17. data/{de → dict/de}/test_syn.txt +0 -0
  18. data/{de → dict/de}/user-dic.txt +0 -0
  19. data/{en → dict/en}/lingo-dic.txt +0 -0
  20. data/{en → dict/en}/lingo-irr.txt +0 -0
  21. data/{en → dict/en}/lingo-mul.txt +0 -0
  22. data/{en → dict/en}/lingo-syn.txt +0 -0
  23. data/{en → dict/en}/lingo-wdn.txt +0 -0
  24. data/{en → dict/en}/user-dic.txt +0 -0
  25. data/{ru → dict/ru}/lingo-dic.txt +0 -0
  26. data/{ru → dict/ru}/lingo-mul.txt +0 -0
  27. data/{ru → dict/ru}/lingo-syn.txt +0 -0
  28. data/{ru → dict/ru}/user-dic.txt +0 -0
  29. data/{de.lang → lang/de.lang} +1 -1
  30. data/{en.lang → lang/en.lang} +0 -0
  31. data/{ru.lang → lang/ru.lang} +0 -0
  32. data/lib/lingo.rb +14 -15
  33. data/lib/lingo/app.rb +4 -2
  34. data/lib/lingo/attendee.rb +23 -43
  35. data/lib/lingo/attendee/abbreviator.rb +5 -5
  36. data/lib/lingo/attendee/debugger.rb +39 -12
  37. data/lib/lingo/attendee/decomposer.rb +3 -4
  38. data/lib/lingo/attendee/dehyphenizer.rb +4 -4
  39. data/lib/lingo/attendee/formatter.rb +1 -3
  40. data/lib/lingo/attendee/multi_worder.rb +3 -4
  41. data/lib/lingo/attendee/noneword_filter.rb +8 -12
  42. data/lib/lingo/attendee/object_filter.rb +6 -3
  43. data/lib/lingo/attendee/sequencer.rb +5 -5
  44. data/lib/lingo/attendee/stemmer.rb +3 -2
  45. data/lib/lingo/attendee/synonymer.rb +3 -4
  46. data/lib/lingo/attendee/text_reader.rb +39 -38
  47. data/lib/lingo/attendee/text_writer.rb +10 -10
  48. data/lib/lingo/attendee/tokenizer.rb +63 -33
  49. data/lib/lingo/attendee/variator.rb +3 -7
  50. data/lib/lingo/attendee/vector_filter.rb +132 -65
  51. data/lib/lingo/attendee/word_searcher.rb +5 -3
  52. data/lib/lingo/buffered_attendee.rb +1 -3
  53. data/lib/lingo/call.rb +4 -3
  54. data/lib/lingo/cli.rb +5 -1
  55. data/lib/lingo/config.rb +11 -5
  56. data/lib/lingo/ctl.rb +3 -3
  57. data/lib/lingo/database.rb +3 -1
  58. data/lib/lingo/database/crypter.rb +1 -3
  59. data/lib/lingo/database/source.rb +3 -1
  60. data/lib/lingo/database/source/key_value.rb +3 -1
  61. data/lib/lingo/database/source/multi_key.rb +3 -1
  62. data/lib/lingo/database/source/multi_value.rb +3 -1
  63. data/lib/lingo/database/source/single_word.rb +3 -1
  64. data/lib/lingo/database/source/word_class.rb +3 -1
  65. data/lib/lingo/debug.rb +5 -5
  66. data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
  67. data/lib/lingo/error.rb +1 -1
  68. data/lib/lingo/language.rb +1 -9
  69. data/lib/lingo/language/dictionary.rb +2 -17
  70. data/lib/lingo/language/grammar.rb +10 -10
  71. data/lib/lingo/language/lexical.rb +2 -0
  72. data/lib/lingo/language/lexical_hash.rb +2 -0
  73. data/lib/lingo/language/token.rb +17 -3
  74. data/lib/lingo/language/word.rb +13 -5
  75. data/lib/lingo/language/word_form.rb +5 -3
  76. data/lib/lingo/progress.rb +2 -2
  77. data/lib/lingo/srv.rb +1 -1
  78. data/lib/lingo/srv/lingosrv.cfg +1 -1
  79. data/lib/lingo/version.rb +1 -1
  80. data/lib/lingo/web.rb +1 -1
  81. data/lib/lingo/web/lingoweb.cfg +1 -1
  82. data/test/attendee/ts_abbreviator.rb +4 -2
  83. data/test/attendee/ts_multi_worder.rb +81 -88
  84. data/test/attendee/ts_noneword_filter.rb +2 -2
  85. data/test/attendee/ts_object_filter.rb +2 -2
  86. data/test/attendee/ts_sequencer.rb +40 -20
  87. data/test/attendee/ts_stemmer.rb +52 -26
  88. data/test/attendee/ts_text_reader.rb +75 -56
  89. data/test/attendee/ts_text_writer.rb +6 -4
  90. data/test/attendee/ts_tokenizer.rb +304 -193
  91. data/test/attendee/ts_vector_filter.rb +242 -9
  92. data/test/ref/artikel.non +3 -0
  93. data/test/ref/artikel.vec +1 -4
  94. data/test/ref/artikel.vef +940 -0
  95. data/test/ref/artikel.ven +0 -3
  96. data/test/ref/artikel.ver +0 -3
  97. data/test/ref/artikel.vet +2580 -0
  98. data/test/ref/lir.non +34 -31
  99. data/test/ref/lir.seq +14 -15
  100. data/test/ref/lir.vec +37 -37
  101. data/test/ref/lir.vef +329 -0
  102. data/test/ref/lir.ven +329 -0
  103. data/test/ref/lir.ver +329 -0
  104. data/test/ref/lir.vet +329 -0
  105. data/test/test_helper.rb +29 -16
  106. data/test/ts_language.rb +6 -47
  107. metadata +74 -87
  108. data/lingo.rb +0 -29
  109. data/spec/spec_helper.rb +0 -5
@@ -20,7 +20,8 @@ class TestAttendeeTextWriter < AttendeeTestCase
20
20
  wd('Zeile|IDF'),
21
21
  tk('.|PUNC'),
22
22
  ai('EOL|test/test.txt'),
23
- ai('EOF|test/test.txt')
23
+ ai('EOF|test/test.txt'),
24
+ ai('EOT|')
24
25
  ]
25
26
  end
26
27
 
@@ -50,7 +51,7 @@ class TestAttendeeTextWriter < AttendeeTestCase
50
51
 
51
52
  def test_lir_file
52
53
  meet({ 'ext' => 'vec', 'lir-format' => false }, [
53
- ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
54
+ ai('LIR|'), ai('FILE|test/lir.txt'),
54
55
  ai('RECORD|00237'),
55
56
  '020: GERHARD.',
56
57
  '025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
@@ -61,7 +62,8 @@ class TestAttendeeTextWriter < AttendeeTestCase
61
62
  ai('RECORD|00239'),
62
63
  '020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
63
64
  "056: \"Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.",
64
- ai('EOF|test/lir.txt')
65
+ ai('EOF|test/lir.txt'),
66
+ ai('EOT|')
65
67
  ])
66
68
 
67
69
  assert_equal([
@@ -76,7 +78,7 @@ FG-Projekt GERHARD.\n",
76
78
 
77
79
  def test_nonewords
78
80
  meet({ 'ext' => 'non', 'sep' => "\n" }, [
79
- ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt')
81
+ ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt'), ai('EOT|')
80
82
  ])
81
83
 
82
84
  assert_equal([
@@ -1,5 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
+ require_relative '../test_helper'
4
+
3
5
  class TestAttendeeTokenizer < AttendeeTestCase
4
6
 
5
7
  def setup
@@ -19,13 +21,24 @@ class TestAttendeeTokenizer < AttendeeTestCase
19
21
  '',
20
22
  '}}'
21
23
  ]
24
+
25
+ @html = [
26
+ 'test <a>test</a> test',
27
+ '<b>test <a>test</a></b>',
28
+ 'test <a test="test"><b>test</b></a>, test',
29
+ '<a>test</a><b test="test">test</b><a>test</a>'
30
+ ]
22
31
  end
23
32
 
24
33
  def test_basic
25
34
  meet({}, [
26
35
  'Dies ist ein Test.'
27
36
  ], [
28
- tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC')
37
+ tk('Dies|WORD|0|0'),
38
+ tk('ist|WORD|1|5'),
39
+ tk('ein|WORD|2|9'),
40
+ tk('Test|WORD|3|13'),
41
+ tk('.|PUNC|4|17')
29
42
  ])
30
43
  end
31
44
 
@@ -33,208 +46,306 @@ class TestAttendeeTokenizer < AttendeeTestCase
33
46
  meet({}, [
34
47
  '1964 www.vorhauer.de bzw. nasenbär, ()'
35
48
  ], [
36
- tk('1964|NUMS'),
37
- tk('www.vorhauer.de|URLS'),
38
- tk('bzw|WORD'),
39
- tk('.|PUNC'),
40
- tk('nasenbär|WORD'),
41
- tk(',|PUNC'),
42
- tk('(|OTHR'),
43
- tk(')|OTHR')
49
+ tk('1964|NUMS|0|0'),
50
+ tk('www.vorhauer.de|URLS|1|5'),
51
+ tk('bzw|WORD|2|21'),
52
+ tk('.|PUNC|3|24'),
53
+ tk('nasenbär|WORD|4|26'),
54
+ tk(',|PUNC|5|35'),
55
+ tk('(|OTHR|6|37'),
56
+ tk(')|OTHR|7|38')
44
57
  ])
45
58
  end
46
59
 
47
60
  def test_wiki1
48
61
  meet({}, @wiki, [
49
- tk('Test|WORD'),
50
- tk('[|OTHR'),
51
- tk('[|OTHR'),
52
- tk('Link|WORD'),
53
- tk('||OTHR'),
54
- tk('internal|WORD'),
55
- tk('link|WORD'),
56
- tk(']|OTHR'),
57
- tk(']|OTHR'),
58
- tk('and|WORD'),
59
- tk('[|OTHR'),
60
- tk('http://example.com|URLS'),
61
- tk('external|WORD'),
62
- tk('link|WORD'),
63
- tk(']|OTHR'),
64
- tk('.|PUNC'),
65
- tk('Try|WORD'),
66
- tk('_|OTHR'),
67
- tk('_|OTHR'),
68
- tk('MAGIC|WORD'),
69
- tk('_|OTHR'),
70
- tk('_|OTHR'),
71
- tk('with|WORD'),
72
- tk('[|OTHR'),
73
- tk('[|OTHR'),
74
- tk('Multiline|WORD'),
75
- tk('link|WORD'),
76
- tk('(|OTHR'),
77
- tk('because|WORD'),
78
- tk('we|WORD'),
79
- tk('can|WORD'),
80
- tk(')|OTHR'),
81
- tk(']|OTHR'),
82
- tk(']|OTHR'),
83
- tk('.|PUNC'),
84
- tk('[|OTHR'),
85
- tk('[|OTHR'),
86
- tk('Category|WORD'),
87
- tk(':|PUNC'),
88
- tk('cat1|WORD'),
89
- tk(']|OTHR'),
90
- tk(']|OTHR'),
91
- tk('Link|WORD'),
92
- tk('to|WORD'),
93
- tk('[|OTHR'),
94
- tk('[|OTHR'),
95
- tk(':|PUNC'),
96
- tk('Category|WORD'),
97
- tk(':|PUNC'),
98
- tk('cat2|WORD'),
99
- tk(']|OTHR'),
100
- tk(']|OTHR'),
101
- tk('.|PUNC'),
102
- tk('=|OTHR'),
103
- tk('=|OTHR'),
104
- tk('Heading|WORD'),
105
- tk('=|OTHR'),
106
- tk('=|OTHR'),
107
- tk('{|OTHR'),
108
- tk('{|OTHR'),
109
- tk('Template|WORD'),
110
- tk('}|OTHR'),
111
- tk('}|OTHR'),
112
- tk('Function|WORD'),
113
- tk('with|WORD'),
114
- tk('{|OTHR'),
115
- tk('{|OTHR'),
116
- tk('#|OTHR'),
117
- tk('func|WORD'),
118
- tk('||OTHR'),
119
- tk('param|WORD'),
120
- tk('||OTHR'),
121
- tk('{|OTHR'),
122
- tk('{|OTHR'),
123
- tk('{|OTHR'),
124
- tk('var|WORD'),
125
- tk('}|OTHR'),
126
- tk('}|OTHR'),
127
- tk('}|OTHR'),
128
- tk('}|OTHR'),
129
- tk('}|OTHR'),
130
- tk('and|WORD'),
131
- tk('<|OTHR'),
132
- tk('nowiki|WORD'),
133
- tk('>|OTHR'),
134
- tk('{|OTHR'),
135
- tk('{|OTHR'),
136
- tk('{|OTHR'),
137
- tk('var|WORD'),
138
- tk('}|OTHR'),
139
- tk('}|OTHR'),
140
- tk('}|OTHR'),
141
- tk('<|OTHR'),
142
- tk('/|OTHR'),
143
- tk('nowiki|WORD'),
144
- tk('>|OTHR'),
145
- tk('!|PUNC'),
146
- tk('{|OTHR'),
147
- tk('{|OTHR'),
148
- tk('Multi|WORD'),
149
- tk('||OTHR'),
150
- tk('line|WORD'),
151
- tk('=|OTHR'),
152
- tk('1|NUMS'),
153
- tk('||OTHR'),
154
- tk('[|OTHR'),
155
- tk('[|OTHR'),
156
- tk('link|WORD'),
157
- tk(']|OTHR'),
158
- tk(']|OTHR'),
159
- tk('||OTHR'),
160
- tk('{|OTHR'),
161
- tk('{|OTHR'),
162
- tk('{|OTHR'),
163
- tk('var|WORD'),
164
- tk('}|OTHR'),
165
- tk('}|OTHR'),
166
- tk('}|OTHR'),
167
- tk('}|OTHR'),
168
- tk('}|OTHR')
62
+ tk('Test|WORD|0|0'),
63
+ tk('[|OTHR|1|5'),
64
+ tk('[|OTHR|2|6'),
65
+ tk('Link|WORD|3|7'),
66
+ tk('||OTHR|4|11'),
67
+ tk('internal|WORD|5|12'),
68
+ tk('link|WORD|6|21'),
69
+ tk(']|OTHR|7|25'),
70
+ tk(']|OTHR|8|26'),
71
+ tk('and|WORD|9|28'),
72
+ tk('[|OTHR|10|32'),
73
+ tk('http://example.com|URLS|11|33'),
74
+ tk('external|WORD|12|52'),
75
+ tk('link|WORD|13|61'),
76
+ tk(']|OTHR|14|65'),
77
+ tk('.|PUNC|15|66'),
78
+ tk('Try|WORD|16|67'),
79
+ tk('_|OTHR|17|71'),
80
+ tk('_|OTHR|18|72'),
81
+ tk('MAGIC|WORD|19|73'),
82
+ tk('_|OTHR|20|78'),
83
+ tk('_|OTHR|21|79'),
84
+ tk('with|WORD|22|81'),
85
+ tk('[|OTHR|23|86'),
86
+ tk('[|OTHR|24|87'),
87
+ tk('Multiline|WORD|25|88'),
88
+ tk('link|WORD|26|97'),
89
+ tk('(|OTHR|27|102'),
90
+ tk('because|WORD|28|103'),
91
+ tk('we|WORD|29|111'),
92
+ tk('can|WORD|30|114'),
93
+ tk(')|OTHR|31|117'),
94
+ tk(']|OTHR|32|118'),
95
+ tk(']|OTHR|33|119'),
96
+ tk('.|PUNC|34|120'),
97
+ tk('[|OTHR|35|121'),
98
+ tk('[|OTHR|36|122'),
99
+ tk('Category|WORD|37|123'),
100
+ tk(':|PUNC|38|131'),
101
+ tk('cat1|WORD|39|132'),
102
+ tk(']|OTHR|40|136'),
103
+ tk(']|OTHR|41|137'),
104
+ tk('Link|WORD|42|138'),
105
+ tk('to|WORD|43|143'),
106
+ tk('[|OTHR|44|146'),
107
+ tk('[|OTHR|45|147'),
108
+ tk(':|PUNC|46|148'),
109
+ tk('Category|WORD|47|149'),
110
+ tk(':|PUNC|48|157'),
111
+ tk('cat2|WORD|49|158'),
112
+ tk(']|OTHR|50|162'),
113
+ tk(']|OTHR|51|163'),
114
+ tk('.|PUNC|52|164'),
115
+ tk('=|OTHR|53|165'),
116
+ tk('=|OTHR|54|166'),
117
+ tk('Heading|WORD|55|168'),
118
+ tk('=|OTHR|56|176'),
119
+ tk('=|OTHR|57|177'),
120
+ tk('{|OTHR|58|178'),
121
+ tk('{|OTHR|59|179'),
122
+ tk('Template|WORD|60|180'),
123
+ tk('}|OTHR|61|188'),
124
+ tk('}|OTHR|62|189'),
125
+ tk('Function|WORD|63|190'),
126
+ tk('with|WORD|64|199'),
127
+ tk('{|OTHR|65|204'),
128
+ tk('{|OTHR|66|205'),
129
+ tk('#|OTHR|67|206'),
130
+ tk('func|WORD|68|207'),
131
+ tk('||OTHR|69|211'),
132
+ tk('param|WORD|70|212'),
133
+ tk('||OTHR|71|217'),
134
+ tk('{|OTHR|72|218'),
135
+ tk('{|OTHR|73|219'),
136
+ tk('{|OTHR|74|220'),
137
+ tk('var|WORD|75|221'),
138
+ tk('}|OTHR|76|224'),
139
+ tk('}|OTHR|77|225'),
140
+ tk('}|OTHR|78|226'),
141
+ tk('}|OTHR|79|227'),
142
+ tk('}|OTHR|80|228'),
143
+ tk('and|WORD|81|230'),
144
+ tk('<|OTHR|82|234'),
145
+ tk('nowiki|WORD|83|235'),
146
+ tk('>|OTHR|84|241'),
147
+ tk('{|OTHR|85|242'),
148
+ tk('{|OTHR|86|243'),
149
+ tk('{|OTHR|87|244'),
150
+ tk('var|WORD|88|245'),
151
+ tk('}|OTHR|89|248'),
152
+ tk('}|OTHR|90|249'),
153
+ tk('}|OTHR|91|250'),
154
+ tk('<|OTHR|92|251'),
155
+ tk('/|OTHR|93|252'),
156
+ tk('nowiki|WORD|94|253'),
157
+ tk('>|OTHR|95|259'),
158
+ tk('!|PUNC|96|260'),
159
+ tk('{|OTHR|97|261'),
160
+ tk('{|OTHR|98|262'),
161
+ tk('Multi|WORD|99|263'),
162
+ tk('||OTHR|100|269'),
163
+ tk('line|WORD|101|271'),
164
+ tk('=|OTHR|102|275'),
165
+ tk('1|NUMS|103|276'),
166
+ tk('||OTHR|104|278'),
167
+ tk('[|OTHR|105|280'),
168
+ tk('[|OTHR|106|281'),
169
+ tk('link|WORD|107|282'),
170
+ tk(']|OTHR|108|286'),
171
+ tk(']|OTHR|109|287'),
172
+ tk('||OTHR|110|289'),
173
+ tk('{|OTHR|111|291'),
174
+ tk('{|OTHR|112|292'),
175
+ tk('{|OTHR|113|293'),
176
+ tk('var|WORD|114|294'),
177
+ tk('}|OTHR|115|297'),
178
+ tk('}|OTHR|116|298'),
179
+ tk('}|OTHR|117|299'),
180
+ tk('}|OTHR|118|300'),
181
+ tk('}|OTHR|119|301')
169
182
  ])
170
183
  end
171
184
 
172
185
  def test_wiki2
173
186
  meet({ 'space' => true, 'tags' => true, 'wiki' => true }, @wiki, [
174
- tk('Test|WORD'),
175
- tk(' |SPAC'),
176
- tk('[[|WIKI'),
177
- tk('Link|internal link]]|WIKI'),
178
- tk(' |SPAC'),
179
- tk('and|WORD'),
180
- tk(' |SPAC'),
181
- tk('[http://|WIKI'),
182
- tk('example.com external link]|WIKI'),
183
- tk('.|PUNC'),
184
- tk('Try|WORD'),
185
- tk(' |SPAC'),
186
- tk('__MAGIC__|WIKI'),
187
- tk(' |SPAC'),
188
- tk('with|WORD'),
189
- tk(' |SPAC'),
190
- tk('[[|WIKI'),
191
- tk('Multiline|WIKI'),
192
- tk('link (because we can)]]|WIKI'),
193
- tk('.|PUNC'),
194
- tk('[[|WIKI'),
195
- tk('Category:cat1]]|WIKI'),
196
- tk('Link|WORD'),
197
- tk(' |SPAC'),
198
- tk('to|WORD'),
199
- tk(' |SPAC'),
200
- tk('[[|WIKI'),
201
- tk(':Category:cat2]]|WIKI'),
202
- tk('.|PUNC'),
203
- tk('== Heading ==|WIKI'),
204
- tk('{{|WIKI'),
205
- tk('Template}}|WIKI'),
206
- tk('Function|WORD'),
207
- tk(' |SPAC'),
208
- tk('with|WORD'),
209
- tk(' |SPAC'),
210
- tk('{{|WIKI'),
211
- tk('#func|param||WIKI'),
212
- tk('{{{|WIKI'),
213
- tk('var}}}|WIKI'),
214
- tk('}}|WIKI'),
215
- tk(' |SPAC'),
216
- tk('and|WORD'),
217
- tk(' |SPAC'),
218
- tk('<|HTML'),
219
- tk('nowiki>|HTML'),
220
- tk('{{{|WIKI'),
221
- tk('var}}}|WIKI'),
222
- tk('<|HTML'),
223
- tk('/nowiki>|HTML'),
224
- tk('!|PUNC'),
225
- tk('{{|WIKI'),
226
- tk('Multi|WIKI'),
227
- tk(' | line=1|WIKI'),
228
- tk(' | |WIKI'),
229
- tk('[[|WIKI'),
230
- tk('link]]|WIKI'),
231
- tk('|WIKI'),
232
- tk(' | |WIKI'),
233
- tk('{{{|WIKI'),
234
- tk('var}}}|WIKI'),
235
- tk('|WIKI'),
236
- tk('|WIKI'),
237
- tk('}}|WIKI')
187
+ tk('Test|WORD|0|0'),
188
+ tk(' |SPAC|1|4'),
189
+ tk('[[|WIKI|2|5'),
190
+ tk('Link|internal link]]|WIKI|3|7'),
191
+ tk(' |SPAC|4|27'),
192
+ tk('and|WORD|5|28'),
193
+ tk(' |SPAC|6|31'),
194
+ tk('[http://|WIKI|7|32'),
195
+ tk('example.com external link]|WIKI|8|40'),
196
+ tk('.|PUNC|9|66'),
197
+ tk('Try|WORD|10|67'),
198
+ tk(' |SPAC|11|70'),
199
+ tk('__MAGIC__|WIKI|12|71'),
200
+ tk(' |SPAC|13|80'),
201
+ tk('with|WORD|14|81'),
202
+ tk(' |SPAC|15|85'),
203
+ tk('[[|WIKI|16|86'),
204
+ tk('Multiline|WIKI|17|88'),
205
+ tk('link (because we can)]]|WIKI|18|97'),
206
+ tk('.|PUNC|19|120'),
207
+ tk('[[|WIKI|20|121'),
208
+ tk('Category:cat1]]|WIKI|21|123'),
209
+ tk('Link|WORD|22|138'),
210
+ tk(' |SPAC|23|142'),
211
+ tk('to|WORD|24|143'),
212
+ tk(' |SPAC|25|145'),
213
+ tk('[[|WIKI|26|146'),
214
+ tk(':Category:cat2]]|WIKI|27|148'),
215
+ tk('.|PUNC|28|164'),
216
+ tk('== Heading ==|WIKI|29|165'),
217
+ tk('{{|WIKI|30|178'),
218
+ tk('Template}}|WIKI|31|180'),
219
+ tk('Function|WORD|32|190'),
220
+ tk(' |SPAC|33|198'),
221
+ tk('with|WORD|34|199'),
222
+ tk(' |SPAC|35|203'),
223
+ tk('{{|WIKI|36|204'),
224
+ tk('#func|param||WIKI|37|206'),
225
+ tk('{{{|WIKI|38|218'),
226
+ tk('var}}}|WIKI|39|221'),
227
+ tk('}}|WIKI|40|227'),
228
+ tk(' |SPAC|41|229'),
229
+ tk('and|WORD|42|230'),
230
+ tk(' |SPAC|43|233'),
231
+ tk('<|HTML|44|234'),
232
+ tk('nowiki>|HTML|45|235'),
233
+ tk('{{{|WIKI|46|242'),
234
+ tk('var}}}|WIKI|47|245'),
235
+ tk('<|HTML|48|251'),
236
+ tk('/nowiki>|HTML|49|252'),
237
+ tk('!|PUNC|50|260'),
238
+ tk('{{|WIKI|51|261'),
239
+ tk('Multi|WIKI|52|263'),
240
+ tk(' | line=1|WIKI|53|268'),
241
+ tk(' | |WIKI|54|277'),
242
+ tk('[[|WIKI|55|280'),
243
+ tk('link]]|WIKI|56|282'),
244
+ tk('|WIKI|57|288'),
245
+ tk(' | |WIKI|58|288'),
246
+ tk('{{{|WIKI|59|291'),
247
+ tk('var}}}|WIKI|60|294'),
248
+ tk('|WIKI|61|300'),
249
+ tk('|WIKI|62|300'),
250
+ tk('}}|WIKI|63|300')
251
+ ])
252
+ end
253
+
254
+ def test_html1
255
+ meet({ 'tags' => true }, @html, [
256
+ tk('test|WORD|0|0'),
257
+ tk('<|HTML|1|5'),
258
+ tk('a>|HTML|2|6'),
259
+ tk('test|WORD|3|8'),
260
+ tk('<|HTML|4|12'),
261
+ tk('/a>|HTML|5|13'),
262
+ tk('test|WORD|6|17'),
263
+ tk('<|HTML|7|21'),
264
+ tk('b>|HTML|8|22'),
265
+ tk('test|WORD|9|24'),
266
+ tk('<|HTML|10|29'),
267
+ tk('a>|HTML|11|30'),
268
+ tk('test|WORD|12|32'),
269
+ tk('<|HTML|13|36'),
270
+ tk('/a>|HTML|14|37'),
271
+ tk('<|HTML|15|40'),
272
+ tk('/b>|HTML|16|41'),
273
+ tk('test|WORD|17|44'),
274
+ tk('<|HTML|18|49'),
275
+ tk('a test="test">|HTML|19|50'),
276
+ tk('<|HTML|20|64'),
277
+ tk('b>|HTML|21|65'),
278
+ tk('test|WORD|22|67'),
279
+ tk('<|HTML|23|71'),
280
+ tk('/b>|HTML|24|72'),
281
+ tk('<|HTML|25|75'),
282
+ tk('/a>|HTML|26|76'),
283
+ tk(',|PUNC|27|79'),
284
+ tk('test|WORD|28|81'),
285
+ tk('<|HTML|29|85'),
286
+ tk('a>|HTML|30|86'),
287
+ tk('test|WORD|31|88'),
288
+ tk('<|HTML|32|92'),
289
+ tk('/a>|HTML|33|93'),
290
+ tk('<|HTML|34|96'),
291
+ tk('b test="test">|HTML|35|97'),
292
+ tk('test|WORD|36|111'),
293
+ tk('<|HTML|37|115'),
294
+ tk('/b>|HTML|38|116'),
295
+ tk('<|HTML|39|119'),
296
+ tk('a>|HTML|40|120'),
297
+ tk('test|WORD|41|122'),
298
+ tk('<|HTML|42|126'),
299
+ tk('/a>|HTML|43|127')
300
+ ])
301
+ end
302
+
303
+ def test_html2
304
+ meet({ 'skip-tags' => 'a' }, @html, [
305
+ tk('test|WORD|0|0'),
306
+ tk('<|SKIP|1|5'),
307
+ tk('a>|SKIP|2|6'),
308
+ tk('test|SKIP|3|8'),
309
+ tk('<|SKIP|4|12'),
310
+ tk('/a>|SKIP|5|13'),
311
+ tk('test|WORD|6|17'),
312
+ tk('<|HTML|7|21'),
313
+ tk('b>|HTML|8|22'),
314
+ tk('test|WORD|9|24'),
315
+ tk('<|SKIP|10|29'),
316
+ tk('a>|SKIP|11|30'),
317
+ tk('test|SKIP|12|32'),
318
+ tk('<|SKIP|13|36'),
319
+ tk('/a>|SKIP|14|37'),
320
+ tk('<|HTML|15|40'),
321
+ tk('/b>|HTML|16|41'),
322
+ tk('test|WORD|17|44'),
323
+ tk('<|SKIP|18|49'),
324
+ tk('a test="test">|SKIP|19|50'),
325
+ tk('<|SKIP|20|64'),
326
+ tk('b>|SKIP|21|65'),
327
+ tk('test|SKIP|22|67'),
328
+ tk('<|SKIP|23|71'),
329
+ tk('/b>|SKIP|24|72'),
330
+ tk('<|SKIP|25|75'),
331
+ tk('/a>|SKIP|26|76'),
332
+ tk(',|PUNC|27|79'),
333
+ tk('test|WORD|28|81'),
334
+ tk('<|SKIP|29|85'),
335
+ tk('a>|SKIP|30|86'),
336
+ tk('test|SKIP|31|88'),
337
+ tk('<|SKIP|32|92'),
338
+ tk('/a>|SKIP|33|93'),
339
+ tk('<|HTML|34|96'),
340
+ tk('b test="test">|HTML|35|97'),
341
+ tk('test|WORD|36|111'),
342
+ tk('<|HTML|37|115'),
343
+ tk('/b>|HTML|38|116'),
344
+ tk('<|SKIP|39|119'),
345
+ tk('a>|SKIP|40|120'),
346
+ tk('test|SKIP|41|122'),
347
+ tk('<|SKIP|42|126'),
348
+ tk('/a>|SKIP|43|127')
238
349
  ])
239
350
  end
240
351