lingo 1.8.5 → 1.8.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +25 -0
- data/README +7 -5
- data/Rakefile +58 -55
- data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
- data/{lingo.cfg → config/lingo.cfg} +10 -2
- data/{lir.cfg → config/lir.cfg} +10 -2
- data/{de → dict/de}/lingo-abk.txt +0 -0
- data/{de → dict/de}/lingo-dic.txt +0 -0
- data/{de → dict/de}/lingo-mul.txt +0 -0
- data/{de → dict/de}/lingo-syn.txt +0 -0
- data/{de → dict/de}/test_dic.txt +0 -0
- data/{de → dict/de}/test_gen.txt +0 -0
- data/{de → dict/de}/test_mu2.txt +0 -0
- data/{de → dict/de}/test_mul.txt +0 -0
- data/{de → dict/de}/test_sgw.txt +0 -0
- data/{de → dict/de}/test_syn.txt +0 -0
- data/{de → dict/de}/user-dic.txt +0 -0
- data/{en → dict/en}/lingo-dic.txt +0 -0
- data/{en → dict/en}/lingo-irr.txt +0 -0
- data/{en → dict/en}/lingo-mul.txt +0 -0
- data/{en → dict/en}/lingo-syn.txt +0 -0
- data/{en → dict/en}/lingo-wdn.txt +0 -0
- data/{en → dict/en}/user-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-mul.txt +0 -0
- data/{ru → dict/ru}/lingo-syn.txt +0 -0
- data/{ru → dict/ru}/user-dic.txt +0 -0
- data/{de.lang → lang/de.lang} +1 -1
- data/{en.lang → lang/en.lang} +0 -0
- data/{ru.lang → lang/ru.lang} +0 -0
- data/lib/lingo.rb +14 -15
- data/lib/lingo/app.rb +4 -2
- data/lib/lingo/attendee.rb +23 -43
- data/lib/lingo/attendee/abbreviator.rb +5 -5
- data/lib/lingo/attendee/debugger.rb +39 -12
- data/lib/lingo/attendee/decomposer.rb +3 -4
- data/lib/lingo/attendee/dehyphenizer.rb +4 -4
- data/lib/lingo/attendee/formatter.rb +1 -3
- data/lib/lingo/attendee/multi_worder.rb +3 -4
- data/lib/lingo/attendee/noneword_filter.rb +8 -12
- data/lib/lingo/attendee/object_filter.rb +6 -3
- data/lib/lingo/attendee/sequencer.rb +5 -5
- data/lib/lingo/attendee/stemmer.rb +3 -2
- data/lib/lingo/attendee/synonymer.rb +3 -4
- data/lib/lingo/attendee/text_reader.rb +39 -38
- data/lib/lingo/attendee/text_writer.rb +10 -10
- data/lib/lingo/attendee/tokenizer.rb +63 -33
- data/lib/lingo/attendee/variator.rb +3 -7
- data/lib/lingo/attendee/vector_filter.rb +132 -65
- data/lib/lingo/attendee/word_searcher.rb +5 -3
- data/lib/lingo/buffered_attendee.rb +1 -3
- data/lib/lingo/call.rb +4 -3
- data/lib/lingo/cli.rb +5 -1
- data/lib/lingo/config.rb +11 -5
- data/lib/lingo/ctl.rb +3 -3
- data/lib/lingo/database.rb +3 -1
- data/lib/lingo/database/crypter.rb +1 -3
- data/lib/lingo/database/source.rb +3 -1
- data/lib/lingo/database/source/key_value.rb +3 -1
- data/lib/lingo/database/source/multi_key.rb +3 -1
- data/lib/lingo/database/source/multi_value.rb +3 -1
- data/lib/lingo/database/source/single_word.rb +3 -1
- data/lib/lingo/database/source/word_class.rb +3 -1
- data/lib/lingo/debug.rb +5 -5
- data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
- data/lib/lingo/error.rb +1 -1
- data/lib/lingo/language.rb +1 -9
- data/lib/lingo/language/dictionary.rb +2 -17
- data/lib/lingo/language/grammar.rb +10 -10
- data/lib/lingo/language/lexical.rb +2 -0
- data/lib/lingo/language/lexical_hash.rb +2 -0
- data/lib/lingo/language/token.rb +17 -3
- data/lib/lingo/language/word.rb +13 -5
- data/lib/lingo/language/word_form.rb +5 -3
- data/lib/lingo/progress.rb +2 -2
- data/lib/lingo/srv.rb +1 -1
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +1 -1
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +4 -2
- data/test/attendee/ts_multi_worder.rb +81 -88
- data/test/attendee/ts_noneword_filter.rb +2 -2
- data/test/attendee/ts_object_filter.rb +2 -2
- data/test/attendee/ts_sequencer.rb +40 -20
- data/test/attendee/ts_stemmer.rb +52 -26
- data/test/attendee/ts_text_reader.rb +75 -56
- data/test/attendee/ts_text_writer.rb +6 -4
- data/test/attendee/ts_tokenizer.rb +304 -193
- data/test/attendee/ts_vector_filter.rb +242 -9
- data/test/ref/artikel.non +3 -0
- data/test/ref/artikel.vec +1 -4
- data/test/ref/artikel.vef +940 -0
- data/test/ref/artikel.ven +0 -3
- data/test/ref/artikel.ver +0 -3
- data/test/ref/artikel.vet +2580 -0
- data/test/ref/lir.non +34 -31
- data/test/ref/lir.seq +14 -15
- data/test/ref/lir.vec +37 -37
- data/test/ref/lir.vef +329 -0
- data/test/ref/lir.ven +329 -0
- data/test/ref/lir.ver +329 -0
- data/test/ref/lir.vet +329 -0
- data/test/test_helper.rb +29 -16
- data/test/ts_language.rb +6 -47
- metadata +74 -87
- data/lingo.rb +0 -29
- data/spec/spec_helper.rb +0 -5
@@ -20,7 +20,8 @@ class TestAttendeeTextWriter < AttendeeTestCase
|
|
20
20
|
wd('Zeile|IDF'),
|
21
21
|
tk('.|PUNC'),
|
22
22
|
ai('EOL|test/test.txt'),
|
23
|
-
ai('EOF|test/test.txt')
|
23
|
+
ai('EOF|test/test.txt'),
|
24
|
+
ai('EOT|')
|
24
25
|
]
|
25
26
|
end
|
26
27
|
|
@@ -50,7 +51,7 @@ class TestAttendeeTextWriter < AttendeeTestCase
|
|
50
51
|
|
51
52
|
def test_lir_file
|
52
53
|
meet({ 'ext' => 'vec', 'lir-format' => false }, [
|
53
|
-
ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
|
54
|
+
ai('LIR|'), ai('FILE|test/lir.txt'),
|
54
55
|
ai('RECORD|00237'),
|
55
56
|
'020: GERHARD.',
|
56
57
|
'025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
@@ -61,7 +62,8 @@ class TestAttendeeTextWriter < AttendeeTestCase
|
|
61
62
|
ai('RECORD|00239'),
|
62
63
|
'020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
|
63
64
|
"056: \"Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.",
|
64
|
-
ai('EOF|test/lir.txt')
|
65
|
+
ai('EOF|test/lir.txt'),
|
66
|
+
ai('EOT|')
|
65
67
|
])
|
66
68
|
|
67
69
|
assert_equal([
|
@@ -76,7 +78,7 @@ FG-Projekt GERHARD.\n",
|
|
76
78
|
|
77
79
|
def test_nonewords
|
78
80
|
meet({ 'ext' => 'non', 'sep' => "\n" }, [
|
79
|
-
ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt')
|
81
|
+
ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt'), ai('EOT|')
|
80
82
|
])
|
81
83
|
|
82
84
|
assert_equal([
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require_relative '../test_helper'
|
4
|
+
|
3
5
|
class TestAttendeeTokenizer < AttendeeTestCase
|
4
6
|
|
5
7
|
def setup
|
@@ -19,13 +21,24 @@ class TestAttendeeTokenizer < AttendeeTestCase
|
|
19
21
|
'',
|
20
22
|
'}}'
|
21
23
|
]
|
24
|
+
|
25
|
+
@html = [
|
26
|
+
'test <a>test</a> test',
|
27
|
+
'<b>test <a>test</a></b>',
|
28
|
+
'test <a test="test"><b>test</b></a>, test',
|
29
|
+
'<a>test</a><b test="test">test</b><a>test</a>'
|
30
|
+
]
|
22
31
|
end
|
23
32
|
|
24
33
|
def test_basic
|
25
34
|
meet({}, [
|
26
35
|
'Dies ist ein Test.'
|
27
36
|
], [
|
28
|
-
tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC')
|
37
|
+
tk('Dies|WORD|0|0'),
|
38
|
+
tk('ist|WORD|1|5'),
|
39
|
+
tk('ein|WORD|2|9'),
|
40
|
+
tk('Test|WORD|3|13'),
|
41
|
+
tk('.|PUNC|4|17')
|
29
42
|
])
|
30
43
|
end
|
31
44
|
|
@@ -33,208 +46,306 @@ class TestAttendeeTokenizer < AttendeeTestCase
|
|
33
46
|
meet({}, [
|
34
47
|
'1964 www.vorhauer.de bzw. nasenbär, ()'
|
35
48
|
], [
|
36
|
-
tk('1964|NUMS'),
|
37
|
-
tk('www.vorhauer.de|URLS'),
|
38
|
-
tk('bzw|WORD'),
|
39
|
-
tk('.|PUNC'),
|
40
|
-
tk('nasenbär|WORD'),
|
41
|
-
tk(',|PUNC'),
|
42
|
-
tk('(|OTHR'),
|
43
|
-
tk(')|OTHR')
|
49
|
+
tk('1964|NUMS|0|0'),
|
50
|
+
tk('www.vorhauer.de|URLS|1|5'),
|
51
|
+
tk('bzw|WORD|2|21'),
|
52
|
+
tk('.|PUNC|3|24'),
|
53
|
+
tk('nasenbär|WORD|4|26'),
|
54
|
+
tk(',|PUNC|5|35'),
|
55
|
+
tk('(|OTHR|6|37'),
|
56
|
+
tk(')|OTHR|7|38')
|
44
57
|
])
|
45
58
|
end
|
46
59
|
|
47
60
|
def test_wiki1
|
48
61
|
meet({}, @wiki, [
|
49
|
-
tk('Test|WORD'),
|
50
|
-
tk('[|OTHR'),
|
51
|
-
tk('[|OTHR'),
|
52
|
-
tk('Link|WORD'),
|
53
|
-
tk('||OTHR'),
|
54
|
-
tk('internal|WORD'),
|
55
|
-
tk('link|WORD'),
|
56
|
-
tk(']|OTHR'),
|
57
|
-
tk(']|OTHR'),
|
58
|
-
tk('and|WORD'),
|
59
|
-
tk('[|OTHR'),
|
60
|
-
tk('http://example.com|URLS'),
|
61
|
-
tk('external|WORD'),
|
62
|
-
tk('link|WORD'),
|
63
|
-
tk(']|OTHR'),
|
64
|
-
tk('.|PUNC'),
|
65
|
-
tk('Try|WORD'),
|
66
|
-
tk('_|OTHR'),
|
67
|
-
tk('_|OTHR'),
|
68
|
-
tk('MAGIC|WORD'),
|
69
|
-
tk('_|OTHR'),
|
70
|
-
tk('_|OTHR'),
|
71
|
-
tk('with|WORD'),
|
72
|
-
tk('[|OTHR'),
|
73
|
-
tk('[|OTHR'),
|
74
|
-
tk('Multiline|WORD'),
|
75
|
-
tk('link|WORD'),
|
76
|
-
tk('(|OTHR'),
|
77
|
-
tk('because|WORD'),
|
78
|
-
tk('we|WORD'),
|
79
|
-
tk('can|WORD'),
|
80
|
-
tk(')|OTHR'),
|
81
|
-
tk(']|OTHR'),
|
82
|
-
tk(']|OTHR'),
|
83
|
-
tk('.|PUNC'),
|
84
|
-
tk('[|OTHR'),
|
85
|
-
tk('[|OTHR'),
|
86
|
-
tk('Category|WORD'),
|
87
|
-
tk(':|PUNC'),
|
88
|
-
tk('cat1|WORD'),
|
89
|
-
tk(']|OTHR'),
|
90
|
-
tk(']|OTHR'),
|
91
|
-
tk('Link|WORD'),
|
92
|
-
tk('to|WORD'),
|
93
|
-
tk('[|OTHR'),
|
94
|
-
tk('[|OTHR'),
|
95
|
-
tk(':|PUNC'),
|
96
|
-
tk('Category|WORD'),
|
97
|
-
tk(':|PUNC'),
|
98
|
-
tk('cat2|WORD'),
|
99
|
-
tk(']|OTHR'),
|
100
|
-
tk(']|OTHR'),
|
101
|
-
tk('.|PUNC'),
|
102
|
-
tk('=|OTHR'),
|
103
|
-
tk('=|OTHR'),
|
104
|
-
tk('Heading|WORD'),
|
105
|
-
tk('=|OTHR'),
|
106
|
-
tk('=|OTHR'),
|
107
|
-
tk('{|OTHR'),
|
108
|
-
tk('{|OTHR'),
|
109
|
-
tk('Template|WORD'),
|
110
|
-
tk('}|OTHR'),
|
111
|
-
tk('}|OTHR'),
|
112
|
-
tk('Function|WORD'),
|
113
|
-
tk('with|WORD'),
|
114
|
-
tk('{|OTHR'),
|
115
|
-
tk('{|OTHR'),
|
116
|
-
tk('#|OTHR'),
|
117
|
-
tk('func|WORD'),
|
118
|
-
tk('||OTHR'),
|
119
|
-
tk('param|WORD'),
|
120
|
-
tk('||OTHR'),
|
121
|
-
tk('{|OTHR'),
|
122
|
-
tk('{|OTHR'),
|
123
|
-
tk('{|OTHR'),
|
124
|
-
tk('var|WORD'),
|
125
|
-
tk('}|OTHR'),
|
126
|
-
tk('}|OTHR'),
|
127
|
-
tk('}|OTHR'),
|
128
|
-
tk('}|OTHR'),
|
129
|
-
tk('}|OTHR'),
|
130
|
-
tk('and|WORD'),
|
131
|
-
tk('<|OTHR'),
|
132
|
-
tk('nowiki|WORD'),
|
133
|
-
tk('>|OTHR'),
|
134
|
-
tk('{|OTHR'),
|
135
|
-
tk('{|OTHR'),
|
136
|
-
tk('{|OTHR'),
|
137
|
-
tk('var|WORD'),
|
138
|
-
tk('}|OTHR'),
|
139
|
-
tk('}|OTHR'),
|
140
|
-
tk('}|OTHR'),
|
141
|
-
tk('<|OTHR'),
|
142
|
-
tk('/|OTHR'),
|
143
|
-
tk('nowiki|WORD'),
|
144
|
-
tk('>|OTHR'),
|
145
|
-
tk('!|PUNC'),
|
146
|
-
tk('{|OTHR'),
|
147
|
-
tk('{|OTHR'),
|
148
|
-
tk('Multi|WORD'),
|
149
|
-
tk('||OTHR'),
|
150
|
-
tk('line|WORD'),
|
151
|
-
tk('=|OTHR'),
|
152
|
-
tk('1|NUMS'),
|
153
|
-
tk('||OTHR'),
|
154
|
-
tk('[|OTHR'),
|
155
|
-
tk('[|OTHR'),
|
156
|
-
tk('link|WORD'),
|
157
|
-
tk(']|OTHR'),
|
158
|
-
tk(']|OTHR'),
|
159
|
-
tk('||OTHR'),
|
160
|
-
tk('{|OTHR'),
|
161
|
-
tk('{|OTHR'),
|
162
|
-
tk('{|OTHR'),
|
163
|
-
tk('var|WORD'),
|
164
|
-
tk('}|OTHR'),
|
165
|
-
tk('}|OTHR'),
|
166
|
-
tk('}|OTHR'),
|
167
|
-
tk('}|OTHR'),
|
168
|
-
tk('}|OTHR')
|
62
|
+
tk('Test|WORD|0|0'),
|
63
|
+
tk('[|OTHR|1|5'),
|
64
|
+
tk('[|OTHR|2|6'),
|
65
|
+
tk('Link|WORD|3|7'),
|
66
|
+
tk('||OTHR|4|11'),
|
67
|
+
tk('internal|WORD|5|12'),
|
68
|
+
tk('link|WORD|6|21'),
|
69
|
+
tk(']|OTHR|7|25'),
|
70
|
+
tk(']|OTHR|8|26'),
|
71
|
+
tk('and|WORD|9|28'),
|
72
|
+
tk('[|OTHR|10|32'),
|
73
|
+
tk('http://example.com|URLS|11|33'),
|
74
|
+
tk('external|WORD|12|52'),
|
75
|
+
tk('link|WORD|13|61'),
|
76
|
+
tk(']|OTHR|14|65'),
|
77
|
+
tk('.|PUNC|15|66'),
|
78
|
+
tk('Try|WORD|16|67'),
|
79
|
+
tk('_|OTHR|17|71'),
|
80
|
+
tk('_|OTHR|18|72'),
|
81
|
+
tk('MAGIC|WORD|19|73'),
|
82
|
+
tk('_|OTHR|20|78'),
|
83
|
+
tk('_|OTHR|21|79'),
|
84
|
+
tk('with|WORD|22|81'),
|
85
|
+
tk('[|OTHR|23|86'),
|
86
|
+
tk('[|OTHR|24|87'),
|
87
|
+
tk('Multiline|WORD|25|88'),
|
88
|
+
tk('link|WORD|26|97'),
|
89
|
+
tk('(|OTHR|27|102'),
|
90
|
+
tk('because|WORD|28|103'),
|
91
|
+
tk('we|WORD|29|111'),
|
92
|
+
tk('can|WORD|30|114'),
|
93
|
+
tk(')|OTHR|31|117'),
|
94
|
+
tk(']|OTHR|32|118'),
|
95
|
+
tk(']|OTHR|33|119'),
|
96
|
+
tk('.|PUNC|34|120'),
|
97
|
+
tk('[|OTHR|35|121'),
|
98
|
+
tk('[|OTHR|36|122'),
|
99
|
+
tk('Category|WORD|37|123'),
|
100
|
+
tk(':|PUNC|38|131'),
|
101
|
+
tk('cat1|WORD|39|132'),
|
102
|
+
tk(']|OTHR|40|136'),
|
103
|
+
tk(']|OTHR|41|137'),
|
104
|
+
tk('Link|WORD|42|138'),
|
105
|
+
tk('to|WORD|43|143'),
|
106
|
+
tk('[|OTHR|44|146'),
|
107
|
+
tk('[|OTHR|45|147'),
|
108
|
+
tk(':|PUNC|46|148'),
|
109
|
+
tk('Category|WORD|47|149'),
|
110
|
+
tk(':|PUNC|48|157'),
|
111
|
+
tk('cat2|WORD|49|158'),
|
112
|
+
tk(']|OTHR|50|162'),
|
113
|
+
tk(']|OTHR|51|163'),
|
114
|
+
tk('.|PUNC|52|164'),
|
115
|
+
tk('=|OTHR|53|165'),
|
116
|
+
tk('=|OTHR|54|166'),
|
117
|
+
tk('Heading|WORD|55|168'),
|
118
|
+
tk('=|OTHR|56|176'),
|
119
|
+
tk('=|OTHR|57|177'),
|
120
|
+
tk('{|OTHR|58|178'),
|
121
|
+
tk('{|OTHR|59|179'),
|
122
|
+
tk('Template|WORD|60|180'),
|
123
|
+
tk('}|OTHR|61|188'),
|
124
|
+
tk('}|OTHR|62|189'),
|
125
|
+
tk('Function|WORD|63|190'),
|
126
|
+
tk('with|WORD|64|199'),
|
127
|
+
tk('{|OTHR|65|204'),
|
128
|
+
tk('{|OTHR|66|205'),
|
129
|
+
tk('#|OTHR|67|206'),
|
130
|
+
tk('func|WORD|68|207'),
|
131
|
+
tk('||OTHR|69|211'),
|
132
|
+
tk('param|WORD|70|212'),
|
133
|
+
tk('||OTHR|71|217'),
|
134
|
+
tk('{|OTHR|72|218'),
|
135
|
+
tk('{|OTHR|73|219'),
|
136
|
+
tk('{|OTHR|74|220'),
|
137
|
+
tk('var|WORD|75|221'),
|
138
|
+
tk('}|OTHR|76|224'),
|
139
|
+
tk('}|OTHR|77|225'),
|
140
|
+
tk('}|OTHR|78|226'),
|
141
|
+
tk('}|OTHR|79|227'),
|
142
|
+
tk('}|OTHR|80|228'),
|
143
|
+
tk('and|WORD|81|230'),
|
144
|
+
tk('<|OTHR|82|234'),
|
145
|
+
tk('nowiki|WORD|83|235'),
|
146
|
+
tk('>|OTHR|84|241'),
|
147
|
+
tk('{|OTHR|85|242'),
|
148
|
+
tk('{|OTHR|86|243'),
|
149
|
+
tk('{|OTHR|87|244'),
|
150
|
+
tk('var|WORD|88|245'),
|
151
|
+
tk('}|OTHR|89|248'),
|
152
|
+
tk('}|OTHR|90|249'),
|
153
|
+
tk('}|OTHR|91|250'),
|
154
|
+
tk('<|OTHR|92|251'),
|
155
|
+
tk('/|OTHR|93|252'),
|
156
|
+
tk('nowiki|WORD|94|253'),
|
157
|
+
tk('>|OTHR|95|259'),
|
158
|
+
tk('!|PUNC|96|260'),
|
159
|
+
tk('{|OTHR|97|261'),
|
160
|
+
tk('{|OTHR|98|262'),
|
161
|
+
tk('Multi|WORD|99|263'),
|
162
|
+
tk('||OTHR|100|269'),
|
163
|
+
tk('line|WORD|101|271'),
|
164
|
+
tk('=|OTHR|102|275'),
|
165
|
+
tk('1|NUMS|103|276'),
|
166
|
+
tk('||OTHR|104|278'),
|
167
|
+
tk('[|OTHR|105|280'),
|
168
|
+
tk('[|OTHR|106|281'),
|
169
|
+
tk('link|WORD|107|282'),
|
170
|
+
tk(']|OTHR|108|286'),
|
171
|
+
tk(']|OTHR|109|287'),
|
172
|
+
tk('||OTHR|110|289'),
|
173
|
+
tk('{|OTHR|111|291'),
|
174
|
+
tk('{|OTHR|112|292'),
|
175
|
+
tk('{|OTHR|113|293'),
|
176
|
+
tk('var|WORD|114|294'),
|
177
|
+
tk('}|OTHR|115|297'),
|
178
|
+
tk('}|OTHR|116|298'),
|
179
|
+
tk('}|OTHR|117|299'),
|
180
|
+
tk('}|OTHR|118|300'),
|
181
|
+
tk('}|OTHR|119|301')
|
169
182
|
])
|
170
183
|
end
|
171
184
|
|
172
185
|
def test_wiki2
|
173
186
|
meet({ 'space' => true, 'tags' => true, 'wiki' => true }, @wiki, [
|
174
|
-
tk('Test|WORD'),
|
175
|
-
tk(' |SPAC'),
|
176
|
-
tk('[[|WIKI'),
|
177
|
-
tk('Link|internal link]]|WIKI'),
|
178
|
-
tk(' |SPAC'),
|
179
|
-
tk('and|WORD'),
|
180
|
-
tk(' |SPAC'),
|
181
|
-
tk('[http://|WIKI'),
|
182
|
-
tk('example.com external link]|WIKI'),
|
183
|
-
tk('.|PUNC'),
|
184
|
-
tk('Try|WORD'),
|
185
|
-
tk(' |SPAC'),
|
186
|
-
tk('__MAGIC__|WIKI'),
|
187
|
-
tk(' |SPAC'),
|
188
|
-
tk('with|WORD'),
|
189
|
-
tk(' |SPAC'),
|
190
|
-
tk('[[|WIKI'),
|
191
|
-
tk('Multiline|WIKI'),
|
192
|
-
tk('link (because we can)]]|WIKI'),
|
193
|
-
tk('.|PUNC'),
|
194
|
-
tk('[[|WIKI'),
|
195
|
-
tk('Category:cat1]]|WIKI'),
|
196
|
-
tk('Link|WORD'),
|
197
|
-
tk(' |SPAC'),
|
198
|
-
tk('to|WORD'),
|
199
|
-
tk(' |SPAC'),
|
200
|
-
tk('[[|WIKI'),
|
201
|
-
tk(':Category:cat2]]|WIKI'),
|
202
|
-
tk('.|PUNC'),
|
203
|
-
tk('== Heading ==|WIKI'),
|
204
|
-
tk('{{|WIKI'),
|
205
|
-
tk('Template}}|WIKI'),
|
206
|
-
tk('Function|WORD'),
|
207
|
-
tk(' |SPAC'),
|
208
|
-
tk('with|WORD'),
|
209
|
-
tk(' |SPAC'),
|
210
|
-
tk('{{|WIKI'),
|
211
|
-
tk('#func|param||WIKI'),
|
212
|
-
tk('{{{|WIKI'),
|
213
|
-
tk('var}}}|WIKI'),
|
214
|
-
tk('}}|WIKI'),
|
215
|
-
tk(' |SPAC'),
|
216
|
-
tk('and|WORD'),
|
217
|
-
tk(' |SPAC'),
|
218
|
-
tk('<|HTML'),
|
219
|
-
tk('nowiki>|HTML'),
|
220
|
-
tk('{{{|WIKI'),
|
221
|
-
tk('var}}}|WIKI'),
|
222
|
-
tk('<|HTML'),
|
223
|
-
tk('/nowiki>|HTML'),
|
224
|
-
tk('!|PUNC'),
|
225
|
-
tk('{{|WIKI'),
|
226
|
-
tk('Multi|WIKI'),
|
227
|
-
tk(' | line=1|WIKI'),
|
228
|
-
tk(' | |WIKI'),
|
229
|
-
tk('[[|WIKI'),
|
230
|
-
tk('link]]|WIKI'),
|
231
|
-
tk('|WIKI'),
|
232
|
-
tk(' | |WIKI'),
|
233
|
-
tk('{{{|WIKI'),
|
234
|
-
tk('var}}}|WIKI'),
|
235
|
-
tk('|WIKI'),
|
236
|
-
tk('|WIKI'),
|
237
|
-
tk('}}|WIKI')
|
187
|
+
tk('Test|WORD|0|0'),
|
188
|
+
tk(' |SPAC|1|4'),
|
189
|
+
tk('[[|WIKI|2|5'),
|
190
|
+
tk('Link|internal link]]|WIKI|3|7'),
|
191
|
+
tk(' |SPAC|4|27'),
|
192
|
+
tk('and|WORD|5|28'),
|
193
|
+
tk(' |SPAC|6|31'),
|
194
|
+
tk('[http://|WIKI|7|32'),
|
195
|
+
tk('example.com external link]|WIKI|8|40'),
|
196
|
+
tk('.|PUNC|9|66'),
|
197
|
+
tk('Try|WORD|10|67'),
|
198
|
+
tk(' |SPAC|11|70'),
|
199
|
+
tk('__MAGIC__|WIKI|12|71'),
|
200
|
+
tk(' |SPAC|13|80'),
|
201
|
+
tk('with|WORD|14|81'),
|
202
|
+
tk(' |SPAC|15|85'),
|
203
|
+
tk('[[|WIKI|16|86'),
|
204
|
+
tk('Multiline|WIKI|17|88'),
|
205
|
+
tk('link (because we can)]]|WIKI|18|97'),
|
206
|
+
tk('.|PUNC|19|120'),
|
207
|
+
tk('[[|WIKI|20|121'),
|
208
|
+
tk('Category:cat1]]|WIKI|21|123'),
|
209
|
+
tk('Link|WORD|22|138'),
|
210
|
+
tk(' |SPAC|23|142'),
|
211
|
+
tk('to|WORD|24|143'),
|
212
|
+
tk(' |SPAC|25|145'),
|
213
|
+
tk('[[|WIKI|26|146'),
|
214
|
+
tk(':Category:cat2]]|WIKI|27|148'),
|
215
|
+
tk('.|PUNC|28|164'),
|
216
|
+
tk('== Heading ==|WIKI|29|165'),
|
217
|
+
tk('{{|WIKI|30|178'),
|
218
|
+
tk('Template}}|WIKI|31|180'),
|
219
|
+
tk('Function|WORD|32|190'),
|
220
|
+
tk(' |SPAC|33|198'),
|
221
|
+
tk('with|WORD|34|199'),
|
222
|
+
tk(' |SPAC|35|203'),
|
223
|
+
tk('{{|WIKI|36|204'),
|
224
|
+
tk('#func|param||WIKI|37|206'),
|
225
|
+
tk('{{{|WIKI|38|218'),
|
226
|
+
tk('var}}}|WIKI|39|221'),
|
227
|
+
tk('}}|WIKI|40|227'),
|
228
|
+
tk(' |SPAC|41|229'),
|
229
|
+
tk('and|WORD|42|230'),
|
230
|
+
tk(' |SPAC|43|233'),
|
231
|
+
tk('<|HTML|44|234'),
|
232
|
+
tk('nowiki>|HTML|45|235'),
|
233
|
+
tk('{{{|WIKI|46|242'),
|
234
|
+
tk('var}}}|WIKI|47|245'),
|
235
|
+
tk('<|HTML|48|251'),
|
236
|
+
tk('/nowiki>|HTML|49|252'),
|
237
|
+
tk('!|PUNC|50|260'),
|
238
|
+
tk('{{|WIKI|51|261'),
|
239
|
+
tk('Multi|WIKI|52|263'),
|
240
|
+
tk(' | line=1|WIKI|53|268'),
|
241
|
+
tk(' | |WIKI|54|277'),
|
242
|
+
tk('[[|WIKI|55|280'),
|
243
|
+
tk('link]]|WIKI|56|282'),
|
244
|
+
tk('|WIKI|57|288'),
|
245
|
+
tk(' | |WIKI|58|288'),
|
246
|
+
tk('{{{|WIKI|59|291'),
|
247
|
+
tk('var}}}|WIKI|60|294'),
|
248
|
+
tk('|WIKI|61|300'),
|
249
|
+
tk('|WIKI|62|300'),
|
250
|
+
tk('}}|WIKI|63|300')
|
251
|
+
])
|
252
|
+
end
|
253
|
+
|
254
|
+
def test_html1
|
255
|
+
meet({ 'tags' => true }, @html, [
|
256
|
+
tk('test|WORD|0|0'),
|
257
|
+
tk('<|HTML|1|5'),
|
258
|
+
tk('a>|HTML|2|6'),
|
259
|
+
tk('test|WORD|3|8'),
|
260
|
+
tk('<|HTML|4|12'),
|
261
|
+
tk('/a>|HTML|5|13'),
|
262
|
+
tk('test|WORD|6|17'),
|
263
|
+
tk('<|HTML|7|21'),
|
264
|
+
tk('b>|HTML|8|22'),
|
265
|
+
tk('test|WORD|9|24'),
|
266
|
+
tk('<|HTML|10|29'),
|
267
|
+
tk('a>|HTML|11|30'),
|
268
|
+
tk('test|WORD|12|32'),
|
269
|
+
tk('<|HTML|13|36'),
|
270
|
+
tk('/a>|HTML|14|37'),
|
271
|
+
tk('<|HTML|15|40'),
|
272
|
+
tk('/b>|HTML|16|41'),
|
273
|
+
tk('test|WORD|17|44'),
|
274
|
+
tk('<|HTML|18|49'),
|
275
|
+
tk('a test="test">|HTML|19|50'),
|
276
|
+
tk('<|HTML|20|64'),
|
277
|
+
tk('b>|HTML|21|65'),
|
278
|
+
tk('test|WORD|22|67'),
|
279
|
+
tk('<|HTML|23|71'),
|
280
|
+
tk('/b>|HTML|24|72'),
|
281
|
+
tk('<|HTML|25|75'),
|
282
|
+
tk('/a>|HTML|26|76'),
|
283
|
+
tk(',|PUNC|27|79'),
|
284
|
+
tk('test|WORD|28|81'),
|
285
|
+
tk('<|HTML|29|85'),
|
286
|
+
tk('a>|HTML|30|86'),
|
287
|
+
tk('test|WORD|31|88'),
|
288
|
+
tk('<|HTML|32|92'),
|
289
|
+
tk('/a>|HTML|33|93'),
|
290
|
+
tk('<|HTML|34|96'),
|
291
|
+
tk('b test="test">|HTML|35|97'),
|
292
|
+
tk('test|WORD|36|111'),
|
293
|
+
tk('<|HTML|37|115'),
|
294
|
+
tk('/b>|HTML|38|116'),
|
295
|
+
tk('<|HTML|39|119'),
|
296
|
+
tk('a>|HTML|40|120'),
|
297
|
+
tk('test|WORD|41|122'),
|
298
|
+
tk('<|HTML|42|126'),
|
299
|
+
tk('/a>|HTML|43|127')
|
300
|
+
])
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_html2
|
304
|
+
meet({ 'skip-tags' => 'a' }, @html, [
|
305
|
+
tk('test|WORD|0|0'),
|
306
|
+
tk('<|SKIP|1|5'),
|
307
|
+
tk('a>|SKIP|2|6'),
|
308
|
+
tk('test|SKIP|3|8'),
|
309
|
+
tk('<|SKIP|4|12'),
|
310
|
+
tk('/a>|SKIP|5|13'),
|
311
|
+
tk('test|WORD|6|17'),
|
312
|
+
tk('<|HTML|7|21'),
|
313
|
+
tk('b>|HTML|8|22'),
|
314
|
+
tk('test|WORD|9|24'),
|
315
|
+
tk('<|SKIP|10|29'),
|
316
|
+
tk('a>|SKIP|11|30'),
|
317
|
+
tk('test|SKIP|12|32'),
|
318
|
+
tk('<|SKIP|13|36'),
|
319
|
+
tk('/a>|SKIP|14|37'),
|
320
|
+
tk('<|HTML|15|40'),
|
321
|
+
tk('/b>|HTML|16|41'),
|
322
|
+
tk('test|WORD|17|44'),
|
323
|
+
tk('<|SKIP|18|49'),
|
324
|
+
tk('a test="test">|SKIP|19|50'),
|
325
|
+
tk('<|SKIP|20|64'),
|
326
|
+
tk('b>|SKIP|21|65'),
|
327
|
+
tk('test|SKIP|22|67'),
|
328
|
+
tk('<|SKIP|23|71'),
|
329
|
+
tk('/b>|SKIP|24|72'),
|
330
|
+
tk('<|SKIP|25|75'),
|
331
|
+
tk('/a>|SKIP|26|76'),
|
332
|
+
tk(',|PUNC|27|79'),
|
333
|
+
tk('test|WORD|28|81'),
|
334
|
+
tk('<|SKIP|29|85'),
|
335
|
+
tk('a>|SKIP|30|86'),
|
336
|
+
tk('test|SKIP|31|88'),
|
337
|
+
tk('<|SKIP|32|92'),
|
338
|
+
tk('/a>|SKIP|33|93'),
|
339
|
+
tk('<|HTML|34|96'),
|
340
|
+
tk('b test="test">|HTML|35|97'),
|
341
|
+
tk('test|WORD|36|111'),
|
342
|
+
tk('<|HTML|37|115'),
|
343
|
+
tk('/b>|HTML|38|116'),
|
344
|
+
tk('<|SKIP|39|119'),
|
345
|
+
tk('a>|SKIP|40|120'),
|
346
|
+
tk('test|SKIP|41|122'),
|
347
|
+
tk('<|SKIP|42|126'),
|
348
|
+
tk('/a>|SKIP|43|127')
|
238
349
|
])
|
239
350
|
end
|
240
351
|
|