ots 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/README.md +80 -0
  2. data/dictionaries/bg.xml +101 -0
  3. data/dictionaries/ca.xml +141 -0
  4. data/dictionaries/cs.xml +161 -0
  5. data/dictionaries/cy.xml +118 -0
  6. data/dictionaries/da.xml +129 -0
  7. data/dictionaries/de.xml +354 -0
  8. data/dictionaries/el.xml +80 -0
  9. data/dictionaries/en.xml +606 -0
  10. data/dictionaries/eo.xml +171 -0
  11. data/dictionaries/es.xml +369 -0
  12. data/dictionaries/et.xml +172 -0
  13. data/dictionaries/eu.xml +77 -0
  14. data/dictionaries/fi.xml +105 -0
  15. data/dictionaries/fr.xml +199 -0
  16. data/dictionaries/ga.xml +124 -0
  17. data/dictionaries/gl.xml +290 -0
  18. data/dictionaries/he.xml +334 -0
  19. data/dictionaries/hu.xml +280 -0
  20. data/dictionaries/ia.xml +97 -0
  21. data/dictionaries/id.xml +75 -0
  22. data/dictionaries/is.xml +201 -0
  23. data/dictionaries/it.xml +206 -0
  24. data/dictionaries/lv.xml +77 -0
  25. data/dictionaries/mi.xml +76 -0
  26. data/dictionaries/ms.xml +160 -0
  27. data/dictionaries/mt.xml +73 -0
  28. data/dictionaries/nl.xml +245 -0
  29. data/dictionaries/nn.xml +264 -0
  30. data/dictionaries/pl.xml +92 -0
  31. data/dictionaries/pt.xml +365 -0
  32. data/dictionaries/ro.xml +163 -0
  33. data/dictionaries/ru.xml +150 -0
  34. data/dictionaries/sv.xml +255 -0
  35. data/dictionaries/tl.xml +67 -0
  36. data/dictionaries/tr.xml +65 -0
  37. data/dictionaries/uk.xml +98 -0
  38. data/dictionaries/yi.xml +293 -0
  39. data/ext/article.c +119 -0
  40. data/ext/dictionary.c +335 -0
  41. data/ext/extconf.rb +13 -14
  42. data/ext/grader-tc.c +185 -0
  43. data/ext/grader-tc.h +64 -0
  44. data/ext/grader-tf.c +116 -0
  45. data/ext/grader.c +85 -0
  46. data/ext/highlighter.c +128 -0
  47. data/ext/html.c +131 -0
  48. data/ext/libots.h +158 -0
  49. data/ext/ots.c +130 -151
  50. data/ext/ots.h +15 -0
  51. data/ext/parser.c +173 -0
  52. data/ext/relations.c +163 -0
  53. data/ext/stemmer.c +332 -0
  54. data/ext/text.c +98 -0
  55. data/ext/version.h +2 -0
  56. data/ext/wordlist.c +220 -0
  57. data/test/helper.rb +3 -0
  58. data/test/test_article.rb +52 -0
  59. data/test/test_ots.rb +23 -0
  60. metadata +122 -38
  61. data/README +0 -25
  62. data/VERSION +0 -1
  63. data/lib/ots.rb +0 -1
  64. data/test/ots_test.rb +0 -62
@@ -0,0 +1,201 @@
1
+ <?xml version="1.0"?>
2
+ <dictionary lang="icelandic">
3
+ <stemmer>
4
+
5
+ <step1_pre>
6
+ <rule>"|</rule>
7
+ <rule>(|</rule>
8
+ </step1_pre>
9
+
10
+
11
+ <step1_post>
12
+ <rule>."|</rule>
13
+ <rule>,"|</rule>
14
+ <rule>.|</rule>
15
+ <rule>,|</rule>
16
+ <rule>"|</rule>
17
+ <rule>)|</rule>
18
+ <rule>?|</rule>
19
+ <rule>:|</rule>
20
+ <rule>;|</rule>
21
+ <rule>!|</rule>
22
+ </step1_post>
23
+
24
+
25
+
26
+ <manual>
27
+ <rule>wrote|write</rule>
28
+ <rule>came|come</rule>
29
+ <rule>went|go</rule>
30
+ </manual>
31
+
32
+ <post>
33
+ <rule>before1|1after</rule>
34
+ </post>
35
+ <pre>
36
+ <rule>before1|1after</rule>
37
+ </pre>
38
+ </stemmer>
39
+ <parser>
40
+
41
+ <linebreak>
42
+ <rule>."</rule>
43
+ <rule>?"</rule>
44
+ <rule>!"</rule>
45
+ <rule>,"</rule>
46
+ <rule>.</rule>
47
+ <rule>?</rule>
48
+ <rule>;</rule>
49
+ <rule>|</rule>
50
+ <rule>!</rule>
51
+ </linebreak>
52
+
53
+ <linedontbreak>
54
+ <rule>Dr.</rule>
55
+ <rule>Mr.</rule>
56
+ <rule>Mrs.</rule>
57
+ <rule>U.S.</rule>
58
+ <rule>Rep.</rule>
59
+ <rule>Sen.</rule>
60
+ </linedontbreak>
61
+ </parser>
62
+ <grader-tc>
63
+ <word>að</word>
64
+ <word>af</word>
65
+ <word>andspænis</word>
66
+ <word>annaðhvort</word>
67
+ <word>auk</word>
68
+ <word>austan</word>
69
+ <word>á</word>
70
+ <word>án</word>
71
+ <word>ásamt</word>
72
+ <word>bæði</word>
73
+ <word>eða</word>
74
+ <word>ef</word>
75
+ <word>eftir</word>
76
+ <word>eiga</word>
77
+ <word>en</word>
78
+ <word>er</word>
79
+ <word>ert</word>
80
+ <word>eru</word>
81
+ <word>eruð</word>
82
+ <word>erum</word>
83
+ <word>ég</word>
84
+ <word>fara</word>
85
+ <word>fá</word>
86
+ <word>frá</word>
87
+ <word>fyrir</word>
88
+ <word>fyrst</word>
89
+ <word>gagn</word>
90
+ <word>gagnvart</word>
91
+ <word>gegnt</word>
92
+ <word>gegnum</word>
93
+ <word>geta</word>
94
+ <word>hafa</word>
95
+ <word>hana</word>
96
+ <word>handa</word>
97
+ <word>hann</word>
98
+ <word>hans</word>
99
+ <word>hennar</word>
100
+ <word>henni</word>
101
+ <word>hið</word>
102
+ <word>hin</word>
103
+ <word>hina</word>
104
+ <word>hinar</word>
105
+ <word>hinir</word>
106
+ <word>hinn</word>
107
+ <word>hinna</word>
108
+ <word>hinnar</word>
109
+ <word>hinni</word>
110
+ <word>hins</word>
111
+ <word>hinu</word>
112
+ <word>hinum</word>
113
+ <word>hjá</word>
114
+ <word>honum</word>
115
+ <word>hún</word>
116
+ <word>hver</word>
117
+ <word>hverjum</word>
118
+ <word>hvorki</word>
119
+ <word>hvort</word>
120
+ <word>innan</word>
121
+ <word>í</word>
122
+ <word>kringum</word>
123
+ <word>með</word>
124
+ <word>meðal</word>
125
+ <word>meðfram</word>
126
+ <word>mega</word>
127
+ <word>megin</word>
128
+ <word>mér</word>
129
+ <word>mig</word>
130
+ <word>milli</word>
131
+ <word>millum</word>
132
+ <word>mín</word>
133
+ <word>mót</word>
134
+ <word>móti</word>
135
+ <word>munu</word>
136
+ <word>nálægt</word>
137
+ <word>neðan</word>
138
+ <word>nema</word>
139
+ <word>né</word>
140
+ <word>norðan</word>
141
+ <word>ofan</word>
142
+ <word>og</word>
143
+ <word>okkur</word>
144
+ <word>pro</word>
145
+ <word>sakir</word>
146
+ <word>sem</word>
147
+ <word>sé</word>
148
+ <word>sért</word>
149
+ <word>séu</word>
150
+ <word>séuð</word>
151
+ <word>séum</word>
152
+ <word>síðan</word>
153
+ <word>skulu</word>
154
+ <word>sunnan</word>
155
+ <word>sökum</word>
156
+ <word>til</word>
157
+ <word>um</word>
158
+ <word>umfram</word>
159
+ <word>umhverfis</word>
160
+ <word>undan</word>
161
+ <word>undir</word>
162
+ <word>utan</word>
163
+ <word>úr</word>
164
+ <word>var</word>
165
+ <word>varst</word>
166
+ <word>vegna</word>
167
+ <word>vera</word>
168
+ <word>verandi</word>
169
+ <word>vestan</word>
170
+ <word>við</word>
171
+ <word>voru</word>
172
+ <word>voruð</word>
173
+ <word>vorum</word>
174
+ <word>væri</word>
175
+ <word>værir</word>
176
+ <word>væru</word>
177
+ <word>væruð</word>
178
+ <word>værum</word>
179
+ <word>yðar</word>
180
+ <word>yður</word>
181
+ <word>yfir</word>
182
+ <word>ykkar</word>
183
+ <word>ykkur</word>
184
+ <word>það</word>
185
+ <word>þau</word>
186
+ <word>þá</word>
187
+ <word>þegar</word>
188
+ <word>þeim</word>
189
+ <word>þeir</word>
190
+ <word>þeirra</word>
191
+ <word>þess</word>
192
+ <word>þér</word>
193
+ <word>þið</word>
194
+ <word>þig</word>
195
+ <word>þín</word>
196
+ <word>þótt</word>
197
+ <word>þú</word>
198
+ <word>því</word>
199
+ <word>þær</word>
200
+ </grader-tc>
201
+ </dictionary>
@@ -0,0 +1,206 @@
1
+ <?xml version="1.0"?>
2
+ <dictionary lang="italian">
3
+ <stemmer>
4
+
5
+ <step1_pre>
6
+ <rule>"|</rule>
7
+ <rule>(|</rule>
8
+ </step1_pre>
9
+
10
+
11
+ <step1_post>
12
+ <rule>."|</rule>
13
+ <rule>,"|</rule>
14
+ <rule>.|</rule>
15
+ <rule>,|</rule>
16
+ <rule>"|</rule>
17
+ <rule>)|</rule>
18
+ <rule>?|</rule>
19
+ <rule>:|</rule>
20
+ <rule>;|</rule>
21
+ <rule>!|</rule>
22
+ </step1_post>
23
+
24
+
25
+ <manual>
26
+ <rule>wrote|write</rule>
27
+ <rule>came|come</rule>
28
+ <rule>went|go</rule>
29
+ </manual>
30
+
31
+ <post>
32
+ <rule>before1|1after</rule>
33
+ </post>
34
+ <pre>
35
+ <rule>before1|1after</rule>
36
+ </pre>
37
+ </stemmer>
38
+ <parser>
39
+
40
+ <linebreak>
41
+ <rule>."</rule>
42
+ <rule>?"</rule>
43
+ <rule>!"</rule>
44
+ <rule>,"</rule>
45
+ <rule>.</rule>
46
+ <rule>?</rule>
47
+ <rule>;</rule>
48
+ <rule>|</rule>
49
+ <rule>!</rule>
50
+ </linebreak>
51
+
52
+ <linedontbreak>
53
+ <rule>Dr.</rule>
54
+ <rule>Mr.</rule>
55
+ <rule>Mrs.</rule>
56
+ <rule>U.S.</rule>
57
+ <rule>Rep.</rule>
58
+ <rule>Sen.</rule>
59
+ </linedontbreak>
60
+ </parser>
61
+ <grader-tc>
62
+ <word>il</word>
63
+ <word>lo</word>
64
+ <word>l'</word>
65
+ <word>i</word>
66
+ <word>gli</word>
67
+ <word>gl'</word>
68
+ <word>la</word>
69
+ <word>le</word>
70
+ <word>un</word>
71
+ <word>uno</word>
72
+ <word>una</word>
73
+ <word>un'</word>
74
+ <word>io</word>
75
+ <word>noi</word>
76
+ <word>mio</word>
77
+ <word>tu</word>
78
+ <word>voi</word>
79
+ <word>vostro</word>
80
+ <word>lui</word>
81
+ <word>lei</word>
82
+ <word>egli</word>
83
+ <word>ella</word>
84
+ <word>esso</word>
85
+ <word>essa</word>
86
+ <word>loro</word>
87
+ <word>essi</word>
88
+ <word>esse</word>
89
+ <word>suo</word>
90
+ <word>sé</word>
91
+ <word>si</word>
92
+ <word>c'è</word>
93
+ <word>a</word>
94
+ <word>ad</word>
95
+ <word>alcuno</word>
96
+ <word>che</word>
97
+ <word>come</word>
98
+ <word>con</word>
99
+ <word>così</word>
100
+ <word>da</word>
101
+ <word>di</word>
102
+ <word>domani</word>
103
+ <word>e</word>
104
+ <word>ed</word>
105
+ <word>in</word>
106
+ <word>infine</word>
107
+ <word>ma</word>
108
+ <word>mai</word>
109
+ <word>mentre</word>
110
+ <word>molto</word>
111
+ <word>né</word>
112
+ <word>nessuno</word>
113
+ <word>nessun</word>
114
+ <word>nessuna</word>
115
+ <word>nessun'</word>
116
+ <word>niente</word>
117
+ <word>no</word>
118
+ <word>non</word>
119
+ <word>nulla</word>
120
+ <word>o</word>
121
+ <word>oggi</word>
122
+ <word>ora</word>
123
+ <word>per</word>
124
+ <word>poi</word>
125
+ <word>poiché</word>
126
+ <word>qualche</word>
127
+ <word>qualcuno</word>
128
+ <word>quando</word>
129
+ <word>questo</word>
130
+ <word>qui</word>
131
+ <word>se</word>
132
+ <word>su</word>
133
+ <word>troppo</word>
134
+ <word>tutto</word>
135
+ <word>al</word>
136
+ <word>ai</word>
137
+ <word>allo</word>
138
+ <word>agli</word>
139
+ <word>alla</word>
140
+ <word>alle</word>
141
+ <word>all'</word>
142
+ <word>col</word>
143
+ <word>coi</word>
144
+ <word>collo</word>
145
+ <word>cogli</word>
146
+ <word>colla</word>
147
+ <word>colle</word>
148
+ <word>coll'</word>
149
+ <word>dal</word>
150
+ <word>dai</word>
151
+ <word>dallo</word>
152
+ <word>dagli</word>
153
+ <word>dalla</word>
154
+ <word>dalle</word>
155
+ <word>dall'</word>
156
+ <word>del</word>
157
+ <word>dei</word>
158
+ <word>dello</word>
159
+ <word>degli</word>
160
+ <word>della</word>
161
+ <word>delle</word>
162
+ <word>dell'</word>
163
+ <word>nel</word>
164
+ <word>nei</word>
165
+ <word>nello</word>
166
+ <word>negli</word>
167
+ <word>nella</word>
168
+ <word>nelle</word>
169
+ <word>nell'</word>
170
+ <word>pel</word>
171
+ <word>pei</word>
172
+ <word>sul</word>
173
+ <word>sui</word>
174
+ <word>sullo</word>
175
+ <word>sugli</word>
176
+ <word>sulla</word>
177
+ <word>sulle</word>
178
+ <word>sull'</word>
179
+ <word>primo</word>
180
+ <word>essere</word>
181
+ <word>sono</word>
182
+ <word>sei</word>
183
+ <word>è</word>
184
+ <word>siamo</word>
185
+ <word>siete</word>
186
+ <word>stare</word>
187
+ <word>sto</word>
188
+ <word>stai</word>
189
+ <word>sta</word>
190
+ <word>stiamo</word>
191
+ <word>stanno</word>
192
+ <word></word>
193
+ <word>avere</word>
194
+ <word>ho</word>
195
+ <word>hai</word>
196
+ <word>ha</word>
197
+ <word>abbiamo</word>
198
+ <word>avete</word>
199
+ <word>hanno</word>
200
+ <word>dovere</word>
201
+ <word>potere</word>
202
+ <word>andare</word>
203
+ <word>va</word>
204
+ <word></word>
205
+ </grader-tc>
206
+ </dictionary>
@@ -0,0 +1,77 @@
1
+ <?xml version="1.0"?>
2
+ <dictionary lang="latvian">
3
+ <stemmer>
4
+
5
+ <step1_pre>
6
+ <rule>"|</rule>
7
+ <rule>(|</rule>
8
+ </step1_pre>
9
+
10
+
11
+ <step1_post>
12
+ <rule>."|</rule>
13
+ <rule>,"|</rule>
14
+ <rule>.|</rule>
15
+ <rule>,|</rule>
16
+ <rule>"|</rule>
17
+ <rule>)|</rule>
18
+ <rule>?|</rule>
19
+ <rule>:|</rule>
20
+ <rule>;|</rule>
21
+ <rule>!|</rule>
22
+ </step1_post>
23
+
24
+
25
+ <manual>
26
+ <rule>wrote|write</rule>
27
+ <rule>came|come</rule>
28
+ <rule>went|go</rule>
29
+ </manual>
30
+
31
+ <post>
32
+ <rule>before1|1after</rule>
33
+ </post>
34
+ <pre>
35
+ <rule>before1|1after</rule>
36
+ </pre>
37
+ </stemmer>
38
+ <parser>
39
+
40
+ <linebreak>
41
+ <rule>."</rule>
42
+ <rule>?"</rule>
43
+ <rule>!"</rule>
44
+ <rule>,"</rule>
45
+ <rule>.</rule>
46
+ <rule>?</rule>
47
+ <rule>;</rule>
48
+ <rule>|</rule>
49
+ <rule>!</rule>
50
+ </linebreak>
51
+
52
+ <linedontbreak>
53
+ <rule>Dr.</rule>
54
+ <rule>Mr.</rule>
55
+ <rule>Mrs.</rule>
56
+ <rule>U.S.</rule>
57
+ <rule>Rep.</rule>
58
+ <rule>Sen.</rule>
59
+ </linedontbreak>
60
+ </parser>
61
+ <grader-tc>
62
+ <word>pa</word>
63
+ <word>par</word>
64
+ <word>pat</word>
65
+ <word>pats</word>
66
+ <word>pār</word>
67
+ <word>pārāk</word>
68
+ <word>pārējais</word>
69
+ <word>pāri</word>
70
+ <word>pēc</word>
71
+ <word>pie</word>
72
+ <word>pirms</word>
73
+ <word>pret</word>
74
+ <word>priekšu</word>
75
+ <word>projām</word>
76
+ </grader-tc>
77
+ </dictionary>