lingo 1.8.1 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -4,110 +4,94 @@ require_relative '../test_helper'
4
4
 
5
5
  class TestAttendeeWordSearcher < AttendeeTestCase
6
6
 
7
- def setup
8
- @test_synonyms = [
9
- lx('experiment|y'), lx('kontrolle|y'), lx('probelauf|y'),
10
- lx('prüfung|y'), lx('test|y'), lx('testlauf|y'),
11
- lx('testversuch|y'), lx('trockentest|y'), lx('versuch|y')
12
- ]
13
- end
14
-
15
7
  def test_basic
16
- @input = [tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC'), ai('EOL|')]
17
- @expect = [
8
+ meet({ 'source' => 'sys-dic,sys-syn,sys-mul' }, [
9
+ tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC'), ai('EOL|')
10
+ ], [
18
11
  wd('Dies|IDF', 'dies|w'),
19
12
  wd('ist|IDF', 'sein|v'),
20
13
  wd('ein|IDF', 'einen|v', 'ein|w'),
21
14
  wd('Test|IDF', 'test|s', 'testen|v'),
22
15
  tk('.|PUNC'),
23
16
  ai('EOL|')
24
- ]
25
- meet({'source'=>'sys-dic,sys-syn,sys-mul'})
17
+ ])
26
18
  end
27
19
 
28
20
  def test_mode
29
- @input = [tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC'), ai('EOL|')]
30
- @expect = [
21
+ meet({ 'source' => 'sys-syn,sys-dic', 'mode' => 'first' }, [
22
+ tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC'), ai('EOL|')
23
+ ], [
31
24
  wd('Dies|IDF', 'dies|w'),
32
25
  wd('ist|IDF', 'sein|v'),
33
26
  wd('ein|IDF', 'einen|v', 'ein|w'),
34
27
  wd('Test|IDF', 'test|s', 'testen|v'),
35
28
  tk('.|PUNC'),
36
29
  ai('EOL|')
37
- ]
38
- meet({'source'=>'sys-syn,sys-dic', 'mode'=>'first'})
30
+ ])
39
31
  end
40
32
 
41
33
  def test_two_sources_mode_first
42
- @input = [
34
+ meet({ 'source' => 'sys-dic,tst-dic', 'mode' => 'first' }, [
43
35
  tk('Hasennasen|WORD'),
44
36
  tk('Knaller|WORD'),
45
37
  tk('Lex-Lingo|WORD'),
46
38
  tk('A-Dur|WORD'),
47
39
  ai('EOL|')
48
- ]
49
- @expect = [
40
+ ], [
50
41
  wd('Hasennasen|?'),
51
42
  wd('Knaller|IDF', 'knaller|s'),
52
43
  wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
53
44
  wd('A-Dur|IDF', 'a-dur|s'),
54
45
  ai('EOL|')
55
- ]
56
- meet({'source'=>'sys-dic,tst-dic', 'mode'=>'first'})
46
+ ])
57
47
  end
58
48
 
59
49
  def test_two_sources_mode_first_flipped
60
- @input = [
50
+ meet({ 'source' => 'tst-dic,sys-dic', 'mode' => 'first' }, [
61
51
  tk('Hasennasen|WORD'),
62
52
  tk('Knaller|WORD'),
63
53
  tk('Lex-Lingo|WORD'),
64
54
  tk('A-Dur|WORD'),
65
55
  ai('EOL|')
66
- ]
67
- @expect = [
56
+ ], [
68
57
  wd('Hasennasen|?'),
69
58
  wd('Knaller|IDF', 'knaller|s'),
70
59
  wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
71
60
  wd('A-Dur|IDF', 'b-dur|s'),
72
61
  ai('EOL|')
73
- ]
74
- meet({'source'=>'tst-dic,sys-dic', 'mode'=>'first'})
62
+ ])
75
63
  end
76
64
 
77
65
  def test_select_two_sources_mode_all
78
- @input = [
66
+ meet({ 'source' => 'sys-dic,tst-dic', 'mode' => 'all' }, [
79
67
  tk('Hasennasen|WORD'),
80
68
  tk('Knaller|WORD'),
81
69
  tk('Lex-Lingo|WORD'),
82
70
  tk('A-Dur|WORD'),
83
71
  ai('EOL|')
84
- ]
85
- @expect = [
72
+ ], [
86
73
  wd('Hasennasen|?'),
87
74
  wd('Knaller|IDF', 'knaller|s'),
88
75
  wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
89
76
  wd('A-Dur|IDF', 'a-dur|s', 'b-dur|s'),
90
77
  ai('EOL|')
91
- ]
92
- meet({'source'=>'sys-dic,tst-dic', 'mode'=>'all'})
78
+ ])
93
79
  end
94
80
 
95
81
  def test_select_two_sources_mode_def
96
- @input = [
82
+ meet({ 'source' => 'sys-dic,tst-dic' }, [
97
83
  tk('Hasennasen|WORD'),
98
84
  tk('Knaller|WORD'),
99
85
  tk('Lex-Lingo|WORD'),
100
86
  tk('A-Dur|WORD'),
101
87
  ai('EOL|')
102
- ]
103
- @expect = [
88
+ ], [
104
89
  wd('Hasennasen|?'),
105
90
  wd('Knaller|IDF', 'knaller|s'),
106
91
  wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
107
92
  wd('A-Dur|IDF', 'a-dur|s', 'b-dur|s'),
108
93
  ai('EOL|')
109
- ]
110
- meet({'source'=>'sys-dic,tst-dic'})
94
+ ])
111
95
  end
112
96
 
113
97
  end
File without changes