lingo 1.8.2 → 1.8.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/ChangeLog +33 -0
  2. data/README +6 -5
  3. data/Rakefile +6 -4
  4. data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
  5. data/bin/lingoweb +30 -0
  6. data/de.lang +2 -13
  7. data/en/lingo-irr.txt +266 -0
  8. data/en/lingo-wdn.txt +37319 -0
  9. data/en.lang +2 -15
  10. data/lib/lingo/app.rb +82 -0
  11. data/lib/lingo/attendee/abbreviator.rb +22 -26
  12. data/lib/lingo/attendee/debugger.rb +8 -4
  13. data/lib/lingo/attendee/decomposer.rb +0 -1
  14. data/lib/lingo/attendee/dehyphenizer.rb +2 -2
  15. data/lib/lingo/attendee/multi_worder.rb +20 -13
  16. data/lib/lingo/attendee/noneword_filter.rb +2 -7
  17. data/lib/lingo/attendee/sequencer.rb +43 -19
  18. data/lib/lingo/attendee/stemmer/porter.rb +2 -2
  19. data/lib/lingo/attendee/stemmer.rb +1 -1
  20. data/lib/lingo/attendee/synonymer.rb +1 -9
  21. data/lib/lingo/attendee/text_reader.rb +42 -29
  22. data/lib/lingo/attendee/text_writer.rb +3 -6
  23. data/lib/lingo/attendee/tokenizer.rb +87 -69
  24. data/lib/lingo/attendee/variator.rb +7 -5
  25. data/lib/lingo/attendee/vector_filter.rb +11 -11
  26. data/lib/lingo/attendee/word_searcher.rb +1 -9
  27. data/lib/lingo/attendee.rb +24 -105
  28. data/lib/lingo/buffered_attendee.rb +2 -9
  29. data/lib/lingo/call.rb +18 -13
  30. data/lib/lingo/cli.rb +5 -10
  31. data/lib/lingo/config.rb +40 -7
  32. data/lib/lingo/ctl.rb +69 -57
  33. data/lib/lingo/database/hash_store.rb +9 -4
  34. data/lib/lingo/database/sdbm_store.rb +4 -7
  35. data/lib/lingo/database/source/multi_key.rb +1 -1
  36. data/lib/lingo/database/source/multi_value.rb +1 -1
  37. data/lib/lingo/database/source.rb +2 -20
  38. data/lib/lingo/database.rb +30 -19
  39. data/lib/lingo/debug.rb +79 -0
  40. data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
  41. data/lib/lingo/language/dictionary.rb +38 -46
  42. data/lib/lingo/language/grammar.rb +40 -57
  43. data/lib/lingo/language/lexical.rb +4 -7
  44. data/lib/lingo/language/lexical_hash.rb +17 -35
  45. data/lib/lingo/language/token.rb +4 -0
  46. data/lib/lingo/language/word.rb +7 -8
  47. data/lib/lingo/language/word_form.rb +4 -4
  48. data/lib/lingo/language.rb +2 -1
  49. data/lib/lingo/srv/config.ru +4 -0
  50. data/lib/lingo/srv/lingosrv.cfg +14 -0
  51. data/lib/lingo/{reportable.rb → srv.rb} +59 -61
  52. data/lib/lingo/version.rb +1 -1
  53. data/lib/lingo/web/config.ru +4 -0
  54. data/lib/lingo/web/lingoweb.cfg +14 -0
  55. data/lib/lingo/web/public/lingo.png +0 -0
  56. data/lib/lingo/web/public/lingoweb.css +74 -0
  57. data/lib/lingo/web/views/index.erb +92 -0
  58. data/lib/lingo/web.rb +94 -0
  59. data/lib/lingo.rb +27 -29
  60. data/lingo.cfg +1 -1
  61. data/lir.cfg +24 -0
  62. data/ru/lingo-dic.txt +22342 -0
  63. data/ru/lingo-mul.txt +5151 -0
  64. data/ru/lingo-syn.txt +0 -0
  65. data/ru.lang +99 -0
  66. data/test/attendee/ts_sequencer.rb +2 -2
  67. data/test/attendee/ts_text_reader.rb +36 -2
  68. data/test/attendee/ts_text_writer.rb +6 -6
  69. data/test/lir.vec +3 -3
  70. data/test/test_helper.rb +104 -102
  71. data/test/ts_database.rb +1 -1
  72. data/test/ts_language.rb +55 -96
  73. data/txt/artikel-ru.txt +45 -0
  74. data/txt/lir.txt +1 -3
  75. metadata +143 -83
  76. data/TODO +0 -23
@@ -1,61 +1,59 @@
1
- # encoding: utf-8
2
-
3
- #--
4
- ###############################################################################
5
- # #
6
- # Lingo -- A full-featured automatic indexing system #
7
- # #
8
- # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
10
- # #
11
- # Lingo is free software; you can redistribute it and/or modify it under the #
12
- # terms of the GNU Affero General Public License as published by the Free #
13
- # Software Foundation; either version 3 of the License, or (at your option) #
14
- # any later version. #
15
- # #
16
- # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
- # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
- # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
- # more details. #
20
- # #
21
- # You should have received a copy of the GNU Affero General Public License #
22
- # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
- # #
24
- ###############################################################################
25
- #++
26
-
27
- class Lingo
28
-
29
- # Provides counters.
30
-
31
- module Reportable
32
-
33
- def init_reportable(prefix = nil)
34
- @reportable_hash = Hash.new(0)
35
- @reportable_prefix = prefix ? "#{prefix}: " : ''
36
- end
37
-
38
- def inc(key)
39
- @reportable_hash[key] += 1
40
- end
41
-
42
- def add(key, val)
43
- @reportable_hash[key] += val
44
- end
45
-
46
- def set(key, val)
47
- @reportable_hash[key] = val
48
- end
49
-
50
- def get(key)
51
- @reportable_hash[key]
52
- end
53
-
54
- def report
55
- q = @reportable_prefix
56
- @reportable_hash.each_with_object({}) { |(k, v), r| r["#{q}#{k}"] = v }
57
- end
58
-
59
- end
60
-
61
- end
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'json'
28
+ require_relative 'app'
29
+
class Lingo

  # Small web service that runs the text given in the +q+ request parameter
  # through one Lingo instance created at load time and hands both the query
  # and its result to +to_json+.
  #
  # NOTE(review): +init_app+, +root+, +params+ and the two-argument +to_json+
  # are not defined in this class; presumably they are provided by App --
  # confirm against app.rb.
  class Srv < App

    init_app(__FILE__) { %W[-c #{File.join(root, 'lingosrv.cfg')}] }

    # Built once from the command line arguments. Startup is aborted when the
    # call does not hand back a Call instance.
    LINGO = Call.new(ARGV).call
    abort 'Something went wrong...' unless LINGO.is_a?(Call)

    # Value of the vector filter's +src+ option. A bare +true+ stands for the
    # filter's default separator; anything else is used as configured.
    src = LINGO.config.get('meeting/attendees', 'vector_filter', 'src')
    src = Attendee::VectorFilter::DEFAULT_SRC_SEP if src == true
    SRC_SEP = src

    # Every route is answered by the same handler.
    ['', '/'].each { |path| get(path) { doit } }
    post('/') { doit }

    # Processes the +q+ parameter (if present and non-empty) and renders the
    # outcome via +to_json+.
    #
    # When a source separator is configured, each result string is split once
    # at that separator and the leading parts are collected per trailing part,
    # i.e. <tt>{ trailing => [leading, ...] }</tt>.
    def doit
      query  = params[:q]
      result = LINGO.talk(query) unless !query || query.empty?

      if result && SRC_SEP
        grouped = Hash.new { |hash, key| hash[key] = [] }

        result.each { |entry|
          term, source = entry.split(SRC_SEP, 2)
          grouped[source] << term
        }

        result = grouped
      end

      to_json(query, result)
    end

  end

end
data/lib/lingo/version.rb CHANGED
@@ -4,7 +4,7 @@ class Lingo
4
4
 
5
5
  MAJOR = 1
6
6
  MINOR = 8
7
- TINY = 2
7
+ TINY = 3
8
8
 
9
9
  class << self
10
10
 
@@ -0,0 +1,4 @@
1
+ require 'lingo'
2
+ require 'lingo/web'
3
+
4
+ run Lingo::Web
@@ -0,0 +1,14 @@
1
+ ---
2
+ meeting:
3
+ attendees:
4
+ - text_reader: { files: STDIN }
5
+
6
+ - tokenizer: { }
7
+ - word_searcher: { source: sys-dic, mode: first }
8
+ - decomposer: { source: sys-dic }
9
+ - multi_worder: { source: sys-mul }
10
+ - sequencer: { stopper: PUNC,OTHR }
11
+ - synonymer: { skip: '?,t', source: sys-syn }
12
+
13
+ - vector_filter: { debug: 'true', prompt: '' }
14
+ - text_writer: { ext: STDOUT, sep: "\n" }
Binary file
@@ -0,0 +1,74 @@
1
+ html, body {
2
+ margin-top: 0;
3
+ }
4
+
5
+ a img {
6
+ border: none;
7
+ }
8
+
9
+ form {
10
+ white-space: nowrap;
11
+ }
12
+
13
+ fieldset {
14
+ display: inline;
15
+ width: 47%;
16
+ }
17
+
18
+ textarea {
19
+ width: 98.9%;
20
+ height: 30em;
21
+ background-color: white;
22
+ }
23
+
24
+ #welcome {
25
+ font-size: 70%;
26
+ color: #333333;
27
+ margin-bottom: 0.5em;
28
+ text-align: center;
29
+ }
30
+
31
+ #legend {
32
+ font-size: 75%;
33
+ color: #333333;
34
+ margin-bottom: 0.5em;
35
+ }
36
+
37
+ #legend table {
38
+ margin-left: 2em;
39
+ }
40
+
41
+ #legend th {
42
+ font-size: 110%;
43
+ font-weight: normal;
44
+ font-family: monospace;
45
+ text-align: left;
46
+ }
47
+
48
+ #legend td {
49
+ padding-left: 1em;
50
+ }
51
+
52
+ #footer {
53
+ border-style: solid;
54
+ border-color: black;
55
+ border-width: 1px 0;
56
+ padding: 2px 4px;
57
+ }
58
+
59
+ #footer a {
60
+ font-weight: bold;
61
+ }
62
+
63
+ a:link, a:visited {
64
+ text-decoration: none;
65
+ color: #F35327;
66
+ }
67
+
68
+ fieldset, #footer {
69
+ background-color: #DFDFDF;
70
+ }
71
+
72
+ fieldset.error {
73
+ background-color: #FDB331;
74
+ }
@@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" />
    <title>Lingo Web - <%= t 'Automatic indexing online', 'Automatische Indexierung Online' %></title>
    <link rel="stylesheet" type="text/css" href="<%= url_for '/lingoweb.css' %>" />
  </head>
  <body>
    <%# Start page of Lingo Web: input form, read-only result area and a legend of the word classes. %>
    <%# Reads @q (submitted text), @r (result text), @l (selected language) and the LANGS list. %>
    <div id="header">
      <a href="http://lex-lingo.de"><img src="<%= url_for '/lingo.png' %>" alt="Lingo" /></a>
    </div>

    <div id="welcome">
      <strong><%= t 'Welcome to Lingo Web!', 'Willkommen bei Lingo Web!' %></strong>
      <%= t %q{
        Lingo Web provides the opportunity to test the functionality of
        <a href="http://lex-lingo.de">Lingo</a>.<br />
        Lingo is an open source indexing system for research and teaching.
      }, %q{
        Lingo Web bietet die Möglichkeit, die Funktionsweise von
        <a href="http://lex-lingo.de">Lingo</a> zu testen.<br />
        Lingo ist ein frei verfügbares System zur linguistisch und statistisch
        basierten automatischen Indexierung des Deutschen und Englischen.
      } %>
    </div>

    <div id="main">
      <form action="<%= url_for '/' %>" method="post">
        <div>
          <%# @q comes straight from the request and @r is derived from it: both must be HTML-escaped before being echoed. %>
          <fieldset><legend><strong><%= t 'Input', 'Eingabe' %></strong></legend>
            <textarea name="q" rows="20" cols="50"><%= Rack::Utils.escape_html(@q.to_s) %></textarea>
          </fieldset>

          <fieldset><legend><strong><%= t 'Output', 'Ausgabe' %></strong></legend>
            <textarea readonly="readonly" rows="20" cols="50"><%= Rack::Utils.escape_html(@r.to_s) %></textarea>
          </fieldset>

          <br />

          <strong><%= t 'Language', 'Sprache' %></strong> = <select name="l">
          <% LANGS.each do |l| %>
            <option value="<%= l %>"<%= ' selected="selected"' if l == @l %>><%= l %></option>
          <% end %>
          </select>

          <br />
          <br />

          <input type="submit" value="<%= t 'Start processing...', 'Verarbeitung starten...' %>"></input> |
          <input type="reset" value="<%= t 'Reset form', 'Formular zurücksetzen' %>"></input> |
          <a href="<%= url_for '/' %>"><%= t 'New request', 'Neue Anfrage' %></a>
        </div>
      </form>

      <br />
    </div>

    <div id="legend">
      <strong><%= t 'Legend', 'Legende' %></strong>:
      <table>
        <tr><th>s </th><td><%= t 'Noun', 'Substantiv' %></td></tr>
        <tr><th>a </th><td><%= t 'Adjective', 'Adjektiv' %></td></tr>
        <tr><th>v </th><td><%= t 'Verb', 'Verb' %></td></tr>
        <tr><th>e </th><td><%= t 'Proper name', 'Eigenname' %></td></tr>
        <tr><th>w </th><td><%= t 'Word class without suffixes', 'Wortklasse ohne Suffixe' %></td></tr>
        <tr><th>t </th><td><%= t 'Word class without suffixes (e.g. high frequency terms)', 'Wortklasse ohne Suffixe (z.B. Hochfrequenzterme)' %></td></tr>
        <tr><th>y </th><td><%= t 'Synonym', 'Synonym' %></td></tr>
        <tr><th>q (=SEQ)</th><td><%= t 'Sequence (algorithmically identified phrase)', 'Sequenz (algorithmisch erkannter Mehrwortbegriff)' %></td></tr>
        <tr><th>m (=MUL)</th><td><%= t 'Phrase', 'Mehrwortbegriff' %></td></tr>
        <tr><th>k (=KOM)</th><td><%= t 'Compound', 'Kompositum' %></td></tr>
        <tr><th>+ </th><td><%= t 'Part of a compound', 'Kompositum-Bestandteil' %></td></tr>
        <tr><th>x+ </th><td><%= t 'Unknown part of a hyphenated compound', 'unbekannter Kompositum-Bestandteil einer Bindestrich-Konstruktion' %></td></tr>
        <tr><th>? </th><td><%= t 'Unknown word', 'unbekanntes Wort' %></td></tr>
        <tr><th>MU? </th><td><%= t 'Part of a phrase (unknown word)', 'Mehrwortbestandteil (unbekanntes Wort)' %></td></tr>
        <tr><th>HELP </th><td><%= t 'e.g. Special characters', 'z.B. unbekanntes Sonderzeichen' %></td></tr>
        <tr><th>ABRV </th><td><%= t 'Possible abbreviation with a full stop in the middle', 'mögliche Abk. mit eingeschlossenem Punkt (z.B. "Ausst.Kat")' %></td></tr>
        <tr><th>PUNC </th><td><%= t 'Punctuation etc.', 'Satzzeichen etc.' %></td></tr>
        <tr><th>OTHR </th><td><%= t 'Other character', 'Sonstiges Zeichen' %></td></tr>
        <tr><th>URLS </th><td><%= t 'URL', 'URL' %></td></tr>
        <tr><th>NUMS </th><td><%= t 'Number', 'Zahl' %></td></tr>
      </table>
    </div>

    <div id="footer">
      <em>powered by</em> <a href="http://lex-lingo.de">Lingo</a>
      <em>and</em> <a href="http://www.sinatrarb.com">Sinatra</a>
      -- <strong>v<%= Lingo::VERSION %></strong>
    </div>
  </body>
</html>
data/lib/lingo/web.rb ADDED
@@ -0,0 +1,94 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'json'
28
+ require 'nuggets/util/ruby'
29
+
30
+ require_relative 'app'
31
+
class Lingo

  # Browser front end for Lingo: renders a form, runs the submitted text
  # through a Lingo call for the selected language and shows the result as
  # HTML, plain text (<tt>f=text</tt>) or JSON (<tt>f=json</tt>).
  #
  # NOTE(review): +init_app+, +root+ and the two-argument +to_json+ are not
  # defined in this class; presumably they are provided by App -- confirm
  # against app.rb.
  class Web < App

    init_app(__FILE__)

    # UILANGS: languages the user interface is translated into.
    # LANGS:   names taken from the file names returned by Lingo.list(:lang).
    UILANGS, LANGS = %w[en de], Lingo.list(:lang).map! { |lang|
      lang[%r{.*/(\w+)\.}, 1]
    }.uniq.sort!

    auth, cfg = %w[auth cfg].map! { |ext|
      File.join(root, "lingoweb.#{ext}")
    }

    # Optional HTTP basic auth: +lingoweb.auth+ holds a single "user:password"
    # line. An empty file leaves the application unprotected.
    if File.readable?(auth)
      expected = File.read(auth).chomp.split(':', 2)

      use(Rack::Auth::Basic) { |*given|
        # Compare every field in constant time and without short-circuiting,
        # so response timing does not reveal how much of a guess was correct.
        given.size == expected.size && given.zip(expected).map { |a, b|
          Rack::Utils.secure_compare(a.to_s, b.to_s)
        }.all?
      } unless expected.empty?
    end

    # One entry per language, filled on first access with the result of
    # Lingo.call for the web configuration and that language.
    LINGO = Hash.new { |h, k| h[k] = Lingo.call(cfg, ['-l', k]) }

    before do
      # Reduce every entry of the Accept-Language header to its primary
      # language subtag ("de-DE;q=0.8" => "de"). Region subtags, quality
      # parameters and letter case are ignored; empty or malformed entries
      # become "" and therefore never match.
      accepted = env['HTTP_ACCEPT_LANGUAGE'].to_s.split(',').map { |range|
        range[/\A\s*([a-zA-Z]+)/, 1].to_s.downcase
      }

      @uilang = (accepted & UILANGS).first || UILANGS.first

      @q = params[:q]
      @l = params[:l] || @uilang
      @l = LANGS.first unless LANGS.include?(@l)
    end

    get('')   { redirect url_for('/') }
    get('/')  { doit }
    post('/') { doit }

    helpers do
      # Prefixes +path+ with the path the application is mounted at.
      def url_for(path)
        "#{request.script_name}#{path}"
      end

      # Picks the translation matching the current UI language; arguments are
      # given in UILANGS order. Falls back to the first one.
      def t(*t)
        (i = UILANGS.index(@uilang)) && t[i] || t.first
      end
    end

    # Runs the query (if any) for the selected language and renders the result
    # in the format requested through the +f+ parameter.
    def doit
      # NOTE(review): the identity block is handed to +talk+ as in the
      # original; its effect is defined in call.rb -- confirm there.
      @r = LINGO[@l].talk(@q) { |i| i } if @q && !@q.empty?

      case params[:f]
        when 'json'
          to_json(@q, @r)
        when 'text'
          # The body is raw analysis output derived from user input; declare
          # it as plain text so browsers do not interpret it as HTML.
          content_type 'text/plain'
          @r && @r.join("\n")
        else
          @r &&= @r.join("\n")
          erb :index
      end
    end

  end

end
data/lib/lingo.rb CHANGED
@@ -24,13 +24,12 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'unicode'
27
28
  require 'stringio'
28
29
  require 'pathname'
29
30
  require 'fileutils'
30
- require 'benchmark'
31
31
  require 'nuggets/file/ext'
32
32
  require 'nuggets/env/user_home'
33
- require 'nuggets/numeric/duration'
34
33
  require 'nuggets/string/camelscore'
35
34
 
36
35
  class Lingo
@@ -60,7 +59,7 @@ class Lingo
60
59
  }
61
60
 
62
61
  # Default encoding
63
- ENC = 'UTF-8'.freeze
62
+ Encoding.default_external = Encoding.default_internal = ENC = 'UTF-8'.freeze
64
63
 
65
64
  SEP_RE = %r{[; ,|]}
66
65
 
@@ -76,17 +75,18 @@ class Lingo
76
75
 
77
76
  def list(type, options = {})
78
77
  options = options_for(type, options)
79
- path = path_for(options)
80
78
 
81
- glob = file_with_ext('*', options)
79
+ glob, list = file_with_ext('*', options), []
82
80
  glob = File.join('??', glob) if type == :dict
83
81
 
84
- [].tap { |list| walk(path, options) { |dir|
82
+ walk(path = path_for(options), options) { |dir|
85
83
  Dir[File.join(dir, glob)].sort!.each { |file|
86
84
  pn = Pathname.new(file)
87
85
  list << realpath_for(pn, path) if pn.file?
88
86
  }
89
- } }
87
+ }
88
+
89
+ list
90
90
  end
91
91
 
92
92
  def find(type, file, options = {})
@@ -130,7 +130,7 @@ class Lingo
130
130
  private
131
131
 
132
132
  def include_path(path, pre = false)
133
- PATH.insert(pre ? 0 : -1, *path.map!(&:to_s))
133
+ PATH.insert(pre ? 0 : -1, *path.map! { |i| i.to_s })
134
134
  end
135
135
 
136
136
  def find_file(file, path, options)
@@ -223,13 +223,15 @@ class Lingo
223
223
 
224
224
  end
225
225
 
226
- attr_reader :dictionaries, :report_status, :report_time
227
-
228
226
  def initialize(*args)
227
+ Debug.ps(:lingo_new)
228
+
229
229
  @config_args = args
230
230
  reset(false)
231
231
  end
232
232
 
233
+ attr_reader :dictionaries
234
+
233
235
  def config
234
236
  @config ||= Config.new(*@config_args)
235
237
  end
@@ -252,8 +254,12 @@ class Lingo
252
254
  end
253
255
 
254
256
  def talk
255
- invite
256
- start
257
+ Debug.profile(config['profile']) {
258
+ invite
259
+ start
260
+ }
261
+
262
+ Debug.ps(:lingo_talk)
257
263
  ensure
258
264
  reset
259
265
  end
@@ -288,38 +294,30 @@ class Lingo
288
294
  } }
289
295
  end
290
296
 
291
- def start(report_status = config['status'], report_time = config['perfmon'])
292
- @report_status, @report_time = report_status, report_time
293
-
294
- time = Benchmark.realtime {
295
- @attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_TALK))
296
- }
297
-
298
- if report_status || report_time
299
- warn "Require protocol...\n#{separator = '-' * 61}"
300
- @attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_STATUS))
301
- warn "#{separator}\nThe duration of the meeting was #{time.to_hms(2)}"
302
- end
297
+ def start
298
+ @attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_TALK))
303
299
  end
304
300
 
305
301
  def reset(close = true)
306
- dictionaries.each(&:close) if close
302
+ dictionaries.each { |i| i.close } if close
307
303
  @dictionaries, @attendees = [], []
308
304
  @lexical_hash = Hash.new { |h, k| h[k] = Language::LexicalHash.new(k, self) }
309
305
  end
310
306
 
311
307
  def warn(*msg)
312
- config.stderr.puts(*msg)
308
+ config.warn(*msg)
309
+ end
310
+
311
+ def deprecate(old, new, obj = self)
312
+ config.deprecate(old, new, obj)
313
313
  end
314
314
 
315
315
  end
316
316
 
317
317
  require_relative 'lingo/call'
318
318
  require_relative 'lingo/error'
319
+ require_relative 'lingo/debug'
319
320
  require_relative 'lingo/config'
320
- require_relative 'lingo/core_ext'
321
- require_relative 'lingo/cachable'
322
- require_relative 'lingo/reportable'
323
321
  require_relative 'lingo/agenda_item'
324
322
  require_relative 'lingo/show_progress'
325
323
  require_relative 'lingo/database'
data/lingo.cfg CHANGED
@@ -30,7 +30,7 @@ meeting:
30
30
  # Schreibweisen variieren und erneut suchen
31
31
  # - variator: { source: sys-dic }
32
32
 
33
- # Bindestrichergänzungen rekonstruieren
33
+ # Worttrennungen aufheben
34
34
  # - dehyphenizer: { source: sys-dic }
35
35
 
36
36
  # Wortstämme für nicht erkannte Wörter einfügen
data/lir.cfg CHANGED
@@ -26,9 +26,21 @@ meeting:
26
26
  # Zeile in einzelnen Sinnbestandteile (Token) zerlegen
27
27
  - tokenizer: { }
28
28
 
29
+ # Abkürzungen erkennen und auflösen
30
+ # - abbreviator: { source: sys-abk }
31
+
29
32
  # Verbleibende Token im Wörterbuch suchen
30
33
  - word_searcher: { source: sys-dic, mode: first }
31
34
 
35
+ # Schreibweisen variieren und erneut suchen
36
+ # - variator: { source: sys-dic }
37
+
38
+ # Worttrennungen aufheben
39
+ # - dehyphenizer: { source: sys-dic }
40
+
41
+ # Wortstämme für nicht erkannte Wörter einfügen
42
+ # - stemmer: { }
43
+
32
44
  # Nicht erkannte Wörter auf Kompositum testen
33
45
  - decomposer: { source: sys-dic }
34
46
 
@@ -60,10 +72,22 @@ meeting:
60
72
  - noneword_filter: { in: syn }
61
73
  - text_writer: { ext: non, sep: '|' }
62
74
 
75
+ # Erstelle Datei mit Endung .ste für Wortstämme
76
+ - vector_filter: { in: syn, lexicals: z }
77
+ - text_writer: { ext: ste, sep: '|' }
78
+
63
79
  # Erstelle Datei mit Endung .vec für erkannte Indexterme
64
80
  - vector_filter: { in: syn, lexicals: '^[ksavem]$' }
65
81
  - text_writer: { ext: vec, sep: '|' }
66
82
 
83
+ # Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
84
+ - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: term_abs }
85
+ - text_writer: { ext: ven, sep: '|' }
86
+
87
+ # Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
88
+ - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: term_rel }
89
+ - text_writer: { ext: ver, sep: '|' }
90
+
67
91
  # Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
68
92
  - vector_filter: { in: syn, lexicals: m }
69
93
  - text_writer: { ext: mul, sep: '|' }