lingo 1.8.2 → 1.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +33 -0
- data/README +6 -5
- data/Rakefile +6 -4
- data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
- data/bin/lingoweb +30 -0
- data/de.lang +2 -13
- data/en/lingo-irr.txt +266 -0
- data/en/lingo-wdn.txt +37319 -0
- data/en.lang +2 -15
- data/lib/lingo/app.rb +82 -0
- data/lib/lingo/attendee/abbreviator.rb +22 -26
- data/lib/lingo/attendee/debugger.rb +8 -4
- data/lib/lingo/attendee/decomposer.rb +0 -1
- data/lib/lingo/attendee/dehyphenizer.rb +2 -2
- data/lib/lingo/attendee/multi_worder.rb +20 -13
- data/lib/lingo/attendee/noneword_filter.rb +2 -7
- data/lib/lingo/attendee/sequencer.rb +43 -19
- data/lib/lingo/attendee/stemmer/porter.rb +2 -2
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +1 -9
- data/lib/lingo/attendee/text_reader.rb +42 -29
- data/lib/lingo/attendee/text_writer.rb +3 -6
- data/lib/lingo/attendee/tokenizer.rb +87 -69
- data/lib/lingo/attendee/variator.rb +7 -5
- data/lib/lingo/attendee/vector_filter.rb +11 -11
- data/lib/lingo/attendee/word_searcher.rb +1 -9
- data/lib/lingo/attendee.rb +24 -105
- data/lib/lingo/buffered_attendee.rb +2 -9
- data/lib/lingo/call.rb +18 -13
- data/lib/lingo/cli.rb +5 -10
- data/lib/lingo/config.rb +40 -7
- data/lib/lingo/ctl.rb +69 -57
- data/lib/lingo/database/hash_store.rb +9 -4
- data/lib/lingo/database/sdbm_store.rb +4 -7
- data/lib/lingo/database/source/multi_key.rb +1 -1
- data/lib/lingo/database/source/multi_value.rb +1 -1
- data/lib/lingo/database/source.rb +2 -20
- data/lib/lingo/database.rb +30 -19
- data/lib/lingo/debug.rb +79 -0
- data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
- data/lib/lingo/language/dictionary.rb +38 -46
- data/lib/lingo/language/grammar.rb +40 -57
- data/lib/lingo/language/lexical.rb +4 -7
- data/lib/lingo/language/lexical_hash.rb +17 -35
- data/lib/lingo/language/token.rb +4 -0
- data/lib/lingo/language/word.rb +7 -8
- data/lib/lingo/language/word_form.rb +4 -4
- data/lib/lingo/language.rb +2 -1
- data/lib/lingo/srv/config.ru +4 -0
- data/lib/lingo/srv/lingosrv.cfg +14 -0
- data/lib/lingo/{reportable.rb → srv.rb} +59 -61
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web/config.ru +4 -0
- data/lib/lingo/web/lingoweb.cfg +14 -0
- data/lib/lingo/web/public/lingo.png +0 -0
- data/lib/lingo/web/public/lingoweb.css +74 -0
- data/lib/lingo/web/views/index.erb +92 -0
- data/lib/lingo/web.rb +94 -0
- data/lib/lingo.rb +27 -29
- data/lingo.cfg +1 -1
- data/lir.cfg +24 -0
- data/ru/lingo-dic.txt +22342 -0
- data/ru/lingo-mul.txt +5151 -0
- data/ru/lingo-syn.txt +0 -0
- data/ru.lang +99 -0
- data/test/attendee/ts_sequencer.rb +2 -2
- data/test/attendee/ts_text_reader.rb +36 -2
- data/test/attendee/ts_text_writer.rb +6 -6
- data/test/lir.vec +3 -3
- data/test/test_helper.rb +104 -102
- data/test/ts_database.rb +1 -1
- data/test/ts_language.rb +55 -96
- data/txt/artikel-ru.txt +45 -0
- data/txt/lir.txt +1 -3
- metadata +143 -83
- data/TODO +0 -23
@@ -1,61 +1,59 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#--
|
4
|
-
###############################################################################
|
5
|
-
# #
|
6
|
-
# Lingo -- A full-featured automatic indexing system #
|
7
|
-
# #
|
8
|
-
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
-
# #
|
11
|
-
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
-
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
-
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
-
# any later version. #
|
15
|
-
# #
|
16
|
-
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
-
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
-
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
-
# more details. #
|
20
|
-
# #
|
21
|
-
# You should have received a copy of the GNU Affero General Public License #
|
22
|
-
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
-
# #
|
24
|
-
###############################################################################
|
25
|
-
#++
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
def
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'json'
|
28
|
+
require_relative 'app'
|
29
|
+
|
30
|
+
class Lingo
|
31
|
+
|
32
|
+
class Srv < App
|
33
|
+
|
34
|
+
init_app(__FILE__) { %W[-c #{File.join(root, 'lingosrv.cfg')}] }
|
35
|
+
|
36
|
+
LINGO = Call.new(ARGV).call
|
37
|
+
abort 'Something went wrong...' unless LINGO.is_a?(Call)
|
38
|
+
|
39
|
+
c = LINGO.config.get('meeting/attendees', 'vector_filter', 'src')
|
40
|
+
SRC_SEP = c == true ? Attendee::VectorFilter::DEFAULT_SRC_SEP : c
|
41
|
+
|
42
|
+
get('') { doit }
|
43
|
+
get('/') { doit }
|
44
|
+
post('/') { doit }
|
45
|
+
|
46
|
+
def doit
|
47
|
+
q = params[:q]
|
48
|
+
r = LINGO.talk(q) if q && !q.empty?
|
49
|
+
|
50
|
+
r = r.inject(Hash.new { |h, k| h[k] = [] }) { |h, s|
|
51
|
+
a, b = s.split(SRC_SEP, 2); h[b] << a; h
|
52
|
+
} if r && SRC_SEP
|
53
|
+
|
54
|
+
to_json(q, r)
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
data/lib/lingo/version.rb
CHANGED
@@ -0,0 +1,14 @@
|
|
1
|
+
---
|
2
|
+
meeting:
|
3
|
+
attendees:
|
4
|
+
- text_reader: { files: STDIN }
|
5
|
+
|
6
|
+
- tokenizer: { }
|
7
|
+
- word_searcher: { source: sys-dic, mode: first }
|
8
|
+
- decomposer: { source: sys-dic }
|
9
|
+
- multi_worder: { source: sys-mul }
|
10
|
+
- sequencer: { stopper: PUNC,OTHR }
|
11
|
+
- synonymer: { skip: '?,t', source: sys-syn }
|
12
|
+
|
13
|
+
- vector_filter: { debug: 'true', prompt: '' }
|
14
|
+
- text_writer: { ext: STDOUT, sep: "\n" }
|
Binary file
|
@@ -0,0 +1,74 @@
|
|
1
|
+
html, body {
|
2
|
+
margin-top: 0;
|
3
|
+
}
|
4
|
+
|
5
|
+
a img {
|
6
|
+
border: none;
|
7
|
+
}
|
8
|
+
|
9
|
+
form {
|
10
|
+
white-space: nowrap;
|
11
|
+
}
|
12
|
+
|
13
|
+
fieldset {
|
14
|
+
display: inline;
|
15
|
+
width: 47%;
|
16
|
+
}
|
17
|
+
|
18
|
+
textarea {
|
19
|
+
width: 98.9%;
|
20
|
+
height: 30em;
|
21
|
+
background-color: white;
|
22
|
+
}
|
23
|
+
|
24
|
+
#welcome {
|
25
|
+
font-size: 70%;
|
26
|
+
color: #333333;
|
27
|
+
margin-bottom: 0.5em;
|
28
|
+
text-align: center;
|
29
|
+
}
|
30
|
+
|
31
|
+
#legend {
|
32
|
+
font-size: 75%;
|
33
|
+
color: #333333;
|
34
|
+
margin-bottom: 0.5em;
|
35
|
+
}
|
36
|
+
|
37
|
+
#legend table {
|
38
|
+
margin-left: 2em;
|
39
|
+
}
|
40
|
+
|
41
|
+
#legend th {
|
42
|
+
font-size: 110%;
|
43
|
+
font-weight: normal;
|
44
|
+
font-family: monospace;
|
45
|
+
text-align: left;
|
46
|
+
}
|
47
|
+
|
48
|
+
#legend td {
|
49
|
+
padding-left: 1em;
|
50
|
+
}
|
51
|
+
|
52
|
+
#footer {
|
53
|
+
border-style: solid;
|
54
|
+
border-color: black;
|
55
|
+
border-width: 1px 0;
|
56
|
+
padding: 2px 4px;
|
57
|
+
}
|
58
|
+
|
59
|
+
#footer a {
|
60
|
+
font-weight: bold;
|
61
|
+
}
|
62
|
+
|
63
|
+
a:link, a:visited {
|
64
|
+
text-decoration: none;
|
65
|
+
color: #F35327;
|
66
|
+
}
|
67
|
+
|
68
|
+
fieldset, #footer {
|
69
|
+
background-color: #DFDFDF;
|
70
|
+
}
|
71
|
+
|
72
|
+
fieldset.error {
|
73
|
+
background-color: #FDB331;
|
74
|
+
}
|
@@ -0,0 +1,92 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
4
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
5
|
+
<head>
|
6
|
+
<meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" />
|
7
|
+
<title>Lingo Web - <%= t 'Automatic indexing online', 'Automatische Indexierung Online' %></title>
|
8
|
+
<link rel="stylesheet" type="text/css" href="<%= url_for '/lingoweb.css' %>" />
|
9
|
+
</head>
|
10
|
+
<body>
|
11
|
+
<div id="header">
|
12
|
+
<a href="http://lex-lingo.de"><img src="<%= url_for '/lingo.png' %>" alt="Lingo" /></a>
|
13
|
+
</div>
|
14
|
+
|
15
|
+
<div id="welcome">
|
16
|
+
<strong><%= t 'Welcome to Lingo Web!', 'Willkommen bei Lingo Web!' %></strong>
|
17
|
+
<%= t %q{
|
18
|
+
Lingo Web provides the opportunity to test the functionality of
|
19
|
+
<a href="http://lex-lingo.de">Lingo</a>.<br />
|
20
|
+
Lingo is an open source indexing system for research and teaching.
|
21
|
+
}, %q{
|
22
|
+
Lingo Web bietet die Möglichkeit, die Funktionsweise von
|
23
|
+
<a href="http://lex-lingo.de">Lingo</a> zu testen.<br />
|
24
|
+
Lingo ist ein frei verfügbares System zur linguistisch und statistisch
|
25
|
+
basierten automatischen Indexierung des Deutschen und Englischen.
|
26
|
+
} %>
|
27
|
+
</div>
|
28
|
+
|
29
|
+
<div id="main">
|
30
|
+
<form action="<%= url_for '/' %>" method="post">
|
31
|
+
<div>
|
32
|
+
<fieldset><legend><strong><%= t 'Input', 'Eingabe' %></strong></legend>
|
33
|
+
<textarea name="q" rows="20" cols="50"><%= @q %></textarea>
|
34
|
+
</fieldset>
|
35
|
+
|
36
|
+
<fieldset><legend><strong><%= t 'Output', 'Ausgabe' %></strong></legend>
|
37
|
+
<textarea readonly="readonly" rows="20" cols="50"><%= @r %></textarea>
|
38
|
+
</fieldset>
|
39
|
+
|
40
|
+
<br />
|
41
|
+
|
42
|
+
<strong><%= t 'Language', 'Sprache' %></strong> = <select name="l">
|
43
|
+
<% for l in LANGS %>
|
44
|
+
<option value="<%= l %>"<%= ' selected="selected"' if l == @l %>><%= l %></option>
|
45
|
+
<% end %>
|
46
|
+
</select>
|
47
|
+
|
48
|
+
<br />
|
49
|
+
<br />
|
50
|
+
|
51
|
+
<input type="submit" value="<%= t 'Start processing...', 'Verarbeitung starten...' %>"></input> |
|
52
|
+
<input type="reset" value="<%= t 'Reset form', 'Formular zurücksetzen' %>"></input> |
|
53
|
+
<a href="<%= url_for '/' %>"><%= t 'New request', 'Neue Anfrage' %></a>
|
54
|
+
</div>
|
55
|
+
</form>
|
56
|
+
|
57
|
+
<br />
|
58
|
+
</div>
|
59
|
+
|
60
|
+
<div id="legend">
|
61
|
+
<strong><%= t 'Legend', 'Legende' %></strong>:
|
62
|
+
<table>
|
63
|
+
<tr><th>s </th><td><%= t 'Noun', 'Substantiv' %></td></tr>
|
64
|
+
<tr><th>a </th><td><%= t 'Adjective', 'Adjektiv' %></td></tr>
|
65
|
+
<tr><th>v </th><td><%= t 'Verb', 'Verb' %></td></tr>
|
66
|
+
<tr><th>e </th><td><%= t 'Proper name', 'Eigenname' %></td></tr>
|
67
|
+
<tr><th>w </th><td><%= t 'Word class without suffixes', 'Wortklasse ohne Suffixe' %></td></tr>
|
68
|
+
<tr><th>t </th><td><%= t 'Word class without suffixes (e.g. high frequency terms)', 'Wortklasse ohne Suffixe (z.B. Hochfrequenzterme)' %></td></tr>
|
69
|
+
<tr><th>y </th><td><%= t 'Synonym', 'Synonym' %></td></tr>
|
70
|
+
<tr><th>q (=SEQ)</th><td><%= t 'Sequence (algorithmically identified phrase)', 'Sequenz (algorithmisch erkannter Mehrwortbegriff)' %></td></tr>
|
71
|
+
<tr><th>m (=MUL)</th><td><%= t 'Phrase', 'Mehrwortbegriff' %></td></tr>
|
72
|
+
<tr><th>k (=KOM)</th><td><%= t 'Compound', 'Kompositum' %></td></tr>
|
73
|
+
<tr><th>+ </th><td><%= t 'Part of a compound', 'Kompositum-Bestandteil' %></td></tr>
|
74
|
+
<tr><th>x+ </th><td><%= t 'Unknown part of a hyphenated compound', 'unbekannter Kompositum-Bestandteil einer Bindestrich-Konstruktion' %></td></tr>
|
75
|
+
<tr><th>? </th><td><%= t 'Unknown word', 'unbekanntes Wort' %></td></tr>
|
76
|
+
<tr><th>MU? </th><td><%= t 'Part of a phrase (unknown word)', 'Mehrwortbestandteil (unbekanntes Wort)' %></td></tr>
|
77
|
+
<tr><th>HELP </th><td><%= t 'e.g. Special characters', 'z.B. unbekanntes Sonderzeichen' %></td></tr>
|
78
|
+
<tr><th>ABRV </th><td><%= t 'Possible abbreviation with a full stop in the middle', 'mögliche Abk. mit eingeschlossenem Punkt (z.B. "Ausst.Kat")' %></td></tr>
|
79
|
+
<tr><th>PUNC </th><td><%= t 'Punctuation etc.', 'Satzzeichen etc.' %></td></tr>
|
80
|
+
<tr><th>OTHR </th><td><%= t 'Other character', 'Sonstiges Zeichen' %></td></tr>
|
81
|
+
<tr><th>URLS </th><td><%= t 'URL', 'URL' %></td></tr>
|
82
|
+
<tr><th>NUMS </th><td><%= t 'Number', 'Zahl' %></td></tr>
|
83
|
+
</table>
|
84
|
+
</div>
|
85
|
+
|
86
|
+
<div id="footer">
|
87
|
+
<em>powered by</em> <a href="http://lex-lingo.de">Lingo</a>
|
88
|
+
<em>and</em> <a href="http://www.sinatrarb.com">Sinatra</a>
|
89
|
+
-- <strong>v<%= Lingo::VERSION %></strong>
|
90
|
+
</div>
|
91
|
+
</body>
|
92
|
+
</html>
|
data/lib/lingo/web.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'json'
|
28
|
+
require 'nuggets/util/ruby'
|
29
|
+
|
30
|
+
require_relative 'app'
|
31
|
+
|
32
|
+
class Lingo
|
33
|
+
|
34
|
+
class Web < App
|
35
|
+
|
36
|
+
init_app(__FILE__)
|
37
|
+
|
38
|
+
UILANGS, LANGS = %w[en de], Lingo.list(:lang).map! { |lang|
|
39
|
+
lang[%r{.*/(\w+)\.}, 1]
|
40
|
+
}.uniq.sort!
|
41
|
+
|
42
|
+
auth, cfg = %w[auth cfg].map! { |ext|
|
43
|
+
File.join(root, "lingoweb.#{ext}")
|
44
|
+
}
|
45
|
+
|
46
|
+
if File.readable?(auth)
|
47
|
+
c = File.read(auth).chomp.split(':', 2)
|
48
|
+
use(Rack::Auth::Basic) { |*b| b == c } unless c.empty?
|
49
|
+
end
|
50
|
+
|
51
|
+
LINGO = Hash.new { |h, k| h[k] = Lingo.call(cfg, ['-l', k]) }
|
52
|
+
|
53
|
+
before do
|
54
|
+
@uilang = if hal = env['HTTP_ACCEPT_LANGUAGE']
|
55
|
+
hals = hal.split(',').map { |l| l.split('-').first.strip }
|
56
|
+
(hals & UILANGS).first
|
57
|
+
end || UILANGS.first
|
58
|
+
|
59
|
+
@q = params[:q]
|
60
|
+
@l = params[:l] || @uilang
|
61
|
+
@l = LANGS.first unless LANGS.include?(@l)
|
62
|
+
end
|
63
|
+
|
64
|
+
get('') { redirect url_for('/') }
|
65
|
+
get('/') { doit }
|
66
|
+
post('/') { doit }
|
67
|
+
|
68
|
+
helpers do
|
69
|
+
def url_for(path)
|
70
|
+
"#{request.script_name}#{path}"
|
71
|
+
end
|
72
|
+
|
73
|
+
def t(*t)
|
74
|
+
(i = UILANGS.index(@uilang)) && t[i] || t.first
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def doit
|
79
|
+
@r = LINGO[@l].talk(@q) { |_| _ } if @q && !@q.empty?
|
80
|
+
|
81
|
+
case params[:f]
|
82
|
+
when 'json'
|
83
|
+
to_json(@q, @r)
|
84
|
+
when 'text'
|
85
|
+
@r && @r.join("\n")
|
86
|
+
else
|
87
|
+
@r &&= @r.join("\n")
|
88
|
+
erb :index
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
data/lib/lingo.rb
CHANGED
@@ -24,13 +24,12 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'unicode'
|
27
28
|
require 'stringio'
|
28
29
|
require 'pathname'
|
29
30
|
require 'fileutils'
|
30
|
-
require 'benchmark'
|
31
31
|
require 'nuggets/file/ext'
|
32
32
|
require 'nuggets/env/user_home'
|
33
|
-
require 'nuggets/numeric/duration'
|
34
33
|
require 'nuggets/string/camelscore'
|
35
34
|
|
36
35
|
class Lingo
|
@@ -60,7 +59,7 @@ class Lingo
|
|
60
59
|
}
|
61
60
|
|
62
61
|
# Default encoding
|
63
|
-
ENC = 'UTF-8'.freeze
|
62
|
+
Encoding.default_external = Encoding.default_internal = ENC = 'UTF-8'.freeze
|
64
63
|
|
65
64
|
SEP_RE = %r{[; ,|]}
|
66
65
|
|
@@ -76,17 +75,18 @@ class Lingo
|
|
76
75
|
|
77
76
|
def list(type, options = {})
|
78
77
|
options = options_for(type, options)
|
79
|
-
path = path_for(options)
|
80
78
|
|
81
|
-
glob = file_with_ext('*', options)
|
79
|
+
glob, list = file_with_ext('*', options), []
|
82
80
|
glob = File.join('??', glob) if type == :dict
|
83
81
|
|
84
|
-
|
82
|
+
walk(path = path_for(options), options) { |dir|
|
85
83
|
Dir[File.join(dir, glob)].sort!.each { |file|
|
86
84
|
pn = Pathname.new(file)
|
87
85
|
list << realpath_for(pn, path) if pn.file?
|
88
86
|
}
|
89
|
-
}
|
87
|
+
}
|
88
|
+
|
89
|
+
list
|
90
90
|
end
|
91
91
|
|
92
92
|
def find(type, file, options = {})
|
@@ -130,7 +130,7 @@ class Lingo
|
|
130
130
|
private
|
131
131
|
|
132
132
|
def include_path(path, pre = false)
|
133
|
-
PATH.insert(pre ? 0 : -1, *path.map!
|
133
|
+
PATH.insert(pre ? 0 : -1, *path.map! { |i| i.to_s })
|
134
134
|
end
|
135
135
|
|
136
136
|
def find_file(file, path, options)
|
@@ -223,13 +223,15 @@ class Lingo
|
|
223
223
|
|
224
224
|
end
|
225
225
|
|
226
|
-
attr_reader :dictionaries, :report_status, :report_time
|
227
|
-
|
228
226
|
def initialize(*args)
|
227
|
+
Debug.ps(:lingo_new)
|
228
|
+
|
229
229
|
@config_args = args
|
230
230
|
reset(false)
|
231
231
|
end
|
232
232
|
|
233
|
+
attr_reader :dictionaries
|
234
|
+
|
233
235
|
def config
|
234
236
|
@config ||= Config.new(*@config_args)
|
235
237
|
end
|
@@ -252,8 +254,12 @@ class Lingo
|
|
252
254
|
end
|
253
255
|
|
254
256
|
def talk
|
255
|
-
|
256
|
-
|
257
|
+
Debug.profile(config['profile']) {
|
258
|
+
invite
|
259
|
+
start
|
260
|
+
}
|
261
|
+
|
262
|
+
Debug.ps(:lingo_talk)
|
257
263
|
ensure
|
258
264
|
reset
|
259
265
|
end
|
@@ -288,38 +294,30 @@ class Lingo
|
|
288
294
|
} }
|
289
295
|
end
|
290
296
|
|
291
|
-
def start
|
292
|
-
@
|
293
|
-
|
294
|
-
time = Benchmark.realtime {
|
295
|
-
@attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_TALK))
|
296
|
-
}
|
297
|
-
|
298
|
-
if report_status || report_time
|
299
|
-
warn "Require protocol...\n#{separator = '-' * 61}"
|
300
|
-
@attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_STATUS))
|
301
|
-
warn "#{separator}\nThe duration of the meeting was #{time.to_hms(2)}"
|
302
|
-
end
|
297
|
+
def start
|
298
|
+
@attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_TALK))
|
303
299
|
end
|
304
300
|
|
305
301
|
def reset(close = true)
|
306
|
-
dictionaries.each
|
302
|
+
dictionaries.each { |i| i.close } if close
|
307
303
|
@dictionaries, @attendees = [], []
|
308
304
|
@lexical_hash = Hash.new { |h, k| h[k] = Language::LexicalHash.new(k, self) }
|
309
305
|
end
|
310
306
|
|
311
307
|
def warn(*msg)
|
312
|
-
config.
|
308
|
+
config.warn(*msg)
|
309
|
+
end
|
310
|
+
|
311
|
+
def deprecate(old, new, obj = self)
|
312
|
+
config.deprecate(old, new, obj)
|
313
313
|
end
|
314
314
|
|
315
315
|
end
|
316
316
|
|
317
317
|
require_relative 'lingo/call'
|
318
318
|
require_relative 'lingo/error'
|
319
|
+
require_relative 'lingo/debug'
|
319
320
|
require_relative 'lingo/config'
|
320
|
-
require_relative 'lingo/core_ext'
|
321
|
-
require_relative 'lingo/cachable'
|
322
|
-
require_relative 'lingo/reportable'
|
323
321
|
require_relative 'lingo/agenda_item'
|
324
322
|
require_relative 'lingo/show_progress'
|
325
323
|
require_relative 'lingo/database'
|
data/lingo.cfg
CHANGED
@@ -30,7 +30,7 @@ meeting:
|
|
30
30
|
# Schreibweisen variieren und erneut suchen
|
31
31
|
# - variator: { source: sys-dic }
|
32
32
|
|
33
|
-
#
|
33
|
+
# Worttrennungen aufheben
|
34
34
|
# - dehyphenizer: { source: sys-dic }
|
35
35
|
|
36
36
|
# Wortstämme für nicht erkannte Wörter einfügen
|
data/lir.cfg
CHANGED
@@ -26,9 +26,21 @@ meeting:
|
|
26
26
|
# Zeile in einzelnen Sinnbestandteile (Token) zerlegen
|
27
27
|
- tokenizer: { }
|
28
28
|
|
29
|
+
# Abkürzungen erkennen und auflösen
|
30
|
+
# - abbreviator: { source: sys-abk }
|
31
|
+
|
29
32
|
# Verbleibende Token im Wörterbuch suchen
|
30
33
|
- word_searcher: { source: sys-dic, mode: first }
|
31
34
|
|
35
|
+
# Schreibweisen variieren und erneut suchen
|
36
|
+
# - variator: { source: sys-dic }
|
37
|
+
|
38
|
+
# Worttrennungen aufheben
|
39
|
+
# - dehyphenizer: { source: sys-dic }
|
40
|
+
|
41
|
+
# Wortstämme für nicht erkannte Wörter einfügen
|
42
|
+
# - stemmer: { }
|
43
|
+
|
32
44
|
# Nicht erkannte Wörter auf Kompositum testen
|
33
45
|
- decomposer: { source: sys-dic }
|
34
46
|
|
@@ -60,10 +72,22 @@ meeting:
|
|
60
72
|
- noneword_filter: { in: syn }
|
61
73
|
- text_writer: { ext: non, sep: '|' }
|
62
74
|
|
75
|
+
# Erstelle Datei mit Endung .ste für Wortstämme
|
76
|
+
- vector_filter: { in: syn, lexicals: z }
|
77
|
+
- text_writer: { ext: ste, sep: '|' }
|
78
|
+
|
63
79
|
# Erstelle Datei mit Endung .vec für erkannte Indexterme
|
64
80
|
- vector_filter: { in: syn, lexicals: '^[ksavem]$' }
|
65
81
|
- text_writer: { ext: vec, sep: '|' }
|
66
82
|
|
83
|
+
# Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
|
84
|
+
- vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: term_abs }
|
85
|
+
- text_writer: { ext: ven, sep: '|' }
|
86
|
+
|
87
|
+
# Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
|
88
|
+
- vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: term_rel }
|
89
|
+
- text_writer: { ext: ver, sep: '|' }
|
90
|
+
|
67
91
|
# Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
|
68
92
|
- vector_filter: { in: syn, lexicals: m }
|
69
93
|
- text_writer: { ext: mul, sep: '|' }
|