lingo 1.8.2 → 1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +33 -0
- data/README +6 -5
- data/Rakefile +6 -4
- data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
- data/bin/lingoweb +30 -0
- data/de.lang +2 -13
- data/en/lingo-irr.txt +266 -0
- data/en/lingo-wdn.txt +37319 -0
- data/en.lang +2 -15
- data/lib/lingo/app.rb +82 -0
- data/lib/lingo/attendee/abbreviator.rb +22 -26
- data/lib/lingo/attendee/debugger.rb +8 -4
- data/lib/lingo/attendee/decomposer.rb +0 -1
- data/lib/lingo/attendee/dehyphenizer.rb +2 -2
- data/lib/lingo/attendee/multi_worder.rb +20 -13
- data/lib/lingo/attendee/noneword_filter.rb +2 -7
- data/lib/lingo/attendee/sequencer.rb +43 -19
- data/lib/lingo/attendee/stemmer/porter.rb +2 -2
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +1 -9
- data/lib/lingo/attendee/text_reader.rb +42 -29
- data/lib/lingo/attendee/text_writer.rb +3 -6
- data/lib/lingo/attendee/tokenizer.rb +87 -69
- data/lib/lingo/attendee/variator.rb +7 -5
- data/lib/lingo/attendee/vector_filter.rb +11 -11
- data/lib/lingo/attendee/word_searcher.rb +1 -9
- data/lib/lingo/attendee.rb +24 -105
- data/lib/lingo/buffered_attendee.rb +2 -9
- data/lib/lingo/call.rb +18 -13
- data/lib/lingo/cli.rb +5 -10
- data/lib/lingo/config.rb +40 -7
- data/lib/lingo/ctl.rb +69 -57
- data/lib/lingo/database/hash_store.rb +9 -4
- data/lib/lingo/database/sdbm_store.rb +4 -7
- data/lib/lingo/database/source/multi_key.rb +1 -1
- data/lib/lingo/database/source/multi_value.rb +1 -1
- data/lib/lingo/database/source.rb +2 -20
- data/lib/lingo/database.rb +30 -19
- data/lib/lingo/debug.rb +79 -0
- data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
- data/lib/lingo/language/dictionary.rb +38 -46
- data/lib/lingo/language/grammar.rb +40 -57
- data/lib/lingo/language/lexical.rb +4 -7
- data/lib/lingo/language/lexical_hash.rb +17 -35
- data/lib/lingo/language/token.rb +4 -0
- data/lib/lingo/language/word.rb +7 -8
- data/lib/lingo/language/word_form.rb +4 -4
- data/lib/lingo/language.rb +2 -1
- data/lib/lingo/srv/config.ru +4 -0
- data/lib/lingo/srv/lingosrv.cfg +14 -0
- data/lib/lingo/{reportable.rb → srv.rb} +59 -61
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web/config.ru +4 -0
- data/lib/lingo/web/lingoweb.cfg +14 -0
- data/lib/lingo/web/public/lingo.png +0 -0
- data/lib/lingo/web/public/lingoweb.css +74 -0
- data/lib/lingo/web/views/index.erb +92 -0
- data/lib/lingo/web.rb +94 -0
- data/lib/lingo.rb +27 -29
- data/lingo.cfg +1 -1
- data/lir.cfg +24 -0
- data/ru/lingo-dic.txt +22342 -0
- data/ru/lingo-mul.txt +5151 -0
- data/ru/lingo-syn.txt +0 -0
- data/ru.lang +99 -0
- data/test/attendee/ts_sequencer.rb +2 -2
- data/test/attendee/ts_text_reader.rb +36 -2
- data/test/attendee/ts_text_writer.rb +6 -6
- data/test/lir.vec +3 -3
- data/test/test_helper.rb +104 -102
- data/test/ts_database.rb +1 -1
- data/test/ts_language.rb +55 -96
- data/txt/artikel-ru.txt +45 -0
- data/txt/lir.txt +1 -3
- metadata +143 -83
- data/TODO +0 -23
@@ -1,61 +1,59 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#--
|
4
|
-
###############################################################################
|
5
|
-
# #
|
6
|
-
# Lingo -- A full-featured automatic indexing system #
|
7
|
-
# #
|
8
|
-
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
-
# #
|
11
|
-
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
-
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
-
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
-
# any later version. #
|
15
|
-
# #
|
16
|
-
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
-
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
-
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
-
# more details. #
|
20
|
-
# #
|
21
|
-
# You should have received a copy of the GNU Affero General Public License #
|
22
|
-
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
-
# #
|
24
|
-
###############################################################################
|
25
|
-
#++
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
def
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'json'
|
28
|
+
require_relative 'app'
|
29
|
+
|
30
|
+
class Lingo

  # HTTP front-end that answers requests with a JSON rendering of Lingo's
  # output. Route helpers (+get+, +post+), +init_app+, +root+ and +to_json+
  # come from App (app.rb, not visible here).
  class Srv < App

    # The block supplies default command-line arguments pointing at the
    # bundled lingosrv.cfg; how init_app applies them is defined in App.
    init_app(__FILE__) { ['-c', File.join(root, 'lingosrv.cfg')] }

    # One Lingo call object built at class-load time and used by every request.
    LINGO = Call.new(ARGV).call
    abort 'Something went wrong...' unless LINGO.is_a?(Call)

    # The vector_filter's +src+ option: literal +true+ selects the filter's
    # default separator, any other value is used as is (nil/false disables
    # the grouping step in #doit).
    src = LINGO.config.get('meeting/attendees', 'vector_filter', 'src')

    SRC_SEP = if src == true
      Attendee::VectorFilter::DEFAULT_SRC_SEP
    else
      src
    end

    get('') do
      doit
    end

    get('/') do
      doit
    end

    post('/') do
      doit
    end

    # Runs the +q+ request parameter through Lingo and returns the JSON
    # response built by +to_json+.
    #
    # When a separator is configured, each result string is split once on
    # SRC_SEP and the part before the separator is collected under the part
    # after it; otherwise the result is passed on unchanged. A missing or
    # empty +q+ yields a nil result.
    def doit
      q = params[:q]

      r = nil
      r = LINGO.talk(q) if q && !q.empty?

      if r && SRC_SEP
        grouped = Hash.new { |hash, key| hash[key] = [] }

        r.each { |item|
          head, tail = item.split(SRC_SEP, 2)
          grouped[tail] << head
        }

        r = grouped
      end

      to_json(q, r)
    end

  end

end
|
data/lib/lingo/version.rb
CHANGED
@@ -0,0 +1,14 @@
|
|
1
|
+
---
|
2
|
+
meeting:
|
3
|
+
attendees:
|
4
|
+
- text_reader: { files: STDIN }
|
5
|
+
|
6
|
+
- tokenizer: { }
|
7
|
+
- word_searcher: { source: sys-dic, mode: first }
|
8
|
+
- decomposer: { source: sys-dic }
|
9
|
+
- multi_worder: { source: sys-mul }
|
10
|
+
- sequencer: { stopper: PUNC,OTHR }
|
11
|
+
- synonymer: { skip: '?,t', source: sys-syn }
|
12
|
+
|
13
|
+
- vector_filter: { debug: 'true', prompt: '' }
|
14
|
+
- text_writer: { ext: STDOUT, sep: "\n" }
|
Binary file
|
@@ -0,0 +1,74 @@
|
|
1
|
+
html, body {
|
2
|
+
margin-top: 0;
|
3
|
+
}
|
4
|
+
|
5
|
+
a img {
|
6
|
+
border: none;
|
7
|
+
}
|
8
|
+
|
9
|
+
form {
|
10
|
+
white-space: nowrap;
|
11
|
+
}
|
12
|
+
|
13
|
+
fieldset {
|
14
|
+
display: inline;
|
15
|
+
width: 47%;
|
16
|
+
}
|
17
|
+
|
18
|
+
textarea {
|
19
|
+
width: 98.9%;
|
20
|
+
height: 30em;
|
21
|
+
background-color: white;
|
22
|
+
}
|
23
|
+
|
24
|
+
#welcome {
|
25
|
+
font-size: 70%;
|
26
|
+
color: #333333;
|
27
|
+
margin-bottom: 0.5em;
|
28
|
+
text-align: center;
|
29
|
+
}
|
30
|
+
|
31
|
+
#legend {
|
32
|
+
font-size: 75%;
|
33
|
+
color: #333333;
|
34
|
+
margin-bottom: 0.5em;
|
35
|
+
}
|
36
|
+
|
37
|
+
#legend table {
|
38
|
+
margin-left: 2em;
|
39
|
+
}
|
40
|
+
|
41
|
+
#legend th {
|
42
|
+
font-size: 110%;
|
43
|
+
font-weight: normal;
|
44
|
+
font-family: monospace;
|
45
|
+
text-align: left;
|
46
|
+
}
|
47
|
+
|
48
|
+
#legend td {
|
49
|
+
padding-left: 1em;
|
50
|
+
}
|
51
|
+
|
52
|
+
#footer {
|
53
|
+
border-style: solid;
|
54
|
+
border-color: black;
|
55
|
+
border-width: 1px 0;
|
56
|
+
padding: 2px 4px;
|
57
|
+
}
|
58
|
+
|
59
|
+
#footer a {
|
60
|
+
font-weight: bold;
|
61
|
+
}
|
62
|
+
|
63
|
+
a:link, a:visited {
|
64
|
+
text-decoration: none;
|
65
|
+
color: #F35327;
|
66
|
+
}
|
67
|
+
|
68
|
+
fieldset, #footer {
|
69
|
+
background-color: #DFDFDF;
|
70
|
+
}
|
71
|
+
|
72
|
+
fieldset.error {
|
73
|
+
background-color: #FDB331;
|
74
|
+
}
|
@@ -0,0 +1,92 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" />
  <title>Lingo Web - <%= t 'Automatic indexing online', 'Automatische Indexierung Online' %></title>
  <link rel="stylesheet" type="text/css" href="<%= url_for '/lingoweb.css' %>" />
</head>
<body>
  <div id="header">
    <a href="http://lex-lingo.de"><img src="<%= url_for '/lingo.png' %>" alt="Lingo" /></a>
  </div>

  <div id="welcome">
    <strong><%= t 'Welcome to Lingo Web!', 'Willkommen bei Lingo Web!' %></strong>
    <%= t %q{
      Lingo Web provides the opportunity to test the functionality of
      <a href="http://lex-lingo.de">Lingo</a>.<br />
      Lingo is an open source indexing system for research and teaching.
    }, %q{
      Lingo Web bietet die Möglichkeit, die Funktionsweise von
      <a href="http://lex-lingo.de">Lingo</a> zu testen.<br />
      Lingo ist ein frei verfügbares System zur linguistisch und statistisch
      basierten automatischen Indexierung des Deutschen und Englischen.
    } %>
  </div>

  <div id="main">
    <form action="<%= url_for '/' %>" method="post">
      <div>
        <%# @q is the raw request parameter and @r is derived from it; both are
            HTML-escaped so submitted markup cannot break out of the textarea. %>
        <fieldset><legend><strong><%= t 'Input', 'Eingabe' %></strong></legend>
          <textarea name="q" rows="20" cols="50"><%= Rack::Utils.escape_html(@q) %></textarea>
        </fieldset>

        <fieldset><legend><strong><%= t 'Output', 'Ausgabe' %></strong></legend>
          <textarea readonly="readonly" rows="20" cols="50"><%= Rack::Utils.escape_html(@r) %></textarea>
        </fieldset>

        <br />

        <strong><%= t 'Language', 'Sprache' %></strong> = <select name="l">
        <% for l in LANGS %>
          <option value="<%= l %>"<%= ' selected="selected"' if l == @l %>><%= l %></option>
        <% end %>
        </select>

        <br />
        <br />

        <input type="submit" value="<%= t 'Start processing...', 'Verarbeitung starten...' %>"></input> |
        <input type="reset" value="<%= t 'Reset form', 'Formular zurücksetzen' %>"></input> |
        <a href="<%= url_for '/' %>"><%= t 'New request', 'Neue Anfrage' %></a>
      </div>
    </form>

    <br />
  </div>

  <div id="legend">
    <strong><%= t 'Legend', 'Legende' %></strong>:
    <table>
      <tr><th>s </th><td><%= t 'Noun', 'Substantiv' %></td></tr>
      <tr><th>a </th><td><%= t 'Adjective', 'Adjektiv' %></td></tr>
      <tr><th>v </th><td><%= t 'Verb', 'Verb' %></td></tr>
      <tr><th>e </th><td><%= t 'Proper name', 'Eigenname' %></td></tr>
      <tr><th>w </th><td><%= t 'Word class without suffixes', 'Wortklasse ohne Suffixe' %></td></tr>
      <tr><th>t </th><td><%= t 'Word class without suffixes (e.g. high frequency terms)', 'Wortklasse ohne Suffixe (z.B. Hochfrequenzterme)' %></td></tr>
      <tr><th>y </th><td><%= t 'Synonym', 'Synonym' %></td></tr>
      <tr><th>q (=SEQ)</th><td><%= t 'Sequence (algorithmically identified phrase)', 'Sequenz (algorithmisch erkannter Mehrwortbegriff)' %></td></tr>
      <tr><th>m (=MUL)</th><td><%= t 'Phrase', 'Mehrwortbegriff' %></td></tr>
      <tr><th>k (=KOM)</th><td><%= t 'Compound', 'Kompositum' %></td></tr>
      <tr><th>+ </th><td><%= t 'Part of a compound', 'Kompositum-Bestandteil' %></td></tr>
      <tr><th>x+ </th><td><%= t 'Unknown part of a hyphenated compound', 'unbekannter Kompositum-Bestandteil einer Bindestrich-Konstruktion' %></td></tr>
      <tr><th>? </th><td><%= t 'Unknown word', 'unbekanntes Wort' %></td></tr>
      <tr><th>MU? </th><td><%= t 'Part of a phrase (unknown word)', 'Mehrwortbestandteil (unbekanntes Wort)' %></td></tr>
      <tr><th>HELP </th><td><%= t 'e.g. Special characters', 'z.B. unbekanntes Sonderzeichen' %></td></tr>
      <tr><th>ABRV </th><td><%= t 'Possible abbreviation with a full stop in the middle', 'mögliche Abk. mit eingeschlossenem Punkt (z.B. "Ausst.Kat")' %></td></tr>
      <tr><th>PUNC </th><td><%= t 'Punctuation etc.', 'Satzzeichen etc.' %></td></tr>
      <tr><th>OTHR </th><td><%= t 'Other character', 'Sonstiges Zeichen' %></td></tr>
      <tr><th>URLS </th><td><%= t 'URL', 'URL' %></td></tr>
      <tr><th>NUMS </th><td><%= t 'Number', 'Zahl' %></td></tr>
    </table>
  </div>

  <div id="footer">
    <em>powered by</em> <a href="http://lex-lingo.de">Lingo</a>
    <em>and</em> <a href="http://www.sinatrarb.com">Sinatra</a>
    -- <strong>v<%= Lingo::VERSION %></strong>
  </div>
</body>
</html>
|
data/lib/lingo/web.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'json'
|
28
|
+
require 'nuggets/util/ruby'
|
29
|
+
|
30
|
+
require_relative 'app'
|
31
|
+
|
32
|
+
class Lingo

  # Browser front-end for Lingo: renders the index.erb form and, on request,
  # returns the result as JSON or plain text instead. Route helpers,
  # +init_app+, +root+ and +to_json+ come from App (app.rb, not visible here).
  class Web < App

    init_app(__FILE__)

    # UILANGS: languages the user interface itself is translated into (the
    # order matters: #t picks the translation by index, and the first entry
    # is the fallback).
    # LANGS: language codes taken from the names of the available language
    # files (the word characters between the last "/" and the following ".").
    UILANGS, LANGS = %w[en de], Lingo.list(:lang).map! { |lang|
      lang[%r{.*/(\w+)\.}, 1]
    }.uniq.sort!

    auth, cfg = %w[auth cfg].map! { |ext|
      File.join(root, "lingoweb.#{ext}")
    }

    # Optional HTTP Basic authentication: enabled only when lingoweb.auth is
    # readable and non-empty. The file content is split on the first ":" and
    # compared against the credentials supplied with the request.
    if File.readable?(auth)
      c = File.read(auth).chomp.split(':', 2)
      use(Rack::Auth::Basic) { |*b| b == c } unless c.empty?
    end

    # One Lingo instance per language code, created on first use.
    LINGO = Hash.new { |h, k| h[k] = Lingo.call(cfg, ['-l', k]) }

    # Per-request setup: choose the UI language from the Accept-Language
    # header and pick up the query text and processing language.
    before do
      @uilang = if hal = env['HTTP_ACCEPT_LANGUAGE']
        # Keep only the leading alphabetic primary subtag of each list
        # element, so that region suffixes ("en-US") and quality values
        # ("de;q=0.8") do not prevent a match. Elements without one (empty
        # items from stray commas, "*") are dropped instead of raising.
        # Language tags are case-insensitive, hence the downcase.
        hals = hal.split(',').map { |l|
          l[/\A\s*([A-Za-z]+)/, 1]
        }.compact.map!(&:downcase)

        (hals & UILANGS).first
      end || UILANGS.first

      @q = params[:q]
      @l = params[:l] || @uilang
      # Never pass an unknown language code on to Lingo.
      @l = LANGS.first unless LANGS.include?(@l)
    end

    get('')   { redirect url_for('/') }
    get('/')  { doit }
    post('/') { doit }

    helpers do
      # Prefixes +path+ with the script name the application is mounted under.
      def url_for(path)
        "#{request.script_name}#{path}"
      end

      # Returns the translation matching the current UI language. Arguments
      # are given in UILANGS order; falls back to the first one when the
      # language is unknown or no translation exists at that position.
      def t(*t)
        (i = UILANGS.index(@uilang)) && t[i] || t.first
      end
    end

    # Processes the query (if any) with the Lingo instance for the selected
    # language and renders the result in the format requested via the +f+
    # parameter: "json", "text" (newline-joined), or the HTML page by default.
    def doit
      @r = LINGO[@l].talk(@q) { |_| _ } if @q && !@q.empty?

      case params[:f]
        when 'json'
          to_json(@q, @r)
        when 'text'
          @r && @r.join("\n")
        else
          # The template expects the result as a single string.
          @r &&= @r.join("\n")
          erb :index
      end
    end

  end

end
|
data/lib/lingo.rb
CHANGED
@@ -24,13 +24,12 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'unicode'
|
27
28
|
require 'stringio'
|
28
29
|
require 'pathname'
|
29
30
|
require 'fileutils'
|
30
|
-
require 'benchmark'
|
31
31
|
require 'nuggets/file/ext'
|
32
32
|
require 'nuggets/env/user_home'
|
33
|
-
require 'nuggets/numeric/duration'
|
34
33
|
require 'nuggets/string/camelscore'
|
35
34
|
|
36
35
|
class Lingo
|
@@ -60,7 +59,7 @@ class Lingo
|
|
60
59
|
}
|
61
60
|
|
62
61
|
# Default encoding
|
63
|
-
ENC = 'UTF-8'.freeze
|
62
|
+
Encoding.default_external = Encoding.default_internal = ENC = 'UTF-8'.freeze
|
64
63
|
|
65
64
|
SEP_RE = %r{[; ,|]}
|
66
65
|
|
@@ -76,17 +75,18 @@ class Lingo
|
|
76
75
|
|
77
76
|
def list(type, options = {})
|
78
77
|
options = options_for(type, options)
|
79
|
-
path = path_for(options)
|
80
78
|
|
81
|
-
glob = file_with_ext('*', options)
|
79
|
+
glob, list = file_with_ext('*', options), []
|
82
80
|
glob = File.join('??', glob) if type == :dict
|
83
81
|
|
84
|
-
|
82
|
+
walk(path = path_for(options), options) { |dir|
|
85
83
|
Dir[File.join(dir, glob)].sort!.each { |file|
|
86
84
|
pn = Pathname.new(file)
|
87
85
|
list << realpath_for(pn, path) if pn.file?
|
88
86
|
}
|
89
|
-
}
|
87
|
+
}
|
88
|
+
|
89
|
+
list
|
90
90
|
end
|
91
91
|
|
92
92
|
def find(type, file, options = {})
|
@@ -130,7 +130,7 @@ class Lingo
|
|
130
130
|
private
|
131
131
|
|
132
132
|
def include_path(path, pre = false)
|
133
|
-
PATH.insert(pre ? 0 : -1, *path.map!
|
133
|
+
PATH.insert(pre ? 0 : -1, *path.map! { |i| i.to_s })
|
134
134
|
end
|
135
135
|
|
136
136
|
def find_file(file, path, options)
|
@@ -223,13 +223,15 @@ class Lingo
|
|
223
223
|
|
224
224
|
end
|
225
225
|
|
226
|
-
attr_reader :dictionaries, :report_status, :report_time
|
227
|
-
|
228
226
|
def initialize(*args)
|
227
|
+
Debug.ps(:lingo_new)
|
228
|
+
|
229
229
|
@config_args = args
|
230
230
|
reset(false)
|
231
231
|
end
|
232
232
|
|
233
|
+
attr_reader :dictionaries
|
234
|
+
|
233
235
|
def config
|
234
236
|
@config ||= Config.new(*@config_args)
|
235
237
|
end
|
@@ -252,8 +254,12 @@ class Lingo
|
|
252
254
|
end
|
253
255
|
|
254
256
|
def talk
|
255
|
-
|
256
|
-
|
257
|
+
Debug.profile(config['profile']) {
|
258
|
+
invite
|
259
|
+
start
|
260
|
+
}
|
261
|
+
|
262
|
+
Debug.ps(:lingo_talk)
|
257
263
|
ensure
|
258
264
|
reset
|
259
265
|
end
|
@@ -288,38 +294,30 @@ class Lingo
|
|
288
294
|
} }
|
289
295
|
end
|
290
296
|
|
291
|
-
def start
|
292
|
-
@
|
293
|
-
|
294
|
-
time = Benchmark.realtime {
|
295
|
-
@attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_TALK))
|
296
|
-
}
|
297
|
-
|
298
|
-
if report_status || report_time
|
299
|
-
warn "Require protocol...\n#{separator = '-' * 61}"
|
300
|
-
@attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_STATUS))
|
301
|
-
warn "#{separator}\nThe duration of the meeting was #{time.to_hms(2)}"
|
302
|
-
end
|
297
|
+
def start
|
298
|
+
@attendees.first.listen(AgendaItem.new(Attendee::STR_CMD_TALK))
|
303
299
|
end
|
304
300
|
|
305
301
|
def reset(close = true)
|
306
|
-
dictionaries.each
|
302
|
+
dictionaries.each { |i| i.close } if close
|
307
303
|
@dictionaries, @attendees = [], []
|
308
304
|
@lexical_hash = Hash.new { |h, k| h[k] = Language::LexicalHash.new(k, self) }
|
309
305
|
end
|
310
306
|
|
311
307
|
def warn(*msg)
|
312
|
-
config.
|
308
|
+
config.warn(*msg)
|
309
|
+
end
|
310
|
+
|
311
|
+
def deprecate(old, new, obj = self)
|
312
|
+
config.deprecate(old, new, obj)
|
313
313
|
end
|
314
314
|
|
315
315
|
end
|
316
316
|
|
317
317
|
require_relative 'lingo/call'
|
318
318
|
require_relative 'lingo/error'
|
319
|
+
require_relative 'lingo/debug'
|
319
320
|
require_relative 'lingo/config'
|
320
|
-
require_relative 'lingo/core_ext'
|
321
|
-
require_relative 'lingo/cachable'
|
322
|
-
require_relative 'lingo/reportable'
|
323
321
|
require_relative 'lingo/agenda_item'
|
324
322
|
require_relative 'lingo/show_progress'
|
325
323
|
require_relative 'lingo/database'
|
data/lingo.cfg
CHANGED
@@ -30,7 +30,7 @@ meeting:
|
|
30
30
|
# Schreibweisen variieren und erneut suchen
|
31
31
|
# - variator: { source: sys-dic }
|
32
32
|
|
33
|
-
#
|
33
|
+
# Worttrennungen aufheben
|
34
34
|
# - dehyphenizer: { source: sys-dic }
|
35
35
|
|
36
36
|
# Wortstämme für nicht erkannte Wörter einfügen
|
data/lir.cfg
CHANGED
@@ -26,9 +26,21 @@ meeting:
|
|
26
26
|
# Zeile in einzelnen Sinnbestandteile (Token) zerlegen
|
27
27
|
- tokenizer: { }
|
28
28
|
|
29
|
+
# Abkürzungen erkennen und auflösen
|
30
|
+
# - abbreviator: { source: sys-abk }
|
31
|
+
|
29
32
|
# Verbleibende Token im Wörterbuch suchen
|
30
33
|
- word_searcher: { source: sys-dic, mode: first }
|
31
34
|
|
35
|
+
# Schreibweisen variieren und erneut suchen
|
36
|
+
# - variator: { source: sys-dic }
|
37
|
+
|
38
|
+
# Worttrennungen aufheben
|
39
|
+
# - dehyphenizer: { source: sys-dic }
|
40
|
+
|
41
|
+
# Wortstämme für nicht erkannte Wörter einfügen
|
42
|
+
# - stemmer: { }
|
43
|
+
|
32
44
|
# Nicht erkannte Wörter auf Kompositum testen
|
33
45
|
- decomposer: { source: sys-dic }
|
34
46
|
|
@@ -60,10 +72,22 @@ meeting:
|
|
60
72
|
- noneword_filter: { in: syn }
|
61
73
|
- text_writer: { ext: non, sep: '|' }
|
62
74
|
|
75
|
+
# Erstelle Datei mit Endung .ste für Wortstämme
|
76
|
+
- vector_filter: { in: syn, lexicals: z }
|
77
|
+
- text_writer: { ext: ste, sep: '|' }
|
78
|
+
|
63
79
|
# Erstelle Datei mit Endung .vec für erkannte Indexterme
|
64
80
|
- vector_filter: { in: syn, lexicals: '^[ksavem]$' }
|
65
81
|
- text_writer: { ext: vec, sep: '|' }
|
66
82
|
|
83
|
+
# Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
|
84
|
+
- vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: term_abs }
|
85
|
+
- text_writer: { ext: ven, sep: '|' }
|
86
|
+
|
87
|
+
# Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
|
88
|
+
- vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: term_rel }
|
89
|
+
- text_writer: { ext: ver, sep: '|' }
|
90
|
+
|
67
91
|
# Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
|
68
92
|
- vector_filter: { in: syn, lexicals: m }
|
69
93
|
- text_writer: { ext: mul, sep: '|' }
|