RubyGems - lingo - Versions diffs - 1.8.6 → 1.8.7 - Mend

lingo 1.8.6 → 1.8.7

Files changed (75)

checksums.yaml +4 -4
data/ChangeLog +40 -4
data/README +22 -51
data/Rakefile +3 -17
data/config/lingo.cfg +24 -15
data/config/lir.cfg +25 -16
data/dict/de/test_muh.txt +6 -0
data/dict/en/lingo-dic.txt +2 -3
data/lang/de.lang +10 -9
data/lang/en.lang +1 -1
data/lib/lingo.rb +4 -4
data/lib/lingo/attendee.rb +27 -7
data/lib/lingo/attendee/analysis_filter.rb +81 -0
data/lib/lingo/attendee/debug_filter.rb +42 -0
data/lib/lingo/attendee/debugger.rb +2 -11
data/lib/lingo/attendee/decomposer.rb +6 -3
data/lib/lingo/attendee/formatter.rb +6 -6
data/lib/lingo/attendee/hal_filter.rb +94 -0
data/lib/lingo/attendee/lsi_filter.rb +99 -0
data/lib/lingo/attendee/multi_worder.rb +69 -43
data/lib/lingo/attendee/sequencer.rb +32 -19
data/lib/lingo/attendee/synonymer.rb +2 -2
data/lib/lingo/attendee/text_reader.rb +63 -92
data/lib/lingo/attendee/text_writer.rb +12 -21
data/lib/lingo/attendee/tokenizer.rb +32 -21
data/lib/lingo/attendee/variator.rb +3 -3
data/lib/lingo/attendee/vector_filter.rb +7 -9
data/lib/lingo/attendee/word_searcher.rb +3 -3
data/lib/lingo/buffered_attendee.rb +3 -36
data/lib/lingo/config.rb +1 -1
data/lib/lingo/ctl.rb +7 -155
data/lib/lingo/ctl/analysis.rb +136 -0
data/lib/lingo/ctl/files.rb +86 -0
data/lib/lingo/ctl/other.rb +140 -0
data/lib/lingo/database.rb +64 -60
data/lib/lingo/database/crypter.rb +7 -5
data/lib/lingo/error.rb +5 -4
data/lib/lingo/language.rb +13 -5
data/lib/lingo/language/grammar.rb +13 -7
data/lib/lingo/language/token.rb +6 -0
data/lib/lingo/language/word.rb +23 -36
data/lib/lingo/language/word_form.rb +5 -1
data/lib/lingo/srv.rb +2 -2
data/lib/lingo/text_utils.rb +96 -0
data/lib/lingo/version.rb +1 -1
data/lib/lingo/web/views/index.erb +1 -1
data/test/attendee/ts_decomposer.rb +23 -5
data/test/attendee/ts_multi_worder.rb +66 -0
data/test/attendee/ts_sequencer.rb +28 -4
data/test/attendee/ts_text_reader.rb +20 -0
data/test/attendee/ts_tokenizer.rb +20 -0
data/test/attendee/ts_variator.rb +1 -1
data/test/attendee/ts_word_searcher.rb +39 -3
data/test/lir3.txt +12 -0
data/test/ref/artikel.non +1 -12
data/test/ref/artikel.seq +3 -1
data/test/ref/artikel.vec +1 -0
data/test/ref/artikel.vef +35 -34
data/test/ref/artikel.ven +8 -7
data/test/ref/artikel.ver +34 -33
data/test/ref/artikel.vet +2573 -2563
data/test/ref/lir.non +77 -78
data/test/ref/lir.seq +9 -7
data/test/ref/lir.syn +1 -1
data/test/ref/lir.vec +41 -41
data/test/ref/lir.vef +210 -210
data/test/ref/lir.ven +46 -46
data/test/ref/lir.ver +72 -72
data/test/ref/lir.vet +329 -329
data/test/ts_database.rb +166 -62
data/test/ts_language.rb +23 -23
metadata +53 -34
data/lib/lingo/attendee/dehyphenizer.rb +0 -120
data/lib/lingo/attendee/noneword_filter.rb +0 -115
data/test/attendee/ts_noneword_filter.rb +0 -15

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: e4cc870c8c1b49c580841a934b5906ed6ddf75e4
-  data.tar.gz: 1ecb26c708daa4bfa09f4aa76f6d7e17f1a72683
+  metadata.gz: 1130ec52467314ba95af17e635888f60046c5b42
+  data.tar.gz: 6a882ea4f88b1fbcf1a66b1d5fafe8fa05458b89
 SHA512:
-  metadata.gz: f2f0abed6198a7fcf0ff4f44aa442266f38c44646c7f4e8ef894886c453ce1654edd217c675f12e6b7d828c43ac461abb64d92aef20015249dbdf6f9efc03a3f
-  data.tar.gz: cb0be6e46a16639a384bab3507dc3b2bd4465736d1d7e0189d3930d1252e247fff4421364d860bd2cdd12f26b4f4445192a87998bea017bb1f285c8e0bda7639
+  metadata.gz: 3e0b384a822c28961c99d411bbdd399d9a49b29fdc40688d3534a9897cef984a7447cdd21b05fda45dda9a0fad25d99f60d524064803edb218ff95bbf9fc4fe6
+  data.tar.gz: 5cfa5c7f235113238d0e8568e9948f27a7ec864af63ac696a5efdfceed6eb678724f3ca3df24d3ac296068aceb1bf20d1369f439edf7f670265550ace1ce1cd0

data/ChangeLog CHANGED

@@ -2,6 +2,43 @@
 = Revision history for Lingo
+== 1.8.7 [2015-08-07]
+* Added Lingo::Attendee::LsiFilter to correlate semantically related terms
+  (LSI[https://en.wikipedia.org/wiki/Latent_semantic_indexing]) over the
+  "corpus" of all files processed during a single program invocation; requires
+  lsi4r[https://blackwinter.github.com/lsi4r] which in turn requires
+  rb-gsl[https://blackwinter.github.com/rb-gsl]. [EXPERIMENTAL: Interface may
+  be changed or removed in next release.]
+* Added Lingo::Attendee::HalFilter to correlate semantically related terms
+  (HAL[https://en.wikipedia.org/wiki/Hyperspace_Analogue_to_Language]) over
+  individual documents; requires hal4r[https://blackwinter.github.com/hal4r]
+  which in turn requires rb-gsl[https://blackwinter.github.com/rb-gsl].
+  [EXPERIMENTAL: Interface may be changed or removed in next release.]
+* Added Lingo::Attendee::AnalysisFilter and associated +lingoctl+ tooling.
+* Multiword dictionaries can now identify hyphenated variants (e.g.
+  <tt>automatic data-processing</tt>); set <tt>hyphenate: true</tt> in the
+  dictionary config.
+* Lingo::Attendee::Tokenizer no longer considers hyphens at word edges as part
+  of the word. As a consequence, Lingo::Attendee::Dehyphenizer has been
+  dropped.
+* Dropped Lingo::Attendee::NonewordFilter; use Lingo::Attendee::VectorFilter
+  with option <tt>lexicals: '\?'</tt> instead.
+* Lingo::Attendee::TextReader and Lingo::Attendee::TextWriter learned
+  +encoding+ option to read/write text that is not UTF-8 encoded;
+  configuration files and dictionaries still need to be UTF-8, though.
+* Lingo::Attendee::TextReader and Lingo::Attendee::TextWriter learned to
+  read/write Gzip-compressed files (file extension +.gz+ or +.gzip+).
+* Lingo::Attendee::Sequencer learned to recognize +0+ in the pattern to match
+  number tokens.
+* Fixed Lingo::Attendee::TextReader to recognize BOM in input files; does not
+  apply to input read from +STDIN+.
+* Fixed regression introduced in 1.8.6 where Lingo::Attendee::Debugger would
+  no longer work immediately behind Lingo::Attendee::TextReader.
+* Fixed +lingoctl+ copy commands when overwriting existing files.
+* Refactored Lingo::Database::Crypter into a module.
+* JRuby 9000 compatibility.
 == 1.8.6 [2015-02-09]
 * Lingo::Attendee::VectorFilter learned +pos+ option to print position and
@@ -17,8 +54,7 @@
 * Lingo::Attendee::TextReader no longer removes line endings; option +chomp+
   is obsolete.
 * Lingo::Attendee::TextReader passes byte offset to the following attendee.
-* Lingo::Attendee::Tokenizer records token's byte offset.
-* Lingo::Attendee::Tokenizer records token's sequence position.
+* Lingo::Attendee::Tokenizer records token's position and byte offset.
 * Lingo::Attendee::Tokenizer learned <tt>skip-tags</tt> option to skip over
   specified tags' contents.
 * Lingo::Attendee subclasses warn when invalid or obsolete options or names
@@ -184,9 +220,9 @@
   the word class for multiword entries (defaults to <tt>def-wc</tt>). Use
   <tt>def-mul-wc: 'm'</tt> in your <tt>.lang</tt> to restore the previous
   behaviour.
-* New Lingo::Attendee::Formatter for configurable output formatting as an
+* Added Lingo::Attendee::Formatter for configurable output formatting as an
   alternative to Lingo::Attendee::TextWriter.
-* New basic input filters to enable indexing of HTML/XML (and PDF) files.
+* Added basic input filters to enable indexing of HTML/XML (and PDF) files.
 * Updated the system dictionary.
 * Switched license to Affero GPL.

data/README CHANGED

@@ -15,7 +15,6 @@
 * {Example}[rdoc-label:label-EXAMPLE]
 * {Installation and Usage}[rdoc-label:label-INSTALLATION+AND+USAGE]
   * {Dictionary and configuration file lookup}[rdoc-label:label-Dictionary+and+configuration+file+lookup]
-  * {Legacy version}[rdoc-label:label-Legacy+version]
 * {File formats}[rdoc-label:label-FILE+FORMATS]
   * {Configuration}[rdoc-label:label-Configuration]
   * {Language definition}[rdoc-label:label-Language+definition]
@@ -35,7 +34,7 @@
 == VERSION
-This documentation refers to Lingo version 1.8.6
+This documentation refers to Lingo version 1.8.7
 == DESCRIPTION
@@ -58,7 +57,7 @@ is a minimal configuration example to analyse this README file:
   meeting:
     attendees:
       - text_reader: { files: 'README' }
-      - debugger:    { eval: 'true', ceval: 'cmd!="EOL"', prompt: '<debug>:  ' }
+      - debugger:    { eval: 'true', ceval: 'cmd!=:EOL', prompt: '<debug>:  ' }
 Lingo is told to invite two attendees and wants them to talk to each other,
 hence the name Lingo (= the technical language).
@@ -131,8 +130,6 @@ information see each attendee's documentation):
                     Lingo::Attendee::Debugger)
 +variator+::        Tries to correct spelling errors and the like. (see
                     Lingo::Attendee::Variator)
-+dehyphenizer+::    Tries to undo hyphenation. (see
-                    Lingo::Attendee::Dehyphenizer)
 +multi_worder+::    Identifies phrases (word sequences) based on a multiword
                     dictionary. (see Lingo::Attendee::MultiWorder)
 +sequencer+::       Identifies phrases (word sequences) based on patterns of
@@ -186,14 +183,14 @@ of context to external files.
 _Example_:
-  # keep line endings
-  - text_reader:     { files: $(files) }
+  # read files
+  - text_reader:   { files: $(files) }
   # keep whitespace
-  - tokenizer:       { space: true }
+  - tokenizer:     { space: true }
   # do processing...
-  - word_searcher:   { source: sys-dic, mode: first }
-  # insert formatted results (e.g. "[[Name::lingo|Lingo]] got these [[Noun::word|words]].")
-  - formatter:       { ext: out, format: '[[%3$s::%2$s|%1$s]]', map: { e: Name, s: Noun } }
+  - word_searcher: { source: sys-dic, mode: first }
+  # insert formatted results (e.g. "[[Name::lingo|Lingo]] finds [[Noun::word|words]].")
+  - formatter:     { ext: out, format: '[[%3$s::%2$s|%1$s]]', map: { e: Name, s: Noun } }
 === Plugins
@@ -267,7 +264,7 @@ truncated for clarity).
   <Lingo = [(lingo/s), (lingo/e)]>
   <-|?>
   <A|?>
-  <full-featured|KOM = [(full-featured/k), (full/s+), (full/a+), (full/v+), (featured/a+)]>
+  <full-featured|COM = [(full-featured/k), (full/s+), (full/a+), (full/v+), (featured/a+)]>
   <automatic = [(automatic/s), (automatic/a)]>
   <indexing = [(index/v)]>
   <system = [(system/s)]>
@@ -349,9 +346,8 @@ the +lingo+ executable to process your text files. See <tt>lingo --help</tt>
 for available options.
 Please note that Lingo requires Ruby version 1.9.3 or higher to run
-(2.1.3[http://ruby-lang.org/en/downloads/] is the currently recommended
-version). If you want to use Lingo on Ruby 1.8, please refer to the
-{legacy version}[rdoc-label:label-Legacy+version].
+(2.2.2[http://ruby-lang.org/en/downloads/] is the currently recommended
+version).
 Since Lingo depends on native extensions, you need to make sure that
 development files for your Ruby version are installed. On Debian-based
@@ -359,12 +355,8 @@ Linux platforms they are included in the package <tt>ruby-dev</tt>;
 other distributions may have a similarly named package. On Windows those
 development files are currently not required.
-Prior to version 1.8.0, Lingo expected to be run from its installation
-directory. This is no longer necessary. But if you prefer that use case,
-you can either download and extract an
-{archive file}[http://github.com/lex-lingo/lingo/releases] or unpack the
-Gem archive (<tt>gem unpack lingo</tt>); or you can install the legacy
-version of Lingo (see below).
+On JRuby, install gdbm[https://rubygems.org/gems/gdbm] for efficient database
+operations: <tt>gem install gdbm</tt>.
 === Dictionary and configuration file lookup
@@ -395,29 +387,6 @@ typically organized in the following directory structure:
 But for compatibility reasons these naming conventions are not enforced.
-=== Legacy version
-As Lingo 1.8 introduced some major disruptions and no longer runs on Ruby 1.8,
-there is a maintenance branch for Lingo 1.7.x that will remain compatible with
-both Ruby 1.8 and the previous line of Lingo prior to 1.8. This branch may
-receive occasional bug fixes and minor feature updates. However, the bulk of
-the development efforts will be directed towards Lingo 1.8+.
-To install the legacy version, download and extract the
-{ZIP archive}[http://ixtrieve.fh-koeln.de/buch/lingo-1.7.1.zip].
-No additional dependencies are required. This version of Lingo works
-with both Ruby 1.8 (1.8.5 or higher) and 1.9 (1.9.2 or higher).
-The executable is named +lingo.rb+. It's located at the root of the installation
-directory and may only be run from there. See <tt>ruby lingo.rb -h</tt> for
-usage instructions.
-Configuration and language definition files are also located at the root of the
-installation directory (<tt>*.cfg</tt> and <tt>*.lang</tt>, respectively).
-Dictionary source files are found in language-specific subdirectories (+de/+,
-+en/+, ...) and are named <tt>*.txt</tt>. The compiled dictionaries are found
-beneath these language subdirectories in a directory named <tt>store/</tt>.
 == FILE FORMATS
@@ -443,24 +412,24 @@ _Example_:
   # input is taken from the previous attendee,
   # output is sent to the named channel "syn"
-  - synonymer:       { skip: '?,t', source: sys-syn, out: syn }
+  - synonymer:     { skip: '?,t', source: sys-syn, out: syn }
   # input is taken from the named channel "syn",
   # output is sent to the next attendee
-  - vector_filter:   { in: syn, lexicals: y, sort: term_abs }
+  - vector_filter: { in: syn, lexicals: y, sort: term_abs }
   # input is taken from the previous attendee,
   # output is sent to the next attendee
-  - text_writer:     { ext: syn, sep: "\n" }
+  - text_writer:   { ext: syn, sep: "\n" }
   # input is taken from the named channel "syn"
   # (ignoring the output of the previous attendee),
   # output is sent to the next attendee
-  - vector_filter:   { in: syn, lexicals: m }
+  - vector_filter: { in: syn, lexicals: m }
   # input is taken from the previous attendee,
   # output is sent to the next attendee
-  - text_writer:     { ext: mul, sep: "\n" }
+  - text_writer:   { ext: mul, sep: "\n" }
 === Language definition
@@ -532,8 +501,8 @@ the full test suite.
 == LINKS
 Website::       http://lex-lingo.de
-Demo::          http://ixtrieve.fh-koeln.de/lingoweb
-Documentation:: https://lex-lingo.github.com/lingo
+Demo::          http://lex-lingo.de/lingoweb
+Documentation:: http://lex-lingo.de/doc
 Source code::   https://github.com/lex-lingo/lingo
 RubyGem::       https://rubygems.org/gems/lingo
 Bug tracker::   https://github.com/lex-lingo/lingo/issues
@@ -555,6 +524,8 @@ Travis CI::     https://travis-ci.org/lex-lingo/lingo
 === Research publications
+* Siebenkäs, A.; Markscheffel, B.: <em>{Conception of a workflow for the semi-automatic construction of a thesaurus for the German printing industry}[https://zenodo.org/record/17945]</em>. (English) In: Re:inventing Information Science in the Networked Society. Proceedings of the 14th International Symposium on Information Science (ISI 2015), Zadar, Croatia, 19th-21st May 2015. Eds.: F. Pehar, C. Schlögl, C. Wolff. Glückstadt: Verlag Werner Hülsbusch, 2015. pp 217-229.
+* Grün, S.: <em>Bildung von Komposita-Indextermen auf der Basis einer algorithmischen Mehrwortgruppenanalyse mit Lingo</em>. (German) Köln: Fachhochschule Köln, 2015.
 * Bredack, J.; Lepsky, K.: <em>{Automatische Extraktion von Fachterminologie aus Volltexten}[http://dx.doi.org/10.1515/abitech-2014-0002]</em>. (German) In: ABI Technik 34 (1), 2014. pp 2-12.
 * Bredack, J.: <em>{Terminologieextraktion von Mehrwortgruppen in kunsthistorischen Fachtexten}[http://ixtrieve.fh-koeln.de/lehre/bredack-2013.pdf]</em>. (German) Köln: Fachhochschule Köln, 2013.
 * Maylein, L.; Langenstein, A.: <em>{Neues vom Relevanz-Ranking im HEIDI-Katalog der Universitätsbibliothek Heidelberg}[http://b-i-t-online.de/heft/2013-03-fachbeitrag-maylein.pdf]</em>. (German) In: b.i.t.online 16 (3), 2013. pp 190-200.

data/Rakefile CHANGED

@@ -37,7 +37,7 @@ The main functions of Lingo are:
       dependencies: {
         'cyclops'       => '~> 0.1',
-        'nuggets'       => '~> 1.1',
+        'nuggets'       => '~> 1.3',
         'rubyzip'       => '~> 1.1',
         'sinatra-bells' => '~> 0.0',
         'unicode'       => '~> 0.4'
@@ -59,10 +59,9 @@ rescue LoadError => err
 end
 CLEAN.include(
-  'txt/*.{log,mul,non,seq,ste,syn,ve?}',
+  'txt/*.{als,hal,log,lsi,mul,non,seq,ste,syn,ve?}',
   'test/{test.*,text.non}',
-  'store/*/*.rev',
-  'bench/tmp.*'
+  'store/*/*.rev'
 )
 CLOBBER.include('store')
@@ -76,19 +75,6 @@ task('test:txt') { test_ref('artikel', 'lingo') }
 desc 'Test against reference file (LIR)'
 task('test:lir') { test_ref('lir') }
-unless (benchmarks = Dir[File.expand_path('../bench/*_bench.rb', __FILE__)]).empty?
-  desc 'Run all benchmarks'
-  task :bench
-  benchmarks.each { |benchmark|
-    bench = File.basename(benchmark, '_bench.rb')
-    task bench: benchtask = "bench:#{bench}"
-    desc "Run #{bench} benchmark"
-    task(benchtask) { system(File.ruby, benchmark) }
-  }
-end
 def test_ref(name, cfg = name)
   require 'diff/lcs'
   require 'diff/lcs/hunk'

data/config/lingo.cfg CHANGED

@@ -30,9 +30,6 @@ meeting:
     # Schreibweisen variieren und erneut suchen
 #   - variator:        { source: sys-dic }
-    # Worttrennungen aufheben
-#   - dehyphenizer:    { source: sys-dic }
     # Wortstämme für nicht erkannte Wörter einfügen
 #   - stemmer:         { }
@@ -46,7 +43,7 @@ meeting:
     - sequencer:       { stopper: 'PUNC,OTHR' }
     # Relationierungen einfügen
-    - synonymer:       { skip: '?,t', source: sys-syn, out: syn }
+    - synonymer:       { skip: '?,t', source: sys-syn, out: res }
     ########################################
@@ -60,45 +57,57 @@ meeting:
     #
     # Erstelle Datei mit Endung .log für Datenstrom
-    - debug_filter:    { in: syn, prompt: 'lex:) ' }
+    - debug_filter:    { in: res, prompt: 'lex:) ' }
     - text_writer:     { ext: log, sep: "\n" }
+    # Erstelle Datei mit Endung .als für Datenstrom
+    - analysis_filter: { in: res }
+    - text_writer:     { ext: als, sep: "\n" }
     # Erstelle Datei mit Endung .non für nicht erkannte Wörter
-    - noneword_filter: { in: syn }
+    - vector_filter:   { in: res, lexicals: '\?' }
     - text_writer:     { ext: non, sep: "\n" }
     # Erstelle Datei mit Endung .ste für Wortstämme
-    - vector_filter:   { in: syn, lexicals: z }
+    - vector_filter:   { in: res, lexicals: z }
     - text_writer:     { ext: ste, sep: "\n" }
     # Erstelle Datei mit Endung .vec für erkannte Indexterme
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$' }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$' }
     - text_writer:     { ext: vec, sep: "\n" }
     # Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: term_abs }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: term_abs }
     - text_writer:     { ext: ven, sep: "\n" }
     # Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: term_rel }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: term_rel }
     - text_writer:     { ext: ver, sep: "\n" }
     # Erstelle Datei mit Endung .vef für erkannte Indexterme mit TFIDF-Gewichtung
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: term_rel, tfidf: true }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: term_rel, tfidf: true }
     - text_writer:     { ext: vef, sep: "\n" }
     # Erstelle Datei mit Endung .vet für erkannte Indexterme mit Positionen
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: false, pos: true }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: false, pos: true }
     - text_writer:     { ext: vet, sep: "\n" }
     # Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
-    - vector_filter:   { in: syn, lexicals: m }
+    - vector_filter:   { in: res, lexicals: m }
     - text_writer:     { ext: mul, sep: "\n" }
     # Erstelle Datei mit Endung .seq für erkannte Wortsequenzen
-    - vector_filter:   { in: syn, lexicals: q, sort: term_abs }
+    - vector_filter:   { in: res, lexicals: q, sort: term_abs }
     - text_writer:     { ext: seq, sep: "\n" }
     # Erstelle Datei mit Endung .syn für erkannte Synonyme
-    - vector_filter:   { in: syn, lexicals: y, sort: term_abs }
+    - vector_filter:   { in: res, lexicals: y, sort: term_abs }
     - text_writer:     { ext: syn, sep: "\n" }
+    # Erstelle Datei mit Endung .hal für HAL-Indexterme
+#   - hal_filter:      { in: res, lexicals: '^[ksavem]$' }
+#   - text_writer:     { ext: hal, sep: "\n" }
+    # Erstelle Datei mit Endung .lsi für LSI-Indexterme
+#   - lsi_filter:      { in: res, lexicals: '^[ksavem]$' }
+#   - text_writer:     { ext: lsi, sep: "\n" }

data/config/lir.cfg CHANGED

@@ -35,9 +35,6 @@ meeting:
     # Schreibweisen variieren und erneut suchen
 #   - variator:        { source: sys-dic }
-    # Worttrennungen aufheben
-#   - dehyphenizer:    { source: sys-dic }
     # Wortstämme für nicht erkannte Wörter einfügen
 #   - stemmer:         { }
@@ -51,7 +48,7 @@ meeting:
     - sequencer:       { stopper: 'PUNC,OTHR' }
     # Relationierungen einfügen
-    - synonymer:       { skip: '?,t', source: sys-syn, out: syn }
+    - synonymer:       { skip: '?,t', source: sys-syn, out: res }
     ########################################
@@ -65,45 +62,57 @@ meeting:
     #
     # Erstelle Datei mit Endung .log für Datenstrom
-    - debug_filter:    { in: syn, prompt: 'lex:) ' }
-    - text_writer:     { ext: log, sep: "\n" }
+    - debug_filter:    { in: res, prompt: 'lex:) ' }
+    - text_writer:     { ext: log, sep: "\n", lir-format: ~ }
+    # Erstelle Datei mit Endung .als für Datenstrom
+    - analysis_filter: { in: res }
+    - text_writer:     { ext: als, sep: "\n", lir-format: ~ }
     # Erstelle Datei mit Endung .non für nicht erkannte Wörter
-    - noneword_filter: { in: syn }
+    - vector_filter:   { in: res, lexicals: '\?' }
     - text_writer:     { ext: non, sep: '|' }
     # Erstelle Datei mit Endung .ste für Wortstämme
-    - vector_filter:   { in: syn, lexicals: z }
+    - vector_filter:   { in: res, lexicals: z }
     - text_writer:     { ext: ste, sep: '|' }
     # Erstelle Datei mit Endung .vec für erkannte Indexterme
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$' }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$' }
     - text_writer:     { ext: vec, sep: '|' }
     # Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: term_abs }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: term_abs }
     - text_writer:     { ext: ven, sep: '|' }
     # Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: term_rel }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: term_rel }
     - text_writer:     { ext: ver, sep: '|' }
     # Erstelle Datei mit Endung .vef für erkannte Indexterme mit TFIDF-Gewichtung
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: term_rel, tfidf: true }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: term_rel, tfidf: true }
     - text_writer:     { ext: vef, sep: '|' }
     # Erstelle Datei mit Endung .vet für erkannte Indexterme mit Positionen
-    - vector_filter:   { in: syn, lexicals: '^[ksavem]$', sort: false, pos: true }
+    - vector_filter:   { in: res, lexicals: '^[ksavem]$', sort: false, pos: true }
     - text_writer:     { ext: vet, sep: '|' }
     # Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
-    - vector_filter:   { in: syn, lexicals: m }
+    - vector_filter:   { in: res, lexicals: m }
     - text_writer:     { ext: mul, sep: '|' }
     # Erstelle Datei mit Endung .seq für erkannte Wortsequenzen
-    - vector_filter:   { in: syn, lexicals: q, sort: term_abs }
+    - vector_filter:   { in: res, lexicals: q, sort: term_abs }
     - text_writer:     { ext: seq, sep: '|' }
     # Erstelle Datei mit Endung .syn für erkannte Synonyme
-    - vector_filter:   { in: syn, lexicals: y, sort: term_abs }
+    - vector_filter:   { in: res, lexicals: y, sort: term_abs }
     - text_writer:     { ext: syn, sep: '|' }
+    # Erstelle Datei mit Endung .hal für HAL-Indexterme
+#   - hal_filter:      { in: res, lexicals: '^[ksavem]$' }
+#   - text_writer:     { ext: hal, sep: '|' }
+    # Erstelle Datei mit Endung .lsi für LSI-Indexterme
+#   - lsi_filter:      { in: res, lexicals: '^[ksavem]$' }
+#   - text_writer:     { ext: lsi, sep: '|' }