lingo 1.8.2 → 1.8.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/ChangeLog +33 -0
  2. data/README +6 -5
  3. data/Rakefile +6 -4
  4. data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
  5. data/bin/lingoweb +30 -0
  6. data/de.lang +2 -13
  7. data/en/lingo-irr.txt +266 -0
  8. data/en/lingo-wdn.txt +37319 -0
  9. data/en.lang +2 -15
  10. data/lib/lingo/app.rb +82 -0
  11. data/lib/lingo/attendee/abbreviator.rb +22 -26
  12. data/lib/lingo/attendee/debugger.rb +8 -4
  13. data/lib/lingo/attendee/decomposer.rb +0 -1
  14. data/lib/lingo/attendee/dehyphenizer.rb +2 -2
  15. data/lib/lingo/attendee/multi_worder.rb +20 -13
  16. data/lib/lingo/attendee/noneword_filter.rb +2 -7
  17. data/lib/lingo/attendee/sequencer.rb +43 -19
  18. data/lib/lingo/attendee/stemmer/porter.rb +2 -2
  19. data/lib/lingo/attendee/stemmer.rb +1 -1
  20. data/lib/lingo/attendee/synonymer.rb +1 -9
  21. data/lib/lingo/attendee/text_reader.rb +42 -29
  22. data/lib/lingo/attendee/text_writer.rb +3 -6
  23. data/lib/lingo/attendee/tokenizer.rb +87 -69
  24. data/lib/lingo/attendee/variator.rb +7 -5
  25. data/lib/lingo/attendee/vector_filter.rb +11 -11
  26. data/lib/lingo/attendee/word_searcher.rb +1 -9
  27. data/lib/lingo/attendee.rb +24 -105
  28. data/lib/lingo/buffered_attendee.rb +2 -9
  29. data/lib/lingo/call.rb +18 -13
  30. data/lib/lingo/cli.rb +5 -10
  31. data/lib/lingo/config.rb +40 -7
  32. data/lib/lingo/ctl.rb +69 -57
  33. data/lib/lingo/database/hash_store.rb +9 -4
  34. data/lib/lingo/database/sdbm_store.rb +4 -7
  35. data/lib/lingo/database/source/multi_key.rb +1 -1
  36. data/lib/lingo/database/source/multi_value.rb +1 -1
  37. data/lib/lingo/database/source.rb +2 -20
  38. data/lib/lingo/database.rb +30 -19
  39. data/lib/lingo/debug.rb +79 -0
  40. data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
  41. data/lib/lingo/language/dictionary.rb +38 -46
  42. data/lib/lingo/language/grammar.rb +40 -57
  43. data/lib/lingo/language/lexical.rb +4 -7
  44. data/lib/lingo/language/lexical_hash.rb +17 -35
  45. data/lib/lingo/language/token.rb +4 -0
  46. data/lib/lingo/language/word.rb +7 -8
  47. data/lib/lingo/language/word_form.rb +4 -4
  48. data/lib/lingo/language.rb +2 -1
  49. data/lib/lingo/srv/config.ru +4 -0
  50. data/lib/lingo/srv/lingosrv.cfg +14 -0
  51. data/lib/lingo/{reportable.rb → srv.rb} +59 -61
  52. data/lib/lingo/version.rb +1 -1
  53. data/lib/lingo/web/config.ru +4 -0
  54. data/lib/lingo/web/lingoweb.cfg +14 -0
  55. data/lib/lingo/web/public/lingo.png +0 -0
  56. data/lib/lingo/web/public/lingoweb.css +74 -0
  57. data/lib/lingo/web/views/index.erb +92 -0
  58. data/lib/lingo/web.rb +94 -0
  59. data/lib/lingo.rb +27 -29
  60. data/lingo.cfg +1 -1
  61. data/lir.cfg +24 -0
  62. data/ru/lingo-dic.txt +22342 -0
  63. data/ru/lingo-mul.txt +5151 -0
  64. data/ru/lingo-syn.txt +0 -0
  65. data/ru.lang +99 -0
  66. data/test/attendee/ts_sequencer.rb +2 -2
  67. data/test/attendee/ts_text_reader.rb +36 -2
  68. data/test/attendee/ts_text_writer.rb +6 -6
  69. data/test/lir.vec +3 -3
  70. data/test/test_helper.rb +104 -102
  71. data/test/ts_database.rb +1 -1
  72. data/test/ts_language.rb +55 -96
  73. data/txt/artikel-ru.txt +45 -0
  74. data/txt/lir.txt +1 -3
  75. metadata +143 -83
  76. data/TODO +0 -23
data/en.lang CHANGED
@@ -43,18 +43,16 @@
43
43
  # lingo language definition
44
44
  ---
45
45
  language:
46
-
47
46
  name: 'Englisch'
48
47
 
49
48
  dictionary:
50
-
51
49
  databases:
52
-
53
50
  # Systemwörterbücher
54
51
  sys-dic: { name: en/lingo-dic.txt, txt-format: WordClass, separator: '=' }
55
52
  sys-syn: { name: en/lingo-syn.txt, txt-format: KeyValue, separator: '=', def-wc: y }
56
53
  sys-mul: { name: en/lingo-mul.txt, txt-format: SingleWord, use-lex: 'sys-dic', def-wc: m }
57
-
54
+ sys-irr: { name: en/lingo-irr.txt, txt-format: WordClass, separator: '=' }
55
+ sys-wdn: { name: en/lingo-wdn.txt, txt-format: WordClass, separator: '=' }
58
56
  # Benutzerwörterbücher
59
57
  usr-dic: { name: en/user-dic.txt, txt-format: WordClass, separator: '=' }
60
58
 
@@ -77,17 +75,6 @@ language:
77
75
  - [f, ""]
78
76
 
79
77
  attendees:
80
- tokenizer:
81
- regulars:
82
- - _char_: '_baslat_|_lat1sp_|_latexa_|_latexb_|_ipaext_'
83
- - NUMS: '[+-]?(\d{4,}|\d{1,3}(\.\d{3,3})*)(\.|(,\d+)?%?)'
84
- - URLS: '((mailto:|(news|http|https|ftp|ftps)://)\S+|^(www(\.\S+)+)|[^\s.]+([\._]\S+)+@\S+(\.\S+)+)'
85
- - ABRV: '(((_char_)+\.)+)(_char_)+'
86
- - WORD: '(_char_|_digit_|\-)+'
87
- - PUNC: '([!,\.:;?]|\xc2\xa1|\xc2\xbf)'
88
- - OTHR: '([\"#$%&\x27()*\+\-/<=>@\[\\\]^_{|}~]|\xc2\xa2|\xc2\xa3|\xc2\xa4|\xc2\xa5|\xc2\xa6|\xc2\xa7|\xc2\xa8|\xc2\xa9|\xc2\xaa|\xc2\xab|\xc2\xac|\xc2\xae|\xc2\xaf|\xc2\xb0|\xc2\xb1|\xc2\xb2|\xc2\xb3|\xc2\xb4|\xc2\xb5|\xc2\xb6|\xc2\xb7|\xc2\xb8|\xc2\xb9|\xc2\xba|\xc2\xbb|\xc2\xbc|\xc2\xbd|\xc2\xbe|\xc3\x97|\xc3\xb7)'
89
- - HELP: '[^ ]*'
90
-
91
78
  variator:
92
79
  variations:
93
80
  - [ ieh, sch ]
data/lib/lingo/app.rb ADDED
@@ -0,0 +1,82 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'optparse'
28
+ require 'shellwords'
29
+ require 'sinatra/base'
30
+
31
+ class Lingo
32
+
33
+ class App < Sinatra::Base
34
+
35
+ class << self
36
+
37
+ def init_app(file, *args, &block)
38
+ set :root, File.chomp_ext(file)
39
+ parse_options(*args, &block)
40
+ end
41
+
42
+ def parse_options(lingo_options = false)
43
+ argv, banner = [], "Usage: #{$0} [-h|--help] [sinatra-options]"
44
+ while arg = ARGV.shift and arg != '--'; argv << arg; end
45
+
46
+ if lingo_options || block_given?
47
+ banner << ' [-- lingo-options]'
48
+
49
+ opts = ENV["LINGO_#{name.split('::').last.upcase}_OPTS"]
50
+ ARGV.unshift(*Shellwords.shellsplit(opts)) if opts
51
+
52
+ ARGV.unshift(*lingo_options) if lingo_options.is_a?(Array)
53
+ end
54
+
55
+ OptionParser.new(banner, 16) { |o|
56
+ o.on('-p port', 'set the port (default is 4567)') { |v| set :port, Integer(v) }
57
+ o.on('-o addr', 'set the host (default is 0.0.0.0)') { |v| set :bind, v }
58
+ o.on('-e env', 'set the environment (default is development)') { |v| set :environment, v.to_sym }
59
+ o.on('-s server', 'specify rack server/handler (default is thin)') { |v| set :server, v }
60
+ o.on('-x', 'turn on the mutex lock (default is off)') { set :lock, true }
61
+ }.parse!(argv)
62
+
63
+ ARGV.unshift(*yield) if block_given?
64
+ end
65
+
66
+ def rackup(name)
67
+ file = File.join(File.dirname(__FILE__), name, 'config.ru')
68
+ file if File.readable?(file)
69
+ end
70
+
71
+ end
72
+
73
+ def to_json(q, r)
74
+ q, r = 'q', 'Required parameter -- Input string' unless q
75
+
76
+ content_type :json
77
+ { q => r }.to_json
78
+ end
79
+
80
+ end
81
+
82
+ end
@@ -68,7 +68,7 @@ class Lingo
68
68
  # out> *EOL('test.txt')
69
69
  # out> *EOF('test.txt')
70
70
 
71
- class Abbreviator < BufferedAttendee
71
+ class Abbreviator < self
72
72
 
73
73
  protected
74
74
 
@@ -77,36 +77,32 @@ class Lingo
77
77
  end
78
78
 
79
79
  def control(cmd, param)
80
- report_on(cmd, @dic)
81
- process_buffer
80
+ send_abbr(nil) if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
82
81
  end
83
82
 
84
- private
85
-
86
- def process_buffer?
87
- form_at(-1, Token) == CHAR_PUNCT
88
- end
89
-
90
- def process_buffer
91
- if @buffer.size < 2
92
- forward_buffer
93
- return
94
- end
95
-
96
- if form = form_at(-2, Token)
97
- inc('Anzahl gesuchter Abkürzungen')
98
-
99
- if (abbr = find_word(form)).identified?
100
- inc('Anzahl gefundener Abkürzungen')
101
-
102
- abbr.form += CHAR_PUNCT
103
-
104
- @buffer[-2] = abbr
105
- @buffer.delete_at(-1)
83
+ def process(obj)
84
+ if obj.is_a?(Token)
85
+ if obj.form == CHAR_PUNCT
86
+ if @abbr && (abbr = find_word(form = @abbr.form)).identified?
87
+ form << CHAR_PUNCT unless form.end_with?(CHAR_PUNCT)
88
+ send_abbr(abbr)
89
+ else
90
+ send_abbr(@abbr)
91
+ forward(obj)
92
+ end
93
+ else
94
+ send_abbr(@abbr, obj)
106
95
  end
96
+ else
97
+ send_abbr(obj)
107
98
  end
99
+ end
100
+
101
+ private
108
102
 
109
- forward_buffer
103
+ def send_abbr(abbr, obj = nil)
104
+ @abbr = obj
105
+ forward(abbr) if abbr
110
106
  end
111
107
 
112
108
  end
@@ -96,16 +96,20 @@ class Lingo
96
96
  end
97
97
 
98
98
  def control(cmd, param)
99
- if cmd != STR_CMD_STATUS && eval(@cmd_eval)
100
- warn "#{@prompt} #{AgendaItem.new(cmd, param).inspect}"
101
- end
99
+ debug(AgendaItem.new(cmd, param), @cmd_eval)
102
100
  end
103
101
 
104
102
  def process(obj)
105
- warn "#{@prompt} #{obj.inspect}" if eval(@obj_eval)
103
+ debug(obj, @obj_eval)
106
104
  forward(obj)
107
105
  end
108
106
 
107
+ private
108
+
109
+ def debug(obj, cond)
110
+ warn "#{@prompt} #{obj.inspect}" if eval(cond)
111
+ end
112
+
109
113
  end
110
114
 
111
115
  end
@@ -80,7 +80,6 @@ class Lingo
80
80
  end
81
81
 
82
82
  def control(cmd, param)
83
- report_on(cmd, @gra)
84
83
  end
85
84
 
86
85
  def process(obj)
@@ -89,7 +89,7 @@ class Lingo
89
89
  if ab.all? { |i| i.is_a?(Word) } && a.form[-1, 1] == h && !(
90
90
  (c = b.get_class(/./).first) && @skip.include?(c.attr)
91
91
  )
92
- a, b = ab.map!(&:form)
92
+ a, b = ab.map! { |i| i.form }
93
93
 
94
94
  word = dehyphenize(a.chomp(h) + b)
95
95
  word = dehyphenize(a + b) unless dehyphenized?(word)
@@ -106,7 +106,7 @@ class Lingo
106
106
  private
107
107
 
108
108
  def dehyphenize(form)
109
- find_word(form, &:identified?)
109
+ find_word(form) { |i| i.identified? }
110
110
  end
111
111
 
112
112
  def dehyphenized?(word)
@@ -113,7 +113,7 @@ class Lingo
113
113
  end
114
114
 
115
115
  def control(cmd, param)
116
- control_multi(cmd, @mul_dic)
116
+ control_multi(cmd)
117
117
  end
118
118
 
119
119
  def process_buffer
@@ -177,29 +177,36 @@ class Lingo
177
177
  def check_multiword_key(len)
178
178
  return [] if valid_tokens_in_buffer < len
179
179
 
180
- seq = @buffer.map { |obj|
180
+ seq = []
181
+
182
+ @buffer.each { |obj|
181
183
  next [obj] unless obj.is_a?(WordForm)
182
184
  next if (form = obj.form) == CHAR_PUNCT
183
185
 
184
186
  w = find_word(form, @lex_dic, @lex_gra)
185
187
  l = w.lexicals
186
188
 
187
- (w.attr == WA_COMPOUND ? [l.first] : l.empty? ? [w] : l.dup).tap { |i|
188
- i.concat(@syn_dic.find_synonyms(w)) if @syn_dic
189
- i.map! { |j| j.form.downcase }.uniq!
190
- }
191
- }
189
+ i = w.attr == WA_COMPOUND ? [l.first] : l.empty? ? [w] : l.dup
192
190
 
193
- seq.compact!
194
- seq.slice!(len..-1)
191
+ @syn_dic.find_synonyms(w, i) if @syn_dic
192
+ i.map! { |j| Unicode.downcase(j.form) }.uniq!
193
+
194
+ seq << i
195
+
196
+ break unless seq.length < len
197
+ }
195
198
 
196
199
  if @combine
197
- [].tap { |mul| seq.shift.product(*seq) { |key|
198
- mul.concat(@mul_dic.select(key.join(' ')))
200
+ mul = []
201
+
202
+ seq.shift.product(*seq) { |key|
203
+ @mul_dic.select(key.join(' '), mul)
199
204
  break unless @all_keys || mul.empty?
200
- } && mul.uniq! }
205
+ } && mul.uniq!
206
+
207
+ mul
201
208
  else
202
- @mul_dic.select(seq.map!(&:first).join(' '))
209
+ @mul_dic.select(seq.map! { |i,| i }.join(' '))
203
210
  end
204
211
  end
205
212
 
@@ -87,9 +87,7 @@ class Lingo
87
87
 
88
88
  def process(obj)
89
89
  if obj.is_a?(Word) && obj.unknown?
90
- inc('Anzahl nicht erkannter Wörter')
91
-
92
- non = obj.form.downcase
90
+ non = Unicode.downcase(obj.form)
93
91
  @sort ? @nonewords << non : forward(non)
94
92
  end
95
93
  end
@@ -97,11 +95,8 @@ class Lingo
97
95
  private
98
96
 
99
97
  def send_nonewords
100
- @nonewords.sort!
101
98
  @nonewords.uniq!
102
-
103
- add('Objekte gefiltert', @nonewords.size)
104
- @nonewords.each(&method(:forward)).clear
99
+ flush(@nonewords.sort!)
105
100
  end
106
101
 
107
102
  end
@@ -97,11 +97,15 @@ class Lingo
97
97
 
98
98
  def init
99
99
  @stopper = get_array('stopper', DEFAULT_SKIP, :upcase)
100
+ @classes = []
100
101
 
101
102
  @seq = get_key('sequences').map { |string, format|
102
- [string = string.downcase, string.split(//), format]
103
+ @classes.concat(classes = string.downcase!.chars.to_a)
104
+ [string, classes, format]
103
105
  }
104
106
 
107
+ @classes.uniq!
108
+
105
109
  raise MissingConfigError.new(:sequences) if @seq.empty?
106
110
  end
107
111
 
@@ -115,42 +119,62 @@ class Lingo
115
119
  end
116
120
 
117
121
  def process_buffer
118
- insert_sequences if @buffer.size > 1
119
- forward_buffer
122
+ matches = []
123
+
124
+ if @buffer.size > 1
125
+ buf, map, seq, cls, unk = [], [], @seq, @classes, %w[#]
126
+
127
+ @buffer.each { |obj|
128
+ att = obj.is_a?(Word) && !obj.unknown? ? obj.attrs(false) : unk
129
+
130
+ (att &= cls).empty? ? find_seq(buf, map, seq, matches) : begin
131
+ buf << obj
132
+ map << att
133
+ end
134
+ }
135
+
136
+ find_seq(buf, map, seq, matches)
137
+ end
138
+
139
+ flush(@buffer.concat(matches))
120
140
  end
121
141
 
122
142
  private
123
143
 
124
- def insert_sequences
125
- matches, buf, seq = Hash.new { |h, k| h[k] = [] }, @buffer, @seq
144
+ def find_seq(buf, map, seq, matches)
145
+ return if buf.empty?
126
146
 
127
- map = buf.map { |obj|
128
- obj.is_a?(Word) && !obj.unknown? ? obj.attrs(false) : ['#']
129
- }
147
+ match = Hash.new { |h, k| h[k] = [] }
148
+
149
+ map.replace(map.shift.product(*map))
150
+ map.map! { |i| i.join }
151
+ map.uniq!
130
152
 
131
- map.shift.product(*map).map!(&:join).tap(&:uniq!).each { |q|
153
+ map.each { |q|
132
154
  seq.each { |string, classes, format|
133
155
  while pos = q.index(string, pos || 0)
134
- inc('Anzahl erkannter Sequenzen')
135
-
136
- fmt = format.dup
156
+ form = format.dup
137
157
 
138
158
  classes.each_with_index { |wc, i|
139
159
  buf[pos + i].lexicals.find { |l|
140
- fmt.gsub!(i.succ.to_s, l.form) if l.attr == wc
160
+ form.gsub!(i.succ.to_s, l.form) if l.attr == wc
141
161
  } or break
142
162
  } or next
143
163
 
144
- matches[pos] << fmt
145
-
146
- pos += 1
164
+ match[pos += 1] << form
147
165
  end
148
166
  }
149
167
  }
150
168
 
151
- matches.sort.each { |pos, forms| forms.tap(&:uniq!).each { |form|
152
- @inserts << [pos, Word.new_lexical(form, WA_SEQUENCE, LA_SEQUENCE)]
153
- } }
169
+ match.each_value { |forms|
170
+ forms.uniq!
171
+ forms.each { |form|
172
+ matches << Word.new_lexical(form, WA_SEQUENCE, LA_SEQUENCE)
173
+ }
174
+ }
175
+
176
+ buf.clear
177
+ map.clear
154
178
  end
155
179
 
156
180
  end
@@ -290,7 +290,7 @@ class Lingo
290
290
  case rule
291
291
  when RULE_RE
292
292
  cond, repl, goto = $1, $3, $4
293
- stem = word[/(.+)#{$2.downcase}$/, 1] or next
293
+ stem = word[/(.+)#{Unicode.downcase($2)}$/, 1] or next
294
294
  when GOTO_RE
295
295
  goto = $1
296
296
  break
@@ -324,7 +324,7 @@ class Lingo
324
324
  found, word = true, begin
325
325
  stem[0...Integer(repl)]
326
326
  rescue ArgumentError
327
- stem << repl.downcase
327
+ stem << Unicode.downcase(repl)
328
328
  end
329
329
 
330
330
  break
@@ -41,7 +41,7 @@ class Lingo
41
41
 
42
42
  def process(obj)
43
43
  if obj.is_a?(Word) && obj.unknown?
44
- stem = stem(obj.form.downcase, @all)
44
+ stem = stem(Unicode.downcase(obj.form), @all)
45
45
  obj.add_lexicals([Lexical.new(stem, @wc)]) if stem
46
46
  end
47
47
 
@@ -78,19 +78,11 @@ class Lingo
78
78
  end
79
79
 
80
80
  def control(cmd, param)
81
- report_on(cmd, @dic)
82
81
  end
83
82
 
84
83
  def process(obj)
85
84
  if obj.is_a?(Word) && !@skip.include?(obj.attr)
86
- inc('Anzahl gesuchter Wörter')
87
-
88
- unless (syn = @dic.find_synonyms(obj)).empty?
89
- inc('Anzahl erweiteter Wörter')
90
-
91
- obj.add_lexicals(syn.tap(&:uniq!))
92
- add('Anzahl gefundener Synonyme', syn.size)
93
- end
85
+ obj.add_lexicals(@dic.find_synonyms(obj))
94
86
  end
95
87
 
96
88
  forward(obj)
@@ -115,15 +115,17 @@ class Lingo
115
115
  @filter = get_key('filter', false)
116
116
  @progress = get_key('progress', false)
117
117
 
118
- if @lir = get_key('records', get_key('lir-record-pattern', nil)) # DEPRECATE lir-record-pattern
119
- @lir = @lir == true ? %r{^\[(\d+)\.\]} : Regexp.new(@lir)
120
- end
118
+ @lingo.deprecate('lir-record-pattern', :records, self) if has_key?('lir-record-pattern')
119
+
120
+ @lir = get_re('records', get_key('lir-record-pattern', nil), %r{^\[(\d+)\.\]}) # DEPRECATE lir-record-pattern
121
+ @cut = get_re('fields', !!@lir, %r{^.+?:\s*})
122
+ @skip = get_re('skip', nil)
121
123
  end
122
124
 
123
125
  def control(cmd, param)
124
126
  if cmd == STR_CMD_TALK
125
127
  forward(STR_CMD_LIR, '') if @lir
126
- @files.each(&method(:spool))
128
+ @files.each { |i| spool(i) }
127
129
  end
128
130
  end
129
131
 
@@ -132,24 +134,22 @@ class Lingo
132
134
  # Gibt eine Datei zeilenweise in den Ausgabekanal
133
135
  def spool(path)
134
136
  unless stdin = stdin?(path)
135
- inc('Anzahl Dateien')
136
- add('Anzahl Bytes', size = File.size(path))
137
-
138
- size = nil unless @progress
137
+ size = File.size(path) if @progress
139
138
  end
140
139
 
141
140
  forward(STR_CMD_FILE, path)
142
141
 
143
142
  ShowProgress.new(self, size, path) { |progress|
144
143
  filter(path, stdin) { |line, pos|
145
- inc('Anzahl Zeilen')
146
144
  progress[pos]
147
145
 
148
146
  line.chomp! if @chomp
147
+ next if line =~ @skip
149
148
 
150
149
  if line =~ @lir
151
150
  forward(STR_CMD_RECORD, $1)
152
151
  else
152
+ line.sub!(@cut, '') if @cut
153
153
  forward(line) unless line.empty?
154
154
  end
155
155
  }
@@ -159,13 +159,13 @@ class Lingo
159
159
  end
160
160
 
161
161
  def filter(path, stdin = stdin?(path))
162
- io, block = stdin ? [
163
- @lingo.config.stdin.set_encoding(ENC),
164
- lambda { |line| yield line, 0 }
165
- ] : [
166
- File.open(path, 'rb', encoding: ENC),
162
+ io = stdin ?
163
+ @lingo.config.stdin.set_encoding(ENC) :
164
+ File.open(path, 'rb', encoding: ENC)
165
+
166
+ block = stdin || !@progress ?
167
+ lambda { |line| yield line, 0 } :
167
168
  lambda { |line| yield line, io.pos }
168
- ]
169
169
 
170
170
  case @filter == true ? file_type(path, io) : @filter.to_s
171
171
  when /html/i then io = filter_html(io)
@@ -195,13 +195,16 @@ class Lingo
195
195
 
196
196
  def file_type(path, io)
197
197
  if Object.const_defined?(:FileMagic) && io.respond_to?(:rewind)
198
- FileMagic.fm(:mime, simplified: true).buffer(io.read(256)).tap {
199
- io.rewind
200
- }
198
+ type = FileMagic.fm(:mime, simplified: true).buffer(io.read(256))
199
+ io.rewind
200
+ type
201
201
  elsif Object.const_defined?(:MIME) && MIME.const_defined?(:Types)
202
- MIME::Types.of(path).first.tap { |type| type ? type.content_type :
203
- warn('Filters not available. File type could not be determined.')
204
- }
202
+ if type = MIME::Types.of(path).first
203
+ type.content_type
204
+ else
205
+ warn 'Filters not available. File type could not be determined.'
206
+ nil
207
+ end
205
208
  else
206
209
  warn "Filters not available. Please install `ruby-filemagic' or `mime-types'."
207
210
  nil
@@ -220,17 +223,27 @@ class Lingo
220
223
  Array(get_key('files', '-')).each { |path|
221
224
  stdin?(path) ? @files << path : add_files(path, *args)
222
225
  }
223
-
224
- @files.map!(&File.method(:expand_path))
225
- @files.uniq!
226
226
  end
227
227
 
228
228
  def add_files(path, glob, recursive = false)
229
- Dir[path].sort!.each { |match|
230
- File.directory?(match) ? recursive ? Find.find(match) { |entry|
231
- @files << entry if File.file?(entry) && File.fnmatch?(glob, entry)
232
- } : add_files(File.join(match, glob), glob) : @files << match
233
- }.empty? and raise FileNotFoundError.new(path)
229
+ entries = Dir[path].sort!
230
+ raise FileNotFoundError.new(path) if entries.empty?
231
+
232
+ entries.each { |entry|
233
+ if File.directory?(entry)
234
+ if recursive
235
+ Find.find(entry) { |match|
236
+ if File.file?(match) && File.fnmatch?(glob, match)
237
+ @files << File.expand_path(match)
238
+ end
239
+ }
240
+ else
241
+ add_files(File.join(entry, glob), glob)
242
+ end
243
+ else
244
+ @files << File.expand_path(entry)
245
+ end
246
+ }
234
247
  end
235
248
 
236
249
  class PDFFilter
@@ -100,7 +100,6 @@ class Lingo
100
100
  if stdout?(@ext)
101
101
  @filename, @file = @ext, @lingo.config.stdout
102
102
  else
103
- inc('Anzahl Dateien')
104
103
  @file = File.open(@filename = File.set_ext(param, ".#{@ext}"), 'w')
105
104
  end
106
105
 
@@ -116,14 +115,12 @@ class Lingo
116
115
  @no_sep = true
117
116
 
118
117
  unless @lir
119
- inc('Anzahl Zeilen')
120
118
  @file.puts unless @no_puts
121
119
  end
122
120
  when STR_CMD_EOF
123
121
  flush_lir_buffer if @lir
124
122
 
125
123
  unless stdout?(@filename)
126
- add('Anzahl Bytes', @file.size)
127
124
  @file.close
128
125
  end
129
126
  end
@@ -142,9 +139,9 @@ class Lingo
142
139
 
143
140
  def flush_lir_buffer
144
141
  unless @lir_rec_no.empty? || @lir_rec_buf.empty?
145
- @file.print(*[@lir_rec_no, @lir_rec_buf.join(@sep), "\n"].tap { |buf|
146
- @sep =~ /\n/ ? buf.insert(1, "\n").unshift('*') : buf.insert(1, '*')
147
- })
142
+ buf = [@lir_rec_no, @lir_rec_buf.join(@sep), "\n"]
143
+ @sep =~ /\n/ ? buf.insert(1, "\n").unshift('*') : buf.insert(1, '*')
144
+ @file.print(*buf)
148
145
  end
149
146
 
150
147
  @lir_rec_no = ''