lingo 1.8.7 → 1.9.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1130ec52467314ba95af17e635888f60046c5b42
4
- data.tar.gz: 6a882ea4f88b1fbcf1a66b1d5fafe8fa05458b89
3
+ metadata.gz: d863ede7d1dda418b36230165f9f635a8977d73a
4
+ data.tar.gz: 9d7caed0a6d27898605b97429b48965617c96960
5
5
  SHA512:
6
- metadata.gz: 3e0b384a822c28961c99d411bbdd399d9a49b29fdc40688d3534a9897cef984a7447cdd21b05fda45dda9a0fad25d99f60d524064803edb218ff95bbf9fc4fe6
7
- data.tar.gz: 5cfa5c7f235113238d0e8568e9948f27a7ec864af63ac696a5efdfceed6eb678724f3ca3df24d3ac296068aceb1bf20d1369f439edf7f670265550ace1ce1cd0
6
+ metadata.gz: 62044e35ee507ff911b0aa7b656d6247f7f34a168d23e0da2d7f64dd8010e73b0258cbdda3ea1d61a010c3ca1204c442a02e04866bac8afb92a07d9309ed5c9c
7
+ data.tar.gz: eb4df8fa604f9f36cbb8094041519ca9198d5a59b624ef70afb937fc9120b8afa2b1d48f95732a2ffb0a253bf2aba5dc16bdb61543f9217926b94b5f80f3e1f8
data/ChangeLog CHANGED
@@ -2,6 +2,17 @@
2
2
 
3
3
  = Revision history for Lingo
4
4
 
5
+ == 1.9.0 [unreleased]
6
+
7
+ * Removed support for deprecated options and attendee names (+old+ → +new+):
8
+ * Lingo::Language::Grammar : +compositum+ → +compound+
9
+ * Lingo::Attendee::TextReader : +lir-record-pattern+ → +records+
10
+ * Lingo::Config : +multiworder+ → +multi_worder+, +objectfilter+ →
11
+ +object_filter+, +textreader+ → +text_reader+, +textwriter+ →
12
+ +text_writer+, +vectorfilter+ → +vector_filter+, +wordsearcher+ →
13
+ +word_searcher+
14
+ * Fixed errors with XML input (issue #15 by Thomas Berger).
15
+
5
16
  == 1.8.7 [2015-08-07]
6
17
 
7
18
  * Added Lingo::Attendee::LsiFilter to correlate semantically related terms
data/README CHANGED
@@ -34,7 +34,7 @@
34
34
 
35
35
  == VERSION
36
36
 
37
- This documentation refers to Lingo version 1.8.7
37
+ This documentation refers to Lingo version 1.9.0
38
38
 
39
39
 
40
40
  == DESCRIPTION
@@ -555,7 +555,7 @@ Lingo is based on a collective development by Klaus Lepsky and John Vorhauer.
555
555
  == LICENSE AND COPYRIGHT
556
556
 
557
557
  Copyright (C) 2005-2007 John Vorhauer
558
- Copyright (C) 2007-2015 John Vorhauer, Jens Wille
558
+ Copyright (C) 2007-2016 John Vorhauer, Jens Wille
559
559
 
560
560
  Lingo is free software: you can redistribute it and/or modify it under the
561
561
  terms of the GNU Affero General Public License as published by the Free
data/Rakefile CHANGED
@@ -36,16 +36,18 @@ The main functions of Lingo are:
36
36
  ].to_a,
37
37
 
38
38
  dependencies: {
39
- 'cyclops' => '~> 0.1',
40
- 'nuggets' => '~> 1.3',
39
+ 'cyclops' => '~> 0.2',
40
+ 'nuggets' => '~> 1.4',
41
41
  'rubyzip' => '~> 1.1',
42
- 'sinatra-bells' => '~> 0.0',
42
+ 'sinatra-bells' => '~> 0.3',
43
43
  'unicode' => '~> 0.4'
44
44
  },
45
45
 
46
46
  development_dependencies: {
47
- 'diff-lcs' => '~> 1.2',
48
- 'open4' => '~> 1.3'
47
+ 'diff-lcs' => '~> 1.2',
48
+ 'nokogiri' => '~> 1.6',
49
+ 'open4' => '~> 1.3',
50
+ 'pdf-reader' => '~> 1.3'
49
51
  },
50
52
 
51
53
  required_ruby_version: '>= 1.9.3'
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -318,6 +318,7 @@ require_relative 'lingo/call'
318
318
  require_relative 'lingo/error'
319
319
  require_relative 'lingo/debug'
320
320
  require_relative 'lingo/config'
321
+ require_relative 'lingo/filter'
321
322
  require_relative 'lingo/progress'
322
323
  require_relative 'lingo/database'
323
324
  require_relative 'lingo/language'
@@ -236,10 +236,6 @@ class Lingo
236
236
 
237
237
  end
238
238
 
239
- # For backwards compatibility.
240
- Multiworder = MultiWorder
241
- Multi_worder = MultiWorder
242
-
243
239
  end
244
240
 
245
241
  end
@@ -85,10 +85,6 @@ class Lingo
85
85
 
86
86
  end
87
87
 
88
- # For backwards compatibility.
89
- Objectfilter = ObjectFilter
90
- Object_filter = ObjectFilter
91
-
92
88
  end
93
89
 
94
90
  end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -26,15 +26,12 @@
26
26
 
27
27
  require 'find'
28
28
 
29
- %w[filemagic mime/types nokogiri nuggets/file/which pdf-reader].each { |lib|
30
- begin
31
- require lib
32
- rescue LoadError
33
- end
34
- }
35
-
36
29
  class Lingo
37
30
 
31
+ require_optional 'filemagic'
32
+ require_optional 'mime/types'
33
+ require_optional 'nuggets/file/which'
34
+
38
35
  class Attendee
39
36
 
40
37
  #--
@@ -118,11 +115,7 @@ class Lingo
118
115
  @filter = get_key('filter', false)
119
116
  @progress = get_key('progress', false)
120
117
 
121
- if has_key?('lir-record-pattern')
122
- lingo.config.deprecate('lir-record-pattern', :records, self)
123
- end
124
-
125
- @lir = get_re('records', get_key('lir-record-pattern', nil), %r{^\[(\d+)\.\]}) # DEPRECATE lir-record-pattern
118
+ @lir = get_re('records', nil, %r{^\[(\d+)\.\]})
126
119
  @cut = get_re('fields', !!@lir, %r{^.+?:\s*})
127
120
  @skip = get_re('skip', nil)
128
121
  end
@@ -165,8 +158,8 @@ class Lingo
165
158
  def filter(io, path, progress)
166
159
  case @filter == true ? file_type(io, path) : @filter.to_s
167
160
  when 'pdftotext' then filter_pdftotext(io, path, progress)
168
- when /html/i then filter_html(io)
169
- when /xml/i then filter_html(io, true)
161
+ when /html/i then filter_xml(io, :HTML)
162
+ when /xml/i then filter_xml(io)
170
163
  when /pdf/i then filter_pdf(io)
171
164
  else io
172
165
  end
@@ -185,13 +178,13 @@ class Lingo
185
178
  end
186
179
 
187
180
  def filter_pdf(io)
188
- Object.const_defined?(:PDF) && PDF.const_defined?(:Reader) ? text_enum(
189
- PDF::Reader.new(io).pages) : cancel_filter(:PDF, 'pdf-reader')
181
+ cancel_filter(:PDF, 'pdf-reader') unless Object.const_defined?(:PDF)
182
+ Filter::PDF.new(io, @encoding)
190
183
  end
191
184
 
192
- def filter_html(io, xml = false, type = xml ? :XML : :HTML)
193
- Object.const_defined?(:Nokogiri) ? text_enum(Nokogiri.send(type,
194
- io, nil, @encoding).children) : cancel_filter(type, :nokogiri)
185
+ def filter_xml(io, type = :XML)
186
+ cancel_filter(type, :nokogiri) unless Object.const_defined?(:Nokogiri)
187
+ Filter.const_get(type).new(io, @encoding)
195
188
  end
196
189
 
197
190
  def file_type(io, path)
@@ -239,10 +232,6 @@ class Lingo
239
232
  tempfiles.each(&:unlink)
240
233
  end
241
234
 
242
- def text_enum(collection)
243
- Enumerator.new { |y| collection.each { |x| y << x.text } }
244
- end
245
-
246
235
  def get_files
247
236
  args = [get_key('glob', '*.txt'), get_key('recursive', false)]
248
237
 
@@ -263,10 +252,6 @@ class Lingo
263
252
 
264
253
  end
265
254
 
266
- # For backwards compatibility.
267
- Textreader = TextReader
268
- Text_reader = TextReader
269
-
270
255
  end
271
256
 
272
257
  end
@@ -145,10 +145,6 @@ class Lingo
145
145
 
146
146
  end
147
147
 
148
- # For backwards compatibility.
149
- Textwriter = TextWriter
150
- Text_writer = TextWriter
151
-
152
148
  end
153
149
 
154
150
  end
@@ -240,10 +240,6 @@ class Lingo
240
240
 
241
241
  end
242
242
 
243
- # For backwards compatibility.
244
- Vectorfilter = VectorFilter
245
- Vector_filter = VectorFilter
246
-
247
243
  end
248
244
 
249
245
  end
@@ -83,10 +83,6 @@ class Lingo
83
83
 
84
84
  end
85
85
 
86
- # For backwards compatibility.
87
- Wordsearcher = WordSearcher
88
- Word_searcher = WordSearcher
89
-
90
86
  end
91
87
 
92
88
  end
@@ -42,12 +42,7 @@ class Lingo
42
42
  load_config('language', :lang)
43
43
  load_config('config')
44
44
 
45
- if Array(self['meeting/attendees']).flat_map(&:keys).include?('textreader')
46
- deprecate(:textreader, :text_reader)
47
- end
48
-
49
- if r = get('meeting/attendees', 'text_reader') ||
50
- get('meeting/attendees', 'textreader') # DEPRECATE textreader
45
+ if r = get('meeting/attendees', 'text_reader')
51
46
  f = @cli.files
52
47
 
53
48
  if i = r['files']
@@ -110,11 +105,11 @@ class Lingo
110
105
  @cli.send(:quit, *args)
111
106
  end
112
107
 
113
- def deprecate(old, new, obj = self, what = :option)
108
+ def deprecate(old, new, obj = self, what = :option, ver = Version.next_minor)
114
109
  unless @deprecated[[source = obj.class.name.sub(/\ALingo::/, ''), old]]
115
110
  warn(
116
111
  "DEPRECATION WARNING: #{source} #{what} `#{old}' is deprecated " <<
117
- "and will be removed in Lingo 1.9. Please use `#{new}' instead."
112
+ "and will be removed in Lingo #{ver}. Please use `#{new}' instead."
118
113
  )
119
114
  end
120
115
  end
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ class Filter
30
+
31
+ def initialize(io, encoding = ENC)
32
+ @io, @encoding = io, encoding
33
+ end
34
+
35
+ def each
36
+ raise NotImplementedError, 'must be implemented by subclass'
37
+ end
38
+
39
+ def close
40
+ @io.close
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+
47
+ require_relative 'filter/pdf'
48
+ require_relative 'filter/xml'
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ require_optional 'pdf-reader'
30
+
31
+ class Filter
32
+
33
+ class PDF < self
34
+
35
+ def initialize(*args)
36
+ super
37
+ @obj = ::PDF::Reader.new(@io)
38
+ end
39
+
40
+ def each(&block)
41
+ @obj.pages.each { |x| x.text.each_line(&block) }
42
+ end
43
+
44
+ end
45
+
46
+ end
47
+
48
+ end
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ require_optional 'nokogiri'
30
+
31
+ class Filter
32
+
33
+ class XML < self
34
+
35
+ TYPE = :XML
36
+
37
+ def initialize(*args)
38
+ super
39
+ @obj = Nokogiri.send(self.class::TYPE, @io, nil, @encoding.to_s)
40
+ end
41
+
42
+ def each(&block)
43
+ @obj.root.element_children.each { |n| n.content.each_line(&block) }
44
+ end
45
+
46
+ end
47
+
48
+ class HTML < XML
49
+
50
+ TYPE = :HTML
51
+
52
+ end
53
+
54
+ end
55
+
56
+ end
@@ -52,12 +52,7 @@ class Lingo
52
52
  def initialize(config, lingo)
53
53
  @dic, @suggestions = Dictionary.new(config, lingo), []
54
54
 
55
- if lingo.dictionary_config.key?('compositum')
56
- lingo.config.deprecate(:compositum, :compound, self)
57
- end
58
-
59
- cfg = lingo.dictionary_config['compound'] ||
60
- lingo.dictionary_config['compositum'] # DEPRECATE compositum
55
+ cfg = lingo.dictionary_config['compound']
61
56
 
62
57
  DEFAULTS.each { |k, v| instance_variable_set(
63
58
  "@#{k}", cfg.fetch(k.to_s.tr('_', '-'), v).to_i) }
@@ -114,7 +114,7 @@ class Lingo
114
114
  end
115
115
 
116
116
  def each_lex(wc_re = //)
117
- return enum_for(:each_lex, wc_re) unless block_given?
117
+ return enum_for(__method__, wc_re) unless block_given?
118
118
 
119
119
  wc_re = Regexp.new(wc_re) unless wc_re.is_a?(Regexp)
120
120