lingo 1.8.7 → 1.9.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1130ec52467314ba95af17e635888f60046c5b42
4
- data.tar.gz: 6a882ea4f88b1fbcf1a66b1d5fafe8fa05458b89
3
+ metadata.gz: d863ede7d1dda418b36230165f9f635a8977d73a
4
+ data.tar.gz: 9d7caed0a6d27898605b97429b48965617c96960
5
5
  SHA512:
6
- metadata.gz: 3e0b384a822c28961c99d411bbdd399d9a49b29fdc40688d3534a9897cef984a7447cdd21b05fda45dda9a0fad25d99f60d524064803edb218ff95bbf9fc4fe6
7
- data.tar.gz: 5cfa5c7f235113238d0e8568e9948f27a7ec864af63ac696a5efdfceed6eb678724f3ca3df24d3ac296068aceb1bf20d1369f439edf7f670265550ace1ce1cd0
6
+ metadata.gz: 62044e35ee507ff911b0aa7b656d6247f7f34a168d23e0da2d7f64dd8010e73b0258cbdda3ea1d61a010c3ca1204c442a02e04866bac8afb92a07d9309ed5c9c
7
+ data.tar.gz: eb4df8fa604f9f36cbb8094041519ca9198d5a59b624ef70afb937fc9120b8afa2b1d48f95732a2ffb0a253bf2aba5dc16bdb61543f9217926b94b5f80f3e1f8
data/ChangeLog CHANGED
@@ -2,6 +2,17 @@
2
2
 
3
3
  = Revision history for Lingo
4
4
 
5
+ == 1.9.0 [unreleased]
6
+
7
+ * Removed support for deprecated options and attendee names (+old+ → +new+):
8
+ * Lingo::Language::Grammar : +compositum+ → +compound+
9
+ * Lingo::Attendee::TextReader : +lir-record-pattern+ → +records+
10
+ * Lingo::Config : +multiworder+ → +multi_worder+, +objectfilter+ →
11
+ +object_filter+, +textreader+ → +text_reader+, +textwriter+ →
12
+ +text_writer+, +vectorfilter+ → +vector_filter+, +wordsearcher+ →
13
+ +word_searcher+
14
+ * Fixed errors with XML input (issue #15 by Thomas Berger).
15
+
5
16
  == 1.8.7 [2015-08-07]
6
17
 
7
18
  * Added Lingo::Attendee::LsiFilter to correlate semantically related terms
data/README CHANGED
@@ -34,7 +34,7 @@
34
34
 
35
35
  == VERSION
36
36
 
37
- This documentation refers to Lingo version 1.8.7
37
+ This documentation refers to Lingo version 1.9.0
38
38
 
39
39
 
40
40
  == DESCRIPTION
@@ -555,7 +555,7 @@ Lingo is based on a collective development by Klaus Lepsky and John Vorhauer.
555
555
  == LICENSE AND COPYRIGHT
556
556
 
557
557
  Copyright (C) 2005-2007 John Vorhauer
558
- Copyright (C) 2007-2015 John Vorhauer, Jens Wille
558
+ Copyright (C) 2007-2016 John Vorhauer, Jens Wille
559
559
 
560
560
  Lingo is free software: you can redistribute it and/or modify it under the
561
561
  terms of the GNU Affero General Public License as published by the Free
data/Rakefile CHANGED
@@ -36,16 +36,18 @@ The main functions of Lingo are:
36
36
  ].to_a,
37
37
 
38
38
  dependencies: {
39
- 'cyclops' => '~> 0.1',
40
- 'nuggets' => '~> 1.3',
39
+ 'cyclops' => '~> 0.2',
40
+ 'nuggets' => '~> 1.4',
41
41
  'rubyzip' => '~> 1.1',
42
- 'sinatra-bells' => '~> 0.0',
42
+ 'sinatra-bells' => '~> 0.3',
43
43
  'unicode' => '~> 0.4'
44
44
  },
45
45
 
46
46
  development_dependencies: {
47
- 'diff-lcs' => '~> 1.2',
48
- 'open4' => '~> 1.3'
47
+ 'diff-lcs' => '~> 1.2',
48
+ 'nokogiri' => '~> 1.6',
49
+ 'open4' => '~> 1.3',
50
+ 'pdf-reader' => '~> 1.3'
49
51
  },
50
52
 
51
53
  required_ruby_version: '>= 1.9.3'
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -318,6 +318,7 @@ require_relative 'lingo/call'
318
318
  require_relative 'lingo/error'
319
319
  require_relative 'lingo/debug'
320
320
  require_relative 'lingo/config'
321
+ require_relative 'lingo/filter'
321
322
  require_relative 'lingo/progress'
322
323
  require_relative 'lingo/database'
323
324
  require_relative 'lingo/language'
@@ -236,10 +236,6 @@ class Lingo
236
236
 
237
237
  end
238
238
 
239
- # For backwards compatibility.
240
- Multiworder = MultiWorder
241
- Multi_worder = MultiWorder
242
-
243
239
  end
244
240
 
245
241
  end
@@ -85,10 +85,6 @@ class Lingo
85
85
 
86
86
  end
87
87
 
88
- # For backwards compatibility.
89
- Objectfilter = ObjectFilter
90
- Object_filter = ObjectFilter
91
-
92
88
  end
93
89
 
94
90
  end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -26,15 +26,12 @@
26
26
 
27
27
  require 'find'
28
28
 
29
- %w[filemagic mime/types nokogiri nuggets/file/which pdf-reader].each { |lib|
30
- begin
31
- require lib
32
- rescue LoadError
33
- end
34
- }
35
-
36
29
  class Lingo
37
30
 
31
+ require_optional 'filemagic'
32
+ require_optional 'mime/types'
33
+ require_optional 'nuggets/file/which'
34
+
38
35
  class Attendee
39
36
 
40
37
  #--
@@ -118,11 +115,7 @@ class Lingo
118
115
  @filter = get_key('filter', false)
119
116
  @progress = get_key('progress', false)
120
117
 
121
- if has_key?('lir-record-pattern')
122
- lingo.config.deprecate('lir-record-pattern', :records, self)
123
- end
124
-
125
- @lir = get_re('records', get_key('lir-record-pattern', nil), %r{^\[(\d+)\.\]}) # DEPRECATE lir-record-pattern
118
+ @lir = get_re('records', nil, %r{^\[(\d+)\.\]})
126
119
  @cut = get_re('fields', !!@lir, %r{^.+?:\s*})
127
120
  @skip = get_re('skip', nil)
128
121
  end
@@ -165,8 +158,8 @@ class Lingo
165
158
  def filter(io, path, progress)
166
159
  case @filter == true ? file_type(io, path) : @filter.to_s
167
160
  when 'pdftotext' then filter_pdftotext(io, path, progress)
168
- when /html/i then filter_html(io)
169
- when /xml/i then filter_html(io, true)
161
+ when /html/i then filter_xml(io, :HTML)
162
+ when /xml/i then filter_xml(io)
170
163
  when /pdf/i then filter_pdf(io)
171
164
  else io
172
165
  end
@@ -185,13 +178,13 @@ class Lingo
185
178
  end
186
179
 
187
180
  def filter_pdf(io)
188
- Object.const_defined?(:PDF) && PDF.const_defined?(:Reader) ? text_enum(
189
- PDF::Reader.new(io).pages) : cancel_filter(:PDF, 'pdf-reader')
181
+ cancel_filter(:PDF, 'pdf-reader') unless Object.const_defined?(:PDF)
182
+ Filter::PDF.new(io, @encoding)
190
183
  end
191
184
 
192
- def filter_html(io, xml = false, type = xml ? :XML : :HTML)
193
- Object.const_defined?(:Nokogiri) ? text_enum(Nokogiri.send(type,
194
- io, nil, @encoding).children) : cancel_filter(type, :nokogiri)
185
+ def filter_xml(io, type = :XML)
186
+ cancel_filter(type, :nokogiri) unless Object.const_defined?(:Nokogiri)
187
+ Filter.const_get(type).new(io, @encoding)
195
188
  end
196
189
 
197
190
  def file_type(io, path)
@@ -239,10 +232,6 @@ class Lingo
239
232
  tempfiles.each(&:unlink)
240
233
  end
241
234
 
242
- def text_enum(collection)
243
- Enumerator.new { |y| collection.each { |x| y << x.text } }
244
- end
245
-
246
235
  def get_files
247
236
  args = [get_key('glob', '*.txt'), get_key('recursive', false)]
248
237
 
@@ -263,10 +252,6 @@ class Lingo
263
252
 
264
253
  end
265
254
 
266
- # For backwards compatibility.
267
- Textreader = TextReader
268
- Text_reader = TextReader
269
-
270
255
  end
271
256
 
272
257
  end
@@ -145,10 +145,6 @@ class Lingo
145
145
 
146
146
  end
147
147
 
148
- # For backwards compatibility.
149
- Textwriter = TextWriter
150
- Text_writer = TextWriter
151
-
152
148
  end
153
149
 
154
150
  end
@@ -240,10 +240,6 @@ class Lingo
240
240
 
241
241
  end
242
242
 
243
- # For backwards compatibility.
244
- Vectorfilter = VectorFilter
245
- Vector_filter = VectorFilter
246
-
247
243
  end
248
244
 
249
245
  end
@@ -83,10 +83,6 @@ class Lingo
83
83
 
84
84
  end
85
85
 
86
- # For backwards compatibility.
87
- Wordsearcher = WordSearcher
88
- Word_searcher = WordSearcher
89
-
90
86
  end
91
87
 
92
88
  end
@@ -42,12 +42,7 @@ class Lingo
42
42
  load_config('language', :lang)
43
43
  load_config('config')
44
44
 
45
- if Array(self['meeting/attendees']).flat_map(&:keys).include?('textreader')
46
- deprecate(:textreader, :text_reader)
47
- end
48
-
49
- if r = get('meeting/attendees', 'text_reader') ||
50
- get('meeting/attendees', 'textreader') # DEPRECATE textreader
45
+ if r = get('meeting/attendees', 'text_reader')
51
46
  f = @cli.files
52
47
 
53
48
  if i = r['files']
@@ -110,11 +105,11 @@ class Lingo
110
105
  @cli.send(:quit, *args)
111
106
  end
112
107
 
113
- def deprecate(old, new, obj = self, what = :option)
108
+ def deprecate(old, new, obj = self, what = :option, ver = Version.next_minor)
114
109
  unless @deprecated[[source = obj.class.name.sub(/\ALingo::/, ''), old]]
115
110
  warn(
116
111
  "DEPRECATION WARNING: #{source} #{what} `#{old}' is deprecated " <<
117
- "and will be removed in Lingo 1.9. Please use `#{new}' instead."
112
+ "and will be removed in Lingo #{ver}. Please use `#{new}' instead."
118
113
  )
119
114
  end
120
115
  end
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ class Filter
30
+
31
+ def initialize(io, encoding = ENC)
32
+ @io, @encoding = io, encoding
33
+ end
34
+
35
+ def each
36
+ raise NotImplementedError, 'must be implemented by subclass'
37
+ end
38
+
39
+ def close
40
+ @io.close
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+
47
+ require_relative 'filter/pdf'
48
+ require_relative 'filter/xml'
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ require_optional 'pdf-reader'
30
+
31
+ class Filter
32
+
33
+ class PDF < self
34
+
35
+ def initialize(*args)
36
+ super
37
+ @obj = ::PDF::Reader.new(@io)
38
+ end
39
+
40
+ def each(&block)
41
+ @obj.pages.each { |x| x.text.each_line(&block) }
42
+ end
43
+
44
+ end
45
+
46
+ end
47
+
48
+ end
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ require_optional 'nokogiri'
30
+
31
+ class Filter
32
+
33
+ class XML < self
34
+
35
+ TYPE = :XML
36
+
37
+ def initialize(*args)
38
+ super
39
+ @obj = Nokogiri.send(self.class::TYPE, @io, nil, @encoding.to_s)
40
+ end
41
+
42
+ def each(&block)
43
+ @obj.root.element_children.each { |n| n.content.each_line(&block) }
44
+ end
45
+
46
+ end
47
+
48
+ class HTML < XML
49
+
50
+ TYPE = :HTML
51
+
52
+ end
53
+
54
+ end
55
+
56
+ end
@@ -52,12 +52,7 @@ class Lingo
52
52
  def initialize(config, lingo)
53
53
  @dic, @suggestions = Dictionary.new(config, lingo), []
54
54
 
55
- if lingo.dictionary_config.key?('compositum')
56
- lingo.config.deprecate(:compositum, :compound, self)
57
- end
58
-
59
- cfg = lingo.dictionary_config['compound'] ||
60
- lingo.dictionary_config['compositum'] # DEPRECATE compositum
55
+ cfg = lingo.dictionary_config['compound']
61
56
 
62
57
  DEFAULTS.each { |k, v| instance_variable_set(
63
58
  "@#{k}", cfg.fetch(k.to_s.tr('_', '-'), v).to_i) }
@@ -114,7 +114,7 @@ class Lingo
114
114
  end
115
115
 
116
116
  def each_lex(wc_re = //)
117
- return enum_for(:each_lex, wc_re) unless block_given?
117
+ return enum_for(__method__, wc_re) unless block_given?
118
118
 
119
119
  wc_re = Regexp.new(wc_re) unless wc_re.is_a?(Regexp)
120
120