lingo 1.8.7 → 1.9.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +11 -0
- data/README +2 -2
- data/Rakefile +7 -5
- data/lib/lingo.rb +2 -1
- data/lib/lingo/attendee/multi_worder.rb +0 -4
- data/lib/lingo/attendee/object_filter.rb +0 -4
- data/lib/lingo/attendee/text_reader.rb +13 -28
- data/lib/lingo/attendee/text_writer.rb +0 -4
- data/lib/lingo/attendee/vector_filter.rb +0 -4
- data/lib/lingo/attendee/word_searcher.rb +0 -4
- data/lib/lingo/config.rb +3 -8
- data/lib/lingo/filter.rb +48 -0
- data/lib/lingo/filter/pdf.rb +48 -0
- data/lib/lingo/filter/xml.rb +56 -0
- data/lib/lingo/language/grammar.rb +1 -6
- data/lib/lingo/language/word.rb +1 -1
- data/lib/lingo/version.rb +7 -3
- data/test/article.html +63 -0
- data/test/article.pdf +0 -0
- data/test/article.txt +44 -0
- data/test/article.xml +120 -0
- data/test/attendee/ts_text_reader.rb +405 -16
- metadata +58 -49
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.7
|
4
|
+
version: 1.9.0.pre1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Vorhauer
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2016-02-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: cyclops
|
@@ -17,28 +17,28 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '0.
|
20
|
+
version: '0.2'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: '0.
|
27
|
+
version: '0.2'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: nuggets
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '1.
|
34
|
+
version: '1.4'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '1.
|
41
|
+
version: '1.4'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: rubyzip
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,14 +59,14 @@ dependencies:
|
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '0.
|
62
|
+
version: '0.3'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0.
|
69
|
+
version: '0.3'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: unicode
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -95,6 +95,20 @@ dependencies:
|
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
97
|
version: '1.2'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: nokogiri
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - "~>"
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '1.6'
|
105
|
+
type: :development
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - "~>"
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '1.6'
|
98
112
|
- !ruby/object:Gem::Dependency
|
99
113
|
name: open4
|
100
114
|
requirement: !ruby/object:Gem::Requirement
|
@@ -109,6 +123,20 @@ dependencies:
|
|
109
123
|
- - "~>"
|
110
124
|
- !ruby/object:Gem::Version
|
111
125
|
version: '1.3'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: pdf-reader
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - "~>"
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '1.3'
|
133
|
+
type: :development
|
134
|
+
prerelease: false
|
135
|
+
version_requirements: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - "~>"
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '1.3'
|
112
140
|
- !ruby/object:Gem::Dependency
|
113
141
|
name: hen
|
114
142
|
requirement: !ruby/object:Gem::Requirement
|
@@ -118,7 +146,7 @@ dependencies:
|
|
118
146
|
version: '0.8'
|
119
147
|
- - ">="
|
120
148
|
- !ruby/object:Gem::Version
|
121
|
-
version: 0.8.
|
149
|
+
version: 0.8.3
|
122
150
|
type: :development
|
123
151
|
prerelease: false
|
124
152
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -128,7 +156,7 @@ dependencies:
|
|
128
156
|
version: '0.8'
|
129
157
|
- - ">="
|
130
158
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.8.
|
159
|
+
version: 0.8.3
|
132
160
|
- !ruby/object:Gem::Dependency
|
133
161
|
name: rake
|
134
162
|
requirement: !ruby/object:Gem::Requirement
|
@@ -264,6 +292,9 @@ files:
|
|
264
292
|
- lib/lingo/debug.rb
|
265
293
|
- lib/lingo/deferred_attendee.rb
|
266
294
|
- lib/lingo/error.rb
|
295
|
+
- lib/lingo/filter.rb
|
296
|
+
- lib/lingo/filter/pdf.rb
|
297
|
+
- lib/lingo/filter/xml.rb
|
267
298
|
- lib/lingo/language.rb
|
268
299
|
- lib/lingo/language/char.rb
|
269
300
|
- lib/lingo/language/dictionary.rb
|
@@ -286,6 +317,10 @@ files:
|
|
286
317
|
- lib/lingo/web/public/lingo.png
|
287
318
|
- lib/lingo/web/public/lingoweb.css
|
288
319
|
- lib/lingo/web/views/index.erb
|
320
|
+
- test/article.html
|
321
|
+
- test/article.pdf
|
322
|
+
- test/article.txt
|
323
|
+
- test/article.xml
|
289
324
|
- test/attendee/ts_abbreviator.rb
|
290
325
|
- test/attendee/ts_decomposer.rb
|
291
326
|
- test/attendee/ts_multi_worder.rb
|
@@ -335,46 +370,20 @@ licenses:
|
|
335
370
|
metadata: {}
|
336
371
|
post_install_message: |2+
|
337
372
|
|
338
|
-
lingo-1.
|
373
|
+
lingo-1.9.0 [unreleased]:
|
339
374
|
|
340
|
-
*
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
individual documents; requires hal4r[https://blackwinter.github.com/hal4r]
|
349
|
-
which in turn requires rb-gsl[https://blackwinter.github.com/rb-gsl].
|
350
|
-
[EXPERIMENTAL: Interface may be changed or removed in next release.]
|
351
|
-
* Added Lingo::Attendee::AnalysisFilter and associated +lingoctl+ tooling.
|
352
|
-
* Multiword dictionaries can now identify hyphenated variants (e.g.
|
353
|
-
<tt>automatic data-processing</tt>); set <tt>hyphenate: true</tt> in the
|
354
|
-
dictionary config.
|
355
|
-
* Lingo::Attendee::Tokenizer no longer considers hyphens at word edges as part
|
356
|
-
of the word. As a consequence, Lingo::Attendee::Dehyphenizer has been
|
357
|
-
dropped.
|
358
|
-
* Dropped Lingo::Attendee::NonewordFilter; use Lingo::Attendee::VectorFilter
|
359
|
-
with option <tt>lexicals: '\?'</tt> instead.
|
360
|
-
* Lingo::Attendee::TextReader and Lingo::Attendee::TextWriter learned
|
361
|
-
+encoding+ option to read/write text that is not UTF-8 encoded;
|
362
|
-
configuration files and dictionaries still need to be UTF-8, though.
|
363
|
-
* Lingo::Attendee::TextReader and Lingo::Attendee::TextWriter learned to
|
364
|
-
read/write Gzip-compressed files (file extension +.gz+ or +.gzip+).
|
365
|
-
* Lingo::Attendee::Sequencer learned to recognize +0+ in the pattern to match
|
366
|
-
number tokens.
|
367
|
-
* Fixed Lingo::Attendee::TextReader to recognize BOM in input files; does not
|
368
|
-
apply to input read from +STDIN+.
|
369
|
-
* Fixed regression introduced in 1.8.6 where Lingo::Attendee::Debugger would
|
370
|
-
no longer work immediately behind Lingo::Attendee::TextReader.
|
371
|
-
* Fixed +lingoctl+ copy commands when overwriting existing files.
|
372
|
-
* Refactored Lingo::Database::Crypter into a module.
|
373
|
-
* JRuby 9000 compatibility.
|
375
|
+
* Removed support for deprecated options and attendee names (+old+ → +new+):
|
376
|
+
* Lingo::Language::Grammar : +compositum+ → +compound+
|
377
|
+
* Lingo::Attendee::TextReader : +lir-record-pattern+ → +records+
|
378
|
+
* Lingo::Config : +multiworder+ → +multi_worder+, +objectfilter+ →
|
379
|
+
+object_filter+, +textreader+ → +text_reader+, +textwriter+ →
|
380
|
+
+text_writer+, +vectorfilter+ → +vector_filter+, +wordsearcher+ →
|
381
|
+
+word_searcher+
|
382
|
+
* Fixed errors with XML input (issue #15 by Thomas Berger).
|
374
383
|
|
375
384
|
rdoc_options:
|
376
385
|
- "--title"
|
377
|
-
- lingo Application documentation (v1.8.7)
|
386
|
+
- lingo Application documentation (v1.9.0.pre1)
|
378
387
|
- "--charset"
|
379
388
|
- UTF-8
|
380
389
|
- "--line-numbers"
|
@@ -390,12 +399,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
390
399
|
version: 1.9.3
|
391
400
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
392
401
|
requirements:
|
393
|
-
- - "
|
402
|
+
- - ">"
|
394
403
|
- !ruby/object:Gem::Version
|
395
|
-
version:
|
404
|
+
version: 1.3.1
|
396
405
|
requirements: []
|
397
406
|
rubyforge_project:
|
398
|
-
rubygems_version: 2.
|
407
|
+
rubygems_version: 2.5.2
|
399
408
|
signing_key:
|
400
409
|
specification_version: 4
|
401
410
|
summary: The full-featured automatic indexing system
|