lingo 1.8.7 → 1.9.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +11 -0
- data/README +2 -2
- data/Rakefile +7 -5
- data/lib/lingo.rb +2 -1
- data/lib/lingo/attendee/multi_worder.rb +0 -4
- data/lib/lingo/attendee/object_filter.rb +0 -4
- data/lib/lingo/attendee/text_reader.rb +13 -28
- data/lib/lingo/attendee/text_writer.rb +0 -4
- data/lib/lingo/attendee/vector_filter.rb +0 -4
- data/lib/lingo/attendee/word_searcher.rb +0 -4
- data/lib/lingo/config.rb +3 -8
- data/lib/lingo/filter.rb +48 -0
- data/lib/lingo/filter/pdf.rb +48 -0
- data/lib/lingo/filter/xml.rb +56 -0
- data/lib/lingo/language/grammar.rb +1 -6
- data/lib/lingo/language/word.rb +1 -1
- data/lib/lingo/version.rb +7 -3
- data/test/article.html +63 -0
- data/test/article.pdf +0 -0
- data/test/article.txt +44 -0
- data/test/article.xml +120 -0
- data/test/attendee/ts_text_reader.rb +405 -16
- metadata +58 -49
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.7
|
4
|
+
version: 1.9.0.pre1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Vorhauer
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2016-02-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: cyclops
|
@@ -17,28 +17,28 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '0.
|
20
|
+
version: '0.2'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: '0.
|
27
|
+
version: '0.2'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: nuggets
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '1.
|
34
|
+
version: '1.4'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '1.
|
41
|
+
version: '1.4'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: rubyzip
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,14 +59,14 @@ dependencies:
|
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '0.
|
62
|
+
version: '0.3'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0.
|
69
|
+
version: '0.3'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: unicode
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -95,6 +95,20 @@ dependencies:
|
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
97
|
version: '1.2'
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: nokogiri
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - "~>"
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '1.6'
|
105
|
+
type: :development
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - "~>"
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '1.6'
|
98
112
|
- !ruby/object:Gem::Dependency
|
99
113
|
name: open4
|
100
114
|
requirement: !ruby/object:Gem::Requirement
|
@@ -109,6 +123,20 @@ dependencies:
|
|
109
123
|
- - "~>"
|
110
124
|
- !ruby/object:Gem::Version
|
111
125
|
version: '1.3'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: pdf-reader
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - "~>"
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '1.3'
|
133
|
+
type: :development
|
134
|
+
prerelease: false
|
135
|
+
version_requirements: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - "~>"
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '1.3'
|
112
140
|
- !ruby/object:Gem::Dependency
|
113
141
|
name: hen
|
114
142
|
requirement: !ruby/object:Gem::Requirement
|
@@ -118,7 +146,7 @@ dependencies:
|
|
118
146
|
version: '0.8'
|
119
147
|
- - ">="
|
120
148
|
- !ruby/object:Gem::Version
|
121
|
-
version: 0.8.
|
149
|
+
version: 0.8.3
|
122
150
|
type: :development
|
123
151
|
prerelease: false
|
124
152
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -128,7 +156,7 @@ dependencies:
|
|
128
156
|
version: '0.8'
|
129
157
|
- - ">="
|
130
158
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.8.
|
159
|
+
version: 0.8.3
|
132
160
|
- !ruby/object:Gem::Dependency
|
133
161
|
name: rake
|
134
162
|
requirement: !ruby/object:Gem::Requirement
|
@@ -264,6 +292,9 @@ files:
|
|
264
292
|
- lib/lingo/debug.rb
|
265
293
|
- lib/lingo/deferred_attendee.rb
|
266
294
|
- lib/lingo/error.rb
|
295
|
+
- lib/lingo/filter.rb
|
296
|
+
- lib/lingo/filter/pdf.rb
|
297
|
+
- lib/lingo/filter/xml.rb
|
267
298
|
- lib/lingo/language.rb
|
268
299
|
- lib/lingo/language/char.rb
|
269
300
|
- lib/lingo/language/dictionary.rb
|
@@ -286,6 +317,10 @@ files:
|
|
286
317
|
- lib/lingo/web/public/lingo.png
|
287
318
|
- lib/lingo/web/public/lingoweb.css
|
288
319
|
- lib/lingo/web/views/index.erb
|
320
|
+
- test/article.html
|
321
|
+
- test/article.pdf
|
322
|
+
- test/article.txt
|
323
|
+
- test/article.xml
|
289
324
|
- test/attendee/ts_abbreviator.rb
|
290
325
|
- test/attendee/ts_decomposer.rb
|
291
326
|
- test/attendee/ts_multi_worder.rb
|
@@ -335,46 +370,20 @@ licenses:
|
|
335
370
|
metadata: {}
|
336
371
|
post_install_message: |2+
|
337
372
|
|
338
|
-
lingo-1.
|
373
|
+
lingo-1.9.0 [unreleased]:
|
339
374
|
|
340
|
-
*
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
individual documents; requires hal4r[https://blackwinter.github.com/hal4r]
|
349
|
-
which in turn requires rb-gsl[https://blackwinter.github.com/rb-gsl].
|
350
|
-
[EXPERIMENTAL: Interface may be changed or removed in next release.]
|
351
|
-
* Added Lingo::Attendee::AnalysisFilter and associated +lingoctl+ tooling.
|
352
|
-
* Multiword dictionaries can now identify hyphenated variants (e.g.
|
353
|
-
<tt>automatic data-processing</tt>); set <tt>hyphenate: true</tt> in the
|
354
|
-
dictionary config.
|
355
|
-
* Lingo::Attendee::Tokenizer no longer considers hyphens at word edges as part
|
356
|
-
of the word. As a consequence, Lingo::Attendee::Dehyphenizer has been
|
357
|
-
dropped.
|
358
|
-
* Dropped Lingo::Attendee::NonewordFilter; use Lingo::Attendee::VectorFilter
|
359
|
-
with option <tt>lexicals: '\?'</tt> instead.
|
360
|
-
* Lingo::Attendee::TextReader and Lingo::Attendee::TextWriter learned
|
361
|
-
+encoding+ option to read/write text that is not UTF-8 encoded;
|
362
|
-
configuration files and dictionaries still need to be UTF-8, though.
|
363
|
-
* Lingo::Attendee::TextReader and Lingo::Attendee::TextWriter learned to
|
364
|
-
read/write Gzip-compressed files (file extension +.gz+ or +.gzip+).
|
365
|
-
* Lingo::Attendee::Sequencer learned to recognize +0+ in the pattern to match
|
366
|
-
number tokens.
|
367
|
-
* Fixed Lingo::Attendee::TextReader to recognize BOM in input files; does not
|
368
|
-
apply to input read from +STDIN+.
|
369
|
-
* Fixed regression introduced in 1.8.6 where Lingo::Attendee::Debugger would
|
370
|
-
no longer work immediately behind Lingo::Attendee::TextReader.
|
371
|
-
* Fixed +lingoctl+ copy commands when overwriting existing files.
|
372
|
-
* Refactored Lingo::Database::Crypter into a module.
|
373
|
-
* JRuby 9000 compatibility.
|
375
|
+
* Removed support for deprecated options and attendee names (+old+ → +new+):
|
376
|
+
* Lingo::Language::Grammar : +compositum+ → +compound+
|
377
|
+
* Lingo::Attendee::TextReader : +lir-record-pattern+ → +records+
|
378
|
+
* Lingo::Config : +multiworder+ → +multi_worder+, +objectfilter+ →
|
379
|
+
+object_filter+, +textreader+ → +text_reader+, +textwriter+ →
|
380
|
+
+text_writer+, +vectorfilter+ → +vector_filter+, +wordsearcher+ →
|
381
|
+
+word_searcher+
|
382
|
+
* Fixed errors with XML input (issue #15 by Thomas Berger).
|
374
383
|
|
375
384
|
rdoc_options:
|
376
385
|
- "--title"
|
377
|
-
- lingo Application documentation (v1.8.7)
|
386
|
+
- lingo Application documentation (v1.9.0.pre1)
|
378
387
|
- "--charset"
|
379
388
|
- UTF-8
|
380
389
|
- "--line-numbers"
|
@@ -390,12 +399,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
390
399
|
version: 1.9.3
|
391
400
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
392
401
|
requirements:
|
393
|
-
- - "
|
402
|
+
- - ">"
|
394
403
|
- !ruby/object:Gem::Version
|
395
|
-
version:
|
404
|
+
version: 1.3.1
|
396
405
|
requirements: []
|
397
406
|
rubyforge_project:
|
398
|
-
rubygems_version: 2.
|
407
|
+
rubygems_version: 2.5.2
|
399
408
|
signing_key:
|
400
409
|
specification_version: 4
|
401
410
|
summary: The full-featured automatic indexing system
|