rika 2.0.2-java → 2.0.3-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 350fd4a9478c68bea286f43ea43fe7199899fb6124115615d7e023d781845b8b
4
- data.tar.gz: d9470eb7cd432acac089e1f1dc030a414096c87ab8720fa79ebd6cfe16ac4528
3
+ metadata.gz: d1ff6f1340908e2f573ac758ded769bf4a94db0835e06a436addf6dd9408b7ba
4
+ data.tar.gz: 6e99972c0cf72ab44b85e11847ed6685101c8498f2cd8c1a9a51237b2cc7ede2
5
5
  SHA512:
6
- metadata.gz: 82961b39e1df54bd5ef83be432a1391a021537beea8ab63e7870f41ecec56457fd87c13fcdb3b3df848b520344ccbad69fd7d58b5c4c4b4ad2590381fc4dbca2
7
- data.tar.gz: 2e9edfc485e6e2f43299ac34e0bcef47e752b5a3a76d0970f84a7880612619ba41d3adab011000199728847a11f53232515a5893bf084abbd51c41bf3f8c50b6
6
+ metadata.gz: eaf3905403a4a0f66ace5a9658f1aa0ba77f153d67e588f094fbbe426b7cd4c30640c3f21652b8312493e4ad827719d6ff1d83a613b6ae0ff9a73164b179fd4e
7
+ data.tar.gz: dda02d0f0377f54db2fe59535c4b59b073c5fdcae3f1f8c6e66506427e34c248b923c2b01683a6ea811ca4a1ed45305b96739611a119d7dd244895e28fe7dac4
data/Gemfile CHANGED
@@ -6,11 +6,11 @@ source 'https://rubygems.org'
6
6
  gemspec
7
7
 
8
8
  group :development do
9
- gem 'pry'
10
- gem 'rake', '~> 13.0'
11
- gem 'rspec', '~> 3.9'
12
- gem 'rubocop'
13
- gem 'rubocop-rspec'
14
- gem 'simplecov', require: false
15
- gem 'webrick', '~> 1.6'
9
+ gem 'pry', '~> 0.14', '>= 0.14.2'
10
+ gem 'rake', '~> 13.2', '>= 13.2.1'
11
+ gem 'rspec', '~> 3.13', '>= 3.13.0'
12
+ gem 'rubocop', '~> 1.65', '>= 1.65.1'
13
+ gem 'rubocop-rspec', '~> 3.0', '>= 3.0.3'
14
+ gem 'simplecov', '~>0.22', '>= 0.22.0', require: false
15
+ gem 'webrick', '~> 1.8', '>= 1.8.1'
16
16
  end
data/README.md CHANGED
@@ -82,6 +82,10 @@ specify one or more filespecs or URL's as arguments:
82
82
  ```bash
83
83
  rika x.pdf https://github.com/keithrbennett/rika
84
84
  ```
85
+
86
+ > [!NOTE]
87
+ > If running `rika` produces an error indicating that the JRuby interpreter cannot be found, try preceding it with `jruby`, e.g. `jruby rika x.pdf`.
88
+
85
89
  Here is the help text:
86
90
 
87
91
  ```
@@ -245,6 +249,14 @@ rexe -in -oJ -mb 'downcase \
245
249
  This gem has been tested with JRuby managed by rvm. It should work with other Ruby version managers and
246
250
  without any version manager at all, but those configurations have not been tested.
247
251
 
252
+ ## Using the Tika Java Jar File Directly
253
+
254
+ Rika provides only the most common Tika use cases. You may want to dig deeper than Rika does into the massive amount
255
+ of functionality provided by the Tika library. You can do so by bypassing Rika altogether and using the Tika jar file
256
+ directly in your own JRuby code. In addition, Tika provides its own command line application that can be called as,
257
+ for example, `java -jar $TIKA_JAR_FILESPEC --help`. This Tika command line application has finer grained control
258
+ over some Tika options, but is missing some conveniences provided by the Rika command line application.
259
+
248
260
  ## Other Tika Resources
249
261
 
250
262
  * The Apache Tika wiki is at https://cwiki.apache.org/confluence/display/tika.
@@ -265,3 +277,8 @@ without any version manager at all, but those configurations have not been teste
265
277
  3. Commit your changes (`git commit -am 'Add some feature'`)
266
278
  4. Push to the branch (`git push origin my-new-feature`)
267
279
  5. Create new Pull Request
280
+
281
+ ## Acknowledgments
282
+
283
+ Many thanks to the brilliant and dedicated developers who have worked to build Apache Tika since its inception many years ago.
284
+
data/RELEASE_NOTES.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## Release Notes
2
2
 
3
+ ### v2.0.3
4
+
5
+ * Fix parsing of empty files so they do not halt parsing of all files.
6
+ * Update gem dependencies.
7
+
3
8
  #### v2.0.2
4
9
 
5
10
  * Now prints source name on line with header and footer lines.
@@ -32,6 +32,13 @@ class RikaCommand
32
32
  puts result_array_output
33
33
  else
34
34
  targets.each do |target|
35
+ # If we don't do this, Tika will raise an org.apache.tika.exception.ZeroByteFileException
36
+ # TODO: Do same for URL?
37
+ if File.file?(target) && File.zero?(target)
38
+ $stderr.puts("\n\nFile empty!: #{target}\n\n")
39
+ next
40
+ end
41
+
35
42
  result = Rika.parse(target, max_content_length: max_content_length, key_sort: options[:key_sort])
36
43
  puts single_document_output(target, result)
37
44
  end
data/lib/rika/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rika
4
- VERSION = '2.0.2'
4
+ VERSION = '2.0.3'
5
5
  end
data/rika.gemspec CHANGED
@@ -17,7 +17,7 @@ Gem::Specification.new do |gem|
17
17
  gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
18
18
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
19
19
  gem.require_paths = ['lib']
20
- gem.add_dependency 'awesome_print'
20
+ gem.add_dependency 'awesome_print', '~> 1.9', '>= 1.9.2'
21
21
  gem.platform = 'java'
22
22
  gem.license = 'MIT'
23
23
  gem.metadata['rubygems_mfa_required'] = 'true'
File without changes
@@ -0,0 +1 @@
1
+ something
@@ -135,4 +135,21 @@ describe RikaCommand do
135
135
  expect(line).to match("Source: #{sample_filespec}")
136
136
  end
137
137
  end
138
+
139
+ describe 'empty file behavior' do
140
+ let(:empty_file_path) { fixture_path('empty.txt') }
141
+ let(:something_file_path) { fixture_path('something.txt') } # contains "something"
142
+
143
+ specify 'parsing an empty file outputs a message to stderr' do
144
+ expect {
145
+ described_class.new([empty_file_path]).call
146
+ }.to output("\n\nFile empty!: #{empty_file_path}\n\n").to_stderr
147
+ end
148
+
149
+ specify 'parsing an empty file does not interrupt parsing of subsequent files' do
150
+ expect {
151
+ described_class.new([empty_file_path, something_file_path]).call
152
+ }.to output(/something/).to_stdout
153
+ end
154
+ end
138
155
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.0.3
5
5
  platform: java
6
6
  authors:
7
7
  - Richard Nyström
@@ -9,22 +9,28 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2023-10-07 00:00:00.000000000 Z
12
+ date: 2024-08-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
17
20
  - - ">="
18
21
  - !ruby/object:Gem::Version
19
- version: '0'
22
+ version: 1.9.2
20
23
  name: awesome_print
21
- prerelease: false
22
24
  type: :runtime
25
+ prerelease: false
23
26
  version_requirements: !ruby/object:Gem::Requirement
24
27
  requirements:
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '1.9'
25
31
  - - ">="
26
32
  - !ruby/object:Gem::Version
27
- version: '0'
33
+ version: 1.9.2
28
34
  description: A JRuby wrapper for Apache Tika to extract text and metadata from files
29
35
  of various formats.
30
36
  email:
@@ -60,6 +66,7 @@ files:
60
66
  - spec/fixtures/document.docx
61
67
  - spec/fixtures/document.pdf
62
68
  - spec/fixtures/document.txt
69
+ - spec/fixtures/empty.txt
63
70
  - spec/fixtures/en.txt
64
71
  - spec/fixtures/es.txt
65
72
  - spec/fixtures/fr.txt
@@ -67,6 +74,7 @@ files:
67
74
  - spec/fixtures/image_jpg_without_extension
68
75
  - spec/fixtures/lang_cant_be_determined.txt
69
76
  - spec/fixtures/ru.txt
77
+ - spec/fixtures/something.txt
70
78
  - spec/fixtures/tiny.txt
71
79
  - spec/fixtures/unknown.bin
72
80
  - spec/rika/cli/args_parser_spec.rb