rika 1.4.0-java → 1.5.0-java

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 03d0a3d8e955115312ddf54329e6a613bf0d58e8
4
+ data.tar.gz: 6de10264cd6d8791b353f1e2a574f1eaaf982dd9
5
+ SHA512:
6
+ metadata.gz: 56ef6e0f28e6642dd624407335add98dededce244121996428008590bb1120908c4cb4b8b6964458bd032d2c521e985de524f578446c3c0c7102ad8a6b73d9a6
7
+ data.tar.gz: 67221ab8318af05b90f33cbb7520cc48fa7d474bd5c7170c76f54b268c4ab941b309bbd7c7ac704edd2aeb888634fcf5adf97a1a4d1ea1760d3fda270f6aa300
data/Rakefile CHANGED
@@ -8,4 +8,4 @@ task :default => :spec
8
8
  desc 'Download jars'
9
9
  task :download_jars do
10
10
  system "mvn dependency:copy-dependencies"
11
- end
11
+ end
data/lib/rika.rb CHANGED
@@ -5,13 +5,13 @@ raise "You need to run JRuby to use Rika" unless RUBY_PLATFORM =~ /java/
5
5
  require "rika/version"
6
6
  require 'uri'
7
7
  require 'net/http'
8
- require 'java'
8
+ require 'java'
9
9
 
10
10
  Dir[File.join(File.dirname(__FILE__), "../target/dependency/*.jar")].each do |jar|
11
11
  require jar
12
12
  end
13
13
 
14
- # Heavily based on the Apache Tika API: http://tika.apache.org/1.4/api/org/apache/tika/Tika.html
14
+ # Heavily based on the Apache Tika API: http://tika.apache.org/1.5/api/org/apache/tika/Tika.html
15
15
  module Rika
16
16
  import org.apache.tika.metadata.Metadata
17
17
  import org.apache.tika.Tika
@@ -36,7 +36,7 @@ module Rika
36
36
  end
37
37
 
38
38
  class Parser
39
-
39
+
40
40
  def initialize(file_location, max_content_length = -1, detector = DefaultDetector.new)
41
41
  @uri = file_location
42
42
  @tika = Tika.new(detector)
@@ -48,14 +48,14 @@ module Rika
48
48
 
49
49
  def content
50
50
  self.parse
51
- @content
51
+ @content
52
52
  end
53
53
 
54
54
  def metadata
55
55
  unless @metadata_ruby
56
56
  self.parse
57
57
  @metadata_ruby = {}
58
-
58
+
59
59
  @metadata_java.names.each do |name|
60
60
  @metadata_ruby[name] = @metadata_java.get(name)
61
61
  end
@@ -85,7 +85,7 @@ module Rika
85
85
 
86
86
  def language
87
87
  @lang ||= LanguageIdentifier.new(content)
88
-
88
+
89
89
  @lang.language
90
90
  end
91
91
 
@@ -96,7 +96,7 @@ module Rika
96
96
  end
97
97
 
98
98
  protected
99
-
99
+
100
100
  def parse
101
101
  @content ||= @tika.parse_to_string(input_stream, @metadata_java).to_s.strip
102
102
  end
data/lib/rika/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.0"
3
3
  end
data/pom.xml CHANGED
@@ -3,7 +3,7 @@
3
3
  <modelVersion>4.0.0</modelVersion>
4
4
 
5
5
  <name>Rika</name>
6
-
6
+
7
7
  <groupId>org.rika</groupId>
8
8
  <artifactId>Rika</artifactId>
9
9
  <version>1.0-SNAPSHOT</version>
@@ -13,8 +13,8 @@
13
13
  <dependency>
14
14
  <groupId>org.apache.tika</groupId>
15
15
  <artifactId>tika-parsers</artifactId>
16
- <version>1.4</version>
16
+ <version>1.5</version>
17
17
  <scope>test</scope>
18
18
  </dependency>
19
19
  </dependencies>
20
- </project>
20
+ </project>
data/rika.gemspec CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
16
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
17
  gem.require_paths = ["lib"]
18
- gem.add_development_dependency "rspec", "2.12.0"
19
- gem.add_development_dependency "rake", "10.0.3"
18
+ gem.add_development_dependency "rspec", "2.14.1"
19
+ gem.add_development_dependency "rake", "10.3.1"
20
20
  gem.platform = "java"
21
21
  end
data/spec/rika_spec.rb CHANGED
@@ -4,8 +4,8 @@ require 'spec_helper'
4
4
  require 'webrick'
5
5
 
6
6
  include WEBrick
7
-
8
- describe Rika::Parser do
7
+
8
+ describe Rika::Parser do
9
9
  before(:all) do
10
10
  @txt_parser = Rika::Parser.new(file_path("text_file.txt"))
11
11
  @docx_parser = Rika::Parser.new(file_path("document.docx"))
@@ -13,13 +13,13 @@ describe Rika::Parser do
13
13
  @pdf_parser = Rika::Parser.new(file_path("document.pdf"))
14
14
  @image_parser = Rika::Parser.new(file_path("image.jpg"))
15
15
  @unknown_parser = Rika::Parser.new(file_path("unknown.bin"))
16
- @dir = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures'))
17
- port = 50505
16
+ @dir = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures'))
17
+ port = 50515
18
18
  @url = "http://#{Socket.gethostname}:#{port}"
19
19
  @quote = "First they ignore you, then they ridicule you, then they fight you, then you win."
20
20
  @t1 = Thread.new do
21
- @server = HTTPServer.new(:Port => port, :DocumentRoot => @dir,
22
- :AccessLog => [], :Logger => WEBrick::Log::new("/dev/null", 7))
21
+ @server = HTTPServer.new(:Port => port, :DocumentRoot => @dir,
22
+ :AccessLog => [], :Logger => WEBrick::Log::new("/dev/null", 7))
23
23
  @server.start
24
24
  end
25
25
  @sample_pdf_filespec = file_path("document.pdf")
@@ -55,7 +55,7 @@ describe Rika::Parser do
55
55
  @docx_parser.content.should == @quote
56
56
  end
57
57
 
58
- it "should return the content in a pdf file" do
58
+ it "should return the content in a pdf file" do
59
59
  @pdf_parser.content.should == @quote
60
60
  end
61
61
 
@@ -70,7 +70,7 @@ describe Rika::Parser do
70
70
 
71
71
  it "should only return max content length for file over http" do
72
72
  parser = Rika::Parser.new(@url + "/document.pdf", 6)
73
- parser.content.should == "First"
73
+ parser.content.should == "First"
74
74
  end
75
75
 
76
76
  it "should be possible to read files over 100k by default" do
@@ -88,8 +88,8 @@ describe Rika::Parser do
88
88
  end
89
89
  end
90
90
 
91
- # We just test a few of the metadata fields for some common file formats
92
- # to make sure the integration with Apache Tika works. Apache Tika already
91
+ # We just test a few of the metadata fields for some common file formats
92
+ # to make sure the integration with Apache Tika works. Apache Tika already
93
93
  # have tests for all file formats it supports so we won't retest that
94
94
  describe '#metadata' do
95
95
  it "should return nil if metadata field does not exists" do
@@ -164,7 +164,7 @@ describe Rika::Parser do
164
164
 
165
165
  describe '#language' do
166
166
  it "should return the language of the content" do
167
-
167
+
168
168
  ["en", "de", "fr", "ru", "es"].each do |lang|
169
169
  txt = Rika::Parser.new(file_path("#{lang}.txt"))
170
170
  txt.language.should == lang
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
5
- prerelease:
4
+ version: 1.5.0
6
5
  platform: java
7
6
  authors:
8
7
  - Richard Nyström
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-07-04 00:00:00.000000000 Z
11
+ date: 2014-04-23 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rspec
@@ -17,14 +16,12 @@ dependencies:
17
16
  requirements:
18
17
  - - '='
19
18
  - !ruby/object:Gem::Version
20
- version: 2.12.0
21
- none: false
19
+ version: 2.14.1
22
20
  requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - '='
25
23
  - !ruby/object:Gem::Version
26
- version: 2.12.0
27
- none: false
24
+ version: 2.14.1
28
25
  prerelease: false
29
26
  type: :development
30
27
  - !ruby/object:Gem::Dependency
@@ -33,14 +30,12 @@ dependencies:
33
30
  requirements:
34
31
  - - '='
35
32
  - !ruby/object:Gem::Version
36
- version: 10.0.3
37
- none: false
33
+ version: 10.3.1
38
34
  requirement: !ruby/object:Gem::Requirement
39
35
  requirements:
40
36
  - - '='
41
37
  - !ruby/object:Gem::Version
42
- version: 10.0.3
43
- none: false
38
+ version: 10.3.1
44
39
  prerelease: false
45
40
  type: :development
46
41
  description: ' A JRuby wrapper for Apache Tika to extract text and metadata from various
@@ -89,24 +84,25 @@ files:
89
84
  - target/dependency/commons-compress-1.5.jar
90
85
  - target/dependency/commons-logging-1.1.1.jar
91
86
  - target/dependency/dom4j-1.6.1.jar
92
- - target/dependency/fontbox-1.8.1.jar
87
+ - target/dependency/fontbox-1.8.4.jar
93
88
  - target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar
94
89
  - target/dependency/isoparser-1.0-RC-1.jar
95
90
  - target/dependency/jdom-1.0.jar
96
- - target/dependency/jempbox-1.8.1.jar
91
+ - target/dependency/jempbox-1.8.4.jar
92
+ - target/dependency/jhighlight-1.0.jar
97
93
  - target/dependency/juniversalchardet-1.0.3.jar
98
94
  - target/dependency/metadata-extractor-2.6.2.jar
99
95
  - target/dependency/netcdf-4.2-min.jar
100
- - target/dependency/pdfbox-1.8.1.jar
101
- - target/dependency/poi-3.9.jar
102
- - target/dependency/poi-ooxml-3.9.jar
103
- - target/dependency/poi-ooxml-schemas-3.9.jar
104
- - target/dependency/poi-scratchpad-3.9.jar
96
+ - target/dependency/pdfbox-1.8.4.jar
97
+ - target/dependency/poi-3.10-beta2.jar
98
+ - target/dependency/poi-ooxml-3.10-beta2.jar
99
+ - target/dependency/poi-ooxml-schemas-3.10-beta2.jar
100
+ - target/dependency/poi-scratchpad-3.10-beta2.jar
105
101
  - target/dependency/rome-0.9.jar
106
102
  - target/dependency/slf4j-api-1.5.6.jar
107
103
  - target/dependency/tagsoup-1.2.1.jar
108
- - target/dependency/tika-core-1.4.jar
109
- - target/dependency/tika-parsers-1.4.jar
104
+ - target/dependency/tika-core-1.5.jar
105
+ - target/dependency/tika-parsers-1.5.jar
110
106
  - target/dependency/vorbis-java-core-0.1-tests.jar
111
107
  - target/dependency/vorbis-java-core-0.1.jar
112
108
  - target/dependency/vorbis-java-tika-0.1.jar
@@ -117,6 +113,7 @@ files:
117
113
  - target/dependency/xz-1.2.jar
118
114
  homepage: https://github.com/ricn/rika
119
115
  licenses: []
116
+ metadata: {}
120
117
  post_install_message:
121
118
  rdoc_options: []
122
119
  require_paths:
@@ -126,18 +123,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
123
  - - '>='
127
124
  - !ruby/object:Gem::Version
128
125
  version: '0'
129
- none: false
130
126
  required_rubygems_version: !ruby/object:Gem::Requirement
131
127
  requirements:
132
128
  - - '>='
133
129
  - !ruby/object:Gem::Version
134
130
  version: '0'
135
- none: false
136
131
  requirements: []
137
132
  rubyforge_project:
138
- rubygems_version: 1.8.24
133
+ rubygems_version: 2.1.9
139
134
  signing_key:
140
- specification_version: 3
135
+ specification_version: 4
141
136
  summary: A JRuby wrapper for Apache Tika to extract text and metadata from various file formats.
142
137
  test_files:
143
138
  - spec/fixtures/de.txt
Binary file