rika 1.4.0-java → 1.5.0-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 03d0a3d8e955115312ddf54329e6a613bf0d58e8
4
+ data.tar.gz: 6de10264cd6d8791b353f1e2a574f1eaaf982dd9
5
+ SHA512:
6
+ metadata.gz: 56ef6e0f28e6642dd624407335add98dededce244121996428008590bb1120908c4cb4b8b6964458bd032d2c521e985de524f578446c3c0c7102ad8a6b73d9a6
7
+ data.tar.gz: 67221ab8318af05b90f33cbb7520cc48fa7d474bd5c7170c76f54b268c4ab941b309bbd7c7ac704edd2aeb888634fcf5adf97a1a4d1ea1760d3fda270f6aa300
data/Rakefile CHANGED
@@ -8,4 +8,4 @@ task :default => :spec
8
8
  desc 'Download jars'
9
9
  task :download_jars do
10
10
  system "mvn dependency:copy-dependencies"
11
- end
11
+ end
data/lib/rika.rb CHANGED
@@ -5,13 +5,13 @@ raise "You need to run JRuby to use Rika" unless RUBY_PLATFORM =~ /java/
5
5
  require "rika/version"
6
6
  require 'uri'
7
7
  require 'net/http'
8
- require 'java'
8
+ require 'java'
9
9
 
10
10
  Dir[File.join(File.dirname(__FILE__), "../target/dependency/*.jar")].each do |jar|
11
11
  require jar
12
12
  end
13
13
 
14
- # Heavily based on the Apache Tika API: http://tika.apache.org/1.4/api/org/apache/tika/Tika.html
14
+ # Heavily based on the Apache Tika API: http://tika.apache.org/1.5/api/org/apache/tika/Tika.html
15
15
  module Rika
16
16
  import org.apache.tika.metadata.Metadata
17
17
  import org.apache.tika.Tika
@@ -36,7 +36,7 @@ module Rika
36
36
  end
37
37
 
38
38
  class Parser
39
-
39
+
40
40
  def initialize(file_location, max_content_length = -1, detector = DefaultDetector.new)
41
41
  @uri = file_location
42
42
  @tika = Tika.new(detector)
@@ -48,14 +48,14 @@ module Rika
48
48
 
49
49
  def content
50
50
  self.parse
51
- @content
51
+ @content
52
52
  end
53
53
 
54
54
  def metadata
55
55
  unless @metadata_ruby
56
56
  self.parse
57
57
  @metadata_ruby = {}
58
-
58
+
59
59
  @metadata_java.names.each do |name|
60
60
  @metadata_ruby[name] = @metadata_java.get(name)
61
61
  end
@@ -85,7 +85,7 @@ module Rika
85
85
 
86
86
  def language
87
87
  @lang ||= LanguageIdentifier.new(content)
88
-
88
+
89
89
  @lang.language
90
90
  end
91
91
 
@@ -96,7 +96,7 @@ module Rika
96
96
  end
97
97
 
98
98
  protected
99
-
99
+
100
100
  def parse
101
101
  @content ||= @tika.parse_to_string(input_stream, @metadata_java).to_s.strip
102
102
  end
data/lib/rika/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.0"
3
3
  end
data/pom.xml CHANGED
@@ -3,7 +3,7 @@
3
3
  <modelVersion>4.0.0</modelVersion>
4
4
 
5
5
  <name>Rika</name>
6
-
6
+
7
7
  <groupId>org.rika</groupId>
8
8
  <artifactId>Rika</artifactId>
9
9
  <version>1.0-SNAPSHOT</version>
@@ -13,8 +13,8 @@
13
13
  <dependency>
14
14
  <groupId>org.apache.tika</groupId>
15
15
  <artifactId>tika-parsers</artifactId>
16
- <version>1.4</version>
16
+ <version>1.5</version>
17
17
  <scope>test</scope>
18
18
  </dependency>
19
19
  </dependencies>
20
- </project>
20
+ </project>
data/rika.gemspec CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
16
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
17
17
  gem.require_paths = ["lib"]
18
- gem.add_development_dependency "rspec", "2.12.0"
19
- gem.add_development_dependency "rake", "10.0.3"
18
+ gem.add_development_dependency "rspec", "2.14.1"
19
+ gem.add_development_dependency "rake", "10.3.1"
20
20
  gem.platform = "java"
21
21
  end
data/spec/rika_spec.rb CHANGED
@@ -4,8 +4,8 @@ require 'spec_helper'
4
4
  require 'webrick'
5
5
 
6
6
  include WEBrick
7
-
8
- describe Rika::Parser do
7
+
8
+ describe Rika::Parser do
9
9
  before(:all) do
10
10
  @txt_parser = Rika::Parser.new(file_path("text_file.txt"))
11
11
  @docx_parser = Rika::Parser.new(file_path("document.docx"))
@@ -13,13 +13,13 @@ describe Rika::Parser do
13
13
  @pdf_parser = Rika::Parser.new(file_path("document.pdf"))
14
14
  @image_parser = Rika::Parser.new(file_path("image.jpg"))
15
15
  @unknown_parser = Rika::Parser.new(file_path("unknown.bin"))
16
- @dir = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures'))
17
- port = 50505
16
+ @dir = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures'))
17
+ port = 50515
18
18
  @url = "http://#{Socket.gethostname}:#{port}"
19
19
  @quote = "First they ignore you, then they ridicule you, then they fight you, then you win."
20
20
  @t1 = Thread.new do
21
- @server = HTTPServer.new(:Port => port, :DocumentRoot => @dir,
22
- :AccessLog => [], :Logger => WEBrick::Log::new("/dev/null", 7))
21
+ @server = HTTPServer.new(:Port => port, :DocumentRoot => @dir,
22
+ :AccessLog => [], :Logger => WEBrick::Log::new("/dev/null", 7))
23
23
  @server.start
24
24
  end
25
25
  @sample_pdf_filespec = file_path("document.pdf")
@@ -55,7 +55,7 @@ describe Rika::Parser do
55
55
  @docx_parser.content.should == @quote
56
56
  end
57
57
 
58
- it "should return the content in a pdf file" do
58
+ it "should return the content in a pdf file" do
59
59
  @pdf_parser.content.should == @quote
60
60
  end
61
61
 
@@ -70,7 +70,7 @@ describe Rika::Parser do
70
70
 
71
71
  it "should only return max content length for file over http" do
72
72
  parser = Rika::Parser.new(@url + "/document.pdf", 6)
73
- parser.content.should == "First"
73
+ parser.content.should == "First"
74
74
  end
75
75
 
76
76
  it "should be possible to read files over 100k by default" do
@@ -88,8 +88,8 @@ describe Rika::Parser do
88
88
  end
89
89
  end
90
90
 
91
- # We just test a few of the metadata fields for some common file formats
92
- # to make sure the integration with Apache Tika works. Apache Tika already
91
+ # We just test a few of the metadata fields for some common file formats
92
+ # to make sure the integration with Apache Tika works. Apache Tika already
93
93
  # have tests for all file formats it supports so we won't retest that
94
94
  describe '#metadata' do
95
95
  it "should return nil if metadata field does not exists" do
@@ -164,7 +164,7 @@ describe Rika::Parser do
164
164
 
165
165
  describe '#language' do
166
166
  it "should return the language of the content" do
167
-
167
+
168
168
  ["en", "de", "fr", "ru", "es"].each do |lang|
169
169
  txt = Rika::Parser.new(file_path("#{lang}.txt"))
170
170
  txt.language.should == lang
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
5
- prerelease:
4
+ version: 1.5.0
6
5
  platform: java
7
6
  authors:
8
7
  - Richard Nyström
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-07-04 00:00:00.000000000 Z
11
+ date: 2014-04-23 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rspec
@@ -17,14 +16,12 @@ dependencies:
17
16
  requirements:
18
17
  - - '='
19
18
  - !ruby/object:Gem::Version
20
- version: 2.12.0
21
- none: false
19
+ version: 2.14.1
22
20
  requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - '='
25
23
  - !ruby/object:Gem::Version
26
- version: 2.12.0
27
- none: false
24
+ version: 2.14.1
28
25
  prerelease: false
29
26
  type: :development
30
27
  - !ruby/object:Gem::Dependency
@@ -33,14 +30,12 @@ dependencies:
33
30
  requirements:
34
31
  - - '='
35
32
  - !ruby/object:Gem::Version
36
- version: 10.0.3
37
- none: false
33
+ version: 10.3.1
38
34
  requirement: !ruby/object:Gem::Requirement
39
35
  requirements:
40
36
  - - '='
41
37
  - !ruby/object:Gem::Version
42
- version: 10.0.3
43
- none: false
38
+ version: 10.3.1
44
39
  prerelease: false
45
40
  type: :development
46
41
  description: ' A JRuby wrapper for Apache Tika to extract text and metadata from various
@@ -89,24 +84,25 @@ files:
89
84
  - target/dependency/commons-compress-1.5.jar
90
85
  - target/dependency/commons-logging-1.1.1.jar
91
86
  - target/dependency/dom4j-1.6.1.jar
92
- - target/dependency/fontbox-1.8.1.jar
87
+ - target/dependency/fontbox-1.8.4.jar
93
88
  - target/dependency/geronimo-stax-api_1.0_spec-1.0.1.jar
94
89
  - target/dependency/isoparser-1.0-RC-1.jar
95
90
  - target/dependency/jdom-1.0.jar
96
- - target/dependency/jempbox-1.8.1.jar
91
+ - target/dependency/jempbox-1.8.4.jar
92
+ - target/dependency/jhighlight-1.0.jar
97
93
  - target/dependency/juniversalchardet-1.0.3.jar
98
94
  - target/dependency/metadata-extractor-2.6.2.jar
99
95
  - target/dependency/netcdf-4.2-min.jar
100
- - target/dependency/pdfbox-1.8.1.jar
101
- - target/dependency/poi-3.9.jar
102
- - target/dependency/poi-ooxml-3.9.jar
103
- - target/dependency/poi-ooxml-schemas-3.9.jar
104
- - target/dependency/poi-scratchpad-3.9.jar
96
+ - target/dependency/pdfbox-1.8.4.jar
97
+ - target/dependency/poi-3.10-beta2.jar
98
+ - target/dependency/poi-ooxml-3.10-beta2.jar
99
+ - target/dependency/poi-ooxml-schemas-3.10-beta2.jar
100
+ - target/dependency/poi-scratchpad-3.10-beta2.jar
105
101
  - target/dependency/rome-0.9.jar
106
102
  - target/dependency/slf4j-api-1.5.6.jar
107
103
  - target/dependency/tagsoup-1.2.1.jar
108
- - target/dependency/tika-core-1.4.jar
109
- - target/dependency/tika-parsers-1.4.jar
104
+ - target/dependency/tika-core-1.5.jar
105
+ - target/dependency/tika-parsers-1.5.jar
110
106
  - target/dependency/vorbis-java-core-0.1-tests.jar
111
107
  - target/dependency/vorbis-java-core-0.1.jar
112
108
  - target/dependency/vorbis-java-tika-0.1.jar
@@ -117,6 +113,7 @@ files:
117
113
  - target/dependency/xz-1.2.jar
118
114
  homepage: https://github.com/ricn/rika
119
115
  licenses: []
116
+ metadata: {}
120
117
  post_install_message:
121
118
  rdoc_options: []
122
119
  require_paths:
@@ -126,18 +123,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
123
  - - '>='
127
124
  - !ruby/object:Gem::Version
128
125
  version: '0'
129
- none: false
130
126
  required_rubygems_version: !ruby/object:Gem::Requirement
131
127
  requirements:
132
128
  - - '>='
133
129
  - !ruby/object:Gem::Version
134
130
  version: '0'
135
- none: false
136
131
  requirements: []
137
132
  rubyforge_project:
138
- rubygems_version: 1.8.24
133
+ rubygems_version: 2.1.9
139
134
  signing_key:
140
- specification_version: 3
135
+ specification_version: 4
141
136
  summary: A JRuby wrapper for Apache Tika to extract text and metadata from various file formats.
142
137
  test_files:
143
138
  - spec/fixtures/de.txt
Binary file